]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_shutdown.c
216a32baf01a2dbfa8e88f83be717e61ee70bbb7
[apple/xnu.git] / bsd / kern / kern_shutdown.c
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * File: bsd/kern/kern_shutdown.c
30 *
31 * Copyright (C) 1989, NeXT, Inc.
32 *
33 */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/vm.h>
39 #include <sys/proc_internal.h>
40 #include <sys/user.h>
41 #include <sys/reboot.h>
42 #include <sys/conf.h>
43 #include <sys/vnode_internal.h>
44 #include <sys/file_internal.h>
45 #include <sys/clist.h>
46 #include <sys/callout.h>
47 #include <sys/mbuf.h>
48 #include <sys/msgbuf.h>
49 #include <sys/ioctl.h>
50 #include <sys/signal.h>
51 #include <sys/tty.h>
52 #include <kern/task.h>
53 #include <sys/quota.h>
54 #include <vm/vm_kern.h>
55 #include <mach/vm_param.h>
56 #include <sys/filedesc.h>
57 #include <mach/host_priv.h>
58 #include <mach/host_reboot.h>
59
60 #include <security/audit/audit.h>
61
62 #include <kern/sched_prim.h> /* for thread_block() */
63 #include <kern/host.h> /* for host_priv_self() */
64 #include <net/if_var.h> /* for if_down_all() */
65 #include <sys/buf_internal.h> /* for count_busy_buffers() */
66 #include <sys/mount_internal.h> /* for vfs_unmountall() */
67 #include <mach/task.h> /* for task_suspend() */
68 #include <sys/sysproto.h> /* abused for sync() */
69 #include <kern/clock.h> /* for delay_for_interval() */
70 #include <libkern/OSAtomic.h>
71
72 #include <sys/kdebug.h>
73
/*
 * Shutdown latch: reboot_kernel() atomically flips this from 0 to 1 on
 * entry, and get_system_inshutdown() exposes its value.
 */
uint32_t system_inshutdown = 0;

/* XXX should be in a header file somewhere, but isn't */
/* Optional hook; reboot_kernel() calls it (when non-NULL) before the final sync. */
extern void (*unmountroot_pre_hook)(void);

/*
 * Number of processes that have been flagged P_LIST_EXITCOUNT by the
 * shutdown callbacks; proc_shutdown() sleeps on its address while it is
 * non-zero.
 */
unsigned int proc_shutdown_exitcount = 0;

/* Shutdown-log helpers and process teardown, all defined later in this file. */
static int sd_openlog(vfs_context_t);
static int sd_closelog(vfs_context_t);
static void sd_log(vfs_context_t, const char *, ...);
static void proc_shutdown(void);
static void kernel_hwm_panic_info(void);
extern void IOSystemShutdownNotification(void);
#if DEVELOPMENT || DEBUG
extern boolean_t kdp_has_polled_corefile(void);
#endif /* DEVELOPMENT || DEBUG */
90
/*
 * Arguments handed to the sd_filt1()/sd_filt2() process filters.
 */
struct sd_filterargs{
	int delayterm;		/* when 0, processes with P_LDELAYTERM set are skipped */
	int shutdownstate;	/* compared against p_shutdownstate (sd_filt1: must match, sd_filt2: must differ) */
};
95
96
/*
 * Arguments handed to the sd_callback1()/sd_callback2()/sd_callback3()
 * per-process callbacks.
 */
struct sd_iterargs {
	int signo;		/* the signal to be posted */
	int setsdstate;		/* shutdown state to be stored in p_shutdownstate */
	int countproc;		/* non-zero: flag the process P_LIST_EXITCOUNT and count it */
	int activecount;	/* number of processes on which action was done */
};
103
/* Vnode of the open shutdown log, or NULLVP while the log is not open. */
static vnode_t sd_logvp = NULLVP;
/* File offset at which sd_log() performs its next write. */
static off_t sd_log_offset = 0;


/* Filters and per-process callbacks used by proc_shutdown(). */
static int sd_filt1(proc_t, void *);
static int sd_filt2(proc_t, void *);
static int sd_callback1(proc_t p, void * arg);
static int sd_callback2(proc_t p, void * arg);
static int sd_callback3(proc_t p, void * arg);

/* Panic-report state filled in by kernel_hwm_panic_info(). */
extern boolean_t panic_include_zprint;
extern vm_offset_t panic_kext_memory_info;
extern vm_size_t panic_kext_memory_size;
117
118 static void
119 kernel_hwm_panic_info(void)
120 {
121 mach_memory_info_t *memory_info;
122 unsigned int num_sites;
123 kern_return_t kr;
124
125 panic_include_zprint = TRUE;
126 panic_kext_memory_info = 0;
127 panic_kext_memory_size = 0;
128
129 num_sites = VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT;
130 panic_kext_memory_size = round_page(num_sites * sizeof(mach_zone_info_t));
131
132 kr = kmem_alloc(kernel_map, (vm_offset_t *) &panic_kext_memory_info, panic_kext_memory_size, VM_KERN_MEMORY_OSFMK);
133 if (kr != KERN_SUCCESS) {
134 panic_kext_memory_info = 0;
135 return;
136 }
137 memory_info = (mach_memory_info_t *)panic_kext_memory_info;
138 vm_page_diagnose(memory_info, num_sites, 0);
139 return;
140 }
141
142 int
143 get_system_inshutdown()
144 {
145 return (system_inshutdown);
146 }
147
148 int
149 reboot_kernel(int howto, char *message)
150 {
151 int hostboot_option=0;
152
153 if (!OSCompareAndSwap(0, 1, &system_inshutdown)) {
154 if ( (howto&RB_QUICK) == RB_QUICK)
155 goto force_reboot;
156 return (EBUSY);
157 }
158 /*
159 * Temporary hack to notify the power management root domain
160 * that the system will shut down.
161 */
162 IOSystemShutdownNotification();
163
164 if ((howto&RB_QUICK)==RB_QUICK) {
165 printf("Quick reboot...\n");
166 if ((howto&RB_NOSYNC)==0) {
167 sync((proc_t)NULL, (void *)NULL, (int *)NULL);
168 }
169 }
170 else if ((howto&RB_NOSYNC)==0) {
171 int iter, nbusy;
172
173 printf("syncing disks... ");
174
175 /*
176 * Release vnodes held by texts before sync.
177 */
178
179 /* handle live procs (deallocate their root and current directories), suspend initproc */
180 proc_shutdown();
181
182 #if CONFIG_AUDIT
183 audit_shutdown();
184 #endif
185
186 if (unmountroot_pre_hook != NULL)
187 unmountroot_pre_hook();
188
189 sync((proc_t)NULL, (void *)NULL, (int *)NULL);
190
191 if (kdebug_enable)
192 kdbg_dump_trace_to_file("/var/log/shutdown/shutdown.trace");
193
194 /*
195 * Unmount filesystems
196 */
197
198 #if DEVELOPMENT || DEBUG
199 if (!(howto & RB_PANIC) || !kdp_has_polled_corefile())
200 #endif /* DEVELOPMENT || DEBUG */
201 {
202 vfs_unmountall();
203 }
204
205 /* Wait for the buffer cache to clean remaining dirty buffers */
206 for (iter = 0; iter < 100; iter++) {
207 nbusy = count_busy_buffers();
208 if (nbusy == 0)
209 break;
210 printf("%d ", nbusy);
211 delay_for_interval( 1 * nbusy, 1000 * 1000);
212 }
213 if (nbusy)
214 printf("giving up\n");
215 else
216 printf("done\n");
217 }
218 #if NETWORKING
219 /*
220 * Can't just use an splnet() here to disable the network
221 * because that will lock out softints which the disk
222 * drivers depend on to finish DMAs.
223 */
224 if_down_all();
225 #endif /* NETWORKING */
226
227 force_reboot:
228
229 if (howto & RB_PANIC) {
230 if (strncmp(message, "Kernel memory has exceeded limits", 33) == 0) {
231 kernel_hwm_panic_info();
232 }
233 panic ("userspace panic: %s", message);
234 }
235
236 if (howto & RB_POWERDOWN)
237 hostboot_option = HOST_REBOOT_HALT;
238 if (howto & RB_HALT)
239 hostboot_option = HOST_REBOOT_HALT;
240
241 if (howto & RB_UPSDELAY) {
242 hostboot_option = HOST_REBOOT_UPSDELAY;
243 }
244
245 host_reboot(host_priv_self(), hostboot_option);
246 /*
247 * should not be reached
248 */
249 return (0);
250 }
251
252 static int
253 sd_openlog(vfs_context_t ctx)
254 {
255 int error = 0;
256 struct timeval tv;
257
258 /* Open shutdown log */
259 if ((error = vnode_open(PROC_SHUTDOWN_LOG, (O_CREAT | FWRITE | O_NOFOLLOW), 0644, 0, &sd_logvp, ctx))) {
260 printf("Failed to open %s: error %d\n", PROC_SHUTDOWN_LOG, error);
261 sd_logvp = NULLVP;
262 return error;
263 }
264
265 vnode_setsize(sd_logvp, (off_t)0, 0, ctx);
266
267 /* Write a little header */
268 microtime(&tv);
269 sd_log(ctx, "Process shutdown log. Current time is %lu (in seconds).\n\n", tv.tv_sec);
270
271 return 0;
272 }
273
274 static int
275 sd_closelog(vfs_context_t ctx)
276 {
277 int error = 0;
278 if (sd_logvp != NULLVP) {
279 VNOP_FSYNC(sd_logvp, MNT_WAIT, ctx);
280 error = vnode_close(sd_logvp, FWRITE, ctx);
281 }
282
283 return error;
284 }
285
286 static void
287 sd_log(vfs_context_t ctx, const char *fmt, ...)
288 {
289 int resid, log_error, len;
290 char logbuf[100];
291 va_list arglist;
292
293 /* If the log isn't open yet, open it */
294 if (sd_logvp == NULLVP) {
295 if (sd_openlog(ctx) != 0) {
296 /* Couldn't open, we fail out */
297 return;
298 }
299 }
300
301 va_start(arglist, fmt);
302 len = vsnprintf(logbuf, sizeof(logbuf), fmt, arglist);
303 log_error = vn_rdwr(UIO_WRITE, sd_logvp, (caddr_t)logbuf, len, sd_log_offset,
304 UIO_SYSSPACE, IO_UNIT | IO_NOAUTH, vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx));
305 if (log_error == EIO || log_error == 0) {
306 sd_log_offset += (len - resid);
307 }
308
309 va_end(arglist);
310
311 }
312
313 static int
314 sd_filt1(proc_t p, void * args)
315 {
316 proc_t self = current_proc();
317 struct sd_filterargs * sf = (struct sd_filterargs *)args;
318 int delayterm = sf-> delayterm;
319 int shutdownstate = sf->shutdownstate;
320
321 if (((p->p_flag&P_SYSTEM) != 0) || (p->p_ppid == 0)
322 ||(p == self) || (p->p_stat == SZOMB)
323 || (p->p_shutdownstate != shutdownstate)
324 ||((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM))
325 || ((p->p_sigcatch & sigmask(SIGTERM))== 0)) {
326 return(0);
327 }
328 else
329 return(1);
330 }
331
332
333 static int
334 sd_callback1(proc_t p, void * args)
335 {
336 struct sd_iterargs * sd = (struct sd_iterargs *)args;
337 int signo = sd->signo;
338 int setsdstate = sd->setsdstate;
339 int countproc = sd->countproc;
340
341 proc_lock(p);
342 p->p_shutdownstate = setsdstate;
343 if (p->p_stat != SZOMB) {
344 proc_unlock(p);
345 if (countproc != 0) {
346 proc_list_lock();
347 p->p_listflag |= P_LIST_EXITCOUNT;
348 proc_shutdown_exitcount++;
349 proc_list_unlock();
350 }
351
352 psignal(p, signo);
353 if (countproc != 0)
354 sd->activecount++;
355 } else {
356 proc_unlock(p);
357 }
358
359 return PROC_RETURNED;
360 }
361
362 static int
363 sd_filt2(proc_t p, void * args)
364 {
365 proc_t self = current_proc();
366 struct sd_filterargs * sf = (struct sd_filterargs *)args;
367 int delayterm = sf-> delayterm;
368 int shutdownstate = sf->shutdownstate;
369
370 if (((p->p_flag&P_SYSTEM) != 0) || (p->p_ppid == 0)
371 ||(p == self) || (p->p_stat == SZOMB)
372 || (p->p_shutdownstate == shutdownstate)
373 ||((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM))) {
374 return(0);
375 }
376 else
377 return(1);
378 }
379
380 static int
381 sd_callback2(proc_t p, void * args)
382 {
383 struct sd_iterargs * sd = (struct sd_iterargs *)args;
384 int signo = sd->signo;
385 int setsdstate = sd->setsdstate;
386 int countproc = sd->countproc;
387
388 proc_lock(p);
389 p->p_shutdownstate = setsdstate;
390 if (p->p_stat != SZOMB) {
391 proc_unlock(p);
392 if (countproc != 0) {
393 proc_list_lock();
394 p->p_listflag |= P_LIST_EXITCOUNT;
395 proc_shutdown_exitcount++;
396 proc_list_unlock();
397 }
398 psignal(p, signo);
399 if (countproc != 0)
400 sd->activecount++;
401 } else {
402 proc_unlock(p);
403 }
404
405 return PROC_RETURNED;
406 }
407
408 static int
409 sd_callback3(proc_t p, void * args)
410 {
411 struct sd_iterargs * sd = (struct sd_iterargs *)args;
412 vfs_context_t ctx = vfs_context_current();
413
414 int setsdstate = sd->setsdstate;
415
416 proc_lock(p);
417 p->p_shutdownstate = setsdstate;
418 if (p->p_stat != SZOMB) {
419 /*
420 * NOTE: following code ignores sig_lock and plays
421 * with exit_thread correctly. This is OK unless we
422 * are a multiprocessor, in which case I do not
423 * understand the sig_lock. This needs to be fixed.
424 * XXX
425 */
426 if (p->exit_thread) { /* someone already doing it */
427 proc_unlock(p);
428 /* give him a chance */
429 thread_block(THREAD_CONTINUE_NULL);
430 } else {
431 p->exit_thread = current_thread();
432 printf(".");
433
434 sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid);
435
436 proc_unlock(p);
437 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
438 p->p_pid, 0, 1, 0, 0);
439 sd->activecount++;
440 exit1(p, 1, (int *)NULL);
441 }
442 } else {
443 proc_unlock(p);
444 }
445
446 return PROC_RETURNED;
447 }
448
449
450 /*
451 * proc_shutdown()
452 *
453 * Shutdown down proc system (release references to current and root
454 * dirs for each process).
455 *
456 * POSIX modifications:
457 *
458 * For POSIX fcntl() file locking call vno_lockrelease() on
459 * the file to release all of its record locks, if any.
460 */
461
462 static void
463 proc_shutdown(void)
464 {
465 vfs_context_t ctx = vfs_context_current();
466 struct proc *p, *self;
467 int delayterm = 0;
468 struct sd_filterargs sfargs;
469 struct sd_iterargs sdargs;
470 int error = 0;
471 struct timespec ts;
472
473 /*
474 * Kill as many procs as we can. (Except ourself...)
475 */
476 self = (struct proc *)current_proc();
477
478 /*
479 * Signal the init with SIGTERM so that he does not launch
480 * new processes
481 */
482 p = proc_find(1);
483 if (p && p != self) {
484 psignal(p, SIGTERM);
485 }
486 proc_rele(p);
487
488 printf("Killing all processes ");
489
490 sigterm_loop:
491 /*
492 * send SIGTERM to those procs interested in catching one
493 */
494 sfargs.delayterm = delayterm;
495 sfargs.shutdownstate = 0;
496 sdargs.signo = SIGTERM;
497 sdargs.setsdstate = 1;
498 sdargs.countproc = 1;
499 sdargs.activecount = 0;
500
501 error = 0;
502 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */
503 proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs);
504
505 if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
506 proc_list_lock();
507 if (proc_shutdown_exitcount != 0) {
508 /*
509 * now wait for up to 30 seconds to allow those procs catching SIGTERM
510 * to digest it
511 * as soon as these procs have exited, we'll continue on to the next step
512 */
513 ts.tv_sec = 30;
514 ts.tv_nsec = 0;
515 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
516 if (error != 0) {
517 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
518 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
519 p->p_listflag &= ~P_LIST_EXITCOUNT;
520 }
521 for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
522 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
523 p->p_listflag &= ~P_LIST_EXITCOUNT;
524 }
525 }
526
527 }
528 proc_list_unlock();
529 }
530 if (error == ETIMEDOUT) {
531 /*
532 * log the names of the unresponsive tasks
533 */
534
535
536 proc_list_lock();
537
538 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
539 if (p->p_shutdownstate == 1) {
540 printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
541 sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
542 }
543 }
544
545 proc_list_unlock();
546
547 delay_for_interval(1000 * 5, 1000 * 1000);
548 }
549
550 /*
551 * send a SIGKILL to all the procs still hanging around
552 */
553 sfargs.delayterm = delayterm;
554 sfargs.shutdownstate = 2;
555 sdargs.signo = SIGKILL;
556 sdargs.setsdstate = 2;
557 sdargs.countproc = 1;
558 sdargs.activecount = 0;
559
560 /* post a SIGKILL to all that catch SIGTERM and not marked for delay */
561 proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs);
562
563 if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
564 proc_list_lock();
565 if (proc_shutdown_exitcount != 0) {
566 /*
567 * wait for up to 60 seconds to allow these procs to exit normally
568 *
569 * History: The delay interval was changed from 100 to 200
570 * for NFS requests in particular.
571 */
572 ts.tv_sec = 60;
573 ts.tv_nsec = 0;
574 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
575 if (error != 0) {
576 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
577 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
578 p->p_listflag &= ~P_LIST_EXITCOUNT;
579 }
580 for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
581 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
582 p->p_listflag &= ~P_LIST_EXITCOUNT;
583 }
584 }
585 }
586 proc_list_unlock();
587 }
588
589 /*
590 * if we still have procs that haven't exited, then brute force 'em
591 */
592 sfargs.delayterm = delayterm;
593 sfargs.shutdownstate = 3;
594 sdargs.signo = 0;
595 sdargs.setsdstate = 3;
596 sdargs.countproc = 0;
597 sdargs.activecount = 0;
598
599 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */
600 proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs);
601 printf("\n");
602
603 /* Now start the termination of processes that are marked for delayed termn */
604 if (delayterm == 0) {
605 delayterm = 1;
606 goto sigterm_loop;
607 }
608
609 sd_closelog(ctx);
610
611 /*
612 * Now that all other processes have been terminated, suspend init
613 */
614 task_suspend_internal(initproc->task);
615
616 /* drop the ref on initproc */
617 proc_rele(initproc);
618 printf("continuing\n");
619 }
620