]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_shutdown.c
xnu-7195.60.75.tar.gz
[apple/xnu.git] / bsd / kern / kern_shutdown.c
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * File: bsd/kern/kern_shutdown.c
30 *
31 * Copyright (C) 1989, NeXT, Inc.
32 *
33 */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/vm.h>
39 #include <sys/proc_internal.h>
40 #include <sys/user.h>
41 #include <sys/reboot.h>
42 #include <sys/conf.h>
43 #include <sys/vnode_internal.h>
44 #include <sys/file_internal.h>
45 #include <sys/mbuf.h>
46 #include <sys/msgbuf.h>
47 #include <sys/ioctl.h>
48 #include <sys/signal.h>
49 #include <sys/tty.h>
50 #include <kern/task.h>
51 #include <sys/quota.h>
52 #include <vm/vm_kern.h>
53 #include <mach/vm_param.h>
54 #include <sys/filedesc.h>
55 #include <mach/host_priv.h>
56 #include <mach/host_reboot.h>
57
58 #include <security/audit/audit.h>
59
60 #include <kern/sched_prim.h> /* for thread_block() */
61 #include <kern/host.h> /* for host_priv_self() */
62 #include <net/if_var.h> /* for if_down_all() */
63 #include <sys/buf_internal.h> /* for count_busy_buffers() */
64 #include <sys/mount_internal.h> /* for vfs_unmountall() */
65 #include <mach/task.h> /* for task_suspend() */
66 #include <sys/sysproto.h> /* abused for sync() */
67 #include <kern/clock.h> /* for delay_for_interval() */
68 #include <libkern/OSAtomic.h>
69 #include <IOKit/IOPlatformExpert.h>
70
71 #include <sys/kdebug.h>
72
73 uint32_t system_inshutdown = 0;
74
75 #if XNU_TARGET_OS_OSX
76 /* XXX should be in a header file somewhere, but isn't */
77 extern void (*unmountroot_pre_hook)(void);
78 #endif
79
80 unsigned int proc_shutdown_exitcount = 0;
81
82 static int sd_openlog(vfs_context_t);
83 static int sd_closelog(vfs_context_t);
84 static void sd_log(vfs_context_t, const char *, ...);
85 static void proc_shutdown(void);
86 static void zprint_panic_info(void);
87 extern void halt_log_enter(const char * what, const void * pc, uint64_t time);
88
89 #if DEVELOPMENT || DEBUG
90 extern boolean_t kdp_has_polled_corefile(void);
91 #endif /* DEVELOPMENT || DEBUG */
92
/*
 * Selection criteria handed to the sd_filt1() / sd_filt2() process filters.
 */
struct sd_filterargs {
	/* when 0, processes that have P_LDELAYTERM set are filtered out */
	int delayterm;
	/*
	 * value compared against p_shutdownstate: sd_filt1() keeps processes
	 * whose state equals it, sd_filt2() keeps those whose state differs
	 */
	int shutdownstate;
};
97
98
/*
 * Per-pass parameters and result counter shared with the sd_callback*()
 * routines.
 */
struct sd_iterargs {
	/* signal handed to psignal() by sd_callback1() / sd_callback2() */
	int signo;
	/* value stored into p_shutdownstate of every visited process */
	int setsdstate;
	/* non-zero: tag the process with P_LIST_EXITCOUNT and count it */
	int countproc;
	/* number of processes the callback actually acted on */
	int activecount;
};
105
106 static vnode_t sd_logvp = NULLVP;
107 static off_t sd_log_offset = 0;
108
109
110 static int sd_filt1(proc_t, void *);
111 static int sd_filt2(proc_t, void *);
112 static int sd_callback1(proc_t p, void * arg);
113 static int sd_callback2(proc_t p, void * arg);
114 static int sd_callback3(proc_t p, void * arg);
115
116 extern boolean_t panic_include_zprint;
117 extern mach_memory_info_t *panic_kext_memory_info;
118 extern vm_size_t panic_kext_memory_size;
119
120 static void
121 zprint_panic_info(void)
122 {
123 unsigned int num_sites;
124 kern_return_t kr;
125
126 panic_include_zprint = TRUE;
127 panic_kext_memory_info = NULL;
128 panic_kext_memory_size = 0;
129
130 num_sites = vm_page_diagnose_estimate();
131 panic_kext_memory_size = num_sites * sizeof(panic_kext_memory_info[0]);
132
133 kr = kmem_alloc(kernel_map, (vm_offset_t *)&panic_kext_memory_info, round_page(panic_kext_memory_size), VM_KERN_MEMORY_OSFMK);
134 if (kr != KERN_SUCCESS) {
135 panic_kext_memory_info = NULL;
136 return;
137 }
138
139 vm_page_diagnose(panic_kext_memory_info, num_sites, 0);
140 }
141
142 int
143 get_system_inshutdown()
144 {
145 return system_inshutdown;
146 }
147
148 __abortlike
149 static void
150 panic_kernel(int howto, char *message)
151 {
152 if ((howto & RB_PANIC_ZPRINT) == RB_PANIC_ZPRINT) {
153 zprint_panic_info();
154 }
155 panic("userspace panic: %s", message);
156 }
157
158 extern boolean_t compressor_store_stop_compaction;
159 extern lck_mtx_t vm_swap_data_lock;
160 extern int vm_swapfile_create_thread_running;
161 extern int vm_swapfile_gc_thread_running;
162
163 int
164 reboot_kernel(int howto, char *message)
165 {
166 int hostboot_option = 0;
167 uint64_t startTime;
168
169 if ((howto & (RB_PANIC | RB_QUICK)) == (RB_PANIC | RB_QUICK)) {
170 panic_kernel(howto, message);
171 }
172
173 if (!OSCompareAndSwap(0, 1, &system_inshutdown)) {
174 if ((howto & RB_QUICK) == RB_QUICK) {
175 goto force_reboot;
176 }
177 return EBUSY;
178 }
179
180 lck_mtx_lock(&vm_swap_data_lock);
181
182 /* Turn OFF future swapfile reclaimation / compaction etc.*/
183 compressor_store_stop_compaction = TRUE;
184
185 /* wait for any current swapfile work to end */
186 while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
187 assert_wait((event_t)&compressor_store_stop_compaction, THREAD_UNINT);
188
189 lck_mtx_unlock(&vm_swap_data_lock);
190
191 thread_block(THREAD_CONTINUE_NULL);
192
193 lck_mtx_lock(&vm_swap_data_lock);
194 }
195
196 lck_mtx_unlock(&vm_swap_data_lock);
197
198 /*
199 * Notify the power management root domain that the system will shut down.
200 */
201 IOSystemShutdownNotification(kIOSystemShutdownNotificationStageProcessExit);
202
203 if ((howto & RB_QUICK) == RB_QUICK) {
204 printf("Quick reboot...\n");
205 if ((howto & RB_NOSYNC) == 0) {
206 sync((proc_t)NULL, (void *)NULL, (int *)NULL);
207 }
208 } else if ((howto & RB_NOSYNC) == 0) {
209 int iter, nbusy;
210
211 printf("syncing disks... ");
212
213 /*
214 * Release vnodes held by texts before sync.
215 */
216
217 /* handle live procs (deallocate their root and current directories), suspend initproc */
218
219 startTime = mach_absolute_time();
220 proc_shutdown();
221 halt_log_enter("proc_shutdown", 0, mach_absolute_time() - startTime);
222
223 #if CONFIG_AUDIT
224 startTime = mach_absolute_time();
225 audit_shutdown();
226 halt_log_enter("audit_shutdown", 0, mach_absolute_time() - startTime);
227 #endif
228
229 #if XNU_TARGET_OS_OSX
230 if (unmountroot_pre_hook != NULL) {
231 unmountroot_pre_hook();
232 }
233 #endif
234
235 startTime = mach_absolute_time();
236 sync((proc_t)NULL, (void *)NULL, (int *)NULL);
237
238 if (kdebug_enable) {
239 startTime = mach_absolute_time();
240 kdbg_dump_trace_to_file("/var/log/shutdown/shutdown.trace");
241 halt_log_enter("shutdown.trace", 0, mach_absolute_time() - startTime);
242 }
243
244 IOSystemShutdownNotification(kIOSystemShutdownNotificationStageRootUnmount);
245
246 /*
247 * Unmount filesystems
248 */
249
250 #if DEVELOPMENT || DEBUG
251 if (!(howto & RB_PANIC) || !kdp_has_polled_corefile())
252 #endif /* DEVELOPMENT || DEBUG */
253 {
254 startTime = mach_absolute_time();
255 vfs_unmountall();
256 halt_log_enter("vfs_unmountall", 0, mach_absolute_time() - startTime);
257 }
258
259 /* Wait for the buffer cache to clean remaining dirty buffers */
260 startTime = mach_absolute_time();
261 for (iter = 0; iter < 100; iter++) {
262 nbusy = count_busy_buffers();
263 if (nbusy == 0) {
264 break;
265 }
266 printf("%d ", nbusy);
267 delay_for_interval( 1 * nbusy, 1000 * 1000);
268 }
269 if (nbusy) {
270 printf("giving up\n");
271 } else {
272 printf("done\n");
273 }
274 halt_log_enter("bufferclean", 0, mach_absolute_time() - startTime);
275 }
276 #if NETWORKING
277 /*
278 * Can't just use an splnet() here to disable the network
279 * because that will lock out softints which the disk
280 * drivers depend on to finish DMAs.
281 */
282 startTime = mach_absolute_time();
283 if_down_all();
284 halt_log_enter("if_down_all", 0, mach_absolute_time() - startTime);
285 #endif /* NETWORKING */
286
287 force_reboot:
288
289 if (howto & RB_PANIC) {
290 panic_kernel(howto, message);
291 }
292
293 if (howto & RB_HALT) {
294 hostboot_option = HOST_REBOOT_HALT;
295 }
296
297 if (howto & RB_UPSDELAY) {
298 hostboot_option = HOST_REBOOT_UPSDELAY;
299 }
300
301 host_reboot(host_priv_self(), hostboot_option);
302 /*
303 * should not be reached
304 */
305 return 0;
306 }
307
308 static int
309 sd_openlog(vfs_context_t ctx)
310 {
311 int error = 0;
312 struct timeval tv;
313
314 /* Open shutdown log */
315 if ((error = vnode_open(PROC_SHUTDOWN_LOG, (O_CREAT | FWRITE | O_NOFOLLOW), 0644, 0, &sd_logvp, ctx))) {
316 printf("Failed to open %s: error %d\n", PROC_SHUTDOWN_LOG, error);
317 sd_logvp = NULLVP;
318 return error;
319 }
320
321 vnode_setsize(sd_logvp, (off_t)0, 0, ctx);
322
323 /* Write a little header */
324 microtime(&tv);
325 sd_log(ctx, "Process shutdown log. Current time is %lu (in seconds).\n\n", tv.tv_sec);
326
327 return 0;
328 }
329
330 static int
331 sd_closelog(vfs_context_t ctx)
332 {
333 int error = 0;
334 if (sd_logvp != NULLVP) {
335 VNOP_FSYNC(sd_logvp, MNT_WAIT, ctx);
336 error = vnode_close(sd_logvp, FWRITE, ctx);
337 }
338
339 return error;
340 }
341
342 static void
343 sd_log(vfs_context_t ctx, const char *fmt, ...)
344 {
345 int resid, log_error, len;
346 char logbuf[100];
347 va_list arglist;
348
349 /* If the log isn't open yet, open it */
350 if (sd_logvp == NULLVP) {
351 if (sd_openlog(ctx) != 0) {
352 /* Couldn't open, we fail out */
353 return;
354 }
355 }
356
357 va_start(arglist, fmt);
358 len = vsnprintf(logbuf, sizeof(logbuf), fmt, arglist);
359 log_error = vn_rdwr(UIO_WRITE, sd_logvp, (caddr_t)logbuf, len, sd_log_offset,
360 UIO_SYSSPACE, IO_UNIT | IO_NOAUTH, vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx));
361 if (log_error == EIO || log_error == 0) {
362 sd_log_offset += (len - resid);
363 }
364
365 va_end(arglist);
366 }
367
368 static int
369 sd_filt1(proc_t p, void * args)
370 {
371 proc_t self = current_proc();
372 struct sd_filterargs * sf = (struct sd_filterargs *)args;
373 int delayterm = sf->delayterm;
374 int shutdownstate = sf->shutdownstate;
375
376 if (((p->p_flag & P_SYSTEM) != 0) || (p->p_ppid == 0)
377 || (p == self) || (p->p_stat == SZOMB)
378 || (p->p_shutdownstate != shutdownstate)
379 || ((delayterm == 0) && ((p->p_lflag & P_LDELAYTERM) == P_LDELAYTERM))
380 || ((p->p_sigcatch & sigmask(SIGTERM)) == 0)) {
381 return 0;
382 } else {
383 return 1;
384 }
385 }
386
387
388 static int
389 sd_callback1(proc_t p, void * args)
390 {
391 struct sd_iterargs * sd = (struct sd_iterargs *)args;
392 int signo = sd->signo;
393 int setsdstate = sd->setsdstate;
394 int countproc = sd->countproc;
395
396 proc_lock(p);
397 p->p_shutdownstate = (char)setsdstate;
398 if (p->p_stat != SZOMB) {
399 proc_unlock(p);
400 if (countproc != 0) {
401 proc_list_lock();
402 p->p_listflag |= P_LIST_EXITCOUNT;
403 proc_shutdown_exitcount++;
404 proc_list_unlock();
405 }
406
407 psignal(p, signo);
408 if (countproc != 0) {
409 sd->activecount++;
410 }
411 } else {
412 proc_unlock(p);
413 }
414
415 return PROC_RETURNED;
416 }
417
418 static int
419 sd_filt2(proc_t p, void * args)
420 {
421 proc_t self = current_proc();
422 struct sd_filterargs * sf = (struct sd_filterargs *)args;
423 int delayterm = sf->delayterm;
424 int shutdownstate = sf->shutdownstate;
425
426 if (((p->p_flag & P_SYSTEM) != 0) || (p->p_ppid == 0)
427 || (p == self) || (p->p_stat == SZOMB)
428 || (p->p_shutdownstate == shutdownstate)
429 || ((delayterm == 0) && ((p->p_lflag & P_LDELAYTERM) == P_LDELAYTERM))) {
430 return 0;
431 } else {
432 return 1;
433 }
434 }
435
436 static int
437 sd_callback2(proc_t p, void * args)
438 {
439 struct sd_iterargs * sd = (struct sd_iterargs *)args;
440 int signo = sd->signo;
441 int setsdstate = sd->setsdstate;
442 int countproc = sd->countproc;
443
444 proc_lock(p);
445 p->p_shutdownstate = (char)setsdstate;
446 if (p->p_stat != SZOMB) {
447 proc_unlock(p);
448 if (countproc != 0) {
449 proc_list_lock();
450 p->p_listflag |= P_LIST_EXITCOUNT;
451 proc_shutdown_exitcount++;
452 proc_list_unlock();
453 }
454 psignal(p, signo);
455 if (countproc != 0) {
456 sd->activecount++;
457 }
458 } else {
459 proc_unlock(p);
460 }
461
462 return PROC_RETURNED;
463 }
464
465 static int
466 sd_callback3(proc_t p, void * args)
467 {
468 struct sd_iterargs * sd = (struct sd_iterargs *)args;
469 vfs_context_t ctx = vfs_context_current();
470
471 int setsdstate = sd->setsdstate;
472
473 proc_lock(p);
474 p->p_shutdownstate = (char)setsdstate;
475 if (p->p_stat != SZOMB) {
476 /*
477 * NOTE: following code ignores sig_lock and plays
478 * with exit_thread correctly. This is OK unless we
479 * are a multiprocessor, in which case I do not
480 * understand the sig_lock. This needs to be fixed.
481 * XXX
482 */
483 if (p->exit_thread) { /* someone already doing it */
484 proc_unlock(p);
485 /* give him a chance */
486 thread_block(THREAD_CONTINUE_NULL);
487 } else {
488 p->exit_thread = current_thread();
489 printf(".");
490
491 sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid);
492
493 proc_unlock(p);
494 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
495 p->p_pid, 0, 1, 0, 0);
496 sd->activecount++;
497 exit1(p, 1, (int *)NULL);
498 }
499 } else {
500 proc_unlock(p);
501 }
502
503 return PROC_RETURNED;
504 }
505
506
507 /*
508 * proc_shutdown()
509 *
510 * Shutdown down proc system (release references to current and root
511 * dirs for each process).
512 *
513 * POSIX modifications:
514 *
515 * For POSIX fcntl() file locking call vno_lockrelease() on
516 * the file to release all of its record locks, if any.
517 */
518
519 static void
520 proc_shutdown(void)
521 {
522 vfs_context_t ctx = vfs_context_current();
523 struct proc *p, *self;
524 int delayterm = 0;
525 struct sd_filterargs sfargs;
526 struct sd_iterargs sdargs;
527 int error = 0;
528 struct timespec ts;
529
530 /*
531 * Kill as many procs as we can. (Except ourself...)
532 */
533 self = (struct proc *)current_proc();
534
535 /*
536 * Signal the init with SIGTERM so that he does not launch
537 * new processes
538 */
539 p = proc_find(1);
540 if (p && p != self) {
541 psignal(p, SIGTERM);
542 }
543 proc_rele(p);
544
545 printf("Killing all processes ");
546
547 sigterm_loop:
548 /*
549 * send SIGTERM to those procs interested in catching one
550 */
551 sfargs.delayterm = delayterm;
552 sfargs.shutdownstate = 0;
553 sdargs.signo = SIGTERM;
554 sdargs.setsdstate = 1;
555 sdargs.countproc = 1;
556 sdargs.activecount = 0;
557
558 error = 0;
559 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */
560 proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs);
561
562 if (sdargs.activecount != 0 && proc_shutdown_exitcount != 0) {
563 proc_list_lock();
564 if (proc_shutdown_exitcount != 0) {
565 /*
566 * now wait for up to 3 seconds to allow those procs catching SIGTERM
567 * to digest it
568 * as soon as these procs have exited, we'll continue on to the next step
569 */
570 ts.tv_sec = 3;
571 ts.tv_nsec = 0;
572 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
573 if (error != 0) {
574 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
575 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) {
576 p->p_listflag &= ~P_LIST_EXITCOUNT;
577 }
578 }
579 for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
580 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) {
581 p->p_listflag &= ~P_LIST_EXITCOUNT;
582 }
583 }
584 }
585 }
586 proc_list_unlock();
587 }
588 if (error == ETIMEDOUT) {
589 /*
590 * log the names of the unresponsive tasks
591 */
592
593 proc_list_lock();
594
595 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
596 if (p->p_shutdownstate == 1) {
597 printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
598 sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
599 }
600 }
601
602 proc_list_unlock();
603 }
604
605 /*
606 * send a SIGKILL to all the procs still hanging around
607 */
608 sfargs.delayterm = delayterm;
609 sfargs.shutdownstate = 2;
610 sdargs.signo = SIGKILL;
611 sdargs.setsdstate = 2;
612 sdargs.countproc = 1;
613 sdargs.activecount = 0;
614
615 /* post a SIGKILL to all that catch SIGTERM and not marked for delay */
616 proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs);
617
618 error = 0;
619
620 if (sdargs.activecount != 0 && proc_shutdown_exitcount != 0) {
621 proc_list_lock();
622 if (proc_shutdown_exitcount != 0) {
623 /*
624 * wait for up to 60 seconds to allow these procs to exit normally
625 *
626 * History: The delay interval was changed from 100 to 200
627 * for NFS requests in particular.
628 */
629 ts.tv_sec = 10;
630 ts.tv_nsec = 0;
631 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
632 if (error != 0) {
633 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
634 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) {
635 p->p_listflag &= ~P_LIST_EXITCOUNT;
636 }
637 }
638 for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
639 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) {
640 p->p_listflag &= ~P_LIST_EXITCOUNT;
641 }
642 }
643 }
644 }
645 proc_list_unlock();
646 }
647
648 if (error == ETIMEDOUT) {
649 /*
650 * log the names of the unresponsive tasks
651 */
652
653 proc_list_lock();
654
655 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
656 if (p->p_shutdownstate == 2) {
657 printf("%s[%d]: didn't act on SIGKILL\n", p->p_comm, p->p_pid);
658 sd_log(ctx, "%s[%d]: didn't act on SIGKILL\n", p->p_comm, p->p_pid);
659 }
660 }
661
662 proc_list_unlock();
663 }
664
665 /*
666 * if we still have procs that haven't exited, then brute force 'em
667 */
668 sfargs.delayterm = delayterm;
669 sfargs.shutdownstate = 3;
670 sdargs.signo = 0;
671 sdargs.setsdstate = 3;
672 sdargs.countproc = 0;
673 sdargs.activecount = 0;
674
675
676
677 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */
678 proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs);
679 printf("\n");
680
681 /* Now start the termination of processes that are marked for delayed termn */
682 if (delayterm == 0) {
683 delayterm = 1;
684 goto sigterm_loop;
685 }
686
687 sd_closelog(ctx);
688
689 /*
690 * Now that all other processes have been terminated, suspend init
691 */
692 task_suspend_internal(initproc->task);
693
694 /* drop the ref on initproc */
695 proc_rele(initproc);
696 printf("continuing\n");
697 }