]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_shutdown.c
xnu-6153.81.5.tar.gz
[apple/xnu.git] / bsd / kern / kern_shutdown.c
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * File: bsd/kern/kern_shutdown.c
30 *
31 * Copyright (C) 1989, NeXT, Inc.
32 *
33 */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/vm.h>
39 #include <sys/proc_internal.h>
40 #include <sys/user.h>
41 #include <sys/reboot.h>
42 #include <sys/conf.h>
43 #include <sys/vnode_internal.h>
44 #include <sys/file_internal.h>
45 #include <sys/clist.h>
46 #include <sys/callout.h>
47 #include <sys/mbuf.h>
48 #include <sys/msgbuf.h>
49 #include <sys/ioctl.h>
50 #include <sys/signal.h>
51 #include <sys/tty.h>
52 #include <kern/task.h>
53 #include <sys/quota.h>
54 #include <vm/vm_kern.h>
55 #include <mach/vm_param.h>
56 #include <sys/filedesc.h>
57 #include <mach/host_priv.h>
58 #include <mach/host_reboot.h>
59
60 #include <security/audit/audit.h>
61
62 #include <kern/sched_prim.h> /* for thread_block() */
63 #include <kern/host.h> /* for host_priv_self() */
64 #include <net/if_var.h> /* for if_down_all() */
65 #include <sys/buf_internal.h> /* for count_busy_buffers() */
66 #include <sys/mount_internal.h> /* for vfs_unmountall() */
67 #include <mach/task.h> /* for task_suspend() */
68 #include <sys/sysproto.h> /* abused for sync() */
69 #include <kern/clock.h> /* for delay_for_interval() */
70 #include <libkern/OSAtomic.h>
71 #include <IOKit/IOPlatformExpert.h>
72
73 #include <sys/kdebug.h>
74
75 uint32_t system_inshutdown = 0;
76
77 /* XXX should be in a header file somewhere, but isn't */
78 extern void (*unmountroot_pre_hook)(void);
79
80 unsigned int proc_shutdown_exitcount = 0;
81
82 static int sd_openlog(vfs_context_t);
83 static int sd_closelog(vfs_context_t);
84 static void sd_log(vfs_context_t, const char *, ...);
85 static void proc_shutdown(void);
86 static void zprint_panic_info(void);
87 extern void halt_log_enter(const char * what, const void * pc, uint64_t time);
88
89 #if DEVELOPMENT || DEBUG
90 extern boolean_t kdp_has_polled_corefile(void);
91 #endif /* DEVELOPMENT || DEBUG */
92
/*
 * Selection criteria handed to the sd_filt1()/sd_filt2() process filters.
 */
struct sd_filterargs {
	int delayterm;          /* when 0, processes with P_LDELAYTERM set are skipped */
	int shutdownstate;      /* compared against p_shutdownstate (sd_filt1: must match, sd_filt2: must differ) */
};
97
98
/*
 * Per-scan action description handed to the sd_callback*() routines.
 */
struct sd_iterargs {
	int signo;              /* the signal to be posted */
	int setsdstate;         /* shutdown state to be set */
	int countproc;          /* count processes on action */
	int activecount;        /* number of processes on which action was done */
};
105
106 static vnode_t sd_logvp = NULLVP;
107 static off_t sd_log_offset = 0;
108
109
110 static int sd_filt1(proc_t, void *);
111 static int sd_filt2(proc_t, void *);
112 static int sd_callback1(proc_t p, void * arg);
113 static int sd_callback2(proc_t p, void * arg);
114 static int sd_callback3(proc_t p, void * arg);
115
116 extern boolean_t panic_include_zprint;
117 extern mach_memory_info_t *panic_kext_memory_info;
118 extern vm_size_t panic_kext_memory_size;
119
120 static void
121 zprint_panic_info(void)
122 {
123 unsigned int num_sites;
124 kern_return_t kr;
125
126 panic_include_zprint = TRUE;
127 panic_kext_memory_info = NULL;
128 panic_kext_memory_size = 0;
129
130 num_sites = vm_page_diagnose_estimate();
131 panic_kext_memory_size = num_sites * sizeof(panic_kext_memory_info[0]);
132
133 kr = kmem_alloc(kernel_map, (vm_offset_t *)&panic_kext_memory_info, round_page(panic_kext_memory_size), VM_KERN_MEMORY_OSFMK);
134 if (kr != KERN_SUCCESS) {
135 panic_kext_memory_info = NULL;
136 return;
137 }
138
139 vm_page_diagnose(panic_kext_memory_info, num_sites, 0);
140 }
141
142 int
143 get_system_inshutdown()
144 {
145 return system_inshutdown;
146 }
147
148 __abortlike
149 static void
150 panic_kernel(int howto, char *message)
151 {
152 if ((howto & RB_PANIC_ZPRINT) == RB_PANIC_ZPRINT) {
153 zprint_panic_info();
154 }
155 panic("userspace panic: %s", message);
156 }
157
158 extern boolean_t compressor_store_stop_compaction;
159 extern lck_mtx_t vm_swap_data_lock;
160 extern int vm_swapfile_create_thread_running;
161 extern int vm_swapfile_gc_thread_running;
162
163 int
164 reboot_kernel(int howto, char *message)
165 {
166 int hostboot_option = 0;
167 uint64_t startTime;
168
169 if ((howto & (RB_PANIC | RB_QUICK)) == (RB_PANIC | RB_QUICK)) {
170 panic_kernel(howto, message);
171 }
172
173 if (!OSCompareAndSwap(0, 1, &system_inshutdown)) {
174 if ((howto & RB_QUICK) == RB_QUICK) {
175 goto force_reboot;
176 }
177 return EBUSY;
178 }
179
180 lck_mtx_lock(&vm_swap_data_lock);
181
182 /* Turn OFF future swapfile reclaimation / compaction etc.*/
183 compressor_store_stop_compaction = TRUE;
184
185 /* wait for any current swapfile work to end */
186 while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
187 assert_wait((event_t)&compressor_store_stop_compaction, THREAD_UNINT);
188
189 lck_mtx_unlock(&vm_swap_data_lock);
190
191 thread_block(THREAD_CONTINUE_NULL);
192
193 lck_mtx_lock(&vm_swap_data_lock);
194 }
195
196 lck_mtx_unlock(&vm_swap_data_lock);
197
198 /*
199 * Notify the power management root domain that the system will shut down.
200 */
201 IOSystemShutdownNotification(kIOSystemShutdownNotificationStageProcessExit);
202
203 if ((howto & RB_QUICK) == RB_QUICK) {
204 printf("Quick reboot...\n");
205 if ((howto & RB_NOSYNC) == 0) {
206 sync((proc_t)NULL, (void *)NULL, (int *)NULL);
207 }
208 } else if ((howto & RB_NOSYNC) == 0) {
209 int iter, nbusy;
210
211 printf("syncing disks... ");
212
213 /*
214 * Release vnodes held by texts before sync.
215 */
216
217 /* handle live procs (deallocate their root and current directories), suspend initproc */
218
219 startTime = mach_absolute_time();
220 proc_shutdown();
221 halt_log_enter("proc_shutdown", 0, mach_absolute_time() - startTime);
222
223 #if CONFIG_AUDIT
224 startTime = mach_absolute_time();
225 audit_shutdown();
226 halt_log_enter("audit_shutdown", 0, mach_absolute_time() - startTime);
227 #endif
228
229 if (unmountroot_pre_hook != NULL) {
230 unmountroot_pre_hook();
231 }
232
233 startTime = mach_absolute_time();
234 sync((proc_t)NULL, (void *)NULL, (int *)NULL);
235
236 if (kdebug_enable) {
237 startTime = mach_absolute_time();
238 kdbg_dump_trace_to_file("/var/log/shutdown/shutdown.trace");
239 halt_log_enter("shutdown.trace", 0, mach_absolute_time() - startTime);
240 }
241
242 IOSystemShutdownNotification(kIOSystemShutdownNotificationStageRootUnmount);
243
244 /*
245 * Unmount filesystems
246 */
247
248 #if DEVELOPMENT || DEBUG
249 if (!(howto & RB_PANIC) || !kdp_has_polled_corefile())
250 #endif /* DEVELOPMENT || DEBUG */
251 {
252 startTime = mach_absolute_time();
253 vfs_unmountall();
254 halt_log_enter("vfs_unmountall", 0, mach_absolute_time() - startTime);
255 }
256
257 /* Wait for the buffer cache to clean remaining dirty buffers */
258 startTime = mach_absolute_time();
259 for (iter = 0; iter < 100; iter++) {
260 nbusy = count_busy_buffers();
261 if (nbusy == 0) {
262 break;
263 }
264 printf("%d ", nbusy);
265 delay_for_interval( 1 * nbusy, 1000 * 1000);
266 }
267 if (nbusy) {
268 printf("giving up\n");
269 } else {
270 printf("done\n");
271 }
272 halt_log_enter("bufferclean", 0, mach_absolute_time() - startTime);
273 }
274 #if NETWORKING
275 /*
276 * Can't just use an splnet() here to disable the network
277 * because that will lock out softints which the disk
278 * drivers depend on to finish DMAs.
279 */
280 startTime = mach_absolute_time();
281 if_down_all();
282 halt_log_enter("if_down_all", 0, mach_absolute_time() - startTime);
283 #endif /* NETWORKING */
284
285 force_reboot:
286
287 if (howto & RB_PANIC) {
288 panic_kernel(howto, message);
289 }
290
291 if (howto & RB_HALT) {
292 hostboot_option = HOST_REBOOT_HALT;
293 }
294
295 if (howto & RB_UPSDELAY) {
296 hostboot_option = HOST_REBOOT_UPSDELAY;
297 }
298
299 host_reboot(host_priv_self(), hostboot_option);
300 /*
301 * should not be reached
302 */
303 return 0;
304 }
305
306 static int
307 sd_openlog(vfs_context_t ctx)
308 {
309 int error = 0;
310 struct timeval tv;
311
312 /* Open shutdown log */
313 if ((error = vnode_open(PROC_SHUTDOWN_LOG, (O_CREAT | FWRITE | O_NOFOLLOW), 0644, 0, &sd_logvp, ctx))) {
314 printf("Failed to open %s: error %d\n", PROC_SHUTDOWN_LOG, error);
315 sd_logvp = NULLVP;
316 return error;
317 }
318
319 vnode_setsize(sd_logvp, (off_t)0, 0, ctx);
320
321 /* Write a little header */
322 microtime(&tv);
323 sd_log(ctx, "Process shutdown log. Current time is %lu (in seconds).\n\n", tv.tv_sec);
324
325 return 0;
326 }
327
328 static int
329 sd_closelog(vfs_context_t ctx)
330 {
331 int error = 0;
332 if (sd_logvp != NULLVP) {
333 VNOP_FSYNC(sd_logvp, MNT_WAIT, ctx);
334 error = vnode_close(sd_logvp, FWRITE, ctx);
335 }
336
337 return error;
338 }
339
340 static void
341 sd_log(vfs_context_t ctx, const char *fmt, ...)
342 {
343 int resid, log_error, len;
344 char logbuf[100];
345 va_list arglist;
346
347 /* If the log isn't open yet, open it */
348 if (sd_logvp == NULLVP) {
349 if (sd_openlog(ctx) != 0) {
350 /* Couldn't open, we fail out */
351 return;
352 }
353 }
354
355 va_start(arglist, fmt);
356 len = vsnprintf(logbuf, sizeof(logbuf), fmt, arglist);
357 log_error = vn_rdwr(UIO_WRITE, sd_logvp, (caddr_t)logbuf, len, sd_log_offset,
358 UIO_SYSSPACE, IO_UNIT | IO_NOAUTH, vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx));
359 if (log_error == EIO || log_error == 0) {
360 sd_log_offset += (len - resid);
361 }
362
363 va_end(arglist);
364 }
365
366 static int
367 sd_filt1(proc_t p, void * args)
368 {
369 proc_t self = current_proc();
370 struct sd_filterargs * sf = (struct sd_filterargs *)args;
371 int delayterm = sf->delayterm;
372 int shutdownstate = sf->shutdownstate;
373
374 if (((p->p_flag & P_SYSTEM) != 0) || (p->p_ppid == 0)
375 || (p == self) || (p->p_stat == SZOMB)
376 || (p->p_shutdownstate != shutdownstate)
377 || ((delayterm == 0) && ((p->p_lflag & P_LDELAYTERM) == P_LDELAYTERM))
378 || ((p->p_sigcatch & sigmask(SIGTERM)) == 0)) {
379 return 0;
380 } else {
381 return 1;
382 }
383 }
384
385
386 static int
387 sd_callback1(proc_t p, void * args)
388 {
389 struct sd_iterargs * sd = (struct sd_iterargs *)args;
390 int signo = sd->signo;
391 int setsdstate = sd->setsdstate;
392 int countproc = sd->countproc;
393
394 proc_lock(p);
395 p->p_shutdownstate = setsdstate;
396 if (p->p_stat != SZOMB) {
397 proc_unlock(p);
398 if (countproc != 0) {
399 proc_list_lock();
400 p->p_listflag |= P_LIST_EXITCOUNT;
401 proc_shutdown_exitcount++;
402 proc_list_unlock();
403 }
404
405 psignal(p, signo);
406 if (countproc != 0) {
407 sd->activecount++;
408 }
409 } else {
410 proc_unlock(p);
411 }
412
413 return PROC_RETURNED;
414 }
415
416 static int
417 sd_filt2(proc_t p, void * args)
418 {
419 proc_t self = current_proc();
420 struct sd_filterargs * sf = (struct sd_filterargs *)args;
421 int delayterm = sf->delayterm;
422 int shutdownstate = sf->shutdownstate;
423
424 if (((p->p_flag & P_SYSTEM) != 0) || (p->p_ppid == 0)
425 || (p == self) || (p->p_stat == SZOMB)
426 || (p->p_shutdownstate == shutdownstate)
427 || ((delayterm == 0) && ((p->p_lflag & P_LDELAYTERM) == P_LDELAYTERM))) {
428 return 0;
429 } else {
430 return 1;
431 }
432 }
433
434 static int
435 sd_callback2(proc_t p, void * args)
436 {
437 struct sd_iterargs * sd = (struct sd_iterargs *)args;
438 int signo = sd->signo;
439 int setsdstate = sd->setsdstate;
440 int countproc = sd->countproc;
441
442 proc_lock(p);
443 p->p_shutdownstate = setsdstate;
444 if (p->p_stat != SZOMB) {
445 proc_unlock(p);
446 if (countproc != 0) {
447 proc_list_lock();
448 p->p_listflag |= P_LIST_EXITCOUNT;
449 proc_shutdown_exitcount++;
450 proc_list_unlock();
451 }
452 psignal(p, signo);
453 if (countproc != 0) {
454 sd->activecount++;
455 }
456 } else {
457 proc_unlock(p);
458 }
459
460 return PROC_RETURNED;
461 }
462
463 static int
464 sd_callback3(proc_t p, void * args)
465 {
466 struct sd_iterargs * sd = (struct sd_iterargs *)args;
467 vfs_context_t ctx = vfs_context_current();
468
469 int setsdstate = sd->setsdstate;
470
471 proc_lock(p);
472 p->p_shutdownstate = setsdstate;
473 if (p->p_stat != SZOMB) {
474 /*
475 * NOTE: following code ignores sig_lock and plays
476 * with exit_thread correctly. This is OK unless we
477 * are a multiprocessor, in which case I do not
478 * understand the sig_lock. This needs to be fixed.
479 * XXX
480 */
481 if (p->exit_thread) { /* someone already doing it */
482 proc_unlock(p);
483 /* give him a chance */
484 thread_block(THREAD_CONTINUE_NULL);
485 } else {
486 p->exit_thread = current_thread();
487 printf(".");
488
489 sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid);
490
491 proc_unlock(p);
492 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
493 p->p_pid, 0, 1, 0, 0);
494 sd->activecount++;
495 exit1(p, 1, (int *)NULL);
496 }
497 } else {
498 proc_unlock(p);
499 }
500
501 return PROC_RETURNED;
502 }
503
504
505 /*
506 * proc_shutdown()
507 *
508 * Shutdown down proc system (release references to current and root
509 * dirs for each process).
510 *
511 * POSIX modifications:
512 *
513 * For POSIX fcntl() file locking call vno_lockrelease() on
514 * the file to release all of its record locks, if any.
515 */
516
517 static void
518 proc_shutdown(void)
519 {
520 vfs_context_t ctx = vfs_context_current();
521 struct proc *p, *self;
522 int delayterm = 0;
523 struct sd_filterargs sfargs;
524 struct sd_iterargs sdargs;
525 int error = 0;
526 struct timespec ts;
527
528 /*
529 * Kill as many procs as we can. (Except ourself...)
530 */
531 self = (struct proc *)current_proc();
532
533 /*
534 * Signal the init with SIGTERM so that he does not launch
535 * new processes
536 */
537 p = proc_find(1);
538 if (p && p != self) {
539 psignal(p, SIGTERM);
540 }
541 proc_rele(p);
542
543 printf("Killing all processes ");
544
545 sigterm_loop:
546 /*
547 * send SIGTERM to those procs interested in catching one
548 */
549 sfargs.delayterm = delayterm;
550 sfargs.shutdownstate = 0;
551 sdargs.signo = SIGTERM;
552 sdargs.setsdstate = 1;
553 sdargs.countproc = 1;
554 sdargs.activecount = 0;
555
556 error = 0;
557 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */
558 proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs);
559
560 if (sdargs.activecount != 0 && proc_shutdown_exitcount != 0) {
561 proc_list_lock();
562 if (proc_shutdown_exitcount != 0) {
563 /*
564 * now wait for up to 3 seconds to allow those procs catching SIGTERM
565 * to digest it
566 * as soon as these procs have exited, we'll continue on to the next step
567 */
568 ts.tv_sec = 3;
569 ts.tv_nsec = 0;
570 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
571 if (error != 0) {
572 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
573 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) {
574 p->p_listflag &= ~P_LIST_EXITCOUNT;
575 }
576 }
577 for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
578 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) {
579 p->p_listflag &= ~P_LIST_EXITCOUNT;
580 }
581 }
582 }
583 }
584 proc_list_unlock();
585 }
586 if (error == ETIMEDOUT) {
587 /*
588 * log the names of the unresponsive tasks
589 */
590
591 proc_list_lock();
592
593 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
594 if (p->p_shutdownstate == 1) {
595 printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
596 sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
597 }
598 }
599
600 proc_list_unlock();
601 }
602
603 /*
604 * send a SIGKILL to all the procs still hanging around
605 */
606 sfargs.delayterm = delayterm;
607 sfargs.shutdownstate = 2;
608 sdargs.signo = SIGKILL;
609 sdargs.setsdstate = 2;
610 sdargs.countproc = 1;
611 sdargs.activecount = 0;
612
613 /* post a SIGKILL to all that catch SIGTERM and not marked for delay */
614 proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs);
615
616 error = 0;
617
618 if (sdargs.activecount != 0 && proc_shutdown_exitcount != 0) {
619 proc_list_lock();
620 if (proc_shutdown_exitcount != 0) {
621 /*
622 * wait for up to 60 seconds to allow these procs to exit normally
623 *
624 * History: The delay interval was changed from 100 to 200
625 * for NFS requests in particular.
626 */
627 ts.tv_sec = 10;
628 ts.tv_nsec = 0;
629 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
630 if (error != 0) {
631 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
632 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) {
633 p->p_listflag &= ~P_LIST_EXITCOUNT;
634 }
635 }
636 for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
637 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) {
638 p->p_listflag &= ~P_LIST_EXITCOUNT;
639 }
640 }
641 }
642 }
643 proc_list_unlock();
644 }
645
646 if (error == ETIMEDOUT) {
647 /*
648 * log the names of the unresponsive tasks
649 */
650
651 proc_list_lock();
652
653 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
654 if (p->p_shutdownstate == 2) {
655 printf("%s[%d]: didn't act on SIGKILL\n", p->p_comm, p->p_pid);
656 sd_log(ctx, "%s[%d]: didn't act on SIGKILL\n", p->p_comm, p->p_pid);
657 }
658 }
659
660 proc_list_unlock();
661 }
662
663 /*
664 * if we still have procs that haven't exited, then brute force 'em
665 */
666 sfargs.delayterm = delayterm;
667 sfargs.shutdownstate = 3;
668 sdargs.signo = 0;
669 sdargs.setsdstate = 3;
670 sdargs.countproc = 0;
671 sdargs.activecount = 0;
672
673
674
675 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */
676 proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs);
677 printf("\n");
678
679 /* Now start the termination of processes that are marked for delayed termn */
680 if (delayterm == 0) {
681 delayterm = 1;
682 goto sigterm_loop;
683 }
684
685 sd_closelog(ctx);
686
687 /*
688 * Now that all other processes have been terminated, suspend init
689 */
690 task_suspend_internal(initproc->task);
691
692 /* drop the ref on initproc */
693 proc_rele(initproc);
694 printf("continuing\n");
695 }