]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_shutdown.c
dedb8288c0cc525c92ffa5b8a7eb566dc032b7aa
[apple/xnu.git] / bsd / kern / kern_shutdown.c
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * File: bsd/kern/kern_shutdown.c
30 *
31 * Copyright (C) 1989, NeXT, Inc.
32 *
33 */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/vm.h>
39 #include <sys/proc_internal.h>
40 #include <sys/user.h>
41 #include <sys/reboot.h>
42 #include <sys/conf.h>
43 #include <sys/vnode_internal.h>
44 #include <sys/file_internal.h>
45 #include <sys/clist.h>
46 #include <sys/callout.h>
47 #include <sys/mbuf.h>
48 #include <sys/msgbuf.h>
49 #include <sys/ioctl.h>
50 #include <sys/signal.h>
51 #include <sys/tty.h>
52 #include <kern/task.h>
53 #include <sys/quota.h>
54 #include <vm/vm_kern.h>
55 #include <mach/vm_param.h>
56 #include <sys/filedesc.h>
57 #include <mach/host_priv.h>
58 #include <mach/host_reboot.h>
59
60 #include <security/audit/audit.h>
61
62 #include <kern/sched_prim.h> /* for thread_block() */
63 #include <kern/host.h> /* for host_priv_self() */
64 #include <net/if_var.h> /* for if_down_all() */
65 #include <sys/buf_internal.h> /* for count_busy_buffers() */
66 #include <sys/mount_internal.h> /* for vfs_unmountall() */
67 #include <mach/task.h> /* for task_suspend() */
68 #include <sys/sysproto.h> /* abused for sync() */
69 #include <kern/clock.h> /* for delay_for_interval() */
70 #include <libkern/OSAtomic.h>
71
72 #include <sys/kdebug.h>
73
74 uint32_t system_inshutdown = 0;
75
76 /* XXX should be in a header file somewhere, but isn't */
77 extern void (*unmountroot_pre_hook)(void);
78
79 unsigned int proc_shutdown_exitcount = 0;
80
81 static int sd_openlog(vfs_context_t);
82 static int sd_closelog(vfs_context_t);
83 static void sd_log(vfs_context_t, const char *, ...);
84 static void proc_shutdown(void);
85 static void kernel_hwm_panic_info(void);
86 extern void IOSystemShutdownNotification(void);
87 #if DEVELOPMENT || DEBUG
88 extern boolean_t kdp_has_polled_corefile(void);
89 #endif /* DEVELOPMENT || DEBUG */
90
91 struct sd_filterargs{
92 int delayterm;
93 int shutdownstate;
94 };
95
96
97 struct sd_iterargs {
98 int signo; /* the signal to be posted */
99 int setsdstate; /* shutdown state to be set */
100 int countproc; /* count processes on action */
101 int activecount; /* number of processes on which action was done */
102 };
103
104 static vnode_t sd_logvp = NULLVP;
105 static off_t sd_log_offset = 0;
106
107
108 static int sd_filt1(proc_t, void *);
109 static int sd_filt2(proc_t, void *);
110 static int sd_callback1(proc_t p, void * arg);
111 static int sd_callback2(proc_t p, void * arg);
112 static int sd_callback3(proc_t p, void * arg);
113
114 extern boolean_t panic_include_zprint;
115 extern vm_offset_t panic_kext_memory_info;
116 extern vm_size_t panic_kext_memory_size;
117
118 static void
119 kernel_hwm_panic_info(void)
120 {
121 mach_memory_info_t *memory_info;
122 unsigned int num_sites;
123 kern_return_t kr;
124
125 panic_include_zprint = TRUE;
126 panic_kext_memory_info = 0;
127 panic_kext_memory_size = 0;
128
129 num_sites = VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT;
130 panic_kext_memory_size = round_page(num_sites * sizeof(mach_zone_info_t));
131
132 kr = kmem_alloc(kernel_map, (vm_offset_t *) &panic_kext_memory_info, panic_kext_memory_size, VM_KERN_MEMORY_OSFMK);
133 if (kr != KERN_SUCCESS) {
134 panic_kext_memory_info = 0;
135 return;
136 }
137 memory_info = (mach_memory_info_t *)panic_kext_memory_info;
138 vm_page_diagnose(memory_info, num_sites, 0);
139 return;
140 }
141
142 int
143 reboot_kernel(int howto, char *message)
144 {
145 int hostboot_option=0;
146
147 if (!OSCompareAndSwap(0, 1, &system_inshutdown)) {
148 if ( (howto&RB_QUICK) == RB_QUICK)
149 goto force_reboot;
150 return (EBUSY);
151 }
152 /*
153 * Temporary hack to notify the power management root domain
154 * that the system will shut down.
155 */
156 IOSystemShutdownNotification();
157
158 if ((howto&RB_QUICK)==RB_QUICK) {
159 printf("Quick reboot...\n");
160 if ((howto&RB_NOSYNC)==0) {
161 sync((proc_t)NULL, (void *)NULL, (int *)NULL);
162 }
163 }
164 else if ((howto&RB_NOSYNC)==0) {
165 int iter, nbusy;
166
167 printf("syncing disks... ");
168
169 /*
170 * Release vnodes held by texts before sync.
171 */
172
173 /* handle live procs (deallocate their root and current directories), suspend initproc */
174 proc_shutdown();
175
176 #if CONFIG_AUDIT
177 audit_shutdown();
178 #endif
179
180 if (unmountroot_pre_hook != NULL)
181 unmountroot_pre_hook();
182
183 sync((proc_t)NULL, (void *)NULL, (int *)NULL);
184
185 if (kdebug_enable)
186 kdbg_dump_trace_to_file("/var/log/shutdown/shutdown.trace");
187
188 /*
189 * Unmount filesystems
190 */
191
192 #if DEVELOPMENT || DEBUG
193 if (!(howto & RB_PANIC) || !kdp_has_polled_corefile())
194 #endif /* DEVELOPMENT || DEBUG */
195 {
196 vfs_unmountall();
197 }
198
199 /* Wait for the buffer cache to clean remaining dirty buffers */
200 for (iter = 0; iter < 100; iter++) {
201 nbusy = count_busy_buffers();
202 if (nbusy == 0)
203 break;
204 printf("%d ", nbusy);
205 delay_for_interval( 1 * nbusy, 1000 * 1000);
206 }
207 if (nbusy)
208 printf("giving up\n");
209 else
210 printf("done\n");
211 }
212 #if NETWORKING
213 /*
214 * Can't just use an splnet() here to disable the network
215 * because that will lock out softints which the disk
216 * drivers depend on to finish DMAs.
217 */
218 if_down_all();
219 #endif /* NETWORKING */
220
221 force_reboot:
222
223 if (howto & RB_PANIC) {
224 if (strncmp(message, "Kernel memory has exceeded limits", 33) == 0) {
225 kernel_hwm_panic_info();
226 }
227 panic ("userspace panic: %s", message);
228 }
229
230 if (howto & RB_POWERDOWN)
231 hostboot_option = HOST_REBOOT_HALT;
232 if (howto & RB_HALT)
233 hostboot_option = HOST_REBOOT_HALT;
234
235 if (howto & RB_UPSDELAY) {
236 hostboot_option = HOST_REBOOT_UPSDELAY;
237 }
238
239 host_reboot(host_priv_self(), hostboot_option);
240 /*
241 * should not be reached
242 */
243 return (0);
244 }
245
246 static int
247 sd_openlog(vfs_context_t ctx)
248 {
249 int error = 0;
250 struct timeval tv;
251
252 /* Open shutdown log */
253 if ((error = vnode_open(PROC_SHUTDOWN_LOG, (O_CREAT | FWRITE | O_NOFOLLOW), 0644, 0, &sd_logvp, ctx))) {
254 printf("Failed to open %s: error %d\n", PROC_SHUTDOWN_LOG, error);
255 sd_logvp = NULLVP;
256 return error;
257 }
258
259 vnode_setsize(sd_logvp, (off_t)0, 0, ctx);
260
261 /* Write a little header */
262 microtime(&tv);
263 sd_log(ctx, "Process shutdown log. Current time is %lu (in seconds).\n\n", tv.tv_sec);
264
265 return 0;
266 }
267
268 static int
269 sd_closelog(vfs_context_t ctx)
270 {
271 int error = 0;
272 if (sd_logvp != NULLVP) {
273 VNOP_FSYNC(sd_logvp, MNT_WAIT, ctx);
274 error = vnode_close(sd_logvp, FWRITE, ctx);
275 }
276
277 return error;
278 }
279
280 static void
281 sd_log(vfs_context_t ctx, const char *fmt, ...)
282 {
283 int resid, log_error, len;
284 char logbuf[100];
285 va_list arglist;
286
287 /* If the log isn't open yet, open it */
288 if (sd_logvp == NULLVP) {
289 if (sd_openlog(ctx) != 0) {
290 /* Couldn't open, we fail out */
291 return;
292 }
293 }
294
295 va_start(arglist, fmt);
296 len = vsnprintf(logbuf, sizeof(logbuf), fmt, arglist);
297 log_error = vn_rdwr(UIO_WRITE, sd_logvp, (caddr_t)logbuf, len, sd_log_offset,
298 UIO_SYSSPACE, IO_UNIT | IO_NOAUTH, vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx));
299 if (log_error == EIO || log_error == 0) {
300 sd_log_offset += (len - resid);
301 }
302
303 va_end(arglist);
304
305 }
306
307 static int
308 sd_filt1(proc_t p, void * args)
309 {
310 proc_t self = current_proc();
311 struct sd_filterargs * sf = (struct sd_filterargs *)args;
312 int delayterm = sf-> delayterm;
313 int shutdownstate = sf->shutdownstate;
314
315 if (((p->p_flag&P_SYSTEM) != 0) || (p->p_ppid == 0)
316 ||(p == self) || (p->p_stat == SZOMB)
317 || (p->p_shutdownstate != shutdownstate)
318 ||((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM))
319 || ((p->p_sigcatch & sigmask(SIGTERM))== 0)) {
320 return(0);
321 }
322 else
323 return(1);
324 }
325
326
327 static int
328 sd_callback1(proc_t p, void * args)
329 {
330 struct sd_iterargs * sd = (struct sd_iterargs *)args;
331 int signo = sd->signo;
332 int setsdstate = sd->setsdstate;
333 int countproc = sd->countproc;
334
335 proc_lock(p);
336 p->p_shutdownstate = setsdstate;
337 if (p->p_stat != SZOMB) {
338 proc_unlock(p);
339 if (countproc != 0) {
340 proc_list_lock();
341 p->p_listflag |= P_LIST_EXITCOUNT;
342 proc_shutdown_exitcount++;
343 proc_list_unlock();
344 }
345
346 psignal(p, signo);
347 if (countproc != 0)
348 sd->activecount++;
349 } else {
350 proc_unlock(p);
351 }
352
353 return PROC_RETURNED;
354 }
355
356 static int
357 sd_filt2(proc_t p, void * args)
358 {
359 proc_t self = current_proc();
360 struct sd_filterargs * sf = (struct sd_filterargs *)args;
361 int delayterm = sf-> delayterm;
362 int shutdownstate = sf->shutdownstate;
363
364 if (((p->p_flag&P_SYSTEM) != 0) || (p->p_ppid == 0)
365 ||(p == self) || (p->p_stat == SZOMB)
366 || (p->p_shutdownstate == shutdownstate)
367 ||((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM))) {
368 return(0);
369 }
370 else
371 return(1);
372 }
373
374 static int
375 sd_callback2(proc_t p, void * args)
376 {
377 struct sd_iterargs * sd = (struct sd_iterargs *)args;
378 int signo = sd->signo;
379 int setsdstate = sd->setsdstate;
380 int countproc = sd->countproc;
381
382 proc_lock(p);
383 p->p_shutdownstate = setsdstate;
384 if (p->p_stat != SZOMB) {
385 proc_unlock(p);
386 if (countproc != 0) {
387 proc_list_lock();
388 p->p_listflag |= P_LIST_EXITCOUNT;
389 proc_shutdown_exitcount++;
390 proc_list_unlock();
391 }
392 psignal(p, signo);
393 if (countproc != 0)
394 sd->activecount++;
395 } else {
396 proc_unlock(p);
397 }
398
399 return PROC_RETURNED;
400 }
401
402 static int
403 sd_callback3(proc_t p, void * args)
404 {
405 struct sd_iterargs * sd = (struct sd_iterargs *)args;
406 vfs_context_t ctx = vfs_context_current();
407
408 int setsdstate = sd->setsdstate;
409
410 proc_lock(p);
411 p->p_shutdownstate = setsdstate;
412 if (p->p_stat != SZOMB) {
413 /*
414 * NOTE: following code ignores sig_lock and plays
415 * with exit_thread correctly. This is OK unless we
416 * are a multiprocessor, in which case I do not
417 * understand the sig_lock. This needs to be fixed.
418 * XXX
419 */
420 if (p->exit_thread) { /* someone already doing it */
421 proc_unlock(p);
422 /* give him a chance */
423 thread_block(THREAD_CONTINUE_NULL);
424 } else {
425 p->exit_thread = current_thread();
426 printf(".");
427
428 sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid);
429
430 proc_unlock(p);
431 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
432 p->p_pid, 0, 1, 0, 0);
433 sd->activecount++;
434 exit1(p, 1, (int *)NULL);
435 }
436 } else {
437 proc_unlock(p);
438 }
439
440 return PROC_RETURNED;
441 }
442
443
444 /*
445 * proc_shutdown()
446 *
447 * Shutdown down proc system (release references to current and root
448 * dirs for each process).
449 *
450 * POSIX modifications:
451 *
452 * For POSIX fcntl() file locking call vno_lockrelease() on
453 * the file to release all of its record locks, if any.
454 */
455
456 static void
457 proc_shutdown(void)
458 {
459 vfs_context_t ctx = vfs_context_current();
460 struct proc *p, *self;
461 int delayterm = 0;
462 struct sd_filterargs sfargs;
463 struct sd_iterargs sdargs;
464 int error = 0;
465 struct timespec ts;
466
467 /*
468 * Kill as many procs as we can. (Except ourself...)
469 */
470 self = (struct proc *)current_proc();
471
472 /*
473 * Signal the init with SIGTERM so that he does not launch
474 * new processes
475 */
476 p = proc_find(1);
477 if (p && p != self) {
478 psignal(p, SIGTERM);
479 }
480 proc_rele(p);
481
482 printf("Killing all processes ");
483
484 sigterm_loop:
485 /*
486 * send SIGTERM to those procs interested in catching one
487 */
488 sfargs.delayterm = delayterm;
489 sfargs.shutdownstate = 0;
490 sdargs.signo = SIGTERM;
491 sdargs.setsdstate = 1;
492 sdargs.countproc = 1;
493 sdargs.activecount = 0;
494
495 error = 0;
496 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */
497 proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs);
498
499 if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
500 proc_list_lock();
501 if (proc_shutdown_exitcount != 0) {
502 /*
503 * now wait for up to 30 seconds to allow those procs catching SIGTERM
504 * to digest it
505 * as soon as these procs have exited, we'll continue on to the next step
506 */
507 ts.tv_sec = 30;
508 ts.tv_nsec = 0;
509 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
510 if (error != 0) {
511 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
512 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
513 p->p_listflag &= ~P_LIST_EXITCOUNT;
514 }
515 for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
516 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
517 p->p_listflag &= ~P_LIST_EXITCOUNT;
518 }
519 }
520
521 }
522 proc_list_unlock();
523 }
524 if (error == ETIMEDOUT) {
525 /*
526 * log the names of the unresponsive tasks
527 */
528
529
530 proc_list_lock();
531
532 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
533 if (p->p_shutdownstate == 1) {
534 printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
535 sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
536 }
537 }
538
539 proc_list_unlock();
540
541 delay_for_interval(1000 * 5, 1000 * 1000);
542 }
543
544 /*
545 * send a SIGKILL to all the procs still hanging around
546 */
547 sfargs.delayterm = delayterm;
548 sfargs.shutdownstate = 2;
549 sdargs.signo = SIGKILL;
550 sdargs.setsdstate = 2;
551 sdargs.countproc = 1;
552 sdargs.activecount = 0;
553
554 /* post a SIGKILL to all that catch SIGTERM and not marked for delay */
555 proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs);
556
557 if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
558 proc_list_lock();
559 if (proc_shutdown_exitcount != 0) {
560 /*
561 * wait for up to 60 seconds to allow these procs to exit normally
562 *
563 * History: The delay interval was changed from 100 to 200
564 * for NFS requests in particular.
565 */
566 ts.tv_sec = 60;
567 ts.tv_nsec = 0;
568 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
569 if (error != 0) {
570 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
571 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
572 p->p_listflag &= ~P_LIST_EXITCOUNT;
573 }
574 for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
575 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
576 p->p_listflag &= ~P_LIST_EXITCOUNT;
577 }
578 }
579 }
580 proc_list_unlock();
581 }
582
583 /*
584 * if we still have procs that haven't exited, then brute force 'em
585 */
586 sfargs.delayterm = delayterm;
587 sfargs.shutdownstate = 3;
588 sdargs.signo = 0;
589 sdargs.setsdstate = 3;
590 sdargs.countproc = 0;
591 sdargs.activecount = 0;
592
593 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */
594 proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs);
595 printf("\n");
596
597 /* Now start the termination of processes that are marked for delayed termn */
598 if (delayterm == 0) {
599 delayterm = 1;
600 goto sigterm_loop;
601 }
602
603 sd_closelog(ctx);
604
605 /*
606 * Now that all other processes have been terminated, suspend init
607 */
608 task_suspend_internal(initproc->task);
609
610 /* drop the ref on initproc */
611 proc_rele(initproc);
612 printf("continuing\n");
613 }
614