]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_shutdown.c
xnu-3248.20.55.tar.gz
[apple/xnu.git] / bsd / kern / kern_shutdown.c
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * File: bsd/kern/kern_shutdown.c
30 *
31 * Copyright (C) 1989, NeXT, Inc.
32 *
33 */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/vm.h>
39 #include <sys/proc_internal.h>
40 #include <sys/user.h>
41 #include <sys/reboot.h>
42 #include <sys/conf.h>
43 #include <sys/vnode_internal.h>
44 #include <sys/file_internal.h>
45 #include <sys/clist.h>
46 #include <sys/callout.h>
47 #include <sys/mbuf.h>
48 #include <sys/msgbuf.h>
49 #include <sys/ioctl.h>
50 #include <sys/signal.h>
51 #include <sys/tty.h>
52 #include <kern/task.h>
53 #include <sys/quota.h>
54 #include <vm/vm_kern.h>
55 #include <mach/vm_param.h>
56 #include <sys/filedesc.h>
57 #include <mach/host_priv.h>
58 #include <mach/host_reboot.h>
59
60 #include <security/audit/audit.h>
61
62 #include <kern/sched_prim.h> /* for thread_block() */
63 #include <kern/host.h> /* for host_priv_self() */
64 #include <net/if_var.h> /* for if_down_all() */
65 #include <sys/buf_internal.h> /* for count_busy_buffers() */
66 #include <sys/mount_internal.h> /* for vfs_unmountall() */
67 #include <mach/task.h> /* for task_suspend() */
68 #include <sys/sysproto.h> /* abused for sync() */
69 #include <kern/clock.h> /* for delay_for_interval() */
70 #include <libkern/OSAtomic.h>
71
72 #include <sys/kdebug.h>
73
74 uint32_t system_inshutdown = 0;
75
76 /* XXX should be in a header file somewhere, but isn't */
77 extern void (*unmountroot_pre_hook)(void);
78
79 unsigned int proc_shutdown_exitcount = 0;
80
81 static int sd_openlog(vfs_context_t);
82 static int sd_closelog(vfs_context_t);
83 static void sd_log(vfs_context_t, const char *, ...);
84 static void proc_shutdown(void);
85 static void kernel_hwm_panic_info(void);
86 extern void IOSystemShutdownNotification(void);
87
/*
 * Arguments handed to the proc_rebootscan() filter functions
 * (sd_filt1 / sd_filt2).
 */
struct sd_filterargs {
	/* when 0, processes flagged P_LDELAYTERM are filtered out */
	int delayterm;
	/* p_shutdownstate value the filter compares each process against */
	int shutdownstate;
};
92
93
/*
 * Arguments handed to the proc_rebootscan() callbacks
 * (sd_callback1 / sd_callback2 / sd_callback3).
 */
struct sd_iterargs {
	/* the signal to be posted */
	int signo;
	/* shutdown state to be set */
	int setsdstate;
	/* count processes on action */
	int countproc;
	/* number of processes on which action was done */
	int activecount;
};
100
101 static vnode_t sd_logvp = NULLVP;
102 static off_t sd_log_offset = 0;
103
104
105 static int sd_filt1(proc_t, void *);
106 static int sd_filt2(proc_t, void *);
107 static int sd_callback1(proc_t p, void * arg);
108 static int sd_callback2(proc_t p, void * arg);
109 static int sd_callback3(proc_t p, void * arg);
110
111 extern boolean_t panic_include_zprint;
112 extern vm_offset_t panic_kext_memory_info;
113 extern vm_size_t panic_kext_memory_size;
114
115 static void
116 kernel_hwm_panic_info(void)
117 {
118 mach_memory_info_t *memory_info;
119 unsigned int num_sites;
120 kern_return_t kr;
121
122 panic_include_zprint = TRUE;
123 panic_kext_memory_info = 0;
124 panic_kext_memory_size = 0;
125
126 num_sites = VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT;
127 panic_kext_memory_size = round_page(num_sites * sizeof(mach_zone_info_t));
128
129 kr = kmem_alloc(kernel_map, (vm_offset_t *) &panic_kext_memory_info, panic_kext_memory_size, VM_KERN_MEMORY_OSFMK);
130 if (kr != KERN_SUCCESS) {
131 panic_kext_memory_info = 0;
132 return;
133 }
134 memory_info = (mach_memory_info_t *)panic_kext_memory_info;
135 vm_page_diagnose(memory_info, num_sites);
136 return;
137 }
138
139 int
140 reboot_kernel(int howto, char *message)
141 {
142 int hostboot_option=0;
143
144 if (!OSCompareAndSwap(0, 1, &system_inshutdown)) {
145 if ( (howto&RB_QUICK) == RB_QUICK)
146 goto force_reboot;
147 return (EBUSY);
148 }
149 /*
150 * Temporary hack to notify the power management root domain
151 * that the system will shut down.
152 */
153 IOSystemShutdownNotification();
154
155 if ((howto&RB_QUICK)==RB_QUICK) {
156 printf("Quick reboot...\n");
157 if ((howto&RB_NOSYNC)==0) {
158 sync((proc_t)NULL, (void *)NULL, (int *)NULL);
159 }
160 }
161 else if ((howto&RB_NOSYNC)==0) {
162 int iter, nbusy;
163
164 printf("syncing disks... ");
165
166 /*
167 * Release vnodes held by texts before sync.
168 */
169
170 /* handle live procs (deallocate their root and current directories), suspend initproc */
171 proc_shutdown();
172
173 #if CONFIG_AUDIT
174 audit_shutdown();
175 #endif
176
177 if (unmountroot_pre_hook != NULL)
178 unmountroot_pre_hook();
179
180 sync((proc_t)NULL, (void *)NULL, (int *)NULL);
181
182 if (kdebug_enable)
183 kdbg_dump_trace_to_file("/var/log/shutdown/shutdown.trace");
184
185 /*
186 * Unmount filesystems
187 */
188 vfs_unmountall();
189
190 /* Wait for the buffer cache to clean remaining dirty buffers */
191 for (iter = 0; iter < 100; iter++) {
192 nbusy = count_busy_buffers();
193 if (nbusy == 0)
194 break;
195 printf("%d ", nbusy);
196 delay_for_interval( 1 * nbusy, 1000 * 1000);
197 }
198 if (nbusy)
199 printf("giving up\n");
200 else
201 printf("done\n");
202 }
203 #if NETWORKING
204 /*
205 * Can't just use an splnet() here to disable the network
206 * because that will lock out softints which the disk
207 * drivers depend on to finish DMAs.
208 */
209 if_down_all();
210 #endif /* NETWORKING */
211
212 force_reboot:
213
214 if (howto & RB_PANIC) {
215 if (strncmp(message, "Kernel memory has exceeded limits", 33) == 0) {
216 kernel_hwm_panic_info();
217 }
218 panic ("userspace panic: %s", message);
219 }
220
221 if (howto & RB_POWERDOWN)
222 hostboot_option = HOST_REBOOT_HALT;
223 if (howto & RB_HALT)
224 hostboot_option = HOST_REBOOT_HALT;
225
226 if (howto & RB_UPSDELAY) {
227 hostboot_option = HOST_REBOOT_UPSDELAY;
228 }
229
230 host_reboot(host_priv_self(), hostboot_option);
231 /*
232 * should not be reached
233 */
234 return (0);
235 }
236
237 static int
238 sd_openlog(vfs_context_t ctx)
239 {
240 int error = 0;
241 struct timeval tv;
242
243 /* Open shutdown log */
244 if ((error = vnode_open(PROC_SHUTDOWN_LOG, (O_CREAT | FWRITE | O_NOFOLLOW), 0644, 0, &sd_logvp, ctx))) {
245 printf("Failed to open %s: error %d\n", PROC_SHUTDOWN_LOG, error);
246 sd_logvp = NULLVP;
247 return error;
248 }
249
250 vnode_setsize(sd_logvp, (off_t)0, 0, ctx);
251
252 /* Write a little header */
253 microtime(&tv);
254 sd_log(ctx, "Process shutdown log. Current time is %lu (in seconds).\n\n", tv.tv_sec);
255
256 return 0;
257 }
258
259 static int
260 sd_closelog(vfs_context_t ctx)
261 {
262 int error = 0;
263 if (sd_logvp != NULLVP) {
264 VNOP_FSYNC(sd_logvp, MNT_WAIT, ctx);
265 error = vnode_close(sd_logvp, FWRITE, ctx);
266 }
267
268 return error;
269 }
270
271 static void
272 sd_log(vfs_context_t ctx, const char *fmt, ...)
273 {
274 int resid, log_error, len;
275 char logbuf[100];
276 va_list arglist;
277
278 /* If the log isn't open yet, open it */
279 if (sd_logvp == NULLVP) {
280 if (sd_openlog(ctx) != 0) {
281 /* Couldn't open, we fail out */
282 return;
283 }
284 }
285
286 va_start(arglist, fmt);
287 len = vsnprintf(logbuf, sizeof(logbuf), fmt, arglist);
288 log_error = vn_rdwr(UIO_WRITE, sd_logvp, (caddr_t)logbuf, len, sd_log_offset,
289 UIO_SYSSPACE, IO_UNIT | IO_NOAUTH, vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx));
290 if (log_error == EIO || log_error == 0) {
291 sd_log_offset += (len - resid);
292 }
293
294 va_end(arglist);
295
296 }
297
298 static int
299 sd_filt1(proc_t p, void * args)
300 {
301 proc_t self = current_proc();
302 struct sd_filterargs * sf = (struct sd_filterargs *)args;
303 int delayterm = sf-> delayterm;
304 int shutdownstate = sf->shutdownstate;
305
306 if (((p->p_flag&P_SYSTEM) != 0) || (p->p_ppid == 0)
307 ||(p == self) || (p->p_stat == SZOMB)
308 || (p->p_shutdownstate != shutdownstate)
309 ||((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM))
310 || ((p->p_sigcatch & sigmask(SIGTERM))== 0)) {
311 return(0);
312 }
313 else
314 return(1);
315 }
316
317
318 static int
319 sd_callback1(proc_t p, void * args)
320 {
321 struct sd_iterargs * sd = (struct sd_iterargs *)args;
322 int signo = sd->signo;
323 int setsdstate = sd->setsdstate;
324 int countproc = sd->countproc;
325
326 proc_lock(p);
327 p->p_shutdownstate = setsdstate;
328 if (p->p_stat != SZOMB) {
329 proc_unlock(p);
330 if (countproc != 0) {
331 proc_list_lock();
332 p->p_listflag |= P_LIST_EXITCOUNT;
333 proc_shutdown_exitcount++;
334 proc_list_unlock();
335 }
336
337 psignal(p, signo);
338 if (countproc != 0)
339 sd->activecount++;
340 } else
341 proc_unlock(p);
342 return(PROC_RETURNED);
343 }
344
345 static int
346 sd_filt2(proc_t p, void * args)
347 {
348 proc_t self = current_proc();
349 struct sd_filterargs * sf = (struct sd_filterargs *)args;
350 int delayterm = sf-> delayterm;
351 int shutdownstate = sf->shutdownstate;
352
353 if (((p->p_flag&P_SYSTEM) != 0) || (p->p_ppid == 0)
354 ||(p == self) || (p->p_stat == SZOMB)
355 || (p->p_shutdownstate == shutdownstate)
356 ||((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM))) {
357 return(0);
358 }
359 else
360 return(1);
361 }
362
363 static int
364 sd_callback2(proc_t p, void * args)
365 {
366 struct sd_iterargs * sd = (struct sd_iterargs *)args;
367 int signo = sd->signo;
368 int setsdstate = sd->setsdstate;
369 int countproc = sd->countproc;
370
371 proc_lock(p);
372 p->p_shutdownstate = setsdstate;
373 if (p->p_stat != SZOMB) {
374 proc_unlock(p);
375 if (countproc != 0) {
376 proc_list_lock();
377 p->p_listflag |= P_LIST_EXITCOUNT;
378 proc_shutdown_exitcount++;
379 proc_list_unlock();
380 }
381 psignal(p, signo);
382 if (countproc != 0)
383 sd->activecount++;
384 } else
385 proc_unlock(p);
386
387 return(PROC_RETURNED);
388
389 }
390
391 static int
392 sd_callback3(proc_t p, void * args)
393 {
394 struct sd_iterargs * sd = (struct sd_iterargs *)args;
395 vfs_context_t ctx = vfs_context_current();
396
397 int setsdstate = sd->setsdstate;
398
399 proc_lock(p);
400 p->p_shutdownstate = setsdstate;
401 if (p->p_stat != SZOMB) {
402 /*
403 * NOTE: following code ignores sig_lock and plays
404 * with exit_thread correctly. This is OK unless we
405 * are a multiprocessor, in which case I do not
406 * understand the sig_lock. This needs to be fixed.
407 * XXX
408 */
409 if (p->exit_thread) { /* someone already doing it */
410 proc_unlock(p);
411 /* give him a chance */
412 thread_block(THREAD_CONTINUE_NULL);
413 } else {
414 p->exit_thread = current_thread();
415 printf(".");
416
417 sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid);
418
419 proc_unlock(p);
420 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
421 p->p_pid, 0, 1, 0, 0);
422 sd->activecount++;
423 exit1(p, 1, (int *)NULL);
424 }
425 } else
426 proc_unlock(p);
427
428 return(PROC_RETURNED);
429 }
430
431
432 /*
433 * proc_shutdown()
434 *
435 * Shutdown down proc system (release references to current and root
436 * dirs for each process).
437 *
438 * POSIX modifications:
439 *
440 * For POSIX fcntl() file locking call vno_lockrelease() on
441 * the file to release all of its record locks, if any.
442 */
443
444 static void
445 proc_shutdown(void)
446 {
447 vfs_context_t ctx = vfs_context_current();
448 struct proc *p, *self;
449 int delayterm = 0;
450 struct sd_filterargs sfargs;
451 struct sd_iterargs sdargs;
452 int error = 0;
453 struct timespec ts;
454
455 /*
456 * Kill as many procs as we can. (Except ourself...)
457 */
458 self = (struct proc *)current_proc();
459
460 /*
461 * Signal the init with SIGTERM so that he does not launch
462 * new processes
463 */
464 p = proc_find(1);
465 if (p && p != self) {
466 psignal(p, SIGTERM);
467 }
468 proc_rele(p);
469
470 printf("Killing all processes ");
471
472 sigterm_loop:
473 /*
474 * send SIGTERM to those procs interested in catching one
475 */
476 sfargs.delayterm = delayterm;
477 sfargs.shutdownstate = 0;
478 sdargs.signo = SIGTERM;
479 sdargs.setsdstate = 1;
480 sdargs.countproc = 1;
481 sdargs.activecount = 0;
482
483 error = 0;
484 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */
485 proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs);
486
487 if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
488 proc_list_lock();
489 if (proc_shutdown_exitcount != 0) {
490 /*
491 * now wait for up to 30 seconds to allow those procs catching SIGTERM
492 * to digest it
493 * as soon as these procs have exited, we'll continue on to the next step
494 */
495 ts.tv_sec = 30;
496 ts.tv_nsec = 0;
497 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
498 if (error != 0) {
499 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
500 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
501 p->p_listflag &= ~P_LIST_EXITCOUNT;
502 }
503 for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
504 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
505 p->p_listflag &= ~P_LIST_EXITCOUNT;
506 }
507 }
508
509 }
510 proc_list_unlock();
511 }
512 if (error == ETIMEDOUT) {
513 /*
514 * log the names of the unresponsive tasks
515 */
516
517
518 proc_list_lock();
519
520 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
521 if (p->p_shutdownstate == 1) {
522 printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
523 sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
524 }
525 }
526
527 proc_list_unlock();
528
529 delay_for_interval(1000 * 5, 1000 * 1000);
530 }
531
532 /*
533 * send a SIGKILL to all the procs still hanging around
534 */
535 sfargs.delayterm = delayterm;
536 sfargs.shutdownstate = 2;
537 sdargs.signo = SIGKILL;
538 sdargs.setsdstate = 2;
539 sdargs.countproc = 1;
540 sdargs.activecount = 0;
541
542 /* post a SIGKILL to all that catch SIGTERM and not marked for delay */
543 proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs);
544
545 if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
546 proc_list_lock();
547 if (proc_shutdown_exitcount != 0) {
548 /*
549 * wait for up to 60 seconds to allow these procs to exit normally
550 *
551 * History: The delay interval was changed from 100 to 200
552 * for NFS requests in particular.
553 */
554 ts.tv_sec = 60;
555 ts.tv_nsec = 0;
556 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
557 if (error != 0) {
558 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
559 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
560 p->p_listflag &= ~P_LIST_EXITCOUNT;
561 }
562 for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
563 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
564 p->p_listflag &= ~P_LIST_EXITCOUNT;
565 }
566 }
567 }
568 proc_list_unlock();
569 }
570
571 /*
572 * if we still have procs that haven't exited, then brute force 'em
573 */
574 sfargs.delayterm = delayterm;
575 sfargs.shutdownstate = 3;
576 sdargs.signo = 0;
577 sdargs.setsdstate = 3;
578 sdargs.countproc = 0;
579 sdargs.activecount = 0;
580
581 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */
582 proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs);
583 printf("\n");
584
585 /* Now start the termination of processes that are marked for delayed termn */
586 if (delayterm == 0) {
587 delayterm = 1;
588 goto sigterm_loop;
589 }
590
591 sd_closelog(ctx);
592
593 /*
594 * Now that all other processes have been terminated, suspend init
595 */
596 task_suspend_internal(initproc->task);
597
598 /* drop the ref on initproc */
599 proc_rele(initproc);
600 printf("continuing\n");
601 }
602