/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 *	File:	bsd/kern/kern_shutdown.c
 *
 *	Copyright (C) 1989, NeXT, Inc.
 */
  35 #include <sys/param.h> 
  36 #include <sys/systm.h> 
  37 #include <sys/kernel.h> 
  39 #include <sys/proc_internal.h> 
  41 #include <sys/reboot.h> 
  43 #include <sys/vnode_internal.h> 
  44 #include <sys/file_internal.h> 
  46 #include <sys/msgbuf.h> 
  47 #include <sys/ioctl.h> 
  48 #include <sys/signal.h> 
  50 #include <kern/task.h> 
  51 #include <sys/quota.h> 
  52 #include <vm/vm_kern.h> 
  53 #include <mach/vm_param.h> 
  54 #include <sys/filedesc.h> 
  55 #include <mach/host_priv.h> 
  56 #include <mach/host_reboot.h> 
  58 #include <security/audit/audit.h> 
  60 #include <kern/sched_prim.h>            /* for thread_block() */ 
  61 #include <kern/host.h>                  /* for host_priv_self() */ 
  62 #include <net/if_var.h>                 /* for if_down_all() */ 
  63 #include <sys/buf_internal.h>           /* for count_busy_buffers() */ 
  64 #include <sys/mount_internal.h>         /* for vfs_unmountall() */ 
  65 #include <mach/task.h>                  /* for task_suspend() */ 
  66 #include <sys/sysproto.h>               /* abused for sync() */ 
  67 #include <kern/clock.h>                 /* for delay_for_interval() */ 
  68 #include <libkern/OSAtomic.h> 
  69 #include <IOKit/IOPlatformExpert.h> 
  70 #include <IOKit/IOMessage.h> 
  72 #include <sys/kdebug.h> 
  74 uint32_t system_inshutdown 
= 0; 
  77 /* XXX should be in a header file somewhere, but isn't */ 
  78 extern void (*unmountroot_pre_hook
)(void); 
  81 unsigned int proc_shutdown_exitcount 
= 0; 
  83 static int  sd_openlog(vfs_context_t
); 
  84 static int  sd_closelog(vfs_context_t
); 
  85 static void sd_log(vfs_context_t
, const char *, ...); 
  86 static void proc_shutdown(int only_non_dext
); 
  87 static void zprint_panic_info(void); 
  88 extern void halt_log_enter(const char * what
, const void * pc
, uint64_t time
); 
  90 #if DEVELOPMENT || DEBUG 
  91 extern boolean_t 
kdp_has_polled_corefile(void); 
  92 #endif /* DEVELOPMENT || DEBUG */ 
  94 struct sd_filterargs 
{ 
 102         int signo
;              /* the signal to be posted */ 
 103         int setsdstate
;         /* shutdown state to be set */ 
 104         int countproc
;          /* count processes on action */ 
 105         int activecount
;        /* number of processes on which action was done */ 
 108 static vnode_t sd_logvp 
= NULLVP
; 
 109 static off_t sd_log_offset 
= 0; 
 112 static int sd_filt1(proc_t
, void *); 
 113 static int sd_filt2(proc_t
, void *); 
 114 static int sd_callback1(proc_t p
, void * arg
); 
 115 static int sd_callback2(proc_t p
, void * arg
); 
 116 static int sd_callback3(proc_t p
, void * arg
); 
 118 extern bool panic_include_zprint
; 
 119 extern mach_memory_info_t 
*panic_kext_memory_info
; 
 120 extern vm_size_t panic_kext_memory_size
; 
 123 zprint_panic_info(void) 
 125         unsigned int  num_sites
; 
 128         panic_include_zprint 
= TRUE
; 
 129         panic_kext_memory_info 
= NULL
; 
 130         panic_kext_memory_size 
= 0; 
 132         num_sites 
= vm_page_diagnose_estimate(); 
 133         panic_kext_memory_size 
= num_sites 
* sizeof(panic_kext_memory_info
[0]); 
 135         kr 
= kmem_alloc(kernel_map
, (vm_offset_t 
*)&panic_kext_memory_info
, round_page(panic_kext_memory_size
), VM_KERN_MEMORY_OSFMK
); 
 136         if (kr 
!= KERN_SUCCESS
) { 
 137                 panic_kext_memory_info 
= NULL
; 
 141         vm_page_diagnose(panic_kext_memory_info
, num_sites
, 0); 
 145 get_system_inshutdown() 
 147         return system_inshutdown
; 
 152 panic_kernel(int howto
, char *message
) 
 154         if ((howto 
& RB_PANIC_ZPRINT
) == RB_PANIC_ZPRINT
) { 
 157         panic("userspace panic: %s", message
); 
 160 extern boolean_t compressor_store_stop_compaction
; 
 161 extern lck_mtx_t vm_swap_data_lock
; 
 162 extern int vm_swapfile_create_thread_running
; 
 163 extern int vm_swapfile_gc_thread_running
; 
 166 reboot_kernel(int howto
, char *message
) 
 168         int hostboot_option 
= 0; 
 171         if ((howto 
& (RB_PANIC 
| RB_QUICK
)) == (RB_PANIC 
| RB_QUICK
)) { 
 172                 panic_kernel(howto
, message
); 
 175         if (!OSCompareAndSwap(0, 1, &system_inshutdown
)) { 
 176                 if ((howto 
& RB_QUICK
) == RB_QUICK
) { 
 182         lck_mtx_lock(&vm_swap_data_lock
); 
 184         /* Turn OFF future swapfile reclaimation / compaction etc.*/ 
 185         compressor_store_stop_compaction 
= TRUE
; 
 187         /* wait for any current swapfile work to end */ 
 188         while (vm_swapfile_create_thread_running 
|| vm_swapfile_gc_thread_running
) { 
 189                 assert_wait((event_t
)&compressor_store_stop_compaction
, THREAD_UNINT
); 
 191                 lck_mtx_unlock(&vm_swap_data_lock
); 
 193                 thread_block(THREAD_CONTINUE_NULL
); 
 195                 lck_mtx_lock(&vm_swap_data_lock
); 
 198         lck_mtx_unlock(&vm_swap_data_lock
); 
 201          * Notify the power management root domain that the system will shut down. 
 203         IOSystemShutdownNotification(kIOSystemShutdownNotificationStageProcessExit
); 
 205         if ((howto 
& RB_QUICK
) == RB_QUICK
) { 
 206                 printf("Quick reboot...\n"); 
 207                 if ((howto 
& RB_NOSYNC
) == 0) { 
 208                         sync((proc_t
)NULL
, (void *)NULL
, (int *)NULL
); 
 210         } else if ((howto 
& RB_NOSYNC
) == 0) { 
 213                 printf("syncing disks... "); 
 216                  * Release vnodes held by texts before sync. 
 219                 /* handle live procs (deallocate their root and current directories), suspend initproc */ 
 221                 startTime 
= mach_absolute_time(); 
 223                 halt_log_enter("proc_shutdown", 0, mach_absolute_time() - startTime
); 
 226                 startTime 
= mach_absolute_time(); 
 228                 halt_log_enter("audit_shutdown", 0, mach_absolute_time() - startTime
); 
 231 #if XNU_TARGET_OS_OSX 
 232                 if (unmountroot_pre_hook 
!= NULL
) { 
 233                         unmountroot_pre_hook(); 
 237                 startTime 
= mach_absolute_time(); 
 238                 sync((proc_t
)NULL
, (void *)NULL
, (int *)NULL
); 
 241                         startTime 
= mach_absolute_time(); 
 242                         kdbg_dump_trace_to_file("/var/log/shutdown/shutdown.trace"); 
 243                         halt_log_enter("shutdown.trace", 0, mach_absolute_time() - startTime
); 
 246                 IOSystemShutdownNotification(kIOSystemShutdownNotificationStageRootUnmount
); 
 249                  * Unmount filesystems 
 252 #if DEVELOPMENT || DEBUG 
 253                 if (!(howto 
& RB_PANIC
) || !kdp_has_polled_corefile()) 
 254 #endif /* DEVELOPMENT || DEBUG */ 
 256                         startTime 
= mach_absolute_time(); 
 257                         vfs_unmountall(TRUE
); 
 258                         halt_log_enter("vfs_unmountall", 0, mach_absolute_time() - startTime
); 
 261                 IOSystemShutdownNotification(kIOSystemShutdownNotificationTerminateDEXTs
); 
 263                 startTime 
= mach_absolute_time(); 
 264                 proc_shutdown(FALSE
); 
 265                 halt_log_enter("proc_shutdown", 0, mach_absolute_time() - startTime
); 
 267 #if DEVELOPMENT || DEBUG 
 268                 if (!(howto 
& RB_PANIC
) || !kdp_has_polled_corefile()) 
 269 #endif /* DEVELOPMENT || DEBUG */ 
 271                         startTime 
= mach_absolute_time(); 
 272                         vfs_unmountall(FALSE
); 
 273                         halt_log_enter("vfs_unmountall", 0, mach_absolute_time() - startTime
); 
 278                 /* Wait for the buffer cache to clean remaining dirty buffers */ 
 279                 startTime 
= mach_absolute_time(); 
 280                 for (iter 
= 0; iter 
< 100; iter
++) { 
 281                         nbusy 
= count_busy_buffers(); 
 285                         printf("%d ", nbusy
); 
 286                         delay_for_interval( 1 * nbusy
, 1000 * 1000); 
 289                         printf("giving up\n"); 
 293                 halt_log_enter("bufferclean", 0, mach_absolute_time() - startTime
); 
 297          * Can't just use an splnet() here to disable the network 
 298          * because that will lock out softints which the disk 
 299          * drivers depend on to finish DMAs. 
 301         startTime 
= mach_absolute_time(); 
 303         halt_log_enter("if_down_all", 0, mach_absolute_time() - startTime
); 
 304 #endif /* NETWORKING */ 
 308         if (howto 
& RB_PANIC
) { 
 309                 panic_kernel(howto
, message
); 
 312         if (howto 
& RB_HALT
) { 
 313                 hostboot_option 
= HOST_REBOOT_HALT
; 
 316         if (howto 
& RB_UPSDELAY
) { 
 317                 hostboot_option 
= HOST_REBOOT_UPSDELAY
; 
 320         host_reboot(host_priv_self(), hostboot_option
); 
 322          * should not be reached 
 328 sd_openlog(vfs_context_t ctx
) 
 333         /* Open shutdown log */ 
 334         if ((error 
= vnode_open(PROC_SHUTDOWN_LOG
, (O_CREAT 
| FWRITE 
| O_NOFOLLOW
), 0644, 0, &sd_logvp
, ctx
))) { 
 335                 printf("Failed to open %s: error %d\n", PROC_SHUTDOWN_LOG
, error
); 
 340         vnode_setsize(sd_logvp
, (off_t
)0, 0, ctx
); 
 342         /* Write a little header */ 
 344         sd_log(ctx
, "Process shutdown log.  Current time is %lu (in seconds).\n\n", tv
.tv_sec
); 
 350 sd_closelog(vfs_context_t ctx
) 
 353         if (sd_logvp 
!= NULLVP
) { 
 354                 VNOP_FSYNC(sd_logvp
, MNT_WAIT
, ctx
); 
 355                 error 
= vnode_close(sd_logvp
, FWRITE
, ctx
); 
 363 sd_log(vfs_context_t ctx
, const char *fmt
, ...) 
 365         int resid
, log_error
, len
; 
 369         /* If the log isn't open yet, open it */ 
 370         if (sd_logvp 
== NULLVP
) { 
 371                 if (sd_openlog(ctx
) != 0) { 
 372                         /* Couldn't open, we fail out */ 
 377         va_start(arglist
, fmt
); 
 378         len 
= vsnprintf(logbuf
, sizeof(logbuf
), fmt
, arglist
); 
 379         log_error 
= vn_rdwr(UIO_WRITE
, sd_logvp
, (caddr_t
)logbuf
, len
, sd_log_offset
, 
 380             UIO_SYSSPACE
, IO_UNIT 
| IO_NOAUTH
, vfs_context_ucred(ctx
), &resid
, vfs_context_proc(ctx
)); 
 381         if (log_error 
== EIO 
|| log_error 
== 0) { 
 382                 sd_log_offset 
+= (len 
- resid
); 
 388 #define proc_is_driver(p) (task_is_driver((p)->task)) 
 391 sd_filt1(proc_t p
, void * args
) 
 393         proc_t self 
= current_proc(); 
 394         struct sd_filterargs 
* sf 
= (struct sd_filterargs 
*)args
; 
 395         int delayterm 
= sf
->delayterm
; 
 396         int shutdownstate 
= sf
->shutdownstate
; 
 398         if (sf
->only_non_dext 
&& proc_is_driver(p
)) { 
 402         if (((p
->p_flag 
& P_SYSTEM
) != 0) || (p
->p_ppid 
== 0) 
 403             || (p 
== self
) || (p
->p_stat 
== SZOMB
) 
 404             || (p
->p_shutdownstate 
!= shutdownstate
) 
 405             || ((delayterm 
== 0) && ((p
->p_lflag 
& P_LDELAYTERM
) == P_LDELAYTERM
)) 
 406             || ((p
->p_sigcatch 
& sigmask(SIGTERM
)) == 0)) { 
 415 sd_callback1(proc_t p
, void * args
) 
 417         struct sd_iterargs 
* sd 
= (struct sd_iterargs 
*)args
; 
 418         int signo 
= sd
->signo
; 
 419         int setsdstate 
= sd
->setsdstate
; 
 420         int countproc 
= sd
->countproc
; 
 423         p
->p_shutdownstate 
= (char)setsdstate
; 
 424         if (p
->p_stat 
!= SZOMB
) { 
 426                 if (countproc 
!= 0) { 
 428                         p
->p_listflag 
|= P_LIST_EXITCOUNT
; 
 429                         proc_shutdown_exitcount
++; 
 432                 if (proc_is_driver(p
)) { 
 433                         printf("lingering dext %s signal(%d)\n", p
->p_name
, signo
); 
 436                 if (countproc 
!= 0) { 
 443         return PROC_RETURNED
; 
 447 sd_filt2(proc_t p
, void * args
) 
 449         proc_t self 
= current_proc(); 
 450         struct sd_filterargs 
* sf 
= (struct sd_filterargs 
*)args
; 
 451         int delayterm 
= sf
->delayterm
; 
 452         int shutdownstate 
= sf
->shutdownstate
; 
 454         if (sf
->only_non_dext 
&& proc_is_driver(p
)) { 
 458         if (((p
->p_flag 
& P_SYSTEM
) != 0) || (p
->p_ppid 
== 0) 
 459             || (p 
== self
) || (p
->p_stat 
== SZOMB
) 
 460             || (p
->p_shutdownstate 
== shutdownstate
) 
 461             || ((delayterm 
== 0) && ((p
->p_lflag 
& P_LDELAYTERM
) == P_LDELAYTERM
))) { 
 469 sd_callback2(proc_t p
, void * args
) 
 471         struct sd_iterargs 
* sd 
= (struct sd_iterargs 
*)args
; 
 472         int signo 
= sd
->signo
; 
 473         int setsdstate 
= sd
->setsdstate
; 
 474         int countproc 
= sd
->countproc
; 
 477         p
->p_shutdownstate 
= (char)setsdstate
; 
 478         if (p
->p_stat 
!= SZOMB
) { 
 480                 if (countproc 
!= 0) { 
 482                         p
->p_listflag 
|= P_LIST_EXITCOUNT
; 
 483                         proc_shutdown_exitcount
++; 
 486                 if (proc_is_driver(p
)) { 
 487                         printf("lingering dext %s signal(%d)\n", p
->p_name
, signo
); 
 490                 if (countproc 
!= 0) { 
 497         return PROC_RETURNED
; 
 501 sd_callback3(proc_t p
, void * args
) 
 503         struct sd_iterargs 
* sd 
= (struct sd_iterargs 
*)args
; 
 504         vfs_context_t ctx 
= vfs_context_current(); 
 506         int setsdstate 
= sd
->setsdstate
; 
 509         p
->p_shutdownstate 
= (char)setsdstate
; 
 510         if (p
->p_stat 
!= SZOMB
) { 
 512                  * NOTE: following code ignores sig_lock and plays 
 513                  * with exit_thread correctly.  This is OK unless we 
 514                  * are a multiprocessor, in which case I do not 
 515                  * understand the sig_lock.  This needs to be fixed. 
 518                 if (p
->exit_thread
) {   /* someone already doing it */ 
 520                         /* give him a chance */ 
 521                         thread_block(THREAD_CONTINUE_NULL
); 
 523                         p
->exit_thread 
= current_thread(); 
 526                         sd_log(ctx
, "%s[%d] had to be forced closed with exit1().\n", p
->p_comm
, p
->p_pid
); 
 529                         KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC
, BSD_PROC_FRCEXIT
) | DBG_FUNC_NONE
, 
 530                             p
->p_pid
, 0, 1, 0, 0); 
 532                         exit1(p
, 1, (int *)NULL
); 
 538         return PROC_RETURNED
; 
 545  *      Shutdown down proc system (release references to current and root 
 546  *      dirs for each process). 
 548  * POSIX modifications: 
 550  *      For POSIX fcntl() file locking call vno_lockrelease() on 
 551  *      the file to release all of its record locks, if any. 
 555 proc_shutdown(int only_non_dext
) 
 557         vfs_context_t ctx 
= vfs_context_current(); 
 558         struct proc 
*p
, *self
; 
 560         struct sd_filterargs sfargs
; 
 561         struct sd_iterargs sdargs
; 
 566          *      Kill as many procs as we can.  (Except ourself...) 
 568         self 
= (struct proc 
*)current_proc(); 
 571          * Signal the init with SIGTERM so that he does not launch 
 575         if (p 
&& p 
!= self
) { 
 580         printf("Killing all processes "); 
 584          * send SIGTERM to those procs interested in catching one 
 586         sfargs
.delayterm 
= delayterm
; 
 587         sfargs
.shutdownstate 
= 0; 
 588         sfargs
.only_non_dext 
= only_non_dext
; 
 589         sdargs
.signo 
= SIGTERM
; 
 590         sdargs
.setsdstate 
= 1; 
 591         sdargs
.countproc 
= 1; 
 592         sdargs
.activecount 
= 0; 
 595         /* post a SIGTERM to all that catch SIGTERM and not marked for delay */ 
 596         proc_rebootscan(sd_callback1
, (void *)&sdargs
, sd_filt1
, (void *)&sfargs
); 
 598         if (sdargs
.activecount 
!= 0 && proc_shutdown_exitcount 
!= 0) { 
 600                 if (proc_shutdown_exitcount 
!= 0) { 
 602                          * now wait for up to 3 seconds to allow those procs catching SIGTERM 
 604                          * as soon as these procs have exited, we'll continue on to the next step 
 608                         error 
= msleep(&proc_shutdown_exitcount
, &proc_list_mlock
, PWAIT
, "shutdownwait", &ts
); 
 610                                 for (p 
= allproc
.lh_first
; p
; p 
= p
->p_list
.le_next
) { 
 611                                         if ((p
->p_listflag 
& P_LIST_EXITCOUNT
) == P_LIST_EXITCOUNT
) { 
 612                                                 p
->p_listflag 
&= ~P_LIST_EXITCOUNT
; 
 615                                 for (p 
= zombproc
.lh_first
; p
; p 
= p
->p_list
.le_next
) { 
 616                                         if ((p
->p_listflag 
& P_LIST_EXITCOUNT
) == P_LIST_EXITCOUNT
) { 
 617                                                 p
->p_listflag 
&= ~P_LIST_EXITCOUNT
; 
 624         if (error 
== ETIMEDOUT
) { 
 626                  * log the names of the unresponsive tasks 
 631                 for (p 
= allproc
.lh_first
; p
; p 
= p
->p_list
.le_next
) { 
 632                         if (p
->p_shutdownstate 
== 1) { 
 633                                 printf("%s[%d]: didn't act on SIGTERM\n", p
->p_comm
, p
->p_pid
); 
 634                                 sd_log(ctx
, "%s[%d]: didn't act on SIGTERM\n", p
->p_comm
, p
->p_pid
); 
 642          * send a SIGKILL to all the procs still hanging around 
 644         sfargs
.delayterm 
= delayterm
; 
 645         sfargs
.shutdownstate 
= 2; 
 646         sdargs
.signo 
= SIGKILL
; 
 647         sdargs
.setsdstate 
= 2; 
 648         sdargs
.countproc 
= 1; 
 649         sdargs
.activecount 
= 0; 
 651         /* post a SIGKILL to all that catch SIGTERM and not marked for delay */ 
 652         proc_rebootscan(sd_callback2
, (void *)&sdargs
, sd_filt2
, (void *)&sfargs
); 
 656         if (sdargs
.activecount 
!= 0 && proc_shutdown_exitcount 
!= 0) { 
 658                 if (proc_shutdown_exitcount 
!= 0) { 
 660                          * wait for up to 60 seconds to allow these procs to exit normally 
 662                          * History:     The delay interval was changed from 100 to 200 
 663                          *              for NFS requests in particular. 
 667                         error 
= msleep(&proc_shutdown_exitcount
, &proc_list_mlock
, PWAIT
, "shutdownwait", &ts
); 
 669                                 for (p 
= allproc
.lh_first
; p
; p 
= p
->p_list
.le_next
) { 
 670                                         if ((p
->p_listflag 
& P_LIST_EXITCOUNT
) == P_LIST_EXITCOUNT
) { 
 671                                                 p
->p_listflag 
&= ~P_LIST_EXITCOUNT
; 
 674                                 for (p 
= zombproc
.lh_first
; p
; p 
= p
->p_list
.le_next
) { 
 675                                         if ((p
->p_listflag 
& P_LIST_EXITCOUNT
) == P_LIST_EXITCOUNT
) { 
 676                                                 p
->p_listflag 
&= ~P_LIST_EXITCOUNT
; 
 684         if (error 
== ETIMEDOUT
) { 
 686                  * log the names of the unresponsive tasks 
 691                 for (p 
= allproc
.lh_first
; p
; p 
= p
->p_list
.le_next
) { 
 692                         if (p
->p_shutdownstate 
== 2) { 
 693                                 printf("%s[%d]: didn't act on SIGKILL\n", p
->p_comm
, p
->p_pid
); 
 694                                 sd_log(ctx
, "%s[%d]: didn't act on SIGKILL\n", p
->p_comm
, p
->p_pid
); 
 702          * if we still have procs that haven't exited, then brute force 'em 
 704         sfargs
.delayterm 
= delayterm
; 
 705         sfargs
.shutdownstate 
= 3; 
 707         sdargs
.setsdstate 
= 3; 
 708         sdargs
.countproc 
= 0; 
 709         sdargs
.activecount 
= 0; 
 713         /* post a SIGTERM to all that catch SIGTERM and not marked for delay */ 
 714         proc_rebootscan(sd_callback3
, (void *)&sdargs
, sd_filt2
, (void *)&sfargs
); 
 717         /* Now start the termination of processes that are marked for delayed termn */ 
 718         if (delayterm 
== 0) { 
 730          * Now that all other processes have been terminated, suspend init 
 732         task_suspend_internal(initproc
->task
); 
 734         /* drop the ref on initproc */ 
 736         printf("continuing\n");