]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/kern/kern_shutdown.c
xnu-2422.110.17.tar.gz
[apple/xnu.git] / bsd / kern / kern_shutdown.c
index 592368bb45639cbe8f84498b1ab8e9543280624e..4e231826d10c3f69483503ff52206ed97c2332af 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/tty.h>
 #include <kern/task.h>
 #include <sys/quota.h>
-#include <ufs/ufs/inode.h>
-#if    NCPUS > 1
-#include <kern/processor.h>
-#include <kern/thread.h>
-#include <sys/lock.h>
-#endif /* NCPUS > 1 */
 #include <vm/vm_kern.h>
 #include <mach/vm_param.h>
 #include <sys/filedesc.h>
 #include <mach/host_priv.h>
 #include <mach/host_reboot.h>
 
-#include <bsm/audit_kernel.h>
+#include <security/audit/audit.h>
 
-int    waittime = -1;
-static void proc_shutdown();
+#include <kern/sched_prim.h>           /* for thread_block() */
+#include <kern/host.h>                 /* for host_priv_self() */
+#include <net/if_var.h>                        /* for if_down_all() */
+#include <sys/buf_internal.h>          /* for count_busy_buffers() */
+#include <sys/mount_internal.h>                /* for vfs_unmountall() */
+#include <mach/task.h>                 /* for task_suspend() */
+#include <sys/sysproto.h>              /* abused for sync() */
+#include <kern/clock.h>                        /* for delay_for_interval() */
+#include <libkern/OSAtomic.h>
 
-void
-boot(paniced, howto, command)
-       int paniced, howto;
-       char *command;
+#include <sys/kdebug.h>
+
+uint32_t system_inshutdown = 0;
+
+/* XXX should be in a header file somewhere, but isn't */
+extern void md_prepare_for_shutdown(int, int, char *);
+extern void (*unmountroot_pre_hook)(void);
+
+unsigned int proc_shutdown_exitcount = 0;
+
+static int  sd_openlog(vfs_context_t);
+static int  sd_closelog(vfs_context_t);
+static void sd_log(vfs_context_t, const char *, ...);
+static void proc_shutdown(void);
+
+extern void IOSystemShutdownNotification(void);
+
+/*
+ * Arguments handed to the sd_filt1()/sd_filt2() process filters.
+ */
+struct sd_filterargs{
+       int delayterm;          /* when 0, procs with P_LDELAYTERM set are rejected */
+       int shutdownstate;      /* compared with p_shutdownstate: sd_filt1 requires a match, sd_filt2 a mismatch */
+};
+
+
+/*
+ * Per-scan arguments (and result counter) shared with the
+ * sd_callback1()/sd_callback2()/sd_callback3() iterators.
+ */
+struct sd_iterargs {
+       int signo;              /* the signal to be posted (not read by sd_callback3) */
+       int setsdstate;         /* shutdown state to be set */
+       int countproc;          /* count processes on action: nonzero also flags the proc P_LIST_EXITCOUNT */
+       int activecount;        /* number of processes on which action was done */
+};
+
+static vnode_t sd_logvp = NULLVP;
+static off_t sd_log_offset = 0;
+
+
+static int sd_filt1(proc_t, void *);
+static int sd_filt2(proc_t, void *);
+static int  sd_callback1(proc_t p, void * arg);
+static int  sd_callback2(proc_t p, void * arg);
+static int  sd_callback3(proc_t p, void * arg);
+
+int
+boot(int paniced, int howto, char *command)
 {
-       register int i;
-       int s;
        struct proc *p = current_proc();        /* XXX */
        int hostboot_option=0;
-       int funnel_state;
-       struct proc  *launchd_proc;
-
-    extern void md_prepare_for_shutdown(int paniced, int howto, char * command);
 
-       funnel_state = thread_funnel_set(kernel_flock, TRUE);
+       if (!OSCompareAndSwap(0, 1, &system_inshutdown)) {
+               if ( (howto&RB_QUICK) == RB_QUICK)
+                       goto force_reboot;
+               return (EBUSY);
+       }
+       /*
+        * Temporary hack to notify the power management root domain
+        * that the system will shut down.
+        */
+       IOSystemShutdownNotification();
 
        md_prepare_for_shutdown(paniced, howto, command);
 
-       if ((howto&RB_NOSYNC)==0 && waittime < 0) {
+       if ((howto&RB_QUICK)==RB_QUICK) {
+               printf("Quick reboot...\n");
+               if ((howto&RB_NOSYNC)==0) {
+                       sync(p, (void *)NULL, (int *)NULL);
+               }
+       }
+       else if ((howto&RB_NOSYNC)==0) {
                int iter, nbusy;
 
-               waittime = 0;
-               
                printf("syncing disks... ");
 
                /*
@@ -100,19 +146,25 @@ boot(paniced, howto, command)
                /* handle live procs (deallocate their root and current directories). */                
                proc_shutdown();
 
+#if CONFIG_AUDIT
                audit_shutdown();
+#endif
+
+               if (unmountroot_pre_hook != NULL)
+                       unmountroot_pre_hook();
 
                sync(p, (void *)NULL, (int *)NULL);
 
                /*
-                * Now that all processes have been  termianted and system is sync'ed up, 
-                * suspend launchd
+                * Now that all processes have been terminated and system is
+                * sync'ed up, suspend init
                 */
+                       
+               if (initproc && p != initproc)
+                       task_suspend(initproc->task);
 
-               launchd_proc = pfind(1);
-               if (launchd_proc && p != launchd_proc) {
-                       task_suspend(launchd_proc->task);
-               }
+               if (kdebug_enable)
+                       kdbg_dump_trace_to_file("/var/log/shutdown/shutdown.trace");
 
                /*
                 * Unmount filesystems
@@ -125,21 +177,23 @@ boot(paniced, howto, command)
                        if (nbusy == 0)
                                break;
                        printf("%d ", nbusy);
-                       IOSleep( 1 * nbusy );
+                       delay_for_interval( 1 * nbusy, 1000 * 1000);
                }
                if (nbusy)
                        printf("giving up\n");
                else
                        printf("done\n");
        }
-
+#if NETWORKING
        /*
         * Can't just use an splnet() here to disable the network
         * because that will lock out softints which the disk
         * drivers depend on to finish DMAs.
         */
        if_down_all();
+#endif /* NETWORKING */
 
+force_reboot:
        if (howto & RB_POWERDOWN)
                hostboot_option = HOST_REBOOT_HALT;
        if (howto & RB_HALT)
@@ -147,28 +201,212 @@ boot(paniced, howto, command)
        if (paniced == RB_PANIC)
                hostboot_option = HOST_REBOOT_HALT;
 
-    if (howto & RB_UPSDELAY) {
-        hostboot_option = HOST_REBOOT_UPSDELAY;
-    }
+       if (howto & RB_UPSDELAY) {
+               hostboot_option = HOST_REBOOT_UPSDELAY;
+       }
 
+       host_reboot(host_priv_self(), hostboot_option);
        /*
-        * if we're going to power down due to a halt,
-        * give the disks a chance to finish getting
-        * the track cache flushed to the media... 
-        * unfortunately, some of our earlier drives
-        * don't properly hold off on returning 
-        * from the track flush command (issued by
-        * the unmounts) until it's actully fully
-        * committed.
+        * should not be reached
         */
-       if (hostboot_option == HOST_REBOOT_HALT)
-               IOSleep( 1 * 1000 );
+       return (0);
+}
 
-       host_reboot(host_priv_self(), hostboot_option);
+/*
+ * sd_openlog
+ *
+ * Open (creating if needed) PROC_SHUTDOWN_LOG for writing, truncate it
+ * to zero length and emit a one-line header carrying the current time.
+ *
+ * Returns 0 on success; on failure returns the vnode_open() error and
+ * leaves sd_logvp set to NULLVP.
+ */
+static int
+sd_openlog(vfs_context_t ctx)
+{
+       struct timeval now;
+       int rc;
+
+       rc = vnode_open(PROC_SHUTDOWN_LOG, (O_CREAT | FWRITE | O_NOFOLLOW), 0644, 0, &sd_logvp, ctx);
+       if (rc != 0) {
+               printf("Failed to open %s: error %d\n", PROC_SHUTDOWN_LOG, rc);
+               sd_logvp = NULLVP;
+               return rc;
+       }
+
+       /* start from an empty file */
+       vnode_setsize(sd_logvp, (off_t)0, 0, ctx);
+
+       /* sd_logvp is set at this point, so sd_log() will not re-enter here */
+       microtime(&now);
+       sd_log(ctx, "Process shutdown log.  Current time is %lu (in seconds).\n\n", now.tv_sec);
+
+       return 0;
+}
+
+/*
+ * sd_closelog
+ *
+ * Flush and close the shutdown log if it was ever opened.
+ *
+ * Returns 0 when no log is open, otherwise the result of vnode_close().
+ * The result of the fsync is not checked.
+ */
+static int
+sd_closelog(vfs_context_t ctx)
+{
+       if (sd_logvp == NULLVP)
+               return 0;
+
+       VNOP_FSYNC(sd_logvp, MNT_WAIT, ctx);
+       return vnode_close(sd_logvp, FWRITE, ctx);
+}
+
+/*
+ * sd_log
+ *
+ * printf-style append to the shutdown log.  The log is opened on first
+ * use; if it cannot be opened the message is dropped.
+ *
+ * Messages are formatted into a fixed 100-byte buffer; anything longer
+ * is truncated.  sd_log_offset is advanced by the number of bytes
+ * reported written.
+ */
+static void
+sd_log(vfs_context_t ctx, const char *fmt, ...) 
+{
+       int resid, log_error, len;
+       char logbuf[100];
+       va_list arglist;
+
+       /* If the log isn't open yet, open it */
+       if (sd_logvp == NULLVP) {
+               if (sd_openlog(ctx) != 0) {
+                       /* Couldn't open, we fail out */
+                       return;
+               }
+       }
+
+       va_start(arglist, fmt);
+       len = vsnprintf(logbuf, sizeof(logbuf), fmt, arglist);
+       va_end(arglist);
+
+       /* nothing to write (or a formatting error) */
+       if (len <= 0)
+               return;
+
+       /*
+        * vsnprintf() reports the length the full message would have had,
+        * which can exceed the buffer.  Clamp it so the write never reads
+        * past the end of logbuf.
+        */
+       if (len >= (int)sizeof(logbuf))
+               len = (int)sizeof(logbuf) - 1;
+
+       log_error = vn_rdwr(UIO_WRITE, sd_logvp, (caddr_t)logbuf, len, sd_log_offset,
+                       UIO_SYSSPACE, IO_UNIT | IO_NOAUTH, vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx));
+       if (log_error == EIO || log_error == 0) {
+               sd_log_offset += (len - resid);
+       }
+}
+
+/*
+ * sd_filt1
+ *
+ * Process filter for the SIGTERM pass.  Returns 1 for a proc that is
+ * in the shutdown state named by the filter arguments and has SIGTERM
+ * set in p_sigcatch; returns 0 for every proc that must be left alone.
+ */
+static int
+sd_filt1(proc_t p, void * args)
+{
+       proc_t me = current_proc();
+       struct sd_filterargs *filt = (struct sd_filterargs *)args;
+       int allow_delayed = filt->delayterm;
+       int wanted_state = filt->shutdownstate;
+
+       /* procs flagged P_SYSTEM and procs whose parent pid is 0 are skipped */
+       if ((p->p_flag & P_SYSTEM) != 0 || p->p_ppid == 0)
+               return(0);
+
+       /* never select the calling process, nor a zombie */
+       if (p == me || p->p_stat == SZOMB)
+               return(0);
+
+       /* only procs currently in the requested shutdown state */
+       if (p->p_shutdownstate != wanted_state)
+               return(0);
+
+       /* P_LDELAYTERM procs are skipped unless delayterm is set */
+       if (allow_delayed == 0 && (p->p_lflag & P_LDELAYTERM) == P_LDELAYTERM)
+               return(0);
+
+       /* the SIGTERM bit must be set in p_sigcatch */
+       if ((p->p_sigcatch & sigmask(SIGTERM)) == 0)
+               return(0);
+
+       return(1);
+}
+
+
+/*
+ * sd_callback1
+ *
+ * Per-process action passed to proc_rebootscan() for the SIGTERM pass.
+ * Records the new shutdown state on the proc and, unless the proc is
+ * already a zombie, posts sd->signo to it.  When sd->countproc is
+ * nonzero the proc is also flagged P_LIST_EXITCOUNT, the global
+ * proc_shutdown_exitcount is incremented and sd->activecount is bumped.
+ *
+ * Always returns PROC_RETURNED.
+ */
+static int  
+sd_callback1(proc_t p, void * args)
+{
+       struct sd_iterargs * sd = (struct sd_iterargs *)args;
+       int signo = sd->signo;
+       int setsdstate = sd->setsdstate;
+       int countproc = sd->countproc;
+
+       /* shutdown state and the zombie check are done under the proc lock */
+       proc_lock(p);
+       p->p_shutdownstate = setsdstate;
+       if (p->p_stat != SZOMB) {
+               /* the proc lock is dropped before taking the proc list lock and signalling */
+               proc_unlock(p);
+               if (countproc != 0) {
+                       proc_list_lock();
+                       p->p_listflag |= P_LIST_EXITCOUNT;
+                       proc_shutdown_exitcount++;
+                       proc_list_unlock();
+               }
+
+               psignal(p, signo);
+               if (countproc !=  0)
+                       sd->activecount++;
+       } else
+               proc_unlock(p);
+       return(PROC_RETURNED);
+}
+
+/*
+ * sd_filt2
+ *
+ * Process filter for the SIGKILL and forced-exit passes.  Returns 1
+ * for a proc that has NOT yet reached the shutdown state named by the
+ * filter arguments; returns 0 for every proc that must be left alone.
+ * Unlike sd_filt1(), p_sigcatch is not consulted.
+ */
+static int
+sd_filt2(proc_t p, void * args)
+{
+       proc_t me = current_proc();
+       struct sd_filterargs *filt = (struct sd_filterargs *)args;
+       int allow_delayed = filt->delayterm;
+       int done_state = filt->shutdownstate;
+
+       /* procs flagged P_SYSTEM and procs whose parent pid is 0 are skipped */
+       if ((p->p_flag & P_SYSTEM) != 0 || p->p_ppid == 0)
+               return(0);
+
+       /* never select the calling process, nor a zombie */
+       if (p == me || p->p_stat == SZOMB)
+               return(0);
+
+       /* procs already moved to the target state have been handled */
+       if (p->p_shutdownstate == done_state)
+               return(0);
+
+       /* P_LDELAYTERM procs are skipped unless delayterm is set */
+       if (allow_delayed == 0 && (p->p_lflag & P_LDELAYTERM) == P_LDELAYTERM)
+               return(0);
+
+       return(1);
+}
+
+/*
+ * sd_callback2
+ *
+ * Per-process action passed to proc_rebootscan() for the SIGKILL pass.
+ * Same steps as sd_callback1(): record the new shutdown state and,
+ * unless the proc is already a zombie, post sd->signo to it.  When
+ * sd->countproc is nonzero the proc is also flagged P_LIST_EXITCOUNT,
+ * proc_shutdown_exitcount is incremented and sd->activecount is bumped.
+ *
+ * Always returns PROC_RETURNED.
+ */
+static int  
+sd_callback2(proc_t p, void * args)
+{
+       struct sd_iterargs * sd = (struct sd_iterargs *)args;
+       int signo = sd->signo;
+       int setsdstate = sd->setsdstate;
+       int countproc = sd->countproc;
+
+       /* shutdown state and the zombie check are done under the proc lock */
+       proc_lock(p);
+       p->p_shutdownstate = setsdstate;
+       if (p->p_stat != SZOMB) {
+               /* the proc lock is dropped before taking the proc list lock and signalling */
+               proc_unlock(p);
+               if (countproc !=  0) {
+                       proc_list_lock();
+                       p->p_listflag |= P_LIST_EXITCOUNT;
+                       proc_shutdown_exitcount++;
+                       proc_list_unlock();
+               }
+               psignal(p, signo);
+               if (countproc !=  0)
+                       sd->activecount++;
+       } else
+               proc_unlock(p);
+
+       return(PROC_RETURNED);
 
-       thread_funnel_set(kernel_flock, FALSE);
 }
 
+/*
+ * sd_callback3
+ *
+ * Per-process action passed to proc_rebootscan() for the final,
+ * forced-exit pass.  Records the new shutdown state and, unless the
+ * proc is already a zombie, either yields (when another thread is
+ * already recorded in p->exit_thread) or claims exit_thread, logs the
+ * proc's name and pid, and calls exit1() on it.  No signal is posted;
+ * sd->signo and sd->countproc are not read here.
+ *
+ * Always returns PROC_RETURNED.
+ */
+static int  
+sd_callback3(proc_t p, void * args)
+{
+       struct sd_iterargs * sd = (struct sd_iterargs *)args;
+       vfs_context_t ctx = vfs_context_current();
+
+       int setsdstate = sd->setsdstate;
+
+       proc_lock(p);
+       p->p_shutdownstate = setsdstate;
+       if (p->p_stat != SZOMB) {
+              /*
+               * NOTE: following code ignores sig_lock and plays
+               * with exit_thread correctly.  This is OK unless we
+               * are a multiprocessor, in which case I do not
+               * understand the sig_lock.  This needs to be fixed.
+               * XXX
+               */
+               if (p->exit_thread) {   /* someone already doing it */
+                       proc_unlock(p);
+                       /* give him a chance */
+                       thread_block(THREAD_CONTINUE_NULL);
+               } else {
+                       p->exit_thread = current_thread();
+                       printf(".");
+
+                       /*
+                        * NOTE(review): sd_log() writes to the log file via
+                        * vn_rdwr() and is called here before proc_unlock();
+                        * confirm that file I/O is acceptable under this lock.
+                        */
+                       sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid);
+
+                       proc_unlock(p);
+                       KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
+                                             p->p_pid, 0, 1, 0, 0);
+                       sd->activecount++;
+                       exit1(p, 1, (int *)NULL);
+               }
+       } else
+               proc_unlock(p);
+
+       return(PROC_RETURNED);
+}
+
+
 /*
  * proc_shutdown()
  *
@@ -182,12 +420,15 @@ boot(paniced, howto, command)
  */
 
 static void
-proc_shutdown()
+proc_shutdown(void)
 {
-       struct proc     *p, *self;
-       struct vnode    **cdirp, **rdirp, *vp;
-       int             restart, i, TERM_catch;
+       vfs_context_t ctx = vfs_context_current();
+       struct proc *p, *self;
        int delayterm = 0;
+       struct sd_filterargs sfargs;
+       struct sd_iterargs sdargs;
+       int error = 0;
+       struct timespec ts;
 
        /*
         *      Kill as many procs as we can.  (Except ourself...)
@@ -198,140 +439,137 @@ proc_shutdown()
         * Signal the init with SIGTERM so that he does not launch
         * new processes 
         */
-       p = pfind(1);
+       p = proc_find(1);
        if (p && p != self) {
                psignal(p, SIGTERM);
        }
+       proc_rele(p);
 
        printf("Killing all processes ");
 
-       /*
-        * send SIGTERM to those procs interested in catching one
-        */
 sigterm_loop:
-       for (p = allproc.lh_first; p; p = p->p_list.le_next) {
-               if (((p->p_flag&P_SYSTEM) == 0) && (p->p_pptr->p_pid != 0) && (p != self) && (p->p_stat != SZOMB) && (p->p_shutdownstate == 0)) {
-
-                       if ((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM)) {
-                               continue;
-                       }
-                       if (p->p_sigcatch & sigmask(SIGTERM)) {
-                                       p->p_shutdownstate = 1;
-                                       if (proc_refinternal(p, 1) == p) {
-                                       psignal(p, SIGTERM);
-                                               proc_dropinternal(p, 1);
-                                       }
-                               goto sigterm_loop;
-               }
-       }
-       }
        /*
-        * now wait for up to 30 seconds to allow those procs catching SIGTERM
-        * to digest it
-        * as soon as these procs have exited, we'll continue on to the next step
+        * send SIGTERM to those procs interested in catching one
         */
-       for (i = 0; i < 300; i++) {
-               /*
-                * sleep for a tenth of a second
-                * and then check to see if the tasks that were sent a
-                * SIGTERM have exited
-                */
-               IOSleep(100);   
-               TERM_catch = 0;
-
-               for (p = allproc.lh_first; p; p = p->p_list.le_next) {
-                       if (p->p_shutdownstate == 1) {
-                               TERM_catch++;
+       sfargs.delayterm = delayterm;
+       sfargs.shutdownstate = 0;
+       sdargs.signo = SIGTERM;
+       sdargs.setsdstate = 1;
+       sdargs.countproc = 1;
+       sdargs.activecount = 0;
+
+       error = 0;
+       /* post a SIGTERM to all that catch SIGTERM and not marked for delay */
+       proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs);
+
+       if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
+               proc_list_lock();
+               if (proc_shutdown_exitcount != 0) {
+                       /*
+                       * now wait for up to 30 seconds to allow those procs catching SIGTERM
+                       * to digest it
+                       * as soon as these procs have exited, we'll continue on to the next step
+                       */
+                       ts.tv_sec = 30;
+                       ts.tv_nsec = 0;
+                       error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
+                       if (error != 0) {
+                               for (p = allproc.lh_first; p; p = p->p_list.le_next) {
+                                       if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
+                                               p->p_listflag &= ~P_LIST_EXITCOUNT;
+                               }
+                               for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
+                                       if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
+                                               p->p_listflag &= ~P_LIST_EXITCOUNT;
+                               }
                        }
+                       
                }
-               if (TERM_catch == 0)
-                       break;
+               proc_list_unlock();
        }
-       if (TERM_catch) {
+       if (error == ETIMEDOUT) {
                /*
                 * log the names of the unresponsive tasks
                 */
 
-               for (p = allproc.lh_first; p; p = p->p_list.le_next) {
+
+               proc_list_lock();
+
+               for (p = allproc.lh_first; p; p = p->p_list.le_next) {
                        if (p->p_shutdownstate == 1) {
-                                 printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
+                               printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
+                               sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
                        }
                }
-               IOSleep(1000 * 5);
+
+               proc_list_unlock();
+
+               delay_for_interval(1000 * 5, 1000 * 1000);
        }
 
        /*
         * send a SIGKILL to all the procs still hanging around
         */
-sigkill_loop:
-       for (p = allproc.lh_first; p; p = p->p_list.le_next) {
-               if (((p->p_flag&P_SYSTEM) == 0) && (p->p_pptr->p_pid != 0) && (p != self) && (p->p_stat != SZOMB) && (p->p_shutdownstate != 2)) {
-
-                       if ((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM)) {
-                               continue;
-                       }
-                       if (proc_refinternal(p, 1) == p) {
-                               psignal(p, SIGKILL);
-                               proc_dropinternal(p, 1);
+       sfargs.delayterm = delayterm;
+       sfargs.shutdownstate = 2;
+       sdargs.signo = SIGKILL;
+       sdargs.setsdstate = 2;
+       sdargs.countproc = 1;
+       sdargs.activecount = 0;
+
+       /* post a SIGKILL to every remaining proc not yet in shutdown state 2 (whether or not it catches SIGTERM) and not marked for delay */
+       proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs);
+
+       if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
+               proc_list_lock();
+               if (proc_shutdown_exitcount != 0) {
+                       /*
+                       * wait for up to 60 seconds to allow these procs to exit normally
+                       *
+                       * History:      The delay interval was changed from 100 to 200
+                       *               for NFS requests in particular.
+                       */
+                       ts.tv_sec = 60;
+                       ts.tv_nsec = 0;
+                       error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
+                       if (error != 0) {
+                               for (p = allproc.lh_first; p; p = p->p_list.le_next) {
+                                       if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
+                                               p->p_listflag &= ~P_LIST_EXITCOUNT;
+                               }
+                               for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
+                                       if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
+                                               p->p_listflag &= ~P_LIST_EXITCOUNT;
+                               }
                        }
-                       p->p_shutdownstate = 2;
-                       goto sigkill_loop;
                }
-       }
-       /*
-        * wait for up to 60 seconds to allow these procs to exit normally
-        */
-       for (i = 0; i < 300; i++) {
-               IOSleep(200);  /* double the time from 100 to 200 for NFS requests in particular */
-
-               for (p = allproc.lh_first; p; p = p->p_list.le_next) {
-                               if (p->p_shutdownstate == 2)
-                               break;
-               }
-               if (!p)
-                       break;
+               proc_list_unlock();
        }
 
        /*
         * if we still have procs that haven't exited, then brute force 'em
         */
-       p = allproc.lh_first;
-       while (p) {
-               if ((p->p_shutdownstate == 3) || (p->p_flag&P_SYSTEM) || (!delayterm && ((p->p_lflag& P_LDELAYTERM))) 
-                               || (p->p_pptr->p_pid == 0) || (p == self)) {
-                       p = p->p_list.le_next;
-               }
-               else {
-                       p->p_shutdownstate = 3;
-                       /*
-                        * NOTE: following code ignores sig_lock and plays
-                        * with exit_thread correctly.  This is OK unless we
-                        * are a multiprocessor, in which case I do not
-                        * understand the sig_lock.  This needs to be fixed.
-                        * XXX
-                        */
-                       if (p->exit_thread) {   /* someone already doing it */
-                               /* give him a chance */
-                               thread_block(THREAD_CONTINUE_NULL);
-                       } else {
-                               p->exit_thread = current_thread();
-                               printf(".");
-                               if (proc_refinternal(p, 1) == p) {
-                                       exit1(p, 1, (int *)NULL);
-                                       proc_dropinternal(p, 1);
-                               }
-                       }
-                       p = allproc.lh_first;
-               }
-       }
+       sfargs.delayterm = delayterm;
+       sfargs.shutdownstate = 3;
+       sdargs.signo = 0;
+       sdargs.setsdstate = 3;
+       sdargs.countproc = 0;
+       sdargs.activecount = 0;
+
+       /* force-exit (via exit1) every remaining proc not yet in shutdown state 3 and not marked for delay */
+       proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs);
        printf("\n");
 
-
        /* Now start the termination of processes that are marked for delayed termn */
        if (delayterm == 0) {
                delayterm = 1;
                goto  sigterm_loop;
        }
+
+       sd_closelog(ctx);
+
+       /* drop the ref on initproc */
+       proc_rele(initproc);
        printf("continuing\n");
 }