diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c
index 92448a3f5b2e8b7bda0656221ed2b91ed74d9f71..0e2705e5acf621d0276f1e77b1426ce25de876f0 100644
--- a/bsd/kern/kern_event.c
+++ b/bsd/kern/kern_event.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <libkern/libkern.h>
 #include "net/net_str_id.h"
 
+#include <mach/task.h>
+
+#if VM_PRESSURE_EVENTS
+#include <kern/vm_pressure.h>
+#endif
+
 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
 
 #define KQ_EVENT NULL
@@ -140,6 +146,8 @@ static void kevent_continue(struct kqueue *kq, void *data, int error);
 static void    kqueue_scan_continue(void *contp, wait_result_t wait_result);
 static int     kqueue_process(struct kqueue *kq, kevent_callback_t callback,
                               void *data, int *countp, struct proc *p);
+static int     kqueue_begin_processing(struct kqueue *kq);
+static void    kqueue_end_processing(struct kqueue *kq);
 static int     knote_process(struct knote *kn, kevent_callback_t callback,
                              void *data, struct kqtailq *inprocessp, struct proc *p);
 static void    knote_put(struct knote *kn);
@@ -183,6 +191,17 @@ static struct filterops proc_filtops = {
         .f_event = filt_proc,
 };
 
+#if VM_PRESSURE_EVENTS
+static int filt_vmattach(struct knote *kn);
+static void filt_vmdetach(struct knote *kn);
+static int filt_vm(struct knote *kn, long hint);
+static struct filterops vm_filtops = {
+       .f_attach = filt_vmattach,
+       .f_detach = filt_vmdetach,
+       .f_event = filt_vm,
+};
+#endif /* VM_PRESSURE_EVENTS */
+
 extern struct filterops fs_filtops;
 
 extern struct filterops sig_filtops;
@@ -238,11 +257,6 @@ static struct filterops user_filtops = {
         .f_touch = filt_usertouch,
 };
 
-#if CONFIG_AUDIT
-/* Audit session filter */
-extern struct filterops audit_session_filtops;
-#endif
-
 /*
  * Table for all system-defined filters.
  */
@@ -261,11 +275,13 @@ static struct filterops *sysfilt_ops[] = {
        &machport_filtops,              /* EVFILT_MACHPORT */
        &fs_filtops,                    /* EVFILT_FS */
        &user_filtops,                  /* EVFILT_USER */
-#if CONFIG_AUDIT
-       &audit_session_filtops,         /* EVFILT_SESSION */
+       &bad_filtops,                   /* unused */
+#if VM_PRESSURE_EVENTS
+       &vm_filtops,                    /* EVFILT_VM */
 #else
-       &bad_filtops,
+       &bad_filtops,                   /* EVFILT_VM */
 #endif
+       &file_filtops,                  /* EVFILT_SOCK */
 };
 
 /*
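
For reference, kevent filter numbers are negative and sysfilt_ops is indexed
by their one's complement, so the lookup in kevent_register() is essentially:

    /* map the (negative) EVFILT_* value to a 0-based table index */
    if (kev->filter < 0) {
            if (kev->filter + EVFILT_SYSCOUNT < 0)
                    return (EINVAL);
            fops = sysfilt_ops[~kev->filter];  /* EVFILT_VM (-12) -> slot 11 */
    }

Keeping bad_filtops in the old EVFILT_SESSION slot (-11) preserves that
mapping, and the new EVFILT_SOCK (-13) entry lands in slot 12.
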
@@ -466,6 +482,23 @@ filt_procattach(struct knote *kn)
                return (ESRCH);
        }
 
+       const int NoteExitStatusBits = NOTE_EXIT | NOTE_EXITSTATUS;
+
+       if ((kn->kn_sfflags & NoteExitStatusBits) == NoteExitStatusBits)
+               do {
+                       pid_t selfpid = proc_selfpid();
+
+                       if (p->p_ppid == selfpid)
+                               break;  /* parent => ok */
+
+                       if ((p->p_lflag & P_LTRACED) != 0 &&
+                           (p->p_oppid == selfpid))
+                               break;  /* parent-in-waiting => ok */
+
+                       proc_rele(p);
+                       return (EACCES);
+               } while (0);
+
        proc_klist_lock();
 
        kn->kn_flags |= EV_CLEAR;       /* automatically set */
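
With this check, combining NOTE_EXIT with NOTE_EXITSTATUS is only permitted
for the caller's own children (or current debuggees). A minimal userspace
sketch of the failure mode, assuming the Apple-private NOTE_EXITSTATUS flag
is visible to the caller:

    #include <sys/types.h>
    #include <sys/event.h>
    #include <errno.h>
    #include <stdio.h>

    /* Returns 0 on success; EACCES is the new filt_procattach rejection. */
    static int
    watch_exit_status(int kq, pid_t pid)
    {
            struct kevent64_s kev;

            EV_SET64(&kev, pid, EVFILT_PROC, EV_ADD,
                NOTE_EXIT | NOTE_EXITSTATUS, 0, 0, 0, 0);
            if (kevent64(kq, &kev, 1, NULL, 0, 0, NULL) == -1) {
                    if (errno == EACCES)
                            fprintf(stderr, "pid %d is not our child or debuggee\n",
                                (int)pid);
                    return -1;
            }
            return 0;
    }
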
@@ -515,6 +548,24 @@ filt_proc(struct knote *kn, long hint)
                 */
                event = (u_int)hint & NOTE_PCTRLMASK;
 
+               /*
+                * Termination lifecycle events can happen while a debugger
+                * has reparented a process, in which case notifications
+                * should be quashed except to the tracing parent. When
+                * the debugger reaps the child (either via wait4(2) or
+                * process exit), the child will be reparented to the original
+                * parent and these knotes will be re-fired.
+                */
+               if (event & NOTE_EXIT) {
+                       if ((kn->kn_ptr.p_proc->p_oppid != 0)
+                               && (kn->kn_kq->kq_p->p_pid != kn->kn_ptr.p_proc->p_ppid)) {
+                               /*
+                                * This knote is not for the current ptrace(2)
+                                * parent; ignore it.
+                                */
+                               return 0;
+                       }
+               }
+
                /*
                 * if the user is interested in this event, record it.
                 */
@@ -524,12 +575,67 @@ filt_proc(struct knote *kn, long hint)
                if (event == NOTE_REAP || (event == NOTE_EXIT && !(kn->kn_sfflags & NOTE_REAP))) {
                        kn->kn_flags |= (EV_EOF | EV_ONESHOT);
                }
+               if ((event == NOTE_EXIT) && ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0)) {
+                       kn->kn_fflags |= NOTE_EXITSTATUS;
+                       kn->kn_data = (hint & NOTE_PDATAMASK);
+               }
+               if ((event == NOTE_RESOURCEEND) && ((kn->kn_sfflags & NOTE_RESOURCEEND) != 0)) {
+                       kn->kn_fflags |= NOTE_RESOURCEEND;
+                       kn->kn_data = (hint & NOTE_PDATAMASK);
+               }
+#if CONFIG_EMBEDDED
+               /* If the event is one of the APPSTATE events, remove the rest */
+               if (((event & NOTE_APPALLSTATES) != 0) && ((kn->kn_sfflags & NOTE_APPALLSTATES) != 0)) {
+                       /* only one state at a time */
+                       kn->kn_fflags &= ~NOTE_APPALLSTATES;
+                       kn->kn_fflags |= event;
+               }
+#endif /* CONFIG_EMBEDDED */
        }
 
        /* atomic check, no locking needed when called from above */
        return (kn->kn_fflags != 0); 
 }
 
+#if VM_PRESSURE_EVENTS
+/*
+ * Virtual memory kevents
+ *
+ * author: Matt Jacobson [matthew_jacobson@apple.com]
+ */
+
+static int
+filt_vmattach(struct knote *kn)
+{      
+       /* 
+        * The note will be cleared once the information has been flushed to the client. 
+        * If there is still pressure, we will be re-alerted.
+        */
+       kn->kn_flags |= EV_CLEAR; 
+       
+       return vm_knote_register(kn);
+}
+
+static void
+filt_vmdetach(struct knote *kn)
+{
+       vm_knote_unregister(kn);
+}
+
+static int
+filt_vm(struct knote *kn, long hint)
+{
+       /* hint == 0 means this is just an "is it alive?" check (always true) */
+       if (hint != 0) { 
+               const pid_t pid = (pid_t)hint;
+               if ((kn->kn_sfflags & NOTE_VM_PRESSURE) && (kn->kn_kq->kq_p->p_pid == pid)) {
+                       kn->kn_fflags |= NOTE_VM_PRESSURE;
+               }
+       }
+       
+       return (kn->kn_fflags != 0);
+}
+#endif /* VM_PRESSURE_EVENTS */
 
 /*
  * filt_timervalidate - process data from user
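
EVFILT_VM and NOTE_VM_PRESSURE are private interfaces, but registration
follows the usual kevent shape. A hypothetical sketch (note that filt_vm()
matches on the kqueue's owning process, so the ident is not used):

    struct kevent64_s kev;

    /* ident is ignored by the VM filter; pressure is delivered per process */
    EV_SET64(&kev, 0, EVFILT_VM, EV_ADD, NOTE_VM_PRESSURE, 0, 0, 0, 0);
    kevent64(kq, &kev, 1, NULL, 0, 0, NULL);

Because filt_vmattach() forces EV_CLEAR, the note rearms itself after each
delivery for as long as pressure persists.
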
@@ -872,7 +978,7 @@ filt_userattach(struct knote *kn)
 {
         /* EVFILT_USER knotes are not attached to anything in the kernel */
         kn->kn_hook = NULL;
-       if (kn->kn_fflags & NOTE_TRIGGER || kn->kn_flags & EV_TRIGGER) {
+       if (kn->kn_fflags & NOTE_TRIGGER) {
                kn->kn_hookid = 1;
        } else {
                kn->kn_hookid = 0;
@@ -895,10 +1001,10 @@ filt_user(struct knote *kn, __unused long hint)
 static void
 filt_usertouch(struct knote *kn, struct kevent64_s *kev, long type)
 {
-        int ffctrl;
+        uint32_t ffctrl;
         switch (type) {
         case EVENT_REGISTER:
-                if (kev->fflags & NOTE_TRIGGER || kev->flags & EV_TRIGGER) {
+                if (kev->fflags & NOTE_TRIGGER) {
                         kn->kn_hookid = 1;
                 }
 
@@ -1510,19 +1616,28 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc
 
                        error = fops->f_attach(kn);
 
-                       /*
-                        * Anyone trying to drop this knote will yield to
-                        * us, since KN_ATTACHING is set.
-                        */
                        kqlock(kq);
-                       if (error != 0 || (kn->kn_status & KN_DROPPING)) {
-                               if (error == 0) {
-                                       kn->kn_fop->f_detach(kn);
-                               }
+
+                       if (error != 0) {
+                               /*
+                                * Failed to attach correctly, so drop.
+                                * All other possible users/droppers
+                                * have deferred to us.
+                                */
                                kn->kn_status |= KN_DROPPING;
                                kqunlock(kq);
                                knote_drop(kn, p);
                                goto done;
+                       } else if (kn->kn_status & KN_DROPPING) {
+                               /*
+                                * Attach succeeded, but someone else
+                                * deferred their drop - now we have
+                                * to do it for them (after detaching).
+                                */
+                               kqunlock(kq);
+                               kn->kn_fop->f_detach(kn);
+                               knote_drop(kn, p);
+                               goto done;
                        }
                        kn->kn_status &= ~KN_ATTACHING;
                        kqunlock(kq);
@@ -1556,6 +1671,17 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc
                                knote_enqueue(kn);
                }
 
+               /*
+                * The user may change some filter values after the
+                * initial EV_ADD, but doing so will not reset any
+                * filters that have already been triggered.
+                */
+               kn->kn_kevent.udata = kev->udata;
+               if (fops->f_isfd || fops->f_touch == NULL) {
+                       kn->kn_sfflags = kev->fflags;
+                       kn->kn_sdata = kev->data;
+               }
+
                /*
                 * If somebody is in the middle of dropping this
                 * knote - go find/insert a new one.  But we have
@@ -1570,22 +1696,11 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc
                }
 
                /*
-                * The user may change some filter values after the
-                * initial EV_ADD, but doing so will not reset any 
-                * filter which have already been triggered.
+                * Call touch routine to notify filter of changes
+                * in filter values.
                 */
-               kn->kn_kevent.udata = kev->udata;
                if (!fops->f_isfd && fops->f_touch != NULL)
                        fops->f_touch(kn, kev, EVENT_REGISTER);
-               else {
-                       kn->kn_sfflags = kev->fflags;
-                       kn->kn_sdata = kev->data;
-               }
-
-               /* We may need to push some info down to a networked filesystem */
-               if (kn->kn_filter == EVFILT_VNODE) {
-                       vnode_knoteupdate(kn);
-               }
        }
        /* still have use ref on knote */
 
@@ -1672,13 +1787,10 @@ knote_process(struct knote      *kn,
                                }
 
                                /* capture the kevent data - using touch if specified */
-                               if (result) {
-                                       if (touch) {
-                                               kn->kn_fop->f_touch(kn, &kev, EVENT_PROCESS);
-                                       } else {
-                                               kev = kn->kn_kevent;
-                                       }
+                               if (result && touch) {
+                                       kn->kn_fop->f_touch(kn, &kev, EVENT_PROCESS);
                                }
+
                                /* convert back to a kqlock - bail if the knote went away */
                                if (!knoteuse2kqlock(kq, kn)) {
                                        return EJUSTRETURN;
@@ -1687,6 +1799,12 @@ knote_process(struct knote       *kn,
                                        if (!(kn->kn_status & KN_ACTIVE)) {
                                                knote_activate(kn, 0);
                                        }
+
+                                       /* capture all events that occurred during filter */
+                                       if (!touch) {
+                                               kev = kn->kn_kevent;
+                                       }
+
                                } else if ((kn->kn_status & KN_STAYQUEUED) == 0) {
                                        /* was already dequeued, so just bail on this one */
                                        return EJUSTRETURN;
@@ -1716,21 +1834,26 @@ knote_process(struct knote      *kn,
 
        if (result == 0) {
                return EJUSTRETURN;
-       } else if (kn->kn_flags & EV_ONESHOT) {
+       } else if ((kn->kn_flags & EV_ONESHOT) != 0) {
                knote_deactivate(kn);
                if (kqlock2knotedrop(kq, kn)) {
                        kn->kn_fop->f_detach(kn);
                        knote_drop(kn, p);
                }
-       } else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
-               knote_deactivate(kn);
-               /* manually clear knotes who weren't 'touch'ed */
-               if ((touch == 0) && (kn->kn_flags & EV_CLEAR)) {
+       } else if ((kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) != 0) {
+               if ((kn->kn_flags & EV_DISPATCH) != 0) {
+                       /* deactivate and disable all dispatch knotes */
+                       knote_deactivate(kn);
+                       kn->kn_status |= KN_DISABLED;
+               } else if (!touch || kn->kn_fflags == 0) {
+                       /* only deactivate if nothing since the touch */
+                       knote_deactivate(kn);
+               }
+               if (!touch && (kn->kn_flags & EV_CLEAR) != 0) {
+                       /* manually clear non-touch knotes */
                        kn->kn_data = 0;
                        kn->kn_fflags = 0;
                }
-               if (kn->kn_flags & EV_DISPATCH)
-                       kn->kn_status |= KN_DISABLED;
                kqunlock(kq);
        } else {
                /*
@@ -1749,6 +1872,47 @@ knote_process(struct knote       *kn,
        return error;
 }
 
+/*
+ * Return 0 to indicate that processing should proceed,
+ * -1 if there is nothing to process.
+ *
+ * Called with kqueue locked and returns the same way,
+ * but may drop lock temporarily.
+ */
+static int
+kqueue_begin_processing(struct kqueue *kq)
+{
+       for (;;) {
+               if (kq->kq_count == 0) {
+                       return -1;
+               }
+
+               /* if someone else is processing the queue, wait */
+               if (kq->kq_nprocess != 0) {
+                       wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_UNINT, 0);
+                       kq->kq_state |= KQ_PROCWAIT;
+                       kqunlock(kq);
+                       thread_block(THREAD_CONTINUE_NULL);
+                       kqlock(kq);
+               } else {
+                       kq->kq_nprocess = 1;
+                       return 0;
+               }
+       }
+}
+
+/*
+ * Called with kqueue lock held.
+ */
+static void
+kqueue_end_processing(struct kqueue *kq)
+{
+       kq->kq_nprocess = 0;
+       if (kq->kq_state & KQ_PROCWAIT) {
+               kq->kq_state &= ~KQ_PROCWAIT;
+               wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_AWAKENED);
+       }
+}
 
 /*
  * kqueue_process - process the triggered events in a kqueue
@@ -1778,23 +1942,13 @@ kqueue_process(struct kqueue *kq,
        int error;
 
         TAILQ_INIT(&inprocess);
- restart:
-       if (kq->kq_count == 0) {
+
+       if (kqueue_begin_processing(kq) == -1) {
                *countp = 0;
+               /* Nothing to process */
                return 0;
        }
 
-       /* if someone else is processing the queue, wait */
-       if (hw_atomic_add(&kq->kq_nprocess, 1) != 1) {
-               hw_atomic_sub(&kq->kq_nprocess, 1);
-               wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_UNINT, 0);
-               kq->kq_state |= KQ_PROCWAIT;
-               kqunlock(kq);
-               thread_block(THREAD_CONTINUE_NULL);
-               kqlock(kq);
-               goto restart;
-       }
-
        /*
         * Clear any pre-posted status from previous runs, so we only
         * detect events that occur during this run.
@@ -1829,11 +1983,8 @@ kqueue_process(struct kqueue *kq,
                kn->kn_tq = &kq->kq_head;
                TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
        }
-       hw_atomic_sub(&kq->kq_nprocess, 1);
-       if (kq->kq_state & KQ_PROCWAIT) {
-               kq->kq_state &= ~KQ_PROCWAIT;
-               wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_AWAKENED);
-       }
+
+       kqueue_end_processing(kq);
 
        *countp = nevents;
        return error;
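
kqueue_process() and kqueue_select() now share one serialization pattern. A
hypothetical caller (kqueue_do_scan is illustrative, not in the source) pairs
the two helpers like this:

    static void
    kqueue_do_scan(struct kqueue *kq)
    {
            kqlock(kq);
            if (kqueue_begin_processing(kq) == 0) {
                    /* exclusive: walk kq->kq_head, moving knotes to a
                     * local in-process queue, as kqueue_process() does */
                    kqueue_end_processing(kq);  /* wakes KQ_PROCWAIT waiters */
            }
            kqunlock(kq);
    }
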
@@ -2023,11 +2174,15 @@ static int
 kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
 {
        struct kqueue *kq = (struct kqueue *)fp->f_data;
-       int again;
-
+       struct knote *kn;
+       struct kqtailq inprocessq;
+       int retnum = 0;
+       
        if (which != FREAD)
                return 0;
 
+       TAILQ_INIT(&inprocessq);
+
        kqlock(kq);
        /* 
         * If this is the first pass, link the wait queue associated with the
@@ -2046,11 +2201,12 @@ kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t
                                        (wait_queue_link_t)wql);
        }
 
- retry:
-       again = 0;
-       if (kq->kq_count != 0) {
-               struct knote *kn;
+       if (kqueue_begin_processing(kq) == -1) {
+               kqunlock(kq);
+               return 0;
+       }
 
+       if (kq->kq_count != 0) {
                /*
                 * there is something queued - but it might be a
                 * KN_STAYQUEUED knote, which may or may not have
@@ -2058,31 +2214,42 @@ kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t
                 * list of knotes to see, and peek at the stay-
                 * queued ones to be really sure.
                 */
-               TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
-                       int retnum = 0;
-                       if ((kn->kn_status & KN_STAYQUEUED) == 0 ||
-                           (retnum = kn->kn_fop->f_peek(kn)) > 0) {
-                               kqunlock(kq);
-                               return 1;
+               while ((kn = (struct knote*)TAILQ_FIRST(&kq->kq_head)) != NULL) {
+                       if ((kn->kn_status & KN_STAYQUEUED) == 0) {
+                               retnum = 1;
+                               goto out;
                        }
-                       if (retnum < 0)
-                               again++;
+
+                       TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
+                       TAILQ_INSERT_TAIL(&inprocessq, kn, kn_tqe);
+
+                       if (kqlock2knoteuse(kq, kn)) {
+                               unsigned peek;
+
+                               peek = kn->kn_fop->f_peek(kn);
+                               if (knoteuse2kqlock(kq, kn)) {
+                                       if (peek > 0) {
+                                               retnum = 1;
+                                               goto out;
+                                       }
+                               } else {
+                                       retnum = 0;
+                               }
+                       } 
                }
        }
 
-       /*
-        * If we stumbled across a knote that couldn't be peeked at,
-        * we have to drop the kq lock and try again.
-        */
-       if (again > 0) {
-               kqunlock(kq);
-               mutex_pause(0);
-               kqlock(kq);
-               goto retry;
+out:
+       /* Return knotes to active queue */
+       while ((kn = TAILQ_FIRST(&inprocessq)) != NULL) {
+               TAILQ_REMOVE(&inprocessq, kn, kn_tqe);
+               kn->kn_tq = &kq->kq_head;
+               TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
        }
 
+       kqueue_end_processing(kq);
        kqunlock(kq);
-       return 0;
+       return retnum;
 }
 
 /*
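
kqueue_select() now converts the kq lock to a use reference around f_peek, so
a stay-queued filter's peek routine runs without the kqueue lock held. A
hypothetical implementation (the filter name and hook type are illustrative):

    /* Return nonzero iff events are pending; called with only a use
     * reference on the knote, not the kqueue lock. */
    static unsigned
    filt_examplepeek(struct knote *kn)
    {
            struct example_state *st = kn->kn_hook;  /* hypothetical hook */

            return (st->pending != 0);
    }
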
@@ -2282,22 +2449,21 @@ knote_detach(struct klist *list, struct knote *kn)
  * we permanently enqueue them here.
  *
  * kqueue and knote references are held by caller.
+ *
+ * The caller provides the wait queue link structure.
  */
 int
-knote_link_wait_queue(struct knote *kn, struct wait_queue *wq)
+knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t wql)
 {
        struct kqueue *kq = kn->kn_kq;
        kern_return_t kr;
 
-       kr = wait_queue_link(wq, kq->kq_wqs);
+       kr = wait_queue_link_noalloc(wq, kq->kq_wqs, wql);
        if (kr == KERN_SUCCESS) {
-               kqlock(kq);
-               kn->kn_status |= KN_STAYQUEUED;
-               knote_enqueue(kn);
-               kqunlock(kq);
+               knote_markstayqueued(kn);
                return 0;
        } else {
-               return ENOMEM;
+               return EINVAL;
        }
 }
 
@@ -2307,17 +2473,21 @@ knote_link_wait_queue(struct knote *kn, struct wait_queue *wq)
  *
  * Note that the unlink may have already happened from the other side, so
  * ignore any failures to unlink and just remove it from the kqueue list.
+ *
+ * On success, the caller is responsible for freeing the link structure.
  */
-void
-knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq)
+int
+knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t *wqlp)
 {
        struct kqueue *kq = kn->kn_kq;
+       kern_return_t kr;
 
-       (void) wait_queue_unlink(wq, kq->kq_wqs);
+       kr = wait_queue_unlink_nofree(wq, kq->kq_wqs, wqlp);
        kqlock(kq);
        kn->kn_status &= ~KN_STAYQUEUED;
        knote_dequeue(kn);
        kqunlock(kq);
+       return (kr != KERN_SUCCESS) ? EINVAL : 0;
 }
 
 /*
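
Ownership of the link structure now travels with these calls. A sketch of the
caller-side pattern, assuming the wait_queue_link_allocate() and
wait_queue_link_free() helpers from osfmk:

    wait_queue_link_t wql = wait_queue_link_allocate();

    if (knote_link_wait_queue(kn, wq, wql) != 0)
            wait_queue_link_free(wql);     /* link failed, still ours to free */

    /* ... later ... */
    if (knote_unlink_wait_queue(kn, wq, &wql) == 0)
            wait_queue_link_free(wql);     /* handed back to us on unlink */
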
@@ -2367,7 +2537,7 @@ knote_fdclose(struct proc *p, int fd)
 
 /* proc_fdlock held on entry (and exit) */
 static int
-knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
+knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p)
 {
        struct klist *list = NULL;
 
@@ -2380,10 +2550,18 @@ knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
                if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
                        u_int size = 0;
 
+                       if (kn->kn_id >= (uint64_t)p->p_rlimit[RLIMIT_NOFILE].rlim_cur 
+                           || kn->kn_id >= (uint64_t)maxfiles)
+                               return (EINVAL);
+               
                        /* have to grow the fd_knlist */
                        size = fdp->fd_knlistsize;
                        while (size <= kn->kn_id)
                                size += KQEXTENT;
+
+                       if (size >= (UINT_MAX/sizeof(struct klist *)))
+                               return (EINVAL);
+
                        MALLOC(list, struct klist *,
                               size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
                        if (list == NULL)
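
Worked example of the new guards, with KQEXTENT at its usual value of 256:
for kn_id = 1000 and fd_knlistsize = 256, size steps 256 -> 512 -> 768 ->
1024 and the allocation is 1024 * sizeof(struct klist *). An ident at or
beyond RLIMIT_NOFILE or maxfiles, or a size so large that
size * sizeof(struct klist *) would wrap a u_int, now fails with EINVAL
before reaching MALLOC.
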
@@ -2510,6 +2688,11 @@ knote_init(void)
 
        /* Initialize the timer filter lock */
        lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
+       
+#if VM_PRESSURE_EVENTS
+       /* Initialize the vm pressure list lock */
+       vm_pressure_init(kq_lck_grp, kq_lck_attr);
+#endif
 }
 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
 
@@ -2822,3 +3005,12 @@ fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
        return(0);
 }
 
+
+void
+knote_markstayqueued(struct knote *kn)
+{
+       kqlock(kn->kn_kq);
+       kn->kn_status |= KN_STAYQUEUED;
+       knote_enqueue(kn);
+       kqunlock(kn->kn_kq);
+}