X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/b7266188b87f3620ec3f9f717e57194a7dd989fe..db6096698656d32db7df630594bd9617ee54f828:/bsd/kern/kern_event.c

diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c
index 5d195dcf0..ba269074a 100644
--- a/bsd/kern/kern_event.c
+++ b/bsd/kern/kern_event.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -92,6 +92,12 @@
 #include 
 #include "net/net_str_id.h"
 
+#include 
+
+#if VM_PRESSURE_EVENTS
+#include 
+#endif
+
 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
 
 #define	KQ_EVENT	NULL
@@ -140,6 +146,8 @@ static void kevent_continue(struct kqueue *kq, void *data, int error);
 static void kqueue_scan_continue(void *contp, wait_result_t wait_result);
 static int kqueue_process(struct kqueue *kq, kevent_callback_t callback,
 			void *data, int *countp, struct proc *p);
+static int kqueue_begin_processing(struct kqueue *kq);
+static void kqueue_end_processing(struct kqueue *kq);
 static int knote_process(struct knote *kn, kevent_callback_t callback,
 			void *data, struct kqtailq *inprocessp, struct proc *p);
 static void knote_put(struct knote *kn);
@@ -183,6 +191,17 @@ static struct filterops proc_filtops = {
 	.f_event = filt_proc,
 };
 
+#if VM_PRESSURE_EVENTS
+static int filt_vmattach(struct knote *kn);
+static void filt_vmdetach(struct knote *kn);
+static int filt_vm(struct knote *kn, long hint);
+static struct filterops vm_filtops = {
+	.f_attach = filt_vmattach,
+	.f_detach = filt_vmdetach,
+	.f_event = filt_vm,
+};
+#endif /* VM_PRESSURE_EVENTS */
+
 extern struct filterops fs_filtops;
 
 extern struct filterops sig_filtops;
@@ -238,11 +257,6 @@ static struct filterops user_filtops = {
 	.f_touch = filt_usertouch,
 };
 
-#if CONFIG_AUDIT
-/* Audit session filter */
-extern struct filterops audit_session_filtops;
-#endif
-
 /*
  * Table for for all system-defined filters.
  */
@@ -261,11 +275,13 @@ static struct filterops *sysfilt_ops[] = {
 	&machport_filtops,		/* EVFILT_MACHPORT */
 	&fs_filtops,			/* EVFILT_FS */
 	&user_filtops,			/* EVFILT_USER */
-#if CONFIG_AUDIT
-	&audit_session_filtops,	/* EVFILT_SESSION */
+	&bad_filtops,			/* unused */
+#if VM_PRESSURE_EVENTS
+	&vm_filtops,			/* EVFILT_VM */
 #else
-	&bad_filtops,
+	&bad_filtops,			/* EVFILT_VM */
 #endif
+	&file_filtops,			/* EVFILT_SOCK */
 };
 /*
  *
@@ -466,6 +482,23 @@ filt_procattach(struct knote *kn)
 		return (ESRCH);
 	}
 
+	const int NoteExitStatusBits = NOTE_EXIT | NOTE_EXITSTATUS;
+
+	if ((kn->kn_sfflags & NoteExitStatusBits) == NoteExitStatusBits)
+		do {
+			pid_t selfpid = proc_selfpid();
+
+			if (p->p_ppid == selfpid)
+				break;	/* parent => ok */
+
+			if ((p->p_lflag & P_LTRACED) != 0 &&
+			    (p->p_oppid == selfpid))
+				break;	/* parent-in-waiting => ok */
+
+			proc_rele(p);
+			return (EACCES);
+		} while (0);
+
 	proc_klist_lock();
 
 	kn->kn_flags |= EV_CLEAR;	/* automatically set */
@@ -524,12 +557,67 @@ filt_proc(struct knote *kn, long hint)
 		if (event == NOTE_REAP || (event == NOTE_EXIT && !(kn->kn_sfflags & NOTE_REAP))) {
 			kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 		}
+		if ((event == NOTE_EXIT) && ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0)) {
+			kn->kn_fflags |= NOTE_EXITSTATUS;
+			kn->kn_data = (hint & NOTE_PDATAMASK);
+		}
+		if ((event == NOTE_RESOURCEEND) && ((kn->kn_sfflags & NOTE_RESOURCEEND) != 0)) {
+			kn->kn_fflags |= NOTE_RESOURCEEND;
+			kn->kn_data = (hint & NOTE_PDATAMASK);
+		}
+#if CONFIG_EMBEDDED
+		/* If the event is one of the APPSTATE events,remove the rest */
+		if (((event & NOTE_APPALLSTATES) != 0) && ((kn->kn_sfflags & NOTE_APPALLSTATES) != 0)) {
+			/* only one state at a time */
+			kn->kn_fflags &= ~NOTE_APPALLSTATES;
+			kn->kn_fflags |= event;
+		}
+#endif /* CONFIG_EMBEDDED */
 	}
 
 	/* atomic check, no locking need when called from above */
 	return (kn->kn_fflags != 0);
 }
 
+#if VM_PRESSURE_EVENTS
+/*
+ * Virtual memory kevents
+ *
+ * author: Matt Jacobson [matthew_jacobson@apple.com]
+ */
+
+static int
+filt_vmattach(struct knote *kn)
+{
+	/*
+	 * The note will be cleared once the information has been flushed to the client.
+	 * If there is still pressure, we will be re-alerted.
+	 */
+	kn->kn_flags |= EV_CLEAR;
+
+	return vm_knote_register(kn);
+}
+
+static void
+filt_vmdetach(struct knote *kn)
+{
+	vm_knote_unregister(kn);
+}
+
+static int
+filt_vm(struct knote *kn, long hint)
+{
+	/* hint == 0 means this is just an alive? check (always true) */
+	if (hint != 0) {
+		const pid_t pid = (pid_t)hint;
+		if ((kn->kn_sfflags & NOTE_VM_PRESSURE) && (kn->kn_kq->kq_p->p_pid == pid)) {
+			kn->kn_fflags |= NOTE_VM_PRESSURE;
+		}
+	}
+
+	return (kn->kn_fflags != 0);
+}
+#endif /* VM_PRESSURE_EVENTS */
 
 /*
  * filt_timervalidate - process data from user
@@ -872,7 +960,7 @@ filt_userattach(struct knote *kn)
 {
 	/* EVFILT_USER knotes are not attached to anything in the kernel */
 	kn->kn_hook = NULL;
-	if (kn->kn_fflags & NOTE_TRIGGER || kn->kn_flags & EV_TRIGGER) {
+	if (kn->kn_fflags & NOTE_TRIGGER) {
 		kn->kn_hookid = 1;
 	} else {
 		kn->kn_hookid = 0;
@@ -895,10 +983,10 @@ filt_user(struct knote *kn, __unused long hint)
 static void
 filt_usertouch(struct knote *kn, struct kevent64_s *kev, long type)
 {
-	int ffctrl;
+	uint32_t ffctrl;
 	switch (type) {
 	case EVENT_REGISTER:
-		if (kev->fflags & NOTE_TRIGGER || kev->flags & EV_TRIGGER) {
+		if (kev->fflags & NOTE_TRIGGER) {
 			kn->kn_hookid = 1;
 		}
 
@@ -1511,6 +1599,7 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc
 
 			error = fops->f_attach(kn);
 			kqlock(kq);
+
 			if (error != 0) {
 				/*
 				 * Failed to attach correctly, so drop.
@@ -1594,11 +1683,6 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc
 			 */
 			if (!fops->f_isfd && fops->f_touch != NULL)
 				fops->f_touch(kn, kev, EVENT_REGISTER);
-
-			/* We may need to push some info down to a networked filesystem */
-			if (kn->kn_filter == EVFILT_VNODE) {
-				vnode_knoteupdate(kn);
-			}
 		}
 
 		/* still have use ref on knote */
@@ -1770,6 +1854,47 @@ knote_process(struct knote *kn,
 	return error;
 }
 
+/*
+ * Return 0 to indicate that processing should proceed,
+ * -1 if there is nothing to process.
+ *
+ * Called with kqueue locked and returns the same way,
+ * but may drop lock temporarily.
+ */
+static int
+kqueue_begin_processing(struct kqueue *kq)
+{
+	for (;;) {
+		if (kq->kq_count == 0) {
+			return -1;
+		}
+
+		/* if someone else is processing the queue, wait */
+		if (kq->kq_nprocess != 0) {
+			wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_UNINT, 0);
+			kq->kq_state |= KQ_PROCWAIT;
+			kqunlock(kq);
+			thread_block(THREAD_CONTINUE_NULL);
+			kqlock(kq);
+		} else {
+			kq->kq_nprocess = 1;
+			return 0;
+		}
+	}
+}
+
+/*
+ * Called with kqueue lock held.
+ */
+static void
+kqueue_end_processing(struct kqueue *kq)
+{
+	kq->kq_nprocess = 0;
+	if (kq->kq_state & KQ_PROCWAIT) {
+		kq->kq_state &= ~KQ_PROCWAIT;
+		wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_AWAKENED);
+	}
+}
 
 /*
  * kqueue_process - process the triggered events in a kqueue
@@ -1799,23 +1924,13 @@ kqueue_process(struct kqueue *kq,
 	int error;
 
 	TAILQ_INIT(&inprocess);
- restart:
-	if (kq->kq_count == 0) {
+
+	if (kqueue_begin_processing(kq) == -1) {
 		*countp = 0;
+		/* Nothing to process */
 		return 0;
 	}
 
-	/* if someone else is processing the queue, wait */
-	if (hw_atomic_add(&kq->kq_nprocess, 1) != 1) {
-		hw_atomic_sub(&kq->kq_nprocess, 1);
-		wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_UNINT, 0);
-		kq->kq_state |= KQ_PROCWAIT;
-		kqunlock(kq);
-		thread_block(THREAD_CONTINUE_NULL);
-		kqlock(kq);
-		goto restart;
-	}
-
 	/*
 	 * Clear any pre-posted status from previous runs, so we only
 	 * detect events that occur during this run.
@@ -1850,11 +1965,8 @@ kqueue_process(struct kqueue *kq,
 		kn->kn_tq = &kq->kq_head;
 		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
 	}
-	hw_atomic_sub(&kq->kq_nprocess, 1);
-	if (kq->kq_state & KQ_PROCWAIT) {
-		kq->kq_state &= ~KQ_PROCWAIT;
-		wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_AWAKENED);
-	}
+
+	kqueue_end_processing(kq);
 
 	*countp = nevents;
 	return error;
@@ -2044,11 +2156,15 @@ static int
 kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
 {
 	struct kqueue *kq = (struct kqueue *)fp->f_data;
-	int again;
-
+	struct knote *kn;
+	struct kqtailq inprocessq;
+	int retnum = 0;
+
 	if (which != FREAD)
 		return 0;
 
+	TAILQ_INIT(&inprocessq);
+
 	kqlock(kq);
 	/*
 	 * If this is the first pass, link the wait queue associated with the
@@ -2067,11 +2183,12 @@ kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t
 				(wait_queue_link_t)wql);
 	}
 
- retry:
-	again = 0;
-	if (kq->kq_count != 0) {
-		struct knote *kn;
+	if (kqueue_begin_processing(kq) == -1) {
+		kqunlock(kq);
+		return 0;
+	}
 
+	if (kq->kq_count != 0) {
 		/*
 		 * there is something queued - but it might be a
 		 * KN_STAYQUEUED knote, which may or may not have
@@ -2079,31 +2196,42 @@ kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t
 		 * list of knotes to see, and peek at the stay-
 		 * queued ones to be really sure.
 		 */
-		TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
-			int retnum = 0;
-			if ((kn->kn_status & KN_STAYQUEUED) == 0 ||
-			    (retnum = kn->kn_fop->f_peek(kn)) > 0) {
-				kqunlock(kq);
-				return 1;
+		while ((kn = (struct knote*)TAILQ_FIRST(&kq->kq_head)) != NULL) {
+			if ((kn->kn_status & KN_STAYQUEUED) == 0) {
+				retnum = 1;
+				goto out;
 			}
-			if (retnum < 0)
-				again++;
+
+			TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
+			TAILQ_INSERT_TAIL(&inprocessq, kn, kn_tqe);
+
+			if (kqlock2knoteuse(kq, kn)) {
+				unsigned peek;
+
+				peek = kn->kn_fop->f_peek(kn);
+				if (knoteuse2kqlock(kq, kn)) {
+					if (peek > 0) {
+						retnum = 1;
+						goto out;
+					}
+				} else {
+					retnum = 0;
+				}
+			}
 		}
 	}
 
-	/*
-	 * If we stumbled across a knote that couldn't be peeked at,
-	 * we have to drop the kq lock and try again.
-	 */
-	if (again > 0) {
-		kqunlock(kq);
-		mutex_pause(0);
-		kqlock(kq);
-		goto retry;
+out:
+	/* Return knotes to active queue */
+	while ((kn = TAILQ_FIRST(&inprocessq)) != NULL) {
+		TAILQ_REMOVE(&inprocessq, kn, kn_tqe);
+		kn->kn_tq = &kq->kq_head;
+		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
 	}
 
+	kqueue_end_processing(kq);
 	kqunlock(kq);
-	return 0;
+	return retnum;
 }
@@ -2303,22 +2431,21 @@ knote_detach(struct klist *list, struct knote *kn)
  * we permanently enqueue them here.
  *
  * kqueue and knote references are held by caller.
+ *
+ * caller provides the wait queue link structure.
  */
 int
-knote_link_wait_queue(struct knote *kn, struct wait_queue *wq)
+knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t wql)
 {
 	struct kqueue *kq = kn->kn_kq;
 	kern_return_t kr;
 
-	kr = wait_queue_link(wq, kq->kq_wqs);
+	kr = wait_queue_link_noalloc(wq, kq->kq_wqs, wql);
 	if (kr == KERN_SUCCESS) {
-		kqlock(kq);
-		kn->kn_status |= KN_STAYQUEUED;
-		knote_enqueue(kn);
-		kqunlock(kq);
+		knote_markstayqueued(kn);
 		return 0;
 	} else {
-		return ENOMEM;
+		return EINVAL;
 	}
 }
@@ -2328,17 +2455,21 @@ knote_link_wait_queue(struct knote *kn, struct wait_queue *wq)
  *
  * Note that the unlink may have already happened from the other side, so
  * ignore any failures to unlink and just remove it from the kqueue list.
+ *
+ * On success, caller is responsible for the link structure
  */
-void
-knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq)
+int
+knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t *wqlp)
 {
 	struct kqueue *kq = kn->kn_kq;
+	kern_return_t kr;
 
-	(void) wait_queue_unlink(wq, kq->kq_wqs);
+	kr = wait_queue_unlink_nofree(wq, kq->kq_wqs, wqlp);
 	kqlock(kq);
 	kn->kn_status &= ~KN_STAYQUEUED;
 	knote_dequeue(kn);
 	kqunlock(kq);
+	return (kr != KERN_SUCCESS) ? EINVAL : 0;
 }
 
 /*
@@ -2388,7 +2519,7 @@ knote_fdclose(struct proc *p, int fd)
 
 /* proc_fdlock held on entry (and exit) */
 static int
-knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
+knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p)
 {
 	struct klist *list = NULL;
 
@@ -2401,10 +2532,18 @@ knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p
 	if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
 		u_int size = 0;
 
+		if (kn->kn_id >= (uint64_t)p->p_rlimit[RLIMIT_NOFILE].rlim_cur
+		    || kn->kn_id >= (uint64_t)maxfiles)
+			return (EINVAL);
+
 		/* have to grow the fd_knlist */
 		size = fdp->fd_knlistsize;
 		while (size <= kn->kn_id)
 			size += KQEXTENT;
+
+		if (size >= (UINT_MAX/sizeof(struct klist *)))
+			return (EINVAL);
+
 		MALLOC(list, struct klist *,
 		       size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
 		if (list == NULL)
@@ -2531,6 +2670,11 @@ knote_init(void)
 
 	/* Initialize the timer filter lock */
 	lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
+
+#if VM_PRESSURE_EVENTS
+	/* Initialize the vm pressure list lock */
+	vm_pressure_init(kq_lck_grp, kq_lck_attr);
+#endif
 }
 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
 
@@ -2843,3 +2987,12 @@ fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
 
 	return(0);
 }
+
+void
+knote_markstayqueued(struct knote *kn)
+{
+	kqlock(kn->kn_kq);
+	kn->kn_status |= KN_STAYQUEUED;
+	knote_enqueue(kn);
+	kqunlock(kn->kn_kq);
+}
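
The user-visible additions in this change are the EVFILT_VM memory-pressure filter (only compiled in when the kernel is built with VM_PRESSURE_EVENTS) and the NOTE_EXITSTATUS / NOTE_RESOURCEEND flags for EVFILT_PROC, whose delivery filt_procattach() now restricts to the target's parent or tracing parent. The sketch below is an illustrative userland consumer, not part of the diff: it assumes the caller is the parent of `child`, that EVFILT_VM and NOTE_VM_PRESSURE are visible in the SDK's <sys/event.h> (on some releases they are private), and the helper name watch_child is hypothetical.

#include <sys/event.h>
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Watch a child process for exit (capturing its wait(2)-style status via
 * NOTE_EXITSTATUS) and this process for kernel VM-pressure notifications.
 * Hypothetical helper for illustration; error handling is minimal.
 */
int
watch_child(pid_t child)
{
	struct kevent64_s changes[2], events[2];
	int kq, done = 0;

	if ((kq = kqueue()) == -1)
		return -1;

	/* filt_proc() stores (hint & NOTE_PDATAMASK) -- the exit status -- in kn_data. */
	EV_SET64(&changes[0], child, EVFILT_PROC, EV_ADD | EV_ENABLE,
	    NOTE_EXIT | NOTE_EXITSTATUS, 0, 0, 0, 0);

	/* filt_vmattach() forces EV_CLEAR; the note re-fires only while pressure persists. */
	EV_SET64(&changes[1], 0, EVFILT_VM, EV_ADD | EV_ENABLE | EV_CLEAR,
	    NOTE_VM_PRESSURE, 0, 0, 0, 0);

	if (kevent64(kq, changes, 2, NULL, 0, 0, NULL) == -1) {
		close(kq);
		return -1;
	}

	while (!done) {
		int n = kevent64(kq, NULL, 0, events, 2, 0, NULL);

		for (int i = 0; i < n; i++) {
			if (events[i].filter == EVFILT_PROC &&
			    (events[i].fflags & NOTE_EXIT)) {
				if (events[i].fflags & NOTE_EXITSTATUS)
					printf("child %d exited, status 0x%llx\n",
					    (int)child, (unsigned long long)events[i].data);
				done = 1;
			} else if (events[i].filter == EVFILT_VM &&
			    (events[i].fflags & NOTE_VM_PRESSURE)) {
				printf("kernel reported memory pressure\n");
			}
		}
		if (n == -1)
			break;
	}
	close(kq);
	return 0;
}

Note the registration order mirrors the kernel-side checks above: if the registering process is neither the parent nor the tracing parent of `child`, filt_procattach() fails the NOTE_EXITSTATUS registration with EACCES, and the exit status is then reported only through wait(2).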