X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/b7266188b87f3620ec3f9f717e57194a7dd989fe..db6096698656d32db7df630594bd9617ee54f828:/bsd/kern/kern_event.c

diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c
index 5d195dcf0..ba269074a 100644
--- a/bsd/kern/kern_event.c
+++ b/bsd/kern/kern_event.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -92,6 +92,12 @@
 #include 
 #include "net/net_str_id.h"
 
+#include 
+
+#if VM_PRESSURE_EVENTS
+#include 
+#endif
+
 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
 
 #define	KQ_EVENT	NULL
@@ -140,6 +146,8 @@ static void kevent_continue(struct kqueue *kq, void *data, int error);
 static void kqueue_scan_continue(void *contp, wait_result_t wait_result);
 static int kqueue_process(struct kqueue *kq, kevent_callback_t callback,
 			void *data, int *countp, struct proc *p);
+static int kqueue_begin_processing(struct kqueue *kq);
+static void kqueue_end_processing(struct kqueue *kq);
 static int knote_process(struct knote *kn, kevent_callback_t callback,
 			void *data, struct kqtailq *inprocessp, struct proc *p);
 static void knote_put(struct knote *kn);
@@ -183,6 +191,17 @@ static struct filterops proc_filtops = {
 	.f_event = filt_proc,
 };
 
+#if VM_PRESSURE_EVENTS
+static int filt_vmattach(struct knote *kn);
+static void filt_vmdetach(struct knote *kn);
+static int filt_vm(struct knote *kn, long hint);
+static struct filterops vm_filtops = {
+	.f_attach = filt_vmattach,
+	.f_detach = filt_vmdetach,
+	.f_event = filt_vm,
+};
+#endif /* VM_PRESSURE_EVENTS */
+
 extern struct filterops fs_filtops;
 
 extern struct filterops sig_filtops;
@@ -238,11 +257,6 @@ static struct filterops user_filtops = {
 	.f_touch = filt_usertouch,
 };
 
-#if CONFIG_AUDIT
-/* Audit session filter */
-extern struct filterops audit_session_filtops;
-#endif
-
 /*
  * Table for for all system-defined filters.
  */
@@ -261,11 +275,13 @@ static struct filterops *sysfilt_ops[] = {
 	&machport_filtops,		/* EVFILT_MACHPORT */
 	&fs_filtops,			/* EVFILT_FS */
 	&user_filtops,			/* EVFILT_USER */
-#if CONFIG_AUDIT
-	&audit_session_filtops,	/* EVFILT_SESSION */
+	&bad_filtops,			/* unused */
+#if VM_PRESSURE_EVENTS
+	&vm_filtops,			/* EVFILT_VM */
 #else
-	&bad_filtops,
+	&bad_filtops,			/* EVFILT_VM */
 #endif
+	&file_filtops,			/* EVFILT_SOCK */
 };
 /*
  *
@@ -466,6 +482,23 @@ filt_procattach(struct knote *kn)
 		return (ESRCH);
 	}
 
+	const int NoteExitStatusBits = NOTE_EXIT | NOTE_EXITSTATUS;
+
+	if ((kn->kn_sfflags & NoteExitStatusBits) == NoteExitStatusBits)
+		do {
+			pid_t selfpid = proc_selfpid();
+
+			if (p->p_ppid == selfpid)
+				break;	/* parent => ok */
+
+			if ((p->p_lflag & P_LTRACED) != 0 &&
+			    (p->p_oppid == selfpid))
+				break;	/* parent-in-waiting => ok */
+
+			proc_rele(p);
+			return (EACCES);
+		} while (0);
+
 	proc_klist_lock();
 
 	kn->kn_flags |= EV_CLEAR;	/* automatically set */
@@ -524,12 +557,67 @@ filt_proc(struct knote *kn, long hint)
 		if (event == NOTE_REAP || (event == NOTE_EXIT && !(kn->kn_sfflags & NOTE_REAP))) {
 			kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 		}
+		if ((event == NOTE_EXIT) && ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0)) {
+			kn->kn_fflags |= NOTE_EXITSTATUS;
+			kn->kn_data = (hint & NOTE_PDATAMASK);
+		}
+		if ((event == NOTE_RESOURCEEND) && ((kn->kn_sfflags & NOTE_RESOURCEEND) != 0)) {
+			kn->kn_fflags |= NOTE_RESOURCEEND;
+			kn->kn_data = (hint & NOTE_PDATAMASK);
+		}
+#if CONFIG_EMBEDDED
+		/* If the event is one of the APPSTATE events,remove the rest */
+		if (((event & NOTE_APPALLSTATES) != 0) && ((kn->kn_sfflags & NOTE_APPALLSTATES) != 0)) {
+			/* only one state at a time */
+			kn->kn_fflags &= ~NOTE_APPALLSTATES;
+			kn->kn_fflags |= event;
+		}
+#endif /* CONFIG_EMBEDDED */
 	}
 
 	/* atomic check, no locking need when called from above */
 	return (kn->kn_fflags != 0);
 }
 
+#if VM_PRESSURE_EVENTS
+/*
+ * Virtual memory kevents
+ *
+ * author: Matt Jacobson [matthew_jacobson@apple.com]
+ */
+
+static int
+filt_vmattach(struct knote *kn)
+{
+	/*
+	 * The note will be cleared once the information has been flushed to the client.
+	 * If there is still pressure, we will be re-alerted.
+	 */
+	kn->kn_flags |= EV_CLEAR;
+
+	return vm_knote_register(kn);
+}
+
+static void
+filt_vmdetach(struct knote *kn)
+{
+	vm_knote_unregister(kn);
+}
+
+static int
+filt_vm(struct knote *kn, long hint)
+{
+	/* hint == 0 means this is just an alive? check (always true) */
+	if (hint != 0) {
+		const pid_t pid = (pid_t)hint;
+		if ((kn->kn_sfflags & NOTE_VM_PRESSURE) && (kn->kn_kq->kq_p->p_pid == pid)) {
+			kn->kn_fflags |= NOTE_VM_PRESSURE;
+		}
+	}
+
+	return (kn->kn_fflags != 0);
+}
+#endif /* VM_PRESSURE_EVENTS */
 
 /*
  * filt_timervalidate - process data from user
@@ -872,7 +960,7 @@ filt_userattach(struct knote *kn)
 {
 	/* EVFILT_USER knotes are not attached to anything in the kernel */
 	kn->kn_hook = NULL;
-	if (kn->kn_fflags & NOTE_TRIGGER || kn->kn_flags & EV_TRIGGER) {
+	if (kn->kn_fflags & NOTE_TRIGGER) {
 		kn->kn_hookid = 1;
 	} else {
 		kn->kn_hookid = 0;
@@ -895,10 +983,10 @@ filt_user(struct knote *kn, __unused long hint)
 static void
 filt_usertouch(struct knote *kn, struct kevent64_s *kev, long type)
 {
-	int ffctrl;
+	uint32_t ffctrl;
 	switch (type) {
 	case EVENT_REGISTER:
-		if (kev->fflags & NOTE_TRIGGER || kev->flags & EV_TRIGGER) {
+		if (kev->fflags & NOTE_TRIGGER) {
 			kn->kn_hookid = 1;
 		}
 
@@ -1511,6 +1599,7 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc
 
 			error = fops->f_attach(kn);
 			kqlock(kq);
+
 			if (error != 0) {
 				/*
 				 * Failed to attach correctly, so drop.
@@ -1594,11 +1683,6 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc
 			 */
 			if (!fops->f_isfd && fops->f_touch != NULL)
 				fops->f_touch(kn, kev, EVENT_REGISTER);
-
-			/* We may need to push some info down to a networked filesystem */
-			if (kn->kn_filter == EVFILT_VNODE) {
-				vnode_knoteupdate(kn);
-			}
 		}
 
 		/* still have use ref on knote */
@@ -1770,6 +1854,47 @@ knote_process(struct knote *kn,
 	return error;
 }
 
+/*
+ * Return 0 to indicate that processing should proceed,
+ * -1 if there is nothing to process.
+ *
+ * Called with kqueue locked and returns the same way,
+ * but may drop lock temporarily.
+ */
+static int
+kqueue_begin_processing(struct kqueue *kq)
+{
+	for (;;) {
+		if (kq->kq_count == 0) {
+			return -1;
+		}
+
+		/* if someone else is processing the queue, wait */
+		if (kq->kq_nprocess != 0) {
+			wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_UNINT, 0);
+			kq->kq_state |= KQ_PROCWAIT;
+			kqunlock(kq);
+			thread_block(THREAD_CONTINUE_NULL);
+			kqlock(kq);
+		} else {
+			kq->kq_nprocess = 1;
+			return 0;
+		}
+	}
+}
+
+/*
+ * Called with kqueue lock held.
+ */
+static void
+kqueue_end_processing(struct kqueue *kq)
+{
+	kq->kq_nprocess = 0;
+	if (kq->kq_state & KQ_PROCWAIT) {
+		kq->kq_state &= ~KQ_PROCWAIT;
+		wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_AWAKENED);
+	}
+}
 
 /*
  * kqueue_process - process the triggered events in a kqueue
@@ -1799,23 +1924,13 @@ kqueue_process(struct kqueue *kq,
 	int error;
 
 	TAILQ_INIT(&inprocess);
- restart:
-	if (kq->kq_count == 0) {
+
+	if (kqueue_begin_processing(kq) == -1) {
 		*countp = 0;
+		/* Nothing to process */
 		return 0;
 	}
 
-	/* if someone else is processing the queue, wait */
-	if (hw_atomic_add(&kq->kq_nprocess, 1) != 1) {
-		hw_atomic_sub(&kq->kq_nprocess, 1);
-		wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_UNINT, 0);
-		kq->kq_state |= KQ_PROCWAIT;
-		kqunlock(kq);
-		thread_block(THREAD_CONTINUE_NULL);
-		kqlock(kq);
-		goto restart;
-	}
-
 	/*
 	 * Clear any pre-posted status from previous runs, so we only
 	 * detect events that occur during this run.
@@ -1850,11 +1965,8 @@ kqueue_process(struct kqueue *kq,
 		kn->kn_tq = &kq->kq_head;
 		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
 	}
-	hw_atomic_sub(&kq->kq_nprocess, 1);
-	if (kq->kq_state & KQ_PROCWAIT) {
-		kq->kq_state &= ~KQ_PROCWAIT;
-		wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_AWAKENED);
-	}
+
+	kqueue_end_processing(kq);
 
 	*countp = nevents;
 	return error;
@@ -2044,11 +2156,15 @@ static int
 kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
 {
 	struct kqueue *kq = (struct kqueue *)fp->f_data;
-	int again;
-
+	struct knote *kn;
+	struct kqtailq inprocessq;
+	int retnum = 0;
+
 	if (which != FREAD)
 		return 0;
 
+	TAILQ_INIT(&inprocessq);
+
 	kqlock(kq);
 	/*
 	 * If this is the first pass, link the wait queue associated with the
@@ -2067,11 +2183,12 @@ kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t
 				(wait_queue_link_t)wql);
 	}
 
- retry:
-	again = 0;
-	if (kq->kq_count != 0) {
-		struct knote *kn;
+	if (kqueue_begin_processing(kq) == -1) {
+		kqunlock(kq);
+		return 0;
+	}
 
+	if (kq->kq_count != 0) {
 		/*
 		 * there is something queued - but it might be a
 		 * KN_STAYQUEUED knote, which may or may not have
@@ -2079,31 +2196,42 @@ kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t
 		 * list of knotes to see, and peek at the stay-
 		 * queued ones to be really sure.
 		 */
-		TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
-			int retnum = 0;
-			if ((kn->kn_status & KN_STAYQUEUED) == 0 ||
-			    (retnum = kn->kn_fop->f_peek(kn)) > 0) {
-				kqunlock(kq);
-				return 1;
+		while ((kn = (struct knote*)TAILQ_FIRST(&kq->kq_head)) != NULL) {
+			if ((kn->kn_status & KN_STAYQUEUED) == 0) {
+				retnum = 1;
+				goto out;
 			}
-			if (retnum < 0)
-				again++;
+
+			TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
+			TAILQ_INSERT_TAIL(&inprocessq, kn, kn_tqe);
+
+			if (kqlock2knoteuse(kq, kn)) {
+				unsigned peek;
+
+				peek = kn->kn_fop->f_peek(kn);
+				if (knoteuse2kqlock(kq, kn)) {
+					if (peek > 0) {
+						retnum = 1;
+						goto out;
+					}
+				} else {
+					retnum = 0;
+				}
+			}
 		}
 	}
 
-	/*
-	 * If we stumbled across a knote that couldn't be peeked at,
-	 * we have to drop the kq lock and try again.
-	 */
-	if (again > 0) {
-		kqunlock(kq);
-		mutex_pause(0);
-		kqlock(kq);
-		goto retry;
+out:
+	/* Return knotes to active queue */
+	while ((kn = TAILQ_FIRST(&inprocessq)) != NULL) {
+		TAILQ_REMOVE(&inprocessq, kn, kn_tqe);
+		kn->kn_tq = &kq->kq_head;
+		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
 	}
 
+	kqueue_end_processing(kq);
 	kqunlock(kq);
-	return 0;
+	return retnum;
 }
@@ -2303,22 +2431,21 @@ knote_detach(struct klist *list, struct knote *kn)
  * we permanently enqueue them here.
  *
  * kqueue and knote references are held by caller.
+ *
+ * caller provides the wait queue link structure.
  */
 int
-knote_link_wait_queue(struct knote *kn, struct wait_queue *wq)
+knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t wql)
 {
 	struct kqueue *kq = kn->kn_kq;
 	kern_return_t kr;
 
-	kr = wait_queue_link(wq, kq->kq_wqs);
+	kr = wait_queue_link_noalloc(wq, kq->kq_wqs, wql);
 	if (kr == KERN_SUCCESS) {
-		kqlock(kq);
-		kn->kn_status |= KN_STAYQUEUED;
-		knote_enqueue(kn);
-		kqunlock(kq);
+		knote_markstayqueued(kn);
 		return 0;
 	} else {
-		return ENOMEM;
+		return EINVAL;
 	}
 }
@@ -2328,17 +2455,21 @@ knote_link_wait_queue(struct knote *kn, struct wait_queue *wq)
  *
  * Note that the unlink may have already happened from the other side, so
  * ignore any failures to unlink and just remove it from the kqueue list.
+ *
+ * On success, caller is responsible for the link structure
  */
-void
-knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq)
+int
+knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t *wqlp)
 {
 	struct kqueue *kq = kn->kn_kq;
+	kern_return_t kr;
 
-	(void) wait_queue_unlink(wq, kq->kq_wqs);
+	kr = wait_queue_unlink_nofree(wq, kq->kq_wqs, wqlp);
 	kqlock(kq);
 	kn->kn_status &= ~KN_STAYQUEUED;
 	knote_dequeue(kn);
 	kqunlock(kq);
+	return (kr != KERN_SUCCESS) ? EINVAL : 0;
 }
 
 /*
@@ -2388,7 +2519,7 @@ knote_fdclose(struct proc *p, int fd)
 
 /* proc_fdlock held on entry (and exit) */
 static int
-knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
+knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p)
 {
 	struct klist *list = NULL;
 
@@ -2401,10 +2532,18 @@ knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p
 	if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
 		u_int size = 0;
 
+		if (kn->kn_id >= (uint64_t)p->p_rlimit[RLIMIT_NOFILE].rlim_cur
+		    || kn->kn_id >= (uint64_t)maxfiles)
+			return (EINVAL);
+
 		/* have to grow the fd_knlist */
 		size = fdp->fd_knlistsize;
 		while (size <= kn->kn_id)
 			size += KQEXTENT;
+
+		if (size >= (UINT_MAX/sizeof(struct klist *)))
+			return (EINVAL);
+
 		MALLOC(list, struct klist *,
 		       size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
 		if (list == NULL)
@@ -2531,6 +2670,11 @@ knote_init(void)
 
 	/* Initialize the timer filter lock */
 	lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
+
+#if VM_PRESSURE_EVENTS
+	/* Initialize the vm pressure list lock */
+	vm_pressure_init(kq_lck_grp, kq_lck_attr);
+#endif
 }
 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
 
@@ -2843,3 +2987,12 @@ fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
 
 	return(0);
 }
+
+void
+knote_markstayqueued(struct knote *kn)
+{
+	kqlock(kn->kn_kq);
+	kn->kn_status |= KN_STAYQUEUED;
+	knote_enqueue(kn);
+	kqunlock(kn->kn_kq);
+}
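
The user-visible additions in this change are the EVFILT_VM memory-pressure filter (only compiled in when the kernel is built with VM_PRESSURE_EVENTS) and the NOTE_EXITSTATUS / NOTE_RESOURCEEND flags for EVFILT_PROC, whose delivery filt_procattach() now restricts to the target's parent or tracing parent. The sketch below is an illustrative userland consumer, not part of the diff: it assumes the caller is the parent of `child`, that EVFILT_VM and NOTE_VM_PRESSURE are visible in the SDK's <sys/event.h> (on some releases they are private), and the helper name watch_child is hypothetical.

#include <sys/event.h>
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Watch a child process for exit (capturing its wait(2)-style status via
 * NOTE_EXITSTATUS) and this process for kernel VM-pressure notifications.
 * Hypothetical helper for illustration; error handling is minimal.
 */
int
watch_child(pid_t child)
{
	struct kevent64_s changes[2], events[2];
	int kq, done = 0;

	if ((kq = kqueue()) == -1)
		return -1;

	/* filt_proc() stores (hint & NOTE_PDATAMASK) -- the exit status -- in kn_data. */
	EV_SET64(&changes[0], child, EVFILT_PROC, EV_ADD | EV_ENABLE,
	    NOTE_EXIT | NOTE_EXITSTATUS, 0, 0, 0, 0);

	/* filt_vmattach() forces EV_CLEAR; the note re-fires only while pressure persists. */
	EV_SET64(&changes[1], 0, EVFILT_VM, EV_ADD | EV_ENABLE | EV_CLEAR,
	    NOTE_VM_PRESSURE, 0, 0, 0, 0);

	if (kevent64(kq, changes, 2, NULL, 0, 0, NULL) == -1) {
		close(kq);
		return -1;
	}

	while (!done) {
		int n = kevent64(kq, NULL, 0, events, 2, 0, NULL);

		for (int i = 0; i < n; i++) {
			if (events[i].filter == EVFILT_PROC &&
			    (events[i].fflags & NOTE_EXIT)) {
				if (events[i].fflags & NOTE_EXITSTATUS)
					printf("child %d exited, status 0x%llx\n",
					    (int)child, (unsigned long long)events[i].data);
				done = 1;
			} else if (events[i].filter == EVFILT_VM &&
			    (events[i].fflags & NOTE_VM_PRESSURE)) {
				printf("kernel reported memory pressure\n");
			}
		}
		if (n == -1)
			break;
	}
	close(kq);
	return 0;
}

Note the registration order mirrors the kernel-side checks above: if the registering process is neither the parent nor the tracing parent of `child`, filt_procattach() fails the NOTE_EXITSTATUS registration with EACCES, and the exit status is then reported only through wait(2).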