+ filt_timerlock();
+
+ callout = (thread_call_t)kn->kn_hook;
+ filt_timercancel(kn);
+
+ filt_timerunlock();
+
+ thread_call_free(callout);
+}
+
+
+
+/*
+ * filt_timer - determine if a timer knote should fire
+ *
+ * hint != 0: a real timer pop delivered from filt_timerexpire, which
+ * already holds the timer lock.  Count the pop in kn_data and, unless
+ * a cancel is in progress (TIMER_CANCELWAIT) or the event is one-shot,
+ * recompute the deadline and re-arm the underlying thread call.
+ *
+ * hint == 0: a user-level query; take the timer lock ourselves and
+ * report whether any pops have accumulated in kn_data.
+ */
+static int
+filt_timer(struct knote *kn, long hint)
+{
+ int result;
+
+ if (hint) {
+ /* real timer pop -- timer lock held by filt_timerexpire */
+ kn->kn_data++;
+
+ if (((kn->kn_hookid & TIMER_CANCELWAIT) == 0) &&
+ ((kn->kn_flags & EV_ONESHOT) == 0)) {
+
+ /* evaluate next time to fire */
+ filt_timerupdate(kn);
+
+ /* kn_ext[0] == 0 means no further deadline: do not re-arm */
+ if (kn->kn_ext[0]) {
+ unsigned int timer_flags = 0;
+
+ /* keep the callout and re-arm */
+ if (kn->kn_sfflags & NOTE_CRITICAL)
+ timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL;
+ else if (kn->kn_sfflags & NOTE_BACKGROUND)
+ timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND;
+ else
+ timer_flags |= THREAD_CALL_DELAY_USER_NORMAL;
+
+ if (kn->kn_sfflags & NOTE_LEEWAY)
+ timer_flags |= THREAD_CALL_DELAY_LEEWAY;
+
+ thread_call_enter_delayed_with_leeway(kn->kn_hook, NULL,
+ kn->kn_ext[0], kn->kn_ext[1], timer_flags);
+
+ kn->kn_hookid |= TIMER_RUNNING;
+ }
+ }
+
+ /* a timer pop always reports the knote as active */
+ return (1);
+ }
+
+ /* user-query */
+ filt_timerlock();
+
+ /* active iff at least one pop has not been consumed yet */
+ result = (kn->kn_data != 0);
+
+ filt_timerunlock();
+
+ return (result);
+}
+
+
+/*
+ * filt_timertouch - update knote with new user input
+ *
+ * Cancel and restart the timer based on new user data. When
+ * the user picks up a knote, clear the count of how many timer
+ * pops have gone off (in kn_data).
+ */
+static void
+filt_timertouch(struct knote *kn, struct kevent64_s *kev, long type)
+{
+ int error;
+ filt_timerlock();
+
+ switch (type) {
+ case EVENT_REGISTER:
+ /* cancel current call */
+ filt_timercancel(kn);
+
+ /* recalculate deadline */
+ kn->kn_sdata = kev->data;
+ kn->kn_sfflags = kev->fflags;
+ kn->kn_ext[0] = kev->ext[0];
+ kn->kn_ext[1] = kev->ext[1];
+
+ error = filt_timervalidate(kn);
+ if (error) {
+ /* no way to report error, so mark it in the knote */
+ kn->kn_flags |= EV_ERROR;
+ kn->kn_data = error;
+ break;
+ }
+
+ /* start timer if necessary */
+ filt_timerupdate(kn);
+
+ /* kn_ext[0] == 0 after update means the deadline already passed */
+ if (kn->kn_ext[0]) {
+ unsigned int timer_flags = 0;
+ /* map NOTE_* urgency flags to thread-call delay flags */
+ if (kn->kn_sfflags & NOTE_CRITICAL)
+ timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL;
+ else if (kn->kn_sfflags & NOTE_BACKGROUND)
+ timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND;
+ else
+ timer_flags |= THREAD_CALL_DELAY_USER_NORMAL;
+
+ if (kn->kn_sfflags & NOTE_LEEWAY)
+ timer_flags |= THREAD_CALL_DELAY_LEEWAY;
+
+ thread_call_enter_delayed_with_leeway(kn->kn_hook, NULL,
+ kn->kn_ext[0], kn->kn_ext[1], timer_flags);
+
+ kn->kn_hookid |= TIMER_RUNNING;
+ } else {
+ /* pretend the timer has fired */
+ kn->kn_data = 1;
+ }
+
+ break;
+
+ case EVENT_PROCESS:
+ /* reset the timer pop count in kn_data */
+ *kev = kn->kn_kevent;
+ kev->ext[0] = 0;
+ kn->kn_data = 0;
+ if (kn->kn_flags & EV_CLEAR)
+ kn->kn_fflags = 0;
+ break;
+ default:
+ panic("%s: - invalid type (%ld)", __func__, type);
+ break;
+ }
+
+ filt_timerunlock();
+}
+
+/* filt_timerlock - take the global EVFILT_TIMER filter mutex */
+static void
+filt_timerlock(void)
+{
+ lck_mtx_lock(&_filt_timerlock);
+}
+
+/* filt_timerunlock - release the global EVFILT_TIMER filter mutex */
+static void
+filt_timerunlock(void)
+{
+ lck_mtx_unlock(&_filt_timerlock);
+}
+
+/*
+ * filt_userattach - set up an EVFILT_USER knote
+ *
+ * These knotes have no kernel-side event source; kn_hookid simply
+ * records whether the event has already been triggered (NOTE_TRIGGER
+ * may be set at registration time).
+ */
+static int
+filt_userattach(struct knote *kn)
+{
+ /* EVFILT_USER knotes are not attached to anything in the kernel */
+ kn->kn_hook = NULL;
+ kn->kn_hookid = (kn->kn_fflags & NOTE_TRIGGER) ? 1 : 0;
+ return (0);
+}
+
+/* filt_userdetach - nothing to tear down for EVFILT_USER knotes */
+static void
+filt_userdetach(__unused struct knote *kn)
+{
+ /* EVFILT_USER knotes are not attached to anything in the kernel */
+}
+
+/* filt_user - active iff NOTE_TRIGGER has been posted (kn_hookid != 0) */
+static int
+filt_user(struct knote *kn, __unused long hint)
+{
+ return (kn->kn_hookid);
+}
+
+/*
+ * filt_usertouch - apply user changes to an EVFILT_USER knote
+ *
+ * EVENT_REGISTER: latch NOTE_TRIGGER into kn_hookid, then apply the
+ * NOTE_FFCTRLMASK operation (NOP/AND/OR/COPY) to the saved fflags.
+ * EVENT_PROCESS: copy the event out to the user and, for EV_CLEAR
+ * knotes, reset the trigger, data and fflags state.
+ */
+static void
+filt_usertouch(struct knote *kn, struct kevent64_s *kev, long type)
+{
+ uint32_t ffctrl;
+ switch (type) {
+ case EVENT_REGISTER:
+ if (kev->fflags & NOTE_TRIGGER) {
+ kn->kn_hookid = 1;
+ }
+
+ /* split control bits from the flag payload */
+ ffctrl = kev->fflags & NOTE_FFCTRLMASK;
+ kev->fflags &= NOTE_FFLAGSMASK;
+ switch (ffctrl) {
+ case NOTE_FFNOP:
+ break;
+ case NOTE_FFAND:
+ /* atomic: kn_sfflags may be read concurrently */
+ OSBitAndAtomic(kev->fflags, &kn->kn_sfflags);
+ break;
+ case NOTE_FFOR:
+ OSBitOrAtomic(kev->fflags, &kn->kn_sfflags);
+ break;
+ case NOTE_FFCOPY:
+ kn->kn_sfflags = kev->fflags;
+ break;
+ }
+ kn->kn_sdata = kev->data;
+ break;
+ case EVENT_PROCESS:
+ *kev = kn->kn_kevent;
+ /* NOTE(review): volatile cast forces a fresh read of kn_sfflags
+ * against the atomic updates above -- confirm still required */
+ kev->fflags = (volatile UInt32)kn->kn_sfflags;
+ kev->data = kn->kn_sdata;
+ if (kn->kn_flags & EV_CLEAR) {
+ kn->kn_hookid = 0;
+ kn->kn_data = 0;
+ kn->kn_fflags = 0;
+ }
+ break;
+ default:
+ panic("%s: - invalid type (%ld)", __func__, type);
+ break;
+ }
+}
+
+/*
+ * JMM - placeholder for not-yet-implemented filters
+ */
+/* filt_badattach - attach stub for unimplemented filters; always fails */
+static int
+filt_badattach(__unused struct knote *kn)
+{
+ return (ENOTSUP);
+}
+
+/*
+ * kqueue_alloc - allocate and initialize a kqueue for process p
+ *
+ * Allocates the kqueue zone element plus its wait queue set; on
+ * partial failure the zone element is released again.  Also marks
+ * the process fd table as "has used kqueues" (fd_knlistsize >= 0).
+ * Returns NULL on allocation failure.
+ */
+struct kqueue *
+kqueue_alloc(struct proc *p)
+{
+ struct filedesc *fdp = p->p_fd;
+ struct kqueue *kq;
+
+ MALLOC_ZONE(kq, struct kqueue *, sizeof (struct kqueue), M_KQUEUE,
+ M_WAITOK);
+ if (kq != NULL) {
+ wait_queue_set_t wqs;
+
+ wqs = wait_queue_set_alloc(SYNC_POLICY_FIFO |
+ SYNC_POLICY_PREPOST);
+ if (wqs != NULL) {
+ bzero(kq, sizeof (struct kqueue));
+ lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
+ TAILQ_INIT(&kq->kq_head);
+ kq->kq_wqs = wqs;
+ kq->kq_p = p;
+ } else {
+ /* wait queue set failed: roll back the zone allocation */
+ FREE_ZONE(kq, sizeof (struct kqueue), M_KQUEUE);
+ kq = NULL;
+ }
+ }
+
+ /* unlocked check first, then recheck under the fd lock */
+ if (fdp->fd_knlistsize < 0) {
+ proc_fdlock(p);
+ if (fdp->fd_knlistsize < 0)
+ fdp->fd_knlistsize = 0; /* this process has had a kq */
+ proc_fdunlock(p);
+ }
+
+ return (kq);
+}
+
+/*
+ * kqueue_dealloc - detach all knotes from a kqueue and free it
+ *
+ * We walk each list looking for knotes referencing this
+ * kqueue. If we find one, we try to drop it. But
+ * if we fail to get a drop reference, that will wait
+ * until it is dropped. So, we can just restart again
+ * safe in the assumption that the list will eventually
+ * not contain any more references to this kqueue (either
+ * we dropped them all, or someone else did).
+ *
+ * Assumes no new events are being added to the kqueue.
+ * Nothing locked on entry or exit.
+ */
+void
+kqueue_dealloc(struct kqueue *kq)
+{
+ struct proc *p = kq->kq_p;
+ struct filedesc *fdp = p->p_fd;
+ struct knote *kn;
+ int i;
+
+ proc_fdlock(p);
+ /* pass 1: fd-indexed knote lists */
+ for (i = 0; i < fdp->fd_knlistsize; i++) {
+ kn = SLIST_FIRST(&fdp->fd_knlist[i]);
+ while (kn != NULL) {
+ if (kq == kn->kn_kq) {
+ kqlock(kq);
+ /* dropping fd lock: the list may change under us */
+ proc_fdunlock(p);
+ /* drop it ourselves or wait */
+ if (kqlock2knotedrop(kq, kn)) {
+ kn->kn_fop->f_detach(kn);
+ knote_drop(kn, p);
+ }
+ proc_fdlock(p);
+ /* start over at beginning of list */
+ kn = SLIST_FIRST(&fdp->fd_knlist[i]);
+ continue;
+ }
+ kn = SLIST_NEXT(kn, kn_link);
+ }
+ }
+ /* pass 2: hashed knote lists (non-fd idents), if allocated */
+ if (fdp->fd_knhashmask != 0) {
+ for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
+ kn = SLIST_FIRST(&fdp->fd_knhash[i]);
+ while (kn != NULL) {
+ if (kq == kn->kn_kq) {
+ kqlock(kq);
+ proc_fdunlock(p);
+ /* drop it ourselves or wait */
+ if (kqlock2knotedrop(kq, kn)) {
+ kn->kn_fop->f_detach(kn);
+ knote_drop(kn, p);
+ }
+ proc_fdlock(p);
+ /* start over at beginning of list */
+ kn = SLIST_FIRST(&fdp->fd_knhash[i]);
+ continue;
+ }
+ kn = SLIST_NEXT(kn, kn_link);
+ }
+ }
+ }
+ proc_fdunlock(p);
+
+ /*
+ * before freeing the wait queue set for this kqueue,
+ * make sure it is unlinked from all its containing (select) sets.
+ */
+ wait_queue_unlink_all((wait_queue_t)kq->kq_wqs);
+ wait_queue_set_free(kq->kq_wqs);
+ lck_spin_destroy(&kq->kq_lock, kq_lck_grp);
+ FREE_ZONE(kq, sizeof (struct kqueue), M_KQUEUE);
+}
+
+/*
+ * kqueue_body - common implementation of the kqueue() syscall
+ *
+ * Allocates a file descriptor (via the caller-supplied fileproc
+ * allocator) and a kqueue, wires them together, and returns the
+ * new fd in *retval.  On kqueue allocation failure the fd is
+ * released again and ENOMEM is returned.
+ */
+int
+kqueue_body(struct proc *p, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval)
+{
+ struct kqueue *kq;
+ struct fileproc *fp;
+ int fd, error;
+
+ error = falloc_withalloc(p,
+ &fp, &fd, vfs_context_current(), fp_zalloc, cra);
+ if (error) {
+ return (error);
+ }
+
+ kq = kqueue_alloc(p);
+ if (kq == NULL) {
+ fp_free(p, fd, fp);
+ return (ENOMEM);
+ }
+
+ fp->f_flag = FREAD | FWRITE;
+ fp->f_ops = &kqueueops;
+ fp->f_data = kq;
+
+ proc_fdlock(p);
+ /* kqueue fds are always close-on-exec */
+ *fdflags(p, fd) |= UF_EXCLOSE;
+ procfdtbl_releasefd(p, fd, NULL);
+ fp_drop(p, fd, fp, 1);
+ proc_fdunlock(p);
+
+ *retval = fd;
+ return (error);
+}
+
+/* kqueue - [syscall] create a new kqueue and return its descriptor */
+int
+kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval)
+{
+ return (kqueue_body(p, fileproc_alloc_init, NULL, retval));
+}
+
+/*
+ * kevent_copyin - copy one kevent from user space into a kevent64_s
+ *
+ * Handles three user ABI layouts: native kevent64_s (iskev64), the
+ * 64-bit process legacy kevent, and the 32-bit process legacy kevent.
+ * On success *addrp is advanced past the structure consumed.
+ */
+static int
+kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p,
+ int iskev64)
+{
+ int advance;
+ int error;
+
+ if (iskev64) {
+ /* same layout in and out: copy directly */
+ advance = sizeof (struct kevent64_s);
+ error = copyin(*addrp, (caddr_t)kevp, advance);
+ } else if (IS_64BIT_PROCESS(p)) {
+ struct user64_kevent kev64;
+ /* zero ext[0]/ext[1], which the legacy struct lacks */
+ bzero(kevp, sizeof (struct kevent64_s));
+
+ advance = sizeof (kev64);
+ error = copyin(*addrp, (caddr_t)&kev64, advance);
+ if (error)
+ return (error);
+ kevp->ident = kev64.ident;
+ kevp->filter = kev64.filter;
+ kevp->flags = kev64.flags;
+ kevp->fflags = kev64.fflags;
+ kevp->data = kev64.data;
+ kevp->udata = kev64.udata;
+ } else {
+ struct user32_kevent kev32;
+ bzero(kevp, sizeof (struct kevent64_s));
+
+ advance = sizeof (kev32);
+ error = copyin(*addrp, (caddr_t)&kev32, advance);
+ if (error)
+ return (error);
+ /* widen 32-bit fields to the in-kernel representation */
+ kevp->ident = (uintptr_t)kev32.ident;
+ kevp->filter = kev32.filter;
+ kevp->flags = kev32.flags;
+ kevp->fflags = kev32.fflags;
+ kevp->data = (intptr_t)kev32.data;
+ kevp->udata = CAST_USER_ADDR_T(kev32.udata);
+ }
+ if (!error)
+ *addrp += advance;
+ return (error);
+}
+
+/*
+ * kevent_copyout - copy one in-kernel kevent64_s out to user space
+ *
+ * Mirror of kevent_copyin: emits the layout matching the calling
+ * variant (kevent64_s, 64-bit legacy, or 32-bit legacy) and advances
+ * *addrp on success.
+ */
+static int
+kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, struct proc *p,
+ int iskev64)
+{
+ int advance;
+ int error;
+
+ if (iskev64) {
+ advance = sizeof (struct kevent64_s);
+ error = copyout((caddr_t)kevp, *addrp, advance);
+ } else if (IS_64BIT_PROCESS(p)) {
+ struct user64_kevent kev64;
+
+ /*
+ * deal with the special case of a user-supplied
+ * value of (uintptr_t)-1.
+ */
+ kev64.ident = (kevp->ident == (uintptr_t)-1) ?
+ (uint64_t)-1LL : (uint64_t)kevp->ident;
+
+ kev64.filter = kevp->filter;
+ kev64.flags = kevp->flags;
+ kev64.fflags = kevp->fflags;
+ kev64.data = (int64_t) kevp->data;
+ kev64.udata = kevp->udata;
+ advance = sizeof (kev64);
+ error = copyout((caddr_t)&kev64, *addrp, advance);
+ } else {
+ struct user32_kevent kev32;
+
+ /* narrow to the 32-bit ABI; udata implicitly truncates to
+ * the 32-bit field -- harmless for 32-bit processes */
+ kev32.ident = (uint32_t)kevp->ident;
+ kev32.filter = kevp->filter;
+ kev32.flags = kevp->flags;
+ kev32.fflags = kevp->fflags;
+ kev32.data = (int32_t)kevp->data;
+ kev32.udata = kevp->udata;
+ advance = sizeof (kev32);
+ error = copyout((caddr_t)&kev32, *addrp, advance);
+ }
+ if (!error)
+ *addrp += advance;
+ return (error);
+}
+
+/*
+ * kevent_continue - continue a kevent syscall after blocking
+ *
+ * assume we inherit a use count on the kq fileglob.
+ */
+
+static void
+kevent_continue(__unused struct kqueue *kq, void *data, int error)
+{
+ struct _kevent *cont_args;
+ struct fileproc *fp;
+ int32_t *retval;
+ int noutputs;
+ int fd;
+ struct proc *p = current_proc();
+
+ /* recover the state stashed in the uthread by kevent_internal */
+ cont_args = (struct _kevent *)data;
+ noutputs = cont_args->eventout;
+ retval = cont_args->retval;
+ fd = cont_args->fd;
+ fp = cont_args->fp;
+
+ /* release the use count taken on the kq fileglob */
+ fp_drop(p, fd, fp, 0);
+
+ /* don't restart after signals... */
+ if (error == ERESTART)
+ error = EINTR;
+ else if (error == EWOULDBLOCK)
+ error = 0;
+ if (error == 0)
+ *retval = noutputs;
+ /* returns to user space; does not return to the caller */
+ unix_syscall_return(error);
+}
+
+/*
+ * kevent - [syscall] register and wait for kernel events
+ *
+ */
+int
+kevent(struct proc *p, struct kevent_args *uap, int32_t *retval)
+{
+ /* legacy entry point: not the 64-bit event layout, no flags */
+ return (kevent_internal(p,
+ 0,
+ uap->changelist,
+ uap->nchanges,
+ uap->eventlist,
+ uap->nevents,
+ uap->fd,
+ uap->timeout,
+ 0, /* no flags from old kevent() call */
+ retval));
+}
+
+/* kevent64 - [syscall] kevent using the 64-bit kevent64_s layout */
+int
+kevent64(struct proc *p, struct kevent64_args *uap, int32_t *retval)
+{
+ return (kevent_internal(p,
+ 1,
+ uap->changelist,
+ uap->nchanges,
+ uap->eventlist,
+ uap->nevents,
+ uap->fd,
+ uap->timeout,
+ uap->flags,
+ retval));
+}
+
+/*
+ * kevent_internal - common body of kevent() and kevent64()
+ *
+ * Converts the user timeout to an absolute deadline, takes a use
+ * count on the kq file, registers each change from the changelist
+ * (reporting per-change errors in the eventlist when possible),
+ * then scans for events via the continuation mechanism.  On the
+ * normal path kevent_continue() returns directly to user space.
+ */
+static int
+kevent_internal(struct proc *p, int iskev64, user_addr_t changelist,
+ int nchanges, user_addr_t ueventlist, int nevents, int fd,
+ user_addr_t utimeout, __unused unsigned int flags,
+ int32_t *retval)
+{
+ struct _kevent *cont_args;
+ uthread_t ut;
+ struct kqueue *kq;
+ struct fileproc *fp;
+ struct kevent64_s kev;
+ int error, noutputs;
+ struct timeval atv;
+
+ /* convert timeout to absolute - if we have one */
+ if (utimeout != USER_ADDR_NULL) {
+ struct timeval rtv;
+ if (IS_64BIT_PROCESS(p)) {
+ struct user64_timespec ts;
+ error = copyin(utimeout, &ts, sizeof(ts));
+ /* NOTE(review): ts is inspected before the copyin error
+ * check below; on failure the error return still wins */
+ if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0)
+ error = EINVAL;
+ else
+ TIMESPEC_TO_TIMEVAL(&rtv, &ts);
+ } else {
+ struct user32_timespec ts;
+ error = copyin(utimeout, &ts, sizeof(ts));
+ TIMESPEC_TO_TIMEVAL(&rtv, &ts);
+ }
+ if (error)
+ return (error);
+ if (itimerfix(&rtv))
+ return (EINVAL);
+ getmicrouptime(&atv);
+ timevaladd(&atv, &rtv);
+ } else {
+ /* zero deadline == wait indefinitely */
+ atv.tv_sec = 0;
+ atv.tv_usec = 0;
+ }
+
+ /* get a usecount for the kq itself */
+ if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
+ return (error);
+
+ /* each kq should only be used for events of one type */
+ kqlock(kq);
+ if (kq->kq_state & (KQ_KEV32 | KQ_KEV64)) {
+ /* reject mixing 32-bit and 64-bit event layouts on one kq */
+ if (((iskev64 && (kq->kq_state & KQ_KEV32)) ||
+ (!iskev64 && (kq->kq_state & KQ_KEV64)))) {
+ error = EINVAL;
+ kqunlock(kq);
+ goto errorout;
+ }
+ } else {
+ kq->kq_state |= (iskev64 ? KQ_KEV64 : KQ_KEV32);
+ }
+ kqunlock(kq);
+
+ /* register all the change requests the user provided... */
+ noutputs = 0;
+ while (nchanges > 0 && error == 0) {
+ error = kevent_copyin(&changelist, &kev, p, iskev64);
+ if (error)
+ break;
+
+ kev.flags &= ~EV_SYSFLAGS;
+ error = kevent_register(kq, &kev, p);
+ /* report registration errors (or EV_RECEIPT acks) in the
+ * output list when there is room, consuming an event slot */
+ if ((error || (kev.flags & EV_RECEIPT)) && nevents > 0) {
+ kev.flags = EV_ERROR;
+ kev.data = error;
+ error = kevent_copyout(&kev, &ueventlist, p, iskev64);
+ if (error == 0) {
+ nevents--;
+ noutputs++;
+ }
+ }
+ nchanges--;
+ }
+
+ /* store the continuation/completion data in the uthread */
+ ut = (uthread_t)get_bsdthread_info(current_thread());
+ cont_args = &ut->uu_kevent.ss_kevent;
+ cont_args->fp = fp;
+ cont_args->fd = fd;
+ cont_args->retval = retval;
+ cont_args->eventlist = ueventlist;
+ cont_args->eventcount = nevents;
+ cont_args->eventout = noutputs;
+ cont_args->eventsize = iskev64;
+
+ if (nevents > 0 && noutputs == 0 && error == 0)
+ error = kqueue_scan(kq, kevent_callback,
+ kevent_continue, cont_args,
+ &atv, p);
+ /* does not return: drops fp and exits to user space */
+ kevent_continue(kq, cont_args, error);
+
+errorout:
+ /* reached only via goto above, before the continuation path */
+ fp_drop(p, fd, fp, 0);
+ return (error);
+}
+
+
+/*
+ * kevent_callback - callback for each individual event
+ *
+ * called with nothing locked
+ * caller holds a reference on the kqueue
+ */
+static int
+kevent_callback(__unused struct kqueue *kq, struct kevent64_s *kevp,
+ void *data)
+{
+ struct _kevent *cont_args;
+ int error;
+ int iskev64;
+
+ cont_args = (struct _kevent *)data;
+ /* kqueue_scan must not deliver more events than we have room for */
+ assert(cont_args->eventout < cont_args->eventcount);
+
+ iskev64 = cont_args->eventsize;
+
+ /*
+ * Copy out the appropriate amount of event data for this user.
+ */
+ error = kevent_copyout(kevp, &cont_args->eventlist, current_proc(),
+ iskev64);
+
+ /*
+ * If there isn't space for additional events, return
+ * a harmless error to stop the processing here
+ */
+ if (error == 0 && ++cont_args->eventout == cont_args->eventcount)
+ error = EWOULDBLOCK;
+ return (error);
+}
+
+/*
+ * kevent_description - format a description of a kevent for diagnostic output
+ *
+ * called with a 128-byte string buffer
+ */
+
+char *
+kevent_description(struct kevent64_s *kevp, char *s, size_t n)
+{
+ /* format specifiers match the kevent64_s field widths */
+ snprintf(s, n,
+ "kevent="
+ "{.ident=%#llx, .filter=%d, .flags=%#x, .fflags=%#x, .data=%#llx, .udata=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}",
+ kevp->ident,
+ kevp->filter,
+ kevp->flags,
+ kevp->fflags,
+ kevp->data,
+ kevp->udata,
+ kevp->ext[0],
+ kevp->ext[1]);
+
+ /* returns the caller's buffer for call-chaining */
+ return (s);
+}
+
+/*
+ * kevent_register - add a new event to a kqueue
+ *
+ * Creates a mapping between the event source and
+ * the kqueue via a knote data structure.
+ *
+ * Because many/most the event sources are file
+ * descriptor related, the knote is linked off
+ * the filedescriptor table for quick access.
+ *
+ * called with nothing locked
+ * caller holds a reference on the kqueue
+ */
+
+int
+kevent_register(struct kqueue *kq, struct kevent64_s *kev,
+ __unused struct proc *ctxp)
+{
+ struct proc *p = kq->kq_p;
+ struct filedesc *fdp = p->p_fd;
+ struct filterops *fops;
+ struct fileproc *fp = NULL;
+ struct knote *kn = NULL;
+ int error = 0;
+
+ if (kev->filter < 0) {
+ if (kev->filter + EVFILT_SYSCOUNT < 0)
+ return (EINVAL);