bsd/kern/kern_event.c

   1 /*
   2  * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  *
  22  */
  23 /*-
  24  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
  25  * All rights reserved.
  26  *
  27  * Redistribution and use in source and binary forms, with or without
  28  * modification, are permitted provided that the following conditions
  29  * are met:
  30  * 1. Redistributions of source code must retain the above copyright
  31  *    notice, this list of conditions and the following disclaimer.
  32  * 2. Redistributions in binary form must reproduce the above copyright
  33  *    notice, this list of conditions and the following disclaimer in the
  34  *    documentation and/or other materials provided with the distribution.
  35  *
  36  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  37  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  38  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  39  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  40  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  41  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  42  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  44  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  45  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46  * SUCH DAMAGE.
  47  */
  48 /*
  49  *      @(#)kern_event.c       1.0 (3/31/2000)
  50  */
  51 #include <stdint.h>
  52
  53 #include <sys/param.h>
  54 #include <sys/systm.h>
  55 #include <sys/filedesc.h>
  56 #include <sys/kernel.h>
  57 #include <sys/proc_internal.h>
  58 #include <sys/kauth.h>
  59 #include <sys/malloc.h>
  60 #include <sys/unistd.h>
  61 #include <sys/file_internal.h>
  62 #include <sys/fcntl.h>
  63 #include <sys/select.h>
  64 #include <sys/queue.h>
  65 #include <sys/event.h>
  66 #include <sys/eventvar.h>
  67 #include <sys/protosw.h>
  68 #include <sys/socket.h>
  69 #include <sys/socketvar.h>
  70 #include <sys/stat.h>
  71 #include <sys/sysctl.h>
  72 #include <sys/uio.h>
  73 #include <sys/sysproto.h>
  74 #include <sys/user.h>
  75 #include <string.h>
  76 #include <sys/proc_info.h>
  77
  78 #include <kern/lock.h>
  79 #include <kern/clock.h>
  80 #include <kern/thread_call.h>
  81 #include <kern/sched_prim.h>
  82 #include <kern/zalloc.h>
  83 #include <kern/assert.h>
  84
  85 #include <libkern/libkern.h>
  86
  87 extern void unix_syscall_return(int);
  88
  89 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
  90
  91 static inline void kqlock(struct kqueue *kq);
  92 static inline void kqunlock(struct kqueue *kq);
  93
  94 static int      kqlock2knoteuse(struct kqueue *kq, struct knote *kn);
  95 static int      kqlock2knoteusewait(struct kqueue *kq, struct knote *kn);
  96 static int      kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
  97 static int      knoteuse2kqlock(struct kqueue *kq, struct knote *kn);
  98
  99 static void     kqueue_wakeup(struct kqueue *kq);
 100 static int      kqueue_read(struct fileproc *fp, struct uio *uio,
 101                     kauth_cred_t cred, int flags, struct proc *p);
 102 static int      kqueue_write(struct fileproc *fp, struct uio *uio,
 103                     kauth_cred_t cred, int flags, struct proc *p);
 104 static int      kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
 105                     struct proc *p);
 106 static int      kqueue_select(struct fileproc *fp, int which, void *wql,
 107                     struct proc *p);
 108 static int      kqueue_close(struct fileglob *fp, struct proc *p);
 109 static int      kqueue_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
 110 extern int      kqueue_stat(struct fileproc *fp, struct stat *st, struct proc *p);
 111
 112 static struct fileops kqueueops = {
 113         kqueue_read,
 114         kqueue_write,
 115         kqueue_ioctl,
 116         kqueue_select,
 117         kqueue_close,
 118         kqueue_kqfilter,
 119         0
 120 };
 121
 122 static int kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p);
 123 static int kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p);
 124
 125 static int      kevent_callback(struct kqueue *kq, struct kevent *kevp, void *data);
 126 static void     kevent_continue(struct kqueue *kq, void *data, int error);
 127 static void     kevent_scan_continue(void *contp, wait_result_t wait_result);
 128 static int      kevent_process(struct kqueue *kq, kevent_callback_t callback,
 129                                void *data, int *countp, struct proc *p);
 130 static void     knote_put(struct knote *kn);
 131 static int      knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p);
 132 static void     knote_drop(struct knote *kn, struct proc *p);
 133 static void     knote_activate(struct knote *kn);
 134 static void     knote_deactivate(struct knote *kn);
 135 static void     knote_enqueue(struct knote *kn);
 136 static void     knote_dequeue(struct knote *kn);
 137 static struct   knote *knote_alloc(void);
 138 static void     knote_free(struct knote *kn);
 139 extern void     knote_init(void);
 140
 141 static int      filt_fileattach(struct knote *kn);
 142 static struct filterops file_filtops =
 143         { 1, filt_fileattach, NULL, NULL };
 144
 145 static void     filt_kqdetach(struct knote *kn);
 146 static int      filt_kqueue(struct knote *kn, long hint);
 147 static struct filterops kqread_filtops =
 148         { 1, NULL, filt_kqdetach, filt_kqueue };
 149
 150 /*
 151  * placeholder for not-yet-implemented filters
 152  */
 153 static int      filt_badattach(struct knote *kn);
 154 static struct filterops bad_filtops =
 155         { 0, filt_badattach, 0 , 0 };
 156
 157 static int      filt_procattach(struct knote *kn);
 158 static void     filt_procdetach(struct knote *kn);
 159 static int      filt_proc(struct knote *kn, long hint);
 160
 161 static struct filterops proc_filtops =
 162         { 0, filt_procattach, filt_procdetach, filt_proc };
 163
 164 extern struct filterops fs_filtops;
 165
 166 extern struct filterops sig_filtops;
 167
 168
 169 /* Timer filter */
 170 static int      filt_timercompute(struct knote *kn, uint64_t *abs_time);
 171 static void     filt_timerexpire(void *knx, void *param1);
 172 static int      filt_timerattach(struct knote *kn);
 173 static void     filt_timerdetach(struct knote *kn);
 174 static int      filt_timer(struct knote *kn, long hint);
 175
 176 static struct filterops timer_filtops =
 177         { 0, filt_timerattach, filt_timerdetach, filt_timer };
 178
 179 /* to avoid arming timers that fire quicker than we can handle */
 180 static uint64_t filt_timerfloor = 0;
 181
 182 static lck_mtx_t _filt_timerlock;
 183 static void     filt_timerlock(void);
 184 static void     filt_timerunlock(void);
 185
 186 /*
 187  * Sentinel marker for a thread scanning through the list of
 188  * active knotes.
 189  */
 190 static struct filterops threadmarker_filtops =
 191         { 0, filt_badattach, 0, 0 };
 192
 193 static zone_t   knote_zone;
 194
 195 #define KN_HASHSIZE             64              /* XXX should be tunable */
 196 #define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))
 197
 198 #if 0
 199 extern struct filterops aio_filtops;
 200 #endif
 201
 202 /*
 203  * Table for for all system-defined filters.
 204  */
 205 static struct filterops *sysfilt_ops[] = {
 206         &file_filtops,                  /* EVFILT_READ */
 207         &file_filtops,                  /* EVFILT_WRITE */
 208 #if 0
 209         &aio_filtops,                   /* EVFILT_AIO */
 210 #else
 211         &bad_filtops,                   /* EVFILT_AIO */
 212 #endif
 213         &file_filtops,                  /* EVFILT_VNODE */
 214         &proc_filtops,                  /* EVFILT_PROC */
 215         &sig_filtops,                   /* EVFILT_SIGNAL */
 216         &timer_filtops,                 /* EVFILT_TIMER */
 217         &bad_filtops,                   /* EVFILT_MACHPORT */
 218         &fs_filtops                     /* EVFILT_FS */
 219 };
 220
 221 /*
 222  * kqueue/note lock attributes and implementations
 223  *
 224  *      kqueues have locks, while knotes have use counts
 225  *      Most of the knote state is guarded by the object lock.
 226  *      the knote "inuse" count and status use the kqueue lock.
 227  */
 228 lck_grp_attr_t * kq_lck_grp_attr;
 229 lck_grp_t * kq_lck_grp;
 230 lck_attr_t * kq_lck_attr;
 231
 232 static inline void
 233 kqlock(struct kqueue *kq)
 234 {
 235         lck_spin_lock(&kq->kq_lock);
 236 }
 237
 238 static inline void
 239 kqunlock(struct kqueue *kq)
 240 {
 241         lck_spin_unlock(&kq->kq_lock);
 242 }
 243
 244 /*
 245  * Convert a kq lock to a knote use referece.
 246  *
 247  *      If the knote is being dropped, we can't get
 248  *      a use reference, so just return with it
 249  *      still locked.
 250  *
 251  *      - kq locked at entry
 252  *      - unlock on exit if we get the use reference
 253  */
 254 static int
 255 kqlock2knoteuse(struct kqueue *kq, struct knote *kn)
 256 {
 257         if (kn->kn_status & KN_DROPPING)
 258                 return 0;
 259         kn->kn_inuse++;
 260         kqunlock(kq);
 261         return 1;
 262  }
 263
 264 /*
 265  * Convert a kq lock to a knote use referece.
 266  *
 267  *      If the knote is being dropped, we can't get
 268  *      a use reference, so just return with it
 269  *      still locked.
 270  *
 271  *      - kq locked at entry
 272  *      - kq always unlocked on exit
 273  */
 274 static int
 275 kqlock2knoteusewait(struct kqueue *kq, struct knote *kn)
 276 {
 277         if (!kqlock2knoteuse(kq, kn)) {
 278                 kn->kn_status |= KN_DROPWAIT;
 279                 assert_wait(&kn->kn_status, THREAD_UNINT);
 280                 kqunlock(kq);
 281                 thread_block(THREAD_CONTINUE_NULL);
 282                 return 0;
 283         }
 284         return 1;
 285  }
 286
 287 /*
 288  * Convert from a knote use reference back to kq lock.
 289  *
 290  *      Drop a use reference and wake any waiters if
 291  *      this is the last one.
 292  *
 293  *      The exit return indicates if the knote is
 294  *      still alive - but the kqueue lock is taken
 295  *      unconditionally.
 296  */
 297 static int
 298 knoteuse2kqlock(struct kqueue *kq, struct knote *kn)
 299 {
 300         kqlock(kq);
 301         if ((--kn->kn_inuse == 0) &&
 302             (kn->kn_status & KN_USEWAIT)) {
 303                 kn->kn_status &= ~KN_USEWAIT;
 304                 thread_wakeup(&kn->kn_inuse);
 305         }
 306         return ((kn->kn_status & KN_DROPPING) == 0);
 307  }
 308
 309 /*
 310  * Convert a kq lock to a knote drop referece.
 311  *
 312  *      If the knote is in use, wait for the use count
 313  *      to subside.  We first mark our intention to drop
 314  *      it - keeping other users from "piling on."
 315  *      If we are too late, we have to wait for the
 316  *      other drop to complete.
 317  *
 318  *      - kq locked at entry
 319  *      - always unlocked on exit.
 320  *      - caller can't hold any locks that would prevent
 321  *        the other dropper from completing.
 322  */
 323 static int
 324 kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
 325 {
 326
 327         if ((kn->kn_status & KN_DROPPING) == 0) {
 328                 kn->kn_status |= KN_DROPPING;
 329                 if (kn->kn_inuse > 0) {
 330                         kn->kn_status |= KN_USEWAIT;
 331                         assert_wait(&kn->kn_inuse, THREAD_UNINT);
 332                         kqunlock(kq);
 333                         thread_block(THREAD_CONTINUE_NULL);
 334                 } else
 335                         kqunlock(kq);
 336                 return 1;
 337         } else {
 338                 kn->kn_status |= KN_DROPWAIT;
 339                 assert_wait(&kn->kn_status, THREAD_UNINT);
 340                 kqunlock(kq);
 341                 thread_block(THREAD_CONTINUE_NULL);
 342                 return 0;
 343         }
 344 }
 345
 346 /*
 347  * Release a knote use count reference.
 348  */
 349 static void
 350 knote_put(struct knote *kn)
 351 {
 352         struct kqueue *kq = kn->kn_kq;
 353
 354         kqlock(kq);
 355         if ((--kn->kn_inuse == 0) &&
 356             (kn->kn_status & KN_USEWAIT)) {
 357                 kn->kn_status &= ~KN_USEWAIT;
 358                 thread_wakeup(&kn->kn_inuse);
 359         }
 360         kqunlock(kq);
 361  }
 362
 363
 364
 365 static int
 366 filt_fileattach(struct knote *kn)
 367 {
 368
 369         return (fo_kqfilter(kn->kn_fp, kn, current_proc()));
 370 }
 371
 372 #define f_flag f_fglob->fg_flag
 373 #define f_type f_fglob->fg_type
 374 #define f_msgcount f_fglob->fg_msgcount
 375 #define f_cred f_fglob->fg_cred
 376 #define f_ops f_fglob->fg_ops
 377 #define f_offset f_fglob->fg_offset
 378 #define f_data f_fglob->fg_data
 379
 380 static void
 381 filt_kqdetach(struct knote *kn)
 382 {
 383         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 384
 385         kqlock(kq);
 386         KNOTE_DETACH(&kq->kq_sel.si_note, kn);
 387         kqunlock(kq);
 388 }
 389
 390 /*ARGSUSED*/
 391 static int
 392 filt_kqueue(struct knote *kn, __unused long hint)
 393 {
 394         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 395
 396         kn->kn_data = kq->kq_count;
 397         return (kn->kn_data > 0);
 398 }
 399
 400 static int
 401 filt_procattach(struct knote *kn)
 402 {
 403         struct proc *p;
 404         int funnel_state;
 405
 406         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 407
 408         p = pfind(kn->kn_id);
 409         if (p == NULL) {
 410                 thread_funnel_set(kernel_flock, funnel_state);
 411                 return (ESRCH);
 412         }
 413
 414         kn->kn_flags |= EV_CLEAR;               /* automatically set */
 415         kn->kn_hookid = 1;                      /* mark exit not seen */
 416
 417         /*
 418          * internal flag indicating registration done by kernel
 419          */
 420         if (kn->kn_flags & EV_FLAG1) {
 421                 kn->kn_data = (int)kn->kn_sdata;        /* ppid */
 422                 kn->kn_fflags = NOTE_CHILD;
 423                 kn->kn_flags &= ~EV_FLAG1;
 424         }
 425
 426         /* XXX lock the proc here while adding to the list? */
 427         KNOTE_ATTACH(&p->p_klist, kn);
 428
 429         thread_funnel_set(kernel_flock, funnel_state);
 430
 431         return (0);
 432 }
 433
 434 /*
 435  * The knote may be attached to a different process, which may exit,
 436  * leaving nothing for the knote to be attached to.  In that case,
 437  * we wont be able to find the process from its pid.  But the exit
 438  * code may still be processing the knote list for the target process.
 439  * We may have to wait for that processing to complete before we can
 440  * return (and presumably free the knote) without actually removing
 441  * it from the dead process' knote list.
 442  */
 443 static void
 444 filt_procdetach(struct knote *kn)
 445 {
 446         struct proc *p;
 447         int funnel_state;
 448
 449         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 450         p = pfind(kn->kn_id);
 451
 452         if (p != (struct proc *)NULL) {
 453                 KNOTE_DETACH(&p->p_klist, kn);
 454         } else if (kn->kn_hookid != 0) {        /* if not NOTE_EXIT yet */
 455                 kn->kn_hookid = -1;     /* we are detaching but... */
 456                 assert_wait(&kn->kn_hook, THREAD_UNINT); /* have to wait */
 457                 thread_block(THREAD_CONTINUE_NULL);
 458         }
 459         thread_funnel_set(kernel_flock, funnel_state);
 460 }
 461
 462 static int
 463 filt_proc(struct knote *kn, long hint)
 464 {
 465
 466         if (hint != 0) {
 467                 u_int event;
 468
 469                 /* must hold the funnel when coming from below */
 470                 assert(thread_funnel_get() != (funnel_t)0);
 471
 472                 /*
 473                  * mask off extra data
 474                  */
 475                 event = (u_int)hint & NOTE_PCTRLMASK;
 476
 477                 /*
 478                  * if the user is interested in this event, record it.
 479                  */
 480                 if (kn->kn_sfflags & event)
 481                         kn->kn_fflags |= event;
 482
 483                 /*
 484                  * process is gone, so flag the event as finished.
 485                  *
 486                  * If someone was trying to detach, but couldn't
 487                  * find the proc to complete the detach, wake them
 488                  * up (nothing will ever need to walk the per-proc
 489                  * knote list again - so its safe for them to dump
 490                  * the knote now).
 491                  */
 492                 if (event == NOTE_EXIT) {
 493                         boolean_t detaching = (kn->kn_hookid == -1);
 494
 495                         kn->kn_hookid = 0;
 496                         kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 497                         if (detaching)
 498                                 thread_wakeup(&kn->kn_hookid);
 499                         return (1);
 500                 }
 501
 502                 /*
 503                  * process forked, and user wants to track the new process,
 504                  * so attach a new knote to it, and immediately report an
 505                  * event with the parent's pid.
 506                  */
 507                 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
 508                         struct kevent kev;
 509                         int error;
 510
 511                         /*
 512                          * register knote with new process.
 513                          */
 514                         kev.ident = hint & NOTE_PDATAMASK;      /* pid */
 515                         kev.filter = kn->kn_filter;
 516                         kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
 517                         kev.fflags = kn->kn_sfflags;
 518                         kev.data = kn->kn_id;                   /* parent */
 519                         kev.udata = kn->kn_kevent.udata;        /* preserve udata */
 520                         error = kevent_register(kn->kn_kq, &kev, NULL);
 521                         if (error)
 522                                 kn->kn_fflags |= NOTE_TRACKERR;
 523                 }
 524         }
 525
 526         return (kn->kn_fflags != 0); /* atomic check - no funnel needed from above */
 527 }
 528
 529 /*
 530  * filt_timercompute - compute absolute timeout
 531  *
 532  *      The saved-data field in the knote contains the
 533  *      time value.  The saved filter-flags indicates
 534  *      the unit of measurement.
 535  *
 536  *      If the timeout is not absolute, adjust it for
 537  *      the current time.
 538  */
 539 static int
 540 filt_timercompute(struct knote *kn, uint64_t *abs_time)
 541 {
 542         uint64_t multiplier;
 543         uint64_t raw;
 544
 545         switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS)) {
 546         case NOTE_SECONDS:
 547                 multiplier = NSEC_PER_SEC;
 548                 break;
 549         case NOTE_USECONDS:
 550                 multiplier = NSEC_PER_USEC;
 551                 break;
 552         case NOTE_NSECONDS:
 553                 multiplier = 1;
 554                 break;
 555         case 0: /* milliseconds (default) */
 556                 multiplier = NSEC_PER_SEC / 1000;
 557                 break;
 558         default:
 559                 return EINVAL;
 560         }
 561         nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw);
 562         if (raw <= filt_timerfloor) {
 563                 *abs_time = 0;
 564                 return 0;
 565         }
 566         if ((kn->kn_sfflags & NOTE_ABSOLUTE) == NOTE_ABSOLUTE) {
 567                 uint32_t seconds, nanoseconds;
 568                 uint64_t now;
 569
 570                 clock_get_calendar_nanotime(&seconds, &nanoseconds);
 571                 nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + nanoseconds,
 572                                             &now);
 573                 if (now >= raw + filt_timerfloor) {
 574                         *abs_time = 0;
 575                         return 0;
 576                 }
 577                 raw -= now;
 578         }
 579         clock_absolutetime_interval_to_deadline(raw, abs_time);
 580         return 0;
 581 }
 582
 583 /*
 584  * filt_timerexpire - the timer callout routine
 585  *
 586  *      Just propagate the timer event into the knote
 587  *      filter routine (by going through the knote
 588  *      synchronization point).  Pass a hint to
 589  *      indicate this is a real event, not just a
 590  *      query from above.
 591  */
 592 static void
 593 filt_timerexpire(void *knx, __unused void *spare)
 594 {
 595         struct klist timer_list;
 596         struct knote *kn = knx;
 597
 598         /* no "object" for timers, so fake a list */
 599         SLIST_INIT(&timer_list);
 600         SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);
 601         KNOTE(&timer_list, 1);
 602 }
 603
 604 /*
 605  * data contains amount of time to sleep, in milliseconds,
 606  * or a pointer to a timespec structure.
 607  */
 608 static int
 609 filt_timerattach(struct knote *kn)
 610 {
 611         thread_call_t callout;
 612         uint64_t deadline;
 613         int error;
 614
 615         error = filt_timercompute(kn, &deadline);
 616         if (error)
 617                 return (error);
 618
 619         if (deadline) {
 620                 callout = thread_call_allocate(filt_timerexpire, kn);
 621                 if (NULL == callout)
 622                         return (ENOMEM);
 623         } else {
 624                 /* handle as immediate */
 625                 kn->kn_sdata = 0;
 626                 callout = NULL;
 627         }
 628
 629         filt_timerlock();
 630         kn->kn_hook = (caddr_t)callout;
 631
 632         /* absolute=EV_ONESHOT */
 633         if (kn->kn_sfflags & NOTE_ABSOLUTE)
 634                 kn->kn_flags |= EV_ONESHOT;
 635
 636         if (deadline) {
 637                 /* all others - if not faking immediate */
 638                 kn->kn_flags |= EV_CLEAR;
 639                 thread_call_enter_delayed(callout, deadline);
 640                 kn->kn_hookid = 0;
 641         } else {
 642                 /* fake immediate */
 643                 kn->kn_hookid = 1;
 644         }
 645         filt_timerunlock();
 646         return (0);
 647 }
 648
 649 static void
 650 filt_timerdetach(struct knote *kn)
 651 {
 652         thread_call_t callout;
 653
 654         filt_timerlock();
 655         callout = (thread_call_t)kn->kn_hook;
 656         if (callout != NULL) {
 657                 boolean_t cancelled;
 658
 659                 /* cancel the callout if we can */
 660                 cancelled = thread_call_cancel(callout);
 661                 if (cancelled) {
 662                         /* got it, just free it */
 663                         kn->kn_hook = NULL;
 664                         filt_timerunlock();
 665                         thread_call_free(callout);
 666                         return;
 667                 }
 668                 /* we have to wait for the expire routine.  */
 669                 kn->kn_hookid = -1;     /* we are detaching */
 670                 assert_wait(&kn->kn_hook, THREAD_UNINT);
 671                 filt_timerunlock();
 672                 thread_block(THREAD_CONTINUE_NULL);
 673                 assert(kn->kn_hook == NULL);
 674                 return;
 675         }
 676         /* nothing to do */
 677         filt_timerunlock();
 678 }
 679
 680
 681
 682 static int
 683 filt_timer(struct knote *kn, __unused long hint)
 684 {
 685         int result;
 686
 687         if (hint) {
 688                 /* real timer pop */
 689                 thread_call_t callout;
 690                 boolean_t detaching;
 691
 692                 filt_timerlock();
 693
 694                 kn->kn_data++;
 695
 696                 detaching = (kn->kn_hookid < 0);
 697                 callout = (thread_call_t)kn->kn_hook;
 698
 699                 if (!detaching && (kn->kn_flags & EV_ONESHOT) == 0) {
 700                         uint64_t deadline;
 701                         int error;
 702
 703                         /* user input data may have changed - deal */
 704                         error = filt_timercompute(kn, &deadline);
 705                         if (error) {
 706                                 kn->kn_flags |= EV_ERROR;
 707                                 kn->kn_data = error;
 708                         } else if (deadline == 0) {
 709                                 /* revert to fake immediate */
 710                                 kn->kn_flags &= ~EV_CLEAR;
 711                                 kn->kn_sdata = 0;
 712                                 kn->kn_hookid = 1;
 713                         } else {
 714                                 /* keep the callout and re-arm */
 715                                 thread_call_enter_delayed(callout, deadline);
 716                                 filt_timerunlock();
 717                                 return 1;
 718                         }
 719                 }
 720                 kn->kn_hook = NULL;
 721                 filt_timerunlock();
 722                 thread_call_free(callout);
 723
 724                 /* if someone is waiting for timer to pop */
 725                 if (detaching)
 726                         thread_wakeup(&kn->kn_hook);
 727
 728                 return 1;
 729         }
 730
 731         /* user-query */
 732         filt_timerlock();
 733
 734         /* change fake timer to real if needed */
 735         while (kn->kn_hookid > 0 && kn->kn_sdata > 0) {
 736                 int error;
 737
 738                 /* update the fake timer (make real) */
 739                 kn->kn_hookid = 0;
 740                 kn->kn_data = 0;
 741                 filt_timerunlock();
 742                 error = filt_timerattach(kn);
 743                 filt_timerlock();
 744                 if (error) {
 745                         kn->kn_flags |= EV_ERROR;
 746                         kn->kn_data = error;
 747                         filt_timerunlock();
 748                         return 1;
 749                 }
 750         }
 751
 752         /* if still fake, pretend it fired */
 753         if (kn->kn_hookid > 0)
 754                 kn->kn_data = 1;
 755
 756         result = (kn->kn_data != 0);
 757         filt_timerunlock();
 758         return result;
 759 }
 760
 761 static void
 762 filt_timerlock(void)
 763 {
 764         lck_mtx_lock(&_filt_timerlock);
 765 }
 766
 767 static void
 768 filt_timerunlock(void)
 769 {
 770         lck_mtx_unlock(&_filt_timerlock);
 771 }
 772
 773 /*
 774  * JMM - placeholder for not-yet-implemented filters
 775  */
 776 static int
 777 filt_badattach(__unused struct knote *kn)
 778 {
 779         return(ENOTSUP);
 780 }
 781
 782
 783 struct kqueue *
 784 kqueue_alloc(struct proc *p)
 785 {
 786         struct filedesc *fdp = p->p_fd;
 787         struct kqueue *kq;
 788
 789         MALLOC_ZONE(kq, struct kqueue *, sizeof(struct kqueue), M_KQUEUE, M_WAITOK);
 790         if (kq != NULL) {
 791                 bzero(kq, sizeof(struct kqueue));
 792                 lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
 793                 TAILQ_INIT(&kq->kq_head);
 794                 TAILQ_INIT(&kq->kq_inprocess);
 795                 kq->kq_fdp = fdp;
 796         }
 797
 798         if (fdp->fd_knlistsize < 0) {
 799                 proc_fdlock(p);
 800                 if (fdp->fd_knlistsize < 0)
 801                         fdp->fd_knlistsize = 0;         /* this process has had a kq */
 802                 proc_fdunlock(p);
 803         }
 804
 805         return kq;
 806 }
 807
 808
 809 /*
 810  * kqueue_dealloc - detach all knotes from a kqueue and free it
 811  *
 812  *      We walk each list looking for knotes referencing this
 813  *      this kqueue.  If we find one, we try to drop it.  But
 814  *      if we fail to get a drop reference, that will wait
 815  *      until it is dropped.  So, we can just restart again
 816  *      safe in the assumption that the list will eventually
 817  *      not contain any more references to this kqueue (either
 818  *      we dropped them all, or someone else did).
 819  *
 820  *      Assumes no new events are being added to the kqueue.
 821  *      Nothing locked on entry or exit.
 822  */
 823 void
 824 kqueue_dealloc(struct kqueue *kq, struct proc *p)
 825 {
 826         struct filedesc *fdp = p->p_fd;
 827         struct knote *kn;
 828         int i;
 829
 830         proc_fdlock(p);
 831         for (i = 0; i < fdp->fd_knlistsize; i++) {
 832                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 833                 while (kn != NULL) {
 834                         if (kq == kn->kn_kq) {
 835                                 kqlock(kq);
 836                                 proc_fdunlock(p);
 837                                 /* drop it ourselves or wait */
 838                                 if (kqlock2knotedrop(kq, kn)) {
 839                                         kn->kn_fop->f_detach(kn);
 840                                         knote_drop(kn, p);
 841                                 }
 842                                 proc_fdlock(p);
 843                                 /* start over at beginning of list */
 844                                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 845                                 continue;
 846                         }
 847                         kn = SLIST_NEXT(kn, kn_link);
 848                 }
 849         }
 850         if (fdp->fd_knhashmask != 0) {
 851                 for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
 852                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 853                         while (kn != NULL) {
 854                                 if (kq == kn->kn_kq) {
 855                                         kqlock(kq);
 856                                         proc_fdunlock(p);
 857                                         /* drop it ourselves or wait */
 858                                         if (kqlock2knotedrop(kq, kn)) {
 859                                                 kn->kn_fop->f_detach(kn);
 860                                                 knote_drop(kn, p);
 861                                         }
 862                                         proc_fdlock(p);
 863                                         /* start over at beginning of list */
 864                                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 865                                         continue;
 866                                 }
 867                                 kn = SLIST_NEXT(kn, kn_link);
 868                         }
 869                 }
 870         }
 871         proc_fdunlock(p);
 872         lck_spin_destroy(&kq->kq_lock, kq_lck_grp);
 873         FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE);
 874 }
 875
 876 int
 877 kqueue(struct proc *p, __unused struct kqueue_args *uap, register_t *retval)
 878 {
 879         struct kqueue *kq;
 880         struct fileproc *fp;
 881         int fd, error;
 882
 883         error = falloc(p, &fp, &fd);
 884         if (error) {
 885                 return (error);
 886         }
 887
 888         kq = kqueue_alloc(p);
 889         if (kq == NULL) {
 890                 fp_free(p, fd, fp);
 891                 return (ENOMEM);
 892         }
 893
 894         fp->f_flag = FREAD | FWRITE;
 895         fp->f_type = DTYPE_KQUEUE;
 896         fp->f_ops = &kqueueops;
 897         fp->f_data = (caddr_t)kq;
 898
 899         proc_fdlock(p);
 900         *fdflags(p, fd) &= ~UF_RESERVED;
 901         fp_drop(p, fd, fp, 1);
 902         proc_fdunlock(p);
 903
 904         *retval = fd;
 905         return (error);
 906 }
 907
 908 int
 909 kqueue_portset_np(__unused struct proc *p,
 910                                   __unused struct kqueue_portset_np_args *uap,
 911                                   __unused register_t *retval)
 912 {
 913                 /* JMM - Placeholder for now */
 914                 return (ENOTSUP);
 915 }
 916
 917 int
 918 kqueue_from_portset_np(__unused struct proc *p,
 919                                            __unused struct kqueue_from_portset_np_args *uap,
 920                                            __unused register_t *retval)
 921 {
 922                 /* JMM - Placeholder for now */
 923                 return (ENOTSUP);
 924 }
 925
 926 static int
 927 kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p)
 928 {
 929         int advance;
 930         int error;
 931
 932         if (IS_64BIT_PROCESS(p)) {
 933                 struct user_kevent kev64;
 934
 935                 advance = sizeof(kev64);
 936                 error = copyin(*addrp, (caddr_t)&kev64, advance);
 937                 if (error)
 938                         return error;
 939                 kevp->ident = CAST_DOWN(uintptr_t, kev64.ident);
 940                 kevp->filter = kev64.filter;
 941                 kevp->flags = kev64.flags;
 942                 kevp->fflags = kev64.fflags;
 943                 kevp->data = CAST_DOWN(intptr_t, kev64.data);
 944                 kevp->udata = kev64.udata;
 945         } else {
 946                 /*
 947                  * compensate for legacy in-kernel kevent layout
 948                  * where the udata field is alredy 64-bit.
 949                  */
 950                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 951                 error = copyin(*addrp, (caddr_t)kevp, advance);
 952         }
 953         if (!error)
 954                 *addrp += advance;
 955         return error;
 956 }
 957
 958 static int
 959 kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p)
 960 {
 961         int advance;
 962         int error;
 963
 964         if (IS_64BIT_PROCESS(p)) {
 965                 struct user_kevent kev64;
 966
 967                 kev64.ident = (uint64_t) kevp->ident;
 968                 kev64.filter = kevp->filter;
 969                 kev64.flags = kevp->flags;
 970                 kev64.fflags = kevp->fflags;
 971                 kev64.data = (int64_t) kevp->data;
 972                 kev64.udata = kevp->udata;
 973                 advance = sizeof(kev64);
 974                 error = copyout((caddr_t)&kev64, *addrp, advance);
 975         } else {
 976                 /*
 977                  * compensate for legacy in-kernel kevent layout
 978                  * where the udata field is alredy 64-bit.
 979                  */
 980                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 981                 error = copyout((caddr_t)kevp, *addrp, advance);
 982         }
 983         if (!error)
 984                 *addrp += advance;
 985         return error;
 986 }
 987
 988 /*
 989  * kevent_continue - continue a kevent syscall after blocking
 990  *
 991  *      assume we inherit a use count on the kq fileglob.
 992  */
 993
 994 static void
 995 kevent_continue(__unused struct kqueue *kq, void *data, int error)
 996 {
 997         struct _kevent *cont_args;
 998         struct fileproc *fp;
 999         register_t *retval;
1000         int noutputs;
1001         int fd;
1002         struct proc *p = current_proc();
1003
1004         cont_args = (struct _kevent *)data;
1005         noutputs = cont_args->eventout;
1006         retval = cont_args->retval;
1007         fd = cont_args->fd;
1008         fp = cont_args->fp;
1009
1010         fp_drop(p, fd, fp, 0);
1011
1012         /* don't restart after signals... */
1013         if (error == ERESTART)
1014                 error = EINTR;
1015         else if (error == EWOULDBLOCK)
1016                 error = 0;
1017         if (error == 0)
1018                 *retval = noutputs;
1019         unix_syscall_return(error);
1020 }
1021
1022 /*
1023  * kevent - [syscall] register and wait for kernel events
1024  *
1025  */
1026
1027 int
1028 kevent(struct proc *p, struct kevent_args *uap, register_t *retval)
1029 {
1030         user_addr_t changelist = uap->changelist;
1031         user_addr_t ueventlist = uap->eventlist;
1032         int nchanges = uap->nchanges;
1033         int nevents = uap->nevents;
1034         int fd = uap->fd;
1035
1036         struct _kevent *cont_args;
1037         uthread_t ut;
1038         struct kqueue *kq;
1039         struct fileproc *fp;
1040         struct kevent kev;
1041         int error, noutputs;
1042         struct timeval atv;
1043
1044         /* convert timeout to absolute - if we have one */
1045         if (uap->timeout != USER_ADDR_NULL) {
1046                 struct timeval rtv;
1047                 if ( IS_64BIT_PROCESS(p) ) {
1048                         struct user_timespec ts;
1049                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1050                         if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0)
1051                                 error = EINVAL;
1052                         else
1053                                 TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1054                 } else {
1055                         struct timespec ts;
1056                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1057                         TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1058                 }
1059                 if (error)
1060                         return error;
1061                 if (itimerfix(&rtv))
1062                         return EINVAL;
1063                 getmicrouptime(&atv);
1064                 timevaladd(&atv, &rtv);
1065         } else {
1066                 atv.tv_sec = 0;
1067                 atv.tv_usec = 0;
1068         }
1069
1070         /* get a usecount for the kq itself */
1071         if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
1072                 return(error);
1073
1074         /* register all the change requests the user provided... */
1075         noutputs = 0;
1076         while (nchanges > 0 && error == 0) {
1077                 error = kevent_copyin(&changelist, &kev, p);
1078                 if (error)
1079                         break;
1080
1081                 kev.flags &= ~EV_SYSFLAGS;
1082                 error = kevent_register(kq, &kev, p);
1083                 if (error && nevents > 0) {
1084                         kev.flags = EV_ERROR;
1085                         kev.data = error;
1086                         error = kevent_copyout(&kev, &ueventlist, p);
1087                         if (error == 0) {
1088                                 nevents--;
1089                                 noutputs++;
1090                         }
1091                 }
1092                 nchanges--;
1093         }
1094
1095         /* store the continuation/completion data in the uthread */
1096         ut = (uthread_t)get_bsdthread_info(current_thread());
1097         cont_args = (struct _kevent *)&ut->uu_state.ss_kevent;
1098         cont_args->fp = fp;
1099         cont_args->fd = fd;
1100         cont_args->retval = retval;
1101         cont_args->eventlist = ueventlist;
1102         cont_args->eventcount = nevents;
1103         cont_args->eventout = noutputs;
1104
1105         if (nevents > 0 && noutputs == 0 && error == 0)
1106                 error = kevent_scan(kq, kevent_callback,
1107                                     kevent_continue, cont_args,
1108                                     &atv, p);
1109         kevent_continue(kq, cont_args, error);
1110         /* NOTREACHED */
1111         return error;
1112 }
1113
1114
1115 /*
1116  * kevent_callback - callback for each individual event
1117  *
1118  *      called with nothing locked
1119  *      caller holds a reference on the kqueue
1120  */
1121
1122 static int
1123 kevent_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
1124 {
1125         struct _kevent *cont_args;
1126         int error;
1127
1128         cont_args = (struct _kevent *)data;
1129         assert(cont_args->eventout < cont_arg->eventcount);
1130
1131         /*
1132          * Copy out the appropriate amount of event data for this user.
1133          */
1134         error = kevent_copyout(kevp, &cont_args->eventlist, current_proc());
1135
1136         /*
1137          * If there isn't space for additional events, return
1138          * a harmless error to stop the processing here
1139          */
1140         if (error == 0 && ++cont_args->eventout == cont_args->eventcount)
1141                         error = EWOULDBLOCK;
1142         return error;
1143 }
1144
1145 /*
1146  * kevent_register - add a new event to a kqueue
1147  *
1148  *      Creates a mapping between the event source and
1149  *      the kqueue via a knote data structure.
1150  *
1151  *      Because many/most the event sources are file
1152  *      descriptor related, the knote is linked off
1153  *      the filedescriptor table for quick access.
1154  *
1155  *      called with nothing locked
1156  *      caller holds a reference on the kqueue
1157  */
1158
1159 int
1160 kevent_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
1161 {
1162         struct filedesc *fdp = kq->kq_fdp;
1163         struct filterops *fops;
1164         struct fileproc *fp = NULL;
1165         struct knote *kn = NULL;
1166         int error = 0;
1167
1168         if (kev->filter < 0) {
1169                 if (kev->filter + EVFILT_SYSCOUNT < 0)
1170                         return (EINVAL);
1171                 fops = sysfilt_ops[~kev->filter];       /* to 0-base index */
1172         } else {
1173                 /*
1174                  * XXX
1175                  * filter attach routine is responsible for insuring that
1176                  * the identifier can be attached to it.
1177                  */
1178                 printf("unknown filter: %d\n", kev->filter);
1179                 return (EINVAL);
1180         }
1181
1182         /* this iocount needs to be dropped if it is not registered */
1183         if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 0)) != 0)
1184                 return(error);
1185
1186  restart:
1187         proc_fdlock(p);
1188         if (fops->f_isfd) {
1189                 /* fd-based knotes are linked off the fd table */
1190                 if (kev->ident < (u_int)fdp->fd_knlistsize) {
1191                         SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
1192                                 if (kq == kn->kn_kq &&
1193                                     kev->filter == kn->kn_filter)
1194                                         break;
1195                 }
1196         } else {
1197                 /* hash non-fd knotes here too */
1198                 if (fdp->fd_knhashmask != 0) {
1199                         struct klist *list;
1200
1201                         list = &fdp->fd_knhash[
1202                             KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
1203                         SLIST_FOREACH(kn, list, kn_link)
1204                                 if (kev->ident == kn->kn_id &&
1205                                     kq == kn->kn_kq &&
1206                                     kev->filter == kn->kn_filter)
1207                                         break;
1208                 }
1209         }
1210
1211         /*
1212          * kn now contains the matching knote, or NULL if no match
1213          */
1214         if (kn == NULL) {
1215                 if ((kev->flags & (EV_ADD|EV_DELETE)) == EV_ADD) {
1216                         kn = knote_alloc();
1217                         if (kn == NULL) {
1218                                 proc_fdunlock(p);
1219                                 error = ENOMEM;
1220                                 goto done;
1221                         }
1222                         kn->kn_fp = fp;
1223                         kn->kn_kq = kq;
1224                         kn->kn_tq = &kq->kq_head;
1225                         kn->kn_fop = fops;
1226                         kn->kn_sfflags = kev->fflags;
1227                         kn->kn_sdata = kev->data;
1228                         kev->fflags = 0;
1229                         kev->data = 0;
1230                         kn->kn_kevent = *kev;
1231                         kn->kn_inuse = 1;  /* for f_attach() */
1232                         kn->kn_status = 0;
1233
1234                         /* before anyone can find it */
1235                         if (kev->flags & EV_DISABLE)
1236                                 kn->kn_status |= KN_DISABLED;
1237
1238                         error = knote_fdpattach(kn, fdp, p);
1239                         proc_fdunlock(p);
1240
1241                         if (error) {
1242                                 knote_free(kn);
1243                                 goto done;
1244                         }
1245
1246                         /*
1247                          * apply reference count to knote structure, and
1248                          * do not release it at the end of this routine.
1249                          */
1250                         fp = NULL;
1251
1252                         /*
1253                          * If the attach fails here, we can drop it knowing
1254                          * that nobody else has a reference to the knote.
1255                          */
1256                         if ((error = fops->f_attach(kn)) != 0) {
1257                                 knote_drop(kn, p);
1258                                 goto done;
1259                         }
1260                 } else {
1261                         proc_fdunlock(p);
1262                         error = ENOENT;
1263                         goto done;
1264                 }
1265         } else {
1266                 /* existing knote - get kqueue lock */
1267                 kqlock(kq);
1268                 proc_fdunlock(p);
1269
1270                 if (kev->flags & EV_DELETE) {
1271                         knote_dequeue(kn);
1272                         kn->kn_status |= KN_DISABLED;
1273                         if (kqlock2knotedrop(kq, kn)) {
1274                                 kn->kn_fop->f_detach(kn);
1275                                 knote_drop(kn, p);
1276                         }
1277                         goto done;
1278                 }
1279
1280                 /* update status flags for existing knote */
1281                 if (kev->flags & EV_DISABLE) {
1282                         knote_dequeue(kn);
1283                         kn->kn_status |= KN_DISABLED;
1284                 } else if (kev->flags & EV_ENABLE) {
1285                         kn->kn_status &= ~KN_DISABLED;
1286                         if (kn->kn_status & KN_ACTIVE)
1287                                 knote_enqueue(kn);
1288                 }
1289
1290                 /*
1291                  * If somebody is in the middle of dropping this
1292                  * knote - go find/insert a new one.  But we have
1293                  * wait for this one to go away first.
1294                  */
1295                 if (!kqlock2knoteusewait(kq, kn))
1296                         /* kqueue unlocked */
1297                         goto restart;
1298
1299                 /*
1300                  * The user may change some filter values after the
1301                  * initial EV_ADD, but doing so will not reset any
1302                  * filter which have already been triggered.
1303                  */
1304                 kn->kn_sfflags = kev->fflags;
1305                 kn->kn_sdata = kev->data;
1306                 kn->kn_kevent.udata = kev->udata;
1307         }
1308
1309         /* still have use ref on knote */
1310         if (kn->kn_fop->f_event(kn, 0)) {
1311                 if (knoteuse2kqlock(kq, kn))
1312                         knote_activate(kn);
1313                 kqunlock(kq);
1314         } else {
1315                 knote_put(kn);
1316         }
1317
1318 done:
1319         if (fp != NULL)
1320                 fp_drop(p, kev->ident, fp, 0);
1321         return (error);
1322 }
1323
1324 /*
1325  * kevent_process - process the triggered events in a kqueue
1326  *
1327  *      Walk the queued knotes and validate that they are
1328  *      really still triggered events by calling the filter
1329  *      routines (if necessary).  Hold a use reference on
1330  *      the knote to avoid it being detached. For each event
1331  *      that is still considered triggered, invoke the
1332  *      callback routine provided.
1333  *
1334  *      caller holds a reference on the kqueue.
1335  *      kqueue locked on entry and exit - but may be dropped
1336  */
1337
1338 static int
1339 kevent_process(struct kqueue *kq,
1340                kevent_callback_t callback,
1341                void *data,
1342                int *countp,
1343                struct proc *p)
1344 {
1345         struct knote *kn;
1346         struct kevent kev;
1347         int nevents;
1348         int error;
1349
1350  restart:
1351         if (kq->kq_count == 0) {
1352                 *countp = 0;
1353                 return 0;
1354         }
1355
1356         /* if someone else is processing the queue, wait */
1357         if (!TAILQ_EMPTY(&kq->kq_inprocess)) {
1358                 assert_wait(&kq->kq_inprocess, THREAD_UNINT);
1359                 kq->kq_state |= KQ_PROCWAIT;
1360                 kqunlock(kq);
1361                 thread_block(THREAD_CONTINUE_NULL);
1362                 kqlock(kq);
1363                 goto restart;
1364         }
1365
1366         error = 0;
1367         nevents = 0;
1368         while (error == 0 &&
1369                (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) {
1370
1371                 /*
1372                  * move knote to the processed queue.
1373                  * this is also protected by the kq lock.
1374                  */
1375                 assert(kn->kn_tq == &kq->kq_head);
1376                 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1377                 kn->kn_tq = &kq->kq_inprocess;
1378                 TAILQ_INSERT_TAIL(&kq->kq_inprocess, kn, kn_tqe);
1379
1380                 /*
1381                  * Non-EV_ONESHOT events must be re-validated.
1382                  *
1383                  * Convert our lock to a use-count and call the event's
1384                  * filter routine to update.
1385                  *
1386                  * If the event is dropping (or no longer valid), we
1387                  * already have it off the active queue, so just
1388                  * finish the job of deactivating it.
1389                  */
1390                 if ((kn->kn_flags & EV_ONESHOT) == 0) {
1391                         int result;
1392
1393                         if (kqlock2knoteuse(kq, kn)) {
1394
1395                                 /* call the filter with just a ref */
1396                                 result = kn->kn_fop->f_event(kn, 0);
1397
1398                                 if (!knoteuse2kqlock(kq, kn) || result == 0) {
1399                                         knote_deactivate(kn);
1400                                         continue;
1401                                 }
1402                         } else {
1403                                 knote_deactivate(kn);
1404                                 continue;
1405                         }
1406                 }
1407
1408                 /*
1409                  * Got a valid triggered knote with the kqueue
1410                  * still locked.  Snapshot the data, and determine
1411                  * how to dispatch the knote for future events.
1412                  */
1413                 kev = kn->kn_kevent;
1414
1415                 /* now what happens to it? */
1416                 if (kn->kn_flags & EV_ONESHOT) {
1417                         knote_deactivate(kn);
1418                         if (kqlock2knotedrop(kq, kn)) {
1419                                 kn->kn_fop->f_detach(kn);
1420                                 knote_drop(kn, p);
1421                         }
1422                 } else if (kn->kn_flags & EV_CLEAR) {
1423                         knote_deactivate(kn);
1424                         kn->kn_data = 0;
1425                         kn->kn_fflags = 0;
1426                         kqunlock(kq);
1427                 } else {
1428                         /*
1429                          * leave on in-process queue.  We'll
1430                          * move all the remaining ones back
1431                          * the kq queue and wakeup any
1432                          * waiters when we are done.
1433                          */
1434                         kqunlock(kq);
1435                 }
1436
1437                 /* callback to handle each event as we find it */
1438                 error = (callback)(kq, &kev, data);
1439                 nevents++;
1440
1441                 kqlock(kq);
1442         }
1443
1444         /*
1445          * With the kqueue still locked, move any knotes
1446          * remaining on the in-process queue back to the
1447          * kq's queue and wake up any waiters.
1448          */
1449         while ((kn = TAILQ_FIRST(&kq->kq_inprocess)) != NULL) {
1450                 assert(kn->kn_tq == &kq->kq_inprocess);
1451                 TAILQ_REMOVE(&kq->kq_inprocess, kn, kn_tqe);
1452                 kn->kn_tq = &kq->kq_head;
1453                 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1454         }
1455         if (kq->kq_state & KQ_PROCWAIT) {
1456                 kq->kq_state &= ~KQ_PROCWAIT;
1457                 thread_wakeup(&kq->kq_inprocess);
1458         }
1459
1460         *countp = nevents;
1461         return error;
1462 }
1463
1464
1465 static void
1466 kevent_scan_continue(void *data, wait_result_t wait_result)
1467 {
1468         uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1469         struct _kevent_scan * cont_args = &ut->uu_state.ss_kevent_scan;
1470         struct kqueue *kq = (struct kqueue *)data;
1471         int error;
1472         int count;
1473
1474         /* convert the (previous) wait_result to a proper error */
1475         switch (wait_result) {
1476         case THREAD_AWAKENED:
1477                 kqlock(kq);
1478                 error = kevent_process(kq, cont_args->call, cont_args, &count, current_proc());
1479                 if (error == 0 && count == 0) {
1480                         assert_wait_deadline(kq, THREAD_ABORTSAFE, cont_args->deadline);
1481                         kq->kq_state |= KQ_SLEEP;
1482                         kqunlock(kq);
1483                         thread_block_parameter(kevent_scan_continue, kq);
1484                         /* NOTREACHED */
1485                 }
1486                 kqunlock(kq);
1487                 break;
1488         case THREAD_TIMED_OUT:
1489                 error = EWOULDBLOCK;
1490                 break;
1491         case THREAD_INTERRUPTED:
1492                 error = EINTR;
1493                 break;
1494         default:
1495                 panic("kevent_scan_cont() - invalid wait_result (%d)", wait_result);
1496                 error = 0;
1497         }
1498
1499         /* call the continuation with the results */
1500         assert(cont_args->cont != NULL);
1501         (cont_args->cont)(kq, cont_args->data, error);
1502 }
1503
1504
1505 /*
1506  * kevent_scan - scan and wait for events in a kqueue
1507  *
1508  *      Process the triggered events in a kqueue.
1509  *
1510  *      If there are no events triggered arrange to
1511  *      wait for them. If the caller provided a
1512  *      continuation routine, then kevent_scan will
1513  *      also.
1514  *
1515  *      The callback routine must be valid.
1516  *      The caller must hold a use-count reference on the kq.
1517  */
1518
1519 int
1520 kevent_scan(struct kqueue *kq,
1521             kevent_callback_t callback,
1522             kevent_continue_t continuation,
1523             void *data,
1524             struct timeval *atvp,
1525             struct proc *p)
1526 {
1527         thread_continue_t cont = THREAD_CONTINUE_NULL;
1528         uint64_t deadline;
1529         int error;
1530         int first;
1531
1532         assert(callback != NULL);
1533
1534         first = 1;
1535         for (;;) {
1536                 wait_result_t wait_result;
1537                 int count;
1538
1539                 /*
1540                  * Make a pass through the kq to find events already
1541                  * triggered.
1542                  */
1543                 kqlock(kq);
1544                 error = kevent_process(kq, callback, data, &count, p);
1545                 if (error || count)
1546                         break; /* lock still held */
1547
1548                 /* looks like we have to consider blocking */
1549                 if (first) {
1550                         first = 0;
1551                         /* convert the timeout to a deadline once */
1552                         if (atvp->tv_sec || atvp->tv_usec) {
1553                                 uint32_t seconds, nanoseconds;
1554                                 uint64_t now;
1555
1556                                 clock_get_uptime(&now);
1557                                 nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC +
1558                                                             atvp->tv_usec * NSEC_PER_USEC,
1559                                                             &deadline);
1560                                 if (now >= deadline) {
1561                                         /* non-blocking call */
1562                                         error = EWOULDBLOCK;
1563                                         break; /* lock still held */
1564                                 }
1565                                 deadline -= now;
1566                                 clock_absolutetime_interval_to_deadline(deadline, &deadline);
1567                         } else {
1568                                 deadline = 0;   /* block forever */
1569                         }
1570
1571                         if (continuation) {
1572                                 uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1573                                 struct _kevent_scan *cont_args = &ut->uu_state.ss_kevent_scan;
1574
1575                                 cont_args->call = callback;
1576                                 cont_args->cont = continuation;
1577                                 cont_args->deadline = deadline;
1578                                 cont_args->data = data;
1579                                 cont = kevent_scan_continue;
1580                         }
1581                 }
1582
1583                 /* go ahead and wait */
1584                 assert_wait_deadline(kq, THREAD_ABORTSAFE, deadline);
1585                 kq->kq_state |= KQ_SLEEP;
1586                 kqunlock(kq);
1587                 wait_result = thread_block_parameter(cont, kq);
1588                 /* NOTREACHED if (continuation != NULL) */
1589
1590                 switch (wait_result) {
1591                 case THREAD_AWAKENED:
1592                         continue;
1593                 case THREAD_TIMED_OUT:
1594                         return EWOULDBLOCK;
1595                 case THREAD_INTERRUPTED:
1596                         return EINTR;
1597                 default:
1598                         panic("kevent_scan - bad wait_result (%d)",
1599                               wait_result);
1600                         error = 0;
1601                 }
1602         }
1603         kqunlock(kq);
1604         return error;
1605 }
1606
1607
1608 /*
1609  * XXX
1610  * This could be expanded to call kqueue_scan, if desired.
1611  */
1612 /*ARGSUSED*/
1613 static int
1614 kqueue_read(__unused struct fileproc *fp,
1615                         __unused struct uio *uio,
1616                         __unused kauth_cred_t cred,
1617                         __unused int flags,
1618                         __unused struct proc *p)
1619 {
1620         return (ENXIO);
1621 }
1622
1623 /*ARGSUSED*/
1624 static int
1625 kqueue_write(__unused struct fileproc *fp,
1626                          __unused struct uio *uio,
1627                          __unused kauth_cred_t cred,
1628                          __unused int flags,
1629                          __unused struct proc *p)
1630 {
1631         return (ENXIO);
1632 }
1633
1634 /*ARGSUSED*/
1635 static int
1636 kqueue_ioctl(__unused struct fileproc *fp,
1637                          __unused u_long com,
1638                          __unused caddr_t data,
1639                          __unused struct proc *p)
1640 {
1641         return (ENOTTY);
1642 }
1643
1644 /*ARGSUSED*/
1645 static int
1646 kqueue_select(struct fileproc *fp, int which, void *wql, struct proc *p)
1647 {
1648         struct kqueue *kq = (struct kqueue *)fp->f_data;
1649         int retnum = 0;
1650
1651         if (which == FREAD) {
1652                 kqlock(kq);
1653                 if (kq->kq_count) {
1654                         retnum = 1;
1655                 } else {
1656                         selrecord(p, &kq->kq_sel, wql);
1657                         kq->kq_state |= KQ_SEL;
1658                 }
1659                 kqunlock(kq);
1660         }
1661         return (retnum);
1662 }
1663
1664 /*
1665  * kqueue_close -
1666  */
1667 /*ARGSUSED*/
1668 static int
1669 kqueue_close(struct fileglob *fg, struct proc *p)
1670 {
1671         struct kqueue *kq = (struct kqueue *)fg->fg_data;
1672
1673         kqueue_dealloc(kq, p);
1674         fg->fg_data = NULL;
1675         return (0);
1676 }
1677
1678 /*ARGSUSED*/
1679 /*
1680  * The callers has taken a use-count reference on this kqueue and will donate it
1681  * to the kqueue we are being added to.  This keeps the kqueue from closing until
1682  * that relationship is torn down.
1683  */
1684 static int
1685 kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
1686 {
1687         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
1688
1689         if (kn->kn_filter != EVFILT_READ)
1690                 return (1);
1691
1692         kn->kn_fop = &kqread_filtops;
1693         kqlock(kq);
1694         KNOTE_ATTACH(&kq->kq_sel.si_note, kn);
1695         kqunlock(kq);
1696         return (0);
1697 }
1698
1699 /*ARGSUSED*/
1700 int
1701 kqueue_stat(struct fileproc *fp, struct stat *st, __unused struct proc *p)
1702 {
1703         struct kqueue *kq = (struct kqueue *)fp->f_data;
1704
1705         bzero((void *)st, sizeof(*st));
1706         st->st_size = kq->kq_count;
1707         st->st_blksize = sizeof(struct kevent);
1708         st->st_mode = S_IFIFO;
1709         return (0);
1710 }
1711
1712 /*
1713  * Called with the kqueue locked
1714  */
1715 static void
1716 kqueue_wakeup(struct kqueue *kq)
1717 {
1718
1719         if (kq->kq_state & KQ_SLEEP) {
1720                 kq->kq_state &= ~KQ_SLEEP;
1721                 thread_wakeup(kq);
1722         }
1723         if (kq->kq_state & KQ_SEL) {
1724                 kq->kq_state &= ~KQ_SEL;
1725                 selwakeup(&kq->kq_sel);
1726         }
1727         KNOTE(&kq->kq_sel.si_note, 0);
1728 }
1729
1730 void
1731 klist_init(struct klist *list)
1732 {
1733         SLIST_INIT(list);
1734 }
1735
1736
1737 /*
1738  * Query/Post each knote in the object's list
1739  *
1740  *      The object lock protects the list. It is assumed
1741  *      that the filter/event routine for the object can
1742  *      determine that the object is already locked (via
1743  *      the hind) and not deadlock itself.
1744  *
1745  *      The object lock should also hold off pending
1746  *      detach/drop operations.  But we'll prevent it here
1747  *      too - just in case.
1748  */
1749 void
1750 knote(struct klist *list, long hint)
1751 {
1752         struct knote *kn;
1753
1754         SLIST_FOREACH(kn, list, kn_selnext) {
1755                 struct kqueue *kq = kn->kn_kq;
1756
1757                 kqlock(kq);
1758                 if (kqlock2knoteuse(kq, kn)) {
1759                         int result;
1760
1761                         /* call the event with only a use count */
1762                         result = kn->kn_fop->f_event(kn, hint);
1763
1764                         /* if its not going away and triggered */
1765                         if (knoteuse2kqlock(kq, kn) && result)
1766                                 knote_activate(kn);
1767                         /* lock held again */
1768                 }
1769                 kqunlock(kq);
1770         }
1771 }
1772
1773 /*
1774  * attach a knote to the specified list.  Return true if this is the first entry.
1775  * The list is protected by whatever lock the object it is associated with uses.
1776  */
1777 int
1778 knote_attach(struct klist *list, struct knote *kn)
1779 {
1780         int ret = SLIST_EMPTY(list);
1781         SLIST_INSERT_HEAD(list, kn, kn_selnext);
1782         return ret;
1783 }
1784
1785 /*
1786  * detach a knote from the specified list.  Return true if that was the last entry.
1787  * The list is protected by whatever lock the object it is associated with uses.
1788  */
1789 int
1790 knote_detach(struct klist *list, struct knote *kn)
1791 {
1792         SLIST_REMOVE(list, kn, knote, kn_selnext);
1793         return SLIST_EMPTY(list);
1794 }
1795
1796 /*
1797  * remove all knotes referencing a specified fd
1798  *
1799  * Essentially an inlined knote_remove & knote_drop
1800  * when we know for sure that the thing is a file
1801  *
1802  * Entered with the proc_fd lock already held.
1803  * It returns the same way, but may drop it temporarily.
1804  */
1805 void
1806 knote_fdclose(struct proc *p, int fd)
1807 {
1808         struct filedesc *fdp = p->p_fd;
1809         struct klist *list;
1810         struct knote *kn;
1811
1812         list = &fdp->fd_knlist[fd];
1813         while ((kn = SLIST_FIRST(list)) != NULL) {
1814                 struct kqueue *kq = kn->kn_kq;
1815
1816                 kqlock(kq);
1817                 proc_fdunlock(p);
1818
1819                 /*
1820                  * Convert the lock to a drop ref.
1821                  * If we get it, go ahead and drop it.
1822                  * Otherwise, we waited for it to
1823                  * be dropped by the other guy, so
1824                  * it is safe to move on in the list.
1825                  */
1826                 if (kqlock2knotedrop(kq, kn)) {
1827                         kn->kn_fop->f_detach(kn);
1828                         knote_drop(kn, p);
1829                 }
1830
1831                 proc_fdlock(p);
1832
1833                 /* the fd tables may have changed - start over */
1834                 list = &fdp->fd_knlist[fd];
1835         }
1836 }
1837
1838 /* proc_fdlock held on entry (and exit) */
1839 static int
1840 knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
1841 {
1842         struct klist *list = NULL;
1843
1844         if (! kn->kn_fop->f_isfd) {
1845                 if (fdp->fd_knhashmask == 0)
1846                         fdp->fd_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
1847                             &fdp->fd_knhashmask);
1848                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1849         } else {
1850                 if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
1851                         u_int size = 0;
1852
1853                         /* have to grow the fd_knlist */
1854                         size = fdp->fd_knlistsize;
1855                         while (size <= kn->kn_id)
1856                                 size += KQEXTENT;
1857                         MALLOC(list, struct klist *,
1858                                size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
1859                         if (list == NULL)
1860                                 return (ENOMEM);
1861
1862                         bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
1863                               fdp->fd_knlistsize * sizeof(struct klist *));
1864                         bzero((caddr_t)list +
1865                               fdp->fd_knlistsize * sizeof(struct klist *),
1866                               (size - fdp->fd_knlistsize) * sizeof(struct klist *));
1867                         FREE(fdp->fd_knlist, M_KQUEUE);
1868                         fdp->fd_knlist = list;
1869                         fdp->fd_knlistsize = size;
1870                 }
1871                 list = &fdp->fd_knlist[kn->kn_id];
1872         }
1873         SLIST_INSERT_HEAD(list, kn, kn_link);
1874         return (0);
1875 }
1876
1877
1878
1879 /*
1880  * should be called at spl == 0, since we don't want to hold spl
1881  * while calling fdrop and free.
1882  */
1883 static void
1884 knote_drop(struct knote *kn, struct proc *p)
1885 {
1886         struct filedesc *fdp = p->p_fd;
1887         struct kqueue *kq = kn->kn_kq;
1888         struct klist *list;
1889
1890         proc_fdlock(p);
1891         if (kn->kn_fop->f_isfd)
1892                 list = &fdp->fd_knlist[kn->kn_id];
1893         else
1894                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1895
1896         SLIST_REMOVE(list, kn, knote, kn_link);
1897         kqlock(kq);
1898         knote_dequeue(kn);
1899         if (kn->kn_status & KN_DROPWAIT)
1900                 thread_wakeup(&kn->kn_status);
1901         kqunlock(kq);
1902         proc_fdunlock(p);
1903
1904         if (kn->kn_fop->f_isfd)
1905                 fp_drop(p, kn->kn_id, kn->kn_fp, 0);
1906
1907         knote_free(kn);
1908 }
1909
1910 /* called with kqueue lock held */
1911 static void
1912 knote_activate(struct knote *kn)
1913 {
1914         struct kqueue *kq = kn->kn_kq;
1915
1916         kn->kn_status |= KN_ACTIVE;
1917         knote_enqueue(kn);
1918         kqueue_wakeup(kq);
1919  }
1920
1921 /* called with kqueue lock held */
1922 static void
1923 knote_deactivate(struct knote *kn)
1924 {
1925         kn->kn_status &= ~KN_ACTIVE;
1926         knote_dequeue(kn);
1927 }
1928
1929 /* called with kqueue lock held */
1930 static void
1931 knote_enqueue(struct knote *kn)
1932 {
1933         struct kqueue *kq = kn->kn_kq;
1934
1935         if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
1936                 struct kqtailq *tq = kn->kn_tq;
1937
1938                 TAILQ_INSERT_TAIL(tq, kn, kn_tqe);
1939                 kn->kn_status |= KN_QUEUED;
1940                 kq->kq_count++;
1941         }
1942 }
1943
1944 /* called with kqueue lock held */
1945 static void
1946 knote_dequeue(struct knote *kn)
1947 {
1948         struct kqueue *kq = kn->kn_kq;
1949
1950         assert((kn->kn_status & KN_DISABLED) == 0);
1951         if ((kn->kn_status & KN_QUEUED) == KN_QUEUED) {
1952                 struct kqtailq *tq = kn->kn_tq;
1953
1954                 TAILQ_REMOVE(tq, kn, kn_tqe);
1955                 kn->kn_tq = &kq->kq_head;
1956                 kn->kn_status &= ~KN_QUEUED;
1957                 kq->kq_count--;
1958         }
1959 }
1960
1961 void
1962 knote_init(void)
1963 {
1964         knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), 8192, "knote zone");
1965
1966         /* allocate kq lock group attribute and group */
1967         kq_lck_grp_attr= lck_grp_attr_alloc_init();
1968
1969         kq_lck_grp = lck_grp_alloc_init("kqueue",  kq_lck_grp_attr);
1970
1971         /* Allocate kq lock attribute */
1972         kq_lck_attr = lck_attr_alloc_init();
1973
1974         /* Initialize the timer filter lock */
1975         lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
1976 }
1977 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
1978
1979 static struct knote *
1980 knote_alloc(void)
1981 {
1982         return ((struct knote *)zalloc(knote_zone));
1983 }
1984
1985 static void
1986 knote_free(struct knote *kn)
1987 {
1988         zfree(knote_zone, kn);
1989 }
1990
1991 #include <sys/param.h>
1992 #include <sys/socket.h>
1993 #include <sys/protosw.h>
1994 #include <sys/domain.h>
1995 #include <sys/mbuf.h>
1996 #include <sys/kern_event.h>
1997 #include <sys/malloc.h>
1998 #include <sys/sys_domain.h>
1999 #include <sys/syslog.h>
2000
2001
2002 static int kev_attach(struct socket *so, int proto, struct proc *p);
2003 static int kev_detach(struct socket *so);
2004 static int kev_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p);
2005
2006 struct pr_usrreqs event_usrreqs = {
2007      pru_abort_notsupp, pru_accept_notsupp, kev_attach, pru_bind_notsupp, pru_connect_notsupp,
2008      pru_connect2_notsupp, kev_control, kev_detach, pru_disconnect_notsupp,
2009      pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp, pru_rcvoob_notsupp,
2010      pru_send_notsupp, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp,
2011      pru_sosend_notsupp, soreceive, pru_sopoll_notsupp
2012 };
2013
2014 struct protosw eventsw[] = {
2015      {
2016           SOCK_RAW,             &systemdomain,  SYSPROTO_EVENT,         PR_ATOMIC,
2017           0,            0,              0,              0,
2018           0,
2019           0,            0,              0,              0,
2020 #if __APPLE__
2021           0,
2022 #endif
2023           &event_usrreqs,
2024           0,            0,              0,
2025 #if __APPLE__
2026           {0, 0},       0,              {0}
2027 #endif
2028      }
2029 };
2030
2031 static
2032 struct kern_event_head kern_event_head;
2033
2034 static u_long static_event_id = 0;
2035 struct domain *sysdom = &systemdomain;
2036
2037 static lck_grp_t                *evt_mtx_grp;
2038 static lck_attr_t               *evt_mtx_attr;
2039 static lck_grp_attr_t   *evt_mtx_grp_attr;
2040 lck_mtx_t                               *evt_mutex;
2041 /*
2042  * Install the protosw's for the NKE manager.  Invoked at
2043  *  extension load time
2044  */
2045 int
2046 kern_event_init(void)
2047 {
2048     int retval;
2049
2050     if ((retval = net_add_proto(eventsw, &systemdomain)) != 0) {
2051             log(LOG_WARNING, "Can't install kernel events protocol (%d)\n", retval);
2052             return(retval);
2053         }
2054
2055         /*
2056          * allocate lock group attribute and group for kern event
2057          */
2058         evt_mtx_grp_attr = lck_grp_attr_alloc_init();
2059
2060         evt_mtx_grp = lck_grp_alloc_init("eventlist", evt_mtx_grp_attr);
2061
2062         /*
2063          * allocate the lock attribute for mutexes
2064          */
2065         evt_mtx_attr = lck_attr_alloc_init();
2066         evt_mutex = lck_mtx_alloc_init(evt_mtx_grp, evt_mtx_attr);
2067         if (evt_mutex == NULL)
2068                         return (ENOMEM);
2069
2070     return(KERN_SUCCESS);
2071 }
2072
2073 static int
2074 kev_attach(struct socket *so, __unused int proto, __unused struct proc *p)
2075 {
2076      int error;
2077      struct kern_event_pcb  *ev_pcb;
2078
2079      error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE);
2080      if (error)
2081           return error;
2082
2083      MALLOC(ev_pcb, struct kern_event_pcb *, sizeof(struct kern_event_pcb), M_PCB, M_WAITOK);
2084      if (ev_pcb == 0)
2085           return ENOBUFS;
2086
2087      ev_pcb->ev_socket = so;
2088      ev_pcb->vendor_code_filter = 0xffffffff;
2089
2090      so->so_pcb = (caddr_t) ev_pcb;
2091          lck_mtx_lock(evt_mutex);
2092      LIST_INSERT_HEAD(&kern_event_head, ev_pcb, ev_link);
2093          lck_mtx_unlock(evt_mutex);
2094
2095      return 0;
2096 }
2097
2098
2099 static int
2100 kev_detach(struct socket *so)
2101 {
2102      struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2103
2104      if (ev_pcb != 0) {
2105                 lck_mtx_lock(evt_mutex);
2106                 LIST_REMOVE(ev_pcb, ev_link);
2107                 lck_mtx_unlock(evt_mutex);
2108                 FREE(ev_pcb, M_PCB);
2109                 so->so_pcb = 0;
2110                 so->so_flags |= SOF_PCBCLEARING;
2111      }
2112
2113      return 0;
2114 }
2115
2116 /*
2117  * For now, kev_vender_code and mbuf_tags use the same
2118  * mechanism.
2119  */
2120 extern errno_t mbuf_tag_id_find_internal(const char *string, u_long *out_id,
2121                                                                                  int create);
2122
2123 errno_t kev_vendor_code_find(
2124         const char      *string,
2125         u_long          *out_vender_code)
2126 {
2127         if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
2128                 return EINVAL;
2129         }
2130         return mbuf_tag_id_find_internal(string, out_vender_code, 1);
2131 }
2132
2133 extern void mbuf_tag_id_first_last(u_long *first, u_long *last);
2134
2135 errno_t  kev_msg_post(struct kev_msg *event_msg)
2136 {
2137         u_long  min_vendor, max_vendor;
2138
2139         mbuf_tag_id_first_last(&min_vendor, &max_vendor);
2140
2141         if (event_msg == NULL)
2142                 return EINVAL;
2143
2144         /* Limit third parties to posting events for registered vendor codes only */
2145         if (event_msg->vendor_code < min_vendor ||
2146                 event_msg->vendor_code > max_vendor)
2147         {
2148                 return EINVAL;
2149         }
2150
2151         return kev_post_msg(event_msg);
2152 }
2153
2154
2155 int  kev_post_msg(struct kev_msg *event_msg)
2156 {
2157      struct mbuf *m, *m2;
2158      struct kern_event_pcb  *ev_pcb;
2159      struct kern_event_msg  *ev;
2160      char              *tmp;
2161      unsigned long     total_size;
2162      int               i;
2163
2164         /* Verify the message is small enough to fit in one mbuf w/o cluster */
2165         total_size = KEV_MSG_HEADER_SIZE;
2166
2167         for (i = 0; i < 5; i++) {
2168                 if (event_msg->dv[i].data_length == 0)
2169                         break;
2170                 total_size += event_msg->dv[i].data_length;
2171         }
2172
2173         if (total_size > MLEN) {
2174                 return EMSGSIZE;
2175         }
2176
2177      m = m_get(M_DONTWAIT, MT_DATA);
2178      if (m == 0)
2179           return ENOBUFS;
2180
2181      ev = mtod(m, struct kern_event_msg *);
2182      total_size = KEV_MSG_HEADER_SIZE;
2183
2184      tmp = (char *) &ev->event_data[0];
2185      for (i = 0; i < 5; i++) {
2186           if (event_msg->dv[i].data_length == 0)
2187                break;
2188
2189           total_size += event_msg->dv[i].data_length;
2190           bcopy(event_msg->dv[i].data_ptr, tmp,
2191                 event_msg->dv[i].data_length);
2192           tmp += event_msg->dv[i].data_length;
2193      }
2194
2195      ev->id = ++static_event_id;
2196      ev->total_size   = total_size;
2197      ev->vendor_code  = event_msg->vendor_code;
2198      ev->kev_class    = event_msg->kev_class;
2199      ev->kev_subclass = event_msg->kev_subclass;
2200      ev->event_code   = event_msg->event_code;
2201
2202      m->m_len = total_size;
2203      lck_mtx_lock(evt_mutex);
2204      for (ev_pcb = LIST_FIRST(&kern_event_head);
2205           ev_pcb;
2206           ev_pcb = LIST_NEXT(ev_pcb, ev_link)) {
2207
2208           if (ev_pcb->vendor_code_filter != KEV_ANY_VENDOR) {
2209                if (ev_pcb->vendor_code_filter != ev->vendor_code)
2210                     continue;
2211
2212                if (ev_pcb->class_filter != KEV_ANY_CLASS) {
2213                     if (ev_pcb->class_filter != ev->kev_class)
2214                          continue;
2215
2216                     if ((ev_pcb->subclass_filter != KEV_ANY_SUBCLASS) &&
2217                         (ev_pcb->subclass_filter != ev->kev_subclass))
2218                          continue;
2219                }
2220           }
2221
2222           m2 = m_copym(m, 0, m->m_len, M_NOWAIT);
2223           if (m2 == 0) {
2224                m_free(m);
2225                    lck_mtx_unlock(evt_mutex);
2226                return ENOBUFS;
2227           }
2228           socket_lock(ev_pcb->ev_socket, 1);
2229           if (sbappendrecord(&ev_pcb->ev_socket->so_rcv, m2))
2230                   sorwakeup(ev_pcb->ev_socket);
2231           socket_unlock(ev_pcb->ev_socket, 1);
2232      }
2233
2234      m_free(m);
2235      lck_mtx_unlock(evt_mutex);
2236      return 0;
2237 }
2238
2239 static int
2240 kev_control(struct socket *so,
2241                         u_long cmd,
2242                         caddr_t data,
2243                         __unused struct ifnet *ifp,
2244                         __unused struct proc *p)
2245 {
2246         struct kev_request *kev_req = (struct kev_request *) data;
2247         struct kern_event_pcb  *ev_pcb;
2248         struct kev_vendor_code *kev_vendor;
2249         u_long  *id_value = (u_long *) data;
2250
2251
2252         switch (cmd) {
2253
2254                 case SIOCGKEVID:
2255                         *id_value = static_event_id;
2256                         break;
2257
2258                 case SIOCSKEVFILT:
2259                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2260                         ev_pcb->vendor_code_filter = kev_req->vendor_code;
2261                         ev_pcb->class_filter     = kev_req->kev_class;
2262                         ev_pcb->subclass_filter  = kev_req->kev_subclass;
2263                         break;
2264
2265                 case SIOCGKEVFILT:
2266                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2267                         kev_req->vendor_code = ev_pcb->vendor_code_filter;
2268                         kev_req->kev_class   = ev_pcb->class_filter;
2269                         kev_req->kev_subclass = ev_pcb->subclass_filter;
2270                         break;
2271
2272                 case SIOCGKEVVENDOR:
2273                         kev_vendor = (struct kev_vendor_code*)data;
2274
2275                         /* Make sure string is NULL terminated */
2276                         kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0;
2277
2278                         return mbuf_tag_id_find_internal(kev_vendor->vendor_string,
2279                                                                                          &kev_vendor->vendor_code, 0);
2280
2281                 default:
2282                         return ENOTSUP;
2283         }
2284
2285         return 0;
2286 }
2287
2288
2289
2290 int
2291 fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
2292 {
2293         struct stat * st;
2294
2295         /* No need for the funnel as fd is kept alive */
2296
2297         st = &kinfo->kq_stat;
2298
2299         st->st_size = kq->kq_count;
2300         st->st_blksize = sizeof(struct kevent);
2301         st->st_mode = S_IFIFO;
2302         if (kq->kq_state & KQ_SEL)
2303                 kinfo->kq_state |=  PROC_KQUEUE_SELECT;
2304         if (kq->kq_state & KQ_SLEEP)
2305                 kinfo->kq_state |= PROC_KQUEUE_SLEEP;
2306
2307         return(0);
2308 }
2309