bsd/kern/kern_event.c

   1 /*
   2  * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  *
  28  */
  29 /*-
  30  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
  31  * All rights reserved.
  32  *
  33  * Redistribution and use in source and binary forms, with or without
  34  * modification, are permitted provided that the following conditions
  35  * are met:
  36  * 1. Redistributions of source code must retain the above copyright
  37  *    notice, this list of conditions and the following disclaimer.
  38  * 2. Redistributions in binary form must reproduce the above copyright
  39  *    notice, this list of conditions and the following disclaimer in the
  40  *    documentation and/or other materials provided with the distribution.
  41  *
  42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  43  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  45  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  46  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  47  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  48  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  49  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  50  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  51  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  52  * SUCH DAMAGE.
  53  */
  54 /*
  55  *      @(#)kern_event.c       1.0 (3/31/2000)
  56  */
  57 #include <stdint.h>
  58
  59 #include <sys/param.h>
  60 #include <sys/systm.h>
  61 #include <sys/filedesc.h>
  62 #include <sys/kernel.h>
  63 #include <sys/proc_internal.h>
  64 #include <sys/kauth.h>
  65 #include <sys/malloc.h>
  66 #include <sys/unistd.h>
  67 #include <sys/file_internal.h>
  68 #include <sys/fcntl.h>
  69 #include <sys/select.h>
  70 #include <sys/queue.h>
  71 #include <sys/event.h>
  72 #include <sys/eventvar.h>
  73 #include <sys/protosw.h>
  74 #include <sys/socket.h>
  75 #include <sys/socketvar.h>
  76 #include <sys/stat.h>
  77 #include <sys/sysctl.h>
  78 #include <sys/uio.h>
  79 #include <sys/sysproto.h>
  80 #include <sys/user.h>
  81 #include <string.h>
  82 #include <sys/proc_info.h>
  83
  84 #include <kern/lock.h>
  85 #include <kern/clock.h>
  86 #include <kern/thread_call.h>
  87 #include <kern/sched_prim.h>
  88 #include <kern/zalloc.h>
  89 #include <kern/assert.h>
  90
  91 #include <libkern/libkern.h>
  92
  93 extern void unix_syscall_return(int);
  94
  95 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
  96
  97 static inline void kqlock(struct kqueue *kq);
  98 static inline void kqunlock(struct kqueue *kq);
  99
 100 static int      kqlock2knoteuse(struct kqueue *kq, struct knote *kn);
 101 static int      kqlock2knoteusewait(struct kqueue *kq, struct knote *kn);
 102 static int      kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
 103 static int      knoteuse2kqlock(struct kqueue *kq, struct knote *kn);
 104
 105 static void     kqueue_wakeup(struct kqueue *kq);
 106 static int      kqueue_read(struct fileproc *fp, struct uio *uio,
 107                     kauth_cred_t cred, int flags, struct proc *p);
 108 static int      kqueue_write(struct fileproc *fp, struct uio *uio,
 109                     kauth_cred_t cred, int flags, struct proc *p);
 110 static int      kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
 111                     struct proc *p);
 112 static int      kqueue_select(struct fileproc *fp, int which, void *wql,
 113                     struct proc *p);
 114 static int      kqueue_close(struct fileglob *fp, struct proc *p);
 115 static int      kqueue_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
 116 extern int      kqueue_stat(struct fileproc *fp, struct stat *st, struct proc *p);
 117
 118 static struct fileops kqueueops = {
 119         kqueue_read,
 120         kqueue_write,
 121         kqueue_ioctl,
 122         kqueue_select,
 123         kqueue_close,
 124         kqueue_kqfilter,
 125         0
 126 };
 127
 128 static int kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p);
 129 static int kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p);
 130
 131 static int      kevent_callback(struct kqueue *kq, struct kevent *kevp, void *data);
 132 static void     kevent_continue(struct kqueue *kq, void *data, int error);
 133 static void     kevent_scan_continue(void *contp, wait_result_t wait_result);
 134 static int      kevent_process(struct kqueue *kq, kevent_callback_t callback,
 135                                void *data, int *countp, struct proc *p);
 136 static void     knote_put(struct knote *kn);
 137 static int      knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p);
 138 static void     knote_drop(struct knote *kn, struct proc *p);
 139 static void     knote_activate(struct knote *kn);
 140 static void     knote_deactivate(struct knote *kn);
 141 static void     knote_enqueue(struct knote *kn);
 142 static void     knote_dequeue(struct knote *kn);
 143 static struct   knote *knote_alloc(void);
 144 static void     knote_free(struct knote *kn);
 145 extern void     knote_init(void);
 146
 147 static int      filt_fileattach(struct knote *kn);
 148 static struct filterops file_filtops =
 149         { 1, filt_fileattach, NULL, NULL };
 150
 151 static void     filt_kqdetach(struct knote *kn);
 152 static int      filt_kqueue(struct knote *kn, long hint);
 153 static struct filterops kqread_filtops =
 154         { 1, NULL, filt_kqdetach, filt_kqueue };
 155
 156 /*
 157  * placeholder for not-yet-implemented filters
 158  */
 159 static int      filt_badattach(struct knote *kn);
 160 static struct filterops bad_filtops =
 161         { 0, filt_badattach, 0 , 0 };
 162
 163 static int      filt_procattach(struct knote *kn);
 164 static void     filt_procdetach(struct knote *kn);
 165 static int      filt_proc(struct knote *kn, long hint);
 166
 167 static struct filterops proc_filtops =
 168         { 0, filt_procattach, filt_procdetach, filt_proc };
 169
 170 extern struct filterops fs_filtops;
 171
 172 extern struct filterops sig_filtops;
 173
 174
 175 /* Timer filter */
 176 static int      filt_timercompute(struct knote *kn, uint64_t *abs_time);
 177 static void     filt_timerexpire(void *knx, void *param1);
 178 static int      filt_timerattach(struct knote *kn);
 179 static void     filt_timerdetach(struct knote *kn);
 180 static int      filt_timer(struct knote *kn, long hint);
 181
 182 static struct filterops timer_filtops =
 183         { 0, filt_timerattach, filt_timerdetach, filt_timer };
 184
 185 /* to avoid arming timers that fire quicker than we can handle */
 186 static uint64_t filt_timerfloor = 0;
 187
 188 static lck_mtx_t _filt_timerlock;
 189 static void     filt_timerlock(void);
 190 static void     filt_timerunlock(void);
 191
 192 /*
 193  * Sentinel marker for a thread scanning through the list of
 194  * active knotes.
 195  */
 196 static struct filterops threadmarker_filtops =
 197         { 0, filt_badattach, 0, 0 };
 198
 199 static zone_t   knote_zone;
 200
 201 #define KN_HASHSIZE             64              /* XXX should be tunable */
 202 #define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))
 203
 204 #if 0
 205 extern struct filterops aio_filtops;
 206 #endif
 207
 208 /*
 209  * Table for for all system-defined filters.
 210  */
 211 static struct filterops *sysfilt_ops[] = {
 212         &file_filtops,                  /* EVFILT_READ */
 213         &file_filtops,                  /* EVFILT_WRITE */
 214 #if 0
 215         &aio_filtops,                   /* EVFILT_AIO */
 216 #else
 217         &bad_filtops,                   /* EVFILT_AIO */
 218 #endif
 219         &file_filtops,                  /* EVFILT_VNODE */
 220         &proc_filtops,                  /* EVFILT_PROC */
 221         &sig_filtops,                   /* EVFILT_SIGNAL */
 222         &timer_filtops,                 /* EVFILT_TIMER */
 223         &bad_filtops,                   /* EVFILT_MACHPORT */
 224         &fs_filtops                     /* EVFILT_FS */
 225 };
 226
 227 /*
 228  * kqueue/note lock attributes and implementations
 229  *
 230  *      kqueues have locks, while knotes have use counts
 231  *      Most of the knote state is guarded by the object lock.
 232  *      the knote "inuse" count and status use the kqueue lock.
 233  */
 234 lck_grp_attr_t * kq_lck_grp_attr;
 235 lck_grp_t * kq_lck_grp;
 236 lck_attr_t * kq_lck_attr;
 237
 238 static inline void
 239 kqlock(struct kqueue *kq)
 240 {
 241         lck_spin_lock(&kq->kq_lock);
 242 }
 243
 244 static inline void
 245 kqunlock(struct kqueue *kq)
 246 {
 247         lck_spin_unlock(&kq->kq_lock);
 248 }
 249
 250 /*
 251  * Convert a kq lock to a knote use referece.
 252  *
 253  *      If the knote is being dropped, we can't get
 254  *      a use reference, so just return with it
 255  *      still locked.
 256  *
 257  *      - kq locked at entry
 258  *      - unlock on exit if we get the use reference
 259  */
 260 static int
 261 kqlock2knoteuse(struct kqueue *kq, struct knote *kn)
 262 {
 263         if (kn->kn_status & KN_DROPPING)
 264                 return 0;
 265         kn->kn_inuse++;
 266         kqunlock(kq);
 267         return 1;
 268  }
 269
 270 /*
 271  * Convert a kq lock to a knote use referece.
 272  *
 273  *      If the knote is being dropped, we can't get
 274  *      a use reference, so just return with it
 275  *      still locked.
 276  *
 277  *      - kq locked at entry
 278  *      - kq always unlocked on exit
 279  */
 280 static int
 281 kqlock2knoteusewait(struct kqueue *kq, struct knote *kn)
 282 {
 283         if (!kqlock2knoteuse(kq, kn)) {
 284                 kn->kn_status |= KN_DROPWAIT;
 285                 assert_wait(&kn->kn_status, THREAD_UNINT);
 286                 kqunlock(kq);
 287                 thread_block(THREAD_CONTINUE_NULL);
 288                 return 0;
 289         }
 290         return 1;
 291  }
 292
 293 /*
 294  * Convert from a knote use reference back to kq lock.
 295  *
 296  *      Drop a use reference and wake any waiters if
 297  *      this is the last one.
 298  *
 299  *      The exit return indicates if the knote is
 300  *      still alive - but the kqueue lock is taken
 301  *      unconditionally.
 302  */
 303 static int
 304 knoteuse2kqlock(struct kqueue *kq, struct knote *kn)
 305 {
 306         kqlock(kq);
 307         if ((--kn->kn_inuse == 0) &&
 308             (kn->kn_status & KN_USEWAIT)) {
 309                 kn->kn_status &= ~KN_USEWAIT;
 310                 thread_wakeup(&kn->kn_inuse);
 311         }
 312         return ((kn->kn_status & KN_DROPPING) == 0);
 313  }
 314
 315 /*
 316  * Convert a kq lock to a knote drop referece.
 317  *
 318  *      If the knote is in use, wait for the use count
 319  *      to subside.  We first mark our intention to drop
 320  *      it - keeping other users from "piling on."
 321  *      If we are too late, we have to wait for the
 322  *      other drop to complete.
 323  *
 324  *      - kq locked at entry
 325  *      - always unlocked on exit.
 326  *      - caller can't hold any locks that would prevent
 327  *        the other dropper from completing.
 328  */
 329 static int
 330 kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
 331 {
 332
 333         if ((kn->kn_status & KN_DROPPING) == 0) {
 334                 kn->kn_status |= KN_DROPPING;
 335                 if (kn->kn_inuse > 0) {
 336                         kn->kn_status |= KN_USEWAIT;
 337                         assert_wait(&kn->kn_inuse, THREAD_UNINT);
 338                         kqunlock(kq);
 339                         thread_block(THREAD_CONTINUE_NULL);
 340                 } else
 341                         kqunlock(kq);
 342                 return 1;
 343         } else {
 344                 kn->kn_status |= KN_DROPWAIT;
 345                 assert_wait(&kn->kn_status, THREAD_UNINT);
 346                 kqunlock(kq);
 347                 thread_block(THREAD_CONTINUE_NULL);
 348                 return 0;
 349         }
 350 }
 351
 352 /*
 353  * Release a knote use count reference.
 354  */
 355 static void
 356 knote_put(struct knote *kn)
 357 {
 358         struct kqueue *kq = kn->kn_kq;
 359
 360         kqlock(kq);
 361         if ((--kn->kn_inuse == 0) &&
 362             (kn->kn_status & KN_USEWAIT)) {
 363                 kn->kn_status &= ~KN_USEWAIT;
 364                 thread_wakeup(&kn->kn_inuse);
 365         }
 366         kqunlock(kq);
 367  }
 368
 369
 370
 371 static int
 372 filt_fileattach(struct knote *kn)
 373 {
 374
 375         return (fo_kqfilter(kn->kn_fp, kn, current_proc()));
 376 }
 377
 378 #define f_flag f_fglob->fg_flag
 379 #define f_type f_fglob->fg_type
 380 #define f_msgcount f_fglob->fg_msgcount
 381 #define f_cred f_fglob->fg_cred
 382 #define f_ops f_fglob->fg_ops
 383 #define f_offset f_fglob->fg_offset
 384 #define f_data f_fglob->fg_data
 385
 386 static void
 387 filt_kqdetach(struct knote *kn)
 388 {
 389         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 390
 391         kqlock(kq);
 392         KNOTE_DETACH(&kq->kq_sel.si_note, kn);
 393         kqunlock(kq);
 394 }
 395
 396 /*ARGSUSED*/
 397 static int
 398 filt_kqueue(struct knote *kn, __unused long hint)
 399 {
 400         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 401
 402         kn->kn_data = kq->kq_count;
 403         return (kn->kn_data > 0);
 404 }
 405
 406 static int
 407 filt_procattach(struct knote *kn)
 408 {
 409         struct proc *p;
 410         int funnel_state;
 411
 412         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 413
 414         p = pfind(kn->kn_id);
 415         if (p == NULL) {
 416                 thread_funnel_set(kernel_flock, funnel_state);
 417                 return (ESRCH);
 418         }
 419
 420         kn->kn_flags |= EV_CLEAR;               /* automatically set */
 421         kn->kn_hookid = 1;                      /* mark exit not seen */
 422
 423         /*
 424          * internal flag indicating registration done by kernel
 425          */
 426         if (kn->kn_flags & EV_FLAG1) {
 427                 kn->kn_data = (int)kn->kn_sdata;        /* ppid */
 428                 kn->kn_fflags = NOTE_CHILD;
 429                 kn->kn_flags &= ~EV_FLAG1;
 430         }
 431
 432         /* XXX lock the proc here while adding to the list? */
 433         KNOTE_ATTACH(&p->p_klist, kn);
 434
 435         thread_funnel_set(kernel_flock, funnel_state);
 436
 437         return (0);
 438 }
 439
 440 /*
 441  * The knote may be attached to a different process, which may exit,
 442  * leaving nothing for the knote to be attached to.  In that case,
 443  * we wont be able to find the process from its pid.  But the exit
 444  * code may still be processing the knote list for the target process.
 445  * We may have to wait for that processing to complete before we can
 446  * return (and presumably free the knote) without actually removing
 447  * it from the dead process' knote list.
 448  */
 449 static void
 450 filt_procdetach(struct knote *kn)
 451 {
 452         struct proc *p;
 453         int funnel_state;
 454
 455         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 456         p = pfind(kn->kn_id);
 457
 458         if (p != (struct proc *)NULL) {
 459                 KNOTE_DETACH(&p->p_klist, kn);
 460         } else if (kn->kn_hookid != 0) {        /* if not NOTE_EXIT yet */
 461                 kn->kn_hookid = -1;     /* we are detaching but... */
 462                 assert_wait(&kn->kn_hook, THREAD_UNINT); /* have to wait */
 463                 thread_block(THREAD_CONTINUE_NULL);
 464         }
 465         thread_funnel_set(kernel_flock, funnel_state);
 466 }
 467
 468 static int
 469 filt_proc(struct knote *kn, long hint)
 470 {
 471
 472         if (hint != 0) {
 473                 u_int event;
 474
 475                 /* must hold the funnel when coming from below */
 476                 assert(thread_funnel_get() != (funnel_t)0);
 477
 478                 /*
 479                  * mask off extra data
 480                  */
 481                 event = (u_int)hint & NOTE_PCTRLMASK;
 482
 483                 /*
 484                  * if the user is interested in this event, record it.
 485                  */
 486                 if (kn->kn_sfflags & event)
 487                         kn->kn_fflags |= event;
 488
 489                 /*
 490                  * process is gone, so flag the event as finished.
 491                  *
 492                  * If someone was trying to detach, but couldn't
 493                  * find the proc to complete the detach, wake them
 494                  * up (nothing will ever need to walk the per-proc
 495                  * knote list again - so its safe for them to dump
 496                  * the knote now).
 497                  */
 498                 if (event == NOTE_EXIT) {
 499                         boolean_t detaching = (kn->kn_hookid == -1);
 500
 501                         kn->kn_hookid = 0;
 502                         kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 503                         if (detaching)
 504                                 thread_wakeup(&kn->kn_hookid);
 505                         return (1);
 506                 }
 507
 508                 /*
 509                  * process forked, and user wants to track the new process,
 510                  * so attach a new knote to it, and immediately report an
 511                  * event with the parent's pid.
 512                  */
 513                 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
 514                         struct kevent kev;
 515                         int error;
 516
 517                         /*
 518                          * register knote with new process.
 519                          */
 520                         kev.ident = hint & NOTE_PDATAMASK;      /* pid */
 521                         kev.filter = kn->kn_filter;
 522                         kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
 523                         kev.fflags = kn->kn_sfflags;
 524                         kev.data = kn->kn_id;                   /* parent */
 525                         kev.udata = kn->kn_kevent.udata;        /* preserve udata */
 526                         error = kevent_register(kn->kn_kq, &kev, NULL);
 527                         if (error)
 528                                 kn->kn_fflags |= NOTE_TRACKERR;
 529                 }
 530         }
 531
 532         return (kn->kn_fflags != 0); /* atomic check - no funnel needed from above */
 533 }
 534
 535 /*
 536  * filt_timercompute - compute absolute timeout
 537  *
 538  *      The saved-data field in the knote contains the
 539  *      time value.  The saved filter-flags indicates
 540  *      the unit of measurement.
 541  *
 542  *      If the timeout is not absolute, adjust it for
 543  *      the current time.
 544  */
 545 static int
 546 filt_timercompute(struct knote *kn, uint64_t *abs_time)
 547 {
 548         uint64_t multiplier;
 549         uint64_t raw;
 550
 551         switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS)) {
 552         case NOTE_SECONDS:
 553                 multiplier = NSEC_PER_SEC;
 554                 break;
 555         case NOTE_USECONDS:
 556                 multiplier = NSEC_PER_USEC;
 557                 break;
 558         case NOTE_NSECONDS:
 559                 multiplier = 1;
 560                 break;
 561         case 0: /* milliseconds (default) */
 562                 multiplier = NSEC_PER_SEC / 1000;
 563                 break;
 564         default:
 565                 return EINVAL;
 566         }
 567         nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw);
 568         if (raw <= filt_timerfloor) {
 569                 *abs_time = 0;
 570                 return 0;
 571         }
 572         if ((kn->kn_sfflags & NOTE_ABSOLUTE) == NOTE_ABSOLUTE) {
 573                 uint32_t seconds, nanoseconds;
 574                 uint64_t now;
 575
 576                 clock_get_calendar_nanotime(&seconds, &nanoseconds);
 577                 nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + nanoseconds,
 578                                             &now);
 579                 if (now >= raw + filt_timerfloor) {
 580                         *abs_time = 0;
 581                         return 0;
 582                 }
 583                 raw -= now;
 584         }
 585         clock_absolutetime_interval_to_deadline(raw, abs_time);
 586         return 0;
 587 }
 588
 589 /*
 590  * filt_timerexpire - the timer callout routine
 591  *
 592  *      Just propagate the timer event into the knote
 593  *      filter routine (by going through the knote
 594  *      synchronization point).  Pass a hint to
 595  *      indicate this is a real event, not just a
 596  *      query from above.
 597  */
 598 static void
 599 filt_timerexpire(void *knx, __unused void *spare)
 600 {
 601         struct klist timer_list;
 602         struct knote *kn = knx;
 603
 604         /* no "object" for timers, so fake a list */
 605         SLIST_INIT(&timer_list);
 606         SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);
 607         KNOTE(&timer_list, 1);
 608 }
 609
 610 /*
 611  * data contains amount of time to sleep, in milliseconds,
 612  * or a pointer to a timespec structure.
 613  */
 614 static int
 615 filt_timerattach(struct knote *kn)
 616 {
 617         thread_call_t callout;
 618         uint64_t deadline;
 619         int error;
 620
 621         error = filt_timercompute(kn, &deadline);
 622         if (error)
 623                 return (error);
 624
 625         if (deadline) {
 626                 callout = thread_call_allocate(filt_timerexpire, kn);
 627                 if (NULL == callout)
 628                         return (ENOMEM);
 629         } else {
 630                 /* handle as immediate */
 631                 kn->kn_sdata = 0;
 632                 callout = NULL;
 633         }
 634
 635         filt_timerlock();
 636         kn->kn_hook = (caddr_t)callout;
 637
 638         /* absolute=EV_ONESHOT */
 639         if (kn->kn_sfflags & NOTE_ABSOLUTE)
 640                 kn->kn_flags |= EV_ONESHOT;
 641
 642         if (deadline) {
 643                 /* all others - if not faking immediate */
 644                 kn->kn_flags |= EV_CLEAR;
 645                 thread_call_enter_delayed(callout, deadline);
 646                 kn->kn_hookid = 0;
 647         } else {
 648                 /* fake immediate */
 649                 kn->kn_hookid = 1;
 650         }
 651         filt_timerunlock();
 652         return (0);
 653 }
 654
 655 static void
 656 filt_timerdetach(struct knote *kn)
 657 {
 658         thread_call_t callout;
 659
 660         filt_timerlock();
 661         callout = (thread_call_t)kn->kn_hook;
 662         if (callout != NULL) {
 663                 boolean_t cancelled;
 664
 665                 /* cancel the callout if we can */
 666                 cancelled = thread_call_cancel(callout);
 667                 if (cancelled) {
 668                         /* got it, just free it */
 669                         kn->kn_hook = NULL;
 670                         filt_timerunlock();
 671                         thread_call_free(callout);
 672                         return;
 673                 }
 674                 /* we have to wait for the expire routine.  */
 675                 kn->kn_hookid = -1;     /* we are detaching */
 676                 assert_wait(&kn->kn_hook, THREAD_UNINT);
 677                 filt_timerunlock();
 678                 thread_block(THREAD_CONTINUE_NULL);
 679                 assert(kn->kn_hook == NULL);
 680                 return;
 681         }
 682         /* nothing to do */
 683         filt_timerunlock();
 684 }
 685
 686
 687
 688 static int
 689 filt_timer(struct knote *kn, __unused long hint)
 690 {
 691         int result;
 692
 693         if (hint) {
 694                 /* real timer pop */
 695                 thread_call_t callout;
 696                 boolean_t detaching;
 697
 698                 filt_timerlock();
 699
 700                 kn->kn_data++;
 701
 702                 detaching = (kn->kn_hookid < 0);
 703                 callout = (thread_call_t)kn->kn_hook;
 704
 705                 if (!detaching && (kn->kn_flags & EV_ONESHOT) == 0) {
 706                         uint64_t deadline;
 707                         int error;
 708
 709                         /* user input data may have changed - deal */
 710                         error = filt_timercompute(kn, &deadline);
 711                         if (error) {
 712                                 kn->kn_flags |= EV_ERROR;
 713                                 kn->kn_data = error;
 714                         } else if (deadline == 0) {
 715                                 /* revert to fake immediate */
 716                                 kn->kn_flags &= ~EV_CLEAR;
 717                                 kn->kn_sdata = 0;
 718                                 kn->kn_hookid = 1;
 719                         } else {
 720                                 /* keep the callout and re-arm */
 721                                 thread_call_enter_delayed(callout, deadline);
 722                                 filt_timerunlock();
 723                                 return 1;
 724                         }
 725                 }
 726                 kn->kn_hook = NULL;
 727                 filt_timerunlock();
 728                 thread_call_free(callout);
 729
 730                 /* if someone is waiting for timer to pop */
 731                 if (detaching)
 732                         thread_wakeup(&kn->kn_hook);
 733
 734                 return 1;
 735         }
 736
 737         /* user-query */
 738         filt_timerlock();
 739
 740         /* change fake timer to real if needed */
 741         while (kn->kn_hookid > 0 && kn->kn_sdata > 0) {
 742                 int error;
 743
 744                 /* update the fake timer (make real) */
 745                 kn->kn_hookid = 0;
 746                 kn->kn_data = 0;
 747                 filt_timerunlock();
 748                 error = filt_timerattach(kn);
 749                 filt_timerlock();
 750                 if (error) {
 751                         kn->kn_flags |= EV_ERROR;
 752                         kn->kn_data = error;
 753                         filt_timerunlock();
 754                         return 1;
 755                 }
 756         }
 757
 758         /* if still fake, pretend it fired */
 759         if (kn->kn_hookid > 0)
 760                 kn->kn_data = 1;
 761
 762         result = (kn->kn_data != 0);
 763         filt_timerunlock();
 764         return result;
 765 }
 766
 767 static void
 768 filt_timerlock(void)
 769 {
 770         lck_mtx_lock(&_filt_timerlock);
 771 }
 772
 773 static void
 774 filt_timerunlock(void)
 775 {
 776         lck_mtx_unlock(&_filt_timerlock);
 777 }
 778
 779 /*
 780  * JMM - placeholder for not-yet-implemented filters
 781  */
 782 static int
 783 filt_badattach(__unused struct knote *kn)
 784 {
 785         return(ENOTSUP);
 786 }
 787
 788
 789 struct kqueue *
 790 kqueue_alloc(struct proc *p)
 791 {
 792         struct filedesc *fdp = p->p_fd;
 793         struct kqueue *kq;
 794
 795         MALLOC_ZONE(kq, struct kqueue *, sizeof(struct kqueue), M_KQUEUE, M_WAITOK);
 796         if (kq != NULL) {
 797                 bzero(kq, sizeof(struct kqueue));
 798                 lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
 799                 TAILQ_INIT(&kq->kq_head);
 800                 TAILQ_INIT(&kq->kq_inprocess);
 801                 kq->kq_fdp = fdp;
 802         }
 803
 804         if (fdp->fd_knlistsize < 0) {
 805                 proc_fdlock(p);
 806                 if (fdp->fd_knlistsize < 0)
 807                         fdp->fd_knlistsize = 0;         /* this process has had a kq */
 808                 proc_fdunlock(p);
 809         }
 810
 811         return kq;
 812 }
 813
 814
 815 /*
 816  * kqueue_dealloc - detach all knotes from a kqueue and free it
 817  *
 818  *      We walk each list looking for knotes referencing this
 819  *      this kqueue.  If we find one, we try to drop it.  But
 820  *      if we fail to get a drop reference, that will wait
 821  *      until it is dropped.  So, we can just restart again
 822  *      safe in the assumption that the list will eventually
 823  *      not contain any more references to this kqueue (either
 824  *      we dropped them all, or someone else did).
 825  *
 826  *      Assumes no new events are being added to the kqueue.
 827  *      Nothing locked on entry or exit.
 828  */
 829 void
 830 kqueue_dealloc(struct kqueue *kq, struct proc *p)
 831 {
 832         struct filedesc *fdp = p->p_fd;
 833         struct knote *kn;
 834         int i;
 835
 836         proc_fdlock(p);
 837         for (i = 0; i < fdp->fd_knlistsize; i++) {
 838                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 839                 while (kn != NULL) {
 840                         if (kq == kn->kn_kq) {
 841                                 kqlock(kq);
 842                                 proc_fdunlock(p);
 843                                 /* drop it ourselves or wait */
 844                                 if (kqlock2knotedrop(kq, kn)) {
 845                                         kn->kn_fop->f_detach(kn);
 846                                         knote_drop(kn, p);
 847                                 }
 848                                 proc_fdlock(p);
 849                                 /* start over at beginning of list */
 850                                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 851                                 continue;
 852                         }
 853                         kn = SLIST_NEXT(kn, kn_link);
 854                 }
 855         }
 856         if (fdp->fd_knhashmask != 0) {
 857                 for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
 858                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 859                         while (kn != NULL) {
 860                                 if (kq == kn->kn_kq) {
 861                                         kqlock(kq);
 862                                         proc_fdunlock(p);
 863                                         /* drop it ourselves or wait */
 864                                         if (kqlock2knotedrop(kq, kn)) {
 865                                                 kn->kn_fop->f_detach(kn);
 866                                                 knote_drop(kn, p);
 867                                         }
 868                                         proc_fdlock(p);
 869                                         /* start over at beginning of list */
 870                                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 871                                         continue;
 872                                 }
 873                                 kn = SLIST_NEXT(kn, kn_link);
 874                         }
 875                 }
 876         }
 877         proc_fdunlock(p);
 878         lck_spin_destroy(&kq->kq_lock, kq_lck_grp);
 879         FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE);
 880 }
 881
 882 int
 883 kqueue(struct proc *p, __unused struct kqueue_args *uap, register_t *retval)
 884 {
 885         struct kqueue *kq;
 886         struct fileproc *fp;
 887         int fd, error;
 888
 889         error = falloc(p, &fp, &fd);
 890         if (error) {
 891                 return (error);
 892         }
 893
 894         kq = kqueue_alloc(p);
 895         if (kq == NULL) {
 896                 fp_free(p, fd, fp);
 897                 return (ENOMEM);
 898         }
 899
 900         fp->f_flag = FREAD | FWRITE;
 901         fp->f_type = DTYPE_KQUEUE;
 902         fp->f_ops = &kqueueops;
 903         fp->f_data = (caddr_t)kq;
 904
 905         proc_fdlock(p);
 906         *fdflags(p, fd) &= ~UF_RESERVED;
 907         fp_drop(p, fd, fp, 1);
 908         proc_fdunlock(p);
 909
 910         *retval = fd;
 911         return (error);
 912 }
 913
 914 int
 915 kqueue_portset_np(__unused struct proc *p,
 916                                   __unused struct kqueue_portset_np_args *uap,
 917                                   __unused register_t *retval)
 918 {
 919                 /* JMM - Placeholder for now */
 920                 return (ENOTSUP);
 921 }
 922
 923 int
 924 kqueue_from_portset_np(__unused struct proc *p,
 925                                            __unused struct kqueue_from_portset_np_args *uap,
 926                                            __unused register_t *retval)
 927 {
 928                 /* JMM - Placeholder for now */
 929                 return (ENOTSUP);
 930 }
 931
 932 static int
 933 kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p)
 934 {
 935         int advance;
 936         int error;
 937
 938         if (IS_64BIT_PROCESS(p)) {
 939                 struct user_kevent kev64;
 940
 941                 advance = sizeof(kev64);
 942                 error = copyin(*addrp, (caddr_t)&kev64, advance);
 943                 if (error)
 944                         return error;
 945                 kevp->ident = CAST_DOWN(uintptr_t, kev64.ident);
 946                 kevp->filter = kev64.filter;
 947                 kevp->flags = kev64.flags;
 948                 kevp->fflags = kev64.fflags;
 949                 kevp->data = CAST_DOWN(intptr_t, kev64.data);
 950                 kevp->udata = kev64.udata;
 951         } else {
 952                 /*
 953                  * compensate for legacy in-kernel kevent layout
 954                  * where the udata field is alredy 64-bit.
 955                  */
 956                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 957                 error = copyin(*addrp, (caddr_t)kevp, advance);
 958         }
 959         if (!error)
 960                 *addrp += advance;
 961         return error;
 962 }
 963
 964 static int
 965 kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p)
 966 {
 967         int advance;
 968         int error;
 969
 970         if (IS_64BIT_PROCESS(p)) {
 971                 struct user_kevent kev64;
 972
 973                 kev64.ident = (uint64_t) kevp->ident;
 974                 kev64.filter = kevp->filter;
 975                 kev64.flags = kevp->flags;
 976                 kev64.fflags = kevp->fflags;
 977                 kev64.data = (int64_t) kevp->data;
 978                 kev64.udata = kevp->udata;
 979                 advance = sizeof(kev64);
 980                 error = copyout((caddr_t)&kev64, *addrp, advance);
 981         } else {
 982                 /*
 983                  * compensate for legacy in-kernel kevent layout
 984                  * where the udata field is alredy 64-bit.
 985                  */
 986                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 987                 error = copyout((caddr_t)kevp, *addrp, advance);
 988         }
 989         if (!error)
 990                 *addrp += advance;
 991         return error;
 992 }
 993
 994 /*
 995  * kevent_continue - continue a kevent syscall after blocking
 996  *
 997  *      assume we inherit a use count on the kq fileglob.
 998  */
 999
1000 static void
1001 kevent_continue(__unused struct kqueue *kq, void *data, int error)
1002 {
1003         struct _kevent *cont_args;
1004         struct fileproc *fp;
1005         register_t *retval;
1006         int noutputs;
1007         int fd;
1008         struct proc *p = current_proc();
1009
1010         cont_args = (struct _kevent *)data;
1011         noutputs = cont_args->eventout;
1012         retval = cont_args->retval;
1013         fd = cont_args->fd;
1014         fp = cont_args->fp;
1015
1016         fp_drop(p, fd, fp, 0);
1017
1018         /* don't restart after signals... */
1019         if (error == ERESTART)
1020                 error = EINTR;
1021         else if (error == EWOULDBLOCK)
1022                 error = 0;
1023         if (error == 0)
1024                 *retval = noutputs;
1025         unix_syscall_return(error);
1026 }
1027
1028 /*
1029  * kevent - [syscall] register and wait for kernel events
1030  *
1031  */
1032
1033 int
1034 kevent(struct proc *p, struct kevent_args *uap, register_t *retval)
1035 {
1036         user_addr_t changelist = uap->changelist;
1037         user_addr_t ueventlist = uap->eventlist;
1038         int nchanges = uap->nchanges;
1039         int nevents = uap->nevents;
1040         int fd = uap->fd;
1041
1042         struct _kevent *cont_args;
1043         uthread_t ut;
1044         struct kqueue *kq;
1045         struct fileproc *fp;
1046         struct kevent kev;
1047         int error, noutputs;
1048         struct timeval atv;
1049
1050         /* convert timeout to absolute - if we have one */
1051         if (uap->timeout != USER_ADDR_NULL) {
1052                 struct timeval rtv;
1053                 if ( IS_64BIT_PROCESS(p) ) {
1054                         struct user_timespec ts;
1055                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1056                         if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0)
1057                                 error = EINVAL;
1058                         else
1059                                 TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1060                 } else {
1061                         struct timespec ts;
1062                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1063                         TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1064                 }
1065                 if (error)
1066                         return error;
1067                 if (itimerfix(&rtv))
1068                         return EINVAL;
1069                 getmicrouptime(&atv);
1070                 timevaladd(&atv, &rtv);
1071         } else {
1072                 atv.tv_sec = 0;
1073                 atv.tv_usec = 0;
1074         }
1075
1076         /* get a usecount for the kq itself */
1077         if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
1078                 return(error);
1079
1080         /* register all the change requests the user provided... */
1081         noutputs = 0;
1082         while (nchanges > 0 && error == 0) {
1083                 error = kevent_copyin(&changelist, &kev, p);
1084                 if (error)
1085                         break;
1086
1087                 kev.flags &= ~EV_SYSFLAGS;
1088                 error = kevent_register(kq, &kev, p);
1089                 if (error && nevents > 0) {
1090                         kev.flags = EV_ERROR;
1091                         kev.data = error;
1092                         error = kevent_copyout(&kev, &ueventlist, p);
1093                         if (error == 0) {
1094                                 nevents--;
1095                                 noutputs++;
1096                         }
1097                 }
1098                 nchanges--;
1099         }
1100
1101         /* store the continuation/completion data in the uthread */
1102         ut = (uthread_t)get_bsdthread_info(current_thread());
1103         cont_args = (struct _kevent *)&ut->uu_state.ss_kevent;
1104         cont_args->fp = fp;
1105         cont_args->fd = fd;
1106         cont_args->retval = retval;
1107         cont_args->eventlist = ueventlist;
1108         cont_args->eventcount = nevents;
1109         cont_args->eventout = noutputs;
1110
1111         if (nevents > 0 && noutputs == 0 && error == 0)
1112                 error = kevent_scan(kq, kevent_callback,
1113                                     kevent_continue, cont_args,
1114                                     &atv, p);
1115         kevent_continue(kq, cont_args, error);
1116         /* NOTREACHED */
1117         return error;
1118 }
1119
1120
1121 /*
1122  * kevent_callback - callback for each individual event
1123  *
1124  *      called with nothing locked
1125  *      caller holds a reference on the kqueue
1126  */
1127
1128 static int
1129 kevent_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
1130 {
1131         struct _kevent *cont_args;
1132         int error;
1133
1134         cont_args = (struct _kevent *)data;
1135         assert(cont_args->eventout < cont_arg->eventcount);
1136
1137         /*
1138          * Copy out the appropriate amount of event data for this user.
1139          */
1140         error = kevent_copyout(kevp, &cont_args->eventlist, current_proc());
1141
1142         /*
1143          * If there isn't space for additional events, return
1144          * a harmless error to stop the processing here
1145          */
1146         if (error == 0 && ++cont_args->eventout == cont_args->eventcount)
1147                         error = EWOULDBLOCK;
1148         return error;
1149 }
1150
1151 /*
1152  * kevent_register - add a new event to a kqueue
1153  *
1154  *      Creates a mapping between the event source and
1155  *      the kqueue via a knote data structure.
1156  *
1157  *      Because many/most the event sources are file
1158  *      descriptor related, the knote is linked off
1159  *      the filedescriptor table for quick access.
1160  *
1161  *      called with nothing locked
1162  *      caller holds a reference on the kqueue
1163  */
1164
1165 int
1166 kevent_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
1167 {
1168         struct filedesc *fdp = kq->kq_fdp;
1169         struct filterops *fops;
1170         struct fileproc *fp = NULL;
1171         struct knote *kn = NULL;
1172         int error = 0;
1173
1174         if (kev->filter < 0) {
1175                 if (kev->filter + EVFILT_SYSCOUNT < 0)
1176                         return (EINVAL);
1177                 fops = sysfilt_ops[~kev->filter];       /* to 0-base index */
1178         } else {
1179                 /*
1180                  * XXX
1181                  * filter attach routine is responsible for insuring that
1182                  * the identifier can be attached to it.
1183                  */
1184                 printf("unknown filter: %d\n", kev->filter);
1185                 return (EINVAL);
1186         }
1187
1188         /* this iocount needs to be dropped if it is not registered */
1189         if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 0)) != 0)
1190                 return(error);
1191
1192  restart:
1193         proc_fdlock(p);
1194         if (fops->f_isfd) {
1195                 /* fd-based knotes are linked off the fd table */
1196                 if (kev->ident < (u_int)fdp->fd_knlistsize) {
1197                         SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
1198                                 if (kq == kn->kn_kq &&
1199                                     kev->filter == kn->kn_filter)
1200                                         break;
1201                 }
1202         } else {
1203                 /* hash non-fd knotes here too */
1204                 if (fdp->fd_knhashmask != 0) {
1205                         struct klist *list;
1206
1207                         list = &fdp->fd_knhash[
1208                             KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
1209                         SLIST_FOREACH(kn, list, kn_link)
1210                                 if (kev->ident == kn->kn_id &&
1211                                     kq == kn->kn_kq &&
1212                                     kev->filter == kn->kn_filter)
1213                                         break;
1214                 }
1215         }
1216
1217         /*
1218          * kn now contains the matching knote, or NULL if no match
1219          */
1220         if (kn == NULL) {
1221                 if ((kev->flags & (EV_ADD|EV_DELETE)) == EV_ADD) {
1222                         kn = knote_alloc();
1223                         if (kn == NULL) {
1224                                 proc_fdunlock(p);
1225                                 error = ENOMEM;
1226                                 goto done;
1227                         }
1228                         kn->kn_fp = fp;
1229                         kn->kn_kq = kq;
1230                         kn->kn_tq = &kq->kq_head;
1231                         kn->kn_fop = fops;
1232                         kn->kn_sfflags = kev->fflags;
1233                         kn->kn_sdata = kev->data;
1234                         kev->fflags = 0;
1235                         kev->data = 0;
1236                         kn->kn_kevent = *kev;
1237                         kn->kn_inuse = 1;  /* for f_attach() */
1238                         kn->kn_status = 0;
1239
1240                         /* before anyone can find it */
1241                         if (kev->flags & EV_DISABLE)
1242                                 kn->kn_status |= KN_DISABLED;
1243
1244                         error = knote_fdpattach(kn, fdp, p);
1245                         proc_fdunlock(p);
1246
1247                         if (error) {
1248                                 knote_free(kn);
1249                                 goto done;
1250                         }
1251
1252                         /*
1253                          * apply reference count to knote structure, and
1254                          * do not release it at the end of this routine.
1255                          */
1256                         fp = NULL;
1257
1258                         /*
1259                          * If the attach fails here, we can drop it knowing
1260                          * that nobody else has a reference to the knote.
1261                          */
1262                         if ((error = fops->f_attach(kn)) != 0) {
1263                                 knote_drop(kn, p);
1264                                 goto done;
1265                         }
1266                 } else {
1267                         proc_fdunlock(p);
1268                         error = ENOENT;
1269                         goto done;
1270                 }
1271         } else {
1272                 /* existing knote - get kqueue lock */
1273                 kqlock(kq);
1274                 proc_fdunlock(p);
1275
1276                 if (kev->flags & EV_DELETE) {
1277                         knote_dequeue(kn);
1278                         kn->kn_status |= KN_DISABLED;
1279                         if (kqlock2knotedrop(kq, kn)) {
1280                                 kn->kn_fop->f_detach(kn);
1281                                 knote_drop(kn, p);
1282                         }
1283                         goto done;
1284                 }
1285
1286                 /* update status flags for existing knote */
1287                 if (kev->flags & EV_DISABLE) {
1288                         knote_dequeue(kn);
1289                         kn->kn_status |= KN_DISABLED;
1290                 } else if (kev->flags & EV_ENABLE) {
1291                         kn->kn_status &= ~KN_DISABLED;
1292                         if (kn->kn_status & KN_ACTIVE)
1293                                 knote_enqueue(kn);
1294                 }
1295
1296                 /*
1297                  * If somebody is in the middle of dropping this
1298                  * knote - go find/insert a new one.  But we have
1299                  * wait for this one to go away first.
1300                  */
1301                 if (!kqlock2knoteusewait(kq, kn))
1302                         /* kqueue unlocked */
1303                         goto restart;
1304
1305                 /*
1306                  * The user may change some filter values after the
1307                  * initial EV_ADD, but doing so will not reset any
1308                  * filter which have already been triggered.
1309                  */
1310                 kn->kn_sfflags = kev->fflags;
1311                 kn->kn_sdata = kev->data;
1312                 kn->kn_kevent.udata = kev->udata;
1313         }
1314
1315         /* still have use ref on knote */
1316         if (kn->kn_fop->f_event(kn, 0)) {
1317                 if (knoteuse2kqlock(kq, kn))
1318                         knote_activate(kn);
1319                 kqunlock(kq);
1320         } else {
1321                 knote_put(kn);
1322         }
1323
1324 done:
1325         if (fp != NULL)
1326                 fp_drop(p, kev->ident, fp, 0);
1327         return (error);
1328 }
1329
1330 /*
1331  * kevent_process - process the triggered events in a kqueue
1332  *
1333  *      Walk the queued knotes and validate that they are
1334  *      really still triggered events by calling the filter
1335  *      routines (if necessary).  Hold a use reference on
1336  *      the knote to avoid it being detached. For each event
1337  *      that is still considered triggered, invoke the
1338  *      callback routine provided.
1339  *
1340  *      caller holds a reference on the kqueue.
1341  *      kqueue locked on entry and exit - but may be dropped
1342  */
1343
1344 static int
1345 kevent_process(struct kqueue *kq,
1346                kevent_callback_t callback,
1347                void *data,
1348                int *countp,
1349                struct proc *p)
1350 {
1351         struct knote *kn;
1352         struct kevent kev;
1353         int nevents;
1354         int error;
1355
1356  restart:
1357         if (kq->kq_count == 0) {
1358                 *countp = 0;
1359                 return 0;
1360         }
1361
1362         /* if someone else is processing the queue, wait */
1363         if (!TAILQ_EMPTY(&kq->kq_inprocess)) {
1364                 assert_wait(&kq->kq_inprocess, THREAD_UNINT);
1365                 kq->kq_state |= KQ_PROCWAIT;
1366                 kqunlock(kq);
1367                 thread_block(THREAD_CONTINUE_NULL);
1368                 kqlock(kq);
1369                 goto restart;
1370         }
1371
1372         error = 0;
1373         nevents = 0;
1374         while (error == 0 &&
1375                (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) {
1376
1377                 /*
1378                  * Take note off the active queue.
1379                  *
1380                  * Non-EV_ONESHOT events must be re-validated.
1381                  *
1382                  * Convert our lock to a use-count and call the event's
1383                  * filter routine to update.
1384                  *
1385                  * If the event is valid, or triggered while the kq
1386                  * is unlocked, move to the inprocess queue for processing.
1387                  */
1388
1389                 if ((kn->kn_flags & EV_ONESHOT) == 0) {
1390                         int result;
1391                         knote_deactivate(kn);
1392
1393                         if (kqlock2knoteuse(kq, kn)) {
1394
1395                                 /* call the filter with just a ref */
1396                                 result = kn->kn_fop->f_event(kn, 0);
1397
1398                                 /* if it's still alive, make sure it's active */
1399                                 if (knoteuse2kqlock(kq, kn) && result) {
1400                                         /* may have been reactivated in filter*/
1401                                         if (!(kn->kn_status & KN_ACTIVE)) {
1402                                                 knote_activate(kn);
1403                                         }
1404                                 } else {
1405                                         continue;
1406                                 }
1407                         } else {
1408                                 continue;
1409                         }
1410                 }
1411
1412                 /* knote is active: move onto inprocess queue */
1413                 assert(kn->kn_tq == &kq->kq_head);
1414                 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1415                 kn->kn_tq = &kq->kq_inprocess;
1416                 TAILQ_INSERT_TAIL(&kq->kq_inprocess, kn, kn_tqe);
1417
1418                 /*
1419                  * Got a valid triggered knote with the kqueue
1420                  * still locked.  Snapshot the data, and determine
1421                  * how to dispatch the knote for future events.
1422                  */
1423                 kev = kn->kn_kevent;
1424
1425                 /* now what happens to it? */
1426                 if (kn->kn_flags & EV_ONESHOT) {
1427                         knote_deactivate(kn);
1428                         if (kqlock2knotedrop(kq, kn)) {
1429                                 kn->kn_fop->f_detach(kn);
1430                                 knote_drop(kn, p);
1431                         }
1432                 } else if (kn->kn_flags & EV_CLEAR) {
1433                         knote_deactivate(kn);
1434                         kn->kn_data = 0;
1435                         kn->kn_fflags = 0;
1436                         kqunlock(kq);
1437                 } else {
1438                         /*
1439                          * leave on in-process queue.  We'll
1440                          * move all the remaining ones back
1441                          * the kq queue and wakeup any
1442                          * waiters when we are done.
1443                          */
1444                         kqunlock(kq);
1445                 }
1446
1447                 /* callback to handle each event as we find it */
1448                 error = (callback)(kq, &kev, data);
1449                 nevents++;
1450
1451                 kqlock(kq);
1452         }
1453
1454         /*
1455          * With the kqueue still locked, move any knotes
1456          * remaining on the in-process queue back to the
1457          * kq's queue and wake up any waiters.
1458          */
1459         while ((kn = TAILQ_FIRST(&kq->kq_inprocess)) != NULL) {
1460                 assert(kn->kn_tq == &kq->kq_inprocess);
1461                 TAILQ_REMOVE(&kq->kq_inprocess, kn, kn_tqe);
1462                 kn->kn_tq = &kq->kq_head;
1463                 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1464         }
1465         if (kq->kq_state & KQ_PROCWAIT) {
1466                 kq->kq_state &= ~KQ_PROCWAIT;
1467                 thread_wakeup(&kq->kq_inprocess);
1468         }
1469
1470         *countp = nevents;
1471         return error;
1472 }
1473
1474
1475 static void
1476 kevent_scan_continue(void *data, wait_result_t wait_result)
1477 {
1478         uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1479         struct _kevent_scan * cont_args = &ut->uu_state.ss_kevent_scan;
1480         struct kqueue *kq = (struct kqueue *)data;
1481         int error;
1482         int count;
1483
1484         /* convert the (previous) wait_result to a proper error */
1485         switch (wait_result) {
1486         case THREAD_AWAKENED:
1487                 kqlock(kq);
1488                 error = kevent_process(kq, cont_args->call, cont_args, &count, current_proc());
1489                 if (error == 0 && count == 0) {
1490                         assert_wait_deadline(kq, THREAD_ABORTSAFE, cont_args->deadline);
1491                         kq->kq_state |= KQ_SLEEP;
1492                         kqunlock(kq);
1493                         thread_block_parameter(kevent_scan_continue, kq);
1494                         /* NOTREACHED */
1495                 }
1496                 kqunlock(kq);
1497                 break;
1498         case THREAD_TIMED_OUT:
1499                 error = EWOULDBLOCK;
1500                 break;
1501         case THREAD_INTERRUPTED:
1502                 error = EINTR;
1503                 break;
1504         default:
1505                 panic("kevent_scan_cont() - invalid wait_result (%d)", wait_result);
1506                 error = 0;
1507         }
1508
1509         /* call the continuation with the results */
1510         assert(cont_args->cont != NULL);
1511         (cont_args->cont)(kq, cont_args->data, error);
1512 }
1513
1514
1515 /*
1516  * kevent_scan - scan and wait for events in a kqueue
1517  *
1518  *      Process the triggered events in a kqueue.
1519  *
1520  *      If there are no events triggered arrange to
1521  *      wait for them. If the caller provided a
1522  *      continuation routine, then kevent_scan will
1523  *      also.
1524  *
1525  *      The callback routine must be valid.
1526  *      The caller must hold a use-count reference on the kq.
1527  */
1528
1529 int
1530 kevent_scan(struct kqueue *kq,
1531             kevent_callback_t callback,
1532             kevent_continue_t continuation,
1533             void *data,
1534             struct timeval *atvp,
1535             struct proc *p)
1536 {
1537         thread_continue_t cont = THREAD_CONTINUE_NULL;
1538         uint64_t deadline;
1539         int error;
1540         int first;
1541
1542         assert(callback != NULL);
1543
1544         first = 1;
1545         for (;;) {
1546                 wait_result_t wait_result;
1547                 int count;
1548
1549                 /*
1550                  * Make a pass through the kq to find events already
1551                  * triggered.
1552                  */
1553                 kqlock(kq);
1554                 error = kevent_process(kq, callback, data, &count, p);
1555                 if (error || count)
1556                         break; /* lock still held */
1557
1558                 /* looks like we have to consider blocking */
1559                 if (first) {
1560                         first = 0;
1561                         /* convert the timeout to a deadline once */
1562                         if (atvp->tv_sec || atvp->tv_usec) {
1563                                 uint32_t seconds, nanoseconds;
1564                                 uint64_t now;
1565
1566                                 clock_get_uptime(&now);
1567                                 nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC +
1568                                                             atvp->tv_usec * NSEC_PER_USEC,
1569                                                             &deadline);
1570                                 if (now >= deadline) {
1571                                         /* non-blocking call */
1572                                         error = EWOULDBLOCK;
1573                                         break; /* lock still held */
1574                                 }
1575                                 deadline -= now;
1576                                 clock_absolutetime_interval_to_deadline(deadline, &deadline);
1577                         } else {
1578                                 deadline = 0;   /* block forever */
1579                         }
1580
1581                         if (continuation) {
1582                                 uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1583                                 struct _kevent_scan *cont_args = &ut->uu_state.ss_kevent_scan;
1584
1585                                 cont_args->call = callback;
1586                                 cont_args->cont = continuation;
1587                                 cont_args->deadline = deadline;
1588                                 cont_args->data = data;
1589                                 cont = kevent_scan_continue;
1590                         }
1591                 }
1592
1593                 /* go ahead and wait */
1594                 assert_wait_deadline(kq, THREAD_ABORTSAFE, deadline);
1595                 kq->kq_state |= KQ_SLEEP;
1596                 kqunlock(kq);
1597                 wait_result = thread_block_parameter(cont, kq);
1598                 /* NOTREACHED if (continuation != NULL) */
1599
1600                 switch (wait_result) {
1601                 case THREAD_AWAKENED:
1602                         continue;
1603                 case THREAD_TIMED_OUT:
1604                         return EWOULDBLOCK;
1605                 case THREAD_INTERRUPTED:
1606                         return EINTR;
1607                 default:
1608                         panic("kevent_scan - bad wait_result (%d)",
1609                               wait_result);
1610                         error = 0;
1611                 }
1612         }
1613         kqunlock(kq);
1614         return error;
1615 }
1616
1617
1618 /*
1619  * XXX
1620  * This could be expanded to call kqueue_scan, if desired.
1621  */
1622 /*ARGSUSED*/
1623 static int
1624 kqueue_read(__unused struct fileproc *fp,
1625                         __unused struct uio *uio,
1626                         __unused kauth_cred_t cred,
1627                         __unused int flags,
1628                         __unused struct proc *p)
1629 {
1630         return (ENXIO);
1631 }
1632
1633 /*ARGSUSED*/
1634 static int
1635 kqueue_write(__unused struct fileproc *fp,
1636                          __unused struct uio *uio,
1637                          __unused kauth_cred_t cred,
1638                          __unused int flags,
1639                          __unused struct proc *p)
1640 {
1641         return (ENXIO);
1642 }
1643
1644 /*ARGSUSED*/
1645 static int
1646 kqueue_ioctl(__unused struct fileproc *fp,
1647                          __unused u_long com,
1648                          __unused caddr_t data,
1649                          __unused struct proc *p)
1650 {
1651         return (ENOTTY);
1652 }
1653
1654 /*ARGSUSED*/
1655 static int
1656 kqueue_select(struct fileproc *fp, int which, void *wql, struct proc *p)
1657 {
1658         struct kqueue *kq = (struct kqueue *)fp->f_data;
1659         int retnum = 0;
1660
1661         if (which == FREAD) {
1662                 kqlock(kq);
1663                 if (kq->kq_count) {
1664                         retnum = 1;
1665                 } else {
1666                         selrecord(p, &kq->kq_sel, wql);
1667                         kq->kq_state |= KQ_SEL;
1668                 }
1669                 kqunlock(kq);
1670         }
1671         return (retnum);
1672 }
1673
1674 /*
1675  * kqueue_close -
1676  */
1677 /*ARGSUSED*/
1678 static int
1679 kqueue_close(struct fileglob *fg, struct proc *p)
1680 {
1681         struct kqueue *kq = (struct kqueue *)fg->fg_data;
1682
1683         kqueue_dealloc(kq, p);
1684         fg->fg_data = NULL;
1685         return (0);
1686 }
1687
1688 /*ARGSUSED*/
1689 /*
1690  * The callers has taken a use-count reference on this kqueue and will donate it
1691  * to the kqueue we are being added to.  This keeps the kqueue from closing until
1692  * that relationship is torn down.
1693  */
1694 static int
1695 kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
1696 {
1697         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
1698
1699         if (kn->kn_filter != EVFILT_READ)
1700                 return (1);
1701
1702         kn->kn_fop = &kqread_filtops;
1703         kqlock(kq);
1704         KNOTE_ATTACH(&kq->kq_sel.si_note, kn);
1705         kqunlock(kq);
1706         return (0);
1707 }
1708
1709 /*ARGSUSED*/
1710 int
1711 kqueue_stat(struct fileproc *fp, struct stat *st, __unused struct proc *p)
1712 {
1713         struct kqueue *kq = (struct kqueue *)fp->f_data;
1714
1715         bzero((void *)st, sizeof(*st));
1716         st->st_size = kq->kq_count;
1717         st->st_blksize = sizeof(struct kevent);
1718         st->st_mode = S_IFIFO;
1719         return (0);
1720 }
1721
1722 /*
1723  * Called with the kqueue locked
1724  */
1725 static void
1726 kqueue_wakeup(struct kqueue *kq)
1727 {
1728
1729         if (kq->kq_state & KQ_SLEEP) {
1730                 kq->kq_state &= ~KQ_SLEEP;
1731                 thread_wakeup(kq);
1732         }
1733         if (kq->kq_state & KQ_SEL) {
1734                 kq->kq_state &= ~KQ_SEL;
1735                 selwakeup(&kq->kq_sel);
1736         }
1737         KNOTE(&kq->kq_sel.si_note, 0);
1738 }
1739
/*
 * klist_init - initialize a knote list head to the empty list.
 */
void
klist_init(struct klist *list)
{
        SLIST_INIT(list);
}
1745
1746
1747 /*
1748  * Query/Post each knote in the object's list
1749  *
1750  *      The object lock protects the list. It is assumed
1751  *      that the filter/event routine for the object can
1752  *      determine that the object is already locked (via
1753  *      the hind) and not deadlock itself.
1754  *
1755  *      The object lock should also hold off pending
1756  *      detach/drop operations.  But we'll prevent it here
1757  *      too - just in case.
1758  */
1759 void
1760 knote(struct klist *list, long hint)
1761 {
1762         struct knote *kn;
1763
1764         SLIST_FOREACH(kn, list, kn_selnext) {
1765                 struct kqueue *kq = kn->kn_kq;
1766
1767                 kqlock(kq);
1768                 if (kqlock2knoteuse(kq, kn)) {
1769                         int result;
1770
1771                         /* call the event with only a use count */
1772                         result = kn->kn_fop->f_event(kn, hint);
1773
1774                         /* if its not going away and triggered */
1775                         if (knoteuse2kqlock(kq, kn) && result)
1776                                 knote_activate(kn);
1777                         /* lock held again */
1778                 }
1779                 kqunlock(kq);
1780         }
1781 }
1782
1783 /*
1784  * attach a knote to the specified list.  Return true if this is the first entry.
1785  * The list is protected by whatever lock the object it is associated with uses.
1786  */
1787 int
1788 knote_attach(struct klist *list, struct knote *kn)
1789 {
1790         int ret = SLIST_EMPTY(list);
1791         SLIST_INSERT_HEAD(list, kn, kn_selnext);
1792         return ret;
1793 }
1794
1795 /*
1796  * detach a knote from the specified list.  Return true if that was the last entry.
1797  * The list is protected by whatever lock the object it is associated with uses.
1798  */
1799 int
1800 knote_detach(struct klist *list, struct knote *kn)
1801 {
1802         SLIST_REMOVE(list, kn, knote, kn_selnext);
1803         return SLIST_EMPTY(list);
1804 }
1805
1806 /*
1807  * remove all knotes referencing a specified fd
1808  *
1809  * Essentially an inlined knote_remove & knote_drop
1810  * when we know for sure that the thing is a file
1811  *
1812  * Entered with the proc_fd lock already held.
1813  * It returns the same way, but may drop it temporarily.
1814  */
1815 void
1816 knote_fdclose(struct proc *p, int fd)
1817 {
1818         struct filedesc *fdp = p->p_fd;
1819         struct klist *list;
1820         struct knote *kn;
1821
1822         list = &fdp->fd_knlist[fd];
1823         while ((kn = SLIST_FIRST(list)) != NULL) {
1824                 struct kqueue *kq = kn->kn_kq;
1825
1826                 kqlock(kq);
1827                 proc_fdunlock(p);
1828
1829                 /*
1830                  * Convert the lock to a drop ref.
1831                  * If we get it, go ahead and drop it.
1832                  * Otherwise, we waited for it to
1833                  * be dropped by the other guy, so
1834                  * it is safe to move on in the list.
1835                  */
1836                 if (kqlock2knotedrop(kq, kn)) {
1837                         kn->kn_fop->f_detach(kn);
1838                         knote_drop(kn, p);
1839                 }
1840
1841                 proc_fdlock(p);
1842
1843                 /* the fd tables may have changed - start over */
1844                 list = &fdp->fd_knlist[fd];
1845         }
1846 }
1847
1848 /* proc_fdlock held on entry (and exit) */
1849 static int
1850 knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
1851 {
1852         struct klist *list = NULL;
1853
1854         if (! kn->kn_fop->f_isfd) {
1855                 if (fdp->fd_knhashmask == 0)
1856                         fdp->fd_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
1857                             &fdp->fd_knhashmask);
1858                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1859         } else {
1860                 if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
1861                         u_int size = 0;
1862
1863                         /* have to grow the fd_knlist */
1864                         size = fdp->fd_knlistsize;
1865                         while (size <= kn->kn_id)
1866                                 size += KQEXTENT;
1867                         MALLOC(list, struct klist *,
1868                                size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
1869                         if (list == NULL)
1870                                 return (ENOMEM);
1871
1872                         bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
1873                               fdp->fd_knlistsize * sizeof(struct klist *));
1874                         bzero((caddr_t)list +
1875                               fdp->fd_knlistsize * sizeof(struct klist *),
1876                               (size - fdp->fd_knlistsize) * sizeof(struct klist *));
1877                         FREE(fdp->fd_knlist, M_KQUEUE);
1878                         fdp->fd_knlist = list;
1879                         fdp->fd_knlistsize = size;
1880                 }
1881                 list = &fdp->fd_knlist[kn->kn_id];
1882         }
1883         SLIST_INSERT_HEAD(list, kn, kn_link);
1884         return (0);
1885 }
1886
1887
1888
1889 /*
1890  * should be called at spl == 0, since we don't want to hold spl
1891  * while calling fdrop and free.
1892  */
1893 static void
1894 knote_drop(struct knote *kn, struct proc *p)
1895 {
1896         struct filedesc *fdp = p->p_fd;
1897         struct kqueue *kq = kn->kn_kq;
1898         struct klist *list;
1899
1900         proc_fdlock(p);
1901         if (kn->kn_fop->f_isfd)
1902                 list = &fdp->fd_knlist[kn->kn_id];
1903         else
1904                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1905
1906         SLIST_REMOVE(list, kn, knote, kn_link);
1907         kqlock(kq);
1908         knote_dequeue(kn);
1909         if (kn->kn_status & KN_DROPWAIT)
1910                 thread_wakeup(&kn->kn_status);
1911         kqunlock(kq);
1912         proc_fdunlock(p);
1913
1914         if (kn->kn_fop->f_isfd)
1915                 fp_drop(p, kn->kn_id, kn->kn_fp, 0);
1916
1917         knote_free(kn);
1918 }
1919
1920 /* called with kqueue lock held */
1921 static void
1922 knote_activate(struct knote *kn)
1923 {
1924         struct kqueue *kq = kn->kn_kq;
1925
1926         kn->kn_status |= KN_ACTIVE;
1927         knote_enqueue(kn);
1928         kqueue_wakeup(kq);
1929  }
1930
1931 /* called with kqueue lock held */
1932 static void
1933 knote_deactivate(struct knote *kn)
1934 {
1935         kn->kn_status &= ~KN_ACTIVE;
1936         knote_dequeue(kn);
1937 }
1938
1939 /* called with kqueue lock held */
1940 static void
1941 knote_enqueue(struct knote *kn)
1942 {
1943         struct kqueue *kq = kn->kn_kq;
1944
1945         if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
1946                 struct kqtailq *tq = kn->kn_tq;
1947
1948                 TAILQ_INSERT_TAIL(tq, kn, kn_tqe);
1949                 kn->kn_status |= KN_QUEUED;
1950                 kq->kq_count++;
1951         }
1952 }
1953
1954 /* called with kqueue lock held */
1955 static void
1956 knote_dequeue(struct knote *kn)
1957 {
1958         struct kqueue *kq = kn->kn_kq;
1959
1960         assert((kn->kn_status & KN_DISABLED) == 0);
1961         if ((kn->kn_status & KN_QUEUED) == KN_QUEUED) {
1962                 struct kqtailq *tq = kn->kn_tq;
1963
1964                 TAILQ_REMOVE(tq, kn, kn_tqe);
1965                 kn->kn_tq = &kq->kq_head;
1966                 kn->kn_status &= ~KN_QUEUED;
1967                 kq->kq_count--;
1968         }
1969 }
1970
1971 void
1972 knote_init(void)
1973 {
1974         knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), 8192, "knote zone");
1975
1976         /* allocate kq lock group attribute and group */
1977         kq_lck_grp_attr= lck_grp_attr_alloc_init();
1978
1979         kq_lck_grp = lck_grp_alloc_init("kqueue",  kq_lck_grp_attr);
1980
1981         /* Allocate kq lock attribute */
1982         kq_lck_attr = lck_attr_alloc_init();
1983
1984         /* Initialize the timer filter lock */
1985         lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
1986 }
1987 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
1988
1989 static struct knote *
1990 knote_alloc(void)
1991 {
1992         return ((struct knote *)zalloc(knote_zone));
1993 }
1994
1995 static void
1996 knote_free(struct knote *kn)
1997 {
1998         zfree(knote_zone, kn);
1999 }
2000
2001 #include <sys/param.h>
2002 #include <sys/socket.h>
2003 #include <sys/protosw.h>
2004 #include <sys/domain.h>
2005 #include <sys/mbuf.h>
2006 #include <sys/kern_event.h>
2007 #include <sys/malloc.h>
2008 #include <sys/sys_domain.h>
2009 #include <sys/syslog.h>
2010
2011
/*
 * Socket entry points for kernel event sockets.  All three are defined
 * later in this file and are referenced by event_usrreqs below.
 */
static int kev_attach(struct socket *so, int proto, struct proc *p);
static int kev_detach(struct socket *so);
static int kev_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p);

/*
 * User-request table for the kernel event protocol.
 *
 * Only attach, control and detach are implemented in this file
 * (kev_attach, kev_control, kev_detach); receive uses the generic
 * soreceive.  Every other slot is filled with a pru_*_notsupp stub or
 * pru_sense_null.
 *
 * NOTE(review): the initializer is positional, so the entry order must
 * match the member order of struct pr_usrreqs (declared elsewhere) -
 * confirm against that declaration before reordering anything.
 */
struct pr_usrreqs event_usrreqs = {
     pru_abort_notsupp, pru_accept_notsupp, kev_attach, pru_bind_notsupp, pru_connect_notsupp,
     pru_connect2_notsupp, kev_control, kev_detach, pru_disconnect_notsupp,
     pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp, pru_rcvoob_notsupp,
     pru_send_notsupp, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp,
     pru_sosend_notsupp, soreceive, pru_sopoll_notsupp
};
2023
/*
 * Protocol switch table for kernel events: a single SOCK_RAW,
 * SYSPROTO_EVENT entry in systemdomain, flagged PR_ATOMIC, whose user
 * requests are dispatched through event_usrreqs.  All remaining slots
 * are zero.  kern_event_init() passes this table to net_add_proto().
 *
 * NOTE(review): the initializer is positional and the __APPLE__
 * conditionals add extra members; the meaning of each zero slot depends
 * on struct protosw (declared elsewhere) - confirm against that
 * declaration before editing.
 */
struct protosw eventsw[] = {
     {
          SOCK_RAW,             &systemdomain,  SYSPROTO_EVENT,         PR_ATOMIC,
          0,            0,              0,              0,
          0,
          0,            0,              0,              0,
#if __APPLE__
          0,
#endif
          &event_usrreqs,
          0,            0,              0,
#if __APPLE__
          {0, 0},       0,              {0}
#endif
     }
};
2040
/*
 * List of all kernel event PCBs.  kev_attach() inserts, kev_detach()
 * removes and kev_post_msg() walks it, each while holding evt_mutex.
 */
static
struct kern_event_head kern_event_head;

/*
 * Id stamped on the most recently posted event: pre-incremented in
 * kev_post_msg() and reported by the SIOCGKEVID ioctl in kev_control().
 */
static u_long static_event_id = 0;
struct domain *sysdom = &systemdomain;

/*
 * Lock group, lock attribute and lock group attribute for the kernel
 * event list, and the mutex itself; all four are set up in
 * kern_event_init().
 */
static lck_grp_t                *evt_mtx_grp;
static lck_attr_t               *evt_mtx_attr;
static lck_grp_attr_t   *evt_mtx_grp_attr;
lck_mtx_t                               *evt_mutex;
2051 /*
2052  * Install the protosw's for the NKE manager.  Invoked at
2053  *  extension load time
2054  */
2055 int
2056 kern_event_init(void)
2057 {
2058     int retval;
2059
2060     if ((retval = net_add_proto(eventsw, &systemdomain)) != 0) {
2061             log(LOG_WARNING, "Can't install kernel events protocol (%d)\n", retval);
2062             return(retval);
2063         }
2064
2065         /*
2066          * allocate lock group attribute and group for kern event
2067          */
2068         evt_mtx_grp_attr = lck_grp_attr_alloc_init();
2069
2070         evt_mtx_grp = lck_grp_alloc_init("eventlist", evt_mtx_grp_attr);
2071
2072         /*
2073          * allocate the lock attribute for mutexes
2074          */
2075         evt_mtx_attr = lck_attr_alloc_init();
2076         evt_mutex = lck_mtx_alloc_init(evt_mtx_grp, evt_mtx_attr);
2077         if (evt_mutex == NULL)
2078                         return (ENOMEM);
2079
2080     return(KERN_SUCCESS);
2081 }
2082
2083 static int
2084 kev_attach(struct socket *so, __unused int proto, __unused struct proc *p)
2085 {
2086      int error;
2087      struct kern_event_pcb  *ev_pcb;
2088
2089      error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE);
2090      if (error)
2091           return error;
2092
2093      MALLOC(ev_pcb, struct kern_event_pcb *, sizeof(struct kern_event_pcb), M_PCB, M_WAITOK);
2094      if (ev_pcb == 0)
2095           return ENOBUFS;
2096
2097      ev_pcb->ev_socket = so;
2098      ev_pcb->vendor_code_filter = 0xffffffff;
2099
2100      so->so_pcb = (caddr_t) ev_pcb;
2101          lck_mtx_lock(evt_mutex);
2102      LIST_INSERT_HEAD(&kern_event_head, ev_pcb, ev_link);
2103          lck_mtx_unlock(evt_mutex);
2104
2105      return 0;
2106 }
2107
2108
2109 static int
2110 kev_detach(struct socket *so)
2111 {
2112      struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2113
2114      if (ev_pcb != 0) {
2115                 lck_mtx_lock(evt_mutex);
2116                 LIST_REMOVE(ev_pcb, ev_link);
2117                 lck_mtx_unlock(evt_mutex);
2118                 FREE(ev_pcb, M_PCB);
2119                 so->so_pcb = 0;
2120                 so->so_flags |= SOF_PCBCLEARING;
2121      }
2122
2123      return 0;
2124 }
2125
2126 /*
2127  * For now, kev_vender_code and mbuf_tags use the same
2128  * mechanism.
2129  */
2130 extern errno_t mbuf_tag_id_find_internal(const char *string, u_long *out_id,
2131                                                                                  int create);
2132
2133 errno_t kev_vendor_code_find(
2134         const char      *string,
2135         u_long          *out_vender_code)
2136 {
2137         if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
2138                 return EINVAL;
2139         }
2140         return mbuf_tag_id_find_internal(string, out_vender_code, 1);
2141 }
2142
2143 extern void mbuf_tag_id_first_last(u_long *first, u_long *last);
2144
2145 errno_t  kev_msg_post(struct kev_msg *event_msg)
2146 {
2147         u_long  min_vendor, max_vendor;
2148
2149         mbuf_tag_id_first_last(&min_vendor, &max_vendor);
2150
2151         if (event_msg == NULL)
2152                 return EINVAL;
2153
2154         /* Limit third parties to posting events for registered vendor codes only */
2155         if (event_msg->vendor_code < min_vendor ||
2156                 event_msg->vendor_code > max_vendor)
2157         {
2158                 return EINVAL;
2159         }
2160
2161         return kev_post_msg(event_msg);
2162 }
2163
2164
2165 int  kev_post_msg(struct kev_msg *event_msg)
2166 {
2167      struct mbuf *m, *m2;
2168      struct kern_event_pcb  *ev_pcb;
2169      struct kern_event_msg  *ev;
2170      char              *tmp;
2171      unsigned long     total_size;
2172      int               i;
2173
2174         /* Verify the message is small enough to fit in one mbuf w/o cluster */
2175         total_size = KEV_MSG_HEADER_SIZE;
2176
2177         for (i = 0; i < 5; i++) {
2178                 if (event_msg->dv[i].data_length == 0)
2179                         break;
2180                 total_size += event_msg->dv[i].data_length;
2181         }
2182
2183         if (total_size > MLEN) {
2184                 return EMSGSIZE;
2185         }
2186
2187      m = m_get(M_DONTWAIT, MT_DATA);
2188      if (m == 0)
2189           return ENOBUFS;
2190
2191      ev = mtod(m, struct kern_event_msg *);
2192      total_size = KEV_MSG_HEADER_SIZE;
2193
2194      tmp = (char *) &ev->event_data[0];
2195      for (i = 0; i < 5; i++) {
2196           if (event_msg->dv[i].data_length == 0)
2197                break;
2198
2199           total_size += event_msg->dv[i].data_length;
2200           bcopy(event_msg->dv[i].data_ptr, tmp,
2201                 event_msg->dv[i].data_length);
2202           tmp += event_msg->dv[i].data_length;
2203      }
2204
2205      ev->id = ++static_event_id;
2206      ev->total_size   = total_size;
2207      ev->vendor_code  = event_msg->vendor_code;
2208      ev->kev_class    = event_msg->kev_class;
2209      ev->kev_subclass = event_msg->kev_subclass;
2210      ev->event_code   = event_msg->event_code;
2211
2212      m->m_len = total_size;
2213      lck_mtx_lock(evt_mutex);
2214      for (ev_pcb = LIST_FIRST(&kern_event_head);
2215           ev_pcb;
2216           ev_pcb = LIST_NEXT(ev_pcb, ev_link)) {
2217
2218           if (ev_pcb->vendor_code_filter != KEV_ANY_VENDOR) {
2219                if (ev_pcb->vendor_code_filter != ev->vendor_code)
2220                     continue;
2221
2222                if (ev_pcb->class_filter != KEV_ANY_CLASS) {
2223                     if (ev_pcb->class_filter != ev->kev_class)
2224                          continue;
2225
2226                     if ((ev_pcb->subclass_filter != KEV_ANY_SUBCLASS) &&
2227                         (ev_pcb->subclass_filter != ev->kev_subclass))
2228                          continue;
2229                }
2230           }
2231
2232           m2 = m_copym(m, 0, m->m_len, M_NOWAIT);
2233           if (m2 == 0) {
2234                m_free(m);
2235                    lck_mtx_unlock(evt_mutex);
2236                return ENOBUFS;
2237           }
2238           socket_lock(ev_pcb->ev_socket, 1);
2239           if (sbappendrecord(&ev_pcb->ev_socket->so_rcv, m2))
2240                   sorwakeup(ev_pcb->ev_socket);
2241           socket_unlock(ev_pcb->ev_socket, 1);
2242      }
2243
2244      m_free(m);
2245      lck_mtx_unlock(evt_mutex);
2246      return 0;
2247 }
2248
2249 static int
2250 kev_control(struct socket *so,
2251                         u_long cmd,
2252                         caddr_t data,
2253                         __unused struct ifnet *ifp,
2254                         __unused struct proc *p)
2255 {
2256         struct kev_request *kev_req = (struct kev_request *) data;
2257         struct kern_event_pcb  *ev_pcb;
2258         struct kev_vendor_code *kev_vendor;
2259         u_long  *id_value = (u_long *) data;
2260
2261
2262         switch (cmd) {
2263
2264                 case SIOCGKEVID:
2265                         *id_value = static_event_id;
2266                         break;
2267
2268                 case SIOCSKEVFILT:
2269                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2270                         ev_pcb->vendor_code_filter = kev_req->vendor_code;
2271                         ev_pcb->class_filter     = kev_req->kev_class;
2272                         ev_pcb->subclass_filter  = kev_req->kev_subclass;
2273                         break;
2274
2275                 case SIOCGKEVFILT:
2276                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2277                         kev_req->vendor_code = ev_pcb->vendor_code_filter;
2278                         kev_req->kev_class   = ev_pcb->class_filter;
2279                         kev_req->kev_subclass = ev_pcb->subclass_filter;
2280                         break;
2281
2282                 case SIOCGKEVVENDOR:
2283                         kev_vendor = (struct kev_vendor_code*)data;
2284
2285                         /* Make sure string is NULL terminated */
2286                         kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0;
2287
2288                         return mbuf_tag_id_find_internal(kev_vendor->vendor_string,
2289                                                                                          &kev_vendor->vendor_code, 0);
2290
2291                 default:
2292                         return ENOTSUP;
2293         }
2294
2295         return 0;
2296 }
2297
2298
2299
2300 int
2301 fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
2302 {
2303         struct stat * st;
2304
2305         /* No need for the funnel as fd is kept alive */
2306
2307         st = &kinfo->kq_stat;
2308
2309         st->st_size = kq->kq_count;
2310         st->st_blksize = sizeof(struct kevent);
2311         st->st_mode = S_IFIFO;
2312         if (kq->kq_state & KQ_SEL)
2313                 kinfo->kq_state |=  PROC_KQUEUE_SELECT;
2314         if (kq->kq_state & KQ_SLEEP)
2315                 kinfo->kq_state |= PROC_KQUEUE_SLEEP;
2316
2317         return(0);
2318 }
2319