bsd/kern/kern_event.c

   1 /*
   2  * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License.  The rights granted to you under the
  10  * License may not be used to create, or enable the creation or
  11  * redistribution of, unlawful or unlicensed copies of an Apple operating
  12  * system, or to circumvent, violate, or enable the circumvention or
  13  * violation of, any terms of an Apple operating system software license
  14  * agreement.
  15  *
  16  * Please obtain a copy of the License at
  17  * http://www.opensource.apple.com/apsl/ and read it before using this
  18  * file.
  19  *
  20  * The Original Code and all software distributed under the License are
  21  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  22  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  23  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  24  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  25  * Please see the License for the specific language governing rights and
  26  * limitations under the License.
  27  *
  28  * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
  29  *
  30  */
  31 /*-
  32  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
  33  * All rights reserved.
  34  *
  35  * Redistribution and use in source and binary forms, with or without
  36  * modification, are permitted provided that the following conditions
  37  * are met:
  38  * 1. Redistributions of source code must retain the above copyright
  39  *    notice, this list of conditions and the following disclaimer.
  40  * 2. Redistributions in binary form must reproduce the above copyright
  41  *    notice, this list of conditions and the following disclaimer in the
  42  *    documentation and/or other materials provided with the distribution.
  43  *
  44  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  45  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  46  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  47  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  48  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  49  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  50  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  51  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  52  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  53  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  54  * SUCH DAMAGE.
  55  */
  56 /*
  57  *      @(#)kern_event.c       1.0 (3/31/2000)
  58  */
  59 #include <stdint.h>
  60
  61 #include <sys/param.h>
  62 #include <sys/systm.h>
  63 #include <sys/filedesc.h>
  64 #include <sys/kernel.h>
  65 #include <sys/proc_internal.h>
  66 #include <sys/kauth.h>
  67 #include <sys/malloc.h>
  68 #include <sys/unistd.h>
  69 #include <sys/file_internal.h>
  70 #include <sys/fcntl.h>
  71 #include <sys/select.h>
  72 #include <sys/queue.h>
  73 #include <sys/event.h>
  74 #include <sys/eventvar.h>
  75 #include <sys/protosw.h>
  76 #include <sys/socket.h>
  77 #include <sys/socketvar.h>
  78 #include <sys/stat.h>
  79 #include <sys/sysctl.h>
  80 #include <sys/uio.h>
  81 #include <sys/sysproto.h>
  82 #include <sys/user.h>
  83 #include <string.h>
  84 #include <sys/proc_info.h>
  85
  86 #include <kern/lock.h>
  87 #include <kern/clock.h>
  88 #include <kern/thread_call.h>
  89 #include <kern/sched_prim.h>
  90 #include <kern/zalloc.h>
  91 #include <kern/assert.h>
  92
  93 #include <libkern/libkern.h>
  94
  95 extern void unix_syscall_return(int);
  96
  97 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
  98
  99 static inline void kqlock(struct kqueue *kq);
 100 static inline void kqunlock(struct kqueue *kq);
 101
 102 static int      kqlock2knoteuse(struct kqueue *kq, struct knote *kn);
 103 static int      kqlock2knoteusewait(struct kqueue *kq, struct knote *kn);
 104 static int      kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
 105 static int      knoteuse2kqlock(struct kqueue *kq, struct knote *kn);
 106
 107 static void     kqueue_wakeup(struct kqueue *kq);
 108 static int      kqueue_read(struct fileproc *fp, struct uio *uio,
 109                     kauth_cred_t cred, int flags, struct proc *p);
 110 static int      kqueue_write(struct fileproc *fp, struct uio *uio,
 111                     kauth_cred_t cred, int flags, struct proc *p);
 112 static int      kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
 113                     struct proc *p);
 114 static int      kqueue_select(struct fileproc *fp, int which, void *wql,
 115                     struct proc *p);
 116 static int      kqueue_close(struct fileglob *fp, struct proc *p);
 117 static int      kqueue_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
 118 extern int      kqueue_stat(struct fileproc *fp, struct stat *st, struct proc *p);
 119
/*
 * File operations vector for kqueue descriptors; kqueue() installs it
 * as fp->f_ops on every descriptor it creates.
 */
static struct fileops kqueueops = {
        kqueue_read,
        kqueue_write,
        kqueue_ioctl,
        kqueue_select,
        kqueue_close,
        kqueue_kqfilter,
        0       /* NOTE(review): last slot left unset; its meaning depends on struct fileops, which is not visible here */
};
 129
 130 static int kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p);
 131 static int kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p);
 132
 133 static int      kevent_callback(struct kqueue *kq, struct kevent *kevp, void *data);
 134 static void     kevent_continue(struct kqueue *kq, void *data, int error);
 135 static void     kevent_scan_continue(void *contp, wait_result_t wait_result);
 136 static int      kevent_process(struct kqueue *kq, kevent_callback_t callback,
 137                                void *data, int *countp, struct proc *p);
 138 static void     knote_put(struct knote *kn);
 139 static int      knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p);
 140 static void     knote_drop(struct knote *kn, struct proc *p);
 141 static void     knote_activate(struct knote *kn);
 142 static void     knote_deactivate(struct knote *kn);
 143 static void     knote_enqueue(struct knote *kn);
 144 static void     knote_dequeue(struct knote *kn);
 145 static struct   knote *knote_alloc(void);
 146 static void     knote_free(struct knote *kn);
 147 extern void     knote_init(void);
 148
 149 static int      filt_fileattach(struct knote *kn);
 150 static struct filterops file_filtops =
 151         { 1, filt_fileattach, NULL, NULL };
 152
 153 static void     filt_kqdetach(struct knote *kn);
 154 static int      filt_kqueue(struct knote *kn, long hint);
 155 static struct filterops kqread_filtops =
 156         { 1, NULL, filt_kqdetach, filt_kqueue };
 157
 158 /*
 159  * placeholder for not-yet-implemented filters
 160  */
 161 static int      filt_badattach(struct knote *kn);
 162 static struct filterops bad_filtops =
 163         { 0, filt_badattach, 0 , 0 };
 164
 165 static int      filt_procattach(struct knote *kn);
 166 static void     filt_procdetach(struct knote *kn);
 167 static int      filt_proc(struct knote *kn, long hint);
 168
 169 static struct filterops proc_filtops =
 170         { 0, filt_procattach, filt_procdetach, filt_proc };
 171
 172 extern struct filterops fs_filtops;
 173
 174 extern struct filterops sig_filtops;
 175
 176
 177 /* Timer filter */
 178 static int      filt_timercompute(struct knote *kn, uint64_t *abs_time);
 179 static void     filt_timerexpire(void *knx, void *param1);
 180 static int      filt_timerattach(struct knote *kn);
 181 static void     filt_timerdetach(struct knote *kn);
 182 static int      filt_timer(struct knote *kn, long hint);
 183
 184 static struct filterops timer_filtops =
 185         { 0, filt_timerattach, filt_timerdetach, filt_timer };
 186
 187 /* to avoid arming timers that fire quicker than we can handle */
 188 static uint64_t filt_timerfloor = 0;
 189
 190 static lck_mtx_t _filt_timerlock;
 191 static void     filt_timerlock(void);
 192 static void     filt_timerunlock(void);
 193
 194 /*
 195  * Sentinel marker for a thread scanning through the list of
 196  * active knotes.
 197  */
 198 static struct filterops threadmarker_filtops =
 199         { 0, filt_badattach, 0, 0 };
 200
 201 static zone_t   knote_zone;
 202
 203 #define KN_HASHSIZE             64              /* XXX should be tunable */
 204 #define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))
 205
 206 #if 0
 207 extern struct filterops aio_filtops;
 208 #endif
 209
/*
 * Table of all system-defined filters.
 *
 * Each slot is labelled with the EVFILT_* filter it serves.  Filters
 * that have no implementation here (EVFILT_AIO, while the aio_filtops
 * entry is compiled out, and EVFILT_MACHPORT) point at bad_filtops,
 * whose attach routine fails with ENOTSUP.
 */
static struct filterops *sysfilt_ops[] = {
        &file_filtops,                  /* EVFILT_READ */
        &file_filtops,                  /* EVFILT_WRITE */
#if 0
        &aio_filtops,                   /* EVFILT_AIO */
#else
        &bad_filtops,                   /* EVFILT_AIO */
#endif
        &file_filtops,                  /* EVFILT_VNODE */
        &proc_filtops,                  /* EVFILT_PROC */
        &sig_filtops,                   /* EVFILT_SIGNAL */
        &timer_filtops,                 /* EVFILT_TIMER */
        &bad_filtops,                   /* EVFILT_MACHPORT */
        &fs_filtops                     /* EVFILT_FS */
};
 228
 229 /*
 230  * kqueue/note lock attributes and implementations
 231  *
 232  *      kqueues have locks, while knotes have use counts
 233  *      Most of the knote state is guarded by the object lock.
 234  *      the knote "inuse" count and status use the kqueue lock.
 235  */
 236 lck_grp_attr_t * kq_lck_grp_attr;
 237 lck_grp_t * kq_lck_grp;
 238 lck_attr_t * kq_lck_attr;
 239
/*
 * Take the kqueue's spin lock (kq->kq_lock).
 */
static inline void
kqlock(struct kqueue *kq)
{
        lck_spin_lock(&kq->kq_lock);
}
 245
/*
 * Release the kqueue's spin lock (kq->kq_lock).
 */
static inline void
kqunlock(struct kqueue *kq)
{
        lck_spin_unlock(&kq->kq_lock);
}
 251
 252 /*
 253  * Convert a kq lock to a knote use referece.
 254  *
 255  *      If the knote is being dropped, we can't get
 256  *      a use reference, so just return with it
 257  *      still locked.
 258  *
 259  *      - kq locked at entry
 260  *      - unlock on exit if we get the use reference
 261  */
 262 static int
 263 kqlock2knoteuse(struct kqueue *kq, struct knote *kn)
 264 {
 265         if (kn->kn_status & KN_DROPPING)
 266                 return 0;
 267         kn->kn_inuse++;
 268         kqunlock(kq);
 269         return 1;
 270  }
 271
 272 /*
 273  * Convert a kq lock to a knote use referece.
 274  *
 275  *      If the knote is being dropped, we can't get
 276  *      a use reference, so just return with it
 277  *      still locked.
 278  *
 279  *      - kq locked at entry
 280  *      - kq always unlocked on exit
 281  */
 282 static int
 283 kqlock2knoteusewait(struct kqueue *kq, struct knote *kn)
 284 {
 285         if (!kqlock2knoteuse(kq, kn)) {
 286                 kn->kn_status |= KN_DROPWAIT;
 287                 assert_wait(&kn->kn_status, THREAD_UNINT);
 288                 kqunlock(kq);
 289                 thread_block(THREAD_CONTINUE_NULL);
 290                 return 0;
 291         }
 292         return 1;
 293  }
 294
 295 /*
 296  * Convert from a knote use reference back to kq lock.
 297  *
 298  *      Drop a use reference and wake any waiters if
 299  *      this is the last one.
 300  *
 301  *      The exit return indicates if the knote is
 302  *      still alive - but the kqueue lock is taken
 303  *      unconditionally.
 304  */
 305 static int
 306 knoteuse2kqlock(struct kqueue *kq, struct knote *kn)
 307 {
 308         kqlock(kq);
 309         if ((--kn->kn_inuse == 0) &&
 310             (kn->kn_status & KN_USEWAIT)) {
 311                 kn->kn_status &= ~KN_USEWAIT;
 312                 thread_wakeup(&kn->kn_inuse);
 313         }
 314         return ((kn->kn_status & KN_DROPPING) == 0);
 315  }
 316
 317 /*
 318  * Convert a kq lock to a knote drop referece.
 319  *
 320  *      If the knote is in use, wait for the use count
 321  *      to subside.  We first mark our intention to drop
 322  *      it - keeping other users from "piling on."
 323  *      If we are too late, we have to wait for the
 324  *      other drop to complete.
 325  *
 326  *      - kq locked at entry
 327  *      - always unlocked on exit.
 328  *      - caller can't hold any locks that would prevent
 329  *        the other dropper from completing.
 330  */
 331 static int
 332 kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
 333 {
 334
 335         if ((kn->kn_status & KN_DROPPING) == 0) {
 336                 kn->kn_status |= KN_DROPPING;
 337                 if (kn->kn_inuse > 0) {
 338                         kn->kn_status |= KN_USEWAIT;
 339                         assert_wait(&kn->kn_inuse, THREAD_UNINT);
 340                         kqunlock(kq);
 341                         thread_block(THREAD_CONTINUE_NULL);
 342                 } else
 343                         kqunlock(kq);
 344                 return 1;
 345         } else {
 346                 kn->kn_status |= KN_DROPWAIT;
 347                 assert_wait(&kn->kn_status, THREAD_UNINT);
 348                 kqunlock(kq);
 349                 thread_block(THREAD_CONTINUE_NULL);
 350                 return 0;
 351         }
 352 }
 353
 354 /*
 355  * Release a knote use count reference.
 356  */
 357 static void
 358 knote_put(struct knote *kn)
 359 {
 360         struct kqueue *kq = kn->kn_kq;
 361
 362         kqlock(kq);
 363         if ((--kn->kn_inuse == 0) &&
 364             (kn->kn_status & KN_USEWAIT)) {
 365                 kn->kn_status &= ~KN_USEWAIT;
 366                 thread_wakeup(&kn->kn_inuse);
 367         }
 368         kqunlock(kq);
 369  }
 370
 371
 372
/*
 * Attach routine for the file-based filters: hand the knote to the
 * kqfilter operation of the file it refers to (kn->kn_fp), on behalf
 * of the current process, and return that operation's result.
 */
static int
filt_fileattach(struct knote *kn)
{

        return (fo_kqfilter(kn->kn_fp, kn, current_proc()));
}
 379
 380 #define f_flag f_fglob->fg_flag
 381 #define f_type f_fglob->fg_type
 382 #define f_msgcount f_fglob->fg_msgcount
 383 #define f_cred f_fglob->fg_cred
 384 #define f_ops f_fglob->fg_ops
 385 #define f_offset f_fglob->fg_offset
 386 #define f_data f_fglob->fg_data
 387
 388 static void
 389 filt_kqdetach(struct knote *kn)
 390 {
 391         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 392
 393         kqlock(kq);
 394         KNOTE_DETACH(&kq->kq_sel.si_note, kn);
 395         kqunlock(kq);
 396 }
 397
 398 /*ARGSUSED*/
 399 static int
 400 filt_kqueue(struct knote *kn, __unused long hint)
 401 {
 402         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 403
 404         kn->kn_data = kq->kq_count;
 405         return (kn->kn_data > 0);
 406 }
 407
 408 static int
 409 filt_procattach(struct knote *kn)
 410 {
 411         struct proc *p;
 412         int funnel_state;
 413
 414         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 415
 416         p = pfind(kn->kn_id);
 417         if (p == NULL) {
 418                 thread_funnel_set(kernel_flock, funnel_state);
 419                 return (ESRCH);
 420         }
 421
 422         kn->kn_flags |= EV_CLEAR;               /* automatically set */
 423         kn->kn_hookid = 1;                      /* mark exit not seen */
 424
 425         /*
 426          * internal flag indicating registration done by kernel
 427          */
 428         if (kn->kn_flags & EV_FLAG1) {
 429                 kn->kn_data = (int)kn->kn_sdata;        /* ppid */
 430                 kn->kn_fflags = NOTE_CHILD;
 431                 kn->kn_flags &= ~EV_FLAG1;
 432         }
 433
 434         /* XXX lock the proc here while adding to the list? */
 435         KNOTE_ATTACH(&p->p_klist, kn);
 436
 437         thread_funnel_set(kernel_flock, funnel_state);
 438
 439         return (0);
 440 }
 441
 442 /*
 443  * The knote may be attached to a different process, which may exit,
 444  * leaving nothing for the knote to be attached to.  In that case,
 445  * we wont be able to find the process from its pid.  But the exit
 446  * code may still be processing the knote list for the target process.
 447  * We may have to wait for that processing to complete before we can
 448  * return (and presumably free the knote) without actually removing
 449  * it from the dead process' knote list.
 450  */
 451 static void
 452 filt_procdetach(struct knote *kn)
 453 {
 454         struct proc *p;
 455         int funnel_state;
 456
 457         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 458         p = pfind(kn->kn_id);
 459
 460         if (p != (struct proc *)NULL) {
 461                 KNOTE_DETACH(&p->p_klist, kn);
 462         } else if (kn->kn_hookid != 0) {        /* if not NOTE_EXIT yet */
 463                 kn->kn_hookid = -1;     /* we are detaching but... */
 464                 assert_wait(&kn->kn_hook, THREAD_UNINT); /* have to wait */
 465                 thread_block(THREAD_CONTINUE_NULL);
 466         }
 467         thread_funnel_set(kernel_flock, funnel_state);
 468 }
 469
 470 static int
 471 filt_proc(struct knote *kn, long hint)
 472 {
 473
 474         if (hint != 0) {
 475                 u_int event;
 476
 477                 /* must hold the funnel when coming from below */
 478                 assert(thread_funnel_get() != (funnel_t)0);
 479
 480                 /*
 481                  * mask off extra data
 482                  */
 483                 event = (u_int)hint & NOTE_PCTRLMASK;
 484
 485                 /*
 486                  * if the user is interested in this event, record it.
 487                  */
 488                 if (kn->kn_sfflags & event)
 489                         kn->kn_fflags |= event;
 490
 491                 /*
 492                  * process is gone, so flag the event as finished.
 493                  *
 494                  * If someone was trying to detach, but couldn't
 495                  * find the proc to complete the detach, wake them
 496                  * up (nothing will ever need to walk the per-proc
 497                  * knote list again - so its safe for them to dump
 498                  * the knote now).
 499                  */
 500                 if (event == NOTE_EXIT) {
 501                         boolean_t detaching = (kn->kn_hookid == -1);
 502
 503                         kn->kn_hookid = 0;
 504                         kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 505                         if (detaching)
 506                                 thread_wakeup(&kn->kn_hookid);
 507                         return (1);
 508                 }
 509
 510                 /*
 511                  * process forked, and user wants to track the new process,
 512                  * so attach a new knote to it, and immediately report an
 513                  * event with the parent's pid.
 514                  */
 515                 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
 516                         struct kevent kev;
 517                         int error;
 518
 519                         /*
 520                          * register knote with new process.
 521                          */
 522                         kev.ident = hint & NOTE_PDATAMASK;      /* pid */
 523                         kev.filter = kn->kn_filter;
 524                         kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
 525                         kev.fflags = kn->kn_sfflags;
 526                         kev.data = kn->kn_id;                   /* parent */
 527                         kev.udata = kn->kn_kevent.udata;        /* preserve udata */
 528                         error = kevent_register(kn->kn_kq, &kev, NULL);
 529                         if (error)
 530                                 kn->kn_fflags |= NOTE_TRACKERR;
 531                 }
 532         }
 533
 534         return (kn->kn_fflags != 0); /* atomic check - no funnel needed from above */
 535 }
 536
 537 /*
 538  * filt_timercompute - compute absolute timeout
 539  *
 540  *      The saved-data field in the knote contains the
 541  *      time value.  The saved filter-flags indicates
 542  *      the unit of measurement.
 543  *
 544  *      If the timeout is not absolute, adjust it for
 545  *      the current time.
 546  */
 547 static int
 548 filt_timercompute(struct knote *kn, uint64_t *abs_time)
 549 {
 550         uint64_t multiplier;
 551         uint64_t raw;
 552
 553         switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS)) {
 554         case NOTE_SECONDS:
 555                 multiplier = NSEC_PER_SEC;
 556                 break;
 557         case NOTE_USECONDS:
 558                 multiplier = NSEC_PER_USEC;
 559                 break;
 560         case NOTE_NSECONDS:
 561                 multiplier = 1;
 562                 break;
 563         case 0: /* milliseconds (default) */
 564                 multiplier = NSEC_PER_SEC / 1000;
 565                 break;
 566         default:
 567                 return EINVAL;
 568         }
 569         nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw);
 570         if (raw <= filt_timerfloor) {
 571                 *abs_time = 0;
 572                 return 0;
 573         }
 574         if ((kn->kn_sfflags & NOTE_ABSOLUTE) == NOTE_ABSOLUTE) {
 575                 uint32_t seconds, nanoseconds;
 576                 uint64_t now;
 577
 578                 clock_get_calendar_nanotime(&seconds, &nanoseconds);
 579                 nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + nanoseconds,
 580                                             &now);
 581                 if (now >= raw + filt_timerfloor) {
 582                         *abs_time = 0;
 583                         return 0;
 584                 }
 585                 raw -= now;
 586         }
 587         clock_absolutetime_interval_to_deadline(raw, abs_time);
 588         return 0;
 589 }
 590
 591 /*
 592  * filt_timerexpire - the timer callout routine
 593  *
 594  *      Just propagate the timer event into the knote
 595  *      filter routine (by going through the knote
 596  *      synchronization point).  Pass a hint to
 597  *      indicate this is a real event, not just a
 598  *      query from above.
 599  */
 600 static void
 601 filt_timerexpire(void *knx, __unused void *spare)
 602 {
 603         struct klist timer_list;
 604         struct knote *kn = knx;
 605
 606         /* no "object" for timers, so fake a list */
 607         SLIST_INIT(&timer_list);
 608         SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);
 609         KNOTE(&timer_list, 1);
 610 }
 611
 612 /*
 613  * data contains amount of time to sleep, in milliseconds,
 614  * or a pointer to a timespec structure.
 615  */
 616 static int
 617 filt_timerattach(struct knote *kn)
 618 {
 619         thread_call_t callout;
 620         uint64_t deadline;
 621         int error;
 622
 623         error = filt_timercompute(kn, &deadline);
 624         if (error)
 625                 return (error);
 626
 627         if (deadline) {
 628                 callout = thread_call_allocate(filt_timerexpire, kn);
 629                 if (NULL == callout)
 630                         return (ENOMEM);
 631         } else {
 632                 /* handle as immediate */
 633                 kn->kn_sdata = 0;
 634                 callout = NULL;
 635         }
 636
 637         filt_timerlock();
 638         kn->kn_hook = (caddr_t)callout;
 639
 640         /* absolute=EV_ONESHOT */
 641         if (kn->kn_sfflags & NOTE_ABSOLUTE)
 642                 kn->kn_flags |= EV_ONESHOT;
 643
 644         if (deadline) {
 645                 /* all others - if not faking immediate */
 646                 kn->kn_flags |= EV_CLEAR;
 647                 thread_call_enter_delayed(callout, deadline);
 648                 kn->kn_hookid = 0;
 649         } else {
 650                 /* fake immediate */
 651                 kn->kn_hookid = 1;
 652         }
 653         filt_timerunlock();
 654         return (0);
 655 }
 656
 657 static void
 658 filt_timerdetach(struct knote *kn)
 659 {
 660         thread_call_t callout;
 661
 662         filt_timerlock();
 663         callout = (thread_call_t)kn->kn_hook;
 664         if (callout != NULL) {
 665                 boolean_t cancelled;
 666
 667                 /* cancel the callout if we can */
 668                 cancelled = thread_call_cancel(callout);
 669                 if (cancelled) {
 670                         /* got it, just free it */
 671                         kn->kn_hook = NULL;
 672                         filt_timerunlock();
 673                         thread_call_free(callout);
 674                         return;
 675                 }
 676                 /* we have to wait for the expire routine.  */
 677                 kn->kn_hookid = -1;     /* we are detaching */
 678                 assert_wait(&kn->kn_hook, THREAD_UNINT);
 679                 filt_timerunlock();
 680                 thread_block(THREAD_CONTINUE_NULL);
 681                 assert(kn->kn_hook == NULL);
 682                 return;
 683         }
 684         /* nothing to do */
 685         filt_timerunlock();
 686 }
 687
 688
 689
 690 static int
 691 filt_timer(struct knote *kn, __unused long hint)
 692 {
 693         int result;
 694
 695         if (hint) {
 696                 /* real timer pop */
 697                 thread_call_t callout;
 698                 boolean_t detaching;
 699
 700                 filt_timerlock();
 701
 702                 kn->kn_data++;
 703
 704                 detaching = (kn->kn_hookid < 0);
 705                 callout = (thread_call_t)kn->kn_hook;
 706
 707                 if (!detaching && (kn->kn_flags & EV_ONESHOT) == 0) {
 708                         uint64_t deadline;
 709                         int error;
 710
 711                         /* user input data may have changed - deal */
 712                         error = filt_timercompute(kn, &deadline);
 713                         if (error) {
 714                                 kn->kn_flags |= EV_ERROR;
 715                                 kn->kn_data = error;
 716                         } else if (deadline == 0) {
 717                                 /* revert to fake immediate */
 718                                 kn->kn_flags &= ~EV_CLEAR;
 719                                 kn->kn_sdata = 0;
 720                                 kn->kn_hookid = 1;
 721                         } else {
 722                                 /* keep the callout and re-arm */
 723                                 thread_call_enter_delayed(callout, deadline);
 724                                 filt_timerunlock();
 725                                 return 1;
 726                         }
 727                 }
 728                 kn->kn_hook = NULL;
 729                 filt_timerunlock();
 730                 thread_call_free(callout);
 731
 732                 /* if someone is waiting for timer to pop */
 733                 if (detaching)
 734                         thread_wakeup(&kn->kn_hook);
 735
 736                 return 1;
 737         }
 738
 739         /* user-query */
 740         filt_timerlock();
 741
 742         /* change fake timer to real if needed */
 743         while (kn->kn_hookid > 0 && kn->kn_sdata > 0) {
 744                 int error;
 745
 746                 /* update the fake timer (make real) */
 747                 kn->kn_hookid = 0;
 748                 kn->kn_data = 0;
 749                 filt_timerunlock();
 750                 error = filt_timerattach(kn);
 751                 filt_timerlock();
 752                 if (error) {
 753                         kn->kn_flags |= EV_ERROR;
 754                         kn->kn_data = error;
 755                         filt_timerunlock();
 756                         return 1;
 757                 }
 758         }
 759
 760         /* if still fake, pretend it fired */
 761         if (kn->kn_hookid > 0)
 762                 kn->kn_data = 1;
 763
 764         result = (kn->kn_data != 0);
 765         filt_timerunlock();
 766         return result;
 767 }
 768
/*
 * Take the mutex (_filt_timerlock) that the timer filter routines
 * hold while manipulating a timer knote's kn_hook / kn_hookid state.
 */
static void
filt_timerlock(void)
{
        lck_mtx_lock(&_filt_timerlock);
}
 774
/*
 * Release the timer filter mutex (_filt_timerlock).
 */
static void
filt_timerunlock(void)
{
        lck_mtx_unlock(&_filt_timerlock);
}
 780
/*
 * JMM - placeholder for not-yet-implemented filters
 *
 * Attach routine that rejects every knote: the knote argument is
 * ignored and ENOTSUP is always returned.
 */
static int
filt_badattach(__unused struct knote *kn)
{
        return(ENOTSUP);
}
 789
 790
 791 struct kqueue *
 792 kqueue_alloc(struct proc *p)
 793 {
 794         struct filedesc *fdp = p->p_fd;
 795         struct kqueue *kq;
 796
 797         MALLOC_ZONE(kq, struct kqueue *, sizeof(struct kqueue), M_KQUEUE, M_WAITOK);
 798         if (kq != NULL) {
 799                 bzero(kq, sizeof(struct kqueue));
 800                 lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
 801                 TAILQ_INIT(&kq->kq_head);
 802                 TAILQ_INIT(&kq->kq_inprocess);
 803                 kq->kq_fdp = fdp;
 804         }
 805
 806         if (fdp->fd_knlistsize < 0) {
 807                 proc_fdlock(p);
 808                 if (fdp->fd_knlistsize < 0)
 809                         fdp->fd_knlistsize = 0;         /* this process has had a kq */
 810                 proc_fdunlock(p);
 811         }
 812
 813         return kq;
 814 }
 815
 816
 817 /*
 818  * kqueue_dealloc - detach all knotes from a kqueue and free it
 819  *
 820  *      We walk each list looking for knotes referencing this
 821  *      this kqueue.  If we find one, we try to drop it.  But
 822  *      if we fail to get a drop reference, that will wait
 823  *      until it is dropped.  So, we can just restart again
 824  *      safe in the assumption that the list will eventually
 825  *      not contain any more references to this kqueue (either
 826  *      we dropped them all, or someone else did).
 827  *
 828  *      Assumes no new events are being added to the kqueue.
 829  *      Nothing locked on entry or exit.
 830  */
 831 void
 832 kqueue_dealloc(struct kqueue *kq, struct proc *p)
 833 {
 834         struct filedesc *fdp = p->p_fd;
 835         struct knote *kn;
 836         int i;
 837
 838         proc_fdlock(p);
 839         for (i = 0; i < fdp->fd_knlistsize; i++) {
 840                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 841                 while (kn != NULL) {
 842                         if (kq == kn->kn_kq) {
 843                                 kqlock(kq);
 844                                 proc_fdunlock(p);
 845                                 /* drop it ourselves or wait */
 846                                 if (kqlock2knotedrop(kq, kn)) {
 847                                         kn->kn_fop->f_detach(kn);
 848                                         knote_drop(kn, p);
 849                                 }
 850                                 proc_fdlock(p);
 851                                 /* start over at beginning of list */
 852                                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 853                                 continue;
 854                         }
 855                         kn = SLIST_NEXT(kn, kn_link);
 856                 }
 857         }
 858         if (fdp->fd_knhashmask != 0) {
 859                 for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
 860                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 861                         while (kn != NULL) {
 862                                 if (kq == kn->kn_kq) {
 863                                         kqlock(kq);
 864                                         proc_fdunlock(p);
 865                                         /* drop it ourselves or wait */
 866                                         if (kqlock2knotedrop(kq, kn)) {
 867                                                 kn->kn_fop->f_detach(kn);
 868                                                 knote_drop(kn, p);
 869                                         }
 870                                         proc_fdlock(p);
 871                                         /* start over at beginning of list */
 872                                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 873                                         continue;
 874                                 }
 875                                 kn = SLIST_NEXT(kn, kn_link);
 876                         }
 877                 }
 878         }
 879         proc_fdunlock(p);
 880         lck_spin_destroy(&kq->kq_lock, kq_lck_grp);
 881         FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE);
 882 }
 883
 884 int
 885 kqueue(struct proc *p, __unused struct kqueue_args *uap, register_t *retval)
 886 {
 887         struct kqueue *kq;
 888         struct fileproc *fp;
 889         int fd, error;
 890
 891         error = falloc(p, &fp, &fd);
 892         if (error) {
 893                 return (error);
 894         }
 895
 896         kq = kqueue_alloc(p);
 897         if (kq == NULL) {
 898                 fp_free(p, fd, fp);
 899                 return (ENOMEM);
 900         }
 901
 902         fp->f_flag = FREAD | FWRITE;
 903         fp->f_type = DTYPE_KQUEUE;
 904         fp->f_ops = &kqueueops;
 905         fp->f_data = (caddr_t)kq;
 906
 907         proc_fdlock(p);
 908         *fdflags(p, fd) &= ~UF_RESERVED;
 909         fp_drop(p, fd, fp, 1);
 910         proc_fdunlock(p);
 911
 912         *retval = fd;
 913         return (error);
 914 }
 915
 916 int
 917 kqueue_portset_np(__unused struct proc *p,
 918                                   __unused struct kqueue_portset_np_args *uap,
 919                                   __unused register_t *retval)
 920 {
 921                 /* JMM - Placeholder for now */
 922                 return (ENOTSUP);
 923 }
 924
 925 int
 926 kqueue_from_portset_np(__unused struct proc *p,
 927                                            __unused struct kqueue_from_portset_np_args *uap,
 928                                            __unused register_t *retval)
 929 {
 930                 /* JMM - Placeholder for now */
 931                 return (ENOTSUP);
 932 }
 933
 934 static int
 935 kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p)
 936 {
 937         int advance;
 938         int error;
 939
 940         if (IS_64BIT_PROCESS(p)) {
 941                 struct user_kevent kev64;
 942
 943                 advance = sizeof(kev64);
 944                 error = copyin(*addrp, (caddr_t)&kev64, advance);
 945                 if (error)
 946                         return error;
 947                 kevp->ident = CAST_DOWN(uintptr_t, kev64.ident);
 948                 kevp->filter = kev64.filter;
 949                 kevp->flags = kev64.flags;
 950                 kevp->fflags = kev64.fflags;
 951                 kevp->data = CAST_DOWN(intptr_t, kev64.data);
 952                 kevp->udata = kev64.udata;
 953         } else {
 954                 /*
 955                  * compensate for legacy in-kernel kevent layout
 956                  * where the udata field is alredy 64-bit.
 957                  */
 958                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 959                 error = copyin(*addrp, (caddr_t)kevp, advance);
 960         }
 961         if (!error)
 962                 *addrp += advance;
 963         return error;
 964 }
 965
 966 static int
 967 kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p)
 968 {
 969         int advance;
 970         int error;
 971
 972         if (IS_64BIT_PROCESS(p)) {
 973                 struct user_kevent kev64;
 974
 975                 kev64.ident = (uint64_t) kevp->ident;
 976                 kev64.filter = kevp->filter;
 977                 kev64.flags = kevp->flags;
 978                 kev64.fflags = kevp->fflags;
 979                 kev64.data = (int64_t) kevp->data;
 980                 kev64.udata = kevp->udata;
 981                 advance = sizeof(kev64);
 982                 error = copyout((caddr_t)&kev64, *addrp, advance);
 983         } else {
 984                 /*
 985                  * compensate for legacy in-kernel kevent layout
 986                  * where the udata field is alredy 64-bit.
 987                  */
 988                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 989                 error = copyout((caddr_t)kevp, *addrp, advance);
 990         }
 991         if (!error)
 992                 *addrp += advance;
 993         return error;
 994 }
 995
 996 /*
 997  * kevent_continue - continue a kevent syscall after blocking
 998  *
 999  *      assume we inherit a use count on the kq fileglob.
1000  */
1001
1002 static void
1003 kevent_continue(__unused struct kqueue *kq, void *data, int error)
1004 {
1005         struct _kevent *cont_args;
1006         struct fileproc *fp;
1007         register_t *retval;
1008         int noutputs;
1009         int fd;
1010         struct proc *p = current_proc();
1011
1012         cont_args = (struct _kevent *)data;
1013         noutputs = cont_args->eventout;
1014         retval = cont_args->retval;
1015         fd = cont_args->fd;
1016         fp = cont_args->fp;
1017
1018         fp_drop(p, fd, fp, 0);
1019
1020         /* don't restart after signals... */
1021         if (error == ERESTART)
1022                 error = EINTR;
1023         else if (error == EWOULDBLOCK)
1024                 error = 0;
1025         if (error == 0)
1026                 *retval = noutputs;
1027         unix_syscall_return(error);
1028 }
1029
1030 /*
1031  * kevent - [syscall] register and wait for kernel events
1032  *
1033  */
1034
1035 int
1036 kevent(struct proc *p, struct kevent_args *uap, register_t *retval)
1037 {
1038         user_addr_t changelist = uap->changelist;
1039         user_addr_t ueventlist = uap->eventlist;
1040         int nchanges = uap->nchanges;
1041         int nevents = uap->nevents;
1042         int fd = uap->fd;
1043
1044         struct _kevent *cont_args;
1045         uthread_t ut;
1046         struct kqueue *kq;
1047         struct fileproc *fp;
1048         struct kevent kev;
1049         int error, noutputs;
1050         struct timeval atv;
1051
1052         /* convert timeout to absolute - if we have one */
1053         if (uap->timeout != USER_ADDR_NULL) {
1054                 struct timeval rtv;
1055                 if ( IS_64BIT_PROCESS(p) ) {
1056                         struct user_timespec ts;
1057                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1058                         if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0)
1059                                 error = EINVAL;
1060                         else
1061                                 TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1062                 } else {
1063                         struct timespec ts;
1064                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1065                         TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1066                 }
1067                 if (error)
1068                         return error;
1069                 if (itimerfix(&rtv))
1070                         return EINVAL;
1071                 getmicrouptime(&atv);
1072                 timevaladd(&atv, &rtv);
1073         } else {
1074                 atv.tv_sec = 0;
1075                 atv.tv_usec = 0;
1076         }
1077
1078         /* get a usecount for the kq itself */
1079         if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
1080                 return(error);
1081
1082         /* register all the change requests the user provided... */
1083         noutputs = 0;
1084         while (nchanges > 0 && error == 0) {
1085                 error = kevent_copyin(&changelist, &kev, p);
1086                 if (error)
1087                         break;
1088
1089                 kev.flags &= ~EV_SYSFLAGS;
1090                 error = kevent_register(kq, &kev, p);
1091                 if (error && nevents > 0) {
1092                         kev.flags = EV_ERROR;
1093                         kev.data = error;
1094                         error = kevent_copyout(&kev, &ueventlist, p);
1095                         if (error == 0) {
1096                                 nevents--;
1097                                 noutputs++;
1098                         }
1099                 }
1100                 nchanges--;
1101         }
1102
1103         /* store the continuation/completion data in the uthread */
1104         ut = (uthread_t)get_bsdthread_info(current_thread());
1105         cont_args = (struct _kevent *)&ut->uu_state.ss_kevent;
1106         cont_args->fp = fp;
1107         cont_args->fd = fd;
1108         cont_args->retval = retval;
1109         cont_args->eventlist = ueventlist;
1110         cont_args->eventcount = nevents;
1111         cont_args->eventout = noutputs;
1112
1113         if (nevents > 0 && noutputs == 0 && error == 0)
1114                 error = kevent_scan(kq, kevent_callback,
1115                                     kevent_continue, cont_args,
1116                                     &atv, p);
1117         kevent_continue(kq, cont_args, error);
1118         /* NOTREACHED */
1119         return error;
1120 }
1121
1122
1123 /*
1124  * kevent_callback - callback for each individual event
1125  *
1126  *      called with nothing locked
1127  *      caller holds a reference on the kqueue
1128  */
1129
1130 static int
1131 kevent_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
1132 {
1133         struct _kevent *cont_args;
1134         int error;
1135
1136         cont_args = (struct _kevent *)data;
1137         assert(cont_args->eventout < cont_arg->eventcount);
1138
1139         /*
1140          * Copy out the appropriate amount of event data for this user.
1141          */
1142         error = kevent_copyout(kevp, &cont_args->eventlist, current_proc());
1143
1144         /*
1145          * If there isn't space for additional events, return
1146          * a harmless error to stop the processing here
1147          */
1148         if (error == 0 && ++cont_args->eventout == cont_args->eventcount)
1149                         error = EWOULDBLOCK;
1150         return error;
1151 }
1152
1153 /*
1154  * kevent_register - add a new event to a kqueue
1155  *
1156  *      Creates a mapping between the event source and
1157  *      the kqueue via a knote data structure.
1158  *
1159  *      Because many/most the event sources are file
1160  *      descriptor related, the knote is linked off
1161  *      the filedescriptor table for quick access.
1162  *
1163  *      called with nothing locked
1164  *      caller holds a reference on the kqueue
1165  */
1166
1167 int
1168 kevent_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
1169 {
1170         struct filedesc *fdp = kq->kq_fdp;
1171         struct filterops *fops;
1172         struct fileproc *fp = NULL;
1173         struct knote *kn = NULL;
1174         int error = 0;
1175
1176         if (kev->filter < 0) {
1177                 if (kev->filter + EVFILT_SYSCOUNT < 0)
1178                         return (EINVAL);
1179                 fops = sysfilt_ops[~kev->filter];       /* to 0-base index */
1180         } else {
1181                 /*
1182                  * XXX
1183                  * filter attach routine is responsible for insuring that
1184                  * the identifier can be attached to it.
1185                  */
1186                 printf("unknown filter: %d\n", kev->filter);
1187                 return (EINVAL);
1188         }
1189
1190         /* this iocount needs to be dropped if it is not registered */
1191         if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 0)) != 0)
1192                 return(error);
1193
1194  restart:
1195         proc_fdlock(p);
1196         if (fops->f_isfd) {
1197                 /* fd-based knotes are linked off the fd table */
1198                 if (kev->ident < (u_int)fdp->fd_knlistsize) {
1199                         SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
1200                                 if (kq == kn->kn_kq &&
1201                                     kev->filter == kn->kn_filter)
1202                                         break;
1203                 }
1204         } else {
1205                 /* hash non-fd knotes here too */
1206                 if (fdp->fd_knhashmask != 0) {
1207                         struct klist *list;
1208
1209                         list = &fdp->fd_knhash[
1210                             KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
1211                         SLIST_FOREACH(kn, list, kn_link)
1212                                 if (kev->ident == kn->kn_id &&
1213                                     kq == kn->kn_kq &&
1214                                     kev->filter == kn->kn_filter)
1215                                         break;
1216                 }
1217         }
1218
1219         /*
1220          * kn now contains the matching knote, or NULL if no match
1221          */
1222         if (kn == NULL) {
1223                 if ((kev->flags & (EV_ADD|EV_DELETE)) == EV_ADD) {
1224                         kn = knote_alloc();
1225                         if (kn == NULL) {
1226                                 proc_fdunlock(p);
1227                                 error = ENOMEM;
1228                                 goto done;
1229                         }
1230                         kn->kn_fp = fp;
1231                         kn->kn_kq = kq;
1232                         kn->kn_tq = &kq->kq_head;
1233                         kn->kn_fop = fops;
1234                         kn->kn_sfflags = kev->fflags;
1235                         kn->kn_sdata = kev->data;
1236                         kev->fflags = 0;
1237                         kev->data = 0;
1238                         kn->kn_kevent = *kev;
1239                         kn->kn_inuse = 1;  /* for f_attach() */
1240                         kn->kn_status = 0;
1241
1242                         /* before anyone can find it */
1243                         if (kev->flags & EV_DISABLE)
1244                                 kn->kn_status |= KN_DISABLED;
1245
1246                         error = knote_fdpattach(kn, fdp, p);
1247                         proc_fdunlock(p);
1248
1249                         if (error) {
1250                                 knote_free(kn);
1251                                 goto done;
1252                         }
1253
1254                         /*
1255                          * apply reference count to knote structure, and
1256                          * do not release it at the end of this routine.
1257                          */
1258                         fp = NULL;
1259
1260                         /*
1261                          * If the attach fails here, we can drop it knowing
1262                          * that nobody else has a reference to the knote.
1263                          */
1264                         if ((error = fops->f_attach(kn)) != 0) {
1265                                 knote_drop(kn, p);
1266                                 goto done;
1267                         }
1268                 } else {
1269                         proc_fdunlock(p);
1270                         error = ENOENT;
1271                         goto done;
1272                 }
1273         } else {
1274                 /* existing knote - get kqueue lock */
1275                 kqlock(kq);
1276                 proc_fdunlock(p);
1277
1278                 if (kev->flags & EV_DELETE) {
1279                         knote_dequeue(kn);
1280                         kn->kn_status |= KN_DISABLED;
1281                         if (kqlock2knotedrop(kq, kn)) {
1282                                 kn->kn_fop->f_detach(kn);
1283                                 knote_drop(kn, p);
1284                         }
1285                         goto done;
1286                 }
1287
1288                 /* update status flags for existing knote */
1289                 if (kev->flags & EV_DISABLE) {
1290                         knote_dequeue(kn);
1291                         kn->kn_status |= KN_DISABLED;
1292                 } else if (kev->flags & EV_ENABLE) {
1293                         kn->kn_status &= ~KN_DISABLED;
1294                         if (kn->kn_status & KN_ACTIVE)
1295                                 knote_enqueue(kn);
1296                 }
1297
1298                 /*
1299                  * If somebody is in the middle of dropping this
1300                  * knote - go find/insert a new one.  But we have
1301                  * wait for this one to go away first.
1302                  */
1303                 if (!kqlock2knoteusewait(kq, kn))
1304                         /* kqueue unlocked */
1305                         goto restart;
1306
1307                 /*
1308                  * The user may change some filter values after the
1309                  * initial EV_ADD, but doing so will not reset any
1310                  * filter which have already been triggered.
1311                  */
1312                 kn->kn_sfflags = kev->fflags;
1313                 kn->kn_sdata = kev->data;
1314                 kn->kn_kevent.udata = kev->udata;
1315         }
1316
1317         /* still have use ref on knote */
1318         if (kn->kn_fop->f_event(kn, 0)) {
1319                 if (knoteuse2kqlock(kq, kn))
1320                         knote_activate(kn);
1321                 kqunlock(kq);
1322         } else {
1323                 knote_put(kn);
1324         }
1325
1326 done:
1327         if (fp != NULL)
1328                 fp_drop(p, kev->ident, fp, 0);
1329         return (error);
1330 }
1331
1332 /*
1333  * kevent_process - process the triggered events in a kqueue
1334  *
1335  *      Walk the queued knotes and validate that they are
1336  *      really still triggered events by calling the filter
1337  *      routines (if necessary).  Hold a use reference on
1338  *      the knote to avoid it being detached. For each event
1339  *      that is still considered triggered, invoke the
1340  *      callback routine provided.
1341  *
1342  *      caller holds a reference on the kqueue.
1343  *      kqueue locked on entry and exit - but may be dropped
1344  */
1345
1346 static int
1347 kevent_process(struct kqueue *kq,
1348                kevent_callback_t callback,
1349                void *data,
1350                int *countp,
1351                struct proc *p)
1352 {
1353         struct knote *kn;
1354         struct kevent kev;
1355         int nevents;
1356         int error;
1357
1358  restart:
1359         if (kq->kq_count == 0) {
1360                 *countp = 0;
1361                 return 0;
1362         }
1363
1364         /* if someone else is processing the queue, wait */
1365         if (!TAILQ_EMPTY(&kq->kq_inprocess)) {
1366                 assert_wait(&kq->kq_inprocess, THREAD_UNINT);
1367                 kq->kq_state |= KQ_PROCWAIT;
1368                 kqunlock(kq);
1369                 thread_block(THREAD_CONTINUE_NULL);
1370                 kqlock(kq);
1371                 goto restart;
1372         }
1373
1374         error = 0;
1375         nevents = 0;
1376         while (error == 0 &&
1377                (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) {
1378
1379                 /*
1380                  * move knote to the processed queue.
1381                  * this is also protected by the kq lock.
1382                  */
1383                 assert(kn->kn_tq == &kq->kq_head);
1384                 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1385                 kn->kn_tq = &kq->kq_inprocess;
1386                 TAILQ_INSERT_TAIL(&kq->kq_inprocess, kn, kn_tqe);
1387
1388                 /*
1389                  * Non-EV_ONESHOT events must be re-validated.
1390                  *
1391                  * Convert our lock to a use-count and call the event's
1392                  * filter routine to update.
1393                  *
1394                  * If the event is dropping (or no longer valid), we
1395                  * already have it off the active queue, so just
1396                  * finish the job of deactivating it.
1397                  */
1398                 if ((kn->kn_flags & EV_ONESHOT) == 0) {
1399                         int result;
1400
1401                         if (kqlock2knoteuse(kq, kn)) {
1402
1403                                 /* call the filter with just a ref */
1404                                 result = kn->kn_fop->f_event(kn, 0);
1405
1406                                 if (!knoteuse2kqlock(kq, kn) || result == 0) {
1407                                         knote_deactivate(kn);
1408                                         continue;
1409                                 }
1410                         } else {
1411                                 knote_deactivate(kn);
1412                                 continue;
1413                         }
1414                 }
1415
1416                 /*
1417                  * Got a valid triggered knote with the kqueue
1418                  * still locked.  Snapshot the data, and determine
1419                  * how to dispatch the knote for future events.
1420                  */
1421                 kev = kn->kn_kevent;
1422
1423                 /* now what happens to it? */
1424                 if (kn->kn_flags & EV_ONESHOT) {
1425                         knote_deactivate(kn);
1426                         if (kqlock2knotedrop(kq, kn)) {
1427                                 kn->kn_fop->f_detach(kn);
1428                                 knote_drop(kn, p);
1429                         }
1430                 } else if (kn->kn_flags & EV_CLEAR) {
1431                         knote_deactivate(kn);
1432                         kn->kn_data = 0;
1433                         kn->kn_fflags = 0;
1434                         kqunlock(kq);
1435                 } else {
1436                         /*
1437                          * leave on in-process queue.  We'll
1438                          * move all the remaining ones back
1439                          * the kq queue and wakeup any
1440                          * waiters when we are done.
1441                          */
1442                         kqunlock(kq);
1443                 }
1444
1445                 /* callback to handle each event as we find it */
1446                 error = (callback)(kq, &kev, data);
1447                 nevents++;
1448
1449                 kqlock(kq);
1450         }
1451
1452         /*
1453          * With the kqueue still locked, move any knotes
1454          * remaining on the in-process queue back to the
1455          * kq's queue and wake up any waiters.
1456          */
1457         while ((kn = TAILQ_FIRST(&kq->kq_inprocess)) != NULL) {
1458                 assert(kn->kn_tq == &kq->kq_inprocess);
1459                 TAILQ_REMOVE(&kq->kq_inprocess, kn, kn_tqe);
1460                 kn->kn_tq = &kq->kq_head;
1461                 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1462         }
1463         if (kq->kq_state & KQ_PROCWAIT) {
1464                 kq->kq_state &= ~KQ_PROCWAIT;
1465                 thread_wakeup(&kq->kq_inprocess);
1466         }
1467
1468         *countp = nevents;
1469         return error;
1470 }
1471
1472
1473 static void
1474 kevent_scan_continue(void *data, wait_result_t wait_result)
1475 {
1476         uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1477         struct _kevent_scan * cont_args = &ut->uu_state.ss_kevent_scan;
1478         struct kqueue *kq = (struct kqueue *)data;
1479         int error;
1480         int count;
1481
1482         /* convert the (previous) wait_result to a proper error */
1483         switch (wait_result) {
1484         case THREAD_AWAKENED:
1485                 kqlock(kq);
1486                 error = kevent_process(kq, cont_args->call, cont_args, &count, current_proc());
1487                 if (error == 0 && count == 0) {
1488                         assert_wait_deadline(kq, THREAD_ABORTSAFE, cont_args->deadline);
1489                         kq->kq_state |= KQ_SLEEP;
1490                         kqunlock(kq);
1491                         thread_block_parameter(kevent_scan_continue, kq);
1492                         /* NOTREACHED */
1493                 }
1494                 kqunlock(kq);
1495                 break;
1496         case THREAD_TIMED_OUT:
1497                 error = EWOULDBLOCK;
1498                 break;
1499         case THREAD_INTERRUPTED:
1500                 error = EINTR;
1501                 break;
1502         default:
1503                 panic("kevent_scan_cont() - invalid wait_result (%d)", wait_result);
1504                 error = 0;
1505         }
1506
1507         /* call the continuation with the results */
1508         assert(cont_args->cont != NULL);
1509         (cont_args->cont)(kq, cont_args->data, error);
1510 }
1511
1512
1513 /*
1514  * kevent_scan - scan and wait for events in a kqueue
1515  *
1516  *      Process the triggered events in a kqueue.
1517  *
1518  *      If there are no events triggered arrange to
1519  *      wait for them. If the caller provided a
1520  *      continuation routine, then kevent_scan will
1521  *      also.
1522  *
1523  *      The callback routine must be valid.
1524  *      The caller must hold a use-count reference on the kq.
1525  */
1526
1527 int
1528 kevent_scan(struct kqueue *kq,
1529             kevent_callback_t callback,
1530             kevent_continue_t continuation,
1531             void *data,
1532             struct timeval *atvp,
1533             struct proc *p)
1534 {
1535         thread_continue_t cont = THREAD_CONTINUE_NULL;
1536         uint64_t deadline;
1537         int error;
1538         int first;
1539
1540         assert(callback != NULL);
1541
1542         first = 1;
1543         for (;;) {
1544                 wait_result_t wait_result;
1545                 int count;
1546
1547                 /*
1548                  * Make a pass through the kq to find events already
1549                  * triggered.
1550                  */
1551                 kqlock(kq);
1552                 error = kevent_process(kq, callback, data, &count, p);
1553                 if (error || count)
1554                         break; /* lock still held */
1555
1556                 /* looks like we have to consider blocking */
1557                 if (first) {
1558                         first = 0;
1559                         /* convert the timeout to a deadline once */
1560                         if (atvp->tv_sec || atvp->tv_usec) {
1561                                 uint32_t seconds, nanoseconds;
1562                                 uint64_t now;
1563
1564                                 clock_get_uptime(&now);
1565                                 nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC +
1566                                                             atvp->tv_usec * NSEC_PER_USEC,
1567                                                             &deadline);
1568                                 if (now >= deadline) {
1569                                         /* non-blocking call */
1570                                         error = EWOULDBLOCK;
1571                                         break; /* lock still held */
1572                                 }
1573                                 deadline -= now;
1574                                 clock_absolutetime_interval_to_deadline(deadline, &deadline);
1575                         } else {
1576                                 deadline = 0;   /* block forever */
1577                         }
1578
1579                         if (continuation) {
1580                                 uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1581                                 struct _kevent_scan *cont_args = &ut->uu_state.ss_kevent_scan;
1582
1583                                 cont_args->call = callback;
1584                                 cont_args->cont = continuation;
1585                                 cont_args->deadline = deadline;
1586                                 cont_args->data = data;
1587                                 cont = kevent_scan_continue;
1588                         }
1589                 }
1590
1591                 /* go ahead and wait */
1592                 assert_wait_deadline(kq, THREAD_ABORTSAFE, deadline);
1593                 kq->kq_state |= KQ_SLEEP;
1594                 kqunlock(kq);
1595                 wait_result = thread_block_parameter(cont, kq);
1596                 /* NOTREACHED if (continuation != NULL) */
1597
1598                 switch (wait_result) {
1599                 case THREAD_AWAKENED:
1600                         continue;
1601                 case THREAD_TIMED_OUT:
1602                         return EWOULDBLOCK;
1603                 case THREAD_INTERRUPTED:
1604                         return EINTR;
1605                 default:
1606                         panic("kevent_scan - bad wait_result (%d)",
1607                               wait_result);
1608                         error = 0;
1609                 }
1610         }
1611         kqunlock(kq);
1612         return error;
1613 }
1614
1615
1616 /*
1617  * XXX
1618  * This could be expanded to call kqueue_scan, if desired.
1619  */
1620 /*ARGSUSED*/
1621 static int
1622 kqueue_read(__unused struct fileproc *fp,
1623                         __unused struct uio *uio,
1624                         __unused kauth_cred_t cred,
1625                         __unused int flags,
1626                         __unused struct proc *p)
1627 {
1628         return (ENXIO);
1629 }
1630
1631 /*ARGSUSED*/
1632 static int
1633 kqueue_write(__unused struct fileproc *fp,
1634                          __unused struct uio *uio,
1635                          __unused kauth_cred_t cred,
1636                          __unused int flags,
1637                          __unused struct proc *p)
1638 {
1639         return (ENXIO);
1640 }
1641
1642 /*ARGSUSED*/
1643 static int
1644 kqueue_ioctl(__unused struct fileproc *fp,
1645                          __unused u_long com,
1646                          __unused caddr_t data,
1647                          __unused struct proc *p)
1648 {
1649         return (ENOTTY);
1650 }
1651
1652 /*ARGSUSED*/
1653 static int
1654 kqueue_select(struct fileproc *fp, int which, void *wql, struct proc *p)
1655 {
1656         struct kqueue *kq = (struct kqueue *)fp->f_data;
1657         int retnum = 0;
1658
1659         if (which == FREAD) {
1660                 kqlock(kq);
1661                 if (kq->kq_count) {
1662                         retnum = 1;
1663                 } else {
1664                         selrecord(p, &kq->kq_sel, wql);
1665                         kq->kq_state |= KQ_SEL;
1666                 }
1667                 kqunlock(kq);
1668         }
1669         return (retnum);
1670 }
1671
1672 /*
1673  * kqueue_close -
1674  */
1675 /*ARGSUSED*/
1676 static int
1677 kqueue_close(struct fileglob *fg, struct proc *p)
1678 {
1679         struct kqueue *kq = (struct kqueue *)fg->fg_data;
1680
1681         kqueue_dealloc(kq, p);
1682         fg->fg_data = NULL;
1683         return (0);
1684 }
1685
1686 /*ARGSUSED*/
1687 /*
1688  * The callers has taken a use-count reference on this kqueue and will donate it
1689  * to the kqueue we are being added to.  This keeps the kqueue from closing until
1690  * that relationship is torn down.
1691  */
1692 static int
1693 kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
1694 {
1695         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
1696
1697         if (kn->kn_filter != EVFILT_READ)
1698                 return (1);
1699
1700         kn->kn_fop = &kqread_filtops;
1701         kqlock(kq);
1702         KNOTE_ATTACH(&kq->kq_sel.si_note, kn);
1703         kqunlock(kq);
1704         return (0);
1705 }
1706
1707 /*ARGSUSED*/
1708 int
1709 kqueue_stat(struct fileproc *fp, struct stat *st, __unused struct proc *p)
1710 {
1711         struct kqueue *kq = (struct kqueue *)fp->f_data;
1712
1713         bzero((void *)st, sizeof(*st));
1714         st->st_size = kq->kq_count;
1715         st->st_blksize = sizeof(struct kevent);
1716         st->st_mode = S_IFIFO;
1717         return (0);
1718 }
1719
1720 /*
1721  * Called with the kqueue locked
1722  */
1723 static void
1724 kqueue_wakeup(struct kqueue *kq)
1725 {
1726
1727         if (kq->kq_state & KQ_SLEEP) {
1728                 kq->kq_state &= ~KQ_SLEEP;
1729                 thread_wakeup(kq);
1730         }
1731         if (kq->kq_state & KQ_SEL) {
1732                 kq->kq_state &= ~KQ_SEL;
1733                 selwakeup(&kq->kq_sel);
1734         }
1735         KNOTE(&kq->kq_sel.si_note, 0);
1736 }
1737
/*
 * klist_init - initialize a knote list head to the empty list.
 */
void
klist_init(struct klist *list)
{
	SLIST_INIT(list);
	return;
}
1743
1744
1745 /*
1746  * Query/Post each knote in the object's list
1747  *
1748  *      The object lock protects the list. It is assumed
1749  *      that the filter/event routine for the object can
1750  *      determine that the object is already locked (via
1751  *      the hind) and not deadlock itself.
1752  *
1753  *      The object lock should also hold off pending
1754  *      detach/drop operations.  But we'll prevent it here
1755  *      too - just in case.
1756  */
1757 void
1758 knote(struct klist *list, long hint)
1759 {
1760         struct knote *kn;
1761
1762         SLIST_FOREACH(kn, list, kn_selnext) {
1763                 struct kqueue *kq = kn->kn_kq;
1764
1765                 kqlock(kq);
1766                 if (kqlock2knoteuse(kq, kn)) {
1767                         int result;
1768
1769                         /* call the event with only a use count */
1770                         result = kn->kn_fop->f_event(kn, hint);
1771
1772                         /* if its not going away and triggered */
1773                         if (knoteuse2kqlock(kq, kn) && result)
1774                                 knote_activate(kn);
1775                         /* lock held again */
1776                 }
1777                 kqunlock(kq);
1778         }
1779 }
1780
1781 /*
1782  * attach a knote to the specified list.  Return true if this is the first entry.
1783  * The list is protected by whatever lock the object it is associated with uses.
1784  */
1785 int
1786 knote_attach(struct klist *list, struct knote *kn)
1787 {
1788         int ret = SLIST_EMPTY(list);
1789         SLIST_INSERT_HEAD(list, kn, kn_selnext);
1790         return ret;
1791 }
1792
1793 /*
1794  * detach a knote from the specified list.  Return true if that was the last entry.
1795  * The list is protected by whatever lock the object it is associated with uses.
1796  */
1797 int
1798 knote_detach(struct klist *list, struct knote *kn)
1799 {
1800         SLIST_REMOVE(list, kn, knote, kn_selnext);
1801         return SLIST_EMPTY(list);
1802 }
1803
1804 /*
1805  * remove all knotes referencing a specified fd
1806  *
1807  * Essentially an inlined knote_remove & knote_drop
1808  * when we know for sure that the thing is a file
1809  *
1810  * Entered with the proc_fd lock already held.
1811  * It returns the same way, but may drop it temporarily.
1812  */
1813 void
1814 knote_fdclose(struct proc *p, int fd)
1815 {
1816         struct filedesc *fdp = p->p_fd;
1817         struct klist *list;
1818         struct knote *kn;
1819
1820         list = &fdp->fd_knlist[fd];
1821         while ((kn = SLIST_FIRST(list)) != NULL) {
1822                 struct kqueue *kq = kn->kn_kq;
1823
1824                 kqlock(kq);
1825                 proc_fdunlock(p);
1826
1827                 /*
1828                  * Convert the lock to a drop ref.
1829                  * If we get it, go ahead and drop it.
1830                  * Otherwise, we waited for it to
1831                  * be dropped by the other guy, so
1832                  * it is safe to move on in the list.
1833                  */
1834                 if (kqlock2knotedrop(kq, kn)) {
1835                         kn->kn_fop->f_detach(kn);
1836                         knote_drop(kn, p);
1837                 }
1838
1839                 proc_fdlock(p);
1840
1841                 /* the fd tables may have changed - start over */
1842                 list = &fdp->fd_knlist[fd];
1843         }
1844 }
1845
1846 /* proc_fdlock held on entry (and exit) */
1847 static int
1848 knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
1849 {
1850         struct klist *list = NULL;
1851
1852         if (! kn->kn_fop->f_isfd) {
1853                 if (fdp->fd_knhashmask == 0)
1854                         fdp->fd_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
1855                             &fdp->fd_knhashmask);
1856                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1857         } else {
1858                 if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
1859                         u_int size = 0;
1860
1861                         /* have to grow the fd_knlist */
1862                         size = fdp->fd_knlistsize;
1863                         while (size <= kn->kn_id)
1864                                 size += KQEXTENT;
1865                         MALLOC(list, struct klist *,
1866                                size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
1867                         if (list == NULL)
1868                                 return (ENOMEM);
1869
1870                         bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
1871                               fdp->fd_knlistsize * sizeof(struct klist *));
1872                         bzero((caddr_t)list +
1873                               fdp->fd_knlistsize * sizeof(struct klist *),
1874                               (size - fdp->fd_knlistsize) * sizeof(struct klist *));
1875                         FREE(fdp->fd_knlist, M_KQUEUE);
1876                         fdp->fd_knlist = list;
1877                         fdp->fd_knlistsize = size;
1878                 }
1879                 list = &fdp->fd_knlist[kn->kn_id];
1880         }
1881         SLIST_INSERT_HEAD(list, kn, kn_link);
1882         return (0);
1883 }
1884
1885
1886
1887 /*
1888  * should be called at spl == 0, since we don't want to hold spl
1889  * while calling fdrop and free.
1890  */
1891 static void
1892 knote_drop(struct knote *kn, struct proc *p)
1893 {
1894         struct filedesc *fdp = p->p_fd;
1895         struct kqueue *kq = kn->kn_kq;
1896         struct klist *list;
1897
1898         proc_fdlock(p);
1899         if (kn->kn_fop->f_isfd)
1900                 list = &fdp->fd_knlist[kn->kn_id];
1901         else
1902                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1903
1904         SLIST_REMOVE(list, kn, knote, kn_link);
1905         kqlock(kq);
1906         knote_dequeue(kn);
1907         if (kn->kn_status & KN_DROPWAIT)
1908                 thread_wakeup(&kn->kn_status);
1909         kqunlock(kq);
1910         proc_fdunlock(p);
1911
1912         if (kn->kn_fop->f_isfd)
1913                 fp_drop(p, kn->kn_id, kn->kn_fp, 0);
1914
1915         knote_free(kn);
1916 }
1917
1918 /* called with kqueue lock held */
1919 static void
1920 knote_activate(struct knote *kn)
1921 {
1922         struct kqueue *kq = kn->kn_kq;
1923
1924         kn->kn_status |= KN_ACTIVE;
1925         knote_enqueue(kn);
1926         kqueue_wakeup(kq);
1927  }
1928
1929 /* called with kqueue lock held */
1930 static void
1931 knote_deactivate(struct knote *kn)
1932 {
1933         kn->kn_status &= ~KN_ACTIVE;
1934         knote_dequeue(kn);
1935 }
1936
1937 /* called with kqueue lock held */
1938 static void
1939 knote_enqueue(struct knote *kn)
1940 {
1941         struct kqueue *kq = kn->kn_kq;
1942
1943         if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
1944                 struct kqtailq *tq = kn->kn_tq;
1945
1946                 TAILQ_INSERT_TAIL(tq, kn, kn_tqe);
1947                 kn->kn_status |= KN_QUEUED;
1948                 kq->kq_count++;
1949         }
1950 }
1951
1952 /* called with kqueue lock held */
1953 static void
1954 knote_dequeue(struct knote *kn)
1955 {
1956         struct kqueue *kq = kn->kn_kq;
1957
1958         assert((kn->kn_status & KN_DISABLED) == 0);
1959         if ((kn->kn_status & KN_QUEUED) == KN_QUEUED) {
1960                 struct kqtailq *tq = kn->kn_tq;
1961
1962                 TAILQ_REMOVE(tq, kn, kn_tqe);
1963                 kn->kn_tq = &kq->kq_head;
1964                 kn->kn_status &= ~KN_QUEUED;
1965                 kq->kq_count--;
1966         }
1967 }
1968
1969 void
1970 knote_init(void)
1971 {
1972         knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), 8192, "knote zone");
1973
1974         /* allocate kq lock group attribute and group */
1975         kq_lck_grp_attr= lck_grp_attr_alloc_init();
1976
1977         kq_lck_grp = lck_grp_alloc_init("kqueue",  kq_lck_grp_attr);
1978
1979         /* Allocate kq lock attribute */
1980         kq_lck_attr = lck_attr_alloc_init();
1981
1982         /* Initialize the timer filter lock */
1983         lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
1984 }
1985 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
1986
1987 static struct knote *
1988 knote_alloc(void)
1989 {
1990         return ((struct knote *)zalloc(knote_zone));
1991 }
1992
1993 static void
1994 knote_free(struct knote *kn)
1995 {
1996         zfree(knote_zone, kn);
1997 }
1998
1999 #include <sys/param.h>
2000 #include <sys/socket.h>
2001 #include <sys/protosw.h>
2002 #include <sys/domain.h>
2003 #include <sys/mbuf.h>
2004 #include <sys/kern_event.h>
2005 #include <sys/malloc.h>
2006 #include <sys/sys_domain.h>
2007 #include <sys/syslog.h>
2008
2009
2010 static int kev_attach(struct socket *so, int proto, struct proc *p);
2011 static int kev_detach(struct socket *so);
2012 static int kev_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p);
2013
2014 struct pr_usrreqs event_usrreqs = {
2015      pru_abort_notsupp, pru_accept_notsupp, kev_attach, pru_bind_notsupp, pru_connect_notsupp,
2016      pru_connect2_notsupp, kev_control, kev_detach, pru_disconnect_notsupp,
2017      pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp, pru_rcvoob_notsupp,
2018      pru_send_notsupp, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp,
2019      pru_sosend_notsupp, soreceive, pru_sopoll_notsupp
2020 };
2021
2022 struct protosw eventsw[] = {
2023      {
2024           SOCK_RAW,             &systemdomain,  SYSPROTO_EVENT,         PR_ATOMIC,
2025           0,            0,              0,              0,
2026           0,
2027           0,            0,              0,              0,
2028 #if __APPLE__
2029           0,
2030 #endif
2031           &event_usrreqs,
2032           0,            0,              0,
2033 #if __APPLE__
2034           {0, 0},       0,              {0}
2035 #endif
2036      }
2037 };
2038
2039 static
2040 struct kern_event_head kern_event_head;
2041
2042 static u_long static_event_id = 0;
2043 struct domain *sysdom = &systemdomain;
2044
2045 static lck_grp_t                *evt_mtx_grp;
2046 static lck_attr_t               *evt_mtx_attr;
2047 static lck_grp_attr_t   *evt_mtx_grp_attr;
2048 lck_mtx_t                               *evt_mutex;
2049 /*
2050  * Install the protosw's for the NKE manager.  Invoked at
2051  *  extension load time
2052  */
2053 int
2054 kern_event_init(void)
2055 {
2056     int retval;
2057
2058     if ((retval = net_add_proto(eventsw, &systemdomain)) != 0) {
2059             log(LOG_WARNING, "Can't install kernel events protocol (%d)\n", retval);
2060             return(retval);
2061         }
2062
2063         /*
2064          * allocate lock group attribute and group for kern event
2065          */
2066         evt_mtx_grp_attr = lck_grp_attr_alloc_init();
2067
2068         evt_mtx_grp = lck_grp_alloc_init("eventlist", evt_mtx_grp_attr);
2069
2070         /*
2071          * allocate the lock attribute for mutexes
2072          */
2073         evt_mtx_attr = lck_attr_alloc_init();
2074         evt_mutex = lck_mtx_alloc_init(evt_mtx_grp, evt_mtx_attr);
2075         if (evt_mutex == NULL)
2076                         return (ENOMEM);
2077
2078     return(KERN_SUCCESS);
2079 }
2080
2081 static int
2082 kev_attach(struct socket *so, __unused int proto, __unused struct proc *p)
2083 {
2084      int error;
2085      struct kern_event_pcb  *ev_pcb;
2086
2087      error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE);
2088      if (error)
2089           return error;
2090
2091      MALLOC(ev_pcb, struct kern_event_pcb *, sizeof(struct kern_event_pcb), M_PCB, M_WAITOK);
2092      if (ev_pcb == 0)
2093           return ENOBUFS;
2094
2095      ev_pcb->ev_socket = so;
2096      ev_pcb->vendor_code_filter = 0xffffffff;
2097
2098      so->so_pcb = (caddr_t) ev_pcb;
2099          lck_mtx_lock(evt_mutex);
2100      LIST_INSERT_HEAD(&kern_event_head, ev_pcb, ev_link);
2101          lck_mtx_unlock(evt_mutex);
2102
2103      return 0;
2104 }
2105
2106
2107 static int
2108 kev_detach(struct socket *so)
2109 {
2110      struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2111
2112      if (ev_pcb != 0) {
2113                 lck_mtx_lock(evt_mutex);
2114                 LIST_REMOVE(ev_pcb, ev_link);
2115                 lck_mtx_unlock(evt_mutex);
2116                 FREE(ev_pcb, M_PCB);
2117                 so->so_pcb = 0;
2118                 so->so_flags |= SOF_PCBCLEARING;
2119      }
2120
2121      return 0;
2122 }
2123
2124 /*
2125  * For now, kev_vender_code and mbuf_tags use the same
2126  * mechanism.
2127  */
2128 extern errno_t mbuf_tag_id_find_internal(const char *string, u_long *out_id,
2129                                                                                  int create);
2130
2131 errno_t kev_vendor_code_find(
2132         const char      *string,
2133         u_long          *out_vender_code)
2134 {
2135         if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
2136                 return EINVAL;
2137         }
2138         return mbuf_tag_id_find_internal(string, out_vender_code, 1);
2139 }
2140
2141 extern void mbuf_tag_id_first_last(u_long *first, u_long *last);
2142
2143 errno_t  kev_msg_post(struct kev_msg *event_msg)
2144 {
2145         u_long  min_vendor, max_vendor;
2146
2147         mbuf_tag_id_first_last(&min_vendor, &max_vendor);
2148
2149         if (event_msg == NULL)
2150                 return EINVAL;
2151
2152         /* Limit third parties to posting events for registered vendor codes only */
2153         if (event_msg->vendor_code < min_vendor ||
2154                 event_msg->vendor_code > max_vendor)
2155         {
2156                 return EINVAL;
2157         }
2158
2159         return kev_post_msg(event_msg);
2160 }
2161
2162
2163 int  kev_post_msg(struct kev_msg *event_msg)
2164 {
2165      struct mbuf *m, *m2;
2166      struct kern_event_pcb  *ev_pcb;
2167      struct kern_event_msg  *ev;
2168      char              *tmp;
2169      unsigned long     total_size;
2170      int               i;
2171
2172         /* Verify the message is small enough to fit in one mbuf w/o cluster */
2173         total_size = KEV_MSG_HEADER_SIZE;
2174
2175         for (i = 0; i < 5; i++) {
2176                 if (event_msg->dv[i].data_length == 0)
2177                         break;
2178                 total_size += event_msg->dv[i].data_length;
2179         }
2180
2181         if (total_size > MLEN) {
2182                 return EMSGSIZE;
2183         }
2184
2185      m = m_get(M_DONTWAIT, MT_DATA);
2186      if (m == 0)
2187           return ENOBUFS;
2188
2189      ev = mtod(m, struct kern_event_msg *);
2190      total_size = KEV_MSG_HEADER_SIZE;
2191
2192      tmp = (char *) &ev->event_data[0];
2193      for (i = 0; i < 5; i++) {
2194           if (event_msg->dv[i].data_length == 0)
2195                break;
2196
2197           total_size += event_msg->dv[i].data_length;
2198           bcopy(event_msg->dv[i].data_ptr, tmp,
2199                 event_msg->dv[i].data_length);
2200           tmp += event_msg->dv[i].data_length;
2201      }
2202
2203      ev->id = ++static_event_id;
2204      ev->total_size   = total_size;
2205      ev->vendor_code  = event_msg->vendor_code;
2206      ev->kev_class    = event_msg->kev_class;
2207      ev->kev_subclass = event_msg->kev_subclass;
2208      ev->event_code   = event_msg->event_code;
2209
2210      m->m_len = total_size;
2211      lck_mtx_lock(evt_mutex);
2212      for (ev_pcb = LIST_FIRST(&kern_event_head);
2213           ev_pcb;
2214           ev_pcb = LIST_NEXT(ev_pcb, ev_link)) {
2215
2216           if (ev_pcb->vendor_code_filter != KEV_ANY_VENDOR) {
2217                if (ev_pcb->vendor_code_filter != ev->vendor_code)
2218                     continue;
2219
2220                if (ev_pcb->class_filter != KEV_ANY_CLASS) {
2221                     if (ev_pcb->class_filter != ev->kev_class)
2222                          continue;
2223
2224                     if ((ev_pcb->subclass_filter != KEV_ANY_SUBCLASS) &&
2225                         (ev_pcb->subclass_filter != ev->kev_subclass))
2226                          continue;
2227                }
2228           }
2229
2230           m2 = m_copym(m, 0, m->m_len, M_NOWAIT);
2231           if (m2 == 0) {
2232                m_free(m);
2233                    lck_mtx_unlock(evt_mutex);
2234                return ENOBUFS;
2235           }
2236           socket_lock(ev_pcb->ev_socket, 1);
2237           if (sbappendrecord(&ev_pcb->ev_socket->so_rcv, m2))
2238                   sorwakeup(ev_pcb->ev_socket);
2239           socket_unlock(ev_pcb->ev_socket, 1);
2240      }
2241
2242      m_free(m);
2243      lck_mtx_unlock(evt_mutex);
2244      return 0;
2245 }
2246
2247 static int
2248 kev_control(struct socket *so,
2249                         u_long cmd,
2250                         caddr_t data,
2251                         __unused struct ifnet *ifp,
2252                         __unused struct proc *p)
2253 {
2254         struct kev_request *kev_req = (struct kev_request *) data;
2255         struct kern_event_pcb  *ev_pcb;
2256         struct kev_vendor_code *kev_vendor;
2257         u_long  *id_value = (u_long *) data;
2258
2259
2260         switch (cmd) {
2261
2262                 case SIOCGKEVID:
2263                         *id_value = static_event_id;
2264                         break;
2265
2266                 case SIOCSKEVFILT:
2267                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2268                         ev_pcb->vendor_code_filter = kev_req->vendor_code;
2269                         ev_pcb->class_filter     = kev_req->kev_class;
2270                         ev_pcb->subclass_filter  = kev_req->kev_subclass;
2271                         break;
2272
2273                 case SIOCGKEVFILT:
2274                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2275                         kev_req->vendor_code = ev_pcb->vendor_code_filter;
2276                         kev_req->kev_class   = ev_pcb->class_filter;
2277                         kev_req->kev_subclass = ev_pcb->subclass_filter;
2278                         break;
2279
2280                 case SIOCGKEVVENDOR:
2281                         kev_vendor = (struct kev_vendor_code*)data;
2282
2283                         /* Make sure string is NULL terminated */
2284                         kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0;
2285
2286                         return mbuf_tag_id_find_internal(kev_vendor->vendor_string,
2287                                                                                          &kev_vendor->vendor_code, 0);
2288
2289                 default:
2290                         return ENOTSUP;
2291         }
2292
2293         return 0;
2294 }
2295
2296
2297
2298 int
2299 fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
2300 {
2301         struct stat * st;
2302
2303         /* No need for the funnel as fd is kept alive */
2304
2305         st = &kinfo->kq_stat;
2306
2307         st->st_size = kq->kq_count;
2308         st->st_blksize = sizeof(struct kevent);
2309         st->st_mode = S_IFIFO;
2310         if (kq->kq_state & KQ_SEL)
2311                 kinfo->kq_state |=  PROC_KQUEUE_SELECT;
2312         if (kq->kq_state & KQ_SLEEP)
2313                 kinfo->kq_state |= PROC_KQUEUE_SLEEP;
2314
2315         return(0);
2316 }
2317