bsd/kern/kern_event.c

   1 /*
   2  * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License.  The rights granted to you under the
  10  * License may not be used to create, or enable the creation or
  11  * redistribution of, unlawful or unlicensed copies of an Apple operating
  12  * system, or to circumvent, violate, or enable the circumvention or
  13  * violation of, any terms of an Apple operating system software license
  14  * agreement.
  15  *
  16  * Please obtain a copy of the License at
  17  * http://www.opensource.apple.com/apsl/ and read it before using this
  18  * file.
  19  *
  20  * The Original Code and all software distributed under the License are
  21  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  22  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  23  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  24  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  25  * Please see the License for the specific language governing rights and
  26  * limitations under the License.
  27  *
  28  * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
  29  *
  30  */
  31 /*-
  32  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
  33  * All rights reserved.
  34  *
  35  * Redistribution and use in source and binary forms, with or without
  36  * modification, are permitted provided that the following conditions
  37  * are met:
  38  * 1. Redistributions of source code must retain the above copyright
  39  *    notice, this list of conditions and the following disclaimer.
  40  * 2. Redistributions in binary form must reproduce the above copyright
  41  *    notice, this list of conditions and the following disclaimer in the
  42  *    documentation and/or other materials provided with the distribution.
  43  *
  44  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  45  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  46  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  47  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  48  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  49  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  50  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  51  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  52  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  53  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  54  * SUCH DAMAGE.
  55  */
  56 /*
  57  *      @(#)kern_event.c       1.0 (3/31/2000)
  58  */
  59 #include <stdint.h>
  60
  61 #include <sys/param.h>
  62 #include <sys/systm.h>
  63 #include <sys/filedesc.h>
  64 #include <sys/kernel.h>
  65 #include <sys/proc_internal.h>
  66 #include <sys/kauth.h>
  67 #include <sys/malloc.h>
  68 #include <sys/unistd.h>
  69 #include <sys/file_internal.h>
  70 #include <sys/fcntl.h>
  71 #include <sys/select.h>
  72 #include <sys/queue.h>
  73 #include <sys/event.h>
  74 #include <sys/eventvar.h>
  75 #include <sys/protosw.h>
  76 #include <sys/socket.h>
  77 #include <sys/socketvar.h>
  78 #include <sys/stat.h>
  79 #include <sys/sysctl.h>
  80 #include <sys/uio.h>
  81 #include <sys/sysproto.h>
  82 #include <sys/user.h>
  83 #include <string.h>
  84
  85 #include <kern/lock.h>
  86 #include <kern/clock.h>
  87 #include <kern/thread_call.h>
  88 #include <kern/sched_prim.h>
  89 #include <kern/zalloc.h>
  90 #include <kern/assert.h>
  91
  92 #include <libkern/libkern.h>
  93
  94 extern void unix_syscall_return(int);
  95
  96 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
  97
  98 static inline void kqlock(struct kqueue *kq);
  99 static inline void kqunlock(struct kqueue *kq);
 100
 101 static int      kqlock2knoteuse(struct kqueue *kq, struct knote *kn);
 102 static int      kqlock2knoteusewait(struct kqueue *kq, struct knote *kn);
 103 static int      kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
 104 static int      knoteuse2kqlock(struct kqueue *kq, struct knote *kn);
 105
 106 static void     kqueue_wakeup(struct kqueue *kq);
 107 static int      kqueue_read(struct fileproc *fp, struct uio *uio,
 108                     kauth_cred_t cred, int flags, struct proc *p);
 109 static int      kqueue_write(struct fileproc *fp, struct uio *uio,
 110                     kauth_cred_t cred, int flags, struct proc *p);
 111 static int      kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
 112                     struct proc *p);
 113 static int      kqueue_select(struct fileproc *fp, int which, void *wql,
 114                     struct proc *p);
 115 static int      kqueue_close(struct fileglob *fp, struct proc *p);
 116 static int      kqueue_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
 117 extern int      kqueue_stat(struct fileproc *fp, struct stat *st, struct proc *p);
 118
 119 static struct fileops kqueueops = {
 120         kqueue_read,
 121         kqueue_write,
 122         kqueue_ioctl,
 123         kqueue_select,
 124         kqueue_close,
 125         kqueue_kqfilter,
 126         0
 127 };
 128
 129 static int kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p);
 130 static int kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p);
 131
 132 static int      kevent_callback(struct kqueue *kq, struct kevent *kevp, void *data);
 133 static void     kevent_continue(struct kqueue *kq, void *data, int error);
 134 static void     kevent_scan_continue(void *contp, wait_result_t wait_result);
 135 static int      kevent_process(struct kqueue *kq, kevent_callback_t callback,
 136                                void *data, int *countp, struct proc *p);
 137 static void     knote_put(struct knote *kn);
 138 static int      knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p);
 139 static void     knote_drop(struct knote *kn, struct proc *p);
 140 static void     knote_activate(struct knote *kn);
 141 static void     knote_deactivate(struct knote *kn);
 142 static void     knote_enqueue(struct knote *kn);
 143 static void     knote_dequeue(struct knote *kn);
 144 static struct   knote *knote_alloc(void);
 145 static void     knote_free(struct knote *kn);
 146 extern void     knote_init(void);
 147
 148 static int      filt_fileattach(struct knote *kn);
 149 static struct filterops file_filtops =
 150         { 1, filt_fileattach, NULL, NULL };
 151
 152 static void     filt_kqdetach(struct knote *kn);
 153 static int      filt_kqueue(struct knote *kn, long hint);
 154 static struct filterops kqread_filtops =
 155         { 1, NULL, filt_kqdetach, filt_kqueue };
 156
 157 /*
 158  * placeholder for not-yet-implemented filters
 159  */
 160 static int      filt_badattach(struct knote *kn);
 161 static struct filterops bad_filtops =
 162         { 0, filt_badattach, 0 , 0 };
 163
 164 static int      filt_procattach(struct knote *kn);
 165 static void     filt_procdetach(struct knote *kn);
 166 static int      filt_proc(struct knote *kn, long hint);
 167
 168 static struct filterops proc_filtops =
 169         { 0, filt_procattach, filt_procdetach, filt_proc };
 170
 171 extern struct filterops fs_filtops;
 172
 173 extern struct filterops sig_filtops;
 174
 175
 176 /* Timer filter */
 177 static int      filt_timercompute(struct knote *kn, uint64_t *abs_time);
 178 static void     filt_timerexpire(void *knx, void *param1);
 179 static int      filt_timerattach(struct knote *kn);
 180 static void     filt_timerdetach(struct knote *kn);
 181 static int      filt_timer(struct knote *kn, long hint);
 182
 183 static struct filterops timer_filtops =
 184         { 0, filt_timerattach, filt_timerdetach, filt_timer };
 185
 186 /* to avoid arming timers that fire quicker than we can handle */
 187 static uint64_t filt_timerfloor = 0;
 188
 189 static lck_mtx_t _filt_timerlock;
 190 static void     filt_timerlock(void);
 191 static void     filt_timerunlock(void);
 192
 193 /*
 194  * Sentinel marker for a thread scanning through the list of
 195  * active knotes.
 196  */
 197 static struct filterops threadmarker_filtops =
 198         { 0, filt_badattach, 0, 0 };
 199
 200 static zone_t   knote_zone;
 201
 202 #define KN_HASHSIZE             64              /* XXX should be tunable */
 203 #define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))
 204
 205 #if 0
 206 extern struct filterops aio_filtops;
 207 #endif
 208
 209 /*
  210  * Table for all system-defined filters.
 211  */
 212 static struct filterops *sysfilt_ops[] = {
 213         &file_filtops,                  /* EVFILT_READ */
 214         &file_filtops,                  /* EVFILT_WRITE */
 215 #if 0
 216         &aio_filtops,                   /* EVFILT_AIO */
 217 #else
 218         &bad_filtops,                   /* EVFILT_AIO */
 219 #endif
 220         &file_filtops,                  /* EVFILT_VNODE */
 221         &proc_filtops,                  /* EVFILT_PROC */
 222         &sig_filtops,                   /* EVFILT_SIGNAL */
 223         &timer_filtops,                 /* EVFILT_TIMER */
 224         &bad_filtops,                   /* EVFILT_MACHPORT */
 225         &fs_filtops                     /* EVFILT_FS */
 226 };
 227
 228 /*
 229  * kqueue/note lock attributes and implementations
 230  *
 231  *      kqueues have locks, while knotes have use counts
 232  *      Most of the knote state is guarded by the object lock.
 233  *      the knote "inuse" count and status use the kqueue lock.
 234  */
 235 lck_grp_attr_t * kq_lck_grp_attr;
 236 lck_grp_t * kq_lck_grp;
 237 lck_attr_t * kq_lck_attr;
 238
 239 static inline void
 240 kqlock(struct kqueue *kq)
 241 {
 242         lck_spin_lock(&kq->kq_lock);
 243 }
 244
 245 static inline void
 246 kqunlock(struct kqueue *kq)
 247 {
 248         lck_spin_unlock(&kq->kq_lock);
 249 }
 250
 251 /*
 252  * Convert a kq lock to a knote use referece.
 253  *
 254  *      If the knote is being dropped, we can't get
 255  *      a use reference, so just return with it
 256  *      still locked.
 257  *
 258  *      - kq locked at entry
 259  *      - unlock on exit if we get the use reference
 260  */
 261 static int
 262 kqlock2knoteuse(struct kqueue *kq, struct knote *kn)
 263 {
 264         if (kn->kn_status & KN_DROPPING)
 265                 return 0;
 266         kn->kn_inuse++;
 267         kqunlock(kq);
 268         return 1;
 269  }
 270
 271 /*
 272  * Convert a kq lock to a knote use referece.
 273  *
 274  *      If the knote is being dropped, we can't get
 275  *      a use reference, so just return with it
 276  *      still locked.
 277  *
 278  *      - kq locked at entry
 279  *      - kq always unlocked on exit
 280  */
 281 static int
 282 kqlock2knoteusewait(struct kqueue *kq, struct knote *kn)
 283 {
 284         if (!kqlock2knoteuse(kq, kn)) {
 285                 kn->kn_status |= KN_DROPWAIT;
 286                 assert_wait(&kn->kn_status, THREAD_UNINT);
 287                 kqunlock(kq);
 288                 thread_block(THREAD_CONTINUE_NULL);
 289                 return 0;
 290         }
 291         return 1;
 292  }
 293
 294 /*
 295  * Convert from a knote use reference back to kq lock.
 296  *
 297  *      Drop a use reference and wake any waiters if
 298  *      this is the last one.
 299  *
 300  *      The exit return indicates if the knote is
 301  *      still alive - but the kqueue lock is taken
 302  *      unconditionally.
 303  */
 304 static int
 305 knoteuse2kqlock(struct kqueue *kq, struct knote *kn)
 306 {
 307         kqlock(kq);
 308         if ((--kn->kn_inuse == 0) &&
 309             (kn->kn_status & KN_USEWAIT)) {
 310                 kn->kn_status &= ~KN_USEWAIT;
 311                 thread_wakeup(&kn->kn_inuse);
 312         }
 313         return ((kn->kn_status & KN_DROPPING) == 0);
 314  }
 315
 316 /*
 317  * Convert a kq lock to a knote drop referece.
 318  *
 319  *      If the knote is in use, wait for the use count
 320  *      to subside.  We first mark our intention to drop
 321  *      it - keeping other users from "piling on."
 322  *      If we are too late, we have to wait for the
 323  *      other drop to complete.
 324  *
 325  *      - kq locked at entry
 326  *      - always unlocked on exit.
 327  *      - caller can't hold any locks that would prevent
 328  *        the other dropper from completing.
 329  */
 330 static int
 331 kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
 332 {
 333
 334         if ((kn->kn_status & KN_DROPPING) == 0) {
 335                 kn->kn_status |= KN_DROPPING;
 336                 if (kn->kn_inuse > 0) {
 337                         kn->kn_status |= KN_USEWAIT;
 338                         assert_wait(&kn->kn_inuse, THREAD_UNINT);
 339                         kqunlock(kq);
 340                         thread_block(THREAD_CONTINUE_NULL);
 341                 } else
 342                         kqunlock(kq);
 343                 return 1;
 344         } else {
 345                 kn->kn_status |= KN_DROPWAIT;
 346                 assert_wait(&kn->kn_status, THREAD_UNINT);
 347                 kqunlock(kq);
 348                 thread_block(THREAD_CONTINUE_NULL);
 349                 return 0;
 350         }
 351 }
 352
 353 /*
 354  * Release a knote use count reference.
 355  */
 356 static void
 357 knote_put(struct knote *kn)
 358 {
 359         struct kqueue *kq = kn->kn_kq;
 360
 361         kqlock(kq);
 362         if ((--kn->kn_inuse == 0) &&
 363             (kn->kn_status & KN_USEWAIT)) {
 364                 kn->kn_status &= ~KN_USEWAIT;
 365                 thread_wakeup(&kn->kn_inuse);
 366         }
 367         kqunlock(kq);
 368  }
 369
 370
 371
 372 static int
 373 filt_fileattach(struct knote *kn)
 374 {
 375
 376         return (fo_kqfilter(kn->kn_fp, kn, current_proc()));
 377 }
 378
 379 #define f_flag f_fglob->fg_flag
 380 #define f_type f_fglob->fg_type
 381 #define f_msgcount f_fglob->fg_msgcount
 382 #define f_cred f_fglob->fg_cred
 383 #define f_ops f_fglob->fg_ops
 384 #define f_offset f_fglob->fg_offset
 385 #define f_data f_fglob->fg_data
 386
 387 static void
 388 filt_kqdetach(struct knote *kn)
 389 {
 390         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 391
 392         kqlock(kq);
 393         KNOTE_DETACH(&kq->kq_sel.si_note, kn);
 394         kqunlock(kq);
 395 }
 396
 397 /*ARGSUSED*/
 398 static int
 399 filt_kqueue(struct knote *kn, __unused long hint)
 400 {
 401         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 402
 403         kn->kn_data = kq->kq_count;
 404         return (kn->kn_data > 0);
 405 }
 406
 407 static int
 408 filt_procattach(struct knote *kn)
 409 {
 410         struct proc *p;
 411         int funnel_state;
 412
 413         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 414
 415         p = pfind(kn->kn_id);
 416         if (p == NULL) {
 417                 thread_funnel_set(kernel_flock, funnel_state);
 418                 return (ESRCH);
 419         }
 420
 421         kn->kn_flags |= EV_CLEAR;               /* automatically set */
 422
 423         /*
 424          * internal flag indicating registration done by kernel
 425          */
 426         if (kn->kn_flags & EV_FLAG1) {
 427                 kn->kn_data = (int)kn->kn_sdata;        /* ppid */
 428                 kn->kn_fflags = NOTE_CHILD;
 429                 kn->kn_flags &= ~EV_FLAG1;
 430         }
 431
 432         /* XXX lock the proc here while adding to the list? */
 433         KNOTE_ATTACH(&p->p_klist, kn);
 434
 435         thread_funnel_set(kernel_flock, funnel_state);
 436
 437         return (0);
 438 }
 439
 440 /*
 441  * The knote may be attached to a different process, which may exit,
 442  * leaving nothing for the knote to be attached to.  So when the process
 443  * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 444  * it will be deleted when read out.  However, as part of the knote deletion,
 445  * this routine is called, so a check is needed to avoid actually performing
 446  * a detach, because the original process does not exist any more.
 447  */
 448 static void
 449 filt_procdetach(struct knote *kn)
 450 {
 451         struct proc *p;
 452         int funnel_state;
 453
 454         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 455         p = pfind(kn->kn_id);
 456
 457         if (p != (struct proc *)NULL)
 458                 KNOTE_DETACH(&p->p_klist, kn);
 459
 460         thread_funnel_set(kernel_flock, funnel_state);
 461 }
 462
 463 static int
 464 filt_proc(struct knote *kn, long hint)
 465 {
 466         u_int event;
 467         int funnel_state;
 468
 469         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 470
 471         /*
 472          * mask off extra data
 473          */
 474         event = (u_int)hint & NOTE_PCTRLMASK;
 475
 476         /*
 477          * if the user is interested in this event, record it.
 478          */
 479         if (kn->kn_sfflags & event)
 480                 kn->kn_fflags |= event;
 481
 482         /*
 483          * process is gone, so flag the event as finished.
 484          */
 485         if (event == NOTE_EXIT) {
 486                 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 487                 thread_funnel_set(kernel_flock, funnel_state);
 488                 return (1);
 489         }
 490
 491         /*
 492          * process forked, and user wants to track the new process,
 493          * so attach a new knote to it, and immediately report an
 494          * event with the parent's pid.
 495          */
 496         if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
 497                 struct kevent kev;
 498                 int error;
 499
 500                 /*
 501                  * register knote with new process.
 502                  */
 503                 kev.ident = hint & NOTE_PDATAMASK;      /* pid */
 504                 kev.filter = kn->kn_filter;
 505                 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
 506                 kev.fflags = kn->kn_sfflags;
 507                 kev.data = kn->kn_id;                   /* parent */
 508                 kev.udata = kn->kn_kevent.udata;        /* preserve udata */
 509                 error = kevent_register(kn->kn_kq, &kev, NULL);
 510                 if (error)
 511                         kn->kn_fflags |= NOTE_TRACKERR;
 512         }
 513         event = kn->kn_fflags;
 514         thread_funnel_set(kernel_flock, funnel_state);
 515
 516         return (event != 0);
 517 }
 518
 519 /*
 520  * filt_timercompute - compute absolute timeout
 521  *
 522  *      The saved-data field in the knote contains the
 523  *      time value.  The saved filter-flags indicates
 524  *      the unit of measurement.
 525  *
 526  *      If the timeout is not absolute, adjust it for
 527  *      the current time.
 528  */
 529 static int
 530 filt_timercompute(struct knote *kn, uint64_t *abs_time)
 531 {
 532         uint64_t multiplier;
 533         uint64_t raw;
 534
 535         switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS)) {
 536         case NOTE_SECONDS:
 537                 multiplier = NSEC_PER_SEC;
 538                 break;
 539         case NOTE_USECONDS:
 540                 multiplier = NSEC_PER_USEC;
 541                 break;
 542         case NOTE_NSECONDS:
 543                 multiplier = 1;
 544                 break;
 545         case 0: /* milliseconds (default) */
 546                 multiplier = NSEC_PER_SEC / 1000;
 547                 break;
 548         default:
 549                 return EINVAL;
 550         }
 551         nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw);
 552         if (raw <= filt_timerfloor) {
 553                 *abs_time = 0;
 554                 return 0;
 555         }
 556         if ((kn->kn_sfflags & NOTE_ABSOLUTE) == NOTE_ABSOLUTE) {
 557                 uint32_t seconds, nanoseconds;
 558                 uint64_t now;
 559
 560                 clock_get_calendar_nanotime(&seconds, &nanoseconds);
 561                 nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + nanoseconds,
 562                                             &now);
 563                 if (now >= raw + filt_timerfloor) {
 564                         *abs_time = 0;
 565                         return 0;
 566                 }
 567                 raw -= now;
 568         }
 569         clock_absolutetime_interval_to_deadline(raw, abs_time);
 570         return 0;
 571 }
 572
 573 /*
 574  * filt_timerexpire - the timer callout routine
 575  *
 576  *      Just propagate the timer event into the knote
 577  *      filter routine (by going through the knote
 578  *      synchronization point).  Pass a hint to
 579  *      indicate this is a real event, not just a
 580  *      query from above.
 581  */
 582 static void
 583 filt_timerexpire(void *knx, __unused void *spare)
 584 {
 585         struct klist timer_list;
 586         struct knote *kn = knx;
 587
 588         /* no "object" for timers, so fake a list */
 589         SLIST_INIT(&timer_list);
 590         SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);
 591         KNOTE(&timer_list, 1);
 592 }
 593
 594 /*
 595  * data contains amount of time to sleep, in milliseconds,
 596  * or a pointer to a timespec structure.
 597  */
 598 static int
 599 filt_timerattach(struct knote *kn)
 600 {
 601         thread_call_t callout;
 602         uint64_t deadline;
 603         int error;
 604
 605         error = filt_timercompute(kn, &deadline);
 606         if (error)
 607                 return (error);
 608
 609         if (deadline) {
 610                 callout = thread_call_allocate(filt_timerexpire, kn);
 611                 if (NULL == callout)
 612                         return (ENOMEM);
 613         } else {
 614                 /* handle as immediate */
 615                 kn->kn_sdata = 0;
 616                 callout = NULL;
 617         }
 618
 619         filt_timerlock();
 620         kn->kn_hook = (caddr_t)callout;
 621
 622         /* absolute=EV_ONESHOT */
 623         if (kn->kn_sfflags & NOTE_ABSOLUTE)
 624                 kn->kn_flags |= EV_ONESHOT;
 625
 626         if (deadline) {
 627                 /* all others - if not faking immediate */
 628                 kn->kn_flags |= EV_CLEAR;
 629                 thread_call_enter_delayed(callout, deadline);
 630                 kn->kn_hookid = 0;
 631         } else {
 632                 /* fake immediate */
 633                 kn->kn_hookid = 1;
 634         }
 635         filt_timerunlock();
 636         return (0);
 637 }
 638
 639 static void
 640 filt_timerdetach(struct knote *kn)
 641 {
 642         thread_call_t callout;
 643
 644         filt_timerlock();
 645         callout = (thread_call_t)kn->kn_hook;
 646         if (callout != NULL) {
 647                 boolean_t cancelled;
 648
 649                 /* cancel the callout if we can */
 650                 cancelled = thread_call_cancel(callout);
 651                 if (cancelled) {
 652                         /* got it, just free it */
 653                         kn->kn_hook = NULL;
 654                         filt_timerunlock();
 655                         thread_call_free(callout);
 656                         return;
 657                 }
 658                 /* we have to wait for the expire routine.  */
 659                 kn->kn_hookid = -1;     /* we are detaching */
 660                 assert_wait(&kn->kn_hook, THREAD_UNINT);
 661                 filt_timerunlock();
 662                 thread_block(THREAD_CONTINUE_NULL);
 663                 assert(kn->kn_hook == NULL);
 664                 return;
 665         }
 666         /* nothing to do */
 667         filt_timerunlock();
 668 }
 669
 670
 671
 672 static int
 673 filt_timer(struct knote *kn, __unused long hint)
 674 {
 675         int result;
 676
 677         if (hint) {
 678                 /* real timer pop */
 679                 thread_call_t callout;
 680                 boolean_t detaching;
 681
 682                 filt_timerlock();
 683
 684                 kn->kn_data++;
 685
 686                 detaching = (kn->kn_hookid < 0);
 687                 callout = (thread_call_t)kn->kn_hook;
 688
 689                 if (!detaching && (kn->kn_flags & EV_ONESHOT) == 0) {
 690                         uint64_t deadline;
 691                         int error;
 692
 693                         /* user input data may have changed - deal */
 694                         error = filt_timercompute(kn, &deadline);
 695                         if (error) {
 696                                 kn->kn_flags |= EV_ERROR;
 697                                 kn->kn_data = error;
 698                         } else if (deadline == 0) {
 699                                 /* revert to fake immediate */
 700                                 kn->kn_flags &= ~EV_CLEAR;
 701                                 kn->kn_sdata = 0;
 702                                 kn->kn_hookid = 1;
 703                         } else {
 704                                 /* keep the callout and re-arm */
 705                                 thread_call_enter_delayed(callout, deadline);
 706                                 filt_timerunlock();
 707                                 return 1;
 708                         }
 709                 }
 710                 kn->kn_hook = NULL;
 711                 filt_timerunlock();
 712                 thread_call_free(callout);
 713
 714                 /* if someone is waiting for timer to pop */
 715                 if (detaching)
 716                         thread_wakeup(&kn->kn_hook);
 717
 718                 return 1;
 719         }
 720
 721         /* user-query */
 722         filt_timerlock();
 723
 724         /* change fake timer to real if needed */
 725         while (kn->kn_hookid > 0 && kn->kn_sdata > 0) {
 726                 int error;
 727
 728                 /* update the fake timer (make real) */
 729                 kn->kn_hookid = 0;
 730                 kn->kn_data = 0;
 731                 filt_timerunlock();
 732                 error = filt_timerattach(kn);
 733                 filt_timerlock();
 734                 if (error) {
 735                         kn->kn_flags |= EV_ERROR;
 736                         kn->kn_data = error;
 737                         filt_timerunlock();
 738                         return 1;
 739                 }
 740         }
 741
 742         /* if still fake, pretend it fired */
 743         if (kn->kn_hookid > 0)
 744                 kn->kn_data = 1;
 745
 746         result = (kn->kn_data != 0);
 747         filt_timerunlock();
 748         return result;
 749 }
 750
 751 static void
 752 filt_timerlock(void)
 753 {
 754         lck_mtx_lock(&_filt_timerlock);
 755 }
 756
 757 static void
 758 filt_timerunlock(void)
 759 {
 760         lck_mtx_unlock(&_filt_timerlock);
 761 }
 762
 763 /*
 764  * JMM - placeholder for not-yet-implemented filters
 765  */
 766 static int
 767 filt_badattach(__unused struct knote *kn)
 768 {
 769         return(ENOTSUP);
 770 }
 771
 772
 773 struct kqueue *
 774 kqueue_alloc(struct proc *p)
 775 {
 776         struct filedesc *fdp = p->p_fd;
 777         struct kqueue *kq;
 778
 779         MALLOC_ZONE(kq, struct kqueue *, sizeof(struct kqueue), M_KQUEUE, M_WAITOK);
 780         if (kq != NULL) {
 781                 bzero(kq, sizeof(struct kqueue));
 782                 lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
 783                 TAILQ_INIT(&kq->kq_head);
 784                 TAILQ_INIT(&kq->kq_inprocess);
 785                 kq->kq_fdp = fdp;
 786         }
 787
 788         if (fdp->fd_knlistsize < 0) {
 789                 proc_fdlock(p);
 790                 if (fdp->fd_knlistsize < 0)
 791                         fdp->fd_knlistsize = 0;         /* this process has had a kq */
 792                 proc_fdunlock(p);
 793         }
 794
 795         return kq;
 796 }
 797
 798
 799 /*
 800  * kqueue_dealloc - detach all knotes from a kqueue and free it
 801  *
 802  *      We walk each list looking for knotes referencing this
 803  *      this kqueue.  If we find one, we try to drop it.  But
 804  *      if we fail to get a drop reference, that will wait
 805  *      until it is dropped.  So, we can just restart again
 806  *      safe in the assumption that the list will eventually
 807  *      not contain any more references to this kqueue (either
 808  *      we dropped them all, or someone else did).
 809  *
 810  *      Assumes no new events are being added to the kqueue.
 811  *      Nothing locked on entry or exit.
 812  */
 813 void
 814 kqueue_dealloc(struct kqueue *kq, struct proc *p)
 815 {
 816         struct filedesc *fdp = p->p_fd;
 817         struct knote *kn;
 818         int i;
 819
 820         proc_fdlock(p);
 821         for (i = 0; i < fdp->fd_knlistsize; i++) {
 822                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 823                 while (kn != NULL) {
 824                         if (kq == kn->kn_kq) {
 825                                 kqlock(kq);
 826                                 proc_fdunlock(p);
 827                                 /* drop it ourselves or wait */
 828                                 if (kqlock2knotedrop(kq, kn)) {
 829                                         kn->kn_fop->f_detach(kn);
 830                                         knote_drop(kn, p);
 831                                 }
 832                                 proc_fdlock(p);
 833                                 /* start over at beginning of list */
 834                                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 835                                 continue;
 836                         }
 837                         kn = SLIST_NEXT(kn, kn_link);
 838                 }
 839         }
 840         if (fdp->fd_knhashmask != 0) {
 841                 for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
 842                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 843                         while (kn != NULL) {
 844                                 if (kq == kn->kn_kq) {
 845                                         kqlock(kq);
 846                                         proc_fdunlock(p);
 847                                         /* drop it ourselves or wait */
 848                                         if (kqlock2knotedrop(kq, kn)) {
 849                                                 kn->kn_fop->f_detach(kn);
 850                                                 knote_drop(kn, p);
 851                                         }
 852                                         proc_fdlock(p);
 853                                         /* start over at beginning of list */
 854                                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 855                                         continue;
 856                                 }
 857                                 kn = SLIST_NEXT(kn, kn_link);
 858                         }
 859                 }
 860         }
 861         proc_fdunlock(p);
 862         lck_spin_destroy(&kq->kq_lock, kq_lck_grp);
 863         FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE);
 864 }
 865
 866 int
 867 kqueue(struct proc *p, __unused struct kqueue_args *uap, register_t *retval)
 868 {
 869         struct kqueue *kq;
 870         struct fileproc *fp;
 871         int fd, error;
 872
 873         error = falloc(p, &fp, &fd);
 874         if (error) {
 875                 return (error);
 876         }
 877
 878         kq = kqueue_alloc(p);
 879         if (kq == NULL) {
 880                 fp_free(p, fd, fp);
 881                 return (ENOMEM);
 882         }
 883
 884         fp->f_flag = FREAD | FWRITE;
 885         fp->f_type = DTYPE_KQUEUE;
 886         fp->f_ops = &kqueueops;
 887         fp->f_data = (caddr_t)kq;
 888
 889         proc_fdlock(p);
 890         *fdflags(p, fd) &= ~UF_RESERVED;
 891         fp_drop(p, fd, fp, 1);
 892         proc_fdunlock(p);
 893
 894         *retval = fd;
 895         return (error);
 896 }
 897
 898 int
 899 kqueue_portset_np(__unused struct proc *p,
 900                                   __unused struct kqueue_portset_np_args *uap,
 901                                   __unused register_t *retval)
 902 {
 903                 /* JMM - Placeholder for now */
 904                 return (ENOTSUP);
 905 }
 906
 907 int
 908 kqueue_from_portset_np(__unused struct proc *p,
 909                                            __unused struct kqueue_from_portset_np_args *uap,
 910                                            __unused register_t *retval)
 911 {
 912                 /* JMM - Placeholder for now */
 913                 return (ENOTSUP);
 914 }
 915
 916 static int
 917 kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p)
 918 {
 919         int advance;
 920         int error;
 921
 922         if (IS_64BIT_PROCESS(p)) {
 923                 struct user_kevent kev64;
 924
 925                 advance = sizeof(kev64);
 926                 error = copyin(*addrp, (caddr_t)&kev64, advance);
 927                 if (error)
 928                         return error;
 929                 kevp->ident = CAST_DOWN(uintptr_t, kev64.ident);
 930                 kevp->filter = kev64.filter;
 931                 kevp->flags = kev64.flags;
 932                 kevp->fflags = kev64.fflags;
 933                 kevp->data = CAST_DOWN(intptr_t, kev64.data);
 934                 kevp->udata = kev64.udata;
 935         } else {
 936                 /*
 937                  * compensate for legacy in-kernel kevent layout
 938                  * where the udata field is alredy 64-bit.
 939                  */
 940                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 941                 error = copyin(*addrp, (caddr_t)kevp, advance);
 942         }
 943         if (!error)
 944                 *addrp += advance;
 945         return error;
 946 }
 947
 948 static int
 949 kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p)
 950 {
 951         int advance;
 952         int error;
 953
 954         if (IS_64BIT_PROCESS(p)) {
 955                 struct user_kevent kev64;
 956
 957                 kev64.ident = (uint64_t) kevp->ident;
 958                 kev64.filter = kevp->filter;
 959                 kev64.flags = kevp->flags;
 960                 kev64.fflags = kevp->fflags;
 961                 kev64.data = (int64_t) kevp->data;
 962                 kev64.udata = kevp->udata;
 963                 advance = sizeof(kev64);
 964                 error = copyout((caddr_t)&kev64, *addrp, advance);
 965         } else {
 966                 /*
 967                  * compensate for legacy in-kernel kevent layout
 968                  * where the udata field is alredy 64-bit.
 969                  */
 970                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 971                 error = copyout((caddr_t)kevp, *addrp, advance);
 972         }
 973         if (!error)
 974                 *addrp += advance;
 975         return error;
 976 }
 977
 978 /*
 979  * kevent_continue - continue a kevent syscall after blocking
 980  *
 981  *      assume we inherit a use count on the kq fileglob.
 982  */
 983
 984 static void
 985 kevent_continue(__unused struct kqueue *kq, void *data, int error)
 986 {
 987         struct _kevent *cont_args;
 988         struct fileproc *fp;
 989         register_t *retval;
 990         int noutputs;
 991         int fd;
 992         struct proc *p = current_proc();
 993
 994         cont_args = (struct _kevent *)data;
 995         noutputs = cont_args->eventout;
 996         retval = cont_args->retval;
 997         fd = cont_args->fd;
 998         fp = cont_args->fp;
 999
1000         fp_drop(p, fd, fp, 0);
1001
1002         /* don't restart after signals... */
1003         if (error == ERESTART)
1004                 error = EINTR;
1005         else if (error == EWOULDBLOCK)
1006                 error = 0;
1007         if (error == 0)
1008                 *retval = noutputs;
1009         unix_syscall_return(error);
1010 }
1011
1012 /*
1013  * kevent - [syscall] register and wait for kernel events
1014  *
1015  */
1016
1017 int
1018 kevent(struct proc *p, struct kevent_args *uap, register_t *retval)
1019 {
1020         user_addr_t changelist = uap->changelist;
1021         user_addr_t ueventlist = uap->eventlist;
1022         int nchanges = uap->nchanges;
1023         int nevents = uap->nevents;
1024         int fd = uap->fd;
1025
1026         struct _kevent *cont_args;
1027         uthread_t ut;
1028         struct kqueue *kq;
1029         struct fileproc *fp;
1030         struct kevent kev;
1031         int error, noutputs;
1032         struct timeval atv;
1033
1034         /* convert timeout to absolute - if we have one */
1035         if (uap->timeout != USER_ADDR_NULL) {
1036                 struct timeval rtv;
1037                 if ( IS_64BIT_PROCESS(p) ) {
1038                         struct user_timespec ts;
1039                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1040                         if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0)
1041                                 error = EINVAL;
1042                         else
1043                                 TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1044                 } else {
1045                         struct timespec ts;
1046                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1047                         TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1048                 }
1049                 if (error)
1050                         return error;
1051                 if (itimerfix(&rtv))
1052                         return EINVAL;
1053                 getmicrouptime(&atv);
1054                 timevaladd(&atv, &rtv);
1055         } else {
1056                 atv.tv_sec = 0;
1057                 atv.tv_usec = 0;
1058         }
1059
1060         /* get a usecount for the kq itself */
1061         if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
1062                 return(error);
1063
1064         /* register all the change requests the user provided... */
1065         noutputs = 0;
1066         while (nchanges > 0 && error == 0) {
1067                 error = kevent_copyin(&changelist, &kev, p);
1068                 if (error)
1069                         break;
1070
1071                 kev.flags &= ~EV_SYSFLAGS;
1072                 error = kevent_register(kq, &kev, p);
1073                 if (error && nevents > 0) {
1074                         kev.flags = EV_ERROR;
1075                         kev.data = error;
1076                         error = kevent_copyout(&kev, &ueventlist, p);
1077                         if (error == 0) {
1078                                 nevents--;
1079                                 noutputs++;
1080                         }
1081                 }
1082                 nchanges--;
1083         }
1084
1085         /* store the continuation/completion data in the uthread */
1086         ut = (uthread_t)get_bsdthread_info(current_thread());
1087         cont_args = (struct _kevent *)&ut->uu_state.ss_kevent;
1088         cont_args->fp = fp;
1089         cont_args->fd = fd;
1090         cont_args->retval = retval;
1091         cont_args->eventlist = ueventlist;
1092         cont_args->eventcount = nevents;
1093         cont_args->eventout = noutputs;
1094
1095         if (nevents > 0 && noutputs == 0 && error == 0)
1096                 error = kevent_scan(kq, kevent_callback,
1097                                     kevent_continue, cont_args,
1098                                     &atv, p);
1099         kevent_continue(kq, cont_args, error);
1100         /* NOTREACHED */
1101         return error;
1102 }
1103
1104
1105 /*
1106  * kevent_callback - callback for each individual event
1107  *
1108  *      called with nothing locked
1109  *      caller holds a reference on the kqueue
1110  */
1111
1112 static int
1113 kevent_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
1114 {
1115         struct _kevent *cont_args;
1116         int error;
1117
1118         cont_args = (struct _kevent *)data;
1119         assert(cont_args->eventout < cont_arg->eventcount);
1120
1121         /*
1122          * Copy out the appropriate amount of event data for this user.
1123          */
1124         error = kevent_copyout(kevp, &cont_args->eventlist, current_proc());
1125
1126         /*
1127          * If there isn't space for additional events, return
1128          * a harmless error to stop the processing here
1129          */
1130         if (error == 0 && ++cont_args->eventout == cont_args->eventcount)
1131                         error = EWOULDBLOCK;
1132         return error;
1133 }
1134
/*
 * kevent_register - add a new event to a kqueue
 *
 *      Creates a mapping between the event source and
 *      the kqueue via a knote data structure.
 *
 *      Because many/most the event sources are file
 *      descriptor related, the knote is linked off
 *      the filedescriptor table for quick access.
 *
 *      Depending on kev->flags this adds a new knote (EV_ADD),
 *      deletes an existing one (EV_DELETE), enables/disables it
 *      (EV_ENABLE/EV_DISABLE), or updates the saved filter values
 *      of an existing one.
 *
 *      Returns 0 on success, EINVAL for an unknown filter, the
 *      fp_lookup() error for fd-based filters, ENOMEM when no knote
 *      can be allocated, ENOENT when the knote does not exist and
 *      the request is not a plain EV_ADD, or the error from
 *      knote_fdpattach() / the filter's f_attach().
 *
 *      called with nothing locked
 *      caller holds a reference on the kqueue
 */

int
kevent_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
{
        struct filedesc *fdp = kq->kq_fdp;
        struct filterops *fops;
        struct fileproc *fp = NULL;
        struct knote *kn = NULL;
        int error = 0;

        /* system filters are negative; anything else is rejected below */
        if (kev->filter < 0) {
                if (kev->filter + EVFILT_SYSCOUNT < 0)
                        return (EINVAL);
                fops = sysfilt_ops[~kev->filter];       /* to 0-base index */
        } else {
                /*
                 * XXX
                 * filter attach routine is responsible for insuring that
                 * the identifier can be attached to it.
                 */
                printf("unknown filter: %d\n", kev->filter);
                return (EINVAL);
        }

        /* this iocount needs to be dropped if it is not registered */
        if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 0)) != 0)
                return(error);

        /*
         * NOTE(review): kn is not reset before the lookup is repeated;
         * if neither SLIST_FOREACH below runs on a restart, kn keeps its
         * previous value - confirm the list-size/hash-mask checks cannot
         * start failing between passes.
         */
 restart:
        proc_fdlock(p);
        if (fops->f_isfd) {
                /* fd-based knotes are linked off the fd table */
                if (kev->ident < (u_int)fdp->fd_knlistsize) {
                        SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
                                if (kq == kn->kn_kq &&
                                    kev->filter == kn->kn_filter)
                                        break;
                }
        } else {
                /* hash non-fd knotes here too */
                if (fdp->fd_knhashmask != 0) {
                        struct klist *list;

                        list = &fdp->fd_knhash[
                            KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
                        SLIST_FOREACH(kn, list, kn_link)
                                if (kev->ident == kn->kn_id &&
                                    kq == kn->kn_kq &&
                                    kev->filter == kn->kn_filter)
                                        break;
                }
        }

        /*
         * kn now contains the matching knote, or NULL if no match
         */
        if (kn == NULL) {
                /* only a plain EV_ADD (without EV_DELETE) creates a knote */
                if ((kev->flags & (EV_ADD|EV_DELETE)) == EV_ADD) {
                        kn = knote_alloc();
                        if (kn == NULL) {
                                proc_fdunlock(p);
                                error = ENOMEM;
                                goto done;
                        }
                        kn->kn_fp = fp;
                        kn->kn_kq = kq;
                        kn->kn_tq = &kq->kq_head;
                        kn->kn_fop = fops;
                        /* save the caller's filter values, then clear them in the stored kevent */
                        kn->kn_sfflags = kev->fflags;
                        kn->kn_sdata = kev->data;
                        kev->fflags = 0;
                        kev->data = 0;
                        kn->kn_kevent = *kev;
                        kn->kn_inuse = 1;  /* for f_attach() */
                        kn->kn_status = 0;

                        /* before anyone can find it */
                        if (kev->flags & EV_DISABLE)
                                kn->kn_status |= KN_DISABLED;

                        error = knote_fdpattach(kn, fdp, p);
                        proc_fdunlock(p);

                        if (error) {
                                knote_free(kn);
                                goto done;
                        }

                        /*
                         * apply reference count to knote structure, and
                         * do not release it at the end of this routine.
                         */
                        fp = NULL;

                        /*
                         * If the attach fails here, we can drop it knowing
                         * that nobody else has a reference to the knote.
                         */
                        if ((error = fops->f_attach(kn)) != 0) {
                                knote_drop(kn, p);
                                goto done;
                        }
                } else {
                        proc_fdunlock(p);
                        error = ENOENT;
                        goto done;
                }
        } else {
                /* existing knote - get kqueue lock */
                kqlock(kq);
                proc_fdunlock(p);

                if (kev->flags & EV_DELETE) {
                        knote_dequeue(kn);
                        kn->kn_status |= KN_DISABLED;
                        /* detach and drop only if kqlock2knotedrop() says it is ours to drop */
                        if (kqlock2knotedrop(kq, kn)) {
                                kn->kn_fop->f_detach(kn);
                                knote_drop(kn, p);
                        }
                        goto done;
                }

                /* update status flags for existing knote */
                if (kev->flags & EV_DISABLE) {
                        knote_dequeue(kn);
                        kn->kn_status |= KN_DISABLED;
                } else if (kev->flags & EV_ENABLE) {
                        kn->kn_status &= ~KN_DISABLED;
                        if (kn->kn_status & KN_ACTIVE)
                                knote_enqueue(kn);
                }

                /*
                 * If somebody is in the middle of dropping this
                 * knote - go find/insert a new one.  But we have
                 * to wait for this one to go away first.
                 */
                if (!kqlock2knoteusewait(kq, kn))
                        /* kqueue unlocked */
                        goto restart;

                /*
                 * The user may change some filter values after the
                 * initial EV_ADD, but doing so will not reset any
                 * filter which have already been triggered.
                 */
                kn->kn_sfflags = kev->fflags;
                kn->kn_sdata = kev->data;
                kn->kn_kevent.udata = kev->udata;
        }

        /*
         * still have use ref on knote: poll the filter once so an
         * already-true condition activates the knote right away
         */
        if (kn->kn_fop->f_event(kn, 0)) {
                if (knoteuse2kqlock(kq, kn))
                        knote_activate(kn);
                kqunlock(kq);
        } else {
                knote_put(kn);
        }

done:
        /* fp is still non-NULL only when it was not handed to a knote */
        if (fp != NULL)
                fp_drop(p, kev->ident, fp, 0);
        return (error);
}
1313
/*
 * kevent_process - process the triggered events in a kqueue
 *
 *      Walk the queued knotes and validate that they are
 *      really still triggered events by calling the filter
 *      routines (if necessary).  Hold a use reference on
 *      the knote to avoid it being detached. For each event
 *      that is still considered triggered, invoke the
 *      callback routine provided.
 *
 *      On return *countp holds the number of events handed to
 *      the callback; the return value is 0 or the first non-zero
 *      value returned by the callback (which stops the walk).
 *
 *      caller holds a reference on the kqueue.
 *      kqueue locked on entry and exit - but may be dropped
 */

static int
kevent_process(struct kqueue *kq,
               kevent_callback_t callback,
               void *data,
               int *countp,
               struct proc *p)
{
        struct knote *kn;
        struct kevent kev;
        int nevents;
        int error;

 restart:
        /* nothing queued: report zero events without touching the queues */
        if (kq->kq_count == 0) {
                *countp = 0;
                return 0;
        }

        /* if someone else is processing the queue, wait */
        if (!TAILQ_EMPTY(&kq->kq_inprocess)) {
                assert_wait(&kq->kq_inprocess, THREAD_UNINT);
                kq->kq_state |= KQ_PROCWAIT;
                kqunlock(kq);
                thread_block(THREAD_CONTINUE_NULL);
                kqlock(kq);
                goto restart;
        }

        error = 0;
        nevents = 0;
        while (error == 0 &&
               (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) {

                /*
                 * move knote to the processed queue.
                 * this is also protected by the kq lock.
                 */
                assert(kn->kn_tq == &kq->kq_head);
                TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
                kn->kn_tq = &kq->kq_inprocess;
                TAILQ_INSERT_TAIL(&kq->kq_inprocess, kn, kn_tqe);

                /*
                 * Non-EV_ONESHOT events must be re-validated.
                 *
                 * Convert our lock to a use-count and call the event's
                 * filter routine to update.
                 *
                 * If the event is dropping (or no longer valid), we
                 * already have it off the active queue, so just
                 * finish the job of deactivating it.
                 */
                if ((kn->kn_flags & EV_ONESHOT) == 0) {
                        int result;

                        if (kqlock2knoteuse(kq, kn)) {

                                /* call the filter with just a ref */
                                result = kn->kn_fop->f_event(kn, 0);

                                if (!knoteuse2kqlock(kq, kn) || result == 0) {
                                        knote_deactivate(kn);
                                        continue;
                                }
                        } else {
                                knote_deactivate(kn);
                                continue;
                        }
                }

                /*
                 * Got a valid triggered knote with the kqueue
                 * still locked.  Snapshot the data, and determine
                 * how to dispatch the knote for future events.
                 */
                kev = kn->kn_kevent;

                /* now what happens to it? */
                if (kn->kn_flags & EV_ONESHOT) {
                        /* one-shot: deliver once, then detach and drop the knote */
                        knote_deactivate(kn);
                        if (kqlock2knotedrop(kq, kn)) {
                                kn->kn_fop->f_detach(kn);
                                knote_drop(kn, p);
                        }
                } else if (kn->kn_flags & EV_CLEAR) {
                        /* clear-on-delivery: reset the accumulated state */
                        knote_deactivate(kn);
                        kn->kn_data = 0;
                        kn->kn_fflags = 0;
                        kqunlock(kq);
                } else {
                        /*
                         * leave on in-process queue.  We'll
                         * move all the remaining ones back
                         * the kq queue and wakeup any
                         * waiters when we are done.
                         */
                        kqunlock(kq);
                }

                /*
                 * callback to handle each event as we find it; it is
                 * given the snapshot taken above, not the knote itself
                 */
                error = (callback)(kq, &kev, data);
                nevents++;

                kqlock(kq);
        }

        /*
         * With the kqueue still locked, move any knotes
         * remaining on the in-process queue back to the
         * kq's queue and wake up any waiters.
         */
        while ((kn = TAILQ_FIRST(&kq->kq_inprocess)) != NULL) {
                assert(kn->kn_tq == &kq->kq_inprocess);
                TAILQ_REMOVE(&kq->kq_inprocess, kn, kn_tqe);
                kn->kn_tq = &kq->kq_head;
                TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
        }
        if (kq->kq_state & KQ_PROCWAIT) {
                kq->kq_state &= ~KQ_PROCWAIT;
                thread_wakeup(&kq->kq_inprocess);
        }

        *countp = nevents;
        return error;
}
1453
1454
1455 static void
1456 kevent_scan_continue(void *data, wait_result_t wait_result)
1457 {
1458         uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1459         struct _kevent_scan * cont_args = &ut->uu_state.ss_kevent_scan;
1460         struct kqueue *kq = (struct kqueue *)data;
1461         int error;
1462         int count;
1463
1464         /* convert the (previous) wait_result to a proper error */
1465         switch (wait_result) {
1466         case THREAD_AWAKENED:
1467                 kqlock(kq);
1468                 error = kevent_process(kq, cont_args->call, cont_args, &count, current_proc());
1469                 if (error == 0 && count == 0) {
1470                         assert_wait_deadline(kq, THREAD_ABORTSAFE, cont_args->deadline);
1471                         kq->kq_state |= KQ_SLEEP;
1472                         kqunlock(kq);
1473                         thread_block_parameter(kevent_scan_continue, kq);
1474                         /* NOTREACHED */
1475                 }
1476                 kqunlock(kq);
1477                 break;
1478         case THREAD_TIMED_OUT:
1479                 error = EWOULDBLOCK;
1480                 break;
1481         case THREAD_INTERRUPTED:
1482                 error = EINTR;
1483                 break;
1484         default:
1485                 panic("kevent_scan_cont() - invalid wait_result (%d)", wait_result);
1486                 error = 0;
1487         }
1488
1489         /* call the continuation with the results */
1490         assert(cont_args->cont != NULL);
1491         (cont_args->cont)(kq, cont_args->data, error);
1492 }
1493
1494
/*
 * kevent_scan - scan and wait for events in a kqueue
 *
 *      Process the triggered events in a kqueue.
 *
 *      If there are no events triggered arrange to
 *      wait for them. If the caller provided a
 *      continuation routine, then kevent_scan will
 *      also.
 *
 *      atvp is compared against the current uptime, i.e. it is
 *      treated as an absolute time; an all-zero value means
 *      "block forever".
 *
 *      Returns the kevent_process() error or 0 once events were
 *      delivered, EWOULDBLOCK when the deadline has passed or the
 *      wait timed out, and EINTR when the wait was interrupted.
 *
 *      The callback routine must be valid.
 *      The caller must hold a use-count reference on the kq.
 */

int
kevent_scan(struct kqueue *kq,
            kevent_callback_t callback,
            kevent_continue_t continuation,
            void *data,
            struct timeval *atvp,
            struct proc *p)
{
        thread_continue_t cont = THREAD_CONTINUE_NULL;
        uint64_t deadline;
        int error;
        int first;

        assert(callback != NULL);

        first = 1;
        for (;;) {
                wait_result_t wait_result;
                int count;

                /*
                 * Make a pass through the kq to find events already
                 * triggered.
                 */
                kqlock(kq);
                error = kevent_process(kq, callback, data, &count, p);
                if (error || count)
                        break; /* lock still held */

                /* looks like we have to consider blocking */
                if (first) {
                        first = 0;
                        /* convert the timeout to a deadline once */
                        if (atvp->tv_sec || atvp->tv_usec) {
                                uint32_t seconds, nanoseconds;
                                uint64_t now;

                                clock_get_uptime(&now);
                                nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC +
                                                            atvp->tv_usec * NSEC_PER_USEC,
                                                            &deadline);
                                if (now >= deadline) {
                                        /* non-blocking call */
                                        error = EWOULDBLOCK;
                                        break; /* lock still held */
                                }
                                /* turn the absolute time into an interval, then into a wait deadline */
                                deadline -= now;
                                clock_absolutetime_interval_to_deadline(deadline, &deadline);
                        } else {
                                deadline = 0;   /* block forever */
                        }

                        /*
                         * stash what kevent_scan_continue() needs in the
                         * uthread, since it is entered without this frame
                         */
                        if (continuation) {
                                uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
                                struct _kevent_scan *cont_args = &ut->uu_state.ss_kevent_scan;

                                cont_args->call = callback;
                                cont_args->cont = continuation;
                                cont_args->deadline = deadline;
                                cont_args->data = data;
                                cont = kevent_scan_continue;
                        }
                }

                /* go ahead and wait */
                assert_wait_deadline(kq, THREAD_ABORTSAFE, deadline);
                kq->kq_state |= KQ_SLEEP;
                kqunlock(kq);
                wait_result = thread_block_parameter(cont, kq);
                /* NOTREACHED if (continuation != NULL) */

                switch (wait_result) {
                case THREAD_AWAKENED:
                        /* woken up: rescan the queue from the top of the loop */
                        continue;
                case THREAD_TIMED_OUT:
                        return EWOULDBLOCK;
                case THREAD_INTERRUPTED:
                        return EINTR;
                default:
                        panic("kevent_scan - bad wait_result (%d)",
                              wait_result);
                        error = 0;
                }
        }
        /* every break out of the loop above leaves the kq locked */
        kqunlock(kq);
        return error;
}
1596
1597
1598 /*
1599  * XXX
1600  * This could be expanded to call kqueue_scan, if desired.
1601  */
1602 /*ARGSUSED*/
1603 static int
1604 kqueue_read(__unused struct fileproc *fp,
1605                         __unused struct uio *uio,
1606                         __unused kauth_cred_t cred,
1607                         __unused int flags,
1608                         __unused struct proc *p)
1609 {
1610         return (ENXIO);
1611 }
1612
1613 /*ARGSUSED*/
1614 static int
1615 kqueue_write(__unused struct fileproc *fp,
1616                          __unused struct uio *uio,
1617                          __unused kauth_cred_t cred,
1618                          __unused int flags,
1619                          __unused struct proc *p)
1620 {
1621         return (ENXIO);
1622 }
1623
1624 /*ARGSUSED*/
1625 static int
1626 kqueue_ioctl(__unused struct fileproc *fp,
1627                          __unused u_long com,
1628                          __unused caddr_t data,
1629                          __unused struct proc *p)
1630 {
1631         return (ENOTTY);
1632 }
1633
1634 /*ARGSUSED*/
1635 static int
1636 kqueue_select(struct fileproc *fp, int which, void *wql, struct proc *p)
1637 {
1638         struct kqueue *kq = (struct kqueue *)fp->f_data;
1639         int retnum = 0;
1640
1641         if (which == FREAD) {
1642                 kqlock(kq);
1643                 if (kq->kq_count) {
1644                         retnum = 1;
1645                 } else {
1646                         selrecord(p, &kq->kq_sel, wql);
1647                         kq->kq_state |= KQ_SEL;
1648                 }
1649                 kqunlock(kq);
1650         }
1651         return (retnum);
1652 }
1653
1654 /*
1655  * kqueue_close -
1656  */
1657 /*ARGSUSED*/
1658 static int
1659 kqueue_close(struct fileglob *fg, struct proc *p)
1660 {
1661         struct kqueue *kq = (struct kqueue *)fg->fg_data;
1662
1663         kqueue_dealloc(kq, p);
1664         fg->fg_data = NULL;
1665         return (0);
1666 }
1667
1668 /*ARGSUSED*/
1669 /*
1670  * The callers has taken a use-count reference on this kqueue and will donate it
1671  * to the kqueue we are being added to.  This keeps the kqueue from closing until
1672  * that relationship is torn down.
1673  */
1674 static int
1675 kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
1676 {
1677         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
1678
1679         if (kn->kn_filter != EVFILT_READ)
1680                 return (1);
1681
1682         kn->kn_fop = &kqread_filtops;
1683         kqlock(kq);
1684         KNOTE_ATTACH(&kq->kq_sel.si_note, kn);
1685         kqunlock(kq);
1686         return (0);
1687 }
1688
1689 /*ARGSUSED*/
1690 int
1691 kqueue_stat(struct fileproc *fp, struct stat *st, __unused struct proc *p)
1692 {
1693         struct kqueue *kq = (struct kqueue *)fp->f_data;
1694
1695         bzero((void *)st, sizeof(*st));
1696         st->st_size = kq->kq_count;
1697         st->st_blksize = sizeof(struct kevent);
1698         st->st_mode = S_IFIFO;
1699         return (0);
1700 }
1701
1702 /*
1703  * Called with the kqueue locked
1704  */
1705 static void
1706 kqueue_wakeup(struct kqueue *kq)
1707 {
1708
1709         if (kq->kq_state & KQ_SLEEP) {
1710                 kq->kq_state &= ~KQ_SLEEP;
1711                 thread_wakeup(kq);
1712         }
1713         if (kq->kq_state & KQ_SEL) {
1714                 kq->kq_state &= ~KQ_SEL;
1715                 selwakeup(&kq->kq_sel);
1716         }
1717         KNOTE(&kq->kq_sel.si_note, 0);
1718 }
1719
/*
 * klist_init - put a knote list head into the empty state (SLIST_INIT).
 */
void
klist_init(struct klist *list)
{

	SLIST_INIT(list);
}
1725
1726
1727 /*
1728  * Query/Post each knote in the object's list
1729  *
1730  *      The object lock protects the list. It is assumed
1731  *      that the filter/event routine for the object can
1732  *      determine that the object is already locked (via
1733  *      the hind) and not deadlock itself.
1734  *
1735  *      The object lock should also hold off pending
1736  *      detach/drop operations.  But we'll prevent it here
1737  *      too - just in case.
1738  */
1739 void
1740 knote(struct klist *list, long hint)
1741 {
1742         struct knote *kn;
1743
1744         SLIST_FOREACH(kn, list, kn_selnext) {
1745                 struct kqueue *kq = kn->kn_kq;
1746
1747                 kqlock(kq);
1748                 if (kqlock2knoteuse(kq, kn)) {
1749                         int result;
1750
1751                         /* call the event with only a use count */
1752                         result = kn->kn_fop->f_event(kn, hint);
1753
1754                         /* if its not going away and triggered */
1755                         if (knoteuse2kqlock(kq, kn) && result)
1756                                 knote_activate(kn);
1757                         /* lock held again */
1758                 }
1759                 kqunlock(kq);
1760         }
1761 }
1762
1763 /*
1764  * attach a knote to the specified list.  Return true if this is the first entry.
1765  * The list is protected by whatever lock the object it is associated with uses.
1766  */
1767 int
1768 knote_attach(struct klist *list, struct knote *kn)
1769 {
1770         int ret = SLIST_EMPTY(list);
1771         SLIST_INSERT_HEAD(list, kn, kn_selnext);
1772         return ret;
1773 }
1774
1775 /*
1776  * detach a knote from the specified list.  Return true if that was the last entry.
1777  * The list is protected by whatever lock the object it is associated with uses.
1778  */
1779 int
1780 knote_detach(struct klist *list, struct knote *kn)
1781 {
1782         SLIST_REMOVE(list, kn, knote, kn_selnext);
1783         return SLIST_EMPTY(list);
1784 }
1785
1786 /*
1787  * remove all knotes referencing a specified fd
1788  *
1789  * Essentially an inlined knote_remove & knote_drop
1790  * when we know for sure that the thing is a file
1791  *
1792  * Entered with the proc_fd lock already held.
1793  * It returns the same way, but may drop it temporarily.
1794  */
1795 void
1796 knote_fdclose(struct proc *p, int fd)
1797 {
1798         struct filedesc *fdp = p->p_fd;
1799         struct klist *list;
1800         struct knote *kn;
1801
1802         list = &fdp->fd_knlist[fd];
1803         while ((kn = SLIST_FIRST(list)) != NULL) {
1804                 struct kqueue *kq = kn->kn_kq;
1805
1806                 kqlock(kq);
1807                 proc_fdunlock(p);
1808
1809                 /*
1810                  * Convert the lock to a drop ref.
1811                  * If we get it, go ahead and drop it.
1812                  * Otherwise, we waited for it to
1813                  * be dropped by the other guy, so
1814                  * it is safe to move on in the list.
1815                  */
1816                 if (kqlock2knotedrop(kq, kn)) {
1817                         kn->kn_fop->f_detach(kn);
1818                         knote_drop(kn, p);
1819                 }
1820
1821                 proc_fdlock(p);
1822
1823                 /* the fd tables may have changed - start over */
1824                 list = &fdp->fd_knlist[fd];
1825         }
1826 }
1827
1828 /* proc_fdlock held on entry (and exit) */
1829 static int
1830 knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
1831 {
1832         struct klist *list = NULL;
1833
1834         if (! kn->kn_fop->f_isfd) {
1835                 if (fdp->fd_knhashmask == 0)
1836                         fdp->fd_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
1837                             &fdp->fd_knhashmask);
1838                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1839         } else {
1840                 if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
1841                         u_int size = 0;
1842
1843                         /* have to grow the fd_knlist */
1844                         size = fdp->fd_knlistsize;
1845                         while (size <= kn->kn_id)
1846                                 size += KQEXTENT;
1847                         MALLOC(list, struct klist *,
1848                                size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
1849                         if (list == NULL)
1850                                 return (ENOMEM);
1851
1852                         bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
1853                               fdp->fd_knlistsize * sizeof(struct klist *));
1854                         bzero((caddr_t)list +
1855                               fdp->fd_knlistsize * sizeof(struct klist *),
1856                               (size - fdp->fd_knlistsize) * sizeof(struct klist *));
1857                         FREE(fdp->fd_knlist, M_KQUEUE);
1858                         fdp->fd_knlist = list;
1859                         fdp->fd_knlistsize = size;
1860                 }
1861                 list = &fdp->fd_knlist[kn->kn_id];
1862         }
1863         SLIST_INSERT_HEAD(list, kn, kn_link);
1864         return (0);
1865 }
1866
1867
1868
1869 /*
1870  * should be called at spl == 0, since we don't want to hold spl
1871  * while calling fdrop and free.
1872  */
1873 static void
1874 knote_drop(struct knote *kn, struct proc *p)
1875 {
1876         struct filedesc *fdp = p->p_fd;
1877         struct kqueue *kq = kn->kn_kq;
1878         struct klist *list;
1879
1880         proc_fdlock(p);
1881         if (kn->kn_fop->f_isfd)
1882                 list = &fdp->fd_knlist[kn->kn_id];
1883         else
1884                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1885
1886         SLIST_REMOVE(list, kn, knote, kn_link);
1887         kqlock(kq);
1888         knote_dequeue(kn);
1889         if (kn->kn_status & KN_DROPWAIT)
1890                 thread_wakeup(&kn->kn_status);
1891         kqunlock(kq);
1892         proc_fdunlock(p);
1893
1894         if (kn->kn_fop->f_isfd)
1895                 fp_drop(p, kn->kn_id, kn->kn_fp, 0);
1896
1897         knote_free(kn);
1898 }
1899
1900 /* called with kqueue lock held */
1901 static void
1902 knote_activate(struct knote *kn)
1903 {
1904         struct kqueue *kq = kn->kn_kq;
1905
1906         kn->kn_status |= KN_ACTIVE;
1907         knote_enqueue(kn);
1908         kqueue_wakeup(kq);
1909  }
1910
1911 /* called with kqueue lock held */
1912 static void
1913 knote_deactivate(struct knote *kn)
1914 {
1915         kn->kn_status &= ~KN_ACTIVE;
1916         knote_dequeue(kn);
1917 }
1918
1919 /* called with kqueue lock held */
1920 static void
1921 knote_enqueue(struct knote *kn)
1922 {
1923         struct kqueue *kq = kn->kn_kq;
1924
1925         if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
1926                 struct kqtailq *tq = kn->kn_tq;
1927
1928                 TAILQ_INSERT_TAIL(tq, kn, kn_tqe);
1929                 kn->kn_status |= KN_QUEUED;
1930                 kq->kq_count++;
1931         }
1932 }
1933
1934 /* called with kqueue lock held */
1935 static void
1936 knote_dequeue(struct knote *kn)
1937 {
1938         struct kqueue *kq = kn->kn_kq;
1939
1940         assert((kn->kn_status & KN_DISABLED) == 0);
1941         if ((kn->kn_status & KN_QUEUED) == KN_QUEUED) {
1942                 struct kqtailq *tq = kn->kn_tq;
1943
1944                 TAILQ_REMOVE(tq, kn, kn_tqe);
1945                 kn->kn_tq = &kq->kq_head;
1946                 kn->kn_status &= ~KN_QUEUED;
1947                 kq->kq_count--;
1948         }
1949 }
1950
1951 void
1952 knote_init(void)
1953 {
1954         knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), 8192, "knote zone");
1955
1956         /* allocate kq lock group attribute and group */
1957         kq_lck_grp_attr= lck_grp_attr_alloc_init();
1958         lck_grp_attr_setstat(kq_lck_grp_attr);
1959
1960         kq_lck_grp = lck_grp_alloc_init("kqueue",  kq_lck_grp_attr);
1961
1962         /* Allocate kq lock attribute */
1963         kq_lck_attr = lck_attr_alloc_init();
1964         lck_attr_setdefault(kq_lck_attr);
1965
1966         /* Initialize the timer filter lock */
1967         lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
1968 }
1969 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
1970
1971 static struct knote *
1972 knote_alloc(void)
1973 {
1974         return ((struct knote *)zalloc(knote_zone));
1975 }
1976
1977 static void
1978 knote_free(struct knote *kn)
1979 {
1980         zfree(knote_zone, kn);
1981 }
1982
1983 #include <sys/param.h>
1984 #include <sys/socket.h>
1985 #include <sys/protosw.h>
1986 #include <sys/domain.h>
1987 #include <sys/mbuf.h>
1988 #include <sys/kern_event.h>
1989 #include <sys/malloc.h>
1990 #include <sys/sys_domain.h>
1991 #include <sys/syslog.h>
1992
1993
1994 static int kev_attach(struct socket *so, int proto, struct proc *p);
1995 static int kev_detach(struct socket *so);
1996 static int kev_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p);
1997
1998 struct pr_usrreqs event_usrreqs = {
1999      pru_abort_notsupp, pru_accept_notsupp, kev_attach, pru_bind_notsupp, pru_connect_notsupp,
2000      pru_connect2_notsupp, kev_control, kev_detach, pru_disconnect_notsupp,
2001      pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp, pru_rcvoob_notsupp,
2002      pru_send_notsupp, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp,
2003      pru_sosend_notsupp, soreceive, pru_sopoll_notsupp
2004 };
2005
2006 struct protosw eventsw[] = {
2007      {
2008           SOCK_RAW,             &systemdomain,  SYSPROTO_EVENT,         PR_ATOMIC,
2009           0,            0,              0,              0,
2010           0,
2011           0,            0,              0,              0,
2012 #if __APPLE__
2013           0,
2014 #endif
2015           &event_usrreqs,
2016           0,            0,              0,
2017 #if __APPLE__
2018           {0, 0},       0,              {0}
2019 #endif
2020      }
2021 };
2022
2023 static
2024 struct kern_event_head kern_event_head;
2025
2026 static u_long static_event_id = 0;
2027 struct domain *sysdom = &systemdomain;
2028
2029 static lck_grp_t                *evt_mtx_grp;
2030 static lck_attr_t               *evt_mtx_attr;
2031 static lck_grp_attr_t   *evt_mtx_grp_attr;
2032 lck_mtx_t                               *evt_mutex;
2033 /*
2034  * Install the protosw's for the NKE manager.  Invoked at
2035  *  extension load time
2036  */
2037 int
2038 kern_event_init(void)
2039 {
2040     int retval;
2041
2042     if ((retval = net_add_proto(eventsw, &systemdomain)) != 0) {
2043             log(LOG_WARNING, "Can't install kernel events protocol (%d)\n", retval);
2044             return(retval);
2045         }
2046
2047         /*
2048          * allocate lock group attribute and group for kern event
2049          */
2050         evt_mtx_grp_attr = lck_grp_attr_alloc_init();
2051
2052         evt_mtx_grp = lck_grp_alloc_init("eventlist", evt_mtx_grp_attr);
2053
2054         /*
2055          * allocate the lock attribute for mutexes
2056          */
2057         evt_mtx_attr = lck_attr_alloc_init();
2058         lck_attr_setdefault(evt_mtx_attr);
2059         evt_mutex = lck_mtx_alloc_init(evt_mtx_grp, evt_mtx_attr);
2060         if (evt_mutex == NULL)
2061                         return (ENOMEM);
2062
2063     return(KERN_SUCCESS);
2064 }
2065
2066 static int
2067 kev_attach(struct socket *so, __unused int proto, __unused struct proc *p)
2068 {
2069      int error;
2070      struct kern_event_pcb  *ev_pcb;
2071
2072      error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE);
2073      if (error)
2074           return error;
2075
2076      MALLOC(ev_pcb, struct kern_event_pcb *, sizeof(struct kern_event_pcb), M_PCB, M_WAITOK);
2077      if (ev_pcb == 0)
2078           return ENOBUFS;
2079
2080      ev_pcb->ev_socket = so;
2081      ev_pcb->vendor_code_filter = 0xffffffff;
2082
2083      so->so_pcb = (caddr_t) ev_pcb;
2084          lck_mtx_lock(evt_mutex);
2085      LIST_INSERT_HEAD(&kern_event_head, ev_pcb, ev_link);
2086          lck_mtx_unlock(evt_mutex);
2087
2088      return 0;
2089 }
2090
2091
2092 static int
2093 kev_detach(struct socket *so)
2094 {
2095      struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2096
2097      if (ev_pcb != 0) {
2098                 lck_mtx_lock(evt_mutex);
2099                 LIST_REMOVE(ev_pcb, ev_link);
2100                 lck_mtx_unlock(evt_mutex);
2101                 FREE(ev_pcb, M_PCB);
2102                 so->so_pcb = 0;
2103                 so->so_flags |= SOF_PCBCLEARING;
2104      }
2105
2106      return 0;
2107 }
2108
2109 /*
2110  * For now, kev_vender_code and mbuf_tags use the same
2111  * mechanism.
2112  */
2113 extern errno_t mbuf_tag_id_find_internal(const char *string, u_long *out_id,
2114                                                                                  int create);
2115
2116 errno_t kev_vendor_code_find(
2117         const char      *string,
2118         u_long          *out_vender_code)
2119 {
2120         if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
2121                 return EINVAL;
2122         }
2123         return mbuf_tag_id_find_internal(string, out_vender_code, 1);
2124 }
2125
2126 extern void mbuf_tag_id_first_last(u_long *first, u_long *last);
2127
2128 errno_t  kev_msg_post(struct kev_msg *event_msg)
2129 {
2130         u_long  min_vendor, max_vendor;
2131
2132         mbuf_tag_id_first_last(&min_vendor, &max_vendor);
2133
2134         if (event_msg == NULL)
2135                 return EINVAL;
2136
2137         /* Limit third parties to posting events for registered vendor codes only */
2138         if (event_msg->vendor_code < min_vendor ||
2139                 event_msg->vendor_code > max_vendor)
2140         {
2141                 return EINVAL;
2142         }
2143
2144         return kev_post_msg(event_msg);
2145 }
2146
2147
2148 int  kev_post_msg(struct kev_msg *event_msg)
2149 {
2150      struct mbuf *m, *m2;
2151      struct kern_event_pcb  *ev_pcb;
2152      struct kern_event_msg  *ev;
2153      char              *tmp;
2154      unsigned long     total_size;
2155      int               i;
2156
2157         /* Verify the message is small enough to fit in one mbuf w/o cluster */
2158         total_size = KEV_MSG_HEADER_SIZE;
2159
2160         for (i = 0; i < 5; i++) {
2161                 if (event_msg->dv[i].data_length == 0)
2162                         break;
2163                 total_size += event_msg->dv[i].data_length;
2164         }
2165
2166         if (total_size > MLEN) {
2167                 return EMSGSIZE;
2168         }
2169
2170      m = m_get(M_DONTWAIT, MT_DATA);
2171      if (m == 0)
2172           return ENOBUFS;
2173
2174      ev = mtod(m, struct kern_event_msg *);
2175      total_size = KEV_MSG_HEADER_SIZE;
2176
2177      tmp = (char *) &ev->event_data[0];
2178      for (i = 0; i < 5; i++) {
2179           if (event_msg->dv[i].data_length == 0)
2180                break;
2181
2182           total_size += event_msg->dv[i].data_length;
2183           bcopy(event_msg->dv[i].data_ptr, tmp,
2184                 event_msg->dv[i].data_length);
2185           tmp += event_msg->dv[i].data_length;
2186      }
2187
2188      ev->id = ++static_event_id;
2189      ev->total_size   = total_size;
2190      ev->vendor_code  = event_msg->vendor_code;
2191      ev->kev_class    = event_msg->kev_class;
2192      ev->kev_subclass = event_msg->kev_subclass;
2193      ev->event_code   = event_msg->event_code;
2194
2195      m->m_len = total_size;
2196      lck_mtx_lock(evt_mutex);
2197      for (ev_pcb = LIST_FIRST(&kern_event_head);
2198           ev_pcb;
2199           ev_pcb = LIST_NEXT(ev_pcb, ev_link)) {
2200
2201           if (ev_pcb->vendor_code_filter != KEV_ANY_VENDOR) {
2202                if (ev_pcb->vendor_code_filter != ev->vendor_code)
2203                     continue;
2204
2205                if (ev_pcb->class_filter != KEV_ANY_CLASS) {
2206                     if (ev_pcb->class_filter != ev->kev_class)
2207                          continue;
2208
2209                     if ((ev_pcb->subclass_filter != KEV_ANY_SUBCLASS) &&
2210                         (ev_pcb->subclass_filter != ev->kev_subclass))
2211                          continue;
2212                }
2213           }
2214
2215           m2 = m_copym(m, 0, m->m_len, M_NOWAIT);
2216           if (m2 == 0) {
2217                m_free(m);
2218                    lck_mtx_unlock(evt_mutex);
2219                return ENOBUFS;
2220           }
2221           socket_lock(ev_pcb->ev_socket, 1);
2222           if (sbappendrecord(&ev_pcb->ev_socket->so_rcv, m2))
2223                   sorwakeup(ev_pcb->ev_socket);
2224           socket_unlock(ev_pcb->ev_socket, 1);
2225      }
2226
2227      m_free(m);
2228      lck_mtx_unlock(evt_mutex);
2229      return 0;
2230 }
2231
2232 static int
2233 kev_control(struct socket *so,
2234                         u_long cmd,
2235                         caddr_t data,
2236                         __unused struct ifnet *ifp,
2237                         __unused struct proc *p)
2238 {
2239         struct kev_request *kev_req = (struct kev_request *) data;
2240         struct kern_event_pcb  *ev_pcb;
2241         struct kev_vendor_code *kev_vendor;
2242         u_long  *id_value = (u_long *) data;
2243
2244
2245         switch (cmd) {
2246
2247                 case SIOCGKEVID:
2248                         *id_value = static_event_id;
2249                         break;
2250
2251                 case SIOCSKEVFILT:
2252                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2253                         ev_pcb->vendor_code_filter = kev_req->vendor_code;
2254                         ev_pcb->class_filter     = kev_req->kev_class;
2255                         ev_pcb->subclass_filter  = kev_req->kev_subclass;
2256                         break;
2257
2258                 case SIOCGKEVFILT:
2259                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2260                         kev_req->vendor_code = ev_pcb->vendor_code_filter;
2261                         kev_req->kev_class   = ev_pcb->class_filter;
2262                         kev_req->kev_subclass = ev_pcb->subclass_filter;
2263                         break;
2264
2265                 case SIOCGKEVVENDOR:
2266                         kev_vendor = (struct kev_vendor_code*)data;
2267
2268                         /* Make sure string is NULL terminated */
2269                         kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0;
2270
2271                         return mbuf_tag_id_find_internal(kev_vendor->vendor_string,
2272                                                                                          &kev_vendor->vendor_code, 0);
2273
2274                 default:
2275                         return ENOTSUP;
2276         }
2277
2278         return 0;
2279 }
2280
2281
2282
2283