bsd/kern/kern_event.c

   1 /*
   2  * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  *
  23  */
  24 /*-
  25  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
  26  * All rights reserved.
  27  *
  28  * Redistribution and use in source and binary forms, with or without
  29  * modification, are permitted provided that the following conditions
  30  * are met:
  31  * 1. Redistributions of source code must retain the above copyright
  32  *    notice, this list of conditions and the following disclaimer.
  33  * 2. Redistributions in binary form must reproduce the above copyright
  34  *    notice, this list of conditions and the following disclaimer in the
  35  *    documentation and/or other materials provided with the distribution.
  36  *
  37  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  38  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  39  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  40  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  41  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  42  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  43  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  44  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  45  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  46  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47  * SUCH DAMAGE.
  48  */
  49 /*
  50  *      @(#)kern_event.c       1.0 (3/31/2000)
  51  */
  52 #include <stdint.h>
  53
  54 #include <sys/param.h>
  55 #include <sys/systm.h>
  56 #include <sys/filedesc.h>
  57 #include <sys/kernel.h>
  58 #include <sys/proc_internal.h>
  59 #include <sys/kauth.h>
  60 #include <sys/malloc.h>
  61 #include <sys/unistd.h>
  62 #include <sys/file_internal.h>
  63 #include <sys/fcntl.h>
  64 #include <sys/select.h>
  65 #include <sys/queue.h>
  66 #include <sys/event.h>
  67 #include <sys/eventvar.h>
  68 #include <sys/protosw.h>
  69 #include <sys/socket.h>
  70 #include <sys/socketvar.h>
  71 #include <sys/stat.h>
  72 #include <sys/sysctl.h>
  73 #include <sys/uio.h>
  74 #include <sys/sysproto.h>
  75 #include <sys/user.h>
  76 #include <string.h>
  77
  78 #include <kern/lock.h>
  79 #include <kern/clock.h>
  80 #include <kern/thread_call.h>
  81 #include <kern/sched_prim.h>
  82 #include <kern/zalloc.h>
  83 #include <kern/assert.h>
  84
  85 #include <libkern/libkern.h>
  86
  87 extern void unix_syscall_return(int);
  88
  89 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
  90
  91 static inline void kqlock(struct kqueue *kq);
  92 static inline void kqunlock(struct kqueue *kq);
  93
  94 static int      kqlock2knoteuse(struct kqueue *kq, struct knote *kn);
  95 static int      kqlock2knoteusewait(struct kqueue *kq, struct knote *kn);
  96 static int      kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
  97 static int      knoteuse2kqlock(struct kqueue *kq, struct knote *kn);
  98
  99 static void     kqueue_wakeup(struct kqueue *kq);
 100 static int      kqueue_read(struct fileproc *fp, struct uio *uio,
 101                     kauth_cred_t cred, int flags, struct proc *p);
 102 static int      kqueue_write(struct fileproc *fp, struct uio *uio,
 103                     kauth_cred_t cred, int flags, struct proc *p);
 104 static int      kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
 105                     struct proc *p);
 106 static int      kqueue_select(struct fileproc *fp, int which, void *wql,
 107                     struct proc *p);
 108 static int      kqueue_close(struct fileglob *fp, struct proc *p);
 109 static int      kqueue_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
 110 extern int      kqueue_stat(struct fileproc *fp, struct stat *st, struct proc *p);
 111
 112 static struct fileops kqueueops = {
 113         kqueue_read,
 114         kqueue_write,
 115         kqueue_ioctl,
 116         kqueue_select,
 117         kqueue_close,
 118         kqueue_kqfilter,
 119         0
 120 };
 121
 122 static int kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p);
 123 static int kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p);
 124
 125 static int      kevent_callback(struct kqueue *kq, struct kevent *kevp, void *data);
 126 static void     kevent_continue(struct kqueue *kq, void *data, int error);
 127 static void     kevent_scan_continue(void *contp, wait_result_t wait_result);
 128 static int      kevent_process(struct kqueue *kq, kevent_callback_t callback,
 129                                void *data, int *countp, struct proc *p);
 130 static void     knote_put(struct knote *kn);
 131 static int      knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p);
 132 static void     knote_drop(struct knote *kn, struct proc *p);
 133 static void     knote_activate(struct knote *kn);
 134 static void     knote_deactivate(struct knote *kn);
 135 static void     knote_enqueue(struct knote *kn);
 136 static void     knote_dequeue(struct knote *kn);
 137 static struct   knote *knote_alloc(void);
 138 static void     knote_free(struct knote *kn);
 139 extern void     knote_init(void);
 140
 141 static int      filt_fileattach(struct knote *kn);
 142 static struct filterops file_filtops =
 143         { 1, filt_fileattach, NULL, NULL };
 144
 145 static void     filt_kqdetach(struct knote *kn);
 146 static int      filt_kqueue(struct knote *kn, long hint);
 147 static struct filterops kqread_filtops =
 148         { 1, NULL, filt_kqdetach, filt_kqueue };
 149
 150 /*
 151  * placeholder for not-yet-implemented filters
 152  */
 153 static int      filt_badattach(struct knote *kn);
 154 static struct filterops bad_filtops =
 155         { 0, filt_badattach, 0 , 0 };
 156
 157 static int      filt_procattach(struct knote *kn);
 158 static void     filt_procdetach(struct knote *kn);
 159 static int      filt_proc(struct knote *kn, long hint);
 160
 161 static struct filterops proc_filtops =
 162         { 0, filt_procattach, filt_procdetach, filt_proc };
 163
 164 extern struct filterops fs_filtops;
 165
 166 extern struct filterops sig_filtops;
 167
 168
 169 /* Timer filter */
 170 static int      filt_timercompute(struct knote *kn, uint64_t *abs_time);
 171 static void     filt_timerexpire(void *knx, void *param1);
 172 static int      filt_timerattach(struct knote *kn);
 173 static void     filt_timerdetach(struct knote *kn);
 174 static int      filt_timer(struct knote *kn, long hint);
 175
 176 static struct filterops timer_filtops =
 177         { 0, filt_timerattach, filt_timerdetach, filt_timer };
 178
 179 /* to avoid arming timers that fire quicker than we can handle */
 180 static uint64_t filt_timerfloor = 0;
 181
 182 static lck_mtx_t _filt_timerlock;
 183 static void     filt_timerlock(void);
 184 static void     filt_timerunlock(void);
 185
 186 /*
 187  * Sentinel marker for a thread scanning through the list of
 188  * active knotes.
 189  */
 190 static struct filterops threadmarker_filtops =
 191         { 0, filt_badattach, 0, 0 };
 192
 193 static zone_t   knote_zone;
 194
 195 #define KN_HASHSIZE             64              /* XXX should be tunable */
 196 #define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))
 197
 198 #if 0
 199 extern struct filterops aio_filtops;
 200 #endif
 201
 202 /*
 203  * Table for for all system-defined filters.
 204  */
 205 static struct filterops *sysfilt_ops[] = {
 206         &file_filtops,                  /* EVFILT_READ */
 207         &file_filtops,                  /* EVFILT_WRITE */
 208 #if 0
 209         &aio_filtops,                   /* EVFILT_AIO */
 210 #else
 211         &bad_filtops,                   /* EVFILT_AIO */
 212 #endif
 213         &file_filtops,                  /* EVFILT_VNODE */
 214         &proc_filtops,                  /* EVFILT_PROC */
 215         &sig_filtops,                   /* EVFILT_SIGNAL */
 216         &timer_filtops,                 /* EVFILT_TIMER */
 217         &bad_filtops,                   /* EVFILT_MACHPORT */
 218         &fs_filtops                     /* EVFILT_FS */
 219 };
 220
 221 /*
 222  * kqueue/note lock attributes and implementations
 223  *
 224  *      kqueues have locks, while knotes have use counts
 225  *      Most of the knote state is guarded by the object lock.
 226  *      the knote "inuse" count and status use the kqueue lock.
 227  */
 228 lck_grp_attr_t * kq_lck_grp_attr;
 229 lck_grp_t * kq_lck_grp;
 230 lck_attr_t * kq_lck_attr;
 231
 232 static inline void
 233 kqlock(struct kqueue *kq)
 234 {
 235         lck_spin_lock(&kq->kq_lock);
 236 }
 237
 238 static inline void
 239 kqunlock(struct kqueue *kq)
 240 {
 241         lck_spin_unlock(&kq->kq_lock);
 242 }
 243
 244 /*
 245  * Convert a kq lock to a knote use referece.
 246  *
 247  *      If the knote is being dropped, we can't get
 248  *      a use reference, so just return with it
 249  *      still locked.
 250  *
 251  *      - kq locked at entry
 252  *      - unlock on exit if we get the use reference
 253  */
 254 static int
 255 kqlock2knoteuse(struct kqueue *kq, struct knote *kn)
 256 {
 257         if (kn->kn_status & KN_DROPPING)
 258                 return 0;
 259         kn->kn_inuse++;
 260         kqunlock(kq);
 261         return 1;
 262  }
 263
 264 /*
 265  * Convert a kq lock to a knote use referece.
 266  *
 267  *      If the knote is being dropped, we can't get
 268  *      a use reference, so just return with it
 269  *      still locked.
 270  *
 271  *      - kq locked at entry
 272  *      - kq always unlocked on exit
 273  */
 274 static int
 275 kqlock2knoteusewait(struct kqueue *kq, struct knote *kn)
 276 {
 277         if (!kqlock2knoteuse(kq, kn)) {
 278                 kn->kn_status |= KN_DROPWAIT;
 279                 assert_wait(&kn->kn_status, THREAD_UNINT);
 280                 kqunlock(kq);
 281                 thread_block(THREAD_CONTINUE_NULL);
 282                 return 0;
 283         }
 284         return 1;
 285  }
 286
 287 /*
 288  * Convert from a knote use reference back to kq lock.
 289  *
 290  *      Drop a use reference and wake any waiters if
 291  *      this is the last one.
 292  *
 293  *      The exit return indicates if the knote is
 294  *      still alive - but the kqueue lock is taken
 295  *      unconditionally.
 296  */
 297 static int
 298 knoteuse2kqlock(struct kqueue *kq, struct knote *kn)
 299 {
 300         kqlock(kq);
 301         if ((--kn->kn_inuse == 0) &&
 302             (kn->kn_status & KN_USEWAIT)) {
 303                 kn->kn_status &= ~KN_USEWAIT;
 304                 thread_wakeup(&kn->kn_inuse);
 305         }
 306         return ((kn->kn_status & KN_DROPPING) == 0);
 307  }
 308
 309 /*
 310  * Convert a kq lock to a knote drop referece.
 311  *
 312  *      If the knote is in use, wait for the use count
 313  *      to subside.  We first mark our intention to drop
 314  *      it - keeping other users from "piling on."
 315  *      If we are too late, we have to wait for the
 316  *      other drop to complete.
 317  *
 318  *      - kq locked at entry
 319  *      - always unlocked on exit.
 320  *      - caller can't hold any locks that would prevent
 321  *        the other dropper from completing.
 322  */
 323 static int
 324 kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
 325 {
 326
 327         if ((kn->kn_status & KN_DROPPING) == 0) {
 328                 kn->kn_status |= KN_DROPPING;
 329                 if (kn->kn_inuse > 0) {
 330                         kn->kn_status |= KN_USEWAIT;
 331                         assert_wait(&kn->kn_inuse, THREAD_UNINT);
 332                         kqunlock(kq);
 333                         thread_block(THREAD_CONTINUE_NULL);
 334                 } else
 335                         kqunlock(kq);
 336                 return 1;
 337         } else {
 338                 kn->kn_status |= KN_DROPWAIT;
 339                 assert_wait(&kn->kn_status, THREAD_UNINT);
 340                 kqunlock(kq);
 341                 thread_block(THREAD_CONTINUE_NULL);
 342                 return 0;
 343         }
 344 }
 345
 346 /*
 347  * Release a knote use count reference.
 348  */
 349 static void
 350 knote_put(struct knote *kn)
 351 {
 352         struct kqueue *kq = kn->kn_kq;
 353
 354         kqlock(kq);
 355         if ((--kn->kn_inuse == 0) &&
 356             (kn->kn_status & KN_USEWAIT)) {
 357                 kn->kn_status &= ~KN_USEWAIT;
 358                 thread_wakeup(&kn->kn_inuse);
 359         }
 360         kqunlock(kq);
 361  }
 362
 363
 364
 365 static int
 366 filt_fileattach(struct knote *kn)
 367 {
 368
 369         return (fo_kqfilter(kn->kn_fp, kn, current_proc()));
 370 }
 371
 372 #define f_flag f_fglob->fg_flag
 373 #define f_type f_fglob->fg_type
 374 #define f_msgcount f_fglob->fg_msgcount
 375 #define f_cred f_fglob->fg_cred
 376 #define f_ops f_fglob->fg_ops
 377 #define f_offset f_fglob->fg_offset
 378 #define f_data f_fglob->fg_data
 379
 380 static void
 381 filt_kqdetach(struct knote *kn)
 382 {
 383         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 384
 385         kqlock(kq);
 386         KNOTE_DETACH(&kq->kq_sel.si_note, kn);
 387         kqunlock(kq);
 388 }
 389
 390 /*ARGSUSED*/
 391 static int
 392 filt_kqueue(struct knote *kn, __unused long hint)
 393 {
 394         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 395
 396         kn->kn_data = kq->kq_count;
 397         return (kn->kn_data > 0);
 398 }
 399
 400 static int
 401 filt_procattach(struct knote *kn)
 402 {
 403         struct proc *p;
 404         int funnel_state;
 405
 406         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 407
 408         p = pfind(kn->kn_id);
 409         if (p == NULL) {
 410                 thread_funnel_set(kernel_flock, funnel_state);
 411                 return (ESRCH);
 412         }
 413
 414         kn->kn_flags |= EV_CLEAR;               /* automatically set */
 415
 416         /*
 417          * internal flag indicating registration done by kernel
 418          */
 419         if (kn->kn_flags & EV_FLAG1) {
 420                 kn->kn_data = (int)kn->kn_sdata;        /* ppid */
 421                 kn->kn_fflags = NOTE_CHILD;
 422                 kn->kn_flags &= ~EV_FLAG1;
 423         }
 424
 425         /* XXX lock the proc here while adding to the list? */
 426         KNOTE_ATTACH(&p->p_klist, kn);
 427
 428         thread_funnel_set(kernel_flock, funnel_state);
 429
 430         return (0);
 431 }
 432
 433 /*
 434  * The knote may be attached to a different process, which may exit,
 435  * leaving nothing for the knote to be attached to.  So when the process
 436  * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 437  * it will be deleted when read out.  However, as part of the knote deletion,
 438  * this routine is called, so a check is needed to avoid actually performing
 439  * a detach, because the original process does not exist any more.
 440  */
 441 static void
 442 filt_procdetach(struct knote *kn)
 443 {
 444         struct proc *p;
 445         int funnel_state;
 446
 447         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 448         p = pfind(kn->kn_id);
 449
 450         if (p != (struct proc *)NULL)
 451                 KNOTE_DETACH(&p->p_klist, kn);
 452
 453         thread_funnel_set(kernel_flock, funnel_state);
 454 }
 455
 456 static int
 457 filt_proc(struct knote *kn, long hint)
 458 {
 459         u_int event;
 460         int funnel_state;
 461
 462         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 463
 464         /*
 465          * mask off extra data
 466          */
 467         event = (u_int)hint & NOTE_PCTRLMASK;
 468
 469         /*
 470          * if the user is interested in this event, record it.
 471          */
 472         if (kn->kn_sfflags & event)
 473                 kn->kn_fflags |= event;
 474
 475         /*
 476          * process is gone, so flag the event as finished.
 477          */
 478         if (event == NOTE_EXIT) {
 479                 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 480                 thread_funnel_set(kernel_flock, funnel_state);
 481                 return (1);
 482         }
 483
 484         /*
 485          * process forked, and user wants to track the new process,
 486          * so attach a new knote to it, and immediately report an
 487          * event with the parent's pid.
 488          */
 489         if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
 490                 struct kevent kev;
 491                 int error;
 492
 493                 /*
 494                  * register knote with new process.
 495                  */
 496                 kev.ident = hint & NOTE_PDATAMASK;      /* pid */
 497                 kev.filter = kn->kn_filter;
 498                 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
 499                 kev.fflags = kn->kn_sfflags;
 500                 kev.data = kn->kn_id;                   /* parent */
 501                 kev.udata = kn->kn_kevent.udata;        /* preserve udata */
 502                 error = kevent_register(kn->kn_kq, &kev, NULL);
 503                 if (error)
 504                         kn->kn_fflags |= NOTE_TRACKERR;
 505         }
 506         event = kn->kn_fflags;
 507         thread_funnel_set(kernel_flock, funnel_state);
 508
 509         return (event != 0);
 510 }
 511
 512 /*
 513  * filt_timercompute - compute absolute timeout
 514  *
 515  *      The saved-data field in the knote contains the
 516  *      time value.  The saved filter-flags indicates
 517  *      the unit of measurement.
 518  *
 519  *      If the timeout is not absolute, adjust it for
 520  *      the current time.
 521  */
 522 static int
 523 filt_timercompute(struct knote *kn, uint64_t *abs_time)
 524 {
 525         uint64_t multiplier;
 526         uint64_t raw;
 527
 528         switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS)) {
 529         case NOTE_SECONDS:
 530                 multiplier = NSEC_PER_SEC;
 531                 break;
 532         case NOTE_USECONDS:
 533                 multiplier = NSEC_PER_USEC;
 534                 break;
 535         case NOTE_NSECONDS:
 536                 multiplier = 1;
 537                 break;
 538         case 0: /* milliseconds (default) */
 539                 multiplier = NSEC_PER_SEC / 1000;
 540                 break;
 541         default:
 542                 return EINVAL;
 543         }
 544         nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw);
 545         if (raw <= filt_timerfloor) {
 546                 *abs_time = 0;
 547                 return 0;
 548         }
 549         if ((kn->kn_sfflags & NOTE_ABSOLUTE) == NOTE_ABSOLUTE) {
 550                 uint32_t seconds, nanoseconds;
 551                 uint64_t now;
 552
 553                 clock_get_calendar_nanotime(&seconds, &nanoseconds);
 554                 nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + nanoseconds,
 555                                             &now);
 556                 if (now >= raw + filt_timerfloor) {
 557                         *abs_time = 0;
 558                         return 0;
 559                 }
 560                 raw -= now;
 561         }
 562         clock_absolutetime_interval_to_deadline(raw, abs_time);
 563         return 0;
 564 }
 565
 566 /*
 567  * filt_timerexpire - the timer callout routine
 568  *
 569  *      Just propagate the timer event into the knote
 570  *      filter routine (by going through the knote
 571  *      synchronization point).  Pass a hint to
 572  *      indicate this is a real event, not just a
 573  *      query from above.
 574  */
 575 static void
 576 filt_timerexpire(void *knx, __unused void *spare)
 577 {
 578         struct klist timer_list;
 579         struct knote *kn = knx;
 580
 581         /* no "object" for timers, so fake a list */
 582         SLIST_INIT(&timer_list);
 583         SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);
 584         KNOTE(&timer_list, 1);
 585 }
 586
 587 /*
 588  * data contains amount of time to sleep, in milliseconds,
 589  * or a pointer to a timespec structure.
 590  */
 591 static int
 592 filt_timerattach(struct knote *kn)
 593 {
 594         thread_call_t callout;
 595         uint64_t deadline;
 596         int error;
 597
 598         error = filt_timercompute(kn, &deadline);
 599         if (error)
 600                 return (error);
 601
 602         if (deadline) {
 603                 callout = thread_call_allocate(filt_timerexpire, kn);
 604                 if (NULL == callout)
 605                         return (ENOMEM);
 606         } else {
 607                 /* handle as immediate */
 608                 kn->kn_sdata = 0;
 609                 callout = NULL;
 610         }
 611
 612         filt_timerlock();
 613         kn->kn_hook = (caddr_t)callout;
 614
 615         /* absolute=EV_ONESHOT */
 616         if (kn->kn_sfflags & NOTE_ABSOLUTE)
 617                 kn->kn_flags |= EV_ONESHOT;
 618
 619         if (deadline) {
 620                 /* all others - if not faking immediate */
 621                 kn->kn_flags |= EV_CLEAR;
 622                 thread_call_enter_delayed(callout, deadline);
 623                 kn->kn_hookid = 0;
 624         } else {
 625                 /* fake immediate */
 626                 kn->kn_hookid = 1;
 627         }
 628         filt_timerunlock();
 629         return (0);
 630 }
 631
 632 static void
 633 filt_timerdetach(struct knote *kn)
 634 {
 635         thread_call_t callout;
 636
 637         filt_timerlock();
 638         callout = (thread_call_t)kn->kn_hook;
 639         if (callout != NULL) {
 640                 boolean_t cancelled;
 641
 642                 /* cancel the callout if we can */
 643                 cancelled = thread_call_cancel(callout);
 644                 if (cancelled) {
 645                         /* got it, just free it */
 646                         kn->kn_hook = NULL;
 647                         filt_timerunlock();
 648                         thread_call_free(callout);
 649                         return;
 650                 }
 651                 /* we have to wait for the expire routine.  */
 652                 kn->kn_hookid = -1;     /* we are detaching */
 653                 assert_wait(&kn->kn_hook, THREAD_UNINT);
 654                 filt_timerunlock();
 655                 thread_block(THREAD_CONTINUE_NULL);
 656                 assert(kn->kn_hook == NULL);
 657                 return;
 658         }
 659         /* nothing to do */
 660         filt_timerunlock();
 661 }
 662
 663
 664
 665 static int
 666 filt_timer(struct knote *kn, __unused long hint)
 667 {
 668         int result;
 669
 670         if (hint) {
 671                 /* real timer pop */
 672                 thread_call_t callout;
 673                 boolean_t detaching;
 674
 675                 filt_timerlock();
 676
 677                 kn->kn_data++;
 678
 679                 detaching = (kn->kn_hookid < 0);
 680                 callout = (thread_call_t)kn->kn_hook;
 681
 682                 if (!detaching && (kn->kn_flags & EV_ONESHOT) == 0) {
 683                         uint64_t deadline;
 684                         int error;
 685
 686                         /* user input data may have changed - deal */
 687                         error = filt_timercompute(kn, &deadline);
 688                         if (error) {
 689                                 kn->kn_flags |= EV_ERROR;
 690                                 kn->kn_data = error;
 691                         } else if (deadline == 0) {
 692                                 /* revert to fake immediate */
 693                                 kn->kn_flags &= ~EV_CLEAR;
 694                                 kn->kn_sdata = 0;
 695                                 kn->kn_hookid = 1;
 696                         } else {
 697                                 /* keep the callout and re-arm */
 698                                 thread_call_enter_delayed(callout, deadline);
 699                                 filt_timerunlock();
 700                                 return 1;
 701                         }
 702                 }
 703                 kn->kn_hook = NULL;
 704                 filt_timerunlock();
 705                 thread_call_free(callout);
 706
 707                 /* if someone is waiting for timer to pop */
 708                 if (detaching)
 709                         thread_wakeup(&kn->kn_hook);
 710
 711                 return 1;
 712         }
 713
 714         /* user-query */
 715         filt_timerlock();
 716
 717         /* change fake timer to real if needed */
 718         while (kn->kn_hookid > 0 && kn->kn_sdata > 0) {
 719                 int error;
 720
 721                 /* update the fake timer (make real) */
 722                 kn->kn_hookid = 0;
 723                 kn->kn_data = 0;
 724                 filt_timerunlock();
 725                 error = filt_timerattach(kn);
 726                 filt_timerlock();
 727                 if (error) {
 728                         kn->kn_flags |= EV_ERROR;
 729                         kn->kn_data = error;
 730                         filt_timerunlock();
 731                         return 1;
 732                 }
 733         }
 734
 735         /* if still fake, pretend it fired */
 736         if (kn->kn_hookid > 0)
 737                 kn->kn_data = 1;
 738
 739         result = (kn->kn_data != 0);
 740         filt_timerunlock();
 741         return result;
 742 }
 743
 744 static void
 745 filt_timerlock(void)
 746 {
 747         lck_mtx_lock(&_filt_timerlock);
 748 }
 749
 750 static void
 751 filt_timerunlock(void)
 752 {
 753         lck_mtx_unlock(&_filt_timerlock);
 754 }
 755
 756 /*
 757  * JMM - placeholder for not-yet-implemented filters
 758  */
 759 static int
 760 filt_badattach(__unused struct knote *kn)
 761 {
 762         return(ENOTSUP);
 763 }
 764
 765
 766 struct kqueue *
 767 kqueue_alloc(struct proc *p)
 768 {
 769         struct filedesc *fdp = p->p_fd;
 770         struct kqueue *kq;
 771
 772         MALLOC_ZONE(kq, struct kqueue *, sizeof(struct kqueue), M_KQUEUE, M_WAITOK);
 773         if (kq != NULL) {
 774                 bzero(kq, sizeof(struct kqueue));
 775                 lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
 776                 TAILQ_INIT(&kq->kq_head);
 777                 TAILQ_INIT(&kq->kq_inprocess);
 778                 kq->kq_fdp = fdp;
 779         }
 780
 781         if (fdp->fd_knlistsize < 0) {
 782                 proc_fdlock(p);
 783                 if (fdp->fd_knlistsize < 0)
 784                         fdp->fd_knlistsize = 0;         /* this process has had a kq */
 785                 proc_fdunlock(p);
 786         }
 787
 788         return kq;
 789 }
 790
 791
 792 /*
 793  * kqueue_dealloc - detach all knotes from a kqueue and free it
 794  *
 795  *      We walk each list looking for knotes referencing this
 796  *      this kqueue.  If we find one, we try to drop it.  But
 797  *      if we fail to get a drop reference, that will wait
 798  *      until it is dropped.  So, we can just restart again
 799  *      safe in the assumption that the list will eventually
 800  *      not contain any more references to this kqueue (either
 801  *      we dropped them all, or someone else did).
 802  *
 803  *      Assumes no new events are being added to the kqueue.
 804  *      Nothing locked on entry or exit.
 805  */
 806 void
 807 kqueue_dealloc(struct kqueue *kq, struct proc *p)
 808 {
 809         struct filedesc *fdp = p->p_fd;
 810         struct knote *kn;
 811         int i;
 812
 813         proc_fdlock(p);
 814         for (i = 0; i < fdp->fd_knlistsize; i++) {
 815                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 816                 while (kn != NULL) {
 817                         if (kq == kn->kn_kq) {
 818                                 kqlock(kq);
 819                                 proc_fdunlock(p);
 820                                 /* drop it ourselves or wait */
 821                                 if (kqlock2knotedrop(kq, kn)) {
 822                                         kn->kn_fop->f_detach(kn);
 823                                         knote_drop(kn, p);
 824                                 }
 825                                 proc_fdlock(p);
 826                                 /* start over at beginning of list */
 827                                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 828                                 continue;
 829                         }
 830                         kn = SLIST_NEXT(kn, kn_link);
 831                 }
 832         }
 833         if (fdp->fd_knhashmask != 0) {
 834                 for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
 835                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 836                         while (kn != NULL) {
 837                                 if (kq == kn->kn_kq) {
 838                                         kqlock(kq);
 839                                         proc_fdunlock(p);
 840                                         /* drop it ourselves or wait */
 841                                         if (kqlock2knotedrop(kq, kn)) {
 842                                                 kn->kn_fop->f_detach(kn);
 843                                                 knote_drop(kn, p);
 844                                         }
 845                                         proc_fdlock(p);
 846                                         /* start over at beginning of list */
 847                                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 848                                         continue;
 849                                 }
 850                                 kn = SLIST_NEXT(kn, kn_link);
 851                         }
 852                 }
 853         }
 854         proc_fdunlock(p);
 855         lck_spin_destroy(&kq->kq_lock, kq_lck_grp);
 856         FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE);
 857 }
 858
 859 int
 860 kqueue(struct proc *p, __unused struct kqueue_args *uap, register_t *retval)
 861 {
 862         struct kqueue *kq;
 863         struct fileproc *fp;
 864         int fd, error;
 865
 866         error = falloc(p, &fp, &fd);
 867         if (error) {
 868                 return (error);
 869         }
 870
 871         kq = kqueue_alloc(p);
 872         if (kq == NULL) {
 873                 fp_free(p, fd, fp);
 874                 return (ENOMEM);
 875         }
 876
 877         fp->f_flag = FREAD | FWRITE;
 878         fp->f_type = DTYPE_KQUEUE;
 879         fp->f_ops = &kqueueops;
 880         fp->f_data = (caddr_t)kq;
 881
 882         proc_fdlock(p);
 883         *fdflags(p, fd) &= ~UF_RESERVED;
 884         fp_drop(p, fd, fp, 1);
 885         proc_fdunlock(p);
 886
 887         *retval = fd;
 888         return (error);
 889 }
 890
 891 int
 892 kqueue_portset_np(__unused struct proc *p,
 893                                   __unused struct kqueue_portset_np_args *uap,
 894                                   __unused register_t *retval)
 895 {
 896                 /* JMM - Placeholder for now */
 897                 return (ENOTSUP);
 898 }
 899
 900 int
 901 kqueue_from_portset_np(__unused struct proc *p,
 902                                            __unused struct kqueue_from_portset_np_args *uap,
 903                                            __unused register_t *retval)
 904 {
 905                 /* JMM - Placeholder for now */
 906                 return (ENOTSUP);
 907 }
 908
 909 static int
 910 kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p)
 911 {
 912         int advance;
 913         int error;
 914
 915         if (IS_64BIT_PROCESS(p)) {
 916                 struct user_kevent kev64;
 917
 918                 advance = sizeof(kev64);
 919                 error = copyin(*addrp, (caddr_t)&kev64, advance);
 920                 if (error)
 921                         return error;
 922                 kevp->ident = CAST_DOWN(uintptr_t, kev64.ident);
 923                 kevp->filter = kev64.filter;
 924                 kevp->flags = kev64.flags;
 925                 kevp->fflags = kev64.fflags;
 926                 kevp->data = CAST_DOWN(intptr_t, kev64.data);
 927                 kevp->udata = kev64.udata;
 928         } else {
 929                 /*
 930                  * compensate for legacy in-kernel kevent layout
 931                  * where the udata field is alredy 64-bit.
 932                  */
 933                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 934                 error = copyin(*addrp, (caddr_t)kevp, advance);
 935         }
 936         if (!error)
 937                 *addrp += advance;
 938         return error;
 939 }
 940
 941 static int
 942 kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p)
 943 {
 944         int advance;
 945         int error;
 946
 947         if (IS_64BIT_PROCESS(p)) {
 948                 struct user_kevent kev64;
 949
 950                 kev64.ident = (uint64_t) kevp->ident;
 951                 kev64.filter = kevp->filter;
 952                 kev64.flags = kevp->flags;
 953                 kev64.fflags = kevp->fflags;
 954                 kev64.data = (int64_t) kevp->data;
 955                 kev64.udata = kevp->udata;
 956                 advance = sizeof(kev64);
 957                 error = copyout((caddr_t)&kev64, *addrp, advance);
 958         } else {
 959                 /*
 960                  * compensate for legacy in-kernel kevent layout
 961                  * where the udata field is alredy 64-bit.
 962                  */
 963                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 964                 error = copyout((caddr_t)kevp, *addrp, advance);
 965         }
 966         if (!error)
 967                 *addrp += advance;
 968         return error;
 969 }
 970
 971 /*
 972  * kevent_continue - continue a kevent syscall after blocking
 973  *
 974  *      assume we inherit a use count on the kq fileglob.
 975  */
 976
 977 static void
 978 kevent_continue(__unused struct kqueue *kq, void *data, int error)
 979 {
 980         struct _kevent *cont_args;
 981         struct fileproc *fp;
 982         register_t *retval;
 983         int noutputs;
 984         int fd;
 985         struct proc *p = current_proc();
 986
 987         cont_args = (struct _kevent *)data;
 988         noutputs = cont_args->eventout;
 989         retval = cont_args->retval;
 990         fd = cont_args->fd;
 991         fp = cont_args->fp;
 992
 993         fp_drop(p, fd, fp, 0);
 994
 995         /* don't restart after signals... */
 996         if (error == ERESTART)
 997                 error = EINTR;
 998         else if (error == EWOULDBLOCK)
 999                 error = 0;
1000         if (error == 0)
1001                 *retval = noutputs;
1002         unix_syscall_return(error);
1003 }
1004
1005 /*
1006  * kevent - [syscall] register and wait for kernel events
1007  *
1008  */
1009
1010 int
1011 kevent(struct proc *p, struct kevent_args *uap, register_t *retval)
1012 {
1013         user_addr_t changelist = uap->changelist;
1014         user_addr_t ueventlist = uap->eventlist;
1015         int nchanges = uap->nchanges;
1016         int nevents = uap->nevents;
1017         int fd = uap->fd;
1018
1019         struct _kevent *cont_args;
1020         uthread_t ut;
1021         struct kqueue *kq;
1022         struct fileproc *fp;
1023         struct kevent kev;
1024         int error, noutputs;
1025         struct timeval atv;
1026
1027         /* convert timeout to absolute - if we have one */
1028         if (uap->timeout != USER_ADDR_NULL) {
1029                 struct timeval rtv;
1030                 if ( IS_64BIT_PROCESS(p) ) {
1031                         struct user_timespec ts;
1032                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1033                         if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0)
1034                                 error = EINVAL;
1035                         else
1036                                 TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1037                 } else {
1038                         struct timespec ts;
1039                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1040                         TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1041                 }
1042                 if (error)
1043                         return error;
1044                 if (itimerfix(&rtv))
1045                         return EINVAL;
1046                 getmicrouptime(&atv);
1047                 timevaladd(&atv, &rtv);
1048         } else {
1049                 atv.tv_sec = 0;
1050                 atv.tv_usec = 0;
1051         }
1052
1053         /* get a usecount for the kq itself */
1054         if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
1055                 return(error);
1056
1057         /* register all the change requests the user provided... */
1058         noutputs = 0;
1059         while (nchanges > 0 && error == 0) {
1060                 error = kevent_copyin(&changelist, &kev, p);
1061                 if (error)
1062                         break;
1063
1064                 kev.flags &= ~EV_SYSFLAGS;
1065                 error = kevent_register(kq, &kev, p);
1066                 if (error && nevents > 0) {
1067                         kev.flags = EV_ERROR;
1068                         kev.data = error;
1069                         error = kevent_copyout(&kev, &ueventlist, p);
1070                         if (error == 0) {
1071                                 nevents--;
1072                                 noutputs++;
1073                         }
1074                 }
1075                 nchanges--;
1076         }
1077
1078         /* store the continuation/completion data in the uthread */
1079         ut = (uthread_t)get_bsdthread_info(current_thread());
1080         cont_args = (struct _kevent *)&ut->uu_state.ss_kevent;
1081         cont_args->fp = fp;
1082         cont_args->fd = fd;
1083         cont_args->retval = retval;
1084         cont_args->eventlist = ueventlist;
1085         cont_args->eventcount = nevents;
1086         cont_args->eventout = noutputs;
1087
1088         if (nevents > 0 && noutputs == 0 && error == 0)
1089                 error = kevent_scan(kq, kevent_callback,
1090                                     kevent_continue, cont_args,
1091                                     &atv, p);
1092         kevent_continue(kq, cont_args, error);
1093         /* NOTREACHED */
1094         return error;
1095 }
1096
1097
1098 /*
1099  * kevent_callback - callback for each individual event
1100  *
1101  *      called with nothing locked
1102  *      caller holds a reference on the kqueue
1103  */
1104
1105 static int
1106 kevent_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
1107 {
1108         struct _kevent *cont_args;
1109         int error;
1110
1111         cont_args = (struct _kevent *)data;
1112         assert(cont_args->eventout < cont_arg->eventcount);
1113
1114         /*
1115          * Copy out the appropriate amount of event data for this user.
1116          */
1117         error = kevent_copyout(kevp, &cont_args->eventlist, current_proc());
1118
1119         /*
1120          * If there isn't space for additional events, return
1121          * a harmless error to stop the processing here
1122          */
1123         if (error == 0 && ++cont_args->eventout == cont_args->eventcount)
1124                         error = EWOULDBLOCK;
1125         return error;
1126 }
1127
1128 /*
1129  * kevent_register - add a new event to a kqueue
1130  *
1131  *      Creates a mapping between the event source and
1132  *      the kqueue via a knote data structure.
1133  *
1134  *      Because many/most the event sources are file
1135  *      descriptor related, the knote is linked off
1136  *      the filedescriptor table for quick access.
1137  *
1138  *      called with nothing locked
1139  *      caller holds a reference on the kqueue
1140  */
1141
1142 int
1143 kevent_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
1144 {
1145         struct filedesc *fdp = kq->kq_fdp;
1146         struct filterops *fops;
1147         struct fileproc *fp = NULL;
1148         struct knote *kn = NULL;
1149         int error = 0;
1150
1151         if (kev->filter < 0) {
1152                 if (kev->filter + EVFILT_SYSCOUNT < 0)
1153                         return (EINVAL);
1154                 fops = sysfilt_ops[~kev->filter];       /* to 0-base index */
1155         } else {
1156                 /*
1157                  * XXX
1158                  * filter attach routine is responsible for insuring that
1159                  * the identifier can be attached to it.
1160                  */
1161                 printf("unknown filter: %d\n", kev->filter);
1162                 return (EINVAL);
1163         }
1164
1165         /* this iocount needs to be dropped if it is not registered */
1166         if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 0)) != 0)
1167                 return(error);
1168
1169  restart:
1170         proc_fdlock(p);
1171         if (fops->f_isfd) {
1172                 /* fd-based knotes are linked off the fd table */
1173                 if (kev->ident < (u_int)fdp->fd_knlistsize) {
1174                         SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
1175                                 if (kq == kn->kn_kq &&
1176                                     kev->filter == kn->kn_filter)
1177                                         break;
1178                 }
1179         } else {
1180                 /* hash non-fd knotes here too */
1181                 if (fdp->fd_knhashmask != 0) {
1182                         struct klist *list;
1183
1184                         list = &fdp->fd_knhash[
1185                             KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
1186                         SLIST_FOREACH(kn, list, kn_link)
1187                                 if (kev->ident == kn->kn_id &&
1188                                     kq == kn->kn_kq &&
1189                                     kev->filter == kn->kn_filter)
1190                                         break;
1191                 }
1192         }
1193
1194         /*
1195          * kn now contains the matching knote, or NULL if no match
1196          */
1197         if (kn == NULL) {
1198                 if ((kev->flags & (EV_ADD|EV_DELETE)) == EV_ADD) {
1199                         kn = knote_alloc();
1200                         if (kn == NULL) {
1201                                 proc_fdunlock(p);
1202                                 error = ENOMEM;
1203                                 goto done;
1204                         }
1205                         kn->kn_fp = fp;
1206                         kn->kn_kq = kq;
1207                         kn->kn_tq = &kq->kq_head;
1208                         kn->kn_fop = fops;
1209                         kn->kn_sfflags = kev->fflags;
1210                         kn->kn_sdata = kev->data;
1211                         kev->fflags = 0;
1212                         kev->data = 0;
1213                         kn->kn_kevent = *kev;
1214                         kn->kn_inuse = 1;  /* for f_attach() */
1215                         kn->kn_status = 0;
1216
1217                         /* before anyone can find it */
1218                         if (kev->flags & EV_DISABLE)
1219                                 kn->kn_status |= KN_DISABLED;
1220
1221                         error = knote_fdpattach(kn, fdp, p);
1222                         proc_fdunlock(p);
1223
1224                         if (error) {
1225                                 knote_free(kn);
1226                                 goto done;
1227                         }
1228
1229                         /*
1230                          * apply reference count to knote structure, and
1231                          * do not release it at the end of this routine.
1232                          */
1233                         fp = NULL;
1234
1235                         /*
1236                          * If the attach fails here, we can drop it knowing
1237                          * that nobody else has a reference to the knote.
1238                          */
1239                         if ((error = fops->f_attach(kn)) != 0) {
1240                                 knote_drop(kn, p);
1241                                 goto done;
1242                         }
1243                 } else {
1244                         proc_fdunlock(p);
1245                         error = ENOENT;
1246                         goto done;
1247                 }
1248         } else {
1249                 /* existing knote - get kqueue lock */
1250                 kqlock(kq);
1251                 proc_fdunlock(p);
1252
1253                 if (kev->flags & EV_DELETE) {
1254                         knote_dequeue(kn);
1255                         kn->kn_status |= KN_DISABLED;
1256                         if (kqlock2knotedrop(kq, kn)) {
1257                                 kn->kn_fop->f_detach(kn);
1258                                 knote_drop(kn, p);
1259                         }
1260                         goto done;
1261                 }
1262
1263                 /* update status flags for existing knote */
1264                 if (kev->flags & EV_DISABLE) {
1265                         knote_dequeue(kn);
1266                         kn->kn_status |= KN_DISABLED;
1267                 } else if (kev->flags & EV_ENABLE) {
1268                         kn->kn_status &= ~KN_DISABLED;
1269                         if (kn->kn_status & KN_ACTIVE)
1270                                 knote_enqueue(kn);
1271                 }
1272
1273                 /*
1274                  * If somebody is in the middle of dropping this
1275                  * knote - go find/insert a new one.  But we have
1276                  * wait for this one to go away first.
1277                  */
1278                 if (!kqlock2knoteusewait(kq, kn))
1279                         /* kqueue unlocked */
1280                         goto restart;
1281
1282                 /*
1283                  * The user may change some filter values after the
1284                  * initial EV_ADD, but doing so will not reset any
1285                  * filter which have already been triggered.
1286                  */
1287                 kn->kn_sfflags = kev->fflags;
1288                 kn->kn_sdata = kev->data;
1289                 kn->kn_kevent.udata = kev->udata;
1290         }
1291
1292         /* still have use ref on knote */
1293         if (kn->kn_fop->f_event(kn, 0)) {
1294                 if (knoteuse2kqlock(kq, kn))
1295                         knote_activate(kn);
1296                 kqunlock(kq);
1297         } else {
1298                 knote_put(kn);
1299         }
1300
1301 done:
1302         if (fp != NULL)
1303                 fp_drop(p, kev->ident, fp, 0);
1304         return (error);
1305 }
1306
1307 /*
1308  * kevent_process - process the triggered events in a kqueue
1309  *
1310  *      Walk the queued knotes and validate that they are
1311  *      really still triggered events by calling the filter
1312  *      routines (if necessary).  Hold a use reference on
1313  *      the knote to avoid it being detached. For each event
1314  *      that is still considered triggered, invoke the
1315  *      callback routine provided.
1316  *
1317  *      caller holds a reference on the kqueue.
1318  *      kqueue locked on entry and exit - but may be dropped
1319  */
1320
1321 static int
1322 kevent_process(struct kqueue *kq,
1323                kevent_callback_t callback,
1324                void *data,
1325                int *countp,
1326                struct proc *p)
1327 {
1328         struct knote *kn;
1329         struct kevent kev;
1330         int nevents;
1331         int error;
1332
1333  restart:
1334         if (kq->kq_count == 0) {
1335                 *countp = 0;
1336                 return 0;
1337         }
1338
1339         /* if someone else is processing the queue, wait */
1340         if (!TAILQ_EMPTY(&kq->kq_inprocess)) {
1341                 assert_wait(&kq->kq_inprocess, THREAD_UNINT);
1342                 kq->kq_state |= KQ_PROCWAIT;
1343                 kqunlock(kq);
1344                 thread_block(THREAD_CONTINUE_NULL);
1345                 kqlock(kq);
1346                 goto restart;
1347         }
1348
1349         error = 0;
1350         nevents = 0;
1351         while (error == 0 &&
1352                (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) {
1353
1354                 /*
1355                  * move knote to the processed queue.
1356                  * this is also protected by the kq lock.
1357                  */
1358                 assert(kn->kn_tq == &kq->kq_head);
1359                 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1360                 kn->kn_tq = &kq->kq_inprocess;
1361                 TAILQ_INSERT_TAIL(&kq->kq_inprocess, kn, kn_tqe);
1362
1363                 /*
1364                  * Non-EV_ONESHOT events must be re-validated.
1365                  *
1366                  * Convert our lock to a use-count and call the event's
1367                  * filter routine to update.
1368                  *
1369                  * If the event is dropping (or no longer valid), we
1370                  * already have it off the active queue, so just
1371                  * finish the job of deactivating it.
1372                  */
1373                 if ((kn->kn_flags & EV_ONESHOT) == 0) {
1374                         int result;
1375
1376                         if (kqlock2knoteuse(kq, kn)) {
1377
1378                                 /* call the filter with just a ref */
1379                                 result = kn->kn_fop->f_event(kn, 0);
1380
1381                                 if (!knoteuse2kqlock(kq, kn) || result == 0) {
1382                                         knote_deactivate(kn);
1383                                         continue;
1384                                 }
1385                         } else {
1386                                 knote_deactivate(kn);
1387                                 continue;
1388                         }
1389                 }
1390
1391                 /*
1392                  * Got a valid triggered knote with the kqueue
1393                  * still locked.  Snapshot the data, and determine
1394                  * how to dispatch the knote for future events.
1395                  */
1396                 kev = kn->kn_kevent;
1397
1398                 /* now what happens to it? */
1399                 if (kn->kn_flags & EV_ONESHOT) {
1400                         knote_deactivate(kn);
1401                         if (kqlock2knotedrop(kq, kn)) {
1402                                 kn->kn_fop->f_detach(kn);
1403                                 knote_drop(kn, p);
1404                         }
1405                 } else if (kn->kn_flags & EV_CLEAR) {
1406                         knote_deactivate(kn);
1407                         kn->kn_data = 0;
1408                         kn->kn_fflags = 0;
1409                         kqunlock(kq);
1410                 } else {
1411                         /*
1412                          * leave on in-process queue.  We'll
1413                          * move all the remaining ones back
1414                          * the kq queue and wakeup any
1415                          * waiters when we are done.
1416                          */
1417                         kqunlock(kq);
1418                 }
1419
1420                 /* callback to handle each event as we find it */
1421                 error = (callback)(kq, &kev, data);
1422                 nevents++;
1423
1424                 kqlock(kq);
1425         }
1426
1427         /*
1428          * With the kqueue still locked, move any knotes
1429          * remaining on the in-process queue back to the
1430          * kq's queue and wake up any waiters.
1431          */
1432         while ((kn = TAILQ_FIRST(&kq->kq_inprocess)) != NULL) {
1433                 assert(kn->kn_tq == &kq->kq_inprocess);
1434                 TAILQ_REMOVE(&kq->kq_inprocess, kn, kn_tqe);
1435                 kn->kn_tq = &kq->kq_head;
1436                 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1437         }
1438         if (kq->kq_state & KQ_PROCWAIT) {
1439                 kq->kq_state &= ~KQ_PROCWAIT;
1440                 thread_wakeup(&kq->kq_inprocess);
1441         }
1442
1443         *countp = nevents;
1444         return error;
1445 }
1446
1447
1448 static void
1449 kevent_scan_continue(void *data, wait_result_t wait_result)
1450 {
1451         uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1452         struct _kevent_scan * cont_args = &ut->uu_state.ss_kevent_scan;
1453         struct kqueue *kq = (struct kqueue *)data;
1454         int error;
1455         int count;
1456
1457         /* convert the (previous) wait_result to a proper error */
1458         switch (wait_result) {
1459         case THREAD_AWAKENED:
1460                 kqlock(kq);
1461                 error = kevent_process(kq, cont_args->call, cont_args, &count, current_proc());
1462                 if (error == 0 && count == 0) {
1463                         assert_wait_deadline(kq, THREAD_ABORTSAFE, cont_args->deadline);
1464                         kq->kq_state |= KQ_SLEEP;
1465                         kqunlock(kq);
1466                         thread_block_parameter(kevent_scan_continue, kq);
1467                         /* NOTREACHED */
1468                 }
1469                 kqunlock(kq);
1470                 break;
1471         case THREAD_TIMED_OUT:
1472                 error = EWOULDBLOCK;
1473                 break;
1474         case THREAD_INTERRUPTED:
1475                 error = EINTR;
1476                 break;
1477         default:
1478                 panic("kevent_scan_cont() - invalid wait_result (%d)", wait_result);
1479                 error = 0;
1480         }
1481
1482         /* call the continuation with the results */
1483         assert(cont_args->cont != NULL);
1484         (cont_args->cont)(kq, cont_args->data, error);
1485 }
1486
1487
1488 /*
1489  * kevent_scan - scan and wait for events in a kqueue
1490  *
1491  *      Process the triggered events in a kqueue.
1492  *
1493  *      If there are no events triggered arrange to
1494  *      wait for them. If the caller provided a
1495  *      continuation routine, then kevent_scan will
1496  *      also.
1497  *
1498  *      The callback routine must be valid.
1499  *      The caller must hold a use-count reference on the kq.
1500  */
1501
1502 int
1503 kevent_scan(struct kqueue *kq,
1504             kevent_callback_t callback,
1505             kevent_continue_t continuation,
1506             void *data,
1507             struct timeval *atvp,
1508             struct proc *p)
1509 {
1510         thread_continue_t cont = THREAD_CONTINUE_NULL;
1511         uint64_t deadline;
1512         int error;
1513         int first;
1514
1515         assert(callback != NULL);
1516
1517         first = 1;
1518         for (;;) {
1519                 wait_result_t wait_result;
1520                 int count;
1521
1522                 /*
1523                  * Make a pass through the kq to find events already
1524                  * triggered.
1525                  */
1526                 kqlock(kq);
1527                 error = kevent_process(kq, callback, data, &count, p);
1528                 if (error || count)
1529                         break; /* lock still held */
1530
1531                 /* looks like we have to consider blocking */
1532                 if (first) {
1533                         first = 0;
1534                         /* convert the timeout to a deadline once */
1535                         if (atvp->tv_sec || atvp->tv_usec) {
1536                                 uint32_t seconds, nanoseconds;
1537                                 uint64_t now;
1538
1539                                 clock_get_uptime(&now);
1540                                 nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC +
1541                                                             atvp->tv_usec * NSEC_PER_USEC,
1542                                                             &deadline);
1543                                 if (now >= deadline) {
1544                                         /* non-blocking call */
1545                                         error = EWOULDBLOCK;
1546                                         break; /* lock still held */
1547                                 }
1548                                 deadline -= now;
1549                                 clock_absolutetime_interval_to_deadline(deadline, &deadline);
1550                         } else {
1551                                 deadline = 0;   /* block forever */
1552                         }
1553
1554                         if (continuation) {
1555                                 uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1556                                 struct _kevent_scan *cont_args = &ut->uu_state.ss_kevent_scan;
1557
1558                                 cont_args->call = callback;
1559                                 cont_args->cont = continuation;
1560                                 cont_args->deadline = deadline;
1561                                 cont_args->data = data;
1562                                 cont = kevent_scan_continue;
1563                         }
1564                 }
1565
1566                 /* go ahead and wait */
1567                 assert_wait_deadline(kq, THREAD_ABORTSAFE, deadline);
1568                 kq->kq_state |= KQ_SLEEP;
1569                 kqunlock(kq);
1570                 wait_result = thread_block_parameter(cont, kq);
1571                 /* NOTREACHED if (continuation != NULL) */
1572
1573                 switch (wait_result) {
1574                 case THREAD_AWAKENED:
1575                         continue;
1576                 case THREAD_TIMED_OUT:
1577                         return EWOULDBLOCK;
1578                 case THREAD_INTERRUPTED:
1579                         return EINTR;
1580                 default:
1581                         panic("kevent_scan - bad wait_result (%d)",
1582                               wait_result);
1583                         error = 0;
1584                 }
1585         }
1586         kqunlock(kq);
1587         return error;
1588 }
1589
1590
1591 /*
1592  * XXX
1593  * This could be expanded to call kqueue_scan, if desired.
1594  */
1595 /*ARGSUSED*/
1596 static int
1597 kqueue_read(__unused struct fileproc *fp,
1598                         __unused struct uio *uio,
1599                         __unused kauth_cred_t cred,
1600                         __unused int flags,
1601                         __unused struct proc *p)
1602 {
1603         return (ENXIO);
1604 }
1605
1606 /*ARGSUSED*/
1607 static int
1608 kqueue_write(__unused struct fileproc *fp,
1609                          __unused struct uio *uio,
1610                          __unused kauth_cred_t cred,
1611                          __unused int flags,
1612                          __unused struct proc *p)
1613 {
1614         return (ENXIO);
1615 }
1616
1617 /*ARGSUSED*/
1618 static int
1619 kqueue_ioctl(__unused struct fileproc *fp,
1620                          __unused u_long com,
1621                          __unused caddr_t data,
1622                          __unused struct proc *p)
1623 {
1624         return (ENOTTY);
1625 }
1626
1627 /*ARGSUSED*/
1628 static int
1629 kqueue_select(struct fileproc *fp, int which, void *wql, struct proc *p)
1630 {
1631         struct kqueue *kq = (struct kqueue *)fp->f_data;
1632         int retnum = 0;
1633
1634         if (which == FREAD) {
1635                 kqlock(kq);
1636                 if (kq->kq_count) {
1637                         retnum = 1;
1638                 } else {
1639                         selrecord(p, &kq->kq_sel, wql);
1640                         kq->kq_state |= KQ_SEL;
1641                 }
1642                 kqunlock(kq);
1643         }
1644         return (retnum);
1645 }
1646
1647 /*
1648  * kqueue_close -
1649  */
1650 /*ARGSUSED*/
1651 static int
1652 kqueue_close(struct fileglob *fg, struct proc *p)
1653 {
1654         struct kqueue *kq = (struct kqueue *)fg->fg_data;
1655
1656         kqueue_dealloc(kq, p);
1657         fg->fg_data = NULL;
1658         return (0);
1659 }
1660
1661 /*ARGSUSED*/
1662 /*
1663  * The callers has taken a use-count reference on this kqueue and will donate it
1664  * to the kqueue we are being added to.  This keeps the kqueue from closing until
1665  * that relationship is torn down.
1666  */
1667 static int
1668 kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
1669 {
1670         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
1671
1672         if (kn->kn_filter != EVFILT_READ)
1673                 return (1);
1674
1675         kn->kn_fop = &kqread_filtops;
1676         kqlock(kq);
1677         KNOTE_ATTACH(&kq->kq_sel.si_note, kn);
1678         kqunlock(kq);
1679         return (0);
1680 }
1681
1682 /*ARGSUSED*/
1683 int
1684 kqueue_stat(struct fileproc *fp, struct stat *st, __unused struct proc *p)
1685 {
1686         struct kqueue *kq = (struct kqueue *)fp->f_data;
1687
1688         bzero((void *)st, sizeof(*st));
1689         st->st_size = kq->kq_count;
1690         st->st_blksize = sizeof(struct kevent);
1691         st->st_mode = S_IFIFO;
1692         return (0);
1693 }
1694
1695 /*
1696  * Called with the kqueue locked
1697  */
1698 static void
1699 kqueue_wakeup(struct kqueue *kq)
1700 {
1701
1702         if (kq->kq_state & KQ_SLEEP) {
1703                 kq->kq_state &= ~KQ_SLEEP;
1704                 thread_wakeup(kq);
1705         }
1706         if (kq->kq_state & KQ_SEL) {
1707                 kq->kq_state &= ~KQ_SEL;
1708                 selwakeup(&kq->kq_sel);
1709         }
1710         KNOTE(&kq->kq_sel.si_note, 0);
1711 }
1712
1713 void
1714 klist_init(struct klist *list)
1715 {
1716         SLIST_INIT(list);
1717 }
1718
1719
1720 /*
1721  * Query/Post each knote in the object's list
1722  *
1723  *      The object lock protects the list. It is assumed
1724  *      that the filter/event routine for the object can
1725  *      determine that the object is already locked (via
1726  *      the hind) and not deadlock itself.
1727  *
1728  *      The object lock should also hold off pending
1729  *      detach/drop operations.  But we'll prevent it here
1730  *      too - just in case.
1731  */
1732 void
1733 knote(struct klist *list, long hint)
1734 {
1735         struct knote *kn;
1736
1737         SLIST_FOREACH(kn, list, kn_selnext) {
1738                 struct kqueue *kq = kn->kn_kq;
1739
1740                 kqlock(kq);
1741                 if (kqlock2knoteuse(kq, kn)) {
1742                         int result;
1743
1744                         /* call the event with only a use count */
1745                         result = kn->kn_fop->f_event(kn, hint);
1746
1747                         /* if its not going away and triggered */
1748                         if (knoteuse2kqlock(kq, kn) && result)
1749                                 knote_activate(kn);
1750                         /* lock held again */
1751                 }
1752                 kqunlock(kq);
1753         }
1754 }
1755
1756 /*
1757  * attach a knote to the specified list.  Return true if this is the first entry.
1758  * The list is protected by whatever lock the object it is associated with uses.
1759  */
1760 int
1761 knote_attach(struct klist *list, struct knote *kn)
1762 {
1763         int ret = SLIST_EMPTY(list);
1764         SLIST_INSERT_HEAD(list, kn, kn_selnext);
1765         return ret;
1766 }
1767
1768 /*
1769  * detach a knote from the specified list.  Return true if that was the last entry.
1770  * The list is protected by whatever lock the object it is associated with uses.
1771  */
1772 int
1773 knote_detach(struct klist *list, struct knote *kn)
1774 {
1775         SLIST_REMOVE(list, kn, knote, kn_selnext);
1776         return SLIST_EMPTY(list);
1777 }
1778
1779 /*
1780  * remove all knotes referencing a specified fd
1781  *
1782  * Essentially an inlined knote_remove & knote_drop
1783  * when we know for sure that the thing is a file
1784  *
1785  * Entered with the proc_fd lock already held.
1786  * It returns the same way, but may drop it temporarily.
1787  */
1788 void
1789 knote_fdclose(struct proc *p, int fd)
1790 {
1791         struct filedesc *fdp = p->p_fd;
1792         struct klist *list;
1793         struct knote *kn;
1794
1795         list = &fdp->fd_knlist[fd];
1796         while ((kn = SLIST_FIRST(list)) != NULL) {
1797                 struct kqueue *kq = kn->kn_kq;
1798
1799                 kqlock(kq);
1800                 proc_fdunlock(p);
1801
1802                 /*
1803                  * Convert the lock to a drop ref.
1804                  * If we get it, go ahead and drop it.
1805                  * Otherwise, we waited for it to
1806                  * be dropped by the other guy, so
1807                  * it is safe to move on in the list.
1808                  */
1809                 if (kqlock2knotedrop(kq, kn)) {
1810                         kn->kn_fop->f_detach(kn);
1811                         knote_drop(kn, p);
1812                 }
1813
1814                 proc_fdlock(p);
1815
1816                 /* the fd tables may have changed - start over */
1817                 list = &fdp->fd_knlist[fd];
1818         }
1819 }
1820
1821 /* proc_fdlock held on entry (and exit) */
1822 static int
1823 knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
1824 {
1825         struct klist *list = NULL;
1826
1827         if (! kn->kn_fop->f_isfd) {
1828                 if (fdp->fd_knhashmask == 0)
1829                         fdp->fd_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
1830                             &fdp->fd_knhashmask);
1831                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1832         } else {
1833                 if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
1834                         u_int size = 0;
1835
1836                         /* have to grow the fd_knlist */
1837                         size = fdp->fd_knlistsize;
1838                         while (size <= kn->kn_id)
1839                                 size += KQEXTENT;
1840                         MALLOC(list, struct klist *,
1841                                size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
1842                         if (list == NULL)
1843                                 return (ENOMEM);
1844
1845                         bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
1846                               fdp->fd_knlistsize * sizeof(struct klist *));
1847                         bzero((caddr_t)list +
1848                               fdp->fd_knlistsize * sizeof(struct klist *),
1849                               (size - fdp->fd_knlistsize) * sizeof(struct klist *));
1850                         FREE(fdp->fd_knlist, M_KQUEUE);
1851                         fdp->fd_knlist = list;
1852                         fdp->fd_knlistsize = size;
1853                 }
1854                 list = &fdp->fd_knlist[kn->kn_id];
1855         }
1856         SLIST_INSERT_HEAD(list, kn, kn_link);
1857         return (0);
1858 }
1859
1860
1861
1862 /*
1863  * should be called at spl == 0, since we don't want to hold spl
1864  * while calling fdrop and free.
1865  */
1866 static void
1867 knote_drop(struct knote *kn, struct proc *p)
1868 {
1869         struct filedesc *fdp = p->p_fd;
1870         struct kqueue *kq = kn->kn_kq;
1871         struct klist *list;
1872
1873         proc_fdlock(p);
1874         if (kn->kn_fop->f_isfd)
1875                 list = &fdp->fd_knlist[kn->kn_id];
1876         else
1877                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1878
1879         SLIST_REMOVE(list, kn, knote, kn_link);
1880         kqlock(kq);
1881         knote_dequeue(kn);
1882         if (kn->kn_status & KN_DROPWAIT)
1883                 thread_wakeup(&kn->kn_status);
1884         kqunlock(kq);
1885         proc_fdunlock(p);
1886
1887         if (kn->kn_fop->f_isfd)
1888                 fp_drop(p, kn->kn_id, kn->kn_fp, 0);
1889
1890         knote_free(kn);
1891 }
1892
1893 /* called with kqueue lock held */
1894 static void
1895 knote_activate(struct knote *kn)
1896 {
1897         struct kqueue *kq = kn->kn_kq;
1898
1899         kn->kn_status |= KN_ACTIVE;
1900         knote_enqueue(kn);
1901         kqueue_wakeup(kq);
1902  }
1903
1904 /* called with kqueue lock held */
1905 static void
1906 knote_deactivate(struct knote *kn)
1907 {
1908         kn->kn_status &= ~KN_ACTIVE;
1909         knote_dequeue(kn);
1910 }
1911
1912 /* called with kqueue lock held */
1913 static void
1914 knote_enqueue(struct knote *kn)
1915 {
1916         struct kqueue *kq = kn->kn_kq;
1917
1918         if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
1919                 struct kqtailq *tq = kn->kn_tq;
1920
1921                 TAILQ_INSERT_TAIL(tq, kn, kn_tqe);
1922                 kn->kn_status |= KN_QUEUED;
1923                 kq->kq_count++;
1924         }
1925 }
1926
1927 /* called with kqueue lock held */
1928 static void
1929 knote_dequeue(struct knote *kn)
1930 {
1931         struct kqueue *kq = kn->kn_kq;
1932
1933         assert((kn->kn_status & KN_DISABLED) == 0);
1934         if ((kn->kn_status & KN_QUEUED) == KN_QUEUED) {
1935                 struct kqtailq *tq = kn->kn_tq;
1936
1937                 TAILQ_REMOVE(tq, kn, kn_tqe);
1938                 kn->kn_tq = &kq->kq_head;
1939                 kn->kn_status &= ~KN_QUEUED;
1940                 kq->kq_count--;
1941         }
1942 }
1943
1944 void
1945 knote_init(void)
1946 {
1947         knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), 8192, "knote zone");
1948
1949         /* allocate kq lock group attribute and group */
1950         kq_lck_grp_attr= lck_grp_attr_alloc_init();
1951         lck_grp_attr_setstat(kq_lck_grp_attr);
1952
1953         kq_lck_grp = lck_grp_alloc_init("kqueue",  kq_lck_grp_attr);
1954
1955         /* Allocate kq lock attribute */
1956         kq_lck_attr = lck_attr_alloc_init();
1957         lck_attr_setdefault(kq_lck_attr);
1958
1959         /* Initialize the timer filter lock */
1960         lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
1961 }
1962 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
1963
1964 static struct knote *
1965 knote_alloc(void)
1966 {
1967         return ((struct knote *)zalloc(knote_zone));
1968 }
1969
1970 static void
1971 knote_free(struct knote *kn)
1972 {
1973         zfree(knote_zone, kn);
1974 }
1975
1976 #include <sys/param.h>
1977 #include <sys/socket.h>
1978 #include <sys/protosw.h>
1979 #include <sys/domain.h>
1980 #include <sys/mbuf.h>
1981 #include <sys/kern_event.h>
1982 #include <sys/malloc.h>
1983 #include <sys/sys_domain.h>
1984 #include <sys/syslog.h>
1985
1986
1987 static int kev_attach(struct socket *so, int proto, struct proc *p);
1988 static int kev_detach(struct socket *so);
1989 static int kev_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p);
1990
1991 struct pr_usrreqs event_usrreqs = {
1992      pru_abort_notsupp, pru_accept_notsupp, kev_attach, pru_bind_notsupp, pru_connect_notsupp,
1993      pru_connect2_notsupp, kev_control, kev_detach, pru_disconnect_notsupp,
1994      pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp, pru_rcvoob_notsupp,
1995      pru_send_notsupp, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp,
1996      pru_sosend_notsupp, soreceive, pru_sopoll_notsupp
1997 };
1998
1999 struct protosw eventsw[] = {
2000      {
2001           SOCK_RAW,             &systemdomain,  SYSPROTO_EVENT,         PR_ATOMIC,
2002           0,            0,              0,              0,
2003           0,
2004           0,            0,              0,              0,
2005 #if __APPLE__
2006           0,
2007 #endif
2008           &event_usrreqs,
2009           0,            0,              0,
2010 #if __APPLE__
2011           {0, 0},       0,              {0}
2012 #endif
2013      }
2014 };
2015
2016 static
2017 struct kern_event_head kern_event_head;
2018
2019 static u_long static_event_id = 0;
2020 struct domain *sysdom = &systemdomain;
2021
2022 static lck_grp_t                *evt_mtx_grp;
2023 static lck_attr_t               *evt_mtx_attr;
2024 static lck_grp_attr_t   *evt_mtx_grp_attr;
2025 lck_mtx_t                               *evt_mutex;
2026 /*
2027  * Install the protosw's for the NKE manager.  Invoked at
2028  *  extension load time
2029  */
2030 int
2031 kern_event_init(void)
2032 {
2033     int retval;
2034
2035     if ((retval = net_add_proto(eventsw, &systemdomain)) != 0) {
2036             log(LOG_WARNING, "Can't install kernel events protocol (%d)\n", retval);
2037             return(retval);
2038         }
2039
2040         /*
2041          * allocate lock group attribute and group for kern event
2042          */
2043         evt_mtx_grp_attr = lck_grp_attr_alloc_init();
2044
2045         evt_mtx_grp = lck_grp_alloc_init("eventlist", evt_mtx_grp_attr);
2046
2047         /*
2048          * allocate the lock attribute for mutexes
2049          */
2050         evt_mtx_attr = lck_attr_alloc_init();
2051         lck_attr_setdefault(evt_mtx_attr);
2052         evt_mutex = lck_mtx_alloc_init(evt_mtx_grp, evt_mtx_attr);
2053         if (evt_mutex == NULL)
2054                         return (ENOMEM);
2055
2056     return(KERN_SUCCESS);
2057 }
2058
2059 static int
2060 kev_attach(struct socket *so, __unused int proto, __unused struct proc *p)
2061 {
2062      int error;
2063      struct kern_event_pcb  *ev_pcb;
2064
2065      error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE);
2066      if (error)
2067           return error;
2068
2069      MALLOC(ev_pcb, struct kern_event_pcb *, sizeof(struct kern_event_pcb), M_PCB, M_WAITOK);
2070      if (ev_pcb == 0)
2071           return ENOBUFS;
2072
2073      ev_pcb->ev_socket = so;
2074      ev_pcb->vendor_code_filter = 0xffffffff;
2075
2076      so->so_pcb = (caddr_t) ev_pcb;
2077          lck_mtx_lock(evt_mutex);
2078      LIST_INSERT_HEAD(&kern_event_head, ev_pcb, ev_link);
2079          lck_mtx_unlock(evt_mutex);
2080
2081      return 0;
2082 }
2083
2084
2085 static int
2086 kev_detach(struct socket *so)
2087 {
2088      struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2089
2090      if (ev_pcb != 0) {
2091                 lck_mtx_lock(evt_mutex);
2092                 LIST_REMOVE(ev_pcb, ev_link);
2093                 lck_mtx_unlock(evt_mutex);
2094                 FREE(ev_pcb, M_PCB);
2095                 so->so_pcb = 0;
2096                 so->so_flags |= SOF_PCBCLEARING;
2097      }
2098
2099      return 0;
2100 }
2101
2102 /*
2103  * For now, kev_vender_code and mbuf_tags use the same
2104  * mechanism.
2105  */
2106 extern errno_t mbuf_tag_id_find_internal(const char *string, u_long *out_id,
2107                                                                                  int create);
2108
2109 errno_t kev_vendor_code_find(
2110         const char      *string,
2111         u_long          *out_vender_code)
2112 {
2113         if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
2114                 return EINVAL;
2115         }
2116         return mbuf_tag_id_find_internal(string, out_vender_code, 1);
2117 }
2118
2119 extern void mbuf_tag_id_first_last(u_long *first, u_long *last);
2120
2121 errno_t  kev_msg_post(struct kev_msg *event_msg)
2122 {
2123         u_long  min_vendor, max_vendor;
2124
2125         mbuf_tag_id_first_last(&min_vendor, &max_vendor);
2126
2127         if (event_msg == NULL)
2128                 return EINVAL;
2129
2130         /* Limit third parties to posting events for registered vendor codes only */
2131         if (event_msg->vendor_code < min_vendor ||
2132                 event_msg->vendor_code > max_vendor)
2133         {
2134                 return EINVAL;
2135         }
2136
2137         return kev_post_msg(event_msg);
2138 }
2139
2140
2141 int  kev_post_msg(struct kev_msg *event_msg)
2142 {
2143      struct mbuf *m, *m2;
2144      struct kern_event_pcb  *ev_pcb;
2145      struct kern_event_msg  *ev;
2146      char              *tmp;
2147      unsigned long     total_size;
2148      int               i;
2149
2150         /* Verify the message is small enough to fit in one mbuf w/o cluster */
2151         total_size = KEV_MSG_HEADER_SIZE;
2152
2153         for (i = 0; i < 5; i++) {
2154                 if (event_msg->dv[i].data_length == 0)
2155                         break;
2156                 total_size += event_msg->dv[i].data_length;
2157         }
2158
2159         if (total_size > MLEN) {
2160                 return EMSGSIZE;
2161         }
2162
2163      m = m_get(M_DONTWAIT, MT_DATA);
2164      if (m == 0)
2165           return ENOBUFS;
2166
2167      ev = mtod(m, struct kern_event_msg *);
2168      total_size = KEV_MSG_HEADER_SIZE;
2169
2170      tmp = (char *) &ev->event_data[0];
2171      for (i = 0; i < 5; i++) {
2172           if (event_msg->dv[i].data_length == 0)
2173                break;
2174
2175           total_size += event_msg->dv[i].data_length;
2176           bcopy(event_msg->dv[i].data_ptr, tmp,
2177                 event_msg->dv[i].data_length);
2178           tmp += event_msg->dv[i].data_length;
2179      }
2180
2181      ev->id = ++static_event_id;
2182      ev->total_size   = total_size;
2183      ev->vendor_code  = event_msg->vendor_code;
2184      ev->kev_class    = event_msg->kev_class;
2185      ev->kev_subclass = event_msg->kev_subclass;
2186      ev->event_code   = event_msg->event_code;
2187
2188      m->m_len = total_size;
2189      lck_mtx_lock(evt_mutex);
2190      for (ev_pcb = LIST_FIRST(&kern_event_head);
2191           ev_pcb;
2192           ev_pcb = LIST_NEXT(ev_pcb, ev_link)) {
2193
2194           if (ev_pcb->vendor_code_filter != KEV_ANY_VENDOR) {
2195                if (ev_pcb->vendor_code_filter != ev->vendor_code)
2196                     continue;
2197
2198                if (ev_pcb->class_filter != KEV_ANY_CLASS) {
2199                     if (ev_pcb->class_filter != ev->kev_class)
2200                          continue;
2201
2202                     if ((ev_pcb->subclass_filter != KEV_ANY_SUBCLASS) &&
2203                         (ev_pcb->subclass_filter != ev->kev_subclass))
2204                          continue;
2205                }
2206           }
2207
2208           m2 = m_copym(m, 0, m->m_len, M_NOWAIT);
2209           if (m2 == 0) {
2210                m_free(m);
2211                    lck_mtx_unlock(evt_mutex);
2212                return ENOBUFS;
2213           }
2214           socket_lock(ev_pcb->ev_socket, 1);
2215           if (sbappendrecord(&ev_pcb->ev_socket->so_rcv, m2))
2216                   sorwakeup(ev_pcb->ev_socket);
2217           socket_unlock(ev_pcb->ev_socket, 1);
2218      }
2219
2220      m_free(m);
2221      lck_mtx_unlock(evt_mutex);
2222      return 0;
2223 }
2224
2225 static int
2226 kev_control(struct socket *so,
2227                         u_long cmd,
2228                         caddr_t data,
2229                         __unused struct ifnet *ifp,
2230                         __unused struct proc *p)
2231 {
2232         struct kev_request *kev_req = (struct kev_request *) data;
2233         struct kern_event_pcb  *ev_pcb;
2234         struct kev_vendor_code *kev_vendor;
2235         u_long  *id_value = (u_long *) data;
2236
2237
2238         switch (cmd) {
2239
2240                 case SIOCGKEVID:
2241                         *id_value = static_event_id;
2242                         break;
2243
2244                 case SIOCSKEVFILT:
2245                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2246                         ev_pcb->vendor_code_filter = kev_req->vendor_code;
2247                         ev_pcb->class_filter     = kev_req->kev_class;
2248                         ev_pcb->subclass_filter  = kev_req->kev_subclass;
2249                         break;
2250
2251                 case SIOCGKEVFILT:
2252                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2253                         kev_req->vendor_code = ev_pcb->vendor_code_filter;
2254                         kev_req->kev_class   = ev_pcb->class_filter;
2255                         kev_req->kev_subclass = ev_pcb->subclass_filter;
2256                         break;
2257
2258                 case SIOCGKEVVENDOR:
2259                         kev_vendor = (struct kev_vendor_code*)data;
2260
2261                         /* Make sure string is NULL terminated */
2262                         kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0;
2263
2264                         return mbuf_tag_id_find_internal(kev_vendor->vendor_string,
2265                                                                                          &kev_vendor->vendor_code, 0);
2266
2267                 default:
2268                         return ENOTSUP;
2269         }
2270
2271         return 0;
2272 }
2273
2274
2275
2276