bsd/kern/kern_event.c

   1 /*
   2  * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  *
  22  */
  23 /*-
  24  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
  25  * All rights reserved.
  26  *
  27  * Redistribution and use in source and binary forms, with or without
  28  * modification, are permitted provided that the following conditions
  29  * are met:
  30  * 1. Redistributions of source code must retain the above copyright
  31  *    notice, this list of conditions and the following disclaimer.
  32  * 2. Redistributions in binary form must reproduce the above copyright
  33  *    notice, this list of conditions and the following disclaimer in the
  34  *    documentation and/or other materials provided with the distribution.
  35  *
  36  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  37  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  38  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  39  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  40  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  41  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  42  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  44  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  45  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46  * SUCH DAMAGE.
  47  */
  48 /*
  49  *      @(#)kern_event.c       1.0 (3/31/2000)
  50  */
  51 #include <stdint.h>
  52
  53 #include <sys/param.h>
  54 #include <sys/systm.h>
  55 #include <sys/filedesc.h>
  56 #include <sys/kernel.h>
  57 #include <sys/proc_internal.h>
  58 #include <sys/kauth.h>
  59 #include <sys/malloc.h>
  60 #include <sys/unistd.h>
  61 #include <sys/file_internal.h>
  62 #include <sys/fcntl.h>
  63 #include <sys/select.h>
  64 #include <sys/queue.h>
  65 #include <sys/event.h>
  66 #include <sys/eventvar.h>
  67 #include <sys/protosw.h>
  68 #include <sys/socket.h>
  69 #include <sys/socketvar.h>
  70 #include <sys/stat.h>
  71 #include <sys/sysctl.h>
  72 #include <sys/uio.h>
  73 #include <sys/sysproto.h>
  74 #include <sys/user.h>
  75 #include <string.h>
  76
  77 #include <kern/lock.h>
  78 #include <kern/clock.h>
  79 #include <kern/thread_call.h>
  80 #include <kern/sched_prim.h>
  81 #include <kern/zalloc.h>
  82 #include <kern/assert.h>
  83
  84 #include <libkern/libkern.h>
  85
  86 extern void unix_syscall_return(int);
  87
  88 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
  89
  90 static inline void kqlock(struct kqueue *kq);
  91 static inline void kqunlock(struct kqueue *kq);
  92
  93 static int      kqlock2knoteuse(struct kqueue *kq, struct knote *kn);
  94 static int      kqlock2knoteusewait(struct kqueue *kq, struct knote *kn);
  95 static int      kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
  96 static int      knoteuse2kqlock(struct kqueue *kq, struct knote *kn);
  97
  98 static void     kqueue_wakeup(struct kqueue *kq);
  99 static int      kqueue_read(struct fileproc *fp, struct uio *uio,
 100                     kauth_cred_t cred, int flags, struct proc *p);
 101 static int      kqueue_write(struct fileproc *fp, struct uio *uio,
 102                     kauth_cred_t cred, int flags, struct proc *p);
 103 static int      kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
 104                     struct proc *p);
 105 static int      kqueue_select(struct fileproc *fp, int which, void *wql,
 106                     struct proc *p);
 107 static int      kqueue_close(struct fileglob *fp, struct proc *p);
 108 static int      kqueue_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
 109 extern int      kqueue_stat(struct fileproc *fp, struct stat *st, struct proc *p);
 110
 111 static struct fileops kqueueops = {
 112         kqueue_read,
 113         kqueue_write,
 114         kqueue_ioctl,
 115         kqueue_select,
 116         kqueue_close,
 117         kqueue_kqfilter,
 118         0
 119 };
 120
 121 static int kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p);
 122 static int kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p);
 123
 124 static int      kevent_callback(struct kqueue *kq, struct kevent *kevp, void *data);
 125 static void     kevent_continue(struct kqueue *kq, void *data, int error);
 126 static void     kevent_scan_continue(void *contp, wait_result_t wait_result);
 127 static int      kevent_process(struct kqueue *kq, kevent_callback_t callback,
 128                                void *data, int *countp, struct proc *p);
 129 static void     knote_put(struct knote *kn);
 130 static int      knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p);
 131 static void     knote_drop(struct knote *kn, struct proc *p);
 132 static void     knote_activate(struct knote *kn);
 133 static void     knote_deactivate(struct knote *kn);
 134 static void     knote_enqueue(struct knote *kn);
 135 static void     knote_dequeue(struct knote *kn);
 136 static struct   knote *knote_alloc(void);
 137 static void     knote_free(struct knote *kn);
 138 extern void     knote_init(void);
 139
 140 static int      filt_fileattach(struct knote *kn);
 141 static struct filterops file_filtops =
 142         { 1, filt_fileattach, NULL, NULL };
 143
 144 static void     filt_kqdetach(struct knote *kn);
 145 static int      filt_kqueue(struct knote *kn, long hint);
 146 static struct filterops kqread_filtops =
 147         { 1, NULL, filt_kqdetach, filt_kqueue };
 148
 149 /*
 150  * placeholder for not-yet-implemented filters
 151  */
 152 static int      filt_badattach(struct knote *kn);
 153 static struct filterops bad_filtops =
 154         { 0, filt_badattach, 0 , 0 };
 155
 156 static int      filt_procattach(struct knote *kn);
 157 static void     filt_procdetach(struct knote *kn);
 158 static int      filt_proc(struct knote *kn, long hint);
 159
 160 static struct filterops proc_filtops =
 161         { 0, filt_procattach, filt_procdetach, filt_proc };
 162
 163 extern struct filterops fs_filtops;
 164
 165 extern struct filterops sig_filtops;
 166
 167
 168 /* Timer filter */
 169 static int      filt_timercompute(struct knote *kn, uint64_t *abs_time);
 170 static void     filt_timerexpire(void *knx, void *param1);
 171 static int      filt_timerattach(struct knote *kn);
 172 static void     filt_timerdetach(struct knote *kn);
 173 static int      filt_timer(struct knote *kn, long hint);
 174
 175 static struct filterops timer_filtops =
 176         { 0, filt_timerattach, filt_timerdetach, filt_timer };
 177
 178 /* to avoid arming timers that fire quicker than we can handle */
 179 static uint64_t filt_timerfloor = 0;
 180
 181 static lck_mtx_t _filt_timerlock;
 182 static void     filt_timerlock(void);
 183 static void     filt_timerunlock(void);
 184
 185 /*
 186  * Sentinel marker for a thread scanning through the list of
 187  * active knotes.
 188  */
 189 static struct filterops threadmarker_filtops =
 190         { 0, filt_badattach, 0, 0 };
 191
 192 static zone_t   knote_zone;
 193
 194 #define KN_HASHSIZE             64              /* XXX should be tunable */
 195 #define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))
 196
 197 #if 0
 198 extern struct filterops aio_filtops;
 199 #endif
 200
 201 /*
 202  * Table for for all system-defined filters.
 203  */
 204 static struct filterops *sysfilt_ops[] = {
 205         &file_filtops,                  /* EVFILT_READ */
 206         &file_filtops,                  /* EVFILT_WRITE */
 207 #if 0
 208         &aio_filtops,                   /* EVFILT_AIO */
 209 #else
 210         &bad_filtops,                   /* EVFILT_AIO */
 211 #endif
 212         &file_filtops,                  /* EVFILT_VNODE */
 213         &proc_filtops,                  /* EVFILT_PROC */
 214         &sig_filtops,                   /* EVFILT_SIGNAL */
 215         &timer_filtops,                 /* EVFILT_TIMER */
 216         &bad_filtops,                   /* EVFILT_MACHPORT */
 217         &fs_filtops                     /* EVFILT_FS */
 218 };
 219
 220 /*
 221  * kqueue/note lock attributes and implementations
 222  *
 223  *      kqueues have locks, while knotes have use counts
 224  *      Most of the knote state is guarded by the object lock.
 225  *      the knote "inuse" count and status use the kqueue lock.
 226  */
 227 lck_grp_attr_t * kq_lck_grp_attr;
 228 lck_grp_t * kq_lck_grp;
 229 lck_attr_t * kq_lck_attr;
 230
 231 static inline void
 232 kqlock(struct kqueue *kq)
 233 {
 234         lck_spin_lock(&kq->kq_lock);
 235 }
 236
 237 static inline void
 238 kqunlock(struct kqueue *kq)
 239 {
 240         lck_spin_unlock(&kq->kq_lock);
 241 }
 242
 243 /*
 244  * Convert a kq lock to a knote use referece.
 245  *
 246  *      If the knote is being dropped, we can't get
 247  *      a use reference, so just return with it
 248  *      still locked.
 249  *
 250  *      - kq locked at entry
 251  *      - unlock on exit if we get the use reference
 252  */
 253 static int
 254 kqlock2knoteuse(struct kqueue *kq, struct knote *kn)
 255 {
 256         if (kn->kn_status & KN_DROPPING)
 257                 return 0;
 258         kn->kn_inuse++;
 259         kqunlock(kq);
 260         return 1;
 261  }
 262
 263 /*
 264  * Convert a kq lock to a knote use referece.
 265  *
 266  *      If the knote is being dropped, we can't get
 267  *      a use reference, so just return with it
 268  *      still locked.
 269  *
 270  *      - kq locked at entry
 271  *      - kq always unlocked on exit
 272  */
 273 static int
 274 kqlock2knoteusewait(struct kqueue *kq, struct knote *kn)
 275 {
 276         if (!kqlock2knoteuse(kq, kn)) {
 277                 kn->kn_status |= KN_DROPWAIT;
 278                 assert_wait(&kn->kn_status, THREAD_UNINT);
 279                 kqunlock(kq);
 280                 thread_block(THREAD_CONTINUE_NULL);
 281                 return 0;
 282         }
 283         return 1;
 284  }
 285
 286 /*
 287  * Convert from a knote use reference back to kq lock.
 288  *
 289  *      Drop a use reference and wake any waiters if
 290  *      this is the last one.
 291  *
 292  *      The exit return indicates if the knote is
 293  *      still alive - but the kqueue lock is taken
 294  *      unconditionally.
 295  */
 296 static int
 297 knoteuse2kqlock(struct kqueue *kq, struct knote *kn)
 298 {
 299         kqlock(kq);
 300         if ((--kn->kn_inuse == 0) &&
 301             (kn->kn_status & KN_USEWAIT)) {
 302                 kn->kn_status &= ~KN_USEWAIT;
 303                 thread_wakeup(&kn->kn_inuse);
 304         }
 305         return ((kn->kn_status & KN_DROPPING) == 0);
 306  }
 307
 308 /*
 309  * Convert a kq lock to a knote drop referece.
 310  *
 311  *      If the knote is in use, wait for the use count
 312  *      to subside.  We first mark our intention to drop
 313  *      it - keeping other users from "piling on."
 314  *      If we are too late, we have to wait for the
 315  *      other drop to complete.
 316  *
 317  *      - kq locked at entry
 318  *      - always unlocked on exit.
 319  *      - caller can't hold any locks that would prevent
 320  *        the other dropper from completing.
 321  */
 322 static int
 323 kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
 324 {
 325
 326         if ((kn->kn_status & KN_DROPPING) == 0) {
 327                 kn->kn_status |= KN_DROPPING;
 328                 if (kn->kn_inuse > 0) {
 329                         kn->kn_status |= KN_USEWAIT;
 330                         assert_wait(&kn->kn_inuse, THREAD_UNINT);
 331                         kqunlock(kq);
 332                         thread_block(THREAD_CONTINUE_NULL);
 333                 } else
 334                         kqunlock(kq);
 335                 return 1;
 336         } else {
 337                 kn->kn_status |= KN_DROPWAIT;
 338                 assert_wait(&kn->kn_status, THREAD_UNINT);
 339                 kqunlock(kq);
 340                 thread_block(THREAD_CONTINUE_NULL);
 341                 return 0;
 342         }
 343 }
 344
 345 /*
 346  * Release a knote use count reference.
 347  */
 348 static void
 349 knote_put(struct knote *kn)
 350 {
 351         struct kqueue *kq = kn->kn_kq;
 352
 353         kqlock(kq);
 354         if ((--kn->kn_inuse == 0) &&
 355             (kn->kn_status & KN_USEWAIT)) {
 356                 kn->kn_status &= ~KN_USEWAIT;
 357                 thread_wakeup(&kn->kn_inuse);
 358         }
 359         kqunlock(kq);
 360  }
 361
 362
 363
 364 static int
 365 filt_fileattach(struct knote *kn)
 366 {
 367
 368         return (fo_kqfilter(kn->kn_fp, kn, current_proc()));
 369 }
 370
 371 #define f_flag f_fglob->fg_flag
 372 #define f_type f_fglob->fg_type
 373 #define f_msgcount f_fglob->fg_msgcount
 374 #define f_cred f_fglob->fg_cred
 375 #define f_ops f_fglob->fg_ops
 376 #define f_offset f_fglob->fg_offset
 377 #define f_data f_fglob->fg_data
 378
 379 static void
 380 filt_kqdetach(struct knote *kn)
 381 {
 382         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 383
 384         kqlock(kq);
 385         KNOTE_DETACH(&kq->kq_sel.si_note, kn);
 386         kqunlock(kq);
 387 }
 388
 389 /*ARGSUSED*/
 390 static int
 391 filt_kqueue(struct knote *kn, __unused long hint)
 392 {
 393         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 394
 395         kn->kn_data = kq->kq_count;
 396         return (kn->kn_data > 0);
 397 }
 398
 399 static int
 400 filt_procattach(struct knote *kn)
 401 {
 402         struct proc *p;
 403         int funnel_state;
 404
 405         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 406
 407         if ((kn->kn_sfflags & (NOTE_TRACK | NOTE_TRACKERR | NOTE_CHILD)) != 0) {
 408                 thread_funnel_set(kernel_flock, funnel_state);
 409                 return (ENOTSUP);
 410         }
 411
 412         p = pfind(kn->kn_id);
 413         if (p == NULL) {
 414                 thread_funnel_set(kernel_flock, funnel_state);
 415                 return (ESRCH);
 416         }
 417
 418         kn->kn_flags |= EV_CLEAR;               /* automatically set */
 419
 420         /* XXX lock the proc here while adding to the list? */
 421         KNOTE_ATTACH(&p->p_klist, kn);
 422
 423         thread_funnel_set(kernel_flock, funnel_state);
 424
 425         return (0);
 426 }
 427
 428 /*
 429  * The knote may be attached to a different process, which may exit,
 430  * leaving nothing for the knote to be attached to.  So when the process
 431  * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 432  * it will be deleted when read out.  However, as part of the knote deletion,
 433  * this routine is called, so a check is needed to avoid actually performing
 434  * a detach, because the original process does not exist any more.
 435  */
 436 static void
 437 filt_procdetach(struct knote *kn)
 438 {
 439         struct proc *p;
 440         int funnel_state;
 441
 442         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 443         p = pfind(kn->kn_id);
 444
 445         if (p != (struct proc *)NULL)
 446                 KNOTE_DETACH(&p->p_klist, kn);
 447
 448         thread_funnel_set(kernel_flock, funnel_state);
 449 }
 450
 451 static int
 452 filt_proc(struct knote *kn, long hint)
 453 {
 454         u_int event;
 455         int funnel_state;
 456
 457         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 458
 459         /*
 460          * mask off extra data
 461          */
 462         event = (u_int)hint & NOTE_PCTRLMASK;
 463
 464         /*
 465          * if the user is interested in this event, record it.
 466          */
 467         if (kn->kn_sfflags & event)
 468                 kn->kn_fflags |= event;
 469
 470         /*
 471          * process is gone, so flag the event as finished.
 472          */
 473         if (event == NOTE_EXIT) {
 474                 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 475                 thread_funnel_set(kernel_flock, funnel_state);
 476                 return (1);
 477         }
 478
 479         event = kn->kn_fflags;
 480         thread_funnel_set(kernel_flock, funnel_state);
 481
 482         return (event != 0);
 483 }
 484
 485 /*
 486  * filt_timercompute - compute absolute timeout
 487  *
 488  *      The saved-data field in the knote contains the
 489  *      time value.  The saved filter-flags indicates
 490  *      the unit of measurement.
 491  *
 492  *      If the timeout is not absolute, adjust it for
 493  *      the current time.
 494  */
 495 static int
 496 filt_timercompute(struct knote *kn, uint64_t *abs_time)
 497 {
 498         uint64_t multiplier;
 499         uint64_t raw;
 500
 501         switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS)) {
 502         case NOTE_SECONDS:
 503                 multiplier = NSEC_PER_SEC;
 504                 break;
 505         case NOTE_USECONDS:
 506                 multiplier = NSEC_PER_USEC;
 507                 break;
 508         case NOTE_NSECONDS:
 509                 multiplier = 1;
 510                 break;
 511         case 0: /* milliseconds (default) */
 512                 multiplier = NSEC_PER_SEC / 1000;
 513                 break;
 514         default:
 515                 return EINVAL;
 516         }
 517         nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw);
 518         if (raw <= filt_timerfloor) {
 519                 *abs_time = 0;
 520                 return 0;
 521         }
 522         if ((kn->kn_sfflags & NOTE_ABSOLUTE) == NOTE_ABSOLUTE) {
 523                 uint32_t seconds, nanoseconds;
 524                 uint64_t now;
 525
 526                 clock_get_calendar_nanotime(&seconds, &nanoseconds);
 527                 nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + nanoseconds,
 528                                             &now);
 529                 if (now >= raw + filt_timerfloor) {
 530                         *abs_time = 0;
 531                         return 0;
 532                 }
 533                 raw -= now;
 534         }
 535         clock_absolutetime_interval_to_deadline(raw, abs_time);
 536         return 0;
 537 }
 538
 539 /*
 540  * filt_timerexpire - the timer callout routine
 541  *
 542  *      Just propagate the timer event into the knote
 543  *      filter routine (by going through the knote
 544  *      synchronization point).  Pass a hint to
 545  *      indicate this is a real event, not just a
 546  *      query from above.
 547  */
 548 static void
 549 filt_timerexpire(void *knx, __unused void *spare)
 550 {
 551         struct klist timer_list;
 552         struct knote *kn = knx;
 553
 554         /* no "object" for timers, so fake a list */
 555         SLIST_INIT(&timer_list);
 556         SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);
 557         KNOTE(&timer_list, 1);
 558 }
 559
 560 /*
 561  * data contains amount of time to sleep, in milliseconds,
 562  * or a pointer to a timespec structure.
 563  */
 564 static int
 565 filt_timerattach(struct knote *kn)
 566 {
 567         thread_call_t callout;
 568         uint64_t deadline;
 569         int error;
 570
 571         error = filt_timercompute(kn, &deadline);
 572         if (error)
 573                 return (error);
 574
 575         if (deadline) {
 576                 callout = thread_call_allocate(filt_timerexpire, kn);
 577                 if (NULL == callout)
 578                         return (ENOMEM);
 579         } else {
 580                 /* handle as immediate */
 581                 kn->kn_sdata = 0;
 582                 callout = NULL;
 583         }
 584
 585         filt_timerlock();
 586         kn->kn_hook = (caddr_t)callout;
 587
 588         /* absolute=EV_ONESHOT */
 589         if (kn->kn_sfflags & NOTE_ABSOLUTE)
 590                 kn->kn_flags |= EV_ONESHOT;
 591
 592         if (deadline) {
 593                 /* all others - if not faking immediate */
 594                 kn->kn_flags |= EV_CLEAR;
 595                 thread_call_enter_delayed(callout, deadline);
 596                 kn->kn_hookid = 0;
 597         } else {
 598                 /* fake immediate */
 599                 kn->kn_hookid = 1;
 600         }
 601         filt_timerunlock();
 602         return (0);
 603 }
 604
 605 static void
 606 filt_timerdetach(struct knote *kn)
 607 {
 608         thread_call_t callout;
 609
 610         filt_timerlock();
 611         callout = (thread_call_t)kn->kn_hook;
 612         if (callout != NULL) {
 613                 boolean_t cancelled;
 614
 615                 /* cancel the callout if we can */
 616                 cancelled = thread_call_cancel(callout);
 617                 if (cancelled) {
 618                         /* got it, just free it */
 619                         kn->kn_hook = NULL;
 620                         filt_timerunlock();
 621                         thread_call_free(callout);
 622                         return;
 623                 }
 624                 /* we have to wait for the expire routine.  */
 625                 kn->kn_hookid = -1;     /* we are detaching */
 626                 assert_wait(&kn->kn_hook, THREAD_UNINT);
 627                 filt_timerunlock();
 628                 thread_block(THREAD_CONTINUE_NULL);
 629                 assert(kn->kn_hook == NULL);
 630                 return;
 631         }
 632         /* nothing to do */
 633         filt_timerunlock();
 634 }
 635
 636
 637
 638 static int
 639 filt_timer(struct knote *kn, __unused long hint)
 640 {
 641         int result;
 642
 643         if (hint) {
 644                 /* real timer pop */
 645                 thread_call_t callout;
 646                 boolean_t detaching;
 647
 648                 filt_timerlock();
 649
 650                 kn->kn_data++;
 651
 652                 detaching = (kn->kn_hookid < 0);
 653                 callout = (thread_call_t)kn->kn_hook;
 654
 655                 if (!detaching && (kn->kn_flags & EV_ONESHOT) == 0) {
 656                         uint64_t deadline;
 657                         int error;
 658
 659                         /* user input data may have changed - deal */
 660                         error = filt_timercompute(kn, &deadline);
 661                         if (error) {
 662                                 kn->kn_flags |= EV_ERROR;
 663                                 kn->kn_data = error;
 664                         } else if (deadline == 0) {
 665                                 /* revert to fake immediate */
 666                                 kn->kn_flags &= ~EV_CLEAR;
 667                                 kn->kn_sdata = 0;
 668                                 kn->kn_hookid = 1;
 669                         } else {
 670                                 /* keep the callout and re-arm */
 671                                 thread_call_enter_delayed(callout, deadline);
 672                                 filt_timerunlock();
 673                                 return 1;
 674                         }
 675                 }
 676                 kn->kn_hook = NULL;
 677                 filt_timerunlock();
 678                 thread_call_free(callout);
 679
 680                 /* if someone is waiting for timer to pop */
 681                 if (detaching)
 682                         thread_wakeup(&kn->kn_hook);
 683
 684                 return 1;
 685         }
 686
 687         /* user-query */
 688         filt_timerlock();
 689
 690         /* change fake timer to real if needed */
 691         while (kn->kn_hookid > 0 && kn->kn_sdata > 0) {
 692                 int error;
 693
 694                 /* update the fake timer (make real) */
 695                 kn->kn_hookid = 0;
 696                 kn->kn_data = 0;
 697                 filt_timerunlock();
 698                 error = filt_timerattach(kn);
 699                 filt_timerlock();
 700                 if (error) {
 701                         kn->kn_flags |= EV_ERROR;
 702                         kn->kn_data = error;
 703                         filt_timerunlock();
 704                         return 1;
 705                 }
 706         }
 707
 708         /* if still fake, pretend it fired */
 709         if (kn->kn_hookid > 0)
 710                 kn->kn_data = 1;
 711
 712         result = (kn->kn_data != 0);
 713         filt_timerunlock();
 714         return result;
 715 }
 716
 717 static void
 718 filt_timerlock(void)
 719 {
 720         lck_mtx_lock(&_filt_timerlock);
 721 }
 722
 723 static void
 724 filt_timerunlock(void)
 725 {
 726         lck_mtx_unlock(&_filt_timerlock);
 727 }
 728
 729 /*
 730  * JMM - placeholder for not-yet-implemented filters
 731  */
 732 static int
 733 filt_badattach(__unused struct knote *kn)
 734 {
 735         return(ENOTSUP);
 736 }
 737
 738
 739 struct kqueue *
 740 kqueue_alloc(struct proc *p)
 741 {
 742         struct filedesc *fdp = p->p_fd;
 743         struct kqueue *kq;
 744
 745         MALLOC_ZONE(kq, struct kqueue *, sizeof(struct kqueue), M_KQUEUE, M_WAITOK);
 746         if (kq != NULL) {
 747                 bzero(kq, sizeof(struct kqueue));
 748                 lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
 749                 TAILQ_INIT(&kq->kq_head);
 750                 TAILQ_INIT(&kq->kq_inprocess);
 751                 kq->kq_fdp = fdp;
 752         }
 753
 754         if (fdp->fd_knlistsize < 0) {
 755                 proc_fdlock(p);
 756                 if (fdp->fd_knlistsize < 0)
 757                         fdp->fd_knlistsize = 0;         /* this process has had a kq */
 758                 proc_fdunlock(p);
 759         }
 760
 761         return kq;
 762 }
 763
 764
 765 /*
 766  * kqueue_dealloc - detach all knotes from a kqueue and free it
 767  *
 768  *      We walk each list looking for knotes referencing this
 769  *      this kqueue.  If we find one, we try to drop it.  But
 770  *      if we fail to get a drop reference, that will wait
 771  *      until it is dropped.  So, we can just restart again
 772  *      safe in the assumption that the list will eventually
 773  *      not contain any more references to this kqueue (either
 774  *      we dropped them all, or someone else did).
 775  *
 776  *      Assumes no new events are being added to the kqueue.
 777  *      Nothing locked on entry or exit.
 778  */
 779 void
 780 kqueue_dealloc(struct kqueue *kq, struct proc *p)
 781 {
 782         struct filedesc *fdp = p->p_fd;
 783         struct knote *kn;
 784         int i;
 785
 786         proc_fdlock(p);
 787         for (i = 0; i < fdp->fd_knlistsize; i++) {
 788                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 789                 while (kn != NULL) {
 790                         if (kq == kn->kn_kq) {
 791                                 kqlock(kq);
 792                                 proc_fdunlock(p);
 793                                 /* drop it ourselves or wait */
 794                                 if (kqlock2knotedrop(kq, kn)) {
 795                                         kn->kn_fop->f_detach(kn);
 796                                         knote_drop(kn, p);
 797                                 }
 798                                 proc_fdlock(p);
 799                                 /* start over at beginning of list */
 800                                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 801                                 continue;
 802                         }
 803                         kn = SLIST_NEXT(kn, kn_link);
 804                 }
 805         }
 806         if (fdp->fd_knhashmask != 0) {
 807                 for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
 808                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 809                         while (kn != NULL) {
 810                                 if (kq == kn->kn_kq) {
 811                                         kqlock(kq);
 812                                         proc_fdunlock(p);
 813                                         /* drop it ourselves or wait */
 814                                         if (kqlock2knotedrop(kq, kn)) {
 815                                                 kn->kn_fop->f_detach(kn);
 816                                                 knote_drop(kn, p);
 817                                         }
 818                                         proc_fdlock(p);
 819                                         /* start over at beginning of list */
 820                                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 821                                         continue;
 822                                 }
 823                                 kn = SLIST_NEXT(kn, kn_link);
 824                         }
 825                 }
 826         }
 827         proc_fdunlock(p);
 828         lck_spin_destroy(&kq->kq_lock, kq_lck_grp);
 829         FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE);
 830 }
 831
 832 int
 833 kqueue(struct proc *p, __unused struct kqueue_args *uap, register_t *retval)
 834 {
 835         struct kqueue *kq;
 836         struct fileproc *fp;
 837         int fd, error;
 838
 839         error = falloc(p, &fp, &fd);
 840         if (error) {
 841                 return (error);
 842         }
 843
 844         kq = kqueue_alloc(p);
 845         if (kq == NULL) {
 846                 fp_free(p, fd, fp);
 847                 return (ENOMEM);
 848         }
 849
 850         fp->f_flag = FREAD | FWRITE;
 851         fp->f_type = DTYPE_KQUEUE;
 852         fp->f_ops = &kqueueops;
 853         fp->f_data = (caddr_t)kq;
 854
 855         proc_fdlock(p);
 856         procfdtbl_releasefd(p, fd, NULL);
 857         fp_drop(p, fd, fp, 1);
 858         proc_fdunlock(p);
 859
 860         *retval = fd;
 861         return (error);
 862 }
 863
 864 int
 865 kqueue_portset_np(__unused struct proc *p,
 866                                   __unused struct kqueue_portset_np_args *uap,
 867                                   __unused register_t *retval)
 868 {
 869                 /* JMM - Placeholder for now */
 870                 return (ENOTSUP);
 871 }
 872
 873 int
 874 kqueue_from_portset_np(__unused struct proc *p,
 875                                            __unused struct kqueue_from_portset_np_args *uap,
 876                                            __unused register_t *retval)
 877 {
 878                 /* JMM - Placeholder for now */
 879                 return (ENOTSUP);
 880 }
 881
 882 static int
 883 kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p)
 884 {
 885         int advance;
 886         int error;
 887
 888         if (IS_64BIT_PROCESS(p)) {
 889                 struct user_kevent kev64;
 890
 891                 advance = sizeof(kev64);
 892                 error = copyin(*addrp, (caddr_t)&kev64, advance);
 893                 if (error)
 894                         return error;
 895                 kevp->ident = CAST_DOWN(uintptr_t, kev64.ident);
 896                 kevp->filter = kev64.filter;
 897                 kevp->flags = kev64.flags;
 898                 kevp->fflags = kev64.fflags;
 899                 kevp->data = CAST_DOWN(intptr_t, kev64.data);
 900                 kevp->udata = kev64.udata;
 901         } else {
 902                 /*
 903                  * compensate for legacy in-kernel kevent layout
 904                  * where the udata field is alredy 64-bit.
 905                  */
 906                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 907                 error = copyin(*addrp, (caddr_t)kevp, advance);
 908         }
 909         if (!error)
 910                 *addrp += advance;
 911         return error;
 912 }
 913
 914 static int
 915 kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p)
 916 {
 917         int advance;
 918         int error;
 919
 920         if (IS_64BIT_PROCESS(p)) {
 921                 struct user_kevent kev64;
 922
 923                 kev64.ident = (uint64_t) kevp->ident;
 924                 kev64.filter = kevp->filter;
 925                 kev64.flags = kevp->flags;
 926                 kev64.fflags = kevp->fflags;
 927                 kev64.data = (int64_t) kevp->data;
 928                 kev64.udata = kevp->udata;
 929                 advance = sizeof(kev64);
 930                 error = copyout((caddr_t)&kev64, *addrp, advance);
 931         } else {
 932                 /*
 933                  * compensate for legacy in-kernel kevent layout
 934                  * where the udata field is alredy 64-bit.
 935                  */
 936                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 937                 error = copyout((caddr_t)kevp, *addrp, advance);
 938         }
 939         if (!error)
 940                 *addrp += advance;
 941         return error;
 942 }
 943
 944 /*
 945  * kevent_continue - continue a kevent syscall after blocking
 946  *
 947  *      assume we inherit a use count on the kq fileglob.
 948  */
 949
 950 static void
 951 kevent_continue(__unused struct kqueue *kq, void *data, int error)
 952 {
 953         struct _kevent *cont_args;
 954         struct fileproc *fp;
 955         register_t *retval;
 956         int noutputs;
 957         int fd;
 958         struct proc *p = current_proc();
 959
 960         cont_args = (struct _kevent *)data;
 961         noutputs = cont_args->eventout;
 962         retval = cont_args->retval;
 963         fd = cont_args->fd;
 964         fp = cont_args->fp;
 965
 966         fp_drop(p, fd, fp, 0);
 967
 968         /* don't restart after signals... */
 969         if (error == ERESTART)
 970                 error = EINTR;
 971         else if (error == EWOULDBLOCK)
 972                 error = 0;
 973         if (error == 0)
 974                 *retval = noutputs;
 975         unix_syscall_return(error);
 976 }
 977
 978 /*
 979  * kevent - [syscall] register and wait for kernel events
 980  *
 981  */
 982
 983 int
 984 kevent(struct proc *p, struct kevent_args *uap, register_t *retval)
 985 {
 986         user_addr_t changelist = uap->changelist;
 987         user_addr_t ueventlist = uap->eventlist;
 988         int nchanges = uap->nchanges;
 989         int nevents = uap->nevents;
 990         int fd = uap->fd;
 991
 992         struct _kevent *cont_args;
 993         uthread_t ut;
 994         struct kqueue *kq;
 995         struct fileproc *fp;
 996         struct kevent kev;
 997         int error, noutputs;
 998         struct timeval atv;
 999
1000         /* convert timeout to absolute - if we have one */
1001         if (uap->timeout != USER_ADDR_NULL) {
1002                 struct timeval rtv;
1003                 if ( IS_64BIT_PROCESS(p) ) {
1004                         struct user_timespec ts;
1005                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1006                         if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0)
1007                                 error = EINVAL;
1008                         else
1009                                 TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1010                 } else {
1011                         struct timespec ts;
1012                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1013                         TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1014                 }
1015                 if (error)
1016                         return error;
1017                 if (itimerfix(&rtv))
1018                         return EINVAL;
1019                 getmicrouptime(&atv);
1020                 timevaladd(&atv, &rtv);
1021         } else {
1022                 atv.tv_sec = 0;
1023                 atv.tv_usec = 0;
1024         }
1025
1026         /* get a usecount for the kq itself */
1027         if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
1028                 return(error);
1029
1030         /* register all the change requests the user provided... */
1031         noutputs = 0;
1032         while (nchanges > 0 && error == 0) {
1033                 error = kevent_copyin(&changelist, &kev, p);
1034                 if (error)
1035                         break;
1036
1037                 kev.flags &= ~EV_SYSFLAGS;
1038                 error = kevent_register(kq, &kev, p);
1039                 if (error && nevents > 0) {
1040                         kev.flags = EV_ERROR;
1041                         kev.data = error;
1042                         error = kevent_copyout(&kev, &ueventlist, p);
1043                         if (error == 0) {
1044                                 nevents--;
1045                                 noutputs++;
1046                         }
1047                 }
1048                 nchanges--;
1049         }
1050
1051         /* store the continuation/completion data in the uthread */
1052         ut = (uthread_t)get_bsdthread_info(current_thread());
1053         cont_args = (struct _kevent *)&ut->uu_state.ss_kevent;
1054         cont_args->fp = fp;
1055         cont_args->fd = fd;
1056         cont_args->retval = retval;
1057         cont_args->eventlist = ueventlist;
1058         cont_args->eventcount = nevents;
1059         cont_args->eventout = noutputs;
1060
1061         if (nevents > 0 && noutputs == 0 && error == 0)
1062                 error = kevent_scan(kq, kevent_callback,
1063                                     kevent_continue, cont_args,
1064                                     &atv, p);
1065         kevent_continue(kq, cont_args, error);
1066         /* NOTREACHED */
1067         return error;
1068 }
1069
1070
1071 /*
1072  * kevent_callback - callback for each individual event
1073  *
1074  *      called with nothing locked
1075  *      caller holds a reference on the kqueue
1076  */
1077
1078 static int
1079 kevent_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
1080 {
1081         struct _kevent *cont_args;
1082         int error;
1083
1084         cont_args = (struct _kevent *)data;
1085         assert(cont_args->eventout < cont_arg->eventcount);
1086
1087         /*
1088          * Copy out the appropriate amount of event data for this user.
1089          */
1090         error = kevent_copyout(kevp, &cont_args->eventlist, current_proc());
1091
1092         /*
1093          * If there isn't space for additional events, return
1094          * a harmless error to stop the processing here
1095          */
1096         if (error == 0 && ++cont_args->eventout == cont_args->eventcount)
1097                         error = EWOULDBLOCK;
1098         return error;
1099 }
1100
1101 /*
1102  * kevent_register - add a new event to a kqueue
1103  *
1104  *      Creates a mapping between the event source and
1105  *      the kqueue via a knote data structure.
1106  *
1107  *      Because many/most the event sources are file
1108  *      descriptor related, the knote is linked off
1109  *      the filedescriptor table for quick access.
1110  *
1111  *      called with nothing locked
1112  *      caller holds a reference on the kqueue
1113  */
1114
1115 int
1116 kevent_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
1117 {
1118         struct filedesc *fdp = kq->kq_fdp;
1119         struct filterops *fops;
1120         struct fileproc *fp = NULL;
1121         struct knote *kn = NULL;
1122         int error = 0;
1123
1124         if (kev->filter < 0) {
1125                 if (kev->filter + EVFILT_SYSCOUNT < 0)
1126                         return (EINVAL);
1127                 fops = sysfilt_ops[~kev->filter];       /* to 0-base index */
1128         } else {
1129                 /*
1130                  * XXX
1131                  * filter attach routine is responsible for insuring that
1132                  * the identifier can be attached to it.
1133                  */
1134                 printf("unknown filter: %d\n", kev->filter);
1135                 return (EINVAL);
1136         }
1137
1138         /* this iocount needs to be dropped if it is not registered */
1139         if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 0)) != 0)
1140                 return(error);
1141
1142  restart:
1143         proc_fdlock(p);
1144         if (fops->f_isfd) {
1145                 /* fd-based knotes are linked off the fd table */
1146                 if (kev->ident < (u_int)fdp->fd_knlistsize) {
1147                         SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
1148                                 if (kq == kn->kn_kq &&
1149                                     kev->filter == kn->kn_filter)
1150                                         break;
1151                 }
1152         } else {
1153                 /* hash non-fd knotes here too */
1154                 if (fdp->fd_knhashmask != 0) {
1155                         struct klist *list;
1156
1157                         list = &fdp->fd_knhash[
1158                             KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
1159                         SLIST_FOREACH(kn, list, kn_link)
1160                                 if (kev->ident == kn->kn_id &&
1161                                     kq == kn->kn_kq &&
1162                                     kev->filter == kn->kn_filter)
1163                                         break;
1164                 }
1165         }
1166
1167         /*
1168          * kn now contains the matching knote, or NULL if no match
1169          */
1170         if (kn == NULL) {
1171                 if ((kev->flags & (EV_ADD|EV_DELETE)) == EV_ADD) {
1172                         kn = knote_alloc();
1173                         if (kn == NULL) {
1174                                 proc_fdunlock(p);
1175                                 error = ENOMEM;
1176                                 goto done;
1177                         }
1178                         kn->kn_fp = fp;
1179                         kn->kn_kq = kq;
1180                         kn->kn_tq = &kq->kq_head;
1181                         kn->kn_fop = fops;
1182                         kn->kn_sfflags = kev->fflags;
1183                         kn->kn_sdata = kev->data;
1184                         kev->fflags = 0;
1185                         kev->data = 0;
1186                         kn->kn_kevent = *kev;
1187                         kn->kn_inuse = 1;  /* for f_attach() */
1188                         kn->kn_status = 0;
1189
1190                         /* before anyone can find it */
1191                         if (kev->flags & EV_DISABLE)
1192                                 kn->kn_status |= KN_DISABLED;
1193
1194                         error = knote_fdpattach(kn, fdp, p);
1195                         proc_fdunlock(p);
1196
1197                         if (error) {
1198                                 knote_free(kn);
1199                                 goto done;
1200                         }
1201
1202                         /*
1203                          * apply reference count to knote structure, and
1204                          * do not release it at the end of this routine.
1205                          */
1206                         fp = NULL;
1207
1208                         /*
1209                          * If the attach fails here, we can drop it knowing
1210                          * that nobody else has a reference to the knote.
1211                          */
1212                         if ((error = fops->f_attach(kn)) != 0) {
1213                                 knote_drop(kn, p);
1214                                 goto done;
1215                         }
1216                 } else {
1217                         proc_fdunlock(p);
1218                         error = ENOENT;
1219                         goto done;
1220                 }
1221         } else {
1222                 /* existing knote - get kqueue lock */
1223                 kqlock(kq);
1224                 proc_fdunlock(p);
1225
1226                 if (kev->flags & EV_DELETE) {
1227                         knote_dequeue(kn);
1228                         kn->kn_status |= KN_DISABLED;
1229                         if (kqlock2knotedrop(kq, kn)) {
1230                                 kn->kn_fop->f_detach(kn);
1231                                 knote_drop(kn, p);
1232                         }
1233                         goto done;
1234                 }
1235
1236                 /* update status flags for existing knote */
1237                 if (kev->flags & EV_DISABLE) {
1238                         knote_dequeue(kn);
1239                         kn->kn_status |= KN_DISABLED;
1240                 } else if (kev->flags & EV_ENABLE) {
1241                         kn->kn_status &= ~KN_DISABLED;
1242                         if (kn->kn_status & KN_ACTIVE)
1243                                 knote_enqueue(kn);
1244                 }
1245
1246                 /*
1247                  * If somebody is in the middle of dropping this
1248                  * knote - go find/insert a new one.  But we have
1249                  * wait for this one to go away first.
1250                  */
1251                 if (!kqlock2knoteusewait(kq, kn))
1252                         /* kqueue unlocked */
1253                         goto restart;
1254
1255                 /*
1256                  * The user may change some filter values after the
1257                  * initial EV_ADD, but doing so will not reset any
1258                  * filter which have already been triggered.
1259                  */
1260                 kn->kn_sfflags = kev->fflags;
1261                 kn->kn_sdata = kev->data;
1262                 kn->kn_kevent.udata = kev->udata;
1263         }
1264
1265         /* still have use ref on knote */
1266         if (kn->kn_fop->f_event(kn, 0)) {
1267                 if (knoteuse2kqlock(kq, kn))
1268                         knote_activate(kn);
1269                 kqunlock(kq);
1270         } else {
1271                 knote_put(kn);
1272         }
1273
1274 done:
1275         if (fp != NULL)
1276                 fp_drop(p, kev->ident, fp, 0);
1277         return (error);
1278 }
1279
1280 /*
1281  * kevent_process - process the triggered events in a kqueue
1282  *
1283  *      Walk the queued knotes and validate that they are
1284  *      really still triggered events by calling the filter
1285  *      routines (if necessary).  Hold a use reference on
1286  *      the knote to avoid it being detached. For each event
1287  *      that is still considered triggered, invoke the
1288  *      callback routine provided.
1289  *
1290  *      caller holds a reference on the kqueue.
1291  *      kqueue locked on entry and exit - but may be dropped
1292  */
1293
1294 static int
1295 kevent_process(struct kqueue *kq,
1296                kevent_callback_t callback,
1297                void *data,
1298                int *countp,
1299                struct proc *p)
1300 {
1301         struct knote *kn;
1302         struct kevent kev;
1303         int nevents;
1304         int error;
1305
1306  restart:
1307         if (kq->kq_count == 0) {
1308                 *countp = 0;
1309                 return 0;
1310         }
1311
1312         /* if someone else is processing the queue, wait */
1313         if (!TAILQ_EMPTY(&kq->kq_inprocess)) {
1314                 assert_wait(&kq->kq_inprocess, THREAD_UNINT);
1315                 kq->kq_state |= KQ_PROCWAIT;
1316                 kqunlock(kq);
1317                 thread_block(THREAD_CONTINUE_NULL);
1318                 kqlock(kq);
1319                 goto restart;
1320         }
1321
1322         error = 0;
1323         nevents = 0;
1324         while (error == 0 &&
1325                (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) {
1326
1327                 /*
1328                  * move knote to the processed queue.
1329                  * this is also protected by the kq lock.
1330                  */
1331                 assert(kn->kn_tq == &kq->kq_head);
1332                 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1333                 kn->kn_tq = &kq->kq_inprocess;
1334                 TAILQ_INSERT_TAIL(&kq->kq_inprocess, kn, kn_tqe);
1335
1336                 /*
1337                  * Non-EV_ONESHOT events must be re-validated.
1338                  *
1339                  * Convert our lock to a use-count and call the event's
1340                  * filter routine to update.
1341                  *
1342                  * If the event is dropping (or no longer valid), we
1343                  * already have it off the active queue, so just
1344                  * finish the job of deactivating it.
1345                  */
1346                 if ((kn->kn_flags & EV_ONESHOT) == 0) {
1347                         int result;
1348
1349                         if (kqlock2knoteuse(kq, kn)) {
1350
1351                                 /* call the filter with just a ref */
1352                                 result = kn->kn_fop->f_event(kn, 0);
1353
1354                                 if (!knoteuse2kqlock(kq, kn) || result == 0) {
1355                                         knote_deactivate(kn);
1356                                         continue;
1357                                 }
1358                         } else {
1359                                 knote_deactivate(kn);
1360                                 continue;
1361                         }
1362                 }
1363
1364                 /*
1365                  * Got a valid triggered knote with the kqueue
1366                  * still locked.  Snapshot the data, and determine
1367                  * how to dispatch the knote for future events.
1368                  */
1369                 kev = kn->kn_kevent;
1370
1371                 /* now what happens to it? */
1372                 if (kn->kn_flags & EV_ONESHOT) {
1373                         knote_deactivate(kn);
1374                         if (kqlock2knotedrop(kq, kn)) {
1375                                 kn->kn_fop->f_detach(kn);
1376                                 knote_drop(kn, p);
1377                         }
1378                 } else if (kn->kn_flags & EV_CLEAR) {
1379                         knote_deactivate(kn);
1380                         kn->kn_data = 0;
1381                         kn->kn_fflags = 0;
1382                         kqunlock(kq);
1383                 } else {
1384                         /*
1385                          * leave on in-process queue.  We'll
1386                          * move all the remaining ones back
1387                          * the kq queue and wakeup any
1388                          * waiters when we are done.
1389                          */
1390                         kqunlock(kq);
1391                 }
1392
1393                 /* callback to handle each event as we find it */
1394                 error = (callback)(kq, &kev, data);
1395                 nevents++;
1396
1397                 kqlock(kq);
1398         }
1399
1400         /*
1401          * With the kqueue still locked, move any knotes
1402          * remaining on the in-process queue back to the
1403          * kq's queue and wake up any waiters.
1404          */
1405         while ((kn = TAILQ_FIRST(&kq->kq_inprocess)) != NULL) {
1406                 assert(kn->kn_tq == &kq->kq_inprocess);
1407                 TAILQ_REMOVE(&kq->kq_inprocess, kn, kn_tqe);
1408                 kn->kn_tq = &kq->kq_head;
1409                 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1410         }
1411         if (kq->kq_state & KQ_PROCWAIT) {
1412                 kq->kq_state &= ~KQ_PROCWAIT;
1413                 thread_wakeup(&kq->kq_inprocess);
1414         }
1415
1416         *countp = nevents;
1417         return error;
1418 }
1419
1420
1421 static void
1422 kevent_scan_continue(void *data, wait_result_t wait_result)
1423 {
1424         uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1425         struct _kevent_scan * cont_args = &ut->uu_state.ss_kevent_scan;
1426         struct kqueue *kq = (struct kqueue *)data;
1427         int error;
1428         int count;
1429
1430         /* convert the (previous) wait_result to a proper error */
1431         switch (wait_result) {
1432         case THREAD_AWAKENED:
1433                 kqlock(kq);
1434                 error = kevent_process(kq, cont_args->call, cont_args, &count, current_proc());
1435                 if (error == 0 && count == 0) {
1436                         assert_wait_deadline(kq, THREAD_ABORTSAFE, cont_args->deadline);
1437                         kq->kq_state |= KQ_SLEEP;
1438                         kqunlock(kq);
1439                         thread_block_parameter(kevent_scan_continue, kq);
1440                         /* NOTREACHED */
1441                 }
1442                 kqunlock(kq);
1443                 break;
1444         case THREAD_TIMED_OUT:
1445                 error = EWOULDBLOCK;
1446                 break;
1447         case THREAD_INTERRUPTED:
1448                 error = EINTR;
1449                 break;
1450         default:
1451                 panic("kevent_scan_cont() - invalid wait_result (%d)", wait_result);
1452                 error = 0;
1453         }
1454
1455         /* call the continuation with the results */
1456         assert(cont_args->cont != NULL);
1457         (cont_args->cont)(kq, cont_args->data, error);
1458 }
1459
1460
1461 /*
1462  * kevent_scan - scan and wait for events in a kqueue
1463  *
1464  *      Process the triggered events in a kqueue.
1465  *
1466  *      If there are no events triggered arrange to
1467  *      wait for them. If the caller provided a
1468  *      continuation routine, then kevent_scan will
1469  *      also.
1470  *
1471  *      The callback routine must be valid.
1472  *      The caller must hold a use-count reference on the kq.
1473  */
1474
1475 int
1476 kevent_scan(struct kqueue *kq,
1477             kevent_callback_t callback,
1478             kevent_continue_t continuation,
1479             void *data,
1480             struct timeval *atvp,
1481             struct proc *p)
1482 {
1483         thread_continue_t cont = THREAD_CONTINUE_NULL;
1484         uint64_t deadline;
1485         int error;
1486         int first;
1487
1488         assert(callback != NULL);
1489
1490         first = 1;
1491         for (;;) {
1492                 wait_result_t wait_result;
1493                 int count;
1494
1495                 /*
1496                  * Make a pass through the kq to find events already
1497                  * triggered.
1498                  */
1499                 kqlock(kq);
1500                 error = kevent_process(kq, callback, data, &count, p);
1501                 if (error || count)
1502                         break; /* lock still held */
1503
1504                 /* looks like we have to consider blocking */
1505                 if (first) {
1506                         first = 0;
1507                         /* convert the timeout to a deadline once */
1508                         if (atvp->tv_sec || atvp->tv_usec) {
1509                                 uint32_t seconds, nanoseconds;
1510                                 uint64_t now;
1511
1512                                 clock_get_uptime(&now);
1513                                 nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC +
1514                                                             atvp->tv_usec * NSEC_PER_USEC,
1515                                                             &deadline);
1516                                 if (now >= deadline) {
1517                                         /* non-blocking call */
1518                                         error = EWOULDBLOCK;
1519                                         break; /* lock still held */
1520                                 }
1521                                 deadline -= now;
1522                                 clock_absolutetime_interval_to_deadline(deadline, &deadline);
1523                         } else {
1524                                 deadline = 0;   /* block forever */
1525                         }
1526
1527                         if (continuation) {
1528                                 uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1529                                 struct _kevent_scan *cont_args = &ut->uu_state.ss_kevent_scan;
1530
1531                                 cont_args->call = callback;
1532                                 cont_args->cont = continuation;
1533                                 cont_args->deadline = deadline;
1534                                 cont_args->data = data;
1535                                 cont = kevent_scan_continue;
1536                         }
1537                 }
1538
1539                 /* go ahead and wait */
1540                 assert_wait_deadline(kq, THREAD_ABORTSAFE, deadline);
1541                 kq->kq_state |= KQ_SLEEP;
1542                 kqunlock(kq);
1543                 wait_result = thread_block_parameter(cont, kq);
1544                 /* NOTREACHED if (continuation != NULL) */
1545
1546                 switch (wait_result) {
1547                 case THREAD_AWAKENED:
1548                         continue;
1549                 case THREAD_TIMED_OUT:
1550                         return EWOULDBLOCK;
1551                 case THREAD_INTERRUPTED:
1552                         return EINTR;
1553                 default:
1554                         panic("kevent_scan - bad wait_result (%d)",
1555                               wait_result);
1556                         error = 0;
1557                 }
1558         }
1559         kqunlock(kq);
1560         return error;
1561 }
1562
1563
1564 /*
1565  * XXX
1566  * This could be expanded to call kqueue_scan, if desired.
1567  */
1568 /*ARGSUSED*/
1569 static int
1570 kqueue_read(__unused struct fileproc *fp,
1571                         __unused struct uio *uio,
1572                         __unused kauth_cred_t cred,
1573                         __unused int flags,
1574                         __unused struct proc *p)
1575 {
1576         return (ENXIO);
1577 }
1578
1579 /*ARGSUSED*/
1580 static int
1581 kqueue_write(__unused struct fileproc *fp,
1582                          __unused struct uio *uio,
1583                          __unused kauth_cred_t cred,
1584                          __unused int flags,
1585                          __unused struct proc *p)
1586 {
1587         return (ENXIO);
1588 }
1589
1590 /*ARGSUSED*/
1591 static int
1592 kqueue_ioctl(__unused struct fileproc *fp,
1593                          __unused u_long com,
1594                          __unused caddr_t data,
1595                          __unused struct proc *p)
1596 {
1597         return (ENOTTY);
1598 }
1599
1600 /*ARGSUSED*/
1601 static int
1602 kqueue_select(struct fileproc *fp, int which, void *wql, struct proc *p)
1603 {
1604         struct kqueue *kq = (struct kqueue *)fp->f_data;
1605         int retnum = 0;
1606
1607         if (which == FREAD) {
1608                 kqlock(kq);
1609                 if (kq->kq_count) {
1610                         retnum = 1;
1611                 } else {
1612                         selrecord(p, &kq->kq_sel, wql);
1613                         kq->kq_state |= KQ_SEL;
1614                 }
1615                 kqunlock(kq);
1616         }
1617         return (retnum);
1618 }
1619
1620 /*
1621  * kqueue_close -
1622  */
1623 /*ARGSUSED*/
1624 static int
1625 kqueue_close(struct fileglob *fg, struct proc *p)
1626 {
1627         struct kqueue *kq = (struct kqueue *)fg->fg_data;
1628
1629         kqueue_dealloc(kq, p);
1630         fg->fg_data = NULL;
1631         return (0);
1632 }
1633
1634 /*ARGSUSED*/
1635 /*
1636  * The callers has taken a use-count reference on this kqueue and will donate it
1637  * to the kqueue we are being added to.  This keeps the kqueue from closing until
1638  * that relationship is torn down.
1639  */
1640 static int
1641 kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
1642 {
1643         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
1644
1645         if (kn->kn_filter != EVFILT_READ)
1646                 return (1);
1647
1648         kn->kn_fop = &kqread_filtops;
1649         kqlock(kq);
1650         KNOTE_ATTACH(&kq->kq_sel.si_note, kn);
1651         kqunlock(kq);
1652         return (0);
1653 }
1654
1655 /*ARGSUSED*/
1656 int
1657 kqueue_stat(struct fileproc *fp, struct stat *st, __unused struct proc *p)
1658 {
1659         struct kqueue *kq = (struct kqueue *)fp->f_data;
1660
1661         bzero((void *)st, sizeof(*st));
1662         st->st_size = kq->kq_count;
1663         st->st_blksize = sizeof(struct kevent);
1664         st->st_mode = S_IFIFO;
1665         return (0);
1666 }
1667
1668 /*
1669  * Called with the kqueue locked
1670  */
1671 static void
1672 kqueue_wakeup(struct kqueue *kq)
1673 {
1674
1675         if (kq->kq_state & KQ_SLEEP) {
1676                 kq->kq_state &= ~KQ_SLEEP;
1677                 thread_wakeup(kq);
1678         }
1679         if (kq->kq_state & KQ_SEL) {
1680                 kq->kq_state &= ~KQ_SEL;
1681                 selwakeup(&kq->kq_sel);
1682         }
1683         KNOTE(&kq->kq_sel.si_note, 0);
1684 }
1685
1686 void
1687 klist_init(struct klist *list)
1688 {
1689         SLIST_INIT(list);
1690 }
1691
1692
1693 /*
1694  * Query/Post each knote in the object's list
1695  *
1696  *      The object lock protects the list. It is assumed
1697  *      that the filter/event routine for the object can
1698  *      determine that the object is already locked (via
1699  *      the hind) and not deadlock itself.
1700  *
1701  *      The object lock should also hold off pending
1702  *      detach/drop operations.  But we'll prevent it here
1703  *      too - just in case.
1704  */
1705 void
1706 knote(struct klist *list, long hint)
1707 {
1708         struct knote *kn;
1709
1710         SLIST_FOREACH(kn, list, kn_selnext) {
1711                 struct kqueue *kq = kn->kn_kq;
1712
1713                 kqlock(kq);
1714                 if (kqlock2knoteuse(kq, kn)) {
1715                         int result;
1716
1717                         /* call the event with only a use count */
1718                         result = kn->kn_fop->f_event(kn, hint);
1719
1720                         /* if its not going away and triggered */
1721                         if (knoteuse2kqlock(kq, kn) && result)
1722                                 knote_activate(kn);
1723                         /* lock held again */
1724                 }
1725                 kqunlock(kq);
1726         }
1727 }
1728
1729 /*
1730  * attach a knote to the specified list.  Return true if this is the first entry.
1731  * The list is protected by whatever lock the object it is associated with uses.
1732  */
1733 int
1734 knote_attach(struct klist *list, struct knote *kn)
1735 {
1736         int ret = SLIST_EMPTY(list);
1737         SLIST_INSERT_HEAD(list, kn, kn_selnext);
1738         return ret;
1739 }
1740
1741 /*
1742  * detach a knote from the specified list.  Return true if that was the last entry.
1743  * The list is protected by whatever lock the object it is associated with uses.
1744  */
1745 int
1746 knote_detach(struct klist *list, struct knote *kn)
1747 {
1748         SLIST_REMOVE(list, kn, knote, kn_selnext);
1749         return SLIST_EMPTY(list);
1750 }
1751
1752 /*
1753  * remove all knotes referencing a specified fd
1754  *
1755  * Essentially an inlined knote_remove & knote_drop
1756  * when we know for sure that the thing is a file
1757  *
1758  * Entered with the proc_fd lock already held.
1759  * It returns the same way, but may drop it temporarily.
1760  */
1761 void
1762 knote_fdclose(struct proc *p, int fd)
1763 {
1764         struct filedesc *fdp = p->p_fd;
1765         struct klist *list;
1766         struct knote *kn;
1767
1768         list = &fdp->fd_knlist[fd];
1769         while ((kn = SLIST_FIRST(list)) != NULL) {
1770                 struct kqueue *kq = kn->kn_kq;
1771
1772                 kqlock(kq);
1773                 proc_fdunlock(p);
1774
1775                 /*
1776                  * Convert the lock to a drop ref.
1777                  * If we get it, go ahead and drop it.
1778                  * Otherwise, we waited for it to
1779                  * be dropped by the other guy, so
1780                  * it is safe to move on in the list.
1781                  */
1782                 if (kqlock2knotedrop(kq, kn)) {
1783                         kn->kn_fop->f_detach(kn);
1784                         knote_drop(kn, p);
1785                 }
1786
1787                 proc_fdlock(p);
1788
1789                 /* the fd tables may have changed - start over */
1790                 list = &fdp->fd_knlist[fd];
1791         }
1792 }
1793
1794 /* proc_fdlock held on entry (and exit) */
1795 static int
1796 knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
1797 {
1798         struct klist *list = NULL;
1799
1800         if (! kn->kn_fop->f_isfd) {
1801                 if (fdp->fd_knhashmask == 0)
1802                         fdp->fd_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
1803                             &fdp->fd_knhashmask);
1804                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1805         } else {
1806                 if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
1807                         u_int size = 0;
1808
1809                         /* have to grow the fd_knlist */
1810                         size = fdp->fd_knlistsize;
1811                         while (size <= kn->kn_id)
1812                                 size += KQEXTENT;
1813                         MALLOC(list, struct klist *,
1814                                size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
1815                         if (list == NULL)
1816                                 return (ENOMEM);
1817
1818                         bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
1819                               fdp->fd_knlistsize * sizeof(struct klist *));
1820                         bzero((caddr_t)list +
1821                               fdp->fd_knlistsize * sizeof(struct klist *),
1822                               (size - fdp->fd_knlistsize) * sizeof(struct klist *));
1823                         FREE(fdp->fd_knlist, M_KQUEUE);
1824                         fdp->fd_knlist = list;
1825                         fdp->fd_knlistsize = size;
1826                 }
1827                 list = &fdp->fd_knlist[kn->kn_id];
1828         }
1829         SLIST_INSERT_HEAD(list, kn, kn_link);
1830         return (0);
1831 }
1832
1833
1834
1835 /*
1836  * should be called at spl == 0, since we don't want to hold spl
1837  * while calling fdrop and free.
1838  */
1839 static void
1840 knote_drop(struct knote *kn, struct proc *p)
1841 {
1842         struct filedesc *fdp = p->p_fd;
1843         struct kqueue *kq = kn->kn_kq;
1844         struct klist *list;
1845
1846         proc_fdlock(p);
1847         if (kn->kn_fop->f_isfd)
1848                 list = &fdp->fd_knlist[kn->kn_id];
1849         else
1850                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1851
1852         SLIST_REMOVE(list, kn, knote, kn_link);
1853         kqlock(kq);
1854         knote_dequeue(kn);
1855         if (kn->kn_status & KN_DROPWAIT)
1856                 thread_wakeup(&kn->kn_status);
1857         kqunlock(kq);
1858         proc_fdunlock(p);
1859
1860         if (kn->kn_fop->f_isfd)
1861                 fp_drop(p, kn->kn_id, kn->kn_fp, 0);
1862
1863         knote_free(kn);
1864 }
1865
1866 /* called with kqueue lock held */
1867 static void
1868 knote_activate(struct knote *kn)
1869 {
1870         struct kqueue *kq = kn->kn_kq;
1871
1872         kn->kn_status |= KN_ACTIVE;
1873         knote_enqueue(kn);
1874         kqueue_wakeup(kq);
1875  }
1876
1877 /* called with kqueue lock held */
1878 static void
1879 knote_deactivate(struct knote *kn)
1880 {
1881         kn->kn_status &= ~KN_ACTIVE;
1882         knote_dequeue(kn);
1883 }
1884
1885 /* called with kqueue lock held */
1886 static void
1887 knote_enqueue(struct knote *kn)
1888 {
1889         struct kqueue *kq = kn->kn_kq;
1890
1891         if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
1892                 struct kqtailq *tq = kn->kn_tq;
1893
1894                 TAILQ_INSERT_TAIL(tq, kn, kn_tqe);
1895                 kn->kn_status |= KN_QUEUED;
1896                 kq->kq_count++;
1897         }
1898 }
1899
1900 /* called with kqueue lock held */
1901 static void
1902 knote_dequeue(struct knote *kn)
1903 {
1904         struct kqueue *kq = kn->kn_kq;
1905
1906         assert((kn->kn_status & KN_DISABLED) == 0);
1907         if ((kn->kn_status & KN_QUEUED) == KN_QUEUED) {
1908                 struct kqtailq *tq = kn->kn_tq;
1909
1910                 TAILQ_REMOVE(tq, kn, kn_tqe);
1911                 kn->kn_tq = &kq->kq_head;
1912                 kn->kn_status &= ~KN_QUEUED;
1913                 kq->kq_count--;
1914         }
1915 }
1916
1917 void
1918 knote_init(void)
1919 {
1920         knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), 8192, "knote zone");
1921
1922         /* allocate kq lock group attribute and group */
1923         kq_lck_grp_attr= lck_grp_attr_alloc_init();
1924         lck_grp_attr_setstat(kq_lck_grp_attr);
1925
1926         kq_lck_grp = lck_grp_alloc_init("kqueue",  kq_lck_grp_attr);
1927
1928         /* Allocate kq lock attribute */
1929         kq_lck_attr = lck_attr_alloc_init();
1930         lck_attr_setdefault(kq_lck_attr);
1931
1932         /* Initialize the timer filter lock */
1933         lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
1934 }
1935 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
1936
1937 static struct knote *
1938 knote_alloc(void)
1939 {
1940         return ((struct knote *)zalloc(knote_zone));
1941 }
1942
1943 static void
1944 knote_free(struct knote *kn)
1945 {
1946         zfree(knote_zone, kn);
1947 }
1948
1949 #include <sys/param.h>
1950 #include <sys/socket.h>
1951 #include <sys/protosw.h>
1952 #include <sys/domain.h>
1953 #include <sys/mbuf.h>
1954 #include <sys/kern_event.h>
1955 #include <sys/malloc.h>
1956 #include <sys/sys_domain.h>
1957 #include <sys/syslog.h>
1958
1959
1960 static int kev_attach(struct socket *so, int proto, struct proc *p);
1961 static int kev_detach(struct socket *so);
1962 static int kev_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p);
1963
1964 struct pr_usrreqs event_usrreqs = {
1965      pru_abort_notsupp, pru_accept_notsupp, kev_attach, pru_bind_notsupp, pru_connect_notsupp,
1966      pru_connect2_notsupp, kev_control, kev_detach, pru_disconnect_notsupp,
1967      pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp, pru_rcvoob_notsupp,
1968      pru_send_notsupp, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp,
1969      pru_sosend_notsupp, soreceive, pru_sopoll_notsupp
1970 };
1971
1972 struct protosw eventsw[] = {
1973      {
1974           SOCK_RAW,             &systemdomain,  SYSPROTO_EVENT,         PR_ATOMIC,
1975           0,            0,              0,              0,
1976           0,
1977           0,            0,              0,              0,
1978 #if __APPLE__
1979           0,
1980 #endif
1981           &event_usrreqs,
1982           0,            0,              0,
1983 #if __APPLE__
1984           {0, 0},       0,              {0}
1985 #endif
1986      }
1987 };
1988
1989 static
1990 struct kern_event_head kern_event_head;
1991
1992 static u_long static_event_id = 0;
1993 struct domain *sysdom = &systemdomain;
1994
1995 static lck_grp_t                *evt_mtx_grp;
1996 static lck_attr_t               *evt_mtx_attr;
1997 static lck_grp_attr_t   *evt_mtx_grp_attr;
1998 lck_mtx_t                               *evt_mutex;
1999 /*
2000  * Install the protosw's for the NKE manager.  Invoked at
2001  *  extension load time
2002  */
2003 int
2004 kern_event_init(void)
2005 {
2006     int retval;
2007
2008     if ((retval = net_add_proto(eventsw, &systemdomain)) != 0) {
2009             log(LOG_WARNING, "Can't install kernel events protocol (%d)\n", retval);
2010             return(retval);
2011         }
2012
2013         /*
2014          * allocate lock group attribute and group for kern event
2015          */
2016         evt_mtx_grp_attr = lck_grp_attr_alloc_init();
2017
2018         evt_mtx_grp = lck_grp_alloc_init("eventlist", evt_mtx_grp_attr);
2019
2020         /*
2021          * allocate the lock attribute for mutexes
2022          */
2023         evt_mtx_attr = lck_attr_alloc_init();
2024         lck_attr_setdefault(evt_mtx_attr);
2025         evt_mutex = lck_mtx_alloc_init(evt_mtx_grp, evt_mtx_attr);
2026         if (evt_mutex == NULL)
2027                         return (ENOMEM);
2028
2029     return(KERN_SUCCESS);
2030 }
2031
2032 static int
2033 kev_attach(struct socket *so, __unused int proto, __unused struct proc *p)
2034 {
2035      int error;
2036      struct kern_event_pcb  *ev_pcb;
2037
2038      error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE);
2039      if (error)
2040           return error;
2041
2042      MALLOC(ev_pcb, struct kern_event_pcb *, sizeof(struct kern_event_pcb), M_PCB, M_WAITOK);
2043      if (ev_pcb == 0)
2044           return ENOBUFS;
2045
2046      ev_pcb->ev_socket = so;
2047      ev_pcb->vendor_code_filter = 0xffffffff;
2048
2049      so->so_pcb = (caddr_t) ev_pcb;
2050          lck_mtx_lock(evt_mutex);
2051      LIST_INSERT_HEAD(&kern_event_head, ev_pcb, ev_link);
2052          lck_mtx_unlock(evt_mutex);
2053
2054      return 0;
2055 }
2056
2057
2058 static int
2059 kev_detach(struct socket *so)
2060 {
2061      struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2062
2063      if (ev_pcb != 0) {
2064                 lck_mtx_lock(evt_mutex);
2065                 LIST_REMOVE(ev_pcb, ev_link);
2066                 lck_mtx_unlock(evt_mutex);
2067                 FREE(ev_pcb, M_PCB);
2068                 so->so_pcb = 0;
2069                 so->so_flags |= SOF_PCBCLEARING;
2070      }
2071
2072      return 0;
2073 }
2074
2075 /*
2076  * For now, kev_vender_code and mbuf_tags use the same
2077  * mechanism.
2078  */
2079 extern errno_t mbuf_tag_id_find_internal(const char *string, u_long *out_id,
2080                                                                                  int create);
2081
2082 errno_t kev_vendor_code_find(
2083         const char      *string,
2084         u_long          *out_vender_code)
2085 {
2086         if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
2087                 return EINVAL;
2088         }
2089         return mbuf_tag_id_find_internal(string, out_vender_code, 1);
2090 }
2091
2092 extern void mbuf_tag_id_first_last(u_long *first, u_long *last);
2093
2094 errno_t  kev_msg_post(struct kev_msg *event_msg)
2095 {
2096         u_long  min_vendor, max_vendor;
2097
2098         mbuf_tag_id_first_last(&min_vendor, &max_vendor);
2099
2100         if (event_msg == NULL)
2101                 return EINVAL;
2102
2103         /* Limit third parties to posting events for registered vendor codes only */
2104         if (event_msg->vendor_code < min_vendor ||
2105                 event_msg->vendor_code > max_vendor)
2106         {
2107                 return EINVAL;
2108         }
2109
2110         return kev_post_msg(event_msg);
2111 }
2112
2113
2114 int  kev_post_msg(struct kev_msg *event_msg)
2115 {
2116      struct mbuf *m, *m2;
2117      struct kern_event_pcb  *ev_pcb;
2118      struct kern_event_msg  *ev;
2119      char              *tmp;
2120      unsigned long     total_size;
2121      int               i;
2122
2123         /* Verify the message is small enough to fit in one mbuf w/o cluster */
2124         total_size = KEV_MSG_HEADER_SIZE;
2125
2126         for (i = 0; i < 5; i++) {
2127                 if (event_msg->dv[i].data_length == 0)
2128                         break;
2129                 total_size += event_msg->dv[i].data_length;
2130         }
2131
2132         if (total_size > MLEN) {
2133                 return EMSGSIZE;
2134         }
2135
2136      m = m_get(M_DONTWAIT, MT_DATA);
2137      if (m == 0)
2138           return ENOBUFS;
2139
2140      ev = mtod(m, struct kern_event_msg *);
2141      total_size = KEV_MSG_HEADER_SIZE;
2142
2143      tmp = (char *) &ev->event_data[0];
2144      for (i = 0; i < 5; i++) {
2145           if (event_msg->dv[i].data_length == 0)
2146                break;
2147
2148           total_size += event_msg->dv[i].data_length;
2149           bcopy(event_msg->dv[i].data_ptr, tmp,
2150                 event_msg->dv[i].data_length);
2151           tmp += event_msg->dv[i].data_length;
2152      }
2153
2154      ev->id = ++static_event_id;
2155      ev->total_size   = total_size;
2156      ev->vendor_code  = event_msg->vendor_code;
2157      ev->kev_class    = event_msg->kev_class;
2158      ev->kev_subclass = event_msg->kev_subclass;
2159      ev->event_code   = event_msg->event_code;
2160
2161      m->m_len = total_size;
2162      lck_mtx_lock(evt_mutex);
2163      for (ev_pcb = LIST_FIRST(&kern_event_head);
2164           ev_pcb;
2165           ev_pcb = LIST_NEXT(ev_pcb, ev_link)) {
2166
2167           if (ev_pcb->vendor_code_filter != KEV_ANY_VENDOR) {
2168                if (ev_pcb->vendor_code_filter != ev->vendor_code)
2169                     continue;
2170
2171                if (ev_pcb->class_filter != KEV_ANY_CLASS) {
2172                     if (ev_pcb->class_filter != ev->kev_class)
2173                          continue;
2174
2175                     if ((ev_pcb->subclass_filter != KEV_ANY_SUBCLASS) &&
2176                         (ev_pcb->subclass_filter != ev->kev_subclass))
2177                          continue;
2178                }
2179           }
2180
2181           m2 = m_copym(m, 0, m->m_len, M_NOWAIT);
2182           if (m2 == 0) {
2183                m_free(m);
2184                    lck_mtx_unlock(evt_mutex);
2185                return ENOBUFS;
2186           }
2187           socket_lock(ev_pcb->ev_socket, 1);
2188           if (sbappendrecord(&ev_pcb->ev_socket->so_rcv, m2))
2189                   sorwakeup(ev_pcb->ev_socket);
2190           socket_unlock(ev_pcb->ev_socket, 1);
2191      }
2192
2193      m_free(m);
2194      lck_mtx_unlock(evt_mutex);
2195      return 0;
2196 }
2197
2198 static int
2199 kev_control(struct socket *so,
2200                         u_long cmd,
2201                         caddr_t data,
2202                         __unused struct ifnet *ifp,
2203                         __unused struct proc *p)
2204 {
2205         struct kev_request *kev_req = (struct kev_request *) data;
2206         struct kern_event_pcb  *ev_pcb;
2207         struct kev_vendor_code *kev_vendor;
2208         u_long  *id_value = (u_long *) data;
2209
2210
2211         switch (cmd) {
2212
2213                 case SIOCGKEVID:
2214                         *id_value = static_event_id;
2215                         break;
2216
2217                 case SIOCSKEVFILT:
2218                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2219                         ev_pcb->vendor_code_filter = kev_req->vendor_code;
2220                         ev_pcb->class_filter     = kev_req->kev_class;
2221                         ev_pcb->subclass_filter  = kev_req->kev_subclass;
2222                         break;
2223
2224                 case SIOCGKEVFILT:
2225                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2226                         kev_req->vendor_code = ev_pcb->vendor_code_filter;
2227                         kev_req->kev_class   = ev_pcb->class_filter;
2228                         kev_req->kev_subclass = ev_pcb->subclass_filter;
2229                         break;
2230
2231                 case SIOCGKEVVENDOR:
2232                         kev_vendor = (struct kev_vendor_code*)data;
2233
2234                         /* Make sure string is NULL terminated */
2235                         kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0;
2236
2237                         return mbuf_tag_id_find_internal(kev_vendor->vendor_string,
2238                                                                                          &kev_vendor->vendor_code, 0);
2239
2240                 default:
2241                         return ENOTSUP;
2242         }
2243
2244         return 0;
2245 }
2246
2247
2248
2249