bsd/kern/kern_event.c

   1 /*
   2  * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  *
  28  */
  29 /*-
  30  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
  31  * All rights reserved.
  32  *
  33  * Redistribution and use in source and binary forms, with or without
  34  * modification, are permitted provided that the following conditions
  35  * are met:
  36  * 1. Redistributions of source code must retain the above copyright
  37  *    notice, this list of conditions and the following disclaimer.
  38  * 2. Redistributions in binary form must reproduce the above copyright
  39  *    notice, this list of conditions and the following disclaimer in the
  40  *    documentation and/or other materials provided with the distribution.
  41  *
  42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  43  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  45  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  46  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  47  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  48  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  49  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  50  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  51  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  52  * SUCH DAMAGE.
  53  */
  54 /*
  55  *      @(#)kern_event.c       1.0 (3/31/2000)
  56  */
  57 #include <stdint.h>
  58
  59 #include <sys/param.h>
  60 #include <sys/systm.h>
  61 #include <sys/filedesc.h>
  62 #include <sys/kernel.h>
  63 #include <sys/proc_internal.h>
  64 #include <sys/kauth.h>
  65 #include <sys/malloc.h>
  66 #include <sys/unistd.h>
  67 #include <sys/file_internal.h>
  68 #include <sys/fcntl.h>
  69 #include <sys/select.h>
  70 #include <sys/queue.h>
  71 #include <sys/event.h>
  72 #include <sys/eventvar.h>
  73 #include <sys/protosw.h>
  74 #include <sys/socket.h>
  75 #include <sys/socketvar.h>
  76 #include <sys/stat.h>
  77 #include <sys/sysctl.h>
  78 #include <sys/uio.h>
  79 #include <sys/sysproto.h>
  80 #include <sys/user.h>
  81 #include <string.h>
  82 #include <sys/proc_info.h>
  83
  84 #include <kern/lock.h>
  85 #include <kern/clock.h>
  86 #include <kern/thread_call.h>
  87 #include <kern/sched_prim.h>
  88 #include <kern/zalloc.h>
  89 #include <kern/assert.h>
  90
  91 #include <libkern/libkern.h>
  92 #include "kpi_mbuf_internal.h"
  93
  94 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
  95
  96 static inline void kqlock(struct kqueue *kq);
  97 static inline void kqunlock(struct kqueue *kq);
  98
  99 static int      kqlock2knoteuse(struct kqueue *kq, struct knote *kn);
 100 static int      kqlock2knoteusewait(struct kqueue *kq, struct knote *kn);
 101 static int      kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
 102 static int      knoteuse2kqlock(struct kqueue *kq, struct knote *kn);
 103
 104 static void     kqueue_wakeup(struct kqueue *kq);
 105 static int      kqueue_read(struct fileproc *fp, struct uio *uio,
 106                     int flags, vfs_context_t ctx);
 107 static int      kqueue_write(struct fileproc *fp, struct uio *uio,
 108                     int flags, vfs_context_t ctx);
 109 static int      kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
 110                     vfs_context_t ctx);
 111 static int      kqueue_select(struct fileproc *fp, int which, void *wql,
 112                     vfs_context_t ctx);
 113 static int      kqueue_close(struct fileglob *fp, vfs_context_t ctx);
 114 static int      kqueue_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx);
 115 extern int      kqueue_stat(struct fileproc *fp, void  *ub, int isstat64, vfs_context_t ctx);
 116
 117 static struct fileops kqueueops = {
 118         kqueue_read,
 119         kqueue_write,
 120         kqueue_ioctl,
 121         kqueue_select,
 122         kqueue_close,
 123         kqueue_kqfilter,
 124         0
 125 };
 126
 127 static int kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p);
 128 static int kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p);
 129
 130 static int      kevent_callback(struct kqueue *kq, struct kevent *kevp, void *data);
 131 static void     kevent_continue(struct kqueue *kq, void *data, int error);
 132 static void     kevent_scan_continue(void *contp, wait_result_t wait_result);
 133 static int      kevent_process(struct kqueue *kq, kevent_callback_t callback,
 134                                void *data, int *countp, struct proc *p);
 135 static void     knote_put(struct knote *kn);
 136 static int      knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p);
 137 static void     knote_drop(struct knote *kn, struct proc *p);
 138 static void     knote_activate(struct knote *kn);
 139 static void     knote_deactivate(struct knote *kn);
 140 static void     knote_enqueue(struct knote *kn);
 141 static void     knote_dequeue(struct knote *kn);
 142 static struct   knote *knote_alloc(void);
 143 static void     knote_free(struct knote *kn);
 144
 145 static int      filt_fileattach(struct knote *kn);
 146 static struct filterops file_filtops =
 147         { 1, filt_fileattach, NULL, NULL };
 148
 149 static void     filt_kqdetach(struct knote *kn);
 150 static int      filt_kqueue(struct knote *kn, long hint);
 151 static struct filterops kqread_filtops =
 152         { 1, NULL, filt_kqdetach, filt_kqueue };
 153
 154 /*
 155  * placeholder for not-yet-implemented filters
 156  */
 157 static int      filt_badattach(struct knote *kn);
 158 static struct filterops bad_filtops =
 159         { 0, filt_badattach, 0 , 0 };
 160
 161 static int      filt_procattach(struct knote *kn);
 162 static void     filt_procdetach(struct knote *kn);
 163 static int      filt_proc(struct knote *kn, long hint);
 164
 165 static struct filterops proc_filtops =
 166         { 0, filt_procattach, filt_procdetach, filt_proc };
 167
 168 extern struct filterops fs_filtops;
 169
 170 extern struct filterops sig_filtops;
 171
 172
 173 /* Timer filter */
 174 static int      filt_timercompute(struct knote *kn, uint64_t *abs_time);
 175 static void     filt_timerexpire(void *knx, void *param1);
 176 static int      filt_timerattach(struct knote *kn);
 177 static void     filt_timerdetach(struct knote *kn);
 178 static int      filt_timer(struct knote *kn, long hint);
 179
 180 static struct filterops timer_filtops =
 181         { 0, filt_timerattach, filt_timerdetach, filt_timer };
 182
 183 /* to avoid arming timers that fire quicker than we can handle */
 184 static uint64_t filt_timerfloor = 0;
 185
 186 static lck_mtx_t _filt_timerlock;
 187 static void     filt_timerlock(void);
 188 static void     filt_timerunlock(void);
 189
 190 static zone_t   knote_zone;
 191
 192 #define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))
 193
 194 #if 0
 195 extern struct filterops aio_filtops;
 196 #endif
 197
 198 /*
 199  * Table for for all system-defined filters.
 200  */
 201 static struct filterops *sysfilt_ops[] = {
 202         &file_filtops,                  /* EVFILT_READ */
 203         &file_filtops,                  /* EVFILT_WRITE */
 204 #if 0
 205         &aio_filtops,                   /* EVFILT_AIO */
 206 #else
 207         &bad_filtops,                   /* EVFILT_AIO */
 208 #endif
 209         &file_filtops,                  /* EVFILT_VNODE */
 210         &proc_filtops,                  /* EVFILT_PROC */
 211         &sig_filtops,                   /* EVFILT_SIGNAL */
 212         &timer_filtops,                 /* EVFILT_TIMER */
 213         &bad_filtops,                   /* EVFILT_MACHPORT */
 214         &fs_filtops                     /* EVFILT_FS */
 215 };
 216
 217 /*
 218  * kqueue/note lock attributes and implementations
 219  *
 220  *      kqueues have locks, while knotes have use counts
 221  *      Most of the knote state is guarded by the object lock.
 222  *      the knote "inuse" count and status use the kqueue lock.
 223  */
 224 lck_grp_attr_t * kq_lck_grp_attr;
 225 lck_grp_t * kq_lck_grp;
 226 lck_attr_t * kq_lck_attr;
 227
 228 static inline void
 229 kqlock(struct kqueue *kq)
 230 {
 231         lck_spin_lock(&kq->kq_lock);
 232 }
 233
 234 static inline void
 235 kqunlock(struct kqueue *kq)
 236 {
 237         lck_spin_unlock(&kq->kq_lock);
 238 }
 239
 240 /*
 241  * Convert a kq lock to a knote use referece.
 242  *
 243  *      If the knote is being dropped, we can't get
 244  *      a use reference, so just return with it
 245  *      still locked.
 246  *
 247  *      - kq locked at entry
 248  *      - unlock on exit if we get the use reference
 249  */
 250 static int
 251 kqlock2knoteuse(struct kqueue *kq, struct knote *kn)
 252 {
 253         if (kn->kn_status & KN_DROPPING)
 254                 return 0;
 255         kn->kn_inuse++;
 256         kqunlock(kq);
 257         return 1;
 258  }
 259
 260 /*
 261  * Convert a kq lock to a knote use referece.
 262  *
 263  *      If the knote is being dropped, we can't get
 264  *      a use reference, so just return with it
 265  *      still locked.
 266  *
 267  *      - kq locked at entry
 268  *      - kq always unlocked on exit
 269  */
 270 static int
 271 kqlock2knoteusewait(struct kqueue *kq, struct knote *kn)
 272 {
 273         if (!kqlock2knoteuse(kq, kn)) {
 274                 kn->kn_status |= KN_DROPWAIT;
 275                 assert_wait(&kn->kn_status, THREAD_UNINT);
 276                 kqunlock(kq);
 277                 thread_block(THREAD_CONTINUE_NULL);
 278                 return 0;
 279         }
 280         return 1;
 281  }
 282
 283 /*
 284  * Convert from a knote use reference back to kq lock.
 285  *
 286  *      Drop a use reference and wake any waiters if
 287  *      this is the last one.
 288  *
 289  *      The exit return indicates if the knote is
 290  *      still alive - but the kqueue lock is taken
 291  *      unconditionally.
 292  */
 293 static int
 294 knoteuse2kqlock(struct kqueue *kq, struct knote *kn)
 295 {
 296         kqlock(kq);
 297         if ((--kn->kn_inuse == 0) &&
 298             (kn->kn_status & KN_USEWAIT)) {
 299                 kn->kn_status &= ~KN_USEWAIT;
 300                 thread_wakeup(&kn->kn_inuse);
 301         }
 302         return ((kn->kn_status & KN_DROPPING) == 0);
 303  }
 304
 305 /*
 306  * Convert a kq lock to a knote drop referece.
 307  *
 308  *      If the knote is in use, wait for the use count
 309  *      to subside.  We first mark our intention to drop
 310  *      it - keeping other users from "piling on."
 311  *      If we are too late, we have to wait for the
 312  *      other drop to complete.
 313  *
 314  *      - kq locked at entry
 315  *      - always unlocked on exit.
 316  *      - caller can't hold any locks that would prevent
 317  *        the other dropper from completing.
 318  */
 319 static int
 320 kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
 321 {
 322
 323         if ((kn->kn_status & KN_DROPPING) == 0) {
 324                 kn->kn_status |= KN_DROPPING;
 325                 if (kn->kn_inuse > 0) {
 326                         kn->kn_status |= KN_USEWAIT;
 327                         assert_wait(&kn->kn_inuse, THREAD_UNINT);
 328                         kqunlock(kq);
 329                         thread_block(THREAD_CONTINUE_NULL);
 330                 } else
 331                         kqunlock(kq);
 332                 return 1;
 333         } else {
 334                 kn->kn_status |= KN_DROPWAIT;
 335                 assert_wait(&kn->kn_status, THREAD_UNINT);
 336                 kqunlock(kq);
 337                 thread_block(THREAD_CONTINUE_NULL);
 338                 return 0;
 339         }
 340 }
 341
 342 /*
 343  * Release a knote use count reference.
 344  */
 345 static void
 346 knote_put(struct knote *kn)
 347 {
 348         struct kqueue *kq = kn->kn_kq;
 349
 350         kqlock(kq);
 351         if ((--kn->kn_inuse == 0) &&
 352             (kn->kn_status & KN_USEWAIT)) {
 353                 kn->kn_status &= ~KN_USEWAIT;
 354                 thread_wakeup(&kn->kn_inuse);
 355         }
 356         kqunlock(kq);
 357  }
 358
 359
 360
 361 static int
 362 filt_fileattach(struct knote *kn)
 363 {
 364
 365         return (fo_kqfilter(kn->kn_fp, kn, vfs_context_current()));
 366 }
 367
 368 #define f_flag f_fglob->fg_flag
 369 #define f_type f_fglob->fg_type
 370 #define f_msgcount f_fglob->fg_msgcount
 371 #define f_cred f_fglob->fg_cred
 372 #define f_ops f_fglob->fg_ops
 373 #define f_offset f_fglob->fg_offset
 374 #define f_data f_fglob->fg_data
 375
 376 static void
 377 filt_kqdetach(struct knote *kn)
 378 {
 379         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 380
 381         kqlock(kq);
 382         KNOTE_DETACH(&kq->kq_sel.si_note, kn);
 383         kqunlock(kq);
 384 }
 385
 386 /*ARGSUSED*/
 387 static int
 388 filt_kqueue(struct knote *kn, __unused long hint)
 389 {
 390         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 391
 392         kn->kn_data = kq->kq_count;
 393         return (kn->kn_data > 0);
 394 }
 395
 396 static int
 397 filt_procattach(struct knote *kn)
 398 {
 399         struct proc *p;
 400
 401         assert(PID_MAX < NOTE_PDATAMASK);
 402
 403         if ((kn->kn_sfflags & (NOTE_TRACK | NOTE_TRACKERR | NOTE_CHILD)) != 0)
 404                 return(ENOTSUP);
 405
 406         p = proc_find(kn->kn_id);
 407         if (p == NULL) {
 408                 return (ESRCH);
 409         }
 410
 411         proc_klist_lock();
 412
 413         kn->kn_flags |= EV_CLEAR;       /* automatically set */
 414         kn->kn_ptr.p_proc = p;          /* store the proc handle */
 415
 416         KNOTE_ATTACH(&p->p_klist, kn);
 417
 418         proc_klist_unlock();
 419
 420         proc_rele(p);
 421
 422         return (0);
 423 }
 424
 425 /*
 426  * The knote may be attached to a different process, which may exit,
 427  * leaving nothing for the knote to be attached to.  In that case,
 428  * the pointer to the process will have already been nulled out.
 429  */
 430 static void
 431 filt_procdetach(struct knote *kn)
 432 {
 433         struct proc *p;
 434
 435         proc_klist_lock();
 436
 437         p = kn->kn_ptr.p_proc;
 438         if (p != PROC_NULL) {
 439                 kn->kn_ptr.p_proc = PROC_NULL;
 440                 KNOTE_DETACH(&p->p_klist, kn);
 441         }
 442
 443         proc_klist_unlock();
 444 }
 445
 446 static int
 447 filt_proc(struct knote *kn, long hint)
 448 {
 449         struct proc * p;
 450
 451         /* hint is 0 when called from above */
 452         if (hint != 0) {
 453                 u_int event;
 454
 455                 /* ALWAYS CALLED WITH proc_klist_lock when (hint != 0) */
 456
 457                 /*
 458                  * mask off extra data
 459                  */
 460                 event = (u_int)hint & NOTE_PCTRLMASK;
 461
 462                 /*
 463                  * if the user is interested in this event, record it.
 464                  */
 465                 if (kn->kn_sfflags & event)
 466                         kn->kn_fflags |= event;
 467
 468                 /*
 469                  * If this is the last possible event for the
 470                  * knote, unlink this knote from the process
 471                  * before the process goes away.
 472                  */
 473                 if (event == NOTE_REAP || (event == NOTE_EXIT && !(kn->kn_sfflags & NOTE_REAP))) {
 474                         kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 475                         p = kn->kn_ptr.p_proc;
 476                         if (p != PROC_NULL) {
 477                                 kn->kn_ptr.p_proc = PROC_NULL;
 478                                 KNOTE_DETACH(&p->p_klist, kn);
 479                         }
 480                         return (1);
 481                 }
 482
 483         }
 484
 485         /* atomic check, no locking need when called from above */
 486         return (kn->kn_fflags != 0);
 487 }
 488
 489 /*
 490  * filt_timercompute - compute absolute timeout
 491  *
 492  *      The saved-data field in the knote contains the
 493  *      time value.  The saved filter-flags indicates
 494  *      the unit of measurement.
 495  *
 496  *      If the timeout is not absolute, adjust it for
 497  *      the current time.
 498  */
 499 static int
 500 filt_timercompute(struct knote *kn, uint64_t *abs_time)
 501 {
 502         uint64_t multiplier;
 503         uint64_t raw;
 504
 505         switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS)) {
 506         case NOTE_SECONDS:
 507                 multiplier = NSEC_PER_SEC;
 508                 break;
 509         case NOTE_USECONDS:
 510                 multiplier = NSEC_PER_USEC;
 511                 break;
 512         case NOTE_NSECONDS:
 513                 multiplier = 1;
 514                 break;
 515         case 0: /* milliseconds (default) */
 516                 multiplier = NSEC_PER_SEC / 1000;
 517                 break;
 518         default:
 519                 return EINVAL;
 520         }
 521         nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw);
 522         if (raw <= filt_timerfloor) {
 523                 *abs_time = 0;
 524                 return 0;
 525         }
 526         if ((kn->kn_sfflags & NOTE_ABSOLUTE) == NOTE_ABSOLUTE) {
 527                 uint32_t seconds, nanoseconds;
 528                 uint64_t now;
 529
 530                 clock_get_calendar_nanotime(&seconds, &nanoseconds);
 531                 nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + nanoseconds,
 532                                             &now);
 533                 if (now >= raw + filt_timerfloor) {
 534                         *abs_time = 0;
 535                         return 0;
 536                 }
 537                 raw -= now;
 538         }
 539         clock_absolutetime_interval_to_deadline(raw, abs_time);
 540         return 0;
 541 }
 542
 543 /*
 544  * filt_timerexpire - the timer callout routine
 545  *
 546  *      Just propagate the timer event into the knote
 547  *      filter routine (by going through the knote
 548  *      synchronization point).  Pass a hint to
 549  *      indicate this is a real event, not just a
 550  *      query from above.
 551  */
 552 static void
 553 filt_timerexpire(void *knx, __unused void *spare)
 554 {
 555         struct klist timer_list;
 556         struct knote *kn = knx;
 557
 558         /* no "object" for timers, so fake a list */
 559         SLIST_INIT(&timer_list);
 560         SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);
 561         KNOTE(&timer_list, 1);
 562 }
 563
 564 /*
 565  * data contains amount of time to sleep, in milliseconds,
 566  * or a pointer to a timespec structure.
 567  */
 568 static int
 569 filt_timerattach(struct knote *kn)
 570 {
 571         thread_call_t callout;
 572         uint64_t deadline;
 573         int error;
 574
 575         error = filt_timercompute(kn, &deadline);
 576         if (error)
 577                 return (error);
 578
 579         if (deadline) {
 580                 callout = thread_call_allocate(filt_timerexpire, kn);
 581                 if (NULL == callout)
 582                         return (ENOMEM);
 583         } else {
 584                 /* handle as immediate */
 585                 kn->kn_sdata = 0;
 586                 callout = NULL;
 587         }
 588
 589         filt_timerlock();
 590         kn->kn_hook = (caddr_t)callout;
 591
 592         /* absolute=EV_ONESHOT */
 593         if (kn->kn_sfflags & NOTE_ABSOLUTE)
 594                 kn->kn_flags |= EV_ONESHOT;
 595
 596         if (deadline) {
 597                 /* all others - if not faking immediate */
 598                 kn->kn_flags |= EV_CLEAR;
 599                 thread_call_enter_delayed(callout, deadline);
 600                 kn->kn_hookid = 0;
 601         } else {
 602                 /* fake immediate */
 603                 kn->kn_hookid = 1;
 604         }
 605         filt_timerunlock();
 606         return (0);
 607 }
 608
 609 static void
 610 filt_timerdetach(struct knote *kn)
 611 {
 612         thread_call_t callout;
 613
 614         filt_timerlock();
 615         callout = (thread_call_t)kn->kn_hook;
 616         if (callout != NULL) {
 617                 boolean_t cancelled;
 618
 619                 /* cancel the callout if we can */
 620                 cancelled = thread_call_cancel(callout);
 621                 if (cancelled) {
 622                         /* got it, just free it */
 623                         kn->kn_hook = NULL;
 624                         filt_timerunlock();
 625                         thread_call_free(callout);
 626                         return;
 627                 }
 628                 /* we have to wait for the expire routine.  */
 629                 kn->kn_hookid = -1;     /* we are detaching */
 630                 assert_wait(&kn->kn_hook, THREAD_UNINT);
 631                 filt_timerunlock();
 632                 thread_block(THREAD_CONTINUE_NULL);
 633                 assert(kn->kn_hook == NULL);
 634                 return;
 635         }
 636         /* nothing to do */
 637         filt_timerunlock();
 638 }
 639
 640
 641
 642 static int
 643 filt_timer(struct knote *kn, __unused long hint)
 644 {
 645         int result;
 646
 647         if (hint) {
 648                 /* real timer pop */
 649                 thread_call_t callout;
 650                 boolean_t detaching;
 651
 652                 filt_timerlock();
 653
 654                 kn->kn_data++;
 655
 656                 detaching = (kn->kn_hookid < 0);
 657                 callout = (thread_call_t)kn->kn_hook;
 658
 659                 if (!detaching && (kn->kn_flags & EV_ONESHOT) == 0) {
 660                         uint64_t deadline;
 661                         int error;
 662
 663                         /* user input data may have changed - deal */
 664                         error = filt_timercompute(kn, &deadline);
 665                         if (error) {
 666                                 kn->kn_flags |= EV_ERROR;
 667                                 kn->kn_data = error;
 668                         } else if (deadline == 0) {
 669                                 /* revert to fake immediate */
 670                                 kn->kn_flags &= ~EV_CLEAR;
 671                                 kn->kn_sdata = 0;
 672                                 kn->kn_hookid = 1;
 673                         } else {
 674                                 /* keep the callout and re-arm */
 675                                 thread_call_enter_delayed(callout, deadline);
 676                                 filt_timerunlock();
 677                                 return 1;
 678                         }
 679                 }
 680                 kn->kn_hook = NULL;
 681                 filt_timerunlock();
 682                 thread_call_free(callout);
 683
 684                 /* if someone is waiting for timer to pop */
 685                 if (detaching)
 686                         thread_wakeup(&kn->kn_hook);
 687
 688                 return 1;
 689         }
 690
 691         /* user-query */
 692         filt_timerlock();
 693
 694         /* change fake timer to real if needed */
 695         while (kn->kn_hookid > 0 && kn->kn_sdata > 0) {
 696                 int error;
 697
 698                 /* update the fake timer (make real) */
 699                 kn->kn_hookid = 0;
 700                 kn->kn_data = 0;
 701                 filt_timerunlock();
 702                 error = filt_timerattach(kn);
 703                 filt_timerlock();
 704                 if (error) {
 705                         kn->kn_flags |= EV_ERROR;
 706                         kn->kn_data = error;
 707                         filt_timerunlock();
 708                         return 1;
 709                 }
 710         }
 711
 712         /* if still fake, pretend it fired */
 713         if (kn->kn_hookid > 0)
 714                 kn->kn_data = 1;
 715
 716         result = (kn->kn_data != 0);
 717         filt_timerunlock();
 718         return result;
 719 }
 720
 721 static void
 722 filt_timerlock(void)
 723 {
 724         lck_mtx_lock(&_filt_timerlock);
 725 }
 726
 727 static void
 728 filt_timerunlock(void)
 729 {
 730         lck_mtx_unlock(&_filt_timerlock);
 731 }
 732
 733 /*
 734  * JMM - placeholder for not-yet-implemented filters
 735  */
 736 static int
 737 filt_badattach(__unused struct knote *kn)
 738 {
 739         return(ENOTSUP);
 740 }
 741
 742
 743 struct kqueue *
 744 kqueue_alloc(struct proc *p)
 745 {
 746         struct filedesc *fdp = p->p_fd;
 747         struct kqueue *kq;
 748
 749         MALLOC_ZONE(kq, struct kqueue *, sizeof(struct kqueue), M_KQUEUE, M_WAITOK);
 750         if (kq != NULL) {
 751                 bzero(kq, sizeof(struct kqueue));
 752                 lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
 753                 TAILQ_INIT(&kq->kq_head);
 754                 TAILQ_INIT(&kq->kq_inprocess);
 755                 kq->kq_p = p;
 756         }
 757
 758         if (fdp->fd_knlistsize < 0) {
 759                 proc_fdlock(p);
 760                 if (fdp->fd_knlistsize < 0)
 761                         fdp->fd_knlistsize = 0;         /* this process has had a kq */
 762                 proc_fdunlock(p);
 763         }
 764
 765         return kq;
 766 }
 767
 768
 769 /*
 770  * kqueue_dealloc - detach all knotes from a kqueue and free it
 771  *
 772  *      We walk each list looking for knotes referencing this
 773  *      this kqueue.  If we find one, we try to drop it.  But
 774  *      if we fail to get a drop reference, that will wait
 775  *      until it is dropped.  So, we can just restart again
 776  *      safe in the assumption that the list will eventually
 777  *      not contain any more references to this kqueue (either
 778  *      we dropped them all, or someone else did).
 779  *
 780  *      Assumes no new events are being added to the kqueue.
 781  *      Nothing locked on entry or exit.
 782  */
 783 void
 784 kqueue_dealloc(struct kqueue *kq)
 785 {
 786         struct proc *p = kq->kq_p;
 787         struct filedesc *fdp = p->p_fd;
 788         struct knote *kn;
 789         int i;
 790
 791         proc_fdlock(p);
 792         for (i = 0; i < fdp->fd_knlistsize; i++) {
 793                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 794                 while (kn != NULL) {
 795                         if (kq == kn->kn_kq) {
 796                                 kqlock(kq);
 797                                 proc_fdunlock(p);
 798                                 /* drop it ourselves or wait */
 799                                 if (kqlock2knotedrop(kq, kn)) {
 800                                         kn->kn_fop->f_detach(kn);
 801                                         knote_drop(kn, p);
 802                                 }
 803                                 proc_fdlock(p);
 804                                 /* start over at beginning of list */
 805                                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 806                                 continue;
 807                         }
 808                         kn = SLIST_NEXT(kn, kn_link);
 809                 }
 810         }
 811         if (fdp->fd_knhashmask != 0) {
 812                 for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
 813                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 814                         while (kn != NULL) {
 815                                 if (kq == kn->kn_kq) {
 816                                         kqlock(kq);
 817                                         proc_fdunlock(p);
 818                                         /* drop it ourselves or wait */
 819                                         if (kqlock2knotedrop(kq, kn)) {
 820                                                 kn->kn_fop->f_detach(kn);
 821                                                 knote_drop(kn, p);
 822                                         }
 823                                         proc_fdlock(p);
 824                                         /* start over at beginning of list */
 825                                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 826                                         continue;
 827                                 }
 828                                 kn = SLIST_NEXT(kn, kn_link);
 829                         }
 830                 }
 831         }
 832         proc_fdunlock(p);
 833         lck_spin_destroy(&kq->kq_lock, kq_lck_grp);
 834         FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE);
 835 }
 836
 837 int
 838 kqueue(struct proc *p, __unused struct kqueue_args *uap, register_t *retval)
 839 {
 840         struct kqueue *kq;
 841         struct fileproc *fp;
 842         int fd, error;
 843
 844         error = falloc(p, &fp, &fd, vfs_context_current());
 845         if (error) {
 846                 return (error);
 847         }
 848
 849         kq = kqueue_alloc(p);
 850         if (kq == NULL) {
 851                 fp_free(p, fd, fp);
 852                 return (ENOMEM);
 853         }
 854
 855         fp->f_flag = FREAD | FWRITE;
 856         fp->f_type = DTYPE_KQUEUE;
 857         fp->f_ops = &kqueueops;
 858         fp->f_data = (caddr_t)kq;
 859
 860         proc_fdlock(p);
 861         procfdtbl_releasefd(p, fd, NULL);
 862         fp_drop(p, fd, fp, 1);
 863         proc_fdunlock(p);
 864
 865         *retval = fd;
 866         return (error);
 867 }
 868
 869 int
 870 kqueue_portset_np(__unused struct proc *p,
 871                                   __unused struct kqueue_portset_np_args *uap,
 872                                   __unused register_t *retval)
 873 {
 874                 /* JMM - Placeholder for now */
 875                 return (ENOTSUP);
 876 }
 877
 878 int
 879 kqueue_from_portset_np(__unused struct proc *p,
 880                                            __unused struct kqueue_from_portset_np_args *uap,
 881                                            __unused register_t *retval)
 882 {
 883                 /* JMM - Placeholder for now */
 884                 return (ENOTSUP);
 885 }
 886
 887 static int
 888 kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p)
 889 {
 890         int advance;
 891         int error;
 892
 893         if (IS_64BIT_PROCESS(p)) {
 894                 struct user_kevent kev64;
 895
 896                 advance = sizeof(kev64);
 897                 error = copyin(*addrp, (caddr_t)&kev64, advance);
 898                 if (error)
 899                         return error;
 900                 kevp->ident = CAST_DOWN(uintptr_t, kev64.ident);
 901                 kevp->filter = kev64.filter;
 902                 kevp->flags = kev64.flags;
 903                 kevp->fflags = kev64.fflags;
 904                 kevp->data = CAST_DOWN(intptr_t, kev64.data);
 905                 kevp->udata = kev64.udata;
 906         } else {
 907                 /*
 908                  * compensate for legacy in-kernel kevent layout
 909                  * where the udata field is alredy 64-bit.
 910                  */
 911                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 912                 error = copyin(*addrp, (caddr_t)kevp, advance);
 913         }
 914         if (!error)
 915                 *addrp += advance;
 916         return error;
 917 }
 918
 919 static int
 920 kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p)
 921 {
 922         int advance;
 923         int error;
 924
 925         if (IS_64BIT_PROCESS(p)) {
 926                 struct user_kevent kev64;
 927
 928                 /*
 929                  * deal with the special case of a user-supplied
 930                  * value of (uintptr_t)-1.
 931                  */
 932                 kev64.ident = (kevp->ident == (uintptr_t)-1) ?
 933                            (uint64_t)-1LL : (uint64_t)kevp->ident;
 934
 935                 kev64.filter = kevp->filter;
 936                 kev64.flags = kevp->flags;
 937                 kev64.fflags = kevp->fflags;
 938                 kev64.data = (int64_t) kevp->data;
 939                 kev64.udata = kevp->udata;
 940                 advance = sizeof(kev64);
 941                 error = copyout((caddr_t)&kev64, *addrp, advance);
 942         } else {
 943                 /*
 944                  * compensate for legacy in-kernel kevent layout
 945                  * where the udata field is alredy 64-bit.
 946                  */
 947                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 948                 error = copyout((caddr_t)kevp, *addrp, advance);
 949         }
 950         if (!error)
 951                 *addrp += advance;
 952         return error;
 953 }
 954
 955 /*
 956  * kevent_continue - continue a kevent syscall after blocking
 957  *
 958  *      assume we inherit a use count on the kq fileglob.
 959  */
 960
 961 static void
 962 kevent_continue(__unused struct kqueue *kq, void *data, int error)
 963 {
 964         struct _kevent *cont_args;
 965         struct fileproc *fp;
 966         register_t *retval;
 967         int noutputs;
 968         int fd;
 969         struct proc *p = current_proc();
 970
 971         cont_args = (struct _kevent *)data;
 972         noutputs = cont_args->eventout;
 973         retval = cont_args->retval;
 974         fd = cont_args->fd;
 975         fp = cont_args->fp;
 976
 977         fp_drop(p, fd, fp, 0);
 978
 979         /* don't restart after signals... */
 980         if (error == ERESTART)
 981                 error = EINTR;
 982         else if (error == EWOULDBLOCK)
 983                 error = 0;
 984         if (error == 0)
 985                 *retval = noutputs;
 986         unix_syscall_return(error);
 987 }
 988
 989 /*
 990  * kevent - [syscall] register and wait for kernel events
 991  *
 992  */
 993
 994 int
 995 kevent(struct proc *p, struct kevent_args *uap, register_t *retval)
 996 {
 997         user_addr_t changelist = uap->changelist;
 998         user_addr_t ueventlist = uap->eventlist;
 999         int nchanges = uap->nchanges;
1000         int nevents = uap->nevents;
1001         int fd = uap->fd;
1002
1003         struct _kevent *cont_args;
1004         uthread_t ut;
1005         struct kqueue *kq;
1006         struct fileproc *fp;
1007         struct kevent kev;
1008         int error, noutputs;
1009         struct timeval atv;
1010
1011         /* convert timeout to absolute - if we have one */
1012         if (uap->timeout != USER_ADDR_NULL) {
1013                 struct timeval rtv;
1014                 if ( IS_64BIT_PROCESS(p) ) {
1015                         struct user_timespec ts;
1016                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1017                         if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0)
1018                                 error = EINVAL;
1019                         else
1020                                 TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1021                 } else {
1022                         struct timespec ts;
1023                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1024                         TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1025                 }
1026                 if (error)
1027                         return error;
1028                 if (itimerfix(&rtv))
1029                         return EINVAL;
1030                 getmicrouptime(&atv);
1031                 timevaladd(&atv, &rtv);
1032         } else {
1033                 atv.tv_sec = 0;
1034                 atv.tv_usec = 0;
1035         }
1036
1037         /* get a usecount for the kq itself */
1038         if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
1039                 return(error);
1040
1041         /* register all the change requests the user provided... */
1042         noutputs = 0;
1043         while (nchanges > 0 && error == 0) {
1044                 error = kevent_copyin(&changelist, &kev, p);
1045                 if (error)
1046                         break;
1047
1048                 kev.flags &= ~EV_SYSFLAGS;
1049                 error = kevent_register(kq, &kev, p);
1050                 if ((error || (kev.flags & EV_RECEIPT)) && nevents > 0) {
1051                         kev.flags = EV_ERROR;
1052                         kev.data = error;
1053                         error = kevent_copyout(&kev, &ueventlist, p);
1054                         if (error == 0) {
1055                                 nevents--;
1056                                 noutputs++;
1057                         }
1058                 }
1059                 nchanges--;
1060         }
1061
1062         /* store the continuation/completion data in the uthread */
1063         ut = (uthread_t)get_bsdthread_info(current_thread());
1064         cont_args = (struct _kevent *)&ut->uu_kevent.ss_kevent;
1065         cont_args->fp = fp;
1066         cont_args->fd = fd;
1067         cont_args->retval = retval;
1068         cont_args->eventlist = ueventlist;
1069         cont_args->eventcount = nevents;
1070         cont_args->eventout = noutputs;
1071
1072         if (nevents > 0 && noutputs == 0 && error == 0)
1073                 error = kevent_scan(kq, kevent_callback,
1074                                     kevent_continue, cont_args,
1075                                     &atv, p);
1076         kevent_continue(kq, cont_args, error);
1077         /* NOTREACHED */
1078         return error;
1079 }
1080
1081
1082 /*
1083  * kevent_callback - callback for each individual event
1084  *
1085  *      called with nothing locked
1086  *      caller holds a reference on the kqueue
1087  */
1088
1089 static int
1090 kevent_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
1091 {
1092         struct _kevent *cont_args;
1093         int error;
1094
1095         cont_args = (struct _kevent *)data;
1096         assert(cont_args->eventout < cont_args->eventcount);
1097
1098         /*
1099          * Copy out the appropriate amount of event data for this user.
1100          */
1101         error = kevent_copyout(kevp, &cont_args->eventlist, current_proc());
1102
1103         /*
1104          * If there isn't space for additional events, return
1105          * a harmless error to stop the processing here
1106          */
1107         if (error == 0 && ++cont_args->eventout == cont_args->eventcount)
1108                         error = EWOULDBLOCK;
1109         return error;
1110 }
1111
1112 /*
1113  * kevent_register - add a new event to a kqueue
1114  *
1115  *      Creates a mapping between the event source and
1116  *      the kqueue via a knote data structure.
1117  *
1118  *      Because many/most the event sources are file
1119  *      descriptor related, the knote is linked off
1120  *      the filedescriptor table for quick access.
1121  *
1122  *      called with nothing locked
1123  *      caller holds a reference on the kqueue
1124  */
1125
1126 int
1127 kevent_register(struct kqueue *kq, struct kevent *kev, __unused struct proc *ctxp)
1128 {
1129         struct proc *p = kq->kq_p;
1130         struct filedesc *fdp = p->p_fd;
1131         struct filterops *fops;
1132         struct fileproc *fp = NULL;
1133         struct knote *kn = NULL;
1134         int error = 0;
1135
1136         if (kev->filter < 0) {
1137                 if (kev->filter + EVFILT_SYSCOUNT < 0)
1138                         return (EINVAL);
1139                 fops = sysfilt_ops[~kev->filter];       /* to 0-base index */
1140         } else {
1141                 /*
1142                  * XXX
1143                  * filter attach routine is responsible for insuring that
1144                  * the identifier can be attached to it.
1145                  */
1146                 printf("unknown filter: %d\n", kev->filter);
1147                 return (EINVAL);
1148         }
1149
1150         /* this iocount needs to be dropped if it is not registered */
1151         if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 0)) != 0)
1152                 return(error);
1153
1154  restart:
1155         proc_fdlock(p);
1156         if (fops->f_isfd) {
1157                 /* fd-based knotes are linked off the fd table */
1158                 if (kev->ident < (u_int)fdp->fd_knlistsize) {
1159                         SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
1160                                 if (kq == kn->kn_kq &&
1161                                     kev->filter == kn->kn_filter)
1162                                         break;
1163                 }
1164         } else {
1165                 /* hash non-fd knotes here too */
1166                 if (fdp->fd_knhashmask != 0) {
1167                         struct klist *list;
1168
1169                         list = &fdp->fd_knhash[
1170                             KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
1171                         SLIST_FOREACH(kn, list, kn_link)
1172                                 if (kev->ident == kn->kn_id &&
1173                                     kq == kn->kn_kq &&
1174                                     kev->filter == kn->kn_filter)
1175                                         break;
1176                 }
1177         }
1178
1179         /*
1180          * kn now contains the matching knote, or NULL if no match
1181          */
1182         if (kn == NULL) {
1183                 if ((kev->flags & (EV_ADD|EV_DELETE)) == EV_ADD) {
1184                         kn = knote_alloc();
1185                         if (kn == NULL) {
1186                                 proc_fdunlock(p);
1187                                 error = ENOMEM;
1188                                 goto done;
1189                         }
1190                         kn->kn_fp = fp;
1191                         kn->kn_kq = kq;
1192                         kn->kn_tq = &kq->kq_head;
1193                         kn->kn_fop = fops;
1194                         kn->kn_sfflags = kev->fflags;
1195                         kn->kn_sdata = kev->data;
1196                         kev->fflags = 0;
1197                         kev->data = 0;
1198                         kn->kn_kevent = *kev;
1199                         kn->kn_inuse = 1;  /* for f_attach() */
1200                         kn->kn_status = 0;
1201
1202                         /* before anyone can find it */
1203                         if (kev->flags & EV_DISABLE)
1204                                 kn->kn_status |= KN_DISABLED;
1205
1206                         error = knote_fdpattach(kn, fdp, p);
1207                         proc_fdunlock(p);
1208
1209                         if (error) {
1210                                 knote_free(kn);
1211                                 goto done;
1212                         }
1213
1214                         /*
1215                          * apply reference count to knote structure, and
1216                          * do not release it at the end of this routine.
1217                          */
1218                         fp = NULL;
1219
1220                         /*
1221                          * If the attach fails here, we can drop it knowing
1222                          * that nobody else has a reference to the knote.
1223                          */
1224                         if ((error = fops->f_attach(kn)) != 0) {
1225                                 knote_drop(kn, p);
1226                                 goto done;
1227                         }
1228                 } else {
1229                         proc_fdunlock(p);
1230                         error = ENOENT;
1231                         goto done;
1232                 }
1233         } else {
1234                 /* existing knote - get kqueue lock */
1235                 kqlock(kq);
1236                 proc_fdunlock(p);
1237
1238                 if (kev->flags & EV_DELETE) {
1239                         knote_dequeue(kn);
1240                         kn->kn_status |= KN_DISABLED;
1241                         if (kqlock2knotedrop(kq, kn)) {
1242                                 kn->kn_fop->f_detach(kn);
1243                                 knote_drop(kn, p);
1244                         }
1245                         goto done;
1246                 }
1247
1248                 /* update status flags for existing knote */
1249                 if (kev->flags & EV_DISABLE) {
1250                         knote_dequeue(kn);
1251                         kn->kn_status |= KN_DISABLED;
1252                 } else if (kev->flags & EV_ENABLE) {
1253                         kn->kn_status &= ~KN_DISABLED;
1254                         if (kn->kn_status & KN_ACTIVE)
1255                                 knote_enqueue(kn);
1256                 }
1257
1258                 /*
1259                  * If somebody is in the middle of dropping this
1260                  * knote - go find/insert a new one.  But we have
1261                  * wait for this one to go away first.
1262                  */
1263                 if (!kqlock2knoteusewait(kq, kn))
1264                         /* kqueue unlocked */
1265                         goto restart;
1266
1267                 /*
1268                  * The user may change some filter values after the
1269                  * initial EV_ADD, but doing so will not reset any
1270                  * filter which have already been triggered.
1271                  */
1272                 kn->kn_sfflags = kev->fflags;
1273                 kn->kn_sdata = kev->data;
1274                 kn->kn_kevent.udata = kev->udata;
1275         }
1276
1277         /* still have use ref on knote */
1278         if (kn->kn_fop->f_event(kn, 0)) {
1279                 if (knoteuse2kqlock(kq, kn))
1280                         knote_activate(kn);
1281                 kqunlock(kq);
1282         } else {
1283                 knote_put(kn);
1284         }
1285
1286 done:
1287         if (fp != NULL)
1288                 fp_drop(p, kev->ident, fp, 0);
1289         return (error);
1290 }
1291
1292 /*
1293  * kevent_process - process the triggered events in a kqueue
1294  *
1295  *      Walk the queued knotes and validate that they are
1296  *      really still triggered events by calling the filter
1297  *      routines (if necessary).  Hold a use reference on
1298  *      the knote to avoid it being detached. For each event
1299  *      that is still considered triggered, invoke the
1300  *      callback routine provided.
1301  *
1302  *      caller holds a reference on the kqueue.
1303  *      kqueue locked on entry and exit - but may be dropped
1304  */
1305
1306 static int
1307 kevent_process(struct kqueue *kq,
1308                kevent_callback_t callback,
1309                void *data,
1310                int *countp,
1311                struct proc *p)
1312 {
1313         struct knote *kn;
1314         struct kevent kev;
1315         int nevents;
1316         int error;
1317
1318  restart:
1319         if (kq->kq_count == 0) {
1320                 *countp = 0;
1321                 return 0;
1322         }
1323
1324         /* if someone else is processing the queue, wait */
1325         if (!TAILQ_EMPTY(&kq->kq_inprocess)) {
1326                 assert_wait(&kq->kq_inprocess, THREAD_UNINT);
1327                 kq->kq_state |= KQ_PROCWAIT;
1328                 kqunlock(kq);
1329                 thread_block(THREAD_CONTINUE_NULL);
1330                 kqlock(kq);
1331                 goto restart;
1332         }
1333
1334         error = 0;
1335         nevents = 0;
1336         while (error == 0 &&
1337                (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) {
1338
1339                 /*
1340                  * Take note off the active queue.
1341                  *
1342                  * Non-EV_ONESHOT events must be re-validated.
1343                  *
1344                  * Convert our lock to a use-count and call the event's
1345                  * filter routine to update.
1346                  *
1347                  * If the event is valid, or triggered while the kq
1348                  * is unlocked, move to the inprocess queue for processing.
1349                  */
1350
1351                 if ((kn->kn_flags & EV_ONESHOT) == 0) {
1352                         int result;
1353                         knote_deactivate(kn);
1354
1355                         if (kqlock2knoteuse(kq, kn)) {
1356
1357                                 /* call the filter with just a ref */
1358                                 result = kn->kn_fop->f_event(kn, 0);
1359
1360                                 /* if it's still alive, make sure it's active */
1361                                 if (knoteuse2kqlock(kq, kn) && result) {
1362                                         /* may have been reactivated in filter*/
1363                                         if (!(kn->kn_status & KN_ACTIVE)) {
1364                                                 knote_activate(kn);
1365                                         }
1366                                 } else {
1367                                         continue;
1368                                 }
1369                         } else {
1370                                 continue;
1371                         }
1372                 }
1373
1374                 /* knote is active: move onto inprocess queue */
1375                 assert(kn->kn_tq == &kq->kq_head);
1376                 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1377                 kn->kn_tq = &kq->kq_inprocess;
1378                 TAILQ_INSERT_TAIL(&kq->kq_inprocess, kn, kn_tqe);
1379
1380                 /*
1381                  * Got a valid triggered knote with the kqueue
1382                  * still locked.  Snapshot the data, and determine
1383                  * how to dispatch the knote for future events.
1384                  */
1385                 kev = kn->kn_kevent;
1386
1387                 /* now what happens to it? */
1388                 if (kn->kn_flags & EV_ONESHOT) {
1389                         knote_deactivate(kn);
1390                         if (kqlock2knotedrop(kq, kn)) {
1391                                 kn->kn_fop->f_detach(kn);
1392                                 knote_drop(kn, p);
1393                         }
1394                 } else if (kn->kn_flags & EV_CLEAR) {
1395                         knote_deactivate(kn);
1396                         kn->kn_data = 0;
1397                         kn->kn_fflags = 0;
1398                         kqunlock(kq);
1399                 } else {
1400                         /*
1401                          * leave on in-process queue.  We'll
1402                          * move all the remaining ones back
1403                          * the kq queue and wakeup any
1404                          * waiters when we are done.
1405                          */
1406                         kqunlock(kq);
1407                 }
1408
1409                 /* callback to handle each event as we find it */
1410                 error = (callback)(kq, &kev, data);
1411                 nevents++;
1412
1413                 kqlock(kq);
1414         }
1415
1416         /*
1417          * With the kqueue still locked, move any knotes
1418          * remaining on the in-process queue back to the
1419          * kq's queue and wake up any waiters.
1420          */
1421         while ((kn = TAILQ_FIRST(&kq->kq_inprocess)) != NULL) {
1422                 assert(kn->kn_tq == &kq->kq_inprocess);
1423                 TAILQ_REMOVE(&kq->kq_inprocess, kn, kn_tqe);
1424                 kn->kn_tq = &kq->kq_head;
1425                 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1426         }
1427         if (kq->kq_state & KQ_PROCWAIT) {
1428                 kq->kq_state &= ~KQ_PROCWAIT;
1429                 thread_wakeup(&kq->kq_inprocess);
1430         }
1431
1432         *countp = nevents;
1433         return error;
1434 }
1435
1436
1437 static void
1438 kevent_scan_continue(void *data, wait_result_t wait_result)
1439 {
1440         uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1441         struct _kevent_scan * cont_args = &ut->uu_kevent.ss_kevent_scan;
1442         struct kqueue *kq = (struct kqueue *)data;
1443         int error;
1444         int count;
1445
1446         /* convert the (previous) wait_result to a proper error */
1447         switch (wait_result) {
1448         case THREAD_AWAKENED:
1449                 kqlock(kq);
1450                 error = kevent_process(kq, cont_args->call, cont_args, &count, current_proc());
1451                 if (error == 0 && count == 0) {
1452                         assert_wait_deadline(kq, THREAD_ABORTSAFE, cont_args->deadline);
1453                         kq->kq_state |= KQ_SLEEP;
1454                         kqunlock(kq);
1455                         thread_block_parameter(kevent_scan_continue, kq);
1456                         /* NOTREACHED */
1457                 }
1458                 kqunlock(kq);
1459                 break;
1460         case THREAD_TIMED_OUT:
1461                 error = EWOULDBLOCK;
1462                 break;
1463         case THREAD_INTERRUPTED:
1464                 error = EINTR;
1465                 break;
1466         default:
1467                 panic("kevent_scan_cont() - invalid wait_result (%d)", wait_result);
1468                 error = 0;
1469         }
1470
1471         /* call the continuation with the results */
1472         assert(cont_args->cont != NULL);
1473         (cont_args->cont)(kq, cont_args->data, error);
1474 }
1475
1476
1477 /*
1478  * kevent_scan - scan and wait for events in a kqueue
1479  *
1480  *      Process the triggered events in a kqueue.
1481  *
1482  *      If there are no events triggered arrange to
1483  *      wait for them. If the caller provided a
1484  *      continuation routine, then kevent_scan will
1485  *      also.
1486  *
1487  *      The callback routine must be valid.
1488  *      The caller must hold a use-count reference on the kq.
1489  */
1490
1491 int
1492 kevent_scan(struct kqueue *kq,
1493             kevent_callback_t callback,
1494             kevent_continue_t continuation,
1495             void *data,
1496             struct timeval *atvp,
1497             struct proc *p)
1498 {
1499         thread_continue_t cont = THREAD_CONTINUE_NULL;
1500         uint64_t deadline;
1501         int error;
1502         int first;
1503
1504         assert(callback != NULL);
1505
1506         first = 1;
1507         for (;;) {
1508                 wait_result_t wait_result;
1509                 int count;
1510
1511                 /*
1512                  * Make a pass through the kq to find events already
1513                  * triggered.
1514                  */
1515                 kqlock(kq);
1516                 error = kevent_process(kq, callback, data, &count, p);
1517                 if (error || count)
1518                         break; /* lock still held */
1519
1520                 /* looks like we have to consider blocking */
1521                 if (first) {
1522                         first = 0;
1523                         /* convert the timeout to a deadline once */
1524                         if (atvp->tv_sec || atvp->tv_usec) {
1525                                 uint64_t now;
1526
1527                                 clock_get_uptime(&now);
1528                                 nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC +
1529                                                             atvp->tv_usec * NSEC_PER_USEC,
1530                                                             &deadline);
1531                                 if (now >= deadline) {
1532                                         /* non-blocking call */
1533                                         error = EWOULDBLOCK;
1534                                         break; /* lock still held */
1535                                 }
1536                                 deadline -= now;
1537                                 clock_absolutetime_interval_to_deadline(deadline, &deadline);
1538                         } else {
1539                                 deadline = 0;   /* block forever */
1540                         }
1541
1542                         if (continuation) {
1543                                 uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1544                                 struct _kevent_scan *cont_args = &ut->uu_kevent.ss_kevent_scan;
1545
1546                                 cont_args->call = callback;
1547                                 cont_args->cont = continuation;
1548                                 cont_args->deadline = deadline;
1549                                 cont_args->data = data;
1550                                 cont = kevent_scan_continue;
1551                         }
1552                 }
1553
1554                 /* go ahead and wait */
1555                 assert_wait_deadline(kq, THREAD_ABORTSAFE, deadline);
1556                 kq->kq_state |= KQ_SLEEP;
1557                 kqunlock(kq);
1558                 wait_result = thread_block_parameter(cont, kq);
1559                 /* NOTREACHED if (continuation != NULL) */
1560
1561                 switch (wait_result) {
1562                 case THREAD_AWAKENED:
1563                         continue;
1564                 case THREAD_TIMED_OUT:
1565                         return EWOULDBLOCK;
1566                 case THREAD_INTERRUPTED:
1567                         return EINTR;
1568                 default:
1569                         panic("kevent_scan - bad wait_result (%d)",
1570                               wait_result);
1571                         error = 0;
1572                 }
1573         }
1574         kqunlock(kq);
1575         return error;
1576 }
1577
1578
1579 /*
1580  * XXX
1581  * This could be expanded to call kqueue_scan, if desired.
1582  */
1583 /*ARGSUSED*/
1584 static int
1585 kqueue_read(__unused struct fileproc *fp,
1586                         __unused struct uio *uio,
1587                         __unused int flags,
1588                         __unused vfs_context_t ctx)
1589 {
1590         return (ENXIO);
1591 }
1592
1593 /*ARGSUSED*/
1594 static int
1595 kqueue_write(__unused struct fileproc *fp,
1596                          __unused struct uio *uio,
1597                          __unused int flags,
1598                          __unused vfs_context_t ctx)
1599 {
1600         return (ENXIO);
1601 }
1602
1603 /*ARGSUSED*/
1604 static int
1605 kqueue_ioctl(__unused struct fileproc *fp,
1606                          __unused u_long com,
1607                          __unused caddr_t data,
1608                          __unused vfs_context_t ctx)
1609 {
1610         return (ENOTTY);
1611 }
1612
1613 /*ARGSUSED*/
1614 static int
1615 kqueue_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
1616 {
1617         struct kqueue *kq = (struct kqueue *)fp->f_data;
1618         int retnum = 0;
1619
1620         if (which == FREAD) {
1621                 kqlock(kq);
1622                 if (kq->kq_count) {
1623                         retnum = 1;
1624                 } else {
1625                         selrecord(vfs_context_proc(ctx), &kq->kq_sel, wql);
1626                         kq->kq_state |= KQ_SEL;
1627                 }
1628                 kqunlock(kq);
1629         }
1630         return (retnum);
1631 }
1632
1633 /*
1634  * kqueue_close -
1635  */
1636 /*ARGSUSED*/
1637 static int
1638 kqueue_close(struct fileglob *fg, __unused vfs_context_t ctx)
1639 {
1640         struct kqueue *kq = (struct kqueue *)fg->fg_data;
1641
1642         kqueue_dealloc(kq);
1643         fg->fg_data = NULL;
1644         return (0);
1645 }
1646
1647 /*ARGSUSED*/
1648 /*
1649  * The callers has taken a use-count reference on this kqueue and will donate it
1650  * to the kqueue we are being added to.  This keeps the kqueue from closing until
1651  * that relationship is torn down.
1652  */
1653 static int
1654 kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_context_t ctx)
1655 {
1656         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
1657         struct kqueue *parentkq = kn->kn_kq;
1658
1659         if (parentkq == kq ||
1660             kn->kn_filter != EVFILT_READ)
1661                 return (1);
1662
1663         /*
1664          * We have to avoid creating a cycle when nesting kqueues
1665          * inside another.  Rather than trying to walk the whole
1666          * potential DAG of nested kqueues, we just use a simple
1667          * ceiling protocol.  When a kqueue is inserted into another,
1668          * we check that the (future) parent is not already nested
1669          * into another kqueue at a lower level than the potenial
1670          * child (because it could indicate a cycle).  If that test
1671          * passes, we just mark the nesting levels accordingly.
1672          */
1673
1674         kqlock(parentkq);
1675         if (parentkq->kq_level > 0 &&
1676             parentkq->kq_level < kq->kq_level)
1677         {
1678                 kqunlock(parentkq);
1679                 return (1);
1680         } else {
1681                 /* set parent level appropriately */
1682                 if (parentkq->kq_level == 0)
1683                         parentkq->kq_level = 2;
1684                 if (parentkq->kq_level < kq->kq_level + 1)
1685                         parentkq->kq_level = kq->kq_level + 1;
1686                 kqunlock(parentkq);
1687
1688                 kn->kn_fop = &kqread_filtops;
1689                 kqlock(kq);
1690                 KNOTE_ATTACH(&kq->kq_sel.si_note, kn);
1691                 /* indicate nesting in child, if needed */
1692                 if (kq->kq_level == 0)
1693                         kq->kq_level = 1;
1694                 kqunlock(kq);
1695                 return (0);
1696         }
1697 }
1698
1699 /*ARGSUSED*/
1700 int
1701 kqueue_stat(struct fileproc *fp, void *ub, int isstat64,  __unused vfs_context_t ctx)
1702 {
1703         struct stat *sb = (struct stat *)0;     /* warning avoidance ; protected by isstat64 */
1704         struct stat64 * sb64 = (struct stat64 *)0;  /* warning avoidance ; protected by isstat64 */
1705
1706         struct kqueue *kq = (struct kqueue *)fp->f_data;
1707         if (isstat64 != 0) {
1708                 sb64 = (struct stat64 *)ub;
1709                 bzero((void *)sb64, sizeof(*sb64));
1710                 sb64->st_size = kq->kq_count;
1711                 sb64->st_blksize = sizeof(struct kevent);
1712                 sb64->st_mode = S_IFIFO;
1713         } else {
1714                 sb = (struct stat *)ub;
1715                 bzero((void *)sb, sizeof(*sb));
1716                 sb->st_size = kq->kq_count;
1717                 sb->st_blksize = sizeof(struct kevent);
1718                 sb->st_mode = S_IFIFO;
1719         }
1720
1721         return (0);
1722 }
1723
1724 /*
1725  * Called with the kqueue locked
1726  */
1727 static void
1728 kqueue_wakeup(struct kqueue *kq)
1729 {
1730
1731         if (kq->kq_state & KQ_SLEEP) {
1732                 kq->kq_state &= ~KQ_SLEEP;
1733                 thread_wakeup(kq);
1734         }
1735         if (kq->kq_state & KQ_SEL) {
1736                 kq->kq_state &= ~KQ_SEL;
1737                 selwakeup(&kq->kq_sel);
1738         }
1739         KNOTE(&kq->kq_sel.si_note, 0);
1740 }
1741
1742 void
1743 klist_init(struct klist *list)
1744 {
1745         SLIST_INIT(list);
1746 }
1747
1748
1749 /*
1750  * Query/Post each knote in the object's list
1751  *
1752  *      The object lock protects the list. It is assumed
1753  *      that the filter/event routine for the object can
1754  *      determine that the object is already locked (via
1755  *      the hind) and not deadlock itself.
1756  *
1757  *      The object lock should also hold off pending
1758  *      detach/drop operations.  But we'll prevent it here
1759  *      too - just in case.
1760  */
1761 void
1762 knote(struct klist *list, long hint)
1763 {
1764         struct knote *kn;
1765
1766         SLIST_FOREACH(kn, list, kn_selnext) {
1767                 struct kqueue *kq = kn->kn_kq;
1768
1769                 kqlock(kq);
1770                 if (kqlock2knoteuse(kq, kn)) {
1771                         int result;
1772
1773                         /* call the event with only a use count */
1774                         result = kn->kn_fop->f_event(kn, hint);
1775
1776                         /* if its not going away and triggered */
1777                         if (knoteuse2kqlock(kq, kn) && result)
1778                                 knote_activate(kn);
1779                         /* lock held again */
1780                 }
1781                 kqunlock(kq);
1782         }
1783 }
1784
1785 /*
1786  * attach a knote to the specified list.  Return true if this is the first entry.
1787  * The list is protected by whatever lock the object it is associated with uses.
1788  */
1789 int
1790 knote_attach(struct klist *list, struct knote *kn)
1791 {
1792         int ret = SLIST_EMPTY(list);
1793         SLIST_INSERT_HEAD(list, kn, kn_selnext);
1794         return ret;
1795 }
1796
1797 /*
1798  * detach a knote from the specified list.  Return true if that was the last entry.
1799  * The list is protected by whatever lock the object it is associated with uses.
1800  */
1801 int
1802 knote_detach(struct klist *list, struct knote *kn)
1803 {
1804         SLIST_REMOVE(list, kn, knote, kn_selnext);
1805         return SLIST_EMPTY(list);
1806 }
1807
1808 /*
1809  * remove all knotes referencing a specified fd
1810  *
1811  * Essentially an inlined knote_remove & knote_drop
1812  * when we know for sure that the thing is a file
1813  *
1814  * Entered with the proc_fd lock already held.
1815  * It returns the same way, but may drop it temporarily.
1816  */
1817 void
1818 knote_fdclose(struct proc *p, int fd)
1819 {
1820         struct filedesc *fdp = p->p_fd;
1821         struct klist *list;
1822         struct knote *kn;
1823
1824         list = &fdp->fd_knlist[fd];
1825         while ((kn = SLIST_FIRST(list)) != NULL) {
1826                 struct kqueue *kq = kn->kn_kq;
1827
1828                 if (kq->kq_p != p)
1829                         panic("knote_fdclose: proc mismatch (kq->kq_p=%p != p=%p)", kq->kq_p, p);
1830
1831                 kqlock(kq);
1832                 proc_fdunlock(p);
1833
1834                 /*
1835                  * Convert the lock to a drop ref.
1836                  * If we get it, go ahead and drop it.
1837                  * Otherwise, we waited for it to
1838                  * be dropped by the other guy, so
1839                  * it is safe to move on in the list.
1840                  */
1841                 if (kqlock2knotedrop(kq, kn)) {
1842                         kn->kn_fop->f_detach(kn);
1843                         knote_drop(kn, p);
1844                 }
1845
1846                 proc_fdlock(p);
1847
1848                 /* the fd tables may have changed - start over */
1849                 list = &fdp->fd_knlist[fd];
1850         }
1851 }
1852
1853 /* proc_fdlock held on entry (and exit) */
1854 static int
1855 knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
1856 {
1857         struct klist *list = NULL;
1858
1859         if (! kn->kn_fop->f_isfd) {
1860                 if (fdp->fd_knhashmask == 0)
1861                         fdp->fd_knhash = hashinit(CONFIG_KN_HASHSIZE, M_KQUEUE,
1862                             &fdp->fd_knhashmask);
1863                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1864         } else {
1865                 if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
1866                         u_int size = 0;
1867
1868                         /* have to grow the fd_knlist */
1869                         size = fdp->fd_knlistsize;
1870                         while (size <= kn->kn_id)
1871                                 size += KQEXTENT;
1872                         MALLOC(list, struct klist *,
1873                                size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
1874                         if (list == NULL)
1875                                 return (ENOMEM);
1876
1877                         bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
1878                               fdp->fd_knlistsize * sizeof(struct klist *));
1879                         bzero((caddr_t)list +
1880                               fdp->fd_knlistsize * sizeof(struct klist *),
1881                               (size - fdp->fd_knlistsize) * sizeof(struct klist *));
1882                         FREE(fdp->fd_knlist, M_KQUEUE);
1883                         fdp->fd_knlist = list;
1884                         fdp->fd_knlistsize = size;
1885                 }
1886                 list = &fdp->fd_knlist[kn->kn_id];
1887         }
1888         SLIST_INSERT_HEAD(list, kn, kn_link);
1889         return (0);
1890 }
1891
1892
1893
1894 /*
1895  * should be called at spl == 0, since we don't want to hold spl
1896  * while calling fdrop and free.
1897  */
1898 static void
1899 knote_drop(struct knote *kn, __unused struct proc *ctxp)
1900 {
1901         struct kqueue *kq = kn->kn_kq;
1902         struct proc *p = kq->kq_p;
1903         struct filedesc *fdp = p->p_fd;
1904         struct klist *list;
1905
1906         proc_fdlock(p);
1907         if (kn->kn_fop->f_isfd)
1908                 list = &fdp->fd_knlist[kn->kn_id];
1909         else
1910                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1911
1912         SLIST_REMOVE(list, kn, knote, kn_link);
1913         kqlock(kq);
1914         knote_dequeue(kn);
1915         if (kn->kn_status & KN_DROPWAIT)
1916                 thread_wakeup(&kn->kn_status);
1917         kqunlock(kq);
1918         proc_fdunlock(p);
1919
1920         if (kn->kn_fop->f_isfd)
1921                 fp_drop(p, kn->kn_id, kn->kn_fp, 0);
1922
1923         knote_free(kn);
1924 }
1925
1926 /* called with kqueue lock held */
1927 static void
1928 knote_activate(struct knote *kn)
1929 {
1930         struct kqueue *kq = kn->kn_kq;
1931
1932         kn->kn_status |= KN_ACTIVE;
1933         knote_enqueue(kn);
1934         kqueue_wakeup(kq);
1935  }
1936
1937 /* called with kqueue lock held */
1938 static void
1939 knote_deactivate(struct knote *kn)
1940 {
1941         kn->kn_status &= ~KN_ACTIVE;
1942         knote_dequeue(kn);
1943 }
1944
1945 /* called with kqueue lock held */
1946 static void
1947 knote_enqueue(struct knote *kn)
1948 {
1949         struct kqueue *kq = kn->kn_kq;
1950
1951         if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
1952                 struct kqtailq *tq = kn->kn_tq;
1953
1954                 TAILQ_INSERT_TAIL(tq, kn, kn_tqe);
1955                 kn->kn_status |= KN_QUEUED;
1956                 kq->kq_count++;
1957         }
1958 }
1959
1960 /* called with kqueue lock held */
1961 static void
1962 knote_dequeue(struct knote *kn)
1963 {
1964         struct kqueue *kq = kn->kn_kq;
1965
1966         //assert((kn->kn_status & KN_DISABLED) == 0);
1967         if ((kn->kn_status & KN_QUEUED) == KN_QUEUED) {
1968                 struct kqtailq *tq = kn->kn_tq;
1969
1970                 TAILQ_REMOVE(tq, kn, kn_tqe);
1971                 kn->kn_tq = &kq->kq_head;
1972                 kn->kn_status &= ~KN_QUEUED;
1973                 kq->kq_count--;
1974         }
1975 }
1976
1977 void
1978 knote_init(void)
1979 {
1980         knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), 8192, "knote zone");
1981
1982         /* allocate kq lock group attribute and group */
1983         kq_lck_grp_attr= lck_grp_attr_alloc_init();
1984
1985         kq_lck_grp = lck_grp_alloc_init("kqueue",  kq_lck_grp_attr);
1986
1987         /* Allocate kq lock attribute */
1988         kq_lck_attr = lck_attr_alloc_init();
1989
1990         /* Initialize the timer filter lock */
1991         lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
1992 }
1993 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
1994
1995 static struct knote *
1996 knote_alloc(void)
1997 {
1998         return ((struct knote *)zalloc(knote_zone));
1999 }
2000
2001 static void
2002 knote_free(struct knote *kn)
2003 {
2004         zfree(knote_zone, kn);
2005 }
2006
2007 #if SOCKETS
2008 #include <sys/param.h>
2009 #include <sys/socket.h>
2010 #include <sys/protosw.h>
2011 #include <sys/domain.h>
2012 #include <sys/mbuf.h>
2013 #include <sys/kern_event.h>
2014 #include <sys/malloc.h>
2015 #include <sys/sys_domain.h>
2016 #include <sys/syslog.h>
2017
2018
2019 static int kev_attach(struct socket *so, int proto, struct proc *p);
2020 static int kev_detach(struct socket *so);
2021 static int kev_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p);
2022
2023 struct pr_usrreqs event_usrreqs = {
2024      pru_abort_notsupp, pru_accept_notsupp, kev_attach, pru_bind_notsupp, pru_connect_notsupp,
2025      pru_connect2_notsupp, kev_control, kev_detach, pru_disconnect_notsupp,
2026      pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp, pru_rcvoob_notsupp,
2027      pru_send_notsupp, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp,
2028      pru_sosend_notsupp, soreceive, pru_sopoll_notsupp
2029 };
2030
2031 struct protosw eventsw[] = {
2032      {
2033           SOCK_RAW,             &systemdomain,  SYSPROTO_EVENT,         PR_ATOMIC,
2034           0,            0,              0,              0,
2035           0,
2036           0,            0,              0,              0,
2037 #if __APPLE__
2038           0,
2039 #endif
2040           &event_usrreqs,
2041           0,            0,              0,
2042 #if __APPLE__
2043           {0, 0},       0,              {0}
2044 #endif
2045      }
2046 };
2047
2048 static
2049 struct kern_event_head kern_event_head;
2050
2051 static u_long static_event_id = 0;
2052 struct domain *sysdom = &systemdomain;
2053 static lck_mtx_t *sys_mtx;
2054
2055 /*
2056  * Install the protosw's for the NKE manager.  Invoked at
2057  *  extension load time
2058  */
2059 int
2060 kern_event_init(void)
2061 {
2062     int retval;
2063
2064     if ((retval = net_add_proto(eventsw, &systemdomain)) != 0) {
2065             log(LOG_WARNING, "Can't install kernel events protocol (%d)\n", retval);
2066             return(retval);
2067         }
2068
2069     /*
2070      * Use the domain mutex for all system event sockets
2071      */
2072     sys_mtx = sysdom->dom_mtx;
2073
2074     return(KERN_SUCCESS);
2075 }
2076
2077 static int
2078 kev_attach(struct socket *so, __unused int proto, __unused struct proc *p)
2079 {
2080      int error;
2081      struct kern_event_pcb  *ev_pcb;
2082
2083      error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE);
2084      if (error)
2085           return error;
2086
2087      MALLOC(ev_pcb, struct kern_event_pcb *, sizeof(struct kern_event_pcb), M_PCB, M_WAITOK);
2088      if (ev_pcb == 0)
2089           return ENOBUFS;
2090
2091      ev_pcb->ev_socket = so;
2092      ev_pcb->vendor_code_filter = 0xffffffff;
2093
2094      so->so_pcb = (caddr_t) ev_pcb;
2095      lck_mtx_lock(sys_mtx);
2096      LIST_INSERT_HEAD(&kern_event_head, ev_pcb, ev_link);
2097      lck_mtx_unlock(sys_mtx);
2098
2099      return 0;
2100 }
2101
2102
2103 static int
2104 kev_detach(struct socket *so)
2105 {
2106      struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2107
2108      if (ev_pcb != 0) {
2109                 LIST_REMOVE(ev_pcb, ev_link);
2110                 FREE(ev_pcb, M_PCB);
2111                 so->so_pcb = 0;
2112                 so->so_flags |= SOF_PCBCLEARING;
2113      }
2114
2115      return 0;
2116 }
2117
2118 /*
2119  * For now, kev_vendor_code and mbuf_tags use the same
2120  * mechanism.
2121  */
2122
2123 errno_t kev_vendor_code_find(
2124         const char      *string,
2125         u_int32_t       *out_vendor_code)
2126 {
2127         if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
2128                 return EINVAL;
2129         }
2130         return mbuf_tag_id_find_internal(string, out_vendor_code, 1);
2131 }
2132
2133 errno_t  kev_msg_post(struct kev_msg *event_msg)
2134 {
2135         mbuf_tag_id_t   min_vendor, max_vendor;
2136
2137         mbuf_tag_id_first_last(&min_vendor, &max_vendor);
2138
2139         if (event_msg == NULL)
2140                 return EINVAL;
2141
2142         /* Limit third parties to posting events for registered vendor codes only */
2143         if (event_msg->vendor_code < min_vendor ||
2144                 event_msg->vendor_code > max_vendor)
2145         {
2146                 return EINVAL;
2147         }
2148
2149         return kev_post_msg(event_msg);
2150 }
2151
2152
2153 int  kev_post_msg(struct kev_msg *event_msg)
2154 {
2155      struct mbuf *m, *m2;
2156      struct kern_event_pcb  *ev_pcb;
2157      struct kern_event_msg  *ev;
2158      char              *tmp;
2159      unsigned long     total_size;
2160      int               i;
2161
2162         /* Verify the message is small enough to fit in one mbuf w/o cluster */
2163         total_size = KEV_MSG_HEADER_SIZE;
2164
2165         for (i = 0; i < 5; i++) {
2166                 if (event_msg->dv[i].data_length == 0)
2167                         break;
2168                 total_size += event_msg->dv[i].data_length;
2169         }
2170
2171         if (total_size > MLEN) {
2172                 return EMSGSIZE;
2173         }
2174
2175      m = m_get(M_DONTWAIT, MT_DATA);
2176      if (m == 0)
2177           return ENOBUFS;
2178
2179      ev = mtod(m, struct kern_event_msg *);
2180      total_size = KEV_MSG_HEADER_SIZE;
2181
2182      tmp = (char *) &ev->event_data[0];
2183      for (i = 0; i < 5; i++) {
2184           if (event_msg->dv[i].data_length == 0)
2185                break;
2186
2187           total_size += event_msg->dv[i].data_length;
2188           bcopy(event_msg->dv[i].data_ptr, tmp,
2189                 event_msg->dv[i].data_length);
2190           tmp += event_msg->dv[i].data_length;
2191      }
2192
2193      ev->id = ++static_event_id;
2194      ev->total_size   = total_size;
2195      ev->vendor_code  = event_msg->vendor_code;
2196      ev->kev_class    = event_msg->kev_class;
2197      ev->kev_subclass = event_msg->kev_subclass;
2198      ev->event_code   = event_msg->event_code;
2199
2200      m->m_len = total_size;
2201      lck_mtx_lock(sys_mtx);
2202      for (ev_pcb = LIST_FIRST(&kern_event_head);
2203           ev_pcb;
2204           ev_pcb = LIST_NEXT(ev_pcb, ev_link)) {
2205
2206           if (ev_pcb->vendor_code_filter != KEV_ANY_VENDOR) {
2207                if (ev_pcb->vendor_code_filter != ev->vendor_code)
2208                     continue;
2209
2210                if (ev_pcb->class_filter != KEV_ANY_CLASS) {
2211                     if (ev_pcb->class_filter != ev->kev_class)
2212                          continue;
2213
2214                     if ((ev_pcb->subclass_filter != KEV_ANY_SUBCLASS) &&
2215                         (ev_pcb->subclass_filter != ev->kev_subclass))
2216                          continue;
2217                }
2218           }
2219
2220           m2 = m_copym(m, 0, m->m_len, M_NOWAIT);
2221           if (m2 == 0) {
2222                m_free(m);
2223                    lck_mtx_unlock(sys_mtx);
2224                return ENOBUFS;
2225           }
2226           /* the socket is already locked because we hold the sys_mtx here */
2227           if (sbappendrecord(&ev_pcb->ev_socket->so_rcv, m2))
2228                   sorwakeup(ev_pcb->ev_socket);
2229      }
2230
2231      m_free(m);
2232      lck_mtx_unlock(sys_mtx);
2233      return 0;
2234 }
2235
2236 static int
2237 kev_control(struct socket *so,
2238                         u_long cmd,
2239                         caddr_t data,
2240                         __unused struct ifnet *ifp,
2241                         __unused struct proc *p)
2242 {
2243         struct kev_request *kev_req = (struct kev_request *) data;
2244         struct kern_event_pcb  *ev_pcb;
2245         struct kev_vendor_code *kev_vendor;
2246         u_long  *id_value = (u_long *) data;
2247
2248
2249         switch (cmd) {
2250
2251                 case SIOCGKEVID:
2252                         *id_value = static_event_id;
2253                         break;
2254
2255                 case SIOCSKEVFILT:
2256                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2257                         ev_pcb->vendor_code_filter = kev_req->vendor_code;
2258                         ev_pcb->class_filter     = kev_req->kev_class;
2259                         ev_pcb->subclass_filter  = kev_req->kev_subclass;
2260                         break;
2261
2262                 case SIOCGKEVFILT:
2263                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2264                         kev_req->vendor_code = ev_pcb->vendor_code_filter;
2265                         kev_req->kev_class   = ev_pcb->class_filter;
2266                         kev_req->kev_subclass = ev_pcb->subclass_filter;
2267                         break;
2268
2269                 case SIOCGKEVVENDOR:
2270                         kev_vendor = (struct kev_vendor_code*)data;
2271
2272                         /* Make sure string is NULL terminated */
2273                         kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0;
2274
2275                         return mbuf_tag_id_find_internal(kev_vendor->vendor_string,
2276                                                                                          &kev_vendor->vendor_code, 0);
2277
2278                 default:
2279                         return ENOTSUP;
2280         }
2281
2282         return 0;
2283 }
2284
2285 #endif /* SOCKETS */
2286
2287
2288 int
2289 fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
2290 {
2291         struct vinfo_stat * st;
2292
2293         /* No need for the funnel as fd is kept alive */
2294
2295         st = &kinfo->kq_stat;
2296
2297         st->vst_size = kq->kq_count;
2298         st->vst_blksize = sizeof(struct kevent);
2299         st->vst_mode = S_IFIFO;
2300         if (kq->kq_state & KQ_SEL)
2301                 kinfo->kq_state |=  PROC_KQUEUE_SELECT;
2302         if (kq->kq_state & KQ_SLEEP)
2303                 kinfo->kq_state |= PROC_KQUEUE_SLEEP;
2304
2305         return(0);
2306 }
2307