bsd/kern/kern_event.c

   1 /*
   2  * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  *
  22  */
  23 /*-
  24  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
  25  * All rights reserved.
  26  *
  27  * Redistribution and use in source and binary forms, with or without
  28  * modification, are permitted provided that the following conditions
  29  * are met:
  30  * 1. Redistributions of source code must retain the above copyright
  31  *    notice, this list of conditions and the following disclaimer.
  32  * 2. Redistributions in binary form must reproduce the above copyright
  33  *    notice, this list of conditions and the following disclaimer in the
  34  *    documentation and/or other materials provided with the distribution.
  35  *
  36  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  37  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  38  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  39  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  40  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  41  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  42  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  44  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  45  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46  * SUCH DAMAGE.
  47  */
  48 /*
  49  *      @(#)kern_event.c       1.0 (3/31/2000)
  50  */
  51 #include <stdint.h>
  52
  53 #include <sys/param.h>
  54 #include <sys/systm.h>
  55 #include <sys/filedesc.h>
  56 #include <sys/kernel.h>
  57 #include <sys/proc_internal.h>
  58 #include <sys/kauth.h>
  59 #include <sys/malloc.h>
  60 #include <sys/unistd.h>
  61 #include <sys/file_internal.h>
  62 #include <sys/fcntl.h>
  63 #include <sys/select.h>
  64 #include <sys/queue.h>
  65 #include <sys/event.h>
  66 #include <sys/eventvar.h>
  67 #include <sys/protosw.h>
  68 #include <sys/socket.h>
  69 #include <sys/socketvar.h>
  70 #include <sys/stat.h>
  71 #include <sys/sysctl.h>
  72 #include <sys/uio.h>
  73 #include <sys/sysproto.h>
  74 #include <sys/user.h>
  75 #include <string.h>
  76
  77 #include <kern/lock.h>
  78 #include <kern/clock.h>
  79 #include <kern/thread_call.h>
  80 #include <kern/sched_prim.h>
  81 #include <kern/zalloc.h>
  82 #include <kern/assert.h>
  83
  84 #include <libkern/libkern.h>
  85
  86 extern void unix_syscall_return(int);
  87
  88 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
  89
  90 static inline void kqlock(struct kqueue *kq);
  91 static inline void kqunlock(struct kqueue *kq);
  92
  93 static int      kqlock2knoteuse(struct kqueue *kq, struct knote *kn);
  94 static int      kqlock2knoteusewait(struct kqueue *kq, struct knote *kn);
  95 static int      kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
  96 static int      knoteuse2kqlock(struct kqueue *kq, struct knote *kn);
  97
  98 static void     kqueue_wakeup(struct kqueue *kq);
  99 static int      kqueue_read(struct fileproc *fp, struct uio *uio,
 100                     kauth_cred_t cred, int flags, struct proc *p);
 101 static int      kqueue_write(struct fileproc *fp, struct uio *uio,
 102                     kauth_cred_t cred, int flags, struct proc *p);
 103 static int      kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
 104                     struct proc *p);
 105 static int      kqueue_select(struct fileproc *fp, int which, void *wql,
 106                     struct proc *p);
 107 static int      kqueue_close(struct fileglob *fp, struct proc *p);
 108 static int      kqueue_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
 109 extern int      kqueue_stat(struct fileproc *fp, struct stat *st, struct proc *p);
 110
/*
 * File-operations vector for kqueue descriptors; kqueue() installs its
 * address in fp->f_ops.  The initializer is positional, so the entries
 * must stay in the order in which struct fileops declares its members.
 */
static struct fileops kqueueops = {
        kqueue_read,
        kqueue_write,
        kqueue_ioctl,
        kqueue_select,
        kqueue_close,
        kqueue_kqfilter,
        0       /* NOTE(review): last slot left empty - confirm its meaning against struct fileops */
};
 120
 121 static int kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p);
 122 static int kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p);
 123
 124 static int      kevent_callback(struct kqueue *kq, struct kevent *kevp, void *data);
 125 static void     kevent_continue(struct kqueue *kq, void *data, int error);
 126 static void     kevent_scan_continue(void *contp, wait_result_t wait_result);
 127 static int      kevent_process(struct kqueue *kq, kevent_callback_t callback,
 128                                void *data, int *countp, struct proc *p);
 129 static void     knote_put(struct knote *kn);
 130 static int      knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p);
 131 static void     knote_drop(struct knote *kn, struct proc *p);
 132 static void     knote_activate(struct knote *kn);
 133 static void     knote_deactivate(struct knote *kn);
 134 static void     knote_enqueue(struct knote *kn);
 135 static void     knote_dequeue(struct knote *kn);
 136 static struct   knote *knote_alloc(void);
 137 static void     knote_free(struct knote *kn);
 138 extern void     knote_init(void);
 139
/*
 * Filter ops shared by the file-backed filters (see sysfilt_ops).
 * Only an attach routine is supplied here; filt_fileattach() forwards
 * to fo_kqfilter() on the knote's file.
 * NOTE(review): slot order appears to be { flag, attach, detach, event };
 * confirm against the struct filterops declaration.
 */
static int      filt_fileattach(struct knote *kn);
static struct filterops file_filtops =
        { 1, filt_fileattach, NULL, NULL };
 143
/*
 * Filter ops for a knote that watches another kqueue: no attach routine
 * in this table, filt_kqdetach() unlinks the knote from the watched
 * kqueue's kq_sel.si_note list, and filt_kqueue() reports kq_count.
 */
static void     filt_kqdetach(struct knote *kn);
static int      filt_kqueue(struct knote *kn, long hint);
static struct filterops kqread_filtops =
        { 1, NULL, filt_kqdetach, filt_kqueue };
 148
/*
 * placeholder for not-yet-implemented filters
 *
 * The attach routine, filt_badattach(), always returns ENOTSUP; the
 * remaining routine slots are left empty.
 */
static int      filt_badattach(struct knote *kn);
static struct filterops bad_filtops =
        { 0, filt_badattach, 0 , 0 };
 155
/*
 * Process filter (EVFILT_PROC entry of sysfilt_ops): the knote is linked
 * onto the target process's p_klist by filt_procattach() and unlinked by
 * filt_procdetach(); filt_proc() evaluates NOTE_* hints.
 */
static int      filt_procattach(struct knote *kn);
static void     filt_procdetach(struct knote *kn);
static int      filt_proc(struct knote *kn, long hint);

static struct filterops proc_filtops =
        { 0, filt_procattach, filt_procdetach, filt_proc };
 162
 163 extern struct filterops fs_filtops;
 164
 165 extern struct filterops sig_filtops;
 166
 167
/* Timer filter */
static int      filt_timercompute(struct knote *kn, uint64_t *abs_time);
static void     filt_timerexpire(void *knx, void *param1);
static int      filt_timerattach(struct knote *kn);
static void     filt_timerdetach(struct knote *kn);
static int      filt_timer(struct knote *kn, long hint);

static struct filterops timer_filtops =
        { 0, filt_timerattach, filt_timerdetach, filt_timer };

/*
 * to avoid arming timers that fire quicker than we can handle
 *
 * filt_timercompute() compares this against values produced by
 * nanoseconds_to_absolutetime(), so it is expressed in those units.
 * Intervals at or below the floor are reported as "immediate"
 * (deadline 0) instead of being armed.
 */
static uint64_t filt_timerfloor = 0;

/*
 * Single mutex behind filt_timerlock()/filt_timerunlock().  The timer
 * filter routines hold it while reading or updating a knote's kn_hook
 * (the thread call) and kn_hookid (timer state).
 */
static lck_mtx_t _filt_timerlock;
static void     filt_timerlock(void);
static void     filt_timerunlock(void);
 184
/*
 * Sentinel marker for a thread scanning through the list of
 * active knotes.
 *
 * Uses filt_badattach() as its attach routine, so an attempt to attach
 * through this table fails with ENOTSUP.
 * NOTE(review): the scanning code that uses the marker is outside this
 * part of the file.
 */
static struct filterops threadmarker_filtops =
        { 0, filt_badattach, 0, 0 };
 191
 192 static zone_t   knote_zone;
 193
 194 #define KN_HASHSIZE             64              /* XXX should be tunable */
 195 #define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))
 196
 197 #if 0
 198 extern struct filterops aio_filtops;
 199 #endif
 200
/*
 * Table of all system-defined filters.
 *
 * The trailing comment on each entry names the EVFILT_* filter it
 * serves; entries must stay in that order.  Filters without an
 * implementation point at bad_filtops, whose attach routine returns
 * ENOTSUP.
 * NOTE(review): the code that indexes this table is outside this part
 * of the file - confirm the index computation before reordering.
 */
static struct filterops *sysfilt_ops[] = {
        &file_filtops,                  /* EVFILT_READ */
        &file_filtops,                  /* EVFILT_WRITE */
#if 0
        &aio_filtops,                   /* EVFILT_AIO */
#else
        &bad_filtops,                   /* EVFILT_AIO */
#endif
        &file_filtops,                  /* EVFILT_VNODE */
        &proc_filtops,                  /* EVFILT_PROC */
        &sig_filtops,                   /* EVFILT_SIGNAL */
        &timer_filtops,                 /* EVFILT_TIMER */
        &bad_filtops,                   /* EVFILT_MACHPORT */
        &fs_filtops                     /* EVFILT_FS */
};
 219
 220 /*
 221  * kqueue/note lock attributes and implementations
 222  *
 223  *      kqueues have locks, while knotes have use counts
 224  *      Most of the knote state is guarded by the object lock.
 225  *      the knote "inuse" count and status use the kqueue lock.
 226  */
 227 lck_grp_attr_t * kq_lck_grp_attr;
 228 lck_grp_t * kq_lck_grp;
 229 lck_attr_t * kq_lck_attr;
 230
 231 static inline void
 232 kqlock(struct kqueue *kq)
 233 {
 234         lck_spin_lock(&kq->kq_lock);
 235 }
 236
 237 static inline void
 238 kqunlock(struct kqueue *kq)
 239 {
 240         lck_spin_unlock(&kq->kq_lock);
 241 }
 242
 243 /*
 244  * Convert a kq lock to a knote use referece.
 245  *
 246  *      If the knote is being dropped, we can't get
 247  *      a use reference, so just return with it
 248  *      still locked.
 249  *
 250  *      - kq locked at entry
 251  *      - unlock on exit if we get the use reference
 252  */
 253 static int
 254 kqlock2knoteuse(struct kqueue *kq, struct knote *kn)
 255 {
 256         if (kn->kn_status & KN_DROPPING)
 257                 return 0;
 258         kn->kn_inuse++;
 259         kqunlock(kq);
 260         return 1;
 261  }
 262
 263 /*
 264  * Convert a kq lock to a knote use referece.
 265  *
 266  *      If the knote is being dropped, we can't get
 267  *      a use reference, so just return with it
 268  *      still locked.
 269  *
 270  *      - kq locked at entry
 271  *      - kq always unlocked on exit
 272  */
 273 static int
 274 kqlock2knoteusewait(struct kqueue *kq, struct knote *kn)
 275 {
 276         if (!kqlock2knoteuse(kq, kn)) {
 277                 kn->kn_status |= KN_DROPWAIT;
 278                 assert_wait(&kn->kn_status, THREAD_UNINT);
 279                 kqunlock(kq);
 280                 thread_block(THREAD_CONTINUE_NULL);
 281                 return 0;
 282         }
 283         return 1;
 284  }
 285
 286 /*
 287  * Convert from a knote use reference back to kq lock.
 288  *
 289  *      Drop a use reference and wake any waiters if
 290  *      this is the last one.
 291  *
 292  *      The exit return indicates if the knote is
 293  *      still alive - but the kqueue lock is taken
 294  *      unconditionally.
 295  */
 296 static int
 297 knoteuse2kqlock(struct kqueue *kq, struct knote *kn)
 298 {
 299         kqlock(kq);
 300         if ((--kn->kn_inuse == 0) &&
 301             (kn->kn_status & KN_USEWAIT)) {
 302                 kn->kn_status &= ~KN_USEWAIT;
 303                 thread_wakeup(&kn->kn_inuse);
 304         }
 305         return ((kn->kn_status & KN_DROPPING) == 0);
 306  }
 307
 308 /*
 309  * Convert a kq lock to a knote drop referece.
 310  *
 311  *      If the knote is in use, wait for the use count
 312  *      to subside.  We first mark our intention to drop
 313  *      it - keeping other users from "piling on."
 314  *      If we are too late, we have to wait for the
 315  *      other drop to complete.
 316  *
 317  *      - kq locked at entry
 318  *      - always unlocked on exit.
 319  *      - caller can't hold any locks that would prevent
 320  *        the other dropper from completing.
 321  */
 322 static int
 323 kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
 324 {
 325
 326         if ((kn->kn_status & KN_DROPPING) == 0) {
 327                 kn->kn_status |= KN_DROPPING;
 328                 if (kn->kn_inuse > 0) {
 329                         kn->kn_status |= KN_USEWAIT;
 330                         assert_wait(&kn->kn_inuse, THREAD_UNINT);
 331                         kqunlock(kq);
 332                         thread_block(THREAD_CONTINUE_NULL);
 333                 } else
 334                         kqunlock(kq);
 335                 return 1;
 336         } else {
 337                 kn->kn_status |= KN_DROPWAIT;
 338                 assert_wait(&kn->kn_status, THREAD_UNINT);
 339                 kqunlock(kq);
 340                 thread_block(THREAD_CONTINUE_NULL);
 341                 return 0;
 342         }
 343 }
 344
 345 /*
 346  * Release a knote use count reference.
 347  */
 348 static void
 349 knote_put(struct knote *kn)
 350 {
 351         struct kqueue *kq = kn->kn_kq;
 352
 353         kqlock(kq);
 354         if ((--kn->kn_inuse == 0) &&
 355             (kn->kn_status & KN_USEWAIT)) {
 356                 kn->kn_status &= ~KN_USEWAIT;
 357                 thread_wakeup(&kn->kn_inuse);
 358         }
 359         kqunlock(kq);
 360  }
 361
 362
 363
 364 static int
 365 filt_fileattach(struct knote *kn)
 366 {
 367
 368         return (fo_kqfilter(kn->kn_fp, kn, current_proc()));
 369 }
 370
/*
 * Shorthand member names: they let code write fp->f_flag, fp->f_data,
 * and so on for fields that are reached through the f_fglob pointer.
 * Each macro expands to a member path, so it is only meaningful
 * directly after "->" or "." on an object that has an f_fglob member.
 */
#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data
 378
 379 static void
 380 filt_kqdetach(struct knote *kn)
 381 {
 382         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 383
 384         kqlock(kq);
 385         KNOTE_DETACH(&kq->kq_sel.si_note, kn);
 386         kqunlock(kq);
 387 }
 388
 389 /*ARGSUSED*/
 390 static int
 391 filt_kqueue(struct knote *kn, __unused long hint)
 392 {
 393         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 394
 395         kn->kn_data = kq->kq_count;
 396         return (kn->kn_data > 0);
 397 }
 398
 399 static int
 400 filt_procattach(struct knote *kn)
 401 {
 402         struct proc *p;
 403         int funnel_state;
 404
 405         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 406
 407         p = pfind(kn->kn_id);
 408         if (p == NULL) {
 409                 thread_funnel_set(kernel_flock, funnel_state);
 410                 return (ESRCH);
 411         }
 412
 413         kn->kn_flags |= EV_CLEAR;               /* automatically set */
 414
 415         /*
 416          * internal flag indicating registration done by kernel
 417          */
 418         if (kn->kn_flags & EV_FLAG1) {
 419                 kn->kn_data = (int)kn->kn_sdata;        /* ppid */
 420                 kn->kn_fflags = NOTE_CHILD;
 421                 kn->kn_flags &= ~EV_FLAG1;
 422         }
 423
 424         /* XXX lock the proc here while adding to the list? */
 425         KNOTE_ATTACH(&p->p_klist, kn);
 426
 427         thread_funnel_set(kernel_flock, funnel_state);
 428
 429         return (0);
 430 }
 431
 432 /*
 433  * The knote may be attached to a different process, which may exit,
 434  * leaving nothing for the knote to be attached to.  So when the process
 435  * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 436  * it will be deleted when read out.  However, as part of the knote deletion,
 437  * this routine is called, so a check is needed to avoid actually performing
 438  * a detach, because the original process does not exist any more.
 439  */
 440 static void
 441 filt_procdetach(struct knote *kn)
 442 {
 443         struct proc *p;
 444         int funnel_state;
 445
 446         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 447         p = pfind(kn->kn_id);
 448
 449         if (p != (struct proc *)NULL)
 450                 KNOTE_DETACH(&p->p_klist, kn);
 451
 452         thread_funnel_set(kernel_flock, funnel_state);
 453 }
 454
 455 static int
 456 filt_proc(struct knote *kn, long hint)
 457 {
 458         u_int event;
 459         int funnel_state;
 460
 461         funnel_state = thread_funnel_set(kernel_flock, TRUE);
 462
 463         /*
 464          * mask off extra data
 465          */
 466         event = (u_int)hint & NOTE_PCTRLMASK;
 467
 468         /*
 469          * if the user is interested in this event, record it.
 470          */
 471         if (kn->kn_sfflags & event)
 472                 kn->kn_fflags |= event;
 473
 474         /*
 475          * process is gone, so flag the event as finished.
 476          */
 477         if (event == NOTE_EXIT) {
 478                 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 479                 thread_funnel_set(kernel_flock, funnel_state);
 480                 return (1);
 481         }
 482
 483         /*
 484          * process forked, and user wants to track the new process,
 485          * so attach a new knote to it, and immediately report an
 486          * event with the parent's pid.
 487          */
 488         if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
 489                 struct kevent kev;
 490                 int error;
 491
 492                 /*
 493                  * register knote with new process.
 494                  */
 495                 kev.ident = hint & NOTE_PDATAMASK;      /* pid */
 496                 kev.filter = kn->kn_filter;
 497                 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
 498                 kev.fflags = kn->kn_sfflags;
 499                 kev.data = kn->kn_id;                   /* parent */
 500                 kev.udata = kn->kn_kevent.udata;        /* preserve udata */
 501                 error = kevent_register(kn->kn_kq, &kev, NULL);
 502                 if (error)
 503                         kn->kn_fflags |= NOTE_TRACKERR;
 504         }
 505         event = kn->kn_fflags;
 506         thread_funnel_set(kernel_flock, funnel_state);
 507
 508         return (event != 0);
 509 }
 510
 511 /*
 512  * filt_timercompute - compute absolute timeout
 513  *
 514  *      The saved-data field in the knote contains the
 515  *      time value.  The saved filter-flags indicates
 516  *      the unit of measurement.
 517  *
 518  *      If the timeout is not absolute, adjust it for
 519  *      the current time.
 520  */
 521 static int
 522 filt_timercompute(struct knote *kn, uint64_t *abs_time)
 523 {
 524         uint64_t multiplier;
 525         uint64_t raw;
 526
 527         switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS)) {
 528         case NOTE_SECONDS:
 529                 multiplier = NSEC_PER_SEC;
 530                 break;
 531         case NOTE_USECONDS:
 532                 multiplier = NSEC_PER_USEC;
 533                 break;
 534         case NOTE_NSECONDS:
 535                 multiplier = 1;
 536                 break;
 537         case 0: /* milliseconds (default) */
 538                 multiplier = NSEC_PER_SEC / 1000;
 539                 break;
 540         default:
 541                 return EINVAL;
 542         }
 543         nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw);
 544         if (raw <= filt_timerfloor) {
 545                 *abs_time = 0;
 546                 return 0;
 547         }
 548         if ((kn->kn_sfflags & NOTE_ABSOLUTE) == NOTE_ABSOLUTE) {
 549                 uint32_t seconds, nanoseconds;
 550                 uint64_t now;
 551
 552                 clock_get_calendar_nanotime(&seconds, &nanoseconds);
 553                 nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + nanoseconds,
 554                                             &now);
 555                 if (now >= raw + filt_timerfloor) {
 556                         *abs_time = 0;
 557                         return 0;
 558                 }
 559                 raw -= now;
 560         }
 561         clock_absolutetime_interval_to_deadline(raw, abs_time);
 562         return 0;
 563 }
 564
 565 /*
 566  * filt_timerexpire - the timer callout routine
 567  *
 568  *      Just propagate the timer event into the knote
 569  *      filter routine (by going through the knote
 570  *      synchronization point).  Pass a hint to
 571  *      indicate this is a real event, not just a
 572  *      query from above.
 573  */
 574 static void
 575 filt_timerexpire(void *knx, __unused void *spare)
 576 {
 577         struct klist timer_list;
 578         struct knote *kn = knx;
 579
 580         /* no "object" for timers, so fake a list */
 581         SLIST_INIT(&timer_list);
 582         SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);
 583         KNOTE(&timer_list, 1);
 584 }
 585
 586 /*
 587  * data contains amount of time to sleep, in milliseconds,
 588  * or a pointer to a timespec structure.
 589  */
 590 static int
 591 filt_timerattach(struct knote *kn)
 592 {
 593         thread_call_t callout;
 594         uint64_t deadline;
 595         int error;
 596
 597         error = filt_timercompute(kn, &deadline);
 598         if (error)
 599                 return (error);
 600
 601         if (deadline) {
 602                 callout = thread_call_allocate(filt_timerexpire, kn);
 603                 if (NULL == callout)
 604                         return (ENOMEM);
 605         } else {
 606                 /* handle as immediate */
 607                 kn->kn_sdata = 0;
 608                 callout = NULL;
 609         }
 610
 611         filt_timerlock();
 612         kn->kn_hook = (caddr_t)callout;
 613
 614         /* absolute=EV_ONESHOT */
 615         if (kn->kn_sfflags & NOTE_ABSOLUTE)
 616                 kn->kn_flags |= EV_ONESHOT;
 617
 618         if (deadline) {
 619                 /* all others - if not faking immediate */
 620                 kn->kn_flags |= EV_CLEAR;
 621                 thread_call_enter_delayed(callout, deadline);
 622                 kn->kn_hookid = 0;
 623         } else {
 624                 /* fake immediate */
 625                 kn->kn_hookid = 1;
 626         }
 627         filt_timerunlock();
 628         return (0);
 629 }
 630
 631 static void
 632 filt_timerdetach(struct knote *kn)
 633 {
 634         thread_call_t callout;
 635
 636         filt_timerlock();
 637         callout = (thread_call_t)kn->kn_hook;
 638         if (callout != NULL) {
 639                 boolean_t cancelled;
 640
 641                 /* cancel the callout if we can */
 642                 cancelled = thread_call_cancel(callout);
 643                 if (cancelled) {
 644                         /* got it, just free it */
 645                         kn->kn_hook = NULL;
 646                         filt_timerunlock();
 647                         thread_call_free(callout);
 648                         return;
 649                 }
 650                 /* we have to wait for the expire routine.  */
 651                 kn->kn_hookid = -1;     /* we are detaching */
 652                 assert_wait(&kn->kn_hook, THREAD_UNINT);
 653                 filt_timerunlock();
 654                 thread_block(THREAD_CONTINUE_NULL);
 655                 assert(kn->kn_hook == NULL);
 656                 return;
 657         }
 658         /* nothing to do */
 659         filt_timerunlock();
 660 }
 661
 662
 663
 664 static int
 665 filt_timer(struct knote *kn, __unused long hint)
 666 {
 667         int result;
 668
 669         if (hint) {
 670                 /* real timer pop */
 671                 thread_call_t callout;
 672                 boolean_t detaching;
 673
 674                 filt_timerlock();
 675
 676                 kn->kn_data++;
 677
 678                 detaching = (kn->kn_hookid < 0);
 679                 callout = (thread_call_t)kn->kn_hook;
 680
 681                 if (!detaching && (kn->kn_flags & EV_ONESHOT) == 0) {
 682                         uint64_t deadline;
 683                         int error;
 684
 685                         /* user input data may have changed - deal */
 686                         error = filt_timercompute(kn, &deadline);
 687                         if (error) {
 688                                 kn->kn_flags |= EV_ERROR;
 689                                 kn->kn_data = error;
 690                         } else if (deadline == 0) {
 691                                 /* revert to fake immediate */
 692                                 kn->kn_flags &= ~EV_CLEAR;
 693                                 kn->kn_sdata = 0;
 694                                 kn->kn_hookid = 1;
 695                         } else {
 696                                 /* keep the callout and re-arm */
 697                                 thread_call_enter_delayed(callout, deadline);
 698                                 filt_timerunlock();
 699                                 return 1;
 700                         }
 701                 }
 702                 kn->kn_hook = NULL;
 703                 filt_timerunlock();
 704                 thread_call_free(callout);
 705
 706                 /* if someone is waiting for timer to pop */
 707                 if (detaching)
 708                         thread_wakeup(&kn->kn_hook);
 709
 710                 return 1;
 711         }
 712
 713         /* user-query */
 714         filt_timerlock();
 715
 716         /* change fake timer to real if needed */
 717         while (kn->kn_hookid > 0 && kn->kn_sdata > 0) {
 718                 int error;
 719
 720                 /* update the fake timer (make real) */
 721                 kn->kn_hookid = 0;
 722                 kn->kn_data = 0;
 723                 filt_timerunlock();
 724                 error = filt_timerattach(kn);
 725                 filt_timerlock();
 726                 if (error) {
 727                         kn->kn_flags |= EV_ERROR;
 728                         kn->kn_data = error;
 729                         filt_timerunlock();
 730                         return 1;
 731                 }
 732         }
 733
 734         /* if still fake, pretend it fired */
 735         if (kn->kn_hookid > 0)
 736                 kn->kn_data = 1;
 737
 738         result = (kn->kn_data != 0);
 739         filt_timerunlock();
 740         return result;
 741 }
 742
 743 static void
 744 filt_timerlock(void)
 745 {
 746         lck_mtx_lock(&_filt_timerlock);
 747 }
 748
 749 static void
 750 filt_timerunlock(void)
 751 {
 752         lck_mtx_unlock(&_filt_timerlock);
 753 }
 754
 755 /*
 756  * JMM - placeholder for not-yet-implemented filters
 757  */
 758 static int
 759 filt_badattach(__unused struct knote *kn)
 760 {
 761         return(ENOTSUP);
 762 }
 763
 764
 765 struct kqueue *
 766 kqueue_alloc(struct proc *p)
 767 {
 768         struct filedesc *fdp = p->p_fd;
 769         struct kqueue *kq;
 770
 771         MALLOC_ZONE(kq, struct kqueue *, sizeof(struct kqueue), M_KQUEUE, M_WAITOK);
 772         if (kq != NULL) {
 773                 bzero(kq, sizeof(struct kqueue));
 774                 lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
 775                 TAILQ_INIT(&kq->kq_head);
 776                 TAILQ_INIT(&kq->kq_inprocess);
 777                 kq->kq_fdp = fdp;
 778         }
 779
 780         if (fdp->fd_knlistsize < 0) {
 781                 proc_fdlock(p);
 782                 if (fdp->fd_knlistsize < 0)
 783                         fdp->fd_knlistsize = 0;         /* this process has had a kq */
 784                 proc_fdunlock(p);
 785         }
 786
 787         return kq;
 788 }
 789
 790
 791 /*
 792  * kqueue_dealloc - detach all knotes from a kqueue and free it
 793  *
 794  *      We walk each list looking for knotes referencing this
 795  *      this kqueue.  If we find one, we try to drop it.  But
 796  *      if we fail to get a drop reference, that will wait
 797  *      until it is dropped.  So, we can just restart again
 798  *      safe in the assumption that the list will eventually
 799  *      not contain any more references to this kqueue (either
 800  *      we dropped them all, or someone else did).
 801  *
 802  *      Assumes no new events are being added to the kqueue.
 803  *      Nothing locked on entry or exit.
 804  */
 805 void
 806 kqueue_dealloc(struct kqueue *kq, struct proc *p)
 807 {
 808         struct filedesc *fdp = p->p_fd;
 809         struct knote *kn;
 810         int i;
 811
 812         proc_fdlock(p);
 813         for (i = 0; i < fdp->fd_knlistsize; i++) {
 814                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 815                 while (kn != NULL) {
 816                         if (kq == kn->kn_kq) {
 817                                 kqlock(kq);
 818                                 proc_fdunlock(p);
 819                                 /* drop it ourselves or wait */
 820                                 if (kqlock2knotedrop(kq, kn)) {
 821                                         kn->kn_fop->f_detach(kn);
 822                                         knote_drop(kn, p);
 823                                 }
 824                                 proc_fdlock(p);
 825                                 /* start over at beginning of list */
 826                                 kn = SLIST_FIRST(&fdp->fd_knlist[i]);
 827                                 continue;
 828                         }
 829                         kn = SLIST_NEXT(kn, kn_link);
 830                 }
 831         }
 832         if (fdp->fd_knhashmask != 0) {
 833                 for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
 834                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 835                         while (kn != NULL) {
 836                                 if (kq == kn->kn_kq) {
 837                                         kqlock(kq);
 838                                         proc_fdunlock(p);
 839                                         /* drop it ourselves or wait */
 840                                         if (kqlock2knotedrop(kq, kn)) {
 841                                                 kn->kn_fop->f_detach(kn);
 842                                                 knote_drop(kn, p);
 843                                         }
 844                                         proc_fdlock(p);
 845                                         /* start over at beginning of list */
 846                                         kn = SLIST_FIRST(&fdp->fd_knhash[i]);
 847                                         continue;
 848                                 }
 849                                 kn = SLIST_NEXT(kn, kn_link);
 850                         }
 851                 }
 852         }
 853         proc_fdunlock(p);
 854         lck_spin_destroy(&kq->kq_lock, kq_lck_grp);
 855         FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE);
 856 }
 857
 858 int
 859 kqueue(struct proc *p, __unused struct kqueue_args *uap, register_t *retval)
 860 {
 861         struct kqueue *kq;
 862         struct fileproc *fp;
 863         int fd, error;
 864
 865         error = falloc(p, &fp, &fd);
 866         if (error) {
 867                 return (error);
 868         }
 869
 870         kq = kqueue_alloc(p);
 871         if (kq == NULL) {
 872                 fp_free(p, fd, fp);
 873                 return (ENOMEM);
 874         }
 875
 876         fp->f_flag = FREAD | FWRITE;
 877         fp->f_type = DTYPE_KQUEUE;
 878         fp->f_ops = &kqueueops;
 879         fp->f_data = (caddr_t)kq;
 880
 881         proc_fdlock(p);
 882         *fdflags(p, fd) &= ~UF_RESERVED;
 883         fp_drop(p, fd, fp, 1);
 884         proc_fdunlock(p);
 885
 886         *retval = fd;
 887         return (error);
 888 }
 889
 890 int
 891 kqueue_portset_np(__unused struct proc *p,
 892                                   __unused struct kqueue_portset_np_args *uap,
 893                                   __unused register_t *retval)
 894 {
 895                 /* JMM - Placeholder for now */
 896                 return (ENOTSUP);
 897 }
 898
 899 int
 900 kqueue_from_portset_np(__unused struct proc *p,
 901                                            __unused struct kqueue_from_portset_np_args *uap,
 902                                            __unused register_t *retval)
 903 {
 904                 /* JMM - Placeholder for now */
 905                 return (ENOTSUP);
 906 }
 907
 908 static int
 909 kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p)
 910 {
 911         int advance;
 912         int error;
 913
 914         if (IS_64BIT_PROCESS(p)) {
 915                 struct user_kevent kev64;
 916
 917                 advance = sizeof(kev64);
 918                 error = copyin(*addrp, (caddr_t)&kev64, advance);
 919                 if (error)
 920                         return error;
 921                 kevp->ident = CAST_DOWN(uintptr_t, kev64.ident);
 922                 kevp->filter = kev64.filter;
 923                 kevp->flags = kev64.flags;
 924                 kevp->fflags = kev64.fflags;
 925                 kevp->data = CAST_DOWN(intptr_t, kev64.data);
 926                 kevp->udata = kev64.udata;
 927         } else {
 928                 /*
 929                  * compensate for legacy in-kernel kevent layout
 930                  * where the udata field is alredy 64-bit.
 931                  */
 932                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 933                 error = copyin(*addrp, (caddr_t)kevp, advance);
 934         }
 935         if (!error)
 936                 *addrp += advance;
 937         return error;
 938 }
 939
 940 static int
 941 kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p)
 942 {
 943         int advance;
 944         int error;
 945
 946         if (IS_64BIT_PROCESS(p)) {
 947                 struct user_kevent kev64;
 948
 949                 kev64.ident = (uint64_t) kevp->ident;
 950                 kev64.filter = kevp->filter;
 951                 kev64.flags = kevp->flags;
 952                 kev64.fflags = kevp->fflags;
 953                 kev64.data = (int64_t) kevp->data;
 954                 kev64.udata = kevp->udata;
 955                 advance = sizeof(kev64);
 956                 error = copyout((caddr_t)&kev64, *addrp, advance);
 957         } else {
 958                 /*
 959                  * compensate for legacy in-kernel kevent layout
 960                  * where the udata field is alredy 64-bit.
 961                  */
 962                 advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
 963                 error = copyout((caddr_t)kevp, *addrp, advance);
 964         }
 965         if (!error)
 966                 *addrp += advance;
 967         return error;
 968 }
 969
 970 /*
 971  * kevent_continue - continue a kevent syscall after blocking
 972  *
 973  *      assume we inherit a use count on the kq fileglob.
 974  */
 975
 976 static void
 977 kevent_continue(__unused struct kqueue *kq, void *data, int error)
 978 {
 979         struct _kevent *cont_args;
 980         struct fileproc *fp;
 981         register_t *retval;
 982         int noutputs;
 983         int fd;
 984         struct proc *p = current_proc();
 985
 986         cont_args = (struct _kevent *)data;
 987         noutputs = cont_args->eventout;
 988         retval = cont_args->retval;
 989         fd = cont_args->fd;
 990         fp = cont_args->fp;
 991
 992         fp_drop(p, fd, fp, 0);
 993
 994         /* don't restart after signals... */
 995         if (error == ERESTART)
 996                 error = EINTR;
 997         else if (error == EWOULDBLOCK)
 998                 error = 0;
 999         if (error == 0)
1000                 *retval = noutputs;
1001         unix_syscall_return(error);
1002 }
1003
1004 /*
1005  * kevent - [syscall] register and wait for kernel events
1006  *
1007  */
1008
1009 int
1010 kevent(struct proc *p, struct kevent_args *uap, register_t *retval)
1011 {
1012         user_addr_t changelist = uap->changelist;
1013         user_addr_t ueventlist = uap->eventlist;
1014         int nchanges = uap->nchanges;
1015         int nevents = uap->nevents;
1016         int fd = uap->fd;
1017
1018         struct _kevent *cont_args;
1019         uthread_t ut;
1020         struct kqueue *kq;
1021         struct fileproc *fp;
1022         struct kevent kev;
1023         int error, noutputs;
1024         struct timeval atv;
1025
1026         /* convert timeout to absolute - if we have one */
1027         if (uap->timeout != USER_ADDR_NULL) {
1028                 struct timeval rtv;
1029                 if ( IS_64BIT_PROCESS(p) ) {
1030                         struct user_timespec ts;
1031                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1032                         if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0)
1033                                 error = EINVAL;
1034                         else
1035                                 TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1036                 } else {
1037                         struct timespec ts;
1038                         error = copyin( uap->timeout, &ts, sizeof(ts) );
1039                         TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1040                 }
1041                 if (error)
1042                         return error;
1043                 if (itimerfix(&rtv))
1044                         return EINVAL;
1045                 getmicrouptime(&atv);
1046                 timevaladd(&atv, &rtv);
1047         } else {
1048                 atv.tv_sec = 0;
1049                 atv.tv_usec = 0;
1050         }
1051
1052         /* get a usecount for the kq itself */
1053         if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
1054                 return(error);
1055
1056         /* register all the change requests the user provided... */
1057         noutputs = 0;
1058         while (nchanges > 0 && error == 0) {
1059                 error = kevent_copyin(&changelist, &kev, p);
1060                 if (error)
1061                         break;
1062
1063                 kev.flags &= ~EV_SYSFLAGS;
1064                 error = kevent_register(kq, &kev, p);
1065                 if (error && nevents > 0) {
1066                         kev.flags = EV_ERROR;
1067                         kev.data = error;
1068                         error = kevent_copyout(&kev, &ueventlist, p);
1069                         if (error == 0) {
1070                                 nevents--;
1071                                 noutputs++;
1072                         }
1073                 }
1074                 nchanges--;
1075         }
1076
1077         /* store the continuation/completion data in the uthread */
1078         ut = (uthread_t)get_bsdthread_info(current_thread());
1079         cont_args = (struct _kevent *)&ut->uu_state.ss_kevent;
1080         cont_args->fp = fp;
1081         cont_args->fd = fd;
1082         cont_args->retval = retval;
1083         cont_args->eventlist = ueventlist;
1084         cont_args->eventcount = nevents;
1085         cont_args->eventout = noutputs;
1086
1087         if (nevents > 0 && noutputs == 0 && error == 0)
1088                 error = kevent_scan(kq, kevent_callback,
1089                                     kevent_continue, cont_args,
1090                                     &atv, p);
1091         kevent_continue(kq, cont_args, error);
1092         /* NOTREACHED */
1093         return error;
1094 }
1095
1096
1097 /*
1098  * kevent_callback - callback for each individual event
1099  *
1100  *      called with nothing locked
1101  *      caller holds a reference on the kqueue
1102  */
1103
1104 static int
1105 kevent_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
1106 {
1107         struct _kevent *cont_args;
1108         int error;
1109
1110         cont_args = (struct _kevent *)data;
1111         assert(cont_args->eventout < cont_arg->eventcount);
1112
1113         /*
1114          * Copy out the appropriate amount of event data for this user.
1115          */
1116         error = kevent_copyout(kevp, &cont_args->eventlist, current_proc());
1117
1118         /*
1119          * If there isn't space for additional events, return
1120          * a harmless error to stop the processing here
1121          */
1122         if (error == 0 && ++cont_args->eventout == cont_args->eventcount)
1123                         error = EWOULDBLOCK;
1124         return error;
1125 }
1126
1127 /*
1128  * kevent_register - add a new event to a kqueue
1129  *
1130  *      Creates a mapping between the event source and
1131  *      the kqueue via a knote data structure.
1132  *
1133  *      Because many/most the event sources are file
1134  *      descriptor related, the knote is linked off
1135  *      the filedescriptor table for quick access.
1136  *
1137  *      called with nothing locked
1138  *      caller holds a reference on the kqueue
1139  */
1140
1141 int
1142 kevent_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
1143 {
1144         struct filedesc *fdp = kq->kq_fdp;
1145         struct filterops *fops;
1146         struct fileproc *fp = NULL;
1147         struct knote *kn = NULL;
1148         int error = 0;
1149
1150         if (kev->filter < 0) {
1151                 if (kev->filter + EVFILT_SYSCOUNT < 0)
1152                         return (EINVAL);
1153                 fops = sysfilt_ops[~kev->filter];       /* to 0-base index */
1154         } else {
1155                 /*
1156                  * XXX
1157                  * filter attach routine is responsible for insuring that
1158                  * the identifier can be attached to it.
1159                  */
1160                 printf("unknown filter: %d\n", kev->filter);
1161                 return (EINVAL);
1162         }
1163
1164         /* this iocount needs to be dropped if it is not registered */
1165         if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 0)) != 0)
1166                 return(error);
1167
1168  restart:
1169         proc_fdlock(p);
1170         if (fops->f_isfd) {
1171                 /* fd-based knotes are linked off the fd table */
1172                 if (kev->ident < (u_int)fdp->fd_knlistsize) {
1173                         SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
1174                                 if (kq == kn->kn_kq &&
1175                                     kev->filter == kn->kn_filter)
1176                                         break;
1177                 }
1178         } else {
1179                 /* hash non-fd knotes here too */
1180                 if (fdp->fd_knhashmask != 0) {
1181                         struct klist *list;
1182
1183                         list = &fdp->fd_knhash[
1184                             KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
1185                         SLIST_FOREACH(kn, list, kn_link)
1186                                 if (kev->ident == kn->kn_id &&
1187                                     kq == kn->kn_kq &&
1188                                     kev->filter == kn->kn_filter)
1189                                         break;
1190                 }
1191         }
1192
1193         /*
1194          * kn now contains the matching knote, or NULL if no match
1195          */
1196         if (kn == NULL) {
1197                 if ((kev->flags & (EV_ADD|EV_DELETE)) == EV_ADD) {
1198                         kn = knote_alloc();
1199                         if (kn == NULL) {
1200                                 proc_fdunlock(p);
1201                                 error = ENOMEM;
1202                                 goto done;
1203                         }
1204                         kn->kn_fp = fp;
1205                         kn->kn_kq = kq;
1206                         kn->kn_tq = &kq->kq_head;
1207                         kn->kn_fop = fops;
1208                         kn->kn_sfflags = kev->fflags;
1209                         kn->kn_sdata = kev->data;
1210                         kev->fflags = 0;
1211                         kev->data = 0;
1212                         kn->kn_kevent = *kev;
1213                         kn->kn_inuse = 1;  /* for f_attach() */
1214                         kn->kn_status = 0;
1215
1216                         /* before anyone can find it */
1217                         if (kev->flags & EV_DISABLE)
1218                                 kn->kn_status |= KN_DISABLED;
1219
1220                         error = knote_fdpattach(kn, fdp, p);
1221                         proc_fdunlock(p);
1222
1223                         if (error) {
1224                                 knote_free(kn);
1225                                 goto done;
1226                         }
1227
1228                         /*
1229                          * apply reference count to knote structure, and
1230                          * do not release it at the end of this routine.
1231                          */
1232                         fp = NULL;
1233
1234                         /*
1235                          * If the attach fails here, we can drop it knowing
1236                          * that nobody else has a reference to the knote.
1237                          */
1238                         if ((error = fops->f_attach(kn)) != 0) {
1239                                 knote_drop(kn, p);
1240                                 goto done;
1241                         }
1242                 } else {
1243                         proc_fdunlock(p);
1244                         error = ENOENT;
1245                         goto done;
1246                 }
1247         } else {
1248                 /* existing knote - get kqueue lock */
1249                 kqlock(kq);
1250                 proc_fdunlock(p);
1251
1252                 if (kev->flags & EV_DELETE) {
1253                         knote_dequeue(kn);
1254                         kn->kn_status |= KN_DISABLED;
1255                         if (kqlock2knotedrop(kq, kn)) {
1256                                 kn->kn_fop->f_detach(kn);
1257                                 knote_drop(kn, p);
1258                         }
1259                         goto done;
1260                 }
1261
1262                 /* update status flags for existing knote */
1263                 if (kev->flags & EV_DISABLE) {
1264                         knote_dequeue(kn);
1265                         kn->kn_status |= KN_DISABLED;
1266                 } else if (kev->flags & EV_ENABLE) {
1267                         kn->kn_status &= ~KN_DISABLED;
1268                         if (kn->kn_status & KN_ACTIVE)
1269                                 knote_enqueue(kn);
1270                 }
1271
1272                 /*
1273                  * If somebody is in the middle of dropping this
1274                  * knote - go find/insert a new one.  But we have
1275                  * wait for this one to go away first.
1276                  */
1277                 if (!kqlock2knoteusewait(kq, kn))
1278                         /* kqueue unlocked */
1279                         goto restart;
1280
1281                 /*
1282                  * The user may change some filter values after the
1283                  * initial EV_ADD, but doing so will not reset any
1284                  * filter which have already been triggered.
1285                  */
1286                 kn->kn_sfflags = kev->fflags;
1287                 kn->kn_sdata = kev->data;
1288                 kn->kn_kevent.udata = kev->udata;
1289         }
1290
1291         /* still have use ref on knote */
1292         if (kn->kn_fop->f_event(kn, 0)) {
1293                 if (knoteuse2kqlock(kq, kn))
1294                         knote_activate(kn);
1295                 kqunlock(kq);
1296         } else {
1297                 knote_put(kn);
1298         }
1299
1300 done:
1301         if (fp != NULL)
1302                 fp_drop(p, kev->ident, fp, 0);
1303         return (error);
1304 }
1305
/*
 * kevent_process - process the triggered events in a kqueue
 *
 *      Walk the queued knotes and validate that they are
 *      really still triggered events by calling the filter
 *      routines (if necessary).  Hold a use reference on
 *      the knote to avoid it being detached. For each event
 *      that is still considered triggered, invoke the
 *      callback routine provided.
 *
 *      caller holds a reference on the kqueue.
 *      kqueue locked on entry and exit - but may be dropped
 *
 *      On return *countp holds the number of times the callback was
 *      invoked, and the return value is the last callback result
 *      (0 when the callback never failed or was never called).
 */

static int
kevent_process(struct kqueue *kq,
               kevent_callback_t callback,
               void *data,
               int *countp,
               struct proc *p)
{
        struct knote *kn;
        struct kevent kev;
        int nevents;
        int error;

 restart:
        /* nothing queued - nothing to do */
        if (kq->kq_count == 0) {
                *countp = 0;
                return 0;
        }

        /* if someone else is processing the queue, wait */
        if (!TAILQ_EMPTY(&kq->kq_inprocess)) {
                assert_wait(&kq->kq_inprocess, THREAD_UNINT);
                kq->kq_state |= KQ_PROCWAIT;
                kqunlock(kq);
                thread_block(THREAD_CONTINUE_NULL);
                kqlock(kq);
                /* the queue may have changed while we slept - recheck */
                goto restart;
        }

        error = 0;
        nevents = 0;
        /* stop at the first callback error or when the queue drains */
        while (error == 0 &&
               (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) {

                /*
                 * move knote to the processed queue.
                 * this is also protected by the kq lock.
                 */
                assert(kn->kn_tq == &kq->kq_head);
                TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
                kn->kn_tq = &kq->kq_inprocess;
                TAILQ_INSERT_TAIL(&kq->kq_inprocess, kn, kn_tqe);

                /*
                 * Non-EV_ONESHOT events must be re-validated.
                 *
                 * Convert our lock to a use-count and call the event's
                 * filter routine to update.
                 *
                 * If the event is dropping (or no longer valid), we
                 * already have it off the active queue, so just
                 * finish the job of deactivating it.
                 */
                if ((kn->kn_flags & EV_ONESHOT) == 0) {
                        int result;

                        if (kqlock2knoteuse(kq, kn)) {

                                /* call the filter with just a ref */
                                result = kn->kn_fop->f_event(kn, 0);

                                if (!knoteuse2kqlock(kq, kn) || result == 0) {
                                        knote_deactivate(kn);
                                        continue;
                                }
                        } else {
                                knote_deactivate(kn);
                                continue;
                        }
                }

                /*
                 * Got a valid triggered knote with the kqueue
                 * still locked.  Snapshot the data, and determine
                 * how to dispatch the knote for future events.
                 */
                kev = kn->kn_kevent;

                /* now what happens to it? */
                if (kn->kn_flags & EV_ONESHOT) {
                        /* one-shot: deliver once, then detach and drop */
                        knote_deactivate(kn);
                        if (kqlock2knotedrop(kq, kn)) {
                                kn->kn_fop->f_detach(kn);
                                knote_drop(kn, p);
                        }
                } else if (kn->kn_flags & EV_CLEAR) {
                        /* clear-on-delivery: reset the reported state */
                        knote_deactivate(kn);
                        kn->kn_data = 0;
                        kn->kn_fflags = 0;
                        kqunlock(kq);
                } else {
                        /*
                         * leave on in-process queue.  We'll
                         * move all the remaining ones back
                         * the kq queue and wakeup any
                         * waiters when we are done.
                         */
                        kqunlock(kq);
                }

                /*
                 * callback to handle each event as we find it
                 * (the kqueue is unlocked on every path reaching here)
                 */
                error = (callback)(kq, &kev, data);
                nevents++;

                kqlock(kq);
        }

        /*
         * With the kqueue still locked, move any knotes
         * remaining on the in-process queue back to the
         * kq's queue and wake up any waiters.
         */
        while ((kn = TAILQ_FIRST(&kq->kq_inprocess)) != NULL) {
                assert(kn->kn_tq == &kq->kq_inprocess);
                TAILQ_REMOVE(&kq->kq_inprocess, kn, kn_tqe);
                kn->kn_tq = &kq->kq_head;
                TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
        }
        if (kq->kq_state & KQ_PROCWAIT) {
                kq->kq_state &= ~KQ_PROCWAIT;
                thread_wakeup(&kq->kq_inprocess);
        }

        *countp = nevents;
        return error;
}
1445
1446
1447 static void
1448 kevent_scan_continue(void *data, wait_result_t wait_result)
1449 {
1450         uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
1451         struct _kevent_scan * cont_args = &ut->uu_state.ss_kevent_scan;
1452         struct kqueue *kq = (struct kqueue *)data;
1453         int error;
1454         int count;
1455
1456         /* convert the (previous) wait_result to a proper error */
1457         switch (wait_result) {
1458         case THREAD_AWAKENED:
1459                 kqlock(kq);
1460                 error = kevent_process(kq, cont_args->call, cont_args, &count, current_proc());
1461                 if (error == 0 && count == 0) {
1462                         assert_wait_deadline(kq, THREAD_ABORTSAFE, cont_args->deadline);
1463                         kq->kq_state |= KQ_SLEEP;
1464                         kqunlock(kq);
1465                         thread_block_parameter(kevent_scan_continue, kq);
1466                         /* NOTREACHED */
1467                 }
1468                 kqunlock(kq);
1469                 break;
1470         case THREAD_TIMED_OUT:
1471                 error = EWOULDBLOCK;
1472                 break;
1473         case THREAD_INTERRUPTED:
1474                 error = EINTR;
1475                 break;
1476         default:
1477                 panic("kevent_scan_cont() - invalid wait_result (%d)", wait_result);
1478                 error = 0;
1479         }
1480
1481         /* call the continuation with the results */
1482         assert(cont_args->cont != NULL);
1483         (cont_args->cont)(kq, cont_args->data, error);
1484 }
1485
1486
/*
 * kevent_scan - scan and wait for events in a kqueue
 *
 *      Process the triggered events in a kqueue.
 *
 *      If there are no events triggered arrange to
 *      wait for them. If the caller provided a
 *      continuation routine, then kevent_scan will
 *      also.
 *
 *      The callback routine must be valid.
 *      The caller must hold a use-count reference on the kq.
 *
 *      atvp is an absolute uptime; a zero timeval means wait with no
 *      deadline.  Returns the callback error (or 0) once at least one
 *      callback ran, EWOULDBLOCK when the deadline has already passed
 *      or the wait timed out, and EINTR when the wait was interrupted.
 */

int
kevent_scan(struct kqueue *kq,
            kevent_callback_t callback,
            kevent_continue_t continuation,
            void *data,
            struct timeval *atvp,
            struct proc *p)
{
        thread_continue_t cont = THREAD_CONTINUE_NULL;
        uint64_t deadline;
        int error;
        int first;

        assert(callback != NULL);

        first = 1;
        for (;;) {
                wait_result_t wait_result;
                int count;

                /*
                 * Make a pass through the kq to find events already
                 * triggered.
                 */
                kqlock(kq);
                error = kevent_process(kq, callback, data, &count, p);
                if (error || count)
                        break; /* lock still held */

                /* looks like we have to consider blocking */
                if (first) {
                        first = 0;
                        /* convert the timeout to a deadline once */
                        if (atvp->tv_sec || atvp->tv_usec) {
                                uint32_t seconds, nanoseconds;
                                uint64_t now;

                                clock_get_uptime(&now);
                                nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC +
                                                            atvp->tv_usec * NSEC_PER_USEC,
                                                            &deadline);
                                if (now >= deadline) {
                                        /* non-blocking call */
                                        error = EWOULDBLOCK;
                                        break; /* lock still held */
                                }
                                /* turn the remaining interval into a wait deadline */
                                deadline -= now;
                                clock_absolutetime_interval_to_deadline(deadline, &deadline);
                        } else {
                                deadline = 0;   /* block forever */
                        }

                        if (continuation) {
                                /*
                                 * save the scan state in the uthread so that
                                 * kevent_scan_continue can pick it up
                                 */
                                uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
                                struct _kevent_scan *cont_args = &ut->uu_state.ss_kevent_scan;

                                cont_args->call = callback;
                                cont_args->cont = continuation;
                                cont_args->deadline = deadline;
                                cont_args->data = data;
                                cont = kevent_scan_continue;
                        }
                }

                /* go ahead and wait */
                assert_wait_deadline(kq, THREAD_ABORTSAFE, deadline);
                kq->kq_state |= KQ_SLEEP;
                kqunlock(kq);
                wait_result = thread_block_parameter(cont, kq);
                /* NOTREACHED if (continuation != NULL) */

                switch (wait_result) {
                case THREAD_AWAKENED:
                        /* woken up - rescan the queue */
                        continue;
                case THREAD_TIMED_OUT:
                        return EWOULDBLOCK;
                case THREAD_INTERRUPTED:
                        return EINTR;
                default:
                        panic("kevent_scan - bad wait_result (%d)",
                              wait_result);
                        error = 0;
                }
        }
        /* every break out of the loop leaves the kqueue locked */
        kqunlock(kq);
        return error;
}
1588
1589
1590 /*
1591  * XXX
1592  * This could be expanded to call kqueue_scan, if desired.
1593  */
1594 /*ARGSUSED*/
1595 static int
1596 kqueue_read(__unused struct fileproc *fp,
1597                         __unused struct uio *uio,
1598                         __unused kauth_cred_t cred,
1599                         __unused int flags,
1600                         __unused struct proc *p)
1601 {
1602         return (ENXIO);
1603 }
1604
1605 /*ARGSUSED*/
1606 static int
1607 kqueue_write(__unused struct fileproc *fp,
1608                          __unused struct uio *uio,
1609                          __unused kauth_cred_t cred,
1610                          __unused int flags,
1611                          __unused struct proc *p)
1612 {
1613         return (ENXIO);
1614 }
1615
1616 /*ARGSUSED*/
1617 static int
1618 kqueue_ioctl(__unused struct fileproc *fp,
1619                          __unused u_long com,
1620                          __unused caddr_t data,
1621                          __unused struct proc *p)
1622 {
1623         return (ENOTTY);
1624 }
1625
1626 /*ARGSUSED*/
1627 static int
1628 kqueue_select(struct fileproc *fp, int which, void *wql, struct proc *p)
1629 {
1630         struct kqueue *kq = (struct kqueue *)fp->f_data;
1631         int retnum = 0;
1632
1633         if (which == FREAD) {
1634                 kqlock(kq);
1635                 if (kq->kq_count) {
1636                         retnum = 1;
1637                 } else {
1638                         selrecord(p, &kq->kq_sel, wql);
1639                         kq->kq_state |= KQ_SEL;
1640                 }
1641                 kqunlock(kq);
1642         }
1643         return (retnum);
1644 }
1645
1646 /*
1647  * kqueue_close -
1648  */
1649 /*ARGSUSED*/
1650 static int
1651 kqueue_close(struct fileglob *fg, struct proc *p)
1652 {
1653         struct kqueue *kq = (struct kqueue *)fg->fg_data;
1654
1655         kqueue_dealloc(kq, p);
1656         fg->fg_data = NULL;
1657         return (0);
1658 }
1659
1660 /*ARGSUSED*/
1661 /*
1662  * The callers has taken a use-count reference on this kqueue and will donate it
1663  * to the kqueue we are being added to.  This keeps the kqueue from closing until
1664  * that relationship is torn down.
1665  */
1666 static int
1667 kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
1668 {
1669         struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
1670
1671         if (kn->kn_filter != EVFILT_READ)
1672                 return (1);
1673
1674         kn->kn_fop = &kqread_filtops;
1675         kqlock(kq);
1676         KNOTE_ATTACH(&kq->kq_sel.si_note, kn);
1677         kqunlock(kq);
1678         return (0);
1679 }
1680
1681 /*ARGSUSED*/
1682 int
1683 kqueue_stat(struct fileproc *fp, struct stat *st, __unused struct proc *p)
1684 {
1685         struct kqueue *kq = (struct kqueue *)fp->f_data;
1686
1687         bzero((void *)st, sizeof(*st));
1688         st->st_size = kq->kq_count;
1689         st->st_blksize = sizeof(struct kevent);
1690         st->st_mode = S_IFIFO;
1691         return (0);
1692 }
1693
1694 /*
1695  * Called with the kqueue locked
1696  */
1697 static void
1698 kqueue_wakeup(struct kqueue *kq)
1699 {
1700
1701         if (kq->kq_state & KQ_SLEEP) {
1702                 kq->kq_state &= ~KQ_SLEEP;
1703                 thread_wakeup(kq);
1704         }
1705         if (kq->kq_state & KQ_SEL) {
1706                 kq->kq_state &= ~KQ_SEL;
1707                 selwakeup(&kq->kq_sel);
1708         }
1709         KNOTE(&kq->kq_sel.si_note, 0);
1710 }
1711
/*
 * klist_init - initialize a knote list head to the empty list
 */
void
klist_init(struct klist *list)
{
        SLIST_INIT(list);
}
1717
1718
1719 /*
1720  * Query/Post each knote in the object's list
1721  *
1722  *      The object lock protects the list. It is assumed
1723  *      that the filter/event routine for the object can
1724  *      determine that the object is already locked (via
1725  *      the hind) and not deadlock itself.
1726  *
1727  *      The object lock should also hold off pending
1728  *      detach/drop operations.  But we'll prevent it here
1729  *      too - just in case.
1730  */
1731 void
1732 knote(struct klist *list, long hint)
1733 {
1734         struct knote *kn;
1735
1736         SLIST_FOREACH(kn, list, kn_selnext) {
1737                 struct kqueue *kq = kn->kn_kq;
1738
1739                 kqlock(kq);
1740                 if (kqlock2knoteuse(kq, kn)) {
1741                         int result;
1742
1743                         /* call the event with only a use count */
1744                         result = kn->kn_fop->f_event(kn, hint);
1745
1746                         /* if its not going away and triggered */
1747                         if (knoteuse2kqlock(kq, kn) && result)
1748                                 knote_activate(kn);
1749                         /* lock held again */
1750                 }
1751                 kqunlock(kq);
1752         }
1753 }
1754
1755 /*
1756  * attach a knote to the specified list.  Return true if this is the first entry.
1757  * The list is protected by whatever lock the object it is associated with uses.
1758  */
1759 int
1760 knote_attach(struct klist *list, struct knote *kn)
1761 {
1762         int ret = SLIST_EMPTY(list);
1763         SLIST_INSERT_HEAD(list, kn, kn_selnext);
1764         return ret;
1765 }
1766
1767 /*
1768  * detach a knote from the specified list.  Return true if that was the last entry.
1769  * The list is protected by whatever lock the object it is associated with uses.
1770  */
1771 int
1772 knote_detach(struct klist *list, struct knote *kn)
1773 {
1774         SLIST_REMOVE(list, kn, knote, kn_selnext);
1775         return SLIST_EMPTY(list);
1776 }
1777
1778 /*
1779  * remove all knotes referencing a specified fd
1780  *
1781  * Essentially an inlined knote_remove & knote_drop
1782  * when we know for sure that the thing is a file
1783  *
1784  * Entered with the proc_fd lock already held.
1785  * It returns the same way, but may drop it temporarily.
1786  */
1787 void
1788 knote_fdclose(struct proc *p, int fd)
1789 {
1790         struct filedesc *fdp = p->p_fd;
1791         struct klist *list;
1792         struct knote *kn;
1793
1794         list = &fdp->fd_knlist[fd];
1795         while ((kn = SLIST_FIRST(list)) != NULL) {
1796                 struct kqueue *kq = kn->kn_kq;
1797
1798                 kqlock(kq);
1799                 proc_fdunlock(p);
1800
1801                 /*
1802                  * Convert the lock to a drop ref.
1803                  * If we get it, go ahead and drop it.
1804                  * Otherwise, we waited for it to
1805                  * be dropped by the other guy, so
1806                  * it is safe to move on in the list.
1807                  */
1808                 if (kqlock2knotedrop(kq, kn)) {
1809                         kn->kn_fop->f_detach(kn);
1810                         knote_drop(kn, p);
1811                 }
1812
1813                 proc_fdlock(p);
1814
1815                 /* the fd tables may have changed - start over */
1816                 list = &fdp->fd_knlist[fd];
1817         }
1818 }
1819
1820 /* proc_fdlock held on entry (and exit) */
1821 static int
1822 knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
1823 {
1824         struct klist *list = NULL;
1825
1826         if (! kn->kn_fop->f_isfd) {
1827                 if (fdp->fd_knhashmask == 0)
1828                         fdp->fd_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
1829                             &fdp->fd_knhashmask);
1830                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1831         } else {
1832                 if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
1833                         u_int size = 0;
1834
1835                         /* have to grow the fd_knlist */
1836                         size = fdp->fd_knlistsize;
1837                         while (size <= kn->kn_id)
1838                                 size += KQEXTENT;
1839                         MALLOC(list, struct klist *,
1840                                size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
1841                         if (list == NULL)
1842                                 return (ENOMEM);
1843
1844                         bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
1845                               fdp->fd_knlistsize * sizeof(struct klist *));
1846                         bzero((caddr_t)list +
1847                               fdp->fd_knlistsize * sizeof(struct klist *),
1848                               (size - fdp->fd_knlistsize) * sizeof(struct klist *));
1849                         FREE(fdp->fd_knlist, M_KQUEUE);
1850                         fdp->fd_knlist = list;
1851                         fdp->fd_knlistsize = size;
1852                 }
1853                 list = &fdp->fd_knlist[kn->kn_id];
1854         }
1855         SLIST_INSERT_HEAD(list, kn, kn_link);
1856         return (0);
1857 }
1858
1859
1860
1861 /*
1862  * should be called at spl == 0, since we don't want to hold spl
1863  * while calling fdrop and free.
1864  */
1865 static void
1866 knote_drop(struct knote *kn, struct proc *p)
1867 {
1868         struct filedesc *fdp = p->p_fd;
1869         struct kqueue *kq = kn->kn_kq;
1870         struct klist *list;
1871
1872         proc_fdlock(p);
1873         if (kn->kn_fop->f_isfd)
1874                 list = &fdp->fd_knlist[kn->kn_id];
1875         else
1876                 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1877
1878         SLIST_REMOVE(list, kn, knote, kn_link);
1879         kqlock(kq);
1880         knote_dequeue(kn);
1881         if (kn->kn_status & KN_DROPWAIT)
1882                 thread_wakeup(&kn->kn_status);
1883         kqunlock(kq);
1884         proc_fdunlock(p);
1885
1886         if (kn->kn_fop->f_isfd)
1887                 fp_drop(p, kn->kn_id, kn->kn_fp, 0);
1888
1889         knote_free(kn);
1890 }
1891
1892 /* called with kqueue lock held */
1893 static void
1894 knote_activate(struct knote *kn)
1895 {
1896         struct kqueue *kq = kn->kn_kq;
1897
1898         kn->kn_status |= KN_ACTIVE;
1899         knote_enqueue(kn);
1900         kqueue_wakeup(kq);
1901  }
1902
1903 /* called with kqueue lock held */
1904 static void
1905 knote_deactivate(struct knote *kn)
1906 {
1907         kn->kn_status &= ~KN_ACTIVE;
1908         knote_dequeue(kn);
1909 }
1910
1911 /* called with kqueue lock held */
1912 static void
1913 knote_enqueue(struct knote *kn)
1914 {
1915         struct kqueue *kq = kn->kn_kq;
1916
1917         if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
1918                 struct kqtailq *tq = kn->kn_tq;
1919
1920                 TAILQ_INSERT_TAIL(tq, kn, kn_tqe);
1921                 kn->kn_status |= KN_QUEUED;
1922                 kq->kq_count++;
1923         }
1924 }
1925
1926 /* called with kqueue lock held */
1927 static void
1928 knote_dequeue(struct knote *kn)
1929 {
1930         struct kqueue *kq = kn->kn_kq;
1931
1932         assert((kn->kn_status & KN_DISABLED) == 0);
1933         if ((kn->kn_status & KN_QUEUED) == KN_QUEUED) {
1934                 struct kqtailq *tq = kn->kn_tq;
1935
1936                 TAILQ_REMOVE(tq, kn, kn_tqe);
1937                 kn->kn_tq = &kq->kq_head;
1938                 kn->kn_status &= ~KN_QUEUED;
1939                 kq->kq_count--;
1940         }
1941 }
1942
1943 void
1944 knote_init(void)
1945 {
1946         knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), 8192, "knote zone");
1947
1948         /* allocate kq lock group attribute and group */
1949         kq_lck_grp_attr= lck_grp_attr_alloc_init();
1950         lck_grp_attr_setstat(kq_lck_grp_attr);
1951
1952         kq_lck_grp = lck_grp_alloc_init("kqueue",  kq_lck_grp_attr);
1953
1954         /* Allocate kq lock attribute */
1955         kq_lck_attr = lck_attr_alloc_init();
1956         lck_attr_setdefault(kq_lck_attr);
1957
1958         /* Initialize the timer filter lock */
1959         lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
1960 }
1961 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
1962
1963 static struct knote *
1964 knote_alloc(void)
1965 {
1966         return ((struct knote *)zalloc(knote_zone));
1967 }
1968
1969 static void
1970 knote_free(struct knote *kn)
1971 {
1972         zfree(knote_zone, kn);
1973 }
1974
1975 #include <sys/param.h>
1976 #include <sys/socket.h>
1977 #include <sys/protosw.h>
1978 #include <sys/domain.h>
1979 #include <sys/mbuf.h>
1980 #include <sys/kern_event.h>
1981 #include <sys/malloc.h>
1982 #include <sys/sys_domain.h>
1983 #include <sys/syslog.h>
1984
1985
1986 static int kev_attach(struct socket *so, int proto, struct proc *p);
1987 static int kev_detach(struct socket *so);
1988 static int kev_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p);
1989
1990 struct pr_usrreqs event_usrreqs = {
1991      pru_abort_notsupp, pru_accept_notsupp, kev_attach, pru_bind_notsupp, pru_connect_notsupp,
1992      pru_connect2_notsupp, kev_control, kev_detach, pru_disconnect_notsupp,
1993      pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp, pru_rcvoob_notsupp,
1994      pru_send_notsupp, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp,
1995      pru_sosend_notsupp, soreceive, pru_sopoll_notsupp
1996 };
1997
1998 struct protosw eventsw[] = {
1999      {
2000           SOCK_RAW,             &systemdomain,  SYSPROTO_EVENT,         PR_ATOMIC,
2001           0,            0,              0,              0,
2002           0,
2003           0,            0,              0,              0,
2004 #if __APPLE__
2005           0,
2006 #endif
2007           &event_usrreqs,
2008           0,            0,              0,
2009 #if __APPLE__
2010           {0, 0},       0,              {0}
2011 #endif
2012      }
2013 };
2014
2015 static
2016 struct kern_event_head kern_event_head;
2017
2018 static u_long static_event_id = 0;
2019 struct domain *sysdom = &systemdomain;
2020
2021 static lck_grp_t                *evt_mtx_grp;
2022 static lck_attr_t               *evt_mtx_attr;
2023 static lck_grp_attr_t   *evt_mtx_grp_attr;
2024 lck_mtx_t                               *evt_mutex;
2025 /*
2026  * Install the protosw's for the NKE manager.  Invoked at
2027  *  extension load time
2028  */
2029 int
2030 kern_event_init(void)
2031 {
2032     int retval;
2033
2034     if ((retval = net_add_proto(eventsw, &systemdomain)) != 0) {
2035             log(LOG_WARNING, "Can't install kernel events protocol (%d)\n", retval);
2036             return(retval);
2037         }
2038
2039         /*
2040          * allocate lock group attribute and group for kern event
2041          */
2042         evt_mtx_grp_attr = lck_grp_attr_alloc_init();
2043
2044         evt_mtx_grp = lck_grp_alloc_init("eventlist", evt_mtx_grp_attr);
2045
2046         /*
2047          * allocate the lock attribute for mutexes
2048          */
2049         evt_mtx_attr = lck_attr_alloc_init();
2050         lck_attr_setdefault(evt_mtx_attr);
2051         evt_mutex = lck_mtx_alloc_init(evt_mtx_grp, evt_mtx_attr);
2052         if (evt_mutex == NULL)
2053                         return (ENOMEM);
2054
2055     return(KERN_SUCCESS);
2056 }
2057
2058 static int
2059 kev_attach(struct socket *so, __unused int proto, __unused struct proc *p)
2060 {
2061      int error;
2062      struct kern_event_pcb  *ev_pcb;
2063
2064      error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE);
2065      if (error)
2066           return error;
2067
2068      MALLOC(ev_pcb, struct kern_event_pcb *, sizeof(struct kern_event_pcb), M_PCB, M_WAITOK);
2069      if (ev_pcb == 0)
2070           return ENOBUFS;
2071
2072      ev_pcb->ev_socket = so;
2073      ev_pcb->vendor_code_filter = 0xffffffff;
2074
2075      so->so_pcb = (caddr_t) ev_pcb;
2076          lck_mtx_lock(evt_mutex);
2077      LIST_INSERT_HEAD(&kern_event_head, ev_pcb, ev_link);
2078          lck_mtx_unlock(evt_mutex);
2079
2080      return 0;
2081 }
2082
2083
2084 static int
2085 kev_detach(struct socket *so)
2086 {
2087      struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2088
2089      if (ev_pcb != 0) {
2090                 lck_mtx_lock(evt_mutex);
2091                 LIST_REMOVE(ev_pcb, ev_link);
2092                 lck_mtx_unlock(evt_mutex);
2093                 FREE(ev_pcb, M_PCB);
2094                 so->so_pcb = 0;
2095                 so->so_flags |= SOF_PCBCLEARING;
2096      }
2097
2098      return 0;
2099 }
2100
2101 /*
2102  * For now, kev_vender_code and mbuf_tags use the same
2103  * mechanism.
2104  */
2105 extern errno_t mbuf_tag_id_find_internal(const char *string, u_long *out_id,
2106                                                                                  int create);
2107
2108 errno_t kev_vendor_code_find(
2109         const char      *string,
2110         u_long          *out_vender_code)
2111 {
2112         if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
2113                 return EINVAL;
2114         }
2115         return mbuf_tag_id_find_internal(string, out_vender_code, 1);
2116 }
2117
2118 extern void mbuf_tag_id_first_last(u_long *first, u_long *last);
2119
2120 errno_t  kev_msg_post(struct kev_msg *event_msg)
2121 {
2122         u_long  min_vendor, max_vendor;
2123
2124         mbuf_tag_id_first_last(&min_vendor, &max_vendor);
2125
2126         if (event_msg == NULL)
2127                 return EINVAL;
2128
2129         /* Limit third parties to posting events for registered vendor codes only */
2130         if (event_msg->vendor_code < min_vendor ||
2131                 event_msg->vendor_code > max_vendor)
2132         {
2133                 return EINVAL;
2134         }
2135
2136         return kev_post_msg(event_msg);
2137 }
2138
2139
2140 int  kev_post_msg(struct kev_msg *event_msg)
2141 {
2142      struct mbuf *m, *m2;
2143      struct kern_event_pcb  *ev_pcb;
2144      struct kern_event_msg  *ev;
2145      char              *tmp;
2146      unsigned long     total_size;
2147      int               i;
2148
2149         /* Verify the message is small enough to fit in one mbuf w/o cluster */
2150         total_size = KEV_MSG_HEADER_SIZE;
2151
2152         for (i = 0; i < 5; i++) {
2153                 if (event_msg->dv[i].data_length == 0)
2154                         break;
2155                 total_size += event_msg->dv[i].data_length;
2156         }
2157
2158         if (total_size > MLEN) {
2159                 return EMSGSIZE;
2160         }
2161
2162      m = m_get(M_DONTWAIT, MT_DATA);
2163      if (m == 0)
2164           return ENOBUFS;
2165
2166      ev = mtod(m, struct kern_event_msg *);
2167      total_size = KEV_MSG_HEADER_SIZE;
2168
2169      tmp = (char *) &ev->event_data[0];
2170      for (i = 0; i < 5; i++) {
2171           if (event_msg->dv[i].data_length == 0)
2172                break;
2173
2174           total_size += event_msg->dv[i].data_length;
2175           bcopy(event_msg->dv[i].data_ptr, tmp,
2176                 event_msg->dv[i].data_length);
2177           tmp += event_msg->dv[i].data_length;
2178      }
2179
2180      ev->id = ++static_event_id;
2181      ev->total_size   = total_size;
2182      ev->vendor_code  = event_msg->vendor_code;
2183      ev->kev_class    = event_msg->kev_class;
2184      ev->kev_subclass = event_msg->kev_subclass;
2185      ev->event_code   = event_msg->event_code;
2186
2187      m->m_len = total_size;
2188      lck_mtx_lock(evt_mutex);
2189      for (ev_pcb = LIST_FIRST(&kern_event_head);
2190           ev_pcb;
2191           ev_pcb = LIST_NEXT(ev_pcb, ev_link)) {
2192
2193           if (ev_pcb->vendor_code_filter != KEV_ANY_VENDOR) {
2194                if (ev_pcb->vendor_code_filter != ev->vendor_code)
2195                     continue;
2196
2197                if (ev_pcb->class_filter != KEV_ANY_CLASS) {
2198                     if (ev_pcb->class_filter != ev->kev_class)
2199                          continue;
2200
2201                     if ((ev_pcb->subclass_filter != KEV_ANY_SUBCLASS) &&
2202                         (ev_pcb->subclass_filter != ev->kev_subclass))
2203                          continue;
2204                }
2205           }
2206
2207           m2 = m_copym(m, 0, m->m_len, M_NOWAIT);
2208           if (m2 == 0) {
2209                m_free(m);
2210                    lck_mtx_unlock(evt_mutex);
2211                return ENOBUFS;
2212           }
2213           socket_lock(ev_pcb->ev_socket, 1);
2214           if (sbappendrecord(&ev_pcb->ev_socket->so_rcv, m2))
2215                   sorwakeup(ev_pcb->ev_socket);
2216           socket_unlock(ev_pcb->ev_socket, 1);
2217      }
2218
2219      m_free(m);
2220      lck_mtx_unlock(evt_mutex);
2221      return 0;
2222 }
2223
2224 static int
2225 kev_control(struct socket *so,
2226                         u_long cmd,
2227                         caddr_t data,
2228                         __unused struct ifnet *ifp,
2229                         __unused struct proc *p)
2230 {
2231         struct kev_request *kev_req = (struct kev_request *) data;
2232         struct kern_event_pcb  *ev_pcb;
2233         struct kev_vendor_code *kev_vendor;
2234         u_long  *id_value = (u_long *) data;
2235
2236
2237         switch (cmd) {
2238
2239                 case SIOCGKEVID:
2240                         *id_value = static_event_id;
2241                         break;
2242
2243                 case SIOCSKEVFILT:
2244                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2245                         ev_pcb->vendor_code_filter = kev_req->vendor_code;
2246                         ev_pcb->class_filter     = kev_req->kev_class;
2247                         ev_pcb->subclass_filter  = kev_req->kev_subclass;
2248                         break;
2249
2250                 case SIOCGKEVFILT:
2251                         ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2252                         kev_req->vendor_code = ev_pcb->vendor_code_filter;
2253                         kev_req->kev_class   = ev_pcb->class_filter;
2254                         kev_req->kev_subclass = ev_pcb->subclass_filter;
2255                         break;
2256
2257                 case SIOCGKEVVENDOR:
2258                         kev_vendor = (struct kev_vendor_code*)data;
2259
2260                         /* Make sure string is NULL terminated */
2261                         kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0;
2262
2263                         return mbuf_tag_id_find_internal(kev_vendor->vendor_string,
2264                                                                                          &kev_vendor->vendor_code, 0);
2265
2266                 default:
2267                         return ENOTSUP;
2268         }
2269
2270         return 0;
2271 }
2272
2273
2274
2275