/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_event.c       1.0 (3/31/2000)
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/proc_info.h>

#include <kern/lock.h>
#include <kern/clock.h>
#include <kern/thread_call.h>
#include <kern/sched_prim.h>
#include <kern/zalloc.h>
#include <kern/assert.h>

#include <libkern/libkern.h>
#include "kpi_mbuf_internal.h"
MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

static inline void kqlock(struct kqueue *kq);
static inline void kqunlock(struct kqueue *kq);

static int	kqlock2knoteuse(struct kqueue *kq, struct knote *kn);
static int	kqlock2knoteusewait(struct kqueue *kq, struct knote *kn);
static int	kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
static int	knoteuse2kqlock(struct kqueue *kq, struct knote *kn);

static void	kqueue_wakeup(struct kqueue *kq);
static int	kqueue_read(struct fileproc *fp, struct uio *uio,
		    int flags, vfs_context_t ctx);
static int	kqueue_write(struct fileproc *fp, struct uio *uio,
		    int flags, vfs_context_t ctx);
static int	kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
		    vfs_context_t ctx);
static int	kqueue_select(struct fileproc *fp, int which, void *wql,
		    vfs_context_t ctx);
static int	kqueue_close(struct fileglob *fg, vfs_context_t ctx);
static int	kqueue_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx);
extern int	kqueue_stat(struct fileproc *fp, void *ub, int isstat64, vfs_context_t ctx);
static struct fileops kqueueops = {
	kqueue_read,
	kqueue_write,
	kqueue_ioctl,
	kqueue_select,
	kqueue_close,
	kqueue_kqfilter,
	NULL
};
static int	kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p);
static int	kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p);

static int	kevent_callback(struct kqueue *kq, struct kevent *kevp, void *data);
static void	kevent_continue(struct kqueue *kq, void *data, int error);
static void	kevent_scan_continue(void *contp, wait_result_t wait_result);
static int	kevent_process(struct kqueue *kq, kevent_callback_t callback,
			       void *data, int *countp, struct proc *p);
static void	knote_put(struct knote *kn);
static int	knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p);
static void	knote_drop(struct knote *kn, struct proc *p);
static void	knote_activate(struct knote *kn);
static void	knote_deactivate(struct knote *kn);
static void	knote_enqueue(struct knote *kn);
static void	knote_dequeue(struct knote *kn);
static struct	knote *knote_alloc(void);
static void	knote_free(struct knote *kn);

static int	filt_fileattach(struct knote *kn);
static struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };

/*
 * placeholder for not-yet-implemented filters
 */
static int	filt_badattach(struct knote *kn);
static struct filterops bad_filtops =
	{ 0, filt_badattach, 0, 0 };

static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);

static struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };

extern struct filterops fs_filtops;

extern struct filterops sig_filtops;
static int	filt_timercompute(struct knote *kn, uint64_t *abs_time);
static void	filt_timerexpire(void *knx, void *param1);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);

static struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

/* to avoid arming timers that fire quicker than we can handle */
static uint64_t filt_timerfloor = 0;

static lck_mtx_t _filt_timerlock;
static void	filt_timerlock(void);
static void	filt_timerunlock(void);

static zone_t	knote_zone;

#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

extern struct filterops aio_filtops;
/*
 * Table for all system-defined filters.
 */
static struct filterops *sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
#if 0
	&aio_filtops,			/* EVFILT_AIO */
#else
	&bad_filtops,			/* EVFILT_AIO */
#endif
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
	&bad_filtops,			/* EVFILT_MACHPORT */
	&fs_filtops			/* EVFILT_FS */
};
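/*
 * Illustration only (not part of the original source).  The EVFILT_* constants
 * in <sys/event.h> are small negative integers (EVFILT_READ == -1,
 * EVFILT_WRITE == -2, ...), so kevent_register() below converts a user-supplied
 * filter into a zero-based index for this table with a bitwise complement:
 * sysfilt_ops[~kev->filter].  A standalone sketch of that mapping:
 */
#if 0
#include <sys/event.h>
#include <assert.h>

static void
filter_index_example(void)
{
	int filter = EVFILT_READ;		/* -1 */

	assert(~EVFILT_READ == 0);		/* first entry in sysfilt_ops[] */
	assert(~EVFILT_WRITE == 1);		/* second entry */
	assert(filter + EVFILT_SYSCOUNT > 0);	/* same range check kevent_register makes */
}
#endif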
/*
 * kqueue/note lock attributes and implementations
 *
 *	kqueues have locks, while knotes have use counts.
 *	Most of the knote state is guarded by the object lock.
 *	The knote "inuse" count and status use the kqueue lock.
 */
lck_grp_attr_t * kq_lck_grp_attr;
lck_grp_t * kq_lck_grp;
lck_attr_t * kq_lck_attr;

static inline void
kqlock(struct kqueue *kq)
{
	lck_spin_lock(&kq->kq_lock);
}

static inline void
kqunlock(struct kqueue *kq)
{
	lck_spin_unlock(&kq->kq_lock);
}
/*
 * Convert a kq lock to a knote use reference.
 *
 *	If the knote is being dropped, we can't get
 *	a use reference, so just return with it
 *	still locked.
 *
 *	- kq locked at entry
 *	- unlock on exit if we get the use reference
 */
static int
kqlock2knoteuse(struct kqueue *kq, struct knote *kn)
{
	if (kn->kn_status & KN_DROPPING)
		return 0;
	kn->kn_inuse++;
	kqunlock(kq);
	return 1;
}

/*
 * Convert a kq lock to a knote use reference.
 *
 *	If the knote is being dropped, we can't get
 *	a use reference; wait for the drop to complete
 *	and return failure instead.
 *
 *	- kq locked at entry
 *	- kq always unlocked on exit
 */
static int
kqlock2knoteusewait(struct kqueue *kq, struct knote *kn)
{
	if (!kqlock2knoteuse(kq, kn)) {
		kn->kn_status |= KN_DROPWAIT;
		assert_wait(&kn->kn_status, THREAD_UNINT);
		kqunlock(kq);
		thread_block(THREAD_CONTINUE_NULL);
		return 0;
	}
	return 1;
}

/*
 * Convert from a knote use reference back to kq lock.
 *
 *	Drop a use reference and wake any waiters if
 *	this is the last one.
 *
 *	The exit return indicates if the knote is
 *	still alive - but the kqueue lock is taken
 *	unconditionally.
 */
static int
knoteuse2kqlock(struct kqueue *kq, struct knote *kn)
{
	kqlock(kq);
	if ((--kn->kn_inuse == 0) &&
	    (kn->kn_status & KN_USEWAIT)) {
		kn->kn_status &= ~KN_USEWAIT;
		thread_wakeup(&kn->kn_inuse);
	}
	return ((kn->kn_status & KN_DROPPING) == 0);
}
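/*
 * Illustration only (not part of the original source): the canonical caller
 * pattern for the helpers above, as used by knote() and kevent_process()
 * later in this file -- take the kqueue lock, trade it for a use reference
 * while calling the filter (which may block), then trade back and check
 * whether the knote survived.
 */
#if 0
static void
knote_use_pattern(struct kqueue *kq, struct knote *kn, long hint)
{
	kqlock(kq);
	if (kqlock2knoteuse(kq, kn)) {
		/* kq now unlocked; we hold only a use reference on kn */
		int result = kn->kn_fop->f_event(kn, hint);

		/* retake the kq lock; non-zero return means kn is not being dropped */
		if (knoteuse2kqlock(kq, kn) && result)
			knote_activate(kn);
	}
	kqunlock(kq);
}
#endif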
/*
 * Convert a kq lock to a knote drop reference.
 *
 *	If the knote is in use, wait for the use count
 *	to subside.  We first mark our intention to drop
 *	it - keeping other users from "piling on."
 *	If we are too late, we have to wait for the
 *	other drop to complete.
 *
 *	- kq locked at entry
 *	- always unlocked on exit.
 *	- caller can't hold any locks that would prevent
 *	  the other dropper from completing.
 */
static int
kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
{
	if ((kn->kn_status & KN_DROPPING) == 0) {
		kn->kn_status |= KN_DROPPING;
		if (kn->kn_inuse > 0) {
			kn->kn_status |= KN_USEWAIT;
			assert_wait(&kn->kn_inuse, THREAD_UNINT);
			kqunlock(kq);
			thread_block(THREAD_CONTINUE_NULL);
		} else
			kqunlock(kq);
		return 1;
	} else {
		kn->kn_status |= KN_DROPWAIT;
		assert_wait(&kn->kn_status, THREAD_UNINT);
		kqunlock(kq);
		thread_block(THREAD_CONTINUE_NULL);
		return 0;
	}
}
/*
 * Release a knote use count reference.
 */
static void
knote_put(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	kqlock(kq);
	if ((--kn->kn_inuse == 0) &&
	    (kn->kn_status & KN_USEWAIT)) {
		kn->kn_status &= ~KN_USEWAIT;
		thread_wakeup(&kn->kn_inuse);
	}
	kqunlock(kq);
}
static int
filt_fileattach(struct knote *kn)
{
	return (fo_kqfilter(kn->kn_fp, kn, vfs_context_current()));
}

#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data
static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	kqlock(kq);
	KNOTE_DETACH(&kq->kq_sel.si_note, kn);
	kqunlock(kq);
}

/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, __unused long hint)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}
static int
filt_procattach(struct knote *kn)
{
	struct proc *p;

	assert(PID_MAX < NOTE_PDATAMASK);

	if ((kn->kn_sfflags & (NOTE_TRACK | NOTE_TRACKERR | NOTE_CHILD)) != 0)
		return (ENOTSUP);

	p = proc_find(kn->kn_id);
	if (p == NULL)
		return (ESRCH);

	kn->kn_flags |= EV_CLEAR;	/* automatically set */
	kn->kn_ptr.p_proc = p;		/* store the proc handle */

	KNOTE_ATTACH(&p->p_klist, kn);

	return (0);
}
/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  In that case,
 * the pointer to the process will have already been nulled out.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	p = kn->kn_ptr.p_proc;
	if (p != PROC_NULL) {
		kn->kn_ptr.p_proc = PROC_NULL;
		KNOTE_DETACH(&p->p_klist, kn);
	}
}
static int
filt_proc(struct knote *kn, long hint)
{
	/* hint is 0 when called from above */
	if (hint != 0) {
		u_int event;
		struct proc *p;

		/* ALWAYS CALLED WITH proc_klist_lock when (hint != 0) */

		/*
		 * mask off extra data
		 */
		event = (u_int)hint & NOTE_PCTRLMASK;

		/*
		 * if the user is interested in this event, record it.
		 */
		if (kn->kn_sfflags & event)
			kn->kn_fflags |= event;

		/*
		 * If this is the last possible event for the
		 * knote, unlink this knote from the process
		 * before the process goes away.
		 */
		if (event == NOTE_REAP || (event == NOTE_EXIT && !(kn->kn_sfflags & NOTE_REAP))) {
			kn->kn_flags |= (EV_EOF | EV_ONESHOT);
			p = kn->kn_ptr.p_proc;
			if (p != PROC_NULL) {
				kn->kn_ptr.p_proc = PROC_NULL;
				KNOTE_DETACH(&p->p_klist, kn);
			}
		}
	}

	/* atomic check, no locking need when called from above */
	return (kn->kn_fflags != 0);
}
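/*
 * Illustration only (not part of the original source): what the EVFILT_PROC
 * filter above looks like from user space -- watch a pid for exit.  The fflags
 * returned in the event are the NOTE_* bits recorded by filt_proc(), and the
 * knote delivers EV_EOF|EV_ONESHOT once the final event arrives.
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

static int
wait_for_exit(pid_t pid)
{
	struct kevent change, event;
	int kq = kqueue();

	if (kq < 0)
		return -1;

	EV_SET(&change, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
	/* register the change and block for the single resulting event */
	if (kevent(kq, &change, 1, &event, 1, NULL) < 1) {
		close(kq);
		return -1;
	}

	if (event.fflags & NOTE_EXIT)
		printf("pid %d exited\n", (int)event.ident);
	close(kq);
	return 0;
}
#endif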
/*
 * filt_timercompute - compute absolute timeout
 *
 *	The saved-data field in the knote contains the
 *	time value.  The saved filter-flags indicates
 *	the unit of measurement.
 *
 *	If the timeout is not absolute, adjust it for
 *	the current time.
 */
static int
filt_timercompute(struct knote *kn, uint64_t *abs_time)
{
	uint64_t multiplier;
	uint64_t raw;

	switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS)) {
	case NOTE_SECONDS:
		multiplier = NSEC_PER_SEC;
		break;
	case NOTE_USECONDS:
		multiplier = NSEC_PER_USEC;
		break;
	case NOTE_NSECONDS:
		multiplier = 1;
		break;
	case 0: /* milliseconds (default) */
		multiplier = NSEC_PER_SEC / 1000;
		break;
	default:
		return EINVAL;
	}
	nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw);
	if (raw <= filt_timerfloor) {
		*abs_time = 0;
		return 0;
	}
	if ((kn->kn_sfflags & NOTE_ABSOLUTE) == NOTE_ABSOLUTE) {
		uint32_t seconds, nanoseconds;
		uint64_t now;

		clock_get_calendar_nanotime(&seconds, &nanoseconds);
		nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + nanoseconds,
					    &now);
		if (now >= raw + filt_timerfloor) {
			*abs_time = 0;
			return 0;
		}
		raw -= now;
	}
	clock_absolutetime_interval_to_deadline(raw, abs_time);
	return 0;
}
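/*
 * Illustration only (not part of the original source): the NOTE_* unit flags
 * interpreted by filt_timercompute() above, as seen from user space.  With no
 * unit flag the data field is taken as milliseconds; NOTE_SECONDS, NOTE_USECONDS
 * and NOTE_NSECONDS change the unit, and NOTE_ABSOLUTE makes it a calendar deadline.
 */
#if 0
#include <sys/event.h>
#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

static int
timer_example(void)
{
	struct kevent change, event;
	int kq = kqueue();

	if (kq < 0)
		return -1;

	/* fires every 500 ms (default unit); NOTE_USECONDS with data=500000 is equivalent */
	EV_SET(&change, 1, EVFILT_TIMER, EV_ADD, 0, 500, NULL);
	if (kevent(kq, &change, 1, NULL, 0, NULL) < 0) {
		close(kq);
		return -1;
	}

	/* data in the returned event counts how many times the timer fired */
	if (kevent(kq, NULL, 0, &event, 1, NULL) == 1)
		printf("timer %lu fired %ld time(s)\n",
		       (unsigned long)event.ident, (long)event.data);
	close(kq);
	return 0;
}
#endif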
/*
 * filt_timerexpire - the timer callout routine
 *
 *	Just propagate the timer event into the knote
 *	filter routine (by going through the knote
 *	synchronization point).  Pass a hint to
 *	indicate this is a real event, not just a
 *	query from above.
 */
static void
filt_timerexpire(void *knx, __unused void *spare)
{
	struct klist timer_list;
	struct knote *kn = knx;

	/* no "object" for timers, so fake a list */
	SLIST_INIT(&timer_list);
	SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);
	KNOTE(&timer_list, 1);
}
/*
 * data contains amount of time to sleep, in milliseconds,
 * or a pointer to a timespec structure.
 */
static int
filt_timerattach(struct knote *kn)
{
	thread_call_t callout;
	uint64_t deadline;
	int error;

	error = filt_timercompute(kn, &deadline);
	if (error)
		return (error);

	if (deadline) {
		callout = thread_call_allocate(filt_timerexpire, kn);
		if (callout == NULL)
			return (ENOMEM);
	} else {
		/* handle as immediate */
		callout = NULL;
	}

	kn->kn_hook = (caddr_t)callout;

	/* absolute=EV_ONESHOT */
	if (kn->kn_sfflags & NOTE_ABSOLUTE)
		kn->kn_flags |= EV_ONESHOT;

	if (deadline) {
		/* all others - if not faking immediate */
		kn->kn_flags |= EV_CLEAR;
		thread_call_enter_delayed(callout, deadline);
	}

	return (0);
}
static void
filt_timerdetach(struct knote *kn)
{
	thread_call_t callout;
	boolean_t cancelled;

	filt_timerlock();
	callout = (thread_call_t)kn->kn_hook;
	if (callout != NULL) {
		/* cancel the callout if we can */
		cancelled = thread_call_cancel(callout);
		if (cancelled) {
			/* got it, just free it */
			kn->kn_hook = NULL;
			filt_timerunlock();
			thread_call_free(callout);
			return;
		}
		/* we have to wait for the expire routine.  */
		kn->kn_hookid = -1;	/* we are detaching */
		assert_wait(&kn->kn_hook, THREAD_UNINT);
		filt_timerunlock();
		thread_block(THREAD_CONTINUE_NULL);
		assert(kn->kn_hook == NULL);
		return;
	}
	filt_timerunlock();
}
static int
filt_timer(struct knote *kn, __unused long hint)
{
	int result;

	if (hint != 0) {
		/* real timer pop */
		thread_call_t callout;
		boolean_t detaching;

		filt_timerlock();

		detaching = (kn->kn_hookid < 0);
		callout = (thread_call_t)kn->kn_hook;

		if (!detaching && (kn->kn_flags & EV_ONESHOT) == 0) {
			uint64_t deadline;
			int error;

			/* user input data may have changed - deal */
			error = filt_timercompute(kn, &deadline);
			if (error) {
				kn->kn_flags |= EV_ERROR;
				kn->kn_data = error;
			} else if (deadline == 0) {
				/* revert to fake immediate */
				kn->kn_flags &= ~EV_CLEAR;
			} else {
				/* keep the callout and re-arm */
				thread_call_enter_delayed(callout, deadline);
			}
			filt_timerunlock();
			return 1;
		}

		kn->kn_hook = NULL;
		filt_timerunlock();
		thread_call_free(callout);

		/* if someone is waiting for timer to pop */
		if (detaching)
			thread_wakeup(&kn->kn_hook);

		return 1;
	}

	/* user-query */
	filt_timerlock();

	/* change fake timer to real if needed */
	while (kn->kn_hookid > 0 && kn->kn_sdata > 0) {
		int error;

		/* update the fake timer (make real) */
		kn->kn_hookid = 0;
		kn->kn_data = 0;
		filt_timerunlock();
		error = filt_timerattach(kn);
		filt_timerlock();
		if (error) {
			kn->kn_flags |= EV_ERROR;
			kn->kn_data = error;
			filt_timerunlock();
			return 1;
		}
	}

	/* if still fake, pretend it fired */
	if (kn->kn_hookid > 0)
		kn->kn_data = 1;

	result = (kn->kn_data != 0);
	filt_timerunlock();
	return result;
}

static void
filt_timerlock(void)
{
	lck_mtx_lock(&_filt_timerlock);
}

static void
filt_timerunlock(void)
{
	lck_mtx_unlock(&_filt_timerlock);
}
/*
 * JMM - placeholder for not-yet-implemented filters
 */
static int
filt_badattach(__unused struct knote *kn)
{
	return (ENOTSUP);
}
struct kqueue *
kqueue_alloc(struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq;

	MALLOC_ZONE(kq, struct kqueue *, sizeof(struct kqueue), M_KQUEUE, M_WAITOK);
	if (kq != NULL) {
		bzero(kq, sizeof(struct kqueue));
		lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
		TAILQ_INIT(&kq->kq_head);
		TAILQ_INIT(&kq->kq_inprocess);
		kq->kq_p = p;
	}

	if (fdp->fd_knlistsize < 0) {
		proc_fdlock(p);
		if (fdp->fd_knlistsize < 0)
			fdp->fd_knlistsize = 0;		/* this process has had a kq */
		proc_fdunlock(p);
	}

	return kq;
}
/*
 * kqueue_dealloc - detach all knotes from a kqueue and free it
 *
 *	We walk each list looking for knotes referencing this
 *	kqueue.  If we find one, we try to drop it.  But
 *	if we fail to get a drop reference, that will wait
 *	until it is dropped.  So, we can just restart again
 *	safe in the assumption that the list will eventually
 *	not contain any more references to this kqueue (either
 *	we dropped them all, or someone else did).
 *
 *	Assumes no new events are being added to the kqueue.
 *	Nothing locked on entry or exit.
 */
void
kqueue_dealloc(struct kqueue *kq)
{
	struct proc *p = kq->kq_p;
	struct filedesc *fdp = p->p_fd;
	struct knote *kn;
	int i;

	proc_fdlock(p);
	for (i = 0; i < fdp->fd_knlistsize; i++) {
		kn = SLIST_FIRST(&fdp->fd_knlist[i]);
		while (kn != NULL) {
			if (kq == kn->kn_kq) {
				kqlock(kq);
				proc_fdunlock(p);
				/* drop it ourselves or wait */
				if (kqlock2knotedrop(kq, kn)) {
					kn->kn_fop->f_detach(kn);
					knote_drop(kn, p);
				}
				proc_fdlock(p);
				/* start over at beginning of list */
				kn = SLIST_FIRST(&fdp->fd_knlist[i]);
				continue;
			}
			kn = SLIST_NEXT(kn, kn_link);
		}
	}
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
			kn = SLIST_FIRST(&fdp->fd_knhash[i]);
			while (kn != NULL) {
				if (kq == kn->kn_kq) {
					kqlock(kq);
					proc_fdunlock(p);
					/* drop it ourselves or wait */
					if (kqlock2knotedrop(kq, kn)) {
						kn->kn_fop->f_detach(kn);
						knote_drop(kn, p);
					}
					proc_fdlock(p);
					/* start over at beginning of list */
					kn = SLIST_FIRST(&fdp->fd_knhash[i]);
					continue;
				}
				kn = SLIST_NEXT(kn, kn_link);
			}
		}
	}
	proc_fdunlock(p);
	lck_spin_destroy(&kq->kq_lock, kq_lck_grp);
	FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE);
}
int
kqueue(struct proc *p, __unused struct kqueue_args *uap, register_t *retval)
{
	struct kqueue *kq;
	struct fileproc *fp;
	int fd, error;

	error = falloc(p, &fp, &fd, vfs_context_current());
	if (error)
		return (error);

	kq = kqueue_alloc(p);
	if (kq == NULL) {
		fp_free(p, fd, fp);
		return (ENOMEM);
	}

	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	fp->f_data = (caddr_t)kq;

	proc_fdlock(p);
	procfdtbl_releasefd(p, fd, NULL);
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);

	*retval = fd;
	return (error);
}
int
kqueue_portset_np(__unused struct proc *p,
		  __unused struct kqueue_portset_np_args *uap,
		  __unused register_t *retval)
{
		/* JMM - Placeholder for now */
		return (ENOTSUP);
}

int
kqueue_from_portset_np(__unused struct proc *p,
		       __unused struct kqueue_from_portset_np_args *uap,
		       __unused register_t *retval)
{
		/* JMM - Placeholder for now */
		return (ENOTSUP);
}
static int
kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p)
{
	int advance;
	int error;

	if (IS_64BIT_PROCESS(p)) {
		struct user_kevent kev64;

		advance = sizeof(kev64);
		error = copyin(*addrp, (caddr_t)&kev64, advance);
		if (error)
			return error;
		kevp->ident = CAST_DOWN(uintptr_t, kev64.ident);
		kevp->filter = kev64.filter;
		kevp->flags = kev64.flags;
		kevp->fflags = kev64.fflags;
		kevp->data = CAST_DOWN(intptr_t, kev64.data);
		kevp->udata = kev64.udata;
	} else {
		/*
		 * compensate for legacy in-kernel kevent layout
		 * where the udata field is already 64-bit.
		 */
		advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
		error = copyin(*addrp, (caddr_t)kevp, advance);
	}
	if (!error)
		*addrp += advance;
	return error;
}
static int
kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p)
{
	int advance;
	int error;

	if (IS_64BIT_PROCESS(p)) {
		struct user_kevent kev64;

		/*
		 * deal with the special case of a user-supplied
		 * value of (uintptr_t)-1.
		 */
		kev64.ident = (kevp->ident == (uintptr_t)-1) ?
			   (uint64_t)-1LL : (uint64_t)kevp->ident;
		kev64.filter = kevp->filter;
		kev64.flags = kevp->flags;
		kev64.fflags = kevp->fflags;
		kev64.data = (int64_t) kevp->data;
		kev64.udata = kevp->udata;
		advance = sizeof(kev64);
		error = copyout((caddr_t)&kev64, *addrp, advance);
	} else {
		/*
		 * compensate for legacy in-kernel kevent layout
		 * where the udata field is already 64-bit.
		 */
		advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t);
		error = copyout((caddr_t)kevp, *addrp, advance);
	}
	if (!error)
		*addrp += advance;
	return error;
}
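/*
 * Illustration only (not part of the original source): the fields being
 * marshaled above are exactly the user-visible struct kevent members, normally
 * filled in with the EV_SET() macro from <sys/event.h>:
 */
#if 0
#include <sys/event.h>

static void
ev_set_example(struct kevent *kev, int fd)
{
	/* ident, filter, flags, fflags, data, udata -- the same six fields
	 * kevent_copyin()/kevent_copyout() move between user and kernel */
	EV_SET(kev, fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
}
#endif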
/*
 * kevent_continue - continue a kevent syscall after blocking
 *
 *	assume we inherit a use count on the kq fileglob.
 */
static void
kevent_continue(__unused struct kqueue *kq, void *data, int error)
{
	struct _kevent *cont_args;
	struct fileproc *fp;
	register_t *retval;
	int noutputs;
	int fd;
	struct proc *p = current_proc();

	cont_args = (struct _kevent *)data;
	noutputs = cont_args->eventout;
	retval = cont_args->retval;
	fd = cont_args->fd;
	fp = cont_args->fp;

	fp_drop(p, fd, fp, 0);

	/* don't restart after signals... */
	if (error == ERESTART)
		error = EINTR;
	else if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0)
		*retval = noutputs;
	unix_syscall_return(error);
}
/*
 * kevent - [syscall] register and wait for kernel events
 */
int
kevent(struct proc *p, struct kevent_args *uap, register_t *retval)
{
	user_addr_t changelist = uap->changelist;
	user_addr_t ueventlist = uap->eventlist;
	int nchanges = uap->nchanges;
	int nevents = uap->nevents;
	int fd = uap->fd;

	struct _kevent *cont_args;
	uthread_t ut;
	struct kqueue *kq;
	struct fileproc *fp;
	struct kevent kev;
	int error, noutputs;
	struct timeval atv;

	/* convert timeout to absolute - if we have one */
	if (uap->timeout != USER_ADDR_NULL) {
		struct timeval rtv;

		if ( IS_64BIT_PROCESS(p) ) {
			struct user_timespec ts;
			error = copyin( uap->timeout, &ts, sizeof(ts) );
			if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0)
				error = EINVAL;
			else
				TIMESPEC_TO_TIMEVAL(&rtv, &ts);
		} else {
			struct timespec ts;
			error = copyin( uap->timeout, &ts, sizeof(ts) );
			TIMESPEC_TO_TIMEVAL(&rtv, &ts);
		}
		if (error)
			return error;
		if (itimerfix(&rtv))
			return EINVAL;
		getmicrouptime(&atv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}

	/* get a usecount for the kq itself */
	if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
		return (error);

	/* register all the change requests the user provided... */
	noutputs = 0;
	while (nchanges > 0 && error == 0) {
		error = kevent_copyin(&changelist, &kev, p);
		if (error)
			break;

		kev.flags &= ~EV_SYSFLAGS;
		error = kevent_register(kq, &kev, p);
		if ((error || (kev.flags & EV_RECEIPT)) && nevents > 0) {
			kev.flags = EV_ERROR;
			kev.data = error;
			error = kevent_copyout(&kev, &ueventlist, p);
			if (error == 0) {
				nevents--;
				noutputs++;
			}
		}
		nchanges--;
	}

	/* store the continuation/completion data in the uthread */
	ut = (uthread_t)get_bsdthread_info(current_thread());
	cont_args = (struct _kevent *)&ut->uu_kevent.ss_kevent;
	cont_args->fp = fp;
	cont_args->fd = fd;
	cont_args->retval = retval;
	cont_args->eventlist = ueventlist;
	cont_args->eventcount = nevents;
	cont_args->eventout = noutputs;

	if (nevents > 0 && noutputs == 0 && error == 0)
		error = kevent_scan(kq, kevent_callback,
				    kevent_continue, cont_args,
				    &atv, p);
	kevent_continue(kq, cont_args, error);
	/* NOTREACHED */
	return error;
}
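/*
 * Illustration only (not part of the original source): the syscall above from
 * the user side -- create a kqueue, register an EVFILT_READ change, then loop
 * collecting triggered events with a timeout.
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

static int
watch_fd(int fd)
{
	struct kevent change;
	struct kevent events[8];
	struct timespec timeout = { 5, 0 };	/* 5 second timeout */
	int kq, n, i;

	if ((kq = kqueue()) < 0)
		return -1;

	/* nchanges=1, nevents=0: register only, don't wait */
	EV_SET(&change, fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
	if (kevent(kq, &change, 1, NULL, 0, NULL) < 0) {
		close(kq);
		return -1;
	}

	for (;;) {
		/* nchanges=0, nevents=8: wait for triggered events */
		n = kevent(kq, NULL, 0, events, 8, &timeout);
		if (n <= 0)
			break;			/* error or timeout */
		for (i = 0; i < n; i++) {
			if (events[i].flags & EV_ERROR)
				continue;	/* per-event error, code in data */
			printf("fd %lu readable, %ld bytes pending\n",
			       (unsigned long)events[i].ident, (long)events[i].data);
		}
	}
	close(kq);
	return 0;
}
#endif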
/*
 * kevent_callback - callback for each individual event
 *
 *	called with nothing locked
 *	caller holds a reference on the kqueue
 */
static int
kevent_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
{
	struct _kevent *cont_args;
	int error;

	cont_args = (struct _kevent *)data;
	assert(cont_args->eventout < cont_args->eventcount);

	/*
	 * Copy out the appropriate amount of event data for this user.
	 */
	error = kevent_copyout(kevp, &cont_args->eventlist, current_proc());

	/*
	 * If there isn't space for additional events, return
	 * a harmless error to stop the processing here
	 */
	if (error == 0 && ++cont_args->eventout == cont_args->eventcount)
			error = EWOULDBLOCK;
	return error;
}
/*
 * kevent_register - add a new event to a kqueue
 *
 *	Creates a mapping between the event source and
 *	the kqueue via a knote data structure.
 *
 *	Because many/most of the event sources are file
 *	descriptor related, the knote is linked off
 *	the filedescriptor table for quick access.
 *
 *	called with nothing locked
 *	caller holds a reference on the kqueue
 */
int
kevent_register(struct kqueue *kq, struct kevent *kev, __unused struct proc *ctxp)
{
	struct proc *p = kq->kq_p;
	struct filedesc *fdp = p->p_fd;
	struct filterops *fops;
	struct fileproc *fp = NULL;
	struct knote *kn = NULL;
	int error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	} else {
		/*
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		printf("unknown filter: %d\n", kev->filter);
		return (EINVAL);
	}

restart:
	/* this iocount needs to be dropped if it is not registered */
	if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 0)) != 0)
		return (error);

	proc_fdlock(p);
	if (fops->f_isfd) {
		/* fd-based knotes are linked off the fd table */
		if (kev->ident < (u_int)fdp->fd_knlistsize) {
			SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	} else {
		/* hash non-fd knotes here too */
		if (fdp->fd_knhashmask != 0) {
			struct klist *list;

			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link)
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kn == NULL) {
		if ((kev->flags & (EV_ADD|EV_DELETE)) == EV_ADD) {
			kn = knote_alloc();
			if (kn == NULL) {
				proc_fdunlock(p);
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_tq = &kq->kq_head;
			kn->kn_fop = fops;
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;
			kn->kn_inuse = 1;  /* for f_attach() */
			kn->kn_status = 0;

			/* before anyone can find it */
			if (kev->flags & EV_DISABLE)
				kn->kn_status |= KN_DISABLED;

			error = knote_fdpattach(kn, fdp, p);
			proc_fdunlock(p);

			if (error) {
				knote_free(kn);
				goto done;
			}

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			/*
			 * If the attach fails here, we can drop it knowing
			 * that nobody else has a reference to the knote.
			 */
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn, p);
				goto done;
			}
		} else {
			proc_fdunlock(p);
			error = ENOENT;
			goto done;
		}
	} else {
		/* existing knote - get kqueue lock */
		kqlock(kq);
		proc_fdunlock(p);

		if (kev->flags & EV_DELETE) {
			knote_dequeue(kn);
			kn->kn_status |= KN_DISABLED;
			if (kqlock2knotedrop(kq, kn)) {
				kn->kn_fop->f_detach(kn);
				knote_drop(kn, p);
			}
			goto done;
		}

		/* update status flags for existing knote */
		if (kev->flags & EV_DISABLE) {
			knote_dequeue(kn);
			kn->kn_status |= KN_DISABLED;
		} else if (kev->flags & EV_ENABLE) {
			kn->kn_status &= ~KN_DISABLED;
			if (kn->kn_status & KN_ACTIVE)
				knote_enqueue(kn);
		}

		/*
		 * If somebody is in the middle of dropping this
		 * knote - go find/insert a new one.  But we have
		 * to wait for this one to go away first.
		 */
		if (!kqlock2knoteusewait(kq, kn))
			/* kqueue unlocked */
			goto restart;

		/*
		 * The user may change some filter values after the
		 * initial EV_ADD, but doing so will not reset any
		 * filters which have already been triggered.
		 */
		kn->kn_sfflags = kev->fflags;
		kn->kn_sdata = kev->data;
		kn->kn_kevent.udata = kev->udata;
	}

	/* still have use ref on knote */
	if (kn->kn_fop->f_event(kn, 0)) {
		if (knoteuse2kqlock(kq, kn))
			knote_activate(kn);
		kqunlock(kq);
	} else
		knote_put(kn);

done:
	if (fp != NULL)
		fp_drop(p, kev->ident, fp, 0);
	return (error);
}
/*
 * kevent_process - process the triggered events in a kqueue
 *
 *	Walk the queued knotes and validate that they are
 *	really still triggered events by calling the filter
 *	routines (if necessary).  Hold a use reference on
 *	the knote to avoid it being detached. For each event
 *	that is still considered triggered, invoke the
 *	callback routine provided.
 *
 *	caller holds a reference on the kqueue.
 *	kqueue locked on entry and exit - but may be dropped
 */
static int
kevent_process(struct kqueue *kq,
	       kevent_callback_t callback,
	       void *data,
	       int *countp,
	       struct proc *p)
{
	struct knote *kn;
	struct kevent kev;
	int nevents;
	int error;

restart:
	if (kq->kq_count == 0) {
		*countp = 0;
		return 0;
	}

	/* if someone else is processing the queue, wait */
	if (!TAILQ_EMPTY(&kq->kq_inprocess)) {
		assert_wait(&kq->kq_inprocess, THREAD_UNINT);
		kq->kq_state |= KQ_PROCWAIT;
		kqunlock(kq);
		thread_block(THREAD_CONTINUE_NULL);
		kqlock(kq);
		goto restart;
	}

	error = 0;
	nevents = 0;
	while (error == 0 &&
	       (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) {

		/*
		 * Take note off the active queue.
		 *
		 * Non-EV_ONESHOT events must be re-validated.
		 *
		 * Convert our lock to a use-count and call the event's
		 * filter routine to update.
		 *
		 * If the event is valid, or triggered while the kq
		 * is unlocked, move to the inprocess queue for processing.
		 */
		if ((kn->kn_flags & EV_ONESHOT) == 0) {
			int result;

			knote_deactivate(kn);

			if (kqlock2knoteuse(kq, kn)) {

				/* call the filter with just a ref */
				result = kn->kn_fop->f_event(kn, 0);

				/* if it's still alive, make sure it's active */
				if (knoteuse2kqlock(kq, kn) && result) {
					/* may have been reactivated in filter*/
					if (!(kn->kn_status & KN_ACTIVE)) {
						knote_activate(kn);
					}
				} else {
					continue;
				}
			} else {
				continue;
			}
		}

		/* knote is active: move onto inprocess queue */
		assert(kn->kn_tq == &kq->kq_head);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		kn->kn_tq = &kq->kq_inprocess;
		TAILQ_INSERT_TAIL(&kq->kq_inprocess, kn, kn_tqe);

		/*
		 * Got a valid triggered knote with the kqueue
		 * still locked.  Snapshot the data, and determine
		 * how to dispatch the knote for future events.
		 */
		kev = kn->kn_kevent;

		/* now what happens to it? */
		if (kn->kn_flags & EV_ONESHOT) {
			knote_deactivate(kn);
			if (kqlock2knotedrop(kq, kn)) {
				kn->kn_fop->f_detach(kn);
				knote_drop(kn, p);
			}
		} else if (kn->kn_flags & EV_CLEAR) {
			knote_deactivate(kn);
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kqunlock(kq);
		} else {
			/*
			 * leave on in-process queue.  We'll
			 * move all the remaining ones back
			 * to the kq queue and wakeup any
			 * waiters when we are done.
			 */
			kqunlock(kq);
		}

		/* callback to handle each event as we find it */
		error = (callback)(kq, &kev, data);
		nevents++;

		kqlock(kq);
	}

	/*
	 * With the kqueue still locked, move any knotes
	 * remaining on the in-process queue back to the
	 * kq's queue and wake up any waiters.
	 */
	while ((kn = TAILQ_FIRST(&kq->kq_inprocess)) != NULL) {
		assert(kn->kn_tq == &kq->kq_inprocess);
		TAILQ_REMOVE(&kq->kq_inprocess, kn, kn_tqe);
		kn->kn_tq = &kq->kq_head;
		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	}
	if (kq->kq_state & KQ_PROCWAIT) {
		kq->kq_state &= ~KQ_PROCWAIT;
		thread_wakeup(&kq->kq_inprocess);
	}

	*countp = nevents;
	return error;
}
static void
kevent_scan_continue(void *data, wait_result_t wait_result)
{
	uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
	struct _kevent_scan * cont_args = &ut->uu_kevent.ss_kevent_scan;
	struct kqueue *kq = (struct kqueue *)data;
	int error;
	int count;

	/* convert the (previous) wait_result to a proper error */
	switch (wait_result) {
	case THREAD_AWAKENED:
		kqlock(kq);
		error = kevent_process(kq, cont_args->call, cont_args, &count, current_proc());
		if (error == 0 && count == 0) {
			assert_wait_deadline(kq, THREAD_ABORTSAFE, cont_args->deadline);
			kq->kq_state |= KQ_SLEEP;
			kqunlock(kq);
			thread_block_parameter(kevent_scan_continue, kq);
			/* NOTREACHED */
		}
		kqunlock(kq);
		break;
	case THREAD_TIMED_OUT:
		error = EWOULDBLOCK;
		break;
	case THREAD_INTERRUPTED:
		error = EINTR;
		break;
	default:
		panic("kevent_scan_cont() - invalid wait_result (%d)", wait_result);
		error = 0;
	}

	/* call the continuation with the results */
	assert(cont_args->cont != NULL);
	(cont_args->cont)(kq, cont_args->data, error);
}
/*
 * kevent_scan - scan and wait for events in a kqueue
 *
 *	Process the triggered events in a kqueue.
 *
 *	If there are no events triggered arrange to
 *	wait for them. If the caller provided a
 *	continuation routine, then kevent_scan will
 *	not return while blocked; it resumes in that
 *	continuation instead.
 *
 *	The callback routine must be valid.
 *	The caller must hold a use-count reference on the kq.
 */
int
kevent_scan(struct kqueue *kq,
	    kevent_callback_t callback,
	    kevent_continue_t continuation,
	    void *data,
	    struct timeval *atvp,
	    struct proc *p)
{
	thread_continue_t cont = THREAD_CONTINUE_NULL;
	uint64_t deadline;
	int error;
	int first;

	assert(callback != NULL);

	first = 1;
	for (;;) {
		wait_result_t wait_result;
		int count;

		/*
		 * Make a pass through the kq to find events already
		 * triggered.
		 */
		kqlock(kq);
		error = kevent_process(kq, callback, data, &count, p);
		if (error || count)
			break; /* lock still held */

		/* looks like we have to consider blocking */
		if (first) {
			first = 0;
			/* convert the timeout to a deadline once */
			if (atvp->tv_sec || atvp->tv_usec) {
				uint64_t now;

				clock_get_uptime(&now);
				nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC +
							    atvp->tv_usec * NSEC_PER_USEC,
							    &deadline);
				if (now >= deadline) {
					/* non-blocking call */
					error = EWOULDBLOCK;
					break; /* lock still held */
				}
				deadline -= now;
				clock_absolutetime_interval_to_deadline(deadline, &deadline);
			} else {
				deadline = 0;	/* block forever */
			}

			if (continuation) {
				uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
				struct _kevent_scan *cont_args = &ut->uu_kevent.ss_kevent_scan;

				cont_args->call = callback;
				cont_args->cont = continuation;
				cont_args->deadline = deadline;
				cont_args->data = data;
				cont = kevent_scan_continue;
			}
		}

		/* go ahead and wait */
		assert_wait_deadline(kq, THREAD_ABORTSAFE, deadline);
		kq->kq_state |= KQ_SLEEP;
		kqunlock(kq);
		wait_result = thread_block_parameter(cont, kq);
		/* NOTREACHED if (continuation != NULL) */

		switch (wait_result) {
		case THREAD_AWAKENED:
			continue;
		case THREAD_TIMED_OUT:
			return EWOULDBLOCK;
		case THREAD_INTERRUPTED:
			return EINTR;
		default:
			panic("kevent_scan - bad wait_result (%d)",
			      wait_result);
			error = 0;
		}
	}
	kqunlock(kq);
	return error;
}
/*
 * This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(__unused struct fileproc *fp,
	    __unused struct uio *uio,
	    __unused int flags,
	    __unused vfs_context_t ctx)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_write(__unused struct fileproc *fp,
	     __unused struct uio *uio,
	     __unused int flags,
	     __unused vfs_context_t ctx)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_ioctl(__unused struct fileproc *fp,
	     __unused u_long com,
	     __unused caddr_t data,
	     __unused vfs_context_t ctx)
{
	return (ENOTTY);
}
/*ARGSUSED*/
static int
kqueue_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	int retnum = 0;

	if (which == FREAD) {
		kqlock(kq);
		if (kq->kq_count) {
			retnum = 1;
		} else {
			selrecord(vfs_context_proc(ctx), &kq->kq_sel, wql);
			kq->kq_state |= KQ_SEL;
		}
		kqunlock(kq);
	}
	return (retnum);
}
/*ARGSUSED*/
static int
kqueue_close(struct fileglob *fg, __unused vfs_context_t ctx)
{
	struct kqueue *kq = (struct kqueue *)fg->fg_data;

	kqueue_dealloc(kq);
	fg->fg_data = NULL;
	return (0);
}
/*ARGSUSED*/
/*
 * The caller has taken a use-count reference on this kqueue and will donate it
 * to the kqueue we are being added to.  This keeps the kqueue from closing until
 * that relationship is torn down.
 */
static int
kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_context_t ctx)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
	struct kqueue *parentkq = kn->kn_kq;

	if (parentkq == kq ||
	    kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * We have to avoid creating a cycle when nesting kqueues
	 * inside another.  Rather than trying to walk the whole
	 * potential DAG of nested kqueues, we just use a simple
	 * ceiling protocol.  When a kqueue is inserted into another,
	 * we check that the (future) parent is not already nested
	 * into another kqueue at a lower level than the potential
	 * child (because it could indicate a cycle).  If that test
	 * passes, we just mark the nesting levels accordingly.
	 */

	kqlock(parentkq);
	if (parentkq->kq_level > 0 &&
	    parentkq->kq_level < kq->kq_level) {
		kqunlock(parentkq);
		return (1);
	} else {
		/* set parent level appropriately */
		if (parentkq->kq_level == 0)
			parentkq->kq_level = 2;
		if (parentkq->kq_level < kq->kq_level + 1)
			parentkq->kq_level = kq->kq_level + 1;
		kqunlock(parentkq);

		kn->kn_fop = &kqread_filtops;
		kqlock(kq);
		KNOTE_ATTACH(&kq->kq_sel.si_note, kn);
		/* indicate nesting in child, if needed */
		if (kq->kq_level == 0)
			kq->kq_level = 1;
		kqunlock(kq);
		return (0);
	}
}
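/*
 * Illustration only (not part of the original source): the nesting that the
 * ceiling protocol above guards against -- a kqueue registered for EVFILT_READ
 * inside another kqueue, so the inner one becomes readable when it has pending
 * events.
 */
#if 0
#include <sys/event.h>
#include <unistd.h>

static int
nest_kqueues(void)
{
	struct kevent kev;
	int inner = kqueue();
	int outer = kqueue();

	if (inner < 0 || outer < 0)
		return -1;

	/* this registration ends up in kqueue_kqfilter() above */
	EV_SET(&kev, inner, EVFILT_READ, EV_ADD, 0, 0, NULL);
	return kevent(outer, &kev, 1, NULL, 0, NULL);
}
#endif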
/*ARGSUSED*/
int
kqueue_stat(struct fileproc *fp, void *ub, int isstat64, __unused vfs_context_t ctx)
{
	struct stat *sb = (struct stat *)0;	/* warning avoidance ; protected by isstat64 */
	struct stat64 * sb64 = (struct stat64 *)0;  /* warning avoidance ; protected by isstat64 */

	struct kqueue *kq = (struct kqueue *)fp->f_data;
	if (isstat64 != 0) {
		sb64 = (struct stat64 *)ub;
		bzero((void *)sb64, sizeof(*sb64));
		sb64->st_size = kq->kq_count;
		sb64->st_blksize = sizeof(struct kevent);
		sb64->st_mode = S_IFIFO;
	} else {
		sb = (struct stat *)ub;
		bzero((void *)sb, sizeof(*sb));
		sb->st_size = kq->kq_count;
		sb->st_blksize = sizeof(struct kevent);
		sb->st_mode = S_IFIFO;
	}

	return (0);
}
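/*
 * Illustration only (not part of the original source): from user space the
 * stat fields filled in above are visible through fstat(2) -- st_mode reads
 * as a FIFO and st_size is the number of pending events.
 */
#if 0
#include <sys/stat.h>
#include <sys/event.h>
#include <stdio.h>

static void
show_kqueue_stat(int kq)
{
	struct stat st;

	if (fstat(kq, &st) == 0)
		printf("fifo-like: %d, pending events: %lld\n",
		       S_ISFIFO(st.st_mode), (long long)st.st_size);
}
#endif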
/*
 * Called with the kqueue locked
 */
static void
kqueue_wakeup(struct kqueue *kq)
{
	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		thread_wakeup(kq);
	}
	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeup(&kq->kq_sel);
	}
	KNOTE(&kq->kq_sel.si_note, 0);
}
void
klist_init(struct klist *list)
{
	SLIST_INIT(list);
}


/*
 * Query/Post each knote in the object's list
 *
 *	The object lock protects the list. It is assumed
 *	that the filter/event routine for the object can
 *	determine that the object is already locked (via
 *	the hint) and not deadlock itself.
 *
 *	The object lock should also hold off pending
 *	detach/drop operations.  But we'll prevent it here
 *	too - just in case.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext) {
		struct kqueue *kq = kn->kn_kq;

		kqlock(kq);
		if (kqlock2knoteuse(kq, kn)) {
			int result;

			/* call the event with only a use count */
			result = kn->kn_fop->f_event(kn, hint);

			/* if its not going away and triggered */
			if (knoteuse2kqlock(kq, kn) && result)
				knote_activate(kn);
			/* lock held again */
		}
		kqunlock(kq);
	}
}

/*
 * attach a knote to the specified list.  Return true if this is the first entry.
 * The list is protected by whatever lock the object it is associated with uses.
 */
int
knote_attach(struct klist *list, struct knote *kn)
{
	int ret = SLIST_EMPTY(list);
	SLIST_INSERT_HEAD(list, kn, kn_selnext);
	return ret;
}

/*
 * detach a knote from the specified list.  Return true if that was the last entry.
 * The list is protected by whatever lock the object it is associated with uses.
 */
int
knote_detach(struct klist *list, struct knote *kn)
{
	SLIST_REMOVE(list, kn, knote, kn_selnext);
	return SLIST_EMPTY(list);
}
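/*
 * Illustration only (not part of the original source): how an event source
 * inside the kernel typically drives the routines above -- keep a klist per
 * object, attach/detach knotes from its filter's f_attach/f_detach routines,
 * and call KNOTE() (which expands to knote()) whenever the object's state
 * changes.  filt_timerexpire() earlier in this file does exactly this with a
 * temporary list.  The "example_object" type here is hypothetical.
 */
#if 0
struct example_object {
	struct klist	eo_note;	/* protected by the object's own lock */
	int		eo_ready;
};

static void
example_object_init(struct example_object *eo)
{
	klist_init(&eo->eo_note);
}

static void
example_object_changed(struct example_object *eo, long hint)
{
	/* caller holds the object lock, per the comment above knote() */
	eo->eo_ready = 1;
	KNOTE(&eo->eo_note, hint);
}
#endif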
/*
 * remove all knotes referencing a specified fd
 *
 * Essentially an inlined knote_remove & knote_drop
 * when we know for sure that the thing is a file
 *
 * Entered with the proc_fd lock already held.
 * It returns the same way, but may drop it temporarily.
 */
void
knote_fdclose(struct proc *p, int fd)
{
	struct filedesc *fdp = p->p_fd;
	struct klist *list;
	struct knote *kn;

	list = &fdp->fd_knlist[fd];
	while ((kn = SLIST_FIRST(list)) != NULL) {
		struct kqueue *kq = kn->kn_kq;

		if (kq->kq_p != p)
			panic("knote_fdclose: proc mismatch (kq->kq_p=%p != p=%p)", kq->kq_p, p);

		kqlock(kq);
		proc_fdunlock(p);

		/*
		 * Convert the lock to a drop ref.
		 * If we get it, go ahead and drop it.
		 * Otherwise, we waited for it to
		 * be dropped by the other guy, so
		 * it is safe to move on in the list.
		 */
		if (kqlock2knotedrop(kq, kn)) {
			kn->kn_fop->f_detach(kn);
			knote_drop(kn, p);
		}

		proc_fdlock(p);

		/* the fd tables may have changed - start over */
		list = &fdp->fd_knlist[fd];
	}
}
/* proc_fdlock held on entry (and exit) */
static int
knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
{
	struct klist *list = NULL;

	if (! kn->kn_fop->f_isfd) {
		if (fdp->fd_knhashmask == 0)
			fdp->fd_knhash = hashinit(CONFIG_KN_HASHSIZE, M_KQUEUE,
			    &fdp->fd_knhashmask);
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
	} else {
		if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
			u_int size = 0;

			/* have to grow the fd_knlist */
			size = fdp->fd_knlistsize;
			while (size <= kn->kn_id)
				size += KQEXTENT;
			MALLOC(list, struct klist *,
			       size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
			if (list == NULL)
				return (ENOMEM);

			bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
			      fdp->fd_knlistsize * sizeof(struct klist *));
			bzero((caddr_t)list +
			      fdp->fd_knlistsize * sizeof(struct klist *),
			      (size - fdp->fd_knlistsize) * sizeof(struct klist *));
			FREE(fdp->fd_knlist, M_KQUEUE);
			fdp->fd_knlist = list;
			fdp->fd_knlistsize = size;
		}
		list = &fdp->fd_knlist[kn->kn_id];
	}
	SLIST_INSERT_HEAD(list, kn, kn_link);
	return (0);
}
/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling fdrop and free.
 */
static void
knote_drop(struct knote *kn, __unused struct proc *ctxp)
{
	struct kqueue *kq = kn->kn_kq;
	struct proc *p = kq->kq_p;
	struct filedesc *fdp = p->p_fd;
	struct klist *list;

	proc_fdlock(p);
	if (kn->kn_fop->f_isfd)
		list = &fdp->fd_knlist[kn->kn_id];
	else
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);
	kqlock(kq);
	knote_dequeue(kn);
	if (kn->kn_status & KN_DROPWAIT)
		thread_wakeup(&kn->kn_status);
	kqunlock(kq);
	proc_fdunlock(p);

	if (kn->kn_fop->f_isfd)
		fp_drop(p, kn->kn_id, kn->kn_fp, 0);

	knote_free(kn);
}
/* called with kqueue lock held */
static void
knote_activate(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	kn->kn_status |= KN_ACTIVE;
	knote_enqueue(kn);
	kqueue_wakeup(kq);
}

/* called with kqueue lock held */
static void
knote_deactivate(struct knote *kn)
{
	kn->kn_status &= ~KN_ACTIVE;
	knote_dequeue(kn);
}

/* called with kqueue lock held */
static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
		struct kqtailq *tq = kn->kn_tq;

		TAILQ_INSERT_TAIL(tq, kn, kn_tqe);
		kn->kn_status |= KN_QUEUED;
		kq->kq_count++;
	}
}

/* called with kqueue lock held */
static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	//assert((kn->kn_status & KN_DISABLED) == 0);
	if ((kn->kn_status & KN_QUEUED) == KN_QUEUED) {
		struct kqtailq *tq = kn->kn_tq;

		TAILQ_REMOVE(tq, kn, kn_tqe);
		kn->kn_tq = &kq->kq_head;
		kn->kn_status &= ~KN_QUEUED;
		kq->kq_count--;
	}
}
void
knote_init(void)
{
	knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), 8192, "knote zone");

	/* allocate kq lock group attribute and group */
	kq_lck_grp_attr = lck_grp_attr_alloc_init();

	kq_lck_grp = lck_grp_alloc_init("kqueue", kq_lck_grp_attr);

	/* Allocate kq lock attribute */
	kq_lck_attr = lck_attr_alloc_init();

	/* Initialize the timer filter lock */
	lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

static struct knote *
knote_alloc(void)
{
	return ((struct knote *)zalloc(knote_zone));
}

static void
knote_free(struct knote *kn)
{
	zfree(knote_zone, kn);
}
#if SOCKETS
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/kern_event.h>
#include <sys/malloc.h>
#include <sys/sys_domain.h>
#include <sys/syslog.h>
static int kev_attach(struct socket *so, int proto, struct proc *p);
static int kev_detach(struct socket *so);
static int kev_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p);

struct pr_usrreqs event_usrreqs = {
     pru_abort_notsupp, pru_accept_notsupp, kev_attach, pru_bind_notsupp, pru_connect_notsupp,
     pru_connect2_notsupp, kev_control, kev_detach, pru_disconnect_notsupp,
     pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp, pru_rcvoob_notsupp,
     pru_send_notsupp, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp,
     pru_sosend_notsupp, soreceive, pru_sopoll_notsupp
};

struct protosw eventsw[] = {
     {
	  SOCK_RAW,		&systemdomain,	SYSPROTO_EVENT,		PR_ATOMIC,
     }
};

struct kern_event_head kern_event_head;

static u_long static_event_id = 0;
struct domain *sysdom = &systemdomain;
static lck_mtx_t *sys_mtx;
/*
 * Install the protosw's for the NKE manager.  Invoked at
 *  extension load time
 */
int
kern_event_init(void)
{
    int retval;

    if ((retval = net_add_proto(eventsw, &systemdomain)) != 0) {
            log(LOG_WARNING, "Can't install kernel events protocol (%d)\n", retval);
            return(retval);
    }

    /*
     * Use the domain mutex for all system event sockets
     */
    sys_mtx = sysdom->dom_mtx;

    return(KERN_SUCCESS);
}
static int
kev_attach(struct socket *so, __unused int proto, __unused struct proc *p)
{
     int error;
     struct kern_event_pcb  *ev_pcb;

     error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE);
     if (error)
          return error;

     MALLOC(ev_pcb, struct kern_event_pcb *, sizeof(struct kern_event_pcb), M_PCB, M_WAITOK);
     if (ev_pcb == 0)
          return ENOBUFS;

     ev_pcb->ev_socket = so;
     ev_pcb->vendor_code_filter = 0xffffffff;

     so->so_pcb = (caddr_t) ev_pcb;
     lck_mtx_lock(sys_mtx);
     LIST_INSERT_HEAD(&kern_event_head, ev_pcb, ev_link);
     lck_mtx_unlock(sys_mtx);

     return 0;
}
static int
kev_detach(struct socket *so)
{
     struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb;

     if (ev_pcb != 0) {
		LIST_REMOVE(ev_pcb, ev_link);
		FREE(ev_pcb, M_PCB);
		so->so_pcb = 0;
		so->so_flags |= SOF_PCBCLEARING;
     }

     return 0;
}
/*
 * For now, kev_vendor_code and mbuf_tags use the same
 * mechanism.
 */

errno_t kev_vendor_code_find(
	const char	*string,
	u_int32_t	*out_vendor_code)
{
	if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
		return EINVAL;
	}
	return mbuf_tag_id_find_internal(string, out_vendor_code, 1);
}
errno_t kev_msg_post(struct kev_msg *event_msg)
{
	mbuf_tag_id_t	min_vendor, max_vendor;

	mbuf_tag_id_first_last(&min_vendor, &max_vendor);

	if (event_msg == NULL)
		return EINVAL;

	/* Limit third parties to posting events for registered vendor codes only */
	if (event_msg->vendor_code < min_vendor ||
		event_msg->vendor_code > max_vendor)
	{
		return EINVAL;
	}

	return kev_post_msg(event_msg);
}
int  kev_post_msg(struct kev_msg *event_msg)
{
     struct mbuf *m, *m2;
     struct kern_event_pcb  *ev_pcb;
     struct kern_event_msg  *ev;
     char              *tmp;
     unsigned long     total_size;
     int               i;

	/* Verify the message is small enough to fit in one mbuf w/o cluster */
	total_size = KEV_MSG_HEADER_SIZE;

	for (i = 0; i < 5; i++) {
		if (event_msg->dv[i].data_length == 0)
			break;
		total_size += event_msg->dv[i].data_length;
	}

	if (total_size > MLEN) {
		return EMSGSIZE;
	}

     m = m_get(M_DONTWAIT, MT_DATA);
     if (m == 0)
          return ENOBUFS;

     ev = mtod(m, struct kern_event_msg *);
     total_size = KEV_MSG_HEADER_SIZE;

     tmp = (char *) &ev->event_data[0];
     for (i = 0; i < 5; i++) {
          if (event_msg->dv[i].data_length == 0)
               break;

          total_size += event_msg->dv[i].data_length;
          bcopy(event_msg->dv[i].data_ptr, tmp,
                event_msg->dv[i].data_length);
          tmp += event_msg->dv[i].data_length;
     }

     ev->id = ++static_event_id;
     ev->total_size   = total_size;
     ev->vendor_code  = event_msg->vendor_code;
     ev->kev_class    = event_msg->kev_class;
     ev->kev_subclass = event_msg->kev_subclass;
     ev->event_code   = event_msg->event_code;

     m->m_len = total_size;
     lck_mtx_lock(sys_mtx);
     for (ev_pcb = LIST_FIRST(&kern_event_head);
          ev_pcb;
          ev_pcb = LIST_NEXT(ev_pcb, ev_link)) {

          if (ev_pcb->vendor_code_filter != KEV_ANY_VENDOR) {
               if (ev_pcb->vendor_code_filter != ev->vendor_code)
                    continue;

               if (ev_pcb->class_filter != KEV_ANY_CLASS) {
                    if (ev_pcb->class_filter != ev->kev_class)
                         continue;

                    if ((ev_pcb->subclass_filter != KEV_ANY_SUBCLASS) &&
                        (ev_pcb->subclass_filter != ev->kev_subclass))
                         continue;
               }
          }

          m2 = m_copym(m, 0, m->m_len, M_NOWAIT);
          if (m2 == 0) {
               m_free(m);
               lck_mtx_unlock(sys_mtx);
               return ENOBUFS;
          }
          /* the socket is already locked because we hold the sys_mtx here */
          if (sbappendrecord(&ev_pcb->ev_socket->so_rcv, m2))
                  sorwakeup(ev_pcb->ev_socket);
     }

     m_free(m);
     lck_mtx_unlock(sys_mtx);
     return 0;
}
static int kev_control(struct socket *so,
			u_long cmd,
			caddr_t data,
			__unused struct ifnet *ifp,
			__unused struct proc *p)
{
	struct kev_request *kev_req = (struct kev_request *) data;
	struct kern_event_pcb  *ev_pcb;
	struct kev_vendor_code *kev_vendor;
	u_long  *id_value = (u_long *) data;

	switch (cmd) {

		case SIOCGKEVID:
			*id_value = static_event_id;
			break;

		case SIOCSKEVFILT:
			ev_pcb = (struct kern_event_pcb *) so->so_pcb;
			ev_pcb->vendor_code_filter = kev_req->vendor_code;
			ev_pcb->class_filter     = kev_req->kev_class;
			ev_pcb->subclass_filter  = kev_req->kev_subclass;
			break;

		case SIOCGKEVFILT:
			ev_pcb = (struct kern_event_pcb *) so->so_pcb;
			kev_req->vendor_code = ev_pcb->vendor_code_filter;
			kev_req->kev_class   = ev_pcb->class_filter;
			kev_req->kev_subclass = ev_pcb->subclass_filter;
			break;

		case SIOCGKEVVENDOR:
			kev_vendor = (struct kev_vendor_code*)data;

			/* Make sure string is NULL terminated */
			kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0;

			return mbuf_tag_id_find_internal(kev_vendor->vendor_string,
							 &kev_vendor->vendor_code, 0);

		default:
			return ENOTSUP;
	}

	return 0;
}

#endif /* SOCKETS */
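/*
 * Illustration only (not part of the original source): the consumer side of
 * the kernel event protocol above -- open a PF_SYSTEM/SYSPROTO_EVENT socket,
 * install a vendor/class/subclass filter with SIOCSKEVFILT, and read the
 * struct kern_event_msg records posted by kev_post_msg().  The vendor code
 * passed in is a placeholder supplied by the caller.
 */
#if 0
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/kern_event.h>
#include <stdio.h>
#include <unistd.h>

static int
read_kernel_events(u_int32_t vendor_code)
{
	struct kev_request req;
	char buf[1024];
	int s = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);

	if (s < 0)
		return -1;

	/* same fields kev_control() copies into the pcb filter above */
	req.vendor_code = vendor_code;
	req.kev_class = KEV_ANY_CLASS;
	req.kev_subclass = KEV_ANY_SUBCLASS;
	if (ioctl(s, SIOCSKEVFILT, &req) < 0) {
		close(s);
		return -1;
	}

	for (;;) {
		struct kern_event_msg *ev = (struct kern_event_msg *)buf;
		ssize_t n = recv(s, buf, sizeof(buf), 0);

		if (n <= 0)
			break;
		printf("event code %u, class %u, subclass %u\n",
		       ev->event_code, ev->kev_class, ev->kev_subclass);
	}
	close(s);
	return 0;
}
#endif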
int
fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
{
	struct vinfo_stat * st;

	/* No need for the funnel as fd is kept alive */

	st = &kinfo->kq_stat;

	st->vst_size = kq->kq_count;
	st->vst_blksize = sizeof(struct kevent);
	st->vst_mode = S_IFIFO;
	if (kq->kq_state & KQ_SEL)
		kinfo->kq_state |= PROC_KQUEUE_SELECT;
	if (kq->kq_state & KQ_SLEEP)
		kinfo->kq_state |= PROC_KQUEUE_SLEEP;

	return (0);
}