2  * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
   4  * @APPLE_LICENSE_HEADER_START@ 
   6  * The contents of this file constitute Original Code as defined in and 
   7  * are subject to the Apple Public Source License Version 1.1 (the 
   8  * "License").  You may not use this file except in compliance with the 
   9  * License.  Please obtain a copy of the License at 
  10  * http://www.apple.com/publicsource and read it before using this file. 
  12  * This Original Code and all software distributed under the License are 
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the 
  17  * License for the specific language governing rights and limitations 
  20  * @APPLE_LICENSE_HEADER_END@ 
  24  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 
  25  * All rights reserved. 
  27  * Redistribution and use in source and binary forms, with or without 
  28  * modification, are permitted provided that the following conditions 
  30  * 1. Redistributions of source code must retain the above copyright 
  31  *    notice, this list of conditions and the following disclaimer. 
  32  * 2. Redistributions in binary form must reproduce the above copyright 
  33  *    notice, this list of conditions and the following disclaimer in the 
  34  *    documentation and/or other materials provided with the distribution. 
  36  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 
  37  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  38  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  39  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 
  40  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  41  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  42  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  44  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  45  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  49  *      @(#)kern_event.c       1.0 (3/31/2000) 
  53 #include <sys/param.h> 
  54 #include <sys/systm.h> 
  55 #include <sys/filedesc.h> 
  56 #include <sys/kernel.h> 
  57 #include <sys/proc_internal.h> 
  58 #include <sys/kauth.h> 
  59 #include <sys/malloc.h>  
  60 #include <sys/unistd.h> 
  61 #include <sys/file_internal.h> 
  62 #include <sys/fcntl.h> 
  63 #include <sys/select.h> 
  64 #include <sys/queue.h> 
  65 #include <sys/event.h> 
  66 #include <sys/eventvar.h> 
  67 #include <sys/protosw.h> 
  68 #include <sys/socket.h> 
  69 #include <sys/socketvar.h> 
  71 #include <sys/sysctl.h> 
  73 #include <sys/sysproto.h> 
  77 #include <kern/lock.h> 
  78 #include <kern/clock.h> 
  79 #include <kern/thread_call.h> 
  80 #include <kern/sched_prim.h> 
  81 #include <kern/zalloc.h> 
  82 #include <kern/assert.h> 
  84 #include <libkern/libkern.h> 
  86 extern void unix_syscall_return(int); 
  88 MALLOC_DEFINE(M_KQUEUE
, "kqueue", "memory for kqueue system"); 
  90 static inline void kqlock(struct kqueue 
*kq
); 
  91 static inline void kqunlock(struct kqueue 
*kq
); 
  93 static int      kqlock2knoteuse(struct kqueue 
*kq
, struct knote 
*kn
); 
  94 static int      kqlock2knoteusewait(struct kqueue 
*kq
, struct knote 
*kn
); 
  95 static int      kqlock2knotedrop(struct kqueue 
*kq
, struct knote 
*kn
); 
  96 static int      knoteuse2kqlock(struct kqueue 
*kq
, struct knote 
*kn
); 
  98 static void     kqueue_wakeup(struct kqueue 
*kq
); 
  99 static int      kqueue_read(struct fileproc 
*fp
, struct uio 
*uio
, 
 100                     kauth_cred_t cred
, int flags
, struct proc 
*p
); 
 101 static int      kqueue_write(struct fileproc 
*fp
, struct uio 
*uio
, 
 102                     kauth_cred_t cred
, int flags
, struct proc 
*p
); 
 103 static int      kqueue_ioctl(struct fileproc 
*fp
, u_long com
, caddr_t data
, 
 105 static int      kqueue_select(struct fileproc 
*fp
, int which
, void *wql
,  
 107 static int      kqueue_close(struct fileglob 
*fp
, struct proc 
*p
); 
 108 static int      kqueue_kqfilter(struct fileproc 
*fp
, struct knote 
*kn
, struct proc 
*p
); 
 109 extern int      kqueue_stat(struct fileproc 
*fp
, struct stat 
*st
, struct proc 
*p
); 
 111 static struct fileops kqueueops 
= { 
 121 static int kevent_copyin(user_addr_t 
*addrp
, struct kevent 
*kevp
, struct proc 
*p
); 
 122 static int kevent_copyout(struct kevent 
*kevp
, user_addr_t 
*addrp
, struct proc 
*p
); 
 124 static int      kevent_callback(struct kqueue 
*kq
, struct kevent 
*kevp
, void *data
); 
 125 static void     kevent_continue(struct kqueue 
*kq
, void *data
, int error
); 
 126 static void     kevent_scan_continue(void *contp
, wait_result_t wait_result
); 
 127 static int      kevent_process(struct kqueue 
*kq
, kevent_callback_t callback
, 
 128                                void *data
, int *countp
, struct proc 
*p
); 
 129 static void     knote_put(struct knote 
*kn
); 
 130 static int      knote_fdpattach(struct knote 
*kn
, struct filedesc 
*fdp
, struct proc 
*p
); 
 131 static void     knote_drop(struct knote 
*kn
, struct proc 
*p
); 
 132 static void     knote_activate(struct knote 
*kn
); 
 133 static void     knote_deactivate(struct knote 
*kn
); 
 134 static void     knote_enqueue(struct knote 
*kn
); 
 135 static void     knote_dequeue(struct knote 
*kn
); 
 136 static struct   knote 
*knote_alloc(void); 
 137 static void     knote_free(struct knote 
*kn
); 
 138 extern void     knote_init(void); 
 140 static int      filt_fileattach(struct knote 
*kn
); 
 141 static struct filterops file_filtops 
= 
 142         { 1, filt_fileattach
, NULL
, NULL 
}; 
 144 static void     filt_kqdetach(struct knote 
*kn
); 
 145 static int      filt_kqueue(struct knote 
*kn
, long hint
); 
 146 static struct filterops kqread_filtops 
= 
 147         { 1, NULL
, filt_kqdetach
, filt_kqueue 
}; 
 150  * placeholder for not-yet-implemented filters 
 152 static int      filt_badattach(struct knote 
*kn
); 
 153 static struct filterops bad_filtops 
= 
 154         { 0, filt_badattach
, 0 , 0 }; 
 156 static int      filt_procattach(struct knote 
*kn
); 
 157 static void     filt_procdetach(struct knote 
*kn
); 
 158 static int      filt_proc(struct knote 
*kn
, long hint
); 
 160 static struct filterops proc_filtops 
= 
 161         { 0, filt_procattach
, filt_procdetach
, filt_proc 
}; 
 163 extern struct filterops fs_filtops
; 
 165 extern struct filterops sig_filtops
; 
 169 static int      filt_timercompute(struct knote 
*kn
, uint64_t *abs_time
); 
 170 static void     filt_timerexpire(void *knx
, void *param1
); 
 171 static int      filt_timerattach(struct knote 
*kn
); 
 172 static void     filt_timerdetach(struct knote 
*kn
); 
 173 static int      filt_timer(struct knote 
*kn
, long hint
); 
 175 static struct filterops timer_filtops 
= 
 176         { 0, filt_timerattach
, filt_timerdetach
, filt_timer 
}; 
 178 /* to avoid arming timers that fire quicker than we can handle */ 
 179 static uint64_t filt_timerfloor 
= 0;  
 181 static lck_mtx_t _filt_timerlock
; 
 182 static void     filt_timerlock(void); 
 183 static void     filt_timerunlock(void); 
 186  * Sentinel marker for a thread scanning through the list of 
 189 static struct filterops threadmarker_filtops 
= 
 190         { 0, filt_badattach
, 0, 0 }; 
 192 static zone_t   knote_zone
; 
 194 #define KN_HASHSIZE             64              /* XXX should be tunable */ 
 195 #define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask)) 
 198 extern struct filterops aio_filtops
; 
 202  * Table for for all system-defined filters. 
 204 static struct filterops 
*sysfilt_ops
[] = { 
 205         &file_filtops
,                  /* EVFILT_READ */ 
 206         &file_filtops
,                  /* EVFILT_WRITE */ 
 208         &aio_filtops
,                   /* EVFILT_AIO */ 
 210         &bad_filtops
,                   /* EVFILT_AIO */ 
 212         &file_filtops
,                  /* EVFILT_VNODE */ 
 213         &proc_filtops
,                  /* EVFILT_PROC */ 
 214         &sig_filtops
,                   /* EVFILT_SIGNAL */ 
 215         &timer_filtops
,                 /* EVFILT_TIMER */ 
 216         &bad_filtops
,                   /* EVFILT_MACHPORT */ 
 217         &fs_filtops                     
/* EVFILT_FS */ 
 221  * kqueue/note lock attributes and implementations 
 223  *      kqueues have locks, while knotes have use counts 
 224  *      Most of the knote state is guarded by the object lock. 
 225  *      the knote "inuse" count and status use the kqueue lock. 
 227 lck_grp_attr_t 
* kq_lck_grp_attr
; 
 228 lck_grp_t 
* kq_lck_grp
; 
 229 lck_attr_t 
* kq_lck_attr
; 
 232 kqlock(struct kqueue 
*kq
) 
 234         lck_spin_lock(&kq
->kq_lock
); 
 238 kqunlock(struct kqueue 
*kq
) 
 240         lck_spin_unlock(&kq
->kq_lock
); 
 244  * Convert a kq lock to a knote use referece. 
 246  *      If the knote is being dropped, we can't get 
 247  *      a use reference, so just return with it 
 250  *      - kq locked at entry 
 251  *      - unlock on exit if we get the use reference 
 254 kqlock2knoteuse(struct kqueue 
*kq
, struct knote 
*kn
) 
 256         if (kn
->kn_status 
& KN_DROPPING
) 
 264  * Convert a kq lock to a knote use referece. 
 266  *      If the knote is being dropped, we can't get 
 267  *      a use reference, so just return with it 
 270  *      - kq locked at entry 
 271  *      - kq always unlocked on exit 
 274 kqlock2knoteusewait(struct kqueue 
*kq
, struct knote 
*kn
) 
 276         if (!kqlock2knoteuse(kq
, kn
)) { 
 277                 kn
->kn_status 
|= KN_DROPWAIT
; 
 278                 assert_wait(&kn
->kn_status
, THREAD_UNINT
); 
 280                 thread_block(THREAD_CONTINUE_NULL
); 
 287  * Convert from a knote use reference back to kq lock. 
 289  *      Drop a use reference and wake any waiters if 
 290  *      this is the last one. 
 292  *      The exit return indicates if the knote is 
 293  *      still alive - but the kqueue lock is taken 
 297 knoteuse2kqlock(struct kqueue 
*kq
, struct knote 
*kn
) 
 300         if ((--kn
->kn_inuse 
== 0) && 
 301             (kn
->kn_status 
& KN_USEWAIT
)) { 
 302                 kn
->kn_status 
&= ~KN_USEWAIT
; 
 303                 thread_wakeup(&kn
->kn_inuse
); 
 305         return ((kn
->kn_status 
& KN_DROPPING
) == 0); 
 309  * Convert a kq lock to a knote drop referece. 
 311  *      If the knote is in use, wait for the use count 
 312  *      to subside.  We first mark our intention to drop 
 313  *      it - keeping other users from "piling on." 
 314  *      If we are too late, we have to wait for the 
 315  *      other drop to complete. 
 317  *      - kq locked at entry 
 318  *      - always unlocked on exit. 
 319  *      - caller can't hold any locks that would prevent 
 320  *        the other dropper from completing. 
 323 kqlock2knotedrop(struct kqueue 
*kq
, struct knote 
*kn
) 
 326         if ((kn
->kn_status 
& KN_DROPPING
) == 0) { 
 327                 kn
->kn_status 
|= KN_DROPPING
; 
 328                 if (kn
->kn_inuse 
> 0) { 
 329                         kn
->kn_status 
|= KN_USEWAIT
; 
 330                         assert_wait(&kn
->kn_inuse
, THREAD_UNINT
); 
 332                         thread_block(THREAD_CONTINUE_NULL
); 
 337                 kn
->kn_status 
|= KN_DROPWAIT
; 
 338                 assert_wait(&kn
->kn_status
, THREAD_UNINT
); 
 340                 thread_block(THREAD_CONTINUE_NULL
); 
 346  * Release a knote use count reference. 
 349 knote_put(struct knote 
*kn
) 
 351         struct kqueue 
*kq 
= kn
->kn_kq
; 
 354         if ((--kn
->kn_inuse 
== 0) &&  
 355             (kn
->kn_status 
& KN_USEWAIT
)) { 
 356                 kn
->kn_status 
&= ~KN_USEWAIT
; 
 357                 thread_wakeup(&kn
->kn_inuse
); 
 365 filt_fileattach(struct knote 
*kn
) 
 368         return (fo_kqfilter(kn
->kn_fp
, kn
, current_proc())); 
 371 #define f_flag f_fglob->fg_flag 
 372 #define f_type f_fglob->fg_type 
 373 #define f_msgcount f_fglob->fg_msgcount 
 374 #define f_cred f_fglob->fg_cred 
 375 #define f_ops f_fglob->fg_ops 
 376 #define f_offset f_fglob->fg_offset 
 377 #define f_data f_fglob->fg_data 
 380 filt_kqdetach(struct knote 
*kn
) 
 382         struct kqueue 
*kq 
= (struct kqueue 
*)kn
->kn_fp
->f_data
; 
 385         KNOTE_DETACH(&kq
->kq_sel
.si_note
, kn
); 
 391 filt_kqueue(struct knote 
*kn
, __unused 
long hint
) 
 393         struct kqueue 
*kq 
= (struct kqueue 
*)kn
->kn_fp
->f_data
; 
 395         kn
->kn_data 
= kq
->kq_count
; 
 396         return (kn
->kn_data 
> 0); 
 400 filt_procattach(struct knote 
*kn
) 
 405         funnel_state 
= thread_funnel_set(kernel_flock
, TRUE
); 
 407         if ((kn
->kn_sfflags 
& (NOTE_TRACK 
| NOTE_TRACKERR 
| NOTE_CHILD
)) != 0) { 
 408                 thread_funnel_set(kernel_flock
, funnel_state
); 
 412         p 
= pfind(kn
->kn_id
); 
 414                 thread_funnel_set(kernel_flock
, funnel_state
); 
 418         kn
->kn_flags 
|= EV_CLEAR
;               /* automatically set */ 
 420         /* XXX lock the proc here while adding to the list? */ 
 421         KNOTE_ATTACH(&p
->p_klist
, kn
); 
 423         thread_funnel_set(kernel_flock
, funnel_state
); 
 429  * The knote may be attached to a different process, which may exit, 
 430  * leaving nothing for the knote to be attached to.  So when the process 
 431  * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 
 432  * it will be deleted when read out.  However, as part of the knote deletion, 
 433  * this routine is called, so a check is needed to avoid actually performing 
 434  * a detach, because the original process does not exist any more. 
 437 filt_procdetach(struct knote 
*kn
) 
 442         funnel_state 
= thread_funnel_set(kernel_flock
, TRUE
); 
 443         p 
= pfind(kn
->kn_id
); 
 445         if (p 
!= (struct proc 
*)NULL
) 
 446                 KNOTE_DETACH(&p
->p_klist
, kn
); 
 448         thread_funnel_set(kernel_flock
, funnel_state
); 
 452 filt_proc(struct knote 
*kn
, long hint
) 
 457         funnel_state 
= thread_funnel_set(kernel_flock
, TRUE
); 
 460          * mask off extra data 
 462         event 
= (u_int
)hint 
& NOTE_PCTRLMASK
; 
 465          * if the user is interested in this event, record it. 
 467         if (kn
->kn_sfflags 
& event
) 
 468                 kn
->kn_fflags 
|= event
; 
 471          * process is gone, so flag the event as finished. 
 473         if (event 
== NOTE_EXIT
) { 
 474                 kn
->kn_flags 
|= (EV_EOF 
| EV_ONESHOT
);  
 475                 thread_funnel_set(kernel_flock
, funnel_state
); 
 479         event 
= kn
->kn_fflags
; 
 480         thread_funnel_set(kernel_flock
, funnel_state
); 
 486  * filt_timercompute - compute absolute timeout 
 488  *      The saved-data field in the knote contains the 
 489  *      time value.  The saved filter-flags indicates 
 490  *      the unit of measurement. 
 492  *      If the timeout is not absolute, adjust it for 
 496 filt_timercompute(struct knote 
*kn
, uint64_t *abs_time
) 
 501         switch (kn
->kn_sfflags 
& (NOTE_SECONDS
|NOTE_USECONDS
|NOTE_NSECONDS
)) { 
 503                 multiplier 
= NSEC_PER_SEC
; 
 506                 multiplier 
= NSEC_PER_USEC
; 
 511         case 0: /* milliseconds (default) */ 
 512                 multiplier 
= NSEC_PER_SEC 
/ 1000; 
 517         nanoseconds_to_absolutetime((uint64_t)kn
->kn_sdata 
* multiplier
, &raw
); 
 518         if (raw 
<= filt_timerfloor
) { 
 522         if ((kn
->kn_sfflags 
& NOTE_ABSOLUTE
) == NOTE_ABSOLUTE
) { 
 523                 uint32_t seconds
, nanoseconds
; 
 526                 clock_get_calendar_nanotime(&seconds
, &nanoseconds
); 
 527                 nanoseconds_to_absolutetime((uint64_t)seconds 
* NSEC_PER_SEC 
+ nanoseconds
, 
 529                 if (now 
>= raw 
+ filt_timerfloor
) { 
 535         clock_absolutetime_interval_to_deadline(raw
, abs_time
); 
 540  * filt_timerexpire - the timer callout routine 
 542  *      Just propagate the timer event into the knote 
 543  *      filter routine (by going through the knote 
 544  *      synchronization point).  Pass a hint to 
 545  *      indicate this is a real event, not just a 
 549 filt_timerexpire(void *knx
, __unused 
void *spare
) 
 551         struct klist timer_list
; 
 552         struct knote 
*kn 
= knx
; 
 554         /* no "object" for timers, so fake a list */ 
 555         SLIST_INIT(&timer_list
); 
 556         SLIST_INSERT_HEAD(&timer_list
, kn
, kn_selnext
);  
 557         KNOTE(&timer_list
, 1); 
 561  * data contains amount of time to sleep, in milliseconds, 
 562  * or a pointer to a timespec structure. 
 565 filt_timerattach(struct knote 
*kn
) 
 567         thread_call_t callout
; 
 571         error 
= filt_timercompute(kn
, &deadline
); 
 576                 callout 
= thread_call_allocate(filt_timerexpire
, kn
); 
 580                 /* handle as immediate */ 
 586         kn
->kn_hook 
= (caddr_t
)callout
; 
 588         /* absolute=EV_ONESHOT */ 
 589         if (kn
->kn_sfflags 
& NOTE_ABSOLUTE
) 
 590                 kn
->kn_flags 
|= EV_ONESHOT
;  
 593                 /* all others - if not faking immediate */ 
 594                 kn
->kn_flags 
|= EV_CLEAR
; 
 595                 thread_call_enter_delayed(callout
, deadline
); 
 606 filt_timerdetach(struct knote 
*kn
) 
 608         thread_call_t callout
; 
 611         callout 
= (thread_call_t
)kn
->kn_hook
; 
 612         if (callout 
!= NULL
) { 
 615                 /* cancel the callout if we can */ 
 616                 cancelled 
= thread_call_cancel(callout
); 
 618                         /* got it, just free it */ 
 621                         thread_call_free(callout
); 
 624                 /* we have to wait for the expire routine.  */ 
 625                 kn
->kn_hookid 
= -1;     /* we are detaching */ 
 626                 assert_wait(&kn
->kn_hook
, THREAD_UNINT
); 
 628                 thread_block(THREAD_CONTINUE_NULL
); 
 629                 assert(kn
->kn_hook 
== NULL
); 
 639 filt_timer(struct knote 
*kn
, __unused 
long hint
) 
 645                 thread_call_t callout
; 
 652                 detaching 
= (kn
->kn_hookid 
< 0); 
 653                 callout 
= (thread_call_t
)kn
->kn_hook
; 
 655                 if (!detaching 
&& (kn
->kn_flags 
& EV_ONESHOT
) == 0) { 
 659                         /* user input data may have changed - deal */ 
 660                         error 
= filt_timercompute(kn
, &deadline
); 
 662                                 kn
->kn_flags 
|= EV_ERROR
; 
 664                         } else if (deadline 
== 0) { 
 665                                 /* revert to fake immediate */ 
 666                                 kn
->kn_flags 
&= ~EV_CLEAR
; 
 670                                 /* keep the callout and re-arm */ 
 671                                 thread_call_enter_delayed(callout
, deadline
); 
 678                 thread_call_free(callout
); 
 680                 /* if someone is waiting for timer to pop */ 
 682                         thread_wakeup(&kn
->kn_hook
); 
 690         /* change fake timer to real if needed */ 
 691         while (kn
->kn_hookid 
> 0 && kn
->kn_sdata 
> 0) { 
 694                 /* update the fake timer (make real) */ 
 698                 error 
= filt_timerattach(kn
); 
 701                         kn
->kn_flags 
|= EV_ERROR
; 
 708         /* if still fake, pretend it fired */ 
 709         if (kn
->kn_hookid 
> 0) 
 712         result 
= (kn
->kn_data 
!= 0); 
 720         lck_mtx_lock(&_filt_timerlock
); 
 724 filt_timerunlock(void) 
 726         lck_mtx_unlock(&_filt_timerlock
); 
 730  * JMM - placeholder for not-yet-implemented filters 
 733 filt_badattach(__unused 
struct knote 
*kn
) 
 740 kqueue_alloc(struct proc 
*p
) 
 742         struct filedesc 
*fdp 
= p
->p_fd
; 
 745         MALLOC_ZONE(kq
, struct kqueue 
*, sizeof(struct kqueue
), M_KQUEUE
, M_WAITOK
); 
 747                 bzero(kq
, sizeof(struct kqueue
)); 
 748                 lck_spin_init(&kq
->kq_lock
, kq_lck_grp
, kq_lck_attr
); 
 749                 TAILQ_INIT(&kq
->kq_head
); 
 750                 TAILQ_INIT(&kq
->kq_inprocess
); 
 754         if (fdp
->fd_knlistsize 
< 0) { 
 756                 if (fdp
->fd_knlistsize 
< 0) 
 757                         fdp
->fd_knlistsize 
= 0;         /* this process has had a kq */ 
 766  * kqueue_dealloc - detach all knotes from a kqueue and free it 
 768  *      We walk each list looking for knotes referencing this 
 769  *      this kqueue.  If we find one, we try to drop it.  But 
 770  *      if we fail to get a drop reference, that will wait 
 771  *      until it is dropped.  So, we can just restart again 
 772  *      safe in the assumption that the list will eventually 
 773  *      not contain any more references to this kqueue (either 
 774  *      we dropped them all, or someone else did). 
 776  *      Assumes no new events are being added to the kqueue. 
 777  *      Nothing locked on entry or exit. 
 780 kqueue_dealloc(struct kqueue 
*kq
, struct proc 
*p
) 
 782         struct filedesc 
*fdp 
= p
->p_fd
; 
 787         for (i 
= 0; i 
< fdp
->fd_knlistsize
; i
++) { 
 788                 kn 
= SLIST_FIRST(&fdp
->fd_knlist
[i
]); 
 790                         if (kq 
== kn
->kn_kq
) { 
 793                                 /* drop it ourselves or wait */ 
 794                                 if (kqlock2knotedrop(kq
, kn
)) { 
 795                                         kn
->kn_fop
->f_detach(kn
); 
 799                                 /* start over at beginning of list */ 
 800                                 kn 
= SLIST_FIRST(&fdp
->fd_knlist
[i
]); 
 803                         kn 
= SLIST_NEXT(kn
, kn_link
); 
 806         if (fdp
->fd_knhashmask 
!= 0) { 
 807                 for (i 
= 0; i 
< (int)fdp
->fd_knhashmask 
+ 1; i
++) { 
 808                         kn 
= SLIST_FIRST(&fdp
->fd_knhash
[i
]); 
 810                                 if (kq 
== kn
->kn_kq
) { 
 813                                         /* drop it ourselves or wait */ 
 814                                         if (kqlock2knotedrop(kq
, kn
)) { 
 815                                                 kn
->kn_fop
->f_detach(kn
); 
 819                                         /* start over at beginning of list */ 
 820                                         kn 
= SLIST_FIRST(&fdp
->fd_knhash
[i
]); 
 823                                 kn 
= SLIST_NEXT(kn
, kn_link
); 
 828         lck_spin_destroy(&kq
->kq_lock
, kq_lck_grp
); 
 829         FREE_ZONE(kq
, sizeof(struct kqueue
), M_KQUEUE
); 
 833 kqueue(struct proc 
*p
, __unused 
struct kqueue_args 
*uap
, register_t 
*retval
) 
 839         error 
= falloc(p
, &fp
, &fd
); 
 844         kq 
= kqueue_alloc(p
); 
 850         fp
->f_flag 
= FREAD 
| FWRITE
; 
 851         fp
->f_type 
= DTYPE_KQUEUE
; 
 852         fp
->f_ops 
= &kqueueops
; 
 853         fp
->f_data 
= (caddr_t
)kq
; 
 856         procfdtbl_releasefd(p
, fd
, NULL
); 
 857         fp_drop(p
, fd
, fp
, 1); 
 865 kqueue_portset_np(__unused 
struct proc 
*p
,  
 866                                   __unused 
struct kqueue_portset_np_args 
*uap
,  
 867                                   __unused register_t 
*retval
) 
 869                 /* JMM - Placeholder for now */ 
 874 kqueue_from_portset_np(__unused 
struct proc 
*p
,  
 875                                            __unused 
struct kqueue_from_portset_np_args 
*uap
,  
 876                                            __unused register_t 
*retval
) 
 878                 /* JMM - Placeholder for now */ 
 883 kevent_copyin(user_addr_t 
*addrp
, struct kevent 
*kevp
, struct proc 
*p
) 
 888         if (IS_64BIT_PROCESS(p
)) { 
 889                 struct user_kevent kev64
; 
 891                 advance 
= sizeof(kev64
); 
 892                 error 
= copyin(*addrp
, (caddr_t
)&kev64
, advance
); 
 895                 kevp
->ident 
= CAST_DOWN(uintptr_t, kev64
.ident
); 
 896                 kevp
->filter 
= kev64
.filter
; 
 897                 kevp
->flags 
= kev64
.flags
; 
 898                 kevp
->fflags 
= kev64
.fflags
; 
 899                 kevp
->data 
= CAST_DOWN(intptr_t, kev64
.data
); 
 900                 kevp
->udata 
= kev64
.udata
; 
 903                  * compensate for legacy in-kernel kevent layout 
 904                  * where the udata field is alredy 64-bit. 
 906                 advance 
= sizeof(*kevp
) + sizeof(void *) - sizeof(user_addr_t
); 
 907                 error 
= copyin(*addrp
, (caddr_t
)kevp
, advance
); 
 915 kevent_copyout(struct kevent 
*kevp
, user_addr_t 
*addrp
, struct proc 
*p
) 
 920         if (IS_64BIT_PROCESS(p
)) { 
 921                 struct user_kevent kev64
; 
 923                 kev64
.ident 
= (uint64_t) kevp
->ident
; 
 924                 kev64
.filter 
= kevp
->filter
; 
 925                 kev64
.flags 
= kevp
->flags
; 
 926                 kev64
.fflags 
= kevp
->fflags
; 
 927                 kev64
.data 
= (int64_t) kevp
->data
; 
 928                 kev64
.udata 
= kevp
->udata
; 
 929                 advance 
= sizeof(kev64
); 
 930                 error 
= copyout((caddr_t
)&kev64
, *addrp
, advance
); 
 933                  * compensate for legacy in-kernel kevent layout 
 934                  * where the udata field is alredy 64-bit. 
 936                 advance 
= sizeof(*kevp
) + sizeof(void *) - sizeof(user_addr_t
); 
 937                 error 
= copyout((caddr_t
)kevp
, *addrp
, advance
); 
 945  * kevent_continue - continue a kevent syscall after blocking 
 947  *      assume we inherit a use count on the kq fileglob. 
 951 kevent_continue(__unused 
struct kqueue 
*kq
, void *data
, int error
) 
 953         struct _kevent 
*cont_args
; 
 958         struct proc 
*p 
= current_proc(); 
 960         cont_args 
= (struct _kevent 
*)data
; 
 961         noutputs 
= cont_args
->eventout
; 
 962         retval 
= cont_args
->retval
; 
 966         fp_drop(p
, fd
, fp
, 0); 
 968         /* don't restart after signals... */ 
 969         if (error 
== ERESTART
) 
 971         else if (error 
== EWOULDBLOCK
) 
 975         unix_syscall_return(error
); 
 979  * kevent - [syscall] register and wait for kernel events 
 984 kevent(struct proc 
*p
, struct kevent_args 
*uap
, register_t 
*retval
) 
 986         user_addr_t changelist 
= uap
->changelist
; 
 987         user_addr_t ueventlist 
= uap
->eventlist
; 
 988         int nchanges 
= uap
->nchanges
; 
 989         int nevents 
= uap
->nevents
; 
 992         struct _kevent 
*cont_args
; 
1000         /* convert timeout to absolute - if we have one */ 
1001         if (uap
->timeout 
!= USER_ADDR_NULL
) { 
1003                 if ( IS_64BIT_PROCESS(p
) ) { 
1004                         struct user_timespec ts
; 
1005                         error 
= copyin( uap
->timeout
, &ts
, sizeof(ts
) ); 
1006                         if ((ts
.tv_sec 
& 0xFFFFFFFF00000000ull
) != 0) 
1009                                 TIMESPEC_TO_TIMEVAL(&rtv
, &ts
); 
1012                         error 
= copyin( uap
->timeout
, &ts
, sizeof(ts
) ); 
1013                         TIMESPEC_TO_TIMEVAL(&rtv
, &ts
); 
1017                 if (itimerfix(&rtv
)) 
1019                 getmicrouptime(&atv
); 
1020                 timevaladd(&atv
, &rtv
); 
1026         /* get a usecount for the kq itself */ 
1027         if ((error 
= fp_getfkq(p
, fd
, &fp
, &kq
)) != 0) 
1030         /* register all the change requests the user provided... */ 
1032         while (nchanges 
> 0 && error 
== 0) { 
1033                 error 
= kevent_copyin(&changelist
, &kev
, p
); 
1037                 kev
.flags 
&= ~EV_SYSFLAGS
; 
1038                 error 
= kevent_register(kq
, &kev
, p
); 
1039                 if (error 
&& nevents 
> 0) { 
1040                         kev
.flags 
= EV_ERROR
; 
1042                         error 
= kevent_copyout(&kev
, &ueventlist
, p
); 
1051         /* store the continuation/completion data in the uthread */ 
1052         ut 
= (uthread_t
)get_bsdthread_info(current_thread()); 
1053         cont_args 
= (struct _kevent 
*)&ut
->uu_state
.ss_kevent
; 
1056         cont_args
->retval 
= retval
; 
1057         cont_args
->eventlist 
= ueventlist
; 
1058         cont_args
->eventcount 
= nevents
; 
1059         cont_args
->eventout 
= noutputs
; 
1061         if (nevents 
> 0 && noutputs 
== 0 && error 
== 0) 
1062                 error 
= kevent_scan(kq
, kevent_callback
, 
1063                                     kevent_continue
, cont_args
, 
1065         kevent_continue(kq
, cont_args
, error
); 
1072  * kevent_callback - callback for each individual event 
1074  *      called with nothing locked 
1075  *      caller holds a reference on the kqueue 
1079 kevent_callback(__unused 
struct kqueue 
*kq
, struct kevent 
*kevp
, void *data
) 
1081         struct _kevent 
*cont_args
; 
1084         cont_args 
= (struct _kevent 
*)data
; 
1085         assert(cont_args
->eventout 
< cont_arg
->eventcount
); 
1088          * Copy out the appropriate amount of event data for this user. 
1090         error 
= kevent_copyout(kevp
, &cont_args
->eventlist
, current_proc()); 
1093          * If there isn't space for additional events, return 
1094          * a harmless error to stop the processing here 
1096         if (error 
== 0 && ++cont_args
->eventout 
== cont_args
->eventcount
) 
1097                         error 
= EWOULDBLOCK
; 
1102  * kevent_register - add a new event to a kqueue 
1104  *      Creates a mapping between the event source and 
1105  *      the kqueue via a knote data structure. 
1107  *      Because many/most the event sources are file 
1108  *      descriptor related, the knote is linked off 
1109  *      the filedescriptor table for quick access. 
1111  *      called with nothing locked 
1112  *      caller holds a reference on the kqueue 
1116 kevent_register(struct kqueue 
*kq
, struct kevent 
*kev
, struct proc 
*p
) 
1118         struct filedesc 
*fdp 
= kq
->kq_fdp
; 
1119         struct filterops 
*fops
; 
1120         struct fileproc 
*fp 
= NULL
; 
1121         struct knote 
*kn 
= NULL
; 
1124         if (kev
->filter 
< 0) { 
1125                 if (kev
->filter 
+ EVFILT_SYSCOUNT 
< 0) 
1127                 fops 
= sysfilt_ops
[~kev
->filter
];       /* to 0-base index */ 
1131                  * filter attach routine is responsible for insuring that 
1132                  * the identifier can be attached to it. 
1134                 printf("unknown filter: %d\n", kev
->filter
); 
1138         /* this iocount needs to be dropped if it is not registered */ 
1139         if (fops
->f_isfd 
&& (error 
= fp_lookup(p
, kev
->ident
, &fp
, 0)) != 0) 
1145                 /* fd-based knotes are linked off the fd table */ 
1146                 if (kev
->ident 
< (u_int
)fdp
->fd_knlistsize
) { 
1147                         SLIST_FOREACH(kn
, &fdp
->fd_knlist
[kev
->ident
], kn_link
) 
1148                                 if (kq 
== kn
->kn_kq 
&& 
1149                                     kev
->filter 
== kn
->kn_filter
) 
1153                 /* hash non-fd knotes here too */ 
1154                 if (fdp
->fd_knhashmask 
!= 0) { 
1157                         list 
= &fdp
->fd_knhash
[ 
1158                             KN_HASH((u_long
)kev
->ident
, fdp
->fd_knhashmask
)]; 
1159                         SLIST_FOREACH(kn
, list
, kn_link
) 
1160                                 if (kev
->ident 
== kn
->kn_id 
&& 
1162                                     kev
->filter 
== kn
->kn_filter
) 
1168          * kn now contains the matching knote, or NULL if no match 
1171                 if ((kev
->flags 
& (EV_ADD
|EV_DELETE
)) == EV_ADD
) { 
1180                         kn
->kn_tq 
= &kq
->kq_head
; 
1182                         kn
->kn_sfflags 
= kev
->fflags
; 
1183                         kn
->kn_sdata 
= kev
->data
; 
1186                         kn
->kn_kevent 
= *kev
; 
1187                         kn
->kn_inuse 
= 1;  /* for f_attach() */ 
1190                         /* before anyone can find it */ 
1191                         if (kev
->flags 
& EV_DISABLE
) 
1192                                 kn
->kn_status 
|= KN_DISABLED
; 
1194                         error 
= knote_fdpattach(kn
, fdp
, p
); 
1203                          * apply reference count to knote structure, and 
1204                          * do not release it at the end of this routine. 
1209                          * If the attach fails here, we can drop it knowing 
1210                          * that nobody else has a reference to the knote. 
1212                         if ((error 
= fops
->f_attach(kn
)) != 0) { 
1222                 /* existing knote - get kqueue lock */ 
1226                 if (kev
->flags 
& EV_DELETE
) { 
1228                         kn
->kn_status 
|= KN_DISABLED
; 
1229                         if (kqlock2knotedrop(kq
, kn
)) { 
1230                                 kn
->kn_fop
->f_detach(kn
); 
1236                 /* update status flags for existing knote */ 
1237                 if (kev
->flags 
& EV_DISABLE
) { 
1239                         kn
->kn_status 
|= KN_DISABLED
; 
1240                 } else if (kev
->flags 
& EV_ENABLE
) { 
1241                         kn
->kn_status 
&= ~KN_DISABLED
; 
1242                         if (kn
->kn_status 
& KN_ACTIVE
) 
1247                  * If somebody is in the middle of dropping this 
1248                  * knote - go find/insert a new one.  But we have 
1249                  * wait for this one to go away first. 
1251                 if (!kqlock2knoteusewait(kq
, kn
)) 
1252                         /* kqueue unlocked */ 
1256                  * The user may change some filter values after the 
1257                  * initial EV_ADD, but doing so will not reset any  
1258                  * filter which have already been triggered. 
1260                 kn
->kn_sfflags 
= kev
->fflags
; 
1261                 kn
->kn_sdata 
= kev
->data
; 
1262                 kn
->kn_kevent
.udata 
= kev
->udata
; 
1265         /* still have use ref on knote */ 
1266         if (kn
->kn_fop
->f_event(kn
, 0)) { 
1267                 if (knoteuse2kqlock(kq
, kn
)) 
1276                 fp_drop(p
, kev
->ident
, fp
, 0); 
1281  * kevent_process - process the triggered events in a kqueue 
1283  *      Walk the queued knotes and validate that they are 
1284  *      really still triggered events by calling the filter 
1285  *      routines (if necessary).  Hold a use reference on 
1286  *      the knote to avoid it being detached. For each event 
1287  *      that is still considered triggered, invoke the 
1288  *      callback routine provided. 
1290  *      caller holds a reference on the kqueue. 
1291  *      kqueue locked on entry and exit - but may be dropped 
1295 kevent_process(struct kqueue 
*kq
, 
1296                kevent_callback_t callback
, 
1307         if (kq
->kq_count 
== 0) { 
1312         /* if someone else is processing the queue, wait */ 
1313         if (!TAILQ_EMPTY(&kq
->kq_inprocess
)) { 
1314                 assert_wait(&kq
->kq_inprocess
, THREAD_UNINT
); 
1315                 kq
->kq_state 
|= KQ_PROCWAIT
; 
1317                 thread_block(THREAD_CONTINUE_NULL
); 
1324         while (error 
== 0 && 
1325                (kn 
= TAILQ_FIRST(&kq
->kq_head
)) != NULL
) { 
1328                  * move knote to the processed queue. 
1329                  * this is also protected by the kq lock. 
1331                 assert(kn
->kn_tq 
== &kq
->kq_head
); 
1332                 TAILQ_REMOVE(&kq
->kq_head
, kn
, kn_tqe
); 
1333                 kn
->kn_tq 
= &kq
->kq_inprocess
; 
1334                 TAILQ_INSERT_TAIL(&kq
->kq_inprocess
, kn
, kn_tqe
); 
1337                  * Non-EV_ONESHOT events must be re-validated. 
1339                  * Convert our lock to a use-count and call the event's 
1340                  * filter routine to update. 
1342                  * If the event is dropping (or no longer valid), we 
1343                  * already have it off the active queue, so just 
1344                  * finish the job of deactivating it. 
1346                 if ((kn
->kn_flags 
& EV_ONESHOT
) == 0) { 
1349                         if (kqlock2knoteuse(kq
, kn
)) { 
1351                                 /* call the filter with just a ref */ 
1352                                 result 
= kn
->kn_fop
->f_event(kn
, 0); 
1354                                 if (!knoteuse2kqlock(kq
, kn
) || result 
== 0) { 
1355                                         knote_deactivate(kn
); 
1359                                 knote_deactivate(kn
); 
1365                  * Got a valid triggered knote with the kqueue 
1366                  * still locked.  Snapshot the data, and determine 
1367                  * how to dispatch the knote for future events. 
1369                 kev 
= kn
->kn_kevent
; 
1371                 /* now what happens to it? */ 
1372                 if (kn
->kn_flags 
& EV_ONESHOT
) { 
1373                         knote_deactivate(kn
); 
1374                         if (kqlock2knotedrop(kq
, kn
)) { 
1375                                 kn
->kn_fop
->f_detach(kn
); 
1378                 } else if (kn
->kn_flags 
& EV_CLEAR
) { 
1379                         knote_deactivate(kn
); 
1385                          * leave on in-process queue.  We'll 
1386                          * move all the remaining ones back 
1387                          * the kq queue and wakeup any 
1388                          * waiters when we are done. 
1393                 /* callback to handle each event as we find it */ 
1394                 error 
= (callback
)(kq
, &kev
, data
); 
1401          * With the kqueue still locked, move any knotes 
1402          * remaining on the in-process queue back to the 
1403          * kq's queue and wake up any waiters. 
1405         while ((kn 
= TAILQ_FIRST(&kq
->kq_inprocess
)) != NULL
) { 
1406                 assert(kn
->kn_tq 
== &kq
->kq_inprocess
); 
1407                 TAILQ_REMOVE(&kq
->kq_inprocess
, kn
, kn_tqe
); 
1408                 kn
->kn_tq 
= &kq
->kq_head
; 
1409                 TAILQ_INSERT_TAIL(&kq
->kq_head
, kn
, kn_tqe
); 
1411         if (kq
->kq_state 
& KQ_PROCWAIT
) { 
1412                 kq
->kq_state 
&= ~KQ_PROCWAIT
; 
1413                 thread_wakeup(&kq
->kq_inprocess
); 
1422 kevent_scan_continue(void *data
, wait_result_t wait_result
) 
1424         uthread_t ut 
= (uthread_t
)get_bsdthread_info(current_thread()); 
1425         struct _kevent_scan 
* cont_args 
= &ut
->uu_state
.ss_kevent_scan
; 
1426         struct kqueue 
*kq 
= (struct kqueue 
*)data
; 
1430         /* convert the (previous) wait_result to a proper error */ 
1431         switch (wait_result
) { 
1432         case THREAD_AWAKENED
: 
1434                 error 
= kevent_process(kq
, cont_args
->call
, cont_args
, &count
, current_proc()); 
1435                 if (error 
== 0 && count 
== 0) { 
1436                         assert_wait_deadline(kq
, THREAD_ABORTSAFE
, cont_args
->deadline
); 
1437                         kq
->kq_state 
|= KQ_SLEEP
; 
1439                         thread_block_parameter(kevent_scan_continue
, kq
); 
1444         case THREAD_TIMED_OUT
: 
1445                 error 
= EWOULDBLOCK
;  
1447         case THREAD_INTERRUPTED
: 
1451                 panic("kevent_scan_cont() - invalid wait_result (%d)", wait_result
); 
1455         /* call the continuation with the results */ 
1456         assert(cont_args
->cont 
!= NULL
); 
1457         (cont_args
->cont
)(kq
, cont_args
->data
, error
); 
1462  * kevent_scan - scan and wait for events in a kqueue 
1464  *      Process the triggered events in a kqueue. 
1466  *      If there are no events triggered arrange to 
1467  *      wait for them. If the caller provided a 
1468  *      continuation routine, then kevent_scan will 
1471  *      The callback routine must be valid. 
1472  *      The caller must hold a use-count reference on the kq. 
1476 kevent_scan(struct kqueue 
*kq
,  
1477             kevent_callback_t callback
, 
1478             kevent_continue_t continuation
, 
1480             struct timeval 
*atvp
, 
1483         thread_continue_t cont 
= THREAD_CONTINUE_NULL
; 
1488         assert(callback 
!= NULL
); 
1492                 wait_result_t wait_result
; 
1496                  * Make a pass through the kq to find events already 
1500                 error 
= kevent_process(kq
, callback
, data
, &count
, p
); 
1502                         break; /* lock still held */ 
1504                 /* looks like we have to consider blocking */ 
1507                         /* convert the timeout to a deadline once */ 
1508                         if (atvp
->tv_sec 
|| atvp
->tv_usec
) { 
1509                                 uint32_t seconds
, nanoseconds
; 
1512                                 clock_get_uptime(&now
); 
1513                                 nanoseconds_to_absolutetime((uint64_t)atvp
->tv_sec 
* NSEC_PER_SEC 
+ 
1514                                                             atvp
->tv_usec 
* NSEC_PER_USEC
, 
1516                                 if (now 
>= deadline
) { 
1517                                         /* non-blocking call */ 
1518                                         error 
= EWOULDBLOCK
; 
1519                                         break; /* lock still held */ 
1522                                 clock_absolutetime_interval_to_deadline(deadline
, &deadline
); 
1524                                 deadline 
= 0;   /* block forever */ 
1528                                 uthread_t ut 
= (uthread_t
)get_bsdthread_info(current_thread()); 
1529                                 struct _kevent_scan 
*cont_args 
= &ut
->uu_state
.ss_kevent_scan
; 
1531                                 cont_args
->call 
= callback
; 
1532                                 cont_args
->cont 
= continuation
; 
1533                                 cont_args
->deadline 
= deadline
; 
1534                                 cont_args
->data 
= data
; 
1535                                 cont 
= kevent_scan_continue
; 
1539                 /* go ahead and wait */ 
1540                 assert_wait_deadline(kq
, THREAD_ABORTSAFE
, deadline
); 
1541                 kq
->kq_state 
|= KQ_SLEEP
; 
1543                 wait_result 
= thread_block_parameter(cont
, kq
); 
1544                 /* NOTREACHED if (continuation != NULL) */ 
1546                 switch (wait_result
) { 
1547                 case THREAD_AWAKENED
: 
1549                 case THREAD_TIMED_OUT
: 
1551                 case THREAD_INTERRUPTED
: 
1554                         panic("kevent_scan - bad wait_result (%d)", 
1566  * This could be expanded to call kqueue_scan, if desired. 
1570 kqueue_read(__unused 
struct fileproc 
*fp
,  
1571                         __unused 
struct uio 
*uio
,  
1572                         __unused kauth_cred_t cred
, 
1574                         __unused 
struct proc 
*p
) 
1581 kqueue_write(__unused 
struct fileproc 
*fp
,  
1582                          __unused 
struct uio 
*uio
,  
1583                          __unused kauth_cred_t cred
, 
1585                          __unused 
struct proc 
*p
) 
1592 kqueue_ioctl(__unused 
struct fileproc 
*fp
,  
1593                          __unused u_long com
,  
1594                          __unused caddr_t data
,  
1595                          __unused 
struct proc 
*p
) 
1602 kqueue_select(struct fileproc 
*fp
, int which
, void *wql
, struct proc 
*p
) 
1604         struct kqueue 
*kq 
= (struct kqueue 
*)fp
->f_data
; 
1607         if (which 
== FREAD
) { 
1612                         selrecord(p
, &kq
->kq_sel
, wql
); 
1613                         kq
->kq_state 
|= KQ_SEL
; 
1625 kqueue_close(struct fileglob 
*fg
, struct proc 
*p
) 
1627         struct kqueue 
*kq 
= (struct kqueue 
*)fg
->fg_data
; 
1629         kqueue_dealloc(kq
, p
); 
1636  * The callers has taken a use-count reference on this kqueue and will donate it 
1637  * to the kqueue we are being added to.  This keeps the kqueue from closing until 
1638  * that relationship is torn down. 
1641 kqueue_kqfilter(__unused 
struct fileproc 
*fp
, struct knote 
*kn
, __unused 
struct proc 
*p
) 
1643         struct kqueue 
*kq 
= (struct kqueue 
*)kn
->kn_fp
->f_data
; 
1645         if (kn
->kn_filter 
!= EVFILT_READ
) 
1648         kn
->kn_fop 
= &kqread_filtops
; 
1650         KNOTE_ATTACH(&kq
->kq_sel
.si_note
, kn
); 
1657 kqueue_stat(struct fileproc 
*fp
, struct stat 
*st
, __unused 
struct proc 
*p
) 
1659         struct kqueue 
*kq 
= (struct kqueue 
*)fp
->f_data
; 
1661         bzero((void *)st
, sizeof(*st
)); 
1662         st
->st_size 
= kq
->kq_count
; 
1663         st
->st_blksize 
= sizeof(struct kevent
); 
1664         st
->st_mode 
= S_IFIFO
; 
1669  * Called with the kqueue locked 
1672 kqueue_wakeup(struct kqueue 
*kq
) 
1675         if (kq
->kq_state 
& KQ_SLEEP
) { 
1676                 kq
->kq_state 
&= ~KQ_SLEEP
; 
1679         if (kq
->kq_state 
& KQ_SEL
) { 
1680                 kq
->kq_state 
&= ~KQ_SEL
; 
1681                 selwakeup(&kq
->kq_sel
); 
1683         KNOTE(&kq
->kq_sel
.si_note
, 0); 
1687 klist_init(struct klist 
*list
) 
1694  * Query/Post each knote in the object's list 
1696  *      The object lock protects the list. It is assumed 
1697  *      that the filter/event routine for the object can 
1698  *      determine that the object is already locked (via 
1699  *      the hind) and not deadlock itself. 
1701  *      The object lock should also hold off pending 
1702  *      detach/drop operations.  But we'll prevent it here 
1703  *      too - just in case. 
1706 knote(struct klist 
*list
, long hint
) 
1710         SLIST_FOREACH(kn
, list
, kn_selnext
) { 
1711                 struct kqueue 
*kq 
= kn
->kn_kq
; 
1714                 if (kqlock2knoteuse(kq
, kn
)) { 
1717                         /* call the event with only a use count */ 
1718                         result 
= kn
->kn_fop
->f_event(kn
, hint
); 
1720                         /* if its not going away and triggered */ 
1721                         if (knoteuse2kqlock(kq
, kn
) && result
) 
1723                         /* lock held again */ 
1730  * attach a knote to the specified list.  Return true if this is the first entry. 
1731  * The list is protected by whatever lock the object it is associated with uses. 
1734 knote_attach(struct klist 
*list
, struct knote 
*kn
) 
1736         int ret 
= SLIST_EMPTY(list
); 
1737         SLIST_INSERT_HEAD(list
, kn
, kn_selnext
); 
1742  * detach a knote from the specified list.  Return true if that was the last entry. 
1743  * The list is protected by whatever lock the object it is associated with uses. 
1746 knote_detach(struct klist 
*list
, struct knote 
*kn
) 
1748         SLIST_REMOVE(list
, kn
, knote
, kn_selnext
); 
1749         return SLIST_EMPTY(list
); 
1753  * remove all knotes referencing a specified fd 
1755  * Essentially an inlined knote_remove & knote_drop 
1756  * when we know for sure that the thing is a file 
1758  * Entered with the proc_fd lock already held. 
1759  * It returns the same way, but may drop it temporarily. 
1762 knote_fdclose(struct proc 
*p
, int fd
) 
1764         struct filedesc 
*fdp 
= p
->p_fd
; 
1768         list 
= &fdp
->fd_knlist
[fd
]; 
1769         while ((kn 
= SLIST_FIRST(list
)) != NULL
) { 
1770                 struct kqueue 
*kq 
= kn
->kn_kq
; 
1776                  * Convert the lock to a drop ref. 
1777                  * If we get it, go ahead and drop it. 
1778                  * Otherwise, we waited for it to 
1779                  * be dropped by the other guy, so 
1780                  * it is safe to move on in the list. 
1782                 if (kqlock2knotedrop(kq
, kn
)) { 
1783                         kn
->kn_fop
->f_detach(kn
); 
1789                 /* the fd tables may have changed - start over */ 
1790                 list 
= &fdp
->fd_knlist
[fd
]; 
1794 /* proc_fdlock held on entry (and exit) */ 
1796 knote_fdpattach(struct knote 
*kn
, struct filedesc 
*fdp
, __unused 
struct proc 
*p
) 
1798         struct klist 
*list 
= NULL
; 
1800         if (! kn
->kn_fop
->f_isfd
) { 
1801                 if (fdp
->fd_knhashmask 
== 0) 
1802                         fdp
->fd_knhash 
= hashinit(KN_HASHSIZE
, M_KQUEUE
, 
1803                             &fdp
->fd_knhashmask
); 
1804                 list 
= &fdp
->fd_knhash
[KN_HASH(kn
->kn_id
, fdp
->fd_knhashmask
)]; 
1806                 if ((u_int
)fdp
->fd_knlistsize 
<= kn
->kn_id
) { 
1809                         /* have to grow the fd_knlist */ 
1810                         size 
= fdp
->fd_knlistsize
; 
1811                         while (size 
<= kn
->kn_id
) 
1813                         MALLOC(list
, struct klist 
*, 
1814                                size 
* sizeof(struct klist 
*), M_KQUEUE
, M_WAITOK
); 
1818                         bcopy((caddr_t
)fdp
->fd_knlist
, (caddr_t
)list
, 
1819                               fdp
->fd_knlistsize 
* sizeof(struct klist 
*)); 
1820                         bzero((caddr_t
)list 
+ 
1821                               fdp
->fd_knlistsize 
* sizeof(struct klist 
*), 
1822                               (size 
- fdp
->fd_knlistsize
) * sizeof(struct klist 
*)); 
1823                         FREE(fdp
->fd_knlist
, M_KQUEUE
); 
1824                         fdp
->fd_knlist 
= list
; 
1825                         fdp
->fd_knlistsize 
= size
; 
1827                 list 
= &fdp
->fd_knlist
[kn
->kn_id
]; 
1829         SLIST_INSERT_HEAD(list
, kn
, kn_link
); 
1836  * should be called at spl == 0, since we don't want to hold spl 
1837  * while calling fdrop and free. 
1840 knote_drop(struct knote 
*kn
, struct proc 
*p
) 
1842         struct filedesc 
*fdp 
= p
->p_fd
; 
1843         struct kqueue 
*kq 
= kn
->kn_kq
; 
1847         if (kn
->kn_fop
->f_isfd
) 
1848                 list 
= &fdp
->fd_knlist
[kn
->kn_id
]; 
1850                 list 
= &fdp
->fd_knhash
[KN_HASH(kn
->kn_id
, fdp
->fd_knhashmask
)]; 
1852         SLIST_REMOVE(list
, kn
, knote
, kn_link
); 
1855         if (kn
->kn_status 
& KN_DROPWAIT
) 
1856                 thread_wakeup(&kn
->kn_status
); 
1860         if (kn
->kn_fop
->f_isfd
) 
1861                 fp_drop(p
, kn
->kn_id
, kn
->kn_fp
, 0); 
1866 /* called with kqueue lock held */ 
1868 knote_activate(struct knote 
*kn
) 
1870         struct kqueue 
*kq 
= kn
->kn_kq
; 
1872         kn
->kn_status 
|= KN_ACTIVE
; 
1877 /* called with kqueue lock held */ 
1879 knote_deactivate(struct knote 
*kn
) 
1881         kn
->kn_status 
&= ~KN_ACTIVE
; 
1885 /* called with kqueue lock held */ 
1887 knote_enqueue(struct knote 
*kn
) 
1889         struct kqueue 
*kq 
= kn
->kn_kq
; 
1891         if ((kn
->kn_status 
& (KN_QUEUED 
| KN_DISABLED
)) == 0) { 
1892                 struct kqtailq 
*tq 
= kn
->kn_tq
; 
1894                 TAILQ_INSERT_TAIL(tq
, kn
, kn_tqe
);  
1895                 kn
->kn_status 
|= KN_QUEUED
; 
1900 /* called with kqueue lock held */ 
1902 knote_dequeue(struct knote 
*kn
) 
1904         struct kqueue 
*kq 
= kn
->kn_kq
; 
1906         assert((kn
->kn_status 
& KN_DISABLED
) == 0); 
1907         if ((kn
->kn_status 
& KN_QUEUED
) == KN_QUEUED
) { 
1908                 struct kqtailq 
*tq 
= kn
->kn_tq
; 
1910                 TAILQ_REMOVE(tq
, kn
, kn_tqe
);  
1911                 kn
->kn_tq 
= &kq
->kq_head
; 
1912                 kn
->kn_status 
&= ~KN_QUEUED
; 
1920         knote_zone 
= zinit(sizeof(struct knote
), 8192*sizeof(struct knote
), 8192, "knote zone"); 
1922         /* allocate kq lock group attribute and group */ 
1923         kq_lck_grp_attr
= lck_grp_attr_alloc_init(); 
1924         lck_grp_attr_setstat(kq_lck_grp_attr
); 
1926         kq_lck_grp 
= lck_grp_alloc_init("kqueue",  kq_lck_grp_attr
); 
1928         /* Allocate kq lock attribute */ 
1929         kq_lck_attr 
= lck_attr_alloc_init(); 
1930         lck_attr_setdefault(kq_lck_attr
); 
1932         /* Initialize the timer filter lock */ 
1933         lck_mtx_init(&_filt_timerlock
, kq_lck_grp
, kq_lck_attr
); 
1935 SYSINIT(knote
, SI_SUB_PSEUDO
, SI_ORDER_ANY
, knote_init
, NULL
) 
1937 static struct knote 
* 
1940         return ((struct knote 
*)zalloc(knote_zone
)); 
1944 knote_free(struct knote 
*kn
) 
1946         zfree(knote_zone
, kn
); 
1949 #include <sys/param.h> 
1950 #include <sys/socket.h> 
1951 #include <sys/protosw.h> 
1952 #include <sys/domain.h> 
1953 #include <sys/mbuf.h> 
1954 #include <sys/kern_event.h> 
1955 #include <sys/malloc.h> 
1956 #include <sys/sys_domain.h> 
1957 #include <sys/syslog.h> 
1960 static int kev_attach(struct socket 
*so
, int proto
, struct proc 
*p
); 
1961 static int kev_detach(struct socket 
*so
); 
1962 static int kev_control(struct socket 
*so
, u_long cmd
, caddr_t data
, struct ifnet 
*ifp
, struct proc 
*p
); 
1964 struct pr_usrreqs event_usrreqs 
= { 
1965      pru_abort_notsupp
, pru_accept_notsupp
, kev_attach
, pru_bind_notsupp
, pru_connect_notsupp
, 
1966      pru_connect2_notsupp
, kev_control
, kev_detach
, pru_disconnect_notsupp
, 
1967      pru_listen_notsupp
, pru_peeraddr_notsupp
, pru_rcvd_notsupp
, pru_rcvoob_notsupp
, 
1968      pru_send_notsupp
, pru_sense_null
, pru_shutdown_notsupp
, pru_sockaddr_notsupp
, 
1969      pru_sosend_notsupp
, soreceive
, pru_sopoll_notsupp
 
1972 struct protosw eventsw
[] = { 
1974           SOCK_RAW
,             &systemdomain
,  SYSPROTO_EVENT
,         PR_ATOMIC
, 
1990 struct kern_event_head kern_event_head
; 
1992 static u_long static_event_id 
= 0; 
1993 struct domain 
*sysdom 
= &systemdomain
; 
1995 static lck_grp_t                
*evt_mtx_grp
; 
1996 static lck_attr_t               
*evt_mtx_attr
; 
1997 static lck_grp_attr_t   
*evt_mtx_grp_attr
; 
1998 lck_mtx_t                               
*evt_mutex
; 
2000  * Install the protosw's for the NKE manager.  Invoked at 
2001  *  extension load time 
2004 kern_event_init(void) 
2008     if ((retval 
= net_add_proto(eventsw
, &systemdomain
)) != 0) { 
2009             log(LOG_WARNING
, "Can't install kernel events protocol (%d)\n", retval
); 
2014          * allocate lock group attribute and group for kern event  
2016         evt_mtx_grp_attr 
= lck_grp_attr_alloc_init(); 
2018         evt_mtx_grp 
= lck_grp_alloc_init("eventlist", evt_mtx_grp_attr
); 
2021          * allocate the lock attribute for mutexes 
2023         evt_mtx_attr 
= lck_attr_alloc_init(); 
2024         lck_attr_setdefault(evt_mtx_attr
); 
2025         evt_mutex 
= lck_mtx_alloc_init(evt_mtx_grp
, evt_mtx_attr
); 
2026         if (evt_mutex 
== NULL
) 
2029     return(KERN_SUCCESS
); 
2033 kev_attach(struct socket 
*so
, __unused 
int proto
, __unused 
struct proc 
*p
) 
2036      struct kern_event_pcb  
*ev_pcb
; 
2038      error 
= soreserve(so
, KEV_SNDSPACE
, KEV_RECVSPACE
); 
2042      MALLOC(ev_pcb
, struct kern_event_pcb 
*, sizeof(struct kern_event_pcb
), M_PCB
, M_WAITOK
); 
2046      ev_pcb
->ev_socket 
= so
; 
2047      ev_pcb
->vendor_code_filter 
= 0xffffffff; 
2049      so
->so_pcb 
= (caddr_t
) ev_pcb
; 
2050          lck_mtx_lock(evt_mutex
); 
2051      LIST_INSERT_HEAD(&kern_event_head
, ev_pcb
, ev_link
); 
2052          lck_mtx_unlock(evt_mutex
); 
2059 kev_detach(struct socket 
*so
) 
2061      struct kern_event_pcb 
*ev_pcb 
= (struct kern_event_pcb 
*) so
->so_pcb
; 
2064                 lck_mtx_lock(evt_mutex
); 
2065                 LIST_REMOVE(ev_pcb
, ev_link
); 
2066                 lck_mtx_unlock(evt_mutex
); 
2067                 FREE(ev_pcb
, M_PCB
); 
2069                 so
->so_flags 
|= SOF_PCBCLEARING
; 
2076  * For now, kev_vender_code and mbuf_tags use the same 
2079 extern errno_t 
mbuf_tag_id_find_internal(const char *string
, u_long 
*out_id
, 
2082 errno_t 
kev_vendor_code_find( 
2084         u_long          
*out_vender_code
) 
2086         if (strlen(string
) >= KEV_VENDOR_CODE_MAX_STR_LEN
) { 
2089         return mbuf_tag_id_find_internal(string
, out_vender_code
, 1); 
2092 extern void mbuf_tag_id_first_last(u_long 
*first
, u_long 
*last
); 
2094 errno_t  
kev_msg_post(struct kev_msg 
*event_msg
) 
2096         u_long  min_vendor
, max_vendor
; 
2098         mbuf_tag_id_first_last(&min_vendor
, &max_vendor
); 
2100         if (event_msg 
== NULL
) 
2103         /* Limit third parties to posting events for registered vendor codes only */ 
2104         if (event_msg
->vendor_code 
< min_vendor 
|| 
2105                 event_msg
->vendor_code 
> max_vendor
) 
2110         return kev_post_msg(event_msg
); 
2114 int  kev_post_msg(struct kev_msg 
*event_msg
) 
2116      struct mbuf 
*m
, *m2
; 
2117      struct kern_event_pcb  
*ev_pcb
; 
2118      struct kern_event_msg  
*ev
; 
2120      unsigned long     total_size
; 
2123         /* Verify the message is small enough to fit in one mbuf w/o cluster */ 
2124         total_size 
= KEV_MSG_HEADER_SIZE
; 
2126         for (i 
= 0; i 
< 5; i
++) { 
2127                 if (event_msg
->dv
[i
].data_length 
== 0) 
2129                 total_size 
+= event_msg
->dv
[i
].data_length
; 
2132         if (total_size 
> MLEN
) { 
2136      m 
= m_get(M_DONTWAIT
, MT_DATA
); 
2140      ev 
= mtod(m
, struct kern_event_msg 
*); 
2141      total_size 
= KEV_MSG_HEADER_SIZE
; 
2143      tmp 
= (char *) &ev
->event_data
[0]; 
2144      for (i 
= 0; i 
< 5; i
++) { 
2145           if (event_msg
->dv
[i
].data_length 
== 0) 
2148           total_size 
+= event_msg
->dv
[i
].data_length
; 
2149           bcopy(event_msg
->dv
[i
].data_ptr
, tmp
,  
2150                 event_msg
->dv
[i
].data_length
); 
2151           tmp 
+= event_msg
->dv
[i
].data_length
; 
2154      ev
->id 
= ++static_event_id
; 
2155      ev
->total_size   
= total_size
; 
2156      ev
->vendor_code  
= event_msg
->vendor_code
; 
2157      ev
->kev_class    
= event_msg
->kev_class
; 
2158      ev
->kev_subclass 
= event_msg
->kev_subclass
; 
2159      ev
->event_code   
= event_msg
->event_code
; 
2161      m
->m_len 
= total_size
; 
2162      lck_mtx_lock(evt_mutex
); 
2163      for (ev_pcb 
= LIST_FIRST(&kern_event_head
);  
2165           ev_pcb 
= LIST_NEXT(ev_pcb
, ev_link
)) { 
2167           if (ev_pcb
->vendor_code_filter 
!= KEV_ANY_VENDOR
) { 
2168                if (ev_pcb
->vendor_code_filter 
!= ev
->vendor_code
) 
2171                if (ev_pcb
->class_filter 
!= KEV_ANY_CLASS
) { 
2172                     if (ev_pcb
->class_filter 
!= ev
->kev_class
) 
2175                     if ((ev_pcb
->subclass_filter 
!= KEV_ANY_SUBCLASS
) && 
2176                         (ev_pcb
->subclass_filter 
!= ev
->kev_subclass
)) 
2181           m2 
= m_copym(m
, 0, m
->m_len
, M_NOWAIT
); 
2184                    lck_mtx_unlock(evt_mutex
); 
2187           socket_lock(ev_pcb
->ev_socket
, 1); 
2188           if (sbappendrecord(&ev_pcb
->ev_socket
->so_rcv
, m2
)) 
2189                   sorwakeup(ev_pcb
->ev_socket
); 
2190           socket_unlock(ev_pcb
->ev_socket
, 1); 
2194      lck_mtx_unlock(evt_mutex
); 
2199 kev_control(struct socket 
*so
,  
2202                         __unused 
struct ifnet 
*ifp
,  
2203                         __unused 
struct proc 
*p
) 
2205         struct kev_request 
*kev_req 
= (struct kev_request 
*) data
; 
2206         struct kern_event_pcb  
*ev_pcb
; 
2207         struct kev_vendor_code 
*kev_vendor
; 
2208         u_long  
*id_value 
= (u_long 
*) data
; 
2214                         *id_value 
= static_event_id
; 
2218                         ev_pcb 
= (struct kern_event_pcb 
*) so
->so_pcb
; 
2219                         ev_pcb
->vendor_code_filter 
= kev_req
->vendor_code
; 
2220                         ev_pcb
->class_filter     
= kev_req
->kev_class
; 
2221                         ev_pcb
->subclass_filter  
= kev_req
->kev_subclass
; 
2225                         ev_pcb 
= (struct kern_event_pcb 
*) so
->so_pcb
; 
2226                         kev_req
->vendor_code 
= ev_pcb
->vendor_code_filter
; 
2227                         kev_req
->kev_class   
= ev_pcb
->class_filter
; 
2228                         kev_req
->kev_subclass 
= ev_pcb
->subclass_filter
; 
2231                 case SIOCGKEVVENDOR
: 
2232                         kev_vendor 
= (struct kev_vendor_code
*)data
; 
2234                         /* Make sure string is NULL terminated */ 
2235                         kev_vendor
->vendor_string
[KEV_VENDOR_CODE_MAX_STR_LEN
-1] = 0; 
2237                         return mbuf_tag_id_find_internal(kev_vendor
->vendor_string
, 
2238                                                                                          &kev_vendor
->vendor_code
, 0);