/*
 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 *	@(#)kern_event.c       1.0 (3/31/2000)
 */
#include <stdatomic.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vnode_internal.h>
#include <sys/proc_info.h>
#include <sys/codesign.h>
#include <sys/pthread_shims.h>
#include <sys/kdebug.h>
#include <sys/reason.h>
#include <os/reason_private.h>

#include <kern/locks.h>
#include <kern/clock.h>
#include <kern/cpu_data.h>
#include <kern/policy_internal.h>
#include <kern/thread_call.h>
#include <kern/sched_prim.h>
#include <kern/waitq.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/assert.h>
#include <kern/ast.h>
#include <kern/thread.h>
#include <kern/kcdata.h>

#include <libkern/libkern.h>
#include <libkern/OSAtomic.h>

#include "net/net_str_id.h"

#include <mach/task.h>
#include <libkern/section_keywords.h>

#if CONFIG_MEMORYSTATUS
#include <sys/kern_memorystatus.h>
#endif /* CONFIG_MEMORYSTATUS */
extern thread_t port_name_to_thread(mach_port_name_t port_name); /* osfmk/kern/ipc_tt.h   */
extern mach_port_name_t ipc_entry_name_mask(mach_port_name_t name); /* osfmk/ipc/ipc_entry.h */

#define KEV_EVTID(code) BSDDBG_CODE(DBG_BSD_KEVENT, (code))
/*
 * JMM - this typedef needs to be unified with pthread_priority_t
 *       and mach_msg_priority_t. It also needs to be the same type
 */
typedef int32_t qos_t;

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

#define KQ_EVENT        NO_EVENT64
#define KNUSE_NONE       0x0
#define KNUSE_STEAL_DROP 0x1
#define KNUSE_BOOST      0x2
static int kqlock2knoteuse(struct kqueue *kq, struct knote *kn, int flags);
static int kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
static int kqlock2knotedetach(struct kqueue *kq, struct knote *kn, int flags);
static int knoteuse2kqlock(struct kqueue *kq, struct knote *kn, int flags);
static int kqueue_read(struct fileproc *fp, struct uio *uio,
		int flags, vfs_context_t ctx);
static int kqueue_write(struct fileproc *fp, struct uio *uio,
		int flags, vfs_context_t ctx);
static int kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
		vfs_context_t ctx);
static int kqueue_select(struct fileproc *fp, int which, void *wq_link_id,
		vfs_context_t ctx);
static int kqueue_close(struct fileglob *fg, vfs_context_t ctx);
static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn,
		struct kevent_internal_s *kev, vfs_context_t ctx);
static int kqueue_drain(struct fileproc *fp, vfs_context_t ctx);
static const struct fileops kqueueops = {
	.fo_type = DTYPE_KQUEUE,
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_ioctl = kqueue_ioctl,
	.fo_select = kqueue_select,
	.fo_close = kqueue_close,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_drain = kqueue_drain,
};
static void kevent_put_kq(struct proc *p, kqueue_id_t id, struct fileproc *fp, struct kqueue *kq);
static int kevent_internal(struct proc *p,
                           kqueue_id_t id, kqueue_id_t *id_out,
                           user_addr_t changelist, int nchanges,
                           user_addr_t eventlist, int nevents,
                           user_addr_t data_out, uint64_t data_available,
                           unsigned int flags, user_addr_t utimeout,
                           kqueue_continue_t continuation,
                           int32_t *retval);
static int kevent_copyin(user_addr_t *addrp, struct kevent_internal_s *kevp,
                         struct proc *p, unsigned int flags);
static int kevent_copyout(struct kevent_internal_s *kevp, user_addr_t *addrp,
                          struct proc *p, unsigned int flags);
char * kevent_description(struct kevent_internal_s *kevp, char *s, size_t n);
static void kqueue_interrupt(struct kqueue *kq);
static int kevent_callback(struct kqueue *kq, struct kevent_internal_s *kevp,
		void *data);
static void kevent_continue(struct kqueue *kq, void *data, int error);
static void kqueue_scan_continue(void *contp, wait_result_t wait_result);
static int kqueue_process(struct kqueue *kq, kevent_callback_t callback, void *callback_data,
                          struct filt_process_s *process_data, int *countp, struct proc *p);
static struct kqtailq *kqueue_get_base_queue(struct kqueue *kq, kq_index_t qos_index);
static struct kqtailq *kqueue_get_high_queue(struct kqueue *kq, kq_index_t qos_index);
static int kqueue_queue_empty(struct kqueue *kq, kq_index_t qos_index);

static struct kqtailq *kqueue_get_suppressed_queue(struct kqueue *kq, kq_index_t qos_index);
static void kqworkq_request_thread(struct kqworkq *kqwq, kq_index_t qos_index);
static void kqworkq_request_help(struct kqworkq *kqwq, kq_index_t qos_index);
static void kqworkq_update_override(struct kqworkq *kqwq, kq_index_t qos_index, kq_index_t override_index);
static void kqworkq_bind_thread_impl(struct kqworkq *kqwq, kq_index_t qos_index, thread_t thread, unsigned int flags);
static void kqworkq_unbind_thread(struct kqworkq *kqwq, kq_index_t qos_index, thread_t thread, unsigned int flags);
static struct kqrequest *kqworkq_get_request(struct kqworkq *kqwq, kq_index_t qos_index);

enum {
	KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI = 0x1,
	KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI = 0x2,
	KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS = 0x4,
	KQWL_UO_UPDATE_OVERRIDE_LAZY = 0x8
};
static void kqworkloop_update_override(struct kqworkloop *kqwl, kq_index_t qos_index, kq_index_t override_index, uint32_t flags);
static void kqworkloop_bind_thread_impl(struct kqworkloop *kqwl, thread_t thread, unsigned int flags);
static void kqworkloop_unbind_thread(struct kqworkloop *kqwl, thread_t thread, unsigned int flags);
static inline kq_index_t kqworkloop_combined_qos(struct kqworkloop *kqwl, boolean_t *);
static void kqworkloop_update_suppress_sync_count(struct kqrequest *kqr, uint32_t flags);

enum {
	/*
	 * The wakeup qos is the qos of QUEUED knotes.
	 *
	 * This QoS is accounted for with the events override in the
	 * kqr_override_index field. It is raised each time a new knote is queued at
	 * a given QoS. The kqr_wakeup_indexes field is a superset of the non empty
	 * knote buckets and is recomputed after each event delivery.
	 */
	KQWL_UTQ_UPDATE_WAKEUP_QOS,
	KQWL_UTQ_UPDATE_STAYACTIVE_QOS,
	KQWL_UTQ_RECOMPUTE_WAKEUP_QOS,
	/*
	 * The wakeup override is for suppressed knotes that have fired again at
	 * a higher QoS than the one for which they are suppressed already.
	 * This override is cleared when the knote suppressed list becomes empty.
	 */
	KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE,
	KQWL_UTQ_RESET_WAKEUP_OVERRIDE,
	/*
	 * The async QoS is the maximum QoS of an event enqueued on this workloop in
	 * userland. It is copied from the only EVFILT_WORKLOOP knote with
	 * a NOTE_WL_THREAD_REQUEST bit set allowed on this workloop. If there is no
	 * such knote, this QoS is 0.
	 */
	KQWL_UTQ_SET_ASYNC_QOS,
	/*
	 * The sync waiters QoS is the maximum QoS of any thread blocked on an
	 * EVFILT_WORKLOOP knote marked with the NOTE_WL_SYNC_WAIT bit.
	 * If there is no such knote, this QoS is 0.
	 */
	KQWL_UTQ_SET_SYNC_WAITERS_QOS,
	KQWL_UTQ_REDRIVE_EVENTS,
};
static void kqworkloop_update_threads_qos(struct kqworkloop *kqwl, int op, kq_index_t qos);
static void kqworkloop_request_help(struct kqworkloop *kqwl, kq_index_t qos_index);
static int knote_process(struct knote *kn, kevent_callback_t callback, void *callback_data,
                         struct filt_process_s *process_data, struct proc *p);

static void knote_put(struct knote *kn);

static int kq_add_knote(struct kqueue *kq, struct knote *kn,
		struct kevent_internal_s *kev, struct proc *p, int *knoteuse_flags);
static struct knote *kq_find_knote_and_kq_lock(struct kqueue *kq, struct kevent_internal_s *kev, bool is_fd, struct proc *p);
static void kq_remove_knote(struct kqueue *kq, struct knote *kn, struct proc *p, kn_status_t *kn_status, uint16_t *kq_state);

static void knote_drop(struct knote *kn, struct proc *p);
static struct knote *knote_alloc(void);
static void knote_free(struct knote *kn);

static void knote_activate(struct knote *kn);
static void knote_deactivate(struct knote *kn);

static void knote_enable(struct knote *kn);
static void knote_disable(struct knote *kn);

static int knote_enqueue(struct knote *kn);
static void knote_dequeue(struct knote *kn);

static void knote_suppress(struct knote *kn);
static void knote_unsuppress(struct knote *kn);
static void knote_wakeup(struct knote *kn);

static kq_index_t knote_get_queue_index(struct knote *kn);
static struct kqtailq *knote_get_queue(struct knote *kn);
static kq_index_t knote_get_req_index(struct knote *kn);
static kq_index_t knote_get_qos_index(struct knote *kn);
static void knote_set_qos_index(struct knote *kn, kq_index_t qos_index);
static kq_index_t knote_get_qos_override_index(struct knote *kn);
static kq_index_t knote_get_sync_qos_override_index(struct knote *kn);
static void knote_set_qos_override_index(struct knote *kn, kq_index_t qos_index, boolean_t override_is_sync);
static void knote_set_qos_overcommit(struct knote *kn);
static int filt_fileattach(struct knote *kn, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) file_filtops = {
	.f_attach = filt_fileattach,
};

static void filt_kqdetach(struct knote *kn);
static int filt_kqueue(struct knote *kn, long hint);
static int filt_kqtouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_kqprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) kqread_filtops = {
	.f_detach = filt_kqdetach,
	.f_event = filt_kqueue,
	.f_touch = filt_kqtouch,
	.f_process = filt_kqprocess,
};

/* placeholder for not-yet-implemented filters */
static int filt_badattach(struct knote *kn, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) bad_filtops = {
	.f_attach = filt_badattach,
};
static int filt_procattach(struct knote *kn, struct kevent_internal_s *kev);
static void filt_procdetach(struct knote *kn);
static int filt_proc(struct knote *kn, long hint);
static int filt_proctouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_procprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) proc_filtops = {
	.f_attach = filt_procattach,
	.f_detach = filt_procdetach,
	.f_event = filt_proc,
	.f_touch = filt_proctouch,
	.f_process = filt_procprocess,
};

#if CONFIG_MEMORYSTATUS
extern const struct filterops memorystatus_filtops;
#endif /* CONFIG_MEMORYSTATUS */

extern const struct filterops fs_filtops;

extern const struct filterops sig_filtops;
static zone_t knote_zone;
static zone_t kqfile_zone;
static zone_t kqworkq_zone;
static zone_t kqworkloop_zone;
#define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))
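/*
 * Illustrative note (added for clarity, not in the original source): KN_HASH
 * folds the second byte of the identifier into the low bits before masking,
 * e.g. KN_HASH(0x1234, 0xff) == (0x1234 ^ 0x12) & 0xff == 0x26.
 */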
/* Mach portset filter */
extern const struct filterops machport_filtops;
/* User filter */
static int filt_userattach(struct knote *kn, struct kevent_internal_s *kev);
static void filt_userdetach(struct knote *kn);
static int filt_user(struct knote *kn, long hint);
static int filt_usertouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_userprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) user_filtops = {
	.f_attach = filt_userattach,
	.f_detach = filt_userdetach,
	.f_event = filt_user,
	.f_touch = filt_usertouch,
	.f_process = filt_userprocess,
};

static lck_spin_t _filt_userlock;
static void filt_userlock(void);
static void filt_userunlock(void);
/* Workloop filter */
static bool filt_wlneeds_boost(struct kevent_internal_s *kev);
static int filt_wlattach(struct knote *kn, struct kevent_internal_s *kev);
static int filt_wlpost_attach(struct knote *kn, struct kevent_internal_s *kev);
static void filt_wldetach(struct knote *kn);
static int filt_wlevent(struct knote *kn, long hint);
static int filt_wltouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_wldrop_and_unlock(struct knote *kn, struct kevent_internal_s *kev);
static int filt_wlprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) workloop_filtops = {
	.f_needs_boost = filt_wlneeds_boost,
	.f_attach = filt_wlattach,
	.f_post_attach = filt_wlpost_attach,
	.f_detach = filt_wldetach,
	.f_event = filt_wlevent,
	.f_touch = filt_wltouch,
	.f_drop_and_unlock = filt_wldrop_and_unlock,
	.f_process = filt_wlprocess,
};
extern const struct filterops pipe_rfiltops;
extern const struct filterops pipe_wfiltops;
extern const struct filterops ptsd_kqops;
extern const struct filterops ptmx_kqops;
extern const struct filterops soread_filtops;
extern const struct filterops sowrite_filtops;
extern const struct filterops sock_filtops;
extern const struct filterops soexcept_filtops;
extern const struct filterops spec_filtops;
extern const struct filterops bpfread_filtops;
extern const struct filterops necp_fd_rfiltops;
extern const struct filterops fsevent_filtops;
extern const struct filterops vnode_filtops;
extern const struct filterops tty_filtops;

const static struct filterops timer_filtops;
/*
 * Rules for adding new filters to the system:
 * Public filters:
 * - Add a new "EVFILT_" option value to bsd/sys/event.h (typically a negative value)
 *   in the exported section of the header
 * - Update the EVFILT_SYSCOUNT value to reflect the new addition
 * - Add a filterops to the sysfilt_ops array. Public filters should be added at the end
 *   of the Public Filters section in the array.
 * Private filters:
 * - Add a new "EVFILT_" value to bsd/sys/event.h (typically a positive value)
 *   in the XNU_KERNEL_PRIVATE section of the header
 * - Update the EVFILTID_MAX value to reflect the new addition
 * - Add a filterops to the sysfilt_ops. Private filters should be added at the end of
 *   the Private filters section of the array.
 */
SECURITY_READ_ONLY_EARLY(static struct filterops *) sysfilt_ops[EVFILTID_MAX] = {
	/* Public Filters */
	[~EVFILT_READ]			= &file_filtops,
	[~EVFILT_WRITE]			= &file_filtops,
	[~EVFILT_AIO]			= &bad_filtops,
	[~EVFILT_VNODE]			= &file_filtops,
	[~EVFILT_PROC]			= &proc_filtops,
	[~EVFILT_SIGNAL]		= &sig_filtops,
	[~EVFILT_TIMER]			= &timer_filtops,
	[~EVFILT_MACHPORT]		= &machport_filtops,
	[~EVFILT_FS]			= &fs_filtops,
	[~EVFILT_USER]			= &user_filtops,
	[~EVFILT_SOCK]			= &file_filtops,
#if CONFIG_MEMORYSTATUS
	[~EVFILT_MEMORYSTATUS]		= &memorystatus_filtops,
#else
	[~EVFILT_MEMORYSTATUS]		= &bad_filtops,
#endif
	[~EVFILT_EXCEPT]		= &file_filtops,

	[~EVFILT_WORKLOOP]		= &workloop_filtops,

	/* Private filters */
	[EVFILTID_KQREAD]		= &kqread_filtops,
	[EVFILTID_PIPE_R]		= &pipe_rfiltops,
	[EVFILTID_PIPE_W]		= &pipe_wfiltops,
	[EVFILTID_PTSD]			= &ptsd_kqops,
	[EVFILTID_SOREAD]		= &soread_filtops,
	[EVFILTID_SOWRITE]		= &sowrite_filtops,
	[EVFILTID_SCK]			= &sock_filtops,
	[EVFILTID_SOEXCEPT]		= &soexcept_filtops,
	[EVFILTID_SPEC]			= &spec_filtops,
	[EVFILTID_BPFREAD]		= &bpfread_filtops,
	[EVFILTID_NECP_FD]		= &necp_fd_rfiltops,
	[EVFILTID_FSEVENT]		= &fsevent_filtops,
	[EVFILTID_VN]			= &vnode_filtops,
	[EVFILTID_TTY]			= &tty_filtops,
	[EVFILTID_PTMX]			= &ptmx_kqops,
};
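/*
 * Illustrative note (added for clarity, not in the original source): public
 * EVFILT_* identifiers are negative, so the table above indexes them through
 * bitwise complement; EVFILT_READ == -1 lands in slot [~EVFILT_READ] == [0],
 * EVFILT_WRITE == -2 in slot [1], and so on, while private EVFILTID_* values
 * index the array directly.
 */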
/* waitq prepost callback */
void waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook, void *knote_hook, int qos);
#ifndef _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
#define _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG 0x02000000 /* pthread event manager bit */
#endif
#ifndef _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
#define _PTHREAD_PRIORITY_OVERCOMMIT_FLAG    0x80000000 /* request overcommit threads */
#endif
#ifndef _PTHREAD_PRIORITY_QOS_CLASS_MASK
#define _PTHREAD_PRIORITY_QOS_CLASS_MASK    0x003fff00  /* QoS class mask */
#endif
#ifndef _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32
#define _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32 8
#endif
static inline __kdebug_only
uintptr_t
kqr_thread_id(struct kqrequest *kqr)
{
	return (uintptr_t)thread_tid(kqr->kqr_thread);
}

boolean_t is_workqueue_thread(thread_t thread)
{
	return (thread_get_tag(thread) & THREAD_TAG_WORKQUEUE);
}
void knote_canonicalize_kevent_qos(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	unsigned long canonical;

	if ((kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0)
		return;

	/* preserve manager and overcommit flags in this case */
	canonical = pthread_priority_canonicalize(kn->kn_qos, FALSE);
	kn->kn_qos = (qos_t)canonical;
}
kq_index_t qos_index_from_qos(struct knote *kn, qos_t qos, boolean_t propagation)
{
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t qos_index;
	unsigned long flags = 0;

	if ((kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0)
		return QOS_INDEX_KQFILE;

	qos_index = (kq_index_t)thread_qos_from_pthread_priority(
				(unsigned long)qos, &flags);

	if (kq->kq_state & KQ_WORKQ) {
		/* workq kqueues support requesting a manager thread (non-propagation) */
		if (!propagation && (flags & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG))
			return KQWQ_QOS_MANAGER;
	}

	return qos_index;
}
qos_t qos_from_qos_index(kq_index_t qos_index)
{
	/* should only happen for KQ_WORKQ */
	if (qos_index == KQWQ_QOS_MANAGER)
		return _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;

	if (qos_index == 0)
		return THREAD_QOS_UNSPECIFIED;

	/* Should have support from pthread kext support */
	return (1 << (qos_index - 1 +
	              _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32));
}
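/*
 * Illustrative note (added for clarity, not in the original source): with
 * _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32 == 8, a qos_index of 1 maps to the
 * priority bit (1 << 8) == 0x100, an index of 2 to 0x200, and so on, all of
 * which fall within _PTHREAD_PRIORITY_QOS_CLASS_MASK.
 */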
/* kqr lock must be held */
unsigned long pthread_priority_for_kqrequest(
	struct kqrequest *kqr,
	kq_index_t qos_index)
{
	unsigned long priority = qos_from_qos_index(qos_index);
	if (kqr->kqr_state & KQR_THOVERCOMMIT) {
		priority |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
	}
	return priority;
}
kq_index_t qos_index_for_servicer(int qos_class, thread_t thread, int flags)
{
#pragma unused(thread)
	kq_index_t qos_index;

	if (flags & KEVENT_FLAG_WORKQ_MANAGER)
		return KQWQ_QOS_MANAGER;

	qos_index = (kq_index_t)qos_class;
	assert(qos_index > 0 && qos_index < KQWQ_QOS_MANAGER);

	return qos_index;
}
/*
 * kqueue/note lock implementations
 *
 *	The kqueue lock guards the kq state, the state of its queues,
 *	and the kqueue-aware status and use counts of individual knotes.
 *
 *	The kqueue workq lock is used to protect state guarding the
 *	interaction of the kqueue with the workq.  This state cannot
 *	be guarded by the kq lock - as it needs to be taken when we
 *	already have the waitq set lock held (during the waitq hook
 *	callback).  It might be better to use the waitq lock itself
 *	for this, but the IRQ requirements make that difficult.
 *
 *	Knote flags, filter flags, and associated data are protected
 *	by the underlying object lock - and are only ever looked at
 *	by calling the filter to get a [consistent] snapshot of that
 *	data.
 */
lck_grp_attr_t * kq_lck_grp_attr;
lck_grp_t * kq_lck_grp;
lck_attr_t * kq_lck_attr;
static inline void
kqlock(struct kqueue *kq)
{
	lck_spin_lock(&kq->kq_lock);
}

static inline void
kqlock_held(__assert_only struct kqueue *kq)
{
	LCK_SPIN_ASSERT(&kq->kq_lock, LCK_ASSERT_OWNED);
}

static inline void
kqunlock(struct kqueue *kq)
{
	lck_spin_unlock(&kq->kq_lock);
}
static inline void
knhash_lock(proc_t p)
{
	lck_mtx_lock(&p->p_fd->fd_knhashlock);
}

static inline void
knhash_unlock(proc_t p)
{
	lck_mtx_unlock(&p->p_fd->fd_knhashlock);
}
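/*
 * Illustrative sketch (added for clarity, not in the original source): the
 * expected pairing when touching kq-guarded knote state in this file.
 *
 *	kqlock(kq);
 *	... inspect or update kq->kq_count, kn->kn_status, the queues ...
 *	kqunlock(kq);
 */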
/*
 * Convert a kq lock to a knote use reference.
 *
 *	If the knote is being dropped, or has
 *	vanished, we can't get a use reference.
 *	Just return with it still locked.
 *
 *	- kq locked at entry
 *	- unlock on exit if we get the use reference
 */
static int
kqlock2knoteuse(struct kqueue *kq, struct knote *kn, int flags)
{
	if (kn->kn_status & (KN_DROPPING | KN_VANISHED))
		return (0);

	assert(kn->kn_status & KN_ATTACHED);
	kn->kn_inuse++;
	if (flags & KNUSE_BOOST) {
		set_thread_rwlock_boost();
	}
	kqunlock(kq);
	return (1);
}
/*
 *	- kq locked at entry
 *	- kq unlocked at exit
 */
static int
knoteusewait(struct kqueue *kq, struct knote *kn)
{
	kn->kn_status |= KN_USEWAIT;
	waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
			CAST_EVENT64_T(&kn->kn_status),
			THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
	kqunlock(kq);
	return thread_block(THREAD_CONTINUE_NULL);
}
static bool
knoteuse_needs_boost(struct knote *kn, struct kevent_internal_s *kev)
{
	if (knote_fops(kn)->f_needs_boost) {
		return knote_fops(kn)->f_needs_boost(kev);
	}
	return false;
}
/*
 * Convert from a knote use reference back to kq lock.
 *
 *	Drop a use reference and wake any waiters if
 *	this is the last one.
 *
 *	If someone is trying to drop the knote, but the
 *	caller has events they must deliver, take
 *	responsibility for the drop later - and wake the
 *	other attempted dropper in a manner that informs
 *	him of the transfer of responsibility.
 *
 *	The exit return indicates if the knote is still alive
 *	(or if not, the other dropper has been given the green
 *	light to drop it).
 *
 *	The kqueue lock is re-taken unconditionally.
 */
static int
knoteuse2kqlock(struct kqueue *kq, struct knote *kn, int flags)
{
	int dropped = 0;
	int steal_drop = (flags & KNUSE_STEAL_DROP);

	kqlock(kq);
	if (flags & KNUSE_BOOST) {
		clear_thread_rwlock_boost();
	}

	if (--kn->kn_inuse == 0) {

		if ((kn->kn_status & KN_ATTACHING) != 0) {
			kn->kn_status &= ~KN_ATTACHING;
		}

		if ((kn->kn_status & KN_USEWAIT) != 0) {
			wait_result_t result;

			/* If we need to, try and steal the drop */
			if (kn->kn_status & KN_DROPPING) {
				if (steal_drop && !(kn->kn_status & KN_STOLENDROP)) {
					kn->kn_status |= KN_STOLENDROP;
				} else {
					dropped = 1;
				}
			}

			/* wakeup indicating if ANY USE stole the drop */
			result = (kn->kn_status & KN_STOLENDROP) ?
			         THREAD_RESTART : THREAD_AWAKENED;

			kn->kn_status &= ~KN_USEWAIT;
			waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
					   CAST_EVENT64_T(&kn->kn_status),
					   result,
					   WAITQ_ALL_PRIORITIES);
		} else {
			/* should have seen use-wait if dropping with use refs */
			assert((kn->kn_status & (KN_DROPPING|KN_STOLENDROP)) == 0);
		}

	} else if (kn->kn_status & KN_DROPPING) {
		/* not the last ref but want to steal a drop if present */
		if (steal_drop && ((kn->kn_status & KN_STOLENDROP) == 0)) {
			kn->kn_status |= KN_STOLENDROP;

			/* but we now have to wait to be the last ref */
			knoteusewait(kq, kn);
			kqlock(kq);
		} else {
			dropped = 1;
		}
	}

	return (!dropped);
}
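/*
 * Illustrative sketch (added for clarity, not in the original source): the
 * use-reference helpers above let callers invoke a filter routine without
 * holding the kq lock while keeping the knote pinned.
 *
 *	if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
 *		// kq is unlocked here; kn_inuse keeps the knote alive
 *		result = knote_fops(kn)->f_event(kn, hint);
 *		if (knoteuse2kqlock(kq, kn, KNUSE_NONE)) {
 *			// knote still alive, kq lock re-taken
 *		}
 *	}
 */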
/*
 * Convert a kq lock to a knote use reference
 * (for the purpose of detaching AND vanishing it).
 *
 *	If the knote is being dropped, we can't get
 *	a detach reference, so wait for the knote to
 *	finish dropping before returning.
 *
 *	If the knote is being used for other purposes,
 *	we cannot detach it until those uses are done
 *	as well. Again, just wait for them to finish
 *	(caller will start over at lookup).
 *
 *	- kq locked at entry
 *	- unlocked on exit
 */
static int
kqlock2knotedetach(struct kqueue *kq, struct knote *kn, int flags)
{
	if ((kn->kn_status & KN_DROPPING) || kn->kn_inuse) {
		/* have to wait for dropper or current uses to go away */
		knoteusewait(kq, kn);
		return (0);
	}

	assert((kn->kn_status & KN_VANISHED) == 0);
	assert(kn->kn_status & KN_ATTACHED);
	kn->kn_status &= ~KN_ATTACHED;
	kn->kn_status |= KN_VANISHED;
	if (flags & KNUSE_BOOST) {
		clear_thread_rwlock_boost();
	}
	kn->kn_inuse++;
	kqunlock(kq);
	return (1);
}
/*
 * Convert a kq lock to a knote drop reference.
 *
 *	If the knote is in use, wait for the use count
 *	to subside.  We first mark our intention to drop
 *	it - keeping other users from "piling on."
 *	If we are too late, we have to wait for the
 *	other drop to complete.
 *
 *	- kq locked at entry
 *	- always unlocked on exit.
 *	- caller can't hold any locks that would prevent
 *	  the other dropper from completing.
 */
static int
kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
{
	int oktodrop;
	wait_result_t result;

	oktodrop = ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) == 0);
	/* if another thread is attaching, they will become the dropping thread */
	kn->kn_status |= KN_DROPPING;
	knote_unsuppress(kn);
	knote_dequeue(kn);
	if (oktodrop) {
		if (kn->kn_inuse == 0) {
			kqunlock(kq);
			return (oktodrop);
		}
	}
	result = knoteusewait(kq, kn);
	/* THREAD_RESTART == another thread stole the knote drop */
	return (result == THREAD_AWAKENED);
}
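/*
 * Illustrative sketch (added for clarity, not in the original source): the
 * usual shape of a drop through the helper above.
 *
 *	kqlock(kq);
 *	if (kqlock2knotedrop(kq, kn)) {
 *		// we won the race; kq is unlocked, finish the drop
 *		knote_drop(kn, p);
 *	}
 */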
/*
 * Release a knote use count reference.
 */
static void
knote_put(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);

	kqlock(kq);
	if (--kn->kn_inuse == 0) {
		if ((kn->kn_status & KN_USEWAIT) != 0) {
			kn->kn_status &= ~KN_USEWAIT;
			waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
					   CAST_EVENT64_T(&kn->kn_status),
					   THREAD_AWAKENED,
					   WAITQ_ALL_PRIORITIES);
		}
	}
	kqunlock(kq);
}
static int
filt_fileattach(struct knote *kn, struct kevent_internal_s *kev)
{
	return (fo_kqfilter(kn->kn_fp, kn, kev, vfs_context_current()));
}

#define f_flag f_fglob->fg_flag
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data
static void
filt_kqdetach(struct knote *kn)
{
	struct kqfile *kqf = (struct kqfile *)kn->kn_fp->f_data;
	struct kqueue *kq = &kqf->kqf_kqueue;

	kqlock(kq);
	KNOTE_DETACH(&kqf->kqf_sel.si_note, kn);
	kqunlock(kq);
}
static int
filt_kqueue(struct knote *kn, __unused long hint)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
	int count;

	count = kq->kq_count;
	return (count > 0);
}
static int
filt_kqtouch(struct knote *kn, struct kevent_internal_s *kev)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
	int res;

	kqlock(kq);
	kn->kn_data = kq->kq_count;
	if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
		kn->kn_udata = kev->udata;
	res = (kn->kn_data > 0);

	kqunlock(kq);

	return res;
}
static int
filt_kqprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
{
#pragma unused(data)
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
	int res;

	kqlock(kq);
	kn->kn_data = kq->kq_count;
	res = (kn->kn_data > 0);
	if (res) {
		*kev = kn->kn_kevent;
		if (kn->kn_flags & EV_CLEAR)
			kn->kn_data = 0;
	}
	kqunlock(kq);

	return res;
}
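/*
 * Illustrative user-space sketch (added for clarity, not in the original
 * source): a kqueue file descriptor can itself be watched with EVFILT_READ,
 * which is backed by kqread_filtops above; kn_data reports the number of
 * pending events on the inner queue.  'inner_kq' and 'outer_kq' are
 * hypothetical descriptors.
 *
 *	struct kevent ev;
 *	EV_SET(&ev, inner_kq, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(outer_kq, &ev, 1, NULL, 0, NULL);
 */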
#pragma mark EVFILT_PROC
static int
filt_procattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
	struct proc *p;

	assert(PID_MAX < NOTE_PDATAMASK);

	if ((kn->kn_sfflags & (NOTE_TRACK | NOTE_TRACKERR | NOTE_CHILD)) != 0) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data = ENOTSUP;
		return 0;
	}

	p = proc_find(kn->kn_id);
	if (p == NULL) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data = ESRCH;
		return 0;
	}

	const int NoteExitStatusBits = NOTE_EXIT | NOTE_EXITSTATUS;

	if ((kn->kn_sfflags & NoteExitStatusBits) == NoteExitStatusBits)
		do {
			pid_t selfpid = proc_selfpid();

			if (p->p_ppid == selfpid)
				break;	/* parent => ok */

			if ((p->p_lflag & P_LTRACED) != 0 &&
			    (p->p_oppid == selfpid))
				break;	/* parent-in-waiting => ok */

			proc_rele(p);
			kn->kn_flags = EV_ERROR;
			kn->kn_data = EACCES;
			return 0;
		} while (0);

	proc_klist_lock();

	kn->kn_ptr.p_proc = p;		/* store the proc handle */

	KNOTE_ATTACH(&p->p_klist, kn);

	proc_klist_unlock();

	proc_rele(p);

	/*
	 * only captures edge-triggered events after this point
	 * so it can't already be fired.
	 */
	return (0);
}
 965          * only captures edge-triggered events after this point 
 966          * so it can't already be fired. 
 973  * The knote may be attached to a different process, which may exit, 
 974  * leaving nothing for the knote to be attached to.  In that case, 
 975  * the pointer to the process will have already been nulled out. 
 978 filt_procdetach(struct knote 
*kn
) 
 984         p 
= kn
->kn_ptr
.p_proc
; 
 985         if (p 
!= PROC_NULL
) { 
 986                 kn
->kn_ptr
.p_proc 
= PROC_NULL
; 
 987                 KNOTE_DETACH(&p
->p_klist
, kn
); 
 994 filt_proc(struct knote 
*kn
, long hint
) 
 998         /* ALWAYS CALLED WITH proc_klist_lock */ 
1001          * Note: a lot of bits in hint may be obtained from the knote 
1002          * To free some of those bits, see <rdar://problem/12592988> Freeing up 
1003          * bits in hint for filt_proc 
1005          * mask off extra data 
1007         event 
= (u_int
)hint 
& NOTE_PCTRLMASK
; 
1010          * termination lifecycle events can happen while a debugger 
1011          * has reparented a process, in which case notifications 
1012          * should be quashed except to the tracing parent. When 
1013          * the debugger reaps the child (either via wait4(2) or 
1014          * process exit), the child will be reparented to the original 
1015          * parent and these knotes re-fired. 
1017         if (event 
& NOTE_EXIT
) { 
1018                 if ((kn
->kn_ptr
.p_proc
->p_oppid 
!= 0) 
1019                     && (knote_get_kq(kn
)->kq_p
->p_pid 
!= kn
->kn_ptr
.p_proc
->p_ppid
)) { 
1021                          * This knote is not for the current ptrace(2) parent, ignore. 
1028          * if the user is interested in this event, record it. 
1030         if (kn
->kn_sfflags 
& event
) 
1031                 kn
->kn_fflags 
|= event
; 
1033 #pragma clang diagnostic push 
1034 #pragma clang diagnostic ignored "-Wdeprecated-declarations" 
1035         if ((event 
== NOTE_REAP
) || ((event 
== NOTE_EXIT
) && !(kn
->kn_sfflags 
& NOTE_REAP
))) { 
1036                 kn
->kn_flags 
|= (EV_EOF 
| EV_ONESHOT
); 
1038 #pragma clang diagnostic pop 
1042          * The kernel has a wrapper in place that returns the same data 
1043          * as is collected here, in kn_data.  Any changes to how  
1044          * NOTE_EXITSTATUS and NOTE_EXIT_DETAIL are collected 
1045          * should also be reflected in the proc_pidnoteexit() wrapper. 
1047         if (event 
== NOTE_EXIT
) { 
1049                 if ((kn
->kn_sfflags 
& NOTE_EXITSTATUS
) != 0) { 
1050                         kn
->kn_fflags 
|= NOTE_EXITSTATUS
; 
1051                         kn
->kn_data 
|= (hint 
& NOTE_PDATAMASK
); 
1053                 if ((kn
->kn_sfflags 
& NOTE_EXIT_DETAIL
) != 0) { 
1054                         kn
->kn_fflags 
|= NOTE_EXIT_DETAIL
; 
1055                         if ((kn
->kn_ptr
.p_proc
->p_lflag 
& 
1056                              P_LTERM_DECRYPTFAIL
) != 0) { 
1057                                 kn
->kn_data 
|= NOTE_EXIT_DECRYPTFAIL
;  
1059                         if ((kn
->kn_ptr
.p_proc
->p_lflag 
& 
1060                              P_LTERM_JETSAM
) != 0) { 
1061                                 kn
->kn_data 
|= NOTE_EXIT_MEMORY
; 
1062                                 switch (kn
->kn_ptr
.p_proc
->p_lflag 
& P_JETSAM_MASK
) { 
1063                                 case P_JETSAM_VMPAGESHORTAGE
: 
1064                                         kn
->kn_data 
|= NOTE_EXIT_MEMORY_VMPAGESHORTAGE
; 
1066                                 case P_JETSAM_VMTHRASHING
: 
1067                                         kn
->kn_data 
|= NOTE_EXIT_MEMORY_VMTHRASHING
; 
1069                                 case P_JETSAM_FCTHRASHING
: 
1070                                         kn
->kn_data 
|= NOTE_EXIT_MEMORY_FCTHRASHING
; 
1072                                 case P_JETSAM_VNODE
: 
1073                                         kn
->kn_data 
|= NOTE_EXIT_MEMORY_VNODE
; 
1075                                 case P_JETSAM_HIWAT
: 
1076                                         kn
->kn_data 
|= NOTE_EXIT_MEMORY_HIWAT
; 
1079                                         kn
->kn_data 
|= NOTE_EXIT_MEMORY_PID
; 
1081                                 case P_JETSAM_IDLEEXIT
: 
1082                                         kn
->kn_data 
|= NOTE_EXIT_MEMORY_IDLE
; 
1086                         if ((kn
->kn_ptr
.p_proc
->p_csflags 
& 
1088                                 kn
->kn_data 
|= NOTE_EXIT_CSERROR
; 
1093         /* if we have any matching state, activate the knote */ 
1094         return (kn
->kn_fflags 
!= 0); 
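/*
 * Illustrative user-space sketch (added for clarity, not in the original
 * source): watching a child process for exit exercises the NOTE_EXIT /
 * NOTE_EXITSTATUS paths in filt_proc() above.  'child_pid' and 'kq_fd' are
 * hypothetical.
 *
 *	struct kevent ev;
 *	EV_SET(&ev, child_pid, EVFILT_PROC, EV_ADD, NOTE_EXIT | NOTE_EXITSTATUS, 0, NULL);
 *	kevent(kq_fd, &ev, 1, NULL, 0, NULL);
 *	// on delivery, ev.data carries the exit status bits (hint & NOTE_PDATAMASK)
 */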
static int
filt_proctouch(struct knote *kn, struct kevent_internal_s *kev)
{
	int res;

	proc_klist_lock();

	/* accept new filter flags and mask off output events no longer interesting */
	kn->kn_sfflags = kev->fflags;
	if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
		kn->kn_udata = kev->udata;

	/* restrict the current results to the (smaller?) set of new interest */
	/*
	 * For compatibility with previous implementations, we leave kn_fflags
	 * as they were before.
	 */
	//kn->kn_fflags &= kn->kn_sfflags;

	res = (kn->kn_fflags != 0);

	proc_klist_unlock();

	return res;
}
static int
filt_procprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
{
#pragma unused(data)
	int res;

	proc_klist_lock();
	res = (kn->kn_fflags != 0);
	if (res) {
		*kev = kn->kn_kevent;
		kn->kn_flags |= EV_CLEAR;	/* automatically set */
		kn->kn_fflags = 0;
		kn->kn_data = 0;
	}
	proc_klist_unlock();
	return res;
}
#pragma mark EVFILT_TIMER

/*
 * Values stored in the knote at rest (using Mach absolute time units)
 *
 * kn->kn_hook          where the thread_call object is stored
 * kn->kn_ext[0]        next deadline or 0 if immediate expiration
 * kn->kn_ext[1]        leeway value
 * kn->kn_sdata         interval timer: the interval
 *                      absolute/deadline timer: 0
 * kn->kn_data          fire count
 */
static lck_mtx_t _filt_timerlock;

static void filt_timerlock(void)   { lck_mtx_lock(&_filt_timerlock);   }
static void filt_timerunlock(void) { lck_mtx_unlock(&_filt_timerlock); }

static inline void filt_timer_assert_locked(void)
{
	LCK_MTX_ASSERT(&_filt_timerlock, LCK_MTX_ASSERT_OWNED);
}

/* state flags stored in kn_hookid */
#define TIMER_RUNNING           0x1
#define TIMER_CANCELWAIT        0x2
/*
 * filt_timervalidate - process data from user
 *
 * Sets up the deadline, interval, and leeway from the provided user data
 *
 * Input:
 *      kn_sdata        timer deadline or interval time
 *      kn_sfflags      style of timer, unit of measurement
 *
 * Output:
 *      kn_sdata        either interval in abstime or 0 if non-repeating timer
 *      ext[0]          fire deadline in abs/cont time
 *                      (or 0 if NOTE_ABSOLUTE and deadline is in past)
 *
 * Returns:
 *      EINVAL          Invalid user data parameters
 *
 * Called with timer filter lock held.
 */
static int
filt_timervalidate(struct knote *kn)
{
	/*
	 * There are five knobs that need to be chosen for a timer registration:
	 *
	 * A) Units of time (what is the time duration of the specified number)
	 *	Absolute and interval take:
	 *		NOTE_SECONDS, NOTE_USECONDS, NOTE_NSECONDS, NOTE_MACHTIME
	 *	Defaults to milliseconds if not specified
	 *
	 * B) Clock epoch (what is the zero point of the specified number)
	 *	For interval, there is none
	 *	For absolute, defaults to the gettimeofday/calendar epoch
	 *	With NOTE_MACHTIME, uses mach_absolute_time()
	 *	With NOTE_MACHTIME and NOTE_MACH_CONTINUOUS_TIME, uses mach_continuous_time()
	 *
	 * C) The knote's behavior on delivery
	 *	Interval timer causes the knote to arm for the next interval unless one-shot is set
	 *	Absolute is a forced one-shot timer which deletes on delivery
	 *	TODO: Add a way for absolute to be not forced one-shot
	 *
	 * D) Whether the time duration is relative to now or absolute
	 *	Interval fires at now + duration when it is set up
	 *	Absolute fires at now + difference between now walltime and passed in walltime
	 *	With NOTE_MACHTIME it fires at an absolute MAT or MCT.
	 *
	 * E) Whether the timer continues to tick across sleep
	 *	By default all three do not.
	 *	For interval and absolute, NOTE_MACH_CONTINUOUS_TIME causes them to tick across sleep
	 *	With NOTE_ABSOLUTE | NOTE_MACHTIME | NOTE_MACH_CONTINUOUS_TIME:
	 *		expires when mach_continuous_time() is > the passed in value.
	 */

	filt_timer_assert_locked();

	uint64_t multiplier;

	boolean_t use_abstime = FALSE;

	switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS|NOTE_MACHTIME)) {
	case NOTE_SECONDS:
		multiplier = NSEC_PER_SEC;
		break;
	case NOTE_USECONDS:
		multiplier = NSEC_PER_USEC;
		break;
	case NOTE_NSECONDS:
		multiplier = 1;
		break;
	case NOTE_MACHTIME:
		multiplier = 0;
		use_abstime = TRUE;
		break;
	case 0: /* milliseconds (default) */
		multiplier = NSEC_PER_SEC / 1000;
		break;
	default:
		return (EINVAL);
	}

	/* transform the leeway in kn_ext[1] to same time scale */
	if (kn->kn_sfflags & NOTE_LEEWAY) {
		uint64_t leeway_abs;

		if (use_abstime) {
			leeway_abs = (uint64_t)kn->kn_ext[1];
		} else {
			uint64_t leeway_ns;
			if (os_mul_overflow((uint64_t)kn->kn_ext[1], multiplier, &leeway_ns))
				return (ERANGE);

			nanoseconds_to_absolutetime(leeway_ns, &leeway_abs);
		}

		kn->kn_ext[1] = leeway_abs;
	}

	if (kn->kn_sfflags & NOTE_ABSOLUTE) {
		uint64_t deadline_abs;

		if (use_abstime) {
			deadline_abs = (uint64_t)kn->kn_sdata;
		} else {
			uint64_t calendar_deadline_ns;

			if (os_mul_overflow((uint64_t)kn->kn_sdata, multiplier, &calendar_deadline_ns))
				return (ERANGE);

			/* calendar_deadline_ns is in nanoseconds since the epoch */

			clock_sec_t seconds;
			clock_nsec_t nanoseconds;

			/*
			 * Note that the conversion through wall-time is only done once.
			 *
			 * If the relationship between MAT and gettimeofday changes,
			 * the underlying timer does not update.
			 *
			 * TODO: build a wall-time denominated timer_call queue
			 * and a flag to request DTRTing with wall-time timers
			 */
			clock_get_calendar_nanotime(&seconds, &nanoseconds);

			uint64_t calendar_now_ns = (uint64_t)seconds * NSEC_PER_SEC + nanoseconds;

			/* if deadline is in the future */
			if (calendar_now_ns < calendar_deadline_ns) {
				uint64_t interval_ns = calendar_deadline_ns - calendar_now_ns;
				uint64_t interval_abs;

				nanoseconds_to_absolutetime(interval_ns, &interval_abs);

				/*
				 * Note that the NOTE_MACH_CONTINUOUS_TIME flag here only
				 * causes the timer to keep ticking across sleep, but
				 * it does not change the calendar timebase.
				 */

				if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
					clock_continuoustime_interval_to_deadline(interval_abs,
					                                          &deadline_abs);
				else
					clock_absolutetime_interval_to_deadline(interval_abs,
					                                        &deadline_abs);
			} else {
				deadline_abs = 0; /* cause immediate expiration */
			}
		}

		kn->kn_ext[0] = deadline_abs;
		kn->kn_sdata  = 0;       /* NOTE_ABSOLUTE is non-repeating */
	} else if (kn->kn_sdata < 0) {
		/*
		 * Negative interval timers fire immediately, once.
		 *
		 * Ideally a negative interval would be an error, but certain clients
		 * pass negative values on accident, and expect an event back.
		 *
		 * In the old implementation the timer would repeat with no delay
		 * N times until mach_absolute_time() + (N * interval) underflowed,
		 * then it would wait ~forever by accidentally arming a timer for the far future.
		 *
		 * We now skip the power-wasting hot spin phase and go straight to the idle phase.
		 */
		kn->kn_sdata  = 0;      /* non-repeating */
		kn->kn_ext[0] = 0;      /* expire immediately */
	} else {
		uint64_t interval_abs = 0;

		if (use_abstime) {
			interval_abs = (uint64_t)kn->kn_sdata;
		} else {
			uint64_t interval_ns;
			if (os_mul_overflow((uint64_t)kn->kn_sdata, multiplier, &interval_ns))
				return (ERANGE);

			nanoseconds_to_absolutetime(interval_ns, &interval_abs);
		}

		uint64_t deadline = 0;

		if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
			clock_continuoustime_interval_to_deadline(interval_abs, &deadline);
		else
			clock_absolutetime_interval_to_deadline(interval_abs, &deadline);

		kn->kn_sdata  = interval_abs;   /* default to a repeating timer */
		kn->kn_ext[0] = deadline;
	}

	return (0);
}
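/*
 * Illustrative user-space sketch (added for clarity, not in the original
 * source): the knobs decoded by filt_timervalidate() above.  'kq_fd' and
 * 'deadline_secs' are hypothetical.
 *
 *	struct kevent ev;
 *	// repeating 500ms timer (milliseconds are the default unit)
 *	EV_SET(&ev, 1, EVFILT_TIMER, EV_ADD, 0, 500, NULL);
 *	kevent(kq_fd, &ev, 1, NULL, 0, NULL);
 *	// one-shot absolute timer at a calendar time expressed in seconds
 *	EV_SET(&ev, 2, EVFILT_TIMER, EV_ADD, NOTE_ABSOLUTE | NOTE_SECONDS, deadline_secs, NULL);
 *	kevent(kq_fd, &ev, 1, NULL, 0, NULL);
 */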
/*
 * filt_timerexpire - the timer callout routine
 *
 * Just propagate the timer event into the knote
 * filter routine (by going through the knote
 * synchronization point).  Pass a hint to
 * indicate this is a real event, not just a
 * query from above.
 */
static void
filt_timerexpire(void *knx, __unused void *spare)
{
	struct klist timer_list;
	struct knote *kn = knx;

	filt_timerlock();

	kn->kn_hookid &= ~TIMER_RUNNING;

	/* no "object" for timers, so fake a list */
	SLIST_INIT(&timer_list);
	SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);

	KNOTE(&timer_list, 1);

	/* if someone is waiting for timer to pop */
	if (kn->kn_hookid & TIMER_CANCELWAIT) {
		struct kqueue *kq = knote_get_kq(kn);
		waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
				   CAST_EVENT64_T(&kn->kn_hook),
				   THREAD_AWAKENED,
				   WAITQ_ALL_PRIORITIES);

		kn->kn_hookid &= ~TIMER_CANCELWAIT;
	}

	filt_timerunlock();
}
/*
 * Cancel a running timer (or wait for the pop).
 * Timer filter lock is held.
 * May drop and retake the timer filter lock.
 */
static void
filt_timercancel(struct knote *kn)
{
	filt_timer_assert_locked();

	assert((kn->kn_hookid & TIMER_CANCELWAIT) == 0);

	/* if no timer, then we're good */
	if ((kn->kn_hookid & TIMER_RUNNING) == 0)
		return;

	thread_call_t callout = (thread_call_t)kn->kn_hook;

	/* cancel the callout if we can */
	if (thread_call_cancel(callout)) {
		kn->kn_hookid &= ~TIMER_RUNNING;
		return;
	}

	/* cancel failed, we have to wait for the in-flight expire routine */

	kn->kn_hookid |= TIMER_CANCELWAIT;

	struct kqueue *kq = knote_get_kq(kn);

	waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
	                    CAST_EVENT64_T(&kn->kn_hook),
	                    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

	filt_timerunlock();
	thread_block(THREAD_CONTINUE_NULL);
	filt_timerlock();

	assert((kn->kn_hookid & TIMER_CANCELWAIT) == 0);
	assert((kn->kn_hookid & TIMER_RUNNING) == 0);
}
static void
filt_timerarm(struct knote *kn)
{
	filt_timer_assert_locked();

	assert((kn->kn_hookid & TIMER_RUNNING) == 0);

	thread_call_t callout = (thread_call_t)kn->kn_hook;

	uint64_t deadline = kn->kn_ext[0];
	uint64_t leeway   = kn->kn_ext[1];

	int filter_flags = kn->kn_sfflags;
	unsigned int timer_flags = 0;

	if (filter_flags & NOTE_CRITICAL)
		timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL;
	else if (filter_flags & NOTE_BACKGROUND)
		timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND;
	else
		timer_flags |= THREAD_CALL_DELAY_USER_NORMAL;

	if (filter_flags & NOTE_LEEWAY)
		timer_flags |= THREAD_CALL_DELAY_LEEWAY;

	if (filter_flags & NOTE_MACH_CONTINUOUS_TIME)
		timer_flags |= THREAD_CALL_CONTINUOUS;

	thread_call_enter_delayed_with_leeway(callout, NULL,
	                                      deadline, leeway,
	                                      timer_flags);

	kn->kn_hookid |= TIMER_RUNNING;
}
/*
 * Does this knote need a timer armed for it, or should it be ready immediately?
 */
static boolean_t
filt_timer_is_ready(struct knote *kn)
{
	uint64_t now;

	if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
		now = mach_continuous_time();
	else
		now = mach_absolute_time();

	uint64_t deadline = kn->kn_ext[0];

	if (deadline == 0)
		return TRUE;

	return deadline <= now;
}
/*
 * Allocate a thread call for the knote's lifetime, and kick off the timer.
 */
static int
filt_timerattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
	thread_call_t callout;
	int error;

	callout = thread_call_allocate_with_options(filt_timerexpire,
	                (thread_call_param_t)kn, THREAD_CALL_PRIORITY_HIGH,
	                THREAD_CALL_OPTIONS_ONCE);

	if (NULL == callout) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data = ENOMEM;
		return 0;
	}

	filt_timerlock();

	if ((error = filt_timervalidate(kn)) != 0) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data  = error;
		filt_timerunlock();

		__assert_only boolean_t freed = thread_call_free(callout);
		assert(freed);
		return 0;
	}

	kn->kn_hook = (void*)callout;
	kn->kn_hookid = 0;
	kn->kn_flags |= EV_CLEAR;

	/* NOTE_ABSOLUTE implies EV_ONESHOT */
	if (kn->kn_sfflags & NOTE_ABSOLUTE)
		kn->kn_flags |= EV_ONESHOT;

	boolean_t timer_ready = FALSE;

	if ((timer_ready = filt_timer_is_ready(kn))) {
		/* cause immediate expiration */
		kn->kn_data = 1;
	} else {
		filt_timerarm(kn);
	}

	filt_timerunlock();

	return timer_ready;
}
/*
 * Shut down the timer if it's running, and free the callout.
 */
static void
filt_timerdetach(struct knote *kn)
{
	thread_call_t callout;

	filt_timerlock();

	callout = (thread_call_t)kn->kn_hook;
	filt_timercancel(kn);

	filt_timerunlock();

	__assert_only boolean_t freed = thread_call_free(callout);
	assert(freed);
}
/*
 * filt_timerevent - post events to a timer knote
 *
 * Called in the context of filt_timerexpire with
 * the filt_timerlock held
 */
static int
filt_timerevent(struct knote *kn, __unused long hint)
{
	filt_timer_assert_locked();

	kn->kn_data = 1;
	return (1);
}
/*
 * filt_timertouch - update timer knote with new user input
 *
 * Cancel and restart the timer based on new user data. When
 * the user picks up a knote, clear the count of how many timer
 * pops have gone off (in kn_data).
 */
static int
filt_timertouch(
	struct knote *kn,
	struct kevent_internal_s *kev)
{
	int error;

	filt_timerlock();

	/*
	 * cancel current call - drops and retakes lock
	 * TODO: not safe against concurrent touches?
	 */
	filt_timercancel(kn);

	/* clear if the timer had previously fired, the user no longer wants to see it */
	kn->kn_data = 0;

	/* capture the new values used to compute deadline */
	kn->kn_sdata = kev->data;
	kn->kn_sfflags = kev->fflags;
	kn->kn_ext[0] = kev->ext[0];
	kn->kn_ext[1] = kev->ext[1];

	if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
		kn->kn_udata = kev->udata;

	/* recalculate deadline */
	error = filt_timervalidate(kn);
	if (error) {
		/* no way to report error, so mark it in the knote */
		kn->kn_flags |= EV_ERROR;
		kn->kn_data = error;
		filt_timerunlock();
		return 1;
	}

	boolean_t timer_ready = FALSE;

	if ((timer_ready = filt_timer_is_ready(kn))) {
		/* cause immediate expiration */
		kn->kn_data = 1;
	} else {
		filt_timerarm(kn);
	}

	filt_timerunlock();

	return timer_ready;
}
/*
 * filt_timerprocess - query state of knote and snapshot event data
 *
 * Determine if the timer has fired in the past, snapshot the state
 * of the kevent for returning to user-space, and clear pending event
 * counters for the next time.
 */
static int
filt_timerprocess(
	struct knote *kn,
	__unused struct filt_process_s *data,
	struct kevent_internal_s *kev)
{
	if (kn->kn_data == 0 || (kn->kn_hookid & TIMER_CANCELWAIT)) {
		/*
		 * The timer hasn't yet fired, so there's nothing to deliver, or a
		 * touch is in the middle of canceling the timer,
		 * so don't deliver or re-arm anything.
		 *
		 * This can happen if a touch resets a timer that had fired
		 * without being processed
		 */
		return 0;
	}

	if (kn->kn_sdata != 0 && ((kn->kn_flags & EV_ERROR) == 0)) {
		/*
		 * This is a 'repeating' timer, so we have to emit
		 * how many intervals expired between the arm
		 * and the process.
		 *
		 * A very strange style of interface, because
		 * this could easily be done in the client...
		 */

		/* The timer better have had expired... */
		assert((kn->kn_hookid & TIMER_RUNNING) == 0);

		uint64_t now;

		if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
			now = mach_continuous_time();
		else
			now = mach_absolute_time();

		uint64_t first_deadline = kn->kn_ext[0];
		uint64_t interval_abs   = kn->kn_sdata;
		uint64_t orig_arm_time  = first_deadline - interval_abs;

		assert(now > orig_arm_time);
		assert(now > first_deadline);

		uint64_t elapsed = now - orig_arm_time;

		uint64_t num_fired = elapsed / interval_abs;

		/*
		 * To reach this code, we must have seen the timer pop
		 * and be in repeating mode, so therefore it must have been
		 * more than 'interval' time since the attach or last
		 * successful touch.
		 *
		 * An unsuccessful touch would:
		 *	disarm the timer,
		 *	clear kn_data and kn_sdata,
		 *	and set EV_ERROR,
		 * all of which will prevent this code from running.
		 */
		assert(num_fired > 0);

		/* report how many intervals have elapsed to the user */
		kn->kn_data = (int64_t) num_fired;

		/* We only need to re-arm the timer if it's not about to be destroyed */
		if ((kn->kn_flags & EV_ONESHOT) == 0) {
			/* fire at the end of the next interval */
			uint64_t new_deadline = first_deadline + num_fired * interval_abs;

			assert(new_deadline > now);

			kn->kn_ext[0] = new_deadline;

			filt_timerarm(kn);
		}
	}

	/*
	 * Copy out the interesting kevent state,
	 * but don't leak out the raw time calculations.
	 *
	 * TODO: potential enhancements - tell the user about:
	 *      - deadline to which this timer thought it was expiring
	 *      - return kn_sfflags in the fflags field so the client can know
	 *        under what flags the timer fired
	 */
	*kev = kn->kn_kevent;
	kev->ext[0] = 0;
	/* kev->ext[1] = 0;  JMM - shouldn't we hide this too? */

	/* we have delivered the event, reset the timer pop count */
	kn->kn_data = 0;

	return 1;
}
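/*
 * Worked example (editorial illustration, not part of the original source):
 * assume a repeating timer with interval_abs = 100 time units, armed at T, so
 * first_deadline = T+100.  If the knote is processed at now = T+350, then
 * orig_arm_time = first_deadline - interval_abs = T, elapsed = 350, and
 * num_fired = 350 / 100 = 3 intervals are reported in kn_data.  The re-armed
 * new_deadline = (T+100) + 3*100 = T+400, which is strictly after 'now' as
 * the assertion above requires.
 */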
SECURITY_READ_ONLY_EARLY(static struct filterops) timer_filtops = {
	.f_attach   = filt_timerattach,
	.f_detach   = filt_timerdetach,
	.f_event    = filt_timerevent,
	.f_touch    = filt_timertouch,
	.f_process  = filt_timerprocess,
};
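/*
 * Editorial illustration (not part of the original source): a minimal
 * userspace sketch of how timer_filtops is typically exercised through the
 * standard <sys/event.h> API; the identifiers below are only an example.
 *
 *	#include <sys/event.h>
 *
 *	int kq = kqueue();
 *	struct kevent kev, out;
 *	// one-shot timer, 5 seconds from now; attaches via filt_timerattach()
 *	EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD | EV_ONESHOT, NOTE_SECONDS, 5, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *	// blocks until the timer fires; out.data carries the pop count that
 *	// filt_timerprocess() snapshotted into the returned kevent
 *	kevent(kq, NULL, 0, &out, 1, NULL);
 */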
#pragma mark EVFILT_USER

static void
filt_userlock(void)
{
	lck_spin_lock(&_filt_userlock);
}

static void
filt_userunlock(void)
{
	lck_spin_unlock(&_filt_userlock);
}

static int
filt_userattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
	/* EVFILT_USER knotes are not attached to anything in the kernel */
	/* Cant discover this knote until after attach - so no lock needed */

	if (kn->kn_sfflags & NOTE_TRIGGER) {
		kn->kn_hookid = 1;
	} else {
		kn->kn_hookid = 0;
	}
	return (kn->kn_hookid);
}

static void
filt_userdetach(__unused struct knote *kn)
{
	/* EVFILT_USER knotes are not attached to anything in the kernel */
}

static int
filt_userevent(
	__unused struct knote *kn,
	__unused long hint)
{
	panic("filt_userevent");
	return 0;
}

static int
filt_usertouch(
	struct knote *kn,
	struct kevent_internal_s *kev)
{
	uint64_t ffctrl;
	uint64_t fflags;
	int active;

	filt_userlock();

	ffctrl = kev->fflags & NOTE_FFCTRLMASK;
	fflags = kev->fflags & NOTE_FFLAGSMASK;
	switch (ffctrl) {
	case NOTE_FFNOP:
		break;
	case NOTE_FFAND:
		kn->kn_sfflags &= fflags;
		break;
	case NOTE_FFOR:
		kn->kn_sfflags |= fflags;
		break;
	case NOTE_FFCOPY:
		kn->kn_sfflags = fflags;
		break;
	}
	kn->kn_sdata = kev->data;

	if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
		kn->kn_udata = kev->udata;

	if (kev->fflags & NOTE_TRIGGER) {
		kn->kn_hookid = 1;
	}
	active = kn->kn_hookid;

	filt_userunlock();

	return (active);
}

static int
filt_userprocess(
	struct knote *kn,
	__unused struct filt_process_s *data,
	struct kevent_internal_s *kev)
{
	filt_userlock();

	if (kn->kn_hookid == 0) {
		filt_userunlock();
		return 0;
	}

	*kev = kn->kn_kevent;
	kev->fflags = (volatile UInt32)kn->kn_sfflags;
	kev->data = kn->kn_sdata;
	if (kn->kn_flags & EV_CLEAR) {
		kn->kn_hookid = 0;
		kn->kn_data = 0;
		kn->kn_fflags = 0;
	}
	filt_userunlock();

	return 1;
}
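/*
 * Editorial illustration (not part of the original source): EVFILT_USER
 * knotes are driven entirely from userspace.  A hedged sketch using the
 * standard <sys/event.h> API:
 *
 *	struct kevent kev;
 *	// attach; filt_userattach() leaves kn_hookid = 0 (not yet triggered)
 *	EV_SET(&kev, 42, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *	// touch; filt_usertouch() ORs the low fflag bit in (NOTE_FFOR) and
 *	// NOTE_TRIGGER sets kn_hookid = 1, making the knote deliverable
 *	EV_SET(&kev, 42, EVFILT_USER, 0, NOTE_TRIGGER | NOTE_FFOR | 0x1, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * The NOTE_FFCTRLMASK bits (NOTE_FFAND / NOTE_FFOR / NOTE_FFCOPY) select how
 * the NOTE_FFLAGSMASK bits are merged into kn_sfflags in filt_usertouch().
 */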
#pragma mark EVFILT_WORKLOOP

#if DEBUG || DEVELOPMENT
/*
 * see src/queue_internal.h in libdispatch
 */
#define DISPATCH_QUEUE_ENQUEUED 0x1ull
#endif

static inline void
filt_wllock(struct kqworkloop *kqwl)
{
	lck_mtx_lock(&kqwl->kqwl_statelock);
}

static inline void
filt_wlunlock(struct kqworkloop *kqwl)
{
	lck_mtx_unlock(&kqwl->kqwl_statelock);
}

static inline void
filt_wlheld(__assert_only struct kqworkloop *kqwl)
{
	LCK_MTX_ASSERT(&kqwl->kqwl_statelock, LCK_MTX_ASSERT_OWNED);
}
#define WL_OWNER_SUSPENDED    ((thread_t)(~0ull))  /* special owner when suspended */

static inline bool
filt_wlowner_is_valid(thread_t owner)
{
	return owner != THREAD_NULL && owner != WL_OWNER_SUSPENDED;
}

static inline bool
filt_wlshould_end_ownership(struct kqworkloop *kqwl,
		struct kevent_internal_s *kev, int error)
{
	thread_t owner = kqwl->kqwl_owner;
	return (error == 0 || error == ESTALE) &&
			(kev->fflags & NOTE_WL_END_OWNERSHIP) &&
			(owner == current_thread() || owner == WL_OWNER_SUSPENDED);
}
static inline bool
filt_wlshould_update_ownership(struct kevent_internal_s *kev, int error)
{
	return error == 0 && (kev->fflags & NOTE_WL_DISCOVER_OWNER) &&
			kev->ext[EV_EXTIDX_WL_ADDR];
}

static inline bool
filt_wlshould_set_async_qos(struct kevent_internal_s *kev, int error,
		kq_index_t async_qos)
{
	if (error != 0) {
		return false;
	}
	if (async_qos != THREAD_QOS_UNSPECIFIED) {
		return true;
	}
	if ((kev->fflags & NOTE_WL_THREAD_REQUEST) && (kev->flags & EV_DELETE)) {
		/* see filt_wlprocess() */
		return true;
	}
	return false;
}
static int
filt_wlupdateowner(struct kqworkloop *kqwl, struct kevent_internal_s *kev,
		int error, kq_index_t async_qos)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;
	thread_t cur_owner, new_owner, extra_thread_ref = THREAD_NULL;
	kq_index_t cur_override = THREAD_QOS_UNSPECIFIED;
	kq_index_t old_owner_override = THREAD_QOS_UNSPECIFIED;
	boolean_t ipc_override_is_sync = false;
	boolean_t old_owner_override_is_sync = false;
	int action = KQWL_UTQ_NONE;

	filt_wlheld(kqwl);

	/*
	 * The owner is only changed under both the filt_wllock and the
	 * kqwl_req_lock. Looking at it with either one held is fine.
	 */
	cur_owner = kqwl->kqwl_owner;
	if (filt_wlshould_end_ownership(kqwl, kev, error)) {
		new_owner = THREAD_NULL;
	} else if (filt_wlshould_update_ownership(kev, error)) {
		/*
		 * Decipher the owner port name, and translate accordingly.
		 * The low 2 bits were borrowed for other flags, so mask them off.
		 */
		uint64_t udata = kev->ext[EV_EXTIDX_WL_VALUE];
		mach_port_name_t new_owner_name = (mach_port_name_t)udata & ~0x3;
		if (new_owner_name != MACH_PORT_NULL) {
			new_owner_name = ipc_entry_name_mask(new_owner_name);
		}

		if (MACH_PORT_VALID(new_owner_name)) {
			new_owner = port_name_to_thread(new_owner_name);
			if (new_owner == THREAD_NULL)
				return EOWNERDEAD;
			extra_thread_ref = new_owner;
		} else if (new_owner_name == MACH_PORT_DEAD) {
			new_owner = WL_OWNER_SUSPENDED;
		} else {
			/*
			 * We never want to learn a new owner that is NULL.
			 * Ownership should be ended with END_OWNERSHIP.
			 */
			new_owner = cur_owner;
		}
	} else {
		new_owner = cur_owner;
	}

	if (filt_wlshould_set_async_qos(kev, error, async_qos)) {
		action = KQWL_UTQ_SET_ASYNC_QOS;
	}
	if (cur_owner == new_owner && action == KQWL_UTQ_NONE) {
		goto out;
	}

	kqwl_req_lock(kqwl);

	/* If already tracked as servicer, don't track as owner */
	if ((kqr->kqr_state & KQR_BOUND) && new_owner == kqr->kqr_thread) {
		kqwl->kqwl_owner = new_owner = THREAD_NULL;
	}

	if (cur_owner != new_owner) {
		kqwl->kqwl_owner = new_owner;
		if (new_owner == extra_thread_ref) {
			/* we just transfered this ref to kqwl_owner */
			extra_thread_ref = THREAD_NULL;
		}
		cur_override = kqworkloop_combined_qos(kqwl, &ipc_override_is_sync);
		old_owner_override = kqr->kqr_dsync_owner_qos;
		old_owner_override_is_sync = kqr->kqr_owner_override_is_sync;

		if (filt_wlowner_is_valid(new_owner)) {
			/* override it before we drop the old */
			if (cur_override != THREAD_QOS_UNSPECIFIED) {
				thread_add_ipc_override(new_owner, cur_override);
			}
			if (ipc_override_is_sync) {
				thread_add_sync_ipc_override(new_owner);
			}
			/* Update the kqr to indicate that owner has sync ipc override */
			kqr->kqr_dsync_owner_qos = cur_override;
			kqr->kqr_owner_override_is_sync = ipc_override_is_sync;
			thread_starts_owning_workloop(new_owner);
			if ((kqr->kqr_state & (KQR_THREQUESTED | KQR_BOUND)) == KQR_THREQUESTED) {
				if (action == KQWL_UTQ_NONE) {
					action = KQWL_UTQ_REDRIVE_EVENTS;
				}
			}
		} else if (new_owner == THREAD_NULL) {
			kqr->kqr_dsync_owner_qos = THREAD_QOS_UNSPECIFIED;
			kqr->kqr_owner_override_is_sync = false;
			if ((kqr->kqr_state & (KQR_THREQUESTED | KQR_WAKEUP)) == KQR_WAKEUP) {
				if (action == KQWL_UTQ_NONE) {
					action = KQWL_UTQ_REDRIVE_EVENTS;
				}
			}
		}
	}

	if (action != KQWL_UTQ_NONE) {
		kqworkloop_update_threads_qos(kqwl, action, async_qos);
	}

	kqwl_req_unlock(kqwl);

	/* Now that we are unlocked, drop the override and ref on old owner */
	if (new_owner != cur_owner && filt_wlowner_is_valid(cur_owner)) {
		if (old_owner_override != THREAD_QOS_UNSPECIFIED) {
			thread_drop_ipc_override(cur_owner);
		}
		if (old_owner_override_is_sync) {
			thread_drop_sync_ipc_override(cur_owner);
		}
		thread_ends_owning_workloop(cur_owner);
		thread_deallocate(cur_owner);
	}

out:
	if (extra_thread_ref) {
		thread_deallocate(extra_thread_ref);
	}
	return error;
}
static int
filt_wldebounce(
	struct kqworkloop *kqwl,
	struct kevent_internal_s *kev,
	int default_result)
{
	user_addr_t addr = CAST_USER_ADDR_T(kev->ext[EV_EXTIDX_WL_ADDR]);
	uint64_t udata;
	int error;

	/* we must have the workloop state mutex held */
	filt_wlheld(kqwl);

	/* Do we have a debounce address to work with? */
	if (addr) {
		uint64_t kdata = kev->ext[EV_EXTIDX_WL_VALUE];
		uint64_t mask  = kev->ext[EV_EXTIDX_WL_MASK];

		error = copyin_word(addr, &udata, sizeof(udata));
		if (error) {
			return error;
		}

		/* update state as copied in */
		kev->ext[EV_EXTIDX_WL_VALUE] = udata;

		/* If the masked bits don't match, reject it as stale */
		if ((udata & mask) != (kdata & mask)) {
			return ESTALE;
		}

#if DEBUG || DEVELOPMENT
		if ((kev->fflags & NOTE_WL_THREAD_REQUEST) && !(kev->flags & EV_DELETE)) {
			if ((udata & DISPATCH_QUEUE_ENQUEUED) == 0 &&
					(udata >> 48) != 0 && (udata >> 48) != 0xffff) {
				panic("kevent: workloop %#016llx is not enqueued "
						"(kev:%p dq_state:%#016llx)", kev->udata, kev, udata);
			}
		}
#endif
	}

	return default_result;
}
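/*
 * Illustrative example (not part of the original source): the debounce is a
 * plain masked compare against the word copied in from userspace.  If the
 * kevent registered kdata = 0x1 with mask = DISPATCH_QUEUE_ENQUEUED and the
 * value re-read from 'addr' now reads udata = 0x0, then
 * (udata & mask) != (kdata & mask), the request is rejected as stale, and the
 * freshly loaded value has already been mirrored into ext[EV_EXTIDX_WL_VALUE]
 * so userspace can re-evaluate its request.
 */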
/*
 * Remembers the last update that came in from userspace for debugging reasons.
 * - fflags is mirrored from the userspace kevent
 * - ext[i, i != VALUE] is mirrored from the userspace kevent
 * - ext[VALUE] is set to what the kernel loaded atomically
 * - data is set to the error if any
 */
static inline void
filt_wlremember_last_update(
	__assert_only struct kqworkloop *kqwl,
	struct knote *kn,
	struct kevent_internal_s *kev,
	int error)
{
	filt_wlheld(kqwl);
	kn->kn_fflags = kev->fflags;
	kn->kn_data = error;
	memcpy(kn->kn_ext, kev->ext, sizeof(kev->ext));
}
/*
 * Return which operations on EVFILT_WORKLOOP need to be protected against
 * knoteusewait() causing priority inversions.
 */
static bool
filt_wlneeds_boost(struct kevent_internal_s *kev)
{
	if (kev == NULL) {
		/*
		 * this is an f_process() usecount, and it can cause a drop to wait
		 */
		return true;
	}
	if (kev->fflags & NOTE_WL_THREAD_REQUEST) {
		/*
		 * All operations on thread requests may starve drops or re-attach of
		 * the same knote, all of them need boosts. None of what we do under
		 * thread-request usecount holds blocks anyway.
		 */
		return true;
	}
	if (kev->fflags & NOTE_WL_SYNC_WAIT) {
		/*
		 * this may call filt_wlwait() and we don't want to hold any boost when
		 * woken up, this would cause background threads contending on
		 * dispatch_sync() to wake up at 64 and be preempted immediately when
		 * the wait returns.
		 */
		return false;
	}

	/*
	 * SYNC_WAIT knotes when deleted don't need to be rushed, there's no
	 * detach/reattach race with these ever. In addition to this, when the
	 * SYNC_WAIT knote is dropped, the caller is no longer receiving the
	 * workloop overrides if any, and we'd rather schedule other threads than
	 * him, he's not possibly stalling anything anymore.
	 */
	return (kev->flags & EV_DELETE) == 0;
}
static int
filt_wlattach(struct knote *kn, struct kevent_internal_s *kev)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	int error = 0;
	kq_index_t qos_index = 0;

	if ((kq->kq_state & KQ_WORKLOOP) == 0) {
		error = ENOTSUP;
		goto out;
	}

#if DEVELOPMENT || DEBUG
	if (kev->ident == 0 && kev->udata == 0 && kev->fflags == 0) {
		struct kqrequest *kqr = &kqwl->kqwl_request;

		kqwl_req_lock(kqwl);
		if (kqr->kqr_dsync_waiters) {
			kev->fflags |= NOTE_WL_SYNC_WAIT;
		}
		if (kqr->kqr_qos_index) {
			kev->fflags |= NOTE_WL_THREAD_REQUEST;
		}
		if (kqwl->kqwl_owner == WL_OWNER_SUSPENDED) {
			kev->ext[0] = ~0ull;
		} else {
			kev->ext[0] = thread_tid(kqwl->kqwl_owner);
		}
		kev->ext[1] = thread_tid(kqwl->kqwl_request.kqr_thread);
		kev->ext[2] = thread_owned_workloops_count(current_thread());
		kev->ext[3] = kn->kn_kevent.ext[3];
		kqwl_req_unlock(kqwl);
		error = EBUSY;
		goto out;
	}
#endif

	/* Some simple validation */
	int command = (kn->kn_sfflags & NOTE_WL_COMMANDS_MASK);
	switch (command) {
	case NOTE_WL_THREAD_REQUEST:
		if (kn->kn_id != kqwl->kqwl_dynamicid) {
			error = EINVAL;
			goto out;
		}
		qos_index = qos_index_from_qos(kn, kn->kn_qos, FALSE);
		if (qos_index < THREAD_QOS_MAINTENANCE ||
				qos_index > THREAD_QOS_USER_INTERACTIVE) {
			error = ERANGE;
			goto out;
		}
		break;
	case NOTE_WL_SYNC_WAIT:
	case NOTE_WL_SYNC_WAKE:
		if (kq->kq_state & KQ_NO_WQ_THREAD) {
			error = ENOTSUP;
			goto out;
		}
		if (kn->kn_id == kqwl->kqwl_dynamicid) {
			error = EINVAL;
			goto out;
		}
		if ((kn->kn_flags & EV_DISABLE) == 0) {
			error = EINVAL;
			goto out;
		}
		if (kn->kn_sfflags & NOTE_WL_END_OWNERSHIP) {
			error = EINVAL;
			goto out;
		}
		break;
	default:
		error = EINVAL;
		goto out;
	}

	filt_wllock(kqwl);

	if (command == NOTE_WL_THREAD_REQUEST && kqwl->kqwl_request.kqr_qos_index) {
		/*
		 * There already is a thread request, and well, you're only allowed
		 * one per workloop, so fail the attach.
		 *
		 * Note: kqr_qos_index is always set with the wllock held, so we
		 * don't need to take the kqr lock.
		 */
		error = EALREADY;
	} else {
		/* Make sure user and kernel are in agreement on important state */
		error = filt_wldebounce(kqwl, kev, 0);
	}

	error = filt_wlupdateowner(kqwl, kev, error, qos_index);
	filt_wlunlock(kqwl);
out:
	if (error) {
		kn->kn_flags |= EV_ERROR;
		/* If userland wants ESTALE to be hidden, fail the attach anyway */
		if (error == ESTALE && (kn->kn_sfflags & NOTE_WL_IGNORE_ESTALE)) {
			error = 0;
		}
		kn->kn_data = error;
		return 0;
	}

	/* Just attaching the thread request successfully will fire it */
	return command == NOTE_WL_THREAD_REQUEST;
}
__attribute__((noinline,not_tail_called))
static int
filt_wlwait(struct kqworkloop           *kqwl,
            struct knote                *kn,
            struct kevent_internal_s    *kev)
{
	assert((kn->kn_sfflags & NOTE_WL_SYNC_WAKE) == 0);

	/*
	 * Hint to the wakeup side that this thread is waiting.  Also used by
	 * stackshot for waitinfo.
	 */
	kn->kn_hook = current_thread();

	thread_set_pending_block_hint(current_thread(), kThreadWaitWorkloopSyncWait);

	wait_result_t wr = assert_wait(kn, THREAD_ABORTSAFE);

	if (wr == THREAD_WAITING) {
		kq_index_t qos_index = qos_index_from_qos(kn, kev->qos, TRUE);
		struct kqrequest *kqr = &kqwl->kqwl_request;

		thread_t thread_to_handoff = THREAD_NULL; /* holds +1 thread ref */

		thread_t kqwl_owner = kqwl->kqwl_owner;
		if (filt_wlowner_is_valid(kqwl_owner)) {
			thread_reference(kqwl_owner);
			thread_to_handoff = kqwl_owner;
		}

		kqwl_req_lock(kqwl);

		assert(kqr->kqr_dsync_waiters < UINT16_MAX);
		kqr->kqr_dsync_waiters++;
		if (qos_index > kqr->kqr_dsync_waiters_qos) {
			kqworkloop_update_threads_qos(kqwl,
					KQWL_UTQ_SET_SYNC_WAITERS_QOS, qos_index);
		}

		if ((kqr->kqr_state & KQR_BOUND) && thread_to_handoff == THREAD_NULL) {
			assert(kqr->kqr_thread != THREAD_NULL);
			thread_t servicer = kqr->kqr_thread;

			thread_reference(servicer);
			thread_to_handoff = servicer;
		}

		kqwl_req_unlock(kqwl);

		filt_wlunlock(kqwl);

		/* TODO: use continuation based blocking <rdar://problem/31299584> */

		/* consume a refcount on thread_to_handoff, then thread_block() */
		wr = thread_handoff(thread_to_handoff);
		thread_to_handoff = THREAD_NULL;

		filt_wllock(kqwl);

		/* clear waiting state (only one waiting thread - so no race) */
		assert(kn->kn_hook == current_thread());
		kn->kn_hook = NULL;

		kqwl_req_lock(kqwl);
		assert(kqr->kqr_dsync_waiters > 0);
		if (--kqr->kqr_dsync_waiters == 0) {
			assert(kqr->kqr_dsync_waiters_qos);
			kqworkloop_update_threads_qos(kqwl,
					KQWL_UTQ_SET_SYNC_WAITERS_QOS, 0);
		}
		kqwl_req_unlock(kqwl);
	}

	switch (wr) {
	case THREAD_AWAKENED:
		return 0;
	case THREAD_INTERRUPTED:
		return EINTR;
	case THREAD_RESTART:
		return ECANCELED;
	default:
		panic("filt_wlattach: unexpected wait result %d", wr);
		return EINVAL;
	}
}
/* called in stackshot context to report the thread responsible for blocking this thread */
void
kdp_workloop_sync_wait_find_owner(__assert_only thread_t thread,
                                  event64_t event,
                                  thread_waitinfo_t *waitinfo)
{
	struct knote *kn = (struct knote*) event;
	assert(kdp_is_in_zone(kn, "knote zone"));

	assert(kn->kn_hook == thread);

	struct kqueue *kq = knote_get_kq(kn);
	assert(kdp_is_in_zone(kq, "kqueue workloop zone"));
	assert(kq->kq_state & KQ_WORKLOOP);

	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	struct kqrequest *kqr = &kqwl->kqwl_request;

	thread_t kqwl_owner = kqwl->kqwl_owner;
	thread_t servicer = kqr->kqr_thread;

	if (kqwl_owner == WL_OWNER_SUSPENDED) {
		waitinfo->owner = STACKSHOT_WAITOWNER_SUSPENDED;
	} else if (kqwl_owner != THREAD_NULL) {
		assert(kdp_is_in_zone(kqwl_owner, "threads"));
		waitinfo->owner = thread_tid(kqwl->kqwl_owner);
	} else if (servicer != THREAD_NULL) {
		assert(kdp_is_in_zone(servicer, "threads"));
		waitinfo->owner = thread_tid(servicer);
	} else if (kqr->kqr_state & KQR_THREQUESTED) {
		waitinfo->owner = STACKSHOT_WAITOWNER_THREQUESTED;
	} else {
		waitinfo->owner = 0;
	}

	waitinfo->context = kqwl->kqwl_dynamicid;
}
/*
 * Takes kqueue locked, returns locked, may drop in the middle and/or block for a while
 */
static int
filt_wlpost_attach(struct knote *kn, struct kevent_internal_s *kev)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	int error = 0;

	if (kev->fflags & NOTE_WL_SYNC_WAIT) {
		if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
			filt_wllock(kqwl);

			/* if the wake has already preposted, don't wait */
			if ((kn->kn_sfflags & NOTE_WL_SYNC_WAKE) == 0)
				error = filt_wlwait(kqwl, kn, kev);

			filt_wlunlock(kqwl);
			knoteuse2kqlock(kq, kn, KNUSE_NONE);
		}
	}
	return error;
}
static void
filt_wldetach(__assert_only struct knote *kn)
{
	assert(knote_get_kq(kn)->kq_state & KQ_WORKLOOP);

	/*
	 * Thread requests have nothing to detach.
	 * Sync waiters should have been aborted out
	 * and drop their refs before we could drop/
	 * detach their knotes.
	 */
	assert(kn->kn_hook == NULL);
}

static int
filt_wlevent(
	__unused struct knote *kn,
	__unused long hint)
{
	panic("filt_wlevent");
	return 0;
}
static int
filt_wlvalidate_kev_flags(struct knote *kn, struct kevent_internal_s *kev)
{
	int new_commands = kev->fflags & NOTE_WL_COMMANDS_MASK;
	int sav_commands = kn->kn_sfflags & NOTE_WL_COMMANDS_MASK;

	switch (new_commands) {
	case NOTE_WL_THREAD_REQUEST:
		/* thread requests can only update themselves */
		if (sav_commands != new_commands)
			return EINVAL;
		break;

	case NOTE_WL_SYNC_WAIT:
		if (kev->fflags & NOTE_WL_END_OWNERSHIP)
			return EINVAL;
		/* FALLTHROUGH */
	case NOTE_WL_SYNC_WAKE:
		/* waits and wakes can update themselves or their counterparts */
		if (!(sav_commands & (NOTE_WL_SYNC_WAIT | NOTE_WL_SYNC_WAKE)))
			return EINVAL;
		if (kev->fflags & NOTE_WL_UPDATE_QOS)
			return EINVAL;
		if ((kev->flags & (EV_ENABLE | EV_DELETE)) == EV_ENABLE)
			return EINVAL;
		if (kev->flags & EV_DELETE) {
			/*
			 * Really this is not supported: there is absolutely no reason
			 * whatsoever to want to fail the drop of a NOTE_WL_SYNC_WAIT knote.
			 */
			if (kev->ext[EV_EXTIDX_WL_ADDR] && kev->ext[EV_EXTIDX_WL_MASK]) {
				return EINVAL;
			}
		}
		break;

	default:
		return EINVAL;
	}

	if ((kev->flags & EV_DELETE) && (kev->fflags & NOTE_WL_DISCOVER_OWNER)) {
		return EINVAL;
	}
	return 0;
}
static int
filt_wltouch(
	struct knote *kn,
	struct kevent_internal_s *kev)
{
	struct kqueue *kq = knote_get_kq(kn);
	int error = 0;
	struct kqworkloop *kqwl;

	assert(kq->kq_state & KQ_WORKLOOP);
	kqwl = (struct kqworkloop *)kq;

	error = filt_wlvalidate_kev_flags(kn, kev);
	if (error) {
		goto out;
	}

	filt_wllock(kqwl);

	/* Make sure user and kernel are in agreement on important state */
	error = filt_wldebounce(kqwl, kev, 0);
	if (error) {
		error = filt_wlupdateowner(kqwl, kev, error, 0);
		goto out_unlock;
	}

	int new_command = kev->fflags & NOTE_WL_COMMANDS_MASK;
	switch (new_command) {
	case NOTE_WL_THREAD_REQUEST:
		assert(kqwl->kqwl_request.kqr_qos_index != THREAD_QOS_UNSPECIFIED);
		break;

	case NOTE_WL_SYNC_WAIT:
		/*
		 * we need to allow waiting several times on the same knote because
		 * of EINTR. If it's already woken though, it won't block.
		 */
		break;

	case NOTE_WL_SYNC_WAKE:
		if (kn->kn_sfflags & NOTE_WL_SYNC_WAKE) {
			/* disallow waking the same knote twice */
			error = EALREADY;
			goto out_unlock;
		}
		if (kn->kn_hook) {
			thread_wakeup_thread((event_t)kn, (thread_t)kn->kn_hook);
		}
		break;

	default:
		error = EINVAL;
		goto out_unlock;
	}

	/*
	 * Save off any additional fflags/data we just accepted
	 * But only keep the last round of "update" bits we acted on which helps
	 * debugging a lot.
	 */
	kn->kn_sfflags &= ~NOTE_WL_UPDATES_MASK;
	kn->kn_sfflags |= kev->fflags;
	kn->kn_sdata = kev->data;

	kq_index_t qos_index = THREAD_QOS_UNSPECIFIED;

	if (kev->fflags & NOTE_WL_UPDATE_QOS) {
		qos_t qos = pthread_priority_canonicalize(kev->qos, FALSE);

		if (kn->kn_qos != qos) {
			qos_index = qos_index_from_qos(kn, qos, FALSE);
			if (qos_index == THREAD_QOS_UNSPECIFIED) {
				error = ERANGE;
				goto out_unlock;
			}
			if (kn->kn_status & KN_QUEUED) {
				knote_dequeue(kn);
				knote_set_qos_index(kn, qos_index);
				knote_enqueue(kn);
			} else {
				knote_set_qos_index(kn, qos_index);
			}
			kn->kn_qos = qos;
		}
	}

	error = filt_wlupdateowner(kqwl, kev, 0, qos_index);
	if (error) {
		goto out_unlock;
	}

	if (new_command == NOTE_WL_SYNC_WAIT) {
		/* if the wake has already preposted, don't wait */
		if ((kn->kn_sfflags & NOTE_WL_SYNC_WAKE) == 0)
			error = filt_wlwait(kqwl, kn, kev);
	}

out_unlock:
	filt_wlremember_last_update(kqwl, kn, kev, error);
	filt_wlunlock(kqwl);
out:
	if (error) {
		if (error == ESTALE && (kev->fflags & NOTE_WL_IGNORE_ESTALE)) {
			/* If userland wants ESTALE to be hidden, do not activate */
			return 0;
		}
		kev->flags |= EV_ERROR;
		kev->data = error;
		return 0;
	}

	/* Just touching the thread request successfully will fire it */
	return new_command == NOTE_WL_THREAD_REQUEST;
}
static int
filt_wldrop_and_unlock(
	struct knote *kn,
	struct kevent_internal_s *kev)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	int error = 0, knoteuse_flags = KNUSE_NONE;

	assert(kev->flags & EV_DELETE);
	assert(kq->kq_state & KQ_WORKLOOP);

	error = filt_wlvalidate_kev_flags(kn, kev);
	if (error) {
		goto out;
	}

	if (kn->kn_sfflags & NOTE_WL_THREAD_REQUEST) {
		knoteuse_flags |= KNUSE_BOOST;
	}

	/* take a usecount to allow taking the filt_wllock */
	if (!kqlock2knoteuse(kq, kn, knoteuse_flags)) {
		/* knote is being dropped already */
		error = EINPROGRESS;
		goto out;
	}

	filt_wllock(kqwl);

	/*
	 * Make sure user and kernel are in agreement on important state
	 *
	 * Userland will modify bits to cause this to fail for the touch / drop
	 * race case (when a drop for a thread request quiescing comes in late after
	 * the workloop has been woken up again).
	 */
	error = filt_wldebounce(kqwl, kev, 0);

	if (!knoteuse2kqlock(kq, kn, knoteuse_flags)) {
		/* knote is no longer alive */
		error = EINPROGRESS;
		goto out_unlock;
	}

	if (!error && (kn->kn_sfflags & NOTE_WL_THREAD_REQUEST) && kn->kn_inuse) {
		/*
		 * There is a concurrent drop or touch happening, we can't resolve this,
		 * userland has to redrive.
		 *
		 * The race we're worried about here is the following:
		 *
		 *   f_touch               |  f_drop_and_unlock
		 * ------------------------+--------------------------------------------
		 *                         | kqlock2knoteuse()
		 *                         | debounces successfully
		 *  filt_wllock() <BLOCKS> |
		 *                         | knoteuse2kqlock()
		 *                         | kqlock2knotedrop() <BLOCKS, WAKES f_touch>
		 *  debounces successfully |
		 *  caller WAKES f_drop    |
		 *                         | performs drop, but f_touch should have won
		 *
		 * So if the usecount is not 0 here, we need to wait for it to drop and
		 * redrive the whole logic (including looking up the knote again).
		 */
		filt_wlunlock(kqwl);
		knoteusewait(kq, kn);
		return ERESTART;
	}

	/*
	 * If error is 0 this will set kqr_qos_index to THREAD_QOS_UNSPECIFIED
	 *
	 * If error is 0 or ESTALE this may drop ownership and cause a thread
	 * request redrive, however the kqlock is held which prevents f_process() to
	 * run until we did the drop for real.
	 */
	error = filt_wlupdateowner(kqwl, kev, error, 0);

	if ((kn->kn_sfflags & (NOTE_WL_SYNC_WAIT | NOTE_WL_SYNC_WAKE)) ==
			NOTE_WL_SYNC_WAIT) {
		/*
		 * When deleting a SYNC_WAIT knote that hasn't been woken up
		 * explicitly, issue a wake up.
		 */
		kn->kn_sfflags |= NOTE_WL_SYNC_WAKE;
		if (kn->kn_hook) {
			thread_wakeup_thread((event_t)kn, (thread_t)kn->kn_hook);
		}
	}

out_unlock:
	filt_wlremember_last_update(kqwl, kn, kev, error);
	filt_wlunlock(kqwl);

out:
	if (error == 0) {
		/* If nothing failed, do the regular knote drop. */
		if (kqlock2knotedrop(kq, kn)) {
			knote_drop(kn, current_proc());
		} else {
			error = EINPROGRESS;
		}
	} else {
		kqunlock(kq);
	}

	if (error == ESTALE && (kev->fflags & NOTE_WL_IGNORE_ESTALE)) {
		error = 0;
	}
	if (error == EINPROGRESS) {
		/*
		 * filt_wlprocess() makes sure that no event can be delivered for
		 * NOTE_WL_THREAD_REQUEST knotes once a drop is happening, and
		 * NOTE_WL_SYNC_* knotes are never fired.
		 *
		 * It means that EINPROGRESS is about a state that userland cannot
		 * observe for this filter (an event being delivered concurrently from
		 * a drop), so silence the error.
		 */
		error = 0;
	}
	return error;
}
static int
filt_wlprocess(
	struct knote *kn,
	__unused struct filt_process_s *data,
	struct kevent_internal_s *kev)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	struct kqrequest *kqr = &kqwl->kqwl_request;
	int rc = 0;

	assert(kq->kq_state & KQ_WORKLOOP);

	/* only thread requests should get here */
	assert(kn->kn_sfflags & NOTE_WL_THREAD_REQUEST);
	if (kn->kn_sfflags & NOTE_WL_THREAD_REQUEST) {
		filt_wllock(kqwl);
		assert(kqr->kqr_qos_index != THREAD_QOS_UNSPECIFIED);
		if (kqwl->kqwl_owner) {
			/*
			 * <rdar://problem/33584321> userspace sometimes due to events being
			 * delivered but not triggering a drain session can cause a process
			 * of the thread request knote.
			 *
			 * When that happens, the automatic deactivation due to process
			 * would swallow the event, so we have to activate the knote again.
			 */
			knote_activate(kn);
		} else if (kqr->kqr_qos_index) {
#if DEBUG || DEVELOPMENT
			user_addr_t addr = CAST_USER_ADDR_T(kn->kn_ext[EV_EXTIDX_WL_ADDR]);
			task_t t = current_task();
			uint64_t val;
			if (addr && task_is_active(t) && !task_is_halting(t) &&
					copyin_word(addr, &val, sizeof(val)) == 0 &&
					val && (val & DISPATCH_QUEUE_ENQUEUED) == 0 &&
					(val >> 48) != 0 && (val >> 48) != 0xffff) {
				panic("kevent: workloop %#016llx is not enqueued "
						"(kn:%p dq_state:%#016llx kev.dq_state:%#016llx)",
						kn->kn_udata, kn, val,
						kn->kn_ext[EV_EXTIDX_WL_VALUE]);
			}
#endif
			*kev = kn->kn_kevent;
			kev->fflags = kn->kn_sfflags;
			kev->data = kn->kn_sdata;
			kev->qos = kn->kn_qos;
			rc = 1;
		}
		filt_wlunlock(kqwl);
	}
	return rc;
}
#pragma mark kevent / knotes

/*
 * JMM - placeholder for not-yet-implemented filters
 */
static int
filt_badattach(__unused struct knote *kn, __unused struct kevent_internal_s *kev)
{
	kn->kn_flags |= EV_ERROR;
	kn->kn_data = ENOTSUP;
	return 0;
}
struct kqueue *
kqueue_alloc(struct proc *p, unsigned int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq = NULL;
	int policy;
	void *hook = NULL;
	uint64_t kq_addr_offset;

	if (flags & KEVENT_FLAG_WORKQ) {
		struct kqworkq *kqwq;
		int i;

		kqwq = (struct kqworkq *)zalloc(kqworkq_zone);
		if (kqwq == NULL)
			return (NULL);

		kq = &kqwq->kqwq_kqueue;
		bzero(kqwq, sizeof (struct kqworkq));

		kqwq->kqwq_state = KQ_WORKQ;

		for (i = 0; i < KQWQ_NBUCKETS; i++) {
			TAILQ_INIT(&kq->kq_queue[i]);
		}
		for (i = 0; i < KQWQ_NQOS; i++) {
			kqwq->kqwq_request[i].kqr_qos_index = i;
		}

		lck_spin_init(&kqwq->kqwq_reqlock, kq_lck_grp, kq_lck_attr);
		policy = SYNC_POLICY_FIFO;
		hook = (void *)kqwq;

	} else if (flags & KEVENT_FLAG_WORKLOOP) {
		struct kqworkloop *kqwl;
		int i;

		kqwl = (struct kqworkloop *)zalloc(kqworkloop_zone);
		if (kqwl == NULL)
			return (NULL);

		bzero(kqwl, sizeof (struct kqworkloop));

		kqwl->kqwl_state = KQ_WORKLOOP | KQ_DYNAMIC;
		kqwl->kqwl_retains = 1; /* donate a retain to creator */

		kq = &kqwl->kqwl_kqueue;
		for (i = 0; i < KQWL_NBUCKETS; i++) {
			TAILQ_INIT(&kq->kq_queue[i]);
		}
		TAILQ_INIT(&kqwl->kqwl_request.kqr_suppressed);

		lck_spin_init(&kqwl->kqwl_reqlock, kq_lck_grp, kq_lck_attr);
		lck_mtx_init(&kqwl->kqwl_statelock, kq_lck_grp, kq_lck_attr);

		policy = SYNC_POLICY_FIFO;
		if (flags & KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD) {
			policy |= SYNC_POLICY_PREPOST;
			kq->kq_state |= KQ_NO_WQ_THREAD;
		} else {
			hook = (void *)kqwl;
		}

	} else {
		struct kqfile *kqf;

		kqf = (struct kqfile *)zalloc(kqfile_zone);
		if (kqf == NULL)
			return (NULL);

		kq = &kqf->kqf_kqueue;
		bzero(kqf, sizeof (struct kqfile));
		TAILQ_INIT(&kq->kq_queue[0]);
		TAILQ_INIT(&kqf->kqf_suppressed);

		policy = SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST;
	}

	waitq_set_init(&kq->kq_wqs, policy, NULL, hook);
	lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
	kq->kq_p = p;

	if (fdp->fd_knlistsize < 0) {
		proc_fdlock(p);
		if (fdp->fd_knlistsize < 0)
			fdp->fd_knlistsize = 0; /* this process has had a kq */
		proc_fdunlock(p);
	}

	kq_addr_offset = ((uintptr_t)kq - (uintptr_t)VM_MIN_KERNEL_AND_KEXT_ADDRESS);
	/* Assert that the address can be pointer compacted for use with knote */
	assert(kq_addr_offset < (uint64_t)(1ull << KNOTE_KQ_BITSIZE));
	return (kq);
}
/*
 * knotes_dealloc - detach all knotes for the process and drop them
 *
 *		Called with proc_fdlock held.
 *		Returns with it locked.
 *		May drop it temporarily.
 *		Process is in such a state that it will not try to allocate
 *		any more knotes during this process (stopped for exit or exec).
 */
void
knotes_dealloc(proc_t p)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq;
	struct knote *kn;
	struct klist *kn_hash = NULL;
	int i;

	/* Close all the fd-indexed knotes up front */
	if (fdp->fd_knlistsize > 0) {
		for (i = 0; i < fdp->fd_knlistsize; i++) {
			while ((kn = SLIST_FIRST(&fdp->fd_knlist[i])) != NULL) {
				kq = knote_get_kq(kn);

				/* drop it ourselves or wait */
				if (kqlock2knotedrop(kq, kn)) {
					knote_drop(kn, p);
				}
			}
		}
		/* free the table */
		FREE(fdp->fd_knlist, M_KQUEUE);
		fdp->fd_knlist = NULL;
	}
	fdp->fd_knlistsize = -1;

	/* Clean out all the hashed knotes as well */
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i <= (int)fdp->fd_knhashmask; i++) {
			while ((kn = SLIST_FIRST(&fdp->fd_knhash[i])) != NULL) {
				kq = knote_get_kq(kn);

				/* drop it ourselves or wait */
				if (kqlock2knotedrop(kq, kn)) {
					knote_drop(kn, p);
				}
			}
		}
		kn_hash = fdp->fd_knhash;
		fdp->fd_knhashmask = 0;
		fdp->fd_knhash = NULL;
	}

	/* free the kn_hash table */
	if (kn_hash)
		FREE(kn_hash, M_KQUEUE);
}
/*
 * kqueue_dealloc - detach all knotes from a kqueue and free it
 *
 *	We walk each list looking for knotes referencing this
 *	this kqueue.  If we find one, we try to drop it.  But
 *	if we fail to get a drop reference, that will wait
 *	until it is dropped.  So, we can just restart again
 *	safe in the assumption that the list will eventually
 *	not contain any more references to this kqueue (either
 *	we dropped them all, or someone else did).
 *
 *	Assumes no new events are being added to the kqueue.
 *	Nothing locked on entry or exit.
 *
 * Workloop kqueues cant get here unless all the knotes
 * are already gone and all requested threads have come
 * and gone (cancelled or arrived).
 */
void
kqueue_dealloc(struct kqueue *kq)
{
	struct proc *p;
	struct filedesc *fdp;
	struct knote *kn;
	int i;

	if (kq == NULL)
		return;

	p = kq->kq_p;
	fdp = p->p_fd;

	for (i = 0; i < fdp->fd_knlistsize; i++) {
		kn = SLIST_FIRST(&fdp->fd_knlist[i]);
		while (kn != NULL) {
			if (kq == knote_get_kq(kn)) {
				assert((kq->kq_state & KQ_WORKLOOP) == 0);

				/* drop it ourselves or wait */
				if (kqlock2knotedrop(kq, kn)) {
					knote_drop(kn, p);
				}

				/* start over at beginning of list */
				kn = SLIST_FIRST(&fdp->fd_knlist[i]);
				continue;
			}
			kn = SLIST_NEXT(kn, kn_link);
		}
	}

	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
			kn = SLIST_FIRST(&fdp->fd_knhash[i]);
			while (kn != NULL) {
				if (kq == knote_get_kq(kn)) {
					assert((kq->kq_state & KQ_WORKLOOP) == 0);

					/* drop it ourselves or wait */
					if (kqlock2knotedrop(kq, kn)) {
						knote_drop(kn, p);
					}

					/* start over at beginning of list */
					kn = SLIST_FIRST(&fdp->fd_knhash[i]);
					continue;
				}
				kn = SLIST_NEXT(kn, kn_link);
			}
		}
	}

	if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;
		struct kqrequest *kqr = &kqwl->kqwl_request;
		thread_t cur_owner = kqwl->kqwl_owner;

		assert(TAILQ_EMPTY(&kqwl->kqwl_request.kqr_suppressed));
		if (filt_wlowner_is_valid(cur_owner)) {
			/*
			 * If the kqueue had an owner that prevented the thread request to
			 * go through, then no unbind happened, and we may have lingering
			 * overrides to drop.
			 */
			if (kqr->kqr_dsync_owner_qos != THREAD_QOS_UNSPECIFIED) {
				thread_drop_ipc_override(cur_owner);
				kqr->kqr_dsync_owner_qos = THREAD_QOS_UNSPECIFIED;
			}
			if (kqr->kqr_owner_override_is_sync) {
				thread_drop_sync_ipc_override(cur_owner);
				kqr->kqr_owner_override_is_sync = 0;
			}
			thread_ends_owning_workloop(cur_owner);
			thread_deallocate(cur_owner);
			kqwl->kqwl_owner = THREAD_NULL;
		}
	}

	/*
	 * waitq_set_deinit() remove the KQ's waitq set from
	 * any select sets to which it may belong.
	 */
	waitq_set_deinit(&kq->kq_wqs);
	lck_spin_destroy(&kq->kq_lock, kq_lck_grp);

	if (kq->kq_state & KQ_WORKQ) {
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		lck_spin_destroy(&kqwq->kqwq_reqlock, kq_lck_grp);
		zfree(kqworkq_zone, kqwq);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		assert(kqwl->kqwl_retains == 0);
		lck_spin_destroy(&kqwl->kqwl_reqlock, kq_lck_grp);
		lck_mtx_destroy(&kqwl->kqwl_statelock, kq_lck_grp);
		zfree(kqworkloop_zone, kqwl);
	} else {
		struct kqfile *kqf = (struct kqfile *)kq;

		zfree(kqfile_zone, kqf);
	}
}
void
kqueue_retain(struct kqueue *kq)
{
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	uint32_t previous;

	if ((kq->kq_state & KQ_DYNAMIC) == 0)
		return;

	previous = OSIncrementAtomic(&kqwl->kqwl_retains);
	if (previous == KQ_WORKLOOP_RETAINS_MAX)
		panic("kq(%p) retain overflow", kq);

	if (previous == 0)
		panic("kq(%p) resurrection", kq);
}
#define KQUEUE_CANT_BE_LAST_REF  0
#define KQUEUE_MIGHT_BE_LAST_REF 1

static int
kqueue_release(struct kqueue *kq, __assert_only int possibly_last)
{
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;

	if ((kq->kq_state & KQ_DYNAMIC) == 0) {
		return 0;
	}

	assert(kq->kq_state & KQ_WORKLOOP); /* for now */
	uint32_t refs = OSDecrementAtomic(&kqwl->kqwl_retains);
	if (__improbable(refs == 0)) {
		panic("kq(%p) over-release", kq);
	}
	if (refs == 1) {
		assert(possibly_last);
		return 1;
	}
	return 0;
}
static int
kqueue_body(struct proc *p, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval)
{
	struct kqueue *kq;
	struct fileproc *fp;
	int fd, error;

	error = falloc_withalloc(p,
	    &fp, &fd, vfs_context_current(), fp_zalloc, cra);
	if (error) {
		return (error);
	}

	kq = kqueue_alloc(p, 0);
	if (kq == NULL) {
		fp_free(p, fd, fp);
		return (ENOMEM);
	}

	fp->f_flag = FREAD | FWRITE;
	fp->f_ops = &kqueueops;
	fp->f_data = kq;

	proc_fdlock(p);
	*fdflags(p, fd) |= UF_EXCLOSE;
	procfdtbl_releasefd(p, fd, NULL);
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);

	*retval = fd;
	return (error);
}

int
kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval)
{
	return (kqueue_body(p, fileproc_alloc_init, NULL, retval));
}
static int
kevent_copyin(user_addr_t *addrp, struct kevent_internal_s *kevp, struct proc *p,
	      unsigned int flags)
{
	int advance;
	int error;

	if (flags & KEVENT_FLAG_LEGACY32) {
		bzero(kevp, sizeof (*kevp));

		if (IS_64BIT_PROCESS(p)) {
			struct user64_kevent kev64;

			advance = sizeof (kev64);
			error = copyin(*addrp, (caddr_t)&kev64, advance);
			if (error)
				return (error);
			kevp->ident = kev64.ident;
			kevp->filter = kev64.filter;
			kevp->flags = kev64.flags;
			kevp->udata = kev64.udata;
			kevp->fflags = kev64.fflags;
			kevp->data = kev64.data;
		} else {
			struct user32_kevent kev32;

			advance = sizeof (kev32);
			error = copyin(*addrp, (caddr_t)&kev32, advance);
			if (error)
				return (error);
			kevp->ident = (uintptr_t)kev32.ident;
			kevp->filter = kev32.filter;
			kevp->flags = kev32.flags;
			kevp->udata = CAST_USER_ADDR_T(kev32.udata);
			kevp->fflags = kev32.fflags;
			kevp->data = (intptr_t)kev32.data;
		}
	} else if (flags & KEVENT_FLAG_LEGACY64) {
		struct kevent64_s kev64;

		bzero(kevp, sizeof (*kevp));

		advance = sizeof (struct kevent64_s);
		error = copyin(*addrp, (caddr_t)&kev64, advance);
		if (error)
			return (error);
		kevp->ident = kev64.ident;
		kevp->filter = kev64.filter;
		kevp->flags = kev64.flags;
		kevp->udata = kev64.udata;
		kevp->fflags = kev64.fflags;
		kevp->data = kev64.data;
		kevp->ext[0] = kev64.ext[0];
		kevp->ext[1] = kev64.ext[1];
	} else {
		struct kevent_qos_s kevqos;

		bzero(kevp, sizeof (*kevp));

		advance = sizeof (struct kevent_qos_s);
		error = copyin(*addrp, (caddr_t)&kevqos, advance);
		if (error)
			return (error);
		kevp->ident = kevqos.ident;
		kevp->filter = kevqos.filter;
		kevp->flags = kevqos.flags;
		kevp->qos = kevqos.qos;
//		kevp->xflags = kevqos.xflags;
		kevp->udata = kevqos.udata;
		kevp->fflags = kevqos.fflags;
		kevp->data = kevqos.data;
		kevp->ext[0] = kevqos.ext[0];
		kevp->ext[1] = kevqos.ext[1];
		kevp->ext[2] = kevqos.ext[2];
		kevp->ext[3] = kevqos.ext[3];
	}
	if (!error)
		*addrp += advance;
	return (error);
}
static int
kevent_copyout(struct kevent_internal_s *kevp, user_addr_t *addrp, struct proc *p,
	       unsigned int flags)
{
	user_addr_t addr = *addrp;
	int advance;
	int error = 0;

	/*
	 * fully initialize the different output event structure
	 * types from the internal kevent (and some universal
	 * defaults for fields not represented in the internal
	 * form).
	 */
	if (flags & KEVENT_FLAG_LEGACY32) {
		assert((flags & KEVENT_FLAG_STACK_EVENTS) == 0);

		if (IS_64BIT_PROCESS(p)) {
			struct user64_kevent kev64;

			advance = sizeof (kev64);
			bzero(&kev64, advance);

			/*
			 * deal with the special case of a user-supplied
			 * value of (uintptr_t)-1.
			 */
			kev64.ident = (kevp->ident == (uintptr_t)-1) ?
				(uint64_t)-1LL : (uint64_t)kevp->ident;

			kev64.filter = kevp->filter;
			kev64.flags = kevp->flags;
			kev64.fflags = kevp->fflags;
			kev64.data = (int64_t) kevp->data;
			kev64.udata = kevp->udata;
			error = copyout((caddr_t)&kev64, addr, advance);
		} else {
			struct user32_kevent kev32;

			advance = sizeof (kev32);
			bzero(&kev32, advance);
			kev32.ident = (uint32_t)kevp->ident;
			kev32.filter = kevp->filter;
			kev32.flags = kevp->flags;
			kev32.fflags = kevp->fflags;
			kev32.data = (int32_t)kevp->data;
			kev32.udata = kevp->udata;
			error = copyout((caddr_t)&kev32, addr, advance);
		}
	} else if (flags & KEVENT_FLAG_LEGACY64) {
		struct kevent64_s kev64;

		advance = sizeof (struct kevent64_s);
		if (flags & KEVENT_FLAG_STACK_EVENTS) {
			addr -= advance;
		}
		bzero(&kev64, advance);
		kev64.ident = kevp->ident;
		kev64.filter = kevp->filter;
		kev64.flags = kevp->flags;
		kev64.fflags = kevp->fflags;
		kev64.data = (int64_t) kevp->data;
		kev64.udata = kevp->udata;
		kev64.ext[0] = kevp->ext[0];
		kev64.ext[1] = kevp->ext[1];
		error = copyout((caddr_t)&kev64, addr, advance);
	} else {
		struct kevent_qos_s kevqos;

		advance = sizeof (struct kevent_qos_s);
		if (flags & KEVENT_FLAG_STACK_EVENTS) {
			addr -= advance;
		}
		bzero(&kevqos, advance);
		kevqos.ident = kevp->ident;
		kevqos.filter = kevp->filter;
		kevqos.flags = kevp->flags;
		kevqos.qos = kevp->qos;
		kevqos.udata = kevp->udata;
		kevqos.fflags = kevp->fflags;
		kevqos.data = (int64_t) kevp->data;
		kevqos.ext[0] = kevp->ext[0];
		kevqos.ext[1] = kevp->ext[1];
		kevqos.ext[2] = kevp->ext[2];
		kevqos.ext[3] = kevp->ext[3];
		error = copyout((caddr_t)&kevqos, addr, advance);
	}
	if (!error) {
		if (flags & KEVENT_FLAG_STACK_EVENTS)
			*addrp = addr;
		else
			*addrp = addr + advance;
	}
	return (error);
}
static int
kevent_get_data_size(struct proc *p,
		     uint64_t data_available,
		     unsigned int flags,
		     user_size_t *residp)
{
	user_size_t resid;
	int error = 0;

	if (data_available != USER_ADDR_NULL) {
		if (flags & KEVENT_FLAG_KERNEL) {
			resid = *(user_size_t *)(uintptr_t)data_available;
		} else if (IS_64BIT_PROCESS(p)) {
			user64_size_t usize;
			error = copyin((user_addr_t)data_available, &usize, sizeof(usize));
			resid = (user_size_t)usize;
		} else {
			user32_size_t usize;
			error = copyin((user_addr_t)data_available, &usize, sizeof(usize));
			resid = (user_size_t)usize;
		}
		if (error)
			return (error);
	} else {
		resid = 0;
	}
	*residp = resid;
	return 0;
}

static int
kevent_put_data_size(struct proc *p,
		     uint64_t data_available,
		     unsigned int flags,
		     user_size_t resid)
{
	int error = 0;

	if (data_available) {
		if (flags & KEVENT_FLAG_KERNEL) {
			*(user_size_t *)(uintptr_t)data_available = resid;
		} else if (IS_64BIT_PROCESS(p)) {
			user64_size_t usize = (user64_size_t)resid;
			error = copyout(&usize, (user_addr_t)data_available, sizeof(usize));
		} else {
			user32_size_t usize = (user32_size_t)resid;
			error = copyout(&usize, (user_addr_t)data_available, sizeof(usize));
		}
	}
	return (error);
}
/*
 * kevent_continue - continue a kevent syscall after blocking
 *
 *	assume we inherit a use count on the kq fileglob.
 */
__attribute__((noreturn))
static void
kevent_continue(__unused struct kqueue *kq, void *data, int error)
{
	struct _kevent *cont_args;
	struct fileproc *fp;
	uint64_t data_available;
	user_size_t data_size;
	user_size_t data_resid;
	unsigned int flags;
	int32_t *retval;
	int noutputs;
	int fd;
	struct proc *p = current_proc();

	cont_args = (struct _kevent *)data;
	data_available = cont_args->data_available;
	flags = cont_args->process_data.fp_flags;
	data_size = cont_args->process_data.fp_data_size;
	data_resid = cont_args->process_data.fp_data_resid;
	noutputs = cont_args->eventout;
	retval = cont_args->retval;
	fd = cont_args->fd;
	fp = cont_args->fp;

	kevent_put_kq(p, fd, fp, kq);

	/* don't abandon other output just because of residual copyout failures */
	if (error == 0 && data_available && data_resid != data_size) {
		(void)kevent_put_data_size(p, data_available, flags, data_resid);
	}

	/* don't restart after signals... */
	if (error == ERESTART)
		error = EINTR;
	else if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0)
		*retval = noutputs;
	unix_syscall_return(error);
}
/*
 * kevent - [syscall] register and wait for kernel events
 */
int
kevent(struct proc *p, struct kevent_args *uap, int32_t *retval)
{
	unsigned int flags = KEVENT_FLAG_LEGACY32;

	return kevent_internal(p,
	                       (kqueue_id_t)uap->fd, NULL,
	                       uap->changelist, uap->nchanges,
	                       uap->eventlist, uap->nevents,
	                       0ULL, 0ULL,
	                       flags,
	                       uap->timeout,
	                       kevent_continue,
	                       retval);
}
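/*
 * kevent64 - [syscall] register and wait for kernel events
 *
 *	Same as kevent(), but takes struct kevent64_s entries.
 */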
int
kevent64(struct proc *p, struct kevent64_args *uap, int32_t *retval)
{
	unsigned int flags;

	/* restrict to user flags and set legacy64 */
	flags = uap->flags & KEVENT_FLAG_USER;
	flags |= KEVENT_FLAG_LEGACY64;

	return kevent_internal(p,
	                       (kqueue_id_t)uap->fd, NULL,
	                       uap->changelist, uap->nchanges,
	                       uap->eventlist, uap->nevents,
	                       0ULL, 0ULL,
	                       flags,
	                       uap->timeout,
	                       kevent_continue,
	                       retval);
}
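/*
 * kevent_qos - [syscall] register and wait for kernel events
 *
 *	QoS-aware variant taking struct kevent_qos_s entries and an
 *	optional out-of-band data buffer.
 */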
int
kevent_qos(struct proc *p, struct kevent_qos_args *uap, int32_t *retval)
{
	/* restrict to user flags */
	uap->flags &= KEVENT_FLAG_USER;

	return kevent_internal(p,
	                       (kqueue_id_t)uap->fd, NULL,
	                       uap->changelist, uap->nchanges,
	                       uap->eventlist, uap->nevents,
	                       uap->data_out, (uint64_t)uap->data_available,
	                       uap->flags,
	                       0ULL,
	                       kevent_continue,
	                       retval);
}
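/*
 * kevent_qos_internal - kernel entry point for the kevent_qos path
 */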
int
kevent_qos_internal(struct proc *p, int fd,
                    user_addr_t changelist, int nchanges,
                    user_addr_t eventlist, int nevents,
                    user_addr_t data_out, user_size_t *data_available,
                    unsigned int flags,
                    int32_t *retval)
{
	return kevent_internal(p,
	                       (kqueue_id_t)fd, NULL,
	                       changelist, nchanges,
	                       eventlist, nevents,
	                       data_out, (uint64_t)data_available,
	                       (flags | KEVENT_FLAG_KERNEL),
	                       0ULL,
	                       NULL,
	                       retval);
}
int
kevent_id(struct proc *p, struct kevent_id_args *uap, int32_t *retval)
{
	/* restrict to user flags */
	uap->flags &= KEVENT_FLAG_USER;

	return kevent_internal(p,
	                       (kqueue_id_t)uap->id, NULL,
	                       uap->changelist, uap->nchanges,
	                       uap->eventlist, uap->nevents,
	                       uap->data_out, (uint64_t)uap->data_available,
	                       (uap->flags | KEVENT_FLAG_DYNAMIC_KQUEUE),
	                       0ULL,
	                       kevent_continue,
	                       retval);
}
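/*
 * kevent_id_internal - kernel entry point for the kevent_id (workloop) path
 */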
int
kevent_id_internal(struct proc *p, kqueue_id_t *id,
                   user_addr_t changelist, int nchanges,
                   user_addr_t eventlist, int nevents,
                   user_addr_t data_out, user_size_t *data_available,
                   unsigned int flags,
                   int32_t *retval)
{
	return kevent_internal(p,
	                       *id, id,
	                       changelist, nchanges,
	                       eventlist, nevents,
	                       data_out, (uint64_t)data_available,
	                       (flags | KEVENT_FLAG_KERNEL | KEVENT_FLAG_DYNAMIC_KQUEUE),
	                       0ULL,
	                       NULL,
	                       retval);
}
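/*
 * kevent_get_timeout - convert the caller-supplied timeout to an
 *	absolute deadline (or an immediate/forever value).
 */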
static int
kevent_get_timeout(struct proc *p,
                   user_addr_t utimeout,
                   unsigned int flags,
                   struct timeval *atvp)
{
	struct timeval atv;
	int error = 0;

	if (flags & KEVENT_FLAG_IMMEDIATE) {
		getmicrouptime(&atv);
	} else if (utimeout != USER_ADDR_NULL) {
		struct timeval rtv;
		if (flags & KEVENT_FLAG_KERNEL) {
			struct timespec *tsp = (struct timespec *)utimeout;
			TIMESPEC_TO_TIMEVAL(&rtv, tsp);
		} else if (IS_64BIT_PROCESS(p)) {
			struct user64_timespec ts;
			error = copyin(utimeout, &ts, sizeof(ts));
			if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0)
				error = EINVAL;
			else
				TIMESPEC_TO_TIMEVAL(&rtv, &ts);
		} else {
			struct user32_timespec ts;
			error = copyin(utimeout, &ts, sizeof(ts));
			TIMESPEC_TO_TIMEVAL(&rtv, &ts);
		}
		if (error)
			return (error);
		if (itimerfix(&rtv))
			return (EINVAL);
		getmicrouptime(&atv);
		timevaladd(&atv, &rtv);
	} else {
		/* wait forever value */
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	*atvp = atv;
	return (0);
}
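/*
 * kevent_set_kq_mode - mark the kqueue with the kevent structure type it
 *	services (legacy32, legacy64 or qos); mixing types is an error.
 */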
static int
kevent_set_kq_mode(struct kqueue *kq, unsigned int flags)
{
	int error = 0;

	/* each kq should only be used for events of one type */
	kqlock(kq);
	if (kq->kq_state & (KQ_KEV32 | KQ_KEV64 | KQ_KEV_QOS)) {
		if (flags & KEVENT_FLAG_LEGACY32) {
			if ((kq->kq_state & KQ_KEV32) == 0) {
				error = EINVAL;
			}
		} else if (kq->kq_state & KQ_KEV32) {
			error = EINVAL;
		}
	} else if (flags & KEVENT_FLAG_LEGACY32) {
		kq->kq_state |= KQ_KEV32;
	} else if (flags & KEVENT_FLAG_LEGACY64) {
		kq->kq_state |= KQ_KEV64;
	} else {
		kq->kq_state |= KQ_KEV_QOS;
	}
	kqunlock(kq);
	return error;
}
#define KQ_HASH(val, mask)  (((val) ^ (val >> 8)) & (mask))
#define CONFIG_KQ_HASHSIZE  CONFIG_KN_HASHSIZE

static inline void
kqhash_lock(proc_t p)
{
	lck_mtx_lock_spin_always(&p->p_fd->fd_kqhashlock);
}

static inline void
kqhash_lock_held(__assert_only proc_t p)
{
	LCK_MTX_ASSERT(&p->p_fd->fd_kqhashlock, LCK_MTX_ASSERT_OWNED);
}

static inline void
kqhash_unlock(proc_t p)
{
	lck_mtx_unlock(&p->p_fd->fd_kqhashlock);
}
static void
kqueue_hash_init_if_needed(proc_t p)
{
	struct filedesc *fdp = p->p_fd;

	kqhash_lock_held(p);

	if (__improbable(fdp->fd_kqhash == NULL)) {
		struct kqlist *alloc_hash;
		u_long alloc_mask;

		kqhash_unlock(p);
		alloc_hash = hashinit(CONFIG_KQ_HASHSIZE, M_KQUEUE, &alloc_mask);
		kqhash_lock(p);

		/* See if we won the race */
		if (fdp->fd_kqhashmask == 0) {
			fdp->fd_kqhash = alloc_hash;
			fdp->fd_kqhashmask = alloc_mask;
		} else {
			kqhash_unlock(p);
			FREE(alloc_hash, M_KQUEUE);
			kqhash_lock(p);
		}
	}
}
/*
 * Called with the kqhash_lock() held
 */
static void
kqueue_hash_insert(struct proc *p, kqueue_id_t id, struct kqueue *kq)
{
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	struct filedesc *fdp = p->p_fd;
	struct kqlist *list;

	/* should hold the kq hash lock */
	kqhash_lock_held(p);

	if ((kq->kq_state & KQ_DYNAMIC) == 0) {
		assert(kq->kq_state & KQ_DYNAMIC);
		return;
	}

	/* only dynamically allocate workloop kqs for now */
	assert(kq->kq_state & KQ_WORKLOOP);
	assert(fdp->fd_kqhash);

	kqwl->kqwl_dynamicid = id;

	list = &fdp->fd_kqhash[KQ_HASH(id, fdp->fd_kqhashmask)];
	SLIST_INSERT_HEAD(list, kqwl, kqwl_hashlink);
}
/* Called with kqhash_lock held */
static void
kqueue_hash_remove(struct proc *p, struct kqueue *kq)
{
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	struct filedesc *fdp = p->p_fd;
	struct kqlist *list;

	/* should hold the kq hash lock */
	kqhash_lock_held(p);

	if ((kq->kq_state & KQ_DYNAMIC) == 0) {
		assert(kq->kq_state & KQ_DYNAMIC);
		return;
	}

	assert(kq->kq_state & KQ_WORKLOOP); /* for now */
	list = &fdp->fd_kqhash[KQ_HASH(kqwl->kqwl_dynamicid, fdp->fd_kqhashmask)];
	SLIST_REMOVE(list, kqwl, kqworkloop, kqwl_hashlink);
}
/* Called with kqhash_lock held */
static struct kqueue *
kqueue_hash_lookup(struct proc *p, kqueue_id_t id)
{
	struct filedesc *fdp = p->p_fd;
	struct kqlist *list;
	struct kqworkloop *kqwl;

	/* should hold the kq hash lock */
	kqhash_lock_held(p);

	if (fdp->fd_kqhashmask == 0) return NULL;

	list = &fdp->fd_kqhash[KQ_HASH(id, fdp->fd_kqhashmask)];
	SLIST_FOREACH(kqwl, list, kqwl_hashlink) {
		if (kqwl->kqwl_dynamicid == id) {
			struct kqueue *kq = (struct kqueue *)kqwl;

			assert(kq->kq_state & KQ_DYNAMIC);
			assert(kq->kq_state & KQ_WORKLOOP); /* for now */
			return kq;
		}
	}
	return NULL;
}
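/*
 * kqueue_release_last - drop a reference on a dynamic kqueue and, if it
 *	was the last one, remove it from the hash and deallocate it.
 */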
static void
kqueue_release_last(struct proc *p, struct kqueue *kq)
{
	if (kq->kq_state & KQ_DYNAMIC) {
		kqhash_lock(p);
		if (kqueue_release(kq, KQUEUE_MIGHT_BE_LAST_REF)) {
			kqueue_hash_remove(p, kq);
			kqhash_unlock(p);
			kqueue_dealloc(kq);
		} else {
			kqhash_unlock(p);
		}
	}
}
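/*
 * kevent_get_bound_kq - return the kqueue a thread is bound to, provided
 *	the binding flags and kqueue state match what the caller expects.
 */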
static struct kqueue *
kevent_get_bound_kq(__assert_only struct proc *p, thread_t thread,
                    unsigned int kev_flags, unsigned int kq_flags)
{
	struct kqueue *kq;
	struct uthread *ut = get_bsdthread_info(thread);

	assert(p == get_bsdthreadtask_info(thread));

	if (!(ut->uu_kqueue_flags & kev_flags))
		return NULL;

	kq = ut->uu_kqueue_bound;
	if (!kq)
		return NULL;

	if (!(kq->kq_state & kq_flags))
		return NULL;

	return kq;
}
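/*
 * kevent_get_kq - resolve the kqueue to operate on
 *
 *	Dynamic workloop ids are looked up (or allocated) in the per-process
 *	hash, the workq flag selects the process-wide workq kqueue, and
 *	anything else is treated as a file descriptor.
 */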
static int
kevent_get_kq(struct proc *p, kqueue_id_t id, unsigned int flags, struct fileproc **fpp, int *fdp, struct kqueue **kqp)
{
	struct filedesc *descp = p->p_fd;
	struct fileproc *fp = NULL;
	struct kqueue *kq = NULL;
	int fd = 0;
	int error = 0;

	/* Was the workloop flag passed?  Then it is for sure only a workloop */
	if (flags & KEVENT_FLAG_DYNAMIC_KQUEUE) {
		assert(flags & KEVENT_FLAG_WORKLOOP);
		if (id == (kqueue_id_t)-1 &&
		    (flags & KEVENT_FLAG_KERNEL) &&
		    (flags & KEVENT_FLAG_WORKLOOP)) {

			assert(is_workqueue_thread(current_thread()));

			/*
			 * when kevent_id_internal is called from within the
			 * kernel, and the passed 'id' value is '-1' then we
			 * look for the currently bound workloop kq.
			 *
			 * Until pthread kext avoids calling in to kevent_id_internal
			 * for threads whose fulfill is canceled, calling in unbound
			 * can't be fatal.
			 */
			kq = kevent_get_bound_kq(p, current_thread(),
			                         KEVENT_FLAG_WORKLOOP, KQ_WORKLOOP);
			if (kq) {
				kqueue_retain(kq);
			} else {
				struct uthread *ut = get_bsdthread_info(current_thread());

				/* If thread is unbound due to cancel, just return an error */
				if (ut->uu_kqueue_flags == KEVENT_FLAG_WORKLOOP_CANCELED) {
					ut->uu_kqueue_flags = 0;
					error = ECANCELED;
					goto out;
				} else {
					panic("Unbound thread called kevent_internal with id=-1"
					      " uu_kqueue_flags:0x%x, uu_kqueue_bound:%p",
					      ut->uu_kqueue_flags, ut->uu_kqueue_bound);
				}
			}

			*fpp = NULL;
			*fdp = 0;
			*kqp = kq;
			return error;
		}

		/* try shortcut on kq lookup for bound threads */
		kq = kevent_get_bound_kq(p, current_thread(), KEVENT_FLAG_WORKLOOP, KQ_WORKLOOP);
		if (kq != NULL && ((struct kqworkloop *)kq)->kqwl_dynamicid == id) {

			if (flags & KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST) {
				error = EEXIST;
				kq = NULL;
				goto out;
			}

			/* retain a reference while working with this kq. */
			assert(kq->kq_state & KQ_DYNAMIC);
			kqueue_retain(kq);
			goto out;
		}

		/* look for the kq on the hash table */
		kqhash_lock(p);
		kq = kqueue_hash_lookup(p, id);
		if (kq == NULL) {
			kqhash_unlock(p);

			if (flags & KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST) {
				error = ENOENT;
				goto out;
			}

			struct kqueue *alloc_kq;
			alloc_kq = kqueue_alloc(p, flags);
			if (alloc_kq) {
				kqhash_lock(p);
				kqueue_hash_init_if_needed(p);
				kq = kqueue_hash_lookup(p, id);
				if (kq == NULL) {
					/* insert our new one */
					kq = alloc_kq;
					kqueue_hash_insert(p, id, kq);
					kqhash_unlock(p);
				} else {
					/* lost race, retain existing workloop */
					kqueue_retain(kq);
					kqhash_unlock(p);
					kqueue_release(alloc_kq, KQUEUE_MIGHT_BE_LAST_REF);
					kqueue_dealloc(alloc_kq);
				}
			} else {
				error = ENOMEM;
				goto out;
			}
		} else {

			if (flags & KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST) {
				kqhash_unlock(p);
				kq = NULL;
				error = EEXIST;
				goto out;
			}

			/* retain a reference while working with this kq. */
			assert(kq->kq_state & KQ_DYNAMIC);
			kqueue_retain(kq);
			kqhash_unlock(p);
		}

	} else if (flags & KEVENT_FLAG_WORKQ) {
		/* must already exist for bound threads. */
		if (flags & KEVENT_FLAG_KERNEL) {
			assert(descp->fd_wqkqueue != NULL);
		}

		/*
		 * use the private kq associated with the proc workq.
		 * Just being a thread within the process (and not
		 * being the exit/exec thread) is enough to hold a
		 * reference on this special kq.
		 */
		kq = descp->fd_wqkqueue;
		if (kq == NULL) {
			struct kqueue *alloc_kq = kqueue_alloc(p, KEVENT_FLAG_WORKQ);
			if (alloc_kq == NULL)
				return ENOMEM;

			knhash_lock(p);
			if (descp->fd_wqkqueue == NULL) {
				kq = descp->fd_wqkqueue = alloc_kq;
				knhash_unlock(p);
			} else {
				knhash_unlock(p);
				kq = descp->fd_wqkqueue;
				kqueue_dealloc(alloc_kq);
			}
		}
	} else {
		/* get a usecount for the kq itself */
		fd = (int)id;
		if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
			return (error);
	}
	if ((error = kevent_set_kq_mode(kq, flags)) != 0) {
		/* drop the usecount */
		if (fp != NULL)
			fp_drop(p, fd, fp, 0);
		return error;
	}

out:
	*fpp = fp;
	*fdp = fd;
	*kqp = kq;
	return (error);
}
static void
kevent_put_kq(struct proc *p,
              kqueue_id_t id,
              struct fileproc *fp,
              struct kqueue *kq)
{
	kqueue_release_last(p, kq);
	if (fp != NULL) {
		assert((kq->kq_state & KQ_WORKQ) == 0);
		fp_drop(p, (int)id, fp, 0);
	}
}
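/*
 * kevent_workloop_serial_no_copyin - read the dispatch queue serial number
 *	associated with a workloop id from user memory (0 on any failure).
 */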
static uint64_t
kevent_workloop_serial_no_copyin(proc_t p, uint64_t workloop_id)
{
	uint64_t serial_no = 0;
	user_addr_t addr;
	int rc;

	if (workloop_id == 0 || p->p_dispatchqueue_serialno_offset == 0) {
		return 0;
	}
	addr = (user_addr_t)(workloop_id + p->p_dispatchqueue_serialno_offset);

	if (proc_is64bit(p)) {
		rc = copyin(addr, (caddr_t)&serial_no, sizeof(serial_no));
	} else {
		uint32_t serial_no32 = 0;
		rc = copyin(addr, (caddr_t)&serial_no32, sizeof(serial_no32));
		serial_no = serial_no32;
	}
	return rc == 0 ? serial_no : 0;
}
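/*
 * kevent_exit_on_workloop_ownership_leak - kill the current process when a
 *	thread is found to still own a workloop, recording the workloop id and
 *	dispatch queue serial number in the exit reason.
 */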
static int
kevent_exit_on_workloop_ownership_leak(thread_t thread)
{
	proc_t p = current_proc();
	struct filedesc *fdp = p->p_fd;
	kqueue_id_t workloop_id = 0;
	os_reason_t reason = OS_REASON_NULL;
	mach_vm_address_t addr;
	uint32_t reason_size;

	kqhash_lock(p);
	if (fdp->fd_kqhashmask > 0) {
		for (uint32_t i = 0; i < fdp->fd_kqhashmask + 1; i++) {
			struct kqworkloop *kqwl;

			SLIST_FOREACH(kqwl, &fdp->fd_kqhash[i], kqwl_hashlink) {
				struct kqueue *kq = &kqwl->kqwl_kqueue;
				if ((kq->kq_state & KQ_DYNAMIC) && kqwl->kqwl_owner == thread) {
					workloop_id = kqwl->kqwl_dynamicid;
					break;
				}
			}
		}
	}
	kqhash_unlock(p);
	assert(workloop_id);

	reason = os_reason_create(OS_REASON_LIBSYSTEM,
			OS_REASON_LIBSYSTEM_CODE_WORKLOOP_OWNERSHIP_LEAK);
	if (reason == OS_REASON_NULL) {
		goto out;
	}

	reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
	reason_size = 2 * sizeof(uint64_t);
	reason_size = kcdata_estimate_required_buffer_size(2, reason_size);
	if (os_reason_alloc_buffer(reason, reason_size) != 0) {
		goto out;
	}

	struct kcdata_descriptor *kcd = &reason->osr_kcd_descriptor;

	if (kcdata_get_memory_addr(kcd, EXIT_REASON_WORKLOOP_ID,
			sizeof(workloop_id), &addr) == KERN_SUCCESS) {
		kcdata_memcpy(kcd, addr, &workloop_id, sizeof(workloop_id));
	}

	uint64_t serial_no = kevent_workloop_serial_no_copyin(p, workloop_id);
	if (serial_no && kcdata_get_memory_addr(kcd, EXIT_REASON_DISPATCH_QUEUE_NO,
			sizeof(serial_no), &addr) == KERN_SUCCESS) {
		kcdata_memcpy(kcd, addr, &serial_no, sizeof(serial_no));
	}

out:
#if DEVELOPMENT || DEBUG
	psignal_try_thread_with_reason(p, thread, SIGABRT, reason);
	return 0;
#else
	return exit_with_reason(p, W_EXITCODE(0, SIGKILL), (int *)NULL,
			FALSE, FALSE, 0, reason);
#endif
}
static int
kevent_servicer_detach_preflight(thread_t thread, unsigned int flags, struct kqueue *kq)
{
	int error = 0;
	struct kqworkloop *kqwl;
	struct uthread *ut;
	struct kqrequest *kqr;

	if (!(flags & KEVENT_FLAG_WORKLOOP) || !(kq->kq_state & KQ_WORKLOOP))
		return EINVAL;

	/* only kq created with KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD from userspace can have attached threads */
	if (!(kq->kq_state & KQ_NO_WQ_THREAD))
		return EINVAL;

	/* allow detach only on not wq threads */
	if (is_workqueue_thread(thread))
		return EINVAL;

	/* check that the current thread is bound to the requested wq */
	ut = get_bsdthread_info(thread);
	if (ut->uu_kqueue_bound != kq)
		return EINVAL;

	kqwl = (struct kqworkloop *)kq;
	kqwl_req_lock(kqwl);
	kqr = &kqwl->kqwl_request;

	/* check that the wq is bound to the thread */
	if ((kqr->kqr_state & KQR_BOUND) == 0  || (kqr->kqr_thread != thread))
		error = EINVAL;

	kqwl_req_unlock(kqwl);

	return error;
}
static void
kevent_servicer_detach_thread(struct proc *p, kqueue_id_t id, thread_t thread,
		unsigned int flags, struct kqueue *kq)
{
	struct kqworkloop *kqwl;
	struct uthread *ut;

	assert((flags & KEVENT_FLAG_WORKLOOP) && (kq->kq_state & KQ_WORKLOOP));

	/* allow detach only on not wqthreads threads */
	assert(!is_workqueue_thread(thread));

	/* only kq created with KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD from userspace can have attached threads */
	assert(kq->kq_state & KQ_NO_WQ_THREAD);

	/* check that the current thread is bound to the requested kq */
	ut = get_bsdthread_info(thread);
	assert(ut->uu_kqueue_bound == kq);

	kqwl = (struct kqworkloop *)kq;

	kqlock(kq);

	/* unbind the thread.
	 * unbind itself checks if still processing and ends it.
	 */
	kqworkloop_unbind_thread(kqwl, thread, flags);

	kqunlock(kq);

	kevent_put_kq(p, id, NULL, kq);
}
static int
kevent_servicer_attach_thread(thread_t thread, unsigned int flags, struct kqueue *kq)
{
	int error = 0;
	struct kqworkloop *kqwl;
	struct uthread *ut;
	struct kqrequest *kqr;

	if (!(flags & KEVENT_FLAG_WORKLOOP) || !(kq->kq_state & KQ_WORKLOOP))
		return EINVAL;

	/* only kq created with KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD from userspace can have attached threads*/
	if (!(kq->kq_state & KQ_NO_WQ_THREAD))
		return EINVAL;

	/* allow attach only on not wqthreads */
	if (is_workqueue_thread(thread))
		return EINVAL;

	/* check that the thread is not already bound */
	ut = get_bsdthread_info(thread);
	if (ut->uu_kqueue_bound != NULL)
		return EINVAL;

	assert(ut->uu_kqueue_flags == 0);

	kqlock(kq);
	kqwl = (struct kqworkloop *)kq;
	kqwl_req_lock(kqwl);
	kqr = &kqwl->kqwl_request;

	/* check that the kqueue is not already bound */
	if (kqr->kqr_state & (KQR_BOUND | KQR_THREQUESTED | KQR_DRAIN)) {
		error = EINVAL;
		goto out;
	}

	assert(kqr->kqr_thread == NULL);
	assert((kqr->kqr_state & KQR_PROCESSING) == 0);

	kqr->kqr_state |= KQR_THREQUESTED;
	kqr->kqr_qos_index = THREAD_QOS_UNSPECIFIED;
	kqr->kqr_override_index = THREAD_QOS_UNSPECIFIED;
	kqr->kqr_dsync_owner_qos = THREAD_QOS_UNSPECIFIED;
	kqr->kqr_owner_override_is_sync = 0;

	kqworkloop_bind_thread_impl(kqwl, thread, KEVENT_FLAG_WORKLOOP);

	/* get a ref on the wlkq on behalf of the attached thread */
	kqueue_retain(kq);

out:
	kqwl_req_unlock(kqwl);
	kqunlock(kq);

	return error;
}
boolean_t
kevent_args_requesting_events(unsigned int flags, int nevents)
{
	return (!(flags & KEVENT_FLAG_ERROR_EVENTS) && nevents > 0);
}
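/*
 * kevent_internal - common core of all the kevent entry points
 *
 *	Registers the supplied changelist, then (unless only error events
 *	were requested) scans the kqueue and copies triggered events back
 *	out, possibly blocking with the supplied continuation.
 */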
static int
kevent_internal(struct proc *p,
                kqueue_id_t id, kqueue_id_t *id_out,
                user_addr_t changelist, int nchanges,
                user_addr_t ueventlist, int nevents,
                user_addr_t data_out, uint64_t data_available,
                unsigned int flags,
                user_addr_t utimeout,
                kqueue_continue_t continuation,
                int32_t *retval)
{
	struct _kevent *cont_args;
	uthread_t ut;
	struct kqueue *kq;
	struct fileproc *fp = NULL;
	int fd = 0;
	struct kevent_internal_s kev;
	int error, noutputs;
	struct timeval atv;
	user_size_t data_size;
	user_size_t data_resid;
	thread_t thread = current_thread();

	/* Don't allow user-space threads to process output events from the workq kqs */
	if (((flags & (KEVENT_FLAG_WORKQ | KEVENT_FLAG_KERNEL)) == KEVENT_FLAG_WORKQ) &&
	    kevent_args_requesting_events(flags, nevents))
		return EINVAL;

	/* restrict dynamic kqueue allocation to workloops (for now) */
	if ((flags & (KEVENT_FLAG_DYNAMIC_KQUEUE | KEVENT_FLAG_WORKLOOP)) == KEVENT_FLAG_DYNAMIC_KQUEUE)
		return EINVAL;

	if (flags & (KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH | KEVENT_FLAG_WORKLOOP_SERVICER_DETACH |
	    KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST | KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST | KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD)) {

		/* allowed only on workloops when calling kevent_id from user-space */
		if (!(flags & KEVENT_FLAG_WORKLOOP) || (flags & KEVENT_FLAG_KERNEL) || !(flags & KEVENT_FLAG_DYNAMIC_KQUEUE))
			return EINVAL;

		/* cannot attach and detach simultaneously*/
		if ((flags & KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH) && (flags & KEVENT_FLAG_WORKLOOP_SERVICER_DETACH))
			return EINVAL;

		/* cannot ask for events and detach */
		if ((flags & KEVENT_FLAG_WORKLOOP_SERVICER_DETACH) && kevent_args_requesting_events(flags, nevents))
			return EINVAL;
	}

	/* prepare to deal with stack-wise allocation of out events */
	if (flags & KEVENT_FLAG_STACK_EVENTS) {
		int scale = ((flags & KEVENT_FLAG_LEGACY32) ?
		             (IS_64BIT_PROCESS(p) ? sizeof(struct user64_kevent) :
		                                    sizeof(struct user32_kevent)) :
		             ((flags & KEVENT_FLAG_LEGACY64) ? sizeof(struct kevent64_s) :
		                                               sizeof(struct kevent_qos_s)));
		ueventlist += nevents * scale;
	}

	/* convert timeout to absolute - if we have one (and not immediate) */
	error = kevent_get_timeout(p, utimeout, flags, &atv);
	if (error)
		return error;

	/* copyin initial value of data residual from data_available */
	error = kevent_get_data_size(p, data_available, flags, &data_size);
	if (error)
		return error;

	/* get the kq we are going to be working on */
	error = kevent_get_kq(p, id, flags, &fp, &fd, &kq);
	if (error)
		return error;

	/* only bound threads can receive events on workloops */
	if ((flags & KEVENT_FLAG_WORKLOOP) && kevent_args_requesting_events(flags, nevents)) {
		ut = (uthread_t)get_bsdthread_info(thread);
		if (ut->uu_kqueue_bound != kq) {
			error = EXDEV;
			goto out;
		}
	}

	/* attach the current thread if necessary */
	if (flags & KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH) {
		error = kevent_servicer_attach_thread(thread, flags, kq);
		if (error)
			goto out;
	} else {
		/* before processing events and committing to the system call, return an error if the thread cannot be detached when requested */
		if (flags & KEVENT_FLAG_WORKLOOP_SERVICER_DETACH) {
			error = kevent_servicer_detach_preflight(thread, flags, kq);
			if (error)
				goto out;
		}
	}

	if (id_out && kq && (flags & KEVENT_FLAG_WORKLOOP)) {
		assert(kq->kq_state & KQ_WORKLOOP);
		struct kqworkloop *kqwl;
		kqwl = (struct kqworkloop *)kq;
		*id_out = kqwl->kqwl_dynamicid;
	}

	/* register all the change requests the user provided... */
	noutputs = 0;
	while (nchanges > 0 && error == 0) {
		error = kevent_copyin(&changelist, &kev, p, flags);
		if (error)
			break;

		/* Make sure user doesn't pass in any system flags */
		kev.flags &= ~EV_SYSFLAGS;

		kevent_register(kq, &kev, p);

		if (nevents > 0 &&
		    ((kev.flags & EV_ERROR) || (kev.flags & EV_RECEIPT))) {
			if (kev.flags & EV_RECEIPT) {
				kev.flags |= EV_ERROR;
				kev.data = 0;
			}
			error = kevent_copyout(&kev, &ueventlist, p, flags);
			if (error == 0) {
				nevents--;
				noutputs++;
			}
		} else if (kev.flags & EV_ERROR) {
			error = kev.data;
		}
		nchanges--;
	}

	/* short-circuit the scan if we only want error events */
	if (flags & KEVENT_FLAG_ERROR_EVENTS)
		nevents = 0;

	/* process pending events */
	if (nevents > 0 && noutputs == 0 && error == 0) {
		/* store the continuation/completion data in the uthread */
		ut = (uthread_t)get_bsdthread_info(thread);
		cont_args = &ut->uu_kevent.ss_kevent;
		cont_args->fp = fp;
		cont_args->fd = fd;
		cont_args->retval = retval;
		cont_args->eventlist = ueventlist;
		cont_args->eventcount = nevents;
		cont_args->eventout = noutputs;
		cont_args->data_available = data_available;
		cont_args->process_data.fp_fd = (int)id;
		cont_args->process_data.fp_flags = flags;
		cont_args->process_data.fp_data_out = data_out;
		cont_args->process_data.fp_data_size = data_size;
		cont_args->process_data.fp_data_resid = data_size;

		error = kqueue_scan(kq, kevent_callback,
		                    continuation, cont_args,
		                    &cont_args->process_data,
		                    &atv, p);

		/* process remaining outputs */
		noutputs = cont_args->eventout;
		data_resid = cont_args->process_data.fp_data_resid;

		/* copyout residual data size value (if it needs to be copied out) */
		/* don't abandon other output just because of residual copyout failures */
		if (error == 0 && data_available && data_resid != data_size) {
			(void)kevent_put_data_size(p, data_available, flags, data_resid);
		}
	}

	/* detach the current thread if necessary */
	if (flags & KEVENT_FLAG_WORKLOOP_SERVICER_DETACH) {
		kevent_servicer_detach_thread(p, id, thread, flags, kq);
	}

out:
	kevent_put_kq(p, id, fp, kq);

	/* don't restart after signals... */
	if (error == ERESTART)
		error = EINTR;
	else if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0)
		*retval = noutputs;
	return (error);
}
/*
 * kevent_callback - callback for each individual event
 *
 * called with nothing locked
 * caller holds a reference on the kqueue
 */
static int
kevent_callback(__unused struct kqueue *kq, struct kevent_internal_s *kevp,
		void *data)
{
	struct _kevent *cont_args;
	int error;

	cont_args = (struct _kevent *)data;
	assert(cont_args->eventout < cont_args->eventcount);

	/*
	 * Copy out the appropriate amount of event data for this user.
	 */
	error = kevent_copyout(kevp, &cont_args->eventlist, current_proc(),
	                       cont_args->process_data.fp_flags);

	/*
	 * If there isn't space for additional events, return
	 * a harmless error to stop the processing here
	 */
	if (error == 0 && ++cont_args->eventout == cont_args->eventcount)
		error = EWOULDBLOCK;
	return (error);
}
/*
 * kevent_description - format a description of a kevent for diagnostic output
 *
 * called with a 256-byte string buffer
 */
char *
kevent_description(struct kevent_internal_s *kevp, char *s, size_t n)
{
	snprintf(s, n,
	    "{.ident=%#llx, .filter=%d, .flags=%#x, .udata=%#llx, .fflags=%#x, .data=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}",
	    kevp->ident,
	    kevp->filter,
	    kevp->flags,
	    kevp->udata,
	    kevp->fflags,
	    kevp->data,
	    kevp->ext[0],
	    kevp->ext[1]);

	return (s);
}
/*
 * kevent_register - add a new event to a kqueue
 *
 *	Creates a mapping between the event source and
 *	the kqueue via a knote data structure.
 *
 *	Because many/most the event sources are file
 *	descriptor related, the knote is linked off
 *	the filedescriptor table for quick access.
 *
 *	called with nothing locked
 *	caller holds a reference on the kqueue
 */
void
kevent_register(struct kqueue *kq, struct kevent_internal_s *kev,
    __unused struct proc *ctxp)
{
	struct proc *p = kq->kq_p;
	const struct filterops *fops;
	struct knote *kn = NULL;
	int result = 0;
	int error = 0;
	unsigned short kev_flags = kev->flags;
	int knoteuse_flags = KNUSE_NONE;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0) {
			error = EINVAL;
			goto out;
		}
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	} else {
		error = EINVAL;
		goto out;
	}

	/* restrict EV_VANISHED to adding udata-specific dispatch kevents */
	if ((kev->flags & EV_VANISHED) &&
	    (kev->flags & (EV_ADD | EV_DISPATCH2)) != (EV_ADD | EV_DISPATCH2)) {
		error = EINVAL;
		goto out;
	}

	/* Simplify the flags - delete and disable overrule */
	if (kev->flags & EV_DELETE)
		kev->flags &= ~EV_ADD;
	if (kev->flags & EV_DISABLE)
		kev->flags &= ~EV_ENABLE;

	if (kq->kq_state & KQ_WORKLOOP) {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_REGISTER),
		              ((struct kqworkloop *)kq)->kqwl_dynamicid,
		              kev->udata, kev->flags, kev->filter);
	} else if (kq->kq_state & KQ_WORKQ) {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_REGISTER),
		              0, kev->udata, kev->flags, kev->filter);
	} else {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_REGISTER),
		              VM_KERNEL_UNSLIDE_OR_PERM(kq),
		              kev->udata, kev->flags, kev->filter);
	}

restart:

	/* find the matching knote from the fd tables/hashes */
	kn = kq_find_knote_and_kq_lock(kq, kev, fops->f_isfd, p);

	if (kn == NULL) {
		if (kev->flags & EV_ADD) {
			struct fileproc *knote_fp = NULL;

			/* grab a file reference for the new knote */
			if (fops->f_isfd) {
				if ((error = fp_lookup(p, kev->ident, &knote_fp, 0)) != 0) {
					goto out;
				}
			}

			kn = knote_alloc();
			if (kn == NULL) {
				error = ENOMEM;
				if (knote_fp != NULL)
					fp_drop(p, kev->ident, knote_fp, 0);
				goto out;
			}

			kn->kn_fp = knote_fp;
			knote_set_kq(kn, kq);
			kqueue_retain(kq); /* retain a kq ref */
			kn->kn_filtid = ~kev->filter;
			kn->kn_inuse = 1;  /* for f_attach() */
			kn->kn_status = KN_ATTACHING | KN_ATTACHED;

			/* was vanish support requested */
			if (kev->flags & EV_VANISHED) {
				kev->flags &= ~EV_VANISHED;
				kn->kn_status |= KN_REQVANISH;
			}

			/* snapshot matching/dispatching protcol flags into knote */
			if (kev->flags & EV_DISPATCH)
				kn->kn_status |= KN_DISPATCH;
			if (kev->flags & EV_UDATA_SPECIFIC)
				kn->kn_status |= KN_UDATA_SPECIFIC;

			/*
			 * copy the kevent state into knote
			 * protocol is that fflags and data
			 * are saved off, and cleared before
			 * calling the attach routine.
			 */
			kn->kn_kevent = *kev;
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_fflags = 0;
			kn->kn_data = 0;

			/* invoke pthread kext to convert kevent qos to thread qos */
			knote_canonicalize_kevent_qos(kn);
			knote_set_qos_index(kn, qos_index_from_qos(kn, kn->kn_qos, FALSE));

			/* before anyone can find it */
			if (kev->flags & EV_DISABLE) {
				/*
				 * do this before anyone can find it,
				 * this can't call knote_disable() because it expects having
				 * the kqlock held.
				 */
				kn->kn_status |= KN_DISABLED;
			}

			/* Add the knote for lookup thru the fd table */
			error = kq_add_knote(kq, kn, kev, p, &knoteuse_flags);
			if (error) {
				(void)kqueue_release(kq, KQUEUE_CANT_BE_LAST_REF);
				knote_free(kn);
				if (knote_fp != NULL)
					fp_drop(p, kev->ident, knote_fp, 0);

				if (error == ERESTART) {
					goto restart;
				}
				goto out;
			}

			/* fp reference count now applies to knote */
			/* rwlock boost is now held */

			/* call filter attach routine */
			result = fops->f_attach(kn, kev);

			/*
			 * Trade knote use count for kq lock.
			 * Cannot be dropped because we held
			 * KN_ATTACHING throughout.
			 */
			knoteuse2kqlock(kq, kn, KNUSE_STEAL_DROP | knoteuse_flags);

			if (kn->kn_flags & EV_ERROR) {
				/*
				 * Failed to attach correctly, so drop.
				 * All other possible users/droppers
				 * have deferred to us.  Save the error
				 * to return to our caller.
				 */
				kn->kn_status &= ~KN_ATTACHED;
				kn->kn_status |= KN_DROPPING;
				error = kn->kn_data;
				kqunlock(kq);
				knote_drop(kn, p);
				goto out;
			}

			/* end "attaching" phase - now just attached */
			kn->kn_status &= ~KN_ATTACHING;

			if (kn->kn_status & KN_DROPPING) {
				/*
				 * Attach succeeded, but someone else
				 * deferred their drop - now we have
				 * to do it for them.
				 */
				kqunlock(kq);
				knote_drop(kn, p);
				goto out;
			}

			/* Mark the thread request overcommit - if appropos */
			knote_set_qos_overcommit(kn);

			/*
			 * If the attach routine indicated that an
			 * event is already fired, activate the knote.
			 */
			if (result)
				knote_activate(kn);

			if (knote_fops(kn)->f_post_attach) {
				error = knote_fops(kn)->f_post_attach(kn, kev);
				if (error) {
					kqunlock(kq);
					goto out;
				}
			}

		} else {
			if ((kev_flags & (EV_ADD | EV_DELETE)) == (EV_ADD | EV_DELETE) &&
					(kq->kq_state & KQ_WORKLOOP)) {
				/*
				 * For workloops, understand EV_ADD|EV_DELETE as a "soft" delete
				 * that doesn't care about ENOENT, so just pretend the deletion
				 * happened.
				 */
			} else {
				error = ENOENT;
			}
			goto out;
		}

	} else {
		/* existing knote: kqueue lock already taken by kq_find_knote_and_kq_lock */

		if ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) != 0) {
			/*
			 * The knote is not in a stable state, wait for that
			 * transition to complete and then redrive the lookup.
			 */
			knoteusewait(kq, kn);
			goto restart;
		}

		if (kev->flags & EV_DELETE) {

			/*
			 * If attempting to delete a disabled dispatch2 knote,
			 * we must wait for the knote to be re-enabled (unless
			 * it is being re-enabled atomically here).
			 */
			if ((kev->flags & EV_ENABLE) == 0 &&
			    (kn->kn_status & (KN_DISPATCH2 | KN_DISABLED)) ==
			                     (KN_DISPATCH2 | KN_DISABLED)) {
				kn->kn_status |= KN_DEFERDELETE;
				kqunlock(kq);
				error = EINPROGRESS;
			} else if (knote_fops(kn)->f_drop_and_unlock) {
				/*
				 * The filter has requested to handle EV_DELETE events
				 *
				 * ERESTART means the kevent has to be re-evaluated
				 */
				error = knote_fops(kn)->f_drop_and_unlock(kn, kev);
				if (error == ERESTART) {
					error = 0;
					goto restart;
				}
			} else if (kqlock2knotedrop(kq, kn)) {
				/* standard/default EV_DELETE path */
				knote_drop(kn, p);
			} else {
				/*
				 * The kqueue is unlocked, it's not being
				 * dropped, and kqlock2knotedrop returned 0:
				 * this means that someone stole the drop of
				 * the knote from us.
				 */
				error = EINPROGRESS;
			}
			goto out;
		}

		/*
		 * If we are re-enabling a deferred-delete knote,
		 * just enable it now and avoid calling the
		 * filter touch routine (it has delivered its
		 * last event already).
		 */
		if ((kev->flags & EV_ENABLE) &&
		    (kn->kn_status & KN_DEFERDELETE)) {
			assert(kn->kn_status & KN_DISABLED);
			knote_activate(kn);
			knote_enable(kn);
			kqunlock(kq);
			goto out;
		}

		/*
		 * If we are disabling, do it before unlocking and
		 * calling the touch routine (so no processing can
		 * see the new kevent state before the disable is
		 * applied).
		 */
		if (kev->flags & EV_DISABLE)
			knote_disable(kn);

		/*
		 * Convert the kqlock to a use reference on the
		 * knote so we can call the filter touch routine.
		 */
		if (knoteuse_needs_boost(kn, kev)) {
			knoteuse_flags |= KNUSE_BOOST;
		}
		if (kqlock2knoteuse(kq, kn, knoteuse_flags)) {
			/*
			 * Call touch routine to notify filter of changes
			 * in filter values (and to re-determine if any
			 * events are fired).
			 */
			result = knote_fops(kn)->f_touch(kn, kev);

			/* Get the kq lock back (don't defer droppers). */
			if (!knoteuse2kqlock(kq, kn, knoteuse_flags)) {
				/* the knote got dropped out from under us */
				goto out;
			}

			/* Handle errors during touch routine */
			if (kev->flags & EV_ERROR) {
				error = kev->data;
				kqunlock(kq);
				goto out;
			}

			/* Activate it if the touch routine said to */
			if (result)
				knote_activate(kn);
		}

		/* Enable the knote if called for */
		if (kev->flags & EV_ENABLE)
			knote_enable(kn);
	}

	/* still have kqlock held and knote is valid */
	kqunlock(kq);

out:
	/* output local errors through the kevent */
	if (error) {
		kev->flags |= EV_ERROR;
		kev->data = error;
	}
}
/*
 * knote_process - process a triggered event
 *
 *	Validate that it is really still a triggered event
 *	by calling the filter routines (if necessary).  Hold
 *	a use reference on the knote to avoid it being detached.
 *
 *	If it is still considered triggered, we will have taken
 *	a copy of the state under the filter lock.  We use that
 *	snapshot to dispatch the knote for future processing (or
 *	not, if this was a lost event).
 *
 *	Our caller assures us that nobody else can be processing
 *	events from this knote during the whole operation. But
 *	others can be touching or posting events to the knote
 *	interspersed with our processing it.
 *
 *	caller holds a reference on the kqueue.
 *	kqueue locked on entry and exit - but may be dropped
 */
static int
knote_process(struct knote *kn,
	kevent_callback_t callback,
	void *callback_data,
	struct filt_process_s *process_data,
	struct proc *p)
{
	struct kevent_internal_s kev;
	struct kqueue *kq = knote_get_kq(kn);
	int result = 0;
	int error = 0;

	bzero(&kev, sizeof(kev));

	/*
	 * Must be active or stayactive
	 * Must be queued and not disabled/suppressed
	 */
	assert(kn->kn_status & KN_QUEUED);
	assert(kn->kn_status & (KN_ACTIVE|KN_STAYACTIVE));
	assert(!(kn->kn_status & (KN_DISABLED|KN_SUPPRESSED|KN_DROPPING)));

	if (kq->kq_state & KQ_WORKLOOP) {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS),
		              ((struct kqworkloop *)kq)->kqwl_dynamicid,
		              kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
		              kn->kn_filtid);
	} else if (kq->kq_state & KQ_WORKQ) {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS),
		              0, kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
		              kn->kn_filtid);
	} else {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS),
		              VM_KERNEL_UNSLIDE_OR_PERM(kq), kn->kn_udata,
		              kn->kn_status | (kn->kn_id << 32), kn->kn_filtid);
	}

	/*
	 * For deferred-drop or vanished events, we just create a fake
	 * event to acknowledge end-of-life.  Otherwise, we call the
	 * filter's process routine to snapshot the kevent state under
	 * the filter's locking protocol.
	 */
	if (kn->kn_status & (KN_DEFERDELETE | KN_VANISHED)) {
		/* create fake event */
		kev.filter = kn->kn_filter;
		kev.ident = kn->kn_id;
		kev.qos = kn->kn_qos;
		kev.flags = (kn->kn_status & KN_DEFERDELETE) ?
		            EV_DELETE : EV_VANISHED;
		kev.flags |= (EV_DISPATCH2 | EV_ONESHOT);
		kev.udata = kn->kn_udata;
		result = 1;

		knote_suppress(kn);
	} else {
		int flags = KNUSE_NONE;

		/* deactivate - so new activations indicate a wakeup */
		knote_deactivate(kn);

		/* suppress knotes to avoid returning the same event multiple times in a single call. */
		knote_suppress(kn);

		if (knoteuse_needs_boost(kn, NULL)) {
			flags |= KNUSE_BOOST;
		}
		/* convert lock to a knote use reference */
		if (!kqlock2knoteuse(kq, kn, flags))
			panic("dropping knote found on queue\n");

		/* call out to the filter to process with just a ref */
		result = knote_fops(kn)->f_process(kn, process_data, &kev);
		if (result) flags |= KNUSE_STEAL_DROP;

		/*
		 * convert our reference back to a lock. accept drop
		 * responsibility from others if we've committed to
		 * delivering event data.
		 */
		if (!knoteuse2kqlock(kq, kn, flags)) {
			/* knote was dropped while we were processing it */
			kn = NULL;
		}
	}

	if (kn != NULL) {
		/*
		 * Determine how to dispatch the knote for future event handling.
		 * not-fired: just return (do not callout, leave deactivated).
		 * One-shot:  If dispatch2, enter deferred-delete mode (unless this is
		 *            is the deferred delete event delivery itself).  Otherwise,
		 *            drop it.
		 * stolendrop:We took responsibility for someone else's drop attempt.
		 *            treat this just like one-shot and prepare to turn it back
		 *            into a deferred delete if required.
		 * Dispatch:  don't clear state, just mark it disabled.
		 * Cleared:   just leave it deactivated.
		 * Others:    re-activate as there may be more events to handle.
		 *            This will not wake up more handlers right now, but
		 *            at the completion of handling events it may trigger
		 *            more handler threads (TODO: optimize based on more than
		 *            just this one event being detected by the filter).
		 */
		if (result == 0) {
			return (EJUSTRETURN);
		}

		if ((kev.flags & EV_ONESHOT) || (kn->kn_status & KN_STOLENDROP)) {
			if ((kn->kn_status & (KN_DISPATCH2 | KN_DEFERDELETE)) == KN_DISPATCH2) {
				/* defer dropping non-delete oneshot dispatch2 events */
				kn->kn_status |= KN_DEFERDELETE;
				knote_disable(kn);

				/* if we took over another's drop clear those flags here */
				if (kn->kn_status & KN_STOLENDROP) {
					assert(kn->kn_status & KN_DROPPING);
					/*
					 * the knote will be dropped when the
					 * deferred deletion occurs
					 */
					kn->kn_status &= ~(KN_DROPPING|KN_STOLENDROP);
				}
			} else if (kn->kn_status & KN_STOLENDROP) {
				/* We now own the drop of the knote. */
				assert(kn->kn_status & KN_DROPPING);
				knote_unsuppress(kn);
				kqunlock(kq);
				knote_drop(kn, p);
				kqlock(kq);
			} else if (kqlock2knotedrop(kq, kn)) {
				/* just EV_ONESHOT, _not_ DISPATCH2 */
				knote_drop(kn, p);
				kqlock(kq);
			}
		} else if (kn->kn_status & KN_DISPATCH) {
			/* disable all dispatch knotes */
			knote_disable(kn);
		} else if ((kev.flags & EV_CLEAR) == 0) {
			/* re-activate in case there are more events */
			knote_activate(kn);
		}
	}

	/*
	 * callback to handle each event as we find it.
	 * If we have to detach and drop the knote, do
	 * it while we have the kq unlocked.
	 */
	if (result) {
		kqunlock(kq);
		error = (callback)(kq, &kev, callback_data);
		kqlock(kq);
	}
	return (error);
}
/*
 * Return 0 to indicate that processing should proceed,
 * -1 if there is nothing to process.
 *
 * Called with kqueue locked and returns the same way,
 * but may drop lock temporarily.
 */
static int
kqworkq_begin_processing(struct kqworkq *kqwq, kq_index_t qos_index, int flags)
{
	struct kqrequest *kqr;
	thread_t self = current_thread();
	__assert_only struct uthread *ut = get_bsdthread_info(self);

	assert(kqwq->kqwq_state & KQ_WORKQ);
	assert(qos_index < KQWQ_NQOS);

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_START,
	              0, qos_index);

	kqwq_req_lock(kqwq);

	kqr = kqworkq_get_request(kqwq, qos_index);

	/* manager skips buckets that haven't asked for its help */
	if (flags & KEVENT_FLAG_WORKQ_MANAGER) {

		/* If nothing for manager to do, just return */
		if ((kqr->kqr_state & KQWQ_THMANAGER) == 0) {
			KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END,
			              0, kqr->kqr_state);
			kqwq_req_unlock(kqwq);
			return -1;
		}

		/* bind manager thread from this time on */
		kqworkq_bind_thread_impl(kqwq, qos_index, self, flags);

	} else {
		/* We should already be bound to this kqueue */
		assert(kqr->kqr_state & KQR_BOUND);
		assert(kqr->kqr_thread == self);
		assert(ut->uu_kqueue_bound == (struct kqueue *)kqwq);
		assert(ut->uu_kqueue_qos_index == qos_index);
		assert((ut->uu_kqueue_flags & flags) == ut->uu_kqueue_flags);
	}

	/*
	 * we should have been requested to be here
	 * and nobody else should still be processing
	 */
	assert(kqr->kqr_state & KQR_WAKEUP);
	assert(kqr->kqr_state & KQR_THREQUESTED);
	assert((kqr->kqr_state & KQR_PROCESSING) == 0);

	/* reset wakeup trigger to catch new events after we start processing */
	kqr->kqr_state &= ~KQR_WAKEUP;

	/* convert to processing mode */
	kqr->kqr_state |= KQR_PROCESSING;

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END,
	              kqr_thread_id(kqr), kqr->kqr_state);

	kqwq_req_unlock(kqwq);

	return 0;
}
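/*
 * kqworkloop_is_processing_on_current_thread - return whether the current
 *	thread is the servicer currently processing this workloop.
 */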
bool
kqworkloop_is_processing_on_current_thread(struct kqworkloop *kqwl)
{
	struct kqueue *kq = &kqwl->kqwl_kqueue;

	kqlock_held(kq);

	if (kq->kq_state & KQ_PROCESSING) {
		/*
		 * KQ_PROCESSING is unset with the kqlock held, and the kqr thread is
		 * never modified while KQ_PROCESSING is set, meaning that peeking at
		 * its value is safe from this context.
		 */
		return kqwl->kqwl_request.kqr_thread == current_thread();
	}
	return false;
}
static void
kqworkloop_acknowledge_events(struct kqworkloop *kqwl, boolean_t clear_ipc_override)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;
	struct knote *kn, *tmp;

	kqlock_held(&kqwl->kqwl_kqueue);

	TAILQ_FOREACH_SAFE(kn, &kqr->kqr_suppressed, kn_tqe, tmp) {
		/*
		 * If a knote that can adjust QoS is disabled because of the automatic
		 * behavior of EV_DISPATCH, the knotes should stay suppressed so that
		 * further overrides keep pushing.
		 */
		if (knote_fops(kn)->f_adjusts_qos && (kn->kn_status & KN_DISABLED) &&
				(kn->kn_status & (KN_STAYACTIVE | KN_DROPPING)) == 0 &&
				(kn->kn_flags & (EV_DISPATCH | EV_DISABLE)) == EV_DISPATCH) {
			/*
			 * When called from unbind, clear the sync ipc override on the knote
			 * for events which are delivered.
			 */
			if (clear_ipc_override) {
				knote_adjust_sync_qos(kn, THREAD_QOS_UNSPECIFIED, FALSE);
			}
			continue;
		}
		knote_unsuppress(kn);
	}
}
static int
kqworkloop_begin_processing(struct kqworkloop *kqwl,
		__assert_only unsigned int flags)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;
	struct kqueue *kq = &kqwl->kqwl_kqueue;

	kqlock_held(kq);

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_START,
	              kqwl->kqwl_dynamicid, flags, 0);

	kqwl_req_lock(kqwl);

	/* nobody else should still be processing */
	assert((kqr->kqr_state & KQR_PROCESSING) == 0);
	assert((kq->kq_state & KQ_PROCESSING) == 0);

	kqr->kqr_state |= KQR_PROCESSING | KQR_R2K_NOTIF_ARMED;
	kq->kq_state |= KQ_PROCESSING;

	kqwl_req_unlock(kqwl);

	kqworkloop_acknowledge_events(kqwl, FALSE);

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_END,
	              kqwl->kqwl_dynamicid, flags, 0);

	return 0;
}
/*
 * Return 0 to indicate that processing should proceed,
 * -1 if there is nothing to process.
 *
 * Called with kqueue locked and returns the same way,
 * but may drop lock temporarily.
 */
static int
kqueue_begin_processing(struct kqueue *kq, kq_index_t qos_index, unsigned int flags)
{
	struct kqtailq *suppressq;

	kqlock_held(kq);

	if (kq->kq_state & KQ_WORKQ) {
		return kqworkq_begin_processing((struct kqworkq *)kq, qos_index, flags);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		return kqworkloop_begin_processing((struct kqworkloop*)kq, flags);
	}

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_START,
	              VM_KERNEL_UNSLIDE_OR_PERM(kq), flags);

	assert(qos_index == QOS_INDEX_KQFILE);

	/* wait to become the exclusive processing thread */
	for (;;) {
		if (kq->kq_state & KQ_DRAIN) {
			KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
			              VM_KERNEL_UNSLIDE_OR_PERM(kq), 2);
			return -1;
		}

		if ((kq->kq_state & KQ_PROCESSING) == 0)
			break;

		/* if someone else is processing the queue, wait */
		kq->kq_state |= KQ_PROCWAIT;
		suppressq = kqueue_get_suppressed_queue(kq, qos_index);
		waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
		                    CAST_EVENT64_T(suppressq),
		                    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

		kqunlock(kq);
		thread_block(THREAD_CONTINUE_NULL);
		kqlock(kq);
	}

	/* Nobody else processing */

	/* clear pre-posts and KQ_WAKEUP now, in case we bail early */
	waitq_set_clear_preposts(&kq->kq_wqs);
	kq->kq_state &= ~KQ_WAKEUP;

	/* anything left to process? */
	if (kqueue_queue_empty(kq, qos_index)) {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
		              VM_KERNEL_UNSLIDE_OR_PERM(kq), 1);
		return -1;
	}

	/* convert to processing mode */
	kq->kq_state |= KQ_PROCESSING;

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
	              VM_KERNEL_UNSLIDE_OR_PERM(kq));

	return 0;
}
 *      kqworkq_end_processing - Complete the processing of a workq kqueue
 *
 *      We may have to request new threads.
 *      This can happen when there are no waiting processing threads and:
 *      - there were active events we never got to (count > 0)
 *      - we pended waitq hook callouts during processing
 *      - we pended wakeups while processing (or unsuppressing)
 *
 *      Called with kqueue lock held.

kqworkq_end_processing(struct kqworkq *kqwq, kq_index_t qos_index, int flags)
#pragma unused(flags)

        struct kqueue *kq = &kqwq->kqwq_kqueue;
        struct kqtailq *suppressq = kqueue_get_suppressed_queue(kq, qos_index);

        thread_t self = current_thread();
        struct uthread *ut = get_bsdthread_info(self);

        struct kqrequest *kqr;

        assert(kqwq->kqwq_state & KQ_WORKQ);
        assert(qos_index < KQWQ_NQOS);

        /* Are we really bound to this kqueue? */
        if (ut->uu_kqueue_bound != kq) {
                assert(ut->uu_kqueue_bound == kq);

        kqr = kqworkq_get_request(kqwq, qos_index);

        kqwq_req_lock(kqwq);

        /* Do we claim to be manager? */
        if (flags & KEVENT_FLAG_WORKQ_MANAGER) {

                /* bail if not bound that way */
                if (ut->uu_kqueue_qos_index != KQWQ_QOS_MANAGER ||
                    (ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ_MANAGER) == 0) {
                        assert(ut->uu_kqueue_qos_index == KQWQ_QOS_MANAGER);
                        assert(ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ_MANAGER);
                        kqwq_req_unlock(kqwq);

                /* bail if this request wasn't already getting manager help */
                if ((kqr->kqr_state & KQWQ_THMANAGER) == 0 ||
                    (kqr->kqr_state & KQR_PROCESSING) == 0) {
                        kqwq_req_unlock(kqwq);

                if (ut->uu_kqueue_qos_index != qos_index ||
                    (ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ_MANAGER)) {
                        assert(ut->uu_kqueue_qos_index == qos_index);
                        assert((ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ_MANAGER) == 0);
                        kqwq_req_unlock(kqwq);

        assert(kqr->kqr_state & KQR_BOUND);
        thread = kqr->kqr_thread;
        assert(thread == self);

        assert(kqr->kqr_state & KQR_PROCESSING);

        /* If we didn't drain the whole queue, re-mark a wakeup being needed */
        if (!kqueue_queue_empty(kq, qos_index))
                kqr->kqr_state |= KQR_WAKEUP;

        kqwq_req_unlock(kqwq);

         * Return suppressed knotes to their original state.
         * For workq kqueues, suppressed ones that are still
         * truly active (not just forced into the queue) will
         * set flags we check below to see if anything got

        while ((kn = TAILQ_FIRST(suppressq)) != NULL) {
                assert(kn->kn_status & KN_SUPPRESSED);
                knote_unsuppress(kn);

        kqwq_req_lock(kqwq);

        /* Indicate that we are done processing this request */
        kqr->kqr_state &= ~KQR_PROCESSING;

         * Drop our association with this one request and its

        kqworkq_unbind_thread(kqwq, qos_index, thread, flags);

         * request a new thread if we didn't process the whole
         * queue or real events have happened (not just putting
         * stay-active events back).

        if (kqr->kqr_state & KQR_WAKEUP) {
                if (kqueue_queue_empty(kq, qos_index)) {
                        kqr->kqr_state &= ~KQR_WAKEUP;

                        kqworkq_request_thread(kqwq, qos_index);

        kqwq_req_unlock(kqwq);
kqworkloop_end_processing(struct kqworkloop *kqwl, int nevents,
                unsigned int flags)

        struct kqrequest *kqr = &kqwl->kqwl_request;
        struct kqueue *kq = &kqwl->kqwl_kqueue;

        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_START,
                        kqwl->kqwl_dynamicid, flags, 0);

        if ((kq->kq_state & KQ_NO_WQ_THREAD) && nevents == 0 &&
                        (flags & KEVENT_FLAG_IMMEDIATE) == 0) {
                 * <rdar://problem/31634014> We may soon block, but have returned no
                 * kevents that need to be kept suppressed for overriding purposes.
                 *
                 * It is hence safe to acknowledge events and unsuppress everything, so
                 * that if we block we can observe all events firing.

                kqworkloop_acknowledge_events(kqwl, TRUE);

        kqwl_req_lock(kqwl);

        assert(kqr->kqr_state & KQR_PROCESSING);
        assert(kq->kq_state & KQ_PROCESSING);

        kq->kq_state &= ~KQ_PROCESSING;
        kqr->kqr_state &= ~KQR_PROCESSING;
        kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_RECOMPUTE_WAKEUP_QOS, 0);

        kqwl_req_unlock(kqwl);

        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_END,
                        kqwl->kqwl_dynamicid, flags, 0);
 * Called with kqueue lock held.

kqueue_end_processing(struct kqueue *kq, kq_index_t qos_index,
                int nevents, unsigned int flags)

        struct kqtailq *suppressq;

        assert((kq->kq_state & KQ_WORKQ) == 0);

        if (kq->kq_state & KQ_WORKLOOP) {
                return kqworkloop_end_processing((struct kqworkloop *)kq, nevents, flags);

        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_END),
                      VM_KERNEL_UNSLIDE_OR_PERM(kq), flags);

        assert(qos_index == QOS_INDEX_KQFILE);

         * Return suppressed knotes to their original state.

        suppressq = kqueue_get_suppressed_queue(kq, qos_index);
        while ((kn = TAILQ_FIRST(suppressq)) != NULL) {
                assert(kn->kn_status & KN_SUPPRESSED);
                knote_unsuppress(kn);

        procwait = (kq->kq_state & KQ_PROCWAIT);
        kq->kq_state &= ~(KQ_PROCESSING | KQ_PROCWAIT);

                /* first wake up any thread already waiting to process */
                waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
                                   CAST_EVENT64_T(suppressq),
                                   WAITQ_ALL_PRIORITIES);
 *      kqworkq_internal_bind - bind thread to processing workq kqueue
 *
 *      Determines if the provided thread will be responsible for
 *      servicing the particular QoS class index specified in the
 *      parameters. Once the binding is done, any overrides that may
 *      be associated with the corresponding events can be applied.
 *
 *      This should be called as soon as the thread identity is known,
 *      preferably while still at high priority during creation.
 *
 *  - caller holds a reference on the process (and workq kq)
 *      - the thread MUST call kevent_qos_internal after being bound
 *        or the bucket of events may never be delivered.
 *    (unless this is a synchronous bind, then the request is locked)

kqworkq_internal_bind(
        kq_index_t qos_index,

        struct kqworkq *kqwq;
        struct kqrequest *kqr;
        struct uthread *ut = get_bsdthread_info(thread);

        /* If no process workq, can't be our thread. */
        kq = p->p_fd->fd_wqkqueue;

        assert(kq->kq_state & KQ_WORKQ);
        kqwq = (struct kqworkq *)kq;

         * No need to bind the manager thread to any specific
         * bucket, but still claim the thread.

        if (qos_index == KQWQ_QOS_MANAGER) {
                assert(ut->uu_kqueue_bound == NULL);
                assert(flags & KEVENT_FLAG_WORKQ_MANAGER);
                ut->uu_kqueue_bound = kq;
                ut->uu_kqueue_qos_index = qos_index;
                ut->uu_kqueue_flags = flags;

                KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_BIND),
                              thread_tid(thread), flags, qos_index);

         * If this is a synchronous bind callback, the request
         * lock is already held, so just do the bind.

        if (flags & KEVENT_FLAG_SYNCHRONOUS_BIND) {
                kqwq_req_held(kqwq);
                /* strip out synchronous bind flag */
                flags &= ~KEVENT_FLAG_SYNCHRONOUS_BIND;
                kqworkq_bind_thread_impl(kqwq, qos_index, thread, flags);

         * check the request that corresponds to our qos_index
         * to see if there is an outstanding request.

        kqr = kqworkq_get_request(kqwq, qos_index);
        assert(kqr->kqr_qos_index == qos_index);
        kqwq_req_lock(kqwq);

        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_BIND),
                      thread_tid(thread), flags, qos_index, kqr->kqr_state);

        if ((kqr->kqr_state & KQR_THREQUESTED) &&
            (kqr->kqr_state & KQR_PROCESSING) == 0) {

                if ((kqr->kqr_state & KQR_BOUND) &&
                    thread == kqr->kqr_thread) {
                        /* duplicate bind - claim the thread */
                        assert(ut->uu_kqueue_bound == kq);
                        assert(ut->uu_kqueue_qos_index == qos_index);
                        kqwq_req_unlock(kqwq);

                if ((kqr->kqr_state & (KQR_BOUND | KQWQ_THMANAGER)) == 0) {
                        /* ours to bind to */
                        kqworkq_bind_thread_impl(kqwq, qos_index, thread, flags);
                        kqwq_req_unlock(kqwq);

        kqwq_req_unlock(kqwq);
kqworkloop_bind_thread_impl(struct kqworkloop *kqwl,
                            thread_t thread,
                            __assert_only unsigned int flags)

        assert(flags & KEVENT_FLAG_WORKLOOP);

        /* the request object must be locked */
        kqwl_req_held(kqwl);

        struct kqrequest *kqr = &kqwl->kqwl_request;
        struct uthread *ut = get_bsdthread_info(thread);
        boolean_t ipc_override_is_sync;
        kq_index_t qos_index = kqworkloop_combined_qos(kqwl, &ipc_override_is_sync);

        /* nobody else bound so finally bind (as a workloop) */
        assert(kqr->kqr_state & KQR_THREQUESTED);
        assert((kqr->kqr_state & (KQR_BOUND | KQR_PROCESSING)) == 0);
        assert(thread != kqwl->kqwl_owner);

        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_BIND),
                      kqwl->kqwl_dynamicid, (uintptr_t)thread_tid(thread),
                      (uintptr_t)(((uintptr_t)kqr->kqr_override_index << 16) |
                      (((uintptr_t)kqr->kqr_state) << 8) |
                      ((uintptr_t)ipc_override_is_sync)));

        kqr->kqr_state |= KQR_BOUND | KQR_R2K_NOTIF_ARMED;
        kqr->kqr_thread = thread;

        /* bind the workloop to the uthread */
        ut->uu_kqueue_bound = (struct kqueue *)kqwl;
        ut->uu_kqueue_flags = flags;
        ut->uu_kqueue_qos_index = qos_index;
        assert(ut->uu_kqueue_override_is_sync == 0);
        ut->uu_kqueue_override_is_sync = ipc_override_is_sync;

                thread_add_ipc_override(thread, qos_index);

        if (ipc_override_is_sync) {
                thread_add_sync_ipc_override(thread);
 *  workloop_fulfill_threadreq - bind thread to processing workloop
 *
 * The provided thread will be responsible for delivering events
 * associated with the given kqrequest.  Bind it and get ready for
 * the thread to eventually arrive.
 *
 * If WORKLOOP_FULFILL_THREADREQ_SYNC is specified, the callback is
 * made within the context of the pthread_functions->workq_threadreq
 * callout.  In this case, the request structure is already locked.

workloop_fulfill_threadreq(struct proc *p,
                           workq_threadreq_t req,

        int sync = (flags & WORKLOOP_FULFILL_THREADREQ_SYNC);
        int cancel = (flags & WORKLOOP_FULFILL_THREADREQ_CANCEL);
        struct kqrequest *kqr;
        struct kqworkloop *kqwl;

        kqwl = (struct kqworkloop *)((uintptr_t)req -
                                     offsetof(struct kqworkloop, kqwl_request) -
                                     offsetof(struct kqrequest, kqr_req));
        kqr = &kqwl->kqwl_request;

        /* validate we're looking at something valid */
        if (kqwl->kqwl_p != p ||
            (kqwl->kqwl_state & KQ_WORKLOOP) == 0) {
                assert(kqwl->kqwl_p == p);
                assert(kqwl->kqwl_state & KQ_WORKLOOP);

                kqwl_req_lock(kqwl);

        /* Should be a pending request */
        if ((kqr->kqr_state & KQR_BOUND) ||
            (kqr->kqr_state & KQR_THREQUESTED) == 0) {

                assert((kqr->kqr_state & KQR_BOUND) == 0);
                assert(kqr->kqr_state & KQR_THREQUESTED);

                        kqwl_req_unlock(kqwl);

        assert((kqr->kqr_state & KQR_DRAIN) == 0);

         * Is this a cancel indication from pthread?
         * If so, we must be exiting/exec'ing. Forget
         * our pending request.

                kqr->kqr_state &= ~KQR_THREQUESTED;
                kqr->kqr_state |= KQR_DRAIN;

                /* do the actual bind? */
                kqworkloop_bind_thread_impl(kqwl, thread, KEVENT_FLAG_WORKLOOP);

                kqwl_req_unlock(kqwl);

                kqueue_release_last(p, &kqwl->kqwl_kqueue); /* may dealloc kq */
 *      kevent_qos_internal_bind - bind thread to processing kqueue
 *
 *      Indicates that the provided thread will be responsible for
 *      servicing the particular QoS class index specified in the
 *      parameters. Once the binding is done, any overrides that may
 *      be associated with the corresponding events can be applied.
 *
 *      This should be called as soon as the thread identity is known,
 *      preferably while still at high priority during creation.
 *
 *  - caller holds a reference on the kqueue.
 *      - the thread MUST call kevent_qos_internal after being bound
 *        or the bucket of events may never be delivered.
 *      - Nothing locked (may take mutex or block).

kevent_qos_internal_bind(

        kq_index_t qos_index;

        assert(flags & KEVENT_FLAG_WORKQ);

        if (thread == THREAD_NULL || (flags & KEVENT_FLAG_WORKQ) == 0) {

        /* get the qos index we're going to service */
        qos_index = qos_index_for_servicer(qos_class, thread, flags);

        if (kqworkq_internal_bind(p, qos_index, thread, flags))
kqworkloop_internal_unbind(

        struct kqworkloop *kqwl;
        struct uthread *ut = get_bsdthread_info(thread);

        assert(ut->uu_kqueue_bound != NULL);
        kq = ut->uu_kqueue_bound;
        assert(kq->kq_state & KQ_WORKLOOP);
        kqwl = (struct kqworkloop *)kq;

        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_UNBIND),
                      kqwl->kqwl_dynamicid, (uintptr_t)thread_tid(thread),

        if (!(kq->kq_state & KQ_NO_WQ_THREAD)) {
                assert(is_workqueue_thread(thread));

                kqworkloop_unbind_thread(kqwl, thread, flags);

                /* If last reference, dealloc the workloop kq */
                kqueue_release_last(p, kq);

                assert(!is_workqueue_thread(thread));
                kevent_servicer_detach_thread(p, kqwl->kqwl_dynamicid, thread, flags, kq);
kqworkq_internal_unbind(
        kq_index_t qos_index,

        struct kqworkq *kqwq;

        kq_index_t end_index;

        assert(thread == current_thread());
        ut = get_bsdthread_info(thread);

        kq = p->p_fd->fd_wqkqueue;
        assert(kq->kq_state & KQ_WORKQ);
        assert(ut->uu_kqueue_bound == kq);

        kqwq = (struct kqworkq *)kq;

        /* end servicing any requests we might own */
        end_index = (qos_index == KQWQ_QOS_MANAGER) ?
                    0 : qos_index;

        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_UNBIND),
                      (uintptr_t)thread_tid(thread), flags, qos_index);

                kqworkq_end_processing(kqwq, qos_index, flags);
        } while (qos_index-- > end_index);

        ut->uu_kqueue_bound = NULL;
        ut->uu_kqueue_qos_index = 0;
        ut->uu_kqueue_flags = 0;
 *      kevent_qos_internal_unbind - unbind thread from processing kqueue
 *
 *      End processing the per-QoS bucket of events and allow other threads
 *      to be requested for future servicing.
 *
 *      caller holds a reference on the kqueue.
 *      thread is the current thread.

kevent_qos_internal_unbind(
#pragma unused(qos_class)

        unsigned int bound_flags;

        ut = get_bsdthread_info(thread);
        if (ut->uu_kqueue_bound == NULL) {
                /* early out if we are already unbound */
                assert(ut->uu_kqueue_flags == 0);
                assert(ut->uu_kqueue_qos_index == 0);
                assert(ut->uu_kqueue_override_is_sync == 0);

        assert(flags & (KEVENT_FLAG_WORKQ | KEVENT_FLAG_WORKLOOP));
        assert(thread == current_thread());

        check_flags = flags & KEVENT_FLAG_UNBIND_CHECK_FLAGS;

        /* Get the kqueue we started with */
        kq = ut->uu_kqueue_bound;

        assert(kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP));

        /* get flags and QoS parameters we started with */
        bound_flags = ut->uu_kqueue_flags;

        /* Unbind from the class of workq */
        if (kq->kq_state & KQ_WORKQ) {
                if (check_flags && !(flags & KEVENT_FLAG_WORKQ)) {

                kqworkq_internal_unbind(p, ut->uu_kqueue_qos_index, thread, bound_flags);

                if (check_flags && !(flags & KEVENT_FLAG_WORKLOOP)) {

                kqworkloop_internal_unbind(p, thread, bound_flags);
 * kqueue_process - process the triggered events in a kqueue
 *
 *      Walk the queued knotes and validate that they are
 *      really still triggered events by calling the filter
 *      routines (if necessary).  Hold a use reference on
 *      the knote to avoid it being detached. For each event
 *      that is still considered triggered, invoke the
 *      callback routine provided.
 *
 *      caller holds a reference on the kqueue.
 *      kqueue locked on entry and exit - but may be dropped
 *      kqueue list locked (held for duration of call)

kqueue_process(struct kqueue *kq,
    kevent_callback_t callback,
    void *callback_data,
    struct filt_process_s *process_data,

        unsigned int flags = process_data ? process_data->fp_flags : 0;
        struct uthread *ut = get_bsdthread_info(current_thread());
        kq_index_t start_index, end_index, i;

         * Based on the mode of the kqueue and the bound QoS of the servicer,
         * determine the range of thread requests that need checking

        if (kq->kq_state & KQ_WORKQ) {
                if (flags & KEVENT_FLAG_WORKQ_MANAGER) {
                        start_index = KQWQ_QOS_MANAGER;
                } else if (ut->uu_kqueue_bound != kq) {

                        start_index = ut->uu_kqueue_qos_index;

                /* manager services every request in a workq kqueue */
                assert(start_index > 0 && start_index <= KQWQ_QOS_MANAGER);
                end_index = (start_index == KQWQ_QOS_MANAGER) ? 0 : start_index;

        } else if (kq->kq_state & KQ_WORKLOOP) {
                if (ut->uu_kqueue_bound != kq)

                 * Single request servicing
                 * we want to deliver all events, regardless of the QOS

                start_index = end_index = THREAD_QOS_UNSPECIFIED;

                start_index = end_index = QOS_INDEX_KQFILE;

                if (kqueue_begin_processing(kq, i, flags) == -1) {

                        /* Nothing to process */

                 * loop through the enqueued knotes associated with this request,
                 * processing each one. Each request may have several queues
                 * of knotes to process (depending on the type of kqueue) so we
                 * have to loop through all the queues as long as we have additional

                struct kqtailq *base_queue = kqueue_get_base_queue(kq, i);
                struct kqtailq *queue = kqueue_get_high_queue(kq, i);

                        while (error == 0 && (kn = TAILQ_FIRST(queue)) != NULL) {
                                error = knote_process(kn, callback, callback_data, process_data, p);
                                if (error == EJUSTRETURN) {

                                /* error is EWOULDBLOCK when the out event array is full */

                } while (error == 0 && queue-- > base_queue);

                if ((kq->kq_state & KQ_WORKQ) == 0) {
                        kqueue_end_processing(kq, i, nevents, flags);

                if (error == EWOULDBLOCK) {
                        /* break out if no more space for additional events */

        } while (i-- > end_index);
kqueue_scan_continue(void *data, wait_result_t wait_result)

        thread_t self = current_thread();
        uthread_t ut = (uthread_t)get_bsdthread_info(self);
        struct _kqueue_scan * cont_args = &ut->uu_kevent.ss_kqueue_scan;
        struct kqueue *kq = (struct kqueue *)data;
        struct filt_process_s *process_data = cont_args->process_data;

        /* convert the (previous) wait_result to a proper error */
        switch (wait_result) {
        case THREAD_AWAKENED: {

                error = kqueue_process(kq, cont_args->call, cont_args->data,
                                       process_data, &count, current_proc());
                if (error == 0 && count == 0) {
                        if (kq->kq_state & KQ_DRAIN) {

                        if (kq->kq_state & KQ_WAKEUP)

                        waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
                                            KQ_EVENT, THREAD_ABORTSAFE,
                                            cont_args->deadline);
                        kq->kq_state |= KQ_SLEEP;

                        thread_block_parameter(kqueue_scan_continue, kq);

        case THREAD_TIMED_OUT:
                error = EWOULDBLOCK;

        case THREAD_INTERRUPTED:

        case THREAD_RESTART:

                panic("%s: - invalid wait_result (%d)", __func__,
                      wait_result);

        /* call the continuation with the results */
        assert(cont_args->cont != NULL);
        (cont_args->cont)(kq, cont_args->data, error);
 * kqueue_scan - scan and wait for events in a kqueue
 *
 *      Process the triggered events in a kqueue.
 *
 *      If no events are triggered, arrange to wait for them;
 *      if the caller provided a continuation routine, the wait
 *      is resumed through it rather than by returning here.
 *
 *      The callback routine must be valid.
 *      The caller must hold a use-count reference on the kq.
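/*
 * Illustrative sketch (not part of this file): the scan-and-wait path below is
 * what backs a blocking kevent(2) call from user space.  The snippet is a
 * minimal, hypothetical usage example against the public <sys/event.h> API;
 * the descriptor name `pfd` is a placeholder for any open file descriptor.
 *
 *	#include <sys/event.h>
 *	#include <sys/time.h>
 *
 *	int kq = kqueue();
 *	struct kevent change, event;
 *	// register interest in read-readiness of pfd
 *	EV_SET(&change, pfd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &change, 1, NULL, 0, NULL);
 *	// block for up to one second waiting for a triggered event
 *	struct timespec timeout = { .tv_sec = 1, .tv_nsec = 0 };
 *	int n = kevent(kq, NULL, 0, &event, 1, &timeout);
 *	// n == 1: an event was delivered; n == 0: the wait timed out
 *	// (the EWOULDBLOCK path inside kqueue_scan).
 */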
kqueue_scan(struct kqueue *kq,
            kevent_callback_t callback,
            kqueue_continue_t continuation,
            void *callback_data,
            struct filt_process_s *process_data,
            struct timeval *atvp,
            struct proc *p)

        thread_continue_t cont = THREAD_CONTINUE_NULL;

        assert(callback != NULL);

         * Determine which QoS index we are servicing

        flags = (process_data) ? process_data->fp_flags : 0;
        fd = (process_data) ? process_data->fp_fd : -1;

                wait_result_t wait_result;

                 * Make a pass through the kq to find events already

                error = kqueue_process(kq, callback, callback_data,
                                       process_data, &count, p);

                        break; /* lock still held */

                /* looks like we have to consider blocking */

                        /* convert the timeout to a deadline once */
                        if (atvp->tv_sec || atvp->tv_usec) {

                                clock_get_uptime(&now);
                                nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC +
                                                            atvp->tv_usec * (long)NSEC_PER_USEC,
                                                            &deadline);

                                if (now >= deadline) {
                                        /* non-blocking call */
                                        error = EWOULDBLOCK;
                                        break; /* lock still held */

                                clock_absolutetime_interval_to_deadline(deadline, &deadline);

                                deadline = 0;   /* block forever */

                                uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
                                struct _kqueue_scan *cont_args = &ut->uu_kevent.ss_kqueue_scan;

                                cont_args->call = callback;
                                cont_args->cont = continuation;
                                cont_args->deadline = deadline;
                                cont_args->data = callback_data;
                                cont_args->process_data = process_data;
                                cont = kqueue_scan_continue;

                if (kq->kq_state & KQ_DRAIN) {

                /* If awakened during processing, try again */
                if (kq->kq_state & KQ_WAKEUP) {

                /* go ahead and wait */
                waitq_assert_wait64_leeway((struct waitq *)&kq->kq_wqs,
                                           KQ_EVENT, THREAD_ABORTSAFE,
                                           TIMEOUT_URGENCY_USER_NORMAL,
                                           deadline, TIMEOUT_NO_LEEWAY);
                kq->kq_state |= KQ_SLEEP;

                wait_result = thread_block_parameter(cont, kq);
                /* NOTREACHED if (continuation != NULL) */

                switch (wait_result) {
                case THREAD_AWAKENED:

                case THREAD_TIMED_OUT:

                case THREAD_INTERRUPTED:

                case THREAD_RESTART:

                        panic("%s: - bad wait_result (%d)", __func__,
                              wait_result);
 * This could be expanded to call kqueue_scan, if desired.

kqueue_read(__unused struct fileproc *fp,
    __unused struct uio *uio,
    __unused vfs_context_t ctx)

kqueue_write(__unused struct fileproc *fp,
    __unused struct uio *uio,
    __unused vfs_context_t ctx)

kqueue_ioctl(__unused struct fileproc *fp,
    __unused u_long com,
    __unused caddr_t data,
    __unused vfs_context_t ctx)
kqueue_select(struct fileproc *fp, int which, void *wq_link_id,
    __unused vfs_context_t ctx)

        struct kqueue *kq = (struct kqueue *)fp->f_data;
        struct kqtailq *queue;
        struct kqtailq *suppressq;

        assert((kq->kq_state & KQ_WORKQ) == 0);

         * If this is the first pass, link the wait queue associated with
         * the kqueue onto the wait queue set for the select().  Normally we
         * use selrecord() for this, but it uses the wait queue within the
         * selinfo structure and we need to use the main one for the kqueue to
         * catch events from KN_STAYQUEUED sources. So we do the linkage manually.
         * (The select() call will unlink them when it ends).

        if (wq_link_id != NULL) {
                thread_t cur_act = current_thread();
                struct uthread * ut = get_bsdthread_info(cur_act);

                kq->kq_state |= KQ_SEL;
                waitq_link((struct waitq *)&kq->kq_wqs, ut->uu_wqset,
                           WAITQ_SHOULD_LOCK, (uint64_t *)wq_link_id);

                /* always consume the reserved link object */
                waitq_link_release(*(uint64_t *)wq_link_id);
                *(uint64_t *)wq_link_id = 0;

                 * selprocess() is expecting that we send it back the waitq
                 * that was just added to the thread's waitq set. In order
                 * to not change the selrecord() API (which is exported to
                 * kexts), we pass this value back through the
                 * void *wq_link_id pointer we were passed. We need to use
                 * memcpy here because the pointer may not be properly aligned
                 * on 32-bit systems.

                void *wqptr = &kq->kq_wqs;
                memcpy(wq_link_id, (void *)&wqptr, sizeof(void *));

        if (kqueue_begin_processing(kq, QOS_INDEX_KQFILE, 0) == -1) {

        queue = kqueue_get_base_queue(kq, QOS_INDEX_KQFILE);
        if (!TAILQ_EMPTY(queue)) {

                 * there is something queued - but it might be a
                 * KN_STAYACTIVE knote, which may or may not have
                 * any events pending.  Otherwise, we have to walk
                 * the list of knotes to see, and peek at the
                 * (non-vanished) stay-active ones to be really sure.

                while ((kn = (struct knote *)TAILQ_FIRST(queue)) != NULL) {
                        if (kn->kn_status & KN_ACTIVE) {

                        assert(kn->kn_status & KN_STAYACTIVE);

                 * There were no regular events on the queue, so take
                 * a deeper look at the stay-queued ones we suppressed.

                suppressq = kqueue_get_suppressed_queue(kq, QOS_INDEX_KQFILE);
                while ((kn = (struct knote *)TAILQ_FIRST(suppressq)) != NULL) {

                        assert(!knoteuse_needs_boost(kn, NULL));

                        /* If didn't vanish while suppressed - peek at it */
                        if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
                                peek = knote_fops(kn)->f_peek(kn);

                                /* if it dropped while getting lock - move on */
                                if (!knoteuse2kqlock(kq, kn, KNUSE_NONE))

                        knote_unsuppress(kn);

                        /* has data or it has to report a vanish */

        kqueue_end_processing(kq, QOS_INDEX_KQFILE, retnum, 0);
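/*
 * Illustrative sketch (not part of this file): the select entry point above is
 * what lets a kqueue file descriptor itself be passed to select(2).  A minimal,
 * hypothetical user-space example (error handling omitted):
 *
 *	#include <sys/event.h>
 *	#include <sys/select.h>
 *
 *	int kq = kqueue();
 *	// ... register knotes on kq with kevent(2) ...
 *	fd_set rfds;
 *	FD_ZERO(&rfds);
 *	FD_SET(kq, &rfds);
 *	// reports kq as readable once at least one registered event has triggered
 *	int n = select(kq + 1, &rfds, NULL, NULL, NULL);
 */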
kqueue_close(struct fileglob *fg, __unused vfs_context_t ctx)

        struct kqfile *kqf = (struct kqfile *)fg->fg_data;

        assert((kqf->kqf_state & KQ_WORKQ) == 0);
        kqueue_dealloc(&kqf->kqf_kqueue);
 * The caller has taken a use-count reference on this kqueue and will donate it
 * to the kqueue we are being added to.  This keeps the kqueue from closing until
 * that relationship is torn down.

kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn,
                __unused struct kevent_internal_s *kev, __unused vfs_context_t ctx)

        struct kqfile *kqf = (struct kqfile *)kn->kn_fp->f_data;
        struct kqueue *kq = &kqf->kqf_kqueue;
        struct kqueue *parentkq = knote_get_kq(kn);

        assert((kqf->kqf_state & KQ_WORKQ) == 0);

        if (parentkq == kq ||
            kn->kn_filter != EVFILT_READ) {
                kn->kn_flags = EV_ERROR;
                kn->kn_data = EINVAL;

         * We have to avoid creating a cycle when nesting kqueues
         * inside another.  Rather than trying to walk the whole
         * potential DAG of nested kqueues, we just use a simple
         * ceiling protocol.  When a kqueue is inserted into another,
         * we check that the (future) parent is not already nested
         * into another kqueue at a lower level than the potential
         * child (because it could indicate a cycle).  If that test
         * passes, we just mark the nesting levels accordingly.

        if (parentkq->kq_level > 0 &&
            parentkq->kq_level < kq->kq_level)

                kn->kn_flags = EV_ERROR;
                kn->kn_data = EINVAL;

                /* set parent level appropriately */
                if (parentkq->kq_level == 0)
                        parentkq->kq_level = 2;
                if (parentkq->kq_level < kq->kq_level + 1)
                        parentkq->kq_level = kq->kq_level + 1;

                kn->kn_filtid = EVFILTID_KQREAD;

                KNOTE_ATTACH(&kqf->kqf_sel.si_note, kn);
                /* indicate nesting in child, if needed */
                if (kq->kq_level == 0)

                int count = kq->kq_count;
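/*
 * Illustrative sketch (not part of this file): kqueue_kqfilter() is what runs
 * when one kqueue is registered inside another with EVFILT_READ, subject to the
 * ceiling protocol described above.  A minimal, hypothetical user-space example:
 *
 *	#include <sys/event.h>
 *
 *	int inner = kqueue();
 *	int outer = kqueue();
 *	struct kevent kev, ev;
 *	// watch the inner kqueue from the outer one; only EVFILT_READ is accepted
 *	EV_SET(&kev, inner, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(outer, &kev, 1, NULL, 0, NULL);
 *	// once events are pending on `inner`, waiting on `outer` reports `inner`
 *	// as readable, with ev.data carrying the pending-event count
 *	kevent(outer, NULL, 0, &ev, 1, NULL);
 */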
 * kqueue_drain - called when kq is closed

kqueue_drain(struct fileproc *fp, __unused vfs_context_t ctx)

        struct kqueue *kq = (struct kqueue *)fp->f_fglob->fg_data;

        assert((kq->kq_state & KQ_WORKQ) == 0);

        kq->kq_state |= KQ_DRAIN;
        kqueue_interrupt(kq);
kqueue_stat(struct kqueue *kq, void *ub, int isstat64, proc_t p)

        assert((kq->kq_state & KQ_WORKQ) == 0);

        if (isstat64 != 0) {
                struct stat64 *sb64 = (struct stat64 *)ub;

                bzero((void *)sb64, sizeof(*sb64));
                sb64->st_size = kq->kq_count;
                if (kq->kq_state & KQ_KEV_QOS)
                        sb64->st_blksize = sizeof(struct kevent_qos_s);
                else if (kq->kq_state & KQ_KEV64)
                        sb64->st_blksize = sizeof(struct kevent64_s);
                else if (IS_64BIT_PROCESS(p))
                        sb64->st_blksize = sizeof(struct user64_kevent);

                        sb64->st_blksize = sizeof(struct user32_kevent);
                sb64->st_mode = S_IFIFO;

                struct stat *sb = (struct stat *)ub;

                bzero((void *)sb, sizeof(*sb));
                sb->st_size = kq->kq_count;
                if (kq->kq_state & KQ_KEV_QOS)
                        sb->st_blksize = sizeof(struct kevent_qos_s);
                else if (kq->kq_state & KQ_KEV64)
                        sb->st_blksize = sizeof(struct kevent64_s);
                else if (IS_64BIT_PROCESS(p))
                        sb->st_blksize = sizeof(struct user64_kevent);

                        sb->st_blksize = sizeof(struct user32_kevent);
                sb->st_mode = S_IFIFO;
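/*
 * Illustrative sketch (not part of this file): kqueue_stat() is the backing for
 * fstat(2) on a kqueue file descriptor.  A minimal, hypothetical user-space
 * example; st_size reflects the number of currently pending events and the
 * descriptor reads as a FIFO:
 *
 *	#include <sys/event.h>
 *	#include <sys/stat.h>
 *
 *	int kq = kqueue();
 *	struct stat st;
 *	fstat(kq, &st);
 *	// st.st_size == number of pending events (kq_count)
 *	// S_ISFIFO(st.st_mode) is true; st_blksize is the kevent structure
 *	// size matching how events are registered on this kqueue
 */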
 * Interact with the pthread kext to request servicing there.
 * Eventually, this will request threads at specific QoS levels.
 * For now, it only requests a dispatch-manager-QoS thread, and
 * only one-at-a-time.
 *
 * - Caller holds the workq request lock
 *
 * - May be called with the kqueue's wait queue set locked,
 *   so cannot do anything that could recurse on that.

kqworkq_request_thread(
        struct kqworkq *kqwq,
        kq_index_t qos_index)

        struct kqrequest *kqr;

        assert(kqwq->kqwq_state & KQ_WORKQ);
        assert(qos_index < KQWQ_NQOS);

        kqr = kqworkq_get_request(kqwq, qos_index);

        assert(kqr->kqr_state & KQR_WAKEUP);

         * If we have already requested a thread, and it hasn't
         * started processing yet, there's no use hammering away
         * on the pthread kext.

        if (kqr->kqr_state & KQR_THREQUESTED)

        assert((kqr->kqr_state & KQR_BOUND) == 0);

        /* request additional workq threads if appropriate */
        if (pthread_functions != NULL &&
            pthread_functions->workq_reqthreads != NULL) {
                unsigned int flags = KEVENT_FLAG_WORKQ;
                unsigned long priority;

                /* Compute the appropriate pthread priority */
                priority = qos_from_qos_index(qos_index);

                /* JMM - for now remain compatible with old invocations */
                /* set the over-commit flag on the request if needed */
                if (kqr->kqr_state & KQR_THOVERCOMMIT)
                        priority |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;

                /* Compute a priority based on qos_index. */
                struct workq_reqthreads_req_s request = {
                        .priority = priority,

                /* mark that we are making a request */
                kqr->kqr_state |= KQR_THREQUESTED;
                if (qos_index == KQWQ_QOS_MANAGER)
                        kqr->kqr_state |= KQWQ_THMANAGER;

                KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_THREQUEST),
                              (((uintptr_t)kqr->kqr_override_index << 8) |
                               (uintptr_t)kqr->kqr_state));
                wqthread = (*pthread_functions->workq_reqthreads)(kqwq->kqwq_p, 1, &request);

                /* We've been switched to the emergency/manager thread */
                if (wqthread == (thread_t)-1) {
                        assert(qos_index != KQWQ_QOS_MANAGER);
                        kqr->kqr_state |= KQWQ_THMANAGER;

                 * bind the returned thread identity
                 * This goes away when we switch to synchronous callback
                 * binding from the pthread kext.

                if (wqthread != NULL) {
                        kqworkq_bind_thread_impl(kqwq, qos_index, wqthread, flags);
 * If we aren't already busy processing events [for this QoS],
 * request workq thread support as appropriate.
 *
 * TBD - for now, we don't segregate out processing by QoS.
 *
 * - May be called with the kqueue's wait queue set locked,
 *   so cannot do anything that could recurse on that.

kqworkq_request_help(
        struct kqworkq *kqwq,
        kq_index_t qos_index)

        struct kqrequest *kqr;

        /* convert to thread qos value */
        assert(qos_index < KQWQ_NQOS);

        kqwq_req_lock(kqwq);
        kqr = kqworkq_get_request(kqwq, qos_index);

        if ((kqr->kqr_state & KQR_WAKEUP) == 0) {
                /* Indicate that we needed help from this request */
                kqr->kqr_state |= KQR_WAKEUP;

                /* Go assure a thread request has been made */
                kqworkq_request_thread(kqwq, qos_index);

        kqwq_req_unlock(kqwq);
kqworkloop_threadreq_impl(struct kqworkloop *kqwl, kq_index_t qos_index)

        struct kqrequest *kqr = &kqwl->kqwl_request;
        unsigned long pri = pthread_priority_for_kqrequest(kqr, qos_index);

        assert((kqr->kqr_state & (KQR_THREQUESTED | KQR_BOUND)) == KQR_THREQUESTED);

         * New-style thread request supported. Provide
         * the pthread kext a pointer to a workq_threadreq_s
         * structure for its use until a corresponding
         * workloop_fulfill_threadreq callback.

        if (current_proc() == kqwl->kqwl_kqueue.kq_p) {
                op = WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL;

                op = WORKQ_THREADREQ_WORKLOOP;

        ret = (*pthread_functions->workq_threadreq)(kqwl->kqwl_p, &kqr->kqr_req,
                        WORKQ_THREADREQ_WORKLOOP, pri, 0);

                assert(op == WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL);
                op = WORKQ_THREADREQ_WORKLOOP;

                 * Process is shutting down or exec'ing.
                 * All the kqueues are going to be cleaned up
                 * soon. Forget we even asked for a thread -
                 * and make sure we don't ask for more.

                kqueue_release((struct kqueue *)kqwl, KQUEUE_CANT_BE_LAST_REF);
                kqr->kqr_state &= ~KQR_THREQUESTED;
                kqr->kqr_state |= KQR_DRAIN;

                assert(op == WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL);
                act_set_astkevent(current_thread(), AST_KEVENT_REDRIVE_THREADREQ);
kqworkloop_threadreq_modify(struct kqworkloop *kqwl, kq_index_t qos_index)

        struct kqrequest *kqr = &kqwl->kqwl_request;
        unsigned long pri = pthread_priority_for_kqrequest(kqr, qos_index);
        int ret, op = WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL;

        assert((kqr->kqr_state & (KQR_THREQUESTED | KQR_BOUND)) == KQR_THREQUESTED);

        if (current_proc() == kqwl->kqwl_kqueue.kq_p) {
                op = WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL;

                op = WORKQ_THREADREQ_CHANGE_PRI;

        ret = (*pthread_functions->workq_threadreq_modify)(kqwl->kqwl_p,
                        &kqr->kqr_req, op, pri, 0);

                assert(op == WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL);
                op = WORKQ_THREADREQ_CHANGE_PRI;

                assert(op == WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL);
                act_set_astkevent(current_thread(), AST_KEVENT_REDRIVE_THREADREQ);
 * Interact with the pthread kext to request a servicing thread.
 * This will request a single thread at the highest QoS level
 * for which there is work (whether that was the requested QoS
 * for an event or an override applied to a lower-QoS request).
 *
 * - Caller holds the workloop request lock
 *
 * - May be called with the kqueue's wait queue set locked,
 *   so cannot do anything that could recurse on that.

kqworkloop_request_thread(struct kqworkloop *kqwl, kq_index_t qos_index)

        struct kqrequest *kqr;

        assert(kqwl->kqwl_state & KQ_WORKLOOP);

        kqr = &kqwl->kqwl_request;

        assert(kqwl->kqwl_owner == THREAD_NULL);
        assert((kqr->kqr_state & KQR_BOUND) == 0);
        assert((kqr->kqr_state & KQR_THREQUESTED) == 0);
        assert(!(kqwl->kqwl_kqueue.kq_state & KQ_NO_WQ_THREAD));

        /* If we're draining thread requests, just bail */
        if (kqr->kqr_state & KQR_DRAIN)

        if (pthread_functions != NULL &&
                        pthread_functions->workq_threadreq != NULL) {

                 * set request state flags, etc... before calling pthread
                 * This assures they are set before a possible synchronous
                 * callback to workloop_fulfill_threadreq().

                kqr->kqr_state |= KQR_THREQUESTED;

                /* Add a thread request reference on the kqueue. */
                kqueue_retain((struct kqueue *)kqwl);

                KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THREQUEST),
                              kqwl->kqwl_dynamicid,
                              0, qos_index, kqr->kqr_state);
                kqworkloop_threadreq_impl(kqwl, qos_index);

                panic("kqworkloop_request_thread");
kqworkloop_update_sync_override_state(struct kqworkloop *kqwl, boolean_t sync_ipc_override)

        struct kqrequest *kqr = &kqwl->kqwl_request;
        kqwl_req_lock(kqwl);
        kqr->kqr_has_sync_override = sync_ipc_override;
        kqwl_req_unlock(kqwl);
static inline kq_index_t
kqworkloop_combined_qos(struct kqworkloop *kqwl, boolean_t *ipc_override_is_sync)

        struct kqrequest *kqr = &kqwl->kqwl_request;
        kq_index_t override;

        *ipc_override_is_sync = FALSE;
        override = MAX(MAX(kqr->kqr_qos_index, kqr->kqr_override_index),
                                        kqr->kqr_dsync_waiters_qos);

        if (kqr->kqr_sync_suppress_count > 0 || kqr->kqr_has_sync_override) {
                *ipc_override_is_sync = TRUE;
                override = THREAD_QOS_USER_INTERACTIVE;
kqworkloop_request_fire_r2k_notification(struct kqworkloop *kqwl)

        struct kqrequest *kqr = &kqwl->kqwl_request;

        kqwl_req_held(kqwl);

        if (kqr->kqr_state & KQR_R2K_NOTIF_ARMED) {
                assert(kqr->kqr_state & KQR_BOUND);
                assert(kqr->kqr_thread);

                kqr->kqr_state &= ~KQR_R2K_NOTIF_ARMED;
                act_set_astkevent(kqr->kqr_thread, AST_KEVENT_RETURN_TO_KERNEL);
kqworkloop_update_threads_qos(struct kqworkloop *kqwl, int op, kq_index_t qos)

        const uint8_t KQWL_STAYACTIVE_FIRED_BIT = (1 << 0);

        struct kqrequest *kqr = &kqwl->kqwl_request;
        boolean_t old_ipc_override_is_sync = FALSE;
        kq_index_t old_qos = kqworkloop_combined_qos(kqwl, &old_ipc_override_is_sync);
        struct kqueue *kq = &kqwl->kqwl_kqueue;
        bool static_thread = (kq->kq_state & KQ_NO_WQ_THREAD);

        /* must hold the kqr lock */
        kqwl_req_held(kqwl);

        case KQWL_UTQ_UPDATE_WAKEUP_QOS:
                if (qos == KQWL_BUCKET_STAYACTIVE) {

                         * the KQWL_BUCKET_STAYACTIVE is not a QoS bucket, we only remember
                         * a high watermark (kqr_stayactive_qos) of any stay active knote
                         * that was ever registered with this workloop.
                         *
                         * When waitq_set__CALLING_PREPOST_HOOK__() wakes up any stay active
                         * knote, we use this high-watermark as a wakeup-index, and also set
                         * the magic KQWL_BUCKET_STAYACTIVE bit to make sure we remember
                         * there is at least one stay active knote fired until the next full
                         * processing of this bucket.

                        kqr->kqr_wakeup_indexes |= KQWL_STAYACTIVE_FIRED_BIT;
                        qos = kqr->kqr_stayactive_qos;

                        assert(!static_thread);

                if (kqr->kqr_wakeup_indexes & (1 << qos)) {
                        assert(kqr->kqr_state & KQR_WAKEUP);

                kqr->kqr_wakeup_indexes |= (1 << qos);
                kqr->kqr_state |= KQR_WAKEUP;
                kqworkloop_request_fire_r2k_notification(kqwl);
                goto recompute_async;

        case KQWL_UTQ_UPDATE_STAYACTIVE_QOS:

                if (kqr->kqr_stayactive_qos < qos) {
                        kqr->kqr_stayactive_qos = qos;
                        if (kqr->kqr_wakeup_indexes & KQWL_STAYACTIVE_FIRED_BIT) {
                                assert(kqr->kqr_state & KQR_WAKEUP);
                                kqr->kqr_wakeup_indexes |= (1 << qos);
                                goto recompute_async;

        case KQWL_UTQ_RECOMPUTE_WAKEUP_QOS:
                kqlock_held(kq); // to look at kq_queues
                kqr->kqr_has_sync_override = FALSE;
                i = KQWL_BUCKET_STAYACTIVE;
                if (TAILQ_EMPTY(&kqr->kqr_suppressed)) {
                        kqr->kqr_override_index = THREAD_QOS_UNSPECIFIED;

                if (!TAILQ_EMPTY(&kq->kq_queue[i]) &&
                                (kqr->kqr_wakeup_indexes & KQWL_STAYACTIVE_FIRED_BIT)) {

                         * If the KQWL_STAYACTIVE_FIRED_BIT is set, it means a stay active
                         * knote may have fired, so we need to merge in kqr_stayactive_qos.
                         *
                         * Unlike other buckets, this one is never empty but could be idle.

                        kqr->kqr_wakeup_indexes &= KQWL_STAYACTIVE_FIRED_BIT;
                        kqr->kqr_wakeup_indexes |= (1 << kqr->kqr_stayactive_qos);

                        kqr->kqr_wakeup_indexes = 0;

                for (i = THREAD_QOS_UNSPECIFIED + 1; i < KQWL_BUCKET_STAYACTIVE; i++) {
                        if (!TAILQ_EMPTY(&kq->kq_queue[i])) {
                                kqr->kqr_wakeup_indexes |= (1 << i);
                                struct knote *kn = TAILQ_FIRST(&kqwl->kqwl_kqueue.kq_queue[i]);
                                if (i == THREAD_QOS_USER_INTERACTIVE &&
                                    kn->kn_qos_override_is_sync) {
                                        kqr->kqr_has_sync_override = TRUE;

                if (kqr->kqr_wakeup_indexes) {
                        kqr->kqr_state |= KQR_WAKEUP;
                        kqworkloop_request_fire_r2k_notification(kqwl);

                        kqr->kqr_state &= ~KQR_WAKEUP;

                assert(qos == THREAD_QOS_UNSPECIFIED);
                goto recompute_async;

        case KQWL_UTQ_RESET_WAKEUP_OVERRIDE:
                kqr->kqr_override_index = THREAD_QOS_UNSPECIFIED;
                assert(qos == THREAD_QOS_UNSPECIFIED);
                goto recompute_async;

        case KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE:

                 * When modifying the wakeup QoS or the async override QoS, we always
                 * need to maintain our invariant that kqr_override_index is at least as
                 * large as the highest QoS for which an event is fired.
                 *
                 * However this override index can be larger when there is an overridden
                 * suppressed knote pushing on the kqueue.

                if (kqr->kqr_wakeup_indexes > (1 << qos)) {
                        qos = fls(kqr->kqr_wakeup_indexes) - 1; /* fls is 1-based */

                if (kqr->kqr_override_index < qos) {
                        kqr->kqr_override_index = qos;

        case KQWL_UTQ_REDRIVE_EVENTS:

        case KQWL_UTQ_SET_ASYNC_QOS:

                kqr->kqr_qos_index = qos;

        case KQWL_UTQ_SET_SYNC_WAITERS_QOS:

                kqr->kqr_dsync_waiters_qos = qos;

                panic("unknown kqwl thread qos update operation: %d", op);
6845         boolean_t new_ipc_override_is_sync 
= FALSE
; 
6846         kq_index_t new_qos 
= kqworkloop_combined_qos(kqwl
, &new_ipc_override_is_sync
); 
6847         thread_t kqwl_owner 
= kqwl
->kqwl_owner
; 
6848         thread_t servicer 
= kqr
->kqr_thread
; 
6849         __assert_only 
int ret
; 
6852          * Apply the diffs to the owner if applicable 
6854         if (filt_wlowner_is_valid(kqwl_owner
)) { 
6856                 /* JMM - need new trace hooks for owner overrides */ 
6857                 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST
), 
6858                                 kqwl
->kqwl_dynamicid
, 
6859                                 (kqr
->kqr_state 
& KQR_BOUND
) ? thread_tid(kqwl_owner
) : 0, 
6860                                 (kqr
->kqr_qos_index 
<< 8) | new_qos
, 
6861                                 (kqr
->kqr_override_index 
<< 8) | kqr
->kqr_state
); 
6863                 if (new_qos 
== kqr
->kqr_dsync_owner_qos
) { 
6865                 } else if (kqr
->kqr_dsync_owner_qos 
== THREAD_QOS_UNSPECIFIED
) { 
6866                         thread_add_ipc_override(kqwl_owner
, new_qos
); 
6867                 } else if (new_qos 
== THREAD_QOS_UNSPECIFIED
) { 
6868                         thread_drop_ipc_override(kqwl_owner
); 
6869                 } else /* kqr->kqr_dsync_owner_qos != new_qos */ { 
6870                         thread_update_ipc_override(kqwl_owner
, new_qos
); 
6872                 kqr
->kqr_dsync_owner_qos 
= new_qos
; 
6874                 if (new_ipc_override_is_sync 
&& 
6875                         !kqr
->kqr_owner_override_is_sync
) { 
6876                         thread_add_sync_ipc_override(kqwl_owner
); 
6877                 } else if (!new_ipc_override_is_sync 
&& 
6878                         kqr
->kqr_owner_override_is_sync
) { 
6879                         thread_drop_sync_ipc_override(kqwl_owner
); 
6881                 kqr
->kqr_owner_override_is_sync 
= new_ipc_override_is_sync
; 
6885          * apply the diffs to the servicer 
6887         if (static_thread
) { 
6889                  * Statically bound thread 
6891                  * These threads don't participates in QoS overrides today, just wakeup 
6892                  * the thread blocked on this kqueue if a new event arrived. 
6896                 case KQWL_UTQ_UPDATE_WAKEUP_QOS
: 
6897                 case KQWL_UTQ_UPDATE_STAYACTIVE_QOS
: 
6898                 case KQWL_UTQ_RECOMPUTE_WAKEUP_QOS
: 
6901                 case KQWL_UTQ_RESET_WAKEUP_OVERRIDE
: 
6902                 case KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE
: 
6903                 case KQWL_UTQ_REDRIVE_EVENTS
: 
6904                 case KQWL_UTQ_SET_ASYNC_QOS
: 
6905                 case KQWL_UTQ_SET_SYNC_WAITERS_QOS
: 
6906                         panic("should never be called"); 
6912                 if ((kqr
->kqr_state 
& KQR_BOUND
) && (kqr
->kqr_state 
& KQR_WAKEUP
)) { 
6913                         assert(servicer 
&& !is_workqueue_thread(servicer
)); 
6914                         if (kq
->kq_state 
& (KQ_SLEEP 
| KQ_SEL
)) { 
6915                                 kq
->kq_state 
&= ~(KQ_SLEEP 
| KQ_SEL
); 
6916                                 waitq_wakeup64_all((struct waitq 
*)&kq
->kq_wqs
, KQ_EVENT
, 
6917                                                 THREAD_AWAKENED
, WAITQ_ALL_PRIORITIES
); 
        } else if ((kqr->kqr_state & KQR_THREQUESTED) == 0) {
                /*
                 * No servicer, nor thread-request
                 *
                 * Make a new thread request, unless there is an owner (or the workloop
                 * is suspended in userland) or if there is no asynchronous work in the
                 * first place.
                 */
                if (kqwl_owner == THREAD_NULL && (kqr->kqr_state & KQR_WAKEUP)) {
                        kqworkloop_request_thread(kqwl, new_qos);
                }
        } else if ((kqr->kqr_state & KQR_BOUND) == 0 &&
                        (kqwl_owner || (kqr->kqr_state & KQR_WAKEUP) == 0)) {
                /*
                 * No servicer, thread request in flight we want to cancel
                 *
                 * We just got rid of the last knote of the kqueue or noticed an owner
                 * with a thread request still in flight, take it back.
                 */
                ret = (*pthread_functions->workq_threadreq_modify)(kqwl->kqwl_p,
                                &kqr->kqr_req, WORKQ_THREADREQ_CANCEL, 0, 0);
                assert(ret == 0);
                kqr->kqr_state &= ~KQR_THREQUESTED;
                kqueue_release(kq, KQUEUE_CANT_BE_LAST_REF);
        } else {
                boolean_t qos_changed = FALSE;

                /*
                 * Servicer or request is in flight
                 *
                 * Just apply the diff to the servicer or the thread request
                 */
                if (kqr->kqr_state & KQR_BOUND) {
                        servicer = kqr->kqr_thread;
                        struct uthread *ut = get_bsdthread_info(servicer);
                        if (ut->uu_kqueue_qos_index != new_qos) {
                                if (ut->uu_kqueue_qos_index == THREAD_QOS_UNSPECIFIED) {
                                        thread_add_ipc_override(servicer, new_qos);
                                } else if (new_qos == THREAD_QOS_UNSPECIFIED) {
                                        thread_drop_ipc_override(servicer);
                                } else /* ut->uu_kqueue_qos_index != new_qos */ {
                                        thread_update_ipc_override(servicer, new_qos);
                                }
                                ut->uu_kqueue_qos_index = new_qos;
                                qos_changed = TRUE;
                        }

                        if (new_ipc_override_is_sync != ut->uu_kqueue_override_is_sync) {
                                if (new_ipc_override_is_sync &&
                                    !ut->uu_kqueue_override_is_sync) {
                                        thread_add_sync_ipc_override(servicer);
                                } else if (!new_ipc_override_is_sync &&
                                        ut->uu_kqueue_override_is_sync) {
                                        thread_drop_sync_ipc_override(servicer);
                                }
                                ut->uu_kqueue_override_is_sync = new_ipc_override_is_sync;
                                qos_changed = TRUE;
                        }
                } else if (old_qos != new_qos) {
                        kqworkloop_threadreq_modify(kqwl, new_qos);
                        qos_changed = TRUE;
                }
                if (qos_changed) {
                        servicer = kqr->kqr_thread;
                        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST),
                                kqwl->kqwl_dynamicid,
                                (kqr->kqr_state & KQR_BOUND) ? thread_tid(servicer) : 0,
                                (kqr->kqr_qos_index << 16) | (new_qos << 8) | new_ipc_override_is_sync,
                                (kqr->kqr_override_index << 8) | kqr->kqr_state);
                }
        }
}
static void
kqworkloop_request_help(struct kqworkloop *kqwl, kq_index_t qos_index)
{
        /* convert to thread qos value */
        assert(qos_index < KQWL_NBUCKETS);

        kqwl_req_lock(kqwl);
        kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_WAKEUP_QOS, qos_index);
        kqwl_req_unlock(kqwl);
}
/*
 * These arrays describe the low and high qindexes for a given qos_index.
 * The values come from the chart in <sys/eventvar.h> (must stay in sync).
 */
static kq_index_t _kqwq_base_index[KQWQ_NQOS] = {0, 0, 6, 11, 15, 18, 20, 21};
static kq_index_t _kqwq_high_index[KQWQ_NQOS] = {0, 5, 10, 14, 17, 19, 20, 21};

static struct kqtailq *
kqueue_get_base_queue(struct kqueue *kq, kq_index_t qos_index)
{
        if (kq->kq_state & KQ_WORKQ) {
                assert(qos_index < KQWQ_NQOS);
                return &kq->kq_queue[_kqwq_base_index[qos_index]];
        } else if (kq->kq_state & KQ_WORKLOOP) {
                assert(qos_index < KQWL_NBUCKETS);
                return &kq->kq_queue[qos_index];
        } else {
                assert(qos_index == QOS_INDEX_KQFILE);
                return &kq->kq_queue[QOS_INDEX_KQFILE];
        }
}

static struct kqtailq *
kqueue_get_high_queue(struct kqueue *kq, kq_index_t qos_index)
{
        if (kq->kq_state & KQ_WORKQ) {
                assert(qos_index < KQWQ_NQOS);
                return &kq->kq_queue[_kqwq_high_index[qos_index]];
        } else if (kq->kq_state & KQ_WORKLOOP) {
                assert(qos_index < KQWL_NBUCKETS);
                return &kq->kq_queue[KQWL_BUCKET_STAYACTIVE];
        } else {
                assert(qos_index == QOS_INDEX_KQFILE);
                return &kq->kq_queue[QOS_INDEX_KQFILE];
        }
}
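
/*
 * Returns non-zero only when every per-QoS queue between the base and the
 * high queue for qos_index is empty; the scan below walks from the high
 * queue down to the base queue.
 */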
static int
kqueue_queue_empty(struct kqueue *kq, kq_index_t qos_index)
{
        struct kqtailq *base_queue = kqueue_get_base_queue(kq, qos_index);
        struct kqtailq *queue = kqueue_get_high_queue(kq, qos_index);

        do {
                if (!TAILQ_EMPTY(queue))
                        return 0;
        } while (queue-- > base_queue);
        return 1;
}
static struct kqtailq *
kqueue_get_suppressed_queue(struct kqueue *kq, kq_index_t qos_index)
{
        struct kqtailq *res;
        struct kqrequest *kqr;

        if (kq->kq_state & KQ_WORKQ) {
                struct kqworkq *kqwq = (struct kqworkq *)kq;

                kqr = kqworkq_get_request(kqwq, qos_index);
                res = &kqr->kqr_suppressed;
        } else if (kq->kq_state & KQ_WORKLOOP) {
                struct kqworkloop *kqwl = (struct kqworkloop *)kq;

                kqr = &kqwl->kqwl_request;
                res = &kqr->kqr_suppressed;
        } else {
                struct kqfile *kqf = (struct kqfile *)kq;

                res = &kqf->kqf_suppressed;
        }
        return res;
}
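
/*
 * The effective queue index of a knote combines its requested QoS index
 * with any QoS override currently applied to it: workq kqueues offset
 * into the per-QoS bucket range, workloop kqueues simply take the higher
 * of the two values, and plain file kqueues always use QOS_INDEX_KQFILE.
 */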
static kq_index_t
knote_get_queue_index(struct knote *kn)
{
        kq_index_t override_index = knote_get_qos_override_index(kn);
        kq_index_t qos_index = knote_get_qos_index(kn);
        struct kqueue *kq = knote_get_kq(kn);
        kq_index_t res;

        if (kq->kq_state & KQ_WORKQ) {
                res = _kqwq_base_index[qos_index];
                if (override_index > qos_index)
                        res += override_index - qos_index;
                assert(res <= _kqwq_high_index[qos_index]);
        } else if (kq->kq_state & KQ_WORKLOOP) {
                res = MAX(override_index, qos_index);
                assert(res < KQWL_NBUCKETS);
        } else {
                assert(qos_index == QOS_INDEX_KQFILE);
                assert(override_index == QOS_INDEX_KQFILE);
                res = QOS_INDEX_KQFILE;
        }
        return res;
}
static struct kqtailq *
knote_get_queue(struct knote *kn)
{
        kq_index_t qindex = knote_get_queue_index(kn);

        return &(knote_get_kq(kn))->kq_queue[qindex];
}

static kq_index_t
knote_get_req_index(struct knote *kn)
{
        return kn->kn_req_index;
}

static kq_index_t
knote_get_qos_index(struct knote *kn)
{
        return kn->kn_qos_index;
}
static void
knote_set_qos_index(struct knote *kn, kq_index_t qos_index)
{
        struct kqueue *kq = knote_get_kq(kn);

        assert(qos_index < KQWQ_NQOS);
        assert((kn->kn_status & KN_QUEUED) == 0);

        if (kq->kq_state & KQ_WORKQ) {
                assert(qos_index > THREAD_QOS_UNSPECIFIED);
        } else if (kq->kq_state & KQ_WORKLOOP) {
                /* XXX this policy decision shouldn't be here */
                if (qos_index == THREAD_QOS_UNSPECIFIED)
                        qos_index = THREAD_QOS_LEGACY;
        } else {
                qos_index = QOS_INDEX_KQFILE;
        }

        /* always set requested */
        kn->kn_req_index = qos_index;

        /* only adjust in-use qos index when not suppressed */
        if ((kn->kn_status & KN_SUPPRESSED) == 0)
                kn->kn_qos_index = qos_index;
}
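
/*
 * Propagate the overcommit bit from the knote's pthread priority
 * (_PTHREAD_PRIORITY_OVERCOMMIT_FLAG in kn_qos) onto the thread request
 * that will service it, for both workq and workloop kqueues.
 */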
void
knote_set_qos_overcommit(struct knote *kn)
{
        struct kqueue *kq = knote_get_kq(kn);
        struct kqrequest *kqr;

        /* turn overcommit on for the appropriate thread request? */
        if (kn->kn_qos & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) {
                if (kq->kq_state & KQ_WORKQ) {
                        kq_index_t qos_index = knote_get_qos_index(kn);
                        struct kqworkq *kqwq = (struct kqworkq *)kq;

                        kqr = kqworkq_get_request(kqwq, qos_index);

                        kqwq_req_lock(kqwq);
                        kqr->kqr_state |= KQR_THOVERCOMMIT;
                        kqwq_req_unlock(kqwq);
                } else if (kq->kq_state & KQ_WORKLOOP) {
                        struct kqworkloop *kqwl = (struct kqworkloop *)kq;

                        kqr = &kqwl->kqwl_request;

                        kqwl_req_lock(kqwl);
                        kqr->kqr_state |= KQR_THOVERCOMMIT;
                        kqwl_req_unlock(kqwl);
                }
        }
}
static kq_index_t
knote_get_qos_override_index(struct knote *kn)
{
        return kn->kn_qos_override;
}

static void
knote_set_qos_override_index(struct knote *kn, kq_index_t override_index,
                boolean_t override_is_sync)
{
        struct kqueue *kq = knote_get_kq(kn);
        kq_index_t qos_index = knote_get_qos_index(kn);
        kq_index_t old_override_index = knote_get_qos_override_index(kn);
        boolean_t old_override_is_sync = kn->kn_qos_override_is_sync;
        uint32_t flags = 0;

        assert((kn->kn_status & KN_QUEUED) == 0);

        if (override_index == KQWQ_QOS_MANAGER) {
                assert(qos_index == KQWQ_QOS_MANAGER);
        } else {
                assert(override_index < KQWQ_QOS_MANAGER);
        }

        kn->kn_qos_override = override_index;
        kn->kn_qos_override_is_sync = override_is_sync;

        /*
         * If this is a workq/workloop kqueue, apply the override to the
         * servicing thread.
         */
        if (kq->kq_state & KQ_WORKQ) {
                struct kqworkq *kqwq = (struct kqworkq *)kq;

                assert(qos_index > THREAD_QOS_UNSPECIFIED);
                kqworkq_update_override(kqwq, qos_index, override_index);
        } else if (kq->kq_state & KQ_WORKLOOP) {
                struct kqworkloop *kqwl = (struct kqworkloop *)kq;

                if ((kn->kn_status & KN_SUPPRESSED) == KN_SUPPRESSED) {
                        flags = flags | KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS;

                        if (override_index == THREAD_QOS_USER_INTERACTIVE
                                        && override_is_sync) {
                                flags = flags | KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI;
                        }

                        if (old_override_index == THREAD_QOS_USER_INTERACTIVE
                                        && old_override_is_sync) {
                                flags = flags | KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI;
                        }
                }

                assert(qos_index > THREAD_QOS_UNSPECIFIED);
                kqworkloop_update_override(kqwl, qos_index, override_index, flags);
        }
}

static kq_index_t
knote_get_sync_qos_override_index(struct knote *kn)
{
        return kn->kn_qos_sync_override;
}
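
/*
 * Raise the tracked override for the per-QoS thread request of a workq
 * kqueue and, when a non-manager servicer is already bound, apply the
 * corresponding IPC override to that thread immediately.
 */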
static void
kqworkq_update_override(struct kqworkq *kqwq, kq_index_t qos_index, kq_index_t override_index)
{
        struct kqrequest *kqr;
        kq_index_t old_override_index;

        if (override_index <= qos_index) {
                return;
        }

        kqr = kqworkq_get_request(kqwq, qos_index);

        kqwq_req_lock(kqwq);
        old_override_index = kqr->kqr_override_index;
        if (override_index > MAX(kqr->kqr_qos_index, old_override_index)) {
                kqr->kqr_override_index = override_index;

                /* apply the override to [incoming?] servicing thread */
                if (kqr->kqr_state & KQR_BOUND) {
                        thread_t wqthread = kqr->kqr_thread;

                        /* only apply if non-manager */
                        if ((kqr->kqr_state & KQWQ_THMANAGER) == 0) {
                                if (old_override_index)
                                        thread_update_ipc_override(wqthread, override_index);
                                else
                                        thread_add_ipc_override(wqthread, override_index);
                        }
                }
        }
        kqwq_req_unlock(kqwq);
}
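
/*
 * Record the binding of a servicer thread to a workq kqueue's per-QoS
 * thread request: the kqrequest remembers the thread, the uthread mirrors
 * the binding (bound kqueue, QoS index and kevent flags), and any pending
 * override is applied to the incoming thread.
 */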
/* called with the kqworkq lock held */
static void
kqworkq_bind_thread_impl(
        struct kqworkq *kqwq,
        kq_index_t qos_index,
        thread_t thread,
        unsigned int flags)
{
        /* request lock must be held */
        kqwq_req_held(kqwq);

        struct kqrequest *kqr = kqworkq_get_request(kqwq, qos_index);
        assert(kqr->kqr_state & KQR_THREQUESTED);

        if (qos_index == KQWQ_QOS_MANAGER)
                flags |= KEVENT_FLAG_WORKQ_MANAGER;

        struct uthread *ut = get_bsdthread_info(thread);

        /*
         * If this is a manager, and the manager request bit is
         * not set, assure no other thread is bound. If the bit
         * is set, make sure the old thread is us (or not set).
         */
        if (flags & KEVENT_FLAG_WORKQ_MANAGER) {
                if ((kqr->kqr_state & KQR_BOUND) == 0) {
                        kqr->kqr_state |= (KQR_BOUND | KQWQ_THMANAGER);
                        TAILQ_INIT(&kqr->kqr_suppressed);
                        kqr->kqr_thread = thread;
                        ut->uu_kqueue_bound = (struct kqueue *)kqwq;
                        ut->uu_kqueue_qos_index = KQWQ_QOS_MANAGER;
                        ut->uu_kqueue_flags = (KEVENT_FLAG_WORKQ |
                                               KEVENT_FLAG_WORKQ_MANAGER);
                } else {
                        assert(kqr->kqr_state & KQR_BOUND);
                        assert(thread == kqr->kqr_thread);
                        assert(ut->uu_kqueue_bound == (struct kqueue *)kqwq);
                        assert(ut->uu_kqueue_qos_index == KQWQ_QOS_MANAGER);
                        assert(ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ_MANAGER);
                }
                return;
        }

        /* Just a normal one-queue servicing thread */
        assert(kqr->kqr_state & KQR_THREQUESTED);
        assert(kqr->kqr_qos_index == qos_index);

        if ((kqr->kqr_state & KQR_BOUND) == 0) {
                kqr->kqr_state |= KQR_BOUND;
                TAILQ_INIT(&kqr->kqr_suppressed);
                kqr->kqr_thread = thread;

                /* apply an ipc QoS override if one is needed */
                if (kqr->kqr_override_index) {
                        assert(kqr->kqr_qos_index);
                        assert(kqr->kqr_override_index > kqr->kqr_qos_index);
                        assert(thread_get_ipc_override(thread) == THREAD_QOS_UNSPECIFIED);
                        thread_add_ipc_override(thread, kqr->kqr_override_index);
                }

                /* indicate that we are processing in the uthread */
                ut->uu_kqueue_bound = (struct kqueue *)kqwq;
                ut->uu_kqueue_qos_index = qos_index;
                ut->uu_kqueue_flags = flags;
        } else {
                /*
                 * probably synchronously bound AND post-request bound
                 * this logic can go away when we get rid of post-request bind
                 */
                assert(kqr->kqr_state & KQR_BOUND);
                assert(thread == kqr->kqr_thread);
                assert(ut->uu_kqueue_bound == (struct kqueue *)kqwq);
                assert(ut->uu_kqueue_qos_index == qos_index);
                assert((ut->uu_kqueue_flags & flags) == flags);
        }
}
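
/*
 * Apply a QoS override computed for a knote to a workloop kqueue: update
 * the sync IPC suppression counters when requested and, unless the caller
 * asked for a lazy update, recompute the wakeup override applied to the
 * owner or servicer thread.
 */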
static void
kqworkloop_update_override(
        struct kqworkloop *kqwl,
        kq_index_t qos_index,
        kq_index_t override_index,
        uint32_t flags)
{
        struct kqrequest *kqr = &kqwl->kqwl_request;

        kqwl_req_lock(kqwl);

        /* Do not override on attached threads */
        if (kqr->kqr_state & KQR_BOUND) {
                assert(kqr->kqr_thread);

                if (kqwl->kqwl_kqueue.kq_state & KQ_NO_WQ_THREAD) {
                        kqwl_req_unlock(kqwl);
                        assert(!is_workqueue_thread(kqr->kqr_thread));
                        return;
                }
        }

        /* Update sync ipc counts on kqr for suppressed knotes */
        if (flags & KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS) {
                kqworkloop_update_suppress_sync_count(kqr, flags);
        }

        if ((flags & KQWL_UO_UPDATE_OVERRIDE_LAZY) == 0) {
                kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE,
                        MAX(qos_index, override_index));
        }
        kqwl_req_unlock(kqwl);
}
static void
kqworkloop_update_suppress_sync_count(
        struct kqrequest *kqr,
        uint32_t flags)
{
        if (flags & KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI) {
                kqr->kqr_sync_suppress_count++;
        }

        if (flags & KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI) {
                assert(kqr->kqr_sync_suppress_count > 0);
                kqr->kqr_sync_suppress_count--;
        }
}
/*
 *      kqworkloop_unbind_thread - Unbind the servicer thread of a workloop kqueue
 *
 *      It will end the processing phase in case it was still processing:
 *
 *      We may have to request a new thread for workloops that are not
 *      KQ_NO_WQ_THREAD. This can happen if:
 *      - there were active events at or above our QoS we never got to (count > 0)
 *      - we pended waitq hook callouts during processing
 *      - we pended wakeups while processing (or unsuppressing)
 *
 *      Called with kqueue lock held.
 */
static void
kqworkloop_unbind_thread(
        struct kqworkloop *kqwl,
        thread_t thread,
        __unused unsigned int flags)
{
        struct kqueue *kq = &kqwl->kqwl_kqueue;
        struct kqrequest *kqr = &kqwl->kqwl_request;

        assert((kq->kq_state & KQ_PROCESSING) == 0);
        if (kq->kq_state & KQ_PROCESSING) {
                return;
        }

        /*
         * Forcing the KQ_PROCESSING flag ensures that QoS updates caused by
         * unsuppressing knotes are not applied until the eventual call to
         * kqworkloop_update_threads_qos() below.
         */
        kq->kq_state |= KQ_PROCESSING;
        kqworkloop_acknowledge_events(kqwl, TRUE);
        kq->kq_state &= ~KQ_PROCESSING;

        kqwl_req_lock(kqwl);

        /* deal with extraneous unbinds in release kernels */
        assert((kqr->kqr_state & (KQR_BOUND | KQR_PROCESSING)) == KQR_BOUND);
        if ((kqr->kqr_state & (KQR_BOUND | KQR_PROCESSING)) != KQR_BOUND) {
                kqwl_req_unlock(kqwl);
                return;
        }

        assert(thread == current_thread());
        assert(kqr->kqr_thread == thread);
        if (kqr->kqr_thread != thread) {
                kqwl_req_unlock(kqwl);
                return;
        }

        struct uthread *ut = get_bsdthread_info(thread);
        kq_index_t old_qos_index = ut->uu_kqueue_qos_index;
        boolean_t ipc_override_is_sync = ut->uu_kqueue_override_is_sync;
        ut->uu_kqueue_bound = NULL;
        ut->uu_kqueue_qos_index = 0;
        ut->uu_kqueue_override_is_sync = 0;
        ut->uu_kqueue_flags = 0;

        /* unbind the servicer thread, drop overrides */
        kqr->kqr_thread = NULL;
        kqr->kqr_state &= ~(KQR_BOUND | KQR_THREQUESTED | KQR_R2K_NOTIF_ARMED);
        kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_RECOMPUTE_WAKEUP_QOS, 0);

        kqwl_req_unlock(kqwl);

        /*
         * Drop the override on the current thread last, after the call to
         * kqworkloop_update_threads_qos above.
         */
        if (old_qos_index) {
                thread_drop_ipc_override(thread);
        }
        if (ipc_override_is_sync) {
                thread_drop_sync_ipc_override(thread);
        }
}
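
/*
 * Unbinding the servicer of a workq kqueue drops its IPC override (unless
 * it is the manager thread) and rescans the still-populated per-QoS
 * queues to compute the override that should be attached to the next
 * thread request for this bucket.
 */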
/* called with the kqworkq lock held */
static void
kqworkq_unbind_thread(
        struct kqworkq *kqwq,
        kq_index_t qos_index,
        thread_t thread,
        __unused unsigned int flags)
{
        struct kqrequest *kqr = kqworkq_get_request(kqwq, qos_index);
        kq_index_t override_index = 0;

        /* request lock must be held */
        kqwq_req_held(kqwq);

        assert(thread == current_thread());

        if ((kqr->kqr_state & KQR_BOUND) == 0) {
                assert(kqr->kqr_state & KQR_BOUND);
                return;
        }

        assert(kqr->kqr_thread == thread);
        assert(TAILQ_EMPTY(&kqr->kqr_suppressed));

        /*
         * If there is an override, drop it from the current thread
         * and then we are free to recompute (a potentially lower)
         * minimum override to apply to the next thread request.
         */
        if (kqr->kqr_override_index) {
                struct kqtailq *base_queue = kqueue_get_base_queue(&kqwq->kqwq_kqueue, qos_index);
                struct kqtailq *queue = kqueue_get_high_queue(&kqwq->kqwq_kqueue, qos_index);

                /* if not bound to a manager thread, drop the current ipc override */
                if ((kqr->kqr_state & KQWQ_THMANAGER) == 0) {
                        thread_drop_ipc_override(thread);
                }

                /* recompute the new override */
                do {
                        if (!TAILQ_EMPTY(queue)) {
                                override_index = queue - base_queue + qos_index;
                                break;
                        }
                } while (queue-- > base_queue);
        }

        /* Mark it unbound */
        kqr->kqr_thread = NULL;
        kqr->kqr_state &= ~(KQR_BOUND | KQR_THREQUESTED | KQWQ_THMANAGER);

        /* apply the new override */
        if (override_index > kqr->kqr_qos_index) {
                kqr->kqr_override_index = override_index;
        } else {
                kqr->kqr_override_index = THREAD_QOS_UNSPECIFIED;
        }
}
struct kqrequest *
kqworkq_get_request(struct kqworkq *kqwq, kq_index_t qos_index)
{
        assert(qos_index < KQWQ_NQOS);
        return &kqwq->kqwq_request[qos_index];
}
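
/*
 * Filters call knote_adjust_qos() to change the requested QoS and the
 * override of a knote on a workq or workloop kqueue.  The QoS the knote
 * was registered with acts as a floor for both values, and a sync waiter
 * index at or above the computed override turns it into a sync override.
 */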
void
knote_adjust_qos(struct knote *kn, qos_t new_qos, qos_t new_override, kq_index_t sync_override_index)
{
        struct kqueue *kq = knote_get_kq(kn);
        boolean_t override_is_sync = FALSE;

        if (kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) {
                kq_index_t new_qos_index;
                kq_index_t new_override_index;
                kq_index_t servicer_qos_index;

                new_qos_index = qos_index_from_qos(kn, new_qos, FALSE);
                new_override_index = qos_index_from_qos(kn, new_override, TRUE);

                /* make sure the servicer qos acts as a floor */
                servicer_qos_index = qos_index_from_qos(kn, kn->kn_qos, FALSE);
                if (servicer_qos_index > new_qos_index)
                        new_qos_index = servicer_qos_index;
                if (servicer_qos_index > new_override_index)
                        new_override_index = servicer_qos_index;
                if (sync_override_index >= new_override_index) {
                        new_override_index = sync_override_index;
                        override_is_sync = TRUE;
                }

                if (new_qos_index != knote_get_req_index(kn) ||
                    new_override_index != knote_get_qos_override_index(kn) ||
                    override_is_sync != kn->kn_qos_override_is_sync) {
                        if (kn->kn_status & KN_QUEUED) {
                                knote_dequeue(kn);
                                knote_set_qos_index(kn, new_qos_index);
                                knote_set_qos_override_index(kn, new_override_index, override_is_sync);
                                knote_enqueue(kn);
                        } else {
                                knote_set_qos_index(kn, new_qos_index);
                                knote_set_qos_override_index(kn, new_override_index, override_is_sync);
                        }
                }
        }
}
void
knote_adjust_sync_qos(struct knote *kn, kq_index_t sync_qos, boolean_t lock_kq)
{
        struct kqueue *kq = knote_get_kq(kn);
        kq_index_t old_sync_override;
        kq_index_t qos_index = knote_get_qos_index(kn);
        uint32_t flags = 0;

        /* Tracking only happens for UI qos */
        if (sync_qos != THREAD_QOS_USER_INTERACTIVE &&
                sync_qos != THREAD_QOS_UNSPECIFIED) {
                return;
        }

        if (lock_kq)
                kqlock(kq);

        if (kq->kq_state & KQ_WORKLOOP) {
                struct kqworkloop *kqwl = (struct kqworkloop *)kq;

                old_sync_override = knote_get_sync_qos_override_index(kn);
                if (old_sync_override != sync_qos) {
                        kn->kn_qos_sync_override = sync_qos;

                        /* update sync ipc counters for suppressed knotes */
                        if ((kn->kn_status & KN_SUPPRESSED) == KN_SUPPRESSED) {
                                flags = flags | KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS;

                                /* Do not recalculate kqwl override, it would be done later */
                                flags = flags | KQWL_UO_UPDATE_OVERRIDE_LAZY;

                                if (sync_qos == THREAD_QOS_USER_INTERACTIVE) {
                                        flags = flags | KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI;
                                }

                                if (old_sync_override == THREAD_QOS_USER_INTERACTIVE) {
                                        flags = flags | KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI;
                                }

                                kqworkloop_update_override(kqwl, qos_index, sync_qos,
                                        flags);
                        }
                }
        }
        if (lock_kq)
                kqunlock(kq);
}
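
/*
 * Waking up for a knote depends on the kind of kqueue it belongs to:
 * workq and workloop kqueues ask for a servicing thread at the knote's
 * QoS, while plain file kqueues wake any thread sleeping or selecting on
 * the kqueue and propagate to nested kqueues/select sets.
 */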
static void
knote_wakeup(struct knote *kn)
{
        struct kqueue *kq = knote_get_kq(kn);
        kq_index_t qos_index = knote_get_qos_index(kn);

        if (kq->kq_state & KQ_WORKQ) {
                /* request a servicing thread */
                struct kqworkq *kqwq = (struct kqworkq *)kq;

                kqworkq_request_help(kqwq, qos_index);

        } else if (kq->kq_state & KQ_WORKLOOP) {
                /* request a servicing thread */
                struct kqworkloop *kqwl = (struct kqworkloop *)kq;

                if (kqworkloop_is_processing_on_current_thread(kqwl)) {
                        /*
                         * kqworkloop_end_processing() will perform the required QoS
                         * computations when it unsets the processing mode.
                         */
                        return;
                }
                kqworkloop_request_help(kqwl, qos_index);
        } else {
                struct kqfile *kqf = (struct kqfile *)kq;

                /* flag wakeups during processing */
                if (kq->kq_state & KQ_PROCESSING)
                        kq->kq_state |= KQ_WAKEUP;

                /* wakeup a thread waiting on this queue */
                if (kq->kq_state & (KQ_SLEEP | KQ_SEL)) {
                        kq->kq_state &= ~(KQ_SLEEP | KQ_SEL);
                        waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
                                           KQ_EVENT,
                                           THREAD_AWAKENED,
                                           WAITQ_ALL_PRIORITIES);
                }

                /* wakeup other kqueues/select sets we're inside */
                KNOTE(&kqf->kqf_sel.si_note, 0);
        }
}
/*
 * Called with the kqueue locked
 */
void
kqueue_interrupt(struct kqueue *kq)
{
        assert((kq->kq_state & KQ_WORKQ) == 0);

        /* wakeup sleeping threads */
        if ((kq->kq_state & (KQ_SLEEP | KQ_SEL)) != 0) {
                kq->kq_state &= ~(KQ_SLEEP | KQ_SEL);
                (void)waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
                                         KQ_EVENT,
                                         THREAD_RESTART,
                                         WAITQ_ALL_PRIORITIES);
        }

        /* wakeup threads waiting their turn to process */
        if (kq->kq_state & KQ_PROCWAIT) {
                struct kqtailq *suppressq;

                assert(kq->kq_state & KQ_PROCESSING);

                kq->kq_state &= ~KQ_PROCWAIT;
                suppressq = kqueue_get_suppressed_queue(kq, QOS_INDEX_KQFILE);
                (void)waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
                                         CAST_EVENT64_T(suppressq),
                                         THREAD_RESTART,
                                         WAITQ_ALL_PRIORITIES);
        }
}
/*
 * Called back from waitq code when no threads waiting and the hook was set.
 *
 * Interrupts are likely disabled and spin locks are held - minimal work
 * can be done in this context!!!
 *
 * JMM - in the future, this will try to determine which knotes match the
 * wait queue wakeup and apply these wakeups against those knotes themselves.
 * For now, all the events dispatched this way are dispatch-manager handled,
 * so hard-code that for now.
 */
void
waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook, void *knote_hook, int qos)
{
#pragma unused(knote_hook, qos)

        struct kqueue *kq = (struct kqueue *)kq_hook;

        if (kq->kq_state & KQ_WORKQ) {
                struct kqworkq *kqwq = (struct kqworkq *)kq;

                kqworkq_request_help(kqwq, KQWQ_QOS_MANAGER);

        } else if (kq->kq_state & KQ_WORKLOOP) {
                struct kqworkloop *kqwl = (struct kqworkloop *)kq;

                kqworkloop_request_help(kqwl, KQWL_BUCKET_STAYACTIVE);
        }
}
void
klist_init(struct klist *list)
{
        SLIST_INIT(list);
}

/*
 * Query/Post each knote in the object's list
 *
 *      The object lock protects the list. It is assumed
 *      that the filter/event routine for the object can
 *      determine that the object is already locked (via
 *      the hint) and not deadlock itself.
 *
 *      The object lock should also hold off pending
 *      detach/drop operations.  But we'll prevent it here
 *      too (by taking a use reference) - just in case.
 */
void
knote(struct klist *list, long hint)
{
        struct knote *kn;

        SLIST_FOREACH(kn, list, kn_selnext) {
                struct kqueue *kq = knote_get_kq(kn);

                kqlock(kq);

                assert(!knoteuse_needs_boost(kn, NULL));

                /* If we can get a use reference - deliver event */
                if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
                        int result;

                        /* call the event with only a use count */
                        result = knote_fops(kn)->f_event(kn, hint);

                        /* if it's not going away and triggered */
                        if (knoteuse2kqlock(kq, kn, KNUSE_NONE) && result)
                                knote_activate(kn);
                }
                kqunlock(kq);
        }
}
/*
 * attach a knote to the specified list.  Return true if this is the first entry.
 * The list is protected by whatever lock the object it is associated with uses.
 */
int
knote_attach(struct klist *list, struct knote *kn)
{
        int ret = SLIST_EMPTY(list);
        SLIST_INSERT_HEAD(list, kn, kn_selnext);
        return (ret);
}

/*
 * detach a knote from the specified list.  Return true if that was the last entry.
 * The list is protected by whatever lock the object it is associated with uses.
 */
int
knote_detach(struct klist *list, struct knote *kn)
{
        SLIST_REMOVE(list, kn, knote, kn_selnext);
        return (SLIST_EMPTY(list));
}
/*
 * knote_vanish - Indicate that the source has vanished
 *
 * If the knote has requested EV_VANISHED delivery,
 * arrange for that. Otherwise, deliver a NOTE_REVOKE
 * event for backward compatibility.
 *
 * The knote is marked as having vanished, but is not
 * actually detached from the source in this instance.
 * The actual detach is deferred until the knote drop.
 *
 * Our caller already has the object lock held. Calling
 * the detach routine would try to take that lock
 * recursively - which likely is not supported.
 */
void
knote_vanish(struct klist *list)
{
        struct knote *kn;
        struct knote *kn_next;

        SLIST_FOREACH_SAFE(kn, list, kn_selnext, kn_next) {
                struct kqueue *kq = knote_get_kq(kn);
                int result;

                kqlock(kq);

                assert(!knoteuse_needs_boost(kn, NULL));

                if ((kn->kn_status & KN_DROPPING) == 0) {
                        /* If EV_VANISH supported - prepare to deliver one */
                        if (kn->kn_status & KN_REQVANISH) {
                                kn->kn_status |= KN_VANISHED;
                                knote_activate(kn);

                        } else if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
                                /* call the event with only a use count */
                                result = knote_fops(kn)->f_event(kn, NOTE_REVOKE);

                                /* if it's not going away and triggered */
                                if (knoteuse2kqlock(kq, kn, KNUSE_NONE) && result)
                                        knote_activate(kn);
                                /* lock held again */
                        }
                }
                kqunlock(kq);
        }
}
/*
 * For a given knote, link a provided wait queue directly with the kqueue.
 * Wakeups will happen via recursive wait queue support.  But nothing will move
 * the knote to the active list at wakeup (nothing calls knote()).  Instead,
 * we permanently enqueue them here.
 *
 * kqueue and knote references are held by caller.
 * waitq locked by caller.
 *
 * caller provides the wait queue link structure.
 */
int
knote_link_waitq(struct knote *kn, struct waitq *wq, uint64_t *reserved_link)
{
        struct kqueue *kq = knote_get_kq(kn);
        kern_return_t kr;

        kr = waitq_link(wq, &kq->kq_wqs, WAITQ_ALREADY_LOCKED, reserved_link);
        if (kr == KERN_SUCCESS) {
                knote_markstayactive(kn);
                return (0);
        } else {
                return (EINVAL);
        }
}

/*
 * Unlink the provided wait queue from the kqueue associated with a knote.
 * Also remove it from the magic list of directly attached knotes.
 *
 * Note that the unlink may have already happened from the other side, so
 * ignore any failures to unlink and just remove it from the kqueue list.
 *
 * On success, caller is responsible for the link structure
 */
int
knote_unlink_waitq(struct knote *kn, struct waitq *wq)
{
        struct kqueue *kq = knote_get_kq(kn);
        kern_return_t kr;

        kr = waitq_unlink(wq, &kq->kq_wqs);
        knote_clearstayactive(kn);
        return ((kr != KERN_SUCCESS) ? EINVAL : 0);
}
/*
 * remove all knotes referencing a specified fd
 *
 * Essentially an inlined knote_remove & knote_drop
 * when we know for sure that the thing is a file
 *
 * Entered with the proc_fd lock already held.
 * It returns the same way, but may drop it temporarily.
 */
void
knote_fdclose(struct proc *p, int fd, int force)
{
        struct klist *list;
        struct knote *kn;

restart:
        list = &p->p_fd->fd_knlist[fd];
        SLIST_FOREACH(kn, list, kn_link) {
                struct kqueue *kq = knote_get_kq(kn);

                kqlock(kq);

                if (kq->kq_p != p)
                        panic("%s: proc mismatch (kq->kq_p=%p != p=%p)",
                            __func__, kq->kq_p, p);

                /*
                 * If the knote supports EV_VANISHED delivery,
                 * transition it to vanished mode (or skip over
                 * it if already vanished).
                 */
                if (!force && (kn->kn_status & KN_REQVANISH)) {

                        if ((kn->kn_status & KN_VANISHED) == 0) {
                                proc_fdunlock(p);

                                assert(!knoteuse_needs_boost(kn, NULL));

                                /* get detach reference (also marks vanished) */
                                if (kqlock2knotedetach(kq, kn, KNUSE_NONE)) {
                                        /* detach knote and drop fp use reference */
                                        knote_fops(kn)->f_detach(kn);
                                        if (knote_fops(kn)->f_isfd)
                                                fp_drop(p, kn->kn_id, kn->kn_fp, 0);

                                        /* activate it if it's still in existence */
                                        if (knoteuse2kqlock(kq, kn, KNUSE_NONE)) {
                                                knote_activate(kn);
                                                kqunlock(kq);
                                        }
                                }
                                proc_fdlock(p);
                                goto restart;
                        } else {
                                kqunlock(kq);
                                continue;
                        }
                }

                proc_fdunlock(p);

                /*
                 * Convert the kq lock to a drop ref.
                 * If we get it, go ahead and drop it.
                 * Otherwise, we waited for the blocking
                 * condition to complete. Either way,
                 * we dropped the fdlock so start over.
                 */
                if (kqlock2knotedrop(kq, kn)) {
                        knote_drop(kn, p);
                }

                proc_fdlock(p);
                goto restart;
        }
}
/*
 * knote_fdfind - lookup a knote in the fd table for process
 *
 * If the filter is file-based, lookup based on fd index.
 * Otherwise use a hash based on the ident.
 *
 * Matching is based on kq, filter, and ident. Optionally,
 * it may also be based on the udata field in the kevent -
 * allowing multiple event registration for the file object
 * per kqueue.
 *
 * fd_knhashlock or fdlock held on entry (and exit)
 */
static struct knote *
knote_fdfind(struct kqueue *kq,
             struct kevent_internal_s *kev,
             bool is_fd,
             struct proc *p)
{
        struct filedesc *fdp = p->p_fd;
        struct klist *list = NULL;
        struct knote *kn = NULL;

        /*
         * determine where to look for the knote
         */
        if (is_fd) {
                /* fd-based knotes are linked off the fd table */
                if (kev->ident < (u_int)fdp->fd_knlistsize) {
                        list = &fdp->fd_knlist[kev->ident];
                }
        } else if (fdp->fd_knhashmask != 0) {
                /* hash non-fd knotes here too */
                list = &fdp->fd_knhash[KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
        }

        /*
         * scan the selected list looking for a match
         */
        if (list != NULL) {
                SLIST_FOREACH(kn, list, kn_link) {
                        if (kq == knote_get_kq(kn) &&
                            kev->ident == kn->kn_id &&
                            kev->filter == kn->kn_filter) {
                                if (kev->flags & EV_UDATA_SPECIFIC) {
                                        if ((kn->kn_status & KN_UDATA_SPECIFIC) &&
                                            kev->udata == kn->kn_udata) {
                                                break; /* matching udata-specific knote */
                                        }
                                } else if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0) {
                                        break; /* matching non-udata-specific knote */
                                }
                        }
                }
        }
        return kn;
}
/*
 * kq_add_knote - Add knote to the fd table for process
 * while checking for duplicates.
 *
 * All file-based filters associate a list of knotes by file
 * descriptor index. All other filters hash the knote by ident.
 *
 * May have to grow the table of knote lists to cover the
 * file descriptor index presented.
 *
 * fd_knhashlock and fdlock unheld on entry (and exit).
 *
 * Takes a rwlock boost if inserting the knote is successful.
 */
static int
kq_add_knote(struct kqueue *kq, struct knote *kn,
             struct kevent_internal_s *kev,
             struct proc *p, int *knoteuse_flags)
{
        struct filedesc *fdp = p->p_fd;
        struct klist *list = NULL;
        int ret = 0;
        bool is_fd = knote_fops(kn)->f_isfd;
        u_long size = 0;

        if (is_fd)
                proc_fdlock(p);
        else
                knhash_lock(p);

        if (knote_fdfind(kq, kev, is_fd, p) != NULL) {
                /* found an existing knote: we can't add this one */
                ret = ERESTART;
                goto out_locked;
        }

        /* knote was not found: add it now */
        if (!is_fd) {
                if (fdp->fd_knhashmask == 0) {
                        list = hashinit(CONFIG_KN_HASHSIZE, M_KQUEUE,
                                        &size);
                        if (list == NULL) {
                                ret = ENOMEM;
                                goto out_locked;
                        }
                        fdp->fd_knhash = list;
                        fdp->fd_knhashmask = size;
                }

                list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
                SLIST_INSERT_HEAD(list, kn, kn_link);
        } else {
                /* knote is fd based */

                if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {

                        if (kn->kn_id >= (uint64_t)p->p_rlimit[RLIMIT_NOFILE].rlim_cur
                            || kn->kn_id >= (uint64_t)maxfiles) {
                                ret = EINVAL;
                                goto out_locked;
                        }
                        /* have to grow the fd_knlist */
                        size = fdp->fd_knlistsize;
                        while (size <= kn->kn_id)
                                size += KQEXTENT;

                        if (size >= (UINT_MAX/sizeof(struct klist *))) {
                                ret = EINVAL;
                                goto out_locked;
                        }

                        MALLOC(list, struct klist *,
                            size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
                        if (list == NULL) {
                                ret = ENOMEM;
                                goto out_locked;
                        }

                        bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
                            fdp->fd_knlistsize * sizeof(struct klist *));
                        bzero((caddr_t)list +
                            fdp->fd_knlistsize * sizeof(struct klist *),
                            (size - fdp->fd_knlistsize) * sizeof(struct klist *));
                        FREE(fdp->fd_knlist, M_KQUEUE);
                        fdp->fd_knlist = list;
                        fdp->fd_knlistsize = size;
                }

                list = &fdp->fd_knlist[kn->kn_id];
                SLIST_INSERT_HEAD(list, kn, kn_link);
        }

out_locked:
        if (ret == 0 && knoteuse_needs_boost(kn, kev)) {
                set_thread_rwlock_boost();
                *knoteuse_flags = KNUSE_BOOST;
        } else {
                *knoteuse_flags = KNUSE_NONE;
        }

        if (is_fd)
                proc_fdunlock(p);
        else
                knhash_unlock(p);

        return ret;
}
/*
 * kq_remove_knote - remove a knote from the fd table for process
 * and copy kn_status and kq_state while holding the kqlock and
 * the fd table lock.
 *
 * If the filter is file-based, remove based on fd index.
 * Otherwise remove from the hash based on the ident.
 *
 * fd_knhashlock and fdlock unheld on entry (and exit).
 */
static void
kq_remove_knote(struct kqueue *kq, struct knote *kn, struct proc *p,
        kn_status_t *kn_status, uint16_t *kq_state)
{
        struct filedesc *fdp = p->p_fd;
        struct klist *list = NULL;
        bool is_fd;

        is_fd = knote_fops(kn)->f_isfd;

        if (is_fd)
                proc_fdlock(p);
        else
                knhash_lock(p);

        if (is_fd) {
                assert ((u_int)fdp->fd_knlistsize > kn->kn_id);
                list = &fdp->fd_knlist[kn->kn_id];
        } else {
                list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
        }
        SLIST_REMOVE(list, kn, knote, kn_link);

        kqlock(kq);
        *kn_status = kn->kn_status;
        *kq_state = kq->kq_state;
        kqunlock(kq);

        if (is_fd)
                proc_fdunlock(p);
        else
                knhash_unlock(p);
}
/*
 * kq_find_knote_and_kq_lock - lookup a knote in the fd table for process
 * and, if the knote is found, acquires the kqlock while holding the fd table lock/spinlock.
 *
 * fd_knhashlock or fdlock unheld on entry (and exit)
 */
static struct knote *
kq_find_knote_and_kq_lock(struct kqueue *kq,
             struct kevent_internal_s *kev,
             bool is_fd,
             struct proc *p)
{
        struct knote *ret;

        if (is_fd)
                proc_fdlock(p);
        else
                knhash_lock(p);

        ret = knote_fdfind(kq, kev, is_fd, p);

        if (ret) {
                kqlock(kq);
        }

        if (is_fd)
                proc_fdunlock(p);
        else
                knhash_unlock(p);

        return ret;
}
/*
 * knote_drop - disconnect and drop the knote
 *
 * Called with the kqueue unlocked and holding a
 * "drop reference" on the knote in question.
 * This reference is most often acquired thru a call
 * to kqlock2knotedrop(). But it can also be acquired
 * through stealing a drop reference via a call to
 * knoteuse2knotedrop() or during the initial attach
 * of the knote.
 *
 * The knote may have already been detached from
 * (or not yet attached to) its source object.
 */
static void
knote_drop(struct knote *kn, __unused struct proc *ctxp)
{
        struct kqueue *kq = knote_get_kq(kn);
        struct proc *p = kq->kq_p;
        kn_status_t kn_status;
        uint16_t kq_state;

        /* If we are attached, disconnect from the source first */
        if (kn->kn_status & KN_ATTACHED) {
                knote_fops(kn)->f_detach(kn);
        }

        /* Remove the source from the appropriate hash */
        kq_remove_knote(kq, kn, p, &kn_status, &kq_state);

        /*
         * If a kqueue_dealloc is happening in parallel for the kq
         * pointed to by the knote, the kq could already be deallocated
         * at this point.
         * Do not access the kq after the kq_remove_knote() if it is
         * not a KQ_DYNAMIC kqueue.
         */

        /* determine if anyone needs to know about the drop */
        assert((kn_status & (KN_DROPPING | KN_SUPPRESSED | KN_QUEUED)) == KN_DROPPING);

        /*
         * If KN_USEWAIT is set, some other thread was trying to drop the kn.
         * Or it was in kqueue_dealloc, so the kqueue_dealloc did not happen
         * because that thread was waiting on this wake, or it was a drop happening
         * because of a kevent_register that takes a reference on the kq, and therefore
         * the kq cannot be deallocated in parallel.
         *
         * It is safe to access kq->kq_wqs if needswakeup is set.
         */
        if (kn_status & KN_USEWAIT)
                waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
                                   CAST_EVENT64_T(&kn->kn_status),
                                   THREAD_AWAKENED,
                                   WAITQ_ALL_PRIORITIES);

        if (knote_fops(kn)->f_isfd && ((kn->kn_status & KN_VANISHED) == 0))
                fp_drop(p, kn->kn_id, kn->kn_fp, 0);

        knote_free(kn);

        /*
         * release reference on dynamic kq (and free if last).
         * Will only be last if this is from fdfree, etc...
         * because otherwise processing thread has reference.
         */
        if (kq_state & KQ_DYNAMIC)
                kqueue_release_last(p, kq);
}
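
/*
 * Activating a knote marks it KN_ACTIVE and queues it on its kqueue
 * (unless disabled, suppressed or dropping); a successful enqueue
 * triggers a wakeup so a thread can service the event.
 */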
/* called with kqueue lock held */
static void
knote_activate(struct knote *kn)
{
        if (kn->kn_status & KN_ACTIVE)
                return;

        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_ACTIVATE),
                      kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
                      kn->kn_filtid);

        kn->kn_status |= KN_ACTIVE;
        if (knote_enqueue(kn))
                knote_wakeup(kn);
}
/* called with kqueue lock held */
static void
knote_deactivate(struct knote *kn)
{
        kn->kn_status &= ~KN_ACTIVE;
        if ((kn->kn_status & KN_STAYACTIVE) == 0)
                knote_dequeue(kn);
}
/* called with kqueue lock held */
static void
knote_enable(struct knote *kn)
{
        if ((kn->kn_status & KN_DISABLED) == 0)
                return;

        kn->kn_status &= ~KN_DISABLED;

        if (kn->kn_status & KN_SUPPRESSED) {
                /* Clear the sync qos on the knote */
                knote_adjust_sync_qos(kn, THREAD_QOS_UNSPECIFIED, FALSE);

                /*
                 * it is possible for userland to have knotes registered for a given
                 * workloop `wl_orig` but really handled on another workloop `wl_new`.
                 *
                 * In that case, rearming will happen from the servicer thread of
                 * `wl_new`; if `wl_orig` is no longer being serviced, this knote would
                 * stay suppressed forever if we only relied on
                 * kqworkloop_acknowledge_events being called by `wl_orig`.
                 *
                 * However if we see the KQ_PROCESSING bit on `wl_orig` set, we can't
                 * unsuppress because that would mess with the processing phase of
                 * `wl_orig`; however it also means kqworkloop_acknowledge_events()
                 * will be called.
                 */
                struct kqueue *kq = knote_get_kq(kn);
                if ((kq->kq_state & KQ_PROCESSING) == 0) {
                        knote_unsuppress(kn);
                }
        } else if (knote_enqueue(kn)) {
                knote_wakeup(kn);
        }
}
/* called with kqueue lock held */
static void
knote_disable(struct knote *kn)
{
        if (kn->kn_status & KN_DISABLED)
                return;

        kn->kn_status |= KN_DISABLED;
        knote_dequeue(kn);
}
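
/*
 * While a knote is being processed it is "suppressed": it is moved from
 * its active queue onto the per-kqueue (or per-QoS-request) suppression
 * queue, and any sync user-interactive override it carries is counted on
 * the workloop request.
 */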
/* called with kqueue lock held */
static void
knote_suppress(struct knote *kn)
{
        struct kqtailq *suppressq;
        struct kqueue *kq = knote_get_kq(kn);

        if (kn->kn_status & KN_SUPPRESSED)
                return;

        knote_dequeue(kn);
        kn->kn_status |= KN_SUPPRESSED;
        suppressq = kqueue_get_suppressed_queue(kq, knote_get_qos_index(kn));
        TAILQ_INSERT_TAIL(suppressq, kn, kn_tqe);

        if ((kq->kq_state & KQ_WORKLOOP) &&
             knote_get_qos_override_index(kn) == THREAD_QOS_USER_INTERACTIVE &&
             kn->kn_qos_override_is_sync) {
                struct kqworkloop *kqwl = (struct kqworkloop *)kq;

                /* update the sync qos override counter for suppressed knotes */
                kqworkloop_update_override(kqwl, knote_get_qos_index(kn),
                        knote_get_qos_override_index(kn),
                        (KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS | KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI));
        }
}
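
/*
 * Unsuppressing puts the knote back on its queue at its requested QoS,
 * drops the sync override accounting it contributed while suppressed,
 * and, once the suppression queue drains on a workloop, resets the
 * wakeup override that was applied on behalf of suppressed knotes.
 */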
/* called with kqueue lock held */
static void
knote_unsuppress(struct knote *kn)
{
        struct kqtailq *suppressq;
        struct kqueue *kq = knote_get_kq(kn);

        if ((kn->kn_status & KN_SUPPRESSED) == 0)
                return;

        /* Clear the sync qos on the knote */
        knote_adjust_sync_qos(kn, THREAD_QOS_UNSPECIFIED, FALSE);

        kn->kn_status &= ~KN_SUPPRESSED;
        suppressq = kqueue_get_suppressed_queue(kq, knote_get_qos_index(kn));
        TAILQ_REMOVE(suppressq, kn, kn_tqe);

        /* update in-use qos to equal requested qos */
        kn->kn_qos_index = kn->kn_req_index;

        /* don't wakeup if unsuppressing just a stay-active knote */
        if (knote_enqueue(kn) && (kn->kn_status & KN_ACTIVE)) {
                knote_wakeup(kn);
        }

        if ((kq->kq_state & KQ_WORKLOOP) && !(kq->kq_state & KQ_NO_WQ_THREAD) &&
             knote_get_qos_override_index(kn) == THREAD_QOS_USER_INTERACTIVE &&
             kn->kn_qos_override_is_sync) {
                struct kqworkloop *kqwl = (struct kqworkloop *)kq;

                /* update the sync qos override counter for suppressed knotes */
                kqworkloop_update_override(kqwl, knote_get_qos_index(kn),
                        knote_get_qos_override_index(kn),
                        (KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS | KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI));
        }

        if (TAILQ_EMPTY(suppressq) && (kq->kq_state & KQ_WORKLOOP) &&
                        !(kq->kq_state & KQ_NO_WQ_THREAD)) {
                struct kqworkloop *kqwl = (struct kqworkloop *)kq;
                if (kqworkloop_is_processing_on_current_thread(kqwl)) {
                        /*
                         * kqworkloop_end_processing() will perform the required QoS
                         * computations when it unsets the processing mode.
                         */
                } else {
                        kqwl_req_lock(kqwl);
                        kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_RESET_WAKEUP_OVERRIDE, 0);
                        kqwl_req_unlock(kqwl);
                }
        }
}
/* called with kqueue lock held */
static void
knote_update_sync_override_state(struct knote *kn)
{
        struct kqtailq *queue = knote_get_queue(kn);
        struct kqueue *kq = knote_get_kq(kn);

        if (!(kq->kq_state & KQ_WORKLOOP) ||
            knote_get_queue_index(kn) != THREAD_QOS_USER_INTERACTIVE)
                return;

        /* Update the sync ipc state on workloop */
        struct kqworkloop *kqwl = (struct kqworkloop *)kq;
        boolean_t sync_ipc_override = FALSE;
        if (!TAILQ_EMPTY(queue)) {
                struct knote *kn_head = TAILQ_FIRST(queue);
                if (kn_head->kn_qos_override_is_sync)
                        sync_ipc_override = TRUE;
        }
        kqworkloop_update_sync_override_state(kqwl, sync_ipc_override);
}
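
/*
 * Knotes carrying a sync IPC override are inserted at the head of their
 * queue so sync waiters are serviced first; everything else is appended
 * at the tail.  knote_update_sync_override_state() refreshes the
 * workloop's sync override state as knotes enter and leave the UI queue.
 */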
/* called with kqueue lock held */
static int
knote_enqueue(struct knote *kn)
{
        if ((kn->kn_status & (KN_ACTIVE | KN_STAYACTIVE)) == 0 ||
            (kn->kn_status & (KN_DISABLED | KN_SUPPRESSED | KN_DROPPING)))
                return 0;

        if ((kn->kn_status & KN_QUEUED) == 0) {
                struct kqtailq *queue = knote_get_queue(kn);
                struct kqueue *kq = knote_get_kq(kn);

                /* insert at head for sync ipc waiters */
                if (kn->kn_qos_override_is_sync) {
                        TAILQ_INSERT_HEAD(queue, kn, kn_tqe);
                } else {
                        TAILQ_INSERT_TAIL(queue, kn, kn_tqe);
                }
                kn->kn_status |= KN_QUEUED;
                kq->kq_count++;
                knote_update_sync_override_state(kn);
                return (1);
        }
        return ((kn->kn_status & KN_STAYACTIVE) != 0);
}
/* called with kqueue lock held */
static void
knote_dequeue(struct knote *kn)
{
        struct kqueue *kq = knote_get_kq(kn);
        struct kqtailq *queue;

        if ((kn->kn_status & KN_QUEUED) == 0)
                return;

        queue = knote_get_queue(kn);
        TAILQ_REMOVE(queue, kn, kn_tqe);
        kn->kn_status &= ~KN_QUEUED;
        kq->kq_count--;
        knote_update_sync_override_state(kn);
}
void
knote_init(void)
{
        knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote),
                           8192, "knote zone");

        kqfile_zone = zinit(sizeof(struct kqfile), 8192*sizeof(struct kqfile),
                            8192, "kqueue file zone");

        kqworkq_zone = zinit(sizeof(struct kqworkq), 8192*sizeof(struct kqworkq),
                            8192, "kqueue workq zone");

        kqworkloop_zone = zinit(sizeof(struct kqworkloop), 8192*sizeof(struct kqworkloop),
                            8192, "kqueue workloop zone");

        /* allocate kq lock group attribute and group */
        kq_lck_grp_attr = lck_grp_attr_alloc_init();

        kq_lck_grp = lck_grp_alloc_init("kqueue", kq_lck_grp_attr);

        /* Allocate kq lock attribute */
        kq_lck_attr = lck_attr_alloc_init();

        /* Initialize the timer filter lock */
        lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);

        /* Initialize the user filter lock */
        lck_spin_init(&_filt_userlock, kq_lck_grp, kq_lck_attr);

#if CONFIG_MEMORYSTATUS
        /* Initialize the memorystatus list lock */
        memorystatus_kevent_init(kq_lck_grp, kq_lck_attr);
#endif
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
const struct filterops *
knote_fops(struct knote *kn)
{
        return sysfilt_ops[kn->kn_filtid];
}

static struct knote *
knote_alloc(void)
{
        struct knote *kn;

        kn = ((struct knote *)zalloc(knote_zone));
        *kn = (struct knote) { .kn_qos_override = 0, .kn_qos_sync_override = 0, .kn_qos_override_is_sync = 0 };
        return (kn);
}

static void
knote_free(struct knote *kn)
{
        zfree(knote_zone, kn);
}
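
/*
 * The remainder of this file implements the SYSPROTO_EVENT kernel event
 * socket family (system domain raw sockets) used to broadcast kernel
 * events to interested user-space listeners.
 */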
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/kern_event.h>
#include <sys/malloc.h>
#include <sys/sys_domain.h>
#include <sys/syslog.h>

#define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))

#define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
static lck_grp_attr_t *kev_lck_grp_attr;
static lck_attr_t *kev_lck_attr;
static lck_grp_t *kev_lck_grp;
static decl_lck_rw_data(,kev_lck_data);
static lck_rw_t *kev_rwlock = &kev_lck_data;

static int kev_attach(struct socket *so, int proto, struct proc *p);
static int kev_detach(struct socket *so);
static int kev_control(struct socket *so, u_long cmd, caddr_t data,
    struct ifnet *ifp, struct proc *p);
static lck_mtx_t * event_getlock(struct socket *, int);
static int event_lock(struct socket *, int, void *);
static int event_unlock(struct socket *, int, void *);

static int event_sofreelastref(struct socket *);
static void kev_delete(struct kern_event_pcb *);
static struct pr_usrreqs event_usrreqs = {
	.pru_attach =		kev_attach,
	.pru_control =		kev_control,
	.pru_detach =		kev_detach,
	.pru_soreceive =	soreceive,
};
static struct protosw eventsw[] = {
{
	.pr_type =		SOCK_RAW,
	.pr_protocol =		SYSPROTO_EVENT,
	.pr_flags =		PR_ATOMIC,
	.pr_usrreqs =		&event_usrreqs,
	.pr_lock =		event_lock,
	.pr_unlock =		event_unlock,
	.pr_getlock =		event_getlock,
}
};
__private_extern__ int kevt_getstat SYSCTL_HANDLER_ARGS;
__private_extern__ int kevt_pcblist SYSCTL_HANDLER_ARGS;

SYSCTL_NODE(_net_systm, OID_AUTO, kevt,
	CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Kernel event family");
struct kevtstat kevtstat;
SYSCTL_PROC(_net_systm_kevt, OID_AUTO, stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
    kevt_getstat, "S,kevtstat", "");

SYSCTL_PROC(_net_systm_kevt, OID_AUTO, pcblist,
	CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
	kevt_pcblist, "S,xkevtpcb", "");
static lck_mtx_t *
event_getlock(struct socket *so, int flags)
{
#pragma unused(flags)
	struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb;

	if (so->so_pcb != NULL) {
		if (so->so_usecount < 0)
			panic("%s: so=%p usecount=%d lrh= %s\n", __func__,
			    so, so->so_usecount, solockhistory_nr(so));
		/* NOTREACHED */
	} else {
		panic("%s: so=%p NULL NO so_pcb %s\n", __func__,
		    so, solockhistory_nr(so));
		/* NOTREACHED */
	}

	return (&ev_pcb->evp_mtx);
}
static int
event_lock(struct socket *so, int refcount, void *lr)
{
	void *lr_saved;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

	if (so->so_pcb != NULL) {
		lck_mtx_lock(&((struct kern_event_pcb *)so->so_pcb)->evp_mtx);
	} else {
		panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
		    so, lr_saved, solockhistory_nr(so));
		/* NOTREACHED */
	}

	if (so->so_usecount < 0) {
		panic("%s: so=%p so_pcb=%p lr=%p ref=%d lrh= %s\n", __func__,
		    so, so->so_pcb, lr_saved, so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	}

	if (refcount)
		so->so_usecount++;

	so->lock_lr[so->next_lock_lr] = lr_saved;
	so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
	return (0);
}
static int
event_unlock(struct socket *so, int refcount, void *lr)
{
	void *lr_saved;
	lck_mtx_t *mutex_held;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

	if (refcount) {
		so->so_usecount--;
	}
	if (so->so_usecount < 0) {
		panic("%s: so=%p usecount=%d lrh= %s\n", __func__,
		    so, so->so_usecount, solockhistory_nr(so));
		/* NOTREACHED */
	}
	if (so->so_pcb == NULL) {
		panic("%s: so=%p NO PCB usecount=%d lr=%p lrh= %s\n", __func__,
		    so, so->so_usecount, (void *)lr_saved,
		    solockhistory_nr(so));
		/* NOTREACHED */
	}
	mutex_held = (&((struct kern_event_pcb *)so->so_pcb)->evp_mtx);

	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;

	if (so->so_usecount == 0) {
		VERIFY(so->so_flags & SOF_PCBCLEARING);
		event_sofreelastref(so);
	} else {
		lck_mtx_unlock(mutex_held);
	}

	return (0);
}
static int
event_sofreelastref(struct socket *so)
{
	struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb;

	LCK_MTX_ASSERT(&(ev_pcb->evp_mtx), LCK_MTX_ASSERT_OWNED);

	so->so_pcb = NULL;

	/*
	 * Disable upcall in the event another thread is in kev_post_msg()
	 * appending record to the receive socket buffer, since sbwakeup()
	 * may release the socket lock otherwise.
	 */
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~SB_UPCALL;
	so->so_event = sonullevent;
	lck_mtx_unlock(&(ev_pcb->evp_mtx));

	LCK_MTX_ASSERT(&(ev_pcb->evp_mtx), LCK_MTX_ASSERT_NOTOWNED);
	lck_rw_lock_exclusive(kev_rwlock);
	LIST_REMOVE(ev_pcb, evp_link);
	kevtstat.kes_pcbcount--;
	kevtstat.kes_gencnt++;
	lck_rw_done(kev_rwlock);
	kev_delete(ev_pcb);

	sofreelastref(so, 1);
	return (0);
}
static int event_proto_count = (sizeof (eventsw) / sizeof (struct protosw));

static struct kern_event_head kern_event_head;

static u_int32_t static_event_id = 0;

#define EVPCB_ZONE_MAX		65536
#define EVPCB_ZONE_NAME		"kerneventpcb"
static struct zone *ev_pcb_zone;
/*
 * Install the protosw's for the NKE manager.  Invoked at extension load time
 */
void
kern_event_init(struct domain *dp)
{
	struct protosw *pr;
	int i;

	VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
	VERIFY(dp == systemdomain);

	kev_lck_grp_attr = lck_grp_attr_alloc_init();
	if (kev_lck_grp_attr == NULL) {
		panic("%s: lck_grp_attr_alloc_init failed\n", __func__);
		/* NOTREACHED */
	}

	kev_lck_grp = lck_grp_alloc_init("Kernel Event Protocol",
	    kev_lck_grp_attr);
	if (kev_lck_grp == NULL) {
		panic("%s: lck_grp_alloc_init failed\n", __func__);
		/* NOTREACHED */
	}

	kev_lck_attr = lck_attr_alloc_init();
	if (kev_lck_attr == NULL) {
		panic("%s: lck_attr_alloc_init failed\n", __func__);
		/* NOTREACHED */
	}

	lck_rw_init(kev_rwlock, kev_lck_grp, kev_lck_attr);
	if (kev_rwlock == NULL) {
		panic("%s: lck_mtx_alloc_init failed\n", __func__);
		/* NOTREACHED */
	}

	for (i = 0, pr = &eventsw[0]; i < event_proto_count; i++, pr++)
		net_add_proto(pr, dp, 1);

	ev_pcb_zone = zinit(sizeof(struct kern_event_pcb),
	    EVPCB_ZONE_MAX * sizeof(struct kern_event_pcb), 0, EVPCB_ZONE_NAME);
	if (ev_pcb_zone == NULL) {
		panic("%s: failed allocating ev_pcb_zone", __func__);
		/* NOTREACHED */
	}
	zone_change(ev_pcb_zone, Z_EXPAND, TRUE);
	zone_change(ev_pcb_zone, Z_CALLERACCT, TRUE);
}
static int
kev_attach(struct socket *so, __unused int proto, __unused struct proc *p)
{
	int error = 0;
	struct kern_event_pcb *ev_pcb;

	error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE);
	if (error != 0)
		return (error);

	if ((ev_pcb = (struct kern_event_pcb *)zalloc(ev_pcb_zone)) == NULL) {
		return (ENOBUFS);
	}
	bzero(ev_pcb, sizeof(struct kern_event_pcb));
	lck_mtx_init(&ev_pcb->evp_mtx, kev_lck_grp, kev_lck_attr);

	ev_pcb->evp_socket = so;
	ev_pcb->evp_vendor_code_filter = 0xffffffff;

	so->so_pcb = (caddr_t) ev_pcb;
	lck_rw_lock_exclusive(kev_rwlock);
	LIST_INSERT_HEAD(&kern_event_head, ev_pcb, evp_link);
	kevtstat.kes_pcbcount++;
	kevtstat.kes_gencnt++;
	lck_rw_done(kev_rwlock);

	return (error);
}
static void
kev_delete(struct kern_event_pcb *ev_pcb)
{
	VERIFY(ev_pcb != NULL);
	lck_mtx_destroy(&ev_pcb->evp_mtx, kev_lck_grp);
	zfree(ev_pcb_zone, ev_pcb);
}
static int
kev_detach(struct socket *so)
{
	struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb;

	if (ev_pcb != NULL) {
		soisdisconnected(so);
		so->so_flags |= SOF_PCBCLEARING;
	}

	return (0);
}
/*
 * For now, kev_vendor_code and mbuf_tags use the same
 * mechanism.
 */
errno_t kev_vendor_code_find(
	const char	*string,
	u_int32_t	*out_vendor_code)
{
	if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
		return (EINVAL);
	}
	return (net_str_id_find_internal(string, out_vendor_code,
	    NSI_VENDOR_CODE, 1));
}
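/*
 * Usage sketch (the reverse-DNS string is illustrative only): a kernel
 * client resolves its vendor code once and reuses it when posting events:
 *
 *	u_int32_t vendor_code;
 *	errno_t err = kev_vendor_code_find("com.example.driver", &vendor_code);
 *	if (err == 0) {
 *		// vendor_code may now be placed in a struct kev_msg
 *	}
 */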
errno_t
kev_msg_post(struct kev_msg *event_msg)
{
	mbuf_tag_id_t min_vendor, max_vendor;

	net_str_id_first_last(&min_vendor, &max_vendor, NSI_VENDOR_CODE);

	if (event_msg == NULL)
		return (EINVAL);

	/*
	 * Limit third parties to posting events for registered vendor codes
	 * only
	 */
	if (event_msg->vendor_code < min_vendor ||
	    event_msg->vendor_code > max_vendor) {
		OSIncrementAtomic64((SInt64 *)&kevtstat.kes_badvendor);
		return (EINVAL);
	}
	return (kev_post_msg(event_msg));
}
int
kev_post_msg(struct kev_msg *event_msg)
{
	struct mbuf *m, *m2;
	struct kern_event_pcb *ev_pcb;
	struct kern_event_msg *ev;
	char *tmp;
	u_int32_t total_size;
	int i;

	/* Verify the message is small enough to fit in one mbuf w/o cluster */
	total_size = KEV_MSG_HEADER_SIZE;

	for (i = 0; i < 5; i++) {
		if (event_msg->dv[i].data_length == 0)
			break;
		total_size += event_msg->dv[i].data_length;
	}

	if (total_size > MLEN) {
		OSIncrementAtomic64((SInt64 *)&kevtstat.kes_toobig);
		return (EMSGSIZE);
	}

	m = m_get(M_WAIT, MT_DATA);
	if (m == 0) {
		OSIncrementAtomic64((SInt64 *)&kevtstat.kes_nomem);
		return (ENOMEM);
	}
	ev = mtod(m, struct kern_event_msg *);
	total_size = KEV_MSG_HEADER_SIZE;

	tmp = (char *) &ev->event_data[0];
	for (i = 0; i < 5; i++) {
		if (event_msg->dv[i].data_length == 0)
			break;

		total_size += event_msg->dv[i].data_length;
		bcopy(event_msg->dv[i].data_ptr, tmp,
		    event_msg->dv[i].data_length);
		tmp += event_msg->dv[i].data_length;
	}

	ev->id = ++static_event_id;
	ev->total_size   = total_size;
	ev->vendor_code  = event_msg->vendor_code;
	ev->kev_class    = event_msg->kev_class;
	ev->kev_subclass = event_msg->kev_subclass;
	ev->event_code   = event_msg->event_code;

	m->m_len = total_size;
	lck_rw_lock_shared(kev_rwlock);
	for (ev_pcb = LIST_FIRST(&kern_event_head);
	    ev_pcb;
	    ev_pcb = LIST_NEXT(ev_pcb, evp_link)) {
		lck_mtx_lock(&ev_pcb->evp_mtx);
		if (ev_pcb->evp_socket->so_pcb == NULL) {
			lck_mtx_unlock(&ev_pcb->evp_mtx);
			continue;
		}
		if (ev_pcb->evp_vendor_code_filter != KEV_ANY_VENDOR) {
			if (ev_pcb->evp_vendor_code_filter != ev->vendor_code) {
				lck_mtx_unlock(&ev_pcb->evp_mtx);
				continue;
			}

			if (ev_pcb->evp_class_filter != KEV_ANY_CLASS) {
				if (ev_pcb->evp_class_filter != ev->kev_class) {
					lck_mtx_unlock(&ev_pcb->evp_mtx);
					continue;
				}

				if ((ev_pcb->evp_subclass_filter !=
				    KEV_ANY_SUBCLASS) &&
				    (ev_pcb->evp_subclass_filter !=
				    ev->kev_subclass)) {
					lck_mtx_unlock(&ev_pcb->evp_mtx);
					continue;
				}
			}
		}

		m2 = m_copym(m, 0, m->m_len, M_WAIT);
		if (m2 == 0) {
			OSIncrementAtomic64((SInt64 *)&kevtstat.kes_nomem);
			m_free(m);
			lck_mtx_unlock(&ev_pcb->evp_mtx);
			lck_rw_done(kev_rwlock);
			return (ENOMEM);
		}
		if (sbappendrecord(&ev_pcb->evp_socket->so_rcv, m2)) {
			/*
			 * We use "m" for the socket stats as it would be
			 * unsafe to use "m2"
			 */
			so_inc_recv_data_stat(ev_pcb->evp_socket,
			    1, m->m_len, MBUF_TC_BE);

			sorwakeup(ev_pcb->evp_socket);
			OSIncrementAtomic64((SInt64 *)&kevtstat.kes_posted);
		} else {
			OSIncrementAtomic64((SInt64 *)&kevtstat.kes_fullsock);
		}
		lck_mtx_unlock(&ev_pcb->evp_mtx);
	}
	m_free(m);
	lck_rw_done(kev_rwlock);

	return (0);
}
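/*
 * Usage sketch (class/subclass/event codes below are illustrative, not
 * values defined by this file): an in-kernel client fills a struct kev_msg
 * and hands it to kev_msg_post(), which validates the vendor code before
 * calling kev_post_msg() above.  Up to five data vectors are copied in order
 * and must fit in a single mbuf.
 *
 *	struct kev_msg msg;
 *	u_int32_t payload = 42;
 *
 *	bzero(&msg, sizeof(msg));
 *	msg.vendor_code  = vendor_code;	// from kev_vendor_code_find()
 *	msg.kev_class    = 1;		// illustrative
 *	msg.kev_subclass = 1;		// illustrative
 *	msg.event_code   = 1;		// illustrative
 *	msg.dv[0].data_length = sizeof(payload);
 *	msg.dv[0].data_ptr    = &payload;
 *	(void)kev_msg_post(&msg);
 */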
static int
kev_control(struct socket *so,
    u_long cmd,
    caddr_t data,
    __unused struct ifnet *ifp,
    __unused struct proc *p)
{
	struct kev_request *kev_req = (struct kev_request *) data;
	struct kern_event_pcb  *ev_pcb;
	struct kev_vendor_code *kev_vendor;
	u_int32_t  *id_value = (u_int32_t *) data;

	switch (cmd) {
		case SIOCGKEVID:
			*id_value = static_event_id;
			break;
		case SIOCSKEVFILT:
			ev_pcb = (struct kern_event_pcb *) so->so_pcb;
			ev_pcb->evp_vendor_code_filter = kev_req->vendor_code;
			ev_pcb->evp_class_filter = kev_req->kev_class;
			ev_pcb->evp_subclass_filter  = kev_req->kev_subclass;
			break;
		case SIOCGKEVFILT:
			ev_pcb = (struct kern_event_pcb *) so->so_pcb;
			kev_req->vendor_code = ev_pcb->evp_vendor_code_filter;
			kev_req->kev_class   = ev_pcb->evp_class_filter;
			kev_req->kev_subclass = ev_pcb->evp_subclass_filter;
			break;
		case SIOCGKEVVENDOR:
			kev_vendor = (struct kev_vendor_code *)data;
			/* Make sure string is NULL terminated */
			kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0;
			return (net_str_id_find_internal(kev_vendor->vendor_string,
			    &kev_vendor->vendor_code, NSI_VENDOR_CODE, 0));
		default:
			return (ENOTSUP);
	}

	return (0);
}
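/*
 * Usage sketch (user-space side; constants are from <sys/kern_event.h>):
 * the ioctls above are issued on a PF_SYSTEM/SYSPROTO_EVENT socket, e.g. to
 * subscribe to all Apple network-class events:
 *
 *	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *	struct kev_request req = {
 *		.vendor_code  = KEV_VENDOR_APPLE,
 *		.kev_class    = KEV_NETWORK_CLASS,
 *		.kev_subclass = KEV_ANY_SUBCLASS,
 *	};
 *	if (fd >= 0 && ioctl(fd, SIOCSKEVFILT, &req) == 0) {
 *		// recv(2) now returns struct kern_event_msg records
 *	}
 */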
__private_extern__ int
kevt_getstat SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;

	lck_rw_lock_shared(kev_rwlock);

	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = sizeof(struct kevtstat);
		goto done;
	}

	error = SYSCTL_OUT(req, &kevtstat,
	    MIN(sizeof(struct kevtstat), req->oldlen));
done:
	lck_rw_done(kev_rwlock);

	return (error);
}
__private_extern__ int
kevt_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	int n, i;
	struct xsystmgen xsg;
	void *buf = NULL;
	size_t item_size = ROUNDUP64(sizeof (struct xkevtpcb)) +
		ROUNDUP64(sizeof (struct xsocket_n)) +
		2 * ROUNDUP64(sizeof (struct xsockbuf_n)) +
		ROUNDUP64(sizeof (struct xsockstat_n));
	struct kern_event_pcb  *ev_pcb;

	buf = _MALLOC(item_size, M_TEMP, M_WAITOK | M_ZERO);
	if (buf == NULL)
		return (ENOMEM);

	lck_rw_lock_shared(kev_rwlock);

	n = kevtstat.kes_pcbcount;

	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = (n + n/8) * item_size;
		goto done;
	}
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	bzero(&xsg, sizeof (xsg));
	xsg.xg_len = sizeof (xsg);
	xsg.xg_count = n;
	xsg.xg_gen = kevtstat.kes_gencnt;
	xsg.xg_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xsg, sizeof (xsg));
	if (error) {
		goto done;
	}
	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		goto done;
	}

	for (i = 0, ev_pcb = LIST_FIRST(&kern_event_head);
	    i < n && ev_pcb != NULL;
	    i++, ev_pcb = LIST_NEXT(ev_pcb, evp_link)) {
		struct xkevtpcb *xk = (struct xkevtpcb *)buf;
		struct xsocket_n *xso = (struct xsocket_n *)
			ADVANCE64(xk, sizeof (*xk));
		struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
			ADVANCE64(xso, sizeof (*xso));
		struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
			ADVANCE64(xsbrcv, sizeof (*xsbrcv));
		struct xsockstat_n *xsostats = (struct xsockstat_n *)
			ADVANCE64(xsbsnd, sizeof (*xsbsnd));

		bzero(buf, item_size);

		lck_mtx_lock(&ev_pcb->evp_mtx);

		xk->kep_len = sizeof(struct xkevtpcb);
		xk->kep_kind = XSO_EVT;
		xk->kep_evtpcb = (uint64_t)VM_KERNEL_ADDRPERM(ev_pcb);
		xk->kep_vendor_code_filter = ev_pcb->evp_vendor_code_filter;
		xk->kep_class_filter = ev_pcb->evp_class_filter;
		xk->kep_subclass_filter = ev_pcb->evp_subclass_filter;

		sotoxsocket_n(ev_pcb->evp_socket, xso);
		sbtoxsockbuf_n(ev_pcb->evp_socket ?
			&ev_pcb->evp_socket->so_rcv : NULL, xsbrcv);
		sbtoxsockbuf_n(ev_pcb->evp_socket ?
			&ev_pcb->evp_socket->so_snd : NULL, xsbsnd);
		sbtoxsockstat_n(ev_pcb->evp_socket, xsostats);

		lck_mtx_unlock(&ev_pcb->evp_mtx);

		error = SYSCTL_OUT(req, buf, item_size);
	}

	if (error == 0) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xsg, sizeof (xsg));
		xsg.xg_len = sizeof (xsg);
		xsg.xg_count = n;
		xsg.xg_gen = kevtstat.kes_gencnt;
		xsg.xg_sogen = so_gencnt;
		error = SYSCTL_OUT(req, &xsg, sizeof (xsg));
	}

done:
	lck_rw_done(kev_rwlock);

	return (error);
}

#endif /* SOCKETS */
int
fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
{
	struct vinfo_stat * st;

	st = &kinfo->kq_stat;

	st->vst_size = kq->kq_count;
	if (kq->kq_state & KQ_KEV_QOS)
		st->vst_blksize = sizeof(struct kevent_qos_s);
	else if (kq->kq_state & KQ_KEV64)
		st->vst_blksize = sizeof(struct kevent64_s);
	else
		st->vst_blksize = sizeof(struct kevent);
	st->vst_mode = S_IFIFO;
	st->vst_ino = (kq->kq_state & KQ_DYNAMIC) ?
		((struct kqworkloop *)kq)->kqwl_dynamicid : 0;

	/* flags exported to libproc as PROC_KQUEUE_* (sys/proc_info.h) */
#define PROC_KQUEUE_MASK (KQ_SEL|KQ_SLEEP|KQ_KEV32|KQ_KEV64|KQ_KEV_QOS|KQ_WORKQ|KQ_WORKLOOP)
	kinfo->kq_state = kq->kq_state & PROC_KQUEUE_MASK;

	return (0);
}
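/*
 * Commentary: this is the structure libproc surfaces for kqueue file
 * descriptors (e.g. proc_pidfdinfo() with the PROC_PIDFDKQUEUEINFO flavor);
 * vst_ino doubles as the dynamic workloop ID for KQ_DYNAMIC kqueues and the
 * exported state is masked to the PROC_KQUEUE_* bits defined above.
 */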
static int
fill_kqueue_dyninfo(struct kqueue *kq, struct kqueue_dyninfo *kqdi)
{
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	struct kqrequest *kqr = &kqwl->kqwl_request;
	int err;

	if ((kq->kq_state & KQ_WORKLOOP) == 0) {
		return EINVAL;
	}

	if ((err = fill_kqueueinfo(kq, &kqdi->kqdi_info))) {
		return err;
	}

	kqwl_req_lock(kqwl);

	if (kqr->kqr_thread) {
		kqdi->kqdi_servicer = thread_tid(kqr->kqr_thread);
	}

	if (kqwl->kqwl_owner == WL_OWNER_SUSPENDED) {
		kqdi->kqdi_owner = ~0ull;
	} else {
		kqdi->kqdi_owner = thread_tid(kqwl->kqwl_owner);
	}

	kqdi->kqdi_request_state = kqr->kqr_state;
	kqdi->kqdi_async_qos = kqr->kqr_qos_index;
	kqdi->kqdi_events_qos = kqr->kqr_override_index;
	kqdi->kqdi_sync_waiters = kqr->kqr_dsync_waiters;
	kqdi->kqdi_sync_waiter_qos = kqr->kqr_dsync_waiters_qos;

	kqwl_req_unlock(kqwl);

	return 0;
}
void
knote_markstayactive(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);

	kqlock(kq);
	kn->kn_status |= KN_STAYACTIVE;

	/*
	 * Making a knote stay active is a property of the knote that must be
	 * established before it is fully attached.
	 */
	assert(kn->kn_status & KN_ATTACHING);

	/* handle all stayactive knotes on the (appropriate) manager */
	if (kq->kq_state & KQ_WORKQ) {
		knote_set_qos_index(kn, KQWQ_QOS_MANAGER);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		kqwl_req_lock(kqwl);
		assert(kn->kn_req_index && kn->kn_req_index < THREAD_QOS_LAST);

		kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_STAYACTIVE_QOS,
				kn->kn_req_index);
		kqwl_req_unlock(kqwl);

		knote_set_qos_index(kn, KQWL_BUCKET_STAYACTIVE);
	}

	knote_activate(kn);
	kqunlock(kq);
}
void
knote_clearstayactive(struct knote *kn)
{
	kqlock(knote_get_kq(kn));
	kn->kn_status &= ~KN_STAYACTIVE;
	knote_deactivate(kn);
	kqunlock(knote_get_kq(kn));
}
static unsigned long
kevent_extinfo_emit(struct kqueue *kq, struct knote *kn, struct kevent_extinfo *buf,
		unsigned long buflen, unsigned long nknotes)
{
	for (; kn; kn = SLIST_NEXT(kn, kn_link)) {
		if (kq == knote_get_kq(kn)) {
			if (nknotes < buflen) {
				struct kevent_extinfo *info = &buf[nknotes];
				struct kevent_internal_s *kevp = &kn->kn_kevent;

				kqlock(kq);

				info->kqext_kev = (struct kevent_qos_s){
					.ident = kevp->ident,
					.filter = kevp->filter,
					.flags = kevp->flags,
					.fflags = kevp->fflags,
					.data = (int64_t)kevp->data,
					.udata = kevp->udata,
					.ext[0] = kevp->ext[0],
					.ext[1] = kevp->ext[1],
					.ext[2] = kevp->ext[2],
					.ext[3] = kevp->ext[3],
					.qos = kn->kn_req_index,
				};
				info->kqext_sdata = kn->kn_sdata;
				info->kqext_status = kn->kn_status;
				info->kqext_sfflags = kn->kn_sfflags;

				kqunlock(kq);
			}

			/* we return total number of knotes, which may be more than requested */
			nknotes++;
		}
	}

	return nknotes;
}
int
kevent_copyout_proc_dynkqids(void *proc, user_addr_t ubuf, uint32_t ubufsize,
		int32_t *nkqueues_out)
{
	proc_t p = (proc_t)proc;
	struct filedesc *fdp = p->p_fd;
	unsigned int nkqueues = 0;
	unsigned long ubuflen = ubufsize / sizeof(kqueue_id_t);
	size_t buflen, bufsize;
	kqueue_id_t *kq_ids = NULL;
	int err = 0;

	assert(p->p_fd != NULL);

	if (ubuf == USER_ADDR_NULL && ubufsize != 0) {
		err = EINVAL;
		goto out;
	}

	buflen = min(ubuflen, PROC_PIDDYNKQUEUES_MAX);

	if (ubuflen != 0) {
		if (os_mul_overflow(sizeof(kqueue_id_t), buflen, &bufsize)) {
			err = ERANGE;
			goto out;
		}
		kq_ids = kalloc(bufsize);
		assert(kq_ids != NULL);
	}

	kqhash_lock(p);

	if (fdp->fd_kqhashmask > 0) {
		for (uint32_t i = 0; i < fdp->fd_kqhashmask + 1; i++) {
			struct kqworkloop *kqwl;

			SLIST_FOREACH(kqwl, &fdp->fd_kqhash[i], kqwl_hashlink) {
				/* report the number of kqueues, even if they don't all fit */
				if (nkqueues < buflen) {
					kq_ids[nkqueues] = kqwl->kqwl_dynamicid;
				}
				nkqueues++;
			}
		}
	}

	kqhash_unlock(p);

	if (kq_ids) {
		size_t copysize;
		if (os_mul_overflow(sizeof(kqueue_id_t), min(ubuflen, nkqueues), &copysize)) {
			err = ERANGE;
			goto out;
		}

		assert(ubufsize >= copysize);
		err = copyout(kq_ids, ubuf, copysize);
	}

out:
	if (kq_ids) {
		kfree(kq_ids, bufsize);
	}

	if (!err) {
		*nkqueues_out = (int)min(nkqueues, PROC_PIDDYNKQUEUES_MAX);
	}
	return err;
}
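/*
 * Usage sketch (assumes proc_list_dynkqueueids() is the libproc wrapper that
 * reaches this copyout path; it is not declared in this file):
 *
 *	kqueue_id_t ids[32];
 *	int n = proc_list_dynkqueueids(getpid(), ids, sizeof(ids));
 *	// n is the total number of dynamic kqueues (capped at
 *	// PROC_PIDDYNKQUEUES_MAX), which may exceed the IDs that fit
 *	// in the supplied buffer, mirroring the logic above.
 */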
int
kevent_copyout_dynkqinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf,
		uint32_t ubufsize, int32_t *size_out)
{
	proc_t p = (proc_t)proc;
	struct kqueue *kq;
	int err = 0;
	struct kqueue_dyninfo kqdi = { };

	assert(p != NULL);

	if (ubufsize < sizeof(struct kqueue_info)) {
		return ENOBUFS;
	}

	kqhash_lock(p);
	kq = kqueue_hash_lookup(p, kq_id);
	if (!kq) {
		kqhash_unlock(p);
		return ESRCH;
	}
	kqueue_retain(kq);
	kqhash_unlock(p);

	/*
	 * backward compatibility: allow the argument to this call to only be
	 * a struct kqueue_info
	 */
	if (ubufsize >= sizeof(struct kqueue_dyninfo)) {
		ubufsize = sizeof(struct kqueue_dyninfo);
		err = fill_kqueue_dyninfo(kq, &kqdi);
	} else {
		ubufsize = sizeof(struct kqueue_info);
		err = fill_kqueueinfo(kq, &kqdi.kqdi_info);
	}
	if (err == 0 && (err = copyout(&kqdi, ubuf, ubufsize)) == 0) {
		*size_out = ubufsize;
	}
	kqueue_release_last(p, kq);
	return err;
}
int
kevent_copyout_dynkqextinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf,
		uint32_t ubufsize, int32_t *nknotes_out)
{
	proc_t p = (proc_t)proc;
	struct kqueue *kq;
	int err;

	assert(p != NULL);

	kqhash_lock(p);
	kq = kqueue_hash_lookup(p, kq_id);
	if (!kq) {
		kqhash_unlock(p);
		return ESRCH;
	}
	kqueue_retain(kq);
	kqhash_unlock(p);

	err = pid_kqueue_extinfo(p, kq, ubuf, ubufsize, nknotes_out);
	kqueue_release_last(p, kq);
	return err;
}
int
pid_kqueue_extinfo(proc_t p, struct kqueue *kq, user_addr_t ubuf,
		uint32_t bufsize, int32_t *retval)
{
	struct knote *kn;
	int i;
	int err = 0;
	struct filedesc *fdp = p->p_fd;
	unsigned long nknotes = 0;
	unsigned long buflen = bufsize / sizeof(struct kevent_extinfo);
	struct kevent_extinfo *kqext = NULL;

	/* arbitrary upper limit to cap kernel memory usage, copyout size, etc. */
	buflen = min(buflen, PROC_PIDFDKQUEUE_KNOTES_MAX);

	kqext = kalloc(buflen * sizeof(struct kevent_extinfo));
	if (kqext == NULL) {
		err = ENOMEM;
		goto out;
	}
	bzero(kqext, buflen * sizeof(struct kevent_extinfo));

	proc_fdlock(p);
	for (i = 0; i < fdp->fd_knlistsize; i++) {
		kn = SLIST_FIRST(&fdp->fd_knlist[i]);
		nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes);
	}
	proc_fdunlock(p);

	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
			knhash_lock(p);
			kn = SLIST_FIRST(&fdp->fd_knhash[i]);
			nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes);
			knhash_unlock(p);
		}
	}

	assert(bufsize >= sizeof(struct kevent_extinfo) * min(buflen, nknotes));
	err = copyout(kqext, ubuf, sizeof(struct kevent_extinfo) * min(buflen, nknotes));

out:
	if (kqext) {
		kfree(kqext, buflen * sizeof(struct kevent_extinfo));
		kqext = NULL;
	}

	if (!err) {
		*retval = min(nknotes, PROC_PIDFDKQUEUE_KNOTES_MAX);
	}
	return err;
}
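/*
 * Commentary: both the copyout and the reported count are capped at
 * PROC_PIDFDKQUEUE_KNOTES_MAX to bound kernel memory usage and copyout
 * size; a caller that receives exactly the cap cannot tell whether more
 * knotes exist beyond it.
 */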
static unsigned int
klist_copy_udata(struct klist *list, uint64_t *buf,
		unsigned int buflen, unsigned int nknotes)
{
	struct kevent_internal_s *kev;
	struct knote *kn;
	SLIST_FOREACH(kn, list, kn_link) {
		if (nknotes < buflen) {
			struct kqueue *kq = knote_get_kq(kn);
			kqlock(kq);
			kev = &(kn->kn_kevent);
			buf[nknotes] = kev->udata;
			kqunlock(kq);
		}
		/* we return total number of knotes, which may be more than requested */
		nknotes++;
	}

	return nknotes;
}
static unsigned int
kqlist_copy_dynamicids(__assert_only proc_t p, struct kqlist *list,
		uint64_t *buf, unsigned int buflen, unsigned int nids)
{
	kqhash_lock_held(p);
	struct kqworkloop *kqwl;
	SLIST_FOREACH(kqwl, list, kqwl_hashlink) {
		if (nids < buflen) {
			buf[nids] = kqwl->kqwl_dynamicid;
		}
		nids++;
	}
	return nids;
}
int
kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize)
{
	proc_t p = (proc_t)proc;
	struct filedesc *fdp = p->p_fd;
	unsigned int nuptrs = 0;
	unsigned long buflen = bufsize / sizeof(uint64_t);

	if (buflen > 0) {
		assert(buf != NULL);
	}

	proc_fdlock(p);
	for (int i = 0; i < fdp->fd_knlistsize; i++) {
		nuptrs = klist_copy_udata(&fdp->fd_knlist[i], buf, buflen, nuptrs);
	}
	knhash_lock(p);
	proc_fdunlock(p);
	if (fdp->fd_knhashmask != 0) {
		for (int i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
			nuptrs = klist_copy_udata(&fdp->fd_knhash[i], buf, buflen, nuptrs);
		}
	}
	knhash_unlock(p);

	kqhash_lock(p);
	if (fdp->fd_kqhashmask != 0) {
		for (int i = 0; i < (int)fdp->fd_kqhashmask + 1; i++) {
			nuptrs = kqlist_copy_dynamicids(p, &fdp->fd_kqhash[i], buf, buflen,
					nuptrs);
		}
	}
	kqhash_unlock(p);

	return (int)nuptrs;
}
static void
kevent_redrive_proc_thread_request(proc_t p)
{
	__assert_only int ret;
	ret = (*pthread_functions->workq_threadreq)(p, NULL, WORKQ_THREADREQ_REDRIVE, 0, 0);
	assert(ret == 0 || ret == ECANCELED);
}
static void
kevent_set_return_to_kernel_user_tsd(proc_t p, thread_t thread)
{
	uint64_t ast_addr;
	bool proc_is_64bit = !!(p->p_flag & P_LP64);
	size_t user_addr_size = proc_is_64bit ? 8 : 4;
	uint32_t ast_flags32 = 0;
	uint64_t ast_flags64 = 0;
	struct uthread *ut = get_bsdthread_info(thread);

	if (ut->uu_kqueue_bound != NULL) {
		if (ut->uu_kqueue_flags & KEVENT_FLAG_WORKLOOP) {
			ast_flags64 |= R2K_WORKLOOP_PENDING_EVENTS;
		} else if (ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ) {
			ast_flags64 |= R2K_WORKQ_PENDING_EVENTS;
		}
	}

	if (ast_flags64 == 0) {
		return;
	}

	if (!(p->p_flag & P_LP64)) {
		ast_flags32 = (uint32_t)ast_flags64;
		assert(ast_flags64 < 0x100000000ull);
	}

	ast_addr = thread_rettokern_addr(thread);
	if (ast_addr == 0) {
		return;
	}

	if (copyout((proc_is_64bit ? (void *)&ast_flags64 : (void *)&ast_flags32),
	            (user_addr_t)ast_addr,
	            user_addr_size) != 0) {
		printf("pid %d (tid:%llu): copyout of return_to_kernel ast flags failed with "
		       "ast_addr = %llu\n", p->p_pid, thread_tid(current_thread()), ast_addr);
	}
}
void
kevent_ast(thread_t thread, uint16_t bits)
{
	proc_t p = current_proc();

	if (bits & AST_KEVENT_REDRIVE_THREADREQ) {
		kevent_redrive_proc_thread_request(p);
	}
	if (bits & AST_KEVENT_RETURN_TO_KERNEL) {
		kevent_set_return_to_kernel_user_tsd(p, thread);
	}
}

#if DEVELOPMENT || DEBUG
#define KEVENT_SYSCTL_BOUND_ID 1

static int
kevent_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	uintptr_t type = (uintptr_t)arg1;
	uint64_t bound_id = 0;
	struct uthread *ut;
	struct kqueue *kq;

	if (type != KEVENT_SYSCTL_BOUND_ID) {
		return EINVAL;
	}

	if (req->newptr) {
		return EINVAL;
	}

	ut = get_bsdthread_info(current_thread());
	if (!ut) {
		return EFAULT;
	}

	kq = ut->uu_kqueue_bound;
	if (kq) {
		if (kq->kq_state & KQ_WORKLOOP) {
			bound_id = ((struct kqworkloop *)kq)->kqwl_dynamicid;
		} else if (kq->kq_state & KQ_WORKQ) {
			bound_id = -1;
		}
	}

	return sysctl_io_number(req, bound_id, sizeof(bound_id), NULL, NULL);
}
SYSCTL_NODE(_kern, OID_AUTO, kevent, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
		"kevent information");

SYSCTL_PROC(_kern_kevent, OID_AUTO, bound_id,
		CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
		(void *)KEVENT_SYSCTL_BOUND_ID,
		sizeof(kqueue_id_t), kevent_sysctl, "Q",
		"get the ID of the bound kqueue");
#endif /* DEVELOPMENT || DEBUG */