/*
 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_event.c       1.0 (3/31/2000)
 */
#include <machine/atomic.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syscall.h> // SYS_* constants
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vnode_internal.h>
#include <sys/proc_info.h>
#include <sys/codesign.h>
#include <sys/pthread_shims.h>
#include <sys/kdebug.h>
#include <sys/reason.h>
#include <os/reason_private.h>
#include <pexpert/pexpert.h>

#include <kern/locks.h>
#include <kern/clock.h>
#include <kern/cpu_data.h>
#include <kern/policy_internal.h>
#include <kern/thread_call.h>
#include <kern/sched_prim.h>
#include <kern/waitq.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/assert.h>
#include <kern/ast.h>
#include <kern/thread.h>
#include <kern/kcdata.h>

#include <pthread/priority_private.h>
#include <pthread/workqueue_syscalls.h>
#include <pthread/workqueue_internal.h>
#include <libkern/libkern.h>
#include <libkern/OSAtomic.h>

#include "net/net_str_id.h"

#include <mach/task.h>
#include <libkern/section_keywords.h>

#if CONFIG_MEMORYSTATUS
#include <sys/kern_memorystatus.h>
#endif
extern thread_t port_name_to_thread(mach_port_name_t port_name); /* osfmk/kern/ipc_tt.h   */
extern mach_port_name_t ipc_entry_name_mask(mach_port_name_t name); /* osfmk/ipc/ipc_entry.h */

#define KEV_EVTID(code) BSDDBG_CODE(DBG_BSD_KEVENT, (code))

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

#define KQ_EVENT        NO_EVENT64
static int kqueue_read(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int kqueue_write(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
    vfs_context_t ctx);
static int kqueue_select(struct fileproc *fp, int which, void *wq_link_id,
    vfs_context_t ctx);
static int kqueue_close(struct fileglob *fg, vfs_context_t ctx);
static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn,
    struct kevent_internal_s *kev, vfs_context_t ctx);
static int kqueue_drain(struct fileproc *fp, vfs_context_t ctx);
static const struct fileops kqueueops = {
    .fo_type     = DTYPE_KQUEUE,
    .fo_read     = kqueue_read,
    .fo_write    = kqueue_write,
    .fo_ioctl    = kqueue_ioctl,
    .fo_select   = kqueue_select,
    .fo_close    = kqueue_close,
    .fo_kqfilter = kqueue_kqfilter,
    .fo_drain    = kqueue_drain,
};
static void kevent_put_kq(struct proc *p, kqueue_id_t id, struct fileproc *fp, struct kqueue *kq);
static int kevent_internal(struct proc *p,
    kqueue_id_t id, kqueue_id_t *id_out,
    user_addr_t changelist, int nchanges,
    user_addr_t eventlist, int nevents,
    user_addr_t data_out, uint64_t data_available,
    unsigned int flags, user_addr_t utimeout,
    kqueue_continue_t continuation,
    int32_t *retval);
static int kevent_copyin(user_addr_t *addrp, struct kevent_internal_s *kevp,
    struct proc *p, unsigned int flags);
static int kevent_copyout(struct kevent_internal_s *kevp, user_addr_t *addrp,
    struct proc *p, unsigned int flags);
char * kevent_description(struct kevent_internal_s *kevp, char *s, size_t n);

static int kevent_register_wait_prepare(struct knote *kn, struct kevent_internal_s *kev);
static void kevent_register_wait_block(struct turnstile *ts, thread_t handoff_thread,
    struct knote_lock_ctx *knlc, thread_continue_t cont,
    struct _kevent_register *cont_args) __dead2;
static void kevent_register_wait_return(struct _kevent_register *cont_args) __dead2;
static void kevent_register_wait_cleanup(struct knote *kn);
static inline void kqueue_release_last(struct proc *p, kqueue_t kqu);
static void kqueue_interrupt(struct kqueue *kq);
static int kevent_callback(struct kqueue *kq, struct kevent_internal_s *kevp,
    void *data);
static void kevent_continue(struct kqueue *kq, void *data, int error);
static void kqueue_scan_continue(void *contp, wait_result_t wait_result);
static int kqueue_process(struct kqueue *kq, kevent_callback_t callback, void *callback_data,
    struct filt_process_s *process_data, int *countp);
static int kqueue_queue_empty(struct kqueue *kq, kq_index_t qos_index);

static struct kqtailq *kqueue_get_suppressed_queue(kqueue_t kq, struct knote *kn);
static void kqueue_threadreq_initiate(struct kqueue *kq, struct kqrequest *kqr, kq_index_t qos, int flags);

static void kqworkq_update_override(struct kqworkq *kqwq, struct knote *kn, kq_index_t qos);
static void kqworkq_unbind(proc_t p, struct kqrequest *kqr);
static thread_qos_t kqworkq_unbind_locked(struct kqworkq *kqwq, struct kqrequest *kqr, thread_t thread);
static struct kqrequest *kqworkq_get_request(struct kqworkq *kqwq, kq_index_t qos_index);

static void kqworkloop_update_override(struct kqworkloop *kqwl, kq_index_t override_index);
static void kqworkloop_unbind(proc_t p, struct kqworkloop *kwql);
static thread_qos_t kqworkloop_unbind_locked(struct kqworkloop *kwql, thread_t thread);
static kq_index_t kqworkloop_owner_override(struct kqworkloop *kqwl);
enum {
    KQWL_UTQ_NONE,
    /*
     * The wakeup qos is the qos of QUEUED knotes.
     *
     * This QoS is accounted for with the events override in the
     * kqr_override_index field. It is raised each time a new knote is queued at
     * a given QoS. The kqr_wakeup_indexes field is a superset of the non empty
     * knote buckets and is recomputed after each event delivery.
     */
    KQWL_UTQ_UPDATE_WAKEUP_QOS,
    KQWL_UTQ_UPDATE_STAYACTIVE_QOS,
    KQWL_UTQ_RECOMPUTE_WAKEUP_QOS,
    KQWL_UTQ_UNBINDING, /* attempt to rebind */
    /*
     * The wakeup override is for suppressed knotes that have fired again at
     * a higher QoS than the one for which they are suppressed already.
     * This override is cleared when the knote suppressed list becomes empty.
     */
    KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE,
    KQWL_UTQ_RESET_WAKEUP_OVERRIDE,
    /*
     * The QoS is the maximum QoS of an event enqueued on this workloop in
     * userland. It is copied from the only EVFILT_WORKLOOP knote with
     * a NOTE_WL_THREAD_REQUEST bit set allowed on this workloop. If there is no
     * such knote, this QoS is 0.
     */
    KQWL_UTQ_SET_QOS_INDEX,
    KQWL_UTQ_REDRIVE_EVENTS,
};
static void kqworkloop_update_threads_qos(struct kqworkloop *kqwl, int op, kq_index_t qos);
static void kqworkloop_request_help(struct kqworkloop *kqwl, kq_index_t qos_index);
static int kqworkloop_end_processing(struct kqworkloop *kqwl, int flags, int kevent_flags);

static int knote_process(struct knote *kn, kevent_callback_t callback, void *callback_data,
    struct filt_process_s *process_data);

static int kq_add_knote(struct kqueue *kq, struct knote *kn,
    struct knote_lock_ctx *knlc, struct proc *p);
static struct knote *kq_find_knote_and_kq_lock(struct kqueue *kq, struct kevent_internal_s *kev, bool is_fd, struct proc *p);

static void knote_drop(struct kqueue *kq, struct knote *kn, struct knote_lock_ctx *knlc);
static struct knote *knote_alloc(void);
static void knote_free(struct knote *kn);

static void knote_activate(struct knote *kn);
static void knote_deactivate(struct knote *kn);

static void knote_enable(struct knote *kn);
static void knote_disable(struct knote *kn);

static int knote_enqueue(struct knote *kn);
static void knote_dequeue(struct knote *kn);

static void knote_suppress(struct knote *kn);
static void knote_unsuppress(struct knote *kn);
static void knote_wakeup(struct knote *kn);

static bool knote_should_apply_qos_override(struct kqueue *kq, struct knote *kn,
    int result, thread_qos_t *qos_out);
static void knote_apply_qos_override(struct knote *kn, kq_index_t qos_index);
static void knote_adjust_qos(struct kqueue *kq, struct knote *kn, int result);
static void knote_reset_priority(struct knote *kn, pthread_priority_t pp);
static kq_index_t knote_get_qos_override_index(struct knote *kn);
static void knote_set_qos_overcommit(struct knote *kn);
static zone_t knote_zone;
static zone_t kqfile_zone;
static zone_t kqworkq_zone;
static zone_t kqworkloop_zone;
#if DEVELOPMENT || DEBUG
#define KEVENT_PANIC_ON_WORKLOOP_OWNERSHIP_LEAK  (1U << 0)
#define KEVENT_PANIC_ON_NON_ENQUEUED_PROCESS     (1U << 1)
#define KEVENT_PANIC_BOOT_ARG_INITIALIZED        (1U << 31)

#define KEVENT_PANIC_DEFAULT_VALUE (0)

static uint32_t
kevent_debug_flags(void)
{
    static uint32_t flags = KEVENT_PANIC_DEFAULT_VALUE;

    if ((flags & KEVENT_PANIC_BOOT_ARG_INITIALIZED) == 0) {
        uint32_t value = 0;
        if (!PE_parse_boot_argn("kevent_debug", &value, sizeof(value))) {
            value = KEVENT_PANIC_DEFAULT_VALUE;
        }
        value |= KEVENT_PANIC_BOOT_ARG_INITIALIZED;
        os_atomic_store(&flags, value, relaxed);
    }
    return flags;
}
#endif

#define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))
/* placeholder for not-yet-implemented filters */
static int filt_badattach(struct knote *kn, struct kevent_internal_s *kev);
static int filt_badevent(struct knote *kn, long hint);
SECURITY_READ_ONLY_EARLY(static struct filterops) bad_filtops = {
    .f_attach = filt_badattach,
};

#if CONFIG_MEMORYSTATUS
extern const struct filterops memorystatus_filtops;
#endif /* CONFIG_MEMORYSTATUS */
extern const struct filterops fs_filtops;
extern const struct filterops sig_filtops;
extern const struct filterops machport_filtops;
extern const struct filterops pipe_rfiltops;
extern const struct filterops pipe_wfiltops;
extern const struct filterops ptsd_kqops;
extern const struct filterops ptmx_kqops;
extern const struct filterops soread_filtops;
extern const struct filterops sowrite_filtops;
extern const struct filterops sock_filtops;
extern const struct filterops soexcept_filtops;
extern const struct filterops spec_filtops;
extern const struct filterops bpfread_filtops;
extern const struct filterops necp_fd_rfiltops;
extern const struct filterops fsevent_filtops;
extern const struct filterops vnode_filtops;
extern const struct filterops tty_filtops;

const static struct filterops file_filtops;
const static struct filterops kqread_filtops;
const static struct filterops proc_filtops;
const static struct filterops timer_filtops;
const static struct filterops user_filtops;
const static struct filterops workloop_filtops;
/*
 * Rules for adding new filters to the system:
 *
 * Public filters:
 * - Add a new "EVFILT_" option value to bsd/sys/event.h (typically a negative value)
 *   in the exported section of the header
 * - Update the EVFILT_SYSCOUNT value to reflect the new addition
 * - Add a filterops to the sysfilt_ops array. Public filters should be added at the end
 *   of the Public Filters section in the array.
 *
 * Private filters:
 * - Add a new "EVFILT_" value to bsd/sys/event.h (typically a positive value)
 *   in the XNU_KERNEL_PRIVATE section of the header
 * - Update the EVFILTID_MAX value to reflect the new addition
 * - Add a filterops to the sysfilt_ops. Private filters should be added at the end of
 *   the Private filters section of the array.
 */
SECURITY_READ_ONLY_EARLY(static struct filterops *) sysfilt_ops[EVFILTID_MAX] = {
    /* Public Filters */
    [~EVFILT_READ]          = &file_filtops,
    [~EVFILT_WRITE]         = &file_filtops,
    [~EVFILT_AIO]           = &bad_filtops,
    [~EVFILT_VNODE]         = &file_filtops,
    [~EVFILT_PROC]          = &proc_filtops,
    [~EVFILT_SIGNAL]        = &sig_filtops,
    [~EVFILT_TIMER]         = &timer_filtops,
    [~EVFILT_MACHPORT]      = &machport_filtops,
    [~EVFILT_FS]            = &fs_filtops,
    [~EVFILT_USER]          = &user_filtops,
    [~EVFILT_VM]            = &bad_filtops,
    [~EVFILT_SOCK]          = &file_filtops,
#if CONFIG_MEMORYSTATUS
    [~EVFILT_MEMORYSTATUS]  = &memorystatus_filtops,
#else
    [~EVFILT_MEMORYSTATUS]  = &bad_filtops,
#endif
    [~EVFILT_EXCEPT]        = &file_filtops,
    [~EVFILT_WORKLOOP]      = &workloop_filtops,

    /* Private filters */
    [EVFILTID_KQREAD]       = &kqread_filtops,
    [EVFILTID_PIPE_R]       = &pipe_rfiltops,
    [EVFILTID_PIPE_W]       = &pipe_wfiltops,
    [EVFILTID_PTSD]         = &ptsd_kqops,
    [EVFILTID_SOREAD]       = &soread_filtops,
    [EVFILTID_SOWRITE]      = &sowrite_filtops,
    [EVFILTID_SCK]          = &sock_filtops,
    [EVFILTID_SOEXCEPT]     = &soexcept_filtops,
    [EVFILTID_SPEC]         = &spec_filtops,
    [EVFILTID_BPFREAD]      = &bpfread_filtops,
    [EVFILTID_NECP_FD]      = &necp_fd_rfiltops,
    [EVFILTID_FSEVENT]      = &fsevent_filtops,
    [EVFILTID_VN]           = &vnode_filtops,
    [EVFILTID_TTY]          = &tty_filtops,
    [EVFILTID_PTMX]         = &ptmx_kqops,
};
/* waitq prepost callback */
void waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook, void *knote_hook, int qos);

static inline struct kqworkloop *
kqr_kqworkloop(struct kqrequest *kqr)
{
    if (kqr->kqr_state & KQR_WORKLOOP) {
        return __container_of(kqr, struct kqworkloop, kqwl_request);
    }
    return NULL;
}

static inline kqueue_t
kqr_kqueue(proc_t p, struct kqrequest *kqr)
{
    kqueue_t kqu;
    if (kqr->kqr_state & KQR_WORKLOOP) {
        kqu.kqwl = kqr_kqworkloop(kqr);
    } else {
        kqu.kqwq = (struct kqworkq *)p->p_fd->fd_wqkqueue;
        assert(kqr >= kqu.kqwq->kqwq_request &&
            kqr < kqu.kqwq->kqwq_request + KQWQ_NBUCKETS);
    }
    return kqu;
}

static inline boolean_t
is_workqueue_thread(thread_t thread)
{
    return thread_get_tag(thread) & THREAD_TAG_WORKQUEUE;
}
/*
 * kqueue/note lock implementations
 *
 *	The kqueue lock guards the kq state, the state of its queues,
 *	and the kqueue-aware status and locks of individual knotes.
 *
 *	The kqueue workq lock is used to protect state guarding the
 *	interaction of the kqueue with the workq. This state cannot
 *	be guarded by the kq lock - as it needs to be taken when we
 *	already have the waitq set lock held (during the waitq hook
 *	callback). It might be better to use the waitq lock itself
 *	for this, but the IRQ requirements make that difficult.
 *
 *	Knote flags, filter flags, and associated data are protected
 *	by the underlying object lock - and are only ever looked at
 *	by calling the filter to get a [consistent] snapshot of that
 *	data.
 */
static lck_grp_attr_t *kq_lck_grp_attr;
static lck_grp_t *kq_lck_grp;
static lck_attr_t *kq_lck_attr;
static inline void
kqlock(kqueue_t kqu)
{
    lck_spin_lock(&kqu.kq->kq_lock);
}

static inline void
kqlock_held(__assert_only kqueue_t kqu)
{
    LCK_SPIN_ASSERT(&kqu.kq->kq_lock, LCK_ASSERT_OWNED);
}

static inline void
kqunlock(kqueue_t kqu)
{
    lck_spin_unlock(&kqu.kq->kq_lock);
}

static inline void
kq_req_lock(kqueue_t kqu)
{
    assert(kqu.kq->kq_state & (KQ_WORKLOOP | KQ_WORKQ));
    lck_spin_lock(&kqu.kq->kq_reqlock);
}

static inline void
kq_req_unlock(kqueue_t kqu)
{
    assert(kqu.kq->kq_state & (KQ_WORKLOOP | KQ_WORKQ));
    lck_spin_unlock(&kqu.kq->kq_reqlock);
}

static inline void
kq_req_held(__assert_only kqueue_t kqu)
{
    assert(kqu.kq->kq_state & (KQ_WORKLOOP | KQ_WORKQ));
    LCK_SPIN_ASSERT(&kqu.kq->kq_reqlock, LCK_ASSERT_OWNED);
}

static inline void
knhash_lock(proc_t p)
{
    lck_mtx_lock(&p->p_fd->fd_knhashlock);
}

static inline void
knhash_unlock(proc_t p)
{
    lck_mtx_unlock(&p->p_fd->fd_knhashlock);
}
#pragma mark knote locks

/*
 * Enum used by the knote_lock_* functions.
 *
 * KNOTE_KQ_LOCK_ALWAYS
 *   The function will always return with the kq lock held.
 *
 * KNOTE_KQ_LOCK_ON_SUCCESS
 *   The function will return with the kq lock held if it was successful
 *   (knote_lock() is the only function that can fail).
 *
 * KNOTE_KQ_LOCK_ON_FAILURE
 *   The function will return with the kq lock held if it was unsuccessful
 *   (knote_lock() is the only function that can fail).
 *
 * KNOTE_KQ_UNLOCK
 *   The function returns with the kq unlocked.
 */
#define KNOTE_KQ_LOCK_ALWAYS      0x0
#define KNOTE_KQ_LOCK_ON_SUCCESS  0x1
#define KNOTE_KQ_LOCK_ON_FAILURE  0x2
#define KNOTE_KQ_UNLOCK           0x3
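/*
 * Illustrative sketch (not from the original source): a caller that only
 * wants to keep the kq lock when the knote lock was acquired might do
 * something like the following, where `kq`, `kn` and `knlc` are assumed to
 * be set up by the caller.
 *
 *	if (!knote_lock(kq, kn, &knlc, KNOTE_KQ_LOCK_ON_SUCCESS)) {
 *		// the knote was dropped while waiting; the kq lock is NOT held
 *		return;
 *	}
 *	// kq lock held here, knote lock owned by this thread
 *	knote_unlock(kq, kn, &knlc, KNOTE_KQ_UNLOCK);
 */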
#if DEBUG || DEVELOPMENT
__attribute__((noinline, not_tail_called, disable_tail_calls))
void
knote_lock_ctx_chk(struct knote_lock_ctx *knlc)
{
    /* evil hackery to make sure no one forgets to unlock */
    assert(knlc->knlc_state == KNOTE_LOCK_CTX_UNLOCKED);
}
#endif
static struct knote_lock_ctx *
knote_lock_ctx_find(struct kqueue *kq, struct knote *kn)
{
    struct knote_lock_ctx *ctx;
    LIST_FOREACH(ctx, &kq->kq_knlocks, knlc_le) {
        if (ctx->knlc_knote == kn) {
            return ctx;
        }
    }
    panic("knote lock context not found: %p", kn);
    __builtin_trap();
}
/* slowpath of knote_lock() */
__attribute__((noinline))
static bool __result_use_check
knote_lock_slow(struct kqueue *kq, struct knote *kn,
    struct knote_lock_ctx *knlc, int kqlocking)
{
    kqlock_held(kq);

    struct knote_lock_ctx *owner_lc = knote_lock_ctx_find(kq, kn);
    thread_t owner_thread = owner_lc->knlc_thread;

#if DEBUG || DEVELOPMENT
    knlc->knlc_state = KNOTE_LOCK_CTX_WAITING;
#endif

    thread_reference(owner_thread);
    TAILQ_INSERT_TAIL(&owner_lc->knlc_head, knlc, knlc_tqe);
    assert_wait(&kn->kn_status, THREAD_UNINT | THREAD_WAIT_NOREPORT);
    kqunlock(kq);

    if (thread_handoff_deallocate(owner_thread) == THREAD_RESTART) {
        if (kqlocking == KNOTE_KQ_LOCK_ALWAYS ||
            kqlocking == KNOTE_KQ_LOCK_ON_FAILURE) {
            kqlock(kq);
        }
#if DEBUG || DEVELOPMENT
        assert(knlc->knlc_state == KNOTE_LOCK_CTX_WAITING);
        knlc->knlc_state = KNOTE_LOCK_CTX_UNLOCKED;
#endif
        return false;
    }
#if DEBUG || DEVELOPMENT
    assert(knlc->knlc_state == KNOTE_LOCK_CTX_LOCKED);
#endif
    if (kqlocking == KNOTE_KQ_LOCK_ALWAYS ||
        kqlocking == KNOTE_KQ_LOCK_ON_SUCCESS) {
        kqlock(kq);
    }
    return true;
}
/*
 * Attempts to take the "knote" lock.
 *
 * Called with the kqueue lock held.
 *
 * Returns true if the knote lock is acquired, false if it has been dropped
 */
static bool __result_use_check
knote_lock(struct kqueue *kq, struct knote *kn, struct knote_lock_ctx *knlc,
    int kqlocking)
{
    kqlock_held(kq);

#if DEBUG || DEVELOPMENT
    assert(knlc->knlc_state == KNOTE_LOCK_CTX_UNLOCKED);
#endif
    knlc->knlc_knote = kn;
    knlc->knlc_thread = current_thread();
    TAILQ_INIT(&knlc->knlc_head);

    if (__improbable(kn->kn_status & KN_LOCKED)) {
        return knote_lock_slow(kq, kn, knlc, kqlocking);
    }

    /*
     * When the knote will be dropped, the knote lock is taken before
     * KN_DROPPING is set, and then the knote will be removed from any
     * hash table that references it before the lock is canceled.
     */
    assert((kn->kn_status & KN_DROPPING) == 0);
    LIST_INSERT_HEAD(&kq->kq_knlocks, knlc, knlc_le);
    kn->kn_status |= KN_LOCKED;
#if DEBUG || DEVELOPMENT
    knlc->knlc_state = KNOTE_LOCK_CTX_LOCKED;
#endif

    if (kqlocking == KNOTE_KQ_UNLOCK ||
        kqlocking == KNOTE_KQ_LOCK_ON_FAILURE) {
        kqunlock(kq);
    }
    return true;
}
/*
 * Unlocks a knote successfully locked with knote_lock().
 *
 * Called with the kqueue lock held.
 *
 * Returns with the kqueue lock held according to KNOTE_KQ_* flags
 */
static void
knote_unlock(struct kqueue *kq, struct knote *kn,
    struct knote_lock_ctx *knlc, int flags)
{
    kqlock_held(kq);

    assert(knlc->knlc_knote == kn);
    assert(kn->kn_status & KN_LOCKED);
#if DEBUG || DEVELOPMENT
    assert(knlc->knlc_state == KNOTE_LOCK_CTX_LOCKED);
#endif

    struct knote_lock_ctx *next_owner_lc = TAILQ_FIRST(&knlc->knlc_head);

    LIST_REMOVE(knlc, knlc_le);

    if (next_owner_lc) {
        assert(next_owner_lc->knlc_knote == kn);
        TAILQ_REMOVE(&knlc->knlc_head, next_owner_lc, knlc_tqe);

        assert(TAILQ_EMPTY(&next_owner_lc->knlc_head));
        TAILQ_CONCAT(&next_owner_lc->knlc_head, &knlc->knlc_head, knlc_tqe);
        LIST_INSERT_HEAD(&kq->kq_knlocks, next_owner_lc, knlc_le);
#if DEBUG || DEVELOPMENT
        next_owner_lc->knlc_state = KNOTE_LOCK_CTX_LOCKED;
#endif
    } else {
        kn->kn_status &= ~KN_LOCKED;
        if (kn->kn_inuse == 0) {
            /*
             * No f_event() in flight anymore, we can leave QoS "Merge" mode
             *
             * See knote_should_apply_qos_override()
             */
            kn->kn_status &= ~KN_MERGE_QOS;
        }
    }

    if (flags & KNOTE_KQ_UNLOCK) {
        kqunlock(kq);
    }
    if (next_owner_lc) {
        thread_wakeup_thread(&kn->kn_status, next_owner_lc->knlc_thread);
    }
#if DEBUG || DEVELOPMENT
    knlc->knlc_state = KNOTE_LOCK_CTX_UNLOCKED;
#endif
}
/*
 * Aborts all waiters for a knote lock, and unlocks the knote.
 *
 * Called with the kqueue lock held.
 *
 * Returns with the kqueue lock held according to KNOTE_KQ_* flags
 */
static void
knote_unlock_cancel(struct kqueue *kq, struct knote *kn,
    struct knote_lock_ctx *knlc, int kqlocking)
{
    kqlock_held(kq);

    assert(knlc->knlc_knote == kn);
    assert(kn->kn_status & KN_LOCKED);
    assert(kn->kn_status & KN_DROPPING);

    LIST_REMOVE(knlc, knlc_le);
    kn->kn_status &= ~KN_LOCKED;

    if (kqlocking == KNOTE_KQ_UNLOCK ||
        kqlocking == KNOTE_KQ_LOCK_ON_FAILURE) {
        kqunlock(kq);
    }
    if (!TAILQ_EMPTY(&knlc->knlc_head)) {
        thread_wakeup_with_result(&kn->kn_status, THREAD_RESTART);
    }
#if DEBUG || DEVELOPMENT
    knlc->knlc_state = KNOTE_LOCK_CTX_UNLOCKED;
#endif
}
/*
 * Call the f_event hook of a given filter.
 *
 * Takes a use count to protect against concurrent drops.
 */
static void
knote_call_filter_event(struct kqueue *kq, struct knote *kn, long hint)
{
    int result, dropping = 0;

    kqlock_held(kq);

    if (kn->kn_status & (KN_DROPPING | KN_VANISHED)) {
        return;
    }

    kn->kn_inuse++;
    kqunlock(kq);
    result = filter_call(knote_fops(kn), f_event(kn, hint));
    kqlock(kq);

    dropping = (kn->kn_status & KN_DROPPING);

    if (!dropping && (result & FILTER_ACTIVE)) {
        if (result & FILTER_ADJUST_EVENT_QOS_BIT) {
            knote_adjust_qos(kq, kn, result);
        }
        knote_activate(kn);
    }

    if (--kn->kn_inuse == 0) {
        if ((kn->kn_status & KN_LOCKED) == 0) {
            /*
             * We're the last f_event() call and there's no other f_* call in
             * flight, we can leave QoS "Merge" mode.
             *
             * See knote_should_apply_qos_override()
             */
            kn->kn_status &= ~KN_MERGE_QOS;
        }
        if (dropping) {
            waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
                CAST_EVENT64_T(&kn->kn_inuse),
                THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
        }
    }
}
/*
 * Called by knote_drop() to wait for the last f_event() caller to be done.
 *
 *	- kq locked at entry
 *	- kq unlocked at exit
 */
static void
knote_wait_for_filter_events(struct kqueue *kq, struct knote *kn)
{
    wait_result_t wr = THREAD_NOT_WAITING;

    kqlock_held(kq);

    assert(kn->kn_status & KN_DROPPING);

    if (kn->kn_inuse) {
        wr = waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
            CAST_EVENT64_T(&kn->kn_inuse),
            THREAD_UNINT | THREAD_WAIT_NOREPORT, TIMEOUT_WAIT_FOREVER);
    }
    kqunlock(kq);
    if (wr == THREAD_WAITING) {
        thread_block(THREAD_CONTINUE_NULL);
    }
}
#pragma mark file_filtops

static int
filt_fileattach(struct knote *kn, struct kevent_internal_s *kev)
{
    return fo_kqfilter(kn->kn_fp, kn, kev, vfs_context_current());
}

SECURITY_READ_ONLY_EARLY(static struct filterops) file_filtops = {
    .f_isfd = 1,
    .f_attach = filt_fileattach,
};
#pragma mark kqread_filtops

#define f_flag f_fglob->fg_flag
#define f_ops f_fglob->fg_ops
#define f_data f_fglob->fg_data
#define f_lflags f_fglob->fg_lflags

static void
filt_kqdetach(struct knote *kn)
{
    struct kqfile *kqf = (struct kqfile *)kn->kn_fp->f_data;
    struct kqueue *kq = &kqf->kqf_kqueue;

    kqlock(kq);
    KNOTE_DETACH(&kqf->kqf_sel.si_note, kn);
    kqunlock(kq);
}

static int
filt_kqueue(struct knote *kn, __unused long hint)
{
    struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

    return kq->kq_count > 0;
}

static int
filt_kqtouch(struct knote *kn, struct kevent_internal_s *kev)
{
#pragma unused(kev)
    struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
    int res;

    kqlock(kq);
    kn->kn_data = kq->kq_count;
    res = (kn->kn_data > 0);
    kqunlock(kq);

    return res;
}

static int
filt_kqprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
{
#pragma unused(data)
    struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
    int res;

    kqlock(kq);
    kn->kn_data = kq->kq_count;
    res = (kn->kn_data > 0);
    if (res) {
        *kev = kn->kn_kevent;
        if (kn->kn_flags & EV_CLEAR) {
            kn->kn_data = 0;
        }
    }
    kqunlock(kq);

    return res;
}

SECURITY_READ_ONLY_EARLY(static struct filterops) kqread_filtops = {
    .f_isfd = 1,
    .f_detach = filt_kqdetach,
    .f_event = filt_kqueue,
    .f_touch = filt_kqtouch,
    .f_process = filt_kqprocess,
};
#pragma mark proc_filtops

static int
filt_procattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
    struct proc *p;

    assert(PID_MAX < NOTE_PDATAMASK);

    if ((kn->kn_sfflags & (NOTE_TRACK | NOTE_TRACKERR | NOTE_CHILD)) != 0) {
        knote_set_error(kn, ENOTSUP);
        return 0;
    }

    p = proc_find(kn->kn_id);
    if (p == NULL) {
        knote_set_error(kn, ESRCH);
        return 0;
    }

    const int NoteExitStatusBits = NOTE_EXIT | NOTE_EXITSTATUS;

    if ((kn->kn_sfflags & NoteExitStatusBits) == NoteExitStatusBits) {
        do {
            pid_t selfpid = proc_selfpid();

            if (p->p_ppid == selfpid) {
                break;  /* parent => ok */
            }
            if ((p->p_lflag & P_LTRACED) != 0 &&
                (p->p_oppid == selfpid)) {
                break;  /* parent-in-waiting => ok */
            }
            proc_rele(p);
            knote_set_error(kn, EACCES);
            return 0;
        } while (0);
    }

    proc_klist_lock();

    kn->kn_ptr.p_proc = p;          /* store the proc handle */

    KNOTE_ATTACH(&p->p_klist, kn);

    proc_klist_unlock();

    proc_rele(p);

    /*
     * only captures edge-triggered events after this point
     * so it can't already be fired.
     */
    return 0;
}
/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to. In that case,
 * the pointer to the process will have already been nulled out.
 */
static void
filt_procdetach(struct knote *kn)
{
    struct proc *p;

    proc_klist_lock();

    p = kn->kn_ptr.p_proc;
    if (p != PROC_NULL) {
        kn->kn_ptr.p_proc = PROC_NULL;
        KNOTE_DETACH(&p->p_klist, kn);
    }

    proc_klist_unlock();
}
static int
filt_proc(struct knote *kn, long hint)
{
    u_int event;

    /* ALWAYS CALLED WITH proc_klist_lock */

    /*
     * Note: a lot of bits in hint may be obtained from the knote
     * To free some of those bits, see <rdar://problem/12592988> Freeing up
     * bits in hint for filt_proc
     *
     * mask off extra data
     */
    event = (u_int)hint & NOTE_PCTRLMASK;

    /*
     * termination lifecycle events can happen while a debugger
     * has reparented a process, in which case notifications
     * should be quashed except to the tracing parent. When
     * the debugger reaps the child (either via wait4(2) or
     * process exit), the child will be reparented to the original
     * parent and these knotes re-fired.
     */
    if (event & NOTE_EXIT) {
        if ((kn->kn_ptr.p_proc->p_oppid != 0)
            && (knote_get_kq(kn)->kq_p->p_pid != kn->kn_ptr.p_proc->p_ppid)) {
            /*
             * This knote is not for the current ptrace(2) parent, ignore.
             */
            return 0;
        }
    }

    /*
     * if the user is interested in this event, record it.
     */
    if (kn->kn_sfflags & event) {
        kn->kn_fflags |= event;
    }

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
    if ((event == NOTE_REAP) || ((event == NOTE_EXIT) && !(kn->kn_sfflags & NOTE_REAP))) {
        kn->kn_flags |= (EV_EOF | EV_ONESHOT);
    }
#pragma clang diagnostic pop

    /*
     * The kernel has a wrapper in place that returns the same data
     * as is collected here, in kn_data. Any changes to how
     * NOTE_EXITSTATUS and NOTE_EXIT_DETAIL are collected
     * should also be reflected in the proc_pidnoteexit() wrapper.
     */
    if (event == NOTE_EXIT) {
        kn->kn_data = 0;
        if ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0) {
            kn->kn_fflags |= NOTE_EXITSTATUS;
            kn->kn_data |= (hint & NOTE_PDATAMASK);
        }
        if ((kn->kn_sfflags & NOTE_EXIT_DETAIL) != 0) {
            kn->kn_fflags |= NOTE_EXIT_DETAIL;
            if ((kn->kn_ptr.p_proc->p_lflag &
                P_LTERM_DECRYPTFAIL) != 0) {
                kn->kn_data |= NOTE_EXIT_DECRYPTFAIL;
            }
            if ((kn->kn_ptr.p_proc->p_lflag &
                P_LTERM_JETSAM) != 0) {
                kn->kn_data |= NOTE_EXIT_MEMORY;
                switch (kn->kn_ptr.p_proc->p_lflag & P_JETSAM_MASK) {
                case P_JETSAM_VMPAGESHORTAGE:
                    kn->kn_data |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE;
                    break;
                case P_JETSAM_VMTHRASHING:
                    kn->kn_data |= NOTE_EXIT_MEMORY_VMTHRASHING;
                    break;
                case P_JETSAM_FCTHRASHING:
                    kn->kn_data |= NOTE_EXIT_MEMORY_FCTHRASHING;
                    break;
                case P_JETSAM_VNODE:
                    kn->kn_data |= NOTE_EXIT_MEMORY_VNODE;
                    break;
                case P_JETSAM_HIWAT:
                    kn->kn_data |= NOTE_EXIT_MEMORY_HIWAT;
                    break;
                case P_JETSAM_PID:
                    kn->kn_data |= NOTE_EXIT_MEMORY_PID;
                    break;
                case P_JETSAM_IDLEEXIT:
                    kn->kn_data |= NOTE_EXIT_MEMORY_IDLE;
                    break;
                }
            }
            if ((kn->kn_ptr.p_proc->p_csflags &
                CS_KILLED) != 0) {
                kn->kn_data |= NOTE_EXIT_CSERROR;
            }
        }
    }

    /* if we have any matching state, activate the knote */
    return kn->kn_fflags != 0;
}
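/*
 * Illustrative sketch (not part of the kernel source): a userspace watcher
 * for the exit of `pid` that also wants the exit status would register
 * roughly as follows; on delivery, fflags contains NOTE_EXIT|NOTE_EXITSTATUS
 * and the low bits of data carry the exit status. As filt_procattach()
 * enforces, this combination is only allowed for the parent (or
 * parent-in-waiting) of the target process.
 *
 *	struct kevent ev;
 *	EV_SET(&ev, pid, EVFILT_PROC, EV_ADD | EV_CLEAR,
 *	    NOTE_EXIT | NOTE_EXITSTATUS, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);
 */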
static int
filt_proctouch(struct knote *kn, struct kevent_internal_s *kev)
{
    int res;

    proc_klist_lock();

    /* accept new filter flags and mask off output events no longer interesting */
    kn->kn_sfflags = kev->fflags;

    /* restrict the current results to the (smaller?) set of new interest */
    /*
     * For compatibility with previous implementations, we leave kn_fflags
     * as they were before.
     */
    //kn->kn_fflags &= kn->kn_sfflags;

    res = (kn->kn_fflags != 0);

    proc_klist_unlock();

    return res;
}
static int
filt_procprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
{
#pragma unused(data)
    int res;

    proc_klist_lock();
    res = (kn->kn_fflags != 0);
    if (res) {
        *kev = kn->kn_kevent;
        kn->kn_flags |= EV_CLEAR; /* automatically set */
        kn->kn_fflags = 0;
        kn->kn_data = 0;
    }
    proc_klist_unlock();

    return res;
}
SECURITY_READ_ONLY_EARLY(static struct filterops) proc_filtops = {
    .f_attach  = filt_procattach,
    .f_detach  = filt_procdetach,
    .f_event   = filt_proc,
    .f_touch   = filt_proctouch,
    .f_process = filt_procprocess,
};
#pragma mark timer_filtops

struct filt_timer_params {
    uint64_t deadline; /* deadline in abs/cont time
                        *  (or 0 if NOTE_ABSOLUTE and deadline is in past) */
    uint64_t leeway;   /* leeway in abstime, or 0 if none */
    uint64_t interval; /* interval in abstime or 0 if non-repeating timer */
};

/*
 * Values stored in the knote at rest (using Mach absolute time units)
 *
 * kn->kn_hook          where the thread_call object is stored
 * kn->kn_ext[0]        next deadline or 0 if immediate expiration
 * kn->kn_ext[1]        leeway value
 * kn->kn_sdata         interval timer: the interval
 *                      absolute/deadline timer: 0
 * kn->kn_hookid        timer state
 *
 * TIMER_IDLE:
 *   The timer has either never been scheduled or been cancelled.
 *   It is safe to schedule a new one in this state.
 *
 * TIMER_ARMED:
 *   The timer has been scheduled
 *
 * TIMER_FIRED:
 *   The timer has fired and an event needs to be delivered.
 *   When in this state, the callout may still be running.
 *
 * TIMER_IMMEDIATE:
 *   The timer has fired at registration time, and the callout was never
 *   dispatched.
 */
#define TIMER_IDLE       0x0
#define TIMER_ARMED      0x1
#define TIMER_FIRED      0x2
#define TIMER_IMMEDIATE  0x3
static void
filt_timer_set_params(struct knote *kn, struct filt_timer_params *params)
{
    kn->kn_ext[0] = params->deadline;
    kn->kn_ext[1] = params->leeway;
    kn->kn_sdata  = params->interval;
}
/*
 * filt_timervalidate - process data from user
 *
 * Sets up the deadline, interval, and leeway from the provided user data
 *
 * Input:
 *      kn_sdata        timer deadline or interval time
 *      kn_sfflags      style of timer, unit of measurement
 *
 * Output:
 *      struct filter_timer_params to apply to the filter with
 *      filt_timer_set_params when changes are ready to be committed.
 *
 * Returns:
 *      EINVAL          Invalid user data parameters
 *      ERANGE          Various overflows with the parameters
 *
 * Called with timer filter lock held.
 */
static int
filt_timervalidate(const struct kevent_internal_s *kev,
    struct filt_timer_params *params)
{
    /*
     * There are 5 knobs that need to be chosen for a timer registration:
     *
     * A) Units of time (what is the time duration of the specified number)
     *      Absolute and interval take:
     *              NOTE_SECONDS, NOTE_USECONDS, NOTE_NSECONDS, NOTE_MACHTIME
     *      Defaults to milliseconds if not specified
     *
     * B) Clock epoch (what is the zero point of the specified number)
     *      For interval, there is none
     *      For absolute, defaults to the gettimeofday/calendar epoch
     *      With NOTE_MACHTIME, uses mach_absolute_time()
     *      With NOTE_MACHTIME and NOTE_MACH_CONTINUOUS_TIME, uses mach_continuous_time()
     *
     * C) The knote's behavior on delivery
     *      Interval timer causes the knote to arm for the next interval unless one-shot is set
     *      Absolute is a forced one-shot timer which deletes on delivery
     *      TODO: Add a way for absolute to be not forced one-shot
     *
     * D) Whether the time duration is relative to now or absolute
     *      Interval fires at now + duration when it is set up
     *      Absolute fires at now + difference between now walltime and passed in walltime
     *      With NOTE_MACHTIME it fires at an absolute MAT or MCT.
     *
     * E) Whether the timer continues to tick across sleep
     *      By default all three do not.
     *      For interval and absolute, NOTE_MACH_CONTINUOUS_TIME causes them to tick across sleep
     *      With NOTE_ABSOLUTE | NOTE_MACHTIME | NOTE_MACH_CONTINUOUS_TIME:
     *              expires when mach_continuous_time() is > the passed in value.
     */

    uint64_t multiplier;

    boolean_t use_abstime = FALSE;

    switch (kev->fflags & (NOTE_SECONDS | NOTE_USECONDS | NOTE_NSECONDS | NOTE_MACHTIME)) {
    case NOTE_SECONDS:
        multiplier = NSEC_PER_SEC;
        break;
    case NOTE_USECONDS:
        multiplier = NSEC_PER_USEC;
        break;
    case NOTE_NSECONDS:
        multiplier = 1;
        break;
    case NOTE_MACHTIME:
        multiplier = 0;
        use_abstime = TRUE;
        break;
    case 0: /* milliseconds (default) */
        multiplier = NSEC_PER_SEC / 1000;
        break;
    default:
        return EINVAL;
    }

    /* transform the leeway in kn_ext[1] to same time scale */
    if (kev->fflags & NOTE_LEEWAY) {
        uint64_t leeway_abs;

        if (use_abstime) {
            leeway_abs = (uint64_t)kev->ext[1];
        } else {
            uint64_t leeway_ns;
            if (os_mul_overflow((uint64_t)kev->ext[1], multiplier, &leeway_ns)) {
                return ERANGE;
            }

            nanoseconds_to_absolutetime(leeway_ns, &leeway_abs);
        }

        params->leeway = leeway_abs;
    } else {
        params->leeway = 0;
    }

    if (kev->fflags & NOTE_ABSOLUTE) {
        uint64_t deadline_abs;

        if (use_abstime) {
            deadline_abs = (uint64_t)kev->data;
        } else {
            uint64_t calendar_deadline_ns;

            if (os_mul_overflow((uint64_t)kev->data, multiplier, &calendar_deadline_ns)) {
                return ERANGE;
            }

            /* calendar_deadline_ns is in nanoseconds since the epoch */

            clock_sec_t seconds;
            clock_nsec_t nanoseconds;

            /*
             * Note that the conversion through wall-time is only done once.
             *
             * If the relationship between MAT and gettimeofday changes,
             * the underlying timer does not update.
             *
             * TODO: build a wall-time denominated timer_call queue
             * and a flag to request DTRTing with wall-time timers
             */
            clock_get_calendar_nanotime(&seconds, &nanoseconds);

            uint64_t calendar_now_ns = (uint64_t)seconds * NSEC_PER_SEC + nanoseconds;

            /* if deadline is in the future */
            if (calendar_now_ns < calendar_deadline_ns) {
                uint64_t interval_ns = calendar_deadline_ns - calendar_now_ns;
                uint64_t interval_abs;

                nanoseconds_to_absolutetime(interval_ns, &interval_abs);

                /*
                 * Note that the NOTE_MACH_CONTINUOUS_TIME flag here only
                 * causes the timer to keep ticking across sleep, but
                 * it does not change the calendar timebase.
                 */

                if (kev->fflags & NOTE_MACH_CONTINUOUS_TIME) {
                    clock_continuoustime_interval_to_deadline(interval_abs,
                        &deadline_abs);
                } else {
                    clock_absolutetime_interval_to_deadline(interval_abs,
                        &deadline_abs);
                }
            } else {
                deadline_abs = 0; /* cause immediate expiration */
            }
        }

        params->deadline = deadline_abs;
        params->interval = 0; /* NOTE_ABSOLUTE is non-repeating */
    } else if (kev->data < 0) {
        /*
         * Negative interval timers fire immediately, once.
         *
         * Ideally a negative interval would be an error, but certain clients
         * pass negative values on accident, and expect an event back.
         *
         * In the old implementation the timer would repeat with no delay
         * N times until mach_absolute_time() + (N * interval) underflowed,
         * then it would wait ~forever by accidentally arming a timer for the far future.
         *
         * We now skip the power-wasting hot spin phase and go straight to the idle phase.
         */

        params->deadline = 0; /* expire immediately */
        params->interval = 0; /* non-repeating */
    } else {
        uint64_t interval_abs = 0;

        if (use_abstime) {
            interval_abs = (uint64_t)kev->data;
        } else {
            uint64_t interval_ns;
            if (os_mul_overflow((uint64_t)kev->data, multiplier, &interval_ns)) {
                return ERANGE;
            }

            nanoseconds_to_absolutetime(interval_ns, &interval_abs);
        }

        uint64_t deadline = 0;

        if (kev->fflags & NOTE_MACH_CONTINUOUS_TIME) {
            clock_continuoustime_interval_to_deadline(interval_abs, &deadline);
        } else {
            clock_absolutetime_interval_to_deadline(interval_abs, &deadline);
        }

        params->deadline = deadline;
        params->interval = interval_abs;
    }

    return 0;
}
/*
 * filt_timerexpire - the timer callout routine
 */
static void
filt_timerexpire(void *knx, __unused void *spare)
{
    struct knote *kn = knx;
    int v;

    if (os_atomic_cmpxchgv(&kn->kn_hookid, TIMER_ARMED, TIMER_FIRED,
        &v, relaxed)) {
        // our f_event always would say FILTER_ACTIVE,
        // so be leaner and just do it.
        struct kqueue *kq = knote_get_kq(kn);
        kqlock(kq);
        knote_activate(kn);
        kqunlock(kq);
    } else {
        /*
         * From TIMER_ARMED, the only allowed transitions are:
         * - to TIMER_FIRED through the timer callout just above
         * - to TIMER_IDLE due to filt_timercancel() which will wait for the
         *   timer callout (and any possible invocation of filt_timerexpire) to
         *   have finished before the state is changed again.
         */
        assert(v == TIMER_IDLE);
    }
}
static void
filt_timercancel(struct knote *kn)
{
    if (os_atomic_xchg(&kn->kn_hookid, TIMER_IDLE, relaxed) == TIMER_ARMED) {
        /* cancel the thread call and wait for any filt_timerexpire in flight */
        thread_call_cancel_wait((thread_call_t)kn->kn_hook);
    }
}
/*
 * Does this deadline need a timer armed for it, or has it expired?
 */
static bool
filt_timer_is_ready(struct knote *kn)
{
    uint64_t now, deadline = kn->kn_ext[0];

    if (deadline == 0) {
        return true;
    }

    if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME) {
        now = mach_continuous_time();
    } else {
        now = mach_absolute_time();
    }
    return deadline <= now;
}
/*
 * It is the responsibility of the caller to make sure the timer call
 * has completed or been cancelled properly prior to arming it.
 */
static void
filt_timerarm(struct knote *kn)
{
    uint64_t deadline = kn->kn_ext[0];
    uint64_t leeway = kn->kn_ext[1];

    int filter_flags = kn->kn_sfflags;
    unsigned int timer_flags = 0;

    assert(os_atomic_load(&kn->kn_hookid, relaxed) == TIMER_IDLE);

    if (filter_flags & NOTE_CRITICAL) {
        timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL;
    } else if (filter_flags & NOTE_BACKGROUND) {
        timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND;
    } else {
        timer_flags |= THREAD_CALL_DELAY_USER_NORMAL;
    }

    if (filter_flags & NOTE_LEEWAY) {
        timer_flags |= THREAD_CALL_DELAY_LEEWAY;
    }

    if (filter_flags & NOTE_MACH_CONTINUOUS_TIME) {
        timer_flags |= THREAD_CALL_CONTINUOUS;
    }

    os_atomic_store(&kn->kn_hookid, TIMER_ARMED, relaxed);
    thread_call_enter_delayed_with_leeway((thread_call_t)kn->kn_hook, NULL,
        deadline, leeway, timer_flags);
}
/*
 * Allocate a thread call for the knote's lifetime, and kick off the timer.
 */
static int
filt_timerattach(struct knote *kn, struct kevent_internal_s *kev)
{
    thread_call_t callout;
    struct filt_timer_params params;
    int error;

    if ((error = filt_timervalidate(kev, &params)) != 0) {
        knote_set_error(kn, error);
        return 0;
    }

    callout = thread_call_allocate_with_options(filt_timerexpire,
        (thread_call_param_t)kn, THREAD_CALL_PRIORITY_HIGH,
        THREAD_CALL_OPTIONS_ONCE);

    if (NULL == callout) {
        knote_set_error(kn, ENOMEM);
        return 0;
    }

    filt_timer_set_params(kn, &params);
    kn->kn_hook = callout;
    kn->kn_flags |= EV_CLEAR;
    os_atomic_store(&kn->kn_hookid, TIMER_IDLE, relaxed);

    /* NOTE_ABSOLUTE implies EV_ONESHOT */
    if (kn->kn_sfflags & NOTE_ABSOLUTE) {
        kn->kn_flags |= EV_ONESHOT;
    }

    if (filt_timer_is_ready(kn)) {
        os_atomic_store(&kn->kn_hookid, TIMER_IMMEDIATE, relaxed);
        return FILTER_ACTIVE;
    } else {
        filt_timerarm(kn);
        return 0;
    }
}
/*
 * Shut down the timer if it's running, and free the callout.
 */
static void
filt_timerdetach(struct knote *kn)
{
    __assert_only boolean_t freed;

    /*
     * Unconditionally cancel to make sure there can't be any filt_timerexpire()
     * running anymore.
     */
    thread_call_cancel_wait((thread_call_t)kn->kn_hook);
    freed = thread_call_free((thread_call_t)kn->kn_hook);
    assert(freed);
}
/*
 * filt_timertouch - update timer knote with new user input
 *
 * Cancel and restart the timer based on new user data. When
 * the user picks up a knote, clear the count of how many timer
 * pops have gone off (in kn_data).
 */
static int
filt_timertouch(struct knote *kn, struct kevent_internal_s *kev)
{
    struct filt_timer_params params;
    uint32_t changed_flags = (kn->kn_sfflags ^ kev->fflags);
    int error;

    if (changed_flags & NOTE_ABSOLUTE) {
        kev->flags |= EV_ERROR;
        kev->data = EINVAL;
        return 0;
    }

    if ((error = filt_timervalidate(kev, &params)) != 0) {
        kev->flags |= EV_ERROR;
        kev->data = error;
        return 0;
    }

    /* capture the new values used to compute deadline */
    filt_timercancel(kn);
    filt_timer_set_params(kn, &params);
    kn->kn_sfflags = kev->fflags;

    if (filt_timer_is_ready(kn)) {
        os_atomic_store(&kn->kn_hookid, TIMER_IMMEDIATE, relaxed);
        return FILTER_ACTIVE | FILTER_UPDATE_REQ_QOS;
    } else {
        filt_timerarm(kn);
        return FILTER_UPDATE_REQ_QOS;
    }
}
/*
 * filt_timerprocess - query state of knote and snapshot event data
 *
 * Determine if the timer has fired in the past, snapshot the state
 * of the kevent for returning to user-space, and clear pending event
 * counters for the next time.
 */
static int
filt_timerprocess(
    struct knote *kn,
    __unused struct filt_process_s *data,
    struct kevent_internal_s *kev)
{
    /*
     * filt_timerprocess is serialized with any filter routine except for
     * filt_timerexpire which atomically does a TIMER_ARMED -> TIMER_FIRED
     * transition, and on success, activates the knote.
     *
     * Hence, we don't need atomic modifications of the state, only to peek at
     * whether we see any of the "FIRED" state, and if we do, it is safe to
     * do simple state machine transitions.
     */
    switch (os_atomic_load(&kn->kn_hookid, relaxed)) {
    case TIMER_IDLE:
    case TIMER_ARMED:
        /*
         * This can happen if a touch resets a timer that had fired
         * without being processed
         */
        return 0;
    }

    os_atomic_store(&kn->kn_hookid, TIMER_IDLE, relaxed);

    /*
     * Copy out the interesting kevent state,
     * but don't leak out the raw time calculations.
     *
     * TODO: potential enhancements - tell the user about:
     *      - deadline to which this timer thought it was expiring
     *      - return kn_sfflags in the fflags field so the client can know
     *        under what flags the timer fired
     */
    *kev = kn->kn_kevent;
    kev->ext[0] = 0;
    /* kev->ext[1] = 0;  JMM - shouldn't we hide this too? */

    if (kn->kn_sdata == 0) {
        kev->data = 1;
    } else {
        /*
         * This is a 'repeating' timer, so we have to emit
         * how many intervals expired between the arm
         * and the process.
         *
         * A very strange style of interface, because
         * this could easily be done in the client...
         */

        uint64_t now;

        if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME) {
            now = mach_continuous_time();
        } else {
            now = mach_absolute_time();
        }

        uint64_t first_deadline = kn->kn_ext[0];
        uint64_t interval_abs   = kn->kn_sdata;
        uint64_t orig_arm_time  = first_deadline - interval_abs;

        assert(now > orig_arm_time);
        assert(now > first_deadline);

        uint64_t elapsed = now - orig_arm_time;

        uint64_t num_fired = elapsed / interval_abs;

        /*
         * To reach this code, we must have seen the timer pop
         * and be in repeating mode, so therefore it must have been
         * more than 'interval' time since the attach or last
         * successful touch.
         */
        assert(num_fired > 0);

        /* report how many intervals have elapsed to the user */
        kev->data = (int64_t)num_fired;

        /* We only need to re-arm the timer if it's not about to be destroyed */
        if ((kn->kn_flags & EV_ONESHOT) == 0) {
            /* fire at the end of the next interval */
            uint64_t new_deadline = first_deadline + num_fired * interval_abs;

            assert(new_deadline > now);

            kn->kn_ext[0] = new_deadline;

            /*
             * This can't shortcut setting up the thread call, because
             * knote_process deactivates EV_CLEAR knotes unconditionally.
             */
            filt_timerarm(kn);
        }
    }

    return FILTER_ACTIVE;
}
SECURITY_READ_ONLY_EARLY(static struct filterops) timer_filtops = {
    .f_extended_codes = true,
    .f_attach   = filt_timerattach,
    .f_detach   = filt_timerdetach,
    .f_event    = filt_badevent,
    .f_touch    = filt_timertouch,
    .f_process  = filt_timerprocess,
};
#pragma mark user_filtops

static int
filt_userattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
    if (kn->kn_sfflags & NOTE_TRIGGER) {
        kn->kn_hookid = FILTER_ACTIVE;
    } else {
        kn->kn_hookid = 0;
    }
    return kn->kn_hookid;
}
static void
filt_userdetach(__unused struct knote *kn)
{
    /* EVFILT_USER knotes are not attached to anything in the kernel */
}
static int
filt_usertouch(struct knote *kn, struct kevent_internal_s *kev)
{
    uint32_t ffctrl;
    int fflags;

    ffctrl = kev->fflags & NOTE_FFCTRLMASK;
    fflags = kev->fflags & NOTE_FFLAGSMASK;
    switch (ffctrl) {
    case NOTE_FFNOP:
        break;
    case NOTE_FFAND:
        kn->kn_sfflags &= fflags;
        break;
    case NOTE_FFOR:
        kn->kn_sfflags |= fflags;
        break;
    case NOTE_FFCOPY:
        kn->kn_sfflags = fflags;
        break;
    }
    kn->kn_sdata = kev->data;

    if (kev->fflags & NOTE_TRIGGER) {
        kn->kn_hookid = FILTER_ACTIVE;
    }
    return (int)kn->kn_hookid;
}
static int
filt_userprocess(
    struct knote *kn,
    __unused struct filt_process_s *data,
    struct kevent_internal_s *kev)
{
    int result = (int)kn->kn_hookid;

    if (result) {
        *kev = kn->kn_kevent;
        kev->fflags = kn->kn_sfflags;
        kev->data = kn->kn_sdata;
        if (kn->kn_flags & EV_CLEAR) {
            kn->kn_hookid = 0;
            kn->kn_data = 0;
            kn->kn_fflags = 0;
        }
    }

    return result;
}
SECURITY_READ_ONLY_EARLY(static struct filterops) user_filtops = {
    .f_extended_codes = true,
    .f_attach  = filt_userattach,
    .f_detach  = filt_userdetach,
    .f_event   = filt_badevent,
    .f_touch   = filt_usertouch,
    .f_process = filt_userprocess,
};
#pragma mark workloop_filtops

static inline void
filt_wllock(struct kqworkloop *kqwl)
{
    lck_mtx_lock(&kqwl->kqwl_statelock);
}

static inline void
filt_wlunlock(struct kqworkloop *kqwl)
{
    lck_mtx_unlock(&kqwl->kqwl_statelock);
}
/*
 * Returns true when the interlock for the turnstile is the workqueue lock
 *
 * When this is the case, all turnstiles operations are delegated
 * to the workqueue subsystem.
 *
 * This is required because kqueue_threadreq_bind_prepost only holds the
 * workqueue lock but needs to move the inheritor from the workloop turnstile
 * away from the creator thread, so that this now fulfilled request cannot be
 * picked anymore by other threads.
 */
static inline bool
filt_wlturnstile_interlock_is_workq(struct kqworkloop *kqwl)
{
    struct kqrequest *kqr = &kqwl->kqwl_request;
    return (kqr->kqr_state & KQR_THREQUESTED) &&
           (kqr->kqr_thread == THREAD_NULL);
}
static void
filt_wlupdate_inheritor(struct kqworkloop *kqwl, struct turnstile *ts,
    turnstile_update_flags_t flags)
{
    turnstile_inheritor_t inheritor = TURNSTILE_INHERITOR_NULL;
    struct kqrequest *kqr = &kqwl->kqwl_request;

    /*
     * binding to the workq should always happen through
     * workq_kern_threadreq_update_inheritor()
     */
    assert(!filt_wlturnstile_interlock_is_workq(kqwl));

    if ((inheritor = kqwl->kqwl_owner)) {
        flags |= TURNSTILE_INHERITOR_THREAD;
    } else if ((inheritor = kqr->kqr_thread)) {
        flags |= TURNSTILE_INHERITOR_THREAD;
    }

    turnstile_update_inheritor(ts, inheritor, flags);
}
#define FILT_WLATTACH 0
#define FILT_WLTOUCH  1
#define FILT_WLDROP   2
__result_use_check
static int
filt_wlupdate(struct kqworkloop *kqwl, struct knote *kn,
    struct kevent_internal_s *kev, kq_index_t qos_index, int op)
{
    user_addr_t uaddr = CAST_USER_ADDR_T(kev->ext[EV_EXTIDX_WL_ADDR]);
    struct kqrequest *kqr = &kqwl->kqwl_request;
    thread_t cur_owner, new_owner, extra_thread_ref = THREAD_NULL;
    kq_index_t cur_owner_override = THREAD_QOS_UNSPECIFIED;
    int action = KQWL_UTQ_NONE, error = 0;
    bool needs_wake = false, needs_wllock = false;
    uint64_t kdata = kev->ext[EV_EXTIDX_WL_VALUE];
    uint64_t mask = kev->ext[EV_EXTIDX_WL_MASK];
    uint64_t udata = 0;

    if (kev->fflags & (NOTE_WL_END_OWNERSHIP | NOTE_WL_DISCOVER_OWNER)) {
        /*
         * If we're maybe going to change the kqwl_owner,
         * then we need to hold the filt_wllock().
         */
        needs_wllock = true;
    } else if (kqr->kqr_thread == current_thread()) {
        /*
         * <rdar://problem/41531764> Servicer updates need to be serialized with
         * any ownership change too, as the kqr_thread value influences the
         * outcome of handling NOTE_WL_DISCOVER_OWNER.
         */
        needs_wllock = true;
    }

    if (needs_wllock) {
        filt_wllock(kqwl);
        /*
         * The kqwl owner is set under both the req and filter lock,
         * meaning it's fine to look at it under any.
         */
        new_owner = cur_owner = kqwl->kqwl_owner;
    } else {
        new_owner = cur_owner = THREAD_NULL;
    }

    /*
     * If asked, load the uint64 value at the user provided address and compare
     * it against the passed in mask and expected value.
     *
     * If NOTE_WL_DISCOVER_OWNER is specified, translate the loaded name as
     * a thread reference.
     *
     * If NOTE_WL_END_OWNERSHIP is specified and the currently known owner is
     * the current thread, then end ownership.
     *
     * Lastly decide whether we need to perform a QoS update.
     */
    if (uaddr) {
        error = copyin_word(uaddr, &udata, sizeof(udata));
        if (error) {
            goto out;
        }

        /* Update state as copied in. */
        kev->ext[EV_EXTIDX_WL_VALUE] = udata;

        if ((udata & mask) != (kdata & mask)) {
            error = ESTALE;
        } else if (kev->fflags & NOTE_WL_DISCOVER_OWNER) {
            /*
             * Decipher the owner port name, and translate accordingly.
             * The low 2 bits were borrowed for other flags, so mask them off.
             *
             * Then attempt translation to a thread reference or fail.
             */
            mach_port_name_t name = (mach_port_name_t)udata & ~0x3;
            if (name != MACH_PORT_NULL) {
                name = ipc_entry_name_mask(name);
                extra_thread_ref = port_name_to_thread(name);
                if (extra_thread_ref == THREAD_NULL) {
                    error = EOWNERDEAD;
                    goto out;
                }
                new_owner = extra_thread_ref;
            }
        }
    }

    if ((kev->fflags & NOTE_WL_END_OWNERSHIP) && new_owner == current_thread()) {
        new_owner = THREAD_NULL;
    }

    if (error == 0) {
        if ((kev->fflags & NOTE_WL_THREAD_REQUEST) && (kev->flags & EV_DELETE)) {
            action = KQWL_UTQ_SET_QOS_INDEX;
        } else if (qos_index && kqr->kqr_qos_index != qos_index) {
            action = KQWL_UTQ_SET_QOS_INDEX;
        }

        if (op == FILT_WLTOUCH) {
            /*
             * Save off any additional fflags/data we just accepted
             * But only keep the last round of "update" bits we acted on which helps
             * debugging a lot.
             */
            kn->kn_sfflags &= ~NOTE_WL_UPDATES_MASK;
            kn->kn_sfflags |= kev->fflags;
            kn->kn_sdata = kev->data;
            if (kev->fflags & NOTE_WL_SYNC_WAKE) {
                needs_wake = (kn->kn_hook != THREAD_NULL);
            }
        } else if (op == FILT_WLDROP) {
            if ((kn->kn_sfflags & (NOTE_WL_SYNC_WAIT | NOTE_WL_SYNC_WAKE)) ==
                NOTE_WL_SYNC_WAIT) {
                /*
                 * When deleting a SYNC_WAIT knote that hasn't been woken up
                 * explicitly, issue a wake up.
                 */
                kn->kn_sfflags |= NOTE_WL_SYNC_WAKE;
                needs_wake = (kn->kn_hook != THREAD_NULL);
            }
        }
    }

    /*
     * Commit ownership and QoS changes if any, possibly wake up waiters
     */
    if (cur_owner == new_owner && action == KQWL_UTQ_NONE && !needs_wake) {
        goto out;
    }

    kq_req_lock(kqwl);

    /* If already tracked as servicer, don't track as owner */
    if (new_owner == kqr->kqr_thread) {
        new_owner = THREAD_NULL;
    }

    if (cur_owner != new_owner) {
        kqwl->kqwl_owner = new_owner;
        if (new_owner == extra_thread_ref) {
            /* we just transferred this ref to kqwl_owner */
            extra_thread_ref = THREAD_NULL;
        }
        cur_owner_override = kqworkloop_owner_override(kqwl);

        if (cur_owner) {
            thread_ends_owning_workloop(cur_owner);
        }

        if (new_owner) {
            /* override it before we drop the old */
            if (cur_owner_override != THREAD_QOS_UNSPECIFIED) {
                thread_add_ipc_override(new_owner, cur_owner_override);
            }
            thread_starts_owning_workloop(new_owner);
            if ((kqr->kqr_state & KQR_THREQUESTED) && !kqr->kqr_thread) {
                if (action == KQWL_UTQ_NONE) {
                    action = KQWL_UTQ_REDRIVE_EVENTS;
                }
            }
        } else {
            if ((kqr->kqr_state & (KQR_THREQUESTED | KQR_WAKEUP)) == KQR_WAKEUP) {
                if (action == KQWL_UTQ_NONE) {
                    action = KQWL_UTQ_REDRIVE_EVENTS;
                }
            }
        }
    }

    struct turnstile *ts = kqwl->kqwl_turnstile;
    bool wl_inheritor_updated = false;

    if (action != KQWL_UTQ_NONE) {
        kqworkloop_update_threads_qos(kqwl, action, qos_index);
    }

    if (cur_owner != new_owner && ts) {
        if (action == KQWL_UTQ_REDRIVE_EVENTS) {
            /*
             * Note that when action is KQWL_UTQ_REDRIVE_EVENTS,
             * the code went through workq_kern_threadreq_initiate()
             * and the workqueue has set the inheritor already
             */
            assert(filt_wlturnstile_interlock_is_workq(kqwl));
        } else if (filt_wlturnstile_interlock_is_workq(kqwl)) {
            workq_kern_threadreq_lock(kqwl->kqwl_p);
            workq_kern_threadreq_update_inheritor(kqwl->kqwl_p, kqr, new_owner,
                ts, TURNSTILE_IMMEDIATE_UPDATE);
            workq_kern_threadreq_unlock(kqwl->kqwl_p);
            if (!filt_wlturnstile_interlock_is_workq(kqwl)) {
                /*
                 * If the workq is no longer the interlock, then
                 * workq_kern_threadreq_update_inheritor() has finished a bind
                 * and we need to fallback to the regular path.
                 */
                filt_wlupdate_inheritor(kqwl, ts, TURNSTILE_IMMEDIATE_UPDATE);
            }
            wl_inheritor_updated = true;
        } else {
            filt_wlupdate_inheritor(kqwl, ts, TURNSTILE_IMMEDIATE_UPDATE);
            wl_inheritor_updated = true;
        }

        /*
         * We need a turnstile reference because we are dropping the interlock
         * and the caller has not called turnstile_prepare.
         */
        if (wl_inheritor_updated) {
            turnstile_reference(ts);
        }
    }

    if (needs_wake && ts) {
        waitq_wakeup64_thread(&ts->ts_waitq, CAST_EVENT64_T((event_t)kn),
            (thread_t)kn->kn_hook, THREAD_AWAKENED);
    }

    kq_req_unlock(kqwl);

    if (wl_inheritor_updated) {
        turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
        turnstile_deallocate(ts);
    }

out:
    /*
     * Unlock and cleanup various lingering references and things.
     */
    if (needs_wllock) {
        filt_wlunlock(kqwl);
    }

#if CONFIG_WORKLOOP_DEBUG
    KQWL_HISTORY_WRITE_ENTRY(kqwl, {
        .updater = current_thread(),
        .servicer = kqr->kqr_thread, /* Note: racy */
        .old_owner = cur_owner,
        .new_owner = new_owner,

        .kev_ident  = kev->ident,
        .error      = (int16_t)error,
        .kev_flags  = kev->flags,
        .kev_fflags = kev->fflags,
    });
#endif // CONFIG_WORKLOOP_DEBUG

    if (cur_owner && new_owner != cur_owner) {
        if (cur_owner_override != THREAD_QOS_UNSPECIFIED) {
            thread_drop_ipc_override(cur_owner);
        }
        thread_deallocate(cur_owner);
    }

    if (extra_thread_ref) {
        thread_deallocate(extra_thread_ref);
    }
    return error;
}
/*
 * Remembers the last update that came in from userspace for debugging reasons.
 * - fflags is mirrored from the userspace kevent
 * - ext[i, i != VALUE] is mirrored from the userspace kevent
 * - ext[VALUE] is set to what the kernel loaded atomically
 * - data is set to the error if any
 */
static inline void
filt_wlremember_last_update(struct knote *kn, struct kevent_internal_s *kev,
    int error)
{
    kn->kn_fflags = kev->fflags;
    kn->kn_data = error;
    memcpy(kn->kn_ext, kev->ext, sizeof(kev->ext));
}
2084 filt_wlattach(struct knote
*kn
, struct kevent_internal_s
*kev
)
2086 struct kqueue
*kq
= knote_get_kq(kn
);
2087 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
2089 kq_index_t qos_index
= 0;
2091 if ((kq
->kq_state
& KQ_WORKLOOP
) == 0) {
2096 #if DEVELOPMENT || DEBUG
2097 if (kev
->ident
== 0 && kev
->udata
== 0 && kev
->fflags
== 0) {
2098 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
2102 if (kqr
->kqr_dsync_waiters
) {
2103 kev
->fflags
|= NOTE_WL_SYNC_WAIT
;
2105 if (kqr
->kqr_qos_index
) {
2106 kev
->fflags
|= NOTE_WL_THREAD_REQUEST
;
2108 kev
->ext
[0] = thread_tid(kqwl
->kqwl_owner
);
2109 kev
->ext
[1] = thread_tid(kqwl
->kqwl_request
.kqr_thread
);
2110 kev
->ext
[2] = thread_owned_workloops_count(current_thread());
2111 kev
->ext
[3] = kn
->kn_kevent
.ext
[3];
2112 kq_req_unlock(kqwl
);
2118 int command
= (kn
->kn_sfflags
& NOTE_WL_COMMANDS_MASK
);
2120 case NOTE_WL_THREAD_REQUEST
:
2121 if (kn
->kn_id
!= kqwl
->kqwl_dynamicid
) {
2125 qos_index
= _pthread_priority_thread_qos(kn
->kn_qos
);
2126 if (qos_index
== THREAD_QOS_UNSPECIFIED
) {
2130 if (kqwl
->kqwl_request
.kqr_qos_index
) {
2132 * There already is a thread request, and well, you're only allowed
2133 * one per workloop, so fail the attach.
2139 case NOTE_WL_SYNC_WAIT
:
2140 case NOTE_WL_SYNC_WAKE
:
2141 if (kn
->kn_id
== kqwl
->kqwl_dynamicid
) {
2145 if ((kn
->kn_flags
& EV_DISABLE
) == 0) {
2149 if (kn
->kn_sfflags
& NOTE_WL_END_OWNERSHIP
) {
2159 error
= filt_wlupdate(kqwl
, kn
, kev
, qos_index
, FILT_WLATTACH
);
2163 /* If userland wants ESTALE to be hidden, fail the attach anyway */
2164 if (error
== ESTALE
&& (kn
->kn_sfflags
& NOTE_WL_IGNORE_ESTALE
)) {
2167 knote_set_error(kn
, error
);
2170 if (command
== NOTE_WL_SYNC_WAIT
) {
2171 return kevent_register_wait_prepare(kn
, kev
);
2173 /* Just attaching the thread request successfully will fire it */
2174 if (command
== NOTE_WL_THREAD_REQUEST
) {
2176 * Thread Request knotes need an explicit touch to be active again,
2177 * so delivering an event needs to also consume it.
2179 kn
->kn_flags
|= EV_CLEAR
;
2180 return FILTER_ACTIVE
;
2186 filt_wlwait_continue(void *parameter
, wait_result_t wr
)
2188 struct _kevent_register
*cont_args
= parameter
;
2189 struct kqworkloop
*kqwl
= (struct kqworkloop
*)cont_args
->kq
;
2190 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
2193 kqr
->kqr_dsync_waiters
--;
2194 if (filt_wlturnstile_interlock_is_workq(kqwl
)) {
2195 workq_kern_threadreq_lock(kqwl
->kqwl_p
);
2196 turnstile_complete((uintptr_t)kqwl
, &kqwl
->kqwl_turnstile
, NULL
);
2197 workq_kern_threadreq_unlock(kqwl
->kqwl_p
);
2199 turnstile_complete((uintptr_t)kqwl
, &kqwl
->kqwl_turnstile
, NULL
);
2201 kq_req_unlock(kqwl
);
2203 turnstile_cleanup();
2205 if (wr
== THREAD_INTERRUPTED
) {
2206 cont_args
->kev
.flags
|= EV_ERROR
;
2207 cont_args
->kev
.data
= EINTR
;
2208 } else if (wr
!= THREAD_AWAKENED
) {
2209 panic("Unexpected wait result: %d", wr
);
2212 kevent_register_wait_return(cont_args
);
2216 * Called with the workloop mutex held, most of the time never returns as it
2217 * calls filt_wlwait_continue through a continuation.
2220 filt_wlpost_register_wait(struct uthread
*uth
, struct knote_lock_ctx
*knlc
,
2221 struct _kevent_register
*cont_args
)
2223 struct kqworkloop
*kqwl
= (struct kqworkloop
*)cont_args
->kq
;
2224 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
2225 struct turnstile
*ts
;
2226 bool workq_locked
= false;
2230 kqr
->kqr_dsync_waiters
++;
2232 if (filt_wlturnstile_interlock_is_workq(kqwl
)) {
2233 workq_kern_threadreq_lock(kqwl
->kqwl_p
);
2234 workq_locked
= true;
2237 ts
= turnstile_prepare((uintptr_t)kqwl
, &kqwl
->kqwl_turnstile
,
2238 TURNSTILE_NULL
, TURNSTILE_WORKLOOPS
);
2241 workq_kern_threadreq_update_inheritor(kqwl
->kqwl_p
,
2242 &kqwl
->kqwl_request
, kqwl
->kqwl_owner
, ts
,
2243 TURNSTILE_DELAYED_UPDATE
);
2244 if (!filt_wlturnstile_interlock_is_workq(kqwl
)) {
2246 * if the interlock is no longer the workqueue lock,
2247 * then we don't need to hold it anymore.
2249 workq_kern_threadreq_unlock(kqwl
->kqwl_p
);
2250 workq_locked
= false;
2253 if (!workq_locked
) {
2255 * If the interlock is the workloop's, then it's our responsibility to
2256 * call update_inheritor, so just do it.
2258 filt_wlupdate_inheritor(kqwl
, ts
, TURNSTILE_DELAYED_UPDATE
);
2261 thread_set_pending_block_hint(uth
->uu_thread
, kThreadWaitWorkloopSyncWait
);
2262 waitq_assert_wait64(&ts
->ts_waitq
, CAST_EVENT64_T(cont_args
->knote
),
2263 THREAD_ABORTSAFE
, TIMEOUT_WAIT_FOREVER
);
2266 workq_kern_threadreq_unlock(kqwl
->kqwl_p
);
2269 thread_t thread
= kqwl
->kqwl_owner
?: kqr
->kqr_thread
;
2271 thread_reference(thread
);
2273 kq_req_unlock(kqwl
);
2275 kevent_register_wait_block(ts
, thread
, knlc
, filt_wlwait_continue
, cont_args
);
/* called in stackshot context to report the thread responsible for blocking this thread */
void
kdp_workloop_sync_wait_find_owner(__assert_only thread_t thread,
    event64_t event, thread_waitinfo_t *waitinfo)
{
	struct knote *kn = (struct knote *)event;
	assert(kdp_is_in_zone(kn, "knote zone"));

	assert(kn->kn_hook == thread);

	struct kqueue *kq = knote_get_kq(kn);
	assert(kdp_is_in_zone(kq, "kqueue workloop zone"));
	assert(kq->kq_state & KQ_WORKLOOP);

	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	struct kqrequest *kqr = &kqwl->kqwl_request;

	thread_t kqwl_owner = kqwl->kqwl_owner;
	thread_t servicer = kqr->kqr_thread;

	if (kqwl_owner != THREAD_NULL) {
		assert(kdp_is_in_zone(kqwl_owner, "threads"));

		waitinfo->owner = thread_tid(kqwl->kqwl_owner);
	} else if (servicer != THREAD_NULL) {
		assert(kdp_is_in_zone(servicer, "threads"));

		waitinfo->owner = thread_tid(servicer);
	} else if (kqr->kqr_state & KQR_THREQUESTED) {
		waitinfo->owner = STACKSHOT_WAITOWNER_THREQUESTED;
	} else {
		waitinfo->owner = 0;
	}

	waitinfo->context = kqwl->kqwl_dynamicid;
}
static void
filt_wldetach(__assert_only struct knote *kn)
{
	assert(knote_get_kq(kn)->kq_state & KQ_WORKLOOP);
	if (kn->kn_hook) {
		kevent_register_wait_cleanup(kn);
	}
}
2325 filt_wlvalidate_kev_flags(struct knote
*kn
, struct kevent_internal_s
*kev
,
2326 thread_qos_t
*qos_index
)
2328 int new_commands
= kev
->fflags
& NOTE_WL_COMMANDS_MASK
;
2329 int sav_commands
= kn
->kn_sfflags
& NOTE_WL_COMMANDS_MASK
;
2331 if ((kev
->fflags
& NOTE_WL_DISCOVER_OWNER
) && (kev
->flags
& EV_DELETE
)) {
2334 if (kev
->fflags
& NOTE_WL_UPDATE_QOS
) {
2335 if (kev
->flags
& EV_DELETE
) {
2338 if (sav_commands
!= NOTE_WL_THREAD_REQUEST
) {
2341 if (!(*qos_index
= _pthread_priority_thread_qos(kev
->qos
))) {
2346 switch (new_commands
) {
2347 case NOTE_WL_THREAD_REQUEST
:
2348 /* thread requests can only update themselves */
2349 if (sav_commands
!= NOTE_WL_THREAD_REQUEST
) {
2354 case NOTE_WL_SYNC_WAIT
:
2355 if (kev
->fflags
& NOTE_WL_END_OWNERSHIP
) {
2360 case NOTE_WL_SYNC_WAKE
:
2362 if (!(sav_commands
& (NOTE_WL_SYNC_WAIT
| NOTE_WL_SYNC_WAKE
))) {
2365 if ((kev
->flags
& (EV_ENABLE
| EV_DELETE
)) == EV_ENABLE
) {
2377 filt_wltouch(struct knote
*kn
, struct kevent_internal_s
*kev
)
2379 struct kqworkloop
*kqwl
= (struct kqworkloop
*)knote_get_kq(kn
);
2380 thread_qos_t qos_index
= THREAD_QOS_UNSPECIFIED
;
2382 int error
= filt_wlvalidate_kev_flags(kn
, kev
, &qos_index
);
2387 error
= filt_wlupdate(kqwl
, kn
, kev
, qos_index
, FILT_WLTOUCH
);
2388 filt_wlremember_last_update(kn
, kev
, error
);
2395 if (error
== ESTALE
&& (kev
->fflags
& NOTE_WL_IGNORE_ESTALE
)) {
2396 /* If userland wants ESTALE to be hidden, do not activate */
2399 kev
->flags
|= EV_ERROR
;
2403 int command
= kev
->fflags
& NOTE_WL_COMMANDS_MASK
;
2404 if (command
== NOTE_WL_SYNC_WAIT
&& !(kn
->kn_sfflags
& NOTE_WL_SYNC_WAKE
)) {
2405 return kevent_register_wait_prepare(kn
, kev
);
2407 /* Just touching the thread request successfully will fire it */
2408 if (command
== NOTE_WL_THREAD_REQUEST
) {
2409 if (kev
->fflags
& NOTE_WL_UPDATE_QOS
) {
2410 return FILTER_ACTIVE
| FILTER_UPDATE_REQ_QOS
;
2412 return FILTER_ACTIVE
;
2418 filt_wlallow_drop(struct knote
*kn
, struct kevent_internal_s
*kev
)
2420 struct kqworkloop
*kqwl
= (struct kqworkloop
*)knote_get_kq(kn
);
2422 int error
= filt_wlvalidate_kev_flags(kn
, kev
, NULL
);
2427 error
= filt_wlupdate(kqwl
, kn
, kev
, 0, FILT_WLDROP
);
2428 filt_wlremember_last_update(kn
, kev
, error
);
2435 if (error
== ESTALE
&& (kev
->fflags
& NOTE_WL_IGNORE_ESTALE
)) {
2438 kev
->flags
|= EV_ERROR
;
2448 __unused
struct filt_process_s
*data
,
2449 struct kevent_internal_s
*kev
)
2451 struct kqworkloop
*kqwl
= (struct kqworkloop
*)knote_get_kq(kn
);
2454 assert(kn
->kn_sfflags
& NOTE_WL_THREAD_REQUEST
);
2458 if (kqwl
->kqwl_owner
) {
2460 * <rdar://problem/33584321> userspace sometimes due to events being
2461 * delivered but not triggering a drain session can cause a process
2462 * of the thread request knote.
2464 * When that happens, the automatic deactivation due to process
2465 * would swallow the event, so we have to activate the knote again.
2471 #if DEBUG || DEVELOPMENT
2472 if (kevent_debug_flags() & KEVENT_PANIC_ON_NON_ENQUEUED_PROCESS
) {
2474 * see src/queue_internal.h in libdispatch
2476 #define DISPATCH_QUEUE_ENQUEUED 0x1ull
2477 user_addr_t addr
= CAST_USER_ADDR_T(kn
->kn_ext
[EV_EXTIDX_WL_ADDR
]);
2478 task_t t
= current_task();
2480 if (addr
&& task_is_active(t
) && !task_is_halting(t
) &&
2481 copyin_word(addr
, &val
, sizeof(val
)) == 0 &&
2482 val
&& (val
& DISPATCH_QUEUE_ENQUEUED
) == 0 &&
2483 (val
>> 48) != 0xdead && (val
>> 48) != 0 && (val
>> 48) != 0xffff) {
2484 panic("kevent: workloop %#016llx is not enqueued "
2485 "(kn:%p dq_state:%#016llx kev.dq_state:%#016llx)",
2486 kn
->kn_udata
, kn
, val
, kn
->kn_ext
[EV_EXTIDX_WL_VALUE
]);
2490 *kev
= kn
->kn_kevent
;
2491 kev
->fflags
= kn
->kn_sfflags
;
2492 kev
->data
= kn
->kn_sdata
;
2493 kev
->qos
= kn
->kn_qos
;
2494 rc
|= FILTER_ACTIVE
;
2497 filt_wlunlock(kqwl
);
2499 if (rc
& FILTER_ACTIVE
) {
2500 workq_thread_set_max_qos(kqwl
->kqwl_p
, &kqwl
->kqwl_request
);
SECURITY_READ_ONLY_EARLY(static struct filterops) workloop_filtops = {
	.f_extended_codes = true,
	.f_attach = filt_wlattach,
	.f_detach = filt_wldetach,
	.f_event = filt_badevent,
	.f_touch = filt_wltouch,
	.f_process = filt_wlprocess,
	.f_allow_drop = filt_wlallow_drop,
	.f_post_register_wait = filt_wlpost_register_wait,
};
#pragma mark kevent / knotes

/*
 * JMM - placeholder for not-yet-implemented filters
 */
static int
filt_badevent(struct knote *kn, long hint)
{
	panic("%s[%d](%p, %ld)", __func__, kn->kn_filter, kn, hint);
	return 0;
}

static int
filt_badattach(__unused struct knote *kn, __unused struct kevent_internal_s *kev)
{
	knote_set_error(kn, ENOTSUP);
	return 0;
}
2536 kqueue_alloc(struct proc
*p
, unsigned int flags
)
2538 struct filedesc
*fdp
= p
->p_fd
;
2539 struct kqueue
*kq
= NULL
;
2543 if (flags
& KEVENT_FLAG_WORKQ
) {
2544 struct kqworkq
*kqwq
;
2547 kqwq
= (struct kqworkq
*)zalloc(kqworkq_zone
);
2552 kq
= &kqwq
->kqwq_kqueue
;
2553 bzero(kqwq
, sizeof(struct kqworkq
));
2555 kqwq
->kqwq_state
= KQ_WORKQ
;
2557 for (i
= 0; i
< KQWQ_NBUCKETS
; i
++) {
2558 TAILQ_INIT(&kqwq
->kqwq_queue
[i
]);
2560 for (i
= 0; i
< KQWQ_NBUCKETS
; i
++) {
2561 if (i
!= KQWQ_QOS_MANAGER
) {
2563 * Because of how the bucketized system works, we mix overcommit
2564 * sources with not overcommit: each time we move a knote from
2565 * one bucket to the next due to overrides, we'd had to track
2566 * overcommitness, and it's really not worth it in the workloop
2567 * enabled world that track this faithfully.
2569 * Incidentally, this behaves like the original manager-based
2570 * kqwq where event delivery always happened (hence is
2573 kqwq
->kqwq_request
[i
].kqr_state
|= KQR_THOVERCOMMIT
;
2575 kqwq
->kqwq_request
[i
].kqr_qos_index
= i
;
2576 TAILQ_INIT(&kqwq
->kqwq_request
[i
].kqr_suppressed
);
2579 policy
= SYNC_POLICY_FIFO
;
2580 hook
= (void *)kqwq
;
2581 } else if (flags
& KEVENT_FLAG_WORKLOOP
) {
2582 struct kqworkloop
*kqwl
;
2585 kqwl
= (struct kqworkloop
*)zalloc(kqworkloop_zone
);
2590 bzero(kqwl
, sizeof(struct kqworkloop
));
2592 kqwl
->kqwl_state
= KQ_WORKLOOP
| KQ_DYNAMIC
;
2593 kqwl
->kqwl_retains
= 1; /* donate a retain to creator */
2594 kqwl
->kqwl_request
.kqr_state
= KQR_WORKLOOP
;
2596 kq
= &kqwl
->kqwl_kqueue
;
2597 for (i
= 0; i
< KQWL_NBUCKETS
; i
++) {
2598 TAILQ_INIT(&kqwl
->kqwl_queue
[i
]);
2600 TAILQ_INIT(&kqwl
->kqwl_request
.kqr_suppressed
);
2602 lck_mtx_init(&kqwl
->kqwl_statelock
, kq_lck_grp
, kq_lck_attr
);
2604 policy
= SYNC_POLICY_FIFO
;
2605 hook
= (void *)kqwl
;
2609 kqf
= (struct kqfile
*)zalloc(kqfile_zone
);
2614 kq
= &kqf
->kqf_kqueue
;
2615 bzero(kqf
, sizeof(struct kqfile
));
2616 TAILQ_INIT(&kqf
->kqf_queue
);
2617 TAILQ_INIT(&kqf
->kqf_suppressed
);
2619 policy
= SYNC_POLICY_FIFO
| SYNC_POLICY_PREPOST
;
2622 waitq_set_init(&kq
->kq_wqs
, policy
, NULL
, hook
);
2623 lck_spin_init(&kq
->kq_lock
, kq_lck_grp
, kq_lck_attr
);
2624 lck_spin_init(&kq
->kq_reqlock
, kq_lck_grp
, kq_lck_attr
);
2627 if (fdp
->fd_knlistsize
< 0) {
2629 if (fdp
->fd_knlistsize
< 0) {
2630 fdp
->fd_knlistsize
= 0; /* this process has had a kq */
2639 * knotes_dealloc - detach all knotes for the process and drop them
2641 * Called with proc_fdlock held.
2642 * Returns with it locked.
2643 * May drop it temporarily.
2644 * Process is in such a state that it will not try to allocate
2645 * any more knotes during this process (stopped for exit or exec).
2648 knotes_dealloc(proc_t p
)
2650 struct filedesc
*fdp
= p
->p_fd
;
2653 struct klist
*kn_hash
= NULL
;
2656 /* Close all the fd-indexed knotes up front */
2657 if (fdp
->fd_knlistsize
> 0) {
2658 for (i
= 0; i
< fdp
->fd_knlistsize
; i
++) {
2659 while ((kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
])) != NULL
) {
2660 kq
= knote_get_kq(kn
);
2663 knote_drop(kq
, kn
, NULL
);
2667 /* free the table */
2668 FREE(fdp
->fd_knlist
, M_KQUEUE
);
2669 fdp
->fd_knlist
= NULL
;
2671 fdp
->fd_knlistsize
= -1;
2676 /* Clean out all the hashed knotes as well */
2677 if (fdp
->fd_knhashmask
!= 0) {
2678 for (i
= 0; i
<= (int)fdp
->fd_knhashmask
; i
++) {
2679 while ((kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
])) != NULL
) {
2680 kq
= knote_get_kq(kn
);
2683 knote_drop(kq
, kn
, NULL
);
2687 kn_hash
= fdp
->fd_knhash
;
2688 fdp
->fd_knhashmask
= 0;
2689 fdp
->fd_knhash
= NULL
;
2694 /* free the kn_hash table */
2696 FREE(kn_hash
, M_KQUEUE
);
/*
 * kqworkloop_invalidate
 *
 * Invalidate ownership of a workloop.
 *
 * This is meant to be used so that any remnant of overrides and ownership
 * information is dropped before a kqworkloop can no longer be found in the
 * global hash table and have ghost workloop ownership left over.
 *
 * Possibly returns a thread to deallocate in a safe context.
 */
static thread_t
kqworkloop_invalidate(struct kqworkloop *kqwl)
{
	thread_t cur_owner = kqwl->kqwl_owner;

	assert(TAILQ_EMPTY(&kqwl->kqwl_request.kqr_suppressed));
	if (cur_owner) {
		/*
		 * If the kqueue had an owner that prevented the thread request
		 * from going through, then no unbind happened, and we may have
		 * lingering overrides to drop.
		 */
		if (kqworkloop_owner_override(kqwl) != THREAD_QOS_UNSPECIFIED) {
			thread_drop_ipc_override(cur_owner);
		}
		thread_ends_owning_workloop(cur_owner);
		kqwl->kqwl_owner = THREAD_NULL;
	}

	return cur_owner;
}
2736 * kqueue_dealloc - detach all knotes from a kqueue and free it
2738 * We walk each list looking for knotes referencing this
2739 * this kqueue. If we find one, we try to drop it. But
2740 * if we fail to get a drop reference, that will wait
2741 * until it is dropped. So, we can just restart again
2742 * safe in the assumption that the list will eventually
2743 * not contain any more references to this kqueue (either
2744 * we dropped them all, or someone else did).
2746 * Assumes no new events are being added to the kqueue.
2747 * Nothing locked on entry or exit.
2749 * Workloop kqueues cant get here unless all the knotes
2750 * are already gone and all requested threads have come
2751 * and gone (cancelled or arrived).
2754 kqueue_dealloc(struct kqueue
*kq
)
2757 struct filedesc
*fdp
;
2769 * Workloops are refcounted by their knotes, so there's no point
2770 * spending a lot of time under these locks just to deallocate one.
2772 if ((kq
->kq_state
& KQ_WORKLOOP
) == 0) {
2773 KNOTE_LOCK_CTX(knlc
);
2776 for (i
= 0; i
< fdp
->fd_knlistsize
; i
++) {
2777 kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
]);
2778 while (kn
!= NULL
) {
2779 if (kq
== knote_get_kq(kn
)) {
2782 if (knote_lock(kq
, kn
, &knlc
, KNOTE_KQ_LOCK_ON_SUCCESS
)) {
2783 knote_drop(kq
, kn
, &knlc
);
2786 /* start over at beginning of list */
2787 kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
]);
2790 kn
= SLIST_NEXT(kn
, kn_link
);
2797 if (fdp
->fd_knhashmask
!= 0) {
2798 for (i
= 0; i
< (int)fdp
->fd_knhashmask
+ 1; i
++) {
2799 kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
]);
2800 while (kn
!= NULL
) {
2801 if (kq
== knote_get_kq(kn
)) {
2804 if (knote_lock(kq
, kn
, &knlc
, KNOTE_KQ_LOCK_ON_SUCCESS
)) {
2805 knote_drop(kq
, kn
, &knlc
);
2808 /* start over at beginning of list */
2809 kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
]);
2812 kn
= SLIST_NEXT(kn
, kn_link
);
2819 if (kq
->kq_state
& KQ_WORKLOOP
) {
2820 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
2821 thread_t cur_owner
= kqworkloop_invalidate(kqwl
);
2824 thread_deallocate(cur_owner
);
2827 if (kqwl
->kqwl_request
.kqr_state
& KQR_ALLOCATED_TURNSTILE
) {
2828 struct turnstile
*ts
;
2829 turnstile_complete((uintptr_t)kqwl
, &kqwl
->kqwl_turnstile
, &ts
);
2830 turnstile_cleanup();
2831 turnstile_deallocate(ts
);
2833 assert(kqwl
->kqwl_turnstile
== NULL
);
2838 * waitq_set_deinit() remove the KQ's waitq set from
2839 * any select sets to which it may belong.
2841 waitq_set_deinit(&kq
->kq_wqs
);
2842 lck_spin_destroy(&kq
->kq_lock
, kq_lck_grp
);
2843 lck_spin_destroy(&kq
->kq_reqlock
, kq_lck_grp
);
2845 if (kq
->kq_state
& KQ_WORKQ
) {
2846 zfree(kqworkq_zone
, kq
);
2847 } else if (kq
->kq_state
& KQ_WORKLOOP
) {
2848 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
2850 assert(kqwl
->kqwl_retains
== 0);
2851 lck_mtx_destroy(&kqwl
->kqwl_statelock
, kq_lck_grp
);
2852 zfree(kqworkloop_zone
, kqwl
);
2854 zfree(kqfile_zone
, kq
);
void
kqueue_retain(struct kqueue *kq)
{
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	uint32_t previous;

	if ((kq->kq_state & KQ_DYNAMIC) == 0) {
		return;
	}

	previous = OSIncrementAtomic(&kqwl->kqwl_retains);
	if (previous == KQ_WORKLOOP_RETAINS_MAX) {
		panic("kq(%p) retain overflow", kq);
	}

	if (previous == 0) {
		panic("kq(%p) resurrection", kq);
	}
}

#define KQUEUE_CANT_BE_LAST_REF  0
#define KQUEUE_MIGHT_BE_LAST_REF 1
static int
kqueue_release(kqueue_t kqu, __assert_only int possibly_last)
{
	if ((kqu.kq->kq_state & KQ_DYNAMIC) == 0) {
		return 0;
	}

	assert(kqu.kq->kq_state & KQ_WORKLOOP); /* for now */
	uint32_t refs = OSDecrementAtomic(&kqu.kqwl->kqwl_retains);
	if (__improbable(refs == 0)) {
		panic("kq(%p) over-release", kqu.kq);
	}
	if (refs == 1) {
		assert(possibly_last);
	}
	return refs == 1;
}
static int
kqueue_body(struct proc *p, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval)
{
	struct kqueue *kq;
	struct fileproc *fp;
	int fd, error;

	error = falloc_withalloc(p,
	    &fp, &fd, vfs_context_current(), fp_zalloc, cra);
	if (error) {
		return error;
	}

	kq = kqueue_alloc(p, 0);
	if (kq == NULL) {
		fp_free(p, fd, fp);
		return ENOMEM;
	}

	fp->f_flag = FREAD | FWRITE;
	fp->f_ops = &kqueueops;
	fp->f_data = kq;
	fp->f_lflags |= FG_CONFINED;

	proc_fdlock(p);
	*fdflags(p, fd) |= UF_EXCLOSE | UF_FORKCLOSE;
	procfdtbl_releasefd(p, fd, NULL);
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);

	*retval = fd;
	return error;
}

int
kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval)
{
	return kqueue_body(p, fileproc_alloc_init, NULL, retval);
}
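/*
 * Illustrative userspace sketch (not part of this file): how the descriptor
 * returned by the kqueue() syscall above is typically used through the public
 * <sys/event.h> API.  This compiles in user space, not in the kernel.
 */
#if 0	/* example only */
#include <sys/event.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int kq = kqueue();			/* allocates a kqfile-backed fd */
	struct kevent change, event;

	/* watch stdin for readability */
	EV_SET(&change, STDIN_FILENO, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);

	/* register the change and block for one event */
	int n = kevent(kq, &change, 1, &event, 1, NULL);
	if (n > 0) {
		printf("fd %lu has %ld bytes ready\n",
		    (unsigned long)event.ident, (long)event.data);
	}
	close(kq);
	return 0;
}
#endif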
2940 kevent_copyin(user_addr_t
*addrp
, struct kevent_internal_s
*kevp
, struct proc
*p
,
2946 if (flags
& KEVENT_FLAG_LEGACY32
) {
2947 bzero(kevp
, sizeof(*kevp
));
2949 if (IS_64BIT_PROCESS(p
)) {
2950 struct user64_kevent kev64
;
2952 advance
= sizeof(kev64
);
2953 error
= copyin(*addrp
, (caddr_t
)&kev64
, advance
);
2957 kevp
->ident
= kev64
.ident
;
2958 kevp
->filter
= kev64
.filter
;
2959 kevp
->flags
= kev64
.flags
;
2960 kevp
->udata
= kev64
.udata
;
2961 kevp
->fflags
= kev64
.fflags
;
2962 kevp
->data
= kev64
.data
;
2964 struct user32_kevent kev32
;
2966 advance
= sizeof(kev32
);
2967 error
= copyin(*addrp
, (caddr_t
)&kev32
, advance
);
2971 kevp
->ident
= (uintptr_t)kev32
.ident
;
2972 kevp
->filter
= kev32
.filter
;
2973 kevp
->flags
= kev32
.flags
;
2974 kevp
->udata
= CAST_USER_ADDR_T(kev32
.udata
);
2975 kevp
->fflags
= kev32
.fflags
;
2976 kevp
->data
= (intptr_t)kev32
.data
;
2978 } else if (flags
& KEVENT_FLAG_LEGACY64
) {
2979 struct kevent64_s kev64
;
2981 bzero(kevp
, sizeof(*kevp
));
2983 advance
= sizeof(struct kevent64_s
);
2984 error
= copyin(*addrp
, (caddr_t
)&kev64
, advance
);
2988 kevp
->ident
= kev64
.ident
;
2989 kevp
->filter
= kev64
.filter
;
2990 kevp
->flags
= kev64
.flags
;
2991 kevp
->udata
= kev64
.udata
;
2992 kevp
->fflags
= kev64
.fflags
;
2993 kevp
->data
= kev64
.data
;
2994 kevp
->ext
[0] = kev64
.ext
[0];
2995 kevp
->ext
[1] = kev64
.ext
[1];
2997 struct kevent_qos_s kevqos
;
2999 bzero(kevp
, sizeof(*kevp
));
3001 advance
= sizeof(struct kevent_qos_s
);
3002 error
= copyin(*addrp
, (caddr_t
)&kevqos
, advance
);
3006 kevp
->ident
= kevqos
.ident
;
3007 kevp
->filter
= kevqos
.filter
;
3008 kevp
->flags
= kevqos
.flags
;
3009 kevp
->qos
= kevqos
.qos
;
3010 // kevp->xflags = kevqos.xflags;
3011 kevp
->udata
= kevqos
.udata
;
3012 kevp
->fflags
= kevqos
.fflags
;
3013 kevp
->data
= kevqos
.data
;
3014 kevp
->ext
[0] = kevqos
.ext
[0];
3015 kevp
->ext
[1] = kevqos
.ext
[1];
3016 kevp
->ext
[2] = kevqos
.ext
[2];
3017 kevp
->ext
[3] = kevqos
.ext
[3];
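/*
 * Illustrative userspace sketch (not part of this file): the KEVENT_FLAG_LEGACY64
 * path of kevent_copyin() above consumes struct kevent64_s records, which user
 * code builds with EV_SET64 and submits through kevent64().  Compiled in user
 * space; the 500ms one-shot timer is just an arbitrary example.
 */
#if 0	/* example only */
#include <sys/event.h>

/* Arm a 500ms one-shot timer on an existing kqueue descriptor. */
static int
arm_oneshot_timer(int kq)
{
	struct kevent64_s kev;

	/* ident=1, data=500 (milliseconds by default); ext[0]/ext[1] unused */
	EV_SET64(&kev, 1, EVFILT_TIMER, EV_ADD | EV_ONESHOT, 0, 500, 0, 0, 0);

	/* apply the change only; no eventlist, no timeout */
	return kevent64(kq, &kev, 1, NULL, 0, 0, NULL);
}
#endif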
3026 kevent_copyout(struct kevent_internal_s
*kevp
, user_addr_t
*addrp
, struct proc
*p
,
3029 user_addr_t addr
= *addrp
;
3034 * fully initialize the differnt output event structure
3035 * types from the internal kevent (and some universal
3036 * defaults for fields not represented in the internal
3039 if (flags
& KEVENT_FLAG_LEGACY32
) {
3040 assert((flags
& KEVENT_FLAG_STACK_EVENTS
) == 0);
3042 if (IS_64BIT_PROCESS(p
)) {
3043 struct user64_kevent kev64
;
3045 advance
= sizeof(kev64
);
3046 bzero(&kev64
, advance
);
3049 * deal with the special case of a user-supplied
3050 * value of (uintptr_t)-1.
3052 kev64
.ident
= (kevp
->ident
== (uintptr_t)-1) ?
3053 (uint64_t)-1LL : (uint64_t)kevp
->ident
;
3055 kev64
.filter
= kevp
->filter
;
3056 kev64
.flags
= kevp
->flags
;
3057 kev64
.fflags
= kevp
->fflags
;
3058 kev64
.data
= (int64_t) kevp
->data
;
3059 kev64
.udata
= kevp
->udata
;
3060 error
= copyout((caddr_t
)&kev64
, addr
, advance
);
3062 struct user32_kevent kev32
;
3064 advance
= sizeof(kev32
);
3065 bzero(&kev32
, advance
);
3066 kev32
.ident
= (uint32_t)kevp
->ident
;
3067 kev32
.filter
= kevp
->filter
;
3068 kev32
.flags
= kevp
->flags
;
3069 kev32
.fflags
= kevp
->fflags
;
3070 kev32
.data
= (int32_t)kevp
->data
;
3071 kev32
.udata
= kevp
->udata
;
3072 error
= copyout((caddr_t
)&kev32
, addr
, advance
);
3074 } else if (flags
& KEVENT_FLAG_LEGACY64
) {
3075 struct kevent64_s kev64
;
3077 advance
= sizeof(struct kevent64_s
);
3078 if (flags
& KEVENT_FLAG_STACK_EVENTS
) {
3081 bzero(&kev64
, advance
);
3082 kev64
.ident
= kevp
->ident
;
3083 kev64
.filter
= kevp
->filter
;
3084 kev64
.flags
= kevp
->flags
;
3085 kev64
.fflags
= kevp
->fflags
;
3086 kev64
.data
= (int64_t) kevp
->data
;
3087 kev64
.udata
= kevp
->udata
;
3088 kev64
.ext
[0] = kevp
->ext
[0];
3089 kev64
.ext
[1] = kevp
->ext
[1];
3090 error
= copyout((caddr_t
)&kev64
, addr
, advance
);
3092 struct kevent_qos_s kevqos
;
3094 advance
= sizeof(struct kevent_qos_s
);
3095 if (flags
& KEVENT_FLAG_STACK_EVENTS
) {
3098 bzero(&kevqos
, advance
);
3099 kevqos
.ident
= kevp
->ident
;
3100 kevqos
.filter
= kevp
->filter
;
3101 kevqos
.flags
= kevp
->flags
;
3102 kevqos
.qos
= kevp
->qos
;
3103 kevqos
.udata
= kevp
->udata
;
3104 kevqos
.fflags
= kevp
->fflags
;
3106 kevqos
.data
= (int64_t) kevp
->data
;
3107 kevqos
.ext
[0] = kevp
->ext
[0];
3108 kevqos
.ext
[1] = kevp
->ext
[1];
3109 kevqos
.ext
[2] = kevp
->ext
[2];
3110 kevqos
.ext
[3] = kevp
->ext
[3];
3111 error
= copyout((caddr_t
)&kevqos
, addr
, advance
);
3114 if (flags
& KEVENT_FLAG_STACK_EVENTS
) {
3117 *addrp
= addr
+ advance
;
static int
kevent_get_data_size(
	struct proc *p,
	uint64_t data_available,
	unsigned int flags,
	user_size_t *residp)
{
	user_size_t resid = 0;
	int error = 0;

	if (data_available != USER_ADDR_NULL) {
		if (flags & KEVENT_FLAG_KERNEL) {
			resid = *(user_size_t *)(uintptr_t)data_available;
		} else if (IS_64BIT_PROCESS(p)) {
			user64_size_t usize;
			error = copyin((user_addr_t)data_available, &usize, sizeof(usize));
			resid = (user_size_t)usize;
		} else {
			user32_size_t usize;
			error = copyin((user_addr_t)data_available, &usize, sizeof(usize));
			resid = (user_size_t)usize;
		}
		if (error) {
			return error;
		}
	}
	*residp = resid;
	return 0;
}
static int
kevent_put_data_size(
	struct proc *p,
	uint64_t data_available,
	unsigned int flags,
	user_size_t resid)
{
	int error = 0;

	if (data_available) {
		if (flags & KEVENT_FLAG_KERNEL) {
			*(user_size_t *)(uintptr_t)data_available = resid;
		} else if (IS_64BIT_PROCESS(p)) {
			user64_size_t usize = (user64_size_t)resid;
			error = copyout(&usize, (user_addr_t)data_available, sizeof(usize));
		} else {
			user32_size_t usize = (user32_size_t)resid;
			error = copyout(&usize, (user_addr_t)data_available, sizeof(usize));
		}
	}
	return error;
}
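/*
 * Illustrative sketch (example only, not in xnu): the data_available argument
 * to the two helpers above names a user_size_t in the caller's address space.
 * Going in it holds the size of the caller's data_out buffer; coming back it
 * holds the residual (unconsumed) byte count.  A hypothetical round trip:
 */
#if 0	/* example only */
static int
example_data_resid_roundtrip(struct proc *p, uint64_t data_available,
    unsigned int flags)
{
	user_size_t data_size = 0;
	int error;

	/* read the initial buffer size supplied by the caller */
	error = kevent_get_data_size(p, data_available, flags, &data_size);
	if (error) {
		return error;
	}

	/* ... a scan would consume part of the buffer here ... */
	user_size_t data_resid = data_size;

	/* publish how much of the buffer was left untouched */
	return kevent_put_data_size(p, data_available, flags, data_resid);
}
#endif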
/*
 * kevent_continue - continue a kevent syscall after blocking
 *
 * assume we inherit a use count on the kq fileglob.
 */
__attribute__((noreturn))
static void
kevent_continue(__unused struct kqueue *kq, void *data, int error)
{
	struct _kevent *cont_args;
	struct fileproc *fp;
	uint64_t data_available;
	user_size_t data_size;
	user_size_t data_resid;
	unsigned int flags;
	int32_t *retval;
	int noutputs;
	int fd;
	struct proc *p = current_proc();

	cont_args = (struct _kevent *)data;
	data_available = cont_args->data_available;
	flags = cont_args->process_data.fp_flags;
	data_size = cont_args->process_data.fp_data_size;
	data_resid = cont_args->process_data.fp_data_resid;
	noutputs = cont_args->eventout;
	retval = cont_args->retval;
	fd = cont_args->fd;
	fp = cont_args->fp;

	kevent_put_kq(p, fd, fp, kq);

	/* don't abandon other output just because of residual copyout failures */
	if (error == 0 && data_available && data_resid != data_size) {
		(void)kevent_put_data_size(p, data_available, flags, data_resid);
	}

	/* don't restart after signals... */
	if (error == ERESTART) {
		error = EINTR;
	} else if (error == EWOULDBLOCK) {
		error = 0;
	}
	if (error == 0) {
		*retval = noutputs;
	}
	unix_syscall_return(error);
}
3228 * kevent - [syscall] register and wait for kernel events
3232 kevent(struct proc
*p
, struct kevent_args
*uap
, int32_t *retval
)
3234 unsigned int flags
= KEVENT_FLAG_LEGACY32
;
3236 return kevent_internal(p
,
3237 (kqueue_id_t
)uap
->fd
, NULL
,
3238 uap
->changelist
, uap
->nchanges
,
3239 uap
->eventlist
, uap
->nevents
,
3248 kevent64(struct proc
*p
, struct kevent64_args
*uap
, int32_t *retval
)
3252 /* restrict to user flags and set legacy64 */
3253 flags
= uap
->flags
& KEVENT_FLAG_USER
;
3254 flags
|= KEVENT_FLAG_LEGACY64
;
3256 return kevent_internal(p
,
3257 (kqueue_id_t
)uap
->fd
, NULL
,
3258 uap
->changelist
, uap
->nchanges
,
3259 uap
->eventlist
, uap
->nevents
,
3268 kevent_qos(struct proc
*p
, struct kevent_qos_args
*uap
, int32_t *retval
)
3270 /* restrict to user flags */
3271 uap
->flags
&= KEVENT_FLAG_USER
;
3273 return kevent_internal(p
,
3274 (kqueue_id_t
)uap
->fd
, NULL
,
3275 uap
->changelist
, uap
->nchanges
,
3276 uap
->eventlist
, uap
->nevents
,
3277 uap
->data_out
, (uint64_t)uap
->data_available
,
3285 kevent_qos_internal(struct proc
*p
, int fd
,
3286 user_addr_t changelist
, int nchanges
,
3287 user_addr_t eventlist
, int nevents
,
3288 user_addr_t data_out
, user_size_t
*data_available
,
3292 return kevent_internal(p
,
3293 (kqueue_id_t
)fd
, NULL
,
3294 changelist
, nchanges
,
3296 data_out
, (uint64_t)data_available
,
3297 (flags
| KEVENT_FLAG_KERNEL
),
3304 kevent_id(struct proc
*p
, struct kevent_id_args
*uap
, int32_t *retval
)
3306 /* restrict to user flags */
3307 uap
->flags
&= KEVENT_FLAG_USER
;
3309 return kevent_internal(p
,
3310 (kqueue_id_t
)uap
->id
, NULL
,
3311 uap
->changelist
, uap
->nchanges
,
3312 uap
->eventlist
, uap
->nevents
,
3313 uap
->data_out
, (uint64_t)uap
->data_available
,
3314 (uap
->flags
| KEVENT_FLAG_DYNAMIC_KQUEUE
),
3321 kevent_id_internal(struct proc
*p
, kqueue_id_t
*id
,
3322 user_addr_t changelist
, int nchanges
,
3323 user_addr_t eventlist
, int nevents
,
3324 user_addr_t data_out
, user_size_t
*data_available
,
3328 return kevent_internal(p
,
3330 changelist
, nchanges
,
3332 data_out
, (uint64_t)data_available
,
3333 (flags
| KEVENT_FLAG_KERNEL
| KEVENT_FLAG_DYNAMIC_KQUEUE
),
static int
kevent_get_timeout(struct proc *p,
    user_addr_t utimeout,
    unsigned int flags,
    struct timeval *atvp)
{
	struct timeval atv, rtv;
	int error = 0;

	if (flags & KEVENT_FLAG_IMMEDIATE) {
		getmicrouptime(&atv);
	} else if (utimeout != USER_ADDR_NULL) {
		if (flags & KEVENT_FLAG_KERNEL) {
			struct timespec *tsp = (struct timespec *)utimeout;
			TIMESPEC_TO_TIMEVAL(&rtv, tsp);
		} else if (IS_64BIT_PROCESS(p)) {
			struct user64_timespec ts;
			error = copyin(utimeout, &ts, sizeof(ts));
			if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0) {
				error = EINVAL;
			} else {
				TIMESPEC_TO_TIMEVAL(&rtv, &ts);
			}
		} else {
			struct user32_timespec ts;
			error = copyin(utimeout, &ts, sizeof(ts));
			TIMESPEC_TO_TIMEVAL(&rtv, &ts);
		}
		if (error) {
			return error;
		}
		if (itimerfix(&rtv)) {
			return EINVAL;
		}
		getmicrouptime(&atv);
		timevaladd(&atv, &rtv);
	} else {
		/* wait forever value */
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	*atvp = atv;
	return 0;
}
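/*
 * Illustrative sketch (example only, not in xnu): how a relative timespec
 * becomes the absolute uptime deadline that kevent_get_timeout() produces.
 */
#if 0	/* example only */
static void
example_relative_to_deadline(struct timeval *deadline)
{
	/* a 250ms relative timeout */
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 250 * NSEC_PER_MSEC };
	struct timeval rtv;

	TIMESPEC_TO_TIMEVAL(&rtv, &ts);	/* nanoseconds -> microseconds */
	getmicrouptime(deadline);	/* "now" on the uptime clock */
	timevaladd(deadline, &rtv);	/* deadline = now + relative */
}
#endif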
static int
kevent_set_kq_mode(struct kqueue *kq, unsigned int flags)
{
	int error = 0;
	/* each kq should only be used for events of one type */
	kqlock(kq);
	if (kq->kq_state & (KQ_KEV32 | KQ_KEV64 | KQ_KEV_QOS)) {
		if (flags & KEVENT_FLAG_LEGACY32) {
			if ((kq->kq_state & KQ_KEV32) == 0) {
				error = EINVAL;
			}
		} else if (kq->kq_state & KQ_KEV32) {
			error = EINVAL;
		}
	} else if (flags & KEVENT_FLAG_LEGACY32) {
		kq->kq_state |= KQ_KEV32;
	} else if (flags & KEVENT_FLAG_LEGACY64) {
		kq->kq_state |= KQ_KEV64;
	} else {
		kq->kq_state |= KQ_KEV_QOS;
	}
	kqunlock(kq);
	return error;
}
#define KQ_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))
#define CONFIG_KQ_HASHSIZE	CONFIG_KN_HASHSIZE

static inline void
kqhash_lock(proc_t p)
{
	lck_mtx_lock_spin_always(&p->p_fd->fd_kqhashlock);
}

static inline void
kqhash_lock_held(__assert_only proc_t p)
{
	LCK_MTX_ASSERT(&p->p_fd->fd_kqhashlock, LCK_MTX_ASSERT_OWNED);
}

static inline void
kqhash_unlock(proc_t p)
{
	lck_mtx_unlock(&p->p_fd->fd_kqhashlock);
}
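/*
 * Illustrative sketch (example only, not in xnu): KQ_HASH folds a dynamic
 * kqueue id into a bucket index; fd_kqhashmask is (bucket count - 1) as
 * returned by hashinit(), so the result always lands inside fd_kqhash[].
 */
#if 0	/* example only */
static struct kqlist *
example_kq_bucket(struct filedesc *fdp, kqueue_id_t id)
{
	return &fdp->fd_kqhash[KQ_HASH(id, fdp->fd_kqhashmask)];
}
#endif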
3433 kqueue_hash_init_if_needed(proc_t p
)
3435 struct filedesc
*fdp
= p
->p_fd
;
3437 kqhash_lock_held(p
);
3439 if (__improbable(fdp
->fd_kqhash
== NULL
)) {
3440 struct kqlist
*alloc_hash
;
3444 alloc_hash
= hashinit(CONFIG_KQ_HASHSIZE
, M_KQUEUE
, &alloc_mask
);
3447 /* See if we won the race */
3448 if (fdp
->fd_kqhashmask
== 0) {
3449 fdp
->fd_kqhash
= alloc_hash
;
3450 fdp
->fd_kqhashmask
= alloc_mask
;
3453 FREE(alloc_hash
, M_KQUEUE
);
3460 * Called with the kqhash_lock() held
3468 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3469 struct filedesc
*fdp
= p
->p_fd
;
3470 struct kqlist
*list
;
3472 /* should hold the kq hash lock */
3473 kqhash_lock_held(p
);
3475 if ((kq
->kq_state
& KQ_DYNAMIC
) == 0) {
3476 assert(kq
->kq_state
& KQ_DYNAMIC
);
3480 /* only dynamically allocate workloop kqs for now */
3481 assert(kq
->kq_state
& KQ_WORKLOOP
);
3482 assert(fdp
->fd_kqhash
);
3484 kqwl
->kqwl_dynamicid
= id
;
3486 list
= &fdp
->fd_kqhash
[KQ_HASH(id
, fdp
->fd_kqhashmask
)];
3487 SLIST_INSERT_HEAD(list
, kqwl
, kqwl_hashlink
);
3490 /* Called with kqhash_lock held */
3496 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3497 struct filedesc
*fdp
= p
->p_fd
;
3498 struct kqlist
*list
;
3500 /* should hold the kq hash lock */
3501 kqhash_lock_held(p
);
3503 if ((kq
->kq_state
& KQ_DYNAMIC
) == 0) {
3504 assert(kq
->kq_state
& KQ_DYNAMIC
);
3507 assert(kq
->kq_state
& KQ_WORKLOOP
); /* for now */
3508 list
= &fdp
->fd_kqhash
[KQ_HASH(kqwl
->kqwl_dynamicid
, fdp
->fd_kqhashmask
)];
3509 SLIST_REMOVE(list
, kqwl
, kqworkloop
, kqwl_hashlink
);
/* Called with kqhash_lock held */
static struct kqueue *
kqueue_hash_lookup(struct proc *p, kqueue_id_t id)
{
	struct filedesc *fdp = p->p_fd;
	struct kqlist *list;
	struct kqworkloop *kqwl;

	/* should hold the kq hash lock */
	kqhash_lock_held(p);

	if (fdp->fd_kqhashmask == 0) {
		return NULL;
	}

	list = &fdp->fd_kqhash[KQ_HASH(id, fdp->fd_kqhashmask)];
	SLIST_FOREACH(kqwl, list, kqwl_hashlink) {
		if (kqwl->kqwl_dynamicid == id) {
			struct kqueue *kq = (struct kqueue *)kqwl;

			assert(kq->kq_state & KQ_DYNAMIC);
			assert(kq->kq_state & KQ_WORKLOOP); /* for now */
			return kq;
		}
	}
	return NULL;
}
3541 kqueue_release_last(struct proc
*p
, kqueue_t kqu
)
3543 struct kqueue
*kq
= kqu
.kq
;
3544 if (kq
->kq_state
& KQ_DYNAMIC
) {
3546 if (kqueue_release(kq
, KQUEUE_MIGHT_BE_LAST_REF
)) {
3547 thread_t cur_owner
= kqworkloop_invalidate(kqu
.kqwl
);
3548 kqueue_hash_remove(p
, kq
);
3551 thread_deallocate(cur_owner
);
3561 * kqworkloops_dealloc - rebalance retains on kqworkloops created with
3562 * scheduling parameters
3564 * Called with proc_fdlock held.
3565 * Returns with it locked.
3566 * Process is in such a state that it will not try to allocate
3567 * any more knotes during this process (stopped for exit or exec).
3570 kqworkloops_dealloc(proc_t p
)
3572 struct filedesc
*fdp
= p
->p_fd
;
3573 struct kqlist
*list
;
3574 struct kqworkloop
*kqwl
, *kqwln
;
3575 struct kqlist tofree
;
3578 if (!(fdp
->fd_flags
& FD_WORKLOOP
)) {
3582 SLIST_INIT(&tofree
);
3585 assert(fdp
->fd_kqhashmask
!= 0);
3587 for (i
= 0; i
<= (int)fdp
->fd_kqhashmask
; i
++) {
3588 list
= &fdp
->fd_kqhash
[i
];
3589 SLIST_FOREACH_SAFE(kqwl
, list
, kqwl_hashlink
, kqwln
) {
3591 * kqworkloops that have scheduling parameters have an
3592 * implicit retain from kqueue_workloop_ctl that needs
3593 * to be balanced on process exit.
3595 assert(kqwl
->kqwl_params
);
3596 SLIST_REMOVE(list
, kqwl
, kqworkloop
, kqwl_hashlink
);
3597 SLIST_INSERT_HEAD(&tofree
, kqwl
, kqwl_hashlink
);
3603 SLIST_FOREACH_SAFE(kqwl
, &tofree
, kqwl_hashlink
, kqwln
) {
3604 struct kqueue
*kq
= (struct kqueue
*)kqwl
;
3605 __assert_only
bool released
;
3606 released
= kqueue_release(kq
, KQUEUE_MIGHT_BE_LAST_REF
);
static struct kqueue *
kevent_get_bound_kqworkloop(thread_t thread)
{
	struct uthread *ut = get_bsdthread_info(thread);
	struct kqrequest *kqr = ut->uu_kqr_bound;

	return kqr ? (struct kqueue *)kqr_kqworkloop(kqr) : NULL;
}
3622 kevent_get_kq(struct proc
*p
, kqueue_id_t id
, workq_threadreq_param_t
*trp
,
3623 unsigned int flags
, struct fileproc
**fpp
, int *fdp
,
3624 struct kqueue
**kqp
)
3626 struct filedesc
*descp
= p
->p_fd
;
3627 struct fileproc
*fp
= NULL
;
3628 struct kqueue
*kq
= NULL
;
3631 thread_t th
= current_thread();
3633 assert(!trp
|| (flags
& KEVENT_FLAG_WORKLOOP
));
3635 /* Was the workloop flag passed? Then it is for sure only a workloop */
3636 if (flags
& KEVENT_FLAG_DYNAMIC_KQUEUE
) {
3637 assert(flags
& KEVENT_FLAG_WORKLOOP
);
3638 assert(!trp
|| (flags
& KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST
));
3639 kq
= kevent_get_bound_kqworkloop(th
);
3642 * when kevent_id_internal is called from within the
3643 * kernel, and the passed 'id' value is '-1' then we
3644 * look for the currently bound workloop kq.
3646 if (id
== (kqueue_id_t
)-1 &&
3647 (flags
& KEVENT_FLAG_KERNEL
) &&
3648 (flags
& KEVENT_FLAG_WORKLOOP
)) {
3649 if (!is_workqueue_thread(th
) || !kq
) {
3657 if (id
== 0 || id
== (kqueue_id_t
)-1) {
3661 /* try shortcut on kq lookup for bound threads */
3662 if (kq
!= NULL
&& ((struct kqworkloop
*)kq
)->kqwl_dynamicid
== id
) {
3663 if (flags
& KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST
) {
3667 /* retain a reference while working with this kq. */
3668 assert(kq
->kq_state
& KQ_DYNAMIC
);
3673 /* look for the kq on the hash table */
3675 kq
= kqueue_hash_lookup(p
, id
);
3679 if (flags
& KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST
) {
3683 struct kqueue
*alloc_kq
;
3684 alloc_kq
= kqueue_alloc(p
, flags
);
3690 kqueue_hash_init_if_needed(p
);
3691 kq
= kqueue_hash_lookup(p
, id
);
3693 /* insert our new one */
3696 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3697 kqwl
->kqwl_params
= trp
->trp_value
;
3699 kqueue_hash_insert(p
, id
, kq
);
3701 } else if (flags
& KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST
) {
3702 /* lost race and caller wants an error */
3704 kqueue_release(alloc_kq
, KQUEUE_MIGHT_BE_LAST_REF
);
3705 kqueue_dealloc(alloc_kq
);
3708 /* lost race, retain existing workloop */
3711 kqueue_release(alloc_kq
, KQUEUE_MIGHT_BE_LAST_REF
);
3712 kqueue_dealloc(alloc_kq
);
3715 if (flags
& KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST
) {
3720 /* retain a reference while working with this kq. */
3721 assert(kq
->kq_state
& KQ_DYNAMIC
);
3725 } else if (flags
& KEVENT_FLAG_WORKQ
) {
3726 /* must already exist for bound threads. */
3727 if (flags
& KEVENT_FLAG_KERNEL
) {
3728 assert(descp
->fd_wqkqueue
!= NULL
);
3732 * use the private kq associated with the proc workq.
3733 * Just being a thread within the process (and not
3734 * being the exit/exec thread) is enough to hold a
3735 * reference on this special kq.
3737 kq
= descp
->fd_wqkqueue
;
3739 struct kqueue
*alloc_kq
= kqueue_alloc(p
, KEVENT_FLAG_WORKQ
);
3740 if (alloc_kq
== NULL
) {
3745 if (descp
->fd_wqkqueue
== NULL
) {
3746 kq
= descp
->fd_wqkqueue
= alloc_kq
;
3750 kq
= descp
->fd_wqkqueue
;
3751 kqueue_dealloc(alloc_kq
);
3755 /* get a usecount for the kq itself */
3757 if ((error
= fp_getfkq(p
, fd
, &fp
, &kq
)) != 0) {
3761 if ((error
= kevent_set_kq_mode(kq
, flags
)) != 0) {
3762 /* drop the usecount */
3764 fp_drop(p
, fd
, fp
, 0);
3781 struct fileproc
*fp
,
3784 kqueue_release_last(p
, kq
);
3786 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
3787 fp_drop(p
, (int)id
, fp
, 0);
3792 kevent_workloop_serial_no_copyin(proc_t p
, uint64_t workloop_id
)
3794 uint64_t serial_no
= 0;
3798 if (workloop_id
== 0 || p
->p_dispatchqueue_serialno_offset
== 0) {
3801 addr
= (user_addr_t
)(workloop_id
+ p
->p_dispatchqueue_serialno_offset
);
3803 if (proc_is64bit(p
)) {
3804 rc
= copyin(addr
, (caddr_t
)&serial_no
, sizeof(serial_no
));
3806 uint32_t serial_no32
= 0;
3807 rc
= copyin(addr
, (caddr_t
)&serial_no32
, sizeof(serial_no32
));
3808 serial_no
= serial_no32
;
3810 return rc
== 0 ? serial_no
: 0;
3814 kevent_exit_on_workloop_ownership_leak(thread_t thread
)
3816 proc_t p
= current_proc();
3817 struct filedesc
*fdp
= p
->p_fd
;
3818 kqueue_id_t workloop_id
= 0;
3819 os_reason_t reason
= OS_REASON_NULL
;
3820 mach_vm_address_t addr
;
3821 uint32_t reason_size
;
3824 if (fdp
->fd_kqhashmask
> 0) {
3825 for (uint32_t i
= 0; i
< fdp
->fd_kqhashmask
+ 1; i
++) {
3826 struct kqworkloop
*kqwl
;
3828 SLIST_FOREACH(kqwl
, &fdp
->fd_kqhash
[i
], kqwl_hashlink
) {
3829 struct kqueue
*kq
= &kqwl
->kqwl_kqueue
;
3830 if ((kq
->kq_state
& KQ_DYNAMIC
) && kqwl
->kqwl_owner
== thread
) {
3831 workloop_id
= kqwl
->kqwl_dynamicid
;
3839 reason
= os_reason_create(OS_REASON_LIBSYSTEM
,
3840 OS_REASON_LIBSYSTEM_CODE_WORKLOOP_OWNERSHIP_LEAK
);
3841 if (reason
== OS_REASON_NULL
) {
3845 reason
->osr_flags
|= OS_REASON_FLAG_GENERATE_CRASH_REPORT
;
3846 reason_size
= 2 * sizeof(uint64_t);
3847 reason_size
= kcdata_estimate_required_buffer_size(2, reason_size
);
3848 if (os_reason_alloc_buffer(reason
, reason_size
) != 0) {
3853 struct kcdata_descriptor
*kcd
= &reason
->osr_kcd_descriptor
;
3855 if (kcdata_get_memory_addr(kcd
, EXIT_REASON_WORKLOOP_ID
,
3856 sizeof(workloop_id
), &addr
) == KERN_SUCCESS
) {
3857 kcdata_memcpy(kcd
, addr
, &workloop_id
, sizeof(workloop_id
));
3860 uint64_t serial_no
= kevent_workloop_serial_no_copyin(p
, workloop_id
);
3861 if (serial_no
&& kcdata_get_memory_addr(kcd
, EXIT_REASON_DISPATCH_QUEUE_NO
,
3862 sizeof(serial_no
), &addr
) == KERN_SUCCESS
) {
3863 kcdata_memcpy(kcd
, addr
, &serial_no
, sizeof(serial_no
));
3867 #if DEVELOPMENT || DEBUG
3868 if (kevent_debug_flags() & KEVENT_PANIC_ON_WORKLOOP_OWNERSHIP_LEAK
) {
3869 panic("thread %p in task %p is leaked workloop 0x%016llx ownership",
3870 thread
, p
->task
, workloop_id
);
3872 psignal_try_thread_with_reason(p
, thread
, SIGABRT
, reason
);
3875 return exit_with_reason(p
, W_EXITCODE(0, SIGKILL
), (int *)NULL
,
3876 FALSE
, FALSE
, 0, reason
);
static inline boolean_t
kevent_args_requesting_events(unsigned int flags, int nevents)
{
	return !(flags & KEVENT_FLAG_ERROR_EVENTS) && nevents > 0;
}
3887 kevent_internal(struct proc
*p
,
3888 kqueue_id_t id
, kqueue_id_t
*id_out
,
3889 user_addr_t changelist
, int nchanges
,
3890 user_addr_t ueventlist
, int nevents
,
3891 user_addr_t data_out
, uint64_t data_available
,
3893 user_addr_t utimeout
,
3894 kqueue_continue_t continuation
,
3899 struct fileproc
*fp
= NULL
;
3901 struct kevent_internal_s kev
;
3902 int error
, noutputs
, register_rc
;
3903 bool needs_end_processing
= false;
3905 user_size_t data_size
;
3906 user_size_t data_resid
;
3907 thread_t thread
= current_thread();
3908 KNOTE_LOCK_CTX(knlc
);
3910 /* Don't allow user-space threads to process output events from the workq kqs */
3911 if (((flags
& (KEVENT_FLAG_WORKQ
| KEVENT_FLAG_KERNEL
)) == KEVENT_FLAG_WORKQ
) &&
3912 kevent_args_requesting_events(flags
, nevents
)) {
3916 if (flags
& KEVENT_FLAG_PARKING
) {
3917 if (!kevent_args_requesting_events(flags
, nevents
) || id
!= (kqueue_id_t
)-1) {
3922 /* restrict dynamic kqueue allocation to workloops (for now) */
3923 if ((flags
& (KEVENT_FLAG_DYNAMIC_KQUEUE
| KEVENT_FLAG_WORKLOOP
)) == KEVENT_FLAG_DYNAMIC_KQUEUE
) {
3927 if ((flags
& (KEVENT_FLAG_WORKLOOP
)) && (flags
& (KEVENT_FLAG_WORKQ
))) {
3931 if (flags
& (KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST
| KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST
)) {
3932 /* allowed only on workloops when calling kevent_id from user-space */
3933 if (!(flags
& KEVENT_FLAG_WORKLOOP
) || (flags
& KEVENT_FLAG_KERNEL
) || !(flags
& KEVENT_FLAG_DYNAMIC_KQUEUE
)) {
3938 /* prepare to deal with stack-wise allocation of out events */
3939 if (flags
& KEVENT_FLAG_STACK_EVENTS
) {
3940 int scale
= ((flags
& KEVENT_FLAG_LEGACY32
) ?
3941 (IS_64BIT_PROCESS(p
) ? sizeof(struct user64_kevent
) :
3942 sizeof(struct user32_kevent
)) :
3943 ((flags
& KEVENT_FLAG_LEGACY64
) ? sizeof(struct kevent64_s
) :
3944 sizeof(struct kevent_qos_s
)));
3945 ueventlist
+= nevents
* scale
;
3948 /* convert timeout to absolute - if we have one (and not immediate) */
3949 error
= kevent_get_timeout(p
, utimeout
, flags
, &atv
);
3954 /* copyin initial value of data residual from data_available */
3955 error
= kevent_get_data_size(p
, data_available
, flags
, &data_size
);
3960 /* get the kq we are going to be working on */
3961 error
= kevent_get_kq(p
, id
, NULL
, flags
, &fp
, &fd
, &kq
);
3962 #if CONFIG_WORKLOOP_DEBUG
3963 ut
= (uthread_t
)get_bsdthread_info(thread
);
3964 UU_KEVENT_HISTORY_WRITE_ENTRY(ut
, {
3966 .uu_kq
= error
? NULL
: kq
,
3968 .uu_nchanges
= nchanges
,
3969 .uu_nevents
= nevents
,
3972 #endif // CONFIG_WORKLOOP_DEBUG
3977 /* only bound threads can receive events on workloops */
3978 if (flags
& KEVENT_FLAG_WORKLOOP
) {
3979 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3980 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
3982 assert(kq
->kq_state
& KQ_WORKLOOP
);
3984 if (kevent_args_requesting_events(flags
, nevents
)) {
3985 if (kq
!= kevent_get_bound_kqworkloop(thread
)) {
3992 * Disable the R2K notification while doing a register, if the
3993 * caller wants events too, we don't want the AST to be set if we
3994 * will process these events soon.
3996 kqr
->kqr_state
&= ~KQR_R2K_NOTIF_ARMED
;
3997 needs_end_processing
= true;
4002 *id_out
= kqwl
->kqwl_dynamicid
;
4006 /* register all the change requests the user provided... */
4008 while (nchanges
> 0 && error
== 0) {
4009 error
= kevent_copyin(&changelist
, &kev
, p
, flags
);
4014 /* Make sure user doesn't pass in any system flags */
4015 kev
.flags
&= ~EV_SYSFLAGS
;
4017 register_rc
= kevent_register(kq
, &kev
, &knlc
);
4018 if (register_rc
& FILTER_REGISTER_WAIT
) {
4021 // f_post_register_wait is meant to call a continuation and not to
4022 // return, which is why we don't support FILTER_REGISTER_WAIT if
4023 // KEVENT_FLAG_ERROR_EVENTS is not passed, or if the event that
4024 // waits isn't the last.
4026 // It is implementable, but not used by any userspace code at the
4027 // moment, so for now return ENOTSUP if someone tries to do it.
4028 if (nchanges
== 1 && nevents
>= 1 && (flags
& KEVENT_FLAG_ERROR_EVENTS
)) {
4029 struct _kevent_register
*cont_args
;
4030 /* store the continuation/completion data in the uthread */
4031 ut
= (uthread_t
)get_bsdthread_info(thread
);
4032 cont_args
= &ut
->uu_save
.uus_kevent_register
;
4033 cont_args
->kev
= kev
;
4037 cont_args
->ueventlist
= ueventlist
;
4038 cont_args
->flags
= flags
;
4039 cont_args
->retval
= retval
;
4040 cont_args
->eventcount
= nevents
;
4041 cont_args
->eventout
= noutputs
;
4042 knote_fops(cont_args
->knote
)->f_post_register_wait(ut
, &knlc
, cont_args
);
4043 panic("f_post_register_wait returned (kev: %p)", &kev
);
4046 kev
.flags
|= EV_ERROR
;
4048 knote_unlock(kq
, knlc
.knlc_knote
, &knlc
, KNOTE_KQ_UNLOCK
);
4051 // keep in sync with kevent_register_wait_return()
4052 if (nevents
> 0 && (kev
.flags
& (EV_ERROR
| EV_RECEIPT
))) {
4053 if ((kev
.flags
& EV_ERROR
) == 0) {
4054 kev
.flags
|= EV_ERROR
;
4057 error
= kevent_copyout(&kev
, &ueventlist
, p
, flags
);
4062 } else if (kev
.flags
& EV_ERROR
) {
4068 /* short-circuit the scan if we only want error events */
4069 if (flags
& KEVENT_FLAG_ERROR_EVENTS
) {
4073 /* process pending events */
4074 if (nevents
> 0 && noutputs
== 0 && error
== 0) {
4075 struct _kevent
*cont_args
;
4076 /* store the continuation/completion data in the uthread */
4077 ut
= (uthread_t
)get_bsdthread_info(thread
);
4078 cont_args
= &ut
->uu_save
.uus_kevent
;
4081 cont_args
->retval
= retval
;
4082 cont_args
->eventlist
= ueventlist
;
4083 cont_args
->eventcount
= nevents
;
4084 cont_args
->eventout
= noutputs
;
4085 cont_args
->data_available
= data_available
;
4086 cont_args
->process_data
.fp_fd
= (int)id
;
4087 cont_args
->process_data
.fp_flags
= flags
;
4088 cont_args
->process_data
.fp_data_out
= data_out
;
4089 cont_args
->process_data
.fp_data_size
= data_size
;
4090 cont_args
->process_data
.fp_data_resid
= data_size
;
4093 * kqworkloop_end_processing() will happen at the end of kqueue_scan()
4095 needs_end_processing
= false;
4097 error
= kqueue_scan(kq
, kevent_callback
,
4098 continuation
, cont_args
,
4099 &cont_args
->process_data
,
4102 /* process remaining outputs */
4103 noutputs
= cont_args
->eventout
;
4104 data_resid
= cont_args
->process_data
.fp_data_resid
;
4106 /* copyout residual data size value (if it needs to be copied out) */
4107 /* don't abandon other output just because of residual copyout failures */
4108 if (error
== 0 && data_available
&& data_resid
!= data_size
) {
4109 (void)kevent_put_data_size(p
, data_available
, flags
, data_resid
);
4114 if (__improbable(needs_end_processing
)) {
4116 * If we didn't through kqworkloop_end_processing(),
4117 * we need to do it here.
4120 kqworkloop_end_processing((struct kqworkloop
*)kq
, 0, 0);
4123 kevent_put_kq(p
, id
, fp
, kq
);
4125 /* don't restart after signals... */
4126 if (error
== ERESTART
) {
4128 } else if (error
== EWOULDBLOCK
) {
/*
 * kevent_callback - callback for each individual event
 *
 * called with nothing locked
 * caller holds a reference on the kqueue
 */
static int
kevent_callback(__unused struct kqueue *kq, struct kevent_internal_s *kevp,
    void *data)
{
	struct _kevent *cont_args;
	int error;

	cont_args = (struct _kevent *)data;
	assert(cont_args->eventout < cont_args->eventcount);

	/*
	 * Copy out the appropriate amount of event data for this user.
	 */
	error = kevent_copyout(kevp, &cont_args->eventlist, current_proc(),
	    cont_args->process_data.fp_flags);

	/*
	 * If there isn't space for additional events, return
	 * a harmless error to stop the processing here
	 */
	if (error == 0 && ++cont_args->eventout == cont_args->eventcount) {
		error = EWOULDBLOCK;
	}
	return error;
}
/*
 * kevent_description - format a description of a kevent for diagnostic output
 *
 * called with a 256-byte string buffer
 */
char *
kevent_description(struct kevent_internal_s *kevp, char *s, size_t n)
{
	snprintf(s, n,
	    "{.ident=%#llx, .filter=%d, .flags=%#x, .udata=%#llx, .fflags=%#x, .data=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}",
	    kevp->ident,
	    kevp->filter,
	    kevp->flags,
	    kevp->udata,
	    kevp->fflags,
	    kevp->data,
	    kevp->ext[0],
	    kevp->ext[1]);

	return s;
}
4195 kevent_register_validate_priority(struct kqueue
*kq
, struct knote
*kn
,
4196 struct kevent_internal_s
*kev
)
4198 /* We don't care about the priority of a disabled or deleted knote */
4199 if (kev
->flags
& (EV_DISABLE
| EV_DELETE
)) {
4203 if (kq
->kq_state
& KQ_WORKLOOP
) {
4205 * Workloops need valid priorities with a QOS (excluding manager) for
4206 * any enabled knote.
4208 * When it is pre-existing, just make sure it has a valid QoS as
4209 * kevent_register() will not use the incoming priority (filters who do
4210 * have the responsibility to validate it again, see filt_wltouch).
4212 * If the knote is being made, validate the incoming priority.
4214 if (!_pthread_priority_thread_qos(kn
? kn
->kn_qos
: kev
->qos
)) {
/*
 * Prepare a filter for waiting after register.
 *
 * The f_post_register_wait hook will be called later by kevent_register()
 * and should call kevent_register_wait_block()
 */
static int
kevent_register_wait_prepare(struct knote *kn, struct kevent_internal_s *kev)
{
	thread_t thread = current_thread();
	struct uthread *uth = get_bsdthread_info(thread);

	assert(knote_fops(kn)->f_extended_codes);

	if (kn->kn_hook == NULL) {
		thread_reference(thread);
		kn->kn_hook = thread;
	} else if (kn->kn_hook != thread) {
		/*
		 * kn_hook may be set from a previous aborted wait
		 * However, it has to be from the same thread.
		 */
		kev->flags |= EV_ERROR;
		kev->data = EINVAL;
		return 0;
	}

	uth->uu_save.uus_kevent_register.knote = kn;
	return FILTER_REGISTER_WAIT;
}
/*
 * Cleanup a kevent_register_wait_prepare() effect for threads that have been
 * aborted instead of properly woken up with thread_wakeup_thread().
 */
static void
kevent_register_wait_cleanup(struct knote *kn)
{
	thread_t thread = kn->kn_hook;
	kn->kn_hook = NULL;
	thread_deallocate(thread);
}
/*
 * Must be called at the end of a f_post_register_wait call from a filter.
 */
static void
kevent_register_wait_block(struct turnstile *ts, thread_t thread,
    struct knote_lock_ctx *knlc, thread_continue_t cont,
    struct _kevent_register *cont_args)
{
	knote_unlock(cont_args->kq, cont_args->knote, knlc, KNOTE_KQ_UNLOCK);
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
	cont_args->handoff_thread = thread;
	thread_handoff_parameter(thread, cont, cont_args);
}
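/*
 * Illustrative sketch (example only, not in xnu): the shape of the handoff
 * performed above.  The registering thread lends its remaining quantum to the
 * owner/servicer thread and resumes later in `cont` with `cont_args`; control
 * never returns past thread_handoff_parameter() on this stack.
 */
#if 0	/* example only */
static void
example_wait_with_handoff(thread_t owner, thread_continue_t cont, void *cont_args)
{
	thread_handoff_parameter(owner, cont, cont_args);
	/* NOTREACHED */
}
#endif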
/*
 * Called by Filters using a f_post_register_wait to return from their wait.
 */
static void
kevent_register_wait_return(struct _kevent_register *cont_args)
{
	struct kqueue *kq = cont_args->kq;
	proc_t p = kq->kq_p;
	struct kevent_internal_s *kev = &cont_args->kev;
	int error = 0;

	if (cont_args->handoff_thread) {
		thread_deallocate(cont_args->handoff_thread);
	}

	if (kev->flags & (EV_ERROR | EV_RECEIPT)) {
		if ((kev->flags & EV_ERROR) == 0) {
			kev->flags |= EV_ERROR;
			kev->data = 0;
		}
		error = kevent_copyout(kev, &cont_args->ueventlist, p, cont_args->flags);
		if (error == 0) {
			cont_args->eventout++;
		}
	}

	kevent_put_kq(p, cont_args->fd, cont_args->fp, kq);
	if (error == 0) {
		*cont_args->retval = cont_args->eventout;
	}
	unix_syscall_return(error);
}
4313 * kevent_register - add a new event to a kqueue
4315 * Creates a mapping between the event source and
4316 * the kqueue via a knote data structure.
4318 * Because many/most the event sources are file
4319 * descriptor related, the knote is linked off
4320 * the filedescriptor table for quick access.
4322 * called with nothing locked
4323 * caller holds a reference on the kqueue
4327 kevent_register(struct kqueue
*kq
, struct kevent_internal_s
*kev
,
4328 struct knote_lock_ctx
*knlc
)
4330 struct proc
*p
= kq
->kq_p
;
4331 const struct filterops
*fops
;
4332 struct knote
*kn
= NULL
;
4333 int result
= 0, error
= 0;
4334 unsigned short kev_flags
= kev
->flags
;
4336 if (kev
->filter
< 0) {
4337 if (kev
->filter
+ EVFILT_SYSCOUNT
< 0) {
4341 fops
= sysfilt_ops
[~kev
->filter
]; /* to 0-base index */
4347 /* restrict EV_VANISHED to adding udata-specific dispatch kevents */
4348 if ((kev
->flags
& EV_VANISHED
) &&
4349 (kev
->flags
& (EV_ADD
| EV_DISPATCH2
)) != (EV_ADD
| EV_DISPATCH2
)) {
4354 /* Simplify the flags - delete and disable overrule */
4355 if (kev
->flags
& EV_DELETE
) {
4356 kev
->flags
&= ~EV_ADD
;
4358 if (kev
->flags
& EV_DISABLE
) {
4359 kev
->flags
&= ~EV_ENABLE
;
4362 if (kq
->kq_state
& KQ_WORKLOOP
) {
4363 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_REGISTER
),
4364 ((struct kqworkloop
*)kq
)->kqwl_dynamicid
,
4365 kev
->udata
, kev
->flags
, kev
->filter
);
4366 } else if (kq
->kq_state
& KQ_WORKQ
) {
4367 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_REGISTER
),
4368 0, kev
->udata
, kev
->flags
, kev
->filter
);
4370 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_REGISTER
),
4371 VM_KERNEL_UNSLIDE_OR_PERM(kq
),
4372 kev
->udata
, kev
->flags
, kev
->filter
);
4376 /* find the matching knote from the fd tables/hashes */
4377 kn
= kq_find_knote_and_kq_lock(kq
, kev
, fops
->f_isfd
, p
);
	error = kevent_register_validate_priority(kq, kn, kev);
	if (error) {
		goto out;
	}

	if (kn == NULL && (kev->flags & EV_ADD) == 0) {
		/*
		 * No knote found, EV_ADD wasn't specified
		 */
		if ((kev_flags & EV_ADD) && (kev_flags & EV_DELETE) &&
		    (kq->kq_state & KQ_WORKLOOP)) {
			/*
			 * For workloops, understand EV_ADD|EV_DELETE as a "soft" delete
			 * that doesn't care about ENOENT, so just pretend the deletion
			 * happened.
			 */
		} else {
			error = ENOENT;
		}
		goto out;
	} else if (kn == NULL) {
		/*
		 * No knote found, need to attach a new one (attach)
		 */

		struct fileproc *knote_fp = NULL;

		/* grab a file reference for the new knote */
		if (fops->f_isfd) {
			if ((error = fp_lookup(p, kev->ident, &knote_fp, 0)) != 0) {
				goto out;
			}
		}

		kn = knote_alloc();
		if (kn == NULL) {
			error = ENOMEM;
			if (knote_fp != NULL) {
				fp_drop(p, kev->ident, knote_fp, 0);
			}
			goto out;
		}

		kn->kn_fp = knote_fp;
		kn->kn_kq_packed = (intptr_t)(struct kqueue *)kq;
		kqueue_retain(kq); /* retain a kq ref */
		kn->kn_filtid = ~kev->filter;
		kn->kn_status = KN_ATTACHING | KN_ATTACHED;

		/* was vanish support requested */
		if (kev->flags & EV_VANISHED) {
			kev->flags &= ~EV_VANISHED;
			kn->kn_status |= KN_REQVANISH;
		}

		/* snapshot matching/dispatching protocol flags into knote */
		if (kev->flags & EV_DISPATCH) {
			kn->kn_status |= KN_DISPATCH;
		}
		if (kev->flags & EV_UDATA_SPECIFIC) {
			kn->kn_status |= KN_UDATA_SPECIFIC;
		}
		if (kev->flags & EV_DISABLE) {
			kn->kn_status |= KN_DISABLED;
		}

		/*
		 * copy the kevent state into knote
		 * protocol is that fflags and data
		 * are saved off, and cleared before
		 * calling the attach routine.
		 */
		kn->kn_kevent = *kev;
		kn->kn_sfflags = kev->fflags;
		kn->kn_sdata = kev->data;

		knote_reset_priority(kn, kev->qos);

		/* Add the knote for lookup thru the fd table */
		error = kq_add_knote(kq, kn, knlc, p);
		if (error) {
			(void)kqueue_release(kq, KQUEUE_CANT_BE_LAST_REF);
			knote_free(kn);
			if (knote_fp != NULL) {
				fp_drop(p, kev->ident, knote_fp, 0);
			}

			if (error == ERESTART) {
				goto restart;
			}
			goto out;
		}

		/* fp reference count now applies to knote */

		/*
		 * we can't use filter_call() because f_attach can change the filter ops
		 * for a filter that supports f_extended_codes, so we need to reload
		 * knote_fops() and not use `fops`.
		 */
		result = fops->f_attach(kn, kev);
		if (result && !knote_fops(kn)->f_extended_codes) {
			result = FILTER_ACTIVE;
		}

		kqlock(kq);

		if (kn->kn_flags & EV_ERROR) {
			/*
			 * Failed to attach correctly, so drop.
			 */
			kn->kn_status &= ~(KN_ATTACHED | KN_ATTACHING);
			error = kn->kn_data;
			knote_drop(kq, kn, knlc);
			result = 0;
			goto out;
		}

		/*
		 * end "attaching" phase - now just attached
		 *
		 * Mark the thread request overcommit, if appropriate.
		 *
		 * If the attach routine indicated that an
		 * event is already fired, activate the knote.
		 */
		kn->kn_status &= ~KN_ATTACHING;
		knote_set_qos_overcommit(kn);

		if (result & FILTER_ACTIVE) {
			if (result & FILTER_ADJUST_EVENT_QOS_BIT) {
				knote_adjust_qos(kq, kn, result);
			}
			knote_activate(kn);
		}
	} else if (!knote_lock(kq, kn, knlc, KNOTE_KQ_LOCK_ON_SUCCESS)) {
		/*
		 * The knote was dropped while we were waiting for the lock,
		 * we need to re-evaluate entirely
		 */
		goto restart;
	} else if (kev->flags & EV_DELETE) {
		/*
		 * Deletion of a knote (drop)
		 *
		 * If the filter wants to filter drop events, let it do so.
		 *
		 * defer-delete: when trying to delete a disabled EV_DISPATCH2 knote,
		 * we must wait for the knote to be re-enabled (unless it is being
		 * re-enabled atomically here).
		 */
		if (knote_fops(kn)->f_allow_drop) {
			bool drop;

			kqunlock(kq);
			drop = knote_fops(kn)->f_allow_drop(kn, kev);
			kqlock(kq);

			if (!drop) {
				goto out_unlock;
			}
		}

		if ((kev->flags & EV_ENABLE) == 0 &&
		    (kn->kn_status & (KN_DISPATCH2 | KN_DISABLED)) ==
		    (KN_DISPATCH2 | KN_DISABLED)) {
			kn->kn_status |= KN_DEFERDELETE;
			error = EINPROGRESS;
			goto out_unlock;
		}

		knote_drop(kq, kn, knlc);
		goto out;
	} else {
		/*
		 * Regular update of a knote (touch)
		 *
		 * Call touch routine to notify filter of changes in filter values
		 * (and to re-determine if any events are fired).
		 *
		 * If the knote is in defer-delete, avoid calling the filter touch
		 * routine (it has delivered its last event already).
		 *
		 * If the touch routine had no failure,
		 * apply the requested side effects to the knote.
		 */
		if (kn->kn_status & (KN_DEFERDELETE | KN_VANISHED)) {
			if (kev->flags & EV_ENABLE) {
				result = FILTER_ACTIVE;
			}
		} else {
			kqunlock(kq);
			result = filter_call(knote_fops(kn), f_touch(kn, kev));
			kqlock(kq);
		}

		if (kev->flags & EV_ERROR) {
			result = 0;
		} else {
			/* accept new kevent state */
			if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0) {
				kn->kn_udata = kev->udata;
			}
			if (kev->flags & EV_DISABLE) {
				knote_disable(kn);
			}
			if (result & (FILTER_UPDATE_REQ_QOS | FILTER_ADJUST_EVENT_QOS_BIT)) {
				knote_dequeue(kn);
			}
			if ((result & FILTER_UPDATE_REQ_QOS) &&
			    kev->qos && kev->qos != kn->kn_qos) {
				knote_reset_priority(kn, kev->qos);
			}
			if (result & FILTER_ACTIVE) {
				thread_qos_t qos;
				if (result & FILTER_ADJUST_EVENT_QOS_BIT) {
					if (knote_should_apply_qos_override(kq, kn, result, &qos)) {
						knote_apply_qos_override(kn, qos);
					}
				}
				knote_activate(kn);
			}
			if (result & (FILTER_UPDATE_REQ_QOS | FILTER_ADJUST_EVENT_QOS_BIT)) {
				if (knote_enqueue(kn) && (kn->kn_status & KN_ACTIVE)) {
					knote_wakeup(kn);
				}
			}
			if (kev->flags & EV_ENABLE) {
				knote_enable(kn);
			}
		}
	}

out_unlock:
	if ((result & FILTER_REGISTER_WAIT) == 0) {
		/*
		 * When the filter asked for a post-register wait,
		 * we leave the knote and kqueue locked for kevent_register()
		 * to call the filter's f_post_register_wait hook.
		 */
		knote_unlock(kq, kn, knlc, KNOTE_KQ_UNLOCK);
	}

out:
	/* output local errors through the kevent */
	if (error) {
		kev->flags |= EV_ERROR;
		kev->data = error;
	}
	return result;
}
/*
 * knote_process - process a triggered event
 *
 * Validate that it is really still a triggered event
 * by calling the filter routines (if necessary).  Hold
 * a use reference on the knote to avoid it being detached.
 *
 * If it is still considered triggered, we will have taken
 * a copy of the state under the filter lock.  We use that
 * snapshot to dispatch the knote for future processing (or
 * not, if this was a lost event).
 *
 * Our caller assures us that nobody else can be processing
 * events from this knote during the whole operation.  But
 * others can be touching or posting events to the knote
 * interspersed with our processing it.
 *
 * caller holds a reference on the kqueue.
 * kqueue locked on entry and exit - but may be dropped
 */
static int
knote_process(struct knote *kn,
    kevent_callback_t callback,
    void *callback_data,
    struct filt_process_s *process_data)
{
	struct kevent_internal_s kev;
	struct kqueue *kq = knote_get_kq(kn);
	KNOTE_LOCK_CTX(knlc);
	int result = FILTER_ACTIVE;
	int error = 0;
	bool drop = false;

	bzero(&kev, sizeof(kev));

	/*
	 * Must be active or stayactive
	 * Must be queued and not disabled/suppressed
	 */
	assert(kn->kn_status & KN_QUEUED);
	assert(kn->kn_status & (KN_ACTIVE | KN_STAYACTIVE));
	assert(!(kn->kn_status & (KN_DISABLED | KN_SUPPRESSED | KN_DROPPING)));

	if (kq->kq_state & KQ_WORKLOOP) {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS),
		    ((struct kqworkloop *)kq)->kqwl_dynamicid,
		    kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
		    kn->kn_filtid);
	} else if (kq->kq_state & KQ_WORKQ) {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS),
		    0, kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
		    kn->kn_filtid);
	} else {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS),
		    VM_KERNEL_UNSLIDE_OR_PERM(kq), kn->kn_udata,
		    kn->kn_status | (kn->kn_id << 32), kn->kn_filtid);
	}

	if ((kn->kn_status & KN_DROPPING) ||
	    !knote_lock(kq, kn, &knlc, KNOTE_KQ_LOCK_ALWAYS)) {
		/*
		 * When the knote is dropping or has dropped,
		 * then there's nothing we want to process.
		 */
		return EJUSTRETURN;
	}

	/*
	 * For deferred-drop or vanished events, we just create a fake
	 * event to acknowledge end-of-life.  Otherwise, we call the
	 * filter's process routine to snapshot the kevent state under
	 * the filter's locking protocol.
	 *
	 * suppress knotes to avoid returning the same event multiple times in
	 * a single call.
	 */
	knote_suppress(kn);

	if (kn->kn_status & (KN_DEFERDELETE | KN_VANISHED)) {
		/* create fake event */
		kev.filter = kn->kn_filter;
		kev.ident = kn->kn_id;
		kev.flags = (kn->kn_status & KN_DEFERDELETE) ? EV_DELETE : EV_VANISHED;
		kev.flags |= (EV_DISPATCH2 | EV_ONESHOT);
		kev.udata = kn->kn_udata;
	} else {
		/* deactivate - so new activations indicate a wakeup */
		knote_deactivate(kn);

		kqunlock(kq);
		result = filter_call(knote_fops(kn), f_process(kn, process_data, &kev));
		kqlock(kq);
	}

	/*
	 * Determine how to dispatch the knote for future event handling.
	 * not-fired: just return (do not callout, leave deactivated).
	 * One-shot:  If dispatch2, enter deferred-delete mode (unless this
	 *            is the deferred delete event delivery itself).  Otherwise,
	 *            drop it.
	 * Dispatch:  don't clear state, just mark it disabled.
	 * Cleared:   just leave it deactivated.
	 * Others:    re-activate as there may be more events to handle.
	 *            This will not wake up more handlers right now, but
	 *            at the completion of handling events it may trigger
	 *            more handler threads (TODO: optimize based on more than
	 *            just this one event being detected by the filter).
	 */
	if ((result & FILTER_ACTIVE) == 0) {
		if ((kn->kn_status & (KN_ACTIVE | KN_STAYACTIVE)) == 0) {
			/*
			 * Stay active knotes should not be unsuppressed or we'd create an
			 * infinite loop.
			 *
			 * Some knotes (like EVFILT_WORKLOOP) can be reactivated from
			 * within f_process() but that doesn't necessarily make them
			 * ready to process, so we should leave them be.
			 *
			 * For other knotes, since we will not return an event,
			 * there's no point keeping the knote suppressed.
			 */
			knote_unsuppress(kn);
		}
		knote_unlock(kq, kn, &knlc, KNOTE_KQ_LOCK_ALWAYS);
		return EJUSTRETURN;
	}

	if (result & FILTER_ADJUST_EVENT_QOS_BIT) {
		knote_adjust_qos(kq, kn, result);
	}
	kev.qos = _pthread_priority_combine(kn->kn_qos, kn->kn_qos_override);

	if (kev.flags & EV_ONESHOT) {
		if ((kn->kn_status & (KN_DISPATCH2 | KN_DEFERDELETE)) == KN_DISPATCH2) {
			/* defer dropping non-delete oneshot dispatch2 events */
			kn->kn_status |= KN_DEFERDELETE;
			knote_disable(kn);
		} else {
			drop = true;
		}
	} else if (kn->kn_status & KN_DISPATCH) {
		/* disable all dispatch knotes */
		knote_disable(kn);
	} else if ((kev.flags & EV_CLEAR) == 0) {
		/* re-activate in case there are more events */
		knote_activate(kn);
	}

	/*
	 * callback to handle each event as we find it.
	 * If we have to detach and drop the knote, do
	 * it while we have the kq unlocked.
	 */
	if (drop) {
		knote_drop(kq, kn, &knlc);
	} else {
		knote_unlock(kq, kn, &knlc, KNOTE_KQ_UNLOCK);
	}

	if (kev.flags & EV_VANISHED) {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_VANISHED),
		    kev.ident, kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
		    kn->kn_filtid);
	}

	error = (callback)(kq, &kev, callback_data);
	kqlock(kq);
	return error;
}
/*
 * Returns -1 if the kqueue was unbound and processing should not happen
 */
#define KQWQAE_BEGIN_PROCESSING 1
#define KQWQAE_END_PROCESSING   2
#define KQWQAE_UNBIND           3
static int
kqworkq_acknowledge_events(struct kqworkq *kqwq, struct kqrequest *kqr,
    int kevent_flags, int kqwqae_op)
{
	thread_qos_t old_override = THREAD_QOS_UNSPECIFIED;
	thread_t thread = kqr->kqr_thread;
	struct knote *kn;
	int rc = 0;
	bool seen_stayactive = false, unbind;

	kqlock_held(&kqwq->kqwq_kqueue);

	if (!TAILQ_EMPTY(&kqr->kqr_suppressed)) {
		/*
		 * Return suppressed knotes to their original state.
		 * For workq kqueues, suppressed ones that are still
		 * truly active (not just forced into the queue) will
		 * set flags we check below to see if anything got
		 * woken up.
		 */
		while ((kn = TAILQ_FIRST(&kqr->kqr_suppressed)) != NULL) {
			assert(kn->kn_status & KN_SUPPRESSED);
			knote_unsuppress(kn);
			if (kn->kn_status & KN_STAYACTIVE) {
				seen_stayactive = true;
			}
		}
	}

	kq_req_lock(kqwq);

#if DEBUG || DEVELOPMENT
	thread_t self = current_thread();
	struct uthread *ut = get_bsdthread_info(self);

	assert(kqr->kqr_state & KQR_THREQUESTED);
	assert(kqr->kqr_thread == self);
	assert(ut->uu_kqr_bound == kqr);
#endif // DEBUG || DEVELOPMENT

	if (kqwqae_op == KQWQAE_UNBIND) {
		unbind = true;
	} else if ((kevent_flags & KEVENT_FLAG_PARKING) == 0) {
		unbind = false;
	} else if (kqwqae_op == KQWQAE_BEGIN_PROCESSING && seen_stayactive) {
		/*
		 * When we unsuppress stayactive knotes, for the kind that are hooked
		 * through select, we need to process once before we can assert there's
		 * no event pending.  Hence we can't unbind during BEGIN PROCESSING.
		 */
		unbind = false;
	} else {
		unbind = ((kqr->kqr_state & KQR_WAKEUP) == 0);
	}
	if (unbind) {
		old_override = kqworkq_unbind_locked(kqwq, kqr, thread);
		rc = -1;
		/*
		 * request a new thread if we didn't process the whole queue or real events
		 * have happened (not just putting stay-active events back).
		 */
		if (kqr->kqr_state & KQR_WAKEUP) {
			kqueue_threadreq_initiate(&kqwq->kqwq_kqueue, kqr,
			    kqr->kqr_qos_index, 0);
		}
	}

	if (rc == 0) {
		/*
		 * Reset wakeup bit to notice events firing while we are processing,
		 * as we cannot rely on the bucket queue emptiness because of stay
		 * active knotes.
		 */
		kqr->kqr_state &= ~KQR_WAKEUP;
	}

	kq_req_unlock(kqwq);

	if (old_override) {
		thread_drop_ipc_override(thread);
	}

	return rc;
}
/*
 * Return 0 to indicate that processing should proceed,
 * -1 if there is nothing to process.
 *
 * Called with kqueue locked and returns the same way,
 * but may drop lock temporarily.
 */
static int
kqworkq_begin_processing(struct kqworkq *kqwq, struct kqrequest *kqr,
    int kevent_flags)
{
	int rc = 0;

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_START,
	    0, kqr->kqr_qos_index);

	rc = kqworkq_acknowledge_events(kqwq, kqr, kevent_flags,
	    KQWQAE_BEGIN_PROCESSING);

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END,
	    thread_tid(kqr->kqr_thread), kqr->kqr_state);

	return rc;
}
static bool
kqworkloop_is_processing_on_current_thread(struct kqworkloop *kqwl)
{
	struct kqueue *kq = &kqwl->kqwl_kqueue;

	kqlock_held(kq);

	if (kq->kq_state & KQ_PROCESSING) {
		/*
		 * KQ_PROCESSING is unset with the kqlock held, and the kqr thread is
		 * never modified while KQ_PROCESSING is set, meaning that peeking at
		 * its value is safe from this context.
		 */
		return kqwl->kqwl_request.kqr_thread == current_thread();
	}
	return false;
}
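
/*
 * kqworkloop_acknowledge_events - return suppressed knotes to their
 * original state after a processing pass.
 *
 * Knotes that adjust QoS and were auto-disabled by EV_DISPATCH remain
 * suppressed so that their overrides keep pushing; the highest such
 * override index is returned to the caller.
 */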
static kq_index_t
kqworkloop_acknowledge_events(struct kqworkloop *kqwl)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;
	kq_index_t qos = THREAD_QOS_UNSPECIFIED;
	struct knote *kn, *tmp;

	kqlock_held(&kqwl->kqwl_kqueue);

	TAILQ_FOREACH_SAFE(kn, &kqr->kqr_suppressed, kn_tqe, tmp) {
		/*
		 * If a knote that can adjust QoS is disabled because of the automatic
		 * behavior of EV_DISPATCH, the knotes should stay suppressed so that
		 * further overrides keep pushing.
		 */
		if (knote_fops(kn)->f_adjusts_qos && (kn->kn_status & KN_DISABLED) &&
		    (kn->kn_status & (KN_STAYACTIVE | KN_DROPPING)) == 0 &&
		    (kn->kn_flags & (EV_DISPATCH | EV_DISABLE)) == EV_DISPATCH) {
			qos = MAX(qos, knote_get_qos_override_index(kn));
			continue;
		}
		knote_unsuppress(kn);
	}

	return qos;
}
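
/*
 * kqworkloop_begin_processing - start a processing pass on a workloop.
 *
 * Marks the kqueue KQ_PROCESSING, acknowledges previously suppressed
 * knotes, and, when KEVENT_FLAG_PARKING is set, may unbind the servicer
 * right away (returning -1 so the caller skips processing).
 */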
static int
kqworkloop_begin_processing(struct kqworkloop *kqwl, unsigned int kevent_flags)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;
	struct kqueue *kq = &kqwl->kqwl_kqueue;
	thread_qos_t old_override = THREAD_QOS_UNSPECIFIED, qos_override;
	thread_t thread = kqr->kqr_thread;
	int rc = 0, op = KQWL_UTQ_NONE;

	kqlock_held(kq);

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_START,
	    kqwl->kqwl_dynamicid, 0, 0);

	/* nobody else should still be processing */
	assert((kq->kq_state & KQ_PROCESSING) == 0);

	kq->kq_state |= KQ_PROCESSING;

	if (!TAILQ_EMPTY(&kqr->kqr_suppressed)) {
		op = KQWL_UTQ_RESET_WAKEUP_OVERRIDE;
	}

	if (kevent_flags & KEVENT_FLAG_PARKING) {
		/*
		 * When "parking" we want to process events and if no events are found
		 * unbind.
		 *
		 * However, non overcommit threads sometimes park even when they have
		 * more work so that the pool can narrow.  For these, we need to unbind
		 * early, so that calling kqworkloop_update_threads_qos() can ask the
		 * workqueue subsystem whether the thread should park despite having
		 * pending events.
		 */
		if (kqr->kqr_state & KQR_THOVERCOMMIT) {
			op = KQWL_UTQ_PARKING;
		} else {
			op = KQWL_UTQ_UNBINDING;
		}
	}
	if (op == KQWL_UTQ_NONE) {
		goto done;
	}

	qos_override = kqworkloop_acknowledge_events(kqwl);

	kq_req_lock(kqwl);

	if (op == KQWL_UTQ_UNBINDING) {
		old_override = kqworkloop_unbind_locked(kqwl, thread);
		(void)kqueue_release(kqwl, KQUEUE_CANT_BE_LAST_REF);
	}
	kqworkloop_update_threads_qos(kqwl, op, qos_override);
	if (op == KQWL_UTQ_PARKING) {
		if (!TAILQ_EMPTY(&kqwl->kqwl_queue[KQWL_BUCKET_STAYACTIVE])) {
			/*
			 * We cannot trust KQR_WAKEUP when looking at stay active knotes.
			 * We need to process once, and kqworkloop_end_processing will
			 * handle the unbind.
			 */
		} else if ((kqr->kqr_state & KQR_WAKEUP) == 0 || kqwl->kqwl_owner) {
			old_override = kqworkloop_unbind_locked(kqwl, thread);
			(void)kqueue_release(kqwl, KQUEUE_CANT_BE_LAST_REF);
			rc = -1;
		}
	} else if (op == KQWL_UTQ_UNBINDING) {
		if (kqr->kqr_thread == thread) {
			/*
			 * The thread request fired again, passed the admission check and
			 * got bound to the current thread again.
			 */
		} else {
			rc = -1;
		}
	}

	if (rc == 0) {
		/*
		 * Reset wakeup bit to notice stay active events firing while we are
		 * processing, as we cannot rely on the stayactive bucket emptiness.
		 */
		kqr->kqr_wakeup_indexes &= ~KQWL_STAYACTIVE_FIRED_BIT;
	} else {
		kq->kq_state &= ~KQ_PROCESSING;
	}

	kq_req_unlock(kqwl);

	if (old_override) {
		thread_drop_ipc_override(thread);
	}

done:
	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_END,
	    kqwl->kqwl_dynamicid, 0, 0);

	return rc;
}
/*
 * Return 0 to indicate that processing should proceed,
 * -1 if there is nothing to process.
 *
 * Called with kqueue locked and returns the same way,
 * but may drop lock temporarily.
 * May block.
 */
static int
kqfile_begin_processing(struct kqueue *kq)
{
	struct kqtailq *suppressq;

	kqlock_held(kq);

	assert((kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0);
	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(kq), 0);

	/* wait to become the exclusive processing thread */
	for (;;) {
		if (kq->kq_state & KQ_DRAIN) {
			KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
			    VM_KERNEL_UNSLIDE_OR_PERM(kq), 2);
			return -1;
		}

		if ((kq->kq_state & KQ_PROCESSING) == 0) {
			break;
		}

		/* if someone else is processing the queue, wait */
		kq->kq_state |= KQ_PROCWAIT;
		suppressq = kqueue_get_suppressed_queue(kq, NULL);
		waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
		    CAST_EVENT64_T(suppressq), THREAD_UNINT | THREAD_WAIT_NOREPORT,
		    TIMEOUT_WAIT_FOREVER);

		kqunlock(kq);
		thread_block(THREAD_CONTINUE_NULL);
		kqlock(kq);
	}

	/* Nobody else processing */

	/* clear pre-posts and KQ_WAKEUP now, in case we bail early */
	waitq_set_clear_preposts(&kq->kq_wqs);
	kq->kq_state &= ~KQ_WAKEUP;

	/* anything left to process? */
	if (kqueue_queue_empty(kq, QOS_INDEX_KQFILE)) {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
		    VM_KERNEL_UNSLIDE_OR_PERM(kq), 1);
		return -1;
	}

	/* convert to processing mode */
	kq->kq_state |= KQ_PROCESSING;

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
	    VM_KERNEL_UNSLIDE_OR_PERM(kq));

	return 0;
}
/*
 * Try to end the processing, only called when a workq thread is attempting to
 * park (KEVENT_FLAG_PARKING is set).
 *
 * When returning -1, the kqworkq is setup again so that it is ready to be
 * processed.
 */
static int
kqworkq_end_processing(struct kqworkq *kqwq, struct kqrequest *kqr,
    int kevent_flags)
{
	if (!kqueue_queue_empty(&kqwq->kqwq_kqueue, kqr->kqr_qos_index)) {
		/* remember we didn't process everything */
		kq_req_lock(kqwq);
		kqr->kqr_state |= KQR_WAKEUP;
		kq_req_unlock(kqwq);
	}

	if (kevent_flags & KEVENT_FLAG_PARKING) {
		/*
		 * if acknowledge events "succeeds" it means there are events,
		 * which is a failure condition for end_processing.
		 */
		int rc = kqworkq_acknowledge_events(kqwq, kqr, kevent_flags,
		    KQWQAE_END_PROCESSING);
		if (rc == 0) {
			return -1;
		}
	}

	return 0;
}
/*
 * Try to end the processing, only called when a workq thread is attempting to
 * park (KEVENT_FLAG_PARKING is set).
 *
 * When returning -1, the kqworkq is setup again so that it is ready to be
 * processed (as if kqworkloop_begin_processing had just been called).
 *
 * If successful and KEVENT_FLAG_PARKING was set in the kevent_flags,
 * the kqworkloop is unbound from its servicer as a side effect.
 */
static int
kqworkloop_end_processing(struct kqworkloop *kqwl, int flags, int kevent_flags)
{
	struct kqueue *kq = &kqwl->kqwl_kqueue;
	struct kqrequest *kqr = &kqwl->kqwl_request;
	thread_qos_t old_override = THREAD_QOS_UNSPECIFIED, qos_override;
	thread_t thread = kqr->kqr_thread;
	int rc = 0;

	kqlock_held(kq);

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_START,
	    kqwl->kqwl_dynamicid, 0, 0);

	if (flags & KQ_PROCESSING) {
		assert(kq->kq_state & KQ_PROCESSING);

		/*
		 * If we still have queued stayactive knotes, remember we didn't finish
		 * processing all of them.  This should be extremely rare and would
		 * require to have a lot of them registered and fired.
		 */
		if (!TAILQ_EMPTY(&kqwl->kqwl_queue[KQWL_BUCKET_STAYACTIVE])) {
			kq_req_lock(kqwl);
			kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_WAKEUP_QOS,
			    KQWL_BUCKET_STAYACTIVE);
			kq_req_unlock(kqwl);
		}

		/*
		 * When KEVENT_FLAG_PARKING is set, we need to attempt an unbind while
		 * still under the lock.
		 *
		 * So we do everything kqworkloop_unbind() would do, but because we're
		 * inside kqueue_process(), if the workloop actually received events
		 * while our locks were dropped, we have the opportunity to fail the end
		 * processing and loop again.
		 *
		 * This avoids going through the process-wide workqueue lock hence
		 * scales better.
		 */
		if (kevent_flags & KEVENT_FLAG_PARKING) {
			qos_override = kqworkloop_acknowledge_events(kqwl);
		}
	}

	kq_req_lock(kqwl);

	if (kevent_flags & KEVENT_FLAG_PARKING) {
		kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_PARKING, qos_override);
		if ((kqr->kqr_state & KQR_WAKEUP) && !kqwl->kqwl_owner) {
			/*
			 * Reset wakeup bit to notice stay active events firing while we are
			 * processing, as we cannot rely on the stayactive bucket emptiness.
			 */
			kqr->kqr_wakeup_indexes &= ~KQWL_STAYACTIVE_FIRED_BIT;
			rc = -1;
		} else {
			old_override = kqworkloop_unbind_locked(kqwl, thread);
			(void)kqueue_release(kqwl, KQUEUE_CANT_BE_LAST_REF);
			kq->kq_state &= ~flags;
		}
	} else {
		kq->kq_state &= ~flags;
		kqr->kqr_state |= KQR_R2K_NOTIF_ARMED;
		kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_RECOMPUTE_WAKEUP_QOS, 0);
	}

	kq_req_unlock(kqwl);

	if (old_override) {
		thread_drop_ipc_override(thread);
	}

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_END,
	    kqwl->kqwl_dynamicid, 0, 0);

	return rc;
}
/*
 * Called with kqueue lock held.
 */
static void
kqfile_end_processing(struct kqueue *kq)
{
	struct knote *kn;
	struct kqtailq *suppressq;
	int procwait;

	kqlock_held(kq);

	assert((kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0);

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_END),
	    VM_KERNEL_UNSLIDE_OR_PERM(kq), 0);

	/*
	 * Return suppressed knotes to their original state.
	 */
	suppressq = kqueue_get_suppressed_queue(kq, NULL);
	while ((kn = TAILQ_FIRST(suppressq)) != NULL) {
		assert(kn->kn_status & KN_SUPPRESSED);
		knote_unsuppress(kn);
	}

	procwait = (kq->kq_state & KQ_PROCWAIT);
	kq->kq_state &= ~(KQ_PROCESSING | KQ_PROCWAIT);

	if (procwait) {
		/* first wake up any thread already waiting to process */
		waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
		    CAST_EVENT64_T(suppressq),
		    THREAD_AWAKENED,
		    WAITQ_ALL_PRIORITIES);
	}
}
static int
kqueue_workloop_ctl_internal(proc_t p, uintptr_t cmd, uint64_t __unused options,
    struct kqueue_workloop_params *params, int *retval)
{
	int error = 0;
	int fd;
	struct fileproc *fp;
	struct kqueue *kq;
	struct kqworkloop *kqwl;
	struct filedesc *fdp = p->p_fd;
	workq_threadreq_param_t trp = { };

	switch (cmd) {
	case KQ_WORKLOOP_CREATE:
		if (!params->kqwlp_flags) {
			error = EINVAL;
			break;
		}

		if ((params->kqwlp_flags & KQ_WORKLOOP_CREATE_SCHED_PRI) &&
		    (params->kqwlp_sched_pri < 1 ||
		    params->kqwlp_sched_pri > 63 /* MAXPRI_USER */)) {
			error = EINVAL;
			break;
		}

		if ((params->kqwlp_flags & KQ_WORKLOOP_CREATE_SCHED_POL) &&
		    invalid_policy(params->kqwlp_sched_pol)) {
			error = EINVAL;
			break;
		}

		if ((params->kqwlp_flags & KQ_WORKLOOP_CREATE_CPU_PERCENT) &&
		    (params->kqwlp_cpu_percent <= 0 ||
		    params->kqwlp_cpu_percent > 100 ||
		    params->kqwlp_cpu_refillms <= 0 ||
		    params->kqwlp_cpu_refillms > 0x00ffffff)) {
			error = EINVAL;
			break;
		}

		if (params->kqwlp_flags & KQ_WORKLOOP_CREATE_SCHED_PRI) {
			trp.trp_flags |= TRP_PRIORITY;
			trp.trp_pri = params->kqwlp_sched_pri;
		}
		if (params->kqwlp_flags & KQ_WORKLOOP_CREATE_SCHED_POL) {
			trp.trp_flags |= TRP_POLICY;
			trp.trp_pol = params->kqwlp_sched_pol;
		}
		if (params->kqwlp_flags & KQ_WORKLOOP_CREATE_CPU_PERCENT) {
			trp.trp_flags |= TRP_CPUPERCENT;
			trp.trp_cpupercent = (uint8_t)params->kqwlp_cpu_percent;
			trp.trp_refillms = params->kqwlp_cpu_refillms;
		}

		error = kevent_get_kq(p, params->kqwlp_id, &trp,
		    KEVENT_FLAG_DYNAMIC_KQUEUE | KEVENT_FLAG_WORKLOOP |
		    KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST, &fp, &fd, &kq);
		if (error) {
			break;
		}

		if (!(fdp->fd_flags & FD_WORKLOOP)) {
			/* FD_WORKLOOP indicates we've ever created a workloop
			 * via this syscall but it's only ever added to a process, never
			 * removed.
			 */
			proc_fdlock(p);
			fdp->fd_flags |= FD_WORKLOOP;
			proc_fdunlock(p);
		}
		break;
	case KQ_WORKLOOP_DESTROY:
		error = kevent_get_kq(p, params->kqwlp_id, NULL,
		    KEVENT_FLAG_DYNAMIC_KQUEUE | KEVENT_FLAG_WORKLOOP |
		    KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST, &fp, &fd, &kq);
		if (error) {
			break;
		}
		kqlock(kq);
		kqwl = (struct kqworkloop *)kq;
		trp.trp_value = kqwl->kqwl_params;
		if (trp.trp_flags && !(trp.trp_flags & TRP_RELEASED)) {
			trp.trp_flags |= TRP_RELEASED;
			kqueue_release(kq, KQUEUE_CANT_BE_LAST_REF);
		} else {
			error = EINVAL;
		}
		kqunlock(kq);
		kqueue_release_last(p, kq);
		break;
	}
	return error;
}
int
kqueue_workloop_ctl(proc_t p, struct kqueue_workloop_ctl_args *uap, int *retval)
{
	struct kqueue_workloop_params params = { };

	if (uap->sz < sizeof(params.kqwlp_version)) {
		return EINVAL;
	}

	size_t copyin_sz = MIN(sizeof(params), uap->sz);
	int rv = copyin(uap->addr, &params, copyin_sz);
	if (rv) {
		return rv;
	}

	if (params.kqwlp_version != (int)uap->sz) {
		return EINVAL;
	}

	return kqueue_workloop_ctl_internal(p, uap->cmd, uap->options, &params,
	           retval);
}
/*
 * kqueue_process - process the triggered events in a kqueue
 *
 * Walk the queued knotes and validate that they are really still triggered
 * events by calling the filter routines (if necessary).
 *
 * For each event that is still considered triggered, invoke the callback
 * routine provided.
 *
 * caller holds a reference on the kqueue.
 * kqueue locked on entry and exit - but may be dropped
 * kqueue list locked (held for duration of call)
 */
static int
kqueue_process(struct kqueue *kq,
    kevent_callback_t callback,
    void *callback_data,
    struct filt_process_s *process_data,
    int *countp)
{
	struct uthread *ut = get_bsdthread_info(current_thread());
	struct kqrequest *kqr = ut->uu_kqr_bound;
	struct knote *kn;
	unsigned int flags = process_data ? process_data->fp_flags : 0;
	int nevents = 0, error = 0, rc = 0;
	struct kqtailq *base_queue, *queue;
	kqueue_t kqu = { .kq = kq };
#if DEBUG || DEVELOPMENT
	int retries = 64;
#endif

	if (kq->kq_state & KQ_WORKQ) {
		if (kqr == NULL || (kqr->kqr_state & KQR_WORKLOOP)) {
			return EJUSTRETURN;
		}
		rc = kqworkq_begin_processing(kqu.kqwq, kqr, flags);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		if (ut->uu_kqr_bound != &kqu.kqwl->kqwl_request) {
			return EJUSTRETURN;
		}
		rc = kqworkloop_begin_processing(kqu.kqwl, flags);
	} else {
		rc = kqfile_begin_processing(kq);
	}

	if (rc == -1) {
		/* Nothing to process */
		*countp = 0;
		return 0;
	}

	/*
	 * loop through the enqueued knotes associated with this request,
	 * processing each one. Each request may have several queues
	 * of knotes to process (depending on the type of kqueue) so we
	 * have to loop through all the queues as long as we have additional
	 * space.
	 */

process_again:
	if (kq->kq_state & KQ_WORKQ) {
		base_queue = queue = &kqu.kqwq->kqwq_queue[kqr->kqr_qos_index];
	} else if (kq->kq_state & KQ_WORKLOOP) {
		base_queue = &kqu.kqwl->kqwl_queue[0];
		queue = &kqu.kqwl->kqwl_queue[KQWL_NBUCKETS - 1];
	} else {
		base_queue = queue = &kq->kq_queue[QOS_INDEX_KQFILE];
	}

	do {
		while (error == 0 && (kn = TAILQ_FIRST(queue)) != NULL) {
			error = knote_process(kn, callback, callback_data, process_data);
			if (error == EJUSTRETURN) {
				error = 0;
			} else {
				nevents++;
			}
			/* error is EWOULDBLOCK when the out event array is full */
		}

		if (error == EWOULDBLOCK) {
			/* break out if no more space for additional events */
			error = 0;
			break;
		}
	} while (queue-- > base_queue);

	*countp = nevents;

	/*
	 * If KEVENT_FLAG_PARKING is set, and no kevents have been returned,
	 * we want to unbind the kqrequest from the thread.
	 *
	 * However, because the kq locks are dropped several times during process,
	 * new knotes may have fired again, in which case, we want to fail the end
	 * processing and process again, until it converges.
	 *
	 * If we returned events however, end processing never fails.
	 */
	if (error || nevents) {
		flags &= ~KEVENT_FLAG_PARKING;
	}
	if (kq->kq_state & KQ_WORKQ) {
		rc = kqworkq_end_processing(kqu.kqwq, kqr, flags);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		rc = kqworkloop_end_processing(kqu.kqwl, KQ_PROCESSING, flags);
	} else {
		kqfile_end_processing(kq);
		rc = 0;
	}
	if (rc == -1) {
		assert(flags & KEVENT_FLAG_PARKING);
#if DEBUG || DEVELOPMENT
		if (retries-- == 0) {
			panic("kevent: way too many knote_process retries, kq: %p (0x%02x)",
			    kq, kq->kq_state);
		}
#endif
		goto process_again;
	}
	return error;
}
static void
kqueue_scan_continue(void *data, wait_result_t wait_result)
{
	thread_t self = current_thread();
	uthread_t ut = (uthread_t)get_bsdthread_info(self);
	struct _kqueue_scan * cont_args = &ut->uu_save.uus_kqueue_scan;
	struct kqueue *kq = (struct kqueue *)data;
	struct filt_process_s *process_data = cont_args->process_data;
	int error;

	/* convert the (previous) wait_result to a proper error */
	switch (wait_result) {
	case THREAD_AWAKENED: {
		int count;

		kqlock(kq);
retry:
		error = kqueue_process(kq, cont_args->call, cont_args->data,
		    process_data, &count);
		if (error == 0 && count == 0) {
			if (kq->kq_state & KQ_DRAIN) {
				kqunlock(kq);
				goto drain;
			}

			if (kq->kq_state & KQ_WAKEUP) {
				goto retry;
			}

			waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
			    KQ_EVENT, THREAD_ABORTSAFE,
			    cont_args->deadline);
			kq->kq_state |= KQ_SLEEP;
			kqunlock(kq);
			thread_block_parameter(kqueue_scan_continue, kq);
			/* NOTREACHED */
		}
		kqunlock(kq);
	} break;
	case THREAD_TIMED_OUT:
		error = EWOULDBLOCK;
		break;
	case THREAD_INTERRUPTED:
		error = EINTR;
		break;
	case THREAD_RESTART:
drain:
		error = EBADF;
		break;
	default:
		panic("%s: - invalid wait_result (%d)", __func__,
		    wait_result);
		error = 0;
	}

	/* call the continuation with the results */
	assert(cont_args->cont != NULL);
	(cont_args->cont)(kq, cont_args->data, error);
}
/*
 * kqueue_scan - scan and wait for events in a kqueue
 *
 * Process the triggered events in a kqueue.
 *
 * If there are no events triggered arrange to
 * wait for them.  If the caller provided a
 * continuation routine, then kevent_scan will
 * also.
 *
 * The callback routine must be valid.
 * The caller must hold a use-count reference on the kq.
 */
int
kqueue_scan(struct kqueue *kq,
    kevent_callback_t callback,
    kqueue_continue_t continuation,
    void *callback_data,
    struct filt_process_s *process_data,
    struct timeval *atvp,
    __unused struct proc *p)
{
	thread_continue_t cont = THREAD_CONTINUE_NULL;
	unsigned int flags;
	uint64_t deadline;
	int error;
	int first;
	int fd;

	assert(callback != NULL);

	/*
	 * Determine which QoS index we are servicing
	 */
	flags = (process_data) ? process_data->fp_flags : 0;
	fd = (process_data) ? process_data->fp_fd : -1;

	first = 1;
	for (;;) {
		wait_result_t wait_result;
		int count;

		/*
		 * Make a pass through the kq to find events already
		 * triggered.
		 */
		kqlock(kq);
		error = kqueue_process(kq, callback, callback_data,
		    process_data, &count);
		if (error || count) {
			break; /* lock still held */
		}
		/* looks like we have to consider blocking */
		if (first) {
			first = 0;
			/* convert the timeout to a deadline once */
			if (atvp->tv_sec || atvp->tv_usec) {
				uint64_t now;

				clock_get_uptime(&now);
				nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC +
				    atvp->tv_usec * (long)NSEC_PER_USEC,
				    &deadline);
				if (now >= deadline) {
					/* non-blocking call */
					error = EWOULDBLOCK;
					break; /* lock still held */
				}
				deadline -= now;
				clock_absolutetime_interval_to_deadline(deadline, &deadline);
			} else {
				deadline = 0; /* block forever */
			}

			if (continuation) {
				uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
				struct _kqueue_scan *cont_args = &ut->uu_save.uus_kqueue_scan;

				cont_args->call = callback;
				cont_args->cont = continuation;
				cont_args->deadline = deadline;
				cont_args->data = callback_data;
				cont_args->process_data = process_data;
				cont = kqueue_scan_continue;
			}
		}

		if (kq->kq_state & KQ_DRAIN) {
			kqunlock(kq);
			return EBADF;
		}

		/* If awakened during processing, try again */
		if (kq->kq_state & KQ_WAKEUP) {
			kqunlock(kq);
			continue;
		}

		/* go ahead and wait */
		waitq_assert_wait64_leeway((struct waitq *)&kq->kq_wqs,
		    KQ_EVENT, THREAD_ABORTSAFE,
		    TIMEOUT_URGENCY_USER_NORMAL,
		    deadline, TIMEOUT_NO_LEEWAY);
		kq->kq_state |= KQ_SLEEP;
		kqunlock(kq);
		wait_result = thread_block_parameter(cont, kq);
		/* NOTREACHED if (continuation != NULL) */

		switch (wait_result) {
		case THREAD_AWAKENED:
			continue;
		case THREAD_TIMED_OUT:
			return EWOULDBLOCK;
		case THREAD_INTERRUPTED:
			return EINTR;
		case THREAD_RESTART:
			return EBADF;
		default:
			panic("%s: - bad wait_result (%d)", __func__,
			    wait_result);
			error = 0;
		}
	}
	kqunlock(kq);
	return error;
}
5729 kqueue_read(__unused
struct fileproc
*fp
,
5730 __unused
struct uio
*uio
,
5732 __unused vfs_context_t ctx
)
5739 kqueue_write(__unused
struct fileproc
*fp
,
5740 __unused
struct uio
*uio
,
5742 __unused vfs_context_t ctx
)
5749 kqueue_ioctl(__unused
struct fileproc
*fp
,
5750 __unused u_long com
,
5751 __unused caddr_t data
,
5752 __unused vfs_context_t ctx
)
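
/*
 * kqueue_select - check whether a kqueue file descriptor is readable.
 *
 * A kqueue selects readable when at least one queued knote is truly
 * active; plain stay-active knotes are peeked at via f_peek to be sure.
 */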
static int
kqueue_select(struct fileproc *fp, int which, void *wq_link_id,
    __unused vfs_context_t ctx)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	struct kqtailq *queue;
	struct kqtailq *suppressq;
	struct knote *kn;
	int retnum = 0;

	if (which != FREAD) {
		return 0;
	}

	kqlock(kq);

	assert((kq->kq_state & KQ_WORKQ) == 0);

	/*
	 * If this is the first pass, link the wait queue associated with the
	 * the kqueue onto the wait queue set for the select().  Normally we
	 * use selrecord() for this, but it uses the wait queue within the
	 * selinfo structure and we need to use the main one for the kqueue to
	 * catch events from KN_STAYQUEUED sources. So we do the linkage manually.
	 * (The select() call will unlink them when it ends).
	 */
	if (wq_link_id != NULL) {
		thread_t cur_act = current_thread();
		struct uthread * ut = get_bsdthread_info(cur_act);

		kq->kq_state |= KQ_SEL;
		waitq_link((struct waitq *)&kq->kq_wqs, ut->uu_wqset,
		    WAITQ_SHOULD_LOCK, (uint64_t *)wq_link_id);

		/* always consume the reserved link object */
		waitq_link_release(*(uint64_t *)wq_link_id);
		*(uint64_t *)wq_link_id = 0;

		/*
		 * selprocess() is expecting that we send it back the waitq
		 * that was just added to the thread's waitq set. In order
		 * to not change the selrecord() API (which is exported to
		 * kexts), we pass this value back through the
		 * void *wq_link_id pointer we were passed. We need to use
		 * memcpy here because the pointer may not be properly aligned
		 * on 32-bit systems.
		 */
		void *wqptr = &kq->kq_wqs;
		memcpy(wq_link_id, (void *)&wqptr, sizeof(void *));
	}

	if (kqfile_begin_processing(kq) == -1) {
		kqunlock(kq);
		return 0;
	}

	queue = &kq->kq_queue[QOS_INDEX_KQFILE];
	if (!TAILQ_EMPTY(queue)) {
		/*
		 * there is something queued - but it might be a
		 * KN_STAYACTIVE knote, which may or may not have
		 * any events pending.  Otherwise, we have to walk
		 * the list of knotes to see, and peek at the
		 * (non-vanished) stay-active ones to be really sure.
		 */
		while ((kn = (struct knote *)TAILQ_FIRST(queue)) != NULL) {
			if (kn->kn_status & KN_ACTIVE) {
				retnum = 1;
				goto out;
			}
			assert(kn->kn_status & KN_STAYACTIVE);
			knote_suppress(kn);
		}

		/*
		 * There were no regular events on the queue, so take
		 * a deeper look at the stay-queued ones we suppressed.
		 */
		suppressq = kqueue_get_suppressed_queue(kq, NULL);
		while ((kn = (struct knote *)TAILQ_FIRST(suppressq)) != NULL) {
			KNOTE_LOCK_CTX(knlc);
			int result = 0;

			/* If didn't vanish while suppressed - peek at it */
			if ((kn->kn_status & KN_DROPPING) || !knote_lock(kq, kn, &knlc,
			    KNOTE_KQ_LOCK_ON_FAILURE)) {
				continue;
			}

			result = filter_call(knote_fops(kn), f_peek(kn));

			kqlock(kq);
			knote_unlock(kq, kn, &knlc, KNOTE_KQ_LOCK_ALWAYS);

			/* unsuppress it */
			knote_unsuppress(kn);

			/* has data or it has to report a vanish */
			if (result & FILTER_ACTIVE) {
				retnum = 1;
				goto out;
			}
		}
	}

out:
	kqfile_end_processing(kq);
	kqunlock(kq);
	return retnum;
}
static int
kqueue_close(struct fileglob *fg, __unused vfs_context_t ctx)
{
	struct kqfile *kqf = (struct kqfile *)fg->fg_data;

	assert((kqf->kqf_state & KQ_WORKQ) == 0);
	kqueue_dealloc(&kqf->kqf_kqueue);
	fg->fg_data = NULL;
	return 0;
}
/*
 * Max depth of the nested kq path that can be created.
 * Note that this has to be less than the size of kq_level
 * to avoid wrapping around and mislabeling the level.
 */
#define MAX_NESTED_KQ 1000

/*
 * The caller has taken a use-count reference on this kqueue and will donate it
 * to the kqueue we are being added to.  This keeps the kqueue from closing until
 * that relationship is torn down.
 */
static int
kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn,
    __unused struct kevent_internal_s *kev, __unused vfs_context_t ctx)
{
	struct kqfile *kqf = (struct kqfile *)kn->kn_fp->f_data;
	struct kqueue *kq = &kqf->kqf_kqueue;
	struct kqueue *parentkq = knote_get_kq(kn);
	uint16_t plevel = 0;

	assert((kqf->kqf_state & KQ_WORKQ) == 0);

	if (parentkq == kq || kn->kn_filter != EVFILT_READ) {
		knote_set_error(kn, EINVAL);
		return 0;
	}

	/*
	 * We have to avoid creating a cycle when nesting kqueues
	 * inside another.  Rather than trying to walk the whole
	 * potential DAG of nested kqueues, we just use a simple
	 * ceiling protocol.  When a kqueue is inserted into another,
	 * we check that the (future) parent is not already nested
	 * into another kqueue at a lower level than the potential
	 * child (because it could indicate a cycle).  If that test
	 * passes, we just mark the nesting levels accordingly.
	 *
	 * Only up to MAX_NESTED_KQ can be nested.
	 */

	kqlock(parentkq);
	if (parentkq->kq_level > 0 &&
	    parentkq->kq_level < kq->kq_level) {
		kqunlock(parentkq);
		knote_set_error(kn, EINVAL);
		return 0;
	} else {
		/* set parent level appropriately */
		plevel = (parentkq->kq_level == 0)? 2: parentkq->kq_level;
		if (plevel < kq->kq_level + 1) {
			if (kq->kq_level + 1 > MAX_NESTED_KQ) {
				kqunlock(parentkq);
				knote_set_error(kn, EINVAL);
				return 0;
			}
			plevel = kq->kq_level + 1;
		}

		parentkq->kq_level = plevel;
		kqunlock(parentkq);

		kn->kn_filtid = EVFILTID_KQREAD;
		kqlock(kq);
		KNOTE_ATTACH(&kqf->kqf_sel.si_note, kn);
		/* indicate nesting in child, if needed */
		if (kq->kq_level == 0) {
			kq->kq_level = 1;
		}

		int count = kq->kq_count;
		kqunlock(kq);
		return count > 0;
	}
}
/*
 * kqueue_drain - called when kq is closed
 */
static int
kqueue_drain(struct fileproc *fp, __unused vfs_context_t ctx)
{
	struct kqueue *kq = (struct kqueue *)fp->f_fglob->fg_data;

	assert((kq->kq_state & KQ_WORKQ) == 0);

	kqlock(kq);
	kq->kq_state |= KQ_DRAIN;
	kqueue_interrupt(kq);
	kqunlock(kq);
	return 0;
}
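
/*
 * kqueue_stat - populate stat/stat64 for a kqueue file.
 *
 * st_size reports the number of queued events and st_blksize the kevent
 * structure size the process would use to copy them out.
 */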
int
kqueue_stat(struct kqueue *kq, void *ub, int isstat64, proc_t p)
{
	assert((kq->kq_state & KQ_WORKQ) == 0);

	kqlock(kq);
	if (isstat64 != 0) {
		struct stat64 *sb64 = (struct stat64 *)ub;

		bzero((void *)sb64, sizeof(*sb64));
		sb64->st_size = kq->kq_count;
		if (kq->kq_state & KQ_KEV_QOS) {
			sb64->st_blksize = sizeof(struct kevent_qos_s);
		} else if (kq->kq_state & KQ_KEV64) {
			sb64->st_blksize = sizeof(struct kevent64_s);
		} else if (IS_64BIT_PROCESS(p)) {
			sb64->st_blksize = sizeof(struct user64_kevent);
		} else {
			sb64->st_blksize = sizeof(struct user32_kevent);
		}
		sb64->st_mode = S_IFIFO;
	} else {
		struct stat *sb = (struct stat *)ub;

		bzero((void *)sb, sizeof(*sb));
		sb->st_size = kq->kq_count;
		if (kq->kq_state & KQ_KEV_QOS) {
			sb->st_blksize = sizeof(struct kevent_qos_s);
		} else if (kq->kq_state & KQ_KEV64) {
			sb->st_blksize = sizeof(struct kevent64_s);
		} else if (IS_64BIT_PROCESS(p)) {
			sb->st_blksize = sizeof(struct user64_kevent);
		} else {
			sb->st_blksize = sizeof(struct user32_kevent);
		}
		sb->st_mode = S_IFIFO;
	}
	kqunlock(kq);
	return 0;
}
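
/*
 * kqueue_threadreq_can_use_ast - may a failed thread request arm an AST
 * to redrive creation?  Only allowed for the listed BSD syscalls, made
 * by the process that owns the kqueue.
 */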
static bool
kqueue_threadreq_can_use_ast(struct kqueue *kq)
{
	if (current_proc() == kq->kq_p) {
		/*
		 * Setting an AST from a non BSD syscall is unsafe: mach_msg_trap() can
		 * do combined send/receive and in the case of self-IPC, the AST may be
		 * set on a thread that will not return to userspace and needs the
		 * thread the AST would create to unblock itself.
		 *
		 * At this time, we really want to target:
		 *
		 * - kevent variants that can cause thread creations, and dispatch
		 *   really only uses kevent_qos and kevent_id,
		 *
		 * - workq_kernreturn (directly about thread creations)
		 *
		 * - bsdthread_ctl which is used for qos changes and has direct impact
		 *   on the creator thread scheduling decisions.
		 */
		switch (current_uthread()->syscall_code) {
		case SYS_kevent_qos:
		case SYS_kevent_id:
		case SYS_workq_kernreturn:
		case SYS_bsdthread_ctl:
			return true;
		}
	}
	return false;
}
/*
 * Interact with the pthread kext to request a servicing there at a specific QoS
 * level.
 *
 * - Caller holds the workq request lock
 *
 * - May be called with the kqueue's wait queue set locked,
 *   so cannot do anything that could recurse on that.
 */
static void
kqueue_threadreq_initiate(struct kqueue *kq, struct kqrequest *kqr,
    kq_index_t qos, int flags)
{
	assert(kqr->kqr_state & KQR_WAKEUP);
	assert(kqr->kqr_thread == THREAD_NULL);
	assert((kqr->kqr_state & KQR_THREQUESTED) == 0);
	struct turnstile *ts = TURNSTILE_NULL;

	if (workq_is_exiting(kq->kq_p)) {
		return;
	}

	/* Add a thread request reference on the kqueue. */
	kqueue_retain(kq);

	kq_req_held(kq);

	if (kq->kq_state & KQ_WORKLOOP) {
		__assert_only struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		assert(kqwl->kqwl_owner == THREAD_NULL);
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THREQUEST),
		    kqwl->kqwl_dynamicid, 0, qos, kqr->kqr_state);
		ts = kqwl->kqwl_turnstile;
	} else {
		assert(kq->kq_state & KQ_WORKQ);
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_THREQUEST),
		    -1, 0, qos, kqr->kqr_state);
	}

	kqr->kqr_state |= KQR_THREQUESTED;

	/*
	 * New-style thread request supported.
	 * Provide the pthread kext a pointer to a workq_threadreq_s structure for
	 * its use until a corresponding kqueue_threadreq_bind callback.
	 */
	if (kqueue_threadreq_can_use_ast(kq)) {
		flags |= WORKQ_THREADREQ_SET_AST_ON_FAILURE;
	}
	if (qos == KQWQ_QOS_MANAGER) {
		qos = WORKQ_THREAD_QOS_MANAGER;
	}
	if (!workq_kern_threadreq_initiate(kq->kq_p, kqr, ts, qos, flags)) {
		/*
		 * Process is shutting down or exec'ing.
		 * All the kqueues are going to be cleaned up
		 * soon. Forget we even asked for a thread -
		 * and make sure we don't ask for more.
		 */
		kqr->kqr_state &= ~(KQR_THREQUESTED | KQR_R2K_NOTIF_ARMED);
		kqueue_release(kq, KQUEUE_CANT_BE_LAST_REF);
	}
}
/*
 * kqueue_threadreq_bind_prepost - prepost the bind to kevent
 *
 * This is used when kqueue_threadreq_bind may cause a lock inversion.
 */
void
kqueue_threadreq_bind_prepost(struct proc *p __unused, workq_threadreq_t req,
    thread_t thread)
{
	struct kqrequest *kqr = __container_of(req, struct kqrequest, kqr_req);
	struct uthread *ut = get_bsdthread_info(thread);

	req->tr_binding_thread = thread;
	ut->uu_kqr_bound = kqr;
	req->tr_state = TR_STATE_BINDING;

	struct kqworkloop *kqwl = kqr_kqworkloop(kqr);
	if (kqwl && kqwl->kqwl_turnstile) {
		struct turnstile *ts = kqwl->kqwl_turnstile;
		/*
		 * While a thread request is in flight, the workqueue
		 * is the interlock for the turnstile and can update the inheritor.
		 */
		turnstile_update_inheritor(ts, thread, TURNSTILE_IMMEDIATE_UPDATE |
		    TURNSTILE_INHERITOR_THREAD);
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	}
}
/*
 * kqueue_threadreq_bind_commit - commit a bind prepost
 *
 * The workq code has to commit any binding prepost before the thread has
 * a chance to come back to userspace (and do kevent syscalls) or be aborted.
 */
void
kqueue_threadreq_bind_commit(struct proc *p, thread_t thread)
{
	struct uthread *ut = get_bsdthread_info(thread);
	struct kqrequest *kqr = ut->uu_kqr_bound;
	kqueue_t kqu = kqr_kqueue(p, kqr);

	kq_req_lock(kqu);
	if (kqr->kqr_req.tr_state == TR_STATE_BINDING) {
		kqueue_threadreq_bind(p, &kqr->kqr_req, thread, 0);
	}
	kq_req_unlock(kqu);
}
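
/*
 * kqueue_threadreq_modify - change the QoS of an outstanding (still
 * unbound) thread request through the workqueue subsystem.
 */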
static void
kqueue_threadreq_modify(struct kqueue *kq, struct kqrequest *kqr, kq_index_t qos)
{
	assert(kqr->kqr_state & KQR_THREQUESTED);
	assert(kqr->kqr_thread == THREAD_NULL);

	kq_req_held(kq);

	int flags = 0;
	if (kqueue_threadreq_can_use_ast(kq)) {
		flags |= WORKQ_THREADREQ_SET_AST_ON_FAILURE;
	}
	workq_kern_threadreq_modify(kq->kq_p, kqr, qos, flags);
}
/*
 * kqueue_threadreq_bind - bind thread to processing kqrequest
 *
 * The provided thread will be responsible for delivering events
 * associated with the given kqrequest.  Bind it and get ready for
 * the thread to eventually arrive.
 */
void
kqueue_threadreq_bind(struct proc *p, workq_threadreq_t req, thread_t thread,
    unsigned int flags)
{
	struct kqrequest *kqr = __container_of(req, struct kqrequest, kqr_req);
	kqueue_t kqu = kqr_kqueue(p, kqr);
	struct uthread *ut = get_bsdthread_info(thread);

	kq_req_held(kqu);

	assert(kqr->kqr_state & KQR_THREQUESTED);
	assert(kqr->kqr_thread == THREAD_NULL);
	assert(ut->uu_kqueue_override == 0);

	if (kqr->kqr_req.tr_state == TR_STATE_BINDING) {
		assert(ut->uu_kqr_bound == kqr);
		assert(kqr->kqr_req.tr_binding_thread == thread);
		kqr->kqr_req.tr_state = TR_STATE_IDLE;
		kqr->kqr_req.tr_binding_thread = NULL;
	} else {
		assert(ut->uu_kqr_bound == NULL);
	}

	ut->uu_kqr_bound = kqr;
	kqr->kqr_thread = thread;

	if (kqu.kq->kq_state & KQ_WORKLOOP) {
		struct turnstile *ts = kqu.kqwl->kqwl_turnstile;

		if (__improbable(thread == kqu.kqwl->kqwl_owner)) {
			/*
			 * <rdar://problem/38626999> shows that asserting here is not ok.
			 *
			 * This is not supposed to happen for correct use of the interface,
			 * but it is sadly possible for userspace (with the help of memory
			 * corruption, such as over-release of a dispatch queue) to make
			 * the creator thread the "owner" of a workloop.
			 *
			 * Once that happens, and that creator thread picks up the same
			 * workloop as a servicer, we trip this codepath. We need to fixup
			 * the state to forget about this thread being the owner, as the
			 * entire workloop state machine expects servicers to never be
			 * owners and everything would basically go downhill from here.
			 */
			kqu.kqwl->kqwl_owner = THREAD_NULL;
			if (kqworkloop_owner_override(kqu.kqwl)) {
				thread_drop_ipc_override(thread);
			}
			thread_ends_owning_workloop(thread);
		}

		if (ts && (flags & KQUEUE_THREADERQ_BIND_NO_INHERITOR_UPDATE) == 0) {
			/*
			 * Past this point, the interlock is the kq req lock again,
			 * so we can fix the inheritor for good.
			 */
			filt_wlupdate_inheritor(kqu.kqwl, ts, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		}

		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_BIND), kqu.kqwl->kqwl_dynamicid,
		    thread_tid(thread), kqr->kqr_qos_index,
		    (kqr->kqr_override_index << 16) | kqr->kqr_state);

		ut->uu_kqueue_override = kqr->kqr_override_index;
		if (kqr->kqr_override_index) {
			thread_add_ipc_override(thread, kqr->kqr_override_index);
		}
	} else {
		assert(kqr->kqr_override_index == 0);

		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_BIND), -1,
		    thread_tid(thread), kqr->kqr_qos_index,
		    (kqr->kqr_override_index << 16) | kqr->kqr_state);
	}
}
/*
 * kqueue_threadreq_cancel - abort a pending thread request
 *
 * Called when exiting/exec'ing. Forget our pending request.
 */
void
kqueue_threadreq_cancel(struct proc *p, workq_threadreq_t req)
{
	struct kqrequest *kqr = __container_of(req, struct kqrequest, kqr_req);
	kqueue_t kqu = kqr_kqueue(p, kqr);

	kq_req_lock(kqu);

	assert(kqr->kqr_thread == THREAD_NULL);
	assert(kqr->kqr_state & KQR_THREQUESTED);
	kqr->kqr_state &= ~(KQR_THREQUESTED | KQR_R2K_NOTIF_ARMED);

	kq_req_unlock(kqu);

	kqueue_release_last(p, kqu); /* may dealloc kqu */
}
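
/*
 * kqueue_threadreq_workloop_param - return the scheduling parameters a
 * dynamic workloop was created with (trp snapshot stored in kqwl_params).
 */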
workq_threadreq_param_t
kqueue_threadreq_workloop_param(workq_threadreq_t req)
{
	struct kqrequest *kqr = __container_of(req, struct kqrequest, kqr_req);
	struct kqworkloop *kqwl;
	workq_threadreq_param_t trp;

	assert(kqr->kqr_state & KQR_WORKLOOP);
	kqwl = __container_of(kqr, struct kqworkloop, kqwl_request);
	trp.trp_value = kqwl->kqwl_params;
	return trp;
}
/*
 * kqueue_threadreq_unbind - unbind thread from processing kqueue
 *
 * End processing the per-QoS bucket of events and allow other threads
 * to be requested for future servicing.
 *
 * caller holds a reference on the kqueue.
 */
void
kqueue_threadreq_unbind(struct proc *p, struct kqrequest *kqr)
{
	if (kqr->kqr_state & KQR_WORKLOOP) {
		kqworkloop_unbind(p, kqr_kqworkloop(kqr));
	} else {
		kqworkq_unbind(p, kqr);
	}
}
/*
 * If we aren't already busy processing events [for this QoS],
 * request workq thread support as appropriate.
 *
 * TBD - for now, we don't segregate out processing by QoS.
 *
 * - May be called with the kqueue's wait queue set locked,
 *   so cannot do anything that could recurse on that.
 */
static void
kqworkq_request_help(struct kqworkq *kqwq, kq_index_t qos_index)
{
	struct kqrequest *kqr;

	/* convert to thread qos value */
	assert(qos_index < KQWQ_NBUCKETS);

	kq_req_lock(kqwq);
	kqr = kqworkq_get_request(kqwq, qos_index);

	if ((kqr->kqr_state & KQR_WAKEUP) == 0) {
		kqr->kqr_state |= KQR_WAKEUP;
		if ((kqr->kqr_state & KQR_THREQUESTED) == 0) {
			kqueue_threadreq_initiate(&kqwq->kqwq_kqueue, kqr, qos_index, 0);
		}
	}
	kq_req_unlock(kqwq);
}
static kq_index_t
kqworkloop_owner_override(struct kqworkloop *kqwl)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;
	return MAX(kqr->kqr_qos_index, kqr->kqr_override_index);
}
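
/*
 * Fire the armed return-to-kernel notification: if KQR_R2K_NOTIF_ARMED
 * is set, post an AST on the bound servicer thread and disarm it.
 */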
static inline void
kqworkloop_request_fire_r2k_notification(struct kqworkloop *kqwl)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;

	kq_req_held(kqwl);

	if (kqr->kqr_state & KQR_R2K_NOTIF_ARMED) {
		assert(kqr->kqr_thread);
		kqr->kqr_state &= ~KQR_R2K_NOTIF_ARMED;
		act_set_astkevent(kqr->kqr_thread, AST_KEVENT_RETURN_TO_KERNEL);
	}
}
static void
kqworkloop_update_threads_qos(struct kqworkloop *kqwl, int op, kq_index_t qos)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;
	struct kqueue *kq = &kqwl->kqwl_kqueue;
	kq_index_t old_owner_override = kqworkloop_owner_override(kqwl);
	kq_index_t i;

	/* must hold the kqr lock */
	kq_req_held(kqwl);

	switch (op) {
	case KQWL_UTQ_UPDATE_WAKEUP_QOS:
		if (qos == KQWL_BUCKET_STAYACTIVE) {
			/*
			 * the KQWL_BUCKET_STAYACTIVE is not a QoS bucket, we only remember
			 * a high watermark (kqr_stayactive_qos) of any stay active knote
			 * that was ever registered with this workloop.
			 *
			 * When waitq_set__CALLING_PREPOST_HOOK__() wakes up any stay active
			 * knote, we use this high-watermark as a wakeup-index, and also set
			 * the magic KQWL_BUCKET_STAYACTIVE bit to make sure we remember
			 * there is at least one stay active knote fired until the next full
			 * processing of this bucket.
			 */
			kqr->kqr_wakeup_indexes |= KQWL_STAYACTIVE_FIRED_BIT;
			qos = kqr->kqr_stayactive_qos;
			assert(qos);
		}
		if (kqr->kqr_wakeup_indexes & (1 << qos)) {
			assert(kqr->kqr_state & KQR_WAKEUP);
			break;
		}

		kqr->kqr_wakeup_indexes |= (1 << qos);
		kqr->kqr_state |= KQR_WAKEUP;
		kqworkloop_request_fire_r2k_notification(kqwl);
		goto recompute;

	case KQWL_UTQ_UPDATE_STAYACTIVE_QOS:
		assert(qos);
		if (kqr->kqr_stayactive_qos < qos) {
			kqr->kqr_stayactive_qos = qos;
			if (kqr->kqr_wakeup_indexes & KQWL_STAYACTIVE_FIRED_BIT) {
				assert(kqr->kqr_state & KQR_WAKEUP);
				kqr->kqr_wakeup_indexes |= (1 << qos);
				goto recompute;
			}
		}
		break;

	case KQWL_UTQ_PARKING:
	case KQWL_UTQ_UNBINDING:
		kqr->kqr_override_index = qos;
	/* FALLTHROUGH */
	case KQWL_UTQ_RECOMPUTE_WAKEUP_QOS:
		if (op == KQWL_UTQ_RECOMPUTE_WAKEUP_QOS) {
			assert(qos == THREAD_QOS_UNSPECIFIED);
		}
		kqlock_held(kqwl); // to look at kq_queues
		i = KQWL_BUCKET_STAYACTIVE;
		if (TAILQ_EMPTY(&kqr->kqr_suppressed)) {
			kqr->kqr_override_index = THREAD_QOS_UNSPECIFIED;
		}
		if (!TAILQ_EMPTY(&kqwl->kqwl_queue[i]) &&
		    (kqr->kqr_wakeup_indexes & KQWL_STAYACTIVE_FIRED_BIT)) {
			/*
			 * If the KQWL_STAYACTIVE_FIRED_BIT is set, it means a stay active
			 * knote may have fired, so we need to merge in kqr_stayactive_qos.
			 *
			 * Unlike other buckets, this one is never empty but could be idle.
			 */
			kqr->kqr_wakeup_indexes &= KQWL_STAYACTIVE_FIRED_BIT;
			kqr->kqr_wakeup_indexes |= (1 << kqr->kqr_stayactive_qos);
		} else {
			kqr->kqr_wakeup_indexes = 0;
		}
		for (i = THREAD_QOS_UNSPECIFIED + 1; i < KQWL_BUCKET_STAYACTIVE; i++) {
			if (!TAILQ_EMPTY(&kqwl->kqwl_queue[i])) {
				kqr->kqr_wakeup_indexes |= (1 << i);
			}
		}
		if (kqr->kqr_wakeup_indexes) {
			kqr->kqr_state |= KQR_WAKEUP;
			kqworkloop_request_fire_r2k_notification(kqwl);
		} else {
			kqr->kqr_state &= ~KQR_WAKEUP;
		}
		goto recompute;

	case KQWL_UTQ_RESET_WAKEUP_OVERRIDE:
		kqr->kqr_override_index = qos;
		goto recompute;

	case KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE:
recompute:
		/*
		 * When modifying the wakeup QoS or the override QoS, we always need to
		 * maintain our invariant that kqr_override_index is at least as large
		 * as the highest QoS for which an event is fired.
		 *
		 * However this override index can be larger when there is an overriden
		 * suppressed knote pushing on the kqueue.
		 */
		if (kqr->kqr_wakeup_indexes > (1 << qos)) {
			qos = fls(kqr->kqr_wakeup_indexes) - 1; /* fls is 1-based */
		}
		if (kqr->kqr_override_index < qos) {
			kqr->kqr_override_index = qos;
		}
		break;

	case KQWL_UTQ_REDRIVE_EVENTS:
		break;

	case KQWL_UTQ_SET_QOS_INDEX:
		kqr->kqr_qos_index = qos;
		break;

	default:
		panic("unknown kqwl thread qos update operation: %d", op);
	}

	thread_t kqwl_owner = kqwl->kqwl_owner;
	thread_t servicer = kqr->kqr_thread;
	boolean_t qos_changed = FALSE;
	kq_index_t new_owner_override = kqworkloop_owner_override(kqwl);

	/*
	 * Apply the diffs to the owner if applicable
	 */
	if (kqwl_owner) {
#if 0
		/* JMM - need new trace hooks for owner overrides */
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST),
		    kqwl->kqwl_dynamicid, thread_tid(kqwl_owner), kqr->kqr_qos_index,
		    (kqr->kqr_override_index << 16) | kqr->kqr_state);
#endif
		if (new_owner_override == old_owner_override) {
			/* nothing to do */
		} else if (old_owner_override == THREAD_QOS_UNSPECIFIED) {
			thread_add_ipc_override(kqwl_owner, new_owner_override);
		} else if (new_owner_override == THREAD_QOS_UNSPECIFIED) {
			thread_drop_ipc_override(kqwl_owner);
		} else { /* old_owner_override != new_owner_override */
			thread_update_ipc_override(kqwl_owner, new_owner_override);
		}
	}

	/*
	 * apply the diffs to the servicer
	 */
	if ((kqr->kqr_state & KQR_THREQUESTED) == 0) {
		/*
		 * No servicer, nor thread-request
		 *
		 * Make a new thread request, unless there is an owner (or the workloop
		 * is suspended in userland) or if there is no asynchronous work in the
		 * first place.
		 */
		if (kqwl_owner == NULL && (kqr->kqr_state & KQR_WAKEUP)) {
			int initiate_flags = 0;
			if (op == KQWL_UTQ_UNBINDING) {
				initiate_flags = WORKQ_THREADREQ_ATTEMPT_REBIND;
			}
			kqueue_threadreq_initiate(kq, kqr, new_owner_override,
			    initiate_flags);
		}
	} else if (servicer) {
		/*
		 * Servicer in flight
		 *
		 * Just apply the diff to the servicer
		 */
		struct uthread *ut = get_bsdthread_info(servicer);
		if (ut->uu_kqueue_override != kqr->kqr_override_index) {
			if (ut->uu_kqueue_override == THREAD_QOS_UNSPECIFIED) {
				thread_add_ipc_override(servicer, kqr->kqr_override_index);
			} else if (kqr->kqr_override_index == THREAD_QOS_UNSPECIFIED) {
				thread_drop_ipc_override(servicer);
			} else { /* ut->uu_kqueue_override != kqr->kqr_override_index */
				thread_update_ipc_override(servicer, kqr->kqr_override_index);
			}
			ut->uu_kqueue_override = kqr->kqr_override_index;
			qos_changed = TRUE;
		}
	} else if (new_owner_override == THREAD_QOS_UNSPECIFIED) {
		/*
		 * No events to deliver anymore.
		 *
		 * However canceling with turnstiles is challenging, so the fact that
		 * the request isn't useful will be discovered by the servicer himself
		 * later on.
		 */
	} else if (old_owner_override != new_owner_override) {
		/*
		 * Request is in flight
		 *
		 * Apply the diff to the thread request
		 */
		kqueue_threadreq_modify(kq, kqr, new_owner_override);
		qos_changed = TRUE;
	}

	if (qos_changed) {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST), kqwl->kqwl_dynamicid,
		    thread_tid(kqr->kqr_thread), kqr->kqr_qos_index,
		    (kqr->kqr_override_index << 16) | kqr->kqr_state);
	}
}
static void
kqworkloop_request_help(struct kqworkloop *kqwl, kq_index_t qos_index)
{
	/* convert to thread qos value */
	assert(qos_index < KQWL_NBUCKETS);

	kq_req_lock(kqwl);
	kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_WAKEUP_QOS, qos_index);
	kq_req_unlock(kqwl);
}
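
/*
 * kqueue_get_queue - return the knote queue (bucket) for a given QoS
 * index, asserting the index is valid for this flavor of kqueue.
 */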
static struct kqtailq *
kqueue_get_queue(struct kqueue *kq, kq_index_t qos_index)
{
	if (kq->kq_state & KQ_WORKQ) {
		assert(qos_index < KQWQ_NBUCKETS);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		assert(qos_index < KQWL_NBUCKETS);
	} else {
		assert(qos_index == QOS_INDEX_KQFILE);
	}
	static_assert(offsetof(struct kqueue, kq_queue) == sizeof(struct kqueue),
	    "struct kqueue::kq_queue must be exactly at the end");
	return &kq->kq_queue[qos_index];
}
static int
kqueue_queue_empty(struct kqueue *kq, kq_index_t qos_index)
{
	return TAILQ_EMPTY(kqueue_get_queue(kq, qos_index));
}
static struct kqtailq *
kqueue_get_suppressed_queue(kqueue_t kq, struct knote *kn)
{
	if (kq.kq->kq_state & KQ_WORKQ) {
		return &kqworkq_get_request(kq.kqwq, kn->kn_qos_index)->kqr_suppressed;
	} else if (kq.kq->kq_state & KQ_WORKLOOP) {
		return &kq.kqwl->kqwl_request.kqr_suppressed;
	} else {
		return &kq.kqf->kqf_suppressed;
	}
}
static struct turnstile *
kqueue_get_turnstile(kqueue_t kqu, bool can_alloc)
{
	uint8_t kqr_state;

	if ((kqu.kq->kq_state & KQ_WORKLOOP) == 0) {
		return TURNSTILE_NULL;
	}

	kqr_state = os_atomic_load(&kqu.kqwl->kqwl_request.kqr_state, relaxed);
	if (kqr_state & KQR_ALLOCATED_TURNSTILE) {
		/* force a dependency to pair with the atomic or with release below */
		return os_atomic_load_with_dependency_on(&kqu.kqwl->kqwl_turnstile,
		           kqr_state);
	}

	if (!can_alloc) {
		return TURNSTILE_NULL;
	}

	struct turnstile *ts = turnstile_alloc(), *free_ts = TURNSTILE_NULL;

	kq_req_lock(kqu.kqwl);
	if (filt_wlturnstile_interlock_is_workq(kqu.kqwl)) {
		workq_kern_threadreq_lock(kqu.kqwl->kqwl_p);
	}

	if (kqu.kqwl->kqwl_request.kqr_state & KQR_ALLOCATED_TURNSTILE) {
		free_ts = ts;
		ts = kqu.kqwl->kqwl_turnstile;
	} else {
		ts = turnstile_prepare((uintptr_t)kqu.kqwl, &kqu.kqwl->kqwl_turnstile,
		    ts, TURNSTILE_WORKLOOPS);

		/* release-barrier to pair with the unlocked load of kqwl_turnstile above */
		os_atomic_or(&kqu.kqwl->kqwl_request.kqr_state,
		    KQR_ALLOCATED_TURNSTILE, release);
	}

	if (filt_wlturnstile_interlock_is_workq(kqu.kqwl)) {
		workq_kern_threadreq_unlock(kqu.kqwl->kqwl_p);
	}
	kq_req_unlock(kqu.kqwl);

	if (free_ts) {
		turnstile_deallocate(free_ts);
	}
	return ts;
}
struct turnstile *
kqueue_turnstile(struct kqueue *kq)
{
	return kqueue_get_turnstile(kq, false);
}

struct turnstile *
kqueue_alloc_turnstile(struct kqueue *kq)
{
	return kqueue_get_turnstile(kq, true);
}
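
/*
 * knote_get_queue - queue (bucket) this knote is enqueued on, derived
 * from its kqueue and its in-use QoS index.
 */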
static struct kqtailq *
knote_get_queue(struct knote *kn)
{
	return kqueue_get_queue(knote_get_kq(kn), kn->kn_qos_index);
}
static void
knote_reset_priority(struct knote *kn, pthread_priority_t pp)
{
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t qos = _pthread_priority_thread_qos(pp);

	assert((kn->kn_status & KN_QUEUED) == 0);

	if (kq->kq_state & KQ_WORKQ) {
		if (qos == THREAD_QOS_UNSPECIFIED) {
			/* On workqueues, outside of QoS means MANAGER */
			qos = KQWQ_QOS_MANAGER;
			pp = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
		} else {
			pp = _pthread_priority_normalize(pp);
		}
	} else if (kq->kq_state & KQ_WORKLOOP) {
		assert((pp & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) == 0);
		pp = _pthread_priority_normalize(pp);
	} else {
		pp = _pthread_unspecified_priority();
		qos = THREAD_QOS_UNSPECIFIED;
	}

	kn->kn_qos = pp;
	kn->kn_req_index = qos;

	if ((kn->kn_status & KN_MERGE_QOS) == 0 || qos > kn->kn_qos_override) {
		/* Never lower QoS when in "Merge" mode */
		kn->kn_qos_override = qos;
	}

	/* only adjust in-use qos index when not suppressed */
	if ((kn->kn_status & KN_SUPPRESSED) == 0) {
		kn->kn_qos_index = qos;
	} else if (kq->kq_state & KQ_WORKQ) {
		kqworkq_update_override((struct kqworkq *)kq, kn, qos);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		kqworkloop_update_override((struct kqworkloop *)kq, qos);
	}
}
6735 knote_set_qos_overcommit(struct knote
*kn
)
6737 struct kqueue
*kq
= knote_get_kq(kn
);
6739 /* turn overcommit on for the appropriate thread request? */
6740 if ((kn
->kn_qos
& _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
) &&
6741 (kq
->kq_state
& KQ_WORKLOOP
)) {
6742 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
6743 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6746 * This test is racy, but since we never remove this bit,
6747 * it allows us to avoid taking a lock.
6749 if (kqr
->kqr_state
& KQR_THOVERCOMMIT
) {
6754 kqr
->kqr_state
|= KQR_THOVERCOMMIT
;
6755 if (!kqr
->kqr_thread
&& (kqr
->kqr_state
& KQR_THREQUESTED
)) {
6756 kqueue_threadreq_modify(kq
, kqr
, kqr
->kqr_req
.tr_qos
);
6758 kq_req_unlock(kqwl
);
static kq_index_t
knote_get_qos_override_index(struct knote *kn)
{
	return kn->kn_qos_override;
}
6769 kqworkq_update_override(struct kqworkq
*kqwq
, struct knote
*kn
,
6770 kq_index_t override_index
)
6772 struct kqrequest
*kqr
;
6773 kq_index_t old_override_index
;
6774 kq_index_t queue_index
= kn
->kn_qos_index
;
6776 if (override_index
<= queue_index
) {
6780 kqr
= kqworkq_get_request(kqwq
, queue_index
);
6783 old_override_index
= kqr
->kqr_override_index
;
6784 if (override_index
> MAX(kqr
->kqr_qos_index
, old_override_index
)) {
6785 kqr
->kqr_override_index
= override_index
;
6787 /* apply the override to [incoming?] servicing thread */
6788 if (kqr
->kqr_thread
) {
6789 if (old_override_index
) {
6790 thread_update_ipc_override(kqr
->kqr_thread
, override_index
);
6792 thread_add_ipc_override(kqr
->kqr_thread
, override_index
);
6796 kq_req_unlock(kqwq
);
6800 kqworkloop_update_override(struct kqworkloop
*kqwl
, kq_index_t override_index
)
6803 kqworkloop_update_threads_qos(kqwl
, KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE
,
6805 kq_req_unlock(kqwl
);
6809 kqworkloop_unbind_locked(struct kqworkloop
*kqwl
, thread_t thread
)
6811 struct uthread
*ut
= get_bsdthread_info(thread
);
6812 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6813 kq_index_t ipc_override
= ut
->uu_kqueue_override
;
6815 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_UNBIND
), kqwl
->kqwl_dynamicid
,
6816 thread_tid(thread
), 0, 0);
6819 assert(ut
->uu_kqr_bound
== kqr
);
6820 ut
->uu_kqr_bound
= NULL
;
6821 ut
->uu_kqueue_override
= THREAD_QOS_UNSPECIFIED
;
6823 if (kqwl
->kqwl_owner
== NULL
&& kqwl
->kqwl_turnstile
) {
6824 turnstile_update_inheritor(kqwl
->kqwl_turnstile
,
6825 TURNSTILE_INHERITOR_NULL
, TURNSTILE_IMMEDIATE_UPDATE
);
6826 turnstile_update_inheritor_complete(kqwl
->kqwl_turnstile
,
6827 TURNSTILE_INTERLOCK_HELD
);
6830 kqr
->kqr_thread
= NULL
;
6831 kqr
->kqr_state
&= ~(KQR_THREQUESTED
| KQR_R2K_NOTIF_ARMED
);
6832 return ipc_override
;
6836 * kqworkloop_unbind - Unbind the servicer thread of a workloop kqueue
6838 * It will acknowledge events, and possibly request a new thread if:
6839 * - there were active events left
6840 * - we pended waitq hook callouts during processing
6841 * - we pended wakeups while processing (or unsuppressing)
6843 * Called with kqueue lock held.
6846 kqworkloop_unbind(proc_t p
, struct kqworkloop
*kqwl
)
6848 struct kqueue
*kq
= &kqwl
->kqwl_kqueue
;
6849 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6850 thread_t thread
= kqr
->kqr_thread
;
6851 int op
= KQWL_UTQ_PARKING
;
6852 kq_index_t ipc_override
, qos_override
= THREAD_QOS_UNSPECIFIED
;
6854 assert(thread
== current_thread());
6859 * Forcing the KQ_PROCESSING flag allows for QoS updates because of
6860 * unsuppressing knotes not to be applied until the eventual call to
6861 * kqworkloop_update_threads_qos() below.
6863 assert((kq
->kq_state
& KQ_PROCESSING
) == 0);
6864 if (!TAILQ_EMPTY(&kqr
->kqr_suppressed
)) {
6865 kq
->kq_state
|= KQ_PROCESSING
;
6866 qos_override
= kqworkloop_acknowledge_events(kqwl
);
6867 kq
->kq_state
&= ~KQ_PROCESSING
;
6872 ipc_override
= kqworkloop_unbind_locked(kqwl
, thread
);
6873 kqworkloop_update_threads_qos(kqwl
, op
, qos_override
);
6875 kq_req_unlock(kqwl
);
6880 * Drop the override on the current thread last, after the call to
6881 * kqworkloop_update_threads_qos above.
6884 thread_drop_ipc_override(thread
);
6887 /* If last reference, dealloc the workloop kq */
6888 kqueue_release_last(p
, kqwl
);
6892 kqworkq_unbind_locked(__assert_only
struct kqworkq
*kqwq
,
6893 struct kqrequest
*kqr
, thread_t thread
)
6895 struct uthread
*ut
= get_bsdthread_info(thread
);
6896 kq_index_t old_override
= kqr
->kqr_override_index
;
6898 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_UNBIND
), -1,
6899 thread_tid(kqr
->kqr_thread
), kqr
->kqr_qos_index
, 0);
6902 assert(ut
->uu_kqr_bound
== kqr
);
6903 ut
->uu_kqr_bound
= NULL
;
6904 kqr
->kqr_thread
= NULL
;
6905 kqr
->kqr_state
&= ~(KQR_THREQUESTED
| KQR_R2K_NOTIF_ARMED
);
6906 kqr
->kqr_override_index
= THREAD_QOS_UNSPECIFIED
;
6908 return old_override
;
6912 * kqworkq_unbind - unbind of a workq kqueue from a thread
6914 * We may have to request new threads.
6915 * This can happen there are no waiting processing threads and:
6916 * - there were active events we never got to (count > 0)
6917 * - we pended waitq hook callouts during processing
6918 * - we pended wakeups while processing (or unsuppressing)
6921 kqworkq_unbind(proc_t p
, struct kqrequest
*kqr
)
6923 struct kqworkq
*kqwq
= (struct kqworkq
*)p
->p_fd
->fd_wqkqueue
;
6924 __assert_only
int rc
;
6927 rc
= kqworkq_acknowledge_events(kqwq
, kqr
, 0, KQWQAE_UNBIND
);
static struct kqrequest *
kqworkq_get_request(struct kqworkq *kqwq, kq_index_t qos_index)
{
	assert(qos_index < KQWQ_NBUCKETS);
	return &kqwq->kqwq_request[qos_index];
}
6940 knote_apply_qos_override(struct knote
*kn
, kq_index_t qos_index
)
6942 assert((kn
->kn_status
& KN_QUEUED
) == 0);
6944 kn
->kn_qos_override
= qos_index
;
6946 if (kn
->kn_status
& KN_SUPPRESSED
) {
6947 struct kqueue
*kq
= knote_get_kq(kn
);
6949 * For suppressed events, the kn_qos_index field cannot be touched as it
6950 * allows us to know on which supress queue the knote is for a kqworkq.
6952 * Also, there's no natural push applied on the kqueues when this field
6953 * changes anyway. We hence need to apply manual overrides in this case,
6954 * which will be cleared when the events are later acknowledged.
6956 if (kq
->kq_state
& KQ_WORKQ
) {
6957 kqworkq_update_override((struct kqworkq
*)kq
, kn
, qos_index
);
6959 kqworkloop_update_override((struct kqworkloop
*)kq
, qos_index
);
6962 kn
->kn_qos_index
= qos_index
;
6967 knote_should_apply_qos_override(struct kqueue
*kq
, struct knote
*kn
, int result
,
6968 thread_qos_t
*qos_out
)
6970 thread_qos_t qos_index
= (result
>> FILTER_ADJUST_EVENT_QOS_SHIFT
) & 7;
6974 assert(result
& FILTER_ADJUST_EVENT_QOS_BIT
);
6975 assert(qos_index
< THREAD_QOS_LAST
);
6978 * Early exit for knotes that should not change QoS
6980 * It is safe to test kn_req_index against MANAGER / STAYACTIVE because
6981 * knotes with such kn_req_index values never change for their entire
6984 if (__improbable(!knote_fops(kn
)->f_adjusts_qos
)) {
6985 panic("filter %d cannot change QoS", kn
->kn_filtid
);
6986 } else if (kq
->kq_state
& KQ_WORKLOOP
) {
6987 if (kn
->kn_req_index
== KQWL_BUCKET_STAYACTIVE
) {
6990 } else if (kq
->kq_state
& KQ_WORKQ
) {
6991 if (kn
->kn_req_index
== KQWQ_QOS_MANAGER
) {
6999 * knotes with the FALLBACK flag will only use their registration QoS if the
7000 * incoming event has no QoS, else, the registration QoS acts as a floor.
7002 if (kn
->kn_qos
& _PTHREAD_PRIORITY_FALLBACK_FLAG
) {
7003 if (qos_index
== THREAD_QOS_UNSPECIFIED
) {
7004 qos_index
= kn
->kn_req_index
;
7007 if (qos_index
< kn
->kn_req_index
) {
7008 qos_index
= kn
->kn_req_index
;
7011 if ((kn
->kn_status
& KN_MERGE_QOS
) && (qos_index
< kn
->kn_qos_override
)) {
7012 /* Never lower QoS when in "Merge" mode */
7016 if ((kn
->kn_status
& KN_LOCKED
) && kn
->kn_inuse
) {
7018 * When we're trying to update the QoS override and that both an
7019 * f_event() and other f_* calls are running concurrently, any of these
7020 * in flight calls may want to perform overrides that aren't properly
7021 * serialized with each other.
7023 * The first update that observes this racy situation enters a "Merge"
7024 * mode which causes subsequent override requests to saturate the
7025 * override instead of replacing its value.
7027 * This mode is left when knote_unlock() or knote_call_filter_event()
7028 * observe that no other f_* routine is in flight.
7030 kn
->kn_status
|= KN_MERGE_QOS
;
7033 if (kn
->kn_qos_override
== qos_index
) {
7037 *qos_out
= qos_index
;
7042 knote_adjust_qos(struct kqueue
*kq
, struct knote
*kn
, int result
)
7045 if (knote_should_apply_qos_override(kq
, kn
, result
, &qos
)) {
7047 knote_apply_qos_override(kn
, qos
);
7048 if (knote_enqueue(kn
) && (kn
->kn_status
& KN_ACTIVE
)) {
7055 knote_wakeup(struct knote
*kn
)
7057 struct kqueue
*kq
= knote_get_kq(kn
);
7061 if (kq
->kq_state
& KQ_WORKQ
) {
7062 struct kqworkq
*kqwq
= (struct kqworkq
*)kq
;
7064 kqworkq_request_help(kqwq
, kn
->kn_qos_index
);
7065 } else if (kq
->kq_state
& KQ_WORKLOOP
) {
7066 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
7069 * kqworkloop_end_processing() will perform the required QoS
7070 * computations when it unsets the processing mode.
7072 if (!kqworkloop_is_processing_on_current_thread(kqwl
)) {
7073 kqworkloop_request_help(kqwl
, kn
->kn_qos_index
);
7076 struct kqfile
*kqf
= (struct kqfile
*)kq
;
7078 /* flag wakeups during processing */
7079 if (kq
->kq_state
& KQ_PROCESSING
) {
7080 kq
->kq_state
|= KQ_WAKEUP
;
7083 /* wakeup a thread waiting on this queue */
7084 if (kq
->kq_state
& (KQ_SLEEP
| KQ_SEL
)) {
7085 kq
->kq_state
&= ~(KQ_SLEEP
| KQ_SEL
);
7086 waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
, KQ_EVENT
,
7087 THREAD_AWAKENED
, WAITQ_ALL_PRIORITIES
);
7090 /* wakeup other kqueues/select sets we're inside */
7091 KNOTE(&kqf
->kqf_sel
.si_note
, 0);
7096 * Called with the kqueue locked
7099 kqueue_interrupt(struct kqueue
*kq
)
7101 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
7103 /* wakeup sleeping threads */
7104 if ((kq
->kq_state
& (KQ_SLEEP
| KQ_SEL
)) != 0) {
7105 kq
->kq_state
&= ~(KQ_SLEEP
| KQ_SEL
);
7106 (void)waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
,
7109 WAITQ_ALL_PRIORITIES
);
7112 /* wakeup threads waiting their turn to process */
7113 if (kq
->kq_state
& KQ_PROCWAIT
) {
7114 struct kqtailq
*suppressq
;
7116 assert(kq
->kq_state
& KQ_PROCESSING
);
7118 kq
->kq_state
&= ~KQ_PROCWAIT
;
7119 suppressq
= kqueue_get_suppressed_queue(kq
, NULL
);
7120 (void)waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
,
7121 CAST_EVENT64_T(suppressq
),
7123 WAITQ_ALL_PRIORITIES
);
7128 * Called back from waitq code when no threads waiting and the hook was set.
7130 * Interrupts are likely disabled and spin locks are held - minimal work
7131 * can be done in this context!!!
7133 * JMM - in the future, this will try to determine which knotes match the
7134 * wait queue wakeup and apply these wakeups against those knotes themselves.
7135 * For now, all the events dispatched this way are dispatch-manager handled,
7136 * so hard-code that for now.
7139 waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook
, void *knote_hook
, int qos
)
7141 #pragma unused(knote_hook, qos)
7143 struct kqueue
*kq
= (struct kqueue
*)kq_hook
;
7145 if (kq
->kq_state
& KQ_WORKQ
) {
7146 struct kqworkq
*kqwq
= (struct kqworkq
*)kq
;
7148 kqworkq_request_help(kqwq
, KQWQ_QOS_MANAGER
);
7149 } else if (kq
->kq_state
& KQ_WORKLOOP
) {
7150 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
7152 kqworkloop_request_help(kqwl
, KQWL_BUCKET_STAYACTIVE
);
void
klist_init(struct klist *list)
{
	SLIST_INIT(list);
}


/*
 * Query/Post each knote in the object's list
 *
 *	The object lock protects the list. It is assumed
 *	that the filter/event routine for the object can
 *	determine that the object is already locked (via
 *	the hint) and not deadlock itself.
 *
 *	The object lock should also hold off pending
 *	detach/drop operations.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext) {
		struct kqueue *kq = knote_get_kq(kn);
		kqlock(kq);
		knote_call_filter_event(kq, kn, hint);
		kqunlock(kq);
	}
}
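/*
 * Subsystems that own a klist normally post through the KNOTE() wrapper from
 * <sys/event.h> while holding their own lock; the hint is handed to each
 * registered filter's f_event routine via knote_call_filter_event().  A
 * hedged sketch of a hypothetical driver doing this (the mydev_* names are
 * invented; KNOTE(), klist_init() and struct klist are the real interfaces):
 *
 *	struct mydev_softc {
 *		lck_mtx_t    md_lock;
 *		struct klist md_note;     // set up once with klist_init(&sc->md_note)
 *		int          md_bytes;    // state the filter's f_event will inspect
 *	};
 *
 *	static void
 *	mydev_data_arrived(struct mydev_softc *sc, int nbytes)
 *	{
 *		lck_mtx_lock(&sc->md_lock);
 *		sc->md_bytes += nbytes;
 *		// every registered knote gets f_event(kn, nbytes); the hint lets
 *		// the filter know md_lock is already held.
 *		KNOTE(&sc->md_note, nbytes);
 *		lck_mtx_unlock(&sc->md_lock);
 *	}
 */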
/*
 * attach a knote to the specified list.  Return true if this is the first entry.
 * The list is protected by whatever lock the object it is associated with uses.
 */
int
knote_attach(struct klist *list, struct knote *kn)
{
	int ret = SLIST_EMPTY(list);
	SLIST_INSERT_HEAD(list, kn, kn_selnext);
	return ret;
}

/*
 * detach a knote from the specified list.  Return true if that was the last entry.
 * The list is protected by whatever lock the object it is associated with uses.
 */
int
knote_detach(struct klist *list, struct knote *kn)
{
	SLIST_REMOVE(list, kn, knote, kn_selnext);
	return SLIST_EMPTY(list);
}
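/*
 * Filters typically reach these helpers through the KNOTE_ATTACH()/
 * KNOTE_DETACH() wrappers from their f_attach/f_detach routines, using the
 * boolean result to arm the event source on the first registration and
 * disarm it on the last.  A hedged sketch continuing the hypothetical mydev
 * driver above (mydev_lookup() is invented):
 *
 *	static int
 *	filt_mydevattach(struct knote *kn, struct kevent_internal_s *kev)
 *	{
 *		struct mydev_softc *sc = mydev_lookup(kn->kn_id);
 *
 *		lck_mtx_lock(&sc->md_lock);
 *		if (KNOTE_ATTACH(&sc->md_note, kn)) {
 *			// first listener: a real driver might enable interrupts here
 *		}
 *		lck_mtx_unlock(&sc->md_lock);
 *		return 0;                 // not already active
 *	}
 *
 *	static void
 *	filt_mydevdetach(struct knote *kn)
 *	{
 *		struct mydev_softc *sc = mydev_lookup(kn->kn_id);
 *
 *		lck_mtx_lock(&sc->md_lock);
 *		if (KNOTE_DETACH(&sc->md_note, kn)) {
 *			// last listener went away: quiesce the source
 *		}
 *		lck_mtx_unlock(&sc->md_lock);
 *	}
 */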
7211 * knote_vanish - Indicate that the source has vanished
7213 * If the knote has requested EV_VANISHED delivery,
7214 * arrange for that. Otherwise, deliver a NOTE_REVOKE
7215 * event for backward compatibility.
7217 * The knote is marked as having vanished, but is not
7218 * actually detached from the source in this instance.
7219 * The actual detach is deferred until the knote drop.
7221 * Our caller already has the object lock held. Calling
7222 * the detach routine would try to take that lock
7223 * recursively - which likely is not supported.
7226 knote_vanish(struct klist
*list
, bool make_active
)
7229 struct knote
*kn_next
;
7231 SLIST_FOREACH_SAFE(kn
, list
, kn_selnext
, kn_next
) {
7232 struct kqueue
*kq
= knote_get_kq(kn
);
7235 if (__probable(kn
->kn_status
& KN_REQVANISH
)) {
7237 * If EV_VANISH supported - prepare to deliver one
7239 kn
->kn_status
|= KN_VANISHED
;
7242 * Handle the legacy way to indicate that the port/portset was
7243 * deallocated or left the current Mach portspace (modern technique
7244 * is with an EV_VANISHED protocol).
7246 * Deliver an EV_EOF event for these changes (hopefully it will get
7247 * delivered before the port name recycles to the same generation
7248 * count and someone tries to re-register a kevent for it or the
7249 * events are udata-specific - avoiding a conflict).
7251 kn
->kn_flags
|= EV_EOF
| EV_ONESHOT
;
/*
 * Force a lazy allocation of the waitqset link
 * of the kq_wqs associated with the kn
 * if it wasn't already allocated.
 *
 * This allows knote_link_waitq to never block
 * if reserved_link is not NULL.
 */
void
knote_link_waitqset_lazy_alloc(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	waitq_set_lazy_init_link(&kq->kq_wqs);
}

/*
 * Check if a lazy allocation for the waitqset link
 * of the kq_wqs is needed.
 */
boolean_t
knote_link_waitqset_should_lazy_alloc(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	return waitq_set_should_lazy_init_link(&kq->kq_wqs);
}
/*
 * For a given knote, link a provided wait queue directly with the kqueue.
 * Wakeups will happen via recursive wait queue support. But nothing will move
 * the knote to the active list at wakeup (nothing calls knote()).  Instead,
 * we permanently enqueue them here.
 *
 * kqueue and knote references are held by caller.
 * waitq locked by caller.
 *
 * caller provides the wait queue link structure and insures that the kq->kq_wqs
 * is linked by previously calling knote_link_waitqset_lazy_alloc.
 */
int
knote_link_waitq(struct knote *kn, struct waitq *wq, uint64_t *reserved_link)
{
	struct kqueue *kq = knote_get_kq(kn);
	kern_return_t kr;

	kr = waitq_link(wq, &kq->kq_wqs, WAITQ_ALREADY_LOCKED, reserved_link);
	if (kr == KERN_SUCCESS) {
		knote_markstayactive(kn);
		return 0;
	} else {
		return EINVAL;
	}
}
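/*
 * The expected calling sequence reserves the waitq link and pre-initializes
 * the kqueue's waitq-set link before the waitq interlock is taken, so the
 * call above never has to block.  A hedged sketch of that sequence in a
 * hypothetical attach path (waitq_link_reserve()/waitq_link_release() and
 * waitq_lock()/waitq_unlock() are existing waitq interfaces; the surrounding
 * names are invented):
 *
 *	static int
 *	mydev_link_knote(struct knote *kn, struct waitq *dev_wq)
 *	{
 *		uint64_t reserved_link;
 *		int error;
 *
 *		// 1. make sure the kqueue side can be linked without blocking
 *		if (knote_link_waitqset_should_lazy_alloc(kn)) {
 *			knote_link_waitqset_lazy_alloc(kn);
 *		}
 *
 *		// 2. reserve the link object before taking the interlock
 *		reserved_link = waitq_link_reserve(dev_wq);
 *
 *		waitq_lock(dev_wq);
 *		error = knote_link_waitq(kn, dev_wq, &reserved_link);
 *		waitq_unlock(dev_wq);
 *
 *		// 3. give back the reservation if it was not consumed
 *		waitq_link_release(reserved_link);
 *		return error;
 *	}
 */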
/*
 * Unlink the provided wait queue from the kqueue associated with a knote.
 * Also remove it from the magic list of directly attached knotes.
 *
 * Note that the unlink may have already happened from the other side, so
 * ignore any failures to unlink and just remove it from the kqueue list.
 *
 * On success, caller is responsible for the link structure
 */
int
knote_unlink_waitq(struct knote *kn, struct waitq *wq)
{
	struct kqueue *kq = knote_get_kq(kn);
	kern_return_t kr;

	kr = waitq_unlink(wq, &kq->kq_wqs);
	knote_clearstayactive(kn);
	return (kr != KERN_SUCCESS) ? EINVAL : 0;
}
7334 * remove all knotes referencing a specified fd
7336 * Entered with the proc_fd lock already held.
7337 * It returns the same way, but may drop it temporarily.
7340 knote_fdclose(struct proc
*p
, int fd
)
7344 KNOTE_LOCK_CTX(knlc
);
7347 list
= &p
->p_fd
->fd_knlist
[fd
];
7348 SLIST_FOREACH(kn
, list
, kn_link
) {
7349 struct kqueue
*kq
= knote_get_kq(kn
);
7353 if (kq
->kq_p
!= p
) {
7354 panic("%s: proc mismatch (kq->kq_p=%p != p=%p)",
7355 __func__
, kq
->kq_p
, p
);
7359 * If the knote supports EV_VANISHED delivery,
7360 * transition it to vanished mode (or skip over
7361 * it if already vanished).
7363 if (kn
->kn_status
& KN_VANISHED
) {
7369 if (!knote_lock(kq
, kn
, &knlc
, KNOTE_KQ_LOCK_ON_SUCCESS
)) {
7370 /* the knote was dropped by someone, nothing to do */
7371 } else if (kn
->kn_status
& KN_REQVANISH
) {
7372 kn
->kn_status
|= KN_VANISHED
;
7373 kn
->kn_status
&= ~KN_ATTACHED
;
7376 knote_fops(kn
)->f_detach(kn
);
7377 if (knote_fops(kn
)->f_isfd
) {
7378 fp_drop(p
, kn
->kn_id
, kn
->kn_fp
, 0);
7383 knote_unlock(kq
, kn
, &knlc
, KNOTE_KQ_UNLOCK
);
7385 knote_drop(kq
, kn
, &knlc
);
/*
 * knote_fdfind - lookup a knote in the fd table for process
 *
 *	If the filter is file-based, lookup based on fd index.
 *	Otherwise use a hash based on the ident.
 *
 *	Matching is based on kq, filter, and ident. Optionally,
 *	it may also be based on the udata field in the kevent -
 *	allowing multiple event registration for the file object
 *	per kqueue.
 *
 *	fd_knhashlock or fdlock held on entry (and exit)
 */
static struct knote *
knote_fdfind(struct kqueue *kq,
    struct kevent_internal_s *kev,
    bool is_fd,
    struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct klist *list = NULL;
	struct knote *kn = NULL;

	/*
	 * determine where to look for the knote
	 */
	if (is_fd) {
		/* fd-based knotes are linked off the fd table */
		if (kev->ident < (u_int)fdp->fd_knlistsize) {
			list = &fdp->fd_knlist[kev->ident];
		}
	} else if (fdp->fd_knhashmask != 0) {
		/* hash non-fd knotes here too */
		list = &fdp->fd_knhash[KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
	}

	/*
	 * scan the selected list looking for a match
	 */
	if (list != NULL) {
		SLIST_FOREACH(kn, list, kn_link) {
			if (kq == knote_get_kq(kn) &&
			    kev->ident == kn->kn_id &&
			    kev->filter == kn->kn_filter) {
				if (kev->flags & EV_UDATA_SPECIFIC) {
					if ((kn->kn_status & KN_UDATA_SPECIFIC) &&
					    kev->udata == kn->kn_udata) {
						break;	/* matching udata-specific knote */
					}
				} else if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0) {
					break;	/* matching non-udata-specific knote */
				}
			}
		}
	}
	return kn;
}
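/*
 * The EV_UDATA_SPECIFIC branch above is what lets user space keep several
 * otherwise-identical registrations (same kqueue, ident and filter) alive at
 * once, distinguished only by udata.  A hedged user-space sketch; the flag
 * must accompany every later operation on the same knote:
 *
 *	#include <sys/event.h>
 *
 *	static void
 *	register_two(int kq, int fd)
 *	{
 *		struct kevent64_s kev[2];
 *
 *		EV_SET64(&kev[0], fd, EVFILT_READ, EV_ADD | EV_UDATA_SPECIFIC,
 *		    0, 0, 0x1111, 0, 0);              // udata 0x1111
 *		EV_SET64(&kev[1], fd, EVFILT_READ, EV_ADD | EV_UDATA_SPECIFIC,
 *		    0, 0, 0x2222, 0, 0);              // udata 0x2222
 *		(void)kevent64(kq, kev, 2, NULL, 0, 0, NULL);
 *
 *		// deleting the first one later must name the same udata:
 *		//   EV_SET64(&kev[0], fd, EVFILT_READ,
 *		//       EV_DELETE | EV_UDATA_SPECIFIC, 0, 0, 0x1111, 0, 0);
 *	}
 */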
7452 * kq_add_knote- Add knote to the fd table for process
7453 * while checking for duplicates.
7455 * All file-based filters associate a list of knotes by file
7456 * descriptor index. All other filters hash the knote by ident.
7458 * May have to grow the table of knote lists to cover the
7459 * file descriptor index presented.
7461 * fd_knhashlock and fdlock unheld on entry (and exit).
7463 * Takes a rwlock boost if inserting the knote is successful.
7466 kq_add_knote(struct kqueue
*kq
, struct knote
*kn
, struct knote_lock_ctx
*knlc
,
7469 struct filedesc
*fdp
= p
->p_fd
;
7470 struct klist
*list
= NULL
;
7472 bool is_fd
= knote_fops(kn
)->f_isfd
;
7480 if (knote_fdfind(kq
, &kn
->kn_kevent
, is_fd
, p
) != NULL
) {
7481 /* found an existing knote: we can't add this one */
7486 /* knote was not found: add it now */
7488 if (fdp
->fd_knhashmask
== 0) {
7491 list
= hashinit(CONFIG_KN_HASHSIZE
, M_KQUEUE
, &size
);
7497 fdp
->fd_knhash
= list
;
7498 fdp
->fd_knhashmask
= size
;
7501 list
= &fdp
->fd_knhash
[KN_HASH(kn
->kn_id
, fdp
->fd_knhashmask
)];
7502 SLIST_INSERT_HEAD(list
, kn
, kn_link
);
7506 /* knote is fd based */
7508 if ((u_int
)fdp
->fd_knlistsize
<= kn
->kn_id
) {
7511 if (kn
->kn_id
>= (uint64_t)p
->p_rlimit
[RLIMIT_NOFILE
].rlim_cur
7512 || kn
->kn_id
>= (uint64_t)maxfiles
) {
7516 /* have to grow the fd_knlist */
7517 size
= fdp
->fd_knlistsize
;
7518 while (size
<= kn
->kn_id
) {
7522 if (size
>= (UINT_MAX
/ sizeof(struct klist
*))) {
7527 MALLOC(list
, struct klist
*,
7528 size
* sizeof(struct klist
*), M_KQUEUE
, M_WAITOK
);
7534 bcopy((caddr_t
)fdp
->fd_knlist
, (caddr_t
)list
,
7535 fdp
->fd_knlistsize
* sizeof(struct klist
*));
7536 bzero((caddr_t
)list
+
7537 fdp
->fd_knlistsize
* sizeof(struct klist
*),
7538 (size
- fdp
->fd_knlistsize
) * sizeof(struct klist
*));
7539 FREE(fdp
->fd_knlist
, M_KQUEUE
);
7540 fdp
->fd_knlist
= list
;
7541 fdp
->fd_knlistsize
= size
;
7544 list
= &fdp
->fd_knlist
[kn
->kn_id
];
7545 SLIST_INSERT_HEAD(list
, kn
, kn_link
);
7553 assert((kn
->kn_status
& KN_LOCKED
) == 0);
7554 (void)knote_lock(kq
, kn
, knlc
, KNOTE_KQ_UNLOCK
);
7566 * kq_remove_knote - remove a knote from the fd table for process
7568 * If the filter is file-based, remove based on fd index.
7569 * Otherwise remove from the hash based on the ident.
7571 * fd_knhashlock and fdlock unheld on entry (and exit).
7574 kq_remove_knote(struct kqueue
*kq
, struct knote
*kn
, struct proc
*p
,
7575 struct knote_lock_ctx
*knlc
)
7577 struct filedesc
*fdp
= p
->p_fd
;
7578 struct klist
*list
= NULL
;
7582 is_fd
= knote_fops(kn
)->f_isfd
;
7591 assert((u_int
)fdp
->fd_knlistsize
> kn
->kn_id
);
7592 list
= &fdp
->fd_knlist
[kn
->kn_id
];
7594 list
= &fdp
->fd_knhash
[KN_HASH(kn
->kn_id
, fdp
->fd_knhashmask
)];
7596 SLIST_REMOVE(list
, kn
, knote
, kn_link
);
7599 kq_state
= kq
->kq_state
;
7601 knote_unlock_cancel(kq
, kn
, knlc
, KNOTE_KQ_UNLOCK
);
7611 if (kq_state
& KQ_DYNAMIC
) {
7612 kqueue_release_last(p
, kq
);
7617 * kq_find_knote_and_kq_lock - lookup a knote in the fd table for process
7618 * and, if the knote is found, acquires the kqlock while holding the fd table lock/spinlock.
7620 * fd_knhashlock or fdlock unheld on entry (and exit)
7623 static struct knote
*
7624 kq_find_knote_and_kq_lock(struct kqueue
*kq
, struct kevent_internal_s
*kev
,
7625 bool is_fd
, struct proc
*p
)
7635 ret
= knote_fdfind(kq
, kev
, is_fd
, p
);
7650 * knote_drop - disconnect and drop the knote
7652 * Called with the kqueue locked, returns with the kqueue unlocked.
7654 * If a knote locking context is passed, it is canceled.
7656 * The knote may have already been detached from
7657 * (or not yet attached to) its source object.
7660 knote_drop(struct kqueue
*kq
, struct knote
*kn
, struct knote_lock_ctx
*knlc
)
7662 struct proc
*p
= kq
->kq_p
;
7666 assert((kn
->kn_status
& KN_DROPPING
) == 0);
7668 assert((kn
->kn_status
& KN_LOCKED
) == 0);
7670 kn
->kn_status
|= KN_DROPPING
;
7672 knote_unsuppress(kn
);
7674 knote_wait_for_filter_events(kq
, kn
);
7676 /* If we are attached, disconnect from the source first */
7677 if (kn
->kn_status
& KN_ATTACHED
) {
7678 knote_fops(kn
)->f_detach(kn
);
7681 /* kq may be freed when kq_remove_knote() returns */
7682 kq_remove_knote(kq
, kn
, p
, knlc
);
7683 if (knote_fops(kn
)->f_isfd
&& ((kn
->kn_status
& KN_VANISHED
) == 0)) {
7684 fp_drop(p
, kn
->kn_id
, kn
->kn_fp
, 0);
7690 /* called with kqueue lock held */
7692 knote_activate(struct knote
*kn
)
7694 if (kn
->kn_status
& KN_ACTIVE
) {
7698 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_ACTIVATE
),
7699 kn
->kn_udata
, kn
->kn_status
| (kn
->kn_id
<< 32),
7702 kn
->kn_status
|= KN_ACTIVE
;
7703 if (knote_enqueue(kn
)) {
7708 /* called with kqueue lock held */
7710 knote_deactivate(struct knote
*kn
)
7712 kn
->kn_status
&= ~KN_ACTIVE
;
7713 if ((kn
->kn_status
& KN_STAYACTIVE
) == 0) {
7718 /* called with kqueue lock held */
7720 knote_enable(struct knote
*kn
)
7722 if ((kn
->kn_status
& KN_DISABLED
) == 0) {
7726 kn
->kn_status
&= ~KN_DISABLED
;
7728 if (kn
->kn_status
& KN_SUPPRESSED
) {
7730 * it is possible for userland to have knotes registered for a given
7731 * workloop `wl_orig` but really handled on another workloop `wl_new`.
7733 * In that case, rearming will happen from the servicer thread of
7734 * `wl_new` which if `wl_orig` is no longer being serviced, would cause
7735 * this knote to stay suppressed forever if we only relied on
7736 * kqworkloop_acknowledge_events to be called by `wl_orig`.
7738 * However if we see the KQ_PROCESSING bit on `wl_orig` set, we can't
7739 * unsuppress because that would mess with the processing phase of
7740 * `wl_orig`, however it also means kqworkloop_acknowledge_events()
7743 struct kqueue
*kq
= knote_get_kq(kn
);
7744 if ((kq
->kq_state
& KQ_PROCESSING
) == 0) {
7745 knote_unsuppress(kn
);
7747 } else if (knote_enqueue(kn
)) {
7752 /* called with kqueue lock held */
7754 knote_disable(struct knote
*kn
)
7756 if (kn
->kn_status
& KN_DISABLED
) {
7760 kn
->kn_status
|= KN_DISABLED
;
7764 /* called with kqueue lock held */
7766 knote_suppress(struct knote
*kn
)
7768 struct kqtailq
*suppressq
;
7769 struct kqueue
*kq
= knote_get_kq(kn
);
7773 if (kn
->kn_status
& KN_SUPPRESSED
) {
7778 kn
->kn_status
|= KN_SUPPRESSED
;
7779 suppressq
= kqueue_get_suppressed_queue(kq
, kn
);
7780 TAILQ_INSERT_TAIL(suppressq
, kn
, kn_tqe
);
7783 /* called with kqueue lock held */
7785 knote_unsuppress(struct knote
*kn
)
7787 struct kqtailq
*suppressq
;
7788 struct kqueue
*kq
= knote_get_kq(kn
);
7792 if ((kn
->kn_status
& KN_SUPPRESSED
) == 0) {
7796 kn
->kn_status
&= ~KN_SUPPRESSED
;
7797 suppressq
= kqueue_get_suppressed_queue(kq
, kn
);
7798 TAILQ_REMOVE(suppressq
, kn
, kn_tqe
);
7801 * If the knote is no longer active, reset its push,
7802 * and resynchronize kn_qos_index with kn_qos_override
7804 if ((kn
->kn_status
& KN_ACTIVE
) == 0) {
7805 kn
->kn_qos_override
= kn
->kn_req_index
;
7807 kn
->kn_qos_index
= kn
->kn_qos_override
;
7809 /* don't wakeup if unsuppressing just a stay-active knote */
7810 if (knote_enqueue(kn
) && (kn
->kn_status
& KN_ACTIVE
)) {
7814 if ((kq
->kq_state
& KQ_WORKLOOP
) && TAILQ_EMPTY(suppressq
)) {
7815 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
7817 if (kqworkloop_is_processing_on_current_thread(kqwl
)) {
7819 * kqworkloop_end_processing() or kqworkloop_begin_processing()
7820 * will perform the required QoS computations when it unsets the
7825 kqworkloop_update_threads_qos(kqwl
, KQWL_UTQ_RESET_WAKEUP_OVERRIDE
, 0);
7826 kq_req_unlock(kqwl
);
7831 /* called with kqueue lock held */
7833 knote_enqueue(struct knote
*kn
)
7835 if ((kn
->kn_status
& (KN_ACTIVE
| KN_STAYACTIVE
)) == 0 ||
7836 (kn
->kn_status
& (KN_DISABLED
| KN_SUPPRESSED
| KN_DROPPING
))) {
7840 if ((kn
->kn_status
& KN_QUEUED
) == 0) {
7841 struct kqtailq
*queue
= knote_get_queue(kn
);
7842 struct kqueue
*kq
= knote_get_kq(kn
);
7845 TAILQ_INSERT_TAIL(queue
, kn
, kn_tqe
);
7846 kn
->kn_status
|= KN_QUEUED
;
7850 return (kn
->kn_status
& KN_STAYACTIVE
) != 0;
7854 /* called with kqueue lock held */
7856 knote_dequeue(struct knote
*kn
)
7858 struct kqueue
*kq
= knote_get_kq(kn
);
7859 struct kqtailq
*queue
;
7863 if ((kn
->kn_status
& KN_QUEUED
) == 0) {
7867 queue
= knote_get_queue(kn
);
7868 TAILQ_REMOVE(queue
, kn
, kn_tqe
);
7869 kn
->kn_status
&= ~KN_QUEUED
;
void
knote_init(void)
{
	knote_zone = zinit(sizeof(struct knote), 8192 * sizeof(struct knote),
	    8192, "knote zone");

	kqfile_zone = zinit(sizeof(struct kqfile), 8192 * sizeof(struct kqfile),
	    8192, "kqueue file zone");

	kqworkq_zone = zinit(sizeof(struct kqworkq), 8192 * sizeof(struct kqworkq),
	    8192, "kqueue workq zone");

	kqworkloop_zone = zinit(sizeof(struct kqworkloop), 8192 * sizeof(struct kqworkloop),
	    8192, "kqueue workloop zone");

	/* allocate kq lock group attribute and group */
	kq_lck_grp_attr = lck_grp_attr_alloc_init();

	kq_lck_grp = lck_grp_alloc_init("kqueue", kq_lck_grp_attr);

	/* Allocate kq lock attribute */
	kq_lck_attr = lck_attr_alloc_init();

#if CONFIG_MEMORYSTATUS
	/* Initialize the memorystatus list lock */
	memorystatus_kevent_init(kq_lck_grp, kq_lck_attr);
#endif
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

const struct filterops *
knote_fops(struct knote *kn)
{
	return sysfilt_ops[kn->kn_filtid];
}

static struct knote *
knote_alloc(void)
{
	struct knote *kn = ((struct knote *)zalloc(knote_zone));
	bzero(kn, sizeof(struct knote));
	return kn;
}

static void
knote_free(struct knote *kn)
{
	assert(kn->kn_inuse == 0);
	assert((kn->kn_status & KN_LOCKED) == 0);
	zfree(knote_zone, kn);
}
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/kern_event.h>
#include <sys/malloc.h>
#include <sys/sys_domain.h>
#include <sys/syslog.h>

#define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))

#define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
7944 static lck_grp_attr_t
*kev_lck_grp_attr
;
7945 static lck_attr_t
*kev_lck_attr
;
7946 static lck_grp_t
*kev_lck_grp
;
7947 static decl_lck_rw_data(, kev_lck_data
);
7948 static lck_rw_t
*kev_rwlock
= &kev_lck_data
;
7950 static int kev_attach(struct socket
*so
, int proto
, struct proc
*p
);
7951 static int kev_detach(struct socket
*so
);
7952 static int kev_control(struct socket
*so
, u_long cmd
, caddr_t data
,
7953 struct ifnet
*ifp
, struct proc
*p
);
7954 static lck_mtx_t
* event_getlock(struct socket
*, int);
7955 static int event_lock(struct socket
*, int, void *);
7956 static int event_unlock(struct socket
*, int, void *);
7958 static int event_sofreelastref(struct socket
*);
7959 static void kev_delete(struct kern_event_pcb
*);
7961 static struct pr_usrreqs event_usrreqs
= {
7962 .pru_attach
= kev_attach
,
7963 .pru_control
= kev_control
,
7964 .pru_detach
= kev_detach
,
7965 .pru_soreceive
= soreceive
,
7968 static struct protosw eventsw
[] = {
7970 .pr_type
= SOCK_RAW
,
7971 .pr_protocol
= SYSPROTO_EVENT
,
7972 .pr_flags
= PR_ATOMIC
,
7973 .pr_usrreqs
= &event_usrreqs
,
7974 .pr_lock
= event_lock
,
7975 .pr_unlock
= event_unlock
,
7976 .pr_getlock
= event_getlock
,
7980 __private_extern__
int kevt_getstat SYSCTL_HANDLER_ARGS
;
7981 __private_extern__
int kevt_pcblist SYSCTL_HANDLER_ARGS
;
7983 SYSCTL_NODE(_net_systm
, OID_AUTO
, kevt
,
7984 CTLFLAG_RW
| CTLFLAG_LOCKED
, 0, "Kernel event family");
7986 struct kevtstat kevtstat
;
7987 SYSCTL_PROC(_net_systm_kevt
, OID_AUTO
, stats
,
7988 CTLTYPE_STRUCT
| CTLFLAG_RD
| CTLFLAG_LOCKED
, 0, 0,
7989 kevt_getstat
, "S,kevtstat", "");
7991 SYSCTL_PROC(_net_systm_kevt
, OID_AUTO
, pcblist
,
7992 CTLTYPE_STRUCT
| CTLFLAG_RD
| CTLFLAG_LOCKED
, 0, 0,
7993 kevt_pcblist
, "S,xkevtpcb", "");
7996 event_getlock(struct socket
*so
, int flags
)
7998 #pragma unused(flags)
7999 struct kern_event_pcb
*ev_pcb
= (struct kern_event_pcb
*)so
->so_pcb
;
8001 if (so
->so_pcb
!= NULL
) {
8002 if (so
->so_usecount
< 0) {
8003 panic("%s: so=%p usecount=%d lrh= %s\n", __func__
,
8004 so
, so
->so_usecount
, solockhistory_nr(so
));
8008 panic("%s: so=%p NULL NO so_pcb %s\n", __func__
,
8009 so
, solockhistory_nr(so
));
8012 return &ev_pcb
->evp_mtx
;
8016 event_lock(struct socket
*so
, int refcount
, void *lr
)
8021 lr_saved
= __builtin_return_address(0);
8026 if (so
->so_pcb
!= NULL
) {
8027 lck_mtx_lock(&((struct kern_event_pcb
*)so
->so_pcb
)->evp_mtx
);
8029 panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__
,
8030 so
, lr_saved
, solockhistory_nr(so
));
8034 if (so
->so_usecount
< 0) {
8035 panic("%s: so=%p so_pcb=%p lr=%p ref=%d lrh= %s\n", __func__
,
8036 so
, so
->so_pcb
, lr_saved
, so
->so_usecount
,
8037 solockhistory_nr(so
));
8045 so
->lock_lr
[so
->next_lock_lr
] = lr_saved
;
8046 so
->next_lock_lr
= (so
->next_lock_lr
+ 1) % SO_LCKDBG_MAX
;
8051 event_unlock(struct socket
*so
, int refcount
, void *lr
)
8054 lck_mtx_t
*mutex_held
;
8057 lr_saved
= __builtin_return_address(0);
8065 if (so
->so_usecount
< 0) {
8066 panic("%s: so=%p usecount=%d lrh= %s\n", __func__
,
8067 so
, so
->so_usecount
, solockhistory_nr(so
));
8070 if (so
->so_pcb
== NULL
) {
8071 panic("%s: so=%p NO PCB usecount=%d lr=%p lrh= %s\n", __func__
,
8072 so
, so
->so_usecount
, (void *)lr_saved
,
8073 solockhistory_nr(so
));
8076 mutex_held
= (&((struct kern_event_pcb
*)so
->so_pcb
)->evp_mtx
);
8078 LCK_MTX_ASSERT(mutex_held
, LCK_MTX_ASSERT_OWNED
);
8079 so
->unlock_lr
[so
->next_unlock_lr
] = lr_saved
;
8080 so
->next_unlock_lr
= (so
->next_unlock_lr
+ 1) % SO_LCKDBG_MAX
;
8082 if (so
->so_usecount
== 0) {
8083 VERIFY(so
->so_flags
& SOF_PCBCLEARING
);
8084 event_sofreelastref(so
);
8086 lck_mtx_unlock(mutex_held
);
8093 event_sofreelastref(struct socket
*so
)
8095 struct kern_event_pcb
*ev_pcb
= (struct kern_event_pcb
*)so
->so_pcb
;
8097 LCK_MTX_ASSERT(&(ev_pcb
->evp_mtx
), LCK_MTX_ASSERT_OWNED
);
8102 * Disable upcall in the event another thread is in kev_post_msg()
8103 * appending record to the receive socket buffer, since sbwakeup()
8104 * may release the socket lock otherwise.
8106 so
->so_rcv
.sb_flags
&= ~SB_UPCALL
;
8107 so
->so_snd
.sb_flags
&= ~SB_UPCALL
;
8108 so
->so_event
= sonullevent
;
8109 lck_mtx_unlock(&(ev_pcb
->evp_mtx
));
8111 LCK_MTX_ASSERT(&(ev_pcb
->evp_mtx
), LCK_MTX_ASSERT_NOTOWNED
);
8112 lck_rw_lock_exclusive(kev_rwlock
);
8113 LIST_REMOVE(ev_pcb
, evp_link
);
8114 kevtstat
.kes_pcbcount
--;
8115 kevtstat
.kes_gencnt
++;
8116 lck_rw_done(kev_rwlock
);
8119 sofreelastref(so
, 1);
8123 static int event_proto_count
= (sizeof(eventsw
) / sizeof(struct protosw
));
8126 struct kern_event_head kern_event_head
;
8128 static u_int32_t static_event_id
= 0;
8130 #define EVPCB_ZONE_MAX 65536
8131 #define EVPCB_ZONE_NAME "kerneventpcb"
8132 static struct zone
*ev_pcb_zone
;
8135 * Install the protosw's for the NKE manager. Invoked at extension load time
8138 kern_event_init(struct domain
*dp
)
8143 VERIFY(!(dp
->dom_flags
& DOM_INITIALIZED
));
8144 VERIFY(dp
== systemdomain
);
8146 kev_lck_grp_attr
= lck_grp_attr_alloc_init();
8147 if (kev_lck_grp_attr
== NULL
) {
8148 panic("%s: lck_grp_attr_alloc_init failed\n", __func__
);
8152 kev_lck_grp
= lck_grp_alloc_init("Kernel Event Protocol",
8154 if (kev_lck_grp
== NULL
) {
8155 panic("%s: lck_grp_alloc_init failed\n", __func__
);
8159 kev_lck_attr
= lck_attr_alloc_init();
8160 if (kev_lck_attr
== NULL
) {
8161 panic("%s: lck_attr_alloc_init failed\n", __func__
);
8165 lck_rw_init(kev_rwlock
, kev_lck_grp
, kev_lck_attr
);
8166 if (kev_rwlock
== NULL
) {
8167 panic("%s: lck_mtx_alloc_init failed\n", __func__
);
8171 for (i
= 0, pr
= &eventsw
[0]; i
< event_proto_count
; i
++, pr
++) {
8172 net_add_proto(pr
, dp
, 1);
8175 ev_pcb_zone
= zinit(sizeof(struct kern_event_pcb
),
8176 EVPCB_ZONE_MAX
* sizeof(struct kern_event_pcb
), 0, EVPCB_ZONE_NAME
);
8177 if (ev_pcb_zone
== NULL
) {
8178 panic("%s: failed allocating ev_pcb_zone", __func__
);
8181 zone_change(ev_pcb_zone
, Z_EXPAND
, TRUE
);
8182 zone_change(ev_pcb_zone
, Z_CALLERACCT
, TRUE
);
8186 kev_attach(struct socket
*so
, __unused
int proto
, __unused
struct proc
*p
)
8189 struct kern_event_pcb
*ev_pcb
;
8191 error
= soreserve(so
, KEV_SNDSPACE
, KEV_RECVSPACE
);
8196 if ((ev_pcb
= (struct kern_event_pcb
*)zalloc(ev_pcb_zone
)) == NULL
) {
8199 bzero(ev_pcb
, sizeof(struct kern_event_pcb
));
8200 lck_mtx_init(&ev_pcb
->evp_mtx
, kev_lck_grp
, kev_lck_attr
);
8202 ev_pcb
->evp_socket
= so
;
8203 ev_pcb
->evp_vendor_code_filter
= 0xffffffff;
8205 so
->so_pcb
= (caddr_t
) ev_pcb
;
8206 lck_rw_lock_exclusive(kev_rwlock
);
8207 LIST_INSERT_HEAD(&kern_event_head
, ev_pcb
, evp_link
);
8208 kevtstat
.kes_pcbcount
++;
8209 kevtstat
.kes_gencnt
++;
8210 lck_rw_done(kev_rwlock
);
8216 kev_delete(struct kern_event_pcb
*ev_pcb
)
8218 VERIFY(ev_pcb
!= NULL
);
8219 lck_mtx_destroy(&ev_pcb
->evp_mtx
, kev_lck_grp
);
8220 zfree(ev_pcb_zone
, ev_pcb
);
8224 kev_detach(struct socket
*so
)
8226 struct kern_event_pcb
*ev_pcb
= (struct kern_event_pcb
*) so
->so_pcb
;
8228 if (ev_pcb
!= NULL
) {
8229 soisdisconnected(so
);
8230 so
->so_flags
|= SOF_PCBCLEARING
;
/*
 * For now, kev_vendor_code and mbuf_tags use the same
 * mechanism.
 */
errno_t
kev_vendor_code_find(
	const char      *string,
	u_int32_t       *out_vendor_code)
{
	if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
		return EINVAL;
	}
	return net_str_id_find_internal(string, out_vendor_code,
	           NSI_VENDOR_CODE, 1);
}
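/*
 * Together with kev_msg_post() below, this is the kernel-side KPI a kext
 * uses to publish events: resolve (or register) a vendor code once, then
 * fill in a struct kev_msg and post it.  A hedged sketch; the vendor string,
 * class/subclass values and payload are invented:
 *
 *	static u_int32_t my_vendor_code;
 *
 *	static errno_t
 *	my_post_example_event(void)
 *	{
 *		struct kev_msg msg;
 *		u_int32_t payload = 42;                    // example payload
 *		errno_t err;
 *
 *		if (my_vendor_code == 0) {
 *			err = kev_vendor_code_find("com.example.driver",
 *			    &my_vendor_code);
 *			if (err != 0) {
 *				return err;
 *			}
 *		}
 *
 *		bzero(&msg, sizeof(msg));
 *		msg.vendor_code       = my_vendor_code;
 *		msg.kev_class         = 1;                 // hypothetical class
 *		msg.kev_subclass      = 2;                 // hypothetical subclass
 *		msg.event_code        = 3;                 // driver-defined code
 *		msg.dv[0].data_ptr    = &payload;
 *		msg.dv[0].data_length = sizeof(payload);
 *		// dv[1].data_length == 0 terminates the vector
 *
 *		return kev_msg_post(&msg);
 *	}
 */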
errno_t
kev_msg_post(struct kev_msg *event_msg)
{
	mbuf_tag_id_t min_vendor, max_vendor;

	net_str_id_first_last(&min_vendor, &max_vendor, NSI_VENDOR_CODE);

	if (event_msg == NULL) {
		return EINVAL;
	}

	/*
	 * Limit third parties to posting events for registered vendor codes
	 * only
	 */
	if (event_msg->vendor_code < min_vendor ||
	    event_msg->vendor_code > max_vendor) {
		OSIncrementAtomic64((SInt64 *)&kevtstat.kes_badvendor);
		return EINVAL;
	}
	return kev_post_msg(event_msg);
}
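/*
 * User processes receive these records on a PF_SYSTEM/SYSPROTO_EVENT raw
 * socket; kev_control() below implements the SIOCSKEVFILT/SIOCGKEVFILT/
 * SIOCGKEVVENDOR ioctls that configure the per-socket filter.  A hedged
 * user-space sketch of subscribing to one vendor's events:
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/kern_event.h>
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	static int
 *	open_kev_socket(const char *vendor_string)
 *	{
 *		struct kev_vendor_code vc;
 *		struct kev_request req;
 *		int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *
 *		if (fd < 0) {
 *			return -1;
 *		}
 *
 *		// translate the vendor string into its numeric code
 *		strlcpy(vc.vendor_string, vendor_string, sizeof(vc.vendor_string));
 *		if (ioctl(fd, SIOCGKEVVENDOR, &vc) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *
 *		// install the receive filter: this vendor, any class/subclass
 *		req.vendor_code  = vc.vendor_code;
 *		req.kev_class    = KEV_ANY_CLASS;
 *		req.kev_subclass = KEV_ANY_SUBCLASS;
 *		if (ioctl(fd, SIOCSKEVFILT, &req) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *
 *		// each subsequent recv() returns one struct kern_event_msg
 *		return fd;
 *	}
 */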
8276 kev_post_msg(struct kev_msg
*event_msg
)
8278 struct mbuf
*m
, *m2
;
8279 struct kern_event_pcb
*ev_pcb
;
8280 struct kern_event_msg
*ev
;
8282 u_int32_t total_size
;
8285 /* Verify the message is small enough to fit in one mbuf w/o cluster */
8286 total_size
= KEV_MSG_HEADER_SIZE
;
8288 for (i
= 0; i
< 5; i
++) {
8289 if (event_msg
->dv
[i
].data_length
== 0) {
8292 total_size
+= event_msg
->dv
[i
].data_length
;
8295 if (total_size
> MLEN
) {
8296 OSIncrementAtomic64((SInt64
*)&kevtstat
.kes_toobig
);
8300 m
= m_get(M_WAIT
, MT_DATA
);
8302 OSIncrementAtomic64((SInt64
*)&kevtstat
.kes_nomem
);
8305 ev
= mtod(m
, struct kern_event_msg
*);
8306 total_size
= KEV_MSG_HEADER_SIZE
;
8308 tmp
= (char *) &ev
->event_data
[0];
8309 for (i
= 0; i
< 5; i
++) {
8310 if (event_msg
->dv
[i
].data_length
== 0) {
8314 total_size
+= event_msg
->dv
[i
].data_length
;
8315 bcopy(event_msg
->dv
[i
].data_ptr
, tmp
,
8316 event_msg
->dv
[i
].data_length
);
8317 tmp
+= event_msg
->dv
[i
].data_length
;
8320 ev
->id
= ++static_event_id
;
8321 ev
->total_size
= total_size
;
8322 ev
->vendor_code
= event_msg
->vendor_code
;
8323 ev
->kev_class
= event_msg
->kev_class
;
8324 ev
->kev_subclass
= event_msg
->kev_subclass
;
8325 ev
->event_code
= event_msg
->event_code
;
8327 m
->m_len
= total_size
;
8328 lck_rw_lock_shared(kev_rwlock
);
8329 for (ev_pcb
= LIST_FIRST(&kern_event_head
);
8331 ev_pcb
= LIST_NEXT(ev_pcb
, evp_link
)) {
8332 lck_mtx_lock(&ev_pcb
->evp_mtx
);
8333 if (ev_pcb
->evp_socket
->so_pcb
== NULL
) {
8334 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
8337 if (ev_pcb
->evp_vendor_code_filter
!= KEV_ANY_VENDOR
) {
8338 if (ev_pcb
->evp_vendor_code_filter
!= ev
->vendor_code
) {
8339 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
8343 if (ev_pcb
->evp_class_filter
!= KEV_ANY_CLASS
) {
8344 if (ev_pcb
->evp_class_filter
!= ev
->kev_class
) {
8345 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
8349 if ((ev_pcb
->evp_subclass_filter
!=
8350 KEV_ANY_SUBCLASS
) &&
8351 (ev_pcb
->evp_subclass_filter
!=
8352 ev
->kev_subclass
)) {
8353 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
8359 m2
= m_copym(m
, 0, m
->m_len
, M_WAIT
);
8361 OSIncrementAtomic64((SInt64
*)&kevtstat
.kes_nomem
);
8363 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
8364 lck_rw_done(kev_rwlock
);
8367 if (sbappendrecord(&ev_pcb
->evp_socket
->so_rcv
, m2
)) {
8369 * We use "m" for the socket stats as it would be
8370 * unsafe to use "m2"
8372 so_inc_recv_data_stat(ev_pcb
->evp_socket
,
8373 1, m
->m_len
, MBUF_TC_BE
);
8375 sorwakeup(ev_pcb
->evp_socket
);
8376 OSIncrementAtomic64((SInt64
*)&kevtstat
.kes_posted
);
8378 OSIncrementAtomic64((SInt64
*)&kevtstat
.kes_fullsock
);
8380 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
8383 lck_rw_done(kev_rwlock
);
8389 kev_control(struct socket
*so
,
8392 __unused
struct ifnet
*ifp
,
8393 __unused
struct proc
*p
)
8395 struct kev_request
*kev_req
= (struct kev_request
*) data
;
8396 struct kern_event_pcb
*ev_pcb
;
8397 struct kev_vendor_code
*kev_vendor
;
8398 u_int32_t
*id_value
= (u_int32_t
*) data
;
8402 *id_value
= static_event_id
;
8405 ev_pcb
= (struct kern_event_pcb
*) so
->so_pcb
;
8406 ev_pcb
->evp_vendor_code_filter
= kev_req
->vendor_code
;
8407 ev_pcb
->evp_class_filter
= kev_req
->kev_class
;
8408 ev_pcb
->evp_subclass_filter
= kev_req
->kev_subclass
;
8411 ev_pcb
= (struct kern_event_pcb
*) so
->so_pcb
;
8412 kev_req
->vendor_code
= ev_pcb
->evp_vendor_code_filter
;
8413 kev_req
->kev_class
= ev_pcb
->evp_class_filter
;
8414 kev_req
->kev_subclass
= ev_pcb
->evp_subclass_filter
;
8416 case SIOCGKEVVENDOR
:
8417 kev_vendor
= (struct kev_vendor_code
*)data
;
8418 /* Make sure string is NULL terminated */
8419 kev_vendor
->vendor_string
[KEV_VENDOR_CODE_MAX_STR_LEN
- 1] = 0;
8420 return net_str_id_find_internal(kev_vendor
->vendor_string
,
8421 &kev_vendor
->vendor_code
, NSI_VENDOR_CODE
, 0);
__private_extern__ int
kevt_getstat SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;

	lck_rw_lock_shared(kev_rwlock);

	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = sizeof(struct kevtstat);
		goto done;
	}

	error = SYSCTL_OUT(req, &kevtstat,
	    MIN(sizeof(struct kevtstat), req->oldlen));
done:
	lck_rw_done(kev_rwlock);

	return error;
}
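/*
 * The handler above is attached to the "stats" node declared earlier under
 * net.systm.kevt, so user space can read the counters with sysctlbyname().
 * A hedged sketch; the MIB string is inferred from the SYSCTL_NODE/
 * SYSCTL_PROC declarations and struct kevtstat is only visible to builds
 * that see the private header, so treat both as assumptions:
 *
 *	#include <stdio.h>
 *	#include <sys/sysctl.h>
 *	#include <sys/kern_event.h>
 *
 *	static void
 *	print_kevt_stats(void)
 *	{
 *		struct kevtstat st;
 *		size_t len = sizeof(st);
 *
 *		if (sysctlbyname("net.systm.kevt.stats", &st, &len, NULL, 0) == 0) {
 *			printf("pcbs %llu posted %llu fullsock %llu\n",
 *			    (unsigned long long)st.kes_pcbcount,
 *			    (unsigned long long)st.kes_posted,
 *			    (unsigned long long)st.kes_fullsock);
 *		}
 *	}
 */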
8454 __private_extern__
int
8455 kevt_pcblist SYSCTL_HANDLER_ARGS
8457 #pragma unused(oidp, arg1, arg2)
8460 struct xsystmgen xsg
;
8462 size_t item_size
= ROUNDUP64(sizeof(struct xkevtpcb
)) +
8463 ROUNDUP64(sizeof(struct xsocket_n
)) +
8464 2 * ROUNDUP64(sizeof(struct xsockbuf_n
)) +
8465 ROUNDUP64(sizeof(struct xsockstat_n
));
8466 struct kern_event_pcb
*ev_pcb
;
8468 buf
= _MALLOC(item_size
, M_TEMP
, M_WAITOK
| M_ZERO
);
8473 lck_rw_lock_shared(kev_rwlock
);
8475 n
= kevtstat
.kes_pcbcount
;
8477 if (req
->oldptr
== USER_ADDR_NULL
) {
8478 req
->oldidx
= (n
+ n
/ 8) * item_size
;
8481 if (req
->newptr
!= USER_ADDR_NULL
) {
8485 bzero(&xsg
, sizeof(xsg
));
8486 xsg
.xg_len
= sizeof(xsg
);
8488 xsg
.xg_gen
= kevtstat
.kes_gencnt
;
8489 xsg
.xg_sogen
= so_gencnt
;
8490 error
= SYSCTL_OUT(req
, &xsg
, sizeof(xsg
));
8495 * We are done if there is no pcb
8502 for (i
= 0, ev_pcb
= LIST_FIRST(&kern_event_head
);
8503 i
< n
&& ev_pcb
!= NULL
;
8504 i
++, ev_pcb
= LIST_NEXT(ev_pcb
, evp_link
)) {
8505 struct xkevtpcb
*xk
= (struct xkevtpcb
*)buf
;
8506 struct xsocket_n
*xso
= (struct xsocket_n
*)
8507 ADVANCE64(xk
, sizeof(*xk
));
8508 struct xsockbuf_n
*xsbrcv
= (struct xsockbuf_n
*)
8509 ADVANCE64(xso
, sizeof(*xso
));
8510 struct xsockbuf_n
*xsbsnd
= (struct xsockbuf_n
*)
8511 ADVANCE64(xsbrcv
, sizeof(*xsbrcv
));
8512 struct xsockstat_n
*xsostats
= (struct xsockstat_n
*)
8513 ADVANCE64(xsbsnd
, sizeof(*xsbsnd
));
8515 bzero(buf
, item_size
);
8517 lck_mtx_lock(&ev_pcb
->evp_mtx
);
8519 xk
->kep_len
= sizeof(struct xkevtpcb
);
8520 xk
->kep_kind
= XSO_EVT
;
8521 xk
->kep_evtpcb
= (uint64_t)VM_KERNEL_ADDRPERM(ev_pcb
);
8522 xk
->kep_vendor_code_filter
= ev_pcb
->evp_vendor_code_filter
;
8523 xk
->kep_class_filter
= ev_pcb
->evp_class_filter
;
8524 xk
->kep_subclass_filter
= ev_pcb
->evp_subclass_filter
;
8526 sotoxsocket_n(ev_pcb
->evp_socket
, xso
);
8527 sbtoxsockbuf_n(ev_pcb
->evp_socket
?
8528 &ev_pcb
->evp_socket
->so_rcv
: NULL
, xsbrcv
);
8529 sbtoxsockbuf_n(ev_pcb
->evp_socket
?
8530 &ev_pcb
->evp_socket
->so_snd
: NULL
, xsbsnd
);
8531 sbtoxsockstat_n(ev_pcb
->evp_socket
, xsostats
);
8533 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
8535 error
= SYSCTL_OUT(req
, buf
, item_size
);
8540 * Give the user an updated idea of our state.
8541 * If the generation differs from what we told
8542 * her before, she knows that something happened
8543 * while we were processing this request, and it
8544 * might be necessary to retry.
8546 bzero(&xsg
, sizeof(xsg
));
8547 xsg
.xg_len
= sizeof(xsg
);
8549 xsg
.xg_gen
= kevtstat
.kes_gencnt
;
8550 xsg
.xg_sogen
= so_gencnt
;
8551 error
= SYSCTL_OUT(req
, &xsg
, sizeof(xsg
));
8558 lck_rw_done(kev_rwlock
);
8563 #endif /* SOCKETS */
8567 fill_kqueueinfo(struct kqueue
*kq
, struct kqueue_info
* kinfo
)
8569 struct vinfo_stat
* st
;
8571 st
= &kinfo
->kq_stat
;
8573 st
->vst_size
= kq
->kq_count
;
8574 if (kq
->kq_state
& KQ_KEV_QOS
) {
8575 st
->vst_blksize
= sizeof(struct kevent_qos_s
);
8576 } else if (kq
->kq_state
& KQ_KEV64
) {
8577 st
->vst_blksize
= sizeof(struct kevent64_s
);
8579 st
->vst_blksize
= sizeof(struct kevent
);
8581 st
->vst_mode
= S_IFIFO
;
8582 st
->vst_ino
= (kq
->kq_state
& KQ_DYNAMIC
) ?
8583 ((struct kqworkloop
*)kq
)->kqwl_dynamicid
: 0;
8585 /* flags exported to libproc as PROC_KQUEUE_* (sys/proc_info.h) */
8586 #define PROC_KQUEUE_MASK (KQ_SEL|KQ_SLEEP|KQ_KEV32|KQ_KEV64|KQ_KEV_QOS|KQ_WORKQ|KQ_WORKLOOP)
8587 kinfo
->kq_state
= kq
->kq_state
& PROC_KQUEUE_MASK
;
8593 fill_kqueue_dyninfo(struct kqueue
*kq
, struct kqueue_dyninfo
*kqdi
)
8595 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
8596 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
8597 workq_threadreq_param_t trp
= {};
8600 if ((kq
->kq_state
& KQ_WORKLOOP
) == 0) {
8604 if ((err
= fill_kqueueinfo(kq
, &kqdi
->kqdi_info
))) {
8610 kqdi
->kqdi_servicer
= thread_tid(kqr
->kqr_thread
);
8611 kqdi
->kqdi_owner
= thread_tid(kqwl
->kqwl_owner
);
8612 kqdi
->kqdi_request_state
= kqr
->kqr_state
;
8613 kqdi
->kqdi_async_qos
= kqr
->kqr_qos_index
;
8614 kqdi
->kqdi_events_qos
= kqr
->kqr_override_index
;
8615 kqdi
->kqdi_sync_waiters
= kqr
->kqr_dsync_waiters
;
8616 kqdi
->kqdi_sync_waiter_qos
= 0;
8618 trp
.trp_value
= kqwl
->kqwl_params
;
8619 if (trp
.trp_flags
& TRP_PRIORITY
) {
8620 kqdi
->kqdi_pri
= trp
.trp_pri
;
8625 if (trp
.trp_flags
& TRP_POLICY
) {
8626 kqdi
->kqdi_pol
= trp
.trp_pol
;
8631 if (trp
.trp_flags
& TRP_CPUPERCENT
) {
8632 kqdi
->kqdi_cpupercent
= trp
.trp_cpupercent
;
8634 kqdi
->kqdi_cpupercent
= 0;
8637 kq_req_unlock(kqwl
);
8644 knote_markstayactive(struct knote
*kn
)
8646 struct kqueue
*kq
= knote_get_kq(kn
);
8650 kn
->kn_status
|= KN_STAYACTIVE
;
8653 * Making a knote stay active is a property of the knote that must be
8654 * established before it is fully attached.
8656 assert(kn
->kn_status
& KN_ATTACHING
);
8657 assert((kn
->kn_status
& (KN_QUEUED
| KN_SUPPRESSED
)) == 0);
8659 /* handle all stayactive knotes on the (appropriate) manager */
8660 if (kq
->kq_state
& KQ_WORKQ
) {
8661 qos
= KQWQ_QOS_MANAGER
;
8662 } else if (kq
->kq_state
& KQ_WORKLOOP
) {
8663 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
8665 qos
= _pthread_priority_thread_qos(kn
->kn_qos
);
8666 assert(qos
&& qos
< THREAD_QOS_LAST
);
8668 kqworkloop_update_threads_qos(kqwl
, KQWL_UTQ_UPDATE_STAYACTIVE_QOS
, qos
);
8670 qos
= KQWL_BUCKET_STAYACTIVE
;
8672 qos
= THREAD_QOS_UNSPECIFIED
;
8675 kn
->kn_req_index
= qos
;
8676 kn
->kn_qos_override
= qos
;
8677 kn
->kn_qos_index
= qos
;
8684 knote_clearstayactive(struct knote
*kn
)
8686 kqlock(knote_get_kq(kn
));
8687 kn
->kn_status
&= ~KN_STAYACTIVE
;
8688 knote_deactivate(kn
);
8689 kqunlock(knote_get_kq(kn
));
8692 static unsigned long
8693 kevent_extinfo_emit(struct kqueue
*kq
, struct knote
*kn
, struct kevent_extinfo
*buf
,
8694 unsigned long buflen
, unsigned long nknotes
)
8696 for (; kn
; kn
= SLIST_NEXT(kn
, kn_link
)) {
8697 if (kq
== knote_get_kq(kn
)) {
8698 if (nknotes
< buflen
) {
8699 struct kevent_extinfo
*info
= &buf
[nknotes
];
8700 struct kevent_internal_s
*kevp
= &kn
->kn_kevent
;
8704 info
->kqext_kev
= (struct kevent_qos_s
){
8705 .ident
= kevp
->ident
,
8706 .filter
= kevp
->filter
,
8707 .flags
= kevp
->flags
,
8708 .fflags
= kevp
->fflags
,
8709 .data
= (int64_t)kevp
->data
,
8710 .udata
= kevp
->udata
,
8711 .ext
[0] = kevp
->ext
[0],
8712 .ext
[1] = kevp
->ext
[1],
8713 .ext
[2] = kevp
->ext
[2],
8714 .ext
[3] = kevp
->ext
[3],
8715 .qos
= kn
->kn_req_index
,
8717 info
->kqext_sdata
= kn
->kn_sdata
;
8718 info
->kqext_status
= kn
->kn_status
;
8719 info
->kqext_sfflags
= kn
->kn_sfflags
;
8724 /* we return total number of knotes, which may be more than requested */
8733 kevent_copyout_proc_dynkqids(void *proc
, user_addr_t ubuf
, uint32_t ubufsize
,
8734 int32_t *nkqueues_out
)
8736 proc_t p
= (proc_t
)proc
;
8737 struct filedesc
*fdp
= p
->p_fd
;
8738 unsigned int nkqueues
= 0;
8739 unsigned long ubuflen
= ubufsize
/ sizeof(kqueue_id_t
);
8740 size_t buflen
, bufsize
;
8741 kqueue_id_t
*kq_ids
= NULL
;
8746 if (ubuf
== USER_ADDR_NULL
&& ubufsize
!= 0) {
8751 buflen
= min(ubuflen
, PROC_PIDDYNKQUEUES_MAX
);
8754 if (os_mul_overflow(sizeof(kqueue_id_t
), buflen
, &bufsize
)) {
8758 kq_ids
= kalloc(bufsize
);
8763 bzero(kq_ids
, bufsize
);
8768 if (fdp
->fd_kqhashmask
> 0) {
8769 for (uint32_t i
= 0; i
< fdp
->fd_kqhashmask
+ 1; i
++) {
8770 struct kqworkloop
*kqwl
;
8772 SLIST_FOREACH(kqwl
, &fdp
->fd_kqhash
[i
], kqwl_hashlink
) {
8773 /* report the number of kqueues, even if they don't all fit */
8774 if (nkqueues
< buflen
) {
8775 kq_ids
[nkqueues
] = kqwl
->kqwl_dynamicid
;
8786 if (os_mul_overflow(sizeof(kqueue_id_t
), min(buflen
, nkqueues
), ©size
)) {
8791 assert(ubufsize
>= copysize
);
8792 err
= copyout(kq_ids
, ubuf
, copysize
);
8797 kfree(kq_ids
, bufsize
);
8801 *nkqueues_out
= (int)min(nkqueues
, PROC_PIDDYNKQUEUES_MAX
);
8807 kevent_copyout_dynkqinfo(void *proc
, kqueue_id_t kq_id
, user_addr_t ubuf
,
8808 uint32_t ubufsize
, int32_t *size_out
)
8810 proc_t p
= (proc_t
)proc
;
8813 struct kqueue_dyninfo kqdi
= { };
8817 if (ubufsize
< sizeof(struct kqueue_info
)) {
8822 kq
= kqueue_hash_lookup(p
, kq_id
);
8831 * backward compatibility: allow the argument to this call to only be
8832 * a struct kqueue_info
8834 if (ubufsize
>= sizeof(struct kqueue_dyninfo
)) {
8835 ubufsize
= sizeof(struct kqueue_dyninfo
);
8836 err
= fill_kqueue_dyninfo(kq
, &kqdi
);
8838 ubufsize
= sizeof(struct kqueue_info
);
8839 err
= fill_kqueueinfo(kq
, &kqdi
.kqdi_info
);
8841 if (err
== 0 && (err
= copyout(&kqdi
, ubuf
, ubufsize
)) == 0) {
8842 *size_out
= ubufsize
;
8844 kqueue_release_last(p
, kq
);
8849 kevent_copyout_dynkqextinfo(void *proc
, kqueue_id_t kq_id
, user_addr_t ubuf
,
8850 uint32_t ubufsize
, int32_t *nknotes_out
)
8852 proc_t p
= (proc_t
)proc
;
8859 kq
= kqueue_hash_lookup(p
, kq_id
);
8867 err
= pid_kqueue_extinfo(p
, kq
, ubuf
, ubufsize
, nknotes_out
);
8868 kqueue_release_last(p
, kq
);
8873 pid_kqueue_extinfo(proc_t p
, struct kqueue
*kq
, user_addr_t ubuf
,
8874 uint32_t bufsize
, int32_t *retval
)
8879 struct filedesc
*fdp
= p
->p_fd
;
8880 unsigned long nknotes
= 0;
8881 unsigned long buflen
= bufsize
/ sizeof(struct kevent_extinfo
);
8882 struct kevent_extinfo
*kqext
= NULL
;
8884 /* arbitrary upper limit to cap kernel memory usage, copyout size, etc. */
8885 buflen
= min(buflen
, PROC_PIDFDKQUEUE_KNOTES_MAX
);
8887 kqext
= kalloc(buflen
* sizeof(struct kevent_extinfo
));
8888 if (kqext
== NULL
) {
8892 bzero(kqext
, buflen
* sizeof(struct kevent_extinfo
));
8895 for (i
= 0; i
< fdp
->fd_knlistsize
; i
++) {
8896 kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
]);
8897 nknotes
= kevent_extinfo_emit(kq
, kn
, kqext
, buflen
, nknotes
);
8901 if (fdp
->fd_knhashmask
!= 0) {
8902 for (i
= 0; i
< (int)fdp
->fd_knhashmask
+ 1; i
++) {
8904 kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
]);
8905 nknotes
= kevent_extinfo_emit(kq
, kn
, kqext
, buflen
, nknotes
);
8910 assert(bufsize
>= sizeof(struct kevent_extinfo
) * min(buflen
, nknotes
));
8911 err
= copyout(kqext
, ubuf
, sizeof(struct kevent_extinfo
) * min(buflen
, nknotes
));
8915 kfree(kqext
, buflen
* sizeof(struct kevent_extinfo
));
8920 *retval
= min(nknotes
, PROC_PIDFDKQUEUE_KNOTES_MAX
);
8926 klist_copy_udata(struct klist
*list
, uint64_t *buf
,
8927 unsigned int buflen
, unsigned int nknotes
)
8929 struct kevent_internal_s
*kev
;
8931 SLIST_FOREACH(kn
, list
, kn_link
) {
8932 if (nknotes
< buflen
) {
8933 struct kqueue
*kq
= knote_get_kq(kn
);
8935 kev
= &(kn
->kn_kevent
);
8936 buf
[nknotes
] = kev
->udata
;
8939 /* we return total number of knotes, which may be more than requested */
8947 kqlist_copy_dynamicids(__assert_only proc_t p
, struct kqlist
*list
,
8948 uint64_t *buf
, unsigned int buflen
, unsigned int nids
)
8950 kqhash_lock_held(p
);
8951 struct kqworkloop
*kqwl
;
8952 SLIST_FOREACH(kqwl
, list
, kqwl_hashlink
) {
8953 if (nids
< buflen
) {
8954 buf
[nids
] = kqwl
->kqwl_dynamicid
;
8962 kevent_proc_copy_uptrs(void *proc
, uint64_t *buf
, int bufsize
)
8964 proc_t p
= (proc_t
)proc
;
8965 struct filedesc
*fdp
= p
->p_fd
;
8966 unsigned int nuptrs
= 0;
8967 unsigned long buflen
= bufsize
/ sizeof(uint64_t);
8970 assert(buf
!= NULL
);
8974 for (int i
= 0; i
< fdp
->fd_knlistsize
; i
++) {
8975 nuptrs
= klist_copy_udata(&fdp
->fd_knlist
[i
], buf
, buflen
, nuptrs
);
8979 if (fdp
->fd_knhashmask
!= 0) {
8980 for (int i
= 0; i
< (int)fdp
->fd_knhashmask
+ 1; i
++) {
8981 nuptrs
= klist_copy_udata(&fdp
->fd_knhash
[i
], buf
, buflen
, nuptrs
);
8987 if (fdp
->fd_kqhashmask
!= 0) {
8988 for (int i
= 0; i
< (int)fdp
->fd_kqhashmask
+ 1; i
++) {
8989 nuptrs
= kqlist_copy_dynamicids(p
, &fdp
->fd_kqhash
[i
], buf
, buflen
,
8999 kevent_set_return_to_kernel_user_tsd(proc_t p
, thread_t thread
)
9002 bool proc_is_64bit
= !!(p
->p_flag
& P_LP64
);
9003 size_t user_addr_size
= proc_is_64bit
? 8 : 4;
9004 uint32_t ast_flags32
= 0;
9005 uint64_t ast_flags64
= 0;
9006 struct uthread
*ut
= get_bsdthread_info(thread
);
9008 if (ut
->uu_kqr_bound
!= NULL
) {
9009 ast_flags64
|= R2K_WORKLOOP_PENDING_EVENTS
;
9012 if (ast_flags64
== 0) {
9016 if (!(p
->p_flag
& P_LP64
)) {
9017 ast_flags32
= (uint32_t)ast_flags64
;
9018 assert(ast_flags64
< 0x100000000ull
);
9021 ast_addr
= thread_rettokern_addr(thread
);
9022 if (ast_addr
== 0) {
9026 if (copyout((proc_is_64bit
? (void *)&ast_flags64
: (void *)&ast_flags32
),
9027 (user_addr_t
)ast_addr
,
9028 user_addr_size
) != 0) {
9029 printf("pid %d (tid:%llu): copyout of return_to_kernel ast flags failed with "
9030 "ast_addr = %llu\n", p
->p_pid
, thread_tid(current_thread()), ast_addr
);
9035 kevent_ast(thread_t thread
, uint16_t bits
)
9037 proc_t p
= current_proc();
9039 if (bits
& AST_KEVENT_REDRIVE_THREADREQ
) {
9040 workq_kern_threadreq_redrive(p
, WORKQ_THREADREQ_CAN_CREATE_THREADS
);
9042 if (bits
& AST_KEVENT_RETURN_TO_KERNEL
) {
9043 kevent_set_return_to_kernel_user_tsd(p
, thread
);
#if DEVELOPMENT || DEBUG

#define KEVENT_SYSCTL_BOUND_ID 1

static int
kevent_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	uintptr_t type = (uintptr_t)arg1;
	uint64_t bound_id = 0;

	if (type != KEVENT_SYSCTL_BOUND_ID) {
		return EINVAL;
	}

	if (req->newptr) {
		return EINVAL;
	}

	struct uthread *ut = get_bsdthread_info(current_thread());
	if (!ut) {
		return EFAULT;
	}

	struct kqrequest *kqr = ut->uu_kqr_bound;
	if (kqr) {
		if (kqr->kqr_state & KQR_WORKLOOP) {
			bound_id = kqr_kqworkloop(kqr)->kqwl_dynamicid;
		} else {
			bound_id = -1;
		}
	}

	return sysctl_io_number(req, bound_id, sizeof(bound_id), NULL, NULL);
}

SYSCTL_NODE(_kern, OID_AUTO, kevent, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "kevent information");

SYSCTL_PROC(_kern_kevent, OID_AUTO, bound_id,
    CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    (void *)KEVENT_SYSCTL_BOUND_ID,
    sizeof(kqueue_id_t), kevent_sysctl, "Q",
    "get the ID of the bound kqueue");

#endif /* DEVELOPMENT || DEBUG */
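/*
 * On DEVELOPMENT/DEBUG kernels the servicer thread can query which dynamic
 * kqueue it is currently bound to by reading the sysctl registered above.
 * A hedged user-space sketch; the MIB string "kern.kevent.bound_id" is
 * inferred from the SYSCTL_NODE/SYSCTL_PROC names:
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <sys/sysctl.h>
 *
 *	static void
 *	print_bound_kqueue_id(void)
 *	{
 *		uint64_t bound_id = 0;
 *		size_t len = sizeof(bound_id);
 *
 *		if (sysctlbyname("kern.kevent.bound_id", &bound_id, &len,
 *		        NULL, 0) == 0) {
 *			// 0 when the thread is not bound; otherwise the
 *			// workloop's dynamic identifier (per the handler above)
 *			printf("bound kqueue id: 0x%llx\n",
 *			    (unsigned long long)bound_id);
 *		}
 *	}
 */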