/*
 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_event.c       1.0 (3/31/2000)
 */
#include <stdatomic.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vnode_internal.h>
#include <sys/proc_info.h>
#include <sys/codesign.h>
#include <sys/pthread_shims.h>
#include <sys/kdebug.h>
#include <sys/reason.h>
#include <os/reason_private.h>

#include <kern/locks.h>
#include <kern/clock.h>
#include <kern/cpu_data.h>
#include <kern/policy_internal.h>
#include <kern/thread_call.h>
#include <kern/sched_prim.h>
#include <kern/waitq.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/assert.h>
#include <kern/ast.h>
#include <kern/thread.h>
#include <kern/kcdata.h>

#include <libkern/libkern.h>
#include <libkern/OSAtomic.h>

#include "net/net_str_id.h"

#include <mach/task.h>
#include <libkern/section_keywords.h>

#if CONFIG_MEMORYSTATUS
#include <sys/kern_memorystatus.h>
#endif
extern thread_t port_name_to_thread(mach_port_name_t port_name); /* osfmk/kern/ipc_tt.h   */
extern mach_port_name_t ipc_entry_name_mask(mach_port_name_t name); /* osfmk/ipc/ipc_entry.h */

#define KEV_EVTID(code) BSDDBG_CODE(DBG_BSD_KEVENT, (code))

/*
 * JMM - this typedef needs to be unified with pthread_priority_t
 *       and mach_msg_priority_t. It also needs to be the same type.
 */
typedef int32_t qos_t;

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

#define KQ_EVENT    NO_EVENT64

#define KNUSE_NONE       0x0
#define KNUSE_STEAL_DROP 0x1
#define KNUSE_BOOST      0x2
static int kqlock2knoteuse(struct kqueue *kq, struct knote *kn, int flags);
static int kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
static int kqlock2knotedetach(struct kqueue *kq, struct knote *kn, int flags);
static int knoteuse2kqlock(struct kqueue *kq, struct knote *kn, int flags);

static int kqueue_read(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int kqueue_write(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
    vfs_context_t ctx);
static int kqueue_select(struct fileproc *fp, int which, void *wq_link_id,
    vfs_context_t ctx);
static int kqueue_close(struct fileglob *fg, vfs_context_t ctx);
static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn,
    struct kevent_internal_s *kev, vfs_context_t ctx);
static int kqueue_drain(struct fileproc *fp, vfs_context_t ctx);
static const struct fileops kqueueops = {
    .fo_type = DTYPE_KQUEUE,
    .fo_read = kqueue_read,
    .fo_write = kqueue_write,
    .fo_ioctl = kqueue_ioctl,
    .fo_select = kqueue_select,
    .fo_close = kqueue_close,
    .fo_kqfilter = kqueue_kqfilter,
    .fo_drain = kqueue_drain,
};
static void kevent_put_kq(struct proc *p, kqueue_id_t id, struct fileproc *fp, struct kqueue *kq);
static int kevent_internal(struct proc *p,
    kqueue_id_t id, kqueue_id_t *id_out,
    user_addr_t changelist, int nchanges,
    user_addr_t eventlist, int nevents,
    user_addr_t data_out, uint64_t data_available,
    unsigned int flags, user_addr_t utimeout,
    kqueue_continue_t continuation,
    int32_t *retval);
static int kevent_copyin(user_addr_t *addrp, struct kevent_internal_s *kevp,
    struct proc *p, unsigned int flags);
static int kevent_copyout(struct kevent_internal_s *kevp, user_addr_t *addrp,
    struct proc *p, unsigned int flags);
char * kevent_description(struct kevent_internal_s *kevp, char *s, size_t n);
static void kqueue_interrupt(struct kqueue *kq);
static int kevent_callback(struct kqueue *kq, struct kevent_internal_s *kevp,
    void *data);
static void kevent_continue(struct kqueue *kq, void *data, int error);
static void kqueue_scan_continue(void *contp, wait_result_t wait_result);
static int kqueue_process(struct kqueue *kq, kevent_callback_t callback, void *callback_data,
    struct filt_process_s *process_data, int *countp, struct proc *p);
static struct kqtailq *kqueue_get_base_queue(struct kqueue *kq, kq_index_t qos_index);
static struct kqtailq *kqueue_get_high_queue(struct kqueue *kq, kq_index_t qos_index);
static int kqueue_queue_empty(struct kqueue *kq, kq_index_t qos_index);

static struct kqtailq *kqueue_get_suppressed_queue(struct kqueue *kq, kq_index_t qos_index);

static void kqworkq_request_thread(struct kqworkq *kqwq, kq_index_t qos_index);
static void kqworkq_request_help(struct kqworkq *kqwq, kq_index_t qos_index);
static void kqworkq_update_override(struct kqworkq *kqwq, kq_index_t qos_index, kq_index_t override_index);
static void kqworkq_bind_thread_impl(struct kqworkq *kqwq, kq_index_t qos_index, thread_t thread, unsigned int flags);
static void kqworkq_unbind_thread(struct kqworkq *kqwq, kq_index_t qos_index, thread_t thread, unsigned int flags);
static struct kqrequest *kqworkq_get_request(struct kqworkq *kqwq, kq_index_t qos_index);
enum {
    KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI = 0x1,
    KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI = 0x2,
    KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS = 0x4,
    KQWL_UO_UPDATE_OVERRIDE_LAZY = 0x8
};
static void kqworkloop_update_override(struct kqworkloop *kqwl, kq_index_t qos_index, kq_index_t override_index, uint32_t flags);
static void kqworkloop_bind_thread_impl(struct kqworkloop *kqwl, thread_t thread, unsigned int flags);
static void kqworkloop_unbind_thread(struct kqworkloop *kqwl, thread_t thread, unsigned int flags);
static inline kq_index_t kqworkloop_combined_qos(struct kqworkloop *kqwl, boolean_t *);
static void kqworkloop_update_suppress_sync_count(struct kqrequest *kqr, uint32_t flags);
enum {
    KQWL_UTQ_NONE,
    /*
     * The wakeup qos is the qos of QUEUED knotes.
     *
     * This QoS is accounted for with the events override in the
     * kqr_override_index field. It is raised each time a new knote is queued at
     * a given QoS. The kqr_wakeup_indexes field is a superset of the non empty
     * knote buckets and is recomputed after each event delivery.
     */
    KQWL_UTQ_UPDATE_WAKEUP_QOS,
    KQWL_UTQ_UPDATE_STAYACTIVE_QOS,
    KQWL_UTQ_RECOMPUTE_WAKEUP_QOS,
    /*
     * The wakeup override is for suppressed knotes that have fired again at
     * a higher QoS than the one for which they are suppressed already.
     * This override is cleared when the knote suppressed list becomes empty.
     */
    KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE,
    KQWL_UTQ_RESET_WAKEUP_OVERRIDE,
    /*
     * The async QoS is the maximum QoS of an event enqueued on this workloop in
     * userland. It is copied from the only EVFILT_WORKLOOP knote with
     * a NOTE_WL_THREAD_REQUEST bit set allowed on this workloop. If there is no
     * such knote, this QoS is 0.
     */
    KQWL_UTQ_SET_ASYNC_QOS,
    /*
     * The sync waiters QoS is the maximum QoS of any thread blocked on an
     * EVFILT_WORKLOOP knote marked with the NOTE_WL_SYNC_WAIT bit.
     * If there is no such knote, this QoS is 0.
     */
    KQWL_UTQ_SET_SYNC_WAITERS_QOS,
    KQWL_UTQ_REDRIVE_EVENTS,
};
static void kqworkloop_update_threads_qos(struct kqworkloop *kqwl, int op, kq_index_t qos);
static void kqworkloop_request_help(struct kqworkloop *kqwl, kq_index_t qos_index);

static int knote_process(struct knote *kn, kevent_callback_t callback, void *callback_data,
    struct filt_process_s *process_data, struct proc *p);

static void knote_put(struct knote *kn);
static int kq_add_knote(struct kqueue *kq, struct knote *kn,
    struct kevent_internal_s *kev, struct proc *p, int *knoteuse_flags);
static struct knote *kq_find_knote_and_kq_lock(struct kqueue *kq, struct kevent_internal_s *kev, bool is_fd, struct proc *p);
static void kq_remove_knote(struct kqueue *kq, struct knote *kn, struct proc *p, kn_status_t *kn_status, uint16_t *kq_state);

static void knote_drop(struct knote *kn, struct proc *p);
static struct knote *knote_alloc(void);
static void knote_free(struct knote *kn);
static void knote_activate(struct knote *kn);
static void knote_deactivate(struct knote *kn);

static void knote_enable(struct knote *kn);
static void knote_disable(struct knote *kn);

static int knote_enqueue(struct knote *kn);
static void knote_dequeue(struct knote *kn);

static void knote_suppress(struct knote *kn);
static void knote_unsuppress(struct knote *kn);
static void knote_wakeup(struct knote *kn);

static kq_index_t knote_get_queue_index(struct knote *kn);
static struct kqtailq *knote_get_queue(struct knote *kn);
static kq_index_t knote_get_req_index(struct knote *kn);
static kq_index_t knote_get_qos_index(struct knote *kn);
static void knote_set_qos_index(struct knote *kn, kq_index_t qos_index);
static kq_index_t knote_get_qos_override_index(struct knote *kn);
static kq_index_t knote_get_sync_qos_override_index(struct knote *kn);
static void knote_set_qos_override_index(struct knote *kn, kq_index_t qos_index, boolean_t override_is_sync);
static void knote_set_qos_overcommit(struct knote *kn);
static int filt_fileattach(struct knote *kn, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) file_filtops = {
    .f_attach = filt_fileattach,
};

static void filt_kqdetach(struct knote *kn);
static int filt_kqueue(struct knote *kn, long hint);
static int filt_kqtouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_kqprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) kqread_filtops = {
    .f_detach = filt_kqdetach,
    .f_event = filt_kqueue,
    .f_touch = filt_kqtouch,
    .f_process = filt_kqprocess,
};
/* placeholder for not-yet-implemented filters */
static int filt_badattach(struct knote *kn, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) bad_filtops = {
    .f_attach = filt_badattach,
};
static int filt_procattach(struct knote *kn, struct kevent_internal_s *kev);
static void filt_procdetach(struct knote *kn);
static int filt_proc(struct knote *kn, long hint);
static int filt_proctouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_procprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) proc_filtops = {
    .f_attach = filt_procattach,
    .f_detach = filt_procdetach,
    .f_event = filt_proc,
    .f_touch = filt_proctouch,
    .f_process = filt_procprocess,
};
#if CONFIG_MEMORYSTATUS
extern const struct filterops memorystatus_filtops;
#endif /* CONFIG_MEMORYSTATUS */

extern const struct filterops fs_filtops;

extern const struct filterops sig_filtops;

static zone_t knote_zone;
static zone_t kqfile_zone;
static zone_t kqworkq_zone;
static zone_t kqworkloop_zone;

#define KN_HASH(val, mask)  (((val) ^ (val >> 8)) & (mask))

/* Mach portset filter */
extern const struct filterops machport_filtops;
static int filt_userattach(struct knote *kn, struct kevent_internal_s *kev);
static void filt_userdetach(struct knote *kn);
static int filt_user(struct knote *kn, long hint);
static int filt_usertouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_userprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) user_filtops = {
    .f_attach = filt_userattach,
    .f_detach = filt_userdetach,
    .f_event = filt_user,
    .f_touch = filt_usertouch,
    .f_process = filt_userprocess,
};

static lck_spin_t _filt_userlock;
static void filt_userlock(void);
static void filt_userunlock(void);
/* Workloop filter */
static bool filt_wlneeds_boost(struct kevent_internal_s *kev);
static int filt_wlattach(struct knote *kn, struct kevent_internal_s *kev);
static int filt_wlpost_attach(struct knote *kn, struct kevent_internal_s *kev);
static void filt_wldetach(struct knote *kn);
static int filt_wlevent(struct knote *kn, long hint);
static int filt_wltouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_wldrop_and_unlock(struct knote *kn, struct kevent_internal_s *kev);
static int filt_wlprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) workloop_filtops = {
    .f_needs_boost = filt_wlneeds_boost,
    .f_attach = filt_wlattach,
    .f_post_attach = filt_wlpost_attach,
    .f_detach = filt_wldetach,
    .f_event = filt_wlevent,
    .f_touch = filt_wltouch,
    .f_drop_and_unlock = filt_wldrop_and_unlock,
    .f_process = filt_wlprocess,
};
extern const struct filterops pipe_rfiltops;
extern const struct filterops pipe_wfiltops;
extern const struct filterops ptsd_kqops;
extern const struct filterops soread_filtops;
extern const struct filterops sowrite_filtops;
extern const struct filterops sock_filtops;
extern const struct filterops soexcept_filtops;
extern const struct filterops spec_filtops;
extern const struct filterops bpfread_filtops;
extern const struct filterops necp_fd_rfiltops;
extern const struct filterops fsevent_filtops;
extern const struct filterops vnode_filtops;
extern const struct filterops tty_filtops;

const static struct filterops timer_filtops;
/*
 * Rules for adding new filters to the system:
 * Public filters:
 * - Add a new "EVFILT_" option value to bsd/sys/event.h (typically a negative value)
 *   in the exported section of the header
 * - Update the EVFILT_SYSCOUNT value to reflect the new addition
 * - Add a filterops to the sysfilt_ops array. Public filters should be added at the end
 *   of the Public Filters section in the array.
 * Private filters:
 * - Add a new "EVFILT_" value to bsd/sys/event.h (typically a positive value)
 *   in the XNU_KERNEL_PRIVATE section of the header
 * - Update the EVFILTID_MAX value to reflect the new addition
 * - Add a filterops to the sysfilt_ops. Private filters should be added at the end of
 *   the Private filters section of the array.
 */
SECURITY_READ_ONLY_EARLY(static struct filterops *) sysfilt_ops[EVFILTID_MAX] = {
    /* Public Filters */
    [~EVFILT_READ]          = &file_filtops,
    [~EVFILT_WRITE]         = &file_filtops,
    [~EVFILT_AIO]           = &bad_filtops,
    [~EVFILT_VNODE]         = &file_filtops,
    [~EVFILT_PROC]          = &proc_filtops,
    [~EVFILT_SIGNAL]        = &sig_filtops,
    [~EVFILT_TIMER]         = &timer_filtops,
    [~EVFILT_MACHPORT]      = &machport_filtops,
    [~EVFILT_FS]            = &fs_filtops,
    [~EVFILT_USER]          = &user_filtops,
    [~EVFILT_SOCK]          = &file_filtops,
#if CONFIG_MEMORYSTATUS
    [~EVFILT_MEMORYSTATUS]  = &memorystatus_filtops,
#else
    [~EVFILT_MEMORYSTATUS]  = &bad_filtops,
#endif
    [~EVFILT_EXCEPT]        = &file_filtops,
    [~EVFILT_WORKLOOP]      = &workloop_filtops,

    /* Private filters */
    [EVFILTID_KQREAD]       = &kqread_filtops,
    [EVFILTID_PIPE_R]       = &pipe_rfiltops,
    [EVFILTID_PIPE_W]       = &pipe_wfiltops,
    [EVFILTID_PTSD]         = &ptsd_kqops,
    [EVFILTID_SOREAD]       = &soread_filtops,
    [EVFILTID_SOWRITE]      = &sowrite_filtops,
    [EVFILTID_SCK]          = &sock_filtops,
    [EVFILTID_SOEXCEPT]     = &soexcept_filtops,
    [EVFILTID_SPEC]         = &spec_filtops,
    [EVFILTID_BPFREAD]      = &bpfread_filtops,
    [EVFILTID_NECP_FD]      = &necp_fd_rfiltops,
    [EVFILTID_FSEVENT]      = &fsevent_filtops,
    [EVFILTID_VN]           = &vnode_filtops,
    [EVFILTID_TTY]          = &tty_filtops
};
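/*
 * Illustrative note: public EVFILT_* values are negative (EVFILT_READ is -1),
 * so the bitwise complement ~EVFILT_READ == 0 maps them to small non-negative
 * indexes of this table, while the positive EVFILTID_* identifiers index the
 * private tail of the same table. A lookup is therefore roughly (a sketch,
 * not the exact code used elsewhere in this file):
 *
 *     struct filterops *fops = sysfilt_ops[~kev->filter];
 *
 * with the caller expected to range-check the filter number against
 * EVFILT_SYSCOUNT / EVFILTID_MAX first.
 */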
/* waitq prepost callback */
void waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook, void *knote_hook, int qos);

#ifndef _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
#define _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG 0x02000000 /* pthread event manager bit */
#endif
#ifndef _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
#define _PTHREAD_PRIORITY_OVERCOMMIT_FLAG 0x80000000 /* request overcommit threads */
#endif
#ifndef _PTHREAD_PRIORITY_QOS_CLASS_MASK
#define _PTHREAD_PRIORITY_QOS_CLASS_MASK 0x003fff00 /* QoS class mask */
#endif
#ifndef _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32
#define _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32 8
#endif
static inline __kdebug_only
uintptr_t
kqr_thread_id(struct kqrequest *kqr)
{
    return (uintptr_t)thread_tid(kqr->kqr_thread);
}

boolean_t
is_workqueue_thread(thread_t thread)
{
    return (thread_get_tag(thread) & THREAD_TAG_WORKQUEUE);
}

void
knote_canonicalize_kevent_qos(struct knote *kn)
{
    struct kqueue *kq = knote_get_kq(kn);
    unsigned long canonical;

    if ((kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0)
        return;

    /* preserve manager and overcommit flags in this case */
    canonical = pthread_priority_canonicalize(kn->kn_qos, FALSE);
    kn->kn_qos = (qos_t)canonical;
}

kq_index_t
qos_index_from_qos(struct knote *kn, qos_t qos, boolean_t propagation)
{
    struct kqueue *kq = knote_get_kq(kn);
    kq_index_t qos_index;
    unsigned long flags = 0;

    if ((kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0)
        return QOS_INDEX_KQFILE;

    qos_index = (kq_index_t)thread_qos_from_pthread_priority(
            (unsigned long)qos, &flags);

    if (kq->kq_state & KQ_WORKQ) {
        /* workq kqueues support requesting a manager thread (non-propagation) */
        if (!propagation && (flags & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG))
            return KQWQ_QOS_MANAGER;
    }

    return qos_index;
}

qos_t
qos_from_qos_index(kq_index_t qos_index)
{
    /* should only happen for KQ_WORKQ */
    if (qos_index == KQWQ_QOS_MANAGER)
        return _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;

    if (qos_index == 0)
        return THREAD_QOS_UNSPECIFIED;

    /* Should have support from pthread kext support */
    return (1 << (qos_index - 1 +
            _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32));
}

/* kqr lock must be held */
unsigned long pthread_priority_for_kqrequest(
    struct kqrequest *kqr,
    kq_index_t qos_index)
{
    unsigned long priority = qos_from_qos_index(qos_index);
    if (kqr->kqr_state & KQR_THOVERCOMMIT) {
        priority |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
    }
    return priority;
}

kq_index_t
qos_index_for_servicer(int qos_class, thread_t thread, int flags)
{
#pragma unused(thread)
    kq_index_t qos_index;

    if (flags & KEVENT_FLAG_WORKQ_MANAGER)
        return KQWQ_QOS_MANAGER;

    qos_index = (kq_index_t)qos_class;
    assert(qos_index > 0 && qos_index < KQWQ_QOS_MANAGER);

    return qos_index;
}
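/*
 * Worked example (illustrative): for a non-manager qos_index of 3,
 * qos_from_qos_index() returns 1 << (3 - 1 + _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32)
 * = 1 << 10 = 0x400, a single QoS-class bit that lands inside
 * _PTHREAD_PRIORITY_QOS_CLASS_MASK (0x003fff00). pthread_priority_for_kqrequest()
 * may then OR in _PTHREAD_PRIORITY_OVERCOMMIT_FLAG (0x80000000) when the
 * request is marked KQR_THOVERCOMMIT.
 */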
/*
 * kqueue/note lock implementations
 *
 *	The kqueue lock guards the kq state, the state of its queues,
 *	and the kqueue-aware status and use counts of individual knotes.
 *
 *	The kqueue workq lock is used to protect state guarding the
 *	interaction of the kqueue with the workq. This state cannot
 *	be guarded by the kq lock - as it needs to be taken when we
 *	already have the waitq set lock held (during the waitq hook
 *	callback). It might be better to use the waitq lock itself
 *	for this, but the IRQ requirements make that difficult.
 *
 *	Knote flags, filter flags, and associated data are protected
 *	by the underlying object lock - and are only ever looked at
 *	by calling the filter to get a [consistent] snapshot of that
 *	data.
 */
lck_grp_attr_t * kq_lck_grp_attr;
lck_grp_t * kq_lck_grp;
lck_attr_t * kq_lck_attr;

static inline void
kqlock(struct kqueue *kq)
{
    lck_spin_lock(&kq->kq_lock);
}

static inline void
kqlock_held(__assert_only struct kqueue *kq)
{
    LCK_SPIN_ASSERT(&kq->kq_lock, LCK_ASSERT_OWNED);
}

static inline void
kqunlock(struct kqueue *kq)
{
    lck_spin_unlock(&kq->kq_lock);
}

static inline void
knhash_lock(proc_t p)
{
    lck_mtx_lock(&p->p_fd->fd_knhashlock);
}

static inline void
knhash_unlock(proc_t p)
{
    lck_mtx_unlock(&p->p_fd->fd_knhashlock);
}
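/*
 * Typical usage sketch (illustrative): the kq spinlock brackets short
 * critical sections only, e.g.
 *
 *     kqlock(kq);
 *     ... examine or update kq state / a knote's status bits ...
 *     kqunlock(kq);
 *
 * while knhash_lock()/knhash_unlock() (a mutex on the per-process fd table)
 * protects the process-wide knote hash used for non-fd knotes.
 */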
/*
 * Convert a kq lock to a knote use reference.
 *
 *	If the knote is being dropped, or has
 *	vanished, we can't get a use reference.
 *	Just return with it still locked.
 *
 *	- kq locked at entry
 *	- unlock on exit if we get the use reference
 */
static int
kqlock2knoteuse(struct kqueue *kq, struct knote *kn, int flags)
{
    if (kn->kn_status & (KN_DROPPING | KN_VANISHED))
        return (0);

    assert(kn->kn_status & KN_ATTACHED);
    kn->kn_inuse++;
    if (flags & KNUSE_BOOST) {
        set_thread_rwlock_boost();
    }
    kqunlock(kq);

    return (1);
}
/*
 *	- kq locked at entry
 *	- kq unlocked at exit
 */
static wait_result_t
knoteusewait(struct kqueue *kq, struct knote *kn)
{
    kn->kn_status |= KN_USEWAIT;
    waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
            CAST_EVENT64_T(&kn->kn_status),
            THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
    kqunlock(kq);
    return thread_block(THREAD_CONTINUE_NULL);
}
static bool
knoteuse_needs_boost(struct knote *kn, struct kevent_internal_s *kev)
{
    if (knote_fops(kn)->f_needs_boost) {
        return knote_fops(kn)->f_needs_boost(kev);
    }
    return false;
}
/*
 * Convert from a knote use reference back to kq lock.
 *
 *	Drop a use reference and wake any waiters if
 *	this is the last one.
 *
 *	If someone is trying to drop the knote, but the
 *	caller has events they must deliver, take
 *	responsibility for the drop later - and wake the
 *	other attempted dropper in a manner that informs
 *	it of the transfer of responsibility.
 *
 *	The exit return indicates if the knote is still alive
 *	(or if not, the other dropper has been given the green
 *	light to drop it).
 *
 *	The kqueue lock is re-taken unconditionally.
 */
static int
knoteuse2kqlock(struct kqueue *kq, struct knote *kn, int flags)
{
    int dropped = 0;
    int steal_drop = (flags & KNUSE_STEAL_DROP);

    kqlock(kq);
    if (flags & KNUSE_BOOST) {
        clear_thread_rwlock_boost();
    }

    if (--kn->kn_inuse == 0) {

        if ((kn->kn_status & KN_ATTACHING) != 0) {
            kn->kn_status &= ~KN_ATTACHING;
        }

        if ((kn->kn_status & KN_USEWAIT) != 0) {
            wait_result_t result;

            /* If we need to, try and steal the drop */
            if (kn->kn_status & KN_DROPPING) {
                if (steal_drop && !(kn->kn_status & KN_STOLENDROP)) {
                    kn->kn_status |= KN_STOLENDROP;
                } else {
                    dropped = 1;
                }
            }

            /* wakeup indicating if ANY USE stole the drop */
            result = (kn->kn_status & KN_STOLENDROP) ?
                     THREAD_RESTART : THREAD_AWAKENED;

            kn->kn_status &= ~KN_USEWAIT;
            waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
                    CAST_EVENT64_T(&kn->kn_status),
                    result,
                    WAITQ_ALL_PRIORITIES);
        } else {
            /* should have seen use-wait if dropping with use refs */
            assert((kn->kn_status & (KN_DROPPING|KN_STOLENDROP)) == 0);
        }

    } else if (kn->kn_status & KN_DROPPING) {
        /* not the last ref but want to steal a drop if present */
        if (steal_drop && ((kn->kn_status & KN_STOLENDROP) == 0)) {
            kn->kn_status |= KN_STOLENDROP;

            /* but we now have to wait to be the last ref */
            knoteusewait(kq, kn);
            kqlock(kq);
        } else {
            dropped = 1;
        }
    }

    return (!dropped);
}
/*
 * Convert a kq lock to a knote use reference
 * (for the purpose of detaching AND vanishing it).
 *
 *	If the knote is being dropped, we can't get
 *	a detach reference, so wait for the knote to
 *	finish dropping before returning.
 *
 *	If the knote is being used for other purposes,
 *	we cannot detach it until those uses are done
 *	as well. Again, just wait for them to finish
 *	(caller will start over at lookup).
 *
 *	- kq locked at entry
 *	- unlocked on exit
 */
static int
kqlock2knotedetach(struct kqueue *kq, struct knote *kn, int flags)
{
    if ((kn->kn_status & KN_DROPPING) || kn->kn_inuse) {
        /* have to wait for dropper or current uses to go away */
        knoteusewait(kq, kn);
        return (0);
    }
    assert((kn->kn_status & KN_VANISHED) == 0);
    assert(kn->kn_status & KN_ATTACHED);
    kn->kn_status &= ~KN_ATTACHED;
    kn->kn_status |= KN_VANISHED;
    if (flags & KNUSE_BOOST) {
        clear_thread_rwlock_boost();
    }
    kn->kn_inuse++;
    kqunlock(kq);
    return (1);
}
/*
 * Convert a kq lock to a knote drop reference.
 *
 *	If the knote is in use, wait for the use count
 *	to subside. We first mark our intention to drop
 *	it - keeping other users from "piling on."
 *	If we are too late, we have to wait for the
 *	other drop to complete.
 *
 *	- kq locked at entry
 *	- always unlocked on exit.
 *	- caller can't hold any locks that would prevent
 *	  the other dropper from completing.
 */
static int
kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
{
    int oktodrop;
    wait_result_t result;

    oktodrop = ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) == 0);
    /* if another thread is attaching, they will become the dropping thread */
    kn->kn_status |= KN_DROPPING;
    knote_unsuppress(kn);
    knote_dequeue(kn);
    if (oktodrop) {
        if (kn->kn_inuse == 0) {
            kqunlock(kq);
            return (oktodrop);
        }
    }
    result = knoteusewait(kq, kn);
    /* THREAD_RESTART == another thread stole the knote drop */
    return (result == THREAD_AWAKENED);
}
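/*
 * Summary of the conversions above (illustrative): a filter path typically
 * goes kqlock2knoteuse() -> call the filter -> knoteuse2kqlock(), while a
 * deleting path uses kqlock2knotedrop(). KN_USEWAIT plus the kq_wqs waitq is
 * what lets a dropper sleep until the last use reference goes away, and
 * KN_STOLENDROP records that an active user took over responsibility for
 * the drop.
 */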
/*
 * Release a knote use count reference.
 */
static void
knote_put(struct knote *kn)
{
    struct kqueue *kq = knote_get_kq(kn);

    kqlock(kq);
    if (--kn->kn_inuse == 0) {
        if ((kn->kn_status & KN_USEWAIT) != 0) {
            kn->kn_status &= ~KN_USEWAIT;
            waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
                    CAST_EVENT64_T(&kn->kn_status),
                    THREAD_AWAKENED,
                    WAITQ_ALL_PRIORITIES);
        }
    }
    kqunlock(kq);
}
static int
filt_fileattach(struct knote *kn, struct kevent_internal_s *kev)
{
    return (fo_kqfilter(kn->kn_fp, kn, kev, vfs_context_current()));
}

#define f_flag f_fglob->fg_flag
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data
static void
filt_kqdetach(struct knote *kn)
{
    struct kqfile *kqf = (struct kqfile *)kn->kn_fp->f_data;
    struct kqueue *kq = &kqf->kqf_kqueue;

    kqlock(kq);
    KNOTE_DETACH(&kqf->kqf_sel.si_note, kn);
    kqunlock(kq);
}

static int
filt_kqueue(struct knote *kn, __unused long hint)
{
    struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
    int count;

    count = kq->kq_count;
    return (count > 0);
}

static int
filt_kqtouch(struct knote *kn, struct kevent_internal_s *kev)
{
    struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
    int res;

    kqlock(kq);
    kn->kn_data = kq->kq_count;
    if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
        kn->kn_udata = kev->udata;
    res = (kn->kn_data > 0);
    kqunlock(kq);

    return res;
}

static int
filt_kqprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
{
#pragma unused(data)
    struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
    int res;

    kqlock(kq);
    kn->kn_data = kq->kq_count;
    res = (kn->kn_data > 0);
    if (res) {
        *kev = kn->kn_kevent;
        if (kn->kn_flags & EV_CLEAR)
            kn->kn_data = 0;
    }
    kqunlock(kq);

    return res;
}
#pragma mark EVFILT_PROC

static int
filt_procattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
    struct proc *p;

    assert(PID_MAX < NOTE_PDATAMASK);

    if ((kn->kn_sfflags & (NOTE_TRACK | NOTE_TRACKERR | NOTE_CHILD)) != 0) {
        kn->kn_flags = EV_ERROR;
        kn->kn_data = ENOTSUP;
        return 0;
    }

    p = proc_find(kn->kn_id);
    if (p == NULL) {
        kn->kn_flags = EV_ERROR;
        kn->kn_data = ESRCH;
        return 0;
    }

    const int NoteExitStatusBits = NOTE_EXIT | NOTE_EXITSTATUS;

    if ((kn->kn_sfflags & NoteExitStatusBits) == NoteExitStatusBits)
        do {
            pid_t selfpid = proc_selfpid();

            if (p->p_ppid == selfpid)
                break;  /* parent => ok */

            if ((p->p_lflag & P_LTRACED) != 0 &&
                (p->p_oppid == selfpid))
                break;  /* parent-in-waiting => ok */

            proc_rele(p);
            kn->kn_flags = EV_ERROR;
            kn->kn_data = EACCES;
            return 0;
        } while (0);

    proc_klist_lock();

    kn->kn_ptr.p_proc = p;      /* store the proc handle */

    KNOTE_ATTACH(&p->p_klist, kn);

    proc_klist_unlock();

    proc_rele(p);

    /*
     * only captures edge-triggered events after this point
     * so it can't already be fired.
     */
    return (0);
}
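/*
 * Illustrative userspace registration (a sketch; assumes <sys/event.h> and an
 * existing kqueue descriptor):
 *
 *     struct kevent64_s kev;
 *     EV_SET64(&kev, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, 0, 0, 0);
 *
 * Adding NOTE_EXITSTATUS to fflags is only honored when the registering
 * process is the (possibly ptrace(2)) parent of `pid`, per the check above;
 * otherwise the attach fails with EACCES.
 */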
/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to. In that case,
 * the pointer to the process will have already been nulled out.
 */
static void
filt_procdetach(struct knote *kn)
{
    struct proc *p;

    proc_klist_lock();

    p = kn->kn_ptr.p_proc;
    if (p != PROC_NULL) {
        kn->kn_ptr.p_proc = PROC_NULL;
        KNOTE_DETACH(&p->p_klist, kn);
    }

    proc_klist_unlock();
}
static int
filt_proc(struct knote *kn, long hint)
{
    u_int event;

    /* ALWAYS CALLED WITH proc_klist_lock */

    /*
     * Note: a lot of bits in hint may be obtained from the knote
     * To free some of those bits, see <rdar://problem/12592988> Freeing up
     * bits in hint for filt_proc
     *
     * mask off extra data
     */
    event = (u_int)hint & NOTE_PCTRLMASK;

    /*
     * termination lifecycle events can happen while a debugger
     * has reparented a process, in which case notifications
     * should be quashed except to the tracing parent. When
     * the debugger reaps the child (either via wait4(2) or
     * process exit), the child will be reparented to the original
     * parent and these knotes re-fired.
     */
    if (event & NOTE_EXIT) {
        if ((kn->kn_ptr.p_proc->p_oppid != 0)
            && (knote_get_kq(kn)->kq_p->p_pid != kn->kn_ptr.p_proc->p_ppid)) {
            /*
             * This knote is not for the current ptrace(2) parent, ignore.
             */
            return 0;
        }
    }

    /*
     * if the user is interested in this event, record it.
     */
    if (kn->kn_sfflags & event)
        kn->kn_fflags |= event;

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
    if ((event == NOTE_REAP) || ((event == NOTE_EXIT) && !(kn->kn_sfflags & NOTE_REAP))) {
        kn->kn_flags |= (EV_EOF | EV_ONESHOT);
    }
#pragma clang diagnostic pop

    /*
     * The kernel has a wrapper in place that returns the same data
     * as is collected here, in kn_data. Any changes to how
     * NOTE_EXITSTATUS and NOTE_EXIT_DETAIL are collected
     * should also be reflected in the proc_pidnoteexit() wrapper.
     */
    if (event == NOTE_EXIT) {
        kn->kn_data = 0;
        if ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0) {
            kn->kn_fflags |= NOTE_EXITSTATUS;
            kn->kn_data |= (hint & NOTE_PDATAMASK);
        }
        if ((kn->kn_sfflags & NOTE_EXIT_DETAIL) != 0) {
            kn->kn_fflags |= NOTE_EXIT_DETAIL;
            if ((kn->kn_ptr.p_proc->p_lflag &
                 P_LTERM_DECRYPTFAIL) != 0) {
                kn->kn_data |= NOTE_EXIT_DECRYPTFAIL;
            }
            if ((kn->kn_ptr.p_proc->p_lflag &
                 P_LTERM_JETSAM) != 0) {
                kn->kn_data |= NOTE_EXIT_MEMORY;
                switch (kn->kn_ptr.p_proc->p_lflag & P_JETSAM_MASK) {
                case P_JETSAM_VMPAGESHORTAGE:
                    kn->kn_data |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE;
                    break;
                case P_JETSAM_VMTHRASHING:
                    kn->kn_data |= NOTE_EXIT_MEMORY_VMTHRASHING;
                    break;
                case P_JETSAM_FCTHRASHING:
                    kn->kn_data |= NOTE_EXIT_MEMORY_FCTHRASHING;
                    break;
                case P_JETSAM_VNODE:
                    kn->kn_data |= NOTE_EXIT_MEMORY_VNODE;
                    break;
                case P_JETSAM_HIWAT:
                    kn->kn_data |= NOTE_EXIT_MEMORY_HIWAT;
                    break;
                case P_JETSAM_PID:
                    kn->kn_data |= NOTE_EXIT_MEMORY_PID;
                    break;
                case P_JETSAM_IDLEEXIT:
                    kn->kn_data |= NOTE_EXIT_MEMORY_IDLE;
                    break;
                }
            }
            if ((kn->kn_ptr.p_proc->p_csflags &
                 CS_KILLED) != 0) {
                kn->kn_data |= NOTE_EXIT_CSERROR;
            }
        }
    }

    /* if we have any matching state, activate the knote */
    return (kn->kn_fflags != 0);
}
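/*
 * Illustrative note: for NOTE_EXITSTATUS the low NOTE_PDATAMASK bits of
 * kn_data carry the same value wait4(2) would report, so userspace can apply
 * the usual WIFEXITED()/WEXITSTATUS() macros to the returned kevent data
 * field to recover the exit status.
 */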
static int
filt_proctouch(struct knote *kn, struct kevent_internal_s *kev)
{
    int res;

    proc_klist_lock();

    /* accept new filter flags and mask off output events no longer interesting */
    kn->kn_sfflags = kev->fflags;
    if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
        kn->kn_udata = kev->udata;

    /* restrict the current results to the (smaller?) set of new interest */
    /*
     * For compatibility with previous implementations, we leave kn_fflags
     * as they were before.
     */
    //kn->kn_fflags &= kn->kn_sfflags;

    res = (kn->kn_fflags != 0);

    proc_klist_unlock();

    return res;
}
static int
filt_procprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
{
#pragma unused(data)
    int res;

    proc_klist_lock();
    res = (kn->kn_fflags != 0);
    if (res) {
        *kev = kn->kn_kevent;
        kn->kn_flags |= EV_CLEAR; /* automatically set */
        kn->kn_fflags = 0;
        kn->kn_data = 0;
    }
    proc_klist_unlock();
    return res;
}
#pragma mark EVFILT_TIMER

/*
 * Values stored in the knote at rest (using Mach absolute time units)
 *
 * kn->kn_hook          where the thread_call object is stored
 * kn->kn_ext[0]        next deadline or 0 if immediate expiration
 * kn->kn_ext[1]        leeway value
 * kn->kn_sdata         interval timer: the interval
 *                      absolute/deadline timer: 0
 * kn->kn_data          fire count
 */

static lck_mtx_t _filt_timerlock;

static void filt_timerlock(void)   { lck_mtx_lock(&_filt_timerlock);   }
static void filt_timerunlock(void) { lck_mtx_unlock(&_filt_timerlock); }

static inline void filt_timer_assert_locked(void)
{
    LCK_MTX_ASSERT(&_filt_timerlock, LCK_MTX_ASSERT_OWNED);
}

/* state flags stored in kn_hookid */
#define TIMER_RUNNING       0x1
#define TIMER_CANCELWAIT    0x2
/*
 * filt_timervalidate - process data from user
 *
 * Sets up the deadline, interval, and leeway from the provided user data
 *
 * Input:
 *      kn_sdata        timer deadline or interval time
 *      kn_sfflags      style of timer, unit of measurement
 *
 * Output:
 *      kn_sdata        either interval in abstime or 0 if non-repeating timer
 *      ext[0]          fire deadline in abs/cont time
 *                      (or 0 if NOTE_ABSOLUTE and deadline is in past)
 *
 * Returns:
 *      EINVAL          Invalid user data parameters
 *
 * Called with timer filter lock held.
 */
static int
filt_timervalidate(struct knote *kn)
{
    /*
     * There are five knobs that need to be chosen for a timer registration:
     *
     * A) Units of time (what is the time duration of the specified number)
     *      Absolute and interval take:
     *              NOTE_SECONDS, NOTE_USECONDS, NOTE_NSECONDS, NOTE_MACHTIME
     *      Defaults to milliseconds if not specified
     *
     * B) Clock epoch (what is the zero point of the specified number)
     *      For interval, there is none
     *      For absolute, defaults to the gettimeofday/calendar epoch
     *      With NOTE_MACHTIME, uses mach_absolute_time()
     *      With NOTE_MACHTIME and NOTE_MACH_CONTINUOUS_TIME, uses mach_continuous_time()
     *
     * C) The knote's behavior on delivery
     *      Interval timer causes the knote to arm for the next interval unless one-shot is set
     *      Absolute is a forced one-shot timer which deletes on delivery
     *      TODO: Add a way for absolute to be not forced one-shot
     *
     * D) Whether the time duration is relative to now or absolute
     *      Interval fires at now + duration when it is set up
     *      Absolute fires at now + difference between now walltime and passed in walltime
     *      With NOTE_MACHTIME it fires at an absolute MAT or MCT.
     *
     * E) Whether the timer continues to tick across sleep
     *      By default all three do not.
     *      For interval and absolute, NOTE_MACH_CONTINUOUS_TIME causes them to tick across sleep
     *      With NOTE_ABSOLUTE | NOTE_MACHTIME | NOTE_MACH_CONTINUOUS_TIME:
     *              expires when mach_continuous_time() is > the passed in value.
     */

    filt_timer_assert_locked();

    uint64_t multiplier;

    boolean_t use_abstime = FALSE;

    switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS|NOTE_MACHTIME)) {
    case NOTE_SECONDS:
        multiplier = NSEC_PER_SEC;
        break;
    case NOTE_USECONDS:
        multiplier = NSEC_PER_USEC;
        break;
    case NOTE_NSECONDS:
        multiplier = 1;
        break;
    case NOTE_MACHTIME:
        multiplier = 0;
        use_abstime = TRUE;
        break;
    case 0: /* milliseconds (default) */
        multiplier = NSEC_PER_SEC / 1000;
        break;
    default:
        return (EINVAL);
    }

    /* transform the leeway in kn_ext[1] to same time scale */
    if (kn->kn_sfflags & NOTE_LEEWAY) {
        uint64_t leeway_abs;

        if (use_abstime) {
            leeway_abs = (uint64_t)kn->kn_ext[1];
        } else {
            uint64_t leeway_ns;
            if (os_mul_overflow((uint64_t)kn->kn_ext[1], multiplier, &leeway_ns))
                return (ERANGE);

            nanoseconds_to_absolutetime(leeway_ns, &leeway_abs);
        }

        kn->kn_ext[1] = leeway_abs;
    }

    if (kn->kn_sfflags & NOTE_ABSOLUTE) {
        uint64_t deadline_abs;

        if (use_abstime) {
            deadline_abs = (uint64_t)kn->kn_sdata;
        } else {
            uint64_t calendar_deadline_ns;

            if (os_mul_overflow((uint64_t)kn->kn_sdata, multiplier, &calendar_deadline_ns))
                return (ERANGE);

            /* calendar_deadline_ns is in nanoseconds since the epoch */

            clock_sec_t seconds;
            clock_nsec_t nanoseconds;

            /*
             * Note that the conversion through wall-time is only done once.
             *
             * If the relationship between MAT and gettimeofday changes,
             * the underlying timer does not update.
             *
             * TODO: build a wall-time denominated timer_call queue
             * and a flag to request DTRTing with wall-time timers
             */
            clock_get_calendar_nanotime(&seconds, &nanoseconds);

            uint64_t calendar_now_ns = (uint64_t)seconds * NSEC_PER_SEC + nanoseconds;

            /* if deadline is in the future */
            if (calendar_now_ns < calendar_deadline_ns) {
                uint64_t interval_ns = calendar_deadline_ns - calendar_now_ns;
                uint64_t interval_abs;

                nanoseconds_to_absolutetime(interval_ns, &interval_abs);

                /*
                 * Note that the NOTE_MACH_CONTINUOUS_TIME flag here only
                 * causes the timer to keep ticking across sleep, but
                 * it does not change the calendar timebase.
                 */
                if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
                    clock_continuoustime_interval_to_deadline(interval_abs,
                            &deadline_abs);
                else
                    clock_absolutetime_interval_to_deadline(interval_abs,
                            &deadline_abs);
            } else {
                deadline_abs = 0; /* cause immediate expiration */
            }
        }

        kn->kn_ext[0] = deadline_abs;
        kn->kn_sdata  = 0;      /* NOTE_ABSOLUTE is non-repeating */
    } else if (kn->kn_sdata < 0) {
        /*
         * Negative interval timers fire immediately, once.
         *
         * Ideally a negative interval would be an error, but certain clients
         * pass negative values on accident, and expect an event back.
         *
         * In the old implementation the timer would repeat with no delay
         * N times until mach_absolute_time() + (N * interval) underflowed,
         * then it would wait ~forever by accidentally arming a timer for the far future.
         *
         * We now skip the power-wasting hot spin phase and go straight to the idle phase.
         */
        kn->kn_sdata  = 0;      /* non-repeating */
        kn->kn_ext[0] = 0;      /* expire immediately */
    } else {
        uint64_t interval_abs = 0;

        if (use_abstime) {
            interval_abs = (uint64_t)kn->kn_sdata;
        } else {
            uint64_t interval_ns;
            if (os_mul_overflow((uint64_t)kn->kn_sdata, multiplier, &interval_ns))
                return (ERANGE);

            nanoseconds_to_absolutetime(interval_ns, &interval_abs);
        }

        uint64_t deadline = 0;

        if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
            clock_continuoustime_interval_to_deadline(interval_abs, &deadline);
        else
            clock_absolutetime_interval_to_deadline(interval_abs, &deadline);

        kn->kn_sdata  = interval_abs;   /* default to a repeating timer */
        kn->kn_ext[0] = deadline;
    }

    return (0);
}
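/*
 * Illustrative userspace example of the inputs validated above: a repeating
 * 250ms interval timer with 10ms of leeway could be registered as
 *
 *     struct kevent64_s kev;
 *     EV_SET64(&kev, 1, EVFILT_TIMER, EV_ADD, NOTE_USECONDS | NOTE_LEEWAY,
 *              250000, 0, 0, 10000);
 *
 * data (kn_sdata) is the interval and ext[1] the leeway, both in the unit
 * selected by the fflags; the code above converts both to Mach absolute time.
 */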
/*
 * filt_timerexpire - the timer callout routine
 *
 * Just propagate the timer event into the knote
 * filter routine (by going through the knote
 * synchronization point). Pass a hint to
 * indicate this is a real event, not just a query.
 */
static void
filt_timerexpire(void *knx, __unused void *spare)
{
    struct klist timer_list;
    struct knote *kn = knx;

    filt_timerlock();

    kn->kn_hookid &= ~TIMER_RUNNING;

    /* no "object" for timers, so fake a list */
    SLIST_INIT(&timer_list);
    SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);

    KNOTE(&timer_list, 1);

    /* if someone is waiting for timer to pop */
    if (kn->kn_hookid & TIMER_CANCELWAIT) {
        struct kqueue *kq = knote_get_kq(kn);
        waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
                CAST_EVENT64_T(&kn->kn_hook),
                THREAD_AWAKENED,
                WAITQ_ALL_PRIORITIES);

        kn->kn_hookid &= ~TIMER_CANCELWAIT;
    }

    filt_timerunlock();
}
/*
 * Cancel a running timer (or wait for the pop).
 * Timer filter lock is held.
 * May drop and retake the timer filter lock.
 */
static void
filt_timercancel(struct knote *kn)
{
    filt_timer_assert_locked();

    assert((kn->kn_hookid & TIMER_CANCELWAIT) == 0);

    /* if no timer, then we're good */
    if ((kn->kn_hookid & TIMER_RUNNING) == 0)
        return;

    thread_call_t callout = (thread_call_t)kn->kn_hook;

    /* cancel the callout if we can */
    if (thread_call_cancel(callout)) {
        kn->kn_hookid &= ~TIMER_RUNNING;
        return;
    }

    /* cancel failed, we have to wait for the in-flight expire routine */

    kn->kn_hookid |= TIMER_CANCELWAIT;

    struct kqueue *kq = knote_get_kq(kn);

    waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
            CAST_EVENT64_T(&kn->kn_hook),
            THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

    filt_timerunlock();
    thread_block(THREAD_CONTINUE_NULL);
    filt_timerlock();

    assert((kn->kn_hookid & TIMER_CANCELWAIT) == 0);
    assert((kn->kn_hookid & TIMER_RUNNING) == 0);
}
static void
filt_timerarm(struct knote *kn)
{
    filt_timer_assert_locked();

    assert((kn->kn_hookid & TIMER_RUNNING) == 0);

    thread_call_t callout = (thread_call_t)kn->kn_hook;

    uint64_t deadline = kn->kn_ext[0];
    uint64_t leeway   = kn->kn_ext[1];

    int filter_flags = kn->kn_sfflags;
    unsigned int timer_flags = 0;

    if (filter_flags & NOTE_CRITICAL)
        timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL;
    else if (filter_flags & NOTE_BACKGROUND)
        timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND;
    else
        timer_flags |= THREAD_CALL_DELAY_USER_NORMAL;

    if (filter_flags & NOTE_LEEWAY)
        timer_flags |= THREAD_CALL_DELAY_LEEWAY;

    if (filter_flags & NOTE_MACH_CONTINUOUS_TIME)
        timer_flags |= THREAD_CALL_CONTINUOUS;

    thread_call_enter_delayed_with_leeway(callout, NULL,
            deadline, leeway, timer_flags);

    kn->kn_hookid |= TIMER_RUNNING;
}
/*
 * Does this knote need a timer armed for it, or should it be ready immediately?
 */
static boolean_t
filt_timer_is_ready(struct knote *kn)
{
    uint64_t now;

    if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
        now = mach_continuous_time();
    else
        now = mach_absolute_time();

    uint64_t deadline = kn->kn_ext[0];

    if (deadline == 0)
        return TRUE;

    return (deadline <= now);
}
/*
 * Allocate a thread call for the knote's lifetime, and kick off the timer.
 */
static int
filt_timerattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
    thread_call_t callout;
    int error;

    callout = thread_call_allocate_with_options(filt_timerexpire,
            (thread_call_param_t)kn, THREAD_CALL_PRIORITY_HIGH,
            THREAD_CALL_OPTIONS_ONCE);

    if (NULL == callout) {
        kn->kn_flags = EV_ERROR;
        kn->kn_data = ENOMEM;
        return 0;
    }

    filt_timerlock();

    if ((error = filt_timervalidate(kn)) != 0) {
        kn->kn_flags = EV_ERROR;
        kn->kn_data = error;
        filt_timerunlock();

        __assert_only boolean_t freed = thread_call_free(callout);
        assert(freed);
        return 0;
    }

    kn->kn_hook = (void*)callout;
    kn->kn_hookid = 0;
    kn->kn_flags |= EV_CLEAR;

    /* NOTE_ABSOLUTE implies EV_ONESHOT */
    if (kn->kn_sfflags & NOTE_ABSOLUTE)
        kn->kn_flags |= EV_ONESHOT;

    boolean_t timer_ready = FALSE;

    if ((timer_ready = filt_timer_is_ready(kn))) {
        /* cause immediate expiration */
        kn->kn_data = 1;
    } else {
        filt_timerarm(kn);
    }

    filt_timerunlock();

    return timer_ready;
}
/*
 * Shut down the timer if it's running, and free the callout.
 */
static void
filt_timerdetach(struct knote *kn)
{
    thread_call_t callout;

    filt_timerlock();

    callout = (thread_call_t)kn->kn_hook;
    filt_timercancel(kn);

    filt_timerunlock();

    __assert_only boolean_t freed = thread_call_free(callout);
    assert(freed);
}
/*
 * filt_timerevent - post events to a timer knote
 *
 * Called in the context of filt_timerexpire with
 * the filt_timerlock held
 */
static int
filt_timerevent(struct knote *kn, __unused long hint)
{
    filt_timer_assert_locked();

    kn->kn_data = 1;
    return (1);
}
/*
 * filt_timertouch - update timer knote with new user input
 *
 * Cancel and restart the timer based on new user data. When
 * the user picks up a knote, clear the count of how many timer
 * pops have gone off (in kn_data).
 */
static int
filt_timertouch(
    struct knote *kn,
    struct kevent_internal_s *kev)
{
    int error;

    filt_timerlock();

    /*
     * cancel current call - drops and retakes lock
     * TODO: not safe against concurrent touches?
     */
    filt_timercancel(kn);

    /* clear if the timer had previously fired, the user no longer wants to see it */
    kn->kn_data = 0;

    /* capture the new values used to compute deadline */
    kn->kn_sdata = kev->data;
    kn->kn_sfflags = kev->fflags;
    kn->kn_ext[0] = kev->ext[0];
    kn->kn_ext[1] = kev->ext[1];

    if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
        kn->kn_udata = kev->udata;

    /* recalculate deadline */
    error = filt_timervalidate(kn);
    if (error) {
        /* no way to report error, so mark it in the knote */
        kn->kn_flags |= EV_ERROR;
        kn->kn_data = error;
        filt_timerunlock();
        return 1;
    }

    boolean_t timer_ready = FALSE;

    if ((timer_ready = filt_timer_is_ready(kn))) {
        /* cause immediate expiration */
        kn->kn_data = 1;
    } else {
        filt_timerarm(kn);
    }

    filt_timerunlock();

    return timer_ready;
}
/*
 * filt_timerprocess - query state of knote and snapshot event data
 *
 * Determine if the timer has fired in the past, snapshot the state
 * of the kevent for returning to user-space, and clear pending event
 * counters for the next time.
 */
static int
filt_timerprocess(
    struct knote *kn,
    __unused struct filt_process_s *data,
    struct kevent_internal_s *kev)
{
    filt_timerlock();

    if (kn->kn_data == 0 || (kn->kn_hookid & TIMER_CANCELWAIT)) {
        /*
         * The timer hasn't yet fired, so there's nothing to deliver, or a
         * touch is in the middle of canceling the timer,
         * so don't deliver or re-arm anything.
         *
         * This can happen if a touch resets a timer that had fired
         * without being processed
         */
        filt_timerunlock();
        return 0;
    }

    if (kn->kn_sdata != 0 && ((kn->kn_flags & EV_ERROR) == 0)) {
        /*
         * This is a 'repeating' timer, so we have to emit
         * how many intervals expired between the arm
         * and the process.
         *
         * A very strange style of interface, because
         * this could easily be done in the client...
         */

        /* The timer better have had expired... */
        assert((kn->kn_hookid & TIMER_RUNNING) == 0);

        uint64_t now;

        if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
            now = mach_continuous_time();
        else
            now = mach_absolute_time();

        uint64_t first_deadline = kn->kn_ext[0];
        uint64_t interval_abs   = kn->kn_sdata;
        uint64_t orig_arm_time  = first_deadline - interval_abs;

        assert(now > orig_arm_time);
        assert(now > first_deadline);

        uint64_t elapsed = now - orig_arm_time;

        uint64_t num_fired = elapsed / interval_abs;

        /*
         * To reach this code, we must have seen the timer pop
         * and be in repeating mode, so therefore it must have been
         * more than 'interval' time since the attach or last
         * successful touch.
         *
         * An unsuccessful touch would disarm the timer, clear its
         * state and set EV_ERROR,
         * all of which will prevent this code from running.
         */
        assert(num_fired > 0);

        /* report how many intervals have elapsed to the user */
        kn->kn_data = (int64_t) num_fired;

        /* We only need to re-arm the timer if it's not about to be destroyed */
        if ((kn->kn_flags & EV_ONESHOT) == 0) {
            /* fire at the end of the next interval */
            uint64_t new_deadline = first_deadline + num_fired * interval_abs;

            assert(new_deadline > now);

            kn->kn_ext[0] = new_deadline;

            filt_timerarm(kn);
        }
    }

    /*
     * Copy out the interesting kevent state,
     * but don't leak out the raw time calculations.
     *
     * TODO: potential enhancements - tell the user about:
     *      - deadline to which this timer thought it was expiring
     *      - return kn_sfflags in the fflags field so the client can know
     *        under what flags the timer fired
     */
    *kev = kn->kn_kevent;
    kev->ext[0] = 0;
    /* kev->ext[1] = 0;  JMM - shouldn't we hide this too? */

    /* we have delivered the event, reset the timer pop count */
    kn->kn_data = 0;

    filt_timerunlock();

    return 1;
}
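/*
 * Worked example (illustrative): with an interval of 100ms and a first
 * deadline T, if the knote is processed at now = T + 250ms then
 * elapsed = now - (T - 100ms) = 350ms and num_fired = 350/100 = 3, so
 * kn_data reports 3 expirations and the next deadline is re-armed at
 * T + 3*100ms = T + 300ms, which is still in the future as asserted above.
 */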
SECURITY_READ_ONLY_EARLY(static struct filterops) timer_filtops = {
    .f_attach = filt_timerattach,
    .f_detach = filt_timerdetach,
    .f_event = filt_timerevent,
    .f_touch = filt_timertouch,
    .f_process = filt_timerprocess,
};
#pragma mark EVFILT_USER

static void
filt_userlock(void)
{
    lck_spin_lock(&_filt_userlock);
}

static void
filt_userunlock(void)
{
    lck_spin_unlock(&_filt_userlock);
}
static int
filt_userattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
    /* EVFILT_USER knotes are not attached to anything in the kernel */
    /* Can't discover this knote until after attach - so no lock needed */
    if (kn->kn_sfflags & NOTE_TRIGGER) {
        kn->kn_hookid = 1;
    } else {
        kn->kn_hookid = 0;
    }
    return (kn->kn_hookid);
}

static void
filt_userdetach(__unused struct knote *kn)
{
    /* EVFILT_USER knotes are not attached to anything in the kernel */
}
static int
filt_user(
    __unused struct knote *kn,
    __unused long hint)
{
    panic("filt_user");
    return 0;
}

static int
filt_usertouch(
    struct knote *kn,
    struct kevent_internal_s *kev)
{
    uint32_t ffctrl;
    int fflags;
    int active;

    filt_userlock();

    ffctrl = kev->fflags & NOTE_FFCTRLMASK;
    fflags = kev->fflags & NOTE_FFLAGSMASK;
    switch (ffctrl) {
    case NOTE_FFNOP:
        break;
    case NOTE_FFAND:
        kn->kn_sfflags &= fflags;
        break;
    case NOTE_FFOR:
        kn->kn_sfflags |= fflags;
        break;
    case NOTE_FFCOPY:
        kn->kn_sfflags = fflags;
        break;
    }
    kn->kn_sdata = kev->data;

    if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
        kn->kn_udata = kev->udata;

    if (kev->fflags & NOTE_TRIGGER) {
        kn->kn_hookid = 1;
    }
    active = kn->kn_hookid;

    filt_userunlock();

    return (active);
}
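/*
 * Illustrative note: the NOTE_FFCTRLMASK bits of a touch select how the saved
 * fflags combine with the new ones, e.g. with kn_sfflags currently 0x5:
 *
 *     NOTE_FFAND  with 0x3  ->  kn_sfflags = 0x5 & 0x3 = 0x1
 *     NOTE_FFOR   with 0x2  ->  kn_sfflags = 0x5 | 0x2 = 0x7
 *     NOTE_FFCOPY with 0x2  ->  kn_sfflags = 0x2
 *
 * and a separate NOTE_TRIGGER bit is what actually fires the EVFILT_USER
 * event.
 */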
static int
filt_userprocess(
    struct knote *kn,
    __unused struct filt_process_s *data,
    struct kevent_internal_s *kev)
{
    filt_userlock();

    if (kn->kn_hookid == 0) {
        filt_userunlock();
        return 0;
    }

    *kev = kn->kn_kevent;
    kev->fflags = (volatile UInt32)kn->kn_sfflags;
    kev->data = kn->kn_sdata;
    if (kn->kn_flags & EV_CLEAR) {
        kn->kn_hookid = 0;
        kn->kn_data = 0;
        kn->kn_fflags = 0;
    }
    filt_userunlock();

    return 1;
}
1882 #if DEBUG || DEVELOPMENT
1884 * see src/queue_internal.h in libdispatch
1886 #define DISPATCH_QUEUE_ENQUEUED 0x1ull
1890 filt_wllock(struct kqworkloop
*kqwl
)
1892 lck_mtx_lock(&kqwl
->kqwl_statelock
);
1896 filt_wlunlock(struct kqworkloop
*kqwl
)
1898 lck_mtx_unlock(&kqwl
->kqwl_statelock
);
1902 filt_wlheld(__assert_only
struct kqworkloop
*kqwl
)
1904 LCK_MTX_ASSERT(&kqwl
->kqwl_statelock
, LCK_MTX_ASSERT_OWNED
);
#define WL_OWNER_SUSPENDED    ((thread_t)(~0ull))  /* special owner when suspended */

static inline bool
filt_wlowner_is_valid(thread_t owner)
{
    return owner != THREAD_NULL && owner != WL_OWNER_SUSPENDED;
}

static inline bool
filt_wlshould_end_ownership(struct kqworkloop *kqwl,
        struct kevent_internal_s *kev, int error)
{
    thread_t owner = kqwl->kqwl_owner;
    return (error == 0 || error == ESTALE) &&
        (kev->fflags & NOTE_WL_END_OWNERSHIP) &&
        (owner == current_thread() || owner == WL_OWNER_SUSPENDED);
}
static inline bool
filt_wlshould_update_ownership(struct kevent_internal_s *kev, int error)
{
    return error == 0 && (kev->fflags & NOTE_WL_DISCOVER_OWNER) &&
        kev->ext[EV_EXTIDX_WL_ADDR];
}

static inline bool
filt_wlshould_set_async_qos(struct kevent_internal_s *kev, int error,
        kq_index_t async_qos)
{
    if (error != 0) {
        return false;
    }
    if (async_qos != THREAD_QOS_UNSPECIFIED) {
        return true;
    }
    if ((kev->fflags & NOTE_WL_THREAD_REQUEST) && (kev->flags & EV_DELETE)) {
        /* see filt_wlprocess() */
        return true;
    }
    return false;
}
static int
filt_wlupdateowner(struct kqworkloop *kqwl, struct kevent_internal_s *kev,
        int error, kq_index_t async_qos)
{
    struct kqrequest *kqr = &kqwl->kqwl_request;
    thread_t cur_owner, new_owner, extra_thread_ref = THREAD_NULL;
    kq_index_t cur_override = THREAD_QOS_UNSPECIFIED;
    kq_index_t old_owner_override = THREAD_QOS_UNSPECIFIED;
    boolean_t ipc_override_is_sync = false;
    boolean_t old_owner_override_is_sync = false;
    int action = KQWL_UTQ_NONE;

    filt_wlheld(kqwl);

    /*
     * The owner is only changed under both the filt_wllock and the
     * kqwl_req_lock. Looking at it with either one held is fine.
     */
    cur_owner = kqwl->kqwl_owner;
    if (filt_wlshould_end_ownership(kqwl, kev, error)) {
        new_owner = THREAD_NULL;
    } else if (filt_wlshould_update_ownership(kev, error)) {
        /*
         * Decipher the owner port name, and translate accordingly.
         * The low 2 bits were borrowed for other flags, so mask them off.
         */
        uint64_t udata = kev->ext[EV_EXTIDX_WL_VALUE];
        mach_port_name_t new_owner_name = (mach_port_name_t)udata & ~0x3;
        if (new_owner_name != MACH_PORT_NULL) {
            new_owner_name = ipc_entry_name_mask(new_owner_name);
        }

        if (MACH_PORT_VALID(new_owner_name)) {
            new_owner = port_name_to_thread(new_owner_name);
            if (new_owner == THREAD_NULL)
                return EOWNERDEAD;
            extra_thread_ref = new_owner;
        } else if (new_owner_name == MACH_PORT_DEAD) {
            new_owner = WL_OWNER_SUSPENDED;
        } else {
            /*
             * We never want to learn a new owner that is NULL.
             * Ownership should be ended with END_OWNERSHIP.
             */
            new_owner = cur_owner;
        }
    } else {
        new_owner = cur_owner;
    }

    if (filt_wlshould_set_async_qos(kev, error, async_qos)) {
        action = KQWL_UTQ_SET_ASYNC_QOS;
    }
    if (cur_owner == new_owner && action == KQWL_UTQ_NONE) {
        goto out;
    }

    kqwl_req_lock(kqwl);

    /* If already tracked as servicer, don't track as owner */
    if ((kqr->kqr_state & KQR_BOUND) && new_owner == kqr->kqr_thread) {
        kqwl->kqwl_owner = new_owner = THREAD_NULL;
    }

    if (cur_owner != new_owner) {
        kqwl->kqwl_owner = new_owner;
        if (new_owner == extra_thread_ref) {
            /* we just transfered this ref to kqwl_owner */
            extra_thread_ref = THREAD_NULL;
        }
        cur_override = kqworkloop_combined_qos(kqwl, &ipc_override_is_sync);
        old_owner_override = kqr->kqr_dsync_owner_qos;
        old_owner_override_is_sync = kqr->kqr_owner_override_is_sync;

        if (filt_wlowner_is_valid(new_owner)) {
            /* override it before we drop the old */
            if (cur_override != THREAD_QOS_UNSPECIFIED) {
                thread_add_ipc_override(new_owner, cur_override);
            }
            if (ipc_override_is_sync) {
                thread_add_sync_ipc_override(new_owner);
            }
            /* Update the kqr to indicate that owner has sync ipc override */
            kqr->kqr_dsync_owner_qos = cur_override;
            kqr->kqr_owner_override_is_sync = ipc_override_is_sync;
            thread_starts_owning_workloop(new_owner);
            if ((kqr->kqr_state & (KQR_THREQUESTED | KQR_BOUND)) == KQR_THREQUESTED) {
                if (action == KQWL_UTQ_NONE) {
                    action = KQWL_UTQ_REDRIVE_EVENTS;
                }
            }
        } else if (new_owner == THREAD_NULL) {
            kqr->kqr_dsync_owner_qos = THREAD_QOS_UNSPECIFIED;
            kqr->kqr_owner_override_is_sync = false;
            if ((kqr->kqr_state & (KQR_THREQUESTED | KQR_WAKEUP)) == KQR_WAKEUP) {
                if (action == KQWL_UTQ_NONE) {
                    action = KQWL_UTQ_REDRIVE_EVENTS;
                }
            }
        }
    }

    if (action != KQWL_UTQ_NONE) {
        kqworkloop_update_threads_qos(kqwl, action, async_qos);
    }

    kqwl_req_unlock(kqwl);

    /* Now that we are unlocked, drop the override and ref on old owner */
    if (new_owner != cur_owner && filt_wlowner_is_valid(cur_owner)) {
        if (old_owner_override != THREAD_QOS_UNSPECIFIED) {
            thread_drop_ipc_override(cur_owner);
        }
        if (old_owner_override_is_sync) {
            thread_drop_sync_ipc_override(cur_owner);
        }
        thread_ends_owning_workloop(cur_owner);
        thread_deallocate(cur_owner);
    }

out:
    if (extra_thread_ref) {
        thread_deallocate(extra_thread_ref);
    }
    return error;
}
static int
filt_wldebounce(
    struct kqworkloop *kqwl,
    struct kevent_internal_s *kev,
    int default_result)
{
    user_addr_t addr = CAST_USER_ADDR_T(kev->ext[EV_EXTIDX_WL_ADDR]);
    uint64_t udata;
    int error;

    /* we must have the workloop state mutex held */
    filt_wlheld(kqwl);

    /* Do we have a debounce address to work with? */
    if (addr) {
        uint64_t kdata = kev->ext[EV_EXTIDX_WL_VALUE];
        uint64_t mask = kev->ext[EV_EXTIDX_WL_MASK];

        error = copyin_word(addr, &udata, sizeof(udata));
        if (error) {
            return error;
        }

        /* update state as copied in */
        kev->ext[EV_EXTIDX_WL_VALUE] = udata;

        /* If the masked bits don't match, reject it as stale */
        if ((udata & mask) != (kdata & mask)) {
            return ESTALE;
        }

#if DEBUG || DEVELOPMENT
        if ((kev->fflags & NOTE_WL_THREAD_REQUEST) && !(kev->flags & EV_DELETE)) {
            if ((udata & DISPATCH_QUEUE_ENQUEUED) == 0) {
                panic("kevent: workloop %#016llx is not enqueued "
                      "(kev:%p dq_state:%#016llx)", kev->udata, kev, udata);
            }
        }
#endif
    }

    return default_result;
}
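/*
 * Illustrative note: ext[EV_EXTIDX_WL_ADDR] names a userspace word (for
 * libdispatch, the workloop's dq_state), ext[EV_EXTIDX_WL_VALUE] is the value
 * userspace believed it contained, and ext[EV_EXTIDX_WL_MASK] selects which
 * bits matter. If, say, the mask is 0xff and the kernel copies in 0x1234
 * while the caller predicted 0x1233, the masked values 0x34 and 0x33 differ
 * and the operation is rejected as ESTALE so userspace can retry with fresh
 * state.
 */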
/*
 * Remembers the last update that came in from userspace for debugging reasons.
 * - fflags is mirrored from the userspace kevent
 * - ext[i, i != VALUE] is mirrored from the userspace kevent
 * - ext[VALUE] is set to what the kernel loaded atomically
 * - data is set to the error if any
 */
static inline void
filt_wlremember_last_update(
    __assert_only struct kqworkloop *kqwl,
    struct knote *kn,
    struct kevent_internal_s *kev,
    int error)
{
    filt_wlheld(kqwl);
    kn->kn_fflags = kev->fflags;
    kn->kn_data = error;
    memcpy(kn->kn_ext, kev->ext, sizeof(kev->ext));
}
/*
 * Return which operations on EVFILT_WORKLOOP need to be protected against
 * knoteusewait() causing priority inversions.
 */
static bool
filt_wlneeds_boost(struct kevent_internal_s *kev)
{
	if (kev == NULL) {
		/*
		 * this is an f_process() usecount, and it can cause a drop to wait
		 */
		return true;
	}
	if (kev->fflags & NOTE_WL_THREAD_REQUEST) {
		/*
		 * All operations on thread requests may starve drops or re-attach of
		 * the same knote, all of them need boosts. None of what we do under
		 * thread-request usecount holds blocks anyway.
		 */
		return true;
	}
	if (kev->fflags & NOTE_WL_SYNC_WAIT) {
		/*
		 * this may call filt_wlwait() and we don't want to hold any boost when
		 * woken up, this would cause background threads contending on
		 * dispatch_sync() to wake up at 64 and be preempted immediately when
		 * that boost is dropped.
		 */
		return false;
	}

	/*
	 * SYNC_WAIT knotes when deleted don't need to be rushed, there's no
	 * detach/reattach race with these ever. In addition to this, when the
	 * SYNC_WAIT knote is dropped, the caller is no longer receiving the
	 * workloop overrides if any, and we'd rather schedule other threads than
	 * him, he's not possibly stalling anything anymore.
	 */
	return (kev->flags & EV_DELETE) == 0;
}
static int
filt_wlattach(struct knote *kn, struct kevent_internal_s *kev)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	int error = 0;
	kq_index_t qos_index = 0;

	if ((kq->kq_state & KQ_WORKLOOP) == 0) {
		error = ENOTSUP;
		goto out;
	}

#if DEVELOPMENT || DEBUG
	if (kev->ident == 0 && kev->udata == 0 && kev->fflags == 0) {
		struct kqrequest *kqr = &kqwl->kqwl_request;

		kqwl_req_lock(kqwl);
		kev->fflags = 0;
		if (kqr->kqr_dsync_waiters) {
			kev->fflags |= NOTE_WL_SYNC_WAIT;
		}
		if (kqr->kqr_qos_index) {
			kev->fflags |= NOTE_WL_THREAD_REQUEST;
		}
		if (kqwl->kqwl_owner == WL_OWNER_SUSPENDED) {
			kev->ext[0] = ~0ull;
		} else {
			kev->ext[0] = thread_tid(kqwl->kqwl_owner);
		}
		kev->ext[1] = thread_tid(kqwl->kqwl_request.kqr_thread);
		kev->ext[2] = thread_owned_workloops_count(current_thread());
		kev->ext[3] = kn->kn_kevent.ext[3];
		kqwl_req_unlock(kqwl);
		error = EBUSY;
		goto out;
	}
#endif

	/* Some simple validation */
	int command = (kn->kn_sfflags & NOTE_WL_COMMANDS_MASK);
	switch (command) {
	case NOTE_WL_THREAD_REQUEST:
		if (kn->kn_id != kqwl->kqwl_dynamicid) {
			error = EINVAL;
			goto out;
		}
		qos_index = qos_index_from_qos(kn, kn->kn_qos, FALSE);
		if (qos_index < THREAD_QOS_MAINTENANCE ||
		    qos_index > THREAD_QOS_USER_INTERACTIVE) {
			error = ERANGE;
			goto out;
		}
		break;
	case NOTE_WL_SYNC_WAIT:
	case NOTE_WL_SYNC_WAKE:
		if (kq->kq_state & KQ_NO_WQ_THREAD) {
			error = ENOTSUP;
			goto out;
		}
		if (kn->kn_id == kqwl->kqwl_dynamicid) {
			error = EINVAL;
			goto out;
		}
		if ((kn->kn_flags & EV_DISABLE) == 0) {
			error = EINVAL;
			goto out;
		}
		if (kn->kn_sfflags & NOTE_WL_END_OWNERSHIP) {
			error = EINVAL;
			goto out;
		}
		break;
	default:
		error = EINVAL;
		goto out;
	}

	filt_wllock(kqwl);

	if (command == NOTE_WL_THREAD_REQUEST && kqwl->kqwl_request.kqr_qos_index) {
		/*
		 * There already is a thread request, and well, you're only allowed
		 * one per workloop, so fail the attach.
		 *
		 * Note: kqr_qos_index is always set with the wllock held, so we
		 * don't need to take the kqr lock.
		 */
		error = EALREADY;
	} else {
		/* Make sure user and kernel are in agreement on important state */
		error = filt_wldebounce(kqwl, kev, 0);
	}

	error = filt_wlupdateowner(kqwl, kev, error, qos_index);
	filt_wlunlock(kqwl);
out:
	if (error) {
		kn->kn_flags |= EV_ERROR;
		/* If userland wants ESTALE to be hidden, fail the attach anyway */
		if (error == ESTALE && (kn->kn_sfflags & NOTE_WL_IGNORE_ESTALE)) {
			error = 0;
		}
		kn->kn_data = error;
		return 0;
	}

	/* Just attaching the thread request successfully will fire it */
	return command == NOTE_WL_THREAD_REQUEST;
}
__attribute__((noinline,not_tail_called))
static int
filt_wlwait(struct kqworkloop *kqwl,
		struct knote *kn,
		struct kevent_internal_s *kev)
{
	assert((kn->kn_sfflags & NOTE_WL_SYNC_WAKE) == 0);

	/*
	 * Hint to the wakeup side that this thread is waiting.  Also used by
	 * stackshot for waitinfo.
	 */
	kn->kn_hook = current_thread();

	thread_set_pending_block_hint(current_thread(), kThreadWaitWorkloopSyncWait);

	wait_result_t wr = assert_wait(kn, THREAD_ABORTSAFE);

	if (wr == THREAD_WAITING) {
		kq_index_t qos_index = qos_index_from_qos(kn, kev->qos, TRUE);
		struct kqrequest *kqr = &kqwl->kqwl_request;

		thread_t thread_to_handoff = THREAD_NULL; /* holds +1 thread ref */

		thread_t kqwl_owner = kqwl->kqwl_owner;
		if (filt_wlowner_is_valid(kqwl_owner)) {
			thread_reference(kqwl_owner);
			thread_to_handoff = kqwl_owner;
		}

		kqwl_req_lock(kqwl);

		assert(kqr->kqr_dsync_waiters < UINT16_MAX);
		kqr->kqr_dsync_waiters++;
		if (qos_index > kqr->kqr_dsync_waiters_qos) {
			kqworkloop_update_threads_qos(kqwl,
					KQWL_UTQ_SET_SYNC_WAITERS_QOS, qos_index);
		}

		if ((kqr->kqr_state & KQR_BOUND) && thread_to_handoff == THREAD_NULL) {
			assert(kqr->kqr_thread != THREAD_NULL);
			thread_t servicer = kqr->kqr_thread;

			thread_reference(servicer);
			thread_to_handoff = servicer;
		}

		kqwl_req_unlock(kqwl);

		filt_wlunlock(kqwl);

		/* TODO: use continuation based blocking <rdar://problem/31299584> */

		/* consume a refcount on thread_to_handoff, then thread_block() */
		wr = thread_handoff(thread_to_handoff);
		thread_to_handoff = THREAD_NULL;

		filt_wllock(kqwl);

		/* clear waiting state (only one waiting thread - so no race) */
		assert(kn->kn_hook == current_thread());
		kn->kn_hook = NULL;

		kqwl_req_lock(kqwl);
		assert(kqr->kqr_dsync_waiters > 0);
		if (--kqr->kqr_dsync_waiters == 0) {
			assert(kqr->kqr_dsync_waiters_qos);
			kqworkloop_update_threads_qos(kqwl,
					KQWL_UTQ_SET_SYNC_WAITERS_QOS, 0);
		}
		kqwl_req_unlock(kqwl);
	}

	switch (wr) {
	case THREAD_AWAKENED:
		return 0;
	case THREAD_INTERRUPTED:
		return EINTR;
	case THREAD_RESTART:
		return ECANCELED;
	default:
		panic("filt_wlwait: unexpected wait result %d", wr);
		return EINVAL;
	}
}
/* called in stackshot context to report the thread responsible for blocking this thread */
void
kdp_workloop_sync_wait_find_owner(__assert_only thread_t thread,
		event64_t event,
		thread_waitinfo_t *waitinfo)
{
	struct knote *kn = (struct knote *)event;
	assert(kdp_is_in_zone(kn, "knote zone"));

	assert(kn->kn_hook == thread);

	struct kqueue *kq = knote_get_kq(kn);
	assert(kdp_is_in_zone(kq, "kqueue workloop zone"));
	assert(kq->kq_state & KQ_WORKLOOP);

	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	struct kqrequest *kqr = &kqwl->kqwl_request;

	thread_t kqwl_owner = kqwl->kqwl_owner;
	thread_t servicer = kqr->kqr_thread;

	if (kqwl_owner == WL_OWNER_SUSPENDED) {
		waitinfo->owner = STACKSHOT_WAITOWNER_SUSPENDED;
	} else if (kqwl_owner != THREAD_NULL) {
		assert(kdp_is_in_zone(kqwl_owner, "threads"));

		waitinfo->owner = thread_tid(kqwl->kqwl_owner);
	} else if (servicer != THREAD_NULL) {
		assert(kdp_is_in_zone(servicer, "threads"));

		waitinfo->owner = thread_tid(servicer);
	} else if (kqr->kqr_state & KQR_THREQUESTED) {
		waitinfo->owner = STACKSHOT_WAITOWNER_THREQUESTED;
	} else {
		waitinfo->owner = 0;
	}

	waitinfo->context = kqwl->kqwl_dynamicid;
}
/*
 * Takes kqueue locked, returns locked, may drop in the middle and/or block for a while
 */
static int
filt_wlpost_attach(struct knote *kn, struct kevent_internal_s *kev)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	int error = 0;

	if (kev->fflags & NOTE_WL_SYNC_WAIT) {
		if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
			filt_wllock(kqwl);
			/* if the wake has already preposted, don't wait */
			if ((kn->kn_sfflags & NOTE_WL_SYNC_WAKE) == 0)
				error = filt_wlwait(kqwl, kn, kev);
			filt_wlunlock(kqwl);
			knoteuse2kqlock(kq, kn, KNUSE_NONE);
		}
	}
	return error;
}
static void
filt_wldetach(__assert_only struct knote *kn)
{
	assert(knote_get_kq(kn)->kq_state & KQ_WORKLOOP);

	/*
	 * Thread requests have nothing to detach.
	 * Sync waiters should have been aborted out
	 * and drop their refs before we could drop/
	 * detach their knotes.
	 */
	assert(kn->kn_hook == NULL);
}

static int
filt_wlevent(
	__unused struct knote *kn,
	__unused long hint)
{
	panic("filt_wlevent");
	return 0;
}
static int
filt_wlvalidate_kev_flags(struct knote *kn, struct kevent_internal_s *kev)
{
	int new_commands = kev->fflags & NOTE_WL_COMMANDS_MASK;
	int sav_commands = kn->kn_sfflags & NOTE_WL_COMMANDS_MASK;
	int error = 0;

	switch (new_commands) {
	case NOTE_WL_THREAD_REQUEST:
		/* thread requests can only update themselves */
		if (sav_commands != new_commands)
			error = EINVAL;
		break;

	case NOTE_WL_SYNC_WAIT:
		if (kev->fflags & NOTE_WL_END_OWNERSHIP)
			error = EINVAL;
		/* FALLTHROUGH */
	case NOTE_WL_SYNC_WAKE:
		/* waits and wakes can update themselves or their counterparts */
		if (!(sav_commands & (NOTE_WL_SYNC_WAIT | NOTE_WL_SYNC_WAKE)))
			error = EINVAL;
		if (kev->fflags & NOTE_WL_UPDATE_QOS)
			error = EINVAL;
		if ((kev->flags & (EV_ENABLE | EV_DELETE)) == EV_ENABLE)
			error = EINVAL;
		if (kev->flags & EV_DELETE) {
			/*
			 * Really this is not supported: there is absolutely no reason
			 * whatsoever to want to fail the drop of a NOTE_WL_SYNC_WAIT knote.
			 */
			if (kev->ext[EV_EXTIDX_WL_ADDR] && kev->ext[EV_EXTIDX_WL_MASK]) {
				error = EINVAL;
			}
		}
		break;

	default:
		error = EINVAL;
	}
	if ((kev->flags & EV_DELETE) && (kev->fflags & NOTE_WL_DISCOVER_OWNER)) {
		error = EINVAL;
	}
	return error;
}
static int
filt_wltouch(
	struct knote *kn,
	struct kevent_internal_s *kev)
{
	struct kqueue *kq = knote_get_kq(kn);
	int error = 0;
	struct kqworkloop *kqwl;

	assert(kq->kq_state & KQ_WORKLOOP);
	kqwl = (struct kqworkloop *)kq;

	error = filt_wlvalidate_kev_flags(kn, kev);
	if (error) {
		goto out;
	}

	filt_wllock(kqwl);

	/* Make sure user and kernel are in agreement on important state */
	error = filt_wldebounce(kqwl, kev, 0);
	if (error) {
		error = filt_wlupdateowner(kqwl, kev, error, 0);
		goto out_unlock;
	}

	int new_command = kev->fflags & NOTE_WL_COMMANDS_MASK;
	switch (new_command) {
	case NOTE_WL_THREAD_REQUEST:
		assert(kqwl->kqwl_request.kqr_qos_index != THREAD_QOS_UNSPECIFIED);
		break;

	case NOTE_WL_SYNC_WAIT:
		/*
		 * we need to allow waiting several times on the same knote because
		 * of EINTR. If it's already woken though, it won't block.
		 */
		break;

	case NOTE_WL_SYNC_WAKE:
		if (kn->kn_sfflags & NOTE_WL_SYNC_WAKE) {
			/* disallow waking the same knote twice */
			error = EALREADY;
			goto out_unlock;
		}
		thread_wakeup_thread((event_t)kn, (thread_t)kn->kn_hook);
		break;

	default:
		error = EINVAL;
		goto out_unlock;
	}

	/*
	 * Save off any additional fflags/data we just accepted
	 * But only keep the last round of "update" bits we acted on which helps
	 * debugging a lot.
	 */
	kn->kn_sfflags &= ~NOTE_WL_UPDATES_MASK;
	kn->kn_sfflags |= kev->fflags;
	kn->kn_sdata = kev->data;

	kq_index_t qos_index = THREAD_QOS_UNSPECIFIED;

	if (kev->fflags & NOTE_WL_UPDATE_QOS) {
		qos_t qos = pthread_priority_canonicalize(kev->qos, FALSE);

		if (kn->kn_qos != qos) {
			qos_index = qos_index_from_qos(kn, qos, FALSE);
			if (qos_index == THREAD_QOS_UNSPECIFIED) {
				error = ERANGE;
				goto out_unlock;
			}
			kqlock(kq);
			if (kn->kn_status & KN_QUEUED) {
				knote_dequeue(kn);
				knote_set_qos_index(kn, qos_index);
				knote_enqueue(kn);
				knote_wakeup(kn);
			} else {
				knote_set_qos_index(kn, qos_index);
			}
			kn->kn_qos = qos;
			kqunlock(kq);
		}
	}

	error = filt_wlupdateowner(kqwl, kev, 0, qos_index);
	if (error) {
		goto out_unlock;
	}

	if (new_command == NOTE_WL_SYNC_WAIT) {
		/* if the wake has already preposted, don't wait */
		if ((kn->kn_sfflags & NOTE_WL_SYNC_WAKE) == 0)
			error = filt_wlwait(kqwl, kn, kev);
	}

out_unlock:
	filt_wlremember_last_update(kqwl, kn, kev, error);
	filt_wlunlock(kqwl);
out:
	if (error) {
		if (error == ESTALE && (kev->fflags & NOTE_WL_IGNORE_ESTALE)) {
			/* If userland wants ESTALE to be hidden, do not activate */
			return 0;
		}
		kev->flags |= EV_ERROR;
		kev->data = error;
		return 0;
	}
	/* Just touching the thread request successfully will fire it */
	return new_command == NOTE_WL_THREAD_REQUEST;
}
static int
filt_wldrop_and_unlock(
	struct knote *kn,
	struct kevent_internal_s *kev)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	int error = 0, knoteuse_flags = KNUSE_NONE;

	assert(kev->flags & EV_DELETE);
	assert(kq->kq_state & KQ_WORKLOOP);

	error = filt_wlvalidate_kev_flags(kn, kev);
	if (error) {
		goto out;
	}

	if (kn->kn_sfflags & NOTE_WL_THREAD_REQUEST) {
		knoteuse_flags |= KNUSE_BOOST;
	}

	/* take a usecount to allow taking the filt_wllock */
	if (!kqlock2knoteuse(kq, kn, knoteuse_flags)) {
		/* knote is being dropped already */
		error = EINPROGRESS;
		goto out;
	}

	filt_wllock(kqwl);

	/*
	 * Make sure user and kernel are in agreement on important state
	 *
	 * Userland will modify bits to cause this to fail for the touch / drop
	 * race case (when a drop for a thread request quiescing comes in late after
	 * the workloop has been woken up again).
	 */
	error = filt_wldebounce(kqwl, kev, 0);

	if (!knoteuse2kqlock(kq, kn, knoteuse_flags)) {
		/* knote is no longer alive */
		error = EINPROGRESS;
		goto out_unlock;
	}

	if (!error && (kn->kn_sfflags & NOTE_WL_THREAD_REQUEST) && kn->kn_inuse) {
		/*
		 * There is a concurrent drop or touch happening, we can't resolve this,
		 * userland has to redrive.
		 *
		 * The race we're worried about here is the following:
		 *
		 *   f_touch                     |  f_drop_and_unlock
		 * ------------------------------+--------------------------------------------
		 *                               |  kqlock2knoteuse()
		 *                               |  debounces successfully
		 *  filt_wllock() <BLOCKS>       |
		 *                               |  knoteuse2kqlock()
		 *                               |  kqlock2knotedrop() <BLOCKS, WAKES f_touch>
		 *  debounces successfully       |
		 *  caller WAKES f_drop          |
		 *                               |  performs drop, but f_touch should have won
		 *
		 * So if the usecount is not 0 here, we need to wait for it to drop and
		 * redrive the whole logic (including looking up the knote again).
		 */
		filt_wlunlock(kqwl);
		knoteusewait(kq, kn);
		return ERESTART;
	}

	/*
	 * If error is 0 this will set kqr_qos_index to THREAD_QOS_UNSPECIFIED
	 *
	 * If error is 0 or ESTALE this may drop ownership and cause a thread
	 * request redrive, however the kqlock is held which prevents f_process() to
	 * run until we did the drop for real.
	 */
	error = filt_wlupdateowner(kqwl, kev, error, 0);
	if (error) {
		goto out_unlock;
	}

	if ((kn->kn_sfflags & (NOTE_WL_SYNC_WAIT | NOTE_WL_SYNC_WAKE)) ==
			NOTE_WL_SYNC_WAIT) {
		/*
		 * When deleting a SYNC_WAIT knote that hasn't been woken up
		 * explicitly, issue a wake up.
		 */
		kn->kn_sfflags |= NOTE_WL_SYNC_WAKE;
		thread_wakeup_thread((event_t)kn, (thread_t)kn->kn_hook);
	}

out_unlock:
	filt_wlremember_last_update(kqwl, kn, kev, error);
	filt_wlunlock(kqwl);

out:
	if (error == 0) {
		/* If nothing failed, do the regular knote drop. */
		if (kqlock2knotedrop(kq, kn)) {
			knote_drop(kn, current_proc());
		} else {
			error = EINPROGRESS;
		}
	} else {
		kqunlock(kq);
	}
	if (error == ESTALE && (kev->fflags & NOTE_WL_IGNORE_ESTALE)) {
		error = 0;
	}
	if (error == EINPROGRESS) {
		/*
		 * filt_wlprocess() makes sure that no event can be delivered for
		 * NOTE_WL_THREAD_REQUEST knotes once a drop is happening, and
		 * NOTE_WL_SYNC_* knotes are never fired.
		 *
		 * It means that EINPROGRESS is about a state that userland cannot
		 * observe for this filter (an event being delivered concurrently from
		 * a drop), so silence the error.
		 */
		error = 0;
	}
	return error;
}
static int
filt_wlprocess(
	struct knote *kn,
	__unused struct filt_process_s *data,
	struct kevent_internal_s *kev)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	struct kqrequest *kqr = &kqwl->kqwl_request;
	int rc = 0;

	assert(kq->kq_state & KQ_WORKLOOP);

	/* only thread requests should get here */
	assert(kn->kn_sfflags & NOTE_WL_THREAD_REQUEST);
	if (kn->kn_sfflags & NOTE_WL_THREAD_REQUEST) {
		filt_wllock(kqwl);
		assert(kqr->kqr_qos_index != THREAD_QOS_UNSPECIFIED);
		if (kqwl->kqwl_owner) {
			/*
			 * <rdar://problem/33584321> userspace sometimes due to events being
			 * delivered but not triggering a drain session can cause a process
			 * of the thread request knote.
			 *
			 * When that happens, the automatic deactivation due to process
			 * would swallow the event, so we have to activate the knote again.
			 */
			kqlock(kq);
			knote_activate(kn);
			kqunlock(kq);
		} else if (kqr->kqr_qos_index) {
#if DEBUG || DEVELOPMENT
			user_addr_t addr = CAST_USER_ADDR_T(kn->kn_ext[EV_EXTIDX_WL_ADDR]);
			task_t t = current_task();
			uint64_t val;
			if (addr && task_is_active(t) && !task_is_halting(t) &&
			    copyin_word(addr, &val, sizeof(val)) == 0 &&
			    val && (val & DISPATCH_QUEUE_ENQUEUED) == 0) {
				panic("kevent: workloop %#016llx is not enqueued "
				    "(kn:%p dq_state:%#016llx kev.dq_state:%#016llx)",
				    kn->kn_udata, kn, val,
				    kn->kn_ext[EV_EXTIDX_WL_VALUE]);
			}
#endif
			*kev = kn->kn_kevent;
			kev->fflags = kn->kn_sfflags;
			kev->data = kn->kn_sdata;
			kev->qos = kn->kn_qos;
			rc = 1;
		}
		filt_wlunlock(kqwl);
	}
	return rc;
}
#pragma mark kevent / knotes

/*
 * JMM - placeholder for not-yet-implemented filters
 */
static int
filt_badattach(__unused struct knote *kn, __unused struct kevent_internal_s *kev)
{
	kn->kn_flags |= EV_ERROR;
	kn->kn_data = ENOTSUP;
	return 0;
}
struct kqueue *
kqueue_alloc(struct proc *p, unsigned int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq = NULL;
	int policy;
	void *hook = NULL;
	uint64_t kq_addr_offset;

	if (flags & KEVENT_FLAG_WORKQ) {
		struct kqworkq *kqwq;
		int i;

		kqwq = (struct kqworkq *)zalloc(kqworkq_zone);
		if (kqwq == NULL)
			return NULL;

		kq = &kqwq->kqwq_kqueue;
		bzero(kqwq, sizeof (struct kqworkq));

		kqwq->kqwq_state = KQ_WORKQ;

		for (i = 0; i < KQWQ_NBUCKETS; i++) {
			TAILQ_INIT(&kq->kq_queue[i]);
		}
		for (i = 0; i < KQWQ_NQOS; i++) {
			kqwq->kqwq_request[i].kqr_qos_index = i;
		}

		lck_spin_init(&kqwq->kqwq_reqlock, kq_lck_grp, kq_lck_attr);
		policy = SYNC_POLICY_FIFO;
		hook = (void *)kqwq;

	} else if (flags & KEVENT_FLAG_WORKLOOP) {
		struct kqworkloop *kqwl;
		int i;

		kqwl = (struct kqworkloop *)zalloc(kqworkloop_zone);
		if (kqwl == NULL)
			return NULL;

		bzero(kqwl, sizeof (struct kqworkloop));

		kqwl->kqwl_state = KQ_WORKLOOP | KQ_DYNAMIC;
		kqwl->kqwl_retains = 1; /* donate a retain to creator */

		kq = &kqwl->kqwl_kqueue;
		for (i = 0; i < KQWL_NBUCKETS; i++) {
			TAILQ_INIT(&kq->kq_queue[i]);
		}
		TAILQ_INIT(&kqwl->kqwl_request.kqr_suppressed);

		lck_spin_init(&kqwl->kqwl_reqlock, kq_lck_grp, kq_lck_attr);
		lck_mtx_init(&kqwl->kqwl_statelock, kq_lck_grp, kq_lck_attr);

		policy = SYNC_POLICY_FIFO;
		if (flags & KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD) {
			policy |= SYNC_POLICY_PREPOST;
			kq->kq_state |= KQ_NO_WQ_THREAD;
		}
		hook = (void *)kqwl;

	} else {
		struct kqfile *kqf;

		kqf = (struct kqfile *)zalloc(kqfile_zone);
		if (kqf == NULL)
			return NULL;

		kq = &kqf->kqf_kqueue;
		bzero(kqf, sizeof (struct kqfile));
		TAILQ_INIT(&kq->kq_queue[0]);
		TAILQ_INIT(&kqf->kqf_suppressed);

		policy = SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST;
	}

	waitq_set_init(&kq->kq_wqs, policy, NULL, hook);
	lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
	kq->kq_p = p;

	if (fdp->fd_knlistsize < 0) {
		proc_fdlock(p);
		if (fdp->fd_knlistsize < 0)
			fdp->fd_knlistsize = 0;	/* this process has had a kq */
		proc_fdunlock(p);
	}

	kq_addr_offset = ((uintptr_t)kq - (uintptr_t)VM_MIN_KERNEL_AND_KEXT_ADDRESS);
	/* Assert that the address can be pointer compacted for use with knote */
	assert(kq_addr_offset < (uint64_t)(1ull << KNOTE_KQ_BITSIZE));
	return (kq);
}
/*
 * knotes_dealloc - detach all knotes for the process and drop them
 *
 *	Called with proc_fdlock held.
 *	Returns with it locked.
 *	May drop it temporarily.
 *	Process is in such a state that it will not try to allocate
 *	any more knotes during this process (stopped for exit or exec).
 */
void
knotes_dealloc(proc_t p)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq;
	struct knote *kn;
	struct klist *kn_hash = NULL;
	int i;

	/* Close all the fd-indexed knotes up front */
	if (fdp->fd_knlistsize > 0) {
		for (i = 0; i < fdp->fd_knlistsize; i++) {
			while ((kn = SLIST_FIRST(&fdp->fd_knlist[i])) != NULL) {
				kq = knote_get_kq(kn);
				kqlock(kq);
				proc_fdunlock(p);
				/* drop it ourselves or wait */
				if (kqlock2knotedrop(kq, kn)) {
					knote_drop(kn, p);
				}
				proc_fdlock(p);
			}
		}
		/* free the table */
		FREE(fdp->fd_knlist, M_KQUEUE);
		fdp->fd_knlist = NULL;
	}
	fdp->fd_knlistsize = -1;

	knhash_lock(p);
	proc_fdunlock(p);

	/* Clean out all the hashed knotes as well */
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i <= (int)fdp->fd_knhashmask; i++) {
			while ((kn = SLIST_FIRST(&fdp->fd_knhash[i])) != NULL) {
				kq = knote_get_kq(kn);
				kqlock(kq);
				knhash_unlock(p);
				/* drop it ourselves or wait */
				if (kqlock2knotedrop(kq, kn)) {
					knote_drop(kn, p);
				}
				knhash_lock(p);
			}
		}
		kn_hash = fdp->fd_knhash;
		fdp->fd_knhashmask = 0;
		fdp->fd_knhash = NULL;
	}

	knhash_unlock(p);

	/* free the kn_hash table */
	if (kn_hash)
		FREE(kn_hash, M_KQUEUE);

	proc_fdlock(p);
}
/*
 * kqueue_dealloc - detach all knotes from a kqueue and free it
 *
 *	We walk each list looking for knotes referencing this
 *	this kqueue.  If we find one, we try to drop it.  But
 *	if we fail to get a drop reference, that will wait
 *	until it is dropped.  So, we can just restart again
 *	safe in the assumption that the list will eventually
 *	not contain any more references to this kqueue (either
 *	we dropped them all, or someone else did).
 *
 *	Assumes no new events are being added to the kqueue.
 *	Nothing locked on entry or exit.
 *
 *	Workloop kqueues cant get here unless all the knotes
 *	are already gone and all requested threads have come
 *	and gone (cancelled or arrived).
 */
void
kqueue_dealloc(struct kqueue *kq)
{
	struct proc *p;
	struct filedesc *fdp;
	struct knote *kn;
	int i;

	if (kq == NULL)
		return;

	p = kq->kq_p;
	fdp = p->p_fd;

	proc_fdlock(p);
	for (i = 0; i < fdp->fd_knlistsize; i++) {
		kn = SLIST_FIRST(&fdp->fd_knlist[i]);
		while (kn != NULL) {
			if (kq == knote_get_kq(kn)) {
				assert((kq->kq_state & KQ_WORKLOOP) == 0);
				kqlock(kq);
				proc_fdunlock(p);
				/* drop it ourselves or wait */
				if (kqlock2knotedrop(kq, kn)) {
					knote_drop(kn, p);
				}
				proc_fdlock(p);
				/* start over at beginning of list */
				kn = SLIST_FIRST(&fdp->fd_knlist[i]);
				continue;
			}
			kn = SLIST_NEXT(kn, kn_link);
		}
	}

	knhash_lock(p);
	proc_fdunlock(p);

	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
			kn = SLIST_FIRST(&fdp->fd_knhash[i]);
			while (kn != NULL) {
				if (kq == knote_get_kq(kn)) {
					assert((kq->kq_state & KQ_WORKLOOP) == 0);
					kqlock(kq);
					knhash_unlock(p);
					/* drop it ourselves or wait */
					if (kqlock2knotedrop(kq, kn)) {
						knote_drop(kn, p);
					}
					knhash_lock(p);
					/* start over at beginning of list */
					kn = SLIST_FIRST(&fdp->fd_knhash[i]);
					continue;
				}
				kn = SLIST_NEXT(kn, kn_link);
			}
		}
	}
	knhash_unlock(p);

	if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;
		struct kqrequest *kqr = &kqwl->kqwl_request;
		thread_t cur_owner = kqwl->kqwl_owner;

		assert(TAILQ_EMPTY(&kqwl->kqwl_request.kqr_suppressed));
		if (filt_wlowner_is_valid(cur_owner)) {
			/*
			 * If the kqueue had an owner that prevented the thread request to
			 * go through, then no unbind happened, and we may have lingering
			 * overrides to drop.
			 */
			if (kqr->kqr_dsync_owner_qos != THREAD_QOS_UNSPECIFIED) {
				thread_drop_ipc_override(cur_owner);
				kqr->kqr_dsync_owner_qos = THREAD_QOS_UNSPECIFIED;
			}

			if (kqr->kqr_owner_override_is_sync) {
				thread_drop_sync_ipc_override(cur_owner);
				kqr->kqr_owner_override_is_sync = 0;
			}
			thread_ends_owning_workloop(cur_owner);
			thread_deallocate(cur_owner);
			kqwl->kqwl_owner = THREAD_NULL;
		}
	}

	/*
	 * waitq_set_deinit() remove the KQ's waitq set from
	 * any select sets to which it may belong.
	 */
	waitq_set_deinit(&kq->kq_wqs);
	lck_spin_destroy(&kq->kq_lock, kq_lck_grp);

	if (kq->kq_state & KQ_WORKQ) {
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		lck_spin_destroy(&kqwq->kqwq_reqlock, kq_lck_grp);
		zfree(kqworkq_zone, kqwq);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		assert(kqwl->kqwl_retains == 0);
		lck_spin_destroy(&kqwl->kqwl_reqlock, kq_lck_grp);
		lck_mtx_destroy(&kqwl->kqwl_statelock, kq_lck_grp);
		zfree(kqworkloop_zone, kqwl);
	} else {
		struct kqfile *kqf = (struct kqfile *)kq;

		zfree(kqfile_zone, kqf);
	}
}
void
kqueue_retain(struct kqueue *kq)
{
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	uint32_t previous;

	if ((kq->kq_state & KQ_DYNAMIC) == 0)
		return;

	previous = OSIncrementAtomic(&kqwl->kqwl_retains);
	if (previous == KQ_WORKLOOP_RETAINS_MAX)
		panic("kq(%p) retain overflow", kq);

	if (previous == 0)
		panic("kq(%p) resurrection", kq);
}

#define KQUEUE_CANT_BE_LAST_REF  0
#define KQUEUE_MIGHT_BE_LAST_REF 1

static int
kqueue_release(struct kqueue *kq, __assert_only int possibly_last)
{
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;

	if ((kq->kq_state & KQ_DYNAMIC) == 0) {
		return 0;
	}

	assert(kq->kq_state & KQ_WORKLOOP); /* for now */
	uint32_t refs = OSDecrementAtomic(&kqwl->kqwl_retains);
	if (__improbable(refs == 0)) {
		panic("kq(%p) over-release", kq);
	}
	if (refs == 1) {
		assert(possibly_last);
		return 1;
	}
	return 0;
}
static int
kqueue_body(struct proc *p, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval)
{
	struct kqueue *kq;
	struct fileproc *fp;
	int fd, error;

	error = falloc_withalloc(p,
	    &fp, &fd, vfs_context_current(), fp_zalloc, cra);
	if (error) {
		return (error);
	}

	kq = kqueue_alloc(p, 0);
	if (kq == NULL) {
		fp_free(p, fd, fp);
		return (ENOMEM);
	}

	fp->f_flag = FREAD | FWRITE;
	fp->f_ops = &kqueueops;
	fp->f_data = kq;

	proc_fdlock(p);
	*fdflags(p, fd) |= UF_EXCLOSE;
	procfdtbl_releasefd(p, fd, NULL);
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);

	*retval = fd;
	return (error);
}

int
kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval)
{
	return (kqueue_body(p, fileproc_alloc_init, NULL, retval));
}
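/*
 * Illustrative userspace sketch (not compiled as part of this file): how the
 * kqueue() and kevent() syscalls implemented above are typically exercised
 * from a process.  The fd being watched and the 1-second timeout are arbitrary
 * example choices.
 *
 *	#include <sys/event.h>
 *	#include <sys/time.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int wait_for_read(int fd)
 *	{
 *		int kq = kqueue();                    // allocates a kqfile kqueue
 *		if (kq < 0)
 *			return -1;
 *
 *		struct kevent change;
 *		// register interest in "fd is readable"
 *		EV_SET(&change, fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
 *
 *		struct kevent event;
 *		struct timespec timeout = { .tv_sec = 1, .tv_nsec = 0 };
 *		int n = kevent(kq, &change, 1, &event, 1, &timeout);
 *		if (n > 0)
 *			printf("ident %lu readable, %ld bytes pending\n",
 *			    (unsigned long)event.ident, (long)event.data);
 *
 *		close(kq);                            // releases the kqueue fd
 *		return n;                             // 0 on timeout, -1 on error
 *	}
 */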
static int
kevent_copyin(user_addr_t *addrp, struct kevent_internal_s *kevp, struct proc *p,
    unsigned int flags)
{
	int advance;
	int error;

	if (flags & KEVENT_FLAG_LEGACY32) {
		bzero(kevp, sizeof (*kevp));

		if (IS_64BIT_PROCESS(p)) {
			struct user64_kevent kev64;

			advance = sizeof (kev64);
			error = copyin(*addrp, (caddr_t)&kev64, advance);
			if (error)
				return (error);
			kevp->ident = kev64.ident;
			kevp->filter = kev64.filter;
			kevp->flags = kev64.flags;
			kevp->udata = kev64.udata;
			kevp->fflags = kev64.fflags;
			kevp->data = kev64.data;
		} else {
			struct user32_kevent kev32;

			advance = sizeof (kev32);
			error = copyin(*addrp, (caddr_t)&kev32, advance);
			if (error)
				return (error);
			kevp->ident = (uintptr_t)kev32.ident;
			kevp->filter = kev32.filter;
			kevp->flags = kev32.flags;
			kevp->udata = CAST_USER_ADDR_T(kev32.udata);
			kevp->fflags = kev32.fflags;
			kevp->data = (intptr_t)kev32.data;
		}
	} else if (flags & KEVENT_FLAG_LEGACY64) {
		struct kevent64_s kev64;

		bzero(kevp, sizeof (*kevp));

		advance = sizeof (struct kevent64_s);
		error = copyin(*addrp, (caddr_t)&kev64, advance);
		if (error)
			return (error);
		kevp->ident = kev64.ident;
		kevp->filter = kev64.filter;
		kevp->flags = kev64.flags;
		kevp->udata = kev64.udata;
		kevp->fflags = kev64.fflags;
		kevp->data = kev64.data;
		kevp->ext[0] = kev64.ext[0];
		kevp->ext[1] = kev64.ext[1];
	} else {
		struct kevent_qos_s kevqos;

		bzero(kevp, sizeof (*kevp));

		advance = sizeof (struct kevent_qos_s);
		error = copyin(*addrp, (caddr_t)&kevqos, advance);
		if (error)
			return (error);
		kevp->ident = kevqos.ident;
		kevp->filter = kevqos.filter;
		kevp->flags = kevqos.flags;
		kevp->qos = kevqos.qos;
//		kevp->xflags = kevqos.xflags;
		kevp->udata = kevqos.udata;
		kevp->fflags = kevqos.fflags;
		kevp->data = kevqos.data;
		kevp->ext[0] = kevqos.ext[0];
		kevp->ext[1] = kevqos.ext[1];
		kevp->ext[2] = kevqos.ext[2];
		kevp->ext[3] = kevqos.ext[3];
	}
	if (!error)
		*addrp += advance;
	return (error);
}
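/*
 * Illustrative userspace sketch (not compiled here) of the KEVENT_FLAG_LEGACY64
 * input layout handled above: kevent64() submits struct kevent64_s entries,
 * whose ext[0]/ext[1] fields are copied through to the internal kevent.  The
 * watched fd and the flags used are arbitrary example choices.
 *
 *	#include <sys/event.h>
 *
 *	int submit_legacy64_change(int kq, int fd)
 *	{
 *		struct kevent64_s change;
 *
 *		// ident, filter, flags, fflags, data, udata, ext[0], ext[1]
 *		EV_SET64(&change, fd, EVFILT_READ, EV_ADD | EV_CLEAR,
 *		    0, 0, 0, 0, 0);
 *
 *		// no output events requested, no timeout, no extra flags
 *		return kevent64(kq, &change, 1, NULL, 0, 0, NULL);
 *	}
 */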
static int
kevent_copyout(struct kevent_internal_s *kevp, user_addr_t *addrp, struct proc *p,
    unsigned int flags)
{
	user_addr_t addr = *addrp;
	int advance;
	int error;

	/*
	 * fully initialize the different output event structure
	 * types from the internal kevent (and some universal
	 * defaults for fields not represented in the internal
	 * form).
	 */
	if (flags & KEVENT_FLAG_LEGACY32) {
		assert((flags & KEVENT_FLAG_STACK_EVENTS) == 0);

		if (IS_64BIT_PROCESS(p)) {
			struct user64_kevent kev64;

			advance = sizeof (kev64);
			bzero(&kev64, advance);

			/*
			 * deal with the special case of a user-supplied
			 * value of (uintptr_t)-1.
			 */
			kev64.ident = (kevp->ident == (uintptr_t)-1) ?
				(uint64_t)-1LL : (uint64_t)kevp->ident;

			kev64.filter = kevp->filter;
			kev64.flags = kevp->flags;
			kev64.fflags = kevp->fflags;
			kev64.data = (int64_t) kevp->data;
			kev64.udata = kevp->udata;
			error = copyout((caddr_t)&kev64, addr, advance);
		} else {
			struct user32_kevent kev32;

			advance = sizeof (kev32);
			bzero(&kev32, advance);
			kev32.ident = (uint32_t)kevp->ident;
			kev32.filter = kevp->filter;
			kev32.flags = kevp->flags;
			kev32.fflags = kevp->fflags;
			kev32.data = (int32_t)kevp->data;
			kev32.udata = kevp->udata;
			error = copyout((caddr_t)&kev32, addr, advance);
		}
	} else if (flags & KEVENT_FLAG_LEGACY64) {
		struct kevent64_s kev64;

		advance = sizeof (struct kevent64_s);
		if (flags & KEVENT_FLAG_STACK_EVENTS) {
			addr -= advance;
		}
		bzero(&kev64, advance);
		kev64.ident = kevp->ident;
		kev64.filter = kevp->filter;
		kev64.flags = kevp->flags;
		kev64.fflags = kevp->fflags;
		kev64.data = (int64_t) kevp->data;
		kev64.udata = kevp->udata;
		kev64.ext[0] = kevp->ext[0];
		kev64.ext[1] = kevp->ext[1];
		error = copyout((caddr_t)&kev64, addr, advance);
	} else {
		struct kevent_qos_s kevqos;

		advance = sizeof (struct kevent_qos_s);
		if (flags & KEVENT_FLAG_STACK_EVENTS) {
			addr -= advance;
		}
		bzero(&kevqos, advance);
		kevqos.ident = kevp->ident;
		kevqos.filter = kevp->filter;
		kevqos.flags = kevp->flags;
		kevqos.qos = kevp->qos;
		kevqos.udata = kevp->udata;
		kevqos.fflags = kevp->fflags;
		kevqos.xflags = 0;
		kevqos.data = (int64_t) kevp->data;
		kevqos.ext[0] = kevp->ext[0];
		kevqos.ext[1] = kevp->ext[1];
		kevqos.ext[2] = kevp->ext[2];
		kevqos.ext[3] = kevp->ext[3];
		error = copyout((caddr_t)&kevqos, addr, advance);
	}
	if (!error) {
		if (flags & KEVENT_FLAG_STACK_EVENTS)
			*addrp = addr;
		else
			*addrp = addr + advance;
	}
	return (error);
}
static int
kevent_get_data_size(struct proc *p,
    uint64_t data_available,
    unsigned int flags,
    user_size_t *residp)
{
	user_size_t resid;
	int error = 0;

	if (data_available != USER_ADDR_NULL) {
		if (flags & KEVENT_FLAG_KERNEL) {
			resid = *(user_size_t *)(uintptr_t)data_available;
		} else if (IS_64BIT_PROCESS(p)) {
			user64_size_t usize;
			error = copyin((user_addr_t)data_available, &usize, sizeof(usize));
			resid = (user_size_t)usize;
		} else {
			user32_size_t usize;
			error = copyin((user_addr_t)data_available, &usize, sizeof(usize));
			resid = (user_size_t)usize;
		}
		if (error)
			return (error);
	} else {
		resid = 0;
	}
	*residp = resid;
	return 0;
}

static int
kevent_put_data_size(struct proc *p,
    uint64_t data_available,
    unsigned int flags,
    user_size_t resid)
{
	int error = 0;

	if (data_available) {
		if (flags & KEVENT_FLAG_KERNEL) {
			*(user_size_t *)(uintptr_t)data_available = resid;
		} else if (IS_64BIT_PROCESS(p)) {
			user64_size_t usize = (user64_size_t)resid;
			error = copyout(&usize, (user_addr_t)data_available, sizeof(usize));
		} else {
			user32_size_t usize = (user32_size_t)resid;
			error = copyout(&usize, (user_addr_t)data_available, sizeof(usize));
		}
	}
	return error;
}
/*
 * kevent_continue - continue a kevent syscall after blocking
 *
 *	assume we inherit a use count on the kq fileglob.
 */
__attribute__((noreturn))
static void
kevent_continue(__unused struct kqueue *kq, void *data, int error)
{
	struct _kevent *cont_args;
	struct fileproc *fp;
	uint64_t data_available;
	user_size_t data_size;
	user_size_t data_resid;
	unsigned int flags;
	int32_t *retval;
	int noutputs;
	int fd;
	struct proc *p = current_proc();

	cont_args = (struct _kevent *)data;
	data_available = cont_args->data_available;
	flags = cont_args->process_data.fp_flags;
	data_size = cont_args->process_data.fp_data_size;
	data_resid = cont_args->process_data.fp_data_resid;
	noutputs = cont_args->eventout;
	retval = cont_args->retval;
	fd = cont_args->fd;
	fp = cont_args->fp;

	kevent_put_kq(p, fd, fp, kq);

	/* don't abandon other output just because of residual copyout failures */
	if (error == 0 && data_available && data_resid != data_size) {
		(void)kevent_put_data_size(p, data_available, flags, data_resid);
	}

	/* don't restart after signals... */
	if (error == ERESTART)
		error = EINTR;
	else if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0)
		*retval = noutputs;
	unix_syscall_return(error);
}
/*
 * kevent - [syscall] register and wait for kernel events
 */
int
kevent(struct proc *p, struct kevent_args *uap, int32_t *retval)
{
	unsigned int flags = KEVENT_FLAG_LEGACY32;

	return kevent_internal(p,
	    (kqueue_id_t)uap->fd, NULL,
	    uap->changelist, uap->nchanges,
	    uap->eventlist, uap->nevents,
	    0ULL, 0ULL,
	    flags,
	    uap->timeout,
	    kevent_continue,
	    retval);
}

int
kevent64(struct proc *p, struct kevent64_args *uap, int32_t *retval)
{
	unsigned int flags;

	/* restrict to user flags and set legacy64 */
	flags = uap->flags & KEVENT_FLAG_USER;
	flags |= KEVENT_FLAG_LEGACY64;

	return kevent_internal(p,
	    (kqueue_id_t)uap->fd, NULL,
	    uap->changelist, uap->nchanges,
	    uap->eventlist, uap->nevents,
	    0ULL, 0ULL,
	    flags,
	    uap->timeout,
	    kevent_continue,
	    retval);
}

int
kevent_qos(struct proc *p, struct kevent_qos_args *uap, int32_t *retval)
{
	/* restrict to user flags */
	uap->flags &= KEVENT_FLAG_USER;

	return kevent_internal(p,
	    (kqueue_id_t)uap->fd, NULL,
	    uap->changelist, uap->nchanges,
	    uap->eventlist, uap->nevents,
	    uap->data_out, (uint64_t)uap->data_available,
	    uap->flags,
	    0ULL,
	    kevent_continue,
	    retval);
}

int
kevent_qos_internal(struct proc *p, int fd,
    user_addr_t changelist, int nchanges,
    user_addr_t eventlist, int nevents,
    user_addr_t data_out, user_size_t *data_available,
    unsigned int flags,
    int32_t *retval)
{
	return kevent_internal(p,
	    (kqueue_id_t)fd, NULL,
	    changelist, nchanges,
	    eventlist, nevents,
	    data_out, (uint64_t)data_available,
	    (flags | KEVENT_FLAG_KERNEL),
	    0ULL,
	    NULL,
	    retval);
}

int
kevent_id(struct proc *p, struct kevent_id_args *uap, int32_t *retval)
{
	/* restrict to user flags */
	uap->flags &= KEVENT_FLAG_USER;

	return kevent_internal(p,
	    (kqueue_id_t)uap->id, NULL,
	    uap->changelist, uap->nchanges,
	    uap->eventlist, uap->nevents,
	    uap->data_out, (uint64_t)uap->data_available,
	    (uap->flags | KEVENT_FLAG_DYNAMIC_KQUEUE),
	    0ULL,
	    kevent_continue,
	    retval);
}

int
kevent_id_internal(struct proc *p, kqueue_id_t *id,
    user_addr_t changelist, int nchanges,
    user_addr_t eventlist, int nevents,
    user_addr_t data_out, user_size_t *data_available,
    unsigned int flags,
    int32_t *retval)
{
	return kevent_internal(p,
	    *id, id,
	    changelist, nchanges,
	    eventlist, nevents,
	    data_out, (uint64_t)data_available,
	    (flags | KEVENT_FLAG_KERNEL | KEVENT_FLAG_DYNAMIC_KQUEUE),
	    0ULL,
	    NULL,
	    retval);
}
static int
kevent_get_timeout(struct proc *p,
    user_addr_t utimeout,
    unsigned int flags,
    struct timeval *atvp)
{
	struct timeval atv;
	int error = 0;

	if (flags & KEVENT_FLAG_IMMEDIATE) {
		getmicrouptime(&atv);
	} else if (utimeout != USER_ADDR_NULL) {
		struct timeval rtv;
		if (flags & KEVENT_FLAG_KERNEL) {
			struct timespec *tsp = (struct timespec *)utimeout;
			TIMESPEC_TO_TIMEVAL(&rtv, tsp);
		} else if (IS_64BIT_PROCESS(p)) {
			struct user64_timespec ts;
			error = copyin(utimeout, &ts, sizeof(ts));
			if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0)
				error = EINVAL;
			else
				TIMESPEC_TO_TIMEVAL(&rtv, &ts);
		} else {
			struct user32_timespec ts;
			error = copyin(utimeout, &ts, sizeof(ts));
			TIMESPEC_TO_TIMEVAL(&rtv, &ts);
		}
		if (error)
			return (error);
		if (itimerfix(&rtv))
			return (EINVAL);
		getmicrouptime(&atv);
		timevaladd(&atv, &rtv);
	} else {
		/* wait forever value */
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	*atvp = atv;
	return 0;
}
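/*
 * Illustrative userspace sketch (not compiled here) of the timeout forms the
 * conversion above accepts from kevent(): a NULL pointer blocks until an event
 * arrives, a zeroed timespec polls, and a non-zero timespec is a relative
 * timeout added to the current uptime.  The 500ms value is an arbitrary
 * example.
 *
 *	#include <sys/event.h>
 *	#include <sys/time.h>
 *
 *	int poll_kq(int kq, struct kevent *out)
 *	{
 *		struct timespec poll_now = { 0, 0 };            // return immediately
 *		return kevent(kq, NULL, 0, out, 1, &poll_now);
 *	}
 *
 *	int wait_kq_500ms(int kq, struct kevent *out)
 *	{
 *		struct timespec rel = { 0, 500 * 1000 * 1000 }; // relative 500ms
 *		return kevent(kq, NULL, 0, out, 1, &rel);
 *	}
 */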
static int
kevent_set_kq_mode(struct kqueue *kq, unsigned int flags)
{
	int error = 0;

	/* each kq should only be used for events of one type */
	kqlock(kq);
	if (kq->kq_state & (KQ_KEV32 | KQ_KEV64 | KQ_KEV_QOS)) {
		if (flags & KEVENT_FLAG_LEGACY32) {
			if ((kq->kq_state & KQ_KEV32) == 0) {
				error = EINVAL;
			}
		} else if (kq->kq_state & KQ_KEV32) {
			error = EINVAL;
		}
	} else if (flags & KEVENT_FLAG_LEGACY32) {
		kq->kq_state |= KQ_KEV32;
	} else if (flags & KEVENT_FLAG_LEGACY64) {
		kq->kq_state |= KQ_KEV64;
	} else {
		kq->kq_state |= KQ_KEV_QOS;
	}
	kqunlock(kq);
	return error;
}
#define KQ_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))
#define CONFIG_KQ_HASHSIZE	CONFIG_KN_HASHSIZE
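/*
 * Illustrative sketch (not compiled here) of how KQ_HASH() above spreads
 * dynamic kqueue ids over the per-process hash table: the id is folded with
 * its own byte-shifted value and masked down to a bucket index.  The id and
 * mask values are arbitrary examples (fd_kqhashmask is always a power-of-two
 * minus one, as produced by hashinit()).
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	#define EX_KQ_HASH(val, mask) (((val) ^ (val >> 8)) & (mask))
 *
 *	int main(void)
 *	{
 *		uint64_t id = 0x12345678abcdull;  // example dynamic kqueue id
 *		uint64_t mask = 63;               // example: 64-bucket table
 *		printf("bucket %llu\n",
 *		    (unsigned long long)EX_KQ_HASH(id, mask));
 *		return 0;
 *	}
 */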
static inline void
kqhash_lock(proc_t p)
{
	lck_mtx_lock_spin_always(&p->p_fd->fd_kqhashlock);
}

static inline void
kqhash_lock_held(__assert_only proc_t p)
{
	LCK_MTX_ASSERT(&p->p_fd->fd_kqhashlock, LCK_MTX_ASSERT_OWNED);
}

static inline void
kqhash_unlock(proc_t p)
{
	lck_mtx_unlock(&p->p_fd->fd_kqhashlock);
}

static void
kqueue_hash_init_if_needed(proc_t p)
{
	struct filedesc *fdp = p->p_fd;

	kqhash_lock_held(p);

	if (__improbable(fdp->fd_kqhash == NULL)) {
		struct kqlist *alloc_hash;
		u_long alloc_mask;

		kqhash_unlock(p);
		alloc_hash = hashinit(CONFIG_KQ_HASHSIZE, M_KQUEUE, &alloc_mask);
		kqhash_lock(p);

		/* See if we won the race */
		if (fdp->fd_kqhashmask == 0) {
			fdp->fd_kqhash = alloc_hash;
			fdp->fd_kqhashmask = alloc_mask;
		} else {
			kqhash_unlock(p);
			FREE(alloc_hash, M_KQUEUE);
			kqhash_lock(p);
		}
	}
}
/*
 * Called with the kqhash_lock() held
 */
static void
kqueue_hash_insert(
	struct proc *p,
	kqueue_id_t id,
	struct kqueue *kq)
{
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	struct filedesc *fdp = p->p_fd;
	struct kqlist *list;

	/* should hold the kq hash lock */
	kqhash_lock_held(p);

	if ((kq->kq_state & KQ_DYNAMIC) == 0) {
		assert(kq->kq_state & KQ_DYNAMIC);
		return;
	}

	/* only dynamically allocate workloop kqs for now */
	assert(kq->kq_state & KQ_WORKLOOP);
	assert(fdp->fd_kqhash);

	kqwl->kqwl_dynamicid = id;

	list = &fdp->fd_kqhash[KQ_HASH(id, fdp->fd_kqhashmask)];
	SLIST_INSERT_HEAD(list, kqwl, kqwl_hashlink);
}
/* Called with kqhash_lock held */
static void
kqueue_hash_remove(
	struct proc *p,
	struct kqueue *kq)
{
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	struct filedesc *fdp = p->p_fd;
	struct kqlist *list;

	/* should hold the kq hash lock */
	kqhash_lock_held(p);

	if ((kq->kq_state & KQ_DYNAMIC) == 0) {
		assert(kq->kq_state & KQ_DYNAMIC);
		return;
	}
	assert(kq->kq_state & KQ_WORKLOOP); /* for now */
	list = &fdp->fd_kqhash[KQ_HASH(kqwl->kqwl_dynamicid, fdp->fd_kqhashmask)];
	SLIST_REMOVE(list, kqwl, kqworkloop, kqwl_hashlink);
}
/* Called with kqhash_lock held */
static struct kqueue *
kqueue_hash_lookup(struct proc *p, kqueue_id_t id)
{
	struct filedesc *fdp = p->p_fd;
	struct kqlist *list;
	struct kqworkloop *kqwl;

	/* should hold the kq hash lock */
	kqhash_lock_held(p);

	if (fdp->fd_kqhashmask == 0) return NULL;

	list = &fdp->fd_kqhash[KQ_HASH(id, fdp->fd_kqhashmask)];
	SLIST_FOREACH(kqwl, list, kqwl_hashlink) {
		if (kqwl->kqwl_dynamicid == id) {
			struct kqueue *kq = (struct kqueue *)kqwl;

			assert(kq->kq_state & KQ_DYNAMIC);
			assert(kq->kq_state & KQ_WORKLOOP); /* for now */
			return kq;
		}
	}
	return NULL;
}

static inline void
kqueue_release_last(struct proc *p, struct kqueue *kq)
{
	if (kq->kq_state & KQ_DYNAMIC) {
		kqhash_lock(p);
		if (kqueue_release(kq, KQUEUE_MIGHT_BE_LAST_REF)) {
			kqueue_hash_remove(p, kq);
			kqhash_unlock(p);
			kqueue_dealloc(kq);
		} else {
			kqhash_unlock(p);
		}
	}
}
static struct kqueue *
kevent_get_bound_kq(__assert_only struct proc *p, thread_t thread,
    unsigned int kev_flags, unsigned int kq_flags)
{
	struct kqueue *kq;
	struct uthread *ut = get_bsdthread_info(thread);

	assert(p == get_bsdthreadtask_info(thread));

	if (!(ut->uu_kqueue_flags & kev_flags))
		return NULL;

	kq = ut->uu_kqueue_bound;
	if (!kq)
		return NULL;

	if (!(kq->kq_state & kq_flags))
		return NULL;

	return kq;
}
static int
kevent_get_kq(struct proc *p, kqueue_id_t id, unsigned int flags, struct fileproc **fpp, int *fdp, struct kqueue **kqp)
{
	struct filedesc *descp = p->p_fd;
	struct fileproc *fp = NULL;
	struct kqueue *kq;
	int fd = 0;
	int error = 0;

	/* Was the workloop flag passed?  Then it is for sure only a workloop */
	if (flags & KEVENT_FLAG_DYNAMIC_KQUEUE) {
		assert(flags & KEVENT_FLAG_WORKLOOP);
		if (id == (kqueue_id_t)-1 &&
		    (flags & KEVENT_FLAG_KERNEL) &&
		    (flags & KEVENT_FLAG_WORKLOOP)) {

			assert(is_workqueue_thread(current_thread()));

			/*
			 * when kevent_id_internal is called from within the
			 * kernel, and the passed 'id' value is '-1' then we
			 * look for the currently bound workloop kq.
			 *
			 * Until pthread kext avoids calling in to kevent_id_internal
			 * for threads whose fulfill is canceled, calling in unbound
			 * can't be fatal.
			 */
			kq = kevent_get_bound_kq(p, current_thread(),
			    KEVENT_FLAG_WORKLOOP, KQ_WORKLOOP);
			if (kq) {
				kqueue_retain(kq);
			} else {
				struct uthread *ut = get_bsdthread_info(current_thread());

				/* If thread is unbound due to cancel, just return an error */
				if (ut->uu_kqueue_flags == KEVENT_FLAG_WORKLOOP_CANCELED) {
					ut->uu_kqueue_flags = 0;
					error = ECANCELED;
					goto out;
				} else {
					panic("Unbound thread called kevent_internal with id=-1"
					    " uu_kqueue_flags:0x%x, uu_kqueue_bound:%p",
					    ut->uu_kqueue_flags, ut->uu_kqueue_bound);
				}
			}

			*fpp = NULL;
			*fdp = 0;
			*kqp = kq;
			return error;
		}

		/* try shortcut on kq lookup for bound threads */
		kq = kevent_get_bound_kq(p, current_thread(), KEVENT_FLAG_WORKLOOP, KQ_WORKLOOP);
		if (kq != NULL && ((struct kqworkloop *)kq)->kqwl_dynamicid == id) {

			if (flags & KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST) {
				error = EEXIST;
				kq = NULL;
				goto out;
			}

			/* retain a reference while working with this kq. */
			assert(kq->kq_state & KQ_DYNAMIC);
			kqueue_retain(kq);
			goto out;
		}

		/* look for the kq on the hash table */
		kqhash_lock(p);
		kq = kqueue_hash_lookup(p, id);
		if (kq == NULL) {
			kqhash_unlock(p);

			if (flags & KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST) {
				error = ENOENT;
				goto out;
			}

			struct kqueue *alloc_kq;
			alloc_kq = kqueue_alloc(p, flags);
			if (alloc_kq) {
				kqhash_lock(p);
				kqueue_hash_init_if_needed(p);
				kq = kqueue_hash_lookup(p, id);
				if (kq == NULL) {
					/* insert our new one */
					kq = alloc_kq;
					kqueue_hash_insert(p, id, kq);
					kqhash_unlock(p);
				} else {
					/* lost race, retain existing workloop */
					kqueue_retain(kq);
					kqhash_unlock(p);
					kqueue_release(alloc_kq, KQUEUE_MIGHT_BE_LAST_REF);
					kqueue_dealloc(alloc_kq);
				}
			} else {
				error = ENOMEM;
				goto out;
			}
		} else {

			if (flags & KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST) {
				kqhash_unlock(p);
				kq = NULL;
				error = EEXIST;
				goto out;
			}

			/* retain a reference while working with this kq. */
			assert(kq->kq_state & KQ_DYNAMIC);
			kqueue_retain(kq);
			kqhash_unlock(p);
		}

	} else if (flags & KEVENT_FLAG_WORKQ) {
		/* must already exist for bound threads. */
		if (flags & KEVENT_FLAG_KERNEL) {
			assert(descp->fd_wqkqueue != NULL);
		}

		/*
		 * use the private kq associated with the proc workq.
		 * Just being a thread within the process (and not
		 * being the exit/exec thread) is enough to hold a
		 * reference on this special kq.
		 */
		kq = descp->fd_wqkqueue;
		if (kq == NULL) {
			struct kqueue *alloc_kq = kqueue_alloc(p, KEVENT_FLAG_WORKQ);
			if (alloc_kq == NULL)
				return ENOMEM;

			knhash_lock(p);
			if (descp->fd_wqkqueue == NULL) {
				kq = descp->fd_wqkqueue = alloc_kq;
				knhash_unlock(p);
			} else {
				knhash_unlock(p);
				kq = descp->fd_wqkqueue;
				kqueue_dealloc(alloc_kq);
			}
		}
	} else {
		/* get a usecount for the kq itself */
		fd = (int)id;
		if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
			return (error);

		if ((error = kevent_set_kq_mode(kq, flags)) != 0) {
			/* drop the usecount */
			fp_drop(p, fd, fp, 0);
			return error;
		}
	}

out:
	*fpp = fp;
	*fdp = fd;
	*kqp = kq;

	return error;
}
static void
kevent_put_kq(
	struct proc *p,
	kqueue_id_t id,
	struct fileproc *fp,
	struct kqueue *kq)
{
	kqueue_release_last(p, kq);
	if (fp != NULL) {
		assert((kq->kq_state & KQ_WORKQ) == 0);
		fp_drop(p, (int)id, fp, 0);
	}
}
static uint64_t
kevent_workloop_serial_no_copyin(proc_t p, uint64_t workloop_id)
{
	uint64_t serial_no = 0;
	user_addr_t addr;
	int rc;

	if (workloop_id == 0 || p->p_dispatchqueue_serialno_offset == 0) {
		return 0;
	}
	addr = (user_addr_t)(workloop_id + p->p_dispatchqueue_serialno_offset);

	if (proc_is64bit(p)) {
		rc = copyin(addr, (caddr_t)&serial_no, sizeof(serial_no));
	} else {
		uint32_t serial_no32 = 0;
		rc = copyin(addr, (caddr_t)&serial_no32, sizeof(serial_no32));
		serial_no = serial_no32;
	}
	return rc == 0 ? serial_no : 0;
}
int
kevent_exit_on_workloop_ownership_leak(thread_t thread)
{
	proc_t p = current_proc();
	struct filedesc *fdp = p->p_fd;
	kqueue_id_t workloop_id = 0;
	os_reason_t reason;
	mach_vm_address_t addr;
	uint32_t reason_size;

	kqhash_lock(p);
	if (fdp->fd_kqhashmask > 0) {
		for (uint32_t i = 0; i < fdp->fd_kqhashmask + 1; i++) {
			struct kqworkloop *kqwl;

			SLIST_FOREACH(kqwl, &fdp->fd_kqhash[i], kqwl_hashlink) {
				struct kqueue *kq = &kqwl->kqwl_kqueue;
				if ((kq->kq_state & KQ_DYNAMIC) && kqwl->kqwl_owner == thread) {
					workloop_id = kqwl->kqwl_dynamicid;
					break;
				}
			}
		}
	}
	kqhash_unlock(p);
	assert(workloop_id);

	reason = os_reason_create(OS_REASON_LIBSYSTEM,
	    OS_REASON_LIBSYSTEM_CODE_WORKLOOP_OWNERSHIP_LEAK);
	if (reason == OS_REASON_NULL) {
		goto out;
	}

	reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
	reason_size = 2 * sizeof(uint64_t);
	reason_size = kcdata_estimate_required_buffer_size(2, reason_size);
	if (os_reason_alloc_buffer(reason, reason_size) != 0) {
		goto out;
	}

	struct kcdata_descriptor *kcd = &reason->osr_kcd_descriptor;

	if (kcdata_get_memory_addr(kcd, EXIT_REASON_WORKLOOP_ID,
	    sizeof(workloop_id), &addr) == KERN_SUCCESS) {
		kcdata_memcpy(kcd, addr, &workloop_id, sizeof(workloop_id));
	}

	uint64_t serial_no = kevent_workloop_serial_no_copyin(p, workloop_id);
	if (serial_no && kcdata_get_memory_addr(kcd, EXIT_REASON_DISPATCH_QUEUE_NO,
	    sizeof(serial_no), &addr) == KERN_SUCCESS) {
		kcdata_memcpy(kcd, addr, &serial_no, sizeof(serial_no));
	}

out:
#if DEVELOPMENT || DEBUG
	psignal_try_thread_with_reason(p, thread, SIGABRT, reason);
	return 0;
#else
	return exit_with_reason(p, W_EXITCODE(0, SIGKILL), (int *)NULL,
	    FALSE, FALSE, 0, reason);
#endif
}
static int
kevent_servicer_detach_preflight(thread_t thread, unsigned int flags, struct kqueue *kq)
{
	int error = 0;
	struct kqworkloop *kqwl;
	struct uthread *ut;
	struct kqrequest *kqr;

	if (!(flags & KEVENT_FLAG_WORKLOOP) || !(kq->kq_state & KQ_WORKLOOP))
		return EINVAL;

	/* only kq created with KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD from userspace can have attached threads */
	if (!(kq->kq_state & KQ_NO_WQ_THREAD))
		return EINVAL;

	/* allow detach only on not wq threads */
	if (is_workqueue_thread(thread))
		return EINVAL;

	/* check that the current thread is bound to the requested wq */
	ut = get_bsdthread_info(thread);
	if (ut->uu_kqueue_bound != kq)
		return EINVAL;

	kqwl = (struct kqworkloop *)kq;
	kqwl_req_lock(kqwl);
	kqr = &kqwl->kqwl_request;

	/* check that the wq is bound to the thread */
	if ((kqr->kqr_state & KQR_BOUND) == 0 || (kqr->kqr_thread != thread))
		error = EINVAL;

	kqwl_req_unlock(kqwl);

	return error;
}
static void
kevent_servicer_detach_thread(struct proc *p, kqueue_id_t id, thread_t thread,
    unsigned int flags, struct kqueue *kq)
{
	struct kqworkloop *kqwl;
	struct uthread *ut;

	assert((flags & KEVENT_FLAG_WORKLOOP) && (kq->kq_state & KQ_WORKLOOP));

	/* allow detach only on not wqthreads threads */
	assert(!is_workqueue_thread(thread));

	/* only kq created with KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD from userspace can have attached threads */
	assert(kq->kq_state & KQ_NO_WQ_THREAD);

	/* check that the current thread is bound to the requested kq */
	ut = get_bsdthread_info(thread);
	assert(ut->uu_kqueue_bound == kq);

	kqwl = (struct kqworkloop *)kq;

	kqlock(kq);

	/* unbind the thread.
	 * unbind itself checks if still processing and ends it.
	 */
	kqworkloop_unbind_thread(kqwl, thread, flags);

	kqunlock(kq);

	kevent_put_kq(p, id, NULL, kq);
}
static int
kevent_servicer_attach_thread(thread_t thread, unsigned int flags, struct kqueue *kq)
{
	int error = 0;
	struct kqworkloop *kqwl;
	struct uthread *ut;
	struct kqrequest *kqr;

	if (!(flags & KEVENT_FLAG_WORKLOOP) || !(kq->kq_state & KQ_WORKLOOP))
		return EINVAL;

	/* only kq created with KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD from userspace can have attached threads*/
	if (!(kq->kq_state & KQ_NO_WQ_THREAD))
		return EINVAL;

	/* allow attach only on not wqthreads */
	if (is_workqueue_thread(thread))
		return EINVAL;

	/* check that the thread is not already bound */
	ut = get_bsdthread_info(thread);
	if (ut->uu_kqueue_bound != NULL)
		return EINVAL;

	assert(ut->uu_kqueue_flags == 0);

	kqlock(kq);
	kqwl = (struct kqworkloop *)kq;
	kqwl_req_lock(kqwl);
	kqr = &kqwl->kqwl_request;

	/* check that the kqueue is not already bound */
	if (kqr->kqr_state & (KQR_BOUND | KQR_THREQUESTED | KQR_DRAIN)) {
		error = EINVAL;
		goto out;
	}

	assert(kqr->kqr_thread == NULL);
	assert((kqr->kqr_state & KQR_PROCESSING) == 0);

	kqr->kqr_state |= KQR_THREQUESTED;
	kqr->kqr_qos_index = THREAD_QOS_UNSPECIFIED;
	kqr->kqr_override_index = THREAD_QOS_UNSPECIFIED;
	kqr->kqr_dsync_owner_qos = THREAD_QOS_UNSPECIFIED;
	kqr->kqr_owner_override_is_sync = 0;

	kqworkloop_bind_thread_impl(kqwl, thread, KEVENT_FLAG_WORKLOOP);

	/* get a ref on the wlkq on behalf of the attached thread */
	kqueue_retain(kq);

out:
	kqwl_req_unlock(kqwl);
	kqunlock(kq);

	return error;
}

static inline
boolean_t kevent_args_requesting_events(unsigned int flags, int nevents)
{
	return (!(flags & KEVENT_FLAG_ERROR_EVENTS) && nevents > 0);
}
static int
kevent_internal(struct proc *p,
    kqueue_id_t id, kqueue_id_t *id_out,
    user_addr_t changelist, int nchanges,
    user_addr_t ueventlist, int nevents,
    user_addr_t data_out, uint64_t data_available,
    unsigned int flags,
    user_addr_t utimeout,
    kqueue_continue_t continuation,
    int32_t *retval)
{
	struct _kevent *cont_args;
	uthread_t ut;
	struct kqueue *kq;
	struct fileproc *fp = NULL;
	int fd = 0;
	struct kevent_internal_s kev;
	int error, noutputs;
	struct timeval atv;
	user_size_t data_size;
	user_size_t data_resid;
	thread_t thread = current_thread();

	/* Don't allow user-space threads to process output events from the workq kqs */
	if (((flags & (KEVENT_FLAG_WORKQ | KEVENT_FLAG_KERNEL)) == KEVENT_FLAG_WORKQ) &&
	    kevent_args_requesting_events(flags, nevents))
		return EINVAL;

	/* restrict dynamic kqueue allocation to workloops (for now) */
	if ((flags & (KEVENT_FLAG_DYNAMIC_KQUEUE | KEVENT_FLAG_WORKLOOP)) == KEVENT_FLAG_DYNAMIC_KQUEUE)
		return EINVAL;

	if (flags & (KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH | KEVENT_FLAG_WORKLOOP_SERVICER_DETACH |
	    KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST | KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST | KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD)) {

		/* allowed only on workloops when calling kevent_id from user-space */
		if (!(flags & KEVENT_FLAG_WORKLOOP) || (flags & KEVENT_FLAG_KERNEL) || !(flags & KEVENT_FLAG_DYNAMIC_KQUEUE))
			return EINVAL;

		/* cannot attach and detach simultaneously*/
		if ((flags & KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH) && (flags & KEVENT_FLAG_WORKLOOP_SERVICER_DETACH))
			return EINVAL;

		/* cannot ask for events and detach */
		if ((flags & KEVENT_FLAG_WORKLOOP_SERVICER_DETACH) && kevent_args_requesting_events(flags, nevents))
			return EINVAL;
	}

	/* prepare to deal with stack-wise allocation of out events */
	if (flags & KEVENT_FLAG_STACK_EVENTS) {
		int scale = ((flags & KEVENT_FLAG_LEGACY32) ?
		    (IS_64BIT_PROCESS(p) ? sizeof(struct user64_kevent) :
		    sizeof(struct user32_kevent)) :
		    ((flags & KEVENT_FLAG_LEGACY64) ? sizeof(struct kevent64_s) :
		    sizeof(struct kevent_qos_s)));
		ueventlist += nevents * scale;
	}

	/* convert timeout to absolute - if we have one (and not immediate) */
	error = kevent_get_timeout(p, utimeout, flags, &atv);
	if (error)
		return error;

	/* copyin initial value of data residual from data_available */
	error = kevent_get_data_size(p, data_available, flags, &data_size);
	if (error)
		return error;

	/* get the kq we are going to be working on */
	error = kevent_get_kq(p, id, flags, &fp, &fd, &kq);
	if (error)
		return error;

	/* only bound threads can receive events on workloops */
	if ((flags & KEVENT_FLAG_WORKLOOP) && kevent_args_requesting_events(flags, nevents)) {
		ut = (uthread_t)get_bsdthread_info(thread);
		if (ut->uu_kqueue_bound != kq) {
			error = EXDEV;
			goto out;
		}
	}

	/* attach the current thread if necessary */
	if (flags & KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH) {
		error = kevent_servicer_attach_thread(thread, flags, kq);
		if (error)
			goto out;
	}

	/* before processing events and committing to the system call, return an error if the thread cannot be detached when requested */
	if (flags & KEVENT_FLAG_WORKLOOP_SERVICER_DETACH) {
		error = kevent_servicer_detach_preflight(thread, flags, kq);
		if (error)
			goto out;
	}

	if (id_out && kq && (flags & KEVENT_FLAG_WORKLOOP)) {
		assert(kq->kq_state & KQ_WORKLOOP);
		struct kqworkloop *kqwl;
		kqwl = (struct kqworkloop *)kq;
		*id_out = kqwl->kqwl_dynamicid;
	}

	/* register all the change requests the user provided... */
	noutputs = 0;
	while (nchanges > 0 && error == 0) {
		error = kevent_copyin(&changelist, &kev, p, flags);
		if (error)
			break;

		/* Make sure user doesn't pass in any system flags */
		kev.flags &= ~EV_SYSFLAGS;

		kevent_register(kq, &kev, p);

		if (nevents > 0 &&
		    ((kev.flags & EV_ERROR) || (kev.flags & EV_RECEIPT))) {
			if (kev.flags & EV_RECEIPT) {
				kev.flags |= EV_ERROR;
				kev.data = 0;
			}
			error = kevent_copyout(&kev, &ueventlist, p, flags);
			if (error == 0) {
				nevents--;
				noutputs++;
			}
		} else if (kev.flags & EV_ERROR) {
			error = kev.data;
		}
		nchanges--;
	}

	/* short-circuit the scan if we only want error events */
	if (flags & KEVENT_FLAG_ERROR_EVENTS)
		nevents = 0;

	/* process pending events */
	if (nevents > 0 && noutputs == 0 && error == 0) {
		/* store the continuation/completion data in the uthread */
		ut = (uthread_t)get_bsdthread_info(thread);
		cont_args = &ut->uu_kevent.ss_kevent;
		cont_args->fp = fp;
		cont_args->fd = fd;
		cont_args->retval = retval;
		cont_args->eventlist = ueventlist;
		cont_args->eventcount = nevents;
		cont_args->eventout = noutputs;
		cont_args->data_available = data_available;
		cont_args->process_data.fp_fd = (int)id;
		cont_args->process_data.fp_flags = flags;
		cont_args->process_data.fp_data_out = data_out;
		cont_args->process_data.fp_data_size = data_size;
		cont_args->process_data.fp_data_resid = data_size;

		error = kqueue_scan(kq, kevent_callback,
		    continuation, cont_args,
		    &cont_args->process_data,
		    &atv, p);

		/* process remaining outputs */
		noutputs = cont_args->eventout;
		data_resid = cont_args->process_data.fp_data_resid;

		/* copyout residual data size value (if it needs to be copied out) */
		/* don't abandon other output just because of residual copyout failures */
		if (error == 0 && data_available && data_resid != data_size) {
			(void)kevent_put_data_size(p, data_available, flags, data_resid);
		}
	}

	/* detach the current thread if necessary */
	if (flags & KEVENT_FLAG_WORKLOOP_SERVICER_DETACH) {
		kevent_servicer_detach_thread(p, id, thread, flags, kq);
	}

out:
	kevent_put_kq(p, id, fp, kq);

	/* don't restart after signals... */
	if (error == ERESTART)
		error = EINTR;
	else if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0)
		*retval = noutputs;
	return (error);
}
/*
 * kevent_callback - callback for each individual event
 *
 *	called with nothing locked
 *	caller holds a reference on the kqueue
 */
static int
kevent_callback(__unused struct kqueue *kq, struct kevent_internal_s *kevp,
    void *data)
{
	struct _kevent *cont_args;
	int error;

	cont_args = (struct _kevent *)data;
	assert(cont_args->eventout < cont_args->eventcount);

	/*
	 * Copy out the appropriate amount of event data for this user.
	 */
	error = kevent_copyout(kevp, &cont_args->eventlist, current_proc(),
	    cont_args->process_data.fp_flags);

	/*
	 * If there isn't space for additional events, return
	 * a harmless error to stop the processing here
	 */
	if (error == 0 && ++cont_args->eventout == cont_args->eventcount)
		error = EWOULDBLOCK;
	return (error);
}

/*
 * kevent_description - format a description of a kevent for diagnostic output
 *
 *	called with a 256-byte string buffer
 */
char *
kevent_description(struct kevent_internal_s *kevp, char *s, size_t n)
{
	snprintf(s, n,
	    "{.ident=%#llx, .filter=%d, .flags=%#x, .udata=%#llx, .fflags=%#x, .data=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}",
	    kevp->ident,
	    kevp->filter,
	    kevp->flags,
	    kevp->udata,
	    kevp->fflags,
	    kevp->data,
	    kevp->ext[0],
	    kevp->ext[1]);

	return (s);
}
4490 * kevent_register - add a new event to a kqueue
4492 * Creates a mapping between the event source and
4493 * the kqueue via a knote data structure.
4495 * Because many/most the event sources are file
4496 * descriptor related, the knote is linked off
4497 * the filedescriptor table for quick access.
4499 * called with nothing locked
4500 * caller holds a reference on the kqueue
4504 kevent_register(struct kqueue
*kq
, struct kevent_internal_s
*kev
,
4505 __unused
struct proc
*ctxp
)
4507 struct proc
*p
= kq
->kq_p
;
4508 const struct filterops
*fops
;
4509 struct knote
*kn
= NULL
;
4512 unsigned short kev_flags
= kev
->flags
;
4513 int knoteuse_flags
= KNUSE_NONE
;
4515 if (kev
->filter
< 0) {
4516 if (kev
->filter
+ EVFILT_SYSCOUNT
< 0) {
4520 fops
= sysfilt_ops
[~kev
->filter
]; /* to 0-base index */
4526 /* restrict EV_VANISHED to adding udata-specific dispatch kevents */
4527 if ((kev
->flags
& EV_VANISHED
) &&
4528 (kev
->flags
& (EV_ADD
| EV_DISPATCH2
)) != (EV_ADD
| EV_DISPATCH2
)) {
4533 /* Simplify the flags - delete and disable overrule */
4534 if (kev
->flags
& EV_DELETE
)
4535 kev
->flags
&= ~EV_ADD
;
4536 if (kev
->flags
& EV_DISABLE
)
4537 kev
->flags
&= ~EV_ENABLE
;
4539 if (kq
->kq_state
& KQ_WORKLOOP
) {
4540 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_REGISTER
),
4541 ((struct kqworkloop
*)kq
)->kqwl_dynamicid
,
4542 kev
->udata
, kev
->flags
, kev
->filter
);
4543 } else if (kq
->kq_state
& KQ_WORKQ
) {
4544 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_REGISTER
),
4545 0, kev
->udata
, kev
->flags
, kev
->filter
);
4547 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_REGISTER
),
4548 VM_KERNEL_UNSLIDE_OR_PERM(kq
),
4549 kev
->udata
, kev
->flags
, kev
->filter
);
4554 /* find the matching knote from the fd tables/hashes */
4555 kn
= kq_find_knote_and_kq_lock(kq
, kev
, fops
->f_isfd
, p
);
4558 if (kev
->flags
& EV_ADD
) {
4559 struct fileproc
*knote_fp
= NULL
;
4561 /* grab a file reference for the new knote */
4563 if ((error
= fp_lookup(p
, kev
->ident
, &knote_fp
, 0)) != 0) {
4571 if (knote_fp
!= NULL
)
4572 fp_drop(p
, kev
->ident
, knote_fp
, 0);
4576 kn
->kn_fp
= knote_fp
;
4577 knote_set_kq(kn
, kq
);
4578 kqueue_retain(kq
); /* retain a kq ref */
4579 kn
->kn_filtid
= ~kev
->filter
;
4580 kn
->kn_inuse
= 1; /* for f_attach() */
4581 kn
->kn_status
= KN_ATTACHING
| KN_ATTACHED
;
4583 /* was vanish support requested */
4584 if (kev
->flags
& EV_VANISHED
) {
4585 kev
->flags
&= ~EV_VANISHED
;
4586 kn
->kn_status
|= KN_REQVANISH
;
4589 /* snapshot matching/dispatching protcol flags into knote */
4590 if (kev
->flags
& EV_DISPATCH
)
4591 kn
->kn_status
|= KN_DISPATCH
;
4592 if (kev
->flags
& EV_UDATA_SPECIFIC
)
4593 kn
->kn_status
|= KN_UDATA_SPECIFIC
;
4596 * copy the kevent state into knote
4597 * protocol is that fflags and data
4598 * are saved off, and cleared before
4599 * calling the attach routine.
4601 kn
->kn_kevent
= *kev
;
4602 kn
->kn_sfflags
= kev
->fflags
;
4603 kn
->kn_sdata
= kev
->data
;
4607 /* invoke pthread kext to convert kevent qos to thread qos */
4608 knote_canonicalize_kevent_qos(kn
);
4609 knote_set_qos_index(kn
, qos_index_from_qos(kn
, kn
->kn_qos
, FALSE
));
4611 /* before anyone can find it */
4612 if (kev
->flags
& EV_DISABLE
) {
4614 * do this before anyone can find it,
4615 * this can't call knote_disable() because it expects having
4618 kn
->kn_status
|= KN_DISABLED
;
4621 /* Add the knote for lookup thru the fd table */
4622 error
= kq_add_knote(kq
, kn
, kev
, p
, &knoteuse_flags
);
4624 (void)kqueue_release(kq
, KQUEUE_CANT_BE_LAST_REF
);
4626 if (knote_fp
!= NULL
)
4627 fp_drop(p
, kev
->ident
, knote_fp
, 0);
4629 if (error
== ERESTART
) {
4636 /* fp reference count now applies to knote */
4637 /* rwlock boost is now held */
4639 /* call filter attach routine */
4640 result
= fops
->f_attach(kn
, kev
);
4643 * Trade knote use count for kq lock.
4644 * Cannot be dropped because we held
4645 * KN_ATTACHING throughout.
4647 knoteuse2kqlock(kq
, kn
, KNUSE_STEAL_DROP
| knoteuse_flags
);
4649 if (kn
->kn_flags
& EV_ERROR
) {
4651 * Failed to attach correctly, so drop.
4652 * All other possible users/droppers
4653 * have deferred to us. Save the error
4654 * to return to our caller.
4656 kn
->kn_status
&= ~KN_ATTACHED
;
4657 kn
->kn_status
|= KN_DROPPING
;
4658 error
= kn
->kn_data
;
4664 /* end "attaching" phase - now just attached */
4665 kn
->kn_status
&= ~KN_ATTACHING
;
4667 if (kn
->kn_status
& KN_DROPPING
) {
4669 * Attach succeeded, but someone else
4670 * deferred their drop - now we have
4671 * to do it for them.
		/* Mark the thread request overcommit - if apropos */
4679 knote_set_qos_overcommit(kn
);
4682 * If the attach routine indicated that an
4683 * event is already fired, activate the knote.
4688 if (knote_fops(kn
)->f_post_attach
) {
4689 error
= knote_fops(kn
)->f_post_attach(kn
, kev
);
4697 if ((kev_flags
& (EV_ADD
| EV_DELETE
)) == (EV_ADD
| EV_DELETE
) &&
4698 (kq
->kq_state
& KQ_WORKLOOP
)) {
4700 * For workloops, understand EV_ADD|EV_DELETE as a "soft" delete
4701 * that doesn't care about ENOENT, so just pretend the deletion
4711 /* existing knote: kqueue lock already taken by kq_find_knote_and_kq_lock */
4713 if ((kn
->kn_status
& (KN_DROPPING
| KN_ATTACHING
)) != 0) {
4715 * The knote is not in a stable state, wait for that
4716 * transition to complete and then redrive the lookup.
4718 knoteusewait(kq
, kn
);
4722 if (kev
->flags
& EV_DELETE
) {
4725 * If attempting to delete a disabled dispatch2 knote,
4726 * we must wait for the knote to be re-enabled (unless
4727 * it is being re-enabled atomically here).
4729 if ((kev
->flags
& EV_ENABLE
) == 0 &&
4730 (kn
->kn_status
& (KN_DISPATCH2
| KN_DISABLED
)) ==
4731 (KN_DISPATCH2
| KN_DISABLED
)) {
4732 kn
->kn_status
|= KN_DEFERDELETE
;
4734 error
= EINPROGRESS
;
4735 } else if (knote_fops(kn
)->f_drop_and_unlock
) {
4737 * The filter has requested to handle EV_DELETE events
4739 * ERESTART means the kevent has to be re-evaluated
4741 error
= knote_fops(kn
)->f_drop_and_unlock(kn
, kev
);
4742 if (error
== ERESTART
) {
4746 } else if (kqlock2knotedrop(kq
, kn
)) {
4747 /* standard/default EV_DELETE path */
4751 * The kqueue is unlocked, it's not being
4752 * dropped, and kqlock2knotedrop returned 0:
4753 * this means that someone stole the drop of
4754 * the knote from us.
4756 error
= EINPROGRESS
;
4762 * If we are re-enabling a deferred-delete knote,
4763 * just enable it now and avoid calling the
4764 * filter touch routine (it has delivered its
4765 * last event already).
4767 if ((kev
->flags
& EV_ENABLE
) &&
4768 (kn
->kn_status
& KN_DEFERDELETE
)) {
4769 assert(kn
->kn_status
& KN_DISABLED
);
4777 * If we are disabling, do it before unlocking and
4778 * calling the touch routine (so no processing can
4779 * see the new kevent state before the disable is
4782 if (kev
->flags
& EV_DISABLE
)
4786 * Convert the kqlock to a use reference on the
4787 * knote so we can call the filter touch routine.
4789 if (knoteuse_needs_boost(kn
, kev
)) {
4790 knoteuse_flags
|= KNUSE_BOOST
;
4792 if (kqlock2knoteuse(kq
, kn
, knoteuse_flags
)) {
4794 * Call touch routine to notify filter of changes
4795 * in filter values (and to re-determine if any
4796 * events are fired).
4798 result
= knote_fops(kn
)->f_touch(kn
, kev
);
4800 /* Get the kq lock back (don't defer droppers). */
4801 if (!knoteuse2kqlock(kq
, kn
, knoteuse_flags
)) {
4806 /* Handle errors during touch routine */
4807 if (kev
->flags
& EV_ERROR
) {
4813 /* Activate it if the touch routine said to */
4818 /* Enable the knote if called for */
4819 if (kev
->flags
& EV_ENABLE
)
4824 /* still have kqlock held and knote is valid */
4828 /* output local errors through the kevent */
4830 kev
->flags
|= EV_ERROR
;
/*
 * knote_process - process a triggered event
 *
 *	Validate that it is really still a triggered event
 *	by calling the filter routines (if necessary).  Hold
 *	a use reference on the knote to avoid it being detached.
 *
 *	If it is still considered triggered, we will have taken
 *	a copy of the state under the filter lock.  We use that
 *	snapshot to dispatch the knote for future processing (or
 *	not, if this was a lost event).
 *
 *	Our caller assures us that nobody else can be processing
 *	events from this knote during the whole operation.  But
 *	others can be touching or posting events to the knote
 *	interspersed with our processing it.
 *
 *	caller holds a reference on the kqueue.
 *	kqueue locked on entry and exit - but may be dropped
 */
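/*
 * Illustrative note (an informal summary of the deferred-delete handling in
 * the body below, not additional authoritative documentation): a oneshot
 * EV_DISPATCH2 knote is not torn down the moment its last event fires.
 * Instead it is marked KN_DEFERDELETE and a synthesized event carrying
 * EV_DELETE | EV_DISPATCH2 | EV_ONESHOT is delivered to the client, which
 * later acknowledges it (with EV_DELETE, or by re-enabling the knote).  Only
 * then is the knote actually dropped.
 */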
static int
knote_process(struct knote *kn,
	kevent_callback_t callback,
	void *callback_data,
	struct filt_process_s *process_data,
	struct proc *p)
{
	struct kevent_internal_s kev;
	struct kqueue *kq = knote_get_kq(kn);
	int result = 0;
	int error = 0;

	bzero(&kev, sizeof(kev));

	/*
	 * Must be active or stayactive
	 * Must be queued and not disabled/suppressed
	 */
	assert(kn->kn_status & KN_QUEUED);
	assert(kn->kn_status & (KN_ACTIVE|KN_STAYACTIVE));
	assert(!(kn->kn_status & (KN_DISABLED|KN_SUPPRESSED|KN_DROPPING)));
4878 if (kq
->kq_state
& KQ_WORKLOOP
) {
4879 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS
),
4880 ((struct kqworkloop
*)kq
)->kqwl_dynamicid
,
4881 kn
->kn_udata
, kn
->kn_status
| (kn
->kn_id
<< 32),
4883 } else if (kq
->kq_state
& KQ_WORKQ
) {
4884 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS
),
4885 0, kn
->kn_udata
, kn
->kn_status
| (kn
->kn_id
<< 32),
4888 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS
),
4889 VM_KERNEL_UNSLIDE_OR_PERM(kq
), kn
->kn_udata
,
4890 kn
->kn_status
| (kn
->kn_id
<< 32), kn
->kn_filtid
);
4894 * For deferred-drop or vanished events, we just create a fake
4895 * event to acknowledge end-of-life. Otherwise, we call the
4896 * filter's process routine to snapshot the kevent state under
4897 * the filter's locking protocol.
4899 if (kn
->kn_status
& (KN_DEFERDELETE
| KN_VANISHED
)) {
4900 /* create fake event */
4901 kev
.filter
= kn
->kn_filter
;
4902 kev
.ident
= kn
->kn_id
;
4903 kev
.qos
= kn
->kn_qos
;
4904 kev
.flags
= (kn
->kn_status
& KN_DEFERDELETE
) ?
4905 EV_DELETE
: EV_VANISHED
;
4906 kev
.flags
|= (EV_DISPATCH2
| EV_ONESHOT
);
4907 kev
.udata
= kn
->kn_udata
;
4912 int flags
= KNUSE_NONE
;
4913 /* deactivate - so new activations indicate a wakeup */
4914 knote_deactivate(kn
);
4916 /* suppress knotes to avoid returning the same event multiple times in a single call. */
4919 if (knoteuse_needs_boost(kn
, NULL
)) {
4920 flags
|= KNUSE_BOOST
;
4922 /* convert lock to a knote use reference */
4923 if (!kqlock2knoteuse(kq
, kn
, flags
))
4924 panic("dropping knote found on queue\n");
4926 /* call out to the filter to process with just a ref */
4927 result
= knote_fops(kn
)->f_process(kn
, process_data
, &kev
);
4928 if (result
) flags
|= KNUSE_STEAL_DROP
;
4931 * convert our reference back to a lock. accept drop
4932 * responsibility from others if we've committed to
4933 * delivering event data.
4935 if (!knoteuse2kqlock(kq
, kn
, flags
)) {
4943 * Determine how to dispatch the knote for future event handling.
4944 * not-fired: just return (do not callout, leave deactivated).
4945 * One-shot: If dispatch2, enter deferred-delete mode (unless this is
4946 * is the deferred delete event delivery itself). Otherwise,
4948 * stolendrop:We took responsibility for someone else's drop attempt.
4949 * treat this just like one-shot and prepare to turn it back
4950 * into a deferred delete if required.
4951 * Dispatch: don't clear state, just mark it disabled.
4952 * Cleared: just leave it deactivated.
4953 * Others: re-activate as there may be more events to handle.
4954 * This will not wake up more handlers right now, but
4955 * at the completion of handling events it may trigger
4956 * more handler threads (TODO: optimize based on more than
4957 * just this one event being detected by the filter).
4961 return (EJUSTRETURN
);
4963 if ((kev
.flags
& EV_ONESHOT
) || (kn
->kn_status
& KN_STOLENDROP
)) {
4964 if ((kn
->kn_status
& (KN_DISPATCH2
| KN_DEFERDELETE
)) == KN_DISPATCH2
) {
4965 /* defer dropping non-delete oneshot dispatch2 events */
4966 kn
->kn_status
|= KN_DEFERDELETE
;
4969 /* if we took over another's drop clear those flags here */
4970 if (kn
->kn_status
& KN_STOLENDROP
) {
4971 assert(kn
->kn_status
& KN_DROPPING
);
4973 * the knote will be dropped when the
4974 * deferred deletion occurs
4976 kn
->kn_status
&= ~(KN_DROPPING
|KN_STOLENDROP
);
4978 } else if (kn
->kn_status
& KN_STOLENDROP
) {
4979 /* We now own the drop of the knote. */
4980 assert(kn
->kn_status
& KN_DROPPING
);
4981 knote_unsuppress(kn
);
4985 } else if (kqlock2knotedrop(kq
, kn
)) {
4986 /* just EV_ONESHOT, _not_ DISPATCH2 */
4990 } else if (kn
->kn_status
& KN_DISPATCH
) {
4991 /* disable all dispatch knotes */
4993 } else if ((kev
.flags
& EV_CLEAR
) == 0) {
4994 /* re-activate in case there are more events */
5000 * callback to handle each event as we find it.
5001 * If we have to detach and drop the knote, do
5002 * it while we have the kq unlocked.
5006 error
= (callback
)(kq
, &kev
, callback_data
);
/*
 * Return 0 to indicate that processing should proceed,
 * -1 if there is nothing to process.
 *
 * Called with kqueue locked and returns the same way,
 * but may drop lock temporarily.
 */
static int
kqworkq_begin_processing(struct kqworkq *kqwq, kq_index_t qos_index, int flags)
{
	struct kqrequest *kqr;
	thread_t self = current_thread();
	__assert_only struct uthread *ut = get_bsdthread_info(self);

	assert(kqwq->kqwq_state & KQ_WORKQ);
	assert(qos_index < KQWQ_NQOS);

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_START,
	              flags, qos_index);

	kqwq_req_lock(kqwq);

	kqr = kqworkq_get_request(kqwq, qos_index);

	/* manager skips buckets that haven't asked for its help */
	if (flags & KEVENT_FLAG_WORKQ_MANAGER) {

		/* If nothing for manager to do, just return */
		if ((kqr->kqr_state & KQWQ_THMANAGER) == 0) {
			KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END,
			              0, kqr->kqr_state);
			kqwq_req_unlock(kqwq);
			return -1;
		}
		/* bind manager thread from this time on */
		kqworkq_bind_thread_impl(kqwq, qos_index, self, flags);

	} else {
		/* We should already be bound to this kqueue */
		assert(kqr->kqr_state & KQR_BOUND);
		assert(kqr->kqr_thread == self);
		assert(ut->uu_kqueue_bound == (struct kqueue *)kqwq);
		assert(ut->uu_kqueue_qos_index == qos_index);
		assert((ut->uu_kqueue_flags & flags) == ut->uu_kqueue_flags);
	}

	/*
	 * we should have been requested to be here
	 * and nobody else should still be processing
	 */
	assert(kqr->kqr_state & KQR_WAKEUP);
	assert(kqr->kqr_state & KQR_THREQUESTED);
	assert((kqr->kqr_state & KQR_PROCESSING) == 0);

	/* reset wakeup trigger to catch new events after we start processing */
	kqr->kqr_state &= ~KQR_WAKEUP;

	/* convert to processing mode */
	kqr->kqr_state |= KQR_PROCESSING;

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END,
	              kqr_thread_id(kqr), kqr->kqr_state);

	kqwq_req_unlock(kqwq);
	return 0;
}
static bool
kqworkloop_is_processing_on_current_thread(struct kqworkloop *kqwl)
{
	struct kqueue *kq = &kqwl->kqwl_kqueue;

	kqlock_held(kq);

	if (kq->kq_state & KQ_PROCESSING) {
		/*
		 * KQ_PROCESSING is unset with the kqlock held, and the kqr thread is
		 * never modified while KQ_PROCESSING is set, meaning that peeking at
		 * its value is safe from this context.
		 */
		return kqwl->kqwl_request.kqr_thread == current_thread();
	}

	return false;
}
static void
kqworkloop_acknowledge_events(struct kqworkloop *kqwl, boolean_t clear_ipc_override)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;
	struct knote *kn, *tmp;

	kqlock_held(&kqwl->kqwl_kqueue);

	TAILQ_FOREACH_SAFE(kn, &kqr->kqr_suppressed, kn_tqe, tmp) {
		/*
		 * If a knote that can adjust QoS is disabled because of the automatic
		 * behavior of EV_DISPATCH, the knotes should stay suppressed so that
		 * further overrides keep pushing.
		 */
		if (knote_fops(kn)->f_adjusts_qos && (kn->kn_status & KN_DISABLED) &&
		    (kn->kn_status & (KN_STAYACTIVE | KN_DROPPING)) == 0 &&
		    (kn->kn_flags & (EV_DISPATCH | EV_DISABLE)) == EV_DISPATCH) {
			/*
			 * When called from unbind, clear the sync ipc override on the knote
			 * for events which are delivered.
			 */
			if (clear_ipc_override) {
				knote_adjust_sync_qos(kn, THREAD_QOS_UNSPECIFIED, FALSE);
			}
			continue;
		}
		knote_unsuppress(kn);
	}
}
static int
kqworkloop_begin_processing(struct kqworkloop *kqwl,
		__assert_only unsigned int flags)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;
	struct kqueue *kq = &kqwl->kqwl_kqueue;

	kqlock_held(kq);

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_START,
	              kqwl->kqwl_dynamicid, flags, 0);

	kqwl_req_lock(kqwl);

	/* nobody else should still be processing */
	assert((kqr->kqr_state & KQR_PROCESSING) == 0);
	assert((kq->kq_state & KQ_PROCESSING) == 0);

	kqr->kqr_state |= KQR_PROCESSING | KQR_R2K_NOTIF_ARMED;
	kq->kq_state |= KQ_PROCESSING;

	kqwl_req_unlock(kqwl);

	kqworkloop_acknowledge_events(kqwl, FALSE);

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_END,
	              kqwl->kqwl_dynamicid, flags, 0);

	return 0;
}
/*
 * Return 0 to indicate that processing should proceed,
 * -1 if there is nothing to process.
 *
 * Called with kqueue locked and returns the same way,
 * but may drop lock temporarily.
 */
static int
kqueue_begin_processing(struct kqueue *kq, kq_index_t qos_index, unsigned int flags)
{
	struct kqtailq *suppressq;

	kqlock_held(kq);

	if (kq->kq_state & KQ_WORKQ) {
		return kqworkq_begin_processing((struct kqworkq *)kq, qos_index, flags);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		return kqworkloop_begin_processing((struct kqworkloop *)kq, flags);
	}

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_START,
	              VM_KERNEL_UNSLIDE_OR_PERM(kq), flags);

	assert(qos_index == QOS_INDEX_KQFILE);

	/* wait to become the exclusive processing thread */
	for (;;) {
		if (kq->kq_state & KQ_DRAIN) {
			KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
			              VM_KERNEL_UNSLIDE_OR_PERM(kq), 2);
			return -1;
		}

		if ((kq->kq_state & KQ_PROCESSING) == 0)
			break;

		/* if someone else is processing the queue, wait */
		kq->kq_state |= KQ_PROCWAIT;
		suppressq = kqueue_get_suppressed_queue(kq, qos_index);
		waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
		                    CAST_EVENT64_T(suppressq),
		                    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

		kqunlock(kq);
		thread_block(THREAD_CONTINUE_NULL);
		kqlock(kq);
	}

	/* Nobody else processing */

	/* clear pre-posts and KQ_WAKEUP now, in case we bail early */
	waitq_set_clear_preposts(&kq->kq_wqs);
	kq->kq_state &= ~KQ_WAKEUP;

	/* anything left to process? */
	if (kqueue_queue_empty(kq, qos_index)) {
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
		              VM_KERNEL_UNSLIDE_OR_PERM(kq), 1);
		return -1;
	}

	/* convert to processing mode */
	kq->kq_state |= KQ_PROCESSING;

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
	              VM_KERNEL_UNSLIDE_OR_PERM(kq));

	return 0;
}
/*
 * kqworkq_end_processing - Complete the processing of a workq kqueue
 *
 *	We may have to request new threads.
 *	This can happen when there are no waiting processing threads and:
 *	- there were active events we never got to (count > 0)
 *	- we pended waitq hook callouts during processing
 *	- we pended wakeups while processing (or unsuppressing)
 *
 *	Called with kqueue lock held.
 */
5242 kqworkq_end_processing(struct kqworkq
*kqwq
, kq_index_t qos_index
, int flags
)
5244 #pragma unused(flags)
5246 struct kqueue
*kq
= &kqwq
->kqwq_kqueue
;
5247 struct kqtailq
*suppressq
= kqueue_get_suppressed_queue(kq
, qos_index
);
5249 thread_t self
= current_thread();
5250 struct uthread
*ut
= get_bsdthread_info(self
);
5252 struct kqrequest
*kqr
;
5255 assert(kqwq
->kqwq_state
& KQ_WORKQ
);
5256 assert(qos_index
< KQWQ_NQOS
);
5258 /* Are we really bound to this kqueue? */
5259 if (ut
->uu_kqueue_bound
!= kq
) {
5260 assert(ut
->uu_kqueue_bound
== kq
);
5264 kqr
= kqworkq_get_request(kqwq
, qos_index
);
5266 kqwq_req_lock(kqwq
);
5268 /* Do we claim to be manager? */
5269 if (flags
& KEVENT_FLAG_WORKQ_MANAGER
) {
5271 /* bail if not bound that way */
5272 if (ut
->uu_kqueue_qos_index
!= KQWQ_QOS_MANAGER
||
5273 (ut
->uu_kqueue_flags
& KEVENT_FLAG_WORKQ_MANAGER
) == 0) {
5274 assert(ut
->uu_kqueue_qos_index
== KQWQ_QOS_MANAGER
);
5275 assert(ut
->uu_kqueue_flags
& KEVENT_FLAG_WORKQ_MANAGER
);
5276 kqwq_req_unlock(kqwq
);
5280 /* bail if this request wasn't already getting manager help */
5281 if ((kqr
->kqr_state
& KQWQ_THMANAGER
) == 0 ||
5282 (kqr
->kqr_state
& KQR_PROCESSING
) == 0) {
5283 kqwq_req_unlock(kqwq
);
5287 if (ut
->uu_kqueue_qos_index
!= qos_index
||
5288 (ut
->uu_kqueue_flags
& KEVENT_FLAG_WORKQ_MANAGER
)) {
5289 assert(ut
->uu_kqueue_qos_index
== qos_index
);
5290 assert((ut
->uu_kqueue_flags
& KEVENT_FLAG_WORKQ_MANAGER
) == 0);
5291 kqwq_req_unlock(kqwq
);
5296 assert(kqr
->kqr_state
& KQR_BOUND
);
5297 thread
= kqr
->kqr_thread
;
5298 assert(thread
== self
);
5300 assert(kqr
->kqr_state
& KQR_PROCESSING
);
5302 /* If we didn't drain the whole queue, re-mark a wakeup being needed */
5303 if (!kqueue_queue_empty(kq
, qos_index
))
5304 kqr
->kqr_state
|= KQR_WAKEUP
;
5306 kqwq_req_unlock(kqwq
);
5309 * Return suppressed knotes to their original state.
5310 * For workq kqueues, suppressed ones that are still
5311 * truly active (not just forced into the queue) will
5312 * set flags we check below to see if anything got
5315 while ((kn
= TAILQ_FIRST(suppressq
)) != NULL
) {
5316 assert(kn
->kn_status
& KN_SUPPRESSED
);
5317 knote_unsuppress(kn
);
5320 kqwq_req_lock(kqwq
);
5322 /* Indicate that we are done processing this request */
5323 kqr
->kqr_state
&= ~KQR_PROCESSING
;
5326 * Drop our association with this one request and its
5329 kqworkq_unbind_thread(kqwq
, qos_index
, thread
, flags
);
5332 * request a new thread if we didn't process the whole
5333 * queue or real events have happened (not just putting
5334 * stay-active events back).
5336 if (kqr
->kqr_state
& KQR_WAKEUP
) {
5337 if (kqueue_queue_empty(kq
, qos_index
)) {
5338 kqr
->kqr_state
&= ~KQR_WAKEUP
;
5340 kqworkq_request_thread(kqwq
, qos_index
);
5343 kqwq_req_unlock(kqwq
);
5347 kqworkloop_end_processing(struct kqworkloop
*kqwl
, int nevents
,
5350 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
5351 struct kqueue
*kq
= &kqwl
->kqwl_kqueue
;
5355 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END
) | DBG_FUNC_START
,
5356 kqwl
->kqwl_dynamicid
, flags
, 0);
5358 if ((kq
->kq_state
& KQ_NO_WQ_THREAD
) && nevents
== 0 &&
5359 (flags
& KEVENT_FLAG_IMMEDIATE
) == 0) {
		 * <rdar://problem/31634014> We may soon block, but have returned no
		 * kevents that need to be kept suppressed for overriding purposes.
		 *
		 * It is hence safe to acknowledge events and unsuppress everything, so
		 * that if we block we can observe all events firing.
5367 kqworkloop_acknowledge_events(kqwl
, TRUE
);
5370 kqwl_req_lock(kqwl
);
5372 assert(kqr
->kqr_state
& KQR_PROCESSING
);
5373 assert(kq
->kq_state
& KQ_PROCESSING
);
5375 kq
->kq_state
&= ~KQ_PROCESSING
;
5376 kqr
->kqr_state
&= ~KQR_PROCESSING
;
5377 kqworkloop_update_threads_qos(kqwl
, KQWL_UTQ_RECOMPUTE_WAKEUP_QOS
, 0);
5379 kqwl_req_unlock(kqwl
);
5381 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END
) | DBG_FUNC_END
,
5382 kqwl
->kqwl_dynamicid
, flags
, 0);
5386 * Called with kqueue lock held.
5389 kqueue_end_processing(struct kqueue
*kq
, kq_index_t qos_index
,
5390 int nevents
, unsigned int flags
)
5393 struct kqtailq
*suppressq
;
5398 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
5400 if (kq
->kq_state
& KQ_WORKLOOP
) {
5401 return kqworkloop_end_processing((struct kqworkloop
*)kq
, nevents
, flags
);
5404 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_END
),
5405 VM_KERNEL_UNSLIDE_OR_PERM(kq
), flags
);
5407 assert(qos_index
== QOS_INDEX_KQFILE
);
5410 * Return suppressed knotes to their original state.
5412 suppressq
= kqueue_get_suppressed_queue(kq
, qos_index
);
5413 while ((kn
= TAILQ_FIRST(suppressq
)) != NULL
) {
5414 assert(kn
->kn_status
& KN_SUPPRESSED
);
5415 knote_unsuppress(kn
);
5418 procwait
= (kq
->kq_state
& KQ_PROCWAIT
);
5419 kq
->kq_state
&= ~(KQ_PROCESSING
| KQ_PROCWAIT
);
5422 /* first wake up any thread already waiting to process */
5423 waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
,
5424 CAST_EVENT64_T(suppressq
),
5426 WAITQ_ALL_PRIORITIES
);
/*
 * kqworkq_internal_bind - bind thread to processing workq kqueue
 *
 *	Determines if the provided thread will be responsible for
 *	servicing the particular QoS class index specified in the
 *	parameters. Once the binding is done, any overrides that may
 *	be associated with the corresponding events can be applied.
 *
 *	This should be called as soon as the thread identity is known,
 *	preferably while still at high priority during creation.
 *
 *	- caller holds a reference on the process (and workq kq)
 *	- the thread MUST call kevent_qos_internal after being bound
 *	  or the bucket of events may never be delivered.
 *	- Nothing locked
 *	  (unless this is a synchronous bind, then the request is locked)
 */
5448 kqworkq_internal_bind(
5450 kq_index_t qos_index
,
5455 struct kqworkq
*kqwq
;
5456 struct kqrequest
*kqr
;
5457 struct uthread
*ut
= get_bsdthread_info(thread
);
5459 /* If no process workq, can't be our thread. */
5460 kq
= p
->p_fd
->fd_wqkqueue
;
5465 assert(kq
->kq_state
& KQ_WORKQ
);
5466 kqwq
= (struct kqworkq
*)kq
;
5469 * No need to bind the manager thread to any specific
5470 * bucket, but still claim the thread.
5472 if (qos_index
== KQWQ_QOS_MANAGER
) {
5473 assert(ut
->uu_kqueue_bound
== NULL
);
5474 assert(flags
& KEVENT_FLAG_WORKQ_MANAGER
);
5475 ut
->uu_kqueue_bound
= kq
;
5476 ut
->uu_kqueue_qos_index
= qos_index
;
5477 ut
->uu_kqueue_flags
= flags
;
5479 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_BIND
),
5480 thread_tid(thread
), flags
, qos_index
);
5486 * If this is a synchronous bind callback, the request
5487 * lock is already held, so just do the bind.
5489 if (flags
& KEVENT_FLAG_SYNCHRONOUS_BIND
) {
5490 kqwq_req_held(kqwq
);
		/* strip out synchronous bind flag */
5492 flags
&= ~KEVENT_FLAG_SYNCHRONOUS_BIND
;
5493 kqworkq_bind_thread_impl(kqwq
, qos_index
, thread
, flags
);
5498 * check the request that corresponds to our qos_index
5499 * to see if there is an outstanding request.
5501 kqr
= kqworkq_get_request(kqwq
, qos_index
);
5502 assert(kqr
->kqr_qos_index
== qos_index
);
5503 kqwq_req_lock(kqwq
);
5505 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_BIND
),
5506 thread_tid(thread
), flags
, qos_index
, kqr
->kqr_state
);
5508 if ((kqr
->kqr_state
& KQR_THREQUESTED
) &&
5509 (kqr
->kqr_state
& KQR_PROCESSING
) == 0) {
5511 if ((kqr
->kqr_state
& KQR_BOUND
) &&
5512 thread
== kqr
->kqr_thread
) {
5513 /* duplicate bind - claim the thread */
5514 assert(ut
->uu_kqueue_bound
== kq
);
5515 assert(ut
->uu_kqueue_qos_index
== qos_index
);
5516 kqwq_req_unlock(kqwq
);
5519 if ((kqr
->kqr_state
& (KQR_BOUND
| KQWQ_THMANAGER
)) == 0) {
5520 /* ours to bind to */
5521 kqworkq_bind_thread_impl(kqwq
, qos_index
, thread
, flags
);
5522 kqwq_req_unlock(kqwq
);
5526 kqwq_req_unlock(kqwq
);
5531 kqworkloop_bind_thread_impl(struct kqworkloop
*kqwl
,
5533 __assert_only
unsigned int flags
)
5535 assert(flags
& KEVENT_FLAG_WORKLOOP
);
5537 /* the request object must be locked */
5538 kqwl_req_held(kqwl
);
5540 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
5541 struct uthread
*ut
= get_bsdthread_info(thread
);
5542 boolean_t ipc_override_is_sync
;
5543 kq_index_t qos_index
= kqworkloop_combined_qos(kqwl
, &ipc_override_is_sync
);
5545 /* nobody else bound so finally bind (as a workloop) */
5546 assert(kqr
->kqr_state
& KQR_THREQUESTED
);
5547 assert((kqr
->kqr_state
& (KQR_BOUND
| KQR_PROCESSING
)) == 0);
5548 assert(thread
!= kqwl
->kqwl_owner
);
5550 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_BIND
),
5551 kqwl
->kqwl_dynamicid
, (uintptr_t)thread_tid(thread
),
5553 (uintptr_t)(((uintptr_t)kqr
->kqr_override_index
<< 16) |
5554 (((uintptr_t)kqr
->kqr_state
) << 8) |
5555 ((uintptr_t)ipc_override_is_sync
)));
5557 kqr
->kqr_state
|= KQR_BOUND
| KQR_R2K_NOTIF_ARMED
;
5558 kqr
->kqr_thread
= thread
;
5560 /* bind the workloop to the uthread */
5561 ut
->uu_kqueue_bound
= (struct kqueue
*)kqwl
;
5562 ut
->uu_kqueue_flags
= flags
;
5563 ut
->uu_kqueue_qos_index
= qos_index
;
5564 assert(ut
->uu_kqueue_override_is_sync
== 0);
5565 ut
->uu_kqueue_override_is_sync
= ipc_override_is_sync
;
5567 thread_add_ipc_override(thread
, qos_index
);
5569 if (ipc_override_is_sync
) {
5570 thread_add_sync_ipc_override(thread
);
/*
 * workloop_fulfill_threadreq - bind thread to processing workloop
 *
 *	The provided thread will be responsible for delivering events
 *	associated with the given kqrequest.  Bind it and get ready for
 *	the thread to eventually arrive.
 *
 *	If WORKLOOP_FULFILL_THREADREQ_SYNC is specified, the callback
 *	is made within the context of the pthread_functions->workq_threadreq
 *	callout.  In this case, the request structure is already locked.
 */
5586 workloop_fulfill_threadreq(struct proc
*p
,
5587 workq_threadreq_t req
,
5591 int sync
= (flags
& WORKLOOP_FULFILL_THREADREQ_SYNC
);
5592 int cancel
= (flags
& WORKLOOP_FULFILL_THREADREQ_CANCEL
);
5593 struct kqrequest
*kqr
;
5594 struct kqworkloop
*kqwl
;
5596 kqwl
= (struct kqworkloop
*)((uintptr_t)req
-
5597 offsetof(struct kqworkloop
, kqwl_request
) -
5598 offsetof(struct kqrequest
, kqr_req
));
5599 kqr
= &kqwl
->kqwl_request
;
5601 /* validate we're looking at something valid */
5602 if (kqwl
->kqwl_p
!= p
||
5603 (kqwl
->kqwl_state
& KQ_WORKLOOP
) == 0) {
5604 assert(kqwl
->kqwl_p
== p
);
5605 assert(kqwl
->kqwl_state
& KQ_WORKLOOP
);
5610 kqwl_req_lock(kqwl
);
5612 /* Should be a pending request */
5613 if ((kqr
->kqr_state
& KQR_BOUND
) ||
5614 (kqr
->kqr_state
& KQR_THREQUESTED
) == 0) {
5616 assert((kqr
->kqr_state
& KQR_BOUND
) == 0);
5617 assert(kqr
->kqr_state
& KQR_THREQUESTED
);
5619 kqwl_req_unlock(kqwl
);
5623 assert((kqr
->kqr_state
& KQR_DRAIN
) == 0);
5626 * Is it a cancel indication from pthread.
5627 * If so, we must be exiting/exec'ing. Forget
5628 * our pending request.
5631 kqr
->kqr_state
&= ~KQR_THREQUESTED
;
5632 kqr
->kqr_state
|= KQR_DRAIN
;
5634 /* do the actual bind? */
5635 kqworkloop_bind_thread_impl(kqwl
, thread
, KEVENT_FLAG_WORKLOOP
);
5639 kqwl_req_unlock(kqwl
);
5642 kqueue_release_last(p
, &kqwl
->kqwl_kqueue
); /* may dealloc kq */
/*
 * kevent_qos_internal_bind - bind thread to processing kqueue
 *
 *	Indicates that the provided thread will be responsible for
 *	servicing the particular QoS class index specified in the
 *	parameters. Once the binding is done, any overrides that may
 *	be associated with the corresponding events can be applied.
 *
 *	This should be called as soon as the thread identity is known,
 *	preferably while still at high priority during creation.
 *
 *	- caller holds a reference on the kqueue.
 *	- the thread MUST call kevent_qos_internal after being bound
 *	  or the bucket of events may never be delivered.
 *	- Nothing locked (may take mutex or block).
 */
5666 kevent_qos_internal_bind(
5672 kq_index_t qos_index
;
5674 assert(flags
& KEVENT_FLAG_WORKQ
);
5676 if (thread
== THREAD_NULL
|| (flags
& KEVENT_FLAG_WORKQ
) == 0) {
5680 /* get the qos index we're going to service */
5681 qos_index
= qos_index_for_servicer(qos_class
, thread
, flags
);
5683 if (kqworkq_internal_bind(p
, qos_index
, thread
, flags
))
5691 kqworkloop_internal_unbind(
5697 struct kqworkloop
*kqwl
;
5698 struct uthread
*ut
= get_bsdthread_info(thread
);
5700 assert(ut
->uu_kqueue_bound
!= NULL
);
5701 kq
= ut
->uu_kqueue_bound
;
5702 assert(kq
->kq_state
& KQ_WORKLOOP
);
5703 kqwl
= (struct kqworkloop
*)kq
;
5705 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_UNBIND
),
5706 kqwl
->kqwl_dynamicid
, (uintptr_t)thread_tid(thread
),
5709 if (!(kq
->kq_state
& KQ_NO_WQ_THREAD
)) {
5710 assert(is_workqueue_thread(thread
));
5713 kqworkloop_unbind_thread(kqwl
, thread
, flags
);
5716 /* If last reference, dealloc the workloop kq */
5717 kqueue_release_last(p
, kq
);
5719 assert(!is_workqueue_thread(thread
));
5720 kevent_servicer_detach_thread(p
, kqwl
->kqwl_dynamicid
, thread
, flags
, kq
);
5725 kqworkq_internal_unbind(
5727 kq_index_t qos_index
,
5732 struct kqworkq
*kqwq
;
5734 kq_index_t end_index
;
5736 assert(thread
== current_thread());
5737 ut
= get_bsdthread_info(thread
);
5739 kq
= p
->p_fd
->fd_wqkqueue
;
5740 assert(kq
->kq_state
& KQ_WORKQ
);
5741 assert(ut
->uu_kqueue_bound
== kq
);
5743 kqwq
= (struct kqworkq
*)kq
;
5745 /* end servicing any requests we might own */
5746 end_index
= (qos_index
== KQWQ_QOS_MANAGER
) ?
5750 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_UNBIND
),
5751 (uintptr_t)thread_tid(thread
), flags
, qos_index
);
5754 kqworkq_end_processing(kqwq
, qos_index
, flags
);
5755 } while (qos_index
-- > end_index
);
5757 ut
->uu_kqueue_bound
= NULL
;
5758 ut
->uu_kqueue_qos_index
= 0;
5759 ut
->uu_kqueue_flags
= 0;
/*
 * kevent_qos_internal_unbind - unbind thread from processing kqueue
 *
 *	End processing the per-QoS bucket of events and allow other threads
 *	to be requested for future servicing.
 *
 *	caller holds a reference on the kqueue.
 *	thread is the current thread.
 */
5775 kevent_qos_internal_unbind(
5781 #pragma unused(qos_class)
5785 unsigned int bound_flags
;
5788 ut
= get_bsdthread_info(thread
);
5789 if (ut
->uu_kqueue_bound
== NULL
) {
5790 /* early out if we are already unbound */
5791 assert(ut
->uu_kqueue_flags
== 0);
5792 assert(ut
->uu_kqueue_qos_index
== 0);
5793 assert(ut
->uu_kqueue_override_is_sync
== 0);
5797 assert(flags
& (KEVENT_FLAG_WORKQ
| KEVENT_FLAG_WORKLOOP
));
5798 assert(thread
== current_thread());
5800 check_flags
= flags
& KEVENT_FLAG_UNBIND_CHECK_FLAGS
;
5802 /* Get the kqueue we started with */
5803 kq
= ut
->uu_kqueue_bound
;
5805 assert(kq
->kq_state
& (KQ_WORKQ
| KQ_WORKLOOP
));
5807 /* get flags and QoS parameters we started with */
5808 bound_flags
= ut
->uu_kqueue_flags
;
5810 /* Unbind from the class of workq */
5811 if (kq
->kq_state
& KQ_WORKQ
) {
5812 if (check_flags
&& !(flags
& KEVENT_FLAG_WORKQ
)) {
5816 kqworkq_internal_unbind(p
, ut
->uu_kqueue_qos_index
, thread
, bound_flags
);
5818 if (check_flags
&& !(flags
& KEVENT_FLAG_WORKLOOP
)) {
5822 kqworkloop_internal_unbind(p
, thread
, bound_flags
);
/*
 * kqueue_process - process the triggered events in a kqueue
 *
 *	Walk the queued knotes and validate that they are
 *	really still triggered events by calling the filter
 *	routines (if necessary).  Hold a use reference on
 *	the knote to avoid it being detached.  For each event
 *	that is still considered triggered, invoke the
 *	callback routine provided.
 *
 *	caller holds a reference on the kqueue.
 *	kqueue locked on entry and exit - but may be dropped
 *	kqueue list locked (held for duration of call)
 */
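/*
 * Rough shape of the scan below (an informal sketch, carrying no semantics
 * beyond the code itself): a QoS index range [start_index .. end_index] is
 * chosen from the kqueue type and the servicer's bound QoS, then for each
 * index i:
 *
 *	if (kqueue_begin_processing(kq, i, flags) == 0) {
 *		while ((kn = TAILQ_FIRST(queue)) != NULL)
 *			knote_process(kn, callback, callback_data, process_data, p);
 *		kqueue_end_processing(kq, i, nevents, flags);
 *	}
 *
 * stopping early once the caller's output event array is full (EWOULDBLOCK).
 */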
5844 kqueue_process(struct kqueue
*kq
,
5845 kevent_callback_t callback
,
5846 void *callback_data
,
5847 struct filt_process_s
*process_data
,
5851 unsigned int flags
= process_data
? process_data
->fp_flags
: 0;
5852 struct uthread
*ut
= get_bsdthread_info(current_thread());
5853 kq_index_t start_index
, end_index
, i
;
5859 * Based on the mode of the kqueue and the bound QoS of the servicer,
5860 * determine the range of thread requests that need checking
5862 if (kq
->kq_state
& KQ_WORKQ
) {
5863 if (flags
& KEVENT_FLAG_WORKQ_MANAGER
) {
5864 start_index
= KQWQ_QOS_MANAGER
;
5865 } else if (ut
->uu_kqueue_bound
!= kq
) {
5868 start_index
= ut
->uu_kqueue_qos_index
;
5871 /* manager services every request in a workq kqueue */
5872 assert(start_index
> 0 && start_index
<= KQWQ_QOS_MANAGER
);
5873 end_index
= (start_index
== KQWQ_QOS_MANAGER
) ? 0 : start_index
;
5875 } else if (kq
->kq_state
& KQ_WORKLOOP
) {
5876 if (ut
->uu_kqueue_bound
!= kq
)
5880 * Single request servicing
5881 * we want to deliver all events, regardless of the QOS
5883 start_index
= end_index
= THREAD_QOS_UNSPECIFIED
;
5885 start_index
= end_index
= QOS_INDEX_KQFILE
;
5891 if (kqueue_begin_processing(kq
, i
, flags
) == -1) {
5893 /* Nothing to process */
5898 * loop through the enqueued knotes associated with this request,
5899 * processing each one. Each request may have several queues
5900 * of knotes to process (depending on the type of kqueue) so we
5901 * have to loop through all the queues as long as we have additional
5906 struct kqtailq
*base_queue
= kqueue_get_base_queue(kq
, i
);
5907 struct kqtailq
*queue
= kqueue_get_high_queue(kq
, i
);
5909 while (error
== 0 && (kn
= TAILQ_FIRST(queue
)) != NULL
) {
5910 error
= knote_process(kn
, callback
, callback_data
, process_data
, p
);
5911 if (error
== EJUSTRETURN
) {
5916 /* error is EWOULDBLOCK when the out event array is full */
5918 } while (error
== 0 && queue
-- > base_queue
);
5920 if ((kq
->kq_state
& KQ_WORKQ
) == 0) {
5921 kqueue_end_processing(kq
, i
, nevents
, flags
);
5924 if (error
== EWOULDBLOCK
) {
5925 /* break out if no more space for additional events */
5929 } while (i
-- > end_index
);
5936 kqueue_scan_continue(void *data
, wait_result_t wait_result
)
5938 thread_t self
= current_thread();
5939 uthread_t ut
= (uthread_t
)get_bsdthread_info(self
);
5940 struct _kqueue_scan
* cont_args
= &ut
->uu_kevent
.ss_kqueue_scan
;
5941 struct kqueue
*kq
= (struct kqueue
*)data
;
5942 struct filt_process_s
*process_data
= cont_args
->process_data
;
5946 /* convert the (previous) wait_result to a proper error */
5947 switch (wait_result
) {
5948 case THREAD_AWAKENED
: {
5951 error
= kqueue_process(kq
, cont_args
->call
, cont_args
->data
,
5952 process_data
, &count
, current_proc());
5953 if (error
== 0 && count
== 0) {
5954 if (kq
->kq_state
& KQ_DRAIN
) {
5959 if (kq
->kq_state
& KQ_WAKEUP
)
5962 waitq_assert_wait64((struct waitq
*)&kq
->kq_wqs
,
5963 KQ_EVENT
, THREAD_ABORTSAFE
,
5964 cont_args
->deadline
);
5965 kq
->kq_state
|= KQ_SLEEP
;
5967 thread_block_parameter(kqueue_scan_continue
, kq
);
5972 case THREAD_TIMED_OUT
:
5973 error
= EWOULDBLOCK
;
5975 case THREAD_INTERRUPTED
:
5978 case THREAD_RESTART
:
5983 panic("%s: - invalid wait_result (%d)", __func__
,
5988 /* call the continuation with the results */
5989 assert(cont_args
->cont
!= NULL
);
5990 (cont_args
->cont
)(kq
, cont_args
->data
, error
);
/*
 * kqueue_scan - scan and wait for events in a kqueue
 *
 *	Process the triggered events in a kqueue.
 *
 *	If there are no events triggered, arrange to
 *	wait for them.  If the caller provided a
 *	continuation routine, then kevent_scan will
 *	also.
 *
 *	The callback routine must be valid.
 *	The caller must hold a use-count reference on the kq.
 */
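/*
 * Informal sketch of the timeout handling performed below (the code beneath
 * is authoritative): a non-zero struct timeval is converted once into an
 * absolute-time deadline, e.g.
 *
 *	uint64_t now, deadline;
 *	clock_get_uptime(&now);
 *	nanoseconds_to_absolutetime((uint64_t)tv.tv_sec * NSEC_PER_SEC +
 *	    tv.tv_usec * (long)NSEC_PER_USEC, &deadline);
 *	if (now >= deadline)
 *		;	/* already expired: treat as a non-blocking (poll) call */
 *	else
 *		clock_absolutetime_interval_to_deadline(deadline, &deadline);
 *
 * A zero timeval means "block forever" (deadline = 0).
 */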
6009 kqueue_scan(struct kqueue
*kq
,
6010 kevent_callback_t callback
,
6011 kqueue_continue_t continuation
,
6012 void *callback_data
,
6013 struct filt_process_s
*process_data
,
6014 struct timeval
*atvp
,
6017 thread_continue_t cont
= THREAD_CONTINUE_NULL
;
6024 assert(callback
!= NULL
);
6027 * Determine which QoS index we are servicing
6029 flags
= (process_data
) ? process_data
->fp_flags
: 0;
6030 fd
= (process_data
) ? process_data
->fp_fd
: -1;
6034 wait_result_t wait_result
;
6038 * Make a pass through the kq to find events already
6042 error
= kqueue_process(kq
, callback
, callback_data
,
6043 process_data
, &count
, p
);
6045 break; /* lock still held */
6047 /* looks like we have to consider blocking */
6050 /* convert the timeout to a deadline once */
6051 if (atvp
->tv_sec
|| atvp
->tv_usec
) {
6054 clock_get_uptime(&now
);
6055 nanoseconds_to_absolutetime((uint64_t)atvp
->tv_sec
* NSEC_PER_SEC
+
6056 atvp
->tv_usec
* (long)NSEC_PER_USEC
,
6058 if (now
>= deadline
) {
6059 /* non-blocking call */
6060 error
= EWOULDBLOCK
;
6061 break; /* lock still held */
6064 clock_absolutetime_interval_to_deadline(deadline
, &deadline
);
6066 deadline
= 0; /* block forever */
6070 uthread_t ut
= (uthread_t
)get_bsdthread_info(current_thread());
6071 struct _kqueue_scan
*cont_args
= &ut
->uu_kevent
.ss_kqueue_scan
;
6073 cont_args
->call
= callback
;
6074 cont_args
->cont
= continuation
;
6075 cont_args
->deadline
= deadline
;
6076 cont_args
->data
= callback_data
;
6077 cont_args
->process_data
= process_data
;
6078 cont
= kqueue_scan_continue
;
6082 if (kq
->kq_state
& KQ_DRAIN
) {
6087 /* If awakened during processing, try again */
6088 if (kq
->kq_state
& KQ_WAKEUP
) {
6093 /* go ahead and wait */
6094 waitq_assert_wait64_leeway((struct waitq
*)&kq
->kq_wqs
,
6095 KQ_EVENT
, THREAD_ABORTSAFE
,
6096 TIMEOUT_URGENCY_USER_NORMAL
,
6097 deadline
, TIMEOUT_NO_LEEWAY
);
6098 kq
->kq_state
|= KQ_SLEEP
;
6100 wait_result
= thread_block_parameter(cont
, kq
);
6101 /* NOTREACHED if (continuation != NULL) */
6103 switch (wait_result
) {
6104 case THREAD_AWAKENED
:
6106 case THREAD_TIMED_OUT
:
6108 case THREAD_INTERRUPTED
:
6110 case THREAD_RESTART
:
6113 panic("%s: - bad wait_result (%d)", __func__
,
/*
 * This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(__unused struct fileproc *fp,
	__unused struct uio *uio,
	__unused int flags,
	__unused vfs_context_t ctx)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_write(__unused struct fileproc *fp,
	__unused struct uio *uio,
	__unused int flags,
	__unused vfs_context_t ctx)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_ioctl(__unused struct fileproc *fp,
	__unused u_long com,
	__unused caddr_t data,
	__unused vfs_context_t ctx)
{
	return (ENOTTY);
}
6159 kqueue_select(struct fileproc
*fp
, int which
, void *wq_link_id
,
6160 __unused vfs_context_t ctx
)
6162 struct kqueue
*kq
= (struct kqueue
*)fp
->f_data
;
6163 struct kqtailq
*queue
;
6164 struct kqtailq
*suppressq
;
6173 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
6176 * If this is the first pass, link the wait queue associated with the
6177 * the kqueue onto the wait queue set for the select(). Normally we
6178 * use selrecord() for this, but it uses the wait queue within the
6179 * selinfo structure and we need to use the main one for the kqueue to
6180 * catch events from KN_STAYQUEUED sources. So we do the linkage manually.
6181 * (The select() call will unlink them when it ends).
6183 if (wq_link_id
!= NULL
) {
6184 thread_t cur_act
= current_thread();
6185 struct uthread
* ut
= get_bsdthread_info(cur_act
);
6187 kq
->kq_state
|= KQ_SEL
;
6188 waitq_link((struct waitq
*)&kq
->kq_wqs
, ut
->uu_wqset
,
6189 WAITQ_SHOULD_LOCK
, (uint64_t *)wq_link_id
);
6191 /* always consume the reserved link object */
6192 waitq_link_release(*(uint64_t *)wq_link_id
);
6193 *(uint64_t *)wq_link_id
= 0;
6196 * selprocess() is expecting that we send it back the waitq
6197 * that was just added to the thread's waitq set. In order
6198 * to not change the selrecord() API (which is exported to
6199 * kexts), we pass this value back through the
6200 * void *wq_link_id pointer we were passed. We need to use
6201 * memcpy here because the pointer may not be properly aligned
6202 * on 32-bit systems.
6204 void *wqptr
= &kq
->kq_wqs
;
6205 memcpy(wq_link_id
, (void *)&wqptr
, sizeof(void *));
6208 if (kqueue_begin_processing(kq
, QOS_INDEX_KQFILE
, 0) == -1) {
6213 queue
= kqueue_get_base_queue(kq
, QOS_INDEX_KQFILE
);
6214 if (!TAILQ_EMPTY(queue
)) {
6216 * there is something queued - but it might be a
6217 * KN_STAYACTIVE knote, which may or may not have
6218 * any events pending. Otherwise, we have to walk
6219 * the list of knotes to see, and peek at the
6220 * (non-vanished) stay-active ones to be really sure.
6222 while ((kn
= (struct knote
*)TAILQ_FIRST(queue
)) != NULL
) {
6223 if (kn
->kn_status
& KN_ACTIVE
) {
6227 assert(kn
->kn_status
& KN_STAYACTIVE
);
6232 * There were no regular events on the queue, so take
6233 * a deeper look at the stay-queued ones we suppressed.
6235 suppressq
= kqueue_get_suppressed_queue(kq
, QOS_INDEX_KQFILE
);
6236 while ((kn
= (struct knote
*)TAILQ_FIRST(suppressq
)) != NULL
) {
6239 assert(!knoteuse_needs_boost(kn
, NULL
));
6241 /* If didn't vanish while suppressed - peek at it */
6242 if (kqlock2knoteuse(kq
, kn
, KNUSE_NONE
)) {
6243 peek
= knote_fops(kn
)->f_peek(kn
);
6245 /* if it dropped while getting lock - move on */
6246 if (!knoteuse2kqlock(kq
, kn
, KNUSE_NONE
))
6251 knote_unsuppress(kn
);
6253 /* has data or it has to report a vanish */
6262 kqueue_end_processing(kq
, QOS_INDEX_KQFILE
, retnum
, 0);
/*ARGSUSED*/
static int
kqueue_close(struct fileglob *fg, __unused vfs_context_t ctx)
{
	struct kqfile *kqf = (struct kqfile *)fg->fg_data;

	assert((kqf->kqf_state & KQ_WORKQ) == 0);
	kqueue_dealloc(&kqf->kqf_kqueue);
	fg->fg_data = NULL;
	return (0);
}
/*
 * The caller has taken a use-count reference on this kqueue and will donate it
 * to the kqueue we are being added to.  This keeps the kqueue from closing until
 * that relationship is torn down.
 */
static int
kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn,
		__unused struct kevent_internal_s *kev, __unused vfs_context_t ctx)
{
	struct kqfile *kqf = (struct kqfile *)kn->kn_fp->f_data;
	struct kqueue *kq = &kqf->kqf_kqueue;
	struct kqueue *parentkq = knote_get_kq(kn);

	assert((kqf->kqf_state & KQ_WORKQ) == 0);

	if (parentkq == kq ||
	    kn->kn_filter != EVFILT_READ) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data = EINVAL;
		return 0;
	}

	/*
	 * We have to avoid creating a cycle when nesting kqueues
	 * inside another.  Rather than trying to walk the whole
	 * potential DAG of nested kqueues, we just use a simple
	 * ceiling protocol.  When a kqueue is inserted into another,
	 * we check that the (future) parent is not already nested
	 * into another kqueue at a lower level than the potential
	 * child (because it could indicate a cycle).  If that test
	 * passes, we just mark the nesting levels accordingly.
	 * (A worked example follows this function.)
	 */

	kqlock(parentkq);
	if (parentkq->kq_level > 0 &&
	    parentkq->kq_level < kq->kq_level)
	{
		kqunlock(parentkq);
		kn->kn_flags = EV_ERROR;
		kn->kn_data = EINVAL;
		return 0;
	} else {
		/* set parent level appropriately */
		if (parentkq->kq_level == 0)
			parentkq->kq_level = 2;
		if (parentkq->kq_level < kq->kq_level + 1)
			parentkq->kq_level = kq->kq_level + 1;
		kqunlock(parentkq);

		kn->kn_filtid = EVFILTID_KQREAD;
		kqlock(kq);
		KNOTE_ATTACH(&kqf->kqf_sel.si_note, kn);
		/* indicate nesting in child, if needed */
		if (kq->kq_level == 0)
			kq->kq_level = 1;

		int count = kq->kq_count;
		kqunlock(kq);
		return (count > 0);
	}
}
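/*
 * Worked example of the ceiling protocol above (illustrative only): let
 * kqueues A and B both start at level 0.  Inserting A into B sets
 * B->kq_level = 2 and A->kq_level = 1.  Inserting B into a fresh kqueue C
 * then raises C->kq_level to 3 (at least B's level + 1).  Attempting to
 * insert C back into A now fails the check, because A->kq_level (1) is
 * greater than 0 and less than C->kq_level (3) - exactly the situation that
 * could hide a cycle.
 */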
/*
 * kqueue_drain - called when kq is closed
 */
/*ARGSUSED*/
static int
kqueue_drain(struct fileproc *fp, __unused vfs_context_t ctx)
{
	struct kqueue *kq = (struct kqueue *)fp->f_fglob->fg_data;

	assert((kq->kq_state & KQ_WORKQ) == 0);

	kqlock(kq);
	kq->kq_state |= KQ_DRAIN;
	kqueue_interrupt(kq);
	kqunlock(kq);
	return (0);
}
/*ARGSUSED*/
int
kqueue_stat(struct kqueue *kq, void *ub, int isstat64, proc_t p)
{
	assert((kq->kq_state & KQ_WORKQ) == 0);

	kqlock(kq);
	if (isstat64 != 0) {
		struct stat64 *sb64 = (struct stat64 *)ub;

		bzero((void *)sb64, sizeof(*sb64));
		sb64->st_size = kq->kq_count;
		if (kq->kq_state & KQ_KEV_QOS)
			sb64->st_blksize = sizeof(struct kevent_qos_s);
		else if (kq->kq_state & KQ_KEV64)
			sb64->st_blksize = sizeof(struct kevent64_s);
		else if (IS_64BIT_PROCESS(p))
			sb64->st_blksize = sizeof(struct user64_kevent);
		else
			sb64->st_blksize = sizeof(struct user32_kevent);
		sb64->st_mode = S_IFIFO;
	} else {
		struct stat *sb = (struct stat *)ub;

		bzero((void *)sb, sizeof(*sb));
		sb->st_size = kq->kq_count;
		if (kq->kq_state & KQ_KEV_QOS)
			sb->st_blksize = sizeof(struct kevent_qos_s);
		else if (kq->kq_state & KQ_KEV64)
			sb->st_blksize = sizeof(struct kevent64_s);
		else if (IS_64BIT_PROCESS(p))
			sb->st_blksize = sizeof(struct user64_kevent);
		else
			sb->st_blksize = sizeof(struct user32_kevent);
		sb->st_mode = S_IFIFO;
	}
	kqunlock(kq);
	return (0);
}
/*
 * Interact with the pthread kext to request a servicing there.
 * Eventually, this will request threads at specific QoS levels.
 * For now, it only requests a dispatch-manager-QoS thread, and
 * only one-at-a-time.
 *
 * - Caller holds the workq request lock
 *
 * - May be called with the kqueue's wait queue set locked,
 *   so cannot do anything that could recurse on that.
 */
6415 kqworkq_request_thread(
6416 struct kqworkq
*kqwq
,
6417 kq_index_t qos_index
)
6419 struct kqrequest
*kqr
;
6421 assert(kqwq
->kqwq_state
& KQ_WORKQ
);
6422 assert(qos_index
< KQWQ_NQOS
);
6424 kqr
= kqworkq_get_request(kqwq
, qos_index
);
6426 assert(kqr
->kqr_state
& KQR_WAKEUP
);
6429 * If we have already requested a thread, and it hasn't
6430 * started processing yet, there's no use hammering away
6431 * on the pthread kext.
6433 if (kqr
->kqr_state
& KQR_THREQUESTED
)
6436 assert((kqr
->kqr_state
& KQR_BOUND
) == 0);
6438 /* request additional workq threads if appropriate */
6439 if (pthread_functions
!= NULL
&&
6440 pthread_functions
->workq_reqthreads
!= NULL
) {
6441 unsigned int flags
= KEVENT_FLAG_WORKQ
;
6442 unsigned long priority
;
6445 /* Compute the appropriate pthread priority */
6446 priority
= qos_from_qos_index(qos_index
);
6449 /* JMM - for now remain compatible with old invocations */
6450 /* set the over-commit flag on the request if needed */
6451 if (kqr
->kqr_state
& KQR_THOVERCOMMIT
)
6452 priority
|= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
;
6455 /* Compute a priority based on qos_index. */
6456 struct workq_reqthreads_req_s request
= {
6457 .priority
= priority
,
6461 /* mark that we are making a request */
6462 kqr
->kqr_state
|= KQR_THREQUESTED
;
6463 if (qos_index
== KQWQ_QOS_MANAGER
)
6464 kqr
->kqr_state
|= KQWQ_THMANAGER
;
6466 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_THREQUEST
),
6468 (((uintptr_t)kqr
->kqr_override_index
<< 8) |
6469 (uintptr_t)kqr
->kqr_state
));
6470 wqthread
= (*pthread_functions
->workq_reqthreads
)(kqwq
->kqwq_p
, 1, &request
);
6472 /* We've been switched to the emergency/manager thread */
6473 if (wqthread
== (thread_t
)-1) {
6474 assert(qos_index
!= KQWQ_QOS_MANAGER
);
6475 kqr
->kqr_state
|= KQWQ_THMANAGER
;
6480 * bind the returned thread identity
6481 * This goes away when we switch to synchronous callback
6482 * binding from the pthread kext.
6484 if (wqthread
!= NULL
) {
6485 kqworkq_bind_thread_impl(kqwq
, qos_index
, wqthread
, flags
);
/*
 * If we aren't already busy processing events [for this QoS],
 * request workq thread support as appropriate.
 *
 * TBD - for now, we don't segregate out processing by QoS.
 *
 * - May be called with the kqueue's wait queue set locked,
 *   so cannot do anything that could recurse on that.
 */
6500 kqworkq_request_help(
6501 struct kqworkq
*kqwq
,
6502 kq_index_t qos_index
)
6504 struct kqrequest
*kqr
;
6506 /* convert to thread qos value */
6507 assert(qos_index
< KQWQ_NQOS
);
6509 kqwq_req_lock(kqwq
);
6510 kqr
= kqworkq_get_request(kqwq
, qos_index
);
6512 if ((kqr
->kqr_state
& KQR_WAKEUP
) == 0) {
6513 /* Indicate that we needed help from this request */
6514 kqr
->kqr_state
|= KQR_WAKEUP
;
6516 /* Go assure a thread request has been made */
6517 kqworkq_request_thread(kqwq
, qos_index
);
6519 kqwq_req_unlock(kqwq
);
6523 kqworkloop_threadreq_impl(struct kqworkloop
*kqwl
, kq_index_t qos_index
)
6525 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6526 unsigned long pri
= pthread_priority_for_kqrequest(kqr
, qos_index
);
6529 assert((kqr
->kqr_state
& (KQR_THREQUESTED
| KQR_BOUND
)) == KQR_THREQUESTED
);
	/*
	 * New-style thread request supported. Provide
	 * the pthread kext a pointer to a workq_threadreq_s
	 * structure for its use until a corresponding
	 * workloop_fulfill_threadreq callback.
	 */
6537 if (current_proc() == kqwl
->kqwl_kqueue
.kq_p
) {
6538 op
= WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL
;
6540 op
= WORKQ_THREADREQ_WORKLOOP
;
6543 ret
= (*pthread_functions
->workq_threadreq
)(kqwl
->kqwl_p
, &kqr
->kqr_req
,
6544 WORKQ_THREADREQ_WORKLOOP
, pri
, 0);
6547 assert(op
== WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL
);
6548 op
= WORKQ_THREADREQ_WORKLOOP
;
6554 * Process is shutting down or exec'ing.
6555 * All the kqueues are going to be cleaned up
6556 * soon. Forget we even asked for a thread -
6557 * and make sure we don't ask for more.
6559 kqueue_release((struct kqueue
*)kqwl
, KQUEUE_CANT_BE_LAST_REF
);
6560 kqr
->kqr_state
&= ~KQR_THREQUESTED
;
6561 kqr
->kqr_state
|= KQR_DRAIN
;
6565 assert(op
== WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL
);
6566 act_set_astkevent(current_thread(), AST_KEVENT_REDRIVE_THREADREQ
);
6575 kqworkloop_threadreq_modify(struct kqworkloop
*kqwl
, kq_index_t qos_index
)
6577 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6578 unsigned long pri
= pthread_priority_for_kqrequest(kqr
, qos_index
);
6579 int ret
, op
= WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL
;
6581 assert((kqr
->kqr_state
& (KQR_THREQUESTED
| KQR_BOUND
)) == KQR_THREQUESTED
);
6583 if (current_proc() == kqwl
->kqwl_kqueue
.kq_p
) {
6584 op
= WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL
;
6586 op
= WORKQ_THREADREQ_CHANGE_PRI
;
6589 ret
= (*pthread_functions
->workq_threadreq_modify
)(kqwl
->kqwl_p
,
6590 &kqr
->kqr_req
, op
, pri
, 0);
6593 assert(op
== WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL
);
6594 op
= WORKQ_THREADREQ_CHANGE_PRI
;
6598 assert(op
== WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL
);
6599 act_set_astkevent(current_thread(), AST_KEVENT_REDRIVE_THREADREQ
);
/*
 * Interact with the pthread kext to request a servicing thread.
 * This will request a single thread at the highest QoS level
 * for which there is work (whether that was the requested QoS
 * for an event or an override applied to a lower-QoS request).
 *
 * - Caller holds the workloop request lock
 *
 * - May be called with the kqueue's wait queue set locked,
 *   so cannot do anything that could recurse on that.
 */
6624 kqworkloop_request_thread(struct kqworkloop
*kqwl
, kq_index_t qos_index
)
6626 struct kqrequest
*kqr
;
6628 assert(kqwl
->kqwl_state
& KQ_WORKLOOP
);
6630 kqr
= &kqwl
->kqwl_request
;
6632 assert(kqwl
->kqwl_owner
== THREAD_NULL
);
6633 assert((kqr
->kqr_state
& KQR_BOUND
) == 0);
6634 assert((kqr
->kqr_state
& KQR_THREQUESTED
) == 0);
6635 assert(!(kqwl
->kqwl_kqueue
.kq_state
& KQ_NO_WQ_THREAD
));
6637 /* If we're draining thread requests, just bail */
6638 if (kqr
->kqr_state
& KQR_DRAIN
)
6641 if (pthread_functions
!= NULL
&&
6642 pthread_functions
->workq_threadreq
!= NULL
) {
6644 * set request state flags, etc... before calling pthread
6645 * This assures they are set before a possible synchronous
6646 * callback to workloop_fulfill_threadreq().
6648 kqr
->kqr_state
|= KQR_THREQUESTED
;
6650 /* Add a thread request reference on the kqueue. */
6651 kqueue_retain((struct kqueue
*)kqwl
);
6653 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THREQUEST
),
6654 kqwl
->kqwl_dynamicid
,
6655 0, qos_index
, kqr
->kqr_state
);
6656 kqworkloop_threadreq_impl(kqwl
, qos_index
);
6658 panic("kqworkloop_request_thread");
6664 kqworkloop_update_sync_override_state(struct kqworkloop
*kqwl
, boolean_t sync_ipc_override
)
6666 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6667 kqwl_req_lock(kqwl
);
6668 kqr
->kqr_has_sync_override
= sync_ipc_override
;
6669 kqwl_req_unlock(kqwl
);
6673 static inline kq_index_t
6674 kqworkloop_combined_qos(struct kqworkloop
*kqwl
, boolean_t
*ipc_override_is_sync
)
6676 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6677 kq_index_t override
;
6679 *ipc_override_is_sync
= FALSE
;
6680 override
= MAX(MAX(kqr
->kqr_qos_index
, kqr
->kqr_override_index
),
6681 kqr
->kqr_dsync_waiters_qos
);
6683 if (kqr
->kqr_sync_suppress_count
> 0 || kqr
->kqr_has_sync_override
) {
6684 *ipc_override_is_sync
= TRUE
;
6685 override
= THREAD_QOS_USER_INTERACTIVE
;
6691 kqworkloop_request_fire_r2k_notification(struct kqworkloop
*kqwl
)
6693 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6695 kqwl_req_held(kqwl
);
6697 if (kqr
->kqr_state
& KQR_R2K_NOTIF_ARMED
) {
6698 assert(kqr
->kqr_state
& KQR_BOUND
);
6699 assert(kqr
->kqr_thread
);
6701 kqr
->kqr_state
&= ~KQR_R2K_NOTIF_ARMED
;
6702 act_set_astkevent(kqr
->kqr_thread
, AST_KEVENT_RETURN_TO_KERNEL
);
6707 kqworkloop_update_threads_qos(struct kqworkloop
*kqwl
, int op
, kq_index_t qos
)
6709 const uint8_t KQWL_STAYACTIVE_FIRED_BIT
= (1 << 0);
6711 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6712 boolean_t old_ipc_override_is_sync
= FALSE
;
6713 kq_index_t old_qos
= kqworkloop_combined_qos(kqwl
, &old_ipc_override_is_sync
);
6714 struct kqueue
*kq
= &kqwl
->kqwl_kqueue
;
6715 bool static_thread
= (kq
->kq_state
& KQ_NO_WQ_THREAD
);
6718 /* must hold the kqr lock */
6719 kqwl_req_held(kqwl
);
6722 case KQWL_UTQ_UPDATE_WAKEUP_QOS
:
6723 if (qos
== KQWL_BUCKET_STAYACTIVE
) {
6725 * the KQWL_BUCKET_STAYACTIVE is not a QoS bucket, we only remember
6726 * a high watermark (kqr_stayactive_qos) of any stay active knote
6727 * that was ever registered with this workloop.
6729 * When waitq_set__CALLING_PREPOST_HOOK__() wakes up any stay active
6730 * knote, we use this high-watermark as a wakeup-index, and also set
6731 * the magic KQWL_BUCKET_STAYACTIVE bit to make sure we remember
6732 * there is at least one stay active knote fired until the next full
6733 * processing of this bucket.
6735 kqr
->kqr_wakeup_indexes
|= KQWL_STAYACTIVE_FIRED_BIT
;
6736 qos
= kqr
->kqr_stayactive_qos
;
6738 assert(!static_thread
);
6740 if (kqr
->kqr_wakeup_indexes
& (1 << qos
)) {
6741 assert(kqr
->kqr_state
& KQR_WAKEUP
);
6745 kqr
->kqr_wakeup_indexes
|= (1 << qos
);
6746 kqr
->kqr_state
|= KQR_WAKEUP
;
6747 kqworkloop_request_fire_r2k_notification(kqwl
);
6748 goto recompute_async
;
6750 case KQWL_UTQ_UPDATE_STAYACTIVE_QOS
:
6752 if (kqr
->kqr_stayactive_qos
< qos
) {
6753 kqr
->kqr_stayactive_qos
= qos
;
6754 if (kqr
->kqr_wakeup_indexes
& KQWL_STAYACTIVE_FIRED_BIT
) {
6755 assert(kqr
->kqr_state
& KQR_WAKEUP
);
6756 kqr
->kqr_wakeup_indexes
|= (1 << qos
);
6757 goto recompute_async
;
6762 case KQWL_UTQ_RECOMPUTE_WAKEUP_QOS
:
6763 kqlock_held(kq
); // to look at kq_queues
6764 kqr
->kqr_has_sync_override
= FALSE
;
6765 i
= KQWL_BUCKET_STAYACTIVE
;
6766 if (TAILQ_EMPTY(&kqr
->kqr_suppressed
)) {
6767 kqr
->kqr_override_index
= THREAD_QOS_UNSPECIFIED
;
6769 if (!TAILQ_EMPTY(&kq
->kq_queue
[i
]) &&
6770 (kqr
->kqr_wakeup_indexes
& KQWL_STAYACTIVE_FIRED_BIT
)) {
6772 * If the KQWL_STAYACTIVE_FIRED_BIT is set, it means a stay active
6773 * knote may have fired, so we need to merge in kqr_stayactive_qos.
6775 * Unlike other buckets, this one is never empty but could be idle.
6777 kqr
->kqr_wakeup_indexes
&= KQWL_STAYACTIVE_FIRED_BIT
;
6778 kqr
->kqr_wakeup_indexes
|= (1 << kqr
->kqr_stayactive_qos
);
6780 kqr
->kqr_wakeup_indexes
= 0;
6782 for (i
= THREAD_QOS_UNSPECIFIED
+ 1; i
< KQWL_BUCKET_STAYACTIVE
; i
++) {
6783 if (!TAILQ_EMPTY(&kq
->kq_queue
[i
])) {
6784 kqr
->kqr_wakeup_indexes
|= (1 << i
);
6785 struct knote
*kn
= TAILQ_FIRST(&kqwl
->kqwl_kqueue
.kq_queue
[i
]);
6786 if (i
== THREAD_QOS_USER_INTERACTIVE
&&
6787 kn
->kn_qos_override_is_sync
) {
6788 kqr
->kqr_has_sync_override
= TRUE
;
6792 if (kqr
->kqr_wakeup_indexes
) {
6793 kqr
->kqr_state
|= KQR_WAKEUP
;
6794 kqworkloop_request_fire_r2k_notification(kqwl
);
6796 kqr
->kqr_state
&= ~KQR_WAKEUP
;
6798 assert(qos
== THREAD_QOS_UNSPECIFIED
);
6799 goto recompute_async
;
6801 case KQWL_UTQ_RESET_WAKEUP_OVERRIDE
:
6802 kqr
->kqr_override_index
= THREAD_QOS_UNSPECIFIED
;
6803 assert(qos
== THREAD_QOS_UNSPECIFIED
);
6804 goto recompute_async
;
6806 case KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE
:
6809 * When modifying the wakeup QoS or the async override QoS, we always
6810 * need to maintain our invariant that kqr_override_index is at least as
6811 * large as the highest QoS for which an event is fired.
		 * However this override index can be larger when there is an overridden
		 * suppressed knote pushing on the kqueue.
6816 if (kqr
->kqr_wakeup_indexes
> (1 << qos
)) {
6817 qos
= fls(kqr
->kqr_wakeup_indexes
) - 1; /* fls is 1-based */
6819 if (kqr
->kqr_override_index
< qos
) {
6820 kqr
->kqr_override_index
= qos
;
6824 case KQWL_UTQ_REDRIVE_EVENTS
:
6827 case KQWL_UTQ_SET_ASYNC_QOS
:
6829 kqr
->kqr_qos_index
= qos
;
6832 case KQWL_UTQ_SET_SYNC_WAITERS_QOS
:
6834 kqr
->kqr_dsync_waiters_qos
= qos
;
6838 panic("unknown kqwl thread qos update operation: %d", op
);
	boolean_t new_ipc_override_is_sync = FALSE;
	kq_index_t new_qos = kqworkloop_combined_qos(kqwl, &new_ipc_override_is_sync);
	thread_t kqwl_owner = kqwl->kqwl_owner;
	thread_t servicer = kqr->kqr_thread;
	__assert_only int ret;

	/*
	 * Apply the diffs to the owner if applicable
	 */
	if (filt_wlowner_is_valid(kqwl_owner)) {
		/* JMM - need new trace hooks for owner overrides */
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST),
				kqwl->kqwl_dynamicid,
				(kqr->kqr_state & KQR_BOUND) ? thread_tid(kqwl_owner) : 0,
				(kqr->kqr_qos_index << 8) | new_qos,
				(kqr->kqr_override_index << 8) | kqr->kqr_state);

		if (new_qos == kqr->kqr_dsync_owner_qos) {
			// nothing to do
		} else if (kqr->kqr_dsync_owner_qos == THREAD_QOS_UNSPECIFIED) {
			thread_add_ipc_override(kqwl_owner, new_qos);
		} else if (new_qos == THREAD_QOS_UNSPECIFIED) {
			thread_drop_ipc_override(kqwl_owner);
		} else /* kqr->kqr_dsync_owner_qos != new_qos */ {
			thread_update_ipc_override(kqwl_owner, new_qos);
		}
		kqr->kqr_dsync_owner_qos = new_qos;

		if (new_ipc_override_is_sync &&
		    !kqr->kqr_owner_override_is_sync) {
			thread_add_sync_ipc_override(kqwl_owner);
		} else if (!new_ipc_override_is_sync &&
		    kqr->kqr_owner_override_is_sync) {
			thread_drop_sync_ipc_override(kqwl_owner);
		}
		kqr->kqr_owner_override_is_sync = new_ipc_override_is_sync;
	}
	/*
	 * Apply the diffs to the servicer
	 */
	if (static_thread) {
		/*
		 * Statically bound thread
		 *
		 * These threads don't participate in QoS overrides today, just wake up
		 * the thread blocked on this kqueue if a new event arrived.
		 */

		switch (op) {
		case KQWL_UTQ_UPDATE_WAKEUP_QOS:
		case KQWL_UTQ_UPDATE_STAYACTIVE_QOS:
		case KQWL_UTQ_RECOMPUTE_WAKEUP_QOS:
			break;

		case KQWL_UTQ_RESET_WAKEUP_OVERRIDE:
		case KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE:
		case KQWL_UTQ_REDRIVE_EVENTS:
		case KQWL_UTQ_SET_ASYNC_QOS:
		case KQWL_UTQ_SET_SYNC_WAITERS_QOS:
			panic("should never be called");
			break;
		}

		if ((kqr->kqr_state & KQR_BOUND) && (kqr->kqr_state & KQR_WAKEUP)) {
			assert(servicer && !is_workqueue_thread(servicer));
			if (kq->kq_state & (KQ_SLEEP | KQ_SEL)) {
				kq->kq_state &= ~(KQ_SLEEP | KQ_SEL);
				waitq_wakeup64_all((struct waitq *)&kq->kq_wqs, KQ_EVENT,
						THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
			}
		}
	} else if ((kqr->kqr_state & KQR_THREQUESTED) == 0) {
		/*
		 * No servicer, nor thread-request
		 *
		 * Make a new thread request, unless there is an owner (or the workloop
		 * is suspended in userland) or if there is no asynchronous work in the
		 * first place.
		 */

		if (kqwl_owner == THREAD_NULL && (kqr->kqr_state & KQR_WAKEUP)) {
			kqworkloop_request_thread(kqwl, new_qos);
		}
	} else if ((kqr->kqr_state & KQR_BOUND) == 0 &&
			(kqwl_owner || (kqr->kqr_state & KQR_WAKEUP) == 0)) {
		/*
		 * No servicer, thread request in flight we want to cancel
		 *
		 * We just got rid of the last knote of the kqueue or noticed an owner
		 * with a thread request still in flight, take it back.
		 */
		ret = (*pthread_functions->workq_threadreq_modify)(kqwl->kqwl_p,
				&kqr->kqr_req, WORKQ_THREADREQ_CANCEL, 0, 0);
		assert(ret == 0);
		kqr->kqr_state &= ~KQR_THREQUESTED;
		kqueue_release(kq, KQUEUE_CANT_BE_LAST_REF);
	} else {
		boolean_t qos_changed = FALSE;

		/*
		 * Servicer or request is in flight
		 *
		 * Just apply the diff to the servicer or the thread request
		 */
		if (kqr->kqr_state & KQR_BOUND) {
			servicer = kqr->kqr_thread;
			struct uthread *ut = get_bsdthread_info(servicer);
			if (ut->uu_kqueue_qos_index != new_qos) {
				if (ut->uu_kqueue_qos_index == THREAD_QOS_UNSPECIFIED) {
					thread_add_ipc_override(servicer, new_qos);
				} else if (new_qos == THREAD_QOS_UNSPECIFIED) {
					thread_drop_ipc_override(servicer);
				} else /* ut->uu_kqueue_qos_index != new_qos */ {
					thread_update_ipc_override(servicer, new_qos);
				}
				ut->uu_kqueue_qos_index = new_qos;
				qos_changed = TRUE;
			}

			if (new_ipc_override_is_sync != ut->uu_kqueue_override_is_sync) {
				if (new_ipc_override_is_sync &&
				    !ut->uu_kqueue_override_is_sync) {
					thread_add_sync_ipc_override(servicer);
				} else if (!new_ipc_override_is_sync &&
				    ut->uu_kqueue_override_is_sync) {
					thread_drop_sync_ipc_override(servicer);
				}
				ut->uu_kqueue_override_is_sync = new_ipc_override_is_sync;
				qos_changed = TRUE;
			}
		} else if (old_qos != new_qos) {
			kqworkloop_threadreq_modify(kqwl, new_qos);
			qos_changed = TRUE;
		}

		if (qos_changed) {
			servicer = kqr->kqr_thread;
			KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST),
					kqwl->kqwl_dynamicid,
					(kqr->kqr_state & KQR_BOUND) ? thread_tid(servicer) : 0,
					(kqr->kqr_qos_index << 16) | (new_qos << 8) | new_ipc_override_is_sync,
					(kqr->kqr_override_index << 8) | kqr->kqr_state);
		}
	}
}
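/*
 * Illustrative sketch, not from the original sources: the owner/servicer
 * updates above reduce to a small decision table keyed on the old and new
 * override values (unspecified vs. specified).  The helper names below are
 * hypothetical stand-ins for the thread_*_ipc_override() calls used above;
 * 0 plays the role of THREAD_QOS_UNSPECIFIED.
 */
#if 0	/* illustrative example, not compiled */
#include <stdio.h>

static void add_override(int q)    { printf("add override %d\n", q); }
static void drop_override(void)    { printf("drop override\n"); }
static void update_override(int q) { printf("update override %d\n", q); }

static void
apply_override_diff(int old_qos, int new_qos)
{
	if (new_qos == old_qos) {
		/* nothing to do */
	} else if (old_qos == 0) {
		add_override(new_qos);
	} else if (new_qos == 0) {
		drop_override();
	} else {
		update_override(new_qos);
	}
}

int
main(void)
{
	apply_override_diff(0, 3);	/* add */
	apply_override_diff(3, 5);	/* update */
	apply_override_diff(5, 0);	/* drop */
	return 0;
}
#endif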
static void
kqworkloop_request_help(struct kqworkloop *kqwl, kq_index_t qos_index)
{
	/* convert to thread qos value */
	assert(qos_index < KQWL_NBUCKETS);

	kqwl_req_lock(kqwl);
	kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_WAKEUP_QOS, qos_index);
	kqwl_req_unlock(kqwl);
}
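/*
 * Illustrative sketch, not from the original sources: the wakeup-index
 * bitmask used above maps back to the highest pending QoS bucket with a
 * 1-based fls(), as in the KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE case.  my_fls()
 * below is a portable stand-in for the kernel's fls().
 */
#if 0	/* illustrative example, not compiled */
#include <stdio.h>

static int
my_fls(unsigned int mask)
{
	int bit = 0;
	while (mask != 0) {
		bit++;
		mask >>= 1;
	}
	return bit;	/* 1-based index of the highest set bit, 0 if none */
}

int
main(void)
{
	/* Buckets 2 and 4 have pending wakeups. */
	unsigned int wakeup_indexes = (1u << 2) | (1u << 4);

	/* Highest pending bucket, mirroring "fls(...) - 1" above. */
	printf("highest pending qos bucket: %d\n", my_fls(wakeup_indexes) - 1);
	return 0;
}
#endif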
/*
 * These arrays describe the low and high qindexes for a given qos_index.
 * The values come from the chart in <sys/eventvar.h> (must stay in sync).
 */
static kq_index_t _kqwq_base_index[KQWQ_NQOS] = {0, 0, 6, 11, 15, 18, 20, 21};
static kq_index_t _kqwq_high_index[KQWQ_NQOS] = {0, 5, 10, 14, 17, 19, 20, 21};

static struct kqtailq *
kqueue_get_base_queue(struct kqueue *kq, kq_index_t qos_index)
{
	if (kq->kq_state & KQ_WORKQ) {
		assert(qos_index < KQWQ_NQOS);
		return &kq->kq_queue[_kqwq_base_index[qos_index]];
	} else if (kq->kq_state & KQ_WORKLOOP) {
		assert(qos_index < KQWL_NBUCKETS);
		return &kq->kq_queue[qos_index];
	} else {
		assert(qos_index == QOS_INDEX_KQFILE);
		return &kq->kq_queue[QOS_INDEX_KQFILE];
	}
}

static struct kqtailq *
kqueue_get_high_queue(struct kqueue *kq, kq_index_t qos_index)
{
	if (kq->kq_state & KQ_WORKQ) {
		assert(qos_index < KQWQ_NQOS);
		return &kq->kq_queue[_kqwq_high_index[qos_index]];
	} else if (kq->kq_state & KQ_WORKLOOP) {
		assert(qos_index < KQWL_NBUCKETS);
		return &kq->kq_queue[KQWL_BUCKET_STAYACTIVE];
	} else {
		assert(qos_index == QOS_INDEX_KQFILE);
		return &kq->kq_queue[QOS_INDEX_KQFILE];
	}
}

static int
kqueue_queue_empty(struct kqueue *kq, kq_index_t qos_index)
{
	struct kqtailq *base_queue = kqueue_get_base_queue(kq, qos_index);
	struct kqtailq *queue = kqueue_get_high_queue(kq, qos_index);

	do {
		if (!TAILQ_EMPTY(queue))
			return 0;
	} while (queue-- > base_queue);
	return 1;
}
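/*
 * Illustrative sketch, not from the original sources: the base/high index
 * tables above flatten a (qos, override) pair into a contiguous range of
 * queue slots, and emptiness is checked by walking from the high slot down
 * to the base slot as kqueue_queue_empty() does.  The array and slot values
 * below are made up for the example.
 */
#if 0	/* illustrative example, not compiled */
#include <stdbool.h>
#include <stdio.h>

#define NSLOTS 22

static int slot_nonempty[NSLOTS];	/* 1 marks a non-empty queue slot */

static bool
range_empty(int base, int high)
{
	/* Walk from the high slot down to the base slot, like the do/while above. */
	for (int i = high; i >= base; i--) {
		if (slot_nonempty[i])
			return false;
	}
	return true;
}

int
main(void)
{
	slot_nonempty[13] = 1;
	/* QoS index 3 covers slots 11..14 per the tables above; prints 0. */
	printf("qos 3 empty? %d\n", range_empty(11, 14));
	return 0;
}
#endif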
static struct kqtailq *
kqueue_get_suppressed_queue(struct kqueue *kq, kq_index_t qos_index)
{
	struct kqtailq *res;
	struct kqrequest *kqr;

	if (kq->kq_state & KQ_WORKQ) {
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		kqr = kqworkq_get_request(kqwq, qos_index);
		res = &kqr->kqr_suppressed;
	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		kqr = &kqwl->kqwl_request;
		res = &kqr->kqr_suppressed;
	} else {
		struct kqfile *kqf = (struct kqfile *)kq;

		res = &kqf->kqf_suppressed;
	}
	return res;
}
static kq_index_t
knote_get_queue_index(struct knote *kn)
{
	kq_index_t override_index = knote_get_qos_override_index(kn);
	kq_index_t qos_index = knote_get_qos_index(kn);
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t res;

	if (kq->kq_state & KQ_WORKQ) {
		res = _kqwq_base_index[qos_index];
		if (override_index > qos_index)
			res += override_index - qos_index;
		assert(res <= _kqwq_high_index[qos_index]);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		res = MAX(override_index, qos_index);
		assert(res < KQWL_NBUCKETS);
	} else {
		assert(qos_index == QOS_INDEX_KQFILE);
		assert(override_index == QOS_INDEX_KQFILE);
		res = QOS_INDEX_KQFILE;
	}
	return res;
}

static struct kqtailq *
knote_get_queue(struct knote *kn)
{
	kq_index_t qindex = knote_get_queue_index(kn);

	return &(knote_get_kq(kn))->kq_queue[qindex];
}

static kq_index_t
knote_get_req_index(struct knote *kn)
{
	return kn->kn_req_index;
}

static kq_index_t
knote_get_qos_index(struct knote *kn)
{
	return kn->kn_qos_index;
}
static void
knote_set_qos_index(struct knote *kn, kq_index_t qos_index)
{
	struct kqueue *kq = knote_get_kq(kn);

	assert(qos_index < KQWQ_NQOS);
	assert((kn->kn_status & KN_QUEUED) == 0);

	if (kq->kq_state & KQ_WORKQ) {
		assert(qos_index > THREAD_QOS_UNSPECIFIED);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		/* XXX this policy decision shouldn't be here */
		if (qos_index == THREAD_QOS_UNSPECIFIED)
			qos_index = THREAD_QOS_LEGACY;
	} else {
		qos_index = QOS_INDEX_KQFILE;
	}

	/* always set requested */
	kn->kn_req_index = qos_index;

	/* only adjust in-use qos index when not suppressed */
	if ((kn->kn_status & KN_SUPPRESSED) == 0)
		kn->kn_qos_index = qos_index;
}

void
knote_set_qos_overcommit(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqrequest *kqr;

	/* turn overcommit on for the appropriate thread request? */
	if (kn->kn_qos & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) {
		if (kq->kq_state & KQ_WORKQ) {
			kq_index_t qos_index = knote_get_qos_index(kn);
			struct kqworkq *kqwq = (struct kqworkq *)kq;

			kqr = kqworkq_get_request(kqwq, qos_index);

			kqwq_req_lock(kqwq);
			kqr->kqr_state |= KQR_THOVERCOMMIT;
			kqwq_req_unlock(kqwq);
		} else if (kq->kq_state & KQ_WORKLOOP) {
			struct kqworkloop *kqwl = (struct kqworkloop *)kq;

			kqr = &kqwl->kqwl_request;

			kqwl_req_lock(kqwl);
			kqr->kqr_state |= KQR_THOVERCOMMIT;
			kqwl_req_unlock(kqwl);
		}
	}
}
static kq_index_t
knote_get_qos_override_index(struct knote *kn)
{
	return kn->kn_qos_override;
}

static void
knote_set_qos_override_index(struct knote *kn, kq_index_t override_index,
		boolean_t override_is_sync)
{
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t qos_index = knote_get_qos_index(kn);
	kq_index_t old_override_index = knote_get_qos_override_index(kn);
	boolean_t old_override_is_sync = kn->kn_qos_override_is_sync;
	uint32_t flags = 0;

	assert((kn->kn_status & KN_QUEUED) == 0);

	if (override_index == KQWQ_QOS_MANAGER) {
		assert(qos_index == KQWQ_QOS_MANAGER);
	} else {
		assert(override_index < KQWQ_QOS_MANAGER);
	}

	kn->kn_qos_override = override_index;
	kn->kn_qos_override_is_sync = override_is_sync;

	/*
	 * If this is a workq/workloop kqueue, apply the override to the
	 * servicing thread.
	 */
	if (kq->kq_state & KQ_WORKQ) {
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		assert(qos_index > THREAD_QOS_UNSPECIFIED);
		kqworkq_update_override(kqwq, qos_index, override_index);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		if ((kn->kn_status & KN_SUPPRESSED) == KN_SUPPRESSED) {
			flags = flags | KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS;

			if (override_index == THREAD_QOS_USER_INTERACTIVE
			    && override_is_sync) {
				flags = flags | KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI;
			}

			if (old_override_index == THREAD_QOS_USER_INTERACTIVE
			    && old_override_is_sync) {
				flags = flags | KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI;
			}
		}

		assert(qos_index > THREAD_QOS_UNSPECIFIED);
		kqworkloop_update_override(kqwl, qos_index, override_index, flags);
	}
}

static kq_index_t
knote_get_sync_qos_override_index(struct knote *kn)
{
	return kn->kn_qos_sync_override;
}
static void
kqworkq_update_override(struct kqworkq *kqwq, kq_index_t qos_index, kq_index_t override_index)
{
	struct kqrequest *kqr;
	kq_index_t old_override_index;

	if (override_index <= qos_index) {
		return;
	}

	kqr = kqworkq_get_request(kqwq, qos_index);

	kqwq_req_lock(kqwq);
	old_override_index = kqr->kqr_override_index;
	if (override_index > MAX(kqr->kqr_qos_index, old_override_index)) {
		kqr->kqr_override_index = override_index;

		/* apply the override to [incoming?] servicing thread */
		if (kqr->kqr_state & KQR_BOUND) {
			thread_t wqthread = kqr->kqr_thread;

			/* only apply if non-manager */
			if ((kqr->kqr_state & KQWQ_THMANAGER) == 0) {
				if (old_override_index)
					thread_update_ipc_override(wqthread, override_index);
				else
					thread_add_ipc_override(wqthread, override_index);
			}
		}
	}
	kqwq_req_unlock(kqwq);
}
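/*
 * Illustrative sketch, not from the original sources: within one request the
 * stored override is only ever raised, to the maximum of the requested QoS
 * and the current override, mirroring the check on kqr_override_index above.
 * The helper name is hypothetical.
 */
#if 0	/* illustrative example, not compiled */
#include <stdio.h>

#define MAX(a, b) (((a) > (b)) ? (a) : (b))

static int
raise_override(int qos_index, int cur_override, int new_override)
{
	if (new_override > MAX(qos_index, cur_override))
		return new_override;
	return cur_override;
}

int
main(void)
{
	int ov = 0;
	ov = raise_override(2, ov, 4);	/* raises to 4 */
	ov = raise_override(2, ov, 3);	/* stays at 4 */
	printf("override = %d\n", ov);
	return 0;
}
#endif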
/* called with the kqworkq lock held */
static void
kqworkq_bind_thread_impl(
	struct kqworkq *kqwq,
	kq_index_t qos_index,
	thread_t thread,
	unsigned int flags)
{
	/* request lock must be held */
	kqwq_req_held(kqwq);

	struct kqrequest *kqr = kqworkq_get_request(kqwq, qos_index);
	assert(kqr->kqr_state & KQR_THREQUESTED);

	if (qos_index == KQWQ_QOS_MANAGER)
		flags |= KEVENT_FLAG_WORKQ_MANAGER;

	struct uthread *ut = get_bsdthread_info(thread);

	/*
	 * If this is a manager, and the manager request bit is
	 * not set, assure no other thread is bound. If the bit
	 * is set, make sure the old thread is us (or not set).
	 */
	if (flags & KEVENT_FLAG_WORKQ_MANAGER) {
		if ((kqr->kqr_state & KQR_BOUND) == 0) {
			kqr->kqr_state |= (KQR_BOUND | KQWQ_THMANAGER);
			TAILQ_INIT(&kqr->kqr_suppressed);
			kqr->kqr_thread = thread;
			ut->uu_kqueue_bound = (struct kqueue *)kqwq;
			ut->uu_kqueue_qos_index = KQWQ_QOS_MANAGER;
			ut->uu_kqueue_flags = (KEVENT_FLAG_WORKQ |
					KEVENT_FLAG_WORKQ_MANAGER);
		} else {
			assert(kqr->kqr_state & KQR_BOUND);
			assert(thread == kqr->kqr_thread);
			assert(ut->uu_kqueue_bound == (struct kqueue *)kqwq);
			assert(ut->uu_kqueue_qos_index == KQWQ_QOS_MANAGER);
			assert(ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ_MANAGER);
		}
		return;
	}

	/* Just a normal one-queue servicing thread */
	assert(kqr->kqr_state & KQR_THREQUESTED);
	assert(kqr->kqr_qos_index == qos_index);

	if ((kqr->kqr_state & KQR_BOUND) == 0) {
		kqr->kqr_state |= KQR_BOUND;
		TAILQ_INIT(&kqr->kqr_suppressed);
		kqr->kqr_thread = thread;

		/* apply an ipc QoS override if one is needed */
		if (kqr->kqr_override_index) {
			assert(kqr->kqr_qos_index);
			assert(kqr->kqr_override_index > kqr->kqr_qos_index);
			assert(thread_get_ipc_override(thread) == THREAD_QOS_UNSPECIFIED);
			thread_add_ipc_override(thread, kqr->kqr_override_index);
		}

		/* indicate that we are processing in the uthread */
		ut->uu_kqueue_bound = (struct kqueue *)kqwq;
		ut->uu_kqueue_qos_index = qos_index;
		ut->uu_kqueue_flags = flags;
	} else {
		/*
		 * probably synchronously bound AND post-request bound
		 * this logic can go away when we get rid of post-request bind
		 */
		assert(kqr->kqr_state & KQR_BOUND);
		assert(thread == kqr->kqr_thread);
		assert(ut->uu_kqueue_bound == (struct kqueue *)kqwq);
		assert(ut->uu_kqueue_qos_index == qos_index);
		assert((ut->uu_kqueue_flags & flags) == flags);
	}
}
static void
kqworkloop_update_override(
	struct kqworkloop *kqwl,
	kq_index_t qos_index,
	kq_index_t override_index,
	uint32_t flags)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;

	kqwl_req_lock(kqwl);

	/* Do not override on attached threads */
	if (kqr->kqr_state & KQR_BOUND) {
		assert(kqr->kqr_thread);

		if (kqwl->kqwl_kqueue.kq_state & KQ_NO_WQ_THREAD) {
			kqwl_req_unlock(kqwl);
			assert(!is_workqueue_thread(kqr->kqr_thread));
			return;
		}
	}

	/* Update sync ipc counts on kqr for suppressed knotes */
	if (flags & KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS) {
		kqworkloop_update_suppress_sync_count(kqr, flags);
	}

	if ((flags & KQWL_UO_UPDATE_OVERRIDE_LAZY) == 0) {
		kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE,
				MAX(qos_index, override_index));
	}
	kqwl_req_unlock(kqwl);
}

static void
kqworkloop_update_suppress_sync_count(
	struct kqrequest *kqr,
	uint32_t flags)
{
	if (flags & KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI) {
		kqr->kqr_sync_suppress_count++;
	}

	if (flags & KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI) {
		assert(kqr->kqr_sync_suppress_count > 0);
		kqr->kqr_sync_suppress_count--;
	}
}
/*
 * kqworkloop_unbind_thread - Unbind the servicer thread of a workloop kqueue
 *
 * It will end the processing phase in case it was still processing.
 *
 * We may have to request a new thread for workloops that are not
 * KQ_NO_WQ_THREAD.  This can happen if:
 * - there were active events at or above our QoS we never got to (count > 0)
 * - we pended waitq hook callouts during processing
 * - we pended wakeups while processing (or unsuppressing)
 *
 * Called with kqueue lock held.
 */
static void
kqworkloop_unbind_thread(
	struct kqworkloop *kqwl,
	thread_t thread,
	__unused unsigned int flags)
{
	struct kqueue *kq = &kqwl->kqwl_kqueue;
	struct kqrequest *kqr = &kqwl->kqwl_request;

	assert((kq->kq_state & KQ_PROCESSING) == 0);
	if (kq->kq_state & KQ_PROCESSING) {
		return;
	}

	/*
	 * Forcing the KQ_PROCESSING flag ensures that QoS updates caused by
	 * unsuppressing knotes are not applied until the eventual call to
	 * kqworkloop_update_threads_qos() below.
	 */
	kq->kq_state |= KQ_PROCESSING;
	kqworkloop_acknowledge_events(kqwl, TRUE);
	kq->kq_state &= ~KQ_PROCESSING;

	kqwl_req_lock(kqwl);

	/* deal with extraneous unbinds in release kernels */
	assert((kqr->kqr_state & (KQR_BOUND | KQR_PROCESSING)) == KQR_BOUND);
	if ((kqr->kqr_state & (KQR_BOUND | KQR_PROCESSING)) != KQR_BOUND) {
		kqwl_req_unlock(kqwl);
		return;
	}

	assert(thread == current_thread());
	assert(kqr->kqr_thread == thread);
	if (kqr->kqr_thread != thread) {
		kqwl_req_unlock(kqwl);
		return;
	}

	struct uthread *ut = get_bsdthread_info(thread);
	kq_index_t old_qos_index = ut->uu_kqueue_qos_index;
	boolean_t ipc_override_is_sync = ut->uu_kqueue_override_is_sync;
	ut->uu_kqueue_bound = NULL;
	ut->uu_kqueue_qos_index = 0;
	ut->uu_kqueue_override_is_sync = 0;
	ut->uu_kqueue_flags = 0;

	/* unbind the servicer thread, drop overrides */
	kqr->kqr_thread = NULL;
	kqr->kqr_state &= ~(KQR_BOUND | KQR_THREQUESTED | KQR_R2K_NOTIF_ARMED);
	kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_RECOMPUTE_WAKEUP_QOS, 0);

	kqwl_req_unlock(kqwl);

	/*
	 * Drop the override on the current thread last, after the call to
	 * kqworkloop_update_threads_qos above.
	 */
	if (old_qos_index) {
		thread_drop_ipc_override(thread);
	}
	if (ipc_override_is_sync) {
		thread_drop_sync_ipc_override(thread);
	}
}
/* called with the kqworkq lock held */
static void
kqworkq_unbind_thread(
	struct kqworkq *kqwq,
	kq_index_t qos_index,
	thread_t thread,
	__unused unsigned int flags)
{
	struct kqrequest *kqr = kqworkq_get_request(kqwq, qos_index);
	kq_index_t override_index = 0;

	/* request lock must be held */
	kqwq_req_held(kqwq);

	assert(thread == current_thread());

	if ((kqr->kqr_state & KQR_BOUND) == 0) {
		assert(kqr->kqr_state & KQR_BOUND);
		return;
	}

	assert(kqr->kqr_thread == thread);
	assert(TAILQ_EMPTY(&kqr->kqr_suppressed));

	/*
	 * If there is an override, drop it from the current thread
	 * and then we are free to recompute (a potentially lower)
	 * minimum override to apply to the next thread request.
	 */
	if (kqr->kqr_override_index) {
		struct kqtailq *base_queue = kqueue_get_base_queue(&kqwq->kqwq_kqueue, qos_index);
		struct kqtailq *queue = kqueue_get_high_queue(&kqwq->kqwq_kqueue, qos_index);

		/* if not bound to a manager thread, drop the current ipc override */
		if ((kqr->kqr_state & KQWQ_THMANAGER) == 0) {
			thread_drop_ipc_override(thread);
		}

		/* recompute the new override */
		do {
			if (!TAILQ_EMPTY(queue)) {
				override_index = queue - base_queue + qos_index;
				break;
			}
		} while (queue-- > base_queue);
	}

	/* Mark it unbound */
	kqr->kqr_thread = NULL;
	kqr->kqr_state &= ~(KQR_BOUND | KQR_THREQUESTED | KQWQ_THMANAGER);

	/* apply the new override */
	if (override_index > kqr->kqr_qos_index) {
		kqr->kqr_override_index = override_index;
	} else {
		kqr->kqr_override_index = THREAD_QOS_UNSPECIFIED;
	}
}

struct kqrequest *
kqworkq_get_request(struct kqworkq *kqwq, kq_index_t qos_index)
{
	assert(qos_index < KQWQ_NQOS);
	return &kqwq->kqwq_request[qos_index];
}
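/*
 * Illustrative sketch, not from the original sources: the recompute loop in
 * kqworkq_unbind_thread() derives the new override from the pointer distance
 * between the highest non-empty queue slot and the base slot.  The arrays and
 * values below are made up for the example.
 */
#if 0	/* illustrative example, not compiled */
#include <stdio.h>

int
main(void)
{
	/* Stand-in for the per-QoS queue slots covered by one request. */
	int queues[4] = { 0, 0, 1, 0 };	/* relative slot 2 is non-empty */
	int qos_index = 3;		/* base slot corresponds to QoS 3 */
	int override_index = 0;

	int *base_queue = &queues[0];
	int *queue = &queues[3];	/* start from the high slot */

	do {
		if (*queue) {
			/* Same arithmetic as "queue - base_queue + qos_index" above. */
			override_index = (int)(queue - base_queue) + qos_index;
			break;
		}
	} while (queue-- > base_queue);

	printf("recomputed override: %d\n", override_index);	/* prints 5 */
	return 0;
}
#endif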
void
knote_adjust_qos(struct knote *kn, qos_t new_qos, qos_t new_override, kq_index_t sync_override_index)
{
	struct kqueue *kq = knote_get_kq(kn);
	boolean_t override_is_sync = FALSE;

	if (kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) {
		kq_index_t new_qos_index;
		kq_index_t new_override_index;
		kq_index_t servicer_qos_index;

		new_qos_index = qos_index_from_qos(kn, new_qos, FALSE);
		new_override_index = qos_index_from_qos(kn, new_override, TRUE);

		/* make sure the servicer qos acts as a floor */
		servicer_qos_index = qos_index_from_qos(kn, kn->kn_qos, FALSE);
		if (servicer_qos_index > new_qos_index)
			new_qos_index = servicer_qos_index;
		if (servicer_qos_index > new_override_index)
			new_override_index = servicer_qos_index;
		if (sync_override_index >= new_override_index) {
			new_override_index = sync_override_index;
			override_is_sync = TRUE;
		}

		if (new_qos_index != knote_get_req_index(kn) ||
		    new_override_index != knote_get_qos_override_index(kn) ||
		    override_is_sync != kn->kn_qos_override_is_sync) {
			if (kn->kn_status & KN_QUEUED) {
				knote_dequeue(kn);
				knote_set_qos_index(kn, new_qos_index);
				knote_set_qos_override_index(kn, new_override_index, override_is_sync);
				knote_enqueue(kn);
			} else {
				knote_set_qos_index(kn, new_qos_index);
				knote_set_qos_override_index(kn, new_override_index, override_is_sync);
			}
		}
	}
}
void
knote_adjust_sync_qos(struct knote *kn, kq_index_t sync_qos, boolean_t lock_kq)
{
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t old_sync_override;
	kq_index_t qos_index = knote_get_qos_index(kn);
	uint32_t flags = 0;

	/* Tracking only happens for UI qos */
	if (sync_qos != THREAD_QOS_USER_INTERACTIVE &&
	    sync_qos != THREAD_QOS_UNSPECIFIED) {
		return;
	}

	if (lock_kq)
		kqlock(kq);

	if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		old_sync_override = knote_get_sync_qos_override_index(kn);
		if (old_sync_override != sync_qos) {
			kn->kn_qos_sync_override = sync_qos;

			/* update sync ipc counters for suppressed knotes */
			if ((kn->kn_status & KN_SUPPRESSED) == KN_SUPPRESSED) {
				flags = flags | KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS;

				/* Do not recalculate kqwl override, it would be done later */
				flags = flags | KQWL_UO_UPDATE_OVERRIDE_LAZY;

				if (sync_qos == THREAD_QOS_USER_INTERACTIVE) {
					flags = flags | KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI;
				}

				if (old_sync_override == THREAD_QOS_USER_INTERACTIVE) {
					flags = flags | KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI;
				}

				kqworkloop_update_override(kqwl, qos_index, sync_qos,
static void
knote_wakeup(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t qos_index = knote_get_qos_index(kn);

	if (kq->kq_state & KQ_WORKQ) {
		/* request a servicing thread */
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		kqworkq_request_help(kqwq, qos_index);

	} else if (kq->kq_state & KQ_WORKLOOP) {
		/* request a servicing thread */
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		if (kqworkloop_is_processing_on_current_thread(kqwl)) {
			/*
			 * kqworkloop_end_processing() will perform the required QoS
			 * computations when it unsets the processing mode.
			 */
			return;
		}
		kqworkloop_request_help(kqwl, qos_index);
	} else {
		struct kqfile *kqf = (struct kqfile *)kq;

		/* flag wakeups during processing */
		if (kq->kq_state & KQ_PROCESSING)
			kq->kq_state |= KQ_WAKEUP;

		/* wakeup a thread waiting on this queue */
		if (kq->kq_state & (KQ_SLEEP | KQ_SEL)) {
			kq->kq_state &= ~(KQ_SLEEP | KQ_SEL);
			waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
					KQ_EVENT,
					THREAD_AWAKENED,
					WAITQ_ALL_PRIORITIES);
		}

		/* wakeup other kqueues/select sets we're inside */
		KNOTE(&kqf->kqf_sel.si_note, 0);
	}
}
/*
 * Called with the kqueue locked
 */
void
kqueue_interrupt(struct kqueue *kq)
{
	assert((kq->kq_state & KQ_WORKQ) == 0);

	/* wakeup sleeping threads */
	if ((kq->kq_state & (KQ_SLEEP | KQ_SEL)) != 0) {
		kq->kq_state &= ~(KQ_SLEEP | KQ_SEL);
		(void)waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
				KQ_EVENT,
				THREAD_RESTART,
				WAITQ_ALL_PRIORITIES);
	}

	/* wakeup threads waiting their turn to process */
	if (kq->kq_state & KQ_PROCWAIT) {
		struct kqtailq *suppressq;

		assert(kq->kq_state & KQ_PROCESSING);

		kq->kq_state &= ~KQ_PROCWAIT;
		suppressq = kqueue_get_suppressed_queue(kq, QOS_INDEX_KQFILE);
		(void)waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
				CAST_EVENT64_T(suppressq),
				THREAD_RESTART,
				WAITQ_ALL_PRIORITIES);
	}
}
/*
 * Called back from waitq code when no threads waiting and the hook was set.
 *
 * Interrupts are likely disabled and spin locks are held - minimal work
 * can be done in this context!!!
 *
 * JMM - in the future, this will try to determine which knotes match the
 * wait queue wakeup and apply these wakeups against those knotes themselves.
 * For now, all the events dispatched this way are dispatch-manager handled,
 * so hard-code that for now.
 */
void
waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook, void *knote_hook, int qos)
{
#pragma unused(knote_hook, qos)

	struct kqueue *kq = (struct kqueue *)kq_hook;

	if (kq->kq_state & KQ_WORKQ) {
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		kqworkq_request_help(kqwq, KQWQ_QOS_MANAGER);

	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		kqworkloop_request_help(kqwl, KQWL_BUCKET_STAYACTIVE);
	}
}

void
klist_init(struct klist *list)
{
	SLIST_INIT(list);
}
/*
 * Query/Post each knote in the object's list
 *
 * The object lock protects the list. It is assumed
 * that the filter/event routine for the object can
 * determine that the object is already locked (via
 * the hint) and not deadlock itself.
 *
 * The object lock should also hold off pending
 * detach/drop operations. But we'll prevent it here
 * too (by taking a use reference) - just in case.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext) {
		struct kqueue *kq = knote_get_kq(kn);

		kqlock(kq);

		assert(!knoteuse_needs_boost(kn, NULL));

		/* If we can get a use reference - deliver event */
		if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
			int result;

			/* call the event with only a use count */
			result = knote_fops(kn)->f_event(kn, hint);

			/* if it's not going away and triggered */
			if (knoteuse2kqlock(kq, kn, KNUSE_NONE) && result)
				knote_activate(kn);
			/* kq lock held */
		}
		kqunlock(kq);
	}
}

/*
 * attach a knote to the specified list.  Return true if this is the first entry.
 * The list is protected by whatever lock the object it is associated with uses.
 */
int
knote_attach(struct klist *list, struct knote *kn)
{
	int ret = SLIST_EMPTY(list);
	SLIST_INSERT_HEAD(list, kn, kn_selnext);
	return (ret);
}

/*
 * detach a knote from the specified list.  Return true if that was the last entry.
 * The list is protected by whatever lock the object it is associated with uses.
 */
int
knote_detach(struct klist *list, struct knote *kn)
{
	SLIST_REMOVE(list, kn, knote, kn_selnext);
	return (SLIST_EMPTY(list));
}

/*
 * knote_vanish - Indicate that the source has vanished
 *
 * If the knote has requested EV_VANISHED delivery,
 * arrange for that. Otherwise, deliver a NOTE_REVOKE
 * event for backward compatibility.
 *
 * The knote is marked as having vanished, but is not
 * actually detached from the source in this instance.
 * The actual detach is deferred until the knote drop.
 *
 * Our caller already has the object lock held. Calling
 * the detach routine would try to take that lock
 * recursively - which likely is not supported.
 */
void
knote_vanish(struct klist *list)
{
	struct knote *kn;
	struct knote *kn_next;

	SLIST_FOREACH_SAFE(kn, list, kn_selnext, kn_next) {
		struct kqueue *kq = knote_get_kq(kn);
		int result;

		kqlock(kq);

		assert(!knoteuse_needs_boost(kn, NULL));

		if ((kn->kn_status & KN_DROPPING) == 0) {
			/* If EV_VANISH supported - prepare to deliver one */
			if (kn->kn_status & KN_REQVANISH) {
				kn->kn_status |= KN_VANISHED;
				knote_activate(kn);
			} else if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
				/* call the event with only a use count */
				result = knote_fops(kn)->f_event(kn, NOTE_REVOKE);

				/* if it's not going away and triggered */
				if (knoteuse2kqlock(kq, kn, KNUSE_NONE) && result)
					knote_activate(kn);
				/* lock held again */
			}
		}
		kqunlock(kq);
	}
}
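/*
 * Illustrative sketch, not from the original sources: knote_attach() and
 * knote_detach() report whether the list just became non-empty or just
 * became empty, so an event source can arm or disarm itself.  The structure
 * and helper names below are made up; only the SLIST pattern is the point.
 */
#if 0	/* illustrative example, not compiled */
#include <stdbool.h>
#include <stdio.h>
#include <sys/queue.h>

struct item {
	SLIST_ENTRY(item) link;
};
SLIST_HEAD(item_list, item);

/* Returns true if this insert made the list non-empty (mirrors knote_attach). */
static bool
list_attach(struct item_list *list, struct item *it)
{
	bool was_empty = SLIST_EMPTY(list);
	SLIST_INSERT_HEAD(list, it, link);
	return was_empty;
}

/* Returns true if this remove emptied the list (mirrors knote_detach). */
static bool
list_detach(struct item_list *list, struct item *it)
{
	SLIST_REMOVE(list, it, item, link);
	return SLIST_EMPTY(list);
}

int
main(void)
{
	struct item_list list = SLIST_HEAD_INITIALIZER(list);
	struct item a;

	printf("first? %d\n", list_attach(&list, &a));	/* 1 */
	printf("last?  %d\n", list_detach(&list, &a));	/* 1 */
	return 0;
}
#endif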
/*
 * For a given knote, link a provided wait queue directly with the kqueue.
 * Wakeups will happen via recursive wait queue support. But nothing will move
 * the knote to the active list at wakeup (nothing calls knote()). Instead,
 * we permanently enqueue them here.
 *
 * kqueue and knote references are held by caller.
 * waitq locked by caller.
 *
 * caller provides the wait queue link structure.
 */
int
knote_link_waitq(struct knote *kn, struct waitq *wq, uint64_t *reserved_link)
{
	struct kqueue *kq = knote_get_kq(kn);
	kern_return_t kr;

	kr = waitq_link(wq, &kq->kq_wqs, WAITQ_ALREADY_LOCKED, reserved_link);
	if (kr == KERN_SUCCESS) {
		knote_markstayactive(kn);
		return (0);
	} else {
		return (EINVAL);
	}
}

/*
 * Unlink the provided wait queue from the kqueue associated with a knote.
 * Also remove it from the magic list of directly attached knotes.
 *
 * Note that the unlink may have already happened from the other side, so
 * ignore any failures to unlink and just remove it from the kqueue list.
 *
 * On success, caller is responsible for the link structure
 */
int
knote_unlink_waitq(struct knote *kn, struct waitq *wq)
{
	struct kqueue *kq = knote_get_kq(kn);
	kern_return_t kr;

	kr = waitq_unlink(wq, &kq->kq_wqs);
	knote_clearstayactive(kn);
	return ((kr != KERN_SUCCESS) ? EINVAL : 0);
}
/*
 * remove all knotes referencing a specified fd
 *
 * Essentially an inlined knote_remove & knote_drop
 * when we know for sure that the thing is a file
 *
 * Entered with the proc_fd lock already held.
 * It returns the same way, but may drop it temporarily.
 */
void
knote_fdclose(struct proc *p, int fd, int force)
{
	struct klist *list;
	struct knote *kn;

restart:
	list = &p->p_fd->fd_knlist[fd];
	SLIST_FOREACH(kn, list, kn_link) {
		struct kqueue *kq = knote_get_kq(kn);

		kqlock(kq);

		if (kq->kq_p != p)
			panic("%s: proc mismatch (kq->kq_p=%p != p=%p)",
			    __func__, kq->kq_p, p);

		/*
		 * If the knote supports EV_VANISHED delivery,
		 * transition it to vanished mode (or skip over
		 * it if already vanished).
		 */
		if (!force && (kn->kn_status & KN_REQVANISH)) {

			if ((kn->kn_status & KN_VANISHED) == 0) {

				assert(!knoteuse_needs_boost(kn, NULL));

				/* get detach reference (also marks vanished) */
				if (kqlock2knotedetach(kq, kn, KNUSE_NONE)) {
					/* detach knote and drop fp use reference */
					knote_fops(kn)->f_detach(kn);
					if (knote_fops(kn)->f_isfd)
						fp_drop(p, kn->kn_id, kn->kn_fp, 0);

					/* activate it if it's still in existence */
					if (knoteuse2kqlock(kq, kn, KNUSE_NONE)) {
						knote_activate(kn);
					}
					kqunlock(kq);
				}
				goto restart;
			} else {
				kqunlock(kq);
				continue;
			}
		}

		/*
		 * Convert the kq lock to a drop ref.
		 * If we get it, go ahead and drop it.
		 * Otherwise, we waited for the blocking
		 * condition to complete. Either way,
		 * we dropped the fdlock so start over.
		 */
		if (kqlock2knotedrop(kq, kn)) {
			knote_drop(kn, p);
		}
		goto restart;
	}
}
/*
 * knote_fdfind - lookup a knote in the fd table for process
 *
 * If the filter is file-based, lookup based on fd index.
 * Otherwise use a hash based on the ident.
 *
 * Matching is based on kq, filter, and ident. Optionally,
 * it may also be based on the udata field in the kevent -
 * allowing multiple event registration for the file object
 * per kqueue.
 *
 * fd_knhashlock or fdlock held on entry (and exit)
 */
static struct knote *
knote_fdfind(struct kqueue *kq,
		struct kevent_internal_s *kev,
		bool is_fd,
		struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct klist *list = NULL;
	struct knote *kn = NULL;

	/*
	 * determine where to look for the knote
	 */
	if (is_fd) {
		/* fd-based knotes are linked off the fd table */
		if (kev->ident < (u_int)fdp->fd_knlistsize) {
			list = &fdp->fd_knlist[kev->ident];
		}
	} else if (fdp->fd_knhashmask != 0) {
		/* hash non-fd knotes here too */
		list = &fdp->fd_knhash[KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
	}

	/*
	 * scan the selected list looking for a match
	 */
	if (list != NULL) {
		SLIST_FOREACH(kn, list, kn_link) {
			if (kq == knote_get_kq(kn) &&
			    kev->ident == kn->kn_id &&
			    kev->filter == kn->kn_filter) {
				if (kev->flags & EV_UDATA_SPECIFIC) {
					if ((kn->kn_status & KN_UDATA_SPECIFIC) &&
					    kev->udata == kn->kn_udata) {
						break; /* matching udata-specific knote */
					}
				} else if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0) {
					break; /* matching non-udata-specific knote */
				}
			}
		}
	}
	return (kn);
}
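/*
 * Illustrative sketch, not from the original sources: non-fd idents are
 * bucketed with a power-of-two mask, in the spirit of the KN_HASH() use
 * above.  The simple masking below is an assumption; the real KN_HASH macro
 * may mix the bits differently.
 */
#if 0	/* illustrative example, not compiled */
#include <stdio.h>

#define BUCKET(val, mask)	((unsigned long)(val) & (mask))

int
main(void)
{
	unsigned long knhashmask = 64 - 1;	/* 64 hash buckets */

	/* fd-based idents index an array directly; other idents hash like this. */
	printf("ident 5      -> bucket %lu\n", BUCKET(5, knhashmask));
	printf("ident 0x1234 -> bucket %lu\n", BUCKET(0x1234, knhashmask));
	return 0;
}
#endif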
/*
 * kq_add_knote - Add knote to the fd table for process
 * while checking for duplicates.
 *
 * All file-based filters associate a list of knotes by file
 * descriptor index. All other filters hash the knote by ident.
 *
 * May have to grow the table of knote lists to cover the
 * file descriptor index presented.
 *
 * fd_knhashlock and fdlock unheld on entry (and exit).
 *
 * Takes a rwlock boost if inserting the knote is successful.
 */
static int
kq_add_knote(struct kqueue *kq, struct knote *kn,
		struct kevent_internal_s *kev,
		struct proc *p, int *knoteuse_flags)
{
	struct filedesc *fdp = p->p_fd;
	struct klist *list = NULL;
	int ret = 0;
	bool is_fd = knote_fops(kn)->f_isfd;

	if (knote_fdfind(kq, kev, is_fd, p) != NULL) {
		/* found an existing knote: we can't add this one */
		ret = ERESTART;
		goto out_locked;
	}

	/* knote was not found: add it now */
	if (!is_fd) {
		if (fdp->fd_knhashmask == 0) {
			u_long size = 0;

			list = hashinit(CONFIG_KN_HASHSIZE, M_KQUEUE, &size);
			if (list == NULL) {
				ret = ENOMEM;
				goto out_locked;
			}

			fdp->fd_knhash = list;
			fdp->fd_knhashmask = size;
		}

		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
		SLIST_INSERT_HEAD(list, kn, kn_link);
	} else {
		/* knote is fd based */

		if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
			u_int size = 0;

			if (kn->kn_id >= (uint64_t)p->p_rlimit[RLIMIT_NOFILE].rlim_cur
			    || kn->kn_id >= (uint64_t)maxfiles) {
				ret = EINVAL;
				goto out_locked;
			}
			/* have to grow the fd_knlist */
			size = fdp->fd_knlistsize;
			while (size <= kn->kn_id)
				size += KQEXTENT;

			if (size >= (UINT_MAX/sizeof(struct klist *))) {
				ret = EINVAL;
				goto out_locked;
			}

			MALLOC(list, struct klist *,
			    size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
			if (list == NULL) {
				ret = ENOMEM;
				goto out_locked;
			}

			bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
			    fdp->fd_knlistsize * sizeof(struct klist *));
			bzero((caddr_t)list +
			    fdp->fd_knlistsize * sizeof(struct klist *),
			    (size - fdp->fd_knlistsize) * sizeof(struct klist *));
			FREE(fdp->fd_knlist, M_KQUEUE);
			fdp->fd_knlist = list;
			fdp->fd_knlistsize = size;
		}

		list = &fdp->fd_knlist[kn->kn_id];
		SLIST_INSERT_HEAD(list, kn, kn_link);
	}

out_locked:
	if (ret == 0 && knoteuse_needs_boost(kn, kev)) {
		set_thread_rwlock_boost();
		*knoteuse_flags = KNUSE_BOOST;
	} else {
		*knoteuse_flags = KNUSE_NONE;
	}
	return (ret);
}
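/*
 * Illustrative sketch, not from the original sources: growing the per-fd
 * knote list means allocating a larger array, copying the populated prefix,
 * and zeroing the new tail, as the MALLOC/bcopy/bzero sequence above does.
 * Standard malloc/memcpy/memset stand in for the kernel allocators here.
 */
#if 0	/* illustrative example, not compiled */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	size_t oldsize = 4, newsize = 16;
	void **oldlist = calloc(oldsize, sizeof(void *));
	void **newlist = malloc(newsize * sizeof(void *));

	if (oldlist == NULL || newlist == NULL)
		return 1;

	/* copy the populated prefix, zero the newly added tail */
	memcpy(newlist, oldlist, oldsize * sizeof(void *));
	memset(newlist + oldsize, 0, (newsize - oldsize) * sizeof(void *));

	free(oldlist);
	printf("grew list from %zu to %zu slots\n", oldsize, newsize);
	free(newlist);
	return 0;
}
#endif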
/*
 * kq_remove_knote - remove a knote from the fd table for process
 * and copy kn_status and kq_state while holding the kqlock and
 * the fd table lock.
 *
 * If the filter is file-based, remove based on fd index.
 * Otherwise remove from the hash based on the ident.
 *
 * fd_knhashlock and fdlock unheld on entry (and exit).
 */
static void
kq_remove_knote(struct kqueue *kq, struct knote *kn, struct proc *p,
		kn_status_t *kn_status, uint16_t *kq_state)
{
	struct filedesc *fdp = p->p_fd;
	struct klist *list = NULL;
	bool is_fd;

	is_fd = knote_fops(kn)->f_isfd;

	if (is_fd) {
		assert((u_int)fdp->fd_knlistsize > kn->kn_id);
		list = &fdp->fd_knlist[kn->kn_id];
	} else {
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
	}
	SLIST_REMOVE(list, kn, knote, kn_link);

	kqlock(kq);
	*kn_status = kn->kn_status;
	*kq_state = kq->kq_state;
	kqunlock(kq);
}

/*
 * kq_find_knote_and_kq_lock - lookup a knote in the fd table for process
 * and, if the knote is found, acquires the kqlock while holding the fd
 * table lock/spinlock.
 *
 * fd_knhashlock or fdlock unheld on entry (and exit)
 */
static struct knote *
kq_find_knote_and_kq_lock(struct kqueue *kq,
		struct kevent_internal_s *kev,
		bool is_fd, struct proc *p)
{
	struct knote *ret;

	ret = knote_fdfind(kq, kev, is_fd, p);
	if (ret) {
		kqlock(kq);
	}
	return (ret);
}
/*
 * knote_drop - disconnect and drop the knote
 *
 * Called with the kqueue unlocked and holding a
 * "drop reference" on the knote in question.
 * This reference is most often acquired thru a call
 * to kqlock2knotedrop(). But it can also be acquired
 * through stealing a drop reference via a call to
 * knoteuse2knotedrop() or during the initial attach
 * of the knote.
 *
 * The knote may have already been detached from
 * (or not yet attached to) its source object.
 */
static void
knote_drop(struct knote *kn, __unused struct proc *ctxp)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct proc *p = kq->kq_p;
	kn_status_t kn_status;
	uint16_t kq_state;

	/* If we are attached, disconnect from the source first */
	if (kn->kn_status & KN_ATTACHED) {
		knote_fops(kn)->f_detach(kn);
	}

	/* Remove the source from the appropriate hash */
	kq_remove_knote(kq, kn, p, &kn_status, &kq_state);

	/*
	 * If a kqueue_dealloc is happening in parallel for the kq
	 * pointed by the knote the kq could be already deallocated
	 * at this point.
	 * Do not access the kq after the kq_remove_knote if it is
	 * not a KQ_DYNAMIC.
	 */

	/* determine if anyone needs to know about the drop */
	assert((kn_status & (KN_DROPPING | KN_SUPPRESSED | KN_QUEUED)) == KN_DROPPING);

	/*
	 * If KN_USEWAIT is set, some other thread was trying to drop the kn.
	 * Or it was in kqueue_dealloc, so the kqueue_dealloc did not happen
	 * because that thread was waiting on this wake, or it was a drop happening
	 * because of a kevent_register that takes a reference on the kq, and therefore
	 * the kq cannot be deallocated in parallel.
	 *
	 * It is safe to access kq->kq_wqs if needswakeup is set.
	 */
	if (kn_status & KN_USEWAIT)
		waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
				CAST_EVENT64_T(&kn->kn_status),
				THREAD_RESTART,
				WAITQ_ALL_PRIORITIES);

	if (knote_fops(kn)->f_isfd && ((kn->kn_status & KN_VANISHED) == 0))
		fp_drop(p, kn->kn_id, kn->kn_fp, 0);

	knote_free(kn);

	/*
	 * release reference on dynamic kq (and free if last).
	 * Will only be last if this is from fdfree, etc...
	 * because otherwise processing thread has reference.
	 */
	if (kq_state & KQ_DYNAMIC)
		kqueue_release_last(p, kq);
}
/* called with kqueue lock held */
static void
knote_activate(struct knote *kn)
{
	if (kn->kn_status & KN_ACTIVE)
		return;

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_ACTIVATE),
			kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
			kn->kn_filtid);

	kn->kn_status |= KN_ACTIVE;
	if (knote_enqueue(kn))
		knote_wakeup(kn);
}

/* called with kqueue lock held */
static void
knote_deactivate(struct knote *kn)
{
	kn->kn_status &= ~KN_ACTIVE;
	if ((kn->kn_status & KN_STAYACTIVE) == 0)
		knote_dequeue(kn);
}

/* called with kqueue lock held */
static void
knote_enable(struct knote *kn)
{
	if ((kn->kn_status & KN_DISABLED) == 0)
		return;

	kn->kn_status &= ~KN_DISABLED;

	if (kn->kn_status & KN_SUPPRESSED) {
		/* Clear the sync qos on the knote */
		knote_adjust_sync_qos(kn, THREAD_QOS_UNSPECIFIED, FALSE);

		/*
		 * it is possible for userland to have knotes registered for a given
		 * workloop `wl_orig` but really handled on another workloop `wl_new`.
		 *
		 * In that case, rearming will happen from the servicer thread of
		 * `wl_new` which if `wl_orig` is no longer being serviced, would cause
		 * this knote to stay suppressed forever if we only relied on
		 * kqworkloop_acknowledge_events to be called by `wl_orig`.
		 *
		 * However if we see the KQ_PROCESSING bit on `wl_orig` set, we can't
		 * unsuppress because that would mess with the processing phase of
		 * `wl_orig`, however it also means kqworkloop_acknowledge_events()
		 * will be called.
		 */
		struct kqueue *kq = knote_get_kq(kn);
		if ((kq->kq_state & KQ_PROCESSING) == 0) {
			knote_unsuppress(kn);
		}
	} else if (knote_enqueue(kn)) {
		knote_wakeup(kn);
	}
}

/* called with kqueue lock held */
static void
knote_disable(struct knote *kn)
{
	if (kn->kn_status & KN_DISABLED)
		return;

	kn->kn_status |= KN_DISABLED;
	knote_dequeue(kn);
}
/* called with kqueue lock held */
static void
knote_suppress(struct knote *kn)
{
	struct kqtailq *suppressq;
	struct kqueue *kq = knote_get_kq(kn);

	if (kn->kn_status & KN_SUPPRESSED)
		return;

	knote_dequeue(kn);
	kn->kn_status |= KN_SUPPRESSED;
	suppressq = kqueue_get_suppressed_queue(kq, knote_get_qos_index(kn));
	TAILQ_INSERT_TAIL(suppressq, kn, kn_tqe);

	if ((kq->kq_state & KQ_WORKLOOP) &&
	    knote_get_qos_override_index(kn) == THREAD_QOS_USER_INTERACTIVE &&
	    kn->kn_qos_override_is_sync) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;
		/* update the sync qos override counter for suppressed knotes */
		kqworkloop_update_override(kqwl, knote_get_qos_index(kn),
			knote_get_qos_override_index(kn),
			(KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS | KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI));
	}
}

/* called with kqueue lock held */
static void
knote_unsuppress(struct knote *kn)
{
	struct kqtailq *suppressq;
	struct kqueue *kq = knote_get_kq(kn);

	if ((kn->kn_status & KN_SUPPRESSED) == 0)
		return;

	/* Clear the sync qos on the knote */
	knote_adjust_sync_qos(kn, THREAD_QOS_UNSPECIFIED, FALSE);

	kn->kn_status &= ~KN_SUPPRESSED;
	suppressq = kqueue_get_suppressed_queue(kq, knote_get_qos_index(kn));
	TAILQ_REMOVE(suppressq, kn, kn_tqe);

	/* update in-use qos to equal requested qos */
	kn->kn_qos_index = kn->kn_req_index;

	/* don't wakeup if unsuppressing just a stay-active knote */
	if (knote_enqueue(kn) && (kn->kn_status & KN_ACTIVE)) {
		knote_wakeup(kn);
	}

	if ((kq->kq_state & KQ_WORKLOOP) && !(kq->kq_state & KQ_NO_WQ_THREAD) &&
	    knote_get_qos_override_index(kn) == THREAD_QOS_USER_INTERACTIVE &&
	    kn->kn_qos_override_is_sync) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		/* update the sync qos override counter for suppressed knotes */
		kqworkloop_update_override(kqwl, knote_get_qos_index(kn),
			knote_get_qos_override_index(kn),
			(KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS | KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI));
	}

	if (TAILQ_EMPTY(suppressq) && (kq->kq_state & KQ_WORKLOOP) &&
	    !(kq->kq_state & KQ_NO_WQ_THREAD)) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;
		if (kqworkloop_is_processing_on_current_thread(kqwl)) {
			/*
			 * kqworkloop_end_processing() will perform the required QoS
			 * computations when it unsets the processing mode.
			 */
		} else {
			kqwl_req_lock(kqwl);
			kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_RESET_WAKEUP_OVERRIDE, 0);
			kqwl_req_unlock(kqwl);
		}
	}
}
/* called with kqueue lock held */
static void
knote_update_sync_override_state(struct knote *kn)
{
	struct kqtailq *queue = knote_get_queue(kn);
	struct kqueue *kq = knote_get_kq(kn);

	if (!(kq->kq_state & KQ_WORKLOOP) ||
	    knote_get_queue_index(kn) != THREAD_QOS_USER_INTERACTIVE)
		return;

	/* Update the sync ipc state on workloop */
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	boolean_t sync_ipc_override = FALSE;
	if (!TAILQ_EMPTY(queue)) {
		struct knote *kn_head = TAILQ_FIRST(queue);
		if (kn_head->kn_qos_override_is_sync)
			sync_ipc_override = TRUE;
	}
	kqworkloop_update_sync_override_state(kqwl, sync_ipc_override);
}

/* called with kqueue lock held */
static int
knote_enqueue(struct knote *kn)
{
	if ((kn->kn_status & (KN_ACTIVE | KN_STAYACTIVE)) == 0 ||
	    (kn->kn_status & (KN_DISABLED | KN_SUPPRESSED | KN_DROPPING)))
		return 0;

	if ((kn->kn_status & KN_QUEUED) == 0) {
		struct kqtailq *queue = knote_get_queue(kn);
		struct kqueue *kq = knote_get_kq(kn);

		/* insert at head for sync ipc waiters */
		if (kn->kn_qos_override_is_sync) {
			TAILQ_INSERT_HEAD(queue, kn, kn_tqe);
		} else {
			TAILQ_INSERT_TAIL(queue, kn, kn_tqe);
		}
		kn->kn_status |= KN_QUEUED;
		kq->kq_count++;
		knote_update_sync_override_state(kn);
		return 1;
	}
	return ((kn->kn_status & KN_STAYACTIVE) != 0);
}

/* called with kqueue lock held */
static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqtailq *queue;

	if ((kn->kn_status & KN_QUEUED) == 0)
		return;

	queue = knote_get_queue(kn);
	TAILQ_REMOVE(queue, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	knote_update_sync_override_state(kn);
}
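/*
 * Illustrative sketch, not from the original sources: knote_enqueue() puts
 * sync-override knotes at the head of their queue so they are serviced
 * before plain appends.  The types and values below are made up; only the
 * head-vs-tail TAILQ pattern is the point.
 */
#if 0	/* illustrative example, not compiled */
#include <stdbool.h>
#include <stdio.h>
#include <sys/queue.h>

struct note {
	int id;
	bool is_sync;
	TAILQ_ENTRY(note) tqe;
};
TAILQ_HEAD(note_queue, note);

/* Sync waiters jump the line (head insert); everyone else appends. */
static void
enqueue(struct note_queue *q, struct note *n)
{
	if (n->is_sync)
		TAILQ_INSERT_HEAD(q, n, tqe);
	else
		TAILQ_INSERT_TAIL(q, n, tqe);
}

int
main(void)
{
	struct note_queue q = TAILQ_HEAD_INITIALIZER(q);
	struct note a = { 1, false }, b = { 2, true }, *n;

	enqueue(&q, &a);
	enqueue(&q, &b);
	TAILQ_FOREACH(n, &q, tqe)
		printf("note %d\n", n->id);	/* prints 2 then 1 */
	return 0;
}
#endif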
static void
knote_init(void)
{
	knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote),
			8192, "knote zone");

	kqfile_zone = zinit(sizeof(struct kqfile), 8192*sizeof(struct kqfile),
			8192, "kqueue file zone");

	kqworkq_zone = zinit(sizeof(struct kqworkq), 8192*sizeof(struct kqworkq),
			8192, "kqueue workq zone");

	kqworkloop_zone = zinit(sizeof(struct kqworkloop), 8192*sizeof(struct kqworkloop),
			8192, "kqueue workloop zone");

	/* allocate kq lock group attribute and group */
	kq_lck_grp_attr = lck_grp_attr_alloc_init();

	kq_lck_grp = lck_grp_alloc_init("kqueue", kq_lck_grp_attr);

	/* Allocate kq lock attribute */
	kq_lck_attr = lck_attr_alloc_init();

	/* Initialize the timer filter lock */
	lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);

	/* Initialize the user filter lock */
	lck_spin_init(&_filt_userlock, kq_lck_grp, kq_lck_attr);

#if CONFIG_MEMORYSTATUS
	/* Initialize the memorystatus list lock */
	memorystatus_kevent_init(kq_lck_grp, kq_lck_attr);
#endif
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

const struct filterops *
knote_fops(struct knote *kn)
{
	return sysfilt_ops[kn->kn_filtid];
}

static struct knote *
knote_alloc(void)
{
	struct knote *kn;

	kn = ((struct knote *)zalloc(knote_zone));
	*kn = (struct knote) { .kn_qos_override = 0, .kn_qos_sync_override = 0, .kn_qos_override_is_sync = 0 };
	return (kn);
}

static void
knote_free(struct knote *kn)
{
	zfree(knote_zone, kn);
}
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/kern_event.h>
#include <sys/malloc.h>
#include <sys/sys_domain.h>
#include <sys/syslog.h>

#define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))

#define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
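/*
 * Illustrative sketch, not from the original sources: the pcblist sysctl
 * below packs several records into one buffer, advancing by 64-bit-rounded
 * sizes.  The MY_* macros are stand-ins written on the assumption that
 * P2ROUNDUP rounds up to the next multiple of its second argument.
 */
#if 0	/* illustrative example, not compiled */
#include <stdint.h>
#include <stdio.h>

#define MY_ROUNDUP64(x)    (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
#define MY_ADVANCE64(p, n) (void *)((char *)(p) + MY_ROUNDUP64(n))

int
main(void)
{
	char buf[64];
	void *first = buf;
	/* A 13-byte record is padded to 16 so the next record stays 8-byte aligned. */
	void *second = MY_ADVANCE64(first, 13);

	printf("rounded size: %zu\n", (size_t)MY_ROUNDUP64(13));
	printf("offset of second record: %td\n", (char *)second - (char *)first);
	return 0;
}
#endif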
static lck_grp_attr_t *kev_lck_grp_attr;
static lck_attr_t *kev_lck_attr;
static lck_grp_t *kev_lck_grp;
static decl_lck_rw_data(,kev_lck_data);
static lck_rw_t *kev_rwlock = &kev_lck_data;

static int kev_attach(struct socket *so, int proto, struct proc *p);
static int kev_detach(struct socket *so);
static int kev_control(struct socket *so, u_long cmd, caddr_t data,
		struct ifnet *ifp, struct proc *p);
static lck_mtx_t * event_getlock(struct socket *, int);
static int event_lock(struct socket *, int, void *);
static int event_unlock(struct socket *, int, void *);

static int event_sofreelastref(struct socket *);
static void kev_delete(struct kern_event_pcb *);

static struct pr_usrreqs event_usrreqs = {
	.pru_attach =		kev_attach,
	.pru_control =		kev_control,
	.pru_detach =		kev_detach,
	.pru_soreceive =	soreceive,
};

static struct protosw eventsw[] = {
{
	.pr_type =		SOCK_RAW,
	.pr_protocol =		SYSPROTO_EVENT,
	.pr_flags =		PR_ATOMIC,
	.pr_usrreqs =		&event_usrreqs,
	.pr_lock =		event_lock,
	.pr_unlock =		event_unlock,
	.pr_getlock =		event_getlock,
}
};

__private_extern__ int kevt_getstat SYSCTL_HANDLER_ARGS;
__private_extern__ int kevt_pcblist SYSCTL_HANDLER_ARGS;

SYSCTL_NODE(_net_systm, OID_AUTO, kevt,
	CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Kernel event family");

struct kevtstat kevtstat;
SYSCTL_PROC(_net_systm_kevt, OID_AUTO, stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
    kevt_getstat, "S,kevtstat", "");

SYSCTL_PROC(_net_systm_kevt, OID_AUTO, pcblist,
	CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
	kevt_pcblist, "S,xkevtpcb", "");
static lck_mtx_t *
event_getlock(struct socket *so, int flags)
{
#pragma unused(flags)
	struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb;

	if (so->so_pcb != NULL) {
		if (so->so_usecount < 0)
			panic("%s: so=%p usecount=%d lrh= %s\n", __func__,
			    so, so->so_usecount, solockhistory_nr(so));
	} else {
		panic("%s: so=%p NULL NO so_pcb %s\n", __func__,
		    so, solockhistory_nr(so));
	}

	return (&ev_pcb->evp_mtx);
}

static int
event_lock(struct socket *so, int refcount, void *lr)
{
	void *lr_saved;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

	if (so->so_pcb != NULL) {
		lck_mtx_lock(&((struct kern_event_pcb *)so->so_pcb)->evp_mtx);
	} else {
		panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
		    so, lr_saved, solockhistory_nr(so));
	}

	if (so->so_usecount < 0) {
		panic("%s: so=%p so_pcb=%p lr=%p ref=%d lrh= %s\n", __func__,
		    so, so->so_pcb, lr_saved, so->so_usecount,
		    solockhistory_nr(so));
	}

	if (refcount)
		so->so_usecount++;

	so->lock_lr[so->next_lock_lr] = lr_saved;
	so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
	return (0);
}

static int
event_unlock(struct socket *so, int refcount, void *lr)
{
	void *lr_saved;
	lck_mtx_t *mutex_held;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

	if (refcount) {
		so->so_usecount--;
	}
	if (so->so_usecount < 0) {
		panic("%s: so=%p usecount=%d lrh= %s\n", __func__,
		    so, so->so_usecount, solockhistory_nr(so));
	}
	if (so->so_pcb == NULL) {
		panic("%s: so=%p NO PCB usecount=%d lr=%p lrh= %s\n", __func__,
		    so, so->so_usecount, (void *)lr_saved,
		    solockhistory_nr(so));
	}
	mutex_held = (&((struct kern_event_pcb *)so->so_pcb)->evp_mtx);

	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;

	if (so->so_usecount == 0) {
		VERIFY(so->so_flags & SOF_PCBCLEARING);
		event_sofreelastref(so);
	} else {
		lck_mtx_unlock(mutex_held);
	}

	return (0);
}

static int
event_sofreelastref(struct socket *so)
{
	struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb;

	LCK_MTX_ASSERT(&(ev_pcb->evp_mtx), LCK_MTX_ASSERT_OWNED);

	so->so_pcb = NULL;

	/*
	 * Disable upcall in the event another thread is in kev_post_msg()
	 * appending record to the receive socket buffer, since sbwakeup()
	 * may release the socket lock otherwise.
	 */
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~SB_UPCALL;
	so->so_event = sonullevent;
	lck_mtx_unlock(&(ev_pcb->evp_mtx));

	LCK_MTX_ASSERT(&(ev_pcb->evp_mtx), LCK_MTX_ASSERT_NOTOWNED);
	lck_rw_lock_exclusive(kev_rwlock);
	LIST_REMOVE(ev_pcb, evp_link);
	kevtstat.kes_pcbcount--;
	kevtstat.kes_gencnt++;
	lck_rw_done(kev_rwlock);
	kev_delete(ev_pcb);

	sofreelastref(so, 1);
	return (0);
}
static int event_proto_count = (sizeof (eventsw) / sizeof (struct protosw));

struct kern_event_head kern_event_head;

static u_int32_t static_event_id = 0;

#define EVPCB_ZONE_MAX		65536
#define EVPCB_ZONE_NAME		"kerneventpcb"
static struct zone *ev_pcb_zone;

/*
 * Install the protosw's for the NKE manager.  Invoked at extension load time
 */
void
kern_event_init(struct domain *dp)
{
	struct protosw *pr;
	int i;

	VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
	VERIFY(dp == systemdomain);

	kev_lck_grp_attr = lck_grp_attr_alloc_init();
	if (kev_lck_grp_attr == NULL) {
		panic("%s: lck_grp_attr_alloc_init failed\n", __func__);
	}

	kev_lck_grp = lck_grp_alloc_init("Kernel Event Protocol",
	    kev_lck_grp_attr);
	if (kev_lck_grp == NULL) {
		panic("%s: lck_grp_alloc_init failed\n", __func__);
	}

	kev_lck_attr = lck_attr_alloc_init();
	if (kev_lck_attr == NULL) {
		panic("%s: lck_attr_alloc_init failed\n", __func__);
	}

	lck_rw_init(kev_rwlock, kev_lck_grp, kev_lck_attr);
	if (kev_rwlock == NULL) {
		panic("%s: lck_mtx_alloc_init failed\n", __func__);
	}

	for (i = 0, pr = &eventsw[0]; i < event_proto_count; i++, pr++)
		net_add_proto(pr, dp, 1);

	ev_pcb_zone = zinit(sizeof(struct kern_event_pcb),
	    EVPCB_ZONE_MAX * sizeof(struct kern_event_pcb), 0, EVPCB_ZONE_NAME);
	if (ev_pcb_zone == NULL) {
		panic("%s: failed allocating ev_pcb_zone", __func__);
	}
	zone_change(ev_pcb_zone, Z_EXPAND, TRUE);
	zone_change(ev_pcb_zone, Z_CALLERACCT, TRUE);
}
static int
kev_attach(struct socket *so, __unused int proto, __unused struct proc *p)
{
	int error = 0;
	struct kern_event_pcb *ev_pcb;

	error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE);
	if (error != 0)
		return (error);

	if ((ev_pcb = (struct kern_event_pcb *)zalloc(ev_pcb_zone)) == NULL) {
		return (ENOBUFS);
	}
	bzero(ev_pcb, sizeof(struct kern_event_pcb));
	lck_mtx_init(&ev_pcb->evp_mtx, kev_lck_grp, kev_lck_attr);

	ev_pcb->evp_socket = so;
	ev_pcb->evp_vendor_code_filter = 0xffffffff;

	so->so_pcb = (caddr_t) ev_pcb;
	lck_rw_lock_exclusive(kev_rwlock);
	LIST_INSERT_HEAD(&kern_event_head, ev_pcb, evp_link);
	kevtstat.kes_pcbcount++;
	kevtstat.kes_gencnt++;
	lck_rw_done(kev_rwlock);

	return (error);
}

static void
kev_delete(struct kern_event_pcb *ev_pcb)
{
	VERIFY(ev_pcb != NULL);
	lck_mtx_destroy(&ev_pcb->evp_mtx, kev_lck_grp);
	zfree(ev_pcb_zone, ev_pcb);
}

static int
kev_detach(struct socket *so)
{
	struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb;

	if (ev_pcb != NULL) {
		soisdisconnected(so);
		so->so_flags |= SOF_PCBCLEARING;
	}

	return (0);
}

/*
 * For now, kev_vendor_code and mbuf_tags use the same
 * mechanism.
 */
errno_t kev_vendor_code_find(
	const char	*string,
	u_int32_t	*out_vendor_code)
{
	if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
		return (EINVAL);
	}
	return (net_str_id_find_internal(string, out_vendor_code,
	    NSI_VENDOR_CODE, 1));
}

errno_t
kev_msg_post(struct kev_msg *event_msg)
{
	mbuf_tag_id_t min_vendor, max_vendor;

	net_str_id_first_last(&min_vendor, &max_vendor, NSI_VENDOR_CODE);

	if (event_msg == NULL)
		return (EINVAL);

	/*
	 * Limit third parties to posting events for registered vendor codes
	 * only
	 */
	if (event_msg->vendor_code < min_vendor ||
	    event_msg->vendor_code > max_vendor) {
		OSIncrementAtomic64((SInt64 *)&kevtstat.kes_badvendor);
		return (EINVAL);
	}
	return (kev_post_msg(event_msg));
}
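/*
 * Illustrative sketch, not from the original sources: how an in-kernel
 * client might fill a struct kev_msg before calling kev_post_msg().  The
 * field names come from the code above; the class/subclass/event values and
 * the helper name are placeholders, and the vendor code would normally come
 * from kev_vendor_code_find().
 */
#if 0	/* illustrative example, not compiled */
static void
post_example_event(u_int32_t vendor_code)
{
	struct kev_msg ev_msg;
	u_int32_t payload = 42;

	bzero(&ev_msg, sizeof(ev_msg));
	ev_msg.vendor_code = vendor_code;
	ev_msg.kev_class = 1;		/* placeholder class */
	ev_msg.kev_subclass = 2;	/* placeholder subclass */
	ev_msg.event_code = 3;		/* placeholder event */

	/* Up to five data vectors; a zero length terminates the list. */
	ev_msg.dv[0].data_ptr = &payload;
	ev_msg.dv[0].data_length = sizeof(payload);
	ev_msg.dv[1].data_length = 0;

	(void)kev_post_msg(&ev_msg);
}
#endif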
int
kev_post_msg(struct kev_msg *event_msg)
{
	struct mbuf *m, *m2;
	struct kern_event_pcb *ev_pcb;
	struct kern_event_msg *ev;
	char *tmp;
	u_int32_t total_size;
	int i;

	/* Verify the message is small enough to fit in one mbuf w/o cluster */
	total_size = KEV_MSG_HEADER_SIZE;

	for (i = 0; i < 5; i++) {
		if (event_msg->dv[i].data_length == 0)
			break;
		total_size += event_msg->dv[i].data_length;
	}

	if (total_size > MLEN) {
		OSIncrementAtomic64((SInt64 *)&kevtstat.kes_toobig);
		return (EMSGSIZE);
	}

	m = m_get(M_WAIT, MT_DATA);
	if (m == 0) {
		OSIncrementAtomic64((SInt64 *)&kevtstat.kes_nomem);
		return (ENOMEM);
	}
	ev = mtod(m, struct kern_event_msg *);
	total_size = KEV_MSG_HEADER_SIZE;

	tmp = (char *) &ev->event_data[0];
	for (i = 0; i < 5; i++) {
		if (event_msg->dv[i].data_length == 0)
			break;

		total_size += event_msg->dv[i].data_length;
		bcopy(event_msg->dv[i].data_ptr, tmp,
		    event_msg->dv[i].data_length);
		tmp += event_msg->dv[i].data_length;
	}

	ev->id = ++static_event_id;
	ev->total_size = total_size;
	ev->vendor_code = event_msg->vendor_code;
	ev->kev_class = event_msg->kev_class;
	ev->kev_subclass = event_msg->kev_subclass;
	ev->event_code = event_msg->event_code;

	m->m_len = total_size;
	lck_rw_lock_shared(kev_rwlock);
	for (ev_pcb = LIST_FIRST(&kern_event_head);
	    ev_pcb;
	    ev_pcb = LIST_NEXT(ev_pcb, evp_link)) {
		lck_mtx_lock(&ev_pcb->evp_mtx);
		if (ev_pcb->evp_socket->so_pcb == NULL) {
			lck_mtx_unlock(&ev_pcb->evp_mtx);
			continue;
		}
		if (ev_pcb->evp_vendor_code_filter != KEV_ANY_VENDOR) {
			if (ev_pcb->evp_vendor_code_filter != ev->vendor_code) {
				lck_mtx_unlock(&ev_pcb->evp_mtx);
				continue;
			}

			if (ev_pcb->evp_class_filter != KEV_ANY_CLASS) {
				if (ev_pcb->evp_class_filter != ev->kev_class) {
					lck_mtx_unlock(&ev_pcb->evp_mtx);
					continue;
				}

				if ((ev_pcb->evp_subclass_filter !=
				    KEV_ANY_SUBCLASS) &&
				    (ev_pcb->evp_subclass_filter !=
				    ev->kev_subclass)) {
					lck_mtx_unlock(&ev_pcb->evp_mtx);
					continue;
				}
			}
		}

		m2 = m_copym(m, 0, m->m_len, M_WAIT);
		if (m2 == 0) {
			OSIncrementAtomic64((SInt64 *)&kevtstat.kes_nomem);
			m_free(m);
			lck_mtx_unlock(&ev_pcb->evp_mtx);
			lck_rw_done(kev_rwlock);
			return (ENOMEM);
		}
		if (sbappendrecord(&ev_pcb->evp_socket->so_rcv, m2)) {
			/*
			 * We use "m" for the socket stats as it would be
			 * unsafe to use "m2"
			 */
			so_inc_recv_data_stat(ev_pcb->evp_socket,
			    1, m->m_len, MBUF_TC_BE);

			sorwakeup(ev_pcb->evp_socket);
			OSIncrementAtomic64((SInt64 *)&kevtstat.kes_posted);
		} else {
			OSIncrementAtomic64((SInt64 *)&kevtstat.kes_fullsock);
		}
		lck_mtx_unlock(&ev_pcb->evp_mtx);
	}
	m_free(m);
	lck_rw_done(kev_rwlock);

static int
kev_control(struct socket *so,
    u_long cmd,
    caddr_t data,
    __unused struct ifnet *ifp,
    __unused struct proc *p)
{
	struct kev_request *kev_req = (struct kev_request *) data;
	struct kern_event_pcb *ev_pcb;
	struct kev_vendor_code *kev_vendor;
	u_int32_t *id_value = (u_int32_t *) data;

	switch (cmd) {
	case SIOCGKEVID:
		*id_value = static_event_id;
		break;
	case SIOCSKEVFILT:
		ev_pcb = (struct kern_event_pcb *) so->so_pcb;
		ev_pcb->evp_vendor_code_filter = kev_req->vendor_code;
		ev_pcb->evp_class_filter = kev_req->kev_class;
		ev_pcb->evp_subclass_filter = kev_req->kev_subclass;
		break;
	case SIOCGKEVFILT:
		ev_pcb = (struct kern_event_pcb *) so->so_pcb;
		kev_req->vendor_code = ev_pcb->evp_vendor_code_filter;
		kev_req->kev_class = ev_pcb->evp_class_filter;
		kev_req->kev_subclass = ev_pcb->evp_subclass_filter;
		break;
	case SIOCGKEVVENDOR:
		kev_vendor = (struct kev_vendor_code *)data;
		/* Make sure string is NULL terminated */
		kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN - 1] = 0;
		return (net_str_id_find_internal(kev_vendor->vendor_string,
		    &kev_vendor->vendor_code, NSI_VENDOR_CODE, 0));
	default:
		return (ENOTSUP);
	}

	return (0);
}
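
/*
 * Example (illustrative sketch, not part of this file): these ioctls are
 * normally exercised from user space over a PF_SYSTEM/SYSPROTO_EVENT
 * socket.  A minimal consumer of Apple network-class events could look
 * roughly like this (error handling omitted); the constants and structs
 * come from <sys/kern_event.h>:
 *
 *	#include <sys/socket.h>
 *	#include <sys/sys_domain.h>
 *	#include <sys/kern_event.h>
 *	#include <sys/ioctl.h>
 *	#include <unistd.h>
 *
 *	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *
 *	struct kev_request req = {
 *		.vendor_code  = KEV_VENDOR_APPLE,
 *		.kev_class    = KEV_NETWORK_CLASS,
 *		.kev_subclass = KEV_ANY_SUBCLASS,
 *	};
 *	ioctl(fd, SIOCSKEVFILT, &req);
 *
 *	char buf[1024];
 *	ssize_t len = recv(fd, buf, sizeof(buf), 0);
 *	struct kern_event_msg *msg = (struct kern_event_msg *)buf;
 *
 * Each record delivered by kev_post_msg() starts with total_size,
 * vendor_code, kev_class, kev_subclass, id and event_code, followed by
 * the event_data[] payload.
 */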

__private_extern__ int
kevt_getstat SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;

	lck_rw_lock_shared(kev_rwlock);

	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = sizeof(struct kevtstat);
		goto done;
	}

	error = SYSCTL_OUT(req, &kevtstat,
	    MIN(sizeof(struct kevtstat), req->oldlen));
done:
	lck_rw_done(kev_rwlock);

	return (error);
}
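
/*
 * Example (illustrative sketch): assuming this handler is registered under
 * a net.systm.kevt sysctl node elsewhere in this file (the MIB name is an
 * assumption of this example, not something shown in this excerpt), the
 * counters can be read from user space with sysctlbyname():
 *
 *	#include <stdio.h>
 *	#include <sys/sysctl.h>
 *	#include <sys/kern_event.h>
 *
 *	struct kevtstat st;
 *	size_t len = sizeof(st);
 *	if (sysctlbyname("net.systm.kevt.stats", &st, &len, NULL, 0) == 0) {
 *		printf("pcbs=%llu posted=%llu fullsock=%llu\n",
 *		    st.kes_pcbcount, st.kes_posted, st.kes_fullsock);
 *	}
 */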

__private_extern__ int
kevt_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	int n, i;
	struct xsystmgen xsg;
	void *buf = NULL;
	size_t item_size = ROUNDUP64(sizeof (struct xkevtpcb)) +
	    ROUNDUP64(sizeof (struct xsocket_n)) +
	    2 * ROUNDUP64(sizeof (struct xsockbuf_n)) +
	    ROUNDUP64(sizeof (struct xsockstat_n));
	struct kern_event_pcb *ev_pcb;

	buf = _MALLOC(item_size, M_TEMP, M_WAITOK | M_ZERO);
	if (buf == NULL)
		return (ENOMEM);

	lck_rw_lock_shared(kev_rwlock);

	n = kevtstat.kes_pcbcount;

	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = (n + n/8) * item_size;
		goto done;
	}
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	bzero(&xsg, sizeof (xsg));
	xsg.xg_len = sizeof (xsg);
	xsg.xg_count = n;
	xsg.xg_gen = kevtstat.kes_gencnt;
	xsg.xg_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xsg, sizeof (xsg));
	if (error) {
		goto done;
	}
	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		goto done;
	}

	for (i = 0, ev_pcb = LIST_FIRST(&kern_event_head);
	    i < n && ev_pcb != NULL;
	    i++, ev_pcb = LIST_NEXT(ev_pcb, evp_link)) {
		struct xkevtpcb *xk = (struct xkevtpcb *)buf;
		struct xsocket_n *xso = (struct xsocket_n *)
		    ADVANCE64(xk, sizeof (*xk));
		struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
		    ADVANCE64(xso, sizeof (*xso));
		struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
		    ADVANCE64(xsbrcv, sizeof (*xsbrcv));
		struct xsockstat_n *xsostats = (struct xsockstat_n *)
		    ADVANCE64(xsbsnd, sizeof (*xsbsnd));

		bzero(buf, item_size);

		lck_mtx_lock(&ev_pcb->evp_mtx);

		xk->kep_len = sizeof(struct xkevtpcb);
		xk->kep_kind = XSO_EVT;
		xk->kep_evtpcb = (uint64_t)VM_KERNEL_ADDRPERM(ev_pcb);
		xk->kep_vendor_code_filter = ev_pcb->evp_vendor_code_filter;
		xk->kep_class_filter = ev_pcb->evp_class_filter;
		xk->kep_subclass_filter = ev_pcb->evp_subclass_filter;

		sotoxsocket_n(ev_pcb->evp_socket, xso);
		sbtoxsockbuf_n(ev_pcb->evp_socket ?
		    &ev_pcb->evp_socket->so_rcv : NULL, xsbrcv);
		sbtoxsockbuf_n(ev_pcb->evp_socket ?
		    &ev_pcb->evp_socket->so_snd : NULL, xsbsnd);
		sbtoxsockstat_n(ev_pcb->evp_socket, xsostats);

		lck_mtx_unlock(&ev_pcb->evp_mtx);

		error = SYSCTL_OUT(req, buf, item_size);
	}

	if (error == 0) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xsg, sizeof (xsg));
		xsg.xg_len = sizeof (xsg);
		xsg.xg_count = n;
		xsg.xg_gen = kevtstat.kes_gencnt;
		xsg.xg_sogen = so_gencnt;
		error = SYSCTL_OUT(req, &xsg, sizeof (xsg));
	}

done:
	lck_rw_done(kev_rwlock);

	if (buf != NULL)
		FREE(buf, M_TEMP);

	return (error);
}

#endif /* SOCKETS */

int
fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
{
	struct vinfo_stat * st;

	st = &kinfo->kq_stat;

	st->vst_size = kq->kq_count;
	if (kq->kq_state & KQ_KEV_QOS)
		st->vst_blksize = sizeof(struct kevent_qos_s);
	else if (kq->kq_state & KQ_KEV64)
		st->vst_blksize = sizeof(struct kevent64_s);
	else
		st->vst_blksize = sizeof(struct kevent);
	st->vst_mode = S_IFIFO;
	st->vst_ino = (kq->kq_state & KQ_DYNAMIC) ?
	    ((struct kqworkloop *)kq)->kqwl_dynamicid : 0;

	/* flags exported to libproc as PROC_KQUEUE_* (sys/proc_info.h) */
#define PROC_KQUEUE_MASK (KQ_SEL|KQ_SLEEP|KQ_KEV32|KQ_KEV64|KQ_KEV_QOS|KQ_WORKQ|KQ_WORKLOOP)
	kinfo->kq_state = kq->kq_state & PROC_KQUEUE_MASK;

	return (0);
}

static int
fill_kqueue_dyninfo(struct kqueue *kq, struct kqueue_dyninfo *kqdi)
{
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	struct kqrequest *kqr = &kqwl->kqwl_request;
	int err;

	if ((kq->kq_state & KQ_WORKLOOP) == 0) {
		return (EINVAL);
	}

	if ((err = fill_kqueueinfo(kq, &kqdi->kqdi_info))) {
		return (err);
	}

	kqwl_req_lock(kqwl);

	if (kqr->kqr_thread) {
		kqdi->kqdi_servicer = thread_tid(kqr->kqr_thread);
	}

	if (kqwl->kqwl_owner == WL_OWNER_SUSPENDED) {
		kqdi->kqdi_owner = ~0ull;
	} else {
		kqdi->kqdi_owner = thread_tid(kqwl->kqwl_owner);
	}

	kqdi->kqdi_request_state = kqr->kqr_state;
	kqdi->kqdi_async_qos = kqr->kqr_qos_index;
	kqdi->kqdi_events_qos = kqr->kqr_override_index;
	kqdi->kqdi_sync_waiters = kqr->kqr_dsync_waiters;
	kqdi->kqdi_sync_waiter_qos = kqr->kqr_dsync_waiters_qos;

	kqwl_req_unlock(kqwl);

	return (0);
}

void
knote_markstayactive(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);

	kqlock(kq);
	kn->kn_status |= KN_STAYACTIVE;

	/*
	 * Making a knote stay active is a property of the knote that must be
	 * established before it is fully attached.
	 */
	assert(kn->kn_status & KN_ATTACHING);

	/* handle all stayactive knotes on the (appropriate) manager */
	if (kq->kq_state & KQ_WORKQ) {
		knote_set_qos_index(kn, KQWQ_QOS_MANAGER);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		kqwl_req_lock(kqwl);
		assert(kn->kn_req_index && kn->kn_req_index < THREAD_QOS_LAST);
		kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_STAYACTIVE_QOS,
		    kn->kn_req_index);
		kqwl_req_unlock(kqwl);
		knote_set_qos_index(kn, KQWL_BUCKET_STAYACTIVE);
	}

	knote_activate(kn);
	kqunlock(kq);
}

void
knote_clearstayactive(struct knote *kn)
{
	kqlock(knote_get_kq(kn));
	kn->kn_status &= ~KN_STAYACTIVE;
	knote_deactivate(kn);
	kqunlock(knote_get_kq(kn));
}

static unsigned long
kevent_extinfo_emit(struct kqueue *kq, struct knote *kn, struct kevent_extinfo *buf,
    unsigned long buflen, unsigned long nknotes)
{
	for (; kn; kn = SLIST_NEXT(kn, kn_link)) {
		if (kq == knote_get_kq(kn)) {
			if (nknotes < buflen) {
				struct kevent_extinfo *info = &buf[nknotes];
				struct kevent_internal_s *kevp = &kn->kn_kevent;

				kqlock(kq);

				info->kqext_kev = (struct kevent_qos_s){
					.ident = kevp->ident,
					.filter = kevp->filter,
					.flags = kevp->flags,
					.fflags = kevp->fflags,
					.data = (int64_t)kevp->data,
					.udata = kevp->udata,
					.ext[0] = kevp->ext[0],
					.ext[1] = kevp->ext[1],
					.ext[2] = kevp->ext[2],
					.ext[3] = kevp->ext[3],
					.qos = kn->kn_req_index,
				};
				info->kqext_sdata = kn->kn_sdata;
				info->kqext_status = kn->kn_status;
				info->kqext_sfflags = kn->kn_sfflags;

				kqunlock(kq);
			}

			/* we return total number of knotes, which may be more than requested */
			nknotes++;
		}
	}

	return nknotes;
}

int
kevent_copyout_proc_dynkqids(void *proc, user_addr_t ubuf, uint32_t ubufsize,
    int32_t *nkqueues_out)
{
	proc_t p = (proc_t)proc;
	struct filedesc *fdp = p->p_fd;
	unsigned int nkqueues = 0;
	unsigned long ubuflen = ubufsize / sizeof(kqueue_id_t);
	size_t buflen, bufsize;
	kqueue_id_t *kq_ids = NULL;
	int err = 0;

	if (ubuf == USER_ADDR_NULL && ubufsize != 0) {
		err = EINVAL;
		goto out;
	}

	buflen = min(ubuflen, PROC_PIDDYNKQUEUES_MAX);

	if (buflen != 0) {
		if (os_mul_overflow(sizeof(kqueue_id_t), buflen, &bufsize)) {
			err = ERANGE;
			goto out;
		}
		kq_ids = kalloc(bufsize);
		assert(kq_ids != NULL);
	}

	kqhash_lock(p);

	if (fdp->fd_kqhashmask > 0) {
		for (uint32_t i = 0; i < fdp->fd_kqhashmask + 1; i++) {
			struct kqworkloop *kqwl;

			SLIST_FOREACH(kqwl, &fdp->fd_kqhash[i], kqwl_hashlink) {
				/* report the number of kqueues, even if they don't all fit */
				if (nkqueues < buflen) {
					kq_ids[nkqueues] = kqwl->kqwl_dynamicid;
				}
				nkqueues++;
			}
		}
	}

	kqhash_unlock(p);

	if (kq_ids) {
		size_t copysize;
		if (os_mul_overflow(sizeof(kqueue_id_t), min(ubuflen, nkqueues), &copysize)) {
			err = ERANGE;
			goto out;
		}

		assert(ubufsize >= copysize);
		err = copyout(kq_ids, ubuf, copysize);
	}

out:
	if (kq_ids) {
		kfree(kq_ids, bufsize);
	}

	if (!err) {
		*nkqueues_out = (int)min(nkqueues, PROC_PIDDYNKQUEUES_MAX);
	}
	return (err);
}

int
kevent_copyout_dynkqinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf,
    uint32_t ubufsize, int32_t *size_out)
{
	proc_t p = (proc_t)proc;
	struct kqueue *kq;
	int err = 0;
	struct kqueue_dyninfo kqdi = { };

	if (ubufsize < sizeof(struct kqueue_info)) {
		return (ENOBUFS);
	}

	kqhash_lock(p);
	kq = kqueue_hash_lookup(p, kq_id);
	if (!kq) {
		kqhash_unlock(p);
		return (ESRCH);
	}
	kqueue_retain(kq);
	kqhash_unlock(p);

	/*
	 * backward compatibility: allow the argument to this call to only be
	 * a struct kqueue_info
	 */
	if (ubufsize >= sizeof(struct kqueue_dyninfo)) {
		ubufsize = sizeof(struct kqueue_dyninfo);
		err = fill_kqueue_dyninfo(kq, &kqdi);
	} else {
		ubufsize = sizeof(struct kqueue_info);
		err = fill_kqueueinfo(kq, &kqdi.kqdi_info);
	}
	if (err == 0 && (err = copyout(&kqdi, ubuf, ubufsize)) == 0) {
		*size_out = ubufsize;
	}
	kqueue_release_last(p, kq);
	return (err);
}
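
/*
 * Example (illustrative sketch): once an ID has been obtained (see the
 * sketch after kevent_copyout_proc_dynkqids above), the state filled in
 * here can be fetched with the libproc wrapper proc_piddynkqueueinfo()
 * and the PROC_PIDDYNKQUEUE_INFO flavor from <sys/proc_info.h>.  Both of
 * those names are assumptions of this example rather than something
 * established in this excerpt:
 *
 *	#include <libproc.h>
 *	#include <sys/proc_info.h>
 *
 *	struct kqueue_dyninfo kqdi;
 *	int ret = proc_piddynkqueueinfo(pid, PROC_PIDDYNKQUEUE_INFO,
 *	    kq_id, &kqdi, sizeof(kqdi));
 *
 * On success, kqdi.kqdi_servicer and kqdi.kqdi_owner carry the thread IDs
 * reported by fill_kqueue_dyninfo() above; a buffer at least as large as
 * struct kqueue_dyninfo selects the extended layout, as handled by the
 * backward-compatibility check in this routine.
 */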

int
kevent_copyout_dynkqextinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf,
    uint32_t ubufsize, int32_t *nknotes_out)
{
	proc_t p = (proc_t)proc;
	struct kqueue *kq;
	int err;

	kqhash_lock(p);
	kq = kqueue_hash_lookup(p, kq_id);
	if (!kq) {
		kqhash_unlock(p);
		return (ESRCH);
	}
	kqueue_retain(kq);
	kqhash_unlock(p);

	err = pid_kqueue_extinfo(p, kq, ubuf, ubufsize, nknotes_out);
	kqueue_release_last(p, kq);
	return (err);
}

static int
pid_kqueue_extinfo(proc_t p, struct kqueue *kq, user_addr_t ubuf,
    uint32_t bufsize, int32_t *retval)
{
	struct knote *kn;
	int i;
	int err = 0;
	struct filedesc *fdp = p->p_fd;
	unsigned long nknotes = 0;
	unsigned long buflen = bufsize / sizeof(struct kevent_extinfo);
	struct kevent_extinfo *kqext = NULL;

	/* arbitrary upper limit to cap kernel memory usage, copyout size, etc. */
	buflen = min(buflen, PROC_PIDFDKQUEUE_KNOTES_MAX);

	kqext = kalloc(buflen * sizeof(struct kevent_extinfo));
	if (kqext == NULL) {
		err = ENOMEM;
		goto out;
	}
	bzero(kqext, buflen * sizeof(struct kevent_extinfo));

	proc_fdlock(p);
	for (i = 0; i < fdp->fd_knlistsize; i++) {
		kn = SLIST_FIRST(&fdp->fd_knlist[i]);
		nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes);
	}
	proc_fdunlock(p);

	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
			kn = SLIST_FIRST(&fdp->fd_knhash[i]);
			nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes);
		}
	}

	assert(bufsize >= sizeof(struct kevent_extinfo) * min(buflen, nknotes));
	err = copyout(kqext, ubuf, sizeof(struct kevent_extinfo) * min(buflen, nknotes));

out:
	if (kqext) {
		kfree(kqext, buflen * sizeof(struct kevent_extinfo));
		kqext = NULL;
	}

	if (!err) {
		*retval = min(nknotes, PROC_PIDFDKQUEUE_KNOTES_MAX);
	}
	return (err);
}

static unsigned int
klist_copy_udata(struct klist *list, uint64_t *buf,
    unsigned int buflen, unsigned int nknotes)
{
	struct kevent_internal_s *kev;
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_link) {
		if (nknotes < buflen) {
			struct kqueue *kq = knote_get_kq(kn);
			kqlock(kq);
			kev = &(kn->kn_kevent);
			buf[nknotes] = kev->udata;
			kqunlock(kq);
		}
		/* we return total number of knotes, which may be more than requested */
		nknotes++;
	}

	return nknotes;
}

static unsigned int
kqlist_copy_dynamicids(__assert_only proc_t p, struct kqlist *list,
    uint64_t *buf, unsigned int buflen, unsigned int nids)
{
	kqhash_lock_held(p);
	struct kqworkloop *kqwl;
	SLIST_FOREACH(kqwl, list, kqwl_hashlink) {
		if (nids < buflen) {
			buf[nids] = kqwl->kqwl_dynamicid;
		}
		nids++;
	}
	return nids;
}

int
kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize)
{
	proc_t p = (proc_t)proc;
	struct filedesc *fdp = p->p_fd;
	unsigned int nuptrs = 0;
	unsigned long buflen = bufsize / sizeof(uint64_t);

	if (buflen > 0) {
		assert(buf != NULL);
	}

	for (int i = 0; i < fdp->fd_knlistsize; i++) {
		nuptrs = klist_copy_udata(&fdp->fd_knlist[i], buf, buflen, nuptrs);
	}

	if (fdp->fd_knhashmask != 0) {
		for (int i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
			nuptrs = klist_copy_udata(&fdp->fd_knhash[i], buf, buflen, nuptrs);
		}
	}

	if (fdp->fd_kqhashmask != 0) {
		for (int i = 0; i < (int)fdp->fd_kqhashmask + 1; i++) {
			nuptrs = kqlist_copy_dynamicids(p, &fdp->fd_kqhash[i], buf, buflen,
			    nuptrs);
		}
	}

	return (int)nuptrs;
}

static void
kevent_redrive_proc_thread_request(proc_t p)
{
	__assert_only int ret;
	ret = (*pthread_functions->workq_threadreq)(p, NULL, WORKQ_THREADREQ_REDRIVE, 0, 0);
	assert(ret == 0 || ret == ECANCELED);
}

static void
kevent_set_return_to_kernel_user_tsd(proc_t p, thread_t thread)
{
	uint64_t ast_addr;
	bool proc_is_64bit = !!(p->p_flag & P_LP64);
	size_t user_addr_size = proc_is_64bit ? 8 : 4;
	uint32_t ast_flags32 = 0;
	uint64_t ast_flags64 = 0;
	struct uthread *ut = get_bsdthread_info(thread);

	if (ut->uu_kqueue_bound != NULL) {
		if (ut->uu_kqueue_flags & KEVENT_FLAG_WORKLOOP) {
			ast_flags64 |= R2K_WORKLOOP_PENDING_EVENTS;
		} else if (ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ) {
			ast_flags64 |= R2K_WORKQ_PENDING_EVENTS;
		}
	}

	if (ast_flags64 == 0) {
		return;
	}

	if (!(p->p_flag & P_LP64)) {
		ast_flags32 = (uint32_t)ast_flags64;
		assert(ast_flags64 < 0x100000000ull);
	}

	ast_addr = thread_rettokern_addr(thread);
	if (ast_addr == 0) {
		return;
	}

	if (copyout((proc_is_64bit ? (void *)&ast_flags64 : (void *)&ast_flags32),
	    (user_addr_t)ast_addr,
	    user_addr_size) != 0) {
		printf("pid %d (tid:%llu): copyout of return_to_kernel ast flags failed with "
		    "ast_addr = %llu\n", p->p_pid, thread_tid(current_thread()), ast_addr);
	}
}

void
kevent_ast(thread_t thread, uint16_t bits)
{
	proc_t p = current_proc();

	if (bits & AST_KEVENT_REDRIVE_THREADREQ) {
		kevent_redrive_proc_thread_request(p);
	}
	if (bits & AST_KEVENT_RETURN_TO_KERNEL) {
		kevent_set_return_to_kernel_user_tsd(p, thread);
	}
}

#if DEVELOPMENT || DEBUG

#define KEVENT_SYSCTL_BOUND_ID 1

static int
kevent_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	uintptr_t type = (uintptr_t)arg1;
	uint64_t bound_id = 0;
	struct uthread *ut;
	struct kqueue *kq;

	if (type != KEVENT_SYSCTL_BOUND_ID) {
		return (EINVAL);
	}

	if (req->newptr) {
		return (EINVAL);
	}

	ut = get_bsdthread_info(current_thread());
	if (!ut) {
		return (EFAULT);
	}

	kq = ut->uu_kqueue_bound;
	if (kq) {
		if (kq->kq_state & KQ_WORKLOOP) {
			bound_id = ((struct kqworkloop *)kq)->kqwl_dynamicid;
		} else if (kq->kq_state & KQ_WORKQ) {
			bound_id = -1;
		}
	}

	return sysctl_io_number(req, bound_id, sizeof(bound_id), NULL, NULL);
}

SYSCTL_NODE(_kern, OID_AUTO, kevent, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "kevent information");

SYSCTL_PROC(_kern_kevent, OID_AUTO, bound_id,
    CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    (void *)KEVENT_SYSCTL_BOUND_ID,
    sizeof(kqueue_id_t), kevent_sysctl, "Q",
    "get the ID of the bound kqueue");
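
/*
 * Example (illustrative sketch): on DEVELOPMENT/DEBUG kernels the OID
 * registered above can be read by a thread to discover which kqueue it is
 * currently bound to:
 *
 *	#include <stdint.h>
 *	#include <sys/sysctl.h>
 *
 *	uint64_t bound_id = 0;
 *	size_t len = sizeof(bound_id);
 *	sysctlbyname("kern.kevent.bound_id", &bound_id, &len, NULL, 0);
 *
 * The value is a workloop's dynamic ID, (uint64_t)-1 for a workq kqueue,
 * or 0 when the calling thread is not bound to any kqueue.
 */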

#endif /* DEVELOPMENT || DEBUG */