/*
 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_event.c	1.0 (3/31/2000)
 */
#include <stdatomic.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vnode_internal.h>
#include <sys/proc_info.h>
#include <sys/codesign.h>
#include <sys/pthread_shims.h>
#include <sys/kdebug.h>
#include <sys/reason.h>
#include <os/reason_private.h>

#include <kern/locks.h>
#include <kern/clock.h>
#include <kern/cpu_data.h>
#include <kern/policy_internal.h>
#include <kern/thread_call.h>
#include <kern/sched_prim.h>
#include <kern/waitq.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/assert.h>
#include <kern/ast.h>
#include <kern/thread.h>
#include <kern/kcdata.h>

#include <libkern/libkern.h>
#include <libkern/OSAtomic.h>

#include "net/net_str_id.h"

#include <mach/task.h>
#include <libkern/section_keywords.h>

#if CONFIG_MEMORYSTATUS
#include <sys/kern_memorystatus.h>
#endif
extern thread_t port_name_to_thread(mach_port_name_t port_name);	/* osfmk/kern/ipc_tt.h   */
extern mach_port_name_t ipc_entry_name_mask(mach_port_name_t name);	/* osfmk/ipc/ipc_entry.h */
#define KEV_EVTID(code) BSDDBG_CODE(DBG_BSD_KEVENT, (code))

/*
 * JMM - this typedef needs to be unified with pthread_priority_t
 * and mach_msg_priority_t. It also needs to be the same type.
 */
typedef int32_t qos_t;

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

#define	KQ_EVENT	NO_EVENT64

#define KNUSE_NONE       0x0
#define KNUSE_STEAL_DROP 0x1
#define KNUSE_BOOST      0x2
static int kqlock2knoteuse(struct kqueue *kq, struct knote *kn, int flags);
static int kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
static int kqlock2knotedetach(struct kqueue *kq, struct knote *kn, int flags);
static int knoteuse2kqlock(struct kqueue *kq, struct knote *kn, int flags);

static int kqueue_read(struct fileproc *fp, struct uio *uio,
		int flags, vfs_context_t ctx);
static int kqueue_write(struct fileproc *fp, struct uio *uio,
		int flags, vfs_context_t ctx);
static int kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
		vfs_context_t ctx);
static int kqueue_select(struct fileproc *fp, int which, void *wq_link_id,
		vfs_context_t ctx);
static int kqueue_close(struct fileglob *fg, vfs_context_t ctx);
static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn,
		struct kevent_internal_s *kev, vfs_context_t ctx);
static int kqueue_drain(struct fileproc *fp, vfs_context_t ctx);

static const struct fileops kqueueops = {
	.fo_type = DTYPE_KQUEUE,
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_ioctl = kqueue_ioctl,
	.fo_select = kqueue_select,
	.fo_close = kqueue_close,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_drain = kqueue_drain,
};
static void kevent_put_kq(struct proc *p, kqueue_id_t id, struct fileproc *fp, struct kqueue *kq);
static int kevent_internal(struct proc *p,
		kqueue_id_t id, kqueue_id_t *id_out,
		user_addr_t changelist, int nchanges,
		user_addr_t eventlist, int nevents,
		user_addr_t data_out, uint64_t data_available,
		unsigned int flags, user_addr_t utimeout,
		kqueue_continue_t continuation,
		int32_t *retval);
static int kevent_copyin(user_addr_t *addrp, struct kevent_internal_s *kevp,
		struct proc *p, unsigned int flags);
static int kevent_copyout(struct kevent_internal_s *kevp, user_addr_t *addrp,
		struct proc *p, unsigned int flags);
char * kevent_description(struct kevent_internal_s *kevp, char *s, size_t n);

static void kqueue_interrupt(struct kqueue *kq);
static int kevent_callback(struct kqueue *kq, struct kevent_internal_s *kevp,
		void *data);
static void kevent_continue(struct kqueue *kq, void *data, int error);
static void kqueue_scan_continue(void *contp, wait_result_t wait_result);
static int kqueue_process(struct kqueue *kq, kevent_callback_t callback, void *callback_data,
		struct filt_process_s *process_data, int *countp, struct proc *p);
static struct kqtailq *kqueue_get_base_queue(struct kqueue *kq, kq_index_t qos_index);
static struct kqtailq *kqueue_get_high_queue(struct kqueue *kq, kq_index_t qos_index);
static int kqueue_queue_empty(struct kqueue *kq, kq_index_t qos_index);

static struct kqtailq *kqueue_get_suppressed_queue(struct kqueue *kq, kq_index_t qos_index);

static void kqworkq_request_thread(struct kqworkq *kqwq, kq_index_t qos_index);
static void kqworkq_request_help(struct kqworkq *kqwq, kq_index_t qos_index);
static void kqworkq_update_override(struct kqworkq *kqwq, kq_index_t qos_index, kq_index_t override_index);
static void kqworkq_bind_thread_impl(struct kqworkq *kqwq, kq_index_t qos_index, thread_t thread, unsigned int flags);
static void kqworkq_unbind_thread(struct kqworkq *kqwq, kq_index_t qos_index, thread_t thread, unsigned int flags);
static struct kqrequest *kqworkq_get_request(struct kqworkq *kqwq, kq_index_t qos_index);
enum {
	KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI       = 0x1,
	KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI       = 0x2,
	KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS = 0x4,
	KQWL_UO_UPDATE_OVERRIDE_LAZY          = 0x8
};

static void kqworkloop_update_override(struct kqworkloop *kqwl, kq_index_t qos_index, kq_index_t override_index, uint32_t flags);
static void kqworkloop_bind_thread_impl(struct kqworkloop *kqwl, thread_t thread, unsigned int flags);
static void kqworkloop_unbind_thread(struct kqworkloop *kqwl, thread_t thread, unsigned int flags);
static inline kq_index_t kqworkloop_combined_qos(struct kqworkloop *kqwl, boolean_t *);
static void kqworkloop_update_suppress_sync_count(struct kqrequest *kqr, uint32_t flags);
enum {
	KQWL_UTQ_NONE,
	/*
	 * The wakeup qos is the qos of QUEUED knotes.
	 *
	 * This QoS is accounted for with the events override in the
	 * kqr_override_index field. It is raised each time a new knote is queued at
	 * a given QoS. The kqr_wakeup_indexes field is a superset of the non empty
	 * knote buckets and is recomputed after each event delivery.
	 */
	KQWL_UTQ_UPDATE_WAKEUP_QOS,
	KQWL_UTQ_UPDATE_STAYACTIVE_QOS,
	KQWL_UTQ_RECOMPUTE_WAKEUP_QOS,
	/*
	 * The wakeup override is for suppressed knotes that have fired again at
	 * a higher QoS than the one for which they are suppressed already.
	 * This override is cleared when the knote suppressed list becomes empty.
	 */
	KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE,
	KQWL_UTQ_RESET_WAKEUP_OVERRIDE,
	/*
	 * The async QoS is the maximum QoS of an event enqueued on this workloop in
	 * userland. It is copied from the only EVFILT_WORKLOOP knote with
	 * a NOTE_WL_THREAD_REQUEST bit set allowed on this workloop. If there is no
	 * such knote, this QoS is 0.
	 */
	KQWL_UTQ_SET_ASYNC_QOS,
	/*
	 * The sync waiters QoS is the maximum QoS of any thread blocked on an
	 * EVFILT_WORKLOOP knote marked with the NOTE_WL_SYNC_WAIT bit.
	 * If there is no such knote, this QoS is 0.
	 */
	KQWL_UTQ_SET_SYNC_WAITERS_QOS,
	KQWL_UTQ_REDRIVE_EVENTS,
};
static void kqworkloop_update_threads_qos(struct kqworkloop *kqwl, int op, kq_index_t qos);
static void kqworkloop_request_help(struct kqworkloop *kqwl, kq_index_t qos_index);
static int knote_process(struct knote *kn, kevent_callback_t callback, void *callback_data,
		struct filt_process_s *process_data, struct proc *p);

static void knote_put(struct knote *kn);

static int kq_add_knote(struct kqueue *kq, struct knote *kn,
		struct kevent_internal_s *kev, struct proc *p, int *knoteuse_flags);
static struct knote *kq_find_knote_and_kq_lock(struct kqueue *kq, struct kevent_internal_s *kev, bool is_fd, struct proc *p);
static void kq_remove_knote(struct kqueue *kq, struct knote *kn, struct proc *p, kn_status_t *kn_status, uint16_t *kq_state);

static void knote_drop(struct knote *kn, struct proc *p);
static struct knote *knote_alloc(void);
static void knote_free(struct knote *kn);

static void knote_activate(struct knote *kn);
static void knote_deactivate(struct knote *kn);

static void knote_enable(struct knote *kn);
static void knote_disable(struct knote *kn);

static int knote_enqueue(struct knote *kn);
static void knote_dequeue(struct knote *kn);

static void knote_suppress(struct knote *kn);
static void knote_unsuppress(struct knote *kn);
static void knote_wakeup(struct knote *kn);

static kq_index_t knote_get_queue_index(struct knote *kn);
static struct kqtailq *knote_get_queue(struct knote *kn);
static kq_index_t knote_get_req_index(struct knote *kn);
static kq_index_t knote_get_qos_index(struct knote *kn);
static void knote_set_qos_index(struct knote *kn, kq_index_t qos_index);
static kq_index_t knote_get_qos_override_index(struct knote *kn);
static kq_index_t knote_get_sync_qos_override_index(struct knote *kn);
static void knote_set_qos_override_index(struct knote *kn, kq_index_t qos_index, boolean_t override_is_sync);
static void knote_set_qos_overcommit(struct knote *kn);
static int filt_fileattach(struct knote *kn, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) file_filtops = {
	.f_attach = filt_fileattach,
};

static void filt_kqdetach(struct knote *kn);
static int filt_kqueue(struct knote *kn, long hint);
static int filt_kqtouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_kqprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) kqread_filtops = {
	.f_detach = filt_kqdetach,
	.f_event = filt_kqueue,
	.f_touch = filt_kqtouch,
	.f_process = filt_kqprocess,
};

/* placeholder for not-yet-implemented filters */
static int filt_badattach(struct knote *kn, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) bad_filtops = {
	.f_attach = filt_badattach,
};

static int filt_procattach(struct knote *kn, struct kevent_internal_s *kev);
static void filt_procdetach(struct knote *kn);
static int filt_proc(struct knote *kn, long hint);
static int filt_proctouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_procprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) proc_filtops = {
	.f_attach = filt_procattach,
	.f_detach = filt_procdetach,
	.f_event = filt_proc,
	.f_touch = filt_proctouch,
	.f_process = filt_procprocess,
};
#if CONFIG_MEMORYSTATUS
extern const struct filterops memorystatus_filtops;
#endif /* CONFIG_MEMORYSTATUS */

extern const struct filterops fs_filtops;

extern const struct filterops sig_filtops;

static zone_t knote_zone;
static zone_t kqfile_zone;
static zone_t kqworkq_zone;
static zone_t kqworkloop_zone;

#define	KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

/* Mach portset filter */
extern const struct filterops machport_filtops;
static int filt_userattach(struct knote *kn, struct kevent_internal_s *kev);
static void filt_userdetach(struct knote *kn);
static int filt_user(struct knote *kn, long hint);
static int filt_usertouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_userprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) user_filtops = {
	.f_attach = filt_userattach,
	.f_detach = filt_userdetach,
	.f_event = filt_user,
	.f_touch = filt_usertouch,
	.f_process = filt_userprocess,
};

static lck_spin_t _filt_userlock;
static void filt_userlock(void);
static void filt_userunlock(void);
/* Workloop filter */
static bool filt_wlneeds_boost(struct kevent_internal_s *kev);
static int filt_wlattach(struct knote *kn, struct kevent_internal_s *kev);
static int filt_wlpost_attach(struct knote *kn, struct kevent_internal_s *kev);
static void filt_wldetach(struct knote *kn);
static int filt_wlevent(struct knote *kn, long hint);
static int filt_wltouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_wldrop_and_unlock(struct knote *kn, struct kevent_internal_s *kev);
static int filt_wlprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) workloop_filtops = {
	.f_needs_boost = filt_wlneeds_boost,
	.f_attach = filt_wlattach,
	.f_post_attach = filt_wlpost_attach,
	.f_detach = filt_wldetach,
	.f_event = filt_wlevent,
	.f_touch = filt_wltouch,
	.f_drop_and_unlock = filt_wldrop_and_unlock,
	.f_process = filt_wlprocess,
};
extern const struct filterops pipe_rfiltops;
extern const struct filterops pipe_wfiltops;
extern const struct filterops ptsd_kqops;
extern const struct filterops ptmx_kqops;
extern const struct filterops soread_filtops;
extern const struct filterops sowrite_filtops;
extern const struct filterops sock_filtops;
extern const struct filterops soexcept_filtops;
extern const struct filterops spec_filtops;
extern const struct filterops bpfread_filtops;
extern const struct filterops necp_fd_rfiltops;
extern const struct filterops fsevent_filtops;
extern const struct filterops vnode_filtops;
extern const struct filterops tty_filtops;

const static struct filterops timer_filtops;
/*
 * Rules for adding new filters to the system:
 *
 * Public filters:
 * - Add a new "EVFILT_" option value to bsd/sys/event.h (typically a negative value)
 *   in the exported section of the header
 * - Update the EVFILT_SYSCOUNT value to reflect the new addition
 * - Add a filterops to the sysfilt_ops array. Public filters should be added at the end
 *   of the Public Filters section in the array.
 *
 * Private filters:
 * - Add a new "EVFILT_" value to bsd/sys/event.h (typically a positive value)
 *   in the XNU_KERNEL_PRIVATE section of the header
 * - Update the EVFILTID_MAX value to reflect the new addition
 * - Add a filterops to the sysfilt_ops. Private filters should be added at the end of
 *   the Private filters section of the array.
 */
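/*
 * Illustrative sketch (hypothetical names, not part of the build): following
 * the rules above, adding a private filter would roughly mean defining an
 * EVFILTID_EXAMPLE id in the XNU_KERNEL_PRIVATE section of bsd/sys/event.h,
 * bumping EVFILTID_MAX, declaring its ops, and appending an entry at the end
 * of the Private filters section of sysfilt_ops below:
 *
 *	extern const struct filterops example_filtops;
 *	...
 *	[EVFILTID_EXAMPLE] = &example_filtops,
 */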
SECURITY_READ_ONLY_EARLY(static struct filterops *) sysfilt_ops[EVFILTID_MAX] = {
	/* Public Filters */
	[~EVFILT_READ]          = &file_filtops,
	[~EVFILT_WRITE]         = &file_filtops,
	[~EVFILT_AIO]           = &bad_filtops,
	[~EVFILT_VNODE]         = &file_filtops,
	[~EVFILT_PROC]          = &proc_filtops,
	[~EVFILT_SIGNAL]        = &sig_filtops,
	[~EVFILT_TIMER]         = &timer_filtops,
	[~EVFILT_MACHPORT]      = &machport_filtops,
	[~EVFILT_FS]            = &fs_filtops,
	[~EVFILT_USER]          = &user_filtops,
	[~EVFILT_SOCK]          = &file_filtops,
#if CONFIG_MEMORYSTATUS
	[~EVFILT_MEMORYSTATUS]  = &memorystatus_filtops,
#else
	[~EVFILT_MEMORYSTATUS]  = &bad_filtops,
#endif
	[~EVFILT_EXCEPT]        = &file_filtops,
	[~EVFILT_WORKLOOP]      = &workloop_filtops,

	/* Private filters */
	[EVFILTID_KQREAD]       = &kqread_filtops,
	[EVFILTID_PIPE_R]       = &pipe_rfiltops,
	[EVFILTID_PIPE_W]       = &pipe_wfiltops,
	[EVFILTID_PTSD]         = &ptsd_kqops,
	[EVFILTID_SOREAD]       = &soread_filtops,
	[EVFILTID_SOWRITE]      = &sowrite_filtops,
	[EVFILTID_SCK]          = &sock_filtops,
	[EVFILTID_SOEXCEPT]     = &soexcept_filtops,
	[EVFILTID_SPEC]         = &spec_filtops,
	[EVFILTID_BPFREAD]      = &bpfread_filtops,
	[EVFILTID_NECP_FD]      = &necp_fd_rfiltops,
	[EVFILTID_FSEVENT]      = &fsevent_filtops,
	[EVFILTID_VN]           = &vnode_filtops,
	[EVFILTID_TTY]          = &tty_filtops,
	[EVFILTID_PTMX]         = &ptmx_kqops,
};
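/*
 * Note on the indexing above: public EVFILT_* identifiers are small negative
 * numbers, so the bitwise complement maps them to small non-negative slots,
 * e.g. EVFILT_READ == -1 gives ~EVFILT_READ == 0 and EVFILT_WRITE == -2 gives
 * ~EVFILT_WRITE == 1, while private EVFILTID_* values index the array directly.
 */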
/* waitq prepost callback */
void waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook, void *knote_hook, int qos);

#ifndef _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
#define _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG 0x02000000 /* pthread event manager bit */
#endif
#ifndef _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
#define _PTHREAD_PRIORITY_OVERCOMMIT_FLAG    0x80000000 /* request overcommit threads */
#endif
#ifndef _PTHREAD_PRIORITY_QOS_CLASS_MASK
#define _PTHREAD_PRIORITY_QOS_CLASS_MASK     0x003fff00 /* QoS class mask */
#endif
#ifndef _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32
#define _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32 8
#endif
static inline __kdebug_only
uintptr_t
kqr_thread_id(struct kqrequest *kqr)
{
	return (uintptr_t)thread_tid(kqr->kqr_thread);
}

boolean_t
is_workqueue_thread(thread_t thread)
{
	return (thread_get_tag(thread) & THREAD_TAG_WORKQUEUE);
}

void
knote_canonicalize_kevent_qos(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	unsigned long canonical;

	if ((kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0)
		return;

	/* preserve manager and overcommit flags in this case */
	canonical = pthread_priority_canonicalize(kn->kn_qos, FALSE);
	kn->kn_qos = (qos_t)canonical;
}

kq_index_t
qos_index_from_qos(struct knote *kn, qos_t qos, boolean_t propagation)
{
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t qos_index;
	unsigned long flags = 0;

	if ((kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0)
		return QOS_INDEX_KQFILE;

	qos_index = (kq_index_t)thread_qos_from_pthread_priority(
			(unsigned long)qos, &flags);

	if (kq->kq_state & KQ_WORKQ) {
		/* workq kqueues support requesting a manager thread (non-propagation) */
		if (!propagation && (flags & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG))
			return KQWQ_QOS_MANAGER;
	}

	return qos_index;
}

qos_t
qos_from_qos_index(kq_index_t qos_index)
{
	/* should only happen for KQ_WORKQ */
	if (qos_index == KQWQ_QOS_MANAGER)
		return _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;

	if (qos_index == 0)
		return THREAD_QOS_UNSPECIFIED;

	/* Should have support from pthread kext support */
	return (1 << (qos_index - 1 +
	              _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32));
}

/* kqr lock must be held */
unsigned long pthread_priority_for_kqrequest(
	struct kqrequest *kqr,
	kq_index_t qos_index)
{
	unsigned long priority = qos_from_qos_index(qos_index);
	if (kqr->kqr_state & KQR_THOVERCOMMIT) {
		priority |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
	}
	return priority;
}

kq_index_t
qos_index_for_servicer(int qos_class, thread_t thread, int flags)
{
#pragma unused(thread)
	kq_index_t qos_index;

	if (flags & KEVENT_FLAG_WORKQ_MANAGER)
		return KQWQ_QOS_MANAGER;

	qos_index = (kq_index_t)qos_class;
	assert(qos_index > 0 && qos_index < KQWQ_QOS_MANAGER);

	return qos_index;
}
/*
 * kqueue/note lock implementations
 *
 *	The kqueue lock guards the kq state, the state of its queues,
 *	and the kqueue-aware status and use counts of individual knotes.
 *
 *	The kqueue workq lock is used to protect state guarding the
 *	interaction of the kqueue with the workq. This state cannot
 *	be guarded by the kq lock - as it needs to be taken when we
 *	already have the waitq set lock held (during the waitq hook
 *	callback). It might be better to use the waitq lock itself
 *	for this, but the IRQ requirements make that difficult.
 *
 *	Knote flags, filter flags, and associated data are protected
 *	by the underlying object lock - and are only ever looked at
 *	by calling the filter to get a [consistent] snapshot of that
 *	data.
 */
lck_grp_attr_t * kq_lck_grp_attr;
lck_grp_t * kq_lck_grp;
lck_attr_t * kq_lck_attr;

static inline void
kqlock(struct kqueue *kq)
{
	lck_spin_lock(&kq->kq_lock);
}

static inline void
kqlock_held(__assert_only struct kqueue *kq)
{
	LCK_SPIN_ASSERT(&kq->kq_lock, LCK_ASSERT_OWNED);
}

static inline void
kqunlock(struct kqueue *kq)
{
	lck_spin_unlock(&kq->kq_lock);
}

static inline void
knhash_lock(proc_t p)
{
	lck_mtx_lock(&p->p_fd->fd_knhashlock);
}

static inline void
knhash_unlock(proc_t p)
{
	lck_mtx_unlock(&p->p_fd->fd_knhashlock);
}
/*
 * Convert a kq lock to a knote use reference.
 *
 *	If the knote is being dropped, or has
 *	vanished, we can't get a use reference.
 *	Just return with it still locked.
 *
 *	- kq locked at entry
 *	- unlock on exit if we get the use reference
 */
static int
kqlock2knoteuse(struct kqueue *kq, struct knote *kn, int flags)
{
	if (kn->kn_status & (KN_DROPPING | KN_VANISHED))
		return (0);

	assert(kn->kn_status & KN_ATTACHED);
	kn->kn_inuse++;
	if (flags & KNUSE_BOOST) {
		set_thread_rwlock_boost();
	}
	kqunlock(kq);
	return (1);
}
/*
 *	- kq locked at entry
 *	- kq unlocked at exit
 */
static wait_result_t
knoteusewait(struct kqueue *kq, struct knote *kn)
{
	kn->kn_status |= KN_USEWAIT;
	waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
			CAST_EVENT64_T(&kn->kn_status),
			THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
	kqunlock(kq);
	return thread_block(THREAD_CONTINUE_NULL);
}

static bool
knoteuse_needs_boost(struct knote *kn, struct kevent_internal_s *kev)
{
	if (knote_fops(kn)->f_needs_boost) {
		return knote_fops(kn)->f_needs_boost(kev);
	}
	return false;
}
/*
 * Convert from a knote use reference back to kq lock.
 *
 *	Drop a use reference and wake any waiters if
 *	this is the last one.
 *
 *	If someone is trying to drop the knote, but the
 *	caller has events they must deliver, take
 *	responsibility for the drop later - and wake the
 *	other attempted dropper in a manner that informs
 *	him of the transfer of responsibility.
 *
 *	The exit return indicates if the knote is still alive
 *	(or if not, the other dropper has been given the green
 *	light to drop it).
 *
 *	The kqueue lock is re-taken unconditionally.
 */
static int
knoteuse2kqlock(struct kqueue *kq, struct knote *kn, int flags)
{
	int dropped = 0;
	int steal_drop = (flags & KNUSE_STEAL_DROP);

	kqlock(kq);
	if (flags & KNUSE_BOOST) {
		clear_thread_rwlock_boost();
	}

	if (--kn->kn_inuse == 0) {

		if ((kn->kn_status & KN_ATTACHING) != 0) {
			kn->kn_status &= ~KN_ATTACHING;
		}

		if ((kn->kn_status & KN_USEWAIT) != 0) {
			wait_result_t result;

			/* If we need to, try and steal the drop */
			if (kn->kn_status & KN_DROPPING) {
				if (steal_drop && !(kn->kn_status & KN_STOLENDROP)) {
					kn->kn_status |= KN_STOLENDROP;
				} else {
					dropped = 1;
				}
			}

			/* wakeup indicating if ANY USE stole the drop */
			result = (kn->kn_status & KN_STOLENDROP) ?
			         THREAD_RESTART : THREAD_AWAKENED;

			kn->kn_status &= ~KN_USEWAIT;
			waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
					CAST_EVENT64_T(&kn->kn_status),
					result,
					WAITQ_ALL_PRIORITIES);
		} else {
			/* should have seen use-wait if dropping with use refs */
			assert((kn->kn_status & (KN_DROPPING|KN_STOLENDROP)) == 0);
		}

	} else if (kn->kn_status & KN_DROPPING) {
		/* not the last ref but want to steal a drop if present */
		if (steal_drop && ((kn->kn_status & KN_STOLENDROP) == 0)) {
			kn->kn_status |= KN_STOLENDROP;

			/* but we now have to wait to be the last ref */
			knoteusewait(kq, kn);
			kqlock(kq);
		} else {
			dropped = 1;
		}
	}

	return (!dropped);
}
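/*
 * Illustrative usage (sketch only): the typical dance a caller goes through
 * to call a filter routine without holding the kq lock, using the helpers
 * above. Error handling and flags are elided.
 *
 *	kqlock(kq);
 *	if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
 *		// kq is unlocked here and we hold a use reference on kn
 *		int result = knote_fops(kn)->f_event(kn, hint);
 *		if (!knoteuse2kqlock(kq, kn, KNUSE_NONE)) {
 *			// the knote is no longer alive; another thread owns the drop
 *		}
 *		// the kq lock is held again either way
 *	}
 *	kqunlock(kq);
 */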
/*
 * Convert a kq lock to a knote use reference
 * (for the purpose of detaching AND vanishing it).
 *
 *	If the knote is being dropped, we can't get
 *	a detach reference, so wait for the knote to
 *	finish dropping before returning.
 *
 *	If the knote is being used for other purposes,
 *	we cannot detach it until those uses are done
 *	as well. Again, just wait for them to finish
 *	(caller will start over at lookup).
 *
 *	- kq locked at entry
 */
static int
kqlock2knotedetach(struct kqueue *kq, struct knote *kn, int flags)
{
	if ((kn->kn_status & KN_DROPPING) || kn->kn_inuse) {
		/* have to wait for dropper or current uses to go away */
		knoteusewait(kq, kn);
		return (0);
	}
	assert((kn->kn_status & KN_VANISHED) == 0);
	assert(kn->kn_status & KN_ATTACHED);
	kn->kn_status &= ~KN_ATTACHED;
	kn->kn_status |= KN_VANISHED;
	if (flags & KNUSE_BOOST) {
		clear_thread_rwlock_boost();
	}
	kn->kn_inuse++;
	kqunlock(kq);
	return (1);
}
/*
 * Convert a kq lock to a knote drop reference.
 *
 *	If the knote is in use, wait for the use count
 *	to subside. We first mark our intention to drop
 *	it - keeping other users from "piling on."
 *	If we are too late, we have to wait for the
 *	other drop to complete.
 *
 *	- kq locked at entry
 *	- always unlocked on exit.
 *	- caller can't hold any locks that would prevent
 *	  the other dropper from completing.
 */
static int
kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
{
	int oktodrop;
	wait_result_t result;

	oktodrop = ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) == 0);
	/* if another thread is attaching, they will become the dropping thread */
	kn->kn_status |= KN_DROPPING;
	knote_unsuppress(kn);

	if (oktodrop) {
		if (kn->kn_inuse == 0) {
			kqunlock(kq);
			return (oktodrop);
		}
	}
	result = knoteusewait(kq, kn);
	/* THREAD_RESTART == another thread stole the knote drop */
	return (result == THREAD_AWAKENED);
}
/*
 * Release a knote use count reference.
 */
static void
knote_put(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);

	kqlock(kq);
	if (--kn->kn_inuse == 0) {
		if ((kn->kn_status & KN_USEWAIT) != 0) {
			kn->kn_status &= ~KN_USEWAIT;
			waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
					CAST_EVENT64_T(&kn->kn_status),
					THREAD_AWAKENED,
					WAITQ_ALL_PRIORITIES);
		}
	}
	kqunlock(kq);
}

static int
filt_fileattach(struct knote *kn, struct kevent_internal_s *kev)
{
	return (fo_kqfilter(kn->kn_fp, kn, kev, vfs_context_current()));
}
#define	f_flag f_fglob->fg_flag
#define	f_msgcount f_fglob->fg_msgcount
#define	f_cred f_fglob->fg_cred
#define	f_ops f_fglob->fg_ops
#define	f_offset f_fglob->fg_offset
#define	f_data f_fglob->fg_data
static void
filt_kqdetach(struct knote *kn)
{
	struct kqfile *kqf = (struct kqfile *)kn->kn_fp->f_data;
	struct kqueue *kq = &kqf->kqf_kqueue;

	kqlock(kq);
	KNOTE_DETACH(&kqf->kqf_sel.si_note, kn);
	kqunlock(kq);
}

static int
filt_kqueue(struct knote *kn, __unused long hint)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
	int count;

	count = kq->kq_count;
	return (count > 0);
}

static int
filt_kqtouch(struct knote *kn, struct kevent_internal_s *kev)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
	int res;

	kqlock(kq);
	kn->kn_data = kq->kq_count;
	if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
		kn->kn_udata = kev->udata;
	res = (kn->kn_data > 0);
	kqunlock(kq);

	return res;
}

static int
filt_kqprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
{
#pragma unused(data)
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
	int res;

	kqlock(kq);
	kn->kn_data = kq->kq_count;
	res = (kn->kn_data > 0);
	if (res) {
		*kev = kn->kn_kevent;
		if (kn->kn_flags & EV_CLEAR)
			kn->kn_data = 0;
	}
	kqunlock(kq);

	return res;
}
#pragma mark EVFILT_PROC

static int
filt_procattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
	struct proc *p;

	assert(PID_MAX < NOTE_PDATAMASK);

	if ((kn->kn_sfflags & (NOTE_TRACK | NOTE_TRACKERR | NOTE_CHILD)) != 0) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data = ENOTSUP;
		return 0;
	}

	p = proc_find(kn->kn_id);
	if (p == NULL) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data = ESRCH;
		return 0;
	}

	const int NoteExitStatusBits = NOTE_EXIT | NOTE_EXITSTATUS;

	if ((kn->kn_sfflags & NoteExitStatusBits) == NoteExitStatusBits)
		do {
			pid_t selfpid = proc_selfpid();

			if (p->p_ppid == selfpid)
				break;	/* parent => ok */

			if ((p->p_lflag & P_LTRACED) != 0 &&
			    (p->p_oppid == selfpid))
				break;	/* parent-in-waiting => ok */

			proc_rele(p);
			kn->kn_flags = EV_ERROR;
			kn->kn_data = EACCES;
			return 0;
		} while (0);

	kn->kn_ptr.p_proc = p;		/* store the proc handle */

	KNOTE_ATTACH(&p->p_klist, kn);

	/*
	 * only captures edge-triggered events after this point
	 * so it can't already be fired.
	 */
	return (0);
}
974 * leaving nothing for the knote to be attached to. In that case,
975 * the pointer to the process will have already been nulled out.
978 filt_procdetach(struct knote
*kn
)
984 p
= kn
->kn_ptr
.p_proc
;
985 if (p
!= PROC_NULL
) {
986 kn
->kn_ptr
.p_proc
= PROC_NULL
;
987 KNOTE_DETACH(&p
->p_klist
, kn
);
static int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/* ALWAYS CALLED WITH proc_klist_lock */

	/*
	 * Note: a lot of bits in hint may be obtained from the knote
	 * To free some of those bits, see <rdar://problem/12592988> Freeing up
	 * bits in hint for filt_proc
	 *
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * termination lifecycle events can happen while a debugger
	 * has reparented a process, in which case notifications
	 * should be quashed except to the tracing parent. When
	 * the debugger reaps the child (either via wait4(2) or
	 * process exit), the child will be reparented to the original
	 * parent and these knotes re-fired.
	 */
	if (event & NOTE_EXIT) {
		if ((kn->kn_ptr.p_proc->p_oppid != 0)
		    && (knote_get_kq(kn)->kq_p->p_pid != kn->kn_ptr.p_proc->p_ppid)) {
			/*
			 * This knote is not for the current ptrace(2) parent, ignore.
			 */
			return 0;
		}
	}

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
	if ((event == NOTE_REAP) || ((event == NOTE_EXIT) && !(kn->kn_sfflags & NOTE_REAP))) {
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
	}
#pragma clang diagnostic pop

	/*
	 * The kernel has a wrapper in place that returns the same data
	 * as is collected here, in kn_data. Any changes to how
	 * NOTE_EXITSTATUS and NOTE_EXIT_DETAIL are collected
	 * should also be reflected in the proc_pidnoteexit() wrapper.
	 */
	if (event == NOTE_EXIT) {
		kn->kn_data = 0;
		if ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0) {
			kn->kn_fflags |= NOTE_EXITSTATUS;
			kn->kn_data |= (hint & NOTE_PDATAMASK);
		}
		if ((kn->kn_sfflags & NOTE_EXIT_DETAIL) != 0) {
			kn->kn_fflags |= NOTE_EXIT_DETAIL;
			if ((kn->kn_ptr.p_proc->p_lflag &
			     P_LTERM_DECRYPTFAIL) != 0) {
				kn->kn_data |= NOTE_EXIT_DECRYPTFAIL;
			}
			if ((kn->kn_ptr.p_proc->p_lflag &
			     P_LTERM_JETSAM) != 0) {
				kn->kn_data |= NOTE_EXIT_MEMORY;
				switch (kn->kn_ptr.p_proc->p_lflag & P_JETSAM_MASK) {
				case P_JETSAM_VMPAGESHORTAGE:
					kn->kn_data |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE;
					break;
				case P_JETSAM_VMTHRASHING:
					kn->kn_data |= NOTE_EXIT_MEMORY_VMTHRASHING;
					break;
				case P_JETSAM_FCTHRASHING:
					kn->kn_data |= NOTE_EXIT_MEMORY_FCTHRASHING;
					break;
				case P_JETSAM_VNODE:
					kn->kn_data |= NOTE_EXIT_MEMORY_VNODE;
					break;
				case P_JETSAM_HIWAT:
					kn->kn_data |= NOTE_EXIT_MEMORY_HIWAT;
					break;
				case P_JETSAM_PID:
					kn->kn_data |= NOTE_EXIT_MEMORY_PID;
					break;
				case P_JETSAM_IDLEEXIT:
					kn->kn_data |= NOTE_EXIT_MEMORY_IDLE;
					break;
				}
			}
			if ((kn->kn_ptr.p_proc->p_csflags &
			     CS_KILLED) != 0) {
				kn->kn_data |= NOTE_EXIT_CSERROR;
			}
		}
	}

	/* if we have any matching state, activate the knote */
	return (kn->kn_fflags != 0);
}
static int
filt_proctouch(struct knote *kn, struct kevent_internal_s *kev)
{
	int res;

	proc_klist_lock();

	/* accept new filter flags and mask off output events no longer interesting */
	kn->kn_sfflags = kev->fflags;
	if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
		kn->kn_udata = kev->udata;

	/* restrict the current results to the (smaller?) set of new interest */
	/*
	 * For compatibility with previous implementations, we leave kn_fflags
	 * as they were before.
	 */
	//kn->kn_fflags &= kn->kn_sfflags;

	res = (kn->kn_fflags != 0);

	proc_klist_unlock();
	return res;
}

static int
filt_procprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
{
#pragma unused(data)
	int res;

	proc_klist_lock();
	res = (kn->kn_fflags != 0);
	if (res) {
		*kev = kn->kn_kevent;
		kn->kn_flags |= EV_CLEAR;	/* automatically set */
		kn->kn_fflags = 0;
		kn->kn_data = 0;
	}
	proc_klist_unlock();

	return res;
}
#pragma mark EVFILT_TIMER

/*
 * Values stored in the knote at rest (using Mach absolute time units)
 *
 * kn->kn_hook          where the thread_call object is stored
 * kn->kn_ext[0]        next deadline or 0 if immediate expiration
 * kn->kn_ext[1]        leeway value
 * kn->kn_sdata         interval timer: the interval
 *                      absolute/deadline timer: 0
 * kn->kn_data          fire count
 */

static lck_mtx_t _filt_timerlock;

static void filt_timerlock(void)   { lck_mtx_lock(&_filt_timerlock);   }
static void filt_timerunlock(void) { lck_mtx_unlock(&_filt_timerlock); }

static inline void filt_timer_assert_locked(void)
{
	LCK_MTX_ASSERT(&_filt_timerlock, LCK_MTX_ASSERT_OWNED);
}

/* state flags stored in kn_hookid */
#define	TIMER_RUNNING		0x1
#define	TIMER_CANCELWAIT	0x2
/*
 * filt_timervalidate - process data from user
 *
 * Sets up the deadline, interval, and leeway from the provided user data
 *
 * Input:
 *      kn_sdata        timer deadline or interval time
 *      kn_sfflags      style of timer, unit of measurement
 *
 * Output:
 *      kn_sdata        either interval in abstime or 0 if non-repeating timer
 *      ext[0]          fire deadline in abs/cont time
 *                      (or 0 if NOTE_ABSOLUTE and deadline is in past)
 *
 * Returns:
 *      EINVAL          Invalid user data parameters
 *
 * Called with timer filter lock held.
 */
static int
filt_timervalidate(struct knote *kn)
{
	/*
	 * There are five knobs that need to be chosen for a timer registration:
	 *
	 * A) Units of time (what is the time duration of the specified number)
	 *      Absolute and interval take:
	 *              NOTE_SECONDS, NOTE_USECONDS, NOTE_NSECONDS, NOTE_MACHTIME
	 *      Defaults to milliseconds if not specified
	 *
	 * B) Clock epoch (what is the zero point of the specified number)
	 *      For interval, there is none
	 *      For absolute, defaults to the gettimeofday/calendar epoch
	 *      With NOTE_MACHTIME, uses mach_absolute_time()
	 *      With NOTE_MACHTIME and NOTE_MACH_CONTINUOUS_TIME, uses mach_continuous_time()
	 *
	 * C) The knote's behavior on delivery
	 *      Interval timer causes the knote to arm for the next interval unless one-shot is set
	 *      Absolute is a forced one-shot timer which deletes on delivery
	 *      TODO: Add a way for absolute to be not forced one-shot
	 *
	 * D) Whether the time duration is relative to now or absolute
	 *      Interval fires at now + duration when it is set up
	 *      Absolute fires at now + difference between now walltime and passed in walltime
	 *      With NOTE_MACHTIME it fires at an absolute MAT or MCT.
	 *
	 * E) Whether the timer continues to tick across sleep
	 *      By default all three do not.
	 *      For interval and absolute, NOTE_MACH_CONTINUOUS_TIME causes them to tick across sleep
	 *      With NOTE_ABSOLUTE | NOTE_MACHTIME | NOTE_MACH_CONTINUOUS_TIME:
	 *              expires when mach_continuous_time() is > the passed in value.
	 */
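	/*
	 * Illustrative userspace registrations (sketch only) for the knobs
	 * described above; `deadline` is assumed to be a mach_absolute_time()
	 * value computed by the caller.
	 *
	 *	// interval timer: fires every 5 seconds, re-arming on each delivery
	 *	EV_SET64(&kev, 1, EVFILT_TIMER, EV_ADD, NOTE_SECONDS, 5, 0, 0, 0);
	 *
	 *	// absolute one-shot timer in Mach time units
	 *	EV_SET64(&kev, 2, EVFILT_TIMER, EV_ADD,
	 *	    NOTE_ABSOLUTE | NOTE_MACHTIME, deadline, 0, 0, 0);
	 */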
	filt_timer_assert_locked();

	uint64_t multiplier;

	boolean_t use_abstime = FALSE;

	switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS|NOTE_MACHTIME)) {
	case NOTE_SECONDS:
		multiplier = NSEC_PER_SEC;
		break;
	case NOTE_USECONDS:
		multiplier = NSEC_PER_USEC;
		break;
	case NOTE_NSECONDS:
		multiplier = 1;
		break;
	case NOTE_MACHTIME:
		multiplier = 0;
		use_abstime = TRUE;
		break;
	case 0: /* milliseconds (default) */
		multiplier = NSEC_PER_SEC / 1000;
		break;
	default:
		return (EINVAL);
	}

	/* transform the leeway in kn_ext[1] to same time scale */
	if (kn->kn_sfflags & NOTE_LEEWAY) {
		uint64_t leeway_abs;

		if (use_abstime) {
			leeway_abs = (uint64_t)kn->kn_ext[1];
		} else {
			uint64_t leeway_ns;

			if (os_mul_overflow((uint64_t)kn->kn_ext[1], multiplier, &leeway_ns))
				return (ERANGE);

			nanoseconds_to_absolutetime(leeway_ns, &leeway_abs);
		}

		kn->kn_ext[1] = leeway_abs;
	}

	if (kn->kn_sfflags & NOTE_ABSOLUTE) {
		uint64_t deadline_abs;

		if (use_abstime) {
			deadline_abs = (uint64_t)kn->kn_sdata;
		} else {
			uint64_t calendar_deadline_ns;

			if (os_mul_overflow((uint64_t)kn->kn_sdata, multiplier, &calendar_deadline_ns))
				return (ERANGE);

			/* calendar_deadline_ns is in nanoseconds since the epoch */

			clock_sec_t seconds;
			clock_nsec_t nanoseconds;

			/*
			 * Note that the conversion through wall-time is only done once.
			 *
			 * If the relationship between MAT and gettimeofday changes,
			 * the underlying timer does not update.
			 *
			 * TODO: build a wall-time denominated timer_call queue
			 * and a flag to request DTRTing with wall-time timers
			 */
			clock_get_calendar_nanotime(&seconds, &nanoseconds);

			uint64_t calendar_now_ns = (uint64_t)seconds * NSEC_PER_SEC + nanoseconds;

			/* if deadline is in the future */
			if (calendar_now_ns < calendar_deadline_ns) {
				uint64_t interval_ns = calendar_deadline_ns - calendar_now_ns;
				uint64_t interval_abs;

				nanoseconds_to_absolutetime(interval_ns, &interval_abs);

				/*
				 * Note that the NOTE_MACH_CONTINUOUS_TIME flag here only
				 * causes the timer to keep ticking across sleep, but
				 * it does not change the calendar timebase.
				 */
				if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
					clock_continuoustime_interval_to_deadline(interval_abs,
							&deadline_abs);
				else
					clock_absolutetime_interval_to_deadline(interval_abs,
							&deadline_abs);
			} else {
				deadline_abs = 0; /* cause immediate expiration */
			}
		}

		kn->kn_ext[0] = deadline_abs;
		kn->kn_sdata  = 0;	/* NOTE_ABSOLUTE is non-repeating */
	} else if (kn->kn_sdata < 0) {
		/*
		 * Negative interval timers fire immediately, once.
		 *
		 * Ideally a negative interval would be an error, but certain clients
		 * pass negative values on accident, and expect an event back.
		 *
		 * In the old implementation the timer would repeat with no delay
		 * N times until mach_absolute_time() + (N * interval) underflowed,
		 * then it would wait ~forever by accidentally arming a timer for the far future.
		 *
		 * We now skip the power-wasting hot spin phase and go straight to the idle phase.
		 */
		kn->kn_sdata  = 0;	/* non-repeating */
		kn->kn_ext[0] = 0;	/* expire immediately */
	} else {
		uint64_t interval_abs = 0;

		if (use_abstime) {
			interval_abs = (uint64_t)kn->kn_sdata;
		} else {
			uint64_t interval_ns;

			if (os_mul_overflow((uint64_t)kn->kn_sdata, multiplier, &interval_ns))
				return (ERANGE);

			nanoseconds_to_absolutetime(interval_ns, &interval_abs);
		}

		uint64_t deadline = 0;

		if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
			clock_continuoustime_interval_to_deadline(interval_abs, &deadline);
		else
			clock_absolutetime_interval_to_deadline(interval_abs, &deadline);

		kn->kn_sdata  = interval_abs;	/* default to a repeating timer */
		kn->kn_ext[0] = deadline;
	}

	return (0);
}
/*
 * filt_timerexpire - the timer callout routine
 *
 * Just propagate the timer event into the knote
 * filter routine (by going through the knote
 * synchronization point). Pass a hint to
 * indicate this is a real event, not just a
 * query from above.
 */
static void
filt_timerexpire(void *knx, __unused void *spare)
{
	struct klist timer_list;
	struct knote *kn = knx;

	filt_timerlock();

	kn->kn_hookid &= ~TIMER_RUNNING;

	/* no "object" for timers, so fake a list */
	SLIST_INIT(&timer_list);
	SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);

	KNOTE(&timer_list, 1);

	/* if someone is waiting for timer to pop */
	if (kn->kn_hookid & TIMER_CANCELWAIT) {
		struct kqueue *kq = knote_get_kq(kn);
		waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
				CAST_EVENT64_T(&kn->kn_hook),
				THREAD_AWAKENED,
				WAITQ_ALL_PRIORITIES);

		kn->kn_hookid &= ~TIMER_CANCELWAIT;
	}

	filt_timerunlock();
}
/*
 * Cancel a running timer (or wait for the pop).
 * Timer filter lock is held.
 * May drop and retake the timer filter lock.
 */
static void
filt_timercancel(struct knote *kn)
{
	filt_timer_assert_locked();

	assert((kn->kn_hookid & TIMER_CANCELWAIT) == 0);

	/* if no timer, then we're good */
	if ((kn->kn_hookid & TIMER_RUNNING) == 0)
		return;

	thread_call_t callout = (thread_call_t)kn->kn_hook;

	/* cancel the callout if we can */
	if (thread_call_cancel(callout)) {
		kn->kn_hookid &= ~TIMER_RUNNING;
		return;
	}

	/* cancel failed, we have to wait for the in-flight expire routine */

	kn->kn_hookid |= TIMER_CANCELWAIT;

	struct kqueue *kq = knote_get_kq(kn);

	waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
			CAST_EVENT64_T(&kn->kn_hook),
			THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

	filt_timerunlock();
	thread_block(THREAD_CONTINUE_NULL);
	filt_timerlock();

	assert((kn->kn_hookid & TIMER_CANCELWAIT) == 0);
	assert((kn->kn_hookid & TIMER_RUNNING) == 0);
}
static void
filt_timerarm(struct knote *kn)
{
	filt_timer_assert_locked();

	assert((kn->kn_hookid & TIMER_RUNNING) == 0);

	thread_call_t callout = (thread_call_t)kn->kn_hook;

	uint64_t deadline = kn->kn_ext[0];
	uint64_t leeway   = kn->kn_ext[1];

	int filter_flags = kn->kn_sfflags;
	unsigned int timer_flags = 0;

	if (filter_flags & NOTE_CRITICAL)
		timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL;
	else if (filter_flags & NOTE_BACKGROUND)
		timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND;
	else
		timer_flags |= THREAD_CALL_DELAY_USER_NORMAL;

	if (filter_flags & NOTE_LEEWAY)
		timer_flags |= THREAD_CALL_DELAY_LEEWAY;

	if (filter_flags & NOTE_MACH_CONTINUOUS_TIME)
		timer_flags |= THREAD_CALL_CONTINUOUS;

	thread_call_enter_delayed_with_leeway(callout, NULL,
			deadline, leeway,
			timer_flags);

	kn->kn_hookid |= TIMER_RUNNING;
}
/*
 * Does this knote need a timer armed for it, or should it be ready immediately?
 */
static boolean_t
filt_timer_is_ready(struct knote *kn)
{
	uint64_t now;

	if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
		now = mach_continuous_time();
	else
		now = mach_absolute_time();

	uint64_t deadline = kn->kn_ext[0];

	if (deadline == 0)
		return TRUE;

	return deadline <= now;
}
/*
 * Allocate a thread call for the knote's lifetime, and kick off the timer.
 */
static int
filt_timerattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
	thread_call_t callout;
	int error;

	callout = thread_call_allocate_with_options(filt_timerexpire,
			(thread_call_param_t)kn, THREAD_CALL_PRIORITY_HIGH,
			THREAD_CALL_OPTIONS_ONCE);

	if (NULL == callout) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data = ENOMEM;
		return 0;
	}

	filt_timerlock();

	if ((error = filt_timervalidate(kn)) != 0) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data = error;
		filt_timerunlock();

		__assert_only boolean_t freed = thread_call_free(callout);
		assert(freed);
		return 0;
	}

	kn->kn_hook = (void*)callout;
	kn->kn_flags |= EV_CLEAR;

	/* NOTE_ABSOLUTE implies EV_ONESHOT */
	if (kn->kn_sfflags & NOTE_ABSOLUTE)
		kn->kn_flags |= EV_ONESHOT;

	boolean_t timer_ready = FALSE;

	if ((timer_ready = filt_timer_is_ready(kn))) {
		/* cause immediate expiration */
		kn->kn_data = 1;
	} else {
		filt_timerarm(kn);
	}

	filt_timerunlock();

	return timer_ready;
}
/*
 * Shut down the timer if it's running, and free the callout.
 */
static void
filt_timerdetach(struct knote *kn)
{
	thread_call_t callout;

	filt_timerlock();

	callout = (thread_call_t)kn->kn_hook;
	filt_timercancel(kn);

	filt_timerunlock();

	__assert_only boolean_t freed = thread_call_free(callout);
	assert(freed);
}

/*
 * filt_timerevent - post events to a timer knote
 *
 * Called in the context of filt_timerexpire with
 * the filt_timerlock held
 */
static int
filt_timerevent(struct knote *kn, __unused long hint)
{
	filt_timer_assert_locked();

	kn->kn_data = 1;
	return (1);
}
/*
 * filt_timertouch - update timer knote with new user input
 *
 * Cancel and restart the timer based on new user data. When
 * the user picks up a knote, clear the count of how many timer
 * pops have gone off (in kn_data).
 */
static int
filt_timertouch(
	struct knote *kn,
	struct kevent_internal_s *kev)
{
	int error;

	filt_timerlock();

	/*
	 * cancel current call - drops and retakes lock
	 * TODO: not safe against concurrent touches?
	 */
	filt_timercancel(kn);

	/* clear if the timer had previously fired, the user no longer wants to see it */
	kn->kn_data = 0;

	/* capture the new values used to compute deadline */
	kn->kn_sdata = kev->data;
	kn->kn_sfflags = kev->fflags;
	kn->kn_ext[0] = kev->ext[0];
	kn->kn_ext[1] = kev->ext[1];

	if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
		kn->kn_udata = kev->udata;

	/* recalculate deadline */
	error = filt_timervalidate(kn);
	if (error) {
		/* no way to report error, so mark it in the knote */
		kn->kn_flags |= EV_ERROR;
		kn->kn_data = error;
		filt_timerunlock();
		return 1;
	}

	boolean_t timer_ready = FALSE;

	if ((timer_ready = filt_timer_is_ready(kn))) {
		/* cause immediate expiration */
		kn->kn_data = 1;
	} else {
		filt_timerarm(kn);
	}

	filt_timerunlock();

	return timer_ready;
}
/*
 * filt_timerprocess - query state of knote and snapshot event data
 *
 * Determine if the timer has fired in the past, snapshot the state
 * of the kevent for returning to user-space, and clear pending event
 * counters for the next time.
 */
static int
filt_timerprocess(
	struct knote *kn,
	__unused struct filt_process_s *data,
	struct kevent_internal_s *kev)
{
	filt_timerlock();

	if (kn->kn_data == 0 || (kn->kn_hookid & TIMER_CANCELWAIT)) {
		/*
		 * The timer hasn't yet fired, so there's nothing to deliver,
		 * or
		 * touch is in the middle of canceling the timer,
		 * so don't deliver or re-arm anything.
		 *
		 * This can happen if a touch resets a timer that had fired
		 * without being processed
		 */
		filt_timerunlock();
		return 0;
	}

	if (kn->kn_sdata != 0 && ((kn->kn_flags & EV_ERROR) == 0)) {
		/*
		 * This is a 'repeating' timer, so we have to emit
		 * how many intervals expired between the arm
		 * and now.
		 *
		 * A very strange style of interface, because
		 * this could easily be done in the client...
		 */

		/* The timer had better have expired... */
		assert((kn->kn_hookid & TIMER_RUNNING) == 0);

		uint64_t now;

		if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
			now = mach_continuous_time();
		else
			now = mach_absolute_time();

		uint64_t first_deadline = kn->kn_ext[0];
		uint64_t interval_abs   = kn->kn_sdata;
		uint64_t orig_arm_time  = first_deadline - interval_abs;

		assert(now > orig_arm_time);
		assert(now > first_deadline);

		uint64_t elapsed = now - orig_arm_time;

		uint64_t num_fired = elapsed / interval_abs;

		/*
		 * To reach this code, we must have seen the timer pop
		 * and be in repeating mode, so therefore it must have been
		 * more than 'interval' time since the attach or last
		 * successful touch.
		 *
		 * An unsuccessful touch would have disarmed the timer and
		 * left the knote marked with EV_ERROR, which prevents
		 * this code from running.
		 */
		assert(num_fired > 0);

		/* report how many intervals have elapsed to the user */
		kn->kn_data = (int64_t) num_fired;

		/* We only need to re-arm the timer if it's not about to be destroyed */
		if ((kn->kn_flags & EV_ONESHOT) == 0) {
			/* fire at the end of the next interval */
			uint64_t new_deadline = first_deadline + num_fired * interval_abs;

			assert(new_deadline > now);

			kn->kn_ext[0] = new_deadline;

			filt_timerarm(kn);
		}
	}
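	/*
	 * Worked example (assumed numbers): a repeating timer armed with a
	 * 10ms interval whose first deadline was T+10ms, processed at
	 * now = T+35ms:
	 *	orig_arm_time = first_deadline - interval  = T
	 *	elapsed       = now - orig_arm_time        = 35ms
	 *	num_fired     = 35ms / 10ms                = 3 (reported in kn_data)
	 *	new_deadline  = first_deadline + 3 * 10ms  = T+40ms (> now)
	 */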
	/*
	 * Copy out the interesting kevent state,
	 * but don't leak out the raw time calculations.
	 *
	 * TODO: potential enhancements - tell the user about:
	 *      - deadline to which this timer thought it was expiring
	 *      - return kn_sfflags in the fflags field so the client can know
	 *        under what flags the timer fired
	 */
	*kev = kn->kn_kevent;
	kev->ext[0] = 0;
	/* kev->ext[1] = 0;  JMM - shouldn't we hide this too? */

	/* we have delivered the event, reset the timer pop count */
	kn->kn_data = 0;

	filt_timerunlock();
	return 1;
}

SECURITY_READ_ONLY_EARLY(static struct filterops) timer_filtops = {
	.f_attach  = filt_timerattach,
	.f_detach  = filt_timerdetach,
	.f_event   = filt_timerevent,
	.f_touch   = filt_timertouch,
	.f_process = filt_timerprocess,
};


#pragma mark EVFILT_USER


static void
filt_userlock(void)
{
	lck_spin_lock(&_filt_userlock);
}

static void
filt_userunlock(void)
{
	lck_spin_unlock(&_filt_userlock);
}
static int
filt_userattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
	/* EVFILT_USER knotes are not attached to anything in the kernel */
	/* Can't discover this knote until after attach - so no lock needed */
	if (kn->kn_sfflags & NOTE_TRIGGER) {
		kn->kn_hookid = 1;
	} else {
		kn->kn_hookid = 0;
	}
	return (kn->kn_hookid);
}
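/*
 * Illustrative userspace usage (sketch only): an EVFILT_USER knote is armed
 * once and later fired from any thread with NOTE_TRIGGER.
 *
 *	EV_SET64(&kev, 1, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, 0, 0, 0);
 *	// ... later, to fire the event:
 *	EV_SET64(&kev, 1, EVFILT_USER, 0, NOTE_TRIGGER, 0, 0, 0, 0);
 */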
static void
filt_userdetach(__unused struct knote *kn)
{
	/* EVFILT_USER knotes are not attached to anything in the kernel */
}

static int
filt_user(
	__unused struct knote *kn,
	__unused long hint)
{
	return 0;
}

static int
filt_usertouch(
	struct knote *kn,
	struct kevent_internal_s *kev)
{
	uint32_t ffctrl;
	int fflags;
	int active;

	filt_userlock();

	ffctrl = kev->fflags & NOTE_FFCTRLMASK;
	fflags = kev->fflags & NOTE_FFLAGSMASK;
	switch (ffctrl) {
	case NOTE_FFNOP:
		break;
	case NOTE_FFAND:
		kn->kn_sfflags &= fflags;
		break;
	case NOTE_FFOR:
		kn->kn_sfflags |= fflags;
		break;
	case NOTE_FFCOPY:
		kn->kn_sfflags = fflags;
		break;
	}
	kn->kn_sdata = kev->data;

	if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
		kn->kn_udata = kev->udata;

	if (kev->fflags & NOTE_TRIGGER) {
		kn->kn_hookid = 1;
	}
	active = kn->kn_hookid;
	filt_userunlock();

	return (active);
}

static int
filt_userprocess(
	struct knote *kn,
	__unused struct filt_process_s *data,
	struct kevent_internal_s *kev)
{
	filt_userlock();

	if (kn->kn_hookid == 0) {
		filt_userunlock();
		return 0;
	}

	*kev = kn->kn_kevent;
	kev->fflags = (volatile UInt32)kn->kn_sfflags;
	kev->data = kn->kn_sdata;
	if (kn->kn_flags & EV_CLEAR) {
		kn->kn_hookid = 0;
		kn->kn_fflags = 0;
		kn->kn_data = 0;
	}
	filt_userunlock();

	return 1;
}
#pragma mark EVFILT_WORKLOOP

#if DEBUG || DEVELOPMENT
/*
 * see src/queue_internal.h in libdispatch
 */
#define DISPATCH_QUEUE_ENQUEUED 0x1ull
#endif

static inline void
filt_wllock(struct kqworkloop *kqwl)
{
	lck_mtx_lock(&kqwl->kqwl_statelock);
}

static inline void
filt_wlunlock(struct kqworkloop *kqwl)
{
	lck_mtx_unlock(&kqwl->kqwl_statelock);
}

static inline void
filt_wlheld(__assert_only struct kqworkloop *kqwl)
{
	LCK_MTX_ASSERT(&kqwl->kqwl_statelock, LCK_MTX_ASSERT_OWNED);
}

#define WL_OWNER_SUSPENDED    ((thread_t)(~0ull))  /* special owner when suspended */

static inline bool
filt_wlowner_is_valid(thread_t owner)
{
	return owner != THREAD_NULL && owner != WL_OWNER_SUSPENDED;
}
static inline bool
filt_wlshould_end_ownership(struct kqworkloop *kqwl,
		struct kevent_internal_s *kev, int error)
{
	thread_t owner = kqwl->kqwl_owner;
	return (error == 0 || error == ESTALE) &&
			(kev->fflags & NOTE_WL_END_OWNERSHIP) &&
			(owner == current_thread() || owner == WL_OWNER_SUSPENDED);
}

static inline bool
filt_wlshould_update_ownership(struct kevent_internal_s *kev, int error)
{
	return error == 0 && (kev->fflags & NOTE_WL_DISCOVER_OWNER) &&
			kev->ext[EV_EXTIDX_WL_ADDR];
}

static inline bool
filt_wlshould_set_async_qos(struct kevent_internal_s *kev, int error,
		kq_index_t async_qos)
{
	if (error != 0) {
		return false;
	}
	if (async_qos != THREAD_QOS_UNSPECIFIED) {
		return true;
	}
	if ((kev->fflags & NOTE_WL_THREAD_REQUEST) && (kev->flags & EV_DELETE)) {
		/* see filt_wlprocess() */
		return true;
	}
	return false;
}
static int
filt_wlupdateowner(struct kqworkloop *kqwl, struct kevent_internal_s *kev,
		int error, kq_index_t async_qos)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;
	thread_t cur_owner, new_owner, extra_thread_ref = THREAD_NULL;
	kq_index_t cur_override = THREAD_QOS_UNSPECIFIED;
	kq_index_t old_owner_override = THREAD_QOS_UNSPECIFIED;
	boolean_t ipc_override_is_sync = false;
	boolean_t old_owner_override_is_sync = false;
	int action = KQWL_UTQ_NONE;

	filt_wlheld(kqwl);

	/*
	 * The owner is only changed under both the filt_wllock and the
	 * kqwl_req_lock. Looking at it with either one held is fine.
	 */
	cur_owner = kqwl->kqwl_owner;
	if (filt_wlshould_end_ownership(kqwl, kev, error)) {
		new_owner = THREAD_NULL;
	} else if (filt_wlshould_update_ownership(kev, error)) {
		/*
		 * Decipher the owner port name, and translate accordingly.
		 * The low 2 bits were borrowed for other flags, so mask them off.
		 */
		uint64_t udata = kev->ext[EV_EXTIDX_WL_VALUE];
		mach_port_name_t new_owner_name = (mach_port_name_t)udata & ~0x3;
		if (new_owner_name != MACH_PORT_NULL) {
			new_owner_name = ipc_entry_name_mask(new_owner_name);
		}

		if (MACH_PORT_VALID(new_owner_name)) {
			new_owner = port_name_to_thread(new_owner_name);
			if (new_owner == THREAD_NULL)
				return EOWNERDEAD;
			extra_thread_ref = new_owner;
		} else if (new_owner_name == MACH_PORT_DEAD) {
			new_owner = WL_OWNER_SUSPENDED;
		} else {
			/*
			 * We never want to learn a new owner that is NULL.
			 * Ownership should be ended with END_OWNERSHIP.
			 */
			new_owner = cur_owner;
		}
	} else {
		new_owner = cur_owner;
	}

	if (filt_wlshould_set_async_qos(kev, error, async_qos)) {
		action = KQWL_UTQ_SET_ASYNC_QOS;
	}
	if (cur_owner == new_owner && action == KQWL_UTQ_NONE) {
		goto out;
	}

	kqwl_req_lock(kqwl);

	/* If already tracked as servicer, don't track as owner */
	if ((kqr->kqr_state & KQR_BOUND) && new_owner == kqr->kqr_thread) {
		kqwl->kqwl_owner = new_owner = THREAD_NULL;
	}

	if (cur_owner != new_owner) {
		kqwl->kqwl_owner = new_owner;
		if (new_owner == extra_thread_ref) {
			/* we just transferred this ref to kqwl_owner */
			extra_thread_ref = THREAD_NULL;
		}
		cur_override = kqworkloop_combined_qos(kqwl, &ipc_override_is_sync);
		old_owner_override = kqr->kqr_dsync_owner_qos;
		old_owner_override_is_sync = kqr->kqr_owner_override_is_sync;

		if (filt_wlowner_is_valid(new_owner)) {
			/* override it before we drop the old */
			if (cur_override != THREAD_QOS_UNSPECIFIED) {
				thread_add_ipc_override(new_owner, cur_override);
			}
			if (ipc_override_is_sync) {
				thread_add_sync_ipc_override(new_owner);
			}
			/* Update the kqr to indicate that owner has sync ipc override */
			kqr->kqr_dsync_owner_qos = cur_override;
			kqr->kqr_owner_override_is_sync = ipc_override_is_sync;
			thread_starts_owning_workloop(new_owner);
			if ((kqr->kqr_state & (KQR_THREQUESTED | KQR_BOUND)) == KQR_THREQUESTED) {
				if (action == KQWL_UTQ_NONE) {
					action = KQWL_UTQ_REDRIVE_EVENTS;
				}
			}
		} else if (new_owner == THREAD_NULL) {
			kqr->kqr_dsync_owner_qos = THREAD_QOS_UNSPECIFIED;
			kqr->kqr_owner_override_is_sync = false;
			if ((kqr->kqr_state & (KQR_THREQUESTED | KQR_WAKEUP)) == KQR_WAKEUP) {
				if (action == KQWL_UTQ_NONE) {
					action = KQWL_UTQ_REDRIVE_EVENTS;
				}
			}
		}
	}

	if (action != KQWL_UTQ_NONE) {
		kqworkloop_update_threads_qos(kqwl, action, async_qos);
	}

	kqwl_req_unlock(kqwl);

	/* Now that we are unlocked, drop the override and ref on old owner */
	if (new_owner != cur_owner && filt_wlowner_is_valid(cur_owner)) {
		if (old_owner_override != THREAD_QOS_UNSPECIFIED) {
			thread_drop_ipc_override(cur_owner);
		}
		if (old_owner_override_is_sync) {
			thread_drop_sync_ipc_override(cur_owner);
		}
		thread_ends_owning_workloop(cur_owner);
		thread_deallocate(cur_owner);
	}

out:
	if (extra_thread_ref) {
		thread_deallocate(extra_thread_ref);
	}
	return error;
}
static int
filt_wldebounce(
	struct kqworkloop *kqwl,
	struct kevent_internal_s *kev,
	int default_result)
{
	user_addr_t addr = CAST_USER_ADDR_T(kev->ext[EV_EXTIDX_WL_ADDR]);
	uint64_t udata;
	int error;

	/* we must have the workloop state mutex held */
	filt_wlheld(kqwl);

	/* Do we have a debounce address to work with? */
	if (addr) {
		uint64_t kdata = kev->ext[EV_EXTIDX_WL_VALUE];
		uint64_t mask = kev->ext[EV_EXTIDX_WL_MASK];

		error = copyin_word(addr, &udata, sizeof(udata));
		if (error) {
			return error;
		}

		/* update state as copied in */
		kev->ext[EV_EXTIDX_WL_VALUE] = udata;

		/* If the masked bits don't match, reject it as stale */
		if ((udata & mask) != (kdata & mask)) {
			return ESTALE;
		}

#if DEBUG || DEVELOPMENT
		if ((kev->fflags & NOTE_WL_THREAD_REQUEST) && !(kev->flags & EV_DELETE)) {
			if ((udata & DISPATCH_QUEUE_ENQUEUED) == 0 &&
			    (udata >> 48) != 0 && (udata >> 48) != 0xffff) {
				panic("kevent: workloop %#016llx is not enqueued "
				      "(kev:%p dq_state:%#016llx)", kev->udata, kev, udata);
			}
		}
#endif
	}

	return default_result;
}
/*
 * Remembers the last update that came in from userspace for debugging reasons.
 * - fflags is mirrored from the userspace kevent
 * - ext[i, i != VALUE] is mirrored from the userspace kevent
 * - ext[VALUE] is set to what the kernel loaded atomically
 * - data is set to the error if any
 */
static inline void
filt_wlremember_last_update(
	__assert_only struct kqworkloop *kqwl,
	struct knote *kn,
	struct kevent_internal_s *kev,
	int error)
{
	filt_wlheld(kqwl);
	kn->kn_fflags = kev->fflags;
	kn->kn_data = error;
	memcpy(kn->kn_ext, kev->ext, sizeof(kev->ext));
}
2145 * Return which operations on EVFILT_WORKLOOP need to be protected against
2146 * knoteusewait() causing priority inversions.
2149 filt_wlneeds_boost(struct kevent_internal_s
*kev
)
2153 * this is an f_process() usecount, and it can cause a drop to wait
2157 if (kev
->fflags
& NOTE_WL_THREAD_REQUEST
) {
2159 * All operations on thread requests may starve drops or re-attach of
2160 * the same knote, all of them need boosts. None of what we do under
2161 * thread-request usecount holds blocks anyway.
2165 if (kev
->fflags
& NOTE_WL_SYNC_WAIT
) {
2167 * this may call filt_wlwait() and we don't want to hold any boost when
2168 * woken up, this would cause background threads contending on
2169 * dispatch_sync() to wake up at 64 and be preempted immediately when
2176 * SYNC_WAIT knotes when deleted don't need to be rushed, there's no
2177 * detach/reattach race with these ever. In addition to this, when the
2178 * SYNC_WAIT knote is dropped, the caller is no longer receiving the
2179 * workloop overrides if any, and we'd rather schedule other threads than
2180 * him, he's not possibly stalling anything anymore.
2182 return (kev
->flags
& EV_DELETE
) == 0;
2186 filt_wlattach(struct knote
*kn
, struct kevent_internal_s
*kev
)
2188 struct kqueue
*kq
= knote_get_kq(kn
);
2189 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
2191 kq_index_t qos_index
= 0;
2193 if ((kq
->kq_state
& KQ_WORKLOOP
) == 0) {
2198 #if DEVELOPMENT || DEBUG
2199 if (kev
->ident
== 0 && kev
->udata
== 0 && kev
->fflags
== 0) {
2200 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
2202 kqwl_req_lock(kqwl
);
2204 if (kqr
->kqr_dsync_waiters
) {
2205 kev
->fflags
|= NOTE_WL_SYNC_WAIT
;
2207 if (kqr
->kqr_qos_index
) {
2208 kev
->fflags
|= NOTE_WL_THREAD_REQUEST
;
2210 if (kqwl
->kqwl_owner
== WL_OWNER_SUSPENDED
) {
2211 kev
->ext
[0] = ~0ull;
2213 kev
->ext
[0] = thread_tid(kqwl
->kqwl_owner
);
2215 kev
->ext
[1] = thread_tid(kqwl
->kqwl_request
.kqr_thread
);
2216 kev
->ext
[2] = thread_owned_workloops_count(current_thread());
2217 kev
->ext
[3] = kn
->kn_kevent
.ext
[3];
2218 kqwl_req_unlock(kqwl
);
2224 /* Some simple validation */
2225 int command
= (kn
->kn_sfflags
& NOTE_WL_COMMANDS_MASK
);
2227 case NOTE_WL_THREAD_REQUEST
:
2228 if (kn
->kn_id
!= kqwl
->kqwl_dynamicid
) {
2232 qos_index
= qos_index_from_qos(kn
, kn
->kn_qos
, FALSE
);
2233 if (qos_index
< THREAD_QOS_MAINTENANCE
||
2234 qos_index
> THREAD_QOS_USER_INTERACTIVE
) {
2239 case NOTE_WL_SYNC_WAIT
:
2240 case NOTE_WL_SYNC_WAKE
:
2241 if (kq
->kq_state
& KQ_NO_WQ_THREAD
) {
2245 if (kn
->kn_id
== kqwl
->kqwl_dynamicid
) {
2249 if ((kn
->kn_flags
& EV_DISABLE
) == 0) {
2253 if (kn
->kn_sfflags
& NOTE_WL_END_OWNERSHIP
) {
2266 if (command
== NOTE_WL_THREAD_REQUEST
&& kqwl
->kqwl_request
.kqr_qos_index
) {
2268 * There already is a thread request, and well, you're only allowed
2269 * one per workloop, so fail the attach.
2271 * Note: kqr_qos_index is always set with the wllock held, so we
2272 * don't need to take the kqr lock.
2276 /* Make sure user and kernel are in agreement on important state */
2277 error
= filt_wldebounce(kqwl
, kev
, 0);
2280 error
= filt_wlupdateowner(kqwl
, kev
, error
, qos_index
);
2281 filt_wlunlock(kqwl
);
2284 kn
->kn_flags
|= EV_ERROR
;
2285 /* If userland wants ESTALE to be hidden, fail the attach anyway */
2286 if (error
== ESTALE
&& (kn
->kn_sfflags
& NOTE_WL_IGNORE_ESTALE
)) {
2289 kn
->kn_data
= error
;
2293 /* Just attaching the thread request successfully will fire it */
2294 return command
== NOTE_WL_THREAD_REQUEST
;
2297 __attribute__((noinline
,not_tail_called
))
2299 filt_wlwait(struct kqworkloop
*kqwl
,
2301 struct kevent_internal_s
*kev
)
2304 assert((kn
->kn_sfflags
& NOTE_WL_SYNC_WAKE
) == 0);
2307 * Hint to the wakeup side that this thread is waiting. Also used by
2308 * stackshot for waitinfo.
2310 kn
->kn_hook
= current_thread();
2312 thread_set_pending_block_hint(current_thread(), kThreadWaitWorkloopSyncWait
);
2314 wait_result_t wr
= assert_wait(kn
, THREAD_ABORTSAFE
);
2316 if (wr
== THREAD_WAITING
) {
2317 kq_index_t qos_index
= qos_index_from_qos(kn
, kev
->qos
, TRUE
);
2318 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
2320 thread_t thread_to_handoff
= THREAD_NULL
; /* holds +1 thread ref */
2322 thread_t kqwl_owner
= kqwl
->kqwl_owner
;
2323 if (filt_wlowner_is_valid(kqwl_owner
)) {
2324 thread_reference(kqwl_owner
);
2325 thread_to_handoff
= kqwl_owner
;
2328 kqwl_req_lock(kqwl
);
2331 assert(kqr
->kqr_dsync_waiters
< UINT16_MAX
);
2332 kqr
->kqr_dsync_waiters
++;
2333 if (qos_index
> kqr
->kqr_dsync_waiters_qos
) {
2334 kqworkloop_update_threads_qos(kqwl
,
2335 KQWL_UTQ_SET_SYNC_WAITERS_QOS
, qos_index
);
2339 if ((kqr
->kqr_state
& KQR_BOUND
) && thread_to_handoff
== THREAD_NULL
) {
2340 assert(kqr
->kqr_thread
!= THREAD_NULL
);
2341 thread_t servicer
= kqr
->kqr_thread
;
2343 thread_reference(servicer
);
2344 thread_to_handoff
= servicer
;
2347 kqwl_req_unlock(kqwl
);
2349 filt_wlunlock(kqwl
);
2351 /* TODO: use continuation based blocking <rdar://problem/31299584> */
2353 /* consume a refcount on thread_to_handoff, then thread_block() */
2354 wr
= thread_handoff(thread_to_handoff
);
2355 thread_to_handoff
= THREAD_NULL
;
2359 /* clear waiting state (only one waiting thread - so no race) */
2360 assert(kn
->kn_hook
== current_thread());
2363 kqwl_req_lock(kqwl
);
2364 assert(kqr
->kqr_dsync_waiters
> 0);
2365 if (--kqr
->kqr_dsync_waiters
== 0) {
2366 assert(kqr
->kqr_dsync_waiters_qos
);
2367 kqworkloop_update_threads_qos(kqwl
,
2368 KQWL_UTQ_SET_SYNC_WAITERS_QOS
, 0);
2370 kqwl_req_unlock(kqwl
);
2377 case THREAD_AWAKENED
:
2379 case THREAD_INTERRUPTED
:
2381 case THREAD_RESTART
:
2384 panic("filt_wlattach: unexpected wait result %d", wr
);
2389 /* called in stackshot context to report the thread responsible for blocking this thread */
2391 kdp_workloop_sync_wait_find_owner(__assert_only thread_t thread
,
2393 thread_waitinfo_t
*waitinfo
)
2395 struct knote
*kn
= (struct knote
*) event
;
2396 assert(kdp_is_in_zone(kn
, "knote zone"));
2398 assert(kn
->kn_hook
== thread
);
2400 struct kqueue
*kq
= knote_get_kq(kn
);
2401 assert(kdp_is_in_zone(kq
, "kqueue workloop zone"));
2402 assert(kq
->kq_state
& KQ_WORKLOOP
);
2404 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
2405 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
2407 thread_t kqwl_owner
= kqwl
->kqwl_owner
;
2408 thread_t servicer
= kqr
->kqr_thread
;
2410 if (kqwl_owner
== WL_OWNER_SUSPENDED
) {
2411 waitinfo
->owner
= STACKSHOT_WAITOWNER_SUSPENDED
;
2412 } else if (kqwl_owner
!= THREAD_NULL
) {
2413 assert(kdp_is_in_zone(kqwl_owner
, "threads"));
2415 waitinfo
->owner
= thread_tid(kqwl
->kqwl_owner
);
2416 } else if (servicer
!= THREAD_NULL
) {
2417 assert(kdp_is_in_zone(servicer
, "threads"));
2419 waitinfo
->owner
= thread_tid(servicer
);
2420 } else if (kqr
->kqr_state
& KQR_THREQUESTED
) {
2421 waitinfo
->owner
= STACKSHOT_WAITOWNER_THREQUESTED
;
2423 waitinfo
->owner
= 0;
2426 waitinfo
->context
= kqwl
->kqwl_dynamicid
;
2432 * Takes kqueue locked, returns locked, may drop in the middle and/or block for a while
2435 filt_wlpost_attach(struct knote
*kn
, struct kevent_internal_s
*kev
)
2437 struct kqueue
*kq
= knote_get_kq(kn
);
2438 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
2441 if (kev
->fflags
& NOTE_WL_SYNC_WAIT
) {
2442 if (kqlock2knoteuse(kq
, kn
, KNUSE_NONE
)) {
2444 /* if the wake has already preposted, don't wait */
2445 if ((kn
->kn_sfflags
& NOTE_WL_SYNC_WAKE
) == 0)
2446 error
= filt_wlwait(kqwl
, kn
, kev
);
2447 filt_wlunlock(kqwl
);
2448 knoteuse2kqlock(kq
, kn
, KNUSE_NONE
);
2455 filt_wldetach(__assert_only
struct knote
*kn
)
2457 assert(knote_get_kq(kn
)->kq_state
& KQ_WORKLOOP
);
2460 * Thread requests have nothing to detach.
2461 * Sync waiters should have been aborted out
2462 * and drop their refs before we could drop/
2463 * detach their knotes.
2465 assert(kn
->kn_hook
== NULL
);
2470 __unused
struct knote
*kn
,
2473 panic("filt_wlevent");
2478 filt_wlvalidate_kev_flags(struct knote
*kn
, struct kevent_internal_s
*kev
)
2480 int new_commands
= kev
->fflags
& NOTE_WL_COMMANDS_MASK
;
2481 int sav_commands
= kn
->kn_sfflags
& NOTE_WL_COMMANDS_MASK
;
2484 switch (new_commands
) {
2485 case NOTE_WL_THREAD_REQUEST
:
2486 /* thread requests can only update themselves */
2487 if (sav_commands
!= new_commands
)
2491 case NOTE_WL_SYNC_WAIT
:
2492 if (kev
->fflags
& NOTE_WL_END_OWNERSHIP
)
2495 case NOTE_WL_SYNC_WAKE
:
2496 /* waits and wakes can update themselves or their counterparts */
2497 if (!(sav_commands
& (NOTE_WL_SYNC_WAIT
| NOTE_WL_SYNC_WAKE
)))
2499 if (kev
->fflags
& NOTE_WL_UPDATE_QOS
)
2501 if ((kev
->flags
& (EV_ENABLE
| EV_DELETE
)) == EV_ENABLE
)
2503 if (kev
->flags
& EV_DELETE
) {
2505 * Really this is not supported: there is absolutely no reason
2506 * whatsoever to want to fail the drop of a NOTE_WL_SYNC_WAIT knote.
2508 if (kev
->ext
[EV_EXTIDX_WL_ADDR
] && kev
->ext
[EV_EXTIDX_WL_MASK
]) {
2517 if ((kev
->flags
& EV_DELETE
) && (kev
->fflags
& NOTE_WL_DISCOVER_OWNER
)) {
2526 struct kevent_internal_s
*kev
)
2528 struct kqueue
*kq
= knote_get_kq(kn
);
2530 struct kqworkloop
*kqwl
;
2532 assert(kq
->kq_state
& KQ_WORKLOOP
);
2533 kqwl
= (struct kqworkloop
*)kq
;
2535 error
= filt_wlvalidate_kev_flags(kn
, kev
);
2542 /* Make sure user and kernel are in agreement on important state */
2543 error
= filt_wldebounce(kqwl
, kev
, 0);
2545 error
= filt_wlupdateowner(kqwl
, kev
, error
, 0);
2549 int new_command
= kev
->fflags
& NOTE_WL_COMMANDS_MASK
;
2550 switch (new_command
) {
2551 case NOTE_WL_THREAD_REQUEST
:
2552 assert(kqwl
->kqwl_request
.kqr_qos_index
!= THREAD_QOS_UNSPECIFIED
);
2555 case NOTE_WL_SYNC_WAIT
:
2557 * we need to allow waiting several times on the same knote because
2558 * of EINTR. If it's already woken though, it won't block.
2562 case NOTE_WL_SYNC_WAKE
:
2563 if (kn
->kn_sfflags
& NOTE_WL_SYNC_WAKE
) {
2564 /* disallow waking the same knote twice */
2569 thread_wakeup_thread((event_t
)kn
, (thread_t
)kn
->kn_hook
);
2579 * Save off any additional fflags/data we just accepted
2580 * But only keep the last round of "update" bits we acted on which helps
2583 kn
->kn_sfflags
&= ~NOTE_WL_UPDATES_MASK
;
2584 kn
->kn_sfflags
|= kev
->fflags
;
2585 kn
->kn_sdata
= kev
->data
;
2587 kq_index_t qos_index
= THREAD_QOS_UNSPECIFIED
;
2589 if (kev
->fflags
& NOTE_WL_UPDATE_QOS
) {
2590 qos_t qos
= pthread_priority_canonicalize(kev
->qos
, FALSE
);
2592 if (kn
->kn_qos
!= qos
) {
2593 qos_index
= qos_index_from_qos(kn
, qos
, FALSE
);
2594 if (qos_index
== THREAD_QOS_UNSPECIFIED
) {
2599 if (kn
->kn_status
& KN_QUEUED
) {
2601 knote_set_qos_index(kn
, qos_index
);
2605 knote_set_qos_index(kn
, qos_index
);
2612 error
= filt_wlupdateowner(kqwl
, kev
, 0, qos_index
);
2617 if (new_command
== NOTE_WL_SYNC_WAIT
) {
2618 /* if the wake has already preposted, don't wait */
2619 if ((kn
->kn_sfflags
& NOTE_WL_SYNC_WAKE
) == 0)
2620 error
= filt_wlwait(kqwl
, kn
, kev
);
2624 filt_wlremember_last_update(kqwl
, kn
, kev
, error
);
2625 filt_wlunlock(kqwl
);
2628 if (error
== ESTALE
&& (kev
->fflags
& NOTE_WL_IGNORE_ESTALE
)) {
2629 /* If userland wants ESTALE to be hidden, do not activate */
2632 kev
->flags
|= EV_ERROR
;
2636 /* Just touching the thread request successfully will fire it */
2637 return new_command
== NOTE_WL_THREAD_REQUEST
;
2641 filt_wldrop_and_unlock(
2643 struct kevent_internal_s
*kev
)
2645 struct kqueue
*kq
= knote_get_kq(kn
);
2646 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
2647 int error
= 0, knoteuse_flags
= KNUSE_NONE
;
2651 assert(kev
->flags
& EV_DELETE
);
2652 assert(kq
->kq_state
& KQ_WORKLOOP
);
2654 error
= filt_wlvalidate_kev_flags(kn
, kev
);
2659 if (kn
->kn_sfflags
& NOTE_WL_THREAD_REQUEST
) {
2660 knoteuse_flags
|= KNUSE_BOOST
;
2663 /* take a usecount to allow taking the filt_wllock */
2664 if (!kqlock2knoteuse(kq
, kn
, knoteuse_flags
)) {
2665 /* knote is being dropped already */
2666 error
= EINPROGRESS
;
2673 * Make sure user and kernel are in agreement on important state
2675 * Userland will modify bits to cause this to fail for the touch / drop
2676 * race case (when a drop for a thread request quiescing comes in late after
2677 * the workloop has been woken up again).
2679 error
= filt_wldebounce(kqwl
, kev
, 0);
2681 if (!knoteuse2kqlock(kq
, kn
, knoteuse_flags
)) {
2682 /* knote is no longer alive */
2683 error
= EINPROGRESS
;
2687 if (!error
&& (kn
->kn_sfflags
& NOTE_WL_THREAD_REQUEST
) && kn
->kn_inuse
) {
2689 * There is a concurrent drop or touch happening, we can't resolve this,
2690 * userland has to redrive.
2692 * The race we're worried about here is the following:
2694 * f_touch | f_drop_and_unlock
2695 * ------------------------+--------------------------------------------
2697 * | kqlock2knoteuse()
2699 * | debounces successfully
2702 * filt_wllock() <BLOCKS> |
2703 * | knoteuse2kqlock()
2705 * | kqlock2knotedrop() <BLOCKS, WAKES f_touch>
2706 * debounces successfully |
2708 * caller WAKES f_drop |
2709 * | performs drop, but f_touch should have won
2711 * So if the usecount is not 0 here, we need to wait for it to drop and
2712 * redrive the whole logic (including looking up the knote again).
2714 filt_wlunlock(kqwl
);
2715 knoteusewait(kq
, kn
);
2720 * If error is 0 this will set kqr_qos_index to THREAD_QOS_UNSPECIFIED
2722 * If error is 0 or ESTALE this may drop ownership and cause a thread
2723 * request redrive, however the kqlock is held which prevents f_process() to
2724 * run until we did the drop for real.
2726 error
= filt_wlupdateowner(kqwl
, kev
, error
, 0);
2731 if ((kn
->kn_sfflags
& (NOTE_WL_SYNC_WAIT
| NOTE_WL_SYNC_WAKE
)) ==
2732 NOTE_WL_SYNC_WAIT
) {
2734 * When deleting a SYNC_WAIT knote that hasn't been woken up
2735 * explicitly, issue a wake up.
2737 kn
->kn_sfflags
|= NOTE_WL_SYNC_WAKE
;
2739 thread_wakeup_thread((event_t
)kn
, (thread_t
)kn
->kn_hook
);
2744 filt_wlremember_last_update(kqwl
, kn
, kev
, error
);
2745 filt_wlunlock(kqwl
);
2749 /* If nothing failed, do the regular knote drop. */
2750 if (kqlock2knotedrop(kq
, kn
)) {
2751 knote_drop(kn
, current_proc());
2753 error
= EINPROGRESS
;
2758 if (error
== ESTALE
&& (kev
->fflags
& NOTE_WL_IGNORE_ESTALE
)) {
2761 if (error
== EINPROGRESS
) {
2763 * filt_wlprocess() makes sure that no event can be delivered for
2764 * NOTE_WL_THREAD_REQUEST knotes once a drop is happening, and
2765 * NOTE_WL_SYNC_* knotes are never fired.
2767 * It means that EINPROGRESS is about a state that userland cannot
2768 * observe for this filter (an event being delivered concurrently from
2769 * a drop), so silence the error.
2779 __unused
struct filt_process_s
*data
,
2780 struct kevent_internal_s
*kev
)
2782 struct kqueue
*kq
= knote_get_kq(kn
);
2783 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
2784 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
2787 assert(kq
->kq_state
& KQ_WORKLOOP
);
2789 /* only thread requests should get here */
2790 assert(kn
->kn_sfflags
& NOTE_WL_THREAD_REQUEST
);
2791 if (kn
->kn_sfflags
& NOTE_WL_THREAD_REQUEST
) {
2793 assert(kqr
->kqr_qos_index
!= THREAD_QOS_UNSPECIFIED
);
2794 if (kqwl
->kqwl_owner
) {
2796 * <rdar://problem/33584321> userspace sometimes due to events being
2797 * delivered but not triggering a drain session can cause a process
2798 * of the thread request knote.
2800 * When that happens, the automatic deactivation due to process
2801 * would swallow the event, so we have to activate the knote again.
2806 } else if (kqr
->kqr_qos_index
) {
2807 #if DEBUG || DEVELOPMENT
2808 user_addr_t addr
= CAST_USER_ADDR_T(kn
->kn_ext
[EV_EXTIDX_WL_ADDR
]);
2809 task_t t
= current_task();
2811 if (addr
&& task_is_active(t
) && !task_is_halting(t
) &&
2812 copyin_word(addr
, &val
, sizeof(val
)) == 0 &&
2813 val
&& (val
& DISPATCH_QUEUE_ENQUEUED
) == 0 &&
2814 (val
>> 48) != 0 && (val
>> 48) != 0xffff) {
2815 panic("kevent: workloop %#016llx is not enqueued "
2816 "(kn:%p dq_state:%#016llx kev.dq_state:%#016llx)",
2817 kn
->kn_udata
, kn
, val
,
2818 kn
->kn_ext
[EV_EXTIDX_WL_VALUE
]);
2821 *kev
= kn
->kn_kevent
;
2822 kev
->fflags
= kn
->kn_sfflags
;
2823 kev
->data
= kn
->kn_sdata
;
2824 kev
->qos
= kn
->kn_qos
;
2827 filt_wlunlock(kqwl
);
2832 #pragma mark kevent / knotes
2835 * JMM - placeholder for not-yet-implemented filters
2838 filt_badattach(__unused
struct knote
*kn
, __unused
struct kevent_internal_s
*kev
)
2840 kn
->kn_flags
|= EV_ERROR
;
2841 kn
->kn_data
= ENOTSUP
;
2846 kqueue_alloc(struct proc
*p
, unsigned int flags
)
2848 struct filedesc
*fdp
= p
->p_fd
;
2849 struct kqueue
*kq
= NULL
;
2852 uint64_t kq_addr_offset
;
2854 if (flags
& KEVENT_FLAG_WORKQ
) {
2855 struct kqworkq
*kqwq
;
2858 kqwq
= (struct kqworkq
*)zalloc(kqworkq_zone
);
2862 kq
= &kqwq
->kqwq_kqueue
;
2863 bzero(kqwq
, sizeof (struct kqworkq
));
2865 kqwq
->kqwq_state
= KQ_WORKQ
;
2867 for (i
= 0; i
< KQWQ_NBUCKETS
; i
++) {
2868 TAILQ_INIT(&kq
->kq_queue
[i
]);
2870 for (i
= 0; i
< KQWQ_NQOS
; i
++) {
2871 kqwq
->kqwq_request
[i
].kqr_qos_index
= i
;
2874 lck_spin_init(&kqwq
->kqwq_reqlock
, kq_lck_grp
, kq_lck_attr
);
2875 policy
= SYNC_POLICY_FIFO
;
2876 hook
= (void *)kqwq
;
2878 } else if (flags
& KEVENT_FLAG_WORKLOOP
) {
2879 struct kqworkloop
*kqwl
;
2882 kqwl
= (struct kqworkloop
*)zalloc(kqworkloop_zone
);
2886 bzero(kqwl
, sizeof (struct kqworkloop
));
2888 kqwl
->kqwl_state
= KQ_WORKLOOP
| KQ_DYNAMIC
;
2889 kqwl
->kqwl_retains
= 1; /* donate a retain to creator */
2891 kq
= &kqwl
->kqwl_kqueue
;
2892 for (i
= 0; i
< KQWL_NBUCKETS
; i
++) {
2893 TAILQ_INIT(&kq
->kq_queue
[i
]);
2895 TAILQ_INIT(&kqwl
->kqwl_request
.kqr_suppressed
);
2897 lck_spin_init(&kqwl
->kqwl_reqlock
, kq_lck_grp
, kq_lck_attr
);
2898 lck_mtx_init(&kqwl
->kqwl_statelock
, kq_lck_grp
, kq_lck_attr
);
2900 policy
= SYNC_POLICY_FIFO
;
2901 if (flags
& KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD
) {
2902 policy
|= SYNC_POLICY_PREPOST
;
2903 kq
->kq_state
|= KQ_NO_WQ_THREAD
;
2905 hook
= (void *)kqwl
;
2911 kqf
= (struct kqfile
*)zalloc(kqfile_zone
);
2915 kq
= &kqf
->kqf_kqueue
;
2916 bzero(kqf
, sizeof (struct kqfile
));
2917 TAILQ_INIT(&kq
->kq_queue
[0]);
2918 TAILQ_INIT(&kqf
->kqf_suppressed
);
2920 policy
= SYNC_POLICY_FIFO
| SYNC_POLICY_PREPOST
;
2923 waitq_set_init(&kq
->kq_wqs
, policy
, NULL
, hook
);
2924 lck_spin_init(&kq
->kq_lock
, kq_lck_grp
, kq_lck_attr
);
2927 if (fdp
->fd_knlistsize
< 0) {
2929 if (fdp
->fd_knlistsize
< 0)
2930 fdp
->fd_knlistsize
= 0; /* this process has had a kq */
2934 kq_addr_offset
= ((uintptr_t)kq
- (uintptr_t)VM_MIN_KERNEL_AND_KEXT_ADDRESS
);
2935 /* Assert that the address can be pointer compacted for use with knote */
2936 assert(kq_addr_offset
< (uint64_t)(1ull << KNOTE_KQ_BITSIZE
));
2941 * knotes_dealloc - detach all knotes for the process and drop them
2943 * Called with proc_fdlock held.
2944 * Returns with it locked.
2945 * May drop it temporarily.
2946 * Process is in such a state that it will not try to allocate
2947 * any more knotes during this process (stopped for exit or exec).
2950 knotes_dealloc(proc_t p
)
2952 struct filedesc
*fdp
= p
->p_fd
;
2955 struct klist
*kn_hash
= NULL
;
2958 /* Close all the fd-indexed knotes up front */
2959 if (fdp
->fd_knlistsize
> 0) {
2960 for (i
= 0; i
< fdp
->fd_knlistsize
; i
++) {
2961 while ((kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
])) != NULL
) {
2962 kq
= knote_get_kq(kn
);
2965 /* drop it ourselves or wait */
2966 if (kqlock2knotedrop(kq
, kn
)) {
2972 /* free the table */
2973 FREE(fdp
->fd_knlist
, M_KQUEUE
);
2974 fdp
->fd_knlist
= NULL
;
2976 fdp
->fd_knlistsize
= -1;
2981 /* Clean out all the hashed knotes as well */
2982 if (fdp
->fd_knhashmask
!= 0) {
2983 for (i
= 0; i
<= (int)fdp
->fd_knhashmask
; i
++) {
2984 while ((kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
])) != NULL
) {
2985 kq
= knote_get_kq(kn
);
2988 /* drop it ourselves or wait */
2989 if (kqlock2knotedrop(kq
, kn
)) {
2995 kn_hash
= fdp
->fd_knhash
;
2996 fdp
->fd_knhashmask
= 0;
2997 fdp
->fd_knhash
= NULL
;
3002 /* free the kn_hash table */
3004 FREE(kn_hash
, M_KQUEUE
);
3011 * kqueue_dealloc - detach all knotes from a kqueue and free it
3013 * We walk each list looking for knotes referencing this
3014 * this kqueue. If we find one, we try to drop it. But
3015 * if we fail to get a drop reference, that will wait
3016 * until it is dropped. So, we can just restart again
3017 * safe in the assumption that the list will eventually
3018 * not contain any more references to this kqueue (either
3019 * we dropped them all, or someone else did).
3021 * Assumes no new events are being added to the kqueue.
3022 * Nothing locked on entry or exit.
3024 * Workloop kqueues cant get here unless all the knotes
3025 * are already gone and all requested threads have come
3026 * and gone (cancelled or arrived).
3029 kqueue_dealloc(struct kqueue
*kq
)
3032 struct filedesc
*fdp
;
3043 for (i
= 0; i
< fdp
->fd_knlistsize
; i
++) {
3044 kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
]);
3045 while (kn
!= NULL
) {
3046 if (kq
== knote_get_kq(kn
)) {
3047 assert((kq
->kq_state
& KQ_WORKLOOP
) == 0);
3050 /* drop it ourselves or wait */
3051 if (kqlock2knotedrop(kq
, kn
)) {
3055 /* start over at beginning of list */
3056 kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
]);
3059 kn
= SLIST_NEXT(kn
, kn_link
);
3065 if (fdp
->fd_knhashmask
!= 0) {
3066 for (i
= 0; i
< (int)fdp
->fd_knhashmask
+ 1; i
++) {
3067 kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
]);
3068 while (kn
!= NULL
) {
3069 if (kq
== knote_get_kq(kn
)) {
3070 assert((kq
->kq_state
& KQ_WORKLOOP
) == 0);
3073 /* drop it ourselves or wait */
3074 if (kqlock2knotedrop(kq
, kn
)) {
3078 /* start over at beginning of list */
3079 kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
]);
3082 kn
= SLIST_NEXT(kn
, kn_link
);
3088 if (kq
->kq_state
& KQ_WORKLOOP
) {
3089 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3090 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
3091 thread_t cur_owner
= kqwl
->kqwl_owner
;
3093 assert(TAILQ_EMPTY(&kqwl
->kqwl_request
.kqr_suppressed
));
3094 if (filt_wlowner_is_valid(cur_owner
)) {
3096 * If the kqueue had an owner that prevented the thread request to
3097 * go through, then no unbind happened, and we may have lingering
3098 * overrides to drop.
3100 if (kqr
->kqr_dsync_owner_qos
!= THREAD_QOS_UNSPECIFIED
) {
3101 thread_drop_ipc_override(cur_owner
);
3102 kqr
->kqr_dsync_owner_qos
= THREAD_QOS_UNSPECIFIED
;
3105 if (kqr
->kqr_owner_override_is_sync
) {
3106 thread_drop_sync_ipc_override(cur_owner
);
3107 kqr
->kqr_owner_override_is_sync
= 0;
3109 thread_ends_owning_workloop(cur_owner
);
3110 thread_deallocate(cur_owner
);
3111 kqwl
->kqwl_owner
= THREAD_NULL
;
3116 * waitq_set_deinit() remove the KQ's waitq set from
3117 * any select sets to which it may belong.
3119 waitq_set_deinit(&kq
->kq_wqs
);
3120 lck_spin_destroy(&kq
->kq_lock
, kq_lck_grp
);
3122 if (kq
->kq_state
& KQ_WORKQ
) {
3123 struct kqworkq
*kqwq
= (struct kqworkq
*)kq
;
3125 lck_spin_destroy(&kqwq
->kqwq_reqlock
, kq_lck_grp
);
3126 zfree(kqworkq_zone
, kqwq
);
3127 } else if (kq
->kq_state
& KQ_WORKLOOP
) {
3128 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3130 assert(kqwl
->kqwl_retains
== 0);
3131 lck_spin_destroy(&kqwl
->kqwl_reqlock
, kq_lck_grp
);
3132 lck_mtx_destroy(&kqwl
->kqwl_statelock
, kq_lck_grp
);
3133 zfree(kqworkloop_zone
, kqwl
);
3135 struct kqfile
*kqf
= (struct kqfile
*)kq
;
3137 zfree(kqfile_zone
, kqf
);
3142 kqueue_retain(struct kqueue
*kq
)
3144 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3147 if ((kq
->kq_state
& KQ_DYNAMIC
) == 0)
3150 previous
= OSIncrementAtomic(&kqwl
->kqwl_retains
);
3151 if (previous
== KQ_WORKLOOP_RETAINS_MAX
)
3152 panic("kq(%p) retain overflow", kq
);
3155 panic("kq(%p) resurrection", kq
);
3158 #define KQUEUE_CANT_BE_LAST_REF 0
3159 #define KQUEUE_MIGHT_BE_LAST_REF 1
3162 kqueue_release(struct kqueue
*kq
, __assert_only
int possibly_last
)
3164 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3166 if ((kq
->kq_state
& KQ_DYNAMIC
) == 0) {
3170 assert(kq
->kq_state
& KQ_WORKLOOP
); /* for now */
3171 uint32_t refs
= OSDecrementAtomic(&kqwl
->kqwl_retains
);
3172 if (__improbable(refs
== 0)) {
3173 panic("kq(%p) over-release", kq
);
3176 assert(possibly_last
);
3182 kqueue_body(struct proc
*p
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
)
3185 struct fileproc
*fp
;
3188 error
= falloc_withalloc(p
,
3189 &fp
, &fd
, vfs_context_current(), fp_zalloc
, cra
);
3194 kq
= kqueue_alloc(p
, 0);
3200 fp
->f_flag
= FREAD
| FWRITE
;
3201 fp
->f_ops
= &kqueueops
;
3205 *fdflags(p
, fd
) |= UF_EXCLOSE
;
3206 procfdtbl_releasefd(p
, fd
, NULL
);
3207 fp_drop(p
, fd
, fp
, 1);
3215 kqueue(struct proc
*p
, __unused
struct kqueue_args
*uap
, int32_t *retval
)
3217 return (kqueue_body(p
, fileproc_alloc_init
, NULL
, retval
));
3221 kevent_copyin(user_addr_t
*addrp
, struct kevent_internal_s
*kevp
, struct proc
*p
,
3227 if (flags
& KEVENT_FLAG_LEGACY32
) {
3228 bzero(kevp
, sizeof (*kevp
));
3230 if (IS_64BIT_PROCESS(p
)) {
3231 struct user64_kevent kev64
;
3233 advance
= sizeof (kev64
);
3234 error
= copyin(*addrp
, (caddr_t
)&kev64
, advance
);
3237 kevp
->ident
= kev64
.ident
;
3238 kevp
->filter
= kev64
.filter
;
3239 kevp
->flags
= kev64
.flags
;
3240 kevp
->udata
= kev64
.udata
;
3241 kevp
->fflags
= kev64
.fflags
;
3242 kevp
->data
= kev64
.data
;
3244 struct user32_kevent kev32
;
3246 advance
= sizeof (kev32
);
3247 error
= copyin(*addrp
, (caddr_t
)&kev32
, advance
);
3250 kevp
->ident
= (uintptr_t)kev32
.ident
;
3251 kevp
->filter
= kev32
.filter
;
3252 kevp
->flags
= kev32
.flags
;
3253 kevp
->udata
= CAST_USER_ADDR_T(kev32
.udata
);
3254 kevp
->fflags
= kev32
.fflags
;
3255 kevp
->data
= (intptr_t)kev32
.data
;
3257 } else if (flags
& KEVENT_FLAG_LEGACY64
) {
3258 struct kevent64_s kev64
;
3260 bzero(kevp
, sizeof (*kevp
));
3262 advance
= sizeof (struct kevent64_s
);
3263 error
= copyin(*addrp
, (caddr_t
)&kev64
, advance
);
3266 kevp
->ident
= kev64
.ident
;
3267 kevp
->filter
= kev64
.filter
;
3268 kevp
->flags
= kev64
.flags
;
3269 kevp
->udata
= kev64
.udata
;
3270 kevp
->fflags
= kev64
.fflags
;
3271 kevp
->data
= kev64
.data
;
3272 kevp
->ext
[0] = kev64
.ext
[0];
3273 kevp
->ext
[1] = kev64
.ext
[1];
3276 struct kevent_qos_s kevqos
;
3278 bzero(kevp
, sizeof (*kevp
));
3280 advance
= sizeof (struct kevent_qos_s
);
3281 error
= copyin(*addrp
, (caddr_t
)&kevqos
, advance
);
3284 kevp
->ident
= kevqos
.ident
;
3285 kevp
->filter
= kevqos
.filter
;
3286 kevp
->flags
= kevqos
.flags
;
3287 kevp
->qos
= kevqos
.qos
;
3288 // kevp->xflags = kevqos.xflags;
3289 kevp
->udata
= kevqos
.udata
;
3290 kevp
->fflags
= kevqos
.fflags
;
3291 kevp
->data
= kevqos
.data
;
3292 kevp
->ext
[0] = kevqos
.ext
[0];
3293 kevp
->ext
[1] = kevqos
.ext
[1];
3294 kevp
->ext
[2] = kevqos
.ext
[2];
3295 kevp
->ext
[3] = kevqos
.ext
[3];
3303 kevent_copyout(struct kevent_internal_s
*kevp
, user_addr_t
*addrp
, struct proc
*p
,
3306 user_addr_t addr
= *addrp
;
3311 * fully initialize the differnt output event structure
3312 * types from the internal kevent (and some universal
3313 * defaults for fields not represented in the internal
3316 if (flags
& KEVENT_FLAG_LEGACY32
) {
3317 assert((flags
& KEVENT_FLAG_STACK_EVENTS
) == 0);
3319 if (IS_64BIT_PROCESS(p
)) {
3320 struct user64_kevent kev64
;
3322 advance
= sizeof (kev64
);
3323 bzero(&kev64
, advance
);
3326 * deal with the special case of a user-supplied
3327 * value of (uintptr_t)-1.
3329 kev64
.ident
= (kevp
->ident
== (uintptr_t)-1) ?
3330 (uint64_t)-1LL : (uint64_t)kevp
->ident
;
3332 kev64
.filter
= kevp
->filter
;
3333 kev64
.flags
= kevp
->flags
;
3334 kev64
.fflags
= kevp
->fflags
;
3335 kev64
.data
= (int64_t) kevp
->data
;
3336 kev64
.udata
= kevp
->udata
;
3337 error
= copyout((caddr_t
)&kev64
, addr
, advance
);
3339 struct user32_kevent kev32
;
3341 advance
= sizeof (kev32
);
3342 bzero(&kev32
, advance
);
3343 kev32
.ident
= (uint32_t)kevp
->ident
;
3344 kev32
.filter
= kevp
->filter
;
3345 kev32
.flags
= kevp
->flags
;
3346 kev32
.fflags
= kevp
->fflags
;
3347 kev32
.data
= (int32_t)kevp
->data
;
3348 kev32
.udata
= kevp
->udata
;
3349 error
= copyout((caddr_t
)&kev32
, addr
, advance
);
3351 } else if (flags
& KEVENT_FLAG_LEGACY64
) {
3352 struct kevent64_s kev64
;
3354 advance
= sizeof (struct kevent64_s
);
3355 if (flags
& KEVENT_FLAG_STACK_EVENTS
) {
3358 bzero(&kev64
, advance
);
3359 kev64
.ident
= kevp
->ident
;
3360 kev64
.filter
= kevp
->filter
;
3361 kev64
.flags
= kevp
->flags
;
3362 kev64
.fflags
= kevp
->fflags
;
3363 kev64
.data
= (int64_t) kevp
->data
;
3364 kev64
.udata
= kevp
->udata
;
3365 kev64
.ext
[0] = kevp
->ext
[0];
3366 kev64
.ext
[1] = kevp
->ext
[1];
3367 error
= copyout((caddr_t
)&kev64
, addr
, advance
);
3369 struct kevent_qos_s kevqos
;
3371 advance
= sizeof (struct kevent_qos_s
);
3372 if (flags
& KEVENT_FLAG_STACK_EVENTS
) {
3375 bzero(&kevqos
, advance
);
3376 kevqos
.ident
= kevp
->ident
;
3377 kevqos
.filter
= kevp
->filter
;
3378 kevqos
.flags
= kevp
->flags
;
3379 kevqos
.qos
= kevp
->qos
;
3380 kevqos
.udata
= kevp
->udata
;
3381 kevqos
.fflags
= kevp
->fflags
;
3383 kevqos
.data
= (int64_t) kevp
->data
;
3384 kevqos
.ext
[0] = kevp
->ext
[0];
3385 kevqos
.ext
[1] = kevp
->ext
[1];
3386 kevqos
.ext
[2] = kevp
->ext
[2];
3387 kevqos
.ext
[3] = kevp
->ext
[3];
3388 error
= copyout((caddr_t
)&kevqos
, addr
, advance
);
3391 if (flags
& KEVENT_FLAG_STACK_EVENTS
)
3394 *addrp
= addr
+ advance
;
3400 kevent_get_data_size(struct proc
*p
,
3401 uint64_t data_available
,
3403 user_size_t
*residp
)
3408 if (data_available
!= USER_ADDR_NULL
) {
3409 if (flags
& KEVENT_FLAG_KERNEL
) {
3410 resid
= *(user_size_t
*)(uintptr_t)data_available
;
3411 } else if (IS_64BIT_PROCESS(p
)) {
3412 user64_size_t usize
;
3413 error
= copyin((user_addr_t
)data_available
, &usize
, sizeof(usize
));
3414 resid
= (user_size_t
)usize
;
3416 user32_size_t usize
;
3417 error
= copyin((user_addr_t
)data_available
, &usize
, sizeof(usize
));
3418 resid
= (user_size_t
)usize
;
3430 kevent_put_data_size(struct proc
*p
,
3431 uint64_t data_available
,
3437 if (data_available
) {
3438 if (flags
& KEVENT_FLAG_KERNEL
) {
3439 *(user_size_t
*)(uintptr_t)data_available
= resid
;
3440 } else if (IS_64BIT_PROCESS(p
)) {
3441 user64_size_t usize
= (user64_size_t
)resid
;
3442 error
= copyout(&usize
, (user_addr_t
)data_available
, sizeof(usize
));
3444 user32_size_t usize
= (user32_size_t
)resid
;
3445 error
= copyout(&usize
, (user_addr_t
)data_available
, sizeof(usize
));
3452 * kevent_continue - continue a kevent syscall after blocking
3454 * assume we inherit a use count on the kq fileglob.
3457 __attribute__((noreturn
))
3459 kevent_continue(__unused
struct kqueue
*kq
, void *data
, int error
)
3461 struct _kevent
*cont_args
;
3462 struct fileproc
*fp
;
3463 uint64_t data_available
;
3464 user_size_t data_size
;
3465 user_size_t data_resid
;
3470 struct proc
*p
= current_proc();
3472 cont_args
= (struct _kevent
*)data
;
3473 data_available
= cont_args
->data_available
;
3474 flags
= cont_args
->process_data
.fp_flags
;
3475 data_size
= cont_args
->process_data
.fp_data_size
;
3476 data_resid
= cont_args
->process_data
.fp_data_resid
;
3477 noutputs
= cont_args
->eventout
;
3478 retval
= cont_args
->retval
;
3482 kevent_put_kq(p
, fd
, fp
, kq
);
3484 /* don't abandon other output just because of residual copyout failures */
3485 if (error
== 0 && data_available
&& data_resid
!= data_size
) {
3486 (void)kevent_put_data_size(p
, data_available
, flags
, data_resid
);
3489 /* don't restart after signals... */
3490 if (error
== ERESTART
)
3492 else if (error
== EWOULDBLOCK
)
3496 unix_syscall_return(error
);
3500 * kevent - [syscall] register and wait for kernel events
3504 kevent(struct proc
*p
, struct kevent_args
*uap
, int32_t *retval
)
3506 unsigned int flags
= KEVENT_FLAG_LEGACY32
;
3508 return kevent_internal(p
,
3509 (kqueue_id_t
)uap
->fd
, NULL
,
3510 uap
->changelist
, uap
->nchanges
,
3511 uap
->eventlist
, uap
->nevents
,
3520 kevent64(struct proc
*p
, struct kevent64_args
*uap
, int32_t *retval
)
3524 /* restrict to user flags and set legacy64 */
3525 flags
= uap
->flags
& KEVENT_FLAG_USER
;
3526 flags
|= KEVENT_FLAG_LEGACY64
;
3528 return kevent_internal(p
,
3529 (kqueue_id_t
)uap
->fd
, NULL
,
3530 uap
->changelist
, uap
->nchanges
,
3531 uap
->eventlist
, uap
->nevents
,
3540 kevent_qos(struct proc
*p
, struct kevent_qos_args
*uap
, int32_t *retval
)
3542 /* restrict to user flags */
3543 uap
->flags
&= KEVENT_FLAG_USER
;
3545 return kevent_internal(p
,
3546 (kqueue_id_t
)uap
->fd
, NULL
,
3547 uap
->changelist
, uap
->nchanges
,
3548 uap
->eventlist
, uap
->nevents
,
3549 uap
->data_out
, (uint64_t)uap
->data_available
,
3557 kevent_qos_internal(struct proc
*p
, int fd
,
3558 user_addr_t changelist
, int nchanges
,
3559 user_addr_t eventlist
, int nevents
,
3560 user_addr_t data_out
, user_size_t
*data_available
,
3564 return kevent_internal(p
,
3565 (kqueue_id_t
)fd
, NULL
,
3566 changelist
, nchanges
,
3568 data_out
, (uint64_t)data_available
,
3569 (flags
| KEVENT_FLAG_KERNEL
),
3576 kevent_id(struct proc
*p
, struct kevent_id_args
*uap
, int32_t *retval
)
3578 /* restrict to user flags */
3579 uap
->flags
&= KEVENT_FLAG_USER
;
3581 return kevent_internal(p
,
3582 (kqueue_id_t
)uap
->id
, NULL
,
3583 uap
->changelist
, uap
->nchanges
,
3584 uap
->eventlist
, uap
->nevents
,
3585 uap
->data_out
, (uint64_t)uap
->data_available
,
3586 (uap
->flags
| KEVENT_FLAG_DYNAMIC_KQUEUE
),
3593 kevent_id_internal(struct proc
*p
, kqueue_id_t
*id
,
3594 user_addr_t changelist
, int nchanges
,
3595 user_addr_t eventlist
, int nevents
,
3596 user_addr_t data_out
, user_size_t
*data_available
,
3600 return kevent_internal(p
,
3602 changelist
, nchanges
,
3604 data_out
, (uint64_t)data_available
,
3605 (flags
| KEVENT_FLAG_KERNEL
| KEVENT_FLAG_DYNAMIC_KQUEUE
),
3612 kevent_get_timeout(struct proc
*p
,
3613 user_addr_t utimeout
,
3615 struct timeval
*atvp
)
3620 if (flags
& KEVENT_FLAG_IMMEDIATE
) {
3621 getmicrouptime(&atv
);
3622 } else if (utimeout
!= USER_ADDR_NULL
) {
3624 if (flags
& KEVENT_FLAG_KERNEL
) {
3625 struct timespec
*tsp
= (struct timespec
*)utimeout
;
3626 TIMESPEC_TO_TIMEVAL(&rtv
, tsp
);
3627 } else if (IS_64BIT_PROCESS(p
)) {
3628 struct user64_timespec ts
;
3629 error
= copyin(utimeout
, &ts
, sizeof(ts
));
3630 if ((ts
.tv_sec
& 0xFFFFFFFF00000000ull
) != 0)
3633 TIMESPEC_TO_TIMEVAL(&rtv
, &ts
);
3635 struct user32_timespec ts
;
3636 error
= copyin(utimeout
, &ts
, sizeof(ts
));
3637 TIMESPEC_TO_TIMEVAL(&rtv
, &ts
);
3641 if (itimerfix(&rtv
))
3643 getmicrouptime(&atv
);
3644 timevaladd(&atv
, &rtv
);
3646 /* wait forever value */
3655 kevent_set_kq_mode(struct kqueue
*kq
, unsigned int flags
)
3657 /* each kq should only be used for events of one type */
3659 if (kq
->kq_state
& (KQ_KEV32
| KQ_KEV64
| KQ_KEV_QOS
)) {
3660 if (flags
& KEVENT_FLAG_LEGACY32
) {
3661 if ((kq
->kq_state
& KQ_KEV32
) == 0) {
3665 } else if (kq
->kq_state
& KQ_KEV32
) {
3669 } else if (flags
& KEVENT_FLAG_LEGACY32
) {
3670 kq
->kq_state
|= KQ_KEV32
;
3671 } else if (flags
& KEVENT_FLAG_LEGACY64
) {
3672 kq
->kq_state
|= KQ_KEV64
;
3674 kq
->kq_state
|= KQ_KEV_QOS
;
3680 #define KQ_HASH(val, mask) (((val) ^ (val >> 8)) & (mask))
3681 #define CONFIG_KQ_HASHSIZE CONFIG_KN_HASHSIZE
3684 kqhash_lock(proc_t p
)
3686 lck_mtx_lock_spin_always(&p
->p_fd
->fd_kqhashlock
);
3690 kqhash_lock_held(__assert_only proc_t p
)
3692 LCK_MTX_ASSERT(&p
->p_fd
->fd_kqhashlock
, LCK_MTX_ASSERT_OWNED
);
3696 kqhash_unlock(proc_t p
)
3698 lck_mtx_unlock(&p
->p_fd
->fd_kqhashlock
);
3702 kqueue_hash_init_if_needed(proc_t p
)
3704 struct filedesc
*fdp
= p
->p_fd
;
3706 kqhash_lock_held(p
);
3708 if (__improbable(fdp
->fd_kqhash
== NULL
)) {
3709 struct kqlist
*alloc_hash
;
3713 alloc_hash
= hashinit(CONFIG_KQ_HASHSIZE
, M_KQUEUE
, &alloc_mask
);
3716 /* See if we won the race */
3717 if (fdp
->fd_kqhashmask
== 0) {
3718 fdp
->fd_kqhash
= alloc_hash
;
3719 fdp
->fd_kqhashmask
= alloc_mask
;
3722 FREE(alloc_hash
, M_KQUEUE
);
3729 * Called with the kqhash_lock() held
3737 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3738 struct filedesc
*fdp
= p
->p_fd
;
3739 struct kqlist
*list
;
3741 /* should hold the kq hash lock */
3742 kqhash_lock_held(p
);
3744 if ((kq
->kq_state
& KQ_DYNAMIC
) == 0) {
3745 assert(kq
->kq_state
& KQ_DYNAMIC
);
3749 /* only dynamically allocate workloop kqs for now */
3750 assert(kq
->kq_state
& KQ_WORKLOOP
);
3751 assert(fdp
->fd_kqhash
);
3753 kqwl
->kqwl_dynamicid
= id
;
3755 list
= &fdp
->fd_kqhash
[KQ_HASH(id
, fdp
->fd_kqhashmask
)];
3756 SLIST_INSERT_HEAD(list
, kqwl
, kqwl_hashlink
);
3759 /* Called with kqhash_lock held */
3765 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3766 struct filedesc
*fdp
= p
->p_fd
;
3767 struct kqlist
*list
;
3769 /* should hold the kq hash lock */
3770 kqhash_lock_held(p
);
3772 if ((kq
->kq_state
& KQ_DYNAMIC
) == 0) {
3773 assert(kq
->kq_state
& KQ_DYNAMIC
);
3776 assert(kq
->kq_state
& KQ_WORKLOOP
); /* for now */
3777 list
= &fdp
->fd_kqhash
[KQ_HASH(kqwl
->kqwl_dynamicid
, fdp
->fd_kqhashmask
)];
3778 SLIST_REMOVE(list
, kqwl
, kqworkloop
, kqwl_hashlink
);
3781 /* Called with kqhash_lock held */
3782 static struct kqueue
*
3783 kqueue_hash_lookup(struct proc
*p
, kqueue_id_t id
)
3785 struct filedesc
*fdp
= p
->p_fd
;
3786 struct kqlist
*list
;
3787 struct kqworkloop
*kqwl
;
3789 /* should hold the kq hash lock */
3790 kqhash_lock_held(p
);
3792 if (fdp
->fd_kqhashmask
== 0) return NULL
;
3794 list
= &fdp
->fd_kqhash
[KQ_HASH(id
, fdp
->fd_kqhashmask
)];
3795 SLIST_FOREACH(kqwl
, list
, kqwl_hashlink
) {
3796 if (kqwl
->kqwl_dynamicid
== id
) {
3797 struct kqueue
*kq
= (struct kqueue
*)kqwl
;
3799 assert(kq
->kq_state
& KQ_DYNAMIC
);
3800 assert(kq
->kq_state
& KQ_WORKLOOP
); /* for now */
3808 kqueue_release_last(struct proc
*p
, struct kqueue
*kq
)
3810 if (kq
->kq_state
& KQ_DYNAMIC
) {
3812 if (kqueue_release(kq
, KQUEUE_MIGHT_BE_LAST_REF
)) {
3813 kqueue_hash_remove(p
, kq
);
3822 static struct kqueue
*
3823 kevent_get_bound_kq(__assert_only
struct proc
*p
, thread_t thread
,
3824 unsigned int kev_flags
, unsigned int kq_flags
)
3827 struct uthread
*ut
= get_bsdthread_info(thread
);
3829 assert(p
== get_bsdthreadtask_info(thread
));
3831 if (!(ut
->uu_kqueue_flags
& kev_flags
))
3834 kq
= ut
->uu_kqueue_bound
;
3838 if (!(kq
->kq_state
& kq_flags
))
3845 kevent_get_kq(struct proc
*p
, kqueue_id_t id
, unsigned int flags
, struct fileproc
**fpp
, int *fdp
, struct kqueue
**kqp
)
3847 struct filedesc
*descp
= p
->p_fd
;
3848 struct fileproc
*fp
= NULL
;
3853 /* Was the workloop flag passed? Then it is for sure only a workloop */
3854 if (flags
& KEVENT_FLAG_DYNAMIC_KQUEUE
) {
3855 assert(flags
& KEVENT_FLAG_WORKLOOP
);
3856 if (id
== (kqueue_id_t
)-1 &&
3857 (flags
& KEVENT_FLAG_KERNEL
) &&
3858 (flags
& KEVENT_FLAG_WORKLOOP
)) {
3860 assert(is_workqueue_thread(current_thread()));
3863 * when kevent_id_internal is called from within the
3864 * kernel, and the passed 'id' value is '-1' then we
3865 * look for the currently bound workloop kq.
3867 * Until pthread kext avoids calling in to kevent_id_internal
3868 * for threads whose fulfill is canceled, calling in unbound
3871 kq
= kevent_get_bound_kq(p
, current_thread(),
3872 KEVENT_FLAG_WORKLOOP
, KQ_WORKLOOP
);
3876 struct uthread
*ut
= get_bsdthread_info(current_thread());
3878 /* If thread is unbound due to cancel, just return an error */
3879 if (ut
->uu_kqueue_flags
== KEVENT_FLAG_WORKLOOP_CANCELED
) {
3880 ut
->uu_kqueue_flags
= 0;
3883 panic("Unbound thread called kevent_internal with id=-1"
3884 " uu_kqueue_flags:0x%x, uu_kqueue_bound:%p",
3885 ut
->uu_kqueue_flags
, ut
->uu_kqueue_bound
);
3895 /* try shortcut on kq lookup for bound threads */
3896 kq
= kevent_get_bound_kq(p
, current_thread(), KEVENT_FLAG_WORKLOOP
, KQ_WORKLOOP
);
3897 if (kq
!= NULL
&& ((struct kqworkloop
*)kq
)->kqwl_dynamicid
== id
) {
3899 if (flags
& KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST
) {
3905 /* retain a reference while working with this kq. */
3906 assert(kq
->kq_state
& KQ_DYNAMIC
);
3912 /* look for the kq on the hash table */
3914 kq
= kqueue_hash_lookup(p
, id
);
3918 if (flags
& KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST
) {
3923 struct kqueue
*alloc_kq
;
3924 alloc_kq
= kqueue_alloc(p
, flags
);
3927 kqueue_hash_init_if_needed(p
);
3928 kq
= kqueue_hash_lookup(p
, id
);
3930 /* insert our new one */
3932 kqueue_hash_insert(p
, id
, kq
);
3935 /* lost race, retain existing workloop */
3938 kqueue_release(alloc_kq
, KQUEUE_MIGHT_BE_LAST_REF
);
3939 kqueue_dealloc(alloc_kq
);
3947 if (flags
& KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST
) {
3954 /* retain a reference while working with this kq. */
3955 assert(kq
->kq_state
& KQ_DYNAMIC
);
3960 } else if (flags
& KEVENT_FLAG_WORKQ
) {
3961 /* must already exist for bound threads. */
3962 if (flags
& KEVENT_FLAG_KERNEL
) {
3963 assert(descp
->fd_wqkqueue
!= NULL
);
3967 * use the private kq associated with the proc workq.
3968 * Just being a thread within the process (and not
3969 * being the exit/exec thread) is enough to hold a
3970 * reference on this special kq.
3972 kq
= descp
->fd_wqkqueue
;
3974 struct kqueue
*alloc_kq
= kqueue_alloc(p
, KEVENT_FLAG_WORKQ
);
3975 if (alloc_kq
== NULL
)
3979 if (descp
->fd_wqkqueue
== NULL
) {
3980 kq
= descp
->fd_wqkqueue
= alloc_kq
;
3984 kq
= descp
->fd_wqkqueue
;
3985 kqueue_dealloc(alloc_kq
);
3989 /* get a usecount for the kq itself */
3991 if ((error
= fp_getfkq(p
, fd
, &fp
, &kq
)) != 0)
3994 if ((error
= kevent_set_kq_mode(kq
, flags
)) != 0) {
3995 /* drop the usecount */
3997 fp_drop(p
, fd
, fp
, 0);
4013 struct fileproc
*fp
,
4016 kqueue_release_last(p
, kq
);
4018 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
4019 fp_drop(p
, (int)id
, fp
, 0);
4024 kevent_workloop_serial_no_copyin(proc_t p
, uint64_t workloop_id
)
4026 uint64_t serial_no
= 0;
4030 if (workloop_id
== 0 || p
->p_dispatchqueue_serialno_offset
== 0) {
4033 addr
= (user_addr_t
)(workloop_id
+ p
->p_dispatchqueue_serialno_offset
);
4035 if (proc_is64bit(p
)) {
4036 rc
= copyin(addr
, (caddr_t
)&serial_no
, sizeof(serial_no
));
4038 uint32_t serial_no32
= 0;
4039 rc
= copyin(addr
, (caddr_t
)&serial_no32
, sizeof(serial_no32
));
4040 serial_no
= serial_no32
;
4042 return rc
== 0 ? serial_no
: 0;
4046 kevent_exit_on_workloop_ownership_leak(thread_t thread
)
4048 proc_t p
= current_proc();
4049 struct filedesc
*fdp
= p
->p_fd
;
4050 kqueue_id_t workloop_id
= 0;
4052 mach_vm_address_t addr
;
4053 uint32_t reason_size
;
4056 if (fdp
->fd_kqhashmask
> 0) {
4057 for (uint32_t i
= 0; i
< fdp
->fd_kqhashmask
+ 1; i
++) {
4058 struct kqworkloop
*kqwl
;
4060 SLIST_FOREACH(kqwl
, &fdp
->fd_kqhash
[i
], kqwl_hashlink
) {
4061 struct kqueue
*kq
= &kqwl
->kqwl_kqueue
;
4062 if ((kq
->kq_state
& KQ_DYNAMIC
) && kqwl
->kqwl_owner
== thread
) {
4063 workloop_id
= kqwl
->kqwl_dynamicid
;
4070 assert(workloop_id
);
4072 reason
= os_reason_create(OS_REASON_LIBSYSTEM
,
4073 OS_REASON_LIBSYSTEM_CODE_WORKLOOP_OWNERSHIP_LEAK
);
4074 if (reason
== OS_REASON_NULL
) {
4078 reason
->osr_flags
|= OS_REASON_FLAG_GENERATE_CRASH_REPORT
;
4079 reason_size
= 2 * sizeof(uint64_t);
4080 reason_size
= kcdata_estimate_required_buffer_size(2, reason_size
);
4081 if (os_reason_alloc_buffer(reason
, reason_size
) != 0) {
4085 struct kcdata_descriptor
*kcd
= &reason
->osr_kcd_descriptor
;
4087 if (kcdata_get_memory_addr(kcd
, EXIT_REASON_WORKLOOP_ID
,
4088 sizeof(workloop_id
), &addr
) == KERN_SUCCESS
) {
4089 kcdata_memcpy(kcd
, addr
, &workloop_id
, sizeof(workloop_id
));
4092 uint64_t serial_no
= kevent_workloop_serial_no_copyin(p
, workloop_id
);
4093 if (serial_no
&& kcdata_get_memory_addr(kcd
, EXIT_REASON_DISPATCH_QUEUE_NO
,
4094 sizeof(serial_no
), &addr
) == KERN_SUCCESS
) {
4095 kcdata_memcpy(kcd
, addr
, &serial_no
, sizeof(serial_no
));
4099 #if DEVELOPMENT || DEBUG
4100 psignal_try_thread_with_reason(p
, thread
, SIGABRT
, reason
);
4103 return exit_with_reason(p
, W_EXITCODE(0, SIGKILL
), (int *)NULL
,
4104 FALSE
, FALSE
, 0, reason
);
4110 kevent_servicer_detach_preflight(thread_t thread
, unsigned int flags
, struct kqueue
*kq
)
4113 struct kqworkloop
*kqwl
;
4115 struct kqrequest
*kqr
;
4117 if (!(flags
& KEVENT_FLAG_WORKLOOP
) || !(kq
->kq_state
& KQ_WORKLOOP
))
4120 /* only kq created with KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD from userspace can have attached threads */
4121 if (!(kq
->kq_state
& KQ_NO_WQ_THREAD
))
4124 /* allow detach only on not wq threads */
4125 if (is_workqueue_thread(thread
))
4128 /* check that the current thread is bound to the requested wq */
4129 ut
= get_bsdthread_info(thread
);
4130 if (ut
->uu_kqueue_bound
!= kq
)
4133 kqwl
= (struct kqworkloop
*)kq
;
4134 kqwl_req_lock(kqwl
);
4135 kqr
= &kqwl
->kqwl_request
;
4137 /* check that the wq is bound to the thread */
4138 if ((kqr
->kqr_state
& KQR_BOUND
) == 0 || (kqr
->kqr_thread
!= thread
))
4141 kqwl_req_unlock(kqwl
);
4147 kevent_servicer_detach_thread(struct proc
*p
, kqueue_id_t id
, thread_t thread
,
4148 unsigned int flags
, struct kqueue
*kq
)
4150 struct kqworkloop
*kqwl
;
4153 assert((flags
& KEVENT_FLAG_WORKLOOP
) && (kq
->kq_state
& KQ_WORKLOOP
));
4155 /* allow detach only on not wqthreads threads */
4156 assert(!is_workqueue_thread(thread
));
4158 /* only kq created with KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD from userspace can have attached threads */
4159 assert(kq
->kq_state
& KQ_NO_WQ_THREAD
);
4161 /* check that the current thread is bound to the requested kq */
4162 ut
= get_bsdthread_info(thread
);
4163 assert(ut
->uu_kqueue_bound
== kq
);
4165 kqwl
= (struct kqworkloop
*)kq
;
4169 /* unbind the thread.
4170 * unbind itself checks if still processing and ends it.
4172 kqworkloop_unbind_thread(kqwl
, thread
, flags
);
4176 kevent_put_kq(p
, id
, NULL
, kq
);
4182 kevent_servicer_attach_thread(thread_t thread
, unsigned int flags
, struct kqueue
*kq
)
4185 struct kqworkloop
*kqwl
;
4187 struct kqrequest
*kqr
;
4189 if (!(flags
& KEVENT_FLAG_WORKLOOP
) || !(kq
->kq_state
& KQ_WORKLOOP
))
4192 /* only kq created with KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD from userspace can have attached threads*/
4193 if (!(kq
->kq_state
& KQ_NO_WQ_THREAD
))
4196 /* allow attach only on not wqthreads */
4197 if (is_workqueue_thread(thread
))
4200 /* check that the thread is not already bound */
4201 ut
= get_bsdthread_info(thread
);
4202 if (ut
->uu_kqueue_bound
!= NULL
)
4205 assert(ut
->uu_kqueue_flags
== 0);
4208 kqwl
= (struct kqworkloop
*)kq
;
4209 kqwl_req_lock(kqwl
);
4210 kqr
= &kqwl
->kqwl_request
;
4212 /* check that the kqueue is not already bound */
4213 if (kqr
->kqr_state
& (KQR_BOUND
| KQR_THREQUESTED
| KQR_DRAIN
)) {
4218 assert(kqr
->kqr_thread
== NULL
);
4219 assert((kqr
->kqr_state
& KQR_PROCESSING
) == 0);
4221 kqr
->kqr_state
|= KQR_THREQUESTED
;
4222 kqr
->kqr_qos_index
= THREAD_QOS_UNSPECIFIED
;
4223 kqr
->kqr_override_index
= THREAD_QOS_UNSPECIFIED
;
4224 kqr
->kqr_dsync_owner_qos
= THREAD_QOS_UNSPECIFIED
;
4225 kqr
->kqr_owner_override_is_sync
= 0;
4227 kqworkloop_bind_thread_impl(kqwl
, thread
, KEVENT_FLAG_WORKLOOP
);
4229 /* get a ref on the wlkq on behalf of the attached thread */
4233 kqwl_req_unlock(kqwl
);
4240 boolean_t
kevent_args_requesting_events(unsigned int flags
, int nevents
)
4242 return (!(flags
& KEVENT_FLAG_ERROR_EVENTS
) && nevents
> 0);
4246 kevent_internal(struct proc
*p
,
4247 kqueue_id_t id
, kqueue_id_t
*id_out
,
4248 user_addr_t changelist
, int nchanges
,
4249 user_addr_t ueventlist
, int nevents
,
4250 user_addr_t data_out
, uint64_t data_available
,
4252 user_addr_t utimeout
,
4253 kqueue_continue_t continuation
,
4256 struct _kevent
*cont_args
;
4259 struct fileproc
*fp
= NULL
;
4261 struct kevent_internal_s kev
;
4262 int error
, noutputs
;
4264 user_size_t data_size
;
4265 user_size_t data_resid
;
4266 thread_t thread
= current_thread();
4268 /* Don't allow user-space threads to process output events from the workq kqs */
4269 if (((flags
& (KEVENT_FLAG_WORKQ
| KEVENT_FLAG_KERNEL
)) == KEVENT_FLAG_WORKQ
) &&
4270 kevent_args_requesting_events(flags
, nevents
))
4273 /* restrict dynamic kqueue allocation to workloops (for now) */
4274 if ((flags
& (KEVENT_FLAG_DYNAMIC_KQUEUE
| KEVENT_FLAG_WORKLOOP
)) == KEVENT_FLAG_DYNAMIC_KQUEUE
)
4277 if (flags
& (KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH
| KEVENT_FLAG_WORKLOOP_SERVICER_DETACH
|
4278 KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST
| KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST
| KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD
)) {
4280 /* allowed only on workloops when calling kevent_id from user-space */
4281 if (!(flags
& KEVENT_FLAG_WORKLOOP
) || (flags
& KEVENT_FLAG_KERNEL
) || !(flags
& KEVENT_FLAG_DYNAMIC_KQUEUE
))
4284 /* cannot attach and detach simultaneously*/
4285 if ((flags
& KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH
) && (flags
& KEVENT_FLAG_WORKLOOP_SERVICER_DETACH
))
4288 /* cannot ask for events and detach */
4289 if ((flags
& KEVENT_FLAG_WORKLOOP_SERVICER_DETACH
) && kevent_args_requesting_events(flags
, nevents
))
4294 /* prepare to deal with stack-wise allocation of out events */
4295 if (flags
& KEVENT_FLAG_STACK_EVENTS
) {
4296 int scale
= ((flags
& KEVENT_FLAG_LEGACY32
) ?
4297 (IS_64BIT_PROCESS(p
) ? sizeof(struct user64_kevent
) :
4298 sizeof(struct user32_kevent
)) :
4299 ((flags
& KEVENT_FLAG_LEGACY64
) ? sizeof(struct kevent64_s
) :
4300 sizeof(struct kevent_qos_s
)));
4301 ueventlist
+= nevents
* scale
;
4304 /* convert timeout to absolute - if we have one (and not immediate) */
4305 error
= kevent_get_timeout(p
, utimeout
, flags
, &atv
);
4309 /* copyin initial value of data residual from data_available */
4310 error
= kevent_get_data_size(p
, data_available
, flags
, &data_size
);
4314 /* get the kq we are going to be working on */
4315 error
= kevent_get_kq(p
, id
, flags
, &fp
, &fd
, &kq
);
4319 /* only bound threads can receive events on workloops */
4320 if ((flags
& KEVENT_FLAG_WORKLOOP
) && kevent_args_requesting_events(flags
, nevents
)) {
4321 ut
= (uthread_t
)get_bsdthread_info(thread
);
4322 if (ut
->uu_kqueue_bound
!= kq
) {
4329 /* attach the current thread if necessary */
4330 if (flags
& KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH
) {
4331 error
= kevent_servicer_attach_thread(thread
, flags
, kq
);
4336 /* before processing events and committing to the system call, return an error if the thread cannot be detached when requested */
4337 if (flags
& KEVENT_FLAG_WORKLOOP_SERVICER_DETACH
) {
4338 error
= kevent_servicer_detach_preflight(thread
, flags
, kq
);
4344 if (id_out
&& kq
&& (flags
& KEVENT_FLAG_WORKLOOP
)) {
4345 assert(kq
->kq_state
& KQ_WORKLOOP
);
4346 struct kqworkloop
*kqwl
;
4347 kqwl
= (struct kqworkloop
*)kq
;
4348 *id_out
= kqwl
->kqwl_dynamicid
;
4351 /* register all the change requests the user provided... */
4353 while (nchanges
> 0 && error
== 0) {
4354 error
= kevent_copyin(&changelist
, &kev
, p
, flags
);
4358 /* Make sure user doesn't pass in any system flags */
4359 kev
.flags
&= ~EV_SYSFLAGS
;
4361 kevent_register(kq
, &kev
, p
);
4364 ((kev
.flags
& EV_ERROR
) || (kev
.flags
& EV_RECEIPT
))) {
4365 if (kev
.flags
& EV_RECEIPT
) {
4366 kev
.flags
|= EV_ERROR
;
4369 error
= kevent_copyout(&kev
, &ueventlist
, p
, flags
);
4374 } else if (kev
.flags
& EV_ERROR
) {
4380 /* short-circuit the scan if we only want error events */
4381 if (flags
& KEVENT_FLAG_ERROR_EVENTS
)
4384 /* process pending events */
4385 if (nevents
> 0 && noutputs
== 0 && error
== 0) {
4386 /* store the continuation/completion data in the uthread */
4387 ut
= (uthread_t
)get_bsdthread_info(thread
);
4388 cont_args
= &ut
->uu_kevent
.ss_kevent
;
4391 cont_args
->retval
= retval
;
4392 cont_args
->eventlist
= ueventlist
;
4393 cont_args
->eventcount
= nevents
;
4394 cont_args
->eventout
= noutputs
;
4395 cont_args
->data_available
= data_available
;
4396 cont_args
->process_data
.fp_fd
= (int)id
;
4397 cont_args
->process_data
.fp_flags
= flags
;
4398 cont_args
->process_data
.fp_data_out
= data_out
;
4399 cont_args
->process_data
.fp_data_size
= data_size
;
4400 cont_args
->process_data
.fp_data_resid
= data_size
;
4402 error
= kqueue_scan(kq
, kevent_callback
,
4403 continuation
, cont_args
,
4404 &cont_args
->process_data
,
4407 /* process remaining outputs */
4408 noutputs
= cont_args
->eventout
;
4409 data_resid
= cont_args
->process_data
.fp_data_resid
;
4411 /* copyout residual data size value (if it needs to be copied out) */
4412 /* don't abandon other output just because of residual copyout failures */
4413 if (error
== 0 && data_available
&& data_resid
!= data_size
) {
4414 (void)kevent_put_data_size(p
, data_available
, flags
, data_resid
);
4418 /* detach the current thread if necessary */
4419 if (flags
& KEVENT_FLAG_WORKLOOP_SERVICER_DETACH
) {
4421 kevent_servicer_detach_thread(p
, id
, thread
, flags
, kq
);
4425 kevent_put_kq(p
, id
, fp
, kq
);
4427 /* don't restart after signals... */
4428 if (error
== ERESTART
)
4430 else if (error
== EWOULDBLOCK
)
4439 * kevent_callback - callback for each individual event
4441 * called with nothing locked
4442 * caller holds a reference on the kqueue
4445 kevent_callback(__unused
struct kqueue
*kq
, struct kevent_internal_s
*kevp
,
4448 struct _kevent
*cont_args
;
4451 cont_args
= (struct _kevent
*)data
;
4452 assert(cont_args
->eventout
< cont_args
->eventcount
);
4455 * Copy out the appropriate amount of event data for this user.
4457 error
= kevent_copyout(kevp
, &cont_args
->eventlist
, current_proc(),
4458 cont_args
->process_data
.fp_flags
);
4461 * If there isn't space for additional events, return
4462 * a harmless error to stop the processing here
4464 if (error
== 0 && ++cont_args
->eventout
== cont_args
->eventcount
)
4465 error
= EWOULDBLOCK
;
4470 * kevent_description - format a description of a kevent for diagnostic output
4472 * called with a 256-byte string buffer
4476 kevent_description(struct kevent_internal_s
*kevp
, char *s
, size_t n
)
4480 "{.ident=%#llx, .filter=%d, .flags=%#x, .udata=%#llx, .fflags=%#x, .data=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}",
/*
 * kevent_register - add a new event to a kqueue
 *
 * Creates a mapping between the event source and
 * the kqueue via a knote data structure.
 *
 * Because many/most of the event sources are file
 * descriptor related, the knote is linked off
 * the filedescriptor table for quick access.
 *
 * called with nothing locked
 * caller holds a reference on the kqueue
 */
void
kevent_register(struct kqueue *kq, struct kevent_internal_s *kev,
    __unused struct proc *ctxp)
{
    struct proc *p = kq->kq_p;
    const struct filterops *fops;
    struct knote *kn = NULL;
    int result = 0;
    int error = 0;
    unsigned short kev_flags = kev->flags;
    int knoteuse_flags = KNUSE_NONE;

    if (kev->filter < 0) {
        if (kev->filter + EVFILT_SYSCOUNT < 0) {
            error = EINVAL;
            goto out;
        }
        fops = sysfilt_ops[~kev->filter];    /* to 0-base index */
    } else {
        error = EINVAL;
        goto out;
    }

    /* restrict EV_VANISHED to adding udata-specific dispatch kevents */
    if ((kev->flags & EV_VANISHED) &&
        (kev->flags & (EV_ADD | EV_DISPATCH2)) != (EV_ADD | EV_DISPATCH2)) {
        error = EINVAL;
        goto out;
    }

    /* Simplify the flags - delete and disable overrule */
    if (kev->flags & EV_DELETE)
        kev->flags &= ~EV_ADD;
    if (kev->flags & EV_DISABLE)
        kev->flags &= ~EV_ENABLE;

    if (kq->kq_state & KQ_WORKLOOP) {
        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_REGISTER),
            ((struct kqworkloop *)kq)->kqwl_dynamicid,
            kev->udata, kev->flags, kev->filter);
    } else if (kq->kq_state & KQ_WORKQ) {
        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_REGISTER),
            0, kev->udata, kev->flags, kev->filter);
    } else {
        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_REGISTER),
            VM_KERNEL_UNSLIDE_OR_PERM(kq),
            kev->udata, kev->flags, kev->filter);
    }

restart:
    /* find the matching knote from the fd tables/hashes */
    kn = kq_find_knote_and_kq_lock(kq, kev, fops->f_isfd, p);

    if (kn == NULL) {
        if (kev->flags & EV_ADD) {
            struct fileproc *knote_fp = NULL;

            /* grab a file reference for the new knote */
            if (fops->f_isfd) {
                if ((error = fp_lookup(p, kev->ident, &knote_fp, 0)) != 0) {
                    goto out;
                }
            }

            kn = knote_alloc();
            if (kn == NULL) {
                error = ENOMEM;
                if (knote_fp != NULL)
                    fp_drop(p, kev->ident, knote_fp, 0);
                goto out;
            }

            kn->kn_fp = knote_fp;
            knote_set_kq(kn, kq);
            kqueue_retain(kq); /* retain a kq ref */
            kn->kn_filtid = ~kev->filter;
            kn->kn_inuse = 1;  /* for f_attach() */
            kn->kn_status = KN_ATTACHING | KN_ATTACHED;

            /* was vanish support requested */
            if (kev->flags & EV_VANISHED) {
                kev->flags &= ~EV_VANISHED;
                kn->kn_status |= KN_REQVANISH;
            }

            /* snapshot matching/dispatching protocol flags into knote */
            if (kev->flags & EV_DISPATCH)
                kn->kn_status |= KN_DISPATCH;
            if (kev->flags & EV_UDATA_SPECIFIC)
                kn->kn_status |= KN_UDATA_SPECIFIC;

            /*
             * copy the kevent state into knote
             * protocol is that fflags and data
             * are saved off, and cleared before
             * calling the attach routine.
             */
            kn->kn_kevent = *kev;
            kn->kn_sfflags = kev->fflags;
            kn->kn_sdata = kev->data;
            kn->kn_fflags = 0;
            kn->kn_data = 0;

            /* invoke pthread kext to convert kevent qos to thread qos */
            knote_canonicalize_kevent_qos(kn);
            knote_set_qos_index(kn, qos_index_from_qos(kn, kn->kn_qos, FALSE));

            /* before anyone can find it */
            if (kev->flags & EV_DISABLE) {
                /*
                 * do this before anyone can find it,
                 * this can't call knote_disable() because it expects having
                 * the kqlock held.
                 */
                kn->kn_status |= KN_DISABLED;
            }

            /* Add the knote for lookup thru the fd table */
            error = kq_add_knote(kq, kn, kev, p, &knoteuse_flags);
            if (error) {
                (void)kqueue_release(kq, KQUEUE_CANT_BE_LAST_REF);
                knote_free(kn);
                if (knote_fp != NULL)
                    fp_drop(p, kev->ident, knote_fp, 0);

                if (error == ERESTART) {
                    error = 0;
                    goto restart;
                }
                goto out;
            }

            /* fp reference count now applies to knote */
            /* rwlock boost is now held */

            /* call filter attach routine */
            result = fops->f_attach(kn, kev);

            /*
             * Trade knote use count for kq lock.
             * Cannot be dropped because we held
             * KN_ATTACHING throughout.
             */
            knoteuse2kqlock(kq, kn, KNUSE_STEAL_DROP | knoteuse_flags);

            if (kn->kn_flags & EV_ERROR) {
                /*
                 * Failed to attach correctly, so drop.
                 * All other possible users/droppers
                 * have deferred to us. Save the error
                 * to return to our caller.
                 */
                kn->kn_status &= ~KN_ATTACHED;
                kn->kn_status |= KN_DROPPING;
                error = kn->kn_data;
                kqunlock(kq);
                knote_drop(kn, p);
                goto out;
            }

            /* end "attaching" phase - now just attached */
            kn->kn_status &= ~KN_ATTACHING;

            if (kn->kn_status & KN_DROPPING) {
                /*
                 * Attach succeeded, but someone else
                 * deferred their drop - now we have
                 * to do it for them.
                 */
                kqunlock(kq);
                knote_drop(kn, p);
                goto out;
            }

            /* Mark the thread request overcommit - if appropos */
            knote_set_qos_overcommit(kn);

            /*
             * If the attach routine indicated that an
             * event is already fired, activate the knote.
             */
            if (result)
                knote_activate(kn);

            if (knote_fops(kn)->f_post_attach) {
                error = knote_fops(kn)->f_post_attach(kn, kev);
                if (error) {
                    kqunlock(kq);
                    goto out;
                }
            }

        } else {
            if ((kev_flags & (EV_ADD | EV_DELETE)) == (EV_ADD | EV_DELETE) &&
                (kq->kq_state & KQ_WORKLOOP)) {
                /*
                 * For workloops, understand EV_ADD|EV_DELETE as a "soft" delete
                 * that doesn't care about ENOENT, so just pretend the deletion
                 * happened.
                 */
            } else {
                error = ENOENT;
            }
            goto out;
        }

    } else {
        /* existing knote: kqueue lock already taken by kq_find_knote_and_kq_lock */

        if ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) != 0) {
            /*
             * The knote is not in a stable state, wait for that
             * transition to complete and then redrive the lookup.
             */
            knoteusewait(kq, kn);
            goto restart;
        }

        if (kev->flags & EV_DELETE) {

            /*
             * If attempting to delete a disabled dispatch2 knote,
             * we must wait for the knote to be re-enabled (unless
             * it is being re-enabled atomically here).
             */
            if ((kev->flags & EV_ENABLE) == 0 &&
                (kn->kn_status & (KN_DISPATCH2 | KN_DISABLED)) ==
                                 (KN_DISPATCH2 | KN_DISABLED)) {
                kn->kn_status |= KN_DEFERDELETE;
                kqunlock(kq);
                error = EINPROGRESS;
            } else if (knote_fops(kn)->f_drop_and_unlock) {
                /*
                 * The filter has requested to handle EV_DELETE events.
                 *
                 * ERESTART means the kevent has to be re-evaluated.
                 */
                error = knote_fops(kn)->f_drop_and_unlock(kn, kev);
                if (error == ERESTART) {
                    error = 0;
                    goto restart;
                }
            } else if (kqlock2knotedrop(kq, kn)) {
                /* standard/default EV_DELETE path */
                knote_drop(kn, p);
            } else {
                /*
                 * The kqueue is unlocked, it's not being
                 * dropped, and kqlock2knotedrop returned 0:
                 * this means that someone stole the drop of
                 * the knote from us.
                 */
                error = EINPROGRESS;
            }
            goto out;
        }

        /*
         * If we are re-enabling a deferred-delete knote,
         * just enable it now and avoid calling the
         * filter touch routine (it has delivered its
         * last event already).
         */
        if ((kev->flags & EV_ENABLE) &&
            (kn->kn_status & KN_DEFERDELETE)) {
            assert(kn->kn_status & KN_DISABLED);
            knote_activate(kn);
            knote_enable(kn);
            kqunlock(kq);
            goto out;
        }

        /*
         * If we are disabling, do it before unlocking and
         * calling the touch routine (so no processing can
         * see the new kevent state before the disable is
         * applied).
         */
        if (kev->flags & EV_DISABLE)
            knote_disable(kn);

        /*
         * Convert the kqlock to a use reference on the
         * knote so we can call the filter touch routine.
         */
        if (knoteuse_needs_boost(kn, kev)) {
            knoteuse_flags |= KNUSE_BOOST;
        }
        if (kqlock2knoteuse(kq, kn, knoteuse_flags)) {
            /*
             * Call touch routine to notify filter of changes
             * in filter values (and to re-determine if any
             * events are fired).
             */
            result = knote_fops(kn)->f_touch(kn, kev);

            /* Get the kq lock back (don't defer droppers). */
            if (!knoteuse2kqlock(kq, kn, knoteuse_flags)) {
                error = EINPROGRESS;
                goto out;
            }

            /* Handle errors during touch routine */
            if (kev->flags & EV_ERROR) {
                error = kev->data;
                kqunlock(kq);
                goto out;
            }

            /* Activate it if the touch routine said to */
            if (result)
                knote_activate(kn);
        }

        /* Enable the knote if called for */
        if (kev->flags & EV_ENABLE)
            knote_enable(kn);
    }

    /* still have kqlock held and knote is valid */
    kqunlock(kq);

out:
    /* output local errors through the kevent */
    if (error) {
        kev->flags |= EV_ERROR;
        kev->data = error;
    }
}
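/*
 * For context, the userspace view of the registration path above is the
 * plain kevent(2) changelist interface.  A minimal, hypothetical sketch of
 * registering a dispatch-style read event on a descriptor fd with a udata
 * cookie (fd, kq and udata are placeholders, not names from this file):
 *
 *  #include <sys/event.h>
 *
 *  struct kevent kev;
 *  EV_SET(&kev, fd, EVFILT_READ, EV_ADD | EV_DISPATCH, 0, 0, udata);
 *  if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
 *      err(1, "kevent");
 *
 * Each change arrives here as a struct kevent_internal_s; any per-event
 * failure is reported back by setting EV_ERROR in kev->flags and stashing
 * the error code in kev->data, as done at the end of the function above.
 */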
/*
 * knote_process - process a triggered event
 *
 * Validate that it is really still a triggered event
 * by calling the filter routines (if necessary). Hold
 * a use reference on the knote to avoid it being detached.
 *
 * If it is still considered triggered, we will have taken
 * a copy of the state under the filter lock. We use that
 * snapshot to dispatch the knote for future processing (or
 * not, if this was a lost event).
 *
 * Our caller assures us that nobody else can be processing
 * events from this knote during the whole operation. But
 * others can be touching or posting events to the knote
 * interspersed with our processing it.
 *
 * caller holds a reference on the kqueue.
 * kqueue locked on entry and exit - but may be dropped
 */
static int
knote_process(struct knote *kn,
    kevent_callback_t callback,
    void *callback_data,
    struct filt_process_s *process_data,
    struct proc *p)
{
    struct kevent_internal_s kev;
    struct kqueue *kq = knote_get_kq(kn);
    int result = 0;
    int error = 0;

    bzero(&kev, sizeof(kev));

    /*
     * Must be active or stayactive
     * Must be queued and not disabled/suppressed
     */
    assert(kn->kn_status & KN_QUEUED);
    assert(kn->kn_status & (KN_ACTIVE|KN_STAYACTIVE));
    assert(!(kn->kn_status & (KN_DISABLED|KN_SUPPRESSED|KN_DROPPING)));

    if (kq->kq_state & KQ_WORKLOOP) {
        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS),
            ((struct kqworkloop *)kq)->kqwl_dynamicid,
            kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
            kn->kn_filtid);
    } else if (kq->kq_state & KQ_WORKQ) {
        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS),
            0, kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
            kn->kn_filtid);
    } else {
        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS),
            VM_KERNEL_UNSLIDE_OR_PERM(kq), kn->kn_udata,
            kn->kn_status | (kn->kn_id << 32), kn->kn_filtid);
    }

    /*
     * For deferred-drop or vanished events, we just create a fake
     * event to acknowledge end-of-life. Otherwise, we call the
     * filter's process routine to snapshot the kevent state under
     * the filter's locking protocol.
     */
    if (kn->kn_status & (KN_DEFERDELETE | KN_VANISHED)) {
        /* create fake event */
        kev.filter = kn->kn_filter;
        kev.ident = kn->kn_id;
        kev.qos = kn->kn_qos;
        kev.flags = (kn->kn_status & KN_DEFERDELETE) ?
            EV_DELETE : EV_VANISHED;
        kev.flags |= (EV_DISPATCH2 | EV_ONESHOT);
        kev.udata = kn->kn_udata;
        result = 1;
        knote_suppress(kn);
    } else {
        int flags = KNUSE_NONE;

        /* deactivate - so new activations indicate a wakeup */
        knote_deactivate(kn);

        /* suppress knotes to avoid returning the same event multiple times in a single call. */
        knote_suppress(kn);

        if (knoteuse_needs_boost(kn, NULL)) {
            flags |= KNUSE_BOOST;
        }
        /* convert lock to a knote use reference */
        if (!kqlock2knoteuse(kq, kn, flags))
            panic("dropping knote found on queue\n");

        /* call out to the filter to process with just a ref */
        result = knote_fops(kn)->f_process(kn, process_data, &kev);
        if (result) flags |= KNUSE_STEAL_DROP;

        /*
         * convert our reference back to a lock. accept drop
         * responsibility from others if we've committed to
         * delivering event data.
         */
        if (!knoteuse2kqlock(kq, kn, flags)) {
            /* knote dropped while we were processing it */
            kn = NULL;
        }
    }

    if (kn != NULL) {
        /*
         * Determine how to dispatch the knote for future event handling.
         * not-fired: just return (do not callout, leave deactivated).
         * One-shot:  If dispatch2, enter deferred-delete mode (unless this is
         *            the deferred delete event delivery itself). Otherwise,
         *            drop it.
         * stolendrop:We took responsibility for someone else's drop attempt.
         *            treat this just like one-shot and prepare to turn it back
         *            into a deferred delete if required.
         * Dispatch:  don't clear state, just mark it disabled.
         * Cleared:   just leave it deactivated.
         * Others:    re-activate as there may be more events to handle.
         *            This will not wake up more handlers right now, but
         *            at the completion of handling events it may trigger
         *            more handler threads (TODO: optimize based on more than
         *            just this one event being detected by the filter).
         */
        if (result == 0) {
            return (EJUSTRETURN);
        }

        if ((kev.flags & EV_ONESHOT) || (kn->kn_status & KN_STOLENDROP)) {
            if ((kn->kn_status & (KN_DISPATCH2 | KN_DEFERDELETE)) == KN_DISPATCH2) {
                /* defer dropping non-delete oneshot dispatch2 events */
                kn->kn_status |= KN_DEFERDELETE;
                knote_disable(kn);

                /* if we took over another's drop clear those flags here */
                if (kn->kn_status & KN_STOLENDROP) {
                    assert(kn->kn_status & KN_DROPPING);
                    /*
                     * the knote will be dropped when the
                     * deferred deletion occurs
                     */
                    kn->kn_status &= ~(KN_DROPPING|KN_STOLENDROP);
                }
            } else if (kn->kn_status & KN_STOLENDROP) {
                /* We now own the drop of the knote. */
                assert(kn->kn_status & KN_DROPPING);
                knote_unsuppress(kn);
                kqunlock(kq);
                knote_drop(kn, p);
                kqlock(kq);
            } else if (kqlock2knotedrop(kq, kn)) {
                /* just EV_ONESHOT, _not_ DISPATCH2 */
                knote_drop(kn, p);
                kqlock(kq);
            }
        } else if (kn->kn_status & KN_DISPATCH) {
            /* disable all dispatch knotes */
            knote_disable(kn);
        } else if ((kev.flags & EV_CLEAR) == 0) {
            /* re-activate in case there are more events */
            knote_activate(kn);
        }
    }

    /*
     * callback to handle each event as we find it.
     * If we have to detach and drop the knote, do
     * it while we have the kq unlocked.
     */
    if (result) {
        kqunlock(kq);
        error = (callback)(kq, &kev, callback_data);
        kqlock(kq);
    }
    return (error);
}
/*
 * Return 0 to indicate that processing should proceed,
 * -1 if there is nothing to process.
 *
 * Called with kqueue locked and returns the same way,
 * but may drop lock temporarily.
 */
static int
kqworkq_begin_processing(struct kqworkq *kqwq, kq_index_t qos_index, int flags)
{
    struct kqrequest *kqr;
    thread_t self = current_thread();
    __assert_only struct uthread *ut = get_bsdthread_info(self);

    assert(kqwq->kqwq_state & KQ_WORKQ);
    assert(qos_index < KQWQ_NQOS);

    KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_START);

    kqwq_req_lock(kqwq);

    kqr = kqworkq_get_request(kqwq, qos_index);

    /* manager skips buckets that haven't asked for its help */
    if (flags & KEVENT_FLAG_WORKQ_MANAGER) {

        /* If nothing for manager to do, just return */
        if ((kqr->kqr_state & KQWQ_THMANAGER) == 0) {
            KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END);
            kqwq_req_unlock(kqwq);
            return -1;
        }
        /* bind manager thread from this time on */
        kqworkq_bind_thread_impl(kqwq, qos_index, self, flags);

    } else {
        /* We should already be bound to this kqueue */
        assert(kqr->kqr_state & KQR_BOUND);
        assert(kqr->kqr_thread == self);
        assert(ut->uu_kqueue_bound == (struct kqueue *)kqwq);
        assert(ut->uu_kqueue_qos_index == qos_index);
        assert((ut->uu_kqueue_flags & flags) == ut->uu_kqueue_flags);
    }

    /*
     * we should have been requested to be here
     * and nobody else should still be processing
     */
    assert(kqr->kqr_state & KQR_WAKEUP);
    assert(kqr->kqr_state & KQR_THREQUESTED);
    assert((kqr->kqr_state & KQR_PROCESSING) == 0);

    /* reset wakeup trigger to catch new events after we start processing */
    kqr->kqr_state &= ~KQR_WAKEUP;

    /* convert to processing mode */
    kqr->kqr_state |= KQR_PROCESSING;

    KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END,
        kqr_thread_id(kqr), kqr->kqr_state);

    kqwq_req_unlock(kqwq);
    return 0;
}
static bool
kqworkloop_is_processing_on_current_thread(struct kqworkloop *kqwl)
{
    struct kqueue *kq = &kqwl->kqwl_kqueue;

    kqlock_held(kq);

    if (kq->kq_state & KQ_PROCESSING) {
        /*
         * KQ_PROCESSING is unset with the kqlock held, and the kqr thread is
         * never modified while KQ_PROCESSING is set, meaning that peeking at
         * its value is safe from this context.
         */
        return kqwl->kqwl_request.kqr_thread == current_thread();
    }
    return false;
}
static void
kqworkloop_acknowledge_events(struct kqworkloop *kqwl, boolean_t clear_ipc_override)
{
    struct kqrequest *kqr = &kqwl->kqwl_request;
    struct knote *kn, *tmp;

    kqlock_held(&kqwl->kqwl_kqueue);

    TAILQ_FOREACH_SAFE(kn, &kqr->kqr_suppressed, kn_tqe, tmp) {
        /*
         * If a knote that can adjust QoS is disabled because of the automatic
         * behavior of EV_DISPATCH, the knotes should stay suppressed so that
         * further overrides keep pushing.
         */
        if (knote_fops(kn)->f_adjusts_qos && (kn->kn_status & KN_DISABLED) &&
            (kn->kn_status & (KN_STAYACTIVE | KN_DROPPING)) == 0 &&
            (kn->kn_flags & (EV_DISPATCH | EV_DISABLE)) == EV_DISPATCH) {
            /*
             * When called from unbind, clear the sync ipc override on the knote
             * for events which are delivered.
             */
            if (clear_ipc_override) {
                knote_adjust_sync_qos(kn, THREAD_QOS_UNSPECIFIED, FALSE);
            }
            continue;
        }
        knote_unsuppress(kn);
    }
}
static int
kqworkloop_begin_processing(struct kqworkloop *kqwl,
    __assert_only unsigned int flags)
{
    struct kqrequest *kqr = &kqwl->kqwl_request;
    struct kqueue *kq = &kqwl->kqwl_kqueue;

    KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_START,
        kqwl->kqwl_dynamicid, flags, 0);

    kqwl_req_lock(kqwl);

    /* nobody else should still be processing */
    assert((kqr->kqr_state & KQR_PROCESSING) == 0);
    assert((kq->kq_state & KQ_PROCESSING) == 0);

    kqr->kqr_state |= KQR_PROCESSING | KQR_R2K_NOTIF_ARMED;
    kq->kq_state |= KQ_PROCESSING;

    kqwl_req_unlock(kqwl);

    kqworkloop_acknowledge_events(kqwl, FALSE);

    KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_END,
        kqwl->kqwl_dynamicid, flags, 0);

    return 0;
}
/*
 * Return 0 to indicate that processing should proceed,
 * -1 if there is nothing to process.
 *
 * Called with kqueue locked and returns the same way,
 * but may drop lock temporarily.
 */
static int
kqueue_begin_processing(struct kqueue *kq, kq_index_t qos_index, unsigned int flags)
{
    struct kqtailq *suppressq;

    if (kq->kq_state & KQ_WORKQ) {
        return kqworkq_begin_processing((struct kqworkq *)kq, qos_index, flags);
    } else if (kq->kq_state & KQ_WORKLOOP) {
        return kqworkloop_begin_processing((struct kqworkloop *)kq, flags);
    }

    KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_START,
        VM_KERNEL_UNSLIDE_OR_PERM(kq), flags);

    assert(qos_index == QOS_INDEX_KQFILE);

    /* wait to become the exclusive processing thread */
    for (;;) {
        if (kq->kq_state & KQ_DRAIN) {
            KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
                VM_KERNEL_UNSLIDE_OR_PERM(kq), 2);
            return -1;
        }

        if ((kq->kq_state & KQ_PROCESSING) == 0)
            break;

        /* if someone else is processing the queue, wait */
        kq->kq_state |= KQ_PROCWAIT;
        suppressq = kqueue_get_suppressed_queue(kq, qos_index);
        waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
            CAST_EVENT64_T(suppressq),
            THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

        kqunlock(kq);
        thread_block(THREAD_CONTINUE_NULL);
        kqlock(kq);
    }

    /* Nobody else processing */

    /* clear pre-posts and KQ_WAKEUP now, in case we bail early */
    waitq_set_clear_preposts(&kq->kq_wqs);
    kq->kq_state &= ~KQ_WAKEUP;

    /* anything left to process? */
    if (kqueue_queue_empty(kq, qos_index)) {
        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
            VM_KERNEL_UNSLIDE_OR_PERM(kq), 1);
        return -1;
    }

    /* convert to processing mode */
    kq->kq_state |= KQ_PROCESSING;

    KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
        VM_KERNEL_UNSLIDE_OR_PERM(kq));

    return 0;
}
/*
 * kqworkq_end_processing - Complete the processing of a workq kqueue
 *
 * We may have to request new threads.
 * This can happen when there are no waiting processing threads and:
 * - there were active events we never got to (count > 0)
 * - we pended waitq hook callouts during processing
 * - we pended wakeups while processing (or unsuppressing)
 *
 * Called with kqueue lock held.
 */
static void
kqworkq_end_processing(struct kqworkq *kqwq, kq_index_t qos_index, int flags)
{
#pragma unused(flags)

    struct kqueue *kq = &kqwq->kqwq_kqueue;
    struct kqtailq *suppressq = kqueue_get_suppressed_queue(kq, qos_index);

    thread_t self = current_thread();
    struct uthread *ut = get_bsdthread_info(self);
    struct knote *kn;
    struct kqrequest *kqr;
    thread_t thread;

    assert(kqwq->kqwq_state & KQ_WORKQ);
    assert(qos_index < KQWQ_NQOS);

    /* Are we really bound to this kqueue? */
    if (ut->uu_kqueue_bound != kq) {
        assert(ut->uu_kqueue_bound == kq);
        return;
    }

    kqr = kqworkq_get_request(kqwq, qos_index);

    kqwq_req_lock(kqwq);

    /* Do we claim to be manager? */
    if (flags & KEVENT_FLAG_WORKQ_MANAGER) {

        /* bail if not bound that way */
        if (ut->uu_kqueue_qos_index != KQWQ_QOS_MANAGER ||
            (ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ_MANAGER) == 0) {
            assert(ut->uu_kqueue_qos_index == KQWQ_QOS_MANAGER);
            assert(ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ_MANAGER);
            kqwq_req_unlock(kqwq);
            return;
        }

        /* bail if this request wasn't already getting manager help */
        if ((kqr->kqr_state & KQWQ_THMANAGER) == 0 ||
            (kqr->kqr_state & KQR_PROCESSING) == 0) {
            kqwq_req_unlock(kqwq);
            return;
        }
    } else {
        if (ut->uu_kqueue_qos_index != qos_index ||
            (ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ_MANAGER)) {
            assert(ut->uu_kqueue_qos_index == qos_index);
            assert((ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ_MANAGER) == 0);
            kqwq_req_unlock(kqwq);
            return;
        }
    }

    assert(kqr->kqr_state & KQR_BOUND);
    thread = kqr->kqr_thread;
    assert(thread == self);

    assert(kqr->kqr_state & KQR_PROCESSING);

    /* If we didn't drain the whole queue, re-mark a wakeup being needed */
    if (!kqueue_queue_empty(kq, qos_index))
        kqr->kqr_state |= KQR_WAKEUP;

    kqwq_req_unlock(kqwq);

    /*
     * Return suppressed knotes to their original state.
     * For workq kqueues, suppressed ones that are still
     * truly active (not just forced into the queue) will
     * set flags we check below to see if anything got
     * woken up.
     */
    while ((kn = TAILQ_FIRST(suppressq)) != NULL) {
        assert(kn->kn_status & KN_SUPPRESSED);
        knote_unsuppress(kn);
    }

    kqwq_req_lock(kqwq);

    /* Indicate that we are done processing this request */
    kqr->kqr_state &= ~KQR_PROCESSING;

    /*
     * Drop our association with this one request and its
     * override on us.
     */
    kqworkq_unbind_thread(kqwq, qos_index, thread, flags);

    /*
     * request a new thread if we didn't process the whole
     * queue or real events have happened (not just putting
     * stay-active events back).
     */
    if (kqr->kqr_state & KQR_WAKEUP) {
        if (kqueue_queue_empty(kq, qos_index)) {
            kqr->kqr_state &= ~KQR_WAKEUP;
        } else {
            kqworkq_request_thread(kqwq, qos_index);
        }
    }
    kqwq_req_unlock(kqwq);
}
static void
kqworkloop_end_processing(struct kqworkloop *kqwl, int nevents,
    unsigned int flags)
{
    struct kqrequest *kqr = &kqwl->kqwl_request;
    struct kqueue *kq = &kqwl->kqwl_kqueue;

    KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_START,
        kqwl->kqwl_dynamicid, flags, 0);

    if ((kq->kq_state & KQ_NO_WQ_THREAD) && nevents == 0 &&
        (flags & KEVENT_FLAG_IMMEDIATE) == 0) {
        /*
         * <rdar://problem/31634014> We may soon block, but have returned no
         * kevents that need to be kept suppressed for overriding purposes.
         *
         * It is hence safe to acknowledge events and unsuppress everything, so
         * that if we block we can observe all events firing.
         */
        kqworkloop_acknowledge_events(kqwl, TRUE);
    }

    kqwl_req_lock(kqwl);

    assert(kqr->kqr_state & KQR_PROCESSING);
    assert(kq->kq_state & KQ_PROCESSING);

    kq->kq_state &= ~KQ_PROCESSING;
    kqr->kqr_state &= ~KQR_PROCESSING;
    kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_RECOMPUTE_WAKEUP_QOS, 0);

    kqwl_req_unlock(kqwl);

    KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_END,
        kqwl->kqwl_dynamicid, flags, 0);
}
/*
 * Called with kqueue lock held.
 */
static void
kqueue_end_processing(struct kqueue *kq, kq_index_t qos_index,
    int nevents, unsigned int flags)
{
    struct knote *kn;
    struct kqtailq *suppressq;
    int procwait;

    assert((kq->kq_state & KQ_WORKQ) == 0);

    if (kq->kq_state & KQ_WORKLOOP) {
        return kqworkloop_end_processing((struct kqworkloop *)kq, nevents, flags);
    }

    KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_END),
        VM_KERNEL_UNSLIDE_OR_PERM(kq), flags);

    assert(qos_index == QOS_INDEX_KQFILE);

    /*
     * Return suppressed knotes to their original state.
     */
    suppressq = kqueue_get_suppressed_queue(kq, qos_index);
    while ((kn = TAILQ_FIRST(suppressq)) != NULL) {
        assert(kn->kn_status & KN_SUPPRESSED);
        knote_unsuppress(kn);
    }

    procwait = (kq->kq_state & KQ_PROCWAIT);
    kq->kq_state &= ~(KQ_PROCESSING | KQ_PROCWAIT);

    if (procwait) {
        /* first wake up any thread already waiting to process */
        waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
            CAST_EVENT64_T(suppressq),
            THREAD_AWAKENED,
            WAITQ_ALL_PRIORITIES);
    }
}
/*
 * kqworkq_internal_bind - bind thread to processing workq kqueue
 *
 * Determines if the provided thread will be responsible for
 * servicing the particular QoS class index specified in the
 * parameters. Once the binding is done, any overrides that may
 * be associated with the corresponding events can be applied.
 *
 * This should be called as soon as the thread identity is known,
 * preferably while still at high priority during creation.
 *
 * - caller holds a reference on the process (and workq kq)
 * - the thread MUST call kevent_qos_internal after being bound
 *   or the bucket of events may never be delivered.
 *   (unless this is a synchronous bind, then the request is locked)
 */
static boolean_t
kqworkq_internal_bind(
    struct proc *p,
    kq_index_t qos_index,
    thread_t thread,
    unsigned int flags)
{
    struct kqueue *kq;
    struct kqworkq *kqwq;
    struct kqrequest *kqr;
    struct uthread *ut = get_bsdthread_info(thread);

    /* If no process workq, can't be our thread. */
    kq = p->p_fd->fd_wqkqueue;
    if (kq == NULL)
        return FALSE;

    assert(kq->kq_state & KQ_WORKQ);
    kqwq = (struct kqworkq *)kq;

    /*
     * No need to bind the manager thread to any specific
     * bucket, but still claim the thread.
     */
    if (qos_index == KQWQ_QOS_MANAGER) {
        assert(ut->uu_kqueue_bound == NULL);
        assert(flags & KEVENT_FLAG_WORKQ_MANAGER);
        ut->uu_kqueue_bound = kq;
        ut->uu_kqueue_qos_index = qos_index;
        ut->uu_kqueue_flags = flags;

        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_BIND),
            thread_tid(thread), flags, qos_index);

        return TRUE;
    }

    /*
     * If this is a synchronous bind callback, the request
     * lock is already held, so just do the bind.
     */
    if (flags & KEVENT_FLAG_SYNCHRONOUS_BIND) {
        kqwq_req_held(kqwq);
        /* strip out synchronous bind flag */
        flags &= ~KEVENT_FLAG_SYNCHRONOUS_BIND;
        kqworkq_bind_thread_impl(kqwq, qos_index, thread, flags);
        return TRUE;
    }

    /*
     * check the request that corresponds to our qos_index
     * to see if there is an outstanding request.
     */
    kqr = kqworkq_get_request(kqwq, qos_index);
    assert(kqr->kqr_qos_index == qos_index);
    kqwq_req_lock(kqwq);

    KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_BIND),
        thread_tid(thread), flags, qos_index, kqr->kqr_state);

    if ((kqr->kqr_state & KQR_THREQUESTED) &&
        (kqr->kqr_state & KQR_PROCESSING) == 0) {

        if ((kqr->kqr_state & KQR_BOUND) &&
            thread == kqr->kqr_thread) {
            /* duplicate bind - claim the thread */
            assert(ut->uu_kqueue_bound == kq);
            assert(ut->uu_kqueue_qos_index == qos_index);
            kqwq_req_unlock(kqwq);
            return TRUE;
        }
        if ((kqr->kqr_state & (KQR_BOUND | KQWQ_THMANAGER)) == 0) {
            /* ours to bind to */
            kqworkq_bind_thread_impl(kqwq, qos_index, thread, flags);
            kqwq_req_unlock(kqwq);
            return TRUE;
        }
    }
    kqwq_req_unlock(kqwq);
    return FALSE;
}
static void
kqworkloop_bind_thread_impl(struct kqworkloop *kqwl,
    thread_t thread,
    __assert_only unsigned int flags)
{
    assert(flags & KEVENT_FLAG_WORKLOOP);

    /* the request object must be locked */
    kqwl_req_held(kqwl);

    struct kqrequest *kqr = &kqwl->kqwl_request;
    struct uthread *ut = get_bsdthread_info(thread);
    boolean_t ipc_override_is_sync;
    kq_index_t qos_index = kqworkloop_combined_qos(kqwl, &ipc_override_is_sync);

    /* nobody else bound so finally bind (as a workloop) */
    assert(kqr->kqr_state & KQR_THREQUESTED);
    assert((kqr->kqr_state & (KQR_BOUND | KQR_PROCESSING)) == 0);
    assert(thread != kqwl->kqwl_owner);

    KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_BIND),
        kqwl->kqwl_dynamicid, (uintptr_t)thread_tid(thread),
        (uintptr_t)(((uintptr_t)kqr->kqr_override_index << 16) |
        (((uintptr_t)kqr->kqr_state) << 8) |
        ((uintptr_t)ipc_override_is_sync)));

    kqr->kqr_state |= KQR_BOUND | KQR_R2K_NOTIF_ARMED;
    kqr->kqr_thread = thread;

    /* bind the workloop to the uthread */
    ut->uu_kqueue_bound = (struct kqueue *)kqwl;
    ut->uu_kqueue_flags = flags;
    ut->uu_kqueue_qos_index = qos_index;
    assert(ut->uu_kqueue_override_is_sync == 0);
    ut->uu_kqueue_override_is_sync = ipc_override_is_sync;

    if (qos_index) {
        thread_add_ipc_override(thread, qos_index);
    }
    if (ipc_override_is_sync) {
        thread_add_sync_ipc_override(thread);
    }
}
/*
 * workloop_fulfill_threadreq - bind thread to processing workloop
 *
 * The provided thread will be responsible for delivering events
 * associated with the given kqrequest. Bind it and get ready for
 * the thread to eventually arrive.
 *
 * If WORKLOOP_FULFILL_THREADREQ_SYNC is specified, the callback
 * is being made within the context of the pthread_functions->workq_threadreq
 * callout. In this case, the request structure is already locked.
 */
static int
workloop_fulfill_threadreq(struct proc *p,
    workq_threadreq_t req,
    thread_t thread,
    int flags)
{
    int sync = (flags & WORKLOOP_FULFILL_THREADREQ_SYNC);
    int cancel = (flags & WORKLOOP_FULFILL_THREADREQ_CANCEL);
    struct kqrequest *kqr;
    struct kqworkloop *kqwl;

    kqwl = (struct kqworkloop *)((uintptr_t)req -
        offsetof(struct kqworkloop, kqwl_request) -
        offsetof(struct kqrequest, kqr_req));
    kqr = &kqwl->kqwl_request;

    /* validate we're looking at something valid */
    if (kqwl->kqwl_p != p ||
        (kqwl->kqwl_state & KQ_WORKLOOP) == 0) {
        assert(kqwl->kqwl_p == p);
        assert(kqwl->kqwl_state & KQ_WORKLOOP);
        return EINVAL;
    }

    if (!sync)
        kqwl_req_lock(kqwl);

    /* Should be a pending request */
    if ((kqr->kqr_state & KQR_BOUND) ||
        (kqr->kqr_state & KQR_THREQUESTED) == 0) {

        assert((kqr->kqr_state & KQR_BOUND) == 0);
        assert(kqr->kqr_state & KQR_THREQUESTED);
        if (!sync)
            kqwl_req_unlock(kqwl);
        return EINPROGRESS;
    }

    assert((kqr->kqr_state & KQR_DRAIN) == 0);

    /*
     * Is it a cancel indication from pthread.
     * If so, we must be exiting/exec'ing. Forget
     * our pending request.
     */
    if (cancel) {
        kqr->kqr_state &= ~KQR_THREQUESTED;
        kqr->kqr_state |= KQR_DRAIN;
    } else {
        /* do the actual bind? */
        kqworkloop_bind_thread_impl(kqwl, thread, KEVENT_FLAG_WORKLOOP);
    }

    if (!sync)
        kqwl_req_unlock(kqwl);

    if (cancel)
        kqueue_release_last(p, &kqwl->kqwl_kqueue); /* may dealloc kq */

    return 0;
}
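/*
 * The pointer arithmetic at the top of workloop_fulfill_threadreq() is the
 * usual "container-of" pattern: the pthread kext only hands back the
 * embedded workq_threadreq_s, and the owning kqworkloop is recovered by
 * subtracting the offsets of the embedding fields.  A generic sketch of the
 * idiom (the macro name is illustrative only, not defined in this file):
 *
 *  #include <stddef.h>
 *
 *  #define container_of(ptr, type, member) \
 *      ((type *)((uintptr_t)(ptr) - offsetof(type, member)))
 *
 * With nested embedding, as above, two offsetof() subtractions are chained:
 * first from kqr_req back to the struct kqrequest, then from kqwl_request
 * back to the struct kqworkloop.
 */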
/*
 * kevent_qos_internal_bind - bind thread to processing kqueue
 *
 * Indicates that the provided thread will be responsible for
 * servicing the particular QoS class index specified in the
 * parameters. Once the binding is done, any overrides that may
 * be associated with the corresponding events can be applied.
 *
 * This should be called as soon as the thread identity is known,
 * preferably while still at high priority during creation.
 *
 * - caller holds a reference on the kqueue.
 * - the thread MUST call kevent_qos_internal after being bound
 *   or the bucket of events may never be delivered.
 * - Nothing locked (may take mutex or block).
 */
int
kevent_qos_internal_bind(
    struct proc *p,
    int qos_class,
    thread_t thread,
    unsigned int flags)
{
    kq_index_t qos_index;

    assert(flags & KEVENT_FLAG_WORKQ);

    if (thread == THREAD_NULL || (flags & KEVENT_FLAG_WORKQ) == 0) {
        return EINVAL;
    }

    /* get the qos index we're going to service */
    qos_index = qos_index_for_servicer(qos_class, thread, flags);

    if (kqworkq_internal_bind(p, qos_index, thread, flags))
        return 0;

    return EINPROGRESS;
}
static void
kqworkloop_internal_unbind(
    struct proc *p,
    thread_t thread,
    unsigned int flags)
{
    struct kqueue *kq;
    struct kqworkloop *kqwl;
    struct uthread *ut = get_bsdthread_info(thread);

    assert(ut->uu_kqueue_bound != NULL);
    kq = ut->uu_kqueue_bound;
    assert(kq->kq_state & KQ_WORKLOOP);
    kqwl = (struct kqworkloop *)kq;

    KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_UNBIND),
        kqwl->kqwl_dynamicid, (uintptr_t)thread_tid(thread));

    if (!(kq->kq_state & KQ_NO_WQ_THREAD)) {
        assert(is_workqueue_thread(thread));

        kqlock(kq);
        kqworkloop_unbind_thread(kqwl, thread, flags);
        kqunlock(kq);

        /* If last reference, dealloc the workloop kq */
        kqueue_release_last(p, kq);
    } else {
        assert(!is_workqueue_thread(thread));
        kevent_servicer_detach_thread(p, kqwl->kqwl_dynamicid, thread, flags, kq);
    }
}
static void
kqworkq_internal_unbind(
    struct proc *p,
    kq_index_t qos_index,
    thread_t thread,
    unsigned int flags)
{
    struct kqueue *kq;
    struct kqworkq *kqwq;
    struct uthread *ut;
    kq_index_t end_index;

    assert(thread == current_thread());
    ut = get_bsdthread_info(thread);

    kq = p->p_fd->fd_wqkqueue;
    assert(kq->kq_state & KQ_WORKQ);
    assert(ut->uu_kqueue_bound == kq);

    kqwq = (struct kqworkq *)kq;

    /* end servicing any requests we might own */
    end_index = (qos_index == KQWQ_QOS_MANAGER) ?
        0 : qos_index;
    kqlock(kq);

    KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_UNBIND),
        (uintptr_t)thread_tid(thread), flags, qos_index);

    do {
        kqworkq_end_processing(kqwq, qos_index, flags);
    } while (qos_index-- > end_index);

    ut->uu_kqueue_bound = NULL;
    ut->uu_kqueue_qos_index = 0;
    ut->uu_kqueue_flags = 0;

    kqunlock(kq);
}
/*
 * kevent_qos_internal_unbind - unbind thread from processing kqueue
 *
 * End processing the per-QoS bucket of events and allow other threads
 * to be requested for future servicing.
 *
 * caller holds a reference on the kqueue.
 * thread is the current thread.
 */
int
kevent_qos_internal_unbind(
    struct proc *p,
    int qos_class,
    thread_t thread,
    unsigned int flags)
{
#pragma unused(qos_class)

    struct uthread *ut;
    struct kqueue *kq;
    unsigned int bound_flags;
    boolean_t check_flags;

    ut = get_bsdthread_info(thread);
    if (ut->uu_kqueue_bound == NULL) {
        /* early out if we are already unbound */
        assert(ut->uu_kqueue_flags == 0);
        assert(ut->uu_kqueue_qos_index == 0);
        assert(ut->uu_kqueue_override_is_sync == 0);
        return EALREADY;
    }

    assert(flags & (KEVENT_FLAG_WORKQ | KEVENT_FLAG_WORKLOOP));
    assert(thread == current_thread());

    check_flags = flags & KEVENT_FLAG_UNBIND_CHECK_FLAGS;

    /* Get the kqueue we started with */
    kq = ut->uu_kqueue_bound;
    assert(kq != NULL);
    assert(kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP));

    /* get flags and QoS parameters we started with */
    bound_flags = ut->uu_kqueue_flags;

    /* Unbind from the class of workq */
    if (kq->kq_state & KQ_WORKQ) {
        if (check_flags && !(flags & KEVENT_FLAG_WORKQ)) {
            return EINVAL;
        }

        kqworkq_internal_unbind(p, ut->uu_kqueue_qos_index, thread, bound_flags);
    } else {
        if (check_flags && !(flags & KEVENT_FLAG_WORKLOOP)) {
            return EINVAL;
        }

        kqworkloop_internal_unbind(p, thread, bound_flags);
    }

    return 0;
}
/*
 * kqueue_process - process the triggered events in a kqueue
 *
 * Walk the queued knotes and validate that they are
 * really still triggered events by calling the filter
 * routines (if necessary). Hold a use reference on
 * the knote to avoid it being detached. For each event
 * that is still considered triggered, invoke the
 * callback routine provided.
 *
 * caller holds a reference on the kqueue.
 * kqueue locked on entry and exit - but may be dropped
 * kqueue list locked (held for duration of call)
 */
static int
kqueue_process(struct kqueue *kq,
    kevent_callback_t callback,
    void *callback_data,
    struct filt_process_s *process_data,
    int *countp,
    struct proc *p)
{
    unsigned int flags = process_data ? process_data->fp_flags : 0;
    struct uthread *ut = get_bsdthread_info(current_thread());
    kq_index_t start_index, end_index, i;
    struct knote *kn;
    int nevents = 0;
    int error = 0;

    /*
     * Based on the mode of the kqueue and the bound QoS of the servicer,
     * determine the range of thread requests that need checking
     */
    if (kq->kq_state & KQ_WORKQ) {
        if (flags & KEVENT_FLAG_WORKQ_MANAGER) {
            start_index = KQWQ_QOS_MANAGER;
        } else if (ut->uu_kqueue_bound != kq) {
            return EJUSTRETURN;
        } else {
            start_index = ut->uu_kqueue_qos_index;
        }

        /* manager services every request in a workq kqueue */
        assert(start_index > 0 && start_index <= KQWQ_QOS_MANAGER);
        end_index = (start_index == KQWQ_QOS_MANAGER) ? 0 : start_index;

    } else if (kq->kq_state & KQ_WORKLOOP) {
        if (ut->uu_kqueue_bound != kq)
            return EJUSTRETURN;

        /*
         * Single request servicing
         * we want to deliver all events, regardless of the QOS
         */
        start_index = end_index = THREAD_QOS_UNSPECIFIED;
    } else {
        start_index = end_index = QOS_INDEX_KQFILE;
    }

    i = start_index;

    do {
        if (kqueue_begin_processing(kq, i, flags) == -1) {
            /* Nothing to process */
            continue;
        }

        /*
         * loop through the enqueued knotes associated with this request,
         * processing each one. Each request may have several queues
         * of knotes to process (depending on the type of kqueue) so we
         * have to loop through all the queues as long as we have additional
         * space.
         */
        error = 0;

        struct kqtailq *base_queue = kqueue_get_base_queue(kq, i);
        struct kqtailq *queue = kqueue_get_high_queue(kq, i);
        do {
            while (error == 0 && (kn = TAILQ_FIRST(queue)) != NULL) {
                error = knote_process(kn, callback, callback_data, process_data, p);
                if (error == EJUSTRETURN) {
                    error = 0;
                } else {
                    nevents++;
                }
                /* error is EWOULDBLOCK when the out event array is full */
            }
        } while (error == 0 && queue-- > base_queue);

        if ((kq->kq_state & KQ_WORKQ) == 0) {
            kqueue_end_processing(kq, i, nevents, flags);
        }

        if (error == EWOULDBLOCK) {
            /* break out if no more space for additional events */
            error = 0;
            break;
        }
    } while (i-- > end_index);

    *countp = nevents;
    return (error);
}
static void
kqueue_scan_continue(void *data, wait_result_t wait_result)
{
    thread_t self = current_thread();
    uthread_t ut = (uthread_t)get_bsdthread_info(self);
    struct _kqueue_scan *cont_args = &ut->uu_kevent.ss_kqueue_scan;
    struct kqueue *kq = (struct kqueue *)data;
    struct filt_process_s *process_data = cont_args->process_data;
    int error;
    int count;

    /* convert the (previous) wait_result to a proper error */
    switch (wait_result) {
    case THREAD_AWAKENED: {
        kqlock(kq);
    retry:
        error = kqueue_process(kq, cont_args->call, cont_args->data,
            process_data, &count, current_proc());
        if (error == 0 && count == 0) {
            if (kq->kq_state & KQ_DRAIN) {
                kqunlock(kq);
                goto drain;
            }

            if (kq->kq_state & KQ_WAKEUP)
                goto retry;

            waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
                KQ_EVENT, THREAD_ABORTSAFE,
                cont_args->deadline);
            kq->kq_state |= KQ_SLEEP;
            kqunlock(kq);
            thread_block_parameter(kqueue_scan_continue, kq);
            /* NOTREACHED */
        }
        kqunlock(kq);
    } break;
    case THREAD_TIMED_OUT:
        error = EWOULDBLOCK;
        break;
    case THREAD_INTERRUPTED:
        error = EINTR;
        break;
    case THREAD_RESTART:
    drain:
        error = EBADF;
        break;
    default:
        panic("%s: - invalid wait_result (%d)", __func__,
            wait_result);
        error = 0;
    }

    /* call the continuation with the results */
    assert(cont_args->cont != NULL);
    (cont_args->cont)(kq, cont_args->data, error);
}
/*
 * kqueue_scan - scan and wait for events in a kqueue
 *
 * Process the triggered events in a kqueue.
 *
 * If there are no events triggered, arrange to
 * wait for them. If the caller provided a
 * continuation routine, it will be called with the
 * results instead of returning here directly.
 *
 * The callback routine must be valid.
 * The caller must hold a use-count reference on the kq.
 */
int
kqueue_scan(struct kqueue *kq,
    kevent_callback_t callback,
    kqueue_continue_t continuation,
    void *callback_data,
    struct filt_process_s *process_data,
    struct timeval *atvp,
    struct proc *p)
{
    thread_continue_t cont = THREAD_CONTINUE_NULL;
    unsigned int flags;
    uint64_t deadline;
    int error;
    int first;
    int fd;

    assert(callback != NULL);

    /*
     * Determine which QoS index we are servicing
     */
    flags = (process_data) ? process_data->fp_flags : 0;
    fd = (process_data) ? process_data->fp_fd : -1;

    first = 1;
    for (;;) {
        wait_result_t wait_result;
        int count;

        /*
         * Make a pass through the kq to find events already
         * triggered.
         */
        kqlock(kq);
        error = kqueue_process(kq, callback, callback_data,
            process_data, &count, p);
        if (error || count)
            break; /* lock still held */

        /* looks like we have to consider blocking */
        if (first) {
            first = 0;
            /* convert the timeout to a deadline once */
            if (atvp->tv_sec || atvp->tv_usec) {
                uint64_t now;

                clock_get_uptime(&now);
                nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC +
                    atvp->tv_usec * (long)NSEC_PER_USEC,
                    &deadline);
                if (now >= deadline) {
                    /* non-blocking call */
                    error = EWOULDBLOCK;
                    break; /* lock still held */
                }
                deadline -= now;
                clock_absolutetime_interval_to_deadline(deadline, &deadline);
            } else {
                deadline = 0; /* block forever */
            }

            if (continuation) {
                uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
                struct _kqueue_scan *cont_args = &ut->uu_kevent.ss_kqueue_scan;

                cont_args->call = callback;
                cont_args->cont = continuation;
                cont_args->deadline = deadline;
                cont_args->data = callback_data;
                cont_args->process_data = process_data;
                cont = kqueue_scan_continue;
            }
        }

        if (kq->kq_state & KQ_DRAIN) {
            kqunlock(kq);
            return EBADF;
        }

        /* If awakened during processing, try again */
        if (kq->kq_state & KQ_WAKEUP) {
            kqunlock(kq);
            continue;
        }

        /* go ahead and wait */
        waitq_assert_wait64_leeway((struct waitq *)&kq->kq_wqs,
            KQ_EVENT, THREAD_ABORTSAFE,
            TIMEOUT_URGENCY_USER_NORMAL,
            deadline, TIMEOUT_NO_LEEWAY);
        kq->kq_state |= KQ_SLEEP;
        kqunlock(kq);
        wait_result = thread_block_parameter(cont, kq);
        /* NOTREACHED if (continuation != NULL) */

        switch (wait_result) {
        case THREAD_AWAKENED:
            continue;
        case THREAD_TIMED_OUT:
            return EWOULDBLOCK;
        case THREAD_INTERRUPTED:
            return EINTR;
        case THREAD_RESTART:
            return EBADF;
        default:
            panic("%s: - bad wait_result (%d)", __func__,
                wait_result);
        }
    }
    kqunlock(kq);
    return (error);
}
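/*
 * The deadline handling in kqueue_scan() is done once, on the first pass:
 * the caller-supplied timeval (already an absolute uptime value) is scaled
 * to Mach absolute time, compared against "now" for the non-blocking case,
 * and the remainder becomes the wait deadline.  A stand-alone sketch of that
 * sequence (a simplification of the code above, not a separate API):
 *
 *  uint64_t now, abs, deadline;
 *  clock_get_uptime(&now);
 *  nanoseconds_to_absolutetime((uint64_t)atv.tv_sec * NSEC_PER_SEC +
 *      (uint64_t)atv.tv_usec * NSEC_PER_USEC, &abs);
 *  if (now >= abs)
 *      return EWOULDBLOCK;    // timeout already expired
 *  clock_absolutetime_interval_to_deadline(abs - now, &deadline);
 *
 * A deadline of 0 is reserved to mean "block forever".
 */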
/*
 * This could be expanded to call kqueue_scan, if desired.
 */
static int
kqueue_read(__unused struct fileproc *fp,
    __unused struct uio *uio,
    __unused int flags,
    __unused vfs_context_t ctx)
{
    return (ENXIO);
}

static int
kqueue_write(__unused struct fileproc *fp,
    __unused struct uio *uio,
    __unused int flags,
    __unused vfs_context_t ctx)
{
    return (ENXIO);
}

static int
kqueue_ioctl(__unused struct fileproc *fp,
    __unused u_long com,
    __unused caddr_t data,
    __unused vfs_context_t ctx)
{
    return (ENOTTY);
}
static int
kqueue_select(struct fileproc *fp, int which, void *wq_link_id,
    __unused vfs_context_t ctx)
{
    struct kqueue *kq = (struct kqueue *)fp->f_data;
    struct kqtailq *queue;
    struct kqtailq *suppressq;
    struct knote *kn;
    int retnum = 0;

    if (which != FREAD)
        return (0);

    kqlock(kq);

    assert((kq->kq_state & KQ_WORKQ) == 0);

    /*
     * If this is the first pass, link the wait queue associated with the
     * the kqueue onto the wait queue set for the select(). Normally we
     * use selrecord() for this, but it uses the wait queue within the
     * selinfo structure and we need to use the main one for the kqueue to
     * catch events from KN_STAYQUEUED sources. So we do the linkage manually.
     * (The select() call will unlink them when it ends).
     */
    if (wq_link_id != NULL) {
        thread_t cur_act = current_thread();
        struct uthread * ut = get_bsdthread_info(cur_act);

        kq->kq_state |= KQ_SEL;
        waitq_link((struct waitq *)&kq->kq_wqs, ut->uu_wqset,
            WAITQ_SHOULD_LOCK, (uint64_t *)wq_link_id);

        /* always consume the reserved link object */
        waitq_link_release(*(uint64_t *)wq_link_id);
        *(uint64_t *)wq_link_id = 0;

        /*
         * selprocess() is expecting that we send it back the waitq
         * that was just added to the thread's waitq set. In order
         * to not change the selrecord() API (which is exported to
         * kexts), we pass this value back through the
         * void *wq_link_id pointer we were passed. We need to use
         * memcpy here because the pointer may not be properly aligned
         * on 32-bit systems.
         */
        void *wqptr = &kq->kq_wqs;
        memcpy(wq_link_id, (void *)&wqptr, sizeof(void *));
    }

    if (kqueue_begin_processing(kq, QOS_INDEX_KQFILE, 0) == -1) {
        kqunlock(kq);
        return (0);
    }

    queue = kqueue_get_base_queue(kq, QOS_INDEX_KQFILE);
    if (!TAILQ_EMPTY(queue)) {
        /*
         * there is something queued - but it might be a
         * KN_STAYACTIVE knote, which may or may not have
         * any events pending. Otherwise, we have to walk
         * the list of knotes to see, and peek at the
         * (non-vanished) stay-active ones to be really sure.
         */
        while ((kn = (struct knote *)TAILQ_FIRST(queue)) != NULL) {
            if (kn->kn_status & KN_ACTIVE) {
                retnum = 1;
                goto out;
            }
            assert(kn->kn_status & KN_STAYACTIVE);
            knote_suppress(kn);
        }

        /*
         * There were no regular events on the queue, so take
         * a deeper look at the stay-queued ones we suppressed.
         */
        suppressq = kqueue_get_suppressed_queue(kq, QOS_INDEX_KQFILE);
        while ((kn = (struct knote *)TAILQ_FIRST(suppressq)) != NULL) {
            unsigned peek = 1;

            assert(!knoteuse_needs_boost(kn, NULL));

            /* If didn't vanish while suppressed - peek at it */
            if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
                peek = knote_fops(kn)->f_peek(kn);

                /* if it dropped while getting lock - move on */
                if (!knoteuse2kqlock(kq, kn, KNUSE_NONE))
                    continue;
            }

            /* unsuppress it */
            knote_unsuppress(kn);

            /* has data or it has to report a vanish */
            if (peek > 0) {
                retnum = 1;
                goto out;
            }
        }
    }

out:
    kqueue_end_processing(kq, QOS_INDEX_KQFILE, retnum, 0);
    kqunlock(kq);
    return (retnum);
}
static int
kqueue_close(struct fileglob *fg, __unused vfs_context_t ctx)
{
    struct kqfile *kqf = (struct kqfile *)fg->fg_data;

    assert((kqf->kqf_state & KQ_WORKQ) == 0);
    kqueue_dealloc(&kqf->kqf_kqueue);
    fg->fg_data = NULL;
    return (0);
}
/*
 * The caller has taken a use-count reference on this kqueue and will donate it
 * to the kqueue we are being added to. This keeps the kqueue from closing until
 * that relationship is torn down.
 */
static int
kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn,
    __unused struct kevent_internal_s *kev, __unused vfs_context_t ctx)
{
    struct kqfile *kqf = (struct kqfile *)kn->kn_fp->f_data;
    struct kqueue *kq = &kqf->kqf_kqueue;
    struct kqueue *parentkq = knote_get_kq(kn);

    assert((kqf->kqf_state & KQ_WORKQ) == 0);

    if (parentkq == kq ||
        kn->kn_filter != EVFILT_READ) {
        kn->kn_flags = EV_ERROR;
        kn->kn_data = EINVAL;
        return 0;
    }

    /*
     * We have to avoid creating a cycle when nesting kqueues
     * inside another. Rather than trying to walk the whole
     * potential DAG of nested kqueues, we just use a simple
     * ceiling protocol. When a kqueue is inserted into another,
     * we check that the (future) parent is not already nested
     * into another kqueue at a lower level than the potential
     * child (because it could indicate a cycle). If that test
     * passes, we just mark the nesting levels accordingly.
     */

    kqlock(parentkq);
    if (parentkq->kq_level > 0 &&
        parentkq->kq_level < kq->kq_level)
    {
        kqunlock(parentkq);
        kn->kn_flags = EV_ERROR;
        kn->kn_data = EINVAL;
        return 0;
    } else {
        /* set parent level appropriately */
        if (parentkq->kq_level == 0)
            parentkq->kq_level = 2;
        if (parentkq->kq_level < kq->kq_level + 1)
            parentkq->kq_level = kq->kq_level + 1;
        kqunlock(parentkq);

        kn->kn_filtid = EVFILTID_KQREAD;
        kqlock(kq);
        KNOTE_ATTACH(&kqf->kqf_sel.si_note, kn);
        /* indicate nesting in child, if needed */
        if (kq->kq_level == 0)
            kq->kq_level = 1;

        int count = kq->kq_count;
        kqunlock(kq);
        return (count > 0);
    }
}
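/*
 * The nesting "ceiling protocol" above boils down to a small predicate: a
 * child kqueue may be added to a parent only if the parent is not already
 * nested at a level below the child's.  A compact sketch of the rule (an
 * illustrative helper, not used by this file):
 *
 *  static bool
 *  kq_nesting_allowed(uint8_t parent_level, uint8_t child_level)
 *  {
 *      // level 0 means "never nested yet", which is always allowed
 *      return (parent_level == 0 || parent_level >= child_level);
 *  }
 *
 * On success the parent's level is raised to at least child_level + 1 and a
 * previously unnested child is marked level 1, which is what keeps a cycle
 * (a parent eventually appearing below one of its descendants) detectable.
 */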
/*
 * kqueue_drain - called when kq is closed
 */
static int
kqueue_drain(struct fileproc *fp, __unused vfs_context_t ctx)
{
    struct kqueue *kq = (struct kqueue *)fp->f_fglob->fg_data;

    assert((kq->kq_state & KQ_WORKQ) == 0);

    kqlock(kq);
    kq->kq_state |= KQ_DRAIN;
    kqueue_interrupt(kq);
    kqunlock(kq);
    return (0);
}
int
kqueue_stat(struct kqueue *kq, void *ub, int isstat64, proc_t p)
{
    assert((kq->kq_state & KQ_WORKQ) == 0);

    kqlock(kq);
    if (isstat64 != 0) {
        struct stat64 *sb64 = (struct stat64 *)ub;

        bzero((void *)sb64, sizeof(*sb64));
        sb64->st_size = kq->kq_count;
        if (kq->kq_state & KQ_KEV_QOS)
            sb64->st_blksize = sizeof(struct kevent_qos_s);
        else if (kq->kq_state & KQ_KEV64)
            sb64->st_blksize = sizeof(struct kevent64_s);
        else if (IS_64BIT_PROCESS(p))
            sb64->st_blksize = sizeof(struct user64_kevent);
        else
            sb64->st_blksize = sizeof(struct user32_kevent);
        sb64->st_mode = S_IFIFO;
    } else {
        struct stat *sb = (struct stat *)ub;

        bzero((void *)sb, sizeof(*sb));
        sb->st_size = kq->kq_count;
        if (kq->kq_state & KQ_KEV_QOS)
            sb->st_blksize = sizeof(struct kevent_qos_s);
        else if (kq->kq_state & KQ_KEV64)
            sb->st_blksize = sizeof(struct kevent64_s);
        else if (IS_64BIT_PROCESS(p))
            sb->st_blksize = sizeof(struct user64_kevent);
        else
            sb->st_blksize = sizeof(struct user32_kevent);
        sb->st_mode = S_IFIFO;
    }
    kqunlock(kq);
    return (0);
}
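/*
 * From userspace this surfaces through fstat(2) on a kqueue descriptor:
 * st_mode reports S_IFIFO, st_size the number of pending events, and
 * st_blksize the size of the kevent structure flavor in use.  A hypothetical
 * sketch (kq is a placeholder descriptor):
 *
 *  #include <sys/stat.h>
 *
 *  struct stat st;
 *  if (fstat(kq, &st) == 0)
 *      printf("%lld event(s) pending\n", (long long)st.st_size);
 */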
/*
 * Interact with the pthread kext to request a servicing there.
 * Eventually, this will request threads at specific QoS levels.
 * For now, it only requests a dispatch-manager-QoS thread, and
 * only one-at-a-time.
 *
 * - Caller holds the workq request lock
 *
 * - May be called with the kqueue's wait queue set locked,
 *   so cannot do anything that could recurse on that.
 */
static void
kqworkq_request_thread(
    struct kqworkq *kqwq,
    kq_index_t qos_index)
{
    struct kqrequest *kqr;

    assert(kqwq->kqwq_state & KQ_WORKQ);
    assert(qos_index < KQWQ_NQOS);

    kqr = kqworkq_get_request(kqwq, qos_index);

    assert(kqr->kqr_state & KQR_WAKEUP);

    /*
     * If we have already requested a thread, and it hasn't
     * started processing yet, there's no use hammering away
     * on the pthread kext.
     */
    if (kqr->kqr_state & KQR_THREQUESTED)
        return;

    assert((kqr->kqr_state & KQR_BOUND) == 0);

    /* request additional workq threads if appropriate */
    if (pthread_functions != NULL &&
        pthread_functions->workq_reqthreads != NULL) {
        unsigned int flags = KEVENT_FLAG_WORKQ;
        unsigned long priority;
        thread_t wqthread;

        /* Compute the appropriate pthread priority */
        priority = qos_from_qos_index(qos_index);

        /* JMM - for now remain compatible with old invocations */
        /* set the over-commit flag on the request if needed */
        if (kqr->kqr_state & KQR_THOVERCOMMIT)
            priority |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;

        /* Compute a priority based on qos_index. */
        struct workq_reqthreads_req_s request = {
            .priority = priority,
            .count = 1
        };

        /* mark that we are making a request */
        kqr->kqr_state |= KQR_THREQUESTED;
        if (qos_index == KQWQ_QOS_MANAGER)
            kqr->kqr_state |= KQWQ_THMANAGER;

        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_THREQUEST),
            (((uintptr_t)kqr->kqr_override_index << 8) |
            (uintptr_t)kqr->kqr_state));
        wqthread = (*pthread_functions->workq_reqthreads)(kqwq->kqwq_p, 1, &request);

        /* We've been switched to the emergency/manager thread */
        if (wqthread == (thread_t)-1) {
            assert(qos_index != KQWQ_QOS_MANAGER);
            kqr->kqr_state |= KQWQ_THMANAGER;
            return;
        }

        /*
         * bind the returned thread identity
         * This goes away when we switch to synchronous callback
         * binding from the pthread kext.
         */
        if (wqthread != NULL) {
            kqworkq_bind_thread_impl(kqwq, qos_index, wqthread, flags);
        }
    }
}
/*
 * If we aren't already busy processing events [for this QoS],
 * request workq thread support as appropriate.
 *
 * TBD - for now, we don't segregate out processing by QoS.
 *
 * - May be called with the kqueue's wait queue set locked,
 *   so cannot do anything that could recurse on that.
 */
static void
kqworkq_request_help(
    struct kqworkq *kqwq,
    kq_index_t qos_index)
{
    struct kqrequest *kqr;

    /* convert to thread qos value */
    assert(qos_index < KQWQ_NQOS);

    kqwq_req_lock(kqwq);
    kqr = kqworkq_get_request(kqwq, qos_index);

    if ((kqr->kqr_state & KQR_WAKEUP) == 0) {
        /* Indicate that we needed help from this request */
        kqr->kqr_state |= KQR_WAKEUP;

        /* Go assure a thread request has been made */
        kqworkq_request_thread(kqwq, qos_index);
    }
    kqwq_req_unlock(kqwq);
}
static void
kqworkloop_threadreq_impl(struct kqworkloop *kqwl, kq_index_t qos_index)
{
    struct kqrequest *kqr = &kqwl->kqwl_request;
    unsigned long pri = pthread_priority_for_kqrequest(kqr, qos_index);
    int op, ret;

    assert((kqr->kqr_state & (KQR_THREQUESTED | KQR_BOUND)) == KQR_THREQUESTED);

    /*
     * New-style thread request supported. Provide
     * the pthread kext a pointer to a workq_threadreq_s
     * structure for its use until a corresponding
     * workloop_fulfill_threadreq callback.
     */
    if (current_proc() == kqwl->kqwl_kqueue.kq_p) {
        op = WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL;
    } else {
        op = WORKQ_THREADREQ_WORKLOOP;
    }
again:
    ret = (*pthread_functions->workq_threadreq)(kqwl->kqwl_p, &kqr->kqr_req,
        WORKQ_THREADREQ_WORKLOOP, pri, 0);
    switch (ret) {
    case ENOTSUP:
        assert(op == WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL);
        op = WORKQ_THREADREQ_WORKLOOP;
        goto again;

    case ECANCELED:
    case EINVAL:
        /*
         * Process is shutting down or exec'ing.
         * All the kqueues are going to be cleaned up
         * soon. Forget we even asked for a thread -
         * and make sure we don't ask for more.
         */
        kqueue_release((struct kqueue *)kqwl, KQUEUE_CANT_BE_LAST_REF);
        kqr->kqr_state &= ~KQR_THREQUESTED;
        kqr->kqr_state |= KQR_DRAIN;
        break;

    case EAGAIN:
        assert(op == WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL);
        act_set_astkevent(current_thread(), AST_KEVENT_REDRIVE_THREADREQ);
        break;

    default:
        assert(ret == 0);
        break;
    }
}
static void
kqworkloop_threadreq_modify(struct kqworkloop *kqwl, kq_index_t qos_index)
{
    struct kqrequest *kqr = &kqwl->kqwl_request;
    unsigned long pri = pthread_priority_for_kqrequest(kqr, qos_index);
    int ret, op = WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL;

    assert((kqr->kqr_state & (KQR_THREQUESTED | KQR_BOUND)) == KQR_THREQUESTED);

    if (current_proc() == kqwl->kqwl_kqueue.kq_p) {
        op = WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL;
    } else {
        op = WORKQ_THREADREQ_CHANGE_PRI;
    }
again:
    ret = (*pthread_functions->workq_threadreq_modify)(kqwl->kqwl_p,
        &kqr->kqr_req, op, pri, 0);
    switch (ret) {
    case ENOTSUP:
        assert(op == WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL);
        op = WORKQ_THREADREQ_CHANGE_PRI;
        goto again;

    case EAGAIN:
        assert(op == WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL);
        act_set_astkevent(current_thread(), AST_KEVENT_REDRIVE_THREADREQ);
        break;

    default:
        assert(ret == 0);
        break;
    }
}
/*
 * Interact with the pthread kext to request a servicing thread.
 * This will request a single thread at the highest QoS level
 * for which there is work (whether that was the requested QoS
 * for an event or an override applied to a lower-QoS request).
 *
 * - Caller holds the workloop request lock
 *
 * - May be called with the kqueue's wait queue set locked,
 *   so cannot do anything that could recurse on that.
 */
static void
kqworkloop_request_thread(struct kqworkloop *kqwl, kq_index_t qos_index)
{
    struct kqrequest *kqr;

    assert(kqwl->kqwl_state & KQ_WORKLOOP);

    kqr = &kqwl->kqwl_request;

    assert(kqwl->kqwl_owner == THREAD_NULL);
    assert((kqr->kqr_state & KQR_BOUND) == 0);
    assert((kqr->kqr_state & KQR_THREQUESTED) == 0);
    assert(!(kqwl->kqwl_kqueue.kq_state & KQ_NO_WQ_THREAD));

    /* If we're draining thread requests, just bail */
    if (kqr->kqr_state & KQR_DRAIN)
        return;

    if (pthread_functions != NULL &&
        pthread_functions->workq_threadreq != NULL) {
        /*
         * set request state flags, etc... before calling pthread
         * This assures they are set before a possible synchronous
         * callback to workloop_fulfill_threadreq().
         */
        kqr->kqr_state |= KQR_THREQUESTED;

        /* Add a thread request reference on the kqueue. */
        kqueue_retain((struct kqueue *)kqwl);

        KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THREQUEST),
            kqwl->kqwl_dynamicid,
            0, qos_index, kqr->kqr_state);
        kqworkloop_threadreq_impl(kqwl, qos_index);
    } else {
        panic("kqworkloop_request_thread");
    }
}
static void
kqworkloop_update_sync_override_state(struct kqworkloop *kqwl, boolean_t sync_ipc_override)
{
    struct kqrequest *kqr = &kqwl->kqwl_request;
    kqwl_req_lock(kqwl);
    kqr->kqr_has_sync_override = sync_ipc_override;
    kqwl_req_unlock(kqwl);
}
static inline kq_index_t
kqworkloop_combined_qos(struct kqworkloop *kqwl, boolean_t *ipc_override_is_sync)
{
    struct kqrequest *kqr = &kqwl->kqwl_request;
    kq_index_t override;

    *ipc_override_is_sync = FALSE;
    override = MAX(MAX(kqr->kqr_qos_index, kqr->kqr_override_index),
        kqr->kqr_dsync_waiters_qos);

    if (kqr->kqr_sync_suppress_count > 0 || kqr->kqr_has_sync_override) {
        *ipc_override_is_sync = TRUE;
        override = THREAD_QOS_USER_INTERACTIVE;
    }
    return override;
}
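/*
 * Worked example of the combination rule above (values chosen purely for
 * illustration): with kqr_qos_index = THREAD_QOS_UTILITY, kqr_override_index
 * = THREAD_QOS_USER_INITIATED and kqr_dsync_waiters_qos unset, the MAX()
 * chain yields THREAD_QOS_USER_INITIATED.  If a sync IPC override is pending
 * (kqr_sync_suppress_count > 0 or kqr_has_sync_override), the result is
 * instead forced all the way up to THREAD_QOS_USER_INTERACTIVE and
 * *ipc_override_is_sync is reported as TRUE.
 */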
static void
kqworkloop_request_fire_r2k_notification(struct kqworkloop *kqwl)
{
    struct kqrequest *kqr = &kqwl->kqwl_request;

    kqwl_req_held(kqwl);

    if (kqr->kqr_state & KQR_R2K_NOTIF_ARMED) {
        assert(kqr->kqr_state & KQR_BOUND);
        assert(kqr->kqr_thread);

        kqr->kqr_state &= ~KQR_R2K_NOTIF_ARMED;
        act_set_astkevent(kqr->kqr_thread, AST_KEVENT_RETURN_TO_KERNEL);
    }
}
static void
kqworkloop_update_threads_qos(struct kqworkloop *kqwl, int op, kq_index_t qos)
{
	const uint8_t KQWL_STAYACTIVE_FIRED_BIT = (1 << 0);

	struct kqrequest *kqr = &kqwl->kqwl_request;
	boolean_t old_ipc_override_is_sync = FALSE;
	kq_index_t old_qos = kqworkloop_combined_qos(kqwl, &old_ipc_override_is_sync);
	struct kqueue *kq = &kqwl->kqwl_kqueue;
	bool static_thread = (kq->kq_state & KQ_NO_WQ_THREAD);
	kq_index_t i;

	/* must hold the kqr lock */
	kqwl_req_held(kqwl);

	switch (op) {
	case KQWL_UTQ_UPDATE_WAKEUP_QOS:
		if (qos == KQWL_BUCKET_STAYACTIVE) {
			/*
			 * the KQWL_BUCKET_STAYACTIVE is not a QoS bucket, we only remember
			 * a high watermark (kqr_stayactive_qos) of any stay active knote
			 * that was ever registered with this workloop.
			 *
			 * When waitq_set__CALLING_PREPOST_HOOK__() wakes up any stay active
			 * knote, we use this high-watermark as a wakeup-index, and also set
			 * the magic KQWL_BUCKET_STAYACTIVE bit to make sure we remember
			 * there is at least one stay active knote fired until the next full
			 * processing of this bucket.
			 */
			kqr->kqr_wakeup_indexes |= KQWL_STAYACTIVE_FIRED_BIT;
			qos = kqr->kqr_stayactive_qos;
			assert(!static_thread);
		}
		if (kqr->kqr_wakeup_indexes & (1 << qos)) {
			assert(kqr->kqr_state & KQR_WAKEUP);
			break;
		}

		kqr->kqr_wakeup_indexes |= (1 << qos);
		kqr->kqr_state |= KQR_WAKEUP;
		kqworkloop_request_fire_r2k_notification(kqwl);
		goto recompute_async;

	case KQWL_UTQ_UPDATE_STAYACTIVE_QOS:
		if (kqr->kqr_stayactive_qos < qos) {
			kqr->kqr_stayactive_qos = qos;
			if (kqr->kqr_wakeup_indexes & KQWL_STAYACTIVE_FIRED_BIT) {
				assert(kqr->kqr_state & KQR_WAKEUP);
				kqr->kqr_wakeup_indexes |= (1 << qos);
				goto recompute_async;
			}
		}
		break;

	case KQWL_UTQ_RECOMPUTE_WAKEUP_QOS:
		kqlock_held(kq); // to look at kq_queues
		kqr->kqr_has_sync_override = FALSE;
		i = KQWL_BUCKET_STAYACTIVE;
		if (TAILQ_EMPTY(&kqr->kqr_suppressed)) {
			kqr->kqr_override_index = THREAD_QOS_UNSPECIFIED;
		}
		if (!TAILQ_EMPTY(&kq->kq_queue[i]) &&
		    (kqr->kqr_wakeup_indexes & KQWL_STAYACTIVE_FIRED_BIT)) {
			/*
			 * If the KQWL_STAYACTIVE_FIRED_BIT is set, it means a stay active
			 * knote may have fired, so we need to merge in kqr_stayactive_qos.
			 *
			 * Unlike other buckets, this one is never empty but could be idle.
			 */
			kqr->kqr_wakeup_indexes &= KQWL_STAYACTIVE_FIRED_BIT;
			kqr->kqr_wakeup_indexes |= (1 << kqr->kqr_stayactive_qos);
		} else {
			kqr->kqr_wakeup_indexes = 0;
		}
		for (i = THREAD_QOS_UNSPECIFIED + 1; i < KQWL_BUCKET_STAYACTIVE; i++) {
			if (!TAILQ_EMPTY(&kq->kq_queue[i])) {
				kqr->kqr_wakeup_indexes |= (1 << i);
				struct knote *kn = TAILQ_FIRST(&kqwl->kqwl_kqueue.kq_queue[i]);
				if (i == THREAD_QOS_USER_INTERACTIVE &&
				    kn->kn_qos_override_is_sync) {
					kqr->kqr_has_sync_override = TRUE;
				}
			}
		}
		if (kqr->kqr_wakeup_indexes) {
			kqr->kqr_state |= KQR_WAKEUP;
			kqworkloop_request_fire_r2k_notification(kqwl);
		} else {
			kqr->kqr_state &= ~KQR_WAKEUP;
		}
		assert(qos == THREAD_QOS_UNSPECIFIED);
		goto recompute_async;

	case KQWL_UTQ_RESET_WAKEUP_OVERRIDE:
		kqr->kqr_override_index = THREAD_QOS_UNSPECIFIED;
		assert(qos == THREAD_QOS_UNSPECIFIED);
		goto recompute_async;

	case KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE:
	recompute_async:
		/*
		 * When modifying the wakeup QoS or the async override QoS, we always
		 * need to maintain our invariant that kqr_override_index is at least as
		 * large as the highest QoS for which an event is fired.
		 *
		 * However this override index can be larger when there is an overridden
		 * suppressed knote pushing on the kqueue.
		 */
		if (kqr->kqr_wakeup_indexes > (1 << qos)) {
			qos = fls(kqr->kqr_wakeup_indexes) - 1; /* fls is 1-based */
		}
		if (kqr->kqr_override_index < qos) {
			kqr->kqr_override_index = qos;
		}
		break;

	case KQWL_UTQ_REDRIVE_EVENTS:
		break;

	case KQWL_UTQ_SET_ASYNC_QOS:
		kqr->kqr_qos_index = qos;
		break;

	case KQWL_UTQ_SET_SYNC_WAITERS_QOS:
		kqr->kqr_dsync_waiters_qos = qos;
		break;

	default:
		panic("unknown kqwl thread qos update operation: %d", op);
	}

	boolean_t new_ipc_override_is_sync = FALSE;
	kq_index_t new_qos = kqworkloop_combined_qos(kqwl, &new_ipc_override_is_sync);
	thread_t kqwl_owner = kqwl->kqwl_owner;
	thread_t servicer = kqr->kqr_thread;
	__assert_only int ret;

	/*
	 * Apply the diffs to the owner if applicable
	 */
	if (filt_wlowner_is_valid(kqwl_owner)) {
		/* JMM - need new trace hooks for owner overrides */
		KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST),
				kqwl->kqwl_dynamicid,
				(kqr->kqr_state & KQR_BOUND) ? thread_tid(kqwl_owner) : 0,
				(kqr->kqr_qos_index << 8) | new_qos,
				(kqr->kqr_override_index << 8) | kqr->kqr_state);

		if (new_qos == kqr->kqr_dsync_owner_qos) {
			/* nothing to do */
		} else if (kqr->kqr_dsync_owner_qos == THREAD_QOS_UNSPECIFIED) {
			thread_add_ipc_override(kqwl_owner, new_qos);
		} else if (new_qos == THREAD_QOS_UNSPECIFIED) {
			thread_drop_ipc_override(kqwl_owner);
		} else /* kqr->kqr_dsync_owner_qos != new_qos */ {
			thread_update_ipc_override(kqwl_owner, new_qos);
		}
		kqr->kqr_dsync_owner_qos = new_qos;

		if (new_ipc_override_is_sync &&
		    !kqr->kqr_owner_override_is_sync) {
			thread_add_sync_ipc_override(kqwl_owner);
		} else if (!new_ipc_override_is_sync &&
		    kqr->kqr_owner_override_is_sync) {
			thread_drop_sync_ipc_override(kqwl_owner);
		}
		kqr->kqr_owner_override_is_sync = new_ipc_override_is_sync;
	}

	/*
	 * apply the diffs to the servicer
	 */
	if (static_thread) {
		/*
		 * Statically bound thread
		 *
		 * These threads don't participate in QoS overrides today, just wakeup
		 * the thread blocked on this kqueue if a new event arrived.
		 */

		switch (op) {
		case KQWL_UTQ_UPDATE_WAKEUP_QOS:
		case KQWL_UTQ_UPDATE_STAYACTIVE_QOS:
		case KQWL_UTQ_RECOMPUTE_WAKEUP_QOS:
			break;

		case KQWL_UTQ_RESET_WAKEUP_OVERRIDE:
		case KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE:
		case KQWL_UTQ_REDRIVE_EVENTS:
		case KQWL_UTQ_SET_ASYNC_QOS:
		case KQWL_UTQ_SET_SYNC_WAITERS_QOS:
			panic("should never be called");
			break;
		}

		if ((kqr->kqr_state & KQR_BOUND) && (kqr->kqr_state & KQR_WAKEUP)) {
			assert(servicer && !is_workqueue_thread(servicer));
			if (kq->kq_state & (KQ_SLEEP | KQ_SEL)) {
				kq->kq_state &= ~(KQ_SLEEP | KQ_SEL);
				waitq_wakeup64_all((struct waitq *)&kq->kq_wqs, KQ_EVENT,
						THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
			}
		}
	} else if ((kqr->kqr_state & KQR_THREQUESTED) == 0) {
		/*
		 * No servicer, nor thread-request
		 *
		 * Make a new thread request, unless there is an owner (or the workloop
		 * is suspended in userland) or if there is no asynchronous work in the
		 * first place.
		 */

		if (kqwl_owner == THREAD_NULL && (kqr->kqr_state & KQR_WAKEUP)) {
			kqworkloop_request_thread(kqwl, new_qos);
		}
	} else if ((kqr->kqr_state & KQR_BOUND) == 0 &&
	    (kqwl_owner || (kqr->kqr_state & KQR_WAKEUP) == 0)) {
		/*
		 * No servicer, thread request in flight we want to cancel
		 *
		 * We just got rid of the last knote of the kqueue or noticed an owner
		 * with a thread request still in flight, take it back.
		 */
		ret = (*pthread_functions->workq_threadreq_modify)(kqwl->kqwl_p,
				&kqr->kqr_req, WORKQ_THREADREQ_CANCEL, 0, 0);
		assert(ret == 0);

		kqr->kqr_state &= ~KQR_THREQUESTED;
		kqueue_release(kq, KQUEUE_CANT_BE_LAST_REF);
	} else {
		boolean_t qos_changed = FALSE;

		/*
		 * Servicer or request is in flight
		 *
		 * Just apply the diff to the servicer or the thread request
		 */
		if (kqr->kqr_state & KQR_BOUND) {
			servicer = kqr->kqr_thread;
			struct uthread *ut = get_bsdthread_info(servicer);
			if (ut->uu_kqueue_qos_index != new_qos) {
				if (ut->uu_kqueue_qos_index == THREAD_QOS_UNSPECIFIED) {
					thread_add_ipc_override(servicer, new_qos);
				} else if (new_qos == THREAD_QOS_UNSPECIFIED) {
					thread_drop_ipc_override(servicer);
				} else /* ut->uu_kqueue_qos_index != new_qos */ {
					thread_update_ipc_override(servicer, new_qos);
				}
				ut->uu_kqueue_qos_index = new_qos;
				qos_changed = TRUE;
			}

			if (new_ipc_override_is_sync != ut->uu_kqueue_override_is_sync) {
				if (new_ipc_override_is_sync &&
				    !ut->uu_kqueue_override_is_sync) {
					thread_add_sync_ipc_override(servicer);
				} else if (!new_ipc_override_is_sync &&
				    ut->uu_kqueue_override_is_sync) {
					thread_drop_sync_ipc_override(servicer);
				}
				ut->uu_kqueue_override_is_sync = new_ipc_override_is_sync;
				qos_changed = TRUE;
			}
		} else if (old_qos != new_qos) {
			kqworkloop_threadreq_modify(kqwl, new_qos);
			qos_changed = TRUE;
		}
		if (qos_changed) {
			servicer = kqr->kqr_thread;
			KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST),
				kqwl->kqwl_dynamicid,
				(kqr->kqr_state & KQR_BOUND) ? thread_tid(servicer) : 0,
				(kqr->kqr_qos_index << 16) | (new_qos << 8) | new_ipc_override_is_sync,
				(kqr->kqr_override_index << 8) | kqr->kqr_state);
		}
	}
}
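/*
 * Illustrative sketch only (not part of the original file): the routine above
 * treats kqr_wakeup_indexes as a bitmap with one bit per QoS bucket, and uses
 * the 1-based fls() to recover the highest bucket with pending work. The
 * helpers below are a minimal, hypothetical model of that bookkeeping,
 * assuming a plain 16-bit bitmap.
 */
#if 0	/* example sketch -- not compiled */
static inline void
example_mark_wakeup(uint16_t *wakeup_indexes, int qos)
{
	*wakeup_indexes |= (uint16_t)(1 << qos);	/* one bit per QoS bucket */
}

static inline int
example_highest_pending_qos(uint16_t wakeup_indexes)
{
	if (wakeup_indexes == 0)
		return 0;				/* THREAD_QOS_UNSPECIFIED */
	return fls(wakeup_indexes) - 1;			/* fls() is 1-based */
}
#endif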
static void
kqworkloop_request_help(struct kqworkloop *kqwl, kq_index_t qos_index)
{
	/* convert to thread qos value */
	assert(qos_index < KQWL_NBUCKETS);

	kqwl_req_lock(kqwl);
	kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_WAKEUP_QOS, qos_index);
	kqwl_req_unlock(kqwl);
}

/*
 * These arrays describe the low and high qindexes for a given qos_index.
 * The values come from the chart in <sys/eventvar.h> (must stay in sync).
 */
static kq_index_t _kqwq_base_index[KQWQ_NQOS] = {0, 0, 6, 11, 15, 18, 20, 21};
static kq_index_t _kqwq_high_index[KQWQ_NQOS] = {0, 5, 10, 14, 17, 19, 20, 21};
static struct kqtailq *
kqueue_get_base_queue(struct kqueue *kq, kq_index_t qos_index)
{
	if (kq->kq_state & KQ_WORKQ) {
		assert(qos_index < KQWQ_NQOS);
		return &kq->kq_queue[_kqwq_base_index[qos_index]];
	} else if (kq->kq_state & KQ_WORKLOOP) {
		assert(qos_index < KQWL_NBUCKETS);
		return &kq->kq_queue[qos_index];
	} else {
		assert(qos_index == QOS_INDEX_KQFILE);
		return &kq->kq_queue[QOS_INDEX_KQFILE];
	}
}

static struct kqtailq *
kqueue_get_high_queue(struct kqueue *kq, kq_index_t qos_index)
{
	if (kq->kq_state & KQ_WORKQ) {
		assert(qos_index < KQWQ_NQOS);
		return &kq->kq_queue[_kqwq_high_index[qos_index]];
	} else if (kq->kq_state & KQ_WORKLOOP) {
		assert(qos_index < KQWL_NBUCKETS);
		return &kq->kq_queue[KQWL_BUCKET_STAYACTIVE];
	} else {
		assert(qos_index == QOS_INDEX_KQFILE);
		return &kq->kq_queue[QOS_INDEX_KQFILE];
	}
}

static int
kqueue_queue_empty(struct kqueue *kq, kq_index_t qos_index)
{
	struct kqtailq *base_queue = kqueue_get_base_queue(kq, qos_index);
	struct kqtailq *queue = kqueue_get_high_queue(kq, qos_index);

	do {
		if (!TAILQ_EMPTY(queue))
			return 0;
	} while (queue-- > base_queue);
	return 1;
}
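/*
 * Illustrative sketch only (not part of the original file): the two tables
 * above carve the flat kq_queue array into one contiguous run of queues per
 * workq QoS class, and kqueue_queue_empty() walks that run from the high
 * (override) position down to the base. The hypothetical helper below shows
 * the same walk with a caller-supplied visitor; example_visit() is not a
 * kernel interface.
 */
#if 0	/* example sketch -- not compiled */
static void
example_scan_workq_class(struct kqueue *kq, kq_index_t qos_index,
    void (*example_visit)(struct kqtailq *))
{
	struct kqtailq *base_queue = kqueue_get_base_queue(kq, qos_index);
	struct kqtailq *queue = kqueue_get_high_queue(kq, qos_index);

	do {
		example_visit(queue);		/* highest override position first */
	} while (queue-- > base_queue);
}
#endif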
static struct kqtailq *
kqueue_get_suppressed_queue(struct kqueue *kq, kq_index_t qos_index)
{
	struct kqtailq *res;
	struct kqrequest *kqr;

	if (kq->kq_state & KQ_WORKQ) {
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		kqr = kqworkq_get_request(kqwq, qos_index);
		res = &kqr->kqr_suppressed;
	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		kqr = &kqwl->kqwl_request;
		res = &kqr->kqr_suppressed;
	} else {
		struct kqfile *kqf = (struct kqfile *)kq;
		res = &kqf->kqf_suppressed;
	}
	return res;
}
static kq_index_t
knote_get_queue_index(struct knote *kn)
{
	kq_index_t override_index = knote_get_qos_override_index(kn);
	kq_index_t qos_index = knote_get_qos_index(kn);
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t res;

	if (kq->kq_state & KQ_WORKQ) {
		res = _kqwq_base_index[qos_index];
		if (override_index > qos_index)
			res += override_index - qos_index;
		assert(res <= _kqwq_high_index[qos_index]);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		res = MAX(override_index, qos_index);
		assert(res < KQWL_NBUCKETS);
	} else {
		assert(qos_index == QOS_INDEX_KQFILE);
		assert(override_index == QOS_INDEX_KQFILE);
		res = QOS_INDEX_KQFILE;
	}
	return res;
}

static struct kqtailq *
knote_get_queue(struct knote *kn)
{
	kq_index_t qindex = knote_get_queue_index(kn);

	return &(knote_get_kq(kn))->kq_queue[qindex];
}

static kq_index_t
knote_get_req_index(struct knote *kn)
{
	return kn->kn_req_index;
}

static kq_index_t
knote_get_qos_index(struct knote *kn)
{
	return kn->kn_qos_index;
}
static void
knote_set_qos_index(struct knote *kn, kq_index_t qos_index)
{
	struct kqueue *kq = knote_get_kq(kn);

	assert(qos_index < KQWQ_NQOS);
	assert((kn->kn_status & KN_QUEUED) == 0);

	if (kq->kq_state & KQ_WORKQ) {
		assert(qos_index > THREAD_QOS_UNSPECIFIED);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		/* XXX this policy decision shouldn't be here */
		if (qos_index == THREAD_QOS_UNSPECIFIED)
			qos_index = THREAD_QOS_LEGACY;
	} else {
		qos_index = QOS_INDEX_KQFILE;
	}

	/* always set requested */
	kn->kn_req_index = qos_index;

	/* only adjust in-use qos index when not suppressed */
	if ((kn->kn_status & KN_SUPPRESSED) == 0)
		kn->kn_qos_index = qos_index;
}
static void
knote_set_qos_overcommit(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqrequest *kqr;

	/* turn overcommit on for the appropriate thread request? */
	if (kn->kn_qos & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) {
		if (kq->kq_state & KQ_WORKQ) {
			kq_index_t qos_index = knote_get_qos_index(kn);
			struct kqworkq *kqwq = (struct kqworkq *)kq;

			kqr = kqworkq_get_request(kqwq, qos_index);

			kqwq_req_lock(kqwq);
			kqr->kqr_state |= KQR_THOVERCOMMIT;
			kqwq_req_unlock(kqwq);
		} else if (kq->kq_state & KQ_WORKLOOP) {
			struct kqworkloop *kqwl = (struct kqworkloop *)kq;

			kqr = &kqwl->kqwl_request;

			kqwl_req_lock(kqwl);
			kqr->kqr_state |= KQR_THOVERCOMMIT;
			kqwl_req_unlock(kqwl);
		}
	}
}

static kq_index_t
knote_get_qos_override_index(struct knote *kn)
{
	return kn->kn_qos_override;
}
static void
knote_set_qos_override_index(struct knote *kn, kq_index_t override_index,
		boolean_t override_is_sync)
{
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t qos_index = knote_get_qos_index(kn);
	kq_index_t old_override_index = knote_get_qos_override_index(kn);
	boolean_t old_override_is_sync = kn->kn_qos_override_is_sync;
	uint32_t flags = 0;

	assert((kn->kn_status & KN_QUEUED) == 0);

	if (override_index == KQWQ_QOS_MANAGER) {
		assert(qos_index == KQWQ_QOS_MANAGER);
	} else {
		assert(override_index < KQWQ_QOS_MANAGER);
	}

	kn->kn_qos_override = override_index;
	kn->kn_qos_override_is_sync = override_is_sync;

	/*
	 * If this is a workq/workloop kqueue, apply the override to the
	 * servicing thread.
	 */
	if (kq->kq_state & KQ_WORKQ) {
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		assert(qos_index > THREAD_QOS_UNSPECIFIED);
		kqworkq_update_override(kqwq, qos_index, override_index);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		if ((kn->kn_status & KN_SUPPRESSED) == KN_SUPPRESSED) {
			flags = flags | KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS;

			if (override_index == THREAD_QOS_USER_INTERACTIVE
					&& override_is_sync) {
				flags = flags | KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI;
			}

			if (old_override_index == THREAD_QOS_USER_INTERACTIVE
					&& old_override_is_sync) {
				flags = flags | KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI;
			}
		}

		assert(qos_index > THREAD_QOS_UNSPECIFIED);
		kqworkloop_update_override(kqwl, qos_index, override_index, flags);
	}
}

static kq_index_t
knote_get_sync_qos_override_index(struct knote *kn)
{
	return kn->kn_qos_sync_override;
}
static void
kqworkq_update_override(struct kqworkq *kqwq, kq_index_t qos_index, kq_index_t override_index)
{
	struct kqrequest *kqr;
	kq_index_t old_override_index;

	if (override_index <= qos_index) {
		return;
	}

	kqr = kqworkq_get_request(kqwq, qos_index);

	kqwq_req_lock(kqwq);
	old_override_index = kqr->kqr_override_index;
	if (override_index > MAX(kqr->kqr_qos_index, old_override_index)) {
		kqr->kqr_override_index = override_index;

		/* apply the override to [incoming?] servicing thread */
		if (kqr->kqr_state & KQR_BOUND) {
			thread_t wqthread = kqr->kqr_thread;

			/* only apply if non-manager */
			if ((kqr->kqr_state & KQWQ_THMANAGER) == 0) {
				if (old_override_index)
					thread_update_ipc_override(wqthread, override_index);
				else
					thread_add_ipc_override(wqthread, override_index);
			}
		}
	}
	kqwq_req_unlock(kqwq);
}
7276 /* called with the kqworkq lock held */
7278 kqworkq_bind_thread_impl(
7279 struct kqworkq
*kqwq
,
7280 kq_index_t qos_index
,
7284 /* request lock must be held */
7285 kqwq_req_held(kqwq
);
7287 struct kqrequest
*kqr
= kqworkq_get_request(kqwq
, qos_index
);
7288 assert(kqr
->kqr_state
& KQR_THREQUESTED
);
7290 if (qos_index
== KQWQ_QOS_MANAGER
)
7291 flags
|= KEVENT_FLAG_WORKQ_MANAGER
;
7293 struct uthread
*ut
= get_bsdthread_info(thread
);
7296 * If this is a manager, and the manager request bit is
7297 * not set, assure no other thread is bound. If the bit
7298 * is set, make sure the old thread is us (or not set).
7300 if (flags
& KEVENT_FLAG_WORKQ_MANAGER
) {
7301 if ((kqr
->kqr_state
& KQR_BOUND
) == 0) {
7302 kqr
->kqr_state
|= (KQR_BOUND
| KQWQ_THMANAGER
);
7303 TAILQ_INIT(&kqr
->kqr_suppressed
);
7304 kqr
->kqr_thread
= thread
;
7305 ut
->uu_kqueue_bound
= (struct kqueue
*)kqwq
;
7306 ut
->uu_kqueue_qos_index
= KQWQ_QOS_MANAGER
;
7307 ut
->uu_kqueue_flags
= (KEVENT_FLAG_WORKQ
|
7308 KEVENT_FLAG_WORKQ_MANAGER
);
7310 assert(kqr
->kqr_state
& KQR_BOUND
);
7311 assert(thread
== kqr
->kqr_thread
);
7312 assert(ut
->uu_kqueue_bound
== (struct kqueue
*)kqwq
);
7313 assert(ut
->uu_kqueue_qos_index
== KQWQ_QOS_MANAGER
);
7314 assert(ut
->uu_kqueue_flags
& KEVENT_FLAG_WORKQ_MANAGER
);
7319 /* Just a normal one-queue servicing thread */
7320 assert(kqr
->kqr_state
& KQR_THREQUESTED
);
7321 assert(kqr
->kqr_qos_index
== qos_index
);
7323 if ((kqr
->kqr_state
& KQR_BOUND
) == 0) {
7324 kqr
->kqr_state
|= KQR_BOUND
;
7325 TAILQ_INIT(&kqr
->kqr_suppressed
);
7326 kqr
->kqr_thread
= thread
;
7328 /* apply an ipc QoS override if one is needed */
7329 if (kqr
->kqr_override_index
) {
7330 assert(kqr
->kqr_qos_index
);
7331 assert(kqr
->kqr_override_index
> kqr
->kqr_qos_index
);
7332 assert(thread_get_ipc_override(thread
) == THREAD_QOS_UNSPECIFIED
);
7333 thread_add_ipc_override(thread
, kqr
->kqr_override_index
);
7336 /* indicate that we are processing in the uthread */
7337 ut
->uu_kqueue_bound
= (struct kqueue
*)kqwq
;
7338 ut
->uu_kqueue_qos_index
= qos_index
;
7339 ut
->uu_kqueue_flags
= flags
;
7342 * probably syncronously bound AND post-request bound
7343 * this logic can go away when we get rid of post-request bind
7345 assert(kqr
->kqr_state
& KQR_BOUND
);
7346 assert(thread
== kqr
->kqr_thread
);
7347 assert(ut
->uu_kqueue_bound
== (struct kqueue
*)kqwq
);
7348 assert(ut
->uu_kqueue_qos_index
== qos_index
);
7349 assert((ut
->uu_kqueue_flags
& flags
) == flags
);
static void
kqworkloop_update_override(
	struct kqworkloop *kqwl,
	kq_index_t qos_index,
	kq_index_t override_index,
	uint32_t flags)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;

	kqwl_req_lock(kqwl);

	/* Do not override on attached threads */
	if (kqr->kqr_state & KQR_BOUND) {
		assert(kqr->kqr_thread);

		if (kqwl->kqwl_kqueue.kq_state & KQ_NO_WQ_THREAD) {
			kqwl_req_unlock(kqwl);
			assert(!is_workqueue_thread(kqr->kqr_thread));
			return;
		}
	}

	/* Update sync ipc counts on kqr for suppressed knotes */
	if (flags & KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS) {
		kqworkloop_update_suppress_sync_count(kqr, flags);
	}

	if ((flags & KQWL_UO_UPDATE_OVERRIDE_LAZY) == 0) {
		kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE,
				MAX(qos_index, override_index));
	}
	kqwl_req_unlock(kqwl);
}

static void
kqworkloop_update_suppress_sync_count(
	struct kqrequest *kqr,
	uint32_t flags)
{
	if (flags & KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI) {
		kqr->kqr_sync_suppress_count++;
	}

	if (flags & KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI) {
		assert(kqr->kqr_sync_suppress_count > 0);
		kqr->kqr_sync_suppress_count--;
	}
}
/*
 * kqworkloop_unbind_thread - Unbind the servicer thread of a workloop kqueue
 *
 * It will end the processing phase in case it was still processing:
 *
 * We may have to request a new thread for not KQ_NO_WQ_THREAD workloop.
 * This can happen if :
 * - there were active events at or above our QoS we never got to (count > 0)
 * - we pended waitq hook callouts during processing
 * - we pended wakeups while processing (or unsuppressing)
 *
 * Called with kqueue lock held.
 */
static void
kqworkloop_unbind_thread(
	struct kqworkloop *kqwl,
	thread_t thread,
	__unused unsigned int flags)
{
	struct kqueue *kq = &kqwl->kqwl_kqueue;
	struct kqrequest *kqr = &kqwl->kqwl_request;

	assert((kq->kq_state & KQ_PROCESSING) == 0);
	if (kq->kq_state & KQ_PROCESSING) {
		return;
	}

	/*
	 * Forcing the KQ_PROCESSING flag allows for QoS updates because of
	 * unsuppressing knotes not to be applied until the eventual call to
	 * kqworkloop_update_threads_qos() below.
	 */
	kq->kq_state |= KQ_PROCESSING;
	kqworkloop_acknowledge_events(kqwl, TRUE);
	kq->kq_state &= ~KQ_PROCESSING;

	kqwl_req_lock(kqwl);

	/* deal with extraneous unbinds in release kernels */
	assert((kqr->kqr_state & (KQR_BOUND | KQR_PROCESSING)) == KQR_BOUND);
	if ((kqr->kqr_state & (KQR_BOUND | KQR_PROCESSING)) != KQR_BOUND) {
		kqwl_req_unlock(kqwl);
		return;
	}

	assert(thread == current_thread());
	assert(kqr->kqr_thread == thread);
	if (kqr->kqr_thread != thread) {
		kqwl_req_unlock(kqwl);
		return;
	}

	struct uthread *ut = get_bsdthread_info(thread);
	kq_index_t old_qos_index = ut->uu_kqueue_qos_index;
	boolean_t ipc_override_is_sync = ut->uu_kqueue_override_is_sync;
	ut->uu_kqueue_bound = NULL;
	ut->uu_kqueue_qos_index = 0;
	ut->uu_kqueue_override_is_sync = 0;
	ut->uu_kqueue_flags = 0;

	/* unbind the servicer thread, drop overrides */
	kqr->kqr_thread = NULL;
	kqr->kqr_state &= ~(KQR_BOUND | KQR_THREQUESTED | KQR_R2K_NOTIF_ARMED);
	kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_RECOMPUTE_WAKEUP_QOS, 0);

	kqwl_req_unlock(kqwl);

	/*
	 * Drop the override on the current thread last, after the call to
	 * kqworkloop_update_threads_qos above.
	 */
	if (old_qos_index) {
		thread_drop_ipc_override(thread);
	}
	if (ipc_override_is_sync) {
		thread_drop_sync_ipc_override(thread);
	}
}
/* called with the kqworkq lock held */
static void
kqworkq_unbind_thread(
	struct kqworkq *kqwq,
	kq_index_t qos_index,
	thread_t thread,
	__unused unsigned int flags)
{
	struct kqrequest *kqr = kqworkq_get_request(kqwq, qos_index);
	kq_index_t override_index = 0;

	/* request lock must be held */
	kqwq_req_held(kqwq);

	assert(thread == current_thread());

	if ((kqr->kqr_state & KQR_BOUND) == 0) {
		assert(kqr->kqr_state & KQR_BOUND);
		return;
	}

	assert(kqr->kqr_thread == thread);
	assert(TAILQ_EMPTY(&kqr->kqr_suppressed));

	/*
	 * If there is an override, drop it from the current thread
	 * and then we are free to recompute (a potentially lower)
	 * minimum override to apply to the next thread request.
	 */
	if (kqr->kqr_override_index) {
		struct kqtailq *base_queue = kqueue_get_base_queue(&kqwq->kqwq_kqueue, qos_index);
		struct kqtailq *queue = kqueue_get_high_queue(&kqwq->kqwq_kqueue, qos_index);

		/* if not bound to a manager thread, drop the current ipc override */
		if ((kqr->kqr_state & KQWQ_THMANAGER) == 0) {
			thread_drop_ipc_override(thread);
		}

		/* recompute the new override */
		do {
			if (!TAILQ_EMPTY(queue)) {
				override_index = queue - base_queue + qos_index;
				break;
			}
		} while (queue-- > base_queue);
	}

	/* Mark it unbound */
	kqr->kqr_thread = NULL;
	kqr->kqr_state &= ~(KQR_BOUND | KQR_THREQUESTED | KQWQ_THMANAGER);

	/* apply the new override */
	if (override_index > kqr->kqr_qos_index) {
		kqr->kqr_override_index = override_index;
	} else {
		kqr->kqr_override_index = THREAD_QOS_UNSPECIFIED;
	}
}

static struct kqrequest *
kqworkq_get_request(struct kqworkq *kqwq, kq_index_t qos_index)
{
	assert(qos_index < KQWQ_NQOS);
	return &kqwq->kqwq_request[qos_index];
}
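/*
 * Illustrative sketch only (not part of the original file): in
 * kqworkq_unbind_thread() above, the leftover override is recomputed from how
 * far above the base queue the first non-empty queue sits. The pointer
 * difference counts whole struct kqtailq slots, so adding it to the class's
 * base QoS yields the effective QoS of the pending work. The helper below is
 * a hypothetical restatement of that arithmetic.
 */
#if 0	/* example sketch -- not compiled */
static kq_index_t
example_override_from_slot(struct kqtailq *base_queue, struct kqtailq *nonempty,
    kq_index_t qos_index)
{
	/* (nonempty - base_queue) is a slot count, not a byte count */
	return (kq_index_t)(nonempty - base_queue) + qos_index;
}
#endif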
7551 knote_adjust_qos(struct knote
*kn
, qos_t new_qos
, qos_t new_override
, kq_index_t sync_override_index
)
7553 struct kqueue
*kq
= knote_get_kq(kn
);
7554 boolean_t override_is_sync
= FALSE
;
7556 if (kq
->kq_state
& (KQ_WORKQ
| KQ_WORKLOOP
)) {
7557 kq_index_t new_qos_index
;
7558 kq_index_t new_override_index
;
7559 kq_index_t servicer_qos_index
;
7561 new_qos_index
= qos_index_from_qos(kn
, new_qos
, FALSE
);
7562 new_override_index
= qos_index_from_qos(kn
, new_override
, TRUE
);
7564 /* make sure the servicer qos acts as a floor */
7565 servicer_qos_index
= qos_index_from_qos(kn
, kn
->kn_qos
, FALSE
);
7566 if (servicer_qos_index
> new_qos_index
)
7567 new_qos_index
= servicer_qos_index
;
7568 if (servicer_qos_index
> new_override_index
)
7569 new_override_index
= servicer_qos_index
;
7570 if (sync_override_index
>= new_override_index
) {
7571 new_override_index
= sync_override_index
;
7572 override_is_sync
= TRUE
;
7576 if (new_qos_index
!= knote_get_req_index(kn
) ||
7577 new_override_index
!= knote_get_qos_override_index(kn
) ||
7578 override_is_sync
!= kn
->kn_qos_override_is_sync
) {
7579 if (kn
->kn_status
& KN_QUEUED
) {
7581 knote_set_qos_index(kn
, new_qos_index
);
7582 knote_set_qos_override_index(kn
, new_override_index
, override_is_sync
);
7586 knote_set_qos_index(kn
, new_qos_index
);
7587 knote_set_qos_override_index(kn
, new_override_index
, override_is_sync
);
7595 knote_adjust_sync_qos(struct knote
*kn
, kq_index_t sync_qos
, boolean_t lock_kq
)
7597 struct kqueue
*kq
= knote_get_kq(kn
);
7598 kq_index_t old_sync_override
;
7599 kq_index_t qos_index
= knote_get_qos_index(kn
);
7602 /* Tracking only happens for UI qos */
7603 if (sync_qos
!= THREAD_QOS_USER_INTERACTIVE
&&
7604 sync_qos
!= THREAD_QOS_UNSPECIFIED
) {
7611 if (kq
->kq_state
& KQ_WORKLOOP
) {
7612 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
7614 old_sync_override
= knote_get_sync_qos_override_index(kn
);
7615 if (old_sync_override
!= sync_qos
) {
7616 kn
->kn_qos_sync_override
= sync_qos
;
7618 /* update sync ipc counters for suppressed knotes */
7619 if ((kn
->kn_status
& KN_SUPPRESSED
) == KN_SUPPRESSED
) {
7620 flags
= flags
| KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS
;
7622 /* Do not recalculate kqwl override, it would be done later */
7623 flags
= flags
| KQWL_UO_UPDATE_OVERRIDE_LAZY
;
7625 if (sync_qos
== THREAD_QOS_USER_INTERACTIVE
) {
7626 flags
= flags
| KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI
;
7629 if (old_sync_override
== THREAD_QOS_USER_INTERACTIVE
) {
7630 flags
= flags
| KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI
;
7633 kqworkloop_update_override(kqwl
, qos_index
, sync_qos
,
static void
knote_wakeup(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t qos_index = knote_get_qos_index(kn);

	if (kq->kq_state & KQ_WORKQ) {
		/* request a servicing thread */
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		kqworkq_request_help(kqwq, qos_index);

	} else if (kq->kq_state & KQ_WORKLOOP) {
		/* request a servicing thread */
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		if (kqworkloop_is_processing_on_current_thread(kqwl)) {
			/*
			 * kqworkloop_end_processing() will perform the required QoS
			 * computations when it unsets the processing mode.
			 */
			return;
		}
		kqworkloop_request_help(kqwl, qos_index);
	} else {
		struct kqfile *kqf = (struct kqfile *)kq;

		/* flag wakeups during processing */
		if (kq->kq_state & KQ_PROCESSING)
			kq->kq_state |= KQ_WAKEUP;

		/* wakeup a thread waiting on this queue */
		if (kq->kq_state & (KQ_SLEEP | KQ_SEL)) {
			kq->kq_state &= ~(KQ_SLEEP | KQ_SEL);
			waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
					KQ_EVENT,
					THREAD_AWAKENED,
					WAITQ_ALL_PRIORITIES);
		}

		/* wakeup other kqueues/select sets we're inside */
		KNOTE(&kqf->kqf_sel.si_note, 0);
	}
}
/*
 * Called with the kqueue locked
 */
static void
kqueue_interrupt(struct kqueue *kq)
{
	assert((kq->kq_state & KQ_WORKQ) == 0);

	/* wakeup sleeping threads */
	if ((kq->kq_state & (KQ_SLEEP | KQ_SEL)) != 0) {
		kq->kq_state &= ~(KQ_SLEEP | KQ_SEL);
		(void)waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
				KQ_EVENT,
				THREAD_RESTART,
				WAITQ_ALL_PRIORITIES);
	}

	/* wakeup threads waiting their turn to process */
	if (kq->kq_state & KQ_PROCWAIT) {
		struct kqtailq *suppressq;

		assert(kq->kq_state & KQ_PROCESSING);

		kq->kq_state &= ~KQ_PROCWAIT;
		suppressq = kqueue_get_suppressed_queue(kq, QOS_INDEX_KQFILE);
		(void)waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
				CAST_EVENT64_T(suppressq),
				THREAD_RESTART,
				WAITQ_ALL_PRIORITIES);
	}
}
/*
 * Called back from waitq code when no threads waiting and the hook was set.
 *
 * Interrupts are likely disabled and spin locks are held - minimal work
 * can be done in this context!!!
 *
 * JMM - in the future, this will try to determine which knotes match the
 * wait queue wakeup and apply these wakeups against those knotes themselves.
 * For now, all the events dispatched this way are dispatch-manager handled,
 * so hard-code that for now.
 */
void
waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook, void *knote_hook, int qos)
{
#pragma unused(knote_hook, qos)

	struct kqueue *kq = (struct kqueue *)kq_hook;

	if (kq->kq_state & KQ_WORKQ) {
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		kqworkq_request_help(kqwq, KQWQ_QOS_MANAGER);

	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		kqworkloop_request_help(kqwl, KQWL_BUCKET_STAYACTIVE);
	}
}
void
klist_init(struct klist *list)
{
	SLIST_INIT(list);
}

/*
 * Query/Post each knote in the object's list
 *
 *	The object lock protects the list. It is assumed
 *	that the filter/event routine for the object can
 *	determine that the object is already locked (via
 *	the hint) and not deadlock itself.
 *
 *	The object lock should also hold off pending
 *	detach/drop operations. But we'll prevent it here
 *	too (by taking a use reference) - just in case.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext) {
		struct kqueue *kq = knote_get_kq(kn);

		kqlock(kq);

		assert(!knoteuse_needs_boost(kn, NULL));

		/* If we can get a use reference - deliver event */
		if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
			int result;

			/* call the event with only a use count */
			result = knote_fops(kn)->f_event(kn, hint);

			/* if its not going away and triggered */
			if (knoteuse2kqlock(kq, kn, KNUSE_NONE) && result)
				knote_activate(kn);
			/* lock held again */
		}
		kqunlock(kq);
	}
}

/*
 * attach a knote to the specified list. Return true if this is the first entry.
 * The list is protected by whatever lock the object it is associated with uses.
 */
int
knote_attach(struct klist *list, struct knote *kn)
{
	int ret = SLIST_EMPTY(list);
	SLIST_INSERT_HEAD(list, kn, kn_selnext);
	return (ret);
}

/*
 * detach a knote from the specified list. Return true if that was the last entry.
 * The list is protected by whatever lock the object it is associated with uses.
 */
int
knote_detach(struct klist *list, struct knote *kn)
{
	SLIST_REMOVE(list, kn, knote, kn_selnext);
	return (SLIST_EMPTY(list));
}
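/*
 * Illustrative sketch only (not part of the original file): the klist helpers
 * above are what an event source uses. The source owns a struct klist
 * protected by its own lock, filters call knote_attach()/knote_detach() from
 * their f_attach/f_detach routines, and the source posts events with knote().
 * The struct and functions below are hypothetical, shown only to make that
 * wiring concrete.
 */
#if 0	/* example sketch -- not compiled */
struct example_source {
	lck_mtx_t	es_lock;	/* the "object lock" referred to above */
	struct klist	es_note;	/* knotes registered against this source */
	long		es_state;
};

static void
example_source_init(struct example_source *es)
{
	klist_init(&es->es_note);
}

static void
example_source_event(struct example_source *es, long hint)
{
	lck_mtx_lock(&es->es_lock);
	es->es_state = hint;
	knote(&es->es_note, hint);	/* post to every attached knote */
	lck_mtx_unlock(&es->es_lock);
}
#endif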
/*
 * knote_vanish - Indicate that the source has vanished
 *
 * If the knote has requested EV_VANISHED delivery,
 * arrange for that. Otherwise, deliver a NOTE_REVOKE
 * event for backward compatibility.
 *
 * The knote is marked as having vanished, but is not
 * actually detached from the source in this instance.
 * The actual detach is deferred until the knote drop.
 *
 * Our caller already has the object lock held. Calling
 * the detach routine would try to take that lock
 * recursively - which likely is not supported.
 */
void
knote_vanish(struct klist *list)
{
	struct knote *kn;
	struct knote *kn_next;

	SLIST_FOREACH_SAFE(kn, list, kn_selnext, kn_next) {
		struct kqueue *kq = knote_get_kq(kn);
		int result;

		kqlock(kq);

		assert(!knoteuse_needs_boost(kn, NULL));

		if ((kn->kn_status & KN_DROPPING) == 0) {
			/* If EV_VANISH supported - prepare to deliver one */
			if (kn->kn_status & KN_REQVANISH) {
				kn->kn_status |= KN_VANISHED;
				knote_activate(kn);

			} else if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
				/* call the event with only a use count */
				result = knote_fops(kn)->f_event(kn, NOTE_REVOKE);

				/* if its not going away and triggered */
				if (knoteuse2kqlock(kq, kn, KNUSE_NONE) && result)
					knote_activate(kn);
				/* lock held again */
			}
		}
		kqunlock(kq);
	}
}
/*
 * For a given knote, link a provided wait queue directly with the kqueue.
 * Wakeups will happen via recursive wait queue support. But nothing will move
 * the knote to the active list at wakeup (nothing calls knote()). Instead,
 * we permanently enqueue them here.
 *
 * kqueue and knote references are held by caller.
 * waitq locked by caller.
 *
 * caller provides the wait queue link structure.
 */
int
knote_link_waitq(struct knote *kn, struct waitq *wq, uint64_t *reserved_link)
{
	struct kqueue *kq = knote_get_kq(kn);
	kern_return_t kr;

	kr = waitq_link(wq, &kq->kq_wqs, WAITQ_ALREADY_LOCKED, reserved_link);
	if (kr == KERN_SUCCESS) {
		knote_markstayactive(kn);
		return (0);
	} else {
		return (EINVAL);
	}
}

/*
 * Unlink the provided wait queue from the kqueue associated with a knote.
 * Also remove it from the magic list of directly attached knotes.
 *
 * Note that the unlink may have already happened from the other side, so
 * ignore any failures to unlink and just remove it from the kqueue list.
 *
 * On success, caller is responsible for the link structure
 */
int
knote_unlink_waitq(struct knote *kn, struct waitq *wq)
{
	struct kqueue *kq = knote_get_kq(kn);
	kern_return_t kr;

	kr = waitq_unlink(wq, &kq->kq_wqs);
	knote_clearstayactive(kn);
	return ((kr != KERN_SUCCESS) ? EINVAL : 0);
}
7918 * remove all knotes referencing a specified fd
7920 * Essentially an inlined knote_remove & knote_drop
7921 * when we know for sure that the thing is a file
7923 * Entered with the proc_fd lock already held.
7924 * It returns the same way, but may drop it temporarily.
7927 knote_fdclose(struct proc
*p
, int fd
, int force
)
7933 list
= &p
->p_fd
->fd_knlist
[fd
];
7934 SLIST_FOREACH(kn
, list
, kn_link
) {
7935 struct kqueue
*kq
= knote_get_kq(kn
);
7940 panic("%s: proc mismatch (kq->kq_p=%p != p=%p)",
7941 __func__
, kq
->kq_p
, p
);
7944 * If the knote supports EV_VANISHED delivery,
7945 * transition it to vanished mode (or skip over
7946 * it if already vanished).
7948 if (!force
&& (kn
->kn_status
& KN_REQVANISH
)) {
7950 if ((kn
->kn_status
& KN_VANISHED
) == 0) {
7953 assert(!knoteuse_needs_boost(kn
, NULL
));
7955 /* get detach reference (also marks vanished) */
7956 if (kqlock2knotedetach(kq
, kn
, KNUSE_NONE
)) {
7957 /* detach knote and drop fp use reference */
7958 knote_fops(kn
)->f_detach(kn
);
7959 if (knote_fops(kn
)->f_isfd
)
7960 fp_drop(p
, kn
->kn_id
, kn
->kn_fp
, 0);
7962 /* activate it if it's still in existence */
7963 if (knoteuse2kqlock(kq
, kn
, KNUSE_NONE
)) {
7979 * Convert the kq lock to a drop ref.
7980 * If we get it, go ahead and drop it.
7981 * Otherwise, we waited for the blocking
7982 * condition to complete. Either way,
7983 * we dropped the fdlock so start over.
7985 if (kqlock2knotedrop(kq
, kn
)) {
7995 * knote_fdfind - lookup a knote in the fd table for process
7997 * If the filter is file-based, lookup based on fd index.
7998 * Otherwise use a hash based on the ident.
8000 * Matching is based on kq, filter, and ident. Optionally,
8001 * it may also be based on the udata field in the kevent -
8002 * allowing multiple event registration for the file object
8005 * fd_knhashlock or fdlock held on entry (and exit)
8007 static struct knote
*
8008 knote_fdfind(struct kqueue
*kq
,
8009 struct kevent_internal_s
*kev
,
8013 struct filedesc
*fdp
= p
->p_fd
;
8014 struct klist
*list
= NULL
;
8015 struct knote
*kn
= NULL
;
8018 * determine where to look for the knote
8021 /* fd-based knotes are linked off the fd table */
8022 if (kev
->ident
< (u_int
)fdp
->fd_knlistsize
) {
8023 list
= &fdp
->fd_knlist
[kev
->ident
];
8025 } else if (fdp
->fd_knhashmask
!= 0) {
8026 /* hash non-fd knotes here too */
8027 list
= &fdp
->fd_knhash
[KN_HASH((u_long
)kev
->ident
, fdp
->fd_knhashmask
)];
8031 * scan the selected list looking for a match
8034 SLIST_FOREACH(kn
, list
, kn_link
) {
8035 if (kq
== knote_get_kq(kn
) &&
8036 kev
->ident
== kn
->kn_id
&&
8037 kev
->filter
== kn
->kn_filter
) {
8038 if (kev
->flags
& EV_UDATA_SPECIFIC
) {
8039 if ((kn
->kn_status
& KN_UDATA_SPECIFIC
) &&
8040 kev
->udata
== kn
->kn_udata
) {
8041 break; /* matching udata-specific knote */
8043 } else if ((kn
->kn_status
& KN_UDATA_SPECIFIC
) == 0) {
8044 break; /* matching non-udata-specific knote */
8053 * kq_add_knote- Add knote to the fd table for process
8054 * while checking for duplicates.
8056 * All file-based filters associate a list of knotes by file
8057 * descriptor index. All other filters hash the knote by ident.
8059 * May have to grow the table of knote lists to cover the
8060 * file descriptor index presented.
8062 * fd_knhashlock and fdlock unheld on entry (and exit).
8064 * Takes a rwlock boost if inserting the knote is successful.
8067 kq_add_knote(struct kqueue
*kq
, struct knote
*kn
,
8068 struct kevent_internal_s
*kev
,
8069 struct proc
*p
, int *knoteuse_flags
)
8071 struct filedesc
*fdp
= p
->p_fd
;
8072 struct klist
*list
= NULL
;
8074 bool is_fd
= knote_fops(kn
)->f_isfd
;
8081 if (knote_fdfind(kq
, kev
, is_fd
, p
) != NULL
) {
8082 /* found an existing knote: we can't add this one */
8087 /* knote was not found: add it now */
8089 if (fdp
->fd_knhashmask
== 0) {
8092 list
= hashinit(CONFIG_KN_HASHSIZE
, M_KQUEUE
,
8099 fdp
->fd_knhash
= list
;
8100 fdp
->fd_knhashmask
= size
;
8103 list
= &fdp
->fd_knhash
[KN_HASH(kn
->kn_id
, fdp
->fd_knhashmask
)];
8104 SLIST_INSERT_HEAD(list
, kn
, kn_link
);
8109 /* knote is fd based */
8111 if ((u_int
)fdp
->fd_knlistsize
<= kn
->kn_id
) {
8114 if (kn
->kn_id
>= (uint64_t)p
->p_rlimit
[RLIMIT_NOFILE
].rlim_cur
8115 || kn
->kn_id
>= (uint64_t)maxfiles
) {
8119 /* have to grow the fd_knlist */
8120 size
= fdp
->fd_knlistsize
;
8121 while (size
<= kn
->kn_id
)
8124 if (size
>= (UINT_MAX
/sizeof(struct klist
*))) {
8129 MALLOC(list
, struct klist
*,
8130 size
* sizeof(struct klist
*), M_KQUEUE
, M_WAITOK
);
8136 bcopy((caddr_t
)fdp
->fd_knlist
, (caddr_t
)list
,
8137 fdp
->fd_knlistsize
* sizeof(struct klist
*));
8138 bzero((caddr_t
)list
+
8139 fdp
->fd_knlistsize
* sizeof(struct klist
*),
8140 (size
- fdp
->fd_knlistsize
) * sizeof(struct klist
*));
8141 FREE(fdp
->fd_knlist
, M_KQUEUE
);
8142 fdp
->fd_knlist
= list
;
8143 fdp
->fd_knlistsize
= size
;
8146 list
= &fdp
->fd_knlist
[kn
->kn_id
];
8147 SLIST_INSERT_HEAD(list
, kn
, kn_link
);
8154 if (ret
== 0 && knoteuse_needs_boost(kn
, kev
)) {
8155 set_thread_rwlock_boost();
8156 *knoteuse_flags
= KNUSE_BOOST
;
8158 *knoteuse_flags
= KNUSE_NONE
;
8169 * kq_remove_knote - remove a knote from the fd table for process
8170 * and copy kn_status an kq_state while holding kqlock and
8173 * If the filter is file-based, remove based on fd index.
8174 * Otherwise remove from the hash based on the ident.
8176 * fd_knhashlock and fdlock unheld on entry (and exit).
8179 kq_remove_knote(struct kqueue
*kq
, struct knote
*kn
, struct proc
*p
,
8180 kn_status_t
*kn_status
, uint16_t *kq_state
)
8182 struct filedesc
*fdp
= p
->p_fd
;
8183 struct klist
*list
= NULL
;
8186 is_fd
= knote_fops(kn
)->f_isfd
;
8194 assert ((u_int
)fdp
->fd_knlistsize
> kn
->kn_id
);
8195 list
= &fdp
->fd_knlist
[kn
->kn_id
];
8197 list
= &fdp
->fd_knhash
[KN_HASH(kn
->kn_id
, fdp
->fd_knhashmask
)];
8199 SLIST_REMOVE(list
, kn
, knote
, kn_link
);
8202 *kn_status
= kn
->kn_status
;
8203 *kq_state
= kq
->kq_state
;
8213 * kq_find_knote_and_kq_lock - lookup a knote in the fd table for process
8214 * and, if the knote is found, acquires the kqlock while holding the fd table lock/spinlock.
8216 * fd_knhashlock or fdlock unheld on entry (and exit)
8219 static struct knote
*
8220 kq_find_knote_and_kq_lock(struct kqueue
*kq
,
8221 struct kevent_internal_s
*kev
,
8232 ret
= knote_fdfind(kq
, kev
, is_fd
, p
);
8246 * knote_drop - disconnect and drop the knote
8248 * Called with the kqueue unlocked and holding a
8249 * "drop reference" on the knote in question.
8250 * This reference is most often aquired thru a call
8251 * to kqlock2knotedrop(). But it can also be acquired
8252 * through stealing a drop reference via a call to
8253 * knoteuse2knotedrop() or during the initial attach
8256 * The knote may have already been detached from
8257 * (or not yet attached to) its source object.
8260 knote_drop(struct knote
*kn
, __unused
struct proc
*ctxp
)
8262 struct kqueue
*kq
= knote_get_kq(kn
);
8263 struct proc
*p
= kq
->kq_p
;
8264 kn_status_t kn_status
;
8267 /* If we are attached, disconnect from the source first */
8268 if (kn
->kn_status
& KN_ATTACHED
) {
8269 knote_fops(kn
)->f_detach(kn
);
8272 /* Remove the source from the appropriate hash */
8273 kq_remove_knote(kq
, kn
, p
, &kn_status
, &kq_state
);
8276 * If a kqueue_dealloc is happening in parallel for the kq
8277 * pointed by the knote the kq could be aready deallocated
8279 * Do not access the kq after the kq_remove_knote if it is
8283 /* determine if anyone needs to know about the drop */
8284 assert((kn_status
& (KN_DROPPING
| KN_SUPPRESSED
| KN_QUEUED
)) == KN_DROPPING
);
8287 * If KN_USEWAIT is set, some other thread was trying to drop the kn.
8288 * Or it was in kqueue_dealloc, so the kqueue_dealloc did not happen
8289 * because that thread was waiting on this wake, or it was a drop happening
8290 * because of a kevent_register that takes a reference on the kq, and therefore
8291 * the kq cannot be deallocated in parallel.
8293 * It is safe to access kq->kq_wqs if needswakeup is set.
8295 if (kn_status
& KN_USEWAIT
)
8296 waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
,
8297 CAST_EVENT64_T(&kn
->kn_status
),
8299 WAITQ_ALL_PRIORITIES
);
8301 if (knote_fops(kn
)->f_isfd
&& ((kn
->kn_status
& KN_VANISHED
) == 0))
8302 fp_drop(p
, kn
->kn_id
, kn
->kn_fp
, 0);
8307 * release reference on dynamic kq (and free if last).
8308 * Will only be last if this is from fdfree, etc...
8309 * because otherwise processing thread has reference.
8311 if (kq_state
& KQ_DYNAMIC
)
8312 kqueue_release_last(p
, kq
);
/* called with kqueue lock held */
static void
knote_activate(struct knote *kn)
{
	if (kn->kn_status & KN_ACTIVE)
		return;

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_ACTIVATE),
		kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
		kn->kn_filtid);

	kn->kn_status |= KN_ACTIVE;
	if (knote_enqueue(kn))
		knote_wakeup(kn);
}

/* called with kqueue lock held */
static void
knote_deactivate(struct knote *kn)
{
	kn->kn_status &= ~KN_ACTIVE;
	if ((kn->kn_status & KN_STAYACTIVE) == 0)
		knote_dequeue(kn);
}

/* called with kqueue lock held */
static void
knote_enable(struct knote *kn)
{
	if ((kn->kn_status & KN_DISABLED) == 0)
		return;

	kn->kn_status &= ~KN_DISABLED;

	if (kn->kn_status & KN_SUPPRESSED) {
		/* Clear the sync qos on the knote */
		knote_adjust_sync_qos(kn, THREAD_QOS_UNSPECIFIED, FALSE);

		/*
		 * it is possible for userland to have knotes registered for a given
		 * workloop `wl_orig` but really handled on another workloop `wl_new`.
		 *
		 * In that case, rearming will happen from the servicer thread of
		 * `wl_new` which if `wl_orig` is no longer being serviced, would cause
		 * this knote to stay suppressed forever if we only relied on
		 * kqworkloop_acknowledge_events to be called by `wl_orig`.
		 *
		 * However if we see the KQ_PROCESSING bit on `wl_orig` set, we can't
		 * unsuppress because that would mess with the processing phase of
		 * `wl_orig`, however it also means kqworkloop_acknowledge_events()
		 * will be called.
		 */
		struct kqueue *kq = knote_get_kq(kn);
		if ((kq->kq_state & KQ_PROCESSING) == 0) {
			knote_unsuppress(kn);
		}
	} else if (knote_enqueue(kn)) {
		knote_wakeup(kn);
	}
}

/* called with kqueue lock held */
static void
knote_disable(struct knote *kn)
{
	if (kn->kn_status & KN_DISABLED)
		return;

	kn->kn_status |= KN_DISABLED;
	knote_dequeue(kn);
}
/* called with kqueue lock held */
static void
knote_suppress(struct knote *kn)
{
	struct kqtailq *suppressq;
	struct kqueue *kq = knote_get_kq(kn);

	kqlock_held(kq);

	if (kn->kn_status & KN_SUPPRESSED)
		return;

	knote_dequeue(kn);
	kn->kn_status |= KN_SUPPRESSED;
	suppressq = kqueue_get_suppressed_queue(kq, knote_get_qos_index(kn));
	TAILQ_INSERT_TAIL(suppressq, kn, kn_tqe);

	if ((kq->kq_state & KQ_WORKLOOP) &&
	    knote_get_qos_override_index(kn) == THREAD_QOS_USER_INTERACTIVE &&
	    kn->kn_qos_override_is_sync) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;
		/* update the sync qos override counter for suppressed knotes */
		kqworkloop_update_override(kqwl, knote_get_qos_index(kn),
			knote_get_qos_override_index(kn),
			(KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS | KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI));
	}
}

/* called with kqueue lock held */
static void
knote_unsuppress(struct knote *kn)
{
	struct kqtailq *suppressq;
	struct kqueue *kq = knote_get_kq(kn);

	kqlock_held(kq);

	if ((kn->kn_status & KN_SUPPRESSED) == 0)
		return;

	/* Clear the sync qos on the knote */
	knote_adjust_sync_qos(kn, THREAD_QOS_UNSPECIFIED, FALSE);

	kn->kn_status &= ~KN_SUPPRESSED;
	suppressq = kqueue_get_suppressed_queue(kq, knote_get_qos_index(kn));
	TAILQ_REMOVE(suppressq, kn, kn_tqe);

	/* update in-use qos to equal requested qos */
	kn->kn_qos_index = kn->kn_req_index;

	/* don't wakeup if unsuppressing just a stay-active knote */
	if (knote_enqueue(kn) && (kn->kn_status & KN_ACTIVE)) {
		knote_wakeup(kn);
	}

	if ((kq->kq_state & KQ_WORKLOOP) && !(kq->kq_state & KQ_NO_WQ_THREAD) &&
	    knote_get_qos_override_index(kn) == THREAD_QOS_USER_INTERACTIVE &&
	    kn->kn_qos_override_is_sync) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		/* update the sync qos override counter for suppressed knotes */
		kqworkloop_update_override(kqwl, knote_get_qos_index(kn),
			knote_get_qos_override_index(kn),
			(KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS | KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI));
	}

	if (TAILQ_EMPTY(suppressq) && (kq->kq_state & KQ_WORKLOOP) &&
	    !(kq->kq_state & KQ_NO_WQ_THREAD)) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;
		if (kqworkloop_is_processing_on_current_thread(kqwl)) {
			/*
			 * kqworkloop_end_processing() will perform the required QoS
			 * computations when it unsets the processing mode.
			 */
		} else {
			kqwl_req_lock(kqwl);
			kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_RESET_WAKEUP_OVERRIDE, 0);
			kqwl_req_unlock(kqwl);
		}
	}
}
/* called with kqueue lock held */
static void
knote_update_sync_override_state(struct knote *kn)
{
	struct kqtailq *queue = knote_get_queue(kn);
	struct kqueue *kq = knote_get_kq(kn);

	if (!(kq->kq_state & KQ_WORKLOOP) ||
	    knote_get_queue_index(kn) != THREAD_QOS_USER_INTERACTIVE)
		return;

	/* Update the sync ipc state on workloop */
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	boolean_t sync_ipc_override = FALSE;
	if (!TAILQ_EMPTY(queue)) {
		struct knote *kn_head = TAILQ_FIRST(queue);
		if (kn_head->kn_qos_override_is_sync)
			sync_ipc_override = TRUE;
	}
	kqworkloop_update_sync_override_state(kqwl, sync_ipc_override);
}

/* called with kqueue lock held */
static int
knote_enqueue(struct knote *kn)
{
	if ((kn->kn_status & (KN_ACTIVE | KN_STAYACTIVE)) == 0 ||
	    (kn->kn_status & (KN_DISABLED | KN_SUPPRESSED | KN_DROPPING)))
		return 0;

	if ((kn->kn_status & KN_QUEUED) == 0) {
		struct kqtailq *queue = knote_get_queue(kn);
		struct kqueue *kq = knote_get_kq(kn);

		/* insert at head for sync ipc waiters */
		if (kn->kn_qos_override_is_sync) {
			TAILQ_INSERT_HEAD(queue, kn, kn_tqe);
		} else {
			TAILQ_INSERT_TAIL(queue, kn, kn_tqe);
		}
		kn->kn_status |= KN_QUEUED;
		kq->kq_count++;
		knote_update_sync_override_state(kn);
		return 1;
	}
	return ((kn->kn_status & KN_STAYACTIVE) != 0);
}
/* called with kqueue lock held */
static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqtailq *queue;

	if ((kn->kn_status & KN_QUEUED) == 0)
		return;

	queue = knote_get_queue(kn);
	TAILQ_REMOVE(queue, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	knote_update_sync_override_state(kn);
}
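/*
 * Illustrative sketch only (not part of the original file): knote_enqueue()
 * and knote_dequeue() above keep each bucket as a TAILQ and push sync-IPC
 * boosted knotes at the head so they are serviced first. The struct and
 * helper below are a hypothetical, standalone model of that ordering
 * decision; the TAILQ macros come from <sys/queue.h>, already included above.
 */
#if 0	/* example sketch -- not compiled */
struct example_item {
	TAILQ_ENTRY(example_item)	ei_link;
	boolean_t			ei_is_sync;
};
TAILQ_HEAD(example_bucket, example_item);

static void
example_enqueue(struct example_bucket *bucket, struct example_item *item)
{
	if (item->ei_is_sync) {
		TAILQ_INSERT_HEAD(bucket, item, ei_link);	/* serviced first */
	} else {
		TAILQ_INSERT_TAIL(bucket, item, ei_link);	/* normal FIFO order */
	}
}
#endif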
void
knote_init(void)
{
	knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote),
	    8192, "knote zone");

	kqfile_zone = zinit(sizeof(struct kqfile), 8192*sizeof(struct kqfile),
	    8192, "kqueue file zone");

	kqworkq_zone = zinit(sizeof(struct kqworkq), 8192*sizeof(struct kqworkq),
	    8192, "kqueue workq zone");

	kqworkloop_zone = zinit(sizeof(struct kqworkloop), 8192*sizeof(struct kqworkloop),
	    8192, "kqueue workloop zone");

	/* allocate kq lock group attribute and group */
	kq_lck_grp_attr = lck_grp_attr_alloc_init();

	kq_lck_grp = lck_grp_alloc_init("kqueue", kq_lck_grp_attr);

	/* Allocate kq lock attribute */
	kq_lck_attr = lck_attr_alloc_init();

	/* Initialize the timer filter lock */
	lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);

	/* Initialize the user filter lock */
	lck_spin_init(&_filt_userlock, kq_lck_grp, kq_lck_attr);

#if CONFIG_MEMORYSTATUS
	/* Initialize the memorystatus list lock */
	memorystatus_kevent_init(kq_lck_grp, kq_lck_attr);
#endif
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

const struct filterops *
knote_fops(struct knote *kn)
{
	return sysfilt_ops[kn->kn_filtid];
}

static struct knote *
knote_alloc(void)
{
	struct knote *kn;

	kn = ((struct knote *)zalloc(knote_zone));
	*kn = (struct knote) { .kn_qos_override = 0, .kn_qos_sync_override = 0, .kn_qos_override_is_sync = 0 };
	return (kn);
}

static void
knote_free(struct knote *kn)
{
	zfree(knote_zone, kn);
}
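/*
 * Illustrative sketch only (not part of the original file): knote_init(),
 * knote_alloc() and knote_free() above follow the usual zalloc pattern --
 * create a fixed-size zone once at init, then allocate and free objects from
 * it. The object type and zone below are hypothetical.
 */
#if 0	/* example sketch -- not compiled */
struct example_obj {
	int	eo_state;
};

static struct zone *example_zone;

static void
example_zone_init(void)
{
	example_zone = zinit(sizeof(struct example_obj),
	    8192 * sizeof(struct example_obj), 8192, "example zone");
}

static struct example_obj *
example_alloc(void)
{
	struct example_obj *eo = zalloc(example_zone);
	*eo = (struct example_obj){ .eo_state = 0 };
	return eo;
}

static void
example_free(struct example_obj *eo)
{
	zfree(example_zone, eo);
}
#endif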
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/kern_event.h>
#include <sys/malloc.h>
#include <sys/sys_domain.h>
#include <sys/syslog.h>

#define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))

#define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))

static lck_grp_attr_t *kev_lck_grp_attr;
static lck_attr_t *kev_lck_attr;
static lck_grp_t *kev_lck_grp;
static decl_lck_rw_data(,kev_lck_data);
static lck_rw_t *kev_rwlock = &kev_lck_data;

static int kev_attach(struct socket *so, int proto, struct proc *p);
static int kev_detach(struct socket *so);
static int kev_control(struct socket *so, u_long cmd, caddr_t data,
    struct ifnet *ifp, struct proc *p);
static lck_mtx_t * event_getlock(struct socket *, int);
static int event_lock(struct socket *, int, void *);
static int event_unlock(struct socket *, int, void *);

static int event_sofreelastref(struct socket *);
static void kev_delete(struct kern_event_pcb *);

static struct pr_usrreqs event_usrreqs = {
	.pru_attach =		kev_attach,
	.pru_control =		kev_control,
	.pru_detach =		kev_detach,
	.pru_soreceive =	soreceive,
};

static struct protosw eventsw[] = {
{
	.pr_type =		SOCK_RAW,
	.pr_protocol =		SYSPROTO_EVENT,
	.pr_flags =		PR_ATOMIC,
	.pr_usrreqs =		&event_usrreqs,
	.pr_lock =		event_lock,
	.pr_unlock =		event_unlock,
	.pr_getlock =		event_getlock,
}
};

__private_extern__ int kevt_getstat SYSCTL_HANDLER_ARGS;
__private_extern__ int kevt_pcblist SYSCTL_HANDLER_ARGS;

SYSCTL_NODE(_net_systm, OID_AUTO, kevt,
	CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Kernel event family");

struct kevtstat kevtstat;
SYSCTL_PROC(_net_systm_kevt, OID_AUTO, stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
    kevt_getstat, "S,kevtstat", "");

SYSCTL_PROC(_net_systm_kevt, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
    kevt_pcblist, "S,xkevtpcb", "");
8666 event_getlock(struct socket
*so
, int flags
)
8668 #pragma unused(flags)
8669 struct kern_event_pcb
*ev_pcb
= (struct kern_event_pcb
*)so
->so_pcb
;
8671 if (so
->so_pcb
!= NULL
) {
8672 if (so
->so_usecount
< 0)
8673 panic("%s: so=%p usecount=%d lrh= %s\n", __func__
,
8674 so
, so
->so_usecount
, solockhistory_nr(so
));
8677 panic("%s: so=%p NULL NO so_pcb %s\n", __func__
,
8678 so
, solockhistory_nr(so
));
8681 return (&ev_pcb
->evp_mtx
);
8685 event_lock(struct socket
*so
, int refcount
, void *lr
)
8690 lr_saved
= __builtin_return_address(0);
8694 if (so
->so_pcb
!= NULL
) {
8695 lck_mtx_lock(&((struct kern_event_pcb
*)so
->so_pcb
)->evp_mtx
);
8697 panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__
,
8698 so
, lr_saved
, solockhistory_nr(so
));
8702 if (so
->so_usecount
< 0) {
8703 panic("%s: so=%p so_pcb=%p lr=%p ref=%d lrh= %s\n", __func__
,
8704 so
, so
->so_pcb
, lr_saved
, so
->so_usecount
,
8705 solockhistory_nr(so
));
8712 so
->lock_lr
[so
->next_lock_lr
] = lr_saved
;
8713 so
->next_lock_lr
= (so
->next_lock_lr
+1) % SO_LCKDBG_MAX
;
8718 event_unlock(struct socket
*so
, int refcount
, void *lr
)
8721 lck_mtx_t
*mutex_held
;
8724 lr_saved
= __builtin_return_address(0);
8731 if (so
->so_usecount
< 0) {
8732 panic("%s: so=%p usecount=%d lrh= %s\n", __func__
,
8733 so
, so
->so_usecount
, solockhistory_nr(so
));
8736 if (so
->so_pcb
== NULL
) {
8737 panic("%s: so=%p NO PCB usecount=%d lr=%p lrh= %s\n", __func__
,
8738 so
, so
->so_usecount
, (void *)lr_saved
,
8739 solockhistory_nr(so
));
8742 mutex_held
= (&((struct kern_event_pcb
*)so
->so_pcb
)->evp_mtx
);
8744 LCK_MTX_ASSERT(mutex_held
, LCK_MTX_ASSERT_OWNED
);
8745 so
->unlock_lr
[so
->next_unlock_lr
] = lr_saved
;
8746 so
->next_unlock_lr
= (so
->next_unlock_lr
+1) % SO_LCKDBG_MAX
;
8748 if (so
->so_usecount
== 0) {
8749 VERIFY(so
->so_flags
& SOF_PCBCLEARING
);
8750 event_sofreelastref(so
);
8752 lck_mtx_unlock(mutex_held
);
8759 event_sofreelastref(struct socket
*so
)
8761 struct kern_event_pcb
*ev_pcb
= (struct kern_event_pcb
*)so
->so_pcb
;
8763 LCK_MTX_ASSERT(&(ev_pcb
->evp_mtx
), LCK_MTX_ASSERT_OWNED
);
8768 * Disable upcall in the event another thread is in kev_post_msg()
8769 * appending record to the receive socket buffer, since sbwakeup()
8770 * may release the socket lock otherwise.
8772 so
->so_rcv
.sb_flags
&= ~SB_UPCALL
;
8773 so
->so_snd
.sb_flags
&= ~SB_UPCALL
;
8774 so
->so_event
= sonullevent
;
8775 lck_mtx_unlock(&(ev_pcb
->evp_mtx
));
8777 LCK_MTX_ASSERT(&(ev_pcb
->evp_mtx
), LCK_MTX_ASSERT_NOTOWNED
);
8778 lck_rw_lock_exclusive(kev_rwlock
);
8779 LIST_REMOVE(ev_pcb
, evp_link
);
8780 kevtstat
.kes_pcbcount
--;
8781 kevtstat
.kes_gencnt
++;
8782 lck_rw_done(kev_rwlock
);
8785 sofreelastref(so
, 1);
8789 static int event_proto_count
= (sizeof (eventsw
) / sizeof (struct protosw
));
8792 struct kern_event_head kern_event_head
;
8794 static u_int32_t static_event_id
= 0;
8796 #define EVPCB_ZONE_MAX 65536
8797 #define EVPCB_ZONE_NAME "kerneventpcb"
8798 static struct zone
*ev_pcb_zone
;
8801 * Install the protosw's for the NKE manager. Invoked at extension load time
8804 kern_event_init(struct domain
*dp
)
8809 VERIFY(!(dp
->dom_flags
& DOM_INITIALIZED
));
8810 VERIFY(dp
== systemdomain
);
8812 kev_lck_grp_attr
= lck_grp_attr_alloc_init();
8813 if (kev_lck_grp_attr
== NULL
) {
8814 panic("%s: lck_grp_attr_alloc_init failed\n", __func__
);
8818 kev_lck_grp
= lck_grp_alloc_init("Kernel Event Protocol",
8820 if (kev_lck_grp
== NULL
) {
8821 panic("%s: lck_grp_alloc_init failed\n", __func__
);
8825 kev_lck_attr
= lck_attr_alloc_init();
8826 if (kev_lck_attr
== NULL
) {
8827 panic("%s: lck_attr_alloc_init failed\n", __func__
);
8831 lck_rw_init(kev_rwlock
, kev_lck_grp
, kev_lck_attr
);
8832 if (kev_rwlock
== NULL
) {
8833 panic("%s: lck_mtx_alloc_init failed\n", __func__
);
8837 for (i
= 0, pr
= &eventsw
[0]; i
< event_proto_count
; i
++, pr
++)
8838 net_add_proto(pr
, dp
, 1);
8840 ev_pcb_zone
= zinit(sizeof(struct kern_event_pcb
),
8841 EVPCB_ZONE_MAX
* sizeof(struct kern_event_pcb
), 0, EVPCB_ZONE_NAME
);
8842 if (ev_pcb_zone
== NULL
) {
8843 panic("%s: failed allocating ev_pcb_zone", __func__
);
8846 zone_change(ev_pcb_zone
, Z_EXPAND
, TRUE
);
8847 zone_change(ev_pcb_zone
, Z_CALLERACCT
, TRUE
);
8851 kev_attach(struct socket
*so
, __unused
int proto
, __unused
struct proc
*p
)
8854 struct kern_event_pcb
*ev_pcb
;
8856 error
= soreserve(so
, KEV_SNDSPACE
, KEV_RECVSPACE
);
8860 if ((ev_pcb
= (struct kern_event_pcb
*)zalloc(ev_pcb_zone
)) == NULL
) {
8863 bzero(ev_pcb
, sizeof(struct kern_event_pcb
));
8864 lck_mtx_init(&ev_pcb
->evp_mtx
, kev_lck_grp
, kev_lck_attr
);
8866 ev_pcb
->evp_socket
= so
;
8867 ev_pcb
->evp_vendor_code_filter
= 0xffffffff;
8869 so
->so_pcb
= (caddr_t
) ev_pcb
;
8870 lck_rw_lock_exclusive(kev_rwlock
);
8871 LIST_INSERT_HEAD(&kern_event_head
, ev_pcb
, evp_link
);
8872 kevtstat
.kes_pcbcount
++;
8873 kevtstat
.kes_gencnt
++;
8874 lck_rw_done(kev_rwlock
);
8880 kev_delete(struct kern_event_pcb
*ev_pcb
)
8882 VERIFY(ev_pcb
!= NULL
);
8883 lck_mtx_destroy(&ev_pcb
->evp_mtx
, kev_lck_grp
);
8884 zfree(ev_pcb_zone
, ev_pcb
);
8888 kev_detach(struct socket
*so
)
8890 struct kern_event_pcb
*ev_pcb
= (struct kern_event_pcb
*) so
->so_pcb
;
8892 if (ev_pcb
!= NULL
) {
8893 soisdisconnected(so
);
8894 so
->so_flags
|= SOF_PCBCLEARING
;
8901 * For now, kev_vendor_code and mbuf_tags use the same
8904 errno_t
kev_vendor_code_find(
8906 u_int32_t
*out_vendor_code
)
8908 if (strlen(string
) >= KEV_VENDOR_CODE_MAX_STR_LEN
) {
8911 return (net_str_id_find_internal(string
, out_vendor_code
,
8912 NSI_VENDOR_CODE
, 1));
8916 kev_msg_post(struct kev_msg
*event_msg
)
8918 mbuf_tag_id_t min_vendor
, max_vendor
;
8920 net_str_id_first_last(&min_vendor
, &max_vendor
, NSI_VENDOR_CODE
);
8922 if (event_msg
== NULL
)
8926 * Limit third parties to posting events for registered vendor codes
8929 if (event_msg
->vendor_code
< min_vendor
||
8930 event_msg
->vendor_code
> max_vendor
) {
8931 OSIncrementAtomic64((SInt64
*)&kevtstat
.kes_badvendor
);
8934 return (kev_post_msg(event_msg
));
int
kev_post_msg(struct kev_msg *event_msg)
{
	struct mbuf *m, *m2;
	struct kern_event_pcb *ev_pcb;
	struct kern_event_msg *ev;
	char *tmp;
	u_int32_t total_size;
	int i;

	/* Verify the message is small enough to fit in one mbuf w/o cluster */
	total_size = KEV_MSG_HEADER_SIZE;

	for (i = 0; i < 5; i++) {
		if (event_msg->dv[i].data_length == 0)
			break;
		total_size += event_msg->dv[i].data_length;
	}

	if (total_size > MLEN) {
		OSIncrementAtomic64((SInt64 *)&kevtstat.kes_toobig);
		return (EMSGSIZE);
	}

	m = m_get(M_WAIT, MT_DATA);
	if (m == 0) {
		OSIncrementAtomic64((SInt64 *)&kevtstat.kes_nomem);
		return (ENOMEM);
	}
	ev = mtod(m, struct kern_event_msg *);
	total_size = KEV_MSG_HEADER_SIZE;

	tmp = (char *) &ev->event_data[0];
	for (i = 0; i < 5; i++) {
		if (event_msg->dv[i].data_length == 0)
			break;

		total_size += event_msg->dv[i].data_length;
		bcopy(event_msg->dv[i].data_ptr, tmp,
		    event_msg->dv[i].data_length);
		tmp += event_msg->dv[i].data_length;
	}

	ev->id = ++static_event_id;
	ev->total_size = total_size;
	ev->vendor_code = event_msg->vendor_code;
	ev->kev_class = event_msg->kev_class;
	ev->kev_subclass = event_msg->kev_subclass;
	ev->event_code = event_msg->event_code;

	m->m_len = total_size;
	lck_rw_lock_shared(kev_rwlock);
	for (ev_pcb = LIST_FIRST(&kern_event_head);
	    ev_pcb;
	    ev_pcb = LIST_NEXT(ev_pcb, evp_link)) {
		lck_mtx_lock(&ev_pcb->evp_mtx);
		if (ev_pcb->evp_socket->so_pcb == NULL) {
			lck_mtx_unlock(&ev_pcb->evp_mtx);
			continue;
		}
		if (ev_pcb->evp_vendor_code_filter != KEV_ANY_VENDOR) {
			if (ev_pcb->evp_vendor_code_filter != ev->vendor_code) {
				lck_mtx_unlock(&ev_pcb->evp_mtx);
				continue;
			}

			if (ev_pcb->evp_class_filter != KEV_ANY_CLASS) {
				if (ev_pcb->evp_class_filter != ev->kev_class) {
					lck_mtx_unlock(&ev_pcb->evp_mtx);
					continue;
				}

				if ((ev_pcb->evp_subclass_filter !=
				    KEV_ANY_SUBCLASS) &&
				    (ev_pcb->evp_subclass_filter !=
				    ev->kev_subclass)) {
					lck_mtx_unlock(&ev_pcb->evp_mtx);
					continue;
				}
			}
		}

		m2 = m_copym(m, 0, m->m_len, M_WAIT);
		if (m2 == 0) {
			OSIncrementAtomic64((SInt64 *)&kevtstat.kes_nomem);
			m_free(m);
			lck_mtx_unlock(&ev_pcb->evp_mtx);
			lck_rw_done(kev_rwlock);
			return (ENOMEM);
		}
		if (sbappendrecord(&ev_pcb->evp_socket->so_rcv, m2)) {
			/*
			 * We use "m" for the socket stats as it would be
			 * unsafe to use "m2"
			 */
			so_inc_recv_data_stat(ev_pcb->evp_socket,
			    1, m->m_len, MBUF_TC_BE);

			sorwakeup(ev_pcb->evp_socket);
			OSIncrementAtomic64((SInt64 *)&kevtstat.kes_posted);
		} else {
			OSIncrementAtomic64((SInt64 *)&kevtstat.kes_fullsock);
		}
		lck_mtx_unlock(&ev_pcb->evp_mtx);
	}
	m_free(m);
	lck_rw_done(kev_rwlock);

	return (0);
}
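/*
 * Illustrative sketch of the posting path (reusing the hypothetical
 * example_vendor_code above): a kernel client fills in a struct kev_msg,
 * attaches up to five data vectors, and calls kev_msg_post(), which vets
 * the vendor code and hands the message to kev_post_msg() for delivery to
 * every matching kern_event_pcb.
 *
 *	static errno_t
 *	example_post_event(u_int32_t event_code, void *data, u_int32_t len)
 *	{
 *		struct kev_msg ev_msg;
 *
 *		bzero(&ev_msg, sizeof(ev_msg));
 *		ev_msg.vendor_code = example_vendor_code;
 *		ev_msg.kev_class = 1;		// vendor-defined class (placeholder)
 *		ev_msg.kev_subclass = 1;	// vendor-defined subclass (placeholder)
 *		ev_msg.event_code = event_code;
 *		ev_msg.dv[0].data_ptr = data;
 *		ev_msg.dv[0].data_length = len;	// dv[1].data_length == 0 ends the list
 *		return kev_msg_post(&ev_msg);
 *	}
 */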
static int
kev_control(struct socket *so,
    u_long cmd,
    caddr_t data,
    __unused struct ifnet *ifp,
    __unused struct proc *p)
{
	struct kev_request *kev_req = (struct kev_request *) data;
	struct kern_event_pcb *ev_pcb;
	struct kev_vendor_code *kev_vendor;
	u_int32_t *id_value = (u_int32_t *) data;

	switch (cmd) {
	case SIOCGKEVID:
		*id_value = static_event_id;
		break;
	case SIOCSKEVFILT:
		ev_pcb = (struct kern_event_pcb *) so->so_pcb;
		ev_pcb->evp_vendor_code_filter = kev_req->vendor_code;
		ev_pcb->evp_class_filter = kev_req->kev_class;
		ev_pcb->evp_subclass_filter = kev_req->kev_subclass;
		break;
	case SIOCGKEVFILT:
		ev_pcb = (struct kern_event_pcb *) so->so_pcb;
		kev_req->vendor_code = ev_pcb->evp_vendor_code_filter;
		kev_req->kev_class = ev_pcb->evp_class_filter;
		kev_req->kev_subclass = ev_pcb->evp_subclass_filter;
		break;
	case SIOCGKEVVENDOR:
		kev_vendor = (struct kev_vendor_code *)data;
		/* Make sure string is NULL terminated */
		kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0;
		return (net_str_id_find_internal(kev_vendor->vendor_string,
		    &kev_vendor->vendor_code, NSI_VENDOR_CODE, 0));
	default:
		return (ENOTSUP);
	}

	return (0);
}
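/*
 * Illustrative sketch of the ioctls handled above, issued from user space
 * on a PF_SYSTEM/SYSPROTO_EVENT descriptor (see the earlier sketch):
 * SIOCSKEVFILT installs the vendor/class/subclass triple consulted by
 * kev_post_msg(), SIOCGKEVFILT reads it back, and SIOCGKEVID returns the
 * most recently assigned event id.
 *
 *	#include <sys/ioctl.h>
 *	#include <sys/kern_event.h>
 *
 *	static int
 *	example_filter_events(int fd, u_int32_t vendor)
 *	{
 *		struct kev_request req = {
 *			.vendor_code = vendor,
 *			.kev_class = KEV_ANY_CLASS,
 *			.kev_subclass = KEV_ANY_SUBCLASS,
 *		};
 *		return ioctl(fd, SIOCSKEVFILT, &req);
 *	}
 */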
__private_extern__ int
kevt_getstat SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;

	lck_rw_lock_shared(kev_rwlock);

	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = sizeof(struct kevtstat);
		goto done;
	}

	error = SYSCTL_OUT(req, &kevtstat,
	    MIN(sizeof(struct kevtstat), req->oldlen));
done:
	lck_rw_done(kev_rwlock);

	return (error);
}
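/*
 * Illustrative sketch; the OID under which this handler is registered is
 * an assumption here (the registration lives earlier in this file, under
 * the net.systm.kevt node).  User space reads the counters as a raw
 * struct kevtstat.
 *
 *	#include <sys/sysctl.h>
 *
 *	static int
 *	example_read_kevt_stats(struct kevtstat *st)
 *	{
 *		size_t len = sizeof(*st);
 *		// kes_pcbcount, kes_posted, kes_fullsock, ... come back raw.
 *		return sysctlbyname("net.systm.kevt.stats", st, &len, NULL, 0);
 *	}
 */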
__private_extern__ int
kevt_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	int n, i;
	struct xsystmgen xsg;
	void *buf = NULL;
	size_t item_size = ROUNDUP64(sizeof (struct xkevtpcb)) +
	    ROUNDUP64(sizeof (struct xsocket_n)) +
	    2 * ROUNDUP64(sizeof (struct xsockbuf_n)) +
	    ROUNDUP64(sizeof (struct xsockstat_n));
	struct kern_event_pcb *ev_pcb;

	buf = _MALLOC(item_size, M_TEMP, M_WAITOK | M_ZERO);
	if (buf == NULL) {
		return (ENOMEM);
	}

	lck_rw_lock_shared(kev_rwlock);

	n = kevtstat.kes_pcbcount;

	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = (n + n/8) * item_size;
		goto done;
	}
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	bzero(&xsg, sizeof (xsg));
	xsg.xg_len = sizeof (xsg);
	xsg.xg_count = n;
	xsg.xg_gen = kevtstat.kes_gencnt;
	xsg.xg_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xsg, sizeof (xsg));
	if (error) {
		goto done;
	}
	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		goto done;
	}

	for (i = 0, ev_pcb = LIST_FIRST(&kern_event_head);
	    i < n && ev_pcb != NULL;
	    i++, ev_pcb = LIST_NEXT(ev_pcb, evp_link)) {
		struct xkevtpcb *xk = (struct xkevtpcb *)buf;
		struct xsocket_n *xso = (struct xsocket_n *)
		    ADVANCE64(xk, sizeof (*xk));
		struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
		    ADVANCE64(xso, sizeof (*xso));
		struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
		    ADVANCE64(xsbrcv, sizeof (*xsbrcv));
		struct xsockstat_n *xsostats = (struct xsockstat_n *)
		    ADVANCE64(xsbsnd, sizeof (*xsbsnd));

		bzero(buf, item_size);

		lck_mtx_lock(&ev_pcb->evp_mtx);

		xk->kep_len = sizeof(struct xkevtpcb);
		xk->kep_kind = XSO_EVT;
		xk->kep_evtpcb = (uint64_t)VM_KERNEL_ADDRPERM(ev_pcb);
		xk->kep_vendor_code_filter = ev_pcb->evp_vendor_code_filter;
		xk->kep_class_filter = ev_pcb->evp_class_filter;
		xk->kep_subclass_filter = ev_pcb->evp_subclass_filter;

		sotoxsocket_n(ev_pcb->evp_socket, xso);
		sbtoxsockbuf_n(ev_pcb->evp_socket ?
		    &ev_pcb->evp_socket->so_rcv : NULL, xsbrcv);
		sbtoxsockbuf_n(ev_pcb->evp_socket ?
		    &ev_pcb->evp_socket->so_snd : NULL, xsbsnd);
		sbtoxsockstat_n(ev_pcb->evp_socket, xsostats);

		lck_mtx_unlock(&ev_pcb->evp_mtx);

		error = SYSCTL_OUT(req, buf, item_size);
	}

	if (error == 0) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xsg, sizeof (xsg));
		xsg.xg_len = sizeof (xsg);
		xsg.xg_count = n;
		xsg.xg_gen = kevtstat.kes_gencnt;
		xsg.xg_sogen = so_gencnt;
		error = SYSCTL_OUT(req, &xsg, sizeof (xsg));
	}

done:
	lck_rw_done(kev_rwlock);

	if (buf != NULL)
		FREE(buf, M_TEMP);

	return (error);
}

#endif /* SOCKETS */
int
fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
{
	struct vinfo_stat * st;

	st = &kinfo->kq_stat;

	st->vst_size = kq->kq_count;
	if (kq->kq_state & KQ_KEV_QOS)
		st->vst_blksize = sizeof(struct kevent_qos_s);
	else if (kq->kq_state & KQ_KEV64)
		st->vst_blksize = sizeof(struct kevent64_s);
	else
		st->vst_blksize = sizeof(struct kevent);
	st->vst_mode = S_IFIFO;
	st->vst_ino = (kq->kq_state & KQ_DYNAMIC) ?
	    ((struct kqworkloop *)kq)->kqwl_dynamicid : 0;

	/* flags exported to libproc as PROC_KQUEUE_* (sys/proc_info.h) */
#define PROC_KQUEUE_MASK (KQ_SEL|KQ_SLEEP|KQ_KEV32|KQ_KEV64|KQ_KEV_QOS|KQ_WORKQ|KQ_WORKLOOP)
	kinfo->kq_state = kq->kq_state & PROC_KQUEUE_MASK;

	return (0);
}
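/*
 * Illustrative sketch (assumption: the PROC_PIDFDKQUEUEINFO flavor of
 * proc_pidfdinfo() is the libproc path that reaches fill_kqueueinfo() for
 * kqueues that exist as file descriptors):
 *
 *	#include <libproc.h>
 *	#include <sys/proc_info.h>
 *
 *	static int
 *	example_kqueue_state(pid_t pid, int kqfd, uint32_t *state)
 *	{
 *		struct kqueue_fdinfo kqfdi;
 *		int n = proc_pidfdinfo(pid, kqfd, PROC_PIDFDKQUEUEINFO,
 *		    &kqfdi, sizeof(kqfdi));
 *		if (n != (int)sizeof(kqfdi))
 *			return -1;
 *		// kq_state carries the PROC_KQUEUE_* flags masked above;
 *		// kq_stat.vst_size is the number of pending events.
 *		*state = kqfdi.kqueueinfo.kq_state;
 *		return 0;
 *	}
 */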
static int
fill_kqueue_dyninfo(struct kqueue *kq, struct kqueue_dyninfo *kqdi)
{
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	struct kqrequest *kqr = &kqwl->kqwl_request;
	int err;

	if ((kq->kq_state & KQ_WORKLOOP) == 0) {
		return EINVAL;
	}

	if ((err = fill_kqueueinfo(kq, &kqdi->kqdi_info))) {
		return err;
	}

	kqwl_req_lock(kqwl);

	if (kqr->kqr_thread) {
		kqdi->kqdi_servicer = thread_tid(kqr->kqr_thread);
	}

	if (kqwl->kqwl_owner == WL_OWNER_SUSPENDED) {
		kqdi->kqdi_owner = ~0ull;
	} else {
		kqdi->kqdi_owner = thread_tid(kqwl->kqwl_owner);
	}

	kqdi->kqdi_request_state = kqr->kqr_state;
	kqdi->kqdi_async_qos = kqr->kqr_qos_index;
	kqdi->kqdi_events_qos = kqr->kqr_override_index;
	kqdi->kqdi_sync_waiters = kqr->kqr_dsync_waiters;
	kqdi->kqdi_sync_waiter_qos = kqr->kqr_dsync_waiters_qos;

	kqwl_req_unlock(kqwl);

	return 0;
}
void
knote_markstayactive(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);

	kqlock(kq);
	kn->kn_status |= KN_STAYACTIVE;

	/*
	 * Making a knote stay active is a property of the knote that must be
	 * established before it is fully attached.
	 */
	assert(kn->kn_status & KN_ATTACHING);

	/* handle all stayactive knotes on the (appropriate) manager */
	if (kq->kq_state & KQ_WORKQ) {
		knote_set_qos_index(kn, KQWQ_QOS_MANAGER);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;
		kqwl_req_lock(kqwl);
		assert(kn->kn_req_index && kn->kn_req_index < THREAD_QOS_LAST);
		kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_STAYACTIVE_QOS,
		    kn->kn_req_index);
		kqwl_req_unlock(kqwl);
		knote_set_qos_index(kn, KQWL_BUCKET_STAYACTIVE);
	}

	knote_activate(kn);
	kqunlock(kq);
}

void
knote_clearstayactive(struct knote *kn)
{
	kqlock(knote_get_kq(kn));
	kn->kn_status &= ~KN_STAYACTIVE;
	knote_deactivate(kn);
	kqunlock(knote_get_kq(kn));
}
static unsigned long
kevent_extinfo_emit(struct kqueue *kq, struct knote *kn, struct kevent_extinfo *buf,
    unsigned long buflen, unsigned long nknotes)
{
	for (; kn; kn = SLIST_NEXT(kn, kn_link)) {
		if (kq == knote_get_kq(kn)) {
			if (nknotes < buflen) {
				struct kevent_extinfo *info = &buf[nknotes];
				struct kevent_internal_s *kevp = &kn->kn_kevent;

				kqlock(kq);

				info->kqext_kev = (struct kevent_qos_s){
					.ident = kevp->ident,
					.filter = kevp->filter,
					.flags = kevp->flags,
					.fflags = kevp->fflags,
					.data = (int64_t)kevp->data,
					.udata = kevp->udata,
					.ext[0] = kevp->ext[0],
					.ext[1] = kevp->ext[1],
					.ext[2] = kevp->ext[2],
					.ext[3] = kevp->ext[3],
					.qos = kn->kn_req_index,
				};
				info->kqext_sdata = kn->kn_sdata;
				info->kqext_status = kn->kn_status;
				info->kqext_sfflags = kn->kn_sfflags;

				kqunlock(kq);
			}

			/* we return total number of knotes, which may be more than requested */
			nknotes++;
		}
	}

	return nknotes;
}
int
kevent_copyout_proc_dynkqids(void *proc, user_addr_t ubuf, uint32_t ubufsize,
    int32_t *nkqueues_out)
{
	proc_t p = (proc_t)proc;
	struct filedesc *fdp = p->p_fd;
	unsigned int nkqueues = 0;
	unsigned long ubuflen = ubufsize / sizeof(kqueue_id_t);
	size_t buflen, bufsize;
	kqueue_id_t *kq_ids = NULL;
	int err = 0;

	assert(p->p_fd != NULL);

	if (ubuf == USER_ADDR_NULL && ubufsize != 0) {
		err = EINVAL;
		goto out;
	}

	buflen = min(ubuflen, PROC_PIDDYNKQUEUES_MAX);

	if (buflen) {
		if (os_mul_overflow(sizeof(kqueue_id_t), buflen, &bufsize)) {
			err = ERANGE;
			goto out;
		}
		kq_ids = kalloc(bufsize);
		assert(kq_ids != NULL);
	}

	kqhash_lock(p);

	if (fdp->fd_kqhashmask > 0) {
		for (uint32_t i = 0; i < fdp->fd_kqhashmask + 1; i++) {
			struct kqworkloop *kqwl;

			SLIST_FOREACH(kqwl, &fdp->fd_kqhash[i], kqwl_hashlink) {
				/* report the number of kqueues, even if they don't all fit */
				if (nkqueues < buflen) {
					kq_ids[nkqueues] = kqwl->kqwl_dynamicid;
				}
				nkqueues++;
			}
		}
	}

	kqhash_unlock(p);

	if (kq_ids) {
		size_t copysize;
		if (os_mul_overflow(sizeof(kqueue_id_t), min(ubuflen, nkqueues), &copysize)) {
			err = ERANGE;
			goto out;
		}

		assert(ubufsize >= copysize);
		err = copyout(kq_ids, ubuf, copysize);
	}

out:
	if (kq_ids) {
		kfree(kq_ids, bufsize);
	}

	if (!err) {
		*nkqueues_out = (int)min(nkqueues, PROC_PIDDYNKQUEUES_MAX);
	}
	return err;
}
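/*
 * Illustrative sketch (assumption: libproc exposes this path as
 * proc_listdynkqueueids(), mirroring the PROC_PIDDYNKQUEUES_MAX cap applied
 * above):
 *
 *	#include <libproc.h>
 *	#include <sys/event.h>
 *
 *	static int
 *	example_count_workloops(pid_t pid)
 *	{
 *		kqueue_id_t ids[64];
 *		// The return value reports how many dynamic (workloop)
 *		// kqueues the process owns, even when only the first 64 fit.
 *		return proc_listdynkqueueids(pid, ids, sizeof(ids));
 *	}
 */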
int
kevent_copyout_dynkqinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf,
    uint32_t ubufsize, int32_t *size_out)
{
	proc_t p = (proc_t)proc;
	struct kqueue *kq;
	int err = 0;
	struct kqueue_dyninfo kqdi = { };

	assert(p != NULL);

	if (ubufsize < sizeof(struct kqueue_info)) {
		return ENOBUFS;
	}

	kqhash_lock(p);
	kq = kqueue_hash_lookup(p, kq_id);
	if (!kq) {
		kqhash_unlock(p);
		return ESRCH;
	}
	kqueue_retain(kq);
	kqhash_unlock(p);

	/*
	 * backward compatibility: allow the argument to this call to only be
	 * a struct kqueue_info
	 */
	if (ubufsize >= sizeof(struct kqueue_dyninfo)) {
		ubufsize = sizeof(struct kqueue_dyninfo);
		err = fill_kqueue_dyninfo(kq, &kqdi);
	} else {
		ubufsize = sizeof(struct kqueue_info);
		err = fill_kqueueinfo(kq, &kqdi.kqdi_info);
	}
	if (err == 0 && (err = copyout(&kqdi, ubuf, ubufsize)) == 0) {
		*size_out = ubufsize;
	}
	kqueue_release_last(p, kq);
	return err;
}
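/*
 * Illustrative sketch (assumption: the PROC_PIDDYNKQUEUE_INFO flavor of
 * proc_piddynkqueueinfo() reaches this function, using a workloop ID from
 * the listing call above):
 *
 *	#include <libproc.h>
 *	#include <sys/proc_info.h>
 *
 *	static uint64_t
 *	example_workloop_servicer(pid_t pid, kqueue_id_t kq_id)
 *	{
 *		struct kqueue_dyninfo kqdi;
 *		int n = proc_piddynkqueueinfo(pid, PROC_PIDDYNKQUEUE_INFO,
 *		    kq_id, &kqdi, sizeof(kqdi));
 *		if (n < (int)sizeof(struct kqueue_info))
 *			return 0;
 *		// kqdi_servicer/kqdi_owner are the thread IDs filled in by
 *		// fill_kqueue_dyninfo(); callers that pass only a struct
 *		// kqueue_info take the compatibility path above.
 *		return kqdi.kqdi_servicer;
 *	}
 */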
int
kevent_copyout_dynkqextinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf,
    uint32_t ubufsize, int32_t *nknotes_out)
{
	proc_t p = (proc_t)proc;
	struct kqueue *kq;
	int err;

	assert(p != NULL);

	kqhash_lock(p);
	kq = kqueue_hash_lookup(p, kq_id);
	if (!kq) {
		kqhash_unlock(p);
		return ESRCH;
	}
	kqueue_retain(kq);
	kqhash_unlock(p);

	err = pid_kqueue_extinfo(p, kq, ubuf, ubufsize, nknotes_out);
	kqueue_release_last(p, kq);
	return err;
}
int
pid_kqueue_extinfo(proc_t p, struct kqueue *kq, user_addr_t ubuf,
    uint32_t bufsize, int32_t *retval)
{
	struct knote *kn;
	int i;
	int err = 0;
	struct filedesc *fdp = p->p_fd;
	unsigned long nknotes = 0;
	unsigned long buflen = bufsize / sizeof(struct kevent_extinfo);
	struct kevent_extinfo *kqext = NULL;

	/* arbitrary upper limit to cap kernel memory usage, copyout size, etc. */
	buflen = min(buflen, PROC_PIDFDKQUEUE_KNOTES_MAX);

	kqext = kalloc(buflen * sizeof(struct kevent_extinfo));
	if (kqext == NULL) {
		err = ENOMEM;
		goto out;
	}
	bzero(kqext, buflen * sizeof(struct kevent_extinfo));

	proc_fdlock(p);
	for (i = 0; i < fdp->fd_knlistsize; i++) {
		kn = SLIST_FIRST(&fdp->fd_knlist[i]);
		nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes);
	}
	proc_fdunlock(p);

	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
			knhash_lock(p);
			kn = SLIST_FIRST(&fdp->fd_knhash[i]);
			nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes);
			knhash_unlock(p);
		}
	}

	assert(bufsize >= sizeof(struct kevent_extinfo) * min(buflen, nknotes));
	err = copyout(kqext, ubuf, sizeof(struct kevent_extinfo) * min(buflen, nknotes));

out:
	if (kqext) {
		kfree(kqext, buflen * sizeof(struct kevent_extinfo));
		kqext = NULL;
	}

	if (!err) {
		*retval = min(nknotes, PROC_PIDFDKQUEUE_KNOTES_MAX);
	}
	return err;
}
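/*
 * Illustrative sketch (assumption: PROC_PIDFDKQUEUE_EXTINFO is the
 * proc_pidfdinfo() flavor that lands here): the caller provides an array
 * of struct kevent_extinfo and learns how many knotes the kqueue really
 * has, which may be more than fit in the buffer.
 *
 *	#include <libproc.h>
 *	#include <sys/proc_info.h>
 *
 *	static int
 *	example_dump_knotes(pid_t pid, int kqfd)
 *	{
 *		struct kevent_extinfo info[32];
 *		// Each entry carries the knote's kevent, kn_status and
 *		// kn_sfflags, as filled in by kevent_extinfo_emit().
 *		return proc_pidfdinfo(pid, kqfd, PROC_PIDFDKQUEUE_EXTINFO,
 *		    info, sizeof(info));
 *	}
 */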
static unsigned int
klist_copy_udata(struct klist *list, uint64_t *buf,
    unsigned int buflen, unsigned int nknotes)
{
	struct kevent_internal_s *kev;
	struct knote *kn;
	SLIST_FOREACH(kn, list, kn_link) {
		if (nknotes < buflen) {
			struct kqueue *kq = knote_get_kq(kn);
			kqlock(kq);
			kev = &(kn->kn_kevent);
			buf[nknotes] = kev->udata;
			kqunlock(kq);
		}
		/* we return total number of knotes, which may be more than requested */
		nknotes++;
	}

	return nknotes;
}
static unsigned int
kqlist_copy_dynamicids(__assert_only proc_t p, struct kqlist *list,
    uint64_t *buf, unsigned int buflen, unsigned int nids)
{
	kqhash_lock_held(p);
	struct kqworkloop *kqwl;
	SLIST_FOREACH(kqwl, list, kqwl_hashlink) {
		if (nids < buflen) {
			buf[nids] = kqwl->kqwl_dynamicid;
		}
		nids++;
	}
	return nids;
}
int
kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize)
{
	proc_t p = (proc_t)proc;
	struct filedesc *fdp = p->p_fd;
	unsigned int nuptrs = 0;
	unsigned long buflen = bufsize / sizeof(uint64_t);

	if (buflen > 0) {
		assert(buf != NULL);
	}

	proc_fdlock(p);
	for (int i = 0; i < fdp->fd_knlistsize; i++) {
		nuptrs = klist_copy_udata(&fdp->fd_knlist[i], buf, buflen, nuptrs);
	}
	knhash_lock(p);
	proc_fdunlock(p);
	if (fdp->fd_knhashmask != 0) {
		for (int i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
			nuptrs = klist_copy_udata(&fdp->fd_knhash[i], buf, buflen, nuptrs);
		}
	}
	knhash_unlock(p);

	kqhash_lock(p);
	if (fdp->fd_kqhashmask != 0) {
		for (int i = 0; i < (int)fdp->fd_kqhashmask + 1; i++) {
			nuptrs = kqlist_copy_dynamicids(p, &fdp->fd_kqhash[i], buf, buflen,
			    nuptrs);
		}
	}
	kqhash_unlock(p);

	return (int)nuptrs;
}
static void
kevent_redrive_proc_thread_request(proc_t p)
{
	__assert_only int ret;
	ret = (*pthread_functions->workq_threadreq)(p, NULL, WORKQ_THREADREQ_REDRIVE, 0, 0);
	assert(ret == 0 || ret == ECANCELED);
}
static void
kevent_set_return_to_kernel_user_tsd(proc_t p, thread_t thread)
{
	uint64_t ast_addr;
	bool proc_is_64bit = !!(p->p_flag & P_LP64);
	size_t user_addr_size = proc_is_64bit ? 8 : 4;
	uint32_t ast_flags32 = 0;
	uint64_t ast_flags64 = 0;
	struct uthread *ut = get_bsdthread_info(thread);

	if (ut->uu_kqueue_bound != NULL) {
		if (ut->uu_kqueue_flags & KEVENT_FLAG_WORKLOOP) {
			ast_flags64 |= R2K_WORKLOOP_PENDING_EVENTS;
		} else if (ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ) {
			ast_flags64 |= R2K_WORKQ_PENDING_EVENTS;
		}
	}

	if (ast_flags64 == 0) {
		return;
	}

	if (!(p->p_flag & P_LP64)) {
		ast_flags32 = (uint32_t)ast_flags64;
		assert(ast_flags64 < 0x100000000ull);
	}

	ast_addr = thread_rettokern_addr(thread);
	if (ast_addr == 0) {
		return;
	}

	if (copyout((proc_is_64bit ? (void *)&ast_flags64 : (void *)&ast_flags32),
	    (user_addr_t)ast_addr,
	    user_addr_size) != 0) {
		printf("pid %d (tid:%llu): copyout of return_to_kernel ast flags failed with "
		    "ast_addr = %llu\n", p->p_pid, thread_tid(current_thread()), ast_addr);
	}
}
void
kevent_ast(thread_t thread, uint16_t bits)
{
	proc_t p = current_proc();

	if (bits & AST_KEVENT_REDRIVE_THREADREQ) {
		kevent_redrive_proc_thread_request(p);
	}
	if (bits & AST_KEVENT_RETURN_TO_KERNEL) {
		kevent_set_return_to_kernel_user_tsd(p, thread);
	}
}
#if DEVELOPMENT || DEBUG

#define KEVENT_SYSCTL_BOUND_ID 1

static int
kevent_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	uintptr_t type = (uintptr_t)arg1;
	uint64_t bound_id = 0;
	struct kqueue *kq;
	struct uthread *ut;

	if (type != KEVENT_SYSCTL_BOUND_ID) {
		return EINVAL;
	}

	if (req->newptr) {
		return EINVAL;
	}

	ut = get_bsdthread_info(current_thread());
	if (!ut) {
		return EFAULT;
	}

	kq = ut->uu_kqueue_bound;
	if (kq) {
		if (kq->kq_state & KQ_WORKLOOP) {
			bound_id = ((struct kqworkloop *)kq)->kqwl_dynamicid;
		} else if (kq->kq_state & KQ_WORKQ) {
			bound_id = -1;
		}
	}

	return sysctl_io_number(req, bound_id, sizeof(bound_id), NULL, NULL);
}

SYSCTL_NODE(_kern, OID_AUTO, kevent, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "kevent information");

SYSCTL_PROC(_kern_kevent, OID_AUTO, bound_id,
    CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    (void *)KEVENT_SYSCTL_BOUND_ID,
    sizeof(kqueue_id_t), kevent_sysctl, "Q",
    "get the ID of the bound kqueue");
#endif /* DEVELOPMENT || DEBUG */