/*
 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_event.c       1.0 (3/31/2000)
 */
#include <stdatomic.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vnode_internal.h>
#include <sys/proc_info.h>
#include <sys/codesign.h>
#include <sys/pthread_shims.h>
#include <sys/kdebug.h>
#include <sys/reason.h>
#include <os/reason_private.h>

#include <kern/locks.h>
#include <kern/clock.h>
#include <kern/cpu_data.h>
#include <kern/policy_internal.h>
#include <kern/thread_call.h>
#include <kern/sched_prim.h>
#include <kern/waitq.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/assert.h>
#include <kern/ast.h>
#include <kern/thread.h>
#include <kern/kcdata.h>

#include <libkern/libkern.h>
#include <libkern/OSAtomic.h>

#include "net/net_str_id.h"

#include <mach/task.h>
#include <libkern/section_keywords.h>

#if CONFIG_MEMORYSTATUS
#include <sys/kern_memorystatus.h>
#endif /* CONFIG_MEMORYSTATUS */
extern thread_t port_name_to_thread(mach_port_name_t port_name); /* osfmk/kern/ipc_tt.h   */
extern mach_port_name_t ipc_entry_name_mask(mach_port_name_t name); /* osfmk/ipc/ipc_entry.h */

#define KEV_EVTID(code) BSDDBG_CODE(DBG_BSD_KEVENT, (code))

/*
 * JMM - this typedef needs to be unified with pthread_priority_t
 * and mach_msg_priority_t. It also needs to be the same type as those.
 */
typedef int32_t qos_t;

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

#define KQ_EVENT	NO_EVENT64

#define KNUSE_NONE       0x0
#define KNUSE_STEAL_DROP 0x1
#define KNUSE_BOOST      0x2
static int kqlock2knoteuse(struct kqueue *kq, struct knote *kn, int flags);
static int kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
static int kqlock2knotedetach(struct kqueue *kq, struct knote *kn, int flags);
static int knoteuse2kqlock(struct kqueue *kq, struct knote *kn, int flags);

static int kqueue_read(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int kqueue_write(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
    vfs_context_t ctx);
static int kqueue_select(struct fileproc *fp, int which, void *wq_link_id,
    vfs_context_t ctx);
static int kqueue_close(struct fileglob *fg, vfs_context_t ctx);
static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn,
    struct kevent_internal_s *kev, vfs_context_t ctx);
static int kqueue_drain(struct fileproc *fp, vfs_context_t ctx);
static const struct fileops kqueueops = {
	.fo_type = DTYPE_KQUEUE,
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_ioctl = kqueue_ioctl,
	.fo_select = kqueue_select,
	.fo_close = kqueue_close,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_drain = kqueue_drain,
};
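/*
 * Usage sketch (user space, illustrative only; the descriptor names below are
 * assumptions, not taken from this file): because the kqueue fileops above
 * provide fo_select and fo_kqfilter, a kqueue descriptor can itself be
 * monitored for readability from another kqueue.
 *
 *	int inner = kqueue();
 *	int outer = kqueue();
 *	struct kevent ev;
 *	EV_SET(&ev, inner, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(outer, &ev, 1, NULL, 0, NULL);	// fires when 'inner' has pending events
 */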
static void kevent_put_kq(struct proc *p, kqueue_id_t id, struct fileproc *fp, struct kqueue *kq);
static int kevent_internal(struct proc *p,
			   kqueue_id_t id, kqueue_id_t *id_out,
			   user_addr_t changelist, int nchanges,
			   user_addr_t eventlist, int nevents,
			   user_addr_t data_out, uint64_t data_available,
			   unsigned int flags, user_addr_t utimeout,
			   kqueue_continue_t continuation,
			   int32_t *retval);
static int kevent_copyin(user_addr_t *addrp, struct kevent_internal_s *kevp,
			 struct proc *p, unsigned int flags);
static int kevent_copyout(struct kevent_internal_s *kevp, user_addr_t *addrp,
			  struct proc *p, unsigned int flags);
char * kevent_description(struct kevent_internal_s *kevp, char *s, size_t n);

static void kqueue_interrupt(struct kqueue *kq);
static int kevent_callback(struct kqueue *kq, struct kevent_internal_s *kevp,
			   void *data);
static void kevent_continue(struct kqueue *kq, void *data, int error);
static void kqueue_scan_continue(void *contp, wait_result_t wait_result);
static int kqueue_process(struct kqueue *kq, kevent_callback_t callback, void *callback_data,
			  struct filt_process_s *process_data, int *countp, struct proc *p);
static struct kqtailq *kqueue_get_base_queue(struct kqueue *kq, kq_index_t qos_index);
static struct kqtailq *kqueue_get_high_queue(struct kqueue *kq, kq_index_t qos_index);
static int kqueue_queue_empty(struct kqueue *kq, kq_index_t qos_index);

static struct kqtailq *kqueue_get_suppressed_queue(struct kqueue *kq, kq_index_t qos_index);

static void kqworkq_request_thread(struct kqworkq *kqwq, kq_index_t qos_index);
static void kqworkq_request_help(struct kqworkq *kqwq, kq_index_t qos_index);
static void kqworkq_update_override(struct kqworkq *kqwq, kq_index_t qos_index, kq_index_t override_index);
static void kqworkq_bind_thread_impl(struct kqworkq *kqwq, kq_index_t qos_index, thread_t thread, unsigned int flags);
static void kqworkq_unbind_thread(struct kqworkq *kqwq, kq_index_t qos_index, thread_t thread, unsigned int flags);
static struct kqrequest *kqworkq_get_request(struct kqworkq *kqwq, kq_index_t qos_index);

enum {
	KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI = 0x1,
	KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI = 0x2,
	KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS = 0x4,
	KQWL_UO_UPDATE_OVERRIDE_LAZY = 0x8
};
static void kqworkloop_update_override(struct kqworkloop *kqwl, kq_index_t qos_index, kq_index_t override_index, uint32_t flags);
static void kqworkloop_bind_thread_impl(struct kqworkloop *kqwl, thread_t thread, unsigned int flags);
static void kqworkloop_unbind_thread(struct kqworkloop *kqwl, thread_t thread, unsigned int flags);
static inline kq_index_t kqworkloop_combined_qos(struct kqworkloop *kqwl, boolean_t *);
static void kqworkloop_update_suppress_sync_count(struct kqrequest *kqr, uint32_t flags);

enum {
	KQWL_UTQ_NONE,
	/*
	 * The wakeup qos is the qos of QUEUED knotes.
	 *
	 * This QoS is accounted for with the events override in the
	 * kqr_override_index field. It is raised each time a new knote is queued at
	 * a given QoS. The kqr_wakeup_indexes field is a superset of the non empty
	 * knote buckets and is recomputed after each event delivery.
	 */
	KQWL_UTQ_UPDATE_WAKEUP_QOS,
	KQWL_UTQ_UPDATE_STAYACTIVE_QOS,
	KQWL_UTQ_RECOMPUTE_WAKEUP_QOS,
	/*
	 * The wakeup override is for suppressed knotes that have fired again at
	 * a higher QoS than the one for which they are suppressed already.
	 * This override is cleared when the knote suppressed list becomes empty.
	 */
	KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE,
	KQWL_UTQ_RESET_WAKEUP_OVERRIDE,
	/*
	 * The async QoS is the maximum QoS of an event enqueued on this workloop in
	 * userland. It is copied from the only EVFILT_WORKLOOP knote with
	 * a NOTE_WL_THREAD_REQUEST bit set allowed on this workloop. If there is no
	 * such knote, this QoS is 0.
	 */
	KQWL_UTQ_SET_ASYNC_QOS,
	/*
	 * The sync waiters QoS is the maximum QoS of any thread blocked on an
	 * EVFILT_WORKLOOP knote marked with the NOTE_WL_SYNC_WAIT bit.
	 * If there is no such knote, this QoS is 0.
	 */
	KQWL_UTQ_SET_SYNC_WAITERS_QOS,
	KQWL_UTQ_REDRIVE_EVENTS,
};
static void kqworkloop_update_threads_qos(struct kqworkloop *kqwl, int op, kq_index_t qos);
static void kqworkloop_request_help(struct kqworkloop *kqwl, kq_index_t qos_index);
static int knote_process(struct knote *kn, kevent_callback_t callback, void *callback_data,
			 struct filt_process_s *process_data, struct proc *p);

static void knote_put(struct knote *kn);

static int kq_add_knote(struct kqueue *kq, struct knote *kn,
			struct kevent_internal_s *kev, struct proc *p, int *knoteuse_flags);
static struct knote *kq_find_knote_and_kq_lock(struct kqueue *kq, struct kevent_internal_s *kev, bool is_fd, struct proc *p);
static void kq_remove_knote(struct kqueue *kq, struct knote *kn, struct proc *p, kn_status_t *kn_status, uint16_t *kq_state);

static void knote_drop(struct knote *kn, struct proc *p);
static struct knote *knote_alloc(void);
static void knote_free(struct knote *kn);

static void knote_activate(struct knote *kn);
static void knote_deactivate(struct knote *kn);

static void knote_enable(struct knote *kn);
static void knote_disable(struct knote *kn);

static int knote_enqueue(struct knote *kn);
static void knote_dequeue(struct knote *kn);

static void knote_suppress(struct knote *kn);
static void knote_unsuppress(struct knote *kn);
static void knote_wakeup(struct knote *kn);

static kq_index_t knote_get_queue_index(struct knote *kn);
static struct kqtailq *knote_get_queue(struct knote *kn);
static kq_index_t knote_get_req_index(struct knote *kn);
static kq_index_t knote_get_qos_index(struct knote *kn);
static void knote_set_qos_index(struct knote *kn, kq_index_t qos_index);
static kq_index_t knote_get_qos_override_index(struct knote *kn);
static kq_index_t knote_get_sync_qos_override_index(struct knote *kn);
static void knote_set_qos_override_index(struct knote *kn, kq_index_t qos_index, boolean_t override_is_sync);
static void knote_set_qos_overcommit(struct knote *kn);
static int filt_fileattach(struct knote *kn, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) file_filtops = {
	.f_isfd = 1,
	.f_attach = filt_fileattach,
};

static void filt_kqdetach(struct knote *kn);
static int filt_kqueue(struct knote *kn, long hint);
static int filt_kqtouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_kqprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) kqread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_kqdetach,
	.f_event = filt_kqueue,
	.f_touch = filt_kqtouch,
	.f_process = filt_kqprocess,
};

/* placeholder for not-yet-implemented filters */
static int filt_badattach(struct knote *kn, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) bad_filtops = {
	.f_attach = filt_badattach,
};

static int filt_procattach(struct knote *kn, struct kevent_internal_s *kev);
static void filt_procdetach(struct knote *kn);
static int filt_proc(struct knote *kn, long hint);
static int filt_proctouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_procprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) proc_filtops = {
	.f_attach = filt_procattach,
	.f_detach = filt_procdetach,
	.f_event = filt_proc,
	.f_touch = filt_proctouch,
	.f_process = filt_procprocess,
};

#if CONFIG_MEMORYSTATUS
extern const struct filterops memorystatus_filtops;
#endif /* CONFIG_MEMORYSTATUS */

extern const struct filterops fs_filtops;

extern const struct filterops sig_filtops;

static zone_t knote_zone;
static zone_t kqfile_zone;
static zone_t kqworkq_zone;
static zone_t kqworkloop_zone;

#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask))

/* Mach portset filter */
extern const struct filterops machport_filtops;
/* User filter */
static int filt_userattach(struct knote *kn, struct kevent_internal_s *kev);
static void filt_userdetach(struct knote *kn);
static int filt_user(struct knote *kn, long hint);
static int filt_usertouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_userprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) user_filtops = {
	.f_attach = filt_userattach,
	.f_detach = filt_userdetach,
	.f_event = filt_user,
	.f_touch = filt_usertouch,
	.f_process = filt_userprocess,
};

static lck_spin_t _filt_userlock;
static void filt_userlock(void);
static void filt_userunlock(void);
/* Workloop filter */
static bool filt_wlneeds_boost(struct kevent_internal_s *kev);
static int filt_wlattach(struct knote *kn, struct kevent_internal_s *kev);
static int filt_wlpost_attach(struct knote *kn, struct kevent_internal_s *kev);
static void filt_wldetach(struct knote *kn);
static int filt_wlevent(struct knote *kn, long hint);
static int filt_wltouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_wldrop_and_unlock(struct knote *kn, struct kevent_internal_s *kev);
static int filt_wlprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) workloop_filtops = {
	.f_needs_boost = filt_wlneeds_boost,
	.f_attach = filt_wlattach,
	.f_post_attach = filt_wlpost_attach,
	.f_detach = filt_wldetach,
	.f_event = filt_wlevent,
	.f_touch = filt_wltouch,
	.f_drop_and_unlock = filt_wldrop_and_unlock,
	.f_process = filt_wlprocess,
};
extern const struct filterops pipe_rfiltops;
extern const struct filterops pipe_wfiltops;
extern const struct filterops ptsd_kqops;
extern const struct filterops ptmx_kqops;
extern const struct filterops soread_filtops;
extern const struct filterops sowrite_filtops;
extern const struct filterops sock_filtops;
extern const struct filterops soexcept_filtops;
extern const struct filterops spec_filtops;
extern const struct filterops bpfread_filtops;
extern const struct filterops necp_fd_rfiltops;
extern const struct filterops fsevent_filtops;
extern const struct filterops vnode_filtops;
extern const struct filterops tty_filtops;

const static struct filterops timer_filtops;

/*
 * Rules for adding new filters to the system:
 *
 * Public filters:
 * - Add a new "EVFILT_" option value to bsd/sys/event.h (typically a negative value)
 *   in the exported section of the header
 * - Update the EVFILT_SYSCOUNT value to reflect the new addition
 * - Add a filterops to the sysfilt_ops array. Public filters should be added at the end
 *   of the Public Filters section in the array.
 *
 * Private filters:
 * - Add a new "EVFILT_" value to bsd/sys/event.h (typically a positive value)
 *   in the XNU_KERNEL_PRIVATE section of the header
 * - Update the EVFILTID_MAX value to reflect the new addition
 * - Add a filterops to the sysfilt_ops. Private filters should be added at the end of
 *   the Private filters section of the array.
 */
SECURITY_READ_ONLY_EARLY(static struct filterops *) sysfilt_ops[EVFILTID_MAX] = {
	/* Public Filters */
	[~EVFILT_READ]          = &file_filtops,
	[~EVFILT_WRITE]         = &file_filtops,
	[~EVFILT_AIO]           = &bad_filtops,
	[~EVFILT_VNODE]         = &file_filtops,
	[~EVFILT_PROC]          = &proc_filtops,
	[~EVFILT_SIGNAL]        = &sig_filtops,
	[~EVFILT_TIMER]         = &timer_filtops,
	[~EVFILT_MACHPORT]      = &machport_filtops,
	[~EVFILT_FS]            = &fs_filtops,
	[~EVFILT_USER]          = &user_filtops,
	[~EVFILT_SOCK]          = &file_filtops,
#if CONFIG_MEMORYSTATUS
	[~EVFILT_MEMORYSTATUS]  = &memorystatus_filtops,
#else
	[~EVFILT_MEMORYSTATUS]  = &bad_filtops,
#endif
	[~EVFILT_EXCEPT]        = &file_filtops,

	[~EVFILT_WORKLOOP]      = &workloop_filtops,

	/* Private filters */
	[EVFILTID_KQREAD]       = &kqread_filtops,
	[EVFILTID_PIPE_R]       = &pipe_rfiltops,
	[EVFILTID_PIPE_W]       = &pipe_wfiltops,
	[EVFILTID_PTSD]         = &ptsd_kqops,
	[EVFILTID_SOREAD]       = &soread_filtops,
	[EVFILTID_SOWRITE]      = &sowrite_filtops,
	[EVFILTID_SCK]          = &sock_filtops,
	[EVFILTID_SOEXCEPT]     = &soexcept_filtops,
	[EVFILTID_SPEC]         = &spec_filtops,
	[EVFILTID_BPFREAD]      = &bpfread_filtops,
	[EVFILTID_NECP_FD]      = &necp_fd_rfiltops,
	[EVFILTID_FSEVENT]      = &fsevent_filtops,
	[EVFILTID_VN]           = &vnode_filtops,
	[EVFILTID_TTY]          = &tty_filtops,
	[EVFILTID_PTMX]         = &ptmx_kqops,
};
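/*
 * Illustrative sketch only (hypothetical names, following the rules above):
 * a new private filter would add an EVFILTID_FOO constant in the
 * XNU_KERNEL_PRIVATE section of bsd/sys/event.h, bump EVFILTID_MAX, and then
 * provide a filterops plus an entry at the end of the Private filters section
 * of sysfilt_ops:
 *
 *	SECURITY_READ_ONLY_EARLY(static struct filterops) foo_filtops = {
 *		.f_attach  = filt_fooattach,
 *		.f_detach  = filt_foodetach,
 *		.f_event   = filt_fooevent,
 *	};
 *	...
 *	[EVFILTID_FOO]          = &foo_filtops,
 */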
/* waitq prepost callback */
void waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook, void *knote_hook, int qos);

#ifndef _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
#define _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG 0x02000000 /* pthread event manager bit */
#endif
#ifndef _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
#define _PTHREAD_PRIORITY_OVERCOMMIT_FLAG 0x80000000 /* request overcommit threads */
#endif
#ifndef _PTHREAD_PRIORITY_QOS_CLASS_MASK
#define _PTHREAD_PRIORITY_QOS_CLASS_MASK 0x003fff00  /* QoS class mask */
#endif
#ifndef _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32
#define _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32 8
#endif
static inline __kdebug_only
uintptr_t
kqr_thread_id(struct kqrequest *kqr)
{
	return (uintptr_t)thread_tid(kqr->kqr_thread);
}

static inline boolean_t
is_workqueue_thread(thread_t thread)
{
	return (thread_get_tag(thread) & THREAD_TAG_WORKQUEUE);
}

static inline
void knote_canonicalize_kevent_qos(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	unsigned long canonical;

	if ((kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0)
		return;

	/* preserve manager and overcommit flags in this case */
	canonical = pthread_priority_canonicalize(kn->kn_qos, FALSE);
	kn->kn_qos = (qos_t)canonical;
}
static inline
kq_index_t qos_index_from_qos(struct knote *kn, qos_t qos, boolean_t propagation)
{
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t qos_index;
	unsigned long flags = 0;

	if ((kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0)
		return QOS_INDEX_KQFILE;

	qos_index = (kq_index_t)thread_qos_from_pthread_priority(
			(unsigned long)qos, &flags);

	if (kq->kq_state & KQ_WORKQ) {
		/* workq kqueues support requesting a manager thread (non-propagation) */
		if (!propagation && (flags & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG))
			return KQWQ_QOS_MANAGER;
	}

	return qos_index;
}
static inline
qos_t qos_from_qos_index(kq_index_t qos_index)
{
	/* should only happen for KQ_WORKQ */
	if (qos_index == KQWQ_QOS_MANAGER)
		return _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;

	if (qos_index == 0)
		return THREAD_QOS_UNSPECIFIED;

	/* Should have support from pthread kext support */
	return (1 << (qos_index - 1 +
		      _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32));
}
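/*
 * Worked example of the formula above (pure arithmetic on the expression,
 * not an additional API guarantee): for qos_index == 4 the result is
 * 1 << (4 - 1 + _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32) == 1 << 11 == 0x800,
 * a single QoS class bit positioned inside _PTHREAD_PRIORITY_QOS_CLASS_MASK
 * (0x003fff00).
 */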
/* kqr lock must be held */
static inline
unsigned long pthread_priority_for_kqrequest(
	struct kqrequest *kqr,
	kq_index_t qos_index)
{
	unsigned long priority = qos_from_qos_index(qos_index);
	if (kqr->kqr_state & KQR_THOVERCOMMIT) {
		priority |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
	}
	return priority;
}
static inline
kq_index_t qos_index_for_servicer(int qos_class, thread_t thread, int flags)
{
#pragma unused(thread)
	kq_index_t qos_index;

	if (flags & KEVENT_FLAG_WORKQ_MANAGER)
		return KQWQ_QOS_MANAGER;

	qos_index = (kq_index_t)qos_class;
	assert(qos_index > 0 && qos_index < KQWQ_QOS_MANAGER);

	return qos_index;
}
/*
 * kqueue/note lock implementations
 *
 *	The kqueue lock guards the kq state, the state of its queues,
 *	and the kqueue-aware status and use counts of individual knotes.
 *
 *	The kqueue workq lock is used to protect state guarding the
 *	interaction of the kqueue with the workq. This state cannot
 *	be guarded by the kq lock - as it needs to be taken when we
 *	already have the waitq set lock held (during the waitq hook
 *	callback). It might be better to use the waitq lock itself
 *	for this, but the IRQ requirements make that difficult.
 *
 *	Knote flags, filter flags, and associated data are protected
 *	by the underlying object lock - and are only ever looked at
 *	by calling the filter to get a [consistent] snapshot of that
 *	data.
 */
lck_grp_attr_t * kq_lck_grp_attr;
lck_grp_t * kq_lck_grp;
lck_attr_t * kq_lck_attr;
static inline void
kqlock(struct kqueue *kq)
{
	lck_spin_lock(&kq->kq_lock);
}

static inline void
kqlock_held(__assert_only struct kqueue *kq)
{
	LCK_SPIN_ASSERT(&kq->kq_lock, LCK_ASSERT_OWNED);
}

static inline void
kqunlock(struct kqueue *kq)
{
	lck_spin_unlock(&kq->kq_lock);
}

static inline void
knhash_lock(proc_t p)
{
	lck_mtx_lock(&p->p_fd->fd_knhashlock);
}

static inline void
knhash_unlock(proc_t p)
{
	lck_mtx_unlock(&p->p_fd->fd_knhashlock);
}
/*
 * Convert a kq lock to a knote use reference.
 *
 *	If the knote is being dropped, or has
 *	vanished, we can't get a use reference.
 *	Just return with it still locked.
 *
 *	- kq locked at entry
 *	- unlock on exit if we get the use reference
 */
static int
kqlock2knoteuse(struct kqueue *kq, struct knote *kn, int flags)
{
	if (kn->kn_status & (KN_DROPPING | KN_VANISHED))
		return (0);

	assert(kn->kn_status & KN_ATTACHED);
	kn->kn_inuse++;
	if (flags & KNUSE_BOOST) {
		set_thread_rwlock_boost();
	}
	kqunlock(kq);
	return (1);
}
/*
 *	- kq locked at entry
 *	- kq unlocked at exit
 */
static wait_result_t
knoteusewait(struct kqueue *kq, struct knote *kn)
{
	kn->kn_status |= KN_USEWAIT;
	waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
			CAST_EVENT64_T(&kn->kn_status),
			THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
	kqunlock(kq);
	return thread_block(THREAD_CONTINUE_NULL);
}
static bool
knoteuse_needs_boost(struct knote *kn, struct kevent_internal_s *kev)
{
	if (knote_fops(kn)->f_needs_boost) {
		return knote_fops(kn)->f_needs_boost(kev);
	}
	return false;
}
/*
 * Convert from a knote use reference back to kq lock.
 *
 *	Drop a use reference and wake any waiters if
 *	this is the last one.
 *
 *	If someone is trying to drop the knote, but the
 *	caller has events they must deliver, take
 *	responsibility for the drop later - and wake the
 *	other attempted dropper in a manner that informs
 *	it of the transfer of responsibility.
 *
 *	The exit return indicates if the knote is still alive
 *	(or if not, the other dropper has been given the green
 *	light to drop it).
 *
 *	The kqueue lock is re-taken unconditionally.
 */
static int
knoteuse2kqlock(struct kqueue *kq, struct knote *kn, int flags)
{
	int dropped = 0;
	int steal_drop = (flags & KNUSE_STEAL_DROP);

	kqlock(kq);
	if (flags & KNUSE_BOOST) {
		clear_thread_rwlock_boost();
	}

	if (--kn->kn_inuse == 0) {

		if ((kn->kn_status & KN_ATTACHING) != 0) {
			kn->kn_status &= ~KN_ATTACHING;
		}

		if ((kn->kn_status & KN_USEWAIT) != 0) {
			wait_result_t result;

			/* If we need to, try and steal the drop */
			if (kn->kn_status & KN_DROPPING) {
				if (steal_drop && !(kn->kn_status & KN_STOLENDROP)) {
					kn->kn_status |= KN_STOLENDROP;
				} else {
					dropped = 1;
				}
			}

			/* wakeup indicating if ANY USE stole the drop */
			result = (kn->kn_status & KN_STOLENDROP) ?
				 THREAD_RESTART : THREAD_AWAKENED;

			kn->kn_status &= ~KN_USEWAIT;
			waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
					   CAST_EVENT64_T(&kn->kn_status),
					   result,
					   WAITQ_ALL_PRIORITIES);
		} else {
			/* should have seen use-wait if dropping with use refs */
			assert((kn->kn_status & (KN_DROPPING|KN_STOLENDROP)) == 0);
		}

	} else if (kn->kn_status & KN_DROPPING) {
		/* not the last ref but want to steal a drop if present */
		if (steal_drop && ((kn->kn_status & KN_STOLENDROP) == 0)) {
			kn->kn_status |= KN_STOLENDROP;

			/* but we now have to wait to be the last ref */
			knoteusewait(kq, kn);
			kqlock(kq);
		} else {
			dropped = 1;
		}
	}

	return (!dropped);
}
/*
 * Convert a kq lock to a knote use reference
 * (for the purpose of detaching AND vanishing it).
 *
 *	If the knote is being dropped, we can't get
 *	a detach reference, so wait for the knote to
 *	finish dropping before returning.
 *
 *	If the knote is being used for other purposes,
 *	we cannot detach it until those uses are done
 *	as well. Again, just wait for them to finish
 *	(caller will start over at lookup).
 *
 *	- kq locked at entry
 *	- unlocked on exit
 */
static int
kqlock2knotedetach(struct kqueue *kq, struct knote *kn, int flags)
{
	if ((kn->kn_status & KN_DROPPING) || kn->kn_inuse) {
		/* have to wait for dropper or current uses to go away */
		knoteusewait(kq, kn);
		return (0);
	}
	assert((kn->kn_status & KN_VANISHED) == 0);
	assert(kn->kn_status & KN_ATTACHED);
	kn->kn_status &= ~KN_ATTACHED;
	kn->kn_status |= KN_VANISHED;
	if (flags & KNUSE_BOOST) {
		clear_thread_rwlock_boost();
	}
	kn->kn_inuse++;
	kqunlock(kq);
	return (1);
}
/*
 * Convert a kq lock to a knote drop reference.
 *
 *	If the knote is in use, wait for the use count
 *	to subside.  We first mark our intention to drop
 *	it - keeping other users from "piling on."
 *	If we are too late, we have to wait for the
 *	other drop to complete.
 *
 *	- kq locked at entry
 *	- always unlocked on exit.
 *	- caller can't hold any locks that would prevent
 *	  the other dropper from completing.
 */
static int
kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
{
	int oktodrop;
	wait_result_t result;

	oktodrop = ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) == 0);
	/* if another thread is attaching, they will become the dropping thread */
	kn->kn_status |= KN_DROPPING;
	knote_unsuppress(kn);
	knote_dequeue(kn);
	if (oktodrop) {
		if (kn->kn_inuse == 0) {
			kqunlock(kq);
			return (oktodrop);
		}
	}
	result = knoteusewait(kq, kn);
	/* THREAD_RESTART == another thread stole the knote drop */
	return (result == THREAD_AWAKENED);
}
/*
 * Release a knote use count reference.
 */
static void
knote_put(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);

	kqlock(kq);
	if (--kn->kn_inuse == 0) {
		if ((kn->kn_status & KN_USEWAIT) != 0) {
			kn->kn_status &= ~KN_USEWAIT;
			waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
					   CAST_EVENT64_T(&kn->kn_status),
					   THREAD_AWAKENED,
					   WAITQ_ALL_PRIORITIES);
		}
	}
	kqunlock(kq);
}
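/*
 * Typical calling pattern for the conversion helpers above (a sketch of the
 * intended discipline, not a complete caller): take the kq lock, convert to a
 * use reference around a filter call that may block, then convert back and
 * re-validate that the knote survived.
 *
 *	kqlock(kq);
 *	if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
 *		// kq is unlocked here; safe to call into the filter
 *		if (knoteuse2kqlock(kq, kn, KNUSE_NONE) == 0) {
 *			// knote was dropped while in use; do not touch it again
 *		}
 *	}
 *	kqunlock(kq);
 */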
static int
filt_fileattach(struct knote *kn, struct kevent_internal_s *kev)
{
	return (fo_kqfilter(kn->kn_fp, kn, kev, vfs_context_current()));
}

#define f_flag f_fglob->fg_flag
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data
static void
filt_kqdetach(struct knote *kn)
{
	struct kqfile *kqf = (struct kqfile *)kn->kn_fp->f_data;
	struct kqueue *kq = &kqf->kqf_kqueue;

	kqlock(kq);
	KNOTE_DETACH(&kqf->kqf_sel.si_note, kn);
	kqunlock(kq);
}

static int
filt_kqueue(struct knote *kn, __unused long hint)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
	int count;

	count = kq->kq_count;
	return (count > 0);
}
static int
filt_kqtouch(struct knote *kn, struct kevent_internal_s *kev)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
	int res;

	kqlock(kq);
	kn->kn_data = kq->kq_count;
	if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
		kn->kn_udata = kev->udata;
	res = (kn->kn_data > 0);

	kqunlock(kq);

	return res;
}

static int
filt_kqprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
{
#pragma unused(data)
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
	int res;

	kqlock(kq);
	kn->kn_data = kq->kq_count;
	res = (kn->kn_data > 0);
	if (res) {
		*kev = kn->kn_kevent;
		if (kn->kn_flags & EV_CLEAR)
			kn->kn_data = 0;
	}
	kqunlock(kq);

	return res;
}

#pragma mark EVFILT_PROC
static int
filt_procattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
	struct proc *p;

	assert(PID_MAX < NOTE_PDATAMASK);

	if ((kn->kn_sfflags & (NOTE_TRACK | NOTE_TRACKERR | NOTE_CHILD)) != 0) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data = ENOTSUP;
		return 0;
	}

	p = proc_find(kn->kn_id);
	if (p == NULL) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data = ESRCH;
		return 0;
	}

	const int NoteExitStatusBits = NOTE_EXIT | NOTE_EXITSTATUS;

	if ((kn->kn_sfflags & NoteExitStatusBits) == NoteExitStatusBits)
		do {
			pid_t selfpid = proc_selfpid();

			if (p->p_ppid == selfpid)
				break;	/* parent => ok */

			if ((p->p_lflag & P_LTRACED) != 0 &&
			    (p->p_oppid == selfpid))
				break;	/* parent-in-waiting => ok */

			proc_rele(p);
			kn->kn_flags = EV_ERROR;
			kn->kn_data = EACCES;
			return 0;
		} while (0);

	proc_klist_lock();

	kn->kn_ptr.p_proc = p;		/* store the proc handle */

	KNOTE_ATTACH(&p->p_klist, kn);

	proc_klist_unlock();

	proc_rele(p);

	/*
	 * only captures edge-triggered events after this point
	 * so it can't already be fired.
	 */
	return (0);
}
/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  In that case,
 * the pointer to the process will have already been nulled out.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	proc_klist_lock();

	p = kn->kn_ptr.p_proc;
	if (p != PROC_NULL) {
		kn->kn_ptr.p_proc = PROC_NULL;
		KNOTE_DETACH(&p->p_klist, kn);
	}

	proc_klist_unlock();
}
static int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/* ALWAYS CALLED WITH proc_klist_lock */

	/*
	 * Note: a lot of bits in hint may be obtained from the knote
	 * To free some of those bits, see <rdar://problem/12592988> Freeing up
	 * bits in hint for filt_proc
	 *
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * termination lifecycle events can happen while a debugger
	 * has reparented a process, in which case notifications
	 * should be quashed except to the tracing parent. When
	 * the debugger reaps the child (either via wait4(2) or
	 * process exit), the child will be reparented to the original
	 * parent and these knotes re-fired.
	 */
	if (event & NOTE_EXIT) {
		if ((kn->kn_ptr.p_proc->p_oppid != 0)
		    && (knote_get_kq(kn)->kq_p->p_pid != kn->kn_ptr.p_proc->p_ppid)) {
			/*
			 * This knote is not for the current ptrace(2) parent, ignore.
			 */
			return 0;
		}
	}

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
	if ((event == NOTE_REAP) || ((event == NOTE_EXIT) && !(kn->kn_sfflags & NOTE_REAP))) {
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
	}
#pragma clang diagnostic pop

	/*
	 * The kernel has a wrapper in place that returns the same data
	 * as is collected here, in kn_data.  Any changes to how
	 * NOTE_EXITSTATUS and NOTE_EXIT_DETAIL are collected
	 * should also be reflected in the proc_pidnoteexit() wrapper.
	 */
	if (event == NOTE_EXIT) {
		kn->kn_data = 0;
		if ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0) {
			kn->kn_fflags |= NOTE_EXITSTATUS;
			kn->kn_data |= (hint & NOTE_PDATAMASK);
		}
		if ((kn->kn_sfflags & NOTE_EXIT_DETAIL) != 0) {
			kn->kn_fflags |= NOTE_EXIT_DETAIL;
			if ((kn->kn_ptr.p_proc->p_lflag &
			     P_LTERM_DECRYPTFAIL) != 0) {
				kn->kn_data |= NOTE_EXIT_DECRYPTFAIL;
			}
			if ((kn->kn_ptr.p_proc->p_lflag &
			     P_LTERM_JETSAM) != 0) {
				kn->kn_data |= NOTE_EXIT_MEMORY;
				switch (kn->kn_ptr.p_proc->p_lflag & P_JETSAM_MASK) {
				case P_JETSAM_VMPAGESHORTAGE:
					kn->kn_data |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE;
					break;
				case P_JETSAM_VMTHRASHING:
					kn->kn_data |= NOTE_EXIT_MEMORY_VMTHRASHING;
					break;
				case P_JETSAM_FCTHRASHING:
					kn->kn_data |= NOTE_EXIT_MEMORY_FCTHRASHING;
					break;
				case P_JETSAM_VNODE:
					kn->kn_data |= NOTE_EXIT_MEMORY_VNODE;
					break;
				case P_JETSAM_HIWAT:
					kn->kn_data |= NOTE_EXIT_MEMORY_HIWAT;
					break;
				case P_JETSAM_PID:
					kn->kn_data |= NOTE_EXIT_MEMORY_PID;
					break;
				case P_JETSAM_IDLEEXIT:
					kn->kn_data |= NOTE_EXIT_MEMORY_IDLE;
					break;
				}
			}
			if ((kn->kn_ptr.p_proc->p_csflags &
			     CS_KILLED) != 0) {
				kn->kn_data |= NOTE_EXIT_CSERROR;
			}
		}
	}

	/* if we have any matching state, activate the knote */
	return (kn->kn_fflags != 0);
}
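/*
 * Usage sketch (user space, illustrative): a parent watching a child for the
 * exit status bits handled above would register roughly as follows; 'kq' and
 * 'child_pid' are assumptions of the caller, not taken from this file.
 *
 *	struct kevent ev;
 *	EV_SET(&ev, child_pid, EVFILT_PROC, EV_ADD | EV_ENABLE,
 *	       NOTE_EXIT | NOTE_EXITSTATUS, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);
 *	// on delivery, ev.fflags contains NOTE_EXIT and ev.data carries the
 *	// wait(2)-style exit status masked by NOTE_PDATAMASK
 */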
static int
filt_proctouch(struct knote *kn, struct kevent_internal_s *kev)
{
	int res;

	proc_klist_lock();

	/* accept new filter flags and mask off output events no longer interesting */
	kn->kn_sfflags = kev->fflags;
	if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
		kn->kn_udata = kev->udata;

	/* restrict the current results to the (smaller?) set of new interest */
	/*
	 * For compatibility with previous implementations, we leave kn_fflags
	 * as they were before.
	 */
	//kn->kn_fflags &= kn->kn_sfflags;

	res = (kn->kn_fflags != 0);

	proc_klist_unlock();

	return res;
}
static int
filt_procprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
{
#pragma unused(data)
	int res;

	proc_klist_lock();
	res = (kn->kn_fflags != 0);
	if (res) {
		*kev = kn->kn_kevent;
		kn->kn_flags |= EV_CLEAR;	/* automatically set */
		kn->kn_fflags = 0;
		kn->kn_data = 0;
	}
	proc_klist_unlock();
	return res;
}
#pragma mark EVFILT_TIMER

/*
 * Values stored in the knote at rest (using Mach absolute time units)
 *
 * kn->kn_hook          where the thread_call object is stored
 * kn->kn_ext[0]        next deadline or 0 if immediate expiration
 * kn->kn_ext[1]        leeway value
 * kn->kn_sdata         interval timer: the interval
 *                      absolute/deadline timer: 0
 * kn->kn_data          fire count
 */

static lck_mtx_t _filt_timerlock;

static void filt_timerlock(void)   { lck_mtx_lock(&_filt_timerlock);   }
static void filt_timerunlock(void) { lck_mtx_unlock(&_filt_timerlock); }

static inline void filt_timer_assert_locked(void)
{
	LCK_MTX_ASSERT(&_filt_timerlock, LCK_MTX_ASSERT_OWNED);
}

/* state flags stored in kn_hookid */
#define TIMER_RUNNING		0x1
#define TIMER_CANCELWAIT	0x2
/*
 * filt_timervalidate - process data from user
 *
 * Sets up the deadline, interval, and leeway from the provided user data
 *
 * Input:
 *	kn_sdata	timer deadline or interval time
 *	kn_sfflags	style of timer, unit of measurement
 *
 * Output:
 *	kn_sdata	either interval in abstime or 0 if non-repeating timer
 *	ext[0]		fire deadline in abs/cont time
 *			(or 0 if NOTE_ABSOLUTE and deadline is in past)
 *
 * Returns:
 *	EINVAL		Invalid user data parameters
 *
 * Called with timer filter lock held.
 */
static int
filt_timervalidate(struct knote *kn)
{
	/*
	 * There are five knobs that need to be chosen for a timer registration:
	 *
	 * A) Units of time (what is the time duration of the specified number)
	 *	Absolute and interval take:
	 *		NOTE_SECONDS, NOTE_USECONDS, NOTE_NSECONDS, NOTE_MACHTIME
	 *	Defaults to milliseconds if not specified
	 *
	 * B) Clock epoch (what is the zero point of the specified number)
	 *	For interval, there is none
	 *	For absolute, defaults to the gettimeofday/calendar epoch
	 *	With NOTE_MACHTIME, uses mach_absolute_time()
	 *	With NOTE_MACHTIME and NOTE_MACH_CONTINUOUS_TIME, uses mach_continuous_time()
	 *
	 * C) The knote's behavior on delivery
	 *	Interval timer causes the knote to arm for the next interval unless one-shot is set
	 *	Absolute is a forced one-shot timer which deletes on delivery
	 *	TODO: Add a way for absolute to be not forced one-shot
	 *
	 * D) Whether the time duration is relative to now or absolute
	 *	Interval fires at now + duration when it is set up
	 *	Absolute fires at now + difference between now walltime and passed in walltime
	 *	With NOTE_MACHTIME it fires at an absolute MAT or MCT.
	 *
	 * E) Whether the timer continues to tick across sleep
	 *	By default all three do not.
	 *	For interval and absolute, NOTE_MACH_CONTINUOUS_TIME causes them to tick across sleep
	 *	With NOTE_ABSOLUTE | NOTE_MACHTIME | NOTE_MACH_CONTINUOUS_TIME:
	 *		expires when mach_continuous_time() is > the passed in value.
	 */

	filt_timer_assert_locked();

	uint64_t multiplier;

	boolean_t use_abstime = FALSE;

	switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS|NOTE_MACHTIME)) {
	case NOTE_SECONDS:
		multiplier = NSEC_PER_SEC;
		break;
	case NOTE_USECONDS:
		multiplier = NSEC_PER_USEC;
		break;
	case NOTE_NSECONDS:
		multiplier = 1;
		break;
	case NOTE_MACHTIME:
		multiplier = 0;
		use_abstime = TRUE;
		break;
	case 0: /* milliseconds (default) */
		multiplier = NSEC_PER_SEC / 1000;
		break;
	default:
		return (EINVAL);
	}

	/* transform the leeway in kn_ext[1] to same time scale */
	if (kn->kn_sfflags & NOTE_LEEWAY) {
		uint64_t leeway_abs;

		if (use_abstime) {
			leeway_abs = (uint64_t)kn->kn_ext[1];
		} else {
			uint64_t leeway_ns;
			if (os_mul_overflow((uint64_t)kn->kn_ext[1], multiplier, &leeway_ns))
				return (ERANGE);

			nanoseconds_to_absolutetime(leeway_ns, &leeway_abs);
		}

		kn->kn_ext[1] = leeway_abs;
	}

	if (kn->kn_sfflags & NOTE_ABSOLUTE) {
		uint64_t deadline_abs;

		if (use_abstime) {
			deadline_abs = (uint64_t)kn->kn_sdata;
		} else {
			uint64_t calendar_deadline_ns;

			if (os_mul_overflow((uint64_t)kn->kn_sdata, multiplier, &calendar_deadline_ns))
				return (ERANGE);

			/* calendar_deadline_ns is in nanoseconds since the epoch */

			clock_sec_t seconds;
			clock_nsec_t nanoseconds;

			/*
			 * Note that the conversion through wall-time is only done once.
			 *
			 * If the relationship between MAT and gettimeofday changes,
			 * the underlying timer does not update.
			 *
			 * TODO: build a wall-time denominated timer_call queue
			 * and a flag to request DTRTing with wall-time timers
			 */
			clock_get_calendar_nanotime(&seconds, &nanoseconds);

			uint64_t calendar_now_ns = (uint64_t)seconds * NSEC_PER_SEC + nanoseconds;

			/* if deadline is in the future */
			if (calendar_now_ns < calendar_deadline_ns) {
				uint64_t interval_ns = calendar_deadline_ns - calendar_now_ns;
				uint64_t interval_abs;

				nanoseconds_to_absolutetime(interval_ns, &interval_abs);

				/*
				 * Note that the NOTE_MACH_CONTINUOUS_TIME flag here only
				 * causes the timer to keep ticking across sleep, but
				 * it does not change the calendar timebase.
				 */
				if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
					clock_continuoustime_interval_to_deadline(interval_abs,
							&deadline_abs);
				else
					clock_absolutetime_interval_to_deadline(interval_abs,
							&deadline_abs);
			} else {
				deadline_abs = 0; /* cause immediate expiration */
			}
		}

		kn->kn_ext[0] = deadline_abs;
		kn->kn_sdata  = 0;	/* NOTE_ABSOLUTE is non-repeating */
	} else if (kn->kn_sdata < 0) {
		/*
		 * Negative interval timers fire immediately, once.
		 *
		 * Ideally a negative interval would be an error, but certain clients
		 * pass negative values by accident, and expect an event back.
		 *
		 * In the old implementation the timer would repeat with no delay
		 * N times until mach_absolute_time() + (N * interval) underflowed,
		 * then it would wait ~forever by accidentally arming a timer for the far future.
		 *
		 * We now skip the power-wasting hot spin phase and go straight to the idle phase.
		 */

		kn->kn_sdata  = 0;	/* non-repeating */
		kn->kn_ext[0] = 0;	/* expire immediately */
	} else {
		uint64_t interval_abs = 0;

		if (use_abstime) {
			interval_abs = (uint64_t)kn->kn_sdata;
		} else {
			uint64_t interval_ns;
			if (os_mul_overflow((uint64_t)kn->kn_sdata, multiplier, &interval_ns))
				return (ERANGE);

			nanoseconds_to_absolutetime(interval_ns, &interval_abs);
		}

		uint64_t deadline = 0;

		if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
			clock_continuoustime_interval_to_deadline(interval_abs, &deadline);
		else
			clock_absolutetime_interval_to_deadline(interval_abs, &deadline);

		kn->kn_sdata  = interval_abs;	/* default to a repeating timer */
		kn->kn_ext[0] = deadline;
	}

	return (0);
}
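/*
 * Usage sketch (user space, illustrative) for the combinations validated
 * above; 'kq' and 'delta' are assumptions of the caller. The default unit is
 * milliseconds, so the first registration is a repeating 500ms interval
 * timer; the second is a one-shot absolute deadline in mach_absolute_time()
 * units:
 *
 *	struct kevent64_s ev;
 *	EV_SET64(&ev, 1, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, 500, 0, 0, 0);
 *	kevent64(kq, &ev, 1, NULL, 0, 0, NULL);
 *
 *	EV_SET64(&ev, 2, EVFILT_TIMER, EV_ADD | EV_ENABLE,
 *	         NOTE_ABSOLUTE | NOTE_MACHTIME, mach_absolute_time() + delta, 0, 0, 0);
 *	kevent64(kq, &ev, 1, NULL, 0, 0, NULL);
 */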
/*
 * filt_timerexpire - the timer callout routine
 *
 * Just propagate the timer event into the knote
 * filter routine (by going through the knote
 * synchronization point).  Pass a hint to
 * indicate this is a real event, not just a
 * query from above.
 */
static void
filt_timerexpire(void *knx, __unused void *spare)
{
	struct klist timer_list;
	struct knote *kn = knx;

	filt_timerlock();

	kn->kn_hookid &= ~TIMER_RUNNING;

	/* no "object" for timers, so fake a list */
	SLIST_INIT(&timer_list);
	SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);

	KNOTE(&timer_list, 1);

	/* if someone is waiting for timer to pop */
	if (kn->kn_hookid & TIMER_CANCELWAIT) {
		struct kqueue *kq = knote_get_kq(kn);
		waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
				   CAST_EVENT64_T(&kn->kn_hook),
				   THREAD_AWAKENED,
				   WAITQ_ALL_PRIORITIES);

		kn->kn_hookid &= ~TIMER_CANCELWAIT;
	}

	filt_timerunlock();
}
/*
 * Cancel a running timer (or wait for the pop).
 * Timer filter lock is held.
 * May drop and retake the timer filter lock.
 */
static void
filt_timercancel(struct knote *kn)
{
	filt_timer_assert_locked();

	assert((kn->kn_hookid & TIMER_CANCELWAIT) == 0);

	/* if no timer, then we're good */
	if ((kn->kn_hookid & TIMER_RUNNING) == 0)
		return;

	thread_call_t callout = (thread_call_t)kn->kn_hook;

	/* cancel the callout if we can */
	if (thread_call_cancel(callout)) {
		kn->kn_hookid &= ~TIMER_RUNNING;
		return;
	}

	/* cancel failed, we have to wait for the in-flight expire routine */

	kn->kn_hookid |= TIMER_CANCELWAIT;

	struct kqueue *kq = knote_get_kq(kn);

	waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
			CAST_EVENT64_T(&kn->kn_hook),
			THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

	filt_timerunlock();
	thread_block(THREAD_CONTINUE_NULL);
	filt_timerlock();

	assert((kn->kn_hookid & TIMER_CANCELWAIT) == 0);
	assert((kn->kn_hookid & TIMER_RUNNING) == 0);
}
static void
filt_timerarm(struct knote *kn)
{
	filt_timer_assert_locked();

	assert((kn->kn_hookid & TIMER_RUNNING) == 0);

	thread_call_t callout = (thread_call_t)kn->kn_hook;

	uint64_t deadline = kn->kn_ext[0];
	uint64_t leeway   = kn->kn_ext[1];

	int filter_flags = kn->kn_sfflags;
	unsigned int timer_flags = 0;

	if (filter_flags & NOTE_CRITICAL)
		timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL;
	else if (filter_flags & NOTE_BACKGROUND)
		timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND;
	else
		timer_flags |= THREAD_CALL_DELAY_USER_NORMAL;

	if (filter_flags & NOTE_LEEWAY)
		timer_flags |= THREAD_CALL_DELAY_LEEWAY;

	if (filter_flags & NOTE_MACH_CONTINUOUS_TIME)
		timer_flags |= THREAD_CALL_CONTINUOUS;

	thread_call_enter_delayed_with_leeway(callout, NULL,
			deadline, leeway,
			timer_flags);

	kn->kn_hookid |= TIMER_RUNNING;
}
/*
 * Does this knote need a timer armed for it, or should it be ready immediately?
 */
static boolean_t
filt_timer_is_ready(struct knote *kn)
{
	uint64_t now;

	if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
		now = mach_continuous_time();
	else
		now = mach_absolute_time();

	uint64_t deadline = kn->kn_ext[0];

	if (deadline == 0)
		return TRUE;

	return deadline <= now;
}
/*
 * Allocate a thread call for the knote's lifetime, and kick off the timer.
 */
static int
filt_timerattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
	thread_call_t callout;
	int error;

	callout = thread_call_allocate_with_options(filt_timerexpire,
			(thread_call_param_t)kn, THREAD_CALL_PRIORITY_HIGH,
			THREAD_CALL_OPTIONS_ONCE);

	if (NULL == callout) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data = ENOMEM;
		return 0;
	}

	filt_timerlock();

	if ((error = filt_timervalidate(kn)) != 0) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data = error;
		filt_timerunlock();

		__assert_only boolean_t freed = thread_call_free(callout);
		assert(freed);
		return 0;
	}

	kn->kn_hook = (void*)callout;
	kn->kn_hookid = 0;
	kn->kn_flags |= EV_CLEAR;

	/* NOTE_ABSOLUTE implies EV_ONESHOT */
	if (kn->kn_sfflags & NOTE_ABSOLUTE)
		kn->kn_flags |= EV_ONESHOT;

	boolean_t timer_ready = FALSE;

	if ((timer_ready = filt_timer_is_ready(kn))) {
		/* cause immediate expiration */
		kn->kn_data = 1;
	} else {
		filt_timerarm(kn);
	}

	filt_timerunlock();

	return timer_ready;
}
/*
 * Shut down the timer if it's running, and free the callout.
 */
static void
filt_timerdetach(struct knote *kn)
{
	thread_call_t callout;

	filt_timerlock();

	callout = (thread_call_t)kn->kn_hook;
	filt_timercancel(kn);

	filt_timerunlock();

	__assert_only boolean_t freed = thread_call_free(callout);
	assert(freed);
}
/*
 * filt_timerevent - post events to a timer knote
 *
 * Called in the context of filt_timerexpire with
 * the filt_timerlock held
 */
static int
filt_timerevent(struct knote *kn, __unused long hint)
{
	filt_timer_assert_locked();

	kn->kn_data = 1;
	return (1);
}
/*
 * filt_timertouch - update timer knote with new user input
 *
 * Cancel and restart the timer based on new user data. When
 * the user picks up a knote, clear the count of how many timer
 * pops have gone off (in kn_data).
 */
static int
filt_timertouch(
	struct knote *kn,
	struct kevent_internal_s *kev)
{
	int error;

	filt_timerlock();

	/*
	 * cancel current call - drops and retakes lock
	 * TODO: not safe against concurrent touches?
	 */
	filt_timercancel(kn);

	/* clear if the timer had previously fired, the user no longer wants to see it */
	kn->kn_data = 0;

	/* capture the new values used to compute deadline */
	kn->kn_sdata = kev->data;
	kn->kn_sfflags = kev->fflags;
	kn->kn_ext[0] = kev->ext[0];
	kn->kn_ext[1] = kev->ext[1];

	if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
		kn->kn_udata = kev->udata;

	/* recalculate deadline */
	error = filt_timervalidate(kn);
	if (error) {
		/* no way to report error, so mark it in the knote */
		kn->kn_flags |= EV_ERROR;
		kn->kn_data = error;
		filt_timerunlock();
		return 1;
	}

	boolean_t timer_ready = FALSE;

	if ((timer_ready = filt_timer_is_ready(kn))) {
		/* cause immediate expiration */
		kn->kn_data = 1;
	} else {
		filt_timerarm(kn);
	}

	filt_timerunlock();

	return timer_ready;
}
/*
 * filt_timerprocess - query state of knote and snapshot event data
 *
 * Determine if the timer has fired in the past, snapshot the state
 * of the kevent for returning to user-space, and clear pending event
 * counters for the next time.
 */
static int
filt_timerprocess(
	struct knote *kn,
	__unused struct filt_process_s *data,
	struct kevent_internal_s *kev)
{
	filt_timerlock();

	if (kn->kn_data == 0 || (kn->kn_hookid & TIMER_CANCELWAIT)) {
		/*
		 * kn_data == 0:
		 * The timer hasn't yet fired, so there's nothing to deliver
		 *
		 * TIMER_CANCELWAIT:
		 * touch is in the middle of canceling the timer,
		 * so don't deliver or re-arm anything
		 *
		 * This can happen if a touch resets a timer that had fired
		 * without being processed
		 */
		filt_timerunlock();
		return 0;
	}

	if (kn->kn_sdata != 0 && ((kn->kn_flags & EV_ERROR) == 0)) {
		/*
		 * This is a 'repeating' timer, so we have to emit
		 * how many intervals expired between the arm
		 * and the process.
		 *
		 * A very strange style of interface, because
		 * this could easily be done in the client...
		 */

		/* The timer better have had expired... */
		assert((kn->kn_hookid & TIMER_RUNNING) == 0);

		uint64_t now;

		if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME)
			now = mach_continuous_time();
		else
			now = mach_absolute_time();

		uint64_t first_deadline = kn->kn_ext[0];
		uint64_t interval_abs   = kn->kn_sdata;
		uint64_t orig_arm_time  = first_deadline - interval_abs;

		assert(now > orig_arm_time);
		assert(now > first_deadline);

		uint64_t elapsed = now - orig_arm_time;

		uint64_t num_fired = elapsed / interval_abs;

		/*
		 * To reach this code, we must have seen the timer pop
		 * and be in repeating mode, so therefore it must have been
		 * more than 'interval' time since the attach or last
		 * successful touch.
		 *
		 * An unsuccessful touch would:
		 *	disarm the timer
		 *	clear kn_data
		 *	clear kn_sdata
		 *	set EV_ERROR
		 * all of which will prevent this code from running.
		 */
		assert(num_fired > 0);

		/* report how many intervals have elapsed to the user */
		kn->kn_data = (int64_t)num_fired;

		/* We only need to re-arm the timer if it's not about to be destroyed */
		if ((kn->kn_flags & EV_ONESHOT) == 0) {
			/* fire at the end of the next interval */
			uint64_t new_deadline = first_deadline + num_fired * interval_abs;

			assert(new_deadline > now);

			kn->kn_ext[0] = new_deadline;

			filt_timerarm(kn);
		}
	}

	/*
	 * Copy out the interesting kevent state,
	 * but don't leak out the raw time calculations.
	 *
	 * TODO: potential enhancements - tell the user about:
	 *      - deadline to which this timer thought it was expiring
	 *      - return kn_sfflags in the fflags field so the client can know
	 *        under what flags the timer fired
	 */
	*kev = kn->kn_kevent;
	kev->ext[0] = 0;
	/* kev->ext[1] = 0;  JMM - shouldn't we hide this too? */

	/* we have delivered the event, reset the timer pop count */
	kn->kn_data = 0;

	filt_timerunlock();
	return 1;
}
SECURITY_READ_ONLY_EARLY(static struct filterops) timer_filtops = {
	.f_attach   = filt_timerattach,
	.f_detach   = filt_timerdetach,
	.f_event    = filt_timerevent,
	.f_touch    = filt_timertouch,
	.f_process  = filt_timerprocess,
};
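/*
 * Worked example of the re-arm arithmetic in filt_timerprocess() above
 * (numbers are illustrative): for a repeating timer armed at T with a 10ms
 * interval, first_deadline = T + 10ms. If it is processed at now = T + 35ms,
 * elapsed = 35ms, num_fired = 3 is reported in kn_data, and the new deadline
 * becomes first_deadline + 3 * 10ms = T + 40ms, which is still in the future.
 */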
#pragma mark EVFILT_USER

static void
filt_userlock(void)
{
	lck_spin_lock(&_filt_userlock);
}

static void
filt_userunlock(void)
{
	lck_spin_unlock(&_filt_userlock);
}

static int
filt_userattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
	/* EVFILT_USER knotes are not attached to anything in the kernel */
	/* Can't discover this knote until after attach - so no lock needed */
	if (kn->kn_sfflags & NOTE_TRIGGER) {
		kn->kn_hookid = 1;
	} else {
		kn->kn_hookid = 0;
	}
	return (kn->kn_hookid);
}

static void
filt_userdetach(__unused struct knote *kn)
{
	/* EVFILT_USER knotes are not attached to anything in the kernel */
}

static int
filt_user(
	__unused struct knote *kn,
	__unused long hint)
{
	panic("filt_user");
	return 0;
}

static int
filt_usertouch(
	struct knote *kn,
	struct kevent_internal_s *kev)
{
	uint64_t ffctrl;
	uint64_t fflags;
	int active;

	filt_userlock();

	ffctrl = kev->fflags & NOTE_FFCTRLMASK;
	fflags = kev->fflags & NOTE_FFLAGSMASK;
	switch (ffctrl) {
	case NOTE_FFNOP:
		break;
	case NOTE_FFAND:
		kn->kn_sfflags &= fflags;
		break;
	case NOTE_FFOR:
		kn->kn_sfflags |= fflags;
		break;
	case NOTE_FFCOPY:
		kn->kn_sfflags = fflags;
		break;
	}
	kn->kn_sdata = kev->data;

	if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
		kn->kn_udata = kev->udata;

	if (kev->fflags & NOTE_TRIGGER) {
		kn->kn_hookid = 1;
	}
	active = kn->kn_hookid;

	filt_userunlock();

	return (active);
}

static int
filt_userprocess(
	struct knote *kn,
	__unused struct filt_process_s *data,
	struct kevent_internal_s *kev)
{
	filt_userlock();

	if (kn->kn_hookid == 0) {
		filt_userunlock();
		return 0;
	}

	*kev = kn->kn_kevent;
	kev->fflags = (volatile UInt32)kn->kn_sfflags;
	kev->data = kn->kn_sdata;
	if (kn->kn_flags & EV_CLEAR) {
		kn->kn_hookid = 0;
		kn->kn_data = 0;
		kn->kn_fflags = 0;
	}
	filt_userunlock();

	return (1);
}
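/*
 * Usage sketch (user space, illustrative) for the EVFILT_USER filter handled
 * above; 'kq' is an assumption of the caller. One thread registers the event,
 * another posts it later:
 *
 *	struct kevent ev;
 *	EV_SET(&ev, 42, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);		// register
 *
 *	EV_SET(&ev, 42, EVFILT_USER, 0, NOTE_TRIGGER | NOTE_FFOR | 0x1, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);		// trigger, OR 0x1 into fflags
 */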
#pragma mark EVFILT_WORKLOOP

#if DEBUG || DEVELOPMENT
/*
 * see src/queue_internal.h in libdispatch
 */
#define DISPATCH_QUEUE_ENQUEUED 0x1ull
#endif

static inline void
filt_wllock(struct kqworkloop *kqwl)
{
	lck_mtx_lock(&kqwl->kqwl_statelock);
}

static inline void
filt_wlunlock(struct kqworkloop *kqwl)
{
	lck_mtx_unlock(&kqwl->kqwl_statelock);
}

static inline void
filt_wlheld(__assert_only struct kqworkloop *kqwl)
{
	LCK_MTX_ASSERT(&kqwl->kqwl_statelock, LCK_MTX_ASSERT_OWNED);
}

#define WL_OWNER_SUSPENDED    ((thread_t)(~0ull))  /* special owner when suspended */

static inline bool
filt_wlowner_is_valid(thread_t owner)
{
	return owner != THREAD_NULL && owner != WL_OWNER_SUSPENDED;
}
static inline bool
filt_wlshould_end_ownership(struct kqworkloop *kqwl,
		struct kevent_internal_s *kev, int error)
{
	thread_t owner = kqwl->kqwl_owner;
	return (error == 0 || error == ESTALE) &&
			(kev->fflags & NOTE_WL_END_OWNERSHIP) &&
			(owner == current_thread() || owner == WL_OWNER_SUSPENDED);
}

static inline bool
filt_wlshould_update_ownership(struct kevent_internal_s *kev, int error)
{
	return error == 0 && (kev->fflags & NOTE_WL_DISCOVER_OWNER) &&
			kev->ext[EV_EXTIDX_WL_ADDR];
}

static inline bool
filt_wlshould_set_async_qos(struct kevent_internal_s *kev, int error,
		kq_index_t async_qos)
{
	if (error != 0) {
		return false;
	}
	if (async_qos != THREAD_QOS_UNSPECIFIED) {
		return true;
	}
	if ((kev->fflags & NOTE_WL_THREAD_REQUEST) && (kev->flags & EV_DELETE)) {
		/* see filt_wlprocess() */
		return true;
	}
	return false;
}
static int
filt_wlupdateowner(struct kqworkloop *kqwl, struct kevent_internal_s *kev,
		int error, kq_index_t async_qos)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;
	thread_t cur_owner, new_owner, extra_thread_ref = THREAD_NULL;
	kq_index_t cur_override = THREAD_QOS_UNSPECIFIED;
	kq_index_t old_owner_override = THREAD_QOS_UNSPECIFIED;
	boolean_t ipc_override_is_sync = false;
	boolean_t old_owner_override_is_sync = false;
	int action = KQWL_UTQ_NONE;

	filt_wlheld(kqwl);

	/*
	 * The owner is only changed under both the filt_wllock and the
	 * kqwl_req_lock. Looking at it with either one held is fine.
	 */
	cur_owner = kqwl->kqwl_owner;
	if (filt_wlshould_end_ownership(kqwl, kev, error)) {
		new_owner = THREAD_NULL;
	} else if (filt_wlshould_update_ownership(kev, error)) {
		/*
		 * Decipher the owner port name, and translate accordingly.
		 * The low 2 bits were borrowed for other flags, so mask them off.
		 */
		uint64_t udata = kev->ext[EV_EXTIDX_WL_VALUE];
		mach_port_name_t new_owner_name = (mach_port_name_t)udata & ~0x3;
		if (new_owner_name != MACH_PORT_NULL) {
			new_owner_name = ipc_entry_name_mask(new_owner_name);
		}

		if (MACH_PORT_VALID(new_owner_name)) {
			new_owner = port_name_to_thread(new_owner_name);
			if (new_owner == THREAD_NULL)
				return EOWNERDEAD;
			extra_thread_ref = new_owner;
		} else if (new_owner_name == MACH_PORT_DEAD) {
			new_owner = WL_OWNER_SUSPENDED;
		} else {
			/*
			 * We never want to learn a new owner that is NULL.
			 * Ownership should be ended with END_OWNERSHIP.
			 */
			new_owner = cur_owner;
		}
	} else {
		new_owner = cur_owner;
	}

	if (filt_wlshould_set_async_qos(kev, error, async_qos)) {
		action = KQWL_UTQ_SET_ASYNC_QOS;
	}
	if (cur_owner == new_owner && action == KQWL_UTQ_NONE) {
		goto out;
	}

	kqwl_req_lock(kqwl);

	/* If already tracked as servicer, don't track as owner */
	if ((kqr->kqr_state & KQR_BOUND) && new_owner == kqr->kqr_thread) {
		kqwl->kqwl_owner = new_owner = THREAD_NULL;
	}

	if (cur_owner != new_owner) {
		kqwl->kqwl_owner = new_owner;
		if (new_owner == extra_thread_ref) {
			/* we just transferred this ref to kqwl_owner */
			extra_thread_ref = THREAD_NULL;
		}
		cur_override = kqworkloop_combined_qos(kqwl, &ipc_override_is_sync);
		old_owner_override = kqr->kqr_dsync_owner_qos;
		old_owner_override_is_sync = kqr->kqr_owner_override_is_sync;

		if (filt_wlowner_is_valid(new_owner)) {
			/* override it before we drop the old */
			if (cur_override != THREAD_QOS_UNSPECIFIED) {
				thread_add_ipc_override(new_owner, cur_override);
			}
			if (ipc_override_is_sync) {
				thread_add_sync_ipc_override(new_owner);
			}
			/* Update the kqr to indicate that owner has sync ipc override */
			kqr->kqr_dsync_owner_qos = cur_override;
			kqr->kqr_owner_override_is_sync = ipc_override_is_sync;
			thread_starts_owning_workloop(new_owner);
			if ((kqr->kqr_state & (KQR_THREQUESTED | KQR_BOUND)) == KQR_THREQUESTED) {
				if (action == KQWL_UTQ_NONE) {
					action = KQWL_UTQ_REDRIVE_EVENTS;
				}
			}
		} else if (new_owner == THREAD_NULL) {
			kqr->kqr_dsync_owner_qos = THREAD_QOS_UNSPECIFIED;
			kqr->kqr_owner_override_is_sync = false;
			if ((kqr->kqr_state & (KQR_THREQUESTED | KQR_WAKEUP)) == KQR_WAKEUP) {
				if (action == KQWL_UTQ_NONE) {
					action = KQWL_UTQ_REDRIVE_EVENTS;
				}
			}
		}
	}

	if (action != KQWL_UTQ_NONE) {
		kqworkloop_update_threads_qos(kqwl, action, async_qos);
	}

	kqwl_req_unlock(kqwl);

	/* Now that we are unlocked, drop the override and ref on old owner */
	if (new_owner != cur_owner && filt_wlowner_is_valid(cur_owner)) {
		if (old_owner_override != THREAD_QOS_UNSPECIFIED) {
			thread_drop_ipc_override(cur_owner);
		}
		if (old_owner_override_is_sync) {
			thread_drop_sync_ipc_override(cur_owner);
		}
		thread_ends_owning_workloop(cur_owner);
		thread_deallocate(cur_owner);
	}

out:
	if (extra_thread_ref) {
		thread_deallocate(extra_thread_ref);
	}
	return error;
}
static int
filt_wldebounce(
	struct kqworkloop *kqwl,
	struct kevent_internal_s *kev,
	int default_result)
{
	user_addr_t addr = CAST_USER_ADDR_T(kev->ext[EV_EXTIDX_WL_ADDR]);
	uint64_t udata;
	int error;

	/* we must have the workloop state mutex held */
	filt_wlheld(kqwl);

	/* Do we have a debounce address to work with? */
	if (addr) {
		uint64_t kdata = kev->ext[EV_EXTIDX_WL_VALUE];
		uint64_t mask = kev->ext[EV_EXTIDX_WL_MASK];

		error = copyin_word(addr, &udata, sizeof(udata));
		if (error) {
			return error;
		}

		/* update state as copied in */
		kev->ext[EV_EXTIDX_WL_VALUE] = udata;

		/* If the masked bits don't match, reject it as stale */
		if ((udata & mask) != (kdata & mask)) {
			return ESTALE;
		}

#if DEBUG || DEVELOPMENT
		if ((kev->fflags & NOTE_WL_THREAD_REQUEST) && !(kev->flags & EV_DELETE)) {
			if ((udata & DISPATCH_QUEUE_ENQUEUED) == 0 &&
			    (udata >> 48) != 0 && (udata >> 48) != 0xffff) {
				panic("kevent: workloop %#016llx is not enqueued "
				      "(kev:%p dq_state:%#016llx)", kev->udata, kev, udata);
			}
		}
#endif
	}

	return default_result;
}
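/*
 * Worked example of the debounce check above (illustrative values): if user
 * space registered the kevent with ext[EV_EXTIDX_WL_VALUE] = 0x5 and
 * ext[EV_EXTIDX_WL_MASK] = 0xff, but the word re-read from
 * ext[EV_EXTIDX_WL_ADDR] is now 0x6, then (0x6 & 0xff) != (0x5 & 0xff) and the
 * update is rejected with ESTALE so the caller can retry with fresh state.
 */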
/*
 * Remembers the last update that came in from userspace for debugging reasons.
 * - fflags is mirrored from the userspace kevent
 * - ext[i, i != VALUE] is mirrored from the userspace kevent
 * - ext[VALUE] is set to what the kernel loaded atomically
 * - data is set to the error if any
 */
static inline void
filt_wlremember_last_update(
	__assert_only struct kqworkloop *kqwl,
	struct knote *kn,
	struct kevent_internal_s *kev,
	int error)
{
	filt_wlheld(kqwl);
	kn->kn_fflags = kev->fflags;
	kn->kn_data = error;
	memcpy(kn->kn_ext, kev->ext, sizeof(kev->ext));
}
2145 * Return which operations on EVFILT_WORKLOOP need to be protected against
2146 * knoteusewait() causing priority inversions.
2149 filt_wlneeds_boost(struct kevent_internal_s
*kev
)
2153 * this is an f_process() usecount, and it can cause a drop to wait
2157 if (kev
->fflags
& NOTE_WL_THREAD_REQUEST
) {
2159 * All operations on thread requests may starve drops or re-attach of
2160 * the same knote, all of them need boosts. None of what we do under
2161 * thread-request usecount holds blocks anyway.
2165 if (kev
->fflags
& NOTE_WL_SYNC_WAIT
) {
2167 * this may call filt_wlwait() and we don't want to hold any boost when
2168 * woken up, this would cause background threads contending on
2169 * dispatch_sync() to wake up at 64 and be preempted immediately when
2176 * SYNC_WAIT knotes when deleted don't need to be rushed, there's no
2177 * detach/reattach race with these ever. In addition to this, when the
2178 * SYNC_WAIT knote is dropped, the caller is no longer receiving the
2179 * workloop overrides if any, and we'd rather schedule other threads than
2180 * him, he's not possibly stalling anything anymore.
2182 return (kev
->flags
& EV_DELETE
) == 0;
2186 filt_wlattach(struct knote
*kn
, struct kevent_internal_s
*kev
)
2188 struct kqueue
*kq
= knote_get_kq(kn
);
2189 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
2191 kq_index_t qos_index
= 0;
2193 if ((kq
->kq_state
& KQ_WORKLOOP
) == 0) {
2198 #if DEVELOPMENT || DEBUG
2199 if (kev
->ident
== 0 && kev
->udata
== 0 && kev
->fflags
== 0) {
2200 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
2202 kqwl_req_lock(kqwl
);
2204 if (kqr
->kqr_dsync_waiters
) {
2205 kev
->fflags
|= NOTE_WL_SYNC_WAIT
;
2207 if (kqr
->kqr_qos_index
) {
2208 kev
->fflags
|= NOTE_WL_THREAD_REQUEST
;
2210 if (kqwl
->kqwl_owner
== WL_OWNER_SUSPENDED
) {
2211 kev
->ext
[0] = ~0ull;
2213 kev
->ext
[0] = thread_tid(kqwl
->kqwl_owner
);
2215 kev
->ext
[1] = thread_tid(kqwl
->kqwl_request
.kqr_thread
);
2216 kev
->ext
[2] = thread_owned_workloops_count(current_thread());
2217 kev
->ext
[3] = kn
->kn_kevent
.ext
[3];
2218 kqwl_req_unlock(kqwl
);
2224 /* Some simple validation */
2225 int command
= (kn
->kn_sfflags
& NOTE_WL_COMMANDS_MASK
);
2227 case NOTE_WL_THREAD_REQUEST
:
2228 if (kn
->kn_id
!= kqwl
->kqwl_dynamicid
) {
2232 qos_index
= qos_index_from_qos(kn
, kn
->kn_qos
, FALSE
);
2233 if (qos_index
< THREAD_QOS_MAINTENANCE
||
2234 qos_index
> THREAD_QOS_USER_INTERACTIVE
) {
2239 case NOTE_WL_SYNC_WAIT
:
2240 case NOTE_WL_SYNC_WAKE
:
2241 if (kq
->kq_state
& KQ_NO_WQ_THREAD
) {
2245 if (kn
->kn_id
== kqwl
->kqwl_dynamicid
) {
2249 if ((kn
->kn_flags
& EV_DISABLE
) == 0) {
2253 if (kn
->kn_sfflags
& NOTE_WL_END_OWNERSHIP
) {
2266 if (command
== NOTE_WL_THREAD_REQUEST
&& kqwl
->kqwl_request
.kqr_qos_index
) {
2268 * There already is a thread request, and well, you're only allowed
2269 * one per workloop, so fail the attach.
2271 * Note: kqr_qos_index is always set with the wllock held, so we
2272 * don't need to take the kqr lock.
2276 /* Make sure user and kernel are in agreement on important state */
2277 error
= filt_wldebounce(kqwl
, kev
, 0);
2280 error
= filt_wlupdateowner(kqwl
, kev
, error
, qos_index
);
2281 filt_wlunlock(kqwl
);
2284 kn
->kn_flags
|= EV_ERROR
;
2285 /* If userland wants ESTALE to be hidden, fail the attach anyway */
2286 if (error
== ESTALE
&& (kn
->kn_sfflags
& NOTE_WL_IGNORE_ESTALE
)) {
2289 kn
->kn_data
= error
;
2293 /* Just attaching the thread request successfully will fire it */
2294 return command
== NOTE_WL_THREAD_REQUEST
;
2297 __attribute__((noinline
,not_tail_called
))
2299 filt_wlwait(struct kqworkloop
*kqwl
,
2301 struct kevent_internal_s
*kev
)
2304 assert((kn
->kn_sfflags
& NOTE_WL_SYNC_WAKE
) == 0);
2307 * Hint to the wakeup side that this thread is waiting. Also used by
2308 * stackshot for waitinfo.
2310 kn
->kn_hook
= current_thread();
2312 thread_set_pending_block_hint(current_thread(), kThreadWaitWorkloopSyncWait
);
2314 wait_result_t wr
= assert_wait(kn
, THREAD_ABORTSAFE
);
2316 if (wr
== THREAD_WAITING
) {
2317 kq_index_t qos_index
= qos_index_from_qos(kn
, kev
->qos
, TRUE
);
2318 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
2320 thread_t thread_to_handoff
= THREAD_NULL
; /* holds +1 thread ref */
2322 thread_t kqwl_owner
= kqwl
->kqwl_owner
;
2323 if (filt_wlowner_is_valid(kqwl_owner
)) {
2324 thread_reference(kqwl_owner
);
2325 thread_to_handoff
= kqwl_owner
;
2328 kqwl_req_lock(kqwl
);
2331 assert(kqr
->kqr_dsync_waiters
< UINT16_MAX
);
2332 kqr
->kqr_dsync_waiters
++;
2333 if (qos_index
> kqr
->kqr_dsync_waiters_qos
) {
2334 kqworkloop_update_threads_qos(kqwl
,
2335 KQWL_UTQ_SET_SYNC_WAITERS_QOS
, qos_index
);
2339 if ((kqr
->kqr_state
& KQR_BOUND
) && thread_to_handoff
== THREAD_NULL
) {
2340 assert(kqr
->kqr_thread
!= THREAD_NULL
);
2341 thread_t servicer
= kqr
->kqr_thread
;
2343 thread_reference(servicer
);
2344 thread_to_handoff
= servicer
;
2347 kqwl_req_unlock(kqwl
);
2349 filt_wlunlock(kqwl
);
2351 /* TODO: use continuation based blocking <rdar://problem/31299584> */
2353 /* consume a refcount on thread_to_handoff, then thread_block() */
2354 wr
= thread_handoff(thread_to_handoff
);
2355 thread_to_handoff
= THREAD_NULL
;
2359 /* clear waiting state (only one waiting thread - so no race) */
2360 assert(kn
->kn_hook
== current_thread());
2363 kqwl_req_lock(kqwl
);
2364 assert(kqr
->kqr_dsync_waiters
> 0);
2365 if (--kqr
->kqr_dsync_waiters
== 0) {
2366 assert(kqr
->kqr_dsync_waiters_qos
);
2367 kqworkloop_update_threads_qos(kqwl
,
2368 KQWL_UTQ_SET_SYNC_WAITERS_QOS
, 0);
2370 kqwl_req_unlock(kqwl
);
2377 case THREAD_AWAKENED
:
2379 case THREAD_INTERRUPTED
:
2381 case THREAD_RESTART
:
2384 panic("filt_wlattach: unexpected wait result %d", wr
);
2389 /* called in stackshot context to report the thread responsible for blocking this thread */
2391 kdp_workloop_sync_wait_find_owner(__assert_only thread_t thread
,
2393 thread_waitinfo_t
*waitinfo
)
2395 struct knote
*kn
= (struct knote
*) event
;
2396 assert(kdp_is_in_zone(kn
, "knote zone"));
2398 assert(kn
->kn_hook
== thread
);
2400 struct kqueue
*kq
= knote_get_kq(kn
);
2401 assert(kdp_is_in_zone(kq
, "kqueue workloop zone"));
2402 assert(kq
->kq_state
& KQ_WORKLOOP
);
2404 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
2405 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
2407 thread_t kqwl_owner
= kqwl
->kqwl_owner
;
2408 thread_t servicer
= kqr
->kqr_thread
;
2410 if (kqwl_owner
== WL_OWNER_SUSPENDED
) {
2411 waitinfo
->owner
= STACKSHOT_WAITOWNER_SUSPENDED
;
2412 } else if (kqwl_owner
!= THREAD_NULL
) {
2413 assert(kdp_is_in_zone(kqwl_owner
, "threads"));
2415 waitinfo
->owner
= thread_tid(kqwl
->kqwl_owner
);
2416 } else if (servicer
!= THREAD_NULL
) {
2417 assert(kdp_is_in_zone(servicer
, "threads"));
2419 waitinfo
->owner
= thread_tid(servicer
);
2420 } else if (kqr
->kqr_state
& KQR_THREQUESTED
) {
2421 waitinfo
->owner
= STACKSHOT_WAITOWNER_THREQUESTED
;
2423 waitinfo
->owner
= 0;
2426 waitinfo
->context
= kqwl
->kqwl_dynamicid
;
2432 * Takes kqueue locked, returns locked, may drop in the middle and/or block for a while
2435 filt_wlpost_attach(struct knote
*kn
, struct kevent_internal_s
*kev
)
2437 struct kqueue
*kq
= knote_get_kq(kn
);
2438 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
2441 if (kev
->fflags
& NOTE_WL_SYNC_WAIT
) {
2442 if (kqlock2knoteuse(kq
, kn
, KNUSE_NONE
)) {
2444 /* if the wake has already preposted, don't wait */
2445 if ((kn
->kn_sfflags
& NOTE_WL_SYNC_WAKE
) == 0)
2446 error
= filt_wlwait(kqwl
, kn
, kev
);
2447 filt_wlunlock(kqwl
);
2448 knoteuse2kqlock(kq
, kn
, KNUSE_NONE
);
2455 filt_wldetach(__assert_only
struct knote
*kn
)
2457 assert(knote_get_kq(kn
)->kq_state
& KQ_WORKLOOP
);
2460 * Thread requests have nothing to detach.
2461 * Sync waiters should have been aborted out
2462 * and drop their refs before we could drop/
2463 * detach their knotes.
2465 assert(kn
->kn_hook
== NULL
);
2470 __unused
struct knote
*kn
,
2473 panic("filt_wlevent");
2478 filt_wlvalidate_kev_flags(struct knote
*kn
, struct kevent_internal_s
*kev
)
2480 int new_commands
= kev
->fflags
& NOTE_WL_COMMANDS_MASK
;
2481 int sav_commands
= kn
->kn_sfflags
& NOTE_WL_COMMANDS_MASK
;
2484 switch (new_commands
) {
2485 case NOTE_WL_THREAD_REQUEST
:
2486 /* thread requests can only update themselves */
2487 if (sav_commands
!= new_commands
)
2491 case NOTE_WL_SYNC_WAIT
:
2492 if (kev
->fflags
& NOTE_WL_END_OWNERSHIP
)
2495 case NOTE_WL_SYNC_WAKE
:
2496 /* waits and wakes can update themselves or their counterparts */
2497 if (!(sav_commands
& (NOTE_WL_SYNC_WAIT
| NOTE_WL_SYNC_WAKE
)))
2499 if (kev
->fflags
& NOTE_WL_UPDATE_QOS
)
2501 if ((kev
->flags
& (EV_ENABLE
| EV_DELETE
)) == EV_ENABLE
)
2503 if (kev
->flags
& EV_DELETE
) {
2505 * Really this is not supported: there is absolutely no reason
2506 * whatsoever to want to fail the drop of a NOTE_WL_SYNC_WAIT knote.
2508 if (kev
->ext
[EV_EXTIDX_WL_ADDR
] && kev
->ext
[EV_EXTIDX_WL_MASK
]) {
2517 if ((kev
->flags
& EV_DELETE
) && (kev
->fflags
& NOTE_WL_DISCOVER_OWNER
)) {
2526 struct kevent_internal_s
*kev
)
2528 struct kqueue
*kq
= knote_get_kq(kn
);
2530 struct kqworkloop
*kqwl
;
2532 assert(kq
->kq_state
& KQ_WORKLOOP
);
2533 kqwl
= (struct kqworkloop
*)kq
;
2535 error
= filt_wlvalidate_kev_flags(kn
, kev
);
2542 /* Make sure user and kernel are in agreement on important state */
2543 error
= filt_wldebounce(kqwl
, kev
, 0);
2545 error
= filt_wlupdateowner(kqwl
, kev
, error
, 0);
2549 int new_command
= kev
->fflags
& NOTE_WL_COMMANDS_MASK
;
2550 switch (new_command
) {
2551 case NOTE_WL_THREAD_REQUEST
:
2552 assert(kqwl
->kqwl_request
.kqr_qos_index
!= THREAD_QOS_UNSPECIFIED
);
2555 case NOTE_WL_SYNC_WAIT
:
2557 * we need to allow waiting several times on the same knote because
2558 * of EINTR. If it's already woken though, it won't block.
2562 case NOTE_WL_SYNC_WAKE
:
2563 if (kn
->kn_sfflags
& NOTE_WL_SYNC_WAKE
) {
2564 /* disallow waking the same knote twice */
2569 thread_wakeup_thread((event_t
)kn
, (thread_t
)kn
->kn_hook
);
2579 * Save off any additional fflags/data we just accepted
2580 * But only keep the last round of "update" bits we acted on which helps
2583 kn
->kn_sfflags
&= ~NOTE_WL_UPDATES_MASK
;
2584 kn
->kn_sfflags
|= kev
->fflags
;
2585 kn
->kn_sdata
= kev
->data
;
2587 kq_index_t qos_index
= THREAD_QOS_UNSPECIFIED
;
2589 if (kev
->fflags
& NOTE_WL_UPDATE_QOS
) {
2590 qos_t qos
= pthread_priority_canonicalize(kev
->qos
, FALSE
);
2592 if (kn
->kn_qos
!= qos
) {
2593 qos_index
= qos_index_from_qos(kn
, qos
, FALSE
);
2594 if (qos_index
== THREAD_QOS_UNSPECIFIED
) {
2599 if (kn
->kn_status
& KN_QUEUED
) {
2601 knote_set_qos_index(kn
, qos_index
);
2605 knote_set_qos_index(kn
, qos_index
);
2612 error
= filt_wlupdateowner(kqwl
, kev
, 0, qos_index
);
2617 if (new_command
== NOTE_WL_SYNC_WAIT
) {
2618 /* if the wake has already preposted, don't wait */
2619 if ((kn
->kn_sfflags
& NOTE_WL_SYNC_WAKE
) == 0)
2620 error
= filt_wlwait(kqwl
, kn
, kev
);
2624 filt_wlremember_last_update(kqwl
, kn
, kev
, error
);
2625 filt_wlunlock(kqwl
);
2628 if (error
== ESTALE
&& (kev
->fflags
& NOTE_WL_IGNORE_ESTALE
)) {
2629 /* If userland wants ESTALE to be hidden, do not activate */
2632 kev
->flags
|= EV_ERROR
;
2636 /* Just touching the thread request successfully will fire it */
2637 return new_command
== NOTE_WL_THREAD_REQUEST
;
2641 filt_wldrop_and_unlock(
2643 struct kevent_internal_s
*kev
)
2645 struct kqueue
*kq
= knote_get_kq(kn
);
2646 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
2647 int error
= 0, knoteuse_flags
= KNUSE_NONE
;
2651 assert(kev
->flags
& EV_DELETE
);
2652 assert(kq
->kq_state
& KQ_WORKLOOP
);
2654 error
= filt_wlvalidate_kev_flags(kn
, kev
);
2659 if (kn
->kn_sfflags
& NOTE_WL_THREAD_REQUEST
) {
2660 knoteuse_flags
|= KNUSE_BOOST
;
2663 /* take a usecount to allow taking the filt_wllock */
2664 if (!kqlock2knoteuse(kq
, kn
, knoteuse_flags
)) {
2665 /* knote is being dropped already */
2666 error
= EINPROGRESS
;
2673 * Make sure user and kernel are in agreement on important state
2675 * Userland will modify bits to cause this to fail for the touch / drop
2676 * race case (when a drop for a thread request quiescing comes in late after
2677 * the workloop has been woken up again).
2679 error
= filt_wldebounce(kqwl
, kev
, 0);
2681 if (!knoteuse2kqlock(kq
, kn
, knoteuse_flags
)) {
2682 /* knote is no longer alive */
2683 error
= EINPROGRESS
;
2687 if (!error
&& (kn
->kn_sfflags
& NOTE_WL_THREAD_REQUEST
) && kn
->kn_inuse
) {
2689 * There is a concurrent drop or touch happening, we can't resolve this,
2690 * userland has to redrive.
2692 * The race we're worried about here is the following:
2694 * f_touch | f_drop_and_unlock
2695 * ------------------------+--------------------------------------------
2697 * | kqlock2knoteuse()
2699 * | debounces successfully
2702 * filt_wllock() <BLOCKS> |
2703 * | knoteuse2kqlock()
2705 * | kqlock2knotedrop() <BLOCKS, WAKES f_touch>
2706 * debounces successfully |
2708 * caller WAKES f_drop |
2709 * | performs drop, but f_touch should have won
2711 * So if the usecount is not 0 here, we need to wait for it to drop and
2712 * redrive the whole logic (including looking up the knote again).
2714 filt_wlunlock(kqwl
);
2715 knoteusewait(kq
, kn
);
2720 * If error is 0 this will set kqr_qos_index to THREAD_QOS_UNSPECIFIED
2722 * If error is 0 or ESTALE this may drop ownership and cause a thread
2723 * request redrive, however the kqlock is held which prevents f_process() to
2724 * run until we did the drop for real.
2726 error
= filt_wlupdateowner(kqwl
, kev
, error
, 0);
2731 if ((kn
->kn_sfflags
& (NOTE_WL_SYNC_WAIT
| NOTE_WL_SYNC_WAKE
)) ==
2732 NOTE_WL_SYNC_WAIT
) {
2734 * When deleting a SYNC_WAIT knote that hasn't been woken up
2735 * explicitly, issue a wake up.
2737 kn
->kn_sfflags
|= NOTE_WL_SYNC_WAKE
;
2739 thread_wakeup_thread((event_t
)kn
, (thread_t
)kn
->kn_hook
);
2744 filt_wlremember_last_update(kqwl
, kn
, kev
, error
);
2745 filt_wlunlock(kqwl
);
2749 /* If nothing failed, do the regular knote drop. */
2750 if (kqlock2knotedrop(kq
, kn
)) {
2751 knote_drop(kn
, current_proc());
2753 error
= EINPROGRESS
;
2758 if (error
== ESTALE
&& (kev
->fflags
& NOTE_WL_IGNORE_ESTALE
)) {
2761 if (error
== EINPROGRESS
) {
2763 * filt_wlprocess() makes sure that no event can be delivered for
2764 * NOTE_WL_THREAD_REQUEST knotes once a drop is happening, and
2765 * NOTE_WL_SYNC_* knotes are never fired.
2767 * It means that EINPROGRESS is about a state that userland cannot
2768 * observe for this filter (an event being delivered concurrently from
2769 * a drop), so silence the error.
2779 __unused
struct filt_process_s
*data
,
2780 struct kevent_internal_s
*kev
)
2782 struct kqueue
*kq
= knote_get_kq(kn
);
2783 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
2784 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
2787 assert(kq
->kq_state
& KQ_WORKLOOP
);
2789 /* only thread requests should get here */
2790 assert(kn
->kn_sfflags
& NOTE_WL_THREAD_REQUEST
);
2791 if (kn
->kn_sfflags
& NOTE_WL_THREAD_REQUEST
) {
2793 assert(kqr
->kqr_qos_index
!= THREAD_QOS_UNSPECIFIED
);
2794 if (kqwl
->kqwl_owner
) {
2796 * <rdar://problem/33584321> userspace sometimes due to events being
2797 * delivered but not triggering a drain session can cause a process
2798 * of the thread request knote.
2800 * When that happens, the automatic deactivation due to process
2801 * would swallow the event, so we have to activate the knote again.
2806 } else if (kqr
->kqr_qos_index
) {
2807 #if DEBUG || DEVELOPMENT
2808 user_addr_t addr
= CAST_USER_ADDR_T(kn
->kn_ext
[EV_EXTIDX_WL_ADDR
]);
2809 task_t t
= current_task();
2811 if (addr
&& task_is_active(t
) && !task_is_halting(t
) &&
2812 copyin_word(addr
, &val
, sizeof(val
)) == 0 &&
2813 val
&& (val
& DISPATCH_QUEUE_ENQUEUED
) == 0 &&
2814 (val
>> 48) != 0 && (val
>> 48) != 0xffff) {
2815 panic("kevent: workloop %#016llx is not enqueued "
2816 "(kn:%p dq_state:%#016llx kev.dq_state:%#016llx)",
2817 kn
->kn_udata
, kn
, val
,
2818 kn
->kn_ext
[EV_EXTIDX_WL_VALUE
]);
2821 *kev
= kn
->kn_kevent
;
2822 kev
->fflags
= kn
->kn_sfflags
;
2823 kev
->data
= kn
->kn_sdata
;
2824 kev
->qos
= kn
->kn_qos
;
2827 filt_wlunlock(kqwl
);
2832 #pragma mark kevent / knotes
2835 * JMM - placeholder for not-yet-implemented filters
2838 filt_badattach(__unused
struct knote
*kn
, __unused
struct kevent_internal_s
*kev
)
2840 kn
->kn_flags
|= EV_ERROR
;
2841 kn
->kn_data
= ENOTSUP
;
2846 kqueue_alloc(struct proc
*p
, unsigned int flags
)
2848 struct filedesc
*fdp
= p
->p_fd
;
2849 struct kqueue
*kq
= NULL
;
2852 uint64_t kq_addr_offset
;
2854 if (flags
& KEVENT_FLAG_WORKQ
) {
2855 struct kqworkq
*kqwq
;
2858 kqwq
= (struct kqworkq
*)zalloc(kqworkq_zone
);
2862 kq
= &kqwq
->kqwq_kqueue
;
2863 bzero(kqwq
, sizeof (struct kqworkq
));
2865 kqwq
->kqwq_state
= KQ_WORKQ
;
2867 for (i
= 0; i
< KQWQ_NBUCKETS
; i
++) {
2868 TAILQ_INIT(&kq
->kq_queue
[i
]);
2870 for (i
= 0; i
< KQWQ_NQOS
; i
++) {
2871 kqwq
->kqwq_request
[i
].kqr_qos_index
= i
;
2874 lck_spin_init(&kqwq
->kqwq_reqlock
, kq_lck_grp
, kq_lck_attr
);
2875 policy
= SYNC_POLICY_FIFO
;
2876 hook
= (void *)kqwq
;
2878 } else if (flags
& KEVENT_FLAG_WORKLOOP
) {
2879 struct kqworkloop
*kqwl
;
2882 kqwl
= (struct kqworkloop
*)zalloc(kqworkloop_zone
);
2886 bzero(kqwl
, sizeof (struct kqworkloop
));
2888 kqwl
->kqwl_state
= KQ_WORKLOOP
| KQ_DYNAMIC
;
2889 kqwl
->kqwl_retains
= 1; /* donate a retain to creator */
2891 kq
= &kqwl
->kqwl_kqueue
;
2892 for (i
= 0; i
< KQWL_NBUCKETS
; i
++) {
2893 TAILQ_INIT(&kq
->kq_queue
[i
]);
2895 TAILQ_INIT(&kqwl
->kqwl_request
.kqr_suppressed
);
2897 lck_spin_init(&kqwl
->kqwl_reqlock
, kq_lck_grp
, kq_lck_attr
);
2898 lck_mtx_init(&kqwl
->kqwl_statelock
, kq_lck_grp
, kq_lck_attr
);
2900 policy
= SYNC_POLICY_FIFO
;
2901 if (flags
& KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD
) {
2902 policy
|= SYNC_POLICY_PREPOST
;
2903 kq
->kq_state
|= KQ_NO_WQ_THREAD
;
2905 hook
= (void *)kqwl
;
2911 kqf
= (struct kqfile
*)zalloc(kqfile_zone
);
2915 kq
= &kqf
->kqf_kqueue
;
2916 bzero(kqf
, sizeof (struct kqfile
));
2917 TAILQ_INIT(&kq
->kq_queue
[0]);
2918 TAILQ_INIT(&kqf
->kqf_suppressed
);
2920 policy
= SYNC_POLICY_FIFO
| SYNC_POLICY_PREPOST
;
2923 waitq_set_init(&kq
->kq_wqs
, policy
, NULL
, hook
);
2924 lck_spin_init(&kq
->kq_lock
, kq_lck_grp
, kq_lck_attr
);
2927 if (fdp
->fd_knlistsize
< 0) {
2929 if (fdp
->fd_knlistsize
< 0)
2930 fdp
->fd_knlistsize
= 0; /* this process has had a kq */
2934 kq_addr_offset
= ((uintptr_t)kq
- (uintptr_t)VM_MIN_KERNEL_AND_KEXT_ADDRESS
);
2935 /* Assert that the address can be pointer compacted for use with knote */
2936 assert(kq_addr_offset
< (uint64_t)(1ull << KNOTE_KQ_BITSIZE
));
2941 * knotes_dealloc - detach all knotes for the process and drop them
2943 * Called with proc_fdlock held.
2944 * Returns with it locked.
2945 * May drop it temporarily.
2946 * Process is in such a state that it will not try to allocate
2947 * any more knotes during this process (stopped for exit or exec).
2950 knotes_dealloc(proc_t p
)
2952 struct filedesc
*fdp
= p
->p_fd
;
2955 struct klist
*kn_hash
= NULL
;
2958 /* Close all the fd-indexed knotes up front */
2959 if (fdp
->fd_knlistsize
> 0) {
2960 for (i
= 0; i
< fdp
->fd_knlistsize
; i
++) {
2961 while ((kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
])) != NULL
) {
2962 kq
= knote_get_kq(kn
);
2965 /* drop it ourselves or wait */
2966 if (kqlock2knotedrop(kq
, kn
)) {
2972 /* free the table */
2973 FREE(fdp
->fd_knlist
, M_KQUEUE
);
2974 fdp
->fd_knlist
= NULL
;
2976 fdp
->fd_knlistsize
= -1;
2981 /* Clean out all the hashed knotes as well */
2982 if (fdp
->fd_knhashmask
!= 0) {
2983 for (i
= 0; i
<= (int)fdp
->fd_knhashmask
; i
++) {
2984 while ((kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
])) != NULL
) {
2985 kq
= knote_get_kq(kn
);
2988 /* drop it ourselves or wait */
2989 if (kqlock2knotedrop(kq
, kn
)) {
2995 kn_hash
= fdp
->fd_knhash
;
2996 fdp
->fd_knhashmask
= 0;
2997 fdp
->fd_knhash
= NULL
;
3002 /* free the kn_hash table */
3004 FREE(kn_hash
, M_KQUEUE
);
3011 * kqueue_dealloc - detach all knotes from a kqueue and free it
3013 * We walk each list looking for knotes referencing this
3014 * this kqueue. If we find one, we try to drop it. But
3015 * if we fail to get a drop reference, that will wait
3016 * until it is dropped. So, we can just restart again
3017 * safe in the assumption that the list will eventually
3018 * not contain any more references to this kqueue (either
3019 * we dropped them all, or someone else did).
3021 * Assumes no new events are being added to the kqueue.
3022 * Nothing locked on entry or exit.
3024 * Workloop kqueues cant get here unless all the knotes
3025 * are already gone and all requested threads have come
3026 * and gone (cancelled or arrived).
3029 kqueue_dealloc(struct kqueue
*kq
)
3032 struct filedesc
*fdp
;
3042 if ((kq
->kq_state
& KQ_WORKLOOP
) == 0) {
3044 for (i
= 0; i
< fdp
->fd_knlistsize
; i
++) {
3045 kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
]);
3046 while (kn
!= NULL
) {
3047 if (kq
== knote_get_kq(kn
)) {
3050 /* drop it ourselves or wait */
3051 if (kqlock2knotedrop(kq
, kn
)) {
3055 /* start over at beginning of list */
3056 kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
]);
3059 kn
= SLIST_NEXT(kn
, kn_link
);
3065 if (fdp
->fd_knhashmask
!= 0) {
3066 for (i
= 0; i
< (int)fdp
->fd_knhashmask
+ 1; i
++) {
3067 kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
]);
3068 while (kn
!= NULL
) {
3069 if (kq
== knote_get_kq(kn
)) {
3072 /* drop it ourselves or wait */
3073 if (kqlock2knotedrop(kq
, kn
)) {
3077 /* start over at beginning of list */
3078 kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
]);
3081 kn
= SLIST_NEXT(kn
, kn_link
);
3088 if (kq
->kq_state
& KQ_WORKLOOP
) {
3089 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3090 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
3091 thread_t cur_owner
= kqwl
->kqwl_owner
;
3093 assert(TAILQ_EMPTY(&kqwl
->kqwl_request
.kqr_suppressed
));
3094 if (filt_wlowner_is_valid(cur_owner
)) {
3096 * If the kqueue had an owner that prevented the thread request to
3097 * go through, then no unbind happened, and we may have lingering
3098 * overrides to drop.
3100 if (kqr
->kqr_dsync_owner_qos
!= THREAD_QOS_UNSPECIFIED
) {
3101 thread_drop_ipc_override(cur_owner
);
3102 kqr
->kqr_dsync_owner_qos
= THREAD_QOS_UNSPECIFIED
;
3105 if (kqr
->kqr_owner_override_is_sync
) {
3106 thread_drop_sync_ipc_override(cur_owner
);
3107 kqr
->kqr_owner_override_is_sync
= 0;
3109 thread_ends_owning_workloop(cur_owner
);
3110 thread_deallocate(cur_owner
);
3111 kqwl
->kqwl_owner
= THREAD_NULL
;
3116 * waitq_set_deinit() remove the KQ's waitq set from
3117 * any select sets to which it may belong.
3119 waitq_set_deinit(&kq
->kq_wqs
);
3120 lck_spin_destroy(&kq
->kq_lock
, kq_lck_grp
);
3122 if (kq
->kq_state
& KQ_WORKQ
) {
3123 struct kqworkq
*kqwq
= (struct kqworkq
*)kq
;
3125 lck_spin_destroy(&kqwq
->kqwq_reqlock
, kq_lck_grp
);
3126 zfree(kqworkq_zone
, kqwq
);
3127 } else if (kq
->kq_state
& KQ_WORKLOOP
) {
3128 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3130 assert(kqwl
->kqwl_retains
== 0);
3131 lck_spin_destroy(&kqwl
->kqwl_reqlock
, kq_lck_grp
);
3132 lck_mtx_destroy(&kqwl
->kqwl_statelock
, kq_lck_grp
);
3133 zfree(kqworkloop_zone
, kqwl
);
3135 struct kqfile
*kqf
= (struct kqfile
*)kq
;
3137 zfree(kqfile_zone
, kqf
);
3142 kqueue_retain(struct kqueue
*kq
)
3144 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3147 if ((kq
->kq_state
& KQ_DYNAMIC
) == 0)
3150 previous
= OSIncrementAtomic(&kqwl
->kqwl_retains
);
3151 if (previous
== KQ_WORKLOOP_RETAINS_MAX
)
3152 panic("kq(%p) retain overflow", kq
);
3155 panic("kq(%p) resurrection", kq
);
3158 #define KQUEUE_CANT_BE_LAST_REF 0
3159 #define KQUEUE_MIGHT_BE_LAST_REF 1
3162 kqueue_release(struct kqueue
*kq
, __assert_only
int possibly_last
)
3164 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3166 if ((kq
->kq_state
& KQ_DYNAMIC
) == 0) {
3170 assert(kq
->kq_state
& KQ_WORKLOOP
); /* for now */
3171 uint32_t refs
= OSDecrementAtomic(&kqwl
->kqwl_retains
);
3172 if (__improbable(refs
== 0)) {
3173 panic("kq(%p) over-release", kq
);
3176 assert(possibly_last
);
3182 kqueue_body(struct proc
*p
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
)
3185 struct fileproc
*fp
;
3188 error
= falloc_withalloc(p
,
3189 &fp
, &fd
, vfs_context_current(), fp_zalloc
, cra
);
3194 kq
= kqueue_alloc(p
, 0);
3200 fp
->f_flag
= FREAD
| FWRITE
;
3201 fp
->f_ops
= &kqueueops
;
3205 *fdflags(p
, fd
) |= UF_EXCLOSE
;
3206 procfdtbl_releasefd(p
, fd
, NULL
);
3207 fp_drop(p
, fd
, fp
, 1);
3215 kqueue(struct proc
*p
, __unused
struct kqueue_args
*uap
, int32_t *retval
)
3217 return (kqueue_body(p
, fileproc_alloc_init
, NULL
, retval
));
3221 kevent_copyin(user_addr_t
*addrp
, struct kevent_internal_s
*kevp
, struct proc
*p
,
3227 if (flags
& KEVENT_FLAG_LEGACY32
) {
3228 bzero(kevp
, sizeof (*kevp
));
3230 if (IS_64BIT_PROCESS(p
)) {
3231 struct user64_kevent kev64
;
3233 advance
= sizeof (kev64
);
3234 error
= copyin(*addrp
, (caddr_t
)&kev64
, advance
);
3237 kevp
->ident
= kev64
.ident
;
3238 kevp
->filter
= kev64
.filter
;
3239 kevp
->flags
= kev64
.flags
;
3240 kevp
->udata
= kev64
.udata
;
3241 kevp
->fflags
= kev64
.fflags
;
3242 kevp
->data
= kev64
.data
;
3244 struct user32_kevent kev32
;
3246 advance
= sizeof (kev32
);
3247 error
= copyin(*addrp
, (caddr_t
)&kev32
, advance
);
3250 kevp
->ident
= (uintptr_t)kev32
.ident
;
3251 kevp
->filter
= kev32
.filter
;
3252 kevp
->flags
= kev32
.flags
;
3253 kevp
->udata
= CAST_USER_ADDR_T(kev32
.udata
);
3254 kevp
->fflags
= kev32
.fflags
;
3255 kevp
->data
= (intptr_t)kev32
.data
;
3257 } else if (flags
& KEVENT_FLAG_LEGACY64
) {
3258 struct kevent64_s kev64
;
3260 bzero(kevp
, sizeof (*kevp
));
3262 advance
= sizeof (struct kevent64_s
);
3263 error
= copyin(*addrp
, (caddr_t
)&kev64
, advance
);
3266 kevp
->ident
= kev64
.ident
;
3267 kevp
->filter
= kev64
.filter
;
3268 kevp
->flags
= kev64
.flags
;
3269 kevp
->udata
= kev64
.udata
;
3270 kevp
->fflags
= kev64
.fflags
;
3271 kevp
->data
= kev64
.data
;
3272 kevp
->ext
[0] = kev64
.ext
[0];
3273 kevp
->ext
[1] = kev64
.ext
[1];
3276 struct kevent_qos_s kevqos
;
3278 bzero(kevp
, sizeof (*kevp
));
3280 advance
= sizeof (struct kevent_qos_s
);
3281 error
= copyin(*addrp
, (caddr_t
)&kevqos
, advance
);
3284 kevp
->ident
= kevqos
.ident
;
3285 kevp
->filter
= kevqos
.filter
;
3286 kevp
->flags
= kevqos
.flags
;
3287 kevp
->qos
= kevqos
.qos
;
3288 // kevp->xflags = kevqos.xflags;
3289 kevp
->udata
= kevqos
.udata
;
3290 kevp
->fflags
= kevqos
.fflags
;
3291 kevp
->data
= kevqos
.data
;
3292 kevp
->ext
[0] = kevqos
.ext
[0];
3293 kevp
->ext
[1] = kevqos
.ext
[1];
3294 kevp
->ext
[2] = kevqos
.ext
[2];
3295 kevp
->ext
[3] = kevqos
.ext
[3];
3303 kevent_copyout(struct kevent_internal_s
*kevp
, user_addr_t
*addrp
, struct proc
*p
,
3306 user_addr_t addr
= *addrp
;
3311 * fully initialize the differnt output event structure
3312 * types from the internal kevent (and some universal
3313 * defaults for fields not represented in the internal
3316 if (flags
& KEVENT_FLAG_LEGACY32
) {
3317 assert((flags
& KEVENT_FLAG_STACK_EVENTS
) == 0);
3319 if (IS_64BIT_PROCESS(p
)) {
3320 struct user64_kevent kev64
;
3322 advance
= sizeof (kev64
);
3323 bzero(&kev64
, advance
);
3326 * deal with the special case of a user-supplied
3327 * value of (uintptr_t)-1.
3329 kev64
.ident
= (kevp
->ident
== (uintptr_t)-1) ?
3330 (uint64_t)-1LL : (uint64_t)kevp
->ident
;
3332 kev64
.filter
= kevp
->filter
;
3333 kev64
.flags
= kevp
->flags
;
3334 kev64
.fflags
= kevp
->fflags
;
3335 kev64
.data
= (int64_t) kevp
->data
;
3336 kev64
.udata
= kevp
->udata
;
3337 error
= copyout((caddr_t
)&kev64
, addr
, advance
);
3339 struct user32_kevent kev32
;
3341 advance
= sizeof (kev32
);
3342 bzero(&kev32
, advance
);
3343 kev32
.ident
= (uint32_t)kevp
->ident
;
3344 kev32
.filter
= kevp
->filter
;
3345 kev32
.flags
= kevp
->flags
;
3346 kev32
.fflags
= kevp
->fflags
;
3347 kev32
.data
= (int32_t)kevp
->data
;
3348 kev32
.udata
= kevp
->udata
;
3349 error
= copyout((caddr_t
)&kev32
, addr
, advance
);
3351 } else if (flags
& KEVENT_FLAG_LEGACY64
) {
3352 struct kevent64_s kev64
;
3354 advance
= sizeof (struct kevent64_s
);
3355 if (flags
& KEVENT_FLAG_STACK_EVENTS
) {
3358 bzero(&kev64
, advance
);
3359 kev64
.ident
= kevp
->ident
;
3360 kev64
.filter
= kevp
->filter
;
3361 kev64
.flags
= kevp
->flags
;
3362 kev64
.fflags
= kevp
->fflags
;
3363 kev64
.data
= (int64_t) kevp
->data
;
3364 kev64
.udata
= kevp
->udata
;
3365 kev64
.ext
[0] = kevp
->ext
[0];
3366 kev64
.ext
[1] = kevp
->ext
[1];
3367 error
= copyout((caddr_t
)&kev64
, addr
, advance
);
3369 struct kevent_qos_s kevqos
;
3371 advance
= sizeof (struct kevent_qos_s
);
3372 if (flags
& KEVENT_FLAG_STACK_EVENTS
) {
3375 bzero(&kevqos
, advance
);
3376 kevqos
.ident
= kevp
->ident
;
3377 kevqos
.filter
= kevp
->filter
;
3378 kevqos
.flags
= kevp
->flags
;
3379 kevqos
.qos
= kevp
->qos
;
3380 kevqos
.udata
= kevp
->udata
;
3381 kevqos
.fflags
= kevp
->fflags
;
3383 kevqos
.data
= (int64_t) kevp
->data
;
3384 kevqos
.ext
[0] = kevp
->ext
[0];
3385 kevqos
.ext
[1] = kevp
->ext
[1];
3386 kevqos
.ext
[2] = kevp
->ext
[2];
3387 kevqos
.ext
[3] = kevp
->ext
[3];
3388 error
= copyout((caddr_t
)&kevqos
, addr
, advance
);
3391 if (flags
& KEVENT_FLAG_STACK_EVENTS
)
3394 *addrp
= addr
+ advance
;
3400 kevent_get_data_size(struct proc
*p
,
3401 uint64_t data_available
,
3403 user_size_t
*residp
)
3408 if (data_available
!= USER_ADDR_NULL
) {
3409 if (flags
& KEVENT_FLAG_KERNEL
) {
3410 resid
= *(user_size_t
*)(uintptr_t)data_available
;
3411 } else if (IS_64BIT_PROCESS(p
)) {
3412 user64_size_t usize
;
3413 error
= copyin((user_addr_t
)data_available
, &usize
, sizeof(usize
));
3414 resid
= (user_size_t
)usize
;
3416 user32_size_t usize
;
3417 error
= copyin((user_addr_t
)data_available
, &usize
, sizeof(usize
));
3418 resid
= (user_size_t
)usize
;
3430 kevent_put_data_size(struct proc
*p
,
3431 uint64_t data_available
,
3437 if (data_available
) {
3438 if (flags
& KEVENT_FLAG_KERNEL
) {
3439 *(user_size_t
*)(uintptr_t)data_available
= resid
;
3440 } else if (IS_64BIT_PROCESS(p
)) {
3441 user64_size_t usize
= (user64_size_t
)resid
;
3442 error
= copyout(&usize
, (user_addr_t
)data_available
, sizeof(usize
));
3444 user32_size_t usize
= (user32_size_t
)resid
;
3445 error
= copyout(&usize
, (user_addr_t
)data_available
, sizeof(usize
));
3452 * kevent_continue - continue a kevent syscall after blocking
3454 * assume we inherit a use count on the kq fileglob.
3457 __attribute__((noreturn
))
3459 kevent_continue(__unused
struct kqueue
*kq
, void *data
, int error
)
3461 struct _kevent
*cont_args
;
3462 struct fileproc
*fp
;
3463 uint64_t data_available
;
3464 user_size_t data_size
;
3465 user_size_t data_resid
;
3470 struct proc
*p
= current_proc();
3472 cont_args
= (struct _kevent
*)data
;
3473 data_available
= cont_args
->data_available
;
3474 flags
= cont_args
->process_data
.fp_flags
;
3475 data_size
= cont_args
->process_data
.fp_data_size
;
3476 data_resid
= cont_args
->process_data
.fp_data_resid
;
3477 noutputs
= cont_args
->eventout
;
3478 retval
= cont_args
->retval
;
3482 kevent_put_kq(p
, fd
, fp
, kq
);
3484 /* don't abandon other output just because of residual copyout failures */
3485 if (error
== 0 && data_available
&& data_resid
!= data_size
) {
3486 (void)kevent_put_data_size(p
, data_available
, flags
, data_resid
);
3489 /* don't restart after signals... */
3490 if (error
== ERESTART
)
3492 else if (error
== EWOULDBLOCK
)
3496 unix_syscall_return(error
);
3500 * kevent - [syscall] register and wait for kernel events
3504 kevent(struct proc
*p
, struct kevent_args
*uap
, int32_t *retval
)
3506 unsigned int flags
= KEVENT_FLAG_LEGACY32
;
3508 return kevent_internal(p
,
3509 (kqueue_id_t
)uap
->fd
, NULL
,
3510 uap
->changelist
, uap
->nchanges
,
3511 uap
->eventlist
, uap
->nevents
,
3520 kevent64(struct proc
*p
, struct kevent64_args
*uap
, int32_t *retval
)
3524 /* restrict to user flags and set legacy64 */
3525 flags
= uap
->flags
& KEVENT_FLAG_USER
;
3526 flags
|= KEVENT_FLAG_LEGACY64
;
3528 return kevent_internal(p
,
3529 (kqueue_id_t
)uap
->fd
, NULL
,
3530 uap
->changelist
, uap
->nchanges
,
3531 uap
->eventlist
, uap
->nevents
,
3540 kevent_qos(struct proc
*p
, struct kevent_qos_args
*uap
, int32_t *retval
)
3542 /* restrict to user flags */
3543 uap
->flags
&= KEVENT_FLAG_USER
;
3545 return kevent_internal(p
,
3546 (kqueue_id_t
)uap
->fd
, NULL
,
3547 uap
->changelist
, uap
->nchanges
,
3548 uap
->eventlist
, uap
->nevents
,
3549 uap
->data_out
, (uint64_t)uap
->data_available
,
3557 kevent_qos_internal(struct proc
*p
, int fd
,
3558 user_addr_t changelist
, int nchanges
,
3559 user_addr_t eventlist
, int nevents
,
3560 user_addr_t data_out
, user_size_t
*data_available
,
3564 return kevent_internal(p
,
3565 (kqueue_id_t
)fd
, NULL
,
3566 changelist
, nchanges
,
3568 data_out
, (uint64_t)data_available
,
3569 (flags
| KEVENT_FLAG_KERNEL
),
3576 kevent_id(struct proc
*p
, struct kevent_id_args
*uap
, int32_t *retval
)
3578 /* restrict to user flags */
3579 uap
->flags
&= KEVENT_FLAG_USER
;
3581 return kevent_internal(p
,
3582 (kqueue_id_t
)uap
->id
, NULL
,
3583 uap
->changelist
, uap
->nchanges
,
3584 uap
->eventlist
, uap
->nevents
,
3585 uap
->data_out
, (uint64_t)uap
->data_available
,
3586 (uap
->flags
| KEVENT_FLAG_DYNAMIC_KQUEUE
),
3593 kevent_id_internal(struct proc
*p
, kqueue_id_t
*id
,
3594 user_addr_t changelist
, int nchanges
,
3595 user_addr_t eventlist
, int nevents
,
3596 user_addr_t data_out
, user_size_t
*data_available
,
3600 return kevent_internal(p
,
3602 changelist
, nchanges
,
3604 data_out
, (uint64_t)data_available
,
3605 (flags
| KEVENT_FLAG_KERNEL
| KEVENT_FLAG_DYNAMIC_KQUEUE
),
3612 kevent_get_timeout(struct proc
*p
,
3613 user_addr_t utimeout
,
3615 struct timeval
*atvp
)
3620 if (flags
& KEVENT_FLAG_IMMEDIATE
) {
3621 getmicrouptime(&atv
);
3622 } else if (utimeout
!= USER_ADDR_NULL
) {
3624 if (flags
& KEVENT_FLAG_KERNEL
) {
3625 struct timespec
*tsp
= (struct timespec
*)utimeout
;
3626 TIMESPEC_TO_TIMEVAL(&rtv
, tsp
);
3627 } else if (IS_64BIT_PROCESS(p
)) {
3628 struct user64_timespec ts
;
3629 error
= copyin(utimeout
, &ts
, sizeof(ts
));
3630 if ((ts
.tv_sec
& 0xFFFFFFFF00000000ull
) != 0)
3633 TIMESPEC_TO_TIMEVAL(&rtv
, &ts
);
3635 struct user32_timespec ts
;
3636 error
= copyin(utimeout
, &ts
, sizeof(ts
));
3637 TIMESPEC_TO_TIMEVAL(&rtv
, &ts
);
3641 if (itimerfix(&rtv
))
3643 getmicrouptime(&atv
);
3644 timevaladd(&atv
, &rtv
);
3646 /* wait forever value */
3655 kevent_set_kq_mode(struct kqueue
*kq
, unsigned int flags
)
3657 /* each kq should only be used for events of one type */
3659 if (kq
->kq_state
& (KQ_KEV32
| KQ_KEV64
| KQ_KEV_QOS
)) {
3660 if (flags
& KEVENT_FLAG_LEGACY32
) {
3661 if ((kq
->kq_state
& KQ_KEV32
) == 0) {
3665 } else if (kq
->kq_state
& KQ_KEV32
) {
3669 } else if (flags
& KEVENT_FLAG_LEGACY32
) {
3670 kq
->kq_state
|= KQ_KEV32
;
3671 } else if (flags
& KEVENT_FLAG_LEGACY64
) {
3672 kq
->kq_state
|= KQ_KEV64
;
3674 kq
->kq_state
|= KQ_KEV_QOS
;
3680 #define KQ_HASH(val, mask) (((val) ^ (val >> 8)) & (mask))
3681 #define CONFIG_KQ_HASHSIZE CONFIG_KN_HASHSIZE
3684 kqhash_lock(proc_t p
)
3686 lck_mtx_lock_spin_always(&p
->p_fd
->fd_kqhashlock
);
3690 kqhash_lock_held(__assert_only proc_t p
)
3692 LCK_MTX_ASSERT(&p
->p_fd
->fd_kqhashlock
, LCK_MTX_ASSERT_OWNED
);
3696 kqhash_unlock(proc_t p
)
3698 lck_mtx_unlock(&p
->p_fd
->fd_kqhashlock
);
3702 kqueue_hash_init_if_needed(proc_t p
)
3704 struct filedesc
*fdp
= p
->p_fd
;
3706 kqhash_lock_held(p
);
3708 if (__improbable(fdp
->fd_kqhash
== NULL
)) {
3709 struct kqlist
*alloc_hash
;
3713 alloc_hash
= hashinit(CONFIG_KQ_HASHSIZE
, M_KQUEUE
, &alloc_mask
);
3716 /* See if we won the race */
3717 if (fdp
->fd_kqhashmask
== 0) {
3718 fdp
->fd_kqhash
= alloc_hash
;
3719 fdp
->fd_kqhashmask
= alloc_mask
;
3722 FREE(alloc_hash
, M_KQUEUE
);
3729 * Called with the kqhash_lock() held
3737 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3738 struct filedesc
*fdp
= p
->p_fd
;
3739 struct kqlist
*list
;
3741 /* should hold the kq hash lock */
3742 kqhash_lock_held(p
);
3744 if ((kq
->kq_state
& KQ_DYNAMIC
) == 0) {
3745 assert(kq
->kq_state
& KQ_DYNAMIC
);
3749 /* only dynamically allocate workloop kqs for now */
3750 assert(kq
->kq_state
& KQ_WORKLOOP
);
3751 assert(fdp
->fd_kqhash
);
3753 kqwl
->kqwl_dynamicid
= id
;
3755 list
= &fdp
->fd_kqhash
[KQ_HASH(id
, fdp
->fd_kqhashmask
)];
3756 SLIST_INSERT_HEAD(list
, kqwl
, kqwl_hashlink
);
3759 /* Called with kqhash_lock held */
3765 struct kqworkloop
*kqwl
= (struct kqworkloop
*)kq
;
3766 struct filedesc
*fdp
= p
->p_fd
;
3767 struct kqlist
*list
;
3769 /* should hold the kq hash lock */
3770 kqhash_lock_held(p
);
3772 if ((kq
->kq_state
& KQ_DYNAMIC
) == 0) {
3773 assert(kq
->kq_state
& KQ_DYNAMIC
);
3776 assert(kq
->kq_state
& KQ_WORKLOOP
); /* for now */
3777 list
= &fdp
->fd_kqhash
[KQ_HASH(kqwl
->kqwl_dynamicid
, fdp
->fd_kqhashmask
)];
3778 SLIST_REMOVE(list
, kqwl
, kqworkloop
, kqwl_hashlink
);
3781 /* Called with kqhash_lock held */
3782 static struct kqueue
*
3783 kqueue_hash_lookup(struct proc
*p
, kqueue_id_t id
)
3785 struct filedesc
*fdp
= p
->p_fd
;
3786 struct kqlist
*list
;
3787 struct kqworkloop
*kqwl
;
3789 /* should hold the kq hash lock */
3790 kqhash_lock_held(p
);
3792 if (fdp
->fd_kqhashmask
== 0) return NULL
;
3794 list
= &fdp
->fd_kqhash
[KQ_HASH(id
, fdp
->fd_kqhashmask
)];
3795 SLIST_FOREACH(kqwl
, list
, kqwl_hashlink
) {
3796 if (kqwl
->kqwl_dynamicid
== id
) {
3797 struct kqueue
*kq
= (struct kqueue
*)kqwl
;
3799 assert(kq
->kq_state
& KQ_DYNAMIC
);
3800 assert(kq
->kq_state
& KQ_WORKLOOP
); /* for now */
3808 kqueue_release_last(struct proc
*p
, struct kqueue
*kq
)
3810 if (kq
->kq_state
& KQ_DYNAMIC
) {
3812 if (kqueue_release(kq
, KQUEUE_MIGHT_BE_LAST_REF
)) {
3813 kqueue_hash_remove(p
, kq
);
3822 static struct kqueue
*
3823 kevent_get_bound_kq(__assert_only
struct proc
*p
, thread_t thread
,
3824 unsigned int kev_flags
, unsigned int kq_flags
)
3827 struct uthread
*ut
= get_bsdthread_info(thread
);
3829 assert(p
== get_bsdthreadtask_info(thread
));
3831 if (!(ut
->uu_kqueue_flags
& kev_flags
))
3834 kq
= ut
->uu_kqueue_bound
;
3838 if (!(kq
->kq_state
& kq_flags
))
3845 kevent_get_kq(struct proc
*p
, kqueue_id_t id
, unsigned int flags
, struct fileproc
**fpp
, int *fdp
, struct kqueue
**kqp
)
3847 struct filedesc
*descp
= p
->p_fd
;
3848 struct fileproc
*fp
= NULL
;
3853 /* Was the workloop flag passed? Then it is for sure only a workloop */
3854 if (flags
& KEVENT_FLAG_DYNAMIC_KQUEUE
) {
3855 assert(flags
& KEVENT_FLAG_WORKLOOP
);
3856 if (id
== (kqueue_id_t
)-1 &&
3857 (flags
& KEVENT_FLAG_KERNEL
) &&
3858 (flags
& KEVENT_FLAG_WORKLOOP
)) {
3860 assert(is_workqueue_thread(current_thread()));
3863 * when kevent_id_internal is called from within the
3864 * kernel, and the passed 'id' value is '-1' then we
3865 * look for the currently bound workloop kq.
3867 * Until pthread kext avoids calling in to kevent_id_internal
3868 * for threads whose fulfill is canceled, calling in unbound
3871 kq
= kevent_get_bound_kq(p
, current_thread(),
3872 KEVENT_FLAG_WORKLOOP
, KQ_WORKLOOP
);
3876 struct uthread
*ut
= get_bsdthread_info(current_thread());
3878 /* If thread is unbound due to cancel, just return an error */
3879 if (ut
->uu_kqueue_flags
== KEVENT_FLAG_WORKLOOP_CANCELED
) {
3880 ut
->uu_kqueue_flags
= 0;
3883 panic("Unbound thread called kevent_internal with id=-1"
3884 " uu_kqueue_flags:0x%x, uu_kqueue_bound:%p",
3885 ut
->uu_kqueue_flags
, ut
->uu_kqueue_bound
);
3895 /* try shortcut on kq lookup for bound threads */
3896 kq
= kevent_get_bound_kq(p
, current_thread(), KEVENT_FLAG_WORKLOOP
, KQ_WORKLOOP
);
3897 if (kq
!= NULL
&& ((struct kqworkloop
*)kq
)->kqwl_dynamicid
== id
) {
3899 if (flags
& KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST
) {
3905 /* retain a reference while working with this kq. */
3906 assert(kq
->kq_state
& KQ_DYNAMIC
);
3912 /* look for the kq on the hash table */
3914 kq
= kqueue_hash_lookup(p
, id
);
3918 if (flags
& KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST
) {
3923 struct kqueue
*alloc_kq
;
3924 alloc_kq
= kqueue_alloc(p
, flags
);
3927 kqueue_hash_init_if_needed(p
);
3928 kq
= kqueue_hash_lookup(p
, id
);
3930 /* insert our new one */
3932 kqueue_hash_insert(p
, id
, kq
);
3935 /* lost race, retain existing workloop */
3938 kqueue_release(alloc_kq
, KQUEUE_MIGHT_BE_LAST_REF
);
3939 kqueue_dealloc(alloc_kq
);
3947 if (flags
& KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST
) {
3954 /* retain a reference while working with this kq. */
3955 assert(kq
->kq_state
& KQ_DYNAMIC
);
3960 } else if (flags
& KEVENT_FLAG_WORKQ
) {
3961 /* must already exist for bound threads. */
3962 if (flags
& KEVENT_FLAG_KERNEL
) {
3963 assert(descp
->fd_wqkqueue
!= NULL
);
3967 * use the private kq associated with the proc workq.
3968 * Just being a thread within the process (and not
3969 * being the exit/exec thread) is enough to hold a
3970 * reference on this special kq.
3972 kq
= descp
->fd_wqkqueue
;
3974 struct kqueue
*alloc_kq
= kqueue_alloc(p
, KEVENT_FLAG_WORKQ
);
3975 if (alloc_kq
== NULL
)
3979 if (descp
->fd_wqkqueue
== NULL
) {
3980 kq
= descp
->fd_wqkqueue
= alloc_kq
;
3984 kq
= descp
->fd_wqkqueue
;
3985 kqueue_dealloc(alloc_kq
);
3989 /* get a usecount for the kq itself */
3991 if ((error
= fp_getfkq(p
, fd
, &fp
, &kq
)) != 0)
3994 if ((error
= kevent_set_kq_mode(kq
, flags
)) != 0) {
3995 /* drop the usecount */
3997 fp_drop(p
, fd
, fp
, 0);
4013 struct fileproc
*fp
,
4016 kqueue_release_last(p
, kq
);
4018 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
4019 fp_drop(p
, (int)id
, fp
, 0);
4024 kevent_workloop_serial_no_copyin(proc_t p
, uint64_t workloop_id
)
4026 uint64_t serial_no
= 0;
4030 if (workloop_id
== 0 || p
->p_dispatchqueue_serialno_offset
== 0) {
4033 addr
= (user_addr_t
)(workloop_id
+ p
->p_dispatchqueue_serialno_offset
);
4035 if (proc_is64bit(p
)) {
4036 rc
= copyin(addr
, (caddr_t
)&serial_no
, sizeof(serial_no
));
4038 uint32_t serial_no32
= 0;
4039 rc
= copyin(addr
, (caddr_t
)&serial_no32
, sizeof(serial_no32
));
4040 serial_no
= serial_no32
;
4042 return rc
== 0 ? serial_no
: 0;
4046 kevent_exit_on_workloop_ownership_leak(thread_t thread
)
4048 proc_t p
= current_proc();
4049 struct filedesc
*fdp
= p
->p_fd
;
4050 kqueue_id_t workloop_id
= 0;
4052 mach_vm_address_t addr
;
4053 uint32_t reason_size
;
4056 if (fdp
->fd_kqhashmask
> 0) {
4057 for (uint32_t i
= 0; i
< fdp
->fd_kqhashmask
+ 1; i
++) {
4058 struct kqworkloop
*kqwl
;
4060 SLIST_FOREACH(kqwl
, &fdp
->fd_kqhash
[i
], kqwl_hashlink
) {
4061 struct kqueue
*kq
= &kqwl
->kqwl_kqueue
;
4062 if ((kq
->kq_state
& KQ_DYNAMIC
) && kqwl
->kqwl_owner
== thread
) {
4063 workloop_id
= kqwl
->kqwl_dynamicid
;
4070 assert(workloop_id
);
4072 reason
= os_reason_create(OS_REASON_LIBSYSTEM
,
4073 OS_REASON_LIBSYSTEM_CODE_WORKLOOP_OWNERSHIP_LEAK
);
4074 if (reason
== OS_REASON_NULL
) {
4078 reason
->osr_flags
|= OS_REASON_FLAG_GENERATE_CRASH_REPORT
;
4079 reason_size
= 2 * sizeof(uint64_t);
4080 reason_size
= kcdata_estimate_required_buffer_size(2, reason_size
);
4081 if (os_reason_alloc_buffer(reason
, reason_size
) != 0) {
4085 struct kcdata_descriptor
*kcd
= &reason
->osr_kcd_descriptor
;
4087 if (kcdata_get_memory_addr(kcd
, EXIT_REASON_WORKLOOP_ID
,
4088 sizeof(workloop_id
), &addr
) == KERN_SUCCESS
) {
4089 kcdata_memcpy(kcd
, addr
, &workloop_id
, sizeof(workloop_id
));
4092 uint64_t serial_no
= kevent_workloop_serial_no_copyin(p
, workloop_id
);
4093 if (serial_no
&& kcdata_get_memory_addr(kcd
, EXIT_REASON_DISPATCH_QUEUE_NO
,
4094 sizeof(serial_no
), &addr
) == KERN_SUCCESS
) {
4095 kcdata_memcpy(kcd
, addr
, &serial_no
, sizeof(serial_no
));
4099 #if DEVELOPMENT || DEBUG
4100 psignal_try_thread_with_reason(p
, thread
, SIGABRT
, reason
);
4103 return exit_with_reason(p
, W_EXITCODE(0, SIGKILL
), (int *)NULL
,
4104 FALSE
, FALSE
, 0, reason
);
4110 kevent_servicer_detach_preflight(thread_t thread
, unsigned int flags
, struct kqueue
*kq
)
4113 struct kqworkloop
*kqwl
;
4115 struct kqrequest
*kqr
;
4117 if (!(flags
& KEVENT_FLAG_WORKLOOP
) || !(kq
->kq_state
& KQ_WORKLOOP
))
4120 /* only kq created with KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD from userspace can have attached threads */
4121 if (!(kq
->kq_state
& KQ_NO_WQ_THREAD
))
4124 /* allow detach only on not wq threads */
4125 if (is_workqueue_thread(thread
))
4128 /* check that the current thread is bound to the requested wq */
4129 ut
= get_bsdthread_info(thread
);
4130 if (ut
->uu_kqueue_bound
!= kq
)
4133 kqwl
= (struct kqworkloop
*)kq
;
4134 kqwl_req_lock(kqwl
);
4135 kqr
= &kqwl
->kqwl_request
;
4137 /* check that the wq is bound to the thread */
4138 if ((kqr
->kqr_state
& KQR_BOUND
) == 0 || (kqr
->kqr_thread
!= thread
))
4141 kqwl_req_unlock(kqwl
);
4147 kevent_servicer_detach_thread(struct proc
*p
, kqueue_id_t id
, thread_t thread
,
4148 unsigned int flags
, struct kqueue
*kq
)
4150 struct kqworkloop
*kqwl
;
4153 assert((flags
& KEVENT_FLAG_WORKLOOP
) && (kq
->kq_state
& KQ_WORKLOOP
));
4155 /* allow detach only on not wqthreads threads */
4156 assert(!is_workqueue_thread(thread
));
4158 /* only kq created with KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD from userspace can have attached threads */
4159 assert(kq
->kq_state
& KQ_NO_WQ_THREAD
);
4161 /* check that the current thread is bound to the requested kq */
4162 ut
= get_bsdthread_info(thread
);
4163 assert(ut
->uu_kqueue_bound
== kq
);
4165 kqwl
= (struct kqworkloop
*)kq
;
4169 /* unbind the thread.
4170 * unbind itself checks if still processing and ends it.
4172 kqworkloop_unbind_thread(kqwl
, thread
, flags
);
4176 kevent_put_kq(p
, id
, NULL
, kq
);
4182 kevent_servicer_attach_thread(thread_t thread
, unsigned int flags
, struct kqueue
*kq
)
4185 struct kqworkloop
*kqwl
;
4187 struct kqrequest
*kqr
;
4189 if (!(flags
& KEVENT_FLAG_WORKLOOP
) || !(kq
->kq_state
& KQ_WORKLOOP
))
4192 /* only kq created with KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD from userspace can have attached threads*/
4193 if (!(kq
->kq_state
& KQ_NO_WQ_THREAD
))
4196 /* allow attach only on not wqthreads */
4197 if (is_workqueue_thread(thread
))
4200 /* check that the thread is not already bound */
4201 ut
= get_bsdthread_info(thread
);
4202 if (ut
->uu_kqueue_bound
!= NULL
)
4205 assert(ut
->uu_kqueue_flags
== 0);
4208 kqwl
= (struct kqworkloop
*)kq
;
4209 kqwl_req_lock(kqwl
);
4210 kqr
= &kqwl
->kqwl_request
;
4212 /* check that the kqueue is not already bound */
4213 if (kqr
->kqr_state
& (KQR_BOUND
| KQR_THREQUESTED
| KQR_DRAIN
)) {
4218 assert(kqr
->kqr_thread
== NULL
);
4219 assert((kqr
->kqr_state
& KQR_PROCESSING
) == 0);
4221 kqr
->kqr_state
|= KQR_THREQUESTED
;
4222 kqr
->kqr_qos_index
= THREAD_QOS_UNSPECIFIED
;
4223 kqr
->kqr_override_index
= THREAD_QOS_UNSPECIFIED
;
4224 kqr
->kqr_dsync_owner_qos
= THREAD_QOS_UNSPECIFIED
;
4225 kqr
->kqr_owner_override_is_sync
= 0;
4227 kqworkloop_bind_thread_impl(kqwl
, thread
, KEVENT_FLAG_WORKLOOP
);
4229 /* get a ref on the wlkq on behalf of the attached thread */
4233 kqwl_req_unlock(kqwl
);
4240 boolean_t
kevent_args_requesting_events(unsigned int flags
, int nevents
)
4242 return (!(flags
& KEVENT_FLAG_ERROR_EVENTS
) && nevents
> 0);
4246 kevent_internal(struct proc
*p
,
4247 kqueue_id_t id
, kqueue_id_t
*id_out
,
4248 user_addr_t changelist
, int nchanges
,
4249 user_addr_t ueventlist
, int nevents
,
4250 user_addr_t data_out
, uint64_t data_available
,
4252 user_addr_t utimeout
,
4253 kqueue_continue_t continuation
,
4256 struct _kevent
*cont_args
;
4259 struct fileproc
*fp
= NULL
;
4261 struct kevent_internal_s kev
;
4262 int error
, noutputs
;
4264 user_size_t data_size
;
4265 user_size_t data_resid
;
4266 thread_t thread
= current_thread();
4268 /* Don't allow user-space threads to process output events from the workq kqs */
4269 if (((flags
& (KEVENT_FLAG_WORKQ
| KEVENT_FLAG_KERNEL
)) == KEVENT_FLAG_WORKQ
) &&
4270 kevent_args_requesting_events(flags
, nevents
))
4273 /* restrict dynamic kqueue allocation to workloops (for now) */
4274 if ((flags
& (KEVENT_FLAG_DYNAMIC_KQUEUE
| KEVENT_FLAG_WORKLOOP
)) == KEVENT_FLAG_DYNAMIC_KQUEUE
)
4277 if ((flags
& (KEVENT_FLAG_WORKLOOP
)) && (flags
& (KEVENT_FLAG_WORKQ
)))
4280 if (flags
& (KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH
| KEVENT_FLAG_WORKLOOP_SERVICER_DETACH
|
4281 KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST
| KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST
| KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD
)) {
4283 /* allowed only on workloops when calling kevent_id from user-space */
4284 if (!(flags
& KEVENT_FLAG_WORKLOOP
) || (flags
& KEVENT_FLAG_KERNEL
) || !(flags
& KEVENT_FLAG_DYNAMIC_KQUEUE
))
4287 /* cannot attach and detach simultaneously*/
4288 if ((flags
& KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH
) && (flags
& KEVENT_FLAG_WORKLOOP_SERVICER_DETACH
))
4291 /* cannot ask for events and detach */
4292 if ((flags
& KEVENT_FLAG_WORKLOOP_SERVICER_DETACH
) && kevent_args_requesting_events(flags
, nevents
))
4297 /* prepare to deal with stack-wise allocation of out events */
4298 if (flags
& KEVENT_FLAG_STACK_EVENTS
) {
4299 int scale
= ((flags
& KEVENT_FLAG_LEGACY32
) ?
4300 (IS_64BIT_PROCESS(p
) ? sizeof(struct user64_kevent
) :
4301 sizeof(struct user32_kevent
)) :
4302 ((flags
& KEVENT_FLAG_LEGACY64
) ? sizeof(struct kevent64_s
) :
4303 sizeof(struct kevent_qos_s
)));
4304 ueventlist
+= nevents
* scale
;
4307 /* convert timeout to absolute - if we have one (and not immediate) */
4308 error
= kevent_get_timeout(p
, utimeout
, flags
, &atv
);
4312 /* copyin initial value of data residual from data_available */
4313 error
= kevent_get_data_size(p
, data_available
, flags
, &data_size
);
4317 /* get the kq we are going to be working on */
4318 error
= kevent_get_kq(p
, id
, flags
, &fp
, &fd
, &kq
);
4322 /* only bound threads can receive events on workloops */
4323 if ((flags
& KEVENT_FLAG_WORKLOOP
) && kevent_args_requesting_events(flags
, nevents
)) {
4324 ut
= (uthread_t
)get_bsdthread_info(thread
);
4325 if (ut
->uu_kqueue_bound
!= kq
) {
4332 /* attach the current thread if necessary */
4333 if (flags
& KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH
) {
4334 error
= kevent_servicer_attach_thread(thread
, flags
, kq
);
4339 /* before processing events and committing to the system call, return an error if the thread cannot be detached when requested */
4340 if (flags
& KEVENT_FLAG_WORKLOOP_SERVICER_DETACH
) {
4341 error
= kevent_servicer_detach_preflight(thread
, flags
, kq
);
4347 if (id_out
&& kq
&& (flags
& KEVENT_FLAG_WORKLOOP
)) {
4348 assert(kq
->kq_state
& KQ_WORKLOOP
);
4349 struct kqworkloop
*kqwl
;
4350 kqwl
= (struct kqworkloop
*)kq
;
4351 *id_out
= kqwl
->kqwl_dynamicid
;
4354 /* register all the change requests the user provided... */
4356 while (nchanges
> 0 && error
== 0) {
4357 error
= kevent_copyin(&changelist
, &kev
, p
, flags
);
4361 /* Make sure user doesn't pass in any system flags */
4362 kev
.flags
&= ~EV_SYSFLAGS
;
4364 kevent_register(kq
, &kev
, p
);
4367 ((kev
.flags
& EV_ERROR
) || (kev
.flags
& EV_RECEIPT
))) {
4368 if (kev
.flags
& EV_RECEIPT
) {
4369 kev
.flags
|= EV_ERROR
;
4372 error
= kevent_copyout(&kev
, &ueventlist
, p
, flags
);
4377 } else if (kev
.flags
& EV_ERROR
) {
4383 /* short-circuit the scan if we only want error events */
4384 if (flags
& KEVENT_FLAG_ERROR_EVENTS
)
4387 /* process pending events */
4388 if (nevents
> 0 && noutputs
== 0 && error
== 0) {
4389 /* store the continuation/completion data in the uthread */
4390 ut
= (uthread_t
)get_bsdthread_info(thread
);
4391 cont_args
= &ut
->uu_kevent
.ss_kevent
;
4394 cont_args
->retval
= retval
;
4395 cont_args
->eventlist
= ueventlist
;
4396 cont_args
->eventcount
= nevents
;
4397 cont_args
->eventout
= noutputs
;
4398 cont_args
->data_available
= data_available
;
4399 cont_args
->process_data
.fp_fd
= (int)id
;
4400 cont_args
->process_data
.fp_flags
= flags
;
4401 cont_args
->process_data
.fp_data_out
= data_out
;
4402 cont_args
->process_data
.fp_data_size
= data_size
;
4403 cont_args
->process_data
.fp_data_resid
= data_size
;
4405 error
= kqueue_scan(kq
, kevent_callback
,
4406 continuation
, cont_args
,
4407 &cont_args
->process_data
,
4410 /* process remaining outputs */
4411 noutputs
= cont_args
->eventout
;
4412 data_resid
= cont_args
->process_data
.fp_data_resid
;
4414 /* copyout residual data size value (if it needs to be copied out) */
4415 /* don't abandon other output just because of residual copyout failures */
4416 if (error
== 0 && data_available
&& data_resid
!= data_size
) {
4417 (void)kevent_put_data_size(p
, data_available
, flags
, data_resid
);
4421 /* detach the current thread if necessary */
4422 if (flags
& KEVENT_FLAG_WORKLOOP_SERVICER_DETACH
) {
4424 kevent_servicer_detach_thread(p
, id
, thread
, flags
, kq
);
4428 kevent_put_kq(p
, id
, fp
, kq
);
4430 /* don't restart after signals... */
4431 if (error
== ERESTART
)
4433 else if (error
== EWOULDBLOCK
)
4442 * kevent_callback - callback for each individual event
4444 * called with nothing locked
4445 * caller holds a reference on the kqueue
4448 kevent_callback(__unused
struct kqueue
*kq
, struct kevent_internal_s
*kevp
,
4451 struct _kevent
*cont_args
;
4454 cont_args
= (struct _kevent
*)data
;
4455 assert(cont_args
->eventout
< cont_args
->eventcount
);
4458 * Copy out the appropriate amount of event data for this user.
4460 error
= kevent_copyout(kevp
, &cont_args
->eventlist
, current_proc(),
4461 cont_args
->process_data
.fp_flags
);
4464 * If there isn't space for additional events, return
4465 * a harmless error to stop the processing here
4467 if (error
== 0 && ++cont_args
->eventout
== cont_args
->eventcount
)
4468 error
= EWOULDBLOCK
;
4473 * kevent_description - format a description of a kevent for diagnostic output
4475 * called with a 256-byte string buffer
4479 kevent_description(struct kevent_internal_s
*kevp
, char *s
, size_t n
)
4483 "{.ident=%#llx, .filter=%d, .flags=%#x, .udata=%#llx, .fflags=%#x, .data=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}",
4497 * kevent_register - add a new event to a kqueue
4499 * Creates a mapping between the event source and
4500 * the kqueue via a knote data structure.
4502 * Because many/most the event sources are file
4503 * descriptor related, the knote is linked off
4504 * the filedescriptor table for quick access.
4506 * called with nothing locked
4507 * caller holds a reference on the kqueue
4511 kevent_register(struct kqueue
*kq
, struct kevent_internal_s
*kev
,
4512 __unused
struct proc
*ctxp
)
4514 struct proc
*p
= kq
->kq_p
;
4515 const struct filterops
*fops
;
4516 struct knote
*kn
= NULL
;
4519 unsigned short kev_flags
= kev
->flags
;
4520 int knoteuse_flags
= KNUSE_NONE
;
4522 if (kev
->filter
< 0) {
4523 if (kev
->filter
+ EVFILT_SYSCOUNT
< 0) {
4527 fops
= sysfilt_ops
[~kev
->filter
]; /* to 0-base index */
4533 /* restrict EV_VANISHED to adding udata-specific dispatch kevents */
4534 if ((kev
->flags
& EV_VANISHED
) &&
4535 (kev
->flags
& (EV_ADD
| EV_DISPATCH2
)) != (EV_ADD
| EV_DISPATCH2
)) {
4540 /* Simplify the flags - delete and disable overrule */
4541 if (kev
->flags
& EV_DELETE
)
4542 kev
->flags
&= ~EV_ADD
;
4543 if (kev
->flags
& EV_DISABLE
)
4544 kev
->flags
&= ~EV_ENABLE
;
4546 if (kq
->kq_state
& KQ_WORKLOOP
) {
4547 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_REGISTER
),
4548 ((struct kqworkloop
*)kq
)->kqwl_dynamicid
,
4549 kev
->udata
, kev
->flags
, kev
->filter
);
4550 } else if (kq
->kq_state
& KQ_WORKQ
) {
4551 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_REGISTER
),
4552 0, kev
->udata
, kev
->flags
, kev
->filter
);
4554 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_REGISTER
),
4555 VM_KERNEL_UNSLIDE_OR_PERM(kq
),
4556 kev
->udata
, kev
->flags
, kev
->filter
);
4561 /* find the matching knote from the fd tables/hashes */
4562 kn
= kq_find_knote_and_kq_lock(kq
, kev
, fops
->f_isfd
, p
);
4565 if (kev
->flags
& EV_ADD
) {
4566 struct fileproc
*knote_fp
= NULL
;
4568 /* grab a file reference for the new knote */
4570 if ((error
= fp_lookup(p
, kev
->ident
, &knote_fp
, 0)) != 0) {
4578 if (knote_fp
!= NULL
)
4579 fp_drop(p
, kev
->ident
, knote_fp
, 0);
4583 kn
->kn_fp
= knote_fp
;
4584 knote_set_kq(kn
, kq
);
4585 kqueue_retain(kq
); /* retain a kq ref */
4586 kn
->kn_filtid
= ~kev
->filter
;
4587 kn
->kn_inuse
= 1; /* for f_attach() */
4588 kn
->kn_status
= KN_ATTACHING
| KN_ATTACHED
;
4590 /* was vanish support requested */
4591 if (kev
->flags
& EV_VANISHED
) {
4592 kev
->flags
&= ~EV_VANISHED
;
4593 kn
->kn_status
|= KN_REQVANISH
;
		/* snapshot matching/dispatching protocol flags into knote */
4597 if (kev
->flags
& EV_DISPATCH
)
4598 kn
->kn_status
|= KN_DISPATCH
;
4599 if (kev
->flags
& EV_UDATA_SPECIFIC
)
4600 kn
->kn_status
|= KN_UDATA_SPECIFIC
;
4603 * copy the kevent state into knote
4604 * protocol is that fflags and data
4605 * are saved off, and cleared before
4606 * calling the attach routine.
4608 kn
->kn_kevent
= *kev
;
4609 kn
->kn_sfflags
= kev
->fflags
;
4610 kn
->kn_sdata
= kev
->data
;
4614 /* invoke pthread kext to convert kevent qos to thread qos */
4615 knote_canonicalize_kevent_qos(kn
);
4616 knote_set_qos_index(kn
, qos_index_from_qos(kn
, kn
->kn_qos
, FALSE
));
4618 /* before anyone can find it */
4619 if (kev
->flags
& EV_DISABLE
) {
4621 * do this before anyone can find it,
4622 * this can't call knote_disable() because it expects having
4625 kn
->kn_status
|= KN_DISABLED
;
4628 /* Add the knote for lookup thru the fd table */
4629 error
= kq_add_knote(kq
, kn
, kev
, p
, &knoteuse_flags
);
4631 (void)kqueue_release(kq
, KQUEUE_CANT_BE_LAST_REF
);
4633 if (knote_fp
!= NULL
)
4634 fp_drop(p
, kev
->ident
, knote_fp
, 0);
4636 if (error
== ERESTART
) {
4643 /* fp reference count now applies to knote */
4644 /* rwlock boost is now held */
4646 /* call filter attach routine */
4647 result
= fops
->f_attach(kn
, kev
);
4650 * Trade knote use count for kq lock.
4651 * Cannot be dropped because we held
4652 * KN_ATTACHING throughout.
4654 knoteuse2kqlock(kq
, kn
, KNUSE_STEAL_DROP
| knoteuse_flags
);
4656 if (kn
->kn_flags
& EV_ERROR
) {
4658 * Failed to attach correctly, so drop.
4659 * All other possible users/droppers
4660 * have deferred to us. Save the error
4661 * to return to our caller.
4663 kn
->kn_status
&= ~KN_ATTACHED
;
4664 kn
->kn_status
|= KN_DROPPING
;
4665 error
= kn
->kn_data
;
4671 /* end "attaching" phase - now just attached */
4672 kn
->kn_status
&= ~KN_ATTACHING
;
4674 if (kn
->kn_status
& KN_DROPPING
) {
4676 * Attach succeeded, but someone else
4677 * deferred their drop - now we have
4678 * to do it for them.
		/* Mark the thread request overcommit - if apropos */
4686 knote_set_qos_overcommit(kn
);
4689 * If the attach routine indicated that an
4690 * event is already fired, activate the knote.
4695 if (knote_fops(kn
)->f_post_attach
) {
4696 error
= knote_fops(kn
)->f_post_attach(kn
, kev
);
4704 if ((kev_flags
& (EV_ADD
| EV_DELETE
)) == (EV_ADD
| EV_DELETE
) &&
4705 (kq
->kq_state
& KQ_WORKLOOP
)) {
4707 * For workloops, understand EV_ADD|EV_DELETE as a "soft" delete
4708 * that doesn't care about ENOENT, so just pretend the deletion
4718 /* existing knote: kqueue lock already taken by kq_find_knote_and_kq_lock */
4720 if ((kn
->kn_status
& (KN_DROPPING
| KN_ATTACHING
)) != 0) {
4722 * The knote is not in a stable state, wait for that
4723 * transition to complete and then redrive the lookup.
4725 knoteusewait(kq
, kn
);
4729 if (kev
->flags
& EV_DELETE
) {
4732 * If attempting to delete a disabled dispatch2 knote,
4733 * we must wait for the knote to be re-enabled (unless
4734 * it is being re-enabled atomically here).
4736 if ((kev
->flags
& EV_ENABLE
) == 0 &&
4737 (kn
->kn_status
& (KN_DISPATCH2
| KN_DISABLED
)) ==
4738 (KN_DISPATCH2
| KN_DISABLED
)) {
4739 kn
->kn_status
|= KN_DEFERDELETE
;
4741 error
= EINPROGRESS
;
4742 } else if (knote_fops(kn
)->f_drop_and_unlock
) {
4744 * The filter has requested to handle EV_DELETE events
4746 * ERESTART means the kevent has to be re-evaluated
4748 error
= knote_fops(kn
)->f_drop_and_unlock(kn
, kev
);
4749 if (error
== ERESTART
) {
4753 } else if (kqlock2knotedrop(kq
, kn
)) {
4754 /* standard/default EV_DELETE path */
4758 * The kqueue is unlocked, it's not being
4759 * dropped, and kqlock2knotedrop returned 0:
4760 * this means that someone stole the drop of
4761 * the knote from us.
4763 error
= EINPROGRESS
;
4769 * If we are re-enabling a deferred-delete knote,
4770 * just enable it now and avoid calling the
4771 * filter touch routine (it has delivered its
4772 * last event already).
4774 if ((kev
->flags
& EV_ENABLE
) &&
4775 (kn
->kn_status
& KN_DEFERDELETE
)) {
4776 assert(kn
->kn_status
& KN_DISABLED
);
4784 * If we are disabling, do it before unlocking and
4785 * calling the touch routine (so no processing can
4786 * see the new kevent state before the disable is
4789 if (kev
->flags
& EV_DISABLE
)
4793 * Convert the kqlock to a use reference on the
4794 * knote so we can call the filter touch routine.
4796 if (knoteuse_needs_boost(kn
, kev
)) {
4797 knoteuse_flags
|= KNUSE_BOOST
;
4799 if (kqlock2knoteuse(kq
, kn
, knoteuse_flags
)) {
4801 * Call touch routine to notify filter of changes
4802 * in filter values (and to re-determine if any
4803 * events are fired).
4805 result
= knote_fops(kn
)->f_touch(kn
, kev
);
4807 /* Get the kq lock back (don't defer droppers). */
4808 if (!knoteuse2kqlock(kq
, kn
, knoteuse_flags
)) {
4813 /* Handle errors during touch routine */
4814 if (kev
->flags
& EV_ERROR
) {
4820 /* Activate it if the touch routine said to */
4825 /* Enable the knote if called for */
4826 if (kev
->flags
& EV_ENABLE
)
4831 /* still have kqlock held and knote is valid */
4835 /* output local errors through the kevent */
4837 kev
->flags
|= EV_ERROR
;
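/*
 * Illustrative userspace sketch (not part of the kernel build): the
 * registration errors surfaced through EV_ERROR above can be observed
 * per change by passing EV_RECEIPT and room in the output list; the
 * helper name is an arbitrary choice for the example.
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

static int
add_with_receipt(int kq_fd, int fd)
{
	struct kevent kev, out;

	EV_SET(&kev, fd, EVFILT_READ, EV_ADD | EV_RECEIPT, 0, 0, NULL);
	if (kevent(kq_fd, &kev, 1, &out, 1, NULL) == -1)
		return -1;

	/* with EV_RECEIPT the change is echoed back with EV_ERROR set;
	 * out.data carries the per-change error code (0 on success) */
	if ((out.flags & EV_ERROR) && out.data != 0)
		return (int)out.data;
	return 0;
}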
/*
 * knote_process - process a triggered event
 *
 * Validate that it is really still a triggered event
 * by calling the filter routines (if necessary). Hold
 * a use reference on the knote to avoid it being detached.
 *
 * If it is still considered triggered, we will have taken
 * a copy of the state under the filter lock. We use that
 * snapshot to dispatch the knote for future processing (or
 * not, if this was a lost event).
 *
 * Our caller assures us that nobody else can be processing
 * events from this knote during the whole operation. But
 * others can be touching or posting events to the knote
 * interspersed with our processing it.
 *
 * caller holds a reference on the kqueue.
 * kqueue locked on entry and exit - but may be dropped
 */
4864 knote_process(struct knote
*kn
,
4865 kevent_callback_t callback
,
4866 void *callback_data
,
4867 struct filt_process_s
*process_data
,
4870 struct kevent_internal_s kev
;
4871 struct kqueue
*kq
= knote_get_kq(kn
);
4875 bzero(&kev
, sizeof(kev
));
4878 * Must be active or stayactive
4879 * Must be queued and not disabled/suppressed
4881 assert(kn
->kn_status
& KN_QUEUED
);
4882 assert(kn
->kn_status
& (KN_ACTIVE
|KN_STAYACTIVE
));
4883 assert(!(kn
->kn_status
& (KN_DISABLED
|KN_SUPPRESSED
|KN_DROPPING
)));
4885 if (kq
->kq_state
& KQ_WORKLOOP
) {
4886 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS
),
4887 ((struct kqworkloop
*)kq
)->kqwl_dynamicid
,
4888 kn
->kn_udata
, kn
->kn_status
| (kn
->kn_id
<< 32),
4890 } else if (kq
->kq_state
& KQ_WORKQ
) {
4891 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS
),
4892 0, kn
->kn_udata
, kn
->kn_status
| (kn
->kn_id
<< 32),
4895 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS
),
4896 VM_KERNEL_UNSLIDE_OR_PERM(kq
), kn
->kn_udata
,
4897 kn
->kn_status
| (kn
->kn_id
<< 32), kn
->kn_filtid
);
	/*
	 * For deferred-drop or vanished events, we just create a fake
	 * event to acknowledge end-of-life. Otherwise, we call the
	 * filter's process routine to snapshot the kevent state under
	 * the filter's locking protocol.
	 */
4906 if (kn
->kn_status
& (KN_DEFERDELETE
| KN_VANISHED
)) {
4907 /* create fake event */
4908 kev
.filter
= kn
->kn_filter
;
4909 kev
.ident
= kn
->kn_id
;
4910 kev
.qos
= kn
->kn_qos
;
4911 kev
.flags
= (kn
->kn_status
& KN_DEFERDELETE
) ?
4912 EV_DELETE
: EV_VANISHED
;
4913 kev
.flags
|= (EV_DISPATCH2
| EV_ONESHOT
);
4914 kev
.udata
= kn
->kn_udata
;
4919 int flags
= KNUSE_NONE
;
4920 /* deactivate - so new activations indicate a wakeup */
4921 knote_deactivate(kn
);
4923 /* suppress knotes to avoid returning the same event multiple times in a single call. */
4926 if (knoteuse_needs_boost(kn
, NULL
)) {
4927 flags
|= KNUSE_BOOST
;
4929 /* convert lock to a knote use reference */
4930 if (!kqlock2knoteuse(kq
, kn
, flags
))
4931 panic("dropping knote found on queue\n");
4933 /* call out to the filter to process with just a ref */
4934 result
= knote_fops(kn
)->f_process(kn
, process_data
, &kev
);
4935 if (result
) flags
|= KNUSE_STEAL_DROP
;
4938 * convert our reference back to a lock. accept drop
4939 * responsibility from others if we've committed to
4940 * delivering event data.
4942 if (!knoteuse2kqlock(kq
, kn
, flags
)) {
	/*
	 * Determine how to dispatch the knote for future event handling.
	 * not-fired:  just return (do not callout, leave deactivated).
	 * One-shot:   If dispatch2, enter deferred-delete mode (unless this
	 *             is the deferred delete event delivery itself). Otherwise,
	 *             drop it.
	 * stolendrop: We took responsibility for someone else's drop attempt.
	 *             treat this just like one-shot and prepare to turn it back
	 *             into a deferred delete if required.
	 * Dispatch:   don't clear state, just mark it disabled.
	 * Cleared:    just leave it deactivated.
	 * Others:     re-activate as there may be more events to handle.
	 *             This will not wake up more handlers right now, but
	 *             at the completion of handling events it may trigger
	 *             more handler threads (TODO: optimize based on more than
	 *             just this one event being detected by the filter).
	 */
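/*
 * Illustrative userspace sketch (not part of the kernel build): the
 * dispatch outcomes listed above map onto the public flags - EV_ONESHOT
 * deletes the knote after one delivery, EV_CLEAR resets its state, and
 * EV_DISPATCH leaves it disabled until the consumer re-enables it.  The
 * helper name is an arbitrary choice for the example.
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

static int
rearm_dispatch_knote(int kq_fd, int fd)
{
	struct kevent kev;

	/* after an EV_ADD|EV_DISPATCH registration, each delivery
	 * auto-disables the knote; EV_ENABLE re-arms it */
	EV_SET(&kev, fd, EVFILT_READ, EV_ENABLE | EV_DISPATCH, 0, 0, NULL);
	return kevent(kq_fd, &kev, 1, NULL, 0, NULL);
}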
4968 return (EJUSTRETURN
);
4970 if ((kev
.flags
& EV_ONESHOT
) || (kn
->kn_status
& KN_STOLENDROP
)) {
4971 if ((kn
->kn_status
& (KN_DISPATCH2
| KN_DEFERDELETE
)) == KN_DISPATCH2
) {
4972 /* defer dropping non-delete oneshot dispatch2 events */
4973 kn
->kn_status
|= KN_DEFERDELETE
;
4976 /* if we took over another's drop clear those flags here */
4977 if (kn
->kn_status
& KN_STOLENDROP
) {
4978 assert(kn
->kn_status
& KN_DROPPING
);
4980 * the knote will be dropped when the
4981 * deferred deletion occurs
4983 kn
->kn_status
&= ~(KN_DROPPING
|KN_STOLENDROP
);
4985 } else if (kn
->kn_status
& KN_STOLENDROP
) {
4986 /* We now own the drop of the knote. */
4987 assert(kn
->kn_status
& KN_DROPPING
);
4988 knote_unsuppress(kn
);
4992 } else if (kqlock2knotedrop(kq
, kn
)) {
4993 /* just EV_ONESHOT, _not_ DISPATCH2 */
4997 } else if (kn
->kn_status
& KN_DISPATCH
) {
4998 /* disable all dispatch knotes */
5000 } else if ((kev
.flags
& EV_CLEAR
) == 0) {
5001 /* re-activate in case there are more events */
5007 * callback to handle each event as we find it.
5008 * If we have to detach and drop the knote, do
5009 * it while we have the kq unlocked.
5013 error
= (callback
)(kq
, &kev
, callback_data
);
/*
 * Return 0 to indicate that processing should proceed,
 * -1 if there is nothing to process.
 *
 * Called with kqueue locked and returns the same way,
 * but may drop lock temporarily.
 */
5028 kqworkq_begin_processing(struct kqworkq
*kqwq
, kq_index_t qos_index
, int flags
)
5030 struct kqrequest
*kqr
;
5031 thread_t self
= current_thread();
5032 __assert_only
struct uthread
*ut
= get_bsdthread_info(self
);
5034 assert(kqwq
->kqwq_state
& KQ_WORKQ
);
5035 assert(qos_index
< KQWQ_NQOS
);
5037 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN
) | DBG_FUNC_START
,
5040 kqwq_req_lock(kqwq
);
5042 kqr
= kqworkq_get_request(kqwq
, qos_index
);
5044 /* manager skips buckets that haven't asked for its help */
5045 if (flags
& KEVENT_FLAG_WORKQ_MANAGER
) {
5047 /* If nothing for manager to do, just return */
5048 if ((kqr
->kqr_state
& KQWQ_THMANAGER
) == 0) {
5049 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN
) | DBG_FUNC_END
,
5051 kqwq_req_unlock(kqwq
);
5054 /* bind manager thread from this time on */
5055 kqworkq_bind_thread_impl(kqwq
, qos_index
, self
, flags
);
5058 /* We should already be bound to this kqueue */
5059 assert(kqr
->kqr_state
& KQR_BOUND
);
5060 assert(kqr
->kqr_thread
== self
);
5061 assert(ut
->uu_kqueue_bound
== (struct kqueue
*)kqwq
);
5062 assert(ut
->uu_kqueue_qos_index
== qos_index
);
5063 assert((ut
->uu_kqueue_flags
& flags
) == ut
->uu_kqueue_flags
);
5067 * we should have been requested to be here
5068 * and nobody else should still be processing
5070 assert(kqr
->kqr_state
& KQR_WAKEUP
);
5071 assert(kqr
->kqr_state
& KQR_THREQUESTED
);
5072 assert((kqr
->kqr_state
& KQR_PROCESSING
) == 0);
5074 /* reset wakeup trigger to catch new events after we start processing */
5075 kqr
->kqr_state
&= ~KQR_WAKEUP
;
5077 /* convert to processing mode */
5078 kqr
->kqr_state
|= KQR_PROCESSING
;
5080 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN
) | DBG_FUNC_END
,
5081 kqr_thread_id(kqr
), kqr
->kqr_state
);
5083 kqwq_req_unlock(kqwq
);
5088 kqworkloop_is_processing_on_current_thread(struct kqworkloop
*kqwl
)
5090 struct kqueue
*kq
= &kqwl
->kqwl_kqueue
;
5094 if (kq
->kq_state
& KQ_PROCESSING
) {
5096 * KQ_PROCESSING is unset with the kqlock held, and the kqr thread is
5097 * never modified while KQ_PROCESSING is set, meaning that peeking at
5098 * its value is safe from this context.
5100 return kqwl
->kqwl_request
.kqr_thread
== current_thread();
5106 kqworkloop_acknowledge_events(struct kqworkloop
*kqwl
, boolean_t clear_ipc_override
)
5108 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
5109 struct knote
*kn
, *tmp
;
5111 kqlock_held(&kqwl
->kqwl_kqueue
);
5113 TAILQ_FOREACH_SAFE(kn
, &kqr
->kqr_suppressed
, kn_tqe
, tmp
) {
		/*
		 * If a knote that can adjust QoS is disabled because of the automatic
		 * behavior of EV_DISPATCH, the knote should stay suppressed so that
		 * further overrides keep pushing.
		 */
5119 if (knote_fops(kn
)->f_adjusts_qos
&& (kn
->kn_status
& KN_DISABLED
) &&
5120 (kn
->kn_status
& (KN_STAYACTIVE
| KN_DROPPING
)) == 0 &&
5121 (kn
->kn_flags
& (EV_DISPATCH
| EV_DISABLE
)) == EV_DISPATCH
) {
5123 * When called from unbind, clear the sync ipc override on the knote
5124 * for events which are delivered.
5126 if (clear_ipc_override
) {
5127 knote_adjust_sync_qos(kn
, THREAD_QOS_UNSPECIFIED
, FALSE
);
5131 knote_unsuppress(kn
);
5136 kqworkloop_begin_processing(struct kqworkloop
*kqwl
,
5137 __assert_only
unsigned int flags
)
5139 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
5140 struct kqueue
*kq
= &kqwl
->kqwl_kqueue
;
5144 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN
) | DBG_FUNC_START
,
5145 kqwl
->kqwl_dynamicid
, flags
, 0);
5147 kqwl_req_lock(kqwl
);
5149 /* nobody else should still be processing */
5150 assert((kqr
->kqr_state
& KQR_PROCESSING
) == 0);
5151 assert((kq
->kq_state
& KQ_PROCESSING
) == 0);
5153 kqr
->kqr_state
|= KQR_PROCESSING
| KQR_R2K_NOTIF_ARMED
;
5154 kq
->kq_state
|= KQ_PROCESSING
;
5156 kqwl_req_unlock(kqwl
);
5158 kqworkloop_acknowledge_events(kqwl
, FALSE
);
5160 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN
) | DBG_FUNC_END
,
5161 kqwl
->kqwl_dynamicid
, flags
, 0);
/*
 * Return 0 to indicate that processing should proceed,
 * -1 if there is nothing to process.
 *
 * Called with kqueue locked and returns the same way,
 * but may drop lock temporarily.
 */
5175 kqueue_begin_processing(struct kqueue
*kq
, kq_index_t qos_index
, unsigned int flags
)
5177 struct kqtailq
*suppressq
;
5181 if (kq
->kq_state
& KQ_WORKQ
) {
5182 return kqworkq_begin_processing((struct kqworkq
*)kq
, qos_index
, flags
);
5183 } else if (kq
->kq_state
& KQ_WORKLOOP
) {
5184 return kqworkloop_begin_processing((struct kqworkloop
*)kq
, flags
);
5187 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN
) | DBG_FUNC_START
,
5188 VM_KERNEL_UNSLIDE_OR_PERM(kq
), flags
);
5190 assert(qos_index
== QOS_INDEX_KQFILE
);
5192 /* wait to become the exclusive processing thread */
5194 if (kq
->kq_state
& KQ_DRAIN
) {
5195 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN
) | DBG_FUNC_END
,
5196 VM_KERNEL_UNSLIDE_OR_PERM(kq
), 2);
5200 if ((kq
->kq_state
& KQ_PROCESSING
) == 0)
5203 /* if someone else is processing the queue, wait */
5204 kq
->kq_state
|= KQ_PROCWAIT
;
5205 suppressq
= kqueue_get_suppressed_queue(kq
, qos_index
);
5206 waitq_assert_wait64((struct waitq
*)&kq
->kq_wqs
,
5207 CAST_EVENT64_T(suppressq
),
5208 THREAD_UNINT
, TIMEOUT_WAIT_FOREVER
);
5211 thread_block(THREAD_CONTINUE_NULL
);
5215 /* Nobody else processing */
5217 /* clear pre-posts and KQ_WAKEUP now, in case we bail early */
5218 waitq_set_clear_preposts(&kq
->kq_wqs
);
5219 kq
->kq_state
&= ~KQ_WAKEUP
;
5221 /* anything left to process? */
5222 if (kqueue_queue_empty(kq
, qos_index
)) {
5223 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN
) | DBG_FUNC_END
,
5224 VM_KERNEL_UNSLIDE_OR_PERM(kq
), 1);
5228 /* convert to processing mode */
5229 kq
->kq_state
|= KQ_PROCESSING
;
5231 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN
) | DBG_FUNC_END
,
5232 VM_KERNEL_UNSLIDE_OR_PERM(kq
));
/*
 * kqworkq_end_processing - Complete the processing of a workq kqueue
 *
 * We may have to request new threads.
 * This can happen when there are no waiting processing threads and:
 * - there were active events we never got to (count > 0)
 * - we pended waitq hook callouts during processing
 * - we pended wakeups while processing (or unsuppressing)
 *
 * Called with kqueue lock held.
 */
5249 kqworkq_end_processing(struct kqworkq
*kqwq
, kq_index_t qos_index
, int flags
)
5251 #pragma unused(flags)
5253 struct kqueue
*kq
= &kqwq
->kqwq_kqueue
;
5254 struct kqtailq
*suppressq
= kqueue_get_suppressed_queue(kq
, qos_index
);
5256 thread_t self
= current_thread();
5257 struct uthread
*ut
= get_bsdthread_info(self
);
5259 struct kqrequest
*kqr
;
5262 assert(kqwq
->kqwq_state
& KQ_WORKQ
);
5263 assert(qos_index
< KQWQ_NQOS
);
5265 /* Are we really bound to this kqueue? */
5266 if (ut
->uu_kqueue_bound
!= kq
) {
5267 assert(ut
->uu_kqueue_bound
== kq
);
5271 kqr
= kqworkq_get_request(kqwq
, qos_index
);
5273 kqwq_req_lock(kqwq
);
5275 /* Do we claim to be manager? */
5276 if (flags
& KEVENT_FLAG_WORKQ_MANAGER
) {
5278 /* bail if not bound that way */
5279 if (ut
->uu_kqueue_qos_index
!= KQWQ_QOS_MANAGER
||
5280 (ut
->uu_kqueue_flags
& KEVENT_FLAG_WORKQ_MANAGER
) == 0) {
5281 assert(ut
->uu_kqueue_qos_index
== KQWQ_QOS_MANAGER
);
5282 assert(ut
->uu_kqueue_flags
& KEVENT_FLAG_WORKQ_MANAGER
);
5283 kqwq_req_unlock(kqwq
);
5287 /* bail if this request wasn't already getting manager help */
5288 if ((kqr
->kqr_state
& KQWQ_THMANAGER
) == 0 ||
5289 (kqr
->kqr_state
& KQR_PROCESSING
) == 0) {
5290 kqwq_req_unlock(kqwq
);
5294 if (ut
->uu_kqueue_qos_index
!= qos_index
||
5295 (ut
->uu_kqueue_flags
& KEVENT_FLAG_WORKQ_MANAGER
)) {
5296 assert(ut
->uu_kqueue_qos_index
== qos_index
);
5297 assert((ut
->uu_kqueue_flags
& KEVENT_FLAG_WORKQ_MANAGER
) == 0);
5298 kqwq_req_unlock(kqwq
);
5303 assert(kqr
->kqr_state
& KQR_BOUND
);
5304 thread
= kqr
->kqr_thread
;
5305 assert(thread
== self
);
5307 assert(kqr
->kqr_state
& KQR_PROCESSING
);
5309 /* If we didn't drain the whole queue, re-mark a wakeup being needed */
5310 if (!kqueue_queue_empty(kq
, qos_index
))
5311 kqr
->kqr_state
|= KQR_WAKEUP
;
5313 kqwq_req_unlock(kqwq
);
5316 * Return suppressed knotes to their original state.
5317 * For workq kqueues, suppressed ones that are still
5318 * truly active (not just forced into the queue) will
5319 * set flags we check below to see if anything got
5322 while ((kn
= TAILQ_FIRST(suppressq
)) != NULL
) {
5323 assert(kn
->kn_status
& KN_SUPPRESSED
);
5324 knote_unsuppress(kn
);
5327 kqwq_req_lock(kqwq
);
5329 /* Indicate that we are done processing this request */
5330 kqr
->kqr_state
&= ~KQR_PROCESSING
;
5333 * Drop our association with this one request and its
5336 kqworkq_unbind_thread(kqwq
, qos_index
, thread
, flags
);
5339 * request a new thread if we didn't process the whole
5340 * queue or real events have happened (not just putting
5341 * stay-active events back).
5343 if (kqr
->kqr_state
& KQR_WAKEUP
) {
5344 if (kqueue_queue_empty(kq
, qos_index
)) {
5345 kqr
->kqr_state
&= ~KQR_WAKEUP
;
5347 kqworkq_request_thread(kqwq
, qos_index
);
5350 kqwq_req_unlock(kqwq
);
5354 kqworkloop_end_processing(struct kqworkloop
*kqwl
, int nevents
,
5357 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
5358 struct kqueue
*kq
= &kqwl
->kqwl_kqueue
;
5362 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END
) | DBG_FUNC_START
,
5363 kqwl
->kqwl_dynamicid
, flags
, 0);
5365 if ((kq
->kq_state
& KQ_NO_WQ_THREAD
) && nevents
== 0 &&
5366 (flags
& KEVENT_FLAG_IMMEDIATE
) == 0) {
		/*
		 * <rdar://problem/31634014> We may soon block, but have returned no
		 * kevents that need to be kept suppressed for overriding purposes.
		 *
		 * It is hence safe to acknowledge events and unsuppress everything, so
		 * that if we block we can observe all events firing.
		 */
5374 kqworkloop_acknowledge_events(kqwl
, TRUE
);
5377 kqwl_req_lock(kqwl
);
5379 assert(kqr
->kqr_state
& KQR_PROCESSING
);
5380 assert(kq
->kq_state
& KQ_PROCESSING
);
5382 kq
->kq_state
&= ~KQ_PROCESSING
;
5383 kqr
->kqr_state
&= ~KQR_PROCESSING
;
5384 kqworkloop_update_threads_qos(kqwl
, KQWL_UTQ_RECOMPUTE_WAKEUP_QOS
, 0);
5386 kqwl_req_unlock(kqwl
);
5388 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END
) | DBG_FUNC_END
,
5389 kqwl
->kqwl_dynamicid
, flags
, 0);
5393 * Called with kqueue lock held.
5396 kqueue_end_processing(struct kqueue
*kq
, kq_index_t qos_index
,
5397 int nevents
, unsigned int flags
)
5400 struct kqtailq
*suppressq
;
5405 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
5407 if (kq
->kq_state
& KQ_WORKLOOP
) {
5408 return kqworkloop_end_processing((struct kqworkloop
*)kq
, nevents
, flags
);
5411 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_END
),
5412 VM_KERNEL_UNSLIDE_OR_PERM(kq
), flags
);
5414 assert(qos_index
== QOS_INDEX_KQFILE
);
5417 * Return suppressed knotes to their original state.
5419 suppressq
= kqueue_get_suppressed_queue(kq
, qos_index
);
5420 while ((kn
= TAILQ_FIRST(suppressq
)) != NULL
) {
5421 assert(kn
->kn_status
& KN_SUPPRESSED
);
5422 knote_unsuppress(kn
);
5425 procwait
= (kq
->kq_state
& KQ_PROCWAIT
);
5426 kq
->kq_state
&= ~(KQ_PROCESSING
| KQ_PROCWAIT
);
5429 /* first wake up any thread already waiting to process */
5430 waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
,
5431 CAST_EVENT64_T(suppressq
),
5433 WAITQ_ALL_PRIORITIES
);
/*
 * kqworkq_internal_bind - bind thread to processing workq kqueue
 *
 * Determines if the provided thread will be responsible for
 * servicing the particular QoS class index specified in the
 * parameters. Once the binding is done, any overrides that may
 * be associated with the corresponding events can be applied.
 *
 * This should be called as soon as the thread identity is known,
 * preferably while still at high priority during creation.
 *
 * - caller holds a reference on the process (and workq kq)
 * - the thread MUST call kevent_qos_internal after being bound
 *   or the bucket of events may never be delivered.
 * - Nothing locked
 *   (unless this is a synchronous bind, then the request is locked)
 */
5455 kqworkq_internal_bind(
5457 kq_index_t qos_index
,
5462 struct kqworkq
*kqwq
;
5463 struct kqrequest
*kqr
;
5464 struct uthread
*ut
= get_bsdthread_info(thread
);
5466 /* If no process workq, can't be our thread. */
5467 kq
= p
->p_fd
->fd_wqkqueue
;
5472 assert(kq
->kq_state
& KQ_WORKQ
);
5473 kqwq
= (struct kqworkq
*)kq
;
5476 * No need to bind the manager thread to any specific
5477 * bucket, but still claim the thread.
5479 if (qos_index
== KQWQ_QOS_MANAGER
) {
5480 assert(ut
->uu_kqueue_bound
== NULL
);
5481 assert(flags
& KEVENT_FLAG_WORKQ_MANAGER
);
5482 ut
->uu_kqueue_bound
= kq
;
5483 ut
->uu_kqueue_qos_index
= qos_index
;
5484 ut
->uu_kqueue_flags
= flags
;
5486 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_BIND
),
5487 thread_tid(thread
), flags
, qos_index
);
5493 * If this is a synchronous bind callback, the request
5494 * lock is already held, so just do the bind.
5496 if (flags
& KEVENT_FLAG_SYNCHRONOUS_BIND
) {
5497 kqwq_req_held(kqwq
);
		/* strip out synchronous bind flag */
5499 flags
&= ~KEVENT_FLAG_SYNCHRONOUS_BIND
;
5500 kqworkq_bind_thread_impl(kqwq
, qos_index
, thread
, flags
);
5505 * check the request that corresponds to our qos_index
5506 * to see if there is an outstanding request.
5508 kqr
= kqworkq_get_request(kqwq
, qos_index
);
5509 assert(kqr
->kqr_qos_index
== qos_index
);
5510 kqwq_req_lock(kqwq
);
5512 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_BIND
),
5513 thread_tid(thread
), flags
, qos_index
, kqr
->kqr_state
);
5515 if ((kqr
->kqr_state
& KQR_THREQUESTED
) &&
5516 (kqr
->kqr_state
& KQR_PROCESSING
) == 0) {
5518 if ((kqr
->kqr_state
& KQR_BOUND
) &&
5519 thread
== kqr
->kqr_thread
) {
5520 /* duplicate bind - claim the thread */
5521 assert(ut
->uu_kqueue_bound
== kq
);
5522 assert(ut
->uu_kqueue_qos_index
== qos_index
);
5523 kqwq_req_unlock(kqwq
);
5526 if ((kqr
->kqr_state
& (KQR_BOUND
| KQWQ_THMANAGER
)) == 0) {
5527 /* ours to bind to */
5528 kqworkq_bind_thread_impl(kqwq
, qos_index
, thread
, flags
);
5529 kqwq_req_unlock(kqwq
);
5533 kqwq_req_unlock(kqwq
);
5538 kqworkloop_bind_thread_impl(struct kqworkloop
*kqwl
,
5540 __assert_only
unsigned int flags
)
5542 assert(flags
& KEVENT_FLAG_WORKLOOP
);
5544 /* the request object must be locked */
5545 kqwl_req_held(kqwl
);
5547 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
5548 struct uthread
*ut
= get_bsdthread_info(thread
);
5549 boolean_t ipc_override_is_sync
;
5550 kq_index_t qos_index
= kqworkloop_combined_qos(kqwl
, &ipc_override_is_sync
);
5552 /* nobody else bound so finally bind (as a workloop) */
5553 assert(kqr
->kqr_state
& KQR_THREQUESTED
);
5554 assert((kqr
->kqr_state
& (KQR_BOUND
| KQR_PROCESSING
)) == 0);
5555 assert(thread
!= kqwl
->kqwl_owner
);
5557 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_BIND
),
5558 kqwl
->kqwl_dynamicid
, (uintptr_t)thread_tid(thread
),
5560 (uintptr_t)(((uintptr_t)kqr
->kqr_override_index
<< 16) |
5561 (((uintptr_t)kqr
->kqr_state
) << 8) |
5562 ((uintptr_t)ipc_override_is_sync
)));
5564 kqr
->kqr_state
|= KQR_BOUND
| KQR_R2K_NOTIF_ARMED
;
5565 kqr
->kqr_thread
= thread
;
5567 /* bind the workloop to the uthread */
5568 ut
->uu_kqueue_bound
= (struct kqueue
*)kqwl
;
5569 ut
->uu_kqueue_flags
= flags
;
5570 ut
->uu_kqueue_qos_index
= qos_index
;
5571 assert(ut
->uu_kqueue_override_is_sync
== 0);
5572 ut
->uu_kqueue_override_is_sync
= ipc_override_is_sync
;
5574 thread_add_ipc_override(thread
, qos_index
);
5576 if (ipc_override_is_sync
) {
5577 thread_add_sync_ipc_override(thread
);
/*
 * workloop_fulfill_threadreq - bind thread to processing workloop
 *
 * The provided thread will be responsible for delivering events
 * associated with the given kqrequest. Bind it and get ready for
 * the thread to eventually arrive.
 *
 * If WORKLOOP_FULFILL_THREADREQ_SYNC is specified, the callback
 * is made within the context of the pthread_functions->workq_threadreq
 * callout. In this case, the request structure is already locked.
 */
5593 workloop_fulfill_threadreq(struct proc
*p
,
5594 workq_threadreq_t req
,
5598 int sync
= (flags
& WORKLOOP_FULFILL_THREADREQ_SYNC
);
5599 int cancel
= (flags
& WORKLOOP_FULFILL_THREADREQ_CANCEL
);
5600 struct kqrequest
*kqr
;
5601 struct kqworkloop
*kqwl
;
5603 kqwl
= (struct kqworkloop
*)((uintptr_t)req
-
5604 offsetof(struct kqworkloop
, kqwl_request
) -
5605 offsetof(struct kqrequest
, kqr_req
));
5606 kqr
= &kqwl
->kqwl_request
;
5608 /* validate we're looking at something valid */
5609 if (kqwl
->kqwl_p
!= p
||
5610 (kqwl
->kqwl_state
& KQ_WORKLOOP
) == 0) {
5611 assert(kqwl
->kqwl_p
== p
);
5612 assert(kqwl
->kqwl_state
& KQ_WORKLOOP
);
5617 kqwl_req_lock(kqwl
);
5619 /* Should be a pending request */
5620 if ((kqr
->kqr_state
& KQR_BOUND
) ||
5621 (kqr
->kqr_state
& KQR_THREQUESTED
) == 0) {
5623 assert((kqr
->kqr_state
& KQR_BOUND
) == 0);
5624 assert(kqr
->kqr_state
& KQR_THREQUESTED
);
5626 kqwl_req_unlock(kqwl
);
5630 assert((kqr
->kqr_state
& KQR_DRAIN
) == 0);
	/*
	 * Is it a cancel indication from pthread?
	 * If so, we must be exiting/exec'ing. Forget
	 * our pending request.
	 */
5638 kqr
->kqr_state
&= ~KQR_THREQUESTED
;
5639 kqr
->kqr_state
|= KQR_DRAIN
;
5641 /* do the actual bind? */
5642 kqworkloop_bind_thread_impl(kqwl
, thread
, KEVENT_FLAG_WORKLOOP
);
5646 kqwl_req_unlock(kqwl
);
5649 kqueue_release_last(p
, &kqwl
->kqwl_kqueue
); /* may dealloc kq */
/*
 * kevent_qos_internal_bind - bind thread to processing kqueue
 *
 * Indicates that the provided thread will be responsible for
 * servicing the particular QoS class index specified in the
 * parameters. Once the binding is done, any overrides that may
 * be associated with the corresponding events can be applied.
 *
 * This should be called as soon as the thread identity is known,
 * preferably while still at high priority during creation.
 *
 * - caller holds a reference on the kqueue.
 * - the thread MUST call kevent_qos_internal after being bound
 *   or the bucket of events may never be delivered.
 * - Nothing locked (may take mutex or block).
 */
5673 kevent_qos_internal_bind(
5679 kq_index_t qos_index
;
5681 assert(flags
& KEVENT_FLAG_WORKQ
);
5683 if (thread
== THREAD_NULL
|| (flags
& KEVENT_FLAG_WORKQ
) == 0) {
5687 /* get the qos index we're going to service */
5688 qos_index
= qos_index_for_servicer(qos_class
, thread
, flags
);
5690 if (kqworkq_internal_bind(p
, qos_index
, thread
, flags
))
5698 kqworkloop_internal_unbind(
5704 struct kqworkloop
*kqwl
;
5705 struct uthread
*ut
= get_bsdthread_info(thread
);
5707 assert(ut
->uu_kqueue_bound
!= NULL
);
5708 kq
= ut
->uu_kqueue_bound
;
5709 assert(kq
->kq_state
& KQ_WORKLOOP
);
5710 kqwl
= (struct kqworkloop
*)kq
;
5712 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_UNBIND
),
5713 kqwl
->kqwl_dynamicid
, (uintptr_t)thread_tid(thread
),
5716 if (!(kq
->kq_state
& KQ_NO_WQ_THREAD
)) {
5717 assert(is_workqueue_thread(thread
));
5720 kqworkloop_unbind_thread(kqwl
, thread
, flags
);
5723 /* If last reference, dealloc the workloop kq */
5724 kqueue_release_last(p
, kq
);
5726 assert(!is_workqueue_thread(thread
));
5727 kevent_servicer_detach_thread(p
, kqwl
->kqwl_dynamicid
, thread
, flags
, kq
);
5732 kqworkq_internal_unbind(
5734 kq_index_t qos_index
,
5739 struct kqworkq
*kqwq
;
5741 kq_index_t end_index
;
5743 assert(thread
== current_thread());
5744 ut
= get_bsdthread_info(thread
);
5746 kq
= p
->p_fd
->fd_wqkqueue
;
5747 assert(kq
->kq_state
& KQ_WORKQ
);
5748 assert(ut
->uu_kqueue_bound
== kq
);
5750 kqwq
= (struct kqworkq
*)kq
;
5752 /* end servicing any requests we might own */
5753 end_index
= (qos_index
== KQWQ_QOS_MANAGER
) ?
5757 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_UNBIND
),
5758 (uintptr_t)thread_tid(thread
), flags
, qos_index
);
5761 kqworkq_end_processing(kqwq
, qos_index
, flags
);
5762 } while (qos_index
-- > end_index
);
5764 ut
->uu_kqueue_bound
= NULL
;
5765 ut
->uu_kqueue_qos_index
= 0;
5766 ut
->uu_kqueue_flags
= 0;
/*
 * kevent_qos_internal_unbind - unbind thread from processing kqueue
 *
 * End processing the per-QoS bucket of events and allow other threads
 * to be requested for future servicing.
 *
 * caller holds a reference on the kqueue.
 * thread is the current thread.
 */
5782 kevent_qos_internal_unbind(
5788 #pragma unused(qos_class)
5792 unsigned int bound_flags
;
5795 ut
= get_bsdthread_info(thread
);
5796 if (ut
->uu_kqueue_bound
== NULL
) {
5797 /* early out if we are already unbound */
5798 assert(ut
->uu_kqueue_flags
== 0);
5799 assert(ut
->uu_kqueue_qos_index
== 0);
5800 assert(ut
->uu_kqueue_override_is_sync
== 0);
5804 assert(flags
& (KEVENT_FLAG_WORKQ
| KEVENT_FLAG_WORKLOOP
));
5805 assert(thread
== current_thread());
5807 check_flags
= flags
& KEVENT_FLAG_UNBIND_CHECK_FLAGS
;
5809 /* Get the kqueue we started with */
5810 kq
= ut
->uu_kqueue_bound
;
5812 assert(kq
->kq_state
& (KQ_WORKQ
| KQ_WORKLOOP
));
5814 /* get flags and QoS parameters we started with */
5815 bound_flags
= ut
->uu_kqueue_flags
;
5817 /* Unbind from the class of workq */
5818 if (kq
->kq_state
& KQ_WORKQ
) {
5819 if (check_flags
&& !(flags
& KEVENT_FLAG_WORKQ
)) {
5823 kqworkq_internal_unbind(p
, ut
->uu_kqueue_qos_index
, thread
, bound_flags
);
5825 if (check_flags
&& !(flags
& KEVENT_FLAG_WORKLOOP
)) {
5829 kqworkloop_internal_unbind(p
, thread
, bound_flags
);
/*
 * kqueue_process - process the triggered events in a kqueue
 *
 * Walk the queued knotes and validate that they are
 * really still triggered events by calling the filter
 * routines (if necessary). Hold a use reference on
 * the knote to avoid it being detached. For each event
 * that is still considered triggered, invoke the
 * callback routine provided.
 *
 * caller holds a reference on the kqueue.
 * kqueue locked on entry and exit - but may be dropped
 * kqueue list locked (held for duration of call)
 */
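/*
 * Illustrative userspace sketch (not part of the kernel build): the
 * walk-and-callback structure of kqueue_process() below corresponds to
 * the usual user-level loop that fetches a batch of events and hands
 * each one to a handler; the handler type and helper name are arbitrary
 * choices for the example.
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

typedef void (*event_handler_t)(const struct kevent *ev, void *ctx);

static int
process_pending(int kq_fd, event_handler_t handler, void *ctx)
{
	struct kevent out[16];
	struct timespec zero = { 0, 0 };
	int n, i;

	/* fetch whatever is already triggered, without blocking */
	n = kevent(kq_fd, NULL, 0, out, 16, &zero);
	for (i = 0; i < n; i++)
		handler(&out[i], ctx);
	return n;
}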
5851 kqueue_process(struct kqueue
*kq
,
5852 kevent_callback_t callback
,
5853 void *callback_data
,
5854 struct filt_process_s
*process_data
,
5858 unsigned int flags
= process_data
? process_data
->fp_flags
: 0;
5859 struct uthread
*ut
= get_bsdthread_info(current_thread());
5860 kq_index_t start_index
, end_index
, i
;
5866 * Based on the mode of the kqueue and the bound QoS of the servicer,
5867 * determine the range of thread requests that need checking
5869 if (kq
->kq_state
& KQ_WORKQ
) {
5870 if (flags
& KEVENT_FLAG_WORKQ_MANAGER
) {
5871 start_index
= KQWQ_QOS_MANAGER
;
5872 } else if (ut
->uu_kqueue_bound
!= kq
) {
5875 start_index
= ut
->uu_kqueue_qos_index
;
5878 /* manager services every request in a workq kqueue */
5879 assert(start_index
> 0 && start_index
<= KQWQ_QOS_MANAGER
);
5880 end_index
= (start_index
== KQWQ_QOS_MANAGER
) ? 0 : start_index
;
5882 } else if (kq
->kq_state
& KQ_WORKLOOP
) {
5883 if (ut
->uu_kqueue_bound
!= kq
)
5887 * Single request servicing
5888 * we want to deliver all events, regardless of the QOS
5890 start_index
= end_index
= THREAD_QOS_UNSPECIFIED
;
5892 start_index
= end_index
= QOS_INDEX_KQFILE
;
5898 if (kqueue_begin_processing(kq
, i
, flags
) == -1) {
5900 /* Nothing to process */
5905 * loop through the enqueued knotes associated with this request,
5906 * processing each one. Each request may have several queues
5907 * of knotes to process (depending on the type of kqueue) so we
5908 * have to loop through all the queues as long as we have additional
5913 struct kqtailq
*base_queue
= kqueue_get_base_queue(kq
, i
);
5914 struct kqtailq
*queue
= kqueue_get_high_queue(kq
, i
);
5916 while (error
== 0 && (kn
= TAILQ_FIRST(queue
)) != NULL
) {
5917 error
= knote_process(kn
, callback
, callback_data
, process_data
, p
);
5918 if (error
== EJUSTRETURN
) {
5923 /* error is EWOULDBLOCK when the out event array is full */
5925 } while (error
== 0 && queue
-- > base_queue
);
5927 if ((kq
->kq_state
& KQ_WORKQ
) == 0) {
5928 kqueue_end_processing(kq
, i
, nevents
, flags
);
5931 if (error
== EWOULDBLOCK
) {
5932 /* break out if no more space for additional events */
5936 } while (i
-- > end_index
);
5943 kqueue_scan_continue(void *data
, wait_result_t wait_result
)
5945 thread_t self
= current_thread();
5946 uthread_t ut
= (uthread_t
)get_bsdthread_info(self
);
5947 struct _kqueue_scan
* cont_args
= &ut
->uu_kevent
.ss_kqueue_scan
;
5948 struct kqueue
*kq
= (struct kqueue
*)data
;
5949 struct filt_process_s
*process_data
= cont_args
->process_data
;
5953 /* convert the (previous) wait_result to a proper error */
5954 switch (wait_result
) {
5955 case THREAD_AWAKENED
: {
5958 error
= kqueue_process(kq
, cont_args
->call
, cont_args
->data
,
5959 process_data
, &count
, current_proc());
5960 if (error
== 0 && count
== 0) {
5961 if (kq
->kq_state
& KQ_DRAIN
) {
5966 if (kq
->kq_state
& KQ_WAKEUP
)
5969 waitq_assert_wait64((struct waitq
*)&kq
->kq_wqs
,
5970 KQ_EVENT
, THREAD_ABORTSAFE
,
5971 cont_args
->deadline
);
5972 kq
->kq_state
|= KQ_SLEEP
;
5974 thread_block_parameter(kqueue_scan_continue
, kq
);
5979 case THREAD_TIMED_OUT
:
5980 error
= EWOULDBLOCK
;
5982 case THREAD_INTERRUPTED
:
5985 case THREAD_RESTART
:
5990 panic("%s: - invalid wait_result (%d)", __func__
,
5995 /* call the continuation with the results */
5996 assert(cont_args
->cont
!= NULL
);
5997 (cont_args
->cont
)(kq
, cont_args
->data
, error
);
/*
 * kqueue_scan - scan and wait for events in a kqueue
 *
 * Process the triggered events in a kqueue.
 *
 * If there are no events triggered, arrange to
 * wait for them. If the caller provided a
 * continuation routine, then kevent_scan will
 * use it when blocking.
 *
 * The callback routine must be valid.
 * The caller must hold a use-count reference on the kq.
 */
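/*
 * Illustrative userspace sketch (not part of the kernel build): the
 * timeout handling in kqueue_scan() below corresponds to the public
 * interface, where a zero timespec makes kevent() poll without blocking
 * and a NULL timeout blocks until an event arrives; the helper name is
 * an arbitrary choice for the example.
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

static int
fetch_events(int kq_fd, struct kevent *out, int cnt, int block)
{
	struct timespec zero = { 0, 0 };

	/* NULL timeout: wait indefinitely; zero timeout: return at once */
	return kevent(kq_fd, NULL, 0, out, cnt, block ? NULL : &zero);
}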
6016 kqueue_scan(struct kqueue
*kq
,
6017 kevent_callback_t callback
,
6018 kqueue_continue_t continuation
,
6019 void *callback_data
,
6020 struct filt_process_s
*process_data
,
6021 struct timeval
*atvp
,
6024 thread_continue_t cont
= THREAD_CONTINUE_NULL
;
6031 assert(callback
!= NULL
);
6034 * Determine which QoS index we are servicing
6036 flags
= (process_data
) ? process_data
->fp_flags
: 0;
6037 fd
= (process_data
) ? process_data
->fp_fd
: -1;
6041 wait_result_t wait_result
;
6045 * Make a pass through the kq to find events already
6049 error
= kqueue_process(kq
, callback
, callback_data
,
6050 process_data
, &count
, p
);
6052 break; /* lock still held */
6054 /* looks like we have to consider blocking */
6057 /* convert the timeout to a deadline once */
6058 if (atvp
->tv_sec
|| atvp
->tv_usec
) {
6061 clock_get_uptime(&now
);
6062 nanoseconds_to_absolutetime((uint64_t)atvp
->tv_sec
* NSEC_PER_SEC
+
6063 atvp
->tv_usec
* (long)NSEC_PER_USEC
,
6065 if (now
>= deadline
) {
6066 /* non-blocking call */
6067 error
= EWOULDBLOCK
;
6068 break; /* lock still held */
6071 clock_absolutetime_interval_to_deadline(deadline
, &deadline
);
6073 deadline
= 0; /* block forever */
6077 uthread_t ut
= (uthread_t
)get_bsdthread_info(current_thread());
6078 struct _kqueue_scan
*cont_args
= &ut
->uu_kevent
.ss_kqueue_scan
;
6080 cont_args
->call
= callback
;
6081 cont_args
->cont
= continuation
;
6082 cont_args
->deadline
= deadline
;
6083 cont_args
->data
= callback_data
;
6084 cont_args
->process_data
= process_data
;
6085 cont
= kqueue_scan_continue
;
6089 if (kq
->kq_state
& KQ_DRAIN
) {
6094 /* If awakened during processing, try again */
6095 if (kq
->kq_state
& KQ_WAKEUP
) {
6100 /* go ahead and wait */
6101 waitq_assert_wait64_leeway((struct waitq
*)&kq
->kq_wqs
,
6102 KQ_EVENT
, THREAD_ABORTSAFE
,
6103 TIMEOUT_URGENCY_USER_NORMAL
,
6104 deadline
, TIMEOUT_NO_LEEWAY
);
6105 kq
->kq_state
|= KQ_SLEEP
;
6107 wait_result
= thread_block_parameter(cont
, kq
);
6108 /* NOTREACHED if (continuation != NULL) */
6110 switch (wait_result
) {
6111 case THREAD_AWAKENED
:
6113 case THREAD_TIMED_OUT
:
6115 case THREAD_INTERRUPTED
:
6117 case THREAD_RESTART
:
6120 panic("%s: - bad wait_result (%d)", __func__
,
6132 * This could be expanded to call kqueue_scan, if desired.
6136 kqueue_read(__unused
struct fileproc
*fp
,
6137 __unused
struct uio
*uio
,
6139 __unused vfs_context_t ctx
)
6146 kqueue_write(__unused
struct fileproc
*fp
,
6147 __unused
struct uio
*uio
,
6149 __unused vfs_context_t ctx
)
6156 kqueue_ioctl(__unused
struct fileproc
*fp
,
6157 __unused u_long com
,
6158 __unused caddr_t data
,
6159 __unused vfs_context_t ctx
)
6166 kqueue_select(struct fileproc
*fp
, int which
, void *wq_link_id
,
6167 __unused vfs_context_t ctx
)
6169 struct kqueue
*kq
= (struct kqueue
*)fp
->f_data
;
6170 struct kqtailq
*queue
;
6171 struct kqtailq
*suppressq
;
6180 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
	/*
	 * If this is the first pass, link the wait queue associated with the
	 * kqueue onto the wait queue set for the select(). Normally we
	 * use selrecord() for this, but it uses the wait queue within the
	 * selinfo structure and we need to use the main one for the kqueue to
	 * catch events from KN_STAYQUEUED sources. So we do the linkage manually.
	 * (The select() call will unlink them when it ends).
	 */
6190 if (wq_link_id
!= NULL
) {
6191 thread_t cur_act
= current_thread();
6192 struct uthread
* ut
= get_bsdthread_info(cur_act
);
6194 kq
->kq_state
|= KQ_SEL
;
6195 waitq_link((struct waitq
*)&kq
->kq_wqs
, ut
->uu_wqset
,
6196 WAITQ_SHOULD_LOCK
, (uint64_t *)wq_link_id
);
6198 /* always consume the reserved link object */
6199 waitq_link_release(*(uint64_t *)wq_link_id
);
6200 *(uint64_t *)wq_link_id
= 0;
		/*
		 * selprocess() is expecting that we send it back the waitq
		 * that was just added to the thread's waitq set. In order
		 * to not change the selrecord() API (which is exported to
		 * kexts), we pass this value back through the
		 * void *wq_link_id pointer we were passed. We need to use
		 * memcpy here because the pointer may not be properly aligned
		 * on 32-bit systems.
		 */
		void *wqptr = &kq->kq_wqs;
		memcpy(wq_link_id, (void *)&wqptr, sizeof(void *));
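/*
 * Illustrative userspace sketch (not part of the kernel build): this is
 * the user-level view of the manual linkage above - a kqueue descriptor
 * can be passed to select() and reads as ready while events are pending;
 * the helper name is an arbitrary choice for the example.
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/select.h>
#include <sys/time.h>

static int
kqueue_readable(int kq_fd)
{
	fd_set rfds;
	struct timeval zero = { 0, 0 };

	FD_ZERO(&rfds);
	FD_SET(kq_fd, &rfds);
	/* > 0 when the kqueue has pending events */
	return select(kq_fd + 1, &rfds, NULL, NULL, &zero);
}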
6215 if (kqueue_begin_processing(kq
, QOS_INDEX_KQFILE
, 0) == -1) {
6220 queue
= kqueue_get_base_queue(kq
, QOS_INDEX_KQFILE
);
6221 if (!TAILQ_EMPTY(queue
)) {
6223 * there is something queued - but it might be a
6224 * KN_STAYACTIVE knote, which may or may not have
6225 * any events pending. Otherwise, we have to walk
6226 * the list of knotes to see, and peek at the
6227 * (non-vanished) stay-active ones to be really sure.
6229 while ((kn
= (struct knote
*)TAILQ_FIRST(queue
)) != NULL
) {
6230 if (kn
->kn_status
& KN_ACTIVE
) {
6234 assert(kn
->kn_status
& KN_STAYACTIVE
);
6239 * There were no regular events on the queue, so take
6240 * a deeper look at the stay-queued ones we suppressed.
6242 suppressq
= kqueue_get_suppressed_queue(kq
, QOS_INDEX_KQFILE
);
6243 while ((kn
= (struct knote
*)TAILQ_FIRST(suppressq
)) != NULL
) {
6246 assert(!knoteuse_needs_boost(kn
, NULL
));
6248 /* If didn't vanish while suppressed - peek at it */
6249 if (kqlock2knoteuse(kq
, kn
, KNUSE_NONE
)) {
6250 peek
= knote_fops(kn
)->f_peek(kn
);
6252 /* if it dropped while getting lock - move on */
6253 if (!knoteuse2kqlock(kq
, kn
, KNUSE_NONE
))
6258 knote_unsuppress(kn
);
6260 /* has data or it has to report a vanish */
6269 kqueue_end_processing(kq
, QOS_INDEX_KQFILE
, retnum
, 0);
6279 kqueue_close(struct fileglob
*fg
, __unused vfs_context_t ctx
)
6281 struct kqfile
*kqf
= (struct kqfile
*)fg
->fg_data
;
6283 assert((kqf
->kqf_state
& KQ_WORKQ
) == 0);
6284 kqueue_dealloc(&kqf
->kqf_kqueue
);
/*
 * The caller has taken a use-count reference on this kqueue and will donate it
 * to the kqueue we are being added to. This keeps the kqueue from closing until
 * that relationship is torn down.
 */
6296 kqueue_kqfilter(__unused
struct fileproc
*fp
, struct knote
*kn
,
6297 __unused
struct kevent_internal_s
*kev
, __unused vfs_context_t ctx
)
6299 struct kqfile
*kqf
= (struct kqfile
*)kn
->kn_fp
->f_data
;
6300 struct kqueue
*kq
= &kqf
->kqf_kqueue
;
6301 struct kqueue
*parentkq
= knote_get_kq(kn
);
6303 assert((kqf
->kqf_state
& KQ_WORKQ
) == 0);
6305 if (parentkq
== kq
||
6306 kn
->kn_filter
!= EVFILT_READ
) {
6307 kn
->kn_flags
= EV_ERROR
;
6308 kn
->kn_data
= EINVAL
;
	/*
	 * We have to avoid creating a cycle when nesting kqueues
	 * inside another. Rather than trying to walk the whole
	 * potential DAG of nested kqueues, we just use a simple
	 * ceiling protocol. When a kqueue is inserted into another,
	 * we check that the (future) parent is not already nested
	 * into another kqueue at a lower level than the potential
	 * child (because it could indicate a cycle). If that test
	 * passes, we just mark the nesting levels accordingly.
	 */
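/*
 * Illustrative userspace sketch (not part of the kernel build): the
 * ceiling protocol above is exercised whenever one kqueue is registered
 * inside another with EVFILT_READ; the helper name is an arbitrary
 * choice for the example.
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

static int
nest_kqueue(int outer_kq, int inner_kq)
{
	struct kevent kev;

	/* the outer kqueue becomes readable when the inner one has events */
	EV_SET(&kev, inner_kq, EVFILT_READ, EV_ADD, 0, 0, NULL);
	return kevent(outer_kq, &kev, 1, NULL, 0, NULL);
}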
6324 if (parentkq
->kq_level
> 0 &&
6325 parentkq
->kq_level
< kq
->kq_level
)
6328 kn
->kn_flags
= EV_ERROR
;
6329 kn
->kn_data
= EINVAL
;
6332 /* set parent level appropriately */
6333 if (parentkq
->kq_level
== 0)
6334 parentkq
->kq_level
= 2;
6335 if (parentkq
->kq_level
< kq
->kq_level
+ 1)
6336 parentkq
->kq_level
= kq
->kq_level
+ 1;
6339 kn
->kn_filtid
= EVFILTID_KQREAD
;
6341 KNOTE_ATTACH(&kqf
->kqf_sel
.si_note
, kn
);
6342 /* indicate nesting in child, if needed */
6343 if (kq
->kq_level
== 0)
6346 int count
= kq
->kq_count
;
6353 * kqueue_drain - called when kq is closed
6357 kqueue_drain(struct fileproc
*fp
, __unused vfs_context_t ctx
)
6359 struct kqueue
*kq
= (struct kqueue
*)fp
->f_fglob
->fg_data
;
6361 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
6364 kq
->kq_state
|= KQ_DRAIN
;
6365 kqueue_interrupt(kq
);
6372 kqueue_stat(struct kqueue
*kq
, void *ub
, int isstat64
, proc_t p
)
6374 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
6377 if (isstat64
!= 0) {
6378 struct stat64
*sb64
= (struct stat64
*)ub
;
6380 bzero((void *)sb64
, sizeof(*sb64
));
6381 sb64
->st_size
= kq
->kq_count
;
6382 if (kq
->kq_state
& KQ_KEV_QOS
)
6383 sb64
->st_blksize
= sizeof(struct kevent_qos_s
);
6384 else if (kq
->kq_state
& KQ_KEV64
)
6385 sb64
->st_blksize
= sizeof(struct kevent64_s
);
6386 else if (IS_64BIT_PROCESS(p
))
6387 sb64
->st_blksize
= sizeof(struct user64_kevent
);
6389 sb64
->st_blksize
= sizeof(struct user32_kevent
);
6390 sb64
->st_mode
= S_IFIFO
;
6392 struct stat
*sb
= (struct stat
*)ub
;
6394 bzero((void *)sb
, sizeof(*sb
));
6395 sb
->st_size
= kq
->kq_count
;
6396 if (kq
->kq_state
& KQ_KEV_QOS
)
6397 sb
->st_blksize
= sizeof(struct kevent_qos_s
);
6398 else if (kq
->kq_state
& KQ_KEV64
)
6399 sb
->st_blksize
= sizeof(struct kevent64_s
);
6400 else if (IS_64BIT_PROCESS(p
))
6401 sb
->st_blksize
= sizeof(struct user64_kevent
);
6403 sb
->st_blksize
= sizeof(struct user32_kevent
);
6404 sb
->st_mode
= S_IFIFO
;
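/*
 * Illustrative userspace sketch (not part of the kernel build): because
 * kqueue_stat() above reports kq_count through st_size and marks the
 * node S_IFIFO, a plain fstat() lets an application peek at how many
 * events are queued; the helper name is an arbitrary choice for the
 * example.
 */
#include <sys/types.h>
#include <sys/stat.h>

static long
kqueue_pending_count(int kq_fd)
{
	struct stat st;

	if (fstat(kq_fd, &st) == -1)
		return -1;
	return (long)st.st_size;
}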
/*
 * Interact with the pthread kext to request a servicing there.
 * Eventually, this will request threads at specific QoS levels.
 * For now, it only requests a dispatch-manager-QoS thread, and
 * only one-at-a-time.
 *
 * - Caller holds the workq request lock
 *
 * - May be called with the kqueue's wait queue set locked,
 *   so cannot do anything that could recurse on that.
 */
6422 kqworkq_request_thread(
6423 struct kqworkq
*kqwq
,
6424 kq_index_t qos_index
)
6426 struct kqrequest
*kqr
;
6428 assert(kqwq
->kqwq_state
& KQ_WORKQ
);
6429 assert(qos_index
< KQWQ_NQOS
);
6431 kqr
= kqworkq_get_request(kqwq
, qos_index
);
6433 assert(kqr
->kqr_state
& KQR_WAKEUP
);
6436 * If we have already requested a thread, and it hasn't
6437 * started processing yet, there's no use hammering away
6438 * on the pthread kext.
6440 if (kqr
->kqr_state
& KQR_THREQUESTED
)
6443 assert((kqr
->kqr_state
& KQR_BOUND
) == 0);
6445 /* request additional workq threads if appropriate */
6446 if (pthread_functions
!= NULL
&&
6447 pthread_functions
->workq_reqthreads
!= NULL
) {
6448 unsigned int flags
= KEVENT_FLAG_WORKQ
;
6449 unsigned long priority
;
6452 /* Compute the appropriate pthread priority */
6453 priority
= qos_from_qos_index(qos_index
);
6456 /* JMM - for now remain compatible with old invocations */
6457 /* set the over-commit flag on the request if needed */
6458 if (kqr
->kqr_state
& KQR_THOVERCOMMIT
)
6459 priority
|= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
;
6462 /* Compute a priority based on qos_index. */
6463 struct workq_reqthreads_req_s request
= {
6464 .priority
= priority
,
6468 /* mark that we are making a request */
6469 kqr
->kqr_state
|= KQR_THREQUESTED
;
6470 if (qos_index
== KQWQ_QOS_MANAGER
)
6471 kqr
->kqr_state
|= KQWQ_THMANAGER
;
6473 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_THREQUEST
),
6475 (((uintptr_t)kqr
->kqr_override_index
<< 8) |
6476 (uintptr_t)kqr
->kqr_state
));
6477 wqthread
= (*pthread_functions
->workq_reqthreads
)(kqwq
->kqwq_p
, 1, &request
);
6479 /* We've been switched to the emergency/manager thread */
6480 if (wqthread
== (thread_t
)-1) {
6481 assert(qos_index
!= KQWQ_QOS_MANAGER
);
6482 kqr
->kqr_state
|= KQWQ_THMANAGER
;
6487 * bind the returned thread identity
6488 * This goes away when we switch to synchronous callback
6489 * binding from the pthread kext.
6491 if (wqthread
!= NULL
) {
6492 kqworkq_bind_thread_impl(kqwq
, qos_index
, wqthread
, flags
);
6498 * If we aren't already busy processing events [for this QoS],
6499 * request workq thread support as appropriate.
6501 * TBD - for now, we don't segregate out processing by QoS.
6503 * - May be called with the kqueue's wait queue set locked,
6504 * so cannot do anything that could recurse on that.
6507 kqworkq_request_help(
6508 struct kqworkq
*kqwq
,
6509 kq_index_t qos_index
)
6511 struct kqrequest
*kqr
;
6513 /* convert to thread qos value */
6514 assert(qos_index
< KQWQ_NQOS
);
6516 kqwq_req_lock(kqwq
);
6517 kqr
= kqworkq_get_request(kqwq
, qos_index
);
6519 if ((kqr
->kqr_state
& KQR_WAKEUP
) == 0) {
6520 /* Indicate that we needed help from this request */
6521 kqr
->kqr_state
|= KQR_WAKEUP
;
6523 /* Go assure a thread request has been made */
6524 kqworkq_request_thread(kqwq
, qos_index
);
6526 kqwq_req_unlock(kqwq
);
6530 kqworkloop_threadreq_impl(struct kqworkloop
*kqwl
, kq_index_t qos_index
)
6532 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6533 unsigned long pri
= pthread_priority_for_kqrequest(kqr
, qos_index
);
6536 assert((kqr
->kqr_state
& (KQR_THREQUESTED
| KQR_BOUND
)) == KQR_THREQUESTED
);
	/*
	 * New-style thread request supported. Provide
	 * the pthread kext a pointer to a workq_threadreq_s
	 * structure for its use until a corresponding
	 * workloop_fulfill_threadreq callback.
	 */
6544 if (current_proc() == kqwl
->kqwl_kqueue
.kq_p
) {
6545 op
= WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL
;
6547 op
= WORKQ_THREADREQ_WORKLOOP
;
6550 ret
= (*pthread_functions
->workq_threadreq
)(kqwl
->kqwl_p
, &kqr
->kqr_req
,
6551 WORKQ_THREADREQ_WORKLOOP
, pri
, 0);
6554 assert(op
== WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL
);
6555 op
= WORKQ_THREADREQ_WORKLOOP
;
6561 * Process is shutting down or exec'ing.
6562 * All the kqueues are going to be cleaned up
6563 * soon. Forget we even asked for a thread -
6564 * and make sure we don't ask for more.
6566 kqueue_release((struct kqueue
*)kqwl
, KQUEUE_CANT_BE_LAST_REF
);
6567 kqr
->kqr_state
&= ~KQR_THREQUESTED
;
6568 kqr
->kqr_state
|= KQR_DRAIN
;
6572 assert(op
== WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL
);
6573 act_set_astkevent(current_thread(), AST_KEVENT_REDRIVE_THREADREQ
);
6582 kqworkloop_threadreq_modify(struct kqworkloop
*kqwl
, kq_index_t qos_index
)
6584 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6585 unsigned long pri
= pthread_priority_for_kqrequest(kqr
, qos_index
);
6586 int ret
, op
= WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL
;
6588 assert((kqr
->kqr_state
& (KQR_THREQUESTED
| KQR_BOUND
)) == KQR_THREQUESTED
);
6590 if (current_proc() == kqwl
->kqwl_kqueue
.kq_p
) {
6591 op
= WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL
;
6593 op
= WORKQ_THREADREQ_CHANGE_PRI
;
6596 ret
= (*pthread_functions
->workq_threadreq_modify
)(kqwl
->kqwl_p
,
6597 &kqr
->kqr_req
, op
, pri
, 0);
6600 assert(op
== WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL
);
6601 op
= WORKQ_THREADREQ_CHANGE_PRI
;
6605 assert(op
== WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL
);
6606 act_set_astkevent(current_thread(), AST_KEVENT_REDRIVE_THREADREQ
);
/*
 * Interact with the pthread kext to request a servicing thread.
 * This will request a single thread at the highest QoS level
 * for which there is work (whether that was the requested QoS
 * for an event or an override applied to a lower-QoS request).
 *
 * - Caller holds the workloop request lock
 *
 * - May be called with the kqueue's wait queue set locked,
 *   so cannot do anything that could recurse on that.
 */
6631 kqworkloop_request_thread(struct kqworkloop
*kqwl
, kq_index_t qos_index
)
6633 struct kqrequest
*kqr
;
6635 assert(kqwl
->kqwl_state
& KQ_WORKLOOP
);
6637 kqr
= &kqwl
->kqwl_request
;
6639 assert(kqwl
->kqwl_owner
== THREAD_NULL
);
6640 assert((kqr
->kqr_state
& KQR_BOUND
) == 0);
6641 assert((kqr
->kqr_state
& KQR_THREQUESTED
) == 0);
6642 assert(!(kqwl
->kqwl_kqueue
.kq_state
& KQ_NO_WQ_THREAD
));
6644 /* If we're draining thread requests, just bail */
6645 if (kqr
->kqr_state
& KQR_DRAIN
)
6648 if (pthread_functions
!= NULL
&&
6649 pthread_functions
->workq_threadreq
!= NULL
) {
6651 * set request state flags, etc... before calling pthread
6652 * This assures they are set before a possible synchronous
6653 * callback to workloop_fulfill_threadreq().
6655 kqr
->kqr_state
|= KQR_THREQUESTED
;
6657 /* Add a thread request reference on the kqueue. */
6658 kqueue_retain((struct kqueue
*)kqwl
);
6660 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THREQUEST
),
6661 kqwl
->kqwl_dynamicid
,
6662 0, qos_index
, kqr
->kqr_state
);
6663 kqworkloop_threadreq_impl(kqwl
, qos_index
);
6665 panic("kqworkloop_request_thread");
6671 kqworkloop_update_sync_override_state(struct kqworkloop
*kqwl
, boolean_t sync_ipc_override
)
6673 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6674 kqwl_req_lock(kqwl
);
6675 kqr
->kqr_has_sync_override
= sync_ipc_override
;
6676 kqwl_req_unlock(kqwl
);
6680 static inline kq_index_t
6681 kqworkloop_combined_qos(struct kqworkloop
*kqwl
, boolean_t
*ipc_override_is_sync
)
6683 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6684 kq_index_t override
;
6686 *ipc_override_is_sync
= FALSE
;
6687 override
= MAX(MAX(kqr
->kqr_qos_index
, kqr
->kqr_override_index
),
6688 kqr
->kqr_dsync_waiters_qos
);
6690 if (kqr
->kqr_sync_suppress_count
> 0 || kqr
->kqr_has_sync_override
) {
6691 *ipc_override_is_sync
= TRUE
;
6692 override
= THREAD_QOS_USER_INTERACTIVE
;
6698 kqworkloop_request_fire_r2k_notification(struct kqworkloop
*kqwl
)
6700 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6702 kqwl_req_held(kqwl
);
6704 if (kqr
->kqr_state
& KQR_R2K_NOTIF_ARMED
) {
6705 assert(kqr
->kqr_state
& KQR_BOUND
);
6706 assert(kqr
->kqr_thread
);
6708 kqr
->kqr_state
&= ~KQR_R2K_NOTIF_ARMED
;
6709 act_set_astkevent(kqr
->kqr_thread
, AST_KEVENT_RETURN_TO_KERNEL
);
6714 kqworkloop_update_threads_qos(struct kqworkloop
*kqwl
, int op
, kq_index_t qos
)
6716 const uint8_t KQWL_STAYACTIVE_FIRED_BIT
= (1 << 0);
6718 struct kqrequest
*kqr
= &kqwl
->kqwl_request
;
6719 boolean_t old_ipc_override_is_sync
= FALSE
;
6720 kq_index_t old_qos
= kqworkloop_combined_qos(kqwl
, &old_ipc_override_is_sync
);
6721 struct kqueue
*kq
= &kqwl
->kqwl_kqueue
;
6722 bool static_thread
= (kq
->kq_state
& KQ_NO_WQ_THREAD
);
6725 /* must hold the kqr lock */
6726 kqwl_req_held(kqwl
);
6729 case KQWL_UTQ_UPDATE_WAKEUP_QOS
:
6730 if (qos
== KQWL_BUCKET_STAYACTIVE
) {
		/*
		 * the KQWL_BUCKET_STAYACTIVE is not a QoS bucket, we only remember
		 * a high watermark (kqr_stayactive_qos) of any stay active knote
		 * that was ever registered with this workloop.
		 *
		 * When waitq_set__CALLING_PREPOST_HOOK__() wakes up any stay active
		 * knote, we use this high-watermark as a wakeup-index, and also set
		 * the magic KQWL_BUCKET_STAYACTIVE bit to make sure we remember
		 * there is at least one stay active knote fired until the next full
		 * processing of this bucket.
		 */
6742 kqr
->kqr_wakeup_indexes
|= KQWL_STAYACTIVE_FIRED_BIT
;
6743 qos
= kqr
->kqr_stayactive_qos
;
6745 assert(!static_thread
);
6747 if (kqr
->kqr_wakeup_indexes
& (1 << qos
)) {
6748 assert(kqr
->kqr_state
& KQR_WAKEUP
);
6752 kqr
->kqr_wakeup_indexes
|= (1 << qos
);
6753 kqr
->kqr_state
|= KQR_WAKEUP
;
6754 kqworkloop_request_fire_r2k_notification(kqwl
);
6755 goto recompute_async
;
6757 case KQWL_UTQ_UPDATE_STAYACTIVE_QOS
:
6759 if (kqr
->kqr_stayactive_qos
< qos
) {
6760 kqr
->kqr_stayactive_qos
= qos
;
6761 if (kqr
->kqr_wakeup_indexes
& KQWL_STAYACTIVE_FIRED_BIT
) {
6762 assert(kqr
->kqr_state
& KQR_WAKEUP
);
6763 kqr
->kqr_wakeup_indexes
|= (1 << qos
);
6764 goto recompute_async
;
6769 case KQWL_UTQ_RECOMPUTE_WAKEUP_QOS
:
6770 kqlock_held(kq
); // to look at kq_queues
6771 kqr
->kqr_has_sync_override
= FALSE
;
6772 i
= KQWL_BUCKET_STAYACTIVE
;
6773 if (TAILQ_EMPTY(&kqr
->kqr_suppressed
)) {
6774 kqr
->kqr_override_index
= THREAD_QOS_UNSPECIFIED
;
6776 if (!TAILQ_EMPTY(&kq
->kq_queue
[i
]) &&
6777 (kqr
->kqr_wakeup_indexes
& KQWL_STAYACTIVE_FIRED_BIT
)) {
6779 * If the KQWL_STAYACTIVE_FIRED_BIT is set, it means a stay active
6780 * knote may have fired, so we need to merge in kqr_stayactive_qos.
6782 * Unlike other buckets, this one is never empty but could be idle.
6784 kqr
->kqr_wakeup_indexes
&= KQWL_STAYACTIVE_FIRED_BIT
;
6785 kqr
->kqr_wakeup_indexes
|= (1 << kqr
->kqr_stayactive_qos
);
6787 kqr
->kqr_wakeup_indexes
= 0;
6789 for (i
= THREAD_QOS_UNSPECIFIED
+ 1; i
< KQWL_BUCKET_STAYACTIVE
; i
++) {
6790 if (!TAILQ_EMPTY(&kq
->kq_queue
[i
])) {
6791 kqr
->kqr_wakeup_indexes
|= (1 << i
);
6792 struct knote
*kn
= TAILQ_FIRST(&kqwl
->kqwl_kqueue
.kq_queue
[i
]);
6793 if (i
== THREAD_QOS_USER_INTERACTIVE
&&
6794 kn
->kn_qos_override_is_sync
) {
6795 kqr
->kqr_has_sync_override
= TRUE
;
6799 if (kqr
->kqr_wakeup_indexes
) {
6800 kqr
->kqr_state
|= KQR_WAKEUP
;
6801 kqworkloop_request_fire_r2k_notification(kqwl
);
6803 kqr
->kqr_state
&= ~KQR_WAKEUP
;
6805 assert(qos
== THREAD_QOS_UNSPECIFIED
);
6806 goto recompute_async
;
6808 case KQWL_UTQ_RESET_WAKEUP_OVERRIDE
:
6809 kqr
->kqr_override_index
= THREAD_QOS_UNSPECIFIED
;
6810 assert(qos
== THREAD_QOS_UNSPECIFIED
);
6811 goto recompute_async
;
6813 case KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE
:
		/*
		 * When modifying the wakeup QoS or the async override QoS, we always
		 * need to maintain our invariant that kqr_override_index is at least as
		 * large as the highest QoS for which an event is fired.
		 *
		 * However this override index can be larger when there is an overridden
		 * suppressed knote pushing on the kqueue.
		 */
6823 if (kqr
->kqr_wakeup_indexes
> (1 << qos
)) {
6824 qos
= fls(kqr
->kqr_wakeup_indexes
) - 1; /* fls is 1-based */
6826 if (kqr
->kqr_override_index
< qos
) {
6827 kqr
->kqr_override_index
= qos
;
6831 case KQWL_UTQ_REDRIVE_EVENTS
:
6834 case KQWL_UTQ_SET_ASYNC_QOS
:
6836 kqr
->kqr_qos_index
= qos
;
6839 case KQWL_UTQ_SET_SYNC_WAITERS_QOS
:
6841 kqr
->kqr_dsync_waiters_qos
= qos
;
6845 panic("unknown kqwl thread qos update operation: %d", op
);
6848 boolean_t new_ipc_override_is_sync
= FALSE
;
6849 kq_index_t new_qos
= kqworkloop_combined_qos(kqwl
, &new_ipc_override_is_sync
);
6850 thread_t kqwl_owner
= kqwl
->kqwl_owner
;
6851 thread_t servicer
= kqr
->kqr_thread
;
6852 __assert_only
int ret
;
6855 * Apply the diffs to the owner if applicable
6857 if (filt_wlowner_is_valid(kqwl_owner
)) {
6859 /* JMM - need new trace hooks for owner overrides */
6860 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST
),
6861 kqwl
->kqwl_dynamicid
,
6862 (kqr
->kqr_state
& KQR_BOUND
) ? thread_tid(kqwl_owner
) : 0,
6863 (kqr
->kqr_qos_index
<< 8) | new_qos
,
6864 (kqr
->kqr_override_index
<< 8) | kqr
->kqr_state
);
6866 if (new_qos
== kqr
->kqr_dsync_owner_qos
) {
6868 } else if (kqr
->kqr_dsync_owner_qos
== THREAD_QOS_UNSPECIFIED
) {
6869 thread_add_ipc_override(kqwl_owner
, new_qos
);
6870 } else if (new_qos
== THREAD_QOS_UNSPECIFIED
) {
6871 thread_drop_ipc_override(kqwl_owner
);
6872 } else /* kqr->kqr_dsync_owner_qos != new_qos */ {
6873 thread_update_ipc_override(kqwl_owner
, new_qos
);
6875 kqr
->kqr_dsync_owner_qos
= new_qos
;
6877 if (new_ipc_override_is_sync
&&
6878 !kqr
->kqr_owner_override_is_sync
) {
6879 thread_add_sync_ipc_override(kqwl_owner
);
6880 } else if (!new_ipc_override_is_sync
&&
6881 kqr
->kqr_owner_override_is_sync
) {
6882 thread_drop_sync_ipc_override(kqwl_owner
);
6884 kqr
->kqr_owner_override_is_sync
= new_ipc_override_is_sync
;
6888 * apply the diffs to the servicer
6890 if (static_thread
) {
6892 * Statically bound thread
6894 * These threads don't participates in QoS overrides today, just wakeup
6895 * the thread blocked on this kqueue if a new event arrived.
6899 case KQWL_UTQ_UPDATE_WAKEUP_QOS
:
6900 case KQWL_UTQ_UPDATE_STAYACTIVE_QOS
:
6901 case KQWL_UTQ_RECOMPUTE_WAKEUP_QOS
:
6904 case KQWL_UTQ_RESET_WAKEUP_OVERRIDE
:
6905 case KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE
:
6906 case KQWL_UTQ_REDRIVE_EVENTS
:
6907 case KQWL_UTQ_SET_ASYNC_QOS
:
6908 case KQWL_UTQ_SET_SYNC_WAITERS_QOS
:
6909 panic("should never be called");
6915 if ((kqr
->kqr_state
& KQR_BOUND
) && (kqr
->kqr_state
& KQR_WAKEUP
)) {
6916 assert(servicer
&& !is_workqueue_thread(servicer
));
6917 if (kq
->kq_state
& (KQ_SLEEP
| KQ_SEL
)) {
6918 kq
->kq_state
&= ~(KQ_SLEEP
| KQ_SEL
);
6919 waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
, KQ_EVENT
,
6920 THREAD_AWAKENED
, WAITQ_ALL_PRIORITIES
);
6923 } else if ((kqr
->kqr_state
& KQR_THREQUESTED
) == 0) {
6925 * No servicer, nor thread-request
6927 * Make a new thread request, unless there is an owner (or the workloop
6928 * is suspended in userland) or if there is no asynchronous work in the
6932 if (kqwl_owner
== THREAD_NULL
&& (kqr
->kqr_state
& KQR_WAKEUP
)) {
6933 kqworkloop_request_thread(kqwl
, new_qos
);
6935 } else if ((kqr
->kqr_state
& KQR_BOUND
) == 0 &&
6936 (kqwl_owner
|| (kqr
->kqr_state
& KQR_WAKEUP
) == 0)) {
6938 * No servicer, thread request in flight we want to cancel
6940 * We just got rid of the last knote of the kqueue or noticed an owner
6941 * with a thread request still in flight, take it back.
6943 ret
= (*pthread_functions
->workq_threadreq_modify
)(kqwl
->kqwl_p
,
6944 &kqr
->kqr_req
, WORKQ_THREADREQ_CANCEL
, 0, 0);
6946 kqr
->kqr_state
&= ~KQR_THREQUESTED
;
6947 kqueue_release(kq
, KQUEUE_CANT_BE_LAST_REF
);
6950 boolean_t qos_changed
= FALSE
;
6953 * Servicer or request is in flight
6955 * Just apply the diff to the servicer or the thread request
6957 if (kqr
->kqr_state
& KQR_BOUND
) {
6958 servicer
= kqr
->kqr_thread
;
6959 struct uthread
*ut
= get_bsdthread_info(servicer
);
6960 if (ut
->uu_kqueue_qos_index
!= new_qos
) {
6961 if (ut
->uu_kqueue_qos_index
== THREAD_QOS_UNSPECIFIED
) {
6962 thread_add_ipc_override(servicer
, new_qos
);
6963 } else if (new_qos
== THREAD_QOS_UNSPECIFIED
) {
6964 thread_drop_ipc_override(servicer
);
6965 } else /* ut->uu_kqueue_qos_index != new_qos */ {
6966 thread_update_ipc_override(servicer
, new_qos
);
6968 ut
->uu_kqueue_qos_index
= new_qos
;
6972 if (new_ipc_override_is_sync
!= ut
->uu_kqueue_override_is_sync
) {
6973 if (new_ipc_override_is_sync
&&
6974 !ut
->uu_kqueue_override_is_sync
) {
6975 thread_add_sync_ipc_override(servicer
);
6976 } else if (!new_ipc_override_is_sync
&&
6977 ut
->uu_kqueue_override_is_sync
) {
6978 thread_drop_sync_ipc_override(servicer
);
6980 ut
->uu_kqueue_override_is_sync
= new_ipc_override_is_sync
;
6983 } else if (old_qos
!= new_qos
) {
6985 kqworkloop_threadreq_modify(kqwl
, new_qos
);
6989 servicer
= kqr
->kqr_thread
;
6990 KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST
),
6991 kqwl
->kqwl_dynamicid
,
6992 (kqr
->kqr_state
& KQR_BOUND
) ? thread_tid(servicer
) : 0,
6993 (kqr
->kqr_qos_index
<< 16) | (new_qos
<< 8) | new_ipc_override_is_sync
,
6994 (kqr
->kqr_override_index
<< 8) | kqr
->kqr_state
);
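/*
 * kqworkloop_request_help - wake up the workloop servicer for a QoS bucket
 */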
static void
kqworkloop_request_help(struct kqworkloop *kqwl, kq_index_t qos_index)
{
	/* convert to thread qos value */
	assert(qos_index < KQWL_NBUCKETS);

	kqwl_req_lock(kqwl);
	kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_WAKEUP_QOS, qos_index);
	kqwl_req_unlock(kqwl);
}
/*
 * These arrays describe the low and high qindexes for a given qos_index.
 * The values come from the chart in <sys/eventvar.h> (must stay in sync).
 */
static kq_index_t _kqwq_base_index[KQWQ_NQOS] = {0, 0, 6, 11, 15, 18, 20, 21};
static kq_index_t _kqwq_high_index[KQWQ_NQOS] = {0, 5, 10, 14, 17, 19, 20, 21};
static struct kqtailq *
kqueue_get_base_queue(struct kqueue *kq, kq_index_t qos_index)
{
	if (kq->kq_state & KQ_WORKQ) {
		assert(qos_index < KQWQ_NQOS);
		return &kq->kq_queue[_kqwq_base_index[qos_index]];
	} else if (kq->kq_state & KQ_WORKLOOP) {
		assert(qos_index < KQWL_NBUCKETS);
		return &kq->kq_queue[qos_index];
	} else {
		assert(qos_index == QOS_INDEX_KQFILE);
		return &kq->kq_queue[QOS_INDEX_KQFILE];
	}
}
static struct kqtailq *
kqueue_get_high_queue(struct kqueue *kq, kq_index_t qos_index)
{
	if (kq->kq_state & KQ_WORKQ) {
		assert(qos_index < KQWQ_NQOS);
		return &kq->kq_queue[_kqwq_high_index[qos_index]];
	} else if (kq->kq_state & KQ_WORKLOOP) {
		assert(qos_index < KQWL_NBUCKETS);
		return &kq->kq_queue[KQWL_BUCKET_STAYACTIVE];
	} else {
		assert(qos_index == QOS_INDEX_KQFILE);
		return &kq->kq_queue[QOS_INDEX_KQFILE];
	}
}
static int
kqueue_queue_empty(struct kqueue *kq, kq_index_t qos_index)
{
	struct kqtailq *base_queue = kqueue_get_base_queue(kq, qos_index);
	struct kqtailq *queue = kqueue_get_high_queue(kq, qos_index);

	do {
		if (!TAILQ_EMPTY(queue))
			return 0;
	} while (queue-- > base_queue);
	return 1;
}
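/*
 * kqueue_get_suppressed_queue - return the suppression queue matching this
 * kqueue type: the per-QoS request for workq kqueues, the single workloop
 * request, or the kqfile's own suppressed list.
 */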
static struct kqtailq *
kqueue_get_suppressed_queue(struct kqueue *kq, kq_index_t qos_index)
{
	struct kqtailq *res;
	struct kqrequest *kqr;

	if (kq->kq_state & KQ_WORKQ) {
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		kqr = kqworkq_get_request(kqwq, qos_index);
		res = &kqr->kqr_suppressed;
	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		kqr = &kqwl->kqwl_request;
		res = &kqr->kqr_suppressed;
	} else {
		struct kqfile *kqf = (struct kqfile *)kq;
		res = &kqf->kqf_suppressed;
	}
	return res;
}
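/*
 * knote_get_queue_index - compute the kq_queue bucket a knote belongs to,
 * combining its requested QoS index with any QoS override.
 */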
static kq_index_t
knote_get_queue_index(struct knote *kn)
{
	kq_index_t override_index = knote_get_qos_override_index(kn);
	kq_index_t qos_index = knote_get_qos_index(kn);
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t res;

	if (kq->kq_state & KQ_WORKQ) {
		res = _kqwq_base_index[qos_index];
		if (override_index > qos_index)
			res += override_index - qos_index;
		assert(res <= _kqwq_high_index[qos_index]);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		res = MAX(override_index, qos_index);
		assert(res < KQWL_NBUCKETS);
	} else {
		assert(qos_index == QOS_INDEX_KQFILE);
		assert(override_index == QOS_INDEX_KQFILE);
		res = QOS_INDEX_KQFILE;
	}
	return res;
}
static struct kqtailq *
knote_get_queue(struct knote *kn)
{
	kq_index_t qindex = knote_get_queue_index(kn);

	return &(knote_get_kq(kn))->kq_queue[qindex];
}

static kq_index_t
knote_get_req_index(struct knote *kn)
{
	return kn->kn_req_index;
}

static kq_index_t
knote_get_qos_index(struct knote *kn)
{
	return kn->kn_qos_index;
}
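/*
 * knote_set_qos_index - record the requested QoS index for a knote and,
 * unless the knote is suppressed, make it the in-use index as well.
 */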
static void
knote_set_qos_index(struct knote *kn, kq_index_t qos_index)
{
	struct kqueue *kq = knote_get_kq(kn);

	assert(qos_index < KQWQ_NQOS);
	assert((kn->kn_status & KN_QUEUED) == 0);

	if (kq->kq_state & KQ_WORKQ) {
		assert(qos_index > THREAD_QOS_UNSPECIFIED);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		/* XXX this policy decision shouldn't be here */
		if (qos_index == THREAD_QOS_UNSPECIFIED)
			qos_index = THREAD_QOS_LEGACY;
	} else {
		qos_index = QOS_INDEX_KQFILE;
	}

	/* always set requested */
	kn->kn_req_index = qos_index;

	/* only adjust in-use qos index when not suppressed */
	if ((kn->kn_status & KN_SUPPRESSED) == 0)
		kn->kn_qos_index = qos_index;
}
void
knote_set_qos_overcommit(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqrequest *kqr;

	/* turn overcommit on for the appropriate thread request? */
	if (kn->kn_qos & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) {
		if (kq->kq_state & KQ_WORKQ) {
			kq_index_t qos_index = knote_get_qos_index(kn);
			struct kqworkq *kqwq = (struct kqworkq *)kq;

			kqr = kqworkq_get_request(kqwq, qos_index);

			kqwq_req_lock(kqwq);
			kqr->kqr_state |= KQR_THOVERCOMMIT;
			kqwq_req_unlock(kqwq);
		} else if (kq->kq_state & KQ_WORKLOOP) {
			struct kqworkloop *kqwl = (struct kqworkloop *)kq;

			kqr = &kqwl->kqwl_request;

			kqwl_req_lock(kqwl);
			kqr->kqr_state |= KQR_THOVERCOMMIT;
			kqwl_req_unlock(kqwl);
		}
	}
}
static kq_index_t
knote_get_qos_override_index(struct knote *kn)
{
	return kn->kn_qos_override;
}
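/*
 * knote_set_qos_override_index - record a (possibly sync) QoS override on
 * the knote and push it through to the owning workq or workloop request.
 */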
static void
knote_set_qos_override_index(struct knote *kn, kq_index_t override_index,
		boolean_t override_is_sync)
{
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t qos_index = knote_get_qos_index(kn);
	kq_index_t old_override_index = knote_get_qos_override_index(kn);
	boolean_t old_override_is_sync = kn->kn_qos_override_is_sync;

	assert((kn->kn_status & KN_QUEUED) == 0);

	if (override_index == KQWQ_QOS_MANAGER) {
		assert(qos_index == KQWQ_QOS_MANAGER);
	} else {
		assert(override_index < KQWQ_QOS_MANAGER);
	}

	kn->kn_qos_override = override_index;
	kn->kn_qos_override_is_sync = override_is_sync;

	/*
	 * If this is a workq/workloop kqueue, apply the override to the
	 * servicing thread.
	 */
	if (kq->kq_state & KQ_WORKQ) {
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		assert(qos_index > THREAD_QOS_UNSPECIFIED);
		kqworkq_update_override(kqwq, qos_index, override_index);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;
		uint32_t flags = 0;

		if ((kn->kn_status & KN_SUPPRESSED) == KN_SUPPRESSED) {
			flags = flags | KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS;
		}

		if (override_index == THREAD_QOS_USER_INTERACTIVE
		    && override_is_sync) {
			flags = flags | KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI;
		}

		if (old_override_index == THREAD_QOS_USER_INTERACTIVE
		    && old_override_is_sync) {
			flags = flags | KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI;
		}

		assert(qos_index > THREAD_QOS_UNSPECIFIED);
		kqworkloop_update_override(kqwl, qos_index, override_index, flags);
	}
}
static kq_index_t
knote_get_sync_qos_override_index(struct knote *kn)
{
	return kn->kn_qos_sync_override;
}
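/*
 * kqworkq_update_override - raise the override index of a workq request
 * and apply the matching IPC override to a bound, non-manager servicer.
 */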
static void
kqworkq_update_override(struct kqworkq *kqwq, kq_index_t qos_index, kq_index_t override_index)
{
	struct kqrequest *kqr;
	kq_index_t old_override_index;

	if (override_index <= qos_index) {
		return;
	}

	kqr = kqworkq_get_request(kqwq, qos_index);

	kqwq_req_lock(kqwq);
	old_override_index = kqr->kqr_override_index;
	if (override_index > MAX(kqr->kqr_qos_index, old_override_index)) {
		kqr->kqr_override_index = override_index;

		/* apply the override to [incoming?] servicing thread */
		if (kqr->kqr_state & KQR_BOUND) {
			thread_t wqthread = kqr->kqr_thread;

			/* only apply if non-manager */
			if ((kqr->kqr_state & KQWQ_THMANAGER) == 0) {
				if (old_override_index)
					thread_update_ipc_override(wqthread, override_index);
				else
					thread_add_ipc_override(wqthread, override_index);
			}
		}
	}
	kqwq_req_unlock(kqwq);
}
/* called with the kqworkq lock held */
static void
kqworkq_bind_thread_impl(
	struct kqworkq *kqwq,
	kq_index_t qos_index,
	thread_t thread,
	unsigned int flags)
{
	/* request lock must be held */
	kqwq_req_held(kqwq);

	struct kqrequest *kqr = kqworkq_get_request(kqwq, qos_index);
	assert(kqr->kqr_state & KQR_THREQUESTED);

	if (qos_index == KQWQ_QOS_MANAGER)
		flags |= KEVENT_FLAG_WORKQ_MANAGER;

	struct uthread *ut = get_bsdthread_info(thread);

	/*
	 * If this is a manager, and the manager request bit is
	 * not set, assure no other thread is bound. If the bit
	 * is set, make sure the old thread is us (or not set).
	 */
	if (flags & KEVENT_FLAG_WORKQ_MANAGER) {
		if ((kqr->kqr_state & KQR_BOUND) == 0) {
			kqr->kqr_state |= (KQR_BOUND | KQWQ_THMANAGER);
			TAILQ_INIT(&kqr->kqr_suppressed);
			kqr->kqr_thread = thread;
			ut->uu_kqueue_bound = (struct kqueue *)kqwq;
			ut->uu_kqueue_qos_index = KQWQ_QOS_MANAGER;
			ut->uu_kqueue_flags = (KEVENT_FLAG_WORKQ |
			    KEVENT_FLAG_WORKQ_MANAGER);
		} else {
			assert(kqr->kqr_state & KQR_BOUND);
			assert(thread == kqr->kqr_thread);
			assert(ut->uu_kqueue_bound == (struct kqueue *)kqwq);
			assert(ut->uu_kqueue_qos_index == KQWQ_QOS_MANAGER);
			assert(ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ_MANAGER);
		}
		return;
	}

	/* Just a normal one-queue servicing thread */
	assert(kqr->kqr_state & KQR_THREQUESTED);
	assert(kqr->kqr_qos_index == qos_index);

	if ((kqr->kqr_state & KQR_BOUND) == 0) {
		kqr->kqr_state |= KQR_BOUND;
		TAILQ_INIT(&kqr->kqr_suppressed);
		kqr->kqr_thread = thread;

		/* apply an ipc QoS override if one is needed */
		if (kqr->kqr_override_index) {
			assert(kqr->kqr_qos_index);
			assert(kqr->kqr_override_index > kqr->kqr_qos_index);
			assert(thread_get_ipc_override(thread) == THREAD_QOS_UNSPECIFIED);
			thread_add_ipc_override(thread, kqr->kqr_override_index);
		}

		/* indicate that we are processing in the uthread */
		ut->uu_kqueue_bound = (struct kqueue *)kqwq;
		ut->uu_kqueue_qos_index = qos_index;
		ut->uu_kqueue_flags = flags;
	} else {
		/*
		 * probably synchronously bound AND post-request bound
		 * this logic can go away when we get rid of post-request bind
		 */
		assert(kqr->kqr_state & KQR_BOUND);
		assert(thread == kqr->kqr_thread);
		assert(ut->uu_kqueue_bound == (struct kqueue *)kqwq);
		assert(ut->uu_kqueue_qos_index == qos_index);
		assert((ut->uu_kqueue_flags & flags) == flags);
	}
}
static void
kqworkloop_update_override(
	struct kqworkloop *kqwl,
	kq_index_t qos_index,
	kq_index_t override_index,
	uint32_t flags)
{
	struct kqrequest *kqr = &kqwl->kqwl_request;

	kqwl_req_lock(kqwl);

	/* Do not override on attached threads */
	if (kqr->kqr_state & KQR_BOUND) {
		assert(kqr->kqr_thread);

		if (kqwl->kqwl_kqueue.kq_state & KQ_NO_WQ_THREAD) {
			kqwl_req_unlock(kqwl);
			assert(!is_workqueue_thread(kqr->kqr_thread));
			return;
		}
	}

	/* Update sync ipc counts on kqr for suppressed knotes */
	if (flags & KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS) {
		kqworkloop_update_suppress_sync_count(kqr, flags);
	}

	if ((flags & KQWL_UO_UPDATE_OVERRIDE_LAZY) == 0) {
		kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE,
				MAX(qos_index, override_index));
	}
	kqwl_req_unlock(kqwl);
}
static void
kqworkloop_update_suppress_sync_count(
	struct kqrequest *kqr,
	uint32_t flags)
{
	if (flags & KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI) {
		kqr->kqr_sync_suppress_count++;
	}

	if (flags & KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI) {
		assert(kqr->kqr_sync_suppress_count > 0);
		kqr->kqr_sync_suppress_count--;
	}
}
/*
 * kqworkloop_unbind_thread - Unbind the servicer thread of a workloop kqueue
 *
 * It will end the processing phase in case it was still processing:
 *
 * We may have to request a new thread for workloops that are not
 * KQ_NO_WQ_THREAD. This can happen if:
 * - there were active events at or above our QoS we never got to (count > 0)
 * - we pended waitq hook callouts during processing
 * - we pended wakeups while processing (or unsuppressing)
 *
 * Called with kqueue lock held.
 */
static void
kqworkloop_unbind_thread(
	struct kqworkloop *kqwl,
	thread_t thread,
	__unused unsigned int flags)
{
	struct kqueue *kq = &kqwl->kqwl_kqueue;
	struct kqrequest *kqr = &kqwl->kqwl_request;

	assert((kq->kq_state & KQ_PROCESSING) == 0);
	if (kq->kq_state & KQ_PROCESSING) {
		return;
	}

	/*
	 * Forcing the KQ_PROCESSING flag allows for QoS updates because of
	 * unsuppressing knotes not to be applied until the eventual call to
	 * kqworkloop_update_threads_qos() below.
	 */
	kq->kq_state |= KQ_PROCESSING;
	kqworkloop_acknowledge_events(kqwl, TRUE);
	kq->kq_state &= ~KQ_PROCESSING;

	kqwl_req_lock(kqwl);

	/* deal with extraneous unbinds in release kernels */
	assert((kqr->kqr_state & (KQR_BOUND | KQR_PROCESSING)) == KQR_BOUND);
	if ((kqr->kqr_state & (KQR_BOUND | KQR_PROCESSING)) != KQR_BOUND) {
		kqwl_req_unlock(kqwl);
		return;
	}

	assert(thread == current_thread());
	assert(kqr->kqr_thread == thread);
	if (kqr->kqr_thread != thread) {
		kqwl_req_unlock(kqwl);
		return;
	}

	struct uthread *ut = get_bsdthread_info(thread);
	kq_index_t old_qos_index = ut->uu_kqueue_qos_index;
	boolean_t ipc_override_is_sync = ut->uu_kqueue_override_is_sync;
	ut->uu_kqueue_bound = NULL;
	ut->uu_kqueue_qos_index = 0;
	ut->uu_kqueue_override_is_sync = 0;
	ut->uu_kqueue_flags = 0;

	/* unbind the servicer thread, drop overrides */
	kqr->kqr_thread = NULL;
	kqr->kqr_state &= ~(KQR_BOUND | KQR_THREQUESTED | KQR_R2K_NOTIF_ARMED);
	kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_RECOMPUTE_WAKEUP_QOS, 0);

	kqwl_req_unlock(kqwl);

	/*
	 * Drop the override on the current thread last, after the call to
	 * kqworkloop_update_threads_qos above.
	 */
	if (old_qos_index) {
		thread_drop_ipc_override(thread);
	}
	if (ipc_override_is_sync) {
		thread_drop_sync_ipc_override(thread);
	}
}
/* called with the kqworkq lock held */
static void
kqworkq_unbind_thread(
	struct kqworkq *kqwq,
	kq_index_t qos_index,
	thread_t thread,
	__unused unsigned int flags)
{
	struct kqrequest *kqr = kqworkq_get_request(kqwq, qos_index);
	kq_index_t override_index = 0;

	/* request lock must be held */
	kqwq_req_held(kqwq);

	assert(thread == current_thread());

	if ((kqr->kqr_state & KQR_BOUND) == 0) {
		assert(kqr->kqr_state & KQR_BOUND);
		return;
	}

	assert(kqr->kqr_thread == thread);
	assert(TAILQ_EMPTY(&kqr->kqr_suppressed));

	/*
	 * If there is an override, drop it from the current thread
	 * and then we are free to recompute (a potentially lower)
	 * minimum override to apply to the next thread request.
	 */
	if (kqr->kqr_override_index) {
		struct kqtailq *base_queue = kqueue_get_base_queue(&kqwq->kqwq_kqueue, qos_index);
		struct kqtailq *queue = kqueue_get_high_queue(&kqwq->kqwq_kqueue, qos_index);

		/* if not bound to a manager thread, drop the current ipc override */
		if ((kqr->kqr_state & KQWQ_THMANAGER) == 0) {
			thread_drop_ipc_override(thread);
		}

		/* recompute the new override */
		do {
			if (!TAILQ_EMPTY(queue)) {
				override_index = queue - base_queue + qos_index;
				break;
			}
		} while (queue-- > base_queue);
	}

	/* Mark it unbound */
	kqr->kqr_thread = NULL;
	kqr->kqr_state &= ~(KQR_BOUND | KQR_THREQUESTED | KQWQ_THMANAGER);

	/* apply the new override */
	if (override_index > kqr->kqr_qos_index) {
		kqr->kqr_override_index = override_index;
	} else {
		kqr->kqr_override_index = THREAD_QOS_UNSPECIFIED;
	}
}
struct kqrequest *
kqworkq_get_request(struct kqworkq *kqwq, kq_index_t qos_index)
{
	assert(qos_index < KQWQ_NQOS);
	return &kqwq->kqwq_request[qos_index];
}
void
knote_adjust_qos(struct knote *kn, qos_t new_qos, qos_t new_override, kq_index_t sync_override_index)
{
	struct kqueue *kq = knote_get_kq(kn);
	boolean_t override_is_sync = FALSE;

	if (kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) {
		kq_index_t new_qos_index;
		kq_index_t new_override_index;
		kq_index_t servicer_qos_index;

		new_qos_index = qos_index_from_qos(kn, new_qos, FALSE);
		new_override_index = qos_index_from_qos(kn, new_override, TRUE);

		/* make sure the servicer qos acts as a floor */
		servicer_qos_index = qos_index_from_qos(kn, kn->kn_qos, FALSE);
		if (servicer_qos_index > new_qos_index)
			new_qos_index = servicer_qos_index;
		if (servicer_qos_index > new_override_index)
			new_override_index = servicer_qos_index;
		if (sync_override_index >= new_override_index) {
			new_override_index = sync_override_index;
			override_is_sync = TRUE;
		}

		kqlock(kq);
		if (new_qos_index != knote_get_req_index(kn) ||
		    new_override_index != knote_get_qos_override_index(kn) ||
		    override_is_sync != kn->kn_qos_override_is_sync) {
			if (kn->kn_status & KN_QUEUED) {
				knote_dequeue(kn);
				knote_set_qos_index(kn, new_qos_index);
				knote_set_qos_override_index(kn, new_override_index, override_is_sync);
				knote_enqueue(kn);
				knote_wakeup(kn);
			} else {
				knote_set_qos_index(kn, new_qos_index);
				knote_set_qos_override_index(kn, new_override_index, override_is_sync);
			}
		}
		kqunlock(kq);
	}
}
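/*
 * knote_adjust_sync_qos - track a sync (UI) QoS push on a workloop knote;
 * only THREAD_QOS_USER_INTERACTIVE and THREAD_QOS_UNSPECIFIED are tracked.
 */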
static void
knote_adjust_sync_qos(struct knote *kn, kq_index_t sync_qos, boolean_t lock_kq)
{
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t old_sync_override;
	kq_index_t qos_index = knote_get_qos_index(kn);
	uint32_t flags = 0;

	/* Tracking only happens for UI qos */
	if (sync_qos != THREAD_QOS_USER_INTERACTIVE &&
	    sync_qos != THREAD_QOS_UNSPECIFIED) {
		return;
	}

	if (lock_kq)
		kqlock(kq);

	if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		old_sync_override = knote_get_sync_qos_override_index(kn);
		if (old_sync_override != sync_qos) {
			kn->kn_qos_sync_override = sync_qos;

			/* update sync ipc counters for suppressed knotes */
			if ((kn->kn_status & KN_SUPPRESSED) == KN_SUPPRESSED) {
				flags = flags | KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS;

				/* Do not recalculate kqwl override, it would be done later */
				flags = flags | KQWL_UO_UPDATE_OVERRIDE_LAZY;

				if (sync_qos == THREAD_QOS_USER_INTERACTIVE) {
					flags = flags | KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI;
				}

				if (old_sync_override == THREAD_QOS_USER_INTERACTIVE) {
					flags = flags | KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI;
				}

				kqworkloop_update_override(kqwl, qos_index, sync_qos,
					flags);
			}
		}
	}
	if (lock_kq)
		kqunlock(kq);
}
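/*
 * knote_wakeup - request a servicing thread (workq/workloop) or wake up
 * waiters (kqfile) after a knote became active.
 */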
static void
knote_wakeup(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	kq_index_t qos_index = knote_get_qos_index(kn);

	if (kq->kq_state & KQ_WORKQ) {
		/* request a servicing thread */
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		kqworkq_request_help(kqwq, qos_index);

	} else if (kq->kq_state & KQ_WORKLOOP) {
		/* request a servicing thread */
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		if (kqworkloop_is_processing_on_current_thread(kqwl)) {
			/*
			 * kqworkloop_end_processing() will perform the required QoS
			 * computations when it unsets the processing mode.
			 */
			return;
		}
		kqworkloop_request_help(kqwl, qos_index);
	} else {
		struct kqfile *kqf = (struct kqfile *)kq;

		/* flag wakeups during processing */
		if (kq->kq_state & KQ_PROCESSING)
			kq->kq_state |= KQ_WAKEUP;

		/* wakeup a thread waiting on this queue */
		if (kq->kq_state & (KQ_SLEEP | KQ_SEL)) {
			kq->kq_state &= ~(KQ_SLEEP | KQ_SEL);
			waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
					KQ_EVENT,
					THREAD_AWAKENED,
					WAITQ_ALL_PRIORITIES);
		}

		/* wakeup other kqueues/select sets we're inside */
		KNOTE(&kqf->kqf_sel.si_note, 0);
	}
}
/*
 * Called with the kqueue locked
 */
void
kqueue_interrupt(struct kqueue *kq)
{
	assert((kq->kq_state & KQ_WORKQ) == 0);

	/* wakeup sleeping threads */
	if ((kq->kq_state & (KQ_SLEEP | KQ_SEL)) != 0) {
		kq->kq_state &= ~(KQ_SLEEP | KQ_SEL);
		(void)waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
				KQ_EVENT,
				THREAD_RESTART,
				WAITQ_ALL_PRIORITIES);
	}

	/* wakeup threads waiting their turn to process */
	if (kq->kq_state & KQ_PROCWAIT) {
		struct kqtailq *suppressq;

		assert(kq->kq_state & KQ_PROCESSING);

		kq->kq_state &= ~KQ_PROCWAIT;
		suppressq = kqueue_get_suppressed_queue(kq, QOS_INDEX_KQFILE);
		(void)waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
				CAST_EVENT64_T(suppressq),
				THREAD_RESTART,
				WAITQ_ALL_PRIORITIES);
	}
}
/*
 * Called back from waitq code when no threads waiting and the hook was set.
 *
 * Interrupts are likely disabled and spin locks are held - minimal work
 * can be done in this context!!!
 *
 * JMM - in the future, this will try to determine which knotes match the
 * wait queue wakeup and apply these wakeups against those knotes themselves.
 * For now, all the events dispatched this way are dispatch-manager handled,
 * so hard-code that for now.
 */
void
waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook, void *knote_hook, int qos)
{
#pragma unused(knote_hook, qos)

	struct kqueue *kq = (struct kqueue *)kq_hook;

	if (kq->kq_state & KQ_WORKQ) {
		struct kqworkq *kqwq = (struct kqworkq *)kq;

		kqworkq_request_help(kqwq, KQWQ_QOS_MANAGER);

	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		kqworkloop_request_help(kqwl, KQWL_BUCKET_STAYACTIVE);
	}
}
void
klist_init(struct klist *list)
{
	SLIST_INIT(list);
}
/*
 * Query/Post each knote in the object's list
 *
 *	The object lock protects the list. It is assumed
 *	that the filter/event routine for the object can
 *	determine that the object is already locked (via
 *	the hint) and not deadlock itself.
 *
 *	The object lock should also hold off pending
 *	detach/drop operations.  But we'll prevent it here
 *	too (by taking a use reference) - just in case.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext) {
		struct kqueue *kq = knote_get_kq(kn);

		kqlock(kq);

		assert(!knoteuse_needs_boost(kn, NULL));

		/* If we can get a use reference - deliver event */
		if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
			int result;

			/* call the event with only a use count */
			result = knote_fops(kn)->f_event(kn, hint);

			/* if its not going away and triggered */
			if (knoteuse2kqlock(kq, kn, KNUSE_NONE) && result)
				knote_activate(kn);
			/* kq lock held */
		}
		kqunlock(kq);
	}
}
/*
 * attach a knote to the specified list.  Return true if this is the first entry.
 * The list is protected by whatever lock the object it is associated with uses.
 */
int
knote_attach(struct klist *list, struct knote *kn)
{
	int ret = SLIST_EMPTY(list);
	SLIST_INSERT_HEAD(list, kn, kn_selnext);
	return (ret);
}
/*
 * detach a knote from the specified list.  Return true if that was the last entry.
 * The list is protected by whatever lock the object it is associated with uses.
 */
int
knote_detach(struct klist *list, struct knote *kn)
{
	SLIST_REMOVE(list, kn, knote, kn_selnext);
	return (SLIST_EMPTY(list));
}
/*
 * knote_vanish - Indicate that the source has vanished
 *
 * If the knote has requested EV_VANISHED delivery,
 * arrange for that. Otherwise, deliver a NOTE_REVOKE
 * event for backward compatibility.
 *
 * The knote is marked as having vanished, but is not
 * actually detached from the source in this instance.
 * The actual detach is deferred until the knote drop.
 *
 * Our caller already has the object lock held. Calling
 * the detach routine would try to take that lock
 * recursively - which likely is not supported.
 */
void
knote_vanish(struct klist *list)
{
	struct knote *kn;
	struct knote *kn_next;

	SLIST_FOREACH_SAFE(kn, list, kn_selnext, kn_next) {
		struct kqueue *kq = knote_get_kq(kn);
		int result;

		kqlock(kq);

		assert(!knoteuse_needs_boost(kn, NULL));

		if ((kn->kn_status & KN_DROPPING) == 0) {
			/* If EV_VANISH supported - prepare to deliver one */
			if (kn->kn_status & KN_REQVANISH) {
				kn->kn_status |= KN_VANISHED;
				knote_activate(kn);

			} else if (kqlock2knoteuse(kq, kn, KNUSE_NONE)) {
				/* call the event with only a use count */
				result = knote_fops(kn)->f_event(kn, NOTE_REVOKE);

				/* if its not going away and triggered */
				if (knoteuse2kqlock(kq, kn, KNUSE_NONE) && result)
					knote_activate(kn);
				/* lock held again */
			}
		}
		kqunlock(kq);
	}
}
/*
 * For a given knote, link a provided wait queue directly with the kqueue.
 * Wakeups will happen via recursive wait queue support.  But nothing will move
 * the knote to the active list at wakeup (nothing calls knote()).  Instead,
 * we permanently enqueue them here.
 *
 * kqueue and knote references are held by caller.
 * waitq locked by caller.
 *
 * caller provides the wait queue link structure.
 */
int
knote_link_waitq(struct knote *kn, struct waitq *wq, uint64_t *reserved_link)
{
	struct kqueue *kq = knote_get_kq(kn);
	kern_return_t kr;

	kr = waitq_link(wq, &kq->kq_wqs, WAITQ_ALREADY_LOCKED, reserved_link);
	if (kr == KERN_SUCCESS) {
		knote_markstayactive(kn);
		return (0);
	} else {
		return (EINVAL);
	}
}
/*
 * Unlink the provided wait queue from the kqueue associated with a knote.
 * Also remove it from the magic list of directly attached knotes.
 *
 * Note that the unlink may have already happened from the other side, so
 * ignore any failures to unlink and just remove it from the kqueue list.
 *
 * On success, caller is responsible for the link structure
 */
int
knote_unlink_waitq(struct knote *kn, struct waitq *wq)
{
	struct kqueue *kq = knote_get_kq(kn);
	kern_return_t kr;

	kr = waitq_unlink(wq, &kq->kq_wqs);
	knote_clearstayactive(kn);
	return ((kr != KERN_SUCCESS) ? EINVAL : 0);
}
/*
 * remove all knotes referencing a specified fd
 *
 * Essentially an inlined knote_remove & knote_drop
 * when we know for sure that the thing is a file
 *
 * Entered with the proc_fd lock already held.
 * It returns the same way, but may drop it temporarily.
 */
void
knote_fdclose(struct proc *p, int fd, int force)
{
	struct klist *list;
	struct knote *kn;

restart:
	list = &p->p_fd->fd_knlist[fd];
	SLIST_FOREACH(kn, list, kn_link) {
		struct kqueue *kq = knote_get_kq(kn);

		kqlock(kq);

		if (kq->kq_p != p)
			panic("%s: proc mismatch (kq->kq_p=%p != p=%p)",
			    __func__, kq->kq_p, p);

		/*
		 * If the knote supports EV_VANISHED delivery,
		 * transition it to vanished mode (or skip over
		 * it if already vanished).
		 */
		if (!force && (kn->kn_status & KN_REQVANISH)) {

			if ((kn->kn_status & KN_VANISHED) == 0) {
				proc_fdunlock(p);

				assert(!knoteuse_needs_boost(kn, NULL));

				/* get detach reference (also marks vanished) */
				if (kqlock2knotedetach(kq, kn, KNUSE_NONE)) {
					/* detach knote and drop fp use reference */
					knote_fops(kn)->f_detach(kn);
					if (knote_fops(kn)->f_isfd)
						fp_drop(p, kn->kn_id, kn->kn_fp, 0);

					/* activate it if it's still in existence */
					if (knoteuse2kqlock(kq, kn, KNUSE_NONE)) {
						knote_activate(kn);
					}
					kqunlock(kq);
				}
				proc_fdlock(p);
				goto restart;
			} else {
				kqunlock(kq);
				continue;
			}
		}

		proc_fdunlock(p);

		/*
		 * Convert the kq lock to a drop ref.
		 * If we get it, go ahead and drop it.
		 * Otherwise, we waited for the blocking
		 * condition to complete. Either way,
		 * we dropped the fdlock so start over.
		 */
		if (kqlock2knotedrop(kq, kn)) {
			knote_drop(kn, p);
		}

		proc_fdlock(p);
		goto restart;
	}
}
/*
 * knote_fdfind - lookup a knote in the fd table for process
 *
 * If the filter is file-based, lookup based on fd index.
 * Otherwise use a hash based on the ident.
 *
 * Matching is based on kq, filter, and ident. Optionally,
 * it may also be based on the udata field in the kevent -
 * allowing multiple event registration for the file object
 * per kqueue.
 *
 * fd_knhashlock or fdlock held on entry (and exit)
 */
static struct knote *
knote_fdfind(struct kqueue *kq,
		struct kevent_internal_s *kev,
		bool is_fd,
		struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct klist *list = NULL;
	struct knote *kn = NULL;

	/*
	 * determine where to look for the knote
	 */
	if (is_fd) {
		/* fd-based knotes are linked off the fd table */
		if (kev->ident < (u_int)fdp->fd_knlistsize) {
			list = &fdp->fd_knlist[kev->ident];
		}
	} else if (fdp->fd_knhashmask != 0) {
		/* hash non-fd knotes here too */
		list = &fdp->fd_knhash[KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
	}

	/*
	 * scan the selected list looking for a match
	 */
	if (list != NULL) {
		SLIST_FOREACH(kn, list, kn_link) {
			if (kq == knote_get_kq(kn) &&
			    kev->ident == kn->kn_id &&
			    kev->filter == kn->kn_filter) {
				if (kev->flags & EV_UDATA_SPECIFIC) {
					if ((kn->kn_status & KN_UDATA_SPECIFIC) &&
					    kev->udata == kn->kn_udata) {
						break; /* matching udata-specific knote */
					}
				} else if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0) {
					break; /* matching non-udata-specific knote */
				}
			}
		}
	}
	return kn;
}
/*
 * kq_add_knote- Add knote to the fd table for process
 * while checking for duplicates.
 *
 * All file-based filters associate a list of knotes by file
 * descriptor index. All other filters hash the knote by ident.
 *
 * May have to grow the table of knote lists to cover the
 * file descriptor index presented.
 *
 * fd_knhashlock and fdlock unheld on entry (and exit).
 *
 * Takes a rwlock boost if inserting the knote is successful.
 */
static int
kq_add_knote(struct kqueue *kq, struct knote *kn,
		struct kevent_internal_s *kev,
		struct proc *p, int *knoteuse_flags)
{
	struct filedesc *fdp = p->p_fd;
	struct klist *list = NULL;
	int ret = 0;
	bool is_fd = knote_fops(kn)->f_isfd;

	if (is_fd)
		proc_fdlock(p);
	else
		knhash_lock(p);

	if (knote_fdfind(kq, kev, is_fd, p) != NULL) {
		/* found an existing knote: we can't add this one */
		ret = ERESTART;
		goto out_locked;
	}

	/* knote was not found: add it now */
	if (!is_fd) {
		if (fdp->fd_knhashmask == 0) {
			u_long size = 0;

			list = hashinit(CONFIG_KN_HASHSIZE, M_KQUEUE,
					&size);
			if (list == NULL) {
				ret = ENOMEM;
				goto out_locked;
			}

			fdp->fd_knhash = list;
			fdp->fd_knhashmask = size;
		}

		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
		SLIST_INSERT_HEAD(list, kn, kn_link);
		goto out_locked;

	} else {
		/* knote is fd based */

		if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
			u_int size = 0;

			if (kn->kn_id >= (uint64_t)p->p_rlimit[RLIMIT_NOFILE].rlim_cur
			    || kn->kn_id >= (uint64_t)maxfiles) {
				ret = EINVAL;
				goto out_locked;
			}
			/* have to grow the fd_knlist */
			size = fdp->fd_knlistsize;
			while (size <= kn->kn_id)
				size += KQEXTENT;

			if (size >= (UINT_MAX/sizeof(struct klist *))) {
				ret = EINVAL;
				goto out_locked;
			}

			MALLOC(list, struct klist *,
			    size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
			if (list == NULL) {
				ret = ENOMEM;
				goto out_locked;
			}

			bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
			    fdp->fd_knlistsize * sizeof(struct klist *));
			bzero((caddr_t)list +
			    fdp->fd_knlistsize * sizeof(struct klist *),
			    (size - fdp->fd_knlistsize) * sizeof(struct klist *));
			FREE(fdp->fd_knlist, M_KQUEUE);
			fdp->fd_knlist = list;
			fdp->fd_knlistsize = size;
		}

		list = &fdp->fd_knlist[kn->kn_id];
		SLIST_INSERT_HEAD(list, kn, kn_link);
		goto out_locked;
	}

out_locked:
	if (ret == 0 && knoteuse_needs_boost(kn, kev)) {
		set_thread_rwlock_boost();
		*knoteuse_flags = KNUSE_BOOST;
	} else {
		*knoteuse_flags = KNUSE_NONE;
	}
	if (is_fd)
		proc_fdunlock(p);
	else
		knhash_unlock(p);

	return ret;
}
/*
 * kq_remove_knote - remove a knote from the fd table for process
 * and copy kn_status and kq_state while holding kqlock and
 * the fd table lock.
 *
 * If the filter is file-based, remove based on fd index.
 * Otherwise remove from the hash based on the ident.
 *
 * fd_knhashlock and fdlock unheld on entry (and exit).
 */
static void
kq_remove_knote(struct kqueue *kq, struct knote *kn, struct proc *p,
		kn_status_t *kn_status, uint16_t *kq_state)
{
	struct filedesc *fdp = p->p_fd;
	struct klist *list = NULL;
	bool is_fd;

	is_fd = knote_fops(kn)->f_isfd;

	if (is_fd)
		proc_fdlock(p);
	else
		knhash_lock(p);

	if (is_fd) {
		assert ((u_int)fdp->fd_knlistsize > kn->kn_id);
		list = &fdp->fd_knlist[kn->kn_id];
	} else {
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
	}
	SLIST_REMOVE(list, kn, knote, kn_link);

	kqlock(kq);
	*kn_status = kn->kn_status;
	*kq_state = kq->kq_state;
	kqunlock(kq);

	if (is_fd)
		proc_fdunlock(p);
	else
		knhash_unlock(p);
}
/*
 * kq_find_knote_and_kq_lock - lookup a knote in the fd table for process
 * and, if the knote is found, acquires the kqlock while holding the fd table lock/spinlock.
 *
 * fd_knhashlock or fdlock unheld on entry (and exit)
 */
static struct knote *
kq_find_knote_and_kq_lock(struct kqueue *kq,
		struct kevent_internal_s *kev,
		bool is_fd,
		struct proc *p)
{
	struct knote *ret;

	if (is_fd)
		proc_fdlock(p);
	else
		knhash_lock(p);

	ret = knote_fdfind(kq, kev, is_fd, p);

	if (ret) {
		kqlock(kq);
	}

	if (is_fd)
		proc_fdunlock(p);
	else
		knhash_unlock(p);

	return ret;
}
/*
 * knote_drop - disconnect and drop the knote
 *
 * Called with the kqueue unlocked and holding a
 * "drop reference" on the knote in question.
 * This reference is most often acquired through a call
 * to kqlock2knotedrop(). But it can also be acquired
 * through stealing a drop reference via a call to
 * knoteuse2knotedrop() or during the initial attach
 * of the knote.
 *
 * The knote may have already been detached from
 * (or not yet attached to) its source object.
 */
static void
knote_drop(struct knote *kn, __unused struct proc *ctxp)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct proc *p = kq->kq_p;
	kn_status_t kn_status;
	uint16_t kq_state;

	/* If we are attached, disconnect from the source first */
	if (kn->kn_status & KN_ATTACHED) {
		knote_fops(kn)->f_detach(kn);
	}

	/* Remove the source from the appropriate hash */
	kq_remove_knote(kq, kn, p, &kn_status, &kq_state);

	/*
	 * If a kqueue_dealloc is happening in parallel for the kq
	 * pointed by the knote the kq could be already deallocated
	 * at this point.
	 * Do not access the kq after the kq_remove_knote if it is
	 * not a KQ_DYNAMIC.
	 */

	/* determine if anyone needs to know about the drop */
	assert((kn_status & (KN_DROPPING | KN_SUPPRESSED | KN_QUEUED)) == KN_DROPPING);

	/*
	 * If KN_USEWAIT is set, some other thread was trying to drop the kn.
	 * Or it was in kqueue_dealloc, so the kqueue_dealloc did not happen
	 * because that thread was waiting on this wake, or it was a drop happening
	 * because of a kevent_register that takes a reference on the kq, and therefore
	 * the kq cannot be deallocated in parallel.
	 *
	 * It is safe to access kq->kq_wqs if needswakeup is set.
	 */
	if (kn_status & KN_USEWAIT)
		waitq_wakeup64_all((struct waitq *)&kq->kq_wqs,
				CAST_EVENT64_T(&kn->kn_status),
				THREAD_RESTART,
				WAITQ_ALL_PRIORITIES);

	if (knote_fops(kn)->f_isfd && ((kn->kn_status & KN_VANISHED) == 0))
		fp_drop(p, kn->kn_id, kn->kn_fp, 0);

	knote_free(kn);

	/*
	 * release reference on dynamic kq (and free if last).
	 * Will only be last if this is from fdfree, etc...
	 * because otherwise processing thread has reference.
	 */
	if (kq_state & KQ_DYNAMIC)
		kqueue_release_last(p, kq);
}
/* called with kqueue lock held */
static void
knote_activate(struct knote *kn)
{
	if (kn->kn_status & KN_ACTIVE)
		return;

	KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_ACTIVATE),
		kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
		kn->kn_filtid);

	kn->kn_status |= KN_ACTIVE;
	if (knote_enqueue(kn))
		knote_wakeup(kn);
}
/* called with kqueue lock held */
static void
knote_deactivate(struct knote *kn)
{
	kn->kn_status &= ~KN_ACTIVE;
	if ((kn->kn_status & KN_STAYACTIVE) == 0)
		knote_dequeue(kn);
}
/* called with kqueue lock held */
static void
knote_enable(struct knote *kn)
{
	if ((kn->kn_status & KN_DISABLED) == 0)
		return;

	kn->kn_status &= ~KN_DISABLED;

	if (kn->kn_status & KN_SUPPRESSED) {
		/* Clear the sync qos on the knote */
		knote_adjust_sync_qos(kn, THREAD_QOS_UNSPECIFIED, FALSE);

		/*
		 * it is possible for userland to have knotes registered for a given
		 * workloop `wl_orig` but really handled on another workloop `wl_new`.
		 *
		 * In that case, rearming will happen from the servicer thread of
		 * `wl_new` which if `wl_orig` is no longer being serviced, would cause
		 * this knote to stay suppressed forever if we only relied on
		 * kqworkloop_acknowledge_events to be called by `wl_orig`.
		 *
		 * However if we see the KQ_PROCESSING bit on `wl_orig` set, we can't
		 * unsuppress because that would mess with the processing phase of
		 * `wl_orig`, however it also means kqworkloop_acknowledge_events()
		 * will be called.
		 */
		struct kqueue *kq = knote_get_kq(kn);
		if ((kq->kq_state & KQ_PROCESSING) == 0) {
			knote_unsuppress(kn);
		}
	} else if (knote_enqueue(kn)) {
		knote_wakeup(kn);
	}
}
/* called with kqueue lock held */
static void
knote_disable(struct knote *kn)
{
	if (kn->kn_status & KN_DISABLED)
		return;

	kn->kn_status |= KN_DISABLED;
	knote_dequeue(kn);
}
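/*
 * knote_suppress - move a knote from its queue to the suppression queue
 * while it is being processed/delivered.
 */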
/* called with kqueue lock held */
static void
knote_suppress(struct knote *kn)
{
	struct kqtailq *suppressq;
	struct kqueue *kq = knote_get_kq(kn);

	kqlock_held(kq);

	if (kn->kn_status & KN_SUPPRESSED)
		return;

	knote_dequeue(kn);
	kn->kn_status |= KN_SUPPRESSED;
	suppressq = kqueue_get_suppressed_queue(kq, knote_get_qos_index(kn));
	TAILQ_INSERT_TAIL(suppressq, kn, kn_tqe);

	if ((kq->kq_state & KQ_WORKLOOP) &&
	    knote_get_qos_override_index(kn) == THREAD_QOS_USER_INTERACTIVE &&
	    kn->kn_qos_override_is_sync) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;
		/* update the sync qos override counter for suppressed knotes */
		kqworkloop_update_override(kqwl, knote_get_qos_index(kn),
			knote_get_qos_override_index(kn),
			(KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS | KQWL_UO_NEW_OVERRIDE_IS_SYNC_UI));
	}
}
/* called with kqueue lock held */
static void
knote_unsuppress(struct knote *kn)
{
	struct kqtailq *suppressq;
	struct kqueue *kq = knote_get_kq(kn);

	kqlock_held(kq);

	if ((kn->kn_status & KN_SUPPRESSED) == 0)
		return;

	/* Clear the sync qos on the knote */
	knote_adjust_sync_qos(kn, THREAD_QOS_UNSPECIFIED, FALSE);

	kn->kn_status &= ~KN_SUPPRESSED;
	suppressq = kqueue_get_suppressed_queue(kq, knote_get_qos_index(kn));
	TAILQ_REMOVE(suppressq, kn, kn_tqe);

	/* update in-use qos to equal requested qos */
	kn->kn_qos_index = kn->kn_req_index;

	/* don't wakeup if unsuppressing just a stay-active knote */
	if (knote_enqueue(kn) && (kn->kn_status & KN_ACTIVE)) {
		knote_wakeup(kn);
	}

	if ((kq->kq_state & KQ_WORKLOOP) && !(kq->kq_state & KQ_NO_WQ_THREAD) &&
	    knote_get_qos_override_index(kn) == THREAD_QOS_USER_INTERACTIVE &&
	    kn->kn_qos_override_is_sync) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		/* update the sync qos override counter for suppressed knotes */
		kqworkloop_update_override(kqwl, knote_get_qos_index(kn),
			knote_get_qos_override_index(kn),
			(KQWL_UO_UPDATE_SUPPRESS_SYNC_COUNTERS | KQWL_UO_OLD_OVERRIDE_IS_SYNC_UI));
	}

	if (TAILQ_EMPTY(suppressq) && (kq->kq_state & KQ_WORKLOOP) &&
	    !(kq->kq_state & KQ_NO_WQ_THREAD)) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;
		if (kqworkloop_is_processing_on_current_thread(kqwl)) {
			/*
			 * kqworkloop_end_processing() will perform the required QoS
			 * computations when it unsets the processing mode.
			 */
		} else {
			kqwl_req_lock(kqwl);
			kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_RESET_WAKEUP_OVERRIDE, 0);
			kqwl_req_unlock(kqwl);
		}
	}
}
/* called with kqueue lock held */
static void
knote_update_sync_override_state(struct knote *kn)
{
	struct kqtailq *queue = knote_get_queue(kn);
	struct kqueue *kq = knote_get_kq(kn);

	if (!(kq->kq_state & KQ_WORKLOOP) ||
	    knote_get_queue_index(kn) != THREAD_QOS_USER_INTERACTIVE)
		return;

	/* Update the sync ipc state on workloop */
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	boolean_t sync_ipc_override = FALSE;
	if (!TAILQ_EMPTY(queue)) {
		struct knote *kn_head = TAILQ_FIRST(queue);
		if (kn_head->kn_qos_override_is_sync)
			sync_ipc_override = TRUE;
	}
	kqworkloop_update_sync_override_state(kqwl, sync_ipc_override);
}
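/*
 * knote_enqueue - put an active, enabled knote on its queue; sync-override
 * knotes go to the head so sync IPC waiters are serviced first.
 */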
/* called with kqueue lock held */
static int
knote_enqueue(struct knote *kn)
{
	if ((kn->kn_status & (KN_ACTIVE | KN_STAYACTIVE)) == 0 ||
	    (kn->kn_status & (KN_DISABLED | KN_SUPPRESSED | KN_DROPPING)))
		return 0;

	if ((kn->kn_status & KN_QUEUED) == 0) {
		struct kqtailq *queue = knote_get_queue(kn);
		struct kqueue *kq = knote_get_kq(kn);

		kqlock_held(kq);
		/* insert at head for sync ipc waiters */
		if (kn->kn_qos_override_is_sync) {
			TAILQ_INSERT_HEAD(queue, kn, kn_tqe);
		} else {
			TAILQ_INSERT_TAIL(queue, kn, kn_tqe);
		}
		kn->kn_status |= KN_QUEUED;
		kq->kq_count++;
		knote_update_sync_override_state(kn);
		return 1;
	}
	return ((kn->kn_status & KN_STAYACTIVE) != 0);
}
/* called with kqueue lock held */
static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);
	struct kqtailq *queue;

	kqlock_held(kq);

	if ((kn->kn_status & KN_QUEUED) == 0)
		return;

	queue = knote_get_queue(kn);
	TAILQ_REMOVE(queue, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	knote_update_sync_override_state(kn);
}
void
knote_init(void)
{
	knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote),
	    8192, "knote zone");

	kqfile_zone = zinit(sizeof(struct kqfile), 8192*sizeof(struct kqfile),
	    8192, "kqueue file zone");

	kqworkq_zone = zinit(sizeof(struct kqworkq), 8192*sizeof(struct kqworkq),
	    8192, "kqueue workq zone");

	kqworkloop_zone = zinit(sizeof(struct kqworkloop), 8192*sizeof(struct kqworkloop),
	    8192, "kqueue workloop zone");

	/* allocate kq lock group attribute and group */
	kq_lck_grp_attr = lck_grp_attr_alloc_init();

	kq_lck_grp = lck_grp_alloc_init("kqueue", kq_lck_grp_attr);

	/* Allocate kq lock attribute */
	kq_lck_attr = lck_attr_alloc_init();

	/* Initialize the timer filter lock */
	lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);

	/* Initialize the user filter lock */
	lck_spin_init(&_filt_userlock, kq_lck_grp, kq_lck_attr);

#if CONFIG_MEMORYSTATUS
	/* Initialize the memorystatus list lock */
	memorystatus_kevent_init(kq_lck_grp, kq_lck_attr);
#endif
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
const struct filterops *
knote_fops(struct knote *kn)
{
	return sysfilt_ops[kn->kn_filtid];
}

static struct knote *
knote_alloc(void)
{
	struct knote *kn;
	kn = ((struct knote *)zalloc(knote_zone));
	*kn = (struct knote) { .kn_qos_override = 0, .kn_qos_sync_override = 0, .kn_qos_override_is_sync = 0 };
	return (kn);
}

static void
knote_free(struct knote *kn)
{
	zfree(knote_zone, kn);
}
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/kern_event.h>
#include <sys/malloc.h>
#include <sys/sys_domain.h>
#include <sys/syslog.h>

#define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))
#define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
static lck_grp_attr_t *kev_lck_grp_attr;
static lck_attr_t *kev_lck_attr;
static lck_grp_t *kev_lck_grp;
static decl_lck_rw_data(,kev_lck_data);
static lck_rw_t *kev_rwlock = &kev_lck_data;

static int kev_attach(struct socket *so, int proto, struct proc *p);
static int kev_detach(struct socket *so);
static int kev_control(struct socket *so, u_long cmd, caddr_t data,
    struct ifnet *ifp, struct proc *p);
static lck_mtx_t * event_getlock(struct socket *, int);
static int event_lock(struct socket *, int, void *);
static int event_unlock(struct socket *, int, void *);

static int event_sofreelastref(struct socket *);
static void kev_delete(struct kern_event_pcb *);

static struct pr_usrreqs event_usrreqs = {
	.pru_attach =		kev_attach,
	.pru_control =		kev_control,
	.pru_detach =		kev_detach,
	.pru_soreceive =	soreceive,
};

static struct protosw eventsw[] = {
{
	.pr_type =		SOCK_RAW,
	.pr_protocol =		SYSPROTO_EVENT,
	.pr_flags =		PR_ATOMIC,
	.pr_usrreqs =		&event_usrreqs,
	.pr_lock =		event_lock,
	.pr_unlock =		event_unlock,
	.pr_getlock =		event_getlock,
}
};

__private_extern__ int kevt_getstat SYSCTL_HANDLER_ARGS;
__private_extern__ int kevt_pcblist SYSCTL_HANDLER_ARGS;

SYSCTL_NODE(_net_systm, OID_AUTO, kevt,
    CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Kernel event family");

struct kevtstat kevtstat;
SYSCTL_PROC(_net_systm_kevt, OID_AUTO, stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
    kevt_getstat, "S,kevtstat", "");

SYSCTL_PROC(_net_systm_kevt, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
    kevt_pcblist, "S,xkevtpcb", "");
static lck_mtx_t *
event_getlock(struct socket *so, int flags)
{
#pragma unused(flags)
	struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb;

	if (so->so_pcb != NULL) {
		if (so->so_usecount < 0)
			panic("%s: so=%p usecount=%d lrh= %s\n", __func__,
			    so, so->so_usecount, solockhistory_nr(so));
	} else {
		panic("%s: so=%p NULL NO so_pcb %s\n", __func__,
		    so, solockhistory_nr(so));
	}
	return (&ev_pcb->evp_mtx);
}
static int
event_lock(struct socket *so, int refcount, void *lr)
{
	void *lr_saved;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

	if (so->so_pcb != NULL) {
		lck_mtx_lock(&((struct kern_event_pcb *)so->so_pcb)->evp_mtx);
	} else {
		panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
		    so, lr_saved, solockhistory_nr(so));
	}

	if (so->so_usecount < 0) {
		panic("%s: so=%p so_pcb=%p lr=%p ref=%d lrh= %s\n", __func__,
		    so, so->so_pcb, lr_saved, so->so_usecount,
		    solockhistory_nr(so));
	}

	if (refcount)
		so->so_usecount++;

	so->lock_lr[so->next_lock_lr] = lr_saved;
	so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
	return (0);
}
static int
event_unlock(struct socket *so, int refcount, void *lr)
{
	void *lr_saved;
	lck_mtx_t *mutex_held;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

	if (refcount) {
		so->so_usecount--;
	}
	if (so->so_usecount < 0) {
		panic("%s: so=%p usecount=%d lrh= %s\n", __func__,
		    so, so->so_usecount, solockhistory_nr(so));
	}
	if (so->so_pcb == NULL) {
		panic("%s: so=%p NO PCB usecount=%d lr=%p lrh= %s\n", __func__,
		    so, so->so_usecount, (void *)lr_saved,
		    solockhistory_nr(so));
	}
	mutex_held = (&((struct kern_event_pcb *)so->so_pcb)->evp_mtx);

	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;

	if (so->so_usecount == 0) {
		VERIFY(so->so_flags & SOF_PCBCLEARING);
		event_sofreelastref(so);
	} else {
		lck_mtx_unlock(mutex_held);
	}

	return (0);
}
static int
event_sofreelastref(struct socket *so)
{
	struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb;

	LCK_MTX_ASSERT(&(ev_pcb->evp_mtx), LCK_MTX_ASSERT_OWNED);

	so->so_pcb = NULL;

	/*
	 * Disable upcall in the event another thread is in kev_post_msg()
	 * appending record to the receive socket buffer, since sbwakeup()
	 * may release the socket lock otherwise.
	 */
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~SB_UPCALL;
	so->so_event = sonullevent;
	lck_mtx_unlock(&(ev_pcb->evp_mtx));

	LCK_MTX_ASSERT(&(ev_pcb->evp_mtx), LCK_MTX_ASSERT_NOTOWNED);
	lck_rw_lock_exclusive(kev_rwlock);
	LIST_REMOVE(ev_pcb, evp_link);
	kevtstat.kes_pcbcount--;
	kevtstat.kes_gencnt++;
	lck_rw_done(kev_rwlock);
	kev_delete(ev_pcb);

	sofreelastref(so, 1);
	return (0);
}
static int event_proto_count = (sizeof (eventsw) / sizeof (struct protosw));

struct kern_event_head kern_event_head;

static u_int32_t static_event_id = 0;

#define EVPCB_ZONE_MAX		65536
#define EVPCB_ZONE_NAME		"kerneventpcb"
static struct zone *ev_pcb_zone;
/*
 * Install the protosw's for the NKE manager. Invoked at extension load time
 */
void
kern_event_init(struct domain *dp)
{
	struct protosw *pr;
	int i;

	VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
	VERIFY(dp == systemdomain);

	kev_lck_grp_attr = lck_grp_attr_alloc_init();
	if (kev_lck_grp_attr == NULL) {
		panic("%s: lck_grp_attr_alloc_init failed\n", __func__);
	}

	kev_lck_grp = lck_grp_alloc_init("Kernel Event Protocol",
	    kev_lck_grp_attr);
	if (kev_lck_grp == NULL) {
		panic("%s: lck_grp_alloc_init failed\n", __func__);
	}

	kev_lck_attr = lck_attr_alloc_init();
	if (kev_lck_attr == NULL) {
		panic("%s: lck_attr_alloc_init failed\n", __func__);
	}

	lck_rw_init(kev_rwlock, kev_lck_grp, kev_lck_attr);
	if (kev_rwlock == NULL) {
		panic("%s: lck_mtx_alloc_init failed\n", __func__);
	}

	for (i = 0, pr = &eventsw[0]; i < event_proto_count; i++, pr++)
		net_add_proto(pr, dp, 1);

	ev_pcb_zone = zinit(sizeof(struct kern_event_pcb),
	    EVPCB_ZONE_MAX * sizeof(struct kern_event_pcb), 0, EVPCB_ZONE_NAME);
	if (ev_pcb_zone == NULL) {
		panic("%s: failed allocating ev_pcb_zone", __func__);
	}
	zone_change(ev_pcb_zone, Z_EXPAND, TRUE);
	zone_change(ev_pcb_zone, Z_CALLERACCT, TRUE);
}
static int
kev_attach(struct socket *so, __unused int proto, __unused struct proc *p)
{
	int error = 0;
	struct kern_event_pcb *ev_pcb;

	error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE);
	if (error != 0)
		return (error);

	if ((ev_pcb = (struct kern_event_pcb *)zalloc(ev_pcb_zone)) == NULL) {
		return (ENOBUFS);
	}
	bzero(ev_pcb, sizeof(struct kern_event_pcb));
	lck_mtx_init(&ev_pcb->evp_mtx, kev_lck_grp, kev_lck_attr);

	ev_pcb->evp_socket = so;
	ev_pcb->evp_vendor_code_filter = 0xffffffff;

	so->so_pcb = (caddr_t) ev_pcb;
	lck_rw_lock_exclusive(kev_rwlock);
	LIST_INSERT_HEAD(&kern_event_head, ev_pcb, evp_link);
	kevtstat.kes_pcbcount++;
	kevtstat.kes_gencnt++;
	lck_rw_done(kev_rwlock);

	return (error);
}
static void
kev_delete(struct kern_event_pcb *ev_pcb)
{
	VERIFY(ev_pcb != NULL);
	lck_mtx_destroy(&ev_pcb->evp_mtx, kev_lck_grp);
	zfree(ev_pcb_zone, ev_pcb);
}

static int
kev_detach(struct socket *so)
{
	struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb;

	if (ev_pcb != NULL) {
		soisdisconnected(so);
		so->so_flags |= SOF_PCBCLEARING;
	}

	return (0);
}
/*
 * For now, kev_vendor_code and mbuf_tags use the same
 * mechanism.
 */
errno_t kev_vendor_code_find(
	const char	*string,
	u_int32_t	*out_vendor_code)
{
	if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
		return (EINVAL);
	}
	return (net_str_id_find_internal(string, out_vendor_code,
	    NSI_VENDOR_CODE, 1));
}
errno_t
kev_msg_post(struct kev_msg *event_msg)
{
	mbuf_tag_id_t min_vendor, max_vendor;

	net_str_id_first_last(&min_vendor, &max_vendor, NSI_VENDOR_CODE);

	if (event_msg == NULL)
		return (EINVAL);

	/*
	 * Limit third parties to posting events for registered vendor codes
	 * only.
	 */
	if (event_msg->vendor_code < min_vendor ||
	    event_msg->vendor_code > max_vendor) {
		OSIncrementAtomic64((SInt64 *)&kevtstat.kes_badvendor);
		return (EINVAL);
	}
	return (kev_post_msg(event_msg));
}
int
kev_post_msg(struct kev_msg *event_msg)
{
	struct mbuf *m, *m2;
	struct kern_event_pcb *ev_pcb;
	struct kern_event_msg *ev;
	char *tmp;
	u_int32_t total_size;
	int i;

	/* Verify the message is small enough to fit in one mbuf w/o cluster */
	total_size = KEV_MSG_HEADER_SIZE;

	for (i = 0; i < 5; i++) {
		if (event_msg->dv[i].data_length == 0)
			break;
		total_size += event_msg->dv[i].data_length;
	}

	if (total_size > MLEN) {
		OSIncrementAtomic64((SInt64 *)&kevtstat.kes_toobig);
		return (EMSGSIZE);
	}

	m = m_get(M_WAIT, MT_DATA);
	if (m == 0) {
		OSIncrementAtomic64((SInt64 *)&kevtstat.kes_nomem);
		return (ENOMEM);
	}
	ev = mtod(m, struct kern_event_msg *);
	total_size = KEV_MSG_HEADER_SIZE;

	tmp = (char *) &ev->event_data[0];
	for (i = 0; i < 5; i++) {
		if (event_msg->dv[i].data_length == 0)
			break;

		total_size += event_msg->dv[i].data_length;
		bcopy(event_msg->dv[i].data_ptr, tmp,
		    event_msg->dv[i].data_length);
		tmp += event_msg->dv[i].data_length;
	}

	ev->id = ++static_event_id;
	ev->total_size = total_size;
	ev->vendor_code = event_msg->vendor_code;
	ev->kev_class = event_msg->kev_class;
	ev->kev_subclass = event_msg->kev_subclass;
	ev->event_code = event_msg->event_code;

	m->m_len = total_size;
	lck_rw_lock_shared(kev_rwlock);
	for (ev_pcb = LIST_FIRST(&kern_event_head);
	    ev_pcb;
	    ev_pcb = LIST_NEXT(ev_pcb, evp_link)) {
		lck_mtx_lock(&ev_pcb->evp_mtx);
		if (ev_pcb->evp_socket->so_pcb == NULL) {
			lck_mtx_unlock(&ev_pcb->evp_mtx);
			continue;
		}
		if (ev_pcb->evp_vendor_code_filter != KEV_ANY_VENDOR) {
			if (ev_pcb->evp_vendor_code_filter != ev->vendor_code) {
				lck_mtx_unlock(&ev_pcb->evp_mtx);
				continue;
			}
		}

		if (ev_pcb->evp_class_filter != KEV_ANY_CLASS) {
			if (ev_pcb->evp_class_filter != ev->kev_class) {
				lck_mtx_unlock(&ev_pcb->evp_mtx);
				continue;
			}

			if ((ev_pcb->evp_subclass_filter !=
			    KEV_ANY_SUBCLASS) &&
			    (ev_pcb->evp_subclass_filter !=
			    ev->kev_subclass)) {
				lck_mtx_unlock(&ev_pcb->evp_mtx);
				continue;
			}
		}

		m2 = m_copym(m, 0, m->m_len, M_WAIT);
		if (m2 == 0) {
			OSIncrementAtomic64((SInt64 *)&kevtstat.kes_nomem);
			m_free(m);
			lck_mtx_unlock(&ev_pcb->evp_mtx);
			lck_rw_done(kev_rwlock);
			return (ENOMEM);
		}
		if (sbappendrecord(&ev_pcb->evp_socket->so_rcv, m2)) {
			/*
			 * We use "m" for the socket stats as it would be
			 * unsafe to use "m2"
			 */
			so_inc_recv_data_stat(ev_pcb->evp_socket,
			    1, m->m_len, MBUF_TC_BE);

			sorwakeup(ev_pcb->evp_socket);
			OSIncrementAtomic64((SInt64 *)&kevtstat.kes_posted);
		} else {
			OSIncrementAtomic64((SInt64 *)&kevtstat.kes_fullsock);
		}
		lck_mtx_unlock(&ev_pcb->evp_mtx);
	}
	m_free(m);
	lck_rw_done(kev_rwlock);

	return (0);
}
static int
kev_control(struct socket *so,
    u_long cmd,
    caddr_t data,
    __unused struct ifnet *ifp,
    __unused struct proc *p)
{
	struct kev_request *kev_req = (struct kev_request *) data;
	struct kern_event_pcb *ev_pcb;
	struct kev_vendor_code *kev_vendor;
	u_int32_t *id_value = (u_int32_t *) data;

	switch (cmd) {
	case SIOCGKEVID:
		*id_value = static_event_id;
		break;
	case SIOCSKEVFILT:
		ev_pcb = (struct kern_event_pcb *) so->so_pcb;
		ev_pcb->evp_vendor_code_filter = kev_req->vendor_code;
		ev_pcb->evp_class_filter = kev_req->kev_class;
		ev_pcb->evp_subclass_filter = kev_req->kev_subclass;
		break;
	case SIOCGKEVFILT:
		ev_pcb = (struct kern_event_pcb *) so->so_pcb;
		kev_req->vendor_code = ev_pcb->evp_vendor_code_filter;
		kev_req->kev_class = ev_pcb->evp_class_filter;
		kev_req->kev_subclass = ev_pcb->evp_subclass_filter;
		break;
	case SIOCGKEVVENDOR:
		kev_vendor = (struct kev_vendor_code *)data;
		/* Make sure string is NULL terminated */
		kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0;
		return (net_str_id_find_internal(kev_vendor->vendor_string,
		    &kev_vendor->vendor_code, NSI_VENDOR_CODE, 0));
	default:
		return (ENOTSUP);
	}

	return (0);
}
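
/*
 * Illustrative user-space sketch, not part of this file: the ioctls handled
 * above are issued against a PF_SYSTEM/SYSPROTO_EVENT socket.  Constants and
 * struct kev_request come from <sys/kern_event.h>; error handling is omitted
 * and the filter values are only an example.
 *
 *	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *	struct kev_request req = {
 *		.vendor_code  = KEV_VENDOR_APPLE,
 *		.kev_class    = KEV_NETWORK_CLASS,
 *		.kev_subclass = KEV_ANY_SUBCLASS,
 *	};
 *	ioctl(fd, SIOCSKEVFILT, &req);
 *
 *	char buf[1024];
 *	ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *	struct kern_event_msg *ev = (struct kern_event_msg *)buf;
 */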
__private_extern__ int
kevt_getstat SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;

	lck_rw_lock_shared(kev_rwlock);

	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = sizeof(struct kevtstat);
		goto done;
	}

	error = SYSCTL_OUT(req, &kevtstat,
	    MIN(sizeof(struct kevtstat), req->oldlen));

done:
	lck_rw_done(kev_rwlock);

	return (error);
}
__private_extern__ int
kevt_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	int n, i;
	struct xsystmgen xsg;
	void *buf = NULL;
	size_t item_size = ROUNDUP64(sizeof (struct xkevtpcb)) +
	    ROUNDUP64(sizeof (struct xsocket_n)) +
	    2 * ROUNDUP64(sizeof (struct xsockbuf_n)) +
	    ROUNDUP64(sizeof (struct xsockstat_n));
	struct kern_event_pcb *ev_pcb;

	buf = _MALLOC(item_size, M_TEMP, M_WAITOK | M_ZERO);
	if (buf == NULL)
		return (ENOMEM);

	lck_rw_lock_shared(kev_rwlock);

	n = kevtstat.kes_pcbcount;

	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = (n + n/8) * item_size;
		goto done;
	}
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}

	bzero(&xsg, sizeof (xsg));
	xsg.xg_len = sizeof (xsg);
	xsg.xg_count = n;
	xsg.xg_gen = kevtstat.kes_gencnt;
	xsg.xg_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xsg, sizeof (xsg));
	if (error) {
		goto done;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		goto done;
	}

	for (i = 0, ev_pcb = LIST_FIRST(&kern_event_head);
	    i < n && ev_pcb != NULL;
	    i++, ev_pcb = LIST_NEXT(ev_pcb, evp_link)) {
		struct xkevtpcb *xk = (struct xkevtpcb *)buf;
		struct xsocket_n *xso = (struct xsocket_n *)
		    ADVANCE64(xk, sizeof (*xk));
		struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
		    ADVANCE64(xso, sizeof (*xso));
		struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
		    ADVANCE64(xsbrcv, sizeof (*xsbrcv));
		struct xsockstat_n *xsostats = (struct xsockstat_n *)
		    ADVANCE64(xsbsnd, sizeof (*xsbsnd));

		bzero(buf, item_size);

		lck_mtx_lock(&ev_pcb->evp_mtx);

		xk->kep_len = sizeof(struct xkevtpcb);
		xk->kep_kind = XSO_EVT;
		xk->kep_evtpcb = (uint64_t)VM_KERNEL_ADDRPERM(ev_pcb);
		xk->kep_vendor_code_filter = ev_pcb->evp_vendor_code_filter;
		xk->kep_class_filter = ev_pcb->evp_class_filter;
		xk->kep_subclass_filter = ev_pcb->evp_subclass_filter;

		sotoxsocket_n(ev_pcb->evp_socket, xso);
		sbtoxsockbuf_n(ev_pcb->evp_socket ?
		    &ev_pcb->evp_socket->so_rcv : NULL, xsbrcv);
		sbtoxsockbuf_n(ev_pcb->evp_socket ?
		    &ev_pcb->evp_socket->so_snd : NULL, xsbsnd);
		sbtoxsockstat_n(ev_pcb->evp_socket, xsostats);

		lck_mtx_unlock(&ev_pcb->evp_mtx);

		error = SYSCTL_OUT(req, buf, item_size);
	}

	if (error == 0) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xsg, sizeof (xsg));
		xsg.xg_len = sizeof (xsg);
		xsg.xg_count = n;
		xsg.xg_gen = kevtstat.kes_gencnt;
		xsg.xg_sogen = so_gencnt;
		error = SYSCTL_OUT(req, &xsg, sizeof (xsg));
	}

done:
	lck_rw_done(kev_rwlock);

	return (error);
}
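
/*
 * Illustrative user-space sketch, not part of this file: consumers of this
 * handler (assumed to be exported as a "pcblist"-style sysctl elsewhere in
 * this file) can use the leading and trailing xsystmgen records emitted
 * above to detect that the pcb list changed while it was being copied out,
 * and retry.  last_record_in() is a placeholder for walking the copied
 * buffer to its trailing record.
 *
 *	struct xsystmgen *head = (struct xsystmgen *)buf;
 *	struct xsystmgen *tail = last_record_in(buf, len);
 *	if (head->xg_gen != tail->xg_gen) {
 *		the list changed under us, issue the sysctl again
 *	}
 */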
#endif /* SOCKETS */
int
fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
{
	struct vinfo_stat * st;

	st = &kinfo->kq_stat;

	st->vst_size = kq->kq_count;
	if (kq->kq_state & KQ_KEV_QOS)
		st->vst_blksize = sizeof(struct kevent_qos_s);
	else if (kq->kq_state & KQ_KEV64)
		st->vst_blksize = sizeof(struct kevent64_s);
	else
		st->vst_blksize = sizeof(struct kevent);
	st->vst_mode = S_IFIFO;
	st->vst_ino = (kq->kq_state & KQ_DYNAMIC) ?
		((struct kqworkloop *)kq)->kqwl_dynamicid : 0;

	/* flags exported to libproc as PROC_KQUEUE_* (sys/proc_info.h) */
#define PROC_KQUEUE_MASK (KQ_SEL|KQ_SLEEP|KQ_KEV32|KQ_KEV64|KQ_KEV_QOS|KQ_WORKQ|KQ_WORKLOOP)
	kinfo->kq_state = kq->kq_state & PROC_KQUEUE_MASK;

	return (0);
}
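
/*
 * Illustrative user-space sketch, not part of this file: the kqueue_info
 * filled above is what libproc returns for a kqueue file descriptor.  Flavor
 * and struct/field names are assumed to match <sys/proc_info.h> and
 * <libproc.h> of the same release.
 *
 *	struct kqueue_fdinfo kqfdi;
 *	int n = proc_pidfdinfo(pid, kqfd, PROC_PIDFDKQUEUE_INFO,
 *	    &kqfdi, sizeof(kqfdi));
 *	if (n == sizeof(kqfdi)) {
 *		uint32_t state = kqfdi.kqueueinfo.kq_state;
 *	}
 */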
static int
fill_kqueue_dyninfo(struct kqueue *kq, struct kqueue_dyninfo *kqdi)
{
	struct kqworkloop *kqwl = (struct kqworkloop *)kq;
	struct kqrequest *kqr = &kqwl->kqwl_request;
	int err;

	if ((kq->kq_state & KQ_WORKLOOP) == 0) {
		return EINVAL;
	}

	if ((err = fill_kqueueinfo(kq, &kqdi->kqdi_info))) {
		return err;
	}

	kqwl_req_lock(kqwl);

	if (kqr->kqr_thread) {
		kqdi->kqdi_servicer = thread_tid(kqr->kqr_thread);
	}

	if (kqwl->kqwl_owner == WL_OWNER_SUSPENDED) {
		kqdi->kqdi_owner = ~0ull;
	} else {
		kqdi->kqdi_owner = thread_tid(kqwl->kqwl_owner);
	}

	kqdi->kqdi_request_state = kqr->kqr_state;
	kqdi->kqdi_async_qos = kqr->kqr_qos_index;
	kqdi->kqdi_events_qos = kqr->kqr_override_index;
	kqdi->kqdi_sync_waiters = kqr->kqr_dsync_waiters;
	kqdi->kqdi_sync_waiter_qos = kqr->kqr_dsync_waiters_qos;

	kqwl_req_unlock(kqwl);

	return 0;
}
void
knote_markstayactive(struct knote *kn)
{
	struct kqueue *kq = knote_get_kq(kn);

	kqlock(kq);
	kn->kn_status |= KN_STAYACTIVE;

	/*
	 * Making a knote stay active is a property of the knote that must be
	 * established before it is fully attached.
	 */
	assert(kn->kn_status & KN_ATTACHING);

	/* handle all stayactive knotes on the (appropriate) manager */
	if (kq->kq_state & KQ_WORKQ) {
		knote_set_qos_index(kn, KQWQ_QOS_MANAGER);
	} else if (kq->kq_state & KQ_WORKLOOP) {
		struct kqworkloop *kqwl = (struct kqworkloop *)kq;

		kqwl_req_lock(kqwl);
		assert(kn->kn_req_index && kn->kn_req_index < THREAD_QOS_LAST);

		kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_STAYACTIVE_QOS,
		    kn->kn_req_index);
		kqwl_req_unlock(kqwl);
		knote_set_qos_index(kn, KQWL_BUCKET_STAYACTIVE);
	}

	knote_activate(kn);
	kqunlock(kq);
}
void
knote_clearstayactive(struct knote *kn)
{
	kqlock(knote_get_kq(kn));
	kn->kn_status &= ~KN_STAYACTIVE;
	knote_deactivate(kn);
	kqunlock(knote_get_kq(kn));
}
static unsigned long
kevent_extinfo_emit(struct kqueue *kq, struct knote *kn, struct kevent_extinfo *buf,
    unsigned long buflen, unsigned long nknotes)
{
	for (; kn; kn = SLIST_NEXT(kn, kn_link)) {
		if (kq == knote_get_kq(kn)) {
			if (nknotes < buflen) {
				struct kevent_extinfo *info = &buf[nknotes];
				struct kevent_internal_s *kevp = &kn->kn_kevent;

				kqlock(kq);

				info->kqext_kev = (struct kevent_qos_s){
					.ident = kevp->ident,
					.filter = kevp->filter,
					.flags = kevp->flags,
					.fflags = kevp->fflags,
					.data = (int64_t)kevp->data,
					.udata = kevp->udata,
					.ext[0] = kevp->ext[0],
					.ext[1] = kevp->ext[1],
					.ext[2] = kevp->ext[2],
					.ext[3] = kevp->ext[3],
					.qos = kn->kn_req_index,
				};
				info->kqext_sdata = kn->kn_sdata;
				info->kqext_status = kn->kn_status;
				info->kqext_sfflags = kn->kn_sfflags;

				kqunlock(kq);
			}

			/* we return total number of knotes, which may be more than requested */
			nknotes++;
		}
	}

	return nknotes;
}
int
kevent_copyout_proc_dynkqids(void *proc, user_addr_t ubuf, uint32_t ubufsize,
    int32_t *nkqueues_out)
{
	proc_t p = (proc_t)proc;
	struct filedesc *fdp = p->p_fd;
	unsigned int nkqueues = 0;
	unsigned long ubuflen = ubufsize / sizeof(kqueue_id_t);
	size_t buflen, bufsize;
	kqueue_id_t *kq_ids = NULL;
	int err = 0;

	assert(p->p_fd != NULL);

	if (ubuf == USER_ADDR_NULL && ubufsize != 0) {
		err = EINVAL;
		goto out;
	}

	buflen = min(ubuflen, PROC_PIDDYNKQUEUES_MAX);

	if (buflen) {
		if (os_mul_overflow(sizeof(kqueue_id_t), buflen, &bufsize)) {
			err = ERANGE;
			goto out;
		}
		kq_ids = kalloc(bufsize);
		assert(kq_ids != NULL);
	}

	kqhash_lock(p);

	if (fdp->fd_kqhashmask > 0) {
		for (uint32_t i = 0; i < fdp->fd_kqhashmask + 1; i++) {
			struct kqworkloop *kqwl;

			SLIST_FOREACH(kqwl, &fdp->fd_kqhash[i], kqwl_hashlink) {
				/* report the number of kqueues, even if they don't all fit */
				if (nkqueues < buflen) {
					kq_ids[nkqueues] = kqwl->kqwl_dynamicid;
				}
				nkqueues++;
			}
		}
	}

	kqhash_unlock(p);

	if (kq_ids) {
		size_t copysize;
		if (os_mul_overflow(sizeof(kqueue_id_t), min(ubuflen, nkqueues), &copysize)) {
			err = ERANGE;
			goto out;
		}

		assert(ubufsize >= copysize);
		err = copyout(kq_ids, ubuf, copysize);
	}

out:
	if (kq_ids) {
		kfree(kq_ids, bufsize);
	}

	if (!err) {
		*nkqueues_out = (int)min(nkqueues, PROC_PIDDYNKQUEUES_MAX);
	}
	return err;
}
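
/*
 * Illustrative user-space sketch, not part of this file: this routine is
 * assumed to back the PROC_PIDLISTDYNKQUEUES proc_info flavor.  Because the
 * count reported above can exceed what fit in the buffer, callers size the
 * buffer from the returned count and retry.  The wrapper name and
 * return-value convention below are libproc details and are only assumed.
 *
 *	kqueue_id_t ids[64];
 *	int n = proc_list_dynkqueueids(pid, ids, (uint32_t)sizeof(ids));
 *	if (n > (int)(sizeof(ids) / sizeof(ids[0]))) {
 *		allocate a larger buffer and call again
 *	}
 */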
int
kevent_copyout_dynkqinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf,
    uint32_t ubufsize, int32_t *size_out)
{
	proc_t p = (proc_t)proc;
	struct kqueue *kq;
	int err = 0;
	struct kqueue_dyninfo kqdi = { };

	assert(p != NULL);

	if (ubufsize < sizeof(struct kqueue_info)) {
		return ENOBUFS;
	}

	kqhash_lock(p);
	kq = kqueue_hash_lookup(p, kq_id);
	if (!kq) {
		kqhash_unlock(p);
		return ESRCH;
	}
	kqueue_retain(kq);
	kqhash_unlock(p);

	/*
	 * backward compatibility: allow the argument to this call to only be
	 * a struct kqueue_info
	 */
	if (ubufsize >= sizeof(struct kqueue_dyninfo)) {
		ubufsize = sizeof(struct kqueue_dyninfo);
		err = fill_kqueue_dyninfo(kq, &kqdi);
	} else {
		ubufsize = sizeof(struct kqueue_info);
		err = fill_kqueueinfo(kq, &kqdi.kqdi_info);
	}
	if (err == 0 && (err = copyout(&kqdi, ubuf, ubufsize)) == 0) {
		*size_out = ubufsize;
	}
	kqueue_release_last(p, kq);
	return err;
}
int
kevent_copyout_dynkqextinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf,
    uint32_t ubufsize, int32_t *nknotes_out)
{
	proc_t p = (proc_t)proc;
	struct kqueue *kq;
	int err;

	assert(p != NULL);

	kqhash_lock(p);
	kq = kqueue_hash_lookup(p, kq_id);
	if (!kq) {
		kqhash_unlock(p);
		return ESRCH;
	}
	kqueue_retain(kq);
	kqhash_unlock(p);

	err = pid_kqueue_extinfo(p, kq, ubuf, ubufsize, nknotes_out);
	kqueue_release_last(p, kq);
	return err;
}
static int
pid_kqueue_extinfo(proc_t p, struct kqueue *kq, user_addr_t ubuf,
    uint32_t bufsize, int32_t *retval)
{
	struct knote *kn;
	int i;
	int err = 0;
	struct filedesc *fdp = p->p_fd;
	unsigned long nknotes = 0;
	unsigned long buflen = bufsize / sizeof(struct kevent_extinfo);
	struct kevent_extinfo *kqext = NULL;

	/* arbitrary upper limit to cap kernel memory usage, copyout size, etc. */
	buflen = min(buflen, PROC_PIDFDKQUEUE_KNOTES_MAX);

	kqext = kalloc(buflen * sizeof(struct kevent_extinfo));
	if (kqext == NULL) {
		err = ENOMEM;
		goto out;
	}
	bzero(kqext, buflen * sizeof(struct kevent_extinfo));

	proc_fdlock(p);
	for (i = 0; i < fdp->fd_knlistsize; i++) {
		kn = SLIST_FIRST(&fdp->fd_knlist[i]);
		nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes);
	}
	proc_fdunlock(p);

	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
			kqhash_lock(p);
			kn = SLIST_FIRST(&fdp->fd_knhash[i]);
			nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes);
			kqhash_unlock(p);
		}
	}

	assert(bufsize >= sizeof(struct kevent_extinfo) * min(buflen, nknotes));
	err = copyout(kqext, ubuf, sizeof(struct kevent_extinfo) * min(buflen, nknotes));

out:
	if (kqext) {
		kfree(kqext, buflen * sizeof(struct kevent_extinfo));
		kqext = NULL;
	}

	if (!err) {
		*retval = min(nknotes, PROC_PIDFDKQUEUE_KNOTES_MAX);
	}
	return err;
}
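
/*
 * Illustrative user-space sketch, not part of this file: the per-knote
 * records collected above are assumed to surface through the
 * PROC_PIDFDKQUEUE_EXTINFO flavor of proc_pidfdinfo(); the reported count is
 * capped at PROC_PIDFDKQUEUE_KNOTES_MAX, so a short buffer yields a
 * truncated view.
 *
 *	struct kevent_extinfo info[32];
 *	int nknotes = proc_pidfdinfo(pid, kqfd, PROC_PIDFDKQUEUE_EXTINFO,
 *	    info, sizeof(info));
 */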
static unsigned int
klist_copy_udata(struct klist *list, uint64_t *buf,
    unsigned int buflen, unsigned int nknotes)
{
	struct kevent_internal_s *kev;
	struct knote *kn;
	SLIST_FOREACH(kn, list, kn_link) {
		if (nknotes < buflen) {
			struct kqueue *kq = knote_get_kq(kn);
			kqlock(kq);
			kev = &(kn->kn_kevent);
			buf[nknotes] = kev->udata;
			kqunlock(kq);
		}
		/* we return total number of knotes, which may be more than requested */
		nknotes++;
	}

	return nknotes;
}
static unsigned int
kqlist_copy_dynamicids(__assert_only proc_t p, struct kqlist *list,
    uint64_t *buf, unsigned int buflen, unsigned int nids)
{
	kqhash_lock_held(p);
	struct kqworkloop *kqwl;
	SLIST_FOREACH(kqwl, list, kqwl_hashlink) {
		if (nids < buflen) {
			buf[nids] = kqwl->kqwl_dynamicid;
		}
		nids++;
	}
	return nids;
}
int
kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize)
{
	proc_t p = (proc_t)proc;
	struct filedesc *fdp = p->p_fd;
	unsigned int nuptrs = 0;
	unsigned long buflen = bufsize / sizeof(uint64_t);

	if (buflen > 0) {
		assert(buf != NULL);
	}

	proc_fdlock(p);
	for (int i = 0; i < fdp->fd_knlistsize; i++) {
		nuptrs = klist_copy_udata(&fdp->fd_knlist[i], buf, buflen, nuptrs);
	}
	knhash_lock(p);
	proc_fdunlock(p);
	if (fdp->fd_knhashmask != 0) {
		for (int i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
			nuptrs = klist_copy_udata(&fdp->fd_knhash[i], buf, buflen, nuptrs);
		}
	}
	knhash_unlock(p);

	kqhash_lock(p);
	if (fdp->fd_kqhashmask != 0) {
		for (int i = 0; i < (int)fdp->fd_kqhashmask + 1; i++) {
			nuptrs = kqlist_copy_dynamicids(p, &fdp->fd_kqhash[i], buf, buflen,
			    nuptrs);
		}
	}
	kqhash_unlock(p);

	return (int)nuptrs;
}
static void
kevent_redrive_proc_thread_request(proc_t p)
{
	__assert_only int ret;
	ret = (*pthread_functions->workq_threadreq)(p, NULL, WORKQ_THREADREQ_REDRIVE, 0, 0);
	assert(ret == 0 || ret == ECANCELED);
}
static void
kevent_set_return_to_kernel_user_tsd(proc_t p, thread_t thread)
{
	uint64_t ast_addr;
	bool proc_is_64bit = !!(p->p_flag & P_LP64);
	size_t user_addr_size = proc_is_64bit ? 8 : 4;
	uint32_t ast_flags32 = 0;
	uint64_t ast_flags64 = 0;
	struct uthread *ut = get_bsdthread_info(thread);

	if (ut->uu_kqueue_bound != NULL) {
		if (ut->uu_kqueue_flags & KEVENT_FLAG_WORKLOOP) {
			ast_flags64 |= R2K_WORKLOOP_PENDING_EVENTS;
		} else if (ut->uu_kqueue_flags & KEVENT_FLAG_WORKQ) {
			ast_flags64 |= R2K_WORKQ_PENDING_EVENTS;
		}
	}

	if (ast_flags64 == 0) {
		return;
	}

	if (!(p->p_flag & P_LP64)) {
		ast_flags32 = (uint32_t)ast_flags64;
		assert(ast_flags64 < 0x100000000ull);
	}

	ast_addr = thread_rettokern_addr(thread);
	if (ast_addr == 0) {
		return;
	}

	if (copyout((proc_is_64bit ? (void *)&ast_flags64 : (void *)&ast_flags32),
	    (user_addr_t)ast_addr,
	    user_addr_size) != 0) {
		printf("pid %d (tid:%llu): copyout of return_to_kernel ast flags failed with "
		    "ast_addr = %llu\n", p->p_pid, thread_tid(current_thread()), ast_addr);
	}
}
void
kevent_ast(thread_t thread, uint16_t bits)
{
	proc_t p = current_proc();

	if (bits & AST_KEVENT_REDRIVE_THREADREQ) {
		kevent_redrive_proc_thread_request(p);
	}
	if (bits & AST_KEVENT_RETURN_TO_KERNEL) {
		kevent_set_return_to_kernel_user_tsd(p, thread);
	}
}
#if DEVELOPMENT || DEBUG

#define KEVENT_SYSCTL_BOUND_ID 1

static int
kevent_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	uintptr_t type = (uintptr_t)arg1;
	uint64_t bound_id = 0;
	struct uthread *ut;
	struct kqueue *kq;

	if (type != KEVENT_SYSCTL_BOUND_ID) {
		return EINVAL;
	}

	if (req->newptr) {
		return EINVAL;
	}

	ut = get_bsdthread_info(current_thread());
	if (!ut) {
		return EFAULT;
	}

	kq = ut->uu_kqueue_bound;
	if (kq) {
		if (kq->kq_state & KQ_WORKLOOP) {
			bound_id = ((struct kqworkloop *)kq)->kqwl_dynamicid;
		} else if (kq->kq_state & KQ_WORKQ) {
			bound_id = -1;
		}
	}

	return sysctl_io_number(req, bound_id, sizeof(bound_id), NULL, NULL);
}
SYSCTL_NODE(_kern, OID_AUTO, kevent, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "kevent information");

SYSCTL_PROC(_kern_kevent, OID_AUTO, bound_id,
    CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    (void *)KEVENT_SYSCTL_BOUND_ID,
    sizeof(kqueue_id_t), kevent_sysctl, "Q",
    "get the ID of the bound kqueue");
#endif /* DEVELOPMENT || DEBUG */