2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
31 * All rights reserved.
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * @(#)kern_event.c 1.0 (3/31/2000)
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/filedesc.h>
62 #include <sys/kernel.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/malloc.h>
66 #include <sys/unistd.h>
67 #include <sys/file_internal.h>
68 #include <sys/fcntl.h>
69 #include <sys/select.h>
70 #include <sys/queue.h>
71 #include <sys/event.h>
72 #include <sys/eventvar.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
77 #include <sys/sysctl.h>
79 #include <sys/sysproto.h>
81 #include <sys/vnode_internal.h>
83 #include <sys/proc_info.h>
84 #include <sys/codesign.h>
85 #include <sys/pthread_shims.h>
87 #include <kern/locks.h>
88 #include <kern/clock.h>
89 #include <kern/policy_internal.h>
90 #include <kern/thread_call.h>
91 #include <kern/sched_prim.h>
92 #include <kern/waitq.h>
93 #include <kern/zalloc.h>
94 #include <kern/kalloc.h>
95 #include <kern/assert.h>
97 #include <machine/spl.h>
99 #include <libkern/libkern.h>
100 #include "net/net_str_id.h"
102 #include <mach/task.h>
104 #if CONFIG_MEMORYSTATUS
105 #include <sys/kern_memorystatus.h>
109 * JMM - this typedef needs to be unified with pthread_priority_t
110 * and mach_msg_priority_t. It also needs to be the same type
113 typedef int32_t qos_t
;
115 MALLOC_DEFINE(M_KQUEUE
, "kqueue", "memory for kqueue system");
117 #define KQ_EVENT NO_EVENT64
119 static inline void kqlock(struct kqueue
*kq
);
120 static inline void kqunlock(struct kqueue
*kq
);
122 static int kqlock2knoteuse(struct kqueue
*kq
, struct knote
*kn
);
123 static int kqlock2knotedrop(struct kqueue
*kq
, struct knote
*kn
);
124 static int kqlock2knotedetach(struct kqueue
*kq
, struct knote
*kn
);
125 static int knoteuse2kqlock(struct kqueue
*kq
, struct knote
*kn
, int defer_drop
);
127 static int kqueue_read(struct fileproc
*fp
, struct uio
*uio
,
128 int flags
, vfs_context_t ctx
);
129 static int kqueue_write(struct fileproc
*fp
, struct uio
*uio
,
130 int flags
, vfs_context_t ctx
);
131 static int kqueue_ioctl(struct fileproc
*fp
, u_long com
, caddr_t data
,
133 static int kqueue_select(struct fileproc
*fp
, int which
, void *wq_link_id
,
135 static int kqueue_close(struct fileglob
*fg
, vfs_context_t ctx
);
136 static int kqueue_kqfilter(struct fileproc
*fp
, struct knote
*kn
,
138 static int kqueue_drain(struct fileproc
*fp
, vfs_context_t ctx
);
140 static const struct fileops kqueueops
= {
141 .fo_type
= DTYPE_KQUEUE
,
142 .fo_read
= kqueue_read
,
143 .fo_write
= kqueue_write
,
144 .fo_ioctl
= kqueue_ioctl
,
145 .fo_select
= kqueue_select
,
146 .fo_close
= kqueue_close
,
147 .fo_kqfilter
= kqueue_kqfilter
,
148 .fo_drain
= kqueue_drain
,
151 static int kevent_internal(struct proc
*p
, int fd
,
152 user_addr_t changelist
, int nchanges
,
153 user_addr_t eventlist
, int nevents
,
154 user_addr_t data_out
, uint64_t data_available
,
155 unsigned int flags
, user_addr_t utimeout
,
156 kqueue_continue_t continuation
,
158 static int kevent_copyin(user_addr_t
*addrp
, struct kevent_internal_s
*kevp
,
159 struct proc
*p
, unsigned int flags
);
160 static int kevent_copyout(struct kevent_internal_s
*kevp
, user_addr_t
*addrp
,
161 struct proc
*p
, unsigned int flags
);
162 char * kevent_description(struct kevent_internal_s
*kevp
, char *s
, size_t n
);
164 static void kqueue_interrupt(struct kqueue
*kq
);
165 static int kevent_callback(struct kqueue
*kq
, struct kevent_internal_s
*kevp
,
167 static void kevent_continue(struct kqueue
*kq
, void *data
, int error
);
168 static void kqueue_scan_continue(void *contp
, wait_result_t wait_result
);
169 static int kqueue_process(struct kqueue
*kq
, kevent_callback_t callback
, void *callback_data
,
170 struct filt_process_s
*process_data
, kq_index_t servicer_qos_index
,
171 int *countp
, struct proc
*p
);
172 static int kqueue_begin_processing(struct kqueue
*kq
, kq_index_t qos_index
, unsigned int flags
);
173 static void kqueue_end_processing(struct kqueue
*kq
, kq_index_t qos_index
, unsigned int flags
);
174 static struct kqtailq
*kqueue_get_base_queue(struct kqueue
*kq
, kq_index_t qos_index
);
175 static struct kqtailq
*kqueue_get_high_queue(struct kqueue
*kq
, kq_index_t qos_index
);
176 static int kqueue_queue_empty(struct kqueue
*kq
, kq_index_t qos_index
);
178 static struct kqtailq
*kqueue_get_suppressed_queue(struct kqueue
*kq
, kq_index_t qos_index
);
180 static void kqworkq_request_thread(struct kqworkq
*kqwq
, kq_index_t qos_index
);
181 static void kqworkq_request_help(struct kqworkq
*kqwq
, kq_index_t qos_index
, uint32_t type
);
182 static void kqworkq_update_override(struct kqworkq
*kqwq
, kq_index_t qos_index
, kq_index_t override_index
);
183 static void kqworkq_bind_thread(struct kqworkq
*kqwq
, kq_index_t qos_index
, thread_t thread
, unsigned int flags
);
184 static void kqworkq_unbind_thread(struct kqworkq
*kqwq
, kq_index_t qos_index
, thread_t thread
, unsigned int flags
);
185 static struct kqrequest
*kqworkq_get_request(struct kqworkq
*kqwq
, kq_index_t qos_index
);
188 static int knote_process(struct knote
*kn
, kevent_callback_t callback
, void *callback_data
,
189 struct filt_process_s
*process_data
, struct proc
*p
);
191 static void knote_put(struct knote
*kn
);
194 static int knote_fdadd(struct knote
*kn
, struct proc
*p
);
195 static void knote_fdremove(struct knote
*kn
, struct proc
*p
);
196 static struct knote
*knote_fdfind(struct kqueue
*kq
, struct kevent_internal_s
*kev
, struct proc
*p
);
198 static void knote_drop(struct knote
*kn
, struct proc
*p
);
199 static struct knote
*knote_alloc(void);
200 static void knote_free(struct knote
*kn
);
202 static void knote_activate(struct knote
*kn
);
203 static void knote_deactivate(struct knote
*kn
);
205 static void knote_enable(struct knote
*kn
);
206 static void knote_disable(struct knote
*kn
);
208 static int knote_enqueue(struct knote
*kn
);
209 static void knote_dequeue(struct knote
*kn
);
211 static void knote_suppress(struct knote
*kn
);
212 static void knote_unsuppress(struct knote
*kn
);
213 static void knote_wakeup(struct knote
*kn
);
215 static kq_index_t
knote_get_queue_index(struct knote
*kn
);
216 static struct kqtailq
*knote_get_queue(struct knote
*kn
);
217 static struct kqtailq
*knote_get_suppressed_queue(struct knote
*kn
);
218 static kq_index_t
knote_get_req_index(struct knote
*kn
);
219 static kq_index_t
knote_get_qos_index(struct knote
*kn
);
220 static void knote_set_qos_index(struct knote
*kn
, kq_index_t qos_index
);
221 static kq_index_t
knote_get_qos_override_index(struct knote
*kn
);
222 static void knote_set_qos_override_index(struct knote
*kn
, kq_index_t qos_index
);
224 static int filt_fileattach(struct knote
*kn
);
225 static struct filterops file_filtops
= {
227 .f_attach
= filt_fileattach
,
230 static void filt_kqdetach(struct knote
*kn
);
231 static int filt_kqueue(struct knote
*kn
, long hint
);
232 static int filt_kqtouch(struct knote
*kn
, struct kevent_internal_s
*kev
);
233 static int filt_kqprocess(struct knote
*kn
, struct filt_process_s
*data
, struct kevent_internal_s
*kev
);
234 static struct filterops kqread_filtops
= {
236 .f_detach
= filt_kqdetach
,
237 .f_event
= filt_kqueue
,
238 .f_touch
= filt_kqtouch
,
239 .f_process
= filt_kqprocess
,
242 /* placeholder for not-yet-implemented filters */
243 static int filt_badattach(struct knote
*kn
);
244 static struct filterops bad_filtops
= {
245 .f_attach
= filt_badattach
,
248 static int filt_procattach(struct knote
*kn
);
249 static void filt_procdetach(struct knote
*kn
);
250 static int filt_proc(struct knote
*kn
, long hint
);
251 static int filt_proctouch(struct knote
*kn
, struct kevent_internal_s
*kev
);
252 static int filt_procprocess(struct knote
*kn
, struct filt_process_s
*data
, struct kevent_internal_s
*kev
);
253 static struct filterops proc_filtops
= {
254 .f_attach
= filt_procattach
,
255 .f_detach
= filt_procdetach
,
256 .f_event
= filt_proc
,
257 .f_touch
= filt_proctouch
,
258 .f_process
= filt_procprocess
,
261 #if CONFIG_MEMORYSTATUS
262 extern struct filterops memorystatus_filtops
;
263 #endif /* CONFIG_MEMORYSTATUS */
265 extern struct filterops fs_filtops
;
267 extern struct filterops sig_filtops
;
270 static int filt_timerattach(struct knote
*kn
);
271 static void filt_timerdetach(struct knote
*kn
);
272 static int filt_timer(struct knote
*kn
, long hint
);
273 static int filt_timertouch(struct knote
*kn
, struct kevent_internal_s
*kev
);
274 static int filt_timerprocess(struct knote
*kn
, struct filt_process_s
*data
, struct kevent_internal_s
*kev
);
275 static struct filterops timer_filtops
= {
276 .f_attach
= filt_timerattach
,
277 .f_detach
= filt_timerdetach
,
278 .f_event
= filt_timer
,
279 .f_touch
= filt_timertouch
,
280 .f_process
= filt_timerprocess
,
284 static void filt_timerexpire(void *knx
, void *param1
);
285 static int filt_timervalidate(struct knote
*kn
);
286 static void filt_timerupdate(struct knote
*kn
, int num_fired
);
287 static void filt_timercancel(struct knote
*kn
);
289 #define TIMER_RUNNING 0x1
290 #define TIMER_CANCELWAIT 0x2
292 static lck_mtx_t _filt_timerlock
;
293 static void filt_timerlock(void);
294 static void filt_timerunlock(void);
296 static zone_t knote_zone
;
297 static zone_t kqfile_zone
;
298 static zone_t kqworkq_zone
;
300 #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask))
303 extern struct filterops aio_filtops
;
306 /* Mach portset filter */
307 extern struct filterops machport_filtops
;
310 static int filt_userattach(struct knote
*kn
);
311 static void filt_userdetach(struct knote
*kn
);
312 static int filt_user(struct knote
*kn
, long hint
);
313 static int filt_usertouch(struct knote
*kn
, struct kevent_internal_s
*kev
);
314 static int filt_userprocess(struct knote
*kn
, struct filt_process_s
*data
, struct kevent_internal_s
*kev
);
315 static struct filterops user_filtops
= {
316 .f_attach
= filt_userattach
,
317 .f_detach
= filt_userdetach
,
318 .f_event
= filt_user
,
319 .f_touch
= filt_usertouch
,
320 .f_process
= filt_userprocess
,
323 static lck_spin_t _filt_userlock
;
324 static void filt_userlock(void);
325 static void filt_userunlock(void);
327 extern struct filterops pipe_rfiltops
;
328 extern struct filterops pipe_wfiltops
;
329 extern struct filterops ptsd_kqops
;
330 extern struct filterops soread_filtops
;
331 extern struct filterops sowrite_filtops
;
332 extern struct filterops sock_filtops
;
333 extern struct filterops soexcept_filtops
;
334 extern struct filterops spec_filtops
;
335 extern struct filterops bpfread_filtops
;
336 extern struct filterops necp_fd_rfiltops
;
337 extern struct filterops skywalk_channel_rfiltops
;
338 extern struct filterops skywalk_channel_wfiltops
;
339 extern struct filterops fsevent_filtops
;
340 extern struct filterops vnode_filtops
;
344 * Rules for adding new filters to the system:
346 * - Add a new "EVFILT_" option value to bsd/sys/event.h (typically a negative value)
347 * in the exported section of the header
348 * - Update the EVFILT_SYSCOUNT value to reflect the new addition
349 * - Add a filterops to the sysfilt_ops array. Public filters should be added at the end
350 * of the Public Filters section in the array.
352 * - Add a new "EVFILT_" value to bsd/sys/event.h (typically a positive value)
353 * in the XNU_KERNEL_PRIVATE section of the header
354 * - Update the EVFILTID_MAX value to reflect the new addition
355 * - Add a filterops to the sysfilt_ops. Private filters should be added at the end of
356 * the Private filters section of the array.
358 static struct filterops
*sysfilt_ops
[EVFILTID_MAX
] = {
360 [~EVFILT_READ
] = &file_filtops
,
361 [~EVFILT_WRITE
] = &file_filtops
,
362 [~EVFILT_AIO
] = &bad_filtops
,
363 [~EVFILT_VNODE
] = &file_filtops
,
364 [~EVFILT_PROC
] = &proc_filtops
,
365 [~EVFILT_SIGNAL
] = &sig_filtops
,
366 [~EVFILT_TIMER
] = &timer_filtops
,
367 [~EVFILT_MACHPORT
] = &machport_filtops
,
368 [~EVFILT_FS
] = &fs_filtops
,
369 [~EVFILT_USER
] = &user_filtops
,
372 [~EVFILT_SOCK
] = &file_filtops
,
373 #if CONFIG_MEMORYSTATUS
374 [~EVFILT_MEMORYSTATUS
] = &memorystatus_filtops
,
376 [~EVFILT_MEMORYSTATUS
] = &bad_filtops
,
378 [~EVFILT_EXCEPT
] = &file_filtops
,
380 /* Private filters */
381 [EVFILTID_KQREAD
] = &kqread_filtops
,
382 [EVFILTID_PIPE_R
] = &pipe_rfiltops
,
383 [EVFILTID_PIPE_W
] = &pipe_wfiltops
,
384 [EVFILTID_PTSD
] = &ptsd_kqops
,
385 [EVFILTID_SOREAD
] = &soread_filtops
,
386 [EVFILTID_SOWRITE
] = &sowrite_filtops
,
387 [EVFILTID_SCK
] = &sock_filtops
,
388 [EVFILTID_SOEXCEPT
] = &soexcept_filtops
,
389 [EVFILTID_SPEC
] = &spec_filtops
,
390 [EVFILTID_BPFREAD
] = &bpfread_filtops
,
391 [EVFILTID_NECP_FD
] = &necp_fd_rfiltops
,
392 [EVFILTID_FSEVENT
] = &fsevent_filtops
,
393 [EVFILTID_VN
] = &vnode_filtops
396 /* waitq prepost callback */
397 void waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook
, void *knote_hook
, int qos
);
399 #ifndef _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
400 #define _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG 0x02000000 /* pthread event manager bit */
402 #ifndef _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
403 #define _PTHREAD_PRIORITY_OVERCOMMIT_FLAG 0x80000000 /* request overcommit threads */
405 #ifndef _PTHREAD_PRIORITY_QOS_CLASS_MASK
406 #define _PTHREAD_PRIORITY_QOS_CLASS_MASK 0x003fff00 /* QoS class mask */
408 #ifndef _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32
409 #define _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32 8
413 qos_t
canonicalize_kevent_qos(qos_t qos
)
415 unsigned long canonical
;
417 /* preserve manager and overcommit flags in this case */
418 canonical
= pthread_priority_canonicalize(qos
, FALSE
);
419 return (qos_t
)canonical
;
423 kq_index_t
qos_index_from_qos(qos_t qos
, boolean_t propagation
)
425 kq_index_t qos_index
;
426 unsigned long flags
= 0;
428 qos_index
= (kq_index_t
)thread_qos_from_pthread_priority(
429 (unsigned long)qos
, &flags
);
431 if (!propagation
&& (flags
& _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
))
432 return KQWQ_QOS_MANAGER
;
438 qos_t
qos_from_qos_index(kq_index_t qos_index
)
440 if (qos_index
== KQWQ_QOS_MANAGER
)
441 return _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
;
444 return 0; /* Unspecified */
446 /* Should have support from pthread kext support */
447 return (1 << (qos_index
- 1 +
448 _PTHREAD_PRIORITY_QOS_CLASS_SHIFT_32
));
452 kq_index_t
qos_index_for_servicer(int qos_class
, thread_t thread
, int flags
)
454 kq_index_t qos_index
;
456 if (flags
& KEVENT_FLAG_WORKQ_MANAGER
)
457 return KQWQ_QOS_MANAGER
;
460 * If the caller didn't pass in a class (legacy pthread kext)
461 * the we use the thread policy QoS of the current thread.
463 assert(qos_class
!= -1);
465 qos_index
= proc_get_thread_policy(thread
,
466 TASK_POLICY_ATTRIBUTE
,
469 qos_index
= (kq_index_t
)qos_class
;
471 assert(qos_index
> 0 && qos_index
< KQWQ_NQOS
);
477 * kqueue/note lock implementations
479 * The kqueue lock guards the kq state, the state of its queues,
480 * and the kqueue-aware status and use counts of individual knotes.
482 * The kqueue workq lock is used to protect state guarding the
483 * interaction of the kqueue with the workq. This state cannot
484 * be guarded by the kq lock - as it needs to be taken when we
485 * already have the waitq set lock held (during the waitq hook
486 * callback). It might be better to use the waitq lock itself
487 * for this, but the IRQ requirements make that difficult).
489 * Knote flags, filter flags, and associated data are protected
490 * by the underlying object lock - and are only ever looked at
491 * by calling the filter to get a [consistent] snapshot of that
494 lck_grp_attr_t
* kq_lck_grp_attr
;
495 lck_grp_t
* kq_lck_grp
;
496 lck_attr_t
* kq_lck_attr
;
499 kqlock(struct kqueue
*kq
)
501 lck_spin_lock(&kq
->kq_lock
);
505 kqunlock(struct kqueue
*kq
)
507 lck_spin_unlock(&kq
->kq_lock
);
512 * Convert a kq lock to a knote use referece.
514 * If the knote is being dropped, or has
515 * vanished, we can't get a use reference.
516 * Just return with it still locked.
518 * - kq locked at entry
519 * - unlock on exit if we get the use reference
522 kqlock2knoteuse(struct kqueue
*kq
, struct knote
*kn
)
524 if (kn
->kn_status
& (KN_DROPPING
| KN_VANISHED
))
527 assert(kn
->kn_status
& KN_ATTACHED
);
535 * Convert from a knote use reference back to kq lock.
537 * Drop a use reference and wake any waiters if
538 * this is the last one.
540 * If someone is trying to drop the knote, but the
541 * caller has events they must deliver, take
542 * responsibility for the drop later - and wake the
543 * other attempted dropper in a manner that informs
544 * him of the transfer of responsibility.
546 * The exit return indicates if the knote is still alive
547 * (or if not, the other dropper has been given the green
550 * The kqueue lock is re-taken unconditionally.
553 knoteuse2kqlock(struct kqueue
*kq
, struct knote
*kn
, int steal_drop
)
558 if (--kn
->kn_inuse
== 0) {
560 if ((kn
->kn_status
& KN_ATTACHING
) != 0) {
561 kn
->kn_status
&= ~KN_ATTACHING
;
564 if ((kn
->kn_status
& KN_USEWAIT
) != 0) {
565 wait_result_t result
;
567 /* If we need to, try and steal the drop */
568 if (kn
->kn_status
& KN_DROPPING
) {
569 if (steal_drop
&& !(kn
->kn_status
& KN_STOLENDROP
)) {
570 kn
->kn_status
|= KN_STOLENDROP
;
576 /* wakeup indicating if ANY USE stole the drop */
577 result
= (kn
->kn_status
& KN_STOLENDROP
) ?
578 THREAD_RESTART
: THREAD_AWAKENED
;
580 kn
->kn_status
&= ~KN_USEWAIT
;
581 waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
,
582 CAST_EVENT64_T(&kn
->kn_status
),
584 WAITQ_ALL_PRIORITIES
);
586 /* should have seen use-wait if dropping with use refs */
587 assert((kn
->kn_status
& (KN_DROPPING
|KN_STOLENDROP
)) == 0);
590 } else if (kn
->kn_status
& KN_DROPPING
) {
591 /* not the last ref but want to steal a drop if present */
592 if (steal_drop
&& ((kn
->kn_status
& KN_STOLENDROP
) == 0)) {
593 kn
->kn_status
|= KN_STOLENDROP
;
595 /* but we now have to wait to be the last ref */
596 kn
->kn_status
|= KN_USEWAIT
;
597 waitq_assert_wait64((struct waitq
*)&kq
->kq_wqs
,
598 CAST_EVENT64_T(&kn
->kn_status
),
599 THREAD_UNINT
, TIMEOUT_WAIT_FOREVER
);
601 thread_block(THREAD_CONTINUE_NULL
);
612 * Convert a kq lock to a knote use reference
613 * (for the purpose of detaching AND vanishing it).
615 * If the knote is being dropped, we can't get
616 * a detach reference, so wait for the knote to
617 * finish dropping before returning.
619 * If the knote is being used for other purposes,
620 * we cannot detach it until those uses are done
621 * as well. Again, just wait for them to finish
622 * (caller will start over at lookup).
624 * - kq locked at entry
628 kqlock2knotedetach(struct kqueue
*kq
, struct knote
*kn
)
630 if ((kn
->kn_status
& KN_DROPPING
) || kn
->kn_inuse
) {
631 /* have to wait for dropper or current uses to go away */
632 kn
->kn_status
|= KN_USEWAIT
;
633 waitq_assert_wait64((struct waitq
*)&kq
->kq_wqs
,
634 CAST_EVENT64_T(&kn
->kn_status
),
635 THREAD_UNINT
, TIMEOUT_WAIT_FOREVER
);
637 thread_block(THREAD_CONTINUE_NULL
);
640 assert((kn
->kn_status
& KN_VANISHED
) == 0);
641 assert(kn
->kn_status
& KN_ATTACHED
);
642 kn
->kn_status
&= ~KN_ATTACHED
;
643 kn
->kn_status
|= KN_VANISHED
;
650 * Convert a kq lock to a knote drop reference.
652 * If the knote is in use, wait for the use count
653 * to subside. We first mark our intention to drop
654 * it - keeping other users from "piling on."
655 * If we are too late, we have to wait for the
656 * other drop to complete.
658 * - kq locked at entry
659 * - always unlocked on exit.
660 * - caller can't hold any locks that would prevent
661 * the other dropper from completing.
664 kqlock2knotedrop(struct kqueue
*kq
, struct knote
*kn
)
667 wait_result_t result
;
669 oktodrop
= ((kn
->kn_status
& (KN_DROPPING
| KN_ATTACHING
)) == 0);
670 /* if another thread is attaching, they will become the dropping thread */
671 kn
->kn_status
|= KN_DROPPING
;
672 knote_unsuppress(kn
);
675 if (kn
->kn_inuse
== 0) {
680 kn
->kn_status
|= KN_USEWAIT
;
681 waitq_assert_wait64((struct waitq
*)&kq
->kq_wqs
,
682 CAST_EVENT64_T(&kn
->kn_status
),
683 THREAD_UNINT
, TIMEOUT_WAIT_FOREVER
);
685 result
= thread_block(THREAD_CONTINUE_NULL
);
686 /* THREAD_RESTART == another thread stole the knote drop */
687 return (result
== THREAD_AWAKENED
);
692 * Release a knote use count reference.
695 knote_put(struct knote
*kn
)
697 struct kqueue
*kq
= knote_get_kq(kn
);
700 if (--kn
->kn_inuse
== 0) {
701 if ((kn
->kn_status
& KN_USEWAIT
) != 0) {
702 kn
->kn_status
&= ~KN_USEWAIT
;
703 waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
,
704 CAST_EVENT64_T(&kn
->kn_status
),
706 WAITQ_ALL_PRIORITIES
);
714 filt_fileattach(struct knote
*kn
)
716 return (fo_kqfilter(kn
->kn_fp
, kn
, vfs_context_current()));
719 #define f_flag f_fglob->fg_flag
720 #define f_msgcount f_fglob->fg_msgcount
721 #define f_cred f_fglob->fg_cred
722 #define f_ops f_fglob->fg_ops
723 #define f_offset f_fglob->fg_offset
724 #define f_data f_fglob->fg_data
727 filt_kqdetach(struct knote
*kn
)
729 struct kqfile
*kqf
= (struct kqfile
*)kn
->kn_fp
->f_data
;
730 struct kqueue
*kq
= &kqf
->kqf_kqueue
;
733 KNOTE_DETACH(&kqf
->kqf_sel
.si_note
, kn
);
739 filt_kqueue(struct knote
*kn
, __unused
long hint
)
741 struct kqueue
*kq
= (struct kqueue
*)kn
->kn_fp
->f_data
;
744 count
= kq
->kq_count
;
749 filt_kqtouch(struct knote
*kn
, struct kevent_internal_s
*kev
)
752 struct kqueue
*kq
= (struct kqueue
*)kn
->kn_fp
->f_data
;
756 kn
->kn_data
= kq
->kq_count
;
757 if ((kn
->kn_status
& KN_UDATA_SPECIFIC
) == 0)
758 kn
->kn_udata
= kev
->udata
;
759 res
= (kn
->kn_data
> 0);
767 filt_kqprocess(struct knote
*kn
, struct filt_process_s
*data
, struct kevent_internal_s
*kev
)
770 struct kqueue
*kq
= (struct kqueue
*)kn
->kn_fp
->f_data
;
774 kn
->kn_data
= kq
->kq_count
;
775 res
= (kn
->kn_data
> 0);
777 *kev
= kn
->kn_kevent
;
778 if (kn
->kn_flags
& EV_CLEAR
)
787 filt_procattach(struct knote
*kn
)
791 assert(PID_MAX
< NOTE_PDATAMASK
);
793 if ((kn
->kn_sfflags
& (NOTE_TRACK
| NOTE_TRACKERR
| NOTE_CHILD
)) != 0) {
794 kn
->kn_flags
= EV_ERROR
;
795 kn
->kn_data
= ENOTSUP
;
799 p
= proc_find(kn
->kn_id
);
801 kn
->kn_flags
= EV_ERROR
;
806 const int NoteExitStatusBits
= NOTE_EXIT
| NOTE_EXITSTATUS
;
808 if ((kn
->kn_sfflags
& NoteExitStatusBits
) == NoteExitStatusBits
)
810 pid_t selfpid
= proc_selfpid();
812 if (p
->p_ppid
== selfpid
)
813 break; /* parent => ok */
815 if ((p
->p_lflag
& P_LTRACED
) != 0 &&
816 (p
->p_oppid
== selfpid
))
817 break; /* parent-in-waiting => ok */
820 kn
->kn_flags
= EV_ERROR
;
821 kn
->kn_data
= EACCES
;
827 kn
->kn_ptr
.p_proc
= p
; /* store the proc handle */
829 KNOTE_ATTACH(&p
->p_klist
, kn
);
836 * only captures edge-triggered events after this point
837 * so it can't already be fired.
844 * The knote may be attached to a different process, which may exit,
845 * leaving nothing for the knote to be attached to. In that case,
846 * the pointer to the process will have already been nulled out.
849 filt_procdetach(struct knote
*kn
)
855 p
= kn
->kn_ptr
.p_proc
;
856 if (p
!= PROC_NULL
) {
857 kn
->kn_ptr
.p_proc
= PROC_NULL
;
858 KNOTE_DETACH(&p
->p_klist
, kn
);
865 filt_proc(struct knote
*kn
, long hint
)
869 /* ALWAYS CALLED WITH proc_klist_lock */
872 * Note: a lot of bits in hint may be obtained from the knote
873 * To free some of those bits, see <rdar://problem/12592988> Freeing up
874 * bits in hint for filt_proc
876 * mask off extra data
878 event
= (u_int
)hint
& NOTE_PCTRLMASK
;
881 * termination lifecycle events can happen while a debugger
882 * has reparented a process, in which case notifications
883 * should be quashed except to the tracing parent. When
884 * the debugger reaps the child (either via wait4(2) or
885 * process exit), the child will be reparented to the original
886 * parent and these knotes re-fired.
888 if (event
& NOTE_EXIT
) {
889 if ((kn
->kn_ptr
.p_proc
->p_oppid
!= 0)
890 && (knote_get_kq(kn
)->kq_p
->p_pid
!= kn
->kn_ptr
.p_proc
->p_ppid
)) {
892 * This knote is not for the current ptrace(2) parent, ignore.
899 * if the user is interested in this event, record it.
901 if (kn
->kn_sfflags
& event
)
902 kn
->kn_fflags
|= event
;
904 #pragma clang diagnostic push
905 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
906 if ((event
== NOTE_REAP
) || ((event
== NOTE_EXIT
) && !(kn
->kn_sfflags
& NOTE_REAP
))) {
907 kn
->kn_flags
|= (EV_EOF
| EV_ONESHOT
);
909 #pragma clang diagnostic pop
913 * The kernel has a wrapper in place that returns the same data
914 * as is collected here, in kn_data. Any changes to how
915 * NOTE_EXITSTATUS and NOTE_EXIT_DETAIL are collected
916 * should also be reflected in the proc_pidnoteexit() wrapper.
918 if (event
== NOTE_EXIT
) {
920 if ((kn
->kn_sfflags
& NOTE_EXITSTATUS
) != 0) {
921 kn
->kn_fflags
|= NOTE_EXITSTATUS
;
922 kn
->kn_data
|= (hint
& NOTE_PDATAMASK
);
924 if ((kn
->kn_sfflags
& NOTE_EXIT_DETAIL
) != 0) {
925 kn
->kn_fflags
|= NOTE_EXIT_DETAIL
;
926 if ((kn
->kn_ptr
.p_proc
->p_lflag
&
927 P_LTERM_DECRYPTFAIL
) != 0) {
928 kn
->kn_data
|= NOTE_EXIT_DECRYPTFAIL
;
930 if ((kn
->kn_ptr
.p_proc
->p_lflag
&
931 P_LTERM_JETSAM
) != 0) {
932 kn
->kn_data
|= NOTE_EXIT_MEMORY
;
933 switch (kn
->kn_ptr
.p_proc
->p_lflag
& P_JETSAM_MASK
) {
934 case P_JETSAM_VMPAGESHORTAGE
:
935 kn
->kn_data
|= NOTE_EXIT_MEMORY_VMPAGESHORTAGE
;
937 case P_JETSAM_VMTHRASHING
:
938 kn
->kn_data
|= NOTE_EXIT_MEMORY_VMTHRASHING
;
940 case P_JETSAM_FCTHRASHING
:
941 kn
->kn_data
|= NOTE_EXIT_MEMORY_FCTHRASHING
;
944 kn
->kn_data
|= NOTE_EXIT_MEMORY_VNODE
;
947 kn
->kn_data
|= NOTE_EXIT_MEMORY_HIWAT
;
950 kn
->kn_data
|= NOTE_EXIT_MEMORY_PID
;
952 case P_JETSAM_IDLEEXIT
:
953 kn
->kn_data
|= NOTE_EXIT_MEMORY_IDLE
;
957 if ((kn
->kn_ptr
.p_proc
->p_csflags
&
959 kn
->kn_data
|= NOTE_EXIT_CSERROR
;
964 /* if we have any matching state, activate the knote */
965 return (kn
->kn_fflags
!= 0);
969 filt_proctouch(struct knote
*kn
, struct kevent_internal_s
*kev
)
975 /* accept new filter flags and mask off output events no long interesting */
976 kn
->kn_sfflags
= kev
->fflags
;
977 if ((kn
->kn_status
& KN_UDATA_SPECIFIC
) == 0)
978 kn
->kn_udata
= kev
->udata
;
980 /* restrict the current results to the (smaller?) set of new interest */
982 * For compatibility with previous implementations, we leave kn_fflags
983 * as they were before.
985 //kn->kn_fflags &= kn->kn_sfflags;
987 res
= (kn
->kn_fflags
!= 0);
995 filt_procprocess(struct knote
*kn
, struct filt_process_s
*data
, struct kevent_internal_s
*kev
)
1001 res
= (kn
->kn_fflags
!= 0);
1003 *kev
= kn
->kn_kevent
;
1004 kn
->kn_flags
|= EV_CLEAR
; /* automatically set */
1008 proc_klist_unlock();
1013 * filt_timervalidate - process data from user
1015 * Converts to either interval or deadline format.
1017 * The saved-data field in the knote contains the
1018 * time value. The saved filter-flags indicates
1019 * the unit of measurement.
1021 * After validation, either the saved-data field
1022 * contains the interval in absolute time, or ext[0]
1023 * contains the expected deadline. If that deadline
1024 * is in the past, ext[0] is 0.
1026 * Returns EINVAL for unrecognized units of time.
1028 * Timer filter lock is held.
1032 filt_timervalidate(struct knote
*kn
)
1034 uint64_t multiplier
;
1037 switch (kn
->kn_sfflags
& (NOTE_SECONDS
|NOTE_USECONDS
|NOTE_NSECONDS
)) {
1039 multiplier
= NSEC_PER_SEC
;
1042 multiplier
= NSEC_PER_USEC
;
1047 case 0: /* milliseconds (default) */
1048 multiplier
= NSEC_PER_SEC
/ 1000;
1054 /* transform the slop delta(leeway) in kn_ext[1] if passed to same time scale */
1055 if(kn
->kn_sfflags
& NOTE_LEEWAY
){
1056 nanoseconds_to_absolutetime((uint64_t)kn
->kn_ext
[1] * multiplier
, &raw
);
1057 kn
->kn_ext
[1] = raw
;
1060 nanoseconds_to_absolutetime((uint64_t)kn
->kn_sdata
* multiplier
, &raw
);
1065 if (kn
->kn_sfflags
& NOTE_ABSOLUTE
) {
1066 clock_sec_t seconds
;
1067 clock_nsec_t nanoseconds
;
1070 clock_get_calendar_nanotime(&seconds
, &nanoseconds
);
1071 nanoseconds_to_absolutetime((uint64_t)seconds
* NSEC_PER_SEC
+
1074 /* if time is in the future */
1078 if (kn
->kn_sfflags
& NOTE_MACH_CONTINUOUS_TIME
) {
1079 clock_continuoustime_interval_to_deadline(raw
,
1082 clock_absolutetime_interval_to_deadline(raw
,
1094 * filt_timerupdate - compute the next deadline
1096 * Repeating timers store their interval in kn_sdata. Absolute
1097 * timers have already calculated the deadline, stored in ext[0].
1099 * On return, the next deadline (or zero if no deadline is needed)
1100 * is stored in kn_ext[0].
1102 * Timer filter lock is held.
1105 filt_timerupdate(struct knote
*kn
, int num_fired
)
1107 assert(num_fired
> 0);
1109 /* if there's no interval, deadline is just in kn_ext[0] */
1110 if (kn
->kn_sdata
== 0)
1113 /* if timer hasn't fired before, fire in interval nsecs */
1114 if (kn
->kn_ext
[0] == 0) {
1115 assert(num_fired
== 1);
1116 if (kn
->kn_sfflags
& NOTE_MACH_CONTINUOUS_TIME
) {
1117 clock_continuoustime_interval_to_deadline(kn
->kn_sdata
,
1120 clock_absolutetime_interval_to_deadline(kn
->kn_sdata
,
1125 * If timer has fired before, schedule the next pop
1126 * relative to the last intended deadline.
1128 * We could check for whether the deadline has expired,
1129 * but the thread call layer can handle that.
1131 * Go forward an additional number of periods, in the case the
1132 * timer fired multiple times while the system was asleep.
1134 kn
->kn_ext
[0] += (kn
->kn_sdata
* num_fired
);
1139 * filt_timerexpire - the timer callout routine
1141 * Just propagate the timer event into the knote
1142 * filter routine (by going through the knote
1143 * synchronization point). Pass a hint to
1144 * indicate this is a real event, not just a
1148 filt_timerexpire(void *knx
, __unused
void *spare
)
1150 struct klist timer_list
;
1151 struct knote
*kn
= knx
;
1155 kn
->kn_hookid
&= ~TIMER_RUNNING
;
1157 /* no "object" for timers, so fake a list */
1158 SLIST_INIT(&timer_list
);
1159 SLIST_INSERT_HEAD(&timer_list
, kn
, kn_selnext
);
1160 KNOTE(&timer_list
, 1);
1162 /* if someone is waiting for timer to pop */
1163 if (kn
->kn_hookid
& TIMER_CANCELWAIT
) {
1164 struct kqueue
*kq
= knote_get_kq(kn
);
1165 waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
,
1166 CAST_EVENT64_T(&kn
->kn_hook
),
1168 WAITQ_ALL_PRIORITIES
);
1175 * Cancel a running timer (or wait for the pop).
1176 * Timer filter lock is held.
1179 filt_timercancel(struct knote
*kn
)
1181 struct kqueue
*kq
= knote_get_kq(kn
);
1182 thread_call_t callout
= kn
->kn_hook
;
1183 boolean_t cancelled
;
1185 if (kn
->kn_hookid
& TIMER_RUNNING
) {
1186 /* cancel the callout if we can */
1187 cancelled
= thread_call_cancel(callout
);
1189 kn
->kn_hookid
&= ~TIMER_RUNNING
;
1191 /* we have to wait for the expire routine. */
1192 kn
->kn_hookid
|= TIMER_CANCELWAIT
;
1193 waitq_assert_wait64((struct waitq
*)&kq
->kq_wqs
,
1194 CAST_EVENT64_T(&kn
->kn_hook
),
1195 THREAD_UNINT
, TIMEOUT_WAIT_FOREVER
);
1197 thread_block(THREAD_CONTINUE_NULL
);
1199 assert((kn
->kn_hookid
& TIMER_RUNNING
) == 0);
1205 * Allocate a thread call for the knote's lifetime, and kick off the timer.
1208 filt_timerattach(struct knote
*kn
)
1210 thread_call_t callout
;
1214 callout
= thread_call_allocate(filt_timerexpire
, kn
);
1215 if (NULL
== callout
) {
1216 kn
->kn_flags
= EV_ERROR
;
1217 kn
->kn_data
= ENOMEM
;
1222 error
= filt_timervalidate(kn
);
1225 thread_call_free(callout
);
1226 kn
->kn_flags
= EV_ERROR
;
1227 kn
->kn_data
= error
;
1231 kn
->kn_hook
= (void*)callout
;
1234 /* absolute=EV_ONESHOT */
1235 if (kn
->kn_sfflags
& NOTE_ABSOLUTE
)
1236 kn
->kn_flags
|= EV_ONESHOT
;
1238 filt_timerupdate(kn
, 1);
1239 if (kn
->kn_ext
[0]) {
1240 kn
->kn_flags
|= EV_CLEAR
;
1241 unsigned int timer_flags
= 0;
1242 if (kn
->kn_sfflags
& NOTE_CRITICAL
)
1243 timer_flags
|= THREAD_CALL_DELAY_USER_CRITICAL
;
1244 else if (kn
->kn_sfflags
& NOTE_BACKGROUND
)
1245 timer_flags
|= THREAD_CALL_DELAY_USER_BACKGROUND
;
1247 timer_flags
|= THREAD_CALL_DELAY_USER_NORMAL
;
1249 if (kn
->kn_sfflags
& NOTE_LEEWAY
)
1250 timer_flags
|= THREAD_CALL_DELAY_LEEWAY
;
1251 if (kn
->kn_sfflags
& NOTE_MACH_CONTINUOUS_TIME
)
1252 timer_flags
|= THREAD_CALL_CONTINUOUS
;
1254 thread_call_enter_delayed_with_leeway(callout
, NULL
,
1255 kn
->kn_ext
[0], kn
->kn_ext
[1], timer_flags
);
1257 kn
->kn_hookid
|= TIMER_RUNNING
;
1259 /* fake immediate */
1263 res
= (kn
->kn_data
> 0);
1271 * Shut down the timer if it's running, and free the callout.
1274 filt_timerdetach(struct knote
*kn
)
1276 thread_call_t callout
;
1280 callout
= (thread_call_t
)kn
->kn_hook
;
1281 filt_timercancel(kn
);
1285 thread_call_free(callout
);
1289 static int filt_timer_num_fired(struct knote
*kn
)
1291 /* by default we fire a timer once */
1295 * When the time base is mach_continuous_time, we have to calculate
1296 * the number of times the timer fired while we were asleep.
1298 if ((kn
->kn_sfflags
& NOTE_MACH_CONTINUOUS_TIME
) &&
1299 (kn
->kn_sdata
!= 0) &&
1300 (kn
->kn_ext
[0] != 0))
1302 const uint64_t now
= mach_continuous_time();
1303 // time for timer to fire (right now) is kn_ext[0]
1304 // kn_sdata is period for timer to fire
1305 assert(now
>= kn
->kn_ext
[0]);
1306 assert(kn
->kn_sdata
> 0);
1308 const uint64_t overrun_ticks
= now
- kn
->kn_ext
[0];
1309 const uint64_t kn_sdata
= kn
->kn_sdata
;
1311 if (overrun_ticks
< kn_sdata
) {
1313 } else if (overrun_ticks
< (kn_sdata
<< 1)) {
1316 num_fired
= (overrun_ticks
/ kn_sdata
) + 1;
1324 * filt_timer - post events to a timer knote
1326 * Count the timer fire and re-arm as requested.
1327 * This always crosses the threshold of interest,
1328 * so always return an indication that the knote
1329 * should be activated (if not already).
1336 #pragma unused(hint)
1338 /* real timer pop -- timer lock held by filt_timerexpire */
1339 int num_fired
= filt_timer_num_fired(kn
);
1340 kn
->kn_data
+= num_fired
;
1342 if (((kn
->kn_hookid
& TIMER_CANCELWAIT
) == 0) &&
1343 ((kn
->kn_flags
& EV_ONESHOT
) == 0)) {
1344 /* evaluate next time to fire */
1345 filt_timerupdate(kn
, num_fired
);
1347 if (kn
->kn_ext
[0]) {
1348 unsigned int timer_flags
= 0;
1350 /* keep the callout and re-arm */
1351 if (kn
->kn_sfflags
& NOTE_CRITICAL
)
1352 timer_flags
|= THREAD_CALL_DELAY_USER_CRITICAL
;
1353 else if (kn
->kn_sfflags
& NOTE_BACKGROUND
)
1354 timer_flags
|= THREAD_CALL_DELAY_USER_BACKGROUND
;
1356 timer_flags
|= THREAD_CALL_DELAY_USER_NORMAL
;
1358 if (kn
->kn_sfflags
& NOTE_LEEWAY
)
1359 timer_flags
|= THREAD_CALL_DELAY_LEEWAY
;
1361 thread_call_enter_delayed_with_leeway(kn
->kn_hook
, NULL
,
1362 kn
->kn_ext
[0], kn
->kn_ext
[1], timer_flags
);
1364 kn
->kn_hookid
|= TIMER_RUNNING
;
1373 * filt_timertouch - update timer knote with new user input
1375 * Cancel and restart the timer based on new user data. When
1376 * the user picks up a knote, clear the count of how many timer
1377 * pops have gone off (in kn_data).
1382 struct kevent_internal_s
*kev
)
1389 /* cancel current call */
1390 filt_timercancel(kn
);
1392 /* capture the new values used to compute deadline */
1393 kn
->kn_sdata
= kev
->data
;
1394 kn
->kn_sfflags
= kev
->fflags
;
1395 kn
->kn_ext
[0] = kev
->ext
[0];
1396 kn
->kn_ext
[1] = kev
->ext
[1];
1398 if ((kn
->kn_status
& KN_UDATA_SPECIFIC
) == 0)
1399 kn
->kn_udata
= kev
->udata
;
1401 /* recalculate deadline */
1402 error
= filt_timervalidate(kn
);
1404 /* no way to report error, so mark it in the knote */
1406 kn
->kn_flags
|= EV_ERROR
;
1407 kn
->kn_data
= error
;
1411 /* start timer if necessary */
1412 filt_timerupdate(kn
, 1);
1414 if (kn
->kn_ext
[0]) {
1415 unsigned int timer_flags
= 0;
1416 if (kn
->kn_sfflags
& NOTE_CRITICAL
)
1417 timer_flags
|= THREAD_CALL_DELAY_USER_CRITICAL
;
1418 else if (kn
->kn_sfflags
& NOTE_BACKGROUND
)
1419 timer_flags
|= THREAD_CALL_DELAY_USER_BACKGROUND
;
1421 timer_flags
|= THREAD_CALL_DELAY_USER_NORMAL
;
1423 if (kn
->kn_sfflags
& NOTE_LEEWAY
)
1424 timer_flags
|= THREAD_CALL_DELAY_LEEWAY
;
1426 thread_call_enter_delayed_with_leeway(kn
->kn_hook
, NULL
,
1427 kn
->kn_ext
[0], kn
->kn_ext
[1], timer_flags
);
1429 kn
->kn_hookid
|= TIMER_RUNNING
;
1431 /* pretend the timer has fired */
1435 /* capture if already fired */
1436 res
= (kn
->kn_data
> 0);
1444 * filt_timerprocess - query state of knote and snapshot event data
1446 * Determine if the timer has fired in the past, snapshot the state
1447 * of the kevent for returning to user-space, and clear pending event
1448 * counters for the next time.
1453 __unused
struct filt_process_s
*data
,
1454 struct kevent_internal_s
*kev
)
1459 if (kn
->kn_data
== 0) {
1465 * Copy out the interesting kevent state,
1466 * but don't leak out the raw time calculations.
1468 *kev
= kn
->kn_kevent
;
1470 /* kev->ext[1] = 0; JMM - shouldn't we hide this too? */
1473 * reset the timer pop count in kn_data
1474 * and (optionally) clear the fflags.
1477 if (kn
->kn_flags
& EV_CLEAR
)
1485 filt_timerlock(void)
1487 lck_mtx_lock(&_filt_timerlock
);
1491 filt_timerunlock(void)
1493 lck_mtx_unlock(&_filt_timerlock
);
1499 lck_spin_lock(&_filt_userlock
);
1503 filt_userunlock(void)
1505 lck_spin_unlock(&_filt_userlock
);
1509 filt_userattach(struct knote
*kn
)
1511 /* EVFILT_USER knotes are not attached to anything in the kernel */
1512 /* Cant discover this knote until after attach - so no lock needed */
1514 if (kn
->kn_fflags
& NOTE_TRIGGER
) {
1519 return (kn
->kn_hookid
);
1523 filt_userdetach(__unused
struct knote
*kn
)
1525 /* EVFILT_USER knotes are not attached to anything in the kernel */
1530 __unused
struct knote
*kn
,
1540 struct kevent_internal_s
*kev
)
1548 ffctrl
= kev
->fflags
& NOTE_FFCTRLMASK
;
1549 fflags
= kev
->fflags
& NOTE_FFLAGSMASK
;
1554 kn
->kn_sfflags
&= fflags
;
1557 kn
->kn_sfflags
|= fflags
;
1560 kn
->kn_sfflags
= fflags
;
1563 kn
->kn_sdata
= kev
->data
;
1565 if ((kn
->kn_status
& KN_UDATA_SPECIFIC
) == 0)
1566 kn
->kn_udata
= kev
->udata
;
1568 if (kev
->fflags
& NOTE_TRIGGER
) {
1571 active
= kn
->kn_hookid
;
1581 __unused
struct filt_process_s
*data
,
1582 struct kevent_internal_s
*kev
)
1586 if (kn
->kn_hookid
== 0) {
1591 *kev
= kn
->kn_kevent
;
1592 kev
->fflags
= (volatile UInt32
)kn
->kn_sfflags
;
1593 kev
->data
= kn
->kn_sdata
;
1594 if (kn
->kn_flags
& EV_CLEAR
) {
1605 * JMM - placeholder for not-yet-implemented filters
1608 filt_badattach(__unused
struct knote
*kn
)
1610 kn
->kn_flags
|= EV_ERROR
;
1611 kn
->kn_data
= ENOTSUP
;
1616 kqueue_alloc(struct proc
*p
, unsigned int flags
)
1618 struct filedesc
*fdp
= p
->p_fd
;
1619 struct kqueue
*kq
= NULL
;
1622 uint64_t kq_addr_offset
;
1624 if (flags
& KEVENT_FLAG_WORKQ
) {
1625 struct kqworkq
*kqwq
;
1628 kqwq
= (struct kqworkq
*)zalloc(kqworkq_zone
);
1632 kq
= &kqwq
->kqwq_kqueue
;
1633 bzero(kqwq
, sizeof (struct kqworkq
));
1635 kqwq
->kqwq_state
= KQ_WORKQ
;
1637 for (i
= 0; i
< KQWQ_NBUCKETS
; i
++) {
1638 TAILQ_INIT(&kq
->kq_queue
[i
]);
1640 for (i
= 0; i
< KQWQ_NQOS
; i
++) {
1641 TAILQ_INIT(&kqwq
->kqwq_request
[i
].kqr_suppressed
);
1644 lck_spin_init(&kqwq
->kqwq_reqlock
, kq_lck_grp
, kq_lck_attr
);
1645 policy
= SYNC_POLICY_FIFO
;
1646 hook
= (void *)kqwq
;
1651 kqf
= (struct kqfile
*)zalloc(kqfile_zone
);
1655 kq
= &kqf
->kqf_kqueue
;
1656 bzero(kqf
, sizeof (struct kqfile
));
1657 TAILQ_INIT(&kq
->kq_queue
[0]);
1658 TAILQ_INIT(&kqf
->kqf_suppressed
);
1660 policy
= SYNC_POLICY_FIFO
| SYNC_POLICY_PREPOST
;
1665 waitq_set_init(&kq
->kq_wqs
, policy
, NULL
, hook
);
1666 lck_spin_init(&kq
->kq_lock
, kq_lck_grp
, kq_lck_attr
);
1669 if (fdp
->fd_knlistsize
< 0) {
1671 if (fdp
->fd_knlistsize
< 0)
1672 fdp
->fd_knlistsize
= 0; /* this process has had a kq */
1676 kq_addr_offset
= ((uintptr_t)kq
- (uintptr_t)VM_MIN_KERNEL_AND_KEXT_ADDRESS
);
1677 /* Assert that the address can be pointer compacted for use with knote */
1678 assert(kq_addr_offset
< (uint64_t)(1ull << KNOTE_KQ_BITSIZE
));
1683 * kqueue_dealloc - detach all knotes from a kqueue and free it
1685 * We walk each list looking for knotes referencing this
1686 * this kqueue. If we find one, we try to drop it. But
1687 * if we fail to get a drop reference, that will wait
1688 * until it is dropped. So, we can just restart again
1689 * safe in the assumption that the list will eventually
1690 * not contain any more references to this kqueue (either
1691 * we dropped them all, or someone else did).
1693 * Assumes no new events are being added to the kqueue.
1694 * Nothing locked on entry or exit.
1697 kqueue_dealloc(struct kqueue
*kq
)
1700 struct filedesc
*fdp
;
1711 for (i
= 0; i
< fdp
->fd_knlistsize
; i
++) {
1712 kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
]);
1713 while (kn
!= NULL
) {
1714 if (kq
== knote_get_kq(kn
)) {
1717 /* drop it ourselves or wait */
1718 if (kqlock2knotedrop(kq
, kn
)) {
1722 /* start over at beginning of list */
1723 kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
]);
1726 kn
= SLIST_NEXT(kn
, kn_link
);
1729 if (fdp
->fd_knhashmask
!= 0) {
1730 for (i
= 0; i
< (int)fdp
->fd_knhashmask
+ 1; i
++) {
1731 kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
]);
1732 while (kn
!= NULL
) {
1733 if (kq
== knote_get_kq(kn
)) {
1736 /* drop it ourselves or wait */
1737 if (kqlock2knotedrop(kq
, kn
)) {
1741 /* start over at beginning of list */
1742 kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
]);
1745 kn
= SLIST_NEXT(kn
, kn_link
);
1752 * waitq_set_deinit() remove the KQ's waitq set from
1753 * any select sets to which it may belong.
1755 waitq_set_deinit(&kq
->kq_wqs
);
1756 lck_spin_destroy(&kq
->kq_lock
, kq_lck_grp
);
1758 if (kq
->kq_state
& KQ_WORKQ
) {
1759 struct kqworkq
*kqwq
= (struct kqworkq
*)kq
;
1761 lck_spin_destroy(&kqwq
->kqwq_reqlock
, kq_lck_grp
);
1762 zfree(kqworkq_zone
, kqwq
);
1764 struct kqfile
*kqf
= (struct kqfile
*)kq
;
1766 zfree(kqfile_zone
, kqf
);
1771 kqueue_body(struct proc
*p
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
)
1774 struct fileproc
*fp
;
1777 error
= falloc_withalloc(p
,
1778 &fp
, &fd
, vfs_context_current(), fp_zalloc
, cra
);
1783 kq
= kqueue_alloc(p
, 0);
1789 fp
->f_flag
= FREAD
| FWRITE
;
1790 fp
->f_ops
= &kqueueops
;
1794 *fdflags(p
, fd
) |= UF_EXCLOSE
;
1795 procfdtbl_releasefd(p
, fd
, NULL
);
1796 fp_drop(p
, fd
, fp
, 1);
1804 kqueue(struct proc
*p
, __unused
struct kqueue_args
*uap
, int32_t *retval
)
1806 return (kqueue_body(p
, fileproc_alloc_init
, NULL
, retval
));
1810 kevent_copyin(user_addr_t
*addrp
, struct kevent_internal_s
*kevp
, struct proc
*p
,
1816 if (flags
& KEVENT_FLAG_LEGACY32
) {
1817 bzero(kevp
, sizeof (*kevp
));
1819 if (IS_64BIT_PROCESS(p
)) {
1820 struct user64_kevent kev64
;
1822 advance
= sizeof (kev64
);
1823 error
= copyin(*addrp
, (caddr_t
)&kev64
, advance
);
1826 kevp
->ident
= kev64
.ident
;
1827 kevp
->filter
= kev64
.filter
;
1828 kevp
->flags
= kev64
.flags
;
1829 kevp
->udata
= kev64
.udata
;
1830 kevp
->fflags
= kev64
.fflags
;
1831 kevp
->data
= kev64
.data
;
1833 struct user32_kevent kev32
;
1835 advance
= sizeof (kev32
);
1836 error
= copyin(*addrp
, (caddr_t
)&kev32
, advance
);
1839 kevp
->ident
= (uintptr_t)kev32
.ident
;
1840 kevp
->filter
= kev32
.filter
;
1841 kevp
->flags
= kev32
.flags
;
1842 kevp
->udata
= CAST_USER_ADDR_T(kev32
.udata
);
1843 kevp
->fflags
= kev32
.fflags
;
1844 kevp
->data
= (intptr_t)kev32
.data
;
1846 } else if (flags
& KEVENT_FLAG_LEGACY64
) {
1847 struct kevent64_s kev64
;
1849 bzero(kevp
, sizeof (*kevp
));
1851 advance
= sizeof (struct kevent64_s
);
1852 error
= copyin(*addrp
, (caddr_t
)&kev64
, advance
);
1855 kevp
->ident
= kev64
.ident
;
1856 kevp
->filter
= kev64
.filter
;
1857 kevp
->flags
= kev64
.flags
;
1858 kevp
->udata
= kev64
.udata
;
1859 kevp
->fflags
= kev64
.fflags
;
1860 kevp
->data
= kev64
.data
;
1861 kevp
->ext
[0] = kev64
.ext
[0];
1862 kevp
->ext
[1] = kev64
.ext
[1];
1865 struct kevent_qos_s kevqos
;
1867 bzero(kevp
, sizeof (*kevp
));
1869 advance
= sizeof (struct kevent_qos_s
);
1870 error
= copyin(*addrp
, (caddr_t
)&kevqos
, advance
);
1873 kevp
->ident
= kevqos
.ident
;
1874 kevp
->filter
= kevqos
.filter
;
1875 kevp
->flags
= kevqos
.flags
;
1876 kevp
->qos
= kevqos
.qos
;
1877 // kevp->xflags = kevqos.xflags;
1878 kevp
->udata
= kevqos
.udata
;
1879 kevp
->fflags
= kevqos
.fflags
;
1880 kevp
->data
= kevqos
.data
;
1881 kevp
->ext
[0] = kevqos
.ext
[0];
1882 kevp
->ext
[1] = kevqos
.ext
[1];
1883 kevp
->ext
[2] = kevqos
.ext
[2];
1884 kevp
->ext
[3] = kevqos
.ext
[3];
1892 kevent_copyout(struct kevent_internal_s
*kevp
, user_addr_t
*addrp
, struct proc
*p
,
1895 user_addr_t addr
= *addrp
;
1900 * fully initialize the differnt output event structure
1901 * types from the internal kevent (and some universal
1902 * defaults for fields not represented in the internal
1905 if (flags
& KEVENT_FLAG_LEGACY32
) {
1906 assert((flags
& KEVENT_FLAG_STACK_EVENTS
) == 0);
1908 if (IS_64BIT_PROCESS(p
)) {
1909 struct user64_kevent kev64
;
1911 advance
= sizeof (kev64
);
1912 bzero(&kev64
, advance
);
1915 * deal with the special case of a user-supplied
1916 * value of (uintptr_t)-1.
1918 kev64
.ident
= (kevp
->ident
== (uintptr_t)-1) ?
1919 (uint64_t)-1LL : (uint64_t)kevp
->ident
;
1921 kev64
.filter
= kevp
->filter
;
1922 kev64
.flags
= kevp
->flags
;
1923 kev64
.fflags
= kevp
->fflags
;
1924 kev64
.data
= (int64_t) kevp
->data
;
1925 kev64
.udata
= kevp
->udata
;
1926 error
= copyout((caddr_t
)&kev64
, addr
, advance
);
1928 struct user32_kevent kev32
;
1930 advance
= sizeof (kev32
);
1931 bzero(&kev32
, advance
);
1932 kev32
.ident
= (uint32_t)kevp
->ident
;
1933 kev32
.filter
= kevp
->filter
;
1934 kev32
.flags
= kevp
->flags
;
1935 kev32
.fflags
= kevp
->fflags
;
1936 kev32
.data
= (int32_t)kevp
->data
;
1937 kev32
.udata
= kevp
->udata
;
1938 error
= copyout((caddr_t
)&kev32
, addr
, advance
);
1940 } else if (flags
& KEVENT_FLAG_LEGACY64
) {
1941 struct kevent64_s kev64
;
1943 advance
= sizeof (struct kevent64_s
);
1944 if (flags
& KEVENT_FLAG_STACK_EVENTS
) {
1947 bzero(&kev64
, advance
);
1948 kev64
.ident
= kevp
->ident
;
1949 kev64
.filter
= kevp
->filter
;
1950 kev64
.flags
= kevp
->flags
;
1951 kev64
.fflags
= kevp
->fflags
;
1952 kev64
.data
= (int64_t) kevp
->data
;
1953 kev64
.udata
= kevp
->udata
;
1954 kev64
.ext
[0] = kevp
->ext
[0];
1955 kev64
.ext
[1] = kevp
->ext
[1];
1956 error
= copyout((caddr_t
)&kev64
, addr
, advance
);
1958 struct kevent_qos_s kevqos
;
1960 advance
= sizeof (struct kevent_qos_s
);
1961 if (flags
& KEVENT_FLAG_STACK_EVENTS
) {
1964 bzero(&kevqos
, advance
);
1965 kevqos
.ident
= kevp
->ident
;
1966 kevqos
.filter
= kevp
->filter
;
1967 kevqos
.flags
= kevp
->flags
;
1968 kevqos
.qos
= kevp
->qos
;
1969 kevqos
.udata
= kevp
->udata
;
1970 kevqos
.fflags
= kevp
->fflags
;
1972 kevqos
.data
= (int64_t) kevp
->data
;
1973 kevqos
.ext
[0] = kevp
->ext
[0];
1974 kevqos
.ext
[1] = kevp
->ext
[1];
1975 kevqos
.ext
[2] = kevp
->ext
[2];
1976 kevqos
.ext
[3] = kevp
->ext
[3];
1977 error
= copyout((caddr_t
)&kevqos
, addr
, advance
);
1980 if (flags
& KEVENT_FLAG_STACK_EVENTS
)
1983 *addrp
= addr
+ advance
;
1989 kevent_get_data_size(struct proc
*p
,
1990 uint64_t data_available
,
1992 user_size_t
*residp
)
1997 if (data_available
!= USER_ADDR_NULL
) {
1998 if (flags
& KEVENT_FLAG_KERNEL
) {
1999 resid
= *(user_size_t
*)(uintptr_t)data_available
;
2000 } else if (IS_64BIT_PROCESS(p
)) {
2001 user64_size_t usize
;
2002 error
= copyin((user_addr_t
)data_available
, &usize
, sizeof(usize
));
2003 resid
= (user_size_t
)usize
;
2005 user32_size_t usize
;
2006 error
= copyin((user_addr_t
)data_available
, &usize
, sizeof(usize
));
2007 resid
= (user_size_t
)usize
;
2019 kevent_put_data_size(struct proc
*p
,
2020 uint64_t data_available
,
2026 if (data_available
) {
2027 if (flags
& KEVENT_FLAG_KERNEL
) {
2028 *(user_size_t
*)(uintptr_t)data_available
= resid
;
2029 } else if (IS_64BIT_PROCESS(p
)) {
2030 user64_size_t usize
= (user64_size_t
)resid
;
2031 error
= copyout(&usize
, (user_addr_t
)data_available
, sizeof(usize
));
2033 user32_size_t usize
= (user32_size_t
)resid
;
2034 error
= copyout(&usize
, (user_addr_t
)data_available
, sizeof(usize
));
2041 * kevent_continue - continue a kevent syscall after blocking
2043 * assume we inherit a use count on the kq fileglob.
2046 __attribute__((noreturn
))
2048 kevent_continue(__unused
struct kqueue
*kq
, void *data
, int error
)
2050 struct _kevent
*cont_args
;
2051 struct fileproc
*fp
;
2052 uint64_t data_available
;
2053 user_size_t data_size
;
2054 user_size_t data_resid
;
2059 struct proc
*p
= current_proc();
2061 cont_args
= (struct _kevent
*)data
;
2062 data_available
= cont_args
->data_available
;
2063 flags
= cont_args
->process_data
.fp_flags
;
2064 data_size
= cont_args
->process_data
.fp_data_size
;
2065 data_resid
= cont_args
->process_data
.fp_data_resid
;
2066 noutputs
= cont_args
->eventout
;
2067 retval
= cont_args
->retval
;
2072 fp_drop(p
, fd
, fp
, 0);
2074 /* don't abandon other output just because of residual copyout failures */
2075 if (error
== 0 && data_available
&& data_resid
!= data_size
) {
2076 (void)kevent_put_data_size(p
, data_available
, flags
, data_resid
);
2079 /* don't restart after signals... */
2080 if (error
== ERESTART
)
2082 else if (error
== EWOULDBLOCK
)
2086 unix_syscall_return(error
);
2090 * kevent - [syscall] register and wait for kernel events
2094 kevent(struct proc
*p
, struct kevent_args
*uap
, int32_t *retval
)
2096 unsigned int flags
= KEVENT_FLAG_LEGACY32
;
2098 return kevent_internal(p
,
2100 uap
->changelist
, uap
->nchanges
,
2101 uap
->eventlist
, uap
->nevents
,
2110 kevent64(struct proc
*p
, struct kevent64_args
*uap
, int32_t *retval
)
2114 /* restrict to user flags and set legacy64 */
2115 flags
= uap
->flags
& KEVENT_FLAG_USER
;
2116 flags
|= KEVENT_FLAG_LEGACY64
;
2118 return kevent_internal(p
,
2120 uap
->changelist
, uap
->nchanges
,
2121 uap
->eventlist
, uap
->nevents
,
2130 kevent_qos(struct proc
*p
, struct kevent_qos_args
*uap
, int32_t *retval
)
2132 /* restrict to user flags */
2133 uap
->flags
&= KEVENT_FLAG_USER
;
2135 return kevent_internal(p
,
2137 uap
->changelist
, uap
->nchanges
,
2138 uap
->eventlist
, uap
->nevents
,
2139 uap
->data_out
, (uint64_t)uap
->data_available
,
2147 kevent_qos_internal(struct proc
*p
, int fd
,
2148 user_addr_t changelist
, int nchanges
,
2149 user_addr_t eventlist
, int nevents
,
2150 user_addr_t data_out
, user_size_t
*data_available
,
2154 return kevent_internal(p
,
2156 changelist
, nchanges
,
2158 data_out
, (uint64_t)data_available
,
2159 (flags
| KEVENT_FLAG_KERNEL
),
2166 kevent_get_timeout(struct proc
*p
,
2167 user_addr_t utimeout
,
2169 struct timeval
*atvp
)
2174 if (flags
& KEVENT_FLAG_IMMEDIATE
) {
2175 getmicrouptime(&atv
);
2176 } else if (utimeout
!= USER_ADDR_NULL
) {
2178 if (flags
& KEVENT_FLAG_KERNEL
) {
2179 struct timespec
*tsp
= (struct timespec
*)utimeout
;
2180 TIMESPEC_TO_TIMEVAL(&rtv
, tsp
);
2181 } else if (IS_64BIT_PROCESS(p
)) {
2182 struct user64_timespec ts
;
2183 error
= copyin(utimeout
, &ts
, sizeof(ts
));
2184 if ((ts
.tv_sec
& 0xFFFFFFFF00000000ull
) != 0)
2187 TIMESPEC_TO_TIMEVAL(&rtv
, &ts
);
2189 struct user32_timespec ts
;
2190 error
= copyin(utimeout
, &ts
, sizeof(ts
));
2191 TIMESPEC_TO_TIMEVAL(&rtv
, &ts
);
2195 if (itimerfix(&rtv
))
2197 getmicrouptime(&atv
);
2198 timevaladd(&atv
, &rtv
);
2200 /* wait forever value */
2209 kevent_set_kq_mode(struct kqueue
*kq
, unsigned int flags
)
2211 /* each kq should only be used for events of one type */
2213 if (kq
->kq_state
& (KQ_KEV32
| KQ_KEV64
| KQ_KEV_QOS
)) {
2214 if (flags
& KEVENT_FLAG_LEGACY32
) {
2215 if ((kq
->kq_state
& KQ_KEV32
) == 0) {
2219 } else if (kq
->kq_state
& KQ_KEV32
) {
2223 } else if (flags
& KEVENT_FLAG_LEGACY32
) {
2224 kq
->kq_state
|= KQ_KEV32
;
2226 /* JMM - set KQ_KEVQOS when we are ready for exclusive */
2227 kq
->kq_state
|= KQ_KEV64
;
2234 kevent_get_kq(struct proc
*p
, int fd
, unsigned int flags
, struct fileproc
**fpp
, struct kqueue
**kqp
)
2236 struct fileproc
*fp
= NULL
;
2240 if (flags
& KEVENT_FLAG_WORKQ
) {
2242 * use the private kq associated with the proc workq.
2243 * Just being a thread within the process (and not
2244 * being the exit/exec thread) is enough to hold a
2245 * reference on this special kq.
2249 struct kqueue
*alloc_kq
= kqueue_alloc(p
, KEVENT_FLAG_WORKQ
);
2250 if (alloc_kq
== NULL
)
2254 if (p
->p_wqkqueue
== NULL
) {
2255 kq
= p
->p_wqkqueue
= alloc_kq
;
2260 kqueue_dealloc(alloc_kq
);
2264 /* get a usecount for the kq itself */
2265 if ((error
= fp_getfkq(p
, fd
, &fp
, &kq
)) != 0)
2268 if ((error
= kevent_set_kq_mode(kq
, flags
)) != 0) {
2269 /* drop the usecount */
2271 fp_drop(p
, fd
, fp
, 0);
2282 kevent_internal(struct proc
*p
,
2284 user_addr_t changelist
, int nchanges
,
2285 user_addr_t ueventlist
, int nevents
,
2286 user_addr_t data_out
, uint64_t data_available
,
2288 user_addr_t utimeout
,
2289 kqueue_continue_t continuation
,
2292 struct _kevent
*cont_args
;
2295 struct fileproc
*fp
= NULL
;
2296 struct kevent_internal_s kev
;
2297 int error
, noutputs
;
2299 user_size_t data_size
;
2300 user_size_t data_resid
;
2302 /* Don't allow user-space threads to process output events from the workq kq */
2303 if ((flags
& (KEVENT_FLAG_WORKQ
| KEVENT_FLAG_KERNEL
)) == KEVENT_FLAG_WORKQ
&&
2304 !(flags
& KEVENT_FLAG_ERROR_EVENTS
) && nevents
> 0)
2307 /* prepare to deal with stack-wise allocation of out events */
2308 if (flags
& KEVENT_FLAG_STACK_EVENTS
) {
2309 int scale
= ((flags
& KEVENT_FLAG_LEGACY32
) ?
2310 (IS_64BIT_PROCESS(p
) ? sizeof(struct user64_kevent
) :
2311 sizeof(struct user32_kevent
)) :
2312 ((flags
& KEVENT_FLAG_LEGACY64
) ? sizeof(struct kevent64_s
) :
2313 sizeof(struct kevent_qos_s
)));
2314 ueventlist
+= nevents
* scale
;
2317 /* convert timeout to absolute - if we have one (and not immediate) */
2318 error
= kevent_get_timeout(p
, utimeout
, flags
, &atv
);
2322 /* copyin initial value of data residual from data_available */
2323 error
= kevent_get_data_size(p
, data_available
, flags
, &data_size
);
2327 /* get the kq we are going to be working on */
2328 error
= kevent_get_kq(p
, fd
, flags
, &fp
, &kq
);
2332 /* register all the change requests the user provided... */
2334 while (nchanges
> 0 && error
== 0) {
2335 error
= kevent_copyin(&changelist
, &kev
, p
, flags
);
2339 /* Make sure user doesn't pass in any system flags */
2340 kev
.flags
&= ~EV_SYSFLAGS
;
2342 kevent_register(kq
, &kev
, p
);
2345 ((kev
.flags
& EV_ERROR
) || (kev
.flags
& EV_RECEIPT
))) {
2346 if (kev
.flags
& EV_RECEIPT
) {
2347 kev
.flags
|= EV_ERROR
;
2350 error
= kevent_copyout(&kev
, &ueventlist
, p
, flags
);
2355 } else if (kev
.flags
& EV_ERROR
) {
2361 /* short-circuit the scan if we only want error events */
2362 if (flags
& KEVENT_FLAG_ERROR_EVENTS
)
2365 /* process pending events */
2366 if (nevents
> 0 && noutputs
== 0 && error
== 0) {
2368 /* store the continuation/completion data in the uthread */
2369 ut
= (uthread_t
)get_bsdthread_info(current_thread());
2370 cont_args
= &ut
->uu_kevent
.ss_kevent
;
2373 cont_args
->retval
= retval
;
2374 cont_args
->eventlist
= ueventlist
;
2375 cont_args
->eventcount
= nevents
;
2376 cont_args
->eventout
= noutputs
;
2377 cont_args
->data_available
= data_available
;
2378 cont_args
->process_data
.fp_fd
= fd
;
2379 cont_args
->process_data
.fp_flags
= flags
;
2380 cont_args
->process_data
.fp_data_out
= data_out
;
2381 cont_args
->process_data
.fp_data_size
= data_size
;
2382 cont_args
->process_data
.fp_data_resid
= data_size
;
2384 error
= kqueue_scan(kq
, kevent_callback
,
2385 continuation
, cont_args
,
2386 &cont_args
->process_data
,
2389 /* process remaining outputs */
2390 noutputs
= cont_args
->eventout
;
2391 data_resid
= cont_args
->process_data
.fp_data_resid
;
2393 /* copyout residual data size value (if it needs to be copied out) */
2394 /* don't abandon other output just because of residual copyout failures */
2395 if (error
== 0 && data_available
&& data_resid
!= data_size
) {
2396 (void)kevent_put_data_size(p
, data_available
, flags
, data_resid
);
2400 /* don't restart after signals... */
2401 if (error
== ERESTART
)
2403 else if (error
== EWOULDBLOCK
)
2408 fp_drop(p
, fd
, fp
, 0);
2414 * kevent_callback - callback for each individual event
2416 * called with nothing locked
2417 * caller holds a reference on the kqueue
2420 kevent_callback(__unused
struct kqueue
*kq
, struct kevent_internal_s
*kevp
,
2423 struct _kevent
*cont_args
;
2426 cont_args
= (struct _kevent
*)data
;
2427 assert(cont_args
->eventout
< cont_args
->eventcount
);
2430 * Copy out the appropriate amount of event data for this user.
2432 error
= kevent_copyout(kevp
, &cont_args
->eventlist
, current_proc(),
2433 cont_args
->process_data
.fp_flags
);
2436 * If there isn't space for additional events, return
2437 * a harmless error to stop the processing here
2439 if (error
== 0 && ++cont_args
->eventout
== cont_args
->eventcount
)
2440 error
= EWOULDBLOCK
;
2445 * kevent_description - format a description of a kevent for diagnostic output
2447 * called with a 256-byte string buffer
2451 kevent_description(struct kevent_internal_s
*kevp
, char *s
, size_t n
)
2455 "{.ident=%#llx, .filter=%d, .flags=%#x, .udata=%#llx, .fflags=%#x, .data=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}",
2469 * kevent_register - add a new event to a kqueue
2471 * Creates a mapping between the event source and
2472 * the kqueue via a knote data structure.
2474 * Because many/most the event sources are file
2475 * descriptor related, the knote is linked off
2476 * the filedescriptor table for quick access.
2478 * called with nothing locked
2479 * caller holds a reference on the kqueue
2483 kevent_register(struct kqueue
*kq
, struct kevent_internal_s
*kev
,
2484 __unused
struct proc
*ctxp
)
2486 struct proc
*p
= kq
->kq_p
;
2487 struct filterops
*fops
;
2488 struct knote
*kn
= NULL
;
2492 if (kev
->filter
< 0) {
2493 if (kev
->filter
+ EVFILT_SYSCOUNT
< 0) {
2497 fops
= sysfilt_ops
[~kev
->filter
]; /* to 0-base index */
2503 /* restrict EV_VANISHED to adding udata-specific dispatch kevents */
2504 if ((kev
->flags
& EV_VANISHED
) &&
2505 (kev
->flags
& (EV_ADD
| EV_DISPATCH2
)) != (EV_ADD
| EV_DISPATCH2
)) {
2510 /* Simplify the flags - delete and disable overrule */
2511 if (kev
->flags
& EV_DELETE
)
2512 kev
->flags
&= ~EV_ADD
;
2513 if (kev
->flags
& EV_DISABLE
)
2514 kev
->flags
&= ~EV_ENABLE
;
2520 /* find the matching knote from the fd tables/hashes */
2521 kn
= knote_fdfind(kq
, kev
, p
);
2524 if (kev
->flags
& EV_ADD
) {
2525 struct fileproc
*fp
= NULL
;
2527 /* grab a file reference for the new knote */
2529 if ((error
= fp_lookup(p
, kev
->ident
, &fp
, 1)) != 0) {
2540 fp_drop(p
, kev
->ident
, fp
, 0);
2545 knote_set_kq(kn
,kq
);
2546 kn
->kn_filtid
= ~kev
->filter
;
2547 kn
->kn_inuse
= 1; /* for f_attach() */
2548 kn
->kn_status
= KN_ATTACHING
| KN_ATTACHED
;
2550 /* was vanish support requested */
2551 if (kev
->flags
& EV_VANISHED
) {
2552 kev
->flags
&= ~EV_VANISHED
;
2553 kn
->kn_status
|= KN_REQVANISH
;
2556 /* snapshot matching/dispatching protcol flags into knote */
2557 if (kev
->flags
& EV_DISPATCH
)
2558 kn
->kn_status
|= KN_DISPATCH
;
2559 if (kev
->flags
& EV_UDATA_SPECIFIC
)
2560 kn
->kn_status
|= KN_UDATA_SPECIFIC
;
2563 * copy the kevent state into knote
2564 * protocol is that fflags and data
2565 * are saved off, and cleared before
2566 * calling the attach routine.
2568 kn
->kn_kevent
= *kev
;
2569 kn
->kn_sfflags
= kev
->fflags
;
2570 kn
->kn_sdata
= kev
->data
;
2574 /* invoke pthread kext to convert kevent qos to thread qos */
2575 if (kq
->kq_state
& KQ_WORKQ
) {
2576 kn
->kn_qos
= canonicalize_kevent_qos(kn
->kn_qos
);
2577 knote_set_qos_index(kn
, qos_index_from_qos(kn
->kn_qos
, FALSE
));
2578 knote_set_qos_override_index(kn
, QOS_INDEX_KQFILE
);
2579 assert(knote_get_qos_index(kn
) < KQWQ_NQOS
);
2581 knote_set_qos_index(kn
, QOS_INDEX_KQFILE
);
2582 knote_set_qos_override_index(kn
, QOS_INDEX_KQFILE
);
2585 /* before anyone can find it */
2586 if (kev
->flags
& EV_DISABLE
)
2589 /* Add the knote for lookup thru the fd table */
2590 error
= knote_fdadd(kn
, p
);
2596 fp_drop(p
, kev
->ident
, fp
, 0);
2600 /* fp reference count now applies to knote */
2602 /* call filter attach routine */
2603 result
= fops
->f_attach(kn
);
2606 * Trade knote use count for kq lock.
2607 * Cannot be dropped because we held
2608 * KN_ATTACHING throughout.
2610 knoteuse2kqlock(kq
, kn
, 1);
2612 if (kn
->kn_flags
& EV_ERROR
) {
2614 * Failed to attach correctly, so drop.
2615 * All other possible users/droppers
2616 * have deferred to us. Save the error
2617 * to return to our caller.
2619 kn
->kn_status
&= ~KN_ATTACHED
;
2620 kn
->kn_status
|= KN_DROPPING
;
2621 error
= kn
->kn_data
;
2627 /* end "attaching" phase - now just attached */
2628 kn
->kn_status
&= ~KN_ATTACHING
;
2630 if (kn
->kn_status
& KN_DROPPING
) {
2632 * Attach succeeded, but someone else
2633 * deferred their drop - now we have
2634 * to do it for them.
2642 * If the attach routine indicated that an
2643 * event is already fired, activate the knote.
2655 /* existing knote - get kqueue lock */
2659 if ((kn
->kn_status
& (KN_DROPPING
| KN_ATTACHING
)) != 0) {
2661 * The knote is not in a stable state, wait for that
2662 * transition to complete and then redrive the lookup.
2664 kn
->kn_status
|= KN_USEWAIT
;
2665 waitq_assert_wait64((struct waitq
*)&kq
->kq_wqs
,
2666 CAST_EVENT64_T(&kn
->kn_status
),
2667 THREAD_UNINT
, TIMEOUT_WAIT_FOREVER
);
2669 thread_block(THREAD_CONTINUE_NULL
);
2673 if (kev
->flags
& EV_DELETE
) {
2676 * If attempting to delete a disabled dispatch2 knote,
2677 * we must wait for the knote to be re-enabled (unless
2678 * it is being re-enabled atomically here).
2680 if ((kev
->flags
& EV_ENABLE
) == 0 &&
2681 (kn
->kn_status
& (KN_DISPATCH2
| KN_DISABLED
)) ==
2682 (KN_DISPATCH2
| KN_DISABLED
)) {
2683 kn
->kn_status
|= KN_DEFERDELETE
;
2685 error
= EINPROGRESS
;
2686 } else if (kqlock2knotedrop(kq
, kn
)) {
2690 * The kqueue is unlocked, it's not being
2691 * dropped, and kqlock2knotedrop returned 0:
2692 * this means that someone stole the drop of
2693 * the knote from us.
2695 error
= EINPROGRESS
;
2701 * If we are re-enabling a deferred-delete knote,
2702 * just enable it now and avoid calling the
2703 * filter touch routine (it has delivered its
2704 * last event already).
2706 if ((kev
->flags
& EV_ENABLE
) &&
2707 (kn
->kn_status
& KN_DEFERDELETE
)) {
2708 assert(kn
->kn_status
& KN_DISABLED
);
2716 * If we are disabling, do it before unlocking and
2717 * calling the touch routine (so no processing can
2718 * see the new kevent state before the disable is
2721 if (kev
->flags
& EV_DISABLE
)
2725 * Convert the kqlock to a use reference on the
2726 * knote so we can call the filter touch routine.
2728 if (kqlock2knoteuse(kq
, kn
)) {
2731 * Call touch routine to notify filter of changes
2732 * in filter values (and to re-determine if any
2733 * events are fired).
2735 result
= knote_fops(kn
)->f_touch(kn
, kev
);
2737 /* Get the kq lock back (don't defer droppers). */
2738 if (!knoteuse2kqlock(kq
, kn
, 0)) {
2743 /* Activate it if the touch routine said to */
2748 /* Enable the knote if called for */
2749 if (kev
->flags
& EV_ENABLE
)
2754 /* still have kqlock held and knote is valid */
2758 /* output local errors through the kevent */
2760 kev
->flags
|= EV_ERROR
;
2767 * knote_process - process a triggered event
2769 * Validate that it is really still a triggered event
2770 * by calling the filter routines (if necessary). Hold
2771 * a use reference on the knote to avoid it being detached.
2773 * If it is still considered triggered, we will have taken
2774 * a copy of the state under the filter lock. We use that
2775 * snapshot to dispatch the knote for future processing (or
2776 * not, if this was a lost event).
2778 * Our caller assures us that nobody else can be processing
2779 * events from this knote during the whole operation. But
2780 * others can be touching or posting events to the knote
2781 * interspersed with our processing it.
2783 * caller holds a reference on the kqueue.
2784 * kqueue locked on entry and exit - but may be dropped
2787 knote_process(struct knote
*kn
,
2788 kevent_callback_t callback
,
2789 void *callback_data
,
2790 struct filt_process_s
*process_data
,
2793 struct kevent_internal_s kev
;
2794 struct kqueue
*kq
= knote_get_kq(kn
);
2798 bzero(&kev
, sizeof(kev
));
2801 * Must be active or stayactive
2802 * Must be queued and not disabled/suppressed
2804 assert(kn
->kn_status
& KN_QUEUED
);
2805 assert(kn
->kn_status
& (KN_ACTIVE
|KN_STAYACTIVE
));
2806 assert(!(kn
->kn_status
& (KN_DISABLED
|KN_SUPPRESSED
|KN_DROPPING
)));
2809 * For deferred-drop or vanished events, we just create a fake
2810 * event to acknowledge end-of-life. Otherwise, we call the
2811 * filter's process routine to snapshot the kevent state under
2812 * the filter's locking protocol.
2814 if (kn
->kn_status
& (KN_DEFERDELETE
| KN_VANISHED
)) {
2815 /* create fake event */
2816 kev
.filter
= kn
->kn_filter
;
2817 kev
.ident
= kn
->kn_id
;
2818 kev
.qos
= kn
->kn_qos
;
2819 kev
.flags
= (kn
->kn_status
& KN_DEFERDELETE
) ?
2820 EV_DELETE
: EV_VANISHED
;
2821 kev
.flags
|= (EV_DISPATCH2
| EV_ONESHOT
);
2822 kev
.udata
= kn
->kn_udata
;
2828 /* deactivate - so new activations indicate a wakeup */
2829 knote_deactivate(kn
);
2831 /* suppress knotes to avoid returning the same event multiple times in a single call. */
2834 /* convert lock to a knote use reference */
2835 if (!kqlock2knoteuse(kq
, kn
))
2836 panic("dropping knote found on queue\n");
2838 /* call out to the filter to process with just a ref */
2839 result
= knote_fops(kn
)->f_process(kn
, process_data
, &kev
);
2842 * convert our reference back to a lock. accept drop
2843 * responsibility from others if we've committed to
2844 * delivering event data.
2846 if (!knoteuse2kqlock(kq
, kn
, result
)) {
2854 * Determine how to dispatch the knote for future event handling.
2855 * not-fired: just return (do not callout, leave deactivated).
2856 * One-shot: If dispatch2, enter deferred-delete mode (unless this is
2857 * is the deferred delete event delivery itself). Otherwise,
2859 * stolendrop:We took responsibility for someone else's drop attempt.
2860 * treat this just like one-shot and prepare to turn it back
2861 * into a deferred delete if required.
2862 * Dispatch: don't clear state, just mark it disabled.
2863 * Cleared: just leave it deactivated.
2864 * Others: re-activate as there may be more events to handle.
2865 * This will not wake up more handlers right now, but
2866 * at the completion of handling events it may trigger
2867 * more handler threads (TODO: optimize based on more than
2868 * just this one event being detected by the filter).
2872 return (EJUSTRETURN
);
2874 if ((kev
.flags
& EV_ONESHOT
) || (kn
->kn_status
& KN_STOLENDROP
)) {
2875 if ((kn
->kn_status
& (KN_DISPATCH2
| KN_DEFERDELETE
)) == KN_DISPATCH2
) {
2876 /* defer dropping non-delete oneshot dispatch2 events */
2877 kn
->kn_status
|= KN_DEFERDELETE
;
2880 /* if we took over another's drop clear those flags here */
2881 if (kn
->kn_status
& KN_STOLENDROP
) {
2882 assert(kn
->kn_status
& KN_DROPPING
);
2884 * the knote will be dropped when the
2885 * deferred deletion occurs
2887 kn
->kn_status
&= ~(KN_DROPPING
|KN_STOLENDROP
);
2889 } else if (kn
->kn_status
& KN_STOLENDROP
) {
2890 /* We now own the drop of the knote. */
2891 assert(kn
->kn_status
& KN_DROPPING
);
2892 knote_unsuppress(kn
);
2896 } else if (kqlock2knotedrop(kq
, kn
)) {
2897 /* just EV_ONESHOT, _not_ DISPATCH2 */
2901 } else if (kn
->kn_status
& KN_DISPATCH
) {
2902 /* disable all dispatch knotes */
2904 } else if ((kev
.flags
& EV_CLEAR
) == 0) {
2905 /* re-activate in case there are more events */
2911 * callback to handle each event as we find it.
2912 * If we have to detach and drop the knote, do
2913 * it while we have the kq unlocked.
2917 error
= (callback
)(kq
, &kev
, callback_data
);
2925 * Return 0 to indicate that processing should proceed,
2926 * -1 if there is nothing to process.
2928 * Called with kqueue locked and returns the same way,
2929 * but may drop lock temporarily.
2932 kqworkq_begin_processing(struct kqworkq
*kqwq
, kq_index_t qos_index
, int flags
)
2934 struct kqrequest
*kqr
;
2935 thread_t self
= current_thread();
2936 __assert_only
struct uthread
*ut
= get_bsdthread_info(self
);
2939 assert(kqwq
->kqwq_state
& KQ_WORKQ
);
2940 assert(qos_index
< KQWQ_NQOS
);
2942 kqwq_req_lock(kqwq
);
2943 kqr
= kqworkq_get_request(kqwq
, qos_index
);
2945 thread
= kqr
->kqr_thread
;
2947 /* manager skips buckets that haven't ask for its help */
2948 if (flags
& KEVENT_FLAG_WORKQ_MANAGER
) {
2950 /* If nothing for manager to do, just return */
2951 if ((kqr
->kqr_state
& KQWQ_THMANAGER
) == 0) {
2952 assert(kqr
->kqr_thread
!= self
);
2953 kqwq_req_unlock(kqwq
);
2957 /* bind manager thread from this time on */
2958 kqworkq_bind_thread(kqwq
, qos_index
, self
, flags
);
2961 /* must have been bound by now */
2962 assert(thread
== self
);
2963 assert(ut
->uu_kqueue_bound
== qos_index
);
2964 assert((ut
->uu_kqueue_flags
& flags
) == ut
->uu_kqueue_flags
);
2967 /* nobody else should still be processing */
2968 assert(kqr
->kqr_state
& KQWQ_THREQUESTED
);
2969 assert((kqr
->kqr_state
& KQWQ_PROCESSING
) == 0);
2971 /* anything left to process? */
2972 if (kqueue_queue_empty(&kqwq
->kqwq_kqueue
, qos_index
)) {
2973 kqwq_req_unlock(kqwq
);
2977 /* convert to processing mode */
2978 /* reset workq triggers and thread requests - maybe processing */
2979 kqr
->kqr_state
&= ~(KQWQ_HOOKCALLED
| KQWQ_WAKEUP
);
2980 kqr
->kqr_state
|= KQWQ_PROCESSING
;
2981 kqwq_req_unlock(kqwq
);
2986 * Return 0 to indicate that processing should proceed,
2987 * -1 if there is nothing to process.
2989 * Called with kqueue locked and returns the same way,
2990 * but may drop lock temporarily.
2994 kqueue_begin_processing(struct kqueue
*kq
, kq_index_t qos_index
, unsigned int flags
)
2996 struct kqtailq
*suppressq
;
2998 if (kq
->kq_state
& KQ_WORKQ
)
2999 return kqworkq_begin_processing((struct kqworkq
*)kq
, qos_index
, flags
);
3001 assert(qos_index
== QOS_INDEX_KQFILE
);
3003 /* wait to become the exclusive processing thread */
3005 if (kq
->kq_state
& KQ_DRAIN
)
3008 if ((kq
->kq_state
& KQ_PROCESSING
) == 0)
3011 /* if someone else is processing the queue, wait */
3012 kq
->kq_state
|= KQ_PROCWAIT
;
3013 suppressq
= kqueue_get_suppressed_queue(kq
, qos_index
);
3014 waitq_assert_wait64((struct waitq
*)&kq
->kq_wqs
,
3015 CAST_EVENT64_T(suppressq
),
3016 THREAD_UNINT
, TIMEOUT_WAIT_FOREVER
);
3019 thread_block(THREAD_CONTINUE_NULL
);
3023 /* Nobody else processing */
3025 /* clear pre-posts and KQ_WAKEUP now, in case we bail early */
3026 waitq_set_clear_preposts(&kq
->kq_wqs
);
3027 kq
->kq_state
&= ~KQ_WAKEUP
;
3029 /* anything left to process? */
3030 if (kqueue_queue_empty(kq
, qos_index
))
3033 /* convert to processing mode */
3034 kq
->kq_state
|= KQ_PROCESSING
;
3040 * kqworkq_end_processing - Complete the processing of a workq kqueue
3042 * We may have to request new threads.
3043 * This can happen there are no waiting processing threads and:
3044 * - there were active events we never got to (count > 0)
3045 * - we pended waitq hook callouts during processing
3046 * - we pended wakeups while processing (or unsuppressing)
3048 * Called with kqueue lock held.
3051 kqworkq_end_processing(struct kqworkq
*kqwq
, kq_index_t qos_index
, int flags
)
3053 #pragma unused(flags)
3055 struct kqueue
*kq
= &kqwq
->kqwq_kqueue
;
3056 struct kqtailq
*suppressq
= kqueue_get_suppressed_queue(kq
, qos_index
);
3058 thread_t self
= current_thread();
3059 __assert_only
struct uthread
*ut
= get_bsdthread_info(self
);
3061 struct kqrequest
*kqr
;
3066 assert(kqwq
->kqwq_state
& KQ_WORKQ
);
3067 assert(qos_index
< KQWQ_NQOS
);
3069 /* leave early if we are not even processing */
3070 kqwq_req_lock(kqwq
);
3071 kqr
= kqworkq_get_request(kqwq
, qos_index
);
3072 thread
= kqr
->kqr_thread
;
3074 if (flags
& KEVENT_FLAG_WORKQ_MANAGER
) {
3075 assert(ut
->uu_kqueue_bound
== KQWQ_QOS_MANAGER
);
3076 assert(ut
->uu_kqueue_flags
& KEVENT_FLAG_WORKQ_MANAGER
);
3078 /* if this bucket didn't need manager help, bail */
3079 if ((kqr
->kqr_state
& KQWQ_THMANAGER
) == 0) {
3080 assert(thread
!= self
);
3081 kqwq_req_unlock(kqwq
);
3085 assert(kqr
->kqr_state
& KQWQ_THREQUESTED
);
3087 /* unbound bucket - see if still needs servicing */
3088 if (thread
== THREAD_NULL
) {
3089 assert((kqr
->kqr_state
& KQWQ_PROCESSING
) == 0);
3090 assert(TAILQ_EMPTY(suppressq
));
3092 assert(thread
== self
);
3096 assert(thread
== self
);
3097 assert(ut
->uu_kqueue_bound
== qos_index
);
3098 assert((ut
->uu_kqueue_flags
& KEVENT_FLAG_WORKQ_MANAGER
) == 0);
3101 kqwq_req_unlock(kqwq
);
3103 /* Any events queued before we put suppressed ones back? */
3104 queued_events
= !kqueue_queue_empty(kq
, qos_index
);
3107 * Return suppressed knotes to their original state.
3108 * For workq kqueues, suppressed ones that are still
3109 * truly active (not just forced into the queue) will
3110 * set flags we check below to see if anything got
3113 while ((kn
= TAILQ_FIRST(suppressq
)) != NULL
) {
3114 assert(kn
->kn_status
& KN_SUPPRESSED
);
3115 knote_unsuppress(kn
);
3118 kqwq_req_lock(kqwq
);
3120 /* Determine if wakeup-type events were pended during servicing */
3121 pended
= (kqr
->kqr_state
& (KQWQ_HOOKCALLED
| KQWQ_WAKEUP
));
3123 /* unbind thread thread */
3124 kqworkq_unbind_thread(kqwq
, qos_index
, self
, flags
);
3126 /* Indicate that we are done processing */
3127 kqr
->kqr_state
&= ~(KQWQ_PROCESSING
| \
3128 KQWQ_THREQUESTED
| KQWQ_THMANAGER
);
3131 * request a new thread if events have happened
3132 * (not just putting stay-active events back).
3134 if ((queued_events
|| pended
) &&
3135 !kqueue_queue_empty(kq
, qos_index
)) {
3136 kqworkq_request_thread(kqwq
, qos_index
);
3139 kqwq_req_unlock(kqwq
);
3143 * Called with kqueue lock held.
3146 kqueue_end_processing(struct kqueue
*kq
, kq_index_t qos_index
, unsigned int flags
)
3149 struct kqtailq
*suppressq
;
3152 if (kq
->kq_state
& KQ_WORKQ
) {
3153 kqworkq_end_processing((struct kqworkq
*)kq
, qos_index
, flags
);
3157 assert(qos_index
== QOS_INDEX_KQFILE
);
3160 * Return suppressed knotes to their original state.
3161 * For workq kqueues, suppressed ones that are still
3162 * truly active (not just forced into the queue) will
3163 * set flags we check below to see if anything got
3166 suppressq
= kqueue_get_suppressed_queue(kq
, qos_index
);
3167 while ((kn
= TAILQ_FIRST(suppressq
)) != NULL
) {
3168 assert(kn
->kn_status
& KN_SUPPRESSED
);
3169 knote_unsuppress(kn
);
3172 procwait
= (kq
->kq_state
& KQ_PROCWAIT
);
3173 kq
->kq_state
&= ~(KQ_PROCESSING
| KQ_PROCWAIT
);
3176 /* first wake up any thread already waiting to process */
3177 waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
,
3178 CAST_EVENT64_T(suppressq
),
3180 WAITQ_ALL_PRIORITIES
);
3185 * kevent_qos_internal_bind - bind thread to processing kqueue
3187 * Indicates that the provided thread will be responsible for
3188 * servicing the particular QoS class index specified in the
3189 * parameters. Once the binding is done, any overrides that may
3190 * be associated with the cooresponding events can be applied.
3192 * This should be called as soon as the thread identity is known,
3193 * preferably while still at high priority during creation.
3195 * - caller holds a reference on the kqueue.
3196 * - the thread MUST call kevent_qos_internal after being bound
3197 * or the bucket of events may never be delivered.
3198 * - Nothing locked (may take mutex or block).
3202 kevent_qos_internal_bind(
3208 struct fileproc
*fp
= NULL
;
3209 struct kqueue
*kq
= NULL
;
3210 struct kqworkq
*kqwq
;
3211 struct kqrequest
*kqr
;
3213 kq_index_t qos_index
;
3216 assert(thread
!= THREAD_NULL
);
3217 assert(flags
& KEVENT_FLAG_WORKQ
);
3219 if (thread
== THREAD_NULL
||
3220 (flags
& KEVENT_FLAG_WORKQ
) == 0) {
3224 ut
= get_bsdthread_info(thread
);
3226 /* find the kqueue */
3227 res
= kevent_get_kq(p
, -1, flags
, &fp
, &kq
);
3232 /* get the qos index we're going to service */
3233 qos_index
= qos_index_for_servicer(qos_class
, thread
, flags
);
3235 /* No need to bind the manager thread to any bucket */
3236 if (qos_index
== KQWQ_QOS_MANAGER
) {
3237 assert(ut
->uu_kqueue_bound
== 0);
3238 ut
->uu_kqueue_bound
= qos_index
;
3239 ut
->uu_kqueue_flags
= flags
;
3244 assert(kq
->kq_state
& KQ_WORKQ
);
3246 kqwq
= (struct kqworkq
*)kq
;
3247 kqr
= kqworkq_get_request(kqwq
, qos_index
);
3249 kqwq_req_lock(kqwq
);
3252 * A (non-emergency) request should have been made
3253 * and nobody should already be servicing this bucket.
3255 assert(kqr
->kqr_state
& KQWQ_THREQUESTED
);
3256 assert((kqr
->kqr_state
& KQWQ_THMANAGER
) == 0);
3257 assert((kqr
->kqr_state
& KQWQ_PROCESSING
) == 0);
3259 /* Is this is an extraneous bind? */
3260 if (thread
== kqr
->kqr_thread
) {
3261 assert(ut
->uu_kqueue_bound
== qos_index
);
3265 /* nobody else bound and we're not bound elsewhere */
3266 assert(ut
->uu_kqueue_bound
== 0);
3267 assert(ut
->uu_kqueue_flags
== 0);
3268 assert(kqr
->kqr_thread
== THREAD_NULL
);
3270 /* Don't bind if there is a conflict */
3271 if (kqr
->kqr_thread
!= THREAD_NULL
||
3272 (kqr
->kqr_state
& KQWQ_THMANAGER
)) {
3277 /* finally bind the thread */
3278 kqr
->kqr_thread
= thread
;
3279 ut
->uu_kqueue_bound
= qos_index
;
3280 ut
->uu_kqueue_flags
= flags
;
3282 /* add any pending overrides to the thread */
3283 if (kqr
->kqr_override_delta
) {
3284 thread_add_ipc_override(thread
, qos_index
+ kqr
->kqr_override_delta
);
3288 kqwq_req_unlock(kqwq
);
3295 * kevent_qos_internal_unbind - unbind thread from processing kqueue
3297 * End processing the per-QoS bucket of events and allow other threads
3298 * to be requested for future servicing.
3300 * caller holds a reference on the kqueue.
3301 * thread is the current thread.
3305 kevent_qos_internal_unbind(
3313 struct fileproc
*fp
= NULL
;
3314 kq_index_t qos_index
;
3315 kq_index_t end_index
;
3318 assert(flags
& KEVENT_FLAG_WORKQ
);
3319 assert(thread
== current_thread());
3321 if (thread
== THREAD_NULL
||
3322 (flags
& KEVENT_FLAG_WORKQ
) == 0)
3326 res
= kevent_get_kq(p
, -1, flags
, &fp
, &kq
);
3331 assert(kq
->kq_state
& KQ_WORKQ
);
3333 /* get the index we have been servicing */
3334 qos_index
= qos_index_for_servicer(qos_class
, thread
, flags
);
3336 ut
= get_bsdthread_info(thread
);
3338 /* early out if we were already unbound - or never bound */
3339 if (ut
->uu_kqueue_bound
!= qos_index
) {
3340 __assert_only
struct kqworkq
*kqwq
= (struct kqworkq
*)kq
;
3341 __assert_only
struct kqrequest
*kqr
= kqworkq_get_request(kqwq
, qos_index
);
3343 assert(ut
->uu_kqueue_bound
== 0);
3344 assert(ut
->uu_kqueue_flags
== 0);
3345 assert(kqr
->kqr_thread
!= thread
);
3349 /* unbind from all the buckets we might own */
3350 end_index
= (qos_index
== KQWQ_QOS_MANAGER
) ?
3354 kqueue_end_processing(kq
, qos_index
, flags
);
3355 } while (qos_index
-- > end_index
);
3358 /* indicate that we are done processing in the uthread */
3359 ut
->uu_kqueue_bound
= 0;
3360 ut
->uu_kqueue_flags
= 0;
3366 * kqueue_process - process the triggered events in a kqueue
3368 * Walk the queued knotes and validate that they are
3369 * really still triggered events by calling the filter
3370 * routines (if necessary). Hold a use reference on
3371 * the knote to avoid it being detached. For each event
3372 * that is still considered triggered, invoke the
3373 * callback routine provided.
3375 * caller holds a reference on the kqueue.
3376 * kqueue locked on entry and exit - but may be dropped
3377 * kqueue list locked (held for duration of call)
3381 kqueue_process(struct kqueue
*kq
,
3382 kevent_callback_t callback
,
3383 void *callback_data
,
3384 struct filt_process_s
*process_data
,
3385 kq_index_t servicer_qos_index
,
3389 unsigned int flags
= process_data
? process_data
->fp_flags
: 0;
3390 kq_index_t start_index
, end_index
, i
;
3396 * Based on the native QoS of the servicer,
3397 * determine the range of QoSes that need checking
3399 start_index
= servicer_qos_index
;
3400 end_index
= (start_index
== KQWQ_QOS_MANAGER
) ? 0 : start_index
;
3405 if (kqueue_begin_processing(kq
, i
, flags
) == -1) {
3407 /* Nothing to process */
3412 * loop through the enqueued knotes, processing each one and
3413 * revalidating those that need it. As they are processed,
3414 * they get moved to the inprocess queue (so the loop can end).
3418 struct kqtailq
*base_queue
= kqueue_get_base_queue(kq
, i
);
3419 struct kqtailq
*queue
= kqueue_get_high_queue(kq
, i
);
3421 while (error
== 0 &&
3422 (kn
= TAILQ_FIRST(queue
)) != NULL
) {
3423 /* Process the knote */
3424 error
= knote_process(kn
, callback
, callback_data
, process_data
, p
);
3425 if (error
== EJUSTRETURN
)
3430 /* break out if no more space for additional events */
3431 if (error
== EWOULDBLOCK
) {
3432 if ((kq
->kq_state
& KQ_WORKQ
) == 0)
3433 kqueue_end_processing(kq
, i
, flags
);
3438 } while (error
== 0 && queue
-- > base_queue
);
3440 /* let somebody else process events if we're not in workq mode */
3441 if ((kq
->kq_state
& KQ_WORKQ
) == 0)
3442 kqueue_end_processing(kq
, i
, flags
);
3444 } while (i
-- > end_index
);
3452 kqueue_scan_continue(void *data
, wait_result_t wait_result
)
3454 thread_t self
= current_thread();
3455 uthread_t ut
= (uthread_t
)get_bsdthread_info(self
);
3456 struct _kqueue_scan
* cont_args
= &ut
->uu_kevent
.ss_kqueue_scan
;
3457 struct kqueue
*kq
= (struct kqueue
*)data
;
3458 struct filt_process_s
*process_data
= cont_args
->process_data
;
3462 /* convert the (previous) wait_result to a proper error */
3463 switch (wait_result
) {
3464 case THREAD_AWAKENED
: {
3467 error
= kqueue_process(kq
, cont_args
->call
, cont_args
->data
,
3468 process_data
, cont_args
->servicer_qos_index
,
3469 &count
, current_proc());
3470 if (error
== 0 && count
== 0) {
3471 if (kq
->kq_state
& KQ_WAKEUP
)
3473 waitq_assert_wait64((struct waitq
*)&kq
->kq_wqs
,
3474 KQ_EVENT
, THREAD_ABORTSAFE
,
3475 cont_args
->deadline
);
3476 kq
->kq_state
|= KQ_SLEEP
;
3478 thread_block_parameter(kqueue_scan_continue
, kq
);
3483 case THREAD_TIMED_OUT
:
3484 error
= EWOULDBLOCK
;
3486 case THREAD_INTERRUPTED
:
3489 case THREAD_RESTART
:
3493 panic("%s: - invalid wait_result (%d)", __func__
,
3498 /* call the continuation with the results */
3499 assert(cont_args
->cont
!= NULL
);
3500 (cont_args
->cont
)(kq
, cont_args
->data
, error
);
3505 * kqueue_scan - scan and wait for events in a kqueue
3507 * Process the triggered events in a kqueue.
3509 * If there are no events triggered arrange to
3510 * wait for them. If the caller provided a
3511 * continuation routine, then kevent_scan will
3514 * The callback routine must be valid.
3515 * The caller must hold a use-count reference on the kq.
3519 kqueue_scan(struct kqueue
*kq
,
3520 kevent_callback_t callback
,
3521 kqueue_continue_t continuation
,
3522 void *callback_data
,
3523 struct filt_process_s
*process_data
,
3524 struct timeval
*atvp
,
3527 thread_continue_t cont
= THREAD_CONTINUE_NULL
;
3528 kq_index_t servicer_qos_index
;
3535 assert(callback
!= NULL
);
3538 * Determine which QoS index we are servicing
3540 flags
= (process_data
) ? process_data
->fp_flags
: 0;
3541 fd
= (process_data
) ? process_data
->fp_fd
: -1;
3542 servicer_qos_index
= (kq
->kq_state
& KQ_WORKQ
) ?
3543 qos_index_for_servicer(fd
, current_thread(), flags
) :
3548 wait_result_t wait_result
;
3552 * Make a pass through the kq to find events already
3556 error
= kqueue_process(kq
, callback
, callback_data
,
3557 process_data
, servicer_qos_index
,
3560 break; /* lock still held */
3562 /* looks like we have to consider blocking */
3565 /* convert the timeout to a deadline once */
3566 if (atvp
->tv_sec
|| atvp
->tv_usec
) {
3569 clock_get_uptime(&now
);
3570 nanoseconds_to_absolutetime((uint64_t)atvp
->tv_sec
* NSEC_PER_SEC
+
3571 atvp
->tv_usec
* (long)NSEC_PER_USEC
,
3573 if (now
>= deadline
) {
3574 /* non-blocking call */
3575 error
= EWOULDBLOCK
;
3576 break; /* lock still held */
3579 clock_absolutetime_interval_to_deadline(deadline
, &deadline
);
3581 deadline
= 0; /* block forever */
3585 uthread_t ut
= (uthread_t
)get_bsdthread_info(current_thread());
3586 struct _kqueue_scan
*cont_args
= &ut
->uu_kevent
.ss_kqueue_scan
;
3588 cont_args
->call
= callback
;
3589 cont_args
->cont
= continuation
;
3590 cont_args
->deadline
= deadline
;
3591 cont_args
->data
= callback_data
;
3592 cont_args
->process_data
= process_data
;
3593 cont_args
->servicer_qos_index
= servicer_qos_index
;
3594 cont
= kqueue_scan_continue
;
3598 /* If awakened during processing, try again */
3599 if (kq
->kq_state
& KQ_WAKEUP
) {
3604 /* go ahead and wait */
3605 waitq_assert_wait64_leeway((struct waitq
*)&kq
->kq_wqs
,
3606 KQ_EVENT
, THREAD_ABORTSAFE
,
3607 TIMEOUT_URGENCY_USER_NORMAL
,
3608 deadline
, TIMEOUT_NO_LEEWAY
);
3609 kq
->kq_state
|= KQ_SLEEP
;
3611 wait_result
= thread_block_parameter(cont
, kq
);
3612 /* NOTREACHED if (continuation != NULL) */
3614 switch (wait_result
) {
3615 case THREAD_AWAKENED
:
3617 case THREAD_TIMED_OUT
:
3619 case THREAD_INTERRUPTED
:
3621 case THREAD_RESTART
:
3624 panic("%s: - bad wait_result (%d)", __func__
,
3636 * This could be expanded to call kqueue_scan, if desired.
3640 kqueue_read(__unused
struct fileproc
*fp
,
3641 __unused
struct uio
*uio
,
3643 __unused vfs_context_t ctx
)
3650 kqueue_write(__unused
struct fileproc
*fp
,
3651 __unused
struct uio
*uio
,
3653 __unused vfs_context_t ctx
)
3660 kqueue_ioctl(__unused
struct fileproc
*fp
,
3661 __unused u_long com
,
3662 __unused caddr_t data
,
3663 __unused vfs_context_t ctx
)
3670 kqueue_select(struct fileproc
*fp
, int which
, void *wq_link_id
,
3671 __unused vfs_context_t ctx
)
3673 struct kqueue
*kq
= (struct kqueue
*)fp
->f_data
;
3674 struct kqtailq
*queue
;
3675 struct kqtailq
*suppressq
;
3684 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
3687 * If this is the first pass, link the wait queue associated with the
3688 * the kqueue onto the wait queue set for the select(). Normally we
3689 * use selrecord() for this, but it uses the wait queue within the
3690 * selinfo structure and we need to use the main one for the kqueue to
3691 * catch events from KN_STAYQUEUED sources. So we do the linkage manually.
3692 * (The select() call will unlink them when it ends).
3694 if (wq_link_id
!= NULL
) {
3695 thread_t cur_act
= current_thread();
3696 struct uthread
* ut
= get_bsdthread_info(cur_act
);
3698 kq
->kq_state
|= KQ_SEL
;
3699 waitq_link((struct waitq
*)&kq
->kq_wqs
, ut
->uu_wqset
,
3700 WAITQ_SHOULD_LOCK
, (uint64_t *)wq_link_id
);
3702 /* always consume the reserved link object */
3703 waitq_link_release(*(uint64_t *)wq_link_id
);
3704 *(uint64_t *)wq_link_id
= 0;
3707 * selprocess() is expecting that we send it back the waitq
3708 * that was just added to the thread's waitq set. In order
3709 * to not change the selrecord() API (which is exported to
3710 * kexts), we pass this value back through the
3711 * void *wq_link_id pointer we were passed. We need to use
3712 * memcpy here because the pointer may not be properly aligned
3713 * on 32-bit systems.
3715 void *wqptr
= &kq
->kq_wqs
;
3716 memcpy(wq_link_id
, (void *)&wqptr
, sizeof(void *));
3719 if (kqueue_begin_processing(kq
, QOS_INDEX_KQFILE
, 0) == -1) {
3724 queue
= kqueue_get_base_queue(kq
, QOS_INDEX_KQFILE
);
3725 if (!TAILQ_EMPTY(queue
)) {
3727 * there is something queued - but it might be a
3728 * KN_STAYACTIVE knote, which may or may not have
3729 * any events pending. Otherwise, we have to walk
3730 * the list of knotes to see, and peek at the
3731 * (non-vanished) stay-active ones to be really sure.
3733 while ((kn
= (struct knote
*)TAILQ_FIRST(queue
)) != NULL
) {
3734 if (kn
->kn_status
& KN_ACTIVE
) {
3738 assert(kn
->kn_status
& KN_STAYACTIVE
);
3743 * There were no regular events on the queue, so take
3744 * a deeper look at the stay-queued ones we suppressed.
3746 suppressq
= kqueue_get_suppressed_queue(kq
, QOS_INDEX_KQFILE
);
3747 while ((kn
= (struct knote
*)TAILQ_FIRST(suppressq
)) != NULL
) {
3750 /* If didn't vanish while suppressed - peek at it */
3751 if (kqlock2knoteuse(kq
, kn
)) {
3753 peek
= knote_fops(kn
)->f_peek(kn
);
3755 /* if it dropped while getting lock - move on */
3756 if (!knoteuse2kqlock(kq
, kn
, 0))
3761 knote_unsuppress(kn
);
3763 /* has data or it has to report a vanish */
3772 kqueue_end_processing(kq
, QOS_INDEX_KQFILE
, 0);
3782 kqueue_close(struct fileglob
*fg
, __unused vfs_context_t ctx
)
3784 struct kqfile
*kqf
= (struct kqfile
*)fg
->fg_data
;
3786 assert((kqf
->kqf_state
& KQ_WORKQ
) == 0);
3787 kqueue_dealloc(&kqf
->kqf_kqueue
);
3794 * The callers has taken a use-count reference on this kqueue and will donate it
3795 * to the kqueue we are being added to. This keeps the kqueue from closing until
3796 * that relationship is torn down.
3799 kqueue_kqfilter(__unused
struct fileproc
*fp
, struct knote
*kn
, __unused vfs_context_t ctx
)
3801 struct kqfile
*kqf
= (struct kqfile
*)kn
->kn_fp
->f_data
;
3802 struct kqueue
*kq
= &kqf
->kqf_kqueue
;
3803 struct kqueue
*parentkq
= knote_get_kq(kn
);
3805 assert((kqf
->kqf_state
& KQ_WORKQ
) == 0);
3807 if (parentkq
== kq
||
3808 kn
->kn_filter
!= EVFILT_READ
) {
3809 kn
->kn_flags
= EV_ERROR
;
3810 kn
->kn_data
= EINVAL
;
3815 * We have to avoid creating a cycle when nesting kqueues
3816 * inside another. Rather than trying to walk the whole
3817 * potential DAG of nested kqueues, we just use a simple
3818 * ceiling protocol. When a kqueue is inserted into another,
3819 * we check that the (future) parent is not already nested
3820 * into another kqueue at a lower level than the potenial
3821 * child (because it could indicate a cycle). If that test
3822 * passes, we just mark the nesting levels accordingly.
3826 if (parentkq
->kq_level
> 0 &&
3827 parentkq
->kq_level
< kq
->kq_level
)
3830 kn
->kn_flags
= EV_ERROR
;
3831 kn
->kn_data
= EINVAL
;
3834 /* set parent level appropriately */
3835 if (parentkq
->kq_level
== 0)
3836 parentkq
->kq_level
= 2;
3837 if (parentkq
->kq_level
< kq
->kq_level
+ 1)
3838 parentkq
->kq_level
= kq
->kq_level
+ 1;
3841 kn
->kn_filtid
= EVFILTID_KQREAD
;
3843 KNOTE_ATTACH(&kqf
->kqf_sel
.si_note
, kn
);
3844 /* indicate nesting in child, if needed */
3845 if (kq
->kq_level
== 0)
3848 int count
= kq
->kq_count
;
3855 * kqueue_drain - called when kq is closed
3859 kqueue_drain(struct fileproc
*fp
, __unused vfs_context_t ctx
)
3861 struct kqueue
*kq
= (struct kqueue
*)fp
->f_fglob
->fg_data
;
3863 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
3866 kq
->kq_state
|= KQ_DRAIN
;
3867 kqueue_interrupt(kq
);
3874 kqueue_stat(struct kqueue
*kq
, void *ub
, int isstat64
, proc_t p
)
3876 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
3879 if (isstat64
!= 0) {
3880 struct stat64
*sb64
= (struct stat64
*)ub
;
3882 bzero((void *)sb64
, sizeof(*sb64
));
3883 sb64
->st_size
= kq
->kq_count
;
3884 if (kq
->kq_state
& KQ_KEV_QOS
)
3885 sb64
->st_blksize
= sizeof(struct kevent_qos_s
);
3886 else if (kq
->kq_state
& KQ_KEV64
)
3887 sb64
->st_blksize
= sizeof(struct kevent64_s
);
3888 else if (IS_64BIT_PROCESS(p
))
3889 sb64
->st_blksize
= sizeof(struct user64_kevent
);
3891 sb64
->st_blksize
= sizeof(struct user32_kevent
);
3892 sb64
->st_mode
= S_IFIFO
;
3894 struct stat
*sb
= (struct stat
*)ub
;
3896 bzero((void *)sb
, sizeof(*sb
));
3897 sb
->st_size
= kq
->kq_count
;
3898 if (kq
->kq_state
& KQ_KEV_QOS
)
3899 sb
->st_blksize
= sizeof(struct kevent_qos_s
);
3900 else if (kq
->kq_state
& KQ_KEV64
)
3901 sb
->st_blksize
= sizeof(struct kevent64_s
);
3902 else if (IS_64BIT_PROCESS(p
))
3903 sb
->st_blksize
= sizeof(struct user64_kevent
);
3905 sb
->st_blksize
= sizeof(struct user32_kevent
);
3906 sb
->st_mode
= S_IFIFO
;
3914 * Interact with the pthread kext to request a servicing there.
3915 * Eventually, this will request threads at specific QoS levels.
3916 * For now, it only requests a dispatch-manager-QoS thread, and
3917 * only one-at-a-time.
3919 * - Caller holds the workq request lock
3921 * - May be called with the kqueue's wait queue set locked,
3922 * so cannot do anything that could recurse on that.
3925 kqworkq_request_thread(
3926 struct kqworkq
*kqwq
,
3927 kq_index_t qos_index
)
3929 struct kqrequest
*kqr
;
3931 assert(kqwq
->kqwq_state
& KQ_WORKQ
);
3932 assert(qos_index
< KQWQ_NQOS
);
3934 kqr
= kqworkq_get_request(kqwq
, qos_index
);
3937 * If we have already requested a thread, and it hasn't
3938 * started processing yet, there's no use hammering away
3939 * on the pthread kext.
3941 if (kqr
->kqr_state
& KQWQ_THREQUESTED
)
3944 assert(kqr
->kqr_thread
== THREAD_NULL
);
3946 /* request additional workq threads if appropriate */
3947 if (pthread_functions
!= NULL
&&
3948 pthread_functions
->workq_reqthreads
!= NULL
) {
3949 unsigned int flags
= KEVENT_FLAG_WORKQ
;
3951 /* Compute a priority based on qos_index. */
3952 struct workq_reqthreads_req_s request
= {
3953 .priority
= qos_from_qos_index(qos_index
),
3958 wqthread
= (*pthread_functions
->workq_reqthreads
)(kqwq
->kqwq_p
, 1, &request
);
3959 kqr
->kqr_state
|= KQWQ_THREQUESTED
;
3961 /* Have we been switched to the emergency/manager thread? */
3962 if (wqthread
== (thread_t
)-1) {
3963 flags
|= KEVENT_FLAG_WORKQ_MANAGER
;
3964 wqthread
= THREAD_NULL
;
3965 } else if (qos_index
== KQWQ_QOS_MANAGER
)
3966 flags
|= KEVENT_FLAG_WORKQ_MANAGER
;
3968 /* bind the thread */
3969 kqworkq_bind_thread(kqwq
, qos_index
, wqthread
, flags
);
3974 * If we aren't already busy processing events [for this QoS],
3975 * request workq thread support as appropriate.
3977 * TBD - for now, we don't segregate out processing by QoS.
3979 * - May be called with the kqueue's wait queue set locked,
3980 * so cannot do anything that could recurse on that.
3983 kqworkq_request_help(
3984 struct kqworkq
*kqwq
,
3985 kq_index_t qos_index
,
3988 struct kqrequest
*kqr
;
3990 /* convert to thread qos value */
3991 assert(qos_index
< KQWQ_NQOS
);
3993 kqwq_req_lock(kqwq
);
3994 kqr
= kqworkq_get_request(kqwq
, qos_index
);
3997 * If someone is processing the queue, just mark what type
3998 * of attempt this was (from a kq wakeup or from a waitq hook).
3999 * They'll be noticed at the end of servicing and a new thread
4000 * will be requested at that point.
4002 if (kqr
->kqr_state
& KQWQ_PROCESSING
) {
4003 kqr
->kqr_state
|= type
;
4004 kqwq_req_unlock(kqwq
);
4008 kqworkq_request_thread(kqwq
, qos_index
);
4009 kqwq_req_unlock(kqwq
);
4013 * These arrays described the low and high qindexes for a given qos_index.
4014 * The values come from the chart in <sys/eventvar.h> (must stay in sync).
4016 static kq_index_t _kq_base_index
[KQWQ_NQOS
] = {0, 0, 6, 11, 15, 18, 20, 21};
4017 static kq_index_t _kq_high_index
[KQWQ_NQOS
] = {0, 5, 10, 14, 17, 19, 20, 21};
4019 static struct kqtailq
*
4020 kqueue_get_base_queue(struct kqueue
*kq
, kq_index_t qos_index
)
4022 assert(qos_index
< KQWQ_NQOS
);
4023 return &kq
->kq_queue
[_kq_base_index
[qos_index
]];
4026 static struct kqtailq
*
4027 kqueue_get_high_queue(struct kqueue
*kq
, kq_index_t qos_index
)
4029 assert(qos_index
< KQWQ_NQOS
);
4030 return &kq
->kq_queue
[_kq_high_index
[qos_index
]];
4034 kqueue_queue_empty(struct kqueue
*kq
, kq_index_t qos_index
)
4036 struct kqtailq
*base_queue
= kqueue_get_base_queue(kq
, qos_index
);
4037 struct kqtailq
*queue
= kqueue_get_high_queue(kq
, qos_index
);
4040 if (!TAILQ_EMPTY(queue
))
4042 } while (queue
-- > base_queue
);
4046 static struct kqtailq
*
4047 kqueue_get_suppressed_queue(struct kqueue
*kq
, kq_index_t qos_index
)
4049 if (kq
->kq_state
& KQ_WORKQ
) {
4050 struct kqworkq
*kqwq
= (struct kqworkq
*)kq
;
4051 struct kqrequest
*kqr
;
4053 kqr
= kqworkq_get_request(kqwq
, qos_index
);
4054 return &kqr
->kqr_suppressed
;
4056 struct kqfile
*kqf
= (struct kqfile
*)kq
;
4057 return &kqf
->kqf_suppressed
;
4062 knote_get_queue_index(struct knote
*kn
)
4064 kq_index_t override_index
= knote_get_qos_override_index(kn
);
4065 kq_index_t qos_index
= knote_get_qos_index(kn
);
4066 struct kqueue
*kq
= knote_get_kq(kn
);
4069 if ((kq
->kq_state
& KQ_WORKQ
) == 0) {
4070 assert(qos_index
== 0);
4071 assert(override_index
== 0);
4073 res
= _kq_base_index
[qos_index
];
4074 if (override_index
> qos_index
)
4075 res
+= override_index
- qos_index
;
4077 assert(res
<= _kq_high_index
[qos_index
]);
4081 static struct kqtailq
*
4082 knote_get_queue(struct knote
*kn
)
4084 kq_index_t qindex
= knote_get_queue_index(kn
);
4086 return &(knote_get_kq(kn
))->kq_queue
[qindex
];
4089 static struct kqtailq
*
4090 knote_get_suppressed_queue(struct knote
*kn
)
4092 kq_index_t qos_index
= knote_get_qos_index(kn
);
4093 struct kqueue
*kq
= knote_get_kq(kn
);
4095 return kqueue_get_suppressed_queue(kq
, qos_index
);
4099 knote_get_req_index(struct knote
*kn
)
4101 return kn
->kn_req_index
;
4105 knote_get_qos_index(struct knote
*kn
)
4107 return kn
->kn_qos_index
;
4111 knote_set_qos_index(struct knote
*kn
, kq_index_t qos_index
)
4113 struct kqueue
*kq
= knote_get_kq(kn
);
4115 assert(qos_index
< KQWQ_NQOS
);
4116 assert((kn
->kn_status
& KN_QUEUED
) == 0);
4118 if (kq
->kq_state
& KQ_WORKQ
)
4119 assert(qos_index
> QOS_INDEX_KQFILE
);
4121 assert(qos_index
== QOS_INDEX_KQFILE
);
4123 /* always set requested */
4124 kn
->kn_req_index
= qos_index
;
4126 /* only adjust in-use qos index when not suppressed */
4127 if ((kn
->kn_status
& KN_SUPPRESSED
) == 0)
4128 kn
->kn_qos_index
= qos_index
;
4132 knote_get_qos_override_index(struct knote
*kn
)
4134 return kn
->kn_qos_override
;
4138 knote_set_qos_override_index(struct knote
*kn
, kq_index_t override_index
)
4140 struct kqueue
*kq
= knote_get_kq(kn
);
4141 kq_index_t qos_index
= knote_get_qos_index(kn
);
4143 assert((kn
->kn_status
& KN_QUEUED
) == 0);
4145 if (override_index
== KQWQ_QOS_MANAGER
)
4146 assert(qos_index
== KQWQ_QOS_MANAGER
);
4148 assert(override_index
< KQWQ_QOS_MANAGER
);
4150 kn
->kn_qos_override
= override_index
;
4153 * If this is a workq kqueue, apply the override to the
4154 * workq servicing thread.
4156 if (kq
->kq_state
& KQ_WORKQ
) {
4157 struct kqworkq
*kqwq
= (struct kqworkq
*)kq
;
4159 assert(qos_index
> QOS_INDEX_KQFILE
);
4160 kqworkq_update_override(kqwq
, qos_index
, override_index
);
4165 kqworkq_update_override(struct kqworkq
*kqwq
, kq_index_t qos_index
, kq_index_t override_index
)
4167 struct kqrequest
*kqr
;
4168 kq_index_t new_delta
;
4169 kq_index_t old_delta
;
4171 new_delta
= (override_index
> qos_index
) ?
4172 override_index
- qos_index
: 0;
4174 kqr
= kqworkq_get_request(kqwq
, qos_index
);
4176 kqwq_req_lock(kqwq
);
4177 old_delta
= kqr
->kqr_override_delta
;
4179 if (new_delta
> old_delta
) {
4180 thread_t wqthread
= kqr
->kqr_thread
;
4182 /* store the new override delta */
4183 kqr
->kqr_override_delta
= new_delta
;
4185 /* apply the override to [incoming?] servicing thread */
4187 /* only apply if non-manager */
4188 if ((kqr
->kqr_state
& KQWQ_THMANAGER
) == 0) {
4190 thread_update_ipc_override(wqthread
, override_index
);
4192 thread_add_ipc_override(wqthread
, override_index
);
4196 kqwq_req_unlock(kqwq
);
4199 /* called with the kqworkq lock held */
4201 kqworkq_bind_thread(
4202 struct kqworkq
*kqwq
,
4203 kq_index_t qos_index
,
4207 struct kqrequest
*kqr
= kqworkq_get_request(kqwq
, qos_index
);
4208 thread_t old_thread
= kqr
->kqr_thread
;
4211 assert(kqr
->kqr_state
& KQWQ_THREQUESTED
);
4213 /* If no identity yet, just set flags as needed */
4214 if (thread
== THREAD_NULL
) {
4215 assert(old_thread
== THREAD_NULL
);
4217 /* emergency or unindetified */
4218 if (flags
& KEVENT_FLAG_WORKQ_MANAGER
) {
4219 assert((kqr
->kqr_state
& KQWQ_THMANAGER
) == 0);
4220 kqr
->kqr_state
|= KQWQ_THMANAGER
;
4225 /* Known thread identity */
4226 ut
= get_bsdthread_info(thread
);
4229 * If this is a manager, and the manager request bit is
4230 * not set, assure no other thread is bound. If the bit
4231 * is set, make sure the old thread is us (or not set).
4233 if (flags
& KEVENT_FLAG_WORKQ_MANAGER
) {
4234 if ((kqr
->kqr_state
& KQWQ_THMANAGER
) == 0) {
4235 assert(old_thread
== THREAD_NULL
);
4236 kqr
->kqr_state
|= KQWQ_THMANAGER
;
4237 } else if (old_thread
== THREAD_NULL
) {
4238 kqr
->kqr_thread
= thread
;
4239 ut
->uu_kqueue_bound
= KQWQ_QOS_MANAGER
;
4240 ut
->uu_kqueue_flags
= (KEVENT_FLAG_WORKQ
|
4241 KEVENT_FLAG_WORKQ_MANAGER
);
4243 assert(thread
== old_thread
);
4244 assert(ut
->uu_kqueue_bound
== KQWQ_QOS_MANAGER
);
4245 assert(ut
->uu_kqueue_flags
& KEVENT_FLAG_WORKQ_MANAGER
);
4250 /* Just a normal one-queue servicing thread */
4251 assert(old_thread
== THREAD_NULL
);
4252 assert((kqr
->kqr_state
& KQWQ_THMANAGER
) == 0);
4254 kqr
->kqr_thread
= thread
;
4256 /* apply an ipc QoS override if one is needed */
4257 if (kqr
->kqr_override_delta
)
4258 thread_add_ipc_override(thread
, qos_index
+ kqr
->kqr_override_delta
);
4260 /* indicate that we are processing in the uthread */
4261 ut
->uu_kqueue_bound
= qos_index
;
4262 ut
->uu_kqueue_flags
= flags
;
4265 /* called with the kqworkq lock held */
4267 kqworkq_unbind_thread(
4268 struct kqworkq
*kqwq
,
4269 kq_index_t qos_index
,
4271 __unused
unsigned int flags
)
4273 struct kqrequest
*kqr
= kqworkq_get_request(kqwq
, qos_index
);
4274 kq_index_t override
= 0;
4276 assert(thread
== current_thread());
4279 * If there is an override, drop it from the current thread
4280 * and then we are free to recompute (a potentially lower)
4281 * minimum override to apply to the next thread request.
4283 if (kqr
->kqr_override_delta
) {
4284 struct kqtailq
*base_queue
= kqueue_get_base_queue(&kqwq
->kqwq_kqueue
, qos_index
);
4285 struct kqtailq
*queue
= kqueue_get_high_queue(&kqwq
->kqwq_kqueue
, qos_index
);
4287 /* if not bound to a manager thread, drop the current ipc override */
4288 if ((kqr
->kqr_state
& KQWQ_THMANAGER
) == 0) {
4289 assert(thread
== kqr
->kqr_thread
);
4290 thread_drop_ipc_override(thread
);
4293 /* recompute the new override */
4295 if (!TAILQ_EMPTY(queue
)) {
4296 override
= queue
- base_queue
;
4299 } while (queue
-- > base_queue
);
4302 /* unbind the thread and apply the new override */
4303 kqr
->kqr_thread
= THREAD_NULL
;
4304 kqr
->kqr_override_delta
= override
;
4308 kqworkq_get_request(struct kqworkq
*kqwq
, kq_index_t qos_index
)
4310 assert(qos_index
< KQWQ_NQOS
);
4311 return &kqwq
->kqwq_request
[qos_index
];
4315 knote_adjust_qos(struct knote
*kn
, qos_t new_qos
, qos_t new_override
)
4317 if (knote_get_kq(kn
)->kq_state
& KQ_WORKQ
) {
4318 kq_index_t new_qos_index
;
4319 kq_index_t new_override_index
;
4320 kq_index_t servicer_qos_index
;
4322 new_qos_index
= qos_index_from_qos(new_qos
, FALSE
);
4323 new_override_index
= qos_index_from_qos(new_override
, TRUE
);
4325 /* make sure the servicer qos acts as a floor */
4326 servicer_qos_index
= qos_index_from_qos(kn
->kn_qos
, FALSE
);
4327 if (servicer_qos_index
> new_qos_index
)
4328 new_qos_index
= servicer_qos_index
;
4329 if (servicer_qos_index
> new_override_index
)
4330 new_override_index
= servicer_qos_index
;
4332 kqlock(knote_get_kq(kn
));
4333 if (new_qos_index
!= knote_get_req_index(kn
) ||
4334 new_override_index
!= knote_get_qos_override_index(kn
)) {
4335 if (kn
->kn_status
& KN_QUEUED
) {
4337 knote_set_qos_index(kn
, new_qos_index
);
4338 knote_set_qos_override_index(kn
, new_override_index
);
4342 knote_set_qos_index(kn
, new_qos_index
);
4343 knote_set_qos_override_index(kn
, new_override_index
);
4346 kqunlock(knote_get_kq(kn
));
4351 knote_wakeup(struct knote
*kn
)
4353 struct kqueue
*kq
= knote_get_kq(kn
);
4355 if (kq
->kq_state
& KQ_WORKQ
) {
4356 /* request a servicing thread */
4357 struct kqworkq
*kqwq
= (struct kqworkq
*)kq
;
4358 kq_index_t qos_index
= knote_get_qos_index(kn
);
4360 kqworkq_request_help(kqwq
, qos_index
, KQWQ_WAKEUP
);
4363 struct kqfile
*kqf
= (struct kqfile
*)kq
;
4365 /* flag wakeups during processing */
4366 if (kq
->kq_state
& KQ_PROCESSING
)
4367 kq
->kq_state
|= KQ_WAKEUP
;
4369 /* wakeup a thread waiting on this queue */
4370 if (kq
->kq_state
& (KQ_SLEEP
| KQ_SEL
)) {
4371 kq
->kq_state
&= ~(KQ_SLEEP
| KQ_SEL
);
4372 waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
,
4375 WAITQ_ALL_PRIORITIES
);
4378 /* wakeup other kqueues/select sets we're inside */
4379 KNOTE(&kqf
->kqf_sel
.si_note
, 0);
4384 * Called with the kqueue locked
4387 kqueue_interrupt(struct kqueue
*kq
)
4389 assert((kq
->kq_state
& KQ_WORKQ
) == 0);
4391 /* wakeup sleeping threads */
4392 if ((kq
->kq_state
& (KQ_SLEEP
| KQ_SEL
)) != 0) {
4393 kq
->kq_state
&= ~(KQ_SLEEP
| KQ_SEL
);
4394 (void)waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
,
4397 WAITQ_ALL_PRIORITIES
);
4400 /* wakeup threads waiting their turn to process */
4401 if (kq
->kq_state
& KQ_PROCWAIT
) {
4402 struct kqtailq
*suppressq
;
4404 assert(kq
->kq_state
& KQ_PROCESSING
);
4406 kq
->kq_state
&= ~KQ_PROCWAIT
;
4407 suppressq
= kqueue_get_suppressed_queue(kq
, QOS_INDEX_KQFILE
);
4408 (void)waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
,
4409 CAST_EVENT64_T(suppressq
),
4411 WAITQ_ALL_PRIORITIES
);
4416 * Called back from waitq code when no threads waiting and the hook was set.
4418 * Interrupts are likely disabled and spin locks are held - minimal work
4419 * can be done in this context!!!
4421 * JMM - in the future, this will try to determine which knotes match the
4422 * wait queue wakeup and apply these wakeups against those knotes themselves.
4423 * For now, all the events dispatched this way are dispatch-manager handled,
4424 * so hard-code that for now.
4427 waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook
, void *knote_hook
, int qos
)
4429 #pragma unused(knote_hook, qos)
4431 struct kqworkq
*kqwq
= (struct kqworkq
*)kq_hook
;
4433 assert(kqwq
->kqwq_state
& KQ_WORKQ
);
4434 kqworkq_request_help(kqwq
, KQWQ_QOS_MANAGER
, KQWQ_HOOKCALLED
);
4438 klist_init(struct klist
*list
)
4445 * Query/Post each knote in the object's list
4447 * The object lock protects the list. It is assumed
4448 * that the filter/event routine for the object can
4449 * determine that the object is already locked (via
4450 * the hint) and not deadlock itself.
4452 * The object lock should also hold off pending
4453 * detach/drop operations. But we'll prevent it here
4454 * too (by taking a use reference) - just in case.
4457 knote(struct klist
*list
, long hint
)
4461 SLIST_FOREACH(kn
, list
, kn_selnext
) {
4462 struct kqueue
*kq
= knote_get_kq(kn
);
4466 /* If we can get a use reference - deliver event */
4467 if (kqlock2knoteuse(kq
, kn
)) {
4470 /* call the event with only a use count */
4471 result
= knote_fops(kn
)->f_event(kn
, hint
);
4473 /* if its not going away and triggered */
4474 if (knoteuse2kqlock(kq
, kn
, 0) && result
)
4483 * attach a knote to the specified list. Return true if this is the first entry.
4484 * The list is protected by whatever lock the object it is associated with uses.
4487 knote_attach(struct klist
*list
, struct knote
*kn
)
4489 int ret
= SLIST_EMPTY(list
);
4490 SLIST_INSERT_HEAD(list
, kn
, kn_selnext
);
4495 * detach a knote from the specified list. Return true if that was the last entry.
4496 * The list is protected by whatever lock the object it is associated with uses.
4499 knote_detach(struct klist
*list
, struct knote
*kn
)
4501 SLIST_REMOVE(list
, kn
, knote
, kn_selnext
);
4502 return (SLIST_EMPTY(list
));
4506 * knote_vanish - Indicate that the source has vanished
4508 * If the knote has requested EV_VANISHED delivery,
4509 * arrange for that. Otherwise, deliver a NOTE_REVOKE
4510 * event for backward compatibility.
4512 * The knote is marked as having vanished, but is not
4513 * actually detached from the source in this instance.
4514 * The actual detach is deferred until the knote drop.
4516 * Our caller already has the object lock held. Calling
4517 * the detach routine would try to take that lock
4518 * recursively - which likely is not supported.
4521 knote_vanish(struct klist
*list
)
4524 struct knote
*kn_next
;
4526 SLIST_FOREACH_SAFE(kn
, list
, kn_selnext
, kn_next
) {
4527 struct kqueue
*kq
= knote_get_kq(kn
);
4531 if ((kn
->kn_status
& KN_DROPPING
) == 0) {
4533 /* If EV_VANISH supported - prepare to deliver one */
4534 if (kn
->kn_status
& KN_REQVANISH
) {
4535 kn
->kn_status
|= KN_VANISHED
;
4538 } else if (kqlock2knoteuse(kq
, kn
)) {
4539 /* call the event with only a use count */
4540 result
= knote_fops(kn
)->f_event(kn
, NOTE_REVOKE
);
4542 /* if its not going away and triggered */
4543 if (knoteuse2kqlock(kq
, kn
, 0) && result
)
4545 /* lock held again */
4553 * For a given knote, link a provided wait queue directly with the kqueue.
4554 * Wakeups will happen via recursive wait queue support. But nothing will move
4555 * the knote to the active list at wakeup (nothing calls knote()). Instead,
4556 * we permanently enqueue them here.
4558 * kqueue and knote references are held by caller.
4559 * waitq locked by caller.
4561 * caller provides the wait queue link structure.
4564 knote_link_waitq(struct knote
*kn
, struct waitq
*wq
, uint64_t *reserved_link
)
4566 struct kqueue
*kq
= knote_get_kq(kn
);
4569 kr
= waitq_link(wq
, &kq
->kq_wqs
, WAITQ_ALREADY_LOCKED
, reserved_link
);
4570 if (kr
== KERN_SUCCESS
) {
4571 knote_markstayactive(kn
);
4579 * Unlink the provided wait queue from the kqueue associated with a knote.
4580 * Also remove it from the magic list of directly attached knotes.
4582 * Note that the unlink may have already happened from the other side, so
4583 * ignore any failures to unlink and just remove it from the kqueue list.
4585 * On success, caller is responsible for the link structure
4588 knote_unlink_waitq(struct knote
*kn
, struct waitq
*wq
)
4590 struct kqueue
*kq
= knote_get_kq(kn
);
4593 kr
= waitq_unlink(wq
, &kq
->kq_wqs
);
4594 knote_clearstayactive(kn
);
4595 return ((kr
!= KERN_SUCCESS
) ? EINVAL
: 0);
4599 * remove all knotes referencing a specified fd
4601 * Essentially an inlined knote_remove & knote_drop
4602 * when we know for sure that the thing is a file
4604 * Entered with the proc_fd lock already held.
4605 * It returns the same way, but may drop it temporarily.
4608 knote_fdclose(struct proc
*p
, int fd
, int force
)
4614 list
= &p
->p_fd
->fd_knlist
[fd
];
4615 SLIST_FOREACH(kn
, list
, kn_link
) {
4616 struct kqueue
*kq
= knote_get_kq(kn
);
4621 panic("%s: proc mismatch (kq->kq_p=%p != p=%p)",
4622 __func__
, kq
->kq_p
, p
);
4625 * If the knote supports EV_VANISHED delivery,
4626 * transition it to vanished mode (or skip over
4627 * it if already vanished).
4629 if (!force
&& (kn
->kn_status
& KN_REQVANISH
)) {
4631 if ((kn
->kn_status
& KN_VANISHED
) == 0) {
4634 /* get detach reference (also marks vanished) */
4635 if (kqlock2knotedetach(kq
, kn
)) {
4637 /* detach knote and drop fp use reference */
4638 knote_fops(kn
)->f_detach(kn
);
4639 if (knote_fops(kn
)->f_isfd
)
4640 fp_drop(p
, kn
->kn_id
, kn
->kn_fp
, 0);
4642 /* activate it if it's still in existence */
4643 if (knoteuse2kqlock(kq
, kn
, 0)) {
4659 * Convert the kq lock to a drop ref.
4660 * If we get it, go ahead and drop it.
4661 * Otherwise, we waited for the blocking
4662 * condition to complete. Either way,
4663 * we dropped the fdlock so start over.
4665 if (kqlock2knotedrop(kq
, kn
)) {
4675 * knote_fdadd - Add knote to the fd table for process
4677 * All file-based filters associate a list of knotes by file
4678 * descriptor index. All other filters hash the knote by ident.
4680 * May have to grow the table of knote lists to cover the
4681 * file descriptor index presented.
4683 * proc_fdlock held on entry (and exit)
4686 knote_fdadd(struct knote
*kn
, struct proc
*p
)
4688 struct filedesc
*fdp
= p
->p_fd
;
4689 struct klist
*list
= NULL
;
4691 if (! knote_fops(kn
)->f_isfd
) {
4692 if (fdp
->fd_knhashmask
== 0)
4693 fdp
->fd_knhash
= hashinit(CONFIG_KN_HASHSIZE
, M_KQUEUE
,
4694 &fdp
->fd_knhashmask
);
4695 list
= &fdp
->fd_knhash
[KN_HASH(kn
->kn_id
, fdp
->fd_knhashmask
)];
4697 if ((u_int
)fdp
->fd_knlistsize
<= kn
->kn_id
) {
4700 if (kn
->kn_id
>= (uint64_t)p
->p_rlimit
[RLIMIT_NOFILE
].rlim_cur
4701 || kn
->kn_id
>= (uint64_t)maxfiles
)
4704 /* have to grow the fd_knlist */
4705 size
= fdp
->fd_knlistsize
;
4706 while (size
<= kn
->kn_id
)
4709 if (size
>= (UINT_MAX
/sizeof(struct klist
*)))
4712 MALLOC(list
, struct klist
*,
4713 size
* sizeof(struct klist
*), M_KQUEUE
, M_WAITOK
);
4717 bcopy((caddr_t
)fdp
->fd_knlist
, (caddr_t
)list
,
4718 fdp
->fd_knlistsize
* sizeof(struct klist
*));
4719 bzero((caddr_t
)list
+
4720 fdp
->fd_knlistsize
* sizeof(struct klist
*),
4721 (size
- fdp
->fd_knlistsize
) * sizeof(struct klist
*));
4722 FREE(fdp
->fd_knlist
, M_KQUEUE
);
4723 fdp
->fd_knlist
= list
;
4724 fdp
->fd_knlistsize
= size
;
4726 list
= &fdp
->fd_knlist
[kn
->kn_id
];
4728 SLIST_INSERT_HEAD(list
, kn
, kn_link
);
4733 * knote_fdremove - remove a knote from the fd table for process
4735 * If the filter is file-based, remove based on fd index.
4736 * Otherwise remove from the hash based on the ident.
4738 * proc_fdlock held on entry (and exit)
4741 knote_fdremove(struct knote
*kn
, struct proc
*p
)
4743 struct filedesc
*fdp
= p
->p_fd
;
4744 struct klist
*list
= NULL
;
4746 if (knote_fops(kn
)->f_isfd
) {
4747 assert ((u_int
)fdp
->fd_knlistsize
> kn
->kn_id
);
4748 list
= &fdp
->fd_knlist
[kn
->kn_id
];
4750 list
= &fdp
->fd_knhash
[KN_HASH(kn
->kn_id
, fdp
->fd_knhashmask
)];
4752 SLIST_REMOVE(list
, kn
, knote
, kn_link
);
4756 * knote_fdfind - lookup a knote in the fd table for process
4758 * If the filter is file-based, lookup based on fd index.
4759 * Otherwise use a hash based on the ident.
4761 * Matching is based on kq, filter, and ident. Optionally,
4762 * it may also be based on the udata field in the kevent -
4763 * allowing multiple event registration for the file object
4766 * proc_fdlock held on entry (and exit)
4768 static struct knote
*
4769 knote_fdfind(struct kqueue
*kq
,
4770 struct kevent_internal_s
*kev
,
4773 struct filedesc
*fdp
= p
->p_fd
;
4774 struct klist
*list
= NULL
;
4775 struct knote
*kn
= NULL
;
4776 struct filterops
*fops
;
4778 fops
= sysfilt_ops
[~kev
->filter
]; /* to 0-base index */
4781 * determine where to look for the knote
4784 /* fd-based knotes are linked off the fd table */
4785 if (kev
->ident
< (u_int
)fdp
->fd_knlistsize
) {
4786 list
= &fdp
->fd_knlist
[kev
->ident
];
4788 } else if (fdp
->fd_knhashmask
!= 0) {
4789 /* hash non-fd knotes here too */
4790 list
= &fdp
->fd_knhash
[KN_HASH((u_long
)kev
->ident
, fdp
->fd_knhashmask
)];
4794 * scan the selected list looking for a match
4797 SLIST_FOREACH(kn
, list
, kn_link
) {
4798 if (kq
== knote_get_kq(kn
) &&
4799 kev
->ident
== kn
->kn_id
&&
4800 kev
->filter
== kn
->kn_filter
) {
4801 if (kev
->flags
& EV_UDATA_SPECIFIC
) {
4802 if ((kn
->kn_status
& KN_UDATA_SPECIFIC
) &&
4803 kev
->udata
== kn
->kn_udata
) {
4804 break; /* matching udata-specific knote */
4806 } else if ((kn
->kn_status
& KN_UDATA_SPECIFIC
) == 0) {
4807 break; /* matching non-udata-specific knote */
4816 * knote_drop - disconnect and drop the knote
4818 * Called with the kqueue unlocked and holding a
4819 * "drop reference" on the knote in question.
4820 * This reference is most often aquired thru a call
4821 * to kqlock2knotedrop(). But it can also be acquired
4822 * through stealing a drop reference via a call to
4823 * knoteuse2knotedrop() or during the initial attach
4826 * The knote may have already been detached from
4827 * (or not yet attached to) its source object.
4829 * should be called at spl == 0, since we don't want to hold spl
4830 * while calling fdrop and free.
4833 knote_drop(struct knote
*kn
, __unused
struct proc
*ctxp
)
4835 struct kqueue
*kq
= knote_get_kq(kn
);
4836 struct proc
*p
= kq
->kq_p
;
4839 /* We have to have a dropping reference on the knote */
4840 assert(kn
->kn_status
& KN_DROPPING
);
4842 /* If we are attached, disconnect from the source first */
4843 if (kn
->kn_status
& KN_ATTACHED
) {
4844 knote_fops(kn
)->f_detach(kn
);
4849 /* Remove the source from the appropriate hash */
4850 knote_fdremove(kn
, p
);
4852 /* trade fdlock for kq lock */
4856 /* determine if anyone needs to know about the drop */
4857 assert((kn
->kn_status
& (KN_SUPPRESSED
| KN_QUEUED
)) == 0);
4858 needswakeup
= (kn
->kn_status
& KN_USEWAIT
);
4862 waitq_wakeup64_all((struct waitq
*)&kq
->kq_wqs
,
4863 CAST_EVENT64_T(&kn
->kn_status
),
4865 WAITQ_ALL_PRIORITIES
);
4867 if (knote_fops(kn
)->f_isfd
&& ((kn
->kn_status
& KN_VANISHED
) == 0))
4868 fp_drop(p
, kn
->kn_id
, kn
->kn_fp
, 0);
4873 /* called with kqueue lock held */
4875 knote_activate(struct knote
*kn
)
4877 if (kn
->kn_status
& KN_ACTIVE
)
4880 kn
->kn_status
|= KN_ACTIVE
;
4881 if (knote_enqueue(kn
))
4885 /* called with kqueue lock held */
4887 knote_deactivate(struct knote
*kn
)
4889 kn
->kn_status
&= ~KN_ACTIVE
;
4890 if ((kn
->kn_status
& KN_STAYACTIVE
) == 0)
4894 /* called with kqueue lock held */
4896 knote_enable(struct knote
*kn
)
4898 if ((kn
->kn_status
& KN_DISABLED
) == 0)
4901 kn
->kn_status
&= ~KN_DISABLED
;
4902 if (knote_enqueue(kn
))
4906 /* called with kqueue lock held */
4908 knote_disable(struct knote
*kn
)
4910 if (kn
->kn_status
& KN_DISABLED
)
4913 kn
->kn_status
|= KN_DISABLED
;
4917 /* called with kqueue lock held */
4919 knote_suppress(struct knote
*kn
)
4921 struct kqtailq
*suppressq
;
4923 if (kn
->kn_status
& KN_SUPPRESSED
)
4927 kn
->kn_status
|= KN_SUPPRESSED
;
4928 suppressq
= knote_get_suppressed_queue(kn
);
4929 TAILQ_INSERT_TAIL(suppressq
, kn
, kn_tqe
);
4932 /* called with kqueue lock held */
4934 knote_unsuppress(struct knote
*kn
)
4936 struct kqtailq
*suppressq
;
4938 if ((kn
->kn_status
& KN_SUPPRESSED
) == 0)
4941 kn
->kn_status
&= ~KN_SUPPRESSED
;
4942 suppressq
= knote_get_suppressed_queue(kn
);
4943 TAILQ_REMOVE(suppressq
, kn
, kn_tqe
);
4945 /* udate in-use qos to equal requested qos */
4946 kn
->kn_qos_index
= kn
->kn_req_index
;
4948 /* don't wakeup if unsuppressing just a stay-active knote */
4949 if (knote_enqueue(kn
) &&
4950 (kn
->kn_status
& KN_ACTIVE
))
4954 /* called with kqueue lock held */
4956 knote_enqueue(struct knote
*kn
)
4958 if ((kn
->kn_status
& (KN_ACTIVE
| KN_STAYACTIVE
)) == 0 ||
4959 (kn
->kn_status
& (KN_DISABLED
| KN_SUPPRESSED
| KN_DROPPING
)))
4962 if ((kn
->kn_status
& KN_QUEUED
) == 0) {
4963 struct kqtailq
*queue
= knote_get_queue(kn
);
4964 struct kqueue
*kq
= knote_get_kq(kn
);
4966 TAILQ_INSERT_TAIL(queue
, kn
, kn_tqe
);
4967 kn
->kn_status
|= KN_QUEUED
;
4971 return ((kn
->kn_status
& KN_STAYACTIVE
) != 0);
4975 /* called with kqueue lock held */
4977 knote_dequeue(struct knote
*kn
)
4979 struct kqueue
*kq
= knote_get_kq(kn
);
4980 struct kqtailq
*queue
;
4982 if ((kn
->kn_status
& KN_QUEUED
) == 0)
4985 queue
= knote_get_queue(kn
);
4986 TAILQ_REMOVE(queue
, kn
, kn_tqe
);
4987 kn
->kn_status
&= ~KN_QUEUED
;
4994 knote_zone
= zinit(sizeof(struct knote
), 8192*sizeof(struct knote
),
4995 8192, "knote zone");
4997 kqfile_zone
= zinit(sizeof(struct kqfile
), 8192*sizeof(struct kqfile
),
4998 8192, "kqueue file zone");
5000 kqworkq_zone
= zinit(sizeof(struct kqworkq
), 8192*sizeof(struct kqworkq
),
5001 8192, "kqueue workq zone");
5003 /* allocate kq lock group attribute and group */
5004 kq_lck_grp_attr
= lck_grp_attr_alloc_init();
5006 kq_lck_grp
= lck_grp_alloc_init("kqueue", kq_lck_grp_attr
);
5008 /* Allocate kq lock attribute */
5009 kq_lck_attr
= lck_attr_alloc_init();
5011 /* Initialize the timer filter lock */
5012 lck_mtx_init(&_filt_timerlock
, kq_lck_grp
, kq_lck_attr
);
5014 /* Initialize the user filter lock */
5015 lck_spin_init(&_filt_userlock
, kq_lck_grp
, kq_lck_attr
);
5017 #if CONFIG_MEMORYSTATUS
5018 /* Initialize the memorystatus list lock */
5019 memorystatus_kevent_init(kq_lck_grp
, kq_lck_attr
);
5022 SYSINIT(knote
, SI_SUB_PSEUDO
, SI_ORDER_ANY
, knote_init
, NULL
)
5025 knote_fops(struct knote
*kn
)
5027 return sysfilt_ops
[kn
->kn_filtid
];
5030 static struct knote
*
5033 return ((struct knote
*)zalloc(knote_zone
));
5037 knote_free(struct knote
*kn
)
5039 zfree(knote_zone
, kn
);
5043 #include <sys/param.h>
5044 #include <sys/socket.h>
5045 #include <sys/protosw.h>
5046 #include <sys/domain.h>
5047 #include <sys/mbuf.h>
5048 #include <sys/kern_event.h>
5049 #include <sys/malloc.h>
5050 #include <sys/sys_domain.h>
5051 #include <sys/syslog.h>
5054 #define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))
5058 #define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
5061 static lck_grp_attr_t
*kev_lck_grp_attr
;
5062 static lck_attr_t
*kev_lck_attr
;
5063 static lck_grp_t
*kev_lck_grp
;
5064 static decl_lck_rw_data(,kev_lck_data
);
5065 static lck_rw_t
*kev_rwlock
= &kev_lck_data
;
5067 static int kev_attach(struct socket
*so
, int proto
, struct proc
*p
);
5068 static int kev_detach(struct socket
*so
);
5069 static int kev_control(struct socket
*so
, u_long cmd
, caddr_t data
,
5070 struct ifnet
*ifp
, struct proc
*p
);
5071 static lck_mtx_t
* event_getlock(struct socket
*, int);
5072 static int event_lock(struct socket
*, int, void *);
5073 static int event_unlock(struct socket
*, int, void *);
5075 static int event_sofreelastref(struct socket
*);
5076 static void kev_delete(struct kern_event_pcb
*);
5078 static struct pr_usrreqs event_usrreqs
= {
5079 .pru_attach
= kev_attach
,
5080 .pru_control
= kev_control
,
5081 .pru_detach
= kev_detach
,
5082 .pru_soreceive
= soreceive
,
5085 static struct protosw eventsw
[] = {
5087 .pr_type
= SOCK_RAW
,
5088 .pr_protocol
= SYSPROTO_EVENT
,
5089 .pr_flags
= PR_ATOMIC
,
5090 .pr_usrreqs
= &event_usrreqs
,
5091 .pr_lock
= event_lock
,
5092 .pr_unlock
= event_unlock
,
5093 .pr_getlock
= event_getlock
,
5097 __private_extern__
int kevt_getstat SYSCTL_HANDLER_ARGS
;
5098 __private_extern__
int kevt_pcblist SYSCTL_HANDLER_ARGS
;
5100 SYSCTL_NODE(_net_systm
, OID_AUTO
, kevt
,
5101 CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "Kernel event family");
5103 struct kevtstat kevtstat
;
5104 SYSCTL_PROC(_net_systm_kevt
, OID_AUTO
, stats
,
5105 CTLTYPE_STRUCT
| CTLFLAG_RD
| CTLFLAG_LOCKED
, 0, 0,
5106 kevt_getstat
, "S,kevtstat", "");
5108 SYSCTL_PROC(_net_systm_kevt
, OID_AUTO
, pcblist
,
5109 CTLTYPE_STRUCT
| CTLFLAG_RD
| CTLFLAG_LOCKED
, 0, 0,
5110 kevt_pcblist
, "S,xkevtpcb", "");
5113 event_getlock(struct socket
*so
, int locktype
)
5115 #pragma unused(locktype)
5116 struct kern_event_pcb
*ev_pcb
= (struct kern_event_pcb
*)so
->so_pcb
;
5118 if (so
->so_pcb
!= NULL
) {
5119 if (so
->so_usecount
< 0)
5120 panic("%s: so=%p usecount=%d lrh= %s\n", __func__
,
5121 so
, so
->so_usecount
, solockhistory_nr(so
));
5124 panic("%s: so=%p NULL NO so_pcb %s\n", __func__
,
5125 so
, solockhistory_nr(so
));
5128 return (&ev_pcb
->evp_mtx
);
5132 event_lock(struct socket
*so
, int refcount
, void *lr
)
5137 lr_saved
= __builtin_return_address(0);
5141 if (so
->so_pcb
!= NULL
) {
5142 lck_mtx_lock(&((struct kern_event_pcb
*)so
->so_pcb
)->evp_mtx
);
5144 panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__
,
5145 so
, lr_saved
, solockhistory_nr(so
));
5149 if (so
->so_usecount
< 0) {
5150 panic("%s: so=%p so_pcb=%p lr=%p ref=%d lrh= %s\n", __func__
,
5151 so
, so
->so_pcb
, lr_saved
, so
->so_usecount
,
5152 solockhistory_nr(so
));
5159 so
->lock_lr
[so
->next_lock_lr
] = lr_saved
;
5160 so
->next_lock_lr
= (so
->next_lock_lr
+1) % SO_LCKDBG_MAX
;
5165 event_unlock(struct socket
*so
, int refcount
, void *lr
)
5168 lck_mtx_t
*mutex_held
;
5171 lr_saved
= __builtin_return_address(0);
5178 if (so
->so_usecount
< 0) {
5179 panic("%s: so=%p usecount=%d lrh= %s\n", __func__
,
5180 so
, so
->so_usecount
, solockhistory_nr(so
));
5183 if (so
->so_pcb
== NULL
) {
5184 panic("%s: so=%p NO PCB usecount=%d lr=%p lrh= %s\n", __func__
,
5185 so
, so
->so_usecount
, (void *)lr_saved
,
5186 solockhistory_nr(so
));
5189 mutex_held
= (&((struct kern_event_pcb
*)so
->so_pcb
)->evp_mtx
);
5191 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
5192 so
->unlock_lr
[so
->next_unlock_lr
] = lr_saved
;
5193 so
->next_unlock_lr
= (so
->next_unlock_lr
+1) % SO_LCKDBG_MAX
;
5195 if (so
->so_usecount
== 0) {
5196 VERIFY(so
->so_flags
& SOF_PCBCLEARING
);
5197 event_sofreelastref(so
);
5199 lck_mtx_unlock(mutex_held
);
5206 event_sofreelastref(struct socket
*so
)
5208 struct kern_event_pcb
*ev_pcb
= (struct kern_event_pcb
*)so
->so_pcb
;
5210 lck_mtx_assert(&(ev_pcb
->evp_mtx
), LCK_MTX_ASSERT_OWNED
);
5215 * Disable upcall in the event another thread is in kev_post_msg()
5216 * appending record to the receive socket buffer, since sbwakeup()
5217 * may release the socket lock otherwise.
5219 so
->so_rcv
.sb_flags
&= ~SB_UPCALL
;
5220 so
->so_snd
.sb_flags
&= ~SB_UPCALL
;
5221 so
->so_event
= sonullevent
;
5222 lck_mtx_unlock(&(ev_pcb
->evp_mtx
));
5224 lck_mtx_assert(&(ev_pcb
->evp_mtx
), LCK_MTX_ASSERT_NOTOWNED
);
5225 lck_rw_lock_exclusive(kev_rwlock
);
5226 LIST_REMOVE(ev_pcb
, evp_link
);
5227 kevtstat
.kes_pcbcount
--;
5228 kevtstat
.kes_gencnt
++;
5229 lck_rw_done(kev_rwlock
);
5232 sofreelastref(so
, 1);
5236 static int event_proto_count
= (sizeof (eventsw
) / sizeof (struct protosw
));
5239 struct kern_event_head kern_event_head
;
5241 static u_int32_t static_event_id
= 0;
5243 #define EVPCB_ZONE_MAX 65536
5244 #define EVPCB_ZONE_NAME "kerneventpcb"
5245 static struct zone
*ev_pcb_zone
;
5248 * Install the protosw's for the NKE manager. Invoked at extension load time
5251 kern_event_init(struct domain
*dp
)
5256 VERIFY(!(dp
->dom_flags
& DOM_INITIALIZED
));
5257 VERIFY(dp
== systemdomain
);
5259 kev_lck_grp_attr
= lck_grp_attr_alloc_init();
5260 if (kev_lck_grp_attr
== NULL
) {
5261 panic("%s: lck_grp_attr_alloc_init failed\n", __func__
);
5265 kev_lck_grp
= lck_grp_alloc_init("Kernel Event Protocol",
5267 if (kev_lck_grp
== NULL
) {
5268 panic("%s: lck_grp_alloc_init failed\n", __func__
);
5272 kev_lck_attr
= lck_attr_alloc_init();
5273 if (kev_lck_attr
== NULL
) {
5274 panic("%s: lck_attr_alloc_init failed\n", __func__
);
5278 lck_rw_init(kev_rwlock
, kev_lck_grp
, kev_lck_attr
);
5279 if (kev_rwlock
== NULL
) {
5280 panic("%s: lck_mtx_alloc_init failed\n", __func__
);
5284 for (i
= 0, pr
= &eventsw
[0]; i
< event_proto_count
; i
++, pr
++)
5285 net_add_proto(pr
, dp
, 1);
5287 ev_pcb_zone
= zinit(sizeof(struct kern_event_pcb
),
5288 EVPCB_ZONE_MAX
* sizeof(struct kern_event_pcb
), 0, EVPCB_ZONE_NAME
);
5289 if (ev_pcb_zone
== NULL
) {
5290 panic("%s: failed allocating ev_pcb_zone", __func__
);
5293 zone_change(ev_pcb_zone
, Z_EXPAND
, TRUE
);
5294 zone_change(ev_pcb_zone
, Z_CALLERACCT
, TRUE
);
5298 kev_attach(struct socket
*so
, __unused
int proto
, __unused
struct proc
*p
)
5301 struct kern_event_pcb
*ev_pcb
;
5303 error
= soreserve(so
, KEV_SNDSPACE
, KEV_RECVSPACE
);
5307 if ((ev_pcb
= (struct kern_event_pcb
*)zalloc(ev_pcb_zone
)) == NULL
) {
5310 bzero(ev_pcb
, sizeof(struct kern_event_pcb
));
5311 lck_mtx_init(&ev_pcb
->evp_mtx
, kev_lck_grp
, kev_lck_attr
);
5313 ev_pcb
->evp_socket
= so
;
5314 ev_pcb
->evp_vendor_code_filter
= 0xffffffff;
5316 so
->so_pcb
= (caddr_t
) ev_pcb
;
5317 lck_rw_lock_exclusive(kev_rwlock
);
5318 LIST_INSERT_HEAD(&kern_event_head
, ev_pcb
, evp_link
);
5319 kevtstat
.kes_pcbcount
++;
5320 kevtstat
.kes_gencnt
++;
5321 lck_rw_done(kev_rwlock
);
5327 kev_delete(struct kern_event_pcb
*ev_pcb
)
5329 VERIFY(ev_pcb
!= NULL
);
5330 lck_mtx_destroy(&ev_pcb
->evp_mtx
, kev_lck_grp
);
5331 zfree(ev_pcb_zone
, ev_pcb
);
5335 kev_detach(struct socket
*so
)
5337 struct kern_event_pcb
*ev_pcb
= (struct kern_event_pcb
*) so
->so_pcb
;
5339 if (ev_pcb
!= NULL
) {
5340 soisdisconnected(so
);
5341 so
->so_flags
|= SOF_PCBCLEARING
;
5348 * For now, kev_vendor_code and mbuf_tags use the same
5351 errno_t
kev_vendor_code_find(
5353 u_int32_t
*out_vendor_code
)
5355 if (strlen(string
) >= KEV_VENDOR_CODE_MAX_STR_LEN
) {
5358 return (net_str_id_find_internal(string
, out_vendor_code
,
5359 NSI_VENDOR_CODE
, 1));
5363 kev_msg_post(struct kev_msg
*event_msg
)
5365 mbuf_tag_id_t min_vendor
, max_vendor
;
5367 net_str_id_first_last(&min_vendor
, &max_vendor
, NSI_VENDOR_CODE
);
5369 if (event_msg
== NULL
)
5373 * Limit third parties to posting events for registered vendor codes
5376 if (event_msg
->vendor_code
< min_vendor
||
5377 event_msg
->vendor_code
> max_vendor
) {
5378 OSIncrementAtomic64((SInt64
*)&kevtstat
.kes_badvendor
);
5381 return (kev_post_msg(event_msg
));
5385 kev_post_msg(struct kev_msg
*event_msg
)
5387 struct mbuf
*m
, *m2
;
5388 struct kern_event_pcb
*ev_pcb
;
5389 struct kern_event_msg
*ev
;
5391 u_int32_t total_size
;
5394 /* Verify the message is small enough to fit in one mbuf w/o cluster */
5395 total_size
= KEV_MSG_HEADER_SIZE
;
5397 for (i
= 0; i
< 5; i
++) {
5398 if (event_msg
->dv
[i
].data_length
== 0)
5400 total_size
+= event_msg
->dv
[i
].data_length
;
5403 if (total_size
> MLEN
) {
5404 OSIncrementAtomic64((SInt64
*)&kevtstat
.kes_toobig
);
5408 m
= m_get(M_DONTWAIT
, MT_DATA
);
5410 OSIncrementAtomic64((SInt64
*)&kevtstat
.kes_nomem
);
5413 ev
= mtod(m
, struct kern_event_msg
*);
5414 total_size
= KEV_MSG_HEADER_SIZE
;
5416 tmp
= (char *) &ev
->event_data
[0];
5417 for (i
= 0; i
< 5; i
++) {
5418 if (event_msg
->dv
[i
].data_length
== 0)
5421 total_size
+= event_msg
->dv
[i
].data_length
;
5422 bcopy(event_msg
->dv
[i
].data_ptr
, tmp
,
5423 event_msg
->dv
[i
].data_length
);
5424 tmp
+= event_msg
->dv
[i
].data_length
;
5427 ev
->id
= ++static_event_id
;
5428 ev
->total_size
= total_size
;
5429 ev
->vendor_code
= event_msg
->vendor_code
;
5430 ev
->kev_class
= event_msg
->kev_class
;
5431 ev
->kev_subclass
= event_msg
->kev_subclass
;
5432 ev
->event_code
= event_msg
->event_code
;
5434 m
->m_len
= total_size
;
5435 lck_rw_lock_shared(kev_rwlock
);
5436 for (ev_pcb
= LIST_FIRST(&kern_event_head
);
5438 ev_pcb
= LIST_NEXT(ev_pcb
, evp_link
)) {
5439 lck_mtx_lock(&ev_pcb
->evp_mtx
);
5440 if (ev_pcb
->evp_socket
->so_pcb
== NULL
) {
5441 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
5444 if (ev_pcb
->evp_vendor_code_filter
!= KEV_ANY_VENDOR
) {
5445 if (ev_pcb
->evp_vendor_code_filter
!= ev
->vendor_code
) {
5446 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
5450 if (ev_pcb
->evp_class_filter
!= KEV_ANY_CLASS
) {
5451 if (ev_pcb
->evp_class_filter
!= ev
->kev_class
) {
5452 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
5456 if ((ev_pcb
->evp_subclass_filter
!=
5457 KEV_ANY_SUBCLASS
) &&
5458 (ev_pcb
->evp_subclass_filter
!=
5459 ev
->kev_subclass
)) {
5460 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
5466 m2
= m_copym(m
, 0, m
->m_len
, M_NOWAIT
);
5468 OSIncrementAtomic64((SInt64
*)&kevtstat
.kes_nomem
);
5470 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
5471 lck_rw_done(kev_rwlock
);
5474 if (sbappendrecord(&ev_pcb
->evp_socket
->so_rcv
, m2
)) {
5476 * We use "m" for the socket stats as it would be
5477 * unsafe to use "m2"
5479 so_inc_recv_data_stat(ev_pcb
->evp_socket
,
5480 1, m
->m_len
, MBUF_TC_BE
);
5482 sorwakeup(ev_pcb
->evp_socket
);
5483 OSIncrementAtomic64((SInt64
*)&kevtstat
.kes_posted
);
5485 OSIncrementAtomic64((SInt64
*)&kevtstat
.kes_fullsock
);
5487 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
5490 lck_rw_done(kev_rwlock
);
5496 kev_control(struct socket
*so
,
5499 __unused
struct ifnet
*ifp
,
5500 __unused
struct proc
*p
)
5502 struct kev_request
*kev_req
= (struct kev_request
*) data
;
5503 struct kern_event_pcb
*ev_pcb
;
5504 struct kev_vendor_code
*kev_vendor
;
5505 u_int32_t
*id_value
= (u_int32_t
*) data
;
5509 *id_value
= static_event_id
;
5512 ev_pcb
= (struct kern_event_pcb
*) so
->so_pcb
;
5513 ev_pcb
->evp_vendor_code_filter
= kev_req
->vendor_code
;
5514 ev_pcb
->evp_class_filter
= kev_req
->kev_class
;
5515 ev_pcb
->evp_subclass_filter
= kev_req
->kev_subclass
;
5518 ev_pcb
= (struct kern_event_pcb
*) so
->so_pcb
;
5519 kev_req
->vendor_code
= ev_pcb
->evp_vendor_code_filter
;
5520 kev_req
->kev_class
= ev_pcb
->evp_class_filter
;
5521 kev_req
->kev_subclass
= ev_pcb
->evp_subclass_filter
;
5523 case SIOCGKEVVENDOR
:
5524 kev_vendor
= (struct kev_vendor_code
*)data
;
5525 /* Make sure string is NULL terminated */
5526 kev_vendor
->vendor_string
[KEV_VENDOR_CODE_MAX_STR_LEN
-1] = 0;
5527 return (net_str_id_find_internal(kev_vendor
->vendor_string
,
5528 &kev_vendor
->vendor_code
, NSI_VENDOR_CODE
, 0));
5537 kevt_getstat SYSCTL_HANDLER_ARGS
5539 #pragma unused(oidp, arg1, arg2)
5542 lck_rw_lock_shared(kev_rwlock
);
5544 if (req
->newptr
!= USER_ADDR_NULL
) {
5548 if (req
->oldptr
== USER_ADDR_NULL
) {
5549 req
->oldidx
= sizeof(struct kevtstat
);
5553 error
= SYSCTL_OUT(req
, &kevtstat
,
5554 MIN(sizeof(struct kevtstat
), req
->oldlen
));
5556 lck_rw_done(kev_rwlock
);
5561 __private_extern__
int
5562 kevt_pcblist SYSCTL_HANDLER_ARGS
5564 #pragma unused(oidp, arg1, arg2)
5567 struct xsystmgen xsg
;
5569 size_t item_size
= ROUNDUP64(sizeof (struct xkevtpcb
)) +
5570 ROUNDUP64(sizeof (struct xsocket_n
)) +
5571 2 * ROUNDUP64(sizeof (struct xsockbuf_n
)) +
5572 ROUNDUP64(sizeof (struct xsockstat_n
));
5573 struct kern_event_pcb
*ev_pcb
;
5575 buf
= _MALLOC(item_size
, M_TEMP
, M_WAITOK
| M_ZERO
);
5579 lck_rw_lock_shared(kev_rwlock
);
5581 n
= kevtstat
.kes_pcbcount
;
5583 if (req
->oldptr
== USER_ADDR_NULL
) {
5584 req
->oldidx
= (n
+ n
/8) * item_size
;
5587 if (req
->newptr
!= USER_ADDR_NULL
) {
5591 bzero(&xsg
, sizeof (xsg
));
5592 xsg
.xg_len
= sizeof (xsg
);
5594 xsg
.xg_gen
= kevtstat
.kes_gencnt
;
5595 xsg
.xg_sogen
= so_gencnt
;
5596 error
= SYSCTL_OUT(req
, &xsg
, sizeof (xsg
));
5601 * We are done if there is no pcb
5608 for (i
= 0, ev_pcb
= LIST_FIRST(&kern_event_head
);
5609 i
< n
&& ev_pcb
!= NULL
;
5610 i
++, ev_pcb
= LIST_NEXT(ev_pcb
, evp_link
)) {
5611 struct xkevtpcb
*xk
= (struct xkevtpcb
*)buf
;
5612 struct xsocket_n
*xso
= (struct xsocket_n
*)
5613 ADVANCE64(xk
, sizeof (*xk
));
5614 struct xsockbuf_n
*xsbrcv
= (struct xsockbuf_n
*)
5615 ADVANCE64(xso
, sizeof (*xso
));
5616 struct xsockbuf_n
*xsbsnd
= (struct xsockbuf_n
*)
5617 ADVANCE64(xsbrcv
, sizeof (*xsbrcv
));
5618 struct xsockstat_n
*xsostats
= (struct xsockstat_n
*)
5619 ADVANCE64(xsbsnd
, sizeof (*xsbsnd
));
5621 bzero(buf
, item_size
);
5623 lck_mtx_lock(&ev_pcb
->evp_mtx
);
5625 xk
->kep_len
= sizeof(struct xkevtpcb
);
5626 xk
->kep_kind
= XSO_EVT
;
5627 xk
->kep_evtpcb
= (uint64_t)VM_KERNEL_ADDRPERM(ev_pcb
);
5628 xk
->kep_vendor_code_filter
= ev_pcb
->evp_vendor_code_filter
;
5629 xk
->kep_class_filter
= ev_pcb
->evp_class_filter
;
5630 xk
->kep_subclass_filter
= ev_pcb
->evp_subclass_filter
;
5632 sotoxsocket_n(ev_pcb
->evp_socket
, xso
);
5633 sbtoxsockbuf_n(ev_pcb
->evp_socket
?
5634 &ev_pcb
->evp_socket
->so_rcv
: NULL
, xsbrcv
);
5635 sbtoxsockbuf_n(ev_pcb
->evp_socket
?
5636 &ev_pcb
->evp_socket
->so_snd
: NULL
, xsbsnd
);
5637 sbtoxsockstat_n(ev_pcb
->evp_socket
, xsostats
);
5639 lck_mtx_unlock(&ev_pcb
->evp_mtx
);
5641 error
= SYSCTL_OUT(req
, buf
, item_size
);
5646 * Give the user an updated idea of our state.
5647 * If the generation differs from what we told
5648 * her before, she knows that something happened
5649 * while we were processing this request, and it
5650 * might be necessary to retry.
5652 bzero(&xsg
, sizeof (xsg
));
5653 xsg
.xg_len
= sizeof (xsg
);
5655 xsg
.xg_gen
= kevtstat
.kes_gencnt
;
5656 xsg
.xg_sogen
= so_gencnt
;
5657 error
= SYSCTL_OUT(req
, &xsg
, sizeof (xsg
));
5664 lck_rw_done(kev_rwlock
);
5669 #endif /* SOCKETS */
5673 fill_kqueueinfo(struct kqueue
*kq
, struct kqueue_info
* kinfo
)
5675 struct vinfo_stat
* st
;
5677 st
= &kinfo
->kq_stat
;
5679 st
->vst_size
= kq
->kq_count
;
5680 if (kq
->kq_state
& KQ_KEV_QOS
)
5681 st
->vst_blksize
= sizeof(struct kevent_qos_s
);
5682 else if (kq
->kq_state
& KQ_KEV64
)
5683 st
->vst_blksize
= sizeof(struct kevent64_s
);
5685 st
->vst_blksize
= sizeof(struct kevent
);
5686 st
->vst_mode
= S_IFIFO
;
5688 /* flags exported to libproc as PROC_KQUEUE_* (sys/proc_info.h) */
5689 #define PROC_KQUEUE_MASK (KQ_SEL|KQ_SLEEP|KQ_KEV32|KQ_KEV64|KQ_KEV_QOS)
5690 kinfo
->kq_state
= kq
->kq_state
& PROC_KQUEUE_MASK
;
5697 knote_markstayactive(struct knote
*kn
)
5699 kqlock(knote_get_kq(kn
));
5700 kn
->kn_status
|= KN_STAYACTIVE
;
5702 /* handle all stayactive knotes on the manager */
5703 if (knote_get_kq(kn
)->kq_state
& KQ_WORKQ
)
5704 knote_set_qos_index(kn
, KQWQ_QOS_MANAGER
);
5707 kqunlock(knote_get_kq(kn
));
5711 knote_clearstayactive(struct knote
*kn
)
5713 kqlock(knote_get_kq(kn
));
5714 kn
->kn_status
&= ~KN_STAYACTIVE
;
5715 knote_deactivate(kn
);
5716 kqunlock(knote_get_kq(kn
));
5719 static unsigned long
5720 kevent_extinfo_emit(struct kqueue
*kq
, struct knote
*kn
, struct kevent_extinfo
*buf
,
5721 unsigned long buflen
, unsigned long nknotes
)
5723 struct kevent_internal_s
*kevp
;
5724 for (; kn
; kn
= SLIST_NEXT(kn
, kn_link
)) {
5725 if (kq
== knote_get_kq(kn
)) {
5726 if (nknotes
< buflen
) {
5727 struct kevent_extinfo
*info
= &buf
[nknotes
];
5728 struct kevent_qos_s kevqos
;
5731 kevp
= &(kn
->kn_kevent
);
5733 bzero(&kevqos
, sizeof(kevqos
));
5734 kevqos
.ident
= kevp
->ident
;
5735 kevqos
.filter
= kevp
->filter
;
5736 kevqos
.flags
= kevp
->flags
;
5737 kevqos
.fflags
= kevp
->fflags
;
5738 kevqos
.data
= (int64_t) kevp
->data
;
5739 kevqos
.udata
= kevp
->udata
;
5740 kevqos
.ext
[0] = kevp
->ext
[0];
5741 kevqos
.ext
[1] = kevp
->ext
[1];
5743 memcpy(&info
->kqext_kev
, &kevqos
, sizeof(info
->kqext_kev
));
5744 info
->kqext_sdata
= kn
->kn_sdata
;
5745 info
->kqext_status
= kn
->kn_status
;
5746 info
->kqext_sfflags
= kn
->kn_sfflags
;
5751 /* we return total number of knotes, which may be more than requested */
5760 pid_kqueue_extinfo(proc_t p
, struct kqueue
*kq
, user_addr_t ubuf
,
5761 uint32_t bufsize
, int32_t *retval
)
5766 struct filedesc
*fdp
= p
->p_fd
;
5767 unsigned long nknotes
= 0;
5768 unsigned long buflen
= bufsize
/ sizeof(struct kevent_extinfo
);
5769 struct kevent_extinfo
*kqext
= NULL
;
5771 /* arbitrary upper limit to cap kernel memory usage, copyout size, etc. */
5772 buflen
= min(buflen
, PROC_PIDFDKQUEUE_KNOTES_MAX
);
5774 kqext
= kalloc(buflen
* sizeof(struct kevent_extinfo
));
5775 if (kqext
== NULL
) {
5779 bzero(kqext
, buflen
* sizeof(struct kevent_extinfo
));
5783 for (i
= 0; i
< fdp
->fd_knlistsize
; i
++) {
5784 kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
]);
5785 nknotes
= kevent_extinfo_emit(kq
, kn
, kqext
, buflen
, nknotes
);
5788 if (fdp
->fd_knhashmask
!= 0) {
5789 for (i
= 0; i
< (int)fdp
->fd_knhashmask
+ 1; i
++) {
5790 kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
]);
5791 nknotes
= kevent_extinfo_emit(kq
, kn
, kqext
, buflen
, nknotes
);
5797 assert(bufsize
>= sizeof(struct kevent_extinfo
) * min(buflen
, nknotes
));
5798 err
= copyout(kqext
, ubuf
, sizeof(struct kevent_extinfo
) * min(buflen
, nknotes
));
5802 kfree(kqext
, buflen
* sizeof(struct kevent_extinfo
));
5807 *retval
= min(nknotes
, PROC_PIDFDKQUEUE_KNOTES_MAX
);
5812 static unsigned long
5813 kevent_udatainfo_emit(struct kqueue
*kq
, struct knote
*kn
, uint64_t *buf
,
5814 unsigned long buflen
, unsigned long nknotes
)
5816 struct kevent_internal_s
*kevp
;
5817 for (; kn
; kn
= SLIST_NEXT(kn
, kn_link
)) {
5818 if (kq
== knote_get_kq(kn
)) {
5819 if (nknotes
< buflen
) {
5821 kevp
= &(kn
->kn_kevent
);
5822 buf
[nknotes
] = kevp
->udata
;
5826 /* we return total number of knotes, which may be more than requested */
5835 pid_kqueue_udatainfo(proc_t p
, struct kqueue
*kq
, uint64_t *buf
,
5840 struct filedesc
*fdp
= p
->p_fd
;
5841 unsigned long nknotes
= 0;
5842 unsigned long buflen
= bufsize
/ sizeof(uint64_t);
5846 for (i
= 0; i
< fdp
->fd_knlistsize
; i
++) {
5847 kn
= SLIST_FIRST(&fdp
->fd_knlist
[i
]);
5848 nknotes
= kevent_udatainfo_emit(kq
, kn
, buf
, buflen
, nknotes
);
5851 if (fdp
->fd_knhashmask
!= 0) {
5852 for (i
= 0; i
< (int)fdp
->fd_knhashmask
+ 1; i
++) {
5853 kn
= SLIST_FIRST(&fdp
->fd_knhash
[i
]);
5854 nknotes
= kevent_udatainfo_emit(kq
, kn
, buf
, buflen
, nknotes
);
5859 return (int)nknotes
;