/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	ipc/ipc_pset.c
 *
 *	Functions to manipulate IPC port sets.
 */
#include <mach/port.h>
#include <mach/kern_return.h>
#include <mach/message.h>
#include <ipc/ipc_mqueue.h>
#include <ipc/ipc_object.h>
#include <ipc/ipc_pset.h>
#include <ipc/ipc_right.h>
#include <ipc/ipc_space.h>
#include <ipc/ipc_port.h>

#include <kern/kern_types.h>

#include <vm/vm_map.h>
#include <libkern/section_keywords.h>
/*
 *	Routine:	ipc_pset_alloc
 *	Purpose:
 *		Allocate a port set.
 *	Conditions:
 *		Nothing locked.  If successful, the port set is returned
 *		locked.  (The caller doesn't have a reference.)
 *	Returns:
 *		KERN_SUCCESS		The port set is allocated.
 *		KERN_INVALID_TASK	The space is dead.
 *		KERN_NO_SPACE		No room for an entry in the space.
 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate memory.
 */

kern_return_t
ipc_pset_alloc(
	ipc_space_t		space,
	mach_port_name_t	*namep,
	ipc_pset_t		*psetp)
{
	ipc_pset_t pset;
	mach_port_name_t name;
	kern_return_t kr;

	kr = ipc_object_alloc(space, IOT_PORT_SET,
	    MACH_PORT_TYPE_PORT_SET, 0,
	    &name, (ipc_object_t *) &pset);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* pset and space are locked */

	ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);
	is_write_unlock(space);

	*namep = name;
	*psetp = pset;
	return KERN_SUCCESS;
}
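
/*
 * Illustrative caller pattern for ipc_pset_alloc() (a hedged sketch of the
 * locking convention documented above, not a verbatim call site; the real
 * callers live in the mach_port_allocate() family):
 *
 *	ipc_pset_t pset;
 *	mach_port_name_t name;
 *
 *	kr = ipc_pset_alloc(space, &name, &pset);
 *	if (kr == KERN_SUCCESS) {
 *		// pset comes back locked and the space entry holds the
 *		// reference, so the caller only has to drop the lock.
 *		ips_unlock(pset);
 *	}
 */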
/*
 *	Routine:	ipc_pset_alloc_name
 *	Purpose:
 *		Allocate a port set, with a specific name.
 *	Conditions:
 *		Nothing locked.  If successful, the port set is returned
 *		locked.  (The caller doesn't have a reference.)
 *	Returns:
 *		KERN_SUCCESS		The port set is allocated.
 *		KERN_INVALID_TASK	The space is dead.
 *		KERN_NAME_EXISTS	The name already denotes a right.
 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate memory.
 */

kern_return_t
ipc_pset_alloc_name(
	ipc_space_t		space,
	mach_port_name_t	name,
	ipc_pset_t		*psetp)
{
	ipc_pset_t pset;
	kern_return_t kr;

	kr = ipc_object_alloc_name(space, IOT_PORT_SET,
	    MACH_PORT_TYPE_PORT_SET, 0,
	    name, (ipc_object_t *) &pset);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* pset is locked */

	ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);

	*psetp = pset;
	return KERN_SUCCESS;
}
/*
 *	Routine:	ipc_pset_alloc_special
 *	Purpose:
 *		Allocate a port set in a special space.
 *		The new port set is returned with one ref.
 *		If unsuccessful, IPS_NULL is returned.
 *	Conditions:
 *		Nothing locked.
 */
ipc_pset_t
ipc_pset_alloc_special(
	__assert_only ipc_space_t space)
{
	ipc_pset_t pset;

	assert(space != IS_NULL);
	assert(space->is_table == IE_NULL);
	assert(!is_active(space));

	pset = ips_object_to_pset(io_alloc(IOT_PORT_SET));
	if (pset == IPS_NULL) {
		return IPS_NULL;
	}

	bzero((char *)pset, sizeof(*pset));

	io_lock_init(ips_to_object(pset));
	pset->ips_references = 1;
	pset->ips_object.io_bits = io_makebits(TRUE, IOT_PORT_SET, 0);

	ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);

	return pset;
}
/*
 *	Routine:	ipc_pset_member
 *	Purpose:
 *		Checks to see if a port is a member of a pset
 *	Conditions:
 *		Both port and port set are locked.
 *		The port must be active.
 */
boolean_t
ipc_pset_member(
	ipc_pset_t	pset,
	ipc_port_t	port)
{
	require_ip_active(port);

	return ipc_mqueue_member(&port->ip_messages, &pset->ips_messages);
}
/*
 *	Routine:	ipc_pset_add
 *	Purpose:
 *		Puts a port into a port set.
 *	Conditions:
 *		Both port and port set are locked and active.
 *		The owner of the port set is also receiver for the port.
 */
kern_return_t
ipc_pset_add(
	ipc_pset_t	pset,
	ipc_port_t	port,
	uint64_t	*reserved_link,
	uint64_t	*reserved_prepost)
{
	kern_return_t kr;

	assert(ips_active(pset));
	require_ip_active(port);

	kr = ipc_mqueue_add(&port->ip_messages, &pset->ips_messages,
	    reserved_link, reserved_prepost);

	return kr;
}
/*
 *	Routine:	ipc_pset_remove
 *	Purpose:
 *		Removes a port from a port set.
 *		The port set loses a reference.
 *	Conditions:
 *		Both port and port set are locked.
 *		The port must be active.
 */
kern_return_t
ipc_pset_remove(
	ipc_pset_t	pset,
	ipc_port_t	port)
{
	kern_return_t kr;

	require_ip_active(port);

	if (port->ip_in_pset == 0) {
		return KERN_NOT_IN_SET;
	}

	kr = ipc_mqueue_remove(&port->ip_messages, &pset->ips_messages);

	return kr;
}
/*
 *	Routine:	ipc_pset_lazy_allocate
 *	Purpose:
 *		Lazily initialize the wqset of a port set.
 *	Conditions:
 *		Nothing locked.
 */
kern_return_t
ipc_pset_lazy_allocate(
	ipc_space_t space,
	mach_port_name_t psname)
{
	kern_return_t kr;
	ipc_entry_t entry;
	ipc_object_t psobj;
	ipc_pset_t pset;

	kr = ipc_right_lookup_read(space, psname, &entry);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* space is read-locked and active */
	if ((entry->ie_bits & MACH_PORT_TYPE_PORT_SET) == 0) {
		is_read_unlock(space);
		kr = KERN_INVALID_RIGHT;
		return kr;
	}

	psobj = entry->ie_object;
	pset = ips_object_to_pset(psobj);
	assert(pset != NULL);
	ipc_mqueue_t set_mqueue = &pset->ips_messages;
	struct waitq_set *wqset = &set_mqueue->imq_set_queue;

	io_reference(psobj);
	is_read_unlock(space);

	/*
	 * lazily initialize the wqset to avoid
	 * possible allocation while linking
	 * under spinlocks.
	 */
	waitq_set_lazy_init_link(wqset);
	io_release(psobj);

	return KERN_SUCCESS;
}
/*
 *	Routine:	ipc_pset_remove_from_all
 *	Purpose:
 *		Removes a port from all its port sets.
 *	Conditions:
 *		port is locked and active.
 */
kern_return_t
ipc_pset_remove_from_all(
	ipc_port_t	port)
{
	if (port->ip_in_pset == 0) {
		return KERN_NOT_IN_SET;
	}

	/*
	 * Remove the port's mqueue from all sets
	 */
	ipc_mqueue_remove_from_all(&port->ip_messages);
	return KERN_SUCCESS;
}
/*
 *	Routine:	ipc_pset_destroy
 *	Purpose:
 *		Destroys a port_set.
 *	Conditions:
 *		The port_set is locked and alive.
 *		The caller has a reference, which is consumed.
 *		Afterwards, the port_set is unlocked and dead.
 */
void
ipc_pset_destroy(
	ipc_space_t	space,
	ipc_pset_t	pset)
{
	assert(ips_active(pset));

	pset->ips_object.io_bits &= ~IO_BITS_ACTIVE;

	/*
	 * remove all the member message queues
	 * AND remove this message queue from any containing sets
	 */
	ipc_mqueue_remove_all(&pset->ips_messages);

	/*
	 * Set all waiters on the portset running to
	 * discover the change.
	 */
	imq_lock(&pset->ips_messages);
	ipc_mqueue_changed(space, &pset->ips_messages);
	imq_unlock(&pset->ips_messages);

	ipc_mqueue_deinit(&pset->ips_messages);

	ips_unlock(pset);
	ips_release(pset);      /* consume the ref our caller gave us */
}
/*
 * Kqueue EVFILT_MACHPORT support
 *
 * - kn_mqueue points to the monitored mqueue
 *
 * - (in/out) ext[0] holds a mach_vm_address_t to a userspace buffer
 *   that can be used to direct-deliver messages when
 *   MACH_RCV_MSG is set in kn_sfflags
 *
 * - (in/out) ext[1] holds a mach_msg_size_t representing the size
 *   of the userspace buffer held in ext[0].
 *
 * - (out)    ext[2] is used to deliver qos information
 *   about the send queue to userspace.
 *
 * - (abused) ext[3] is used in kernel to hold a reference to the first port
 *   with a turnstile that participates in sync IPC override.
 *
 * - kn_hook is optionally a "knote" turnstile. It is used as the inheritor
 *   of turnstiles for rights copied out as part of direct message delivery
 *   when they can participate in sync IPC override.
 *
 *   It is also used to atomically neuter the sync IPC override when the
 *   knote is re-enabled.
 */

#include <sys/event.h>
#include <sys/errno.h>
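
/*
 * Illustrative userspace registration (a hedged sketch, not part of this
 * file): with the kevent64() interface, a receive right can be armed for
 * direct message delivery roughly as follows, the buffer address and size
 * landing in ext[0]/ext[1] as documented above:
 *
 *	struct kevent64_s kev;
 *	EV_SET64(&kev, port_name, EVFILT_MACHPORT,
 *	    EV_ADD | EV_ENABLE | EV_DISPATCH,
 *	    MACH_RCV_MSG,                   // fflags: request direct receive
 *	    0, 0,                           // data, udata
 *	    (uint64_t)msg_buf,              // ext[0]: receive buffer
 *	    sizeof(msg_buf));               // ext[1]: buffer size
 *	(void)kevent64(kq, &kev, 1, NULL, 0, 0, NULL);
 */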
static int
filt_machport_adjust_qos(struct knote *kn, ipc_kmsg_t first)
{
	if (kn->kn_sfflags & MACH_RCV_MSG) {
		int qos = _pthread_priority_thread_qos(first->ikm_qos_override);
		return FILTER_ADJUST_EVENT_QOS(qos);
	}
	return 0;
}
struct turnstile *
filt_ipc_kqueue_turnstile(struct knote *kn)
{
	assert(kn->kn_filter == EVFILT_MACHPORT ||
	    kn->kn_filter == EVFILT_WORKLOOP);
	return kqueue_turnstile(knote_get_kq(kn));
}
bool
filt_machport_kqueue_has_turnstile(struct knote *kn)
{
	assert(kn->kn_filter == EVFILT_MACHPORT);
	return ((kn->kn_sfflags & MACH_RCV_MSG) || (kn->kn_sfflags & MACH_RCV_SYNC_PEEK))
	       && (kn->kn_flags & EV_DISPATCH);
}
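
/*
 * For illustration (hedged): a knote passes the predicate above when
 * userspace registered it with EV_DISPATCH plus a direct-receive mode,
 * e.g. fflags = MACH_RCV_MSG (or MACH_RCV_SYNC_PEEK) and
 * flags = EV_ADD | EV_DISPATCH. Without EV_DISPATCH there is no re-enable
 * edge at which the sync IPC override could be neutered.
 */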
/*
 * Stashes a port that participates in sync IPC override until the knote
 * is re-enabled.
 *
 * It returns:
 * - the turnstile to use as an inheritor for the stashed port
 * - the kind of stash that happened as PORT_SYNC_* value among:
 *   o not stashed (no sync IPC support)
 *   o stashed in the knote (in kn_ext[3])
 *   o to be hooked to the kn_hook knote
 */
struct turnstile *
filt_machport_stash_port(struct knote *kn, ipc_port_t port, int *link)
{
	struct turnstile *ts = TURNSTILE_NULL;

	if (kn->kn_filter == EVFILT_WORKLOOP) {
		assert(kn->kn_mqueue == NULL);
		kn->kn_mqueue = &port->ip_messages;
		ip_reference(port);
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_KNOTE;
		}
		ts = filt_ipc_kqueue_turnstile(kn);
	} else if (!filt_machport_kqueue_has_turnstile(kn)) {
		if (link) {
			*link = PORT_SYNC_LINK_NO_LINKAGE;
		}
	} else if (kn->kn_ext[3] == 0) {
		ip_reference(port);
		kn->kn_ext[3] = (uintptr_t)port;
		ts = filt_ipc_kqueue_turnstile(kn);
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_KNOTE;
		}
	} else {
		ts = (struct turnstile *)kn->kn_hook;
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_STASH;
		}
	}

	return ts;
}
/*
 * Lazily prepare a turnstile so that filt_machport_stash_port()
 * can be called with the mqueue lock held.
 *
 * It will allocate a turnstile in kn_hook if:
 * - the knote supports sync IPC override,
 * - we already stashed a port in kn_ext[3],
 * - the object that will be copied out has a chance to ask to be stashed.
 *
 * It is set up so that its inheritor is the workloop turnstile that was
 * allocated when this knote was attached.
 */
void
filt_machport_turnstile_prepare_lazily(
	struct knote *kn,
	mach_msg_type_name_t msgt_name,
	ipc_port_t port)
{
	/* This is called from within filt_machportprocess */
	assert((kn->kn_status & KN_SUPPRESSED) && (kn->kn_status & KN_LOCKED));

	if (!filt_machport_kqueue_has_turnstile(kn)) {
		return;
	}

	if (kn->kn_ext[3] == 0 || kn->kn_hook) {
		return;
	}

	struct turnstile *ts = filt_ipc_kqueue_turnstile(kn);
	if ((msgt_name == MACH_MSG_TYPE_PORT_SEND_ONCE && port->ip_specialreply) ||
	    (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE)) {
		struct turnstile *kn_ts = turnstile_alloc();
		kn_ts = turnstile_prepare((uintptr_t)kn,
		    (struct turnstile **)&kn->kn_hook, kn_ts, TURNSTILE_KNOTE);
		turnstile_update_inheritor(kn_ts, ts,
		    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE);
		turnstile_update_inheritor_complete(kn_ts,
		    TURNSTILE_INTERLOCK_NOT_HELD);
	}
}
static void
filt_machport_turnstile_complete_port(struct knote *kn, ipc_port_t port,
    ipc_mqueue_t mqueue)
{
	struct turnstile *ts = TURNSTILE_NULL;

	ip_lock(port);
	if (port->ip_specialreply) {
		/*
		 * If the reply has been sent to the special reply port already,
		 * then the special reply port may already be reused to do something
		 * entirely different.
		 *
		 * However, the only reason for it to still point to this knote is
		 * that it's still waiting for a reply, so when this is the case,
		 * neuter the linkage.
		 */
		if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE &&
		    port->ip_sync_inheritor_knote == kn) {
			ipc_port_adjust_special_reply_port_locked(port, NULL,
			    (IPC_PORT_ADJUST_SR_NONE | IPC_PORT_ADJUST_SR_ENABLE_EVENT), FALSE);
		} else {
			ip_unlock(port);
		}
	} else {
		/*
		 * For receive rights, if their IMQ_KNOTE() is still this
		 * knote, then sever the link.
		 */
		imq_lock(mqueue);
		if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE &&
		    mqueue->imq_inheritor_knote == kn) {
			ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL);
			ts = port_send_turnstile(port);
		}
		if (ts) {
			turnstile_reference(ts);
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
		imq_unlock(mqueue);
		ip_unlock(port);

		if (ts) {
			turnstile_update_inheritor_complete(ts,
			    TURNSTILE_INTERLOCK_NOT_HELD);
			turnstile_deallocate(ts);
		}
	}

	ip_release(port);
}
void
filt_wldetach_sync_ipc(struct knote *kn)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	filt_machport_turnstile_complete_port(kn, ip_from_mq(mqueue), mqueue);
	kn->kn_mqueue = NULL;
}
/*
 * Other half of filt_machport_turnstile_prepare_lazily()
 *
 * This is serialized by the knote state machine.
 */
static void
filt_machport_turnstile_complete(struct knote *kn)
{
	if (kn->kn_ext[3]) {
		ipc_port_t port = (ipc_port_t)kn->kn_ext[3];
		filt_machport_turnstile_complete_port(kn, port, &port->ip_messages);
		kn->kn_ext[3] = 0;
	}

	if (kn->kn_hook) {
		struct turnstile *ts = kn->kn_hook;

		turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
		    TURNSTILE_IMMEDIATE_UPDATE);
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);

		turnstile_complete((uintptr_t)kn,
		    (struct turnstile **)&kn->kn_hook, &ts, TURNSTILE_KNOTE);

		assert(ts);
		turnstile_deallocate(ts);
	}
}
static void
filt_machport_link(ipc_mqueue_t mqueue, struct knote *kn)
{
	struct knote *hd = SLIST_FIRST(&mqueue->imq_klist);

	if (hd && filt_machport_kqueue_has_turnstile(kn)) {
		SLIST_INSERT_AFTER(hd, kn, kn_selnext);
	} else {
		SLIST_INSERT_HEAD(&mqueue->imq_klist, kn, kn_selnext);
	}
}
static void
filt_machport_unlink(ipc_mqueue_t mqueue, struct knote *kn)
{
	struct knote **knprev;

	KNOTE_DETACH(&mqueue->imq_klist, kn);

	/* make sure the first knote is a knote we can push on */
	SLIST_FOREACH_PREVPTR(kn, knprev, &mqueue->imq_klist, kn_selnext) {
		if (filt_machport_kqueue_has_turnstile(kn)) {
			*knprev = SLIST_NEXT(kn, kn_selnext);
			SLIST_INSERT_HEAD(&mqueue->imq_klist, kn, kn_selnext);
			break;
		}
	}
}
int
filt_wlattach_sync_ipc(struct knote *kn)
{
	mach_port_name_t name = (mach_port_name_t)kn->kn_id;
	ipc_space_t space = current_space();
	ipc_entry_t entry;
	ipc_port_t port = IP_NULL;
	int error = 0;

	if (ipc_right_lookup_read(space, name, &entry) != KERN_SUCCESS) {
		return ENOENT;
	}

	/* space is read-locked */

	if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) {
		port = ip_object_to_port(entry->ie_object);
		if (port->ip_specialreply) {
			error = ENOENT;
		}
	} else if (entry->ie_bits & MACH_PORT_TYPE_SEND_ONCE) {
		port = ip_object_to_port(entry->ie_object);
		if (!port->ip_specialreply) {
			error = ENOENT;
		}
	} else {
		error = ENOENT;
	}

	if (error) {
		is_read_unlock(space);
		return error;
	}

	ip_lock(port);
	is_read_unlock(space);

	if (port->ip_sync_link_state == PORT_SYNC_LINK_ANY) {
		ip_unlock(port);
		/*
		 * We can only extend an existing sync IPC inheritance chain,
		 * not start one.
		 * Note: this can also happen if the inheritance chain broke
		 * because the original requestor died.
		 */
		return ENOENT;
	}

	if (port->ip_specialreply) {
		ipc_port_adjust_special_reply_port_locked(port, kn,
		    IPC_PORT_ADJUST_SR_LINK_WORKLOOP, FALSE);
	} else {
		ipc_port_adjust_port_locked(port, kn, FALSE);
	}

	/* make sure the port was stashed */
	assert(kn->kn_mqueue == &port->ip_messages);

	/* port has been unlocked by ipc_port_adjust_* */

	return 0;
}
static int
filt_machportattach(
	struct knote *kn,
	__unused struct kevent_qos_s *kev)
{
	mach_port_name_t name = (mach_port_name_t)kn->kn_id;
	uint64_t wq_link_id = waitq_link_reserve(NULL);
	ipc_space_t space = current_space();
	ipc_kmsg_t first;
	struct turnstile *send_turnstile = TURNSTILE_NULL;

	int error;
	int result = 0;
	kern_return_t kr;
	ipc_entry_t entry;
	ipc_mqueue_t mqueue;

	kn->kn_flags &= ~EV_EOF;
	kn->kn_ext[3] = 0;

	if (filt_machport_kqueue_has_turnstile(kn)) {
		/*
		 * If the filter is likely to support sync IPC override,
		 * and it happens to be attaching to a workloop,
		 * make sure the workloop has an allocated turnstile.
		 */
		kqueue_alloc_turnstile(knote_get_kq(kn));
	}

lookup_again:
	kr = ipc_right_lookup_read(space, name, &entry);

	if (kr != KERN_SUCCESS) {
		error = ENOENT;
		goto out;
	}

	/* space is read-locked and active */

	if ((entry->ie_bits & MACH_PORT_TYPE_PORT_SET) &&
	    knote_link_waitqset_should_lazy_alloc(kn)) {
		is_read_unlock(space);

		/*
		 * We need to link the portset of the kn,
		 * to ensure that the link is allocated before taking
		 * any spinlocks.
		 *
		 * Because we have to drop the space lock so that
		 * knote_link_waitqset_lazy_alloc() can allocate memory,
		 * we will need to redo the lookup.
		 */
		knote_link_waitqset_lazy_alloc(kn);
		goto lookup_again;
	}

	if (entry->ie_bits & MACH_PORT_TYPE_PORT_SET) {
		ipc_pset_t pset;

		pset = ips_object_to_pset(entry->ie_object);
		mqueue = &pset->ips_messages;
		ips_reference(pset);

		imq_lock(mqueue);
		kn->kn_mqueue = mqueue;

		/*
		 * Bind the portset wait queue directly to knote/kqueue.
		 * This allows us to just use wait_queue foo to effect a wakeup,
		 * rather than having to call knote() from the Mach code on each
		 * message.  We still attach the knote to the mqueue klist for
		 * NOTE_REVOKE purposes only.
		 */
		error = knote_link_waitq(kn, &mqueue->imq_wait_queue, &wq_link_id);
		if (!error) {
			filt_machport_link(mqueue, kn);
			imq_unlock(mqueue);
		} else {
			kn->kn_mqueue = IMQ_NULL;
			imq_unlock(mqueue);
			ips_release(pset);
		}

		is_read_unlock(space);

		/*
		 * linked knotes are marked stay-active and therefore don't
		 * need an indication of their fired state to be returned
		 * from the attach operation.
		 */
	} else if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) {
		ipc_port_t port = ip_object_to_port(entry->ie_object);

		if (port->ip_specialreply) {
			/*
			 * Registering for kevents on special reply ports
			 * isn't supported for two reasons:
			 *
			 * 1. it really makes very little sense for a port that
			 *    is supposed to be used synchronously
			 *
			 * 2. their mqueue's imq_klist field will be used to
			 *    store the receive turnstile, so we can't possibly
			 *    attach them anyway.
			 */
			is_read_unlock(space);
			error = ENOTSUP;
			goto out;
		}

		mqueue = &port->ip_messages;
		ip_reference(port);

		/*
		 * attach knote to port and determine result
		 * If the filter requested direct message receipt,
		 * we may need to adjust the qos of the knote to
		 * reflect the requested and override qos of the
		 * first message in the queue.
		 */
		imq_lock(mqueue);
		kn->kn_mqueue = mqueue;
		if (port->ip_sync_link_state != PORT_SYNC_LINK_ANY) {
			/*
			 * We're attaching a port that used to have an IMQ_KNOTE,
			 * clobber this state, we'll fixup its turnstile inheritor below.
			 */
			ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL);
		}
		filt_machport_link(mqueue, kn);

		if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) {
			result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first);
		}

		/*
		 * Update the port's turnstile inheritor
		 *
		 * Unlike filt_machportdetach(), we don't have to care about races for
		 * turnstile_workloop_pusher_info(): filt_machport_link() doesn't affect
		 * already pushing knotes, and if the current one becomes the new
		 * pusher, it'll only be visible when turnstile_workloop_pusher_info()
		 * is called.
		 */
		send_turnstile = port_send_turnstile(port);
		if (send_turnstile) {
			turnstile_reference(send_turnstile);
			ipc_port_send_update_inheritor(port, send_turnstile,
			    TURNSTILE_IMMEDIATE_UPDATE);

			/*
			 * rdar://problem/48861190
			 *
			 * When a listener connection resumes a peer,
			 * updating the inheritor above has moved the push
			 * from the current thread to the workloop.
			 *
			 * However, we haven't told the workloop yet
			 * that it needs a thread request, and we risk
			 * being preempted as soon as we drop the space
			 * lock below.
			 *
			 * To avoid this, disable preemption and let kevent
			 * reenable it after it takes the kqlock.
			 */
			disable_preemption();
			result |= FILTER_THREADREQ_NODEFEER;
		}

		imq_unlock(mqueue);

		is_read_unlock(space);
		if (send_turnstile) {
			turnstile_update_inheritor_complete(send_turnstile,
			    TURNSTILE_INTERLOCK_NOT_HELD);
			turnstile_deallocate_safe(send_turnstile);
		}

		error = 0;
	} else {
		is_read_unlock(space);
		error = ENOTSUP;
	}

out:
	waitq_link_release(wq_link_id);

	/* bail out on errors */
	if (error) {
		knote_set_error(kn, error);
		return 0;
	}

	return result;
}
/* Validate imq_to_object implementation "works" */
_Static_assert(offsetof(struct ipc_pset, ips_messages) ==
    offsetof(struct ipc_port, ip_messages),
    "Make sure the mqueue aliases in both ports and psets");

static void
filt_machportdetach(
	struct knote *kn)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_object_t object = imq_to_object(mqueue);
	struct turnstile *send_turnstile = TURNSTILE_NULL;

	filt_machport_turnstile_complete(kn);

	imq_lock(mqueue);
	if ((kn->kn_status & KN_VANISHED) || (kn->kn_flags & EV_EOF)) {
		/*
		 * ipc_mqueue_changed() already unhooked this knote from the mqueue.
		 */
	} else {
		ipc_port_t port = IP_NULL;

		/*
		 * When the knote being detached is the first one in the list,
		 * then unlinking the knote *and* updating the turnstile inheritor
		 * need to happen atomically with respect to the callers of
		 * turnstile_workloop_pusher_info().
		 *
		 * The caller of turnstile_workloop_pusher_info() will use the kq req
		 * lock (and hence the kqlock), so we just need to hold the kqlock too.
		 */
		if (io_otype(object) == IOT_PORT) {
			port = ip_object_to_port(object);
			assert(port->ip_sync_link_state == PORT_SYNC_LINK_ANY);
			if (kn == SLIST_FIRST(&mqueue->imq_klist)) {
				send_turnstile = port_send_turnstile(port);
			}
		}

		filt_machport_unlink(mqueue, kn);

		if (send_turnstile) {
			turnstile_reference(send_turnstile);
			ipc_port_send_update_inheritor(port, send_turnstile,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
	}

	/* Clear the knote pointer once the knote has been removed from turnstile */
	kn->kn_mqueue = IMQ_NULL;
	imq_unlock(mqueue);

	if (send_turnstile) {
		turnstile_update_inheritor_complete(send_turnstile,
		    TURNSTILE_INTERLOCK_NOT_HELD);
		turnstile_deallocate(send_turnstile);
	}

	if (io_otype(object) == IOT_PORT_SET) {
		/*
		 * Unlink the portset wait queue from knote/kqueue.
		 * JMM - Does this need to be atomic under the mq lock?
		 */
		(void)knote_unlink_waitq(kn, &mqueue->imq_wait_queue);
	}
	io_release(object);
}
/*
 * filt_machportevent - deliver events into the mach port filter
 *
 * Mach port message arrival events are currently only posted via the
 * kqueue filter routine for ports. Port sets are marked stay-active
 * and the wait queue code will break any kqueue waiters out to go
 * poll the stay-queued knotes again.
 *
 * If there is a message at the head of the queue,
 * we indicate that the knote should go active.  If
 * the message is to be direct-received, we adjust the
 * QoS of the knote according to the requested and override
 * QoS of that first message.
 */
static int
filt_machportevent(struct knote *kn, long hint __assert_only)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_kmsg_t first;
	int result = 0;

	/* mqueue locked by caller */
	assert(imq_held(mqueue));
	assert(hint != NOTE_REVOKE);
	if (imq_is_valid(mqueue)) {
		assert(!imq_is_set(mqueue));
		if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) {
			result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first);
		}
	}

	return result;
}
static int
filt_machporttouch(
	struct knote *kn,
	struct kevent_qos_s *kev)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_kmsg_t first;
	int result = 0;

	/* copy in new settings and save off new input fflags */
	kn->kn_sfflags = kev->fflags;
	kn->kn_ext[0] = kev->ext[0];
	kn->kn_ext[1] = kev->ext[1];

	if (kev->flags & EV_ENABLE) {
		/*
		 * If the knote is being enabled, make sure there's no lingering
		 * IPC overrides from the previous message delivery.
		 */
		filt_machport_turnstile_complete(kn);
	}

	/*
	 * If the mqueue is a valid port and there is a message
	 * that will be direct-received from the knote, update
	 * the knote qos based on the first message and trigger
	 * the event. If there are no more messages, reset the
	 * QoS to the value provided by the kevent.
	 */
	imq_lock(mqueue);
	if (imq_is_valid(mqueue) && !imq_is_set(mqueue) &&
	    (first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) {
		result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first);
	} else if (kn->kn_sfflags & MACH_RCV_MSG) {
		result = FILTER_RESET_EVENT_QOS;
	}
	imq_unlock(mqueue);

	return result;
}
static int
filt_machportprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_object_t object = imq_to_object(mqueue);
	thread_t self = current_thread();
	kevent_ctx_t kectx = NULL;

	wait_result_t wresult;
	mach_msg_option_t option;
	mach_vm_address_t addr;
	mach_msg_size_t size;

	/* Capture current state */
	knote_fill_kevent(kn, kev, MACH_PORT_NULL);
	kev->ext[3] = 0; /* hide our port reference from userspace */

	/* If already deallocated/moved return one last EOF event */
	if (kev->flags & EV_EOF) {
		return FILTER_ACTIVE | FILTER_RESET_EVENT_QOS;
	}

	/*
	 * Only honor supported receive options. If no options are
	 * provided, just force a MACH_RCV_TOO_LARGE to detect the
	 * name of the port and size of the waiting message.
	 */
	option = kn->kn_sfflags & (MACH_RCV_MSG | MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY |
	    MACH_RCV_TRAILER_MASK | MACH_RCV_VOUCHER | MACH_MSG_STRICT_REPLY);

	if (option & MACH_RCV_MSG) {
		addr = (mach_vm_address_t) kn->kn_ext[0];
		size = (mach_msg_size_t) kn->kn_ext[1];

		/*
		 * If the kevent didn't specify a buffer and length, carve a buffer
		 * from the filter processing data according to the flags.
		 */
		if (size == 0) {
			kectx = kevent_get_context(self);
			addr = (mach_vm_address_t)kectx->kec_data_out;
			size = (mach_msg_size_t)kectx->kec_data_resid;
			option |= (MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY);
			if (kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) {
				option |= MACH_RCV_STACK;
			}
		}
	} else {
		/* just detect the port name (if a set) and size of the first message */
		option = MACH_RCV_LARGE;
		addr = 0;
		size = 0;
	}

	imq_lock(mqueue);

	/* just use the reference from here on out */
	io_reference(object);

	/*
	 * Set up to receive a message or the notification of a
	 * too large message. But never allow this call to wait.
	 * If the user provided additional options, like trailer
	 * options, pass those through here. But we don't support
	 * scatter lists through this interface.
	 */
	self->ith_object = object;
	self->ith_msg_addr = addr;
	self->ith_rsize = size;
	self->ith_msize = 0;
	self->ith_option = option;
	self->ith_receiver_name = MACH_PORT_NULL;
	self->ith_continuation = NULL;
	option |= MACH_RCV_TIMEOUT; // never wait
	self->ith_state = MACH_RCV_IN_PROGRESS;
	self->ith_knote = kn;

	wresult = ipc_mqueue_receive_on_thread(
		mqueue,
		option,
		size, /* max_size */
		0, /* immediate timeout */
		THREAD_INTERRUPTIBLE,
		self);
	/* mqueue unlocked */

	/*
	 * If we timed out, or the process is exiting, just release the
	 * reference on the ipc_object and return zero.
	 */
	if (wresult == THREAD_RESTART || self->ith_state == MACH_RCV_TIMED_OUT) {
		assert(self->turnstile != TURNSTILE_NULL);
		io_release(object);
		return 0;
	}

	assert(wresult == THREAD_NOT_WAITING);
	assert(self->ith_state != MACH_RCV_IN_PROGRESS);

	/*
	 * If we weren't attempting to receive a message
	 * directly, we need to return the port name in
	 * the kevent structure.
	 */
	if ((option & MACH_RCV_MSG) != MACH_RCV_MSG) {
		assert(self->ith_state == MACH_RCV_TOO_LARGE);
		assert(self->ith_kmsg == IKM_NULL);
		kev->data = self->ith_receiver_name;
		io_release(object);
		return FILTER_ACTIVE;
	}

	/*
	 * Attempt to receive the message directly, returning
	 * the results in the fflags field.
	 */
	kev->fflags = mach_msg_receive_results(&size);

	/* kmsg and object reference consumed */

	/*
	 * if the user asked for the identity of ports containing a
	 * too-large message, return it in the data field (as we
	 * do for messages we didn't try to receive).
	 */
	if (kev->fflags == MACH_RCV_TOO_LARGE) {
		kev->ext[1] = self->ith_msize;
		if (option & MACH_RCV_LARGE_IDENTITY) {
			kev->data = self->ith_receiver_name;
		} else {
			kev->data = MACH_PORT_NULL;
		}
	} else {
		kev->ext[1] = size;
		kev->data = MACH_PORT_NULL;
	}

	/*
	 * If we used a data buffer carved out from the filt_process data,
	 * store the address used in the knote and adjust the residual and
	 * other parameters for future use.
	 */
	if (kectx) {
		assert(kectx->kec_data_resid >= size);
		kectx->kec_data_resid -= size;
		if ((kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) == 0) {
			kev->ext[0] = kectx->kec_data_out;
			kectx->kec_data_out += size;
		} else {
			assert(option & MACH_RCV_STACK);
			kev->ext[0] = kectx->kec_data_out + kectx->kec_data_resid;
		}
	}

	/*
	 * Apply message-based QoS values to output kevent as prescribed.
	 * The kev->ext[2] field gets (msg-qos << 32) | (override-qos).
	 *
	 * The mach_msg_receive_results() call saved off the message
	 * QoS values in the continuation save area on successful receive.
	 */
	if (kev->fflags == MACH_MSG_SUCCESS) {
		kev->ext[2] = ((uint64_t)self->ith_qos << 32) |
		    (uint64_t)self->ith_qos_override;
	}

	return FILTER_ACTIVE;
}
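
/*
 * Hedged sketch of how a userspace consumer might unpack the QoS word
 * published in ext[2] by the code above ((msg-qos << 32) | override-qos):
 *
 *	uint32_t msg_qos = (uint32_t)(kev.ext[2] >> 32);
 *	uint32_t ovr_qos = (uint32_t)(kev.ext[2] & 0xffffffff);
 */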
/*
 * Peek to see if the message queue associated with the knote has any
 * events. This pre-hook is called when a filter uses the stay-
 * on-queue mechanism (as the knote_link_waitq mechanism does for
 * portsets) and someone calls select() against the containing kqueue.
 *
 * Just peek at the pre-post status of the portset's wait queue
 * to determine if it has anything interesting.  We can do it
 * without holding the lock, as it is just a snapshot in time
 * (if this is used as part of really waiting for events, we
 * will catch changes in this status when the event gets posted
 * up to the knote's kqueue).
 */
static int
filt_machportpeek(struct knote *kn)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;

	return ipc_mqueue_set_peek(mqueue) ? FILTER_ACTIVE : 0;
}
SECURITY_READ_ONLY_EARLY(struct filterops) machport_filtops = {
	.f_adjusts_qos = true,
	.f_extended_codes = true,
	.f_attach = filt_machportattach,
	.f_detach = filt_machportdetach,
	.f_event = filt_machportevent,
	.f_touch = filt_machporttouch,
	.f_process = filt_machportprocess,
	.f_peek = filt_machportpeek,
};
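
/*
 * Note (an assumption about surrounding code, not stated in this file):
 * machport_filtops is the backing table for EVFILT_MACHPORT and is expected
 * to be wired into the kqueue filter dispatch table (sysfilt_ops in
 * bsd/kern/kern_event.c) so that the f_* callbacks above run on
 * attach/detach/event/touch/process/peek.
 */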