/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	ipc/ipc_pset.c
 *
 *	Functions to manipulate IPC port sets.
 */
#include <mach/port.h>
#include <mach/kern_return.h>
#include <mach/message.h>
#include <ipc/ipc_mqueue.h>
#include <ipc/ipc_object.h>
#include <ipc/ipc_pset.h>
#include <ipc/ipc_right.h>
#include <ipc/ipc_space.h>
#include <ipc/ipc_port.h>

#include <kern/kern_types.h>

#include <vm/vm_map.h>
#include <libkern/section_keywords.h>
#include <pthread/priority_private.h>
/*
 *	Routine:	ipc_pset_alloc
 *	Purpose:
 *		Allocate a port set.
 *	Conditions:
 *		Nothing locked.  If successful, the port set is returned
 *		locked.  (The caller doesn't have a reference.)
 *	Returns:
 *		KERN_SUCCESS		The port set is allocated.
 *		KERN_INVALID_TASK	The space is dead.
 *		KERN_NO_SPACE		No room for an entry in the space.
 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate memory.
 */

kern_return_t
ipc_pset_alloc(
	ipc_space_t             space,
	mach_port_name_t        *namep,
	ipc_pset_t              *psetp)
{
	ipc_pset_t pset;
	mach_port_name_t name;
	kern_return_t kr;

	kr = ipc_object_alloc(space, IOT_PORT_SET,
	    MACH_PORT_TYPE_PORT_SET, 0,
	    &name, (ipc_object_t *) &pset);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* pset and space are locked */

	ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);
	is_write_unlock(space);

	*namep = name;
	*psetp = pset;
	return KERN_SUCCESS;
}
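
/*
 * Usage note (illustrative, not part of this file): userspace typically
 * reaches ipc_pset_alloc() through mach_port_allocate() with the
 * port-set right:
 *
 *	mach_port_name_t set_name;
 *	kern_return_t kr = mach_port_allocate(mach_task_self(),
 *	    MACH_PORT_RIGHT_PORT_SET, &set_name);
 */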
/*
 *	Routine:	ipc_pset_alloc_name
 *	Purpose:
 *		Allocate a port set, with a specific name.
 *	Conditions:
 *		Nothing locked.  If successful, the port set is returned
 *		locked.  (The caller doesn't have a reference.)
 *	Returns:
 *		KERN_SUCCESS		The port set is allocated.
 *		KERN_INVALID_TASK	The space is dead.
 *		KERN_NAME_EXISTS	The name already denotes a right.
 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate memory.
 */

kern_return_t
ipc_pset_alloc_name(
	ipc_space_t             space,
	mach_port_name_t        name,
	ipc_pset_t              *psetp)
{
	ipc_pset_t pset;
	kern_return_t kr;

	kr = ipc_object_alloc_name(space, IOT_PORT_SET,
	    MACH_PORT_TYPE_PORT_SET, 0,
	    name, (ipc_object_t *) &pset);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* pset is locked */

	ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);

	*psetp = pset;
	return KERN_SUCCESS;
}
/*
 *	Routine:	ipc_pset_alloc_special
 *	Purpose:
 *		Allocate a port set in a special space.
 *		The new port set is returned with one ref.
 *		If unsuccessful, IPS_NULL is returned.
 *	Conditions:
 *		Nothing locked.
 */
ipc_pset_t
ipc_pset_alloc_special(
	__assert_only ipc_space_t space)
{
	ipc_pset_t pset;

	assert(space != IS_NULL);
	assert(space->is_table == IE_NULL);
	assert(!is_active(space));

	pset = ips_object_to_pset(io_alloc(IOT_PORT_SET, Z_WAITOK | Z_ZERO));
	if (pset == IPS_NULL) {
		return IPS_NULL;
	}

	io_lock_init(ips_to_object(pset));
	pset->ips_references = 1;
	pset->ips_object.io_bits = io_makebits(TRUE, IOT_PORT_SET, 0);

	ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);

	return pset;
}
/*
 *	Routine:	ipc_pset_member
 *	Purpose:
 *		Checks to see if a port is a member of a pset
 *	Conditions:
 *		Both port and port set are locked.
 *		The port must be active.
 */
boolean_t
ipc_pset_member(
	ipc_pset_t      pset,
	ipc_port_t      port)
{
	require_ip_active(port);

	return ipc_mqueue_member(&port->ip_messages, &pset->ips_messages);
}
/*
 *	Routine:	ipc_pset_add
 *	Purpose:
 *		Puts a port into a port set.
 *	Conditions:
 *		Both port and port set are locked and active.
 *		The owner of the port set is also receiver for the port.
 */
kern_return_t
ipc_pset_add(
	ipc_pset_t        pset,
	ipc_port_t        port,
	uint64_t         *reserved_link,
	uint64_t         *reserved_prepost)
{
	kern_return_t kr;

	assert(ips_active(pset));
	require_ip_active(port);

	kr = ipc_mqueue_add(&port->ip_messages, &pset->ips_messages,
	    reserved_link, reserved_prepost);

	return kr;
}
/*
 *	Routine:	ipc_pset_remove
 *	Purpose:
 *		Removes a port from a port set.
 *		The port set loses a reference.
 *	Conditions:
 *		Both port and port set are locked.
 *		The port must be active.
 */
kern_return_t
ipc_pset_remove(
	ipc_pset_t        pset,
	ipc_port_t        port)
{
	kern_return_t kr;

	require_ip_active(port);

	if (port->ip_in_pset == 0) {
		return KERN_NOT_IN_SET;
	}

	kr = ipc_mqueue_remove(&port->ip_messages, &pset->ips_messages);

	return kr;
}
/*
 *	Routine:	ipc_pset_lazy_allocate
 *	Purpose:
 *		lazily initialize the wqset of a port set.
 *	Conditions:
 *		Nothing locked.
 */
kern_return_t
ipc_pset_lazy_allocate(
	ipc_space_t space,
	mach_port_name_t psname)
{
	kern_return_t kr;
	ipc_entry_t entry;
	ipc_object_t psobj;
	ipc_pset_t pset;

	kr = ipc_right_lookup_read(space, psname, &entry);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* space is read-locked and active */
	if ((entry->ie_bits & MACH_PORT_TYPE_PORT_SET) == 0) {
		is_read_unlock(space);
		kr = KERN_INVALID_RIGHT;
		return kr;
	}

	psobj = entry->ie_object;
	pset = ips_object_to_pset(psobj);
	assert(pset != NULL);
	ipc_mqueue_t set_mqueue = &pset->ips_messages;
	struct waitq_set *wqset = &set_mqueue->imq_set_queue;

	io_reference(psobj);
	is_read_unlock(space);

	/*
	 * lazily initialize the wqset to avoid
	 * possible allocation while linking
	 * to other wqsets.
	 */
	waitq_set_lazy_init_link(wqset);
	io_release(psobj);

	return KERN_SUCCESS;
}
/*
 *	Routine:	ipc_pset_remove_from_all
 *	Purpose:
 *		Removes a port from all its port sets.
 *	Conditions:
 *		port is locked and active.
 */
kern_return_t
ipc_pset_remove_from_all(
	ipc_port_t      port)
{
	if (port->ip_in_pset == 0) {
		return KERN_NOT_IN_SET;
	}

	/*
	 * Remove the port's mqueue from all sets
	 */
	ipc_mqueue_remove_from_all(&port->ip_messages);
	return KERN_SUCCESS;
}
/*
 *	Routine:	ipc_pset_destroy
 *	Purpose:
 *		Destroys a port_set.
 *	Conditions:
 *		The port_set is locked and alive.
 *		The caller has a reference, which is consumed.
 *		Afterwards, the port_set is unlocked and dead.
 */
void
ipc_pset_destroy(
	ipc_space_t     space,
	ipc_pset_t      pset)
{
	assert(ips_active(pset));

	pset->ips_object.io_bits &= ~IO_BITS_ACTIVE;

	/*
	 * remove all the member message queues
	 * AND remove this message queue from any containing sets
	 */
	ipc_mqueue_remove_all(&pset->ips_messages);

	/*
	 * Set all waiters on the portset running to
	 * discover the change.
	 */
	imq_lock(&pset->ips_messages);
	ipc_mqueue_changed(space, &pset->ips_messages);
	imq_unlock(&pset->ips_messages);

	ipc_mqueue_deinit(&pset->ips_messages);

	ips_unlock(pset);
	ips_release(pset);      /* consume the ref our caller gave us */
}
/*
 * Kqueue EVFILT_MACHPORT support
 *
 * - kn_mqueue points to the monitored mqueue
 *
 * - (in/out) ext[0] holds a mach_vm_address_t to a userspace buffer
 *   that can be used to direct-deliver messages when
 *   MACH_RCV_MSG is set in kn_sfflags
 *
 * - (in/out) ext[1] holds a mach_msg_size_t representing the size
 *   of the userspace buffer held in ext[0].
 *
 * - (out)    ext[2] is used to deliver qos information
 *   about the send queue to userspace.
 *
 * - (abused) ext[3] is used in kernel to hold a reference to the first port
 *   with a turnstile that participates in sync IPC override.
 *
 * - kn_hook is optionally a "knote" turnstile. It is used as the inheritor
 *   of turnstiles for rights copied out as part of direct message delivery
 *   when they can participate in sync IPC override.
 *
 *   It is used to atomically neuter the sync IPC override when the knote is
 *   reenabled.
 */
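
/*
 * For illustration only (userspace side, not part of this file): a minimal
 * sketch of how a client might arm this filter for direct message delivery.
 * The names `port_name` and `buf` are hypothetical; only EVFILT_MACHPORT,
 * MACH_RCV_MSG and the ext[0]/ext[1] conventions come from the contract
 * documented above.
 *
 *	char buf[1024];                 // direct-delivery receive buffer
 *	struct kevent_qos_s kev = {
 *		.ident  = port_name,    // name of the port or port set
 *		.filter = EVFILT_MACHPORT,
 *		.flags  = EV_ADD | EV_ENABLE | EV_DISPATCH,
 *		.fflags = MACH_RCV_MSG, // request direct delivery
 *		.ext    = { (uint64_t)(uintptr_t)buf, sizeof(buf) },
 *	};
 */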
#include <sys/event.h>
#include <sys/errno.h>
static int
filt_machport_adjust_qos(struct knote *kn, ipc_kmsg_t first)
{
	if (kn->kn_sfflags & MACH_RCV_MSG) {
		return FILTER_ADJUST_EVENT_QOS(first->ikm_qos_override);
	}
	return 0;
}
struct turnstile *
filt_ipc_kqueue_turnstile(struct knote *kn)
{
	assert(kn->kn_filter == EVFILT_MACHPORT || kn->kn_filter == EVFILT_WORKLOOP);
	return kqueue_turnstile(knote_get_kq(kn));
}
bool
filt_machport_kqueue_has_turnstile(struct knote *kn)
{
	assert(kn->kn_filter == EVFILT_MACHPORT);
	return ((kn->kn_sfflags & MACH_RCV_MSG) || (kn->kn_sfflags & MACH_RCV_SYNC_PEEK))
	       && (kn->kn_flags & EV_DISPATCH);
}
/*
 * Stashes a port that participates in sync IPC override until the knote
 * is being re-enabled.
 *
 * It returns:
 * - the turnstile to use as an inheritor for the stashed port
 * - the kind of stash that happened as PORT_SYNC_* value among:
 *   o not stashed (no sync IPC support)
 *   o stashed in the knote (in kn_ext[3])
 *   o to be hooked to the kn_hook knote
 */
struct turnstile *
filt_machport_stash_port(struct knote *kn, ipc_port_t port, int *link)
{
	struct turnstile *ts = TURNSTILE_NULL;

	if (kn->kn_filter == EVFILT_WORKLOOP) {
		assert(kn->kn_mqueue == NULL);
		kn->kn_mqueue = &port->ip_messages;
		ip_reference(port);
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_KNOTE;
		}
		ts = filt_ipc_kqueue_turnstile(kn);
	} else if (!filt_machport_kqueue_has_turnstile(kn)) {
		if (link) {
			*link = PORT_SYNC_LINK_NO_LINKAGE;
		}
	} else if (kn->kn_ext[3] == 0) {
		ip_reference(port);
		kn->kn_ext[3] = (uintptr_t)port;
		ts = filt_ipc_kqueue_turnstile(kn);
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_KNOTE;
		}
	} else {
		ts = (struct turnstile *)kn->kn_hook;
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_STASH;
		}
	}

	return ts;
}
/*
 * Lazily prepare a turnstile so that filt_machport_stash_port()
 * can be called with the mqueue lock held.
 *
 * It will allocate a turnstile in kn_hook if:
 * - the knote supports sync IPC override,
 * - we already stashed a port in kn_ext[3],
 * - the object that will be copied out has a chance to ask to be stashed.
 *
 * It is setup so that its inheritor is the workloop turnstile that has been
 * allocated when this knote was attached.
 */
void
filt_machport_turnstile_prepare_lazily(
	struct knote *kn,
	mach_msg_type_name_t msgt_name,
	ipc_port_t port)
{
	/* This is called from within filt_machportprocess */
	assert((kn->kn_status & KN_SUPPRESSED) && (kn->kn_status & KN_LOCKED));

	if (!filt_machport_kqueue_has_turnstile(kn)) {
		return;
	}

	if (kn->kn_ext[3] == 0 || kn->kn_hook) {
		return;
	}

	struct turnstile *ts = filt_ipc_kqueue_turnstile(kn);
	if ((msgt_name == MACH_MSG_TYPE_PORT_SEND_ONCE && port->ip_specialreply) ||
	    (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE)) {
		struct turnstile *kn_ts = turnstile_alloc();
		kn_ts = turnstile_prepare((uintptr_t)kn,
		    (struct turnstile **)&kn->kn_hook, kn_ts, TURNSTILE_KNOTE);
		turnstile_update_inheritor(kn_ts, ts,
		    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE);
		turnstile_cleanup();
	}
}
static void
filt_machport_turnstile_complete_port(struct knote *kn, ipc_port_t port,
    ipc_mqueue_t mqueue)
{
	struct turnstile *ts = TURNSTILE_NULL;

	ip_lock(port);
	if (port->ip_specialreply) {
		/*
		 * If the reply has been sent to the special reply port already,
		 * then the special reply port may already be reused to do something
		 * entirely different.
		 *
		 * However, the only reason for it to still point to this knote is
		 * that it's still waiting for a reply, so when this is the case,
		 * neuter the linkage.
		 */
		if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE &&
		    port->ip_sync_inheritor_knote == kn) {
			ipc_port_adjust_special_reply_port_locked(port, NULL,
			    (IPC_PORT_ADJUST_SR_NONE | IPC_PORT_ADJUST_SR_ENABLE_EVENT), FALSE);
		} else {
			ip_unlock(port);
		}
	} else {
		/*
		 * For receive rights, if their IMQ_KNOTE() is still this
		 * knote, then sever the link.
		 */
		imq_lock(mqueue);
		if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE &&
		    mqueue->imq_inheritor_knote == kn) {
			ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL);
			ts = port_send_turnstile(port);
		}
		if (ts) {
			turnstile_reference(ts);
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
		imq_unlock(mqueue);
		ip_unlock(port);

		if (ts) {
			turnstile_update_inheritor_complete(ts,
			    TURNSTILE_INTERLOCK_NOT_HELD);
			turnstile_deallocate(ts);
		}
	}

	ip_release(port);
}
void
filt_wldetach_sync_ipc(struct knote *kn)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	filt_machport_turnstile_complete_port(kn, ip_from_mq(mqueue), mqueue);
	kn->kn_mqueue = NULL;
}
/*
 * Other half of filt_machport_turnstile_prepare_lazily()
 *
 * This is serialized by the knote state machine.
 */
static void
filt_machport_turnstile_complete(struct knote *kn)
{
	if (kn->kn_ext[3]) {
		ipc_port_t port = (ipc_port_t)kn->kn_ext[3];
		filt_machport_turnstile_complete_port(kn, port, &port->ip_messages);
		kn->kn_ext[3] = 0;
	}

	if (kn->kn_hook) {
		struct turnstile *ts = kn->kn_hook;

		turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
		    TURNSTILE_IMMEDIATE_UPDATE);
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);

		turnstile_complete((uintptr_t)kn, (struct turnstile **)&kn->kn_hook,
		    &ts, TURNSTILE_KNOTE);
		turnstile_cleanup();

		assert(ts);
		turnstile_deallocate(ts);
	}
}
static void
filt_machport_link(ipc_mqueue_t mqueue, struct knote *kn)
{
	struct knote *hd = SLIST_FIRST(&mqueue->imq_klist);

	if (hd && filt_machport_kqueue_has_turnstile(kn)) {
		SLIST_INSERT_AFTER(hd, kn, kn_selnext);
	} else {
		SLIST_INSERT_HEAD(&mqueue->imq_klist, kn, kn_selnext);
	}
}
static void
filt_machport_unlink(ipc_mqueue_t mqueue, struct knote *kn)
{
	struct knote **knprev;

	KNOTE_DETACH(&mqueue->imq_klist, kn);

	/* make sure the first knote is a knote we can push on */
	SLIST_FOREACH_PREVPTR(kn, knprev, &mqueue->imq_klist, kn_selnext) {
		if (filt_machport_kqueue_has_turnstile(kn)) {
			*knprev = SLIST_NEXT(kn, kn_selnext);
			SLIST_INSERT_HEAD(&mqueue->imq_klist, kn, kn_selnext);
			break;
		}
	}
}
int
filt_wlattach_sync_ipc(struct knote *kn)
{
	mach_port_name_t name = (mach_port_name_t)kn->kn_id;
	ipc_space_t space = current_space();
	ipc_entry_t entry;
	ipc_port_t port = IP_NULL;
	int error = 0;

	if (ipc_right_lookup_read(space, name, &entry) != KERN_SUCCESS) {
		return ENOENT;
	}

	/* space is read-locked */

	if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) {
		port = ip_object_to_port(entry->ie_object);
		if (port->ip_specialreply) {
			error = ENOENT;
		}
	} else if (entry->ie_bits & MACH_PORT_TYPE_SEND_ONCE) {
		port = ip_object_to_port(entry->ie_object);
		if (!port->ip_specialreply) {
			error = ENOENT;
		}
	} else {
		error = ENOENT;
	}

	if (error) {
		is_read_unlock(space);
		return error;
	}

	ip_lock(port);
	is_read_unlock(space);

	if (port->ip_sync_link_state == PORT_SYNC_LINK_ANY) {
		ip_unlock(port);
		/*
		 * We cannot start a sync IPC inheritance chain, only further one
		 * Note: this can also happen if the inheritance chain broke
		 * because the original requestor died.
		 */
		return ENOENT;
	}

	if (port->ip_specialreply) {
		ipc_port_adjust_special_reply_port_locked(port, kn,
		    IPC_PORT_ADJUST_SR_LINK_WORKLOOP, FALSE);
	} else {
		ipc_port_adjust_port_locked(port, kn, FALSE);
	}

	/* make sure the port was stashed */
	assert(kn->kn_mqueue == &port->ip_messages);

	/* port has been unlocked by ipc_port_adjust_* */

	return 0;
}
static int
filt_machportattach(
	struct knote *kn,
	__unused struct kevent_qos_s *kev)
{
	mach_port_name_t name = (mach_port_name_t)kn->kn_id;
	uint64_t wq_link_id = waitq_link_reserve(NULL);
	ipc_space_t space = current_space();
	ipc_kmsg_t first;
	struct turnstile *send_turnstile = TURNSTILE_NULL;

	int error;
	int result = 0;
	kern_return_t kr;
	ipc_entry_t entry;
	ipc_mqueue_t mqueue;

	kn->kn_flags &= ~EV_EOF;
	kn->kn_ext[3] = 0;

	if (filt_machport_kqueue_has_turnstile(kn)) {
		/*
		 * If the filter is likely to support sync IPC override,
		 * and it happens to be attaching to a workloop,
		 * make sure the workloop has an allocated turnstile.
		 */
		kqueue_alloc_turnstile(knote_get_kq(kn));
	}

lookup_again:
	kr = ipc_right_lookup_read(space, name, &entry);

	if (kr != KERN_SUCCESS) {
		error = ENOENT;
		goto out;
	}

	/* space is read-locked and active */

	if ((entry->ie_bits & MACH_PORT_TYPE_PORT_SET) &&
	    knote_link_waitqset_should_lazy_alloc(kn)) {
		is_read_unlock(space);

		/*
		 * We need to link the portset of the kn,
		 * to ensure that the link is allocated before taking
		 * any lock.
		 *
		 * Because we have to drop the space lock so that
		 * knote_link_waitqset_lazy_alloc() can allocate memory,
		 * we will need to redo the lookup.
		 */
		knote_link_waitqset_lazy_alloc(kn);
		goto lookup_again;
	}

	if (entry->ie_bits & MACH_PORT_TYPE_PORT_SET) {
		ipc_pset_t pset;

		pset = ips_object_to_pset(entry->ie_object);
		mqueue = &pset->ips_messages;
		ips_reference(pset);

		imq_lock(mqueue);
		kn->kn_mqueue = mqueue;

		/*
		 * Bind the portset wait queue directly to knote/kqueue.
		 * This allows us to just use wait_queue foo to effect a wakeup,
		 * rather than having to call knote() from the Mach code on each
		 * message.  We still attach the knote to the mqueue klist for
		 * NOTE_REVOKE purposes only.
		 */
		error = knote_link_waitq(kn, &mqueue->imq_wait_queue, &wq_link_id);
		if (!error) {
			filt_machport_link(mqueue, kn);
			imq_unlock(mqueue);
		} else {
			kn->kn_mqueue = IMQ_NULL;
			imq_unlock(mqueue);
			ips_release(pset);
		}

		is_read_unlock(space);

		/*
		 * linked knotes are marked stay-active and therefore don't
		 * need an indication of their fired state to be returned
		 * from the attach operation.
		 */
	} else if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) {
		ipc_port_t port = ip_object_to_port(entry->ie_object);

		if (port->ip_specialreply) {
			/*
			 * Registering for kevents on special reply ports
			 * isn't supported for two reasons:
			 *
			 * 1. it really makes very little sense for a port that
			 *    is supposed to be used synchronously
			 *
			 * 2. their mqueue's imq_klist field will be used to
			 *    store the receive turnstile, so we can't possibly
			 *    attach them anyway.
			 */
			is_read_unlock(space);
			error = ENOTSUP;
			goto out;
		}

		mqueue = &port->ip_messages;
		ip_reference(port);

		/*
		 * attach knote to port and determine result
		 * If the filter requested direct message receipt,
		 * we may need to adjust the qos of the knote to
		 * reflect the requested and override qos of the
		 * first message in the queue.
		 */
		imq_lock(mqueue);
		kn->kn_mqueue = mqueue;
		if (port->ip_sync_link_state != PORT_SYNC_LINK_ANY) {
			/*
			 * We're attaching a port that used to have an IMQ_KNOTE,
			 * clobber this state, we'll fixup its turnstile inheritor below.
			 */
			ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL);
		}
		filt_machport_link(mqueue, kn);

		if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) {
			result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first);
		}

		/*
		 * Update the port's turnstile inheritor
		 *
		 * Unlike filt_machportdetach(), we don't have to care about races for
		 * turnstile_workloop_pusher_info(): filt_machport_link() doesn't affect
		 * already pushing knotes, and if the current one becomes the new
		 * pusher, it'll only be visible when turnstile_workloop_pusher_info()
		 * is called after we unlock the mqueue.
		 */
		send_turnstile = port_send_turnstile(port);
		if (send_turnstile) {
			turnstile_reference(send_turnstile);
			ipc_port_send_update_inheritor(port, send_turnstile,
			    TURNSTILE_IMMEDIATE_UPDATE);

			/*
			 * rdar://problem/48861190
			 *
			 * When a listener connection resumes a peer,
			 * updating the inheritor above has moved the push
			 * from the current thread to the workloop.
			 *
			 * However, we haven't told the workloop yet
			 * that it needs a thread request, and we risk
			 * to be preempted as soon as we drop the space
			 * lock below.
			 *
			 * To avoid this disable preemption and let kevent
			 * reenable it after it takes the kqlock.
			 */
			disable_preemption();
			result |= FILTER_THREADREQ_NODEFEER;
		}
		imq_unlock(mqueue);

		is_read_unlock(space);
		if (send_turnstile) {
			turnstile_update_inheritor_complete(send_turnstile,
			    TURNSTILE_INTERLOCK_NOT_HELD);
			turnstile_deallocate_safe(send_turnstile);
		}

		error = 0;
	} else {
		is_read_unlock(space);
		error = ENOTSUP;
	}

out:
	waitq_link_release(wq_link_id);

	/* bail out on errors */
	if (error) {
		knote_set_error(kn, error);
		return 0;
	}

	return result;
}
/* Validate imq_to_object implementation "works" */
_Static_assert(offsetof(struct ipc_pset, ips_messages) ==
    offsetof(struct ipc_port, ip_messages),
    "Make sure the mqueue aliases in both ports and psets");
static void
filt_machportdetach(
	struct knote *kn)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_object_t object = imq_to_object(mqueue);
	struct turnstile *send_turnstile = TURNSTILE_NULL;

	filt_machport_turnstile_complete(kn);

	imq_lock(mqueue);
	if ((kn->kn_status & KN_VANISHED) || (kn->kn_flags & EV_EOF)) {
		/*
		 * ipc_mqueue_changed() already unhooked this knote from the mqueue.
		 */
	} else {
		ipc_port_t port = IP_NULL;

		/*
		 * When the knote being detached is the first one in the list,
		 * then unlinking the knote *and* updating the turnstile inheritor
		 * need to happen atomically with respect to the callers of
		 * turnstile_workloop_pusher_info().
		 *
		 * The caller of turnstile_workloop_pusher_info() will use the kq req
		 * lock (and hence the kqlock), so we just need to hold the kqlock too.
		 */
		if (io_otype(object) == IOT_PORT) {
			port = ip_object_to_port(object);
			assert(port->ip_sync_link_state == PORT_SYNC_LINK_ANY);
			if (kn == SLIST_FIRST(&mqueue->imq_klist)) {
				send_turnstile = port_send_turnstile(port);
			}
		}

		filt_machport_unlink(mqueue, kn);

		if (send_turnstile) {
			turnstile_reference(send_turnstile);
			ipc_port_send_update_inheritor(port, send_turnstile,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
	}

	/* Clear the knote pointer once the knote has been removed from turnstile */
	kn->kn_mqueue = IMQ_NULL;
	imq_unlock(mqueue);

	if (send_turnstile) {
		turnstile_update_inheritor_complete(send_turnstile,
		    TURNSTILE_INTERLOCK_NOT_HELD);
		turnstile_deallocate(send_turnstile);
	}

	if (io_otype(object) == IOT_PORT_SET) {
		/*
		 * Unlink the portset wait queue from knote/kqueue.
		 * JMM - Does this need to be atomic under the mq lock?
		 */
		(void)knote_unlink_waitq(kn, &mqueue->imq_wait_queue);
	}

	io_release(object);
}
/*
 * filt_machportevent - deliver events into the mach port filter
 *
 * Mach port message arrival events are currently only posted via the
 * kqueue filter routine for ports. Port sets are marked stay-active
 * and the wait queue code will break any kqueue waiters out to go
 * poll the stay-queued knotes again.
 *
 * If there is a message at the head of the queue,
 * we indicate that the knote should go active.  If
 * the message is to be direct-received, we adjust the
 * QoS of the knote according to the requested and override
 * QoS of that first message.
 */
static int
filt_machportevent(struct knote *kn, long hint __assert_only)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_kmsg_t first;
	int result = 0;

	/* mqueue locked by caller */
	imq_held(mqueue);
	assert(hint != NOTE_REVOKE);
	if (imq_is_valid(mqueue)) {
		assert(!imq_is_set(mqueue));
		if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) {
			result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first);
		}
	}

	return result;
}
static int
filt_machporttouch(
	struct knote *kn,
	struct kevent_qos_s *kev)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_kmsg_t first;
	int result = 0;

	/* copy in new settings and save off new input fflags */
	kn->kn_sfflags = kev->fflags;
	kn->kn_ext[0] = kev->ext[0];
	kn->kn_ext[1] = kev->ext[1];

	if (kev->flags & EV_ENABLE) {
		/*
		 * If the knote is being enabled, make sure there are no lingering
		 * IPC overrides from the previous message delivery.
		 */
		filt_machport_turnstile_complete(kn);
	}

	/*
	 * If the mqueue is a valid port and there is a message
	 * that will be direct-received from the knote, update
	 * the knote qos based on the first message and trigger
	 * the event. If there are no more messages, reset the
	 * QoS to the value provided by the kevent.
	 */
	imq_lock(mqueue);
	if (imq_is_valid(mqueue) && !imq_is_set(mqueue) &&
	    (first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) {
		result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first);
	} else if (kn->kn_sfflags & MACH_RCV_MSG) {
		result = FILTER_RESET_EVENT_QOS;
	}
	imq_unlock(mqueue);

	return result;
}
static int
filt_machportprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_object_t object = imq_to_object(mqueue);
	thread_t self = current_thread();
	kevent_ctx_t kectx = NULL;

	wait_result_t wresult;
	mach_msg_option_t option;
	mach_vm_address_t addr;
	mach_msg_size_t size;

	/* Capture current state */
	knote_fill_kevent(kn, kev, MACH_PORT_NULL);
	kev->ext[3] = 0; /* hide our port reference from userspace */

	/* If already deallocated/moved return one last EOF event */
	if (kev->flags & EV_EOF) {
		return FILTER_ACTIVE | FILTER_RESET_EVENT_QOS;
	}

	/*
	 * Only honor supported receive options. If no options are
	 * provided, just force a MACH_RCV_TOO_LARGE to detect the
	 * name of the port and size of the waiting message.
	 */
	option = kn->kn_sfflags & (MACH_RCV_MSG | MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY |
	    MACH_RCV_TRAILER_MASK | MACH_RCV_VOUCHER | MACH_MSG_STRICT_REPLY);

	if (option & MACH_RCV_MSG) {
		addr = (mach_vm_address_t) kn->kn_ext[0];
		size = (mach_msg_size_t) kn->kn_ext[1];

		/*
		 * If the kevent didn't specify a buffer and length, carve a buffer
		 * from the filter processing data according to the flags.
		 */
		if (size == 0) {
			kectx = kevent_get_context(self);
			addr = (mach_vm_address_t)kectx->kec_data_out;
			size = (mach_msg_size_t)kectx->kec_data_resid;
			option |= (MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY);
			if (kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) {
				option |= MACH_RCV_STACK;
			}
		}
	} else {
		/* just detect the port name (if a set) and size of the first message */
		option = MACH_RCV_LARGE;
		addr = 0;
		size = 0;
	}

	imq_lock(mqueue);

	/* just use the reference from here on out */
	io_reference(object);

	/*
	 * Set up to receive a message or the notification of a
	 * too large message. But never allow this call to wait.
	 * If the user provided additional options, like trailer
	 * options, pass those through here. But we don't support
	 * scatter lists through this interface.
	 */
	self->ith_object = object;
	self->ith_msg_addr = addr;
	self->ith_rsize = size;
	self->ith_msize = 0;
	self->ith_option = option;
	self->ith_receiver_name = MACH_PORT_NULL;
	self->ith_continuation = NULL;
	option |= MACH_RCV_TIMEOUT; // never wait
	self->ith_state = MACH_RCV_IN_PROGRESS;
	self->ith_knote = kn;

	wresult = ipc_mqueue_receive_on_thread(
		mqueue,
		option,
		size, /* max_size */
		0,    /* immediate timeout */
		THREAD_INTERRUPTIBLE,
		self);
	/* mqueue unlocked */

	/*
	 * If we timed out, or the process is exiting, just release the
	 * reference on the ipc_object and return zero.
	 */
	if (wresult == THREAD_RESTART || self->ith_state == MACH_RCV_TIMED_OUT) {
		assert(self->turnstile != TURNSTILE_NULL);
		io_release(object);
		return 0;
	}

	assert(wresult == THREAD_NOT_WAITING);
	assert(self->ith_state != MACH_RCV_IN_PROGRESS);

	/*
	 * If we weren't attempting to receive a message
	 * directly, we need to return the port name in
	 * the kevent structure.
	 */
	if ((option & MACH_RCV_MSG) != MACH_RCV_MSG) {
		assert(self->ith_state == MACH_RCV_TOO_LARGE);
		assert(self->ith_kmsg == IKM_NULL);
		kev->data = self->ith_receiver_name;
		io_release(object);
		return FILTER_ACTIVE;
	}

	/*
	 * Attempt to receive the message directly, returning
	 * the results in the fflags field.
	 */
	kev->fflags = mach_msg_receive_results(&size);

	/* kmsg and object reference consumed */

	/*
	 * if the user asked for the identity of ports containing a
	 * too-large message, return it in the data field (as we
	 * do for messages we didn't try to receive).
	 */
	if (kev->fflags == MACH_RCV_TOO_LARGE) {
		kev->ext[1] = self->ith_msize;
		if (option & MACH_RCV_LARGE_IDENTITY) {
			kev->data = self->ith_receiver_name;
		} else {
			kev->data = MACH_PORT_NULL;
		}
	} else {
		kev->ext[1] = size;
		kev->data = MACH_PORT_NULL;
	}

	/*
	 * If we used a data buffer carved out from the filt_process data,
	 * store the address used in the knote and adjust the residual and
	 * other parameters for future use.
	 */
	if (kectx) {
		assert(kectx->kec_data_resid >= size);
		kectx->kec_data_resid -= size;
		if ((kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) == 0) {
			kev->ext[0] = kectx->kec_data_out;
			kectx->kec_data_out += size;
		} else {
			assert(option & MACH_RCV_STACK);
			kev->ext[0] = kectx->kec_data_out + kectx->kec_data_resid;
		}
	}

	/*
	 * Apply message-based QoS values to output kevent as prescribed.
	 * The kev->ext[2] field gets (msg-qos << 32) | (override-qos).
	 *
	 * The mach_msg_receive_results() call saved off the message
	 * QoS values in the continuation save area on successful receive.
	 */
	if (kev->fflags == MACH_MSG_SUCCESS) {
		kev->ext[2] = ((uint64_t)self->ith_ppriority << 32) |
		    _pthread_priority_make_from_thread_qos(self->ith_qos_override, 0, 0);
	}

	return FILTER_ACTIVE;
}
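
/*
 * For illustration (userspace side, not part of this file): unpacking the
 * kev->ext[2] QoS word produced above. A minimal sketch assuming a received
 * `struct kevent_qos_s kev`; the layout is the (msg-qos << 32) |
 * (override-qos) packing documented in the comment above, both halves
 * carrying pthread_priority_t-encoded values.
 *
 *	pthread_priority_t msg_pp      = (pthread_priority_t)(kev.ext[2] >> 32);
 *	pthread_priority_t override_pp = (pthread_priority_t)(kev.ext[2] & 0xffffffffull);
 */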
/*
 * Peek to see if the message queue associated with the knote has any
 * events. This pre-hook is called when a filter uses the stay-
 * on-queue mechanism (as the knote_link_waitq mechanism does for
 * portsets) and someone calls select() against the containing kqueue.
 *
 * Just peek at the pre-post status of the portset's wait queue
 * to determine if it has anything interesting.  We can do it
 * without holding the lock, as it is just a snapshot in time
 * (if this is used as part of really waiting for events, we
 * will catch changes in this status when the event gets posted
 * up to the knote's kqueue).
 */
static int
filt_machportpeek(struct knote *kn)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;

	return ipc_mqueue_set_peek(mqueue) ? FILTER_ACTIVE : 0;
}
SECURITY_READ_ONLY_EARLY(struct filterops) machport_filtops = {
	.f_adjusts_qos = true,
	.f_extended_codes = true,
	.f_attach = filt_machportattach,
	.f_detach = filt_machportdetach,
	.f_event = filt_machportevent,
	.f_touch = filt_machporttouch,
	.f_process = filt_machportprocess,
	.f_peek = filt_machportpeek,
};