X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/d9a64523371fa019c4575bb400cbbc3a50ac9903..94ff46dc2849db4d43eaaf144872decc522aafb4:/osfmk/ipc/ipc_pset.c diff --git a/osfmk/ipc/ipc_pset.c b/osfmk/ipc/ipc_pset.c index 8a8e12979..e73364b48 100644 --- a/osfmk/ipc/ipc_pset.c +++ b/osfmk/ipc/ipc_pset.c @@ -2,7 +2,7 @@ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,34 +22,34 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ */ -/* +/* * Mach Operating System * Copyright (c) 1991,1990,1989 Carnegie Mellon University * All Rights Reserved. - * + * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * + * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to - * + * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 - * + * * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. 
*/ @@ -94,23 +94,23 @@ kern_return_t ipc_pset_alloc( - ipc_space_t space, - mach_port_name_t *namep, - ipc_pset_t *psetp) + ipc_space_t space, + mach_port_name_t *namep, + ipc_pset_t *psetp) { ipc_pset_t pset; mach_port_name_t name; kern_return_t kr; kr = ipc_object_alloc(space, IOT_PORT_SET, - MACH_PORT_TYPE_PORT_SET, 0, - &name, (ipc_object_t *) &pset); + MACH_PORT_TYPE_PORT_SET, 0, + &name, (ipc_object_t *) &pset); if (kr != KERN_SUCCESS) { return kr; } /* pset and space are locked */ - ipc_mqueue_init(&pset->ips_messages, TRUE /* set */); + ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET); is_write_unlock(space); *namep = name; @@ -134,22 +134,22 @@ ipc_pset_alloc( kern_return_t ipc_pset_alloc_name( - ipc_space_t space, - mach_port_name_t name, - ipc_pset_t *psetp) + ipc_space_t space, + mach_port_name_t name, + ipc_pset_t *psetp) { ipc_pset_t pset; kern_return_t kr; kr = ipc_object_alloc_name(space, IOT_PORT_SET, - MACH_PORT_TYPE_PORT_SET, 0, - name, (ipc_object_t *) &pset); + MACH_PORT_TYPE_PORT_SET, 0, + name, (ipc_object_t *) &pset); if (kr != KERN_SUCCESS) { return kr; } /* pset is locked */ - ipc_mqueue_init(&pset->ips_messages, TRUE /* set */); + ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET); *psetp = pset; return KERN_SUCCESS; @@ -175,18 +175,18 @@ ipc_pset_alloc_special( assert(space->is_table == IE_NULL); assert(!is_active(space)); - __IGNORE_WCASTALIGN(pset = (ipc_pset_t)io_alloc(IOT_PORT_SET)); + pset = ips_object_to_pset(io_alloc(IOT_PORT_SET)); if (pset == IPS_NULL) { return IPS_NULL; } bzero((char *)pset, sizeof(*pset)); - io_lock_init(&pset->ips_object); + io_lock_init(ips_to_object(pset)); pset->ips_references = 1; pset->ips_object.io_bits = io_makebits(TRUE, IOT_PORT_SET, 0); - ipc_mqueue_init(&pset->ips_messages, TRUE /* set */); + ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET); return pset; } @@ -202,12 +202,12 @@ ipc_pset_alloc_special( */ boolean_t ipc_pset_member( - ipc_pset_t pset, - ipc_port_t port) + ipc_pset_t pset, + ipc_port_t port) { - assert(ip_active(port)); + require_ip_active(port); - return (ipc_mqueue_member(&port->ip_messages, &pset->ips_messages)); + return ipc_mqueue_member(&port->ip_messages, &pset->ips_messages); } @@ -222,18 +222,18 @@ ipc_pset_member( kern_return_t ipc_pset_add( - ipc_pset_t pset, - ipc_port_t port, - uint64_t *reserved_link, - uint64_t *reserved_prepost) + ipc_pset_t pset, + ipc_port_t port, + uint64_t *reserved_link, + uint64_t *reserved_prepost) { kern_return_t kr; assert(ips_active(pset)); - assert(ip_active(port)); + require_ip_active(port); kr = ipc_mqueue_add(&port->ip_messages, &pset->ips_messages, - reserved_link, reserved_prepost); + reserved_link, reserved_prepost); return kr; } @@ -252,15 +252,15 @@ ipc_pset_add( kern_return_t ipc_pset_remove( - ipc_pset_t pset, - ipc_port_t port) + ipc_pset_t pset, + ipc_port_t port) { kern_return_t kr; + require_ip_active(port); - assert(ip_active(port)); - - if (port->ip_in_pset == 0) + if (port->ip_in_pset == 0) { return KERN_NOT_IN_SET; + } kr = ipc_mqueue_remove(&port->ip_messages, &pset->ips_messages); @@ -286,8 +286,9 @@ ipc_pset_lazy_allocate( ipc_pset_t pset; kr = ipc_right_lookup_read(space, psname, &entry); - if (kr != KERN_SUCCESS) + if (kr != KERN_SUCCESS) { return kr; + } /* space is read-locked and active */ if ((entry->ie_bits & MACH_PORT_TYPE_PORT_SET) == 0) { @@ -297,7 +298,7 @@ ipc_pset_lazy_allocate( } psobj = entry->ie_object; - __IGNORE_WCASTALIGN(pset = (ipc_pset_t) psobj); + pset = ips_object_to_pset(psobj); assert(pset != 
NULL); ipc_mqueue_t set_mqueue = &pset->ips_messages; struct waitq_set *wqset = &set_mqueue->imq_set_queue; @@ -326,12 +327,13 @@ ipc_pset_lazy_allocate( kern_return_t ipc_pset_remove_from_all( - ipc_port_t port) + ipc_port_t port) { - if (port->ip_in_pset == 0) + if (port->ip_in_pset == 0) { return KERN_NOT_IN_SET; + } - /* + /* * Remove the port's mqueue from all sets */ ipc_mqueue_remove_from_all(&port->ip_messages); @@ -351,7 +353,8 @@ ipc_pset_remove_from_all( void ipc_pset_destroy( - ipc_pset_t pset) + ipc_space_t space, + ipc_pset_t pset) { assert(ips_active(pset)); @@ -368,7 +371,7 @@ ipc_pset_destroy( * discover the change. */ imq_lock(&pset->ips_messages); - ipc_mqueue_changed(&pset->ips_messages); + ipc_mqueue_changed(space, &pset->ips_messages); imq_unlock(&pset->ips_messages); ipc_mqueue_deinit(&pset->ips_messages); @@ -380,7 +383,7 @@ ipc_pset_destroy( /* * Kqueue EVFILT_MACHPORT support * - * - kn_ptr.p_mqueue points to the monitored mqueue + * - kn_mqueue points to the monitored mqueue * * - (in/out) ext[0] holds a mach_vm_address_t to a userspace buffer * that can be used to direct-deliver messages when @@ -418,12 +421,18 @@ filt_machport_adjust_qos(struct knote *kn, ipc_kmsg_t first) } struct turnstile * -filt_machport_kqueue_turnstile(struct knote *kn) +filt_ipc_kqueue_turnstile(struct knote *kn) { - if ((kn->kn_sfflags & MACH_RCV_MSG) && (kn->kn_status & KN_DISPATCH)) { - return kqueue_turnstile(knote_get_kq(kn)); - } - return TURNSTILE_NULL; + assert(kn->kn_filter == EVFILT_MACHPORT || kn->kn_filter == EVFILT_WORKLOOP); + return kqueue_turnstile(knote_get_kq(kn)); +} + +bool +filt_machport_kqueue_has_turnstile(struct knote *kn) +{ + assert(kn->kn_filter == EVFILT_MACHPORT); + return ((kn->kn_sfflags & MACH_RCV_MSG) || (kn->kn_sfflags & MACH_RCV_SYNC_PEEK)) + && (kn->kn_flags & EV_DISPATCH); } /* @@ -440,35 +449,37 @@ filt_machport_kqueue_turnstile(struct knote *kn) struct turnstile * filt_machport_stash_port(struct knote *kn, ipc_port_t port, int *link) { - struct turnstile *ts = filt_machport_kqueue_turnstile(kn); + struct turnstile *ts = TURNSTILE_NULL; - if (!ts) { - if (link) *link = PORT_SYNC_LINK_NO_LINKAGE; + if (kn->kn_filter == EVFILT_WORKLOOP) { + assert(kn->kn_mqueue == NULL); + kn->kn_mqueue = &port->ip_messages; + ip_reference(port); + if (link) { + *link = PORT_SYNC_LINK_WORKLOOP_KNOTE; + } + ts = filt_ipc_kqueue_turnstile(kn); + } else if (!filt_machport_kqueue_has_turnstile(kn)) { + if (link) { + *link = PORT_SYNC_LINK_NO_LINKAGE; + } } else if (kn->kn_ext[3] == 0) { ip_reference(port); kn->kn_ext[3] = (uintptr_t)port; - if (link) *link = PORT_SYNC_LINK_WORKLOOP_KNOTE; + ts = filt_ipc_kqueue_turnstile(kn); + if (link) { + *link = PORT_SYNC_LINK_WORKLOOP_KNOTE; + } } else { ts = (struct turnstile *)kn->kn_hook; - if (link) *link = PORT_SYNC_LINK_WORKLOOP_STASH; + if (link) { + *link = PORT_SYNC_LINK_WORKLOOP_STASH; + } } return ts; } -struct turnstile * -filt_machport_stashed_special_reply_port_turnstile(ipc_port_t port) -{ - struct knote *kn = port->ip_sync_inheritor_knote; - - assert(port->ip_specialreply); - assert(port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE); - if (kn->kn_ext[3] == (uint64_t)port) { - return kqueue_turnstile(knote_get_kq(kn)); - } - return kn->kn_hook; -} - /* * Lazily prepare a turnstile so that filt_machport_stash_port() * can be called with the mqueue lock held. 
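A side note on the cast cleanup visible in the hunks above: the old __IGNORE_WCASTALIGN((ipc_pset_t)...) casts are replaced by typed converters such as ips_object_to_pset() and ips_to_object(). The sketch below is not the XNU definition, just the usual shape of such a converter pair when the common object header is the struct's first member; it shows why one audited cast beats suppressing -Wcast-align at every call site (all names here are illustrative stand-ins):

/* Illustrative stand-ins only; the real layouts live in ipc_object.h / ipc_pset.h. */
struct io_object_s { unsigned io_bits; };           /* common IPC object header */

struct io_pset_s {
	struct io_object_s ips_object;              /* header is the first member */
	unsigned           ips_references;
};

/* Downcast: header-first layout means both pointers share an address,
 * so the one cast lives here instead of at every call site. */
static inline struct io_pset_s *
object_to_pset_sketch(struct io_object_s *io)
{
	return (struct io_pset_s *)(void *)io;
}

/* Upcast: no cast needed at all. */
static inline struct io_object_s *
pset_to_object_sketch(struct io_pset_s *pset)
{
	return &pset->ips_object;
}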
@@ -483,28 +494,94 @@ filt_machport_stashed_special_reply_port_turnstile(ipc_port_t port) */ void filt_machport_turnstile_prepare_lazily( - struct knote *kn, - mach_msg_type_name_t msgt_name, - ipc_port_t port) + struct knote *kn, + mach_msg_type_name_t msgt_name, + ipc_port_t port) { /* This is called from within filt_machportprocess */ assert((kn->kn_status & KN_SUPPRESSED) && (kn->kn_status & KN_LOCKED)); - struct turnstile *ts = filt_machport_kqueue_turnstile(kn); - if (ts == TURNSTILE_NULL || kn->kn_ext[3] == 0 || kn->kn_hook) + if (!filt_machport_kqueue_has_turnstile(kn)) { return; + } + if (kn->kn_ext[3] == 0 || kn->kn_hook) { + return; + } + + struct turnstile *ts = filt_ipc_kqueue_turnstile(kn); if ((msgt_name == MACH_MSG_TYPE_PORT_SEND_ONCE && port->ip_specialreply) || - (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE)) { + (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE)) { struct turnstile *kn_ts = turnstile_alloc(); kn_ts = turnstile_prepare((uintptr_t)kn, - (struct turnstile **)&kn->kn_hook, kn_ts, TURNSTILE_KNOTE); + (struct turnstile **)&kn->kn_hook, kn_ts, TURNSTILE_KNOTE); turnstile_update_inheritor(kn_ts, ts, - TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE); + TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE); turnstile_cleanup(); } } +static void +filt_machport_turnstile_complete_port(struct knote *kn, ipc_port_t port, + ipc_mqueue_t mqueue) +{ + struct turnstile *ts = TURNSTILE_NULL; + + ip_lock(port); + if (port->ip_specialreply) { + /* + * If the reply has been sent to the special reply port already, + * then the special reply port may already be reused to do something + * entirely different. + * + * However, the only reason for it to still point to this knote is + * that it's still waiting for a reply, so when this is the case, + * neuter the linkage. + */ + if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE && + port->ip_sync_inheritor_knote == kn) { + ipc_port_adjust_special_reply_port_locked(port, NULL, + (IPC_PORT_ADJUST_SR_NONE | IPC_PORT_ADJUST_SR_ENABLE_EVENT), FALSE); + } else { + ip_unlock(port); + } + } else { + /* + * For receive rights, if their IMQ_KNOTE() is still this + * knote, then sever the link. + */ + imq_lock(mqueue); + if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE && + mqueue->imq_inheritor_knote == kn) { + ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL); + ts = port_send_turnstile(port); + } + if (ts) { + turnstile_reference(ts); + turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, + TURNSTILE_IMMEDIATE_UPDATE); + } + imq_unlock(mqueue); + ip_unlock(port); + + if (ts) { + turnstile_update_inheritor_complete(ts, + TURNSTILE_INTERLOCK_NOT_HELD); + turnstile_deallocate(ts); + } + } + + ip_release(port); +} + +void +filt_wldetach_sync_ipc(struct knote *kn) +{ + ipc_mqueue_t mqueue = kn->kn_mqueue; + filt_machport_turnstile_complete_port(kn, ip_from_mq(mqueue), mqueue); + kn->kn_mqueue = NULL; +} + /* * Other half of filt_machport_turnstile_prepare_lazily() * @@ -513,75 +590,20 @@ filt_machport_turnstile_prepare_lazily( static void filt_machport_turnstile_complete(struct knote *kn) { - struct turnstile *ts = TURNSTILE_NULL; - if (kn->kn_ext[3]) { ipc_port_t port = (ipc_port_t)kn->kn_ext[3]; - ipc_mqueue_t mqueue = &port->ip_messages; - - ip_lock(port); - if (port->ip_specialreply) { - /* - * If the reply has been sent to the special reply port already, - * then the special reply port may already be reused to do something - * entirely different. 
- * - * However, the only reason for it to still point to this knote is - * that it's still waiting for a reply, so when this is the case, - * neuter the linkage. - */ - if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE && - port->ip_sync_inheritor_knote == kn) { - ipc_port_adjust_special_reply_port_locked(port, NULL, - (IPC_PORT_ADJUST_SR_NONE | IPC_PORT_ADJUST_SR_ENABLE_EVENT), FALSE); - } else { - ip_unlock(port); - } - } else { - struct turnstile *kq_ts = kqueue_turnstile(knote_get_kq(kn)); - - /* - * For receive rights, if their IMQ_INHERITOR() is still this - * workloop, then sever the link. - * - * It has a theoretical hole: if the port is sent again to a new - * receive right that is also monitored by the same kqueue, - * we would sever the link incorrectly. - * - * However this would be a REALLY cumbersome thing to do. - */ - imq_lock(mqueue); - if (!IMQ_KLIST_VALID(mqueue) && IMQ_INHERITOR(mqueue) == kq_ts) { - turnstile_deallocate_safe(kq_ts); - klist_init(&mqueue->imq_klist); - ts = port_send_turnstile(port); - } - if (ts) { - turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, - TURNSTILE_IMMEDIATE_UPDATE); - turnstile_reference(ts); - } - imq_unlock(mqueue); - ip_unlock(port); - - if (ts) { - turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD); - turnstile_deallocate(ts); - } - } - - ip_release(port); + filt_machport_turnstile_complete_port(kn, port, &port->ip_messages); kn->kn_ext[3] = 0; } if (kn->kn_hook) { - ts = kn->kn_hook; + struct turnstile *ts = kn->kn_hook; turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, - TURNSTILE_IMMEDIATE_UPDATE); + TURNSTILE_IMMEDIATE_UPDATE); turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD); - turnstile_complete((uintptr_t)kn, (struct turnstile **)&kn->kn_hook, &ts); + turnstile_complete((uintptr_t)kn, (struct turnstile **)&kn->kn_hook, &ts, TURNSTILE_KNOTE); turnstile_cleanup(); assert(ts); @@ -589,16 +611,105 @@ filt_machport_turnstile_complete(struct knote *kn) } } +static void +filt_machport_link(ipc_mqueue_t mqueue, struct knote *kn) +{ + struct knote *hd = SLIST_FIRST(&mqueue->imq_klist); + + if (hd && filt_machport_kqueue_has_turnstile(kn)) { + SLIST_INSERT_AFTER(hd, kn, kn_selnext); + } else { + SLIST_INSERT_HEAD(&mqueue->imq_klist, kn, kn_selnext); + } +} + +static void +filt_machport_unlink(ipc_mqueue_t mqueue, struct knote *kn) +{ + struct knote **knprev; + + KNOTE_DETACH(&mqueue->imq_klist, kn); + + /* make sure the first knote is a knote we can push on */ + SLIST_FOREACH_PREVPTR(kn, knprev, &mqueue->imq_klist, kn_selnext) { + if (filt_machport_kqueue_has_turnstile(kn)) { + *knprev = SLIST_NEXT(kn, kn_selnext); + SLIST_INSERT_HEAD(&mqueue->imq_klist, kn, kn_selnext); + break; + } + } +} + +int +filt_wlattach_sync_ipc(struct knote *kn) +{ + mach_port_name_t name = (mach_port_name_t)kn->kn_id; + ipc_space_t space = current_space(); + ipc_entry_t entry; + ipc_port_t port = IP_NULL; + int error = 0; + + if (ipc_right_lookup_read(space, name, &entry) != KERN_SUCCESS) { + return ENOENT; + } + + /* space is read-locked */ + + if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) { + port = ip_object_to_port(entry->ie_object); + if (port->ip_specialreply) { + error = ENOENT; + } + } else if (entry->ie_bits & MACH_PORT_TYPE_SEND_ONCE) { + port = ip_object_to_port(entry->ie_object); + if (!port->ip_specialreply) { + error = ENOENT; + } + } else { + error = ENOENT; + } + if (error) { + is_read_unlock(space); + return error; + } + + ip_lock(port); + is_read_unlock(space); + + if 
(port->ip_sync_link_state == PORT_SYNC_LINK_ANY) { + ip_unlock(port); + /* + * We cannot start a sync IPC inheritance chain, only extend an existing one. + * Note: this can also happen if the inheritance chain broke + * because the original requestor died. + */ + return ENOENT; + } + + if (port->ip_specialreply) { + ipc_port_adjust_special_reply_port_locked(port, kn, + IPC_PORT_ADJUST_SR_LINK_WORKLOOP, FALSE); + } else { + ipc_port_adjust_port_locked(port, kn, FALSE); + } + + /* make sure the port was stashed */ + assert(kn->kn_mqueue == &port->ip_messages); + + /* port has been unlocked by ipc_port_adjust_* */ + + return 0; +} + static int filt_machportattach( - struct knote *kn, - __unused struct kevent_internal_s *kev) + struct knote *kn, + __unused struct kevent_qos_s *kev) { - mach_port_name_t name = (mach_port_name_t)kn->kn_kevent.ident; + mach_port_name_t name = (mach_port_name_t)kn->kn_id; uint64_t wq_link_id = waitq_link_reserve(NULL); ipc_space_t space = current_space(); ipc_kmsg_t first; - struct turnstile *turnstile = TURNSTILE_NULL; struct turnstile *send_turnstile = TURNSTILE_NULL; int error; @@ -610,133 +721,174 @@ filt_machportattach( kn->kn_flags &= ~EV_EOF; kn->kn_ext[3] = 0; - if ((kn->kn_sfflags & MACH_RCV_MSG) && (kn->kn_status & KN_DISPATCH)) { + if (filt_machport_kqueue_has_turnstile(kn)) { /* * If the filter is likely to support sync IPC override, * and it happens to be attaching to a workloop, * make sure the workloop has an allocated turnstile. */ - turnstile = kqueue_alloc_turnstile(knote_get_kq(kn)); + kqueue_alloc_turnstile(knote_get_kq(kn)); } +lookup_again: kr = ipc_right_lookup_read(space, name, &entry); -check_lookup: - if (kr == KERN_SUCCESS) { - /* space is read-locked and active */ - - if (entry->ie_bits & MACH_PORT_TYPE_PORT_SET) { - ipc_pset_t pset; - - if (knote_link_waitqset_should_lazy_alloc(kn)) { - is_read_unlock(space); - - /* - * We need to link the portset of the kn, - * to insure that the link is allocated before taking - * any spinlocks. - */ - knote_link_waitqset_lazy_alloc(kn); - - /* - * We had to drop the space lock because knote_link_waitqset_lazy_alloc() - * could have allocated memory. The ipc_right_lookup_read() - * function returns with the space locked, so we need to revalidate state. - */ - kr = ipc_right_lookup_read(space, name, &entry); - if (!(kr == KERN_SUCCESS) || !(entry->ie_bits & MACH_PORT_TYPE_PORT_SET)) { - goto check_lookup; - } - } + if (kr != KERN_SUCCESS) { + error = ENOENT; + goto out; + } + + /* space is read-locked and active */ + + if ((entry->ie_bits & MACH_PORT_TYPE_PORT_SET) && + knote_link_waitqset_should_lazy_alloc(kn)) { + is_read_unlock(space); + + /* + * We need to link the portset of the kn, + * to ensure that the link is allocated before taking + * any spinlocks. + * + * Because we have to drop the space lock so that + * knote_link_waitqset_lazy_alloc() can allocate memory, + * we will need to redo the lookup. + */ + knote_link_waitqset_lazy_alloc(kn); + goto lookup_again; + } + + if (entry->ie_bits & MACH_PORT_TYPE_PORT_SET) { + ipc_pset_t pset; + + pset = ips_object_to_pset(entry->ie_object); + mqueue = &pset->ips_messages; + ips_reference(pset); + + imq_lock(mqueue); + kn->kn_mqueue = mqueue; + + /* + * Bind the portset wait queue directly to knote/kqueue. + * This allows us to just use wait_queue foo to effect a wakeup, + * rather than having to call knote() from the Mach code on each + * message. We still attach the knote to the mqueue klist for + * NOTE_REVOKE purposes only.
+ */ + error = knote_link_waitq(kn, &mqueue->imq_wait_queue, &wq_link_id); + if (!error) { + filt_machport_link(mqueue, kn); + imq_unlock(mqueue); + } else { + kn->kn_mqueue = IMQ_NULL; + imq_unlock(mqueue); + ips_release(pset); + } - __IGNORE_WCASTALIGN(pset = (ipc_pset_t)entry->ie_object); - mqueue = &pset->ips_messages; - ips_reference(pset); + is_read_unlock(space); - imq_lock(mqueue); - kn->kn_ptr.p_mqueue = mqueue; + /* + * linked knotes are marked stay-active and therefore don't + * need an indication of their fired state to be returned + * from the attach operation. + */ + } else if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) { + ipc_port_t port = ip_object_to_port(entry->ie_object); + if (port->ip_specialreply) { /* - * Bind the portset wait queue directly to knote/kqueue. - * This allows us to just use wait_queue foo to effect a wakeup, - * rather than having to call knote() from the Mach code on each - * message. We still attach the knote to the mqueue klist for - * NOTE_REVOKE purposes only. + * Registering for kevents on special reply ports + * isn't supported for two reasons: + * + * 1. it really makes very little sense for a port that + * is supposed to be used synchronously + * + * 2. their mqueue's imq_klist field will be used to + * store the receive turnstile, so we can't possibly + * attach them anyway. */ - error = knote_link_waitq(kn, &mqueue->imq_wait_queue, &wq_link_id); - if (!error) { - assert(IMQ_KLIST_VALID(mqueue)); - KNOTE_ATTACH(&mqueue->imq_klist, kn); - imq_unlock(mqueue); - } else { - kn->kn_ptr.p_mqueue = IMQ_NULL; - imq_unlock(mqueue); - ips_release(pset); - } - is_read_unlock(space); + error = ENOTSUP; + goto out; + } + + mqueue = &port->ip_messages; + ip_reference(port); + + /* + * attach knote to port and determine result + * If the filter requested direct message receipt, + * we may need to adjust the qos of the knote to + * reflect the requested and override qos of the + * first message in the queue. + */ + ip_lock(port); + imq_lock(mqueue); + kn->kn_mqueue = mqueue; + if (port->ip_sync_link_state != PORT_SYNC_LINK_ANY) { /* - * linked knotes are marked stay-active and therefore don't - * need an indication of their fired state to be returned - * from the attach operation. + * We're attaching a port that used to have an IMQ_KNOTE, + * clobber this state, we'll fixup its turnstile inheritor below. */ + ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL); + } + filt_machport_link(mqueue, kn); - } else if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) { - ipc_port_t port; + if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) { + result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first); + } - __IGNORE_WCASTALIGN(port = (ipc_port_t)entry->ie_object); - mqueue = &port->ip_messages; - ip_reference(port); + /* + * Update the port's turnstile inheritor + * + * Unlike filt_machportdetach(), we don't have to care about races for + * turnstile_workloop_pusher_info(): filt_machport_link() doesn't affect + * already pushing knotes, and if the current one becomes the new + * pusher, it'll only be visible when turnstile_workloop_pusher_info() + * returns. 
*/ + send_turnstile = port_send_turnstile(port); + if (send_turnstile) { + turnstile_reference(send_turnstile); + ipc_port_send_update_inheritor(port, send_turnstile, + TURNSTILE_IMMEDIATE_UPDATE); /* - * attach knote to port and determine result - * If the filter requested direct message receipt, - * we may need to adjust the qos of the knote to - * reflect the requested and override qos of the - * first message in the queue. + * rdar://problem/48861190 + * + * When a listener connection resumes a peer, + * updating the inheritor above has moved the push + * from the current thread to the workloop. + * + * However, we haven't told the workloop yet + * that it needs a thread request, and we risk + * being preempted as soon as we drop the space + * lock below. + * + * To avoid this, disable preemption and let kevent + * reenable it after it takes the kqlock. */ - imq_lock(mqueue); - kn->kn_ptr.p_mqueue = mqueue; - if (!IMQ_KLIST_VALID(mqueue)) { - /* - * We're attaching a port that used to have an IMQ_INHERITOR, - * clobber this state, and set the inheritor of its turnstile - * to the kqueue it's now attached to. - */ - turnstile_deallocate_safe(IMQ_INHERITOR(mqueue)); - klist_init(&mqueue->imq_klist); - } - KNOTE_ATTACH(&mqueue->imq_klist, kn); - - /* Update the port's turnstile inheritor */ - send_turnstile = port_send_turnstile(port); - if (send_turnstile) { - turnstile_reference(send_turnstile); - turnstile_update_inheritor(send_turnstile, turnstile, - (TURNSTILE_INHERITOR_TURNSTILE | TURNSTILE_IMMEDIATE_UPDATE)); - } + disable_preemption(); + result |= FILTER_THREADREQ_NODEFEER; + } - if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) { - result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first); - } - imq_unlock(mqueue); - is_read_unlock(space); - if (send_turnstile) { - turnstile_update_inheritor_complete(send_turnstile, - TURNSTILE_INTERLOCK_NOT_HELD); - turnstile_deallocate(send_turnstile); - } + imq_unlock(mqueue); + ip_unlock(port); - error = 0; - } else { - is_read_unlock(space); - error = ENOTSUP; + is_read_unlock(space); + if (send_turnstile) { + turnstile_update_inheritor_complete(send_turnstile, + TURNSTILE_INTERLOCK_NOT_HELD); + turnstile_deallocate_safe(send_turnstile); } - } else { - error = ENOENT; + + error = 0; + } else { + is_read_unlock(space); + error = ENOTSUP; } +out: waitq_link_release(wq_link_id); /* bail out on errors */ @@ -748,18 +900,17 @@ check_lookup: return result; } -/* NOT proud of these - we should have a stricter relationship between mqueue and ipc object */ -#define mqueue_to_pset(mq) ((ipc_pset_t)((uintptr_t)mq-offsetof(struct ipc_pset, ips_messages))) -#define mqueue_to_port(mq) ((ipc_port_t)((uintptr_t)mq-offsetof(struct ipc_port, ip_messages))) -#define mqueue_to_object(mq) (((ipc_object_t)(mq)) - 1) - +/* Validate imq_to_object implementation "works" */ +_Static_assert(offsetof(struct ipc_pset, ips_messages) == + offsetof(struct ipc_port, ip_messages), + "Make sure the mqueue aliases in both ports and psets"); static void filt_machportdetach( struct knote *kn) { - ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; - ipc_object_t object = mqueue_to_object(mqueue); + ipc_mqueue_t mqueue = kn->kn_mqueue; + ipc_object_t object = imq_to_object(mqueue); struct turnstile *send_turnstile = TURNSTILE_NULL; filt_machport_turnstile_complete(kn); @@ -770,29 +921,41 @@ filt_machportdetach( * ipc_mqueue_changed() already unhooked this knote from the mqueue, */ } else { - assert(IMQ_KLIST_VALID(mqueue)); - KNOTE_DETACH(&mqueue->imq_klist, kn);
- } + ipc_port_t port = IP_NULL; + + /* + * When the knote being detached is the first one in the list, + * then unlinking the knote *and* updating the turnstile inheritor + * need to happen atomically with respect to the callers of + * turnstile_workloop_pusher_info(). + * + * The caller of turnstile_workloop_pusher_info() will use the kq req + * lock (and hence the kqlock), so we just need to hold the kqlock too. + */ + if (io_otype(object) == IOT_PORT) { + port = ip_object_to_port(object); + assert(port->ip_sync_link_state == PORT_SYNC_LINK_ANY); + if (kn == SLIST_FIRST(&mqueue->imq_klist)) { + send_turnstile = port_send_turnstile(port); + } + } - if (io_otype(object) == IOT_PORT) { - ipc_port_t port = ip_from_mq(mqueue); + filt_machport_unlink(mqueue, kn); - send_turnstile = port_send_turnstile(port); if (send_turnstile) { turnstile_reference(send_turnstile); - turnstile_update_inheritor(send_turnstile, - ipc_port_get_inheritor(port), - TURNSTILE_INHERITOR_TURNSTILE | TURNSTILE_IMMEDIATE_UPDATE); + ipc_port_send_update_inheritor(port, send_turnstile, + TURNSTILE_IMMEDIATE_UPDATE); } } /* Clear the knote pointer once the knote has been removed from turnstile */ - kn->kn_ptr.p_mqueue = IMQ_NULL; + kn->kn_mqueue = IMQ_NULL; imq_unlock(mqueue); if (send_turnstile) { turnstile_update_inheritor_complete(send_turnstile, - TURNSTILE_INTERLOCK_NOT_HELD); + TURNSTILE_INTERLOCK_NOT_HELD); turnstile_deallocate(send_turnstile); } @@ -819,31 +982,18 @@ filt_machportdetach( * the message is to be direct-received, we adjust the * QoS of the knote according the requested and override * QoS of that first message. - * - * NOTE_REVOKE events are a legacy way to indicate that the port/portset - * was deallocated or left the current Mach portspace (modern technique - * is with an EV_VANISHED protocol). If we see NOTE_REVOKE, deliver an - * EV_EOF event for these changes (hopefully it will get delivered before - * the port name recycles to the same generation count and someone tries - * to re-register a kevent for it or the events are udata-specific - - * avoiding a conflict). 
*/ static int -filt_machportevent( - struct knote *kn, - long hint) +filt_machportevent(struct knote *kn, long hint __assert_only) { - ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; + ipc_mqueue_t mqueue = kn->kn_mqueue; ipc_kmsg_t first; int result = 0; /* mqueue locked by caller */ assert(imq_held(mqueue)); - - if (hint == NOTE_REVOKE) { - kn->kn_flags |= EV_EOF | EV_ONESHOT; - result = FILTER_ACTIVE | FILTER_RESET_EVENT_QOS; - } else if (imq_is_valid(mqueue)) { + assert(hint != NOTE_REVOKE); + if (imq_is_valid(mqueue)) { assert(!imq_is_set(mqueue)); if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) { result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first); @@ -856,9 +1006,9 @@ filt_machportevent( static int filt_machporttouch( struct knote *kn, - struct kevent_internal_s *kev) + struct kevent_qos_s *kev) { - ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; + ipc_mqueue_t mqueue = kn->kn_mqueue; ipc_kmsg_t first; int result = 0; @@ -895,23 +1045,20 @@ filt_machporttouch( } static int -filt_machportprocess( - struct knote *kn, - struct filt_process_s *process_data, - struct kevent_internal_s *kev) +filt_machportprocess(struct knote *kn, struct kevent_qos_s *kev) { - ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; - ipc_object_t object = mqueue_to_object(mqueue); + ipc_mqueue_t mqueue = kn->kn_mqueue; + ipc_object_t object = imq_to_object(mqueue); thread_t self = current_thread(); - boolean_t used_filtprocess_data = FALSE; + kevent_ctx_t kectx = NULL; wait_result_t wresult; mach_msg_option_t option; mach_vm_address_t addr; - mach_msg_size_t size; + mach_msg_size_t size; /* Capture current state */ - *kev = kn->kn_kevent; + knote_fill_kevent(kn, kev, MACH_PORT_NULL); kev->ext[3] = 0; /* hide our port reference from userspace */ /* If already deallocated/moved return one last EOF event */ @@ -924,8 +1071,8 @@ filt_machportprocess( * provided, just force a MACH_RCV_TOO_LARGE to detect the * name of the port and sizeof the waiting message. */ - option = kn->kn_sfflags & (MACH_RCV_MSG|MACH_RCV_LARGE|MACH_RCV_LARGE_IDENTITY| - MACH_RCV_TRAILER_MASK|MACH_RCV_VOUCHER); + option = kn->kn_sfflags & (MACH_RCV_MSG | MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY | + MACH_RCV_TRAILER_MASK | MACH_RCV_VOUCHER | MACH_MSG_STRICT_REPLY); if (option & MACH_RCV_MSG) { addr = (mach_vm_address_t) kn->kn_ext[0]; @@ -935,14 +1082,14 @@ filt_machportprocess( * If the kevent didn't specify a buffer and length, carve a buffer * from the filter processing data according to the flags. 
*/ - if (size == 0 && process_data != NULL) { - used_filtprocess_data = TRUE; - - addr = (mach_vm_address_t)process_data->fp_data_out; - size = (mach_msg_size_t)process_data->fp_data_resid; + if (size == 0) { + kectx = kevent_get_context(self); + addr = (mach_vm_address_t)kectx->kec_data_out; + size = (mach_msg_size_t)kectx->kec_data_resid; option |= (MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY); - if (process_data->fp_flags & KEVENT_FLAG_STACK_DATA) + if (kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) { option |= MACH_RCV_STACK; + } } } else { /* just detect the port name (if a set) and size of the first message */ @@ -975,12 +1122,12 @@ filt_machportprocess( self->ith_knote = kn; wresult = ipc_mqueue_receive_on_thread( - mqueue, - option, - size, /* max_size */ - 0, /* immediate timeout */ - THREAD_INTERRUPTIBLE, - self); + mqueue, + option, + size, /* max_size */ + 0, /* immediate timeout */ + THREAD_INTERRUPTIBLE, + self); /* mqueue unlocked */ /* @@ -1024,10 +1171,11 @@ filt_machportprocess( */ if (kev->fflags == MACH_RCV_TOO_LARGE) { kev->ext[1] = self->ith_msize; - if (option & MACH_RCV_LARGE_IDENTITY) + if (option & MACH_RCV_LARGE_IDENTITY) { kev->data = self->ith_receiver_name; - else + } else { kev->data = MACH_PORT_NULL; + } } else { kev->ext[1] = size; kev->data = MACH_PORT_NULL; @@ -1038,16 +1186,15 @@ filt_machportprocess( * store the address used in the knote and adjust the residual and * other parameters for future use. */ - if (used_filtprocess_data) { - assert(process_data->fp_data_resid >= size); - process_data->fp_data_resid -= size; - if ((process_data->fp_flags & KEVENT_FLAG_STACK_DATA) == 0) { - kev->ext[0] = process_data->fp_data_out; - process_data->fp_data_out += size; + if (kectx) { + assert(kectx->kec_data_resid >= size); + kectx->kec_data_resid -= size; + if ((kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) == 0) { + kev->ext[0] = kectx->kec_data_out; + kectx->kec_data_out += size; } else { assert(option & MACH_RCV_STACK); - kev->ext[0] = process_data->fp_data_out + - process_data->fp_data_resid; + kev->ext[0] = kectx->kec_data_out + kectx->kec_data_resid; } } @@ -1060,7 +1207,7 @@ filt_machportprocess( */ if (kev->fflags == MACH_MSG_SUCCESS) { kev->ext[2] = ((uint64_t)self->ith_qos << 32) | - (uint64_t)self->ith_qos_override; + (uint64_t)self->ith_qos_override; } return FILTER_ACTIVE; @@ -1082,7 +1229,7 @@ filt_machportprocess( static int filt_machportpeek(struct knote *kn) { - ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; + ipc_mqueue_t mqueue = kn->kn_mqueue; return ipc_mqueue_set_peek(mqueue) ? FILTER_ACTIVE : 0; }
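On the _Static_assert this diff adds just before filt_machportdetach(): the knote now stores only kn_mqueue, and imq_to_object() recovers the owning port or pset from that pointer. That is only sound because ip_messages and ips_messages sit at the same offset in struct ipc_port and struct ipc_pset, which is exactly what the assertion checks. Below is a self-contained sketch of that offset-aliasing pattern, using simplified stand-in types rather than the real XNU ones:

/* Stand-in types; illustrative of the aliasing the diff's _Static_assert encodes. */
#include <stdio.h>
#include <stddef.h>

struct mqueue_s { int imq_msgcount; };

struct object_s { unsigned io_bits; };                 /* common header */
struct port_s   { struct object_s hdr; struct mqueue_s ip_messages; };
struct pset_s   { struct object_s hdr; struct mqueue_s ips_messages; };

/* Same guarantee the new _Static_assert gives for the real structs. */
_Static_assert(offsetof(struct port_s, ip_messages) ==
    offsetof(struct pset_s, ips_messages),
    "mqueue must alias at the same offset in ports and psets");

/* container_of-style recovery, the shape of imq_to_object(): one offsetof
 * works for both kinds of object because of the assertion above. */
static struct object_s *
mq_to_object_sketch(struct mqueue_s *mq)
{
	return (struct object_s *)((char *)mq - offsetof(struct port_s, ip_messages));
}

int
main(void)
{
	struct port_s port = { .hdr.io_bits = 42 };
	/* recovers the enclosing object header: prints 42 */
	printf("%u\n", mq_to_object_sketch(&port.ip_messages)->io_bits);
	return 0;
}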