/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	ipc/ipc_pset.c
 *	Author:	Rich Draves
 *	Date:	1989
 *
 *	Functions to manipulate IPC port sets.
 */

#include <mach/port.h>
#include <mach/kern_return.h>
#include <mach/message.h>
#include <ipc/ipc_mqueue.h>
#include <ipc/ipc_object.h>
#include <ipc/ipc_pset.h>
#include <ipc/ipc_right.h>
#include <ipc/ipc_space.h>
#include <ipc/ipc_port.h>

#include <kern/kern_types.h>

#include <vm/vm_map.h>
#include <libkern/section_keywords.h>
#include <pthread/priority_private.h>

/*
 *	Routine:	ipc_pset_alloc
 *	Purpose:
 *		Allocate a port set.
 *	Conditions:
 *		Nothing locked.  If successful, the port set is returned
 *		locked.  (The caller doesn't have a reference.)
 *	Returns:
 *		KERN_SUCCESS		The port set is allocated.
 *		KERN_INVALID_TASK	The space is dead.
 *		KERN_NO_SPACE		No room for an entry in the space.
 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate memory.
 */

kern_return_t
ipc_pset_alloc(
	ipc_space_t             space,
	mach_port_name_t        *namep,
	ipc_pset_t              *psetp)
{
	ipc_pset_t pset;
	mach_port_name_t name;
	kern_return_t kr;

	kr = ipc_object_alloc(space, IOT_PORT_SET,
	    MACH_PORT_TYPE_PORT_SET, 0,
	    &name, (ipc_object_t *) &pset);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* pset and space are locked */

	ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);
	is_write_unlock(space);

	*namep = name;
	*psetp = pset;
	return KERN_SUCCESS;
}
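
/*
 * Sketch of the typical caller pattern (hedged; cf. the port-set branch
 * of mach_port_allocate(), which lives outside this file):
 *
 *	kern_return_t kr;
 *	mach_port_name_t name;
 *	ipc_pset_t pset;
 *
 *	kr = ipc_pset_alloc(space, &name, &pset);
 *	if (kr == KERN_SUCCESS) {
 *		ips_unlock(pset);	// returned locked, caller holds no ref
 *	}
 */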

/*
 *	Routine:	ipc_pset_alloc_name
 *	Purpose:
 *		Allocate a port set, with a specific name.
 *	Conditions:
 *		Nothing locked.  If successful, the port set is returned
 *		locked.  (The caller doesn't have a reference.)
 *	Returns:
 *		KERN_SUCCESS		The port set is allocated.
 *		KERN_INVALID_TASK	The space is dead.
 *		KERN_NAME_EXISTS	The name already denotes a right.
 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate memory.
 */

kern_return_t
ipc_pset_alloc_name(
	ipc_space_t             space,
	mach_port_name_t        name,
	ipc_pset_t              *psetp)
{
	ipc_pset_t pset;
	kern_return_t kr;

	kr = ipc_object_alloc_name(space, IOT_PORT_SET,
	    MACH_PORT_TYPE_PORT_SET, 0,
	    name, (ipc_object_t *) &pset);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* pset is locked */

	ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);

	*psetp = pset;
	return KERN_SUCCESS;
}


/*
 *	Routine:	ipc_pset_alloc_special
 *	Purpose:
 *		Allocate a port set in a special space.
 *		The new port set is returned with one ref.
 *		If unsuccessful, IPS_NULL is returned.
 *	Conditions:
 *		Nothing locked.
 */
ipc_pset_t
ipc_pset_alloc_special(
	__assert_only ipc_space_t space)
{
	ipc_pset_t pset;

	assert(space != IS_NULL);
	assert(space->is_table == IE_NULL);
	assert(!is_active(space));

	pset = ips_object_to_pset(io_alloc(IOT_PORT_SET, Z_WAITOK | Z_ZERO));
	if (pset == IPS_NULL) {
		return IPS_NULL;
	}

	io_lock_init(ips_to_object(pset));
	pset->ips_references = 1;
	pset->ips_object.io_bits = io_makebits(TRUE, IOT_PORT_SET, 0);

	ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);

	return pset;
}

/*
 *	Routine:	ipc_pset_member
 *	Purpose:
 *		Checks to see if a port is a member of a pset.
 *	Conditions:
 *		Both port and port set are locked.
 *		The port must be active.
 */
boolean_t
ipc_pset_member(
	ipc_pset_t      pset,
	ipc_port_t      port)
{
	require_ip_active(port);

	return ipc_mqueue_member(&port->ip_messages, &pset->ips_messages);
}


/*
 *	Routine:	ipc_pset_add
 *	Purpose:
 *		Puts a port into a port set.
 *	Conditions:
 *		Both port and port set are locked and active.
 *		The owner of the port set is also receiver for the port.
 */

kern_return_t
ipc_pset_add(
	ipc_pset_t        pset,
	ipc_port_t        port,
	uint64_t          *reserved_link,
	uint64_t          *reserved_prepost)
{
	kern_return_t kr;

	assert(ips_active(pset));
	require_ip_active(port);

	kr = ipc_mqueue_add(&port->ip_messages, &pset->ips_messages,
	    reserved_link, reserved_prepost);

	return kr;
}


/*
 *	Routine:	ipc_pset_remove
 *	Purpose:
 *		Removes a port from a port set.
 *		The port set loses a reference.
 *	Conditions:
 *		Both port and port set are locked.
 *		The port must be active.
 */

kern_return_t
ipc_pset_remove(
	ipc_pset_t        pset,
	ipc_port_t        port)
{
	kern_return_t kr;

	require_ip_active(port);

	if (port->ip_in_pset == 0) {
		return KERN_NOT_IN_SET;
	}

	kr = ipc_mqueue_remove(&port->ip_messages, &pset->ips_messages);

	return kr;
}

/*
 *	Routine:	ipc_pset_lazy_allocate
 *	Purpose:
 *		Lazily initialize the wqset of a port set.
 *	Conditions:
 *		Nothing locked.
 */

kern_return_t
ipc_pset_lazy_allocate(
	ipc_space_t       space,
	mach_port_name_t  psname)
{
	kern_return_t kr;
	ipc_entry_t entry;
	ipc_object_t psobj;
	ipc_pset_t pset;

	kr = ipc_right_lookup_read(space, psname, &entry);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* space is read-locked and active */
	if ((entry->ie_bits & MACH_PORT_TYPE_PORT_SET) == 0) {
		is_read_unlock(space);
		kr = KERN_INVALID_RIGHT;
		return kr;
	}

	psobj = entry->ie_object;
	pset = ips_object_to_pset(psobj);
	assert(pset != NULL);
	ipc_mqueue_t set_mqueue = &pset->ips_messages;
	struct waitq_set *wqset = &set_mqueue->imq_set_queue;

	io_reference(psobj);
	is_read_unlock(space);

	/*
	 * Lazily initialize the wqset to avoid a
	 * possible allocation while linking
	 * under spinlocks.
	 */
	waitq_set_lazy_init_link(wqset);
	io_release(psobj);

	return KERN_SUCCESS;
}

/*
 *	Routine:	ipc_pset_remove_from_all
 *	Purpose:
 *		Removes a port from all its port sets.
 *	Conditions:
 *		port is locked and active.
 */

kern_return_t
ipc_pset_remove_from_all(
	ipc_port_t      port)
{
	if (port->ip_in_pset == 0) {
		return KERN_NOT_IN_SET;
	}

	/*
	 * Remove the port's mqueue from all sets
	 */
	ipc_mqueue_remove_from_all(&port->ip_messages);
	return KERN_SUCCESS;
}


/*
 *	Routine:	ipc_pset_destroy
 *	Purpose:
 *		Destroys a port_set.
 *	Conditions:
 *		The port_set is locked and alive.
 *		The caller has a reference, which is consumed.
 *		Afterwards, the port_set is unlocked and dead.
 */

void
ipc_pset_destroy(
	ipc_space_t     space,
	ipc_pset_t      pset)
{
	assert(ips_active(pset));

	pset->ips_object.io_bits &= ~IO_BITS_ACTIVE;

	/*
	 * remove all the member message queues
	 * AND remove this message queue from any containing sets
	 */
	ipc_mqueue_remove_all(&pset->ips_messages);

	/*
	 * Set all waiters on the portset running to
	 * discover the change.
	 */
	imq_lock(&pset->ips_messages);
	ipc_mqueue_changed(space, &pset->ips_messages);
	imq_unlock(&pset->ips_messages);

	ipc_mqueue_deinit(&pset->ips_messages);

	ips_unlock(pset);
	ips_release(pset);      /* consume the ref our caller gave us */
}

/*
 * Kqueue EVFILT_MACHPORT support
 *
 * - kn_mqueue points to the monitored mqueue
 *
 * - (in/out) ext[0] holds a mach_vm_address_t to a userspace buffer
 *   that can be used to direct-deliver messages when
 *   MACH_RCV_MSG is set in kn_sfflags
 *
 * - (in/out) ext[1] holds a mach_msg_size_t representing the size
 *   of the userspace buffer held in ext[0].
 *
 * - (out)    ext[2] is used to deliver qos information
 *   about the send queue to userspace.
 *
 * - (abused) ext[3] is used in kernel to hold a reference to the first port
 *   with a turnstile that participates in sync IPC override.
 *
 * - kn_hook is optionally a "knote" turnstile. It is used as the inheritor
 *   of turnstiles for rights copied out as part of direct message delivery
 *   when they can participate in sync IPC override.
 *
 *   It is used to atomically neuter the sync IPC override when the knote is
 *   re-enabled.
 *
 */

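/*
 * Illustrative userspace sketch (not part of this file; the buffer and
 * port-name variables are hypothetical): registering a receive right
 * for direct message delivery with kevent.
 *
 *	char buf[2048];				// direct-receive buffer
 *	struct kevent_qos_s kev = {
 *		.ident  = port_name,		// receive right or port set
 *		.filter = EVFILT_MACHPORT,
 *		.flags  = EV_ADD | EV_DISPATCH,
 *		.fflags = MACH_RCV_MSG,		// deliver messages into ext[0]
 *		.ext    = { (uint64_t)(uintptr_t)buf, sizeof(buf) },
 *	};
 *
 * On delivery, ext[1] carries the size of the received message and
 * ext[2] the QoS information described above.
 */
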
#include <sys/event.h>
#include <sys/errno.h>

static int
filt_machport_adjust_qos(struct knote *kn, ipc_kmsg_t first)
{
	if (kn->kn_sfflags & MACH_RCV_MSG) {
		return FILTER_ADJUST_EVENT_QOS(first->ikm_qos_override);
	}
	return 0;
}

struct turnstile *
filt_ipc_kqueue_turnstile(struct knote *kn)
{
	assert(kn->kn_filter == EVFILT_MACHPORT || kn->kn_filter == EVFILT_WORKLOOP);
	return kqueue_turnstile(knote_get_kq(kn));
}

bool
filt_machport_kqueue_has_turnstile(struct knote *kn)
{
	assert(kn->kn_filter == EVFILT_MACHPORT);
	return ((kn->kn_sfflags & MACH_RCV_MSG) || (kn->kn_sfflags & MACH_RCV_SYNC_PEEK))
	       && (kn->kn_flags & EV_DISPATCH);
}

/*
 * Stashes a port that participates in sync IPC override until the knote
 * is re-enabled.
 *
 * It returns:
 * - the turnstile to use as an inheritor for the stashed port
 * - the kind of stash that happened as PORT_SYNC_* value among:
 *   o not stashed (no sync IPC support)
 *   o stashed in the knote (in kn_ext[3])
 *   o to be hooked to the kn_hook knote
 */
struct turnstile *
filt_machport_stash_port(struct knote *kn, ipc_port_t port, int *link)
{
	struct turnstile *ts = TURNSTILE_NULL;

	if (kn->kn_filter == EVFILT_WORKLOOP) {
		assert(kn->kn_mqueue == NULL);
		kn->kn_mqueue = &port->ip_messages;
		ip_reference(port);
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_KNOTE;
		}
		ts = filt_ipc_kqueue_turnstile(kn);
	} else if (!filt_machport_kqueue_has_turnstile(kn)) {
		if (link) {
			*link = PORT_SYNC_LINK_NO_LINKAGE;
		}
	} else if (kn->kn_ext[3] == 0) {
		ip_reference(port);
		kn->kn_ext[3] = (uintptr_t)port;
		ts = filt_ipc_kqueue_turnstile(kn);
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_KNOTE;
		}
	} else {
		ts = (struct turnstile *)kn->kn_hook;
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_STASH;
		}
	}

	return ts;
}

/*
 * Lazily prepare a turnstile so that filt_machport_stash_port()
 * can be called with the mqueue lock held.
 *
 * It will allocate a turnstile in kn_hook if:
 * - the knote supports sync IPC override,
 * - we already stashed a port in kn_ext[3],
 * - the object that will be copied out has a chance to ask to be stashed.
 *
 * It is set up so that its inheritor is the workloop turnstile that has
 * been allocated when this knote was attached.
 */
void
filt_machport_turnstile_prepare_lazily(
	struct knote            *kn,
	mach_msg_type_name_t    msgt_name,
	ipc_port_t              port)
{
	/* This is called from within filt_machportprocess */
	assert((kn->kn_status & KN_SUPPRESSED) && (kn->kn_status & KN_LOCKED));

	if (!filt_machport_kqueue_has_turnstile(kn)) {
		return;
	}

	if (kn->kn_ext[3] == 0 || kn->kn_hook) {
		return;
	}

	struct turnstile *ts = filt_ipc_kqueue_turnstile(kn);
	if ((msgt_name == MACH_MSG_TYPE_PORT_SEND_ONCE && port->ip_specialreply) ||
	    (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE)) {
		struct turnstile *kn_ts = turnstile_alloc();
		kn_ts = turnstile_prepare((uintptr_t)kn,
		    (struct turnstile **)&kn->kn_hook, kn_ts, TURNSTILE_KNOTE);
		turnstile_update_inheritor(kn_ts, ts,
		    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE);
		turnstile_cleanup();
	}
}

static void
filt_machport_turnstile_complete_port(struct knote *kn, ipc_port_t port,
    ipc_mqueue_t mqueue)
{
	struct turnstile *ts = TURNSTILE_NULL;

	ip_lock(port);
	if (port->ip_specialreply) {
		/*
		 * If the reply has been sent to the special reply port already,
		 * then the special reply port may already be reused to do something
		 * entirely different.
		 *
		 * However, the only reason for it to still point to this knote is
		 * that it's still waiting for a reply, so when this is the case,
		 * neuter the linkage.
		 */
		if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE &&
		    port->ip_sync_inheritor_knote == kn) {
			ipc_port_adjust_special_reply_port_locked(port, NULL,
			    (IPC_PORT_ADJUST_SR_NONE | IPC_PORT_ADJUST_SR_ENABLE_EVENT), FALSE);
		} else {
			ip_unlock(port);
		}
	} else {
		/*
		 * For receive rights, if their IMQ_KNOTE() is still this
		 * knote, then sever the link.
		 */
		imq_lock(mqueue);
		if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE &&
		    mqueue->imq_inheritor_knote == kn) {
			ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL);
			ts = port_send_turnstile(port);
		}
		if (ts) {
			turnstile_reference(ts);
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
		imq_unlock(mqueue);
		ip_unlock(port);

		if (ts) {
			turnstile_update_inheritor_complete(ts,
			    TURNSTILE_INTERLOCK_NOT_HELD);
			turnstile_deallocate(ts);
		}
	}

	ip_release(port);
}

void
filt_wldetach_sync_ipc(struct knote *kn)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;

	filt_machport_turnstile_complete_port(kn, ip_from_mq(mqueue), mqueue);
	kn->kn_mqueue = NULL;
}

/*
 * Other half of filt_machport_turnstile_prepare_lazily()
 *
 * This is serialized by the knote state machine.
 */
static void
filt_machport_turnstile_complete(struct knote *kn)
{
	if (kn->kn_ext[3]) {
		ipc_port_t port = (ipc_port_t)kn->kn_ext[3];
		filt_machport_turnstile_complete_port(kn, port, &port->ip_messages);
		kn->kn_ext[3] = 0;
	}

	if (kn->kn_hook) {
		struct turnstile *ts = kn->kn_hook;

		turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
		    TURNSTILE_IMMEDIATE_UPDATE);
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);

		turnstile_complete((uintptr_t)kn, (struct turnstile **)&kn->kn_hook, &ts, TURNSTILE_KNOTE);
		turnstile_cleanup();

		assert(ts);
		turnstile_deallocate(ts);
	}
}

static void
filt_machport_link(ipc_mqueue_t mqueue, struct knote *kn)
{
	struct knote *hd = SLIST_FIRST(&mqueue->imq_klist);

	if (hd && filt_machport_kqueue_has_turnstile(kn)) {
		SLIST_INSERT_AFTER(hd, kn, kn_selnext);
	} else {
		SLIST_INSERT_HEAD(&mqueue->imq_klist, kn, kn_selnext);
	}
}

static void
filt_machport_unlink(ipc_mqueue_t mqueue, struct knote *kn)
{
	struct knote **knprev;

	KNOTE_DETACH(&mqueue->imq_klist, kn);

	/* make sure the first knote is a knote we can push on */
	SLIST_FOREACH_PREVPTR(kn, knprev, &mqueue->imq_klist, kn_selnext) {
		if (filt_machport_kqueue_has_turnstile(kn)) {
			*knprev = SLIST_NEXT(kn, kn_selnext);
			SLIST_INSERT_HEAD(&mqueue->imq_klist, kn, kn_selnext);
			break;
		}
	}
}

int
filt_wlattach_sync_ipc(struct knote *kn)
{
	mach_port_name_t name = (mach_port_name_t)kn->kn_id;
	ipc_space_t space = current_space();
	ipc_entry_t entry;
	ipc_port_t port = IP_NULL;
	int error = 0;

	if (ipc_right_lookup_read(space, name, &entry) != KERN_SUCCESS) {
		return ENOENT;
	}

	/* space is read-locked */

	if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) {
		port = ip_object_to_port(entry->ie_object);
		if (port->ip_specialreply) {
			error = ENOENT;
		}
	} else if (entry->ie_bits & MACH_PORT_TYPE_SEND_ONCE) {
		port = ip_object_to_port(entry->ie_object);
		if (!port->ip_specialreply) {
			error = ENOENT;
		}
	} else {
		error = ENOENT;
	}
	if (error) {
		is_read_unlock(space);
		return error;
	}

	ip_lock(port);
	is_read_unlock(space);

	if (port->ip_sync_link_state == PORT_SYNC_LINK_ANY) {
		ip_unlock(port);
		/*
		 * We cannot start a sync IPC inheritance chain, only further
		 * an existing one.
		 * Note: this can also happen if the inheritance chain broke
		 * because the original requestor died.
		 */
		return ENOENT;
	}

	if (port->ip_specialreply) {
		ipc_port_adjust_special_reply_port_locked(port, kn,
		    IPC_PORT_ADJUST_SR_LINK_WORKLOOP, FALSE);
	} else {
		ipc_port_adjust_port_locked(port, kn, FALSE);
	}

	/* make sure the port was stashed */
	assert(kn->kn_mqueue == &port->ip_messages);

	/* port has been unlocked by ipc_port_adjust_* */

	return 0;
}

static int
filt_machportattach(
	struct knote                    *kn,
	__unused struct kevent_qos_s    *kev)
{
	mach_port_name_t name = (mach_port_name_t)kn->kn_id;
	uint64_t wq_link_id = waitq_link_reserve(NULL);
	ipc_space_t space = current_space();
	ipc_kmsg_t first;
	struct turnstile *send_turnstile = TURNSTILE_NULL;

	int error;
	int result = 0;
	kern_return_t kr;
	ipc_entry_t entry;
	ipc_mqueue_t mqueue;

	kn->kn_flags &= ~EV_EOF;
	kn->kn_ext[3] = 0;

	if (filt_machport_kqueue_has_turnstile(kn)) {
		/*
		 * If the filter is likely to support sync IPC override,
		 * and it happens to be attaching to a workloop,
		 * make sure the workloop has an allocated turnstile.
		 */
		kqueue_alloc_turnstile(knote_get_kq(kn));
	}

lookup_again:
	kr = ipc_right_lookup_read(space, name, &entry);

	if (kr != KERN_SUCCESS) {
		error = ENOENT;
		goto out;
	}

	/* space is read-locked and active */

	if ((entry->ie_bits & MACH_PORT_TYPE_PORT_SET) &&
	    knote_link_waitqset_should_lazy_alloc(kn)) {
		is_read_unlock(space);

		/*
		 * We need to link the portset of the kn,
		 * to ensure that the link is allocated before taking
		 * any spinlocks.
		 *
		 * Because we have to drop the space lock so that
		 * knote_link_waitqset_lazy_alloc() can allocate memory,
		 * we will need to redo the lookup.
		 */
		knote_link_waitqset_lazy_alloc(kn);
		goto lookup_again;
	}

	if (entry->ie_bits & MACH_PORT_TYPE_PORT_SET) {
		ipc_pset_t pset;

		pset = ips_object_to_pset(entry->ie_object);
		mqueue = &pset->ips_messages;
		ips_reference(pset);

		imq_lock(mqueue);
		kn->kn_mqueue = mqueue;

		/*
		 * Bind the portset wait queue directly to knote/kqueue.
		 * This allows us to just use wait_queue foo to effect a wakeup,
		 * rather than having to call knote() from the Mach code on each
		 * message.  We still attach the knote to the mqueue klist for
		 * NOTE_REVOKE purposes only.
		 */
		error = knote_link_waitq(kn, &mqueue->imq_wait_queue, &wq_link_id);
		if (!error) {
			filt_machport_link(mqueue, kn);
			imq_unlock(mqueue);
		} else {
			kn->kn_mqueue = IMQ_NULL;
			imq_unlock(mqueue);
			ips_release(pset);
		}

		is_read_unlock(space);

		/*
		 * linked knotes are marked stay-active and therefore don't
		 * need an indication of their fired state to be returned
		 * from the attach operation.
		 */
	} else if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) {
		ipc_port_t port = ip_object_to_port(entry->ie_object);

		if (port->ip_specialreply) {
			/*
			 * Registering for kevents on special reply ports
			 * isn't supported for two reasons:
			 *
			 * 1. it really makes very little sense for a port that
			 *    is supposed to be used synchronously
			 *
			 * 2. their mqueue's imq_klist field will be used to
			 *    store the receive turnstile, so we can't possibly
			 *    attach them anyway.
			 */
			is_read_unlock(space);
			error = ENOTSUP;
			goto out;
		}

		mqueue = &port->ip_messages;
		ip_reference(port);

		/*
		 * attach knote to port and determine result
		 * If the filter requested direct message receipt,
		 * we may need to adjust the qos of the knote to
		 * reflect the requested and override qos of the
		 * first message in the queue.
		 */
		ip_lock(port);
		imq_lock(mqueue);

		kn->kn_mqueue = mqueue;
		if (port->ip_sync_link_state != PORT_SYNC_LINK_ANY) {
			/*
			 * We're attaching a port that used to have an IMQ_KNOTE,
			 * clobber this state, we'll fixup its turnstile inheritor below.
			 */
			ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL);
		}
		filt_machport_link(mqueue, kn);

		if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) {
			result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first);
		}

		/*
		 * Update the port's turnstile inheritor
		 *
		 * Unlike filt_machportdetach(), we don't have to care about races for
		 * turnstile_workloop_pusher_info(): filt_machport_link() doesn't affect
		 * already pushing knotes, and if the current one becomes the new
		 * pusher, it'll only be visible when turnstile_workloop_pusher_info()
		 * returns.
		 */
		send_turnstile = port_send_turnstile(port);
		if (send_turnstile) {
			turnstile_reference(send_turnstile);
			ipc_port_send_update_inheritor(port, send_turnstile,
			    TURNSTILE_IMMEDIATE_UPDATE);

			/*
			 * rdar://problem/48861190
			 *
			 * When a listener connection resumes a peer,
			 * updating the inheritor above has moved the push
			 * from the current thread to the workloop.
			 *
			 * However, we haven't told the workloop yet
			 * that it needs a thread request, and we risk
			 * being preempted as soon as we drop the space
			 * lock below.
			 *
			 * To avoid this, disable preemption and let kevent
			 * re-enable it after it takes the kqlock.
			 */
			disable_preemption();
			result |= FILTER_THREADREQ_NODEFEER;
		}

		imq_unlock(mqueue);
		ip_unlock(port);

		is_read_unlock(space);
		if (send_turnstile) {
			turnstile_update_inheritor_complete(send_turnstile,
			    TURNSTILE_INTERLOCK_NOT_HELD);
			turnstile_deallocate_safe(send_turnstile);
		}

		error = 0;
	} else {
		is_read_unlock(space);
		error = ENOTSUP;
	}

out:
	waitq_link_release(wq_link_id);

	/* bail out on errors */
	if (error) {
		knote_set_error(kn, error);
		return 0;
	}

	return result;
}

/* Validate that the imq_to_object() implementation "works" */
_Static_assert(offsetof(struct ipc_pset, ips_messages) ==
    offsetof(struct ipc_port, ip_messages),
    "Make sure the mqueue aliases in both ports and psets");

static void
filt_machportdetach(
	struct knote    *kn)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_object_t object = imq_to_object(mqueue);
	struct turnstile *send_turnstile = TURNSTILE_NULL;

	filt_machport_turnstile_complete(kn);

	imq_lock(mqueue);
	if ((kn->kn_status & KN_VANISHED) || (kn->kn_flags & EV_EOF)) {
		/*
		 * ipc_mqueue_changed() already unhooked this knote from the mqueue.
		 */
	} else {
		ipc_port_t port = IP_NULL;

		/*
		 * When the knote being detached is the first one in the list,
		 * then unlinking the knote *and* updating the turnstile inheritor
		 * need to happen atomically with respect to the callers of
		 * turnstile_workloop_pusher_info().
		 *
		 * The caller of turnstile_workloop_pusher_info() will use the kq req
		 * lock (and hence the kqlock), so we just need to hold the kqlock too.
		 */
		if (io_otype(object) == IOT_PORT) {
			port = ip_object_to_port(object);
			assert(port->ip_sync_link_state == PORT_SYNC_LINK_ANY);
			if (kn == SLIST_FIRST(&mqueue->imq_klist)) {
				send_turnstile = port_send_turnstile(port);
			}
		}

		filt_machport_unlink(mqueue, kn);

		if (send_turnstile) {
			turnstile_reference(send_turnstile);
			ipc_port_send_update_inheritor(port, send_turnstile,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
	}

	/* Clear the knote pointer once the knote has been removed from turnstile */
	kn->kn_mqueue = IMQ_NULL;
	imq_unlock(mqueue);

	if (send_turnstile) {
		turnstile_update_inheritor_complete(send_turnstile,
		    TURNSTILE_INTERLOCK_NOT_HELD);
		turnstile_deallocate(send_turnstile);
	}

	if (io_otype(object) == IOT_PORT_SET) {
		/*
		 * Unlink the portset wait queue from knote/kqueue.
		 * JMM - Does this need to be atomic under the mq lock?
		 */
		(void)knote_unlink_waitq(kn, &mqueue->imq_wait_queue);
	}
	io_release(object);
}

/*
 * filt_machportevent - deliver events into the mach port filter
 *
 * Mach port message arrival events are currently only posted via the
 * kqueue filter routine for ports.  Port sets are marked stay-active
 * and the wait queue code will break any kqueue waiters out to go
 * poll the stay-queued knotes again.
 *
 * If there is a message at the head of the queue,
 * we indicate that the knote should go active.  If
 * the message is to be direct-received, we adjust the
 * QoS of the knote according to the requested and override
 * QoS of that first message.
 */
static int
filt_machportevent(struct knote *kn, long hint __assert_only)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_kmsg_t first;
	int result = 0;

	/* mqueue locked by caller */
	imq_held(mqueue);
	assert(hint != NOTE_REVOKE);

	if (imq_is_valid(mqueue)) {
		assert(!imq_is_set(mqueue));
		if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) {
			result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first);
		}
	}

	return result;
}

static int
filt_machporttouch(
	struct knote            *kn,
	struct kevent_qos_s     *kev)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_kmsg_t first;
	int result = 0;

	/* copy in new settings and save off new input fflags */
	kn->kn_sfflags = kev->fflags;
	kn->kn_ext[0] = kev->ext[0];
	kn->kn_ext[1] = kev->ext[1];

	if (kev->flags & EV_ENABLE) {
		/*
		 * If the knote is being enabled, make sure there are no
		 * lingering IPC overrides from the previous message delivery.
		 */
		filt_machport_turnstile_complete(kn);
	}

	/*
	 * If the mqueue is a valid port and there is a message
	 * that will be direct-received from the knote, update
	 * the knote qos based on the first message and trigger
	 * the event.  If there are no more messages, reset the
	 * QoS to the value provided by the kevent.
	 */
	imq_lock(mqueue);
	if (imq_is_valid(mqueue) && !imq_is_set(mqueue) &&
	    (first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) {
		result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first);
	} else if (kn->kn_sfflags & MACH_RCV_MSG) {
		result = FILTER_RESET_EVENT_QOS;
	}
	imq_unlock(mqueue);

	return result;
}

static int
filt_machportprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_object_t object = imq_to_object(mqueue);
	thread_t self = current_thread();
	kevent_ctx_t kectx = NULL;

	wait_result_t wresult;
	mach_msg_option_t option;
	mach_vm_address_t addr;
	mach_msg_size_t size;

	/* Capture current state */
	knote_fill_kevent(kn, kev, MACH_PORT_NULL);
	kev->ext[3] = 0;        /* hide our port reference from userspace */

	/* If already deallocated/moved return one last EOF event */
	if (kev->flags & EV_EOF) {
		return FILTER_ACTIVE | FILTER_RESET_EVENT_QOS;
	}

	/*
	 * Only honor supported receive options.  If no options are
	 * provided, just force a MACH_RCV_TOO_LARGE to detect the
	 * name of the port and the size of the waiting message.
	 */
	option = kn->kn_sfflags & (MACH_RCV_MSG | MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY |
	    MACH_RCV_TRAILER_MASK | MACH_RCV_VOUCHER | MACH_MSG_STRICT_REPLY);

	if (option & MACH_RCV_MSG) {
		addr = (mach_vm_address_t) kn->kn_ext[0];
		size = (mach_msg_size_t) kn->kn_ext[1];

		/*
		 * If the kevent didn't specify a buffer and length, carve a buffer
		 * from the filter processing data according to the flags.
		 */
		if (size == 0) {
			kectx = kevent_get_context(self);
			addr = (mach_vm_address_t)kectx->kec_data_out;
			size = (mach_msg_size_t)kectx->kec_data_resid;
			option |= (MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY);
			if (kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) {
				option |= MACH_RCV_STACK;
			}
		}
	} else {
		/* just detect the port name (if a set) and size of the first message */
		option = MACH_RCV_LARGE;
		addr = 0;
		size = 0;
	}

	imq_lock(mqueue);

	/* just use the reference from here on out */
	io_reference(object);

	/*
	 * Set up to receive a message or the notification of a
	 * too-large message.  But never allow this call to wait.
	 * If the user provided additional options, like trailer
	 * options, pass those through here.  But we don't support
	 * scatter lists through this interface.
	 */
	self->ith_object = object;
	self->ith_msg_addr = addr;
	self->ith_rsize = size;
	self->ith_msize = 0;
	self->ith_option = option;
	self->ith_receiver_name = MACH_PORT_NULL;
	self->ith_continuation = NULL;
	option |= MACH_RCV_TIMEOUT; // never wait
	self->ith_state = MACH_RCV_IN_PROGRESS;
	self->ith_knote = kn;

	wresult = ipc_mqueue_receive_on_thread(
		mqueue,
		option,
		size,   /* max_size */
		0,      /* immediate timeout */
		THREAD_INTERRUPTIBLE,
		self);
	/* mqueue unlocked */

	/*
	 * If we timed out, or the process is exiting, just release the
	 * reference on the ipc_object and return zero.
	 */
	if (wresult == THREAD_RESTART || self->ith_state == MACH_RCV_TIMED_OUT) {
		assert(self->turnstile != TURNSTILE_NULL);
		io_release(object);
		return 0;
	}

	assert(wresult == THREAD_NOT_WAITING);
	assert(self->ith_state != MACH_RCV_IN_PROGRESS);

	/*
	 * If we weren't attempting to receive a message
	 * directly, we need to return the port name in
	 * the kevent structure.
	 */
	if ((option & MACH_RCV_MSG) != MACH_RCV_MSG) {
		assert(self->ith_state == MACH_RCV_TOO_LARGE);
		assert(self->ith_kmsg == IKM_NULL);
		kev->data = self->ith_receiver_name;
		io_release(object);
		return FILTER_ACTIVE;
	}

	/*
	 * Attempt to receive the message directly, returning
	 * the results in the fflags field.
	 */
	kev->fflags = mach_msg_receive_results(&size);

	/* kmsg and object reference consumed */

	/*
	 * If the user asked for the identity of ports containing a
	 * too-large message, return it in the data field (as we
	 * do for messages we didn't try to receive).
	 */
	if (kev->fflags == MACH_RCV_TOO_LARGE) {
		kev->ext[1] = self->ith_msize;
		if (option & MACH_RCV_LARGE_IDENTITY) {
			kev->data = self->ith_receiver_name;
		} else {
			kev->data = MACH_PORT_NULL;
		}
	} else {
		kev->ext[1] = size;
		kev->data = MACH_PORT_NULL;
	}

	/*
	 * If we used a data buffer carved out from the filt_process data,
	 * store the address used in the knote and adjust the residual and
	 * other parameters for future use.
	 */
	if (kectx) {
		assert(kectx->kec_data_resid >= size);
		kectx->kec_data_resid -= size;
		if ((kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) == 0) {
			kev->ext[0] = kectx->kec_data_out;
			kectx->kec_data_out += size;
		} else {
			assert(option & MACH_RCV_STACK);
			kev->ext[0] = kectx->kec_data_out + kectx->kec_data_resid;
		}
	}

	/*
	 * Apply message-based QoS values to output kevent as prescribed.
	 * The kev->ext[2] field gets (msg-qos << 32) | (override-qos).
	 *
	 * The mach_msg_receive_results() call saved off the message
	 * QoS values in the continuation save area on successful receive.
	 */
	if (kev->fflags == MACH_MSG_SUCCESS) {
		kev->ext[2] = ((uint64_t)self->ith_ppriority << 32) |
		    _pthread_priority_make_from_thread_qos(self->ith_qos_override, 0, 0);
	}

	return FILTER_ACTIVE;
}

/*
 * Peek to see if the message queue associated with the knote has any
 * events.  This pre-hook is called when a filter uses the stay-on-queue
 * mechanism (as the knote_link_waitq mechanism does for portsets) and
 * someone calls select() against the containing kqueue.
 *
 * Just peek at the pre-post status of the portset's wait queue
 * to determine if it has anything interesting.  We can do it
 * without holding the lock, as it is just a snapshot in time
 * (if this is used as part of really waiting for events, we
 * will catch changes in this status when the event gets posted
 * up to the knote's kqueue).
 */
static int
filt_machportpeek(struct knote *kn)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;

	return ipc_mqueue_set_peek(mqueue) ? FILTER_ACTIVE : 0;
}

SECURITY_READ_ONLY_EARLY(struct filterops) machport_filtops = {
	.f_adjusts_qos = true,
	.f_extended_codes = true,
	.f_attach = filt_machportattach,
	.f_detach = filt_machportdetach,
	.f_event = filt_machportevent,
	.f_touch = filt_machporttouch,
	.f_process = filt_machportprocess,
	.f_peek = filt_machportpeek,
};