/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	ipc/ipc_pset.c
 *	Author:	Rich Draves
 *	Date:	1989
 *
 *	Functions to manipulate IPC port sets.
 */

#include <mach/port.h>
#include <mach/kern_return.h>
#include <mach/message.h>
#include <ipc/ipc_mqueue.h>
#include <ipc/ipc_object.h>
#include <ipc/ipc_pset.h>
#include <ipc/ipc_right.h>
#include <ipc/ipc_space.h>
#include <ipc/ipc_port.h>

#include <kern/kern_types.h>

#include <vm/vm_map.h>
#include <libkern/section_keywords.h>

/*
 *	Routine:	ipc_pset_alloc
 *	Purpose:
 *		Allocate a port set.
 *	Conditions:
 *		Nothing locked.  If successful, the port set is returned
 *		locked.  (The caller doesn't have a reference.)
 *	Returns:
 *		KERN_SUCCESS		The port set is allocated.
 *		KERN_INVALID_TASK	The space is dead.
 *		KERN_NO_SPACE		No room for an entry in the space.
 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate memory.
 */

kern_return_t
ipc_pset_alloc(
	ipc_space_t       space,
	mach_port_name_t  *namep,
	ipc_pset_t        *psetp)
{
	ipc_pset_t pset;
	mach_port_name_t name;
	kern_return_t kr;

	kr = ipc_object_alloc(space, IOT_PORT_SET,
	    MACH_PORT_TYPE_PORT_SET, 0,
	    &name, (ipc_object_t *) &pset);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* pset and space are locked */

	ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);
	is_write_unlock(space);

	*namep = name;
	*psetp = pset;
	return KERN_SUCCESS;
}
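
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * how an in-kernel caller might use ipc_pset_alloc().  Per the contract
 * above, the pset comes back locked with no extra reference and the space
 * has already been unlocked, so the caller just unlocks the pset when done.
 * The helper function itself is hypothetical.
 */
#if 0 /* example only */
static kern_return_t
example_make_pset(ipc_space_t space, mach_port_name_t *namep)
{
	ipc_pset_t pset;
	kern_return_t kr;

	kr = ipc_pset_alloc(space, namep, &pset);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* pset is locked; space was unlocked for us */
	ips_unlock(pset);
	return KERN_SUCCESS;
}
#endif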

/*
 *	Routine:	ipc_pset_alloc_name
 *	Purpose:
 *		Allocate a port set, with a specific name.
 *	Conditions:
 *		Nothing locked.  If successful, the port set is returned
 *		locked.  (The caller doesn't have a reference.)
 *	Returns:
 *		KERN_SUCCESS		The port set is allocated.
 *		KERN_INVALID_TASK	The space is dead.
 *		KERN_NAME_EXISTS	The name already denotes a right.
 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate memory.
 */

kern_return_t
ipc_pset_alloc_name(
	ipc_space_t       space,
	mach_port_name_t  name,
	ipc_pset_t        *psetp)
{
	ipc_pset_t pset;
	kern_return_t kr;

	kr = ipc_object_alloc_name(space, IOT_PORT_SET,
	    MACH_PORT_TYPE_PORT_SET, 0,
	    name, (ipc_object_t *) &pset);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* pset is locked */

	ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);

	*psetp = pset;
	return KERN_SUCCESS;
}


/*
 *	Routine:	ipc_pset_alloc_special
 *	Purpose:
 *		Allocate a port set in a special space.
 *		The new port set is returned with one ref.
 *		If unsuccessful, IPS_NULL is returned.
 *	Conditions:
 *		Nothing locked.
 */
ipc_pset_t
ipc_pset_alloc_special(
	__assert_only ipc_space_t space)
{
	ipc_pset_t pset;

	assert(space != IS_NULL);
	assert(space->is_table == IE_NULL);
	assert(!is_active(space));

	pset = ips_object_to_pset(io_alloc(IOT_PORT_SET));
	if (pset == IPS_NULL) {
		return IPS_NULL;
	}

	bzero((char *)pset, sizeof(*pset));

	io_lock_init(ips_to_object(pset));
	pset->ips_references = 1;
	pset->ips_object.io_bits = io_makebits(TRUE, IOT_PORT_SET, 0);

	ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);

	return pset;
}


/*
 *	Routine:	ipc_pset_member
 *	Purpose:
 *		Checks to see if a port is a member of a pset
 *	Conditions:
 *		Both port and port set are locked.
 *		The port must be active.
 */
boolean_t
ipc_pset_member(
	ipc_pset_t  pset,
	ipc_port_t  port)
{
	require_ip_active(port);

	return ipc_mqueue_member(&port->ip_messages, &pset->ips_messages);
}


/*
 *	Routine:	ipc_pset_add
 *	Purpose:
 *		Puts a port into a port set.
 *	Conditions:
 *		Both port and port set are locked and active.
 *		The owner of the port set is also the receiver for the port.
 */

kern_return_t
ipc_pset_add(
	ipc_pset_t  pset,
	ipc_port_t  port,
	uint64_t    *reserved_link,
	uint64_t    *reserved_prepost)
{
	kern_return_t kr;

	assert(ips_active(pset));
	require_ip_active(port);

	kr = ipc_mqueue_add(&port->ip_messages, &pset->ips_messages,
	    reserved_link, reserved_prepost);

	return kr;
}
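
/*
 * Illustrative sketch (editor's addition): reserved_link/reserved_prepost
 * exist because wait-queue linkage memory cannot be allocated while the port
 * and pset spinlocks are held, so callers reserve it up front and release
 * whatever wasn't consumed afterwards.  waitq_link_reserve() and
 * waitq_link_release() appear elsewhere in this file; the prepost helpers
 * and flag below mirror the reservation pattern used by the mach_port
 * routines, and should be treated as assumptions about the waitq API.
 */
#if 0 /* example only */
uint64_t link = waitq_link_reserve(NULL);
uint64_t prepost = waitq_prepost_reserve(NULL, 10, WAITQ_DONT_LOCK);

/* ... lock both pset and port, then ... */
kr = ipc_pset_add(pset, port, &link, &prepost);
/* ... unlock, then release any unconsumed reservations ... */

waitq_link_release(link);
waitq_prepost_release_reserve(prepost);
#endif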



/*
 *	Routine:	ipc_pset_remove
 *	Purpose:
 *		Removes a port from a port set.
 *		The port set loses a reference.
 *	Conditions:
 *		Both port and port set are locked.
 *		The port must be active.
 */

kern_return_t
ipc_pset_remove(
	ipc_pset_t  pset,
	ipc_port_t  port)
{
	kern_return_t kr;
	require_ip_active(port);

	if (port->ip_in_pset == 0) {
		return KERN_NOT_IN_SET;
	}

	kr = ipc_mqueue_remove(&port->ip_messages, &pset->ips_messages);

	return kr;
}

/*
 *	Routine:	ipc_pset_lazy_allocate
 *	Purpose:
 *		Lazily initialize the wqset of a port set.
 *	Conditions:
 *		Nothing locked.
 */

kern_return_t
ipc_pset_lazy_allocate(
	ipc_space_t       space,
	mach_port_name_t  psname)
{
	kern_return_t kr;
	ipc_entry_t entry;
	ipc_object_t psobj;
	ipc_pset_t pset;

	kr = ipc_right_lookup_read(space, psname, &entry);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* space is read-locked and active */
	if ((entry->ie_bits & MACH_PORT_TYPE_PORT_SET) == 0) {
		is_read_unlock(space);
		kr = KERN_INVALID_RIGHT;
		return kr;
	}

	psobj = entry->ie_object;
	pset = ips_object_to_pset(psobj);
	assert(pset != NULL);
	ipc_mqueue_t set_mqueue = &pset->ips_messages;
	struct waitq_set *wqset = &set_mqueue->imq_set_queue;

	io_reference(psobj);
	is_read_unlock(space);

	/*
	 * lazily initialize the wqset to avoid
	 * possible allocation while linking
	 * under spinlocks.
	 */
	waitq_set_lazy_init_link(wqset);
	io_release(psobj);

	return KERN_SUCCESS;
}

/*
 *	Routine:	ipc_pset_remove_from_all
 *	Purpose:
 *		Removes a port from all its port sets.
 *	Conditions:
 *		port is locked and active.
 */

kern_return_t
ipc_pset_remove_from_all(
	ipc_port_t  port)
{
	if (port->ip_in_pset == 0) {
		return KERN_NOT_IN_SET;
	}

	/*
	 * Remove the port's mqueue from all sets
	 */
	ipc_mqueue_remove_from_all(&port->ip_messages);
	return KERN_SUCCESS;
}


/*
 *	Routine:	ipc_pset_destroy
 *	Purpose:
 *		Destroys a port_set.
 *	Conditions:
 *		The port_set is locked and alive.
 *		The caller has a reference, which is consumed.
 *		Afterwards, the port_set is unlocked and dead.
 */

void
ipc_pset_destroy(
	ipc_space_t  space,
	ipc_pset_t   pset)
{
	assert(ips_active(pset));

	pset->ips_object.io_bits &= ~IO_BITS_ACTIVE;

	/*
	 * remove all the member message queues
	 * AND remove this message queue from any containing sets
	 */
	ipc_mqueue_remove_all(&pset->ips_messages);

	/*
	 * Set all waiters on the portset running to
	 * discover the change.
	 */
	imq_lock(&pset->ips_messages);
	ipc_mqueue_changed(space, &pset->ips_messages);
	imq_unlock(&pset->ips_messages);

	ipc_mqueue_deinit(&pset->ips_messages);

	ips_unlock(pset);
	ips_release(pset);      /* consume the ref our caller gave us */
}

/*
 * Kqueue EVFILT_MACHPORT support
 *
 * - kn_mqueue points to the monitored mqueue
 *
 * - (in/out) ext[0] holds a mach_vm_address_t to a userspace buffer
 *   that can be used to direct-deliver messages when
 *   MACH_RCV_MSG is set in kn_sfflags
 *
 * - (in/out) ext[1] holds a mach_msg_size_t representing the size
 *   of the userspace buffer held in ext[0].
 *
 * - (out)    ext[2] is used to deliver qos information
 *   about the send queue to userspace.
 *
 * - (abused) ext[3] is used in kernel to hold a reference to the first port
 *   with a turnstile that participates in the sync IPC override.
 *
 * - kn_hook is optionally a "knote" turnstile. It is used as the inheritor
 *   of turnstiles for rights copied out as part of direct message delivery
 *   when they can participate in the sync IPC override.
 *
 *   It is used to atomically neuter the sync IPC override when the knote is
 *   re-enabled.
 *
 * (An illustrative userspace registration sketch follows the includes below.)
 */

#include <sys/event.h>
#include <sys/errno.h>

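/*
 * Illustrative userspace sketch (editor's addition, not part of this file):
 * registering a receive right with a kqueue so that messages are
 * direct-delivered into a buffer passed through ext[0]/ext[1], as described
 * in the comment above.  EVFILT_MACHPORT and kevent64() are real interfaces;
 * the helper below and its flag choices are assumptions for illustration.
 */
#if 0 /* example only */
#include <mach/mach.h>
#include <sys/event.h>

static int
watch_port(mach_port_t port, void *buf, size_t bufsize)
{
	int kq = kqueue();
	struct kevent64_s kev = {
		.ident  = port,
		.filter = EVFILT_MACHPORT,
		.flags  = EV_ADD | EV_ENABLE | EV_DISPATCH,
		.fflags = MACH_RCV_MSG,                 /* direct delivery */
		.ext    = { (uint64_t)(uintptr_t)buf,   /* ext[0]: buffer  */
			    (uint64_t)bufsize },        /* ext[1]: size    */
	};

	/* register; fired events are harvested by a later kevent64() call */
	return kevent64(kq, &kev, 1, NULL, 0, 0, NULL);
}
#endif
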
static int
filt_machport_adjust_qos(struct knote *kn, ipc_kmsg_t first)
{
	if (kn->kn_sfflags & MACH_RCV_MSG) {
		int qos = _pthread_priority_thread_qos(first->ikm_qos_override);
		return FILTER_ADJUST_EVENT_QOS(qos);
	}
	return 0;
}

struct turnstile *
filt_ipc_kqueue_turnstile(struct knote *kn)
{
	assert(kn->kn_filter == EVFILT_MACHPORT || kn->kn_filter == EVFILT_WORKLOOP);
	return kqueue_turnstile(knote_get_kq(kn));
}

bool
filt_machport_kqueue_has_turnstile(struct knote *kn)
{
	assert(kn->kn_filter == EVFILT_MACHPORT);
	return ((kn->kn_sfflags & MACH_RCV_MSG) || (kn->kn_sfflags & MACH_RCV_SYNC_PEEK))
	       && (kn->kn_flags & EV_DISPATCH);
}

/*
 * Stashes a port that participates in the sync IPC override until the knote
 * is re-enabled.
 *
 * It returns:
 * - the turnstile to use as an inheritor for the stashed port
 * - the kind of stash that happened as PORT_SYNC_* value among:
 *   o not stashed (no sync IPC support)
 *   o stashed in the knote (in kn_ext[3])
 *   o to be hooked to the kn_hook knote
 */
struct turnstile *
filt_machport_stash_port(struct knote *kn, ipc_port_t port, int *link)
{
	struct turnstile *ts = TURNSTILE_NULL;

	if (kn->kn_filter == EVFILT_WORKLOOP) {
		assert(kn->kn_mqueue == NULL);
		kn->kn_mqueue = &port->ip_messages;
		ip_reference(port);
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_KNOTE;
		}
		ts = filt_ipc_kqueue_turnstile(kn);
	} else if (!filt_machport_kqueue_has_turnstile(kn)) {
		if (link) {
			*link = PORT_SYNC_LINK_NO_LINKAGE;
		}
	} else if (kn->kn_ext[3] == 0) {
		ip_reference(port);
		kn->kn_ext[3] = (uintptr_t)port;
		ts = filt_ipc_kqueue_turnstile(kn);
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_KNOTE;
		}
	} else {
		ts = (struct turnstile *)kn->kn_hook;
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_STASH;
		}
	}

	return ts;
}

/*
 * Lazily prepare a turnstile so that filt_machport_stash_port()
 * can be called with the mqueue lock held.
 *
 * It will allocate a turnstile in kn_hook if:
 * - the knote supports sync IPC override,
 * - we already stashed a port in kn_ext[3],
 * - the object that will be copied out has a chance to ask to be stashed.
 *
 * It is set up so that its inheritor is the workloop turnstile that was
 * allocated when this knote was attached.
 */
void
filt_machport_turnstile_prepare_lazily(
	struct knote          *kn,
	mach_msg_type_name_t  msgt_name,
	ipc_port_t            port)
{
	/* This is called from within filt_machportprocess */
	assert((kn->kn_status & KN_SUPPRESSED) && (kn->kn_status & KN_LOCKED));

	if (!filt_machport_kqueue_has_turnstile(kn)) {
		return;
	}

	if (kn->kn_ext[3] == 0 || kn->kn_hook) {
		return;
	}

	struct turnstile *ts = filt_ipc_kqueue_turnstile(kn);
	if ((msgt_name == MACH_MSG_TYPE_PORT_SEND_ONCE && port->ip_specialreply) ||
	    (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE)) {
		struct turnstile *kn_ts = turnstile_alloc();
		kn_ts = turnstile_prepare((uintptr_t)kn,
		    (struct turnstile **)&kn->kn_hook, kn_ts, TURNSTILE_KNOTE);
		turnstile_update_inheritor(kn_ts, ts,
		    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE);
		turnstile_cleanup();
	}
}

static void
filt_machport_turnstile_complete_port(struct knote *kn, ipc_port_t port,
    ipc_mqueue_t mqueue)
{
	struct turnstile *ts = TURNSTILE_NULL;

	ip_lock(port);
	if (port->ip_specialreply) {
		/*
		 * If the reply has been sent to the special reply port already,
		 * then the special reply port may already be reused to do something
		 * entirely different.
		 *
		 * However, the only reason for it to still point to this knote is
		 * that it's still waiting for a reply, so when this is the case,
		 * neuter the linkage.
		 */
		if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE &&
		    port->ip_sync_inheritor_knote == kn) {
			ipc_port_adjust_special_reply_port_locked(port, NULL,
			    (IPC_PORT_ADJUST_SR_NONE | IPC_PORT_ADJUST_SR_ENABLE_EVENT), FALSE);
		} else {
			ip_unlock(port);
		}
	} else {
		/*
		 * For receive rights, if their IMQ_KNOTE() is still this
		 * knote, then sever the link.
		 */
		imq_lock(mqueue);
		if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE &&
		    mqueue->imq_inheritor_knote == kn) {
			ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL);
			ts = port_send_turnstile(port);
		}
		if (ts) {
			turnstile_reference(ts);
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
		imq_unlock(mqueue);
		ip_unlock(port);

		if (ts) {
			turnstile_update_inheritor_complete(ts,
			    TURNSTILE_INTERLOCK_NOT_HELD);
			turnstile_deallocate(ts);
		}
	}

	ip_release(port);
}

void
filt_wldetach_sync_ipc(struct knote *kn)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	filt_machport_turnstile_complete_port(kn, ip_from_mq(mqueue), mqueue);
	kn->kn_mqueue = NULL;
}

/*
 * Other half of filt_machport_turnstile_prepare_lazily()
 *
 * This is serialized by the knote state machine.
 */
static void
filt_machport_turnstile_complete(struct knote *kn)
{
	if (kn->kn_ext[3]) {
		ipc_port_t port = (ipc_port_t)kn->kn_ext[3];
		filt_machport_turnstile_complete_port(kn, port, &port->ip_messages);
		kn->kn_ext[3] = 0;
	}

	if (kn->kn_hook) {
		struct turnstile *ts = kn->kn_hook;

		turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
		    TURNSTILE_IMMEDIATE_UPDATE);
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);

		turnstile_complete((uintptr_t)kn, (struct turnstile **)&kn->kn_hook, &ts, TURNSTILE_KNOTE);
		turnstile_cleanup();

		assert(ts);
		turnstile_deallocate(ts);
	}
}

static void
filt_machport_link(ipc_mqueue_t mqueue, struct knote *kn)
{
	struct knote *hd = SLIST_FIRST(&mqueue->imq_klist);

	if (hd && filt_machport_kqueue_has_turnstile(kn)) {
		SLIST_INSERT_AFTER(hd, kn, kn_selnext);
	} else {
		SLIST_INSERT_HEAD(&mqueue->imq_klist, kn, kn_selnext);
	}
}

static void
filt_machport_unlink(ipc_mqueue_t mqueue, struct knote *kn)
{
	struct knote **knprev;

	KNOTE_DETACH(&mqueue->imq_klist, kn);

	/* make sure the first knote is a knote we can push on */
	SLIST_FOREACH_PREVPTR(kn, knprev, &mqueue->imq_klist, kn_selnext) {
		if (filt_machport_kqueue_has_turnstile(kn)) {
			*knprev = SLIST_NEXT(kn, kn_selnext);
			SLIST_INSERT_HEAD(&mqueue->imq_klist, kn, kn_selnext);
			break;
		}
	}
}

int
filt_wlattach_sync_ipc(struct knote *kn)
{
	mach_port_name_t name = (mach_port_name_t)kn->kn_id;
	ipc_space_t space = current_space();
	ipc_entry_t entry;
	ipc_port_t port = IP_NULL;
	int error = 0;

	if (ipc_right_lookup_read(space, name, &entry) != KERN_SUCCESS) {
		return ENOENT;
	}

	/* space is read-locked */

	if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) {
		port = ip_object_to_port(entry->ie_object);
		if (port->ip_specialreply) {
			error = ENOENT;
		}
	} else if (entry->ie_bits & MACH_PORT_TYPE_SEND_ONCE) {
		port = ip_object_to_port(entry->ie_object);
		if (!port->ip_specialreply) {
			error = ENOENT;
		}
	} else {
		error = ENOENT;
	}
	if (error) {
		is_read_unlock(space);
		return error;
	}

	ip_lock(port);
	is_read_unlock(space);

	if (port->ip_sync_link_state == PORT_SYNC_LINK_ANY) {
		ip_unlock(port);
		/*
		 * We cannot start a sync IPC inheritance chain, only extend an
		 * existing one.
		 * Note: this can also happen if the inheritance chain broke
		 * because the original requestor died.
		 */
		return ENOENT;
	}

	if (port->ip_specialreply) {
		ipc_port_adjust_special_reply_port_locked(port, kn,
		    IPC_PORT_ADJUST_SR_LINK_WORKLOOP, FALSE);
	} else {
		ipc_port_adjust_port_locked(port, kn, FALSE);
	}

	/* make sure the port was stashed */
	assert(kn->kn_mqueue == &port->ip_messages);

	/* port has been unlocked by ipc_port_adjust_* */

	return 0;
}

static int
filt_machportattach(
	struct knote                  *kn,
	__unused struct kevent_qos_s  *kev)
{
	mach_port_name_t name = (mach_port_name_t)kn->kn_id;
	uint64_t wq_link_id = waitq_link_reserve(NULL);
	ipc_space_t space = current_space();
	ipc_kmsg_t first;
	struct turnstile *send_turnstile = TURNSTILE_NULL;

	int error;
	int result = 0;
	kern_return_t kr;
	ipc_entry_t entry;
	ipc_mqueue_t mqueue;

	kn->kn_flags &= ~EV_EOF;
	kn->kn_ext[3] = 0;

	if (filt_machport_kqueue_has_turnstile(kn)) {
		/*
		 * If the filter is likely to support sync IPC override,
		 * and it happens to be attaching to a workloop,
		 * make sure the workloop has an allocated turnstile.
		 */
		kqueue_alloc_turnstile(knote_get_kq(kn));
	}

lookup_again:
	kr = ipc_right_lookup_read(space, name, &entry);

	if (kr != KERN_SUCCESS) {
		error = ENOENT;
		goto out;
	}

	/* space is read-locked and active */

	if ((entry->ie_bits & MACH_PORT_TYPE_PORT_SET) &&
	    knote_link_waitqset_should_lazy_alloc(kn)) {
		is_read_unlock(space);

		/*
		 * We need to link the portset of the kn,
		 * to ensure that the link is allocated before taking
		 * any spinlocks.
		 *
		 * Because we have to drop the space lock so that
		 * knote_link_waitqset_lazy_alloc() can allocate memory,
		 * we will need to redo the lookup.
		 */
		knote_link_waitqset_lazy_alloc(kn);
		goto lookup_again;
	}

	if (entry->ie_bits & MACH_PORT_TYPE_PORT_SET) {
		ipc_pset_t pset;

		pset = ips_object_to_pset(entry->ie_object);
		mqueue = &pset->ips_messages;
		ips_reference(pset);

		imq_lock(mqueue);
		kn->kn_mqueue = mqueue;

		/*
		 * Bind the portset wait queue directly to knote/kqueue.
		 * This allows us to just use wait_queue foo to effect a wakeup,
		 * rather than having to call knote() from the Mach code on each
		 * message.  We still attach the knote to the mqueue klist for
		 * NOTE_REVOKE purposes only.
		 */
		error = knote_link_waitq(kn, &mqueue->imq_wait_queue, &wq_link_id);
		if (!error) {
			filt_machport_link(mqueue, kn);
			imq_unlock(mqueue);
		} else {
			kn->kn_mqueue = IMQ_NULL;
			imq_unlock(mqueue);
			ips_release(pset);
		}

		is_read_unlock(space);

		/*
		 * linked knotes are marked stay-active and therefore don't
		 * need an indication of their fired state to be returned
		 * from the attach operation.
		 */
	} else if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) {
		ipc_port_t port = ip_object_to_port(entry->ie_object);

		if (port->ip_specialreply) {
			/*
			 * Registering for kevents on special reply ports
			 * isn't supported for two reasons:
			 *
			 * 1. it really makes very little sense for a port that
			 *    is supposed to be used synchronously
			 *
			 * 2. their mqueue's imq_klist field will be used to
			 *    store the receive turnstile, so we can't possibly
			 *    attach them anyway.
			 */
			is_read_unlock(space);
			error = ENOTSUP;
			goto out;
		}

		mqueue = &port->ip_messages;
		ip_reference(port);

		/*
		 * attach knote to port and determine result
		 * If the filter requested direct message receipt,
		 * we may need to adjust the qos of the knote to
		 * reflect the requested and override qos of the
		 * first message in the queue.
		 */
		ip_lock(port);
		imq_lock(mqueue);

		kn->kn_mqueue = mqueue;
		if (port->ip_sync_link_state != PORT_SYNC_LINK_ANY) {
			/*
			 * We're attaching a port that used to have an IMQ_KNOTE,
			 * clobber this state, we'll fixup its turnstile inheritor below.
			 */
			ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL);
		}
		filt_machport_link(mqueue, kn);

		if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) {
			result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first);
		}

		/*
		 * Update the port's turnstile inheritor
		 *
		 * Unlike filt_machportdetach(), we don't have to care about races for
		 * turnstile_workloop_pusher_info(): filt_machport_link() doesn't affect
		 * already pushing knotes, and if the current one becomes the new
		 * pusher, it'll only be visible when turnstile_workloop_pusher_info()
		 * returns.
		 */
		send_turnstile = port_send_turnstile(port);
		if (send_turnstile) {
			turnstile_reference(send_turnstile);
			ipc_port_send_update_inheritor(port, send_turnstile,
			    TURNSTILE_IMMEDIATE_UPDATE);

			/*
			 * rdar://problem/48861190
			 *
			 * When a listener connection resumes a peer,
			 * updating the inheritor above has moved the push
			 * from the current thread to the workloop.
			 *
			 * However, we haven't told the workloop yet
			 * that it needs a thread request, and we risk
			 * being preempted as soon as we drop the space
			 * lock below.
			 *
			 * To avoid this, disable preemption and let kevent
			 * reenable it after it takes the kqlock.
			 */
			disable_preemption();
			result |= FILTER_THREADREQ_NODEFEER;
		}

		imq_unlock(mqueue);
		ip_unlock(port);

		is_read_unlock(space);
		if (send_turnstile) {
			turnstile_update_inheritor_complete(send_turnstile,
			    TURNSTILE_INTERLOCK_NOT_HELD);
			turnstile_deallocate_safe(send_turnstile);
		}

		error = 0;
	} else {
		is_read_unlock(space);
		error = ENOTSUP;
	}

out:
	waitq_link_release(wq_link_id);

	/* bail out on errors */
	if (error) {
		knote_set_error(kn, error);
		return 0;
	}

	return result;
}

/* Validate imq_to_object implementation "works" */
_Static_assert(offsetof(struct ipc_pset, ips_messages) ==
    offsetof(struct ipc_port, ip_messages),
    "Make sure the mqueue aliases in both ports and psets");

static void
filt_machportdetach(
	struct knote *kn)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_object_t object = imq_to_object(mqueue);
	struct turnstile *send_turnstile = TURNSTILE_NULL;

	filt_machport_turnstile_complete(kn);

	imq_lock(mqueue);
	if ((kn->kn_status & KN_VANISHED) || (kn->kn_flags & EV_EOF)) {
		/*
		 * ipc_mqueue_changed() already unhooked this knote from the mqueue.
		 */
	} else {
		ipc_port_t port = IP_NULL;

		/*
		 * When the knote being detached is the first one in the list,
		 * then unlinking the knote *and* updating the turnstile inheritor
		 * need to happen atomically with respect to the callers of
		 * turnstile_workloop_pusher_info().
		 *
		 * The caller of turnstile_workloop_pusher_info() will use the kq req
		 * lock (and hence the kqlock), so we just need to hold the kqlock too.
		 */
		if (io_otype(object) == IOT_PORT) {
			port = ip_object_to_port(object);
			assert(port->ip_sync_link_state == PORT_SYNC_LINK_ANY);
			if (kn == SLIST_FIRST(&mqueue->imq_klist)) {
				send_turnstile = port_send_turnstile(port);
			}
		}

		filt_machport_unlink(mqueue, kn);

		if (send_turnstile) {
			turnstile_reference(send_turnstile);
			ipc_port_send_update_inheritor(port, send_turnstile,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
	}

	/* Clear the knote pointer once the knote has been removed from turnstile */
	kn->kn_mqueue = IMQ_NULL;
	imq_unlock(mqueue);

	if (send_turnstile) {
		turnstile_update_inheritor_complete(send_turnstile,
		    TURNSTILE_INTERLOCK_NOT_HELD);
		turnstile_deallocate(send_turnstile);
	}

	if (io_otype(object) == IOT_PORT_SET) {
		/*
		 * Unlink the portset wait queue from knote/kqueue.
		 * JMM - Does this need to be atomic under the mq lock?
		 */
		(void)knote_unlink_waitq(kn, &mqueue->imq_wait_queue);
	}
	io_release(object);
}

/*
 * filt_machportevent - deliver events into the mach port filter
 *
 * Mach port message arrival events are currently only posted via the
 * kqueue filter routine for ports.  Port sets are marked stay-active
 * and the wait queue code will break any kqueue waiters out to go
 * poll the stay-queued knotes again.
 *
 * If there is a message at the head of the queue,
 * we indicate that the knote should go active.  If
 * the message is to be direct-received, we adjust the
 * QoS of the knote according to the requested and override
 * QoS of that first message.
 */
static int
filt_machportevent(struct knote *kn, long hint __assert_only)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_kmsg_t first;
	int result = 0;

	/* mqueue locked by caller */
	assert(imq_held(mqueue));
	assert(hint != NOTE_REVOKE);
	if (imq_is_valid(mqueue)) {
		assert(!imq_is_set(mqueue));
		if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) {
			result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first);
		}
	}

	return result;
}

static int
filt_machporttouch(
	struct knote         *kn,
	struct kevent_qos_s  *kev)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_kmsg_t first;
	int result = 0;

	/* copy in new settings and save off new input fflags */
	kn->kn_sfflags = kev->fflags;
	kn->kn_ext[0] = kev->ext[0];
	kn->kn_ext[1] = kev->ext[1];

	if (kev->flags & EV_ENABLE) {
		/*
		 * If the knote is being enabled, make sure there are no lingering
		 * IPC overrides from the previous message delivery.
		 */
		filt_machport_turnstile_complete(kn);
	}

	/*
	 * If the mqueue is a valid port and there is a message
	 * that will be direct-received from the knote, update
	 * the knote qos based on the first message and trigger
	 * the event.  If there are no more messages, reset the
	 * QoS to the value provided by the kevent.
	 */
	imq_lock(mqueue);
	if (imq_is_valid(mqueue) && !imq_is_set(mqueue) &&
	    (first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) {
		result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first);
	} else if (kn->kn_sfflags & MACH_RCV_MSG) {
		result = FILTER_RESET_EVENT_QOS;
	}
	imq_unlock(mqueue);

	return result;
}

static int
filt_machportprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;
	ipc_object_t object = imq_to_object(mqueue);
	thread_t self = current_thread();
	kevent_ctx_t kectx = NULL;

	wait_result_t wresult;
	mach_msg_option_t option;
	mach_vm_address_t addr;
	mach_msg_size_t size;

	/* Capture current state */
	knote_fill_kevent(kn, kev, MACH_PORT_NULL);
	kev->ext[3] = 0; /* hide our port reference from userspace */

	/* If already deallocated/moved, return one last EOF event */
	if (kev->flags & EV_EOF) {
		return FILTER_ACTIVE | FILTER_RESET_EVENT_QOS;
	}

	/*
	 * Only honor supported receive options.  If no options are
	 * provided, just force a MACH_RCV_TOO_LARGE to detect the
	 * name of the port and the size of the waiting message.
	 */
	option = kn->kn_sfflags & (MACH_RCV_MSG | MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY |
	    MACH_RCV_TRAILER_MASK | MACH_RCV_VOUCHER | MACH_MSG_STRICT_REPLY);

	if (option & MACH_RCV_MSG) {
		addr = (mach_vm_address_t) kn->kn_ext[0];
		size = (mach_msg_size_t) kn->kn_ext[1];

		/*
		 * If the kevent didn't specify a buffer and length, carve a buffer
		 * from the filter processing data according to the flags.
		 */
		if (size == 0) {
			kectx = kevent_get_context(self);
			addr = (mach_vm_address_t)kectx->kec_data_out;
			size = (mach_msg_size_t)kectx->kec_data_resid;
			option |= (MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY);
			if (kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) {
				option |= MACH_RCV_STACK;
			}
		}
	} else {
		/* just detect the port name (if a set) and size of the first message */
		option = MACH_RCV_LARGE;
		addr = 0;
		size = 0;
	}

	imq_lock(mqueue);

	/* just use the reference from here on out */
	io_reference(object);

	/*
	 * Set up to receive a message or the notification of a
	 * too-large message.  But never allow this call to wait.
	 * If the user provided additional options, like trailer
	 * options, pass those through here.  But we don't support
	 * scatter lists through this interface.
	 */
	self->ith_object = object;
	self->ith_msg_addr = addr;
	self->ith_rsize = size;
	self->ith_msize = 0;
	self->ith_option = option;
	self->ith_receiver_name = MACH_PORT_NULL;
	self->ith_continuation = NULL;
	option |= MACH_RCV_TIMEOUT; // never wait
	self->ith_state = MACH_RCV_IN_PROGRESS;
	self->ith_knote = kn;

	wresult = ipc_mqueue_receive_on_thread(
		mqueue,
		option,
		size, /* max_size */
		0,    /* immediate timeout */
		THREAD_INTERRUPTIBLE,
		self);
	/* mqueue unlocked */

	/*
	 * If we timed out, or the process is exiting, just release the
	 * reference on the ipc_object and return zero.
	 */
	if (wresult == THREAD_RESTART || self->ith_state == MACH_RCV_TIMED_OUT) {
		assert(self->turnstile != TURNSTILE_NULL);
		io_release(object);
		return 0;
	}

	assert(wresult == THREAD_NOT_WAITING);
	assert(self->ith_state != MACH_RCV_IN_PROGRESS);

	/*
	 * If we weren't attempting to receive a message
	 * directly, we need to return the port name in
	 * the kevent structure.
	 */
	if ((option & MACH_RCV_MSG) != MACH_RCV_MSG) {
		assert(self->ith_state == MACH_RCV_TOO_LARGE);
		assert(self->ith_kmsg == IKM_NULL);
		kev->data = self->ith_receiver_name;
		io_release(object);
		return FILTER_ACTIVE;
	}

	/*
	 * Attempt to receive the message directly, returning
	 * the results in the fflags field.
	 */
	kev->fflags = mach_msg_receive_results(&size);

	/* kmsg and object reference consumed */

	/*
	 * If the user asked for the identity of ports containing a
	 * too-large message, return it in the data field (as we
	 * do for messages we didn't try to receive).
	 */
	if (kev->fflags == MACH_RCV_TOO_LARGE) {
		kev->ext[1] = self->ith_msize;
		if (option & MACH_RCV_LARGE_IDENTITY) {
			kev->data = self->ith_receiver_name;
		} else {
			kev->data = MACH_PORT_NULL;
		}
	} else {
		kev->ext[1] = size;
		kev->data = MACH_PORT_NULL;
	}

	/*
	 * If we used a data buffer carved out from the filt_process data,
	 * store the address used in the knote and adjust the residual and
	 * other parameters for future use.
	 */
	if (kectx) {
		assert(kectx->kec_data_resid >= size);
		kectx->kec_data_resid -= size;
		if ((kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) == 0) {
			kev->ext[0] = kectx->kec_data_out;
			kectx->kec_data_out += size;
		} else {
			assert(option & MACH_RCV_STACK);
			kev->ext[0] = kectx->kec_data_out + kectx->kec_data_resid;
		}
	}

	/*
	 * Apply message-based QoS values to output kevent as prescribed.
	 * The kev->ext[2] field gets (msg-qos << 32) | (override-qos).
	 *
	 * The mach_msg_receive_results() call saved off the message
	 * QoS values in the continuation save area on successful receive.
	 */
	if (kev->fflags == MACH_MSG_SUCCESS) {
		kev->ext[2] = ((uint64_t)self->ith_qos << 32) |
		    (uint64_t)self->ith_qos_override;
	}

	return FILTER_ACTIVE;
}

/*
 * Peek to see if the message queue associated with the knote has any
 * events.  This pre-hook is called when a filter uses the stay-
 * on-queue mechanism (as the knote_link_waitq mechanism does for
 * portsets) and someone calls select() against the containing kqueue.
 *
 * Just peek at the pre-post status of the portset's wait queue
 * to determine if it has anything interesting.  We can do it
 * without holding the lock, as it is just a snapshot in time
 * (if this is used as part of really waiting for events, we
 * will catch changes in this status when the event gets posted
 * up to the knote's kqueue).
 */
static int
filt_machportpeek(struct knote *kn)
{
	ipc_mqueue_t mqueue = kn->kn_mqueue;

	return ipc_mqueue_set_peek(mqueue) ? FILTER_ACTIVE : 0;
}

SECURITY_READ_ONLY_EARLY(struct filterops) machport_filtops = {
	.f_adjusts_qos = true,
	.f_extended_codes = true,
	.f_attach = filt_machportattach,
	.f_detach = filt_machportdetach,
	.f_event = filt_machportevent,
	.f_touch = filt_machporttouch,
	.f_process = filt_machportprocess,
	.f_peek = filt_machportpeek,
};