]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* | |
29 | * @OSF_COPYRIGHT@ | |
30 | */ | |
31 | /* | |
32 | * Mach Operating System | |
33 | * Copyright (c) 1991,1990,1989 Carnegie Mellon University | |
34 | * All Rights Reserved. | |
35 | * | |
36 | * Permission to use, copy, modify and distribute this software and its | |
37 | * documentation is hereby granted, provided that both the copyright | |
38 | * notice and this permission notice appear in all copies of the | |
39 | * software, derivative works or modified versions, and any portions | |
40 | * thereof, and that both notices appear in supporting documentation. | |
41 | * | |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
45 | * | |
46 | * Carnegie Mellon requests users of this software to return to | |
47 | * | |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
49 | * School of Computer Science | |
50 | * Carnegie Mellon University | |
51 | * Pittsburgh PA 15213-3890 | |
52 | * | |
53 | * any improvements or extensions that they make and grant Carnegie Mellon | |
54 | * the rights to redistribute these changes. | |
55 | */ | |
56 | /* | |
57 | */ | |
58 | /* | |
59 | * File: ipc/ipc_pset.c | |
60 | * Author: Rich Draves | |
61 | * Date: 1989 | |
62 | * | |
63 | * Functions to manipulate IPC port sets. | |
64 | */ | |
65 | ||
66 | #include <mach/port.h> | |
67 | #include <mach/kern_return.h> | |
68 | #include <mach/message.h> | |
69 | #include <ipc/ipc_mqueue.h> | |
70 | #include <ipc/ipc_object.h> | |
71 | #include <ipc/ipc_pset.h> | |
72 | #include <ipc/ipc_right.h> | |
73 | #include <ipc/ipc_space.h> | |
74 | #include <ipc/ipc_port.h> | |
75 | ||
76 | #include <kern/kern_types.h> | |
77 | ||
78 | #include <vm/vm_map.h> | |
79 | #include <libkern/section_keywords.h> | |
80 | ||
81 | /* | |
82 | * Routine: ipc_pset_alloc | |
83 | * Purpose: | |
84 | * Allocate a port set. | |
85 | * Conditions: | |
86 | * Nothing locked. If successful, the port set is returned | |
87 | * locked. (The caller doesn't have a reference.) | |
88 | * Returns: | |
89 | * KERN_SUCCESS The port set is allocated. | |
90 | * KERN_INVALID_TASK The space is dead. | |
91 | * KERN_NO_SPACE No room for an entry in the space. | |
92 | * KERN_RESOURCE_SHORTAGE Couldn't allocate memory. | |
93 | */ | |
94 | ||
95 | kern_return_t | |
96 | ipc_pset_alloc( | |
97 | ipc_space_t space, | |
98 | mach_port_name_t *namep, | |
99 | ipc_pset_t *psetp) | |
100 | { | |
101 | ipc_pset_t pset; | |
102 | mach_port_name_t name; | |
103 | kern_return_t kr; | |
104 | ||
105 | kr = ipc_object_alloc(space, IOT_PORT_SET, | |
106 | MACH_PORT_TYPE_PORT_SET, 0, | |
107 | &name, (ipc_object_t *) &pset); | |
108 | if (kr != KERN_SUCCESS) { | |
109 | return kr; | |
110 | } | |
111 | /* pset and space are locked */ | |
112 | ||
113 | ipc_mqueue_init(&pset->ips_messages, TRUE /* set */); | |
114 | is_write_unlock(space); | |
115 | ||
116 | *namep = name; | |
117 | *psetp = pset; | |
118 | return KERN_SUCCESS; | |
119 | } | |
120 | ||
121 | /* | |
122 | * Routine: ipc_pset_alloc_name | |
123 | * Purpose: | |
124 | * Allocate a port set, with a specific name. | |
125 | * Conditions: | |
126 | * Nothing locked. If successful, the port set is returned | |
127 | * locked. (The caller doesn't have a reference.) | |
128 | * Returns: | |
129 | * KERN_SUCCESS The port set is allocated. | |
130 | * KERN_INVALID_TASK The space is dead. | |
131 | * KERN_NAME_EXISTS The name already denotes a right. | |
132 | * KERN_RESOURCE_SHORTAGE Couldn't allocate memory. | |
133 | */ | |
134 | ||
135 | kern_return_t | |
136 | ipc_pset_alloc_name( | |
137 | ipc_space_t space, | |
138 | mach_port_name_t name, | |
139 | ipc_pset_t *psetp) | |
140 | { | |
141 | ipc_pset_t pset; | |
142 | kern_return_t kr; | |
143 | ||
144 | kr = ipc_object_alloc_name(space, IOT_PORT_SET, | |
145 | MACH_PORT_TYPE_PORT_SET, 0, | |
146 | name, (ipc_object_t *) &pset); | |
147 | if (kr != KERN_SUCCESS) { | |
148 | return kr; | |
149 | } | |
150 | /* pset is locked */ | |
151 | ||
152 | ipc_mqueue_init(&pset->ips_messages, TRUE /* set */); | |
153 | ||
154 | *psetp = pset; | |
155 | return KERN_SUCCESS; | |
156 | } | |
157 | ||
158 | ||
159 | /* | |
160 | * Routine: ipc_pset_alloc_special | |
161 | * Purpose: | |
162 | * Allocate a port set in a special space. | |
163 | * The new port set is returned with one ref. | |
164 | * If unsuccessful, IPS_NULL is returned. | |
165 | * Conditions: | |
166 | * Nothing locked. | |
167 | */ | |
168 | ipc_pset_t | |
169 | ipc_pset_alloc_special( | |
170 | __assert_only ipc_space_t space) | |
171 | { | |
172 | ipc_pset_t pset; | |
173 | ||
174 | assert(space != IS_NULL); | |
175 | assert(space->is_table == IE_NULL); | |
176 | assert(!is_active(space)); | |
177 | ||
178 | __IGNORE_WCASTALIGN(pset = (ipc_pset_t)io_alloc(IOT_PORT_SET)); | |
179 | if (pset == IPS_NULL) { | |
180 | return IPS_NULL; | |
181 | } | |
182 | ||
183 | bzero((char *)pset, sizeof(*pset)); | |
184 | ||
185 | io_lock_init(&pset->ips_object); | |
186 | pset->ips_references = 1; | |
187 | pset->ips_object.io_bits = io_makebits(TRUE, IOT_PORT_SET, 0); | |
188 | ||
189 | ipc_mqueue_init(&pset->ips_messages, TRUE /* set */); | |
190 | ||
191 | return pset; | |
192 | } | |
193 | ||
194 | ||
195 | /* | |
196 | * Routine: ipc_pset_member | |
197 | * Purpose: | |
198 | * Checks to see if a port is a member of a pset | |
199 | * Conditions: | |
200 | * Both port and port set are locked. | |
201 | * The port must be active. | |
202 | */ | |
203 | boolean_t | |
204 | ipc_pset_member( | |
205 | ipc_pset_t pset, | |
206 | ipc_port_t port) | |
207 | { | |
208 | assert(ip_active(port)); | |
209 | ||
210 | return (ipc_mqueue_member(&port->ip_messages, &pset->ips_messages)); | |
211 | } | |
212 | ||
213 | ||
214 | /* | |
215 | * Routine: ipc_pset_add | |
216 | * Purpose: | |
217 | * Puts a port into a port set. | |
218 | * Conditions: | |
219 | * Both port and port set are locked and active. | |
220 | * The owner of the port set is also receiver for the port. | |
221 | */ | |
222 | ||
223 | kern_return_t | |
224 | ipc_pset_add( | |
225 | ipc_pset_t pset, | |
226 | ipc_port_t port, | |
227 | uint64_t *reserved_link, | |
228 | uint64_t *reserved_prepost) | |
229 | { | |
230 | kern_return_t kr; | |
231 | ||
232 | assert(ips_active(pset)); | |
233 | assert(ip_active(port)); | |
234 | ||
235 | kr = ipc_mqueue_add(&port->ip_messages, &pset->ips_messages, | |
236 | reserved_link, reserved_prepost); | |
237 | ||
238 | return kr; | |
239 | } | |
240 | ||
241 | ||
242 | ||
243 | /* | |
244 | * Routine: ipc_pset_remove | |
245 | * Purpose: | |
246 | * Removes a port from a port set. | |
247 | * The port set loses a reference. | |
248 | * Conditions: | |
249 | * Both port and port set are locked. | |
250 | * The port must be active. | |
251 | */ | |
252 | ||
253 | kern_return_t | |
254 | ipc_pset_remove( | |
255 | ipc_pset_t pset, | |
256 | ipc_port_t port) | |
257 | { | |
258 | kern_return_t kr; | |
259 | ||
260 | assert(ip_active(port)); | |
261 | ||
262 | if (port->ip_in_pset == 0) | |
263 | return KERN_NOT_IN_SET; | |
264 | ||
265 | kr = ipc_mqueue_remove(&port->ip_messages, &pset->ips_messages); | |
266 | ||
267 | return kr; | |
268 | } | |
269 | ||
270 | /* | |
271 | * Routine: ipc_pset_lazy_allocate | |
272 | * Purpose: | |
273 | * lazily initialize the wqset of a port set. | |
274 | * Conditions: | |
275 | * Nothing locked. | |
276 | */ | |
277 | ||
278 | kern_return_t | |
279 | ipc_pset_lazy_allocate( | |
280 | ipc_space_t space, | |
281 | mach_port_name_t psname) | |
282 | { | |
283 | kern_return_t kr; | |
284 | ipc_entry_t entry; | |
285 | ipc_object_t psobj; | |
286 | ipc_pset_t pset; | |
287 | ||
288 | kr = ipc_right_lookup_read(space, psname, &entry); | |
289 | if (kr != KERN_SUCCESS) | |
290 | return kr; | |
291 | ||
292 | /* space is read-locked and active */ | |
293 | if ((entry->ie_bits & MACH_PORT_TYPE_PORT_SET) == 0) { | |
294 | is_read_unlock(space); | |
295 | kr = KERN_INVALID_RIGHT; | |
296 | return kr; | |
297 | } | |
298 | ||
299 | psobj = entry->ie_object; | |
300 | __IGNORE_WCASTALIGN(pset = (ipc_pset_t) psobj); | |
301 | assert(pset != NULL); | |
302 | ipc_mqueue_t set_mqueue = &pset->ips_messages; | |
303 | struct waitq_set *wqset = &set_mqueue->imq_set_queue; | |
304 | ||
305 | io_reference(psobj); | |
306 | is_read_unlock(space); | |
307 | ||
308 | /* | |
309 | * lazily initialize the wqset to avoid | |
310 | * possible allocation while linking | |
311 | * under spinlocks. | |
312 | */ | |
313 | waitq_set_lazy_init_link(wqset); | |
314 | io_release(psobj); | |
315 | ||
316 | return KERN_SUCCESS; | |
317 | } | |
318 | ||
319 | /* | |
320 | * Routine: ipc_pset_remove_from_all | |
321 | * Purpose: | |
322 | * Removes a port from all it's port sets. | |
323 | * Conditions: | |
324 | * port is locked and active. | |
325 | */ | |
326 | ||
327 | kern_return_t | |
328 | ipc_pset_remove_from_all( | |
329 | ipc_port_t port) | |
330 | { | |
331 | if (port->ip_in_pset == 0) | |
332 | return KERN_NOT_IN_SET; | |
333 | ||
334 | /* | |
335 | * Remove the port's mqueue from all sets | |
336 | */ | |
337 | ipc_mqueue_remove_from_all(&port->ip_messages); | |
338 | return KERN_SUCCESS; | |
339 | } | |
340 | ||
341 | ||
342 | /* | |
343 | * Routine: ipc_pset_destroy | |
344 | * Purpose: | |
345 | * Destroys a port_set. | |
346 | * Conditions: | |
347 | * The port_set is locked and alive. | |
348 | * The caller has a reference, which is consumed. | |
349 | * Afterwards, the port_set is unlocked and dead. | |
350 | */ | |
351 | ||
352 | void | |
353 | ipc_pset_destroy( | |
354 | ipc_pset_t pset) | |
355 | { | |
356 | assert(ips_active(pset)); | |
357 | ||
358 | pset->ips_object.io_bits &= ~IO_BITS_ACTIVE; | |
359 | ||
360 | /* | |
361 | * remove all the member message queues | |
362 | * AND remove this message queue from any containing sets | |
363 | */ | |
364 | ipc_mqueue_remove_all(&pset->ips_messages); | |
365 | ||
366 | /* | |
367 | * Set all waiters on the portset running to | |
368 | * discover the change. | |
369 | */ | |
370 | imq_lock(&pset->ips_messages); | |
371 | ipc_mqueue_changed(&pset->ips_messages); | |
372 | imq_unlock(&pset->ips_messages); | |
373 | ||
374 | ipc_mqueue_deinit(&pset->ips_messages); | |
375 | ||
376 | ips_unlock(pset); | |
377 | ips_release(pset); /* consume the ref our caller gave us */ | |
378 | } | |
379 | ||
/*
 * Kqueue EVFILT_MACHPORT support
 *
 * - kn_ptr.p_mqueue points to the monitored mqueue
 *
 * - (in/out) ext[0] holds a mach_vm_address_t to a userspace buffer
 *   that can be used to direct-deliver messages when
 *   MACH_RCV_MSG is set in kn_sfflags
 *
 * - (in/out) ext[1] holds a mach_msg_size_t representing the size
 *   of the userspace buffer held in ext[0].
 *
 * - (out)    ext[2] is used to deliver qos information
 *   about the send queue to userspace.
 *
 * - (abused) ext[3] is used in kernel to hold a reference to the first port
 *   with a turnstile that participates in sync IPC override.
 *
 * - kn_hook is optionally a "knote" turnstile. It is used as the inheritor
 *   of turnstiles for rights copied out as part of direct message delivery
 *   when they can participate in sync IPC override.
 *
 *   It is used to atomically neuter the sync IPC override when the knote is
 *   re-enabled.
 *
 */
406 | ||
407 | #include <sys/event.h> | |
408 | #include <sys/errno.h> | |
409 | ||
410 | static int | |
411 | filt_machport_adjust_qos(struct knote *kn, ipc_kmsg_t first) | |
412 | { | |
413 | if (kn->kn_sfflags & MACH_RCV_MSG) { | |
414 | int qos = _pthread_priority_thread_qos(first->ikm_qos_override); | |
415 | return FILTER_ADJUST_EVENT_QOS(qos); | |
416 | } | |
417 | return 0; | |
418 | } | |
419 | ||
420 | struct turnstile * | |
421 | filt_machport_kqueue_turnstile(struct knote *kn) | |
422 | { | |
423 | if ((kn->kn_sfflags & MACH_RCV_MSG) && (kn->kn_status & KN_DISPATCH)) { | |
424 | return kqueue_turnstile(knote_get_kq(kn)); | |
425 | } | |
426 | return TURNSTILE_NULL; | |
427 | } | |
428 | ||
429 | /* | |
430 | * Stashes a port that participate to sync IPC override until the knote | |
431 | * is being re-enabled. | |
432 | * | |
433 | * It returns: | |
434 | * - the turnstile to use as an inheritor for the stashed port | |
435 | * - the kind of stash that happened as PORT_SYNC_* value among: | |
436 | * o not stashed (no sync IPC support) | |
437 | * o stashed in the knote (in kn_ext[3]) | |
438 | * o to be hooked to the kn_hook knote | |
439 | */ | |
440 | struct turnstile * | |
441 | filt_machport_stash_port(struct knote *kn, ipc_port_t port, int *link) | |
442 | { | |
443 | struct turnstile *ts = filt_machport_kqueue_turnstile(kn); | |
444 | ||
445 | if (!ts) { | |
446 | if (link) *link = PORT_SYNC_LINK_NO_LINKAGE; | |
447 | } else if (kn->kn_ext[3] == 0) { | |
448 | ip_reference(port); | |
449 | kn->kn_ext[3] = (uintptr_t)port; | |
450 | if (link) *link = PORT_SYNC_LINK_WORKLOOP_KNOTE; | |
451 | } else { | |
452 | ts = (struct turnstile *)kn->kn_hook; | |
453 | if (link) *link = PORT_SYNC_LINK_WORKLOOP_STASH; | |
454 | } | |
455 | ||
456 | return ts; | |
457 | } | |
458 | ||
459 | struct turnstile * | |
460 | filt_machport_stashed_special_reply_port_turnstile(ipc_port_t port) | |
461 | { | |
462 | struct knote *kn = port->ip_sync_inheritor_knote; | |
463 | ||
464 | assert(port->ip_specialreply); | |
465 | assert(port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE); | |
466 | if (kn->kn_ext[3] == (uint64_t)port) { | |
467 | return kqueue_turnstile(knote_get_kq(kn)); | |
468 | } | |
469 | return kn->kn_hook; | |
470 | } | |
471 | ||
472 | /* | |
473 | * Lazily prepare a turnstile so that filt_machport_stash_port() | |
474 | * can be called with the mqueue lock held. | |
475 | * | |
476 | * It will allocate a turnstile in kn_hook if: | |
477 | * - the knote supports sync IPC override, | |
478 | * - we already stashed a port in kn_ext[3], | |
479 | * - the object that will be copied out has a chance to ask to be stashed. | |
480 | * | |
481 | * It is setup so that its inheritor is the workloop turnstile that has been | |
482 | * allocated when this knote was attached. | |
483 | */ | |
484 | void | |
485 | filt_machport_turnstile_prepare_lazily( | |
486 | struct knote *kn, | |
487 | mach_msg_type_name_t msgt_name, | |
488 | ipc_port_t port) | |
489 | { | |
490 | /* This is called from within filt_machportprocess */ | |
491 | assert((kn->kn_status & KN_SUPPRESSED) && (kn->kn_status & KN_LOCKED)); | |
492 | ||
493 | struct turnstile *ts = filt_machport_kqueue_turnstile(kn); | |
494 | if (ts == TURNSTILE_NULL || kn->kn_ext[3] == 0 || kn->kn_hook) | |
495 | return; | |
496 | ||
497 | if ((msgt_name == MACH_MSG_TYPE_PORT_SEND_ONCE && port->ip_specialreply) || | |
498 | (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE)) { | |
499 | struct turnstile *kn_ts = turnstile_alloc(); | |
500 | kn_ts = turnstile_prepare((uintptr_t)kn, | |
501 | (struct turnstile **)&kn->kn_hook, kn_ts, TURNSTILE_KNOTE); | |
502 | turnstile_update_inheritor(kn_ts, ts, | |
503 | TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE); | |
504 | turnstile_cleanup(); | |
505 | } | |
506 | } | |
507 | ||
508 | /* | |
509 | * Other half of filt_machport_turnstile_prepare_lazily() | |
510 | * | |
511 | * This is serialized by the knote state machine. | |
512 | */ | |
513 | static void | |
514 | filt_machport_turnstile_complete(struct knote *kn) | |
515 | { | |
516 | struct turnstile *ts = TURNSTILE_NULL; | |
517 | ||
518 | if (kn->kn_ext[3]) { | |
519 | ipc_port_t port = (ipc_port_t)kn->kn_ext[3]; | |
520 | ipc_mqueue_t mqueue = &port->ip_messages; | |
521 | ||
522 | ip_lock(port); | |
523 | if (port->ip_specialreply) { | |
524 | /* | |
525 | * If the reply has been sent to the special reply port already, | |
526 | * then the special reply port may already be reused to do something | |
527 | * entirely different. | |
528 | * | |
529 | * However, the only reason for it to still point to this knote is | |
530 | * that it's still waiting for a reply, so when this is the case, | |
531 | * neuter the linkage. | |
532 | */ | |
533 | if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE && | |
534 | port->ip_sync_inheritor_knote == kn) { | |
535 | ipc_port_adjust_special_reply_port_locked(port, NULL, | |
536 | (IPC_PORT_ADJUST_SR_NONE | IPC_PORT_ADJUST_SR_ENABLE_EVENT), FALSE); | |
537 | } else { | |
538 | ip_unlock(port); | |
539 | } | |
540 | } else { | |
541 | struct turnstile *kq_ts = kqueue_turnstile(knote_get_kq(kn)); | |
542 | ||
543 | /* | |
544 | * For receive rights, if their IMQ_INHERITOR() is still this | |
545 | * workloop, then sever the link. | |
546 | * | |
547 | * It has a theoretical hole: if the port is sent again to a new | |
548 | * receive right that is also monitored by the same kqueue, | |
549 | * we would sever the link incorrectly. | |
550 | * | |
551 | * However this would be a REALLY cumbersome thing to do. | |
552 | */ | |
553 | imq_lock(mqueue); | |
554 | if (!IMQ_KLIST_VALID(mqueue) && IMQ_INHERITOR(mqueue) == kq_ts) { | |
555 | turnstile_deallocate_safe(kq_ts); | |
556 | klist_init(&mqueue->imq_klist); | |
557 | ts = port_send_turnstile(port); | |
558 | } | |
559 | if (ts) { | |
560 | turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, | |
561 | TURNSTILE_IMMEDIATE_UPDATE); | |
562 | turnstile_reference(ts); | |
563 | } | |
564 | imq_unlock(mqueue); | |
565 | ip_unlock(port); | |
566 | ||
567 | if (ts) { | |
568 | turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD); | |
569 | turnstile_deallocate(ts); | |
570 | } | |
571 | } | |
572 | ||
573 | ip_release(port); | |
574 | kn->kn_ext[3] = 0; | |
575 | } | |
576 | ||
577 | if (kn->kn_hook) { | |
578 | ts = kn->kn_hook; | |
579 | ||
580 | turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, | |
581 | TURNSTILE_IMMEDIATE_UPDATE); | |
582 | turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD); | |
583 | ||
584 | turnstile_complete((uintptr_t)kn, (struct turnstile **)&kn->kn_hook, &ts); | |
585 | turnstile_cleanup(); | |
586 | ||
587 | assert(ts); | |
588 | turnstile_deallocate(ts); | |
589 | } | |
590 | } | |
591 | ||
592 | static int | |
593 | filt_machportattach( | |
594 | struct knote *kn, | |
595 | __unused struct kevent_internal_s *kev) | |
596 | { | |
597 | mach_port_name_t name = (mach_port_name_t)kn->kn_kevent.ident; | |
598 | uint64_t wq_link_id = waitq_link_reserve(NULL); | |
599 | ipc_space_t space = current_space(); | |
600 | ipc_kmsg_t first; | |
601 | struct turnstile *turnstile = TURNSTILE_NULL; | |
602 | struct turnstile *send_turnstile = TURNSTILE_NULL; | |
603 | ||
604 | int error; | |
605 | int result = 0; | |
606 | kern_return_t kr; | |
607 | ipc_entry_t entry; | |
608 | ipc_mqueue_t mqueue; | |
609 | ||
610 | kn->kn_flags &= ~EV_EOF; | |
611 | kn->kn_ext[3] = 0; | |
612 | ||
613 | if ((kn->kn_sfflags & MACH_RCV_MSG) && (kn->kn_status & KN_DISPATCH)) { | |
614 | /* | |
615 | * If the filter is likely to support sync IPC override, | |
616 | * and it happens to be attaching to a workloop, | |
617 | * make sure the workloop has an allocated turnstile. | |
618 | */ | |
619 | turnstile = kqueue_alloc_turnstile(knote_get_kq(kn)); | |
620 | } | |
621 | ||
622 | kr = ipc_right_lookup_read(space, name, &entry); | |
623 | ||
624 | check_lookup: | |
625 | if (kr == KERN_SUCCESS) { | |
626 | /* space is read-locked and active */ | |
627 | ||
628 | if (entry->ie_bits & MACH_PORT_TYPE_PORT_SET) { | |
629 | ipc_pset_t pset; | |
630 | ||
631 | if (knote_link_waitqset_should_lazy_alloc(kn)) { | |
632 | is_read_unlock(space); | |
633 | ||
634 | /* | |
635 | * We need to link the portset of the kn, | |
636 | * to insure that the link is allocated before taking | |
637 | * any spinlocks. | |
638 | */ | |
639 | knote_link_waitqset_lazy_alloc(kn); | |
640 | ||
641 | /* | |
642 | * We had to drop the space lock because knote_link_waitqset_lazy_alloc() | |
643 | * could have allocated memory. The ipc_right_lookup_read() | |
644 | * function returns with the space locked, so we need to revalidate state. | |
645 | */ | |
646 | kr = ipc_right_lookup_read(space, name, &entry); | |
647 | if (!(kr == KERN_SUCCESS) || !(entry->ie_bits & MACH_PORT_TYPE_PORT_SET)) { | |
648 | goto check_lookup; | |
649 | } | |
650 | } | |
651 | ||
652 | __IGNORE_WCASTALIGN(pset = (ipc_pset_t)entry->ie_object); | |
653 | mqueue = &pset->ips_messages; | |
654 | ips_reference(pset); | |
655 | ||
656 | imq_lock(mqueue); | |
657 | kn->kn_ptr.p_mqueue = mqueue; | |
658 | ||
659 | /* | |
660 | * Bind the portset wait queue directly to knote/kqueue. | |
661 | * This allows us to just use wait_queue foo to effect a wakeup, | |
662 | * rather than having to call knote() from the Mach code on each | |
663 | * message. We still attach the knote to the mqueue klist for | |
664 | * NOTE_REVOKE purposes only. | |
665 | */ | |
666 | error = knote_link_waitq(kn, &mqueue->imq_wait_queue, &wq_link_id); | |
667 | if (!error) { | |
668 | assert(IMQ_KLIST_VALID(mqueue)); | |
669 | KNOTE_ATTACH(&mqueue->imq_klist, kn); | |
670 | imq_unlock(mqueue); | |
671 | } else { | |
672 | kn->kn_ptr.p_mqueue = IMQ_NULL; | |
673 | imq_unlock(mqueue); | |
674 | ips_release(pset); | |
675 | } | |
676 | ||
677 | is_read_unlock(space); | |
678 | ||
679 | /* | |
680 | * linked knotes are marked stay-active and therefore don't | |
681 | * need an indication of their fired state to be returned | |
682 | * from the attach operation. | |
683 | */ | |
684 | ||
685 | } else if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) { | |
686 | ipc_port_t port; | |
687 | ||
688 | __IGNORE_WCASTALIGN(port = (ipc_port_t)entry->ie_object); | |
689 | mqueue = &port->ip_messages; | |
690 | ip_reference(port); | |
691 | ||
692 | /* | |
693 | * attach knote to port and determine result | |
694 | * If the filter requested direct message receipt, | |
695 | * we may need to adjust the qos of the knote to | |
696 | * reflect the requested and override qos of the | |
697 | * first message in the queue. | |
698 | */ | |
699 | imq_lock(mqueue); | |
700 | kn->kn_ptr.p_mqueue = mqueue; | |
701 | if (!IMQ_KLIST_VALID(mqueue)) { | |
702 | /* | |
703 | * We're attaching a port that used to have an IMQ_INHERITOR, | |
704 | * clobber this state, and set the inheritor of its turnstile | |
705 | * to the kqueue it's now attached to. | |
706 | */ | |
707 | turnstile_deallocate_safe(IMQ_INHERITOR(mqueue)); | |
708 | klist_init(&mqueue->imq_klist); | |
709 | } | |
710 | KNOTE_ATTACH(&mqueue->imq_klist, kn); | |
711 | ||
712 | /* Update the port's turnstile inheritor */ | |
713 | send_turnstile = port_send_turnstile(port); | |
714 | if (send_turnstile) { | |
715 | turnstile_reference(send_turnstile); | |
716 | turnstile_update_inheritor(send_turnstile, turnstile, | |
717 | (TURNSTILE_INHERITOR_TURNSTILE | TURNSTILE_IMMEDIATE_UPDATE)); | |
718 | } | |
719 | ||
720 | if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) { | |
721 | result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first); | |
722 | } | |
723 | imq_unlock(mqueue); | |
724 | is_read_unlock(space); | |
725 | if (send_turnstile) { | |
726 | turnstile_update_inheritor_complete(send_turnstile, | |
727 | TURNSTILE_INTERLOCK_NOT_HELD); | |
728 | turnstile_deallocate(send_turnstile); | |
729 | } | |
730 | ||
731 | error = 0; | |
732 | } else { | |
733 | is_read_unlock(space); | |
734 | error = ENOTSUP; | |
735 | } | |
736 | } else { | |
737 | error = ENOENT; | |
738 | } | |
739 | ||
740 | waitq_link_release(wq_link_id); | |
741 | ||
742 | /* bail out on errors */ | |
743 | if (error) { | |
744 | knote_set_error(kn, error); | |
745 | return 0; | |
746 | } | |
747 | ||
748 | return result; | |
749 | } | |
750 | ||
/*
 * NOT proud of these - we should have a stricter relationship between mqueue and ipc object
 *
 * mqueue_to_pset() / mqueue_to_port() recover the containing port set / port
 * from a pointer to its embedded message queue by subtracting the offset of
 * the ips_messages / ip_messages member.
 *
 * mqueue_to_object() steps back one ipc_object-sized element from the mqueue
 * pointer.  NOTE(review): presumably this relies on the mqueue being laid out
 * immediately after the object header in both ports and port sets - confirm
 * against the structure definitions.
 *
 * The macro argument is fully parenthesized so that an arbitrary expression
 * (e.g. a conditional) is converted as a whole rather than only its first
 * operand.
 */
#define mqueue_to_pset(mq)	((ipc_pset_t)((uintptr_t)(mq) - offsetof(struct ipc_pset, ips_messages)))
#define mqueue_to_port(mq)	((ipc_port_t)((uintptr_t)(mq) - offsetof(struct ipc_port, ip_messages)))
#define mqueue_to_object(mq)	(((ipc_object_t)(mq)) - 1)
755 | ||
756 | ||
757 | static void | |
758 | filt_machportdetach( | |
759 | struct knote *kn) | |
760 | { | |
761 | ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; | |
762 | ipc_object_t object = mqueue_to_object(mqueue); | |
763 | struct turnstile *send_turnstile = TURNSTILE_NULL; | |
764 | ||
765 | filt_machport_turnstile_complete(kn); | |
766 | ||
767 | imq_lock(mqueue); | |
768 | if ((kn->kn_status & KN_VANISHED) || (kn->kn_flags & EV_EOF)) { | |
769 | /* | |
770 | * ipc_mqueue_changed() already unhooked this knote from the mqueue, | |
771 | */ | |
772 | } else { | |
773 | assert(IMQ_KLIST_VALID(mqueue)); | |
774 | KNOTE_DETACH(&mqueue->imq_klist, kn); | |
775 | } | |
776 | ||
777 | if (io_otype(object) == IOT_PORT) { | |
778 | ipc_port_t port = ip_from_mq(mqueue); | |
779 | ||
780 | send_turnstile = port_send_turnstile(port); | |
781 | if (send_turnstile) { | |
782 | turnstile_reference(send_turnstile); | |
783 | turnstile_update_inheritor(send_turnstile, | |
784 | ipc_port_get_inheritor(port), | |
785 | TURNSTILE_INHERITOR_TURNSTILE | TURNSTILE_IMMEDIATE_UPDATE); | |
786 | } | |
787 | } | |
788 | ||
789 | /* Clear the knote pointer once the knote has been removed from turnstile */ | |
790 | kn->kn_ptr.p_mqueue = IMQ_NULL; | |
791 | imq_unlock(mqueue); | |
792 | ||
793 | if (send_turnstile) { | |
794 | turnstile_update_inheritor_complete(send_turnstile, | |
795 | TURNSTILE_INTERLOCK_NOT_HELD); | |
796 | turnstile_deallocate(send_turnstile); | |
797 | } | |
798 | ||
799 | if (io_otype(object) == IOT_PORT_SET) { | |
800 | /* | |
801 | * Unlink the portset wait queue from knote/kqueue. | |
802 | * JMM - Does this need to be atomic under the mq lock? | |
803 | */ | |
804 | (void)knote_unlink_waitq(kn, &mqueue->imq_wait_queue); | |
805 | } | |
806 | io_release(object); | |
807 | } | |
808 | ||
809 | /* | |
810 | * filt_machportevent - deliver events into the mach port filter | |
811 | * | |
812 | * Mach port message arrival events are currently only posted via the | |
813 | * kqueue filter routine for ports. Port sets are marked stay-active | |
814 | * and the wait queue code will break any kqueue waiters out to go | |
815 | * poll the stay-queued knotes again. | |
816 | * | |
817 | * If there is a message at the head of the queue, | |
818 | * we indicate that the knote should go active. If | |
819 | * the message is to be direct-received, we adjust the | |
820 | * QoS of the knote according the requested and override | |
821 | * QoS of that first message. | |
822 | * | |
823 | * NOTE_REVOKE events are a legacy way to indicate that the port/portset | |
824 | * was deallocated or left the current Mach portspace (modern technique | |
825 | * is with an EV_VANISHED protocol). If we see NOTE_REVOKE, deliver an | |
826 | * EV_EOF event for these changes (hopefully it will get delivered before | |
827 | * the port name recycles to the same generation count and someone tries | |
828 | * to re-register a kevent for it or the events are udata-specific - | |
829 | * avoiding a conflict). | |
830 | */ | |
831 | static int | |
832 | filt_machportevent( | |
833 | struct knote *kn, | |
834 | long hint) | |
835 | { | |
836 | ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; | |
837 | ipc_kmsg_t first; | |
838 | int result = 0; | |
839 | ||
840 | /* mqueue locked by caller */ | |
841 | assert(imq_held(mqueue)); | |
842 | ||
843 | if (hint == NOTE_REVOKE) { | |
844 | kn->kn_flags |= EV_EOF | EV_ONESHOT; | |
845 | result = FILTER_ACTIVE | FILTER_RESET_EVENT_QOS; | |
846 | } else if (imq_is_valid(mqueue)) { | |
847 | assert(!imq_is_set(mqueue)); | |
848 | if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) { | |
849 | result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first); | |
850 | } | |
851 | } | |
852 | ||
853 | return result; | |
854 | } | |
855 | ||
856 | static int | |
857 | filt_machporttouch( | |
858 | struct knote *kn, | |
859 | struct kevent_internal_s *kev) | |
860 | { | |
861 | ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; | |
862 | ipc_kmsg_t first; | |
863 | int result = 0; | |
864 | ||
865 | /* copy in new settings and save off new input fflags */ | |
866 | kn->kn_sfflags = kev->fflags; | |
867 | kn->kn_ext[0] = kev->ext[0]; | |
868 | kn->kn_ext[1] = kev->ext[1]; | |
869 | ||
870 | if (kev->flags & EV_ENABLE) { | |
871 | /* | |
872 | * If the knote is being enabled, make sure there's no lingering | |
873 | * IPC overrides from the previous message delivery. | |
874 | */ | |
875 | filt_machport_turnstile_complete(kn); | |
876 | } | |
877 | ||
878 | /* | |
879 | * If the mqueue is a valid port and there is a message | |
880 | * that will be direct-received from the knote, update | |
881 | * the knote qos based on the first message and trigger | |
882 | * the event. If there are no more messages, reset the | |
883 | * QoS to the value provided by the kevent. | |
884 | */ | |
885 | imq_lock(mqueue); | |
886 | if (imq_is_valid(mqueue) && !imq_is_set(mqueue) && | |
887 | (first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) { | |
888 | result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first); | |
889 | } else if (kn->kn_sfflags & MACH_RCV_MSG) { | |
890 | result = FILTER_RESET_EVENT_QOS; | |
891 | } | |
892 | imq_unlock(mqueue); | |
893 | ||
894 | return result; | |
895 | } | |
896 | ||
897 | static int | |
898 | filt_machportprocess( | |
899 | struct knote *kn, | |
900 | struct filt_process_s *process_data, | |
901 | struct kevent_internal_s *kev) | |
902 | { | |
903 | ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; | |
904 | ipc_object_t object = mqueue_to_object(mqueue); | |
905 | thread_t self = current_thread(); | |
906 | boolean_t used_filtprocess_data = FALSE; | |
907 | ||
908 | wait_result_t wresult; | |
909 | mach_msg_option_t option; | |
910 | mach_vm_address_t addr; | |
911 | mach_msg_size_t size; | |
912 | ||
913 | /* Capture current state */ | |
914 | *kev = kn->kn_kevent; | |
915 | kev->ext[3] = 0; /* hide our port reference from userspace */ | |
916 | ||
917 | /* If already deallocated/moved return one last EOF event */ | |
918 | if (kev->flags & EV_EOF) { | |
919 | return FILTER_ACTIVE | FILTER_RESET_EVENT_QOS; | |
920 | } | |
921 | ||
922 | /* | |
923 | * Only honor supported receive options. If no options are | |
924 | * provided, just force a MACH_RCV_TOO_LARGE to detect the | |
925 | * name of the port and sizeof the waiting message. | |
926 | */ | |
927 | option = kn->kn_sfflags & (MACH_RCV_MSG|MACH_RCV_LARGE|MACH_RCV_LARGE_IDENTITY| | |
928 | MACH_RCV_TRAILER_MASK|MACH_RCV_VOUCHER); | |
929 | ||
930 | if (option & MACH_RCV_MSG) { | |
931 | addr = (mach_vm_address_t) kn->kn_ext[0]; | |
932 | size = (mach_msg_size_t) kn->kn_ext[1]; | |
933 | ||
934 | /* | |
935 | * If the kevent didn't specify a buffer and length, carve a buffer | |
936 | * from the filter processing data according to the flags. | |
937 | */ | |
938 | if (size == 0 && process_data != NULL) { | |
939 | used_filtprocess_data = TRUE; | |
940 | ||
941 | addr = (mach_vm_address_t)process_data->fp_data_out; | |
942 | size = (mach_msg_size_t)process_data->fp_data_resid; | |
943 | option |= (MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY); | |
944 | if (process_data->fp_flags & KEVENT_FLAG_STACK_DATA) | |
945 | option |= MACH_RCV_STACK; | |
946 | } | |
947 | } else { | |
948 | /* just detect the port name (if a set) and size of the first message */ | |
949 | option = MACH_RCV_LARGE; | |
950 | addr = 0; | |
951 | size = 0; | |
952 | } | |
953 | ||
954 | imq_lock(mqueue); | |
955 | ||
956 | /* just use the reference from here on out */ | |
957 | io_reference(object); | |
958 | ||
959 | /* | |
960 | * Set up to receive a message or the notification of a | |
961 | * too large message. But never allow this call to wait. | |
962 | * If the user provided aditional options, like trailer | |
963 | * options, pass those through here. But we don't support | |
964 | * scatter lists through this interface. | |
965 | */ | |
966 | self->ith_object = object; | |
967 | self->ith_msg_addr = addr; | |
968 | self->ith_rsize = size; | |
969 | self->ith_msize = 0; | |
970 | self->ith_option = option; | |
971 | self->ith_receiver_name = MACH_PORT_NULL; | |
972 | self->ith_continuation = NULL; | |
973 | option |= MACH_RCV_TIMEOUT; // never wait | |
974 | self->ith_state = MACH_RCV_IN_PROGRESS; | |
975 | self->ith_knote = kn; | |
976 | ||
977 | wresult = ipc_mqueue_receive_on_thread( | |
978 | mqueue, | |
979 | option, | |
980 | size, /* max_size */ | |
981 | 0, /* immediate timeout */ | |
982 | THREAD_INTERRUPTIBLE, | |
983 | self); | |
984 | /* mqueue unlocked */ | |
985 | ||
986 | /* | |
987 | * If we timed out, or the process is exiting, just release the | |
988 | * reference on the ipc_object and return zero. | |
989 | */ | |
990 | if (wresult == THREAD_RESTART || self->ith_state == MACH_RCV_TIMED_OUT) { | |
991 | assert(self->turnstile != TURNSTILE_NULL); | |
992 | io_release(object); | |
993 | return 0; | |
994 | } | |
995 | ||
996 | assert(wresult == THREAD_NOT_WAITING); | |
997 | assert(self->ith_state != MACH_RCV_IN_PROGRESS); | |
998 | ||
999 | /* | |
1000 | * If we weren't attempting to receive a message | |
1001 | * directly, we need to return the port name in | |
1002 | * the kevent structure. | |
1003 | */ | |
1004 | if ((option & MACH_RCV_MSG) != MACH_RCV_MSG) { | |
1005 | assert(self->ith_state == MACH_RCV_TOO_LARGE); | |
1006 | assert(self->ith_kmsg == IKM_NULL); | |
1007 | kev->data = self->ith_receiver_name; | |
1008 | io_release(object); | |
1009 | return FILTER_ACTIVE; | |
1010 | } | |
1011 | ||
1012 | /* | |
1013 | * Attempt to receive the message directly, returning | |
1014 | * the results in the fflags field. | |
1015 | */ | |
1016 | kev->fflags = mach_msg_receive_results(&size); | |
1017 | ||
1018 | /* kmsg and object reference consumed */ | |
1019 | ||
1020 | /* | |
1021 | * if the user asked for the identity of ports containing a | |
1022 | * a too-large message, return it in the data field (as we | |
1023 | * do for messages we didn't try to receive). | |
1024 | */ | |
1025 | if (kev->fflags == MACH_RCV_TOO_LARGE) { | |
1026 | kev->ext[1] = self->ith_msize; | |
1027 | if (option & MACH_RCV_LARGE_IDENTITY) | |
1028 | kev->data = self->ith_receiver_name; | |
1029 | else | |
1030 | kev->data = MACH_PORT_NULL; | |
1031 | } else { | |
1032 | kev->ext[1] = size; | |
1033 | kev->data = MACH_PORT_NULL; | |
1034 | } | |
1035 | ||
1036 | /* | |
1037 | * If we used a data buffer carved out from the filt_process data, | |
1038 | * store the address used in the knote and adjust the residual and | |
1039 | * other parameters for future use. | |
1040 | */ | |
1041 | if (used_filtprocess_data) { | |
1042 | assert(process_data->fp_data_resid >= size); | |
1043 | process_data->fp_data_resid -= size; | |
1044 | if ((process_data->fp_flags & KEVENT_FLAG_STACK_DATA) == 0) { | |
1045 | kev->ext[0] = process_data->fp_data_out; | |
1046 | process_data->fp_data_out += size; | |
1047 | } else { | |
1048 | assert(option & MACH_RCV_STACK); | |
1049 | kev->ext[0] = process_data->fp_data_out + | |
1050 | process_data->fp_data_resid; | |
1051 | } | |
1052 | } | |
1053 | ||
1054 | /* | |
1055 | * Apply message-based QoS values to output kevent as prescribed. | |
1056 | * The kev->ext[2] field gets (msg-qos << 32) | (override-qos). | |
1057 | * | |
1058 | * The mach_msg_receive_results() call saved off the message | |
1059 | * QoS values in the continuation save area on successful receive. | |
1060 | */ | |
1061 | if (kev->fflags == MACH_MSG_SUCCESS) { | |
1062 | kev->ext[2] = ((uint64_t)self->ith_qos << 32) | | |
1063 | (uint64_t)self->ith_qos_override; | |
1064 | } | |
1065 | ||
1066 | return FILTER_ACTIVE; | |
1067 | } | |
1068 | ||
1069 | /* | |
1070 | * Peek to see if the message queue associated with the knote has any | |
1071 | * events. This pre-hook is called when a filter uses the stay- | |
1072 | * on-queue mechanism (as the knote_link_waitq mechanism does for | |
1073 | * portsets) and someone calls select() against the containing kqueue. | |
1074 | * | |
1075 | * Just peek at the pre-post status of the portset's wait queue | |
1076 | * to determine if it has anything interesting. We can do it | |
1077 | * without holding the lock, as it is just a snapshot in time | |
1078 | * (if this is used as part of really waiting for events, we | |
1079 | * will catch changes in this status when the event gets posted | |
1080 | * up to the knote's kqueue). | |
1081 | */ | |
1082 | static int | |
1083 | filt_machportpeek(struct knote *kn) | |
1084 | { | |
1085 | ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; | |
1086 | ||
1087 | return ipc_mqueue_set_peek(mqueue) ? FILTER_ACTIVE : 0; | |
1088 | } | |
1089 | ||
1090 | SECURITY_READ_ONLY_EARLY(struct filterops) machport_filtops = { | |
1091 | .f_adjusts_qos = true, | |
1092 | .f_extended_codes = true, | |
1093 | .f_attach = filt_machportattach, | |
1094 | .f_detach = filt_machportdetach, | |
1095 | .f_event = filt_machportevent, | |
1096 | .f_touch = filt_machporttouch, | |
1097 | .f_process = filt_machportprocess, | |
1098 | .f_peek = filt_machportpeek, | |
1099 | }; |