2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
59 * File: ipc/ipc_entry.c
63 * Primitive functions to manipulate translation entries.
66 #include <mach_debug.h>
68 #include <mach/kern_return.h>
69 #include <mach/port.h>
70 #include <kern/assert.h>
71 #include <kern/sched_prim.h>
72 #include <kern/zalloc.h>
73 #include <kern/misc_protos.h>
75 #include <ipc/ipc_entry.h>
76 #include <ipc/ipc_space.h>
77 #include <ipc/ipc_object.h>
78 #include <ipc/ipc_hash.h>
79 #include <ipc/ipc_table.h>
80 #include <ipc/ipc_port.h>
82 #include <sys/kdebug.h>
85 * Routine: ipc_entry_lookup
87 * Searches for an entry, given its name.
89 * The space must be read or write locked throughout.
90 * The space must be active.
96 mach_port_name_t name
)
98 mach_port_index_t index
;
101 assert(is_active(space
));
103 index
= MACH_PORT_INDEX(name
);
104 if (index
< space
->is_table_size
) {
105 entry
= &space
->is_table
[index
];
106 if (IE_BITS_GEN(entry
->ie_bits
) != MACH_PORT_GEN(name
) ||
107 IE_BITS_TYPE(entry
->ie_bits
) == MACH_PORT_TYPE_NONE
) {
114 assert((entry
== IE_NULL
) || IE_BITS_TYPE(entry
->ie_bits
));
120 * Routine: ipc_entries_hold
122 * Verifies that there are at least 'entries_needed'
125 * The space is write-locked and active throughout.
126 * An object may be locked. Will not allocate memory.
128 * KERN_SUCCESS Free entries were found.
129 * KERN_NO_SPACE No entry allocated.
135 uint32_t entries_needed
)
138 mach_port_index_t next_free
= 0;
142 * Assume that all new entries will need hashing.
143 * If the table is more than 87.5% full pretend we didn't have space.
145 if (space
->is_table_hashed
+ entries_needed
>
146 space
->is_table_size
* 7 / 8) {
147 return KERN_NO_SPACE
;
150 assert(is_active(space
));
152 table
= &space
->is_table
[0];
154 for (i
= 0; i
< entries_needed
; i
++) {
155 next_free
= table
[next_free
].ie_next
;
156 if (next_free
== 0) {
157 return KERN_NO_SPACE
;
159 assert(next_free
< space
->is_table_size
);
160 assert(table
[next_free
].ie_object
== IO_NULL
);
166 * Routine: ipc_entry_claim
168 * Take formal ownership of a held entry.
170 * The space is write-locked and active throughout.
171 * An object may be locked. Will not allocate memory.
173 * Note: The returned entry must be marked as modified before
174 * releasing the space lock
180 mach_port_name_t
*namep
,
185 mach_port_index_t first_free
;
187 mach_port_name_t new_name
;
189 table
= &space
->is_table
[0];
191 first_free
= table
->ie_next
;
192 assert(first_free
!= 0);
194 entry
= &table
[first_free
];
195 table
->ie_next
= entry
->ie_next
;
196 space
->is_table_free
--;
198 assert(table
->ie_next
< space
->is_table_size
);
201 * Initialize the new entry: increment gencount and reset
202 * rollover point if it rolled over, and clear ie_request.
204 gen
= ipc_entry_new_gen(entry
->ie_bits
);
205 if (__improbable(ipc_entry_gen_rolled(entry
->ie_bits
, gen
))) {
206 ipc_entry_bits_t roll
= ipc_space_get_rollpoint(space
);
207 gen
= ipc_entry_new_rollpoint(roll
);
209 entry
->ie_bits
= gen
;
210 entry
->ie_request
= IE_REQ_NONE
;
213 * The new name can't be MACH_PORT_NULL because index
214 * is non-zero. It can't be MACH_PORT_DEAD because
215 * the table isn't allowed to grow big enough.
216 * (See comment in ipc/ipc_table.h.)
218 new_name
= MACH_PORT_MAKE(first_free
, gen
);
219 assert(MACH_PORT_VALID(new_name
));
227 * Routine: ipc_entry_alloc
229 * Allocate an entry out of the space.
231 * The space is not locked before, but it is write-locked after
232 * if the call is successful. May allocate memory.
234 * KERN_SUCCESS An entry was allocated.
235 * KERN_INVALID_TASK The space is dead.
236 * KERN_NO_SPACE No room for an entry in the space.
237 * KERN_RESOURCE_SHORTAGE Couldn't allocate memory for an entry.
243 mach_port_name_t
*namep
,
248 is_write_lock(space
);
251 if (!is_active(space
)) {
252 is_write_unlock(space
);
253 return KERN_INVALID_TASK
;
256 kr
= ipc_entries_hold(space
, 1);
257 if (kr
== KERN_SUCCESS
) {
258 return ipc_entry_claim(space
, namep
, entryp
);
261 kr
= ipc_entry_grow_table(space
, ITS_SIZE_NONE
);
262 if (kr
!= KERN_SUCCESS
) {
263 return kr
; /* space is unlocked */
269 * Routine: ipc_entry_alloc_name
271 * Allocates/finds an entry with a specific name.
272 * If an existing entry is returned, its type will be nonzero.
274 * The space is not locked before, but it is write-locked after
275 * if the call is successful. May allocate memory.
277 * KERN_SUCCESS Found existing entry with same name.
278 * KERN_SUCCESS Allocated a new entry.
279 * KERN_INVALID_TASK The space is dead.
280 * KERN_RESOURCE_SHORTAGE Couldn't allocate memory.
281 * KERN_FAILURE Couldn't allocate requested name.
285 ipc_entry_alloc_name(
287 mach_port_name_t name
,
290 mach_port_index_t index
= MACH_PORT_INDEX(name
);
291 mach_port_gen_t gen
= MACH_PORT_GEN(name
);
293 if (index
> ipc_table_max_entries()) {
294 return KERN_NO_SPACE
;
297 assert(MACH_PORT_VALID(name
));
300 is_write_lock(space
);
305 if (!is_active(space
)) {
306 is_write_unlock(space
);
307 return KERN_INVALID_TASK
;
311 * If we are under the table cutoff,
312 * there are usually four cases:
313 * 1) The entry is reserved (index 0)
314 * 2) The entry is inuse, for the same name
315 * 3) The entry is inuse, for a different name
316 * 4) The entry is free
317 * For a task with a "fast" IPC space, we disallow
318 * cases 1) and 3), because ports cannot be renamed.
320 if (index
< space
->is_table_size
) {
321 ipc_entry_t table
= space
->is_table
;
323 entry
= &table
[index
];
326 /* case #1 - the entry is reserved */
327 assert(!IE_BITS_TYPE(entry
->ie_bits
));
328 assert(!IE_BITS_GEN(entry
->ie_bits
));
329 is_write_unlock(space
);
331 } else if (IE_BITS_TYPE(entry
->ie_bits
)) {
332 if (IE_BITS_GEN(entry
->ie_bits
) == gen
) {
333 /* case #2 -- the entry is inuse, for the same name */
337 /* case #3 -- the entry is inuse, for a different name. */
338 /* Collisions are not allowed */
339 is_write_unlock(space
);
343 mach_port_index_t free_index
, next_index
;
346 * case #4 -- the entry is free
347 * Rip the entry out of the free list.
351 (next_index
= table
[free_index
].ie_next
)
353 free_index
= next_index
) {
357 table
[free_index
].ie_next
=
358 table
[next_index
].ie_next
;
359 space
->is_table_free
--;
361 /* mark the previous entry modified - reconstructing the name */
362 ipc_entry_modified(space
,
363 MACH_PORT_MAKE(free_index
,
364 IE_BITS_GEN(table
[free_index
].ie_bits
)),
367 entry
->ie_bits
= gen
;
368 entry
->ie_request
= IE_REQ_NONE
;
371 assert(entry
->ie_object
== IO_NULL
);
377 * We grow the table so that the name
378 * index fits in the array space.
379 * Because the space will be unlocked,
383 kr
= ipc_entry_grow_table(space
, index
+ 1);
384 if (kr
!= KERN_SUCCESS
) {
385 /* space is unlocked */
393 * Routine: ipc_entry_dealloc
395 * Deallocates an entry from a space.
397 * The space must be write-locked throughout.
398 * The space must be active.
404 mach_port_name_t name
,
408 ipc_entry_num_t size
;
409 mach_port_index_t index
;
411 assert(is_active(space
));
412 assert(entry
->ie_object
== IO_NULL
);
413 assert(entry
->ie_request
== IE_REQ_NONE
);
416 if (entry
->ie_request
!= IE_REQ_NONE
) {
417 panic("ipc_entry_dealloc()\n");
421 index
= MACH_PORT_INDEX(name
);
422 table
= space
->is_table
;
423 size
= space
->is_table_size
;
425 if ((index
< size
) && (entry
== &table
[index
])) {
426 assert(IE_BITS_GEN(entry
->ie_bits
) == MACH_PORT_GEN(name
));
427 entry
->ie_bits
&= (IE_BITS_GEN_MASK
| IE_BITS_ROLL_MASK
);
428 entry
->ie_next
= table
->ie_next
;
429 table
->ie_next
= index
;
430 space
->is_table_free
++;
433 * Nothing to do. The entry does not match
434 * so there is nothing to deallocate.
436 assert(index
< size
);
437 assert(entry
== &table
[index
]);
438 assert(IE_BITS_GEN(entry
->ie_bits
) == MACH_PORT_GEN(name
));
440 ipc_entry_modified(space
, name
, entry
);
444 * Routine: ipc_entry_modified
446 * Note that an entry was modified in a space.
448 * Assumes exclusive write access to the space,
449 * either through a write lock or being the cleaner
450 * on an inactive space.
456 mach_port_name_t name
,
457 __assert_only ipc_entry_t entry
)
460 ipc_entry_num_t size
;
461 mach_port_index_t index
;
463 index
= MACH_PORT_INDEX(name
);
464 table
= space
->is_table
;
465 size
= space
->is_table_size
;
467 assert(index
< size
);
468 assert(entry
== &table
[index
]);
470 assert(space
->is_low_mod
<= size
);
471 assert(space
->is_high_mod
< size
);
473 if (index
< space
->is_low_mod
) {
474 space
->is_low_mod
= index
;
476 if (index
> space
->is_high_mod
) {
477 space
->is_high_mod
= index
;
480 KERNEL_DEBUG_CONSTANT(
481 MACHDBG_CODE(DBG_MACH_IPC
, MACH_IPC_PORT_ENTRY_MODIFY
) | DBG_FUNC_NONE
,
482 space
->is_task
? task_pid(space
->is_task
) : 0,
489 #define IPC_ENTRY_GROW_STATS 1
490 #if IPC_ENTRY_GROW_STATS
491 static uint64_t ipc_entry_grow_count
= 0;
492 static uint64_t ipc_entry_grow_rescan
= 0;
493 static uint64_t ipc_entry_grow_rescan_max
= 0;
494 static uint64_t ipc_entry_grow_rescan_entries
= 0;
495 static uint64_t ipc_entry_grow_rescan_entries_max
= 0;
496 static uint64_t ipc_entry_grow_freelist_entries
= 0;
497 static uint64_t ipc_entry_grow_freelist_entries_max
= 0;
501 * Routine: ipc_entry_grow_table
503 * Grows the table in a space.
505 * The space must be write-locked and active before.
506 * If successful, the space is also returned locked.
507 * On failure, the space is returned unlocked.
510 * KERN_SUCCESS Grew the table.
511 * KERN_SUCCESS Somebody else grew the table.
512 * KERN_SUCCESS The space died.
513 * KERN_NO_SPACE Table has maximum size already.
514 * KERN_RESOURCE_SHORTAGE Couldn't allocate a new table.
518 ipc_entry_grow_table(
520 ipc_table_elems_t target_size
)
522 ipc_entry_num_t osize
, size
, nsize
, psize
;
524 ipc_entry_t otable
, table
;
525 ipc_table_size_t oits
, its
, nits
;
526 mach_port_index_t i
, free_index
;
527 mach_port_index_t low_mod
, hi_mod
;
528 ipc_table_index_t sanity
;
529 #if IPC_ENTRY_GROW_STATS
530 uint64_t rescan_count
= 0;
532 assert(is_active(space
));
534 if (is_growing(space
)) {
536 * Somebody else is growing the table.
537 * We just wait for them to finish.
540 is_write_sleep(space
);
544 otable
= space
->is_table
;
546 its
= space
->is_table_next
;
547 size
= its
->its_size
;
550 * Since is_table_next points to the next natural size
551 * we can identify the current size entry.
554 osize
= oits
->its_size
;
557 * If there is no target size, then the new size is simply
558 * specified by is_table_next. If there is a target
559 * size, then search for the next entry.
561 if (target_size
!= ITS_SIZE_NONE
) {
562 if (target_size
<= osize
) {
563 /* the space is locked */
568 while ((psize
!= size
) && (target_size
> size
)) {
571 size
= its
->its_size
;
574 is_write_unlock(space
);
575 return KERN_NO_SPACE
;
580 is_write_unlock(space
);
581 return KERN_NO_SPACE
;
585 nsize
= nits
->its_size
;
586 assert((osize
< size
) && (size
<= nsize
));
589 * We'll attempt to grow the table.
591 * Because we will be copying without the space lock, reset
592 * the lowest_mod index to just beyond the end of the current
593 * table. Modification of entries (other than hashes) will
594 * bump this downward, and we only have to reprocess entries
595 * above that mark. Eventually, we'll get done.
597 is_start_growing(space
);
598 space
->is_low_mod
= osize
;
599 space
->is_high_mod
= 0;
600 #if IPC_ENTRY_GROW_STATS
601 ipc_entry_grow_count
++;
603 is_write_unlock(space
);
605 table
= it_entries_alloc(its
);
606 if (table
== IE_NULL
) {
607 is_write_lock(space
);
608 is_done_growing(space
);
609 is_write_unlock(space
);
610 thread_wakeup((event_t
) space
);
611 return KERN_RESOURCE_SHORTAGE
;
614 ipc_space_rand_freelist(space
, table
, osize
, size
);
616 /* clear out old entries in new table */
617 memset((void *)table
, 0, osize
* sizeof(*table
));
623 * Within the range of the table that changed, determine what we
624 * have to take action on. For each entry, take a snapshot of the
625 * corresponding entry in the old table (so it won't change
626 * during this iteration). The snapshot may not be self-consistent
627 * (if we caught it in the middle of being changed), so be very
628 * cautious with the values.
630 for (i
= low_mod
; i
<= hi_mod
; i
++) {
631 ipc_entry_t entry
= &table
[i
];
632 struct ipc_entry osnap
= otable
[i
];
634 if (entry
->ie_object
!= osnap
.ie_object
||
635 IE_BITS_TYPE(entry
->ie_bits
) != IE_BITS_TYPE(osnap
.ie_bits
)) {
636 if (entry
->ie_object
!= IO_NULL
&&
637 IE_BITS_TYPE(entry
->ie_bits
) == MACH_PORT_TYPE_SEND
) {
638 ipc_hash_table_delete(table
, size
, entry
->ie_object
, i
, entry
);
641 entry
->ie_object
= osnap
.ie_object
;
642 entry
->ie_bits
= osnap
.ie_bits
;
643 entry
->ie_request
= osnap
.ie_request
; /* or ie_next */
645 if (entry
->ie_object
!= IO_NULL
&&
646 IE_BITS_TYPE(entry
->ie_bits
) == MACH_PORT_TYPE_SEND
) {
647 ipc_hash_table_insert(table
, size
, entry
->ie_object
, i
, entry
);
650 assert(entry
->ie_object
== osnap
.ie_object
);
651 entry
->ie_bits
= osnap
.ie_bits
;
652 entry
->ie_request
= osnap
.ie_request
; /* or ie_next */
655 table
[0].ie_next
= otable
[0].ie_next
; /* always rebase the freelist */
658 * find the end of the freelist (should be short). But be careful,
659 * the list items can change so only follow through truly free entries
660 * (no problem stopping short in those cases, because we'll rescan).
663 for (sanity
= 0; sanity
< osize
; sanity
++) {
664 if (table
[free_index
].ie_object
!= IPC_OBJECT_NULL
) {
667 i
= table
[free_index
].ie_next
;
668 if (i
== 0 || i
>= osize
) {
673 #if IPC_ENTRY_GROW_STATS
674 ipc_entry_grow_freelist_entries
+= sanity
;
675 if (sanity
> ipc_entry_grow_freelist_entries_max
) {
676 ipc_entry_grow_freelist_entries_max
= sanity
;
680 is_write_lock(space
);
683 * We need to do a wakeup on the space,
684 * to rouse waiting threads. We defer
685 * this until the space is unlocked,
686 * because we don't want them to spin.
689 if (!is_active(space
)) {
691 * The space died while it was unlocked.
694 is_done_growing(space
);
695 is_write_unlock(space
);
696 thread_wakeup((event_t
) space
);
697 it_entries_free(its
, table
);
698 is_write_lock(space
);
702 /* If the space changed while unlocked, go back and process the changes */
703 if (space
->is_low_mod
< osize
) {
704 assert(space
->is_high_mod
> 0);
705 low_mod
= space
->is_low_mod
;
706 space
->is_low_mod
= osize
;
707 hi_mod
= space
->is_high_mod
;
708 space
->is_high_mod
= 0;
709 is_write_unlock(space
);
710 #if IPC_ENTRY_GROW_STATS
712 if (rescan_count
> ipc_entry_grow_rescan_max
) {
713 ipc_entry_grow_rescan_max
= rescan_count
;
716 ipc_entry_grow_rescan
++;
717 ipc_entry_grow_rescan_entries
+= hi_mod
- low_mod
+ 1;
718 if (hi_mod
- low_mod
+ 1 > ipc_entry_grow_rescan_entries_max
) {
719 ipc_entry_grow_rescan_entries_max
= hi_mod
- low_mod
+ 1;
725 /* link new free entries onto the rest of the freelist */
726 assert(table
[free_index
].ie_next
== 0 &&
727 table
[free_index
].ie_object
== IO_NULL
);
728 table
[free_index
].ie_next
= osize
;
730 assert(space
->is_table
== otable
);
731 assert((space
->is_table_next
== its
) ||
732 (target_size
!= ITS_SIZE_NONE
));
733 assert(space
->is_table_size
== osize
);
735 space
->is_table
= table
;
736 space
->is_table_size
= size
;
737 space
->is_table_next
= nits
;
738 space
->is_table_free
+= size
- osize
;
740 is_done_growing(space
);
741 is_write_unlock(space
);
743 thread_wakeup((event_t
) space
);
746 * Now we need to free the old table.
748 it_entries_free(oits
, otable
);
749 is_write_lock(space
);
756 * Routine: ipc_entry_name_mask
758 * Ensure a mach port name has the default ipc entry
759 * generation bits set. This can be used to ensure that
760 * a name passed in by user space matches names generated
765 * 'name' input with default generation bits masked or added
769 ipc_entry_name_mask(mach_port_name_t name
)
772 static mach_port_name_t null_name
= MACH_PORT_MAKE(0, IE_BITS_GEN_MASK
+ IE_BITS_GEN_ONE
);
773 return name
| null_name
;
775 static mach_port_name_t null_name
= MACH_PORT_MAKE(0, ~(IE_BITS_GEN_MASK
+ IE_BITS_GEN_ONE
));
776 return name
& ~null_name
;