]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/ipc/ipc_entry.c
xnu-4570.41.2.tar.gz
[apple/xnu.git] / osfmk / ipc / ipc_entry.c
index 5956602399f409423ba56efd98bd8f3481d79cff..288ea6a5801857470956296ef1c8c343f6932734 100644 (file)
@@ -63,7 +63,6 @@
  *     Primitive functions to manipulate translation entries.
  */
 
-#include <mach_kdb.h>
 #include <mach_debug.h>
 
 #include <mach/kern_return.h>
 #include <kern/sched_prim.h>
 #include <kern/zalloc.h>
 #include <kern/misc_protos.h>
-#if MACH_KDB
-#include <kern/task.h>
-#endif
 #include <ipc/port.h>
 #include <ipc/ipc_entry.h>
 #include <ipc/ipc_space.h>
-#include <ipc/ipc_splay.h>
 #include <ipc/ipc_object.h>
 #include <ipc/ipc_hash.h>
 #include <ipc/ipc_table.h>
 #include <ipc/ipc_port.h>
 #include <string.h>
 
-zone_t ipc_tree_entry_zone;
-
-
-
-/*
- * Forward declarations
- */
-boolean_t ipc_entry_tree_collision(
-       ipc_space_t             space,
-       mach_port_name_t        name);
-
-/*
- *     Routine:        ipc_entry_tree_collision
- *     Purpose:
- *             Checks if "name" collides with an allocated name
- *             in the space's tree.  That is, returns TRUE
- *             if the splay tree contains a name with the same
- *             index as "name".
- *     Conditions:
- *             The space is locked (read or write) and active.
- */
-
-boolean_t
-ipc_entry_tree_collision(
-       ipc_space_t             space,
-       mach_port_name_t        name)
-{
-       mach_port_index_t index;
-       mach_port_name_t lower, upper;
-
-       assert(space->is_active);
-
-       /*
-        *      Check if we collide with the next smaller name
-        *      or the next larger name.
-        */
-
-       ipc_splay_tree_bounds(&space->is_tree, name, &lower, &upper);
-
-       index = MACH_PORT_INDEX(name);
-       return (((lower != (mach_port_name_t)~0) && 
-                (MACH_PORT_INDEX(lower) == index)) ||
-               ((upper != 0) && (MACH_PORT_INDEX(upper) == index)));
-}
-
 /*
  *     Routine:        ipc_entry_lookup
  *     Purpose:
@@ -147,112 +97,149 @@ ipc_entry_lookup(
        mach_port_index_t index;
        ipc_entry_t entry;
 
-       assert(space->is_active);
+       assert(is_active(space));
 
-                       
        index = MACH_PORT_INDEX(name);
-       /*
-        * If space is fast, we assume no splay tree and name within table
-        * bounds, but still check generation numbers (if enabled) and
-        * look for null entries.
-        */
-       if (is_fast_space(space)) {
-               entry = &space->is_table[index];
+       if (index <  space->is_table_size) {
+                entry = &space->is_table[index];
                if (IE_BITS_GEN(entry->ie_bits) != MACH_PORT_GEN(name) ||
-                   IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE)
-                       entry = IE_NULL;
+                   IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE) {
+                       entry = IE_NULL;                
+               }
        }
-       else
-       if (index < space->is_table_size) {
-               entry = &space->is_table[index];
-               if (IE_BITS_GEN(entry->ie_bits) != MACH_PORT_GEN(name))
-                       if (entry->ie_bits & IE_BITS_COLLISION) {
-                               assert(space->is_tree_total > 0);
-                               goto tree_lookup;
-                       } else
-                               entry = IE_NULL;
-               else if (IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE)
-                       entry = IE_NULL;
-       } else if (space->is_tree_total == 0)
-               entry = IE_NULL;
        else {
-           tree_lookup:
-               entry = (ipc_entry_t)
-                               ipc_splay_tree_lookup(&space->is_tree, name);
-               /* with sub-space introduction, an entry may appear in      */
-               /* the splay tree and yet not show rights for this subspace */
-               if(entry != IE_NULL)  {
-                       if(!(IE_BITS_TYPE(entry->ie_bits)))
-                               entry = IE_NULL; 
-               }
+               entry = IE_NULL;
        }
 
        assert((entry == IE_NULL) || IE_BITS_TYPE(entry->ie_bits));
        return entry;
 }
 
+
 /*
- *     Routine:        ipc_entry_get
+ *     Routine:        ipc_entries_hold
  *     Purpose:
- *             Tries to allocate an entry out of the space.
+ *             Verifies that there are at least 'entries_needed'
+ *             free list members
  *     Conditions:
  *             The space is write-locked and active throughout.
  *             An object may be locked.  Will not allocate memory.
  *     Returns:
- *             KERN_SUCCESS            A free entry was found.
+ *             KERN_SUCCESS            Free entries were found.
  *             KERN_NO_SPACE           No entry allocated.
  */
 
 kern_return_t
-ipc_entry_get(
+ipc_entries_hold(
+       ipc_space_t             space,
+       uint32_t                entries_needed)
+{
+
+       ipc_entry_t table;
+       mach_port_index_t next_free = 0;
+       uint32_t i;
+
+       assert(is_active(space));
+
+       table = &space->is_table[0];
+
+       for (i = 0; i < entries_needed; i++) {
+               next_free = table[next_free].ie_next;
+               if (next_free == 0) {
+                       return KERN_NO_SPACE;
+               }
+               assert(next_free < space->is_table_size);
+               assert(table[next_free].ie_object == IO_NULL);
+       }
+       return KERN_SUCCESS;
+}
+
+/*
+ *     Routine:        ipc_entry_claim
+ *     Purpose:
+ *             Take formal ownership of a held entry.
+ *     Conditions:
+ *             The space is write-locked and active throughout.
+ *             An object may be locked.  Will not allocate memory.
+ *
+ *     Note: The returned entry must be marked as modified before
+ *           releasing the space lock
+ */
+
+kern_return_t
+ipc_entry_claim(
        ipc_space_t             space,
        mach_port_name_t        *namep,
        ipc_entry_t             *entryp)
 {
+       ipc_entry_t entry;
        ipc_entry_t table;
        mach_port_index_t first_free;
-       ipc_entry_t free_entry;
+       mach_port_gen_t gen;
+       mach_port_name_t new_name;
 
-       assert(space->is_active);
+       table = &space->is_table[0];
 
-       {
-               table = space->is_table;
-               first_free = table->ie_next;
+       first_free = table->ie_next;
+       assert(first_free != 0);
 
-               if (first_free == 0)
-                       return KERN_NO_SPACE;
+       entry = &table[first_free];
+       table->ie_next = entry->ie_next;
+       space->is_table_free--;
+
+       assert(table->ie_next < space->is_table_size);
 
-               free_entry = &table[first_free];
-               table->ie_next = free_entry->ie_next;
+       /*
+        *      Initialize the new entry: increment gencount and reset
+        *      rollover point if it rolled over, and clear ie_request.
+        */
+       gen = ipc_entry_new_gen(entry->ie_bits);
+       if (__improbable(ipc_entry_gen_rolled(entry->ie_bits, gen))) {
+               ipc_entry_bits_t roll = ipc_space_get_rollpoint(space);
+               gen = ipc_entry_new_rollpoint(roll);
        }
+       entry->ie_bits = gen;
+       entry->ie_request = IE_REQ_NONE;
 
        /*
-        *      Initialize the new entry.  We need only
-        *      increment the generation number and clear ie_request.
+        *      The new name can't be MACH_PORT_NULL because index
+        *      is non-zero.  It can't be MACH_PORT_DEAD because
+        *      the table isn't allowed to grow big enough.
+        *      (See comment in ipc/ipc_table.h.)
         */
-       {
-               mach_port_name_t new_name;
-               mach_port_gen_t gen;
+       new_name = MACH_PORT_MAKE(first_free, gen);
+       assert(MACH_PORT_VALID(new_name));
+       *namep = new_name;
+       *entryp = entry;
 
-               gen = IE_BITS_NEW_GEN(free_entry->ie_bits);
-               free_entry->ie_bits = gen;
-               free_entry->ie_request = IE_REQ_NONE;
+       return KERN_SUCCESS;
+}
 
-               /*
-                *      The new name can't be MACH_PORT_NULL because index
-                *      is non-zero.  It can't be MACH_PORT_DEAD because
-                *      the table isn't allowed to grow big enough.
-                *      (See comment in ipc/ipc_table.h.)
-                */
-               new_name = MACH_PORT_MAKE(first_free, gen);
-               assert(MACH_PORT_VALID(new_name));
-               *namep = new_name;
-       }
+/*
+ *     Routine:        ipc_entry_get
+ *     Purpose:
+ *             Tries to allocate an entry out of the space.
+ *     Conditions:
+ *             The space is write-locked and active throughout.
+ *             An object may be locked.  Will not allocate memory.
+ *     Returns:
+ *             KERN_SUCCESS            A free entry was found.
+ *             KERN_NO_SPACE           No entry allocated.
+ */
 
-       assert(free_entry->ie_object == IO_NULL);
+kern_return_t
+ipc_entry_get(
+       ipc_space_t             space,
+       mach_port_name_t        *namep,
+       ipc_entry_t             *entryp)
+{
+       kern_return_t kr;
 
-       *entryp = free_entry;
-       return KERN_SUCCESS;
+       kr = ipc_entries_hold(space, 1);
+       if (KERN_SUCCESS != kr)
+               return kr;
+
+       return ipc_entry_claim(space, namep, entryp);
 }
 
 /*
@@ -280,7 +267,7 @@ ipc_entry_alloc(
        is_write_lock(space);
 
        for (;;) {
-               if (!space->is_active) {
+               if (!is_active(space)) {
                        is_write_unlock(space);
                        return KERN_INVALID_TASK;
                }
@@ -308,6 +295,7 @@ ipc_entry_alloc(
  *             KERN_SUCCESS            Allocated a new entry.
  *             KERN_INVALID_TASK       The space is dead.
  *             KERN_RESOURCE_SHORTAGE  Couldn't allocate memory.
+ *             KERN_FAILURE            Couldn't allocate requested name.
  */
 
 kern_return_t
@@ -318,7 +306,9 @@ ipc_entry_alloc_name(
 {
        mach_port_index_t index = MACH_PORT_INDEX(name);
        mach_port_gen_t gen = MACH_PORT_GEN(name);
-       ipc_tree_entry_t tentry = ITE_NULL;
+
+       if (index > ipc_table_max_entries())
+               return KERN_NO_SPACE;
 
        assert(MACH_PORT_VALID(name));
 
@@ -327,12 +317,9 @@ ipc_entry_alloc_name(
 
        for (;;) {
                ipc_entry_t entry;
-               ipc_tree_entry_t tentry2;
-               ipc_table_size_t its;
 
-               if (!space->is_active) {
+               if (!is_active(space)) {
                        is_write_unlock(space);
-                       if (tentry) ite_free(tentry);
                        return KERN_INVALID_TASK;
                }
 
@@ -352,18 +339,27 @@ ipc_entry_alloc_name(
                        entry = &table[index];
 
                        if (index == 0) {
+                               /* case #1 - the entry is reserved */
                                assert(!IE_BITS_TYPE(entry->ie_bits));
                                assert(!IE_BITS_GEN(entry->ie_bits));
+                               is_write_unlock(space);                         
+                               return KERN_FAILURE;
                        } else if (IE_BITS_TYPE(entry->ie_bits)) {
                                if (IE_BITS_GEN(entry->ie_bits) == gen) {
+                                       /* case #2 -- the entry is inuse, for the same name */
                                        *entryp = entry;
-                                       assert(!tentry);
                                        return KERN_SUCCESS;
+                               } else {
+                                       /* case #3 -- the entry is inuse, for a different name. */
+                                       /* Collisions are not allowed */
+                                       is_write_unlock(space);                                 
+                                       return KERN_FAILURE;
                                }
                        } else {
                                mach_port_index_t free_index, next_index;
 
                                /*
+                                *      case #4 -- the entry is free
                                 *      Rip the entry out of the free list.
                                 */
 
@@ -375,123 +371,37 @@ ipc_entry_alloc_name(
 
                                table[free_index].ie_next =
                                        table[next_index].ie_next;
+                               space->is_table_free--;
+
+                               /* mark the previous entry modified - reconstructing the name */
+                               ipc_entry_modified(space, 
+                                                  MACH_PORT_MAKE(free_index, 
+                                                       IE_BITS_GEN(table[free_index].ie_bits)),
+                                                  &table[free_index]);
 
                                entry->ie_bits = gen;
                                entry->ie_request = IE_REQ_NONE;
                                *entryp = entry;
 
                                assert(entry->ie_object == IO_NULL);
-                               if (is_fast_space(space))
-                                       assert(!tentry);
-                               else if (tentry)
-                                       ite_free(tentry);
                                return KERN_SUCCESS;
                        }
                }
 
                /*
-                * In a fast space, ipc_entry_alloc_name may be
-                * used only to add a right to a port name already
-                * known in this space.
-                */
-               if (is_fast_space(space)) {
-                       is_write_unlock(space);
-                       assert(!tentry);
-                       return KERN_FAILURE;
-               }
-
-               /*
-                *      Before trying to allocate any memory,
-                *      check if the entry already exists in the tree.
-                *      This avoids spurious resource errors.
-                *      The splay tree makes a subsequent lookup/insert
-                *      of the same name cheap, so this costs little.
-                */
-
-               if ((space->is_tree_total > 0) &&
-                   ((tentry2 = ipc_splay_tree_lookup(&space->is_tree, name))
-                    != ITE_NULL)) {
-                       assert(tentry2->ite_space == space);
-                       assert(IE_BITS_TYPE(tentry2->ite_bits));
-
-                       *entryp = &tentry2->ite_entry;
-                       if (tentry) ite_free(tentry);
-                       return KERN_SUCCESS;
-               }
-
-               its = space->is_table_next;
-
-               /*
-                *      Check if the table should be grown.
-                *
-                *      Note that if space->is_table_size == its->its_size,
-                *      then we won't ever try to grow the table.
-                *
-                *      Note that we are optimistically assuming that name
-                *      doesn't collide with any existing names.  (So if
-                *      it were entered into the tree, is_tree_small would
-                *      be incremented.)  This is OK, because even in that
-                *      case, we don't lose memory by growing the table.
+                *      We grow the table so that the name
+                *      index fits in the array space.
+                *      Because the space will be unlocked,
+                *      we must restart.
                 */
-               if ((space->is_table_size <= index) &&
-                   (index < its->its_size) &&
-                   (((its->its_size - space->is_table_size) *
-                     sizeof(struct ipc_entry)) <
-                    ((space->is_tree_small + 1) *
-                     sizeof(struct ipc_tree_entry)))) {
-                       kern_return_t kr;
-
-                       /*
-                        *      Can save space by growing the table.
-                        *      Because the space will be unlocked,
-                        *      we must restart.
-                        */
-
-                       kr = ipc_entry_grow_table(space, ITS_SIZE_NONE);
-                       assert(kr != KERN_NO_SPACE);
-                       if (kr != KERN_SUCCESS) {
-                               /* space is unlocked */
-                               if (tentry) ite_free(tentry);
-                               return kr;
-                       }
-
-                       continue;
-               }
-
-               /*
-                *      If a splay-tree entry was allocated previously,
-                *      go ahead and insert it into the tree.
-                */
-
-               if (tentry != ITE_NULL) {
-
-                       space->is_tree_total++;
-
-                       if (index < space->is_table_size) {
-                               entry = &space->is_table[index];
-                               entry->ie_bits |= IE_BITS_COLLISION;
-                       } else if ((index < its->its_size) &&
-                                  !ipc_entry_tree_collision(space, name))
-                               space->is_tree_small++;
-
-                       ipc_splay_tree_insert(&space->is_tree, name, tentry);
-                       tentry->ite_bits = 0;
-                       tentry->ite_request = 0;
-                       tentry->ite_object = IO_NULL;
-                       tentry->ite_space = space;
-                       *entryp = &tentry->ite_entry;
-                       return KERN_SUCCESS;
+                kern_return_t kr;
+               kr = ipc_entry_grow_table(space, index + 1);
+               assert(kr != KERN_NO_SPACE);
+               if (kr != KERN_SUCCESS) {
+                       /* space is unlocked */
+                       return kr;
                }
-
-               /*
-                *      Allocate a tree entry and try again.
-                */
-
-               is_write_unlock(space);
-               tentry = ite_alloc();
-               if (tentry == ITE_NULL)
-                       return KERN_RESOURCE_SHORTAGE;
-               is_write_lock(space);
+               continue;
        }
 }
 
@@ -514,7 +424,7 @@ ipc_entry_dealloc(
        ipc_entry_num_t size;
        mach_port_index_t index;
 
-       assert(space->is_active);
+       assert(is_active(space));
        assert(entry->ie_object == IO_NULL);
        assert(entry->ie_request == IE_REQ_NONE);
 
@@ -527,113 +437,79 @@ ipc_entry_dealloc(
        table = space->is_table;
        size = space->is_table_size;
 
-       if (is_fast_space(space)) {
-               assert(index < size);
-               assert(entry == &table[index]);
+       if ((index < size) && (entry == &table[index])) {
                assert(IE_BITS_GEN(entry->ie_bits) == MACH_PORT_GEN(name));
-               assert(!(entry->ie_bits & IE_BITS_COLLISION));
-               entry->ie_bits &= IE_BITS_GEN_MASK;
+               entry->ie_bits &= (IE_BITS_GEN_MASK | IE_BITS_ROLL_MASK);
                entry->ie_next = table->ie_next;
                table->ie_next = index;
-               return;
-       }
-
-
-       if ((index < size) && (entry == &table[index])) {
-               assert(IE_BITS_GEN(entry->ie_bits) == MACH_PORT_GEN(name));
-
-               if (entry->ie_bits & IE_BITS_COLLISION) {
-                       struct ipc_splay_tree small, collisions;
-                       ipc_tree_entry_t tentry;
-                       mach_port_name_t tname;
-                       boolean_t pick;
-                       ipc_object_t obj;
-
-                       /* must move an entry from tree to table */
-
-                       ipc_splay_tree_split(&space->is_tree,
-                                            MACH_PORT_MAKE(index+1, 0),
-                                            &collisions);
-                       ipc_splay_tree_split(&collisions,
-                                            MACH_PORT_MAKE(index, 0),
-                                            &small);
-
-                       pick = ipc_splay_tree_pick(&collisions,
-                                                  &tname, &tentry);
-                       assert(pick);
-                       assert(MACH_PORT_INDEX(tname) == index);
-
-                       entry->ie_object = obj = tentry->ite_object;
-                       entry->ie_bits = tentry->ite_bits|MACH_PORT_GEN(tname);
-                       entry->ie_request = tentry->ite_request;
-
-                       assert(tentry->ite_space == space);
-
-                       if (IE_BITS_TYPE(tentry->ite_bits)==MACH_PORT_TYPE_SEND) {
-                               ipc_hash_global_delete(space, obj,
-                                                      tname, tentry);
-                               ipc_hash_local_insert(space, obj,
-                                                     index, entry);
-                       }
-
-                       ipc_splay_tree_delete(&collisions, tname, tentry);
-
-                       assert(space->is_tree_total > 0);
-                       space->is_tree_total--;
-
-                       /* check if collision bit should still be on */
-
-                       pick = ipc_splay_tree_pick(&collisions,
-                                                  &tname, &tentry);
-                       if (pick) {
-                               entry->ie_bits |= IE_BITS_COLLISION;
-                               ipc_splay_tree_join(&space->is_tree,
-                                                   &collisions);
-                       }
-
-                       ipc_splay_tree_join(&space->is_tree, &small);
-
-               } else {
-                       entry->ie_bits &= IE_BITS_GEN_MASK;
-                       entry->ie_next = table->ie_next;
-                       table->ie_next = index;
-               }
-
+               space->is_table_free++;
        } else {
-               ipc_tree_entry_t tentry = (ipc_tree_entry_t) entry;
-
-               assert(tentry->ite_space == space);
-
-               ipc_splay_tree_delete(&space->is_tree, name, tentry);
+               /*
+                * Nothing to do.  The entry does not match
+                * so there is nothing to deallocate.
+                */
+                assert(index < size);
+               assert(entry == &table[index]);
+               assert(IE_BITS_GEN(entry->ie_bits) == MACH_PORT_GEN(name));
+       }
+       ipc_entry_modified(space, name, entry);
+}
 
-               assert(space->is_tree_total > 0);
-               space->is_tree_total--;
+/*
+ *     Routine:        ipc_entry_modified
+ *     Purpose:
+ *             Note that an entry was modified in a space.
+ *     Conditions:
+ *             Assumes exclusive write access to the space,
+ *             either through a write lock or being the cleaner
+ *             on an inactive space.
+ */
 
-               if (index < size) {
-                       ipc_entry_t ientry = &table[index];
+void
+ipc_entry_modified(
+       ipc_space_t             space,
+       mach_port_name_t        name,
+       __assert_only ipc_entry_t entry)
+{
+       ipc_entry_t table;
+       ipc_entry_num_t size;
+       mach_port_index_t index;
 
-                       assert(ientry->ie_bits & IE_BITS_COLLISION);
-                       
-                       if (!ipc_entry_tree_collision(space, name))
-                               ientry->ie_bits &= ~IE_BITS_COLLISION;
+       index = MACH_PORT_INDEX(name);
+       table = space->is_table;
+       size = space->is_table_size;
 
-               } else if ((index < space->is_table_next->its_size) &&
-                          !ipc_entry_tree_collision(space, name)) {
+       assert(index < size);
+       assert(entry == &table[index]);
 
-                       assert(space->is_tree_small > 0);
+       assert(space->is_low_mod <= size);
+       assert(space->is_high_mod < size);
 
-                       space->is_tree_small--;
-               }
-       }
+       if (index < space->is_low_mod)
+               space->is_low_mod = index;
+       if (index > space->is_high_mod)
+               space->is_high_mod = index;
 }
 
+#define IPC_ENTRY_GROW_STATS 1
+#if IPC_ENTRY_GROW_STATS
+static uint64_t ipc_entry_grow_count = 0;
+static uint64_t ipc_entry_grow_rescan = 0;
+static uint64_t ipc_entry_grow_rescan_max = 0;
+static uint64_t ipc_entry_grow_rescan_entries = 0;
+static uint64_t ipc_entry_grow_rescan_entries_max = 0;
+static uint64_t        ipc_entry_grow_freelist_entries = 0;
+static uint64_t        ipc_entry_grow_freelist_entries_max = 0;
+#endif
+
 /*
  *     Routine:        ipc_entry_grow_table
  *     Purpose:
  *             Grows the table in a space.
  *     Conditions:
  *             The space must be write-locked and active before.
- *             If successful, it is also returned locked.
+ *             If successful, the space is also returned locked.
+ *             On failure, the space is returned unlocked.
  *             Allocates memory.
  *     Returns:
  *             KERN_SUCCESS            Grew the table.
@@ -650,342 +526,253 @@ ipc_entry_grow_table(
 {
        ipc_entry_num_t osize, size, nsize, psize;
 
-       do {
-               boolean_t         reallocated=FALSE;
-       
-               ipc_entry_t otable, table;
-               ipc_table_size_t oits, its, nits;
-               mach_port_index_t i, free_index;
-
-               assert(space->is_active);
+       ipc_entry_t otable, table;
+       ipc_table_size_t oits, its, nits;
+       mach_port_index_t i, free_index;
+       mach_port_index_t low_mod, hi_mod;
+       ipc_table_index_t sanity;
+#if IPC_ENTRY_GROW_STATS
+       uint64_t rescan_count = 0;
+#endif
+       assert(is_active(space));
 
-               if (space->is_growing) {
-                       /*
-                        *      Somebody else is growing the table.
-                        *      We just wait for them to finish.
-                        */
+       if (is_growing(space)) {
+               /*
+                *      Somebody else is growing the table.
+                *      We just wait for them to finish.
+                */
 
-                       is_write_sleep(space);
-                       return KERN_SUCCESS;
-               }
+               is_write_sleep(space);
+               return KERN_SUCCESS;
+       }
 
-               otable = space->is_table;
+       otable = space->is_table;
                
-               its = space->is_table_next;
-               size = its->its_size;
+       its = space->is_table_next;
+       size = its->its_size;
                
-               /*
-                * Since is_table_next points to the next natural size
-                * we can identify the current size entry.
-                */
-               oits = its - 1;
-               osize = oits->its_size;
+       /*
+        * Since is_table_next points to the next natural size
+        * we can identify the current size entry.
+        */
+       oits = its - 1;
+       osize = oits->its_size;
                
-               /*
-                * If there is no target size, then the new size is simply
-                * specified by is_table_next.  If there is a target
-                * size, then search for the next entry.
-                */
-               if (target_size != ITS_SIZE_NONE) {
-                       if (target_size <= osize) {
-                               is_write_unlock(space);
-                               return KERN_SUCCESS;
-                       }
-
-                       psize = osize;
-                       while ((psize != size) && (target_size > size)) {
-                               psize = size;
-                               its++;
-                               size = its->its_size;
-                       }
-                       if (psize == size) {
-                               is_write_unlock(space);
-                               return KERN_NO_SPACE;
-                       }
+       /*
+        * If there is no target size, then the new size is simply
+        * specified by is_table_next.  If there is a target
+        * size, then search for the next entry.
+        */
+       if (target_size != ITS_SIZE_NONE) {
+               if (target_size <= osize) {
+                       /* the space is locked */                       
+                       return KERN_SUCCESS;
                }
 
-               if (osize == size) {
+               psize = osize;
+               while ((psize != size) && (target_size > size)) {
+                       psize = size;
+                       its++;
+                       size = its->its_size;
+               }
+               if (psize == size) {
                        is_write_unlock(space);
                        return KERN_NO_SPACE;
                }
-               nits = its + 1;
-               nsize = nits->its_size;
-
-               assert((osize < size) && (size <= nsize));
-
-               /*
-                *      OK, we'll attempt to grow the table.
-                *      The realloc requires that the old table
-                *      remain in existence.
-                */
+       }
 
-               space->is_growing = TRUE;
+       if (osize == size) {
                is_write_unlock(space);
+               return KERN_NO_SPACE;
+       }
+       nits = its + 1;
+       nsize = nits->its_size;
+       assert((osize < size) && (size <= nsize));
 
-               if (it_entries_reallocable(oits)) {
-                       table = it_entries_realloc(oits, otable, its);
-                       reallocated=TRUE;
-               }
-               else {
-                       table = it_entries_alloc(its);
-               }
+       /*
+        * We'll attempt to grow the table.
+        *
+        * Because we will be copying without the space lock, reset
+        * the lowest_mod index to just beyond the end of the current
+        * table.  Modification of entries (other than hashes) will
+        * bump this downward, and we only have to reprocess entries
+        * above that mark.  Eventually, we'll get done.
+        */
+       is_start_growing(space);
+       space->is_low_mod = osize;
+       space->is_high_mod = 0;
+#if IPC_ENTRY_GROW_STATS
+       ipc_entry_grow_count++;
+#endif
+       is_write_unlock(space);
 
+       table = it_entries_alloc(its);
+       if (table == IE_NULL) {
                is_write_lock(space);
-               space->is_growing = FALSE;
-
-               /*
-                *      We need to do a wakeup on the space,
-                *      to rouse waiting threads.  We defer
-                *      this until the space is unlocked,
-                *      because we don't want them to spin.
-                */
+               is_done_growing(space);
+               is_write_unlock(space);
+               thread_wakeup((event_t) space);
+               return KERN_RESOURCE_SHORTAGE;
+       }
 
-               if (table == IE_NULL) {
-                       is_write_unlock(space);
-                       thread_wakeup((event_t) space);
-                       return KERN_RESOURCE_SHORTAGE;
-               }
+       ipc_space_rand_freelist(space, table, osize, size);
 
-               if (!space->is_active) {
-                       /*
-                        *      The space died while it was unlocked.
-                        */
+       /* clear out old entries in new table */
+       memset((void *)table, 0, osize * sizeof(*table));
 
-                       is_write_unlock(space);
-                       thread_wakeup((event_t) space);
-                       it_entries_free(its, table);
-                       is_write_lock(space);
-                       return KERN_SUCCESS;
-               }
-
-               assert(space->is_table == otable);
-               assert((space->is_table_next == its) ||
-                      (target_size != ITS_SIZE_NONE));
-               assert(space->is_table_size == osize);
+       low_mod = 0;
+       hi_mod = osize - 1;
+ rescan:       
+       /*
+        * Within the range of the table that changed, determine what we
+        * have to take action on. For each entry, take a snapshot of the
+        * corresponding entry in the old table (so it won't change
+        * during this iteration). The snapshot may not be self-consistent
+        * (if we caught it in the middle of being changed), so be very
+        * cautious with the values.
+        */
+       for (i = low_mod; i <= hi_mod; i++) {
+               ipc_entry_t entry = &table[i];
+               struct ipc_entry osnap = otable[i]; 
 
-               space->is_table = table;
-               space->is_table_size = size;
-               space->is_table_next = nits;
+               if (entry->ie_object != osnap.ie_object ||
+                   IE_BITS_TYPE(entry->ie_bits) != IE_BITS_TYPE(osnap.ie_bits)) {
+                       
+                       if (entry->ie_object != IO_NULL &&
+                           IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_SEND)
+                               ipc_hash_table_delete(table, size, entry->ie_object, i, entry);
 
-               /*
-                *      If we did a realloc, it remapped the data.
-                *      Otherwise we copy by hand first.  Then we have
-                *      to zero the new part and the old local hash
-                *   values.
-                */
-               if (!reallocated) 
-                       (void) memcpy((void *) table, (const void *) otable,
-                             osize * (sizeof(struct ipc_entry)));
+                       entry->ie_object = osnap.ie_object;
+                       entry->ie_bits = osnap.ie_bits;
+                       entry->ie_request = osnap.ie_request; /* or ie_next */
 
-               for (i = 0; i < osize; i++)
-                       table[i].ie_index = 0;
+                       if (entry->ie_object != IO_NULL &&
+                           IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_SEND)
+                               ipc_hash_table_insert(table, size, entry->ie_object, i, entry);
+               } else {
+                       assert(entry->ie_object == osnap.ie_object);
+                       entry->ie_bits = osnap.ie_bits;
+                       entry->ie_request = osnap.ie_request; /* or ie_next */
+               }
 
-               (void) memset((void *) (table + osize) , 0,
-                       ((size - osize) * (sizeof(struct ipc_entry))));
+       }
+       table[0].ie_next = otable[0].ie_next;  /* always rebase the freelist */
 
-               /*
-                *      Put old entries into the reverse hash table.
-                */
-               for (i = 0; i < osize; i++) {
-                       ipc_entry_t entry = &table[i];
+       /*
+        * find the end of the freelist (should be short). But be careful,
+        * the list items can change so only follow through truly free entries
+        * (no problem stopping short in those cases, because we'll rescan).
+        */
+       free_index = 0;
+       for (sanity = 0; sanity < osize; sanity++) {
+               if (table[free_index].ie_object != IPC_OBJECT_NULL)
+                       break;
+               i = table[free_index].ie_next;
+               if (i == 0 || i >= osize)
+                       break;
+               free_index = i;
+       }
+#if IPC_ENTRY_GROW_STATS
+       ipc_entry_grow_freelist_entries += sanity;
+       if (sanity > ipc_entry_grow_freelist_entries_max)
+               ipc_entry_grow_freelist_entries_max = sanity;
+#endif
+               
+       is_write_lock(space);
 
-                       if (IE_BITS_TYPE(entry->ie_bits)==MACH_PORT_TYPE_SEND) {
-                               ipc_hash_local_insert(space, entry->ie_object,
-                                                     i, entry);
-                       }
-               }
+       /*
+        *      We need to do a wakeup on the space,
+        *      to rouse waiting threads.  We defer
+        *      this until the space is unlocked,
+        *      because we don't want them to spin.
+        */
 
+       if (!is_active(space)) {
                /*
-                *      If there are entries in the splay tree,
-                *      then we have work to do:
-                *              1) transfer entries to the table
-                *              2) update is_tree_small
+                *      The space died while it was unlocked.
                 */
-               assert(!is_fast_space(space) || space->is_tree_total == 0);
-               if (space->is_tree_total > 0) {
-                       mach_port_index_t index;
-                       boolean_t delete;
-                       struct ipc_splay_tree ignore;
-                       struct ipc_splay_tree move;
-                       struct ipc_splay_tree small;
-                       ipc_entry_num_t nosmall;
-                       ipc_tree_entry_t tentry;
-
-                       /*
-                        *      The splay tree divides into four regions,
-                        *      based on the index of the entries:
-                        *              1) 0 <= index < osize
-                        *              2) osize <= index < size
-                        *              3) size <= index < nsize
-                        *              4) nsize <= index
-                        *
-                        *      Entries in the first part are ignored.
-                        *      Entries in the second part, that don't
-                        *      collide, are moved into the table.
-                        *      Entries in the third part, that don't
-                        *      collide, are counted for is_tree_small.
-                        *      Entries in the fourth part are ignored.
-                        */
-
-                       ipc_splay_tree_split(&space->is_tree,
-                                            MACH_PORT_MAKE(nsize, 0),
-                                            &small);
-                       ipc_splay_tree_split(&small,
-                                            MACH_PORT_MAKE(size, 0),
-                                            &move);
-                       ipc_splay_tree_split(&move,
-                                            MACH_PORT_MAKE(osize, 0),
-                                            &ignore);
-
-                       /* move entries into the table */
-
-                       for (tentry = ipc_splay_traverse_start(&move);
-                            tentry != ITE_NULL;
-                            tentry = ipc_splay_traverse_next(&move, delete)) {
-
-                               mach_port_name_t name;
-                               mach_port_gen_t gen;
-                               mach_port_type_t type;
-                               ipc_entry_bits_t bits;
-                               ipc_object_t obj;
-                               ipc_entry_t entry;
-
-                               name = tentry->ite_name;
-                               gen = MACH_PORT_GEN(name);
-                               index = MACH_PORT_INDEX(name);
-
-                               assert(tentry->ite_space == space);
-                               assert((osize <= index) && (index < size));
-
-                               entry = &table[index];
-                               bits = entry->ie_bits;
-                               if (IE_BITS_TYPE(bits)) {
-                                       assert(IE_BITS_GEN(bits) != gen);
-                                       entry->ie_bits |= IE_BITS_COLLISION;
-                                       delete = FALSE;
-                                       continue;
-                               }
-                               
-                               bits = tentry->ite_bits;
-                               type = IE_BITS_TYPE(bits);
-                               assert(type != MACH_PORT_TYPE_NONE);
-
-                               entry->ie_bits = bits | gen;
-                               entry->ie_request = tentry->ite_request;
-                               entry->ie_object = obj = tentry->ite_object;
-
-                               if (type == MACH_PORT_TYPE_SEND) {
-                                       ipc_hash_global_delete(space, obj,
-                                                              name, tentry);
-                                       ipc_hash_local_insert(space, obj,
-                                                             index, entry);
-                               }
-                               space->is_tree_total--;
-                               delete = TRUE;
-                       }
-                       ipc_splay_traverse_finish(&move);
 
-                       /* count entries for is_tree_small */
-
-                       nosmall = 0; index = 0;
-                       for (tentry = ipc_splay_traverse_start(&small);
-                            tentry != ITE_NULL;
-                            tentry = ipc_splay_traverse_next(&small, FALSE)) {
-                               mach_port_index_t nindex;
-
-                               nindex = MACH_PORT_INDEX(tentry->ite_name);
-
-                               if (nindex != index) {
-                                       nosmall++;
-                                       index = nindex;
-                               }
-                       }
-                       ipc_splay_traverse_finish(&small);
-
-                       assert(nosmall <= (nsize - size));
-                       assert(nosmall <= space->is_tree_total);
-                       space->is_tree_small = nosmall;
-
-                       /* put the splay tree back together */
+               is_done_growing(space);
+               is_write_unlock(space);
+               thread_wakeup((event_t) space);
+               it_entries_free(its, table);
+               is_write_lock(space);
+               return KERN_SUCCESS;
+       }
 
-                       ipc_splay_tree_join(&space->is_tree, &small);
-                       ipc_splay_tree_join(&space->is_tree, &move);
-                       ipc_splay_tree_join(&space->is_tree, &ignore);
-               }
+       /* If the space changed while unlocked, go back and process the changes */
+       if (space->is_low_mod < osize) {
+               assert(space->is_high_mod > 0);
+               low_mod = space->is_low_mod;
+               space->is_low_mod = osize;
+               hi_mod = space->is_high_mod;
+               space->is_high_mod = 0;
+               is_write_unlock(space);
+#if IPC_ENTRY_GROW_STATS
+               rescan_count++;
+               if (rescan_count > ipc_entry_grow_rescan_max)
+                       ipc_entry_grow_rescan_max = rescan_count;
+
+               ipc_entry_grow_rescan++;
+               ipc_entry_grow_rescan_entries += hi_mod - low_mod + 1;
+               if (hi_mod - low_mod + 1 > ipc_entry_grow_rescan_entries_max)
+                       ipc_entry_grow_rescan_entries_max = hi_mod - low_mod + 1;
+#endif
+               goto rescan;
+       }
 
-               /*
-                *      Add entries in the new part which still aren't used
-                *      to the free list.  Add them in reverse order,
-                *      and set the generation number to -1, so that
-                *      early allocations produce "natural" names.
-                */
+       /* link new free entries onto the rest of the freelist */
+       assert(table[free_index].ie_next == 0 &&
+              table[free_index].ie_object == IO_NULL);
+       table[free_index].ie_next = osize;
 
-               free_index = table[0].ie_next;
-               for (i = size-1; i >= osize; --i) {
-                       ipc_entry_t entry = &table[i];
+       assert(space->is_table == otable);
+       assert((space->is_table_next == its) ||
+           (target_size != ITS_SIZE_NONE));
+       assert(space->is_table_size == osize);
 
-                       if (entry->ie_bits == 0) {
-                               entry->ie_bits = IE_BITS_GEN_MASK;
-                               entry->ie_next = free_index;
-                               free_index = i;
-                       }
-               }
-               table[0].ie_next = free_index;
+       space->is_table = table;
+       space->is_table_size = size;
+       space->is_table_next = nits;
+       space->is_table_free += size - osize;
 
-               /*
-                *      Now we need to free the old table.
-                *      If the space dies or grows while unlocked,
-                *      then we can quit here.
-                */
-               is_write_unlock(space);
-               thread_wakeup((event_t) space);
+       is_done_growing(space);
+       is_write_unlock(space);
 
-               it_entries_free(oits, otable);
-               is_write_lock(space);
-               if (!space->is_active || (space->is_table_next != nits))
-                       return KERN_SUCCESS;
+       thread_wakeup((event_t) space);
 
-               /*
-                *      We might have moved enough entries from
-                *      the splay tree into the table that
-                *      the table can be profitably grown again.
-                *
-                *      Note that if size == nsize, then
-                *      space->is_tree_small == 0.
-                */
-       } while ((space->is_tree_small > 0) &&
-                (((nsize - size) * sizeof(struct ipc_entry)) <
-                 (space->is_tree_small * sizeof(struct ipc_tree_entry))));
+       /*
+        *      Now we need to free the old table.
+        */
+       it_entries_free(oits, otable);
+       is_write_lock(space);
 
        return KERN_SUCCESS;
 }
 
 
-#if    MACH_KDB
-#include <ddb/db_output.h>
-#define        printf  kdbprintf 
-
-ipc_entry_t    db_ipc_object_by_name(
-                       task_t                  task,
-                       mach_port_name_t        name);
-
-
-ipc_entry_t
-db_ipc_object_by_name(
-       task_t          task,
-       mach_port_name_t        name)
+/*
+ *     Routine:        ipc_entry_name_mask
+ *     Purpose:
+ *             Ensure a mach port name has the default ipc entry
+ *             generation bits set. This can be used to ensure that
+ *             a name passed in by user space matches names generated
+ *             by the kernel.
+ *     Conditions:
+ *             None.
+ *     Returns:
+ *             'name' input with default generation bits masked or added
+ *             as appropriate.
+ */
+mach_port_name_t
+ipc_entry_name_mask(mach_port_name_t name)
 {
-        ipc_space_t space = task->itk_space;
-        ipc_entry_t entry;
-        entry = ipc_entry_lookup(space, name);
-        if(entry != IE_NULL) {
-                iprintf("(task 0x%x, name 0x%x) ==> object 0x%x\n",
-                       task, name, entry->ie_object);
-                return (ipc_entry_t) entry->ie_object;
-        }
-        return entry;
+#ifndef NO_PORT_GEN
+       static mach_port_name_t null_name = MACH_PORT_MAKE(0, IE_BITS_GEN_MASK + IE_BITS_GEN_ONE);
+       return name | null_name;
+#else
+       static mach_port_name_t null_name = MACH_PORT_MAKE(0, ~(IE_BITS_GEN_MASK + IE_BITS_GEN_ONE));
+       return name & ~null_name;
+#endif
 }
-#endif /* MACH_KDB */