git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/kern/zalloc.c
xnu-792.6.70.tar.gz
[apple/xnu.git] / osfmk / kern / zalloc.c
index 66e36c0160d98f71c7f4825a167329c8629fc17c..bb68d22362e4d4b5524694dbf5a25e7d8aa015b9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
 #include <zone_debug.h>
 #include <norma_vm.h>
 #include <mach_kdb.h>
-#include <kern/ast.h>
+
+#include <mach/mach_types.h>
+#include <mach/vm_param.h>
+#include <mach/kern_return.h>
+#include <mach/mach_host_server.h>
+#include <mach/machine/vm_types.h>
+#include <mach_debug/zone_info.h>
+
+#include <kern/kern_types.h>
 #include <kern/assert.h>
+#include <kern/host.h>
 #include <kern/macro_help.h>
 #include <kern/sched.h>
 #include <kern/lock.h>
 #include <kern/misc_protos.h>
 #include <kern/thread_call.h>
 #include <kern/zalloc.h>
-#include <mach/vm_param.h>
+#include <kern/kalloc.h>
+
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
 #include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
 #include <machine/machparam.h>
 
+#if defined(__ppc__)
+/* for fake zone stat routines */
+#include <ppc/savearea.h>
+#include <ppc/mappings.h>
+#endif
 
 #if    MACH_ASSERT
 /* Detect use of zone elt after freeing it by two methods:
 #if defined(__alpha)
 
 #define is_kernel_data_addr(a)                                         \
-               (!(a) || IS_SYS_VA(a) && !((a) & (sizeof(long)-1)))
+  (!(a) || (IS_SYS_VA(a) && !((a) & (sizeof(long)-1))))
 
 #else /* !defined(__alpha) */
 
 #define is_kernel_data_addr(a)                                         \
-               (!(a) || (a) >= VM_MIN_KERNEL_ADDRESS && !((a) & 0x3))
+  (!(a) || ((a) >= VM_MIN_KERNEL_ADDRESS && !((a) & 0x3)))
 
 #endif /* defined(__alpha) */
 
@@ -101,7 +120,7 @@ boolean_t zfree_clear = FALSE;
 #define ADD_TO_ZONE(zone, element)                                     \
 MACRO_BEGIN                                                            \
                if (zfree_clear)                                        \
-               {   int i;                                              \
+               {   unsigned int i;                                             \
                    for (i=1;                                           \
                         i < zone->elem_size/sizeof(vm_offset_t) - 1;   \
                         i++)                                           \
@@ -145,6 +164,8 @@ MACRO_END
 
 #if    ZONE_DEBUG
 #define zone_debug_enabled(z) z->active_zones.next
+#define        ROUNDUP(x,y)            ((((x)+(y)-1)/(y))*(y))
+#define ZONE_DEBUG_OFFSET      ROUNDUP(sizeof(queue_chain_t),16) 
 #endif /* ZONE_DEBUG */
 
 /*
@@ -152,19 +173,11 @@ MACRO_END
  */
 
 struct zone_page_table_entry {
-       struct  zone_page_table_entry   *next;
-       short   in_free_list;
+       struct zone_page_table_entry    *link;
        short   alloc_count;
+       short   collect_count;
 };
 
-extern struct zone_page_table_entry * zone_page_table;
-
-#define lock_zone_page_table() simple_lock(&zone_page_table_lock)
-#define unlock_zone_page_table() simple_unlock(&zone_page_table_lock)
-
-#define        zone_page(addr) \
-    (&(zone_page_table[(atop(((vm_offset_t)addr) - zone_map_min_address))]))
-
 /* Forwards */
 void           zone_page_init(
                                vm_offset_t     addr,
@@ -175,19 +188,12 @@ void              zone_page_alloc(
                                vm_offset_t     addr,
                                vm_size_t       size);
 
-void           zone_add_free_page_list(
-                               struct zone_page_table_entry    **free_list,
-                               vm_offset_t     addr,
-                               vm_size_t       size);
-void           zone_page_dealloc(
+void           zone_page_free_element(
+                               struct zone_page_table_entry    **free_pages,
                                vm_offset_t     addr,
                                vm_size_t       size);
 
-void           zone_page_in_use(
-                               vm_offset_t     addr,
-                               vm_size_t       size);
-
-void           zone_page_free(
+void           zone_page_collect(
                                vm_offset_t     addr,
                                vm_size_t       size);
 
@@ -224,26 +230,26 @@ vm_size_t zdata_size;
 
 #define lock_zone(zone)                                        \
 MACRO_BEGIN                                            \
-       simple_lock(&(zone)->lock);                     \
+       mutex_lock(&(zone)->lock);                      \
 MACRO_END
 
 #define unlock_zone(zone)                              \
 MACRO_BEGIN                                            \
-       simple_unlock(&(zone)->lock);                   \
+       mutex_unlock(&(zone)->lock);                    \
 MACRO_END
 
 #define zone_wakeup(zone) thread_wakeup((event_t)(zone))
 #define zone_sleep(zone)                               \
-       thread_sleep_simple_lock((event_t)(zone),       \
+       thread_sleep_mutex((event_t)(zone),     \
                                &(zone)->lock,          \
                                THREAD_UNINT)
 
 #define lock_zone_init(zone)                           \
 MACRO_BEGIN                                            \
-       simple_lock_init(&zone->lock, ETAP_MISC_ZONE);  \
+       mutex_init(&zone->lock, 0);     \
 MACRO_END
 
-#define lock_try_zone(zone)    simple_lock_try(&zone->lock)
+#define lock_try_zone(zone)    mutex_try(&zone->lock)
 
 kern_return_t          zget_space(
                                vm_offset_t size,
@@ -257,20 +263,19 @@ vm_size_t zalloc_wasted_space;
 /*
  *     Garbage collection map information
  */
-decl_simple_lock_data(,                zone_page_table_lock)
 struct zone_page_table_entry * zone_page_table;
 vm_offset_t                    zone_map_min_address;
 vm_offset_t                    zone_map_max_address;
-integer_t                      zone_pages;
+unsigned int                   zone_pages;
 
 /*
  *     Exclude more than one concurrent garbage collection
  */
 decl_mutex_data(,              zone_gc_lock)
 
-#define from_zone_map(addr) \
+#define from_zone_map(addr, size) \
        ((vm_offset_t)(addr) >= zone_map_min_address && \
-        (vm_offset_t)(addr) <  zone_map_max_address)
+        ((vm_offset_t)(addr) + size -1) <  zone_map_max_address)
 
 #define        ZONE_PAGE_USED  0
 #define ZONE_PAGE_UNUSED -1
@@ -283,7 +288,7 @@ decl_mutex_data(,           zone_gc_lock)
 decl_simple_lock_data(,        all_zones_lock)
 zone_t                 first_zone;
 zone_t                 *last_zone;
-int                    num_zones;
+unsigned int           num_zones;
 
 boolean_t zone_gc_allowed = TRUE;
 boolean_t zone_gc_forced = FALSE;
@@ -301,7 +306,7 @@ zinit(
        vm_size_t       size,           /* the size of an element */
        vm_size_t       max,            /* maximum memory to use */
        vm_size_t       alloc,          /* allocation size */
-       char            *name)          /* a name for the zone */
+       const char      *name)          /* a name for the zone */
 {
        zone_t          z;
 
@@ -326,15 +331,26 @@ zinit(
        alloc = round_page(alloc);
        max   = round_page(max);
        /*
-        * We look for an allocation size with least fragmentation
-        * in the range of 1 - 5 pages.  This size will be used unless
+        * We first look for an allocation size of 1 - 5 pages that
+        * wastes less than 1% of the block; if one exists it is used.
+        * Otherwise, we look for the allocation size with the least
+        * fragmentation in the range of 1 - 5 pages.
+        * This size will be used unless
         * the user suggestion is larger AND has less fragmentation
         */
        {       vm_size_t best, waste; unsigned int i;
                best  = PAGE_SIZE;
                waste = best % size;
-               for (i = 2; i <= 5; i++){       vm_size_t tsize, twaste;
-                       tsize  = i * PAGE_SIZE;
+
+               for (i = 1; i <= 5; i++) {
+                       vm_size_t tsize, twaste;
+
+                       tsize = i * PAGE_SIZE;
+
+                       if ((tsize % size) < (tsize / 100)) {
+                               alloc = tsize;
+                               goto use_this_allocation;
+                       }
                        twaste = tsize % size;
                        if (twaste < waste)
                                best = tsize, waste = twaste;
@@ -342,6 +358,7 @@ zinit(
                if (alloc <= best || (alloc % size >= waste))
                        alloc = best;
        }
+use_this_allocation:
        if (max && (max < alloc))
                max = alloc;
 
@@ -353,6 +370,7 @@ zinit(
        z->zone_name = name;
        z->count = 0;
        z->doing_alloc = FALSE;
+       z->doing_gc = FALSE;
        z->exhaustible = FALSE;
        z->collectable = TRUE;
        z->allows_foreign = FALSE;
@@ -387,22 +405,23 @@ zinit(
 void
 zcram(
        register zone_t         zone,
-       vm_offset_t             newmem,
+       void                    *newaddr,
        vm_size_t               size)
 {
        register vm_size_t      elem_size;
+       vm_offset_t             newmem = (vm_offset_t) newaddr;
 
        /* Basic sanity checks */
        assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
        assert(!zone->collectable || zone->allows_foreign
-               || (from_zone_map(newmem) && from_zone_map(newmem+size-1)));
+               || (from_zone_map(newmem, size)));
 
        elem_size = zone->elem_size;
 
        lock_zone(zone);
        while (size >= elem_size) {
                ADD_TO_ZONE(zone, newmem);
-               if (from_zone_map(newmem))
+               if (from_zone_map(newmem, elem_size))
                        zone_page_alloc(newmem, elem_size);
                zone->count++;  /* compensate for ADD_TO_ZONE */
                size -= elem_size;
@@ -423,7 +442,7 @@ zget_space(
        vm_offset_t *result)
 {
        vm_offset_t     new_space = 0;
-       vm_size_t       space_to_add;
+       vm_size_t       space_to_add = 0;
 
        simple_lock(&zget_space_lock);
        while ((zalloc_next_space + size) > zalloc_end_of_space) {
@@ -501,7 +520,7 @@ void
 zone_steal_memory(void)
 {
        zdata_size = round_page(128*sizeof(struct zone));
-       zdata = pmap_steal_memory(zdata_size);
+       zdata = (vm_offset_t)((char *)pmap_steal_memory(zdata_size) - (char *)0);
 }
 
 
@@ -532,7 +551,7 @@ zfill(
                return 0;
 
        zone_change(zone, Z_FOREIGN, TRUE);
-       zcram(zone, memory, size);
+       zcram(zone, (void *)memory, size);
        nalloc = size / zone->elem_size;
        assert(nalloc >= nelem);
 
@@ -550,13 +569,13 @@ zone_bootstrap(void)
        vm_size_t zone_zone_size;
        vm_offset_t zone_zone_space;
 
-       simple_lock_init(&all_zones_lock, ETAP_MISC_ZONE_ALL);
+       simple_lock_init(&all_zones_lock, 0);
 
        first_zone = ZONE_NULL;
        last_zone = &first_zone;
        num_zones = 0;
 
-       simple_lock_init(&zget_space_lock, ETAP_MISC_ZONE_GET);
+       simple_lock_init(&zget_space_lock, 0);
        zalloc_next_space = zdata;
        zalloc_end_of_space = zdata + zdata_size;
        zalloc_wasted_space = 0;
@@ -568,7 +587,7 @@ zone_bootstrap(void)
        zone_change(zone_zone, Z_COLLECT, FALSE);
        zone_zone_size = zalloc_end_of_space - zalloc_next_space;
        zget_space(zone_zone_size, &zone_zone_space);
-       zcram(zone_zone, zone_zone_space, zone_zone_size);
+       zcram(zone_zone, (void *)zone_zone_space, zone_zone_size);
 }
 
 void
@@ -581,24 +600,24 @@ zone_init(
        vm_size_t       zone_table_size;
 
        retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
-                                               FALSE, TRUE, &zone_map);
+                                               FALSE, VM_FLAGS_ANYWHERE, &zone_map);
+
        if (retval != KERN_SUCCESS)
                panic("zone_init: kmem_suballoc failed");
        zone_max = zone_min + round_page(max_zonemap_size);
        /*
         * Setup garbage collection information:
         */
-       zone_table_size = atop(zone_max - zone_min) * 
+       zone_table_size = atop_32(zone_max - zone_min) * 
                                sizeof(struct zone_page_table_entry);
        if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table,
                             zone_table_size) != KERN_SUCCESS)
                panic("zone_init");
        zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
-       zone_pages = atop(zone_max - zone_min);
+       zone_pages = atop_32(zone_max - zone_min);
        zone_map_min_address = zone_min;
        zone_map_max_address = zone_max;
-       simple_lock_init(&zone_page_table_lock, ETAP_MISC_ZONE_PTABLE);
-       mutex_init(&zone_gc_lock, ETAP_NO_TRACE);
+       mutex_init(&zone_gc_lock, 0);
        zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
 }
 
@@ -606,7 +625,7 @@ zone_init(
 /*
  *     zalloc returns an element from the specified zone.
  */
-vm_offset_t
+void *
 zalloc_canblock(
        register zone_t zone,
        boolean_t canblock)
@@ -615,12 +634,17 @@ zalloc_canblock(
        kern_return_t retval;
 
        assert(zone != ZONE_NULL);
-       check_simple_locks();
 
        lock_zone(zone);
 
        REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
 
+       while ((addr == 0) && canblock && (zone->doing_gc)) {
+               zone->waiting = TRUE;
+               zone_sleep(zone);
+               REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
+       }
+
        while ((addr == 0) && canblock) {
                /*
                 *      If nothing was there, try to get more
@@ -662,24 +686,33 @@ zalloc_canblock(
                        if (zone->collectable) {
                                vm_offset_t space;
                                vm_size_t alloc_size;
-
-                               if (vm_pool_low())
-                                       alloc_size = 
-                                         round_page(zone->elem_size);
-                               else
-                                       alloc_size = zone->alloc_size;
-
-                               retval = kernel_memory_allocate(zone_map,
-                                       &space, alloc_size, 0,
-                                       KMA_KOBJECT|KMA_NOPAGEWAIT);
-                               if (retval == KERN_SUCCESS) {
-                                       zone_page_init(space, alloc_size,
-                                               ZONE_PAGE_USED);
-                                       zcram(zone, space, alloc_size);
-                               } else if (retval != KERN_RESOURCE_SHORTAGE) {
-                                       /* would like to cause a zone_gc() */
-
-                                       panic("zalloc");
+                               boolean_t retry = FALSE;
+
+                               for (;;) {
+
+                                       if (vm_pool_low() || retry == TRUE)
+                                               alloc_size = 
+                                                 round_page(zone->elem_size);
+                                       else
+                                               alloc_size = zone->alloc_size;
+
+                                       retval = kernel_memory_allocate(zone_map,
+                                                                       &space, alloc_size, 0,
+                                                                       KMA_KOBJECT|KMA_NOPAGEWAIT);
+                                       if (retval == KERN_SUCCESS) {
+                                               zone_page_init(space, alloc_size,
+                                                              ZONE_PAGE_USED);
+                                               zcram(zone, (void *)space, alloc_size);
+
+                                               break;
+                                       } else if (retval != KERN_RESOURCE_SHORTAGE) {
+                                               /* would like to cause a zone_gc() */
+                                               if (retry == TRUE)
+                                                       panic("zalloc: \"%s\" (%d elements) retry fail %d", zone->zone_name, zone->count, retval);
+                                               retry = TRUE;
+                                       } else {
+                                               break;
+                                       }
                                }
                                lock_zone(zone);
                                zone->doing_alloc = FALSE; 
@@ -717,9 +750,9 @@ zalloc_canblock(
                                        zone_page_alloc(space, zone->elem_size);
 #if    ZONE_DEBUG
                                        if (zone_debug_enabled(zone))
-                                               space += sizeof(queue_chain_t);
+                                               space += ZONE_DEBUG_OFFSET;
 #endif
-                                       return(space);
+                                       return((void *)space);
                                }
                                if (retval == KERN_RESOURCE_SHORTAGE) {
                                        unlock_zone(zone);
@@ -727,7 +760,7 @@ zalloc_canblock(
                                        VM_PAGE_WAIT();
                                        lock_zone(zone);
                                } else {
-                                       panic("zalloc");
+                                       panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval);
                                }
                        }
                }
@@ -746,24 +779,24 @@ zalloc_canblock(
 #if    ZONE_DEBUG
        if (addr && zone_debug_enabled(zone)) {
                enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
-               addr += sizeof(queue_chain_t);
+               addr += ZONE_DEBUG_OFFSET;
        }
 #endif
 
        unlock_zone(zone);
 
-       return(addr);
+       return((void *)addr);
 }
 
 
-vm_offset_t
+void *
 zalloc(
        register zone_t zone)
 {
   return( zalloc_canblock(zone, TRUE) );
 }
 
-vm_offset_t
+void *
 zalloc_noblock(
               register zone_t zone)
 {
@@ -772,10 +805,10 @@ zalloc_noblock(
 
 void
 zalloc_async(
-       thread_call_param_t     p0,
-       thread_call_param_t     p1)
+       thread_call_param_t          p0,
+       __unused thread_call_param_t p1)
 {
-       vm_offset_t     elt;
+       void *elt;
 
        elt = zalloc_canblock((zone_t)p0, TRUE);
        zfree((zone_t)p0, elt);
@@ -792,7 +825,7 @@ zalloc_async(
  *     This form should be used when you can not block (like when
  *     processing an interrupt).
  */
-vm_offset_t
+void *
 zget(
        register zone_t zone)
 {
@@ -801,29 +834,33 @@ zget(
        assert( zone != ZONE_NULL );
 
        if (!lock_try_zone(zone))
-           return ((vm_offset_t)0);
+               return NULL;
 
        REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
 #if    ZONE_DEBUG
        if (addr && zone_debug_enabled(zone)) {
                enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
-               addr += sizeof(queue_chain_t);
+               addr += ZONE_DEBUG_OFFSET;
        }
 #endif /* ZONE_DEBUG */
        unlock_zone(zone);
 
-       return(addr);
+       return((void *) addr);
 }
 
 /* Keep this FALSE by default.  Large memory machine run orders of magnitude
    slower in debug mode when true.  Use debugger to enable if needed */
-boolean_t zone_check = FALSE;
+/* static */ boolean_t zone_check = FALSE;
+
+static zone_t zone_last_bogus_zone = ZONE_NULL;
+static vm_offset_t zone_last_bogus_elem = 0;
 
 void
 zfree(
        register zone_t zone,
-       vm_offset_t     elem)
+       void            *addr)
 {
+       vm_offset_t     elem = (vm_offset_t) addr;
 
 #if MACH_ASSERT
        /* Basic sanity checks */
@@ -832,17 +869,24 @@ zfree(
        /* zone_gc assumes zones are never freed */
        if (zone == zone_zone)
                panic("zfree: freeing to zone_zone breaks zone_gc!");
+#endif
+
        if (zone->collectable && !zone->allows_foreign &&
-           (!from_zone_map(elem) || !from_zone_map(elem+zone->elem_size-1)))
+           !from_zone_map(elem, zone->elem_size)) {
+#if MACH_ASSERT
                panic("zfree: non-allocated memory in collectable zone!");
 #endif
+               zone_last_bogus_zone = zone;
+               zone_last_bogus_elem = elem;
+               return;
+       }
 
        lock_zone(zone);
 #if    ZONE_DEBUG
        if (zone_debug_enabled(zone)) {
                queue_t tmp_elem;
 
-               elem -= sizeof(queue_chain_t);
+               elem -= ZONE_DEBUG_OFFSET;
                if (zone_check) {
                        /* check the zone's consistency */
 
@@ -953,68 +997,34 @@ zprealloc(
                if (kmem_alloc_wired(zone_map, &addr, size) != KERN_SUCCESS)
                  panic("zprealloc");
                zone_page_init(addr, size, ZONE_PAGE_USED);
-               zcram(zone, addr, size);
+               zcram(zone, (void *)addr, size);
        }
 }
 
 /*
  *  Zone garbage collection subroutines
- *
- *  These routines have in common the modification of entries in the
- *  zone_page_table.  The latter contains one entry for every page
- *  in the zone_map.  
- *
- *  For each page table entry in the given range:
- *
- *     zone_page_collectable   - test if one (in_free_list == alloc_count)
- *     zone_page_keep          - reset in_free_list
- *     zone_page_in_use        - decrements in_free_list
- *     zone_page_free          - increments in_free_list
- *     zone_page_init          - initializes in_free_list and alloc_count
- *     zone_page_alloc         - increments alloc_count
- *     zone_page_dealloc       - decrements alloc_count
- *     zone_add_free_page_list - adds the page to the free list
- *   
- *  Two counts are maintained for each page, the in_free_list count and
- *  alloc_count.  The alloc_count is how many zone elements have been
- *  allocated from a page.  (Note that the page could contain elements
- *  that span page boundaries.  The count includes these elements so
- *  one element may be counted in two pages.) In_free_list is a count
- *  of how many zone elements are currently free.  If in_free_list is
- *  equal to alloc_count then the page is eligible for garbage
- *  collection.
- *
- *  Alloc_count and in_free_list are initialized to the correct values
- *  for a particular zone when a page is zcram'ed into a zone.  Subsequent
- *  gets and frees of zone elements will call zone_page_in_use and 
- *  zone_page_free which modify the in_free_list count.  When the zones
- *  garbage collector runs it will walk through a zones free element list,
- *  remove the elements that reside on collectable pages, and use 
- *  zone_add_free_page_list to create a list of pages to be collected.
  */
+
 boolean_t
 zone_page_collectable(
        vm_offset_t     addr,
        vm_size_t       size)
 {
+       struct zone_page_table_entry    *zp;
        natural_t i, j;
 
 #if MACH_ASSERT
-       if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
+       if (!from_zone_map(addr, size))
                panic("zone_page_collectable");
 #endif
 
-       i = atop(addr-zone_map_min_address);
-       j = atop((addr+size-1) - zone_map_min_address);
-       lock_zone_page_table();
-       for (; i <= j; i++) {
-               if (zone_page_table[i].in_free_list ==
-                   zone_page_table[i].alloc_count) {
-                       unlock_zone_page_table();
+       i = atop_32(addr-zone_map_min_address);
+       j = atop_32((addr+size-1) - zone_map_min_address);
+
+       for (zp = zone_page_table + i; i <= j; zp++, i++)
+               if (zp->collect_count == zp->alloc_count)
                        return (TRUE);
-               }
-       }
-       unlock_zone_page_table();
+
        return (FALSE);
 }
 
@@ -1023,64 +1033,39 @@ zone_page_keep(
        vm_offset_t     addr,
        vm_size_t       size)
 {
+       struct zone_page_table_entry    *zp;
        natural_t i, j;
 
 #if MACH_ASSERT
-       if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
+       if (!from_zone_map(addr, size))
                panic("zone_page_keep");
 #endif
 
-       i = atop(addr-zone_map_min_address);
-       j = atop((addr+size-1) - zone_map_min_address);
-       lock_zone_page_table();
-       for (; i <= j; i++) {
-               zone_page_table[i].in_free_list = 0;
-       }
-       unlock_zone_page_table();
-}
+       i = atop_32(addr-zone_map_min_address);
+       j = atop_32((addr+size-1) - zone_map_min_address);
 
-void
-zone_page_in_use(
-       vm_offset_t     addr,
-       vm_size_t       size)
-{
-       natural_t i, j;
-
-#if MACH_ASSERT
-       if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
-               panic("zone_page_in_use");
-#endif
-
-       i = atop(addr-zone_map_min_address);
-       j = atop((addr+size-1) - zone_map_min_address);
-       lock_zone_page_table();
-       for (; i <= j; i++) {
-               if (zone_page_table[i].in_free_list > 0)
-                       zone_page_table[i].in_free_list--;
-       }
-       unlock_zone_page_table();
+       for (zp = zone_page_table + i; i <= j; zp++, i++)
+               zp->collect_count = 0;
 }
 
 void
-zone_page_free(
+zone_page_collect(
        vm_offset_t     addr,
        vm_size_t       size)
 {
+       struct zone_page_table_entry    *zp;
        natural_t i, j;
 
 #if MACH_ASSERT
-       if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
-               panic("zone_page_free");
+       if (!from_zone_map(addr, size))
+               panic("zone_page_collect");
 #endif
 
-       i = atop(addr-zone_map_min_address);
-       j = atop((addr+size-1) - zone_map_min_address);
-       lock_zone_page_table();
-       for (; i <= j; i++) {
-               assert(zone_page_table[i].in_free_list >= 0);
-               zone_page_table[i].in_free_list++;
-       }
-       unlock_zone_page_table();
+       i = atop_32(addr-zone_map_min_address);
+       j = atop_32((addr+size-1) - zone_map_min_address);
+
+       for (zp = zone_page_table + i; i <= j; zp++, i++)
+               ++zp->collect_count;
 }
 
 void
@@ -1089,21 +1074,21 @@ zone_page_init(
        vm_size_t       size,
        int             value)
 {
+       struct zone_page_table_entry    *zp;
        natural_t i, j;
 
 #if MACH_ASSERT
-       if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
+       if (!from_zone_map(addr, size))
                panic("zone_page_init");
 #endif
 
-       i = atop(addr-zone_map_min_address);
-       j = atop((addr+size-1) - zone_map_min_address);
-       lock_zone_page_table();
-       for (; i <= j; i++) {
-               zone_page_table[i].alloc_count = value;
-               zone_page_table[i].in_free_list = 0;
+       i = atop_32(addr-zone_map_min_address);
+       j = atop_32((addr+size-1) - zone_map_min_address);
+
+       for (zp = zone_page_table + i; i <= j; zp++, i++) {
+               zp->alloc_count = value;
+               zp->collect_count = 0;
        }
-       unlock_zone_page_table();
 }
 
 void
@@ -1111,85 +1096,73 @@ zone_page_alloc(
        vm_offset_t     addr,
        vm_size_t       size)
 {
+       struct zone_page_table_entry    *zp;
        natural_t i, j;
 
 #if MACH_ASSERT
-       if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
+       if (!from_zone_map(addr, size))
                panic("zone_page_alloc");
 #endif
 
-       i = atop(addr-zone_map_min_address);
-       j = atop((addr+size-1) - zone_map_min_address);
-       lock_zone_page_table();
-       for (; i <= j; i++) {
-               /* Set alloc_count to (ZONE_PAGE_USED + 1) if
+       i = atop_32(addr-zone_map_min_address);
+       j = atop_32((addr+size-1) - zone_map_min_address);
+
+       for (zp = zone_page_table + i; i <= j; zp++, i++) {
+               /*
+                * Set alloc_count to (ZONE_PAGE_USED + 1) if
                 * it was previously set to ZONE_PAGE_UNUSED.
                 */
-               if (zone_page_table[i].alloc_count == ZONE_PAGE_UNUSED) {
-                       zone_page_table[i].alloc_count = 1;
-               } else {
-                       zone_page_table[i].alloc_count++;
-               }
+               if (zp->alloc_count == ZONE_PAGE_UNUSED)
+                       zp->alloc_count = 1;
+               else
+                       ++zp->alloc_count;
        }
-       unlock_zone_page_table();
 }
 
 void
-zone_page_dealloc(
+zone_page_free_element(
+       struct zone_page_table_entry    **free_pages,
        vm_offset_t     addr,
        vm_size_t       size)
 {
+       struct zone_page_table_entry    *zp;
        natural_t i, j;
 
 #if MACH_ASSERT
-       if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
-               panic("zone_page_dealloc");
+       if (!from_zone_map(addr, size))
+               panic("zone_page_free_element");
 #endif
 
-       i = atop(addr-zone_map_min_address);
-       j = atop((addr+size-1) - zone_map_min_address);
-       lock_zone_page_table();
-       for (; i <= j; i++) {
-               zone_page_table[i].alloc_count--;
-       }
-       unlock_zone_page_table();
-}
+       i = atop_32(addr-zone_map_min_address);
+       j = atop_32((addr+size-1) - zone_map_min_address);
 
-void
-zone_add_free_page_list(
-       struct zone_page_table_entry    **free_list,
-       vm_offset_t     addr,
-       vm_size_t       size)
-{
-       natural_t i, j;
+       for (zp = zone_page_table + i; i <= j; zp++, i++) {
+               if (zp->collect_count > 0)
+                       --zp->collect_count;
+               if (--zp->alloc_count == 0) {
+                       zp->alloc_count  = ZONE_PAGE_UNUSED;
+                       zp->collect_count = 0;
 
-#if MACH_ASSERT
-       if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
-               panic("zone_add_free_page_list");
-#endif
-
-       i = atop(addr-zone_map_min_address);
-       j = atop((addr+size-1) - zone_map_min_address);
-       lock_zone_page_table();
-       for (; i <= j; i++) {
-               if (zone_page_table[i].alloc_count == 0) {
-                       zone_page_table[i].next = *free_list;
-                       *free_list = &zone_page_table[i];
-                       zone_page_table[i].alloc_count  = ZONE_PAGE_UNUSED;
-                       zone_page_table[i].in_free_list = 0;
+                       zp->link = *free_pages;
+                       *free_pages = zp;
                }
        }
-       unlock_zone_page_table();
 }
 
 
 /* This is used for walking through a zone's free element list.
  */
-struct zone_free_entry {
-       struct zone_free_entry * next;
+struct zone_free_element {
+       struct zone_free_element * next;
 };
 
-int reclaim_page_count = 0;
+struct {
+       uint32_t        pgs_freed;
+
+       uint32_t        elems_collected,
+                               elems_freed,
+                               elems_kept;
+} zgc_stats;
 
 /*     Zone garbage collection
  *
@@ -1202,35 +1175,28 @@ void
 zone_gc(void)
 {
        unsigned int    max_zones;
-       zone_t          z;
+       zone_t                  z;
        unsigned int    i;
-       struct zone_page_table_entry    *freep;
-       struct zone_page_table_entry    *zone_free_page_list;
+       struct zone_page_table_entry    *zp, *zone_free_pages;
 
        mutex_lock(&zone_gc_lock);
 
-       /*
-        * Note that this scheme of locking only to walk the zone list
-        * assumes that zones are never freed (checked by zfree)
-        */ 
        simple_lock(&all_zones_lock);
        max_zones = num_zones;
        z = first_zone;
        simple_unlock(&all_zones_lock);
 
 #if MACH_ASSERT
-       lock_zone_page_table();
        for (i = 0; i < zone_pages; i++)
-               assert(zone_page_table[i].in_free_list == 0);
-       unlock_zone_page_table();
+               assert(zone_page_table[i].collect_count == 0);
 #endif /* MACH_ASSERT */
 
-       zone_free_page_list = (struct zone_page_table_entry *) 0;
+       zone_free_pages = NULL;
 
        for (i = 0; i < max_zones; i++, z = z->next_zone) {
-               struct zone_free_entry * prev;
-               struct zone_free_entry * elt;
-               struct zone_free_entry * end;
+               unsigned int                            n, m;
+               vm_size_t                                       elt_size, size_freed;
+               struct zone_free_element        *elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail;
 
                assert(z != ZONE_NULL);
 
@@ -1239,82 +1205,213 @@ zone_gc(void)
 
                lock_zone(z);
 
+               elt_size = z->elem_size;
+
                /*
                 * Do a quick feasability check before we scan the zone: 
-                * skip unless there is likelihood of getting 1+ pages back.
+                * skip unless there is likelihood of getting pages back
+                * (i.e., we need a whole allocation block's worth of free
+                * elements before we can garbage collect) and
+                * the zone has more than 10 percent of its elements free
                 */
-               if ((z->cur_size - z->count * z->elem_size) <= (2*PAGE_SIZE)){
+               if (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) ||
+                   ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10))) {
                        unlock_zone(z);         
                        continue;
                }
 
-               /* Count the free elements in each page.  This loop
-                * requires that all in_free_list entries are zero.
-                *
-                * Exit the loop early if we need to hurry up and drop
-                * the lock to allow preemption - but we must fully process
-                * all elements we looked at so far.
+               z->doing_gc = TRUE;
+
+               /*
+                * Snatch all of the free elements away from the zone.
                 */
-               elt = (struct zone_free_entry *)(z->free_elements);
-               while (!ast_urgency() && (elt != (struct zone_free_entry *)0)) {
-                       if (from_zone_map(elt))
-                               zone_page_free((vm_offset_t)elt, z->elem_size);
-                       elt = elt->next;
-               }
-               end = elt;
 
-               /* Now determine which elements should be removed
-                * from the free list and, after all the elements
-                * on a page have been removed, add the element's
-                * page to a list of pages to be freed.
+               scan = (void *)z->free_elements;
+               (void *)z->free_elements = NULL;
+
+               unlock_zone(z);
+
+               /*
+                * Pass 1:
+                *
+                * Determine which elements we can attempt to collect
+                * and count them up in the page table.  Foreign elements
+                * are returned to the zone.
                 */
-               prev = elt = (struct zone_free_entry *)(z->free_elements);
-               while (elt != end) {
-                       if (!from_zone_map(elt)) {
+
+               prev = (void *)&scan;
+               elt = scan;
+               n = 0; tail = keep = NULL;
+               while (elt != NULL) {
+                       if (from_zone_map(elt, elt_size)) {
+                               zone_page_collect((vm_offset_t)elt, elt_size);
+
                                prev = elt;
                                elt = elt->next;
-                               continue;
+
+                               ++zgc_stats.elems_collected;
                        }
-                       if (zone_page_collectable((vm_offset_t)elt,
-                                                 z->elem_size)) {
-                               z->cur_size -= z->elem_size;
-                               zone_page_in_use((vm_offset_t)elt,
-                                                z->elem_size);
-                               zone_page_dealloc((vm_offset_t)elt,
-                                                 z->elem_size);
-                               zone_add_free_page_list(&zone_free_page_list, 
-                                                       (vm_offset_t)elt,
-                                                       z->elem_size);
-                               if (elt == prev) {
-                                       elt = elt->next;
-                                       z->free_elements =(vm_offset_t)elt;
-                                       prev = elt;
-                               } else {
-                                       prev->next = elt->next;
-                                       elt = elt->next;
+                       else {
+                               if (keep == NULL)
+                                       keep = tail = elt;
+                               else
+                                       tail = tail->next = elt;
+
+                               elt = prev->next = elt->next;
+                               tail->next = NULL;
+                       }
+
+                       /*
+                        * Dribble back the elements we are keeping.
+                        */
+
+                       if (++n >= 50) {
+                               if (z->waiting == TRUE) {
+                                       lock_zone(z);
+
+                                       if (keep != NULL) {
+                                               tail->next = (void *)z->free_elements;
+                                               (void *)z->free_elements = keep;
+                                               tail = keep = NULL;
+                                       } else {
+                                               m =0;
+                                               base_elt = elt;
+                                               base_prev = prev;
+                                               while ((elt != NULL) && (++m < 50)) { 
+                                                       prev = elt;
+                                                       elt = elt->next;
+                                               }
+                                               if (m !=0 ) {
+                                                       prev->next = (void *)z->free_elements;
+                                                       (void *)z->free_elements = (void *)base_elt;
+                                                       base_prev->next = elt;
+                                                       prev = base_prev;
+                                               }
+                                       }
+
+                                       if (z->waiting) {
+                                               z->waiting = FALSE;
+                                               zone_wakeup(z);
+                                       }
+
+                                       unlock_zone(z);
                                }
-                       } else {
-                               /* This element is not eligible for collection
-                                * so clear in_free_list in preparation for a
-                                * subsequent garbage collection pass.
-                                */
-                               zone_page_keep((vm_offset_t)elt, z->elem_size);
-                               prev = elt;
-                               elt = elt->next;
+                               n =0;
+                       }
+               }
+
+               /*
+                * Return any remaining elements.
+                */
+
+               if (keep != NULL) {
+                       lock_zone(z);
+
+                       tail->next = (void *)z->free_elements;
+                       (void *)z->free_elements = keep;
+
+                       unlock_zone(z);
+               }
+
+               /*
+                * Pass 2:
+                *
+                * Determine which pages we can reclaim and
+                * free those elements.
+                */
+
+               size_freed = 0;
+               prev = (void *)&scan;
+               elt = scan;
+               n = 0; tail = keep = NULL;
+               while (elt != NULL) {
+                       if (zone_page_collectable((vm_offset_t)elt, elt_size)) {
+                               size_freed += elt_size;
+                               zone_page_free_element(&zone_free_pages,
+                                                                               (vm_offset_t)elt, elt_size);
+
+                               elt = prev->next = elt->next;
+
+                               ++zgc_stats.elems_freed;
+                       }
+                       else {
+                               zone_page_keep((vm_offset_t)elt, elt_size);
+
+                               if (keep == NULL)
+                                       keep = tail = elt;
+                               else
+                                       tail = tail->next = elt;
+
+                               elt = prev->next = elt->next;
+                               tail->next = NULL;
+
+                               ++zgc_stats.elems_kept;
+                       }
+
+                       /*
+                        * Dribble back the elements we are keeping,
+                        * and update the zone size info.
+                        */
+
+                       if (++n >= 50) {
+                               lock_zone(z);
+
+                               z->cur_size -= size_freed;
+                               size_freed = 0;
+
+                               if (keep != NULL) {
+                                       tail->next = (void *)z->free_elements;
+                                       (void *)z->free_elements = keep;
+                               }
+
+                               if (z->waiting) {
+                                       z->waiting = FALSE;
+                                       zone_wakeup(z);
+                               }
+
+                               unlock_zone(z);
+
+                               n = 0; tail = keep = NULL;
+                       }
+               }
+
+               /*
+                * Return any remaining elements, and update
+                * the zone size info.
+                */
+
+               lock_zone(z);
+
+               if (size_freed > 0 || keep != NULL) {
+
+                       z->cur_size -= size_freed;
+
+                       if (keep != NULL) {
+                               tail->next = (void *)z->free_elements;
+                               (void *)z->free_elements = keep;
                        }
-               } /* end while(elt != end) */
 
+               }
+
+               z->doing_gc = FALSE;
+               if (z->waiting) {
+                       z->waiting = FALSE;
+                       zone_wakeup(z);
+               }
                unlock_zone(z);
        }
 
-       for (freep = zone_free_page_list; freep != 0; freep = freep->next) {
-               vm_offset_t     free_addr;
+       /*
+        * Reclaim the pages we are freeing.
+        */
 
-               free_addr = zone_map_min_address + 
-                           PAGE_SIZE * (freep - zone_page_table);
-               kmem_free(zone_map, free_addr, PAGE_SIZE);
-               reclaim_page_count++;
+       while ((zp = zone_free_pages) != NULL) {
+               zone_free_pages = zp->link;
+               kmem_free(zone_map, zone_map_min_address + PAGE_SIZE *
+                                                                               (zp - zone_page_table), PAGE_SIZE);
+               ++zgc_stats.pgs_freed;
        }
+
        mutex_unlock(&zone_gc_lock);
 }
 
@@ -1329,11 +1426,11 @@ consider_zone_gc(void)
 {
        /*
         *      By default, don't attempt zone GC more frequently
-        *      than once a second.
+        *      than once per minute.
         */
 
        if (zone_gc_max_rate == 0)
-               zone_gc_max_rate = (1 << SCHED_TICK_SHIFT) + 1;
+               zone_gc_max_rate = (60 << SCHED_TICK_SHIFT) + 1;
 
        if (zone_gc_allowed &&
            ((sched_tick > (zone_gc_last_tick + zone_gc_max_rate)) ||
@@ -1344,14 +1441,6 @@ consider_zone_gc(void)
        }
 }
 
-#include <mach/kern_return.h>
-#include <mach/machine/vm_types.h>
-#include <mach_debug/zone_info.h>
-#include <kern/host.h>
-#include <vm/vm_map.h>
-#include <vm/vm_kern.h>
-
-#include <mach/mach_host_server.h>
 
 kern_return_t
 host_zone_info(
@@ -1392,7 +1481,7 @@ host_zone_info(
 
        if (max_zones <= *namesCntp) {
                /* use in-line memory */
-
+               names_size = *namesCntp * sizeof *names;
                names = *namesp;
        } else {
                names_size = round_page(max_zones * sizeof *names);
@@ -1405,7 +1494,7 @@ host_zone_info(
 
        if (max_zones <= *infoCntp) {
                /* use in-line memory */
-
+               info_size = *infoCntp * sizeof *info;
                info = *infop;
        } else {
                info_size = round_page(max_zones * sizeof *info);
@@ -1482,8 +1571,8 @@ host_zone_info(
                if (used != names_size)
                        bzero((char *) (names_addr + used), names_size - used);
 
-               kr = vm_map_copyin(ipc_kernel_map, names_addr, names_size,
-                                  TRUE, &copy);
+               kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
+                                  (vm_map_size_t)names_size, TRUE, &copy);
                assert(kr == KERN_SUCCESS);
 
                *namesp = (zone_name_t *) copy;
@@ -1499,8 +1588,8 @@ host_zone_info(
                if (used != info_size)
                        bzero((char *) (info_addr + used), info_size - used);
 
-               kr = vm_map_copyin(ipc_kernel_map, info_addr, info_size,
-                                  TRUE, &copy);
+               kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
+                                  (vm_map_size_t)info_size, TRUE, &copy);
                assert(kr == KERN_SUCCESS);
 
                *infop = (zone_info_t *) copy;
@@ -1554,12 +1643,12 @@ db_print_zone(
 /*ARGSUSED*/
 void
 db_show_one_zone(
-        db_expr_t       addr,
-        int            have_addr,
-        db_expr_t      count,
-        char *          modif)
+        db_expr_t              addr,
+        int                    have_addr,
+        __unused db_expr_t     count,
+        __unused char *        modif)
 {
-       struct zone *z = (zone_t)addr;
+       struct zone *z = (zone_t)((char *)0 + addr);
 
        if (z == ZONE_NULL || !have_addr){
                db_error("No Zone\n");
@@ -1573,10 +1662,10 @@ db_show_one_zone(
 /*ARGSUSED*/
 void
 db_show_all_zones(
-        db_expr_t      addr,
-        int            have_addr,
-        db_expr_t      count,
-        char *         modif)
+        __unused db_expr_t     addr,
+        int                    have_addr,
+        db_expr_t              count,
+        __unused char *        modif)
 {
        zone_t          z;
        unsigned total = 0;
@@ -1608,8 +1697,7 @@ db_show_all_zones(
                }
        }
        db_printf("\nTotal              %8x", total);
-       db_printf("\n\nzone_gc() has reclaimed %d pages\n",
-                 reclaim_page_count);
+       db_printf("\n\nzone_gc() has reclaimed %d pages\n", zgc_stats.pgs_freed);
 }
 
 #if    ZONE_DEBUG
@@ -1724,33 +1812,35 @@ db_zone_print_free(
 /* should we care about locks here ? */
 
 #if    MACH_KDB
-vm_offset_t
+void *
 next_element(
        zone_t          z,
-       vm_offset_t     elt)
+       void            *prev)
 {
+       char            *elt = (char *)prev;
+
        if (!zone_debug_enabled(z))
                return(0);
-       elt -= sizeof(queue_chain_t);
-       elt = (vm_offset_t) queue_next((queue_t) elt);
+       elt -= ZONE_DEBUG_OFFSET;
+       elt = (char *) queue_next((queue_t) elt);
        if ((queue_t) elt == &z->active_zones)
                return(0);
-       elt += sizeof(queue_chain_t);
+       elt += ZONE_DEBUG_OFFSET;
        return(elt);
 }
 
-vm_offset_t
+void *
 first_element(
        zone_t          z)
 {
-       vm_offset_t     elt;
+       char            *elt;
 
        if (!zone_debug_enabled(z))
                return(0);
        if (queue_empty(&z->active_zones))
                return(0);
-       elt = (vm_offset_t) queue_first(&z->active_zones);
-       elt += sizeof(queue_chain_t);
+       elt = (char *)queue_first(&z->active_zones);
+       elt += ZONE_DEBUG_OFFSET;
        return(elt);
 }
 
@@ -1765,7 +1855,7 @@ zone_count(
        zone_t          z,
        int             tail)
 {
-       vm_offset_t     elt;
+       void            *elt;
        int             count = 0;
        boolean_t       print = (tail != 0);
 
@@ -1791,10 +1881,10 @@ zone_debug_enable(
        zone_t          z)
 {
        if (zone_debug_enabled(z) || zone_in_use(z) ||
-           z->alloc_size < (z->elem_size + sizeof(queue_chain_t)))
+           z->alloc_size < (z->elem_size + ZONE_DEBUG_OFFSET))
                return;
        queue_init(&z->active_zones);
-       z->elem_size += sizeof(queue_chain_t);
+       z->elem_size += ZONE_DEBUG_OFFSET;
 }
 
 void
@@ -1803,7 +1893,7 @@ zone_debug_disable(
 {
        if (!zone_debug_enabled(z) || zone_in_use(z))
                return;
-       z->elem_size -= sizeof(queue_chain_t);
+       z->elem_size -= ZONE_DEBUG_OFFSET;
        z->active_zones.next = z->active_zones.prev = 0;        
 }
 #endif /* ZONE_DEBUG */