]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/kern/zalloc.c
xnu-3248.30.4.tar.gz
[apple/xnu.git] / osfmk / kern / zalloc.c
index 7b4efb64565e7bae348aeb448efed05376dd65ec..351268d3454930e451cace047a95db6c92629554 100644 (file)
@@ -375,6 +375,13 @@ uint64_t zone_map_table_page_count = 0;
 vm_offset_t     zone_map_min_address = 0;  /* initialized in zone_init */
 vm_offset_t     zone_map_max_address = 0;
 
 vm_offset_t     zone_map_min_address = 0;  /* initialized in zone_init */
 vm_offset_t     zone_map_max_address = 0;
 
+/* Globals for random boolean generator for elements in free list */
+#define MAX_ENTROPY_PER_ZCRAM          4
+#define RANDOM_BOOL_GEN_SEED_COUNT      4
+static unsigned int bool_gen_seed[RANDOM_BOOL_GEN_SEED_COUNT];
+static unsigned int bool_gen_global = 0;
+decl_simple_lock_data(, bool_gen_lock)
+
 /* Helpful for walking through a zone's free element list. */
 struct zone_free_element {
        struct zone_free_element *next;
 /* Helpful for walking through a zone's free element list. */
 struct zone_free_element {
        struct zone_free_element *next;
@@ -401,7 +408,7 @@ get_backup_ptr(vm_size_t  elem_size,
 static inline struct zone_page_metadata *
 get_zone_page_metadata(struct zone_free_element *element)
 {
 static inline struct zone_page_metadata *
 get_zone_page_metadata(struct zone_free_element *element)
 {
-       return (struct zone_page_metadata *)(trunc_page((vm_offset_t)element) + PAGE_SIZE - sizeof(struct zone_page_metadata));
+       return (struct zone_page_metadata *)(trunc_page((vm_offset_t)element));
 }
 
 /*
 }
 
 /*
@@ -553,7 +560,6 @@ backup_ptr_mismatch_panic(zone_t        zone,
        zone_element_was_modified_panic(zone, element, primary, likely_backup, 0);
 }
 
        zone_element_was_modified_panic(zone, element, primary, likely_backup, 0);
 }
 
-
 /*
  * Sets the next element of tail to elem.
  * elem can be NULL.
 /*
  * Sets the next element of tail to elem.
  * elem can be NULL.
@@ -815,21 +821,6 @@ struct fake_zone_info {
 };
 
 static const struct fake_zone_info fake_zones[] = {
 };
 
 static const struct fake_zone_info fake_zones[] = {
-       {
-               .name = "kernel_stacks",
-               .init = stack_fake_zone_init,
-               .query = stack_fake_zone_info,
-       },
-       {
-               .name = "page_tables",
-               .init = pt_fake_zone_init,
-               .query = pt_fake_zone_info,
-       },
-       {
-               .name = "kalloc.large",
-               .init = kalloc_fake_zone_init,
-               .query = kalloc_fake_zone_info,
-       },
 };
 static const unsigned int num_fake_zones =
        sizeof (fake_zones) / sizeof (fake_zones[0]);
 };
 static const unsigned int num_fake_zones =
        sizeof (fake_zones) / sizeof (fake_zones[0]);
@@ -936,6 +927,10 @@ zone_t             zinfo_zone = ZONE_NULL; /* zone of per-task zone info */
 
 vm_offset_t    zdata;
 vm_size_t      zdata_size;
 
 vm_offset_t    zdata;
 vm_size_t      zdata_size;
+/*
+ * Align elements that use the zone page list to 32 byte boundaries.
+ */
+#define ZONE_ELEMENT_ALIGNMENT 32
 
 #define zone_wakeup(zone) thread_wakeup((event_t)(zone))
 #define zone_sleep(zone)                               \
 
 #define zone_wakeup(zone) thread_wakeup((event_t)(zone))
 #define zone_sleep(zone)                               \
@@ -999,6 +994,9 @@ boolean_t zone_gc_forced = FALSE;
 boolean_t panic_include_zprint = FALSE;
 boolean_t zone_gc_allowed_by_time_throttle = TRUE;
 
 boolean_t panic_include_zprint = FALSE;
 boolean_t zone_gc_allowed_by_time_throttle = TRUE;
 
+vm_offset_t panic_kext_memory_info = 0;
+vm_size_t panic_kext_memory_size = 0;
+
 #define ZALLOC_DEBUG_ZONEGC            0x00000001
 #define ZALLOC_DEBUG_ZCRAM             0x00000002
 uint32_t zalloc_debug = 0;
 #define ZALLOC_DEBUG_ZONEGC            0x00000001
 #define ZALLOC_DEBUG_ZCRAM             0x00000002
 uint32_t zalloc_debug = 0;
@@ -1311,12 +1309,12 @@ zleak_activate(void)
        lck_spin_unlock(&zleak_lock);
 
        /* Allocate and zero tables */
        lck_spin_unlock(&zleak_lock);
 
        /* Allocate and zero tables */
-       retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size);
+       retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size, VM_KERN_MEMORY_OSFMK);
        if (retval != KERN_SUCCESS) {
                goto fail;
        }
 
        if (retval != KERN_SUCCESS) {
                goto fail;
        }
 
-       retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&traces_ptr, z_trace_size);
+       retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&traces_ptr, z_trace_size, VM_KERN_MEMORY_OSFMK);
        if (retval != KERN_SUCCESS) {
                goto fail;
        }
        if (retval != KERN_SUCCESS) {
                goto fail;
        }
@@ -1738,10 +1736,19 @@ use_this_allocation:
         * to its page_metadata, and if the wastage in the tail of
         * the allocation is not too large
         */
         * to its page_metadata, and if the wastage in the tail of
         * the allocation is not too large
         */
-       if (alloc == PAGE_SIZE) {
-               if ((PAGE_SIZE % size) >= sizeof(struct zone_page_metadata)) {
-                       use_page_list = TRUE;
-               } else if ((PAGE_SIZE - sizeof(struct zone_page_metadata)) % size <= PAGE_SIZE / 100) {
+
+       /* zone_zone can't use page metadata since the page metadata will overwrite zone metadata */
+       if (alloc == PAGE_SIZE && zone_zone != ZONE_NULL) {
+               vm_offset_t first_element_offset;
+               size_t zone_page_metadata_size = sizeof(struct zone_page_metadata);
+
+               if (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT == 0) {
+                       first_element_offset = zone_page_metadata_size;
+               } else {
+                       first_element_offset = zone_page_metadata_size + (ZONE_ELEMENT_ALIGNMENT - (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT));
+               }
+
+               if (((PAGE_SIZE - first_element_offset) % size) <= PAGE_SIZE / 100) {
                        use_page_list = TRUE;
                }
        }
                        use_page_list = TRUE;
                }
        }
@@ -1760,7 +1767,8 @@ use_this_allocation:
        z->count = 0;
        z->countfree = 0;
        z->sum_count = 0LL;
        z->count = 0;
        z->countfree = 0;
        z->sum_count = 0LL;
-       z->doing_alloc = FALSE;
+       z->doing_alloc_without_vm_priv = FALSE;
+       z->doing_alloc_with_vm_priv = FALSE;
        z->doing_gc = FALSE;
        z->exhaustible = FALSE;
        z->collectable = TRUE;
        z->doing_gc = FALSE;
        z->exhaustible = FALSE;
        z->collectable = TRUE;
@@ -1851,7 +1859,8 @@ static void zone_replenish_thread(zone_t z) {
                lock_zone(z);
                assert(z->prio_refill_watermark != 0);
                while ((free_size = (z->cur_size - (z->count * z->elem_size))) < (z->prio_refill_watermark * z->elem_size)) {
                lock_zone(z);
                assert(z->prio_refill_watermark != 0);
                while ((free_size = (z->cur_size - (z->count * z->elem_size))) < (z->prio_refill_watermark * z->elem_size)) {
-                       assert(z->doing_alloc == FALSE);
+                       assert(z->doing_alloc_without_vm_priv == FALSE);
+                       assert(z->doing_alloc_with_vm_priv == FALSE);
                        assert(z->async_prio_refill == TRUE);
 
                        unlock_zone(z);
                        assert(z->async_prio_refill == TRUE);
 
                        unlock_zone(z);
@@ -1867,19 +1876,18 @@ static void zone_replenish_thread(zone_t z) {
                        if (z->noencrypt)
                                zflags |= KMA_NOENCRYPT;
                                
                        if (z->noencrypt)
                                zflags |= KMA_NOENCRYPT;
                                
-                       kr = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags);
+                       kr = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE);
 
                        if (kr == KERN_SUCCESS) {
 #if    ZONE_ALIAS_ADDR
                                if (alloc_size == PAGE_SIZE)
                                        space = zone_alias_addr(space);
 #endif
 
                        if (kr == KERN_SUCCESS) {
 #if    ZONE_ALIAS_ADDR
                                if (alloc_size == PAGE_SIZE)
                                        space = zone_alias_addr(space);
 #endif
-                               ZONE_PAGE_COUNT_INCR(z, (alloc_size / PAGE_SIZE));      
                                zcram(z, space, alloc_size);
                        } else if (kr == KERN_RESOURCE_SHORTAGE) {
                                VM_PAGE_WAIT();
                        } else if (kr == KERN_NO_SPACE) {
                                zcram(z, space, alloc_size);
                        } else if (kr == KERN_RESOURCE_SHORTAGE) {
                                VM_PAGE_WAIT();
                        } else if (kr == KERN_NO_SPACE) {
-                               kr = kernel_memory_allocate(kernel_map, &space, alloc_size, 0, zflags);
+                               kr = kernel_memory_allocate(kernel_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE);
                                if (kr == KERN_SUCCESS) {
 #if    ZONE_ALIAS_ADDR
                                        if (alloc_size == PAGE_SIZE)
                                if (kr == KERN_SUCCESS) {
 #if    ZONE_ALIAS_ADDR
                                        if (alloc_size == PAGE_SIZE)
@@ -1924,7 +1932,85 @@ zone_prio_refill_configure(zone_t z, vm_size_t low_water_mark) {
 }
 
 /*
 }
 
 /*
- *     Cram the given memory into the specified zone.
+ * Boolean Random Number Generator for generating booleans to randomize 
+ * the order of elements in newly zcram()'ed memory. The algorithm is a 
+ * modified version of the KISS RNG proposed in the paper:
+ * http://stat.fsu.edu/techreports/M802.pdf
+ * The modifications have been documented in the technical paper 
+ * paper from UCL:
+ * http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf 
+ */
+
+static void random_bool_gen_entropy(
+               int     *buffer,
+               int     count)
+{
+
+       int i, t;
+       simple_lock(&bool_gen_lock);
+       for (i = 0; i < count; i++) {
+               bool_gen_seed[1] ^= (bool_gen_seed[1] << 5);
+               bool_gen_seed[1] ^= (bool_gen_seed[1] >> 7);
+               bool_gen_seed[1] ^= (bool_gen_seed[1] << 22);
+               t = bool_gen_seed[2] + bool_gen_seed[3] + bool_gen_global;
+               bool_gen_seed[2] = bool_gen_seed[3];
+               bool_gen_global = t < 0;
+               bool_gen_seed[3] = t &2147483647;
+               bool_gen_seed[0] += 1411392427;
+               buffer[i] = (bool_gen_seed[0] + bool_gen_seed[1] + bool_gen_seed[3]);
+       }
+       simple_unlock(&bool_gen_lock);
+}
+
+static boolean_t random_bool_gen(
+               int     *buffer,
+               int     index,
+               int     bufsize)
+{
+       int valindex, bitpos;
+       valindex = (index / (8 * sizeof(int))) % bufsize;
+       bitpos = index % (8 * sizeof(int));
+       return (boolean_t)(buffer[valindex] & (1 << bitpos));
+} 
+
+static void 
+random_free_to_zone(
+                       zone_t          zone,
+                       vm_offset_t     newmem,
+                       vm_offset_t     first_element_offset,
+                       int             element_count,
+                       boolean_t       from_zm,
+                       int             *entropy_buffer)
+{
+       vm_offset_t     last_element_offset;
+       vm_offset_t     element_addr;
+       vm_size_t       elem_size;
+       int             index;  
+
+       elem_size = zone->elem_size;
+       last_element_offset = first_element_offset + ((element_count * elem_size) - elem_size);
+       for (index = 0; index < element_count; index++) {
+               assert(first_element_offset <= last_element_offset);
+               if (random_bool_gen(entropy_buffer, index, MAX_ENTROPY_PER_ZCRAM)) {
+                       element_addr = newmem + first_element_offset;
+                       first_element_offset += elem_size;
+               } else {
+                       element_addr = newmem + last_element_offset;
+                       last_element_offset -= elem_size;
+               }
+               if (element_addr != (vm_offset_t)zone) {
+                       zone->count++;  /* compensate for free_to_zone */
+                       free_to_zone(zone, element_addr, FALSE);
+               }
+               if (!zone->use_page_list && from_zm) {
+                       zone_page_alloc(element_addr, elem_size);
+               }
+               zone->cur_size += elem_size;
+       }
+}
+
+/*
+ *     Cram the given memory into the specified zone. Update the zone page count accordingly.
  */
 void
 zcram(
  */
 void
 zcram(
@@ -1934,6 +2020,9 @@ zcram(
 {
        vm_size_t       elem_size;
        boolean_t   from_zm = FALSE;
 {
        vm_size_t       elem_size;
        boolean_t   from_zm = FALSE;
+       vm_offset_t first_element_offset;
+       int element_count;
+       int entropy_buffer[MAX_ENTROPY_PER_ZCRAM];
 
        /* Basic sanity checks */
        assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
 
        /* Basic sanity checks */
        assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
@@ -1942,6 +2031,8 @@ zcram(
 
        elem_size = zone->elem_size;
 
 
        elem_size = zone->elem_size;
 
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_START, VM_KERNEL_ADDRPERM(zone), size, 0, 0, 0);
+
        if (from_zone_map(newmem, size))
                from_zm = TRUE;
 
        if (from_zone_map(newmem, size))
                from_zm = TRUE;
 
@@ -1952,17 +2043,21 @@ zcram(
        if (from_zm && !zone->use_page_list)
                zone_page_init(newmem, size);
 
        if (from_zm && !zone->use_page_list)
                zone_page_init(newmem, size);
 
+       ZONE_PAGE_COUNT_INCR(zone, (size / PAGE_SIZE));
+
+       random_bool_gen_entropy(entropy_buffer, MAX_ENTROPY_PER_ZCRAM);
+       
        lock_zone(zone);
 
        if (zone->use_page_list) {
                struct zone_page_metadata *page_metadata;
        lock_zone(zone);
 
        if (zone->use_page_list) {
                struct zone_page_metadata *page_metadata;
+               size_t zone_page_metadata_size = sizeof(struct zone_page_metadata);
 
                assert((newmem & PAGE_MASK) == 0);
                assert((size & PAGE_MASK) == 0);
                for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) {
 
 
                assert((newmem & PAGE_MASK) == 0);
                assert((size & PAGE_MASK) == 0);
                for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) {
 
-                       vm_size_t pos_in_page;
-                       page_metadata = (struct zone_page_metadata *)(newmem + PAGE_SIZE - sizeof(struct zone_page_metadata));
+                       page_metadata = (struct zone_page_metadata *)(newmem);
                        
                        page_metadata->pages.next = NULL;
                        page_metadata->pages.prev = NULL;
                        
                        page_metadata->pages.next = NULL;
                        page_metadata->pages.prev = NULL;
@@ -1973,37 +2068,24 @@ zcram(
 
                        enqueue_tail(&zone->pages.all_used, (queue_entry_t)page_metadata);
 
 
                        enqueue_tail(&zone->pages.all_used, (queue_entry_t)page_metadata);
 
-                       for (pos_in_page = 0; (newmem + pos_in_page + elem_size) < (vm_offset_t)page_metadata; pos_in_page += elem_size) {
-                               page_metadata->alloc_count++;
-                               zone->count++;  /* compensate for free_to_zone */
-                               if ((newmem + pos_in_page) == (vm_offset_t)zone) {
-                                       /*
-                                        * special case for the "zone_zone" zone, which is using the first
-                                        * allocation of its pmap_steal_memory()-ed allocation for
-                                        * the "zone_zone" variable already.
-                                        */
-                               } else {
-                                       free_to_zone(zone, newmem + pos_in_page, FALSE);
-                               }
-                               zone->cur_size += elem_size;
-                       }
-               }
-       } else {
-               while (size >= elem_size) {
-                       zone->count++;  /* compensate for free_to_zone */
-                       if (newmem == (vm_offset_t)zone) {
-                               /* Don't free zone_zone zone */
+                       if (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT == 0){
+                               first_element_offset = zone_page_metadata_size;
                        } else {
                        } else {
-                               free_to_zone(zone, newmem, FALSE);
+                               first_element_offset = zone_page_metadata_size + (ZONE_ELEMENT_ALIGNMENT - (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT));
                        }
                        }
-                       if (from_zm)
-                               zone_page_alloc(newmem, elem_size);
-                       size -= elem_size;
-                       newmem += elem_size;
-                       zone->cur_size += elem_size;
+                       element_count = (int)((PAGE_SIZE - first_element_offset) / elem_size);
+                       page_metadata->alloc_count += element_count;
+                       random_free_to_zone(zone, newmem, first_element_offset, element_count, from_zm, entropy_buffer);                        
                }
                }
+       } else {        
+               first_element_offset = 0;
+               element_count = (int)((size - first_element_offset) / elem_size);               
+               random_free_to_zone(zone, newmem, first_element_offset, element_count, from_zm, entropy_buffer);
        }
        unlock_zone(zone);
        }
        unlock_zone(zone);
+       
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_END, VM_KERNEL_ADDRPERM(zone), 0, 0, 0, 0);
+
 }
 
 
 }
 
 
@@ -2046,12 +2128,11 @@ zfill(
                return 0;
        size = nelem * zone->elem_size;
        size = round_page(size);
                return 0;
        size = nelem * zone->elem_size;
        size = round_page(size);
-       kr = kmem_alloc_kobject(kernel_map, &memory, size);
+       kr = kmem_alloc_kobject(kernel_map, &memory, size, VM_KERN_MEMORY_ZONE);
        if (kr != KERN_SUCCESS)
                return 0;
 
        zone_change(zone, Z_FOREIGN, TRUE);
        if (kr != KERN_SUCCESS)
                return 0;
 
        zone_change(zone, Z_FOREIGN, TRUE);
-       ZONE_PAGE_COUNT_INCR(zone, (size / PAGE_SIZE));
        zcram(zone, memory, size);
        nalloc = (int)(size / zone->elem_size);
        assert(nalloc >= nelem);
        zcram(zone, memory, size);
        nalloc = (int)(size / zone->elem_size);
        assert(nalloc >= nelem);
@@ -2068,6 +2149,7 @@ void
 zone_bootstrap(void)
 {
        char temp_buf[16];
 zone_bootstrap(void)
 {
        char temp_buf[16];
+       unsigned int i;
 
        if (PE_parse_boot_argn("-zinfop", temp_buf, sizeof(temp_buf))) {
                zinfo_per_task = TRUE;
 
        if (PE_parse_boot_argn("-zinfop", temp_buf, sizeof(temp_buf))) {
                zinfo_per_task = TRUE;
@@ -2079,6 +2161,12 @@ zone_bootstrap(void)
        /* Set up zone element poisoning */
        zp_init();
 
        /* Set up zone element poisoning */
        zp_init();
 
+       /* Seed the random boolean generator for elements in zone free list */
+       for (i = 0; i < RANDOM_BOOL_GEN_SEED_COUNT; i++) {
+               bool_gen_seed[i] = (unsigned int)early_random();
+       }
+       simple_lock_init(&bool_gen_lock, 0);
+
        /* should zlog log to debug zone corruption instead of leaks? */
        if (PE_parse_boot_argn("-zc", temp_buf, sizeof(temp_buf))) {
                corruption_debug_flag = TRUE;
        /* should zlog log to debug zone corruption instead of leaks? */
        if (PE_parse_boot_argn("-zc", temp_buf, sizeof(temp_buf))) {
                corruption_debug_flag = TRUE;
@@ -2132,11 +2220,11 @@ zone_bootstrap(void)
        zone_change(zone_zone, Z_NOENCRYPT, TRUE);
 
        zcram(zone_zone, zdata, zdata_size);
        zone_change(zone_zone, Z_NOENCRYPT, TRUE);
 
        zcram(zone_zone, zdata, zdata_size);
+       VM_PAGE_MOVE_STOLEN(atop_64(zdata_size));
 
        /* initialize fake zones and zone info if tracking by task */
        if (zinfo_per_task) {
                vm_size_t zisize = sizeof(zinfo_usage_store_t) * ZINFO_SLOTS;
 
        /* initialize fake zones and zone info if tracking by task */
        if (zinfo_per_task) {
                vm_size_t zisize = sizeof(zinfo_usage_store_t) * ZINFO_SLOTS;
-               unsigned int i;
 
                for (i = 0; i < num_fake_zones; i++)
                        fake_zones[i].init(ZINFO_SLOTS - num_fake_zones + i);
 
                for (i = 0; i < num_fake_zones; i++)
                        fake_zones[i].init(ZINFO_SLOTS - num_fake_zones + i);
@@ -2179,7 +2267,7 @@ zone_init(
        vm_offset_t     zone_max;
 
        retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
        vm_offset_t     zone_max;
 
        retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
-                              FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT,
+                              FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT | VM_MAKE_TAG(VM_KERN_MEMORY_ZONE),
                               &zone_map);
 
        if (retval != KERN_SUCCESS)
                               &zone_map);
 
        if (retval != KERN_SUCCESS)
@@ -2259,7 +2347,7 @@ zone_page_table_expand(zone_page_index_t pindex)
                struct zone_page_table_entry *entry_array;
 
                if (kmem_alloc_kobject(zone_map, &second_level_array,
                struct zone_page_table_entry *entry_array;
 
                if (kmem_alloc_kobject(zone_map, &second_level_array,
-                                                          second_level_size) != KERN_SUCCESS) {
+                                                          second_level_size, VM_KERN_MEMORY_OSFMK) != KERN_SUCCESS) {
                        panic("zone_page_table_expand");
                }
                zone_map_table_page_count += (second_level_size / PAGE_SIZE);
                        panic("zone_page_table_expand");
                }
                zone_map_table_page_count += (second_level_size / PAGE_SIZE);
@@ -2324,6 +2412,7 @@ zalloc_internal(
 #endif
        thread_t thr = current_thread();
        boolean_t       check_poison = FALSE;
 #endif
        thread_t thr = current_thread();
        boolean_t       check_poison = FALSE;
+       boolean_t       set_doing_alloc_with_vm_priv = FALSE;
 
 #if CONFIG_ZLEAKS
        uint32_t        zleak_tracedepth = 0;  /* log this allocation if nonzero */
 
 #if CONFIG_ZLEAKS
        uint32_t        zleak_tracedepth = 0;  /* log this allocation if nonzero */
@@ -2395,21 +2484,35 @@ zalloc_internal(
 
        while ((addr == 0) && canblock) {
                /*
 
        while ((addr == 0) && canblock) {
                /*
-                *      If nothing was there, try to get more
+                * zone is empty, try to expand it
+                * 
+                * Note that we now allow up to 2 threads (1 vm_privliged and 1 non-vm_privliged)
+                * to expand the zone concurrently...  this is necessary to avoid stalling
+                * vm_privileged threads running critical code necessary to continue compressing/swapping
+                * pages (i.e. making new free pages) from stalling behind non-vm_privileged threads
+                * waiting to acquire free pages when the vm_page_free_count is below the
+                * vm_page_free_reserved limit.
                 */
                 */
-               if (zone->doing_alloc) {
+               if ((zone->doing_alloc_without_vm_priv || zone->doing_alloc_with_vm_priv) &&
+                   (((thr->options & TH_OPT_VMPRIV) == 0) || zone->doing_alloc_with_vm_priv)) {
                        /*
                        /*
-                        *      Someone is allocating memory for this zone.
-                        *      Wait for it to show up, then try again.
+                        * This is a non-vm_privileged thread and a non-vm_privileged or
+                        * a vm_privileged thread is already expanding the zone...
+                        *    OR
+                        * this is a vm_privileged thread and a vm_privileged thread is
+                        * already expanding the zone...
+                        *
+                        * In either case wait for a thread to finish, then try again.
                         */
                        zone->waiting = TRUE;
                        zone_sleep(zone);
                } else if (zone->doing_gc) {
                         */
                        zone->waiting = TRUE;
                        zone_sleep(zone);
                } else if (zone->doing_gc) {
-                       /* zone_gc() is running. Since we need an element
+                       /*
+                        * zone_gc() is running. Since we need an element
                         * from the free list that is currently being
                         * from the free list that is currently being
-                        * collected, set the waiting bit and try to
-                        * interrupt the GC process, and try again
-                        * when we obtain the lock.
+                        * collected, set the waiting bit and 
+                        * wait for the GC process to finish
+                        * before trying again
                         */
                        zone->waiting = TRUE;
                        zone_sleep(zone);
                         */
                        zone->waiting = TRUE;
                        zone_sleep(zone);
@@ -2445,7 +2548,12 @@ zalloc_internal(
                                        panic("zalloc: zone \"%s\" empty.", zone->zone_name);
                                }
                        }
                                        panic("zalloc: zone \"%s\" empty.", zone->zone_name);
                                }
                        }
-                       zone->doing_alloc = TRUE;
+                       if ((thr->options & TH_OPT_VMPRIV)) {
+                               zone->doing_alloc_with_vm_priv = TRUE;
+                               set_doing_alloc_with_vm_priv = TRUE;
+                       } else {
+                               zone->doing_alloc_without_vm_priv = TRUE;
+                       }
                        unlock_zone(zone);
 
                        for (;;) {
                        unlock_zone(zone);
 
                        for (;;) {
@@ -2460,7 +2568,7 @@ zalloc_internal(
                                if (zone->noencrypt)
                                        zflags |= KMA_NOENCRYPT;
                                
                                if (zone->noencrypt)
                                        zflags |= KMA_NOENCRYPT;
                                
-                               retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags);
+                               retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE);
                                if (retval == KERN_SUCCESS) {
 #if    ZONE_ALIAS_ADDR
                                        if (alloc_size == PAGE_SIZE)
                                if (retval == KERN_SUCCESS) {
 #if    ZONE_ALIAS_ADDR
                                        if (alloc_size == PAGE_SIZE)
@@ -2485,7 +2593,6 @@ zalloc_internal(
                                                }       
                                        }
 #endif /* CONFIG_ZLEAKS */
                                                }       
                                        }
 #endif /* CONFIG_ZLEAKS */
-                                       ZONE_PAGE_COUNT_INCR(zone, (alloc_size / PAGE_SIZE));
                                        zcram(zone, space, alloc_size);
                                        
                                        break;
                                        zcram(zone, space, alloc_size);
                                        
                                        break;
@@ -2518,9 +2625,14 @@ zalloc_internal(
                                }
                        }
                        lock_zone(zone);
                                }
                        }
                        lock_zone(zone);
-                       zone->doing_alloc = FALSE; 
+
+                       if (set_doing_alloc_with_vm_priv == TRUE)
+                               zone->doing_alloc_with_vm_priv = FALSE;
+                       else
+                               zone->doing_alloc_without_vm_priv = FALSE; 
+                       
                        if (zone->waiting) {
                        if (zone->waiting) {
-                               zone->waiting = FALSE;
+                               zone->waiting = FALSE;
                                zone_wakeup(zone);
                        }
                        addr = try_alloc_from_zone(zone, &check_poison);
                                zone_wakeup(zone);
                        }
                        addr = try_alloc_from_zone(zone, &check_poison);
@@ -3089,6 +3201,13 @@ zone_change(
                        break;
                case Z_ALIGNMENT_REQUIRED:
                        zone->alignment_required = value;
                        break;
                case Z_ALIGNMENT_REQUIRED:
                        zone->alignment_required = value;
+                       /*
+                        * Disable the page list optimization here to provide
+                        * more of an alignment guarantee. This prevents
+                        * the alignment from being modified by the metadata stored
+                        * at the beginning of the page.
+                        */
+                       zone->use_page_list = FALSE;
 #if    ZONE_DEBUG                      
                        zone_debug_disable(zone);
 #endif
 #if    ZONE_DEBUG                      
                        zone_debug_disable(zone);
 #endif
@@ -3330,7 +3449,7 @@ zone_page_free_element(
 }
 
 
 }
 
 
-
+#define ZONEGC_SMALL_ELEMENT_SIZE      4096
 
 struct {
        uint64_t        zgc_invoked;
 
 struct {
        uint64_t        zgc_invoked;
@@ -3402,7 +3521,7 @@ zone_gc(boolean_t all_zones)
                if (!z->collectable)
                        continue;
 
                if (!z->collectable)
                        continue;
 
-               if (all_zones == FALSE && z->elem_size < PAGE_SIZE && !z->use_page_list)
+               if (all_zones == FALSE && z->elem_size < ZONEGC_SMALL_ELEMENT_SIZE && !z->use_page_list)
                        continue;
 
                lock_zone(z);
                        continue;
 
                lock_zone(z);
@@ -3878,14 +3997,14 @@ task_zone_info(
 
        names_size = round_page(max_zones * sizeof *names);
        kr = kmem_alloc_pageable(ipc_kernel_map,
 
        names_size = round_page(max_zones * sizeof *names);
        kr = kmem_alloc_pageable(ipc_kernel_map,
-                                &names_addr, names_size);
+                                &names_addr, names_size, VM_KERN_MEMORY_IPC);
        if (kr != KERN_SUCCESS)
                return kr;
        names = (mach_zone_name_t *) names_addr;
 
        info_size = round_page(max_zones * sizeof *info);
        kr = kmem_alloc_pageable(ipc_kernel_map,
        if (kr != KERN_SUCCESS)
                return kr;
        names = (mach_zone_name_t *) names_addr;
 
        info_size = round_page(max_zones * sizeof *info);
        kr = kmem_alloc_pageable(ipc_kernel_map,
-                                &info_addr, info_size);
+                                &info_addr, info_size, VM_KERN_MEMORY_IPC);
        if (kr != KERN_SUCCESS) {
                kmem_free(ipc_kernel_map,
                          names_addr, names_size);
        if (kr != KERN_SUCCESS) {
                kmem_free(ipc_kernel_map,
                          names_addr, names_size);
@@ -3916,7 +4035,7 @@ task_zone_info(
                zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
 
                zi->tzi_count = (uint64_t)zcopy.count;
                zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
 
                zi->tzi_count = (uint64_t)zcopy.count;
-               zi->tzi_cur_size = (uint64_t)zcopy.cur_size;
+               zi->tzi_cur_size = ptoa_64(zcopy.page_count);
                zi->tzi_max_size = (uint64_t)zcopy.max_size;
                zi->tzi_elem_size = (uint64_t)zcopy.elem_size;
                zi->tzi_alloc_size = (uint64_t)zcopy.alloc_size;
                zi->tzi_max_size = (uint64_t)zcopy.max_size;
                zi->tzi_elem_size = (uint64_t)zcopy.elem_size;
                zi->tzi_alloc_size = (uint64_t)zcopy.alloc_size;
@@ -3976,7 +4095,7 @@ task_zone_info(
                bzero((char *) (names_addr + used), names_size - used);
 
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
                bzero((char *) (names_addr + used), names_size - used);
 
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
-                          (vm_map_size_t)names_size, TRUE, &copy);
+                          (vm_map_size_t)used, TRUE, &copy);
        assert(kr == KERN_SUCCESS);
 
        *namesp = (mach_zone_name_t *) copy;
        assert(kr == KERN_SUCCESS);
 
        *namesp = (mach_zone_name_t *) copy;
@@ -3988,7 +4107,7 @@ task_zone_info(
                bzero((char *) (info_addr + used), info_size - used);
 
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
                bzero((char *) (info_addr + used), info_size - used);
 
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
-                          (vm_map_size_t)info_size, TRUE, &copy);
+                          (vm_map_size_t)used, TRUE, &copy);
        assert(kr == KERN_SUCCESS);
 
        *infop = (task_zone_info_t *) copy;
        assert(kr == KERN_SUCCESS);
 
        *infop = (task_zone_info_t *) copy;
@@ -4019,13 +4138,34 @@ mach_zone_info(
        mach_msg_type_number_t  *namesCntp,
        mach_zone_info_array_t  *infop,
        mach_msg_type_number_t  *infoCntp)
        mach_msg_type_number_t  *namesCntp,
        mach_zone_info_array_t  *infop,
        mach_msg_type_number_t  *infoCntp)
+{
+       return (mach_memory_info(host, namesp, namesCntp, infop, infoCntp, NULL, NULL));
+}
+
+kern_return_t
+mach_memory_info(
+       host_priv_t             host,
+       mach_zone_name_array_t  *namesp,
+       mach_msg_type_number_t  *namesCntp,
+       mach_zone_info_array_t  *infop,
+       mach_msg_type_number_t  *infoCntp,
+       mach_memory_info_array_t *memoryInfop,
+       mach_msg_type_number_t   *memoryInfoCntp)
 {
        mach_zone_name_t        *names;
        vm_offset_t             names_addr;
        vm_size_t               names_size;
 {
        mach_zone_name_t        *names;
        vm_offset_t             names_addr;
        vm_size_t               names_size;
+
        mach_zone_info_t        *info;
        vm_offset_t             info_addr;
        vm_size_t               info_size;
        mach_zone_info_t        *info;
        vm_offset_t             info_addr;
        vm_size_t               info_size;
+
+       mach_memory_info_t      *memory_info;
+       vm_offset_t             memory_info_addr;
+       vm_size_t               memory_info_size;
+       vm_size_t               memory_info_vmsize;
+        unsigned int           num_sites;
+
        unsigned int            max_zones, i;
        zone_t                  z;
        mach_zone_name_t        *zn;
        unsigned int            max_zones, i;
        zone_t                  z;
        mach_zone_name_t        *zn;
@@ -4055,22 +4195,49 @@ mach_zone_info(
 
        names_size = round_page(max_zones * sizeof *names);
        kr = kmem_alloc_pageable(ipc_kernel_map,
 
        names_size = round_page(max_zones * sizeof *names);
        kr = kmem_alloc_pageable(ipc_kernel_map,
-                                &names_addr, names_size);
+                                &names_addr, names_size, VM_KERN_MEMORY_IPC);
        if (kr != KERN_SUCCESS)
                return kr;
        names = (mach_zone_name_t *) names_addr;
 
        info_size = round_page(max_zones * sizeof *info);
        kr = kmem_alloc_pageable(ipc_kernel_map,
        if (kr != KERN_SUCCESS)
                return kr;
        names = (mach_zone_name_t *) names_addr;
 
        info_size = round_page(max_zones * sizeof *info);
        kr = kmem_alloc_pageable(ipc_kernel_map,
-                                &info_addr, info_size);
+                                &info_addr, info_size, VM_KERN_MEMORY_IPC);
        if (kr != KERN_SUCCESS) {
                kmem_free(ipc_kernel_map,
                          names_addr, names_size);
                return kr;
        }
        if (kr != KERN_SUCCESS) {
                kmem_free(ipc_kernel_map,
                          names_addr, names_size);
                return kr;
        }
-
        info = (mach_zone_info_t *) info_addr;
 
        info = (mach_zone_info_t *) info_addr;
 
+       num_sites = 0;
+       memory_info_addr = 0;
+       if (memoryInfop && memoryInfoCntp)
+       {
+               num_sites = VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT;
+               memory_info_size = num_sites * sizeof(*info);
+               memory_info_vmsize = round_page(memory_info_size);
+               kr = kmem_alloc_pageable(ipc_kernel_map,
+                                        &memory_info_addr, memory_info_vmsize, VM_KERN_MEMORY_IPC);
+               if (kr != KERN_SUCCESS) {
+                       kmem_free(ipc_kernel_map,
+                                 names_addr, names_size);
+                       kmem_free(ipc_kernel_map,
+                                 info_addr, info_size);
+                       return kr;
+               }
+
+               kr = vm_map_wire(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize,
+                                    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_IPC), FALSE);
+               assert(kr == KERN_SUCCESS);
+
+               memory_info = (mach_memory_info_t *) memory_info_addr;
+               vm_page_diagnose(memory_info, num_sites);
+
+               kr = vm_map_unwire(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize, FALSE);
+               assert(kr == KERN_SUCCESS);
+       }
+
        zn = &names[0];
        zi = &info[0];
 
        zn = &names[0];
        zi = &info[0];
 
@@ -4093,7 +4260,7 @@ mach_zone_info(
                zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
 
                zi->mzi_count = (uint64_t)zcopy.count;
                zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
 
                zi->mzi_count = (uint64_t)zcopy.count;
-               zi->mzi_cur_size = (uint64_t)zcopy.cur_size;
+               zi->mzi_cur_size = ptoa_64(zcopy.page_count);
                zi->mzi_max_size = (uint64_t)zcopy.max_size;
                zi->mzi_elem_size = (uint64_t)zcopy.elem_size;
                zi->mzi_alloc_size = (uint64_t)zcopy.alloc_size;
                zi->mzi_max_size = (uint64_t)zcopy.max_size;
                zi->mzi_elem_size = (uint64_t)zcopy.elem_size;
                zi->mzi_alloc_size = (uint64_t)zcopy.alloc_size;
@@ -4137,7 +4304,7 @@ mach_zone_info(
                bzero((char *) (names_addr + used), names_size - used);
 
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
                bzero((char *) (names_addr + used), names_size - used);
 
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
-                          (vm_map_size_t)names_size, TRUE, &copy);
+                          (vm_map_size_t)used, TRUE, &copy);
        assert(kr == KERN_SUCCESS);
 
        *namesp = (mach_zone_name_t *) copy;
        assert(kr == KERN_SUCCESS);
 
        *namesp = (mach_zone_name_t *) copy;
@@ -4149,12 +4316,22 @@ mach_zone_info(
                bzero((char *) (info_addr + used), info_size - used);
 
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
                bzero((char *) (info_addr + used), info_size - used);
 
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
-                          (vm_map_size_t)info_size, TRUE, &copy);
+                          (vm_map_size_t)used, TRUE, &copy);
        assert(kr == KERN_SUCCESS);
 
        *infop = (mach_zone_info_t *) copy;
        *infoCntp = max_zones;
 
        assert(kr == KERN_SUCCESS);
 
        *infop = (mach_zone_info_t *) copy;
        *infoCntp = max_zones;
 
+       if (memoryInfop && memoryInfoCntp)
+       {
+               kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)memory_info_addr,
+                                  (vm_map_size_t)memory_info_size, TRUE, &copy);
+               assert(kr == KERN_SUCCESS);
+
+               *memoryInfop = (mach_memory_info_t *) copy;
+               *memoryInfoCntp = num_sites;
+       }
+
        return KERN_SUCCESS;
 }
 
        return KERN_SUCCESS;
 }
 
@@ -4213,14 +4390,14 @@ host_zone_info(
 
        names_size = round_page(max_zones * sizeof *names);
        kr = kmem_alloc_pageable(ipc_kernel_map,
 
        names_size = round_page(max_zones * sizeof *names);
        kr = kmem_alloc_pageable(ipc_kernel_map,
-                                &names_addr, names_size);
+                                &names_addr, names_size, VM_KERN_MEMORY_IPC);
        if (kr != KERN_SUCCESS)
                return kr;
        names = (zone_name_t *) names_addr;
 
        info_size = round_page(max_zones * sizeof *info);
        kr = kmem_alloc_pageable(ipc_kernel_map,
        if (kr != KERN_SUCCESS)
                return kr;
        names = (zone_name_t *) names_addr;
 
        info_size = round_page(max_zones * sizeof *info);
        kr = kmem_alloc_pageable(ipc_kernel_map,
-                                &info_addr, info_size);
+                                &info_addr, info_size, VM_KERN_MEMORY_IPC);
        if (kr != KERN_SUCCESS) {
                kmem_free(ipc_kernel_map,
                          names_addr, names_size);
        if (kr != KERN_SUCCESS) {
                kmem_free(ipc_kernel_map,
                          names_addr, names_size);
@@ -4251,7 +4428,7 @@ host_zone_info(
                zn->zn_name[sizeof zn->zn_name - 1] = '\0';
 
                zi->zi_count = zcopy.count;
                zn->zn_name[sizeof zn->zn_name - 1] = '\0';
 
                zi->zi_count = zcopy.count;
-               zi->zi_cur_size = zcopy.cur_size;
+               zi->zi_cur_size = ptoa(zcopy.page_count);
                zi->zi_max_size = zcopy.max_size;
                zi->zi_elem_size = zcopy.elem_size;
                zi->zi_alloc_size = zcopy.alloc_size;
                zi->zi_max_size = zcopy.max_size;
                zi->zi_elem_size = zcopy.elem_size;
                zi->zi_alloc_size = zcopy.alloc_size;
@@ -4284,7 +4461,7 @@ host_zone_info(
                bzero((char *) (names_addr + used), names_size - used);
 
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
                bzero((char *) (names_addr + used), names_size - used);
 
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
-                          (vm_map_size_t)names_size, TRUE, &copy);
+                          (vm_map_size_t)used, TRUE, &copy);
        assert(kr == KERN_SUCCESS);
 
        *namesp = (zone_name_t *) copy;
        assert(kr == KERN_SUCCESS);
 
        *namesp = (zone_name_t *) copy;
@@ -4295,7 +4472,7 @@ host_zone_info(
                bzero((char *) (info_addr + used), info_size - used);
 
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
                bzero((char *) (info_addr + used), info_size - used);
 
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
-                          (vm_map_size_t)info_size, TRUE, &copy);
+                          (vm_map_size_t)used, TRUE, &copy);
        assert(kr == KERN_SUCCESS);
 
        *infop = (zone_info_t *) copy;
        assert(kr == KERN_SUCCESS);
 
        *infop = (zone_info_t *) copy;