osfmk/kern/zalloc.c

   1 /*
   2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /*
  23  * @OSF_COPYRIGHT@
  24  */
  25 /*
  26  * Mach Operating System
  27  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  28  * All Rights Reserved.
  29  *
  30  * Permission to use, copy, modify and distribute this software and its
  31  * documentation is hereby granted, provided that both the copyright
  32  * notice and this permission notice appear in all copies of the
  33  * software, derivative works or modified versions, and any portions
  34  * thereof, and that both notices appear in supporting documentation.
  35  *
  36  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  37  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  38  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  39  *
  40  * Carnegie Mellon requests users of this software to return to
  41  *
  42  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  43  *  School of Computer Science
  44  *  Carnegie Mellon University
  45  *  Pittsburgh PA 15213-3890
  46  *
  47  * any improvements or extensions that they make and grant Carnegie Mellon
  48  * the rights to redistribute these changes.
  49  */
  50 /*
  51  */
  52 /*
  53  *      File:   kern/zalloc.c
  54  *      Author: Avadis Tevanian, Jr.
  55  *
  56  *      Zone-based memory allocator.  A zone is a collection of fixed size
  57  *      data blocks for which quick allocation/deallocation is possible.
  58  */
  59 #include <zone_debug.h>
  60 #include <norma_vm.h>
  61 #include <mach_kdb.h>
  62 #include <kern/ast.h>
  63 #include <kern/assert.h>
  64 #include <kern/macro_help.h>
  65 #include <kern/sched.h>
  66 #include <kern/lock.h>
  67 #include <kern/sched_prim.h>
  68 #include <kern/misc_protos.h>
  69 #include <kern/zalloc.h>
  70 #include <mach/vm_param.h>
  71 #include <vm/vm_kern.h>
  72 #include <machine/machparam.h>
  73
  74
  75 #if     MACH_ASSERT
  76 /* Detect use of zone elt after freeing it by two methods:
  77  * (1) Range-check the free-list "next" ptr for sanity.
  78  * (2) Store the ptr in two different words, and compare them against
  79  *     each other when re-using the zone elt, to detect modifications;
  80  */
  81
  82 #if defined(__alpha)
  83
  84 #define is_kernel_data_addr(a)                                          \
  85                 (!(a) || IS_SYS_VA(a) && !((a) & (sizeof(long)-1)))
  86
  87 #else /* !defined(__alpha) */
  88
  89 #define is_kernel_data_addr(a)                                          \
  90                 (!(a) || (a) >= VM_MIN_KERNEL_ADDRESS && !((a) & 0x3))
  91
  92 #endif /* defined(__alpha) */
  93
  94 /* Should we set all words of the zone element to an illegal address
  95  * when it is freed, to help catch usage after freeing?  The down-side
  96  * is that this obscures the identity of the freed element.
  97  */
  98 boolean_t zfree_clear = FALSE;
  99
 100 #define ADD_TO_ZONE(zone, element)                                      \
 101 MACRO_BEGIN                                                             \
 102                 if (zfree_clear)                                        \
 103                 {   int i;                                              \
 104                     for (i=1;                                           \
 105                          i < zone->elem_size/sizeof(vm_offset_t) - 1;   \
 106                          i++)                                           \
 107                     ((vm_offset_t *)(element))[i] = 0xdeadbeef;         \
 108                 }                                                       \
 109                 ((vm_offset_t *)(element))[0] = (zone)->free_elements;  \
 110                 (zone)->free_elements = (vm_offset_t) (element);        \
 111                 (zone)->count--;                                        \
 112 MACRO_END
 113
 114 #define REMOVE_FROM_ZONE(zone, ret, type)                               \
 115 MACRO_BEGIN                                                             \
 116         (ret) = (type) (zone)->free_elements;                           \
 117         if ((ret) != (type) 0) {                                        \
 118             if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0])) {      \
 119                 panic("A freed zone element has been modified.\n");     \
 120             }                                                           \
 121             (zone)->count++;                                            \
 122             (zone)->free_elements = *((vm_offset_t *)(ret));            \
 123         }                                                               \
 124 MACRO_END
 125 #else   /* MACH_ASSERT */
 126
 127 #define ADD_TO_ZONE(zone, element)                                      \
 128 MACRO_BEGIN                                                             \
 129                 *((vm_offset_t *)(element)) = (zone)->free_elements;    \
 130                 (zone)->free_elements = (vm_offset_t) (element);        \
 131                 (zone)->count--;                                        \
 132 MACRO_END
 133
 134 #define REMOVE_FROM_ZONE(zone, ret, type)                               \
 135 MACRO_BEGIN                                                             \
 136         (ret) = (type) (zone)->free_elements;                           \
 137         if ((ret) != (type) 0) {                                        \
 138                 (zone)->count++;                                        \
 139                 (zone)->free_elements = *((vm_offset_t *)(ret));        \
 140         }                                                               \
 141 MACRO_END
 142
 143 #endif  /* MACH_ASSERT */
 144
 145 #if     ZONE_DEBUG
 146 #define zone_debug_enabled(z) z->active_zones.next
 147 #endif  /* ZONE_DEBUG */
 148
 149 /*
 150  * Support for garbage collection of unused zone pages:
 151  */
 152
 153 struct zone_page_table_entry {
 154         struct  zone_page_table_entry   *next;
 155         short   in_free_list;
 156         short   alloc_count;
 157 };
 158
 159 extern struct zone_page_table_entry * zone_page_table;
 160
 161 #define lock_zone_page_table() simple_lock(&zone_page_table_lock)
 162 #define unlock_zone_page_table() simple_unlock(&zone_page_table_lock)
 163
 164 #define zone_page(addr) \
 165     (&(zone_page_table[(atop(((vm_offset_t)addr) - zone_map_min_address))]))
 166
 167 /* Forwards */
 168 void            zone_page_init(
 169                                 vm_offset_t     addr,
 170                                 vm_size_t       size,
 171                                 int             value);
 172
 173 void            zone_page_alloc(
 174                                 vm_offset_t     addr,
 175                                 vm_size_t       size);
 176
 177 void            zone_add_free_page_list(
 178                                 struct zone_page_table_entry    **free_list,
 179                                 vm_offset_t     addr,
 180                                 vm_size_t       size);
 181 void            zone_page_dealloc(
 182                                 vm_offset_t     addr,
 183                                 vm_size_t       size);
 184
 185 void            zone_page_in_use(
 186                                 vm_offset_t     addr,
 187                                 vm_size_t       size);
 188
 189 void            zone_page_free(
 190                                 vm_offset_t     addr,
 191                                 vm_size_t       size);
 192
 193 boolean_t       zone_page_collectable(
 194                                 vm_offset_t     addr,
 195                                 vm_size_t       size);
 196
 197 void            zone_page_keep(
 198                                 vm_offset_t     addr,
 199                                 vm_size_t       size);
 200
 201 #if     ZONE_DEBUG && MACH_KDB
 202 int             zone_count(
 203                                 zone_t          z,
 204                                 int             tail);
 205 #endif  /* ZONE_DEBUG && MACH_KDB */
 206
 207 vm_map_t        zone_map = VM_MAP_NULL;
 208
 209 zone_t          zone_zone = ZONE_NULL;  /* the zone containing other zones */
 210
 211 /*
 212  *      The VM system gives us an initial chunk of memory.
 213  *      It has to be big enough to allocate the zone_zone
 214  */
 215
 216 vm_offset_t     zdata;
 217 vm_size_t       zdata_size;
 218
 219 #define lock_zone(zone)                                 \
 220 MACRO_BEGIN                                             \
 221         simple_lock(&zone->lock);                       \
 222 MACRO_END
 223
 224 #define unlock_zone(zone)                               \
 225 MACRO_BEGIN                                             \
 226         simple_unlock(&zone->lock);                     \
 227 MACRO_END
 228
 229 #define lock_zone_init(zone)                            \
 230 MACRO_BEGIN                                             \
 231         simple_lock_init(&zone->lock, ETAP_MISC_ZONE);  \
 232 MACRO_END
 233
 234 #define lock_try_zone(zone)     simple_lock_try(&zone->lock)
 235
 236 kern_return_t           zget_space(
 237                                 vm_offset_t size,
 238                                 vm_offset_t *result);
 239
 240 decl_simple_lock_data(,zget_space_lock)
 241 vm_offset_t     zalloc_next_space;
 242 vm_offset_t     zalloc_end_of_space;
 243 vm_size_t       zalloc_wasted_space;
 244
 245 /*
 246  *      Garbage collection map information
 247  */
 248 decl_simple_lock_data(,         zone_page_table_lock)
 249 struct zone_page_table_entry *  zone_page_table;
 250 vm_offset_t                     zone_map_min_address;
 251 vm_offset_t                     zone_map_max_address;
 252 integer_t                       zone_pages;
 253
 254 /*
 255  *      Exclude more than one concurrent garbage collection
 256  */
 257 decl_mutex_data(,               zone_gc_lock)
 258
 259 #define from_zone_map(addr) \
 260         ((vm_offset_t)(addr) >= zone_map_min_address && \
 261          (vm_offset_t)(addr) <  zone_map_max_address)
 262
 263 #define ZONE_PAGE_USED  0
 264 #define ZONE_PAGE_UNUSED -1
 265
 266
 267 /*
 268  *      Protects first_zone, last_zone, num_zones,
 269  *      and the next_zone field of zones.
 270  */
 271 decl_simple_lock_data(, all_zones_lock)
 272 zone_t                  first_zone;
 273 zone_t                  *last_zone;
 274 int                     num_zones;
 275
 276 /*
 277  *      zinit initializes a new zone.  The zone data structures themselves
 278  *      are stored in a zone, which is initially a static structure that
 279  *      is initialized by zone_init.
 280  */
 281 zone_t
 282 zinit(
 283         vm_size_t       size,           /* the size of an element */
 284         vm_size_t       max,            /* maximum memory to use */
 285         vm_size_t       alloc,          /* allocation size */
 286         char            *name)          /* a name for the zone */
 287 {
 288         zone_t          z;
 289
 290         if (zone_zone == ZONE_NULL) {
 291                 if (zget_space(sizeof(struct zone), (vm_offset_t *)&z)
 292                     != KERN_SUCCESS)
 293                         return(ZONE_NULL);
 294         } else
 295                 z = (zone_t) zalloc(zone_zone);
 296         if (z == ZONE_NULL)
 297                 return(ZONE_NULL);
 298
 299         /*
 300          *      Round off all the parameters appropriately.
 301          */
 302         if (size < sizeof(z->free_elements))
 303                 size = sizeof(z->free_elements);
 304         size = ((size-1)  + sizeof(z->free_elements)) -
 305                 ((size-1) % sizeof(z->free_elements));
 306         if (alloc == 0)
 307                 alloc = PAGE_SIZE;
 308         alloc = round_page(alloc);
 309         max   = round_page(max);
 310         /*
 311          * We look for an allocation size with least fragmentation
 312          * in the range of 1 - 5 pages.  This size will be used unless
 313          * the user suggestion is larger AND has less fragmentation
 314          */
 315         {       vm_size_t best, waste; unsigned int i;
 316                 best  = PAGE_SIZE;
 317                 waste = best % size;
 318                 for (i = 2; i <= 5; i++){       vm_size_t tsize, twaste;
 319                         tsize  = i * PAGE_SIZE;
 320                         twaste = tsize % size;
 321                         if (twaste < waste)
 322                                 best = tsize, waste = twaste;
 323                 }
 324                 if (alloc <= best || (alloc % size >= waste))
 325                         alloc = best;
 326         }
 327         if (max && (max < alloc))
 328                 max = alloc;
 329
 330         z->free_elements = 0;
 331         z->cur_size = 0;
 332         z->max_size = max;
 333         z->elem_size = size;
 334         z->alloc_size = alloc;
 335         z->zone_name = name;
 336         z->count = 0;
 337         z->doing_alloc = FALSE;
 338         z->exhaustible = FALSE;
 339         z->collectable = TRUE;
 340         z->allows_foreign = FALSE;
 341         z->expandable  = TRUE;
 342         z->waiting = FALSE;
 343
 344 #if     ZONE_DEBUG
 345         z->active_zones.next = z->active_zones.prev = 0;
 346         zone_debug_enable(z);
 347 #endif  /* ZONE_DEBUG */
 348         lock_zone_init(z);
 349
 350         /*
 351          *      Add the zone to the all-zones list.
 352          */
 353
 354         z->next_zone = ZONE_NULL;
 355         simple_lock(&all_zones_lock);
 356         *last_zone = z;
 357         last_zone = &z->next_zone;
 358         num_zones++;
 359         simple_unlock(&all_zones_lock);
 360
 361         return(z);
 362 }
 363
 364 /*
 365  *      Cram the given memory into the specified zone.
 366  */
 367 void
 368 zcram(
 369         register zone_t         zone,
 370         vm_offset_t             newmem,
 371         vm_size_t               size)
 372 {
 373         register vm_size_t      elem_size;
 374
 375         /* Basic sanity checks */
 376         assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
 377         assert(!zone->collectable || zone->allows_foreign
 378                 || (from_zone_map(newmem) && from_zone_map(newmem+size-1)));
 379
 380         elem_size = zone->elem_size;
 381
 382         lock_zone(zone);
 383         while (size >= elem_size) {
 384                 ADD_TO_ZONE(zone, newmem);
 385                 if (from_zone_map(newmem))
 386                         zone_page_alloc(newmem, elem_size);
 387                 zone->count++;  /* compensate for ADD_TO_ZONE */
 388                 size -= elem_size;
 389                 newmem += elem_size;
 390                 zone->cur_size += elem_size;
 391         }
 392         unlock_zone(zone);
 393 }
 394
 395 /*
 396  * Contiguous space allocator for non-paged zones. Allocates "size" amount
 397  * of memory from zone_map.
 398  */
 399
 400 kern_return_t
 401 zget_space(
 402         vm_offset_t size,
 403         vm_offset_t *result)
 404 {
 405         vm_offset_t     new_space = 0;
 406         vm_size_t       space_to_add;
 407
 408         simple_lock(&zget_space_lock);
 409         while ((zalloc_next_space + size) > zalloc_end_of_space) {
 410                 /*
 411                  *      Add at least one page to allocation area.
 412                  */
 413
 414                 space_to_add = round_page(size);
 415
 416                 if (new_space == 0) {
 417                         kern_return_t retval;
 418                         /*
 419                          *      Memory cannot be wired down while holding
 420                          *      any locks that the pageout daemon might
 421                          *      need to free up pages.  [Making the zget_space
 422                          *      lock a complex lock does not help in this
 423                          *      regard.]
 424                          *
 425                          *      Unlock and allocate memory.  Because several
 426                          *      threads might try to do this at once, don't
 427                          *      use the memory before checking for available
 428                          *      space again.
 429                          */
 430
 431                         simple_unlock(&zget_space_lock);
 432
 433                         retval = kernel_memory_allocate(zone_map, &new_space,
 434                                 space_to_add, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
 435                         if (retval != KERN_SUCCESS)
 436                                 return(retval);
 437                         zone_page_init(new_space, space_to_add,
 438                                                         ZONE_PAGE_USED);
 439                         simple_lock(&zget_space_lock);
 440                         continue;
 441                 }
 442
 443
 444                 /*
 445                  *      Memory was allocated in a previous iteration.
 446                  *
 447                  *      Check whether the new region is contiguous
 448                  *      with the old one.
 449                  */
 450
 451                 if (new_space != zalloc_end_of_space) {
 452                         /*
 453                          *      Throw away the remainder of the
 454                          *      old space, and start a new one.
 455                          */
 456                         zalloc_wasted_space +=
 457                                 zalloc_end_of_space - zalloc_next_space;
 458                         zalloc_next_space = new_space;
 459                 }
 460
 461                 zalloc_end_of_space = new_space + space_to_add;
 462
 463                 new_space = 0;
 464         }
 465         *result = zalloc_next_space;
 466         zalloc_next_space += size;
 467         simple_unlock(&zget_space_lock);
 468
 469         if (new_space != 0)
 470                 kmem_free(zone_map, new_space, space_to_add);
 471
 472         return(KERN_SUCCESS);
 473 }
 474
 475
 476 /*
 477  *      Steal memory for the zone package.  Called from
 478  *      vm_page_bootstrap().
 479  */
 480 void
 481 zone_steal_memory(void)
 482 {
 483         zdata_size = round_page(128*sizeof(struct zone));
 484         zdata = pmap_steal_memory(zdata_size);
 485 }
 486
 487
 488 /*
 489  * Fill a zone with enough memory to contain at least nelem elements.
 490  * Memory is obtained with kmem_alloc_wired from the kernel_map.
 491  * Return the number of elements actually put into the zone, which may
 492  * be more than the caller asked for since the memory allocation is
 493  * rounded up to a full page.
 494  */
 495 int
 496 zfill(
 497         zone_t  zone,
 498         int     nelem)
 499 {
 500         kern_return_t   kr;
 501         vm_size_t       size;
 502         vm_offset_t     memory;
 503         int             nalloc;
 504
 505         assert(nelem > 0);
 506         if (nelem <= 0)
 507                 return 0;
 508         size = nelem * zone->elem_size;
 509         size = round_page(size);
 510         kr = kmem_alloc_wired(kernel_map, &memory, size);
 511         if (kr != KERN_SUCCESS)
 512                 return 0;
 513
 514         zone_change(zone, Z_FOREIGN, TRUE);
 515         zcram(zone, memory, size);
 516         nalloc = size / zone->elem_size;
 517         assert(nalloc >= nelem);
 518
 519         return nalloc;
 520 }
 521
 522 /*
 523  *      Initialize the "zone of zones" which uses fixed memory allocated
 524  *      earlier in memory initialization.  zone_bootstrap is called
 525  *      before zone_init.
 526  */
 527 void
 528 zone_bootstrap(void)
 529 {
 530         vm_size_t zone_zone_size;
 531         vm_offset_t zone_zone_space;
 532
 533         simple_lock_init(&all_zones_lock, ETAP_MISC_ZONE_ALL);
 534
 535         first_zone = ZONE_NULL;
 536         last_zone = &first_zone;
 537         num_zones = 0;
 538
 539         simple_lock_init(&zget_space_lock, ETAP_MISC_ZONE_GET);
 540         zalloc_next_space = zdata;
 541         zalloc_end_of_space = zdata + zdata_size;
 542         zalloc_wasted_space = 0;
 543
 544         /* assertion: nobody else called zinit before us */
 545         assert(zone_zone == ZONE_NULL);
 546         zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
 547                           sizeof(struct zone), "zones");
 548         zone_change(zone_zone, Z_COLLECT, FALSE);
 549         zone_zone_size = zalloc_end_of_space - zalloc_next_space;
 550         zget_space(zone_zone_size, &zone_zone_space);
 551         zcram(zone_zone, zone_zone_space, zone_zone_size);
 552 }
 553
 554 void
 555 zone_init(
 556         vm_size_t max_zonemap_size)
 557 {
 558         kern_return_t   retval;
 559         vm_offset_t     zone_min;
 560         vm_offset_t     zone_max;
 561         vm_size_t       zone_table_size;
 562
 563         retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
 564                                                 FALSE, TRUE, &zone_map);
 565         if (retval != KERN_SUCCESS)
 566                 panic("zone_init: kmem_suballoc failed");
 567         zone_max = zone_min + round_page(max_zonemap_size);
 568         /*
 569          * Setup garbage collection information:
 570          */
 571         zone_table_size = atop(zone_max - zone_min) *
 572                                 sizeof(struct zone_page_table_entry);
 573         if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table,
 574                              zone_table_size) != KERN_SUCCESS)
 575                 panic("zone_init");
 576         zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
 577         zone_pages = atop(zone_max - zone_min);
 578         zone_map_min_address = zone_min;
 579         zone_map_max_address = zone_max;
 580         simple_lock_init(&zone_page_table_lock, ETAP_MISC_ZONE_PTABLE);
 581         mutex_init(&zone_gc_lock, ETAP_NO_TRACE);
 582         zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
 583 }
 584
 585
 586 /*
 587  *      zalloc returns an element from the specified zone.
 588  */
 589 vm_offset_t
 590 zalloc_canblock(
 591         register zone_t zone,
 592         boolean_t canblock)
 593 {
 594         vm_offset_t     addr;
 595         kern_return_t retval;
 596
 597         assert(zone != ZONE_NULL);
 598         check_simple_locks();
 599
 600         lock_zone(zone);
 601
 602         REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
 603         while (addr == 0) {
 604                 /*
 605                  *      If nothing was there, try to get more
 606                  */
 607                 if (zone->doing_alloc) {
 608                         if (!canblock) {
 609                           unlock_zone(zone);
 610                           return(0);
 611                         }
 612                         /*
 613                          *      Someone is allocating memory for this zone.
 614                          *      Wait for it to show up, then try again.
 615                          */
 616                         assert_wait((event_t)zone, THREAD_INTERRUPTIBLE);
 617                         zone->waiting = TRUE;
 618                         unlock_zone(zone);
 619                         thread_block((void (*)(void)) 0);
 620                         lock_zone(zone);
 621                 }
 622                 else {
 623                         if ((zone->cur_size + zone->elem_size) >
 624                             zone->max_size) {
 625                                 if (zone->exhaustible)
 626                                         break;
 627                                 if (zone->expandable) {
 628                                         /*
 629                                          * We're willing to overflow certain
 630                                          * zones, but not without complaining.
 631                                          *
 632                                          * This is best used in conjunction
 633                                          * with the collectable flag. What we
 634                                          * want is an assurance we can get the
 635                                          * memory back, assuming there's no
 636                                          * leak.
 637                                          */
 638                                         zone->max_size += (zone->max_size >> 1);
 639                                 } else {
 640                                         unlock_zone(zone);
 641
 642                                         if (!canblock) {
 643                                           return(0);
 644                                         }
 645
 646                                         panic("zalloc: zone \"%s\" empty.", zone->zone_name);
 647                                 }
 648                         }
 649                         zone->doing_alloc = TRUE;
 650                         unlock_zone(zone);
 651
 652                         if (zone->collectable) {
 653                                 vm_offset_t space;
 654                                 vm_size_t alloc_size;
 655
 656                                 if (vm_pool_low())
 657                                         alloc_size =
 658                                           round_page(zone->elem_size);
 659                                 else
 660                                         alloc_size = zone->alloc_size;
 661
 662                                 retval = kernel_memory_allocate(zone_map,
 663                                         &space, alloc_size, 0,
 664                                         KMA_KOBJECT|KMA_NOPAGEWAIT);
 665                                 if (retval == KERN_SUCCESS) {
 666                                         zone_page_init(space, alloc_size,
 667                                                 ZONE_PAGE_USED);
 668                                         zcram(zone, space, alloc_size);
 669                                 } else if (retval != KERN_RESOURCE_SHORTAGE) {
 670                                         /* would like to cause a zone_gc() */
 671
 672                                         if (!canblock) {
 673                                           return(0);
 674                                         }
 675
 676                                         panic("zalloc");
 677                                 }
 678                                 lock_zone(zone);
 679                                 zone->doing_alloc = FALSE;
 680                                 if (zone->waiting) {
 681                                         zone->waiting = FALSE;
 682                                         thread_wakeup((event_t)zone);
 683                                 }
 684                                 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
 685                                 if (addr == 0 &&
 686                                         retval == KERN_RESOURCE_SHORTAGE) {
 687                                         unlock_zone(zone);
 688
 689                                         if (!canblock) {
 690                                           return(0);
 691                                         }
 692
 693                                         VM_PAGE_WAIT();
 694                                         lock_zone(zone);
 695                                 }
 696                         } else {
 697                                 vm_offset_t space;
 698                                 retval = zget_space(zone->elem_size, &space);
 699
 700                                 lock_zone(zone);
 701                                 zone->doing_alloc = FALSE;
 702                                 if (zone->waiting) {
 703                                         zone->waiting = FALSE;
 704                                         thread_wakeup((event_t)zone);
 705                                 }
 706                                 if (retval == KERN_SUCCESS) {
 707                                         zone->count++;
 708                                         zone->cur_size += zone->elem_size;
 709 #if     ZONE_DEBUG
 710                                         if (zone_debug_enabled(zone)) {
 711                                             enqueue_tail(&zone->active_zones, (queue_entry_t)space);
 712                                         }
 713 #endif
 714                                         unlock_zone(zone);
 715                                         zone_page_alloc(space, zone->elem_size);
 716 #if     ZONE_DEBUG
 717                                         if (zone_debug_enabled(zone))
 718                                                 space += sizeof(queue_chain_t);
 719 #endif
 720                                         return(space);
 721                                 }
 722                                 if (retval == KERN_RESOURCE_SHORTAGE) {
 723                                         unlock_zone(zone);
 724
 725                                         if (!canblock) {
 726                                           return(0);
 727                                         }
 728
 729                                         VM_PAGE_WAIT();
 730                                         lock_zone(zone);
 731                                 } else {
 732                                         if (!canblock) {
 733                                           return(0);
 734                                         }
 735
 736                                         panic("zalloc");
 737                                 }
 738                         }
 739                 }
 740                 if (addr == 0)
 741                         REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
 742         }
 743
 744 #if     ZONE_DEBUG
 745         if (addr && zone_debug_enabled(zone)) {
 746                 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
 747                 addr += sizeof(queue_chain_t);
 748         }
 749 #endif
 750
 751         unlock_zone(zone);
 752         return(addr);
 753 }
 754
 755
 756 vm_offset_t
 757 zalloc(
 758        register zone_t zone)
 759 {
 760   return( zalloc_canblock(zone, TRUE) );
 761 }
 762
 763 vm_offset_t
 764 zalloc_noblock(
 765                register zone_t zone)
 766 {
 767   return( zalloc_canblock(zone, FALSE) );
 768 }
 769
 770
 771 /*
 772  *      zget returns an element from the specified zone
 773  *      and immediately returns nothing if there is nothing there.
 774  *
 775  *      This form should be used when you can not block (like when
 776  *      processing an interrupt).
 777  */
 778 vm_offset_t
 779 zget(
 780         register zone_t zone)
 781 {
 782         register vm_offset_t    addr;
 783
 784         assert( zone != ZONE_NULL );
 785
 786         if (!lock_try_zone(zone))
 787             return ((vm_offset_t)0);
 788
 789         REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
 790 #if     ZONE_DEBUG
 791         if (addr && zone_debug_enabled(zone)) {
 792                 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
 793                 addr += sizeof(queue_chain_t);
 794         }
 795 #endif  /* ZONE_DEBUG */
 796         unlock_zone(zone);
 797
 798         return(addr);
 799 }
 800
 801 /* Keep this FALSE by default.  Large memory machine run orders of magnitude
 802    slower in debug mode when true.  Use debugger to enable if needed */
 803 boolean_t zone_check = FALSE;
 804
 805 void
 806 zfree(
 807         register zone_t zone,
 808         vm_offset_t     elem)
 809 {
 810
 811 #if MACH_ASSERT
 812         /* Basic sanity checks */
 813         if (zone == ZONE_NULL || elem == (vm_offset_t)0)
 814                 panic("zfree: NULL");
 815         /* zone_gc assumes zones are never freed */
 816         if (zone == zone_zone)
 817                 panic("zfree: freeing to zone_zone breaks zone_gc!");
 818         if (zone->collectable && !zone->allows_foreign &&
 819             (!from_zone_map(elem) || !from_zone_map(elem+zone->elem_size-1)))
 820                 panic("zfree: non-allocated memory in collectable zone!");
 821 #endif
 822
 823         lock_zone(zone);
 824 #if     ZONE_DEBUG
 825         if (zone_debug_enabled(zone)) {
 826                 queue_t tmp_elem;
 827
 828                 elem -= sizeof(queue_chain_t);
 829                 if (zone_check) {
 830                         /* check the zone's consistency */
 831
 832                         for (tmp_elem = queue_first(&zone->active_zones);
 833                              !queue_end(tmp_elem, &zone->active_zones);
 834                              tmp_elem = queue_next(tmp_elem))
 835                                 if (elem == (vm_offset_t)tmp_elem)
 836                                         break;
 837                         if (elem != (vm_offset_t)tmp_elem)
 838                                 panic("zfree()ing element from wrong zone");
 839                 }
 840                 remqueue(&zone->active_zones, (queue_t) elem);
 841         }
 842 #endif  /* ZONE_DEBUG */
 843         if (zone_check) {
 844                 vm_offset_t this;
 845
 846                 /* check the zone's consistency */
 847
 848                 for (this = zone->free_elements;
 849                      this != 0;
 850                      this = * (vm_offset_t *) this)
 851                         if (!pmap_kernel_va(this) || this == elem)
 852                                 panic("zfree");
 853         }
 854         /*
 855          * If elements have one or more pages, and memory is low,
 856          * put it directly back into circulation rather than
 857          * back into a zone, where a non-vm_privileged task can grab it.
 858          * This lessens the impact of a privileged task cycling reserved
 859          * memory into a publicly accessible zone.
 860          */
 861         if (zone->elem_size >= PAGE_SIZE &&
 862             vm_pool_low()){
 863                 assert( !(zone->elem_size & (zone->alloc_size-1)) );
 864                 zone->count--;
 865                 zone->cur_size -= zone->elem_size;
 866                 zone_page_init(elem, zone->elem_size, ZONE_PAGE_UNUSED);
 867                 unlock_zone(zone);
 868                 kmem_free(zone_map, elem, zone->elem_size);
 869                 return;
 870         }
 871         ADD_TO_ZONE(zone, elem);
 872         unlock_zone(zone);
 873 }
 874
 875
 876 /*      Change a zone's flags.
 877  *      This routine must be called immediately after zinit.
 878  */
 879 void
 880 zone_change(
 881         zone_t          zone,
 882         unsigned int    item,
 883         boolean_t       value)
 884 {
 885         assert( zone != ZONE_NULL );
 886         assert( value == TRUE || value == FALSE );
 887
 888         switch(item){
 889                 case Z_EXHAUST:
 890                         zone->exhaustible = value;
 891                         break;
 892                 case Z_COLLECT:
 893                         zone->collectable = value;
 894                         break;
 895                 case Z_EXPAND:
 896                         zone->expandable = value;
 897                         break;
 898                 case Z_FOREIGN:
 899                         zone->allows_foreign = value;
 900                         break;
 901 #if MACH_ASSERT
 902                 default:
 903                         panic("Zone_change: Wrong Item Type!");
 904                         /* break; */
 905 #endif
 906         }
 907         lock_zone_init(zone);
 908 }
 909
 910 /*
 911  * Return the expected number of free elements in the zone.
 912  * This calculation will be incorrect if items are zfree'd that
 913  * were never zalloc'd/zget'd. The correct way to stuff memory
 914  * into a zone is by zcram.
 915  */
 916
 917 integer_t
 918 zone_free_count(zone_t zone)
 919 {
 920         integer_t free_count;
 921
 922         lock_zone(zone);
 923         free_count = zone->cur_size/zone->elem_size - zone->count;
 924         unlock_zone(zone);
 925
 926         assert(free_count >= 0);
 927
 928         return(free_count);
 929 }
 930
 931 /*
 932  *      zprealloc preallocates wired memory, exanding the specified
 933  *      zone to the specified size
 934  */
 935 void
 936 zprealloc(
 937         zone_t  zone,
 938         vm_size_t size)
 939 {
 940         vm_offset_t addr;
 941
 942         if (size != 0) {
 943                 if (kmem_alloc_wired(zone_map, &addr, size) != KERN_SUCCESS)
 944                   panic("zprealloc");
 945                 zone_page_init(addr, size, ZONE_PAGE_USED);
 946                 zcram(zone, addr, size);
 947         }
 948 }
 949
 950 /*
 951  *  Zone garbage collection subroutines
 952  *
 953  *  These routines have in common the modification of entries in the
 954  *  zone_page_table.  The latter contains one entry for every page
 955  *  in the zone_map.
 956  *
 957  *  For each page table entry in the given range:
 958  *
 959  *      zone_page_collectable   - test if one (in_free_list == alloc_count)
 960  *      zone_page_keep          - reset in_free_list
 961  *      zone_page_in_use        - decrements in_free_list
 962  *      zone_page_free          - increments in_free_list
 963  *      zone_page_init          - initializes in_free_list and alloc_count
 964  *      zone_page_alloc         - increments alloc_count
 965  *      zone_page_dealloc       - decrements alloc_count
 966  *      zone_add_free_page_list - adds the page to the free list
 967  *
 968  *  Two counts are maintained for each page, the in_free_list count and
 969  *  alloc_count.  The alloc_count is how many zone elements have been
 970  *  allocated from a page.  (Note that the page could contain elements
 971  *  that span page boundaries.  The count includes these elements so
 972  *  one element may be counted in two pages.) In_free_list is a count
 973  *  of how many zone elements are currently free.  If in_free_list is
 974  *  equal to alloc_count then the page is eligible for garbage
 975  *  collection.
 976  *
 977  *  Alloc_count and in_free_list are initialized to the correct values
 978  *  for a particular zone when a page is zcram'ed into a zone.  Subsequent
 979  *  gets and frees of zone elements will call zone_page_in_use and
 980  *  zone_page_free which modify the in_free_list count.  When the zones
 981  *  garbage collector runs it will walk through a zones free element list,
 982  *  remove the elements that reside on collectable pages, and use
 983  *  zone_add_free_page_list to create a list of pages to be collected.
 984  */
 985 boolean_t
 986 zone_page_collectable(
 987         vm_offset_t     addr,
 988         vm_size_t       size)
 989 {
 990         natural_t i, j;
 991
 992 #if MACH_ASSERT
 993         if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
 994                 panic("zone_page_collectable");
 995 #endif
 996
 997         i = atop(addr-zone_map_min_address);
 998         j = atop((addr+size-1) - zone_map_min_address);
 999         lock_zone_page_table();
1000         for (; i <= j; i++) {
1001                 if (zone_page_table[i].in_free_list ==
1002                     zone_page_table[i].alloc_count) {
1003                         unlock_zone_page_table();
1004                         return (TRUE);
1005                 }
1006         }
1007         unlock_zone_page_table();
1008         return (FALSE);
1009 }
1010
1011 void
1012 zone_page_keep(
1013         vm_offset_t     addr,
1014         vm_size_t       size)
1015 {
1016         natural_t i, j;
1017
1018 #if MACH_ASSERT
1019         if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
1020                 panic("zone_page_keep");
1021 #endif
1022
1023         i = atop(addr-zone_map_min_address);
1024         j = atop((addr+size-1) - zone_map_min_address);
1025         lock_zone_page_table();
1026         for (; i <= j; i++) {
1027                 zone_page_table[i].in_free_list = 0;
1028         }
1029         unlock_zone_page_table();
1030 }
1031
1032 void
1033 zone_page_in_use(
1034         vm_offset_t     addr,
1035         vm_size_t       size)
1036 {
1037         natural_t i, j;
1038
1039 #if MACH_ASSERT
1040         if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
1041                 panic("zone_page_in_use");
1042 #endif
1043
1044         i = atop(addr-zone_map_min_address);
1045         j = atop((addr+size-1) - zone_map_min_address);
1046         lock_zone_page_table();
1047         for (; i <= j; i++) {
1048                 if (zone_page_table[i].in_free_list > 0)
1049                         zone_page_table[i].in_free_list--;
1050         }
1051         unlock_zone_page_table();
1052 }
1053
1054 void
1055 zone_page_free(
1056         vm_offset_t     addr,
1057         vm_size_t       size)
1058 {
1059         natural_t i, j;
1060
1061 #if MACH_ASSERT
1062         if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
1063                 panic("zone_page_free");
1064 #endif
1065
1066         i = atop(addr-zone_map_min_address);
1067         j = atop((addr+size-1) - zone_map_min_address);
1068         lock_zone_page_table();
1069         for (; i <= j; i++) {
1070                 assert(zone_page_table[i].in_free_list >= 0);
1071                 zone_page_table[i].in_free_list++;
1072         }
1073         unlock_zone_page_table();
1074 }
1075
1076 void
1077 zone_page_init(
1078         vm_offset_t     addr,
1079         vm_size_t       size,
1080         int             value)
1081 {
1082         natural_t i, j;
1083
1084 #if MACH_ASSERT
1085         if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
1086                 panic("zone_page_init");
1087 #endif
1088
1089         i = atop(addr-zone_map_min_address);
1090         j = atop((addr+size-1) - zone_map_min_address);
1091         lock_zone_page_table();
1092         for (; i <= j; i++) {
1093                 zone_page_table[i].alloc_count = value;
1094                 zone_page_table[i].in_free_list = 0;
1095         }
1096         unlock_zone_page_table();
1097 }
1098
1099 void
1100 zone_page_alloc(
1101         vm_offset_t     addr,
1102         vm_size_t       size)
1103 {
1104         natural_t i, j;
1105
1106 #if MACH_ASSERT
1107         if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
1108                 panic("zone_page_alloc");
1109 #endif
1110
1111         i = atop(addr-zone_map_min_address);
1112         j = atop((addr+size-1) - zone_map_min_address);
1113         lock_zone_page_table();
1114         for (; i <= j; i++) {
1115                 /* Set alloc_count to (ZONE_PAGE_USED + 1) if
1116                  * it was previously set to ZONE_PAGE_UNUSED.
1117                  */
1118                 if (zone_page_table[i].alloc_count == ZONE_PAGE_UNUSED) {
1119                         zone_page_table[i].alloc_count = 1;
1120                 } else {
1121                         zone_page_table[i].alloc_count++;
1122                 }
1123         }
1124         unlock_zone_page_table();
1125 }
1126
1127 void
1128 zone_page_dealloc(
1129         vm_offset_t     addr,
1130         vm_size_t       size)
1131 {
1132         natural_t i, j;
1133
1134 #if MACH_ASSERT
1135         if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
1136                 panic("zone_page_dealloc");
1137 #endif
1138
1139         i = atop(addr-zone_map_min_address);
1140         j = atop((addr+size-1) - zone_map_min_address);
1141         lock_zone_page_table();
1142         for (; i <= j; i++) {
1143                 zone_page_table[i].alloc_count--;
1144         }
1145         unlock_zone_page_table();
1146 }
1147
1148 void
1149 zone_add_free_page_list(
1150         struct zone_page_table_entry    **free_list,
1151         vm_offset_t     addr,
1152         vm_size_t       size)
1153 {
1154         natural_t i, j;
1155
1156 #if MACH_ASSERT
1157         if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
1158                 panic("zone_add_free_page_list");
1159 #endif
1160
1161         i = atop(addr-zone_map_min_address);
1162         j = atop((addr+size-1) - zone_map_min_address);
1163         lock_zone_page_table();
1164         for (; i <= j; i++) {
1165                 if (zone_page_table[i].alloc_count == 0) {
1166                         zone_page_table[i].next = *free_list;
1167                         *free_list = &zone_page_table[i];
1168                         zone_page_table[i].alloc_count  = ZONE_PAGE_UNUSED;
1169                         zone_page_table[i].in_free_list = 0;
1170                 }
1171         }
1172         unlock_zone_page_table();
1173 }
1174
1175
1176 /* This is used for walking through a zone's free element list.
1177  */
1178 struct zone_free_entry {
1179         struct zone_free_entry * next;
1180 };
1181
1182 int reclaim_page_count = 0;
1183
1184 /*      Zone garbage collection
1185  *
1186  *      zone_gc will walk through all the free elements in all the
1187  *      zones that are marked collectable looking for reclaimable
1188  *      pages.  zone_gc is called by consider_zone_gc when the system
1189  *      begins to run out of memory.
1190  */
1191 void
1192 zone_gc(void)
1193 {
1194         unsigned int    max_zones;
1195         zone_t          z;
1196         unsigned int    i;
1197         struct zone_page_table_entry    *freep;
1198         struct zone_page_table_entry    *zone_free_page_list;
1199
1200         mutex_lock(&zone_gc_lock);
1201
1202         /*
1203          * Note that this scheme of locking only to walk the zone list
1204          * assumes that zones are never freed (checked by zfree)
1205          */
1206         simple_lock(&all_zones_lock);
1207         max_zones = num_zones;
1208         z = first_zone;
1209         simple_unlock(&all_zones_lock);
1210
1211 #if MACH_ASSERT
1212         lock_zone_page_table();
1213         for (i = 0; i < zone_pages; i++)
1214                 assert(zone_page_table[i].in_free_list == 0);
1215         unlock_zone_page_table();
1216 #endif /* MACH_ASSERT */
1217
1218         zone_free_page_list = (struct zone_page_table_entry *) 0;
1219
1220         for (i = 0; i < max_zones; i++, z = z->next_zone) {
1221                 struct zone_free_entry * prev;
1222                 struct zone_free_entry * elt;
1223                 struct zone_free_entry * end;
1224
1225                 assert(z != ZONE_NULL);
1226
1227                 if (!z->collectable)
1228                         continue;
1229
1230                 lock_zone(z);
1231
1232                 /*
1233                  * Do a quick feasability check before we scan the zone:
1234                  * skip unless there is likelihood of getting 1+ pages back.
1235                  */
1236                 if ((z->cur_size - z->count * z->elem_size) <= (2*PAGE_SIZE)){
1237                         unlock_zone(z);
1238                         continue;
1239                 }
1240
1241                 /* Count the free elements in each page.  This loop
1242                  * requires that all in_free_list entries are zero.
1243                  *
1244                  * Exit the loop early if we need to hurry up and drop
1245                  * the lock to allow preemption - but we must fully process
1246                  * all elements we looked at so far.
1247                  */
1248                 elt = (struct zone_free_entry *)(z->free_elements);
1249                 while (!ast_urgency() && (elt != (struct zone_free_entry *)0)) {
1250                         if (from_zone_map(elt))
1251                                 zone_page_free((vm_offset_t)elt, z->elem_size);
1252                         elt = elt->next;
1253                 }
1254                 end = elt;
1255
1256                 /* Now determine which elements should be removed
1257                  * from the free list and, after all the elements
1258                  * on a page have been removed, add the element's
1259                  * page to a list of pages to be freed.
1260                  */
1261                 prev = elt = (struct zone_free_entry *)(z->free_elements);
1262                 while (elt != end) {
1263                         if (!from_zone_map(elt)) {
1264                                 prev = elt;
1265                                 elt = elt->next;
1266                                 continue;
1267                         }
1268                         if (zone_page_collectable((vm_offset_t)elt,
1269                                                   z->elem_size)) {
1270                                 z->cur_size -= z->elem_size;
1271                                 zone_page_in_use((vm_offset_t)elt,
1272                                                  z->elem_size);
1273                                 zone_page_dealloc((vm_offset_t)elt,
1274                                                   z->elem_size);
1275                                 zone_add_free_page_list(&zone_free_page_list,
1276                                                         (vm_offset_t)elt,
1277                                                         z->elem_size);
1278                                 if (elt == prev) {
1279                                         elt = elt->next;
1280                                         z->free_elements =(vm_offset_t)elt;
1281                                         prev = elt;
1282                                 } else {
1283                                         prev->next = elt->next;
1284                                         elt = elt->next;
1285                                 }
1286                         } else {
1287                                 /* This element is not eligible for collection
1288                                  * so clear in_free_list in preparation for a
1289                                  * subsequent garbage collection pass.
1290                                  */
1291                                 zone_page_keep((vm_offset_t)elt, z->elem_size);
1292                                 prev = elt;
1293                                 elt = elt->next;
1294                         }
1295                 } /* end while(elt != end) */
1296
1297                 unlock_zone(z);
1298         }
1299
1300         for (freep = zone_free_page_list; freep != 0; freep = freep->next) {
1301                 vm_offset_t     free_addr;
1302
1303                 free_addr = zone_map_min_address +
1304                             PAGE_SIZE * (freep - zone_page_table);
1305                 kmem_free(zone_map, free_addr, PAGE_SIZE);
1306                 reclaim_page_count++;
1307         }
1308         mutex_unlock(&zone_gc_lock);
1309 }
1310
1311 boolean_t zone_gc_allowed = TRUE;       /* XXX */
1312 unsigned zone_gc_last_tick = 0;
1313 unsigned zone_gc_max_rate = 0;          /* in ticks */
1314
1315 /*
1316  *      consider_zone_gc:
1317  *
1318  *      Called by the pageout daemon when the system needs more free pages.
1319  */
1320
1321 void
1322 consider_zone_gc(void)
1323 {
1324         /*
1325          *      By default, don't attempt zone GC more frequently
1326          *      than once a second (which is one scheduler tick).
1327          */
1328
1329         if (zone_gc_max_rate == 0)
1330                 zone_gc_max_rate = 2;           /* sched_tick is a 1 second resolution 2 here insures at least 1 second interval */
1331
1332         if (zone_gc_allowed &&
1333             (sched_tick > (zone_gc_last_tick + zone_gc_max_rate))) {
1334                 zone_gc_last_tick = sched_tick;
1335                 zone_gc();
1336         }
1337 }
1338
1339 #include <mach/kern_return.h>
1340 #include <mach/machine/vm_types.h>
1341 #include <mach_debug/zone_info.h>
1342 #include <kern/host.h>
1343 #include <vm/vm_map.h>
1344 #include <vm/vm_kern.h>
1345
1346 #include <mach/mach_host_server.h>
1347
1348 kern_return_t
1349 host_zone_info(
1350         host_t                  host,
1351         zone_name_array_t       *namesp,
1352         mach_msg_type_number_t  *namesCntp,
1353         zone_info_array_t       *infop,
1354         mach_msg_type_number_t  *infoCntp)
1355 {
1356         zone_name_t     *names;
1357         vm_offset_t     names_addr;
1358         vm_size_t       names_size;
1359         zone_info_t     *info;
1360         vm_offset_t     info_addr;
1361         vm_size_t       info_size;
1362         unsigned int    max_zones, i;
1363         zone_t          z;
1364         zone_name_t    *zn;
1365         zone_info_t    *zi;
1366         kern_return_t   kr;
1367
1368         if (host == HOST_NULL)
1369                 return KERN_INVALID_HOST;
1370
1371         /*
1372          *      We assume that zones aren't freed once allocated.
1373          *      We won't pick up any zones that are allocated later.
1374          */
1375
1376         simple_lock(&all_zones_lock);
1377 #ifdef ppc
1378         max_zones = num_zones + 4;
1379 #else
1380         max_zones = num_zones + 2;
1381 #endif
1382         z = first_zone;
1383         simple_unlock(&all_zones_lock);
1384
1385         if (max_zones <= *namesCntp) {
1386                 /* use in-line memory */
1387
1388                 names = *namesp;
1389         } else {
1390                 names_size = round_page(max_zones * sizeof *names);
1391                 kr = kmem_alloc_pageable(ipc_kernel_map,
1392                                          &names_addr, names_size);
1393                 if (kr != KERN_SUCCESS)
1394                         return kr;
1395                 names = (zone_name_t *) names_addr;
1396         }
1397
1398         if (max_zones <= *infoCntp) {
1399                 /* use in-line memory */
1400
1401                 info = *infop;
1402         } else {
1403                 info_size = round_page(max_zones * sizeof *info);
1404                 kr = kmem_alloc_pageable(ipc_kernel_map,
1405                                          &info_addr, info_size);
1406                 if (kr != KERN_SUCCESS) {
1407                         if (names != *namesp)
1408                                 kmem_free(ipc_kernel_map,
1409                                           names_addr, names_size);
1410                         return kr;
1411                 }
1412
1413                 info = (zone_info_t *) info_addr;
1414         }
1415         zn = &names[0];
1416         zi = &info[0];
1417
1418         for (i = 0; i < num_zones; i++) {
1419                 struct zone zcopy;
1420
1421                 assert(z != ZONE_NULL);
1422
1423                 lock_zone(z);
1424                 zcopy = *z;
1425                 unlock_zone(z);
1426
1427                 simple_lock(&all_zones_lock);
1428                 z = z->next_zone;
1429                 simple_unlock(&all_zones_lock);
1430
1431                 /* assuming here the name data is static */
1432                 (void) strncpy(zn->zn_name, zcopy.zone_name,
1433                                sizeof zn->zn_name);
1434
1435                 zi->zi_count = zcopy.count;
1436                 zi->zi_cur_size = zcopy.cur_size;
1437                 zi->zi_max_size = zcopy.max_size;
1438                 zi->zi_elem_size = zcopy.elem_size;
1439                 zi->zi_alloc_size = zcopy.alloc_size;
1440                 zi->zi_exhaustible = zcopy.exhaustible;
1441                 zi->zi_collectable = zcopy.collectable;
1442
1443                 zn++;
1444                 zi++;
1445         }
1446         strcpy(zn->zn_name, "kernel_stacks");
1447         stack_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size,
1448                              &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible);
1449         zn++;
1450         zi++;
1451 #ifdef ppc
1452         strcpy(zn->zn_name, "save_areas");
1453         save_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size,
1454                             &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible);
1455         zn++;
1456         zi++;
1457
1458         strcpy(zn->zn_name, "pmap_mappings");
1459         mapping_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size,
1460                                &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible);
1461         zn++;
1462         zi++;
1463 #endif
1464         strcpy(zn->zn_name, "kalloc.large");
1465         kalloc_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size,
1466                                &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible);
1467
1468         if (names != *namesp) {
1469                 vm_size_t used;
1470                 vm_map_copy_t copy;
1471
1472                 used = max_zones * sizeof *names;
1473
1474                 if (used != names_size)
1475                         bzero((char *) (names_addr + used), names_size - used);
1476
1477                 kr = vm_map_copyin(ipc_kernel_map, names_addr, names_size,
1478                                    TRUE, &copy);
1479                 assert(kr == KERN_SUCCESS);
1480
1481                 *namesp = (zone_name_t *) copy;
1482         }
1483         *namesCntp = max_zones;
1484
1485         if (info != *infop) {
1486                 vm_size_t used;
1487                 vm_map_copy_t copy;
1488
1489                 used = max_zones * sizeof *info;
1490
1491                 if (used != info_size)
1492                         bzero((char *) (info_addr + used), info_size - used);
1493
1494                 kr = vm_map_copyin(ipc_kernel_map, info_addr, info_size,
1495                                    TRUE, &copy);
1496                 assert(kr == KERN_SUCCESS);
1497
1498                 *infop = (zone_info_t *) copy;
1499         }
1500         *infoCntp = max_zones;
1501
1502         return KERN_SUCCESS;
1503 }
1504
1505 #if     MACH_KDB
1506 #include <ddb/db_command.h>
1507 #include <ddb/db_output.h>
1508 #include <kern/kern_print.h>
1509
1510 const char *zone_labels =
1511 "ENTRY       COUNT   TOT_SZ   MAX_SZ ELT_SZ ALLOC_SZ NAME";
1512
1513 /* Forwards */
1514 void    db_print_zone(
1515                 zone_t          addr);
1516
1517 #if     ZONE_DEBUG
1518 void    db_zone_check_active(
1519                 zone_t          zone);
1520 void    db_zone_print_active(
1521                 zone_t          zone);
1522 #endif  /* ZONE_DEBUG */
1523 void    db_zone_print_free(
1524                 zone_t          zone);
1525 void
1526 db_print_zone(
1527         zone_t          addr)
1528 {
1529         struct zone zcopy;
1530
1531         zcopy = *addr;
1532
1533         db_printf("%8x %8x %8x %8x %6x %8x %s ",
1534                   addr, zcopy.count, zcopy.cur_size,
1535                   zcopy.max_size, zcopy.elem_size,
1536                   zcopy.alloc_size, zcopy.zone_name);
1537         if (zcopy.exhaustible)
1538                 db_printf("H");
1539         if (zcopy.collectable)
1540                 db_printf("C");
1541         if (zcopy.expandable)
1542                 db_printf("X");
1543         db_printf("\n");
1544 }
1545
1546 /*ARGSUSED*/
1547 void
1548 db_show_one_zone(
1549         db_expr_t       addr,
1550         int             have_addr,
1551         db_expr_t       count,
1552         char *          modif)
1553 {
1554         struct zone *z = (zone_t)addr;
1555
1556         if (z == ZONE_NULL || !have_addr){
1557                 db_error("No Zone\n");
1558                 /*NOTREACHED*/
1559         }
1560
1561         db_printf("%s\n", zone_labels);
1562         db_print_zone(z);
1563 }
1564
1565 /*ARGSUSED*/
1566 void
1567 db_show_all_zones(
1568         db_expr_t       addr,
1569         int             have_addr,
1570         db_expr_t       count,
1571         char *          modif)
1572 {
1573         zone_t          z;
1574         unsigned total = 0;
1575
1576         /*
1577          * Don't risk hanging by unconditionally locking,
1578          * risk of incoherent data is small (zones aren't freed).
1579          */
1580         have_addr = simple_lock_try(&all_zones_lock);
1581         count = num_zones;
1582         z = first_zone;
1583         if (have_addr) {
1584                 simple_unlock(&all_zones_lock);
1585         }
1586
1587         db_printf("%s\n", zone_labels);
1588         for (  ; count > 0; count--) {
1589                 if (!z) {
1590                         db_error("Mangled Zone List\n");
1591                         /*NOTREACHED*/
1592                 }
1593                 db_print_zone(z);
1594                 total += z->cur_size,
1595
1596                 have_addr = simple_lock_try(&all_zones_lock);
1597                 z = z->next_zone;
1598                 if (have_addr) {
1599                         simple_unlock(&all_zones_lock);
1600                 }
1601         }
1602         db_printf("\nTotal              %8x", total);
1603         db_printf("\n\nzone_gc() has reclaimed %d pages\n",
1604                   reclaim_page_count);
1605 }
1606
1607 #if     ZONE_DEBUG
1608 void
1609 db_zone_check_active(
1610         zone_t  zone)
1611 {
1612         int count = 0;
1613         queue_t tmp_elem;
1614
1615         if (!zone_debug_enabled(zone) || !zone_check)
1616                 return;
1617         tmp_elem = queue_first(&zone->active_zones);
1618         while (count < zone->count) {
1619                 count++;
1620                 if (tmp_elem == 0) {
1621                         printf("unexpected zero element, zone=0x%x, count=%d\n",
1622                                 zone, count);
1623                         assert(FALSE);
1624                         break;
1625                 }
1626                 if (queue_end(tmp_elem, &zone->active_zones)) {
1627                         printf("unexpected queue_end, zone=0x%x, count=%d\n",
1628                                 zone, count);
1629                         assert(FALSE);
1630                         break;
1631                 }
1632                 tmp_elem = queue_next(tmp_elem);
1633         }
1634         if (!queue_end(tmp_elem, &zone->active_zones)) {
1635                 printf("not at queue_end, zone=0x%x, tmp_elem=0x%x\n",
1636                         zone, tmp_elem);
1637                 assert(FALSE);
1638         }
1639 }
1640
1641 void
1642 db_zone_print_active(
1643         zone_t  zone)
1644 {
1645         int count = 0;
1646         queue_t tmp_elem;
1647
1648         if (!zone_debug_enabled(zone)) {
1649                 printf("zone 0x%x debug not enabled\n", zone);
1650                 return;
1651         }
1652         if (!zone_check) {
1653                 printf("zone_check FALSE\n");
1654                 return;
1655         }
1656
1657         printf("zone 0x%x, active elements %d\n", zone, zone->count);
1658         printf("active list:\n");
1659         tmp_elem = queue_first(&zone->active_zones);
1660         while (count < zone->count) {
1661                 printf("  0x%x", tmp_elem);
1662                 count++;
1663                 if ((count % 6) == 0)
1664                         printf("\n");
1665                 if (tmp_elem == 0) {
1666                         printf("\nunexpected zero element, count=%d\n", count);
1667                         break;
1668                 }
1669                 if (queue_end(tmp_elem, &zone->active_zones)) {
1670                         printf("\nunexpected queue_end, count=%d\n", count);
1671                         break;
1672                 }
1673                 tmp_elem = queue_next(tmp_elem);
1674         }
1675         if (!queue_end(tmp_elem, &zone->active_zones))
1676                 printf("\nnot at queue_end, tmp_elem=0x%x\n", tmp_elem);
1677         else
1678                 printf("\n");
1679 }
1680 #endif  /* ZONE_DEBUG */
1681
1682 void
1683 db_zone_print_free(
1684         zone_t  zone)
1685 {
1686         int count = 0;
1687         int freecount;
1688         vm_offset_t elem;
1689
1690         freecount = zone_free_count(zone);
1691         printf("zone 0x%x, free elements %d\n", zone, freecount);
1692         printf("free list:\n");
1693         elem = zone->free_elements;
1694         while (count < freecount) {
1695                 printf("  0x%x", elem);
1696                 count++;
1697                 if ((count % 6) == 0)
1698                         printf("\n");
1699                 if (elem == 0) {
1700                         printf("\nunexpected zero element, count=%d\n", count);
1701                         break;
1702                 }
1703                 elem = *((vm_offset_t *)elem);
1704         }
1705         if (elem != 0)
1706                 printf("\nnot at end of free list, elem=0x%x\n", elem);
1707         else
1708                 printf("\n");
1709 }
1710
1711 #endif /* MACH_KDB */
1712
1713
1714 #if     ZONE_DEBUG
1715
1716 /* should we care about locks here ? */
1717
1718 #if     MACH_KDB
1719 vm_offset_t
1720 next_element(
1721         zone_t          z,
1722         vm_offset_t     elt)
1723 {
1724         if (!zone_debug_enabled(z))
1725                 return(0);
1726         elt -= sizeof(queue_chain_t);
1727         elt = (vm_offset_t) queue_next((queue_t) elt);
1728         if ((queue_t) elt == &z->active_zones)
1729                 return(0);
1730         elt += sizeof(queue_chain_t);
1731         return(elt);
1732 }
1733
1734 vm_offset_t
1735 first_element(
1736         zone_t          z)
1737 {
1738         vm_offset_t     elt;
1739
1740         if (!zone_debug_enabled(z))
1741                 return(0);
1742         if (queue_empty(&z->active_zones))
1743                 return(0);
1744         elt = (vm_offset_t) queue_first(&z->active_zones);
1745         elt += sizeof(queue_chain_t);
1746         return(elt);
1747 }
1748
1749 /*
1750  * Second arg controls how many zone elements are printed:
1751  *   0 => none
1752  *   n, n < 0 => all
1753  *   n, n > 0 => last n on active list
1754  */
1755 int
1756 zone_count(
1757         zone_t          z,
1758         int             tail)
1759 {
1760         vm_offset_t     elt;
1761         int             count = 0;
1762         boolean_t       print = (tail != 0);
1763
1764         if (tail < 0)
1765                 tail = z->count;
1766         if (z->count < tail)
1767                 tail = 0;
1768         tail = z->count - tail;
1769         for (elt = first_element(z); elt; elt = next_element(z, elt)) {
1770                 if (print && tail <= count)
1771                         db_printf("%8x\n", elt);
1772                 count++;
1773         }
1774         assert(count == z->count);
1775         return(count);
1776 }
1777 #endif /* MACH_KDB */
1778
1779 #define zone_in_use(z)  ( z->count || z->free_elements )
1780
1781 void
1782 zone_debug_enable(
1783         zone_t          z)
1784 {
1785         if (zone_debug_enabled(z) || zone_in_use(z) ||
1786             z->alloc_size < (z->elem_size + sizeof(queue_chain_t)))
1787                 return;
1788         queue_init(&z->active_zones);
1789         z->elem_size += sizeof(queue_chain_t);
1790 }
1791
1792 void
1793 zone_debug_disable(
1794         zone_t          z)
1795 {
1796         if (!zone_debug_enabled(z) || zone_in_use(z))
1797                 return;
1798         z->elem_size -= sizeof(queue_chain_t);
1799         z->active_zones.next = z->active_zones.prev = 0;
1800 }
1801 #endif  /* ZONE_DEBUG */