/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	kern/zalloc.c
 *	Author:	Avadis Tevanian, Jr.
 *
 *	Zone-based memory allocator.  A zone is a collection of fixed size
 *	data blocks for which quick allocation/deallocation is possible.
 */
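
/*
 * External interface summary (all defined in this file): zones are created
 * with zinit() and tuned with zone_change(); elements come from zalloc(),
 * zalloc_noblock() or zget() and go back with zfree(); zcram(), zfill() and
 * zprealloc() stuff extra memory into a zone; consider_zone_gc()/zone_gc()
 * reclaim empty zone pages when memory runs low.
 */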
#include <zone_debug.h>
#include <norma_vm.h>
#include <mach_kdb.h>

#include <mach/mach_types.h>
#include <mach/vm_param.h>
#include <mach/kern_return.h>
#include <mach/mach_host_server.h>
#include <mach/machine/vm_types.h>
#include <mach_debug/zone_info.h>

#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/host.h>
#include <kern/macro_help.h>
#include <kern/sched.h>
#include <kern/lock.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
#include <kern/thread_call.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#include <machine/machparam.h>

#if defined(__ppc__)
/* for fake zone stat routines */
#include <ppc/savearea.h>
#include <ppc/mappings.h>
#endif

#if MACH_ASSERT
/* Detect use of zone elt after freeing it by two methods:
 * (1) Range-check the free-list "next" ptr for sanity.
 * (2) Store the ptr in two different words, and compare them against
 *     each other when re-using the zone elt, to detect modifications.
 */

#if defined(__alpha)

#define is_kernel_data_addr(a)					\
	(!(a) || (IS_SYS_VA(a) && !((a) & (sizeof(long)-1))))

#else /* !defined(__alpha) */

#define is_kernel_data_addr(a)					\
	(!(a) || ((a) >= VM_MIN_KERNEL_ADDRESS && !((a) & 0x3)))

#endif /* defined(__alpha) */

/* Should we set all words of the zone element to an illegal address
 * when it is freed, to help catch usage after freeing?  The down-side
 * is that this obscures the identity of the freed element.
 */
boolean_t zfree_clear = FALSE;

#define ADD_TO_ZONE(zone, element)					\
MACRO_BEGIN								\
	if (zfree_clear)						\
	{   unsigned int i;						\
	    for (i=1;							\
		 i < zone->elem_size/sizeof(vm_offset_t) - 1;		\
		 i++)							\
		((vm_offset_t *)(element))[i] = 0xdeadbeef;		\
	}								\
	((vm_offset_t *)(element))[0] = (zone)->free_elements;		\
	(zone)->free_elements = (vm_offset_t) (element);		\
	(zone)->count--;						\
MACRO_END

#define REMOVE_FROM_ZONE(zone, ret, type)				\
MACRO_BEGIN								\
	(ret) = (type) (zone)->free_elements;				\
	if ((ret) != (type) 0) {					\
	    if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0])) {	\
		panic("A freed zone element has been modified.\n");	\
	    }								\
	    (zone)->count++;						\
	    (zone)->free_elements = *((vm_offset_t *)(ret));		\
	}								\
MACRO_END
#else /* MACH_ASSERT */

#define ADD_TO_ZONE(zone, element)					\
MACRO_BEGIN								\
	*((vm_offset_t *)(element)) = (zone)->free_elements;		\
	(zone)->free_elements = (vm_offset_t) (element);		\
	(zone)->count--;						\
MACRO_END

#define REMOVE_FROM_ZONE(zone, ret, type)				\
MACRO_BEGIN								\
	(ret) = (type) (zone)->free_elements;				\
	if ((ret) != (type) 0) {					\
		(zone)->count++;					\
		(zone)->free_elements = *((vm_offset_t *)(ret));	\
	}								\
MACRO_END

#endif /* MACH_ASSERT */

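/*
 * The free list is threaded through the free elements themselves:
 * ADD_TO_ZONE() stores the current head of zone->free_elements into the
 * first word of the element being freed and makes that element the new
 * head, and REMOVE_FROM_ZONE() pops it back off.  A free element's first
 * word is therefore its "next" link, which is what the MACH_ASSERT
 * variant sanity-checks with is_kernel_data_addr().
 */
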
#if	ZONE_DEBUG
#define zone_debug_enabled(z) z->active_zones.next
#define	ROUNDUP(x,y)		((((x)+(y)-1)/(y))*(y))
#define ZONE_DEBUG_OFFSET	ROUNDUP(sizeof(queue_chain_t),16)
#endif	/* ZONE_DEBUG */

/*
 * Support for garbage collection of unused zone pages:
 */

struct zone_page_table_entry {
	struct zone_page_table_entry	*link;
	short	alloc_count;
	short	collect_count;
};

/* Forwards */
void		zone_page_init(
				vm_offset_t	addr,
				vm_size_t	size,
				int		value);

void		zone_page_alloc(
				vm_offset_t	addr,
				vm_size_t	size);

void		zone_page_free_element(
				struct zone_page_table_entry	**free_pages,
				vm_offset_t	addr,
				vm_size_t	size);

void		zone_page_collect(
				vm_offset_t	addr,
				vm_size_t	size);

boolean_t	zone_page_collectable(
				vm_offset_t	addr,
				vm_size_t	size);

void		zone_page_keep(
				vm_offset_t	addr,
				vm_size_t	size);

void		zalloc_async(
				thread_call_param_t	p0,
				thread_call_param_t	p1);


#if	ZONE_DEBUG && MACH_KDB
int		zone_count(
				zone_t		z,
				int		tail);
#endif	/* ZONE_DEBUG && MACH_KDB */

vm_map_t	zone_map = VM_MAP_NULL;

zone_t		zone_zone = ZONE_NULL;	/* the zone containing other zones */

/*
 *	The VM system gives us an initial chunk of memory.
 *	It has to be big enough to allocate the zone_zone
 */

vm_offset_t	zdata;
vm_size_t	zdata_size;

#define lock_zone(zone)				\
MACRO_BEGIN					\
	mutex_lock(&(zone)->lock);		\
MACRO_END

#define unlock_zone(zone)			\
MACRO_BEGIN					\
	mutex_unlock(&(zone)->lock);		\
MACRO_END

#define zone_wakeup(zone) thread_wakeup((event_t)(zone))
#define zone_sleep(zone)			\
	thread_sleep_mutex((event_t)(zone),	\
			&(zone)->lock,		\
			THREAD_UNINT)

#define lock_zone_init(zone)			\
MACRO_BEGIN					\
	mutex_init(&zone->lock, 0);		\
MACRO_END

#define lock_try_zone(zone)	mutex_try(&zone->lock)

kern_return_t		zget_space(
				vm_offset_t	size,
				vm_offset_t	*result);

decl_simple_lock_data(,zget_space_lock)
vm_offset_t	zalloc_next_space;
vm_offset_t	zalloc_end_of_space;
vm_size_t	zalloc_wasted_space;

/*
 *	Garbage collection map information
 */
struct zone_page_table_entry *	zone_page_table;
vm_offset_t			zone_map_min_address;
vm_offset_t			zone_map_max_address;
unsigned int			zone_pages;

/*
 *	Exclude more than one concurrent garbage collection
 */
decl_mutex_data(,		zone_gc_lock)

#define from_zone_map(addr, size) \
	((vm_offset_t)(addr) >= zone_map_min_address && \
	 ((vm_offset_t)(addr) + size -1) < zone_map_max_address)

#define	ZONE_PAGE_USED  0
#define ZONE_PAGE_UNUSED -1


/*
 *	Protects first_zone, last_zone, num_zones,
 *	and the next_zone field of zones.
 */
decl_simple_lock_data(,	all_zones_lock)
zone_t			first_zone;
zone_t			*last_zone;
unsigned int		num_zones;

boolean_t zone_gc_allowed = TRUE;
boolean_t zone_gc_forced = FALSE;
unsigned zone_gc_last_tick = 0;
unsigned zone_gc_max_rate = 0;		/* in ticks */


/*
 *	zinit initializes a new zone.  The zone data structures themselves
 *	are stored in a zone, which is initially a static structure that
 *	is initialized by zone_init.
 */
zone_t
zinit(
	vm_size_t	size,		/* the size of an element */
	vm_size_t	max,		/* maximum memory to use */
	vm_size_t	alloc,		/* allocation size */
	const char	*name)		/* a name for the zone */
{
	zone_t		z;

	if (zone_zone == ZONE_NULL) {
		if (zget_space(sizeof(struct zone), (vm_offset_t *)&z)
		    != KERN_SUCCESS)
			return(ZONE_NULL);
	} else
		z = (zone_t) zalloc(zone_zone);
	if (z == ZONE_NULL)
		return(ZONE_NULL);

	/*
	 *	Round off all the parameters appropriately.
	 */
	if (size < sizeof(z->free_elements))
		size = sizeof(z->free_elements);
	size = ((size-1)  + sizeof(z->free_elements)) -
		((size-1) % sizeof(z->free_elements));
	if (alloc == 0)
		alloc = PAGE_SIZE;
	alloc = round_page(alloc);
	max   = round_page(max);
	/*
	 * we look for an allocation size with less than 1% waste
	 * up to 5 pages in size...
	 * otherwise, we look for an allocation size with least fragmentation
	 * in the range of 1 - 5 pages
	 * This size will be used unless
	 * the user suggestion is larger AND has less fragmentation
	 */
	{	vm_size_t best, waste; unsigned int i;
		best  = PAGE_SIZE;
		waste = best % size;

		for (i = 1; i <= 5; i++) {
			vm_size_t tsize, twaste;

			tsize = i * PAGE_SIZE;

			if ((tsize % size) < (tsize / 100)) {
				alloc = tsize;
				goto use_this_allocation;
			}
			twaste = tsize % size;
			if (twaste < waste)
				best = tsize, waste = twaste;
		}
		if (alloc <= best || (alloc % size >= waste))
			alloc = best;
	}
use_this_allocation:
	if (max && (max < alloc))
		max = alloc;

	z->free_elements = 0;
	z->cur_size = 0;
	z->max_size = max;
	z->elem_size = size;
	z->alloc_size = alloc;
	z->zone_name = name;
	z->count = 0;
	z->doing_alloc = FALSE;
	z->doing_gc = FALSE;
	z->exhaustible = FALSE;
	z->collectable = TRUE;
	z->allows_foreign = FALSE;
	z->expandable  = TRUE;
	z->waiting = FALSE;
	z->async_pending = FALSE;

#if	ZONE_DEBUG
	z->active_zones.next = z->active_zones.prev = 0;
	zone_debug_enable(z);
#endif	/* ZONE_DEBUG */
	lock_zone_init(z);

	/*
	 *	Add the zone to the all-zones list.
	 */

	z->next_zone = ZONE_NULL;
	thread_call_setup(&z->call_async_alloc, zalloc_async, z);
	simple_lock(&all_zones_lock);
	*last_zone = z;
	last_zone = &z->next_zone;
	num_zones++;
	simple_unlock(&all_zones_lock);

	return(z);
}
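
/*
 * A minimal usage sketch (illustrative only; "struct my_widget" and
 * "widget_zone" are hypothetical names, not part of this file):
 *
 *	static zone_t widget_zone;
 *
 *	widget_zone = zinit(sizeof(struct my_widget),	     // element size
 *			    4096 * sizeof(struct my_widget), // max memory
 *			    16 * sizeof(struct my_widget),   // alloc chunk hint
 *			    "widgets");
 *	zone_change(widget_zone, Z_EXHAUST, FALSE);	     // optional flag tweaks
 *
 *	struct my_widget *w = (struct my_widget *) zalloc(widget_zone);
 *	...
 *	zfree(widget_zone, w);
 */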

/*
 *	Cram the given memory into the specified zone.
 */
void
zcram(
	register zone_t		zone,
	void			*newaddr,
	vm_size_t		size)
{
	register vm_size_t	elem_size;
	vm_offset_t		newmem = (vm_offset_t) newaddr;

	/* Basic sanity checks */
	assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
	assert(!zone->collectable || zone->allows_foreign
		|| (from_zone_map(newmem, size)));

	elem_size = zone->elem_size;

	lock_zone(zone);
	while (size >= elem_size) {
		ADD_TO_ZONE(zone, newmem);
		if (from_zone_map(newmem, elem_size))
			zone_page_alloc(newmem, elem_size);
		zone->count++;	/* compensate for ADD_TO_ZONE */
		size -= elem_size;
		newmem += elem_size;
		zone->cur_size += elem_size;
	}
	unlock_zone(zone);
}
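
/*
 * Note: zcram() only threads existing wired memory onto the zone's free
 * list; it does not allocate anything itself.  The in-file callers hand it
 * memory obtained from kmem_alloc_wired() (zfill), kernel_memory_allocate()
 * (zalloc_canblock) or zget_space() (zone_bootstrap).  Memory that does not
 * come from zone_map must be flagged with zone_change(zone, Z_FOREIGN, TRUE)
 * first, as zfill() does, or the sanity check above will fire.
 */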

/*
 * Contiguous space allocator for non-paged zones. Allocates "size" amount
 * of memory from zone_map.
 */

kern_return_t
zget_space(
	vm_offset_t size,
	vm_offset_t *result)
{
	vm_offset_t	new_space = 0;
	vm_size_t	space_to_add = 0;

	simple_lock(&zget_space_lock);
	while ((zalloc_next_space + size) > zalloc_end_of_space) {
		/*
		 *	Add at least one page to allocation area.
		 */

		space_to_add = round_page(size);

		if (new_space == 0) {
			kern_return_t retval;
			/*
			 *	Memory cannot be wired down while holding
			 *	any locks that the pageout daemon might
			 *	need to free up pages.  [Making the zget_space
			 *	lock a complex lock does not help in this
			 *	regard.]
			 *
			 *	Unlock and allocate memory.  Because several
			 *	threads might try to do this at once, don't
			 *	use the memory before checking for available
			 *	space again.
			 */

			simple_unlock(&zget_space_lock);

			retval = kernel_memory_allocate(zone_map, &new_space,
				space_to_add, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
			if (retval != KERN_SUCCESS)
				return(retval);
			zone_page_init(new_space, space_to_add,
							ZONE_PAGE_USED);
			simple_lock(&zget_space_lock);
			continue;
		}


		/*
		 *	Memory was allocated in a previous iteration.
		 *
		 *	Check whether the new region is contiguous
		 *	with the old one.
		 */

		if (new_space != zalloc_end_of_space) {
			/*
			 *	Throw away the remainder of the
			 *	old space, and start a new one.
			 */
			zalloc_wasted_space +=
				zalloc_end_of_space - zalloc_next_space;
			zalloc_next_space = new_space;
		}

		zalloc_end_of_space = new_space + space_to_add;

		new_space = 0;
	}
	*result = zalloc_next_space;
	zalloc_next_space += size;
	simple_unlock(&zget_space_lock);

	if (new_space != 0)
		kmem_free(zone_map, new_space, space_to_add);

	return(KERN_SUCCESS);
}


/*
 *	Steal memory for the zone package.  Called from
 *	vm_page_bootstrap().
 */
void
zone_steal_memory(void)
{
	zdata_size = round_page(128*sizeof(struct zone));
	zdata = (vm_offset_t)((char *)pmap_steal_memory(zdata_size) - (char *)0);
}


/*
 * Fill a zone with enough memory to contain at least nelem elements.
 * Memory is obtained with kmem_alloc_wired from the kernel_map.
 * Return the number of elements actually put into the zone, which may
 * be more than the caller asked for since the memory allocation is
 * rounded up to a full page.
 */
int
zfill(
	zone_t	zone,
	int	nelem)
{
	kern_return_t	kr;
	vm_size_t	size;
	vm_offset_t	memory;
	int		nalloc;

	assert(nelem > 0);
	if (nelem <= 0)
		return 0;
	size = nelem * zone->elem_size;
	size = round_page(size);
	kr = kmem_alloc_wired(kernel_map, &memory, size);
	if (kr != KERN_SUCCESS)
		return 0;

	zone_change(zone, Z_FOREIGN, TRUE);
	zcram(zone, (void *)memory, size);
	nalloc = size / zone->elem_size;
	assert(nalloc >= nelem);

	return nalloc;
}

/*
 * Initialize the "zone of zones" which uses fixed memory allocated
 * earlier in memory initialization.  zone_bootstrap is called
 * before zone_init.
 */
void
zone_bootstrap(void)
{
	vm_size_t zone_zone_size;
	vm_offset_t zone_zone_space;

	simple_lock_init(&all_zones_lock, 0);

	first_zone = ZONE_NULL;
	last_zone = &first_zone;
	num_zones = 0;

	simple_lock_init(&zget_space_lock, 0);
	zalloc_next_space = zdata;
	zalloc_end_of_space = zdata + zdata_size;
	zalloc_wasted_space = 0;

	/* assertion: nobody else called zinit before us */
	assert(zone_zone == ZONE_NULL);
	zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
			  sizeof(struct zone), "zones");
	zone_change(zone_zone, Z_COLLECT, FALSE);
	zone_zone_size = zalloc_end_of_space - zalloc_next_space;
	zget_space(zone_zone_size, &zone_zone_space);
	zcram(zone_zone, (void *)zone_zone_space, zone_zone_size);
}

void
zone_init(
	vm_size_t max_zonemap_size)
{
	kern_return_t	retval;
	vm_offset_t	zone_min;
	vm_offset_t	zone_max;
	vm_size_t	zone_table_size;

	retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
			       FALSE, VM_FLAGS_ANYWHERE, &zone_map);

	if (retval != KERN_SUCCESS)
		panic("zone_init: kmem_suballoc failed");
	zone_max = zone_min + round_page(max_zonemap_size);
	/*
	 * Setup garbage collection information:
	 */
	zone_table_size = atop_32(zone_max - zone_min) *
				sizeof(struct zone_page_table_entry);
	if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table,
			     zone_table_size) != KERN_SUCCESS)
		panic("zone_init");
	zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
	zone_pages = atop_32(zone_max - zone_min);
	zone_map_min_address = zone_min;
	zone_map_max_address = zone_max;
	mutex_init(&zone_gc_lock, 0);
	zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
}


/*
 *	zalloc returns an element from the specified zone.
 */
void *
zalloc_canblock(
	register zone_t	zone,
	boolean_t	canblock)
{
	vm_offset_t	addr;
	kern_return_t	retval;

	assert(zone != ZONE_NULL);

	lock_zone(zone);

	REMOVE_FROM_ZONE(zone, addr, vm_offset_t);

	while ((addr == 0) && canblock && (zone->doing_gc)) {
		zone->waiting = TRUE;
		zone_sleep(zone);
		REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
	}

	while ((addr == 0) && canblock) {
		/*
		 *	If nothing was there, try to get more
		 */
		if (zone->doing_alloc) {
			/*
			 *	Someone is allocating memory for this zone.
			 *	Wait for it to show up, then try again.
			 */
			zone->waiting = TRUE;
			zone_sleep(zone);
		}
		else {
			if ((zone->cur_size + zone->elem_size) >
			    zone->max_size) {
				if (zone->exhaustible)
					break;
				if (zone->expandable) {
					/*
					 * We're willing to overflow certain
					 * zones, but not without complaining.
					 *
					 * This is best used in conjunction
					 * with the collectable flag. What we
					 * want is an assurance we can get the
					 * memory back, assuming there's no
					 * leak.
					 */
					zone->max_size += (zone->max_size >> 1);
				} else {
					unlock_zone(zone);

					panic("zalloc: zone \"%s\" empty.", zone->zone_name);
				}
			}
			zone->doing_alloc = TRUE;
			unlock_zone(zone);

			if (zone->collectable) {
				vm_offset_t space;
				vm_size_t alloc_size;
				boolean_t retry = FALSE;

				for (;;) {

					if (vm_pool_low() || retry == TRUE)
						alloc_size =
						  round_page(zone->elem_size);
					else
						alloc_size = zone->alloc_size;

					retval = kernel_memory_allocate(zone_map,
							&space, alloc_size, 0,
							KMA_KOBJECT|KMA_NOPAGEWAIT);
					if (retval == KERN_SUCCESS) {
						zone_page_init(space, alloc_size,
							       ZONE_PAGE_USED);
						zcram(zone, (void *)space, alloc_size);

						break;
					} else if (retval != KERN_RESOURCE_SHORTAGE) {
						/* would like to cause a zone_gc() */
						if (retry == TRUE)
							panic("zalloc: \"%s\" (%d elements) retry fail %d", zone->zone_name, zone->count, retval);
						retry = TRUE;
					} else {
						break;
					}
				}
				lock_zone(zone);
				zone->doing_alloc = FALSE;
				if (zone->waiting) {
					zone->waiting = FALSE;
					zone_wakeup(zone);
				}
				REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
				if (addr == 0 &&
				    retval == KERN_RESOURCE_SHORTAGE) {
					unlock_zone(zone);

					VM_PAGE_WAIT();
					lock_zone(zone);
				}
			} else {
				vm_offset_t space;
				retval = zget_space(zone->elem_size, &space);

				lock_zone(zone);
				zone->doing_alloc = FALSE;
				if (zone->waiting) {
					zone->waiting = FALSE;
					thread_wakeup((event_t)zone);
				}
				if (retval == KERN_SUCCESS) {
					zone->count++;
					zone->cur_size += zone->elem_size;
#if	ZONE_DEBUG
					if (zone_debug_enabled(zone)) {
						enqueue_tail(&zone->active_zones, (queue_entry_t)space);
					}
#endif
					unlock_zone(zone);
					zone_page_alloc(space, zone->elem_size);
#if	ZONE_DEBUG
					if (zone_debug_enabled(zone))
						space += ZONE_DEBUG_OFFSET;
#endif
					return((void *)space);
				}
				if (retval == KERN_RESOURCE_SHORTAGE) {
					unlock_zone(zone);

					VM_PAGE_WAIT();
					lock_zone(zone);
				} else {
					panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval);
				}
			}
		}
		if (addr == 0)
			REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
	}

	if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (!vm_pool_low())) {
		zone->async_pending = TRUE;
		unlock_zone(zone);
		thread_call_enter(&zone->call_async_alloc);
		lock_zone(zone);
		REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
	}

#if	ZONE_DEBUG
	if (addr && zone_debug_enabled(zone)) {
		enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
		addr += ZONE_DEBUG_OFFSET;
	}
#endif

	unlock_zone(zone);

	return((void *)addr);
}


void *
zalloc(
	register zone_t zone)
{
	return( zalloc_canblock(zone, TRUE) );
}

void *
zalloc_noblock(
	register zone_t zone)
{
	return( zalloc_canblock(zone, FALSE) );
}

void
zalloc_async(
	thread_call_param_t		p0,
	__unused thread_call_param_t	p1)
{
	void *elt;

	elt = zalloc_canblock((zone_t)p0, TRUE);
	zfree((zone_t)p0, elt);
	lock_zone(((zone_t)p0));
	((zone_t)p0)->async_pending = FALSE;
	unlock_zone(((zone_t)p0));
}


/*
 *	zget returns an element from the specified zone
 *	and immediately returns nothing if there is nothing there.
 *
 *	This form should be used when you can not block (like when
 *	processing an interrupt).
 */
void *
zget(
	register zone_t	zone)
{
	register vm_offset_t	addr;

	assert( zone != ZONE_NULL );

	if (!lock_try_zone(zone))
		return NULL;

	REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
#if	ZONE_DEBUG
	if (addr && zone_debug_enabled(zone)) {
		enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
		addr += ZONE_DEBUG_OFFSET;
	}
#endif	/* ZONE_DEBUG */
	unlock_zone(zone);

	return((void *) addr);
}
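
/*
 * Illustrative only (the "my_elt"/"my_zone" names are hypothetical, not
 * part of this file): a caller that cannot block must be prepared for a
 * NULL return from zget() and either fail or defer its work, e.g.
 *
 *	struct my_elt *e = (struct my_elt *) zget(my_zone);
 *	if (e == NULL)
 *		return;		// retry later, outside interrupt context
 */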

/* Keep this FALSE by default.  Large memory machines run orders of magnitude
   slower in debug mode when true.  Use the debugger to enable if needed. */
/* static */ boolean_t zone_check = FALSE;

static zone_t zone_last_bogus_zone = ZONE_NULL;
static vm_offset_t zone_last_bogus_elem = 0;

void
zfree(
	register zone_t	zone,
	void		*addr)
{
	vm_offset_t	elem = (vm_offset_t) addr;

#if MACH_ASSERT
	/* Basic sanity checks */
	if (zone == ZONE_NULL || elem == (vm_offset_t)0)
		panic("zfree: NULL");
	/* zone_gc assumes zones are never freed */
	if (zone == zone_zone)
		panic("zfree: freeing to zone_zone breaks zone_gc!");
#endif

	if (zone->collectable && !zone->allows_foreign &&
	    !from_zone_map(elem, zone->elem_size)) {
#if MACH_ASSERT
		panic("zfree: non-allocated memory in collectable zone!");
#endif
		zone_last_bogus_zone = zone;
		zone_last_bogus_elem = elem;
		return;
	}

	lock_zone(zone);
#if	ZONE_DEBUG
	if (zone_debug_enabled(zone)) {
		queue_t tmp_elem;

		elem -= ZONE_DEBUG_OFFSET;
		if (zone_check) {
			/* check the zone's consistency */

			for (tmp_elem = queue_first(&zone->active_zones);
			     !queue_end(tmp_elem, &zone->active_zones);
			     tmp_elem = queue_next(tmp_elem))
				if (elem == (vm_offset_t)tmp_elem)
					break;
			if (elem != (vm_offset_t)tmp_elem)
				panic("zfree()ing element from wrong zone");
		}
		remqueue(&zone->active_zones, (queue_t) elem);
	}
#endif	/* ZONE_DEBUG */
	if (zone_check) {
		vm_offset_t this;

		/* check the zone's consistency */

		for (this = zone->free_elements;
		     this != 0;
		     this = * (vm_offset_t *) this)
			if (!pmap_kernel_va(this) || this == elem)
				panic("zfree");
	}
	ADD_TO_ZONE(zone, elem);

	/*
	 * If elements are at least a page in size, and memory is low,
	 * request that the garbage collection run over this zone the next
	 * time the pageout thread runs.
	 */
	if (zone->elem_size >= PAGE_SIZE &&
	    vm_pool_low()){
		zone_gc_forced = TRUE;
	}
	unlock_zone(zone);
}


/*	Change a zone's flags.
 *	This routine must be called immediately after zinit.
 */
void
zone_change(
	zone_t		zone,
	unsigned int	item,
	boolean_t	value)
{
	assert( zone != ZONE_NULL );
	assert( value == TRUE || value == FALSE );

	switch(item){
		case Z_EXHAUST:
			zone->exhaustible = value;
			break;
		case Z_COLLECT:
			zone->collectable = value;
			break;
		case Z_EXPAND:
			zone->expandable = value;
			break;
		case Z_FOREIGN:
			zone->allows_foreign = value;
			break;
#if	MACH_ASSERT
		default:
			panic("Zone_change: Wrong Item Type!");
			/* break; */
#endif
	}
	lock_zone_init(zone);
}

/*
 * Return the expected number of free elements in the zone.
 * This calculation will be incorrect if items are zfree'd that
 * were never zalloc'd/zget'd. The correct way to stuff memory
 * into a zone is by zcram.
 */

integer_t
zone_free_count(zone_t zone)
{
	integer_t free_count;

	lock_zone(zone);
	free_count = zone->cur_size/zone->elem_size - zone->count;
	unlock_zone(zone);

	assert(free_count >= 0);

	return(free_count);
}

/*
 *  zprealloc preallocates wired memory, expanding the specified
 *  zone to the specified size
 */
void
zprealloc(
	zone_t	zone,
	vm_size_t size)
{
	vm_offset_t addr;

	if (size != 0) {
		if (kmem_alloc_wired(zone_map, &addr, size) != KERN_SUCCESS)
			panic("zprealloc");
		zone_page_init(addr, size, ZONE_PAGE_USED);
		zcram(zone, (void *)addr, size);
	}
}

/*
 *  Zone garbage collection subroutines
 */

boolean_t
zone_page_collectable(
	vm_offset_t	addr,
	vm_size_t	size)
{
	struct zone_page_table_entry	*zp;
	natural_t i, j;

#if MACH_ASSERT
	if (!from_zone_map(addr, size))
		panic("zone_page_collectable");
#endif

	i = atop_32(addr-zone_map_min_address);
	j = atop_32((addr+size-1) - zone_map_min_address);

	for (zp = zone_page_table + i; i <= j; zp++, i++)
		if (zp->collect_count == zp->alloc_count)
			return (TRUE);

	return (FALSE);
}

void
zone_page_keep(
	vm_offset_t	addr,
	vm_size_t	size)
{
	struct zone_page_table_entry	*zp;
	natural_t i, j;

#if MACH_ASSERT
	if (!from_zone_map(addr, size))
		panic("zone_page_keep");
#endif

	i = atop_32(addr-zone_map_min_address);
	j = atop_32((addr+size-1) - zone_map_min_address);

	for (zp = zone_page_table + i; i <= j; zp++, i++)
		zp->collect_count = 0;
}

void
zone_page_collect(
	vm_offset_t	addr,
	vm_size_t	size)
{
	struct zone_page_table_entry	*zp;
	natural_t i, j;

#if MACH_ASSERT
	if (!from_zone_map(addr, size))
		panic("zone_page_collect");
#endif

	i = atop_32(addr-zone_map_min_address);
	j = atop_32((addr+size-1) - zone_map_min_address);

	for (zp = zone_page_table + i; i <= j; zp++, i++)
		++zp->collect_count;
}

void
zone_page_init(
	vm_offset_t	addr,
	vm_size_t	size,
	int		value)
{
	struct zone_page_table_entry	*zp;
	natural_t i, j;

#if MACH_ASSERT
	if (!from_zone_map(addr, size))
		panic("zone_page_init");
#endif

	i = atop_32(addr-zone_map_min_address);
	j = atop_32((addr+size-1) - zone_map_min_address);

	for (zp = zone_page_table + i; i <= j; zp++, i++) {
		zp->alloc_count = value;
		zp->collect_count = 0;
	}
}

void
zone_page_alloc(
	vm_offset_t	addr,
	vm_size_t	size)
{
	struct zone_page_table_entry	*zp;
	natural_t i, j;

#if MACH_ASSERT
	if (!from_zone_map(addr, size))
		panic("zone_page_alloc");
#endif

	i = atop_32(addr-zone_map_min_address);
	j = atop_32((addr+size-1) - zone_map_min_address);

	for (zp = zone_page_table + i; i <= j; zp++, i++) {
		/*
		 * Set alloc_count to (ZONE_PAGE_USED + 1) if
		 * it was previously set to ZONE_PAGE_UNUSED.
		 */
		if (zp->alloc_count == ZONE_PAGE_UNUSED)
			zp->alloc_count = 1;
		else
			++zp->alloc_count;
	}
}

void
zone_page_free_element(
	struct zone_page_table_entry	**free_pages,
	vm_offset_t	addr,
	vm_size_t	size)
{
	struct zone_page_table_entry	*zp;
	natural_t i, j;

#if MACH_ASSERT
	if (!from_zone_map(addr, size))
		panic("zone_page_free_element");
#endif

	i = atop_32(addr-zone_map_min_address);
	j = atop_32((addr+size-1) - zone_map_min_address);

	for (zp = zone_page_table + i; i <= j; zp++, i++) {
		if (zp->collect_count > 0)
			--zp->collect_count;
		if (--zp->alloc_count == 0) {
			zp->alloc_count  = ZONE_PAGE_UNUSED;
			zp->collect_count = 0;

			zp->link = *free_pages;
			*free_pages = zp;
		}
	}
}


/* This is used for walking through a zone's free element list.
 */
struct zone_free_element {
	struct zone_free_element * next;
};

struct {
	uint32_t	pgs_freed;

	uint32_t	elems_collected,
			elems_freed,
			elems_kept;
} zgc_stats;

/*	Zone garbage collection
 *
 *	zone_gc will walk through all the free elements in all the
 *	zones that are marked collectable looking for reclaimable
 *	pages.  zone_gc is called by consider_zone_gc when the system
 *	begins to run out of memory.
 */
void
zone_gc(void)
{
	unsigned int	max_zones;
	zone_t		z;
	unsigned int	i;
	struct zone_page_table_entry	*zp, *zone_free_pages;

	mutex_lock(&zone_gc_lock);

	simple_lock(&all_zones_lock);
	max_zones = num_zones;
	z = first_zone;
	simple_unlock(&all_zones_lock);

#if MACH_ASSERT
	for (i = 0; i < zone_pages; i++)
		assert(zone_page_table[i].collect_count == 0);
#endif /* MACH_ASSERT */

	zone_free_pages = NULL;

	for (i = 0; i < max_zones; i++, z = z->next_zone) {
		unsigned int	n, m;
		vm_size_t	elt_size, size_freed;
		struct zone_free_element	*elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail;

		assert(z != ZONE_NULL);

		if (!z->collectable)
			continue;

		lock_zone(z);

		elt_size = z->elem_size;

		/*
		 * Do a quick feasibility check before we scan the zone:
		 * skip unless there is likelihood of getting pages back
		 * (i.e. we need a whole allocation block's worth of free
		 * elements before we can garbage collect) and
		 * the zone has more than 10 percent of its elements free
		 */
		if (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) ||
		    ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10))) {
			unlock_zone(z);
			continue;
		}

		z->doing_gc = TRUE;

		/*
		 * Snatch all of the free elements away from the zone.
		 */

		scan = (void *)z->free_elements;
		(void *)z->free_elements = NULL;

		unlock_zone(z);

		/*
		 * Pass 1:
		 *
		 * Determine which elements we can attempt to collect
		 * and count them up in the page table.  Foreign elements
		 * are returned to the zone.
		 */

		prev = (void *)&scan;
		elt = scan;
		n = 0; tail = keep = NULL;
		while (elt != NULL) {
			if (from_zone_map(elt, elt_size)) {
				zone_page_collect((vm_offset_t)elt, elt_size);

				prev = elt;
				elt = elt->next;

				++zgc_stats.elems_collected;
			}
			else {
				if (keep == NULL)
					keep = tail = elt;
				else
					tail = tail->next = elt;

				elt = prev->next = elt->next;
				tail->next = NULL;
			}

			/*
			 * Dribble back the elements we are keeping.
			 */

			if (++n >= 50) {
				if (z->waiting == TRUE) {
					lock_zone(z);

					if (keep != NULL) {
						tail->next = (void *)z->free_elements;
						(void *)z->free_elements = keep;
						tail = keep = NULL;
					} else {
						m = 0;
						base_elt = elt;
						base_prev = prev;
						while ((elt != NULL) && (++m < 50)) {
							prev = elt;
							elt = elt->next;
						}
						if (m != 0) {
							prev->next = (void *)z->free_elements;
							(void *)z->free_elements = (void *)base_elt;
							base_prev->next = elt;
							prev = base_prev;
						}
					}

					if (z->waiting) {
						z->waiting = FALSE;
						zone_wakeup(z);
					}

					unlock_zone(z);
				}
				n = 0;
			}
		}

		/*
		 * Return any remaining elements.
		 */

		if (keep != NULL) {
			lock_zone(z);

			tail->next = (void *)z->free_elements;
			(void *)z->free_elements = keep;

			unlock_zone(z);
		}

		/*
		 * Pass 2:
		 *
		 * Determine which pages we can reclaim and
		 * free those elements.
		 */

		size_freed = 0;
		prev = (void *)&scan;
		elt = scan;
		n = 0; tail = keep = NULL;
		while (elt != NULL) {
			if (zone_page_collectable((vm_offset_t)elt, elt_size)) {
				size_freed += elt_size;
				zone_page_free_element(&zone_free_pages,
							(vm_offset_t)elt, elt_size);

				elt = prev->next = elt->next;

				++zgc_stats.elems_freed;
			}
			else {
				zone_page_keep((vm_offset_t)elt, elt_size);

				if (keep == NULL)
					keep = tail = elt;
				else
					tail = tail->next = elt;

				elt = prev->next = elt->next;
				tail->next = NULL;

				++zgc_stats.elems_kept;
			}

			/*
			 * Dribble back the elements we are keeping,
			 * and update the zone size info.
			 */

			if (++n >= 50) {
				lock_zone(z);

				z->cur_size -= size_freed;
				size_freed = 0;

				if (keep != NULL) {
					tail->next = (void *)z->free_elements;
					(void *)z->free_elements = keep;
				}

				if (z->waiting) {
					z->waiting = FALSE;
					zone_wakeup(z);
				}

				unlock_zone(z);

				n = 0; tail = keep = NULL;
			}
		}

		/*
		 * Return any remaining elements, and update
		 * the zone size info.
		 */

		lock_zone(z);

		if (size_freed > 0 || keep != NULL) {

			z->cur_size -= size_freed;

			if (keep != NULL) {
				tail->next = (void *)z->free_elements;
				(void *)z->free_elements = keep;
			}

		}

		z->doing_gc = FALSE;
		if (z->waiting) {
			z->waiting = FALSE;
			zone_wakeup(z);
		}
		unlock_zone(z);
	}

	/*
	 * Reclaim the pages we are freeing.
	 */

	while ((zp = zone_free_pages) != NULL) {
		zone_free_pages = zp->link;
		kmem_free(zone_map, zone_map_min_address + PAGE_SIZE *
						(zp - zone_page_table), PAGE_SIZE);
		++zgc_stats.pgs_freed;
	}

	mutex_unlock(&zone_gc_lock);
}

/*
 *	consider_zone_gc:
 *
 *	Called by the pageout daemon when the system needs more free pages.
 */

void
consider_zone_gc(void)
{
	/*
	 *	By default, don't attempt zone GC more frequently
	 *	than once per minute.
	 */

	if (zone_gc_max_rate == 0)
		zone_gc_max_rate = (60 << SCHED_TICK_SHIFT) + 1;

	if (zone_gc_allowed &&
	    ((sched_tick > (zone_gc_last_tick + zone_gc_max_rate)) ||
	     zone_gc_forced)) {
		zone_gc_forced = FALSE;
		zone_gc_last_tick = sched_tick;
		zone_gc();
	}
}


kern_return_t
host_zone_info(
	host_t			host,
	zone_name_array_t	*namesp,
	mach_msg_type_number_t	*namesCntp,
	zone_info_array_t	*infop,
	mach_msg_type_number_t	*infoCntp)
{
	zone_name_t	*names;
	vm_offset_t	names_addr;
	vm_size_t	names_size;
	zone_info_t	*info;
	vm_offset_t	info_addr;
	vm_size_t	info_size;
	unsigned int	max_zones, i;
	zone_t		z;
	zone_name_t	*zn;
	zone_info_t	*zi;
	kern_return_t	kr;

	if (host == HOST_NULL)
		return KERN_INVALID_HOST;

	/*
	 *	We assume that zones aren't freed once allocated.
	 *	We won't pick up any zones that are allocated later.
	 */

	simple_lock(&all_zones_lock);
#ifdef ppc
	max_zones = num_zones + 4;
#else
	max_zones = num_zones + 2;
#endif
	z = first_zone;
	simple_unlock(&all_zones_lock);

	if (max_zones <= *namesCntp) {
		/* use in-line memory */
		names_size = *namesCntp * sizeof *names;
		names = *namesp;
	} else {
		names_size = round_page(max_zones * sizeof *names);
		kr = kmem_alloc_pageable(ipc_kernel_map,
					 &names_addr, names_size);
		if (kr != KERN_SUCCESS)
			return kr;
		names = (zone_name_t *) names_addr;
	}

	if (max_zones <= *infoCntp) {
		/* use in-line memory */
		info_size = *infoCntp * sizeof *info;
		info = *infop;
	} else {
		info_size = round_page(max_zones * sizeof *info);
		kr = kmem_alloc_pageable(ipc_kernel_map,
					 &info_addr, info_size);
		if (kr != KERN_SUCCESS) {
			if (names != *namesp)
				kmem_free(ipc_kernel_map,
					  names_addr, names_size);
			return kr;
		}

		info = (zone_info_t *) info_addr;
	}
	zn = &names[0];
	zi = &info[0];

	for (i = 0; i < num_zones; i++) {
		struct zone zcopy;

		assert(z != ZONE_NULL);

		lock_zone(z);
		zcopy = *z;
		unlock_zone(z);

		simple_lock(&all_zones_lock);
		z = z->next_zone;
		simple_unlock(&all_zones_lock);

		/* assuming here the name data is static */
		(void) strncpy(zn->zn_name, zcopy.zone_name,
			       sizeof zn->zn_name);

		zi->zi_count = zcopy.count;
		zi->zi_cur_size = zcopy.cur_size;
		zi->zi_max_size = zcopy.max_size;
		zi->zi_elem_size = zcopy.elem_size;
		zi->zi_alloc_size = zcopy.alloc_size;
		zi->zi_exhaustible = zcopy.exhaustible;
		zi->zi_collectable = zcopy.collectable;

		zn++;
		zi++;
	}
	strcpy(zn->zn_name, "kernel_stacks");
	stack_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size,
			     &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible);
	zn++;
	zi++;
#ifdef ppc
	strcpy(zn->zn_name, "save_areas");
	save_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size,
			    &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible);
	zn++;
	zi++;

	strcpy(zn->zn_name, "pmap_mappings");
	mapping_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size,
			       &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible);
	zn++;
	zi++;
#endif
	strcpy(zn->zn_name, "kalloc.large");
	kalloc_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size,
			      &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible);

	if (names != *namesp) {
		vm_size_t used;
		vm_map_copy_t copy;

		used = max_zones * sizeof *names;

		if (used != names_size)
			bzero((char *) (names_addr + used), names_size - used);

		kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
				   (vm_map_size_t)names_size, TRUE, &copy);
		assert(kr == KERN_SUCCESS);

		*namesp = (zone_name_t *) copy;
	}
	*namesCntp = max_zones;

	if (info != *infop) {
		vm_size_t used;
		vm_map_copy_t copy;

		used = max_zones * sizeof *info;

		if (used != info_size)
			bzero((char *) (info_addr + used), info_size - used);

		kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
				   (vm_map_size_t)info_size, TRUE, &copy);
		assert(kr == KERN_SUCCESS);

		*infop = (zone_info_t *) copy;
	}
	*infoCntp = max_zones;

	return KERN_SUCCESS;
}

#if	MACH_KDB
#include <ddb/db_command.h>
#include <ddb/db_output.h>
#include <kern/kern_print.h>

const char *zone_labels =
"ENTRY COUNT TOT_SZ MAX_SZ ELT_SZ ALLOC_SZ NAME";

/* Forwards */
void	db_print_zone(
		zone_t		addr);

#if	ZONE_DEBUG
void	db_zone_check_active(
		zone_t		zone);
void	db_zone_print_active(
		zone_t		zone);
#endif	/* ZONE_DEBUG */
void	db_zone_print_free(
		zone_t		zone);
void
db_print_zone(
	zone_t		addr)
{
	struct zone zcopy;

	zcopy = *addr;

	db_printf("%8x %8x %8x %8x %6x %8x %s ",
		  addr, zcopy.count, zcopy.cur_size,
		  zcopy.max_size, zcopy.elem_size,
		  zcopy.alloc_size, zcopy.zone_name);
	if (zcopy.exhaustible)
		db_printf("H");
	if (zcopy.collectable)
		db_printf("C");
	if (zcopy.expandable)
		db_printf("X");
	db_printf("\n");
}

/*ARGSUSED*/
void
db_show_one_zone(
	db_expr_t	addr,
	int		have_addr,
	__unused db_expr_t	count,
	__unused char *	modif)
{
	struct zone *z = (zone_t)((char *)0 + addr);

	if (z == ZONE_NULL || !have_addr){
		db_error("No Zone\n");
		/*NOTREACHED*/
	}

	db_printf("%s\n", zone_labels);
	db_print_zone(z);
}

/*ARGSUSED*/
void
db_show_all_zones(
	__unused db_expr_t	addr,
	int		have_addr,
	db_expr_t	count,
	__unused char *	modif)
{
	zone_t		z;
	unsigned total = 0;

	/*
	 * Don't risk hanging by unconditionally locking,
	 * risk of incoherent data is small (zones aren't freed).
	 */
	have_addr = simple_lock_try(&all_zones_lock);
	count = num_zones;
	z = first_zone;
	if (have_addr) {
		simple_unlock(&all_zones_lock);
	}

	db_printf("%s\n", zone_labels);
	for (  ; count > 0; count--) {
		if (!z) {
			db_error("Mangled Zone List\n");
			/*NOTREACHED*/
		}
		db_print_zone(z);
		total += z->cur_size,

		have_addr = simple_lock_try(&all_zones_lock);
		z = z->next_zone;
		if (have_addr) {
			simple_unlock(&all_zones_lock);
		}
	}
	db_printf("\nTotal %8x", total);
	db_printf("\n\nzone_gc() has reclaimed %d pages\n", zgc_stats.pgs_freed);
}

#if	ZONE_DEBUG
void
db_zone_check_active(
	zone_t	zone)
{
	int count = 0;
	queue_t	tmp_elem;

	if (!zone_debug_enabled(zone) || !zone_check)
		return;
	tmp_elem = queue_first(&zone->active_zones);
	while (count < zone->count) {
		count++;
		if (tmp_elem == 0) {
			printf("unexpected zero element, zone=0x%x, count=%d\n",
				zone, count);
			assert(FALSE);
			break;
		}
		if (queue_end(tmp_elem, &zone->active_zones)) {
			printf("unexpected queue_end, zone=0x%x, count=%d\n",
				zone, count);
			assert(FALSE);
			break;
		}
		tmp_elem = queue_next(tmp_elem);
	}
	if (!queue_end(tmp_elem, &zone->active_zones)) {
		printf("not at queue_end, zone=0x%x, tmp_elem=0x%x\n",
			zone, tmp_elem);
		assert(FALSE);
	}
}

void
db_zone_print_active(
	zone_t	zone)
{
	int count = 0;
	queue_t	tmp_elem;

	if (!zone_debug_enabled(zone)) {
		printf("zone 0x%x debug not enabled\n", zone);
		return;
	}
	if (!zone_check) {
		printf("zone_check FALSE\n");
		return;
	}

	printf("zone 0x%x, active elements %d\n", zone, zone->count);
	printf("active list:\n");
	tmp_elem = queue_first(&zone->active_zones);
	while (count < zone->count) {
		printf("  0x%x", tmp_elem);
		count++;
		if ((count % 6) == 0)
			printf("\n");
		if (tmp_elem == 0) {
			printf("\nunexpected zero element, count=%d\n", count);
			break;
		}
		if (queue_end(tmp_elem, &zone->active_zones)) {
			printf("\nunexpected queue_end, count=%d\n", count);
			break;
		}
		tmp_elem = queue_next(tmp_elem);
	}
	if (!queue_end(tmp_elem, &zone->active_zones))
		printf("\nnot at queue_end, tmp_elem=0x%x\n", tmp_elem);
	else
		printf("\n");
}
#endif	/* ZONE_DEBUG */

void
db_zone_print_free(
	zone_t	zone)
{
	int count = 0;
	int freecount;
	vm_offset_t elem;

	freecount = zone_free_count(zone);
	printf("zone 0x%x, free elements %d\n", zone, freecount);
	printf("free list:\n");
	elem = zone->free_elements;
	while (count < freecount) {
		printf("  0x%x", elem);
		count++;
		if ((count % 6) == 0)
			printf("\n");
		if (elem == 0) {
			printf("\nunexpected zero element, count=%d\n", count);
			break;
		}
		elem = *((vm_offset_t *)elem);
	}
	if (elem != 0)
		printf("\nnot at end of free list, elem=0x%x\n", elem);
	else
		printf("\n");
}

#endif /* MACH_KDB */


#if	ZONE_DEBUG

/* should we care about locks here ? */

#if	MACH_KDB
void *
next_element(
	zone_t		z,
	void		*prev)
{
	char		*elt = (char *)prev;

	if (!zone_debug_enabled(z))
		return(0);
	elt -= ZONE_DEBUG_OFFSET;
	elt = (char *) queue_next((queue_t) elt);
	if ((queue_t) elt == &z->active_zones)
		return(0);
	elt += ZONE_DEBUG_OFFSET;
	return(elt);
}

void *
first_element(
	zone_t		z)
{
	char		*elt;

	if (!zone_debug_enabled(z))
		return(0);
	if (queue_empty(&z->active_zones))
		return(0);
	elt = (char *)queue_first(&z->active_zones);
	elt += ZONE_DEBUG_OFFSET;
	return(elt);
}

/*
 * Second arg controls how many zone elements are printed:
 *   0 => none
 *   n, n < 0 => all
 *   n, n > 0 => last n on active list
 */
int
zone_count(
	zone_t		z,
	int		tail)
{
	void		*elt;
	int		count = 0;
	boolean_t	print = (tail != 0);

	if (tail < 0)
		tail = z->count;
	if (z->count < tail)
		tail = 0;
	tail = z->count - tail;
	for (elt = first_element(z); elt; elt = next_element(z, elt)) {
		if (print && tail <= count)
			db_printf("%8x\n", elt);
		count++;
	}
	assert(count == z->count);
	return(count);
}
#endif /* MACH_KDB */

#define zone_in_use(z)	( z->count || z->free_elements )

void
zone_debug_enable(
	zone_t		z)
{
	if (zone_debug_enabled(z) || zone_in_use(z) ||
	    z->alloc_size < (z->elem_size + ZONE_DEBUG_OFFSET))
		return;
	queue_init(&z->active_zones);
	z->elem_size += ZONE_DEBUG_OFFSET;
}

void
zone_debug_disable(
	zone_t		z)
{
	if (!zone_debug_enabled(z) || zone_in_use(z))
		return;
	z->elem_size -= ZONE_DEBUG_OFFSET;
	z->active_zones.next = z->active_zones.prev = 0;
}
#endif	/* ZONE_DEBUG */