1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/zalloc.c
60 * Author: Avadis Tevanian, Jr.
61 *
62 * Zone-based memory allocator. A zone is a collection of fixed size
63 * data blocks for which quick allocation/deallocation is possible.
64 */
65 #include <zone_debug.h>
66 #include <zone_alias_addr.h>
67 #include <norma_vm.h>
68 #include <mach_kdb.h>
69
70 #include <mach/mach_types.h>
71 #include <mach/vm_param.h>
72 #include <mach/kern_return.h>
73 #include <mach/mach_host_server.h>
74 #include <mach/machine/vm_types.h>
75 #include <mach_debug/zone_info.h>
76
77 #include <kern/kern_types.h>
78 #include <kern/assert.h>
79 #include <kern/host.h>
80 #include <kern/macro_help.h>
81 #include <kern/sched.h>
82 #include <kern/lock.h>
83 #include <kern/sched_prim.h>
84 #include <kern/misc_protos.h>
85 #include <kern/thread_call.h>
86 #include <kern/zalloc.h>
87 #include <kern/kalloc.h>
88
89 #include <vm/pmap.h>
90 #include <vm/vm_map.h>
91 #include <vm/vm_kern.h>
92 #include <vm/vm_page.h>
93
94 #include <machine/machparam.h>
95
96 #include <libkern/OSDebug.h>
97 #include <sys/kdebug.h>
98
99 #if defined(__ppc__)
100 /* for fake zone stat routines */
101 #include <ppc/savearea.h>
102 #include <ppc/mappings.h>
103 #endif
104
105
106 /*
107 * Zone Corruption Debugging
108 *
109 * We provide three methods to detect use of a zone element after it's been freed. These
110 * checks are enabled by specifying "-zc" and/or "-zp" in the boot-args:
111 *
112 * (1) Range-check the free-list "next" ptr for sanity.
113 * (2) Store the ptr in two different words, and compare them against
114 * each other when re-using the zone element, to detect modifications.
115 * (3) Poison the freed memory by overwriting it with 0xdeadbeef.
116 *
117 * The first two checks are fairly lightweight and are enabled by specifying "-zc"
118 * in the boot-args. If you want more aggressive checking for use-after-free bugs
119 * and you don't mind the additional overhead, then turn on poisoning by adding
120 * "-zp" to the boot-args in addition to "-zc". If you specify -zp without -zc,
121 * it still poisons the memory when it's freed, but doesn't check if the memory
122 * has been altered later when it's reallocated.
123 */
124
125 boolean_t check_freed_element = FALSE; /* enabled by -zc in boot-args */
126 boolean_t zfree_clear = FALSE; /* enabled by -zp in boot-args */
127
128 #define is_kernel_data_addr(a) (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
129
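/*
 * Rough sketch (derived from the ADD_TO_ZONE/REMOVE_FROM_ZONE macros below,
 * not an addition to them) of how a freed element sits on the free list when
 * both "-zc" and "-zp" are given and the element holds at least two pointers:
 *
 *	word 0         free-list "next" pointer (zone->free_elements)
 *	middle words   0xdeadbeef poison pattern ("-zp")
 *	last word      backup copy of the "next" pointer ("-zc")
 *
 * On reuse, REMOVE_FROM_ZONE panics with "a freed zone element has been
 * modified" if the two pointer copies disagree or the poison was disturbed.
 */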
130 #define ADD_TO_ZONE(zone, element) \
131 MACRO_BEGIN \
132 if (zfree_clear) \
133 { unsigned int i; \
134 for (i=0; \
135 i < zone->elem_size/sizeof(uint32_t); \
136 i++) \
137 ((uint32_t *)(element))[i] = 0xdeadbeef; \
138 } \
139 *((vm_offset_t *)(element)) = (zone)->free_elements; \
140 if (check_freed_element) { \
141 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
142 ((vm_offset_t *)(element))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
143 (zone)->free_elements; \
144 } \
145 (zone)->free_elements = (vm_offset_t) (element); \
146 (zone)->count--; \
147 MACRO_END
148
149 #define REMOVE_FROM_ZONE(zone, ret, type) \
150 MACRO_BEGIN \
151 (ret) = (type) (zone)->free_elements; \
152 if ((ret) != (type) 0) { \
153 if (check_freed_element) { \
154 if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0]) || \
155 ((zone)->elem_size >= (2 * sizeof(vm_offset_t)) && \
156 ((vm_offset_t *)(ret))[((zone)->elem_size/sizeof(vm_offset_t))-1] != \
157 ((vm_offset_t *)(ret))[0])) \
158 panic("a freed zone element has been modified");\
159 if (zfree_clear) { \
160 unsigned int ii; \
161 for (ii = sizeof(vm_offset_t) / sizeof(uint32_t); \
162 ii < zone->elem_size/sizeof(uint32_t) - sizeof(vm_offset_t) / sizeof(uint32_t); \
163 ii++) \
164 if (((uint32_t *)(ret))[ii] != (uint32_t)0xdeadbeef) \
165 panic("a freed zone element has been modified");\
166 } \
167 } \
168 (zone)->count++; \
169 (zone)->free_elements = *((vm_offset_t *)(ret)); \
170 } \
171 MACRO_END
172
173 #if ZONE_DEBUG
174 #define zone_debug_enabled(z) z->active_zones.next
175 #define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
176 #define ZONE_DEBUG_OFFSET ROUNDUP(sizeof(queue_chain_t),16)
177 #endif /* ZONE_DEBUG */
178
179 /*
180 * Support for garbage collection of unused zone pages:
181 */
182
183 struct zone_page_table_entry {
184 struct zone_page_table_entry *link;
185 short alloc_count;
186 short collect_count;
187 };
188
189 /* Forwards */
190 void zone_page_init(
191 vm_offset_t addr,
192 vm_size_t size,
193 int value);
194
195 void zone_page_alloc(
196 vm_offset_t addr,
197 vm_size_t size);
198
199 void zone_page_free_element(
200 struct zone_page_table_entry **free_pages,
201 vm_offset_t addr,
202 vm_size_t size);
203
204 void zone_page_collect(
205 vm_offset_t addr,
206 vm_size_t size);
207
208 boolean_t zone_page_collectable(
209 vm_offset_t addr,
210 vm_size_t size);
211
212 void zone_page_keep(
213 vm_offset_t addr,
214 vm_size_t size);
215
216 void zalloc_async(
217 thread_call_param_t p0,
218 thread_call_param_t p1);
219
220
221 #if ZONE_DEBUG && MACH_KDB
222 int zone_count(
223 zone_t z,
224 int tail);
225 #endif /* ZONE_DEBUG && MACH_KDB */
226
227 vm_map_t zone_map = VM_MAP_NULL;
228
229 zone_t zone_zone = ZONE_NULL; /* the zone containing other zones */
230
231 /*
232 * The VM system gives us an initial chunk of memory.
233 * It has to be big enough to allocate the zone_zone
234 */
235
236 vm_offset_t zdata;
237 vm_size_t zdata_size;
238
239 #define lock_zone(zone) \
240 MACRO_BEGIN \
241 lck_mtx_lock(&(zone)->lock); \
242 MACRO_END
243
244 #define unlock_zone(zone) \
245 MACRO_BEGIN \
246 lck_mtx_unlock(&(zone)->lock); \
247 MACRO_END
248
249 #define zone_wakeup(zone) thread_wakeup((event_t)(zone))
250 #define zone_sleep(zone) \
251 (void) lck_mtx_sleep(&(zone)->lock, 0, (event_t)(zone), THREAD_UNINT);
252
253
254 #define lock_zone_init(zone) \
255 MACRO_BEGIN \
256 char _name[32]; \
257 (void) snprintf(_name, sizeof (_name), "zone.%s", (zone)->zone_name); \
258 lck_grp_attr_setdefault(&(zone)->lock_grp_attr); \
259 lck_grp_init(&(zone)->lock_grp, _name, &(zone)->lock_grp_attr); \
260 lck_attr_setdefault(&(zone)->lock_attr); \
261 lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext, \
262 &(zone)->lock_grp, &(zone)->lock_attr); \
263 MACRO_END
264
265 #define lock_try_zone(zone) lck_mtx_try_lock(&zone->lock)
266
267 kern_return_t zget_space(
268 vm_offset_t size,
269 vm_offset_t *result);
270
271 decl_simple_lock_data(,zget_space_lock)
272 vm_offset_t zalloc_next_space;
273 vm_offset_t zalloc_end_of_space;
274 vm_size_t zalloc_wasted_space;
275
276 /*
277 * Garbage collection map information
278 */
279 struct zone_page_table_entry * zone_page_table;
280 vm_offset_t zone_map_min_address;
281 vm_offset_t zone_map_max_address;
282 unsigned int zone_pages;
283
284 /*
285 * Exclude more than one concurrent garbage collection
286 */
287 decl_mutex_data(, zone_gc_lock)
288
289 #if !ZONE_ALIAS_ADDR
290 #define from_zone_map(addr, size) \
291 ((vm_offset_t)(addr) >= zone_map_min_address && \
292 ((vm_offset_t)(addr) + size -1) < zone_map_max_address)
293 #else
294 #define from_zone_map(addr, size) \
295 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) >= zone_map_min_address && \
296 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) + size -1) < zone_map_max_address)
297 #endif
298
299 #define ZONE_PAGE_USED 0
300 #define ZONE_PAGE_UNUSED -1
301
302
303 /*
304 * Protects first_zone, last_zone, num_zones,
305 * and the next_zone field of zones.
306 */
307 decl_simple_lock_data(, all_zones_lock)
308 zone_t first_zone;
309 zone_t *last_zone;
310 unsigned int num_zones;
311
312 boolean_t zone_gc_allowed = TRUE;
313 boolean_t zone_gc_forced = FALSE;
314 boolean_t panic_include_zprint = FALSE;
315 unsigned zone_gc_last_tick = 0;
316 unsigned zone_gc_max_rate = 0; /* in ticks */
317
318 /*
319 * Zone leak debugging code
320 *
321 * When enabled, this code keeps a log to track allocations to a particular zone that have not
322 * yet been freed. Examining this log will reveal the source of a zone leak. The log is allocated
323 * only when logging is enabled, so there is no effect on the system when it's turned off. Logging is
324 * off by default.
325 *
326 * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone>
327 * is the name of the zone you wish to log.
328 *
329 * This code only tracks one zone, so you need to identify which one is leaking first.
330 * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone
331 * garbage collector. Note that the zone name printed in the panic message is not necessarily the one
332 * containing the leak. So do a zprint from gdb and locate the zone with the bloated size. This
333 * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test. The
334 * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs.
335 * See the help in the kgmacros for usage info.
336 *
337 *
338 * Zone corruption logging
339 *
340 * Logging can also be used to help identify the source of a zone corruption. First, identify the zone
341 * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args. When -zc is used in conjunction
342 * with zlog, it changes the logging style to track both allocations and frees to the zone. So when the
343 * corruption is detected, examining the log will show you the stack traces of the callers who last allocated
344 * and freed any particular element in the zone. Use the findelem kgmacro with the address of the element that's been
345 * corrupted to examine its history. This should lead to the source of the corruption.
346 */
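/*
 * Example boot-args (the zone name "vm.objects" is illustrative only; a '.'
 * in the zlog value matches a space in the actual zone name, since spaces
 * cannot be passed in boot-args -- see log_this_zone() below):
 *
 *	leak debugging:        zlog=vm.objects zrecs=4000
 *	corruption debugging:  -zc zlog=vm.objects
 */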
347
348 static int log_records; /* size of the log, expressed in number of records */
349
350 #define MAX_ZONE_NAME 32 /* max length of a zone name we can take from the boot-args */
351
352 static char zone_name_to_log[MAX_ZONE_NAME] = ""; /* the zone name we're logging, if any */
353
354 /*
355 * The number of records in the log is configurable via the zrecs parameter in boot-args. Set this to
356 * the number of records you want in the log. For example, "zrecs=1000" sets it to 1000 records. Note
357 * that the larger the size of the log, the slower the system will run due to linear searching in the log,
358 * but one doesn't generally care about performance when tracking down a leak. The log is capped at 8000
359 * records since going much larger than this tends to make the system unresponsive and unbootable on small
360 * memory configurations. The default value is 4000 records.
361 *
362 * MAX_DEPTH configures how deep a stack trace is taken on each zalloc in the zone of interest. 15
363 * levels is usually enough to get past all the layers of code in kalloc and IOKit and see who the actual
364 * caller is up above these lower levels.
365 */
366
367 #define ZRECORDS_MAX 8000 /* Max records allowed in the log */
368 #define ZRECORDS_DEFAULT 4000 /* default records in log if zrecs is not specified in boot-args */
369 #define MAX_DEPTH 15 /* number of levels of the stack trace to record */
370
371 /*
372 * Each record in the log contains a pointer to the zone element it refers to, a "time" number that allows
373 * the records to be ordered chronologically, and a small array to hold the pc's from the stack trace. A
374 * record is added to the log each time a zalloc() is done in the zone_of_interest. For leak debugging,
375 * the record is cleared when a zfree() is done. For corruption debugging, the log tracks both allocs and frees.
376 * If the log fills, old records are replaced as if it were a circular buffer.
377 */
378
379 struct zrecord {
380 void *z_element; /* the element that was zalloc'ed or zfree'ed */
381 uint32_t z_opcode:1, /* whether it was a zalloc or zfree */
382 z_time:31; /* time index when operation was done */
383 void *z_pc[MAX_DEPTH]; /* stack trace of caller */
384 };
385
386 /*
387 * Opcodes for the z_opcode field:
388 */
389
390 #define ZOP_ALLOC 1
391 #define ZOP_FREE 0
392
393 /*
394 * The allocation log and all the related variables are protected by the zone lock for the zone_of_interest
395 */
396
397 static struct zrecord *zrecords; /* the log itself, dynamically allocated when logging is enabled */
398 static int zcurrent = 0; /* index of the next slot in the log to use */
399 static int zrecorded = 0; /* number of allocations recorded in the log */
400 static unsigned int ztime = 0; /* a timestamp of sorts */
401 static zone_t zone_of_interest = NULL; /* the zone being watched; corresponds to zone_name_to_log */
402
403 /*
404 * Decide if we want to log this zone by doing a string compare between a zone name and the name
405 * of the zone to log. Return true if the strings are equal, false otherwise. Because it's not
406 * possible to include spaces in strings passed in via the boot-args, a period in the logname will
407 * match a space in the zone name.
408 */
409
410 static int
411 log_this_zone(const char *zonename, const char *logname)
412 {
413 int len;
414 const char *zc = zonename;
415 const char *lc = logname;
416
417 /*
418 * Compare the strings. We bound the compare by MAX_ZONE_NAME.
419 */
420
421 for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) {
422
423 /*
424 * If the current characters don't match, check for a space in
425 * the zone name and a corresponding period in the log name.
426 * If that's not there, then the strings don't match.
427 */
428
429 if (*zc != *lc && !(*zc == ' ' && *lc == '.'))
430 break;
431
432 /*
433 * The strings are equal so far. If we're at the end, then it's a match.
434 */
435
436 if (*zc == '\0')
437 return TRUE;
438 }
439
440 return FALSE;
441 }
442
443
444 /*
445 * Test if we want to log this zalloc/zfree event. We log if this is the zone we're interested in and
446 * the buffer for the records has been allocated.
447 */
448
449 #define DO_LOGGING(z) (zrecords && (z) == zone_of_interest)
450
451 extern boolean_t zlog_ready;
452
453
454 /*
455 * zinit initializes a new zone. The zone data structures themselves
456 * are stored in a zone, which is initially a static structure that
457 * is initialized by zone_init.
458 */
459 zone_t
460 zinit(
461 vm_size_t size, /* the size of an element */
462 vm_size_t max, /* maximum memory to use */
463 vm_size_t alloc, /* allocation size */
464 const char *name) /* a name for the zone */
465 {
466 zone_t z;
467
468 if (zone_zone == ZONE_NULL) {
469 if (zget_space(sizeof(struct zone), (vm_offset_t *)&z)
470 != KERN_SUCCESS)
471 return(ZONE_NULL);
472 } else
473 z = (zone_t) zalloc(zone_zone);
474 if (z == ZONE_NULL)
475 return(ZONE_NULL);
476
477 /*
478 * Round off all the parameters appropriately.
479 */
480 if (size < sizeof(z->free_elements))
481 size = sizeof(z->free_elements);
482 size = ((size-1) + sizeof(z->free_elements)) -
483 ((size-1) % sizeof(z->free_elements));
484 if (alloc == 0)
485 alloc = PAGE_SIZE;
486 alloc = round_page(alloc);
487 max = round_page(max);
488 /*
489 * We look for an allocation size with less than 1% waste
490 * up to 5 pages in size...
491 * Otherwise, we look for an allocation size with the least fragmentation
492 * in the range of 1 - 5 pages.
493 * This size will be used unless
494 * the user suggestion is larger AND has less fragmentation.
495 */
496 #if ZONE_ALIAS_ADDR
497 if ((size < PAGE_SIZE) && (PAGE_SIZE % size <= PAGE_SIZE / 10))
498 alloc = PAGE_SIZE;
499 else
500 #endif
501 { vm_size_t best, waste; unsigned int i;
502 best = PAGE_SIZE;
503 waste = best % size;
504
505 for (i = 1; i <= 5; i++) {
506 vm_size_t tsize, twaste;
507
508 tsize = i * PAGE_SIZE;
509
510 if ((tsize % size) < (tsize / 100)) {
511 alloc = tsize;
512 goto use_this_allocation;
513 }
514 twaste = tsize % size;
515 if (twaste < waste)
516 best = tsize, waste = twaste;
517 }
518 if (alloc <= best || (alloc % size >= waste))
519 alloc = best;
520 }
521 use_this_allocation:
522 if (max && (max < alloc))
523 max = alloc;
524
525 z->free_elements = 0;
526 z->cur_size = 0;
527 z->max_size = max;
528 z->elem_size = size;
529 z->alloc_size = alloc;
530 z->zone_name = name;
531 z->count = 0;
532 z->doing_alloc = FALSE;
533 z->doing_gc = FALSE;
534 z->exhaustible = FALSE;
535 z->collectable = TRUE;
536 z->allows_foreign = FALSE;
537 z->expandable = TRUE;
538 z->waiting = FALSE;
539 z->async_pending = FALSE;
540
541 #if ZONE_DEBUG
542 z->active_zones.next = z->active_zones.prev = NULL;
543 zone_debug_enable(z);
544 #endif /* ZONE_DEBUG */
545 lock_zone_init(z);
546
547 /*
548 * Add the zone to the all-zones list.
549 */
550
551 z->next_zone = ZONE_NULL;
552 thread_call_setup(&z->call_async_alloc, zalloc_async, z);
553 simple_lock(&all_zones_lock);
554 *last_zone = z;
555 last_zone = &z->next_zone;
556 num_zones++;
557 simple_unlock(&all_zones_lock);
558
559 /*
560 * Check if we should be logging this zone. If so, remember the zone pointer.
561 */
562
563 if (log_this_zone(z->zone_name, zone_name_to_log)) {
564 zone_of_interest = z;
565 }
566
567 /*
568 * If we want to log a zone, see if we need to allocate buffer space for the log. Some vm related zones are
569 * zinit'ed before we can do a kmem_alloc, so we have to defer allocation in that case. zlog_ready is set to
570 * TRUE once enough of the VM system is up and running to allow a kmem_alloc to work. If we want to log one
571 * of the VM related zones that's set up early on, we will skip allocation of the log until zinit is called again
572 * later on some other zone. So note we may be allocating a buffer to log a zone other than the one being initialized
573 * right now.
574 */
575
576 if (zone_of_interest != NULL && zrecords == NULL && zlog_ready) {
577 if (kmem_alloc(kernel_map, (vm_offset_t *)&zrecords, log_records * sizeof(struct zrecord)) == KERN_SUCCESS) {
578
579 /*
580 * We got the memory for the log. Zero it out since the code needs this to identify unused records.
581 * At this point, everything is set up and we're ready to start logging this zone.
582 */
583
584 bzero((void *)zrecords, log_records * sizeof(struct zrecord));
585 printf("zone: logging started for zone %s (%p)\n", zone_of_interest->zone_name, zone_of_interest);
586
587 } else {
588 printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n");
589 zone_of_interest = NULL;
590 }
591 }
592
593 return(z);
594 }
595
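/*
 * Minimal usage sketch for the zone API defined in this file. The struct,
 * sizes and flag choice below are illustrative placeholders, not taken from
 * any real client:
 *
 *	struct widget { int a, b; };
 *	zone_t widget_zone;
 *
 *	// element size, max memory, allocation chunk size, name
 *	widget_zone = zinit(sizeof(struct widget),
 *			    1024 * sizeof(struct widget),
 *			    PAGE_SIZE, "widgets");
 *
 *	// zone flags must be changed immediately after zinit
 *	zone_change(widget_zone, Z_EXHAUST, TRUE);
 *
 *	struct widget *w = (struct widget *)zalloc(widget_zone);
 *	...
 *	zfree(widget_zone, w);
 */
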
596 /*
597 * Cram the given memory into the specified zone.
598 */
599 void
600 zcram(
601 register zone_t zone,
602 void *newaddr,
603 vm_size_t size)
604 {
605 register vm_size_t elem_size;
606 vm_offset_t newmem = (vm_offset_t) newaddr;
607
608 /* Basic sanity checks */
609 assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
610 assert(!zone->collectable || zone->allows_foreign
611 || (from_zone_map(newmem, size)));
612
613 elem_size = zone->elem_size;
614
615 lock_zone(zone);
616 while (size >= elem_size) {
617 ADD_TO_ZONE(zone, newmem);
618 if (from_zone_map(newmem, elem_size))
619 zone_page_alloc(newmem, elem_size);
620 zone->count++; /* compensate for ADD_TO_ZONE */
621 size -= elem_size;
622 newmem += elem_size;
623 zone->cur_size += elem_size;
624 }
625 unlock_zone(zone);
626 }
627
628 /*
629 * Contiguous space allocator for non-paged zones. Allocates "size" amount
630 * of memory from zone_map.
631 */
632
633 kern_return_t
634 zget_space(
635 vm_offset_t size,
636 vm_offset_t *result)
637 {
638 vm_offset_t new_space = 0;
639 vm_size_t space_to_add = 0;
640
641 simple_lock(&zget_space_lock);
642 while ((zalloc_next_space + size) > zalloc_end_of_space) {
643 /*
644 * Add at least one page to allocation area.
645 */
646
647 space_to_add = round_page(size);
648
649 if (new_space == 0) {
650 kern_return_t retval;
651 /*
652 * Memory cannot be wired down while holding
653 * any locks that the pageout daemon might
654 * need to free up pages. [Making the zget_space
655 * lock a complex lock does not help in this
656 * regard.]
657 *
658 * Unlock and allocate memory. Because several
659 * threads might try to do this at once, don't
660 * use the memory before checking for available
661 * space again.
662 */
663
664 simple_unlock(&zget_space_lock);
665
666 retval = kernel_memory_allocate(zone_map, &new_space,
667 space_to_add, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
668 if (retval != KERN_SUCCESS)
669 return(retval);
670 #if ZONE_ALIAS_ADDR
671 if (space_to_add == PAGE_SIZE)
672 new_space = zone_alias_addr(new_space);
673 #endif
674 zone_page_init(new_space, space_to_add,
675 ZONE_PAGE_USED);
676 simple_lock(&zget_space_lock);
677 continue;
678 }
679
680
681 /*
682 * Memory was allocated in a previous iteration.
683 *
684 * Check whether the new region is contiguous
685 * with the old one.
686 */
687
688 if (new_space != zalloc_end_of_space) {
689 /*
690 * Throw away the remainder of the
691 * old space, and start a new one.
692 */
693 zalloc_wasted_space +=
694 zalloc_end_of_space - zalloc_next_space;
695 zalloc_next_space = new_space;
696 }
697
698 zalloc_end_of_space = new_space + space_to_add;
699
700 new_space = 0;
701 }
702 *result = zalloc_next_space;
703 zalloc_next_space += size;
704 simple_unlock(&zget_space_lock);
705
706 if (new_space != 0)
707 kmem_free(zone_map, new_space, space_to_add);
708
709 return(KERN_SUCCESS);
710 }
711
712
713 /*
714 * Steal memory for the zone package. Called from
715 * vm_page_bootstrap().
716 */
717 void
718 zone_steal_memory(void)
719 {
720 zdata_size = round_page(128*sizeof(struct zone));
721 zdata = (vm_offset_t)((char *)pmap_steal_memory(zdata_size) - (char *)0);
722 }
723
724
725 /*
726 * Fill a zone with enough memory to contain at least nelem elements.
727 * Memory is obtained with kmem_alloc_wired from the kernel_map.
728 * Return the number of elements actually put into the zone, which may
729 * be more than the caller asked for since the memory allocation is
730 * rounded up to a full page.
731 */
732 int
733 zfill(
734 zone_t zone,
735 int nelem)
736 {
737 kern_return_t kr;
738 vm_size_t size;
739 vm_offset_t memory;
740 int nalloc;
741
742 assert(nelem > 0);
743 if (nelem <= 0)
744 return 0;
745 size = nelem * zone->elem_size;
746 size = round_page(size);
747 kr = kmem_alloc_wired(kernel_map, &memory, size);
748 if (kr != KERN_SUCCESS)
749 return 0;
750
751 zone_change(zone, Z_FOREIGN, TRUE);
752 zcram(zone, (void *)memory, size);
753 nalloc = size / zone->elem_size;
754 assert(nalloc >= nelem);
755
756 return nalloc;
757 }
758
759 /*
760 * Initialize the "zone of zones" which uses fixed memory allocated
761 * earlier in memory initialization. zone_bootstrap is called
762 * before zone_init.
763 */
764 void
765 zone_bootstrap(void)
766 {
767 vm_size_t zone_zone_size;
768 vm_offset_t zone_zone_space;
769 char temp_buf[16];
770
771 /* see if we want freed zone element checking and/or poisoning */
772 if (PE_parse_boot_argn("-zc", temp_buf, sizeof (temp_buf))) {
773 check_freed_element = TRUE;
774 }
775
776 if (PE_parse_boot_argn("-zp", temp_buf, sizeof (temp_buf))) {
777 zfree_clear = TRUE;
778 }
779
780 /*
781 * Check for and set up zone leak detection if requested via boot-args. We recognize two
782 * boot-args:
783 *
784 * zlog=<zone_to_log>
785 * zrecs=<num_records_in_log>
786 *
787 * The zlog arg is used to specify the zone name that should be logged, and zrecs is used to
788 * control the size of the log. If zrecs is not specified, a default value is used.
789 */
790
791 if (PE_parse_boot_argn("zlog", zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) {
792 if (PE_parse_boot_argn("zrecs", &log_records, sizeof(log_records)) == TRUE) {
793
794 /*
795 * Don't allow more than ZRECORDS_MAX records even if the user asked for more.
796 * This prevents accidentally hogging too much kernel memory and making the system
797 * unusable.
798 */
799
800 log_records = MIN(ZRECORDS_MAX, log_records);
801
802 } else {
803 log_records = ZRECORDS_DEFAULT;
804 }
805 }
806
807 simple_lock_init(&all_zones_lock, 0);
808
809 first_zone = ZONE_NULL;
810 last_zone = &first_zone;
811 num_zones = 0;
812
813 simple_lock_init(&zget_space_lock, 0);
814 zalloc_next_space = zdata;
815 zalloc_end_of_space = zdata + zdata_size;
816 zalloc_wasted_space = 0;
817
818 /* assertion: nobody else called zinit before us */
819 assert(zone_zone == ZONE_NULL);
820 zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
821 sizeof(struct zone), "zones");
822 zone_change(zone_zone, Z_COLLECT, FALSE);
823 zone_zone_size = zalloc_end_of_space - zalloc_next_space;
824 zget_space(zone_zone_size, &zone_zone_space);
825 zcram(zone_zone, (void *)zone_zone_space, zone_zone_size);
826 }
827
828 void
829 zone_init(
830 vm_size_t max_zonemap_size)
831 {
832 kern_return_t retval;
833 vm_offset_t zone_min;
834 vm_offset_t zone_max;
835 vm_size_t zone_table_size;
836
837 retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
838 FALSE, VM_FLAGS_ANYWHERE, &zone_map);
839
840 if (retval != KERN_SUCCESS)
841 panic("zone_init: kmem_suballoc failed");
842 zone_max = zone_min + round_page(max_zonemap_size);
843 /*
844 * Setup garbage collection information:
845 */
846 zone_table_size = atop_32(zone_max - zone_min) *
847 sizeof(struct zone_page_table_entry);
848 if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table,
849 zone_table_size) != KERN_SUCCESS)
850 panic("zone_init");
851 zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
852 zone_pages = atop_32(zone_max - zone_min);
853 zone_map_min_address = zone_min;
854 zone_map_max_address = zone_max;
855 mutex_init(&zone_gc_lock, 0);
856 zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
857 }
858
859
860 /*
861 * zalloc returns an element from the specified zone.
862 */
863 void *
864 zalloc_canblock(
865 register zone_t zone,
866 boolean_t canblock)
867 {
868 vm_offset_t addr;
869 kern_return_t retval;
870 void *bt[MAX_DEPTH]; /* only used if zone logging is enabled */
871 int numsaved = 0;
872 int i;
873
874 assert(zone != ZONE_NULL);
875
876 /*
877 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
878 */
879
880 if (DO_LOGGING(zone))
881 numsaved = OSBacktrace(&bt[0], MAX_DEPTH);
882
883 lock_zone(zone);
884
885 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
886
887 while ((addr == 0) && canblock && (zone->doing_gc)) {
888 zone->waiting = TRUE;
889 zone_sleep(zone);
890 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
891 }
892
893 while ((addr == 0) && canblock) {
894 /*
895 * If nothing was there, try to get more
896 */
897 if (zone->doing_alloc) {
898 /*
899 * Someone is allocating memory for this zone.
900 * Wait for it to show up, then try again.
901 */
902 zone->waiting = TRUE;
903 zone_sleep(zone);
904 }
905 else {
906 if ((zone->cur_size + zone->elem_size) >
907 zone->max_size) {
908 if (zone->exhaustible)
909 break;
910 if (zone->expandable) {
911 /*
912 * We're willing to overflow certain
913 * zones, but not without complaining.
914 *
915 * This is best used in conjunction
916 * with the collectable flag. What we
917 * want is an assurance we can get the
918 * memory back, assuming there's no
919 * leak.
920 */
921 zone->max_size += (zone->max_size >> 1);
922 } else {
923 unlock_zone(zone);
924
925 panic("zalloc: zone \"%s\" empty.", zone->zone_name);
926 }
927 }
928 zone->doing_alloc = TRUE;
929 unlock_zone(zone);
930
931 if (zone->collectable) {
932 vm_offset_t space;
933 vm_size_t alloc_size;
934 int retry = 0;
935
936 for (;;) {
937
938 if (vm_pool_low() || retry >= 1)
939 alloc_size =
940 round_page(zone->elem_size);
941 else
942 alloc_size = zone->alloc_size;
943
944 retval = kernel_memory_allocate(zone_map,
945 &space, alloc_size, 0,
946 KMA_KOBJECT|KMA_NOPAGEWAIT);
947 if (retval == KERN_SUCCESS) {
948 #if ZONE_ALIAS_ADDR
949 if (alloc_size == PAGE_SIZE)
950 space = zone_alias_addr(space);
951 #endif
952 zone_page_init(space, alloc_size,
953 ZONE_PAGE_USED);
954 zcram(zone, (void *)space, alloc_size);
955
956 break;
957 } else if (retval != KERN_RESOURCE_SHORTAGE) {
958 retry++;
959
960 if (retry == 2) {
961 zone_gc();
962 printf("zalloc did gc\n");
963 }
964 if (retry == 3) {
965 panic_include_zprint = TRUE;
966 panic("zalloc: \"%s\" (%d elements) retry fail %d", zone->zone_name, zone->count, retval);
967 }
968 } else {
969 break;
970 }
971 }
972 lock_zone(zone);
973 zone->doing_alloc = FALSE;
974 if (zone->waiting) {
975 zone->waiting = FALSE;
976 zone_wakeup(zone);
977 }
978 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
979 if (addr == 0 &&
980 retval == KERN_RESOURCE_SHORTAGE) {
981 unlock_zone(zone);
982
983 VM_PAGE_WAIT();
984 lock_zone(zone);
985 }
986 } else {
987 vm_offset_t space;
988 retval = zget_space(zone->elem_size, &space);
989
990 lock_zone(zone);
991 zone->doing_alloc = FALSE;
992 if (zone->waiting) {
993 zone->waiting = FALSE;
994 thread_wakeup((event_t)zone);
995 }
996 if (retval == KERN_SUCCESS) {
997 zone->count++;
998 zone->cur_size += zone->elem_size;
999 #if ZONE_DEBUG
1000 if (zone_debug_enabled(zone)) {
1001 enqueue_tail(&zone->active_zones, (queue_entry_t)space);
1002 }
1003 #endif
1004 unlock_zone(zone);
1005 zone_page_alloc(space, zone->elem_size);
1006 #if ZONE_DEBUG
1007 if (zone_debug_enabled(zone))
1008 space += ZONE_DEBUG_OFFSET;
1009 #endif
1010 addr = space;
1011 goto success;
1012 }
1013 if (retval == KERN_RESOURCE_SHORTAGE) {
1014 unlock_zone(zone);
1015
1016 VM_PAGE_WAIT();
1017 lock_zone(zone);
1018 } else {
1019 panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval);
1020 }
1021 }
1022 }
1023 if (addr == 0)
1024 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1025 }
1026
1027 /*
1028 * See if we should be logging allocations in this zone. Logging is rarely done except when a leak is
1029 * suspected, so this code rarely executes. We need to do this code while still holding the zone lock
1030 * since it protects the various log related data structures.
1031 */
1032
1033 if (DO_LOGGING(zone) && addr) {
1034
1035 /*
1036 * Look for a place to record this new allocation. We implement two different logging strategies
1037 * depending on whether we're looking for the source of a zone leak or a zone corruption. When looking
1038 * for a leak, we want to log as many allocations as possible in order to clearly identify the leaker
1039 * among all the records. So we look for an unused slot in the log and fill that in before overwriting
1040 * an old entry. When looking for a corruption, however, it's better to have a chronological log of all
1041 * the allocations and frees done in the zone so that the history of operations for a specific zone
1042 * element can be inspected. So in this case, we treat the log as a circular buffer and overwrite the
1043 * oldest entry whenever a new one needs to be added.
1044 *
1045 * The check_freed_element flag tells us what style of logging to do. It's set if we're supposed to be
1046 * doing corruption style logging (indicated via -zc in the boot-args).
1047 */
1048
1049 if (!check_freed_element && zrecords[zcurrent].z_element && zrecorded < log_records) {
1050
1051 /*
1052 * If we get here, we're doing leak-style logging and there are still some unused entries in
1053 * the log (since zrecorded is smaller than the size of the log). Look for an unused slot
1054 * starting at zcurrent and wrap around if we reach the end of the buffer. If the buffer
1055 * is already full, we just fall through and overwrite the element indexed by zcurrent.
1056 */
1057
1058 for (i = zcurrent; i < log_records; i++) {
1059 if (zrecords[i].z_element == NULL) {
1060 zcurrent = i;
1061 goto empty_slot;
1062 }
1063 }
1064
1065 for (i = 0; i < zcurrent; i++) {
1066 if (zrecords[i].z_element == NULL) {
1067 zcurrent = i;
1068 goto empty_slot;
1069 }
1070 }
1071 }
1072
1073 /*
1074 * Save a record of this allocation
1075 */
1076
1077 empty_slot:
1078 if (zrecords[zcurrent].z_element == NULL)
1079 zrecorded++;
1080
1081 zrecords[zcurrent].z_element = (void *)addr;
1082 zrecords[zcurrent].z_time = ztime++;
1083 zrecords[zcurrent].z_opcode = ZOP_ALLOC;
1084
1085 for (i = 0; i < numsaved; i++)
1086 zrecords[zcurrent].z_pc[i] = bt[i];
1087
1088 for (; i < MAX_DEPTH; i++)
1089 zrecords[zcurrent].z_pc[i] = 0;
1090
1091 zcurrent++;
1092
1093 if (zcurrent >= log_records)
1094 zcurrent = 0;
1095 }
1096
1097 if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) {
1098 zone->async_pending = TRUE;
1099 unlock_zone(zone);
1100 thread_call_enter(&zone->call_async_alloc);
1101 lock_zone(zone);
1102 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1103 }
1104
1105 #if ZONE_DEBUG
1106 if (addr && zone_debug_enabled(zone)) {
1107 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
1108 addr += ZONE_DEBUG_OFFSET;
1109 }
1110 #endif
1111
1112 unlock_zone(zone);
1113
1114 success:
1115 TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr);
1116
1117 return((void *)addr);
1118 }
1119
1120
1121 void *
1122 zalloc(
1123 register zone_t zone)
1124 {
1125 return( zalloc_canblock(zone, TRUE) );
1126 }
1127
1128 void *
1129 zalloc_noblock(
1130 register zone_t zone)
1131 {
1132 return( zalloc_canblock(zone, FALSE) );
1133 }
1134
1135 void
1136 zalloc_async(
1137 thread_call_param_t p0,
1138 __unused thread_call_param_t p1)
1139 {
1140 void *elt;
1141
1142 elt = zalloc_canblock((zone_t)p0, TRUE);
1143 zfree((zone_t)p0, elt);
1144 lock_zone(((zone_t)p0));
1145 ((zone_t)p0)->async_pending = FALSE;
1146 unlock_zone(((zone_t)p0));
1147 }
1148
1149
1150 /*
1151 * zget returns an element from the specified zone
1152 * and immediately returns nothing if there is nothing there.
1153 *
1154 * This form should be used when you can not block (like when
1155 * processing an interrupt).
1156 */
1157 void *
1158 zget(
1159 register zone_t zone)
1160 {
1161 register vm_offset_t addr;
1162
1163 assert( zone != ZONE_NULL );
1164
1165 if (!lock_try_zone(zone))
1166 return NULL;
1167
1168 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1169 #if ZONE_DEBUG
1170 if (addr && zone_debug_enabled(zone)) {
1171 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
1172 addr += ZONE_DEBUG_OFFSET;
1173 }
1174 #endif /* ZONE_DEBUG */
1175 unlock_zone(zone);
1176
1177 return((void *) addr);
1178 }
1179
1180 /* Keep this FALSE by default. Large memory machines run orders of magnitude
1181 slower in debug mode when true. Use debugger to enable if needed */
1182 /* static */ boolean_t zone_check = FALSE;
1183
1184 static zone_t zone_last_bogus_zone = ZONE_NULL;
1185 static vm_offset_t zone_last_bogus_elem = 0;
1186
1187 void
1188 zfree(
1189 register zone_t zone,
1190 void *addr)
1191 {
1192 vm_offset_t elem = (vm_offset_t) addr;
1193 void *bt[MAX_DEPTH]; /* only used if zone logging is enabled via boot-args */
1194 int numsaved = 0;
1195
1196 assert(zone != ZONE_NULL);
1197
1198 /*
1199 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
1200 */
1201
1202 if (DO_LOGGING(zone))
1203 numsaved = OSBacktrace(&bt[0], MAX_DEPTH);
1204
1205 #if MACH_ASSERT
1206 /* Basic sanity checks */
1207 if (zone == ZONE_NULL || elem == (vm_offset_t)0)
1208 panic("zfree: NULL");
1209 /* zone_gc assumes zones are never freed */
1210 if (zone == zone_zone)
1211 panic("zfree: freeing to zone_zone breaks zone_gc!");
1212 #endif
1213
1214 TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (int)addr);
1215
1216 if (zone->collectable && !zone->allows_foreign &&
1217 !from_zone_map(elem, zone->elem_size)) {
1218 #if MACH_ASSERT
1219 panic("zfree: non-allocated memory in collectable zone!");
1220 #endif
1221 zone_last_bogus_zone = zone;
1222 zone_last_bogus_elem = elem;
1223 return;
1224 }
1225
1226 lock_zone(zone);
1227
1228 /*
1229 * See if we're doing logging on this zone. There are two styles of logging used depending on
1230 * whether we're trying to catch a leak or corruption. See comments above in zalloc for details.
1231 */
1232
1233 if (DO_LOGGING(zone)) {
1234 int i;
1235
1236 if (check_freed_element) {
1237
1238 /*
1239 * We're logging to catch a corruption. Add a record of this zfree operation
1240 * to log.
1241 */
1242
1243 if (zrecords[zcurrent].z_element == NULL)
1244 zrecorded++;
1245
1246 zrecords[zcurrent].z_element = (void *)addr;
1247 zrecords[zcurrent].z_time = ztime++;
1248 zrecords[zcurrent].z_opcode = ZOP_FREE;
1249
1250 for (i = 0; i < numsaved; i++)
1251 zrecords[zcurrent].z_pc[i] = bt[i];
1252
1253 for (; i < MAX_DEPTH; i++)
1254 zrecords[zcurrent].z_pc[i] = 0;
1255
1256 zcurrent++;
1257
1258 if (zcurrent >= log_records)
1259 zcurrent = 0;
1260
1261 } else {
1262
1263 /*
1264 * We're logging to catch a leak. Remove any record we might have for this
1265 * element since it's being freed. Note that we may not find it if the buffer
1266 * overflowed and that's OK. Since the log is of a limited size, old records
1267 * get overwritten if there are more zallocs than zfrees.
1268 */
1269
1270 for (i = 0; i < log_records; i++) {
1271 if (zrecords[i].z_element == addr) {
1272 zrecords[i].z_element = NULL;
1273 zcurrent = i;
1274 zrecorded--;
1275 break;
1276 }
1277 }
1278 }
1279 }
1280
1281
1282 #if ZONE_DEBUG
1283 if (zone_debug_enabled(zone)) {
1284 queue_t tmp_elem;
1285
1286 elem -= ZONE_DEBUG_OFFSET;
1287 if (zone_check) {
1288 /* check the zone's consistency */
1289
1290 for (tmp_elem = queue_first(&zone->active_zones);
1291 !queue_end(tmp_elem, &zone->active_zones);
1292 tmp_elem = queue_next(tmp_elem))
1293 if (elem == (vm_offset_t)tmp_elem)
1294 break;
1295 if (elem != (vm_offset_t)tmp_elem)
1296 panic("zfree()ing element from wrong zone");
1297 }
1298 remqueue(&zone->active_zones, (queue_t) elem);
1299 }
1300 #endif /* ZONE_DEBUG */
1301 if (zone_check) {
1302 vm_offset_t this;
1303
1304 /* check the zone's consistency */
1305
1306 for (this = zone->free_elements;
1307 this != 0;
1308 this = * (vm_offset_t *) this)
1309 if (!pmap_kernel_va(this) || this == elem)
1310 panic("zfree");
1311 }
1312 ADD_TO_ZONE(zone, elem);
1313
1314 /*
1315 * If the zone's elements are a page or more in size, and memory is low,
1316 * request to run the garbage collection in the zone the next
1317 * time the pageout thread runs.
1318 */
1319 if (zone->elem_size >= PAGE_SIZE &&
1320 vm_pool_low()){
1321 zone_gc_forced = TRUE;
1322 }
1323 unlock_zone(zone);
1324 }
1325
1326
1327 /* Change a zone's flags.
1328 * This routine must be called immediately after zinit.
1329 */
1330 void
1331 zone_change(
1332 zone_t zone,
1333 unsigned int item,
1334 boolean_t value)
1335 {
1336 assert( zone != ZONE_NULL );
1337 assert( value == TRUE || value == FALSE );
1338
1339 switch(item){
1340 case Z_EXHAUST:
1341 zone->exhaustible = value;
1342 break;
1343 case Z_COLLECT:
1344 zone->collectable = value;
1345 break;
1346 case Z_EXPAND:
1347 zone->expandable = value;
1348 break;
1349 case Z_FOREIGN:
1350 zone->allows_foreign = value;
1351 break;
1352 #if MACH_ASSERT
1353 default:
1354 panic("Zone_change: Wrong Item Type!");
1355 /* break; */
1356 #endif
1357 }
1358 }
1359
1360 /*
1361 * Return the expected number of free elements in the zone.
1362 * This calculation will be incorrect if items are zfree'd that
1363 * were never zalloc'd/zget'd. The correct way to stuff memory
1364 * into a zone is by zcram.
1365 */
1366
1367 integer_t
1368 zone_free_count(zone_t zone)
1369 {
1370 integer_t free_count;
1371
1372 lock_zone(zone);
1373 free_count = zone->cur_size/zone->elem_size - zone->count;
1374 unlock_zone(zone);
1375
1376 assert(free_count >= 0);
1377
1378 return(free_count);
1379 }
1380
1381 /*
1382 * zprealloc preallocates wired memory, expanding the specified
1383 * zone to the specified size
1384 */
1385 void
1386 zprealloc(
1387 zone_t zone,
1388 vm_size_t size)
1389 {
1390 vm_offset_t addr;
1391
1392 if (size != 0) {
1393 if (kmem_alloc_wired(zone_map, &addr, size) != KERN_SUCCESS)
1394 panic("zprealloc");
1395 zone_page_init(addr, size, ZONE_PAGE_USED);
1396 zcram(zone, (void *)addr, size);
1397 }
1398 }
1399
1400 /*
1401 * Zone garbage collection subroutines
1402 */
1403
1404 boolean_t
1405 zone_page_collectable(
1406 vm_offset_t addr,
1407 vm_size_t size)
1408 {
1409 struct zone_page_table_entry *zp;
1410 natural_t i, j;
1411
1412 #if ZONE_ALIAS_ADDR
1413 addr = zone_virtual_addr(addr);
1414 #endif
1415 #if MACH_ASSERT
1416 if (!from_zone_map(addr, size))
1417 panic("zone_page_collectable");
1418 #endif
1419
1420 i = atop_32(addr-zone_map_min_address);
1421 j = atop_32((addr+size-1) - zone_map_min_address);
1422
1423 for (zp = zone_page_table + i; i <= j; zp++, i++)
1424 if (zp->collect_count == zp->alloc_count)
1425 return (TRUE);
1426
1427 return (FALSE);
1428 }
1429
1430 void
1431 zone_page_keep(
1432 vm_offset_t addr,
1433 vm_size_t size)
1434 {
1435 struct zone_page_table_entry *zp;
1436 natural_t i, j;
1437
1438 #if ZONE_ALIAS_ADDR
1439 addr = zone_virtual_addr(addr);
1440 #endif
1441 #if MACH_ASSERT
1442 if (!from_zone_map(addr, size))
1443 panic("zone_page_keep");
1444 #endif
1445
1446 i = atop_32(addr-zone_map_min_address);
1447 j = atop_32((addr+size-1) - zone_map_min_address);
1448
1449 for (zp = zone_page_table + i; i <= j; zp++, i++)
1450 zp->collect_count = 0;
1451 }
1452
1453 void
1454 zone_page_collect(
1455 vm_offset_t addr,
1456 vm_size_t size)
1457 {
1458 struct zone_page_table_entry *zp;
1459 natural_t i, j;
1460
1461 #if ZONE_ALIAS_ADDR
1462 addr = zone_virtual_addr(addr);
1463 #endif
1464 #if MACH_ASSERT
1465 if (!from_zone_map(addr, size))
1466 panic("zone_page_collect");
1467 #endif
1468
1469 i = atop_32(addr-zone_map_min_address);
1470 j = atop_32((addr+size-1) - zone_map_min_address);
1471
1472 for (zp = zone_page_table + i; i <= j; zp++, i++)
1473 ++zp->collect_count;
1474 }
1475
1476 void
1477 zone_page_init(
1478 vm_offset_t addr,
1479 vm_size_t size,
1480 int value)
1481 {
1482 struct zone_page_table_entry *zp;
1483 natural_t i, j;
1484
1485 #if ZONE_ALIAS_ADDR
1486 addr = zone_virtual_addr(addr);
1487 #endif
1488 #if MACH_ASSERT
1489 if (!from_zone_map(addr, size))
1490 panic("zone_page_init");
1491 #endif
1492
1493 i = atop_32(addr-zone_map_min_address);
1494 j = atop_32((addr+size-1) - zone_map_min_address);
1495
1496 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1497 zp->alloc_count = value;
1498 zp->collect_count = 0;
1499 }
1500 }
1501
1502 void
1503 zone_page_alloc(
1504 vm_offset_t addr,
1505 vm_size_t size)
1506 {
1507 struct zone_page_table_entry *zp;
1508 natural_t i, j;
1509
1510 #if ZONE_ALIAS_ADDR
1511 addr = zone_virtual_addr(addr);
1512 #endif
1513 #if MACH_ASSERT
1514 if (!from_zone_map(addr, size))
1515 panic("zone_page_alloc");
1516 #endif
1517
1518 i = atop_32(addr-zone_map_min_address);
1519 j = atop_32((addr+size-1) - zone_map_min_address);
1520
1521 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1522 /*
1523 * Set alloc_count to (ZONE_PAGE_USED + 1) if
1524 * it was previously set to ZONE_PAGE_UNUSED.
1525 */
1526 if (zp->alloc_count == ZONE_PAGE_UNUSED)
1527 zp->alloc_count = 1;
1528 else
1529 ++zp->alloc_count;
1530 }
1531 }
1532
1533 void
1534 zone_page_free_element(
1535 struct zone_page_table_entry **free_pages,
1536 vm_offset_t addr,
1537 vm_size_t size)
1538 {
1539 struct zone_page_table_entry *zp;
1540 natural_t i, j;
1541
1542 #if ZONE_ALIAS_ADDR
1543 addr = zone_virtual_addr(addr);
1544 #endif
1545 #if MACH_ASSERT
1546 if (!from_zone_map(addr, size))
1547 panic("zone_page_free_element");
1548 #endif
1549
1550 i = atop_32(addr-zone_map_min_address);
1551 j = atop_32((addr+size-1) - zone_map_min_address);
1552
1553 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1554 if (zp->collect_count > 0)
1555 --zp->collect_count;
1556 if (--zp->alloc_count == 0) {
1557 zp->alloc_count = ZONE_PAGE_UNUSED;
1558 zp->collect_count = 0;
1559
1560 zp->link = *free_pages;
1561 *free_pages = zp;
1562 }
1563 }
1564 }
1565
1566
1567 /* This is used for walking through a zone's free element list.
1568 */
1569 struct zone_free_element {
1570 struct zone_free_element * next;
1571 };
1572
1573 /*
1574 * Add a linked list of elements, starting at base, back onto the zone's
1575 * free list. Tail points to the last element on the list.
1576 */
1577
1578 #define ADD_LIST_TO_ZONE(zone, base, tail) \
1579 MACRO_BEGIN \
1580 (tail)->next = (void *)((zone)->free_elements); \
1581 if (check_freed_element) { \
1582 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
1583 ((vm_offset_t *)(tail))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
1584 (zone)->free_elements; \
1585 } \
1586 (zone)->free_elements = (unsigned long)(base); \
1587 MACRO_END
1588
1589 /*
1590 * Add an element to the chain pointed to by prev.
1591 */
1592
1593 #define ADD_ELEMENT(zone, prev, elem) \
1594 MACRO_BEGIN \
1595 (prev)->next = (elem); \
1596 if (check_freed_element) { \
1597 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
1598 ((vm_offset_t *)(prev))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
1599 (vm_offset_t)(elem); \
1600 } \
1601 MACRO_END
1602
1603 struct {
1604 uint32_t pgs_freed;
1605
1606 uint32_t elems_collected,
1607 elems_freed,
1608 elems_kept;
1609 } zgc_stats;
1610
1611 /* Zone garbage collection
1612 *
1613 * zone_gc will walk through all the free elements in all the
1614 * zones that are marked collectable looking for reclaimable
1615 * pages. zone_gc is called by consider_zone_gc when the system
1616 * begins to run out of memory.
1617 */
1618 void
1619 zone_gc(void)
1620 {
1621 unsigned int max_zones;
1622 zone_t z;
1623 unsigned int i;
1624 struct zone_page_table_entry *zp, *zone_free_pages;
1625
1626 mutex_lock(&zone_gc_lock);
1627
1628 simple_lock(&all_zones_lock);
1629 max_zones = num_zones;
1630 z = first_zone;
1631 simple_unlock(&all_zones_lock);
1632
1633 #if MACH_ASSERT
1634 for (i = 0; i < zone_pages; i++)
1635 assert(zone_page_table[i].collect_count == 0);
1636 #endif /* MACH_ASSERT */
1637
1638 zone_free_pages = NULL;
1639
1640 for (i = 0; i < max_zones; i++, z = z->next_zone) {
1641 unsigned int n, m;
1642 vm_size_t elt_size, size_freed;
1643 struct zone_free_element *elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail;
1644
1645 assert(z != ZONE_NULL);
1646
1647 if (!z->collectable)
1648 continue;
1649
1650 lock_zone(z);
1651
1652 elt_size = z->elem_size;
1653
1654 /*
1655 * Do a quick feasibility check before we scan the zone:
1656 * skip unless there is a likelihood of getting pages back
1657 * (i.e. we need a whole allocation block's worth of free
1658 * elements before we can garbage collect) and
1659 * the zone has more than 10 percent of its elements free,
1660 * or the element size is a multiple of the PAGE_SIZE
1661 */
1662 if ((elt_size & PAGE_MASK) &&
1663 (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) ||
1664 ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10)))) {
1665 unlock_zone(z);
1666 continue;
1667 }
1668
1669 z->doing_gc = TRUE;
1670
1671 /*
1672 * Snatch all of the free elements away from the zone.
1673 */
1674
1675 scan = (void *)z->free_elements;
1676 z->free_elements = 0;
1677
1678 unlock_zone(z);
1679
1680 /*
1681 * Pass 1:
1682 *
1683 * Determine which elements we can attempt to collect
1684 * and count them up in the page table. Foreign elements
1685 * are returned to the zone.
1686 */
1687
1688 prev = (void *)&scan;
1689 elt = scan;
1690 n = 0; tail = keep = NULL;
1691 while (elt != NULL) {
1692 if (from_zone_map(elt, elt_size)) {
1693 zone_page_collect((vm_offset_t)elt, elt_size);
1694
1695 prev = elt;
1696 elt = elt->next;
1697
1698 ++zgc_stats.elems_collected;
1699 }
1700 else {
1701 if (keep == NULL)
1702 keep = tail = elt;
1703 else {
1704 ADD_ELEMENT(z, tail, elt);
1705 tail = elt;
1706 }
1707
1708 ADD_ELEMENT(z, prev, elt->next);
1709 elt = elt->next;
1710 ADD_ELEMENT(z, tail, NULL);
1711 }
1712
1713 /*
1714 * Dribble back the elements we are keeping.
1715 */
1716
1717 if (++n >= 50) {
1718 if (z->waiting == TRUE) {
1719 lock_zone(z);
1720
1721 if (keep != NULL) {
1722 ADD_LIST_TO_ZONE(z, keep, tail);
1723 tail = keep = NULL;
1724 } else {
1725 m =0;
1726 base_elt = elt;
1727 base_prev = prev;
1728 while ((elt != NULL) && (++m < 50)) {
1729 prev = elt;
1730 elt = elt->next;
1731 }
1732 if (m !=0 ) {
1733 ADD_LIST_TO_ZONE(z, base_elt, prev);
1734 ADD_ELEMENT(z, base_prev, elt);
1735 prev = base_prev;
1736 }
1737 }
1738
1739 if (z->waiting) {
1740 z->waiting = FALSE;
1741 zone_wakeup(z);
1742 }
1743
1744 unlock_zone(z);
1745 }
1746 n =0;
1747 }
1748 }
1749
1750 /*
1751 * Return any remaining elements.
1752 */
1753
1754 if (keep != NULL) {
1755 lock_zone(z);
1756
1757 ADD_LIST_TO_ZONE(z, keep, tail);
1758
1759 unlock_zone(z);
1760 }
1761
1762 /*
1763 * Pass 2:
1764 *
1765 * Determine which pages we can reclaim and
1766 * free those elements.
1767 */
1768
1769 size_freed = 0;
1770 elt = scan;
1771 n = 0; tail = keep = NULL;
1772 while (elt != NULL) {
1773 if (zone_page_collectable((vm_offset_t)elt, elt_size)) {
1774 size_freed += elt_size;
1775 zone_page_free_element(&zone_free_pages,
1776 (vm_offset_t)elt, elt_size);
1777
1778 elt = elt->next;
1779
1780 ++zgc_stats.elems_freed;
1781 }
1782 else {
1783 zone_page_keep((vm_offset_t)elt, elt_size);
1784
1785 if (keep == NULL)
1786 keep = tail = elt;
1787 else {
1788 ADD_ELEMENT(z, tail, elt);
1789 tail = elt;
1790 }
1791
1792 elt = elt->next;
1793 ADD_ELEMENT(z, tail, NULL);
1794
1795 ++zgc_stats.elems_kept;
1796 }
1797
1798 /*
1799 * Dribble back the elements we are keeping,
1800 * and update the zone size info.
1801 */
1802
1803 if (++n >= 50) {
1804 lock_zone(z);
1805
1806 z->cur_size -= size_freed;
1807 size_freed = 0;
1808
1809 if (keep != NULL) {
1810 ADD_LIST_TO_ZONE(z, keep, tail);
1811 }
1812
1813 if (z->waiting) {
1814 z->waiting = FALSE;
1815 zone_wakeup(z);
1816 }
1817
1818 unlock_zone(z);
1819
1820 n = 0; tail = keep = NULL;
1821 }
1822 }
1823
1824 /*
1825 * Return any remaining elements, and update
1826 * the zone size info.
1827 */
1828
1829 lock_zone(z);
1830
1831 if (size_freed > 0 || keep != NULL) {
1832
1833 z->cur_size -= size_freed;
1834
1835 if (keep != NULL) {
1836 ADD_LIST_TO_ZONE(z, keep, tail);
1837 }
1838
1839 }
1840
1841 z->doing_gc = FALSE;
1842 if (z->waiting) {
1843 z->waiting = FALSE;
1844 zone_wakeup(z);
1845 }
1846 unlock_zone(z);
1847 }
1848
1849 /*
1850 * Reclaim the pages we are freeing.
1851 */
1852
1853 while ((zp = zone_free_pages) != NULL) {
1854 zone_free_pages = zp->link;
1855 #if ZONE_ALIAS_ADDR
1856 z = zone_virtual_addr((vm_map_address_t)z);
1857 #endif
1858 kmem_free(zone_map, zone_map_min_address + PAGE_SIZE *
1859 (zp - zone_page_table), PAGE_SIZE);
1860 ++zgc_stats.pgs_freed;
1861 }
1862
1863 mutex_unlock(&zone_gc_lock);
1864 }
1865
1866 /*
1867 * consider_zone_gc:
1868 *
1869 * Called by the pageout daemon when the system needs more free pages.
1870 */
1871
1872 void
1873 consider_zone_gc(void)
1874 {
1875 /*
1876 * By default, don't attempt zone GC more frequently
1877 * than once per minute.
1878 */
1879
1880 if (zone_gc_max_rate == 0)
1881 zone_gc_max_rate = (60 << SCHED_TICK_SHIFT) + 1;
1882
1883 if (zone_gc_allowed &&
1884 ((sched_tick > (zone_gc_last_tick + zone_gc_max_rate)) ||
1885 zone_gc_forced)) {
1886 zone_gc_forced = FALSE;
1887 zone_gc_last_tick = sched_tick;
1888 zone_gc();
1889 }
1890 }
1891
1892 struct fake_zone_info {
1893 const char* name;
1894 void (*func)(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
1895 int *, int *);
1896 };
1897
1898 static struct fake_zone_info fake_zones[] = {
1899 {
1900 .name = "kernel_stacks",
1901 .func = stack_fake_zone_info,
1902 },
1903 #ifdef ppc
1904 {
1905 .name = "save_areas",
1906 .func = save_fake_zone_info,
1907 },
1908 {
1909 .name = "pmap_mappings",
1910 .func = mapping_fake_zone_info,
1911 },
1912 #endif /* ppc */
1913 #ifdef i386
1914 {
1915 .name = "page_tables",
1916 .func = pt_fake_zone_info,
1917 },
1918 #endif /* i386 */
1919 {
1920 .name = "kalloc.large",
1921 .func = kalloc_fake_zone_info,
1922 },
1923 };
1924
1925 kern_return_t
1926 host_zone_info(
1927 host_t host,
1928 zone_name_array_t *namesp,
1929 mach_msg_type_number_t *namesCntp,
1930 zone_info_array_t *infop,
1931 mach_msg_type_number_t *infoCntp)
1932 {
1933 zone_name_t *names;
1934 vm_offset_t names_addr;
1935 vm_size_t names_size;
1936 zone_info_t *info;
1937 vm_offset_t info_addr;
1938 vm_size_t info_size;
1939 unsigned int max_zones, i;
1940 zone_t z;
1941 zone_name_t *zn;
1942 zone_info_t *zi;
1943 kern_return_t kr;
1944 size_t num_fake_zones;
1945
1946 if (host == HOST_NULL)
1947 return KERN_INVALID_HOST;
1948
1949 num_fake_zones = sizeof fake_zones / sizeof fake_zones[0];
1950
1951 /*
1952 * We assume that zones aren't freed once allocated.
1953 * We won't pick up any zones that are allocated later.
1954 */
1955
1956 simple_lock(&all_zones_lock);
1957 max_zones = num_zones + num_fake_zones;
1958 z = first_zone;
1959 simple_unlock(&all_zones_lock);
1960
1961 if (max_zones <= *namesCntp) {
1962 /* use in-line memory */
1963 names_size = *namesCntp * sizeof *names;
1964 names = *namesp;
1965 } else {
1966 names_size = round_page(max_zones * sizeof *names);
1967 kr = kmem_alloc_pageable(ipc_kernel_map,
1968 &names_addr, names_size);
1969 if (kr != KERN_SUCCESS)
1970 return kr;
1971 names = (zone_name_t *) names_addr;
1972 }
1973
1974 if (max_zones <= *infoCntp) {
1975 /* use in-line memory */
1976 info_size = *infoCntp * sizeof *info;
1977 info = *infop;
1978 } else {
1979 info_size = round_page(max_zones * sizeof *info);
1980 kr = kmem_alloc_pageable(ipc_kernel_map,
1981 &info_addr, info_size);
1982 if (kr != KERN_SUCCESS) {
1983 if (names != *namesp)
1984 kmem_free(ipc_kernel_map,
1985 names_addr, names_size);
1986 return kr;
1987 }
1988
1989 info = (zone_info_t *) info_addr;
1990 }
1991 zn = &names[0];
1992 zi = &info[0];
1993
1994 for (i = 0; i < num_zones; i++) {
1995 struct zone zcopy;
1996
1997 assert(z != ZONE_NULL);
1998
1999 lock_zone(z);
2000 zcopy = *z;
2001 unlock_zone(z);
2002
2003 simple_lock(&all_zones_lock);
2004 z = z->next_zone;
2005 simple_unlock(&all_zones_lock);
2006
2007 /* assuming here the name data is static */
2008 (void) strncpy(zn->zn_name, zcopy.zone_name,
2009 sizeof zn->zn_name);
2010 zn->zn_name[sizeof zn->zn_name - 1] = '\0';
2011
2012 zi->zi_count = zcopy.count;
2013 zi->zi_cur_size = zcopy.cur_size;
2014 zi->zi_max_size = zcopy.max_size;
2015 zi->zi_elem_size = zcopy.elem_size;
2016 zi->zi_alloc_size = zcopy.alloc_size;
2017 zi->zi_exhaustible = zcopy.exhaustible;
2018 zi->zi_collectable = zcopy.collectable;
2019
2020 zn++;
2021 zi++;
2022 }
2023
2024 /*
2025 * loop through the fake zones and fill them using the specialized
2026 * functions
2027 */
2028 for (i = 0; i < num_fake_zones; i++) {
2029 strncpy(zn->zn_name, fake_zones[i].name, sizeof zn->zn_name);
2030 zn->zn_name[sizeof zn->zn_name - 1] = '\0';
2031 fake_zones[i].func(&zi->zi_count, &zi->zi_cur_size,
2032 &zi->zi_max_size, &zi->zi_elem_size,
2033 &zi->zi_alloc_size, &zi->zi_collectable,
2034 &zi->zi_exhaustible);
2035 zn++;
2036 zi++;
2037 }
2038
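/*
 * If out-of-line buffers were used, zero any unused tail (so no stale
 * kernel memory is exposed) and hand the pages back to the caller as
 * vm_map_copy objects.
 */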
2039 if (names != *namesp) {
2040 vm_size_t used;
2041 vm_map_copy_t copy;
2042
2043 used = max_zones * sizeof *names;
2044
2045 if (used != names_size)
2046 bzero((char *) (names_addr + used), names_size - used);
2047
2048 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
2049 (vm_map_size_t)names_size, TRUE, &copy);
2050 assert(kr == KERN_SUCCESS);
2051
2052 *namesp = (zone_name_t *) copy;
2053 }
2054 *namesCntp = max_zones;
2055
2056 if (info != *infop) {
2057 vm_size_t used;
2058 vm_map_copy_t copy;
2059
2060 used = max_zones * sizeof *info;
2061
2062 if (used != info_size)
2063 bzero((char *) (info_addr + used), info_size - used);
2064
2065 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
2066 (vm_map_size_t)info_size, TRUE, &copy);
2067 assert(kr == KERN_SUCCESS);
2068
2069 *infop = (zone_info_t *) copy;
2070 }
2071 *infoCntp = max_zones;
2072
2073 return KERN_SUCCESS;
2074 }
2075
2076 #if MACH_KDB
2077 #include <ddb/db_command.h>
2078 #include <ddb/db_output.h>
2079 #include <kern/kern_print.h>
2080
2081 const char *zone_labels =
2082 "ENTRY COUNT TOT_SZ MAX_SZ ELT_SZ ALLOC_SZ NAME";
2083
2084 /* Forwards */
2085 void db_print_zone(
2086 zone_t addr);
2087
2088 #if ZONE_DEBUG
2089 void db_zone_check_active(
2090 zone_t zone);
2091 void db_zone_print_active(
2092 zone_t zone);
2093 #endif /* ZONE_DEBUG */
2094 void db_zone_print_free(
2095 zone_t zone);
2096 void
2097 db_print_zone(
2098 zone_t addr)
2099 {
2100 struct zone zcopy;
2101
2102 zcopy = *addr;
2103
2104 db_printf("%8x %8x %8x %8x %6x %8x %s ",
2105 addr, zcopy.count, zcopy.cur_size,
2106 zcopy.max_size, zcopy.elem_size,
2107 zcopy.alloc_size, zcopy.zone_name);
2108 if (zcopy.exhaustible)
2109 db_printf("H");
2110 if (zcopy.collectable)
2111 db_printf("C");
2112 if (zcopy.expandable)
2113 db_printf("X");
2114 db_printf("\n");
2115 }
2116
2117 /*ARGSUSED*/
2118 void
2119 db_show_one_zone(db_expr_t addr, boolean_t have_addr,
2120 __unused db_expr_t count, __unused char *modif)
2121 {
2122 struct zone *z = (zone_t)((char *)0 + addr);
2123
2124 if (z == ZONE_NULL || !have_addr) {
2125 db_error("No Zone\n");
2126 /*NOTREACHED*/
2127 }
2128
2129 db_printf("%s\n", zone_labels);
2130 db_print_zone(z);
2131 }
2132
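/*
 * db_show_all_zones: print one line per zone (taking the zone-list lock
 * only on a best-effort basis), followed by the total of the zones'
 * current sizes and the zone_gc() page-reclaim count.
 */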
2133 /*ARGSUSED*/
2134 void
2135 db_show_all_zones(__unused db_expr_t addr, boolean_t have_addr, db_expr_t count,
2136 __unused char *modif)
2137 {
2138 zone_t z;
2139 unsigned total = 0;
2140
2141 /*
2142 * Don't risk hanging by locking unconditionally; the
2143 * risk of incoherent data is small (zones aren't freed).
2144 */
2145 have_addr = simple_lock_try(&all_zones_lock);
2146 count = num_zones;
2147 z = first_zone;
2148 if (have_addr) {
2149 simple_unlock(&all_zones_lock);
2150 }
2151
2152 db_printf("%s\n", zone_labels);
2153 for ( ; count > 0; count--) {
2154 if (!z) {
2155 db_error("Mangled Zone List\n");
2156 /*NOTREACHED*/
2157 }
2158 db_print_zone(z);
2159 total += z->cur_size;
2160
2161 have_addr = simple_lock_try(&all_zones_lock);
2162 z = z->next_zone;
2163 if (have_addr) {
2164 simple_unlock(&all_zones_lock);
2165 }
2166 }
2167 db_printf("\nTotal %8x", total);
2168 db_printf("\n\nzone_gc() has reclaimed %d pages\n", zgc_stats.pgs_freed);
2169 }
2170
2171 #if ZONE_DEBUG
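/*
 * db_zone_check_active: verify that the zone's active queue holds
 * exactly zone->count elements; assert if the list is shorter, longer,
 * or contains a NULL link.
 */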
2172 void
2173 db_zone_check_active(
2174 zone_t zone)
2175 {
2176 int count = 0;
2177 queue_t tmp_elem;
2178
2179 if (!zone_debug_enabled(zone) || !zone_check)
2180 return;
2181 tmp_elem = queue_first(&zone->active_zones);
2182 while (count < zone->count) {
2183 count++;
2184 if (tmp_elem == 0) {
2185 printf("unexpected zero element, zone=%p, count=%d\n",
2186 zone, count);
2187 assert(FALSE);
2188 break;
2189 }
2190 if (queue_end(tmp_elem, &zone->active_zones)) {
2191 printf("unexpected queue_end, zone=%p, count=%d\n",
2192 zone, count);
2193 assert(FALSE);
2194 break;
2195 }
2196 tmp_elem = queue_next(tmp_elem);
2197 }
2198 if (!queue_end(tmp_elem, &zone->active_zones)) {
2199 printf("not at queue_end, zone=%p, tmp_elem=%p\n",
2200 zone, tmp_elem);
2201 assert(FALSE);
2202 }
2203 }
2204
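/*
 * db_zone_print_active: dump the address of every element on the
 * zone's active queue, six per line.
 */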
2205 void
2206 db_zone_print_active(
2207 zone_t zone)
2208 {
2209 int count = 0;
2210 queue_t tmp_elem;
2211
2212 if (!zone_debug_enabled(zone)) {
2213 printf("zone %p debug not enabled\n", zone);
2214 return;
2215 }
2216 if (!zone_check) {
2217 printf("zone_check FALSE\n");
2218 return;
2219 }
2220
2221 printf("zone %p, active elements %d\n", zone, zone->count);
2222 printf("active list:\n");
2223 tmp_elem = queue_first(&zone->active_zones);
2224 while (count < zone->count) {
2225 printf(" %p", tmp_elem);
2226 count++;
2227 if ((count % 6) == 0)
2228 printf("\n");
2229 if (tmp_elem == 0) {
2230 printf("\nunexpected zero element, count=%d\n", count);
2231 break;
2232 }
2233 if (queue_end(tmp_elem, &zone->active_zones)) {
2234 printf("\nunexpected queue_end, count=%d\n", count);
2235 break;
2236 }
2237 tmp_elem = queue_next(tmp_elem);
2238 }
2239 if (!queue_end(tmp_elem, &zone->active_zones))
2240 printf("\nnot at queue_end, tmp_elem=%p\n", tmp_elem);
2241 else
2242 printf("\n");
2243 }
2244 #endif /* ZONE_DEBUG */
2245
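/*
 * db_zone_print_free: dump the zone's free list, six addresses per
 * line; each free element's first word is the link to the next one.
 */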
2246 void
2247 db_zone_print_free(
2248 zone_t zone)
2249 {
2250 int count = 0;
2251 int freecount;
2252 vm_offset_t elem;
2253
2254 freecount = zone_free_count(zone);
2255 printf("zone %p, free elements %d\n", zone, freecount);
2256 printf("free list:\n");
2257 elem = zone->free_elements;
2258 while (count < freecount) {
2259 printf(" 0x%x", elem);
2260 count++;
2261 if ((count % 6) == 0)
2262 printf("\n");
2263 if (elem == 0) {
2264 printf("\nunexpected zero element, count=%d\n", count);
2265 break;
2266 }
2267 elem = *((vm_offset_t *)elem);
2268 }
2269 if (elem != 0)
2270 printf("\nnot at end of free list, elem=0x%x\n", elem);
2271 else
2272 printf("\n");
2273 }
2274
2275 #endif /* MACH_KDB */
2276
2277
2278 #if ZONE_DEBUG
2279
2280 /* should we care about locks here? */
2281
2282 #if MACH_KDB
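/*
 * With zone debugging enabled, every element is prefixed by a queue
 * linkage ZONE_DEBUG_OFFSET bytes long.  first_element()/next_element()
 * walk the active queue and return pointers to the payload that follows
 * that linkage, or NULL at the end of the list.
 */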
2283 void *
2284 next_element(
2285 zone_t z,
2286 void *prev)
2287 {
2288 char *elt = (char *)prev;
2289
2290 if (!zone_debug_enabled(z))
2291 return(NULL);
2292 elt -= ZONE_DEBUG_OFFSET;
2293 elt = (char *) queue_next((queue_t) elt);
2294 if ((queue_t) elt == &z->active_zones)
2295 return(NULL);
2296 elt += ZONE_DEBUG_OFFSET;
2297 return(elt);
2298 }
2299
2300 void *
2301 first_element(
2302 zone_t z)
2303 {
2304 char *elt;
2305
2306 if (!zone_debug_enabled(z))
2307 return(NULL);
2308 if (queue_empty(&z->active_zones))
2309 return(NULL);
2310 elt = (char *)queue_first(&z->active_zones);
2311 elt += ZONE_DEBUG_OFFSET;
2312 return(elt);
2313 }
2314
2315 /*
2316 * Second arg controls how many zone elements are printed:
2317 * 0 => none
2318 * n, n < 0 => all
2319 * n, n > 0 => last n on active list
2320 */
2321 int
2322 zone_count(
2323 zone_t z,
2324 int tail)
2325 {
2326 void *elt;
2327 int count = 0;
2328 boolean_t print = (tail != 0);
2329
2330 if (tail < 0)
2331 tail = z->count;
2332 if (z->count < tail)
2333 tail = 0;
2334 tail = z->count - tail;
2335 for (elt = first_element(z); elt; elt = next_element(z, elt)) {
2336 if (print && tail <= count)
2337 db_printf("%8x\n", elt);
2338 count++;
2339 }
2340 assert(count == z->count);
2341 return(count);
2342 }
2343 #endif /* MACH_KDB */
2344
2345 #define zone_in_use(z) ( z->count || z->free_elements )
2346
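/*
 * Zone debugging can only be toggled while the zone is idle (no live
 * or free elements).  Enabling it grows elem_size by ZONE_DEBUG_OFFSET
 * so each element can carry its active-queue linkage, provided the
 * zone's allocation size can still hold at least one such element.
 */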
2347 void
2348 zone_debug_enable(
2349 zone_t z)
2350 {
2351 if (zone_debug_enabled(z) || zone_in_use(z) ||
2352 z->alloc_size < (z->elem_size + ZONE_DEBUG_OFFSET))
2353 return;
2354 queue_init(&z->active_zones);
2355 z->elem_size += ZONE_DEBUG_OFFSET;
2356 }
2357
2358 void
2359 zone_debug_disable(
2360 zone_t z)
2361 {
2362 if (!zone_debug_enabled(z) || zone_in_use(z))
2363 return;
2364 z->elem_size -= ZONE_DEBUG_OFFSET;
2365 z->active_zones.next = z->active_zones.prev = NULL;
2366 }
2367 #endif /* ZONE_DEBUG */