1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/zalloc.c
60 * Author: Avadis Tevanian, Jr.
61 *
62 * Zone-based memory allocator. A zone is a collection of fixed size
63 * data blocks for which quick allocation/deallocation is possible.
64 */
65 #include <zone_debug.h>
66 #include <zone_alias_addr.h>
67 #include <norma_vm.h>
68 #include <mach_kdb.h>
69
70 #include <mach/mach_types.h>
71 #include <mach/vm_param.h>
72 #include <mach/kern_return.h>
73 #include <mach/mach_host_server.h>
74 #include <mach/task_server.h>
75 #include <mach/machine/vm_types.h>
76 #include <mach_debug/zone_info.h>
77
78 #include <kern/kern_types.h>
79 #include <kern/assert.h>
80 #include <kern/host.h>
81 #include <kern/macro_help.h>
82 #include <kern/sched.h>
83 #include <kern/locks.h>
84 #include <kern/sched_prim.h>
85 #include <kern/misc_protos.h>
86 #include <kern/thread_call.h>
87 #include <kern/zalloc.h>
88 #include <kern/kalloc.h>
89
90 #include <vm/pmap.h>
91 #include <vm/vm_map.h>
92 #include <vm/vm_kern.h>
93 #include <vm/vm_page.h>
94
95 #include <machine/machparam.h>
96
97 #include <libkern/OSDebug.h>
98 #include <sys/kdebug.h>
99
100 /*
101 * Zone Corruption Debugging
102 *
103 * We provide three methods to detect use of a zone element after it's been freed. These
104 * checks are enabled by specifying "-zc" and/or "-zp" in the boot-args:
105 *
106 * (1) Range-check the free-list "next" ptr for sanity.
107 * (2) Store the ptr in two different words, and compare them against
108 * each other when re-using the zone element, to detect modifications.
109 *	(3) Poison the freed memory by overwriting it with 0xdeadbeef.
110 *
111 * The first two checks are fairly lightweight and are enabled by specifying "-zc"
112 * in the boot-args. If you want more aggressive checking for use-after-free bugs
113 * and you don't mind the additional overhead, then turn on poisoning by adding
114 * "-zp" to the boot-args in addition to "-zc". If you specify -zp without -zc,
115 * it still poisons the memory when it's freed, but doesn't check if the memory
116 * has been altered later when it's reallocated.
117 */
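
/*
 * Illustrative boot-args combinations for the checks described above
 * (a sketch only; exactly how boot-args are set depends on the platform):
 *
 *	-zc		enable checks (1) and (2) on free-list elements
 *	-zp		poison freed elements with 0xdeadbeef, without the reallocation check
 *	-zc -zp		poison on free and verify the poison when the element is reallocated
 */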
118
119 boolean_t check_freed_element = FALSE; /* enabled by -zc in boot-args */
120 boolean_t zfree_clear = FALSE; /* enabled by -zp in boot-args */
121
122 /*
123 * Fake zones for things that want to report via zprint but are not actually zones.
124 */
125 struct fake_zone_info {
126 const char* name;
127 void (*init)(int);
128 void (*query)(int *,
129 vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
130 uint64_t *, int *, int *, int *);
131 };
132
133 static struct fake_zone_info fake_zones[] = {
134 {
135 .name = "kernel_stacks",
136 .init = stack_fake_zone_init,
137 .query = stack_fake_zone_info,
138 },
139 #if defined(__i386__) || defined (__x86_64__)
140 {
141 .name = "page_tables",
142 .init = pt_fake_zone_init,
143 .query = pt_fake_zone_info,
144 },
145 #endif /* __i386__ || __x86_64__ */
146 {
147 .name = "kalloc.large",
148 .init = kalloc_fake_zone_init,
149 .query = kalloc_fake_zone_info,
150 },
151 };
152 unsigned int num_fake_zones = sizeof(fake_zones)/sizeof(fake_zones[0]);
153
154 /*
155 * Zone info options
156 */
157 boolean_t zinfo_per_task = FALSE; /* enabled by -zinfop in boot-args */
158 #define ZINFO_SLOTS 200 /* for now */
159 #define ZONES_MAX (ZINFO_SLOTS - num_fake_zones - 1)
160
161 /*
162 * Allocation helper macros
163 */
164 #define is_kernel_data_addr(a) (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
165
166 #define ADD_TO_ZONE(zone, element) \
167 MACRO_BEGIN \
168 if (zfree_clear) \
169 { unsigned int i; \
170 for (i=0; \
171 i < zone->elem_size/sizeof(uint32_t); \
172 i++) \
173 ((uint32_t *)(element))[i] = 0xdeadbeef; \
174 } \
175 *((vm_offset_t *)(element)) = (zone)->free_elements; \
176 if (check_freed_element) { \
177 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
178 ((vm_offset_t *)(element))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
179 (zone)->free_elements; \
180 } \
181 (zone)->free_elements = (vm_offset_t) (element); \
182 (zone)->count--; \
183 MACRO_END
184
185 #define REMOVE_FROM_ZONE(zone, ret, type) \
186 MACRO_BEGIN \
187 (ret) = (type) (zone)->free_elements; \
188 if ((ret) != (type) 0) { \
189 if (check_freed_element) { \
190 if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0]) || \
191 ((zone)->elem_size >= (2 * sizeof(vm_offset_t)) && \
192 ((vm_offset_t *)(ret))[((zone)->elem_size/sizeof(vm_offset_t))-1] != \
193 ((vm_offset_t *)(ret))[0])) \
194 panic("a freed zone element has been modified");\
195 if (zfree_clear) { \
196 unsigned int ii; \
197 for (ii = sizeof(vm_offset_t) / sizeof(uint32_t); \
198 ii < (zone)->elem_size/sizeof(uint32_t) - sizeof(vm_offset_t) / sizeof(uint32_t); \
199 ii++) \
200 if (((uint32_t *)(ret))[ii] != (uint32_t)0xdeadbeef) \
201 panic("a freed zone element has been modified");\
202 } \
203 } \
204 (zone)->count++; \
205 (zone)->sum_count++; \
206 (zone)->free_elements = *((vm_offset_t *)(ret)); \
207 } \
208 MACRO_END
209
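/*
 * Illustrative layout (a sketch only, not compiled) of a freed element as
 * ADD_TO_ZONE() above leaves it, assuming "-zc" and "-zp" are both set and
 * the element is at least two pointers long:
 *
 *	first word  (vm_offset_t)	old free_elements head, i.e. the "next" pointer
 *	interior words (uint32_t)	0xdeadbeef poison                 (zfree_clear, "-zp")
 *	last word   (vm_offset_t)	backup copy of the "next" pointer (check_freed_element, "-zc")
 *
 * REMOVE_FROM_ZONE() panics if the two pointer copies disagree or if any
 * interior poison word was altered while the element sat on the free list.
 */
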
210 #if ZONE_DEBUG
211 #define zone_debug_enabled(z) z->active_zones.next
212 #define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
213 #define ZONE_DEBUG_OFFSET ROUNDUP(sizeof(queue_chain_t),16)
214 #endif /* ZONE_DEBUG */
215
216 /*
217 * Support for garbage collection of unused zone pages:
218 */
219
220 struct zone_page_table_entry {
221 struct zone_page_table_entry *link;
222 short alloc_count;
223 short collect_count;
224 };
225
226 /* Forwards */
227 void zone_page_init(
228 vm_offset_t addr,
229 vm_size_t size,
230 int value);
231
232 void zone_page_alloc(
233 vm_offset_t addr,
234 vm_size_t size);
235
236 void zone_page_free_element(
237 struct zone_page_table_entry **free_pages,
238 vm_offset_t addr,
239 vm_size_t size);
240
241 void zone_page_collect(
242 vm_offset_t addr,
243 vm_size_t size);
244
245 boolean_t zone_page_collectable(
246 vm_offset_t addr,
247 vm_size_t size);
248
249 void zone_page_keep(
250 vm_offset_t addr,
251 vm_size_t size);
252
253 void zalloc_async(
254 thread_call_param_t p0,
255 thread_call_param_t p1);
256
257 void zone_display_zprint( void );
258
259 #if ZONE_DEBUG && MACH_KDB
260 int zone_count(
261 zone_t z,
262 int tail);
263 #endif /* ZONE_DEBUG && MACH_KDB */
264
265 vm_map_t zone_map = VM_MAP_NULL;
266
267 zone_t zone_zone = ZONE_NULL; /* the zone containing other zones */
268
269 zone_t zinfo_zone = ZONE_NULL; /* zone of per-task zone info */
270
271 /*
272 * The VM system gives us an initial chunk of memory.
273 * It has to be big enough to allocate the zone_zone
274 */
275
276 vm_offset_t zdata;
277 vm_size_t zdata_size;
278
279 #define lock_zone(zone) \
280 MACRO_BEGIN \
281 lck_mtx_lock_spin(&(zone)->lock); \
282 MACRO_END
283
284 #define unlock_zone(zone) \
285 MACRO_BEGIN \
286 lck_mtx_unlock(&(zone)->lock); \
287 MACRO_END
288
289 #define zone_wakeup(zone) thread_wakeup((event_t)(zone))
290 #define zone_sleep(zone) \
291 (void) lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN, (event_t)(zone), THREAD_UNINT);
292
293
294 #define lock_zone_init(zone) \
295 MACRO_BEGIN \
296 char _name[32]; \
297 (void) snprintf(_name, sizeof (_name), "zone.%s", (zone)->zone_name); \
298 lck_grp_attr_setdefault(&(zone)->lock_grp_attr); \
299 lck_grp_init(&(zone)->lock_grp, _name, &(zone)->lock_grp_attr); \
300 lck_attr_setdefault(&(zone)->lock_attr); \
301 lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext, \
302 &(zone)->lock_grp, &(zone)->lock_attr); \
303 MACRO_END
304
305 #define lock_try_zone(zone) lck_mtx_try_lock_spin(&zone->lock)
306
307 kern_return_t zget_space(
308 zone_t zone,
309 vm_offset_t size,
310 vm_offset_t *result);
311
312 decl_simple_lock_data(,zget_space_lock)
313 vm_offset_t zalloc_next_space;
314 vm_offset_t zalloc_end_of_space;
315 vm_size_t zalloc_wasted_space;
316
317 /*
318 * Garbage collection map information
319 */
320 struct zone_page_table_entry * zone_page_table;
321 vm_offset_t zone_map_min_address;
322 vm_offset_t zone_map_max_address;
323 unsigned int zone_pages;
324
325 /*
326 * Exclude more than one concurrent garbage collection
327 */
328 decl_lck_mtx_data(, zone_gc_lock)
329
330 lck_attr_t zone_lck_attr;
331 lck_grp_t zone_lck_grp;
332 lck_grp_attr_t zone_lck_grp_attr;
333 lck_mtx_ext_t zone_lck_ext;
334
335
336 #if !ZONE_ALIAS_ADDR
337 #define from_zone_map(addr, size) \
338 ((vm_offset_t)(addr) >= zone_map_min_address && \
339 ((vm_offset_t)(addr) + size -1) < zone_map_max_address)
340 #else
341 #define from_zone_map(addr, size) \
342 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) >= zone_map_min_address && \
343 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) + size -1) < zone_map_max_address)
344 #endif
345
346 #define ZONE_PAGE_USED 0
347 #define ZONE_PAGE_UNUSED -1
348
349
350 /*
351 * Protects first_zone, last_zone, num_zones,
352 * and the next_zone field of zones.
353 */
354 decl_simple_lock_data(, all_zones_lock)
355 zone_t first_zone;
356 zone_t *last_zone;
357 unsigned int num_zones;
358
359 boolean_t zone_gc_allowed = TRUE;
360 boolean_t zone_gc_forced = FALSE;
361 boolean_t panic_include_zprint = FALSE;
362 boolean_t zone_gc_allowed_by_time_throttle = TRUE;
363
364 /*
365 * Zone leak debugging code
366 *
367 * When enabled, this code keeps a log to track allocations to a particular zone that have not
368 * yet been freed. Examining this log will reveal the source of a zone leak. The log is allocated
369 * only when logging is enabled, so there is no effect on the system when it's turned off. Logging is
370 * off by default.
371 *
372 * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone>
373 * is the name of the zone you wish to log.
374 *
375 * This code only tracks one zone, so you need to identify which one is leaking first.
376 * Generally, you'll know you have a leak when you get a "zalloc: ... retry fail 3" panic from the zone
377 * garbage collector. Note that the zone name printed in the panic message is not necessarily the one
378 * containing the leak. So do a zprint from gdb and locate the zone with the bloated size. This
379 * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test. The
380 * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs.
381 * See the help in the kgmacros for usage info.
382 *
383 *
384 * Zone corruption logging
385 *
386 * Logging can also be used to help identify the source of a zone corruption. First, identify the zone
387 * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args. When -zc is used in conjunction
388 * with zlog, it changes the logging style to track both allocations and frees to the zone. So when the
389 * corruption is detected, examining the log will show you the stack traces of the callers who last allocated
390 * and freed any particular element in the zone. Use the findelem kgmacro with the address of the element that's been
391 * corrupted to examine its history. This should lead to the source of the corruption.
392 */
393
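/*
 * Illustrative boot-args for the two styles of logging described above
 * (a sketch; "vm.objects" is just an example zone name):
 *
 *	zlog=vm.objects zrecs=8000	leak-style logging of the "vm objects" zone
 *	-zc zlog=vm.objects		corruption-style logging (tracks allocs and frees)
 *
 * A '.' in the zlog value stands for a space in the zone name, since spaces
 * cannot be passed in boot-args (see log_this_zone() below).
 */
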
394 static int log_records; /* size of the log, expressed in number of records */
395
396 #define MAX_ZONE_NAME 32 /* max length of a zone name we can take from the boot-args */
397
398 static char zone_name_to_log[MAX_ZONE_NAME] = ""; /* the zone name we're logging, if any */
399
400 /*
401 * The number of records in the log is configurable via the zrecs parameter in boot-args. Set this to
402 * the number of records you want in the log. For example, "zrecs=1000" sets it to 1000 records. Note
403 * that the larger the size of the log, the slower the system will run due to linear searching in the log,
404 * but one doesn't generally care about performance when tracking down a leak. The log is capped at 8000
405 * records since going much larger than this tends to make the system unresponsive and unbootable on small
406 * memory configurations. The default value is 4000 records.
407 */
408 #if defined(__LP64__)
409 #define ZRECORDS_MAX 16000 /* Max records allowed in the log */
410 #else
411 #define ZRECORDS_MAX 8000 /* Max records allowed in the log */
412 #endif
413 #define ZRECORDS_DEFAULT 4000		/* default records in log if zrecs is not specified in boot-args */
414
415 /*
416 * Each record in the log contains a pointer to the zone element it refers to, a "time" number that allows
417 * the records to be ordered chronologically, and a small array to hold the pc's from the stack trace. A
418 * record is added to the log each time a zalloc() is done in the zone_of_interest. For leak debugging,
419 * the record is cleared when a zfree() is done. For corruption debugging, the log tracks both allocs and frees.
420 * If the log fills, old records are replaced as if it were a circular buffer.
421 */
422
423 struct zrecord {
424 	void		*z_element;		/* the element that was zalloc'ed or zfree'ed */
425 uint32_t z_opcode:1, /* whether it was a zalloc or zfree */
426 z_time:31; /* time index when operation was done */
427 void *z_pc[MAX_ZTRACE_DEPTH]; /* stack trace of caller */
428 };
429
430 /*
431 * Opcodes for the z_opcode field:
432 */
433
434 #define ZOP_ALLOC 1
435 #define ZOP_FREE 0
436
437 /*
438 * The allocation log and all the related variables are protected by the zone lock for the zone_of_interest
439 */
440
441 static struct zrecord *zrecords; /* the log itself, dynamically allocated when logging is enabled */
442 static int zcurrent = 0; /* index of the next slot in the log to use */
443 static int zrecorded = 0; /* number of allocations recorded in the log */
444 static unsigned int ztime = 0; /* a timestamp of sorts */
445 static zone_t zone_of_interest = NULL; /* the zone being watched; corresponds to zone_name_to_log */
446
447 /*
448 * Decide if we want to log this zone by doing a string compare between a zone name and the name
449 * of the zone to log. Return true if the strings are equal, false otherwise. Because it's not
450 * possible to include spaces in strings passed in via the boot-args, a period in the logname will
451 * match a space in the zone name.
452 */
453
454 static int
455 log_this_zone(const char *zonename, const char *logname)
456 {
457 int len;
458 const char *zc = zonename;
459 const char *lc = logname;
460
461 /*
462 * Compare the strings. We bound the compare by MAX_ZONE_NAME.
463 */
464
465 for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) {
466
467 /*
468 		 * If the current characters don't match, check for a space in
469 		 * the zone name and a corresponding period in the log name.
470 * If that's not there, then the strings don't match.
471 */
472
473 if (*zc != *lc && !(*zc == ' ' && *lc == '.'))
474 break;
475
476 /*
477 * The strings are equal so far. If we're at the end, then it's a match.
478 */
479
480 if (*zc == '\0')
481 return TRUE;
482 }
483
484 return FALSE;
485 }
486
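/*
 * A minimal usage sketch for log_this_zone() (not compiled; the zone name
 * is hypothetical):
 */
#if 0
static void
log_this_zone_example(void)
{
	/* a '.' in the boot-arg value matches a ' ' in the zone name, so both of these match */
	assert(log_this_zone("vm objects", "vm.objects"));
	assert(log_this_zone("vm objects", "vm objects"));
	/* a different name does not */
	assert(!log_this_zone("vm objects", "kalloc.64"));
}
#endif
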
487
488 /*
489 * Test if we want to log this zalloc/zfree event. We log if this is the zone we're interested in and
490 * the buffer for the records has been allocated.
491 */
492
493 #define DO_LOGGING(z) (zrecords && (z) == zone_of_interest)
494
495 extern boolean_t zlog_ready;
496
497 #if CONFIG_ZLEAKS
498 #pragma mark -
499 #pragma mark Zone Leak Detection
500
501 /*
502 * The zone leak detector, abbreviated 'zleak', keeps track of a subset of the currently outstanding
503 * allocations made by the zone allocator. Every z_sample_factor allocations in each zone, we capture a
504 * backtrace. On every free, we examine the table and, if the allocation was being tracked,
505 * stop tracking it.
506 *
507 * We track the allocations in the zallocations hash table, which stores the address that was returned from
508 * the zone allocator. Each stored entry in the zallocations table points to an entry in the ztraces table, which
509 * stores the backtrace associated with that allocation. This provides uniquing for the relatively large
510 * backtraces - we don't store them more than once.
511 *
512 * Data collection begins when the zone map is 50% full, and only occurs for zones that are taking up
513 * a large amount of virtual space.
514 */
515 #define ZLEAK_STATE_ENABLED 0x01 /* Zone leak monitoring should be turned on if zone_map fills up. */
516 #define ZLEAK_STATE_ACTIVE 0x02 /* We are actively collecting traces. */
517 #define ZLEAK_STATE_ACTIVATING 0x04 /* Some thread is doing setup; others should move along. */
518 #define ZLEAK_STATE_FAILED 0x08 /* Attempt to allocate tables failed. We will not try again. */
519 uint32_t zleak_state = 0; /* State of collection, as above */
520
521 boolean_t panic_include_ztrace = FALSE; /* Enable zleak logging on panic */
522 vm_size_t zleak_global_tracking_threshold; /* Size of zone map at which to start collecting data */
523 vm_size_t zleak_per_zone_tracking_threshold; /* Size a zone will have before we will collect data on it */
524 unsigned int z_sample_factor = 1000; /* Allocations per sample attempt */
525
526 /*
527 * Counters for allocation statistics.
528 */
529
530 /* Times two active records want to occupy the same spot */
531 unsigned int z_alloc_collisions = 0;
532 unsigned int z_trace_collisions = 0;
533
534 /* Times a new record lands on a spot previously occupied by a freed allocation */
535 unsigned int z_alloc_overwrites = 0;
536 unsigned int z_trace_overwrites = 0;
537
538 /* Times a new alloc or trace is put into the hash table */
539 unsigned int z_alloc_recorded = 0;
540 unsigned int z_trace_recorded = 0;
541
542 /* Times zleak_log returned false due to not being able to acquire the lock */
543 unsigned int z_total_conflicts = 0;
544
545
546 #pragma mark struct zallocation
547 /*
548 * Structure for keeping track of an allocation
549 * An allocation bucket is in use if its element is not NULL
550 */
551 struct zallocation {
552 uintptr_t za_element; /* the element that was zalloc'ed or zfree'ed, NULL if bucket unused */
553 vm_size_t za_size; /* how much memory did this allocation take up? */
554 uint32_t za_trace_index; /* index into ztraces for backtrace associated with allocation */
555 /* TODO: #if this out */
556 uint32_t za_hit_count; /* for determining effectiveness of hash function */
557 };
558
559 /* Size must be a power of two for the zhash to be able to just mask off bits instead of mod */
560 #define ZLEAK_ALLOCATION_MAP_NUM 16384
561 #define ZLEAK_TRACE_MAP_NUM 8192
562
563 uint32_t zleak_alloc_buckets = ZLEAK_ALLOCATION_MAP_NUM;
564 uint32_t zleak_trace_buckets = ZLEAK_TRACE_MAP_NUM;
565
566 vm_size_t zleak_max_zonemap_size;
567
568 /* Hashmaps of allocations and their corresponding traces */
569 static struct zallocation* zallocations;
570 static struct ztrace* ztraces;
571
572 /* not static so that panic can see this, see kern/debug.c */
573 struct ztrace* top_ztrace;
574
575 /* Lock to protect zallocations, ztraces, and top_ztrace from concurrent modification. */
576 static lck_mtx_t zleak_lock;
577 static lck_attr_t zleak_lock_attr;
578 static lck_grp_t zleak_lock_grp;
579 static lck_grp_attr_t zleak_lock_grp_attr;
580
581 /*
582 * Initializes the zone leak monitor. Called from zone_init()
583 */
584 static void
585 zleak_init(vm_size_t max_zonemap_size)
586 {
587 char scratch_buf[16];
588 boolean_t zleak_enable_flag = FALSE;
589
590 zleak_max_zonemap_size = max_zonemap_size;
591 zleak_global_tracking_threshold = max_zonemap_size / 2;
592 zleak_per_zone_tracking_threshold = zleak_global_tracking_threshold / 8;
593
594 /* -zleakoff (flag to disable zone leak monitor) */
595 if (PE_parse_boot_argn("-zleakoff", scratch_buf, sizeof(scratch_buf))) {
596 zleak_enable_flag = FALSE;
597 printf("zone leak detection disabled\n");
598 } else {
599 zleak_enable_flag = TRUE;
600 printf("zone leak detection enabled\n");
601 }
602
603 /* zfactor=XXXX (override how often to sample the zone allocator) */
604 if (PE_parse_boot_argn("zfactor", &z_sample_factor, sizeof(z_sample_factor))) {
605 printf("Zone leak factor override:%u\n", z_sample_factor);
606 }
607
608 /* zleak-allocs=XXXX (override number of buckets in zallocations) */
609 if (PE_parse_boot_argn("zleak-allocs", &zleak_alloc_buckets, sizeof(zleak_alloc_buckets))) {
610 printf("Zone leak alloc buckets override:%u\n", zleak_alloc_buckets);
611 		/* uses 'is power of 2' trick: ((0x01000 & 0x00FFF) == 0) */
612 if (zleak_alloc_buckets == 0 || (zleak_alloc_buckets & (zleak_alloc_buckets-1))) {
613 printf("Override isn't a power of two, bad things might happen!");
614 }
615 }
616
617 /* zleak-traces=XXXX (override number of buckets in ztraces) */
618 if (PE_parse_boot_argn("zleak-traces", &zleak_trace_buckets, sizeof(zleak_trace_buckets))) {
619 printf("Zone leak trace buckets override:%u\n", zleak_trace_buckets);
620 		/* uses 'is power of 2' trick: ((0x01000 & 0x00FFF) == 0) */
621 if (zleak_trace_buckets == 0 || (zleak_trace_buckets & (zleak_trace_buckets-1))) {
622 printf("Override isn't a power of two, bad things might happen!");
623 }
624 }
625
626 /* allocate the zleak_lock */
627 lck_grp_attr_setdefault(&zleak_lock_grp_attr);
628 lck_grp_init(&zleak_lock_grp, "zleak_lock", &zleak_lock_grp_attr);
629 lck_attr_setdefault(&zleak_lock_attr);
630 lck_mtx_init(&zleak_lock, &zleak_lock_grp, &zleak_lock_attr);
631
632 if (zleak_enable_flag) {
633 zleak_state = ZLEAK_STATE_ENABLED;
634 }
635 }
636
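/*
 * Illustrative boot-args for the knobs parsed above (a sketch; the values
 * shown are examples only):
 *
 *	-zleakoff				disable zone leak detection entirely
 *	zfactor=500				sample every 500th allocation instead of every 1000th
 *	zleak-allocs=32768 zleak-traces=16384	larger hash tables (both must be powers of two)
 */
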
637 #if CONFIG_ZLEAKS
638
639 /*
640 * Support for kern.zleak.active sysctl - a simplified
641 * version of the zleak_state variable.
642 */
643 int
644 get_zleak_state(void)
645 {
646 if (zleak_state & ZLEAK_STATE_FAILED)
647 return (-1);
648 if (zleak_state & ZLEAK_STATE_ACTIVE)
649 return (1);
650 return (0);
651 }
652
653 #endif
654
655
656 kern_return_t
657 zleak_activate(void)
658 {
659 kern_return_t retval;
660 vm_size_t z_alloc_size = zleak_alloc_buckets * sizeof(struct zallocation);
661 vm_size_t z_trace_size = zleak_trace_buckets * sizeof(struct ztrace);
662 void *allocations_ptr = NULL;
663 void *traces_ptr = NULL;
664
665 /* Only one thread attempts to activate at a time */
666 if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
667 return KERN_SUCCESS;
668 }
669
670 /* Indicate that we're doing the setup */
671 lck_mtx_lock_spin(&zleak_lock);
672 if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
673 lck_mtx_unlock(&zleak_lock);
674 return KERN_SUCCESS;
675 }
676
677 zleak_state |= ZLEAK_STATE_ACTIVATING;
678 lck_mtx_unlock(&zleak_lock);
679
680 /* Allocate and zero tables */
681 retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size);
682 if (retval != KERN_SUCCESS) {
683 goto fail;
684 }
685
686 retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&traces_ptr, z_trace_size);
687 if (retval != KERN_SUCCESS) {
688 goto fail;
689 }
690
691 bzero(allocations_ptr, z_alloc_size);
692 bzero(traces_ptr, z_trace_size);
693
694 /* Everything's set. Install tables, mark active. */
695 zallocations = allocations_ptr;
696 ztraces = traces_ptr;
697
698 /*
699 * Initialize the top_ztrace to the first entry in ztraces,
700 * so we don't have to check for null in zleak_log
701 */
702 top_ztrace = &ztraces[0];
703
704 /*
705 * Note that we do need a barrier between installing
706 * the tables and setting the active flag, because the zfree()
707 * path accesses the table without a lock if we're active.
708 */
709 lck_mtx_lock_spin(&zleak_lock);
710 zleak_state |= ZLEAK_STATE_ACTIVE;
711 zleak_state &= ~ZLEAK_STATE_ACTIVATING;
712 lck_mtx_unlock(&zleak_lock);
713
714 return 0;
715
716 fail:
717 /*
718 * If we fail to allocate memory, don't further tax
719 * the system by trying again.
720 */
721 lck_mtx_lock_spin(&zleak_lock);
722 zleak_state |= ZLEAK_STATE_FAILED;
723 zleak_state &= ~ZLEAK_STATE_ACTIVATING;
724 lck_mtx_unlock(&zleak_lock);
725
726 if (allocations_ptr != NULL) {
727 kmem_free(kernel_map, (vm_offset_t)allocations_ptr, z_alloc_size);
728 }
729
730 if (traces_ptr != NULL) {
731 kmem_free(kernel_map, (vm_offset_t)traces_ptr, z_trace_size);
732 }
733
734 return retval;
735 }
736
737 /*
738 * TODO: What about allocations that never get deallocated,
739 * especially ones with unique backtraces? Should we wait to record
740 * until after boot has completed?
741 * (How many persistent zallocs are there?)
742 */
743
744 /*
745 * This function records the allocation in the allocations table,
746 * and stores the associated backtrace in the traces table
747 * (or just increments the refcount if the trace is already recorded)
748 * If the allocation slot is in use, the old allocation is replaced with the new allocation, and
749 * the associated trace's refcount is decremented.
750 * If the trace slot is occupied by a different trace (a hash collision), it bails without recording.
751 * The trace's refcount is incremented by the amount of memory the allocation consumes.
752 * The return value indicates whether to try again next time.
753 */
754 static boolean_t
755 zleak_log(uintptr_t* bt,
756 uintptr_t addr,
757 uint32_t depth,
758 vm_size_t allocation_size)
759 {
760 /* Quit if there's someone else modifying the hash tables */
761 if (!lck_mtx_try_lock_spin(&zleak_lock)) {
762 z_total_conflicts++;
763 return FALSE;
764 }
765
766 struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];
767
768 uint32_t trace_index = hashbacktrace(bt, depth, zleak_trace_buckets);
769 struct ztrace* trace = &ztraces[trace_index];
770
771 allocation->za_hit_count++;
772 trace->zt_hit_count++;
773
774 /*
775 * If the allocation bucket we want to be in is occupied, and if the occupier
776 * has the same trace as us, just bail.
777 */
778 if (allocation->za_element != (uintptr_t) 0 && trace_index == allocation->za_trace_index) {
779 z_alloc_collisions++;
780
781 lck_mtx_unlock(&zleak_lock);
782 return TRUE;
783 }
784
785 /* STEP 1: Store the backtrace in the traces array. */
786 /* A size of zero indicates that the trace bucket is free. */
787
788 if (trace->zt_size > 0 && bcmp(trace->zt_stack, bt, (depth * sizeof(uintptr_t))) != 0 ) {
789 /*
790 * Different unique trace with same hash!
791 * Just bail - if we're trying to record the leaker, hopefully the other trace will be deallocated
792 * and get out of the way for later chances
793 */
794 trace->zt_collisions++;
795 z_trace_collisions++;
796
797 lck_mtx_unlock(&zleak_lock);
798 return TRUE;
799 } else if (trace->zt_size > 0) {
800 /* Same trace, already added, so increment refcount */
801 trace->zt_size += allocation_size;
802 } else {
803 /* Found an unused trace bucket, record the trace here! */
804 if (trace->zt_depth != 0) /* if this slot was previously used but not currently in use */
805 z_trace_overwrites++;
806
807 z_trace_recorded++;
808 trace->zt_size = allocation_size;
809 memcpy(trace->zt_stack, bt, (depth * sizeof(uintptr_t)) );
810
811 trace->zt_depth = depth;
812 trace->zt_collisions = 0;
813 }
814
815 /* STEP 2: Store the allocation record in the allocations array. */
816
817 if (allocation->za_element != (uintptr_t) 0) {
818 /*
819 * Straight up replace any allocation record that was there. We don't want to do the work
820 * to preserve the allocation entries that were there, because we only record a subset of the
821 * allocations anyways.
822 */
823
824 z_alloc_collisions++;
825
826 struct ztrace* associated_trace = &ztraces[allocation->za_trace_index];
827 /* Knock off old allocation's size, not the new allocation */
828 associated_trace->zt_size -= allocation->za_size;
829 } else if (allocation->za_trace_index != 0) {
830 /* Slot previously used but not currently in use */
831 z_alloc_overwrites++;
832 }
833
834 allocation->za_element = addr;
835 allocation->za_trace_index = trace_index;
836 allocation->za_size = allocation_size;
837
838 z_alloc_recorded++;
839
840 if (top_ztrace->zt_size < trace->zt_size)
841 top_ztrace = trace;
842
843 lck_mtx_unlock(&zleak_lock);
844 return TRUE;
845 }
846
847 /*
848 * Free the allocation record and release the stacktrace.
849 * This should be as fast as possible because it will be called for every free.
850 */
851 static void
852 zleak_free(uintptr_t addr,
853 vm_size_t allocation_size)
854 {
855 if (addr == (uintptr_t) 0)
856 return;
857
858 struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];
859
860 /* Double-checked locking: check to find out if we're interested, lock, check to make
861 * sure it hasn't changed, then modify it, and release the lock.
862 */
863
864 if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
865 /* if the allocation was the one, grab the lock, check again, then delete it */
866 lck_mtx_lock_spin(&zleak_lock);
867
868 if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
869 struct ztrace *trace;
870
871 /* allocation_size had better match what was passed into zleak_log - otherwise someone is freeing into the wrong zone! */
872 if (allocation->za_size != allocation_size) {
873 panic("Freeing as size %lu memory that was allocated with size %lu\n",
874 (uintptr_t)allocation_size, (uintptr_t)allocation->za_size);
875 }
876
877 trace = &ztraces[allocation->za_trace_index];
878
879 /* size of 0 indicates trace bucket is unused */
880 if (trace->zt_size > 0) {
881 trace->zt_size -= allocation_size;
882 }
883
884 /* A NULL element means the allocation bucket is unused */
885 allocation->za_element = 0;
886 }
887 lck_mtx_unlock(&zleak_lock);
888 }
889 }
890
891 #endif /* CONFIG_ZLEAKS */
892
893 /* These functions are outside of CONFIG_ZLEAKS because they are also used in
894 * mbuf.c for mbuf leak-detection. This is why they lack the z_ prefix.
895 */
896
897 /*
898 * This function captures a backtrace from the current stack and
899 * returns the number of frames captured, limited by max_frames.
900 * It's fast because it does no checking to make sure there isn't bad data.
901 * Since it's only called from threads that we're going to keep executing,
902 * if there's bad data we were going to die eventually.
903 * This seems to work for x86 and x86_64.
904 * ARMTODO: Test it on ARM, I think it will work but I can't test it. If it works, remove the ifdef.
905 * If this function is inlined, it doesn't record the frame of the function it's inside.
906 * (because there's no stack frame!)
907 */
908 uint32_t
909 fastbacktrace(uintptr_t* bt, uint32_t max_frames)
910 {
911 #if defined(__x86_64__) || defined(__i386__)
912 uintptr_t* frameptr = NULL, *frameptr_next = NULL;
913 uintptr_t retaddr = 0;
914 uint32_t frame_index = 0, frames = 0;
915 uintptr_t kstackb, kstackt;
916
917 kstackb = current_thread()->kernel_stack;
918 kstackt = kstackb + kernel_stack_size;
919 /* Load stack frame pointer (EBP on x86) into frameptr */
920 frameptr = __builtin_frame_address(0);
921
922 while (frameptr != NULL && frame_index < max_frames ) {
923 /* Next frame pointer is pointed to by the previous one */
924 frameptr_next = (uintptr_t*) *frameptr;
925
926 		/* Bail if we see a zero in the stack frame; that means we've reached the top of the stack */
927 /* That also means the return address is worthless, so don't record it */
928 if (frameptr_next == NULL)
929 break;
930 /* Verify thread stack bounds */
931 if (((uintptr_t)frameptr_next > kstackt) || ((uintptr_t)frameptr_next < kstackb))
932 break;
933 /* Pull return address from one spot above the frame pointer */
934 retaddr = *(frameptr + 1);
935
936 /* Store it in the backtrace array */
937 bt[frame_index++] = retaddr;
938
939 frameptr = frameptr_next;
940 }
941
942 /* Save the number of frames captured for return value */
943 frames = frame_index;
944
945 /* Fill in the rest of the backtrace with zeros */
946 while (frame_index < max_frames)
947 bt[frame_index++] = 0;
948
949 return frames;
950 #else
951 return OSBacktrace((void*)bt, max_frames);
952 #endif
953 }
954
955 /* "Thomas Wang's 32/64 bit mix functions." http://www.concentric.net/~Ttwang/tech/inthash.htm */
956 uintptr_t
957 hash_mix(uintptr_t x)
958 {
959 #ifndef __LP64__
960 x += ~(x << 15);
961 x ^= (x >> 10);
962 x += (x << 3 );
963 x ^= (x >> 6 );
964 x += ~(x << 11);
965 x ^= (x >> 16);
966 #else
967 x += ~(x << 32);
968 x ^= (x >> 22);
969 x += ~(x << 13);
970 x ^= (x >> 8 );
971 x += (x << 3 );
972 x ^= (x >> 15);
973 x += ~(x << 27);
974 x ^= (x >> 31);
975 #endif
976 return x;
977 }
978
979 uint32_t
980 hashbacktrace(uintptr_t* bt, uint32_t depth, uint32_t max_size)
981 {
982
983 uintptr_t hash = 0;
984 uintptr_t mask = max_size - 1;
985
986 while (--depth) {
987 hash += bt[depth];
988 }
989
990 hash = hash_mix(hash) & mask;
991
992 assert(hash < max_size);
993
994 return (uint32_t) hash;
995 }
996
997 /*
998 * TODO: Determine how well distributed this is
999 * max_size must be a power of 2, e.g. 0x10000, because 0x10000-1 is 0x0FFFF, which makes a good bitmask
1000 */
1001 uint32_t
1002 hashaddr(uintptr_t pt, uint32_t max_size)
1003 {
1004 uintptr_t hash = 0;
1005 uintptr_t mask = max_size - 1;
1006
1007 hash = hash_mix(pt) & mask;
1008
1009 assert(hash < max_size);
1010
1011 return (uint32_t) hash;
1012 }
1013
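/*
 * A minimal sketch (not compiled) of how these helpers combine, mirroring
 * what zleak_log()/zleak_free() do above; "elem" is a hypothetical element
 * address and the table sizes are example values (they must be powers of
 * two so the hash_mix() output can be masked instead of taken modulo):
 */
#if 0
static void
zleak_hash_example(void *elem)
{
	uintptr_t bt[MAX_ZTRACE_DEPTH];
	uint32_t depth, trace_idx, alloc_idx;

	depth = fastbacktrace(bt, MAX_ZTRACE_DEPTH);	/* capture the caller's stack */
	trace_idx = hashbacktrace(bt, depth, 8192);	/* bucket index into a traces table */
	alloc_idx = hashaddr((uintptr_t)elem, 16384);	/* bucket index into an allocations table */

	(void)trace_idx;
	(void)alloc_idx;
}
#endif
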
1014 /* End of all leak-detection code */
1015 #pragma mark -
1016
1017 /*
1018 * zinit initializes a new zone. The zone data structures themselves
1019 * are stored in a zone, which is initially a static structure that
1020 * is initialized by zone_init.
1021 */
1022 zone_t
1023 zinit(
1024 vm_size_t size, /* the size of an element */
1025 vm_size_t max, /* maximum memory to use */
1026 vm_size_t alloc, /* allocation size */
1027 const char *name) /* a name for the zone */
1028 {
1029 zone_t z;
1030
1031 if (zone_zone == ZONE_NULL) {
1032 if (zget_space(NULL, sizeof(struct zone), (vm_offset_t *)&z)
1033 != KERN_SUCCESS)
1034 return(ZONE_NULL);
1035 } else
1036 z = (zone_t) zalloc(zone_zone);
1037 if (z == ZONE_NULL)
1038 return(ZONE_NULL);
1039
1040 /*
1041 * Round off all the parameters appropriately.
1042 */
1043 if (size < sizeof(z->free_elements))
1044 size = sizeof(z->free_elements);
1045 size = ((size-1) + sizeof(z->free_elements)) -
1046 ((size-1) % sizeof(z->free_elements));
1047 if (alloc == 0)
1048 alloc = PAGE_SIZE;
1049 alloc = round_page(alloc);
1050 max = round_page(max);
1051 /*
1052 	 * we look for an allocation size with less than 1% waste
1053 	 * up to 5 pages in size;
1054 	 * otherwise, we look for the allocation size with the least fragmentation
1055 	 * in the range of 1 - 5 pages.
1056 	 * This size will be used unless
1057 	 * the user's suggested allocation size is larger AND has less fragmentation
1058 */
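	/*
	 * Worked example (assuming a 4K page): for a 160-byte element,
	 * one page wastes 4096 % 160 = 96 bytes (more than 1% of 4096),
	 * but two pages waste 8192 % 160 = 32 bytes (less than 1% of 8192),
	 * so the loop below settles on a 2-page allocation size.
	 */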
1059 #if ZONE_ALIAS_ADDR
1060 if ((size < PAGE_SIZE) && (PAGE_SIZE % size <= PAGE_SIZE / 10))
1061 alloc = PAGE_SIZE;
1062 else
1063 #endif
1064 { vm_size_t best, waste; unsigned int i;
1065 best = PAGE_SIZE;
1066 waste = best % size;
1067
1068 for (i = 1; i <= 5; i++) {
1069 vm_size_t tsize, twaste;
1070
1071 tsize = i * PAGE_SIZE;
1072
1073 if ((tsize % size) < (tsize / 100)) {
1074 alloc = tsize;
1075 goto use_this_allocation;
1076 }
1077 twaste = tsize % size;
1078 if (twaste < waste)
1079 best = tsize, waste = twaste;
1080 }
1081 if (alloc <= best || (alloc % size >= waste))
1082 alloc = best;
1083 }
1084 use_this_allocation:
1085 if (max && (max < alloc))
1086 max = alloc;
1087
1088 z->free_elements = 0;
1089 z->cur_size = 0;
1090 z->max_size = max;
1091 z->elem_size = size;
1092 z->alloc_size = alloc;
1093 z->zone_name = name;
1094 z->count = 0;
1095 z->sum_count = 0LL;
1096 z->doing_alloc = FALSE;
1097 z->doing_gc = FALSE;
1098 z->exhaustible = FALSE;
1099 z->collectable = TRUE;
1100 z->allows_foreign = FALSE;
1101 z->expandable = TRUE;
1102 z->waiting = FALSE;
1103 z->async_pending = FALSE;
1104 z->caller_acct = TRUE;
1105 z->noencrypt = FALSE;
1106
1107 #if CONFIG_ZLEAKS
1108 z->num_allocs = 0;
1109 z->num_frees = 0;
1110 z->zleak_capture = 0;
1111 z->zleak_on = FALSE;
1112 #endif /* CONFIG_ZLEAKS */
1113
1114 #if ZONE_DEBUG
1115 z->active_zones.next = z->active_zones.prev = NULL;
1116 zone_debug_enable(z);
1117 #endif /* ZONE_DEBUG */
1118 lock_zone_init(z);
1119
1120 /*
1121 * Add the zone to the all-zones list.
1122 * If we are tracking zone info per task, and we have
1123 * already used all the available stat slots, then keep
1124 * using the overflow zone slot.
1125 */
1126 z->next_zone = ZONE_NULL;
1127 thread_call_setup(&z->call_async_alloc, zalloc_async, z);
1128 simple_lock(&all_zones_lock);
1129 *last_zone = z;
1130 last_zone = &z->next_zone;
1131 z->index = num_zones;
1132 if (zinfo_per_task) {
1133 if (num_zones > ZONES_MAX)
1134 z->index = ZONES_MAX;
1135 }
1136 num_zones++;
1137 simple_unlock(&all_zones_lock);
1138
1139 /*
1140 * Check if we should be logging this zone. If so, remember the zone pointer.
1141 */
1142
1143 if (log_this_zone(z->zone_name, zone_name_to_log)) {
1144 zone_of_interest = z;
1145 }
1146
1147 /*
1148 * If we want to log a zone, see if we need to allocate buffer space for the log. Some vm related zones are
1149 * zinit'ed before we can do a kmem_alloc, so we have to defer allocation in that case. zlog_ready is set to
1150 * TRUE once enough of the VM system is up and running to allow a kmem_alloc to work. If we want to log one
1151 * of the VM related zones that's set up early on, we will skip allocation of the log until zinit is called again
1152 * later on some other zone. So note we may be allocating a buffer to log a zone other than the one being initialized
1153 * right now.
1154 */
1155
1156 if (zone_of_interest != NULL && zrecords == NULL && zlog_ready) {
1157 if (kmem_alloc(kernel_map, (vm_offset_t *)&zrecords, log_records * sizeof(struct zrecord)) == KERN_SUCCESS) {
1158
1159 /*
1160 * We got the memory for the log. Zero it out since the code needs this to identify unused records.
1161 * At this point, everything is set up and we're ready to start logging this zone.
1162 */
1163
1164 bzero((void *)zrecords, log_records * sizeof(struct zrecord));
1165 printf("zone: logging started for zone %s (%p)\n", zone_of_interest->zone_name, zone_of_interest);
1166
1167 } else {
1168 printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n");
1169 zone_of_interest = NULL;
1170 }
1171 }
1172
1173 return(z);
1174 }
1175
1176 /*
1177 * Cram the given memory into the specified zone.
1178 */
1179 void
1180 zcram(
1181 register zone_t zone,
1182 void *newaddr,
1183 vm_size_t size)
1184 {
1185 register vm_size_t elem_size;
1186 vm_offset_t newmem = (vm_offset_t) newaddr;
1187
1188 /* Basic sanity checks */
1189 assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
1190 assert(!zone->collectable || zone->allows_foreign
1191 || (from_zone_map(newmem, size)));
1192
1193 elem_size = zone->elem_size;
1194
1195 lock_zone(zone);
1196 while (size >= elem_size) {
1197 ADD_TO_ZONE(zone, newmem);
1198 if (from_zone_map(newmem, elem_size))
1199 zone_page_alloc(newmem, elem_size);
1200 zone->count++; /* compensate for ADD_TO_ZONE */
1201 size -= elem_size;
1202 newmem += elem_size;
1203 zone->cur_size += elem_size;
1204 }
1205 unlock_zone(zone);
1206 }
1207
1208 /*
1209 * Contiguous space allocator for non-paged zones. Allocates "size" amount
1210 * of memory from zone_map.
1211 */
1212
1213 kern_return_t
1214 zget_space(
1215 zone_t zone,
1216 vm_offset_t size,
1217 vm_offset_t *result)
1218 {
1219 vm_offset_t new_space = 0;
1220 vm_size_t space_to_add = 0;
1221
1222 simple_lock(&zget_space_lock);
1223 while ((zalloc_next_space + size) > zalloc_end_of_space) {
1224 /*
1225 * Add at least one page to allocation area.
1226 */
1227
1228 space_to_add = round_page(size);
1229
1230 if (new_space == 0) {
1231 kern_return_t retval;
1232 int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT;
1233
1234 /*
1235 * Memory cannot be wired down while holding
1236 * any locks that the pageout daemon might
1237 * need to free up pages. [Making the zget_space
1238 * lock a complex lock does not help in this
1239 * regard.]
1240 *
1241 * Unlock and allocate memory. Because several
1242 * threads might try to do this at once, don't
1243 * use the memory before checking for available
1244 * space again.
1245 */
1246
1247 simple_unlock(&zget_space_lock);
1248
1249 if (zone == NULL || zone->noencrypt)
1250 zflags |= KMA_NOENCRYPT;
1251
1252 retval = kernel_memory_allocate(zone_map, &new_space, space_to_add, 0, zflags);
1253 if (retval != KERN_SUCCESS)
1254 return(retval);
1255 #if ZONE_ALIAS_ADDR
1256 if (space_to_add == PAGE_SIZE)
1257 new_space = zone_alias_addr(new_space);
1258 #endif
1259 zone_page_init(new_space, space_to_add,
1260 ZONE_PAGE_USED);
1261 simple_lock(&zget_space_lock);
1262 continue;
1263 }
1264
1265
1266 /*
1267 * Memory was allocated in a previous iteration.
1268 *
1269 * Check whether the new region is contiguous
1270 * with the old one.
1271 */
1272
1273 if (new_space != zalloc_end_of_space) {
1274 /*
1275 * Throw away the remainder of the
1276 * old space, and start a new one.
1277 */
1278 zalloc_wasted_space +=
1279 zalloc_end_of_space - zalloc_next_space;
1280 zalloc_next_space = new_space;
1281 }
1282
1283 zalloc_end_of_space = new_space + space_to_add;
1284
1285 new_space = 0;
1286 }
1287 *result = zalloc_next_space;
1288 zalloc_next_space += size;
1289 simple_unlock(&zget_space_lock);
1290
1291 if (new_space != 0)
1292 kmem_free(zone_map, new_space, space_to_add);
1293
1294 return(KERN_SUCCESS);
1295 }
1296
1297
1298 /*
1299 * Steal memory for the zone package. Called from
1300 * vm_page_bootstrap().
1301 */
1302 void
1303 zone_steal_memory(void)
1304 {
1305 zdata_size = round_page(128*sizeof(struct zone));
1306 zdata = (vm_offset_t)((char *)pmap_steal_memory(zdata_size) - (char *)0);
1307 }
1308
1309
1310 /*
1311 * Fill a zone with enough memory to contain at least nelem elements.
1312 * Memory is obtained with kmem_alloc_kobject from the kernel_map.
1313 * Return the number of elements actually put into the zone, which may
1314 * be more than the caller asked for since the memory allocation is
1315 * rounded up to a full page.
1316 */
1317 int
1318 zfill(
1319 zone_t zone,
1320 int nelem)
1321 {
1322 kern_return_t kr;
1323 vm_size_t size;
1324 vm_offset_t memory;
1325 int nalloc;
1326
1327 assert(nelem > 0);
1328 if (nelem <= 0)
1329 return 0;
1330 size = nelem * zone->elem_size;
1331 size = round_page(size);
1332 kr = kmem_alloc_kobject(kernel_map, &memory, size);
1333 if (kr != KERN_SUCCESS)
1334 return 0;
1335
1336 zone_change(zone, Z_FOREIGN, TRUE);
1337 zcram(zone, (void *)memory, size);
1338 nalloc = (int)(size / zone->elem_size);
1339 assert(nalloc >= nelem);
1340
1341 return nalloc;
1342 }
1343
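/*
 * A minimal usage sketch of the zone API above (not compiled; the element
 * type, the sizes and the zone name are hypothetical):
 */
#if 0
static void
widget_zone_example(void)
{
	struct widget { int w_state; void *w_data; };
	static zone_t widget_zone;
	struct widget *w;

	widget_zone = zinit((vm_size_t)sizeof(struct widget),		/* element size */
			8192 * (vm_size_t)sizeof(struct widget),	/* max memory for this zone */
			PAGE_SIZE,					/* allocation chunk size */
			"widgets");					/* zone name */

	(void) zfill(widget_zone, 64);		/* optionally prime the zone with ~64 elements */

	w = (struct widget *)zalloc(widget_zone);
	/* ... use w ... */
	zfree(widget_zone, w);
}
#endif
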
1344 /*
1345 * Initialize the "zone of zones" which uses fixed memory allocated
1346 * earlier in memory initialization. zone_bootstrap is called
1347 * before zone_init.
1348 */
1349 void
1350 zone_bootstrap(void)
1351 {
1352 vm_size_t zone_zone_size;
1353 vm_offset_t zone_zone_space;
1354 char temp_buf[16];
1355
1356 #if 6094439
1357 /* enable zone checks by default, to try and catch offenders... */
1358 #if 0
1359 /* 7968354: turn "-zc" back off */
1360 check_freed_element = TRUE;
1361 /* 7995202: turn "-zp" back off */
1362 zfree_clear = TRUE;
1363 #endif
1364
1365 	/* ... but allow them to be turned off explicitly */
1366 if (PE_parse_boot_argn("-no_zc", temp_buf, sizeof (temp_buf))) {
1367 check_freed_element = FALSE;
1368 }
1369 if (PE_parse_boot_argn("-no_zp", temp_buf, sizeof (temp_buf))) {
1370 zfree_clear = FALSE;
1371 }
1372 #endif
1373
1374 /* see if we want freed zone element checking and/or poisoning */
1375 if (PE_parse_boot_argn("-zc", temp_buf, sizeof (temp_buf))) {
1376 check_freed_element = TRUE;
1377 }
1378
1379 if (PE_parse_boot_argn("-zp", temp_buf, sizeof (temp_buf))) {
1380 zfree_clear = TRUE;
1381 }
1382
1383 if (PE_parse_boot_argn("-zinfop", temp_buf, sizeof (temp_buf))) {
1384 zinfo_per_task = TRUE;
1385 }
1386
1387 /*
1388 	 * Check for and set up zone leak logging if requested via boot-args. We recognize two
1389 * boot-args:
1390 *
1391 * zlog=<zone_to_log>
1392 * zrecs=<num_records_in_log>
1393 *
1394 * The zlog arg is used to specify the zone name that should be logged, and zrecs is used to
1395 * control the size of the log. If zrecs is not specified, a default value is used.
1396 */
1397
1398 if (PE_parse_boot_argn("zlog", zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) {
1399 if (PE_parse_boot_argn("zrecs", &log_records, sizeof(log_records)) == TRUE) {
1400
1401 /*
1402 * Don't allow more than ZRECORDS_MAX records even if the user asked for more.
1403 * This prevents accidentally hogging too much kernel memory and making the system
1404 * unusable.
1405 */
1406
1407 log_records = MIN(ZRECORDS_MAX, log_records);
1408
1409 } else {
1410 log_records = ZRECORDS_DEFAULT;
1411 }
1412 }
1413
1414 simple_lock_init(&all_zones_lock, 0);
1415
1416 first_zone = ZONE_NULL;
1417 last_zone = &first_zone;
1418 num_zones = 0;
1419
1420 simple_lock_init(&zget_space_lock, 0);
1421 zalloc_next_space = zdata;
1422 zalloc_end_of_space = zdata + zdata_size;
1423 zalloc_wasted_space = 0;
1424
1425 /* assertion: nobody else called zinit before us */
1426 assert(zone_zone == ZONE_NULL);
1427 zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
1428 sizeof(struct zone), "zones");
1429 zone_change(zone_zone, Z_COLLECT, FALSE);
1430 zone_change(zone_zone, Z_CALLERACCT, FALSE);
1431 zone_change(zone_zone, Z_NOENCRYPT, TRUE);
1432
1433 zone_zone_size = zalloc_end_of_space - zalloc_next_space;
1434 zget_space(NULL, zone_zone_size, &zone_zone_space);
1435 zcram(zone_zone, (void *)zone_zone_space, zone_zone_size);
1436
1437 /* initialize fake zones and zone info if tracking by task */
1438 if (zinfo_per_task) {
1439 vm_size_t zisize = sizeof(zinfo_usage_store_t) * ZINFO_SLOTS;
1440 unsigned int i;
1441
1442 for (i = 0; i < num_fake_zones; i++)
1443 fake_zones[i].init(ZINFO_SLOTS - num_fake_zones + i);
1444 zinfo_zone = zinit(zisize, zisize * CONFIG_TASK_MAX,
1445 zisize, "per task zinfo");
1446 zone_change(zinfo_zone, Z_CALLERACCT, FALSE);
1447 }
1448 }
1449
1450 void
1451 zinfo_task_init(task_t task)
1452 {
1453 if (zinfo_per_task) {
1454 task->tkm_zinfo = zalloc(zinfo_zone);
1455 memset(task->tkm_zinfo, 0, sizeof(zinfo_usage_store_t) * ZINFO_SLOTS);
1456 } else {
1457 task->tkm_zinfo = NULL;
1458 }
1459 }
1460
1461 void
1462 zinfo_task_free(task_t task)
1463 {
1464 assert(task != kernel_task);
1465 if (task->tkm_zinfo != NULL) {
1466 zfree(zinfo_zone, task->tkm_zinfo);
1467 task->tkm_zinfo = NULL;
1468 }
1469 }
1470
1471 void
1472 zone_init(
1473 vm_size_t max_zonemap_size)
1474 {
1475 kern_return_t retval;
1476 vm_offset_t zone_min;
1477 vm_offset_t zone_max;
1478 vm_size_t zone_table_size;
1479
1480 retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
1481 FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT,
1482 &zone_map);
1483
1484 if (retval != KERN_SUCCESS)
1485 panic("zone_init: kmem_suballoc failed");
1486 zone_max = zone_min + round_page(max_zonemap_size);
1487 /*
1488 * Setup garbage collection information:
1489 */
1490 zone_table_size = atop_kernel(zone_max - zone_min) *
1491 sizeof(struct zone_page_table_entry);
1492 if (kmem_alloc_kobject(zone_map, (vm_offset_t *) &zone_page_table,
1493 zone_table_size) != KERN_SUCCESS)
1494 panic("zone_init");
1495 zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
1496 zone_pages = (unsigned int)atop_kernel(zone_max - zone_min);
1497 zone_map_min_address = zone_min;
1498 zone_map_max_address = zone_max;
1499
1500 lck_grp_attr_setdefault(&zone_lck_grp_attr);
1501 lck_grp_init(&zone_lck_grp, "zones", &zone_lck_grp_attr);
1502 lck_attr_setdefault(&zone_lck_attr);
1503 lck_mtx_init_ext(&zone_gc_lock, &zone_lck_ext, &zone_lck_grp, &zone_lck_attr);
1504
1505 zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
1506
1507 #if CONFIG_ZLEAKS
1508 /*
1509 * Initialize the zone leak monitor
1510 */
1511 zleak_init(max_zonemap_size);
1512 #endif /* CONFIG_ZLEAKS */
1513 }
1514
1515 extern volatile SInt32 kfree_nop_count;
1516
1517 #pragma mark -
1518 #pragma mark zalloc_canblock
1519
1520 /*
1521 * zalloc returns an element from the specified zone.
1522 */
1523 void *
1524 zalloc_canblock(
1525 register zone_t zone,
1526 boolean_t canblock)
1527 {
1528 vm_offset_t addr;
1529 kern_return_t retval;
1530 uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used in zone leak logging and zone leak detection */
1531 int numsaved = 0;
1532 int i;
1533
1534 #if CONFIG_ZLEAKS
1535 uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */
1536 #endif /* CONFIG_ZLEAKS */
1537
1538 assert(zone != ZONE_NULL);
1539
1540 lock_zone(zone);
1541
1542 /*
1543 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
1544 */
1545
1546 if (DO_LOGGING(zone))
1547 numsaved = OSBacktrace((void*) zbt, MAX_ZTRACE_DEPTH);
1548
1549 #if CONFIG_ZLEAKS
1550 /*
1551 * Zone leak detection: capture a backtrace every z_sample_factor
1552 * allocations in this zone.
1553 */
1554 if (zone->zleak_on && (zone->zleak_capture++ % z_sample_factor == 0)) {
1555 zone->zleak_capture = 1;
1556
1557 /* Avoid backtracing twice if zone logging is on */
1558 if (numsaved == 0 )
1559 zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH);
1560 else
1561 zleak_tracedepth = numsaved;
1562 }
1563 #endif /* CONFIG_ZLEAKS */
1564
1565 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1566
1567 while ((addr == 0) && canblock && (zone->doing_gc)) {
1568 zone->waiting = TRUE;
1569 zone_sleep(zone);
1570 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1571 }
1572
1573 while ((addr == 0) && canblock) {
1574 /*
1575 * If nothing was there, try to get more
1576 */
1577 if (zone->doing_alloc) {
1578 /*
1579 * Someone is allocating memory for this zone.
1580 * Wait for it to show up, then try again.
1581 */
1582 zone->waiting = TRUE;
1583 zone_sleep(zone);
1584 }
1585 else {
1586 if ((zone->cur_size + zone->elem_size) >
1587 zone->max_size) {
1588 if (zone->exhaustible)
1589 break;
1590 if (zone->expandable) {
1591 /*
1592 * We're willing to overflow certain
1593 * zones, but not without complaining.
1594 *
1595 * This is best used in conjunction
1596 * with the collectable flag. What we
1597 * want is an assurance we can get the
1598 * memory back, assuming there's no
1599 * leak.
1600 */
1601 zone->max_size += (zone->max_size >> 1);
1602 } else {
1603 unlock_zone(zone);
1604
1605 panic("zalloc: zone \"%s\" empty.", zone->zone_name);
1606 }
1607 }
1608 zone->doing_alloc = TRUE;
1609 unlock_zone(zone);
1610
1611 if (zone->collectable) {
1612 vm_offset_t space;
1613 vm_size_t alloc_size;
1614 int retry = 0;
1615
1616 for (;;) {
1617 int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT;
1618
1619 if (vm_pool_low() || retry >= 1)
1620 alloc_size =
1621 round_page(zone->elem_size);
1622 else
1623 alloc_size = zone->alloc_size;
1624
1625 if (zone->noencrypt)
1626 zflags |= KMA_NOENCRYPT;
1627
1628 retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags);
1629 if (retval == KERN_SUCCESS) {
1630 #if ZONE_ALIAS_ADDR
1631 if (alloc_size == PAGE_SIZE)
1632 space = zone_alias_addr(space);
1633 #endif
1634
1635 #if CONFIG_ZLEAKS
1636 if ((zleak_state & (ZLEAK_STATE_ENABLED | ZLEAK_STATE_ACTIVE)) == ZLEAK_STATE_ENABLED) {
1637 if (zone_map->size >= zleak_global_tracking_threshold) {
1638 kern_return_t kr;
1639
1640 kr = zleak_activate();
1641 if (kr != KERN_SUCCESS) {
1642 printf("Failed to activate live zone leak debugging (%d).\n", kr);
1643 }
1644 }
1645 }
1646
1647 if ((zleak_state & ZLEAK_STATE_ACTIVE) && !(zone->zleak_on)) {
1648 if (zone->cur_size > zleak_per_zone_tracking_threshold) {
1649 zone->zleak_on = TRUE;
1650 }
1651 }
1652 #endif /* CONFIG_ZLEAKS */
1653
1654 zone_page_init(space, alloc_size,
1655 ZONE_PAGE_USED);
1656 zcram(zone, (void *)space, alloc_size);
1657
1658 break;
1659 } else if (retval != KERN_RESOURCE_SHORTAGE) {
1660 retry++;
1661
1662 if (retry == 2) {
1663 zone_gc();
1664 printf("zalloc did gc\n");
1665 zone_display_zprint();
1666 }
1667 if (retry == 3) {
1668 panic_include_zprint = TRUE;
1669 #if CONFIG_ZLEAKS
1670 if ((zleak_state & ZLEAK_STATE_ACTIVE)) {
1671 panic_include_ztrace = TRUE;
1672 }
1673 #endif /* CONFIG_ZLEAKS */
1674 /* TODO: Change this to something more descriptive, perhaps
1675 * 'zone_map exhausted' only if we get retval 3 (KERN_NO_SPACE).
1676 */
1677 panic("zalloc: \"%s\" (%d elements) retry fail %d, kfree_nop_count: %d", zone->zone_name, zone->count, retval, (int)kfree_nop_count);
1678 }
1679 } else {
1680 break;
1681 }
1682 }
1683 lock_zone(zone);
1684 zone->doing_alloc = FALSE;
1685 if (zone->waiting) {
1686 zone->waiting = FALSE;
1687 zone_wakeup(zone);
1688 }
1689 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1690 if (addr == 0 &&
1691 retval == KERN_RESOURCE_SHORTAGE) {
1692 unlock_zone(zone);
1693
1694 VM_PAGE_WAIT();
1695 lock_zone(zone);
1696 }
1697 } else {
1698 vm_offset_t space;
1699 retval = zget_space(zone, zone->elem_size, &space);
1700
1701 lock_zone(zone);
1702 zone->doing_alloc = FALSE;
1703 if (zone->waiting) {
1704 zone->waiting = FALSE;
1705 thread_wakeup((event_t)zone);
1706 }
1707 if (retval == KERN_SUCCESS) {
1708 zone->count++;
1709 zone->sum_count++;
1710 zone->cur_size += zone->elem_size;
1711 #if ZONE_DEBUG
1712 if (zone_debug_enabled(zone)) {
1713 enqueue_tail(&zone->active_zones, (queue_entry_t)space);
1714 }
1715 #endif
1716 unlock_zone(zone);
1717 zone_page_alloc(space, zone->elem_size);
1718 #if ZONE_DEBUG
1719 if (zone_debug_enabled(zone))
1720 space += ZONE_DEBUG_OFFSET;
1721 #endif
1722 addr = space;
1723 goto success;
1724 }
1725 if (retval == KERN_RESOURCE_SHORTAGE) {
1726 unlock_zone(zone);
1727
1728 VM_PAGE_WAIT();
1729 lock_zone(zone);
1730 } else {
1731 /*
1732 * Equivalent to a 'retry fail 3', we're out of address space in the zone_map
1733 * (if it returned KERN_NO_SPACE)
1734 */
1735 if (retval == KERN_NO_SPACE) {
1736 panic_include_zprint = TRUE;
1737 #if CONFIG_ZLEAKS
1738 if ((zleak_state & ZLEAK_STATE_ACTIVE)) {
1739 panic_include_ztrace = TRUE;
1740 }
1741 #endif /* CONFIG_ZLEAKS */
1742 }
1743 panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval);
1744 }
1745 }
1746 }
1747 if (addr == 0)
1748 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1749 }
1750
1751 #if CONFIG_ZLEAKS
1752 /* Zone leak detection:
1753 * If we're sampling this allocation, add it to the zleaks hash table.
1754 */
1755 if (addr && zleak_tracedepth > 0) {
1756 /* Sampling can fail if another sample is happening at the same time in a different zone. */
1757 if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) {
1758 /* If it failed, roll back the counter so we sample the next allocation instead. */
1759 zone->zleak_capture = z_sample_factor;
1760 }
1761 }
1762 #endif /* CONFIG_ZLEAKS */
1763
1764
1765 /*
1766 * See if we should be logging allocations in this zone. Logging is rarely done except when a leak is
1767 * suspected, so this code rarely executes. We need to do this code while still holding the zone lock
1768 * since it protects the various log related data structures.
1769 */
1770
1771 if (DO_LOGGING(zone) && addr) {
1772
1773 /*
1774 * Look for a place to record this new allocation. We implement two different logging strategies
1775 * depending on whether we're looking for the source of a zone leak or a zone corruption. When looking
1776 * for a leak, we want to log as many allocations as possible in order to clearly identify the leaker
1777 * among all the records. So we look for an unused slot in the log and fill that in before overwriting
1778 * an old entry. When looking for a corruption however, it's better to have a chronological log of all
1779 * the allocations and frees done in the zone so that the history of operations for a specific zone
1780 * element can be inspected. So in this case, we treat the log as a circular buffer and overwrite the
1781 * oldest entry whenever a new one needs to be added.
1782 *
1783 * The check_freed_element flag tells us what style of logging to do. It's set if we're supposed to be
1784 * doing corruption style logging (indicated via -zc in the boot-args).
1785 */
1786
1787 if (!check_freed_element && zrecords[zcurrent].z_element && zrecorded < log_records) {
1788
1789 /*
1790 * If we get here, we're doing leak style logging and there are still some unused entries in
1791 * the log (since zrecorded is smaller than the size of the log). Look for an unused slot
1792 * starting at zcurrent and wrap-around if we reach the end of the buffer. If the buffer
1793 * is already full, we just fall through and overwrite the element indexed by zcurrent.
1794 */
1795
1796 for (i = zcurrent; i < log_records; i++) {
1797 if (zrecords[i].z_element == NULL) {
1798 zcurrent = i;
1799 goto empty_slot;
1800 }
1801 }
1802
1803 for (i = 0; i < zcurrent; i++) {
1804 if (zrecords[i].z_element == NULL) {
1805 zcurrent = i;
1806 goto empty_slot;
1807 }
1808 }
1809 }
1810
1811 /*
1812 * Save a record of this allocation
1813 */
1814
1815 empty_slot:
1816 if (zrecords[zcurrent].z_element == NULL)
1817 zrecorded++;
1818
1819 zrecords[zcurrent].z_element = (void *)addr;
1820 zrecords[zcurrent].z_time = ztime++;
1821 zrecords[zcurrent].z_opcode = ZOP_ALLOC;
1822
1823 for (i = 0; i < numsaved; i++)
1824 zrecords[zcurrent].z_pc[i] = (void*) zbt[i];
1825
1826 for (; i < MAX_ZTRACE_DEPTH; i++)
1827 zrecords[zcurrent].z_pc[i] = 0;
1828
1829 zcurrent++;
1830
1831 if (zcurrent >= log_records)
1832 zcurrent = 0;
1833 }
1834
1835 if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) {
1836 zone->async_pending = TRUE;
1837 unlock_zone(zone);
1838 thread_call_enter(&zone->call_async_alloc);
1839 lock_zone(zone);
1840 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1841 }
1842
1843 #if ZONE_DEBUG
1844 if (addr && zone_debug_enabled(zone)) {
1845 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
1846 addr += ZONE_DEBUG_OFFSET;
1847 }
1848 #endif
1849
1850 #if CONFIG_ZLEAKS
1851 if (addr != 0) {
1852 zone->num_allocs++;
1853 }
1854 #endif /* CONFIG_ZLEAKS */
1855
1856 unlock_zone(zone);
1857
1858 success:
1859 TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr);
1860
1861 if (addr) {
1862 thread_t thr = current_thread();
1863 task_t task;
1864 zinfo_usage_t zinfo;
1865
1866 if (zone->caller_acct)
1867 thr->tkm_private.alloc += zone->elem_size;
1868 else
1869 thr->tkm_shared.alloc += zone->elem_size;
1870
1871 if ((task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
1872 OSAddAtomic64(zone->elem_size, (int64_t *)&zinfo[zone->index].alloc);
1873 }
1874 return((void *)addr);
1875 }
1876
1877
1878 void *
1879 zalloc(
1880 register zone_t zone)
1881 {
1882 return( zalloc_canblock(zone, TRUE) );
1883 }
1884
1885 void *
1886 zalloc_noblock(
1887 register zone_t zone)
1888 {
1889 return( zalloc_canblock(zone, FALSE) );
1890 }
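
/*
 * Illustrative sketch (assumed usage, not part of the original sources):
 * a zone is normally created once with zinit() and then serves fixed-size
 * allocations through zalloc()/zfree().  "struct widget" and "widget_zone"
 * are hypothetical names used only for this example.
 *
 *	struct widget { int w_id; };
 *	zone_t widget_zone;
 *
 *	widget_zone = zinit(sizeof(struct widget),
 *			    1024 * sizeof(struct widget),
 *			    PAGE_SIZE,
 *			    "widgets");
 *
 *	struct widget *w = (struct widget *) zalloc(widget_zone);
 *	if (w != NULL) {
 *		w->w_id = 1;
 *		zfree(widget_zone, w);
 *	}
 */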
1891
1892 void
1893 zalloc_async(
1894 thread_call_param_t p0,
1895 __unused thread_call_param_t p1)
1896 {
1897 void *elt;
1898
1899 elt = zalloc_canblock((zone_t)p0, TRUE);
1900 zfree((zone_t)p0, elt);
1901 lock_zone(((zone_t)p0));
1902 ((zone_t)p0)->async_pending = FALSE;
1903 unlock_zone(((zone_t)p0));
1904 }
1905
1906
1907 /*
1908 * zget returns an element from the specified zone
1909 * and immediately returns NULL if no free element is available.
1910 *
1911 * This form should be used when you can not block (like when
1912 * processing an interrupt).
1913 *
1914 * XXX: It seems like only vm_page_grab_fictitious_common uses this, and its
1915 * friend vm_page_more_fictitious can block, so it doesn't seem like
1916 * this is used for interrupts any more....
1917 */
1918 void *
1919 zget(
1920 register zone_t zone)
1921 {
1922 register vm_offset_t addr;
1923
1924 #if CONFIG_ZLEAKS
1925 uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used for zone leak detection */
1926 uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */
1927 #endif /* CONFIG_ZLEAKS */
1928
1929 assert( zone != ZONE_NULL );
1930
1931 if (!lock_try_zone(zone))
1932 return NULL;
1933
1934 #if CONFIG_ZLEAKS
1935 /*
1936 * Zone leak detection: capture a backtrace
1937 */
1938 if (zone->zleak_on && (zone->zleak_capture++ % z_sample_factor == 0)) {
1939 zone->zleak_capture = 1;
1940 zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH);
1941 }
1942 #endif /* CONFIG_ZLEAKS */
1943
1944 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1945 #if ZONE_DEBUG
1946 if (addr && zone_debug_enabled(zone)) {
1947 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
1948 addr += ZONE_DEBUG_OFFSET;
1949 }
1950 #endif /* ZONE_DEBUG */
1951
1952 #if CONFIG_ZLEAKS
1953 /*
1954 * Zone leak detection: record the allocation
1955 */
1956 if (zone->zleak_on && zleak_tracedepth > 0 && addr) {
1957 /* Sampling can fail if another sample is happening at the same time in a different zone. */
1958 if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) {
1959 /* If it failed, roll back the counter so we sample the next allocation instead. */
1960 zone->zleak_capture = z_sample_factor;
1961 }
1962 }
1963
1964 if (addr != 0) {
1965 zone->num_allocs++;
1966 }
1967 #endif /* CONFIG_ZLEAKS */
1968
1969 unlock_zone(zone);
1970
1971 return((void *) addr);
1972 }
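
/*
 * Illustrative sketch (assumed usage, not part of the original sources):
 * because zget() never blocks and never grows the zone, every caller must
 * be prepared for a NULL return.  "my_zone" and "struct elem" are
 * hypothetical names.
 *
 *	struct elem *e = (struct elem *) zget(my_zone);
 *	if (e == NULL)
 *		return KERN_RESOURCE_SHORTAGE;
 */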
1973
1974 /* Keep this FALSE by default. Large memory machines run orders of magnitude
1975 slower in debug mode when true. Use the debugger to enable if needed */
1976 /* static */ boolean_t zone_check = FALSE;
1977
1978 static zone_t zone_last_bogus_zone = ZONE_NULL;
1979 static vm_offset_t zone_last_bogus_elem = 0;
1980
1981 void
1982 zfree(
1983 register zone_t zone,
1984 void *addr)
1985 {
1986 vm_offset_t elem = (vm_offset_t) addr;
1987 void *zbt[MAX_ZTRACE_DEPTH]; /* only used if zone logging is enabled via boot-args */
1988 int numsaved = 0;
1989
1990 assert(zone != ZONE_NULL);
1991
1992 /*
1993 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
1994 */
1995
1996 if (DO_LOGGING(zone))
1997 numsaved = OSBacktrace(&zbt[0], MAX_ZTRACE_DEPTH);
1998
1999 #if MACH_ASSERT
2000 /* Basic sanity checks */
2001 if (zone == ZONE_NULL || elem == (vm_offset_t)0)
2002 panic("zfree: NULL");
2003 /* zone_gc assumes zones are never freed */
2004 if (zone == zone_zone)
2005 panic("zfree: freeing to zone_zone breaks zone_gc!");
2006 #endif
2007
2008 TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (uintptr_t)addr);
2009
2010 if (zone->collectable && !zone->allows_foreign &&
2011 !from_zone_map(elem, zone->elem_size)) {
2012 #if MACH_ASSERT
2013 panic("zfree: non-allocated memory in collectable zone!");
2014 #endif
2015 zone_last_bogus_zone = zone;
2016 zone_last_bogus_elem = elem;
2017 return;
2018 }
2019
2020 lock_zone(zone);
2021
2022 /*
2023 * See if we're doing logging on this zone. There are two styles of logging used depending on
2024 * whether we're trying to catch a leak or corruption. See comments above in zalloc for details.
2025 */
2026
2027 if (DO_LOGGING(zone)) {
2028 int i;
2029
2030 if (check_freed_element) {
2031
2032 /*
2033 * We're logging to catch a corruption. Add a record of this zfree operation
2034 * to the log.
2035 */
2036
2037 if (zrecords[zcurrent].z_element == NULL)
2038 zrecorded++;
2039
2040 zrecords[zcurrent].z_element = (void *)addr;
2041 zrecords[zcurrent].z_time = ztime++;
2042 zrecords[zcurrent].z_opcode = ZOP_FREE;
2043
2044 for (i = 0; i < numsaved; i++)
2045 zrecords[zcurrent].z_pc[i] = zbt[i];
2046
2047 for (; i < MAX_ZTRACE_DEPTH; i++)
2048 zrecords[zcurrent].z_pc[i] = 0;
2049
2050 zcurrent++;
2051
2052 if (zcurrent >= log_records)
2053 zcurrent = 0;
2054
2055 } else {
2056
2057 /*
2058 * We're logging to catch a leak. Remove any record we might have for this
2059 * element since it's being freed. Note that we may not find it if the buffer
2060 * overflowed and that's OK. Since the log is of a limited size, old records
2061 * get overwritten if there are more zallocs than zfrees.
2062 */
2063
2064 for (i = 0; i < log_records; i++) {
2065 if (zrecords[i].z_element == addr) {
2066 zrecords[i].z_element = NULL;
2067 zcurrent = i;
2068 zrecorded--;
2069 break;
2070 }
2071 }
2072 }
2073 }
2074
2075
2076 #if ZONE_DEBUG
2077 if (zone_debug_enabled(zone)) {
2078 queue_t tmp_elem;
2079
2080 elem -= ZONE_DEBUG_OFFSET;
2081 if (zone_check) {
2082 /* check the zone's consistency */
2083
2084 for (tmp_elem = queue_first(&zone->active_zones);
2085 !queue_end(tmp_elem, &zone->active_zones);
2086 tmp_elem = queue_next(tmp_elem))
2087 if (elem == (vm_offset_t)tmp_elem)
2088 break;
2089 if (elem != (vm_offset_t)tmp_elem)
2090 panic("zfree()ing element from wrong zone");
2091 }
2092 remqueue((queue_t) elem);
2093 }
2094 #endif /* ZONE_DEBUG */
2095 if (zone_check) {
2096 vm_offset_t this;
2097
2098 /* check the zone's consistency */
2099
2100 for (this = zone->free_elements;
2101 this != 0;
2102 this = * (vm_offset_t *) this)
2103 if (!pmap_kernel_va(this) || this == elem)
2104 panic("zfree");
2105 }
2106 ADD_TO_ZONE(zone, elem);
2107 #if MACH_ASSERT
2108 if (zone->count < 0)
2109 panic("zfree: count < 0!");
2110 #endif
2111
2112
2113 #if CONFIG_ZLEAKS
2114 zone->num_frees++;
2115
2116 /*
2117 * Zone leak detection: un-track the allocation
2118 */
2119 if (zone->zleak_on) {
2120 zleak_free(elem, zone->elem_size);
2121 }
2122 #endif /* CONFIG_ZLEAKS */
2123
2124 /*
2125 * If elements are at least a page in size and memory is low,
2126 * request that zone garbage collection run the next
2127 * time the pageout thread runs.
2128 */
2129 if (zone->elem_size >= PAGE_SIZE &&
2130 vm_pool_low()){
2131 zone_gc_forced = TRUE;
2132 }
2133 unlock_zone(zone);
2134
2135 {
2136 thread_t thr = current_thread();
2137 task_t task;
2138 zinfo_usage_t zinfo;
2139
2140 if (zone->caller_acct)
2141 thr->tkm_private.free += zone->elem_size;
2142 else
2143 thr->tkm_shared.free += zone->elem_size;
2144 if ((task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
2145 OSAddAtomic64(zone->elem_size,
2146 (int64_t *)&zinfo[zone->index].free);
2147 }
2148 }
2149
2150
2151 /* Change a zone's flags.
2152 * This routine must be called immediately after zinit.
2153 */
2154 void
2155 zone_change(
2156 zone_t zone,
2157 unsigned int item,
2158 boolean_t value)
2159 {
2160 assert( zone != ZONE_NULL );
2161 assert( value == TRUE || value == FALSE );
2162
2163 switch(item){
2164 case Z_NOENCRYPT:
2165 zone->noencrypt = value;
2166 break;
2167 case Z_EXHAUST:
2168 zone->exhaustible = value;
2169 break;
2170 case Z_COLLECT:
2171 zone->collectable = value;
2172 break;
2173 case Z_EXPAND:
2174 zone->expandable = value;
2175 break;
2176 case Z_FOREIGN:
2177 zone->allows_foreign = value;
2178 break;
2179 case Z_CALLERACCT:
2180 zone->caller_acct = value;
2181 break;
2182 #if MACH_ASSERT
2183 default:
2184 panic("Zone_change: Wrong Item Type!");
2185 /* break; */
2186 #endif
2187 }
2188 }
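
/*
 * Illustrative sketch (assumed usage, not part of the original sources):
 * zone_change() is typically called right after zinit() to adjust the
 * flags handled in the switch above.  "example_zone" is a hypothetical
 * zone pointer.
 *
 *	zone_change(example_zone, Z_CALLERACCT, FALSE);   (account as shared, not per caller)
 *	zone_change(example_zone, Z_NOENCRYPT, TRUE);     (sets zone->noencrypt)
 *	zone_change(example_zone, Z_EXHAUST, TRUE);       (sets zone->exhaustible)
 */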
2189
2190 /*
2191 * Return the expected number of free elements in the zone.
2192 * This calculation will be incorrect if items are zfree'd that
2193 * were never zalloc'd/zget'd. The correct way to stuff memory
2194 * into a zone is by zcram.
2195 */
2196
2197 integer_t
2198 zone_free_count(zone_t zone)
2199 {
2200 integer_t free_count;
2201
2202 lock_zone(zone);
2203 free_count = (integer_t)(zone->cur_size/zone->elem_size - zone->count);
2204 unlock_zone(zone);
2205
2206 assert(free_count >= 0);
2207
2208 return(free_count);
2209 }
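
/*
 * Worked example (illustrative, not part of the original sources): a zone
 * with elem_size = 64 bytes and cur_size = 8192 bytes holds 8192 / 64 = 128
 * elements in total; if count (the number of outstanding allocations) is
 * 100, zone_free_count() reports 128 - 100 = 28 free elements.
 */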
2210
2211 /*
2212 * zprealloc preallocates wired memory, expanding the specified
2213 * zone to the specified size
2214 */
2215 void
2216 zprealloc(
2217 zone_t zone,
2218 vm_size_t size)
2219 {
2220 vm_offset_t addr;
2221
2222 if (size != 0) {
2223 if (kmem_alloc_kobject(zone_map, &addr, size) != KERN_SUCCESS)
2224 panic("zprealloc");
2225 zone_page_init(addr, size, ZONE_PAGE_USED);
2226 zcram(zone, (void *)addr, size);
2227 }
2228 }
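
/*
 * Illustrative sketch (assumed usage, not part of the original sources):
 * wiring down four pages of backing store up front so that the first
 * allocations from a hypothetical "early_zone" are satisfied from the
 * preallocated free list.
 *
 *	zprealloc(early_zone, 4 * PAGE_SIZE);
 */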
2229
2230 /*
2231 * Zone garbage collection subroutines
2232 */
2233
2234 boolean_t
2235 zone_page_collectable(
2236 vm_offset_t addr,
2237 vm_size_t size)
2238 {
2239 struct zone_page_table_entry *zp;
2240 natural_t i, j;
2241
2242 #if ZONE_ALIAS_ADDR
2243 addr = zone_virtual_addr(addr);
2244 #endif
2245 #if MACH_ASSERT
2246 if (!from_zone_map(addr, size))
2247 panic("zone_page_collectable");
2248 #endif
2249
2250 i = (natural_t)atop_kernel(addr-zone_map_min_address);
2251 j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
2252
2253 for (zp = zone_page_table + i; i <= j; zp++, i++)
2254 if (zp->collect_count == zp->alloc_count)
2255 return (TRUE);
2256
2257 return (FALSE);
2258 }
2259
2260 void
2261 zone_page_keep(
2262 vm_offset_t addr,
2263 vm_size_t size)
2264 {
2265 struct zone_page_table_entry *zp;
2266 natural_t i, j;
2267
2268 #if ZONE_ALIAS_ADDR
2269 addr = zone_virtual_addr(addr);
2270 #endif
2271 #if MACH_ASSERT
2272 if (!from_zone_map(addr, size))
2273 panic("zone_page_keep");
2274 #endif
2275
2276 i = (natural_t)atop_kernel(addr-zone_map_min_address);
2277 j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
2278
2279 for (zp = zone_page_table + i; i <= j; zp++, i++)
2280 zp->collect_count = 0;
2281 }
2282
2283 void
2284 zone_page_collect(
2285 vm_offset_t addr,
2286 vm_size_t size)
2287 {
2288 struct zone_page_table_entry *zp;
2289 natural_t i, j;
2290
2291 #if ZONE_ALIAS_ADDR
2292 addr = zone_virtual_addr(addr);
2293 #endif
2294 #if MACH_ASSERT
2295 if (!from_zone_map(addr, size))
2296 panic("zone_page_collect");
2297 #endif
2298
2299 i = (natural_t)atop_kernel(addr-zone_map_min_address);
2300 j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
2301
2302 for (zp = zone_page_table + i; i <= j; zp++, i++)
2303 ++zp->collect_count;
2304 }
2305
2306 void
2307 zone_page_init(
2308 vm_offset_t addr,
2309 vm_size_t size,
2310 int value)
2311 {
2312 struct zone_page_table_entry *zp;
2313 natural_t i, j;
2314
2315 #if ZONE_ALIAS_ADDR
2316 addr = zone_virtual_addr(addr);
2317 #endif
2318 #if MACH_ASSERT
2319 if (!from_zone_map(addr, size))
2320 panic("zone_page_init");
2321 #endif
2322
2323 i = (natural_t)atop_kernel(addr-zone_map_min_address);
2324 j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
2325
2326 for (zp = zone_page_table + i; i <= j; zp++, i++) {
2327 zp->alloc_count = value;
2328 zp->collect_count = 0;
2329 }
2330 }
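
/*
 * Worked example (illustrative, not part of the original sources), assuming
 * a 4KB PAGE_SIZE: for a range beginning 0x3000 bytes above
 * zone_map_min_address with size = 2 * PAGE_SIZE, the loop above covers
 * zone_page_table entries i = atop_kernel(0x3000) = 3 through
 * j = atop_kernel(0x3000 + 0x2000 - 1) = 4, i.e. exactly the two pages
 * spanned by the range.
 */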
2331
2332 void
2333 zone_page_alloc(
2334 vm_offset_t addr,
2335 vm_size_t size)
2336 {
2337 struct zone_page_table_entry *zp;
2338 natural_t i, j;
2339
2340 #if ZONE_ALIAS_ADDR
2341 addr = zone_virtual_addr(addr);
2342 #endif
2343 #if MACH_ASSERT
2344 if (!from_zone_map(addr, size))
2345 panic("zone_page_alloc");
2346 #endif
2347
2348 i = (natural_t)atop_kernel(addr-zone_map_min_address);
2349 j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
2350
2351 for (zp = zone_page_table + i; i <= j; zp++, i++) {
2352 /*
2353 * Set alloc_count to (ZONE_PAGE_USED + 1) if
2354 * it was previously set to ZONE_PAGE_UNUSED.
2355 */
2356 if (zp->alloc_count == ZONE_PAGE_UNUSED)
2357 zp->alloc_count = 1;
2358 else
2359 ++zp->alloc_count;
2360 }
2361 }
2362
2363 void
2364 zone_page_free_element(
2365 struct zone_page_table_entry **free_pages,
2366 vm_offset_t addr,
2367 vm_size_t size)
2368 {
2369 struct zone_page_table_entry *zp;
2370 natural_t i, j;
2371
2372 #if ZONE_ALIAS_ADDR
2373 addr = zone_virtual_addr(addr);
2374 #endif
2375 #if MACH_ASSERT
2376 if (!from_zone_map(addr, size))
2377 panic("zone_page_free_element");
2378 #endif
2379
2380 i = (natural_t)atop_kernel(addr-zone_map_min_address);
2381 j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
2382
2383 for (zp = zone_page_table + i; i <= j; zp++, i++) {
2384 if (zp->collect_count > 0)
2385 --zp->collect_count;
2386 if (--zp->alloc_count == 0) {
2387 zp->alloc_count = ZONE_PAGE_UNUSED;
2388 zp->collect_count = 0;
2389
2390 zp->link = *free_pages;
2391 *free_pages = zp;
2392 }
2393 }
2394 }
2395
2396
2397 /* This is used for walking through a zone's free element list.
2398 */
2399 struct zone_free_element {
2400 struct zone_free_element * next;
2401 };
2402
2403 /*
2404 * Add a linked list of pages starting at base back into the zone
2405 * free list. Tail points to the last element on the list.
2406 */
2407
2408 #define ADD_LIST_TO_ZONE(zone, base, tail) \
2409 MACRO_BEGIN \
2410 (tail)->next = (void *)((zone)->free_elements); \
2411 if (check_freed_element) { \
2412 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
2413 ((vm_offset_t *)(tail))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
2414 (zone)->free_elements; \
2415 } \
2416 (zone)->free_elements = (unsigned long)(base); \
2417 MACRO_END
2418
2419 /*
2420 * Add an element to the chain pointed to by prev.
2421 */
2422
2423 #define ADD_ELEMENT(zone, prev, elem) \
2424 MACRO_BEGIN \
2425 (prev)->next = (elem); \
2426 if (check_freed_element) { \
2427 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
2428 ((vm_offset_t *)(prev))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
2429 (vm_offset_t)(elem); \
2430 } \
2431 MACRO_END
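
/*
 * Illustrative layout (not part of the original sources): with
 * check_freed_element enabled and elem_size at least two pointers wide,
 * the macros above store the "next" link twice in each free element,
 * once in the first word and once in the last pointer-sized slot, so a
 * later consistency check can compare the two copies and detect a
 * corrupted free list:
 *
 *	offset 0:                                the next pointer
 *	offset elem_size - sizeof(vm_offset_t):  a copy of the next pointer
 */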
2432
2433 struct {
2434 uint32_t pgs_freed;
2435
2436 uint32_t elems_collected,
2437 elems_freed,
2438 elems_kept;
2439 } zgc_stats;
2440
2441 /* Zone garbage collection
2442 *
2443 * zone_gc will walk through all the free elements in all the
2444 * zones that are marked collectable looking for reclaimable
2445 * pages. zone_gc is called by consider_zone_gc when the system
2446 * begins to run out of memory.
2447 */
2448 void
2449 zone_gc(void)
2450 {
2451 unsigned int max_zones;
2452 zone_t z;
2453 unsigned int i;
2454 struct zone_page_table_entry *zp, *zone_free_pages;
2455
2456 lck_mtx_lock(&zone_gc_lock);
2457
2458 simple_lock(&all_zones_lock);
2459 max_zones = num_zones;
2460 z = first_zone;
2461 simple_unlock(&all_zones_lock);
2462
2463 #if MACH_ASSERT
2464 for (i = 0; i < zone_pages; i++)
2465 assert(zone_page_table[i].collect_count == 0);
2466 #endif /* MACH_ASSERT */
2467
2468 zone_free_pages = NULL;
2469
2470 for (i = 0; i < max_zones; i++, z = z->next_zone) {
2471 unsigned int n, m;
2472 vm_size_t elt_size, size_freed;
2473 struct zone_free_element *elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail;
2474
2475 assert(z != ZONE_NULL);
2476
2477 if (!z->collectable)
2478 continue;
2479
2480 lock_zone(z);
2481
2482 elt_size = z->elem_size;
2483
2484 /*
2485 * Do a quick feasibility check before we scan the zone:
2486 * skip unless there is a likelihood of getting pages back
2487 * (i.e. we need a whole allocation block's worth of free
2488 * elements before we can garbage collect) and
2489 * the zone has more than 10 percent of its elements free
2490 * or the element size is a multiple of the PAGE_SIZE
2491 */
2492 if ((elt_size & PAGE_MASK) &&
2493 (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) ||
2494 ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10)))) {
2495 unlock_zone(z);
2496 continue;
2497 }
2498
2499 z->doing_gc = TRUE;
2500
2501 /*
2502 * Snatch all of the free elements away from the zone.
2503 */
2504
2505 scan = (void *)z->free_elements;
2506 z->free_elements = 0;
2507
2508 unlock_zone(z);
2509
2510 /*
2511 * Pass 1:
2512 *
2513 * Determine which elements we can attempt to collect
2514 * and count them up in the page table. Foreign elements
2515 * are returned to the zone.
2516 */
2517
2518 prev = (void *)&scan;
2519 elt = scan;
2520 n = 0; tail = keep = NULL;
2521 while (elt != NULL) {
2522 if (from_zone_map(elt, elt_size)) {
2523 zone_page_collect((vm_offset_t)elt, elt_size);
2524
2525 prev = elt;
2526 elt = elt->next;
2527
2528 ++zgc_stats.elems_collected;
2529 }
2530 else {
2531 if (keep == NULL)
2532 keep = tail = elt;
2533 else {
2534 ADD_ELEMENT(z, tail, elt);
2535 tail = elt;
2536 }
2537
2538 ADD_ELEMENT(z, prev, elt->next);
2539 elt = elt->next;
2540 ADD_ELEMENT(z, tail, NULL);
2541 }
2542
2543 /*
2544 * Dribble back the elements we are keeping.
2545 */
2546
2547 if (++n >= 50) {
2548 if (z->waiting == TRUE) {
2549 lock_zone(z);
2550
2551 if (keep != NULL) {
2552 ADD_LIST_TO_ZONE(z, keep, tail);
2553 tail = keep = NULL;
2554 } else {
2555 m = 0;
2556 base_elt = elt;
2557 base_prev = prev;
2558 while ((elt != NULL) && (++m < 50)) {
2559 prev = elt;
2560 elt = elt->next;
2561 }
2562 if (m != 0) {
2563 ADD_LIST_TO_ZONE(z, base_elt, prev);
2564 ADD_ELEMENT(z, base_prev, elt);
2565 prev = base_prev;
2566 }
2567 }
2568
2569 if (z->waiting) {
2570 z->waiting = FALSE;
2571 zone_wakeup(z);
2572 }
2573
2574 unlock_zone(z);
2575 }
2576 n = 0;
2577 }
2578 }
2579
2580 /*
2581 * Return any remaining elements.
2582 */
2583
2584 if (keep != NULL) {
2585 lock_zone(z);
2586
2587 ADD_LIST_TO_ZONE(z, keep, tail);
2588
2589 unlock_zone(z);
2590 }
2591
2592 /*
2593 * Pass 2:
2594 *
2595 * Determine which pages we can reclaim and
2596 * free those elements.
2597 */
2598
2599 size_freed = 0;
2600 elt = scan;
2601 n = 0; tail = keep = NULL;
2602 while (elt != NULL) {
2603 if (zone_page_collectable((vm_offset_t)elt, elt_size)) {
2604 size_freed += elt_size;
2605 zone_page_free_element(&zone_free_pages,
2606 (vm_offset_t)elt, elt_size);
2607
2608 elt = elt->next;
2609
2610 ++zgc_stats.elems_freed;
2611 }
2612 else {
2613 zone_page_keep((vm_offset_t)elt, elt_size);
2614
2615 if (keep == NULL)
2616 keep = tail = elt;
2617 else {
2618 ADD_ELEMENT(z, tail, elt);
2619 tail = elt;
2620 }
2621
2622 elt = elt->next;
2623 ADD_ELEMENT(z, tail, NULL);
2624
2625 ++zgc_stats.elems_kept;
2626 }
2627
2628 /*
2629 * Dribble back the elements we are keeping,
2630 * and update the zone size info.
2631 */
2632
2633 if (++n >= 50) {
2634 lock_zone(z);
2635
2636 z->cur_size -= size_freed;
2637 size_freed = 0;
2638
2639 if (keep != NULL) {
2640 ADD_LIST_TO_ZONE(z, keep, tail);
2641 }
2642
2643 if (z->waiting) {
2644 z->waiting = FALSE;
2645 zone_wakeup(z);
2646 }
2647
2648 unlock_zone(z);
2649
2650 n = 0; tail = keep = NULL;
2651 }
2652 }
2653
2654 /*
2655 * Return any remaining elements, and update
2656 * the zone size info.
2657 */
2658
2659 lock_zone(z);
2660
2661 if (size_freed > 0 || keep != NULL) {
2662
2663 z->cur_size -= size_freed;
2664
2665 if (keep != NULL) {
2666 ADD_LIST_TO_ZONE(z, keep, tail);
2667 }
2668
2669 }
2670
2671 z->doing_gc = FALSE;
2672 if (z->waiting) {
2673 z->waiting = FALSE;
2674 zone_wakeup(z);
2675 }
2676 unlock_zone(z);
2677 }
2678
2679 /*
2680 * Reclaim the pages we are freeing.
2681 */
2682
2683 while ((zp = zone_free_pages) != NULL) {
2684 zone_free_pages = zp->link;
2685 #if ZONE_ALIAS_ADDR
2686 z = (zone_t)zone_virtual_addr((vm_map_address_t)z);
2687 #endif
2688 kmem_free(zone_map, zone_map_min_address + PAGE_SIZE *
2689 (zp - zone_page_table), PAGE_SIZE);
2690 ++zgc_stats.pgs_freed;
2691 }
2692
2693 lck_mtx_unlock(&zone_gc_lock);
2694 }
2695
2696 /*
2697 * consider_zone_gc:
2698 *
2699 * Called by the pageout daemon when the system needs more free pages.
2700 */
2701
2702 void
2703 consider_zone_gc(boolean_t force)
2704 {
2705
2706 if (zone_gc_allowed &&
2707 (zone_gc_allowed_by_time_throttle ||
2708 zone_gc_forced ||
2709 force)) {
2710 zone_gc_forced = FALSE;
2711 zone_gc_allowed_by_time_throttle = FALSE; /* reset periodically */
2712 zone_gc();
2713 }
2714 }
2715
2716 /*
2717 * By default, don't attempt zone GC more frequently
2718 * than once per minute.
2719 */
2720 void
2721 compute_zone_gc_throttle(void *arg __unused)
2722 {
2723 zone_gc_allowed_by_time_throttle = TRUE;
2724 }
2725
2726
2727 kern_return_t
2728 task_zone_info(
2729 task_t task,
2730 mach_zone_name_array_t *namesp,
2731 mach_msg_type_number_t *namesCntp,
2732 task_zone_info_array_t *infop,
2733 mach_msg_type_number_t *infoCntp)
2734 {
2735 mach_zone_name_t *names;
2736 vm_offset_t names_addr;
2737 vm_size_t names_size;
2738 task_zone_info_t *info;
2739 vm_offset_t info_addr;
2740 vm_size_t info_size;
2741 unsigned int max_zones, i;
2742 zone_t z;
2743 mach_zone_name_t *zn;
2744 task_zone_info_t *zi;
2745 kern_return_t kr;
2746
2747 vm_size_t used;
2748 vm_map_copy_t copy;
2749
2750
2751 if (task == TASK_NULL)
2752 return KERN_INVALID_TASK;
2753
2754 /*
2755 * We assume that zones aren't freed once allocated.
2756 * We won't pick up any zones that are allocated later.
2757 */
2758
2759 simple_lock(&all_zones_lock);
2760 max_zones = (unsigned int)(num_zones + num_fake_zones);
2761 z = first_zone;
2762 simple_unlock(&all_zones_lock);
2763
2764 names_size = round_page(max_zones * sizeof *names);
2765 kr = kmem_alloc_pageable(ipc_kernel_map,
2766 &names_addr, names_size);
2767 if (kr != KERN_SUCCESS)
2768 return kr;
2769 names = (mach_zone_name_t *) names_addr;
2770
2771 info_size = round_page(max_zones * sizeof *info);
2772 kr = kmem_alloc_pageable(ipc_kernel_map,
2773 &info_addr, info_size);
2774 if (kr != KERN_SUCCESS) {
2775 kmem_free(ipc_kernel_map,
2776 names_addr, names_size);
2777 return kr;
2778 }
2779
2780 info = (task_zone_info_t *) info_addr;
2781
2782 zn = &names[0];
2783 zi = &info[0];
2784
2785 for (i = 0; i < max_zones - num_fake_zones; i++) {
2786 struct zone zcopy;
2787
2788 assert(z != ZONE_NULL);
2789
2790 lock_zone(z);
2791 zcopy = *z;
2792 unlock_zone(z);
2793
2794 simple_lock(&all_zones_lock);
2795 z = z->next_zone;
2796 simple_unlock(&all_zones_lock);
2797
2798 /* assuming here the name data is static */
2799 (void) strncpy(zn->mzn_name, zcopy.zone_name,
2800 sizeof zn->mzn_name);
2801 zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
2802
2803 zi->tzi_count = (uint64_t)zcopy.count;
2804 zi->tzi_cur_size = (uint64_t)zcopy.cur_size;
2805 zi->tzi_max_size = (uint64_t)zcopy.max_size;
2806 zi->tzi_elem_size = (uint64_t)zcopy.elem_size;
2807 zi->tzi_alloc_size = (uint64_t)zcopy.alloc_size;
2808 zi->tzi_sum_size = zcopy.sum_count * zcopy.elem_size;
2809 zi->tzi_exhaustible = (uint64_t)zcopy.exhaustible;
2810 zi->tzi_collectable = (uint64_t)zcopy.collectable;
2811 zi->tzi_caller_acct = (uint64_t)zcopy.caller_acct;
2812 if (task->tkm_zinfo != NULL) {
2813 zi->tzi_task_alloc = task->tkm_zinfo[zcopy.index].alloc;
2814 zi->tzi_task_free = task->tkm_zinfo[zcopy.index].free;
2815 } else {
2816 zi->tzi_task_alloc = 0;
2817 zi->tzi_task_free = 0;
2818 }
2819 zn++;
2820 zi++;
2821 }
2822
2823 /*
2824 * loop through the fake zones and fill them using the specialized
2825 * functions
2826 */
2827 for (i = 0; i < num_fake_zones; i++) {
2828 int count, collectable, exhaustible, caller_acct, index;
2829 vm_size_t cur_size, max_size, elem_size, alloc_size;
2830 uint64_t sum_size;
2831
2832 strncpy(zn->mzn_name, fake_zones[i].name, sizeof zn->mzn_name);
2833 zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
2834 fake_zones[i].query(&count, &cur_size,
2835 &max_size, &elem_size,
2836 &alloc_size, &sum_size,
2837 &collectable, &exhaustible, &caller_acct);
2838 zi->tzi_count = (uint64_t)count;
2839 zi->tzi_cur_size = (uint64_t)cur_size;
2840 zi->tzi_max_size = (uint64_t)max_size;
2841 zi->tzi_elem_size = (uint64_t)elem_size;
2842 zi->tzi_alloc_size = (uint64_t)alloc_size;
2843 zi->tzi_sum_size = sum_size;
2844 zi->tzi_collectable = (uint64_t)collectable;
2845 zi->tzi_exhaustible = (uint64_t)exhaustible;
2846 zi->tzi_caller_acct = (uint64_t)caller_acct;
2847 if (task->tkm_zinfo != NULL) {
2848 index = ZINFO_SLOTS - num_fake_zones + i;
2849 zi->tzi_task_alloc = task->tkm_zinfo[index].alloc;
2850 zi->tzi_task_free = task->tkm_zinfo[index].free;
2851 } else {
2852 zi->tzi_task_alloc = 0;
2853 zi->tzi_task_free = 0;
2854 }
2855 zn++;
2856 zi++;
2857 }
2858
2859 used = max_zones * sizeof *names;
2860 if (used != names_size)
2861 bzero((char *) (names_addr + used), names_size - used);
2862
2863 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
2864 (vm_map_size_t)names_size, TRUE, &copy);
2865 assert(kr == KERN_SUCCESS);
2866
2867 *namesp = (mach_zone_name_t *) copy;
2868 *namesCntp = max_zones;
2869
2870 used = max_zones * sizeof *info;
2871
2872 if (used != info_size)
2873 bzero((char *) (info_addr + used), info_size - used);
2874
2875 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
2876 (vm_map_size_t)info_size, TRUE, &copy);
2877 assert(kr == KERN_SUCCESS);
2878
2879 *infop = (task_zone_info_t *) copy;
2880 *infoCntp = max_zones;
2881
2882 return KERN_SUCCESS;
2883 }
2884
2885 kern_return_t
2886 mach_zone_info(
2887 host_t host,
2888 mach_zone_name_array_t *namesp,
2889 mach_msg_type_number_t *namesCntp,
2890 mach_zone_info_array_t *infop,
2891 mach_msg_type_number_t *infoCntp)
2892 {
2893 mach_zone_name_t *names;
2894 vm_offset_t names_addr;
2895 vm_size_t names_size;
2896 mach_zone_info_t *info;
2897 vm_offset_t info_addr;
2898 vm_size_t info_size;
2899 unsigned int max_zones, i;
2900 zone_t z;
2901 mach_zone_name_t *zn;
2902 mach_zone_info_t *zi;
2903 kern_return_t kr;
2904
2905 vm_size_t used;
2906 vm_map_copy_t copy;
2907
2908
2909 if (host == HOST_NULL)
2910 return KERN_INVALID_HOST;
2911
2912 num_fake_zones = sizeof fake_zones / sizeof fake_zones[0];
2913
2914 /*
2915 * We assume that zones aren't freed once allocated.
2916 * We won't pick up any zones that are allocated later.
2917 */
2918
2919 simple_lock(&all_zones_lock);
2920 max_zones = (unsigned int)(num_zones + num_fake_zones);
2921 z = first_zone;
2922 simple_unlock(&all_zones_lock);
2923
2924 names_size = round_page(max_zones * sizeof *names);
2925 kr = kmem_alloc_pageable(ipc_kernel_map,
2926 &names_addr, names_size);
2927 if (kr != KERN_SUCCESS)
2928 return kr;
2929 names = (mach_zone_name_t *) names_addr;
2930
2931 info_size = round_page(max_zones * sizeof *info);
2932 kr = kmem_alloc_pageable(ipc_kernel_map,
2933 &info_addr, info_size);
2934 if (kr != KERN_SUCCESS) {
2935 kmem_free(ipc_kernel_map,
2936 names_addr, names_size);
2937 return kr;
2938 }
2939
2940 info = (mach_zone_info_t *) info_addr;
2941
2942 zn = &names[0];
2943 zi = &info[0];
2944
2945 for (i = 0; i < max_zones - num_fake_zones; i++) {
2946 struct zone zcopy;
2947
2948 assert(z != ZONE_NULL);
2949
2950 lock_zone(z);
2951 zcopy = *z;
2952 unlock_zone(z);
2953
2954 simple_lock(&all_zones_lock);
2955 z = z->next_zone;
2956 simple_unlock(&all_zones_lock);
2957
2958 /* assuming here the name data is static */
2959 (void) strncpy(zn->mzn_name, zcopy.zone_name,
2960 sizeof zn->mzn_name);
2961 zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
2962
2963 zi->mzi_count = (uint64_t)zcopy.count;
2964 zi->mzi_cur_size = (uint64_t)zcopy.cur_size;
2965 zi->mzi_max_size = (uint64_t)zcopy.max_size;
2966 zi->mzi_elem_size = (uint64_t)zcopy.elem_size;
2967 zi->mzi_alloc_size = (uint64_t)zcopy.alloc_size;
2968 zi->mzi_sum_size = zcopy.sum_count * zcopy.elem_size;
2969 zi->mzi_exhaustible = (uint64_t)zcopy.exhaustible;
2970 zi->mzi_collectable = (uint64_t)zcopy.collectable;
2971 zn++;
2972 zi++;
2973 }
2974
2975 /*
2976 * loop through the fake zones and fill them using the specialized
2977 * functions
2978 */
2979 for (i = 0; i < num_fake_zones; i++) {
2980 int count, collectable, exhaustible, caller_acct;
2981 vm_size_t cur_size, max_size, elem_size, alloc_size;
2982 uint64_t sum_size;
2983
2984 strncpy(zn->mzn_name, fake_zones[i].name, sizeof zn->mzn_name);
2985 zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
2986 fake_zones[i].query(&count, &cur_size,
2987 &max_size, &elem_size,
2988 &alloc_size, &sum_size,
2989 &collectable, &exhaustible, &caller_acct);
2990 zi->mzi_count = (uint64_t)count;
2991 zi->mzi_cur_size = (uint64_t)cur_size;
2992 zi->mzi_max_size = (uint64_t)max_size;
2993 zi->mzi_elem_size = (uint64_t)elem_size;
2994 zi->mzi_alloc_size = (uint64_t)alloc_size;
2995 zi->mzi_sum_size = sum_size;
2996 zi->mzi_collectable = (uint64_t)collectable;
2997 zi->mzi_exhaustible = (uint64_t)exhaustible;
2998
2999 zn++;
3000 zi++;
3001 }
3002
3003 used = max_zones * sizeof *names;
3004 if (used != names_size)
3005 bzero((char *) (names_addr + used), names_size - used);
3006
3007 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
3008 (vm_map_size_t)names_size, TRUE, &copy);
3009 assert(kr == KERN_SUCCESS);
3010
3011 *namesp = (mach_zone_name_t *) copy;
3012 *namesCntp = max_zones;
3013
3014 used = max_zones * sizeof *info;
3015
3016 if (used != info_size)
3017 bzero((char *) (info_addr + used), info_size - used);
3018
3019 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
3020 (vm_map_size_t)info_size, TRUE, &copy);
3021 assert(kr == KERN_SUCCESS);
3022
3023 *infop = (mach_zone_info_t *) copy;
3024 *infoCntp = max_zones;
3025
3026 return KERN_SUCCESS;
3027 }
3028
3029 /*
3030 * host_zone_info - LEGACY user interface for Mach zone information
3031 * Should use mach_zone_info() instead!
3032 */
3033 kern_return_t
3034 host_zone_info(
3035 host_t host,
3036 zone_name_array_t *namesp,
3037 mach_msg_type_number_t *namesCntp,
3038 zone_info_array_t *infop,
3039 mach_msg_type_number_t *infoCntp)
3040 {
3041 zone_name_t *names;
3042 vm_offset_t names_addr;
3043 vm_size_t names_size;
3044 zone_info_t *info;
3045 vm_offset_t info_addr;
3046 vm_size_t info_size;
3047 unsigned int max_zones, i;
3048 zone_t z;
3049 zone_name_t *zn;
3050 zone_info_t *zi;
3051 kern_return_t kr;
3052
3053 vm_size_t used;
3054 vm_map_copy_t copy;
3055
3056
3057 if (host == HOST_NULL)
3058 return KERN_INVALID_HOST;
3059
3060 #if defined(__LP64__)
3061 if (!thread_is_64bit(current_thread()))
3062 return KERN_NOT_SUPPORTED;
3063 #else
3064 if (thread_is_64bit(current_thread()))
3065 return KERN_NOT_SUPPORTED;
3066 #endif
3067
3068 num_fake_zones = sizeof fake_zones / sizeof fake_zones[0];
3069
3070 /*
3071 * We assume that zones aren't freed once allocated.
3072 * We won't pick up any zones that are allocated later.
3073 */
3074
3075 simple_lock(&all_zones_lock);
3076 max_zones = (unsigned int)(num_zones + num_fake_zones);
3077 z = first_zone;
3078 simple_unlock(&all_zones_lock);
3079
3080 names_size = round_page(max_zones * sizeof *names);
3081 kr = kmem_alloc_pageable(ipc_kernel_map,
3082 &names_addr, names_size);
3083 if (kr != KERN_SUCCESS)
3084 return kr;
3085 names = (zone_name_t *) names_addr;
3086
3087 info_size = round_page(max_zones * sizeof *info);
3088 kr = kmem_alloc_pageable(ipc_kernel_map,
3089 &info_addr, info_size);
3090 if (kr != KERN_SUCCESS) {
3091 kmem_free(ipc_kernel_map,
3092 names_addr, names_size);
3093 return kr;
3094 }
3095
3096 info = (zone_info_t *) info_addr;
3097
3098 zn = &names[0];
3099 zi = &info[0];
3100
3101 for (i = 0; i < max_zones - num_fake_zones; i++) {
3102 struct zone zcopy;
3103
3104 assert(z != ZONE_NULL);
3105
3106 lock_zone(z);
3107 zcopy = *z;
3108 unlock_zone(z);
3109
3110 simple_lock(&all_zones_lock);
3111 z = z->next_zone;
3112 simple_unlock(&all_zones_lock);
3113
3114 /* assuming here the name data is static */
3115 (void) strncpy(zn->zn_name, zcopy.zone_name,
3116 sizeof zn->zn_name);
3117 zn->zn_name[sizeof zn->zn_name - 1] = '\0';
3118
3119 zi->zi_count = zcopy.count;
3120 zi->zi_cur_size = zcopy.cur_size;
3121 zi->zi_max_size = zcopy.max_size;
3122 zi->zi_elem_size = zcopy.elem_size;
3123 zi->zi_alloc_size = zcopy.alloc_size;
3124 zi->zi_exhaustible = zcopy.exhaustible;
3125 zi->zi_collectable = zcopy.collectable;
3126
3127 zn++;
3128 zi++;
3129 }
3130
3131 /*
3132 * loop through the fake zones and fill them using the specialized
3133 * functions
3134 */
3135 for (i = 0; i < num_fake_zones; i++) {
3136 int caller_acct;
3137 uint64_t sum_space;
3138 strncpy(zn->zn_name, fake_zones[i].name, sizeof zn->zn_name);
3139 zn->zn_name[sizeof zn->zn_name - 1] = '\0';
3140 fake_zones[i].query(&zi->zi_count, &zi->zi_cur_size,
3141 &zi->zi_max_size, &zi->zi_elem_size,
3142 &zi->zi_alloc_size, &sum_space,
3143 &zi->zi_collectable, &zi->zi_exhaustible, &caller_acct);
3144 zn++;
3145 zi++;
3146 }
3147
3148 used = max_zones * sizeof *names;
3149 if (used != names_size)
3150 bzero((char *) (names_addr + used), names_size - used);
3151
3152 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
3153 (vm_map_size_t)names_size, TRUE, &copy);
3154 assert(kr == KERN_SUCCESS);
3155
3156 *namesp = (zone_name_t *) copy;
3157 *namesCntp = max_zones;
3158
3159 used = max_zones * sizeof *info;
3160 if (used != info_size)
3161 bzero((char *) (info_addr + used), info_size - used);
3162
3163 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
3164 (vm_map_size_t)info_size, TRUE, &copy);
3165 assert(kr == KERN_SUCCESS);
3166
3167 *infop = (zone_info_t *) copy;
3168 *infoCntp = max_zones;
3169
3170 return KERN_SUCCESS;
3171 }
3172
3173 extern unsigned int stack_total;
3174 extern unsigned long long stack_allocs;
3175
3176 #if defined(__i386__) || defined (__x86_64__)
3177 extern unsigned int inuse_ptepages_count;
3178 extern long long alloc_ptepages_count;
3179 #endif
3180
3181 void zone_display_zprint()
3182 {
3183 unsigned int i;
3184 zone_t the_zone;
3185
3186 if(first_zone!=NULL) {
3187 the_zone = first_zone;
3188 for (i = 0; i < num_zones; i++) {
3189 if(the_zone->cur_size > (1024*1024)) {
3190 printf("%.20s:\t%lu\n",the_zone->zone_name,(uintptr_t)the_zone->cur_size);
3191 }
3192
3193 if(the_zone->next_zone == NULL) {
3194 break;
3195 }
3196
3197 the_zone = the_zone->next_zone;
3198 }
3199 }
3200
3201 printf("Kernel Stacks:\t%lu\n",(uintptr_t)(kernel_stack_size * stack_total));
3202
3203 #if defined(__i386__) || defined (__x86_64__)
3204 printf("PageTables:\t%lu\n",(uintptr_t)(PAGE_SIZE * inuse_ptepages_count));
3205 #endif
3206
3207 printf("Kalloc.Large:\t%lu\n",(uintptr_t)kalloc_large_total);
3208 }
3209
3210
3211
3212 #if MACH_KDB
3213 #include <ddb/db_command.h>
3214 #include <ddb/db_output.h>
3215 #include <kern/kern_print.h>
3216
3217 const char *zone_labels =
3218 "ENTRY COUNT TOT_SZ MAX_SZ ELT_SZ ALLOC_SZ NAME";
3219
3220 /* Forwards */
3221 void db_print_zone(
3222 zone_t addr);
3223
3224 #if ZONE_DEBUG
3225 void db_zone_check_active(
3226 zone_t zone);
3227 void db_zone_print_active(
3228 zone_t zone);
3229 #endif /* ZONE_DEBUG */
3230 void db_zone_print_free(
3231 zone_t zone);
3232 void
3233 db_print_zone(
3234 zone_t addr)
3235 {
3236 struct zone zcopy;
3237
3238 zcopy = *addr;
3239
3240 db_printf("%8x %8x %8x %8x %6x %8x %s ",
3241 addr, zcopy.count, zcopy.cur_size,
3242 zcopy.max_size, zcopy.elem_size,
3243 zcopy.alloc_size, zcopy.zone_name);
3244 if (zcopy.exhaustible)
3245 db_printf("H");
3246 if (zcopy.collectable)
3247 db_printf("C");
3248 if (zcopy.expandable)
3249 db_printf("X");
3250 if (zcopy.caller_acct)
3251 db_printf("A");
3252 db_printf("\n");
3253 }
3254
3255 /*ARGSUSED*/
3256 void
3257 db_show_one_zone(db_expr_t addr, boolean_t have_addr,
3258 __unused db_expr_t count, __unused char *modif)
3259 {
3260 struct zone *z = (zone_t)((char *)0 + addr);
3261
3262 if (z == ZONE_NULL || !have_addr){
3263 db_error("No Zone\n");
3264 /*NOTREACHED*/
3265 }
3266
3267 db_printf("%s\n", zone_labels);
3268 db_print_zone(z);
3269 }
3270
3271 /*ARGSUSED*/
3272 void
3273 db_show_all_zones(__unused db_expr_t addr, boolean_t have_addr, db_expr_t count,
3274 __unused char *modif)
3275 {
3276 zone_t z;
3277 unsigned total = 0;
3278
3279 /*
3280 * Don't risk hanging by unconditionally locking,
3281 * risk of incoherent data is small (zones aren't freed).
3282 */
3283 have_addr = simple_lock_try(&all_zones_lock);
3284 count = num_zones;
3285 z = first_zone;
3286 if (have_addr) {
3287 simple_unlock(&all_zones_lock);
3288 }
3289
3290 db_printf("%s\n", zone_labels);
3291 for ( ; count > 0; count--) {
3292 if (!z) {
3293 db_error("Mangled Zone List\n");
3294 /*NOTREACHED*/
3295 }
3296 db_print_zone(z);
3297 total += z->cur_size;
3298
3299 have_addr = simple_lock_try(&all_zones_lock);
3300 z = z->next_zone;
3301 if (have_addr) {
3302 simple_unlock(&all_zones_lock);
3303 }
3304 }
3305 db_printf("\nTotal %8x", total);
3306 db_printf("\n\nzone_gc() has reclaimed %d pages\n", zgc_stats.pgs_freed);
3307 }
3308
3309 #if ZONE_DEBUG
3310 void
3311 db_zone_check_active(
3312 zone_t zone)
3313 {
3314 int count = 0;
3315 queue_t tmp_elem;
3316
3317 if (!zone_debug_enabled(zone) || !zone_check)
3318 return;
3319 tmp_elem = queue_first(&zone->active_zones);
3320 while (count < zone->count) {
3321 count++;
3322 if (tmp_elem == 0) {
3323 printf("unexpected zero element, zone=%p, count=%d\n",
3324 zone, count);
3325 assert(FALSE);
3326 break;
3327 }
3328 if (queue_end(tmp_elem, &zone->active_zones)) {
3329 printf("unexpected queue_end, zone=%p, count=%d\n",
3330 zone, count);
3331 assert(FALSE);
3332 break;
3333 }
3334 tmp_elem = queue_next(tmp_elem);
3335 }
3336 if (!queue_end(tmp_elem, &zone->active_zones)) {
3337 printf("not at queue_end, zone=%p, tmp_elem=%p\n",
3338 zone, tmp_elem);
3339 assert(FALSE);
3340 }
3341 }
3342
3343 void
3344 db_zone_print_active(
3345 zone_t zone)
3346 {
3347 int count = 0;
3348 queue_t tmp_elem;
3349
3350 if (!zone_debug_enabled(zone)) {
3351 printf("zone %p debug not enabled\n", zone);
3352 return;
3353 }
3354 if (!zone_check) {
3355 printf("zone_check FALSE\n");
3356 return;
3357 }
3358
3359 printf("zone %p, active elements %d\n", zone, zone->count);
3360 printf("active list:\n");
3361 tmp_elem = queue_first(&zone->active_zones);
3362 while (count < zone->count) {
3363 printf(" %p", tmp_elem);
3364 count++;
3365 if ((count % 6) == 0)
3366 printf("\n");
3367 if (tmp_elem == 0) {
3368 printf("\nunexpected zero element, count=%d\n", count);
3369 break;
3370 }
3371 if (queue_end(tmp_elem, &zone->active_zones)) {
3372 printf("\nunexpected queue_end, count=%d\n", count);
3373 break;
3374 }
3375 tmp_elem = queue_next(tmp_elem);
3376 }
3377 if (!queue_end(tmp_elem, &zone->active_zones))
3378 printf("\nnot at queue_end, tmp_elem=%p\n", tmp_elem);
3379 else
3380 printf("\n");
3381 }
3382 #endif /* ZONE_DEBUG */
3383
3384 void
3385 db_zone_print_free(
3386 zone_t zone)
3387 {
3388 int count = 0;
3389 int freecount;
3390 vm_offset_t elem;
3391
3392 freecount = zone_free_count(zone);
3393 printf("zone %p, free elements %d\n", zone, freecount);
3394 printf("free list:\n");
3395 elem = zone->free_elements;
3396 while (count < freecount) {
3397 printf(" 0x%x", elem);
3398 count++;
3399 if ((count % 6) == 0)
3400 printf("\n");
3401 if (elem == 0) {
3402 printf("\nunexpected zero element, count=%d\n", count);
3403 break;
3404 }
3405 elem = *((vm_offset_t *)elem);
3406 }
3407 if (elem != 0)
3408 printf("\nnot at end of free list, elem=0x%x\n", elem);
3409 else
3410 printf("\n");
3411 }
3412
3413 #endif /* MACH_KDB */
3414
3415
3416 #if ZONE_DEBUG
3417
3418 /* should we care about locks here ? */
3419
3420 #if MACH_KDB
3421 void *
3422 next_element(
3423 zone_t z,
3424 void *prev)
3425 {
3426 char *elt = (char *)prev;
3427
3428 if (!zone_debug_enabled(z))
3429 return(NULL);
3430 elt -= ZONE_DEBUG_OFFSET;
3431 elt = (char *) queue_next((queue_t) elt);
3432 if ((queue_t) elt == &z->active_zones)
3433 return(NULL);
3434 elt += ZONE_DEBUG_OFFSET;
3435 return(elt);
3436 }
3437
3438 void *
3439 first_element(
3440 zone_t z)
3441 {
3442 char *elt;
3443
3444 if (!zone_debug_enabled(z))
3445 return(NULL);
3446 if (queue_empty(&z->active_zones))
3447 return(NULL);
3448 elt = (char *)queue_first(&z->active_zones);
3449 elt += ZONE_DEBUG_OFFSET;
3450 return(elt);
3451 }
3452
3453 /*
3454 * Second arg controls how many zone elements are printed:
3455 * 0 => none
3456 * n, n < 0 => all
3457 * n, n > 0 => last n on active list
3458 */
3459 int
3460 zone_count(
3461 zone_t z,
3462 int tail)
3463 {
3464 void *elt;
3465 int count = 0;
3466 boolean_t print = (tail != 0);
3467
3468 if (tail < 0)
3469 tail = z->count;
3470 if (z->count < tail)
3471 tail = 0;
3472 tail = z->count - tail;
3473 for (elt = first_element(z); elt; elt = next_element(z, elt)) {
3474 if (print && tail <= count)
3475 db_printf("%8x\n", elt);
3476 count++;
3477 }
3478 assert(count == z->count);
3479 return(count);
3480 }
3481 #endif /* MACH_KDB */
3482
3483 #define zone_in_use(z) ( z->count || z->free_elements )
3484
3485 void
3486 zone_debug_enable(
3487 zone_t z)
3488 {
3489 if (zone_debug_enabled(z) || zone_in_use(z) ||
3490 z->alloc_size < (z->elem_size + ZONE_DEBUG_OFFSET))
3491 return;
3492 queue_init(&z->active_zones);
3493 z->elem_size += ZONE_DEBUG_OFFSET;
3494 }
3495
3496 void
3497 zone_debug_disable(
3498 zone_t z)
3499 {
3500 if (!zone_debug_enabled(z) || zone_in_use(z))
3501 return;
3502 z->elem_size -= ZONE_DEBUG_OFFSET;
3503 z->active_zones.next = z->active_zones.prev = NULL;
3504 }
3505
3506
3507 #endif /* ZONE_DEBUG */