1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: kern/zalloc.c
60 * Author: Avadis Tevanian, Jr.
61 *
62 * Zone-based memory allocator. A zone is a collection of fixed size
63 * data blocks for which quick allocation/deallocation is possible.
64 */
65#include <zone_debug.h>
2d21ac55 66#include <zone_alias_addr.h>
1c79356b
A
67#include <norma_vm.h>
68#include <mach_kdb.h>
91447636
A
69
70#include <mach/mach_types.h>
71#include <mach/vm_param.h>
72#include <mach/kern_return.h>
73#include <mach/mach_host_server.h>
74#include <mach/machine/vm_types.h>
75#include <mach_debug/zone_info.h>
76
77#include <kern/kern_types.h>
1c79356b 78#include <kern/assert.h>
91447636 79#include <kern/host.h>
1c79356b
A
80#include <kern/macro_help.h>
81#include <kern/sched.h>
b0d623f7 82#include <kern/locks.h>
1c79356b
A
83#include <kern/sched_prim.h>
84#include <kern/misc_protos.h>
0b4e3aa0 85#include <kern/thread_call.h>
1c79356b 86#include <kern/zalloc.h>
91447636
A
87#include <kern/kalloc.h>
88
89#include <vm/pmap.h>
90#include <vm/vm_map.h>
1c79356b 91#include <vm/vm_kern.h>
91447636
A
92#include <vm/vm_page.h>
93
1c79356b
A
94#include <machine/machparam.h>
95
2d21ac55
A
96#include <libkern/OSDebug.h>
97#include <sys/kdebug.h>
98
91447636
A
99#if defined(__ppc__)
100/* for fake zone stat routines */
101#include <ppc/savearea.h>
102#include <ppc/mappings.h>
103#endif
1c79356b 104
2d21ac55 105
c910b4d9
A
106/*
107 * Zone Corruption Debugging
108 *
109 * We provide three methods to detect use of a zone element after it's been freed. These
110 * checks are enabled by specifying "-zc" and/or "-zp" in the boot-args:
111 *
1c79356b
A
112 * (1) Range-check the free-list "next" ptr for sanity.
113 * (2) Store the ptr in two different words, and compare them against
c910b4d9
A
114 * each other when re-using the zone element, to detect modifications.
 115 * (3) Poison the freed memory by overwriting it with 0xdeadbeef.
 116 *
 117 * The first two checks are fairly lightweight and are enabled by specifying "-zc"
118 * in the boot-args. If you want more aggressive checking for use-after-free bugs
119 * and you don't mind the additional overhead, then turn on poisoning by adding
120 * "-zp" to the boot-args in addition to "-zc". If you specify -zp without -zc,
121 * it still poisons the memory when it's freed, but doesn't check if the memory
122 * has been altered later when it's reallocated.
1c79356b
A
123 */
124
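/*
 * For example, a system booted with "-zc -zp" added to its boot-args
 * (typically set through the nvram boot-args variable) gets both the
 * cheap free-list checks and full element poisoning.
 */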
c910b4d9
A
125boolean_t check_freed_element = FALSE; /* enabled by -zc in boot-args */
126boolean_t zfree_clear = FALSE; /* enabled by -zp in boot-args */
1c79356b 127
c910b4d9 128#define is_kernel_data_addr(a) (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
1c79356b
A
129
130#define ADD_TO_ZONE(zone, element) \
131MACRO_BEGIN \
c910b4d9
A
132 if (zfree_clear) \
133 { unsigned int i; \
134 for (i=0; \
135 i < zone->elem_size/sizeof(uint32_t); \
136 i++) \
137 ((uint32_t *)(element))[i] = 0xdeadbeef; \
1c79356b 138 } \
c910b4d9
A
139 *((vm_offset_t *)(element)) = (zone)->free_elements; \
140 if (check_freed_element) { \
141 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
142 ((vm_offset_t *)(element))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
143 (zone)->free_elements; \
1c79356b 144 } \
c910b4d9
A
145 (zone)->free_elements = (vm_offset_t) (element); \
146 (zone)->count--; \
1c79356b
A
147MACRO_END
148
c910b4d9
A
149#define REMOVE_FROM_ZONE(zone, ret, type) \
150MACRO_BEGIN \
151 (ret) = (type) (zone)->free_elements; \
152 if ((ret) != (type) 0) { \
153 if (check_freed_element) { \
154 if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0]) || \
155 ((zone)->elem_size >= (2 * sizeof(vm_offset_t)) && \
156 ((vm_offset_t *)(ret))[((zone)->elem_size/sizeof(vm_offset_t))-1] != \
157 ((vm_offset_t *)(ret))[0])) \
158 panic("a freed zone element has been modified");\
159 if (zfree_clear) { \
160 unsigned int ii; \
161 for (ii = sizeof(vm_offset_t) / sizeof(uint32_t); \
162 ii < zone->elem_size/sizeof(uint32_t) - sizeof(vm_offset_t) / sizeof(uint32_t); \
163 ii++) \
164 if (((uint32_t *)(ret))[ii] != (uint32_t)0xdeadbeef) \
165 panic("a freed zone element has been modified");\
166 } \
167 } \
168 (zone)->count++; \
169 (zone)->free_elements = *((vm_offset_t *)(ret)); \
170 } \
171MACRO_END
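/*
 * Net effect of ADD_TO_ZONE on a freed element when both -zc and -zp are in
 * effect: word 0 holds the free-list link, the last word holds a backup copy
 * of that link, and every word in between is poisoned with 0xdeadbeef.  A
 * minimal sketch of the corresponding validation, equivalent to the checks
 * REMOVE_FROM_ZONE performs above (hypothetical helper, compiled out):
 */
#if 0
static boolean_t
zone_element_is_sane(zone_t zone, vm_offset_t elem)
{
	vm_size_t	nptrs = zone->elem_size / sizeof(vm_offset_t);
	vm_offset_t	link = ((vm_offset_t *)elem)[0];
	unsigned int	ii;

	/* the free-list link must look like a kernel pointer (or be 0) */
	if (!is_kernel_data_addr(link))
		return FALSE;

	/* the backup copy of the link in the last word must still match */
	if (zone->elem_size >= (2 * sizeof(vm_offset_t)) &&
	    ((vm_offset_t *)elem)[nptrs - 1] != link)
		return FALSE;

	/* every poisoned word in between must still read 0xdeadbeef */
	for (ii = sizeof(vm_offset_t) / sizeof(uint32_t);
	     ii < zone->elem_size / sizeof(uint32_t) - sizeof(vm_offset_t) / sizeof(uint32_t);
	     ii++)
		if (((uint32_t *)elem)[ii] != (uint32_t)0xdeadbeef)
			return FALSE;

	return TRUE;
}
#endif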
1c79356b
A
172
173#if ZONE_DEBUG
174#define zone_debug_enabled(z) z->active_zones.next
55e303ae
A
175#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
176#define ZONE_DEBUG_OFFSET ROUNDUP(sizeof(queue_chain_t),16)
1c79356b
A
177#endif /* ZONE_DEBUG */
178
179/*
180 * Support for garbage collection of unused zone pages:
181 */
182
183struct zone_page_table_entry {
55e303ae 184 struct zone_page_table_entry *link;
1c79356b 185 short alloc_count;
55e303ae 186 short collect_count;
1c79356b
A
187};
188
1c79356b
A
189/* Forwards */
190void zone_page_init(
191 vm_offset_t addr,
192 vm_size_t size,
193 int value);
194
195void zone_page_alloc(
196 vm_offset_t addr,
197 vm_size_t size);
198
55e303ae
A
199void zone_page_free_element(
200 struct zone_page_table_entry **free_pages,
1c79356b
A
201 vm_offset_t addr,
202 vm_size_t size);
203
55e303ae 204void zone_page_collect(
1c79356b
A
205 vm_offset_t addr,
206 vm_size_t size);
207
208boolean_t zone_page_collectable(
209 vm_offset_t addr,
210 vm_size_t size);
211
212void zone_page_keep(
213 vm_offset_t addr,
214 vm_size_t size);
215
0b4e3aa0
A
216void zalloc_async(
217 thread_call_param_t p0,
218 thread_call_param_t p1);
219
b0d623f7 220void zone_display_zprint( void );
0b4e3aa0 221
1c79356b
A
222#if ZONE_DEBUG && MACH_KDB
223int zone_count(
224 zone_t z,
225 int tail);
226#endif /* ZONE_DEBUG && MACH_KDB */
227
228vm_map_t zone_map = VM_MAP_NULL;
229
230zone_t zone_zone = ZONE_NULL; /* the zone containing other zones */
231
232/*
233 * The VM system gives us an initial chunk of memory.
234 * It has to be big enough to allocate the zone_zone
235 */
236
237vm_offset_t zdata;
238vm_size_t zdata_size;
239
240#define lock_zone(zone) \
241MACRO_BEGIN \
b0d623f7 242 lck_mtx_lock_spin(&(zone)->lock); \
1c79356b
A
243MACRO_END
244
245#define unlock_zone(zone) \
246MACRO_BEGIN \
2d21ac55 247 lck_mtx_unlock(&(zone)->lock); \
1c79356b
A
248MACRO_END
249
9bccf70c
A
250#define zone_wakeup(zone) thread_wakeup((event_t)(zone))
251#define zone_sleep(zone) \
b0d623f7 252 (void) lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN, (event_t)(zone), THREAD_UNINT);
2d21ac55 253
9bccf70c 254
1c79356b
A
255#define lock_zone_init(zone) \
256MACRO_BEGIN \
2d21ac55
A
257 char _name[32]; \
258 (void) snprintf(_name, sizeof (_name), "zone.%s", (zone)->zone_name); \
259 lck_grp_attr_setdefault(&(zone)->lock_grp_attr); \
260 lck_grp_init(&(zone)->lock_grp, _name, &(zone)->lock_grp_attr); \
261 lck_attr_setdefault(&(zone)->lock_attr); \
262 lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext, \
263 &(zone)->lock_grp, &(zone)->lock_attr); \
1c79356b
A
264MACRO_END
265
b0d623f7 266#define lock_try_zone(zone) lck_mtx_try_lock_spin(&zone->lock)
1c79356b
A
267
268kern_return_t zget_space(
269 vm_offset_t size,
270 vm_offset_t *result);
271
272decl_simple_lock_data(,zget_space_lock)
273vm_offset_t zalloc_next_space;
274vm_offset_t zalloc_end_of_space;
275vm_size_t zalloc_wasted_space;
276
277/*
278 * Garbage collection map information
279 */
1c79356b
A
280struct zone_page_table_entry * zone_page_table;
281vm_offset_t zone_map_min_address;
282vm_offset_t zone_map_max_address;
91447636 283unsigned int zone_pages;
1c79356b
A
284
285/*
286 * Exclude more than one concurrent garbage collection
287 */
b0d623f7
A
288decl_lck_mtx_data(, zone_gc_lock)
289
290lck_attr_t zone_lck_attr;
291lck_grp_t zone_lck_grp;
292lck_grp_attr_t zone_lck_grp_attr;
293lck_mtx_ext_t zone_lck_ext;
294
1c79356b 295
2d21ac55 296#if !ZONE_ALIAS_ADDR
55e303ae 297#define from_zone_map(addr, size) \
1c79356b 298 ((vm_offset_t)(addr) >= zone_map_min_address && \
55e303ae 299 ((vm_offset_t)(addr) + size -1) < zone_map_max_address)
2d21ac55
A
300#else
301#define from_zone_map(addr, size) \
302 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) >= zone_map_min_address && \
303 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) + size -1) < zone_map_max_address)
304#endif
1c79356b
A
305
306#define ZONE_PAGE_USED 0
307#define ZONE_PAGE_UNUSED -1
308
309
310/*
311 * Protects first_zone, last_zone, num_zones,
312 * and the next_zone field of zones.
313 */
314decl_simple_lock_data(, all_zones_lock)
315zone_t first_zone;
316zone_t *last_zone;
91447636 317unsigned int num_zones;
1c79356b 318
0b4e3aa0
A
319boolean_t zone_gc_allowed = TRUE;
320boolean_t zone_gc_forced = FALSE;
c910b4d9 321boolean_t panic_include_zprint = FALSE;
0b4e3aa0
A
322unsigned zone_gc_last_tick = 0;
323unsigned zone_gc_max_rate = 0; /* in ticks */
324
c910b4d9
A
325/*
326 * Zone leak debugging code
327 *
328 * When enabled, this code keeps a log to track allocations to a particular zone that have not
329 * yet been freed. Examining this log will reveal the source of a zone leak. The log is allocated
330 * only when logging is enabled, so there is no effect on the system when it's turned off. Logging is
331 * off by default.
332 *
333 * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone>
334 * is the name of the zone you wish to log.
335 *
336 * This code only tracks one zone, so you need to identify which one is leaking first.
337 * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone
338 * garbage collector. Note that the zone name printed in the panic message is not necessarily the one
339 * containing the leak. So do a zprint from gdb and locate the zone with the bloated size. This
340 * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test. The
341 * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs.
342 * See the help in the kgmacros for usage info.
343 *
344 *
345 * Zone corruption logging
346 *
347 * Logging can also be used to help identify the source of a zone corruption. First, identify the zone
348 * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args. When -zc is used in conjunction
349 * with zlog, it changes the logging style to track both allocations and frees to the zone. So when the
350 * corruption is detected, examining the log will show you the stack traces of the callers who last allocated
351 * and freed any particular element in the zone. Use the findelem kgmacro with the address of the element that's been
352 * corrupted to examine its history. This should lead to the source of the corruption.
353 */
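/*
 * For example (hypothetical zone name), to hunt a leak in a zone called
 * "kalloc.128" one would boot with:
 *
 *	zlog=kalloc.128 zrecs=8000
 *
 * and to track corruption in the same zone:
 *
 *	-zc zlog=kalloc.128
 */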
354
355static int log_records; /* size of the log, expressed in number of records */
356
357#define MAX_ZONE_NAME 32 /* max length of a zone name we can take from the boot-args */
358
359static char zone_name_to_log[MAX_ZONE_NAME] = ""; /* the zone name we're logging, if any */
360
361/*
362 * The number of records in the log is configurable via the zrecs parameter in boot-args. Set this to
363 * the number of records you want in the log. For example, "zrecs=1000" sets it to 1000 records. Note
364 * that the larger the size of the log, the slower the system will run due to linear searching in the log,
365 * but one doesn't generally care about performance when tracking down a leak. The log is capped at 8000
366 * records since going much larger than this tends to make the system unresponsive and unbootable on small
367 * memory configurations. The default value is 4000 records.
368 *
 369 * MAX_DEPTH configures how deep a stack trace is taken on each zalloc in the zone of interest. 15
370 * levels is usually enough to get past all the layers of code in kalloc and IOKit and see who the actual
371 * caller is up above these lower levels.
372 */
373
374#define ZRECORDS_MAX 8000 /* Max records allowed in the log */
 375#define ZRECORDS_DEFAULT 4000 /* default records in log if zrecs is not specified in boot-args */
376#define MAX_DEPTH 15 /* number of levels of the stack trace to record */
0b4e3aa0 377
c910b4d9
A
378/*
379 * Each record in the log contains a pointer to the zone element it refers to, a "time" number that allows
380 * the records to be ordered chronologically, and a small array to hold the pc's from the stack trace. A
381 * record is added to the log each time a zalloc() is done in the zone_of_interest. For leak debugging,
382 * the record is cleared when a zfree() is done. For corruption debugging, the log tracks both allocs and frees.
383 * If the log fills, old records are replaced as if it were a circular buffer.
384 */
385
386struct zrecord {
 387 void *z_element; /* the element that was zalloc'ed or zfree'ed */
388 uint32_t z_opcode:1, /* whether it was a zalloc or zfree */
389 z_time:31; /* time index when operation was done */
390 void *z_pc[MAX_DEPTH]; /* stack trace of caller */
391};
392
393/*
394 * Opcodes for the z_opcode field:
395 */
396
397#define ZOP_ALLOC 1
398#define ZOP_FREE 0
399
400/*
401 * The allocation log and all the related variables are protected by the zone lock for the zone_of_interest
402 */
403
404static struct zrecord *zrecords; /* the log itself, dynamically allocated when logging is enabled */
405static int zcurrent = 0; /* index of the next slot in the log to use */
406static int zrecorded = 0; /* number of allocations recorded in the log */
407static unsigned int ztime = 0; /* a timestamp of sorts */
408static zone_t zone_of_interest = NULL; /* the zone being watched; corresponds to zone_name_to_log */
409
410/*
411 * Decide if we want to log this zone by doing a string compare between a zone name and the name
412 * of the zone to log. Return true if the strings are equal, false otherwise. Because it's not
413 * possible to include spaces in strings passed in via the boot-args, a period in the logname will
414 * match a space in the zone name.
415 */
416
417static int
418log_this_zone(const char *zonename, const char *logname)
419{
420 int len;
421 const char *zc = zonename;
422 const char *lc = logname;
423
424 /*
425 * Compare the strings. We bound the compare by MAX_ZONE_NAME.
426 */
427
428 for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) {
429
430 /*
 431 * If the current characters don't match, check for a space
 432 * in the zone name and a corresponding period in the log name.
433 * If that's not there, then the strings don't match.
434 */
435
436 if (*zc != *lc && !(*zc == ' ' && *lc == '.'))
437 break;
438
439 /*
440 * The strings are equal so far. If we're at the end, then it's a match.
441 */
442
443 if (*zc == '\0')
444 return TRUE;
445 }
446
447 return FALSE;
448}
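/*
 * For instance, because a boot-args value cannot contain spaces, a zone
 * named "vm objects" is selected with "zlog=vm.objects"; with those inputs
 * log_this_zone("vm objects", "vm.objects") returns TRUE.
 */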
449
450
451/*
452 * Test if we want to log this zalloc/zfree event. We log if this is the zone we're interested in and
453 * the buffer for the records has been allocated.
454 */
455
456#define DO_LOGGING(z) (zrecords && (z) == zone_of_interest)
457
458extern boolean_t zlog_ready;
459
460
1c79356b
A
461/*
462 * zinit initializes a new zone. The zone data structures themselves
463 * are stored in a zone, which is initially a static structure that
464 * is initialized by zone_init.
465 */
466zone_t
467zinit(
468 vm_size_t size, /* the size of an element */
469 vm_size_t max, /* maximum memory to use */
470 vm_size_t alloc, /* allocation size */
91447636 471 const char *name) /* a name for the zone */
1c79356b
A
472{
473 zone_t z;
474
475 if (zone_zone == ZONE_NULL) {
476 if (zget_space(sizeof(struct zone), (vm_offset_t *)&z)
477 != KERN_SUCCESS)
478 return(ZONE_NULL);
479 } else
480 z = (zone_t) zalloc(zone_zone);
481 if (z == ZONE_NULL)
482 return(ZONE_NULL);
483
484 /*
485 * Round off all the parameters appropriately.
486 */
487 if (size < sizeof(z->free_elements))
488 size = sizeof(z->free_elements);
489 size = ((size-1) + sizeof(z->free_elements)) -
490 ((size-1) % sizeof(z->free_elements));
491 if (alloc == 0)
492 alloc = PAGE_SIZE;
91447636
A
493 alloc = round_page(alloc);
494 max = round_page(max);
1c79356b 495 /*
91447636
A
496 * we look for an allocation size with less than 1% waste
497 * up to 5 pages in size...
 498 * otherwise, we look for the allocation size with the least fragmentation
 499 * in the range of 1 - 5 pages.
500 * This size will be used unless
1c79356b
A
501 * the user suggestion is larger AND has less fragmentation
502 */
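	/*
	 * Worked example (assuming a 4K page): for a 192-byte element, one
	 * page wastes 64 bytes and two pages waste 128, both more than 1% of
	 * the allocation, but three pages hold exactly 64 elements with no
	 * waste, so the search below settles on alloc = 3 * PAGE_SIZE.
	 */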
2d21ac55
A
503#if ZONE_ALIAS_ADDR
504 if ((size < PAGE_SIZE) && (PAGE_SIZE % size <= PAGE_SIZE / 10))
505 alloc = PAGE_SIZE;
506 else
507#endif
1c79356b
A
508 { vm_size_t best, waste; unsigned int i;
509 best = PAGE_SIZE;
510 waste = best % size;
91447636
A
511
512 for (i = 1; i <= 5; i++) {
513 vm_size_t tsize, twaste;
514
515 tsize = i * PAGE_SIZE;
516
517 if ((tsize % size) < (tsize / 100)) {
518 alloc = tsize;
519 goto use_this_allocation;
520 }
1c79356b
A
521 twaste = tsize % size;
522 if (twaste < waste)
523 best = tsize, waste = twaste;
524 }
525 if (alloc <= best || (alloc % size >= waste))
526 alloc = best;
527 }
91447636 528use_this_allocation:
1c79356b
A
529 if (max && (max < alloc))
530 max = alloc;
531
532 z->free_elements = 0;
533 z->cur_size = 0;
534 z->max_size = max;
535 z->elem_size = size;
536 z->alloc_size = alloc;
537 z->zone_name = name;
538 z->count = 0;
539 z->doing_alloc = FALSE;
a3d08fcd 540 z->doing_gc = FALSE;
1c79356b
A
541 z->exhaustible = FALSE;
542 z->collectable = TRUE;
543 z->allows_foreign = FALSE;
544 z->expandable = TRUE;
545 z->waiting = FALSE;
0b4e3aa0 546 z->async_pending = FALSE;
1c79356b
A
547
548#if ZONE_DEBUG
2d21ac55 549 z->active_zones.next = z->active_zones.prev = NULL;
1c79356b
A
550 zone_debug_enable(z);
551#endif /* ZONE_DEBUG */
552 lock_zone_init(z);
553
554 /*
555 * Add the zone to the all-zones list.
556 */
557
558 z->next_zone = ZONE_NULL;
0b4e3aa0 559 thread_call_setup(&z->call_async_alloc, zalloc_async, z);
1c79356b
A
560 simple_lock(&all_zones_lock);
561 *last_zone = z;
562 last_zone = &z->next_zone;
563 num_zones++;
564 simple_unlock(&all_zones_lock);
565
c910b4d9
A
566 /*
567 * Check if we should be logging this zone. If so, remember the zone pointer.
568 */
569
570 if (log_this_zone(z->zone_name, zone_name_to_log)) {
571 zone_of_interest = z;
572 }
573
574 /*
575 * If we want to log a zone, see if we need to allocate buffer space for the log. Some vm related zones are
576 * zinit'ed before we can do a kmem_alloc, so we have to defer allocation in that case. zlog_ready is set to
577 * TRUE once enough of the VM system is up and running to allow a kmem_alloc to work. If we want to log one
578 * of the VM related zones that's set up early on, we will skip allocation of the log until zinit is called again
579 * later on some other zone. So note we may be allocating a buffer to log a zone other than the one being initialized
580 * right now.
581 */
582
583 if (zone_of_interest != NULL && zrecords == NULL && zlog_ready) {
584 if (kmem_alloc(kernel_map, (vm_offset_t *)&zrecords, log_records * sizeof(struct zrecord)) == KERN_SUCCESS) {
585
586 /*
587 * We got the memory for the log. Zero it out since the code needs this to identify unused records.
588 * At this point, everything is set up and we're ready to start logging this zone.
589 */
590
591 bzero((void *)zrecords, log_records * sizeof(struct zrecord));
592 printf("zone: logging started for zone %s (%p)\n", zone_of_interest->zone_name, zone_of_interest);
593
594 } else {
595 printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n");
596 zone_of_interest = NULL;
597 }
598 }
599
1c79356b
A
600 return(z);
601}
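/*
 * Typical usage (hypothetical caller): carve out a zone for a fixed-size
 * structure, cap it at 64KB, and let zinit choose the allocation size by
 * passing 0 for the alloc argument:
 *
 *	my_zone = zinit(sizeof (struct my_elem), 64 * 1024, 0, "my elems");
 */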
602
603/*
604 * Cram the given memory into the specified zone.
605 */
606void
607zcram(
608 register zone_t zone,
91447636 609 void *newaddr,
1c79356b
A
610 vm_size_t size)
611{
612 register vm_size_t elem_size;
91447636 613 vm_offset_t newmem = (vm_offset_t) newaddr;
1c79356b
A
614
615 /* Basic sanity checks */
616 assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
617 assert(!zone->collectable || zone->allows_foreign
55e303ae 618 || (from_zone_map(newmem, size)));
1c79356b
A
619
620 elem_size = zone->elem_size;
621
622 lock_zone(zone);
623 while (size >= elem_size) {
624 ADD_TO_ZONE(zone, newmem);
55e303ae 625 if (from_zone_map(newmem, elem_size))
1c79356b
A
626 zone_page_alloc(newmem, elem_size);
627 zone->count++; /* compensate for ADD_TO_ZONE */
628 size -= elem_size;
629 newmem += elem_size;
630 zone->cur_size += elem_size;
631 }
632 unlock_zone(zone);
633}
634
635/*
636 * Contiguous space allocator for non-paged zones. Allocates "size" amount
637 * of memory from zone_map.
638 */
639
640kern_return_t
641zget_space(
642 vm_offset_t size,
643 vm_offset_t *result)
644{
645 vm_offset_t new_space = 0;
91447636 646 vm_size_t space_to_add = 0;
1c79356b
A
647
648 simple_lock(&zget_space_lock);
649 while ((zalloc_next_space + size) > zalloc_end_of_space) {
650 /*
651 * Add at least one page to allocation area.
652 */
653
91447636 654 space_to_add = round_page(size);
1c79356b
A
655
656 if (new_space == 0) {
657 kern_return_t retval;
658 /*
659 * Memory cannot be wired down while holding
660 * any locks that the pageout daemon might
661 * need to free up pages. [Making the zget_space
662 * lock a complex lock does not help in this
663 * regard.]
664 *
665 * Unlock and allocate memory. Because several
666 * threads might try to do this at once, don't
667 * use the memory before checking for available
668 * space again.
669 */
670
671 simple_unlock(&zget_space_lock);
672
673 retval = kernel_memory_allocate(zone_map, &new_space,
674 space_to_add, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
675 if (retval != KERN_SUCCESS)
676 return(retval);
2d21ac55
A
677#if ZONE_ALIAS_ADDR
678 if (space_to_add == PAGE_SIZE)
679 new_space = zone_alias_addr(new_space);
680#endif
1c79356b
A
681 zone_page_init(new_space, space_to_add,
682 ZONE_PAGE_USED);
683 simple_lock(&zget_space_lock);
684 continue;
685 }
686
687
688 /*
689 * Memory was allocated in a previous iteration.
690 *
691 * Check whether the new region is contiguous
692 * with the old one.
693 */
694
695 if (new_space != zalloc_end_of_space) {
696 /*
697 * Throw away the remainder of the
698 * old space, and start a new one.
699 */
700 zalloc_wasted_space +=
701 zalloc_end_of_space - zalloc_next_space;
702 zalloc_next_space = new_space;
703 }
704
705 zalloc_end_of_space = new_space + space_to_add;
706
707 new_space = 0;
708 }
709 *result = zalloc_next_space;
710 zalloc_next_space += size;
711 simple_unlock(&zget_space_lock);
712
713 if (new_space != 0)
714 kmem_free(zone_map, new_space, space_to_add);
715
716 return(KERN_SUCCESS);
717}
718
719
720/*
721 * Steal memory for the zone package. Called from
722 * vm_page_bootstrap().
723 */
724void
725zone_steal_memory(void)
726{
91447636
A
727 zdata_size = round_page(128*sizeof(struct zone));
728 zdata = (vm_offset_t)((char *)pmap_steal_memory(zdata_size) - (char *)0);
1c79356b
A
729}
730
731
732/*
733 * Fill a zone with enough memory to contain at least nelem elements.
b0d623f7 734 * Memory is obtained with kmem_alloc_kobject from the kernel_map.
1c79356b
A
735 * Return the number of elements actually put into the zone, which may
736 * be more than the caller asked for since the memory allocation is
737 * rounded up to a full page.
738 */
739int
740zfill(
741 zone_t zone,
742 int nelem)
743{
744 kern_return_t kr;
745 vm_size_t size;
746 vm_offset_t memory;
747 int nalloc;
748
749 assert(nelem > 0);
750 if (nelem <= 0)
751 return 0;
752 size = nelem * zone->elem_size;
91447636 753 size = round_page(size);
b0d623f7 754 kr = kmem_alloc_kobject(kernel_map, &memory, size);
1c79356b
A
755 if (kr != KERN_SUCCESS)
756 return 0;
757
758 zone_change(zone, Z_FOREIGN, TRUE);
91447636 759 zcram(zone, (void *)memory, size);
b0d623f7 760 nalloc = (int)(size / zone->elem_size);
1c79356b
A
761 assert(nalloc >= nelem);
762
763 return nalloc;
764}
765
766/*
767 * Initialize the "zone of zones" which uses fixed memory allocated
768 * earlier in memory initialization. zone_bootstrap is called
769 * before zone_init.
770 */
771void
772zone_bootstrap(void)
773{
774 vm_size_t zone_zone_size;
775 vm_offset_t zone_zone_space;
2d21ac55
A
776 char temp_buf[16];
777
c910b4d9 778 /* see if we want freed zone element checking and/or poisoning */
593a1d5f 779 if (PE_parse_boot_argn("-zc", temp_buf, sizeof (temp_buf))) {
c910b4d9
A
780 check_freed_element = TRUE;
781 }
782
783 if (PE_parse_boot_argn("-zp", temp_buf, sizeof (temp_buf))) {
784 zfree_clear = TRUE;
785 }
786
787 /*
 788 * Check for and set up zone leak detection if requested via boot-args. We recognize two
789 * boot-args:
790 *
791 * zlog=<zone_to_log>
792 * zrecs=<num_records_in_log>
793 *
794 * The zlog arg is used to specify the zone name that should be logged, and zrecs is used to
795 * control the size of the log. If zrecs is not specified, a default value is used.
796 */
797
798 if (PE_parse_boot_argn("zlog", zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) {
799 if (PE_parse_boot_argn("zrecs", &log_records, sizeof(log_records)) == TRUE) {
800
801 /*
802 * Don't allow more than ZRECORDS_MAX records even if the user asked for more.
803 * This prevents accidentally hogging too much kernel memory and making the system
804 * unusable.
805 */
806
807 log_records = MIN(ZRECORDS_MAX, log_records);
808
809 } else {
810 log_records = ZRECORDS_DEFAULT;
811 }
2d21ac55 812 }
1c79356b 813
91447636 814 simple_lock_init(&all_zones_lock, 0);
1c79356b
A
815
816 first_zone = ZONE_NULL;
817 last_zone = &first_zone;
818 num_zones = 0;
819
91447636 820 simple_lock_init(&zget_space_lock, 0);
1c79356b
A
821 zalloc_next_space = zdata;
822 zalloc_end_of_space = zdata + zdata_size;
823 zalloc_wasted_space = 0;
824
825 /* assertion: nobody else called zinit before us */
826 assert(zone_zone == ZONE_NULL);
827 zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
828 sizeof(struct zone), "zones");
829 zone_change(zone_zone, Z_COLLECT, FALSE);
830 zone_zone_size = zalloc_end_of_space - zalloc_next_space;
831 zget_space(zone_zone_size, &zone_zone_space);
91447636 832 zcram(zone_zone, (void *)zone_zone_space, zone_zone_size);
1c79356b
A
833}
834
835void
836zone_init(
837 vm_size_t max_zonemap_size)
838{
839 kern_return_t retval;
840 vm_offset_t zone_min;
841 vm_offset_t zone_max;
842 vm_size_t zone_table_size;
843
844 retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
b0d623f7
A
845 FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT,
846 &zone_map);
91447636 847
1c79356b
A
848 if (retval != KERN_SUCCESS)
849 panic("zone_init: kmem_suballoc failed");
91447636 850 zone_max = zone_min + round_page(max_zonemap_size);
1c79356b
A
851 /*
852 * Setup garbage collection information:
853 */
b0d623f7 854 zone_table_size = atop_kernel(zone_max - zone_min) *
1c79356b 855 sizeof(struct zone_page_table_entry);
b0d623f7 856 if (kmem_alloc_kobject(zone_map, (vm_offset_t *) &zone_page_table,
1c79356b
A
857 zone_table_size) != KERN_SUCCESS)
858 panic("zone_init");
91447636 859 zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
b0d623f7 860 zone_pages = (unsigned int)atop_kernel(zone_max - zone_min);
1c79356b
A
861 zone_map_min_address = zone_min;
862 zone_map_max_address = zone_max;
b0d623f7
A
863
864 lck_grp_attr_setdefault(&zone_lck_grp_attr);
865 lck_grp_init(&zone_lck_grp, "zones", &zone_lck_grp_attr);
866 lck_attr_setdefault(&zone_lck_attr);
867 lck_mtx_init_ext(&zone_gc_lock, &zone_lck_ext, &zone_lck_grp, &zone_lck_attr);
868
1c79356b
A
869 zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
870}
871
b0d623f7 872extern volatile SInt32 kfree_nop_count;
1c79356b
A
873
874/*
875 * zalloc returns an element from the specified zone.
876 */
91447636 877void *
1c79356b
A
878zalloc_canblock(
879 register zone_t zone,
880 boolean_t canblock)
881{
882 vm_offset_t addr;
883 kern_return_t retval;
c910b4d9
A
884 void *bt[MAX_DEPTH]; /* only used if zone logging is enabled */
885 int numsaved = 0;
886 int i;
1c79356b
A
887
888 assert(zone != ZONE_NULL);
1c79356b 889
c910b4d9
A
890 /*
891 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
892 */
893
894 if (DO_LOGGING(zone))
895 numsaved = OSBacktrace(&bt[0], MAX_DEPTH);
896
1c79356b
A
897 lock_zone(zone);
898
899 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
0b4e3aa0 900
a3d08fcd
A
901 while ((addr == 0) && canblock && (zone->doing_gc)) {
902 zone->waiting = TRUE;
903 zone_sleep(zone);
904 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
905 }
906
0b4e3aa0 907 while ((addr == 0) && canblock) {
1c79356b
A
908 /*
909 * If nothing was there, try to get more
910 */
911 if (zone->doing_alloc) {
1c79356b
A
912 /*
913 * Someone is allocating memory for this zone.
914 * Wait for it to show up, then try again.
915 */
1c79356b 916 zone->waiting = TRUE;
9bccf70c 917 zone_sleep(zone);
1c79356b
A
918 }
919 else {
920 if ((zone->cur_size + zone->elem_size) >
921 zone->max_size) {
922 if (zone->exhaustible)
923 break;
924 if (zone->expandable) {
925 /*
926 * We're willing to overflow certain
927 * zones, but not without complaining.
928 *
929 * This is best used in conjunction
930 * with the collectable flag. What we
931 * want is an assurance we can get the
932 * memory back, assuming there's no
933 * leak.
934 */
935 zone->max_size += (zone->max_size >> 1);
936 } else {
937 unlock_zone(zone);
938
1c79356b
A
939 panic("zalloc: zone \"%s\" empty.", zone->zone_name);
940 }
941 }
942 zone->doing_alloc = TRUE;
943 unlock_zone(zone);
944
945 if (zone->collectable) {
946 vm_offset_t space;
b0d623f7 947 vm_size_t alloc_size;
2d21ac55 948 int retry = 0;
55e303ae
A
949
950 for (;;) {
951
2d21ac55 952 if (vm_pool_low() || retry >= 1)
55e303ae 953 alloc_size =
91447636 954 round_page(zone->elem_size);
55e303ae
A
955 else
956 alloc_size = zone->alloc_size;
957
958 retval = kernel_memory_allocate(zone_map,
959 &space, alloc_size, 0,
960 KMA_KOBJECT|KMA_NOPAGEWAIT);
961 if (retval == KERN_SUCCESS) {
2d21ac55
A
962#if ZONE_ALIAS_ADDR
963 if (alloc_size == PAGE_SIZE)
964 space = zone_alias_addr(space);
965#endif
55e303ae
A
966 zone_page_init(space, alloc_size,
967 ZONE_PAGE_USED);
91447636 968 zcram(zone, (void *)space, alloc_size);
55e303ae
A
969
970 break;
971 } else if (retval != KERN_RESOURCE_SHORTAGE) {
2d21ac55
A
972 retry++;
973
974 if (retry == 2) {
975 zone_gc();
976 printf("zalloc did gc\n");
b0d623f7 977 zone_display_zprint();
2d21ac55 978 }
c910b4d9 979 if (retry == 3) {
b0d623f7
A
980 panic_include_zprint = TRUE;
981 panic("zalloc: \"%s\" (%d elements) retry fail %d, kfree_nop_count: %d", zone->zone_name, zone->count, retval, (int)kfree_nop_count);
c910b4d9 982 }
e5568f75
A
983 } else {
984 break;
55e303ae 985 }
1c79356b
A
986 }
987 lock_zone(zone);
988 zone->doing_alloc = FALSE;
989 if (zone->waiting) {
990 zone->waiting = FALSE;
9bccf70c 991 zone_wakeup(zone);
1c79356b
A
992 }
993 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
994 if (addr == 0 &&
995 retval == KERN_RESOURCE_SHORTAGE) {
996 unlock_zone(zone);
997
1c79356b
A
998 VM_PAGE_WAIT();
999 lock_zone(zone);
1000 }
1001 } else {
1002 vm_offset_t space;
1003 retval = zget_space(zone->elem_size, &space);
1004
1005 lock_zone(zone);
1006 zone->doing_alloc = FALSE;
1007 if (zone->waiting) {
1008 zone->waiting = FALSE;
1009 thread_wakeup((event_t)zone);
1010 }
1011 if (retval == KERN_SUCCESS) {
1012 zone->count++;
1013 zone->cur_size += zone->elem_size;
1014#if ZONE_DEBUG
1015 if (zone_debug_enabled(zone)) {
1016 enqueue_tail(&zone->active_zones, (queue_entry_t)space);
1017 }
1018#endif
1019 unlock_zone(zone);
1020 zone_page_alloc(space, zone->elem_size);
1021#if ZONE_DEBUG
1022 if (zone_debug_enabled(zone))
55e303ae 1023 space += ZONE_DEBUG_OFFSET;
1c79356b 1024#endif
2d21ac55
A
1025 addr = space;
1026 goto success;
1c79356b
A
1027 }
1028 if (retval == KERN_RESOURCE_SHORTAGE) {
1029 unlock_zone(zone);
1030
1c79356b
A
1031 VM_PAGE_WAIT();
1032 lock_zone(zone);
1033 } else {
91447636 1034 panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval);
1c79356b
A
1035 }
1036 }
1037 }
1038 if (addr == 0)
1039 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1040 }
1041
c910b4d9
A
1042 /*
1043 * See if we should be logging allocations in this zone. Logging is rarely done except when a leak is
 1044 * suspected, so this code rarely executes. We need to run this code while still holding the zone lock
1045 * since it protects the various log related data structures.
1046 */
1047
1048 if (DO_LOGGING(zone) && addr) {
1049
1050 /*
1051 * Look for a place to record this new allocation. We implement two different logging strategies
1052 * depending on whether we're looking for the source of a zone leak or a zone corruption. When looking
1053 * for a leak, we want to log as many allocations as possible in order to clearly identify the leaker
1054 * among all the records. So we look for an unused slot in the log and fill that in before overwriting
 1055 * an old entry. When looking for a corruption, however, it's better to have a chronological log of all
1056 * the allocations and frees done in the zone so that the history of operations for a specific zone
1057 * element can be inspected. So in this case, we treat the log as a circular buffer and overwrite the
1058 * oldest entry whenever a new one needs to be added.
1059 *
1060 * The check_freed_element flag tells us what style of logging to do. It's set if we're supposed to be
1061 * doing corruption style logging (indicated via -zc in the boot-args).
1062 */
1063
1064 if (!check_freed_element && zrecords[zcurrent].z_element && zrecorded < log_records) {
1065
1066 /*
1067 * If we get here, we're doing leak style logging and there's still some unused entries in
1068 * the log (since zrecorded is smaller than the size of the log). Look for an unused slot
1069 * starting at zcurrent and wrap-around if we reach the end of the buffer. If the buffer
1070 * is already full, we just fall through and overwrite the element indexed by zcurrent.
1071 */
1072
1073 for (i = zcurrent; i < log_records; i++) {
1074 if (zrecords[i].z_element == NULL) {
1075 zcurrent = i;
1076 goto empty_slot;
1077 }
1078 }
1079
1080 for (i = 0; i < zcurrent; i++) {
1081 if (zrecords[i].z_element == NULL) {
1082 zcurrent = i;
1083 goto empty_slot;
1084 }
1085 }
1086 }
1087
1088 /*
1089 * Save a record of this allocation
1090 */
1091
1092empty_slot:
1093 if (zrecords[zcurrent].z_element == NULL)
1094 zrecorded++;
1095
1096 zrecords[zcurrent].z_element = (void *)addr;
1097 zrecords[zcurrent].z_time = ztime++;
1098 zrecords[zcurrent].z_opcode = ZOP_ALLOC;
1099
1100 for (i = 0; i < numsaved; i++)
1101 zrecords[zcurrent].z_pc[i] = bt[i];
1102
1103 for (; i < MAX_DEPTH; i++)
1104 zrecords[zcurrent].z_pc[i] = 0;
1105
1106 zcurrent++;
1107
1108 if (zcurrent >= log_records)
1109 zcurrent = 0;
1110 }
1111
2d21ac55 1112 if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) {
0b4e3aa0
A
1113 zone->async_pending = TRUE;
1114 unlock_zone(zone);
1115 thread_call_enter(&zone->call_async_alloc);
1116 lock_zone(zone);
1117 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1118 }
1119
1c79356b
A
1120#if ZONE_DEBUG
1121 if (addr && zone_debug_enabled(zone)) {
1122 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
55e303ae 1123 addr += ZONE_DEBUG_OFFSET;
1c79356b
A
1124 }
1125#endif
1126
1127 unlock_zone(zone);
0b4e3aa0 1128
2d21ac55
A
1129success:
1130 TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr);
1131
91447636 1132 return((void *)addr);
1c79356b
A
1133}
1134
1135
91447636 1136void *
1c79356b
A
1137zalloc(
1138 register zone_t zone)
1139{
1140 return( zalloc_canblock(zone, TRUE) );
1141}
1142
91447636 1143void *
1c79356b
A
1144zalloc_noblock(
1145 register zone_t zone)
1146{
1147 return( zalloc_canblock(zone, FALSE) );
1148}
1149
0b4e3aa0
A
1150void
1151zalloc_async(
91447636
A
1152 thread_call_param_t p0,
1153 __unused thread_call_param_t p1)
0b4e3aa0 1154{
91447636 1155 void *elt;
0b4e3aa0
A
1156
1157 elt = zalloc_canblock((zone_t)p0, TRUE);
1158 zfree((zone_t)p0, elt);
1159 lock_zone(((zone_t)p0));
1160 ((zone_t)p0)->async_pending = FALSE;
1161 unlock_zone(((zone_t)p0));
1162}
1163
1c79356b
A
1164
1165/*
1166 * zget returns an element from the specified zone
1167 * and immediately returns nothing if there is nothing there.
1168 *
 1169 * This form should be used when you cannot block (like when
1170 * processing an interrupt).
1171 */
91447636 1172void *
1c79356b
A
1173zget(
1174 register zone_t zone)
1175{
1176 register vm_offset_t addr;
1177
1178 assert( zone != ZONE_NULL );
1179
1180 if (!lock_try_zone(zone))
91447636 1181 return NULL;
1c79356b
A
1182
1183 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1184#if ZONE_DEBUG
1185 if (addr && zone_debug_enabled(zone)) {
1186 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
55e303ae 1187 addr += ZONE_DEBUG_OFFSET;
1c79356b
A
1188 }
1189#endif /* ZONE_DEBUG */
1190 unlock_zone(zone);
1191
91447636 1192 return((void *) addr);
1c79356b
A
1193}
1194
 1195/* Keep this FALSE by default. Large memory machines run orders of magnitude
 1196 slower in debug mode when it is true. Use the debugger to enable it if needed */
55e303ae
A
1197/* static */ boolean_t zone_check = FALSE;
1198
1199static zone_t zone_last_bogus_zone = ZONE_NULL;
1200static vm_offset_t zone_last_bogus_elem = 0;
1c79356b
A
1201
1202void
1203zfree(
1204 register zone_t zone,
91447636 1205 void *addr)
1c79356b 1206{
91447636 1207 vm_offset_t elem = (vm_offset_t) addr;
c910b4d9
A
 1208 void *bt[MAX_DEPTH]; /* only used if zone logging is enabled via boot-args */
1209 int numsaved = 0;
1210
1211 assert(zone != ZONE_NULL);
1212
1213 /*
1214 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
1215 */
1216
1217 if (DO_LOGGING(zone))
1218 numsaved = OSBacktrace(&bt[0], MAX_DEPTH);
1c79356b
A
1219
1220#if MACH_ASSERT
1221 /* Basic sanity checks */
1222 if (zone == ZONE_NULL || elem == (vm_offset_t)0)
1223 panic("zfree: NULL");
1224 /* zone_gc assumes zones are never freed */
1225 if (zone == zone_zone)
1226 panic("zfree: freeing to zone_zone breaks zone_gc!");
55e303ae
A
1227#endif
1228
b0d623f7 1229 TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (uintptr_t)addr);
2d21ac55 1230
1c79356b 1231 if (zone->collectable && !zone->allows_foreign &&
55e303ae
A
1232 !from_zone_map(elem, zone->elem_size)) {
1233#if MACH_ASSERT
1c79356b 1234 panic("zfree: non-allocated memory in collectable zone!");
91447636 1235#endif
55e303ae
A
1236 zone_last_bogus_zone = zone;
1237 zone_last_bogus_elem = elem;
1238 return;
55e303ae 1239 }
1c79356b
A
1240
1241 lock_zone(zone);
c910b4d9
A
1242
1243 /*
1244 * See if we're doing logging on this zone. There are two styles of logging used depending on
1245 * whether we're trying to catch a leak or corruption. See comments above in zalloc for details.
1246 */
1247
1248 if (DO_LOGGING(zone)) {
1249 int i;
1250
1251 if (check_freed_element) {
1252
1253 /*
1254 * We're logging to catch a corruption. Add a record of this zfree operation
1255 * to log.
1256 */
1257
1258 if (zrecords[zcurrent].z_element == NULL)
1259 zrecorded++;
1260
1261 zrecords[zcurrent].z_element = (void *)addr;
1262 zrecords[zcurrent].z_time = ztime++;
1263 zrecords[zcurrent].z_opcode = ZOP_FREE;
1264
1265 for (i = 0; i < numsaved; i++)
1266 zrecords[zcurrent].z_pc[i] = bt[i];
1267
1268 for (; i < MAX_DEPTH; i++)
1269 zrecords[zcurrent].z_pc[i] = 0;
1270
1271 zcurrent++;
1272
1273 if (zcurrent >= log_records)
1274 zcurrent = 0;
1275
1276 } else {
1277
1278 /*
1279 * We're logging to catch a leak. Remove any record we might have for this
1280 * element since it's being freed. Note that we may not find it if the buffer
1281 * overflowed and that's OK. Since the log is of a limited size, old records
1282 * get overwritten if there are more zallocs than zfrees.
1283 */
1284
1285 for (i = 0; i < log_records; i++) {
1286 if (zrecords[i].z_element == addr) {
1287 zrecords[i].z_element = NULL;
1288 zcurrent = i;
1289 zrecorded--;
1290 break;
1291 }
1292 }
1293 }
1294 }
1295
1296
1c79356b
A
1297#if ZONE_DEBUG
1298 if (zone_debug_enabled(zone)) {
1299 queue_t tmp_elem;
1300
55e303ae 1301 elem -= ZONE_DEBUG_OFFSET;
1c79356b
A
1302 if (zone_check) {
1303 /* check the zone's consistency */
1304
1305 for (tmp_elem = queue_first(&zone->active_zones);
1306 !queue_end(tmp_elem, &zone->active_zones);
1307 tmp_elem = queue_next(tmp_elem))
1308 if (elem == (vm_offset_t)tmp_elem)
1309 break;
1310 if (elem != (vm_offset_t)tmp_elem)
1311 panic("zfree()ing element from wrong zone");
1312 }
1313 remqueue(&zone->active_zones, (queue_t) elem);
1314 }
1315#endif /* ZONE_DEBUG */
1316 if (zone_check) {
1317 vm_offset_t this;
1318
1319 /* check the zone's consistency */
1320
1321 for (this = zone->free_elements;
1322 this != 0;
1323 this = * (vm_offset_t *) this)
1324 if (!pmap_kernel_va(this) || this == elem)
1325 panic("zfree");
1326 }
0b4e3aa0 1327 ADD_TO_ZONE(zone, elem);
b0d623f7
A
1328#if MACH_ASSERT
1329 if (zone->count < 0)
1330 panic("zfree: count < 0!");
1331#endif
0b4e3aa0 1332
1c79356b
A
1333 /*
1334 * If elements have one or more pages, and memory is low,
0b4e3aa0
A
1335 * request to run the garbage collection in the zone the next
1336 * time the pageout thread runs.
1c79356b
A
1337 */
1338 if (zone->elem_size >= PAGE_SIZE &&
1339 vm_pool_low()){
0b4e3aa0 1340 zone_gc_forced = TRUE;
1c79356b 1341 }
1c79356b
A
1342 unlock_zone(zone);
1343}
1344
1345
1346/* Change a zone's flags.
1347 * This routine must be called immediately after zinit.
1348 */
1349void
1350zone_change(
1351 zone_t zone,
1352 unsigned int item,
1353 boolean_t value)
1354{
1355 assert( zone != ZONE_NULL );
1356 assert( value == TRUE || value == FALSE );
1357
1358 switch(item){
1359 case Z_EXHAUST:
1360 zone->exhaustible = value;
1361 break;
1362 case Z_COLLECT:
1363 zone->collectable = value;
1364 break;
1365 case Z_EXPAND:
1366 zone->expandable = value;
1367 break;
1368 case Z_FOREIGN:
1369 zone->allows_foreign = value;
1370 break;
1371#if MACH_ASSERT
1372 default:
1373 panic("Zone_change: Wrong Item Type!");
1374 /* break; */
1375#endif
1376 }
1c79356b
A
1377}
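/*
 * For example, zone_bootstrap() above marks the zone of zones as
 * non-collectable right after creating it:
 *
 *	zone_change(zone_zone, Z_COLLECT, FALSE);
 */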
1378
1379/*
1380 * Return the expected number of free elements in the zone.
1381 * This calculation will be incorrect if items are zfree'd that
1382 * were never zalloc'd/zget'd. The correct way to stuff memory
1383 * into a zone is by zcram.
1384 */
1385
1386integer_t
1387zone_free_count(zone_t zone)
1388{
1389 integer_t free_count;
1390
1391 lock_zone(zone);
b0d623f7 1392 free_count = (integer_t)(zone->cur_size/zone->elem_size - zone->count);
1c79356b
A
1393 unlock_zone(zone);
1394
1395 assert(free_count >= 0);
1396
1397 return(free_count);
1398}
1399
1400/*
 1401 * zprealloc preallocates wired memory, expanding the specified
 1402 * zone to the specified size.
1403 */
1404void
1405zprealloc(
1406 zone_t zone,
1407 vm_size_t size)
1408{
1409 vm_offset_t addr;
1410
1411 if (size != 0) {
b0d623f7 1412 if (kmem_alloc_kobject(zone_map, &addr, size) != KERN_SUCCESS)
1c79356b
A
1413 panic("zprealloc");
1414 zone_page_init(addr, size, ZONE_PAGE_USED);
91447636 1415 zcram(zone, (void *)addr, size);
1c79356b
A
1416 }
1417}
1418
1419/*
1420 * Zone garbage collection subroutines
1c79356b 1421 */
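/*
 * Accounting used by the routines below: alloc_count tracks how many zone
 * elements currently reside on a page, while collect_count counts the free
 * elements found on it during the first garbage-collection pass. A page is
 * reclaimable once every element on it is free, i.e. when collect_count
 * equals alloc_count; zone_page_keep() clears collect_count for pages that
 * must be preserved.
 */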
55e303ae 1422
1c79356b
A
1423boolean_t
1424zone_page_collectable(
1425 vm_offset_t addr,
1426 vm_size_t size)
1427{
55e303ae 1428 struct zone_page_table_entry *zp;
1c79356b
A
1429 natural_t i, j;
1430
2d21ac55
A
1431#if ZONE_ALIAS_ADDR
1432 addr = zone_virtual_addr(addr);
1433#endif
1c79356b 1434#if MACH_ASSERT
55e303ae 1435 if (!from_zone_map(addr, size))
1c79356b
A
1436 panic("zone_page_collectable");
1437#endif
1438
b0d623f7
A
1439 i = (natural_t)atop_kernel(addr-zone_map_min_address);
1440 j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
55e303ae
A
1441
1442 for (zp = zone_page_table + i; i <= j; zp++, i++)
1443 if (zp->collect_count == zp->alloc_count)
1c79356b 1444 return (TRUE);
55e303ae 1445
1c79356b
A
1446 return (FALSE);
1447}
1448
1449void
1450zone_page_keep(
1451 vm_offset_t addr,
1452 vm_size_t size)
1453{
55e303ae 1454 struct zone_page_table_entry *zp;
1c79356b
A
1455 natural_t i, j;
1456
2d21ac55
A
1457#if ZONE_ALIAS_ADDR
1458 addr = zone_virtual_addr(addr);
1459#endif
1c79356b 1460#if MACH_ASSERT
55e303ae 1461 if (!from_zone_map(addr, size))
1c79356b
A
1462 panic("zone_page_keep");
1463#endif
1464
b0d623f7
A
1465 i = (natural_t)atop_kernel(addr-zone_map_min_address);
1466 j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
1c79356b 1467
55e303ae
A
1468 for (zp = zone_page_table + i; i <= j; zp++, i++)
1469 zp->collect_count = 0;
1c79356b
A
1470}
1471
1472void
55e303ae 1473zone_page_collect(
1c79356b
A
1474 vm_offset_t addr,
1475 vm_size_t size)
1476{
55e303ae 1477 struct zone_page_table_entry *zp;
1c79356b
A
1478 natural_t i, j;
1479
2d21ac55
A
1480#if ZONE_ALIAS_ADDR
1481 addr = zone_virtual_addr(addr);
1482#endif
1c79356b 1483#if MACH_ASSERT
55e303ae
A
1484 if (!from_zone_map(addr, size))
1485 panic("zone_page_collect");
1c79356b
A
1486#endif
1487
b0d623f7
A
1488 i = (natural_t)atop_kernel(addr-zone_map_min_address);
1489 j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
55e303ae
A
1490
1491 for (zp = zone_page_table + i; i <= j; zp++, i++)
1492 ++zp->collect_count;
1c79356b
A
1493}
1494
1495void
1496zone_page_init(
1497 vm_offset_t addr,
1498 vm_size_t size,
1499 int value)
1500{
55e303ae 1501 struct zone_page_table_entry *zp;
1c79356b
A
1502 natural_t i, j;
1503
2d21ac55
A
1504#if ZONE_ALIAS_ADDR
1505 addr = zone_virtual_addr(addr);
1506#endif
1c79356b 1507#if MACH_ASSERT
55e303ae 1508 if (!from_zone_map(addr, size))
1c79356b
A
1509 panic("zone_page_init");
1510#endif
1511
b0d623f7
A
1512 i = (natural_t)atop_kernel(addr-zone_map_min_address);
1513 j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
55e303ae
A
1514
1515 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1516 zp->alloc_count = value;
1517 zp->collect_count = 0;
1c79356b 1518 }
1c79356b
A
1519}
1520
1521void
1522zone_page_alloc(
1523 vm_offset_t addr,
1524 vm_size_t size)
1525{
55e303ae 1526 struct zone_page_table_entry *zp;
1c79356b
A
1527 natural_t i, j;
1528
2d21ac55
A
1529#if ZONE_ALIAS_ADDR
1530 addr = zone_virtual_addr(addr);
1531#endif
1c79356b 1532#if MACH_ASSERT
55e303ae 1533 if (!from_zone_map(addr, size))
1c79356b
A
1534 panic("zone_page_alloc");
1535#endif
1536
b0d623f7
A
1537 i = (natural_t)atop_kernel(addr-zone_map_min_address);
1538 j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
55e303ae
A
1539
1540 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1541 /*
1542 * Set alloc_count to (ZONE_PAGE_USED + 1) if
1c79356b
A
1543 * it was previously set to ZONE_PAGE_UNUSED.
1544 */
55e303ae
A
1545 if (zp->alloc_count == ZONE_PAGE_UNUSED)
1546 zp->alloc_count = 1;
1547 else
1548 ++zp->alloc_count;
1c79356b 1549 }
1c79356b
A
1550}
1551
1552void
55e303ae
A
1553zone_page_free_element(
1554 struct zone_page_table_entry **free_pages,
1c79356b
A
1555 vm_offset_t addr,
1556 vm_size_t size)
1557{
55e303ae 1558 struct zone_page_table_entry *zp;
1c79356b
A
1559 natural_t i, j;
1560
2d21ac55
A
1561#if ZONE_ALIAS_ADDR
1562 addr = zone_virtual_addr(addr);
1563#endif
1c79356b 1564#if MACH_ASSERT
55e303ae
A
1565 if (!from_zone_map(addr, size))
1566 panic("zone_page_free_element");
1c79356b
A
1567#endif
1568
b0d623f7
A
1569 i = (natural_t)atop_kernel(addr-zone_map_min_address);
1570 j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
1c79356b 1571
55e303ae
A
1572 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1573 if (zp->collect_count > 0)
1574 --zp->collect_count;
1575 if (--zp->alloc_count == 0) {
1576 zp->alloc_count = ZONE_PAGE_UNUSED;
1577 zp->collect_count = 0;
1c79356b 1578
55e303ae
A
1579 zp->link = *free_pages;
1580 *free_pages = zp;
1c79356b
A
1581 }
1582 }
1c79356b
A
1583}
1584
1585
1586/* This is used for walking through a zone's free element list.
1587 */
55e303ae
A
1588struct zone_free_element {
1589 struct zone_free_element * next;
1c79356b
A
1590};
1591
2d21ac55
A
1592/*
1593 * Add a linked list of pages starting at base back into the zone
1594 * free list. Tail points to the last element on the list.
1595 */
1596
1597#define ADD_LIST_TO_ZONE(zone, base, tail) \
1598MACRO_BEGIN \
1599 (tail)->next = (void *)((zone)->free_elements); \
1600 if (check_freed_element) { \
1601 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
1602 ((vm_offset_t *)(tail))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
1603 (zone)->free_elements; \
1604 } \
1605 (zone)->free_elements = (unsigned long)(base); \
1606MACRO_END
1607
1608/*
1609 * Add an element to the chain pointed to by prev.
1610 */
1611
1612#define ADD_ELEMENT(zone, prev, elem) \
1613MACRO_BEGIN \
1614 (prev)->next = (elem); \
1615 if (check_freed_element) { \
1616 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
1617 ((vm_offset_t *)(prev))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
1618 (vm_offset_t)(elem); \
1619 } \
1620MACRO_END
1621
55e303ae
A
1622struct {
1623 uint32_t pgs_freed;
1624
1625 uint32_t elems_collected,
1626 elems_freed,
1627 elems_kept;
1628} zgc_stats;
1c79356b
A
1629
1630/* Zone garbage collection
1631 *
1632 * zone_gc will walk through all the free elements in all the
1633 * zones that are marked collectable looking for reclaimable
1634 * pages. zone_gc is called by consider_zone_gc when the system
1635 * begins to run out of memory.
1636 */
1637void
1638zone_gc(void)
1639{
1640 unsigned int max_zones;
55e303ae 1641 zone_t z;
1c79356b 1642 unsigned int i;
55e303ae 1643 struct zone_page_table_entry *zp, *zone_free_pages;
1c79356b 1644
b0d623f7 1645 lck_mtx_lock(&zone_gc_lock);
1c79356b 1646
1c79356b
A
1647 simple_lock(&all_zones_lock);
1648 max_zones = num_zones;
1649 z = first_zone;
1650 simple_unlock(&all_zones_lock);
1651
1652#if MACH_ASSERT
1c79356b 1653 for (i = 0; i < zone_pages; i++)
55e303ae 1654 assert(zone_page_table[i].collect_count == 0);
1c79356b
A
1655#endif /* MACH_ASSERT */
1656
55e303ae 1657 zone_free_pages = NULL;
1c79356b
A
1658
1659 for (i = 0; i < max_zones; i++, z = z->next_zone) {
a3d08fcd 1660 unsigned int n, m;
55e303ae 1661 vm_size_t elt_size, size_freed;
a3d08fcd 1662 struct zone_free_element *elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail;
1c79356b
A
1663
1664 assert(z != ZONE_NULL);
1665
1666 if (!z->collectable)
1667 continue;
1668
1669 lock_zone(z);
1670
55e303ae
A
1671 elt_size = z->elem_size;
1672
1c79356b
A
1673 /*
 1674 * Do a quick feasibility check before we scan the zone:
91447636
A
 1675 * skip unless there is a likelihood of getting pages back
 1676 * (i.e. we need a whole allocation block's worth of free
1677 * elements before we can garbage collect) and
 1678 * the zone has more than 10 percent of its elements free
2d21ac55 1679 * or the element size is a multiple of the PAGE_SIZE
1c79356b 1680 */
2d21ac55
A
1681 if ((elt_size & PAGE_MASK) &&
1682 (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) ||
1683 ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10)))) {
1c79356b
A
1684 unlock_zone(z);
1685 continue;
1686 }
1687
a3d08fcd
A
1688 z->doing_gc = TRUE;
1689
55e303ae
A
1690 /*
1691 * Snatch all of the free elements away from the zone.
1c79356b 1692 */
1c79356b 1693
55e303ae 1694 scan = (void *)z->free_elements;
0c530ab8 1695 z->free_elements = 0;
55e303ae
A
1696
1697 unlock_zone(z);
1698
1699 /*
1700 * Pass 1:
1701 *
1702 * Determine which elements we can attempt to collect
1703 * and count them up in the page table. Foreign elements
1704 * are returned to the zone.
1c79356b 1705 */
55e303ae
A
1706
1707 prev = (void *)&scan;
1708 elt = scan;
1709 n = 0; tail = keep = NULL;
1710 while (elt != NULL) {
1711 if (from_zone_map(elt, elt_size)) {
1712 zone_page_collect((vm_offset_t)elt, elt_size);
1713
1c79356b
A
1714 prev = elt;
1715 elt = elt->next;
55e303ae
A
1716
1717 ++zgc_stats.elems_collected;
1c79356b 1718 }
55e303ae
A
1719 else {
1720 if (keep == NULL)
1721 keep = tail = elt;
2d21ac55
A
1722 else {
1723 ADD_ELEMENT(z, tail, elt);
1724 tail = elt;
1725 }
55e303ae 1726
2d21ac55
A
1727 ADD_ELEMENT(z, prev, elt->next);
1728 elt = elt->next;
1729 ADD_ELEMENT(z, tail, NULL);
1c79356b 1730 }
1c79356b 1731
55e303ae
A
1732 /*
1733 * Dribble back the elements we are keeping.
1734 */
1735
a3d08fcd
A
1736 if (++n >= 50) {
1737 if (z->waiting == TRUE) {
1738 lock_zone(z);
55e303ae 1739
a3d08fcd 1740 if (keep != NULL) {
2d21ac55 1741 ADD_LIST_TO_ZONE(z, keep, tail);
a3d08fcd
A
1742 tail = keep = NULL;
1743 } else {
1744 m =0;
1745 base_elt = elt;
1746 base_prev = prev;
1747 while ((elt != NULL) && (++m < 50)) {
1748 prev = elt;
1749 elt = elt->next;
1750 }
1751 if (m !=0 ) {
2d21ac55
A
1752 ADD_LIST_TO_ZONE(z, base_elt, prev);
1753 ADD_ELEMENT(z, base_prev, elt);
a3d08fcd
A
1754 prev = base_prev;
1755 }
1756 }
55e303ae 1757
a3d08fcd
A
1758 if (z->waiting) {
1759 z->waiting = FALSE;
1760 zone_wakeup(z);
1761 }
55e303ae 1762
a3d08fcd
A
1763 unlock_zone(z);
1764 }
1765 n =0;
55e303ae
A
1766 }
1767 }
1768
1769 /*
1770 * Return any remaining elements.
1771 */
1772
1773 if (keep != NULL) {
1774 lock_zone(z);
1775
2d21ac55 1776 ADD_LIST_TO_ZONE(z, keep, tail);
55e303ae
A
1777
1778 unlock_zone(z);
1779 }
1780
1781 /*
1782 * Pass 2:
1783 *
1784 * Determine which pages we can reclaim and
1785 * free those elements.
1786 */
1787
1788 size_freed = 0;
55e303ae
A
1789 elt = scan;
1790 n = 0; tail = keep = NULL;
1791 while (elt != NULL) {
1792 if (zone_page_collectable((vm_offset_t)elt, elt_size)) {
1793 size_freed += elt_size;
1794 zone_page_free_element(&zone_free_pages,
1795 (vm_offset_t)elt, elt_size);
1796
2d21ac55 1797 elt = elt->next;
55e303ae
A
1798
1799 ++zgc_stats.elems_freed;
1800 }
1801 else {
1802 zone_page_keep((vm_offset_t)elt, elt_size);
1803
1804 if (keep == NULL)
1805 keep = tail = elt;
2d21ac55
A
1806 else {
1807 ADD_ELEMENT(z, tail, elt);
1808 tail = elt;
1809 }
55e303ae 1810
2d21ac55
A
1811 elt = elt->next;
1812 ADD_ELEMENT(z, tail, NULL);
55e303ae
A
1813
1814 ++zgc_stats.elems_kept;
1815 }
1816
1817 /*
1818 * Dribble back the elements we are keeping,
1819 * and update the zone size info.
1820 */
1821
a3d08fcd 1822 if (++n >= 50) {
55e303ae
A
1823 lock_zone(z);
1824
1825 z->cur_size -= size_freed;
1826 size_freed = 0;
1827
a3d08fcd 1828 if (keep != NULL) {
2d21ac55 1829 ADD_LIST_TO_ZONE(z, keep, tail);
a3d08fcd
A
1830 }
1831
1832 if (z->waiting) {
1833 z->waiting = FALSE;
1834 zone_wakeup(z);
1835 }
55e303ae
A
1836
1837 unlock_zone(z);
1838
1839 n = 0; tail = keep = NULL;
1840 }
1841 }
1842
1843 /*
1844 * Return any remaining elements, and update
1845 * the zone size info.
1846 */
1847
a3d08fcd
A
1848 lock_zone(z);
1849
55e303ae 1850 if (size_freed > 0 || keep != NULL) {
55e303ae
A
1851
1852 z->cur_size -= size_freed;
1853
1854 if (keep != NULL) {
2d21ac55 1855 ADD_LIST_TO_ZONE(z, keep, tail);
55e303ae
A
1856 }
1857
55e303ae 1858 }
a3d08fcd
A
1859
1860 z->doing_gc = FALSE;
1861 if (z->waiting) {
1862 z->waiting = FALSE;
1863 zone_wakeup(z);
1864 }
1865 unlock_zone(z);
1c79356b
A
1866 }
1867
55e303ae
A
1868 /*
1869 * Reclaim the pages we are freeing.
1870 */
1c79356b 1871
55e303ae
A
1872 while ((zp = zone_free_pages) != NULL) {
1873 zone_free_pages = zp->link;
2d21ac55
A
1874#if ZONE_ALIAS_ADDR
1875 z = zone_virtual_addr((vm_map_address_t)z);
1876#endif
55e303ae
A
1877 kmem_free(zone_map, zone_map_min_address + PAGE_SIZE *
1878 (zp - zone_page_table), PAGE_SIZE);
1879 ++zgc_stats.pgs_freed;
1c79356b 1880 }
55e303ae 1881
b0d623f7 1882 lck_mtx_unlock(&zone_gc_lock);
1c79356b
A
1883}
1884
1c79356b
A
1885/*
1886 * consider_zone_gc:
1887 *
1888 * Called by the pageout daemon when the system needs more free pages.
1889 */
1890
1891void
b0d623f7 1892consider_zone_gc(boolean_t force)
1c79356b
A
1893{
1894 /*
1895 * By default, don't attempt zone GC more frequently
91447636 1896 * than once per minute.
1c79356b
A
1897 */
1898
1899 if (zone_gc_max_rate == 0)
91447636 1900 zone_gc_max_rate = (60 << SCHED_TICK_SHIFT) + 1;
1c79356b
A
1901
1902 if (zone_gc_allowed &&
0b4e3aa0 1903 ((sched_tick > (zone_gc_last_tick + zone_gc_max_rate)) ||
b0d623f7
A
1904 zone_gc_forced ||
1905 force)) {
0b4e3aa0 1906 zone_gc_forced = FALSE;
1c79356b
A
1907 zone_gc_last_tick = sched_tick;
1908 zone_gc();
1909 }
1910}
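
/*
 * Illustrative arithmetic (not part of the original source), assuming
 * sched_tick advances (1 << SCHED_TICK_SHIFT) times per second:
 *
 *	zone_gc_max_rate = (60 << SCHED_TICK_SHIFT) + 1 ticks ~ 60 seconds
 *
 * so an un-forced consider_zone_gc() call does nothing unless roughly a
 * minute of sched_ticks has elapsed since the last collection, or the
 * zone_gc_forced flag / force argument overrides the rate limit.
 */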
1911
2d21ac55
A
1912struct fake_zone_info {
1913 const char* name;
1914 void (*func)(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
1915 int *, int *);
1916};
1917
1918static struct fake_zone_info fake_zones[] = {
1919 {
1920 .name = "kernel_stacks",
1921 .func = stack_fake_zone_info,
1922 },
1923#ifdef ppc
1924 {
1925 .name = "save_areas",
1926 .func = save_fake_zone_info,
1927 },
1928 {
1929 .name = "pmap_mappings",
1930 .func = mapping_fake_zone_info,
1931 },
1932#endif /* ppc */
b0d623f7 1933#if defined(__i386__) || defined (__x86_64__)
2d21ac55
A
1934 {
1935 .name = "page_tables",
1936 .func = pt_fake_zone_info,
1937 },
 1938#endif /* __i386__ || __x86_64__ */
1939 {
1940 .name = "kalloc.large",
1941 .func = kalloc_fake_zone_info,
1942 },
1943};
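
/*
 * Sketch (not part of the original source): a subsystem that wants its
 * memory reported alongside real zones adds an entry above and supplies
 * a callback matching the fake_zone_info signature.  Hypothetical names:
 *
 *	static void
 *	example_fake_zone_info(int *count, vm_size_t *cur_size,
 *	    vm_size_t *max_size, vm_size_t *elem_size,
 *	    vm_size_t *alloc_size, int *collectable, int *exhaustible)
 *	{
 *		*count = example_objects_in_use;
 *		*elem_size = sizeof (struct example_object);
 *		*cur_size = *count * *elem_size;
 *		*max_size = *cur_size;
 *		*alloc_size = PAGE_SIZE;
 *		*collectable = 1;
 *		*exhaustible = 0;
 *	}
 *
 * registered as { .name = "example_objects", .func = example_fake_zone_info }.
 */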
1c79356b
A
1944
1945kern_return_t
1946host_zone_info(
1947 host_t host,
1948 zone_name_array_t *namesp,
1949 mach_msg_type_number_t *namesCntp,
1950 zone_info_array_t *infop,
1951 mach_msg_type_number_t *infoCntp)
1952{
1953 zone_name_t *names;
1954 vm_offset_t names_addr;
1955 vm_size_t names_size;
1956 zone_info_t *info;
1957 vm_offset_t info_addr;
1958 vm_size_t info_size;
1959 unsigned int max_zones, i;
1960 zone_t z;
1961 zone_name_t *zn;
1962 zone_info_t *zi;
1963 kern_return_t kr;
2d21ac55 1964 size_t num_fake_zones;
1c79356b 1965
b0d623f7 1966
1c79356b
A
1967 if (host == HOST_NULL)
1968 return KERN_INVALID_HOST;
1969
b0d623f7
A
1970#if defined(__LP64__)
1971 if (!thread_is_64bit(current_thread()))
1972 return KERN_NOT_SUPPORTED;
1973#else
1974 if (thread_is_64bit(current_thread()))
1975 return KERN_NOT_SUPPORTED;
1976#endif
1977
2d21ac55
A
1978 num_fake_zones = sizeof fake_zones / sizeof fake_zones[0];
1979
1c79356b
A
1980 /*
1981 * We assume that zones aren't freed once allocated.
1982 * We won't pick up any zones that are allocated later.
1983 */
1984
1985 simple_lock(&all_zones_lock);
b0d623f7 1986 max_zones = (unsigned int)(num_zones + num_fake_zones);
1c79356b
A
1987 z = first_zone;
1988 simple_unlock(&all_zones_lock);
1989
1990 if (max_zones <= *namesCntp) {
1991 /* use in-line memory */
91447636 1992 names_size = *namesCntp * sizeof *names;
1c79356b
A
1993 names = *namesp;
1994 } else {
91447636 1995 names_size = round_page(max_zones * sizeof *names);
1c79356b
A
1996 kr = kmem_alloc_pageable(ipc_kernel_map,
1997 &names_addr, names_size);
1998 if (kr != KERN_SUCCESS)
1999 return kr;
2000 names = (zone_name_t *) names_addr;
2001 }
2002
2003 if (max_zones <= *infoCntp) {
2004 /* use in-line memory */
91447636 2005 info_size = *infoCntp * sizeof *info;
1c79356b
A
2006 info = *infop;
2007 } else {
91447636 2008 info_size = round_page(max_zones * sizeof *info);
1c79356b
A
2009 kr = kmem_alloc_pageable(ipc_kernel_map,
2010 &info_addr, info_size);
2011 if (kr != KERN_SUCCESS) {
2012 if (names != *namesp)
2013 kmem_free(ipc_kernel_map,
2014 names_addr, names_size);
2015 return kr;
2016 }
2017
2018 info = (zone_info_t *) info_addr;
2019 }
2020 zn = &names[0];
2021 zi = &info[0];
2022
2023 for (i = 0; i < num_zones; i++) {
2024 struct zone zcopy;
2025
2026 assert(z != ZONE_NULL);
2027
2028 lock_zone(z);
2029 zcopy = *z;
2030 unlock_zone(z);
2031
2032 simple_lock(&all_zones_lock);
2033 z = z->next_zone;
2034 simple_unlock(&all_zones_lock);
2035
2036 /* assuming here the name data is static */
2037 (void) strncpy(zn->zn_name, zcopy.zone_name,
2038 sizeof zn->zn_name);
2d21ac55 2039 zn->zn_name[sizeof zn->zn_name - 1] = '\0';
1c79356b
A
2040
2041 zi->zi_count = zcopy.count;
2042 zi->zi_cur_size = zcopy.cur_size;
2043 zi->zi_max_size = zcopy.max_size;
2044 zi->zi_elem_size = zcopy.elem_size;
2045 zi->zi_alloc_size = zcopy.alloc_size;
2046 zi->zi_exhaustible = zcopy.exhaustible;
2047 zi->zi_collectable = zcopy.collectable;
2048
2049 zn++;
2050 zi++;
2051 }
0c530ab8 2052
2d21ac55
A
2053 /*
 2054 * Loop through the fake zones and fill in their entries using the
 2055 * specialized functions.
2056 */
2057 for (i = 0; i < num_fake_zones; i++) {
2058 strncpy(zn->zn_name, fake_zones[i].name, sizeof zn->zn_name);
2059 zn->zn_name[sizeof zn->zn_name - 1] = '\0';
2060 fake_zones[i].func(&zi->zi_count, &zi->zi_cur_size,
2061 &zi->zi_max_size, &zi->zi_elem_size,
2062 &zi->zi_alloc_size, &zi->zi_collectable,
2063 &zi->zi_exhaustible);
2064 zn++;
2065 zi++;
2066 }
1c79356b
A
2067
2068 if (names != *namesp) {
2069 vm_size_t used;
2070 vm_map_copy_t copy;
2071
2072 used = max_zones * sizeof *names;
2073
2074 if (used != names_size)
2075 bzero((char *) (names_addr + used), names_size - used);
2076
91447636
A
2077 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
2078 (vm_map_size_t)names_size, TRUE, &copy);
1c79356b
A
2079 assert(kr == KERN_SUCCESS);
2080
2081 *namesp = (zone_name_t *) copy;
2082 }
2083 *namesCntp = max_zones;
2084
2085 if (info != *infop) {
2086 vm_size_t used;
2087 vm_map_copy_t copy;
2088
2089 used = max_zones * sizeof *info;
2090
2091 if (used != info_size)
2092 bzero((char *) (info_addr + used), info_size - used);
2093
91447636
A
2094 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
2095 (vm_map_size_t)info_size, TRUE, &copy);
1c79356b
A
2096 assert(kr == KERN_SUCCESS);
2097
2098 *infop = (zone_info_t *) copy;
2099 }
2100 *infoCntp = max_zones;
2101
2102 return KERN_SUCCESS;
2103}
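
/*
 * Sketch of a user-space caller (not part of this file), along the lines
 * of what zprint(1) does; treat the exact declarations as assumptions:
 *
 *	zone_name_array_t names;
 *	zone_info_array_t info;
 *	mach_msg_type_number_t nameCnt, infoCnt;
 *	unsigned int i;
 *
 *	if (host_zone_info(mach_host_self(), &names, &nameCnt,
 *	    &info, &infoCnt) == KERN_SUCCESS) {
 *		for (i = 0; i < nameCnt; i++)
 *			printf("%s: %lu bytes\n", names[i].zn_name,
 *			    (unsigned long)info[i].zi_cur_size);
 *	}
 *
 * Buffers returned out-of-line should be released with vm_deallocate()
 * when the caller is done with them.
 */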
2104
b0d623f7
A
2105extern unsigned int stack_total;
2106
2107#if defined(__i386__) || defined (__x86_64__)
2108extern unsigned int inuse_ptepages_count;
2109#endif
2110
 2111void zone_display_zprint(void)
2112{
2113 unsigned int i;
2114 zone_t the_zone;
2115
2116 if(first_zone!=NULL) {
2117 the_zone = first_zone;
2118 for (i = 0; i < num_zones; i++) {
2119 if(the_zone->cur_size > (1024*1024)) {
2120 printf("%.20s:\t%lu\n",the_zone->zone_name,(uintptr_t)the_zone->cur_size);
2121 }
2122
2123 if(the_zone->next_zone == NULL) {
2124 break;
2125 }
2126
2127 the_zone = the_zone->next_zone;
2128 }
2129 }
2130
2131 printf("Kernel Stacks:\t%lu\n",(uintptr_t)(kernel_stack_size * stack_total));
2132
2133#if defined(__i386__) || defined (__x86_64__)
2134 printf("PageTables:\t%lu\n",(uintptr_t)(PAGE_SIZE * inuse_ptepages_count));
2135#endif
2136
2137 printf("Kalloc.Large:\t%lu\n",(uintptr_t)kalloc_large_total);
2138}
2139
2140
2141
1c79356b
A
2142#if MACH_KDB
2143#include <ddb/db_command.h>
2144#include <ddb/db_output.h>
2145#include <kern/kern_print.h>
2146
2147const char *zone_labels =
2148"ENTRY COUNT TOT_SZ MAX_SZ ELT_SZ ALLOC_SZ NAME";
2149
2150/* Forwards */
2151void db_print_zone(
2152 zone_t addr);
2153
2154#if ZONE_DEBUG
2155void db_zone_check_active(
2156 zone_t zone);
2157void db_zone_print_active(
2158 zone_t zone);
2159#endif /* ZONE_DEBUG */
2160void db_zone_print_free(
2161 zone_t zone);
2162void
2163db_print_zone(
2164 zone_t addr)
2165{
2166 struct zone zcopy;
2167
2168 zcopy = *addr;
2169
2170 db_printf("%8x %8x %8x %8x %6x %8x %s ",
2171 addr, zcopy.count, zcopy.cur_size,
2172 zcopy.max_size, zcopy.elem_size,
2173 zcopy.alloc_size, zcopy.zone_name);
2174 if (zcopy.exhaustible)
2175 db_printf("H");
2176 if (zcopy.collectable)
2177 db_printf("C");
2178 if (zcopy.expandable)
2179 db_printf("X");
2180 db_printf("\n");
2181}
2182
2183/*ARGSUSED*/
2184void
2d21ac55
A
2185db_show_one_zone(db_expr_t addr, boolean_t have_addr,
2186 __unused db_expr_t count, __unused char *modif)
1c79356b 2187{
91447636 2188 struct zone *z = (zone_t)((char *)0 + addr);
1c79356b
A
2189
2190 if (z == ZONE_NULL || !have_addr){
2191 db_error("No Zone\n");
2192 /*NOTREACHED*/
2193 }
2194
2195 db_printf("%s\n", zone_labels);
2196 db_print_zone(z);
2197}
2198
2199/*ARGSUSED*/
2200void
2d21ac55
A
2201db_show_all_zones(__unused db_expr_t addr, boolean_t have_addr, db_expr_t count,
2202 __unused char *modif)
1c79356b
A
2203{
2204 zone_t z;
2205 unsigned total = 0;
2206
2207 /*
 2208 * Don't risk hanging by locking unconditionally; the
 2209 * risk of incoherent data is small (zones aren't freed).
2210 */
2211 have_addr = simple_lock_try(&all_zones_lock);
2212 count = num_zones;
2213 z = first_zone;
2214 if (have_addr) {
2215 simple_unlock(&all_zones_lock);
2216 }
2217
2218 db_printf("%s\n", zone_labels);
2219 for ( ; count > 0; count--) {
2220 if (!z) {
2221 db_error("Mangled Zone List\n");
2222 /*NOTREACHED*/
2223 }
2224 db_print_zone(z);
 2225 total += z->cur_size;
2226
2227 have_addr = simple_lock_try(&all_zones_lock);
2228 z = z->next_zone;
2229 if (have_addr) {
2230 simple_unlock(&all_zones_lock);
2231 }
2232 }
2233 db_printf("\nTotal %8x", total);
55e303ae 2234 db_printf("\n\nzone_gc() has reclaimed %d pages\n", zgc_stats.pgs_freed);
1c79356b
A
2235}
2236
2237#if ZONE_DEBUG
2238void
2239db_zone_check_active(
2240 zone_t zone)
2241{
2242 int count = 0;
2243 queue_t tmp_elem;
2244
2245 if (!zone_debug_enabled(zone) || !zone_check)
2246 return;
2247 tmp_elem = queue_first(&zone->active_zones);
2248 while (count < zone->count) {
2249 count++;
2250 if (tmp_elem == 0) {
2d21ac55 2251 printf("unexpected zero element, zone=%p, count=%d\n",
1c79356b
A
2252 zone, count);
2253 assert(FALSE);
2254 break;
2255 }
2256 if (queue_end(tmp_elem, &zone->active_zones)) {
2d21ac55 2257 printf("unexpected queue_end, zone=%p, count=%d\n",
1c79356b
A
2258 zone, count);
2259 assert(FALSE);
2260 break;
2261 }
2262 tmp_elem = queue_next(tmp_elem);
2263 }
2264 if (!queue_end(tmp_elem, &zone->active_zones)) {
2d21ac55 2265 printf("not at queue_end, zone=%p, tmp_elem=%p\n",
1c79356b
A
2266 zone, tmp_elem);
2267 assert(FALSE);
2268 }
2269}
2270
2271void
2272db_zone_print_active(
2273 zone_t zone)
2274{
2275 int count = 0;
2276 queue_t tmp_elem;
2277
2278 if (!zone_debug_enabled(zone)) {
2d21ac55 2279 printf("zone %p debug not enabled\n", zone);
1c79356b
A
2280 return;
2281 }
2282 if (!zone_check) {
2283 printf("zone_check FALSE\n");
2284 return;
2285 }
2286
2d21ac55 2287 printf("zone %p, active elements %d\n", zone, zone->count);
1c79356b
A
2288 printf("active list:\n");
2289 tmp_elem = queue_first(&zone->active_zones);
2290 while (count < zone->count) {
2d21ac55 2291 printf(" %p", tmp_elem);
1c79356b
A
2292 count++;
2293 if ((count % 6) == 0)
2294 printf("\n");
2295 if (tmp_elem == 0) {
2296 printf("\nunexpected zero element, count=%d\n", count);
2297 break;
2298 }
2299 if (queue_end(tmp_elem, &zone->active_zones)) {
2300 printf("\nunexpected queue_end, count=%d\n", count);
2301 break;
2302 }
2303 tmp_elem = queue_next(tmp_elem);
2304 }
2305 if (!queue_end(tmp_elem, &zone->active_zones))
2d21ac55 2306 printf("\nnot at queue_end, tmp_elem=%p\n", tmp_elem);
1c79356b
A
2307 else
2308 printf("\n");
2309}
2310#endif /* ZONE_DEBUG */
2311
2312void
2313db_zone_print_free(
2314 zone_t zone)
2315{
2316 int count = 0;
2317 int freecount;
2318 vm_offset_t elem;
2319
2320 freecount = zone_free_count(zone);
2d21ac55 2321 printf("zone %p, free elements %d\n", zone, freecount);
1c79356b
A
2322 printf("free list:\n");
2323 elem = zone->free_elements;
2324 while (count < freecount) {
2325 printf(" 0x%x", elem);
2326 count++;
2327 if ((count % 6) == 0)
2328 printf("\n");
2329 if (elem == 0) {
2330 printf("\nunexpected zero element, count=%d\n", count);
2331 break;
2332 }
2333 elem = *((vm_offset_t *)elem);
2334 }
2335 if (elem != 0)
2336 printf("\nnot at end of free list, elem=0x%x\n", elem);
2337 else
2338 printf("\n");
2339}
2340
2341#endif /* MACH_KDB */
2342
2343
2344#if ZONE_DEBUG
2345
2346/* should we care about locks here ? */
2347
2348#if MACH_KDB
91447636 2349void *
1c79356b
A
2350next_element(
2351 zone_t z,
91447636 2352 void *prev)
1c79356b 2353{
91447636
A
2354 char *elt = (char *)prev;
2355
1c79356b 2356 if (!zone_debug_enabled(z))
2d21ac55 2357 return(NULL);
55e303ae 2358 elt -= ZONE_DEBUG_OFFSET;
91447636 2359 elt = (char *) queue_next((queue_t) elt);
1c79356b 2360 if ((queue_t) elt == &z->active_zones)
2d21ac55 2361 return(NULL);
55e303ae 2362 elt += ZONE_DEBUG_OFFSET;
1c79356b
A
2363 return(elt);
2364}
2365
91447636 2366void *
1c79356b
A
2367first_element(
2368 zone_t z)
2369{
91447636 2370 char *elt;
1c79356b
A
2371
2372 if (!zone_debug_enabled(z))
2d21ac55 2373 return(NULL);
1c79356b 2374 if (queue_empty(&z->active_zones))
2d21ac55 2375 return(NULL);
91447636 2376 elt = (char *)queue_first(&z->active_zones);
55e303ae 2377 elt += ZONE_DEBUG_OFFSET;
1c79356b
A
2378 return(elt);
2379}
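
/*
 * Layout note (not part of the original source): with zone debugging
 * enabled, each element is assumed to look like
 *
 *	| queue_chain_t on z->active_zones | payload handed to callers |
 *	^                                   ^
 *	queue element                       queue element + ZONE_DEBUG_OFFSET
 *
 * which is why first_element()/next_element() step back by
 * ZONE_DEBUG_OFFSET to walk the active queue and add it back before
 * returning, and why zone_debug_enable() grows elem_size by the same
 * amount.
 */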
2380
2381/*
2382 * Second arg controls how many zone elements are printed:
2383 * 0 => none
2384 * n, n < 0 => all
2385 * n, n > 0 => last n on active list
2386 */
2387int
2388zone_count(
2389 zone_t z,
2390 int tail)
2391{
91447636 2392 void *elt;
1c79356b
A
2393 int count = 0;
2394 boolean_t print = (tail != 0);
2395
2396 if (tail < 0)
2397 tail = z->count;
2398 if (z->count < tail)
2399 tail = 0;
2400 tail = z->count - tail;
2401 for (elt = first_element(z); elt; elt = next_element(z, elt)) {
2402 if (print && tail <= count)
2403 db_printf("%8x\n", elt);
2404 count++;
2405 }
2406 assert(count == z->count);
2407 return(count);
2408}
2409#endif /* MACH_KDB */
2410
2411#define zone_in_use(z) ( z->count || z->free_elements )
2412
2413void
2414zone_debug_enable(
2415 zone_t z)
2416{
2417 if (zone_debug_enabled(z) || zone_in_use(z) ||
55e303ae 2418 z->alloc_size < (z->elem_size + ZONE_DEBUG_OFFSET))
1c79356b
A
2419 return;
2420 queue_init(&z->active_zones);
55e303ae 2421 z->elem_size += ZONE_DEBUG_OFFSET;
1c79356b
A
2422}
2423
2424void
2425zone_debug_disable(
2426 zone_t z)
2427{
2428 if (!zone_debug_enabled(z) || zone_in_use(z))
2429 return;
55e303ae 2430 z->elem_size -= ZONE_DEBUG_OFFSET;
2d21ac55 2431 z->active_zones.next = z->active_zones.prev = NULL;
1c79356b 2432}
b0d623f7
A
2433
2434
1c79356b 2435#endif /* ZONE_DEBUG */