Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
6d2010ae | 2 | * Copyright (c) 2000-2009 Apple Inc. All rights reserved. |
1c79356b | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | * @OSF_COPYRIGHT@ | |
30 | */ | |
31 | /* | |
32 | * Mach Operating System | |
33 | * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University | |
34 | * All Rights Reserved. | |
35 | * | |
36 | * Permission to use, copy, modify and distribute this software and its | |
37 | * documentation is hereby granted, provided that both the copyright | |
38 | * notice and this permission notice appear in all copies of the | |
39 | * software, derivative works or modified versions, and any portions | |
40 | * thereof, and that both notices appear in supporting documentation. | |
41 | * | |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
45 | * | |
46 | * Carnegie Mellon requests users of this software to return to | |
47 | * | |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
49 | * School of Computer Science | |
50 | * Carnegie Mellon University | |
51 | * Pittsburgh PA 15213-3890 | |
52 | * | |
53 | * any improvements or extensions that they make and grant Carnegie Mellon | |
54 | * the rights to redistribute these changes. | |
55 | */ | |
56 | /* | |
57 | */ | |
58 | /* | |
59 | * File: kern/zalloc.c | |
60 | * Author: Avadis Tevanian, Jr. | |
61 | * | |
62 | * Zone-based memory allocator. A zone is a collection of fixed size | |
63 | * data blocks for which quick allocation/deallocation is possible. | |
64 | */ | |
65 | #include <zone_debug.h> | |
2d21ac55 | 66 | #include <zone_alias_addr.h> |
1c79356b A |
67 | #include <norma_vm.h> |
68 | #include <mach_kdb.h> | |
91447636 A |
69 | |
70 | #include <mach/mach_types.h> | |
71 | #include <mach/vm_param.h> | |
72 | #include <mach/kern_return.h> | |
73 | #include <mach/mach_host_server.h> | |
6d2010ae | 74 | #include <mach/task_server.h> |
91447636 A |
75 | #include <mach/machine/vm_types.h> |
76 | #include <mach_debug/zone_info.h> | |
77 | ||
78 | #include <kern/kern_types.h> | |
1c79356b | 79 | #include <kern/assert.h> |
91447636 | 80 | #include <kern/host.h> |
1c79356b A |
81 | #include <kern/macro_help.h> |
82 | #include <kern/sched.h> | |
b0d623f7 | 83 | #include <kern/locks.h> |
1c79356b A |
84 | #include <kern/sched_prim.h> |
85 | #include <kern/misc_protos.h> | |
0b4e3aa0 | 86 | #include <kern/thread_call.h> |
1c79356b | 87 | #include <kern/zalloc.h> |
91447636 A |
88 | #include <kern/kalloc.h> |
89 | ||
90 | #include <vm/pmap.h> | |
91 | #include <vm/vm_map.h> | |
1c79356b | 92 | #include <vm/vm_kern.h> |
91447636 A |
93 | #include <vm/vm_page.h> |
94 | ||
1c79356b A |
95 | #include <machine/machparam.h> |
96 | ||
2d21ac55 | 97 | #include <libkern/OSDebug.h> |
7ddcb079 | 98 | #include <libkern/OSAtomic.h> |
2d21ac55 A |
99 | #include <sys/kdebug.h> |
100 | ||
c910b4d9 A |
101 | /* |
102 | * Zone Corruption Debugging | |
103 | * | |
104 | * We provide three methods to detect use of a zone element after it's been freed. These | |
105 | * checks are enabled by specifying "-zc" and/or "-zp" in the boot-args: | |
106 | * | |
1c79356b A |
107 | * (1) Range-check the free-list "next" ptr for sanity. |
108 | * (2) Store the ptr in two different words, and compare them against | |
c910b4d9 A |
109 | * each other when re-using the zone element, to detect modifications. |
110 | * (3) Poison the freed memory by overwriting it with 0xdeadbeef. | 
111 | * | |
6d2010ae | 112 | * The first two checks are fairly lightweight and are enabled by specifying "-zc"
c910b4d9 A |
113 | * in the boot-args. If you want more aggressive checking for use-after-free bugs |
114 | * and you don't mind the additional overhead, then turn on poisoning by adding | |
115 | * "-zp" to the boot-args in addition to "-zc". If you specify -zp without -zc, | |
116 | * it still poisons the memory when it's freed, but doesn't check if the memory | |
117 | * has been altered later when it's reallocated. | |
1c79356b A |
118 | */ |
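/*
 * Illustrative usage (a sketch, assuming boot-args are set with the standard
 * nvram tool on a test machine):
 *
 *	nvram boot-args="-zc -zp"
 *
 * With both flags set, ADD_TO_ZONE() below fills each freed element with
 * 0xdeadbeef and REMOVE_FROM_ZONE() re-checks that pattern (and the
 * duplicated free-list pointer) when the element is next handed out.
 */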
119 | ||
c910b4d9 A |
120 | boolean_t check_freed_element = FALSE; /* enabled by -zc in boot-args */ |
121 | boolean_t zfree_clear = FALSE; /* enabled by -zp in boot-args */ | |
1c79356b | 122 | |
6d2010ae A |
123 | /* |
124 | * Fake zones for things that want to report via zprint but are not actually zones. | |
125 | */ | |
126 | struct fake_zone_info { | |
127 | const char* name; | |
128 | void (*init)(int); | |
129 | void (*query)(int *, | |
130 | vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *, | |
131 | uint64_t *, int *, int *, int *); | |
132 | }; | |
133 | ||
134 | static struct fake_zone_info fake_zones[] = { | |
135 | { | |
136 | .name = "kernel_stacks", | |
137 | .init = stack_fake_zone_init, | |
138 | .query = stack_fake_zone_info, | |
139 | }, | |
140 | #if defined(__i386__) || defined (__x86_64__) | |
141 | { | |
142 | .name = "page_tables", | |
143 | .init = pt_fake_zone_init, | |
144 | .query = pt_fake_zone_info, | |
145 | }, | |
146 | #endif /* i386 */ | |
147 | { | |
148 | .name = "kalloc.large", | |
149 | .init = kalloc_fake_zone_init, | |
150 | .query = kalloc_fake_zone_info, | |
151 | }, | |
152 | }; | |
153 | unsigned int num_fake_zones = sizeof(fake_zones)/sizeof(fake_zones[0]); | |
154 | ||
155 | /* | |
156 | * Zone info options | |
157 | */ | |
158 | boolean_t zinfo_per_task = FALSE; /* enabled by -zinfop in boot-args */ | |
159 | #define ZINFO_SLOTS 200 /* for now */ | |
160 | #define ZONES_MAX (ZINFO_SLOTS - num_fake_zones - 1) | |
161 | ||
162 | /* | |
163 | * Allocation helper macros | |
164 | */ | |
c910b4d9 | 165 | #define is_kernel_data_addr(a) (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3))) |
1c79356b A |
166 | |
167 | #define ADD_TO_ZONE(zone, element) \ | |
168 | MACRO_BEGIN \ | |
c910b4d9 A |
169 | if (zfree_clear) \ |
170 | { unsigned int i; \ | |
171 | for (i=0; \ | |
172 | i < zone->elem_size/sizeof(uint32_t); \ | |
173 | i++) \ | |
174 | ((uint32_t *)(element))[i] = 0xdeadbeef; \ | |
1c79356b | 175 | } \ |
c910b4d9 A |
176 | *((vm_offset_t *)(element)) = (zone)->free_elements; \ |
177 | if (check_freed_element) { \ | |
178 | if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \ | |
179 | ((vm_offset_t *)(element))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \ | |
180 | (zone)->free_elements; \ | |
1c79356b | 181 | } \ |
c910b4d9 A |
182 | (zone)->free_elements = (vm_offset_t) (element); \ |
183 | (zone)->count--; \ | |
1c79356b A |
184 | MACRO_END |
185 | ||
c910b4d9 A |
186 | #define REMOVE_FROM_ZONE(zone, ret, type) \ |
187 | MACRO_BEGIN \ | |
188 | (ret) = (type) (zone)->free_elements; \ | |
189 | if ((ret) != (type) 0) { \ | |
190 | if (check_freed_element) { \ | |
191 | if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0]) || \ | |
192 | ((zone)->elem_size >= (2 * sizeof(vm_offset_t)) && \ | |
193 | ((vm_offset_t *)(ret))[((zone)->elem_size/sizeof(vm_offset_t))-1] != \ | |
194 | ((vm_offset_t *)(ret))[0])) \ | |
195 | panic("a freed zone element has been modified");\ | |
196 | if (zfree_clear) { \ | |
197 | unsigned int ii; \ | |
198 | for (ii = sizeof(vm_offset_t) / sizeof(uint32_t); \ | |
6d2010ae | 199 | ii < (zone)->elem_size/sizeof(uint32_t) - sizeof(vm_offset_t) / sizeof(uint32_t); \ |
c910b4d9 A |
200 | ii++) \ |
201 | if (((uint32_t *)(ret))[ii] != (uint32_t)0xdeadbeef) \ | |
202 | panic("a freed zone element has been modified");\ | |
203 | } \ | |
204 | } \ | |
205 | (zone)->count++; \ | |
6d2010ae | 206 | (zone)->sum_count++; \ |
c910b4d9 A |
207 | (zone)->free_elements = *((vm_offset_t *)(ret)); \ |
208 | } \ | |
209 | MACRO_END | |
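/*
 * Sketch of a freed element when both "-zc" and "-zp" are enabled (assuming
 * the element is at least two pointers wide):
 *
 *	word 0                  free-list "next" pointer (zone->free_elements)
 *	middle 32-bit words     0xdeadbeef poison
 *	last vm_offset_t slot   duplicate of the "next" pointer
 *
 * REMOVE_FROM_ZONE() above panics if the pointer fails is_kernel_data_addr(),
 * if the two pointer copies disagree, or if any poisoned word has changed.
 */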
1c79356b A |
210 | |
211 | #if ZONE_DEBUG | |
212 | #define zone_debug_enabled(z) z->active_zones.next | |
55e303ae A |
213 | #define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y)) |
214 | #define ZONE_DEBUG_OFFSET ROUNDUP(sizeof(queue_chain_t),16) | |
1c79356b A |
215 | #endif /* ZONE_DEBUG */ |
216 | ||
217 | /* | |
7ddcb079 A |
218 | * Support for garbage collection of unused zone pages |
219 | * | |
220 | * The kernel virtually allocates the "zone map" submap of the kernel | |
221 | * map. When an individual zone needs more storage, memory is allocated | |
222 | * out of the zone map, and the two-level "zone_page_table" is | |
223 | * on-demand expanded so that it has entries for those pages. | |
224 | * zone_page_init()/zone_page_alloc() initialize "alloc_count" | |
225 | * to the number of zone elements that occupy the zone page (which may | |
226 | * be a minimum of 1, including if a zone element spans multiple | |
227 | * pages). | |
228 | * | |
229 | * Asynchronously, the zone_gc() logic attempts to walk zone free | |
230 | * lists to see if all the elements on a zone page are free. If | |
231 | * "collect_count" (which it increments during the scan) matches | |
232 | * "alloc_count", the zone page is a candidate for collection and the | |
233 | * physical page is returned to the VM system. During this process, the | |
234 | * first word of the zone page is re-used to maintain a linked list of | |
235 | * to-be-collected zone pages. | |
1c79356b | 236 | */ |
7ddcb079 A |
237 | typedef uint32_t zone_page_index_t; |
238 | #define ZONE_PAGE_INDEX_INVALID ((zone_page_index_t)0xFFFFFFFFU) | |
1c79356b A |
239 | |
240 | struct zone_page_table_entry { | |
7ddcb079 A |
241 | volatile uint16_t alloc_count; |
242 | volatile uint16_t collect_count; | |
1c79356b A |
243 | }; |
244 | ||
7ddcb079 A |
245 | #define ZONE_PAGE_USED 0 |
246 | #define ZONE_PAGE_UNUSED 0xffff | |
247 | ||
1c79356b A |
248 | /* Forwards */ |
249 | void zone_page_init( | |
250 | vm_offset_t addr, | |
7ddcb079 | 251 | vm_size_t size); |
1c79356b A |
252 | |
253 | void zone_page_alloc( | |
254 | vm_offset_t addr, | |
255 | vm_size_t size); | |
256 | ||
55e303ae | 257 | void zone_page_free_element( |
7ddcb079 | 258 | zone_page_index_t *free_page_list, |
1c79356b A |
259 | vm_offset_t addr, |
260 | vm_size_t size); | |
261 | ||
55e303ae | 262 | void zone_page_collect( |
1c79356b A |
263 | vm_offset_t addr, |
264 | vm_size_t size); | |
265 | ||
266 | boolean_t zone_page_collectable( | |
267 | vm_offset_t addr, | |
268 | vm_size_t size); | |
269 | ||
270 | void zone_page_keep( | |
271 | vm_offset_t addr, | |
272 | vm_size_t size); | |
273 | ||
0b4e3aa0 A |
274 | void zalloc_async( |
275 | thread_call_param_t p0, | |
276 | thread_call_param_t p1); | |
277 | ||
b0d623f7 | 278 | void zone_display_zprint( void ); |
0b4e3aa0 | 279 | |
1c79356b A |
280 | #if ZONE_DEBUG && MACH_KDB |
281 | int zone_count( | |
282 | zone_t z, | |
283 | int tail); | |
284 | #endif /* ZONE_DEBUG && MACH_KDB */ | |
285 | ||
286 | vm_map_t zone_map = VM_MAP_NULL; | |
287 | ||
288 | zone_t zone_zone = ZONE_NULL; /* the zone containing other zones */ | |
289 | ||
6d2010ae A |
290 | zone_t zinfo_zone = ZONE_NULL; /* zone of per-task zone info */ |
291 | ||
1c79356b A |
292 | /* |
293 | * The VM system gives us an initial chunk of memory. | |
294 | * It has to be big enough to allocate the zone_zone | |
7ddcb079 | 295 | * all the way through the pmap zone. |
1c79356b A |
296 | */ |
297 | ||
298 | vm_offset_t zdata; | |
299 | vm_size_t zdata_size; | |
300 | ||
301 | #define lock_zone(zone) \ | |
302 | MACRO_BEGIN \ | |
b0d623f7 | 303 | lck_mtx_lock_spin(&(zone)->lock); \ |
1c79356b A |
304 | MACRO_END |
305 | ||
306 | #define unlock_zone(zone) \ | |
307 | MACRO_BEGIN \ | |
2d21ac55 | 308 | lck_mtx_unlock(&(zone)->lock); \ |
1c79356b A |
309 | MACRO_END |
310 | ||
9bccf70c A |
311 | #define zone_wakeup(zone) thread_wakeup((event_t)(zone)) |
312 | #define zone_sleep(zone) \ | |
b0d623f7 | 313 | (void) lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN, (event_t)(zone), THREAD_UNINT); |
2d21ac55 | 314 | |
9bccf70c | 315 | |
1c79356b A |
316 | #define lock_zone_init(zone) \ |
317 | MACRO_BEGIN \ | |
2d21ac55 A |
318 | char _name[32]; \ |
319 | (void) snprintf(_name, sizeof (_name), "zone.%s", (zone)->zone_name); \ | |
320 | lck_grp_attr_setdefault(&(zone)->lock_grp_attr); \ | |
321 | lck_grp_init(&(zone)->lock_grp, _name, &(zone)->lock_grp_attr); \ | |
322 | lck_attr_setdefault(&(zone)->lock_attr); \ | |
323 | lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext, \ | |
324 | &(zone)->lock_grp, &(zone)->lock_attr); \ | |
1c79356b A |
325 | MACRO_END |
326 | ||
b0d623f7 | 327 | #define lock_try_zone(zone) lck_mtx_try_lock_spin(&zone->lock) |
1c79356b | 328 | |
1c79356b A |
329 | /* |
330 | * Garbage collection map information | |
331 | */ | |
7ddcb079 A |
332 | #define ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE (32) |
333 | struct zone_page_table_entry * volatile zone_page_table[ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE]; | |
334 | vm_size_t zone_page_table_used_size; | |
1c79356b A |
335 | vm_offset_t zone_map_min_address; |
336 | vm_offset_t zone_map_max_address; | |
91447636 | 337 | unsigned int zone_pages; |
7ddcb079 A |
338 | unsigned int zone_page_table_second_level_size; /* power of 2 */ |
339 | unsigned int zone_page_table_second_level_shift_amount; | |
340 | ||
341 | #define zone_page_table_first_level_slot(x) ((x) >> zone_page_table_second_level_shift_amount) | |
342 | #define zone_page_table_second_level_slot(x) ((x) & (zone_page_table_second_level_size - 1)) | |
343 | ||
344 | void zone_page_table_expand(zone_page_index_t pindex); | |
345 | struct zone_page_table_entry *zone_page_table_lookup(zone_page_index_t pindex); | |
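/*
 * Minimal sketch of how the two levels fit together (the real
 * zone_page_table_lookup() is defined later in this file; the name below is
 * illustrative only):
 */
static inline struct zone_page_table_entry *
zone_page_table_lookup_sketch(zone_page_index_t pindex)
{
	struct zone_page_table_entry *second_level =
	    zone_page_table[zone_page_table_first_level_slot(pindex)];

	/* NULL means zone_page_table_expand() has never covered this chunk */
	if (second_level == NULL)
		return NULL;

	return &second_level[zone_page_table_second_level_slot(pindex)];
}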
1c79356b A |
346 | |
347 | /* | |
348 | * Exclude more than one concurrent garbage collection | |
349 | */ | |
b0d623f7 A |
350 | decl_lck_mtx_data(, zone_gc_lock) |
351 | ||
352 | lck_attr_t zone_lck_attr; | |
353 | lck_grp_t zone_lck_grp; | |
354 | lck_grp_attr_t zone_lck_grp_attr; | |
355 | lck_mtx_ext_t zone_lck_ext; | |
356 | ||
1c79356b | 357 | |
2d21ac55 | 358 | #if !ZONE_ALIAS_ADDR |
55e303ae | 359 | #define from_zone_map(addr, size) \ |
1c79356b | 360 | ((vm_offset_t)(addr) >= zone_map_min_address && \ |
55e303ae | 361 | ((vm_offset_t)(addr) + size -1) < zone_map_max_address) |
2d21ac55 A |
362 | #else |
363 | #define from_zone_map(addr, size) \ | |
364 | ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) >= zone_map_min_address && \ | |
365 | ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) + size -1) < zone_map_max_address) | |
366 | #endif | |
1c79356b | 367 | |
1c79356b A |
368 | /* |
369 | * Protects first_zone, last_zone, num_zones, | |
370 | * and the next_zone field of zones. | |
371 | */ | |
372 | decl_simple_lock_data(, all_zones_lock) | |
373 | zone_t first_zone; | |
374 | zone_t *last_zone; | |
91447636 | 375 | unsigned int num_zones; |
1c79356b | 376 | |
0b4e3aa0 A |
377 | boolean_t zone_gc_allowed = TRUE; |
378 | boolean_t zone_gc_forced = FALSE; | |
c910b4d9 | 379 | boolean_t panic_include_zprint = FALSE; |
6d2010ae | 380 | boolean_t zone_gc_allowed_by_time_throttle = TRUE; |
0b4e3aa0 | 381 | |
c910b4d9 A |
382 | /* |
383 | * Zone leak debugging code | |
384 | * | |
385 | * When enabled, this code keeps a log to track allocations to a particular zone that have not | |
386 | * yet been freed. Examining this log will reveal the source of a zone leak. The log is allocated | |
387 | * only when logging is enabled, so there is no effect on the system when it's turned off. Logging is | |
388 | * off by default. | |
389 | * | |
390 | * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone> | |
391 | * is the name of the zone you wish to log. | |
392 | * | |
393 | * This code only tracks one zone, so you need to identify which one is leaking first. | |
394 | * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone | |
395 | * garbage collector. Note that the zone name printed in the panic message is not necessarily the one | |
396 | * containing the leak. So do a zprint from gdb and locate the zone with the bloated size. This | |
397 | * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test. The | |
398 | * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs. | |
399 | * See the help in the kgmacros for usage info. | |
400 | * | |
401 | * | |
402 | * Zone corruption logging | |
403 | * | |
404 | * Logging can also be used to help identify the source of a zone corruption. First, identify the zone | |
405 | * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args. When -zc is used in conjunction | |
406 | * with zlog, it changes the logging style to track both allocations and frees to the zone. So when the | |
407 | * corruption is detected, examining the log will show you the stack traces of the callers who last allocated | |
408 | * and freed any particular element in the zone. Use the findelem kgmacro with the address of the element that's been | |
409 | * corrupted to examine its history. This should lead to the source of the corruption. | |
410 | */ | |
411 | ||
412 | static int log_records; /* size of the log, expressed in number of records */ | |
413 | ||
414 | #define MAX_ZONE_NAME 32 /* max length of a zone name we can take from the boot-args */ | |
415 | ||
416 | static char zone_name_to_log[MAX_ZONE_NAME] = ""; /* the zone name we're logging, if any */ | |
417 | ||
418 | /* | |
419 | * The number of records in the log is configurable via the zrecs parameter in boot-args. Set this to | |
420 | * the number of records you want in the log. For example, "zrecs=1000" sets it to 1000 records. Note | |
421 | * that the larger the size of the log, the slower the system will run due to linear searching in the log, | |
422 | * but one doesn't generally care about performance when tracking down a leak. The log is capped at 8000 | |
423 | * records since going much larger than this tends to make the system unresponsive and unbootable on small | |
424 | * memory configurations. The default value is 4000 records. | |
c910b4d9 | 425 | */ |
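/*
 * Illustrative boot-args (the zone name is only an example):
 *
 *	zlog=vm.objects zrecs=8000		leak logging only
 *	-zc zlog=vm.objects zrecs=8000		allocation/free (corruption) logging
 *
 * A '.' in the zlog value stands in for a space, so "vm.objects" selects the
 * zone named "vm objects" (see log_this_zone() below).
 */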
6d2010ae A |
426 | #if defined(__LP64__) |
427 | #define ZRECORDS_MAX 16000 /* Max records allowed in the log */ | |
428 | #else | |
c910b4d9 | 429 | #define ZRECORDS_MAX 8000 /* Max records allowed in the log */ |
6d2010ae | 430 | #endif |
c910b4d9 | 431 | #define ZRECORDS_DEFAULT 4000 /* default records in log if zrecs is not specified in boot-args */ | 
0b4e3aa0 | 432 | |
c910b4d9 A |
433 | /* |
434 | * Each record in the log contains a pointer to the zone element it refers to, a "time" number that allows | |
435 | * the records to be ordered chronologically, and a small array to hold the pc's from the stack trace. A | |
436 | * record is added to the log each time a zalloc() is done in the zone_of_interest. For leak debugging, | |
437 | * the record is cleared when a zfree() is done. For corruption debugging, the log tracks both allocs and frees. | |
438 | * If the log fills, old records are replaced as if it were a circular buffer. | |
439 | */ | |
440 | ||
441 | struct zrecord { | |
442 | void *z_element; /* the element that was zalloc'ed or zfree'ed */ | 
443 | uint32_t z_opcode:1, /* whether it was a zalloc or zfree */ | |
444 | z_time:31; /* time index when operation was done */ | |
6d2010ae | 445 | void *z_pc[MAX_ZTRACE_DEPTH]; /* stack trace of caller */ |
c910b4d9 A |
446 | }; |
447 | ||
448 | /* | |
449 | * Opcodes for the z_opcode field: | |
450 | */ | |
451 | ||
452 | #define ZOP_ALLOC 1 | |
453 | #define ZOP_FREE 0 | |
454 | ||
455 | /* | |
456 | * The allocation log and all the related variables are protected by the zone lock for the zone_of_interest | |
457 | */ | |
458 | ||
459 | static struct zrecord *zrecords; /* the log itself, dynamically allocated when logging is enabled */ | |
460 | static int zcurrent = 0; /* index of the next slot in the log to use */ | |
461 | static int zrecorded = 0; /* number of allocations recorded in the log */ | |
462 | static unsigned int ztime = 0; /* a timestamp of sorts */ | |
463 | static zone_t zone_of_interest = NULL; /* the zone being watched; corresponds to zone_name_to_log */ | |
464 | ||
465 | /* | |
466 | * Decide if we want to log this zone by doing a string compare between a zone name and the name | |
467 | * of the zone to log. Return true if the strings are equal, false otherwise. Because it's not | |
468 | * possible to include spaces in strings passed in via the boot-args, a period in the logname will | |
469 | * match a space in the zone name. | |
470 | */ | |
471 | ||
472 | static int | |
473 | log_this_zone(const char *zonename, const char *logname) | |
474 | { | |
475 | int len; | |
476 | const char *zc = zonename; | |
477 | const char *lc = logname; | |
478 | ||
479 | /* | |
480 | * Compare the strings. We bound the compare by MAX_ZONE_NAME. | |
481 | */ | |
482 | ||
483 | for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) { | |
484 | ||
485 | /* | |
486 | * If the current characters don't match, check for a space in | |
487 | * the zone name and a corresponding period in the log name. | 
488 | * If that's not there, then the strings don't match. | |
489 | */ | |
490 | ||
491 | if (*zc != *lc && !(*zc == ' ' && *lc == '.')) | |
492 | break; | |
493 | ||
494 | /* | |
495 | * The strings are equal so far. If we're at the end, then it's a match. | |
496 | */ | |
497 | ||
498 | if (*zc == '\0') | |
499 | return TRUE; | |
500 | } | |
501 | ||
502 | return FALSE; | |
503 | } | |
504 | ||
505 | ||
506 | /* | |
507 | * Test if we want to log this zalloc/zfree event. We log if this is the zone we're interested in and | |
508 | * the buffer for the records has been allocated. | |
509 | */ | |
510 | ||
511 | #define DO_LOGGING(z) (zrecords && (z) == zone_of_interest) | |
512 | ||
513 | extern boolean_t zlog_ready; | |
514 | ||
6d2010ae A |
515 | #if CONFIG_ZLEAKS |
516 | #pragma mark - | |
517 | #pragma mark Zone Leak Detection | |
518 | ||
519 | /* | |
520 | * The zone leak detector, abbreviated 'zleak', keeps track of a subset of the currently outstanding | |
521 | * allocations made by the zone allocator. Every z_sample_factor allocations in each zone, we capture a | |
522 | * backtrace. On every free, we examine the table to determine whether the allocation was being | 
523 | * tracked, and stop tracking it if it was. | 
524 | * | |
525 | * We track the allocations in the zallocations hash table, which stores the address that was returned from | |
526 | * the zone allocator. Each stored entry in the zallocations table points to an entry in the ztraces table, which | |
527 | * stores the backtrace associated with that allocation. This provides uniquing for the relatively large | |
528 | * backtraces - we don't store them more than once. | |
529 | * | |
530 | * Data collection begins when the zone map is 50% full, and only occurs for zones that are taking up | |
531 | * a large amount of virtual space. | |
532 | */ | |
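/*
 * Sketch (not the literal allocation-path code) of how these pieces are used,
 * in terms of the per-zone fields that zinit() initializes below:
 *
 *	if (zone->zleak_on && (zone->zleak_capture++ % z_sample_factor) == 0) {
 *		uintptr_t zbt[MAX_ZTRACE_DEPTH];
 *		uint32_t  depth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH);
 *		(void) zleak_log(zbt, (uintptr_t)addr, depth, zone->elem_size);
 *	}
 *
 * and the free path calls zleak_free(addr, zone->elem_size) so the record and
 * its trace's refcount are released.
 */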
533 | #define ZLEAK_STATE_ENABLED 0x01 /* Zone leak monitoring should be turned on if zone_map fills up. */ | |
534 | #define ZLEAK_STATE_ACTIVE 0x02 /* We are actively collecting traces. */ | |
535 | #define ZLEAK_STATE_ACTIVATING 0x04 /* Some thread is doing setup; others should move along. */ | |
536 | #define ZLEAK_STATE_FAILED 0x08 /* Attempt to allocate tables failed. We will not try again. */ | |
537 | uint32_t zleak_state = 0; /* State of collection, as above */ | |
538 | ||
539 | boolean_t panic_include_ztrace = FALSE; /* Enable zleak logging on panic */ | |
540 | vm_size_t zleak_global_tracking_threshold; /* Size of zone map at which to start collecting data */ | |
541 | vm_size_t zleak_per_zone_tracking_threshold; /* Size a zone will have before we will collect data on it */ | |
542 | unsigned int z_sample_factor = 1000; /* Allocations per sample attempt */ | |
543 | ||
544 | /* | |
545 | * Counters for allocation statistics. | |
546 | */ | |
547 | ||
548 | /* Times two active records want to occupy the same spot */ | |
549 | unsigned int z_alloc_collisions = 0; | |
550 | unsigned int z_trace_collisions = 0; | |
551 | ||
552 | /* Times a new record lands on a spot previously occupied by a freed allocation */ | |
553 | unsigned int z_alloc_overwrites = 0; | |
554 | unsigned int z_trace_overwrites = 0; | |
555 | ||
556 | /* Times a new alloc or trace is put into the hash table */ | |
557 | unsigned int z_alloc_recorded = 0; | |
558 | unsigned int z_trace_recorded = 0; | |
559 | ||
560 | /* Times zleak_log returned false due to not being able to acquire the lock */ | |
561 | unsigned int z_total_conflicts = 0; | |
562 | ||
563 | ||
564 | #pragma mark struct zallocation | |
565 | /* | |
566 | * Structure for keeping track of an allocation | |
567 | * An allocation bucket is in use if its element is not NULL | |
568 | */ | |
569 | struct zallocation { | |
570 | uintptr_t za_element; /* the element that was zalloc'ed or zfree'ed, NULL if bucket unused */ | |
571 | vm_size_t za_size; /* how much memory did this allocation take up? */ | |
572 | uint32_t za_trace_index; /* index into ztraces for backtrace associated with allocation */ | |
573 | /* TODO: #if this out */ | |
574 | uint32_t za_hit_count; /* for determining effectiveness of hash function */ | |
575 | }; | |
576 | ||
577 | /* Size must be a power of two for the zhash to be able to just mask off bits instead of mod */ | |
578 | #define ZLEAK_ALLOCATION_MAP_NUM 16384 | |
579 | #define ZLEAK_TRACE_MAP_NUM 8192 | |
580 | ||
581 | uint32_t zleak_alloc_buckets = ZLEAK_ALLOCATION_MAP_NUM; | |
582 | uint32_t zleak_trace_buckets = ZLEAK_TRACE_MAP_NUM; | |
583 | ||
584 | vm_size_t zleak_max_zonemap_size; | |
585 | ||
586 | /* Hashmaps of allocations and their corresponding traces */ | |
587 | static struct zallocation* zallocations; | |
588 | static struct ztrace* ztraces; | |
589 | ||
590 | /* not static so that panic can see this, see kern/debug.c */ | |
591 | struct ztrace* top_ztrace; | |
592 | ||
593 | /* Lock to protect zallocations, ztraces, and top_ztrace from concurrent modification. */ | |
594 | static lck_mtx_t zleak_lock; | |
595 | static lck_attr_t zleak_lock_attr; | |
596 | static lck_grp_t zleak_lock_grp; | |
597 | static lck_grp_attr_t zleak_lock_grp_attr; | |
598 | ||
599 | /* | |
600 | * Initializes the zone leak monitor. Called from zone_init() | |
601 | */ | |
602 | static void | |
603 | zleak_init(vm_size_t max_zonemap_size) | |
604 | { | |
605 | char scratch_buf[16]; | |
606 | boolean_t zleak_enable_flag = FALSE; | |
607 | ||
608 | zleak_max_zonemap_size = max_zonemap_size; | |
609 | zleak_global_tracking_threshold = max_zonemap_size / 2; | |
610 | zleak_per_zone_tracking_threshold = zleak_global_tracking_threshold / 8; | |
611 | ||
612 | /* -zleakoff (flag to disable zone leak monitor) */ | |
613 | if (PE_parse_boot_argn("-zleakoff", scratch_buf, sizeof(scratch_buf))) { | |
614 | zleak_enable_flag = FALSE; | |
615 | printf("zone leak detection disabled\n"); | |
616 | } else { | |
617 | zleak_enable_flag = TRUE; | |
618 | printf("zone leak detection enabled\n"); | |
619 | } | |
620 | ||
621 | /* zfactor=XXXX (override how often to sample the zone allocator) */ | |
622 | if (PE_parse_boot_argn("zfactor", &z_sample_factor, sizeof(z_sample_factor))) { | |
623 | printf("Zone leak factor override:%u\n", z_sample_factor); | |
624 | } | |
625 | ||
626 | /* zleak-allocs=XXXX (override number of buckets in zallocations) */ | |
627 | if (PE_parse_boot_argn("zleak-allocs", &zleak_alloc_buckets, sizeof(zleak_alloc_buckets))) { | |
628 | printf("Zone leak alloc buckets override:%u\n", zleak_alloc_buckets); | |
629 | /* uses 'is power of 2' trick: ((0x01000 & 0x00FFF) == 0) */ | 
630 | if (zleak_alloc_buckets == 0 || (zleak_alloc_buckets & (zleak_alloc_buckets-1))) { | |
631 | printf("Override isn't a power of two, bad things might happen!"); | |
632 | } | |
633 | } | |
634 | ||
635 | /* zleak-traces=XXXX (override number of buckets in ztraces) */ | |
636 | if (PE_parse_boot_argn("zleak-traces", &zleak_trace_buckets, sizeof(zleak_trace_buckets))) { | |
637 | printf("Zone leak trace buckets override:%u\n", zleak_trace_buckets); | |
638 | /* uses 'is power of 2' trick: ((0x01000 & 0x00FFF) == 0) */ | 
639 | if (zleak_trace_buckets == 0 || (zleak_trace_buckets & (zleak_trace_buckets-1))) { | |
640 | printf("Override isn't a power of two, bad things might happen!"); | |
641 | } | |
642 | } | |
643 | ||
644 | /* allocate the zleak_lock */ | |
645 | lck_grp_attr_setdefault(&zleak_lock_grp_attr); | |
646 | lck_grp_init(&zleak_lock_grp, "zleak_lock", &zleak_lock_grp_attr); | |
647 | lck_attr_setdefault(&zleak_lock_attr); | |
648 | lck_mtx_init(&zleak_lock, &zleak_lock_grp, &zleak_lock_attr); | |
649 | ||
650 | if (zleak_enable_flag) { | |
651 | zleak_state = ZLEAK_STATE_ENABLED; | |
652 | } | |
653 | } | |
654 | ||
655 | #if CONFIG_ZLEAKS | |
656 | ||
657 | /* | |
658 | * Support for kern.zleak.active sysctl - a simplified | |
659 | * version of the zleak_state variable. | 
660 | */ | |
661 | int | |
662 | get_zleak_state(void) | |
663 | { | |
664 | if (zleak_state & ZLEAK_STATE_FAILED) | |
665 | return (-1); | |
666 | if (zleak_state & ZLEAK_STATE_ACTIVE) | |
667 | return (1); | |
668 | return (0); | |
669 | } | |
670 | ||
671 | #endif | |
672 | ||
673 | ||
674 | kern_return_t | |
675 | zleak_activate(void) | |
676 | { | |
677 | kern_return_t retval; | |
678 | vm_size_t z_alloc_size = zleak_alloc_buckets * sizeof(struct zallocation); | |
679 | vm_size_t z_trace_size = zleak_trace_buckets * sizeof(struct ztrace); | |
680 | void *allocations_ptr = NULL; | |
681 | void *traces_ptr = NULL; | |
682 | ||
683 | /* Only one thread attempts to activate at a time */ | |
684 | if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) { | |
685 | return KERN_SUCCESS; | |
686 | } | |
687 | ||
688 | /* Indicate that we're doing the setup */ | |
689 | lck_mtx_lock_spin(&zleak_lock); | |
690 | if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) { | |
691 | lck_mtx_unlock(&zleak_lock); | |
692 | return KERN_SUCCESS; | |
693 | } | |
694 | ||
695 | zleak_state |= ZLEAK_STATE_ACTIVATING; | |
696 | lck_mtx_unlock(&zleak_lock); | |
697 | ||
698 | /* Allocate and zero tables */ | |
699 | retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size); | |
700 | if (retval != KERN_SUCCESS) { | |
701 | goto fail; | |
702 | } | |
703 | ||
704 | retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&traces_ptr, z_trace_size); | |
705 | if (retval != KERN_SUCCESS) { | |
706 | goto fail; | |
707 | } | |
708 | ||
709 | bzero(allocations_ptr, z_alloc_size); | |
710 | bzero(traces_ptr, z_trace_size); | |
711 | ||
712 | /* Everything's set. Install tables, mark active. */ | |
713 | zallocations = allocations_ptr; | |
714 | ztraces = traces_ptr; | |
715 | ||
716 | /* | |
717 | * Initialize the top_ztrace to the first entry in ztraces, | |
718 | * so we don't have to check for null in zleak_log | |
719 | */ | |
720 | top_ztrace = &ztraces[0]; | |
721 | ||
722 | /* | |
723 | * Note that we do need a barrier between installing | |
724 | * the tables and setting the active flag, because the zfree() | |
725 | * path accesses the table without a lock if we're active. | |
726 | */ | |
727 | lck_mtx_lock_spin(&zleak_lock); | |
728 | zleak_state |= ZLEAK_STATE_ACTIVE; | |
729 | zleak_state &= ~ZLEAK_STATE_ACTIVATING; | |
730 | lck_mtx_unlock(&zleak_lock); | |
731 | ||
732 | return 0; | |
733 | ||
734 | fail: | |
735 | /* | |
736 | * If we fail to allocate memory, don't further tax | |
737 | * the system by trying again. | |
738 | */ | |
739 | lck_mtx_lock_spin(&zleak_lock); | |
740 | zleak_state |= ZLEAK_STATE_FAILED; | |
741 | zleak_state &= ~ZLEAK_STATE_ACTIVATING; | |
742 | lck_mtx_unlock(&zleak_lock); | |
743 | ||
744 | if (allocations_ptr != NULL) { | |
745 | kmem_free(kernel_map, (vm_offset_t)allocations_ptr, z_alloc_size); | |
746 | } | |
747 | ||
748 | if (traces_ptr != NULL) { | |
749 | kmem_free(kernel_map, (vm_offset_t)traces_ptr, z_trace_size); | |
750 | } | |
751 | ||
752 | return retval; | |
753 | } | |
754 | ||
755 | /* | |
756 | * TODO: What about allocations that never get deallocated, | |
757 | * especially ones with unique backtraces? Should we wait to record | |
758 | * until after boot has completed? | |
759 | * (How many persistent zallocs are there?) | |
760 | */ | |
761 | ||
762 | /* | |
763 | * This function records the allocation in the allocations table, | |
764 | * and stores the associated backtrace in the traces table | |
765 | * (or just increments the refcount if the trace is already recorded) | |
766 | * If the allocation slot is in use, the old allocation is replaced with the new allocation, and | |
767 | * the associated trace's refcount is decremented. | |
768 | * If the trace slot is in use, it returns. | |
769 | * The refcount is incremented by the amount of memory the allocation consumes. | |
770 | * The return value indicates whether to try again next time. | |
771 | */ | |
772 | static boolean_t | |
773 | zleak_log(uintptr_t* bt, | |
774 | uintptr_t addr, | |
775 | uint32_t depth, | |
776 | vm_size_t allocation_size) | |
777 | { | |
778 | /* Quit if there's someone else modifying the hash tables */ | |
779 | if (!lck_mtx_try_lock_spin(&zleak_lock)) { | |
780 | z_total_conflicts++; | |
781 | return FALSE; | |
782 | } | |
783 | ||
784 | struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)]; | |
785 | ||
786 | uint32_t trace_index = hashbacktrace(bt, depth, zleak_trace_buckets); | |
787 | struct ztrace* trace = &ztraces[trace_index]; | |
788 | ||
789 | allocation->za_hit_count++; | |
790 | trace->zt_hit_count++; | |
791 | ||
792 | /* | |
793 | * If the allocation bucket we want to be in is occupied, and if the occupier | |
794 | * has the same trace as us, just bail. | |
795 | */ | |
796 | if (allocation->za_element != (uintptr_t) 0 && trace_index == allocation->za_trace_index) { | |
797 | z_alloc_collisions++; | |
798 | ||
799 | lck_mtx_unlock(&zleak_lock); | |
800 | return TRUE; | |
801 | } | |
802 | ||
803 | /* STEP 1: Store the backtrace in the traces array. */ | |
804 | /* A size of zero indicates that the trace bucket is free. */ | |
805 | ||
806 | if (trace->zt_size > 0 && bcmp(trace->zt_stack, bt, (depth * sizeof(uintptr_t))) != 0 ) { | |
807 | /* | |
808 | * Different unique trace with same hash! | |
809 | * Just bail - if we're trying to record the leaker, hopefully the other trace will be deallocated | |
810 | * and get out of the way for later chances | |
811 | */ | |
812 | trace->zt_collisions++; | |
813 | z_trace_collisions++; | |
814 | ||
815 | lck_mtx_unlock(&zleak_lock); | |
816 | return TRUE; | |
817 | } else if (trace->zt_size > 0) { | |
818 | /* Same trace, already added, so increment refcount */ | |
819 | trace->zt_size += allocation_size; | |
820 | } else { | |
821 | /* Found an unused trace bucket, record the trace here! */ | |
822 | if (trace->zt_depth != 0) /* if this slot was previously used but not currently in use */ | |
823 | z_trace_overwrites++; | |
824 | ||
825 | z_trace_recorded++; | |
826 | trace->zt_size = allocation_size; | |
827 | memcpy(trace->zt_stack, bt, (depth * sizeof(uintptr_t)) ); | |
828 | ||
829 | trace->zt_depth = depth; | |
830 | trace->zt_collisions = 0; | |
831 | } | |
832 | ||
833 | /* STEP 2: Store the allocation record in the allocations array. */ | |
834 | ||
835 | if (allocation->za_element != (uintptr_t) 0) { | |
836 | /* | |
837 | * Straight up replace any allocation record that was there. We don't want to do the work | |
838 | * to preserve the allocation entries that were there, because we only record a subset of the | |
839 | * allocations anyways. | |
840 | */ | |
841 | ||
842 | z_alloc_collisions++; | |
843 | ||
844 | struct ztrace* associated_trace = &ztraces[allocation->za_trace_index]; | |
845 | /* Knock off old allocation's size, not the new allocation */ | |
846 | associated_trace->zt_size -= allocation->za_size; | |
847 | } else if (allocation->za_trace_index != 0) { | |
848 | /* Slot previously used but not currently in use */ | |
849 | z_alloc_overwrites++; | |
850 | } | |
851 | ||
852 | allocation->za_element = addr; | |
853 | allocation->za_trace_index = trace_index; | |
854 | allocation->za_size = allocation_size; | |
855 | ||
856 | z_alloc_recorded++; | |
857 | ||
858 | if (top_ztrace->zt_size < trace->zt_size) | |
859 | top_ztrace = trace; | |
860 | ||
861 | lck_mtx_unlock(&zleak_lock); | |
862 | return TRUE; | |
863 | } | |
864 | ||
865 | /* | |
866 | * Free the allocation record and release the stacktrace. | |
867 | * This should be as fast as possible because it will be called for every free. | |
868 | */ | |
869 | static void | |
870 | zleak_free(uintptr_t addr, | |
871 | vm_size_t allocation_size) | |
872 | { | |
873 | if (addr == (uintptr_t) 0) | |
874 | return; | |
875 | ||
876 | struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)]; | |
877 | ||
878 | /* Double-checked locking: check to find out if we're interested, lock, check to make | |
879 | * sure it hasn't changed, then modify it, and release the lock. | |
880 | */ | |
c910b4d9 | 881 | |
6d2010ae A |
882 | if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) { |
883 | /* if the allocation was the one, grab the lock, check again, then delete it */ | |
884 | lck_mtx_lock_spin(&zleak_lock); | |
885 | ||
886 | if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) { | |
887 | struct ztrace *trace; | |
888 | ||
889 | /* allocation_size had better match what was passed into zleak_log - otherwise someone is freeing into the wrong zone! */ | |
890 | if (allocation->za_size != allocation_size) { | |
891 | panic("Freeing as size %lu memory that was allocated with size %lu\n", | |
892 | (uintptr_t)allocation_size, (uintptr_t)allocation->za_size); | |
893 | } | |
894 | ||
895 | trace = &ztraces[allocation->za_trace_index]; | |
896 | ||
897 | /* size of 0 indicates trace bucket is unused */ | |
898 | if (trace->zt_size > 0) { | |
899 | trace->zt_size -= allocation_size; | |
900 | } | |
901 | ||
902 | /* A NULL element means the allocation bucket is unused */ | |
903 | allocation->za_element = 0; | |
904 | } | |
905 | lck_mtx_unlock(&zleak_lock); | |
906 | } | |
907 | } | |
908 | ||
909 | #endif /* CONFIG_ZLEAKS */ | |
910 | ||
911 | /* These functions are outside of CONFIG_ZLEAKS because they are also used in | 
912 | * mbuf.c for mbuf leak-detection. This is why they lack the z_ prefix. | |
913 | */ | |
914 | ||
915 | /* | |
916 | * This function captures a backtrace from the current stack and | |
917 | * returns the number of frames captured, limited by max_frames. | |
918 | * It's fast because it does no checking to make sure there isn't bad data. | |
919 | * Since it's only called from threads that we're going to keep executing, | |
920 | * if there's bad data we were going to die eventually. | |
921 | * This seems to work for x86 and X86_64. | |
922 | * ARMTODO: Test it on ARM, I think it will work but I can't test it. If it works, remove the ifdef. | |
923 | * If this function is inlined, it doesn't record the frame of the function it's inside. | |
924 | * (because there's no stack frame!) | |
925 | */ | |
926 | uint32_t | |
927 | fastbacktrace(uintptr_t* bt, uint32_t max_frames) | |
928 | { | |
929 | #if defined(__x86_64__) || defined(__i386__) | |
930 | uintptr_t* frameptr = NULL, *frameptr_next = NULL; | |
931 | uintptr_t retaddr = 0; | |
932 | uint32_t frame_index = 0, frames = 0; | |
933 | uintptr_t kstackb, kstackt; | |
934 | ||
935 | kstackb = current_thread()->kernel_stack; | |
936 | kstackt = kstackb + kernel_stack_size; | |
937 | /* Load stack frame pointer (EBP on x86) into frameptr */ | |
938 | frameptr = __builtin_frame_address(0); | |
939 | ||
940 | while (frameptr != NULL && frame_index < max_frames ) { | |
941 | /* Next frame pointer is pointed to by the previous one */ | |
942 | frameptr_next = (uintptr_t*) *frameptr; | |
943 | ||
944 | /* Bail if we see a zero in the stack frame, that means we've reached the top of the stack */ | |
945 | /* That also means the return address is worthless, so don't record it */ | |
946 | if (frameptr_next == NULL) | |
947 | break; | |
948 | /* Verify thread stack bounds */ | |
949 | if (((uintptr_t)frameptr_next > kstackt) || ((uintptr_t)frameptr_next < kstackb)) | |
950 | break; | |
951 | /* Pull return address from one spot above the frame pointer */ | |
952 | retaddr = *(frameptr + 1); | |
953 | ||
954 | /* Store it in the backtrace array */ | |
955 | bt[frame_index++] = retaddr; | |
956 | ||
957 | frameptr = frameptr_next; | |
958 | } | |
959 | ||
960 | /* Save the number of frames captured for return value */ | |
961 | frames = frame_index; | |
962 | ||
963 | /* Fill in the rest of the backtrace with zeros */ | |
964 | while (frame_index < max_frames) | |
965 | bt[frame_index++] = 0; | |
966 | ||
967 | return frames; | |
968 | #else | |
969 | return OSBacktrace((void*)bt, max_frames); | |
970 | #endif | |
971 | } | |
972 | ||
973 | /* "Thomas Wang's 32/64 bit mix functions." http://www.concentric.net/~Ttwang/tech/inthash.htm */ | |
974 | uintptr_t | |
975 | hash_mix(uintptr_t x) | |
976 | { | |
977 | #ifndef __LP64__ | |
978 | x += ~(x << 15); | |
979 | x ^= (x >> 10); | |
980 | x += (x << 3 ); | |
981 | x ^= (x >> 6 ); | |
982 | x += ~(x << 11); | |
983 | x ^= (x >> 16); | |
984 | #else | |
985 | x += ~(x << 32); | |
986 | x ^= (x >> 22); | |
987 | x += ~(x << 13); | |
988 | x ^= (x >> 8 ); | |
989 | x += (x << 3 ); | |
990 | x ^= (x >> 15); | |
991 | x += ~(x << 27); | |
992 | x ^= (x >> 31); | |
993 | #endif | |
994 | return x; | |
995 | } | |
996 | ||
997 | uint32_t | |
998 | hashbacktrace(uintptr_t* bt, uint32_t depth, uint32_t max_size) | |
999 | { | |
1000 | ||
1001 | uintptr_t hash = 0; | |
1002 | uintptr_t mask = max_size - 1; | |
1003 | ||
1004 | while (--depth) { | |
1005 | hash += bt[depth]; | |
1006 | } | |
1007 | ||
1008 | hash = hash_mix(hash) & mask; | |
1009 | ||
1010 | assert(hash < max_size); | |
1011 | ||
1012 | return (uint32_t) hash; | |
1013 | } | |
1014 | ||
1015 | /* | |
1016 | * TODO: Determine how well distributed this is | |
1017 | * max_size must be a power of 2. i.e 0x10000 because 0x10000-1 is 0x0FFFF which is a great bitmask | |
1018 | */ | |
1019 | uint32_t | |
1020 | hashaddr(uintptr_t pt, uint32_t max_size) | |
1021 | { | |
1022 | uintptr_t hash = 0; | |
1023 | uintptr_t mask = max_size - 1; | |
1024 | ||
1025 | hash = hash_mix(pt) & mask; | |
1026 | ||
1027 | assert(hash < max_size); | |
1028 | ||
1029 | return (uint32_t) hash; | |
1030 | } | |
1031 | ||
1032 | /* End of all leak-detection code */ | |
1033 | #pragma mark - | |
1034 | ||
1c79356b A |
1035 | /* |
1036 | * zinit initializes a new zone. The zone data structures themselves | |
1037 | * are stored in a zone, which is initially a static structure that | |
1038 | * is initialized by zone_init. | |
1039 | */ | |
1040 | zone_t | |
1041 | zinit( | |
1042 | vm_size_t size, /* the size of an element */ | |
1043 | vm_size_t max, /* maximum memory to use */ | |
1044 | vm_size_t alloc, /* allocation size */ | |
91447636 | 1045 | const char *name) /* a name for the zone */ |
1c79356b A |
1046 | { |
1047 | zone_t z; | |
1048 | ||
1049 | if (zone_zone == ZONE_NULL) { | |
7ddcb079 A |
1050 | |
1051 | z = (struct zone *)zdata; | |
1052 | zdata += sizeof(*z); | |
1053 | zdata_size -= sizeof(*z); | |
1c79356b A |
1054 | } else |
1055 | z = (zone_t) zalloc(zone_zone); | |
1056 | if (z == ZONE_NULL) | |
1057 | return(ZONE_NULL); | |
1058 | ||
1059 | /* | |
1060 | * Round off all the parameters appropriately. | |
1061 | */ | |
1062 | if (size < sizeof(z->free_elements)) | |
1063 | size = sizeof(z->free_elements); | |
1064 | size = ((size-1) + sizeof(z->free_elements)) - | |
1065 | ((size-1) % sizeof(z->free_elements)); | |
1066 | if (alloc == 0) | |
1067 | alloc = PAGE_SIZE; | |
91447636 A |
1068 | alloc = round_page(alloc); |
1069 | max = round_page(max); | |
1c79356b | 1070 | /* |
91447636 A |
1071 | * we look for an allocation size with less than 1% waste |
1072 | * up to 5 pages in size... | |
1073 | * otherwise, we look for an allocation size with least fragmentation | |
1074 | * in the range of 1 - 5 pages | |
1075 | * This size will be used unless | |
1c79356b A |
1076 | * the user suggestion is larger AND has less fragmentation |
1077 | */ | |
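	/*
	 * Worked example (hypothetical 192-byte element, 4K pages, ignoring the
	 * ZONE_ALIAS_ADDR shortcut): one page wastes 4096 % 192 = 64 bytes and
	 * two pages waste 128, both over the 1% limit (40 and 81 bytes); three
	 * pages hold exactly 64 elements with no waste, so the loop below
	 * settles on alloc = 3 * PAGE_SIZE.
	 */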
2d21ac55 A |
1078 | #if ZONE_ALIAS_ADDR |
1079 | if ((size < PAGE_SIZE) && (PAGE_SIZE % size <= PAGE_SIZE / 10)) | |
1080 | alloc = PAGE_SIZE; | |
1081 | else | |
1082 | #endif | |
7ddcb079 A |
1083 | #if defined(__LP64__) |
1084 | if (((alloc % size) != 0) || (alloc > PAGE_SIZE * 8)) | |
1085 | #endif | |
1086 | { | |
1087 | vm_size_t best, waste; unsigned int i; | |
1c79356b A |
1088 | best = PAGE_SIZE; |
1089 | waste = best % size; | |
91447636 A |
1090 | |
1091 | for (i = 1; i <= 5; i++) { | |
1092 | vm_size_t tsize, twaste; | |
1093 | ||
1094 | tsize = i * PAGE_SIZE; | |
1095 | ||
1096 | if ((tsize % size) < (tsize / 100)) { | |
1097 | alloc = tsize; | |
1098 | goto use_this_allocation; | |
1099 | } | |
1c79356b A |
1100 | twaste = tsize % size; |
1101 | if (twaste < waste) | |
1102 | best = tsize, waste = twaste; | |
1103 | } | |
1104 | if (alloc <= best || (alloc % size >= waste)) | |
1105 | alloc = best; | |
1106 | } | |
91447636 | 1107 | use_this_allocation: |
1c79356b A |
1108 | if (max && (max < alloc)) |
1109 | max = alloc; | |
1110 | ||
1111 | z->free_elements = 0; | |
1112 | z->cur_size = 0; | |
1113 | z->max_size = max; | |
1114 | z->elem_size = size; | |
1115 | z->alloc_size = alloc; | |
1116 | z->zone_name = name; | |
1117 | z->count = 0; | |
6d2010ae | 1118 | z->sum_count = 0LL; |
1c79356b | 1119 | z->doing_alloc = FALSE; |
a3d08fcd | 1120 | z->doing_gc = FALSE; |
1c79356b A |
1121 | z->exhaustible = FALSE; |
1122 | z->collectable = TRUE; | |
1123 | z->allows_foreign = FALSE; | |
1124 | z->expandable = TRUE; | |
1125 | z->waiting = FALSE; | |
0b4e3aa0 | 1126 | z->async_pending = FALSE; |
6d2010ae | 1127 | z->caller_acct = TRUE; |
0b4c1975 | 1128 | z->noencrypt = FALSE; |
7ddcb079 A |
1129 | z->no_callout = FALSE; |
1130 | z->async_prio_refill = FALSE; | |
1131 | z->prio_refill_watermark = 0; | |
1132 | z->zone_replenish_thread = NULL; | |
6d2010ae A |
1133 | #if CONFIG_ZLEAKS |
1134 | z->num_allocs = 0; | |
1135 | z->num_frees = 0; | |
1136 | z->zleak_capture = 0; | |
1137 | z->zleak_on = FALSE; | |
1138 | #endif /* CONFIG_ZLEAKS */ | |
1139 | ||
1c79356b | 1140 | #if ZONE_DEBUG |
2d21ac55 | 1141 | z->active_zones.next = z->active_zones.prev = NULL; |
1c79356b A |
1142 | zone_debug_enable(z); |
1143 | #endif /* ZONE_DEBUG */ | |
1144 | lock_zone_init(z); | |
1145 | ||
1146 | /* | |
1147 | * Add the zone to the all-zones list. | |
6d2010ae A |
1148 | * If we are tracking zone info per task, and we have |
1149 | * already used all the available stat slots, then keep | |
1150 | * using the overflow zone slot. | |
1c79356b | 1151 | */ |
1c79356b | 1152 | z->next_zone = ZONE_NULL; |
0b4e3aa0 | 1153 | thread_call_setup(&z->call_async_alloc, zalloc_async, z); |
1c79356b A |
1154 | simple_lock(&all_zones_lock); |
1155 | *last_zone = z; | |
1156 | last_zone = &z->next_zone; | |
6d2010ae A |
1157 | z->index = num_zones; |
1158 | if (zinfo_per_task) { | |
1159 | if (num_zones > ZONES_MAX) | |
1160 | z->index = ZONES_MAX; | |
1161 | } | |
1c79356b A |
1162 | num_zones++; |
1163 | simple_unlock(&all_zones_lock); | |
1164 | ||
c910b4d9 A |
1165 | /* |
1166 | * Check if we should be logging this zone. If so, remember the zone pointer. | |
1167 | */ | |
1168 | ||
1169 | if (log_this_zone(z->zone_name, zone_name_to_log)) { | |
1170 | zone_of_interest = z; | |
1171 | } | |
1172 | ||
1173 | /* | |
1174 | * If we want to log a zone, see if we need to allocate buffer space for the log. Some vm related zones are | |
1175 | * zinit'ed before we can do a kmem_alloc, so we have to defer allocation in that case. zlog_ready is set to | |
1176 | * TRUE once enough of the VM system is up and running to allow a kmem_alloc to work. If we want to log one | |
1177 | * of the VM related zones that's set up early on, we will skip allocation of the log until zinit is called again | |
1178 | * later on some other zone. So note we may be allocating a buffer to log a zone other than the one being initialized | |
1179 | * right now. | |
1180 | */ | |
1181 | ||
1182 | if (zone_of_interest != NULL && zrecords == NULL && zlog_ready) { | |
1183 | if (kmem_alloc(kernel_map, (vm_offset_t *)&zrecords, log_records * sizeof(struct zrecord)) == KERN_SUCCESS) { | |
1184 | ||
1185 | /* | |
1186 | * We got the memory for the log. Zero it out since the code needs this to identify unused records. | |
1187 | * At this point, everything is set up and we're ready to start logging this zone. | |
1188 | */ | |
1189 | ||
1190 | bzero((void *)zrecords, log_records * sizeof(struct zrecord)); | |
1191 | printf("zone: logging started for zone %s (%p)\n", zone_of_interest->zone_name, zone_of_interest); | |
1192 | ||
1193 | } else { | |
1194 | printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n"); | |
1195 | zone_of_interest = NULL; | |
1196 | } | |
1197 | } | |
1198 | ||
1c79356b A |
1199 | return(z); |
1200 | } | |
7ddcb079 A |
1201 | unsigned zone_replenish_loops, zone_replenish_wakeups, zone_replenish_wakeups_initiated; |
1202 | ||
1203 | static void zone_replenish_thread(zone_t); | |
1204 | ||
1205 | /* High priority VM privileged thread used to asynchronously refill a designated | |
1206 | * zone, such as the reserved VM map entry zone. | |
1207 | */ | |
1208 | static void zone_replenish_thread(zone_t z) { | |
1209 | vm_size_t free_size; | |
1210 | current_thread()->options |= TH_OPT_VMPRIV; | |
1211 | ||
1212 | for (;;) { | |
1213 | lock_zone(z); | |
1214 | assert(z->prio_refill_watermark != 0); | |
1215 | while ((free_size = (z->cur_size - (z->count * z->elem_size))) < (z->prio_refill_watermark * z->elem_size)) { | |
1216 | assert(z->doing_alloc == FALSE); | |
1217 | assert(z->async_prio_refill == TRUE); | |
1218 | ||
1219 | unlock_zone(z); | |
1220 | int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT; | |
1221 | vm_offset_t space, alloc_size; | |
1222 | kern_return_t kr; | |
1223 | ||
1224 | if (vm_pool_low()) | |
1225 | alloc_size = round_page(z->elem_size); | |
1226 | else | |
1227 | alloc_size = z->alloc_size; | |
1228 | ||
1229 | if (z->noencrypt) | |
1230 | zflags |= KMA_NOENCRYPT; | |
1231 | ||
1232 | kr = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags); | |
1233 | ||
1234 | if (kr == KERN_SUCCESS) { | |
1235 | #if ZONE_ALIAS_ADDR | |
1236 | if (alloc_size == PAGE_SIZE) | |
1237 | space = zone_alias_addr(space); | |
1238 | #endif | |
1239 | zcram(z, space, alloc_size); | |
1240 | } else if (kr == KERN_RESOURCE_SHORTAGE) { | |
1241 | VM_PAGE_WAIT(); | |
1242 | } else if (kr == KERN_NO_SPACE) { | |
1243 | kr = kernel_memory_allocate(kernel_map, &space, alloc_size, 0, zflags); | |
1244 | if (kr == KERN_SUCCESS) { | |
1245 | #if ZONE_ALIAS_ADDR | |
1246 | if (alloc_size == PAGE_SIZE) | |
1247 | space = zone_alias_addr(space); | |
1248 | #endif | |
1249 | zcram(z, space, alloc_size); | |
1250 | } else { | |
1251 | assert_wait_timeout(&z->zone_replenish_thread, THREAD_UNINT, 1, 100 * NSEC_PER_USEC); | |
1252 | thread_block(THREAD_CONTINUE_NULL); | |
1253 | } | |
1254 | } | |
1255 | ||
1256 | lock_zone(z); | |
1257 | zone_replenish_loops++; | |
1258 | } | |
1259 | ||
1260 | unlock_zone(z); | |
1261 | assert_wait(&z->zone_replenish_thread, THREAD_UNINT); | |
1262 | thread_block(THREAD_CONTINUE_NULL); | |
1263 | zone_replenish_wakeups++; | |
1264 | } | |
1265 | } | |
1266 | ||
1267 | void | |
1268 | zone_prio_refill_configure(zone_t z, vm_size_t low_water_mark) { | |
1269 | z->prio_refill_watermark = low_water_mark; | |
1270 | ||
1271 | z->async_prio_refill = TRUE; | |
1272 | OSMemoryBarrier(); | |
1273 | kern_return_t tres = kernel_thread_start_priority((thread_continue_t)zone_replenish_thread, z, MAXPRI_KERNEL, &z->zone_replenish_thread); | |
1274 | ||
1275 | if (tres != KERN_SUCCESS) { | |
1276 | panic("zone_prio_refill_configure, thread create: 0x%x", tres); | |
1277 | } | |
1278 | ||
1279 | thread_deallocate(z->zone_replenish_thread); | |
1280 | } | |
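/*
 * Illustrative call site (the zone pointer and watermark are hypothetical):
 *
 *	zone_prio_refill_configure(my_reserved_zone, 64);
 *
 * starts the replenish thread for my_reserved_zone and has it refill the zone
 * whenever fewer than 64 free elements remain.
 */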
1c79356b A |
1281 | |
1282 | /* | |
1283 | * Cram the given memory into the specified zone. | |
1284 | */ | |
1285 | void | |
1286 | zcram( | |
7ddcb079 A |
1287 | zone_t zone, |
1288 | vm_offset_t newmem, | |
1c79356b A |
1289 | vm_size_t size) |
1290 | { | |
7ddcb079 A |
1291 | vm_size_t elem_size; |
1292 | boolean_t from_zm = FALSE; | |
1c79356b A |
1293 | |
1294 | /* Basic sanity checks */ | |
1295 | assert(zone != ZONE_NULL && newmem != (vm_offset_t)0); | |
1296 | assert(!zone->collectable || zone->allows_foreign | |
55e303ae | 1297 | || (from_zone_map(newmem, size))); |
1c79356b A |
1298 | |
1299 | elem_size = zone->elem_size; | |
1300 | ||
7ddcb079 A |
1301 | if (from_zone_map(newmem, size)) |
1302 | from_zm = TRUE; | |
1303 | ||
1304 | if (from_zm) | |
1305 | zone_page_init(newmem, size); | |
1306 | ||
1c79356b A |
1307 | lock_zone(zone); |
1308 | while (size >= elem_size) { | |
1309 | ADD_TO_ZONE(zone, newmem); | |
7ddcb079 | 1310 | if (from_zm) |
1c79356b A |
1311 | zone_page_alloc(newmem, elem_size); |
1312 | zone->count++; /* compensate for ADD_TO_ZONE */ | |
1313 | size -= elem_size; | |
1314 | newmem += elem_size; | |
1315 | zone->cur_size += elem_size; | |
1316 | } | |
1317 | unlock_zone(zone); | |
1318 | } | |
1319 | ||
1c79356b A |
1320 | |
1321 | /* | |
1322 | * Steal memory for the zone package. Called from | |
1323 | * vm_page_bootstrap(). | |
1324 | */ | |
1325 | void | |
1326 | zone_steal_memory(void) | |
1327 | { | |
7ddcb079 A |
1328 | /* Request enough early memory to get to the pmap zone */ |
1329 | zdata_size = 12 * sizeof(struct zone); | |
1330 | zdata = (vm_offset_t)pmap_steal_memory(round_page(zdata_size)); | |
1c79356b A |
1331 | } |
1332 | ||
1333 | ||
1334 | /* | |
1335 | * Fill a zone with enough memory to contain at least nelem elements. | |
b0d623f7 | 1336 | * Memory is obtained with kmem_alloc_kobject from the kernel_map. |
1c79356b A |
1337 | * Return the number of elements actually put into the zone, which may |
1338 | * be more than the caller asked for since the memory allocation is | |
1339 | * rounded up to a full page. | |
1340 | */ | |
1341 | int | |
1342 | zfill( | |
1343 | zone_t zone, | |
1344 | int nelem) | |
1345 | { | |
1346 | kern_return_t kr; | |
1347 | vm_size_t size; | |
1348 | vm_offset_t memory; | |
1349 | int nalloc; | |
1350 | ||
1351 | assert(nelem > 0); | |
1352 | if (nelem <= 0) | |
1353 | return 0; | |
1354 | size = nelem * zone->elem_size; | |
91447636 | 1355 | size = round_page(size); |
b0d623f7 | 1356 | kr = kmem_alloc_kobject(kernel_map, &memory, size); |
1c79356b A |
1357 | if (kr != KERN_SUCCESS) |
1358 | return 0; | |
1359 | ||
1360 | zone_change(zone, Z_FOREIGN, TRUE); | |
7ddcb079 | 1361 | zcram(zone, memory, size); |
b0d623f7 | 1362 | nalloc = (int)(size / zone->elem_size); |
1c79356b A |
1363 | assert(nalloc >= nelem); |
1364 | ||
1365 | return nalloc; | |
1366 | } | |
1367 | ||
1368 | /* | |
1369 | * Initialize the "zone of zones" which uses fixed memory allocated | |
1370 | * earlier in memory initialization. zone_bootstrap is called | |
1371 | * before zone_init. | |
1372 | */ | |
1373 | void | |
1374 | zone_bootstrap(void) | |
1375 | { | |
2d21ac55 A |
1376 | char temp_buf[16]; |
1377 | ||
6d2010ae A |
1378 | #if 6094439 |
1379 | /* enable zone checks by default, to try and catch offenders... */ | |
1380 | #if 0 | |
1381 | /* 7968354: turn "-zc" back off */ | |
1382 | check_freed_element = TRUE; | |
1383 | /* 7995202: turn "-zp" back off */ | |
1384 | zfree_clear = TRUE; | |
1385 | #endif | |
1386 | ||
1387 | /* ... but allow them to be turned off explicitly */ | 
1388 | if (PE_parse_boot_argn("-no_zc", temp_buf, sizeof (temp_buf))) { | |
1389 | check_freed_element = FALSE; | |
1390 | } | |
1391 | if (PE_parse_boot_argn("-no_zp", temp_buf, sizeof (temp_buf))) { | |
1392 | zfree_clear = FALSE; | |
1393 | } | |
1394 | #endif | |
1395 | ||
c910b4d9 | 1396 | /* see if we want freed zone element checking and/or poisoning */ |
593a1d5f | 1397 | if (PE_parse_boot_argn("-zc", temp_buf, sizeof (temp_buf))) { |
c910b4d9 A |
1398 | check_freed_element = TRUE; |
1399 | } | |
1400 | ||
1401 | if (PE_parse_boot_argn("-zp", temp_buf, sizeof (temp_buf))) { | |
1402 | zfree_clear = TRUE; | |
1403 | } | |
1404 | ||
6d2010ae A |
1405 | if (PE_parse_boot_argn("-zinfop", temp_buf, sizeof (temp_buf))) { |
1406 | zinfo_per_task = TRUE; | |
1407 | } | |
1408 | ||
c910b4d9 A |
1409 | /* |
1410 | * Check for and set up zone leak detection if requested via boot-args. We recognize two | 
1411 | * boot-args: | |
1412 | * | |
1413 | * zlog=<zone_to_log> | |
1414 | * zrecs=<num_records_in_log> | |
1415 | * | |
1416 | * The zlog arg is used to specify the zone name that should be logged, and zrecs is used to | |
1417 | * control the size of the log. If zrecs is not specified, a default value is used. | |
1418 | */ | |
1419 | ||
1420 | if (PE_parse_boot_argn("zlog", zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) { | |
1421 | if (PE_parse_boot_argn("zrecs", &log_records, sizeof(log_records)) == TRUE) { | |
1422 | ||
1423 | /* | |
1424 | * Don't allow more than ZRECORDS_MAX records even if the user asked for more. | |
1425 | * This prevents accidentally hogging too much kernel memory and making the system | |
1426 | * unusable. | |
1427 | */ | |
1428 | ||
1429 | log_records = MIN(ZRECORDS_MAX, log_records); | |
1430 | ||
1431 | } else { | |
1432 | log_records = ZRECORDS_DEFAULT; | |
1433 | } | |
2d21ac55 | 1434 | } |
1c79356b | 1435 | |
91447636 | 1436 | simple_lock_init(&all_zones_lock, 0); |
1c79356b A |
1437 | |
1438 | first_zone = ZONE_NULL; | |
1439 | last_zone = &first_zone; | |
1440 | num_zones = 0; | |
1441 | ||
1c79356b A |
1442 | /* assertion: nobody else called zinit before us */ |
1443 | assert(zone_zone == ZONE_NULL); | |
1444 | zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone), | |
1445 | sizeof(struct zone), "zones"); | |
1446 | zone_change(zone_zone, Z_COLLECT, FALSE); | |
6d2010ae | 1447 | zone_change(zone_zone, Z_CALLERACCT, FALSE); |
0b4c1975 A |
1448 | zone_change(zone_zone, Z_NOENCRYPT, TRUE); |
1449 | ||
7ddcb079 | 1450 | zcram(zone_zone, zdata, zdata_size); |
6d2010ae A |
1451 | |
1452 | /* initialize fake zones and zone info if tracking by task */ | |
1453 | if (zinfo_per_task) { | |
1454 | vm_size_t zisize = sizeof(zinfo_usage_store_t) * ZINFO_SLOTS; | |
1455 | unsigned int i; | |
1456 | ||
1457 | for (i = 0; i < num_fake_zones; i++) | |
1458 | fake_zones[i].init(ZINFO_SLOTS - num_fake_zones + i); | |
1459 | zinfo_zone = zinit(zisize, zisize * CONFIG_TASK_MAX, | |
1460 | zisize, "per task zinfo"); | |
1461 | zone_change(zinfo_zone, Z_CALLERACCT, FALSE); | |
1462 | } | |
1463 | } | |
1464 | ||
1465 | void | |
1466 | zinfo_task_init(task_t task) | |
1467 | { | |
1468 | if (zinfo_per_task) { | |
1469 | task->tkm_zinfo = zalloc(zinfo_zone); | |
1470 | memset(task->tkm_zinfo, 0, sizeof(zinfo_usage_store_t) * ZINFO_SLOTS); | |
1471 | } else { | |
1472 | task->tkm_zinfo = NULL; | |
1473 | } | |
1c79356b A |
1474 | } |
1475 | ||
6d2010ae A |
1476 | void |
1477 | zinfo_task_free(task_t task) | |
1478 | { | |
1479 | assert(task != kernel_task); | |
1480 | if (task->tkm_zinfo != NULL) { | |
1481 | zfree(zinfo_zone, task->tkm_zinfo); | |
1482 | task->tkm_zinfo = NULL; | |
1483 | } | |
1484 | } | |
1485 | ||
1c79356b A |
1486 | void |
1487 | zone_init( | |
1488 | vm_size_t max_zonemap_size) | |
1489 | { | |
1490 | kern_return_t retval; | |
1491 | vm_offset_t zone_min; | |
1492 | vm_offset_t zone_max; | |
1c79356b A |
1493 | |
1494 | retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size, | |
b0d623f7 A |
1495 | FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT, |
1496 | &zone_map); | |
91447636 | 1497 | |
1c79356b A |
1498 | if (retval != KERN_SUCCESS) |
1499 | panic("zone_init: kmem_suballoc failed"); | |
91447636 | 1500 | zone_max = zone_min + round_page(max_zonemap_size); |
1c79356b A |
1501 | /* |
1502 | * Setup garbage collection information: | |
1503 | */ | |
1c79356b A |
1504 | zone_map_min_address = zone_min; |
1505 | zone_map_max_address = zone_max; | |
7ddcb079 A |
1506 | |
1507 | zone_pages = (unsigned int)atop_kernel(zone_max - zone_min); | |
1508 | zone_page_table_used_size = sizeof(zone_page_table); | |
1509 | ||
1510 | zone_page_table_second_level_size = 1; | |
1511 | zone_page_table_second_level_shift_amount = 0; | |
1512 | ||
1513 | /* | |
1514 | * Find the power of 2 for the second level that allows | |
1515 | * the first level to fit in ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE | |
1516 | * slots. | |
1517 | */ | |
1518 | while ((zone_page_table_first_level_slot(zone_pages-1)) >= ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE) { | |
1519 | zone_page_table_second_level_size <<= 1; | |
1520 | zone_page_table_second_level_shift_amount++; | |
1521 | } | |
b0d623f7 A |
1522 | |
1523 | lck_grp_attr_setdefault(&zone_lck_grp_attr); | |
1524 | lck_grp_init(&zone_lck_grp, "zones", &zone_lck_grp_attr); | |
1525 | lck_attr_setdefault(&zone_lck_attr); | |
1526 | lck_mtx_init_ext(&zone_gc_lock, &zone_lck_ext, &zone_lck_grp, &zone_lck_attr); | |
1527 | ||
6d2010ae A |
1528 | #if CONFIG_ZLEAKS |
1529 | /* | |
1530 | * Initialize the zone leak monitor | |
1531 | */ | |
1532 | zleak_init(max_zonemap_size); | |
1533 | #endif /* CONFIG_ZLEAKS */ | |
1c79356b A |
1534 | } |
1535 | ||
7ddcb079 A |
1536 | void |
1537 | zone_page_table_expand(zone_page_index_t pindex) | |
1538 | { | |
1539 | unsigned int first_index; | |
1540 | struct zone_page_table_entry * volatile * first_level_ptr; | |
1541 | ||
1542 | assert(pindex < zone_pages); | |
1543 | ||
1544 | first_index = zone_page_table_first_level_slot(pindex); | |
1545 | first_level_ptr = &zone_page_table[first_index]; | |
1546 | ||
1547 | if (*first_level_ptr == NULL) { | |
1548 | /* | |
1549 | * We were able to verify the old first-level slot | |
1550 | * had NULL, so attempt to populate it. | |
1551 | */ | |
1552 | ||
1553 | vm_offset_t second_level_array = 0; | |
1554 | vm_size_t second_level_size = round_page(zone_page_table_second_level_size * sizeof(struct zone_page_table_entry)); | |
1555 | zone_page_index_t i; | |
1556 | struct zone_page_table_entry *entry_array; | |
1557 | ||
1558 | if (kmem_alloc_kobject(zone_map, &second_level_array, | |
1559 | second_level_size) != KERN_SUCCESS) { | |
1560 | panic("zone_page_table_expand"); | |
1561 | } | |
1562 | ||
1563 | /* | |
1564 | * zone_gc() may scan the "zone_page_table" directly, | |
1565 | * so make sure any slots have a valid unused state. | |
1566 | */ | |
1567 | entry_array = (struct zone_page_table_entry *)second_level_array; | |
1568 | for (i=0; i < zone_page_table_second_level_size; i++) { | |
1569 | entry_array[i].alloc_count = ZONE_PAGE_UNUSED; | |
1570 | entry_array[i].collect_count = 0; | |
1571 | } | |
1572 | ||
1573 | if (OSCompareAndSwapPtr(NULL, entry_array, first_level_ptr)) { | |
1574 | /* Old slot was NULL, replaced with expanded level */ | |
1575 | OSAddAtomicLong(second_level_size, &zone_page_table_used_size); | |
1576 | } else { | |
1577 | /* Old slot was not NULL, someone else expanded first */ | |
1578 | kmem_free(zone_map, second_level_array, second_level_size); | |
1579 | } | |
1580 | } else { | |
1581 | /* Old slot was not NULL, already been expanded */ | |
1582 | } | |
1583 | } | |
1584 | ||
1585 | struct zone_page_table_entry * | |
1586 | zone_page_table_lookup(zone_page_index_t pindex) | |
1587 | { | |
1588 | unsigned int first_index = zone_page_table_first_level_slot(pindex); | |
1589 | struct zone_page_table_entry *second_level = zone_page_table[first_index]; | |
1590 | ||
1591 | if (second_level) { | |
1592 | return &second_level[zone_page_table_second_level_slot(pindex)]; | |
1593 | } | |
1594 | ||
1595 | return NULL; | |
1596 | } | |
1597 | ||
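/*
 * Added worked example (not part of the original file), assuming
 * zone_page_table_first_level_slot() shifts by
 * zone_page_table_second_level_shift_amount and
 * zone_page_table_second_level_slot() masks with
 * (zone_page_table_second_level_size - 1): with a second-level size of
 * 1024 entries (shift amount 10), page index 5000 maps to first-level
 * slot 5000 >> 10 == 4 and second-level slot 5000 & 1023 == 904, i.e.
 * the entry returned above is &zone_page_table[4][904] once that
 * second-level array has been populated.
 */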
b0d623f7 | 1598 | extern volatile SInt32 kfree_nop_count; |
1c79356b | 1599 | |
6d2010ae A |
1600 | #pragma mark - |
1601 | #pragma mark zalloc_canblock | |
1602 | ||
1c79356b A |
1603 | /* |
1604 | * zalloc returns an element from the specified zone. | |
1605 | */ | |
91447636 | 1606 | void * |
1c79356b A |
1607 | zalloc_canblock( |
1608 | register zone_t zone, | |
1609 | boolean_t canblock) | |
1610 | { | |
1611 | vm_offset_t addr; | |
1612 | kern_return_t retval; | |
6d2010ae | 1613 | uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used in zone leak logging and zone leak detection */ |
c910b4d9 | 1614 | int numsaved = 0; |
6d2010ae | 1615 | int i; |
7ddcb079 | 1616 | boolean_t zone_replenish_wakeup = FALSE; |
6d2010ae A |
1617 | |
1618 | #if CONFIG_ZLEAKS | |
1619 | uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */ | |
1620 | #endif /* CONFIG_ZLEAKS */ | |
1c79356b A |
1621 | |
1622 | assert(zone != ZONE_NULL); | |
6d2010ae A |
1623 | |
1624 | lock_zone(zone); | |
1c79356b | 1625 | |
c910b4d9 A |
1626 | /* |
1627 | * If zone logging is turned on and this is the zone we're tracking, grab a backtrace. | |
1628 | */ | |
6d2010ae | 1629 | |
c910b4d9 | 1630 | if (DO_LOGGING(zone)) |
6d2010ae A |
1631 | numsaved = OSBacktrace((void*) zbt, MAX_ZTRACE_DEPTH); |
1632 | ||
1633 | #if CONFIG_ZLEAKS | |
1634 | /* | |
1635 | * Zone leak detection: capture a backtrace every z_sample_factor | |
1636 | * allocations in this zone. | |
1637 | */ | |
1638 | if (zone->zleak_on && (zone->zleak_capture++ % z_sample_factor == 0)) { | |
1639 | zone->zleak_capture = 1; | |
1640 | ||
1641 | /* Avoid backtracing twice if zone logging is on */ | |
1642 | if (numsaved == 0 ) | |
1643 | zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH); | |
1644 | else | |
1645 | zleak_tracedepth = numsaved; | |
1646 | } | |
1647 | #endif /* CONFIG_ZLEAKS */ | |
1c79356b A |
1648 | |
1649 | REMOVE_FROM_ZONE(zone, addr, vm_offset_t); | |
0b4e3aa0 | 1650 | |
7ddcb079 A |
1651 | if (zone->async_prio_refill && |
1652 | ((zone->cur_size - (zone->count * zone->elem_size)) < (zone->prio_refill_watermark * zone->elem_size))) { | |
1653 | zone_replenish_wakeup = TRUE; | |
1654 | zone_replenish_wakeups_initiated++; | |
a3d08fcd A |
1655 | } |
1656 | ||
0b4e3aa0 | 1657 | while ((addr == 0) && canblock) { |
1c79356b A |
1658 | /* |
1659 | * If nothing was there, try to get more | |
1660 | */ | |
1661 | if (zone->doing_alloc) { | |
1c79356b A |
1662 | /* |
1663 | * Someone is allocating memory for this zone. | |
1664 | * Wait for it to show up, then try again. | |
1665 | */ | |
1c79356b | 1666 | zone->waiting = TRUE; |
9bccf70c | 1667 | zone_sleep(zone); |
7ddcb079 A |
1668 | } else if (zone->doing_gc) { |
1669 | /* zone_gc() is running. Since we need an element | |
1670 | * from the free list that is currently being | |
1671 | * collected, set the waiting bit and try to | |
1672 | * interrupt the GC process, and try again | |
1673 | * when we obtain the lock. | |
1674 | */ | |
1675 | zone->waiting = TRUE; | |
1676 | zone_sleep(zone); | |
1677 | } else { | |
1678 | vm_offset_t space; | |
1679 | vm_size_t alloc_size; | |
1680 | int retry = 0; | |
1681 | ||
1c79356b A |
1682 | if ((zone->cur_size + zone->elem_size) > |
1683 | zone->max_size) { | |
1684 | if (zone->exhaustible) | |
1685 | break; | |
1686 | if (zone->expandable) { | |
1687 | /* | |
1688 | * We're willing to overflow certain | |
1689 | * zones, but not without complaining. | |
1690 | * | |
1691 | * This is best used in conjunction | |
1692 | * with the collectable flag. What we | |
1693 | * want is an assurance we can get the | |
1694 | * memory back, assuming there's no | |
1695 | * leak. | |
1696 | */ | |
1697 | zone->max_size += (zone->max_size >> 1); | |
1698 | } else { | |
1699 | unlock_zone(zone); | |
1700 | ||
1c79356b A |
1701 | panic("zalloc: zone \"%s\" empty.", zone->zone_name); |
1702 | } | |
1703 | } | |
1704 | zone->doing_alloc = TRUE; | |
1705 | unlock_zone(zone); | |
1706 | ||
7ddcb079 A |
1707 | for (;;) { |
1708 | int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT; | |
1709 | ||
1710 | if (vm_pool_low() || retry >= 1) | |
1711 | alloc_size = | |
1712 | round_page(zone->elem_size); | |
1713 | else | |
1714 | alloc_size = zone->alloc_size; | |
1715 | ||
1716 | if (zone->noencrypt) | |
1717 | zflags |= KMA_NOENCRYPT; | |
1718 | ||
1719 | retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags); | |
1720 | if (retval == KERN_SUCCESS) { | |
2d21ac55 | 1721 | #if ZONE_ALIAS_ADDR |
7ddcb079 A |
1722 | if (alloc_size == PAGE_SIZE) |
1723 | space = zone_alias_addr(space); | |
2d21ac55 | 1724 | #endif |
7ddcb079 | 1725 | |
6d2010ae | 1726 | #if CONFIG_ZLEAKS |
7ddcb079 A |
1727 | if ((zleak_state & (ZLEAK_STATE_ENABLED | ZLEAK_STATE_ACTIVE)) == ZLEAK_STATE_ENABLED) { |
1728 | if (zone_map->size >= zleak_global_tracking_threshold) { | |
1729 | kern_return_t kr; | |
1730 | ||
1731 | kr = zleak_activate(); | |
1732 | if (kr != KERN_SUCCESS) { | |
1733 | printf("Failed to activate live zone leak debugging (%d).\n", kr); | |
6d2010ae A |
1734 | } |
1735 | } | |
55e303ae | 1736 | } |
1c79356b | 1737 | |
7ddcb079 A |
1738 | if ((zleak_state & ZLEAK_STATE_ACTIVE) && !(zone->zleak_on)) { |
1739 | if (zone->cur_size > zleak_per_zone_tracking_threshold) { | |
1740 | zone->zleak_on = TRUE; | |
1741 | } | |
1c79356b | 1742 | } |
7ddcb079 | 1743 | #endif /* CONFIG_ZLEAKS */ |
1c79356b | 1744 | |
7ddcb079 A |
1745 | zcram(zone, space, alloc_size); |
1746 | ||
1747 | break; | |
1748 | } else if (retval != KERN_RESOURCE_SHORTAGE) { | |
1749 | retry++; | |
1750 | ||
1751 | if (retry == 2) { | |
1752 | zone_gc(); | |
1753 | printf("zalloc did gc\n"); | |
1754 | zone_display_zprint(); | |
1755 | } | |
1756 | if (retry == 3) { | |
6d2010ae A |
1757 | panic_include_zprint = TRUE; |
1758 | #if CONFIG_ZLEAKS | |
7ddcb079 | 1759 | if ((zleak_state & ZLEAK_STATE_ACTIVE)) { |
6d2010ae A |
1760 | panic_include_ztrace = TRUE; |
1761 | } | |
7ddcb079 A |
1762 | #endif /* CONFIG_ZLEAKS */ |
1763 | /* TODO: Change this to something more descriptive, perhaps | |
1764 | * 'zone_map exhausted' only if we get retval 3 (KERN_NO_SPACE). | |
1765 | */ | |
1766 | panic("zalloc: \"%s\" (%d elements) retry fail %d, kfree_nop_count: %d", zone->zone_name, zone->count, retval, (int)kfree_nop_count); | |
6d2010ae | 1767 | } |
7ddcb079 A |
1768 | } else { |
1769 | break; | |
1c79356b A |
1770 | } |
1771 | } | |
7ddcb079 A |
1772 | lock_zone(zone); |
1773 | zone->doing_alloc = FALSE; | |
1774 | if (zone->waiting) { | |
1775 | zone->waiting = FALSE; | |
1776 | zone_wakeup(zone); | |
1777 | } | |
1778 | REMOVE_FROM_ZONE(zone, addr, vm_offset_t); | |
1779 | if (addr == 0 && | |
1780 | retval == KERN_RESOURCE_SHORTAGE) { | |
1781 | unlock_zone(zone); | |
1782 | ||
1783 | VM_PAGE_WAIT(); | |
1784 | lock_zone(zone); | |
1785 | } | |
1c79356b A |
1786 | } |
1787 | if (addr == 0) | |
1788 | REMOVE_FROM_ZONE(zone, addr, vm_offset_t); | |
1789 | } | |
1790 | ||
6d2010ae A |
1791 | #if CONFIG_ZLEAKS |
1792 | /* Zone leak detection: | |
1793 | * If we're sampling this allocation, add it to the zleaks hash table. | |
1794 | */ | |
1795 | if (addr && zleak_tracedepth > 0) { | |
1796 | /* Sampling can fail if another sample is happening at the same time in a different zone. */ | |
1797 | if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) { | |
1798 | /* If it failed, roll back the counter so we sample the next allocation instead. */ | |
1799 | zone->zleak_capture = z_sample_factor; | |
1800 | } | |
1801 | } | |
1802 | #endif /* CONFIG_ZLEAKS */ | |
1803 | ||
1804 | ||
c910b4d9 A |
1805 | /* |
1806 | * See if we should be logging allocations in this zone. Logging is rarely done except when a leak is | |
1807 | * suspected, so this code rarely executes. We need to do this code while still holding the zone lock | |
1808 | * since it protects the various log related data structures. | |
1809 | */ | |
1810 | ||
1811 | if (DO_LOGGING(zone) && addr) { | |
1812 | ||
1813 | /* | |
1814 | * Look for a place to record this new allocation. We implement two different logging strategies | |
1815 | * depending on whether we're looking for the source of a zone leak or a zone corruption. When looking | |
1816 | * for a leak, we want to log as many allocations as possible in order to clearly identify the leaker | |
1817 | * among all the records. So we look for an unused slot in the log and fill that in before overwriting | |
1818 | * an old entry. When looking for a corruption, however, it's better to have a chronological log of all | 
1819 | * the allocations and frees done in the zone so that the history of operations for a specific zone | |
1820 | * element can be inspected. So in this case, we treat the log as a circular buffer and overwrite the | |
1821 | * oldest entry whenever a new one needs to be added. | |
1822 | * | |
1823 | * The check_freed_element flag tells us what style of logging to do. It's set if we're supposed to be | |
1824 | * doing corruption style logging (indicated via -zc in the boot-args). | |
1825 | */ | |
1826 | ||
1827 | if (!check_freed_element && zrecords[zcurrent].z_element && zrecorded < log_records) { | |
1828 | ||
1829 | /* | |
1830 | * If we get here, we're doing leak style logging and there's still some unused entries in | |
1831 | * the log (since zrecorded is smaller than the size of the log). Look for an unused slot | |
1832 | * starting at zcurrent and wrap-around if we reach the end of the buffer. If the buffer | |
1833 | * is already full, we just fall through and overwrite the element indexed by zcurrent. | |
1834 | */ | |
1835 | ||
1836 | for (i = zcurrent; i < log_records; i++) { | |
1837 | if (zrecords[i].z_element == NULL) { | |
1838 | zcurrent = i; | |
1839 | goto empty_slot; | |
1840 | } | |
1841 | } | |
1842 | ||
1843 | for (i = 0; i < zcurrent; i++) { | |
1844 | if (zrecords[i].z_element == NULL) { | |
1845 | zcurrent = i; | |
1846 | goto empty_slot; | |
1847 | } | |
1848 | } | |
1849 | } | |
1850 | ||
1851 | /* | |
1852 | * Save a record of this allocation | |
1853 | */ | |
1854 | ||
1855 | empty_slot: | |
1856 | if (zrecords[zcurrent].z_element == NULL) | |
1857 | zrecorded++; | |
1858 | ||
1859 | zrecords[zcurrent].z_element = (void *)addr; | |
1860 | zrecords[zcurrent].z_time = ztime++; | |
1861 | zrecords[zcurrent].z_opcode = ZOP_ALLOC; | |
1862 | ||
1863 | for (i = 0; i < numsaved; i++) | |
6d2010ae | 1864 | zrecords[zcurrent].z_pc[i] = (void*) zbt[i]; |
c910b4d9 | 1865 | |
6d2010ae | 1866 | for (; i < MAX_ZTRACE_DEPTH; i++) |
c910b4d9 A |
1867 | zrecords[zcurrent].z_pc[i] = 0; |
1868 | ||
1869 | zcurrent++; | |
1870 | ||
1871 | if (zcurrent >= log_records) | |
1872 | zcurrent = 0; | |
1873 | } | |
1874 | ||
7ddcb079 | 1875 | if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->no_callout == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) { |
0b4e3aa0 A |
1876 | zone->async_pending = TRUE; |
1877 | unlock_zone(zone); | |
1878 | thread_call_enter(&zone->call_async_alloc); | |
1879 | lock_zone(zone); | |
1880 | REMOVE_FROM_ZONE(zone, addr, vm_offset_t); | |
1881 | } | |
1882 | ||
1c79356b A |
1883 | #if ZONE_DEBUG |
1884 | if (addr && zone_debug_enabled(zone)) { | |
1885 | enqueue_tail(&zone->active_zones, (queue_entry_t)addr); | |
55e303ae | 1886 | addr += ZONE_DEBUG_OFFSET; |
1c79356b A |
1887 | } |
1888 | #endif | |
6d2010ae A |
1889 | |
1890 | #if CONFIG_ZLEAKS | |
1891 | if (addr != 0) { | |
1892 | zone->num_allocs++; | |
1893 | } | |
1894 | #endif /* CONFIG_ZLEAKS */ | |
1c79356b A |
1895 | |
1896 | unlock_zone(zone); | |
0b4e3aa0 | 1897 | |
7ddcb079 A |
1898 | if (zone_replenish_wakeup) |
1899 | thread_wakeup(&zone->zone_replenish_thread); | |
1900 | ||
2d21ac55 A |
1901 | TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr); |
1902 | ||
6d2010ae A |
1903 | if (addr) { |
1904 | thread_t thr = current_thread(); | |
1905 | task_t task; | |
1906 | zinfo_usage_t zinfo; | |
1907 | ||
1908 | if (zone->caller_acct) | |
1909 | thr->tkm_private.alloc += zone->elem_size; | |
1910 | else | |
1911 | thr->tkm_shared.alloc += zone->elem_size; | |
1912 | ||
1913 | if ((task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL) | |
1914 | OSAddAtomic64(zone->elem_size, (int64_t *)&zinfo[zone->index].alloc); | |
1915 | } | |
91447636 | 1916 | return((void *)addr); |
1c79356b A |
1917 | } |
1918 | ||
1919 | ||
91447636 | 1920 | void * |
1c79356b A |
1921 | zalloc( |
1922 | register zone_t zone) | |
1923 | { | |
1924 | return( zalloc_canblock(zone, TRUE) ); | |
1925 | } | |
1926 | ||
91447636 | 1927 | void * |
1c79356b A |
1928 | zalloc_noblock( |
1929 | register zone_t zone) | |
1930 | { | |
1931 | return( zalloc_canblock(zone, FALSE) ); | |
1932 | } | |
1933 | ||
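/*
 * Added illustration (not part of the original file): the two wrappers
 * above differ only in the canblock argument passed to zalloc_canblock().
 * A hypothetical caller that must not sleep uses the noblock form and
 * handles a NULL return instead of waiting for the zone to grow.
 */
#if 0	/* example only */
	elt = zalloc_noblock(my_zone);		/* may return NULL */
	if (elt == NULL) {
		/*
		 * Defer or drop the work; calling zalloc() here instead
		 * could block in zone_sleep() or VM_PAGE_WAIT() while the
		 * zone is being expanded.
		 */
	}
#endif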
0b4e3aa0 A |
1934 | void |
1935 | zalloc_async( | |
91447636 A |
1936 | thread_call_param_t p0, |
1937 | __unused thread_call_param_t p1) | |
0b4e3aa0 | 1938 | { |
91447636 | 1939 | void *elt; |
0b4e3aa0 A |
1940 | |
1941 | elt = zalloc_canblock((zone_t)p0, TRUE); | |
1942 | zfree((zone_t)p0, elt); | |
1943 | lock_zone(((zone_t)p0)); | |
1944 | ((zone_t)p0)->async_pending = FALSE; | |
1945 | unlock_zone(((zone_t)p0)); | |
1946 | } | |
1947 | ||
1c79356b A |
1948 | |
1949 | /* | |
1950 | * zget returns an element from the specified zone | |
1951 | * and immediately returns nothing if there is nothing there. | |
1952 | * | |
1953 | * This form should be used when you cannot block (like when | 
1954 | * processing an interrupt). | |
6d2010ae A |
1955 | * |
1956 | * XXX: It seems like only vm_page_grab_fictitious_common uses this, and its | |
1957 | * friend vm_page_more_fictitious can block, so it doesn't seem like | |
1958 | * this is used for interrupts any more.... | |
1c79356b | 1959 | */ |
91447636 | 1960 | void * |
1c79356b A |
1961 | zget( |
1962 | register zone_t zone) | |
1963 | { | |
1964 | register vm_offset_t addr; | |
6d2010ae A |
1965 | |
1966 | #if CONFIG_ZLEAKS | |
1967 | uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used for zone leak detection */ | |
1968 | uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */ | |
1969 | #endif /* CONFIG_ZLEAKS */ | |
1c79356b A |
1970 | |
1971 | assert( zone != ZONE_NULL ); | |
1972 | ||
1973 | if (!lock_try_zone(zone)) | |
91447636 | 1974 | return NULL; |
6d2010ae A |
1975 | |
1976 | #if CONFIG_ZLEAKS | |
1977 | /* | |
1978 | * Zone leak detection: capture a backtrace | |
1979 | */ | |
1980 | if (zone->zleak_on && (zone->zleak_capture++ % z_sample_factor == 0)) { | |
1981 | zone->zleak_capture = 1; | |
1982 | zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH); | |
1983 | } | |
1984 | #endif /* CONFIG_ZLEAKS */ | |
1c79356b A |
1985 | |
1986 | REMOVE_FROM_ZONE(zone, addr, vm_offset_t); | |
1987 | #if ZONE_DEBUG | |
1988 | if (addr && zone_debug_enabled(zone)) { | |
1989 | enqueue_tail(&zone->active_zones, (queue_entry_t)addr); | |
55e303ae | 1990 | addr += ZONE_DEBUG_OFFSET; |
1c79356b A |
1991 | } |
1992 | #endif /* ZONE_DEBUG */ | |
6d2010ae A |
1993 | |
1994 | #if CONFIG_ZLEAKS | |
1995 | /* | |
1996 | * Zone leak detection: record the allocation | |
1997 | */ | |
1998 | if (zone->zleak_on && zleak_tracedepth > 0 && addr) { | |
1999 | /* Sampling can fail if another sample is happening at the same time in a different zone. */ | |
2000 | if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) { | |
2001 | /* If it failed, roll back the counter so we sample the next allocation instead. */ | |
2002 | zone->zleak_capture = z_sample_factor; | |
2003 | } | |
2004 | } | |
2005 | ||
2006 | if (addr != 0) { | |
2007 | zone->num_allocs++; | |
2008 | } | |
2009 | #endif /* CONFIG_ZLEAKS */ | |
2010 | ||
1c79356b A |
2011 | unlock_zone(zone); |
2012 | ||
91447636 | 2013 | return((void *) addr); |
1c79356b A |
2014 | } |
2015 | ||
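/*
 * Added illustration (not part of the original file): zget() only
 * try-locks the zone and never expands it, so a NULL return just means
 * "nothing available right now".  A hypothetical caller can fall back to
 * the blocking path when its context allows it.
 */
#if 0	/* example only */
	elt = zget(my_zone);
	if (elt == NULL && can_block)		/* can_block is hypothetical */
		elt = zalloc(my_zone);		/* may expand the zone and sleep */
#endif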
2016 | /* Keep this FALSE by default. Large memory machines run orders of magnitude | 
2017 | slower in debug mode when TRUE. Use the debugger to enable if needed */ | 
55e303ae A |
2018 | /* static */ boolean_t zone_check = FALSE; |
2019 | ||
2020 | static zone_t zone_last_bogus_zone = ZONE_NULL; | |
2021 | static vm_offset_t zone_last_bogus_elem = 0; | |
1c79356b A |
2022 | |
2023 | void | |
2024 | zfree( | |
2025 | register zone_t zone, | |
91447636 | 2026 | void *addr) |
1c79356b | 2027 | { |
91447636 | 2028 | vm_offset_t elem = (vm_offset_t) addr; |
6d2010ae | 2029 | void *zbt[MAX_ZTRACE_DEPTH]; /* only used if zone logging is enabled via boot-args */ |
c910b4d9 A |
2030 | int numsaved = 0; |
2031 | ||
2032 | assert(zone != ZONE_NULL); | |
2033 | ||
2034 | /* | |
2035 | * If zone logging is turned on and this is the zone we're tracking, grab a backtrace. | |
2036 | */ | |
2037 | ||
2038 | if (DO_LOGGING(zone)) | |
6d2010ae | 2039 | numsaved = OSBacktrace(&zbt[0], MAX_ZTRACE_DEPTH); |
1c79356b A |
2040 | |
2041 | #if MACH_ASSERT | |
2042 | /* Basic sanity checks */ | |
2043 | if (zone == ZONE_NULL || elem == (vm_offset_t)0) | |
2044 | panic("zfree: NULL"); | |
2045 | /* zone_gc assumes zones are never freed */ | |
2046 | if (zone == zone_zone) | |
2047 | panic("zfree: freeing to zone_zone breaks zone_gc!"); | |
55e303ae A |
2048 | #endif |
2049 | ||
b0d623f7 | 2050 | TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (uintptr_t)addr); |
2d21ac55 | 2051 | |
1c79356b | 2052 | if (zone->collectable && !zone->allows_foreign && |
55e303ae A |
2053 | !from_zone_map(elem, zone->elem_size)) { |
2054 | #if MACH_ASSERT | |
1c79356b | 2055 | panic("zfree: non-allocated memory in collectable zone!"); |
91447636 | 2056 | #endif |
55e303ae A |
2057 | zone_last_bogus_zone = zone; |
2058 | zone_last_bogus_elem = elem; | |
2059 | return; | |
55e303ae | 2060 | } |
1c79356b A |
2061 | |
2062 | lock_zone(zone); | |
c910b4d9 A |
2063 | |
2064 | /* | |
2065 | * See if we're doing logging on this zone. There are two styles of logging used depending on | |
2066 | * whether we're trying to catch a leak or corruption. See comments above in zalloc for details. | |
2067 | */ | |
2068 | ||
2069 | if (DO_LOGGING(zone)) { | |
2070 | int i; | |
2071 | ||
2072 | if (check_freed_element) { | |
2073 | ||
2074 | /* | |
2075 | * We're logging to catch a corruption. Add a record of this zfree operation | |
2076 | * to log. | |
2077 | */ | |
2078 | ||
2079 | if (zrecords[zcurrent].z_element == NULL) | |
2080 | zrecorded++; | |
2081 | ||
2082 | zrecords[zcurrent].z_element = (void *)addr; | |
2083 | zrecords[zcurrent].z_time = ztime++; | |
2084 | zrecords[zcurrent].z_opcode = ZOP_FREE; | |
2085 | ||
2086 | for (i = 0; i < numsaved; i++) | |
6d2010ae | 2087 | zrecords[zcurrent].z_pc[i] = zbt[i]; |
c910b4d9 | 2088 | |
6d2010ae | 2089 | for (; i < MAX_ZTRACE_DEPTH; i++) |
c910b4d9 A |
2090 | zrecords[zcurrent].z_pc[i] = 0; |
2091 | ||
2092 | zcurrent++; | |
2093 | ||
2094 | if (zcurrent >= log_records) | |
2095 | zcurrent = 0; | |
2096 | ||
2097 | } else { | |
2098 | ||
2099 | /* | |
2100 | * We're logging to catch a leak. Remove any record we might have for this | |
2101 | * element since it's being freed. Note that we may not find it if the buffer | |
2102 | * overflowed and that's OK. Since the log is of a limited size, old records | |
2103 | * get overwritten if there are more zallocs than zfrees. | |
2104 | */ | |
2105 | ||
2106 | for (i = 0; i < log_records; i++) { | |
2107 | if (zrecords[i].z_element == addr) { | |
2108 | zrecords[i].z_element = NULL; | |
2109 | zcurrent = i; | |
2110 | zrecorded--; | |
2111 | break; | |
2112 | } | |
2113 | } | |
2114 | } | |
2115 | } | |
2116 | ||
2117 | ||
1c79356b A |
2118 | #if ZONE_DEBUG |
2119 | if (zone_debug_enabled(zone)) { | |
2120 | queue_t tmp_elem; | |
2121 | ||
55e303ae | 2122 | elem -= ZONE_DEBUG_OFFSET; |
1c79356b A |
2123 | if (zone_check) { |
2124 | /* check the zone's consistency */ | |
2125 | ||
2126 | for (tmp_elem = queue_first(&zone->active_zones); | |
2127 | !queue_end(tmp_elem, &zone->active_zones); | |
2128 | tmp_elem = queue_next(tmp_elem)) | |
2129 | if (elem == (vm_offset_t)tmp_elem) | |
2130 | break; | |
2131 | if (elem != (vm_offset_t)tmp_elem) | |
2132 | panic("zfree()ing element from wrong zone"); | |
2133 | } | |
6d2010ae | 2134 | remqueue((queue_t) elem); |
1c79356b A |
2135 | } |
2136 | #endif /* ZONE_DEBUG */ | |
2137 | if (zone_check) { | |
2138 | vm_offset_t this; | |
2139 | ||
2140 | /* check the zone's consistency */ | |
2141 | ||
2142 | for (this = zone->free_elements; | |
2143 | this != 0; | |
2144 | this = * (vm_offset_t *) this) | |
2145 | if (!pmap_kernel_va(this) || this == elem) | |
2146 | panic("zfree"); | |
2147 | } | |
0b4e3aa0 | 2148 | ADD_TO_ZONE(zone, elem); |
b0d623f7 A |
2149 | #if MACH_ASSERT |
2150 | if (zone->count < 0) | |
2151 | panic("zfree: count < 0!"); | |
2152 | #endif | |
6d2010ae | 2153 | |
0b4e3aa0 | 2154 | |
6d2010ae A |
2155 | #if CONFIG_ZLEAKS |
2156 | zone->num_frees++; | |
2157 | ||
2158 | /* | |
2159 | * Zone leak detection: un-track the allocation | |
2160 | */ | |
2161 | if (zone->zleak_on) { | |
2162 | zleak_free(elem, zone->elem_size); | |
2163 | } | |
2164 | #endif /* CONFIG_ZLEAKS */ | |
2165 | ||
1c79356b A |
2166 | /* |
2167 | * If elements are one or more pages in size, and memory is low, | 
0b4e3aa0 A |
2168 | * request that zone garbage collection be run the next | 
2169 | * time the pageout thread runs. | 
1c79356b A |
2170 | */ |
2171 | if (zone->elem_size >= PAGE_SIZE && | |
2172 | vm_pool_low()){ | |
0b4e3aa0 | 2173 | zone_gc_forced = TRUE; |
1c79356b | 2174 | } |
1c79356b | 2175 | unlock_zone(zone); |
6d2010ae A |
2176 | |
2177 | { | |
2178 | thread_t thr = current_thread(); | |
2179 | task_t task; | |
2180 | zinfo_usage_t zinfo; | |
2181 | ||
2182 | if (zone->caller_acct) | |
2183 | thr->tkm_private.free += zone->elem_size; | |
2184 | else | |
2185 | thr->tkm_shared.free += zone->elem_size; | |
2186 | if ((task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL) | |
2187 | OSAddAtomic64(zone->elem_size, | |
2188 | (int64_t *)&zinfo[zone->index].free); | |
2189 | } | |
1c79356b A |
2190 | } |
2191 | ||
2192 | ||
2193 | /* Change a zone's flags. | |
2194 | * This routine must be called immediately after zinit. | |
2195 | */ | |
2196 | void | |
2197 | zone_change( | |
2198 | zone_t zone, | |
2199 | unsigned int item, | |
2200 | boolean_t value) | |
2201 | { | |
2202 | assert( zone != ZONE_NULL ); | |
2203 | assert( value == TRUE || value == FALSE ); | |
2204 | ||
2205 | switch(item){ | |
0b4c1975 A |
2206 | case Z_NOENCRYPT: |
2207 | zone->noencrypt = value; | |
2208 | break; | |
1c79356b A |
2209 | case Z_EXHAUST: |
2210 | zone->exhaustible = value; | |
2211 | break; | |
2212 | case Z_COLLECT: | |
2213 | zone->collectable = value; | |
2214 | break; | |
2215 | case Z_EXPAND: | |
2216 | zone->expandable = value; | |
2217 | break; | |
2218 | case Z_FOREIGN: | |
2219 | zone->allows_foreign = value; | |
2220 | break; | |
6d2010ae A |
2221 | case Z_CALLERACCT: |
2222 | zone->caller_acct = value; | |
2223 | break; | |
7ddcb079 A |
2224 | case Z_NOCALLOUT: |
2225 | zone->no_callout = value; | |
2226 | break; | |
1c79356b A |
2227 | #if MACH_ASSERT |
2228 | default: | |
2229 | panic("Zone_change: Wrong Item Type!"); | |
2230 | /* break; */ | |
2231 | #endif | |
2232 | } | |
1c79356b A |
2233 | } |
2234 | ||
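/*
 * Added illustration (not part of the original file): flags are applied
 * immediately after zinit(), as the comment above requires.  The zone,
 * element type and sizes are hypothetical.
 */
#if 0	/* example only */
	my_zone = zinit(sizeof (struct my_elem),
			4096 * sizeof (struct my_elem),	/* max_size */
			PAGE_SIZE,			/* alloc_size */
			"my elements");
	zone_change(my_zone, Z_EXHAUST, TRUE);	/* fail at max_size rather than grow */
	zone_change(my_zone, Z_COLLECT, FALSE);	/* keep zone_gc() from scanning it */
#endif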
2235 | /* | |
2236 | * Return the expected number of free elements in the zone. | |
2237 | * This calculation will be incorrect if items are zfree'd that | |
2238 | * were never zalloc'd/zget'd. The correct way to stuff memory | |
2239 | * into a zone is by zcram. | |
2240 | */ | |
2241 | ||
2242 | integer_t | |
2243 | zone_free_count(zone_t zone) | |
2244 | { | |
2245 | integer_t free_count; | |
2246 | ||
2247 | lock_zone(zone); | |
b0d623f7 | 2248 | free_count = (integer_t)(zone->cur_size/zone->elem_size - zone->count); |
1c79356b A |
2249 | unlock_zone(zone); |
2250 | ||
2251 | assert(free_count >= 0); | |
2252 | ||
2253 | return(free_count); | |
2254 | } | |
2255 | ||
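/*
 * Added worked example (not part of the original file): for a zone with
 * cur_size == 8192 bytes, elem_size == 256 and count == 20 elements
 * outstanding, the routine above reports 8192 / 256 - 20 == 12 expected
 * free elements.
 */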
2256 | /* | |
2257 | * zprealloc preallocates wired memory, expanding the specified | 
2258 | * zone to the specified size | |
2259 | */ | |
2260 | void | |
2261 | zprealloc( | |
2262 | zone_t zone, | |
2263 | vm_size_t size) | |
2264 | { | |
2265 | vm_offset_t addr; | |
2266 | ||
2267 | if (size != 0) { | |
b0d623f7 | 2268 | if (kmem_alloc_kobject(zone_map, &addr, size) != KERN_SUCCESS) |
1c79356b | 2269 | panic("zprealloc"); |
7ddcb079 | 2270 | zcram(zone, addr, size); |
1c79356b A |
2271 | } |
2272 | } | |
2273 | ||
2274 | /* | |
2275 | * Zone garbage collection subroutines | |
1c79356b | 2276 | */ |
55e303ae | 2277 | |
1c79356b A |
2278 | boolean_t |
2279 | zone_page_collectable( | |
2280 | vm_offset_t addr, | |
2281 | vm_size_t size) | |
2282 | { | |
55e303ae | 2283 | struct zone_page_table_entry *zp; |
7ddcb079 | 2284 | zone_page_index_t i, j; |
1c79356b | 2285 | |
2d21ac55 A |
2286 | #if ZONE_ALIAS_ADDR |
2287 | addr = zone_virtual_addr(addr); | |
2288 | #endif | |
1c79356b | 2289 | #if MACH_ASSERT |
55e303ae | 2290 | if (!from_zone_map(addr, size)) |
1c79356b A |
2291 | panic("zone_page_collectable"); |
2292 | #endif | |
2293 | ||
7ddcb079 A |
2294 | i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); |
2295 | j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); | |
55e303ae | 2296 | |
7ddcb079 A |
2297 | for (; i <= j; i++) { |
2298 | zp = zone_page_table_lookup(i); | |
55e303ae | 2299 | if (zp->collect_count == zp->alloc_count) |
1c79356b | 2300 | return (TRUE); |
7ddcb079 | 2301 | } |
55e303ae | 2302 | |
1c79356b A |
2303 | return (FALSE); |
2304 | } | |
2305 | ||
2306 | void | |
2307 | zone_page_keep( | |
2308 | vm_offset_t addr, | |
2309 | vm_size_t size) | |
2310 | { | |
55e303ae | 2311 | struct zone_page_table_entry *zp; |
7ddcb079 | 2312 | zone_page_index_t i, j; |
1c79356b | 2313 | |
2d21ac55 A |
2314 | #if ZONE_ALIAS_ADDR |
2315 | addr = zone_virtual_addr(addr); | |
2316 | #endif | |
1c79356b | 2317 | #if MACH_ASSERT |
55e303ae | 2318 | if (!from_zone_map(addr, size)) |
1c79356b A |
2319 | panic("zone_page_keep"); |
2320 | #endif | |
2321 | ||
7ddcb079 A |
2322 | i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); |
2323 | j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); | |
1c79356b | 2324 | |
7ddcb079 A |
2325 | for (; i <= j; i++) { |
2326 | zp = zone_page_table_lookup(i); | |
55e303ae | 2327 | zp->collect_count = 0; |
7ddcb079 | 2328 | } |
1c79356b A |
2329 | } |
2330 | ||
2331 | void | |
55e303ae | 2332 | zone_page_collect( |
1c79356b A |
2333 | vm_offset_t addr, |
2334 | vm_size_t size) | |
2335 | { | |
55e303ae | 2336 | struct zone_page_table_entry *zp; |
7ddcb079 | 2337 | zone_page_index_t i, j; |
1c79356b | 2338 | |
2d21ac55 A |
2339 | #if ZONE_ALIAS_ADDR |
2340 | addr = zone_virtual_addr(addr); | |
2341 | #endif | |
1c79356b | 2342 | #if MACH_ASSERT |
55e303ae A |
2343 | if (!from_zone_map(addr, size)) |
2344 | panic("zone_page_collect"); | |
1c79356b A |
2345 | #endif |
2346 | ||
7ddcb079 A |
2347 | i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); |
2348 | j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); | |
55e303ae | 2349 | |
7ddcb079 A |
2350 | for (; i <= j; i++) { |
2351 | zp = zone_page_table_lookup(i); | |
55e303ae | 2352 | ++zp->collect_count; |
7ddcb079 | 2353 | } |
1c79356b A |
2354 | } |
2355 | ||
2356 | void | |
2357 | zone_page_init( | |
2358 | vm_offset_t addr, | |
7ddcb079 | 2359 | vm_size_t size) |
1c79356b | 2360 | { |
55e303ae | 2361 | struct zone_page_table_entry *zp; |
7ddcb079 | 2362 | zone_page_index_t i, j; |
1c79356b | 2363 | |
2d21ac55 A |
2364 | #if ZONE_ALIAS_ADDR |
2365 | addr = zone_virtual_addr(addr); | |
2366 | #endif | |
1c79356b | 2367 | #if MACH_ASSERT |
55e303ae | 2368 | if (!from_zone_map(addr, size)) |
1c79356b A |
2369 | panic("zone_page_init"); |
2370 | #endif | |
2371 | ||
7ddcb079 A |
2372 | i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); |
2373 | j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); | |
2374 | ||
2375 | for (; i <= j; i++) { | |
2376 | /* make sure entry exists before marking unused */ | |
2377 | zone_page_table_expand(i); | |
55e303ae | 2378 | |
7ddcb079 A |
2379 | zp = zone_page_table_lookup(i); |
2380 | assert(zp); | |
2381 | zp->alloc_count = ZONE_PAGE_UNUSED; | |
55e303ae | 2382 | zp->collect_count = 0; |
1c79356b | 2383 | } |
1c79356b A |
2384 | } |
2385 | ||
2386 | void | |
2387 | zone_page_alloc( | |
2388 | vm_offset_t addr, | |
2389 | vm_size_t size) | |
2390 | { | |
55e303ae | 2391 | struct zone_page_table_entry *zp; |
7ddcb079 | 2392 | zone_page_index_t i, j; |
1c79356b | 2393 | |
2d21ac55 A |
2394 | #if ZONE_ALIAS_ADDR |
2395 | addr = zone_virtual_addr(addr); | |
2396 | #endif | |
1c79356b | 2397 | #if MACH_ASSERT |
55e303ae | 2398 | if (!from_zone_map(addr, size)) |
1c79356b A |
2399 | panic("zone_page_alloc"); |
2400 | #endif | |
2401 | ||
7ddcb079 A |
2402 | i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); |
2403 | j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); | |
2404 | ||
2405 | for (; i <= j; i++) { | |
2406 | zp = zone_page_table_lookup(i); | |
2407 | assert(zp); | |
55e303ae | 2408 | |
55e303ae | 2409 | /* |
7ddcb079 | 2410 | * Set alloc_count to ZONE_PAGE_USED if |
1c79356b A |
2411 | * it was previously set to ZONE_PAGE_UNUSED. |
2412 | */ | |
55e303ae | 2413 | if (zp->alloc_count == ZONE_PAGE_UNUSED) |
7ddcb079 A |
2414 | zp->alloc_count = ZONE_PAGE_USED; |
2415 | ||
2416 | ++zp->alloc_count; | |
1c79356b | 2417 | } |
1c79356b A |
2418 | } |
2419 | ||
2420 | void | |
55e303ae | 2421 | zone_page_free_element( |
7ddcb079 | 2422 | zone_page_index_t *free_page_list, |
1c79356b A |
2423 | vm_offset_t addr, |
2424 | vm_size_t size) | |
2425 | { | |
55e303ae | 2426 | struct zone_page_table_entry *zp; |
7ddcb079 | 2427 | zone_page_index_t i, j; |
1c79356b | 2428 | |
2d21ac55 A |
2429 | #if ZONE_ALIAS_ADDR |
2430 | addr = zone_virtual_addr(addr); | |
2431 | #endif | |
1c79356b | 2432 | #if MACH_ASSERT |
55e303ae A |
2433 | if (!from_zone_map(addr, size)) |
2434 | panic("zone_page_free_element"); | |
1c79356b A |
2435 | #endif |
2436 | ||
7ddcb079 A |
2437 | i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); |
2438 | j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); | |
2439 | ||
2440 | for (; i <= j; i++) { | |
2441 | zp = zone_page_table_lookup(i); | |
1c79356b | 2442 | |
55e303ae A |
2443 | if (zp->collect_count > 0) |
2444 | --zp->collect_count; | |
2445 | if (--zp->alloc_count == 0) { | |
7ddcb079 A |
2446 | vm_address_t free_page_address; |
2447 | ||
55e303ae A |
2448 | zp->alloc_count = ZONE_PAGE_UNUSED; |
2449 | zp->collect_count = 0; | |
1c79356b | 2450 | |
7ddcb079 A |
2451 | |
2452 | /* | |
2453 | * This element was the last one on this page, re-use the page's | |
2454 | * storage for a page freelist | |
2455 | */ | |
2456 | free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)i); | |
2457 | *(zone_page_index_t *)free_page_address = *free_page_list; | |
2458 | *free_page_list = i; | |
1c79356b A |
2459 | } |
2460 | } | |
1c79356b A |
2461 | } |
2462 | ||
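/*
 * Added illustration (not part of the original file): the free-page list
 * built above is a stack of page indices threaded through the empty pages
 * themselves.  If the last elements on page 7 and then page 3 are freed
 * into an initially empty list:
 *
 *	*free_page_list		== 3
 *	first word of page 3	== 7
 *	first word of page 7	== ZONE_PAGE_INDEX_INVALID (the initial head)
 *
 * The reclaim loop at the end of zone_gc() below pops each index,
 * recomputes the page address from zone_map_min_address, and returns the
 * page with kmem_free().
 */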
2463 | ||
2464 | /* This is used for walking through a zone's free element list. | |
2465 | */ | |
55e303ae A |
2466 | struct zone_free_element { |
2467 | struct zone_free_element * next; | |
1c79356b A |
2468 | }; |
2469 | ||
2d21ac55 A |
2470 | /* |
2471 | * Add a linked list of pages starting at base back into the zone | |
2472 | * free list. Tail points to the last element on the list. | |
2473 | */ | |
2474 | ||
2475 | #define ADD_LIST_TO_ZONE(zone, base, tail) \ | |
2476 | MACRO_BEGIN \ | |
2477 | (tail)->next = (void *)((zone)->free_elements); \ | |
2478 | if (check_freed_element) { \ | |
2479 | if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \ | |
2480 | ((vm_offset_t *)(tail))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \ | |
2481 | (zone)->free_elements; \ | |
2482 | } \ | |
2483 | (zone)->free_elements = (unsigned long)(base); \ | |
2484 | MACRO_END | |
2485 | ||
2486 | /* | |
2487 | * Add an element to the chain pointed to by prev. | |
2488 | */ | |
2489 | ||
2490 | #define ADD_ELEMENT(zone, prev, elem) \ | |
2491 | MACRO_BEGIN \ | |
2492 | (prev)->next = (elem); \ | |
2493 | if (check_freed_element) { \ | |
2494 | if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \ | |
2495 | ((vm_offset_t *)(prev))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \ | |
2496 | (vm_offset_t)(elem); \ | |
2497 | } \ | |
2498 | MACRO_END | |
2499 | ||
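/*
 * Added illustration (not part of the original file): with freed-element
 * checking enabled ("-zc"), the two macros above store the freelist link
 * twice in every sufficiently large free element:
 *
 *	word 0    : next free element (struct zone_free_element.next)
 *	...       : stale element contents
 *	last word : copy of the next pointer written by the macros
 *
 * When the element is later handed out again, the allocator can compare
 * the two words; a mismatch indicates the element was written to after it
 * was freed.
 */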
55e303ae A |
2500 | struct { |
2501 | uint32_t pgs_freed; | |
2502 | ||
2503 | uint32_t elems_collected, | |
2504 | elems_freed, | |
2505 | elems_kept; | |
2506 | } zgc_stats; | |
1c79356b A |
2507 | |
2508 | /* Zone garbage collection | |
2509 | * | |
2510 | * zone_gc will walk through all the free elements in all the | |
2511 | * zones that are marked collectable looking for reclaimable | |
2512 | * pages. zone_gc is called by consider_zone_gc when the system | |
2513 | * begins to run out of memory. | |
2514 | */ | |
2515 | void | |
2516 | zone_gc(void) | |
2517 | { | |
2518 | unsigned int max_zones; | |
55e303ae | 2519 | zone_t z; |
1c79356b | 2520 | unsigned int i; |
7ddcb079 | 2521 | zone_page_index_t zone_free_page_head; |
1c79356b | 2522 | |
b0d623f7 | 2523 | lck_mtx_lock(&zone_gc_lock); |
1c79356b | 2524 | |
1c79356b A |
2525 | simple_lock(&all_zones_lock); |
2526 | max_zones = num_zones; | |
2527 | z = first_zone; | |
2528 | simple_unlock(&all_zones_lock); | |
2529 | ||
2530 | #if MACH_ASSERT | |
7ddcb079 A |
2531 | for (i = 0; i < zone_pages; i++) { |
2532 | struct zone_page_table_entry *zp; | |
2533 | ||
2534 | zp = zone_page_table_lookup(i); | |
2535 | assert(!zp || (zp->collect_count == 0)); | |
2536 | } | |
1c79356b A |
2537 | #endif /* MACH_ASSERT */ |
2538 | ||
7ddcb079 | 2539 | zone_free_page_head = ZONE_PAGE_INDEX_INVALID; |
1c79356b A |
2540 | |
2541 | for (i = 0; i < max_zones; i++, z = z->next_zone) { | |
a3d08fcd | 2542 | unsigned int n, m; |
55e303ae | 2543 | vm_size_t elt_size, size_freed; |
a3d08fcd | 2544 | struct zone_free_element *elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail; |
1c79356b A |
2545 | |
2546 | assert(z != ZONE_NULL); | |
2547 | ||
2548 | if (!z->collectable) | |
2549 | continue; | |
2550 | ||
2551 | lock_zone(z); | |
2552 | ||
55e303ae A |
2553 | elt_size = z->elem_size; |
2554 | ||
1c79356b A |
2555 | /* |
2556 | * Do a quick feasibility check before we scan the zone: | 
91447636 A |
2557 | * skip unless there is a likelihood of getting pages back | 
2558 | * (i.e. we need a whole allocation block's worth of free | 
2559 | * elements before we can garbage collect) and | 
2560 | * the zone has more than 10 percent of its elements free | 
2d21ac55 | 2561 | * or the element size is a multiple of the PAGE_SIZE |
1c79356b | 2562 | */ |
2d21ac55 A |
2563 | if ((elt_size & PAGE_MASK) && |
2564 | (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) || | |
2565 | ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10)))) { | |
1c79356b A |
2566 | unlock_zone(z); |
2567 | continue; | |
2568 | } | |
2569 | ||
a3d08fcd A |
2570 | z->doing_gc = TRUE; |
2571 | ||
55e303ae A |
2572 | /* |
2573 | * Snatch all of the free elements away from the zone. | |
1c79356b | 2574 | */ |
1c79356b | 2575 | |
55e303ae | 2576 | scan = (void *)z->free_elements; |
0c530ab8 | 2577 | z->free_elements = 0; |
55e303ae A |
2578 | |
2579 | unlock_zone(z); | |
2580 | ||
2581 | /* | |
2582 | * Pass 1: | |
2583 | * | |
2584 | * Determine which elements we can attempt to collect | |
2585 | * and count them up in the page table. Foreign elements | |
2586 | * are returned to the zone. | |
1c79356b | 2587 | */ |
55e303ae A |
2588 | |
2589 | prev = (void *)&scan; | |
2590 | elt = scan; | |
2591 | n = 0; tail = keep = NULL; | |
2592 | while (elt != NULL) { | |
2593 | if (from_zone_map(elt, elt_size)) { | |
2594 | zone_page_collect((vm_offset_t)elt, elt_size); | |
2595 | ||
1c79356b A |
2596 | prev = elt; |
2597 | elt = elt->next; | |
55e303ae A |
2598 | |
2599 | ++zgc_stats.elems_collected; | |
1c79356b | 2600 | } |
55e303ae A |
2601 | else { |
2602 | if (keep == NULL) | |
2603 | keep = tail = elt; | |
2d21ac55 A |
2604 | else { |
2605 | ADD_ELEMENT(z, tail, elt); | |
2606 | tail = elt; | |
2607 | } | |
55e303ae | 2608 | |
2d21ac55 A |
2609 | ADD_ELEMENT(z, prev, elt->next); |
2610 | elt = elt->next; | |
2611 | ADD_ELEMENT(z, tail, NULL); | |
1c79356b | 2612 | } |
1c79356b | 2613 | |
55e303ae A |
2614 | /* |
2615 | * Dribble back the elements we are keeping. | |
2616 | */ | |
2617 | ||
a3d08fcd A |
2618 | if (++n >= 50) { |
2619 | if (z->waiting == TRUE) { | |
7ddcb079 | 2620 | /* z->waiting checked without lock held, rechecked below after locking */ |
a3d08fcd | 2621 | lock_zone(z); |
55e303ae | 2622 | |
a3d08fcd | 2623 | if (keep != NULL) { |
2d21ac55 | 2624 | ADD_LIST_TO_ZONE(z, keep, tail); |
a3d08fcd A |
2625 | tail = keep = NULL; |
2626 | } else { | |
2627 | m =0; | |
2628 | base_elt = elt; | |
2629 | base_prev = prev; | |
2630 | while ((elt != NULL) && (++m < 50)) { | |
2631 | prev = elt; | |
2632 | elt = elt->next; | |
2633 | } | |
2634 | if (m !=0 ) { | |
2d21ac55 A |
2635 | ADD_LIST_TO_ZONE(z, base_elt, prev); |
2636 | ADD_ELEMENT(z, base_prev, elt); | |
a3d08fcd A |
2637 | prev = base_prev; |
2638 | } | |
2639 | } | |
55e303ae | 2640 | |
a3d08fcd A |
2641 | if (z->waiting) { |
2642 | z->waiting = FALSE; | |
2643 | zone_wakeup(z); | |
2644 | } | |
55e303ae | 2645 | |
a3d08fcd A |
2646 | unlock_zone(z); |
2647 | } | |
2648 | n =0; | |
55e303ae A |
2649 | } |
2650 | } | |
2651 | ||
2652 | /* | |
2653 | * Return any remaining elements. | |
2654 | */ | |
2655 | ||
2656 | if (keep != NULL) { | |
2657 | lock_zone(z); | |
2658 | ||
2d21ac55 | 2659 | ADD_LIST_TO_ZONE(z, keep, tail); |
55e303ae | 2660 | |
7ddcb079 A |
2661 | if (z->waiting) { |
2662 | z->waiting = FALSE; | |
2663 | zone_wakeup(z); | |
2664 | } | |
2665 | ||
55e303ae A |
2666 | unlock_zone(z); |
2667 | } | |
2668 | ||
2669 | /* | |
2670 | * Pass 2: | |
2671 | * | |
2672 | * Determine which pages we can reclaim and | |
2673 | * free those elements. | |
2674 | */ | |
2675 | ||
2676 | size_freed = 0; | |
55e303ae A |
2677 | elt = scan; |
2678 | n = 0; tail = keep = NULL; | |
2679 | while (elt != NULL) { | |
2680 | if (zone_page_collectable((vm_offset_t)elt, elt_size)) { | |
7ddcb079 A |
2681 | struct zone_free_element *next_elt = elt->next; |
2682 | ||
55e303ae | 2683 | size_freed += elt_size; |
7ddcb079 A |
2684 | |
2685 | /* | |
2686 | * If this is the last allocation on the page(s), | |
2687 | * we may use their storage to maintain the linked | |
2688 | * list of free-able pages. So store elt->next because | |
2689 | * "elt" may be scribbled over. | |
2690 | */ | |
2691 | zone_page_free_element(&zone_free_page_head, | |
55e303ae A |
2692 | (vm_offset_t)elt, elt_size); |
2693 | ||
7ddcb079 | 2694 | elt = next_elt; |
55e303ae A |
2695 | |
2696 | ++zgc_stats.elems_freed; | |
2697 | } | |
2698 | else { | |
2699 | zone_page_keep((vm_offset_t)elt, elt_size); | |
2700 | ||
2701 | if (keep == NULL) | |
2702 | keep = tail = elt; | |
2d21ac55 A |
2703 | else { |
2704 | ADD_ELEMENT(z, tail, elt); | |
2705 | tail = elt; | |
2706 | } | |
55e303ae | 2707 | |
2d21ac55 A |
2708 | elt = elt->next; |
2709 | ADD_ELEMENT(z, tail, NULL); | |
55e303ae A |
2710 | |
2711 | ++zgc_stats.elems_kept; | |
2712 | } | |
2713 | ||
2714 | /* | |
2715 | * Dribble back the elements we are keeping, | |
2716 | * and update the zone size info. | |
2717 | */ | |
2718 | ||
a3d08fcd | 2719 | if (++n >= 50) { |
55e303ae A |
2720 | lock_zone(z); |
2721 | ||
2722 | z->cur_size -= size_freed; | |
2723 | size_freed = 0; | |
2724 | ||
a3d08fcd | 2725 | if (keep != NULL) { |
2d21ac55 | 2726 | ADD_LIST_TO_ZONE(z, keep, tail); |
a3d08fcd A |
2727 | } |
2728 | ||
2729 | if (z->waiting) { | |
2730 | z->waiting = FALSE; | |
2731 | zone_wakeup(z); | |
2732 | } | |
55e303ae A |
2733 | |
2734 | unlock_zone(z); | |
2735 | ||
2736 | n = 0; tail = keep = NULL; | |
2737 | } | |
2738 | } | |
2739 | ||
2740 | /* | |
2741 | * Return any remaining elements, and update | |
2742 | * the zone size info. | |
2743 | */ | |
2744 | ||
a3d08fcd A |
2745 | lock_zone(z); |
2746 | ||
55e303ae | 2747 | if (size_freed > 0 || keep != NULL) { |
55e303ae A |
2748 | |
2749 | z->cur_size -= size_freed; | |
2750 | ||
2751 | if (keep != NULL) { | |
2d21ac55 | 2752 | ADD_LIST_TO_ZONE(z, keep, tail); |
55e303ae A |
2753 | } |
2754 | ||
55e303ae | 2755 | } |
a3d08fcd A |
2756 | |
2757 | z->doing_gc = FALSE; | |
2758 | if (z->waiting) { | |
2759 | z->waiting = FALSE; | |
2760 | zone_wakeup(z); | |
2761 | } | |
2762 | unlock_zone(z); | |
1c79356b A |
2763 | } |
2764 | ||
55e303ae A |
2765 | /* |
2766 | * Reclaim the pages we are freeing. | |
2767 | */ | |
1c79356b | 2768 | |
7ddcb079 A |
2769 | while (zone_free_page_head != ZONE_PAGE_INDEX_INVALID) { |
2770 | zone_page_index_t zind = zone_free_page_head; | |
2771 | vm_address_t free_page_address; | |
2d21ac55 | 2772 | #if ZONE_ALIAS_ADDR |
6d2010ae | 2773 | z = (zone_t)zone_virtual_addr((vm_map_address_t)z); |
2d21ac55 | 2774 | #endif |
7ddcb079 A |
2775 | /* Use the first word of the page about to be freed to find the next free page */ |
2776 | free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)zind); | |
2777 | zone_free_page_head = *(zone_page_index_t *)free_page_address; | |
2778 | ||
2779 | kmem_free(zone_map, free_page_address, PAGE_SIZE); | |
55e303ae | 2780 | ++zgc_stats.pgs_freed; |
1c79356b | 2781 | } |
55e303ae | 2782 | |
b0d623f7 | 2783 | lck_mtx_unlock(&zone_gc_lock); |
1c79356b A |
2784 | } |
2785 | ||
1c79356b A |
2786 | /* |
2787 | * consider_zone_gc: | |
2788 | * | |
2789 | * Called by the pageout daemon when the system needs more free pages. | |
2790 | */ | |
2791 | ||
2792 | void | |
b0d623f7 | 2793 | consider_zone_gc(boolean_t force) |
1c79356b | 2794 | { |
1c79356b A |
2795 | |
2796 | if (zone_gc_allowed && | |
6d2010ae | 2797 | (zone_gc_allowed_by_time_throttle || |
b0d623f7 A |
2798 | zone_gc_forced || |
2799 | force)) { | |
0b4e3aa0 | 2800 | zone_gc_forced = FALSE; |
6d2010ae | 2801 | zone_gc_allowed_by_time_throttle = FALSE; /* reset periodically */ |
1c79356b A |
2802 | zone_gc(); |
2803 | } | |
2804 | } | |
2805 | ||
6d2010ae A |
2806 | /* |
2807 | * By default, don't attempt zone GC more frequently | |
2808 | * than once per minute. | 
2809 | */ | |
2810 | void | |
2811 | compute_zone_gc_throttle(void *arg __unused) | |
2812 | { | |
2813 | zone_gc_allowed_by_time_throttle = TRUE; | |
2814 | } | |
2d21ac55 | 2815 | |
1c79356b | 2816 | |
6d2010ae A |
2817 | kern_return_t |
2818 | task_zone_info( | |
2819 | task_t task, | |
2820 | mach_zone_name_array_t *namesp, | |
2821 | mach_msg_type_number_t *namesCntp, | |
2822 | task_zone_info_array_t *infop, | |
2823 | mach_msg_type_number_t *infoCntp) | |
2824 | { | |
2825 | mach_zone_name_t *names; | |
2826 | vm_offset_t names_addr; | |
2827 | vm_size_t names_size; | |
2828 | task_zone_info_t *info; | |
2829 | vm_offset_t info_addr; | |
2830 | vm_size_t info_size; | |
2831 | unsigned int max_zones, i; | |
2832 | zone_t z; | |
2833 | mach_zone_name_t *zn; | |
2834 | task_zone_info_t *zi; | |
2835 | kern_return_t kr; | |
2836 | ||
2837 | vm_size_t used; | |
2838 | vm_map_copy_t copy; | |
2839 | ||
2840 | ||
2841 | if (task == TASK_NULL) | |
2842 | return KERN_INVALID_TASK; | |
2843 | ||
2844 | /* | |
2845 | * We assume that zones aren't freed once allocated. | |
2846 | * We won't pick up any zones that are allocated later. | |
2847 | */ | |
2848 | ||
2849 | simple_lock(&all_zones_lock); | |
2850 | max_zones = (unsigned int)(num_zones + num_fake_zones); | |
2851 | z = first_zone; | |
2852 | simple_unlock(&all_zones_lock); | |
2853 | ||
2854 | names_size = round_page(max_zones * sizeof *names); | |
2855 | kr = kmem_alloc_pageable(ipc_kernel_map, | |
2856 | &names_addr, names_size); | |
2857 | if (kr != KERN_SUCCESS) | |
2858 | return kr; | |
2859 | names = (mach_zone_name_t *) names_addr; | |
2860 | ||
2861 | info_size = round_page(max_zones * sizeof *info); | |
2862 | kr = kmem_alloc_pageable(ipc_kernel_map, | |
2863 | &info_addr, info_size); | |
2864 | if (kr != KERN_SUCCESS) { | |
2865 | kmem_free(ipc_kernel_map, | |
2866 | names_addr, names_size); | |
2867 | return kr; | |
2868 | } | |
2869 | ||
2870 | info = (task_zone_info_t *) info_addr; | |
2871 | ||
2872 | zn = &names[0]; | |
2873 | zi = &info[0]; | |
2874 | ||
2875 | for (i = 0; i < max_zones - num_fake_zones; i++) { | |
2876 | struct zone zcopy; | |
2877 | ||
2878 | assert(z != ZONE_NULL); | |
2879 | ||
2880 | lock_zone(z); | |
2881 | zcopy = *z; | |
2882 | unlock_zone(z); | |
2883 | ||
2884 | simple_lock(&all_zones_lock); | |
2885 | z = z->next_zone; | |
2886 | simple_unlock(&all_zones_lock); | |
2887 | ||
2888 | /* assuming here the name data is static */ | |
2889 | (void) strncpy(zn->mzn_name, zcopy.zone_name, | |
2890 | sizeof zn->mzn_name); | |
2891 | zn->mzn_name[sizeof zn->mzn_name - 1] = '\0'; | |
2892 | ||
2893 | zi->tzi_count = (uint64_t)zcopy.count; | |
2894 | zi->tzi_cur_size = (uint64_t)zcopy.cur_size; | |
2895 | zi->tzi_max_size = (uint64_t)zcopy.max_size; | |
2896 | zi->tzi_elem_size = (uint64_t)zcopy.elem_size; | |
2897 | zi->tzi_alloc_size = (uint64_t)zcopy.alloc_size; | |
2898 | zi->tzi_sum_size = zcopy.sum_count * zcopy.elem_size; | |
2899 | zi->tzi_exhaustible = (uint64_t)zcopy.exhaustible; | |
2900 | zi->tzi_collectable = (uint64_t)zcopy.collectable; | |
2901 | zi->tzi_caller_acct = (uint64_t)zcopy.caller_acct; | |
2902 | if (task->tkm_zinfo != NULL) { | |
2903 | zi->tzi_task_alloc = task->tkm_zinfo[zcopy.index].alloc; | |
2904 | zi->tzi_task_free = task->tkm_zinfo[zcopy.index].free; | |
2905 | } else { | |
2906 | zi->tzi_task_alloc = 0; | |
2907 | zi->tzi_task_free = 0; | |
2908 | } | |
2909 | zn++; | |
2910 | zi++; | |
2911 | } | |
2912 | ||
2913 | /* | |
2914 | * loop through the fake zones and fill them using the specialized | |
2915 | * functions | |
2916 | */ | |
2917 | for (i = 0; i < num_fake_zones; i++) { | |
2918 | int count, collectable, exhaustible, caller_acct, index; | |
2919 | vm_size_t cur_size, max_size, elem_size, alloc_size; | |
2920 | uint64_t sum_size; | |
2921 | ||
2922 | strncpy(zn->mzn_name, fake_zones[i].name, sizeof zn->mzn_name); | |
2923 | zn->mzn_name[sizeof zn->mzn_name - 1] = '\0'; | |
2924 | fake_zones[i].query(&count, &cur_size, | |
2925 | &max_size, &elem_size, | |
2926 | &alloc_size, &sum_size, | |
2927 | &collectable, &exhaustible, &caller_acct); | |
2928 | zi->tzi_count = (uint64_t)count; | |
2929 | zi->tzi_cur_size = (uint64_t)cur_size; | |
2930 | zi->tzi_max_size = (uint64_t)max_size; | |
2931 | zi->tzi_elem_size = (uint64_t)elem_size; | |
2932 | zi->tzi_alloc_size = (uint64_t)alloc_size; | |
2933 | zi->tzi_sum_size = sum_size; | |
2934 | zi->tzi_collectable = (uint64_t)collectable; | |
2935 | zi->tzi_exhaustible = (uint64_t)exhaustible; | |
2936 | zi->tzi_caller_acct = (uint64_t)caller_acct; | |
2937 | if (task->tkm_zinfo != NULL) { | |
2938 | index = ZINFO_SLOTS - num_fake_zones + i; | |
2939 | zi->tzi_task_alloc = task->tkm_zinfo[index].alloc; | |
2940 | zi->tzi_task_free = task->tkm_zinfo[index].free; | |
2941 | } else { | |
2942 | zi->tzi_task_alloc = 0; | |
2943 | zi->tzi_task_free = 0; | |
2944 | } | |
2945 | zn++; | |
2946 | zi++; | |
2947 | } | |
2948 | ||
2949 | used = max_zones * sizeof *names; | |
2950 | if (used != names_size) | |
2951 | bzero((char *) (names_addr + used), names_size - used); | |
2952 | ||
2953 | kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr, | |
2954 | (vm_map_size_t)names_size, TRUE, ©); | |
2955 | assert(kr == KERN_SUCCESS); | |
2956 | ||
2957 | *namesp = (mach_zone_name_t *) copy; | |
2958 | *namesCntp = max_zones; | |
2959 | ||
2960 | used = max_zones * sizeof *info; | |
2961 | ||
2962 | if (used != info_size) | |
2963 | bzero((char *) (info_addr + used), info_size - used); | |
2964 | ||
2965 | kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr, | |
2966 | (vm_map_size_t)info_size, TRUE, ©); | |
2967 | assert(kr == KERN_SUCCESS); | |
2968 | ||
2969 | *infop = (task_zone_info_t *) copy; | |
2970 | *infoCntp = max_zones; | |
2971 | ||
2972 | return KERN_SUCCESS; | |
2973 | } | |
2974 | ||
2975 | kern_return_t | |
2976 | mach_zone_info( | |
2977 | host_t host, | |
2978 | mach_zone_name_array_t *namesp, | |
2979 | mach_msg_type_number_t *namesCntp, | |
2980 | mach_zone_info_array_t *infop, | |
2981 | mach_msg_type_number_t *infoCntp) | |
2982 | { | |
2983 | mach_zone_name_t *names; | |
2984 | vm_offset_t names_addr; | |
2985 | vm_size_t names_size; | |
2986 | mach_zone_info_t *info; | |
2987 | vm_offset_t info_addr; | |
2988 | vm_size_t info_size; | |
2989 | unsigned int max_zones, i; | |
2990 | zone_t z; | |
2991 | mach_zone_name_t *zn; | |
2992 | mach_zone_info_t *zi; | |
2993 | kern_return_t kr; | |
2994 | ||
2995 | vm_size_t used; | |
2996 | vm_map_copy_t copy; | |
2997 | ||
2998 | ||
2999 | if (host == HOST_NULL) | |
3000 | return KERN_INVALID_HOST; | |
3001 | ||
3002 | num_fake_zones = sizeof fake_zones / sizeof fake_zones[0]; | |
3003 | ||
3004 | /* | |
3005 | * We assume that zones aren't freed once allocated. | |
3006 | * We won't pick up any zones that are allocated later. | |
3007 | */ | |
3008 | ||
3009 | simple_lock(&all_zones_lock); | |
3010 | max_zones = (unsigned int)(num_zones + num_fake_zones); | |
3011 | z = first_zone; | |
3012 | simple_unlock(&all_zones_lock); | |
3013 | ||
3014 | names_size = round_page(max_zones * sizeof *names); | |
3015 | kr = kmem_alloc_pageable(ipc_kernel_map, | |
3016 | &names_addr, names_size); | |
3017 | if (kr != KERN_SUCCESS) | |
3018 | return kr; | |
3019 | names = (mach_zone_name_t *) names_addr; | |
3020 | ||
3021 | info_size = round_page(max_zones * sizeof *info); | |
3022 | kr = kmem_alloc_pageable(ipc_kernel_map, | |
3023 | &info_addr, info_size); | |
3024 | if (kr != KERN_SUCCESS) { | |
3025 | kmem_free(ipc_kernel_map, | |
3026 | names_addr, names_size); | |
3027 | return kr; | |
3028 | } | |
3029 | ||
3030 | info = (mach_zone_info_t *) info_addr; | |
3031 | ||
3032 | zn = &names[0]; | |
3033 | zi = &info[0]; | |
3034 | ||
3035 | for (i = 0; i < max_zones - num_fake_zones; i++) { | |
3036 | struct zone zcopy; | |
3037 | ||
3038 | assert(z != ZONE_NULL); | |
3039 | ||
3040 | lock_zone(z); | |
3041 | zcopy = *z; | |
3042 | unlock_zone(z); | |
3043 | ||
3044 | simple_lock(&all_zones_lock); | |
3045 | z = z->next_zone; | |
3046 | simple_unlock(&all_zones_lock); | |
3047 | ||
3048 | /* assuming here the name data is static */ | |
3049 | (void) strncpy(zn->mzn_name, zcopy.zone_name, | |
3050 | sizeof zn->mzn_name); | |
3051 | zn->mzn_name[sizeof zn->mzn_name - 1] = '\0'; | |
3052 | ||
3053 | zi->mzi_count = (uint64_t)zcopy.count; | |
3054 | zi->mzi_cur_size = (uint64_t)zcopy.cur_size; | |
3055 | zi->mzi_max_size = (uint64_t)zcopy.max_size; | |
3056 | zi->mzi_elem_size = (uint64_t)zcopy.elem_size; | |
3057 | zi->mzi_alloc_size = (uint64_t)zcopy.alloc_size; | |
3058 | zi->mzi_sum_size = zcopy.sum_count * zcopy.elem_size; | |
3059 | zi->mzi_exhaustible = (uint64_t)zcopy.exhaustible; | |
3060 | zi->mzi_collectable = (uint64_t)zcopy.collectable; | |
3061 | zn++; | |
3062 | zi++; | |
3063 | } | |
3064 | ||
3065 | /* | |
3066 | * Loop through the fake zones and fill in their entries using the | |
3067 | * specialized query functions. | |
3068 | */ | |
3069 | for (i = 0; i < num_fake_zones; i++) { | |
3070 | int count, collectable, exhaustible, caller_acct; | |
3071 | vm_size_t cur_size, max_size, elem_size, alloc_size; | |
3072 | uint64_t sum_size; | |
3073 | ||
3074 | strncpy(zn->mzn_name, fake_zones[i].name, sizeof zn->mzn_name); | |
3075 | zn->mzn_name[sizeof zn->mzn_name - 1] = '\0'; | |
3076 | fake_zones[i].query(&count, &cur_size, | |
3077 | &max_size, &elem_size, | |
3078 | &alloc_size, &sum_size, | |
3079 | &collectable, &exhaustible, &caller_acct); | |
3080 | zi->mzi_count = (uint64_t)count; | |
3081 | zi->mzi_cur_size = (uint64_t)cur_size; | |
3082 | zi->mzi_max_size = (uint64_t)max_size; | |
3083 | zi->mzi_elem_size = (uint64_t)elem_size; | |
3084 | zi->mzi_alloc_size = (uint64_t)alloc_size; | |
3085 | zi->mzi_sum_size = sum_size; | |
3086 | zi->mzi_collectable = (uint64_t)collectable; | |
3087 | zi->mzi_exhaustible = (uint64_t)exhaustible; | |
3088 | ||
3089 | zn++; | |
3090 | zi++; | |
3091 | } | |
3092 | ||
3093 | used = max_zones * sizeof *names; | |
3094 | if (used != names_size) | |
3095 | bzero((char *) (names_addr + used), names_size - used); | |
3096 | ||
3097 | kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr, | |
3098 | (vm_map_size_t)names_size, TRUE, ©); | |
3099 | assert(kr == KERN_SUCCESS); | |
3100 | ||
3101 | *namesp = (mach_zone_name_t *) copy; | |
3102 | *namesCntp = max_zones; | |
3103 | ||
3104 | used = max_zones * sizeof *info; | |
3105 | ||
3106 | if (used != info_size) | |
3107 | bzero((char *) (info_addr + used), info_size - used); | |
3108 | ||
3109 | kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr, | |
3110 | (vm_map_size_t)info_size, TRUE, ©); | |
3111 | assert(kr == KERN_SUCCESS); | |
3112 | ||
3113 | *infop = (mach_zone_info_t *) copy; | |
3114 | *infoCntp = max_zones; | |
3115 | ||
3116 | return KERN_SUCCESS; | |
3117 | } | |
3118 | ||
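/*
 * Illustrative sketch only (not part of this file, never compiled):
 * a user-space caller might consume the snapshot returned by
 * mach_zone_info() roughly as below, assuming the MIG-generated stub
 * from <mach/mach_host.h> is available to it, as it is for tools such
 * as zprint(1).  The reply arrays arrive as out-of-line memory and
 * must be released with vm_deallocate().
 */
#if 0
#include <mach/mach.h>
#include <stdio.h>

static void
dump_zone_snapshot(void)
{
	mach_zone_name_array_t	names;
	mach_zone_info_array_t	info;
	mach_msg_type_number_t	name_cnt, info_cnt;
	unsigned int		i;

	if (mach_zone_info(mach_host_self(), &names, &name_cnt,
			   &info, &info_cnt) != KERN_SUCCESS)
		return;

	for (i = 0; i < name_cnt && i < info_cnt; i++)
		printf("%-30s count=%llu cur_size=%llu\n",
		    names[i].mzn_name,
		    (unsigned long long)info[i].mzi_count,
		    (unsigned long long)info[i].mzi_cur_size);

	/* both arrays are out-of-line memory now owned by the caller */
	vm_deallocate(mach_task_self(), (vm_address_t)names,
	    name_cnt * sizeof *names);
	vm_deallocate(mach_task_self(), (vm_address_t)info,
	    info_cnt * sizeof *info);
}
#endif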
3119 | /* | |
3120 | * host_zone_info - LEGACY user interface for Mach zone information | |
3121 | * Should use mach_zone_info() instead! | |
3122 | */ | |
1c79356b A |
3123 | kern_return_t |
3124 | host_zone_info( | |
3125 | host_t host, | |
3126 | zone_name_array_t *namesp, | |
3127 | mach_msg_type_number_t *namesCntp, | |
3128 | zone_info_array_t *infop, | |
3129 | mach_msg_type_number_t *infoCntp) | |
3130 | { | |
3131 | zone_name_t *names; | |
3132 | vm_offset_t names_addr; | |
3133 | vm_size_t names_size; | |
3134 | zone_info_t *info; | |
3135 | vm_offset_t info_addr; | |
3136 | vm_size_t info_size; | |
3137 | unsigned int max_zones, i; | |
3138 | zone_t z; | |
3139 | zone_name_t *zn; | |
3140 | zone_info_t *zi; | |
3141 | kern_return_t kr; | |
6d2010ae A |
3142 | |
3143 | vm_size_t used; | |
3144 | vm_map_copy_t copy; | |
1c79356b | 3145 | |
b0d623f7 | 3146 | |
1c79356b A |
3147 | if (host == HOST_NULL) |
3148 | return KERN_INVALID_HOST; | |
3149 | ||
b0d623f7 A |
3150 | #if defined(__LP64__) |
3151 | if (!thread_is_64bit(current_thread())) | |
3152 | return KERN_NOT_SUPPORTED; | |
3153 | #else | |
3154 | if (thread_is_64bit(current_thread())) | |
3155 | return KERN_NOT_SUPPORTED; | |
3156 | #endif | |
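/*
 * The legacy zone_info_t reply uses natural-width fields, so its
 * layout only matches callers whose word size agrees with the
 * kernel's; mismatched clients are refused above rather than handed
 * a structure they would misparse.  mach_zone_info() uses fixed
 * 64-bit fields and carries no such restriction.
 */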
3157 | ||
2d21ac55 A |
3158 | num_fake_zones = sizeof fake_zones / sizeof fake_zones[0]; |
3159 | ||
1c79356b A |
3160 | /* |
3161 | * We assume that zones aren't freed once allocated. | |
3162 | * We won't pick up any zones that are allocated later. | |
3163 | */ | |
3164 | ||
3165 | simple_lock(&all_zones_lock); | |
b0d623f7 | 3166 | max_zones = (unsigned int)(num_zones + num_fake_zones); |
1c79356b A |
3167 | z = first_zone; |
3168 | simple_unlock(&all_zones_lock); | |
3169 | ||
6d2010ae A |
3170 | names_size = round_page(max_zones * sizeof *names); |
3171 | kr = kmem_alloc_pageable(ipc_kernel_map, | |
3172 | &names_addr, names_size); | |
3173 | if (kr != KERN_SUCCESS) | |
3174 | return kr; | |
3175 | names = (zone_name_t *) names_addr; | |
3176 | ||
3177 | info_size = round_page(max_zones * sizeof *info); | |
3178 | kr = kmem_alloc_pageable(ipc_kernel_map, | |
3179 | &info_addr, info_size); | |
3180 | if (kr != KERN_SUCCESS) { | |
3181 | kmem_free(ipc_kernel_map, | |
3182 | names_addr, names_size); | |
3183 | return kr; | |
1c79356b | 3184 | } |
6d2010ae A |
3185 | |
3186 | info = (zone_info_t *) info_addr; | |
3187 | ||
1c79356b A |
3188 | zn = &names[0]; |
3189 | zi = &info[0]; | |
3190 | ||
6d2010ae | 3191 | for (i = 0; i < max_zones - num_fake_zones; i++) { |
1c79356b A |
3192 | struct zone zcopy; |
3193 | ||
3194 | assert(z != ZONE_NULL); | |
3195 | ||
3196 | lock_zone(z); | |
3197 | zcopy = *z; | |
3198 | unlock_zone(z); | |
3199 | ||
3200 | simple_lock(&all_zones_lock); | |
3201 | z = z->next_zone; | |
3202 | simple_unlock(&all_zones_lock); | |
3203 | ||
3204 | /* assuming here the name data is static */ | |
3205 | (void) strncpy(zn->zn_name, zcopy.zone_name, | |
3206 | sizeof zn->zn_name); | |
2d21ac55 | 3207 | zn->zn_name[sizeof zn->zn_name - 1] = '\0'; |
1c79356b A |
3208 | |
3209 | zi->zi_count = zcopy.count; | |
3210 | zi->zi_cur_size = zcopy.cur_size; | |
3211 | zi->zi_max_size = zcopy.max_size; | |
3212 | zi->zi_elem_size = zcopy.elem_size; | |
3213 | zi->zi_alloc_size = zcopy.alloc_size; | |
3214 | zi->zi_exhaustible = zcopy.exhaustible; | |
3215 | zi->zi_collectable = zcopy.collectable; | |
3216 | ||
3217 | zn++; | |
3218 | zi++; | |
3219 | } | |
0c530ab8 | 3220 | |
2d21ac55 A |
3221 | /* |
3222 | * Loop through the fake zones and fill in their entries using the | |
3223 | * specialized query functions. | |
3224 | */ | |
3225 | for (i = 0; i < num_fake_zones; i++) { | |
6d2010ae A |
3226 | int caller_acct; |
3227 | uint64_t sum_space; | |
2d21ac55 A |
3228 | strncpy(zn->zn_name, fake_zones[i].name, sizeof zn->zn_name); |
3229 | zn->zn_name[sizeof zn->zn_name - 1] = '\0'; | |
6d2010ae A |
3230 | fake_zones[i].query(&zi->zi_count, &zi->zi_cur_size, |
3231 | &zi->zi_max_size, &zi->zi_elem_size, | |
3232 | &zi->zi_alloc_size, &sum_space, | |
3233 | &zi->zi_collectable, &zi->zi_exhaustible, &caller_acct); | |
2d21ac55 A |
3234 | zn++; |
3235 | zi++; | |
3236 | } | |
1c79356b | 3237 | |
6d2010ae A |
3238 | used = max_zones * sizeof *names; |
3239 | if (used != names_size) | |
3240 | bzero((char *) (names_addr + used), names_size - used); | |
1c79356b | 3241 | |
6d2010ae A |
3242 | kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr, |
3243 | (vm_map_size_t)names_size, TRUE, ©); | |
3244 | assert(kr == KERN_SUCCESS); | |
1c79356b | 3245 | |
6d2010ae | 3246 | *namesp = (zone_name_t *) copy; |
1c79356b A |
3247 | *namesCntp = max_zones; |
3248 | ||
6d2010ae A |
3249 | used = max_zones * sizeof *info; |
3250 | if (used != info_size) | |
3251 | bzero((char *) (info_addr + used), info_size - used); | |
1c79356b | 3252 | |
6d2010ae A |
3253 | kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr, |
3254 | (vm_map_size_t)info_size, TRUE, ©); | |
3255 | assert(kr == KERN_SUCCESS); | |
1c79356b | 3256 | |
6d2010ae | 3257 | *infop = (zone_info_t *) copy; |
1c79356b A |
3258 | *infoCntp = max_zones; |
3259 | ||
3260 | return KERN_SUCCESS; | |
3261 | } | |
3262 | ||
b0d623f7 | 3263 | extern unsigned int stack_total; |
6d2010ae | 3264 | extern unsigned long long stack_allocs; |
b0d623f7 A |
3265 | |
3266 | #if defined(__i386__) || defined (__x86_64__) | |
3267 | extern unsigned int inuse_ptepages_count; | |
6d2010ae | 3268 | extern long long alloc_ptepages_count; |
b0d623f7 A |
3269 | #endif |
3270 | ||
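/*
 * Print a console summary of the larger kernel memory consumers: every
 * zone whose current size exceeds 1MB, kernel stacks, page tables
 * (i386/x86_64 only) and the running total of large kalloc allocations.
 */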
3271 | void zone_display_zprint(void) | |
3272 | { | |
3273 | unsigned int i; | |
3274 | zone_t the_zone; | |
3275 | ||
3276 | if(first_zone!=NULL) { | |
3277 | the_zone = first_zone; | |
3278 | for (i = 0; i < num_zones; i++) { | |
3279 | if(the_zone->cur_size > (1024*1024)) { | |
3280 | printf("%.20s:\t%lu\n",the_zone->zone_name,(uintptr_t)the_zone->cur_size); | |
3281 | } | |
3282 | ||
3283 | if(the_zone->next_zone == NULL) { | |
3284 | break; | |
3285 | } | |
3286 | ||
3287 | the_zone = the_zone->next_zone; | |
3288 | } | |
3289 | } | |
3290 | ||
3291 | printf("Kernel Stacks:\t%lu\n",(uintptr_t)(kernel_stack_size * stack_total)); | |
3292 | ||
3293 | #if defined(__i386__) || defined (__x86_64__) | |
3294 | printf("PageTables:\t%lu\n",(uintptr_t)(PAGE_SIZE * inuse_ptepages_count)); | |
3295 | #endif | |
3296 | ||
3297 | printf("Kalloc.Large:\t%lu\n",(uintptr_t)kalloc_large_total); | |
3298 | } | |
3299 | ||
3300 | ||
3301 | ||
1c79356b A |
3302 | #if MACH_KDB |
3303 | #include <ddb/db_command.h> | |
3304 | #include <ddb/db_output.h> | |
3305 | #include <kern/kern_print.h> | |
3306 | ||
3307 | const char *zone_labels = | |
3308 | "ENTRY COUNT TOT_SZ MAX_SZ ELT_SZ ALLOC_SZ NAME"; | |
3309 | ||
3310 | /* Forwards */ | |
3311 | void db_print_zone( | |
3312 | zone_t addr); | |
3313 | ||
3314 | #if ZONE_DEBUG | |
3315 | void db_zone_check_active( | |
3316 | zone_t zone); | |
3317 | void db_zone_print_active( | |
3318 | zone_t zone); | |
3319 | #endif /* ZONE_DEBUG */ | |
3320 | void db_zone_print_free( | |
3321 | zone_t zone); | |
3322 | void | |
3323 | db_print_zone( | |
3324 | zone_t addr) | |
3325 | { | |
3326 | struct zone zcopy; | |
3327 | ||
3328 | zcopy = *addr; | |
3329 | ||
3330 | db_printf("%8x %8x %8x %8x %6x %8x %s ", | |
3331 | addr, zcopy.count, zcopy.cur_size, | |
3332 | zcopy.max_size, zcopy.elem_size, | |
3333 | zcopy.alloc_size, zcopy.zone_name); | |
3334 | if (zcopy.exhaustible) | |
3335 | db_printf("H"); | |
3336 | if (zcopy.collectable) | |
3337 | db_printf("C"); | |
3338 | if (zcopy.expandable) | |
3339 | db_printf("X"); | |
6d2010ae A |
3340 | if (zcopy.caller_acct) |
3341 | db_printf("A"); | |
1c79356b A |
3342 | db_printf("\n"); |
3343 | } | |
3344 | ||
3345 | /*ARGSUSED*/ | |
3346 | void | |
2d21ac55 A |
3347 | db_show_one_zone(db_expr_t addr, boolean_t have_addr, |
3348 | __unused db_expr_t count, __unused char *modif) | |
1c79356b | 3349 | { |
91447636 | 3350 | struct zone *z = (zone_t)((char *)0 + addr); |
1c79356b A |
3351 | |
3352 | if (z == ZONE_NULL || !have_addr){ | |
3353 | db_error("No Zone\n"); | |
3354 | /*NOTREACHED*/ | |
3355 | } | |
3356 | ||
3357 | db_printf("%s\n", zone_labels); | |
3358 | db_print_zone(z); | |
3359 | } | |
3360 | ||
3361 | /*ARGSUSED*/ | |
3362 | void | |
2d21ac55 A |
3363 | db_show_all_zones(__unused db_expr_t addr, boolean_t have_addr, db_expr_t count, |
3364 | __unused char *modif) | |
1c79356b A |
3365 | { |
3366 | zone_t z; | |
3367 | unsigned total = 0; | |
3368 | ||
3369 | /* | |
3370 | * Don't risk hanging by unconditionally locking; the risk of | |
3371 | * incoherent data is small (zones aren't freed). | |
3372 | */ | |
3373 | have_addr = simple_lock_try(&all_zones_lock); | |
3374 | count = num_zones; | |
3375 | z = first_zone; | |
3376 | if (have_addr) { | |
3377 | simple_unlock(&all_zones_lock); | |
3378 | } | |
3379 | ||
3380 | db_printf("%s\n", zone_labels); | |
3381 | for ( ; count > 0; count--) { | |
3382 | if (!z) { | |
3383 | db_error("Mangled Zone List\n"); | |
3384 | /*NOTREACHED*/ | |
3385 | } | |
3386 | db_print_zone(z); | |
3387 | total += z->cur_size; | |
3388 | ||
3389 | have_addr = simple_lock_try(&all_zones_lock); | |
3390 | z = z->next_zone; | |
3391 | if (have_addr) { | |
3392 | simple_unlock(&all_zones_lock); | |
3393 | } | |
3394 | } | |
3395 | db_printf("\nTotal %8x", total); | |
55e303ae | 3396 | db_printf("\n\nzone_gc() has reclaimed %d pages\n", zgc_stats.pgs_freed); |
1c79356b A |
3397 | } |
3398 | ||
3399 | #if ZONE_DEBUG | |
3400 | void | |
3401 | db_zone_check_active( | |
3402 | zone_t zone) | |
3403 | { | |
3404 | int count = 0; | |
3405 | queue_t tmp_elem; | |
3406 | ||
3407 | if (!zone_debug_enabled(zone) || !zone_check) | |
3408 | return; | |
3409 | tmp_elem = queue_first(&zone->active_zones); | |
3410 | while (count < zone->count) { | |
3411 | count++; | |
3412 | if (tmp_elem == 0) { | |
2d21ac55 | 3413 | printf("unexpected zero element, zone=%p, count=%d\n", |
1c79356b A |
3414 | zone, count); |
3415 | assert(FALSE); | |
3416 | break; | |
3417 | } | |
3418 | if (queue_end(tmp_elem, &zone->active_zones)) { | |
2d21ac55 | 3419 | printf("unexpected queue_end, zone=%p, count=%d\n", |
1c79356b A |
3420 | zone, count); |
3421 | assert(FALSE); | |
3422 | break; | |
3423 | } | |
3424 | tmp_elem = queue_next(tmp_elem); | |
3425 | } | |
3426 | if (!queue_end(tmp_elem, &zone->active_zones)) { | |
2d21ac55 | 3427 | printf("not at queue_end, zone=%p, tmp_elem=%p\n", |
1c79356b A |
3428 | zone, tmp_elem); |
3429 | assert(FALSE); | |
3430 | } | |
3431 | } | |
3432 | ||
3433 | void | |
3434 | db_zone_print_active( | |
3435 | zone_t zone) | |
3436 | { | |
3437 | int count = 0; | |
3438 | queue_t tmp_elem; | |
3439 | ||
3440 | if (!zone_debug_enabled(zone)) { | |
2d21ac55 | 3441 | printf("zone %p debug not enabled\n", zone); |
1c79356b A |
3442 | return; |
3443 | } | |
3444 | if (!zone_check) { | |
3445 | printf("zone_check FALSE\n"); | |
3446 | return; | |
3447 | } | |
3448 | ||
2d21ac55 | 3449 | printf("zone %p, active elements %d\n", zone, zone->count); |
1c79356b A |
3450 | printf("active list:\n"); |
3451 | tmp_elem = queue_first(&zone->active_zones); | |
3452 | while (count < zone->count) { | |
2d21ac55 | 3453 | printf(" %p", tmp_elem); |
1c79356b A |
3454 | count++; |
3455 | if ((count % 6) == 0) | |
3456 | printf("\n"); | |
3457 | if (tmp_elem == 0) { | |
3458 | printf("\nunexpected zero element, count=%d\n", count); | |
3459 | break; | |
3460 | } | |
3461 | if (queue_end(tmp_elem, &zone->active_zones)) { | |
3462 | printf("\nunexpected queue_end, count=%d\n", count); | |
3463 | break; | |
3464 | } | |
3465 | tmp_elem = queue_next(tmp_elem); | |
3466 | } | |
3467 | if (!queue_end(tmp_elem, &zone->active_zones)) | |
2d21ac55 | 3468 | printf("\nnot at queue_end, tmp_elem=%p\n", tmp_elem); |
1c79356b A |
3469 | else |
3470 | printf("\n"); | |
3471 | } | |
3472 | #endif /* ZONE_DEBUG */ | |
3473 | ||
3474 | void | |
3475 | db_zone_print_free( | |
3476 | zone_t zone) | |
3477 | { | |
3478 | int count = 0; | |
3479 | int freecount; | |
3480 | vm_offset_t elem; | |
3481 | ||
3482 | freecount = zone_free_count(zone); | |
2d21ac55 | 3483 | printf("zone %p, free elements %d\n", zone, freecount); |
1c79356b A |
3484 | printf("free list:\n"); |
3485 | elem = zone->free_elements; | |
3486 | while (count < freecount) { | |
3487 | printf(" 0x%lx", (unsigned long)elem); | |
3488 | count++; | |
3489 | if ((count % 6) == 0) | |
3490 | printf("\n"); | |
3491 | if (elem == 0) { | |
3492 | printf("\nunexpected zero element, count=%d\n", count); | |
3493 | break; | |
3494 | } | |
3495 | elem = *((vm_offset_t *)elem); | |
3496 | } | |
3497 | if (elem != 0) | |
3498 | printf("\nnot at end of free list, elem=0x%lx\n", (unsigned long)elem); | |
3499 | else | |
3500 | printf("\n"); | |
3501 | } | |
3502 | ||
3503 | #endif /* MACH_KDB */ | |
3504 | ||
3505 | ||
3506 | #if ZONE_DEBUG | |
3507 | ||
3508 | /* should we care about locks here? */ | |
3509 | ||
3510 | #if MACH_KDB | |
91447636 | 3511 | void * |
1c79356b A |
3512 | next_element( |
3513 | zone_t z, | |
91447636 | 3514 | void *prev) |
1c79356b | 3515 | { |
91447636 A |
3516 | char *elt = (char *)prev; |
3517 | ||
1c79356b | 3518 | if (!zone_debug_enabled(z)) |
2d21ac55 | 3519 | return(NULL); |
55e303ae | 3520 | elt -= ZONE_DEBUG_OFFSET; |
91447636 | 3521 | elt = (char *) queue_next((queue_t) elt); |
1c79356b | 3522 | if ((queue_t) elt == &z->active_zones) |
2d21ac55 | 3523 | return(NULL); |
55e303ae | 3524 | elt += ZONE_DEBUG_OFFSET; |
1c79356b A |
3525 | return(elt); |
3526 | } | |
3527 | ||
91447636 | 3528 | void * |
1c79356b A |
3529 | first_element( |
3530 | zone_t z) | |
3531 | { | |
91447636 | 3532 | char *elt; |
1c79356b A |
3533 | |
3534 | if (!zone_debug_enabled(z)) | |
2d21ac55 | 3535 | return(NULL); |
1c79356b | 3536 | if (queue_empty(&z->active_zones)) |
2d21ac55 | 3537 | return(NULL); |
91447636 | 3538 | elt = (char *)queue_first(&z->active_zones); |
55e303ae | 3539 | elt += ZONE_DEBUG_OFFSET; |
1c79356b A |
3540 | return(elt); |
3541 | } | |
3542 | ||
3543 | /* | |
3544 | * Second arg controls how many zone elements are printed: | |
3545 | * 0 => none | |
3546 | * n, n < 0 => all | |
3547 | * n, n > 0 => last n on active list | |
3548 | */ | |
3549 | int | |
3550 | zone_count( | |
3551 | zone_t z, | |
3552 | int tail) | |
3553 | { | |
91447636 | 3554 | void *elt; |
1c79356b A |
3555 | int count = 0; |
3556 | boolean_t print = (tail != 0); | |
3557 | ||
3558 | if (tail < 0) | |
3559 | tail = z->count; | |
3560 | if (z->count < tail) | |
3561 | tail = 0; | |
3562 | tail = z->count - tail; | |
3563 | for (elt = first_element(z); elt; elt = next_element(z, elt)) { | |
3564 | if (print && tail <= count) | |
3565 | db_printf("%8lx\n", (unsigned long)elt); | |
3566 | count++; | |
3567 | } | |
3568 | assert(count == z->count); | |
3569 | return(count); | |
3570 | } | |
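/*
 * Illustrative only -- how the tail argument above might be used from
 * the kernel debugger, for some zone pointer z:
 *
 *	zone_count(z, 0);	count active elements, print none
 *	zone_count(z, -1);	count and print every active element
 *	zone_count(z, 5);	count all, print only the last five
 */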
3571 | #endif /* MACH_KDB */ | |
3572 | ||
3573 | #define zone_in_use(z) ( z->count || z->free_elements ) | |
3574 | ||
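/*
 * Enabling zone debugging grows elem_size by ZONE_DEBUG_OFFSET so that
 * each element carries the queue linkage used to thread it onto
 * active_zones; that is why debugging can only be toggled while the
 * zone is empty, and only when alloc_size leaves room for the larger
 * elements.
 */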
3575 | void | |
3576 | zone_debug_enable( | |
3577 | zone_t z) | |
3578 | { | |
3579 | if (zone_debug_enabled(z) || zone_in_use(z) || | |
55e303ae | 3580 | z->alloc_size < (z->elem_size + ZONE_DEBUG_OFFSET)) |
1c79356b A |
3581 | return; |
3582 | queue_init(&z->active_zones); | |
55e303ae | 3583 | z->elem_size += ZONE_DEBUG_OFFSET; |
1c79356b A |
3584 | } |
3585 | ||
3586 | void | |
3587 | zone_debug_disable( | |
3588 | zone_t z) | |
3589 | { | |
3590 | if (!zone_debug_enabled(z) || zone_in_use(z)) | |
3591 | return; | |
55e303ae | 3592 | z->elem_size -= ZONE_DEBUG_OFFSET; |
2d21ac55 | 3593 | z->active_zones.next = z->active_zones.prev = NULL; |
1c79356b | 3594 | } |
b0d623f7 A |
3595 | |
3596 | ||
1c79356b | 3597 | #endif /* ZONE_DEBUG */ |