1 | /* |
2 | * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * The contents of this file constitute Original Code as defined in and | |
7 | * are subject to the Apple Public Source License Version 1.1 (the | |
8 | * "License"). You may not use this file except in compliance with the | |
9 | * License. Please obtain a copy of the License at | |
10 | * http://www.apple.com/publicsource and read it before using this file. | |
11 | * | |
12 | * This Original Code and all software distributed under the License are | |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the | |
17 | * License for the specific language governing rights and limitations | |
18 | * under the License. | |
19 | * | |
20 | * @APPLE_LICENSE_HEADER_END@ | |
21 | */ | |
22 | /* | |
23 | * @OSF_COPYRIGHT@ | |
24 | */ | |
25 | /* | |
26 | * Mach Operating System | |
27 | * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University | |
28 | * All Rights Reserved. | |
29 | * | |
30 | * Permission to use, copy, modify and distribute this software and its | |
31 | * documentation is hereby granted, provided that both the copyright | |
32 | * notice and this permission notice appear in all copies of the | |
33 | * software, derivative works or modified versions, and any portions | |
34 | * thereof, and that both notices appear in supporting documentation. | |
35 | * | |
36 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
37 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
38 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
39 | * | |
40 | * Carnegie Mellon requests users of this software to return to | |
41 | * | |
42 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
43 | * School of Computer Science | |
44 | * Carnegie Mellon University | |
45 | * Pittsburgh PA 15213-3890 | |
46 | * | |
47 | * any improvements or extensions that they make and grant Carnegie Mellon | |
48 | * the rights to redistribute these changes. | |
49 | */ | |
50 | /* | |
51 | */ | |
52 | /* | |
53 | * File: kern/zalloc.c | |
54 | * Author: Avadis Tevanian, Jr. | |
55 | * | |
56 | * Zone-based memory allocator. A zone is a collection of fixed size | |
57 | * data blocks for which quick allocation/deallocation is possible. | |
58 | */ | |
59 | #include <zone_debug.h> | |
60 | #include <norma_vm.h> | |
61 | #include <mach_kdb.h> | |
62 | #include <kern/ast.h> | |
63 | #include <kern/assert.h> | |
64 | #include <kern/macro_help.h> | |
65 | #include <kern/sched.h> | |
66 | #include <kern/lock.h> | |
67 | #include <kern/sched_prim.h> | |
68 | #include <kern/misc_protos.h> | |
69 | #include <kern/thread_call.h> | |
70 | #include <kern/zalloc.h> |
71 | #include <mach/vm_param.h> | |
72 | #include <vm/vm_kern.h> | |
73 | #include <machine/machparam.h> | |
74 | ||
75 | ||
76 | #if MACH_ASSERT | |
77 | /* Detect use of zone elt after freeing it by two methods: | |
78 | * (1) Range-check the free-list "next" ptr for sanity. | |
79 | * (2) Store the ptr in two different words, and compare them against | |
80 | * each other when re-using the zone elt, to detect modifications; | |
81 | */ | |
82 | ||
83 | #if defined(__alpha) | |
84 | ||
85 | #define is_kernel_data_addr(a) \ | |
86 | (!(a) || IS_SYS_VA(a) && !((a) & (sizeof(long)-1))) | |
87 | ||
88 | #else /* !defined(__alpha) */ | |
89 | ||
90 | #define is_kernel_data_addr(a) \ | |
91 | (!(a) || (a) >= VM_MIN_KERNEL_ADDRESS && !((a) & 0x3)) | |
92 | ||
93 | #endif /* defined(__alpha) */ | |
94 | ||
95 | /* Should we set all words of the zone element to an illegal address | |
96 | * when it is freed, to help catch usage after freeing? The down-side | |
97 | * is that this obscures the identity of the freed element. | |
98 | */ | |
99 | boolean_t zfree_clear = FALSE; | |
100 | ||
101 | #define ADD_TO_ZONE(zone, element) \ | |
102 | MACRO_BEGIN \ | |
103 | if (zfree_clear) \ | |
104 | { int i; \ | |
105 | for (i=1; \ | |
106 | i < zone->elem_size/sizeof(vm_offset_t) - 1; \ | |
107 | i++) \ | |
108 | ((vm_offset_t *)(element))[i] = 0xdeadbeef; \ | |
109 | } \ | |
110 | ((vm_offset_t *)(element))[0] = (zone)->free_elements; \ | |
111 | (zone)->free_elements = (vm_offset_t) (element); \ | |
112 | (zone)->count--; \ | |
113 | MACRO_END | |
114 | ||
115 | #define REMOVE_FROM_ZONE(zone, ret, type) \ | |
116 | MACRO_BEGIN \ | |
117 | (ret) = (type) (zone)->free_elements; \ | |
118 | if ((ret) != (type) 0) { \ | |
119 | if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0])) { \ | |
120 | panic("A freed zone element has been modified.\n"); \ | |
121 | } \ | |
122 | (zone)->count++; \ | |
123 | (zone)->free_elements = *((vm_offset_t *)(ret)); \ | |
124 | } \ | |
125 | MACRO_END | |
126 | #else /* MACH_ASSERT */ | |
127 | ||
128 | #define ADD_TO_ZONE(zone, element) \ | |
129 | MACRO_BEGIN \ | |
130 | *((vm_offset_t *)(element)) = (zone)->free_elements; \ | |
131 | (zone)->free_elements = (vm_offset_t) (element); \ | |
132 | (zone)->count--; \ | |
133 | MACRO_END | |
134 | ||
135 | #define REMOVE_FROM_ZONE(zone, ret, type) \ | |
136 | MACRO_BEGIN \ | |
137 | (ret) = (type) (zone)->free_elements; \ | |
138 | if ((ret) != (type) 0) { \ | |
139 | (zone)->count++; \ | |
140 | (zone)->free_elements = *((vm_offset_t *)(ret)); \ | |
141 | } \ | |
142 | MACRO_END | |
143 | ||
144 | #endif /* MACH_ASSERT */ | |
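/*
 * Illustrative sketch, not part of the original file: the ADD_TO_ZONE and
 * REMOVE_FROM_ZONE macros above keep free elements on a singly-linked list
 * threaded through the first word of each element, so the free list needs
 * no extra storage.  The standalone user-space demo below (demo_zone,
 * demo_push, demo_pop and the element counts are invented for the example)
 * shows the same push/pop technique in plain C.
 */
#if 0	/* example only -- not compiled with this file */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct demo_zone {
	uintptr_t	free_elements;	/* head of the free list, 0 == empty */
	size_t		elem_size;
	int		count;		/* elements currently handed out */
};

static void
demo_push(struct demo_zone *z, void *element)
{
	*(uintptr_t *)element = z->free_elements;	/* link to old head */
	z->free_elements = (uintptr_t)element;		/* element becomes head */
	z->count--;
}

static void *
demo_pop(struct demo_zone *z)
{
	uintptr_t ret = z->free_elements;

	if (ret != 0) {
		z->count++;
		z->free_elements = *(uintptr_t *)ret;	/* unlink the head */
	}
	return (void *)ret;
}

int
main(void)
{
	static char backing[4][64];	/* pretend this memory came from zcram */
	struct demo_zone z = { 0, sizeof backing[0], 4 };
	int i;

	for (i = 0; i < 4; i++)
		demo_push(&z, backing[i]);
	assert(demo_pop(&z) == (void *)backing[3]);	/* LIFO order */
	assert(demo_pop(&z) == (void *)backing[2]);
	printf("two elements popped, %d still outstanding\n", z.count);
	return 0;
}
#endif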
145 | ||
146 | #if ZONE_DEBUG | |
147 | #define zone_debug_enabled(z) z->active_zones.next | |
148 | #endif /* ZONE_DEBUG */ | |
149 | ||
150 | /* | |
151 | * Support for garbage collection of unused zone pages: | |
152 | */ | |
153 | ||
154 | struct zone_page_table_entry { | |
155 | struct zone_page_table_entry *next; | |
156 | short in_free_list; | |
157 | short alloc_count; | |
158 | }; | |
159 | ||
160 | extern struct zone_page_table_entry * zone_page_table; | |
161 | ||
162 | #define lock_zone_page_table() simple_lock(&zone_page_table_lock) | |
163 | #define unlock_zone_page_table() simple_unlock(&zone_page_table_lock) | |
164 | ||
165 | #define zone_page(addr) \ | |
166 | (&(zone_page_table[(atop(((vm_offset_t)addr) - zone_map_min_address))])) | |
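/*
 * Illustrative note, not part of the original file: atop() converts a byte
 * offset into a page index, so (assuming 4K pages) an address 0x2000 bytes
 * above zone_map_min_address selects zone_page_table[2].  There is exactly
 * one zone_page_table_entry for every page in the zone_map.
 */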
167 | ||
168 | /* Forwards */ | |
169 | void zone_page_init( | |
170 | vm_offset_t addr, | |
171 | vm_size_t size, | |
172 | int value); | |
173 | ||
174 | void zone_page_alloc( | |
175 | vm_offset_t addr, | |
176 | vm_size_t size); | |
177 | ||
178 | void zone_add_free_page_list( | |
179 | struct zone_page_table_entry **free_list, | |
180 | vm_offset_t addr, | |
181 | vm_size_t size); | |
182 | void zone_page_dealloc( | |
183 | vm_offset_t addr, | |
184 | vm_size_t size); | |
185 | ||
186 | void zone_page_in_use( | |
187 | vm_offset_t addr, | |
188 | vm_size_t size); | |
189 | ||
190 | void zone_page_free( | |
191 | vm_offset_t addr, | |
192 | vm_size_t size); | |
193 | ||
194 | boolean_t zone_page_collectable( | |
195 | vm_offset_t addr, | |
196 | vm_size_t size); | |
197 | ||
198 | void zone_page_keep( | |
199 | vm_offset_t addr, | |
200 | vm_size_t size); | |
201 | ||
202 | void zalloc_async( |
203 | thread_call_param_t p0, | |
204 | thread_call_param_t p1); | |
205 | ||
206 | ||
207 | #if ZONE_DEBUG && MACH_KDB |
208 | int zone_count( | |
209 | zone_t z, | |
210 | int tail); | |
211 | #endif /* ZONE_DEBUG && MACH_KDB */ | |
212 | ||
213 | vm_map_t zone_map = VM_MAP_NULL; | |
214 | ||
215 | zone_t zone_zone = ZONE_NULL; /* the zone containing other zones */ | |
216 | ||
217 | /* | |
218 | * The VM system gives us an initial chunk of memory. | |
219 | * It has to be big enough to allocate the zone_zone | |
220 | */ | |
221 | ||
222 | vm_offset_t zdata; | |
223 | vm_size_t zdata_size; | |
224 | ||
225 | #define lock_zone(zone) \ | |
226 | MACRO_BEGIN \ | |
227 | simple_lock(&(zone)->lock); \ | |
228 | MACRO_END |
229 | ||
230 | #define unlock_zone(zone) \ | |
231 | MACRO_BEGIN \ | |
232 | simple_unlock(&(zone)->lock); \ | |
233 | MACRO_END |
234 | ||
235 | #define zone_wakeup(zone) thread_wakeup((event_t)(zone)) |
236 | #define zone_sleep(zone) \ | |
237 | thread_sleep_simple_lock((event_t)(zone), \ | |
238 | &(zone)->lock, \ | |
239 | THREAD_UNINT) | |
240 | ||
241 | #define lock_zone_init(zone) \ |
242 | MACRO_BEGIN \ | |
243 | simple_lock_init(&zone->lock, ETAP_MISC_ZONE); \ | |
244 | MACRO_END | |
245 | ||
246 | #define lock_try_zone(zone) simple_lock_try(&zone->lock) | |
247 | ||
248 | kern_return_t zget_space( | |
249 | vm_offset_t size, | |
250 | vm_offset_t *result); | |
251 | ||
252 | decl_simple_lock_data(,zget_space_lock) | |
253 | vm_offset_t zalloc_next_space; | |
254 | vm_offset_t zalloc_end_of_space; | |
255 | vm_size_t zalloc_wasted_space; | |
256 | ||
257 | /* | |
258 | * Garbage collection map information | |
259 | */ | |
260 | decl_simple_lock_data(, zone_page_table_lock) | |
261 | struct zone_page_table_entry * zone_page_table; | |
262 | vm_offset_t zone_map_min_address; | |
263 | vm_offset_t zone_map_max_address; | |
264 | integer_t zone_pages; | |
265 | ||
266 | /* | |
267 | * Exclude more than one concurrent garbage collection | |
268 | */ | |
269 | decl_mutex_data(, zone_gc_lock) | |
270 | ||
271 | #define from_zone_map(addr) \ | |
272 | ((vm_offset_t)(addr) >= zone_map_min_address && \ | |
273 | (vm_offset_t)(addr) < zone_map_max_address) | |
274 | ||
275 | #define ZONE_PAGE_USED 0 | |
276 | #define ZONE_PAGE_UNUSED -1 | |
277 | ||
278 | ||
279 | /* | |
280 | * Protects first_zone, last_zone, num_zones, | |
281 | * and the next_zone field of zones. | |
282 | */ | |
283 | decl_simple_lock_data(, all_zones_lock) | |
284 | zone_t first_zone; | |
285 | zone_t *last_zone; | |
286 | int num_zones; | |
287 | ||
288 | boolean_t zone_gc_allowed = TRUE; |
289 | boolean_t zone_gc_forced = FALSE; | |
290 | unsigned zone_gc_last_tick = 0; | |
291 | unsigned zone_gc_max_rate = 0; /* in ticks */ | |
292 | ||
293 | ||
294 | /* |
295 | * zinit initializes a new zone. The zone data structures themselves | |
296 | * are stored in a zone themselves (the "zone of zones"), which is | |
297 | * bootstrapped by zone_bootstrap from memory set aside early in startup. | |
298 | */ | |
299 | zone_t | |
300 | zinit( | |
301 | vm_size_t size, /* the size of an element */ | |
302 | vm_size_t max, /* maximum memory to use */ | |
303 | vm_size_t alloc, /* allocation size */ | |
304 | char *name) /* a name for the zone */ | |
305 | { | |
306 | zone_t z; | |
307 | ||
308 | if (zone_zone == ZONE_NULL) { | |
309 | if (zget_space(sizeof(struct zone), (vm_offset_t *)&z) | |
310 | != KERN_SUCCESS) | |
311 | return(ZONE_NULL); | |
312 | } else | |
313 | z = (zone_t) zalloc(zone_zone); | |
314 | if (z == ZONE_NULL) | |
315 | return(ZONE_NULL); | |
316 | ||
317 | /* | |
318 | * Round off all the parameters appropriately. | |
319 | */ | |
320 | if (size < sizeof(z->free_elements)) | |
321 | size = sizeof(z->free_elements); | |
322 | size = ((size-1) + sizeof(z->free_elements)) - | |
323 | ((size-1) % sizeof(z->free_elements)); | |
324 | if (alloc == 0) | |
325 | alloc = PAGE_SIZE; | |
326 | alloc = round_page(alloc); | |
327 | max = round_page(max); | |
328 | /* | |
329 | * We look for an allocation size with the least fragmentation | |
330 | * in the range of 1 - 5 pages. This size will be used unless | |
331 | * the caller's suggested size is larger AND has less fragmentation. | |
332 | */ | |
333 | { vm_size_t best, waste; unsigned int i; | |
334 | best = PAGE_SIZE; | |
335 | waste = best % size; | |
336 | for (i = 2; i <= 5; i++){ vm_size_t tsize, twaste; | |
337 | tsize = i * PAGE_SIZE; | |
338 | twaste = tsize % size; | |
339 | if (twaste < waste) | |
340 | best = tsize, waste = twaste; | |
341 | } | |
342 | if (alloc <= best || (alloc % size >= waste)) | |
343 | alloc = best; | |
344 | } | |
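/*
 * Worked example, not part of the original file (assuming 4K pages and an
 * elem_size of 832 bytes): the candidate wastes are 4096 % 832 = 768,
 * 8192 % 832 = 704, 12288 % 832 = 640, 16384 % 832 = 576 and
 * 20480 % 832 = 512, so "best" settles on 5 pages.  A caller-suggested
 * alloc of 2 pages is not larger than best, so it is overridden and the
 * zone ends up allocating 5 pages at a time.
 */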
345 | if (max && (max < alloc)) | |
346 | max = alloc; | |
347 | ||
348 | z->free_elements = 0; | |
349 | z->cur_size = 0; | |
350 | z->max_size = max; | |
351 | z->elem_size = size; | |
352 | z->alloc_size = alloc; | |
353 | z->zone_name = name; | |
354 | z->count = 0; | |
355 | z->doing_alloc = FALSE; | |
356 | z->exhaustible = FALSE; | |
357 | z->collectable = TRUE; | |
358 | z->allows_foreign = FALSE; | |
359 | z->expandable = TRUE; | |
360 | z->waiting = FALSE; | |
361 | z->async_pending = FALSE; | |
362 | |
363 | #if ZONE_DEBUG | |
364 | z->active_zones.next = z->active_zones.prev = 0; | |
365 | zone_debug_enable(z); | |
366 | #endif /* ZONE_DEBUG */ | |
367 | lock_zone_init(z); | |
368 | ||
369 | /* | |
370 | * Add the zone to the all-zones list. | |
371 | */ | |
372 | ||
373 | z->next_zone = ZONE_NULL; | |
374 | thread_call_setup(&z->call_async_alloc, zalloc_async, z); | |
375 | simple_lock(&all_zones_lock); |
376 | *last_zone = z; | |
377 | last_zone = &z->next_zone; | |
378 | num_zones++; | |
379 | simple_unlock(&all_zones_lock); | |
380 | ||
381 | return(z); | |
382 | } | |
383 | ||
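/*
 * Illustrative usage sketch, not part of the original file: how a kernel
 * subsystem would typically create and use a zone through the interfaces
 * defined in this file.  The "widget" type, the element counts and the
 * function names are invented for the example.
 */
#if 0	/* example only -- not compiled with this file */
struct widget {
	int	w_id;
	char	w_name[32];
};

static zone_t	widget_zone;

void
widget_init(void)
{
	/*
	 * Arguments: element size, maximum memory the zone may consume,
	 * size of each expansion allocation, and a name that shows up in
	 * host_zone_info() and the ddb zone-printing commands.
	 */
	widget_zone = zinit(sizeof(struct widget),
			    1024 * sizeof(struct widget),
			    PAGE_SIZE, "widgets");
	/* zone_change must follow zinit immediately; an exhaustible zone
	 * returns 0 instead of growing once max_size is reached. */
	zone_change(widget_zone, Z_EXHAUST, TRUE);
}

struct widget *
widget_alloc(void)
{
	return (struct widget *) zalloc(widget_zone);	/* may block */
}

void
widget_free(struct widget *w)
{
	zfree(widget_zone, (vm_offset_t) w);
}
#endif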
384 | /* | |
385 | * Cram the given memory into the specified zone. | |
386 | */ | |
387 | void | |
388 | zcram( | |
389 | register zone_t zone, | |
390 | vm_offset_t newmem, | |
391 | vm_size_t size) | |
392 | { | |
393 | register vm_size_t elem_size; | |
394 | ||
395 | /* Basic sanity checks */ | |
396 | assert(zone != ZONE_NULL && newmem != (vm_offset_t)0); | |
397 | assert(!zone->collectable || zone->allows_foreign | |
398 | || (from_zone_map(newmem) && from_zone_map(newmem+size-1))); | |
399 | ||
400 | elem_size = zone->elem_size; | |
401 | ||
402 | lock_zone(zone); | |
403 | while (size >= elem_size) { | |
404 | ADD_TO_ZONE(zone, newmem); | |
405 | if (from_zone_map(newmem)) | |
406 | zone_page_alloc(newmem, elem_size); | |
407 | zone->count++; /* compensate for ADD_TO_ZONE */ | |
408 | size -= elem_size; | |
409 | newmem += elem_size; | |
410 | zone->cur_size += elem_size; | |
411 | } | |
412 | unlock_zone(zone); | |
413 | } | |
414 | ||
415 | /* | |
416 | * Contiguous space allocator for non-paged zones. Allocates "size" amount | |
417 | * of memory from zone_map. | |
418 | */ | |
419 | ||
420 | kern_return_t | |
421 | zget_space( | |
422 | vm_offset_t size, | |
423 | vm_offset_t *result) | |
424 | { | |
425 | vm_offset_t new_space = 0; | |
426 | vm_size_t space_to_add; | |
427 | ||
428 | simple_lock(&zget_space_lock); | |
429 | while ((zalloc_next_space + size) > zalloc_end_of_space) { | |
430 | /* | |
431 | * Add at least one page to allocation area. | |
432 | */ | |
433 | ||
434 | space_to_add = round_page(size); | |
435 | ||
436 | if (new_space == 0) { | |
437 | kern_return_t retval; | |
438 | /* | |
439 | * Memory cannot be wired down while holding | |
440 | * any locks that the pageout daemon might | |
441 | * need to free up pages. [Making the zget_space | |
442 | * lock a complex lock does not help in this | |
443 | * regard.] | |
444 | * | |
445 | * Unlock and allocate memory. Because several | |
446 | * threads might try to do this at once, don't | |
447 | * use the memory before checking for available | |
448 | * space again. | |
449 | */ | |
450 | ||
451 | simple_unlock(&zget_space_lock); | |
452 | ||
453 | retval = kernel_memory_allocate(zone_map, &new_space, | |
454 | space_to_add, 0, KMA_KOBJECT|KMA_NOPAGEWAIT); | |
455 | if (retval != KERN_SUCCESS) | |
456 | return(retval); | |
457 | zone_page_init(new_space, space_to_add, | |
458 | ZONE_PAGE_USED); | |
459 | simple_lock(&zget_space_lock); | |
460 | continue; | |
461 | } | |
462 | ||
463 | ||
464 | /* | |
465 | * Memory was allocated in a previous iteration. | |
466 | * | |
467 | * Check whether the new region is contiguous | |
468 | * with the old one. | |
469 | */ | |
470 | ||
471 | if (new_space != zalloc_end_of_space) { | |
472 | /* | |
473 | * Throw away the remainder of the | |
474 | * old space, and start a new one. | |
475 | */ | |
476 | zalloc_wasted_space += | |
477 | zalloc_end_of_space - zalloc_next_space; | |
478 | zalloc_next_space = new_space; | |
479 | } | |
480 | ||
481 | zalloc_end_of_space = new_space + space_to_add; | |
482 | ||
483 | new_space = 0; | |
484 | } | |
485 | *result = zalloc_next_space; | |
486 | zalloc_next_space += size; | |
487 | simple_unlock(&zget_space_lock); | |
488 | ||
489 | if (new_space != 0) | |
490 | kmem_free(zone_map, new_space, space_to_add); | |
491 | ||
492 | return(KERN_SUCCESS); | |
493 | } | |
494 | ||
495 | ||
496 | /* | |
497 | * Steal memory for the zone package. Called from | |
498 | * vm_page_bootstrap(). | |
499 | */ | |
500 | void | |
501 | zone_steal_memory(void) | |
502 | { | |
503 | zdata_size = round_page(128*sizeof(struct zone)); | |
504 | zdata = pmap_steal_memory(zdata_size); | |
505 | } | |
506 | ||
507 | ||
508 | /* | |
509 | * Fill a zone with enough memory to contain at least nelem elements. | |
510 | * Memory is obtained with kmem_alloc_wired from the kernel_map. | |
511 | * Return the number of elements actually put into the zone, which may | |
512 | * be more than the caller asked for since the memory allocation is | |
513 | * rounded up to a full page. | |
514 | */ | |
515 | int | |
516 | zfill( | |
517 | zone_t zone, | |
518 | int nelem) | |
519 | { | |
520 | kern_return_t kr; | |
521 | vm_size_t size; | |
522 | vm_offset_t memory; | |
523 | int nalloc; | |
524 | ||
525 | assert(nelem > 0); | |
526 | if (nelem <= 0) | |
527 | return 0; | |
528 | size = nelem * zone->elem_size; | |
529 | size = round_page(size); | |
530 | kr = kmem_alloc_wired(kernel_map, &memory, size); | |
531 | if (kr != KERN_SUCCESS) | |
532 | return 0; | |
533 | ||
534 | zone_change(zone, Z_FOREIGN, TRUE); | |
535 | zcram(zone, memory, size); | |
536 | nalloc = size / zone->elem_size; | |
537 | assert(nalloc >= nelem); | |
538 | ||
539 | return nalloc; | |
540 | } | |
541 | ||
542 | /* | |
543 | * Initialize the "zone of zones" which uses fixed memory allocated | |
544 | * earlier in memory initialization. zone_bootstrap is called | |
545 | * before zone_init. | |
546 | */ | |
547 | void | |
548 | zone_bootstrap(void) | |
549 | { | |
550 | vm_size_t zone_zone_size; | |
551 | vm_offset_t zone_zone_space; | |
552 | ||
553 | simple_lock_init(&all_zones_lock, ETAP_MISC_ZONE_ALL); | |
554 | ||
555 | first_zone = ZONE_NULL; | |
556 | last_zone = &first_zone; | |
557 | num_zones = 0; | |
558 | ||
559 | simple_lock_init(&zget_space_lock, ETAP_MISC_ZONE_GET); | |
560 | zalloc_next_space = zdata; | |
561 | zalloc_end_of_space = zdata + zdata_size; | |
562 | zalloc_wasted_space = 0; | |
563 | ||
564 | /* assertion: nobody else called zinit before us */ | |
565 | assert(zone_zone == ZONE_NULL); | |
566 | zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone), | |
567 | sizeof(struct zone), "zones"); | |
568 | zone_change(zone_zone, Z_COLLECT, FALSE); | |
569 | zone_zone_size = zalloc_end_of_space - zalloc_next_space; | |
570 | zget_space(zone_zone_size, &zone_zone_space); | |
571 | zcram(zone_zone, zone_zone_space, zone_zone_size); | |
572 | } | |
573 | ||
574 | void | |
575 | zone_init( | |
576 | vm_size_t max_zonemap_size) | |
577 | { | |
578 | kern_return_t retval; | |
579 | vm_offset_t zone_min; | |
580 | vm_offset_t zone_max; | |
581 | vm_size_t zone_table_size; | |
582 | ||
583 | retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size, | |
584 | FALSE, TRUE, &zone_map); | |
585 | if (retval != KERN_SUCCESS) | |
586 | panic("zone_init: kmem_suballoc failed"); | |
587 | zone_max = zone_min + round_page(max_zonemap_size); | |
588 | /* | |
589 | * Setup garbage collection information: | |
590 | */ | |
591 | zone_table_size = atop(zone_max - zone_min) * | |
592 | sizeof(struct zone_page_table_entry); | |
593 | if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table, | |
594 | zone_table_size) != KERN_SUCCESS) | |
595 | panic("zone_init"); | |
596 | zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size); | |
597 | zone_pages = atop(zone_max - zone_min); | |
598 | zone_map_min_address = zone_min; | |
599 | zone_map_max_address = zone_max; | |
600 | simple_lock_init(&zone_page_table_lock, ETAP_MISC_ZONE_PTABLE); | |
601 | mutex_init(&zone_gc_lock, ETAP_NO_TRACE); | |
602 | zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED); | |
603 | } | |
604 | ||
605 | ||
606 | /* | |
607 | * zalloc returns an element from the specified zone. | |
608 | */ | |
609 | vm_offset_t | |
610 | zalloc_canblock( | |
611 | register zone_t zone, | |
612 | boolean_t canblock) | |
613 | { | |
614 | vm_offset_t addr; | |
615 | kern_return_t retval; | |
616 | ||
617 | assert(zone != ZONE_NULL); | |
618 | check_simple_locks(); | |
619 | ||
620 | lock_zone(zone); | |
621 | ||
622 | REMOVE_FROM_ZONE(zone, addr, vm_offset_t); | |
623 | |
624 | while ((addr == 0) && canblock) { | |
625 | /* |
626 | * If nothing was there, try to get more | |
627 | */ | |
628 | if (zone->doing_alloc) { | |
629 | /* |
630 | * Someone is allocating memory for this zone. | |
631 | * Wait for it to show up, then try again. | |
632 | */ | |
633 | zone->waiting = TRUE; | |
634 | zone_sleep(zone); | |
635 | } |
636 | else { | |
637 | if ((zone->cur_size + zone->elem_size) > | |
638 | zone->max_size) { | |
639 | if (zone->exhaustible) | |
640 | break; | |
641 | if (zone->expandable) { | |
642 | /* | |
643 | * We're willing to overflow certain | |
644 | * zones, but not without complaining. | |
645 | * | |
646 | * This is best used in conjunction | |
647 | * with the collectable flag. What we | |
648 | * want is an assurance we can get the | |
649 | * memory back, assuming there's no | |
650 | * leak. | |
651 | */ | |
652 | zone->max_size += (zone->max_size >> 1); | |
653 | } else { | |
654 | unlock_zone(zone); | |
655 | ||
656 | panic("zalloc: zone \"%s\" empty.", zone->zone_name); |
657 | } | |
658 | } | |
659 | zone->doing_alloc = TRUE; | |
660 | unlock_zone(zone); | |
661 | ||
662 | if (zone->collectable) { | |
663 | vm_offset_t space; | |
664 | vm_size_t alloc_size; | |
665 | ||
666 | if (vm_pool_low()) | |
667 | alloc_size = | |
668 | round_page(zone->elem_size); | |
669 | else | |
670 | alloc_size = zone->alloc_size; | |
671 | ||
672 | retval = kernel_memory_allocate(zone_map, | |
673 | &space, alloc_size, 0, | |
674 | KMA_KOBJECT|KMA_NOPAGEWAIT); | |
675 | if (retval == KERN_SUCCESS) { | |
676 | zone_page_init(space, alloc_size, | |
677 | ZONE_PAGE_USED); | |
678 | zcram(zone, space, alloc_size); | |
679 | } else if (retval != KERN_RESOURCE_SHORTAGE) { | |
680 | /* would like to cause a zone_gc() */ | |
681 | ||
682 | panic("zalloc"); |
683 | } | |
684 | lock_zone(zone); | |
685 | zone->doing_alloc = FALSE; | |
686 | if (zone->waiting) { | |
687 | zone->waiting = FALSE; | |
688 | zone_wakeup(zone); | |
689 | } |
690 | REMOVE_FROM_ZONE(zone, addr, vm_offset_t); | |
691 | if (addr == 0 && | |
692 | retval == KERN_RESOURCE_SHORTAGE) { | |
693 | unlock_zone(zone); | |
694 | ||
695 | VM_PAGE_WAIT(); |
696 | lock_zone(zone); | |
697 | } | |
698 | } else { | |
699 | vm_offset_t space; | |
700 | retval = zget_space(zone->elem_size, &space); | |
701 | ||
702 | lock_zone(zone); | |
703 | zone->doing_alloc = FALSE; | |
704 | if (zone->waiting) { | |
705 | zone->waiting = FALSE; | |
706 | thread_wakeup((event_t)zone); | |
707 | } | |
708 | if (retval == KERN_SUCCESS) { | |
709 | zone->count++; | |
710 | zone->cur_size += zone->elem_size; | |
711 | #if ZONE_DEBUG | |
712 | if (zone_debug_enabled(zone)) { | |
713 | enqueue_tail(&zone->active_zones, (queue_entry_t)space); | |
714 | } | |
715 | #endif | |
716 | unlock_zone(zone); | |
717 | zone_page_alloc(space, zone->elem_size); | |
718 | #if ZONE_DEBUG | |
719 | if (zone_debug_enabled(zone)) | |
720 | space += sizeof(queue_chain_t); | |
721 | #endif | |
722 | return(space); | |
723 | } | |
724 | if (retval == KERN_RESOURCE_SHORTAGE) { | |
725 | unlock_zone(zone); | |
726 | ||
727 | VM_PAGE_WAIT(); |
728 | lock_zone(zone); | |
729 | } else { | |
730 | panic("zalloc"); |
731 | } | |
732 | } | |
733 | } | |
734 | if (addr == 0) | |
735 | REMOVE_FROM_ZONE(zone, addr, vm_offset_t); | |
736 | } | |
737 | ||
738 | if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (!vm_pool_low())) { |
739 | zone->async_pending = TRUE; | |
740 | unlock_zone(zone); | |
741 | thread_call_enter(&zone->call_async_alloc); | |
742 | lock_zone(zone); | |
743 | REMOVE_FROM_ZONE(zone, addr, vm_offset_t); | |
744 | } | |
745 | ||
746 | #if ZONE_DEBUG |
747 | if (addr && zone_debug_enabled(zone)) { | |
748 | enqueue_tail(&zone->active_zones, (queue_entry_t)addr); | |
749 | addr += sizeof(queue_chain_t); | |
750 | } | |
751 | #endif | |
752 | ||
753 | unlock_zone(zone); | |
754 | ||
755 | return(addr); |
756 | } | |
757 | ||
758 | ||
759 | vm_offset_t | |
760 | zalloc( | |
761 | register zone_t zone) | |
762 | { | |
763 | return( zalloc_canblock(zone, TRUE) ); | |
764 | } | |
765 | ||
766 | vm_offset_t | |
767 | zalloc_noblock( | |
768 | register zone_t zone) | |
769 | { | |
770 | return( zalloc_canblock(zone, FALSE) ); | |
771 | } | |
772 | ||
773 | void |
774 | zalloc_async( | |
775 | thread_call_param_t p0, | |
776 | thread_call_param_t p1) | |
777 | { | |
778 | vm_offset_t elt; | |
779 | ||
780 | elt = zalloc_canblock((zone_t)p0, TRUE); | |
781 | zfree((zone_t)p0, elt); | |
782 | lock_zone(((zone_t)p0)); | |
783 | ((zone_t)p0)->async_pending = FALSE; | |
784 | unlock_zone(((zone_t)p0)); | |
785 | } | |
786 | ||
787 | |
788 | /* | |
789 | * zget returns an element from the specified zone | |
790 | * and returns immediately, with nothing, if nothing is available. | |
791 | * | |
792 | * This form should be used when you cannot block (like when | |
793 | * processing an interrupt). | |
794 | */ | |
795 | vm_offset_t | |
796 | zget( | |
797 | register zone_t zone) | |
798 | { | |
799 | register vm_offset_t addr; | |
800 | ||
801 | assert( zone != ZONE_NULL ); | |
802 | ||
803 | if (!lock_try_zone(zone)) | |
804 | return ((vm_offset_t)0); | |
805 | ||
806 | REMOVE_FROM_ZONE(zone, addr, vm_offset_t); | |
807 | #if ZONE_DEBUG | |
808 | if (addr && zone_debug_enabled(zone)) { | |
809 | enqueue_tail(&zone->active_zones, (queue_entry_t)addr); | |
810 | addr += sizeof(queue_chain_t); | |
811 | } | |
812 | #endif /* ZONE_DEBUG */ | |
813 | unlock_zone(zone); | |
814 | ||
815 | return(addr); | |
816 | } | |
817 | ||
818 | /* Keep this FALSE by default. Large memory machines run orders of magnitude | |
819 | slower in debug mode when it is TRUE. Use the debugger to enable it if needed. */ | |
820 | boolean_t zone_check = FALSE; | |
821 | ||
822 | void | |
823 | zfree( | |
824 | register zone_t zone, | |
825 | vm_offset_t elem) | |
826 | { | |
827 | ||
828 | #if MACH_ASSERT | |
829 | /* Basic sanity checks */ | |
830 | if (zone == ZONE_NULL || elem == (vm_offset_t)0) | |
831 | panic("zfree: NULL"); | |
832 | /* zone_gc assumes zones are never freed */ | |
833 | if (zone == zone_zone) | |
834 | panic("zfree: freeing to zone_zone breaks zone_gc!"); | |
835 | if (zone->collectable && !zone->allows_foreign && | |
836 | (!from_zone_map(elem) || !from_zone_map(elem+zone->elem_size-1))) | |
837 | panic("zfree: non-allocated memory in collectable zone!"); | |
838 | #endif | |
839 | ||
840 | lock_zone(zone); | |
841 | #if ZONE_DEBUG | |
842 | if (zone_debug_enabled(zone)) { | |
843 | queue_t tmp_elem; | |
844 | ||
845 | elem -= sizeof(queue_chain_t); | |
846 | if (zone_check) { | |
847 | /* check the zone's consistency */ | |
848 | ||
849 | for (tmp_elem = queue_first(&zone->active_zones); | |
850 | !queue_end(tmp_elem, &zone->active_zones); | |
851 | tmp_elem = queue_next(tmp_elem)) | |
852 | if (elem == (vm_offset_t)tmp_elem) | |
853 | break; | |
854 | if (elem != (vm_offset_t)tmp_elem) | |
855 | panic("zfree()ing element from wrong zone"); | |
856 | } | |
857 | remqueue(&zone->active_zones, (queue_t) elem); | |
858 | } | |
859 | #endif /* ZONE_DEBUG */ | |
860 | if (zone_check) { | |
861 | vm_offset_t this; | |
862 | ||
863 | /* check the zone's consistency */ | |
864 | ||
865 | for (this = zone->free_elements; | |
866 | this != 0; | |
867 | this = * (vm_offset_t *) this) | |
868 | if (!pmap_kernel_va(this) || this == elem) | |
869 | panic("zfree"); | |
870 | } | |
871 | ADD_TO_ZONE(zone, elem); |
872 | ||
873 | /* | |
874 | * If elements are at least a page in size, and memory is low, | |
875 | * request that zone garbage collection be run the next | |
876 | * time the pageout thread runs. | |
877 | */ | |
878 | if (zone->elem_size >= PAGE_SIZE && | |
879 | vm_pool_low()){ | |
880 | zone_gc_forced = TRUE; | |
881 | } | |
882 | unlock_zone(zone); |
883 | } | |
884 | ||
885 | ||
886 | /* Change a zone's flags. | |
887 | * This routine must be called immediately after zinit. | |
888 | */ | |
889 | void | |
890 | zone_change( | |
891 | zone_t zone, | |
892 | unsigned int item, | |
893 | boolean_t value) | |
894 | { | |
895 | assert( zone != ZONE_NULL ); | |
896 | assert( value == TRUE || value == FALSE ); | |
897 | ||
898 | switch(item){ | |
899 | case Z_EXHAUST: | |
900 | zone->exhaustible = value; | |
901 | break; | |
902 | case Z_COLLECT: | |
903 | zone->collectable = value; | |
904 | break; | |
905 | case Z_EXPAND: | |
906 | zone->expandable = value; | |
907 | break; | |
908 | case Z_FOREIGN: | |
909 | zone->allows_foreign = value; | |
910 | break; | |
911 | #if MACH_ASSERT | |
912 | default: | |
913 | panic("Zone_change: Wrong Item Type!"); | |
914 | /* break; */ | |
915 | #endif | |
916 | } | |
917 | lock_zone_init(zone); | |
918 | } | |
919 | ||
920 | /* | |
921 | * Return the expected number of free elements in the zone. | |
922 | * This calculation will be incorrect if items are zfree'd that | |
923 | * were never zalloc'd/zget'd. The correct way to stuff memory | |
924 | * into a zone is by zcram. | |
925 | */ | |
926 | ||
927 | integer_t | |
928 | zone_free_count(zone_t zone) | |
929 | { | |
930 | integer_t free_count; | |
931 | ||
932 | lock_zone(zone); | |
933 | free_count = zone->cur_size/zone->elem_size - zone->count; | |
934 | unlock_zone(zone); | |
935 | ||
936 | assert(free_count >= 0); | |
937 | ||
938 | return(free_count); | |
939 | } | |
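/*
 * Illustrative note, not part of the original file: cur_size is the number
 * of bytes currently backing the zone's elements, so with cur_size = 8192,
 * elem_size = 64 and count = 100 elements outstanding, the zone holds
 * 8192 / 64 = 128 elements in all and zone_free_count() reports
 * 128 - 100 = 28 of them as free.
 */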
940 | ||
941 | /* | |
942 | * zprealloc preallocates wired memory, expanding the specified | |
943 | * zone to the specified size. | |
944 | */ | |
945 | void | |
946 | zprealloc( | |
947 | zone_t zone, | |
948 | vm_size_t size) | |
949 | { | |
950 | vm_offset_t addr; | |
951 | ||
952 | if (size != 0) { | |
953 | if (kmem_alloc_wired(zone_map, &addr, size) != KERN_SUCCESS) | |
954 | panic("zprealloc"); | |
955 | zone_page_init(addr, size, ZONE_PAGE_USED); | |
956 | zcram(zone, addr, size); | |
957 | } | |
958 | } | |
959 | ||
960 | /* | |
961 | * Zone garbage collection subroutines | |
962 | * | |
963 | * These routines have in common the modification of entries in the | |
964 | * zone_page_table. The latter contains one entry for every page | |
965 | * in the zone_map. | |
966 | * | |
967 | * For each page table entry in the given range: | |
968 | * | |
969 | * zone_page_collectable - test if one (in_free_list == alloc_count) | |
970 | * zone_page_keep - reset in_free_list | |
971 | * zone_page_in_use - decrements in_free_list | |
972 | * zone_page_free - increments in_free_list | |
973 | * zone_page_init - initializes in_free_list and alloc_count | |
974 | * zone_page_alloc - increments alloc_count | |
975 | * zone_page_dealloc - decrements alloc_count | |
976 | * zone_add_free_page_list - adds the page to the free list | |
977 | * | |
978 | * Two counts are maintained for each page, the in_free_list count and | |
979 | * alloc_count. The alloc_count is how many zone elements have been | |
980 | * allocated from a page. (Note that the page could contain elements | |
981 | * that span page boundaries. The count includes these elements so | |
982 | * one element may be counted in two pages.) In_free_list is a count | |
983 | * of how many zone elements are currently free. If in_free_list is | |
984 | * equal to alloc_count then the page is eligible for garbage | |
985 | * collection. | |
986 | * | |
987 | * Alloc_count and in_free_list are initialized to the correct values | |
988 | * for a particular zone when a page is zcram'ed into a zone. Subsequent | |
989 | * gets and frees of zone elements will call zone_page_in_use and | |
990 | * zone_page_free which modify the in_free_list count. When the zones | |
991 | * garbage collector runs it will walk through a zones free element list, | |
992 | * remove the elements that reside on collectable pages, and use | |
993 | * zone_add_free_page_list to create a list of pages to be collected. | |
994 | */ | |
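/*
 * Illustrative sketch, not part of the original file: the collectability
 * test in miniature.  A page becomes reclaimable only when every element
 * allocated from it is back on the zone's free list, i.e. when
 * in_free_list has climbed back up to alloc_count.  The demo type and
 * function names below are invented for the example.
 */
#if 0	/* example only -- not compiled with this file */
#include <assert.h>

struct demo_page_entry {		/* stand-in for zone_page_table_entry */
	short	in_free_list;		/* elements from this page now free */
	short	alloc_count;		/* elements allocated from this page */
};

static int
demo_page_collectable(const struct demo_page_entry *p)
{
	return p->in_free_list == p->alloc_count;
}

int
main(void)
{
	struct demo_page_entry page = { 0, 3 };	/* 3 elements still in use */

	assert(!demo_page_collectable(&page));
	page.in_free_list = 3;			/* all 3 have been freed */
	assert(demo_page_collectable(&page));
	return 0;
}
#endif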
995 | boolean_t | |
996 | zone_page_collectable( | |
997 | vm_offset_t addr, | |
998 | vm_size_t size) | |
999 | { | |
1000 | natural_t i, j; | |
1001 | ||
1002 | #if MACH_ASSERT | |
1003 | if (!from_zone_map(addr) || !from_zone_map(addr+size-1)) | |
1004 | panic("zone_page_collectable"); | |
1005 | #endif | |
1006 | ||
1007 | i = atop(addr-zone_map_min_address); | |
1008 | j = atop((addr+size-1) - zone_map_min_address); | |
1009 | lock_zone_page_table(); | |
1010 | for (; i <= j; i++) { | |
1011 | if (zone_page_table[i].in_free_list == | |
1012 | zone_page_table[i].alloc_count) { | |
1013 | unlock_zone_page_table(); | |
1014 | return (TRUE); | |
1015 | } | |
1016 | } | |
1017 | unlock_zone_page_table(); | |
1018 | return (FALSE); | |
1019 | } | |
1020 | ||
1021 | void | |
1022 | zone_page_keep( | |
1023 | vm_offset_t addr, | |
1024 | vm_size_t size) | |
1025 | { | |
1026 | natural_t i, j; | |
1027 | ||
1028 | #if MACH_ASSERT | |
1029 | if (!from_zone_map(addr) || !from_zone_map(addr+size-1)) | |
1030 | panic("zone_page_keep"); | |
1031 | #endif | |
1032 | ||
1033 | i = atop(addr-zone_map_min_address); | |
1034 | j = atop((addr+size-1) - zone_map_min_address); | |
1035 | lock_zone_page_table(); | |
1036 | for (; i <= j; i++) { | |
1037 | zone_page_table[i].in_free_list = 0; | |
1038 | } | |
1039 | unlock_zone_page_table(); | |
1040 | } | |
1041 | ||
1042 | void | |
1043 | zone_page_in_use( | |
1044 | vm_offset_t addr, | |
1045 | vm_size_t size) | |
1046 | { | |
1047 | natural_t i, j; | |
1048 | ||
1049 | #if MACH_ASSERT | |
1050 | if (!from_zone_map(addr) || !from_zone_map(addr+size-1)) | |
1051 | panic("zone_page_in_use"); | |
1052 | #endif | |
1053 | ||
1054 | i = atop(addr-zone_map_min_address); | |
1055 | j = atop((addr+size-1) - zone_map_min_address); | |
1056 | lock_zone_page_table(); | |
1057 | for (; i <= j; i++) { | |
1058 | if (zone_page_table[i].in_free_list > 0) | |
1059 | zone_page_table[i].in_free_list--; | |
1060 | } | |
1061 | unlock_zone_page_table(); | |
1062 | } | |
1063 | ||
1064 | void | |
1065 | zone_page_free( | |
1066 | vm_offset_t addr, | |
1067 | vm_size_t size) | |
1068 | { | |
1069 | natural_t i, j; | |
1070 | ||
1071 | #if MACH_ASSERT | |
1072 | if (!from_zone_map(addr) || !from_zone_map(addr+size-1)) | |
1073 | panic("zone_page_free"); | |
1074 | #endif | |
1075 | ||
1076 | i = atop(addr-zone_map_min_address); | |
1077 | j = atop((addr+size-1) - zone_map_min_address); | |
1078 | lock_zone_page_table(); | |
1079 | for (; i <= j; i++) { | |
1080 | assert(zone_page_table[i].in_free_list >= 0); | |
1081 | zone_page_table[i].in_free_list++; | |
1082 | } | |
1083 | unlock_zone_page_table(); | |
1084 | } | |
1085 | ||
1086 | void | |
1087 | zone_page_init( | |
1088 | vm_offset_t addr, | |
1089 | vm_size_t size, | |
1090 | int value) | |
1091 | { | |
1092 | natural_t i, j; | |
1093 | ||
1094 | #if MACH_ASSERT | |
1095 | if (!from_zone_map(addr) || !from_zone_map(addr+size-1)) | |
1096 | panic("zone_page_init"); | |
1097 | #endif | |
1098 | ||
1099 | i = atop(addr-zone_map_min_address); | |
1100 | j = atop((addr+size-1) - zone_map_min_address); | |
1101 | lock_zone_page_table(); | |
1102 | for (; i <= j; i++) { | |
1103 | zone_page_table[i].alloc_count = value; | |
1104 | zone_page_table[i].in_free_list = 0; | |
1105 | } | |
1106 | unlock_zone_page_table(); | |
1107 | } | |
1108 | ||
1109 | void | |
1110 | zone_page_alloc( | |
1111 | vm_offset_t addr, | |
1112 | vm_size_t size) | |
1113 | { | |
1114 | natural_t i, j; | |
1115 | ||
1116 | #if MACH_ASSERT | |
1117 | if (!from_zone_map(addr) || !from_zone_map(addr+size-1)) | |
1118 | panic("zone_page_alloc"); | |
1119 | #endif | |
1120 | ||
1121 | i = atop(addr-zone_map_min_address); | |
1122 | j = atop((addr+size-1) - zone_map_min_address); | |
1123 | lock_zone_page_table(); | |
1124 | for (; i <= j; i++) { | |
1125 | /* Set alloc_count to (ZONE_PAGE_USED + 1) if | |
1126 | * it was previously set to ZONE_PAGE_UNUSED. | |
1127 | */ | |
1128 | if (zone_page_table[i].alloc_count == ZONE_PAGE_UNUSED) { | |
1129 | zone_page_table[i].alloc_count = 1; | |
1130 | } else { | |
1131 | zone_page_table[i].alloc_count++; | |
1132 | } | |
1133 | } | |
1134 | unlock_zone_page_table(); | |
1135 | } | |
1136 | ||
1137 | void | |
1138 | zone_page_dealloc( | |
1139 | vm_offset_t addr, | |
1140 | vm_size_t size) | |
1141 | { | |
1142 | natural_t i, j; | |
1143 | ||
1144 | #if MACH_ASSERT | |
1145 | if (!from_zone_map(addr) || !from_zone_map(addr+size-1)) | |
1146 | panic("zone_page_dealloc"); | |
1147 | #endif | |
1148 | ||
1149 | i = atop(addr-zone_map_min_address); | |
1150 | j = atop((addr+size-1) - zone_map_min_address); | |
1151 | lock_zone_page_table(); | |
1152 | for (; i <= j; i++) { | |
1153 | zone_page_table[i].alloc_count--; | |
1154 | } | |
1155 | unlock_zone_page_table(); | |
1156 | } | |
1157 | ||
1158 | void | |
1159 | zone_add_free_page_list( | |
1160 | struct zone_page_table_entry **free_list, | |
1161 | vm_offset_t addr, | |
1162 | vm_size_t size) | |
1163 | { | |
1164 | natural_t i, j; | |
1165 | ||
1166 | #if MACH_ASSERT | |
1167 | if (!from_zone_map(addr) || !from_zone_map(addr+size-1)) | |
1168 | panic("zone_add_free_page_list"); | |
1169 | #endif | |
1170 | ||
1171 | i = atop(addr-zone_map_min_address); | |
1172 | j = atop((addr+size-1) - zone_map_min_address); | |
1173 | lock_zone_page_table(); | |
1174 | for (; i <= j; i++) { | |
1175 | if (zone_page_table[i].alloc_count == 0) { | |
1176 | zone_page_table[i].next = *free_list; | |
1177 | *free_list = &zone_page_table[i]; | |
1178 | zone_page_table[i].alloc_count = ZONE_PAGE_UNUSED; | |
1179 | zone_page_table[i].in_free_list = 0; | |
1180 | } | |
1181 | } | |
1182 | unlock_zone_page_table(); | |
1183 | } | |
1184 | ||
1185 | ||
1186 | /* This is used for walking through a zone's free element list. | |
1187 | */ | |
1188 | struct zone_free_entry { | |
1189 | struct zone_free_entry * next; | |
1190 | }; | |
1191 | ||
1192 | int reclaim_page_count = 0; | |
1193 | ||
1194 | /* Zone garbage collection | |
1195 | * | |
1196 | * zone_gc will walk through all the free elements in all the | |
1197 | * zones that are marked collectable looking for reclaimable | |
1198 | * pages. zone_gc is called by consider_zone_gc when the system | |
1199 | * begins to run out of memory. | |
1200 | */ | |
1201 | void | |
1202 | zone_gc(void) | |
1203 | { | |
1204 | unsigned int max_zones; | |
1205 | zone_t z; | |
1206 | unsigned int i; | |
1207 | struct zone_page_table_entry *freep; | |
1208 | struct zone_page_table_entry *zone_free_page_list; | |
1209 | ||
1210 | mutex_lock(&zone_gc_lock); | |
1211 | ||
1212 | /* | |
1213 | * Note that this scheme of locking only to walk the zone list | |
1214 | * assumes that zones are never freed (checked by zfree) | |
1215 | */ | |
1216 | simple_lock(&all_zones_lock); | |
1217 | max_zones = num_zones; | |
1218 | z = first_zone; | |
1219 | simple_unlock(&all_zones_lock); | |
1220 | ||
1221 | #if MACH_ASSERT | |
1222 | lock_zone_page_table(); | |
1223 | for (i = 0; i < zone_pages; i++) | |
1224 | assert(zone_page_table[i].in_free_list == 0); | |
1225 | unlock_zone_page_table(); | |
1226 | #endif /* MACH_ASSERT */ | |
1227 | ||
1228 | zone_free_page_list = (struct zone_page_table_entry *) 0; | |
1229 | ||
1230 | for (i = 0; i < max_zones; i++, z = z->next_zone) { | |
1231 | struct zone_free_entry * prev; | |
1232 | struct zone_free_entry * elt; | |
1233 | struct zone_free_entry * end; | |
1234 | ||
1235 | assert(z != ZONE_NULL); | |
1236 | ||
1237 | if (!z->collectable) | |
1238 | continue; | |
1239 | ||
1240 | lock_zone(z); | |
1241 | ||
1242 | /* | |
1243 | * Do a quick feasibility check before we scan the zone: | |
1244 | * skip unless there is a likelihood of getting 1+ pages back. | |
1245 | */ | |
1246 | if ((z->cur_size - z->count * z->elem_size) <= (2*PAGE_SIZE)){ | |
1247 | unlock_zone(z); | |
1248 | continue; | |
1249 | } | |
1250 | ||
1251 | /* Count the free elements in each page. This loop | |
1252 | * requires that all in_free_list entries are zero. | |
1253 | * | |
1254 | * Exit the loop early if we need to hurry up and drop | |
1255 | * the lock to allow preemption - but we must fully process | |
1256 | * all elements we looked at so far. | |
1257 | */ | |
1258 | elt = (struct zone_free_entry *)(z->free_elements); | |
1259 | while (!ast_urgency() && (elt != (struct zone_free_entry *)0)) { | |
1260 | if (from_zone_map(elt)) | |
1261 | zone_page_free((vm_offset_t)elt, z->elem_size); | |
1262 | elt = elt->next; | |
1263 | } | |
1264 | end = elt; | |
1265 | ||
1266 | /* Now determine which elements should be removed | |
1267 | * from the free list and, after all the elements | |
1268 | * on a page have been removed, add the element's | |
1269 | * page to a list of pages to be freed. | |
1270 | */ | |
1271 | prev = elt = (struct zone_free_entry *)(z->free_elements); | |
1272 | while (elt != end) { | |
1273 | if (!from_zone_map(elt)) { | |
1274 | prev = elt; | |
1275 | elt = elt->next; | |
1276 | continue; | |
1277 | } | |
1278 | if (zone_page_collectable((vm_offset_t)elt, | |
1279 | z->elem_size)) { | |
1280 | z->cur_size -= z->elem_size; | |
1281 | zone_page_in_use((vm_offset_t)elt, | |
1282 | z->elem_size); | |
1283 | zone_page_dealloc((vm_offset_t)elt, | |
1284 | z->elem_size); | |
1285 | zone_add_free_page_list(&zone_free_page_list, | |
1286 | (vm_offset_t)elt, | |
1287 | z->elem_size); | |
1288 | if (elt == prev) { | |
1289 | elt = elt->next; | |
1290 | z->free_elements =(vm_offset_t)elt; | |
1291 | prev = elt; | |
1292 | } else { | |
1293 | prev->next = elt->next; | |
1294 | elt = elt->next; | |
1295 | } | |
1296 | } else { | |
1297 | /* This element is not eligible for collection | |
1298 | * so clear in_free_list in preparation for a | |
1299 | * subsequent garbage collection pass. | |
1300 | */ | |
1301 | zone_page_keep((vm_offset_t)elt, z->elem_size); | |
1302 | prev = elt; | |
1303 | elt = elt->next; | |
1304 | } | |
1305 | } /* end while(elt != end) */ | |
1306 | ||
1307 | unlock_zone(z); | |
1308 | } | |
1309 | ||
1310 | for (freep = zone_free_page_list; freep != 0; freep = freep->next) { | |
1311 | vm_offset_t free_addr; | |
1312 | ||
1313 | free_addr = zone_map_min_address + | |
1314 | PAGE_SIZE * (freep - zone_page_table); | |
1315 | kmem_free(zone_map, free_addr, PAGE_SIZE); | |
1316 | reclaim_page_count++; | |
1317 | } | |
1318 | mutex_unlock(&zone_gc_lock); | |
1319 | } | |
1320 | ||
1321 | /* |
1322 | * consider_zone_gc: | |
1323 | * | |
1324 | * Called by the pageout daemon when the system needs more free pages. | |
1325 | */ | |
1326 | ||
1327 | void | |
1328 | consider_zone_gc(void) | |
1329 | { | |
1330 | /* | |
1331 | * By default, don't attempt zone GC more frequently | |
1332 | * than once a second. | |
1333 | */ |
1334 | ||
1335 | if (zone_gc_max_rate == 0) | |
1336 | zone_gc_max_rate = (1 << SCHED_TICK_SHIFT) + 1; | |
1337 | |
1338 | if (zone_gc_allowed && | |
1339 | ((sched_tick > (zone_gc_last_tick + zone_gc_max_rate)) || |
1340 | zone_gc_forced)) { | |
1341 | zone_gc_forced = FALSE; | |
1342 | zone_gc_last_tick = sched_tick; |
1343 | zone_gc(); | |
1344 | } | |
1345 | } | |
1346 | ||
1347 | #include <mach/kern_return.h> | |
1348 | #include <mach/machine/vm_types.h> | |
1349 | #include <mach_debug/zone_info.h> | |
1350 | #include <kern/host.h> | |
1351 | #include <vm/vm_map.h> | |
1352 | #include <vm/vm_kern.h> | |
1353 | ||
1354 | #include <mach/mach_host_server.h> | |
1355 | ||
1356 | kern_return_t | |
1357 | host_zone_info( | |
1358 | host_t host, | |
1359 | zone_name_array_t *namesp, | |
1360 | mach_msg_type_number_t *namesCntp, | |
1361 | zone_info_array_t *infop, | |
1362 | mach_msg_type_number_t *infoCntp) | |
1363 | { | |
1364 | zone_name_t *names; | |
1365 | vm_offset_t names_addr; | |
1366 | vm_size_t names_size; | |
1367 | zone_info_t *info; | |
1368 | vm_offset_t info_addr; | |
1369 | vm_size_t info_size; | |
1370 | unsigned int max_zones, i; | |
1371 | zone_t z; | |
1372 | zone_name_t *zn; | |
1373 | zone_info_t *zi; | |
1374 | kern_return_t kr; | |
1375 | ||
1376 | if (host == HOST_NULL) | |
1377 | return KERN_INVALID_HOST; | |
1378 | ||
1379 | /* | |
1380 | * We assume that zones aren't freed once allocated. | |
1381 | * We won't pick up any zones that are allocated later. | |
1382 | */ | |
1383 | ||
1384 | simple_lock(&all_zones_lock); | |
1385 | #ifdef ppc | |
1386 | max_zones = num_zones + 4; | |
1387 | #else | |
1388 | max_zones = num_zones + 2; | |
1389 | #endif | |
1390 | z = first_zone; | |
1391 | simple_unlock(&all_zones_lock); | |
1392 | ||
1393 | if (max_zones <= *namesCntp) { | |
1394 | /* use in-line memory */ | |
1395 | ||
1396 | names = *namesp; | |
1397 | } else { | |
1398 | names_size = round_page(max_zones * sizeof *names); | |
1399 | kr = kmem_alloc_pageable(ipc_kernel_map, | |
1400 | &names_addr, names_size); | |
1401 | if (kr != KERN_SUCCESS) | |
1402 | return kr; | |
1403 | names = (zone_name_t *) names_addr; | |
1404 | } | |
1405 | ||
1406 | if (max_zones <= *infoCntp) { | |
1407 | /* use in-line memory */ | |
1408 | ||
1409 | info = *infop; | |
1410 | } else { | |
1411 | info_size = round_page(max_zones * sizeof *info); | |
1412 | kr = kmem_alloc_pageable(ipc_kernel_map, | |
1413 | &info_addr, info_size); | |
1414 | if (kr != KERN_SUCCESS) { | |
1415 | if (names != *namesp) | |
1416 | kmem_free(ipc_kernel_map, | |
1417 | names_addr, names_size); | |
1418 | return kr; | |
1419 | } | |
1420 | ||
1421 | info = (zone_info_t *) info_addr; | |
1422 | } | |
1423 | zn = &names[0]; | |
1424 | zi = &info[0]; | |
1425 | ||
1426 | for (i = 0; i < num_zones; i++) { | |
1427 | struct zone zcopy; | |
1428 | ||
1429 | assert(z != ZONE_NULL); | |
1430 | ||
1431 | lock_zone(z); | |
1432 | zcopy = *z; | |
1433 | unlock_zone(z); | |
1434 | ||
1435 | simple_lock(&all_zones_lock); | |
1436 | z = z->next_zone; | |
1437 | simple_unlock(&all_zones_lock); | |
1438 | ||
1439 | /* assuming here the name data is static */ | |
1440 | (void) strncpy(zn->zn_name, zcopy.zone_name, | |
1441 | sizeof zn->zn_name); | |
1442 | ||
1443 | zi->zi_count = zcopy.count; | |
1444 | zi->zi_cur_size = zcopy.cur_size; | |
1445 | zi->zi_max_size = zcopy.max_size; | |
1446 | zi->zi_elem_size = zcopy.elem_size; | |
1447 | zi->zi_alloc_size = zcopy.alloc_size; | |
1448 | zi->zi_exhaustible = zcopy.exhaustible; | |
1449 | zi->zi_collectable = zcopy.collectable; | |
1450 | ||
1451 | zn++; | |
1452 | zi++; | |
1453 | } | |
1454 | strcpy(zn->zn_name, "kernel_stacks"); | |
1455 | stack_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size, | |
1456 | &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible); | |
1457 | zn++; | |
1458 | zi++; | |
1459 | #ifdef ppc | |
1460 | strcpy(zn->zn_name, "save_areas"); | |
1461 | save_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size, | |
1462 | &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible); | |
1463 | zn++; | |
1464 | zi++; | |
1465 | ||
1466 | strcpy(zn->zn_name, "pmap_mappings"); | |
1467 | mapping_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size, | |
1468 | &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible); | |
1469 | zn++; | |
1470 | zi++; | |
1471 | #endif | |
1472 | strcpy(zn->zn_name, "kalloc.large"); | |
1473 | kalloc_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size, | |
1474 | &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible); | |
1475 | ||
1476 | if (names != *namesp) { | |
1477 | vm_size_t used; | |
1478 | vm_map_copy_t copy; | |
1479 | ||
1480 | used = max_zones * sizeof *names; | |
1481 | ||
1482 | if (used != names_size) | |
1483 | bzero((char *) (names_addr + used), names_size - used); | |
1484 | ||
1485 | kr = vm_map_copyin(ipc_kernel_map, names_addr, names_size, | |
1486 | TRUE, ©); | |
1487 | assert(kr == KERN_SUCCESS); | |
1488 | ||
1489 | *namesp = (zone_name_t *) copy; | |
1490 | } | |
1491 | *namesCntp = max_zones; | |
1492 | ||
1493 | if (info != *infop) { | |
1494 | vm_size_t used; | |
1495 | vm_map_copy_t copy; | |
1496 | ||
1497 | used = max_zones * sizeof *info; | |
1498 | ||
1499 | if (used != info_size) | |
1500 | bzero((char *) (info_addr + used), info_size - used); | |
1501 | ||
1502 | kr = vm_map_copyin(ipc_kernel_map, info_addr, info_size, | |
1503 | TRUE, ©); | |
1504 | assert(kr == KERN_SUCCESS); | |
1505 | ||
1506 | *infop = (zone_info_t *) copy; | |
1507 | } | |
1508 | *infoCntp = max_zones; | |
1509 | ||
1510 | return KERN_SUCCESS; | |
1511 | } | |
1512 | ||
1513 | #if MACH_KDB | |
1514 | #include <ddb/db_command.h> | |
1515 | #include <ddb/db_output.h> | |
1516 | #include <kern/kern_print.h> | |
1517 | ||
1518 | const char *zone_labels = | |
1519 | "ENTRY COUNT TOT_SZ MAX_SZ ELT_SZ ALLOC_SZ NAME"; | |
1520 | ||
1521 | /* Forwards */ | |
1522 | void db_print_zone( | |
1523 | zone_t addr); | |
1524 | ||
1525 | #if ZONE_DEBUG | |
1526 | void db_zone_check_active( | |
1527 | zone_t zone); | |
1528 | void db_zone_print_active( | |
1529 | zone_t zone); | |
1530 | #endif /* ZONE_DEBUG */ | |
1531 | void db_zone_print_free( | |
1532 | zone_t zone); | |
1533 | void | |
1534 | db_print_zone( | |
1535 | zone_t addr) | |
1536 | { | |
1537 | struct zone zcopy; | |
1538 | ||
1539 | zcopy = *addr; | |
1540 | ||
1541 | db_printf("%8x %8x %8x %8x %6x %8x %s ", | |
1542 | addr, zcopy.count, zcopy.cur_size, | |
1543 | zcopy.max_size, zcopy.elem_size, | |
1544 | zcopy.alloc_size, zcopy.zone_name); | |
1545 | if (zcopy.exhaustible) | |
1546 | db_printf("H"); | |
1547 | if (zcopy.collectable) | |
1548 | db_printf("C"); | |
1549 | if (zcopy.expandable) | |
1550 | db_printf("X"); | |
1551 | db_printf("\n"); | |
1552 | } | |
1553 | ||
1554 | /*ARGSUSED*/ | |
1555 | void | |
1556 | db_show_one_zone( | |
1557 | db_expr_t addr, | |
1558 | int have_addr, | |
1559 | db_expr_t count, | |
1560 | char * modif) | |
1561 | { | |
1562 | struct zone *z = (zone_t)addr; | |
1563 | ||
1564 | if (z == ZONE_NULL || !have_addr){ | |
1565 | db_error("No Zone\n"); | |
1566 | /*NOTREACHED*/ | |
1567 | } | |
1568 | ||
1569 | db_printf("%s\n", zone_labels); | |
1570 | db_print_zone(z); | |
1571 | } | |
1572 | ||
1573 | /*ARGSUSED*/ | |
1574 | void | |
1575 | db_show_all_zones( | |
1576 | db_expr_t addr, | |
1577 | int have_addr, | |
1578 | db_expr_t count, | |
1579 | char * modif) | |
1580 | { | |
1581 | zone_t z; | |
1582 | unsigned total = 0; | |
1583 | ||
1584 | /* | |
1585 | * Don't risk hanging by unconditionally locking, | |
1586 | * risk of incoherent data is small (zones aren't freed). | |
1587 | */ | |
1588 | have_addr = simple_lock_try(&all_zones_lock); | |
1589 | count = num_zones; | |
1590 | z = first_zone; | |
1591 | if (have_addr) { | |
1592 | simple_unlock(&all_zones_lock); | |
1593 | } | |
1594 | ||
1595 | db_printf("%s\n", zone_labels); | |
1596 | for ( ; count > 0; count--) { | |
1597 | if (!z) { | |
1598 | db_error("Mangled Zone List\n"); | |
1599 | /*NOTREACHED*/ | |
1600 | } | |
1601 | db_print_zone(z); | |
1602 | total += z->cur_size; | |
1603 | ||
1604 | have_addr = simple_lock_try(&all_zones_lock); | |
1605 | z = z->next_zone; | |
1606 | if (have_addr) { | |
1607 | simple_unlock(&all_zones_lock); | |
1608 | } | |
1609 | } | |
1610 | db_printf("\nTotal %8x", total); | |
1611 | db_printf("\n\nzone_gc() has reclaimed %d pages\n", | |
1612 | reclaim_page_count); | |
1613 | } | |
1614 | ||
1615 | #if ZONE_DEBUG | |
1616 | void | |
1617 | db_zone_check_active( | |
1618 | zone_t zone) | |
1619 | { | |
1620 | int count = 0; | |
1621 | queue_t tmp_elem; | |
1622 | ||
1623 | if (!zone_debug_enabled(zone) || !zone_check) | |
1624 | return; | |
1625 | tmp_elem = queue_first(&zone->active_zones); | |
1626 | while (count < zone->count) { | |
1627 | count++; | |
1628 | if (tmp_elem == 0) { | |
1629 | printf("unexpected zero element, zone=0x%x, count=%d\n", | |
1630 | zone, count); | |
1631 | assert(FALSE); | |
1632 | break; | |
1633 | } | |
1634 | if (queue_end(tmp_elem, &zone->active_zones)) { | |
1635 | printf("unexpected queue_end, zone=0x%x, count=%d\n", | |
1636 | zone, count); | |
1637 | assert(FALSE); | |
1638 | break; | |
1639 | } | |
1640 | tmp_elem = queue_next(tmp_elem); | |
1641 | } | |
1642 | if (!queue_end(tmp_elem, &zone->active_zones)) { | |
1643 | printf("not at queue_end, zone=0x%x, tmp_elem=0x%x\n", | |
1644 | zone, tmp_elem); | |
1645 | assert(FALSE); | |
1646 | } | |
1647 | } | |
1648 | ||
1649 | void | |
1650 | db_zone_print_active( | |
1651 | zone_t zone) | |
1652 | { | |
1653 | int count = 0; | |
1654 | queue_t tmp_elem; | |
1655 | ||
1656 | if (!zone_debug_enabled(zone)) { | |
1657 | printf("zone 0x%x debug not enabled\n", zone); | |
1658 | return; | |
1659 | } | |
1660 | if (!zone_check) { | |
1661 | printf("zone_check FALSE\n"); | |
1662 | return; | |
1663 | } | |
1664 | ||
1665 | printf("zone 0x%x, active elements %d\n", zone, zone->count); | |
1666 | printf("active list:\n"); | |
1667 | tmp_elem = queue_first(&zone->active_zones); | |
1668 | while (count < zone->count) { | |
1669 | printf(" 0x%x", tmp_elem); | |
1670 | count++; | |
1671 | if ((count % 6) == 0) | |
1672 | printf("\n"); | |
1673 | if (tmp_elem == 0) { | |
1674 | printf("\nunexpected zero element, count=%d\n", count); | |
1675 | break; | |
1676 | } | |
1677 | if (queue_end(tmp_elem, &zone->active_zones)) { | |
1678 | printf("\nunexpected queue_end, count=%d\n", count); | |
1679 | break; | |
1680 | } | |
1681 | tmp_elem = queue_next(tmp_elem); | |
1682 | } | |
1683 | if (!queue_end(tmp_elem, &zone->active_zones)) | |
1684 | printf("\nnot at queue_end, tmp_elem=0x%x\n", tmp_elem); | |
1685 | else | |
1686 | printf("\n"); | |
1687 | } | |
1688 | #endif /* ZONE_DEBUG */ | |
1689 | ||
1690 | void | |
1691 | db_zone_print_free( | |
1692 | zone_t zone) | |
1693 | { | |
1694 | int count = 0; | |
1695 | int freecount; | |
1696 | vm_offset_t elem; | |
1697 | ||
1698 | freecount = zone_free_count(zone); | |
1699 | printf("zone 0x%x, free elements %d\n", zone, freecount); | |
1700 | printf("free list:\n"); | |
1701 | elem = zone->free_elements; | |
1702 | while (count < freecount) { | |
1703 | printf(" 0x%x", elem); | |
1704 | count++; | |
1705 | if ((count % 6) == 0) | |
1706 | printf("\n"); | |
1707 | if (elem == 0) { | |
1708 | printf("\nunexpected zero element, count=%d\n", count); | |
1709 | break; | |
1710 | } | |
1711 | elem = *((vm_offset_t *)elem); | |
1712 | } | |
1713 | if (elem != 0) | |
1714 | printf("\nnot at end of free list, elem=0x%x\n", elem); | |
1715 | else | |
1716 | printf("\n"); | |
1717 | } | |
1718 | ||
1719 | #endif /* MACH_KDB */ | |
1720 | ||
1721 | ||
1722 | #if ZONE_DEBUG | |
1723 | ||
1724 | /* should we care about locks here ? */ | |
1725 | ||
1726 | #if MACH_KDB | |
1727 | vm_offset_t | |
1728 | next_element( | |
1729 | zone_t z, | |
1730 | vm_offset_t elt) | |
1731 | { | |
1732 | if (!zone_debug_enabled(z)) | |
1733 | return(0); | |
1734 | elt -= sizeof(queue_chain_t); | |
1735 | elt = (vm_offset_t) queue_next((queue_t) elt); | |
1736 | if ((queue_t) elt == &z->active_zones) | |
1737 | return(0); | |
1738 | elt += sizeof(queue_chain_t); | |
1739 | return(elt); | |
1740 | } | |
1741 | ||
1742 | vm_offset_t | |
1743 | first_element( | |
1744 | zone_t z) | |
1745 | { | |
1746 | vm_offset_t elt; | |
1747 | ||
1748 | if (!zone_debug_enabled(z)) | |
1749 | return(0); | |
1750 | if (queue_empty(&z->active_zones)) | |
1751 | return(0); | |
1752 | elt = (vm_offset_t) queue_first(&z->active_zones); | |
1753 | elt += sizeof(queue_chain_t); | |
1754 | return(elt); | |
1755 | } | |
1756 | ||
1757 | /* | |
1758 | * Second arg controls how many zone elements are printed: | |
1759 | * 0 => none | |
1760 | * n, n < 0 => all | |
1761 | * n, n > 0 => last n on active list | |
1762 | */ | |
1763 | int | |
1764 | zone_count( | |
1765 | zone_t z, | |
1766 | int tail) | |
1767 | { | |
1768 | vm_offset_t elt; | |
1769 | int count = 0; | |
1770 | boolean_t print = (tail != 0); | |
1771 | ||
1772 | if (tail < 0) | |
1773 | tail = z->count; | |
1774 | if (z->count < tail) | |
1775 | tail = 0; | |
1776 | tail = z->count - tail; | |
1777 | for (elt = first_element(z); elt; elt = next_element(z, elt)) { | |
1778 | if (print && tail <= count) | |
1779 | db_printf("%8x\n", elt); | |
1780 | count++; | |
1781 | } | |
1782 | assert(count == z->count); | |
1783 | return(count); | |
1784 | } | |
1785 | #endif /* MACH_KDB */ | |
1786 | ||
1787 | #define zone_in_use(z) ( z->count || z->free_elements ) | |
1788 | ||
1789 | void | |
1790 | zone_debug_enable( | |
1791 | zone_t z) | |
1792 | { | |
1793 | if (zone_debug_enabled(z) || zone_in_use(z) || | |
1794 | z->alloc_size < (z->elem_size + sizeof(queue_chain_t))) | |
1795 | return; | |
1796 | queue_init(&z->active_zones); | |
1797 | z->elem_size += sizeof(queue_chain_t); | |
1798 | } | |
1799 | ||
1800 | void | |
1801 | zone_debug_disable( | |
1802 | zone_t z) | |
1803 | { | |
1804 | if (!zone_debug_enabled(z) || zone_in_use(z)) | |
1805 | return; | |
1806 | z->elem_size -= sizeof(queue_chain_t); | |
1807 | z->active_zones.next = z->active_zones.prev = 0; | |
1808 | } | |
1809 | #endif /* ZONE_DEBUG */ |