1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/zalloc.c
60 * Author: Avadis Tevanian, Jr.
61 *
62 * Zone-based memory allocator. A zone is a collection of fixed size
63 * data blocks for which quick allocation/deallocation is possible.
64 */
65 #include <zone_debug.h>
66 #include <zone_alias_addr.h>
67 #include <norma_vm.h>
68 #include <mach_kdb.h>
69
70 #include <mach/mach_types.h>
71 #include <mach/vm_param.h>
72 #include <mach/kern_return.h>
73 #include <mach/mach_host_server.h>
74 #include <mach/machine/vm_types.h>
75 #include <mach_debug/zone_info.h>
76
77 #include <kern/kern_types.h>
78 #include <kern/assert.h>
79 #include <kern/host.h>
80 #include <kern/macro_help.h>
81 #include <kern/sched.h>
82 #include <kern/lock.h>
83 #include <kern/sched_prim.h>
84 #include <kern/misc_protos.h>
85 #include <kern/thread_call.h>
86 #include <kern/zalloc.h>
87 #include <kern/kalloc.h>
88
89 #include <vm/pmap.h>
90 #include <vm/vm_map.h>
91 #include <vm/vm_kern.h>
92 #include <vm/vm_page.h>
93
94 #include <machine/machparam.h>
95
96 #include <libkern/OSDebug.h>
97 #include <sys/kdebug.h>
98
99 #if defined(__ppc__)
100 /* for fake zone stat routines */
101 #include <ppc/savearea.h>
102 #include <ppc/mappings.h>
103 #endif
104
105 int check_freed_element = 0;
106
107 #if MACH_ASSERT
108 /* Detect use of zone elt after freeing it by two methods:
109 * (1) Range-check the free-list "next" ptr for sanity.
110 * (2) Store the ptr in two different words, and compare them against
111  *     each other when re-using the zone elt, to detect modifications.
112 */
113
114 #if defined(__alpha)
115
116 #define is_kernel_data_addr(a) \
117 (!(a) || (IS_SYS_VA(a) && !((a) & (sizeof(long)-1))))
118
119 #else /* !defined(__alpha) */
120
121 #define is_kernel_data_addr(a) \
122 (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
123
124 #endif /* defined(__alpha) */
125
126 /* Should we set all words of the zone element to an illegal address
127 * when it is freed, to help catch usage after freeing? The down-side
128 * is that this obscures the identity of the freed element.
129 */
130 boolean_t zfree_clear = FALSE;
131
132 #define ADD_TO_ZONE(zone, element) \
133 MACRO_BEGIN \
134 if (zfree_clear) \
135 { unsigned int i; \
136 for (i=1; \
137 i < zone->elem_size/sizeof(vm_offset_t) - 1; \
138 i++) \
139 ((vm_offset_t *)(element))[i] = 0xdeadbeef; \
140 } \
141 ((vm_offset_t *)(element))[0] = (zone)->free_elements; \
142 (zone)->free_elements = (vm_offset_t) (element); \
143 (zone)->count--; \
144 MACRO_END
145
146 #define REMOVE_FROM_ZONE(zone, ret, type) \
147 MACRO_BEGIN \
148 (ret) = (type) (zone)->free_elements; \
149 if ((ret) != (type) 0) { \
150 if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0])) { \
151 panic("A freed zone element has been modified.\n"); \
152 } \
153 (zone)->count++; \
154 (zone)->free_elements = *((vm_offset_t *)(ret)); \
155 } \
156 MACRO_END
157 #else /* MACH_ASSERT */
158
159 #define ADD_TO_ZONE(zone, element) \
160 MACRO_BEGIN \
161 *((vm_offset_t *)(element)) = (zone)->free_elements; \
162 if (check_freed_element) { \
163 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
164 ((vm_offset_t *)(element))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
165 (zone)->free_elements; \
166 } \
167 (zone)->free_elements = (vm_offset_t) (element); \
168 (zone)->count--; \
169 MACRO_END
170
171 #define REMOVE_FROM_ZONE(zone, ret, type) \
172 MACRO_BEGIN \
173 (ret) = (type) (zone)->free_elements; \
174 if ((ret) != (type) 0) { \
175 if (check_freed_element) { \
176 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t)) && \
177 ((vm_offset_t *)(ret))[((zone)->elem_size/sizeof(vm_offset_t))-1] != \
178 ((vm_offset_t *)(ret))[0]) \
179 panic("a freed zone element has been modified");\
180 } \
181 (zone)->count++; \
182 (zone)->free_elements = *((vm_offset_t *)(ret)); \
183 } \
184 MACRO_END
185
186 #endif /* MACH_ASSERT */
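
/*
 * Editor's note: a minimal sketch, not part of the original file, of what
 * the ADD_TO_ZONE/REMOVE_FROM_ZONE macros above implement (mirroring the
 * non-MACH_ASSERT variant).  The free list is threaded through the freed
 * elements themselves: word 0 of a free element holds the next pointer and,
 * when check_freed_element is set, the last word holds a duplicate of that
 * pointer so that a later removal can detect a store into memory that was
 * supposed to stay free.  The function name is hypothetical.
 */
#if 0	/* illustrative only */
static vm_offset_t
example_pop_free_element(zone_t zone)
{
	vm_offset_t elem = zone->free_elements;

	if (elem != 0) {
		if (check_freed_element &&
		    zone->elem_size >= 2 * sizeof(vm_offset_t) &&
		    ((vm_offset_t *)elem)[zone->elem_size/sizeof(vm_offset_t) - 1] !=
		    ((vm_offset_t *)elem)[0])
			panic("a freed zone element has been modified");
		zone->count++;
		zone->free_elements = *(vm_offset_t *)elem;	/* unlink the head */
	}
	return elem;
}
#endif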
187
188 #if ZONE_DEBUG
189 #define zone_debug_enabled(z) z->active_zones.next
190 #define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
191 #define ZONE_DEBUG_OFFSET ROUNDUP(sizeof(queue_chain_t),16)
192 #endif /* ZONE_DEBUG */
193
194 /*
195 * Support for garbage collection of unused zone pages:
196 */
197
198 struct zone_page_table_entry {
199 struct zone_page_table_entry *link;
200 short alloc_count;
201 short collect_count;
202 };
203
204 /* Forwards */
205 void zone_page_init(
206 vm_offset_t addr,
207 vm_size_t size,
208 int value);
209
210 void zone_page_alloc(
211 vm_offset_t addr,
212 vm_size_t size);
213
214 void zone_page_free_element(
215 struct zone_page_table_entry **free_pages,
216 vm_offset_t addr,
217 vm_size_t size);
218
219 void zone_page_collect(
220 vm_offset_t addr,
221 vm_size_t size);
222
223 boolean_t zone_page_collectable(
224 vm_offset_t addr,
225 vm_size_t size);
226
227 void zone_page_keep(
228 vm_offset_t addr,
229 vm_size_t size);
230
231 void zalloc_async(
232 thread_call_param_t p0,
233 thread_call_param_t p1);
234
235
236 #if ZONE_DEBUG && MACH_KDB
237 int zone_count(
238 zone_t z,
239 int tail);
240 #endif /* ZONE_DEBUG && MACH_KDB */
241
242 vm_map_t zone_map = VM_MAP_NULL;
243
244 zone_t zone_zone = ZONE_NULL; /* the zone containing other zones */
245
246 /*
247 * The VM system gives us an initial chunk of memory.
248 * It has to be big enough to allocate the zone_zone
249 */
250
251 vm_offset_t zdata;
252 vm_size_t zdata_size;
253
254 #define lock_zone(zone) \
255 MACRO_BEGIN \
256 lck_mtx_lock(&(zone)->lock); \
257 MACRO_END
258
259 #define unlock_zone(zone) \
260 MACRO_BEGIN \
261 lck_mtx_unlock(&(zone)->lock); \
262 MACRO_END
263
264 #define zone_wakeup(zone) thread_wakeup((event_t)(zone))
265 #define zone_sleep(zone) \
266 (void) lck_mtx_sleep(&(zone)->lock, 0, (event_t)(zone), THREAD_UNINT);
267
268 extern int snprintf(char *, size_t, const char *, ...) __printflike(3,4);
269
270 #define lock_zone_init(zone) \
271 MACRO_BEGIN \
272 char _name[32]; \
273 (void) snprintf(_name, sizeof (_name), "zone.%s", (zone)->zone_name); \
274 lck_grp_attr_setdefault(&(zone)->lock_grp_attr); \
275 lck_grp_init(&(zone)->lock_grp, _name, &(zone)->lock_grp_attr); \
276 lck_attr_setdefault(&(zone)->lock_attr); \
277 lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext, \
278 &(zone)->lock_grp, &(zone)->lock_attr); \
279 MACRO_END
280
281 #define lock_try_zone(zone) lck_mtx_try_lock(&zone->lock)
282
283 kern_return_t zget_space(
284 vm_offset_t size,
285 vm_offset_t *result);
286
287 decl_simple_lock_data(,zget_space_lock)
288 vm_offset_t zalloc_next_space;
289 vm_offset_t zalloc_end_of_space;
290 vm_size_t zalloc_wasted_space;
291
292 /*
293 * Garbage collection map information
294 */
295 struct zone_page_table_entry * zone_page_table;
296 vm_offset_t zone_map_min_address;
297 vm_offset_t zone_map_max_address;
298 unsigned int zone_pages;
299
300 /*
301 * Exclude more than one concurrent garbage collection
302 */
303 decl_mutex_data(, zone_gc_lock)
304
305 #if !ZONE_ALIAS_ADDR
306 #define from_zone_map(addr, size) \
307 ((vm_offset_t)(addr) >= zone_map_min_address && \
308 ((vm_offset_t)(addr) + size -1) < zone_map_max_address)
309 #else
310 #define from_zone_map(addr, size) \
311 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) >= zone_map_min_address && \
312 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) + size -1) < zone_map_max_address)
313 #endif
314
315 #define ZONE_PAGE_USED 0
316 #define ZONE_PAGE_UNUSED -1
317
318
319 /*
320 * Protects first_zone, last_zone, num_zones,
321 * and the next_zone field of zones.
322 */
323 decl_simple_lock_data(, all_zones_lock)
324 zone_t first_zone;
325 zone_t *last_zone;
326 unsigned int num_zones;
327
328 boolean_t zone_gc_allowed = TRUE;
329 boolean_t zone_gc_forced = FALSE;
330 unsigned zone_gc_last_tick = 0;
331 unsigned zone_gc_max_rate = 0; /* in ticks */
332
333
334 /*
335 * zinit initializes a new zone. The zone data structures themselves
336 * are stored in a zone, which is initially a static structure that
337 * is initialized by zone_init.
338 */
339 zone_t
340 zinit(
341 vm_size_t size, /* the size of an element */
342 vm_size_t max, /* maximum memory to use */
343 vm_size_t alloc, /* allocation size */
344 const char *name) /* a name for the zone */
345 {
346 zone_t z;
347
348 if (zone_zone == ZONE_NULL) {
349 if (zget_space(sizeof(struct zone), (vm_offset_t *)&z)
350 != KERN_SUCCESS)
351 return(ZONE_NULL);
352 } else
353 z = (zone_t) zalloc(zone_zone);
354 if (z == ZONE_NULL)
355 return(ZONE_NULL);
356
357 /*
358 * Round off all the parameters appropriately.
359 */
360 if (size < sizeof(z->free_elements))
361 size = sizeof(z->free_elements);
362 size = ((size-1) + sizeof(z->free_elements)) -
363 ((size-1) % sizeof(z->free_elements));
364 if (alloc == 0)
365 alloc = PAGE_SIZE;
366 alloc = round_page(alloc);
367 max = round_page(max);
368 	/*
369 	 * We look for an allocation size with less than 1% waste,
370 	 * up to 5 pages in size...
371 	 * otherwise, we look for the allocation size with the least
372 	 * fragmentation in the range of 1 - 5 pages.
373 	 * That size is used unless the caller's suggested allocation
374 	 * size is larger AND has less fragmentation.
375 	 */
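	/*
	 * Editor's note (worked example, not in the original): for a
	 * 160-byte element the search below goes: 1 page -> 4096 % 160 = 96
	 * bytes of waste, not under 1% (4096 / 100 = 40); 2 pages ->
	 * 8192 % 160 = 32 bytes of waste, under 1% (8192 / 100 = 81), so
	 * alloc becomes 8192 and we jump to use_this_allocation.
	 */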
376 #if ZONE_ALIAS_ADDR
377 if ((size < PAGE_SIZE) && (PAGE_SIZE % size <= PAGE_SIZE / 10))
378 alloc = PAGE_SIZE;
379 else
380 #endif
381 { vm_size_t best, waste; unsigned int i;
382 best = PAGE_SIZE;
383 waste = best % size;
384
385 for (i = 1; i <= 5; i++) {
386 vm_size_t tsize, twaste;
387
388 tsize = i * PAGE_SIZE;
389
390 if ((tsize % size) < (tsize / 100)) {
391 alloc = tsize;
392 goto use_this_allocation;
393 }
394 twaste = tsize % size;
395 if (twaste < waste)
396 best = tsize, waste = twaste;
397 }
398 if (alloc <= best || (alloc % size >= waste))
399 alloc = best;
400 }
401 use_this_allocation:
402 if (max && (max < alloc))
403 max = alloc;
404
405 z->free_elements = 0;
406 z->cur_size = 0;
407 z->max_size = max;
408 z->elem_size = size;
409 z->alloc_size = alloc;
410 z->zone_name = name;
411 z->count = 0;
412 z->doing_alloc = FALSE;
413 z->doing_gc = FALSE;
414 z->exhaustible = FALSE;
415 z->collectable = TRUE;
416 z->allows_foreign = FALSE;
417 z->expandable = TRUE;
418 z->waiting = FALSE;
419 z->async_pending = FALSE;
420
421 #if ZONE_DEBUG
422 z->active_zones.next = z->active_zones.prev = NULL;
423 zone_debug_enable(z);
424 #endif /* ZONE_DEBUG */
425 lock_zone_init(z);
426
427 /*
428 * Add the zone to the all-zones list.
429 */
430
431 z->next_zone = ZONE_NULL;
432 thread_call_setup(&z->call_async_alloc, zalloc_async, z);
433 simple_lock(&all_zones_lock);
434 *last_zone = z;
435 last_zone = &z->next_zone;
436 num_zones++;
437 simple_unlock(&all_zones_lock);
438
439 return(z);
440 }
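
/*
 * Editor's note: an illustrative consumer of zinit()/zalloc()/zfree(), not
 * part of the original file.  The structure, zone name and limits below are
 * hypothetical; real callers (task, thread, port zones and so on) follow the
 * same pattern.
 */
#if 0	/* illustrative only */
struct widget {
	int		w_id;
	queue_chain_t	w_link;
};

static zone_t	widget_zone;

static void
widget_zone_init(void)
{
	widget_zone = zinit(sizeof(struct widget),		/* element size */
			    1024 * sizeof(struct widget),	/* max memory */
			    16 * sizeof(struct widget),		/* alloc size hint */
			    "widgets");
}

static struct widget *
widget_create(int id)
{
	struct widget *w = (struct widget *) zalloc(widget_zone);

	if (w != NULL)
		w->w_id = id;
	return w;
}

static void
widget_destroy(struct widget *w)
{
	zfree(widget_zone, w);
}
#endif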
441
442 /*
443 * Cram the given memory into the specified zone.
444 */
445 void
446 zcram(
447 register zone_t zone,
448 void *newaddr,
449 vm_size_t size)
450 {
451 register vm_size_t elem_size;
452 vm_offset_t newmem = (vm_offset_t) newaddr;
453
454 /* Basic sanity checks */
455 assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
456 assert(!zone->collectable || zone->allows_foreign
457 || (from_zone_map(newmem, size)));
458
459 elem_size = zone->elem_size;
460
461 lock_zone(zone);
462 while (size >= elem_size) {
463 ADD_TO_ZONE(zone, newmem);
464 if (from_zone_map(newmem, elem_size))
465 zone_page_alloc(newmem, elem_size);
466 zone->count++; /* compensate for ADD_TO_ZONE */
467 size -= elem_size;
468 newmem += elem_size;
469 zone->cur_size += elem_size;
470 }
471 unlock_zone(zone);
472 }
473
474 /*
475 * Contiguous space allocator for non-paged zones. Allocates "size" amount
476 * of memory from zone_map.
477 */
478
479 kern_return_t
480 zget_space(
481 vm_offset_t size,
482 vm_offset_t *result)
483 {
484 vm_offset_t new_space = 0;
485 vm_size_t space_to_add = 0;
486
487 simple_lock(&zget_space_lock);
488 while ((zalloc_next_space + size) > zalloc_end_of_space) {
489 /*
490 * Add at least one page to allocation area.
491 */
492
493 space_to_add = round_page(size);
494
495 if (new_space == 0) {
496 kern_return_t retval;
497 /*
498 * Memory cannot be wired down while holding
499 * any locks that the pageout daemon might
500 * need to free up pages. [Making the zget_space
501 * lock a complex lock does not help in this
502 * regard.]
503 *
504 * Unlock and allocate memory. Because several
505 * threads might try to do this at once, don't
506 * use the memory before checking for available
507 * space again.
508 */
509
510 simple_unlock(&zget_space_lock);
511
512 retval = kernel_memory_allocate(zone_map, &new_space,
513 space_to_add, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
514 if (retval != KERN_SUCCESS)
515 return(retval);
516 #if ZONE_ALIAS_ADDR
517 if (space_to_add == PAGE_SIZE)
518 new_space = zone_alias_addr(new_space);
519 #endif
520 zone_page_init(new_space, space_to_add,
521 ZONE_PAGE_USED);
522 simple_lock(&zget_space_lock);
523 continue;
524 }
525
526
527 /*
528 * Memory was allocated in a previous iteration.
529 *
530 * Check whether the new region is contiguous
531 * with the old one.
532 */
533
534 if (new_space != zalloc_end_of_space) {
535 /*
536 * Throw away the remainder of the
537 * old space, and start a new one.
538 */
539 zalloc_wasted_space +=
540 zalloc_end_of_space - zalloc_next_space;
541 zalloc_next_space = new_space;
542 }
543
544 zalloc_end_of_space = new_space + space_to_add;
545
546 new_space = 0;
547 }
548 *result = zalloc_next_space;
549 zalloc_next_space += size;
550 simple_unlock(&zget_space_lock);
551
552 if (new_space != 0)
553 kmem_free(zone_map, new_space, space_to_add);
554
555 return(KERN_SUCCESS);
556 }
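
/*
 * Editor's note: an illustrative call to zget_space(), not part of the
 * original file.  The routine above hands out contiguous, permanently wired
 * chunks by bumping zalloc_next_space; zone_bootstrap() and the
 * non-collectable path of zalloc_canblock() below use it exactly this way.
 */
#if 0	/* illustrative only */
static vm_offset_t
example_grab_permanent_space(vm_size_t size)
{
	vm_offset_t space;

	if (zget_space(size, &space) != KERN_SUCCESS)
		return (vm_offset_t) 0;
	/* Space obtained this way is never returned to the VM system. */
	return space;
}
#endif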
557
558
559 /*
560 * Steal memory for the zone package. Called from
561 * vm_page_bootstrap().
562 */
563 void
564 zone_steal_memory(void)
565 {
566 zdata_size = round_page(128*sizeof(struct zone));
567 zdata = (vm_offset_t)((char *)pmap_steal_memory(zdata_size) - (char *)0);
568 }
569
570
571 /*
572 * Fill a zone with enough memory to contain at least nelem elements.
573 * Memory is obtained with kmem_alloc_wired from the kernel_map.
574 * Return the number of elements actually put into the zone, which may
575 * be more than the caller asked for since the memory allocation is
576 * rounded up to a full page.
577 */
578 int
579 zfill(
580 zone_t zone,
581 int nelem)
582 {
583 kern_return_t kr;
584 vm_size_t size;
585 vm_offset_t memory;
586 int nalloc;
587
588 assert(nelem > 0);
589 if (nelem <= 0)
590 return 0;
591 size = nelem * zone->elem_size;
592 size = round_page(size);
593 kr = kmem_alloc_wired(kernel_map, &memory, size);
594 if (kr != KERN_SUCCESS)
595 return 0;
596
597 zone_change(zone, Z_FOREIGN, TRUE);
598 zcram(zone, (void *)memory, size);
599 nalloc = size / zone->elem_size;
600 assert(nalloc >= nelem);
601
602 return nalloc;
603 }
604
605 /*
606 * Initialize the "zone of zones" which uses fixed memory allocated
607 * earlier in memory initialization. zone_bootstrap is called
608 * before zone_init.
609 */
610 void
611 zone_bootstrap(void)
612 {
613 vm_size_t zone_zone_size;
614 vm_offset_t zone_zone_space;
615 char temp_buf[16];
616
617 /* see if we want freed zone element checking */
618 if (PE_parse_boot_arg("-zc", temp_buf)) {
619 check_freed_element = 1;
620 }
621
622 simple_lock_init(&all_zones_lock, 0);
623
624 first_zone = ZONE_NULL;
625 last_zone = &first_zone;
626 num_zones = 0;
627
628 simple_lock_init(&zget_space_lock, 0);
629 zalloc_next_space = zdata;
630 zalloc_end_of_space = zdata + zdata_size;
631 zalloc_wasted_space = 0;
632
633 /* assertion: nobody else called zinit before us */
634 assert(zone_zone == ZONE_NULL);
635 zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
636 sizeof(struct zone), "zones");
637 zone_change(zone_zone, Z_COLLECT, FALSE);
638 zone_zone_size = zalloc_end_of_space - zalloc_next_space;
639 zget_space(zone_zone_size, &zone_zone_space);
640 zcram(zone_zone, (void *)zone_zone_space, zone_zone_size);
641 }
642
643 void
644 zone_init(
645 vm_size_t max_zonemap_size)
646 {
647 kern_return_t retval;
648 vm_offset_t zone_min;
649 vm_offset_t zone_max;
650 vm_size_t zone_table_size;
651
652 retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
653 FALSE, VM_FLAGS_ANYWHERE, &zone_map);
654
655 if (retval != KERN_SUCCESS)
656 panic("zone_init: kmem_suballoc failed");
657 zone_max = zone_min + round_page(max_zonemap_size);
658 /*
659 	 * Set up garbage collection information:
660 */
661 zone_table_size = atop_32(zone_max - zone_min) *
662 sizeof(struct zone_page_table_entry);
663 if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table,
664 zone_table_size) != KERN_SUCCESS)
665 panic("zone_init");
666 zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
667 zone_pages = atop_32(zone_max - zone_min);
668 zone_map_min_address = zone_min;
669 zone_map_max_address = zone_max;
670 mutex_init(&zone_gc_lock, 0);
671 zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
672 }
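
/*
 * Editor's note: the expected bring-up order of the routines above, shown as
 * an illustrative sketch rather than the actual boot code.  The zone_map
 * size used here is hypothetical; the real value is chosen by the platform
 * startup code.
 */
#if 0	/* illustrative only */
static void
example_zone_bringup(void)
{
	zone_steal_memory();		/* from vm_page_bootstrap(): reserve zdata */
	zone_bootstrap();		/* build zone_zone out of zdata */
	zone_init(64 * 1024 * 1024);	/* carve the zone_map submap and its page table */
	/* zinit()/zalloc() are fully usable once zone_map exists. */
}
#endif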
673
674
675 /*
676 * zalloc returns an element from the specified zone.
677 */
678 void *
679 zalloc_canblock(
680 register zone_t zone,
681 boolean_t canblock)
682 {
683 vm_offset_t addr;
684 kern_return_t retval;
685
686 assert(zone != ZONE_NULL);
687
688 lock_zone(zone);
689
690 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
691
692 while ((addr == 0) && canblock && (zone->doing_gc)) {
693 zone->waiting = TRUE;
694 zone_sleep(zone);
695 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
696 }
697
698 while ((addr == 0) && canblock) {
699 /*
700 * If nothing was there, try to get more
701 */
702 if (zone->doing_alloc) {
703 /*
704 * Someone is allocating memory for this zone.
705 * Wait for it to show up, then try again.
706 */
707 zone->waiting = TRUE;
708 zone_sleep(zone);
709 }
710 else {
711 if ((zone->cur_size + zone->elem_size) >
712 zone->max_size) {
713 if (zone->exhaustible)
714 break;
715 if (zone->expandable) {
716 /*
717 * We're willing to overflow certain
718 * zones, but not without complaining.
719 *
720 * This is best used in conjunction
721 * with the collectable flag. What we
722 * want is an assurance we can get the
723 * memory back, assuming there's no
724 * leak.
725 */
726 zone->max_size += (zone->max_size >> 1);
727 } else {
728 unlock_zone(zone);
729
730 panic("zalloc: zone \"%s\" empty.", zone->zone_name);
731 }
732 }
733 zone->doing_alloc = TRUE;
734 unlock_zone(zone);
735
736 if (zone->collectable) {
737 vm_offset_t space;
738 vm_size_t alloc_size;
739 int retry = 0;
740
741 for (;;) {
742
743 if (vm_pool_low() || retry >= 1)
744 alloc_size =
745 round_page(zone->elem_size);
746 else
747 alloc_size = zone->alloc_size;
748
749 retval = kernel_memory_allocate(zone_map,
750 &space, alloc_size, 0,
751 KMA_KOBJECT|KMA_NOPAGEWAIT);
752 if (retval == KERN_SUCCESS) {
753 #if ZONE_ALIAS_ADDR
754 if (alloc_size == PAGE_SIZE)
755 space = zone_alias_addr(space);
756 #endif
757 zone_page_init(space, alloc_size,
758 ZONE_PAGE_USED);
759 zcram(zone, (void *)space, alloc_size);
760
761 break;
762 } else if (retval != KERN_RESOURCE_SHORTAGE) {
763 retry++;
764
765 if (retry == 2) {
766 zone_gc();
767 printf("zalloc did gc\n");
768 }
769 if (retry == 3)
770 panic("zalloc: \"%s\" (%d elements) retry fail %d", zone->zone_name, zone->count, retval);
771 } else {
772 break;
773 }
774 }
775 lock_zone(zone);
776 zone->doing_alloc = FALSE;
777 if (zone->waiting) {
778 zone->waiting = FALSE;
779 zone_wakeup(zone);
780 }
781 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
782 if (addr == 0 &&
783 retval == KERN_RESOURCE_SHORTAGE) {
784 unlock_zone(zone);
785
786 VM_PAGE_WAIT();
787 lock_zone(zone);
788 }
789 } else {
790 vm_offset_t space;
791 retval = zget_space(zone->elem_size, &space);
792
793 lock_zone(zone);
794 zone->doing_alloc = FALSE;
795 if (zone->waiting) {
796 zone->waiting = FALSE;
797 thread_wakeup((event_t)zone);
798 }
799 if (retval == KERN_SUCCESS) {
800 zone->count++;
801 zone->cur_size += zone->elem_size;
802 #if ZONE_DEBUG
803 if (zone_debug_enabled(zone)) {
804 enqueue_tail(&zone->active_zones, (queue_entry_t)space);
805 }
806 #endif
807 unlock_zone(zone);
808 zone_page_alloc(space, zone->elem_size);
809 #if ZONE_DEBUG
810 if (zone_debug_enabled(zone))
811 space += ZONE_DEBUG_OFFSET;
812 #endif
813 addr = space;
814 goto success;
815 }
816 if (retval == KERN_RESOURCE_SHORTAGE) {
817 unlock_zone(zone);
818
819 VM_PAGE_WAIT();
820 lock_zone(zone);
821 } else {
822 panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval);
823 }
824 }
825 }
826 if (addr == 0)
827 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
828 }
829
830 if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) {
831 zone->async_pending = TRUE;
832 unlock_zone(zone);
833 thread_call_enter(&zone->call_async_alloc);
834 lock_zone(zone);
835 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
836 }
837
838 #if ZONE_DEBUG
839 if (addr && zone_debug_enabled(zone)) {
840 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
841 addr += ZONE_DEBUG_OFFSET;
842 }
843 #endif
844
845 unlock_zone(zone);
846
847 success:
848 TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr);
849
850 return((void *)addr);
851 }
852
853
854 void *
855 zalloc(
856 register zone_t zone)
857 {
858 return( zalloc_canblock(zone, TRUE) );
859 }
860
861 void *
862 zalloc_noblock(
863 register zone_t zone)
864 {
865 return( zalloc_canblock(zone, FALSE) );
866 }
867
868 void
869 zalloc_async(
870 thread_call_param_t p0,
871 __unused thread_call_param_t p1)
872 {
873 void *elt;
874
875 elt = zalloc_canblock((zone_t)p0, TRUE);
876 zfree((zone_t)p0, elt);
877 lock_zone(((zone_t)p0));
878 ((zone_t)p0)->async_pending = FALSE;
879 unlock_zone(((zone_t)p0));
880 }
881
882
883 /*
884  *	zget returns an element from the specified zone, or NULL
885  *	immediately if nothing is available.
886  *
887  *	This form should be used when you cannot block (for example,
888  *	when processing an interrupt).
889 */
890 void *
891 zget(
892 register zone_t zone)
893 {
894 register vm_offset_t addr;
895
896 assert( zone != ZONE_NULL );
897
898 if (!lock_try_zone(zone))
899 return NULL;
900
901 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
902 #if ZONE_DEBUG
903 if (addr && zone_debug_enabled(zone)) {
904 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
905 addr += ZONE_DEBUG_OFFSET;
906 }
907 #endif /* ZONE_DEBUG */
908 unlock_zone(zone);
909
910 return((void *) addr);
911 }
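
/*
 * Editor's note: illustrative callers of the non-blocking forms above, not
 * part of the original file.  Both zget() and zalloc_noblock() can return
 * NULL, so interrupt-level and low-memory callers must handle that case.
 * widget_zone is the hypothetical zone from the zinit() sketch earlier.
 */
#if 0	/* illustrative only */
static struct widget *
widget_grab_at_interrupt_level(void)
{
	/* zget() never blocks and never expands the zone; it may return NULL. */
	return (struct widget *) zget(widget_zone);
}

static struct widget *
widget_grab_no_block(void)
{
	/*
	 * zalloc_noblock() does not block either, but when the zone is empty
	 * it may schedule an asynchronous refill (zalloc_async) for later.
	 */
	return (struct widget *) zalloc_noblock(widget_zone);
}
#endif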
912
913 /* Keep this FALSE by default.  Large-memory machines run orders of magnitude
914    slower in debug mode when it is TRUE.  Use the debugger to enable it if needed. */
915 /* static */ boolean_t zone_check = FALSE;
916
917 static zone_t zone_last_bogus_zone = ZONE_NULL;
918 static vm_offset_t zone_last_bogus_elem = 0;
919
920 void
921 zfree(
922 register zone_t zone,
923 void *addr)
924 {
925 vm_offset_t elem = (vm_offset_t) addr;
926
927 #if MACH_ASSERT
928 /* Basic sanity checks */
929 if (zone == ZONE_NULL || elem == (vm_offset_t)0)
930 panic("zfree: NULL");
931 /* zone_gc assumes zones are never freed */
932 if (zone == zone_zone)
933 panic("zfree: freeing to zone_zone breaks zone_gc!");
934 #endif
935
936 TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (int)addr);
937
938 if (zone->collectable && !zone->allows_foreign &&
939 !from_zone_map(elem, zone->elem_size)) {
940 #if MACH_ASSERT
941 panic("zfree: non-allocated memory in collectable zone!");
942 #endif
943 zone_last_bogus_zone = zone;
944 zone_last_bogus_elem = elem;
945 return;
946 }
947
948 lock_zone(zone);
949 #if ZONE_DEBUG
950 if (zone_debug_enabled(zone)) {
951 queue_t tmp_elem;
952
953 elem -= ZONE_DEBUG_OFFSET;
954 if (zone_check) {
955 /* check the zone's consistency */
956
957 for (tmp_elem = queue_first(&zone->active_zones);
958 !queue_end(tmp_elem, &zone->active_zones);
959 tmp_elem = queue_next(tmp_elem))
960 if (elem == (vm_offset_t)tmp_elem)
961 break;
962 if (elem != (vm_offset_t)tmp_elem)
963 panic("zfree()ing element from wrong zone");
964 }
965 remqueue(&zone->active_zones, (queue_t) elem);
966 }
967 #endif /* ZONE_DEBUG */
968 if (zone_check) {
969 vm_offset_t this;
970
971 /* check the zone's consistency */
972
973 for (this = zone->free_elements;
974 this != 0;
975 this = * (vm_offset_t *) this)
976 if (!pmap_kernel_va(this) || this == elem)
977 panic("zfree");
978 }
979 ADD_TO_ZONE(zone, elem);
980
981 /*
982 	 * If elements are at least a page in size and memory is low,
983 	 * request that the garbage collector run the next time the
984 	 * pageout thread runs.
985 */
986 if (zone->elem_size >= PAGE_SIZE &&
987 vm_pool_low()){
988 zone_gc_forced = TRUE;
989 }
990 unlock_zone(zone);
991 }
992
993
994 /* Change a zone's flags.
995 * This routine must be called immediately after zinit.
996 */
997 void
998 zone_change(
999 zone_t zone,
1000 unsigned int item,
1001 boolean_t value)
1002 {
1003 assert( zone != ZONE_NULL );
1004 assert( value == TRUE || value == FALSE );
1005
1006 switch(item){
1007 case Z_EXHAUST:
1008 zone->exhaustible = value;
1009 break;
1010 case Z_COLLECT:
1011 zone->collectable = value;
1012 break;
1013 case Z_EXPAND:
1014 zone->expandable = value;
1015 break;
1016 case Z_FOREIGN:
1017 zone->allows_foreign = value;
1018 break;
1019 #if MACH_ASSERT
1020 default:
1021 panic("Zone_change: Wrong Item Type!");
1022 /* break; */
1023 #endif
1024 }
1025 }
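
/*
 * Editor's note: an illustrative use of zone_change() immediately after
 * zinit(), not part of the original file.  The zone is hypothetical; the
 * flags are the ones handled by the switch above.
 */
#if 0	/* illustrative only */
static void
example_make_capped_zone(zone_t z)
{
	/* Fail allocations rather than grow past max_size. */
	zone_change(z, Z_EXHAUST, TRUE);
	zone_change(z, Z_EXPAND, FALSE);
	/* Accept elements that live outside zone_map (e.g. zfill()ed memory). */
	zone_change(z, Z_FOREIGN, TRUE);
}
#endif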
1026
1027 /*
1028 * Return the expected number of free elements in the zone.
1029 * This calculation will be incorrect if items are zfree'd that
1030 * were never zalloc'd/zget'd. The correct way to stuff memory
1031 * into a zone is by zcram.
1032 */
1033
1034 integer_t
1035 zone_free_count(zone_t zone)
1036 {
1037 integer_t free_count;
1038
1039 lock_zone(zone);
1040 free_count = zone->cur_size/zone->elem_size - zone->count;
1041 unlock_zone(zone);
1042
1043 assert(free_count >= 0);
1044
1045 return(free_count);
1046 }
1047
1048 /*
1049  *	zprealloc preallocates wired memory, expanding the specified
1050  *	zone to the specified size.
1051 */
1052 void
1053 zprealloc(
1054 zone_t zone,
1055 vm_size_t size)
1056 {
1057 vm_offset_t addr;
1058
1059 if (size != 0) {
1060 if (kmem_alloc_wired(zone_map, &addr, size) != KERN_SUCCESS)
1061 panic("zprealloc");
1062 zone_page_init(addr, size, ZONE_PAGE_USED);
1063 zcram(zone, (void *)addr, size);
1064 }
1065 }
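
/*
 * Editor's note: an illustrative comparison of the two prefill paths, not
 * part of the original file.  zprealloc() wires memory out of zone_map,
 * while zfill() above pulls pages from kernel_map and therefore marks the
 * zone Z_FOREIGN.  The element count and zone are hypothetical.
 */
#if 0	/* illustrative only */
static void
example_prefill(zone_t z)
{
	zprealloc(z, 4 * PAGE_SIZE);	/* grow by four wired pages from zone_map */
	(void) zfill(z, 64);		/* or: add at least 64 elements from kernel_map */
}
#endif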
1066
1067 /*
1068 * Zone garbage collection subroutines
1069 */
1070
1071 boolean_t
1072 zone_page_collectable(
1073 vm_offset_t addr,
1074 vm_size_t size)
1075 {
1076 struct zone_page_table_entry *zp;
1077 natural_t i, j;
1078
1079 #if ZONE_ALIAS_ADDR
1080 addr = zone_virtual_addr(addr);
1081 #endif
1082 #if MACH_ASSERT
1083 if (!from_zone_map(addr, size))
1084 panic("zone_page_collectable");
1085 #endif
1086
1087 i = atop_32(addr-zone_map_min_address);
1088 j = atop_32((addr+size-1) - zone_map_min_address);
1089
1090 for (zp = zone_page_table + i; i <= j; zp++, i++)
1091 if (zp->collect_count == zp->alloc_count)
1092 return (TRUE);
1093
1094 return (FALSE);
1095 }
1096
1097 void
1098 zone_page_keep(
1099 vm_offset_t addr,
1100 vm_size_t size)
1101 {
1102 struct zone_page_table_entry *zp;
1103 natural_t i, j;
1104
1105 #if ZONE_ALIAS_ADDR
1106 addr = zone_virtual_addr(addr);
1107 #endif
1108 #if MACH_ASSERT
1109 if (!from_zone_map(addr, size))
1110 panic("zone_page_keep");
1111 #endif
1112
1113 i = atop_32(addr-zone_map_min_address);
1114 j = atop_32((addr+size-1) - zone_map_min_address);
1115
1116 for (zp = zone_page_table + i; i <= j; zp++, i++)
1117 zp->collect_count = 0;
1118 }
1119
1120 void
1121 zone_page_collect(
1122 vm_offset_t addr,
1123 vm_size_t size)
1124 {
1125 struct zone_page_table_entry *zp;
1126 natural_t i, j;
1127
1128 #if ZONE_ALIAS_ADDR
1129 addr = zone_virtual_addr(addr);
1130 #endif
1131 #if MACH_ASSERT
1132 if (!from_zone_map(addr, size))
1133 panic("zone_page_collect");
1134 #endif
1135
1136 i = atop_32(addr-zone_map_min_address);
1137 j = atop_32((addr+size-1) - zone_map_min_address);
1138
1139 for (zp = zone_page_table + i; i <= j; zp++, i++)
1140 ++zp->collect_count;
1141 }
1142
1143 void
1144 zone_page_init(
1145 vm_offset_t addr,
1146 vm_size_t size,
1147 int value)
1148 {
1149 struct zone_page_table_entry *zp;
1150 natural_t i, j;
1151
1152 #if ZONE_ALIAS_ADDR
1153 addr = zone_virtual_addr(addr);
1154 #endif
1155 #if MACH_ASSERT
1156 if (!from_zone_map(addr, size))
1157 panic("zone_page_init");
1158 #endif
1159
1160 i = atop_32(addr-zone_map_min_address);
1161 j = atop_32((addr+size-1) - zone_map_min_address);
1162
1163 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1164 zp->alloc_count = value;
1165 zp->collect_count = 0;
1166 }
1167 }
1168
1169 void
1170 zone_page_alloc(
1171 vm_offset_t addr,
1172 vm_size_t size)
1173 {
1174 struct zone_page_table_entry *zp;
1175 natural_t i, j;
1176
1177 #if ZONE_ALIAS_ADDR
1178 addr = zone_virtual_addr(addr);
1179 #endif
1180 #if MACH_ASSERT
1181 if (!from_zone_map(addr, size))
1182 panic("zone_page_alloc");
1183 #endif
1184
1185 i = atop_32(addr-zone_map_min_address);
1186 j = atop_32((addr+size-1) - zone_map_min_address);
1187
1188 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1189 /*
1190 * Set alloc_count to (ZONE_PAGE_USED + 1) if
1191 * it was previously set to ZONE_PAGE_UNUSED.
1192 */
1193 if (zp->alloc_count == ZONE_PAGE_UNUSED)
1194 zp->alloc_count = 1;
1195 else
1196 ++zp->alloc_count;
1197 }
1198 }
1199
1200 void
1201 zone_page_free_element(
1202 struct zone_page_table_entry **free_pages,
1203 vm_offset_t addr,
1204 vm_size_t size)
1205 {
1206 struct zone_page_table_entry *zp;
1207 natural_t i, j;
1208
1209 #if ZONE_ALIAS_ADDR
1210 addr = zone_virtual_addr(addr);
1211 #endif
1212 #if MACH_ASSERT
1213 if (!from_zone_map(addr, size))
1214 panic("zone_page_free_element");
1215 #endif
1216
1217 i = atop_32(addr-zone_map_min_address);
1218 j = atop_32((addr+size-1) - zone_map_min_address);
1219
1220 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1221 if (zp->collect_count > 0)
1222 --zp->collect_count;
1223 if (--zp->alloc_count == 0) {
1224 zp->alloc_count = ZONE_PAGE_UNUSED;
1225 zp->collect_count = 0;
1226
1227 zp->link = *free_pages;
1228 *free_pages = zp;
1229 }
1230 }
1231 }
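
/*
 * Editor's note (worked example, not in the original): the zone_page_*
 * routines above all map an address range onto the same span of
 * zone_page_table entries, i = atop_32(addr - zone_map_min_address) through
 * j = atop_32(addr + size - 1 - zone_map_min_address).  With 4K pages, a
 * 160-byte element at offset 0x1FA0 into the zone map spans entries 1 and 2,
 * so zone_page_collect() bumps collect_count on both; a page whose
 * collect_count reaches its alloc_count is reported collectable, and
 * zone_page_free_element() puts it on the free_pages list once every
 * element on it has been released.
 */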
1232
1233
1234 /* This is used for walking through a zone's free element list.
1235 */
1236 struct zone_free_element {
1237 struct zone_free_element * next;
1238 };
1239
1240 /*
1241  * Add a linked list of free elements starting at base back onto the
1242  * zone's free list.  Tail points to the last element in the list.
1243 */
1244
1245 #define ADD_LIST_TO_ZONE(zone, base, tail) \
1246 MACRO_BEGIN \
1247 (tail)->next = (void *)((zone)->free_elements); \
1248 if (check_freed_element) { \
1249 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
1250 ((vm_offset_t *)(tail))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
1251 (zone)->free_elements; \
1252 } \
1253 (zone)->free_elements = (unsigned long)(base); \
1254 MACRO_END
1255
1256 /*
1257 * Add an element to the chain pointed to by prev.
1258 */
1259
1260 #define ADD_ELEMENT(zone, prev, elem) \
1261 MACRO_BEGIN \
1262 (prev)->next = (elem); \
1263 if (check_freed_element) { \
1264 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
1265 ((vm_offset_t *)(prev))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
1266 (vm_offset_t)(elem); \
1267 } \
1268 MACRO_END
1269
1270 struct {
1271 uint32_t pgs_freed;
1272
1273 uint32_t elems_collected,
1274 elems_freed,
1275 elems_kept;
1276 } zgc_stats;
1277
1278 /* Zone garbage collection
1279 *
1280 * zone_gc will walk through all the free elements in all the
1281 * zones that are marked collectable looking for reclaimable
1282 * pages. zone_gc is called by consider_zone_gc when the system
1283 * begins to run out of memory.
1284 */
1285 void
1286 zone_gc(void)
1287 {
1288 unsigned int max_zones;
1289 zone_t z;
1290 unsigned int i;
1291 struct zone_page_table_entry *zp, *zone_free_pages;
1292
1293 mutex_lock(&zone_gc_lock);
1294
1295 simple_lock(&all_zones_lock);
1296 max_zones = num_zones;
1297 z = first_zone;
1298 simple_unlock(&all_zones_lock);
1299
1300 #if MACH_ASSERT
1301 for (i = 0; i < zone_pages; i++)
1302 assert(zone_page_table[i].collect_count == 0);
1303 #endif /* MACH_ASSERT */
1304
1305 zone_free_pages = NULL;
1306
1307 for (i = 0; i < max_zones; i++, z = z->next_zone) {
1308 unsigned int n, m;
1309 vm_size_t elt_size, size_freed;
1310 struct zone_free_element *elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail;
1311
1312 assert(z != ZONE_NULL);
1313
1314 if (!z->collectable)
1315 continue;
1316
1317 lock_zone(z);
1318
1319 elt_size = z->elem_size;
1320
1321 /*
1322 		 * Do a quick feasibility check before we scan the zone:
1323 		 * skip unless there is a likelihood of getting pages back
1324 		 * (i.e. we need a whole allocation block's worth of free
1325 		 * elements before we can garbage collect) and
1326 		 * the zone has more than 10 percent of its elements free,
1327 		 * or the element size is a multiple of PAGE_SIZE.
1328 */
1329 if ((elt_size & PAGE_MASK) &&
1330 (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) ||
1331 ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10)))) {
1332 unlock_zone(z);
1333 continue;
1334 }
1335
1336 z->doing_gc = TRUE;
1337
1338 /*
1339 * Snatch all of the free elements away from the zone.
1340 */
1341
1342 scan = (void *)z->free_elements;
1343 z->free_elements = 0;
1344
1345 unlock_zone(z);
1346
1347 /*
1348 * Pass 1:
1349 *
1350 * Determine which elements we can attempt to collect
1351 * and count them up in the page table. Foreign elements
1352 * are returned to the zone.
1353 */
1354
1355 prev = (void *)&scan;
1356 elt = scan;
1357 n = 0; tail = keep = NULL;
1358 while (elt != NULL) {
1359 if (from_zone_map(elt, elt_size)) {
1360 zone_page_collect((vm_offset_t)elt, elt_size);
1361
1362 prev = elt;
1363 elt = elt->next;
1364
1365 ++zgc_stats.elems_collected;
1366 }
1367 else {
1368 if (keep == NULL)
1369 keep = tail = elt;
1370 else {
1371 ADD_ELEMENT(z, tail, elt);
1372 tail = elt;
1373 }
1374
1375 ADD_ELEMENT(z, prev, elt->next);
1376 elt = elt->next;
1377 ADD_ELEMENT(z, tail, NULL);
1378 }
1379
1380 /*
1381 * Dribble back the elements we are keeping.
1382 */
1383
1384 if (++n >= 50) {
1385 if (z->waiting == TRUE) {
1386 lock_zone(z);
1387
1388 if (keep != NULL) {
1389 ADD_LIST_TO_ZONE(z, keep, tail);
1390 tail = keep = NULL;
1391 } else {
1392 m =0;
1393 base_elt = elt;
1394 base_prev = prev;
1395 while ((elt != NULL) && (++m < 50)) {
1396 prev = elt;
1397 elt = elt->next;
1398 }
1399 if (m !=0 ) {
1400 ADD_LIST_TO_ZONE(z, base_elt, prev);
1401 ADD_ELEMENT(z, base_prev, elt);
1402 prev = base_prev;
1403 }
1404 }
1405
1406 if (z->waiting) {
1407 z->waiting = FALSE;
1408 zone_wakeup(z);
1409 }
1410
1411 unlock_zone(z);
1412 }
1413 n =0;
1414 }
1415 }
1416
1417 /*
1418 * Return any remaining elements.
1419 */
1420
1421 if (keep != NULL) {
1422 lock_zone(z);
1423
1424 ADD_LIST_TO_ZONE(z, keep, tail);
1425
1426 unlock_zone(z);
1427 }
1428
1429 /*
1430 * Pass 2:
1431 *
1432 * Determine which pages we can reclaim and
1433 * free those elements.
1434 */
1435
1436 size_freed = 0;
1437 elt = scan;
1438 n = 0; tail = keep = NULL;
1439 while (elt != NULL) {
1440 if (zone_page_collectable((vm_offset_t)elt, elt_size)) {
1441 size_freed += elt_size;
1442 zone_page_free_element(&zone_free_pages,
1443 (vm_offset_t)elt, elt_size);
1444
1445 elt = elt->next;
1446
1447 ++zgc_stats.elems_freed;
1448 }
1449 else {
1450 zone_page_keep((vm_offset_t)elt, elt_size);
1451
1452 if (keep == NULL)
1453 keep = tail = elt;
1454 else {
1455 ADD_ELEMENT(z, tail, elt);
1456 tail = elt;
1457 }
1458
1459 elt = elt->next;
1460 ADD_ELEMENT(z, tail, NULL);
1461
1462 ++zgc_stats.elems_kept;
1463 }
1464
1465 /*
1466 * Dribble back the elements we are keeping,
1467 * and update the zone size info.
1468 */
1469
1470 if (++n >= 50) {
1471 lock_zone(z);
1472
1473 z->cur_size -= size_freed;
1474 size_freed = 0;
1475
1476 if (keep != NULL) {
1477 ADD_LIST_TO_ZONE(z, keep, tail);
1478 }
1479
1480 if (z->waiting) {
1481 z->waiting = FALSE;
1482 zone_wakeup(z);
1483 }
1484
1485 unlock_zone(z);
1486
1487 n = 0; tail = keep = NULL;
1488 }
1489 }
1490
1491 /*
1492 * Return any remaining elements, and update
1493 * the zone size info.
1494 */
1495
1496 lock_zone(z);
1497
1498 if (size_freed > 0 || keep != NULL) {
1499
1500 z->cur_size -= size_freed;
1501
1502 if (keep != NULL) {
1503 ADD_LIST_TO_ZONE(z, keep, tail);
1504 }
1505
1506 }
1507
1508 z->doing_gc = FALSE;
1509 if (z->waiting) {
1510 z->waiting = FALSE;
1511 zone_wakeup(z);
1512 }
1513 unlock_zone(z);
1514 }
1515
1516 /*
1517 * Reclaim the pages we are freeing.
1518 */
1519
1520 while ((zp = zone_free_pages) != NULL) {
1521 zone_free_pages = zp->link;
1522 #if ZONE_ALIAS_ADDR
1523 z = zone_virtual_addr((vm_map_address_t)z);
1524 #endif
1525 kmem_free(zone_map, zone_map_min_address + PAGE_SIZE *
1526 (zp - zone_page_table), PAGE_SIZE);
1527 ++zgc_stats.pgs_freed;
1528 }
1529
1530 mutex_unlock(&zone_gc_lock);
1531 }
1532
1533 /*
1534 * consider_zone_gc:
1535 *
1536 * Called by the pageout daemon when the system needs more free pages.
1537 */
1538
1539 void
1540 consider_zone_gc(void)
1541 {
1542 /*
1543 * By default, don't attempt zone GC more frequently
1544 	 * than once per minute.
1545 */
1546
1547 if (zone_gc_max_rate == 0)
1548 zone_gc_max_rate = (60 << SCHED_TICK_SHIFT) + 1;
1549
1550 if (zone_gc_allowed &&
1551 ((sched_tick > (zone_gc_last_tick + zone_gc_max_rate)) ||
1552 zone_gc_forced)) {
1553 zone_gc_forced = FALSE;
1554 zone_gc_last_tick = sched_tick;
1555 zone_gc();
1556 }
1557 }
1558
1559 struct fake_zone_info {
1560 const char* name;
1561 void (*func)(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
1562 int *, int *);
1563 };
1564
1565 static struct fake_zone_info fake_zones[] = {
1566 {
1567 .name = "kernel_stacks",
1568 .func = stack_fake_zone_info,
1569 },
1570 #ifdef ppc
1571 {
1572 .name = "save_areas",
1573 .func = save_fake_zone_info,
1574 },
1575 {
1576 .name = "pmap_mappings",
1577 .func = mapping_fake_zone_info,
1578 },
1579 #endif /* ppc */
1580 #ifdef i386
1581 {
1582 .name = "page_tables",
1583 .func = pt_fake_zone_info,
1584 },
1585 #endif /* i386 */
1586 {
1587 .name = "kalloc.large",
1588 .func = kalloc_fake_zone_info,
1589 },
1590 };
1591
1592 kern_return_t
1593 host_zone_info(
1594 host_t host,
1595 zone_name_array_t *namesp,
1596 mach_msg_type_number_t *namesCntp,
1597 zone_info_array_t *infop,
1598 mach_msg_type_number_t *infoCntp)
1599 {
1600 zone_name_t *names;
1601 vm_offset_t names_addr;
1602 vm_size_t names_size;
1603 zone_info_t *info;
1604 vm_offset_t info_addr;
1605 vm_size_t info_size;
1606 unsigned int max_zones, i;
1607 zone_t z;
1608 zone_name_t *zn;
1609 zone_info_t *zi;
1610 kern_return_t kr;
1611 size_t num_fake_zones;
1612
1613 if (host == HOST_NULL)
1614 return KERN_INVALID_HOST;
1615
1616 num_fake_zones = sizeof fake_zones / sizeof fake_zones[0];
1617
1618 /*
1619 * We assume that zones aren't freed once allocated.
1620 * We won't pick up any zones that are allocated later.
1621 */
1622
1623 simple_lock(&all_zones_lock);
1624 max_zones = num_zones + num_fake_zones;
1625 z = first_zone;
1626 simple_unlock(&all_zones_lock);
1627
1628 if (max_zones <= *namesCntp) {
1629 /* use in-line memory */
1630 names_size = *namesCntp * sizeof *names;
1631 names = *namesp;
1632 } else {
1633 names_size = round_page(max_zones * sizeof *names);
1634 kr = kmem_alloc_pageable(ipc_kernel_map,
1635 &names_addr, names_size);
1636 if (kr != KERN_SUCCESS)
1637 return kr;
1638 names = (zone_name_t *) names_addr;
1639 }
1640
1641 if (max_zones <= *infoCntp) {
1642 /* use in-line memory */
1643 info_size = *infoCntp * sizeof *info;
1644 info = *infop;
1645 } else {
1646 info_size = round_page(max_zones * sizeof *info);
1647 kr = kmem_alloc_pageable(ipc_kernel_map,
1648 &info_addr, info_size);
1649 if (kr != KERN_SUCCESS) {
1650 if (names != *namesp)
1651 kmem_free(ipc_kernel_map,
1652 names_addr, names_size);
1653 return kr;
1654 }
1655
1656 info = (zone_info_t *) info_addr;
1657 }
1658 zn = &names[0];
1659 zi = &info[0];
1660
1661 for (i = 0; i < num_zones; i++) {
1662 struct zone zcopy;
1663
1664 assert(z != ZONE_NULL);
1665
1666 lock_zone(z);
1667 zcopy = *z;
1668 unlock_zone(z);
1669
1670 simple_lock(&all_zones_lock);
1671 z = z->next_zone;
1672 simple_unlock(&all_zones_lock);
1673
1674 /* assuming here the name data is static */
1675 (void) strncpy(zn->zn_name, zcopy.zone_name,
1676 sizeof zn->zn_name);
1677 zn->zn_name[sizeof zn->zn_name - 1] = '\0';
1678
1679 zi->zi_count = zcopy.count;
1680 zi->zi_cur_size = zcopy.cur_size;
1681 zi->zi_max_size = zcopy.max_size;
1682 zi->zi_elem_size = zcopy.elem_size;
1683 zi->zi_alloc_size = zcopy.alloc_size;
1684 zi->zi_exhaustible = zcopy.exhaustible;
1685 zi->zi_collectable = zcopy.collectable;
1686
1687 zn++;
1688 zi++;
1689 }
1690
1691 /*
1692 * loop through the fake zones and fill them using the specialized
1693 * functions
1694 */
1695 for (i = 0; i < num_fake_zones; i++) {
1696 strncpy(zn->zn_name, fake_zones[i].name, sizeof zn->zn_name);
1697 zn->zn_name[sizeof zn->zn_name - 1] = '\0';
1698 fake_zones[i].func(&zi->zi_count, &zi->zi_cur_size,
1699 &zi->zi_max_size, &zi->zi_elem_size,
1700 &zi->zi_alloc_size, &zi->zi_collectable,
1701 &zi->zi_exhaustible);
1702 zn++;
1703 zi++;
1704 }
1705
1706 if (names != *namesp) {
1707 vm_size_t used;
1708 vm_map_copy_t copy;
1709
1710 used = max_zones * sizeof *names;
1711
1712 if (used != names_size)
1713 bzero((char *) (names_addr + used), names_size - used);
1714
1715 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
1716 (vm_map_size_t)names_size, TRUE, &copy);
1717 assert(kr == KERN_SUCCESS);
1718
1719 *namesp = (zone_name_t *) copy;
1720 }
1721 *namesCntp = max_zones;
1722
1723 if (info != *infop) {
1724 vm_size_t used;
1725 vm_map_copy_t copy;
1726
1727 used = max_zones * sizeof *info;
1728
1729 if (used != info_size)
1730 bzero((char *) (info_addr + used), info_size - used);
1731
1732 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
1733 (vm_map_size_t)info_size, TRUE, &copy);
1734 assert(kr == KERN_SUCCESS);
1735
1736 *infop = (zone_info_t *) copy;
1737 }
1738 *infoCntp = max_zones;
1739
1740 return KERN_SUCCESS;
1741 }
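
/*
 * Editor's note: a sketch, not part of the original file, of a user-space
 * caller of the host_zone_info() MIG routine implemented above, in the
 * spirit of the zprint(1) tool.  The user-level headers and the exact
 * prototype are assumptions for this era of the API; error handling is
 * minimal.
 */
#if 0	/* illustrative only -- build in user space, not in the kernel */
#include <stdio.h>
#include <mach/mach.h>
#include <mach_debug/mach_debug.h>

int
main(void)
{
	zone_name_array_t names;
	zone_info_array_t info;
	mach_msg_type_number_t name_cnt = 0, info_cnt = 0;
	unsigned int i;

	if (host_zone_info(mach_host_self(), &names, &name_cnt,
	    &info, &info_cnt) != KERN_SUCCESS)
		return 1;

	for (i = 0; i < info_cnt; i++)
		printf("%-25s cur %8lu max %8lu elem %6lu\n",
		    names[i].zn_name,
		    (unsigned long) info[i].zi_cur_size,
		    (unsigned long) info[i].zi_max_size,
		    (unsigned long) info[i].zi_elem_size);

	(void) vm_deallocate(mach_task_self(), (vm_address_t) names,
	    name_cnt * sizeof *names);
	(void) vm_deallocate(mach_task_self(), (vm_address_t) info,
	    info_cnt * sizeof *info);
	return 0;
}
#endif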
1742
1743 #if MACH_KDB
1744 #include <ddb/db_command.h>
1745 #include <ddb/db_output.h>
1746 #include <kern/kern_print.h>
1747
1748 const char *zone_labels =
1749 "ENTRY COUNT TOT_SZ MAX_SZ ELT_SZ ALLOC_SZ NAME";
1750
1751 /* Forwards */
1752 void db_print_zone(
1753 zone_t addr);
1754
1755 #if ZONE_DEBUG
1756 void db_zone_check_active(
1757 zone_t zone);
1758 void db_zone_print_active(
1759 zone_t zone);
1760 #endif /* ZONE_DEBUG */
1761 void db_zone_print_free(
1762 zone_t zone);
1763 void
1764 db_print_zone(
1765 zone_t addr)
1766 {
1767 struct zone zcopy;
1768
1769 zcopy = *addr;
1770
1771 db_printf("%8x %8x %8x %8x %6x %8x %s ",
1772 addr, zcopy.count, zcopy.cur_size,
1773 zcopy.max_size, zcopy.elem_size,
1774 zcopy.alloc_size, zcopy.zone_name);
1775 if (zcopy.exhaustible)
1776 db_printf("H");
1777 if (zcopy.collectable)
1778 db_printf("C");
1779 if (zcopy.expandable)
1780 db_printf("X");
1781 db_printf("\n");
1782 }
1783
1784 /*ARGSUSED*/
1785 void
1786 db_show_one_zone(db_expr_t addr, boolean_t have_addr,
1787 __unused db_expr_t count, __unused char *modif)
1788 {
1789 struct zone *z = (zone_t)((char *)0 + addr);
1790
1791 if (z == ZONE_NULL || !have_addr){
1792 db_error("No Zone\n");
1793 /*NOTREACHED*/
1794 }
1795
1796 db_printf("%s\n", zone_labels);
1797 db_print_zone(z);
1798 }
1799
1800 /*ARGSUSED*/
1801 void
1802 db_show_all_zones(__unused db_expr_t addr, boolean_t have_addr, db_expr_t count,
1803 __unused char *modif)
1804 {
1805 zone_t z;
1806 unsigned total = 0;
1807
1808 /*
1809 	 * Don't risk hanging by unconditionally locking; the
1810 	 * risk of incoherent data is small (zones aren't freed).
1811 */
1812 have_addr = simple_lock_try(&all_zones_lock);
1813 count = num_zones;
1814 z = first_zone;
1815 if (have_addr) {
1816 simple_unlock(&all_zones_lock);
1817 }
1818
1819 db_printf("%s\n", zone_labels);
1820 for ( ; count > 0; count--) {
1821 if (!z) {
1822 db_error("Mangled Zone List\n");
1823 /*NOTREACHED*/
1824 }
1825 db_print_zone(z);
1826 		total += z->cur_size;
1827
1828 have_addr = simple_lock_try(&all_zones_lock);
1829 z = z->next_zone;
1830 if (have_addr) {
1831 simple_unlock(&all_zones_lock);
1832 }
1833 }
1834 db_printf("\nTotal %8x", total);
1835 db_printf("\n\nzone_gc() has reclaimed %d pages\n", zgc_stats.pgs_freed);
1836 }
1837
1838 #if ZONE_DEBUG
1839 void
1840 db_zone_check_active(
1841 zone_t zone)
1842 {
1843 int count = 0;
1844 queue_t tmp_elem;
1845
1846 if (!zone_debug_enabled(zone) || !zone_check)
1847 return;
1848 tmp_elem = queue_first(&zone->active_zones);
1849 while (count < zone->count) {
1850 count++;
1851 if (tmp_elem == 0) {
1852 printf("unexpected zero element, zone=%p, count=%d\n",
1853 zone, count);
1854 assert(FALSE);
1855 break;
1856 }
1857 if (queue_end(tmp_elem, &zone->active_zones)) {
1858 printf("unexpected queue_end, zone=%p, count=%d\n",
1859 zone, count);
1860 assert(FALSE);
1861 break;
1862 }
1863 tmp_elem = queue_next(tmp_elem);
1864 }
1865 if (!queue_end(tmp_elem, &zone->active_zones)) {
1866 printf("not at queue_end, zone=%p, tmp_elem=%p\n",
1867 zone, tmp_elem);
1868 assert(FALSE);
1869 }
1870 }
1871
1872 void
1873 db_zone_print_active(
1874 zone_t zone)
1875 {
1876 int count = 0;
1877 queue_t tmp_elem;
1878
1879 if (!zone_debug_enabled(zone)) {
1880 printf("zone %p debug not enabled\n", zone);
1881 return;
1882 }
1883 if (!zone_check) {
1884 printf("zone_check FALSE\n");
1885 return;
1886 }
1887
1888 printf("zone %p, active elements %d\n", zone, zone->count);
1889 printf("active list:\n");
1890 tmp_elem = queue_first(&zone->active_zones);
1891 while (count < zone->count) {
1892 printf(" %p", tmp_elem);
1893 count++;
1894 if ((count % 6) == 0)
1895 printf("\n");
1896 if (tmp_elem == 0) {
1897 printf("\nunexpected zero element, count=%d\n", count);
1898 break;
1899 }
1900 if (queue_end(tmp_elem, &zone->active_zones)) {
1901 printf("\nunexpected queue_end, count=%d\n", count);
1902 break;
1903 }
1904 tmp_elem = queue_next(tmp_elem);
1905 }
1906 if (!queue_end(tmp_elem, &zone->active_zones))
1907 printf("\nnot at queue_end, tmp_elem=%p\n", tmp_elem);
1908 else
1909 printf("\n");
1910 }
1911 #endif /* ZONE_DEBUG */
1912
1913 void
1914 db_zone_print_free(
1915 zone_t zone)
1916 {
1917 int count = 0;
1918 int freecount;
1919 vm_offset_t elem;
1920
1921 freecount = zone_free_count(zone);
1922 printf("zone %p, free elements %d\n", zone, freecount);
1923 printf("free list:\n");
1924 elem = zone->free_elements;
1925 while (count < freecount) {
1926 printf(" 0x%x", elem);
1927 count++;
1928 if ((count % 6) == 0)
1929 printf("\n");
1930 if (elem == 0) {
1931 printf("\nunexpected zero element, count=%d\n", count);
1932 break;
1933 }
1934 elem = *((vm_offset_t *)elem);
1935 }
1936 if (elem != 0)
1937 printf("\nnot at end of free list, elem=0x%x\n", elem);
1938 else
1939 printf("\n");
1940 }
1941
1942 #endif /* MACH_KDB */
1943
1944
1945 #if ZONE_DEBUG
1946
1947 /* Should we care about locks here? */
1948
1949 #if MACH_KDB
1950 void *
1951 next_element(
1952 zone_t z,
1953 void *prev)
1954 {
1955 char *elt = (char *)prev;
1956
1957 if (!zone_debug_enabled(z))
1958 return(NULL);
1959 elt -= ZONE_DEBUG_OFFSET;
1960 elt = (char *) queue_next((queue_t) elt);
1961 if ((queue_t) elt == &z->active_zones)
1962 return(NULL);
1963 elt += ZONE_DEBUG_OFFSET;
1964 return(elt);
1965 }
1966
1967 void *
1968 first_element(
1969 zone_t z)
1970 {
1971 char *elt;
1972
1973 if (!zone_debug_enabled(z))
1974 return(NULL);
1975 if (queue_empty(&z->active_zones))
1976 return(NULL);
1977 elt = (char *)queue_first(&z->active_zones);
1978 elt += ZONE_DEBUG_OFFSET;
1979 return(elt);
1980 }
1981
1982 /*
1983 * Second arg controls how many zone elements are printed:
1984 * 0 => none
1985 * n, n < 0 => all
1986 * n, n > 0 => last n on active list
1987 */
1988 int
1989 zone_count(
1990 zone_t z,
1991 int tail)
1992 {
1993 void *elt;
1994 int count = 0;
1995 boolean_t print = (tail != 0);
1996
1997 if (tail < 0)
1998 tail = z->count;
1999 if (z->count < tail)
2000 tail = 0;
2001 tail = z->count - tail;
2002 for (elt = first_element(z); elt; elt = next_element(z, elt)) {
2003 if (print && tail <= count)
2004 db_printf("%8x\n", elt);
2005 count++;
2006 }
2007 assert(count == z->count);
2008 return(count);
2009 }
2010 #endif /* MACH_KDB */
2011
2012 #define zone_in_use(z) ( z->count || z->free_elements )
2013
2014 void
2015 zone_debug_enable(
2016 zone_t z)
2017 {
2018 if (zone_debug_enabled(z) || zone_in_use(z) ||
2019 z->alloc_size < (z->elem_size + ZONE_DEBUG_OFFSET))
2020 return;
2021 queue_init(&z->active_zones);
2022 z->elem_size += ZONE_DEBUG_OFFSET;
2023 }
2024
2025 void
2026 zone_debug_disable(
2027 zone_t z)
2028 {
2029 if (!zone_debug_enabled(z) || zone_in_use(z))
2030 return;
2031 z->elem_size -= ZONE_DEBUG_OFFSET;
2032 z->active_zones.next = z->active_zones.prev = NULL;
2033 }
2034 #endif /* ZONE_DEBUG */