1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/zalloc.c
60 * Author: Avadis Tevanian, Jr.
61 *
62 * Zone-based memory allocator. A zone is a collection of fixed size
63 * data blocks for which quick allocation/deallocation is possible.
64 */
65 #include <zone_debug.h>
66 #include <zone_alias_addr.h>
67 #include <norma_vm.h>
68 #include <mach_kdb.h>
69
70 #include <mach/mach_types.h>
71 #include <mach/vm_param.h>
72 #include <mach/kern_return.h>
73 #include <mach/mach_host_server.h>
74 #include <mach/machine/vm_types.h>
75 #include <mach_debug/zone_info.h>
76
77 #include <kern/kern_types.h>
78 #include <kern/assert.h>
79 #include <kern/host.h>
80 #include <kern/macro_help.h>
81 #include <kern/sched.h>
82 #include <kern/lock.h>
83 #include <kern/sched_prim.h>
84 #include <kern/misc_protos.h>
85 #include <kern/thread_call.h>
86 #include <kern/zalloc.h>
87 #include <kern/kalloc.h>
88
89 #include <vm/pmap.h>
90 #include <vm/vm_map.h>
91 #include <vm/vm_kern.h>
92 #include <vm/vm_page.h>
93
94 #include <machine/machparam.h>
95
96 #include <libkern/OSDebug.h>
97 #include <sys/kdebug.h>
98
99 #if defined(__ppc__)
100 /* for fake zone stat routines */
101 #include <ppc/savearea.h>
102 #include <ppc/mappings.h>
103 #endif
104
105 int check_freed_element = 0;
106
107 #if MACH_ASSERT
108 /* Detect use of a zone element after freeing it by two methods:
109  * (1) Range-check the free-list "next" ptr for sanity.
110  * (2) Store the ptr in two different words, and compare them against
111  *     each other when re-using the zone element, to detect modifications.
112  */
113
114 #if defined(__alpha)
115
116 #define is_kernel_data_addr(a) \
117 (!(a) || (IS_SYS_VA(a) && !((a) & (sizeof(long)-1))))
118
119 #else /* !defined(__alpha) */
120
121 #define is_kernel_data_addr(a) \
122 (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
123
124 #endif /* defined(__alpha) */
125
126 /* Should we set all words of the zone element to an illegal address
127 * when it is freed, to help catch usage after freeing? The down-side
128 * is that this obscures the identity of the freed element.
129 */
130 boolean_t zfree_clear = FALSE;
131
132 #define ADD_TO_ZONE(zone, element) \
133 MACRO_BEGIN \
134 if (zfree_clear) \
135 { unsigned int i; \
136 for (i=1; \
137 i < zone->elem_size/sizeof(vm_offset_t) - 1; \
138 i++) \
139 ((vm_offset_t *)(element))[i] = 0xdeadbeef; \
140 } \
141 ((vm_offset_t *)(element))[0] = (zone)->free_elements; \
142 (zone)->free_elements = (vm_offset_t) (element); \
143 (zone)->count--; \
144 MACRO_END
145
146 #define REMOVE_FROM_ZONE(zone, ret, type) \
147 MACRO_BEGIN \
148 (ret) = (type) (zone)->free_elements; \
149 if ((ret) != (type) 0) { \
150 if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0])) { \
151 panic("A freed zone element has been modified.\n"); \
152 } \
153 (zone)->count++; \
154 (zone)->free_elements = *((vm_offset_t *)(ret)); \
155 } \
156 MACRO_END
157 #else /* MACH_ASSERT */
158
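/*
 * When the "-zc" boot argument sets check_freed_element, the free-list
 * pointer is stored both in the first word of a freed element and, when
 * the element is large enough, duplicated in its last word.
 * REMOVE_FROM_ZONE compares the two copies and panics if a freed element
 * was modified while it sat on the free list.
 */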
159 #define ADD_TO_ZONE(zone, element) \
160 MACRO_BEGIN \
161 *((vm_offset_t *)(element)) = (zone)->free_elements; \
162 if (check_freed_element) { \
163 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
164 ((vm_offset_t *)(element))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
165 (zone)->free_elements; \
166 } \
167 (zone)->free_elements = (vm_offset_t) (element); \
168 (zone)->count--; \
169 MACRO_END
170
171 #define REMOVE_FROM_ZONE(zone, ret, type) \
172 MACRO_BEGIN \
173 (ret) = (type) (zone)->free_elements; \
174 if ((ret) != (type) 0) { \
175 if (check_freed_element) { \
176 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t)) && \
177 ((vm_offset_t *)(ret))[((zone)->elem_size/sizeof(vm_offset_t))-1] != \
178 ((vm_offset_t *)(ret))[0]) \
179 panic("a freed zone element has been modified");\
180 } \
181 (zone)->count++; \
182 (zone)->free_elements = *((vm_offset_t *)(ret)); \
183 } \
184 MACRO_END
185
186 #endif /* MACH_ASSERT */
187
188 #if ZONE_DEBUG
189 #define zone_debug_enabled(z) z->active_zones.next
190 #define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
191 #define ZONE_DEBUG_OFFSET ROUNDUP(sizeof(queue_chain_t),16)
192 #endif /* ZONE_DEBUG */
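/*
 * With ZONE_DEBUG enabled, each allocated element carries a queue_chain_t
 * header (rounded up to 16 bytes) that links it onto the zone's
 * active_zones list; zalloc()/zget() return the address just past that
 * header (element + ZONE_DEBUG_OFFSET) and zfree() backs it out before
 * unlinking.
 */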
193
194 /*
195 * Support for garbage collection of unused zone pages:
196 */
197
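/*
 * One entry per page of the zone map: alloc_count is the number of zone
 * elements resident on the page (ZONE_PAGE_UNUSED if the page holds none),
 * and collect_count is the number of those elements found on a free list
 * during the current zone_gc() pass.  A page is reclaimable when the two
 * counts are equal, i.e. every element on it is free (see
 * zone_page_collectable()).
 */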
198 struct zone_page_table_entry {
199 struct zone_page_table_entry *link;
200 short alloc_count;
201 short collect_count;
202 };
203
204 /* Forwards */
205 void zone_page_init(
206 vm_offset_t addr,
207 vm_size_t size,
208 int value);
209
210 void zone_page_alloc(
211 vm_offset_t addr,
212 vm_size_t size);
213
214 void zone_page_free_element(
215 struct zone_page_table_entry **free_pages,
216 vm_offset_t addr,
217 vm_size_t size);
218
219 void zone_page_collect(
220 vm_offset_t addr,
221 vm_size_t size);
222
223 boolean_t zone_page_collectable(
224 vm_offset_t addr,
225 vm_size_t size);
226
227 void zone_page_keep(
228 vm_offset_t addr,
229 vm_size_t size);
230
231 void zalloc_async(
232 thread_call_param_t p0,
233 thread_call_param_t p1);
234
235
236 #if ZONE_DEBUG && MACH_KDB
237 int zone_count(
238 zone_t z,
239 int tail);
240 #endif /* ZONE_DEBUG && MACH_KDB */
241
242 vm_map_t zone_map = VM_MAP_NULL;
243
244 zone_t zone_zone = ZONE_NULL; /* the zone containing other zones */
245
246 /*
247 * The VM system gives us an initial chunk of memory.
248  * It has to be big enough to allocate the zone_zone.
249 */
250
251 vm_offset_t zdata;
252 vm_size_t zdata_size;
253
254 #define lock_zone(zone) \
255 MACRO_BEGIN \
256 lck_mtx_lock(&(zone)->lock); \
257 MACRO_END
258
259 #define unlock_zone(zone) \
260 MACRO_BEGIN \
261 lck_mtx_unlock(&(zone)->lock); \
262 MACRO_END
263
264 #define zone_wakeup(zone) thread_wakeup((event_t)(zone))
265 #define zone_sleep(zone) \
266 (void) lck_mtx_sleep(&(zone)->lock, 0, (event_t)(zone), THREAD_UNINT);
267
268
269 #define lock_zone_init(zone) \
270 MACRO_BEGIN \
271 char _name[32]; \
272 (void) snprintf(_name, sizeof (_name), "zone.%s", (zone)->zone_name); \
273 lck_grp_attr_setdefault(&(zone)->lock_grp_attr); \
274 lck_grp_init(&(zone)->lock_grp, _name, &(zone)->lock_grp_attr); \
275 lck_attr_setdefault(&(zone)->lock_attr); \
276 lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext, \
277 &(zone)->lock_grp, &(zone)->lock_attr); \
278 MACRO_END
279
280 #define lock_try_zone(zone) lck_mtx_try_lock(&zone->lock)
281
282 kern_return_t zget_space(
283 vm_offset_t size,
284 vm_offset_t *result);
285
286 decl_simple_lock_data(,zget_space_lock)
287 vm_offset_t zalloc_next_space;
288 vm_offset_t zalloc_end_of_space;
289 vm_size_t zalloc_wasted_space;
290
291 /*
292 * Garbage collection map information
293 */
294 struct zone_page_table_entry * zone_page_table;
295 vm_offset_t zone_map_min_address;
296 vm_offset_t zone_map_max_address;
297 unsigned int zone_pages;
298
299 /*
300 * Exclude more than one concurrent garbage collection
301 */
302 decl_mutex_data(, zone_gc_lock)
303
304 #if !ZONE_ALIAS_ADDR
305 #define from_zone_map(addr, size) \
306 ((vm_offset_t)(addr) >= zone_map_min_address && \
307 ((vm_offset_t)(addr) + size -1) < zone_map_max_address)
308 #else
309 #define from_zone_map(addr, size) \
310 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) >= zone_map_min_address && \
311 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) + size -1) < zone_map_max_address)
312 #endif
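/*
 * from_zone_map() tests whether [addr, addr + size) lies entirely within
 * the zone submap.  Memory outside that range is "foreign"; collectable
 * zones will only accept it if they are marked Z_FOREIGN (see zcram()
 * and zfree()).
 */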
313
314 #define ZONE_PAGE_USED 0
315 #define ZONE_PAGE_UNUSED -1
316
317
318 /*
319 * Protects first_zone, last_zone, num_zones,
320 * and the next_zone field of zones.
321 */
322 decl_simple_lock_data(, all_zones_lock)
323 zone_t first_zone;
324 zone_t *last_zone;
325 unsigned int num_zones;
326
327 boolean_t zone_gc_allowed = TRUE;
328 boolean_t zone_gc_forced = FALSE;
329 unsigned zone_gc_last_tick = 0;
330 unsigned zone_gc_max_rate = 0; /* in ticks */
331
332
333 /*
334 * zinit initializes a new zone. The zone data structures themselves
335 * are stored in a zone, which is initially a static structure that
336 * is initialized by zone_init.
337 */
338 zone_t
339 zinit(
340 vm_size_t size, /* the size of an element */
341 vm_size_t max, /* maximum memory to use */
342 vm_size_t alloc, /* allocation size */
343 const char *name) /* a name for the zone */
344 {
345 zone_t z;
346
347 if (zone_zone == ZONE_NULL) {
348 if (zget_space(sizeof(struct zone), (vm_offset_t *)&z)
349 != KERN_SUCCESS)
350 return(ZONE_NULL);
351 } else
352 z = (zone_t) zalloc(zone_zone);
353 if (z == ZONE_NULL)
354 return(ZONE_NULL);
355
356 /*
357 * Round off all the parameters appropriately.
358 */
359 if (size < sizeof(z->free_elements))
360 size = sizeof(z->free_elements);
361 size = ((size-1) + sizeof(z->free_elements)) -
362 ((size-1) % sizeof(z->free_elements));
363 if (alloc == 0)
364 alloc = PAGE_SIZE;
365 alloc = round_page(alloc);
366 max = round_page(max);
367 /*
368 * We look for an allocation size with less than 1% waste,
369 * up to 5 pages in size...
370 * otherwise, we look for the allocation size with the least
371 * fragmentation in the range of 1 - 5 pages.
372 * This size will be used unless the caller's suggested size
373 * is larger AND has less fragmentation.
374 */
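/*
 * A worked example of the heuristic above (assuming 4 KB pages; the
 * 1280-byte element size is illustrative only): a 1-page chunk wastes
 * 256 bytes, well over 1%, and 2-4 page chunks fare no better, but a
 * 5-page chunk holds exactly 16 elements with no waste, so alloc
 * becomes 5 * PAGE_SIZE.
 */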
375 #if ZONE_ALIAS_ADDR
376 if ((size < PAGE_SIZE) && (PAGE_SIZE % size <= PAGE_SIZE / 10))
377 alloc = PAGE_SIZE;
378 else
379 #endif
380 { vm_size_t best, waste; unsigned int i;
381 best = PAGE_SIZE;
382 waste = best % size;
383
384 for (i = 1; i <= 5; i++) {
385 vm_size_t tsize, twaste;
386
387 tsize = i * PAGE_SIZE;
388
389 if ((tsize % size) < (tsize / 100)) {
390 alloc = tsize;
391 goto use_this_allocation;
392 }
393 twaste = tsize % size;
394 if (twaste < waste)
395 best = tsize, waste = twaste;
396 }
397 if (alloc <= best || (alloc % size >= waste))
398 alloc = best;
399 }
400 use_this_allocation:
401 if (max && (max < alloc))
402 max = alloc;
403
404 z->free_elements = 0;
405 z->cur_size = 0;
406 z->max_size = max;
407 z->elem_size = size;
408 z->alloc_size = alloc;
409 z->zone_name = name;
410 z->count = 0;
411 z->doing_alloc = FALSE;
412 z->doing_gc = FALSE;
413 z->exhaustible = FALSE;
414 z->collectable = TRUE;
415 z->allows_foreign = FALSE;
416 z->expandable = TRUE;
417 z->waiting = FALSE;
418 z->async_pending = FALSE;
419
420 #if ZONE_DEBUG
421 z->active_zones.next = z->active_zones.prev = NULL;
422 zone_debug_enable(z);
423 #endif /* ZONE_DEBUG */
424 lock_zone_init(z);
425
426 /*
427 * Add the zone to the all-zones list.
428 */
429
430 z->next_zone = ZONE_NULL;
431 thread_call_setup(&z->call_async_alloc, zalloc_async, z);
432 simple_lock(&all_zones_lock);
433 *last_zone = z;
434 last_zone = &z->next_zone;
435 num_zones++;
436 simple_unlock(&all_zones_lock);
437
438 return(z);
439 }
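/*
 * Illustrative usage only ("widget_zone" and "struct widget" are
 * hypothetical, not part of this file).  A subsystem typically creates
 * its zone once at init time, giving the element size, maximum memory,
 * allocation chunk size and name, then allocates and frees elements:
 *
 *	widget_zone = zinit(sizeof(struct widget),
 *			    1024 * sizeof(struct widget),
 *			    PAGE_SIZE, "widgets");
 *	w = (struct widget *) zalloc(widget_zone);
 *	...
 *	zfree(widget_zone, w);
 */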
440
441 /*
442 * Cram the given memory into the specified zone.
443 */
444 void
445 zcram(
446 register zone_t zone,
447 void *newaddr,
448 vm_size_t size)
449 {
450 register vm_size_t elem_size;
451 vm_offset_t newmem = (vm_offset_t) newaddr;
452
453 /* Basic sanity checks */
454 assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
455 assert(!zone->collectable || zone->allows_foreign
456 || (from_zone_map(newmem, size)));
457
458 elem_size = zone->elem_size;
459
460 lock_zone(zone);
461 while (size >= elem_size) {
462 ADD_TO_ZONE(zone, newmem);
463 if (from_zone_map(newmem, elem_size))
464 zone_page_alloc(newmem, elem_size);
465 zone->count++; /* compensate for ADD_TO_ZONE */
466 size -= elem_size;
467 newmem += elem_size;
468 zone->cur_size += elem_size;
469 }
470 unlock_zone(zone);
471 }
472
473 /*
474 * Contiguous space allocator for non-paged zones. Allocates "size" amount
475 * of memory from zone_map.
476 */
477
478 kern_return_t
479 zget_space(
480 vm_offset_t size,
481 vm_offset_t *result)
482 {
483 vm_offset_t new_space = 0;
484 vm_size_t space_to_add = 0;
485
486 simple_lock(&zget_space_lock);
487 while ((zalloc_next_space + size) > zalloc_end_of_space) {
488 /*
489 * Add at least one page to allocation area.
490 */
491
492 space_to_add = round_page(size);
493
494 if (new_space == 0) {
495 kern_return_t retval;
496 /*
497 * Memory cannot be wired down while holding
498 * any locks that the pageout daemon might
499 * need to free up pages. [Making the zget_space
500 * lock a complex lock does not help in this
501 * regard.]
502 *
503 * Unlock and allocate memory. Because several
504 * threads might try to do this at once, don't
505 * use the memory before checking for available
506 * space again.
507 */
508
509 simple_unlock(&zget_space_lock);
510
511 retval = kernel_memory_allocate(zone_map, &new_space,
512 space_to_add, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
513 if (retval != KERN_SUCCESS)
514 return(retval);
515 #if ZONE_ALIAS_ADDR
516 if (space_to_add == PAGE_SIZE)
517 new_space = zone_alias_addr(new_space);
518 #endif
519 zone_page_init(new_space, space_to_add,
520 ZONE_PAGE_USED);
521 simple_lock(&zget_space_lock);
522 continue;
523 }
524
525
526 /*
527 * Memory was allocated in a previous iteration.
528 *
529 * Check whether the new region is contiguous
530 * with the old one.
531 */
532
533 if (new_space != zalloc_end_of_space) {
534 /*
535 * Throw away the remainder of the
536 * old space, and start a new one.
537 */
538 zalloc_wasted_space +=
539 zalloc_end_of_space - zalloc_next_space;
540 zalloc_next_space = new_space;
541 }
542
543 zalloc_end_of_space = new_space + space_to_add;
544
545 new_space = 0;
546 }
547 *result = zalloc_next_space;
548 zalloc_next_space += size;
549 simple_unlock(&zget_space_lock);
550
551 if (new_space != 0)
552 kmem_free(zone_map, new_space, space_to_add);
553
554 return(KERN_SUCCESS);
555 }
556
557
558 /*
559 * Steal memory for the zone package. Called from
560 * vm_page_bootstrap().
561 */
562 void
563 zone_steal_memory(void)
564 {
565 zdata_size = round_page(128*sizeof(struct zone));
566 zdata = (vm_offset_t)((char *)pmap_steal_memory(zdata_size) - (char *)0);
567 }
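/*
 * The amount stolen here (room for 128 struct zones) matches the
 * maximum size later given to the "zones" zone in zone_bootstrap().
 */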
568
569
570 /*
571 * Fill a zone with enough memory to contain at least nelem elements.
572 * Memory is obtained with kmem_alloc_wired from the kernel_map.
573 * Return the number of elements actually put into the zone, which may
574 * be more than the caller asked for since the memory allocation is
575 * rounded up to a full page.
576 */
577 int
578 zfill(
579 zone_t zone,
580 int nelem)
581 {
582 kern_return_t kr;
583 vm_size_t size;
584 vm_offset_t memory;
585 int nalloc;
586
587 assert(nelem > 0);
588 if (nelem <= 0)
589 return 0;
590 size = nelem * zone->elem_size;
591 size = round_page(size);
592 kr = kmem_alloc_wired(kernel_map, &memory, size);
593 if (kr != KERN_SUCCESS)
594 return 0;
595
596 zone_change(zone, Z_FOREIGN, TRUE);
597 zcram(zone, (void *)memory, size);
598 nalloc = size / zone->elem_size;
599 assert(nalloc >= nelem);
600
601 return nalloc;
602 }
603
604 /*
605 * Initialize the "zone of zones" which uses fixed memory allocated
606 * earlier in memory initialization. zone_bootstrap is called
607 * before zone_init.
608 */
609 void
610 zone_bootstrap(void)
611 {
612 vm_size_t zone_zone_size;
613 vm_offset_t zone_zone_space;
614 char temp_buf[16];
615
616 /* see if we want freed zone element checking */
617 if (PE_parse_boot_argn("-zc", temp_buf, sizeof (temp_buf))) {
618 check_freed_element = 1;
619 }
620
621 simple_lock_init(&all_zones_lock, 0);
622
623 first_zone = ZONE_NULL;
624 last_zone = &first_zone;
625 num_zones = 0;
626
627 simple_lock_init(&zget_space_lock, 0);
628 zalloc_next_space = zdata;
629 zalloc_end_of_space = zdata + zdata_size;
630 zalloc_wasted_space = 0;
631
632 /* assertion: nobody else called zinit before us */
633 assert(zone_zone == ZONE_NULL);
634 zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
635 sizeof(struct zone), "zones");
636 zone_change(zone_zone, Z_COLLECT, FALSE);
637 zone_zone_size = zalloc_end_of_space - zalloc_next_space;
638 zget_space(zone_zone_size, &zone_zone_space);
639 zcram(zone_zone, (void *)zone_zone_space, zone_zone_size);
640 }
641
642 void
643 zone_init(
644 vm_size_t max_zonemap_size)
645 {
646 kern_return_t retval;
647 vm_offset_t zone_min;
648 vm_offset_t zone_max;
649 vm_size_t zone_table_size;
650
651 retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
652 FALSE, VM_FLAGS_ANYWHERE, &zone_map);
653
654 if (retval != KERN_SUCCESS)
655 panic("zone_init: kmem_suballoc failed");
656 zone_max = zone_min + round_page(max_zonemap_size);
657 /*
658 * Setup garbage collection information:
659 */
660 zone_table_size = atop_32(zone_max - zone_min) *
661 sizeof(struct zone_page_table_entry);
662 if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table,
663 zone_table_size) != KERN_SUCCESS)
664 panic("zone_init");
665 zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
666 zone_pages = atop_32(zone_max - zone_min);
667 zone_map_min_address = zone_min;
668 zone_map_max_address = zone_max;
669 mutex_init(&zone_gc_lock, 0);
670 zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
671 }
672
673
674 /*
675 * zalloc returns an element from the specified zone.
676 */
677 void *
678 zalloc_canblock(
679 register zone_t zone,
680 boolean_t canblock)
681 {
682 vm_offset_t addr;
683 kern_return_t retval;
684
685 assert(zone != ZONE_NULL);
686
687 lock_zone(zone);
688
689 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
690
691 while ((addr == 0) && canblock && (zone->doing_gc)) {
692 zone->waiting = TRUE;
693 zone_sleep(zone);
694 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
695 }
696
697 while ((addr == 0) && canblock) {
698 /*
699 * If nothing was there, try to get more
700 */
701 if (zone->doing_alloc) {
702 /*
703 * Someone is allocating memory for this zone.
704 * Wait for it to show up, then try again.
705 */
706 zone->waiting = TRUE;
707 zone_sleep(zone);
708 }
709 else {
710 if ((zone->cur_size + zone->elem_size) >
711 zone->max_size) {
712 if (zone->exhaustible)
713 break;
714 if (zone->expandable) {
715 /*
716 * We're willing to overflow certain
717 * zones, but not without complaining.
718 *
719 * This is best used in conjunction
720 * with the collectable flag. What we
721 * want is an assurance we can get the
722 * memory back, assuming there's no
723 * leak.
724 */
725 zone->max_size += (zone->max_size >> 1);
726 } else {
727 unlock_zone(zone);
728
729 panic("zalloc: zone \"%s\" empty.", zone->zone_name);
730 }
731 }
732 zone->doing_alloc = TRUE;
733 unlock_zone(zone);
734
735 if (zone->collectable) {
736 vm_offset_t space;
737 vm_size_t alloc_size;
738 int retry = 0;
739
740 for (;;) {
741
742 if (vm_pool_low() || retry >= 1)
743 alloc_size =
744 round_page(zone->elem_size);
745 else
746 alloc_size = zone->alloc_size;
747
748 retval = kernel_memory_allocate(zone_map,
749 &space, alloc_size, 0,
750 KMA_KOBJECT|KMA_NOPAGEWAIT);
751 if (retval == KERN_SUCCESS) {
752 #if ZONE_ALIAS_ADDR
753 if (alloc_size == PAGE_SIZE)
754 space = zone_alias_addr(space);
755 #endif
756 zone_page_init(space, alloc_size,
757 ZONE_PAGE_USED);
758 zcram(zone, (void *)space, alloc_size);
759
760 break;
761 } else if (retval != KERN_RESOURCE_SHORTAGE) {
762 retry++;
763
764 if (retry == 2) {
765 zone_gc();
766 printf("zalloc did gc\n");
767 }
768 if (retry == 3)
769 panic("zalloc: \"%s\" (%d elements) retry fail %d", zone->zone_name, zone->count, retval);
770 } else {
771 break;
772 }
773 }
774 lock_zone(zone);
775 zone->doing_alloc = FALSE;
776 if (zone->waiting) {
777 zone->waiting = FALSE;
778 zone_wakeup(zone);
779 }
780 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
781 if (addr == 0 &&
782 retval == KERN_RESOURCE_SHORTAGE) {
783 unlock_zone(zone);
784
785 VM_PAGE_WAIT();
786 lock_zone(zone);
787 }
788 } else {
789 vm_offset_t space;
790 retval = zget_space(zone->elem_size, &space);
791
792 lock_zone(zone);
793 zone->doing_alloc = FALSE;
794 if (zone->waiting) {
795 zone->waiting = FALSE;
796 thread_wakeup((event_t)zone);
797 }
798 if (retval == KERN_SUCCESS) {
799 zone->count++;
800 zone->cur_size += zone->elem_size;
801 #if ZONE_DEBUG
802 if (zone_debug_enabled(zone)) {
803 enqueue_tail(&zone->active_zones, (queue_entry_t)space);
804 }
805 #endif
806 unlock_zone(zone);
807 zone_page_alloc(space, zone->elem_size);
808 #if ZONE_DEBUG
809 if (zone_debug_enabled(zone))
810 space += ZONE_DEBUG_OFFSET;
811 #endif
812 addr = space;
813 goto success;
814 }
815 if (retval == KERN_RESOURCE_SHORTAGE) {
816 unlock_zone(zone);
817
818 VM_PAGE_WAIT();
819 lock_zone(zone);
820 } else {
821 panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval);
822 }
823 }
824 }
825 if (addr == 0)
826 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
827 }
828
829 if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) {
830 zone->async_pending = TRUE;
831 unlock_zone(zone);
832 thread_call_enter(&zone->call_async_alloc);
833 lock_zone(zone);
834 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
835 }
836
837 #if ZONE_DEBUG
838 if (addr && zone_debug_enabled(zone)) {
839 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
840 addr += ZONE_DEBUG_OFFSET;
841 }
842 #endif
843
844 unlock_zone(zone);
845
846 success:
847 TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr);
848
849 return((void *)addr);
850 }
851
852
853 void *
854 zalloc(
855 register zone_t zone)
856 {
857 return( zalloc_canblock(zone, TRUE) );
858 }
859
860 void *
861 zalloc_noblock(
862 register zone_t zone)
863 {
864 return( zalloc_canblock(zone, FALSE) );
865 }
866
867 void
868 zalloc_async(
869 thread_call_param_t p0,
870 __unused thread_call_param_t p1)
871 {
872 void *elt;
873
874 elt = zalloc_canblock((zone_t)p0, TRUE);
875 zfree((zone_t)p0, elt);
876 lock_zone(((zone_t)p0));
877 ((zone_t)p0)->async_pending = FALSE;
878 unlock_zone(((zone_t)p0));
879 }
880
881
882 /*
883 * zget returns an element from the specified zone, or NULL
884 * immediately if none is available.
885 *
886 * This form should be used when you cannot block (for example,
887 * when processing an interrupt).
888 */
889 void *
890 zget(
891 register zone_t zone)
892 {
893 register vm_offset_t addr;
894
895 assert( zone != ZONE_NULL );
896
897 if (!lock_try_zone(zone))
898 return NULL;
899
900 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
901 #if ZONE_DEBUG
902 if (addr && zone_debug_enabled(zone)) {
903 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
904 addr += ZONE_DEBUG_OFFSET;
905 }
906 #endif /* ZONE_DEBUG */
907 unlock_zone(zone);
908
909 return((void *) addr);
910 }
911
912 /* Keep this FALSE by default.  Large memory machines run orders of magnitude
913    slower in debug mode when this is TRUE.  Use the debugger to enable it if needed. */
914 /* static */ boolean_t zone_check = FALSE;
915
916 static zone_t zone_last_bogus_zone = ZONE_NULL;
917 static vm_offset_t zone_last_bogus_elem = 0;
918
919 void
920 zfree(
921 register zone_t zone,
922 void *addr)
923 {
924 vm_offset_t elem = (vm_offset_t) addr;
925
926 #if MACH_ASSERT
927 /* Basic sanity checks */
928 if (zone == ZONE_NULL || elem == (vm_offset_t)0)
929 panic("zfree: NULL");
930 /* zone_gc assumes zones are never freed */
931 if (zone == zone_zone)
932 panic("zfree: freeing to zone_zone breaks zone_gc!");
933 #endif
934
935 TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (int)addr);
936
937 if (zone->collectable && !zone->allows_foreign &&
938 !from_zone_map(elem, zone->elem_size)) {
939 #if MACH_ASSERT
940 panic("zfree: non-allocated memory in collectable zone!");
941 #endif
942 zone_last_bogus_zone = zone;
943 zone_last_bogus_elem = elem;
944 return;
945 }
946
947 lock_zone(zone);
948 #if ZONE_DEBUG
949 if (zone_debug_enabled(zone)) {
950 queue_t tmp_elem;
951
952 elem -= ZONE_DEBUG_OFFSET;
953 if (zone_check) {
954 /* check the zone's consistency */
955
956 for (tmp_elem = queue_first(&zone->active_zones);
957 !queue_end(tmp_elem, &zone->active_zones);
958 tmp_elem = queue_next(tmp_elem))
959 if (elem == (vm_offset_t)tmp_elem)
960 break;
961 if (elem != (vm_offset_t)tmp_elem)
962 panic("zfree()ing element from wrong zone");
963 }
964 remqueue(&zone->active_zones, (queue_t) elem);
965 }
966 #endif /* ZONE_DEBUG */
967 if (zone_check) {
968 vm_offset_t this;
969
970 /* check the zone's consistency */
971
972 for (this = zone->free_elements;
973 this != 0;
974 this = * (vm_offset_t *) this)
975 if (!pmap_kernel_va(this) || this == elem)
976 panic("zfree");
977 }
978 ADD_TO_ZONE(zone, elem);
979
980 /*
981 * If elements are at least a page in size and memory is low,
982 * request that the zone garbage collector run the next time
983 * the pageout thread runs.
984 */
985 if (zone->elem_size >= PAGE_SIZE &&
986 vm_pool_low()){
987 zone_gc_forced = TRUE;
988 }
989 unlock_zone(zone);
990 }
991
992
993 /* Change a zone's flags.
994 * This routine must be called immediately after zinit.
995 */
996 void
997 zone_change(
998 zone_t zone,
999 unsigned int item,
1000 boolean_t value)
1001 {
1002 assert( zone != ZONE_NULL );
1003 assert( value == TRUE || value == FALSE );
1004
1005 switch(item){
1006 case Z_EXHAUST:
1007 zone->exhaustible = value;
1008 break;
1009 case Z_COLLECT:
1010 zone->collectable = value;
1011 break;
1012 case Z_EXPAND:
1013 zone->expandable = value;
1014 break;
1015 case Z_FOREIGN:
1016 zone->allows_foreign = value;
1017 break;
1018 #if MACH_ASSERT
1019 default:
1020 panic("Zone_change: Wrong Item Type!");
1021 /* break; */
1022 #endif
1023 }
1024 }
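/*
 * Illustrative only ("my_zone" is a hypothetical zone_t, not defined in
 * this file): callers flag special zones right after zinit(), e.g. a
 * zone whose allocations should fail rather than grow past its maximum,
 * and which accepts memory from outside the zone map:
 *
 *	zone_change(my_zone, Z_EXHAUST, TRUE);
 *	zone_change(my_zone, Z_FOREIGN, TRUE);
 */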
1025
1026 /*
1027 * Return the expected number of free elements in the zone.
1028 * This calculation will be incorrect if items are zfree'd that
1029 * were never zalloc'd/zget'd. The correct way to stuff memory
1030 * into a zone is by zcram.
1031 */
1032
1033 integer_t
1034 zone_free_count(zone_t zone)
1035 {
1036 integer_t free_count;
1037
1038 lock_zone(zone);
1039 free_count = zone->cur_size/zone->elem_size - zone->count;
1040 unlock_zone(zone);
1041
1042 assert(free_count >= 0);
1043
1044 return(free_count);
1045 }
1046
1047 /*
1048 * zprealloc preallocates wired memory, expanding the specified
1049 * zone to the specified size.
1050 */
1051 void
1052 zprealloc(
1053 zone_t zone,
1054 vm_size_t size)
1055 {
1056 vm_offset_t addr;
1057
1058 if (size != 0) {
1059 if (kmem_alloc_wired(zone_map, &addr, size) != KERN_SUCCESS)
1060 panic("zprealloc");
1061 zone_page_init(addr, size, ZONE_PAGE_USED);
1062 zcram(zone, (void *)addr, size);
1063 }
1064 }
1065
1066 /*
1067 * Zone garbage collection subroutines
1068 */
1069
1070 boolean_t
1071 zone_page_collectable(
1072 vm_offset_t addr,
1073 vm_size_t size)
1074 {
1075 struct zone_page_table_entry *zp;
1076 natural_t i, j;
1077
1078 #if ZONE_ALIAS_ADDR
1079 addr = zone_virtual_addr(addr);
1080 #endif
1081 #if MACH_ASSERT
1082 if (!from_zone_map(addr, size))
1083 panic("zone_page_collectable");
1084 #endif
1085
1086 i = atop_32(addr-zone_map_min_address);
1087 j = atop_32((addr+size-1) - zone_map_min_address);
1088
1089 for (zp = zone_page_table + i; i <= j; zp++, i++)
1090 if (zp->collect_count == zp->alloc_count)
1091 return (TRUE);
1092
1093 return (FALSE);
1094 }
1095
1096 void
1097 zone_page_keep(
1098 vm_offset_t addr,
1099 vm_size_t size)
1100 {
1101 struct zone_page_table_entry *zp;
1102 natural_t i, j;
1103
1104 #if ZONE_ALIAS_ADDR
1105 addr = zone_virtual_addr(addr);
1106 #endif
1107 #if MACH_ASSERT
1108 if (!from_zone_map(addr, size))
1109 panic("zone_page_keep");
1110 #endif
1111
1112 i = atop_32(addr-zone_map_min_address);
1113 j = atop_32((addr+size-1) - zone_map_min_address);
1114
1115 for (zp = zone_page_table + i; i <= j; zp++, i++)
1116 zp->collect_count = 0;
1117 }
1118
1119 void
1120 zone_page_collect(
1121 vm_offset_t addr,
1122 vm_size_t size)
1123 {
1124 struct zone_page_table_entry *zp;
1125 natural_t i, j;
1126
1127 #if ZONE_ALIAS_ADDR
1128 addr = zone_virtual_addr(addr);
1129 #endif
1130 #if MACH_ASSERT
1131 if (!from_zone_map(addr, size))
1132 panic("zone_page_collect");
1133 #endif
1134
1135 i = atop_32(addr-zone_map_min_address);
1136 j = atop_32((addr+size-1) - zone_map_min_address);
1137
1138 for (zp = zone_page_table + i; i <= j; zp++, i++)
1139 ++zp->collect_count;
1140 }
1141
1142 void
1143 zone_page_init(
1144 vm_offset_t addr,
1145 vm_size_t size,
1146 int value)
1147 {
1148 struct zone_page_table_entry *zp;
1149 natural_t i, j;
1150
1151 #if ZONE_ALIAS_ADDR
1152 addr = zone_virtual_addr(addr);
1153 #endif
1154 #if MACH_ASSERT
1155 if (!from_zone_map(addr, size))
1156 panic("zone_page_init");
1157 #endif
1158
1159 i = atop_32(addr-zone_map_min_address);
1160 j = atop_32((addr+size-1) - zone_map_min_address);
1161
1162 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1163 zp->alloc_count = value;
1164 zp->collect_count = 0;
1165 }
1166 }
1167
1168 void
1169 zone_page_alloc(
1170 vm_offset_t addr,
1171 vm_size_t size)
1172 {
1173 struct zone_page_table_entry *zp;
1174 natural_t i, j;
1175
1176 #if ZONE_ALIAS_ADDR
1177 addr = zone_virtual_addr(addr);
1178 #endif
1179 #if MACH_ASSERT
1180 if (!from_zone_map(addr, size))
1181 panic("zone_page_alloc");
1182 #endif
1183
1184 i = atop_32(addr-zone_map_min_address);
1185 j = atop_32((addr+size-1) - zone_map_min_address);
1186
1187 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1188 /*
1189 * Set alloc_count to (ZONE_PAGE_USED + 1) if
1190 * it was previously set to ZONE_PAGE_UNUSED.
1191 */
1192 if (zp->alloc_count == ZONE_PAGE_UNUSED)
1193 zp->alloc_count = 1;
1194 else
1195 ++zp->alloc_count;
1196 }
1197 }
1198
1199 void
1200 zone_page_free_element(
1201 struct zone_page_table_entry **free_pages,
1202 vm_offset_t addr,
1203 vm_size_t size)
1204 {
1205 struct zone_page_table_entry *zp;
1206 natural_t i, j;
1207
1208 #if ZONE_ALIAS_ADDR
1209 addr = zone_virtual_addr(addr);
1210 #endif
1211 #if MACH_ASSERT
1212 if (!from_zone_map(addr, size))
1213 panic("zone_page_free_element");
1214 #endif
1215
1216 i = atop_32(addr-zone_map_min_address);
1217 j = atop_32((addr+size-1) - zone_map_min_address);
1218
1219 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1220 if (zp->collect_count > 0)
1221 --zp->collect_count;
1222 if (--zp->alloc_count == 0) {
1223 zp->alloc_count = ZONE_PAGE_UNUSED;
1224 zp->collect_count = 0;
1225
1226 zp->link = *free_pages;
1227 *free_pages = zp;
1228 }
1229 }
1230 }
1231
1232
1233 /* This is used for walking through a zone's free element list.
1234 */
1235 struct zone_free_element {
1236 struct zone_free_element * next;
1237 };
1238
1239 /*
1240 * Add a linked list of pages starting at base back into the zone
1241 * free list. Tail points to the last element on the list.
1242 */
1243
1244 #define ADD_LIST_TO_ZONE(zone, base, tail) \
1245 MACRO_BEGIN \
1246 (tail)->next = (void *)((zone)->free_elements); \
1247 if (check_freed_element) { \
1248 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
1249 ((vm_offset_t *)(tail))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
1250 (zone)->free_elements; \
1251 } \
1252 (zone)->free_elements = (unsigned long)(base); \
1253 MACRO_END
1254
1255 /*
1256 * Add an element to the chain pointed to by prev.
1257 */
1258
1259 #define ADD_ELEMENT(zone, prev, elem) \
1260 MACRO_BEGIN \
1261 (prev)->next = (elem); \
1262 if (check_freed_element) { \
1263 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
1264 ((vm_offset_t *)(prev))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
1265 (vm_offset_t)(elem); \
1266 } \
1267 MACRO_END
1268
1269 struct {
1270 uint32_t pgs_freed;
1271
1272 uint32_t elems_collected,
1273 elems_freed,
1274 elems_kept;
1275 } zgc_stats;
1276
1277 /* Zone garbage collection
1278 *
1279 * zone_gc will walk through all the free elements in all the
1280 * zones that are marked collectable looking for reclaimable
1281 * pages. zone_gc is called by consider_zone_gc when the system
1282 * begins to run out of memory.
1283 */
1284 void
1285 zone_gc(void)
1286 {
1287 unsigned int max_zones;
1288 zone_t z;
1289 unsigned int i;
1290 struct zone_page_table_entry *zp, *zone_free_pages;
1291
1292 mutex_lock(&zone_gc_lock);
1293
1294 simple_lock(&all_zones_lock);
1295 max_zones = num_zones;
1296 z = first_zone;
1297 simple_unlock(&all_zones_lock);
1298
1299 #if MACH_ASSERT
1300 for (i = 0; i < zone_pages; i++)
1301 assert(zone_page_table[i].collect_count == 0);
1302 #endif /* MACH_ASSERT */
1303
1304 zone_free_pages = NULL;
1305
1306 for (i = 0; i < max_zones; i++, z = z->next_zone) {
1307 unsigned int n, m;
1308 vm_size_t elt_size, size_freed;
1309 struct zone_free_element *elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail;
1310
1311 assert(z != ZONE_NULL);
1312
1313 if (!z->collectable)
1314 continue;
1315
1316 lock_zone(z);
1317
1318 elt_size = z->elem_size;
1319
1320 /*
1321 * Do a quick feasibility check before we scan the zone:
1322 * skip unless there is a likelihood of getting pages back
1323 * (i.e. we need more than two allocation blocks' worth of free
1324 * elements before we can garbage collect) and the zone has more
1325 * than 10 percent of its elements free,
1326 * or the element size is a multiple of PAGE_SIZE.
1327 */
1328 if ((elt_size & PAGE_MASK) &&
1329 (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) ||
1330 ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10)))) {
1331 unlock_zone(z);
1332 continue;
1333 }
1334
1335 z->doing_gc = TRUE;
1336
1337 /*
1338 * Snatch all of the free elements away from the zone.
1339 */
1340
1341 scan = (void *)z->free_elements;
1342 z->free_elements = 0;
1343
1344 unlock_zone(z);
1345
1346 /*
1347 * Pass 1:
1348 *
1349 * Determine which elements we can attempt to collect
1350 * and count them up in the page table. Foreign elements
1351 * are returned to the zone.
1352 */
1353
1354 prev = (void *)&scan;
1355 elt = scan;
1356 n = 0; tail = keep = NULL;
1357 while (elt != NULL) {
1358 if (from_zone_map(elt, elt_size)) {
1359 zone_page_collect((vm_offset_t)elt, elt_size);
1360
1361 prev = elt;
1362 elt = elt->next;
1363
1364 ++zgc_stats.elems_collected;
1365 }
1366 else {
1367 if (keep == NULL)
1368 keep = tail = elt;
1369 else {
1370 ADD_ELEMENT(z, tail, elt);
1371 tail = elt;
1372 }
1373
1374 ADD_ELEMENT(z, prev, elt->next);
1375 elt = elt->next;
1376 ADD_ELEMENT(z, tail, NULL);
1377 }
1378
1379 /*
1380 * Dribble back the elements we are keeping.
1381 */
1382
1383 if (++n >= 50) {
1384 if (z->waiting == TRUE) {
1385 lock_zone(z);
1386
1387 if (keep != NULL) {
1388 ADD_LIST_TO_ZONE(z, keep, tail);
1389 tail = keep = NULL;
1390 } else {
1391 m =0;
1392 base_elt = elt;
1393 base_prev = prev;
1394 while ((elt != NULL) && (++m < 50)) {
1395 prev = elt;
1396 elt = elt->next;
1397 }
1398 if (m !=0 ) {
1399 ADD_LIST_TO_ZONE(z, base_elt, prev);
1400 ADD_ELEMENT(z, base_prev, elt);
1401 prev = base_prev;
1402 }
1403 }
1404
1405 if (z->waiting) {
1406 z->waiting = FALSE;
1407 zone_wakeup(z);
1408 }
1409
1410 unlock_zone(z);
1411 }
1412 n =0;
1413 }
1414 }
1415
1416 /*
1417 * Return any remaining elements.
1418 */
1419
1420 if (keep != NULL) {
1421 lock_zone(z);
1422
1423 ADD_LIST_TO_ZONE(z, keep, tail);
1424
1425 unlock_zone(z);
1426 }
1427
1428 /*
1429 * Pass 2:
1430 *
1431 * Determine which pages we can reclaim and
1432 * free those elements.
1433 */
1434
1435 size_freed = 0;
1436 elt = scan;
1437 n = 0; tail = keep = NULL;
1438 while (elt != NULL) {
1439 if (zone_page_collectable((vm_offset_t)elt, elt_size)) {
1440 size_freed += elt_size;
1441 zone_page_free_element(&zone_free_pages,
1442 (vm_offset_t)elt, elt_size);
1443
1444 elt = elt->next;
1445
1446 ++zgc_stats.elems_freed;
1447 }
1448 else {
1449 zone_page_keep((vm_offset_t)elt, elt_size);
1450
1451 if (keep == NULL)
1452 keep = tail = elt;
1453 else {
1454 ADD_ELEMENT(z, tail, elt);
1455 tail = elt;
1456 }
1457
1458 elt = elt->next;
1459 ADD_ELEMENT(z, tail, NULL);
1460
1461 ++zgc_stats.elems_kept;
1462 }
1463
1464 /*
1465 * Dribble back the elements we are keeping,
1466 * and update the zone size info.
1467 */
1468
1469 if (++n >= 50) {
1470 lock_zone(z);
1471
1472 z->cur_size -= size_freed;
1473 size_freed = 0;
1474
1475 if (keep != NULL) {
1476 ADD_LIST_TO_ZONE(z, keep, tail);
1477 }
1478
1479 if (z->waiting) {
1480 z->waiting = FALSE;
1481 zone_wakeup(z);
1482 }
1483
1484 unlock_zone(z);
1485
1486 n = 0; tail = keep = NULL;
1487 }
1488 }
1489
1490 /*
1491 * Return any remaining elements, and update
1492 * the zone size info.
1493 */
1494
1495 lock_zone(z);
1496
1497 if (size_freed > 0 || keep != NULL) {
1498
1499 z->cur_size -= size_freed;
1500
1501 if (keep != NULL) {
1502 ADD_LIST_TO_ZONE(z, keep, tail);
1503 }
1504
1505 }
1506
1507 z->doing_gc = FALSE;
1508 if (z->waiting) {
1509 z->waiting = FALSE;
1510 zone_wakeup(z);
1511 }
1512 unlock_zone(z);
1513 }
1514
1515 /*
1516 * Reclaim the pages we are freeing.
1517 */
1518
1519 while ((zp = zone_free_pages) != NULL) {
1520 zone_free_pages = zp->link;
1521 #if ZONE_ALIAS_ADDR
1522 z = zone_virtual_addr((vm_map_address_t)z);
1523 #endif
1524 kmem_free(zone_map, zone_map_min_address + PAGE_SIZE *
1525 (zp - zone_page_table), PAGE_SIZE);
1526 ++zgc_stats.pgs_freed;
1527 }
1528
1529 mutex_unlock(&zone_gc_lock);
1530 }
1531
1532 /*
1533 * consider_zone_gc:
1534 *
1535 * Called by the pageout daemon when the system needs more free pages.
1536 */
1537
1538 void
1539 consider_zone_gc(void)
1540 {
1541 /*
1542 * By default, don't attempt zone GC more frequently
1543 * than once per minute.
1544 */
1545
1546 if (zone_gc_max_rate == 0)
1547 zone_gc_max_rate = (60 << SCHED_TICK_SHIFT) + 1;
1548
1549 if (zone_gc_allowed &&
1550 ((sched_tick > (zone_gc_last_tick + zone_gc_max_rate)) ||
1551 zone_gc_forced)) {
1552 zone_gc_forced = FALSE;
1553 zone_gc_last_tick = sched_tick;
1554 zone_gc();
1555 }
1556 }
1557
1558 struct fake_zone_info {
1559 const char* name;
1560 void (*func)(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
1561 int *, int *);
1562 };
1563
1564 static struct fake_zone_info fake_zones[] = {
1565 {
1566 .name = "kernel_stacks",
1567 .func = stack_fake_zone_info,
1568 },
1569 #ifdef ppc
1570 {
1571 .name = "save_areas",
1572 .func = save_fake_zone_info,
1573 },
1574 {
1575 .name = "pmap_mappings",
1576 .func = mapping_fake_zone_info,
1577 },
1578 #endif /* ppc */
1579 #ifdef i386
1580 {
1581 .name = "page_tables",
1582 .func = pt_fake_zone_info,
1583 },
1584 #endif /* i386 */
1585 {
1586 .name = "kalloc.large",
1587 .func = kalloc_fake_zone_info,
1588 },
1589 };
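/*
 * host_zone_info() reports these pseudo-zones after the real zones,
 * filling in their statistics through the per-subsystem *_fake_zone_info
 * callbacks listed above.
 */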
1590
1591 kern_return_t
1592 host_zone_info(
1593 host_t host,
1594 zone_name_array_t *namesp,
1595 mach_msg_type_number_t *namesCntp,
1596 zone_info_array_t *infop,
1597 mach_msg_type_number_t *infoCntp)
1598 {
1599 zone_name_t *names;
1600 vm_offset_t names_addr;
1601 vm_size_t names_size;
1602 zone_info_t *info;
1603 vm_offset_t info_addr;
1604 vm_size_t info_size;
1605 unsigned int max_zones, i;
1606 zone_t z;
1607 zone_name_t *zn;
1608 zone_info_t *zi;
1609 kern_return_t kr;
1610 size_t num_fake_zones;
1611
1612 if (host == HOST_NULL)
1613 return KERN_INVALID_HOST;
1614
1615 num_fake_zones = sizeof fake_zones / sizeof fake_zones[0];
1616
1617 /*
1618 * We assume that zones aren't freed once allocated.
1619 * We won't pick up any zones that are allocated later.
1620 */
1621
1622 simple_lock(&all_zones_lock);
1623 max_zones = num_zones + num_fake_zones;
1624 z = first_zone;
1625 simple_unlock(&all_zones_lock);
1626
1627 if (max_zones <= *namesCntp) {
1628 /* use in-line memory */
1629 names_size = *namesCntp * sizeof *names;
1630 names = *namesp;
1631 } else {
1632 names_size = round_page(max_zones * sizeof *names);
1633 kr = kmem_alloc_pageable(ipc_kernel_map,
1634 &names_addr, names_size);
1635 if (kr != KERN_SUCCESS)
1636 return kr;
1637 names = (zone_name_t *) names_addr;
1638 }
1639
1640 if (max_zones <= *infoCntp) {
1641 /* use in-line memory */
1642 info_size = *infoCntp * sizeof *info;
1643 info = *infop;
1644 } else {
1645 info_size = round_page(max_zones * sizeof *info);
1646 kr = kmem_alloc_pageable(ipc_kernel_map,
1647 &info_addr, info_size);
1648 if (kr != KERN_SUCCESS) {
1649 if (names != *namesp)
1650 kmem_free(ipc_kernel_map,
1651 names_addr, names_size);
1652 return kr;
1653 }
1654
1655 info = (zone_info_t *) info_addr;
1656 }
1657 zn = &names[0];
1658 zi = &info[0];
1659
1660 for (i = 0; i < num_zones; i++) {
1661 struct zone zcopy;
1662
1663 assert(z != ZONE_NULL);
1664
1665 lock_zone(z);
1666 zcopy = *z;
1667 unlock_zone(z);
1668
1669 simple_lock(&all_zones_lock);
1670 z = z->next_zone;
1671 simple_unlock(&all_zones_lock);
1672
1673 /* assuming here the name data is static */
1674 (void) strncpy(zn->zn_name, zcopy.zone_name,
1675 sizeof zn->zn_name);
1676 zn->zn_name[sizeof zn->zn_name - 1] = '\0';
1677
1678 zi->zi_count = zcopy.count;
1679 zi->zi_cur_size = zcopy.cur_size;
1680 zi->zi_max_size = zcopy.max_size;
1681 zi->zi_elem_size = zcopy.elem_size;
1682 zi->zi_alloc_size = zcopy.alloc_size;
1683 zi->zi_exhaustible = zcopy.exhaustible;
1684 zi->zi_collectable = zcopy.collectable;
1685
1686 zn++;
1687 zi++;
1688 }
1689
1690 /*
1691 * loop through the fake zones and fill them using the specialized
1692 * functions
1693 */
1694 for (i = 0; i < num_fake_zones; i++) {
1695 strncpy(zn->zn_name, fake_zones[i].name, sizeof zn->zn_name);
1696 zn->zn_name[sizeof zn->zn_name - 1] = '\0';
1697 fake_zones[i].func(&zi->zi_count, &zi->zi_cur_size,
1698 &zi->zi_max_size, &zi->zi_elem_size,
1699 &zi->zi_alloc_size, &zi->zi_collectable,
1700 &zi->zi_exhaustible);
1701 zn++;
1702 zi++;
1703 }
1704
1705 if (names != *namesp) {
1706 vm_size_t used;
1707 vm_map_copy_t copy;
1708
1709 used = max_zones * sizeof *names;
1710
1711 if (used != names_size)
1712 bzero((char *) (names_addr + used), names_size - used);
1713
1714 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
1715 (vm_map_size_t)names_size, TRUE, &copy);
1716 assert(kr == KERN_SUCCESS);
1717
1718 *namesp = (zone_name_t *) copy;
1719 }
1720 *namesCntp = max_zones;
1721
1722 if (info != *infop) {
1723 vm_size_t used;
1724 vm_map_copy_t copy;
1725
1726 used = max_zones * sizeof *info;
1727
1728 if (used != info_size)
1729 bzero((char *) (info_addr + used), info_size - used);
1730
1731 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
1732 (vm_map_size_t)info_size, TRUE, &copy);
1733 assert(kr == KERN_SUCCESS);
1734
1735 *infop = (zone_info_t *) copy;
1736 }
1737 *infoCntp = max_zones;
1738
1739 return KERN_SUCCESS;
1740 }
1741
1742 #if MACH_KDB
1743 #include <ddb/db_command.h>
1744 #include <ddb/db_output.h>
1745 #include <kern/kern_print.h>
1746
1747 const char *zone_labels =
1748 "ENTRY COUNT TOT_SZ MAX_SZ ELT_SZ ALLOC_SZ NAME";
1749
1750 /* Forwards */
1751 void db_print_zone(
1752 zone_t addr);
1753
1754 #if ZONE_DEBUG
1755 void db_zone_check_active(
1756 zone_t zone);
1757 void db_zone_print_active(
1758 zone_t zone);
1759 #endif /* ZONE_DEBUG */
1760 void db_zone_print_free(
1761 zone_t zone);
1762 void
1763 db_print_zone(
1764 zone_t addr)
1765 {
1766 struct zone zcopy;
1767
1768 zcopy = *addr;
1769
1770 db_printf("%8x %8x %8x %8x %6x %8x %s ",
1771 addr, zcopy.count, zcopy.cur_size,
1772 zcopy.max_size, zcopy.elem_size,
1773 zcopy.alloc_size, zcopy.zone_name);
1774 if (zcopy.exhaustible)
1775 db_printf("H");
1776 if (zcopy.collectable)
1777 db_printf("C");
1778 if (zcopy.expandable)
1779 db_printf("X");
1780 db_printf("\n");
1781 }
1782
1783 /*ARGSUSED*/
1784 void
1785 db_show_one_zone(db_expr_t addr, boolean_t have_addr,
1786 __unused db_expr_t count, __unused char *modif)
1787 {
1788 struct zone *z = (zone_t)((char *)0 + addr);
1789
1790 if (z == ZONE_NULL || !have_addr){
1791 db_error("No Zone\n");
1792 /*NOTREACHED*/
1793 }
1794
1795 db_printf("%s\n", zone_labels);
1796 db_print_zone(z);
1797 }
1798
1799 /*ARGSUSED*/
1800 void
1801 db_show_all_zones(__unused db_expr_t addr, boolean_t have_addr, db_expr_t count,
1802 __unused char *modif)
1803 {
1804 zone_t z;
1805 unsigned total = 0;
1806
1807 /*
1808 * Don't risk hanging by unconditionally locking; the
1809 * risk of incoherent data is small (zones aren't freed).
1810 */
1811 have_addr = simple_lock_try(&all_zones_lock);
1812 count = num_zones;
1813 z = first_zone;
1814 if (have_addr) {
1815 simple_unlock(&all_zones_lock);
1816 }
1817
1818 db_printf("%s\n", zone_labels);
1819 for ( ; count > 0; count--) {
1820 if (!z) {
1821 db_error("Mangled Zone List\n");
1822 /*NOTREACHED*/
1823 }
1824 db_print_zone(z);
1825 total += z->cur_size;
1826
1827 have_addr = simple_lock_try(&all_zones_lock);
1828 z = z->next_zone;
1829 if (have_addr) {
1830 simple_unlock(&all_zones_lock);
1831 }
1832 }
1833 db_printf("\nTotal %8x", total);
1834 db_printf("\n\nzone_gc() has reclaimed %d pages\n", zgc_stats.pgs_freed);
1835 }
1836
1837 #if ZONE_DEBUG
1838 void
1839 db_zone_check_active(
1840 zone_t zone)
1841 {
1842 int count = 0;
1843 queue_t tmp_elem;
1844
1845 if (!zone_debug_enabled(zone) || !zone_check)
1846 return;
1847 tmp_elem = queue_first(&zone->active_zones);
1848 while (count < zone->count) {
1849 count++;
1850 if (tmp_elem == 0) {
1851 printf("unexpected zero element, zone=%p, count=%d\n",
1852 zone, count);
1853 assert(FALSE);
1854 break;
1855 }
1856 if (queue_end(tmp_elem, &zone->active_zones)) {
1857 printf("unexpected queue_end, zone=%p, count=%d\n",
1858 zone, count);
1859 assert(FALSE);
1860 break;
1861 }
1862 tmp_elem = queue_next(tmp_elem);
1863 }
1864 if (!queue_end(tmp_elem, &zone->active_zones)) {
1865 printf("not at queue_end, zone=%p, tmp_elem=%p\n",
1866 zone, tmp_elem);
1867 assert(FALSE);
1868 }
1869 }
1870
1871 void
1872 db_zone_print_active(
1873 zone_t zone)
1874 {
1875 int count = 0;
1876 queue_t tmp_elem;
1877
1878 if (!zone_debug_enabled(zone)) {
1879 printf("zone %p debug not enabled\n", zone);
1880 return;
1881 }
1882 if (!zone_check) {
1883 printf("zone_check FALSE\n");
1884 return;
1885 }
1886
1887 printf("zone %p, active elements %d\n", zone, zone->count);
1888 printf("active list:\n");
1889 tmp_elem = queue_first(&zone->active_zones);
1890 while (count < zone->count) {
1891 printf(" %p", tmp_elem);
1892 count++;
1893 if ((count % 6) == 0)
1894 printf("\n");
1895 if (tmp_elem == 0) {
1896 printf("\nunexpected zero element, count=%d\n", count);
1897 break;
1898 }
1899 if (queue_end(tmp_elem, &zone->active_zones)) {
1900 printf("\nunexpected queue_end, count=%d\n", count);
1901 break;
1902 }
1903 tmp_elem = queue_next(tmp_elem);
1904 }
1905 if (!queue_end(tmp_elem, &zone->active_zones))
1906 printf("\nnot at queue_end, tmp_elem=%p\n", tmp_elem);
1907 else
1908 printf("\n");
1909 }
1910 #endif /* ZONE_DEBUG */
1911
1912 void
1913 db_zone_print_free(
1914 zone_t zone)
1915 {
1916 int count = 0;
1917 int freecount;
1918 vm_offset_t elem;
1919
1920 freecount = zone_free_count(zone);
1921 printf("zone %p, free elements %d\n", zone, freecount);
1922 printf("free list:\n");
1923 elem = zone->free_elements;
1924 while (count < freecount) {
1925 printf(" 0x%x", elem);
1926 count++;
1927 if ((count % 6) == 0)
1928 printf("\n");
1929 if (elem == 0) {
1930 printf("\nunexpected zero element, count=%d\n", count);
1931 break;
1932 }
1933 elem = *((vm_offset_t *)elem);
1934 }
1935 if (elem != 0)
1936 printf("\nnot at end of free list, elem=0x%x\n", elem);
1937 else
1938 printf("\n");
1939 }
1940
1941 #endif /* MACH_KDB */
1942
1943
1944 #if ZONE_DEBUG
1945
1946 /* should we care about locks here ? */
1947
1948 #if MACH_KDB
1949 void *
1950 next_element(
1951 zone_t z,
1952 void *prev)
1953 {
1954 char *elt = (char *)prev;
1955
1956 if (!zone_debug_enabled(z))
1957 return(NULL);
1958 elt -= ZONE_DEBUG_OFFSET;
1959 elt = (char *) queue_next((queue_t) elt);
1960 if ((queue_t) elt == &z->active_zones)
1961 return(NULL);
1962 elt += ZONE_DEBUG_OFFSET;
1963 return(elt);
1964 }
1965
1966 void *
1967 first_element(
1968 zone_t z)
1969 {
1970 char *elt;
1971
1972 if (!zone_debug_enabled(z))
1973 return(NULL);
1974 if (queue_empty(&z->active_zones))
1975 return(NULL);
1976 elt = (char *)queue_first(&z->active_zones);
1977 elt += ZONE_DEBUG_OFFSET;
1978 return(elt);
1979 }
1980
1981 /*
1982 * Second arg controls how many zone elements are printed:
1983 * 0 => none
1984 * n, n < 0 => all
1985 * n, n > 0 => last n on active list
1986 */
1987 int
1988 zone_count(
1989 zone_t z,
1990 int tail)
1991 {
1992 void *elt;
1993 int count = 0;
1994 boolean_t print = (tail != 0);
1995
1996 if (tail < 0)
1997 tail = z->count;
1998 if (z->count < tail)
1999 tail = 0;
2000 tail = z->count - tail;
2001 for (elt = first_element(z); elt; elt = next_element(z, elt)) {
2002 if (print && tail <= count)
2003 db_printf("%8x\n", elt);
2004 count++;
2005 }
2006 assert(count == z->count);
2007 return(count);
2008 }
2009 #endif /* MACH_KDB */
2010
2011 #define zone_in_use(z) ( z->count || z->free_elements )
2012
2013 void
2014 zone_debug_enable(
2015 zone_t z)
2016 {
2017 if (zone_debug_enabled(z) || zone_in_use(z) ||
2018 z->alloc_size < (z->elem_size + ZONE_DEBUG_OFFSET))
2019 return;
2020 queue_init(&z->active_zones);
2021 z->elem_size += ZONE_DEBUG_OFFSET;
2022 }
2023
2024 void
2025 zone_debug_disable(
2026 zone_t z)
2027 {
2028 if (!zone_debug_enabled(z) || zone_in_use(z))
2029 return;
2030 z->elem_size -= ZONE_DEBUG_OFFSET;
2031 z->active_zones.next = z->active_zones.prev = NULL;
2032 }
2033 #endif /* ZONE_DEBUG */