1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55 6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55 15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: kern/zalloc.c
60 * Author: Avadis Tevanian, Jr.
61 *
62 * Zone-based memory allocator. A zone is a collection of fixed size
63 * data blocks for which quick allocation/deallocation is possible.
64 */
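/*
 * Illustrative usage sketch (added for orientation; "struct foo" and
 * "foo_zone" are hypothetical names, not part of this file). A subsystem
 * typically creates a zone once and then allocates and frees fixed-size
 * elements from it with the interfaces defined below:
 *
 *	zone_t foo_zone;
 *
 *	foo_zone = zinit(sizeof(struct foo),		// element size
 *			 1024 * sizeof(struct foo),	// max memory for the zone
 *			 PAGE_SIZE,			// allocation chunk size
 *			 "foo structures");		// zone name
 *
 *	struct foo *f = (struct foo *) zalloc(foo_zone);
 *	...
 *	zfree(foo_zone, f);
 */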
65#include <zone_debug.h>
2d21ac55 66#include <zone_alias_addr.h>
1c79356b 67#include <norma_vm.h>
68#include <mach_kdb.h>
91447636 69
70#include <mach/mach_types.h>
71#include <mach/vm_param.h>
72#include <mach/kern_return.h>
73#include <mach/mach_host_server.h>
74#include <mach/machine/vm_types.h>
75#include <mach_debug/zone_info.h>
76
77#include <kern/kern_types.h>
1c79356b 78#include <kern/assert.h>
91447636 79#include <kern/host.h>
1c79356b 80#include <kern/macro_help.h>
81#include <kern/sched.h>
82#include <kern/lock.h>
83#include <kern/sched_prim.h>
84#include <kern/misc_protos.h>
0b4e3aa0 85#include <kern/thread_call.h>
1c79356b 86#include <kern/zalloc.h>
91447636 87#include <kern/kalloc.h>
88
89#include <vm/pmap.h>
90#include <vm/vm_map.h>
1c79356b 91#include <vm/vm_kern.h>
91447636 92#include <vm/vm_page.h>
93
1c79356b 94#include <machine/machparam.h>
95
2d21ac55 96#include <libkern/OSDebug.h>
97#include <sys/kdebug.h>
98
91447636 99#if defined(__ppc__)
100/* for fake zone stat routines */
101#include <ppc/savearea.h>
102#include <ppc/mappings.h>
103#endif
1c79356b 104
2d21ac55 105int check_freed_element = 0;
106
1c79356b 107#if MACH_ASSERT
108/* Detect use of zone elt after freeing it by two methods:
109 * (1) Range-check the free-list "next" ptr for sanity.
110 * (2) Store the ptr in two different words, and compare them against
111 * each other when re-using the zone elt, to detect modifications;
112 */
113
114#if defined(__alpha)
115
116#define is_kernel_data_addr(a) \
91447636 117 (!(a) || (IS_SYS_VA(a) && !((a) & (sizeof(long)-1))))
1c79356b 118
119#else /* !defined(__alpha) */
120
121#define is_kernel_data_addr(a) \
0c530ab8 122 (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
1c79356b 123
124#endif /* defined(__alpha) */
125
126/* Should we set all words of the zone element to an illegal address
127 * when it is freed, to help catch usage after freeing? The down-side
128 * is that this obscures the identity of the freed element.
129 */
130boolean_t zfree_clear = FALSE;
131
132#define ADD_TO_ZONE(zone, element) \
133MACRO_BEGIN \
134 if (zfree_clear) \
2d21ac55 135 { unsigned int i; \
1c79356b 136 for (i=1; \
137 i < zone->elem_size/sizeof(vm_offset_t) - 1; \
138 i++) \
139 ((vm_offset_t *)(element))[i] = 0xdeadbeef; \
140 } \
141 ((vm_offset_t *)(element))[0] = (zone)->free_elements; \
142 (zone)->free_elements = (vm_offset_t) (element); \
143 (zone)->count--; \
144MACRO_END
145
146#define REMOVE_FROM_ZONE(zone, ret, type) \
147MACRO_BEGIN \
148 (ret) = (type) (zone)->free_elements; \
149 if ((ret) != (type) 0) { \
150 if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0])) { \
151 panic("A freed zone element has been modified.\n"); \
152 } \
153 (zone)->count++; \
154 (zone)->free_elements = *((vm_offset_t *)(ret)); \
155 } \
156MACRO_END
157#else /* MACH_ASSERT */
158
159#define ADD_TO_ZONE(zone, element) \
160MACRO_BEGIN \
161 *((vm_offset_t *)(element)) = (zone)->free_elements; \
2d21ac55 162 if (check_freed_element) { \
163 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
164 ((vm_offset_t *)(element))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
165 (zone)->free_elements; \
166 } \
1c79356b 167 (zone)->free_elements = (vm_offset_t) (element); \
168 (zone)->count--; \
169MACRO_END
170
171#define REMOVE_FROM_ZONE(zone, ret, type) \
172MACRO_BEGIN \
173 (ret) = (type) (zone)->free_elements; \
174 if ((ret) != (type) 0) { \
2d21ac55 175 if (check_freed_element) { \
176 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t)) && \
177 ((vm_offset_t *)(ret))[((zone)->elem_size/sizeof(vm_offset_t))-1] != \
178 ((vm_offset_t *)(ret))[0]) \
179 panic("a freed zone element has been modified");\
180 } \
1c79356b 181 (zone)->count++; \
182 (zone)->free_elements = *((vm_offset_t *)(ret)); \
183 } \
184MACRO_END
185
186#endif /* MACH_ASSERT */
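/*
 * Sketch of what the macros above do (added for clarity): the free list
 * is a LIFO singly-linked list threaded through the free elements
 * themselves.  Ignoring the debug checks, ADD_TO_ZONE is effectively
 *
 *	*(vm_offset_t *)(element) = zone->free_elements;
 *	zone->free_elements = (vm_offset_t) element;
 *	zone->count--;
 *
 * and REMOVE_FROM_ZONE pops the head element and reverses the
 * bookkeeping.  The check_freed_element / zfree_clear logic only adds
 * poisoning and consistency checks on top of this.
 */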
187
188#if ZONE_DEBUG
189#define zone_debug_enabled(z) z->active_zones.next
55e303ae 190#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
191#define ZONE_DEBUG_OFFSET ROUNDUP(sizeof(queue_chain_t),16)
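/* Worked example (assuming a two-pointer, 8-byte queue_chain_t on a
   32-bit kernel): ZONE_DEBUG_OFFSET == ROUNDUP(8,16) == 16. */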
1c79356b 192#endif /* ZONE_DEBUG */
193
194/*
195 * Support for garbage collection of unused zone pages:
196 */
197
198struct zone_page_table_entry {
55e303ae 199 struct zone_page_table_entry *link;
1c79356b 200 short alloc_count;
55e303ae 201 short collect_count;
1c79356b
A
202};
203
1c79356b
A
204/* Forwards */
205void zone_page_init(
206 vm_offset_t addr,
207 vm_size_t size,
208 int value);
209
210void zone_page_alloc(
211 vm_offset_t addr,
212 vm_size_t size);
213
55e303ae
A
214void zone_page_free_element(
215 struct zone_page_table_entry **free_pages,
1c79356b
A
216 vm_offset_t addr,
217 vm_size_t size);
218
55e303ae 219void zone_page_collect(
1c79356b
A
220 vm_offset_t addr,
221 vm_size_t size);
222
223boolean_t zone_page_collectable(
224 vm_offset_t addr,
225 vm_size_t size);
226
227void zone_page_keep(
228 vm_offset_t addr,
229 vm_size_t size);
230
0b4e3aa0
A
231void zalloc_async(
232 thread_call_param_t p0,
233 thread_call_param_t p1);
234
235
1c79356b
A
236#if ZONE_DEBUG && MACH_KDB
237int zone_count(
238 zone_t z,
239 int tail);
240#endif /* ZONE_DEBUG && MACH_KDB */
241
242vm_map_t zone_map = VM_MAP_NULL;
243
244zone_t zone_zone = ZONE_NULL; /* the zone containing other zones */
245
246/*
247 * The VM system gives us an initial chunk of memory.
248 * It has to be big enough to allocate the zone_zone
249 */
250
251vm_offset_t zdata;
252vm_size_t zdata_size;
253
254#define lock_zone(zone) \
255MACRO_BEGIN \
2d21ac55 256 lck_mtx_lock(&(zone)->lock); \
1c79356b
A
257MACRO_END
258
259#define unlock_zone(zone) \
260MACRO_BEGIN \
2d21ac55 261 lck_mtx_unlock(&(zone)->lock); \
1c79356b
A
262MACRO_END
263
9bccf70c
A
264#define zone_wakeup(zone) thread_wakeup((event_t)(zone))
265#define zone_sleep(zone) \
2d21ac55
A
266 (void) lck_mtx_sleep(&(zone)->lock, 0, (event_t)(zone), THREAD_UNINT);
267
268extern int snprintf(char *, size_t, const char *, ...) __printflike(3,4);
9bccf70c 269
1c79356b
A
270#define lock_zone_init(zone) \
271MACRO_BEGIN \
2d21ac55
A
272 char _name[32]; \
273 (void) snprintf(_name, sizeof (_name), "zone.%s", (zone)->zone_name); \
274 lck_grp_attr_setdefault(&(zone)->lock_grp_attr); \
275 lck_grp_init(&(zone)->lock_grp, _name, &(zone)->lock_grp_attr); \
276 lck_attr_setdefault(&(zone)->lock_attr); \
277 lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext, \
278 &(zone)->lock_grp, &(zone)->lock_attr); \
1c79356b
A
279MACRO_END
280
2d21ac55 281#define lock_try_zone(zone) lck_mtx_try_lock(&zone->lock)
1c79356b
A
282
283kern_return_t zget_space(
284 vm_offset_t size,
285 vm_offset_t *result);
286
287decl_simple_lock_data(,zget_space_lock)
288vm_offset_t zalloc_next_space;
289vm_offset_t zalloc_end_of_space;
290vm_size_t zalloc_wasted_space;
291
292/*
293 * Garbage collection map information
294 */
1c79356b
A
295struct zone_page_table_entry * zone_page_table;
296vm_offset_t zone_map_min_address;
297vm_offset_t zone_map_max_address;
91447636 298unsigned int zone_pages;
1c79356b
A
299
300/*
301 * Exclude more than one concurrent garbage collection
302 */
303decl_mutex_data(, zone_gc_lock)
304
2d21ac55 305#if !ZONE_ALIAS_ADDR
55e303ae 306#define from_zone_map(addr, size) \
1c79356b 307 ((vm_offset_t)(addr) >= zone_map_min_address && \
55e303ae 308 ((vm_offset_t)(addr) + size -1) < zone_map_max_address)
2d21ac55
A
309#else
310#define from_zone_map(addr, size) \
311 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) >= zone_map_min_address && \
312 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) + size -1) < zone_map_max_address)
313#endif
1c79356b
A
314
315#define ZONE_PAGE_USED 0
316#define ZONE_PAGE_UNUSED -1
317
318
319/*
320 * Protects first_zone, last_zone, num_zones,
321 * and the next_zone field of zones.
322 */
323decl_simple_lock_data(, all_zones_lock)
324zone_t first_zone;
325zone_t *last_zone;
91447636 326unsigned int num_zones;
1c79356b 327
0b4e3aa0
A
328boolean_t zone_gc_allowed = TRUE;
329boolean_t zone_gc_forced = FALSE;
330unsigned zone_gc_last_tick = 0;
331unsigned zone_gc_max_rate = 0; /* in ticks */
332
333
1c79356b
A
334/*
335 * zinit initializes a new zone. The zone data structures themselves
336 * are stored in a zone, which is initially a static structure that
337 * is initialized by zone_init.
338 */
339zone_t
340zinit(
341 vm_size_t size, /* the size of an element */
342 vm_size_t max, /* maximum memory to use */
343 vm_size_t alloc, /* allocation size */
91447636 344 const char *name) /* a name for the zone */
1c79356b
A
345{
346 zone_t z;
347
348 if (zone_zone == ZONE_NULL) {
349 if (zget_space(sizeof(struct zone), (vm_offset_t *)&z)
350 != KERN_SUCCESS)
351 return(ZONE_NULL);
352 } else
353 z = (zone_t) zalloc(zone_zone);
354 if (z == ZONE_NULL)
355 return(ZONE_NULL);
356
357 /*
358 * Round off all the parameters appropriately.
359 */
360 if (size < sizeof(z->free_elements))
361 size = sizeof(z->free_elements);
362 size = ((size-1) + sizeof(z->free_elements)) -
363 ((size-1) % sizeof(z->free_elements));
364 if (alloc == 0)
365 alloc = PAGE_SIZE;
91447636
A
366 alloc = round_page(alloc);
367 max = round_page(max);
1c79356b 368 /*
91447636 369 * we look for an allocation size with less than 1% waste
 370 * up to 5 pages in size...
 371 * otherwise, we look for the allocation size with the least fragmentation
 372 * in the range of 1 - 5 pages.
 373 * This size will be used unless
1c79356b 374 * the user's suggestion is larger AND has less fragmentation
 375 */
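	/*
	 * Worked example (hypothetical element size, added for clarity):
	 * for a 120-byte element, the search below considers tsize =
	 * PAGE_SIZE = 4096 first; 4096 % 120 = 16 bytes of waste, which
	 * is under the 1% threshold (4096 / 100 = 40), so a single page
	 * ends up being chosen as the allocation size.
	 */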
2d21ac55 376#if ZONE_ALIAS_ADDR
377 if ((size < PAGE_SIZE) && (PAGE_SIZE % size <= PAGE_SIZE / 10))
378 alloc = PAGE_SIZE;
379 else
380#endif
1c79356b
A
381 { vm_size_t best, waste; unsigned int i;
382 best = PAGE_SIZE;
383 waste = best % size;
91447636
A
384
385 for (i = 1; i <= 5; i++) {
386 vm_size_t tsize, twaste;
387
388 tsize = i * PAGE_SIZE;
389
390 if ((tsize % size) < (tsize / 100)) {
391 alloc = tsize;
392 goto use_this_allocation;
393 }
1c79356b
A
394 twaste = tsize % size;
395 if (twaste < waste)
396 best = tsize, waste = twaste;
397 }
398 if (alloc <= best || (alloc % size >= waste))
399 alloc = best;
400 }
91447636 401use_this_allocation:
1c79356b
A
402 if (max && (max < alloc))
403 max = alloc;
404
405 z->free_elements = 0;
406 z->cur_size = 0;
407 z->max_size = max;
408 z->elem_size = size;
409 z->alloc_size = alloc;
410 z->zone_name = name;
411 z->count = 0;
412 z->doing_alloc = FALSE;
a3d08fcd 413 z->doing_gc = FALSE;
1c79356b
A
414 z->exhaustible = FALSE;
415 z->collectable = TRUE;
416 z->allows_foreign = FALSE;
417 z->expandable = TRUE;
418 z->waiting = FALSE;
0b4e3aa0 419 z->async_pending = FALSE;
1c79356b
A
420
421#if ZONE_DEBUG
2d21ac55 422 z->active_zones.next = z->active_zones.prev = NULL;
1c79356b
A
423 zone_debug_enable(z);
424#endif /* ZONE_DEBUG */
425 lock_zone_init(z);
426
427 /*
428 * Add the zone to the all-zones list.
429 */
430
431 z->next_zone = ZONE_NULL;
0b4e3aa0 432 thread_call_setup(&z->call_async_alloc, zalloc_async, z);
1c79356b
A
433 simple_lock(&all_zones_lock);
434 *last_zone = z;
435 last_zone = &z->next_zone;
436 num_zones++;
437 simple_unlock(&all_zones_lock);
438
439 return(z);
440}
441
442/*
443 * Cram the given memory into the specified zone.
444 */
445void
446zcram(
447 register zone_t zone,
91447636 448 void *newaddr,
1c79356b
A
449 vm_size_t size)
450{
451 register vm_size_t elem_size;
91447636 452 vm_offset_t newmem = (vm_offset_t) newaddr;
1c79356b
A
453
454 /* Basic sanity checks */
455 assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
456 assert(!zone->collectable || zone->allows_foreign
55e303ae 457 || (from_zone_map(newmem, size)));
1c79356b
A
458
459 elem_size = zone->elem_size;
460
461 lock_zone(zone);
462 while (size >= elem_size) {
463 ADD_TO_ZONE(zone, newmem);
55e303ae 464 if (from_zone_map(newmem, elem_size))
1c79356b
A
465 zone_page_alloc(newmem, elem_size);
466 zone->count++; /* compensate for ADD_TO_ZONE */
467 size -= elem_size;
468 newmem += elem_size;
469 zone->cur_size += elem_size;
470 }
471 unlock_zone(zone);
472}
473
474/*
475 * Contiguous space allocator for non-paged zones. Allocates "size" amount
476 * of memory from zone_map.
477 */
478
479kern_return_t
480zget_space(
481 vm_offset_t size,
482 vm_offset_t *result)
483{
484 vm_offset_t new_space = 0;
91447636 485 vm_size_t space_to_add = 0;
1c79356b
A
486
487 simple_lock(&zget_space_lock);
488 while ((zalloc_next_space + size) > zalloc_end_of_space) {
489 /*
490 * Add at least one page to allocation area.
491 */
492
91447636 493 space_to_add = round_page(size);
1c79356b
A
494
495 if (new_space == 0) {
496 kern_return_t retval;
497 /*
498 * Memory cannot be wired down while holding
499 * any locks that the pageout daemon might
500 * need to free up pages. [Making the zget_space
501 * lock a complex lock does not help in this
502 * regard.]
503 *
504 * Unlock and allocate memory. Because several
505 * threads might try to do this at once, don't
506 * use the memory before checking for available
507 * space again.
508 */
509
510 simple_unlock(&zget_space_lock);
511
512 retval = kernel_memory_allocate(zone_map, &new_space,
513 space_to_add, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
514 if (retval != KERN_SUCCESS)
515 return(retval);
2d21ac55
A
516#if ZONE_ALIAS_ADDR
517 if (space_to_add == PAGE_SIZE)
518 new_space = zone_alias_addr(new_space);
519#endif
1c79356b
A
520 zone_page_init(new_space, space_to_add,
521 ZONE_PAGE_USED);
522 simple_lock(&zget_space_lock);
523 continue;
524 }
525
526
527 /*
528 * Memory was allocated in a previous iteration.
529 *
530 * Check whether the new region is contiguous
531 * with the old one.
532 */
533
534 if (new_space != zalloc_end_of_space) {
535 /*
536 * Throw away the remainder of the
537 * old space, and start a new one.
538 */
539 zalloc_wasted_space +=
540 zalloc_end_of_space - zalloc_next_space;
541 zalloc_next_space = new_space;
542 }
543
544 zalloc_end_of_space = new_space + space_to_add;
545
546 new_space = 0;
547 }
548 *result = zalloc_next_space;
549 zalloc_next_space += size;
550 simple_unlock(&zget_space_lock);
551
552 if (new_space != 0)
553 kmem_free(zone_map, new_space, space_to_add);
554
555 return(KERN_SUCCESS);
556}
557
558
559/*
560 * Steal memory for the zone package. Called from
561 * vm_page_bootstrap().
562 */
563void
564zone_steal_memory(void)
565{
91447636
A
566 zdata_size = round_page(128*sizeof(struct zone));
567 zdata = (vm_offset_t)((char *)pmap_steal_memory(zdata_size) - (char *)0);
1c79356b
A
568}
569
570
571/*
572 * Fill a zone with enough memory to contain at least nelem elements.
573 * Memory is obtained with kmem_alloc_wired from the kernel_map.
574 * Return the number of elements actually put into the zone, which may
575 * be more than the caller asked for since the memory allocation is
576 * rounded up to a full page.
577 */
578int
579zfill(
580 zone_t zone,
581 int nelem)
582{
583 kern_return_t kr;
584 vm_size_t size;
585 vm_offset_t memory;
586 int nalloc;
587
588 assert(nelem > 0);
589 if (nelem <= 0)
590 return 0;
591 size = nelem * zone->elem_size;
91447636 592 size = round_page(size);
1c79356b
A
593 kr = kmem_alloc_wired(kernel_map, &memory, size);
594 if (kr != KERN_SUCCESS)
595 return 0;
596
597 zone_change(zone, Z_FOREIGN, TRUE);
91447636 598 zcram(zone, (void *)memory, size);
1c79356b
A
599 nalloc = size / zone->elem_size;
600 assert(nalloc >= nelem);
601
602 return nalloc;
603}
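/*
 * Illustrative call (hypothetical zone): pre-populate a zone before it
 * is used from contexts that cannot block, e.g.
 *
 *	zfill(foo_zone, 100);	// leaves at least 100 free elements,
 *				// rounded up to whole pages
 */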
604
605/*
606 * Initialize the "zone of zones" which uses fixed memory allocated
607 * earlier in memory initialization. zone_bootstrap is called
608 * before zone_init.
609 */
610void
611zone_bootstrap(void)
612{
613 vm_size_t zone_zone_size;
614 vm_offset_t zone_zone_space;
2d21ac55
A
615 char temp_buf[16];
616
617 /* see if we want freed zone element checking */
618 if (PE_parse_boot_arg("-zc", temp_buf)) {
619 check_freed_element = 1;
620 }
1c79356b 621
91447636 622 simple_lock_init(&all_zones_lock, 0);
1c79356b
A
623
624 first_zone = ZONE_NULL;
625 last_zone = &first_zone;
626 num_zones = 0;
627
91447636 628 simple_lock_init(&zget_space_lock, 0);
1c79356b
A
629 zalloc_next_space = zdata;
630 zalloc_end_of_space = zdata + zdata_size;
631 zalloc_wasted_space = 0;
632
633 /* assertion: nobody else called zinit before us */
634 assert(zone_zone == ZONE_NULL);
635 zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
636 sizeof(struct zone), "zones");
637 zone_change(zone_zone, Z_COLLECT, FALSE);
638 zone_zone_size = zalloc_end_of_space - zalloc_next_space;
639 zget_space(zone_zone_size, &zone_zone_space);
91447636 640 zcram(zone_zone, (void *)zone_zone_space, zone_zone_size);
1c79356b
A
641}
642
643void
644zone_init(
645 vm_size_t max_zonemap_size)
646{
647 kern_return_t retval;
648 vm_offset_t zone_min;
649 vm_offset_t zone_max;
650 vm_size_t zone_table_size;
651
652 retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
91447636
A
653 FALSE, VM_FLAGS_ANYWHERE, &zone_map);
654
1c79356b
A
655 if (retval != KERN_SUCCESS)
656 panic("zone_init: kmem_suballoc failed");
91447636 657 zone_max = zone_min + round_page(max_zonemap_size);
1c79356b
A
658 /*
659 * Setup garbage collection information:
660 */
55e303ae 661 zone_table_size = atop_32(zone_max - zone_min) *
1c79356b
A
662 sizeof(struct zone_page_table_entry);
663 if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table,
664 zone_table_size) != KERN_SUCCESS)
665 panic("zone_init");
91447636 666 zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
55e303ae 667 zone_pages = atop_32(zone_max - zone_min);
1c79356b
A
668 zone_map_min_address = zone_min;
669 zone_map_max_address = zone_max;
91447636 670 mutex_init(&zone_gc_lock, 0);
1c79356b
A
671 zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
672}
673
674
675/*
676 * zalloc returns an element from the specified zone.
677 */
91447636 678void *
1c79356b
A
679zalloc_canblock(
680 register zone_t zone,
681 boolean_t canblock)
682{
683 vm_offset_t addr;
684 kern_return_t retval;
685
686 assert(zone != ZONE_NULL);
1c79356b
A
687
688 lock_zone(zone);
689
690 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
0b4e3aa0 691
a3d08fcd
A
692 while ((addr == 0) && canblock && (zone->doing_gc)) {
693 zone->waiting = TRUE;
694 zone_sleep(zone);
695 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
696 }
697
0b4e3aa0 698 while ((addr == 0) && canblock) {
1c79356b
A
699 /*
700 * If nothing was there, try to get more
701 */
702 if (zone->doing_alloc) {
1c79356b
A
703 /*
704 * Someone is allocating memory for this zone.
705 * Wait for it to show up, then try again.
706 */
1c79356b 707 zone->waiting = TRUE;
9bccf70c 708 zone_sleep(zone);
1c79356b
A
709 }
710 else {
711 if ((zone->cur_size + zone->elem_size) >
712 zone->max_size) {
713 if (zone->exhaustible)
714 break;
715 if (zone->expandable) {
716 /*
717 * We're willing to overflow certain
718 * zones, but not without complaining.
719 *
720 * This is best used in conjunction
721 * with the collectable flag. What we
722 * want is an assurance we can get the
723 * memory back, assuming there's no
724 * leak.
725 */
726 zone->max_size += (zone->max_size >> 1);
727 } else {
728 unlock_zone(zone);
729
1c79356b
A
730 panic("zalloc: zone \"%s\" empty.", zone->zone_name);
731 }
732 }
733 zone->doing_alloc = TRUE;
734 unlock_zone(zone);
735
736 if (zone->collectable) {
737 vm_offset_t space;
738 vm_size_t alloc_size;
2d21ac55 739 int retry = 0;
55e303ae
A
740
741 for (;;) {
742
2d21ac55 743 if (vm_pool_low() || retry >= 1)
55e303ae 744 alloc_size =
91447636 745 round_page(zone->elem_size);
55e303ae
A
746 else
747 alloc_size = zone->alloc_size;
748
749 retval = kernel_memory_allocate(zone_map,
750 &space, alloc_size, 0,
751 KMA_KOBJECT|KMA_NOPAGEWAIT);
752 if (retval == KERN_SUCCESS) {
2d21ac55
A
753#if ZONE_ALIAS_ADDR
754 if (alloc_size == PAGE_SIZE)
755 space = zone_alias_addr(space);
756#endif
55e303ae
A
757 zone_page_init(space, alloc_size,
758 ZONE_PAGE_USED);
91447636 759 zcram(zone, (void *)space, alloc_size);
55e303ae
A
760
761 break;
762 } else if (retval != KERN_RESOURCE_SHORTAGE) {
2d21ac55
A
763 retry++;
764
765 if (retry == 2) {
766 zone_gc();
767 printf("zalloc did gc\n");
768 }
769 if (retry == 3)
91447636 770 panic("zalloc: \"%s\" (%d elements) retry fail %d", zone->zone_name, zone->count, retval);
e5568f75
A
771 } else {
772 break;
55e303ae 773 }
1c79356b
A
774 }
775 lock_zone(zone);
776 zone->doing_alloc = FALSE;
777 if (zone->waiting) {
778 zone->waiting = FALSE;
9bccf70c 779 zone_wakeup(zone);
1c79356b
A
780 }
781 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
782 if (addr == 0 &&
783 retval == KERN_RESOURCE_SHORTAGE) {
784 unlock_zone(zone);
785
1c79356b
A
786 VM_PAGE_WAIT();
787 lock_zone(zone);
788 }
789 } else {
790 vm_offset_t space;
791 retval = zget_space(zone->elem_size, &space);
792
793 lock_zone(zone);
794 zone->doing_alloc = FALSE;
795 if (zone->waiting) {
796 zone->waiting = FALSE;
797 thread_wakeup((event_t)zone);
798 }
799 if (retval == KERN_SUCCESS) {
800 zone->count++;
801 zone->cur_size += zone->elem_size;
802#if ZONE_DEBUG
803 if (zone_debug_enabled(zone)) {
804 enqueue_tail(&zone->active_zones, (queue_entry_t)space);
805 }
806#endif
807 unlock_zone(zone);
808 zone_page_alloc(space, zone->elem_size);
809#if ZONE_DEBUG
810 if (zone_debug_enabled(zone))
55e303ae 811 space += ZONE_DEBUG_OFFSET;
1c79356b 812#endif
2d21ac55
A
813 addr = space;
814 goto success;
1c79356b
A
815 }
816 if (retval == KERN_RESOURCE_SHORTAGE) {
817 unlock_zone(zone);
818
1c79356b
A
819 VM_PAGE_WAIT();
820 lock_zone(zone);
821 } else {
91447636 822 panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval);
1c79356b
A
823 }
824 }
825 }
826 if (addr == 0)
827 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
828 }
829
2d21ac55 830 if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) {
0b4e3aa0
A
831 zone->async_pending = TRUE;
832 unlock_zone(zone);
833 thread_call_enter(&zone->call_async_alloc);
834 lock_zone(zone);
835 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
836 }
837
1c79356b
A
838#if ZONE_DEBUG
839 if (addr && zone_debug_enabled(zone)) {
840 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
55e303ae 841 addr += ZONE_DEBUG_OFFSET;
1c79356b
A
842 }
843#endif
844
845 unlock_zone(zone);
0b4e3aa0 846
2d21ac55
A
847success:
848 TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr);
849
91447636 850 return((void *)addr);
1c79356b
A
851}
852
853
91447636 854void *
1c79356b
A
855zalloc(
856 register zone_t zone)
857{
858 return( zalloc_canblock(zone, TRUE) );
859}
860
91447636 861void *
1c79356b
A
862zalloc_noblock(
863 register zone_t zone)
864{
865 return( zalloc_canblock(zone, FALSE) );
866}
867
0b4e3aa0
A
868void
869zalloc_async(
91447636
A
870 thread_call_param_t p0,
871 __unused thread_call_param_t p1)
0b4e3aa0 872{
91447636 873 void *elt;
0b4e3aa0
A
874
875 elt = zalloc_canblock((zone_t)p0, TRUE);
876 zfree((zone_t)p0, elt);
877 lock_zone(((zone_t)p0));
878 ((zone_t)p0)->async_pending = FALSE;
879 unlock_zone(((zone_t)p0));
880}
881
1c79356b
A
882
883/*
884 * zget returns an element from the specified zone
885 * and immediately returns nothing if there is nothing there.
886 *
887 * This form should be used when you can not block (like when
888 * processing an interrupt).
889 */
91447636 890void *
1c79356b
A
891zget(
892 register zone_t zone)
893{
894 register vm_offset_t addr;
895
896 assert( zone != ZONE_NULL );
897
898 if (!lock_try_zone(zone))
91447636 899 return NULL;
1c79356b
A
900
901 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
902#if ZONE_DEBUG
903 if (addr && zone_debug_enabled(zone)) {
904 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
55e303ae 905 addr += ZONE_DEBUG_OFFSET;
1c79356b
A
906 }
907#endif /* ZONE_DEBUG */
908 unlock_zone(zone);
909
91447636 910 return((void *) addr);
1c79356b
A
911}
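/*
 * Illustrative use of zget (hypothetical caller): from interrupt
 * context, where blocking is not allowed,
 *
 *	struct foo *f = (struct foo *) zget(foo_zone);
 *	if (f == NULL)
 *		return;		// no free element; caller must handle it
 */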
912
913/* Keep this FALSE by default. Large memory machines run orders of magnitude
 914 slower in debug mode when TRUE. Use the debugger to enable if needed */
55e303ae
A
915/* static */ boolean_t zone_check = FALSE;
916
917static zone_t zone_last_bogus_zone = ZONE_NULL;
918static vm_offset_t zone_last_bogus_elem = 0;
1c79356b
A
919
920void
921zfree(
922 register zone_t zone,
91447636 923 void *addr)
1c79356b 924{
91447636 925 vm_offset_t elem = (vm_offset_t) addr;
1c79356b
A
926
927#if MACH_ASSERT
928 /* Basic sanity checks */
929 if (zone == ZONE_NULL || elem == (vm_offset_t)0)
930 panic("zfree: NULL");
931 /* zone_gc assumes zones are never freed */
932 if (zone == zone_zone)
933 panic("zfree: freeing to zone_zone breaks zone_gc!");
55e303ae
A
934#endif
935
2d21ac55
A
936 TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (int)addr);
937
1c79356b 938 if (zone->collectable && !zone->allows_foreign &&
55e303ae
A
939 !from_zone_map(elem, zone->elem_size)) {
940#if MACH_ASSERT
1c79356b 941 panic("zfree: non-allocated memory in collectable zone!");
91447636 942#endif
55e303ae
A
943 zone_last_bogus_zone = zone;
944 zone_last_bogus_elem = elem;
945 return;
55e303ae 946 }
1c79356b
A
947
948 lock_zone(zone);
949#if ZONE_DEBUG
950 if (zone_debug_enabled(zone)) {
951 queue_t tmp_elem;
952
55e303ae 953 elem -= ZONE_DEBUG_OFFSET;
1c79356b
A
954 if (zone_check) {
955 /* check the zone's consistency */
956
957 for (tmp_elem = queue_first(&zone->active_zones);
958 !queue_end(tmp_elem, &zone->active_zones);
959 tmp_elem = queue_next(tmp_elem))
960 if (elem == (vm_offset_t)tmp_elem)
961 break;
962 if (elem != (vm_offset_t)tmp_elem)
963 panic("zfree()ing element from wrong zone");
964 }
965 remqueue(&zone->active_zones, (queue_t) elem);
966 }
967#endif /* ZONE_DEBUG */
968 if (zone_check) {
969 vm_offset_t this;
970
971 /* check the zone's consistency */
972
973 for (this = zone->free_elements;
974 this != 0;
975 this = * (vm_offset_t *) this)
976 if (!pmap_kernel_va(this) || this == elem)
977 panic("zfree");
978 }
0b4e3aa0
A
979 ADD_TO_ZONE(zone, elem);
980
1c79356b
A
981 /*
982 * If elements are one or more pages in size, and memory is low,
0b4e3aa0 983 * request that zone garbage collection be run the next
 984 * time the pageout thread runs.
1c79356b 985 */
986 if (zone->elem_size >= PAGE_SIZE &&
987 vm_pool_low()){
0b4e3aa0 988 zone_gc_forced = TRUE;
1c79356b 989 }
1c79356b
A
990 unlock_zone(zone);
991}
992
993
994/* Change a zone's flags.
995 * This routine must be called immediately after zinit.
996 */
997void
998zone_change(
999 zone_t zone,
1000 unsigned int item,
1001 boolean_t value)
1002{
1003 assert( zone != ZONE_NULL );
1004 assert( value == TRUE || value == FALSE );
1005
1006 switch(item){
1007 case Z_EXHAUST:
1008 zone->exhaustible = value;
1009 break;
1010 case Z_COLLECT:
1011 zone->collectable = value;
1012 break;
1013 case Z_EXPAND:
1014 zone->expandable = value;
1015 break;
1016 case Z_FOREIGN:
1017 zone->allows_foreign = value;
1018 break;
1019#if MACH_ASSERT
1020 default:
1021 panic("Zone_change: Wrong Item Type!");
1022 /* break; */
1023#endif
1024 }
1c79356b
A
1025}
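/*
 * Illustrative calls (hypothetical zone), made immediately after zinit()
 * as the comment above requires:
 *
 *	zone_change(foo_zone, Z_EXHAUST, TRUE);		// fail allocations rather than grow
 *	zone_change(foo_zone, Z_COLLECT, FALSE);	// exempt the zone from zone_gc()
 */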
1026
1027/*
1028 * Return the expected number of free elements in the zone.
1029 * This calculation will be incorrect if items are zfree'd that
1030 * were never zalloc'd/zget'd. The correct way to stuff memory
1031 * into a zone is by zcram.
1032 */
1033
1034integer_t
1035zone_free_count(zone_t zone)
1036{
1037 integer_t free_count;
1038
1039 lock_zone(zone);
1040 free_count = zone->cur_size/zone->elem_size - zone->count;
1041 unlock_zone(zone);
1042
1043 assert(free_count >= 0);
1044
1045 return(free_count);
1046}
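/*
 * Worked example (hypothetical numbers): with elem_size = 256, a
 * cur_size of 64 KB and count = 200 allocated elements,
 * zone_free_count() reports 65536/256 - 200 = 56 free elements.
 */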
1047
1048/*
1049 * zprealloc preallocates wired memory, expanding the specified
1050 * zone to the specified size
1051 */
1052void
1053zprealloc(
1054 zone_t zone,
1055 vm_size_t size)
1056{
1057 vm_offset_t addr;
1058
1059 if (size != 0) {
1060 if (kmem_alloc_wired(zone_map, &addr, size) != KERN_SUCCESS)
1061 panic("zprealloc");
1062 zone_page_init(addr, size, ZONE_PAGE_USED);
91447636 1063 zcram(zone, (void *)addr, size);
1c79356b
A
1064 }
1065}
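/*
 * Illustrative call (hypothetical zone): wire sixteen pages of elements
 * up front, e.g.
 *
 *	zprealloc(foo_zone, 16 * PAGE_SIZE);
 */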
1066
1067/*
1068 * Zone garbage collection subroutines
1c79356b 1069 */
55e303ae 1070
1c79356b
A
1071boolean_t
1072zone_page_collectable(
1073 vm_offset_t addr,
1074 vm_size_t size)
1075{
55e303ae 1076 struct zone_page_table_entry *zp;
1c79356b
A
1077 natural_t i, j;
1078
2d21ac55
A
1079#if ZONE_ALIAS_ADDR
1080 addr = zone_virtual_addr(addr);
1081#endif
1c79356b 1082#if MACH_ASSERT
55e303ae 1083 if (!from_zone_map(addr, size))
1c79356b
A
1084 panic("zone_page_collectable");
1085#endif
1086
55e303ae
A
1087 i = atop_32(addr-zone_map_min_address);
1088 j = atop_32((addr+size-1) - zone_map_min_address);
1089
1090 for (zp = zone_page_table + i; i <= j; zp++, i++)
1091 if (zp->collect_count == zp->alloc_count)
1c79356b 1092 return (TRUE);
55e303ae 1093
1c79356b
A
1094 return (FALSE);
1095}
1096
1097void
1098zone_page_keep(
1099 vm_offset_t addr,
1100 vm_size_t size)
1101{
55e303ae 1102 struct zone_page_table_entry *zp;
1c79356b
A
1103 natural_t i, j;
1104
2d21ac55
A
1105#if ZONE_ALIAS_ADDR
1106 addr = zone_virtual_addr(addr);
1107#endif
1c79356b 1108#if MACH_ASSERT
55e303ae 1109 if (!from_zone_map(addr, size))
1c79356b
A
1110 panic("zone_page_keep");
1111#endif
1112
55e303ae
A
1113 i = atop_32(addr-zone_map_min_address);
1114 j = atop_32((addr+size-1) - zone_map_min_address);
1c79356b 1115
55e303ae
A
1116 for (zp = zone_page_table + i; i <= j; zp++, i++)
1117 zp->collect_count = 0;
1c79356b
A
1118}
1119
1120void
55e303ae 1121zone_page_collect(
1c79356b
A
1122 vm_offset_t addr,
1123 vm_size_t size)
1124{
55e303ae 1125 struct zone_page_table_entry *zp;
1c79356b
A
1126 natural_t i, j;
1127
2d21ac55
A
1128#if ZONE_ALIAS_ADDR
1129 addr = zone_virtual_addr(addr);
1130#endif
1c79356b 1131#if MACH_ASSERT
55e303ae
A
1132 if (!from_zone_map(addr, size))
1133 panic("zone_page_collect");
1c79356b
A
1134#endif
1135
55e303ae
A
1136 i = atop_32(addr-zone_map_min_address);
1137 j = atop_32((addr+size-1) - zone_map_min_address);
1138
1139 for (zp = zone_page_table + i; i <= j; zp++, i++)
1140 ++zp->collect_count;
1c79356b
A
1141}
1142
1143void
1144zone_page_init(
1145 vm_offset_t addr,
1146 vm_size_t size,
1147 int value)
1148{
55e303ae 1149 struct zone_page_table_entry *zp;
1c79356b
A
1150 natural_t i, j;
1151
2d21ac55
A
1152#if ZONE_ALIAS_ADDR
1153 addr = zone_virtual_addr(addr);
1154#endif
1c79356b 1155#if MACH_ASSERT
55e303ae 1156 if (!from_zone_map(addr, size))
1c79356b
A
1157 panic("zone_page_init");
1158#endif
1159
55e303ae
A
1160 i = atop_32(addr-zone_map_min_address);
1161 j = atop_32((addr+size-1) - zone_map_min_address);
1162
1163 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1164 zp->alloc_count = value;
1165 zp->collect_count = 0;
1c79356b 1166 }
1c79356b
A
1167}
1168
1169void
1170zone_page_alloc(
1171 vm_offset_t addr,
1172 vm_size_t size)
1173{
55e303ae 1174 struct zone_page_table_entry *zp;
1c79356b
A
1175 natural_t i, j;
1176
2d21ac55
A
1177#if ZONE_ALIAS_ADDR
1178 addr = zone_virtual_addr(addr);
1179#endif
1c79356b 1180#if MACH_ASSERT
55e303ae 1181 if (!from_zone_map(addr, size))
1c79356b
A
1182 panic("zone_page_alloc");
1183#endif
1184
55e303ae
A
1185 i = atop_32(addr-zone_map_min_address);
1186 j = atop_32((addr+size-1) - zone_map_min_address);
1187
1188 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1189 /*
1190 * Set alloc_count to (ZONE_PAGE_USED + 1) if
1c79356b
A
1191 * it was previously set to ZONE_PAGE_UNUSED.
1192 */
55e303ae
A
1193 if (zp->alloc_count == ZONE_PAGE_UNUSED)
1194 zp->alloc_count = 1;
1195 else
1196 ++zp->alloc_count;
1c79356b 1197 }
1c79356b
A
1198}
1199
1200void
55e303ae
A
1201zone_page_free_element(
1202 struct zone_page_table_entry **free_pages,
1c79356b
A
1203 vm_offset_t addr,
1204 vm_size_t size)
1205{
55e303ae 1206 struct zone_page_table_entry *zp;
1c79356b
A
1207 natural_t i, j;
1208
2d21ac55
A
1209#if ZONE_ALIAS_ADDR
1210 addr = zone_virtual_addr(addr);
1211#endif
1c79356b 1212#if MACH_ASSERT
55e303ae
A
1213 if (!from_zone_map(addr, size))
1214 panic("zone_page_free_element");
1c79356b
A
1215#endif
1216
55e303ae
A
1217 i = atop_32(addr-zone_map_min_address);
1218 j = atop_32((addr+size-1) - zone_map_min_address);
1c79356b 1219
55e303ae
A
1220 for (zp = zone_page_table + i; i <= j; zp++, i++) {
1221 if (zp->collect_count > 0)
1222 --zp->collect_count;
1223 if (--zp->alloc_count == 0) {
1224 zp->alloc_count = ZONE_PAGE_UNUSED;
1225 zp->collect_count = 0;
1c79356b 1226
55e303ae
A
1227 zp->link = *free_pages;
1228 *free_pages = zp;
1c79356b
A
1229 }
1230 }
1c79356b
A
1231}
1232
1233
1234/* This is used for walking through a zone's free element list.
1235 */
55e303ae
A
1236struct zone_free_element {
1237 struct zone_free_element * next;
1c79356b
A
1238};
1239
2d21ac55
A
1240/*
1241 * Add a linked list of pages starting at base back into the zone
1242 * free list. Tail points to the last element on the list.
1243 */
1244
1245#define ADD_LIST_TO_ZONE(zone, base, tail) \
1246MACRO_BEGIN \
1247 (tail)->next = (void *)((zone)->free_elements); \
1248 if (check_freed_element) { \
1249 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
1250 ((vm_offset_t *)(tail))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
1251 (zone)->free_elements; \
1252 } \
1253 (zone)->free_elements = (unsigned long)(base); \
1254MACRO_END
1255
1256/*
1257 * Add an element to the chain pointed to by prev.
1258 */
1259
1260#define ADD_ELEMENT(zone, prev, elem) \
1261MACRO_BEGIN \
1262 (prev)->next = (elem); \
1263 if (check_freed_element) { \
1264 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
1265 ((vm_offset_t *)(prev))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
1266 (vm_offset_t)(elem); \
1267 } \
1268MACRO_END
1269
55e303ae
A
1270struct {
1271 uint32_t pgs_freed;
1272
1273 uint32_t elems_collected,
1274 elems_freed,
1275 elems_kept;
1276} zgc_stats;
1c79356b
A
1277
1278/* Zone garbage collection
1279 *
1280 * zone_gc will walk through all the free elements in all the
1281 * zones that are marked collectable looking for reclaimable
1282 * pages. zone_gc is called by consider_zone_gc when the system
1283 * begins to run out of memory.
1284 */
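/*
 * Rough shape of the algorithm (added for orientation; the details are
 * in the two passes below):
 *
 *	Pass 1: unlink every free element from the zone, count collection
 *		candidates per page (zone_page_collect) and hand foreign
 *		elements straight back.
 *	Pass 2: pages whose elements are all free (collect_count equals
 *		alloc_count) have their elements dropped and are queued for
 *		kmem_free(); every other element is returned to the zone's
 *		free list.
 */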
1285void
1286zone_gc(void)
1287{
1288 unsigned int max_zones;
55e303ae 1289 zone_t z;
1c79356b 1290 unsigned int i;
55e303ae 1291 struct zone_page_table_entry *zp, *zone_free_pages;
1c79356b
A
1292
1293 mutex_lock(&zone_gc_lock);
1294
1c79356b
A
1295 simple_lock(&all_zones_lock);
1296 max_zones = num_zones;
1297 z = first_zone;
1298 simple_unlock(&all_zones_lock);
1299
1300#if MACH_ASSERT
1c79356b 1301 for (i = 0; i < zone_pages; i++)
55e303ae 1302 assert(zone_page_table[i].collect_count == 0);
1c79356b
A
1303#endif /* MACH_ASSERT */
1304
55e303ae 1305 zone_free_pages = NULL;
1c79356b
A
1306
1307 for (i = 0; i < max_zones; i++, z = z->next_zone) {
a3d08fcd 1308 unsigned int n, m;
55e303ae 1309 vm_size_t elt_size, size_freed;
a3d08fcd 1310 struct zone_free_element *elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail;
1c79356b
A
1311
1312 assert(z != ZONE_NULL);
1313
1314 if (!z->collectable)
1315 continue;
1316
1317 lock_zone(z);
1318
55e303ae
A
1319 elt_size = z->elem_size;
1320
1c79356b
A
1321 /*
1322 * Do a quick feasibility check before we scan the zone:
91447636 1323 * skip unless there is a likelihood of getting pages back
 1324 * (i.e. we need a whole allocation block's worth of free
 1325 * elements before we can garbage collect) and
 1326 * the zone has more than 10 percent of its elements free
2d21ac55 1327 * or the element size is a multiple of PAGE_SIZE
1c79356b 1328 */
2d21ac55
A
1329 if ((elt_size & PAGE_MASK) &&
1330 (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) ||
1331 ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10)))) {
1c79356b
A
1332 unlock_zone(z);
1333 continue;
1334 }
1335
a3d08fcd
A
1336 z->doing_gc = TRUE;
1337
55e303ae
A
1338 /*
1339 * Snatch all of the free elements away from the zone.
1c79356b 1340 */
1c79356b 1341
55e303ae 1342 scan = (void *)z->free_elements;
0c530ab8 1343 z->free_elements = 0;
55e303ae
A
1344
1345 unlock_zone(z);
1346
1347 /*
1348 * Pass 1:
1349 *
1350 * Determine which elements we can attempt to collect
1351 * and count them up in the page table. Foreign elements
1352 * are returned to the zone.
1c79356b 1353 */
55e303ae
A
1354
1355 prev = (void *)&scan;
1356 elt = scan;
1357 n = 0; tail = keep = NULL;
1358 while (elt != NULL) {
1359 if (from_zone_map(elt, elt_size)) {
1360 zone_page_collect((vm_offset_t)elt, elt_size);
1361
1c79356b
A
1362 prev = elt;
1363 elt = elt->next;
55e303ae
A
1364
1365 ++zgc_stats.elems_collected;
1c79356b 1366 }
55e303ae
A
1367 else {
1368 if (keep == NULL)
1369 keep = tail = elt;
2d21ac55
A
1370 else {
1371 ADD_ELEMENT(z, tail, elt);
1372 tail = elt;
1373 }
55e303ae 1374
2d21ac55
A
1375 ADD_ELEMENT(z, prev, elt->next);
1376 elt = elt->next;
1377 ADD_ELEMENT(z, tail, NULL);
1c79356b 1378 }
1c79356b 1379
55e303ae
A
1380 /*
1381 * Dribble back the elements we are keeping.
1382 */
1383
a3d08fcd
A
1384 if (++n >= 50) {
1385 if (z->waiting == TRUE) {
1386 lock_zone(z);
55e303ae 1387
a3d08fcd 1388 if (keep != NULL) {
2d21ac55 1389 ADD_LIST_TO_ZONE(z, keep, tail);
a3d08fcd
A
1390 tail = keep = NULL;
1391 } else {
1392 m =0;
1393 base_elt = elt;
1394 base_prev = prev;
1395 while ((elt != NULL) && (++m < 50)) {
1396 prev = elt;
1397 elt = elt->next;
1398 }
1399 if (m !=0 ) {
2d21ac55
A
1400 ADD_LIST_TO_ZONE(z, base_elt, prev);
1401 ADD_ELEMENT(z, base_prev, elt);
a3d08fcd
A
1402 prev = base_prev;
1403 }
1404 }
55e303ae 1405
a3d08fcd
A
1406 if (z->waiting) {
1407 z->waiting = FALSE;
1408 zone_wakeup(z);
1409 }
55e303ae 1410
a3d08fcd
A
1411 unlock_zone(z);
1412 }
1413 n =0;
55e303ae
A
1414 }
1415 }
1416
1417 /*
1418 * Return any remaining elements.
1419 */
1420
1421 if (keep != NULL) {
1422 lock_zone(z);
1423
2d21ac55 1424 ADD_LIST_TO_ZONE(z, keep, tail);
55e303ae
A
1425
1426 unlock_zone(z);
1427 }
1428
1429 /*
1430 * Pass 2:
1431 *
1432 * Determine which pages we can reclaim and
1433 * free those elements.
1434 */
1435
1436 size_freed = 0;
55e303ae
A
1437 elt = scan;
1438 n = 0; tail = keep = NULL;
1439 while (elt != NULL) {
1440 if (zone_page_collectable((vm_offset_t)elt, elt_size)) {
1441 size_freed += elt_size;
1442 zone_page_free_element(&zone_free_pages,
1443 (vm_offset_t)elt, elt_size);
1444
2d21ac55 1445 elt = elt->next;
55e303ae
A
1446
1447 ++zgc_stats.elems_freed;
1448 }
1449 else {
1450 zone_page_keep((vm_offset_t)elt, elt_size);
1451
1452 if (keep == NULL)
1453 keep = tail = elt;
2d21ac55
A
1454 else {
1455 ADD_ELEMENT(z, tail, elt);
1456 tail = elt;
1457 }
55e303ae 1458
2d21ac55
A
1459 elt = elt->next;
1460 ADD_ELEMENT(z, tail, NULL);
55e303ae
A
1461
1462 ++zgc_stats.elems_kept;
1463 }
1464
1465 /*
1466 * Dribble back the elements we are keeping,
1467 * and update the zone size info.
1468 */
1469
a3d08fcd 1470 if (++n >= 50) {
55e303ae
A
1471 lock_zone(z);
1472
1473 z->cur_size -= size_freed;
1474 size_freed = 0;
1475
a3d08fcd 1476 if (keep != NULL) {
2d21ac55 1477 ADD_LIST_TO_ZONE(z, keep, tail);
a3d08fcd
A
1478 }
1479
1480 if (z->waiting) {
1481 z->waiting = FALSE;
1482 zone_wakeup(z);
1483 }
55e303ae
A
1484
1485 unlock_zone(z);
1486
1487 n = 0; tail = keep = NULL;
1488 }
1489 }
1490
1491 /*
1492 * Return any remaining elements, and update
1493 * the zone size info.
1494 */
1495
a3d08fcd
A
1496 lock_zone(z);
1497
55e303ae 1498 if (size_freed > 0 || keep != NULL) {
55e303ae
A
1499
1500 z->cur_size -= size_freed;
1501
1502 if (keep != NULL) {
2d21ac55 1503 ADD_LIST_TO_ZONE(z, keep, tail);
55e303ae
A
1504 }
1505
55e303ae 1506 }
a3d08fcd
A
1507
1508 z->doing_gc = FALSE;
1509 if (z->waiting) {
1510 z->waiting = FALSE;
1511 zone_wakeup(z);
1512 }
1513 unlock_zone(z);
1c79356b
A
1514 }
1515
55e303ae
A
1516 /*
1517 * Reclaim the pages we are freeing.
1518 */
1c79356b 1519
55e303ae
A
1520 while ((zp = zone_free_pages) != NULL) {
1521 zone_free_pages = zp->link;
2d21ac55
A
1522#if ZONE_ALIAS_ADDR
1523 z = zone_virtual_addr((vm_map_address_t)z);
1524#endif
55e303ae
A
1525 kmem_free(zone_map, zone_map_min_address + PAGE_SIZE *
1526 (zp - zone_page_table), PAGE_SIZE);
1527 ++zgc_stats.pgs_freed;
1c79356b 1528 }
55e303ae 1529
1c79356b
A
1530 mutex_unlock(&zone_gc_lock);
1531}
1532
1c79356b
A
1533/*
1534 * consider_zone_gc:
1535 *
1536 * Called by the pageout daemon when the system needs more free pages.
1537 */
1538
1539void
1540consider_zone_gc(void)
1541{
1542 /*
1543 * By default, don't attempt zone GC more frequently
91447636 1544 * than once per minute.
1c79356b
A
1545 */
1546
1547 if (zone_gc_max_rate == 0)
91447636 1548 zone_gc_max_rate = (60 << SCHED_TICK_SHIFT) + 1;
1c79356b
A
1549
1550 if (zone_gc_allowed &&
0b4e3aa0
A
1551 ((sched_tick > (zone_gc_last_tick + zone_gc_max_rate)) ||
1552 zone_gc_forced)) {
1553 zone_gc_forced = FALSE;
1c79356b
A
1554 zone_gc_last_tick = sched_tick;
1555 zone_gc();
1556 }
1557}
1558
2d21ac55
A
1559struct fake_zone_info {
1560 const char* name;
1561 void (*func)(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
1562 int *, int *);
1563};
1564
1565static struct fake_zone_info fake_zones[] = {
1566 {
1567 .name = "kernel_stacks",
1568 .func = stack_fake_zone_info,
1569 },
1570#ifdef ppc
1571 {
1572 .name = "save_areas",
1573 .func = save_fake_zone_info,
1574 },
1575 {
1576 .name = "pmap_mappings",
1577 .func = mapping_fake_zone_info,
1578 },
1579#endif /* ppc */
1580#ifdef i386
1581 {
1582 .name = "page_tables",
1583 .func = pt_fake_zone_info,
1584 },
1585#endif /* i386 */
1586 {
1587 .name = "kalloc.large",
1588 .func = kalloc_fake_zone_info,
1589 },
1590};
1c79356b
A
1591
1592kern_return_t
1593host_zone_info(
1594 host_t host,
1595 zone_name_array_t *namesp,
1596 mach_msg_type_number_t *namesCntp,
1597 zone_info_array_t *infop,
1598 mach_msg_type_number_t *infoCntp)
1599{
1600 zone_name_t *names;
1601 vm_offset_t names_addr;
1602 vm_size_t names_size;
1603 zone_info_t *info;
1604 vm_offset_t info_addr;
1605 vm_size_t info_size;
1606 unsigned int max_zones, i;
1607 zone_t z;
1608 zone_name_t *zn;
1609 zone_info_t *zi;
1610 kern_return_t kr;
2d21ac55 1611 size_t num_fake_zones;
1c79356b
A
1612
1613 if (host == HOST_NULL)
1614 return KERN_INVALID_HOST;
1615
2d21ac55
A
1616 num_fake_zones = sizeof fake_zones / sizeof fake_zones[0];
1617
1c79356b
A
1618 /*
1619 * We assume that zones aren't freed once allocated.
1620 * We won't pick up any zones that are allocated later.
1621 */
1622
1623 simple_lock(&all_zones_lock);
2d21ac55 1624 max_zones = num_zones + num_fake_zones;
1c79356b
A
1625 z = first_zone;
1626 simple_unlock(&all_zones_lock);
1627
1628 if (max_zones <= *namesCntp) {
1629 /* use in-line memory */
91447636 1630 names_size = *namesCntp * sizeof *names;
1c79356b
A
1631 names = *namesp;
1632 } else {
91447636 1633 names_size = round_page(max_zones * sizeof *names);
1c79356b
A
1634 kr = kmem_alloc_pageable(ipc_kernel_map,
1635 &names_addr, names_size);
1636 if (kr != KERN_SUCCESS)
1637 return kr;
1638 names = (zone_name_t *) names_addr;
1639 }
1640
1641 if (max_zones <= *infoCntp) {
1642 /* use in-line memory */
91447636 1643 info_size = *infoCntp * sizeof *info;
1c79356b
A
1644 info = *infop;
1645 } else {
91447636 1646 info_size = round_page(max_zones * sizeof *info);
1c79356b
A
1647 kr = kmem_alloc_pageable(ipc_kernel_map,
1648 &info_addr, info_size);
1649 if (kr != KERN_SUCCESS) {
1650 if (names != *namesp)
1651 kmem_free(ipc_kernel_map,
1652 names_addr, names_size);
1653 return kr;
1654 }
1655
1656 info = (zone_info_t *) info_addr;
1657 }
1658 zn = &names[0];
1659 zi = &info[0];
1660
1661 for (i = 0; i < num_zones; i++) {
1662 struct zone zcopy;
1663
1664 assert(z != ZONE_NULL);
1665
1666 lock_zone(z);
1667 zcopy = *z;
1668 unlock_zone(z);
1669
1670 simple_lock(&all_zones_lock);
1671 z = z->next_zone;
1672 simple_unlock(&all_zones_lock);
1673
1674 /* assuming here the name data is static */
1675 (void) strncpy(zn->zn_name, zcopy.zone_name,
1676 sizeof zn->zn_name);
2d21ac55 1677 zn->zn_name[sizeof zn->zn_name - 1] = '\0';
1c79356b
A
1678
1679 zi->zi_count = zcopy.count;
1680 zi->zi_cur_size = zcopy.cur_size;
1681 zi->zi_max_size = zcopy.max_size;
1682 zi->zi_elem_size = zcopy.elem_size;
1683 zi->zi_alloc_size = zcopy.alloc_size;
1684 zi->zi_exhaustible = zcopy.exhaustible;
1685 zi->zi_collectable = zcopy.collectable;
1686
1687 zn++;
1688 zi++;
1689 }
0c530ab8 1690
2d21ac55
A
1691 /*
1692 * loop through the fake zones and fill them using the specialized
1693 * functions
1694 */
1695 for (i = 0; i < num_fake_zones; i++) {
1696 strncpy(zn->zn_name, fake_zones[i].name, sizeof zn->zn_name);
1697 zn->zn_name[sizeof zn->zn_name - 1] = '\0';
1698 fake_zones[i].func(&zi->zi_count, &zi->zi_cur_size,
1699 &zi->zi_max_size, &zi->zi_elem_size,
1700 &zi->zi_alloc_size, &zi->zi_collectable,
1701 &zi->zi_exhaustible);
1702 zn++;
1703 zi++;
1704 }
1c79356b
A
1705
1706 if (names != *namesp) {
1707 vm_size_t used;
1708 vm_map_copy_t copy;
1709
1710 used = max_zones * sizeof *names;
1711
1712 if (used != names_size)
1713 bzero((char *) (names_addr + used), names_size - used);
1714
91447636
A
1715 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
1716 (vm_map_size_t)names_size, TRUE, &copy);
1c79356b
A
1717 assert(kr == KERN_SUCCESS);
1718
1719 *namesp = (zone_name_t *) copy;
1720 }
1721 *namesCntp = max_zones;
1722
1723 if (info != *infop) {
1724 vm_size_t used;
1725 vm_map_copy_t copy;
1726
1727 used = max_zones * sizeof *info;
1728
1729 if (used != info_size)
1730 bzero((char *) (info_addr + used), info_size - used);
1731
91447636
A
1732 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
1733 (vm_map_size_t)info_size, TRUE, &copy);
1c79356b
A
1734 assert(kr == KERN_SUCCESS);
1735
1736 *infop = (zone_info_t *) copy;
1737 }
1738 *infoCntp = max_zones;
1739
1740 return KERN_SUCCESS;
1741}
1742
1743#if MACH_KDB
1744#include <ddb/db_command.h>
1745#include <ddb/db_output.h>
1746#include <kern/kern_print.h>
1747
1748const char *zone_labels =
1749"ENTRY COUNT TOT_SZ MAX_SZ ELT_SZ ALLOC_SZ NAME";
1750
1751/* Forwards */
1752void db_print_zone(
1753 zone_t addr);
1754
1755#if ZONE_DEBUG
1756void db_zone_check_active(
1757 zone_t zone);
1758void db_zone_print_active(
1759 zone_t zone);
1760#endif /* ZONE_DEBUG */
1761void db_zone_print_free(
1762 zone_t zone);
1763void
1764db_print_zone(
1765 zone_t addr)
1766{
1767 struct zone zcopy;
1768
1769 zcopy = *addr;
1770
1771 db_printf("%8x %8x %8x %8x %6x %8x %s ",
1772 addr, zcopy.count, zcopy.cur_size,
1773 zcopy.max_size, zcopy.elem_size,
1774 zcopy.alloc_size, zcopy.zone_name);
1775 if (zcopy.exhaustible)
1776 db_printf("H");
1777 if (zcopy.collectable)
1778 db_printf("C");
1779 if (zcopy.expandable)
1780 db_printf("X");
1781 db_printf("\n");
1782}
1783
1784/*ARGSUSED*/
1785void
2d21ac55
A
1786db_show_one_zone(db_expr_t addr, boolean_t have_addr,
1787 __unused db_expr_t count, __unused char *modif)
1c79356b 1788{
91447636 1789 struct zone *z = (zone_t)((char *)0 + addr);
1c79356b
A
1790
1791 if (z == ZONE_NULL || !have_addr){
1792 db_error("No Zone\n");
1793 /*NOTREACHED*/
1794 }
1795
1796 db_printf("%s\n", zone_labels);
1797 db_print_zone(z);
1798}
1799
1800/*ARGSUSED*/
1801void
2d21ac55
A
1802db_show_all_zones(__unused db_expr_t addr, boolean_t have_addr, db_expr_t count,
1803 __unused char *modif)
1c79356b
A
1804{
1805 zone_t z;
1806 unsigned total = 0;
1807
1808 /*
1809 * Don't risk hanging by unconditionally locking,
1810 * risk of incoherent data is small (zones aren't freed).
1811 */
1812 have_addr = simple_lock_try(&all_zones_lock);
1813 count = num_zones;
1814 z = first_zone;
1815 if (have_addr) {
1816 simple_unlock(&all_zones_lock);
1817 }
1818
1819 db_printf("%s\n", zone_labels);
1820 for ( ; count > 0; count--) {
1821 if (!z) {
1822 db_error("Mangled Zone List\n");
1823 /*NOTREACHED*/
1824 }
1825 db_print_zone(z);
1826 total += z->cur_size,
1827
1828 have_addr = simple_lock_try(&all_zones_lock);
1829 z = z->next_zone;
1830 if (have_addr) {
1831 simple_unlock(&all_zones_lock);
1832 }
1833 }
1834 db_printf("\nTotal %8x", total);
55e303ae 1835 db_printf("\n\nzone_gc() has reclaimed %d pages\n", zgc_stats.pgs_freed);
1c79356b
A
1836}
1837
1838#if ZONE_DEBUG
1839void
1840db_zone_check_active(
1841 zone_t zone)
1842{
1843 int count = 0;
1844 queue_t tmp_elem;
1845
1846 if (!zone_debug_enabled(zone) || !zone_check)
1847 return;
1848 tmp_elem = queue_first(&zone->active_zones);
1849 while (count < zone->count) {
1850 count++;
1851 if (tmp_elem == 0) {
2d21ac55 1852 printf("unexpected zero element, zone=%p, count=%d\n",
1c79356b
A
1853 zone, count);
1854 assert(FALSE);
1855 break;
1856 }
1857 if (queue_end(tmp_elem, &zone->active_zones)) {
2d21ac55 1858 printf("unexpected queue_end, zone=%p, count=%d\n",
1c79356b
A
1859 zone, count);
1860 assert(FALSE);
1861 break;
1862 }
1863 tmp_elem = queue_next(tmp_elem);
1864 }
1865 if (!queue_end(tmp_elem, &zone->active_zones)) {
2d21ac55 1866 printf("not at queue_end, zone=%p, tmp_elem=%p\n",
1c79356b
A
1867 zone, tmp_elem);
1868 assert(FALSE);
1869 }
1870}
1871
1872void
1873db_zone_print_active(
1874 zone_t zone)
1875{
1876 int count = 0;
1877 queue_t tmp_elem;
1878
1879 if (!zone_debug_enabled(zone)) {
2d21ac55 1880 printf("zone %p debug not enabled\n", zone);
1c79356b
A
1881 return;
1882 }
1883 if (!zone_check) {
1884 printf("zone_check FALSE\n");
1885 return;
1886 }
1887
2d21ac55 1888 printf("zone %p, active elements %d\n", zone, zone->count);
1c79356b
A
1889 printf("active list:\n");
1890 tmp_elem = queue_first(&zone->active_zones);
1891 while (count < zone->count) {
2d21ac55 1892 printf(" %p", tmp_elem);
1c79356b
A
1893 count++;
1894 if ((count % 6) == 0)
1895 printf("\n");
1896 if (tmp_elem == 0) {
1897 printf("\nunexpected zero element, count=%d\n", count);
1898 break;
1899 }
1900 if (queue_end(tmp_elem, &zone->active_zones)) {
1901 printf("\nunexpected queue_end, count=%d\n", count);
1902 break;
1903 }
1904 tmp_elem = queue_next(tmp_elem);
1905 }
1906 if (!queue_end(tmp_elem, &zone->active_zones))
2d21ac55 1907 printf("\nnot at queue_end, tmp_elem=%p\n", tmp_elem);
1c79356b
A
1908 else
1909 printf("\n");
1910}
1911#endif /* ZONE_DEBUG */
1912
1913void
1914db_zone_print_free(
1915 zone_t zone)
1916{
1917 int count = 0;
1918 int freecount;
1919 vm_offset_t elem;
1920
1921 freecount = zone_free_count(zone);
2d21ac55 1922 printf("zone %p, free elements %d\n", zone, freecount);
1c79356b
A
1923 printf("free list:\n");
1924 elem = zone->free_elements;
1925 while (count < freecount) {
1926 printf(" 0x%x", elem);
1927 count++;
1928 if ((count % 6) == 0)
1929 printf("\n");
1930 if (elem == 0) {
1931 printf("\nunexpected zero element, count=%d\n", count);
1932 break;
1933 }
1934 elem = *((vm_offset_t *)elem);
1935 }
1936 if (elem != 0)
1937 printf("\nnot at end of free list, elem=0x%x\n", elem);
1938 else
1939 printf("\n");
1940}
1941
1942#endif /* MACH_KDB */
1943
1944
1945#if ZONE_DEBUG
1946
1947/* should we care about locks here ? */
1948
1949#if MACH_KDB
91447636 1950void *
1c79356b
A
1951next_element(
1952 zone_t z,
91447636 1953 void *prev)
1c79356b 1954{
91447636
A
1955 char *elt = (char *)prev;
1956
1c79356b 1957 if (!zone_debug_enabled(z))
2d21ac55 1958 return(NULL);
55e303ae 1959 elt -= ZONE_DEBUG_OFFSET;
91447636 1960 elt = (char *) queue_next((queue_t) elt);
1c79356b 1961 if ((queue_t) elt == &z->active_zones)
2d21ac55 1962 return(NULL);
55e303ae 1963 elt += ZONE_DEBUG_OFFSET;
1c79356b
A
1964 return(elt);
1965}
1966
91447636 1967void *
1c79356b
A
1968first_element(
1969 zone_t z)
1970{
91447636 1971 char *elt;
1c79356b
A
1972
1973 if (!zone_debug_enabled(z))
2d21ac55 1974 return(NULL);
1c79356b 1975 if (queue_empty(&z->active_zones))
2d21ac55 1976 return(NULL);
91447636 1977 elt = (char *)queue_first(&z->active_zones);
55e303ae 1978 elt += ZONE_DEBUG_OFFSET;
1c79356b
A
1979 return(elt);
1980}
1981
1982/*
1983 * Second arg controls how many zone elements are printed:
1984 * 0 => none
1985 * n, n < 0 => all
1986 * n, n > 0 => last n on active list
1987 */
1988int
1989zone_count(
1990 zone_t z,
1991 int tail)
1992{
91447636 1993 void *elt;
1c79356b
A
1994 int count = 0;
1995 boolean_t print = (tail != 0);
1996
1997 if (tail < 0)
1998 tail = z->count;
1999 if (z->count < tail)
2000 tail = 0;
2001 tail = z->count - tail;
2002 for (elt = first_element(z); elt; elt = next_element(z, elt)) {
2003 if (print && tail <= count)
2004 db_printf("%8x\n", elt);
2005 count++;
2006 }
2007 assert(count == z->count);
2008 return(count);
2009}
2010#endif /* MACH_KDB */
2011
2012#define zone_in_use(z) ( z->count || z->free_elements )
2013
2014void
2015zone_debug_enable(
2016 zone_t z)
2017{
2018 if (zone_debug_enabled(z) || zone_in_use(z) ||
55e303ae 2019 z->alloc_size < (z->elem_size + ZONE_DEBUG_OFFSET))
1c79356b
A
2020 return;
2021 queue_init(&z->active_zones);
55e303ae 2022 z->elem_size += ZONE_DEBUG_OFFSET;
1c79356b
A
2023}
2024
2025void
2026zone_debug_disable(
2027 zone_t z)
2028{
2029 if (!zone_debug_enabled(z) || zone_in_use(z))
2030 return;
55e303ae 2031 z->elem_size -= ZONE_DEBUG_OFFSET;
2d21ac55 2032 z->active_zones.next = z->active_zones.prev = NULL;
1c79356b
A
2033}
2034#endif /* ZONE_DEBUG */