/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *  Author: Avadis Tevanian, Jr.
 *
 *  General kernel memory allocator.  This allocator is designed
 *  to be used by the kernel to manage dynamic memory fast.
 */
#include <zone_debug.h>

#include <mach/boolean.h>
#include <mach/machine/vm_types.h>
#include <mach/vm_param.h>
#include <kern/misc_protos.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <libkern/OSMalloc.h>
zone_t kalloc_zone(vm_size_t);

#define KALLOC_MAP_SIZE_MIN  (16 * 1024 * 1024)
#define KALLOC_MAP_SIZE_MAX  (128 * 1024 * 1024)
vm_map_t    kalloc_map;
vm_size_t   kalloc_max;
vm_size_t   kalloc_max_prerounded;
vm_size_t   kalloc_kernmap_size;    /* size of kallocs that can come from kernel map */

unsigned int kalloc_large_inuse;
vm_size_t    kalloc_large_total;
vm_size_t    kalloc_large_max;
vm_size_t    kalloc_largest_allocated = 0;
uint64_t     kalloc_large_sum;

int kalloc_fake_zone_index = -1;    /* index of our fake zone in statistics arrays */

vm_offset_t kalloc_map_min;
vm_offset_t kalloc_map_max;
/*
 * Diagnostic code to track mutexes separately rather than via the 2^ zones
 */
zone_t lck_mtx_zone;
static void
KALLOC_ZINFO_SALLOC(vm_size_t bytes)
{
    thread_t thr = current_thread();
    task_t task;
    zinfo_usage_t zinfo;

    thr->tkm_shared.alloc += bytes;
    if (kalloc_fake_zone_index != -1 &&
        (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
        zinfo[kalloc_fake_zone_index].alloc += bytes;
}
static void
KALLOC_ZINFO_SFREE(vm_size_t bytes)
{
    thread_t thr = current_thread();
    task_t task;
    zinfo_usage_t zinfo;

    thr->tkm_shared.free += bytes;
    if (kalloc_fake_zone_index != -1 &&
        (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
        zinfo[kalloc_fake_zone_index].free += bytes;
}
/*
 *  All allocations of size less than kalloc_max are rounded to the
 *  next highest power of 2.  This allocator is built on top of
 *  the zone allocator.  A zone is created for each potential size
 *  that we are willing to get in small blocks.
 *
 *  We assume that kalloc_max is not greater than 64K;
 *  thus 16 is a safe array size for k_zone and k_zone_name.
 *
 *  Note that kalloc_max is somewhat confusingly named.
 *  It represents the first power of two for which no zone exists.
 *  kalloc_max_prerounded is the smallest allocation size, before
 *  rounding, for which no zone exists.
 *  Also, if the allocation size is more than kalloc_kernmap_size
 *  then allocate from kernel map rather than kalloc_map.
 */
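/*
 * For example, assuming the usual 16 KB kalloc_max: a 24-byte request rounds
 * up to the 32-byte zone ("kalloc.32"), an 8192-byte request is served from
 * "kalloc.8192", and any request of kalloc_max_prerounded (8193) bytes or
 * more bypasses the zones and is kmem_alloc'd from kalloc_map, or from
 * kernel_map once it reaches kalloc_kernmap_size.
 */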
int first_k_zone = -1;
struct zone *k_zone[16];
static const char *k_zone_name[16] = {
    "kalloc.1",     "kalloc.2",
    "kalloc.4",     "kalloc.8",
    "kalloc.16",    "kalloc.32",
    "kalloc.64",    "kalloc.128",
    "kalloc.256",   "kalloc.512",
    "kalloc.1024",  "kalloc.2048",
    "kalloc.4096",  "kalloc.8192",
    "kalloc.16384", "kalloc.32768"
};
/*
 *  Max number of elements per zone.  zinit rounds things up correctly.
 *  Doing things this way permits each zone to have a different maximum size
 *  based on need, rather than just guessing; it also
 *  means it's patchable in case you're wrong!
 */
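/*
 * For example (illustrative only), with the 4096 entry below the 8192-byte
 * zone is created as zinit(8192, 4096 * 8192, 8192, "kalloc.8192"), i.e. it
 * may grow to roughly 32 MB of 8 KB elements, with zinit rounding that
 * maximum up as it sees fit.
 */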
unsigned long k_zone_max[16] = {
    1024,   /*  1024 Byte  */
    1024,   /*  2048 Byte  */
    1024,   /*  4096 Byte  */
    4096,   /*  8192 Byte  */
/* forward declarations */
void * kalloc_canblock(
        vm_size_t   size,
        boolean_t   canblock);

lck_grp_t *kalloc_lck_grp;
lck_mtx_t  kalloc_lock;

#define kalloc_spin_lock()  lck_mtx_lock_spin(&kalloc_lock)
#define kalloc_unlock()     lck_mtx_unlock(&kalloc_lock)
/* OSMalloc local data declarations */
queue_head_t OSMalloc_tag_list;

lck_grp_t *OSMalloc_tag_lck_grp;
lck_mtx_t  OSMalloc_tag_lock;

#define OSMalloc_tag_spin_lock()  lck_mtx_lock_spin(&OSMalloc_tag_lock)
#define OSMalloc_tag_unlock()     lck_mtx_unlock(&OSMalloc_tag_lock)

/* OSMalloc forward declarations */
void OSMalloc_init(void);
void OSMalloc_Tagref(OSMallocTag tag);
void OSMalloc_Tagrele(OSMallocTag tag);
/*
 *  Initialize the memory allocator.  This should be called only
 *  once on a system wide basis (i.e. first processor to get here
 *  does the initialization).
 *
 *  This initializes all of the zones.
 */
void
kalloc_init(
    void)
{
    kern_return_t retval;
    vm_offset_t min;
    vm_size_t size, kalloc_map_size;
    register int i;
    /*
     * Scale the kalloc_map_size to physical memory size: stay below
     * 1/8th the total zone map size, or 128 MB (for a 32-bit kernel).
     */
    kalloc_map_size = (vm_size_t)(sane_size >> 5);
#if !defined(__LP64__)
    if (kalloc_map_size > KALLOC_MAP_SIZE_MAX)
        kalloc_map_size = KALLOC_MAP_SIZE_MAX;
#endif /* !__LP64__ */
    if (kalloc_map_size < KALLOC_MAP_SIZE_MIN)
        kalloc_map_size = KALLOC_MAP_SIZE_MIN;
    retval = kmem_suballoc(kernel_map, &min, kalloc_map_size,
                           FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT,
                           &kalloc_map);

    if (retval != KERN_SUCCESS)
        panic("kalloc_init: kmem_suballoc failed");

    kalloc_map_min = min;
    kalloc_map_max = min + kalloc_map_size - 1;
    /*
     *  Ensure that zones up to size 8192 bytes exist.
     *  This is desirable because messages are allocated
     *  with kalloc, and messages up through size 8192 are common.
     */

    if (PAGE_SIZE < 16*1024)
        kalloc_max = 16*1024;
    else
        kalloc_max = PAGE_SIZE;
    kalloc_max_prerounded = kalloc_max / 2 + 1;
    /* allocations of more than 16 times kalloc_max (256 KB) come from the kernel map rather than kalloc_map */
    kalloc_kernmap_size = (kalloc_max * 16) + 1;
    kalloc_largest_allocated = kalloc_kernmap_size;
    /*
     *  Allocate a zone for each size we are going to handle.
     *  We specify non-paged memory.  Don't charge the caller
     *  for the allocation, as we aren't sure how the memory
     *  will be handled.
     */
    for (i = 0, size = 1; size < kalloc_max; i++, size <<= 1) {
        if (size < KALLOC_MINSIZE) {
            k_zone[i] = NULL;
            continue;
        }
        if (size == KALLOC_MINSIZE) {
            first_k_zone = i;
        }
        k_zone[i] = zinit(size, k_zone_max[i] * size, size,
                          k_zone_name[i]);
        zone_change(k_zone[i], Z_CALLERACCT, FALSE);
    }
    kalloc_lck_grp = lck_grp_alloc_init("kalloc.large", LCK_GRP_ATTR_NULL);
    lck_mtx_init(&kalloc_lock, kalloc_lck_grp, LCK_ATTR_NULL);
    lck_mtx_zone = zinit(sizeof(struct _lck_mtx_), 1024*256, 4096, "lck_mtx");
}

void *
kalloc_canblock(
    vm_size_t   size,
    boolean_t   canblock)
{
    register int zindex;
    register vm_size_t allocsize;
    vm_map_t alloc_map = VM_MAP_NULL;
    /*
     * If size is too large for a zone, then use kmem_alloc.
     * (We use kmem_alloc instead of kmem_alloc_kobject so that
     * krealloc can use kmem_realloc.)
     */
    if (size >= kalloc_max_prerounded) {
        void *addr;

        /* kmem_alloc could block so we return if noblock */
        if (!canblock) {
            return(NULL);
        }

        if (size >= kalloc_kernmap_size)
            alloc_map = kernel_map;
        else
            alloc_map = kalloc_map;

        if (kmem_alloc(alloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) {
            if (alloc_map != kernel_map) {
                if (kmem_alloc(kernel_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS)
                    addr = NULL;
            } else
                addr = NULL;
        }
        if (addr != NULL) {
            kalloc_spin_lock();
            /*
             * Thread-safe version of the workaround for 4740071
             */
            if (size > kalloc_largest_allocated)
                kalloc_largest_allocated = size;

            kalloc_large_inuse++;
            kalloc_large_total += size;
            kalloc_large_sum += size;

            if (kalloc_large_total > kalloc_large_max)
                kalloc_large_max = kalloc_large_total;

            kalloc_unlock();

            KALLOC_ZINFO_SALLOC(size);
        }
        return(addr);
    }
    /* compute the size of the block that we will actually allocate */

    allocsize = KALLOC_MINSIZE;
    zindex = first_k_zone;
    while (allocsize < size) {
        allocsize <<= 1;
        zindex++;
    }

    /* allocate from the appropriate zone */
    assert(allocsize < kalloc_max);
    return(zalloc_canblock(k_zone[zindex], canblock));
}
void *
kalloc(vm_size_t size)
{
    return( kalloc_canblock(size, TRUE) );
}

void *
kalloc_noblock(vm_size_t size)
{
    return( kalloc_canblock(size, FALSE) );
}
void
krealloc(
    void **addrp,
    vm_size_t old_size,
    vm_size_t new_size,
    simple_lock_t lock)
{
    register int zindex;
    register vm_size_t allocsize;
    void *naddr;
    vm_map_t alloc_map = VM_MAP_NULL;

    /* can only be used for increasing allocation size */
    assert(new_size > old_size);

    /* if old_size is zero, then we are simply allocating */
    if (old_size == 0) {
        simple_unlock(lock);
        naddr = kalloc(new_size);
        simple_lock(lock);
        *addrp = naddr;
        return;
    }
    /* if old block was kmem_alloc'd, then use kmem_realloc if necessary */

    if (old_size >= kalloc_max_prerounded) {
        if (old_size >= kalloc_kernmap_size)
            alloc_map = kernel_map;
        else
            alloc_map = kalloc_map;

        old_size = round_page(old_size);
        new_size = round_page(new_size);
        if (new_size > old_size) {
            if (KERN_SUCCESS != kmem_realloc(alloc_map,
                (vm_offset_t)*addrp, old_size,
                (vm_offset_t *)&naddr, new_size))
                panic("krealloc: kmem_realloc");

            *addrp = (void *) naddr;

            /* kmem_realloc() doesn't free old page range. */
            kmem_free(alloc_map, (vm_offset_t)*addrp, old_size);

            kalloc_large_total += (new_size - old_size);
            kalloc_large_sum += (new_size - old_size);

            if (kalloc_large_total > kalloc_large_max)
                kalloc_large_max = kalloc_large_total;
        }
        return;
    }
    /* compute the size of the block that we actually allocated */

    allocsize = KALLOC_MINSIZE;
    zindex = first_k_zone;
    while (allocsize < old_size) {
        allocsize <<= 1;
        zindex++;
    }

    /* if new size fits in old block, then return */

    if (new_size <= allocsize) {
        return;
    }
    /* if new size does not fit in zone, kmem_alloc it, else zalloc it */

    if (new_size >= kalloc_max_prerounded) {
        if (new_size >= kalloc_kernmap_size)
            alloc_map = kernel_map;
        else
            alloc_map = kalloc_map;
        if (KERN_SUCCESS != kmem_alloc(alloc_map,
            (vm_offset_t *)&naddr, new_size)) {
            panic("krealloc: kmem_alloc");
        }

        kalloc_large_inuse++;
        kalloc_large_sum += new_size;
        kalloc_large_total += new_size;

        if (kalloc_large_total > kalloc_large_max)
            kalloc_large_max = kalloc_large_total;

        KALLOC_ZINFO_SALLOC(new_size);
    } else {
        register int new_zindex;

        allocsize <<= 1;
        new_zindex = zindex + 1;
        while (allocsize < new_size) {
            allocsize <<= 1;
            new_zindex++;
        }
        naddr = zalloc(k_zone[new_zindex]);
    }

    /* copy existing data */
    bcopy((const char *)*addrp, (char *)naddr, old_size);

    /* free old block, and return */
    zfree(k_zone[zindex], *addrp);

    /* set up new address */
    *addrp = (void *) naddr;
}
void *
kget(
    vm_size_t size)
{
    register int zindex;
    register vm_size_t allocsize;

    /* size must not be too large for a zone */
    if (size >= kalloc_max_prerounded) {
        /* This will never work, so we might as well panic */
        panic("kget");
    }

    /* compute the size of the block that we will actually allocate */
    allocsize = KALLOC_MINSIZE;
    zindex = first_k_zone;
    while (allocsize < size) {
        allocsize <<= 1;
        zindex++;
    }

    /* allocate from the appropriate zone */
    assert(allocsize < kalloc_max);
    return(zget(k_zone[zindex]));
}
volatile SInt32 kfree_nop_count = 0;

void
kfree(
    void        *data,
    vm_size_t   size)
{
    register int zindex;
    register vm_size_t freesize;
    vm_map_t alloc_map = kernel_map;
    /* if size was too large for a zone, then use kmem_free */

    if (size >= kalloc_max_prerounded) {
        if ((((vm_offset_t) data) >= kalloc_map_min) && (((vm_offset_t) data) <= kalloc_map_max))
            alloc_map = kalloc_map;
        if (size > kalloc_largest_allocated) {
            /*
             * work around double FREEs of small MALLOCs
             * this used to end up being a nop
             * since the pointer being freed from an
             * alloc backed by the zalloc world could
             * never show up in the kalloc_map... however,
             * the kernel_map is a different issue... since it
             * was released back into the zalloc pool, a pointer
             * would have gotten written over the 'size' that
             * the MALLOC was retaining in the first 4 bytes of
             * the underlying allocation... that pointer ends up
             * looking like a really big size on the 2nd FREE and
             * pushes the kfree into the kernel_map... we
             * end up removing a ton of virtual space before we panic
             * this check causes us to ignore the kfree for a size
             * that must be 'bogus'... note that it might not be due
             * to the above scenario, but it would still be wrong and
             * cause serious damage.
             */
            OSAddAtomic(1, &kfree_nop_count);
            return;
        }
        kmem_free(alloc_map, (vm_offset_t)data, size);

        kalloc_large_total -= size;
        kalloc_large_inuse--;

        KALLOC_ZINFO_SFREE(size);
        return;
    }
    /* compute the size of the block that we actually allocated from */

    freesize = KALLOC_MINSIZE;
    zindex = first_k_zone;
    while (freesize < size) {
        freesize <<= 1;
        zindex++;
    }

    /* free to the appropriate zone */
    assert(freesize < kalloc_max);
    zfree(k_zone[zindex], data);
}
zone_t
kalloc_zone(
    vm_size_t size)
{
    register int zindex = 0;
    register vm_size_t allocsize;

    /* compute the size of the block that we will actually allocate */
    if (size <= kalloc_max) {
        allocsize = KALLOC_MINSIZE;
        zindex = first_k_zone;
        while (allocsize < size) {
            allocsize <<= 1;
            zindex++;
        }
        return (k_zone[zindex]);
    }
    return (ZONE_NULL);
}
void
kalloc_fake_zone_init(int zone_index)
{
    kalloc_fake_zone_index = zone_index;
}
void
kalloc_fake_zone_info(int *count,
    vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
    uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
{
    *count    = kalloc_large_inuse;
    *cur_size = kalloc_large_total;
    *max_size = kalloc_large_max;

    if (kalloc_large_inuse) {
        *elem_size  = kalloc_large_total / kalloc_large_inuse;
        *alloc_size = kalloc_large_total / kalloc_large_inuse;
    } else {
        *elem_size  = 0;
        *alloc_size = 0;
    }
    *sum_size = kalloc_large_sum;
}
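/*
 * The "fake zone" reported above summarizes kalloc.large activity: the
 * oversized allocations that bypass the power-of-2 zones, which would
 * otherwise be invisible to zone statistics.
 */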
void
OSMalloc_init(
    void)
{
    queue_init(&OSMalloc_tag_list);

    OSMalloc_tag_lck_grp = lck_grp_alloc_init("OSMalloc_tag", LCK_GRP_ATTR_NULL);
    lck_mtx_init(&OSMalloc_tag_lock, OSMalloc_tag_lck_grp, LCK_ATTR_NULL);
}
OSMallocTag
OSMalloc_Tagalloc(
    const char      *str,
    uint32_t        flags)
{
    OSMallocTag OSMTag;

    OSMTag = (OSMallocTag)kalloc(sizeof(*OSMTag));

    bzero((void *)OSMTag, sizeof(*OSMTag));

    if (flags & OSMT_PAGEABLE)
        OSMTag->OSMT_attr = OSMT_ATTR_PAGEABLE;

    OSMTag->OSMT_refcnt = 1;

    strncpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME);

    OSMalloc_tag_spin_lock();
    enqueue_tail(&OSMalloc_tag_list, (queue_entry_t)OSMTag);
    OSMalloc_tag_unlock();
    OSMTag->OSMT_state = OSMT_VALID;
    return(OSMTag);
}
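/*
 * Typical use of the tag API from a kext (illustrative sketch only; the tag
 * name below is a made-up example):
 *
 *  OSMallocTag tag = OSMalloc_Tagalloc("com.example.driver", OSMT_DEFAULT);
 *  void *buf = OSMalloc(1024, tag);
 *  ...
 *  OSFree(buf, 1024, tag);
 *  OSMalloc_Tagfree(tag);
 *
 * Each successful allocation holds a reference on the tag (taken via
 * OSMalloc_Tagref() and dropped by OSFree()), so the tag structure is only
 * reclaimed once OSMalloc_Tagfree() has been called and the last reference
 * is released.
 */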
void
OSMalloc_Tagref(
    OSMallocTag     tag)
{
    if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID))
        panic("OSMalloc_Tagref(): bad state 0x%08X\n", tag->OSMT_state);

    (void)hw_atomic_add(&tag->OSMT_refcnt, 1);
}
void
OSMalloc_Tagrele(
    OSMallocTag     tag)
{
    if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID))
        panic("OSMalloc_Tagrele(): bad state 0x%08X\n", tag->OSMT_state);

    if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) {
        if (hw_compare_and_store(OSMT_VALID|OSMT_RELEASED, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) {
            OSMalloc_tag_spin_lock();
            (void)remque((queue_entry_t)tag);
            OSMalloc_tag_unlock();
            kfree((void*)tag, sizeof(*tag));
        } else
            panic("OSMalloc_Tagrele(): refcnt 0\n");
    }
}
void
OSMalloc_Tagfree(
    OSMallocTag     tag)
{
    if (!hw_compare_and_store(OSMT_VALID, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state))
        panic("OSMalloc_Tagfree(): bad state 0x%08X\n", tag->OSMT_state);

    if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) {
        OSMalloc_tag_spin_lock();
        (void)remque((queue_entry_t)tag);
        OSMalloc_tag_unlock();
        kfree((void*)tag, sizeof(*tag));
    }
}
void *
OSMalloc(
    uint32_t        size,
    OSMallocTag     tag)
{
    void            *addr = NULL;
    kern_return_t   kr;

    OSMalloc_Tagref(tag);
    if ((tag->OSMT_attr & OSMT_PAGEABLE)
        && (size & ~PAGE_MASK)) {
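        /*
         * (size & ~PAGE_MASK) is non-zero only when the request is at least
         * one full page, so pageable tags hand page-sized-or-larger requests
         * to kmem_alloc_pageable(); everything else falls through to kalloc()
         * below.
         */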
        if ((kr = kmem_alloc_pageable(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS)
            addr = NULL;
    } else
        addr = kalloc((vm_size_t)size);

    if (addr == NULL)
        OSMalloc_Tagrele(tag);

    return(addr);
}
void *
OSMalloc_nowait(
    uint32_t        size,
    OSMallocTag     tag)
{
    void    *addr = NULL;

    if (tag->OSMT_attr & OSMT_PAGEABLE)
        return(NULL);

    OSMalloc_Tagref(tag);
    /* XXX: use non-blocking kalloc for now */
    addr = kalloc_noblock((vm_size_t)size);
    if (addr == NULL)
        OSMalloc_Tagrele(tag);

    return(addr);
}
void *
OSMalloc_noblock(
    uint32_t        size,
    OSMallocTag     tag)
{
    void    *addr = NULL;

    if (tag->OSMT_attr & OSMT_PAGEABLE)
        return(NULL);

    OSMalloc_Tagref(tag);
    addr = kalloc_noblock((vm_size_t)size);
    if (addr == NULL)
        OSMalloc_Tagrele(tag);

    return(addr);
}
void
OSFree(
    void            *addr,
    uint32_t        size,
    OSMallocTag     tag)
{
    if ((tag->OSMT_attr & OSMT_PAGEABLE)
        && (size & ~PAGE_MASK)) {
        kmem_free(kernel_map, (vm_offset_t)addr, size);
    } else
        kfree((void *)addr, size);

    OSMalloc_Tagrele(tag);
}