/*
 * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr.
 *
 *	General kernel memory allocator.  This allocator is designed
 *	to be used by the kernel to manage dynamic memory fast.
 */
#include <zone_debug.h>

#include <mach/boolean.h>
#include <mach/machine/vm_types.h>
#include <mach/vm_param.h>
#include <kern/misc_protos.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <libkern/OSMalloc.h>
zone_t
kalloc_zone(vm_size_t);
#define KALLOC_MAP_SIZE_MIN	(16 * 1024 * 1024)
#define KALLOC_MAP_SIZE_MAX	(128 * 1024 * 1024)

vm_map_t	kalloc_map;
vm_size_t	kalloc_max;
vm_size_t	kalloc_max_prerounded;
vm_size_t	kalloc_kernmap_size;	/* size of kallocs that can come from kernel map */

unsigned int	kalloc_large_inuse;
vm_size_t	kalloc_large_total;
vm_size_t	kalloc_large_max;
vm_size_t	kalloc_largest_allocated = 0;
uint64_t	kalloc_large_sum;

int		kalloc_fake_zone_index = -1;	/* index of our fake zone in statistics arrays */

vm_offset_t	kalloc_map_min;
vm_offset_t	kalloc_map_max;
#ifdef	MUTEX_ZONE
/*
 * Diagnostic code to track mutexes separately rather than via the 2^ zones
 */
zone_t	lck_mtx_zone;
#endif
static void
KALLOC_ZINFO_SALLOC(vm_size_t bytes)
{
	thread_t thr = current_thread();
	task_t task;
	zinfo_usage_t zinfo;

	ledger_debit(thr->t_ledger, task_ledgers.tkm_shared, bytes);

	if (kalloc_fake_zone_index != -1 &&
	    (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
		zinfo[kalloc_fake_zone_index].alloc += bytes;
}
static void
KALLOC_ZINFO_SFREE(vm_size_t bytes)
{
	thread_t thr = current_thread();
	task_t task;
	zinfo_usage_t zinfo;

	ledger_credit(thr->t_ledger, task_ledgers.tkm_shared, bytes);

	if (kalloc_fake_zone_index != -1 &&
	    (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
		zinfo[kalloc_fake_zone_index].free += bytes;
}
/*
 *	All allocations of size less than kalloc_max are rounded to the
 *	next nearest sized zone.  This allocator is built on top of
 *	the zone allocator.  A zone is created for each potential size
 *	that we are willing to get in small blocks.
 *
 *	We assume that kalloc_max is not greater than 64K;
 *
 *	Note that kalloc_max is somewhat confusingly named.
 *	It represents the first power of two for which no zone exists.
 *	kalloc_max_prerounded is the smallest allocation size, before
 *	rounding, for which no zone exists.
 *
 *	Also if the allocation size is more than kalloc_kernmap_size
 *	then allocate from kernel map rather than kalloc_map.
 */
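/*
 * Worked example (editor's addition): with 4 KB pages, kalloc_init()
 * below sets kalloc_max = 16384, kalloc_max_prerounded = 8193 and
 * kalloc_kernmap_size = 262145.  A 2000-byte request is therefore
 * zone-backed (rounded up to the kalloc.2048 zone), a 100000-byte
 * request comes from kalloc_map via kmem_alloc(), and a 300000-byte
 * request comes from the kernel map.
 */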
#if KALLOC_MINSIZE == 16 && KALLOC_LOG2_MINALIGN == 4

/*
 * "Legacy" aka "power-of-2" backing zones with 16-byte minimum
 * size and alignment.  Users of this profile would probably
 * benefit from some tuning.
 */

#define K_ZONE_SIZES		\
	16,			\
	32,			\
/* 6 */	64,			\
	128,			\
	256,			\
/* 9 */	512,			\
	1024,			\
	2048,			\
/* C */	4096

#define K_ZONE_NAMES		\
	"kalloc.16",		\
	"kalloc.32",		\
/* 6 */	"kalloc.64",		\
	"kalloc.128",		\
	"kalloc.256",		\
/* 9 */	"kalloc.512",		\
	"kalloc.1024",		\
	"kalloc.2048",		\
/* C */	"kalloc.4096"

#define K_ZONE_MAXIMA		\
	1024,			\
	4096,			\
/* 6 */	4096,			\
	4096,			\
	4096,			\
/* 9 */	1024,			\
	1024,			\
	1024,			\
/* C */	1024
#elif KALLOC_MINSIZE == 8 && KALLOC_LOG2_MINALIGN == 3

/*
 * Tweaked for ARM (and x64) in 04/2011
 */

#define K_ZONE_SIZES			\
/* 3 */	8,				\
	16,	24,			\
	32,	40,	48,		\
/* 6 */	64,	88,	112,		\
	128,	192,			\
	256,	384,			\
/* 9 */	512,	768,			\
	1024,	1536,			\
	2048,	3072,			\
	4096,	6144

#define K_ZONE_NAMES			\
/* 3 */	"kalloc.8",			\
	"kalloc.16",	"kalloc.24",	\
	"kalloc.32",	"kalloc.40",	"kalloc.48",	\
/* 6 */	"kalloc.64",	"kalloc.88",	"kalloc.112",	\
	"kalloc.128",	"kalloc.192",	\
	"kalloc.256",	"kalloc.384",	\
/* 9 */	"kalloc.512",	"kalloc.768",	\
	"kalloc.1024",	"kalloc.1536",	\
	"kalloc.2048",	"kalloc.3072",	\
	"kalloc.4096",	"kalloc.6144"

#define K_ZONE_MAXIMA			\
/* 3 */	1024,				\
	1024,	1024,			\
	4096,	4096,	4096,		\
/* 6 */	4096,	4096,	4096,		\
	4096,	4096,			\
	4096,	4096,			\
/* 9 */	1024,	1024,			\
	1024,	1024,			\
	1024,	1024,			\
	4096,	64

#else
#error	missing zone size parameters for kalloc
#endif
#define KALLOC_MINALIGN	(1 << KALLOC_LOG2_MINALIGN)

static const int k_zone_size[] = {
	K_ZONE_SIZES,
	8192,
	16384,
	32768
};

#define N_K_ZONE	(sizeof (k_zone_size) / sizeof (k_zone_size[0]))
/*
 * Many kalloc() allocations are for small structures containing a few
 * pointers and longs - the k_zone_dlut[] direct lookup table, indexed by
 * size normalized to the minimum alignment, finds the right zone index
 * for them in one dereference.
 */

#define INDEX_ZDLUT(size)	\
		(((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
#define N_K_ZDLUT	(2048 / KALLOC_MINALIGN)
		/* covers sizes [0 .. 2048 - KALLOC_MINALIGN] */
#define MAX_SIZE_ZDLUT	((N_K_ZDLUT - 1) * KALLOC_MINALIGN)

static int8_t k_zone_dlut[N_K_ZDLUT];	/* table of indices into k_zone[] */
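/*
 * Worked example (editor's addition): with KALLOC_MINSIZE == 8,
 * KALLOC_MINALIGN is 8, N_K_ZDLUT is 256 and MAX_SIZE_ZDLUT is 2040.
 * A 33-byte request gives INDEX_ZDLUT(33) == (33 + 7) / 8 == 5, and
 * k_zone_dlut[5] holds the index of the kalloc.40 zone, so the right
 * zone is found with a single array dereference.
 */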
/*
 * If there's no hit in the DLUT, then start searching from k_zindex_start.
 */
static int k_zindex_start;

static zone_t k_zone[N_K_ZONE];

static const char *k_zone_name[N_K_ZONE] = {
	K_ZONE_NAMES,
	"kalloc.8192",
	"kalloc.16384",
/* F */	"kalloc.32768"
};
/*
 *  Max number of elements per zone.  zinit rounds things up correctly.
 *  Doing things this way permits each zone to have a different maximum size
 *  based on need, rather than just guessing; it also
 *  means it's patchable in case you're wrong!
 */
unsigned int k_zone_max[N_K_ZONE] = {
	K_ZONE_MAXIMA,
	4096,
	64,
/* F */	64
};
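/*
 * Worked example (editor's addition, using the maxima reconstructed
 * above): k_zone_max[0] == 1024 under the 8-byte profile, so
 * kalloc_init() creates that zone as zinit(8, 1024 * 8, 8, "kalloc.8"),
 * i.e. capped at 8 KB of 8-byte elements; each zone's cap is tuned
 * independently.
 */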
/* #define KALLOC_DEBUG 1 */

/* forward declarations */
void * kalloc_canblock(
		vm_size_t	size,
		boolean_t	canblock);


lck_grp_t *kalloc_lck_grp;
lck_mtx_t kalloc_lock;

#define kalloc_spin_lock()	lck_mtx_lock_spin(&kalloc_lock)
#define kalloc_unlock()		lck_mtx_unlock(&kalloc_lock)
/* OSMalloc local data declarations */
queue_head_t	OSMalloc_tag_list;

lck_grp_t	*OSMalloc_tag_lck_grp;
lck_mtx_t	OSMalloc_tag_lock;

#define OSMalloc_tag_spin_lock()	lck_mtx_lock_spin(&OSMalloc_tag_lock)
#define OSMalloc_tag_unlock()		lck_mtx_unlock(&OSMalloc_tag_lock)


/* OSMalloc forward declarations */
void OSMalloc_init(void);
void OSMalloc_Tagref(OSMallocTag tag);
void OSMalloc_Tagrele(OSMallocTag tag);
/*
 *	Initialize the memory allocator.  This should be called only
 *	once on a system wide basis (i.e. first processor to get here
 *	does the initialization).
 *
 *	This initializes all of the zones.
 */

void
kalloc_init(
	void)
{
	kern_return_t retval;
	vm_offset_t min;
	vm_size_t size, kalloc_map_size;
	int i;
	/*
	 * Scale the kalloc_map_size to physical memory size: stay below
	 * 1/8th the total zone map size, or 128 MB (for a 32-bit kernel).
	 */
	kalloc_map_size = (vm_size_t)(sane_size >> 5);
#if !__LP64__
	if (kalloc_map_size > KALLOC_MAP_SIZE_MAX)
		kalloc_map_size = KALLOC_MAP_SIZE_MAX;
#endif /* !__LP64__ */
	if (kalloc_map_size < KALLOC_MAP_SIZE_MIN)
		kalloc_map_size = KALLOC_MAP_SIZE_MIN;
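	/*
	 * Worked example (editor's addition): with 1 GB of physical memory,
	 * sane_size >> 5 yields a 32 MB kalloc_map; with 256 MB it yields
	 * 8 MB, which the KALLOC_MAP_SIZE_MIN check above raises to 16 MB;
	 * on a 32-bit kernel with 8 GB it would be capped at 128 MB.
	 */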
	retval = kmem_suballoc(kernel_map, &min, kalloc_map_size,
			       FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT,
			       &kalloc_map);

	if (retval != KERN_SUCCESS)
		panic("kalloc_init: kmem_suballoc failed");

	kalloc_map_min = min;
	kalloc_map_max = min + kalloc_map_size - 1;
	/*
	 *	Ensure that zones up to size 8192 bytes exist.
	 *	This is desirable because messages are allocated
	 *	with kalloc, and messages up through size 8192 are common.
	 */

	if (PAGE_SIZE < 16*1024)
		kalloc_max = 16*1024;
	else
		kalloc_max = PAGE_SIZE;
	kalloc_max_prerounded = kalloc_max / 2 + 1;
	/* size it to be more than 16 times kalloc_max (256k) for allocations from kernel map */
	kalloc_kernmap_size = (kalloc_max * 16) + 1;
	kalloc_largest_allocated = kalloc_kernmap_size;
	/*
	 *	Allocate a zone for each size we are going to handle.
	 *	We specify non-paged memory.  Don't charge the caller
	 *	for the allocation, as we aren't sure how the memory
	 *	will be handled.
	 */
	for (i = 0; (size = k_zone_size[i]) < kalloc_max; i++) {
		k_zone[i] = zinit(size, k_zone_max[i] * size, size,
				  k_zone_name[i]);
		zone_change(k_zone[i], Z_CALLERACCT, FALSE);
	}
	/*
	 * Build the Direct LookUp Table for small allocations
	 */
	for (i = 0, size = 0; i <= N_K_ZDLUT; i++, size += KALLOC_MINALIGN) {
		int zindex = 0;

		while ((vm_size_t)k_zone_size[zindex] < size)
			zindex++;

		if (i == N_K_ZDLUT) {
			k_zindex_start = zindex;
			break;
		}
		k_zone_dlut[i] = (int8_t)zindex;
	}
#ifdef KALLOC_DEBUG
	printf("kalloc_init: k_zindex_start %d\n", k_zindex_start);

	/*
	 * Do a quick synthesis to see how well/badly we can
	 * find-a-zone for a given size.
	 * Useful when debugging/tweaking the array of zone sizes.
	 * Cache misses probably more critical than compare-branches!
	 */
	for (i = 0; i < (int)N_K_ZONE; i++) {
		vm_size_t testsize = (vm_size_t)k_zone_size[i] - 1;
		int compare = 0;
		int zindex;

		if (testsize < MAX_SIZE_ZDLUT) {
			compare += 1;	/* 'if' (T) */

			long dindex = INDEX_ZDLUT(testsize);
			zindex = (int)k_zone_dlut[dindex];

		} else if (testsize < kalloc_max_prerounded) {

			compare += 2;	/* 'if' (F), 'if' (T) */

			zindex = k_zindex_start;
			while ((vm_size_t)k_zone_size[zindex] < testsize) {
				zindex++;
				compare++;	/* 'while' (T) */
			}
			compare++;	/* 'while' (F) */
		} else
			break;	/* not zone-backed */

		zone_t z = k_zone[zindex];
		printf("kalloc_init: req size %4lu: %11s took %d compare%s\n",
		    (unsigned long)testsize, z->zone_name, compare,
		    compare == 1 ? "" : "s");
	}
#endif
	kalloc_lck_grp = lck_grp_alloc_init("kalloc.large", LCK_GRP_ATTR_NULL);
	lck_mtx_init(&kalloc_lock, kalloc_lck_grp, LCK_ATTR_NULL);
	OSMalloc_init();
#ifdef	MUTEX_ZONE
	lck_mtx_zone = zinit(sizeof(struct _lck_mtx_), 1024*256, 4096, "lck_mtx");
#endif
}
/*
 * Given an allocation size, return the kalloc zone it belongs to.
 * Direct LookUp Table variant.
 */
static __inline zone_t
get_zone_dlut(vm_size_t size)
{
	long dindex = INDEX_ZDLUT(size);
	int zindex = (int)k_zone_dlut[dindex];
	return (k_zone[zindex]);
}
/* As above, but linear search k_zone_size[] for the next zone that fits. */

static __inline zone_t
get_zone_search(vm_size_t size, int zindex)
{
	assert(size < kalloc_max_prerounded);

	while ((vm_size_t)k_zone_size[zindex] < size)
		zindex++;

	assert((unsigned)zindex < N_K_ZONE &&
	    (vm_size_t)k_zone_size[zindex] < kalloc_max);

	return (k_zone[zindex]);
}
void *
kalloc_canblock(
		vm_size_t	size,
		boolean_t	canblock)
{
	zone_t z;

	if (size < MAX_SIZE_ZDLUT)
		z = get_zone_dlut(size);
	else if (size < kalloc_max_prerounded)
		z = get_zone_search(size, k_zindex_start);
	else {
		/*
		 * If size is too large for a zone, then use kmem_alloc.
		 * (We use kmem_alloc instead of kmem_alloc_kobject so that
		 * krealloc can use kmem_realloc.)
		 */
		vm_map_t alloc_map;
		void *addr;

		/* kmem_alloc could block so we return if noblock */
		if (!canblock) {
			return (NULL);
		}

		if (size >= kalloc_kernmap_size)
			alloc_map = kernel_map;
		else
			alloc_map = kalloc_map;

		if (kmem_alloc(alloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) {
			if (alloc_map != kernel_map) {
				if (kmem_alloc(kernel_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS)
					addr = NULL;
			}
			else
				addr = NULL;
		}

		if (addr != NULL) {
			kalloc_spin_lock();
			/*
			 * Thread-safe version of the workaround for 4740071
			 * (a double FREE())
			 */
			if (size > kalloc_largest_allocated)
				kalloc_largest_allocated = size;

			kalloc_large_inuse++;
			kalloc_large_total += size;
			kalloc_large_sum += size;

			if (kalloc_large_total > kalloc_large_max)
				kalloc_large_max = kalloc_large_total;

			kalloc_unlock();

			KALLOC_ZINFO_SALLOC(size);
		}
		return (addr);
	}
#ifdef KALLOC_DEBUG
	if (size > z->elem_size)
		panic("%s: z %p (%s) but requested size %lu", __func__,
		    z, z->zone_name, (unsigned long)size);
#endif
	assert(size <= z->elem_size);
	return (zalloc_canblock(z, canblock));
}
void *
kalloc(
	vm_size_t size)
{
	return( kalloc_canblock(size, TRUE) );
}
void *
kalloc_noblock(
	vm_size_t size)
{
	return( kalloc_canblock(size, FALSE) );
}
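/*
 * Usage sketch (editor's addition, hypothetical caller): unlike
 * malloc()/free(), kfree() takes the allocation size, so callers must
 * remember the size they passed to kalloc().  The struct and functions
 * below are invented for illustration.
 */
#if 0	/* illustrative only, not part of the original file */
struct widget {
	struct widget	*w_next;
	int		w_id;
};

static struct widget *
widget_create(void)
{
	struct widget *w = (struct widget *)kalloc(sizeof(*w));

	if (w != NULL)
		bzero(w, sizeof(*w));
	return (w);
}

static void
widget_destroy(struct widget *w)
{
	/* must match the size given to kalloc() */
	kfree(w, sizeof(*w));
}
#endif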
volatile SInt32 kfree_nop_count = 0;
void
kfree(
	void		*data,
	vm_size_t	size)
{
	zone_t z;

	if (size < MAX_SIZE_ZDLUT)
		z = get_zone_dlut(size);
	else if (size < kalloc_max_prerounded)
		z = get_zone_search(size, k_zindex_start);
	else {
		/* if size was too large for a zone, then use kmem_free */

		vm_map_t alloc_map = kernel_map;

		if ((((vm_offset_t) data) >= kalloc_map_min) && (((vm_offset_t) data) <= kalloc_map_max))
			alloc_map = kalloc_map;
		if (size > kalloc_largest_allocated) {
			/*
			 * work around double FREEs of small MALLOCs
			 * this used to end up being a nop
			 * since the pointer being freed from an
			 * alloc backed by the zalloc world could
			 * never show up in the kalloc_map... however,
			 * the kernel_map is a different issue... since it
			 * was released back into the zalloc pool, a pointer
			 * would have gotten written over the 'size' that
			 * the MALLOC was retaining in the first 4 bytes of
			 * the underlying allocation... that pointer ends up
			 * looking like a really big size on the 2nd FREE and
			 * pushes the kfree into the kernel_map...  we
			 * end up removing a ton of virtual space before we panic
			 * this check causes us to ignore the kfree for a size
			 * that must be 'bogus'... note that it might not be due
			 * to the above scenario, but it would still be wrong and
			 * cause serious damage.
			 */
			OSAddAtomic(1, &kfree_nop_count);
			return;
		}
		kmem_free(alloc_map, (vm_offset_t)data, size);

		kalloc_spin_lock();

		kalloc_large_total -= size;
		kalloc_large_inuse--;

		kalloc_unlock();

		KALLOC_ZINFO_SFREE(size);
		return;
	}

	/* free to the appropriate zone */
#ifdef KALLOC_DEBUG
	if (size > z->elem_size)
		panic("%s: z %p (%s) but requested size %lu", __func__,
		    z, z->zone_name, (unsigned long)size);
#endif
	assert(size <= z->elem_size);
	zfree(z, data);
}
zone_t
kalloc_zone(
	vm_size_t size)
{
	if (size < MAX_SIZE_ZDLUT)
		return (get_zone_dlut(size));
	if (size <= kalloc_max)
		return (get_zone_search(size, k_zindex_start));
	return (ZONE_NULL);
}
void
kalloc_fake_zone_init(int zone_index)
{
	kalloc_fake_zone_index = zone_index;
}
void
kalloc_fake_zone_info(int *count,
		      vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
		      uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
{
	*count      = kalloc_large_inuse;
	*cur_size   = kalloc_large_total;
	*max_size   = kalloc_large_max;

	if (kalloc_large_inuse) {
		*elem_size  = kalloc_large_total / kalloc_large_inuse;
		*alloc_size = kalloc_large_total / kalloc_large_inuse;
	} else {
		*elem_size  = 0;
		*alloc_size = 0;
	}
	*sum_size    = kalloc_large_sum;
	*collectable = 0;
	*exhaustable = 0;
	*caller_acct = 0;
}
void
OSMalloc_init(
	void)
{
	queue_init(&OSMalloc_tag_list);

	OSMalloc_tag_lck_grp = lck_grp_alloc_init("OSMalloc_tag", LCK_GRP_ATTR_NULL);
	lck_mtx_init(&OSMalloc_tag_lock, OSMalloc_tag_lck_grp, LCK_ATTR_NULL);
}
OSMallocTag
OSMalloc_Tagalloc(
	const char	*str,
	uint32_t	flags)
{
	OSMallocTag	OSMTag;

	OSMTag = (OSMallocTag)kalloc(sizeof(*OSMTag));

	bzero((void *)OSMTag, sizeof(*OSMTag));

	if (flags & OSMT_PAGEABLE)
		OSMTag->OSMT_attr = OSMT_ATTR_PAGEABLE;

	OSMTag->OSMT_refcnt = 1;

	strncpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME);

	OSMalloc_tag_spin_lock();
	enqueue_tail(&OSMalloc_tag_list, (queue_entry_t)OSMTag);
	OSMalloc_tag_unlock();
	OSMTag->OSMT_state = OSMT_VALID;
	return (OSMTag);
}
void
OSMalloc_Tagref(
	OSMallocTag	tag)
{
	if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID))
		panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state);

	(void)hw_atomic_add(&tag->OSMT_refcnt, 1);
}
void
OSMalloc_Tagrele(
	OSMallocTag	tag)
{
	if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID))
		panic("OSMalloc_Tagrele():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state);

	if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) {
		if (hw_compare_and_store(OSMT_VALID|OSMT_RELEASED, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) {
			OSMalloc_tag_spin_lock();
			(void)remque((queue_entry_t)tag);
			OSMalloc_tag_unlock();
			kfree((void*)tag, sizeof(*tag));
		} else
			panic("OSMalloc_Tagrele():'%s' has refcnt 0\n", tag->OSMT_name);
	}
}
void
OSMalloc_Tagfree(
	OSMallocTag	tag)
{
	if (!hw_compare_and_store(OSMT_VALID, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state))
		panic("OSMalloc_Tagfree():'%s' has bad state 0x%08X \n", tag->OSMT_name, tag->OSMT_state);

	if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) {
		OSMalloc_tag_spin_lock();
		(void)remque((queue_entry_t)tag);
		OSMalloc_tag_unlock();
		kfree((void*)tag, sizeof(*tag));
	}
}
void *
OSMalloc(
	uint32_t	size,
	OSMallocTag	tag)
{
	void		*addr = NULL;
	kern_return_t	kr;

	OSMalloc_Tagref(tag);
	if ((tag->OSMT_attr & OSMT_PAGEABLE)
	    && (size & ~PAGE_MASK)) {

		if ((kr = kmem_alloc_pageable(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS)
			addr = NULL;
	} else
		addr = kalloc((vm_size_t)size);

	if (!addr)
		OSMalloc_Tagrele(tag);

	return (addr);
}
->OSMT_attr
& OSMT_PAGEABLE
)
788 OSMalloc_Tagref(tag
);
789 /* XXX: use non-blocking kalloc for now */
790 addr
= kalloc_noblock((vm_size_t
)size
);
792 OSMalloc_Tagrele(tag
);
void *
OSMalloc_noblock(
	uint32_t	size,
	OSMallocTag	tag)
{
	void	*addr = NULL;

	if (tag->OSMT_attr & OSMT_PAGEABLE)
		return (NULL);

	OSMalloc_Tagref(tag);
	addr = kalloc_noblock((vm_size_t)size);
	if (addr == NULL)
		OSMalloc_Tagrele(tag);

	return (addr);
}
void
OSFree(
	void		*addr,
	uint32_t	size,
	OSMallocTag	tag)
{
	if ((tag->OSMT_attr & OSMT_PAGEABLE)
	    && (size & ~PAGE_MASK)) {
		kmem_free(kernel_map, (vm_offset_t)addr, size);
	} else
		kfree((void *)addr, size);

	OSMalloc_Tagrele(tag);
}