2 * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
60 * Author: Avadis Tevanian, Jr.
63 * General kernel memory allocator. This allocator is designed
64 * to be used by the kernel to manage dynamic memory fast.
67 #include <zone_debug.h>
69 #include <mach/boolean.h>
70 #include <mach/machine/vm_types.h>
71 #include <mach/vm_param.h>
72 #include <kern/misc_protos.h>
73 #include <kern/zalloc.h>
74 #include <kern/kalloc.h>
75 #include <kern/lock.h>
76 #include <kern/ledger.h>
77 #include <vm/vm_kern.h>
78 #include <vm/vm_object.h>
79 #include <vm/vm_map.h>
80 #include <libkern/OSMalloc.h>
83 zone_t
kalloc_zone(vm_size_t
);
86 #define KALLOC_MAP_SIZE_MIN (16 * 1024 * 1024)
87 #define KALLOC_MAP_SIZE_MAX (128 * 1024 * 1024)
90 vm_size_t kalloc_max_prerounded
;
91 vm_size_t kalloc_kernmap_size
; /* size of kallocs that can come from kernel map */
93 unsigned int kalloc_large_inuse
;
94 vm_size_t kalloc_large_total
;
95 vm_size_t kalloc_large_max
;
96 vm_size_t kalloc_largest_allocated
= 0;
97 uint64_t kalloc_large_sum
;
99 int kalloc_fake_zone_index
= -1; /* index of our fake zone in statistics arrays */
101 vm_offset_t kalloc_map_min
;
102 vm_offset_t kalloc_map_max
;
106 * Diagnostic code to track mutexes separately rather than via the 2^ zones
112 KALLOC_ZINFO_SALLOC(vm_size_t bytes
)
114 thread_t thr
= current_thread();
118 ledger_debit(thr
->t_ledger
, task_ledgers
.tkm_shared
, bytes
);
120 if (kalloc_fake_zone_index
!= -1 &&
121 (task
= thr
->task
) != NULL
&& (zinfo
= task
->tkm_zinfo
) != NULL
)
122 zinfo
[kalloc_fake_zone_index
].alloc
+= bytes
;
126 KALLOC_ZINFO_SFREE(vm_size_t bytes
)
128 thread_t thr
= current_thread();
132 ledger_credit(thr
->t_ledger
, task_ledgers
.tkm_shared
, bytes
);
134 if (kalloc_fake_zone_index
!= -1 &&
135 (task
= thr
->task
) != NULL
&& (zinfo
= task
->tkm_zinfo
) != NULL
)
136 zinfo
[kalloc_fake_zone_index
].free
+= bytes
;
140 * All allocations of size less than kalloc_max are rounded to the
141 * next nearest sized zone. This allocator is built on top of
142 * the zone allocator. A zone is created for each potential size
143 * that we are willing to get in small blocks.
145 * We assume that kalloc_max is not greater than 64K;
147 * Note that kalloc_max is somewhat confusingly named.
148 * It represents the first power of two for which no zone exists.
149 * kalloc_max_prerounded is the smallest allocation size, before
150 * rounding, for which no zone exists.
152 * Also if the allocation size is more than kalloc_kernmap_size
153 * then allocate from kernel map rather than kalloc_map.
156 #if KALLOC_MINSIZE == 16 && KALLOC_LOG2_MINALIGN == 4
159 * "Legacy" aka "power-of-2" backing zones with 16-byte minimum
160 * size and alignment. Users of this profile would probably
161 * benefit from some tuning.
164 #define K_ZONE_SIZES \
176 #define K_ZONE_NAMES \
179 /* 6 */ "kalloc.64", \
182 /* 9 */ "kalloc.512", \
185 /* C */ "kalloc.4096"
187 #define K_ZONE_MAXIMA \
198 #elif KALLOC_MINSIZE == 8 && KALLOC_LOG2_MINALIGN == 3
201 * Tweaked for ARM (and x64) in 04/2011
204 #define K_ZONE_SIZES \
208 /* 6 */ 64, 88, 112, \
216 #define K_ZONE_NAMES \
217 /* 3 */ "kalloc.8", \
218 "kalloc.16", "kalloc.24", \
219 "kalloc.32", "kalloc.40", "kalloc.48", \
220 /* 6 */ "kalloc.64", "kalloc.88", "kalloc.112", \
221 "kalloc.128", "kalloc.192", \
222 "kalloc.256", "kalloc.384", \
223 /* 9 */ "kalloc.512", "kalloc.768", \
224 "kalloc.1024", "kalloc.1536", \
225 "kalloc.2048", "kalloc.3072", \
226 "kalloc.4096", "kalloc.6144"
228 #define K_ZONE_MAXIMA \
232 /* 6 */ 4096, 4096, 4096, \
235 /* 9 */ 1024, 1024, \
241 #error missing zone size parameters for kalloc
244 #define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN)
246 static const int k_zone_size
[] = {
253 #define N_K_ZONE (sizeof (k_zone_size) / sizeof (k_zone_size[0]))
256 * Many kalloc() allocations are for small structures containing a few
257 * pointers and longs - the k_zone_dlut[] direct lookup table, indexed by
258 * size normalized to the minimum alignment, finds the right zone index
259 * for them in one dereference.
262 #define INDEX_ZDLUT(size) \
263 (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
264 #define N_K_ZDLUT (2048 / KALLOC_MINALIGN)
265 /* covers sizes [0 .. 2048 - KALLOC_MINALIGN] */
266 #define MAX_SIZE_ZDLUT ((N_K_ZDLUT - 1) * KALLOC_MINALIGN)
268 static int8_t k_zone_dlut
[N_K_ZDLUT
]; /* table of indices into k_zone[] */
271 * If there's no hit in the DLUT, then start searching from k_zindex_start.
273 static int k_zindex_start
;
275 static zone_t k_zone
[N_K_ZONE
];
277 static const char *k_zone_name
[N_K_ZONE
] = {
281 /* F */ "kalloc.32768"
285 * Max number of elements per zone. zinit rounds things up correctly
286 * Doing things this way permits each zone to have a different maximum size
287 * based on need, rather than just guessing; it also
288 * means it's patchable in case you're wrong!
290 unsigned int k_zone_max
[N_K_ZONE
] = {
297 /* #define KALLOC_DEBUG 1 */
299 /* forward declarations */
300 void * kalloc_canblock(
305 lck_grp_t
*kalloc_lck_grp
;
306 lck_mtx_t kalloc_lock
;
308 #define kalloc_spin_lock() lck_mtx_lock_spin(&kalloc_lock)
309 #define kalloc_unlock() lck_mtx_unlock(&kalloc_lock)
312 /* OSMalloc local data declarations */
314 queue_head_t OSMalloc_tag_list
;
316 lck_grp_t
*OSMalloc_tag_lck_grp
;
317 lck_mtx_t OSMalloc_tag_lock
;
319 #define OSMalloc_tag_spin_lock() lck_mtx_lock_spin(&OSMalloc_tag_lock)
320 #define OSMalloc_tag_unlock() lck_mtx_unlock(&OSMalloc_tag_lock)
323 /* OSMalloc forward declarations */
324 void OSMalloc_init(void);
325 void OSMalloc_Tagref(OSMallocTag tag
);
326 void OSMalloc_Tagrele(OSMallocTag tag
);
329 * Initialize the memory allocator. This should be called only
330 * once on a system wide basis (i.e. first processor to get here
331 * does the initialization).
333 * This initializes all of the zones.
340 kern_return_t retval
;
342 vm_size_t size
, kalloc_map_size
;
346 * Scale the kalloc_map_size to physical memory size: stay below
347 * 1/8th the total zone map size, or 128 MB (for a 32-bit kernel).
349 kalloc_map_size
= (vm_size_t
)(sane_size
>> 5);
351 if (kalloc_map_size
> KALLOC_MAP_SIZE_MAX
)
352 kalloc_map_size
= KALLOC_MAP_SIZE_MAX
;
353 #endif /* !__LP64__ */
354 if (kalloc_map_size
< KALLOC_MAP_SIZE_MIN
)
355 kalloc_map_size
= KALLOC_MAP_SIZE_MIN
;
357 retval
= kmem_suballoc(kernel_map
, &min
, kalloc_map_size
,
358 FALSE
, VM_FLAGS_ANYWHERE
| VM_FLAGS_PERMANENT
,
361 if (retval
!= KERN_SUCCESS
)
362 panic("kalloc_init: kmem_suballoc failed");
364 kalloc_map_min
= min
;
365 kalloc_map_max
= min
+ kalloc_map_size
- 1;
368 * Ensure that zones up to size 8192 bytes exist.
369 * This is desirable because messages are allocated
370 * with kalloc, and messages up through size 8192 are common.
373 if (PAGE_SIZE
< 16*1024)
374 kalloc_max
= 16*1024;
376 kalloc_max
= PAGE_SIZE
;
377 kalloc_max_prerounded
= kalloc_max
/ 2 + 1;
378 /* size it to be more than 16 times kalloc_max (256k) for allocations from kernel map */
379 kalloc_kernmap_size
= (kalloc_max
* 16) + 1;
380 kalloc_largest_allocated
= kalloc_kernmap_size
;
383 * Allocate a zone for each size we are going to handle.
384 * We specify non-paged memory. Don't charge the caller
385 * for the allocation, as we aren't sure how the memory
388 for (i
= 0; (size
= k_zone_size
[i
]) < kalloc_max
; i
++) {
389 k_zone
[i
] = zinit(size
, k_zone_max
[i
] * size
, size
,
391 zone_change(k_zone
[i
], Z_CALLERACCT
, FALSE
);
395 * Build the Direct LookUp Table for small allocations
397 for (i
= 0, size
= 0; i
<= N_K_ZDLUT
; i
++, size
+= KALLOC_MINALIGN
) {
400 while ((vm_size_t
)k_zone_size
[zindex
] < size
)
403 if (i
== N_K_ZDLUT
) {
404 k_zindex_start
= zindex
;
407 k_zone_dlut
[i
] = (int8_t)zindex
;
411 printf("kalloc_init: k_zindex_start %d\n", k_zindex_start
);
414 * Do a quick synthesis to see how well/badly we can
415 * find-a-zone for a given size.
416 * Useful when debugging/tweaking the array of zone sizes.
417 * Cache misses probably more critical than compare-branches!
419 for (i
= 0; i
< (int)N_K_ZONE
; i
++) {
420 vm_size_t testsize
= (vm_size_t
)k_zone_size
[i
] - 1;
424 if (testsize
< MAX_SIZE_ZDLUT
) {
425 compare
+= 1; /* 'if' (T) */
427 long dindex
= INDEX_ZDLUT(testsize
);
428 zindex
= (int)k_zone_dlut
[dindex
];
430 } else if (testsize
< kalloc_max_prerounded
) {
432 compare
+= 2; /* 'if' (F), 'if' (T) */
434 zindex
= k_zindex_start
;
435 while ((vm_size_t
)k_zone_size
[zindex
] < testsize
) {
437 compare
++; /* 'while' (T) */
439 compare
++; /* 'while' (F) */
441 break; /* not zone-backed */
443 zone_t z
= k_zone
[zindex
];
444 printf("kalloc_init: req size %4lu: %11s took %d compare%s\n",
445 (unsigned long)testsize
, z
->zone_name
, compare
,
446 compare
== 1 ? "" : "s");
449 kalloc_lck_grp
= lck_grp_alloc_init("kalloc.large", LCK_GRP_ATTR_NULL
);
450 lck_mtx_init(&kalloc_lock
, kalloc_lck_grp
, LCK_ATTR_NULL
);
453 lck_mtx_zone
= zinit(sizeof(struct _lck_mtx_
), 1024*256, 4096, "lck_mtx");
458 * Given an allocation size, return the kalloc zone it belongs to.
459 * Direct LookUp Table variant.
461 static __inline zone_t
462 get_zone_dlut(vm_size_t size
)
464 long dindex
= INDEX_ZDLUT(size
);
465 int zindex
= (int)k_zone_dlut
[dindex
];
466 return (k_zone
[zindex
]);
469 /* As above, but linear search k_zone_size[] for the next zone that fits. */
471 static __inline zone_t
472 get_zone_search(vm_size_t size
, int zindex
)
474 assert(size
< kalloc_max_prerounded
);
476 while ((vm_size_t
)k_zone_size
[zindex
] < size
)
479 assert((unsigned)zindex
< N_K_ZONE
&&
480 (vm_size_t
)k_zone_size
[zindex
] < kalloc_max
);
482 return (k_zone
[zindex
]);
492 if (size
< MAX_SIZE_ZDLUT
)
493 z
= get_zone_dlut(size
);
494 else if (size
< kalloc_max_prerounded
)
495 z
= get_zone_search(size
, k_zindex_start
);
498 * If size is too large for a zone, then use kmem_alloc.
499 * (We use kmem_alloc instead of kmem_alloc_kobject so that
500 * krealloc can use kmem_realloc.)
505 /* kmem_alloc could block so we return if noblock */
510 if (size
>= kalloc_kernmap_size
)
511 alloc_map
= kernel_map
;
513 alloc_map
= kalloc_map
;
515 if (kmem_alloc(alloc_map
, (vm_offset_t
*)&addr
, size
) != KERN_SUCCESS
) {
516 if (alloc_map
!= kernel_map
) {
517 if (kmem_alloc(kernel_map
, (vm_offset_t
*)&addr
, size
) != KERN_SUCCESS
)
527 * Thread-safe version of the workaround for 4740071
530 if (size
> kalloc_largest_allocated
)
531 kalloc_largest_allocated
= size
;
533 kalloc_large_inuse
++;
534 kalloc_large_total
+= size
;
535 kalloc_large_sum
+= size
;
537 if (kalloc_large_total
> kalloc_large_max
)
538 kalloc_large_max
= kalloc_large_total
;
542 KALLOC_ZINFO_SALLOC(size
);
547 if (size
> z
->elem_size
)
548 panic("%s: z %p (%s) but requested size %lu", __func__
,
549 z
, z
->zone_name
, (unsigned long)size
);
551 assert(size
<= z
->elem_size
);
552 return (zalloc_canblock(z
, canblock
));
559 return( kalloc_canblock(size
, TRUE
) );
566 return( kalloc_canblock(size
, FALSE
) );
569 volatile SInt32 kfree_nop_count
= 0;
578 if (size
< MAX_SIZE_ZDLUT
)
579 z
= get_zone_dlut(size
);
580 else if (size
< kalloc_max_prerounded
)
581 z
= get_zone_search(size
, k_zindex_start
);
583 /* if size was too large for a zone, then use kmem_free */
585 vm_map_t alloc_map
= kernel_map
;
587 if ((((vm_offset_t
) data
) >= kalloc_map_min
) && (((vm_offset_t
) data
) <= kalloc_map_max
))
588 alloc_map
= kalloc_map
;
589 if (size
> kalloc_largest_allocated
) {
591 * work around double FREEs of small MALLOCs
592 * this used to end up being a nop
593 * since the pointer being freed from an
594 * alloc backed by the zalloc world could
595 * never show up in the kalloc_map... however,
596 * the kernel_map is a different issue... since it
597 * was released back into the zalloc pool, a pointer
598 * would have gotten written over the 'size' that
599 * the MALLOC was retaining in the first 4 bytes of
600 * the underlying allocation... that pointer ends up
601 * looking like a really big size on the 2nd FREE and
602 * pushes the kfree into the kernel_map... we
603 * end up removing a ton of virtual space before we panic
604 * this check causes us to ignore the kfree for a size
605 * that must be 'bogus'... note that it might not be due
606 * to the above scenario, but it would still be wrong and
607 * cause serious damage.
610 OSAddAtomic(1, &kfree_nop_count
);
613 kmem_free(alloc_map
, (vm_offset_t
)data
, size
);
617 kalloc_large_total
-= size
;
618 kalloc_large_inuse
--;
622 KALLOC_ZINFO_SFREE(size
);
626 /* free to the appropriate zone */
628 if (size
> z
->elem_size
)
629 panic("%s: z %p (%s) but requested size %lu", __func__
,
630 z
, z
->zone_name
, (unsigned long)size
);
632 assert(size
<= z
->elem_size
);
641 if (size
< MAX_SIZE_ZDLUT
)
642 return (get_zone_dlut(size
));
643 if (size
<= kalloc_max
)
644 return (get_zone_search(size
, k_zindex_start
));
650 kalloc_fake_zone_init(int zone_index
)
652 kalloc_fake_zone_index
= zone_index
;
656 kalloc_fake_zone_info(int *count
,
657 vm_size_t
*cur_size
, vm_size_t
*max_size
, vm_size_t
*elem_size
, vm_size_t
*alloc_size
,
658 uint64_t *sum_size
, int *collectable
, int *exhaustable
, int *caller_acct
)
660 *count
= kalloc_large_inuse
;
661 *cur_size
= kalloc_large_total
;
662 *max_size
= kalloc_large_max
;
664 if (kalloc_large_inuse
) {
665 *elem_size
= kalloc_large_total
/ kalloc_large_inuse
;
666 *alloc_size
= kalloc_large_total
/ kalloc_large_inuse
;
671 *sum_size
= kalloc_large_sum
;
682 queue_init(&OSMalloc_tag_list
);
684 OSMalloc_tag_lck_grp
= lck_grp_alloc_init("OSMalloc_tag", LCK_GRP_ATTR_NULL
);
685 lck_mtx_init(&OSMalloc_tag_lock
, OSMalloc_tag_lck_grp
, LCK_ATTR_NULL
);
695 OSMTag
= (OSMallocTag
)kalloc(sizeof(*OSMTag
));
697 bzero((void *)OSMTag
, sizeof(*OSMTag
));
699 if (flags
& OSMT_PAGEABLE
)
700 OSMTag
->OSMT_attr
= OSMT_ATTR_PAGEABLE
;
702 OSMTag
->OSMT_refcnt
= 1;
704 strncpy(OSMTag
->OSMT_name
, str
, OSMT_MAX_NAME
);
706 OSMalloc_tag_spin_lock();
707 enqueue_tail(&OSMalloc_tag_list
, (queue_entry_t
)OSMTag
);
708 OSMalloc_tag_unlock();
709 OSMTag
->OSMT_state
= OSMT_VALID
;
717 if (!((tag
->OSMT_state
& OSMT_VALID_MASK
) == OSMT_VALID
))
718 panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n", tag
->OSMT_name
, tag
->OSMT_state
);
720 (void)hw_atomic_add(&tag
->OSMT_refcnt
, 1);
727 if (!((tag
->OSMT_state
& OSMT_VALID_MASK
) == OSMT_VALID
))
728 panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n", tag
->OSMT_name
, tag
->OSMT_state
);
730 if (hw_atomic_sub(&tag
->OSMT_refcnt
, 1) == 0) {
731 if (hw_compare_and_store(OSMT_VALID
|OSMT_RELEASED
, OSMT_VALID
|OSMT_RELEASED
, &tag
->OSMT_state
)) {
732 OSMalloc_tag_spin_lock();
733 (void)remque((queue_entry_t
)tag
);
734 OSMalloc_tag_unlock();
735 kfree((void*)tag
, sizeof(*tag
));
737 panic("OSMalloc_Tagrele():'%s' has refcnt 0\n", tag
->OSMT_name
);
745 if (!hw_compare_and_store(OSMT_VALID
, OSMT_VALID
|OSMT_RELEASED
, &tag
->OSMT_state
))
746 panic("OSMalloc_Tagfree():'%s' has bad state 0x%08X \n", tag
->OSMT_name
, tag
->OSMT_state
);
748 if (hw_atomic_sub(&tag
->OSMT_refcnt
, 1) == 0) {
749 OSMalloc_tag_spin_lock();
750 (void)remque((queue_entry_t
)tag
);
751 OSMalloc_tag_unlock();
752 kfree((void*)tag
, sizeof(*tag
));
764 OSMalloc_Tagref(tag
);
765 if ((tag
->OSMT_attr
& OSMT_PAGEABLE
)
766 && (size
& ~PAGE_MASK
)) {
768 if ((kr
= kmem_alloc_pageable(kernel_map
, (vm_offset_t
*)&addr
, size
)) != KERN_SUCCESS
)
771 addr
= kalloc((vm_size_t
)size
);
774 OSMalloc_Tagrele(tag
);
786 if (tag
->OSMT_attr
& OSMT_PAGEABLE
)
789 OSMalloc_Tagref(tag
);
790 /* XXX: use non-blocking kalloc for now */
791 addr
= kalloc_noblock((vm_size_t
)size
);
793 OSMalloc_Tagrele(tag
);
805 if (tag
->OSMT_attr
& OSMT_PAGEABLE
)
808 OSMalloc_Tagref(tag
);
809 addr
= kalloc_noblock((vm_size_t
)size
);
811 OSMalloc_Tagrele(tag
);
822 if ((tag
->OSMT_attr
& OSMT_PAGEABLE
)
823 && (size
& ~PAGE_MASK
)) {
824 kmem_free(kernel_map
, (vm_offset_t
)addr
, size
);
826 kfree((void *)addr
, size
);
828 OSMalloc_Tagrele(tag
);