/*
 * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr.
 *
 *	General kernel memory allocator.  This allocator is designed
 *	to be used by the kernel to manage dynamic memory fast.
 */
#include <zone_debug.h>

#include <mach/boolean.h>
#include <mach/machine/vm_types.h>
#include <mach/vm_param.h>
#include <kern/misc_protos.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <libkern/OSMalloc.h>
zone_t kalloc_zone(vm_size_t);
#define KALLOC_MAP_SIZE_MIN	(16 * 1024 * 1024)
#define KALLOC_MAP_SIZE_MAX	(128 * 1024 * 1024)

vm_map_t	kalloc_map;
vm_size_t	kalloc_max;
vm_size_t	kalloc_max_prerounded;
vm_size_t	kalloc_kernmap_size;	/* size of kallocs that can come from kernel map */

/* how many times we couldn't allocate out of kalloc_map and fell back to kernel_map */
unsigned long	kalloc_fallback_count;

unsigned int	kalloc_large_inuse;
vm_size_t	kalloc_large_total;
vm_size_t	kalloc_large_max;
vm_size_t	kalloc_largest_allocated = 0;
uint64_t	kalloc_large_sum;

int		kalloc_fake_zone_index = -1; /* index of our fake zone in statistics arrays */

vm_offset_t	kalloc_map_min;
vm_offset_t	kalloc_map_max;
#if defined(MUTEX_ZONE)
/*
 * Diagnostic code to track mutexes separately rather than via the power-of-2 zones.
 */
zone_t	lck_mtx_zone;
#endif
static void
KALLOC_ZINFO_SALLOC(vm_size_t bytes)
{
	thread_t	thr = current_thread();
	task_t		task;
	zinfo_usage_t	zinfo;

	ledger_debit(thr->t_ledger, task_ledgers.tkm_shared, bytes);

	if (kalloc_fake_zone_index != -1 &&
	    (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
		zinfo[kalloc_fake_zone_index].alloc += bytes;
}
static void
KALLOC_ZINFO_SFREE(vm_size_t bytes)
{
	thread_t	thr = current_thread();
	task_t		task;
	zinfo_usage_t	zinfo;

	ledger_credit(thr->t_ledger, task_ledgers.tkm_shared, bytes);

	if (kalloc_fake_zone_index != -1 &&
	    (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
		zinfo[kalloc_fake_zone_index].free += bytes;
}
/*
 *	All allocations of size less than kalloc_max are rounded to the
 *	next nearest sized zone.  This allocator is built on top of
 *	the zone allocator.  A zone is created for each potential size
 *	that we are willing to get in small blocks.
 *
 *	We assume that kalloc_max is not greater than 64K;
 *
 *	Note that kalloc_max is somewhat confusingly named.
 *	It represents the first power of two for which no zone exists.
 *	kalloc_max_prerounded is the smallest allocation size, before
 *	rounding, for which no zone exists.
 *
 *	Also if the allocation size is more than kalloc_kernmap_size
 *	then allocate from kernel map rather than kalloc_map.
 */
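
/*
 * Worked example (illustrative; assumes a 4 KB page size, for which
 * kalloc_init() below yields kalloc_max = 16K and therefore
 * kalloc_max_prerounded = 8193 and kalloc_kernmap_size = 262145):
 *
 *   - a 100-byte kalloc() rounds up to the kalloc.112 zone (using the
 *     KALLOC_MINSIZE == 8 size table below);
 *   - a 9000-byte request exceeds kalloc_max_prerounded, so it bypasses
 *     the zones and is served by kmem_alloc() from kalloc_map;
 *   - a 300000-byte request exceeds kalloc_kernmap_size and is served
 *     directly from kernel_map.
 */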
#if KALLOC_MINSIZE == 16 && KALLOC_LOG2_MINALIGN == 4

#define K_ZONE_SIZES		\
	16,			\
	32,			\
	48,			\
/* 3 */	64,			\
	80,			\
	96,			\
/* 6 */	128,			\
	160,	192,		\
	224,	256,		\
/* 9 */	288,			\
	368,	400,		\
	512,	576,	768,	1024,	1152,	\
/* C */	1280,			\
	1664,	2048,		\
	4096

#define K_ZONE_NAMES		\
	"kalloc.16",		\
	"kalloc.32",		\
	"kalloc.48",		\
/* 3 */	"kalloc.64",		\
	"kalloc.80",		\
	"kalloc.96",		\
/* 6 */	"kalloc.128",		\
	"kalloc.160",	"kalloc.192",	\
	"kalloc.224",	"kalloc.256",	\
/* 9 */	"kalloc.288",		\
	"kalloc.368",	"kalloc.400",	\
	"kalloc.512",	"kalloc.576",	"kalloc.768",	"kalloc.1024",	"kalloc.1152",	\
/* C */	"kalloc.1280",		\
	"kalloc.1664",	"kalloc.2048",	\
	"kalloc.4096"
#elif KALLOC_MINSIZE == 8 && KALLOC_LOG2_MINALIGN == 3

/*
 * Tweaked for ARM (and x64) in 04/2011
 */

#define K_ZONE_SIZES		\
/* 3 */	8,			\
	16,	24,		\
	32,	40,	48,	\
/* 6 */	64,	72,	88,	112,	\
	128,	192,		\
	256,	288,	384,	440,	\
/* 9 */	512,	768,		\
	1024,	1152,	1536,	\
	2048,	3072,		\
	4096,	6144
#define K_ZONE_NAMES		\
/* 3 */	"kalloc.8",		\
	"kalloc.16",	"kalloc.24",	\
	"kalloc.32",	"kalloc.40",	"kalloc.48",	\
/* 6 */	"kalloc.64",	"kalloc.72",	"kalloc.88",	"kalloc.112",	\
	"kalloc.128",	"kalloc.192",	\
	"kalloc.256",	"kalloc.288",	"kalloc.384",	"kalloc.440",	\
/* 9 */	"kalloc.512",	"kalloc.768",	\
	"kalloc.1024",	"kalloc.1152",	"kalloc.1536",	\
	"kalloc.2048",	"kalloc.3072",	\
	"kalloc.4096",	"kalloc.6144"
#else
#error	missing zone size parameters for kalloc
#endif
#define KALLOC_MINALIGN	(1 << KALLOC_LOG2_MINALIGN)
#define KiB(x)	(1024 * (x))

static const int k_zone_size[] = {
	K_ZONE_SIZES,
	KiB(8),
	KiB(16),
	KiB(32)
};

#define MAX_K_ZONE	(sizeof (k_zone_size) / sizeof (k_zone_size[0]))

static const char *k_zone_name[MAX_K_ZONE] = {
	K_ZONE_NAMES,
	"kalloc.8192",
	"kalloc.16384",
	"kalloc.32768"
};
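
/*
 * Note: K_ZONE_SIZES and K_ZONE_NAMES must stay in one-to-one
 * correspondence (including the 8K/16K/32K entries appended above):
 * MAX_K_ZONE is derived from k_zone_size[], and kalloc_init() pairs
 * k_zone_size[i] with k_zone_name[i] when calling zinit().
 */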
/*
 * Many kalloc() allocations are for small structures containing a few
 * pointers and longs - the k_zone_dlut[] direct lookup table, indexed by
 * size normalized to the minimum alignment, finds the right zone index
 * for them in one dereference.
 */

#define INDEX_ZDLUT(size)	\
	(((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
#define N_K_ZDLUT	(2048 / KALLOC_MINALIGN)
				/* covers sizes [0 .. 2048 - KALLOC_MINALIGN] */
#define MAX_SIZE_ZDLUT	((N_K_ZDLUT - 1) * KALLOC_MINALIGN)
static int8_t k_zone_dlut[N_K_ZDLUT];	/* table of indices into k_zone[] */
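
/*
 * Worked example (illustrative): in the KALLOC_MINSIZE == 8 configuration
 * above, a 36-byte request gives INDEX_ZDLUT(36) = (36 + 7) / 8 = 5.
 * The initialization loop in kalloc_init() fills k_zone_dlut[5] with the
 * index of the smallest zone of at least 5 * 8 = 40 bytes, i.e. kalloc.40,
 * so the request resolves to its zone in a single dereference.
 */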
/*
 * If there's no hit in the DLUT, then start searching from k_zindex_start.
 */
static int k_zindex_start;

static zone_t k_zone[MAX_K_ZONE];
/* #define KALLOC_DEBUG 1 */

/* forward declarations */

lck_grp_t	kalloc_lck_grp;
lck_mtx_t	kalloc_lock;

#define kalloc_spin_lock()	lck_mtx_lock_spin(&kalloc_lock)
#define kalloc_unlock()		lck_mtx_unlock(&kalloc_lock)
/* OSMalloc local data declarations */

queue_head_t	OSMalloc_tag_list;

lck_grp_t	*OSMalloc_tag_lck_grp;
lck_mtx_t	OSMalloc_tag_lock;

#define OSMalloc_tag_spin_lock()	lck_mtx_lock_spin(&OSMalloc_tag_lock)
#define OSMalloc_tag_unlock()		lck_mtx_unlock(&OSMalloc_tag_lock)
/* OSMalloc forward declarations */
void OSMalloc_init(void);
void OSMalloc_Tagref(OSMallocTag tag);
void OSMalloc_Tagrele(OSMallocTag tag);
/*
 *	Initialize the memory allocator.  This should be called only
 *	once on a system wide basis (i.e. first processor to get here
 *	does the initialization).
 *
 *	This initializes all of the zones.
 */

void
kalloc_init(
	void)
{
	kern_return_t	retval;
	vm_offset_t	min;
	vm_size_t	size, kalloc_map_size;
	int		i;
	/*
	 * Scale the kalloc_map_size to physical memory size: stay below
	 * 1/8th the total zone map size, or 128 MB (for a 32-bit kernel).
	 */
	kalloc_map_size = (vm_size_t)(sane_size >> 5);
#if !__LP64__
	if (kalloc_map_size > KALLOC_MAP_SIZE_MAX)
		kalloc_map_size = KALLOC_MAP_SIZE_MAX;
#endif /* !__LP64__ */
	if (kalloc_map_size < KALLOC_MAP_SIZE_MIN)
		kalloc_map_size = KALLOC_MAP_SIZE_MIN;
	retval = kmem_suballoc(kernel_map, &min, kalloc_map_size,
			       FALSE,
			       VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT | VM_MAKE_TAG(0),
			       &kalloc_map);

	if (retval != KERN_SUCCESS)
		panic("kalloc_init: kmem_suballoc failed");

	kalloc_map_min = min;
	kalloc_map_max = min + kalloc_map_size - 1;
	/*
	 * Create zones up to at least 2 pages because small page-multiples are
	 * common allocations.  Also ensure that zones up to size 8192 bytes exist.
	 * This is desirable because messages are allocated with kalloc(), and
	 * messages up through size 8192 are common.
	 */
	kalloc_max = PAGE_SIZE << 2;
	if (kalloc_max < KiB(16)) {
		kalloc_max = KiB(16);
	}
	assert(kalloc_max <= KiB(64)); /* assumption made in size arrays */
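
	/*
	 * The largest zone that kalloc_init() creates is kalloc_max / 2
	 * (e.g. kalloc.8192 when kalloc_max is 16K), because the size table
	 * tops out with powers of two and the zinit() loop below stops at
	 * kalloc_max.  A request of kalloc_max / 2 + 1 bytes or more thus
	 * has no zone to round up into; kalloc_max_prerounded records that
	 * threshold.
	 */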
	kalloc_max_prerounded = kalloc_max / 2 + 1;
	/* allocations larger than 16 times kalloc_max go directly to kernel map */
	kalloc_kernmap_size = (kalloc_max * 16) + 1;
	kalloc_largest_allocated = kalloc_kernmap_size;
	/*
	 * Allocate a zone for each size we are going to handle.  Don't charge the
	 * caller for the allocation, as we aren't sure how the memory will be
	 * handled.
	 */
	for (i = 0; i < (int)MAX_K_ZONE && (size = k_zone_size[i]) < kalloc_max; i++) {
		k_zone[i] = zinit(size, size, size, k_zone_name[i]);
		zone_change(k_zone[i], Z_CALLERACCT, FALSE);
	}
	/*
	 * Build the Direct LookUp Table for small allocations
	 */
	for (i = 0, size = 0; i <= N_K_ZDLUT; i++, size += KALLOC_MINALIGN) {
		int zindex = 0;

		while ((vm_size_t)k_zone_size[zindex] < size)
			zindex++;

		if (i == N_K_ZDLUT) {
			k_zindex_start = zindex;
			break;
		}
		k_zone_dlut[i] = (int8_t)zindex;
	}
#ifdef KALLOC_DEBUG
	printf("kalloc_init: k_zindex_start %d\n", k_zindex_start);

	/*
	 * Do a quick synthesis to see how well/badly we can
	 * find-a-zone for a given size.
	 * Useful when debugging/tweaking the array of zone sizes.
	 * Cache misses probably more critical than compare-branches!
	 */
	for (i = 0; i < (int)MAX_K_ZONE; i++) {
		vm_size_t	testsize = (vm_size_t)k_zone_size[i] - 1;
		int		compare = 0;
		int		zindex;

		if (testsize < MAX_SIZE_ZDLUT) {
			compare += 1;	/* 'if' (T) */

			long dindex = INDEX_ZDLUT(testsize);
			zindex = (int)k_zone_dlut[dindex];

		} else if (testsize < kalloc_max_prerounded) {

			compare += 2;	/* 'if' (F), 'if' (T) */

			zindex = k_zindex_start;
			while ((vm_size_t)k_zone_size[zindex] < testsize) {
				zindex++;
				compare++;	/* 'while' (T) */
			}
			compare++;	/* 'while' (F) */
		} else
			break;	/* not zone-backed */

		zone_t z = k_zone[zindex];
		printf("kalloc_init: req size %4lu: %11s took %d compare%s\n",
		    (unsigned long)testsize, z->zone_name, compare,
		    compare == 1 ? "" : "s");
	}
#endif
	lck_grp_init(&kalloc_lck_grp, "kalloc.large", LCK_GRP_ATTR_NULL);
	lck_mtx_init(&kalloc_lock, &kalloc_lck_grp, LCK_ATTR_NULL);
	OSMalloc_init();
#ifdef MUTEX_ZONE
	lck_mtx_zone = zinit(sizeof(struct _lck_mtx_), 1024*256, 4096, "lck_mtx");
#endif
}
/*
 * Given an allocation size, return the kalloc zone it belongs to.
 * Direct LookUp Table variant.
 */
static __inline zone_t
get_zone_dlut(vm_size_t size)
{
	long dindex = INDEX_ZDLUT(size);
	int zindex = (int)k_zone_dlut[dindex];
	return (k_zone[zindex]);
}
/* As above, but linear search k_zone_size[] for the next zone that fits. */

static __inline zone_t
get_zone_search(vm_size_t size, int zindex)
{
	assert(size < kalloc_max_prerounded);

	while ((vm_size_t)k_zone_size[zindex] < size)
		zindex++;

	assert((unsigned)zindex < MAX_K_ZONE &&
	    (vm_size_t)k_zone_size[zindex] < kalloc_max);

	return (k_zone[zindex]);
}
void *
kalloc_canblock(
	vm_size_t		size,
	boolean_t		canblock,
	vm_allocation_site_t	*site)
{
	zone_t z;

	if (size < MAX_SIZE_ZDLUT)
		z = get_zone_dlut(size);
	else if (size < kalloc_max_prerounded)
		z = get_zone_search(size, k_zindex_start);
	else {
		/*
		 * If size is too large for a zone, then use kmem_alloc.
		 * (We use kmem_alloc instead of kmem_alloc_kobject so that
		 * krealloc can use kmem_realloc.)
		 */
		vm_map_t	alloc_map;
		void		*addr;
		vm_tag_t	tag;

		/* kmem_alloc could block so we return if noblock */
		if (!canblock) {
			return (NULL);
		}

		if (size >= kalloc_kernmap_size)
			alloc_map = kernel_map;
		else
			alloc_map = kalloc_map;

		tag = (site ? vm_tag_alloc(site) : VM_KERN_MEMORY_KALLOC);

		if (kmem_alloc(alloc_map, (vm_offset_t *)&addr, size, tag) != KERN_SUCCESS) {
			if (alloc_map != kernel_map) {
				if (kalloc_fallback_count++ == 0) {
					printf("%s: falling back to kernel_map\n", __func__);
				}
				if (kmem_alloc(kernel_map, (vm_offset_t *)&addr, size, tag) != KERN_SUCCESS)
					addr = NULL;
			} else
				addr = NULL;
		}

		if (addr != NULL) {
			kalloc_spin_lock();
			/*
			 * Thread-safe version of the workaround for 4740071
			 * (a double FREE of memory)
			 */
			if (size > kalloc_largest_allocated)
				kalloc_largest_allocated = size;

			kalloc_large_inuse++;
			kalloc_large_total += size;
			kalloc_large_sum += size;

			if (kalloc_large_total > kalloc_large_max)
				kalloc_large_max = kalloc_large_total;

			kalloc_unlock();

			KALLOC_ZINFO_SALLOC(size);
		}
		return (addr);
	}
#ifdef KALLOC_DEBUG
	if (size > z->elem_size)
		panic("%s: z %p (%s) but requested size %lu", __func__,
		    z, z->zone_name, (unsigned long)size);
#endif
	assert(size <= z->elem_size);
	return zalloc_canblock(z, canblock);
}

void *
kalloc_external(
	vm_size_t size)
{
	return (kalloc_tag_bt(size, VM_KERN_MEMORY_KALLOC));
}
volatile SInt32 kfree_nop_count = 0;
void
kfree(
	void		*data,
	vm_size_t	size)
{
	zone_t z;

	if (size < MAX_SIZE_ZDLUT)
		z = get_zone_dlut(size);
	else if (size < kalloc_max_prerounded)
		z = get_zone_search(size, k_zindex_start);
	else {
		/* if size was too large for a zone, then use kmem_free */

		vm_map_t alloc_map = kernel_map;

		if ((((vm_offset_t) data) >= kalloc_map_min) &&
		    (((vm_offset_t) data) <= kalloc_map_max))
			alloc_map = kalloc_map;
		if (size > kalloc_largest_allocated) {
			/*
			 * work around double FREEs of small MALLOCs
			 * this used to end up being a nop
			 * since the pointer being freed from an
			 * alloc backed by the zalloc world could
			 * never show up in the kalloc_map... however,
			 * the kernel_map is a different issue... since it
			 * was released back into the zalloc pool, a pointer
			 * would have gotten written over the 'size' that
			 * the MALLOC was retaining in the first 4 bytes of
			 * the underlying allocation... that pointer ends up
			 * looking like a really big size on the 2nd FREE and
			 * pushes the kfree into the kernel_map...  we
			 * end up removing a ton of virtual space before we panic
			 * this check causes us to ignore the kfree for a size
			 * that must be 'bogus'... note that it might not be due
			 * to the above scenario, but it would still be wrong and
			 * cause serious damage.
			 */
			OSAddAtomic(1, &kfree_nop_count);
			return;
		}
		kmem_free(alloc_map, (vm_offset_t)data, size);

		kalloc_spin_lock();

		kalloc_large_total -= size;
		kalloc_large_inuse--;

		kalloc_unlock();

		KALLOC_ZINFO_SFREE(size);
		return;
	}

	/* free to the appropriate zone */
#ifdef KALLOC_DEBUG
	if (size > z->elem_size)
		panic("%s: z %p (%s) but requested size %lu", __func__,
		    z, z->zone_name, (unsigned long)size);
#endif
	assert(size <= z->elem_size);
	zfree(z, data);
}
zone_t
kalloc_zone(
	vm_size_t	size)
{
	if (size < MAX_SIZE_ZDLUT)
		return (get_zone_dlut(size));
	if (size <= kalloc_max)
		return (get_zone_search(size, k_zindex_start));
	return (ZONE_NULL);
}
void
kalloc_fake_zone_init(int zone_index)
{
	kalloc_fake_zone_index = zone_index;
}
void
kalloc_fake_zone_info(int *count,
    vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
    uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
{
	*count      = kalloc_large_inuse;
	*cur_size   = kalloc_large_total;
	*max_size   = kalloc_large_max;

	if (kalloc_large_inuse) {
		*elem_size  = kalloc_large_total / kalloc_large_inuse;
		*alloc_size = kalloc_large_total / kalloc_large_inuse;
	} else {
		*elem_size  = 0;
		*alloc_size = 0;
	}
	*sum_size    = kalloc_large_sum;
	*collectable = 0;
	*exhaustable = 0;
	*caller_acct = 0;
}
void
OSMalloc_init(
	void)
{
	queue_init(&OSMalloc_tag_list);

	OSMalloc_tag_lck_grp = lck_grp_alloc_init("OSMalloc_tag", LCK_GRP_ATTR_NULL);
	lck_mtx_init(&OSMalloc_tag_lock, OSMalloc_tag_lck_grp, LCK_ATTR_NULL);
}
OSMallocTag
OSMalloc_Tagalloc(
	const char	*str,
	uint32_t	flags)
{
	OSMallocTag	OSMTag;

	OSMTag = (OSMallocTag)kalloc(sizeof(*OSMTag));

	bzero((void *)OSMTag, sizeof(*OSMTag));

	if (flags & OSMT_PAGEABLE)
		OSMTag->OSMT_attr = OSMT_ATTR_PAGEABLE;

	OSMTag->OSMT_refcnt = 1;

	strlcpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME);

	OSMalloc_tag_spin_lock();
	enqueue_tail(&OSMalloc_tag_list, (queue_entry_t)OSMTag);
	OSMalloc_tag_unlock();
	OSMTag->OSMT_state = OSMT_VALID;
	return (OSMTag);
}
void
OSMalloc_Tagref(
	OSMallocTag	tag)
{
	if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID))
		panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n",
		    tag->OSMT_name, tag->OSMT_state);

	(void)hw_atomic_add(&tag->OSMT_refcnt, 1);
}
void
OSMalloc_Tagrele(
	OSMallocTag	tag)
{
	if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID))
		panic("OSMalloc_Tagrele():'%s' has bad state 0x%08X\n",
		    tag->OSMT_name, tag->OSMT_state);

	if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) {
		if (hw_compare_and_store(OSMT_VALID | OSMT_RELEASED,
		    OSMT_VALID | OSMT_RELEASED, &tag->OSMT_state)) {
			OSMalloc_tag_spin_lock();
			(void)remque((queue_entry_t)tag);
			OSMalloc_tag_unlock();
			kfree((void *)tag, sizeof(*tag));
		} else
			panic("OSMalloc_Tagrele():'%s' has refcnt 0\n", tag->OSMT_name);
	}
}
void
OSMalloc_Tagfree(
	OSMallocTag	tag)
{
	if (!hw_compare_and_store(OSMT_VALID, OSMT_VALID | OSMT_RELEASED, &tag->OSMT_state))
		panic("OSMalloc_Tagfree():'%s' has bad state 0x%08X\n",
		    tag->OSMT_name, tag->OSMT_state);

	if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) {
		OSMalloc_tag_spin_lock();
		(void)remque((queue_entry_t)tag);
		OSMalloc_tag_unlock();
		kfree((void *)tag, sizeof(*tag));
	}
}
void *
OSMalloc(
	uint32_t	size,
	OSMallocTag	tag)
{
	void		*addr = NULL;
	kern_return_t	kr;

	OSMalloc_Tagref(tag);
	if ((tag->OSMT_attr & OSMT_PAGEABLE)
	    && (size & ~PAGE_MASK)) {
		if ((kr = kmem_alloc_pageable_external(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS)
			addr = NULL;
	} else
		addr = kalloc_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC);

	if (!addr)
		OSMalloc_Tagrele(tag);

	return (addr);
}
void *
OSMalloc_nowait(
	uint32_t	size,
	OSMallocTag	tag)
{
	void	*addr = NULL;

	if (tag->OSMT_attr & OSMT_PAGEABLE)
		return (NULL);

	OSMalloc_Tagref(tag);
	/* XXX: use non-blocking kalloc for now */
	addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC);
	if (addr == NULL)
		OSMalloc_Tagrele(tag);

	return (addr);
}
void *
OSMalloc_noblock(
	uint32_t	size,
	OSMallocTag	tag)
{
	void	*addr = NULL;

	if (tag->OSMT_attr & OSMT_PAGEABLE)
		return (NULL);

	OSMalloc_Tagref(tag);
	addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC);
	if (addr == NULL)
		OSMalloc_Tagrele(tag);

	return (addr);
}
void
OSFree(
	void		*addr,
	uint32_t	size,
	OSMallocTag	tag)
{
	if ((tag->OSMT_attr & OSMT_PAGEABLE)
	    && (size & ~PAGE_MASK)) {
		kmem_free(kernel_map, (vm_offset_t)addr, size);
	} else
		kfree((void *)addr, size);

	OSMalloc_Tagrele(tag);
}