/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * File: kern/gzalloc.c
 *
 * "Guard mode" zone allocator, used to trap use-after-free errors,
 * overruns, underruns, mismatched allocations/frees, uninitialized
 * zone element use, timing dependent races etc.
 *
 * The allocator is configured by these boot-args:
 * gzalloc_size=<size>: target all zones with elements of <size> bytes
 * gzalloc_min=<size>: target zones with elements >= size
 * gzalloc_max=<size>: target zones with elements <= size
 * gzalloc_min/max can be specified in conjunction to target a range of
 * sizes
 * gzalloc_fc_size=<size>: number of zone elements (effectively page
 * multiple sized) to retain in the free VA cache. This cache is evicted
 * (backing pages and VA released) in a least-recently-freed fashion.
 * Larger free VA caches allow for a longer window of opportunity to trap
 * delayed use-after-free operations, but use more memory.
 * -gzalloc_wp: Write protect, rather than unmap, freed allocations
 * lingering in the free VA cache. Useful to disambiguate between
 * read-after-frees/read overruns and writes. Also permits direct inspection
 * of the freed element in the cache via the kernel debugger. As each
 * element has a "header" (trailer in underflow detection mode), the zone
 * of origin of the element can be easily determined in this mode.
 * -gzalloc_uf_mode: Underflow detection mode, where the guard page
 * adjoining each element is placed *before* the element page rather than
 * after. The element is also located at the top of the page, rather than
 * abutting the bottom as with the standard overflow detection mode.
 * -gzalloc_noconsistency: disable consistency checks that flag mismatched
 * frees, corruptions of the header/trailer signatures etc.
 * -nogzalloc_mode: Disables the guard mode allocator. The DEBUG kernel
 * enables the guard allocator for zones sized 8K-16K (if present) by
 * default; this option can disable that behaviour.
 */
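/*
 * Example boot-args (illustrative only; the sizes below are hypothetical,
 * not defaults): guard all zones with elements between 1024 and 4096 bytes
 * and retain 512 freed elements in the free VA cache:
 *   gzalloc_min=1024 gzalloc_max=4096 gzalloc_fc_size=512
 * Adding -gzalloc_wp would write-protect rather than unmap freed elements.
 */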
#include <zone_debug.h>

#include <mach/mach_types.h>
#include <mach/vm_param.h>
#include <mach/kern_return.h>
#include <mach/machine/vm_types.h>
#include <mach_debug/zone_info.h>
#include <mach/vm_map.h>

#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/sched.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#include <pexpert/pexpert.h>

#include <machine/machparam.h>

#include <libkern/OSDebug.h>
#include <libkern/OSAtomic.h>
#include <sys/kdebug.h>
extern boolean_t vm_kernel_ready, kmem_ready;
boolean_t gzalloc_mode = FALSE;
uint32_t pdzalloc_count, pdzfree_count;

#define GZALLOC_MIN_DEFAULT (1024)
#define GZDEADZONE ((zone_t) 0xDEAD201E)
#define GZALLOC_SIGNATURE (0xABADCAFE)
#define GZALLOC_RESERVE_SIZE_DEFAULT (2 * 1024 * 1024)
#define GZFC_DEFAULT_SIZE (1024)

char gzalloc_fill_pattern = 0x67; /* 'g' */

uint32_t gzalloc_min = ~0U;
uint32_t gzalloc_max = 0;
uint32_t gzalloc_size = 0;
uint64_t gzalloc_allocated, gzalloc_freed, gzalloc_early_alloc, gzalloc_early_free, gzalloc_wasted;
boolean_t gzalloc_uf_mode = FALSE, gzalloc_consistency_checks = TRUE;
vm_prot_t gzalloc_prot = VM_PROT_NONE;
uint32_t gzalloc_guard = KMA_GUARD_LAST;
uint32_t gzfc_size = GZFC_DEFAULT_SIZE;

vm_map_t gzalloc_map;
vm_offset_t gzalloc_map_min, gzalloc_map_max;
vm_offset_t gzalloc_reserve;
vm_size_t gzalloc_reserve_size;
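/*
 * Summary of the tunables above: gzalloc_min/gzalloc_max/gzalloc_size
 * select which zones are guarded; gzalloc_prot and gzalloc_guard control
 * whether freed elements are write-protected or unmapped, and whether the
 * guard page leads or trails the element; gzalloc_reserve backs allocations
 * made before the VM/kmem systems are ready.
 */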
typedef struct gzalloc_header {
	zone_t gzone;
	uint32_t gzsize;
	uint32_t gzsig;
} gzhdr_t;

#define GZHEADER_SIZE (sizeof(gzhdr_t))
extern zone_t vm_page_zone;
void gzalloc_reconfigure(__unused zone_t z) {
	/* Nothing for now */
}

boolean_t gzalloc_enabled(void) {
	return gzalloc_mode;
}
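/*
 * Per-zone setup: if this zone's element size falls within the configured
 * [gzalloc_min, gzalloc_max] range and the zone is not exempt, reserve its
 * free VA cache array, either from the early pmap-supplied reserve or from
 * kernel_map once kmem is available.
 */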
void gzalloc_zone_init(zone_t z) {
	if (gzalloc_mode) {
		bzero(&z->gz, sizeof(z->gz));

		if (gzfc_size &&
		    (z->elem_size >= gzalloc_min) &&
		    (z->elem_size <= gzalloc_max) &&
		    (z->gzalloc_exempt == FALSE)) {
			vm_size_t gzfcsz = round_page(sizeof(*z->gz.gzfc) * gzfc_size);

			/* If the VM/kmem system aren't yet configured, carve
			 * out the free element cache structure directly from the
			 * gzalloc_reserve supplied by the pmap layer.
			 */
			if (!kmem_ready || !vm_kernel_ready) {
				if (gzalloc_reserve_size < gzfcsz)
					panic("gzalloc reserve exhausted");

				z->gz.gzfc = (vm_offset_t *)gzalloc_reserve;
				gzalloc_reserve += gzfcsz;
				gzalloc_reserve_size -= gzfcsz;
			} else {
				kern_return_t kr;

				if ((kr = kernel_memory_allocate(kernel_map, (vm_offset_t *)&z->gz.gzfc, gzfcsz, 0, KMA_KOBJECT, VM_KERN_MEMORY_OSFMK)) != KERN_SUCCESS) {
					panic("zinit/gzalloc: kernel_memory_allocate failed (%d) for 0x%lx bytes", kr, (unsigned long) gzfcsz);
				}
			}
			bzero((void *)z->gz.gzfc, gzfcsz);
		}
	}
}
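/*
 * Parse the gzalloc boot-args documented at the top of this file and derive
 * the effective configuration; on DEBUG kernels the guard allocator is
 * enabled for a default size range even without boot-args.
 */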
void gzalloc_configure(void) {
	char temp_buf[16];

	if (PE_parse_boot_argn("-gzalloc_mode", temp_buf, sizeof (temp_buf))) {
		gzalloc_mode = TRUE;
		gzalloc_min = GZALLOC_MIN_DEFAULT;
		gzalloc_max = ~0U;
	}

	if (PE_parse_boot_argn("gzalloc_min", &gzalloc_min, sizeof(gzalloc_min))) {
		gzalloc_mode = TRUE;
		gzalloc_max = ~0U;
	}

	if (PE_parse_boot_argn("gzalloc_max", &gzalloc_max, sizeof(gzalloc_max))) {
		gzalloc_mode = TRUE;
		if (gzalloc_min == ~0U)
			gzalloc_min = 0;
	}

	if (PE_parse_boot_argn("gzalloc_size", &gzalloc_size, sizeof(gzalloc_size))) {
		gzalloc_min = gzalloc_max = gzalloc_size;
		gzalloc_mode = TRUE;
	}

	(void)PE_parse_boot_argn("gzalloc_fc_size", &gzfc_size, sizeof(gzfc_size));

	if (PE_parse_boot_argn("-gzalloc_wp", temp_buf, sizeof (temp_buf))) {
		gzalloc_prot = VM_PROT_READ;
	}

	if (PE_parse_boot_argn("-gzalloc_uf_mode", temp_buf, sizeof (temp_buf))) {
		gzalloc_uf_mode = TRUE;
		gzalloc_guard = KMA_GUARD_FIRST;
	}

	if (PE_parse_boot_argn("-gzalloc_noconsistency", temp_buf, sizeof (temp_buf))) {
		gzalloc_consistency_checks = FALSE;
	}
#if DEBUG
	if (gzalloc_mode == FALSE) {
		gzalloc_min = 8192;
		gzalloc_max = 16384;
		gzalloc_prot = VM_PROT_READ;
		gzalloc_mode = TRUE;
	}
#endif
	if (PE_parse_boot_argn("-nogzalloc_mode", temp_buf, sizeof (temp_buf)))
		gzalloc_mode = FALSE;

	if (gzalloc_mode) {
		gzalloc_reserve_size = GZALLOC_RESERVE_SIZE_DEFAULT;
		gzalloc_reserve = (vm_offset_t) pmap_steal_memory(gzalloc_reserve_size);
	}
}
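/*
 * Create the gzalloc submap. It is sized at four times the maximum zone map
 * size so that freed elements (and their guard pages) can linger unmapped
 * or write-protected without exhausting VA.
 */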
void gzalloc_init(vm_size_t max_zonemap_size) {
	kern_return_t retval;

	if (gzalloc_mode) {
		retval = kmem_suballoc(kernel_map, &gzalloc_map_min, (max_zonemap_size << 2),
		    FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT | VM_MAKE_TAG(VM_KERN_MEMORY_ZONE),
		    &gzalloc_map);

		if (retval != KERN_SUCCESS)
			panic("zone_init: kmem_suballoc(gzalloc) failed");
		gzalloc_map_max = gzalloc_map_min + (max_zonemap_size << 2);
	}
}
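/*
 * Guarded allocation: round the element (plus header) up to a page
 * multiple, obtain it from the gzalloc submap with an adjoining guard page,
 * fill it with a pattern to catch uninitialized use, and record the zone of
 * origin in a gzhdr_t adjoining the element.
 */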
vm_offset_t
gzalloc_alloc(zone_t zone, boolean_t canblock) {
	vm_offset_t addr = 0;

	if (__improbable(gzalloc_mode &&
	    (((zone->elem_size >= gzalloc_min) &&
	    (zone->elem_size <= gzalloc_max))) &&
	    (zone->gzalloc_exempt == 0))) {

		if (get_preemption_level() != 0) {
			if (canblock == TRUE) {
				pdzalloc_count++;
			} else {
				return 0;
			}
		}
		vm_offset_t rounded_size = round_page(zone->elem_size + GZHEADER_SIZE);
		vm_offset_t residue = rounded_size - zone->elem_size;
		vm_offset_t gzaddr = 0;
		gzhdr_t *gzh, *gzhcopy = NULL;
		if (!kmem_ready || (vm_page_zone == ZONE_NULL)) {
			/* Early allocations are supplied directly from the
			 * gzalloc_reserve.
			 */
			if (gzalloc_reserve_size < rounded_size)
				panic("gzalloc reserve exhausted");
			gzaddr = gzalloc_reserve;
			/* No guard page for these early allocations, just
			 * waste an additional page.
			 */
			gzalloc_reserve += rounded_size + PAGE_SIZE;
			gzalloc_reserve_size -= rounded_size + PAGE_SIZE;
			OSAddAtomic64((SInt32) (rounded_size), &gzalloc_early_alloc);
		} else {
			kern_return_t kr = kernel_memory_allocate(gzalloc_map,
			    &gzaddr, rounded_size + (1*PAGE_SIZE),
			    0, KMA_KOBJECT | KMA_ATOMIC | gzalloc_guard,
			    VM_KERN_MEMORY_OSFMK);
			if (kr != KERN_SUCCESS)
				panic("gzalloc: kernel_memory_allocate for size 0x%llx failed with %d", (uint64_t)rounded_size, kr);
		}
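		/*
		 * Placement summary: in the standard overflow-detection mode
		 * the element abuts the trailing guard page, with its gzhdr_t
		 * immediately preceding it; in underflow mode the element sits
		 * just past the leading guard page and the header follows the
		 * element (with a copy stashed at the end of the range).
		 */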
		if (gzalloc_uf_mode) {
			gzaddr += PAGE_SIZE;
			/* The "header" becomes a "footer" in underflow
			 * detection mode.
			 */
			gzh = (gzhdr_t *) (gzaddr + zone->elem_size);
			addr = gzaddr;
			gzhcopy = (gzhdr_t *) (gzaddr + rounded_size - sizeof(gzhdr_t));
		} else {
			gzh = (gzhdr_t *) (gzaddr + residue - GZHEADER_SIZE);
			addr = (gzaddr + residue);
		}
		/* Fill with a pattern on allocation to trap uninitialized
		 * data use. Since the element size may be "rounded up"
		 * by higher layers such as the kalloc layer, this may
		 * also identify overruns between the originally requested
		 * size and the rounded size via visual inspection.
		 * TBD: plumb through the originally requested size,
		 * prior to rounding by kalloc/IOMalloc etc.
		 * We also add a signature and the zone of origin in a header
		 * prefixed to the allocation.
		 */
		memset((void *)gzaddr, gzalloc_fill_pattern, rounded_size);

		gzh->gzone = (kmem_ready && vm_page_zone) ? zone : GZDEADZONE;
		gzh->gzsize = (uint32_t) zone->elem_size;
		gzh->gzsig = GZALLOC_SIGNATURE;
		/* In underflow detection mode, stash away a copy of the
		 * metadata at the edge of the allocated range, for
		 * retrieval by gzalloc_element_size()
		 */
		if (gzhcopy) {
			*gzhcopy = *gzh;
		}

		lock_zone(zone);
		zone->count++;
		zone->cur_size += rounded_size;
		unlock_zone(zone);

		OSAddAtomic64((SInt32) rounded_size, &gzalloc_allocated);
		OSAddAtomic64((SInt32) (rounded_size - zone->elem_size), &gzalloc_wasted);
	}
	return addr;
}
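/*
 * Guarded free: validate the adjoining gzhdr_t against the zone being freed
 * to, then either write-protect or unmap the element and its guard page,
 * rotating the range through the per-zone free VA cache so that stale
 * references trap for as long as possible before the VA is reclaimed.
 */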
boolean_t gzalloc_free(zone_t zone, void *addr) {
	boolean_t gzfreed = FALSE;
	kern_return_t kr;

	if (__improbable(gzalloc_mode &&
	    (((zone->elem_size >= gzalloc_min) &&
	    (zone->elem_size <= gzalloc_max))) &&
	    (zone->gzalloc_exempt == 0))) {
		gzhdr_t *gzh;
		vm_offset_t rounded_size = round_page(zone->elem_size + GZHEADER_SIZE);
		vm_offset_t residue = rounded_size - zone->elem_size;
		vm_offset_t saddr;
		vm_offset_t free_addr = 0;
		if (gzalloc_uf_mode) {
			gzh = (gzhdr_t *)((vm_offset_t)addr + zone->elem_size);
			saddr = (vm_offset_t) addr - PAGE_SIZE;
		} else {
			gzh = (gzhdr_t *)((vm_offset_t)addr - GZHEADER_SIZE);
			saddr = ((vm_offset_t)addr) - residue;
		}

		assert((saddr & PAGE_MASK) == 0);
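		/*
		 * The consistency checks below validate the header before any
		 * mappings are touched: the signature, the recorded owning
		 * zone, and the recorded element size must all match.
		 */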
		if (gzalloc_consistency_checks) {
			if (gzh->gzsig != GZALLOC_SIGNATURE) {
				panic("GZALLOC signature mismatch for element %p, expected 0x%x, found 0x%x", addr, GZALLOC_SIGNATURE, gzh->gzsig);
			}

			if (gzh->gzone != zone && (gzh->gzone != GZDEADZONE))
				panic("%s: Mismatched zone or under/overflow, current zone: %p, recorded zone: %p, address: %p", __FUNCTION__, zone, gzh->gzone, (void *)addr);
			/* Partially redundant given the zone check, but may flag header corruption */
			if (gzh->gzsize != zone->elem_size) {
				panic("Mismatched zfree or under/overflow for zone %p, recorded size: 0x%x, element size: 0x%x, address: %p\n", zone, gzh->gzsize, (uint32_t) zone->elem_size, (void *)addr);
			}
		}
		if (!kmem_ready || gzh->gzone == GZDEADZONE) {
			/* For now, just leak frees of early allocations
			 * performed before kmem is fully configured.
			 * They don't seem to get freed currently;
			 * consider ml_static_mfree in the future.
			 */
			OSAddAtomic64((SInt32) (rounded_size), &gzalloc_early_free);
			return TRUE;
		}
		if (get_preemption_level() != 0) {
			pdzfree_count++;
		}

		if (gzfc_size) {
			/* Either write protect or unmap the newly freed
			 * allocations.
			 */
			kr = vm_map_protect(gzalloc_map,
			    saddr,
			    saddr + rounded_size + (1 * PAGE_SIZE),
			    gzalloc_prot,
			    FALSE);
			if (kr != KERN_SUCCESS)
				panic("%s: vm_map_protect: %p, 0x%x", __FUNCTION__, (void *)saddr, kr);
		} else {
			free_addr = saddr;
		}

		lock_zone(zone);
		/* Insert newly freed element into the protected free element
		 * cache, and rotate out the LRU element.
		 */
		if (gzfc_size) {
			if (zone->gz.gzfc_index >= gzfc_size) {
				zone->gz.gzfc_index = 0;
			}
			free_addr = zone->gz.gzfc[zone->gz.gzfc_index];
			zone->gz.gzfc[zone->gz.gzfc_index++] = saddr;
		}

		if (free_addr) {
			zone->count--;
			zone->cur_size -= rounded_size;
		}

		unlock_zone(zone);

		if (free_addr) {
			kr = vm_map_remove(gzalloc_map,
			    free_addr,
			    free_addr + rounded_size + (1 * PAGE_SIZE),
			    VM_MAP_REMOVE_KUNWIRE);
			if (kr != KERN_SUCCESS)
				panic("gzfree: vm_map_remove: %p, 0x%x", (void *)free_addr, kr);

			OSAddAtomic64((SInt32)rounded_size, &gzalloc_freed);
			OSAddAtomic64(-((SInt32) (rounded_size - zone->elem_size)), &gzalloc_wasted);
		}

		gzfreed = TRUE;
	}
	return gzfreed;
}
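/*
 * Given an address that may belong to a guarded element, recover the owning
 * zone and its element size from the adjoining gzhdr_t (or, in underflow
 * mode, from the copy stashed at the trailing edge of the VM map entry).
 */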
boolean_t gzalloc_element_size(void *gzaddr, zone_t *z, vm_size_t *gzsz) {
	uintptr_t a = (uintptr_t)gzaddr;
	if (__improbable(gzalloc_mode && (a >= gzalloc_map_min) && (a <= gzalloc_map_max))) {
		gzhdr_t *gzh;

		/* Locate the gzalloc metadata adjoining the element */
		if (gzalloc_uf_mode == TRUE) {
			boolean_t vmef;
			vm_map_entry_t gzvme = NULL;

			/* In underflow detection mode, locate the map entry describing
			 * the element, and then locate the copy of the gzalloc
			 * header at the trailing edge of the range.
			 */
			vm_map_lock_read(gzalloc_map);
			vmef = vm_map_lookup_entry(gzalloc_map, (vm_map_offset_t)a, &gzvme);
			vm_map_unlock(gzalloc_map);
			if (vmef == FALSE) {
				panic("GZALLOC: unable to locate map entry for %p\n", (void *)a);
			}
			assertf(gzvme->vme_atomic != 0, "GZALLOC: VM map entry inconsistency, vme: %p, start: %llu end: %llu", gzvme, gzvme->vme_start, gzvme->vme_end);
			gzh = (gzhdr_t *)(gzvme->vme_end - GZHEADER_SIZE);
		} else {
			gzh = (gzhdr_t *)(a - GZHEADER_SIZE);
		}

		if (gzh->gzsig != GZALLOC_SIGNATURE) {
			panic("GZALLOC signature mismatch for element %p, expected 0x%x, found 0x%x", (void *)a, GZALLOC_SIGNATURE, gzh->gzsig);
		}

		*gzsz = gzh->gzone->elem_size;
		if ((*gzsz < gzalloc_min) || (*gzsz > gzalloc_max)) {
			panic("GZALLOC: invalid element size %lu\n", *gzsz);