/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * File: kern/gzalloc.c
 *
 * "Guard mode" zone allocator, used to trap use-after-free errors,
 * overruns, underruns, mismatched allocations/frees, uninitialized
 * zone element use, timing dependent races etc.
 *
 * The allocator is configured by these boot-args:
 * gzalloc_size=<size>: target all zones with elements of <size> bytes
 * gzalloc_min=<size>: target zones with elements >= size
 * gzalloc_max=<size>: target zones with elements <= size
 * gzalloc_min/max can be specified in conjunction to target a range of
 * sizes
 * gzalloc_fc_size=<size>: number of zone elements (effectively page
 * multiple sized) to retain in the free VA cache. This cache is evicted
 * (backing pages and VA released) in a least-recently-freed fashion.
 * Larger free VA caches allow for a longer window of opportunity to trap
 * delayed use-after-free operations, but use more memory.
 * -gzalloc_wp: Write protect, rather than unmap, freed allocations
 * lingering in the free VA cache. Useful to disambiguate between
 * read-after-frees/read overruns and writes. Also permits direct inspection
 * of the freed element in the cache via the kernel debugger. As each
 * element has a "header" (trailer in underflow detection mode), the zone
 * of origin of the element can be easily determined in this mode.
 * -gzalloc_uf_mode: Underflow detection mode, where the guard page
 * adjoining each element is placed *before* the element page rather than
 * after. The element is also located at the top of the page, rather than
 * abutting the bottom as with the standard overflow detection mode.
 * -gzalloc_noconsistency: disable consistency checks that flag mismatched
 * frees, corruptions of the header/trailer signatures etc.
 * -nogzalloc_mode: Disables the guard mode allocator. The DEBUG kernel
 * enables the guard allocator for zones sized 8K-16K (if present) by
 * default; this option can disable that behaviour.
 */
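/*
 * Illustrative example only (the values below are arbitrary, not defaults):
 * target zones with 512-2048 byte elements, retain 4096 freed elements in
 * the free VA cache, and write protect (rather than unmap) freed elements:
 *   boot-args="gzalloc_min=512 gzalloc_max=2048 gzalloc_fc_size=4096 -gzalloc_wp"
 */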
#include <zone_debug.h>
#include <zone_alias_addr.h>

#include <mach/mach_types.h>
#include <mach/vm_param.h>
#include <mach/kern_return.h>
#include <mach/machine/vm_types.h>
#include <mach_debug/zone_info.h>
#include <mach/vm_map.h>

#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/sched.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#include <pexpert/pexpert.h>

#include <machine/machparam.h>

#include <libkern/OSDebug.h>
#include <libkern/OSAtomic.h>
#include <sys/kdebug.h>
extern boolean_t vm_kernel_ready, kmem_ready;
boolean_t gzalloc_mode = FALSE;
uint32_t pdzalloc_count, pdzfree_count;

#define GZALLOC_MIN_DEFAULT (1024)
#define GZDEADZONE ((zone_t) 0xDEAD201E)
#define GZALLOC_SIGNATURE (0xABADCAFE)
#define GZALLOC_RESERVE_SIZE_DEFAULT (2 * 1024 * 1024)
#define GZFC_DEFAULT_SIZE (1024)

char gzalloc_fill_pattern = 0x67; /* 'g' */

uint32_t gzalloc_min = ~0U;
uint32_t gzalloc_max = 0;
uint32_t gzalloc_size = 0;
uint64_t gzalloc_allocated, gzalloc_freed, gzalloc_early_alloc, gzalloc_early_free, gzalloc_wasted;
boolean_t gzalloc_uf_mode = FALSE, gzalloc_consistency_checks = TRUE;
vm_prot_t gzalloc_prot = VM_PROT_NONE;
uint32_t gzalloc_guard = KMA_GUARD_LAST;
uint32_t gzfc_size = GZFC_DEFAULT_SIZE;

vm_map_t gzalloc_map;
vm_offset_t gzalloc_map_min, gzalloc_map_max;
vm_offset_t gzalloc_reserve;
vm_size_t gzalloc_reserve_size;
typedef struct gzalloc_header {
	zone_t gzone;
	uint32_t gzsize;
	uint32_t gzsig;
} gzhdr_t;

#define GZHEADER_SIZE (sizeof(gzhdr_t))

extern zone_t vm_page_zone;
void gzalloc_reconfigure(__unused zone_t z) {
	/* Nothing for now */
}

boolean_t gzalloc_enabled(void) {
	return gzalloc_mode;
}
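/*
 * Per-zone setup: for zones within the targeted size range, allocate the
 * free element cache (gz.gzfc). Before the VM/kmem system is up this is
 * carved directly from the early gzalloc_reserve supplied by the pmap
 * layer; afterwards it is obtained via kernel_memory_allocate().
 */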
void gzalloc_zone_init(zone_t z) {
	if (gzalloc_mode) {
		bzero(&z->gz, sizeof(z->gz));

		if (gzfc_size && (z->elem_size >= gzalloc_min) && (z->elem_size <= gzalloc_max) && (z->gzalloc_exempt == FALSE)) {
			vm_size_t gzfcsz = round_page(sizeof(*z->gz.gzfc) * gzfc_size);

			/* If the VM/kmem system aren't yet configured, carve
			 * out the free element cache structure directly from the
			 * gzalloc_reserve supplied by the pmap layer.
			 */
			if (!kmem_ready) {
				if (gzalloc_reserve_size < gzfcsz)
					panic("gzalloc reserve exhausted");

				z->gz.gzfc = (vm_offset_t *)gzalloc_reserve;
				gzalloc_reserve += gzfcsz;
				gzalloc_reserve_size -= gzfcsz;
			} else {
				kern_return_t kr;

				if ((kr = kernel_memory_allocate(kernel_map, (vm_offset_t *)&z->gz.gzfc, gzfcsz, 0, KMA_KOBJECT)) != KERN_SUCCESS) {
					panic("zinit/gzalloc: kernel_memory_allocate failed (%d) for 0x%lx bytes", kr, (unsigned long) gzfcsz);
				}
			}
			bzero((void *)z->gz.gzfc, gzfcsz);
		}
	}
}
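/*
 * Parse the gzalloc boot-args and, if guard mode ends up enabled, steal the
 * early allocation reserve from the pmap layer before the VM is available.
 */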
void gzalloc_configure(void) {
	char temp_buf[16];

	if (PE_parse_boot_argn("-gzalloc_mode", temp_buf, sizeof (temp_buf))) {
		gzalloc_mode = TRUE;
		gzalloc_min = GZALLOC_MIN_DEFAULT;
#if	ZONE_DEBUG
		gzalloc_min += (typeof(gzalloc_min))ZONE_DEBUG_OFFSET;
#endif
		gzalloc_max = ~0U;
	}

	if (PE_parse_boot_argn("gzalloc_min", &gzalloc_min, sizeof(gzalloc_min))) {
#if	ZONE_DEBUG
		gzalloc_min += (typeof(gzalloc_min))ZONE_DEBUG_OFFSET;
#endif
		gzalloc_mode = TRUE;
		gzalloc_max = ~0U;
	}

	if (PE_parse_boot_argn("gzalloc_max", &gzalloc_max, sizeof(gzalloc_max))) {
#if	ZONE_DEBUG
		gzalloc_max += (typeof(gzalloc_min))ZONE_DEBUG_OFFSET;
#endif
		gzalloc_mode = TRUE;
		if (gzalloc_min == ~0U)
			gzalloc_min = 0;
	}

	if (PE_parse_boot_argn("gzalloc_size", &gzalloc_size, sizeof(gzalloc_size))) {
#if	ZONE_DEBUG
		gzalloc_size += (typeof(gzalloc_min))ZONE_DEBUG_OFFSET;
#endif
		gzalloc_min = gzalloc_max = gzalloc_size;
		gzalloc_mode = TRUE;
	}

	(void)PE_parse_boot_argn("gzalloc_fc_size", &gzfc_size, sizeof(gzfc_size));

	if (PE_parse_boot_argn("-gzalloc_wp", temp_buf, sizeof (temp_buf))) {
		gzalloc_prot = VM_PROT_READ;
	}

	if (PE_parse_boot_argn("-gzalloc_uf_mode", temp_buf, sizeof (temp_buf))) {
		gzalloc_uf_mode = TRUE;
		gzalloc_guard = KMA_GUARD_FIRST;
	}

	if (PE_parse_boot_argn("-gzalloc_noconsistency", temp_buf, sizeof (temp_buf))) {
		gzalloc_consistency_checks = FALSE;
	}
#if	DEBUG
	/* The DEBUG kernel enables the guard allocator for 8K-16K zones by default */
	if (gzalloc_mode == FALSE) {
		gzalloc_mode = TRUE;
		gzalloc_min = 8192;
		gzalloc_max = 16384;
		gzalloc_prot = VM_PROT_READ;
	}
#endif
	if (PE_parse_boot_argn("-nogzalloc_mode", temp_buf, sizeof (temp_buf)))
		gzalloc_mode = FALSE;

	if (gzalloc_mode) {
		gzalloc_reserve_size = GZALLOC_RESERVE_SIZE_DEFAULT;
		gzalloc_reserve = (vm_offset_t) pmap_steal_memory(gzalloc_reserve_size);
	}
}
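/*
 * Reserve a dedicated submap for guarded allocations, sized at four times
 * the maximum zone map size (max_zonemap_size << 2), presumably to absorb
 * the per-element guard page and page rounding overhead.
 */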
void gzalloc_init(vm_size_t max_zonemap_size) {
	kern_return_t retval;

	if (gzalloc_mode) {
		retval = kmem_suballoc(kernel_map, &gzalloc_map_min, (max_zonemap_size << 2),
		    FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT,
		    &gzalloc_map);

		if (retval != KERN_SUCCESS)
			panic("zone_init: kmem_suballoc(gzalloc) failed");
		gzalloc_map_max = gzalloc_map_min + (max_zonemap_size << 2);
	}
}
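/*
 * Allocation path: for targeted zones, each element receives its own
 * page-rounded mapping plus a guard page from gzalloc_map. The element is
 * filled with gzalloc_fill_pattern and stamped with a gzhdr_t recording the
 * originating zone, element size and a signature, all validated on free.
 */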
vm_offset_t
gzalloc_alloc(zone_t zone, boolean_t canblock) {
	vm_offset_t addr = 0;

	if (__improbable(gzalloc_mode &&
		(((zone->elem_size >= gzalloc_min) &&
		    (zone->elem_size <= gzalloc_max))) &&
		(zone->gzalloc_exempt == 0))) {

		if (get_preemption_level() != 0) {
			if (canblock == TRUE) {
				pdzalloc_count++;
			}
			else
				return 0;
		}

		vm_offset_t rounded_size = round_page(zone->elem_size + GZHEADER_SIZE);
		vm_offset_t residue = rounded_size - zone->elem_size;
		vm_offset_t gzaddr = 0;
		gzhdr_t *gzh;

		if (!kmem_ready || (vm_page_zone == ZONE_NULL)) {
			/* Early allocations are supplied directly from the
			 * reserve.
			 */
			if (gzalloc_reserve_size < rounded_size)
				panic("gzalloc reserve exhausted");
			gzaddr = gzalloc_reserve;
			/* No guard page for these early allocations, just
			 * waste an additional page.
			 */
			gzalloc_reserve += rounded_size + PAGE_SIZE;
			gzalloc_reserve_size -= rounded_size + PAGE_SIZE;
			OSAddAtomic64((SInt32) (rounded_size), &gzalloc_early_alloc);
		} else {
			kern_return_t kr = kernel_memory_allocate(gzalloc_map,
			    &gzaddr, rounded_size + (1 * PAGE_SIZE),
			    0, KMA_KOBJECT | gzalloc_guard);
			if (kr != KERN_SUCCESS)
				panic("gzalloc: kernel_memory_allocate for size 0x%llx failed with %d", (uint64_t)rounded_size, kr);
		}

		if (gzalloc_uf_mode) {
			gzaddr += PAGE_SIZE;
			/* The "header" becomes a "footer" in underflow
			 * detection mode.
			 */
			gzh = (gzhdr_t *) (gzaddr + zone->elem_size);
			addr = gzaddr;
		} else {
			gzh = (gzhdr_t *) (gzaddr + residue - GZHEADER_SIZE);
			addr = (gzaddr + residue);
		}
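		/*
		 * Layout at this point: in the standard overflow detection mode
		 * the element ends exactly at the page boundary preceding the
		 * trailing guard page, with the gzhdr_t immediately before it;
		 * in underflow detection mode the guard page precedes the
		 * element, which starts at the top of its page with the header
		 * stored after it as a trailer.
		 */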
		/* Fill with a pattern on allocation to trap uninitialized
		 * data use. Since the element size may be "rounded up"
		 * by higher layers such as the kalloc layer, this may
		 * also identify overruns between the originally requested
		 * size and the rounded size via visual inspection.
		 * TBD: plumb through the originally requested size,
		 * prior to rounding by kalloc/IOMalloc etc.
		 * We also add a signature and the zone of origin in a header
		 * prefixed to the allocation.
		 */
		memset((void *)gzaddr, gzalloc_fill_pattern, rounded_size);

		gzh->gzone = (kmem_ready && vm_page_zone) ? zone : GZDEADZONE;
		gzh->gzsize = (uint32_t) zone->elem_size;
		gzh->gzsig = GZALLOC_SIGNATURE;

		lock_zone(zone);
		zone->count++;
		zone->sum_count++;
		zone->cur_size += rounded_size;
		unlock_zone(zone);

		OSAddAtomic64((SInt32) rounded_size, &gzalloc_allocated);
		OSAddAtomic64((SInt32) (rounded_size - zone->elem_size), &gzalloc_wasted);
	}
	return addr;
}
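/*
 * Free path: validate the header against the zone and signature, then
 * either write protect or unmap the element's backing pages and cycle the
 * address through the per-zone free VA cache so that stale references trap
 * for as long as possible. Returns TRUE if the element was handled here,
 * FALSE if the caller should free it through the regular zone path.
 */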
boolean_t gzalloc_free(zone_t zone, void *addr) {
	boolean_t gzfreed = FALSE;
	kern_return_t kr;

	if (__improbable(gzalloc_mode &&
		(((zone->elem_size >= gzalloc_min) &&
		    (zone->elem_size <= gzalloc_max))) &&
		(zone->gzalloc_exempt == 0))) {
		gzhdr_t *gzh;
		vm_offset_t rounded_size = round_page(zone->elem_size + GZHEADER_SIZE);
		vm_offset_t residue = rounded_size - zone->elem_size;
		vm_offset_t saddr;
		vm_offset_t free_addr = 0;

		if (gzalloc_uf_mode) {
			gzh = (gzhdr_t *)((vm_offset_t)addr + zone->elem_size);
			saddr = (vm_offset_t) addr - PAGE_SIZE;
		} else {
			gzh = (gzhdr_t *)((vm_offset_t)addr - GZHEADER_SIZE);
			saddr = ((vm_offset_t)addr) - residue;
		}

		assert((saddr & PAGE_MASK) == 0);

		if (gzalloc_consistency_checks) {
			if (gzh->gzsig != GZALLOC_SIGNATURE) {
				panic("GZALLOC signature mismatch for element %p, expected 0x%x, found 0x%x", addr, GZALLOC_SIGNATURE, gzh->gzsig);
			}

			if (gzh->gzone != zone && (gzh->gzone != GZDEADZONE))
				panic("%s: Mismatched zone or under/overflow, current zone: %p, recorded zone: %p, address: %p", __FUNCTION__, zone, gzh->gzone, (void *)addr);
			/* Partially redundant given the zone check, but may flag header corruption */
			if (gzh->gzsize != zone->elem_size) {
				panic("Mismatched zfree or under/overflow for zone %p, recorded size: 0x%x, element size: 0x%x, address: %p\n", zone, gzh->gzsize, (uint32_t) zone->elem_size, (void *)addr);
			}
		}

		if (!kmem_ready || gzh->gzone == GZDEADZONE) {
			/* For now, just leak frees of early allocations
			 * performed before kmem is fully configured.
			 * They don't seem to get freed currently;
			 * consider ml_static_mfree in the future.
			 */
			OSAddAtomic64((SInt32) (rounded_size), &gzalloc_early_free);
			return TRUE;
		}

		if (get_preemption_level() != 0) {
			pdzfree_count++;
		}

		if (gzfc_size) {
			/* Either write protect or unmap the newly freed
			 * allocation.
			 */
			kr = vm_map_protect(
				gzalloc_map,
				saddr,
				saddr + rounded_size + (1 * PAGE_SIZE),
				gzalloc_prot,
				FALSE);
			if (kr != KERN_SUCCESS)
				panic("%s: vm_map_protect: %p, 0x%x", __FUNCTION__, (void *)saddr, kr);
		} else {
			free_addr = saddr;
		}

		lock_zone(zone);

		/* Insert newly freed element into the protected free element
		 * cache, and rotate out the LRU element.
		 */
		if (gzfc_size) {
			if (zone->gz.gzfc_index >= gzfc_size) {
				zone->gz.gzfc_index = 0;
			}
			free_addr = zone->gz.gzfc[zone->gz.gzfc_index];
			zone->gz.gzfc[zone->gz.gzfc_index++] = saddr;
		}
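		/*
		 * free_addr now holds the evicted LRU cache entry (or the
		 * just-freed element if the cache is disabled); it is zero while
		 * the cache is still filling, in which case nothing is unmapped
		 * and the zone accounting below is skipped.
		 */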
		if (free_addr) {
			zone->count--;
			zone->cur_size -= rounded_size;
		}

		unlock_zone(zone);

		if (free_addr) {
			kr = vm_map_remove(
				gzalloc_map,
				free_addr,
				free_addr + rounded_size + (1 * PAGE_SIZE),
				VM_MAP_REMOVE_KUNWIRE);
			if (kr != KERN_SUCCESS)
				panic("gzfree: vm_map_remove: %p, 0x%x", (void *)free_addr, kr);

			OSAddAtomic64((SInt32)rounded_size, &gzalloc_freed);
			OSAddAtomic64(-((SInt32) (rounded_size - zone->elem_size)), &gzalloc_wasted);
		}

		gzfreed = TRUE;
	}
	return gzfreed;
}