/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * File: kern/gzalloc.c
 *
 * "Guard mode" zone allocator, used to trap use-after-free errors,
 * overruns, underruns, mismatched allocations/frees, uninitialized
 * zone element use, timing dependent races etc.
 *
 * The allocator is configured by these boot-args:
 * gzalloc_size=<size>: target all zones with elements of <size> bytes
 * gzalloc_min=<size>: target zones with elements >= size
 * gzalloc_max=<size>: target zones with elements <= size
 * gzalloc_min/max can be specified in conjunction to target a range of
 * sizes
 * gzalloc_fc_size=<size>: number of zone elements (effectively page
 * multiple sized) to retain in the free VA cache. This cache is evicted
 * (backing pages and VA released) in a least-recently-freed fashion.
 * Larger free VA caches allow for a longer window of opportunity to trap
 * delayed use-after-free operations, but use more memory.
 * -gzalloc_wp: Write protect, rather than unmap, freed allocations
 * lingering in the free VA cache. Useful to disambiguate between
 * read-after-frees/read overruns and writes. Also permits direct inspection
 * of the freed element in the cache via the kernel debugger. As each
 * element has a "header" (trailer in underflow detection mode), the zone
 * of origin of the element can be easily determined in this mode.
 * -gzalloc_uf_mode: Underflow detection mode, where the guard page
 * adjoining each element is placed *before* the element page rather than
 * after. The element is also located at the top of the page, rather than
 * abutting the bottom as with the standard overflow detection mode.
 * -gzalloc_noconsistency: disable consistency checks that flag mismatched
 * frees, corruptions of the header/trailer signatures etc.
 * -nogzalloc_mode: Disables the guard mode allocator. The DEBUG kernel
 * enables the guard allocator for zones sized 1K (if present) by
 * default; this option can disable that behaviour.
 * gzname=<name>: target a zone by name. Can be coupled with size-based
 * targeting. Naming conventions match those of the zlog boot-arg, i.e.
 * "a period in the logname will match a space in the zone name".
 * -gzalloc_no_dfree_check: eliminate double free checks.
 * gzalloc_zscale=<value>: specify a size multiplier for the dedicated
 * gzalloc submap.
 */
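/*
 * Example (illustrative values only): to guard all zones with elements
 * between 1KB and 4KB, quarantine 2048 freed elements, and write protect
 * rather than unmap freed elements, one might boot with:
 *   gzalloc_min=1024 gzalloc_max=4096 gzalloc_fc_size=2048 -gzalloc_wp
 */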
#include <zone_debug.h>

#include <mach/mach_types.h>
#include <mach/vm_param.h>
#include <mach/kern_return.h>
#include <mach/machine/vm_types.h>
#include <mach_debug/zone_info.h>
#include <mach/vm_map.h>

#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/sched.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#include <pexpert/pexpert.h>

#include <machine/machparam.h>

#include <libkern/OSDebug.h>
#include <libkern/OSAtomic.h>
#include <sys/kdebug.h>
extern boolean_t vm_kernel_ready, kmem_ready;
boolean_t gzalloc_mode = FALSE;
uint32_t pdzalloc_count, pdzfree_count;

#define GZALLOC_MIN_DEFAULT (1024)
#define GZDEADZONE ((zone_t) 0xDEAD201E)
#define GZALLOC_SIGNATURE (0xABADCAFE)
#define GZALLOC_RESERVE_SIZE_DEFAULT (2 * 1024 * 1024)
#define GZFC_DEFAULT_SIZE (1536)

char gzalloc_fill_pattern = 0x67; /* 'g' */

uint32_t gzalloc_min = ~0U;
uint32_t gzalloc_max = 0;
uint32_t gzalloc_size = 0;
uint64_t gzalloc_allocated, gzalloc_freed, gzalloc_early_alloc, gzalloc_early_free, gzalloc_wasted;
boolean_t gzalloc_uf_mode = FALSE, gzalloc_consistency_checks = TRUE, gzalloc_dfree_check = TRUE;
vm_prot_t gzalloc_prot = VM_PROT_NONE;
uint32_t gzalloc_guard = KMA_GUARD_LAST;
uint32_t gzfc_size = GZFC_DEFAULT_SIZE;
uint32_t gzalloc_zonemap_scale = 6;

vm_map_t gzalloc_map;
vm_offset_t gzalloc_map_min, gzalloc_map_max;
vm_offset_t gzalloc_reserve;
vm_size_t gzalloc_reserve_size;
typedef struct gzalloc_header {
	zone_t gzone;
	uint32_t gzsize;
	uint32_t gzsig;
} gzhdr_t;

#define GZHEADER_SIZE (sizeof(gzhdr_t))
extern zone_t vm_page_zone;

static zone_t gztrackzone = NULL;
static char gznamedzone[MAX_ZONE_NAME] = "";
void gzalloc_reconfigure(__unused zone_t z) {
	/* Nothing for now */
}
boolean_t
gzalloc_enabled(void) {
	return gzalloc_mode;
}
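/*
 * A zone is "tracked" (i.e. redirected to the guard allocator) when gzalloc
 * is enabled, the zone's element size falls within [gzalloc_min, gzalloc_max]
 * or the zone was explicitly named via gzname=, and the zone has not been
 * marked exempt.
 */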
static inline boolean_t
gzalloc_tracked(zone_t z) {
	return (gzalloc_mode &&
	    (((z->elem_size >= gzalloc_min) && (z->elem_size <= gzalloc_max)) || (z == gztrackzone)) &&
	    (z->gzalloc_exempt == 0));
}
void gzalloc_zone_init(zone_t z) {
	if (gzalloc_mode) {
		bzero(&z->gz, sizeof(z->gz));

		if (track_this_zone(z->zone_name, gznamedzone)) {
			gztrackzone = z;
		}

		if (gzfc_size &&
		    gzalloc_tracked(z)) {
			vm_size_t gzfcsz = round_page(sizeof(*z->gz.gzfc) * gzfc_size);
			kern_return_t kr;

			/* If the VM/kmem system aren't yet configured, carve
			 * out the free element cache structure directly from the
			 * gzalloc_reserve supplied by the pmap layer.
			 */
			if (!kmem_ready) {
				if (gzalloc_reserve_size < gzfcsz)
					panic("gzalloc reserve exhausted");

				z->gz.gzfc = (vm_offset_t *)gzalloc_reserve;
				gzalloc_reserve += gzfcsz;
				gzalloc_reserve_size -= gzfcsz;
			} else {
				if ((kr = kernel_memory_allocate(kernel_map, (vm_offset_t *)&z->gz.gzfc, gzfcsz, 0, KMA_KOBJECT, VM_KERN_MEMORY_OSFMK)) != KERN_SUCCESS) {
					panic("zinit/gzalloc: kernel_memory_allocate failed (%d) for 0x%lx bytes", kr, (unsigned long) gzfcsz);
				}
			}
			bzero((void *)z->gz.gzfc, gzfcsz);
		}
	}
}
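/*
 * Each tracked zone carries a small quarantine: z->gz.gzfc is an array of
 * gzfc_size element addresses, used as a ring (indexed by z->gz.gzfc_index)
 * so that freed elements are recycled in least-recently-freed order.
 */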
/* Called by zdestroy() to dump the free cache elements so the zone count can drop to zero. */
void gzalloc_empty_free_cache(zone_t zone) {
	if (__improbable(gzalloc_tracked(zone))) {
		kern_return_t kr;
		int freed_elements = 0;
		vm_offset_t free_addr = 0;
		vm_offset_t rounded_size = round_page(zone->elem_size + GZHEADER_SIZE);
		vm_offset_t gzfcsz = round_page(sizeof(*zone->gz.gzfc) * gzfc_size);
		vm_offset_t gzfc_copy;

		kr = kmem_alloc(kernel_map, &gzfc_copy, gzfcsz, VM_KERN_MEMORY_OSFMK);
		if (kr != KERN_SUCCESS) {
			panic("gzalloc_empty_free_cache: kmem_alloc: 0x%x", kr);
		}

		/* Reset gzalloc_data. */
		lock_zone(zone);
		memcpy((void *)gzfc_copy, (void *)zone->gz.gzfc, gzfcsz);
		bzero((void *)zone->gz.gzfc, gzfcsz);
		zone->gz.gzfc_index = 0;
		unlock_zone(zone);

		/* Free up all the cached elements. */
		for (uint32_t index = 0; index < gzfc_size; index++) {
			free_addr = ((vm_offset_t *)gzfc_copy)[index];
			if (free_addr && free_addr >= gzalloc_map_min && free_addr < gzalloc_map_max) {
				kr = vm_map_remove(
					gzalloc_map,
					free_addr,
					free_addr + rounded_size + (1 * PAGE_SIZE),
					VM_MAP_REMOVE_KUNWIRE);
				if (kr != KERN_SUCCESS) {
					panic("gzalloc_empty_free_cache: vm_map_remove: %p, 0x%x", (void *)free_addr, kr);
				}
				OSAddAtomic64((SInt32)rounded_size, &gzalloc_freed);
				OSAddAtomic64(-((SInt32) (rounded_size - zone->elem_size)), &gzalloc_wasted);

				freed_elements++;
			}
		}
		/*
		 * TODO: Consider freeing up zone->gz.gzfc as well if it didn't come from the gzalloc_reserve pool.
		 * For now we're reusing this buffer across zdestroy's. We would have to allocate it again on a
		 * subsequent zinit() as well.
		 */

		/* Decrement zone counters. */
		lock_zone(zone);
		zone->count -= freed_elements;
		zone->cur_size -= (freed_elements * rounded_size);
		unlock_zone(zone);

		kmem_free(kernel_map, gzfc_copy, gzfcsz);
	}
}
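/*
 * Parse the gzalloc boot-args. Note that the backing reserve is carved out
 * via pmap_steal_memory(), so this runs before the VM/kmem system is fully
 * configured.
 */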
void gzalloc_configure(void) {
	char temp_buf[16];

	if (PE_parse_boot_argn("-gzalloc_mode", temp_buf, sizeof (temp_buf))) {
		gzalloc_mode = TRUE;
		gzalloc_min = GZALLOC_MIN_DEFAULT;
		gzalloc_max = ~0U;
	}

	if (PE_parse_boot_argn("gzalloc_min", &gzalloc_min, sizeof(gzalloc_min))) {
		gzalloc_mode = TRUE;
		gzalloc_max = ~0U;
	}

	if (PE_parse_boot_argn("gzalloc_max", &gzalloc_max, sizeof(gzalloc_max))) {
		gzalloc_mode = TRUE;
		if (gzalloc_min == ~0U)
			gzalloc_min = 0;
	}

	if (PE_parse_boot_argn("gzalloc_size", &gzalloc_size, sizeof(gzalloc_size))) {
		gzalloc_min = gzalloc_max = gzalloc_size;
		gzalloc_mode = TRUE;
	}

	(void)PE_parse_boot_argn("gzalloc_fc_size", &gzfc_size, sizeof(gzfc_size));

	if (PE_parse_boot_argn("-gzalloc_wp", temp_buf, sizeof (temp_buf))) {
		gzalloc_prot = VM_PROT_READ;
	}

	if (PE_parse_boot_argn("-gzalloc_uf_mode", temp_buf, sizeof (temp_buf))) {
		gzalloc_uf_mode = TRUE;
		gzalloc_guard = KMA_GUARD_FIRST;
	}

	if (PE_parse_boot_argn("-gzalloc_no_dfree_check", temp_buf, sizeof(temp_buf))) {
		gzalloc_dfree_check = FALSE;
	}

	(void) PE_parse_boot_argn("gzalloc_zscale", &gzalloc_zonemap_scale, sizeof(gzalloc_zonemap_scale));

	if (PE_parse_boot_argn("-gzalloc_noconsistency", temp_buf, sizeof (temp_buf))) {
		gzalloc_consistency_checks = FALSE;
	}

	if (PE_parse_boot_argn("gzname", gznamedzone, sizeof(gznamedzone))) {
		gzalloc_mode = TRUE;
	}

#if DEBUG
	if (gzalloc_mode == FALSE) {
		gzalloc_mode = TRUE;
		gzalloc_min = gzalloc_max = GZALLOC_MIN_DEFAULT;
		strlcpy(gznamedzone, "pmap", sizeof(gznamedzone));
		gzalloc_prot = VM_PROT_READ;
	}
#endif

	if (PE_parse_boot_argn("-nogzalloc_mode", temp_buf, sizeof (temp_buf)))
		gzalloc_mode = FALSE;

	if (gzalloc_mode) {
		gzalloc_reserve_size = GZALLOC_RESERVE_SIZE_DEFAULT;
		gzalloc_reserve = (vm_offset_t) pmap_steal_memory(gzalloc_reserve_size);
	}
}
void gzalloc_init(vm_size_t max_zonemap_size) {
	kern_return_t retval;

	if (gzalloc_mode) {
		vm_map_kernel_flags_t vmk_flags;

		vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
		vmk_flags.vmkf_permanent = TRUE;
		retval = kmem_suballoc(kernel_map, &gzalloc_map_min, (max_zonemap_size * gzalloc_zonemap_scale),
		    FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_ZONE,
		    &gzalloc_map);

		if (retval != KERN_SUCCESS) {
			panic("zone_init: kmem_suballoc(gzalloc_map, 0x%lx, %u) failed", max_zonemap_size, gzalloc_zonemap_scale);
		}
		gzalloc_map_max = gzalloc_map_min + (max_zonemap_size * gzalloc_zonemap_scale);
	}
}
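/*
 * Allocate a guarded element: each element gets its own VA range in
 * gzalloc_map, with a guard page placed after (or before, in
 * -gzalloc_uf_mode) the element, and a gzhdr_t recording the owning zone,
 * element size and a signature adjoining the element.
 */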
vm_offset_t
gzalloc_alloc(zone_t zone, boolean_t canblock) {
	vm_offset_t addr = 0;

	if (__improbable(gzalloc_tracked(zone))) {

		if (get_preemption_level() != 0) {
			if (canblock == TRUE) {
				pdzalloc_count++;
			} else {
				return 0;
			}
		}

		vm_offset_t rounded_size = round_page(zone->elem_size + GZHEADER_SIZE);
		vm_offset_t residue = rounded_size - zone->elem_size;
		vm_offset_t gzaddr = 0;
		gzhdr_t *gzh, *gzhcopy = NULL;

		if (!kmem_ready || (vm_page_zone == ZONE_NULL)) {
			/* Early allocations are supplied directly from the
			 * reserve.
			 */
			if (gzalloc_reserve_size < (rounded_size + PAGE_SIZE))
				panic("gzalloc reserve exhausted");
			gzaddr = gzalloc_reserve;
			/* No guard page for these early allocations, just
			 * waste an additional page.
			 */
			gzalloc_reserve += rounded_size + PAGE_SIZE;
			gzalloc_reserve_size -= rounded_size + PAGE_SIZE;
			OSAddAtomic64((SInt32) (rounded_size), &gzalloc_early_alloc);
		} else {
			kern_return_t kr = kernel_memory_allocate(gzalloc_map,
			    &gzaddr, rounded_size + (1 * PAGE_SIZE),
			    0, KMA_KOBJECT | KMA_ATOMIC | gzalloc_guard,
			    VM_KERN_MEMORY_OSFMK);
			if (kr != KERN_SUCCESS)
				panic("gzalloc: kernel_memory_allocate for size 0x%llx failed with %d", (uint64_t)rounded_size, kr);
		}
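		/*
		 * Rough layout of the range just obtained (overflow detection
		 * mode; proportions are illustrative):
		 *
		 *   | pad ... | gzhdr_t | element | guard page |
		 *   ^ gzaddr            ^ returned addr
		 *
		 * In -gzalloc_uf_mode the guard page precedes the element and
		 * the header follows the element as a "footer", with a copy
		 * stashed at the trailing edge of the range.
		 */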
		if (gzalloc_uf_mode) {
			gzaddr += PAGE_SIZE;
			/* The "header" becomes a "footer" in underflow
			 * detection mode.
			 */
			gzh = (gzhdr_t *) (gzaddr + zone->elem_size);
			addr = gzaddr;
			gzhcopy = (gzhdr_t *) (gzaddr + rounded_size - sizeof(gzhdr_t));
		} else {
			gzh = (gzhdr_t *) (gzaddr + residue - GZHEADER_SIZE);
			addr = (gzaddr + residue);
		}
		/* Fill with a pattern on allocation to trap uninitialized
		 * data use. Since the element size may be "rounded up"
		 * by higher layers such as the kalloc layer, this may
		 * also identify overruns between the originally requested
		 * size and the rounded size via visual inspection.
		 * TBD: plumb through the originally requested size,
		 * prior to rounding by kalloc/IOMalloc etc.
		 * We also add a signature and the zone of origin in a header
		 * prefixed to the allocation.
		 */
		memset((void *)gzaddr, gzalloc_fill_pattern, rounded_size);

		gzh->gzone = (kmem_ready && vm_page_zone) ? zone : GZDEADZONE;
		gzh->gzsize = (uint32_t) zone->elem_size;
		gzh->gzsig = GZALLOC_SIGNATURE;

		/* In underflow detection mode, stash away a copy of the
		 * metadata at the edge of the allocated range, for
		 * retrieval by gzalloc_element_size()
		 */
		if (gzhcopy) {
			*gzhcopy = *gzh;
		}

		lock_zone(zone);
		assert(zone->zone_valid);
		zone->count++;
		zone->cur_size += rounded_size;
		unlock_zone(zone);

		OSAddAtomic64((SInt32) rounded_size, &gzalloc_allocated);
		OSAddAtomic64((SInt32) (rounded_size - zone->elem_size), &gzalloc_wasted);
	}
	return addr;
}
boolean_t
gzalloc_free(zone_t zone, void *addr) {
	boolean_t gzfreed = FALSE;
	kern_return_t kr;

	if (__improbable(gzalloc_tracked(zone))) {
		gzhdr_t *gzh;
		vm_offset_t rounded_size = round_page(zone->elem_size + GZHEADER_SIZE);
		vm_offset_t residue = rounded_size - zone->elem_size;
		vm_offset_t saddr;
		vm_offset_t free_addr = 0;

		if (gzalloc_uf_mode) {
			gzh = (gzhdr_t *)((vm_offset_t)addr + zone->elem_size);
			saddr = (vm_offset_t) addr - PAGE_SIZE;
		} else {
			gzh = (gzhdr_t *)((vm_offset_t)addr - GZHEADER_SIZE);
			saddr = ((vm_offset_t)addr) - residue;
		}
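		/* saddr is the expected start of the underlying mapping
		 * (including the leading pad or guard page), which must be
		 * page aligned; gzh points at the metadata header (footer in
		 * underflow detection mode) recorded at allocation time.
		 */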
		if ((saddr & PAGE_MASK) != 0) {
			panic("gzalloc_free: invalid address supplied: %p (adjusted: 0x%lx) for zone with element sized 0x%lx\n", addr, saddr, zone->elem_size);
		}

		if (gzalloc_dfree_check) {
			uint32_t gd;

			lock_zone(zone);
			assert(zone->zone_valid);
			for (gd = 0; gd < gzfc_size; gd++) {
				if (zone->gz.gzfc[gd] == saddr) {
					panic("gzalloc: double free detected, freed address: 0x%lx, current free cache index: %d, freed index: %d", saddr, zone->gz.gzfc_index, gd);
				}
			}
			unlock_zone(zone);
		}
		if (gzalloc_consistency_checks) {
			if (gzh->gzsig != GZALLOC_SIGNATURE) {
				panic("GZALLOC signature mismatch for element %p, expected 0x%x, found 0x%x", addr, GZALLOC_SIGNATURE, gzh->gzsig);
			}

			if (gzh->gzone != zone && (gzh->gzone != GZDEADZONE))
				panic("%s: Mismatched zone or under/overflow, current zone: %p, recorded zone: %p, address: %p", __FUNCTION__, zone, gzh->gzone, (void *)addr);
			/* Partially redundant given the zone check, but may flag header corruption */
			if (gzh->gzsize != zone->elem_size) {
				panic("Mismatched zfree or under/overflow for zone %p, recorded size: 0x%x, element size: 0x%x, address: %p\n", zone, gzh->gzsize, (uint32_t) zone->elem_size, (void *)addr);
			}

			char *gzc, *checkstart, *checkend;
			if (gzalloc_uf_mode) {
				checkstart = (char *) ((uintptr_t) gzh + sizeof(gzh));
				checkend = (char *) ((((vm_offset_t)addr) & ~PAGE_MASK) + PAGE_SIZE);
			} else {
				checkstart = (char *) trunc_page_64(addr);
				checkend = (char *)gzh;
			}

			for (gzc = checkstart; gzc < checkend; gzc++) {
				if (*gzc != gzalloc_fill_pattern) {
					panic("GZALLOC: detected over/underflow, byte at %p, element %p, contents 0x%x from 0x%lx byte sized zone (%s) doesn't match fill pattern (%c)", gzc, addr, *gzc, zone->elem_size, zone->zone_name, gzalloc_fill_pattern);
				}
			}
		}
		if (!kmem_ready || gzh->gzone == GZDEADZONE) {
			/* For now, just leak frees of early allocations
			 * performed before kmem is fully configured.
			 * They don't seem to get freed currently;
			 * consider ml_static_mfree in the future.
			 */
			OSAddAtomic64((SInt32) (rounded_size), &gzalloc_early_free);
			return TRUE;
		}

		if (get_preemption_level() != 0) {
			pdzfree_count++;
		}

		if (gzfc_size) {
			/* Either write protect or unmap the newly freed
			 * allocations.
			 */
			kr = vm_map_protect(
				gzalloc_map,
				saddr,
				saddr + rounded_size + (1 * PAGE_SIZE),
				gzalloc_prot,
				FALSE);
			if (kr != KERN_SUCCESS)
				panic("%s: vm_map_protect: %p, 0x%x", __FUNCTION__, (void *)saddr, kr);
		} else {
			free_addr = saddr;
		}
		lock_zone(zone);
		assert(zone->zone_valid);

		/* Insert newly freed element into the protected free element
		 * cache, and rotate out the LRU element.
		 */
		if (gzfc_size) {
			if (zone->gz.gzfc_index >= gzfc_size) {
				zone->gz.gzfc_index = 0;
			}
			free_addr = zone->gz.gzfc[zone->gz.gzfc_index];
			zone->gz.gzfc[zone->gz.gzfc_index++] = saddr;
		}

		if (free_addr) {
			zone->count--;
			zone->cur_size -= rounded_size;
		}

		unlock_zone(zone);

		if (free_addr) {
			// TODO: consider using physical reads to check for
			// corruption while on the protected freelist
			// (i.e. physical corruption)
			kr = vm_map_remove(
				gzalloc_map,
				free_addr,
				free_addr + rounded_size + (1 * PAGE_SIZE),
				VM_MAP_REMOVE_KUNWIRE);
			if (kr != KERN_SUCCESS)
				panic("gzfree: vm_map_remove: %p, 0x%x", (void *)free_addr, kr);
			// TODO: sysctl-ize for quick reference
			OSAddAtomic64((SInt32)rounded_size, &gzalloc_freed);
			OSAddAtomic64(-((SInt32) (rounded_size - zone->elem_size)), &gzalloc_wasted);
		}

		gzfreed = TRUE;
	}
	return gzfreed;
}
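/*
 * Given an address that may have been issued by the guard allocator (i.e.
 * one that falls within the gzalloc submap), locate the adjoining gzalloc
 * metadata and report the owning zone and element size.
 */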
boolean_t
gzalloc_element_size(void *gzaddr, zone_t *z, vm_size_t *gzsz) {
	uintptr_t a = (uintptr_t)gzaddr;
	if (__improbable(gzalloc_mode && (a >= gzalloc_map_min) && (a < gzalloc_map_max))) {
		gzhdr_t *gzh;

		/* Locate the gzalloc metadata adjoining the element */
		if (gzalloc_uf_mode == TRUE) {
			boolean_t vmef;
			vm_map_entry_t gzvme = NULL;

			/* In underflow detection mode, locate the map entry describing
			 * the element, and then locate the copy of the gzalloc
			 * header at the trailing edge of the range.
			 */
			vm_map_lock_read(gzalloc_map);
			vmef = vm_map_lookup_entry(gzalloc_map, (vm_map_offset_t)a, &gzvme);
			vm_map_unlock(gzalloc_map);
			if (vmef == FALSE) {
				panic("GZALLOC: unable to locate map entry for %p\n", (void *)a);
			}
			assertf(gzvme->vme_atomic != 0, "GZALLOC: VM map entry inconsistency, vme: %p, start: %llu end: %llu", gzvme, gzvme->vme_start, gzvme->vme_end);
			gzh = (gzhdr_t *)(gzvme->vme_end - GZHEADER_SIZE);
		} else {
			gzh = (gzhdr_t *)(a - GZHEADER_SIZE);
		}

		if (gzh->gzsig != GZALLOC_SIGNATURE) {
			panic("GZALLOC signature mismatch for element %p, expected 0x%x, found 0x%x", (void *)a, GZALLOC_SIGNATURE, gzh->gzsig);
		}

		*gzsz = gzh->gzone->elem_size;
		if (__improbable((gzalloc_tracked(gzh->gzone)) == FALSE)) {
			panic("GZALLOC: zone mismatch (%p)\n", gzh->gzone);