/*
 * Copyright (c) 2006-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Memory allocator with per-CPU caching, derived from the kmem magazine
 * concept and implementation as described in the following paper:
 * http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick.pdf
 * That implementation is Copyright 2006 Sun Microsystems, Inc.  All rights
 * reserved.  Use is subject to license terms.
 *
 * There are several major differences between this and the original kmem
 * magazine: this derivative implementation allows for multiple objects to
 * be allocated and freed from/to the object cache in one call; in addition,
 * it provides for better flexibility where the user is allowed to define
 * its own slab allocator (instead of the default zone allocator).  Finally,
 * no object construction/destruction takes place at the moment, although
 * this could be added in future to improve efficiency.
 */
#include <sys/param.h>
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/systm.h>

#include <kern/debug.h>
#include <kern/zalloc.h>
#include <kern/cpu_number.h>
#include <kern/locks.h>
#include <kern/thread_call.h>

#include <libkern/libkern.h>
#include <libkern/OSAtomic.h>
#include <libkern/OSDebug.h>

#include <mach/vm_param.h>
#include <machine/limits.h>
#include <machine/machine_routines.h>

#include <sys/mcache.h>
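
/*
 * Illustrative usage sketch for the mcache API described in the header
 * comment above.  This is compiled out and not part of the allocator
 * itself; the cache name and object size are made-up example values.
 */
#if 0
static void
mcache_usage_example(void)
{
	/* Create a 128-byte object cache backed by the zone allocator */
	mcache_t *cp = mcache_create("example.objs", 128, 0, 0, MCR_SLEEP);

	/* Allocate a single object (may block since MCR_SLEEP is used) */
	void *obj = mcache_alloc(cp, MCR_SLEEP);

	/* ... use the object ... */

	/* Return the object to the cache's CPU layer */
	mcache_free(cp, obj);

	/* Tear the cache down once no objects are outstanding */
	mcache_destroy(cp);
}
#endif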
#define MCACHE_SIZE(n) \
	__builtin_offsetof(mcache_t, mc_cpu[n])

/* Allocate extra in case we need to manually align the pointer */
#define MCACHE_ALLOC_SIZE \
	(sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_LINE_SIZE)

#define MCACHE_CPU(c) \
	(mcache_cpu_t *)((void *)((char *)(c) + MCACHE_SIZE(cpu_number())))
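
/*
 * Illustrative note (not in the original sources): MCACHE_SIZE(n) is the
 * byte offset of the n-th per-CPU structure inside mcache_t, so for a
 * cache pointer cp, MCACHE_CPU(cp) is effectively &cp->mc_cpu[cpu_number()],
 * i.e. the per-CPU cache of the CPU the caller is currently running on.
 */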
/*
 * MCACHE_LIST_LOCK() and MCACHE_LIST_UNLOCK() are macros used
 * to serialize accesses to the global list of caches in the system.
 * They also record the thread currently running in the critical
 * section, so that we can avoid recursive requests to reap the
 * caches when memory runs low.
 */
#define MCACHE_LIST_LOCK() {                            \
	lck_mtx_lock(mcache_llock);                     \
	mcache_llock_owner = current_thread();          \
}

#define MCACHE_LIST_UNLOCK() {                          \
	mcache_llock_owner = NULL;                      \
	lck_mtx_unlock(mcache_llock);                   \
}

#define MCACHE_LOCK(l)          lck_mtx_lock(l)
#define MCACHE_UNLOCK(l)        lck_mtx_unlock(l)
#define MCACHE_LOCK_TRY(l)      lck_mtx_try_lock(l)
static unsigned int ncpu;
static unsigned int cache_line_size;
static lck_mtx_t *mcache_llock;
static struct thread *mcache_llock_owner;
static lck_attr_t *mcache_llock_attr;
static lck_grp_t *mcache_llock_grp;
static lck_grp_attr_t *mcache_llock_grp_attr;
static struct zone *mcache_zone;
static const uint32_t mcache_reap_interval = 15;
static const uint32_t mcache_reap_interval_leeway = 2;
static UInt32 mcache_reaping;
static int mcache_ready;
static int mcache_updating;

static int mcache_bkt_contention = 3;
#if DEBUG
static unsigned int mcache_flags = MCF_DEBUG;
#else
static unsigned int mcache_flags = 0;
#endif
int mca_trn_max = MCA_TRN_MAX;

#define DUMP_MCA_BUF_SIZE       512
static char *mca_dump_buf;
static mcache_bkttype_t mcache_bkttype[] = {
	{ 1,    4096,   32768,  NULL },
	{ 3,    2048,   16384,  NULL },
	{ 7,    1024,   12288,  NULL },
	{ 15,   256,    8192,   NULL },
	{ 31,   64,     4096,   NULL },
	{ 47,   0,      2048,   NULL },
	{ 63,   0,      1024,   NULL },
	{ 95,   0,      512,    NULL },
	{ 143,  0,      256,    NULL },
};
static mcache_t *mcache_create_common(const char *, size_t, size_t,
    mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t,
    mcache_notifyfn_t, void *, u_int32_t, int, int);
static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***,
    unsigned int, int);
static void mcache_slab_free(void *, mcache_obj_t *, boolean_t);
static void mcache_slab_audit(void *, mcache_obj_t *, boolean_t);
static void mcache_cpu_refill(mcache_cpu_t *, mcache_bkt_t *, int);
static mcache_bkt_t *mcache_bkt_alloc(mcache_t *, mcache_bktlist_t *);
static void mcache_bkt_free(mcache_t *, mcache_bktlist_t *, mcache_bkt_t *);
static void mcache_cache_bkt_enable(mcache_t *);
static void mcache_bkt_purge(mcache_t *);
static void mcache_bkt_destroy(mcache_t *, mcache_bkt_t *, int);
static void mcache_bkt_ws_update(mcache_t *);
static void mcache_bkt_ws_zero(mcache_t *);
static void mcache_bkt_ws_reap(mcache_t *);
static void mcache_dispatch(void (*)(void *), void *);
static void mcache_cache_reap(mcache_t *);
static void mcache_cache_update(mcache_t *);
static void mcache_cache_bkt_resize(void *);
static void mcache_cache_enable(void *);
static void mcache_update(thread_call_param_t __unused, thread_call_param_t __unused);
static void mcache_update_timeout(void *);
static void mcache_applyall(void (*)(mcache_t *));
static void mcache_reap_start(void *);
static void mcache_reap_done(void *);
static void mcache_reap_timeout(thread_call_param_t __unused, thread_call_param_t);
static void mcache_notify(mcache_t *, u_int32_t);
static void mcache_purge(void *);

static LIST_HEAD(, mcache) mcache_head;
mcache_t *mcache_audit_cache;

static thread_call_t mcache_reap_tcall;
static thread_call_t mcache_update_tcall;
/*
 * Initialize the framework; this is currently called as part of BSD init.
 */
__private_extern__ void
mcache_init(void)
{
	mcache_bkttype_t *btp;
	unsigned int i;
	char name[32];

	VERIFY(mca_trn_max >= 2);

	ncpu = ml_wait_max_cpus();
	(void) mcache_cache_line_size();        /* prime it */

	mcache_llock_grp_attr = lck_grp_attr_alloc_init();
	mcache_llock_grp = lck_grp_alloc_init("mcache.list",
	    mcache_llock_grp_attr);
	mcache_llock_attr = lck_attr_alloc_init();
	mcache_llock = lck_mtx_alloc_init(mcache_llock_grp, mcache_llock_attr);

	mcache_reap_tcall = thread_call_allocate(mcache_reap_timeout, NULL);
	mcache_update_tcall = thread_call_allocate(mcache_update, NULL);
	if (mcache_reap_tcall == NULL || mcache_update_tcall == NULL) {
		panic("mcache_init: thread_call_allocate failed");
		/* NOTREACHED */
		__builtin_unreachable();
	}

	mcache_zone = zone_create("mcache", MCACHE_ALLOC_SIZE, ZC_DESTRUCTIBLE);

	LIST_INIT(&mcache_head);

	for (i = 0; i < sizeof(mcache_bkttype) / sizeof(*btp); i++) {
		btp = &mcache_bkttype[i];
		(void) snprintf(name, sizeof(name), "bkt_%d",
		    btp->bt_bktsize);
		btp->bt_cache = mcache_create(name,
		    (btp->bt_bktsize + 1) * sizeof(void *), 0, 0, MCR_SLEEP);
	}

	PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof(mcache_flags));
	mcache_flags &= MCF_FLAGS_MASK;

	mcache_audit_cache = mcache_create("audit", sizeof(mcache_audit_t),
	    0, 0, MCR_SLEEP);

	mcache_applyall(mcache_cache_bkt_enable);
	mcache_ready = 1;

	printf("mcache: %d CPU(s), %d bytes CPU cache line size\n",
	    ncpu, CPU_CACHE_LINE_SIZE);
}
/*
 * Return the global mcache flags.
 */
__private_extern__ unsigned int
mcache_getflags(void)
{
	return mcache_flags;
}

/*
 * Return the CPU cache line size.
 */
__private_extern__ unsigned int
mcache_cache_line_size(void)
{
	if (cache_line_size == 0) {
		ml_cpu_info_t cpu_info;
		ml_cpu_get_info(&cpu_info);
		cache_line_size = (unsigned int)cpu_info.cache_line_size;
	}
	return cache_line_size;
}
/*
 * Create a cache using the zone allocator as the backend slab allocator.
 * The caller may specify any alignment for the object; if it specifies 0
 * the default alignment (MCACHE_ALIGN) will be used.
 */
__private_extern__ mcache_t *
mcache_create(const char *name, size_t bufsize, size_t align,
    u_int32_t flags, int wait)
{
	return mcache_create_common(name, bufsize, align, mcache_slab_alloc,
	           mcache_slab_free, mcache_slab_audit, NULL, NULL, NULL, flags, 1,
	           wait);
}
/*
 * Create a cache using a custom backend slab allocator.  Since the caller
 * is responsible for allocation, no alignment guarantee will be provided
 * by this framework.
 */
__private_extern__ mcache_t *
mcache_create_ext(const char *name, size_t bufsize,
    mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
    mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
    u_int32_t flags, int wait)
{
	return mcache_create_common(name, bufsize, 0, allocfn,
	           freefn, auditfn, logfn, notifyfn, arg, flags, 0, wait);
}
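
/*
 * Illustrative sketch (compiled out, not part of the allocator): a minimal
 * custom slab backend handed to mcache_create_ext().  The callback shapes
 * mirror mcache_slab_alloc()/mcache_slab_free() below; the object size,
 * function names and the use of MALLOC/FREE with M_TEMP are made-up
 * example choices.
 */
#if 0
#define EXAMPLE_OBJ_SIZE        128

static unsigned int
example_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait)
{
#pragma unused(arg)
	mcache_obj_t **list = *plist;
	unsigned int need = num;
	void *buf;

	while (need > 0) {
		MALLOC(buf, void *, EXAMPLE_OBJ_SIZE, M_TEMP,
		    (wait & MCR_NOSLEEP) ? M_NOWAIT : M_WAITOK);
		if (buf == NULL) {
			break;
		}
		/* Chain the new object exactly like the zone-backed slab does */
		*list = (mcache_obj_t *)buf;
		(*list)->obj_next = NULL;
		list = *plist = &(*list)->obj_next;
		need--;
	}
	return num - need;
}

static void
example_slab_free(void *arg, mcache_obj_t *list, boolean_t purged)
{
#pragma unused(arg, purged)
	mcache_obj_t *next;

	while (list != NULL) {
		next = list->obj_next;
		list->obj_next = NULL;
		FREE(list, M_TEMP);
		list = next;
	}
}

/*
 * The cache would then be created with:
 *
 *	mcache_t *cp = mcache_create_ext("example.ext", EXAMPLE_OBJ_SIZE,
 *	    example_slab_alloc, example_slab_free, NULL, NULL, NULL, NULL,
 *	    0, MCR_SLEEP);
 */
#endif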
/*
 * Common cache creation routine.
 */
static mcache_t *
mcache_create_common(const char *name, size_t bufsize, size_t align,
    mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
    mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
    u_int32_t flags, int need_zone, int wait)
{
	mcache_bkttype_t *btp;
	mcache_t *cp = NULL;
	size_t chunksize;
	void *buf, **pbuf;
	int c;
	char lck_name[64];
	/* If auditing is on and print buffer is NULL, allocate it now */
	if ((flags & MCF_DEBUG) && mca_dump_buf == NULL) {
		int malloc_wait = (wait & MCR_NOSLEEP) ? M_NOWAIT : M_WAITOK;
		MALLOC(mca_dump_buf, char *, DUMP_MCA_BUF_SIZE, M_TEMP,
		    malloc_wait | M_ZERO);
		if (mca_dump_buf == NULL) {
			return NULL;
		}
	}

	buf = zalloc(mcache_zone);
	if (buf == NULL) {
		goto fail;
	}

	bzero(buf, MCACHE_ALLOC_SIZE);
	/*
	 * In case we didn't get a cache-aligned memory, round it up
	 * accordingly.  This is needed in order to get the rest of
	 * structure members aligned properly.  It also means that
	 * the memory span gets shifted due to the round up, but it
	 * is okay since we've allocated extra space for this.
	 */
	cp = (mcache_t *)
	    P2ROUNDUP((intptr_t)buf + sizeof(void *), CPU_CACHE_LINE_SIZE);
	pbuf = (void **)((intptr_t)cp - sizeof(void *));
	*pbuf = buf;
	/*
	 * Guaranteed alignment is valid only when we use the internal
	 * slab allocator (currently set to use the zone allocator).
	 */
	if (!need_zone) {
		align = 1;
	} else {
		/* Enforce 64-bit minimum alignment for zone-based buffers */
		if (align == 0) {
			align = MCACHE_ALIGN;
		}
		align = P2ROUNDUP(align, MCACHE_ALIGN);
	}
	if ((align & (align - 1)) != 0) {
		panic("mcache_create: bad alignment %lu", align);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	cp->mc_align = align;
	cp->mc_slab_alloc = allocfn;
	cp->mc_slab_free = freefn;
	cp->mc_slab_audit = auditfn;
	cp->mc_slab_log = logfn;
	cp->mc_slab_notify = notifyfn;
	cp->mc_private = need_zone ? cp : arg;
	cp->mc_bufsize = bufsize;
	cp->mc_flags = (flags & MCF_FLAGS_MASK) | mcache_flags;

	(void) snprintf(cp->mc_name, sizeof(cp->mc_name), "mcache.%s", name);

	(void) snprintf(lck_name, sizeof(lck_name), "%s.cpu", cp->mc_name);
	cp->mc_cpu_lock_grp_attr = lck_grp_attr_alloc_init();
	cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name,
	    cp->mc_cpu_lock_grp_attr);
	cp->mc_cpu_lock_attr = lck_attr_alloc_init();
	/*
	 * Allocation chunk size is the object's size plus any extra size
	 * needed to satisfy the object's alignment.  It is enforced to be
	 * at least the size of an LP64 pointer to simplify auditing and to
	 * handle multiple-element allocation requests, where the elements
	 * returned are linked together in a list.
	 */
	chunksize = MAX(bufsize, sizeof(u_int64_t));
	if (need_zone) {
		VERIFY(align != 0 && (align % MCACHE_ALIGN) == 0);
		chunksize += sizeof(uint64_t) + align;
		chunksize = P2ROUNDUP(chunksize, align);
		cp->mc_slab_zone = zone_create(cp->mc_name, chunksize, ZC_DESTRUCTIBLE);
	}
	cp->mc_chunksize = chunksize;
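
	/*
	 * Illustrative worked example (not in the original sources), assuming
	 * MCACHE_ALIGN is 8: a zone-backed cache with bufsize 40 and align 0
	 * gets align = 8, so chunksize = MAX(40, 8) = 40, then 40 + 8 + 8 = 56,
	 * then P2ROUNDUP(56, 8) = 56, i.e. each 40-byte object occupies a
	 * 56-byte chunk in the backing zone.
	 */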
	/*
	 * Initialize the bucket layer.
	 */
	(void) snprintf(lck_name, sizeof(lck_name), "%s.bkt", cp->mc_name);
	cp->mc_bkt_lock_grp_attr = lck_grp_attr_alloc_init();
	cp->mc_bkt_lock_grp = lck_grp_alloc_init(lck_name,
	    cp->mc_bkt_lock_grp_attr);
	cp->mc_bkt_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp,
	    cp->mc_bkt_lock_attr);

	(void) snprintf(lck_name, sizeof(lck_name), "%s.sync", cp->mc_name);
	cp->mc_sync_lock_grp_attr = lck_grp_attr_alloc_init();
	cp->mc_sync_lock_grp = lck_grp_alloc_init(lck_name,
	    cp->mc_sync_lock_grp_attr);
	cp->mc_sync_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp,
	    cp->mc_sync_lock_attr);

	for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++) {
		continue;
	}

	cp->cache_bkttype = btp;
	/*
	 * Initialize the CPU layer.  Each per-CPU structure is aligned
	 * on the CPU cache line boundary to prevent false sharing.
	 */
	for (c = 0; c < ncpu; c++) {
		mcache_cpu_t *ccp = &cp->mc_cpu[c];

		VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_LINE_SIZE));
		lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp,
		    cp->mc_cpu_lock_attr);
		ccp->cc_objs = -1;
		ccp->cc_pobjs = -1;
	}

	if (mcache_ready) {
		mcache_cache_bkt_enable(cp);
	}

	/* TODO: dynamically create sysctl for stats */

	MCACHE_LIST_LOCK();
	LIST_INSERT_HEAD(&mcache_head, cp, mc_list);
	MCACHE_LIST_UNLOCK();
	/*
	 * If cache buckets are enabled and this is the first cache
	 * created, start the periodic cache update.
	 */
	if (!(mcache_flags & MCF_NOCPUCACHE) && !mcache_updating) {
		mcache_updating = 1;
		mcache_update_timeout(NULL);
	}

	if (cp->mc_flags & MCF_DEBUG) {
		printf("mcache_create: %s (%s) arg %p bufsize %lu align %lu "
		    "chunksize %lu bktsize %d\n", name, need_zone ? "i" : "e",
		    arg, bufsize, cp->mc_align, chunksize, btp->bt_bktsize);
	}
	return cp;

fail:
	if (buf != NULL) {
		zfree(mcache_zone, buf);
	}
	return NULL;
}
/*
 * Allocate one or more objects from a cache.
 */
__private_extern__ unsigned int
mcache_alloc_ext(mcache_t *cp, mcache_obj_t **list, unsigned int num, int wait)
{
	mcache_cpu_t *ccp;
	mcache_obj_t **top = &(*list);
	mcache_bkt_t *bkt;
	unsigned int need = num;
	boolean_t nwretry = FALSE;

	/* MCR_NOSLEEP and MCR_FAILOK are mutually exclusive */
	VERIFY((wait & (MCR_NOSLEEP | MCR_FAILOK)) != (MCR_NOSLEEP | MCR_FAILOK));

	ASSERT(list != NULL);
	*list = NULL;

	if (num == 0) {
		return 0;
	}

retry_alloc:
	/* We may not always be running in the same CPU in case of retries */
	ccp = MCACHE_CPU(cp);

	MCACHE_LOCK(&ccp->cc_lock);
	for (;;) {
		/*
		 * If we have an object in the current CPU's filled bucket,
		 * chain the object to any previous objects and return if
		 * we've satisfied the number of requested objects.
		 */
		if (ccp->cc_objs > 0) {
			mcache_obj_t *tail;
			int objs;

			/*
			 * Objects in the bucket are already linked together
			 * with the most recently freed object at the head of
			 * the list; grab as many objects as we can.
			 */
			objs = MIN((unsigned int)ccp->cc_objs, need);
			*list = ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
			ccp->cc_objs -= objs;
			ccp->cc_alloc += objs;

			tail = ccp->cc_filled->bkt_obj[ccp->cc_objs];
			list = &tail->obj_next;
			*list = NULL;

			/* If we got them all, return to caller */
			if ((need -= objs) == 0) {
				MCACHE_UNLOCK(&ccp->cc_lock);

				if (!(cp->mc_flags & MCF_NOLEAKLOG) &&
				    cp->mc_slab_log != NULL) {
					(*cp->mc_slab_log)(num, *top, TRUE);
				}

				if (cp->mc_flags & MCF_DEBUG) {
					goto debug_alloc;
				}

				return num;
			}
		}

		/*
		 * The CPU's filled bucket is empty.  If the previous filled
		 * bucket was full, exchange and try again.
		 */
		if (ccp->cc_pobjs > 0) {
			mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
			continue;
		}

		/*
		 * If the bucket layer is disabled, allocate from slab.  This
		 * can happen either because MCF_NOCPUCACHE is set, or because
		 * the bucket layer is currently being resized.
		 */
		if (ccp->cc_bktsize == 0) {
			break;
		}

		/*
		 * Both of the CPU's buckets are empty; try to get a full
		 * bucket from the bucket layer.  Upon success, refill this
		 * CPU and place any empty bucket into the empty list.
		 */
		bkt = mcache_bkt_alloc(cp, &cp->mc_full);
		if (bkt != NULL) {
			if (ccp->cc_pfilled != NULL) {
				mcache_bkt_free(cp, &cp->mc_empty,
				    ccp->cc_pfilled);
			}
			mcache_cpu_refill(ccp, bkt, ccp->cc_bktsize);
			continue;
		}

		/*
		 * The bucket layer has no full buckets; allocate the
		 * object(s) directly from the slab layer.
		 */
		break;
	}
	MCACHE_UNLOCK(&ccp->cc_lock);

	need -= (*cp->mc_slab_alloc)(cp->mc_private, &list, need, wait);

	/*
	 * If this is a blocking allocation, or if it is non-blocking and
	 * the cache's full bucket is non-empty, then retry the allocation.
	 */
	if (need > 0) {
		if (!(wait & MCR_NONBLOCKING)) {
			atomic_add_32(&cp->mc_wretry_cnt, 1);
			goto retry_alloc;
		} else if ((wait & (MCR_NOSLEEP | MCR_TRYHARD)) &&
		    !mcache_bkt_isempty(cp)) {
			if (!nwretry) {
				nwretry = TRUE;
			}
			atomic_add_32(&cp->mc_nwretry_cnt, 1);
			goto retry_alloc;
		} else if (nwretry) {
			atomic_add_32(&cp->mc_nwfail_cnt, 1);
		}
	}

	if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL) {
		(*cp->mc_slab_log)((num - need), *top, TRUE);
	}

	if (!(cp->mc_flags & MCF_DEBUG)) {
		return num - need;
	}

debug_alloc:
	if (cp->mc_flags & MCF_DEBUG) {
		mcache_obj_t **o = top;
		unsigned int n = 0;

		/*
		 * Verify that the chain of objects has the same count as
		 * what we are about to report to the caller.  Any mismatch
		 * here means that the object list is insanely broken and
		 * therefore we must panic.
		 */
		while (*o != NULL) {
			o = &(*o)->obj_next;
			++n;
		}
		if (n != (num - need)) {
			panic("mcache_alloc_ext: %s cp %p corrupted list "
			    "(got %d actual %d)\n", cp->mc_name,
			    (void *)cp, num - need, n);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}

	/* Invoke the slab layer audit callback if auditing is enabled */
	if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL) {
		(*cp->mc_slab_audit)(cp->mc_private, *top, TRUE);
	}

	return num - need;
}
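
/*
 * Illustrative sketch (compiled out, not part of the allocator): batched
 * allocation and free through the _ext interfaces.  The cache pointer and
 * the batch size of 16 are assumed example inputs.
 */
#if 0
static void
mcache_batch_example(mcache_t *cp)
{
	mcache_obj_t *list = NULL;
	mcache_obj_t *obj;
	unsigned int i, got;

	/* Try to pull up to 16 objects in one call; fewer may be returned */
	got = mcache_alloc_ext(cp, &list, 16, MCR_NOSLEEP);

	/* Walk the returned chain, linked through obj_next */
	obj = list;
	for (i = 0; i < got; i++) {
		/* ... use obj ... */
		obj = obj->obj_next;
	}

	/* Return the whole chain to the cache in one call */
	if (got > 0) {
		mcache_free_ext(cp, list);
	}
}
#endif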
/*
 * Allocate a single object from a cache.
 */
__private_extern__ void *
mcache_alloc(mcache_t *cp, int wait)
{
	mcache_obj_t *buf;

	(void) mcache_alloc_ext(cp, &buf, 1, wait);
	return buf;
}
__private_extern__ void
mcache_waiter_inc(mcache_t *cp)
{
	atomic_add_32(&cp->mc_waiter_cnt, 1);
}

__private_extern__ void
mcache_waiter_dec(mcache_t *cp)
{
	atomic_add_32(&cp->mc_waiter_cnt, -1);
}
__private_extern__ boolean_t
mcache_bkt_isempty(mcache_t *cp)
{
	/*
	 * This isn't meant to accurately tell whether there are
	 * any full buckets in the cache; it is simply a way to
	 * obtain "hints" about the state of the cache.
	 */
	return cp->mc_full.bl_total == 0;
}
/*
 * Notify the slab layer about an event.
 */
static void
mcache_notify(mcache_t *cp, u_int32_t event)
{
	if (cp->mc_slab_notify != NULL) {
		(*cp->mc_slab_notify)(cp->mc_private, event);
	}
}
/*
 * Purge the cache and disable its buckets.
 */
static void
mcache_purge(void *arg)
{
	mcache_t *cp = arg;

	mcache_bkt_purge(cp);
	/*
	 * We cannot simply call mcache_cache_bkt_enable() from here as
	 * a bucket resize may be in flight and we would cause the CPU
	 * layers of the cache to point to different sizes.  Therefore,
	 * we simply increment the enable count so that during the next
	 * periodic cache update the buckets can be reenabled.
	 */
	lck_mtx_lock_spin(&cp->mc_sync_lock);
	cp->mc_enable_cnt++;
	lck_mtx_unlock(&cp->mc_sync_lock);
}
__private_extern__ boolean_t
mcache_purge_cache(mcache_t *cp, boolean_t async)
{
	/*
	 * Purging a cache that has no per-CPU caches or is already
	 * in the process of being purged is rather pointless.
	 */
	if (cp->mc_flags & MCF_NOCPUCACHE) {
		return FALSE;
	}

	lck_mtx_lock_spin(&cp->mc_sync_lock);
	if (cp->mc_purge_cnt > 0) {
		lck_mtx_unlock(&cp->mc_sync_lock);
		return FALSE;
	}
	cp->mc_purge_cnt++;
	lck_mtx_unlock(&cp->mc_sync_lock);

	if (async) {
		mcache_dispatch(mcache_purge, cp);
	} else {
		mcache_purge(cp);
	}

	return TRUE;
}
/*
 * Free a single object to a cache.
 */
__private_extern__ void
mcache_free(mcache_t *cp, void *buf)
{
	((mcache_obj_t *)buf)->obj_next = NULL;
	mcache_free_ext(cp, (mcache_obj_t *)buf);
}
/*
 * Free one or more objects to a cache.
 */
__private_extern__ void
mcache_free_ext(mcache_t *cp, mcache_obj_t *list)
{
	mcache_cpu_t *ccp = MCACHE_CPU(cp);
	mcache_bkttype_t *btp;
	mcache_obj_t *nlist;
	mcache_bkt_t *bkt;

	if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL) {
		(*cp->mc_slab_log)(0, list, FALSE);
	}

	/* Invoke the slab layer audit callback if auditing is enabled */
	if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL) {
		(*cp->mc_slab_audit)(cp->mc_private, list, FALSE);
	}

	MCACHE_LOCK(&ccp->cc_lock);
	for (;;) {
		/*
		 * If there is space in the current CPU's filled bucket, put
		 * the object there and return once all objects are freed.
		 * Note the cast to unsigned integer takes care of the case
		 * where the bucket layer is disabled (when cc_objs is -1).
		 */
		if ((unsigned int)ccp->cc_objs <
		    (unsigned int)ccp->cc_bktsize) {
			/*
			 * Reverse the list while we place the object into the
			 * bucket; this effectively causes the most recently
			 * freed object(s) to be reused during allocation.
			 */
			nlist = list->obj_next;
			list->obj_next = (ccp->cc_objs == 0) ? NULL :
			    ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
			ccp->cc_filled->bkt_obj[ccp->cc_objs++] = list;
			ccp->cc_free++;

			if ((list = nlist) != NULL) {
				continue;
			}

			/* We are done; return to caller */
			MCACHE_UNLOCK(&ccp->cc_lock);

			/* If there is a waiter below, notify it */
			if (cp->mc_waiter_cnt > 0) {
				mcache_notify(cp, MCN_RETRYALLOC);
			}
			return;
		}

		/*
		 * The CPU's filled bucket is full.  If the previous filled
		 * bucket was empty, exchange and try again.
		 */
		if (ccp->cc_pobjs == 0) {
			mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
			continue;
		}

		/*
		 * If the bucket layer is disabled, free to slab.  This can
		 * happen either because MCF_NOCPUCACHE is set, or because
		 * the bucket layer is currently being resized.
		 */
		if (ccp->cc_bktsize == 0) {
			break;
		}

		/*
		 * Both of the CPU's buckets are full; try to get an empty
		 * bucket from the bucket layer.  Upon success, empty this
		 * CPU and place any full bucket into the full list.
		 */
		bkt = mcache_bkt_alloc(cp, &cp->mc_empty);
		if (bkt != NULL) {
			if (ccp->cc_pfilled != NULL) {
				mcache_bkt_free(cp, &cp->mc_full,
				    ccp->cc_pfilled);
			}
			mcache_cpu_refill(ccp, bkt, 0);
			continue;
		}
		btp = cp->cache_bkttype;

		/*
		 * We need an empty bucket to put our freed objects into
		 * but couldn't get an empty bucket from the bucket layer;
		 * attempt to allocate one.  We do not want to block for
		 * allocation here, and if the bucket allocation fails
		 * we will simply fall through to the slab layer.
		 */
		MCACHE_UNLOCK(&ccp->cc_lock);
		bkt = mcache_alloc(btp->bt_cache, MCR_NOSLEEP);
		MCACHE_LOCK(&ccp->cc_lock);

		if (bkt != NULL) {
			/*
			 * We have an empty bucket, but since we drop the
			 * CPU lock above, the cache's bucket size may have
			 * changed.  If so, free the bucket and try again.
			 */
			if (ccp->cc_bktsize != btp->bt_bktsize) {
				MCACHE_UNLOCK(&ccp->cc_lock);
				mcache_free(btp->bt_cache, bkt);
				MCACHE_LOCK(&ccp->cc_lock);
				continue;
			}

			/*
			 * Store it in the bucket object since we'll
			 * need to refer to it during bucket destroy;
			 * we can't safely refer to cache_bkttype as
			 * the bucket lock may not be acquired then.
			 */
			bkt->bkt_type = btp;

			/*
			 * We have an empty bucket of the right size;
			 * add it to the bucket layer and try again.
			 */
			mcache_bkt_free(cp, &cp->mc_empty, bkt);
			continue;
		}

		/*
		 * The bucket layer has no empty buckets; free the
		 * object(s) directly to the slab layer.
		 */
		break;
	}
	MCACHE_UNLOCK(&ccp->cc_lock);

	/* If there is a waiter below, notify it */
	if (cp->mc_waiter_cnt > 0) {
		mcache_notify(cp, MCN_RETRYALLOC);
	}

	/* Advise the slab layer to purge the object(s) */
	(*cp->mc_slab_free)(cp->mc_private, list,
	    (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
}
/*
 * Cache destruction routine.
 */
__private_extern__ void
mcache_destroy(mcache_t *cp)
{
	void **pbuf;

	MCACHE_LIST_LOCK();
	LIST_REMOVE(cp, mc_list);
	MCACHE_LIST_UNLOCK();

	mcache_bkt_purge(cp);

	/*
	 * This cache is dead; there should be no further transaction.
	 * If it's still invoked, make sure that it induces a fault.
	 */
	cp->mc_slab_alloc = NULL;
	cp->mc_slab_free = NULL;
	cp->mc_slab_audit = NULL;

	lck_attr_free(cp->mc_bkt_lock_attr);
	lck_grp_free(cp->mc_bkt_lock_grp);
	lck_grp_attr_free(cp->mc_bkt_lock_grp_attr);

	lck_attr_free(cp->mc_cpu_lock_attr);
	lck_grp_free(cp->mc_cpu_lock_grp);
	lck_grp_attr_free(cp->mc_cpu_lock_grp_attr);

	lck_attr_free(cp->mc_sync_lock_attr);
	lck_grp_free(cp->mc_sync_lock_grp);
	lck_grp_attr_free(cp->mc_sync_lock_grp_attr);

	/*
	 * TODO: We need to destroy the zone here, but cannot do it
	 * because there is no such way to achieve that.  Until then
	 * the memory allocated for the zone structure is leaked.
	 * Once it is achievable, uncomment these lines:
	 *
	 *	if (cp->mc_slab_zone != NULL) {
	 *		zdestroy(cp->mc_slab_zone);
	 *		cp->mc_slab_zone = NULL;
	 *	}
	 */

	/* Get the original address since we're about to free it */
	pbuf = (void **)((intptr_t)cp - sizeof(void *));

	zfree(mcache_zone, *pbuf);
}
/*
 * Internal slab allocator used as a backend for simple caches.  The current
 * implementation uses the zone allocator for simplicity reasons.
 */
static unsigned int
mcache_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num,
    int wait)
{
#pragma unused(wait)
	mcache_t *cp = arg;
	unsigned int need = num;
	size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof(u_int64_t));
	u_int32_t flags = cp->mc_flags;
	void *buf, *base, **pbuf;
	mcache_obj_t **list = *plist;

	*list = NULL;

	for (;;) {
		buf = zalloc(cp->mc_slab_zone);
		if (buf == NULL) {
			break;
		}

		/* Get the aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    cp->mc_align);

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;

		VERIFY(((intptr_t)base + cp->mc_bufsize) <=
		    ((intptr_t)buf + cp->mc_chunksize));

		/*
		 * If auditing is enabled, patternize the contents of
		 * the buffer starting from the 64-bit aligned base to
		 * the end of the buffer; the length is rounded up to
		 * the nearest 64-bit multiple; this is because we use
		 * 64-bit memory access to set/check the pattern.
		 */
		if (flags & MCF_DEBUG) {
			VERIFY(((intptr_t)base + rsize) <=
			    ((intptr_t)buf + cp->mc_chunksize));
			mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
		}

		VERIFY(IS_P2ALIGNED(base, cp->mc_align));
		*list = (mcache_obj_t *)base;

		(*list)->obj_next = NULL;
		list = *plist = &(*list)->obj_next;

		/* If we got them all, return to mcache */
		if (--need == 0) {
			break;
		}
	}

	return num - need;
}
/*
 * Internal slab deallocator used as a backend for simple caches.
 */
static void
mcache_slab_free(void *arg, mcache_obj_t *list, __unused boolean_t purged)
{
	mcache_t *cp = arg;
	mcache_obj_t *nlist;
	size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof(u_int64_t));
	u_int32_t flags = cp->mc_flags;
	void *base;
	void **pbuf;

	for (;;) {
		nlist = list->obj_next;
		list->obj_next = NULL;

		base = list;
		VERIFY(IS_P2ALIGNED(base, cp->mc_align));

		/* Get the original address since we're about to free it */
		pbuf = (void **)((intptr_t)base - sizeof(void *));

		VERIFY(((intptr_t)base + cp->mc_bufsize) <=
		    ((intptr_t)*pbuf + cp->mc_chunksize));

		if (flags & MCF_DEBUG) {
			VERIFY(((intptr_t)base + rsize) <=
			    ((intptr_t)*pbuf + cp->mc_chunksize));
			mcache_audit_free_verify(NULL, base, 0, rsize);
		}

		/* Free it to zone */
		zfree(cp->mc_slab_zone, *pbuf);

		/* No more objects to free; return to mcache */
		if ((list = nlist) == NULL) {
			break;
		}
	}
}
/*
 * Internal slab auditor for simple caches.
 */
static void
mcache_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
{
	mcache_t *cp = arg;
	size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof(u_int64_t));
	void *base, **pbuf;

	while (list != NULL) {
		mcache_obj_t *next = list->obj_next;

		base = list;
		VERIFY(IS_P2ALIGNED(base, cp->mc_align));

		/* Get the original address */
		pbuf = (void **)((intptr_t)base - sizeof(void *));

		VERIFY(((intptr_t)base + rsize) <=
		    ((intptr_t)*pbuf + cp->mc_chunksize));

		if (!alloc) {
			mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
		} else {
			mcache_audit_free_verify_set(NULL, base, 0, rsize);
		}

		list = list->obj_next = next;
	}
}
/*
 * Refill the CPU's filled bucket with bkt and save the previous one.
 */
static void
mcache_cpu_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs)
{
	ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) ||
	    (ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize));
	ASSERT(ccp->cc_bktsize > 0);

	ccp->cc_pfilled = ccp->cc_filled;
	ccp->cc_pobjs = ccp->cc_objs;
	ccp->cc_filled = bkt;
	ccp->cc_objs = objs;
}
/*
 * Allocate a bucket from the bucket layer.
 */
static mcache_bkt_t *
mcache_bkt_alloc(mcache_t *cp, mcache_bktlist_t *blp)
{
	mcache_bkt_t *bkt;

	if (!MCACHE_LOCK_TRY(&cp->mc_bkt_lock)) {
		/*
		 * The bucket layer lock is held by another CPU; increase
		 * the contention count so that we can later resize the
		 * bucket size accordingly.
		 */
		MCACHE_LOCK(&cp->mc_bkt_lock);
		cp->mc_bkt_contention++;
	}

	if ((bkt = blp->bl_list) != NULL) {
		blp->bl_list = bkt->bkt_next;
		if (--blp->bl_total < blp->bl_min) {
			blp->bl_min = blp->bl_total;
		}
		blp->bl_alloc++;
	}

	MCACHE_UNLOCK(&cp->mc_bkt_lock);

	return bkt;
}
/*
 * Free a bucket to the bucket layer.
 */
static void
mcache_bkt_free(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t *bkt)
{
	MCACHE_LOCK(&cp->mc_bkt_lock);

	bkt->bkt_next = blp->bl_list;
	blp->bl_list = bkt;
	blp->bl_total++;

	MCACHE_UNLOCK(&cp->mc_bkt_lock);
}
/*
 * Enable the bucket layer of a cache.
 */
static void
mcache_cache_bkt_enable(mcache_t *cp)
{
	mcache_cpu_t *ccp;
	unsigned int cpu;

	if (cp->mc_flags & MCF_NOCPUCACHE) {
		return;
	}

	for (cpu = 0; cpu < ncpu; cpu++) {
		ccp = &cp->mc_cpu[cpu];
		MCACHE_LOCK(&ccp->cc_lock);
		ccp->cc_bktsize = cp->cache_bkttype->bt_bktsize;
		MCACHE_UNLOCK(&ccp->cc_lock);
	}
}
/*
 * Purge all buckets from a cache and disable its bucket layer.
 */
static void
mcache_bkt_purge(mcache_t *cp)
{
	mcache_cpu_t *ccp;
	mcache_bkt_t *bp, *pbp;
	int objs, pobjs;
	unsigned int cpu;

	for (cpu = 0; cpu < ncpu; cpu++) {
		ccp = &cp->mc_cpu[cpu];

		MCACHE_LOCK(&ccp->cc_lock);

		bp = ccp->cc_filled;
		pbp = ccp->cc_pfilled;
		objs = ccp->cc_objs;
		pobjs = ccp->cc_pobjs;
		ccp->cc_filled = NULL;
		ccp->cc_pfilled = NULL;
		ccp->cc_objs = -1;
		ccp->cc_pobjs = -1;
		ccp->cc_bktsize = 0;

		MCACHE_UNLOCK(&ccp->cc_lock);

		if (bp != NULL) {
			mcache_bkt_destroy(cp, bp, objs);
		}
		if (pbp != NULL) {
			mcache_bkt_destroy(cp, pbp, pobjs);
		}
	}

	mcache_bkt_ws_zero(cp);
	mcache_bkt_ws_reap(cp);
}
/*
 * Free one or more objects in the bucket to the slab layer,
 * and also free the bucket itself.
 */
static void
mcache_bkt_destroy(mcache_t *cp, mcache_bkt_t *bkt, int nobjs)
{
	if (nobjs > 0) {
		mcache_obj_t *top = bkt->bkt_obj[nobjs - 1];

		if (cp->mc_flags & MCF_DEBUG) {
			mcache_obj_t *o = top;
			int cnt = 0;

			/*
			 * Verify that the chain of objects in the bucket is
			 * valid.  Any mismatch here means a mistake when the
			 * object(s) were freed to the CPU layer, so we panic.
			 */
			while (o != NULL) {
				o = o->obj_next;
				++cnt;
			}
			if (cnt != nobjs) {
				panic("mcache_bkt_destroy: %s cp %p corrupted "
				    "list in bkt %p (nobjs %d actual %d)\n",
				    cp->mc_name, (void *)cp, (void *)bkt,
				    nobjs, cnt);
				/* NOTREACHED */
				__builtin_unreachable();
			}
		}

		/* Advise the slab layer to purge the object(s) */
		(*cp->mc_slab_free)(cp->mc_private, top,
		    (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
	}
	mcache_free(bkt->bkt_type->bt_cache, bkt);
}
/*
 * Update the bucket layer working set statistics.
 */
static void
mcache_bkt_ws_update(mcache_t *cp)
{
	MCACHE_LOCK(&cp->mc_bkt_lock);

	cp->mc_full.bl_reaplimit = cp->mc_full.bl_min;
	cp->mc_full.bl_min = cp->mc_full.bl_total;
	cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_min;
	cp->mc_empty.bl_min = cp->mc_empty.bl_total;

	MCACHE_UNLOCK(&cp->mc_bkt_lock);
}
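
/*
 * Illustrative note (not in the original sources): bl_min tracks the lowest
 * bucket count observed since the previous update, i.e. buckets that sat
 * idle for the whole interval.  For example, if the full list held 10
 * buckets after the last update and never dropped below 4 during the
 * interval, bl_min ends at 4; the next mcache_bkt_ws_reap() may then
 * destroy up to MIN(bl_reaplimit, bl_min) of those unused buckets.
 */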
/*
 * Mark everything as eligible for reaping (working set is zero).
 */
static void
mcache_bkt_ws_zero(mcache_t *cp)
{
	MCACHE_LOCK(&cp->mc_bkt_lock);

	cp->mc_full.bl_reaplimit = cp->mc_full.bl_total;
	cp->mc_full.bl_min = cp->mc_full.bl_total;
	cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_total;
	cp->mc_empty.bl_min = cp->mc_empty.bl_total;

	MCACHE_UNLOCK(&cp->mc_bkt_lock);
}
/*
 * Reap all buckets that are beyond the working set.
 */
static void
mcache_bkt_ws_reap(mcache_t *cp)
{
	long reap;
	mcache_bkt_t *bkt;

	reap = MIN(cp->mc_full.bl_reaplimit, cp->mc_full.bl_min);
	while (reap-- &&
	    (bkt = mcache_bkt_alloc(cp, &cp->mc_full)) != NULL) {
		mcache_bkt_destroy(cp, bkt, bkt->bkt_type->bt_bktsize);
	}

	reap = MIN(cp->mc_empty.bl_reaplimit, cp->mc_empty.bl_min);
	while (reap-- &&
	    (bkt = mcache_bkt_alloc(cp, &cp->mc_empty)) != NULL) {
		mcache_bkt_destroy(cp, bkt, 0);
	}
}
static void
mcache_reap_timeout(thread_call_param_t dummy __unused,
    thread_call_param_t arg)
{
	volatile UInt32 *flag = arg;

	ASSERT(flag == &mcache_reaping);

	*flag = 0;
}
static void
mcache_reap_done(void *flag)
{
	uint64_t deadline, leeway;

	clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC,
	    &deadline);
	clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway,
	    NSEC_PER_SEC, &leeway);
	thread_call_enter_delayed_with_leeway(mcache_reap_tcall, flag,
	    deadline, leeway, THREAD_CALL_DELAY_LEEWAY);
}
static void
mcache_reap_start(void *arg)
{
	UInt32 *flag = arg;

	ASSERT(flag == &mcache_reaping);

	mcache_applyall(mcache_cache_reap);
	mcache_dispatch(mcache_reap_done, flag);
}
__private_extern__ void
mcache_reap(void)
{
	UInt32 *flag = &mcache_reaping;

	if (mcache_llock_owner == current_thread() ||
	    !OSCompareAndSwap(0, 1, flag)) {
		return;
	}

	mcache_dispatch(mcache_reap_start, flag);
}
__private_extern__ void
mcache_reap_now(mcache_t *cp, boolean_t purge)
{
	if (purge) {
		mcache_bkt_purge(cp);
		mcache_cache_bkt_enable(cp);
	} else {
		mcache_bkt_ws_zero(cp);
		mcache_bkt_ws_reap(cp);
	}
}
static void
mcache_cache_reap(mcache_t *cp)
{
	mcache_bkt_ws_reap(cp);
}
/*
 * Performs periodic maintenance on a cache.
 */
static void
mcache_cache_update(mcache_t *cp)
{
	int need_bkt_resize = 0;
	int need_bkt_reenable = 0;

	lck_mtx_assert(mcache_llock, LCK_MTX_ASSERT_OWNED);

	mcache_bkt_ws_update(cp);

	/*
	 * Cache resize and post-purge reenable are mutually exclusive.
	 * If the cache was previously purged, there is no point in
	 * increasing the bucket size as there was an indication of
	 * memory pressure on the system.
	 */
	lck_mtx_lock_spin(&cp->mc_sync_lock);
	if (!(cp->mc_flags & MCF_NOCPUCACHE) && cp->mc_enable_cnt) {
		need_bkt_reenable = 1;
	}
	lck_mtx_unlock(&cp->mc_sync_lock);

	MCACHE_LOCK(&cp->mc_bkt_lock);
	/*
	 * If the contention count is greater than the threshold, and if
	 * we are not already at the maximum bucket size, increase it.
	 * Otherwise, if this cache was previously purged by the user
	 * then we simply reenable it.
	 */
	if ((unsigned int)cp->mc_chunksize < cp->cache_bkttype->bt_maxbuf &&
	    (int)(cp->mc_bkt_contention - cp->mc_bkt_contention_prev) >
	    mcache_bkt_contention && !need_bkt_reenable) {
		need_bkt_resize = 1;
	}

	cp->mc_bkt_contention_prev = cp->mc_bkt_contention;
	MCACHE_UNLOCK(&cp->mc_bkt_lock);

	if (need_bkt_resize) {
		mcache_dispatch(mcache_cache_bkt_resize, cp);
	} else if (need_bkt_reenable) {
		mcache_dispatch(mcache_cache_enable, cp);
	}
}
/*
 * Recompute a cache's bucket size.  This is an expensive operation
 * and should not be done frequently; larger buckets provide for a
 * higher transfer rate with the bucket while smaller buckets reduce
 * the memory consumption.
 */
static void
mcache_cache_bkt_resize(void *arg)
{
	mcache_t *cp = arg;
	mcache_bkttype_t *btp = cp->cache_bkttype;

	if ((unsigned int)cp->mc_chunksize < btp->bt_maxbuf) {
		mcache_bkt_purge(cp);

		/*
		 * Upgrade to the next bucket type with larger bucket size;
		 * temporarily set the previous contention snapshot to a
		 * negative number to prevent unnecessary resize request.
		 */
		MCACHE_LOCK(&cp->mc_bkt_lock);
		cp->cache_bkttype = ++btp;
		cp->mc_bkt_contention_prev = cp->mc_bkt_contention + INT_MAX;
		MCACHE_UNLOCK(&cp->mc_bkt_lock);

		mcache_cache_enable(cp);
	}
}
/*
 * Reenable a previously disabled cache due to purge.
 */
static void
mcache_cache_enable(void *arg)
{
	mcache_t *cp = arg;

	lck_mtx_lock_spin(&cp->mc_sync_lock);
	cp->mc_purge_cnt = 0;
	cp->mc_enable_cnt = 0;
	lck_mtx_unlock(&cp->mc_sync_lock);

	mcache_cache_bkt_enable(cp);
}
static void
mcache_update_timeout(__unused void *arg)
{
	uint64_t deadline, leeway;

	clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC,
	    &deadline);
	clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway,
	    NSEC_PER_SEC, &leeway);
	thread_call_enter_delayed_with_leeway(mcache_update_tcall, NULL,
	    deadline, leeway, THREAD_CALL_DELAY_LEEWAY);
}
static void
mcache_update(thread_call_param_t arg __unused,
    thread_call_param_t dummy __unused)
{
	mcache_applyall(mcache_cache_update);
	mcache_update_timeout(NULL);
}
static void
mcache_applyall(void (*func)(mcache_t *))
{
	mcache_t *cp;

	MCACHE_LIST_LOCK();
	LIST_FOREACH(cp, &mcache_head, mc_list) {
		func(cp);
	}
	MCACHE_LIST_UNLOCK();
}
static void
mcache_dispatch(void (*func)(void *), void *arg)
{
	ASSERT(func != NULL);
	timeout(func, arg, hz / 1000);
}
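
/*
 * Illustrative note (not in the original sources): timeout() takes a tick
 * count, so hz / 1000 asks for roughly one millisecond's worth of ticks.
 * With the traditional BSD hz value of 100 the integer division yields 0,
 * meaning the dispatched function simply runs on the next callout tick.
 */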
__private_extern__ void
mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp,
    struct timeval *base_ts)
{
	struct timeval now, base = { .tv_sec = 0, .tv_usec = 0 };
	void *stack[MCACHE_STACK_DEPTH + 1];
	struct mca_trn *transaction;

	transaction = &mca->mca_trns[mca->mca_next_trn];

	mca->mca_addr = addr;
	mca->mca_cache = cp;

	transaction->mca_thread = current_thread();

	bzero(stack, sizeof(stack));
	transaction->mca_depth = (uint16_t)OSBacktrace(stack, MCACHE_STACK_DEPTH + 1) - 1;
	bcopy(&stack[1], transaction->mca_stack,
	    sizeof(transaction->mca_stack));

	microuptime(&now);
	if (base_ts != NULL) {
		base = *base_ts;
	}
	/* tstamp is in ms relative to base_ts */
	transaction->mca_tstamp = ((now.tv_usec - base.tv_usec) / 1000);
	if ((now.tv_sec - base.tv_sec) > 0) {
		transaction->mca_tstamp += ((now.tv_sec - base.tv_sec) * 1000);
	}

	mca->mca_next_trn =
	    (mca->mca_next_trn + 1) % mca_trn_max;
}
/*
 * N.B.: mcache_set_pattern(), mcache_verify_pattern() and
 * mcache_verify_set_pattern() are marked as noinline to prevent the
 * compiler from aliasing pointers when they are inlined inside the callers
 * (e.g. mcache_audit_free_verify_set()) which would be undefined behavior.
 */
__private_extern__ OS_NOINLINE void
mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size)
{
	u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
	u_int64_t *buf = (u_int64_t *)buf_arg;

	VERIFY(IS_P2ALIGNED(buf_arg, sizeof(u_int64_t)));
	VERIFY(IS_P2ALIGNED(size, sizeof(u_int64_t)));

	while (buf < buf_end) {
		*buf++ = pattern;
	}
}
__private_extern__ OS_NOINLINE void *
mcache_verify_pattern(u_int64_t pattern, void *buf_arg, size_t size)
{
	u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
	u_int64_t *buf;

	VERIFY(IS_P2ALIGNED(buf_arg, sizeof(u_int64_t)));
	VERIFY(IS_P2ALIGNED(size, sizeof(u_int64_t)));

	for (buf = buf_arg; buf < buf_end; buf++) {
		if (*buf != pattern) {
			return buf;
		}
	}
	return NULL;
}
OS_NOINLINE static void *
mcache_verify_set_pattern(u_int64_t old, u_int64_t new, void *buf_arg,
    size_t size)
{
	u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
	u_int64_t *buf;

	VERIFY(IS_P2ALIGNED(buf_arg, sizeof(u_int64_t)));
	VERIFY(IS_P2ALIGNED(size, sizeof(u_int64_t)));

	for (buf = buf_arg; buf < buf_end; buf++) {
		if (*buf != old) {
			mcache_set_pattern(old, buf_arg,
			    (uintptr_t)buf - (uintptr_t)buf_arg);
			return buf;
		}
		*buf = new;
	}
	return NULL;
}
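
/*
 * Illustrative note (not in the original sources): with MCF_DEBUG set, a
 * freed buffer is filled with MCACHE_FREE_PATTERN.  On reallocation,
 * mcache_audit_free_verify_set() checks that every 64-bit word still holds
 * the free pattern (catching writes after free) while rewriting each word
 * with MCACHE_UNINITIALIZED_PATTERN; the first mismatching word, if any, is
 * returned so the caller can panic with its offset and contents.
 */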
__private_extern__ void
mcache_audit_free_verify(mcache_audit_t *mca, void *base, size_t offset,
    size_t size)
{
	void *addr;
	u_int64_t *oaddr64;
	mcache_obj_t *next;

	addr = (void *)((uintptr_t)base + offset);
	next = ((mcache_obj_t *)addr)->obj_next;

	/* For the "obj_next" pointer in the buffer */
	oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof(u_int64_t));
	*oaddr64 = MCACHE_FREE_PATTERN;

	if ((oaddr64 = mcache_verify_pattern(MCACHE_FREE_PATTERN,
	    (caddr_t)base, size)) != NULL) {
		mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
		    (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
		/* NOTREACHED */
	}
	((mcache_obj_t *)addr)->obj_next = next;
}
__private_extern__ void
mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset,
    size_t size)
{
	void *addr;
	u_int64_t *oaddr64;
	mcache_obj_t *next;

	addr = (void *)((uintptr_t)base + offset);
	next = ((mcache_obj_t *)addr)->obj_next;

	/* For the "obj_next" pointer in the buffer */
	oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof(u_int64_t));
	*oaddr64 = MCACHE_FREE_PATTERN;

	if ((oaddr64 = mcache_verify_set_pattern(MCACHE_FREE_PATTERN,
	    MCACHE_UNINITIALIZED_PATTERN, (caddr_t)base, size)) != NULL) {
		mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
		    (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
		/* NOTREACHED */
	}
	((mcache_obj_t *)addr)->obj_next = next;
}
#define DUMP_TRN_FMT() \
	    "%s transaction thread %p saved PC stack (%d deep):\n" \
	    "\t%p, %p, %p, %p, %p, %p, %p, %p\n" \
	    "\t%p, %p, %p, %p, %p, %p, %p, %p\n"

#define DUMP_TRN_FIELDS(s, x) \
	    s, \
	    mca->mca_trns[x].mca_thread, mca->mca_trns[x].mca_depth, \
	    mca->mca_trns[x].mca_stack[0], mca->mca_trns[x].mca_stack[1], \
	    mca->mca_trns[x].mca_stack[2], mca->mca_trns[x].mca_stack[3], \
	    mca->mca_trns[x].mca_stack[4], mca->mca_trns[x].mca_stack[5], \
	    mca->mca_trns[x].mca_stack[6], mca->mca_trns[x].mca_stack[7], \
	    mca->mca_trns[x].mca_stack[8], mca->mca_trns[x].mca_stack[9], \
	    mca->mca_trns[x].mca_stack[10], mca->mca_trns[x].mca_stack[11], \
	    mca->mca_trns[x].mca_stack[12], mca->mca_trns[x].mca_stack[13], \
	    mca->mca_trns[x].mca_stack[14], mca->mca_trns[x].mca_stack[15]

#define MCA_TRN_LAST ((mca->mca_next_trn + mca_trn_max) % mca_trn_max)
#define MCA_TRN_PREV ((mca->mca_next_trn + mca_trn_max - 1) % mca_trn_max)
__private_extern__ char *
mcache_dump_mca(mcache_audit_t *mca)
{
	if (mca_dump_buf == NULL) {
		return NULL;
	}

	snprintf(mca_dump_buf, DUMP_MCA_BUF_SIZE,
	    "mca %p: addr %p, cache %p (%s) nxttrn %d\n"
	    DUMP_TRN_FMT()
	    DUMP_TRN_FMT(),

	    mca, mca->mca_addr, mca->mca_cache,
	    mca->mca_cache ? mca->mca_cache->mc_name : "?",
	    mca->mca_next_trn,

	    DUMP_TRN_FIELDS("last", MCA_TRN_LAST),
	    DUMP_TRN_FIELDS("previous", MCA_TRN_PREV));

	return mca_dump_buf;
}
__private_extern__ void
mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset,
    int64_t expected, int64_t got)
{
	if (mca == NULL) {
		panic("mcache_audit: buffer %p modified after free at "
		    "offset 0x%lx (0x%llx instead of 0x%llx)\n", addr,
		    offset, got, expected);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	panic("mcache_audit: buffer %p modified after free at offset 0x%lx "
	    "(0x%llx instead of 0x%llx)\n%s\n",
	    addr, offset, got, expected, mcache_dump_mca(mca));
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noinline, cold, not_tail_called, noreturn))
__private_extern__ int
assfail(const char *a, const char *f, int l)
{
	panic("assertion failed: %s, file: %s, line: %d", a, f, l);
	/* NOTREACHED */
	__builtin_unreachable();
}