2d21ac55 1/*
f427ee49 2 * Copyright (c) 2006-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * Memory allocator with per-CPU caching, derived from the kmem magazine
31 * concept and implementation as described in the following paper:
32 * http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick.pdf
33 * That implementation is Copyright 2006 Sun Microsystems, Inc. All rights
34 * reserved. Use is subject to license terms.
35 *
36 * There are several major differences between this and the original kmem
37 * magazine: this derivative implementation allows for multiple objects to
38 * be allocated and freed from/to the object cache in one call; in addition,
 39 * it provides greater flexibility by allowing the user to define a
 40 * custom slab allocator (instead of the default zone allocator). Finally,
 41 * no object construction/destruction takes place at the moment, although
 42 * this could be added in the future to improve efficiency.
43 */
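/*
 * Illustrative sketch (not part of the original source): how a client
 * might use the batched alloc/free interface described above.  The
 * cache name, object size and request count are hypothetical, and the
 * sketch is kept out of the build with #if 0.
 */
#if 0
static void
mcache_batch_example(void)
{
	mcache_t *mcp;
	mcache_obj_t *list, *obj;
	unsigned int i, got;

	/* Zone-backed cache of 256-byte objects, default alignment */
	mcp = mcache_create("example.256", 256, 0, 0, MCR_SLEEP);

	/* Ask for 16 objects in a single call; fewer may be returned */
	got = mcache_alloc_ext(mcp, &list, 16, MCR_NOSLEEP);

	/* Returned objects are chained through obj_next */
	for (obj = list, i = 0; obj != NULL; obj = obj->obj_next, i++) {
		/* use the object */
	}
	VERIFY(i == got);

	/* Free the whole chain back to the cache in one call */
	if (list != NULL) {
		mcache_free_ext(mcp, list);
	}
	mcache_destroy(mcp);
}
#endif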
44
45#include <sys/param.h>
46#include <sys/types.h>
47#include <sys/malloc.h>
48#include <sys/mbuf.h>
49#include <sys/queue.h>
50#include <sys/kernel.h>
51#include <sys/systm.h>
52
53#include <kern/debug.h>
54#include <kern/zalloc.h>
55#include <kern/cpu_number.h>
56#include <kern/locks.h>
fe8ab488 57#include <kern/thread_call.h>
58
59#include <libkern/libkern.h>
60#include <libkern/OSAtomic.h>
61#include <libkern/OSDebug.h>
62
63#include <mach/vm_param.h>
64#include <machine/limits.h>
65#include <machine/machine_routines.h>
66
67#include <string.h>
68
69#include <sys/mcache.h>
70
0a7de745 71#define MCACHE_SIZE(n) \
5ba3f43e 72 __builtin_offsetof(mcache_t, mc_cpu[n])
73
74/* Allocate extra in case we need to manually align the pointer */
0a7de745 75#define MCACHE_ALLOC_SIZE \
39236c6e 76 (sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_LINE_SIZE)
2d21ac55 77
0a7de745 78#define MCACHE_CPU(c) \
316670eb 79 (mcache_cpu_t *)((void *)((char *)(c) + MCACHE_SIZE(cpu_number())))
80
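/*
 * Illustrative sketch (not in the original source) of the alignment
 * trick that the extra space in MCACHE_ALLOC_SIZE is reserved for:
 * round the raw allocation up to a CPU cache line boundary and stash
 * the original pointer one word below the aligned address so that it
 * can be recovered at free time.  Kept out of the build with #if 0.
 */
#if 0
static void *
mcache_align_example(void *raw)
{
	void *aligned, **pbuf;

	aligned = (void *)P2ROUNDUP((intptr_t)raw + sizeof(void *),
	    CPU_CACHE_LINE_SIZE);
	pbuf = (void **)((intptr_t)aligned - sizeof(void *));
	*pbuf = raw;		/* later recovered via *pbuf and freed */
	return aligned;
}
#endif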
81/*
82 * MCACHE_LIST_LOCK() and MCACHE_LIST_UNLOCK() are macros used
83 * to serialize accesses to the global list of caches in the system.
84 * They also record the thread currently running in the critical
85 * section, so that we can avoid recursive requests to reap the
86 * caches when memory runs low.
87 */
0a7de745 88#define MCACHE_LIST_LOCK() { \
c3c9b80d 89 lck_mtx_lock(&mcache_llock); \
0a7de745 90 mcache_llock_owner = current_thread(); \
91}
92
93#define MCACHE_LIST_UNLOCK() { \
94 mcache_llock_owner = NULL; \
c3c9b80d 95 lck_mtx_unlock(&mcache_llock); \
96}
97
98#define MCACHE_LOCK(l) lck_mtx_lock(l)
99#define MCACHE_UNLOCK(l) lck_mtx_unlock(l)
100#define MCACHE_LOCK_TRY(l) lck_mtx_try_lock(l)
2d21ac55 101
f427ee49 102static unsigned int ncpu;
39236c6e 103static unsigned int cache_line_size;
2d21ac55 104static struct thread *mcache_llock_owner;
105static LCK_GRP_DECLARE(mcache_llock_grp, "mcache.list");
106static LCK_MTX_DECLARE(mcache_llock, &mcache_llock_grp);
2d21ac55 107static struct zone *mcache_zone;
108static const uint32_t mcache_reap_interval = 15;
109static const uint32_t mcache_reap_interval_leeway = 2;
110static UInt32 mcache_reaping;
111static int mcache_ready;
112static int mcache_updating;
113
114static int mcache_bkt_contention = 3;
115#if DEBUG
116static unsigned int mcache_flags = MCF_DEBUG;
117#else
118static unsigned int mcache_flags = 0;
119#endif
120
121int mca_trn_max = MCA_TRN_MAX;
122
2d21ac55 123static mcache_bkttype_t mcache_bkttype[] = {
124 { 1, 4096, 32768, NULL },
125 { 3, 2048, 16384, NULL },
126 { 7, 1024, 12288, NULL },
127 { 15, 256, 8192, NULL },
128 { 31, 64, 4096, NULL },
129 { 47, 0, 2048, NULL },
130 { 63, 0, 1024, NULL },
131 { 95, 0, 512, NULL },
132 { 143, 0, 256, NULL },
133 { 165, 0, 0, NULL },
134};
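/*
 * Illustrative sketch (not in the original source): bucket type
 * selection walks the table above until the cache's chunk size
 * exceeds bt_minbuf, so e.g. a 512-byte chunk ends up with
 * 15-object buckets while a 64-byte chunk gets 47-object buckets.
 * Kept out of the build with #if 0.
 */
#if 0
static mcache_bkttype_t *
mcache_bkttype_example(size_t chunksize)
{
	mcache_bkttype_t *btp;

	for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++) {
		continue;
	}
	return btp;
}
#endif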
135
136static mcache_t *mcache_create_common(const char *, size_t, size_t,
6d2010ae 137 mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t,
c3c9b80d 138 mcache_notifyfn_t, void *, u_int32_t, int);
139static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***,
140 unsigned int, int);
141static void mcache_slab_free(void *, mcache_obj_t *, boolean_t);
142static void mcache_slab_audit(void *, mcache_obj_t *, boolean_t);
143static void mcache_cpu_refill(mcache_cpu_t *, mcache_bkt_t *, int);
cb323159 144static mcache_bkt_t *mcache_bkt_alloc(mcache_t *, mcache_bktlist_t *);
145static void mcache_bkt_free(mcache_t *, mcache_bktlist_t *, mcache_bkt_t *);
146static void mcache_cache_bkt_enable(mcache_t *);
147static void mcache_bkt_purge(mcache_t *);
cb323159 148static void mcache_bkt_destroy(mcache_t *, mcache_bkt_t *, int);
2d21ac55 149static void mcache_bkt_ws_update(mcache_t *);
5ba3f43e 150static void mcache_bkt_ws_zero(mcache_t *);
151static void mcache_bkt_ws_reap(mcache_t *);
152static void mcache_dispatch(void (*)(void *), void *);
153static void mcache_cache_reap(mcache_t *);
154static void mcache_cache_update(mcache_t *);
155static void mcache_cache_bkt_resize(void *);
156static void mcache_cache_enable(void *);
fe8ab488 157static void mcache_update(thread_call_param_t __unused, thread_call_param_t __unused);
158static void mcache_update_timeout(void *);
159static void mcache_applyall(void (*)(mcache_t *));
160static void mcache_reap_start(void *);
161static void mcache_reap_done(void *);
fe8ab488 162static void mcache_reap_timeout(thread_call_param_t __unused, thread_call_param_t);
163static void mcache_notify(mcache_t *, u_int32_t);
164static void mcache_purge(void *);
165
166static LIST_HEAD(, mcache) mcache_head;
167mcache_t *mcache_audit_cache;
168
169static thread_call_t mcache_reap_tcall;
170static thread_call_t mcache_update_tcall;
171
172/*
173 * Initialize the framework; this is currently called as part of BSD init.
174 */
175__private_extern__ void
176mcache_init(void)
177{
178 mcache_bkttype_t *btp;
179 unsigned int i;
180 char name[32];
181
182 VERIFY(mca_trn_max >= 2);
183
f427ee49 184 ncpu = ml_wait_max_cpus();
0a7de745 185 (void) mcache_cache_line_size(); /* prime it */
2d21ac55 186
187 mcache_reap_tcall = thread_call_allocate(mcache_reap_timeout, NULL);
188 mcache_update_tcall = thread_call_allocate(mcache_update, NULL);
0a7de745 189 if (mcache_reap_tcall == NULL || mcache_update_tcall == NULL) {
fe8ab488 190 panic("mcache_init: thread_call_allocate failed");
191 /* NOTREACHED */
192 __builtin_unreachable();
0a7de745 193 }
fe8ab488 194
f427ee49 195 mcache_zone = zone_create("mcache", MCACHE_ALLOC_SIZE, ZC_DESTRUCTIBLE);
196
197 LIST_INIT(&mcache_head);
198
0a7de745 199 for (i = 0; i < sizeof(mcache_bkttype) / sizeof(*btp); i++) {
2d21ac55 200 btp = &mcache_bkttype[i];
0a7de745 201 (void) snprintf(name, sizeof(name), "bkt_%d",
202 btp->bt_bktsize);
203 btp->bt_cache = mcache_create(name,
0a7de745 204 (btp->bt_bktsize + 1) * sizeof(void *), 0, 0, MCR_SLEEP);
205 }
206
fe8ab488 207 PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof(mcache_flags));
208 mcache_flags &= MCF_FLAGS_MASK;
209
0a7de745 210 mcache_audit_cache = mcache_create("audit", sizeof(mcache_audit_t),
211 0, 0, MCR_SLEEP);
212
213 mcache_applyall(mcache_cache_bkt_enable);
214 mcache_ready = 1;
215
216 printf("mcache: %d CPU(s), %d bytes CPU cache line size\n",
217 ncpu, CPU_CACHE_LINE_SIZE);
218}
219
220/*
221 * Return the global mcache flags.
222 */
223__private_extern__ unsigned int
224mcache_getflags(void)
225{
0a7de745 226 return mcache_flags;
227}
228
229/*
230 * Return the CPU cache line size.
231 */
232__private_extern__ unsigned int
233mcache_cache_line_size(void)
234{
235 if (cache_line_size == 0) {
236 ml_cpu_info_t cpu_info;
237 ml_cpu_get_info(&cpu_info);
f427ee49 238 cache_line_size = (unsigned int)cpu_info.cache_line_size;
39236c6e 239 }
0a7de745 240 return cache_line_size;
241}
242
243/*
244 * Create a cache using the zone allocator as the backend slab allocator.
245 * The caller may specify any alignment for the object; if it specifies 0
246 * the default alignment (MCACHE_ALIGN) will be used.
247 */
248__private_extern__ mcache_t *
249mcache_create(const char *name, size_t bufsize, size_t align,
c3c9b80d 250 u_int32_t flags, int wait __unused)
2d21ac55 251{
0a7de745 252 return mcache_create_common(name, bufsize, align, mcache_slab_alloc,
c3c9b80d 253 mcache_slab_free, mcache_slab_audit, NULL, NULL, NULL, flags, 1);
254}
255
256/*
257 * Create a cache using a custom backend slab allocator. Since the caller
258 * is responsible for allocation, no alignment guarantee will be provided
259 * by this framework.
260 */
261__private_extern__ mcache_t *
262mcache_create_ext(const char *name, size_t bufsize,
263 mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
6d2010ae 264 mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
c3c9b80d 265 u_int32_t flags, int wait __unused)
2d21ac55 266{
0a7de745 267 return mcache_create_common(name, bufsize, 0, allocfn,
c3c9b80d 268 freefn, auditfn, logfn, notifyfn, arg, flags, 0);
269}
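/*
 * Illustrative sketch (not in the original source): a minimal custom
 * backend for mcache_create_ext(), assuming the client owns a zone
 * "example_zone" whose element size is at least the cache's chunk
 * size.  The callbacks follow the mcache_allocfn_t/mcache_freefn_t
 * contracts used by mcache_slab_alloc()/mcache_slab_free() below:
 * the alloc callback chains objects through obj_next and returns how
 * many it produced.  Kept out of the build with #if 0.
 */
#if 0
static unsigned int
example_backend_alloc(void *arg, mcache_obj_t ***plist, unsigned int num,
    int wait)
{
#pragma unused(wait)
	struct zone *zp = arg;
	mcache_obj_t **list = *plist;
	unsigned int need = num;

	*list = NULL;
	while (need > 0) {
		void *buf = zalloc(zp);
		if (buf == NULL) {
			break;
		}
		*list = (mcache_obj_t *)buf;
		(*list)->obj_next = NULL;
		list = *plist = &(*list)->obj_next;
		need--;
	}
	return num - need;
}

static void
example_backend_free(void *arg, mcache_obj_t *list, boolean_t purged)
{
#pragma unused(purged)
	struct zone *zp = arg;
	mcache_obj_t *next;

	while (list != NULL) {
		next = list->obj_next;
		list->obj_next = NULL;
		zfree(zp, list);
		list = next;
	}
}

/*
 * The cache would then be created with:
 *
 *	mcache_create_ext("example.ext", bufsize, example_backend_alloc,
 *	    example_backend_free, NULL, NULL, NULL, example_zone, 0,
 *	    MCR_SLEEP);
 */
#endif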
270
271/*
272 * Common cache creation routine.
273 */
274static mcache_t *
275mcache_create_common(const char *name, size_t bufsize, size_t align,
276 mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
6d2010ae 277 mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
c3c9b80d 278 u_int32_t flags, int need_zone)
279{
280 mcache_bkttype_t *btp;
281 mcache_t *cp = NULL;
282 size_t chunksize;
283 void *buf, **pbuf;
f427ee49 284 unsigned int c;
285 char lck_name[64];
286
c3c9b80d 287 buf = zalloc_flags(mcache_zone, Z_WAITOK | Z_ZERO);
0a7de745 288 if (buf == NULL) {
2d21ac55 289 goto fail;
0a7de745 290 }
2d21ac55 291
292 /*
 293 * In case we didn't get cache-aligned memory, round it up
 294 * accordingly. This is needed in order to get the rest of the
295 * structure members aligned properly. It also means that
296 * the memory span gets shifted due to the round up, but it
297 * is okay since we've allocated extra space for this.
298 */
299 cp = (mcache_t *)
300 P2ROUNDUP((intptr_t)buf + sizeof(void *), CPU_CACHE_LINE_SIZE);
301 pbuf = (void **)((intptr_t)cp - sizeof(void *));
302 *pbuf = buf;
303
304 /*
305 * Guaranteed alignment is valid only when we use the internal
306 * slab allocator (currently set to use the zone allocator).
307 */
5ba3f43e 308 if (!need_zone) {
2d21ac55 309 align = 1;
310 } else {
311 /* Enforce 64-bit minimum alignment for zone-based buffers */
0a7de745 312 if (align == 0) {
5ba3f43e 313 align = MCACHE_ALIGN;
0a7de745 314 }
315 align = P2ROUNDUP(align, MCACHE_ALIGN);
316 }
2d21ac55 317
0a7de745 318 if ((align & (align - 1)) != 0) {
2d21ac55 319 panic("mcache_create: bad alignment %lu", align);
320 /* NOTREACHED */
321 __builtin_unreachable();
0a7de745 322 }
323
324 cp->mc_align = align;
325 cp->mc_slab_alloc = allocfn;
326 cp->mc_slab_free = freefn;
327 cp->mc_slab_audit = auditfn;
6d2010ae 328 cp->mc_slab_log = logfn;
329 cp->mc_slab_notify = notifyfn;
330 cp->mc_private = need_zone ? cp : arg;
331 cp->mc_bufsize = bufsize;
332 cp->mc_flags = (flags & MCF_FLAGS_MASK) | mcache_flags;
333
0a7de745 334 (void) snprintf(cp->mc_name, sizeof(cp->mc_name), "mcache.%s", name);
2d21ac55 335
0a7de745 336 (void) snprintf(lck_name, sizeof(lck_name), "%s.cpu", cp->mc_name);
c3c9b80d 337 cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name, LCK_GRP_ATTR_NULL);
338
339 /*
340 * Allocation chunk size is the object's size plus any extra size
341 * needed to satisfy the object's alignment. It is enforced to be
342 * at least the size of an LP64 pointer to simplify auditing and to
343 * handle multiple-element allocation requests, where the elements
344 * returned are linked together in a list.
345 */
0a7de745 346 chunksize = MAX(bufsize, sizeof(u_int64_t));
2d21ac55 347 if (need_zone) {
5ba3f43e 348 VERIFY(align != 0 && (align % MCACHE_ALIGN) == 0);
0a7de745 349 chunksize += sizeof(uint64_t) + align;
2d21ac55 350 chunksize = P2ROUNDUP(chunksize, align);
f427ee49 351 cp->mc_slab_zone = zone_create(cp->mc_name, chunksize, ZC_DESTRUCTIBLE);
352 }
353 cp->mc_chunksize = chunksize;
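	/*
	 * Worked example (illustrative, not from the original source):
	 * for a zone-backed cache with bufsize 256 and align 64, the
	 * chunk size becomes MAX(256, 8) + 8 + 64 = 328, rounded up to
	 * the 64-byte alignment, i.e. 384 bytes per backing zone element.
	 */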
354
355 /*
356 * Initialize the bucket layer.
357 */
0a7de745 358 (void) snprintf(lck_name, sizeof(lck_name), "%s.bkt", cp->mc_name);
2d21ac55 359 cp->mc_bkt_lock_grp = lck_grp_alloc_init(lck_name,
360 LCK_GRP_ATTR_NULL);
361 lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp, LCK_ATTR_NULL);
2d21ac55 362
0a7de745 363 (void) snprintf(lck_name, sizeof(lck_name), "%s.sync", cp->mc_name);
2d21ac55 364 cp->mc_sync_lock_grp = lck_grp_alloc_init(lck_name,
365 LCK_GRP_ATTR_NULL);
366 lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp, LCK_ATTR_NULL);
2d21ac55 367
0a7de745 368 for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++) {
2d21ac55 369 continue;
0a7de745 370 }
371
372 cp->cache_bkttype = btp;
373
374 /*
375 * Initialize the CPU layer. Each per-CPU structure is aligned
376 * on the CPU cache line boundary to prevent false sharing.
377 */
378 for (c = 0; c < ncpu; c++) {
379 mcache_cpu_t *ccp = &cp->mc_cpu[c];
380
39236c6e 381 VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_LINE_SIZE));
c3c9b80d 382 lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp, LCK_ATTR_NULL);
383 ccp->cc_objs = -1;
384 ccp->cc_pobjs = -1;
385 }
386
0a7de745 387 if (mcache_ready) {
2d21ac55 388 mcache_cache_bkt_enable(cp);
0a7de745 389 }
390
391 /* TODO: dynamically create sysctl for stats */
392
393 MCACHE_LIST_LOCK();
394 LIST_INSERT_HEAD(&mcache_head, cp, mc_list);
395 MCACHE_LIST_UNLOCK();
396
397 /*
398 * If cache buckets are enabled and this is the first cache
399 * created, start the periodic cache update.
400 */
401 if (!(mcache_flags & MCF_NOCPUCACHE) && !mcache_updating) {
402 mcache_updating = 1;
403 mcache_update_timeout(NULL);
404 }
405 if (cp->mc_flags & MCF_DEBUG) {
406 printf("mcache_create: %s (%s) arg %p bufsize %lu align %lu "
407 "chunksize %lu bktsize %d\n", name, need_zone ? "i" : "e",
408 arg, bufsize, cp->mc_align, chunksize, btp->bt_bktsize);
409 }
0a7de745 410 return cp;
411
412fail:
0a7de745 413 if (buf != NULL) {
2d21ac55 414 zfree(mcache_zone, buf);
415 }
416 return NULL;
417}
418
419/*
420 * Allocate one or more objects from a cache.
421 */
422__private_extern__ unsigned int
423mcache_alloc_ext(mcache_t *cp, mcache_obj_t **list, unsigned int num, int wait)
424{
425 mcache_cpu_t *ccp;
426 mcache_obj_t **top = &(*list);
427 mcache_bkt_t *bkt;
428 unsigned int need = num;
429 boolean_t nwretry = FALSE;
430
431 /* MCR_NOSLEEP and MCR_FAILOK are mutually exclusive */
0a7de745 432 VERIFY((wait & (MCR_NOSLEEP | MCR_FAILOK)) != (MCR_NOSLEEP | MCR_FAILOK));
433
434 ASSERT(list != NULL);
435 *list = NULL;
436
437 if (num == 0) {
438 return 0;
439 }
440
441retry_alloc:
442 /* We may not always be running in the same CPU in case of retries */
443 ccp = MCACHE_CPU(cp);
444
445 MCACHE_LOCK(&ccp->cc_lock);
446 for (;;) {
447 /*
448 * If we have an object in the current CPU's filled bucket,
449 * chain the object to any previous objects and return if
450 * we've satisfied the number of requested objects.
451 */
452 if (ccp->cc_objs > 0) {
453 mcache_obj_t *tail;
454 int objs;
455
456 /*
457 * Objects in the bucket are already linked together
458 * with the most recently freed object at the head of
459 * the list; grab as many objects as we can.
460 */
461 objs = MIN((unsigned int)ccp->cc_objs, need);
462 *list = ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
463 ccp->cc_objs -= objs;
464 ccp->cc_alloc += objs;
465
466 tail = ccp->cc_filled->bkt_obj[ccp->cc_objs];
467 list = &tail->obj_next;
468 *list = NULL;
469
470 /* If we got them all, return to caller */
471 if ((need -= objs) == 0) {
472 MCACHE_UNLOCK(&ccp->cc_lock);
473
474 if (!(cp->mc_flags & MCF_NOLEAKLOG) &&
0a7de745 475 cp->mc_slab_log != NULL) {
6d2010ae 476 (*cp->mc_slab_log)(num, *top, TRUE);
0a7de745 477 }
6d2010ae 478
0a7de745 479 if (cp->mc_flags & MCF_DEBUG) {
2d21ac55 480 goto debug_alloc;
0a7de745 481 }
2d21ac55 482
0a7de745 483 return num;
484 }
485 }
486
487 /*
488 * The CPU's filled bucket is empty. If the previous filled
489 * bucket was full, exchange and try again.
490 */
491 if (ccp->cc_pobjs > 0) {
492 mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
493 continue;
494 }
495
496 /*
497 * If the bucket layer is disabled, allocate from slab. This
498 * can happen either because MCF_NOCPUCACHE is set, or because
499 * the bucket layer is currently being resized.
500 */
0a7de745 501 if (ccp->cc_bktsize == 0) {
2d21ac55 502 break;
0a7de745 503 }
504
505 /*
506 * Both of the CPU's buckets are empty; try to get a full
507 * bucket from the bucket layer. Upon success, refill this
508 * CPU and place any empty bucket into the empty list.
509 */
cb323159 510 bkt = mcache_bkt_alloc(cp, &cp->mc_full);
2d21ac55 511 if (bkt != NULL) {
0a7de745 512 if (ccp->cc_pfilled != NULL) {
513 mcache_bkt_free(cp, &cp->mc_empty,
514 ccp->cc_pfilled);
0a7de745 515 }
516 mcache_cpu_refill(ccp, bkt, ccp->cc_bktsize);
517 continue;
518 }
519
520 /*
521 * The bucket layer has no full buckets; allocate the
522 * object(s) directly from the slab layer.
523 */
524 break;
525 }
526 MCACHE_UNLOCK(&ccp->cc_lock);
527
528 need -= (*cp->mc_slab_alloc)(cp->mc_private, &list, need, wait);
529
530 /*
531 * If this is a blocking allocation, or if it is non-blocking and
532 * the cache's full bucket is non-empty, then retry the allocation.
533 */
534 if (need > 0) {
535 if (!(wait & MCR_NONBLOCKING)) {
536 atomic_add_32(&cp->mc_wretry_cnt, 1);
537 goto retry_alloc;
538 } else if ((wait & (MCR_NOSLEEP | MCR_TRYHARD)) &&
539 !mcache_bkt_isempty(cp)) {
0a7de745 540 if (!nwretry) {
2d21ac55 541 nwretry = TRUE;
0a7de745 542 }
543 atomic_add_32(&cp->mc_nwretry_cnt, 1);
544 goto retry_alloc;
545 } else if (nwretry) {
546 atomic_add_32(&cp->mc_nwfail_cnt, 1);
547 }
548 }
549
0a7de745 550 if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL) {
6d2010ae 551 (*cp->mc_slab_log)((num - need), *top, TRUE);
0a7de745 552 }
6d2010ae 553
554 if (!(cp->mc_flags & MCF_DEBUG)) {
555 return num - need;
556 }
557
558debug_alloc:
6d2010ae 559 if (cp->mc_flags & MCF_DEBUG) {
560 mcache_obj_t **o = top;
561 unsigned int n;
562
563 n = 0;
564 /*
 565 * Verify that the chain of objects has the same count as
566 * what we are about to report to the caller. Any mismatch
567 * here means that the object list is insanely broken and
568 * therefore we must panic.
569 */
570 while (*o != NULL) {
571 o = &(*o)->obj_next;
572 ++n;
573 }
574 if (n != (num - need)) {
575 panic("mcache_alloc_ext: %s cp %p corrupted list "
576 "(got %d actual %d)\n", cp->mc_name,
577 (void *)cp, num - need, n);
578 /* NOTREACHED */
579 __builtin_unreachable();
580 }
581 }
582
583 /* Invoke the slab layer audit callback if auditing is enabled */
0a7de745 584 if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL) {
2d21ac55 585 (*cp->mc_slab_audit)(cp->mc_private, *top, TRUE);
0a7de745 586 }
2d21ac55 587
0a7de745 588 return num - need;
589}
590
591/*
592 * Allocate a single object from a cache.
593 */
594__private_extern__ void *
595mcache_alloc(mcache_t *cp, int wait)
596{
597 mcache_obj_t *buf;
598
599 (void) mcache_alloc_ext(cp, &buf, 1, wait);
0a7de745 600 return buf;
601}
602
603__private_extern__ void
604mcache_waiter_inc(mcache_t *cp)
605{
606 atomic_add_32(&cp->mc_waiter_cnt, 1);
607}
608
609__private_extern__ void
610mcache_waiter_dec(mcache_t *cp)
611{
612 atomic_add_32(&cp->mc_waiter_cnt, -1);
613}
614
615__private_extern__ boolean_t
616mcache_bkt_isempty(mcache_t *cp)
617{
618 /*
619 * This isn't meant to accurately tell whether there are
620 * any full buckets in the cache; it is simply a way to
621 * obtain "hints" about the state of the cache.
622 */
0a7de745 623 return cp->mc_full.bl_total == 0;
624}
625
626/*
627 * Notify the slab layer about an event.
628 */
629static void
630mcache_notify(mcache_t *cp, u_int32_t event)
631{
0a7de745 632 if (cp->mc_slab_notify != NULL) {
2d21ac55 633 (*cp->mc_slab_notify)(cp->mc_private, event);
0a7de745 634 }
635}
636
637/*
638 * Purge the cache and disable its buckets.
639 */
640static void
641mcache_purge(void *arg)
642{
643 mcache_t *cp = arg;
644
645 mcache_bkt_purge(cp);
646 /*
647 * We cannot simply call mcache_cache_bkt_enable() from here as
648 * a bucket resize may be in flight and we would cause the CPU
649 * layers of the cache to point to different sizes. Therefore,
650 * we simply increment the enable count so that during the next
651 * periodic cache update the buckets can be reenabled.
652 */
653 lck_mtx_lock_spin(&cp->mc_sync_lock);
654 cp->mc_enable_cnt++;
655 lck_mtx_unlock(&cp->mc_sync_lock);
656}
657
658__private_extern__ boolean_t
fe8ab488 659mcache_purge_cache(mcache_t *cp, boolean_t async)
660{
661 /*
662 * Purging a cache that has no per-CPU caches or is already
663 * in the process of being purged is rather pointless.
664 */
665 if (cp->mc_flags & MCF_NOCPUCACHE) {
666 return FALSE;
667 }
668
669 lck_mtx_lock_spin(&cp->mc_sync_lock);
670 if (cp->mc_purge_cnt > 0) {
671 lck_mtx_unlock(&cp->mc_sync_lock);
0a7de745 672 return FALSE;
673 }
674 cp->mc_purge_cnt++;
675 lck_mtx_unlock(&cp->mc_sync_lock);
676
0a7de745 677 if (async) {
fe8ab488 678 mcache_dispatch(mcache_purge, cp);
0a7de745 679 } else {
fe8ab488 680 mcache_purge(cp);
0a7de745 681 }
2d21ac55 682
0a7de745 683 return TRUE;
684}
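/*
 * Illustrative sketch (not in the original source): how a client under
 * memory pressure might drive the purge/reap interfaces above.  Kept
 * out of the build with #if 0.
 */
#if 0
static void
example_memory_pressure(mcache_t *mcp)
{
	/* Asynchronously purge this cache's CPU and bucket layers */
	(void) mcache_purge_cache(mcp, TRUE);

	/* Or reap everything beyond the working set, for all caches */
	mcache_reap();
}
#endif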
685
686/*
687 * Free a single object to a cache.
688 */
689__private_extern__ void
690mcache_free(mcache_t *cp, void *buf)
691{
692 ((mcache_obj_t *)buf)->obj_next = NULL;
693 mcache_free_ext(cp, (mcache_obj_t *)buf);
694}
695
696/*
697 * Free one or more objects to a cache.
698 */
699__private_extern__ void
700mcache_free_ext(mcache_t *cp, mcache_obj_t *list)
701{
702 mcache_cpu_t *ccp = MCACHE_CPU(cp);
703 mcache_bkttype_t *btp;
704 mcache_obj_t *nlist;
705 mcache_bkt_t *bkt;
706
0a7de745 707 if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL) {
6d2010ae 708 (*cp->mc_slab_log)(0, list, FALSE);
0a7de745 709 }
6d2010ae 710
2d21ac55 711 /* Invoke the slab layer audit callback if auditing is enabled */
0a7de745 712 if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL) {
2d21ac55 713 (*cp->mc_slab_audit)(cp->mc_private, list, FALSE);
0a7de745 714 }
715
716 MCACHE_LOCK(&ccp->cc_lock);
717 for (;;) {
718 /*
719 * If there is space in the current CPU's filled bucket, put
720 * the object there and return once all objects are freed.
721 * Note the cast to unsigned integer takes care of the case
722 * where the bucket layer is disabled (when cc_objs is -1).
723 */
724 if ((unsigned int)ccp->cc_objs <
725 (unsigned int)ccp->cc_bktsize) {
726 /*
727 * Reverse the list while we place the object into the
728 * bucket; this effectively causes the most recently
729 * freed object(s) to be reused during allocation.
730 */
731 nlist = list->obj_next;
732 list->obj_next = (ccp->cc_objs == 0) ? NULL :
733 ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
734 ccp->cc_filled->bkt_obj[ccp->cc_objs++] = list;
735 ccp->cc_free++;
736
0a7de745 737 if ((list = nlist) != NULL) {
2d21ac55 738 continue;
0a7de745 739 }
740
741 /* We are done; return to caller */
742 MCACHE_UNLOCK(&ccp->cc_lock);
743
744 /* If there is a waiter below, notify it */
0a7de745 745 if (cp->mc_waiter_cnt > 0) {
2d21ac55 746 mcache_notify(cp, MCN_RETRYALLOC);
0a7de745 747 }
748 return;
749 }
750
751 /*
752 * The CPU's filled bucket is full. If the previous filled
753 * bucket was empty, exchange and try again.
754 */
755 if (ccp->cc_pobjs == 0) {
756 mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
757 continue;
758 }
759
760 /*
761 * If the bucket layer is disabled, free to slab. This can
762 * happen either because MCF_NOCPUCACHE is set, or because
763 * the bucket layer is currently being resized.
764 */
0a7de745 765 if (ccp->cc_bktsize == 0) {
2d21ac55 766 break;
0a7de745 767 }
768
769 /*
770 * Both of the CPU's buckets are full; try to get an empty
771 * bucket from the bucket layer. Upon success, empty this
772 * CPU and place any full bucket into the full list.
773 */
cb323159 774 bkt = mcache_bkt_alloc(cp, &cp->mc_empty);
2d21ac55 775 if (bkt != NULL) {
0a7de745 776 if (ccp->cc_pfilled != NULL) {
777 mcache_bkt_free(cp, &cp->mc_full,
778 ccp->cc_pfilled);
0a7de745 779 }
780 mcache_cpu_refill(ccp, bkt, 0);
781 continue;
782 }
cb323159 783 btp = cp->cache_bkttype;
784
785 /*
786 * We need an empty bucket to put our freed objects into
787 * but couldn't get an empty bucket from the bucket layer;
788 * attempt to allocate one. We do not want to block for
789 * allocation here, and if the bucket allocation fails
790 * we will simply fall through to the slab layer.
791 */
792 MCACHE_UNLOCK(&ccp->cc_lock);
793 bkt = mcache_alloc(btp->bt_cache, MCR_NOSLEEP);
794 MCACHE_LOCK(&ccp->cc_lock);
795
796 if (bkt != NULL) {
797 /*
798 * We have an empty bucket, but since we drop the
799 * CPU lock above, the cache's bucket size may have
800 * changed. If so, free the bucket and try again.
801 */
802 if (ccp->cc_bktsize != btp->bt_bktsize) {
803 MCACHE_UNLOCK(&ccp->cc_lock);
804 mcache_free(btp->bt_cache, bkt);
805 MCACHE_LOCK(&ccp->cc_lock);
806 continue;
807 }
808
809 /*
810 * Store it in the bucket object since we'll
811 * need to refer to it during bucket destroy;
812 * we can't safely refer to cache_bkttype as
813 * the bucket lock may not be acquired then.
814 */
815 bkt->bkt_type = btp;
816
817 /*
818 * We have an empty bucket of the right size;
819 * add it to the bucket layer and try again.
820 */
821 mcache_bkt_free(cp, &cp->mc_empty, bkt);
822 continue;
823 }
824
825 /*
826 * The bucket layer has no empty buckets; free the
827 * object(s) directly to the slab layer.
828 */
829 break;
830 }
831 MCACHE_UNLOCK(&ccp->cc_lock);
832
833 /* If there is a waiter below, notify it */
0a7de745 834 if (cp->mc_waiter_cnt > 0) {
2d21ac55 835 mcache_notify(cp, MCN_RETRYALLOC);
0a7de745 836 }
837
838 /* Advise the slab layer to purge the object(s) */
839 (*cp->mc_slab_free)(cp->mc_private, list,
840 (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
841}
842
843/*
844 * Cache destruction routine.
845 */
846__private_extern__ void
847mcache_destroy(mcache_t *cp)
848{
849 void **pbuf;
850
851 MCACHE_LIST_LOCK();
852 LIST_REMOVE(cp, mc_list);
853 MCACHE_LIST_UNLOCK();
854
855 mcache_bkt_purge(cp);
856
857 /*
 858 * This cache is dead; there should be no further transactions.
859 * If it's still invoked, make sure that it induces a fault.
860 */
861 cp->mc_slab_alloc = NULL;
862 cp->mc_slab_free = NULL;
863 cp->mc_slab_audit = NULL;
864
2d21ac55 865 lck_grp_free(cp->mc_bkt_lock_grp);
2d21ac55 866 lck_grp_free(cp->mc_cpu_lock_grp);
2d21ac55 867 lck_grp_free(cp->mc_sync_lock_grp);
868
869 /*
 870 * TODO: We need to destroy the zone here, but cannot do it
 871 * because there is currently no way to do so. Until then
 872 * the memory allocated for the zone structure is leaked.
 873 * Once it becomes possible, uncomment these lines:
874 *
875 * if (cp->mc_slab_zone != NULL) {
876 * zdestroy(cp->mc_slab_zone);
877 * cp->mc_slab_zone = NULL;
878 * }
879 */
880
881 /* Get the original address since we're about to free it */
0a7de745 882 pbuf = (void **)((intptr_t)cp - sizeof(void *));
883
884 zfree(mcache_zone, *pbuf);
885}
886
887/*
888 * Internal slab allocator used as a backend for simple caches. The current
889 * implementation uses the zone allocator for simplicity reasons.
890 */
891static unsigned int
892mcache_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num,
893 int wait)
2d21ac55 894{
5ba3f43e 895#pragma unused(wait)
896 mcache_t *cp = arg;
897 unsigned int need = num;
0a7de745 898 size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof(u_int64_t));
899 u_int32_t flags = cp->mc_flags;
900 void *buf, *base, **pbuf;
901 mcache_obj_t **list = *plist;
902
903 *list = NULL;
904
2d21ac55 905 for (;;) {
5ba3f43e 906 buf = zalloc(cp->mc_slab_zone);
0a7de745 907 if (buf == NULL) {
2d21ac55 908 break;
0a7de745 909 }
2d21ac55 910
5ba3f43e 911 /* Get the aligned base address for this object */
0a7de745 912 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
5ba3f43e 913 cp->mc_align);
914
915 /*
916 * Wind back a pointer size from the aligned base and
917 * save the original address so we can free it later.
918 */
0a7de745 919 pbuf = (void **)((intptr_t)base - sizeof(void *));
920 *pbuf = buf;
921
0a7de745 922 VERIFY(((intptr_t)base + cp->mc_bufsize) <=
923 ((intptr_t)buf + cp->mc_chunksize));
924
925 /*
926 * If auditing is enabled, patternize the contents of
927 * the buffer starting from the 64-bit aligned base to
928 * the end of the buffer; the length is rounded up to
 929 * the nearest 64-bit multiple; this is because we use
 930 * 64-bit memory accesses to set/check the pattern.
931 */
6d2010ae 932 if (flags & MCF_DEBUG) {
933 VERIFY(((intptr_t)base + rsize) <=
934 ((intptr_t)buf + cp->mc_chunksize));
935 mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
936 }
937
938 VERIFY(IS_P2ALIGNED(base, cp->mc_align));
939 *list = (mcache_obj_t *)base;
940
941 (*list)->obj_next = NULL;
942 list = *plist = &(*list)->obj_next;
943
944 /* If we got them all, return to mcache */
0a7de745 945 if (--need == 0) {
2d21ac55 946 break;
0a7de745 947 }
948 }
949
0a7de745 950 return num - need;
951}
952
953/*
954 * Internal slab deallocator used as a backend for simple caches.
955 */
956static void
957mcache_slab_free(void *arg, mcache_obj_t *list, __unused boolean_t purged)
958{
959 mcache_t *cp = arg;
960 mcache_obj_t *nlist;
0a7de745 961 size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof(u_int64_t));
962 u_int32_t flags = cp->mc_flags;
963 void *base;
964 void **pbuf;
965
966 for (;;) {
967 nlist = list->obj_next;
968 list->obj_next = NULL;
969
970 base = list;
971 VERIFY(IS_P2ALIGNED(base, cp->mc_align));
972
973 /* Get the original address since we're about to free it */
0a7de745 974 pbuf = (void **)((intptr_t)base - sizeof(void *));
2d21ac55 975
976 VERIFY(((intptr_t)base + cp->mc_bufsize) <=
977 ((intptr_t)*pbuf + cp->mc_chunksize));
978
6d2010ae 979 if (flags & MCF_DEBUG) {
980 VERIFY(((intptr_t)base + rsize) <=
981 ((intptr_t)*pbuf + cp->mc_chunksize));
5ba3f43e 982 mcache_audit_free_verify(NULL, base, 0, rsize);
983 }
984
985 /* Free it to zone */
986 zfree(cp->mc_slab_zone, *pbuf);
987
988 /* No more objects to free; return to mcache */
0a7de745 989 if ((list = nlist) == NULL) {
2d21ac55 990 break;
0a7de745 991 }
992 }
993}
994
995/*
996 * Internal slab auditor for simple caches.
997 */
998static void
999mcache_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
1000{
1001 mcache_t *cp = arg;
0a7de745 1002 size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof(u_int64_t));
1003 void *base, **pbuf;
1004
1005 while (list != NULL) {
1006 mcache_obj_t *next = list->obj_next;
1007
1008 base = list;
1009 VERIFY(IS_P2ALIGNED(base, cp->mc_align));
1010
1011 /* Get the original address */
0a7de745 1012 pbuf = (void **)((intptr_t)base - sizeof(void *));
1013
1014 VERIFY(((intptr_t)base + rsize) <=
1015 ((intptr_t)*pbuf + cp->mc_chunksize));
1016
0a7de745 1017 if (!alloc) {
2d21ac55 1018 mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
0a7de745 1019 } else {
5ba3f43e 1020 mcache_audit_free_verify_set(NULL, base, 0, rsize);
0a7de745 1021 }
1022
1023 list = list->obj_next = next;
1024 }
1025}
1026
1027/*
1028 * Refill the CPU's filled bucket with bkt and save the previous one.
1029 */
1030static void
1031mcache_cpu_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs)
1032{
1033 ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) ||
1034 (ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize));
1035 ASSERT(ccp->cc_bktsize > 0);
1036
1037 ccp->cc_pfilled = ccp->cc_filled;
1038 ccp->cc_pobjs = ccp->cc_objs;
1039 ccp->cc_filled = bkt;
1040 ccp->cc_objs = objs;
1041}
1042
1043/*
1044 * Allocate a bucket from the bucket layer.
1045 */
1046static mcache_bkt_t *
cb323159 1047mcache_bkt_alloc(mcache_t *cp, mcache_bktlist_t *blp)
1048{
1049 mcache_bkt_t *bkt;
1050
1051 if (!MCACHE_LOCK_TRY(&cp->mc_bkt_lock)) {
1052 /*
1053 * The bucket layer lock is held by another CPU; increase
1054 * the contention count so that we can later resize the
1055 * bucket size accordingly.
1056 */
1057 MCACHE_LOCK(&cp->mc_bkt_lock);
1058 cp->mc_bkt_contention++;
1059 }
1060
1061 if ((bkt = blp->bl_list) != NULL) {
1062 blp->bl_list = bkt->bkt_next;
0a7de745 1063 if (--blp->bl_total < blp->bl_min) {
2d21ac55 1064 blp->bl_min = blp->bl_total;
0a7de745 1065 }
1066 blp->bl_alloc++;
1067 }
1068
1069 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1070
0a7de745 1071 return bkt;
1072}
1073
1074/*
1075 * Free a bucket to the bucket layer.
1076 */
1077static void
1078mcache_bkt_free(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t *bkt)
1079{
1080 MCACHE_LOCK(&cp->mc_bkt_lock);
1081
1082 bkt->bkt_next = blp->bl_list;
1083 blp->bl_list = bkt;
1084 blp->bl_total++;
1085
1086 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1087}
1088
1089/*
1090 * Enable the bucket layer of a cache.
1091 */
1092static void
1093mcache_cache_bkt_enable(mcache_t *cp)
1094{
1095 mcache_cpu_t *ccp;
f427ee49 1096 unsigned int cpu;
2d21ac55 1097
0a7de745 1098 if (cp->mc_flags & MCF_NOCPUCACHE) {
2d21ac55 1099 return;
0a7de745 1100 }
1101
1102 for (cpu = 0; cpu < ncpu; cpu++) {
1103 ccp = &cp->mc_cpu[cpu];
1104 MCACHE_LOCK(&ccp->cc_lock);
1105 ccp->cc_bktsize = cp->cache_bkttype->bt_bktsize;
1106 MCACHE_UNLOCK(&ccp->cc_lock);
1107 }
1108}
1109
1110/*
1111 * Purge all buckets from a cache and disable its bucket layer.
1112 */
1113static void
1114mcache_bkt_purge(mcache_t *cp)
1115{
1116 mcache_cpu_t *ccp;
1117 mcache_bkt_t *bp, *pbp;
1118 int objs, pobjs;
1119 unsigned int cpu;
1120
1121 for (cpu = 0; cpu < ncpu; cpu++) {
1122 ccp = &cp->mc_cpu[cpu];
1123
1124 MCACHE_LOCK(&ccp->cc_lock);
1125
1126 bp = ccp->cc_filled;
1127 pbp = ccp->cc_pfilled;
1128 objs = ccp->cc_objs;
1129 pobjs = ccp->cc_pobjs;
1130 ccp->cc_filled = NULL;
1131 ccp->cc_pfilled = NULL;
1132 ccp->cc_objs = -1;
1133 ccp->cc_pobjs = -1;
1134 ccp->cc_bktsize = 0;
1135
1136 MCACHE_UNLOCK(&ccp->cc_lock);
1137
0a7de745 1138 if (bp != NULL) {
cb323159 1139 mcache_bkt_destroy(cp, bp, objs);
1140 }
1141 if (pbp != NULL) {
cb323159 1142 mcache_bkt_destroy(cp, pbp, pobjs);
0a7de745 1143 }
1144 }
1145
5ba3f43e 1146 mcache_bkt_ws_zero(cp);
1147 mcache_bkt_ws_reap(cp);
1148}
1149
1150/*
1151 * Free one or more objects in the bucket to the slab layer,
1152 * and also free the bucket itself.
1153 */
1154static void
cb323159 1155mcache_bkt_destroy(mcache_t *cp, mcache_bkt_t *bkt, int nobjs)
1156{
1157 if (nobjs > 0) {
1158 mcache_obj_t *top = bkt->bkt_obj[nobjs - 1];
1159
6d2010ae 1160 if (cp->mc_flags & MCF_DEBUG) {
1161 mcache_obj_t *o = top;
1162 int cnt = 0;
1163
1164 /*
1165 * Verify that the chain of objects in the bucket is
1166 * valid. Any mismatch here means a mistake when the
1167 * object(s) were freed to the CPU layer, so we panic.
1168 */
1169 while (o != NULL) {
1170 o = o->obj_next;
1171 ++cnt;
1172 }
1173 if (cnt != nobjs) {
1174 panic("mcache_bkt_destroy: %s cp %p corrupted "
1175 "list in bkt %p (nobjs %d actual %d)\n",
1176 cp->mc_name, (void *)cp, (void *)bkt,
1177 nobjs, cnt);
1178 /* NOTREACHED */
1179 __builtin_unreachable();
1180 }
1181 }
1182
1183 /* Advise the slab layer to purge the object(s) */
1184 (*cp->mc_slab_free)(cp->mc_private, top,
1185 (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
1186 }
cb323159 1187 mcache_free(bkt->bkt_type->bt_cache, bkt);
1188}
1189
1190/*
1191 * Update the bucket layer working set statistics.
1192 */
1193static void
1194mcache_bkt_ws_update(mcache_t *cp)
1195{
1196 MCACHE_LOCK(&cp->mc_bkt_lock);
1197
1198 cp->mc_full.bl_reaplimit = cp->mc_full.bl_min;
1199 cp->mc_full.bl_min = cp->mc_full.bl_total;
1200 cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_min;
1201 cp->mc_empty.bl_min = cp->mc_empty.bl_total;
1202
1203 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1204}
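/*
 * Worked example (illustrative, not from the original source): if the
 * full-bucket list never dropped below 7 buckets during the previous
 * interval, bl_min is 7 when the update above runs, so bl_reaplimit
 * becomes 7 and the next mcache_bkt_ws_reap() may release at most 7
 * full buckets; buckets that were used within the interval count as
 * part of the working set and are kept.
 */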
1205
1206/*
1207 * Mark everything as eligible for reaping (working set is zero).
1208 */
1209static void
1210mcache_bkt_ws_zero(mcache_t *cp)
1211{
1212 MCACHE_LOCK(&cp->mc_bkt_lock);
1213
1214 cp->mc_full.bl_reaplimit = cp->mc_full.bl_total;
1215 cp->mc_full.bl_min = cp->mc_full.bl_total;
1216 cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_total;
1217 cp->mc_empty.bl_min = cp->mc_empty.bl_total;
1218
1219 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1220}
1221
1222/*
1223 * Reap all buckets that are beyond the working set.
1224 */
1225static void
1226mcache_bkt_ws_reap(mcache_t *cp)
1227{
1228 long reap;
1229 mcache_bkt_t *bkt;
1230
1231 reap = MIN(cp->mc_full.bl_reaplimit, cp->mc_full.bl_min);
1232 while (reap-- &&
1233 (bkt = mcache_bkt_alloc(cp, &cp->mc_full)) != NULL) {
1234 mcache_bkt_destroy(cp, bkt, bkt->bkt_type->bt_bktsize);
0a7de745 1235 }
1236
1237 reap = MIN(cp->mc_empty.bl_reaplimit, cp->mc_empty.bl_min);
1238 while (reap-- &&
1239 (bkt = mcache_bkt_alloc(cp, &cp->mc_empty)) != NULL) {
1240 mcache_bkt_destroy(cp, bkt, 0);
0a7de745 1241 }
1242}
1243
1244static void
1245mcache_reap_timeout(thread_call_param_t dummy __unused,
1246 thread_call_param_t arg)
1247{
1248 volatile UInt32 *flag = arg;
1249
1250 ASSERT(flag == &mcache_reaping);
1251
1252 *flag = 0;
1253}
1254
1255static void
1256mcache_reap_done(void *flag)
1257{
1258 uint64_t deadline, leeway;
1259
1260 clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC,
1261 &deadline);
1262 clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway,
1263 NSEC_PER_SEC, &leeway);
1264 thread_call_enter_delayed_with_leeway(mcache_reap_tcall, flag,
1265 deadline, leeway, THREAD_CALL_DELAY_LEEWAY);
1266}
1267
1268static void
1269mcache_reap_start(void *arg)
1270{
1271 UInt32 *flag = arg;
1272
1273 ASSERT(flag == &mcache_reaping);
1274
1275 mcache_applyall(mcache_cache_reap);
1276 mcache_dispatch(mcache_reap_done, flag);
1277}
1278
1279__private_extern__ void
1280mcache_reap(void)
1281{
1282 UInt32 *flag = &mcache_reaping;
1283
1284 if (mcache_llock_owner == current_thread() ||
0a7de745 1285 !OSCompareAndSwap(0, 1, flag)) {
2d21ac55 1286 return;
0a7de745 1287 }
1288
1289 mcache_dispatch(mcache_reap_start, flag);
1290}
1291
1292__private_extern__ void
1293mcache_reap_now(mcache_t *cp, boolean_t purge)
1294{
1295 if (purge) {
1296 mcache_bkt_purge(cp);
1297 mcache_cache_bkt_enable(cp);
1298 } else {
1299 mcache_bkt_ws_zero(cp);
1300 mcache_bkt_ws_reap(cp);
1301 }
1302}
1303
1304static void
1305mcache_cache_reap(mcache_t *cp)
1306{
1307 mcache_bkt_ws_reap(cp);
1308}
1309
1310/*
 1311 * Performs periodic maintenance on a cache.
1312 */
1313static void
1314mcache_cache_update(mcache_t *cp)
1315{
1316 int need_bkt_resize = 0;
1317 int need_bkt_reenable = 0;
1318
c3c9b80d 1319 lck_mtx_assert(&mcache_llock, LCK_MTX_ASSERT_OWNED);
1320
1321 mcache_bkt_ws_update(cp);
1322
1323 /*
1324 * Cache resize and post-purge reenable are mutually exclusive.
1325 * If the cache was previously purged, there is no point of
1326 * increasing the bucket size as there was an indication of
1327 * memory pressure on the system.
1328 */
1329 lck_mtx_lock_spin(&cp->mc_sync_lock);
0a7de745 1330 if (!(cp->mc_flags & MCF_NOCPUCACHE) && cp->mc_enable_cnt) {
2d21ac55 1331 need_bkt_reenable = 1;
0a7de745 1332 }
1333 lck_mtx_unlock(&cp->mc_sync_lock);
1334
1335 MCACHE_LOCK(&cp->mc_bkt_lock);
1336 /*
1337 * If the contention count is greater than the threshold, and if
1338 * we are not already at the maximum bucket size, increase it.
1339 * Otherwise, if this cache was previously purged by the user
1340 * then we simply reenable it.
1341 */
1342 if ((unsigned int)cp->mc_chunksize < cp->cache_bkttype->bt_maxbuf &&
1343 (int)(cp->mc_bkt_contention - cp->mc_bkt_contention_prev) >
0a7de745 1344 mcache_bkt_contention && !need_bkt_reenable) {
2d21ac55 1345 need_bkt_resize = 1;
0a7de745 1346 }
2d21ac55 1347
0a7de745 1348 cp->mc_bkt_contention_prev = cp->mc_bkt_contention;
2d21ac55
A
1349 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1350
0a7de745 1351 if (need_bkt_resize) {
2d21ac55 1352 mcache_dispatch(mcache_cache_bkt_resize, cp);
0a7de745 1353 } else if (need_bkt_reenable) {
2d21ac55 1354 mcache_dispatch(mcache_cache_enable, cp);
0a7de745 1355 }
1356}
1357
1358/*
1359 * Recompute a cache's bucket size. This is an expensive operation
 1360 * and should not be done frequently; larger buckets provide a
 1361 * higher transfer rate between the CPU and bucket layers, while
 1362 * smaller buckets reduce memory consumption.
1363 */
1364static void
1365mcache_cache_bkt_resize(void *arg)
1366{
1367 mcache_t *cp = arg;
1368 mcache_bkttype_t *btp = cp->cache_bkttype;
1369
1370 if ((unsigned int)cp->mc_chunksize < btp->bt_maxbuf) {
1371 mcache_bkt_purge(cp);
1372
1373 /*
1374 * Upgrade to the next bucket type with larger bucket size;
1375 * temporarily set the previous contention snapshot to a
1376 * negative number to prevent unnecessary resize request.
1377 */
1378 MCACHE_LOCK(&cp->mc_bkt_lock);
1379 cp->cache_bkttype = ++btp;
0a7de745 1380 cp->mc_bkt_contention_prev = cp->mc_bkt_contention + INT_MAX;
1381 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1382
1383 mcache_cache_enable(cp);
1384 }
1385}
1386
1387/*
1388 * Reenable a previously disabled cache due to purge.
1389 */
1390static void
1391mcache_cache_enable(void *arg)
1392{
1393 mcache_t *cp = arg;
1394
1395 lck_mtx_lock_spin(&cp->mc_sync_lock);
1396 cp->mc_purge_cnt = 0;
1397 cp->mc_enable_cnt = 0;
1398 lck_mtx_unlock(&cp->mc_sync_lock);
1399
1400 mcache_cache_bkt_enable(cp);
1401}
1402
1403static void
1404mcache_update_timeout(__unused void *arg)
1405{
1406 uint64_t deadline, leeway;
1407
1408 clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC,
1409 &deadline);
1410 clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway,
1411 NSEC_PER_SEC, &leeway);
1412 thread_call_enter_delayed_with_leeway(mcache_update_tcall, NULL,
1413 deadline, leeway, THREAD_CALL_DELAY_LEEWAY);
1414}
1415
1416static void
1417mcache_update(thread_call_param_t arg __unused,
1418 thread_call_param_t dummy __unused)
1419{
1420 mcache_applyall(mcache_cache_update);
fe8ab488 1421 mcache_update_timeout(NULL);
1422}
1423
1424static void
1425mcache_applyall(void (*func)(mcache_t *))
1426{
1427 mcache_t *cp;
1428
1429 MCACHE_LIST_LOCK();
1430 LIST_FOREACH(cp, &mcache_head, mc_list) {
1431 func(cp);
1432 }
1433 MCACHE_LIST_UNLOCK();
1434}
1435
1436static void
1437mcache_dispatch(void (*func)(void *), void *arg)
1438{
1439 ASSERT(func != NULL);
0a7de745 1440 timeout(func, arg, hz / 1000);
1441}
1442
1443__private_extern__ void
1444mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp,
1445 struct timeval *base_ts)
2d21ac55 1446{
cb323159 1447 struct timeval now, base = { .tv_sec = 0, .tv_usec = 0 };
39236c6e 1448 void *stack[MCACHE_STACK_DEPTH + 1];
1449 struct mca_trn *transaction;
1450
1451 transaction = &mca->mca_trns[mca->mca_next_trn];
39236c6e 1452
1453 mca->mca_addr = addr;
1454 mca->mca_cache = cp;
1455
1456 transaction->mca_thread = current_thread();
1457
0a7de745 1458 bzero(stack, sizeof(stack));
f427ee49 1459 transaction->mca_depth = (uint16_t)OSBacktrace(stack, MCACHE_STACK_DEPTH + 1) - 1;
fe8ab488 1460 bcopy(&stack[1], transaction->mca_stack,
0a7de745 1461 sizeof(transaction->mca_stack));
39236c6e 1462
39236c6e 1463 microuptime(&now);
0a7de745 1464 if (base_ts != NULL) {
39236c6e 1465 base = *base_ts;
0a7de745 1466 }
39236c6e 1467 /* tstamp is in ms relative to base_ts */
fe8ab488 1468 transaction->mca_tstamp = ((now.tv_usec - base.tv_usec) / 1000);
0a7de745 1469 if ((now.tv_sec - base.tv_sec) > 0) {
fe8ab488 1470 transaction->mca_tstamp += ((now.tv_sec - base.tv_sec) * 1000);
0a7de745 1471 }
1472
1473 mca->mca_next_trn =
0a7de745 1474 (mca->mca_next_trn + 1) % mca_trn_max;
1475}
1476
1477/*
1478 * N.B.: mcache_set_pattern(), mcache_verify_pattern() and
1479 * mcache_verify_set_pattern() are marked as noinline to prevent the
1480 * compiler from aliasing pointers when they are inlined inside the callers
1481 * (e.g. mcache_audit_free_verify_set()) which would be undefined behavior.
1482 */
1483__private_extern__ OS_NOINLINE void
1484mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size)
1485{
316670eb 1486 u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
1487 u_int64_t *buf = (u_int64_t *)buf_arg;
1488
1489 VERIFY(IS_P2ALIGNED(buf_arg, sizeof(u_int64_t)));
1490 VERIFY(IS_P2ALIGNED(size, sizeof(u_int64_t)));
2d21ac55 1491
0a7de745 1492 while (buf < buf_end) {
2d21ac55 1493 *buf++ = pattern;
0a7de745 1494 }
1495}
1496
f427ee49 1497__private_extern__ OS_NOINLINE void *
1498mcache_verify_pattern(u_int64_t pattern, void *buf_arg, size_t size)
1499{
316670eb 1500 u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
1501 u_int64_t *buf;
1502
1503 VERIFY(IS_P2ALIGNED(buf_arg, sizeof(u_int64_t)));
1504 VERIFY(IS_P2ALIGNED(size, sizeof(u_int64_t)));
1505
1506 for (buf = buf_arg; buf < buf_end; buf++) {
1507 if (*buf != pattern) {
1508 return buf;
1509 }
2d21ac55 1510 }
0a7de745 1511 return NULL;
1512}
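/*
 * Illustrative sketch (not in the original source): typical pairing of
 * the two routines above.  Both the buffer and the size must be 64-bit
 * aligned, as the VERIFYs above require.  Kept out of the build with
 * #if 0.
 */
#if 0
static void
example_pattern_check(void *buf, size_t size)
{
	void *bad;

	/* Fill the free buffer with the canonical free pattern... */
	mcache_set_pattern(MCACHE_FREE_PATTERN, buf, size);

	/* ...and later verify that nothing wrote to it while free */
	bad = mcache_verify_pattern(MCACHE_FREE_PATTERN, buf, size);
	if (bad != NULL) {
		/* "bad" points at the first corrupted 64-bit word */
	}
}
#endif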
1513
f427ee49 1514OS_NOINLINE static void *
1515mcache_verify_set_pattern(u_int64_t old, u_int64_t new, void *buf_arg,
1516 size_t size)
1517{
316670eb 1518 u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
1519 u_int64_t *buf;
1520
1521 VERIFY(IS_P2ALIGNED(buf_arg, sizeof(u_int64_t)));
1522 VERIFY(IS_P2ALIGNED(size, sizeof(u_int64_t)));
1523
1524 for (buf = buf_arg; buf < buf_end; buf++) {
1525 if (*buf != old) {
1526 mcache_set_pattern(old, buf_arg,
1527 (uintptr_t)buf - (uintptr_t)buf_arg);
0a7de745 1528 return buf;
1529 }
1530 *buf = new;
1531 }
0a7de745 1532 return NULL;
1533}
1534
1535__private_extern__ void
1536mcache_audit_free_verify(mcache_audit_t *mca, void *base, size_t offset,
1537 size_t size)
1538{
1539 void *addr;
1540 u_int64_t *oaddr64;
1541 mcache_obj_t *next;
1542
1543 addr = (void *)((uintptr_t)base + offset);
1544 next = ((mcache_obj_t *)addr)->obj_next;
1545
1546 /* For the "obj_next" pointer in the buffer */
0a7de745 1547 oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof(u_int64_t));
1548 *oaddr64 = MCACHE_FREE_PATTERN;
1549
1550 if ((oaddr64 = mcache_verify_pattern(MCACHE_FREE_PATTERN,
1551 (caddr_t)base, size)) != NULL) {
1552 mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
1553 (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
1554 /* NOTREACHED */
1555 }
1556 ((mcache_obj_t *)addr)->obj_next = next;
1557}
1558
1559__private_extern__ void
1560mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset,
1561 size_t size)
1562{
1563 void *addr;
1564 u_int64_t *oaddr64;
1565 mcache_obj_t *next;
1566
1567 addr = (void *)((uintptr_t)base + offset);
1568 next = ((mcache_obj_t *)addr)->obj_next;
1569
1570 /* For the "obj_next" pointer in the buffer */
0a7de745 1571 oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof(u_int64_t));
1572 *oaddr64 = MCACHE_FREE_PATTERN;
1573
1574 if ((oaddr64 = mcache_verify_set_pattern(MCACHE_FREE_PATTERN,
1575 MCACHE_UNINITIALIZED_PATTERN, (caddr_t)base, size)) != NULL) {
1576 mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
1577 (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
1578 /* NOTREACHED */
1579 }
1580 ((mcache_obj_t *)addr)->obj_next = next;
1581}
1582
b0d623f7 1583#undef panic
2d21ac55 1584
0a7de745 1585#define DUMP_TRN_FMT() \
1586 "%s transaction thread %p saved PC stack (%d deep):\n" \
1587 "\t%p, %p, %p, %p, %p, %p, %p, %p\n" \
1588 "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
1589
0a7de745 1590#define DUMP_TRN_FIELDS(s, x) \
1591 s, \
1592 mca->mca_trns[x].mca_thread, mca->mca_trns[x].mca_depth, \
1593 mca->mca_trns[x].mca_stack[0], mca->mca_trns[x].mca_stack[1], \
1594 mca->mca_trns[x].mca_stack[2], mca->mca_trns[x].mca_stack[3], \
1595 mca->mca_trns[x].mca_stack[4], mca->mca_trns[x].mca_stack[5], \
1596 mca->mca_trns[x].mca_stack[6], mca->mca_trns[x].mca_stack[7], \
1597 mca->mca_trns[x].mca_stack[8], mca->mca_trns[x].mca_stack[9], \
1598 mca->mca_trns[x].mca_stack[10], mca->mca_trns[x].mca_stack[11], \
1599 mca->mca_trns[x].mca_stack[12], mca->mca_trns[x].mca_stack[13], \
1600 mca->mca_trns[x].mca_stack[14], mca->mca_trns[x].mca_stack[15]
1601
1602#define MCA_TRN_LAST ((mca->mca_next_trn + mca_trn_max) % mca_trn_max)
1603#define MCA_TRN_PREV ((mca->mca_next_trn + mca_trn_max - 1) % mca_trn_max)
fe8ab488 1604
2d21ac55 1605__private_extern__ char *
c3c9b80d 1606mcache_dump_mca(char buf[static DUMP_MCA_BUF_SIZE], mcache_audit_t *mca)
2d21ac55 1607{
c3c9b80d 1608 snprintf(buf, DUMP_MCA_BUF_SIZE,
1609 "mca %p: addr %p, cache %p (%s) nxttrn %d\n"
1610 DUMP_TRN_FMT()
1611 DUMP_TRN_FMT(),
1612
1613 mca, mca->mca_addr, mca->mca_cache,
1614 mca->mca_cache ? mca->mca_cache->mc_name : "?",
1615 mca->mca_next_trn,
1616
1617 DUMP_TRN_FIELDS("last", MCA_TRN_LAST),
1618 DUMP_TRN_FIELDS("previous", MCA_TRN_PREV));
2d21ac55 1619
c3c9b80d 1620 return buf;
1621}
1622
1623__private_extern__ void
1624mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset,
1625 int64_t expected, int64_t got)
1626{
1627 char buf[DUMP_MCA_BUF_SIZE];
1628
1629 if (mca == NULL) {
1630 panic("mcache_audit: buffer %p modified after free at "
1631 "offset 0x%lx (0x%llx instead of 0x%llx)\n", addr,
1632 offset, got, expected);
1633 /* NOTREACHED */
cb323159 1634 __builtin_unreachable();
1635 }
1636
1637 panic("mcache_audit: buffer %p modified after free at offset 0x%lx "
1638 "(0x%llx instead of 0x%llx)\n%s\n",
c3c9b80d 1639 addr, offset, got, expected, mcache_dump_mca(buf, mca));
2d21ac55 1640 /* NOTREACHED */
cb323159 1641 __builtin_unreachable();
1642}
1643
cb323159 1644__attribute__((noinline, cold, not_tail_called, noreturn))
1645__private_extern__ int
1646assfail(const char *a, const char *f, int l)
1647{
1648 panic("assertion failed: %s, file: %s, line: %d", a, f, l);
1649 /* NOTREACHED */
1650 __builtin_unreachable();
2d21ac55 1651}