1 /*
2 * Copyright (c) 2006-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Memory allocator with per-CPU caching, derived from the kmem magazine
31 * concept and implementation as described in the following paper:
32 * http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick.pdf
33 * That implementation is Copyright 2006 Sun Microsystems, Inc. All rights
34 * reserved. Use is subject to license terms.
35 *
36 * There are several major differences between this and the original kmem
37 * magazine: this derivative implementation allows for multiple objects to
38 * be allocated and freed from/to the object cache in one call; in addition,
39 * it provides greater flexibility by allowing the caller to supply its
40 * own slab allocator (instead of the default zone allocator). Finally,
41 * no object construction/destruction takes place at the moment, although
42 * this could be added in the future to improve efficiency.
43 */
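/*
 * Illustrative usage sketch (not part of the original source).  A typical
 * client creates a cache for a fixed-size object type and then allocates
 * and frees objects through it; "my_cache" and "struct my_obj" below are
 * hypothetical names used only for the example:
 *
 *	static mcache_t *my_cache;
 *
 *	my_cache = mcache_create("my_obj", sizeof (struct my_obj),
 *	    0, 0, MCR_SLEEP);
 *	struct my_obj *o = mcache_alloc(my_cache, MCR_SLEEP);
 *	...
 *	mcache_free(my_cache, o);
 *	...
 *	mcache_destroy(my_cache);
 */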
44
45 #include <sys/param.h>
46 #include <sys/types.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/queue.h>
50 #include <sys/kernel.h>
51 #include <sys/systm.h>
52
53 #include <kern/debug.h>
54 #include <kern/zalloc.h>
55 #include <kern/cpu_number.h>
56 #include <kern/locks.h>
57
58 #include <libkern/libkern.h>
59 #include <libkern/OSAtomic.h>
60 #include <libkern/OSDebug.h>
61
62 #include <mach/vm_param.h>
63 #include <machine/limits.h>
64 #include <machine/machine_routines.h>
65
66 #include <string.h>
67
68 #include <sys/mcache.h>
69
70 #define MCACHE_SIZE(n) \
71 ((size_t)(&((mcache_t *)0)->mc_cpu[n]))
72
73 /* Allocate extra in case we need to manually align the pointer */
74 #define MCACHE_ALLOC_SIZE \
75 (sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_SIZE)
76
77 #define MCACHE_CPU(c) \
78 (mcache_cpu_t *)((void *)((char *)(c) + MCACHE_SIZE(cpu_number())))
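/*
 * MCACHE_CPU(c) returns the per-CPU cache structure for the CPU the
 * caller is currently running on, by offsetting the cache pointer to
 * mc_cpu[cpu_number()] (see MCACHE_SIZE above).
 */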
79
80 /*
81 * MCACHE_LIST_LOCK() and MCACHE_LIST_UNLOCK() are macros used
82 * to serialize accesses to the global list of caches in the system.
83 * They also record the thread currently running in the critical
84 * section, so that we can avoid recursive requests to reap the
85 * caches when memory runs low.
86 */
87 #define MCACHE_LIST_LOCK() { \
88 lck_mtx_lock(mcache_llock); \
89 mcache_llock_owner = current_thread(); \
90 }
91
92 #define MCACHE_LIST_UNLOCK() { \
93 mcache_llock_owner = NULL; \
94 lck_mtx_unlock(mcache_llock); \
95 }
96
97 #define MCACHE_LOCK(l) lck_mtx_lock(l)
98 #define MCACHE_UNLOCK(l) lck_mtx_unlock(l)
99 #define MCACHE_LOCK_TRY(l) lck_mtx_try_lock(l)
100
101 static int ncpu;
102 static lck_mtx_t *mcache_llock;
103 static struct thread *mcache_llock_owner;
104 static lck_attr_t *mcache_llock_attr;
105 static lck_grp_t *mcache_llock_grp;
106 static lck_grp_attr_t *mcache_llock_grp_attr;
107 static struct zone *mcache_zone;
108 static unsigned int mcache_reap_interval;
109 static UInt32 mcache_reaping;
110 static int mcache_ready;
111 static int mcache_updating;
112
113 static int mcache_bkt_contention = 3;
114 #if DEBUG
115 static unsigned int mcache_flags = MCF_DEBUG;
116 #else
117 static unsigned int mcache_flags = 0;
118 #endif
119
120 #define DUMP_MCA_BUF_SIZE 512
121 static char *mca_dump_buf;
122
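/*
 * Each entry below pairs a bucket size (objects per bucket) with the range
 * of chunk sizes it serves: the fields are the bucket size, the minimum
 * and maximum buffer sizes consulted when selecting and resizing a cache's
 * bucket type, and the cache from which the buckets themselves are
 * allocated (filled in by mcache_init()).
 */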
123 static mcache_bkttype_t mcache_bkttype[] = {
124 { 1, 4096, 32768, NULL },
125 { 3, 2048, 16384, NULL },
126 { 7, 1024, 12288, NULL },
127 { 15, 256, 8192, NULL },
128 { 31, 64, 4096, NULL },
129 { 47, 0, 2048, NULL },
130 { 63, 0, 1024, NULL },
131 { 95, 0, 512, NULL },
132 { 143, 0, 256, NULL },
133 { 165, 0, 0, NULL },
134 };
135
136 static mcache_t *mcache_create_common(const char *, size_t, size_t,
137 mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t,
138 mcache_notifyfn_t, void *, u_int32_t, int, int);
139 static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***,
140 unsigned int, int);
141 static void mcache_slab_free(void *, mcache_obj_t *, boolean_t);
142 static void mcache_slab_audit(void *, mcache_obj_t *, boolean_t);
143 static void mcache_cpu_refill(mcache_cpu_t *, mcache_bkt_t *, int);
144 static mcache_bkt_t *mcache_bkt_alloc(mcache_t *, mcache_bktlist_t *,
145 mcache_bkttype_t **);
146 static void mcache_bkt_free(mcache_t *, mcache_bktlist_t *, mcache_bkt_t *);
147 static void mcache_cache_bkt_enable(mcache_t *);
148 static void mcache_bkt_purge(mcache_t *);
149 static void mcache_bkt_destroy(mcache_t *, mcache_bkttype_t *,
150 mcache_bkt_t *, int);
151 static void mcache_bkt_ws_update(mcache_t *);
152 static void mcache_bkt_ws_reap(mcache_t *);
153 static void mcache_dispatch(void (*)(void *), void *);
154 static void mcache_cache_reap(mcache_t *);
155 static void mcache_cache_update(mcache_t *);
156 static void mcache_cache_bkt_resize(void *);
157 static void mcache_cache_enable(void *);
158 static void mcache_update(void *);
159 static void mcache_update_timeout(void *);
160 static void mcache_applyall(void (*)(mcache_t *));
161 static void mcache_reap_start(void *);
162 static void mcache_reap_done(void *);
163 static void mcache_reap_timeout(void *);
164 static void mcache_notify(mcache_t *, u_int32_t);
165 static void mcache_purge(void *);
166
167 static LIST_HEAD(, mcache) mcache_head;
168 mcache_t *mcache_audit_cache;
169
170 /*
171 * Initialize the framework; this is currently called as part of BSD init.
172 */
173 __private_extern__ void
174 mcache_init(void)
175 {
176 mcache_bkttype_t *btp;
177 unsigned int i;
178 char name[32];
179
180 ncpu = ml_get_max_cpus();
181
182 mcache_llock_grp_attr = lck_grp_attr_alloc_init();
183 mcache_llock_grp = lck_grp_alloc_init("mcache.list",
184 mcache_llock_grp_attr);
185 mcache_llock_attr = lck_attr_alloc_init();
186 mcache_llock = lck_mtx_alloc_init(mcache_llock_grp, mcache_llock_attr);
187
188 mcache_zone = zinit(MCACHE_ALLOC_SIZE, 256 * MCACHE_ALLOC_SIZE,
189 PAGE_SIZE, "mcache");
190 if (mcache_zone == NULL)
191 panic("mcache_init: failed to allocate mcache zone\n");
192 zone_change(mcache_zone, Z_CALLERACCT, FALSE);
193
194 LIST_INIT(&mcache_head);
195
196 for (i = 0; i < sizeof (mcache_bkttype) / sizeof (*btp); i++) {
197 btp = &mcache_bkttype[i];
198 (void) snprintf(name, sizeof (name), "bkt_%d",
199 btp->bt_bktsize);
200 btp->bt_cache = mcache_create(name,
201 (btp->bt_bktsize + 1) * sizeof (void *), 0, 0, MCR_SLEEP);
202 }
203
204 PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof (mcache_flags));
205 mcache_flags &= MCF_FLAGS_MASK;
206
207 mcache_audit_cache = mcache_create("audit", sizeof (mcache_audit_t),
208 0, 0, MCR_SLEEP);
209
210 mcache_reap_interval = 15 * hz;
211 mcache_applyall(mcache_cache_bkt_enable);
212 mcache_ready = 1;
213 }
214
215 /*
216 * Return the global mcache flags.
217 */
218 __private_extern__ unsigned int
219 mcache_getflags(void)
220 {
221 return (mcache_flags);
222 }
223
224 /*
225 * Create a cache using the zone allocator as the backend slab allocator.
226 * The caller may specify any alignment for the object; if it specifies 0
227 * the default alignment (MCACHE_ALIGN) will be used.
228 */
229 __private_extern__ mcache_t *
230 mcache_create(const char *name, size_t bufsize, size_t align,
231 u_int32_t flags, int wait)
232 {
233 return (mcache_create_common(name, bufsize, align, mcache_slab_alloc,
234 mcache_slab_free, mcache_slab_audit, NULL, NULL, NULL, flags, 1,
235 wait));
236 }
237
238 /*
239 * Create a cache using a custom backend slab allocator. Since the caller
240 * is responsible for allocation, no alignment guarantee will be provided
241 * by this framework.
242 */
243 __private_extern__ mcache_t *
244 mcache_create_ext(const char *name, size_t bufsize,
245 mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
246 mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
247 u_int32_t flags, int wait)
248 {
249 return (mcache_create_common(name, bufsize, 0, allocfn,
250 freefn, auditfn, logfn, notifyfn, arg, flags, 0, wait));
251 }
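/*
 * Illustrative sketch (not part of the original source) of what a custom
 * backend looks like; "my_alloc", "my_free" and "my_arg" are hypothetical.
 * The callbacks follow the shapes used elsewhere in this file: the alloc
 * callback returns the number of objects it chained onto *plist, and the
 * free callback receives a linked list of objects:
 *
 *	static unsigned int
 *	my_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait);
 *
 *	static void
 *	my_free(void *arg, mcache_obj_t *list, boolean_t purged);
 *
 *	cp = mcache_create_ext("my_ext", bufsize, my_alloc, my_free,
 *	    NULL, NULL, NULL, my_arg, 0, MCR_SLEEP);
 */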
252
253 /*
254 * Common cache creation routine.
255 */
256 static mcache_t *
257 mcache_create_common(const char *name, size_t bufsize, size_t align,
258 mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
259 mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
260 u_int32_t flags, int need_zone, int wait)
261 {
262 mcache_bkttype_t *btp;
263 mcache_t *cp = NULL;
264 size_t chunksize;
265 void *buf, **pbuf;
266 int c;
267 char lck_name[64];
268
269 /* If auditing is on and print buffer is NULL, allocate it now */
270 if ((flags & MCF_DEBUG) && mca_dump_buf == NULL) {
271 int malloc_wait = (wait & MCR_NOSLEEP) ? M_NOWAIT : M_WAITOK;
272 MALLOC(mca_dump_buf, char *, DUMP_MCA_BUF_SIZE, M_TEMP,
273 malloc_wait | M_ZERO);
274 if (mca_dump_buf == NULL)
275 return (NULL);
276 }
277
278 if (!(wait & MCR_NOSLEEP))
279 buf = zalloc(mcache_zone);
280 else
281 buf = zalloc_noblock(mcache_zone);
282
283 if (buf == NULL)
284 goto fail;
285
286 bzero(buf, MCACHE_ALLOC_SIZE);
287
288 /*
289 * In case we didn't get cache-aligned memory, round the pointer
290 * up accordingly. This is needed in order to get the rest of the
291 * structure members aligned properly. It also means that the
292 * memory span gets shifted due to the round up, but that is
293 * okay since we've allocated extra space for this.
294 */
295 cp = (mcache_t *)
296 P2ROUNDUP((intptr_t)buf + sizeof (void *), CPU_CACHE_SIZE);
297 pbuf = (void **)((intptr_t)cp - sizeof (void *));
298 *pbuf = buf;
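	/*
	 * Illustrative example (assuming a hypothetical 64-byte
	 * CPU_CACHE_SIZE on LP64): if zalloc() returned buf == 0x1010,
	 * then buf + sizeof (void *) == 0x1018 rounds up to cp == 0x1040,
	 * and the original buf is stashed at 0x1038 (cp - sizeof (void *))
	 * so that mcache_destroy() can recover it for zfree().
	 */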
299
300 /*
301 * Guaranteed alignment is valid only when we use the internal
302 * slab allocator (currently set to use the zone allocator).
303 */
304 if (!need_zone)
305 align = 1;
306 else if (align == 0)
307 align = MCACHE_ALIGN;
308
309 if ((align & (align - 1)) != 0)
310 panic("mcache_create: bad alignment %lu", align);
311
312 cp->mc_align = align;
313 cp->mc_slab_alloc = allocfn;
314 cp->mc_slab_free = freefn;
315 cp->mc_slab_audit = auditfn;
316 cp->mc_slab_log = logfn;
317 cp->mc_slab_notify = notifyfn;
318 cp->mc_private = need_zone ? cp : arg;
319 cp->mc_bufsize = bufsize;
320 cp->mc_flags = (flags & MCF_FLAGS_MASK) | mcache_flags;
321
322 (void) snprintf(cp->mc_name, sizeof (cp->mc_name), "mcache.%s", name);
323
324 (void) snprintf(lck_name, sizeof (lck_name), "%s.cpu", cp->mc_name);
325 cp->mc_cpu_lock_grp_attr = lck_grp_attr_alloc_init();
326 cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name,
327 cp->mc_cpu_lock_grp_attr);
328 cp->mc_cpu_lock_attr = lck_attr_alloc_init();
329
330 /*
331 * Allocation chunk size is the object's size plus any extra size
332 * needed to satisfy the object's alignment. It is enforced to be
333 * at least the size of an LP64 pointer to simplify auditing and to
334 * handle multiple-element allocation requests, where the elements
335 * returned are linked together in a list.
336 */
337 chunksize = MAX(bufsize, sizeof (u_int64_t));
338 if (need_zone) {
339 /* Enforce 64-bit minimum alignment for zone-based buffers */
340 align = MAX(align, sizeof (u_int64_t));
341 chunksize += sizeof (void *) + align;
342 chunksize = P2ROUNDUP(chunksize, align);
343 if ((cp->mc_slab_zone = zinit(chunksize, 64 * 1024 * ncpu,
344 PAGE_SIZE, cp->mc_name)) == NULL)
345 goto fail;
346 zone_change(cp->mc_slab_zone, Z_EXPAND, TRUE);
347 }
348 cp->mc_chunksize = chunksize;
349
350 /*
351 * Initialize the bucket layer.
352 */
353 (void) snprintf(lck_name, sizeof (lck_name), "%s.bkt", cp->mc_name);
354 cp->mc_bkt_lock_grp_attr = lck_grp_attr_alloc_init();
355 cp->mc_bkt_lock_grp = lck_grp_alloc_init(lck_name,
356 cp->mc_bkt_lock_grp_attr);
357 cp->mc_bkt_lock_attr = lck_attr_alloc_init();
358 lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp,
359 cp->mc_bkt_lock_attr);
360
361 (void) snprintf(lck_name, sizeof (lck_name), "%s.sync", cp->mc_name);
362 cp->mc_sync_lock_grp_attr = lck_grp_attr_alloc_init();
363 cp->mc_sync_lock_grp = lck_grp_alloc_init(lck_name,
364 cp->mc_sync_lock_grp_attr);
365 cp->mc_sync_lock_attr = lck_attr_alloc_init();
366 lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp,
367 cp->mc_sync_lock_attr);
368
369 for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++)
370 continue;
371
372 cp->cache_bkttype = btp;
373
374 /*
375 * Initialize the CPU layer. Each per-CPU structure is aligned
376 * on the CPU cache line boundary to prevent false sharing.
377 */
378 for (c = 0; c < ncpu; c++) {
379 mcache_cpu_t *ccp = &cp->mc_cpu[c];
380
381 VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_SIZE));
382 lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp,
383 cp->mc_cpu_lock_attr);
384 ccp->cc_objs = -1;
385 ccp->cc_pobjs = -1;
386 }
387
388 if (mcache_ready)
389 mcache_cache_bkt_enable(cp);
390
391 /* TODO: dynamically create sysctl for stats */
392
393 MCACHE_LIST_LOCK();
394 LIST_INSERT_HEAD(&mcache_head, cp, mc_list);
395 MCACHE_LIST_UNLOCK();
396
397 /*
398 * If cache buckets are enabled and this is the first cache
399 * created, start the periodic cache update.
400 */
401 if (!(mcache_flags & MCF_NOCPUCACHE) && !mcache_updating) {
402 mcache_updating = 1;
403 mcache_update_timeout(NULL);
404 }
405 if (cp->mc_flags & MCF_DEBUG) {
406 printf("mcache_create: %s (%s) arg %p bufsize %lu align %lu "
407 "chunksize %lu bktsize %d\n", name, need_zone ? "i" : "e",
408 arg, bufsize, cp->mc_align, chunksize, btp->bt_bktsize);
409 }
410 return (cp);
411
412 fail:
413 if (buf != NULL)
414 zfree(mcache_zone, buf);
415 return (NULL);
416 }
417
418 /*
419 * Allocate one or more objects from a cache.
420 */
421 __private_extern__ unsigned int
422 mcache_alloc_ext(mcache_t *cp, mcache_obj_t **list, unsigned int num, int wait)
423 {
424 mcache_cpu_t *ccp;
425 mcache_obj_t **top = &(*list);
426 mcache_bkt_t *bkt;
427 unsigned int need = num;
428 boolean_t nwretry = FALSE;
429
430 /* MCR_NOSLEEP and MCR_FAILOK are mutually exclusive */
431 VERIFY((wait & (MCR_NOSLEEP|MCR_FAILOK)) != (MCR_NOSLEEP|MCR_FAILOK));
432
433 ASSERT(list != NULL);
434 *list = NULL;
435
436 if (num == 0)
437 return (0);
438
439 retry_alloc:
440 /* We may not always be running on the same CPU in case of retries */
441 ccp = MCACHE_CPU(cp);
442
443 MCACHE_LOCK(&ccp->cc_lock);
444 for (;;) {
445 /*
446 * If we have an object in the current CPU's filled bucket,
447 * chain the object to any previous objects and return if
448 * we've satisfied the number of requested objects.
449 */
450 if (ccp->cc_objs > 0) {
451 mcache_obj_t *tail;
452 int objs;
453
454 /*
455 * Objects in the bucket are already linked together
456 * with the most recently freed object at the head of
457 * the list; grab as many objects as we can.
458 */
459 objs = MIN((unsigned int)ccp->cc_objs, need);
460 *list = ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
461 ccp->cc_objs -= objs;
462 ccp->cc_alloc += objs;
463
464 tail = ccp->cc_filled->bkt_obj[ccp->cc_objs];
465 list = &tail->obj_next;
466 *list = NULL;
467
468 /* If we got them all, return to caller */
469 if ((need -= objs) == 0) {
470 MCACHE_UNLOCK(&ccp->cc_lock);
471
472 if (!(cp->mc_flags & MCF_NOLEAKLOG) &&
473 cp->mc_slab_log != NULL)
474 (*cp->mc_slab_log)(num, *top, TRUE);
475
476 if (cp->mc_flags & MCF_DEBUG)
477 goto debug_alloc;
478
479 return (num);
480 }
481 }
482
483 /*
484 * The CPU's filled bucket is empty. If the previous filled
485 * bucket was full, exchange and try again.
486 */
487 if (ccp->cc_pobjs > 0) {
488 mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
489 continue;
490 }
491
492 /*
493 * If the bucket layer is disabled, allocate from slab. This
494 * can happen either because MCF_NOCPUCACHE is set, or because
495 * the bucket layer is currently being resized.
496 */
497 if (ccp->cc_bktsize == 0)
498 break;
499
500 /*
501 * Both of the CPU's buckets are empty; try to get a full
502 * bucket from the bucket layer. Upon success, refill this
503 * CPU and place any empty bucket into the empty list.
504 */
505 bkt = mcache_bkt_alloc(cp, &cp->mc_full, NULL);
506 if (bkt != NULL) {
507 if (ccp->cc_pfilled != NULL)
508 mcache_bkt_free(cp, &cp->mc_empty,
509 ccp->cc_pfilled);
510 mcache_cpu_refill(ccp, bkt, ccp->cc_bktsize);
511 continue;
512 }
513
514 /*
515 * The bucket layer has no full buckets; allocate the
516 * object(s) directly from the slab layer.
517 */
518 break;
519 }
520 MCACHE_UNLOCK(&ccp->cc_lock);
521
522 need -= (*cp->mc_slab_alloc)(cp->mc_private, &list, need, wait);
523
524 /*
525 * If this is a blocking allocation, or if it is non-blocking and
526 * the cache's full-bucket list is non-empty, then retry the allocation.
527 */
528 if (need > 0) {
529 if (!(wait & MCR_NONBLOCKING)) {
530 atomic_add_32(&cp->mc_wretry_cnt, 1);
531 goto retry_alloc;
532 } else if ((wait & (MCR_NOSLEEP | MCR_TRYHARD)) &&
533 !mcache_bkt_isempty(cp)) {
534 if (!nwretry)
535 nwretry = TRUE;
536 atomic_add_32(&cp->mc_nwretry_cnt, 1);
537 goto retry_alloc;
538 } else if (nwretry) {
539 atomic_add_32(&cp->mc_nwfail_cnt, 1);
540 }
541 }
542
543 if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL)
544 (*cp->mc_slab_log)((num - need), *top, TRUE);
545
546 if (!(cp->mc_flags & MCF_DEBUG))
547 return (num - need);
548
549 debug_alloc:
550 if (cp->mc_flags & MCF_DEBUG) {
551 mcache_obj_t **o = top;
552 unsigned int n;
553
554 n = 0;
555 /*
556 * Verify that the chain of objects has the same count as
557 * what we are about to report to the caller. Any mismatch
558 * here means that the object list is insanely broken and
559 * therefore we must panic.
560 */
561 while (*o != NULL) {
562 o = &(*o)->obj_next;
563 ++n;
564 }
565 if (n != (num - need)) {
566 panic("mcache_alloc_ext: %s cp %p corrupted list "
567 "(got %d actual %d)\n", cp->mc_name,
568 (void *)cp, num - need, n);
569 }
570 }
571
572 /* Invoke the slab layer audit callback if auditing is enabled */
573 if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL)
574 (*cp->mc_slab_audit)(cp->mc_private, *top, TRUE);
575
576 return (num - need);
577 }
578
579 /*
580 * Allocate a single object from a cache.
581 */
582 __private_extern__ void *
583 mcache_alloc(mcache_t *cp, int wait)
584 {
585 mcache_obj_t *buf;
586
587 (void) mcache_alloc_ext(cp, &buf, 1, wait);
588 return (buf);
589 }
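/*
 * Illustrative sketch (not part of the original source): callers needing
 * several objects at once can use mcache_alloc_ext() and walk the chain
 * linked through obj_next; the return value reports how many objects were
 * actually obtained.  "my_cache" is a hypothetical cache:
 *
 *	mcache_obj_t *list, *o;
 *	unsigned int got;
 *
 *	got = mcache_alloc_ext(my_cache, &list, 32, MCR_NOSLEEP);
 *	for (o = list; o != NULL; o = o->obj_next)
 *		... use the object ...
 *	mcache_free_ext(my_cache, list);
 */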
590
591 __private_extern__ void
592 mcache_waiter_inc(mcache_t *cp)
593 {
594 atomic_add_32(&cp->mc_waiter_cnt, 1);
595 }
596
597 __private_extern__ void
598 mcache_waiter_dec(mcache_t *cp)
599 {
600 atomic_add_32(&cp->mc_waiter_cnt, -1);
601 }
602
603 __private_extern__ boolean_t
604 mcache_bkt_isempty(mcache_t *cp)
605 {
606 /*
607 * This isn't meant to accurately tell whether there are
608 * any full buckets in the cache; it is simply a way to
609 * obtain "hints" about the state of the cache.
610 */
611 return (cp->mc_full.bl_total == 0);
612 }
613
614 /*
615 * Notify the slab layer about an event.
616 */
617 static void
618 mcache_notify(mcache_t *cp, u_int32_t event)
619 {
620 if (cp->mc_slab_notify != NULL)
621 (*cp->mc_slab_notify)(cp->mc_private, event);
622 }
623
624 /*
625 * Purge the cache and disable its buckets.
626 */
627 static void
628 mcache_purge(void *arg)
629 {
630 mcache_t *cp = arg;
631
632 mcache_bkt_purge(cp);
633 /*
634 * We cannot simply call mcache_cache_bkt_enable() from here as
635 * a bucket resize may be in flight and we could leave the CPU
636 * layers of the cache with inconsistent bucket sizes. Therefore,
637 * we simply increment the enable count so that during the next
638 * periodic cache update the buckets can be reenabled.
639 */
640 lck_mtx_lock_spin(&cp->mc_sync_lock);
641 cp->mc_enable_cnt++;
642 lck_mtx_unlock(&cp->mc_sync_lock);
643
644 }
645
646 __private_extern__ boolean_t
647 mcache_purge_cache(mcache_t *cp)
648 {
649 /*
650 * Purging a cache that has no per-CPU caches or is already
651 * in the process of being purged is rather pointless.
652 */
653 if (cp->mc_flags & MCF_NOCPUCACHE)
654 return (FALSE);
655
656 lck_mtx_lock_spin(&cp->mc_sync_lock);
657 if (cp->mc_purge_cnt > 0) {
658 lck_mtx_unlock(&cp->mc_sync_lock);
659 return (FALSE);
660 }
661 cp->mc_purge_cnt++;
662 lck_mtx_unlock(&cp->mc_sync_lock);
663
664 mcache_dispatch(mcache_purge, cp);
665
666 return (TRUE);
667 }
668
669 /*
670 * Free a single object to a cache.
671 */
672 __private_extern__ void
673 mcache_free(mcache_t *cp, void *buf)
674 {
675 ((mcache_obj_t *)buf)->obj_next = NULL;
676 mcache_free_ext(cp, (mcache_obj_t *)buf);
677 }
678
679 /*
680 * Free one or more objects to a cache.
681 */
682 __private_extern__ void
683 mcache_free_ext(mcache_t *cp, mcache_obj_t *list)
684 {
685 mcache_cpu_t *ccp = MCACHE_CPU(cp);
686 mcache_bkttype_t *btp;
687 mcache_obj_t *nlist;
688 mcache_bkt_t *bkt;
689
690 if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL)
691 (*cp->mc_slab_log)(0, list, FALSE);
692
693 /* Invoke the slab layer audit callback if auditing is enabled */
694 if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL)
695 (*cp->mc_slab_audit)(cp->mc_private, list, FALSE);
696
697 MCACHE_LOCK(&ccp->cc_lock);
698 for (;;) {
699 /*
700 * If there is space in the current CPU's filled bucket, put
701 * the object there and return once all objects are freed.
702 * Note the cast to unsigned integer takes care of the case
703 * where the bucket layer is disabled (when cc_objs is -1).
704 */
705 if ((unsigned int)ccp->cc_objs <
706 (unsigned int)ccp->cc_bktsize) {
707 /*
708 * Reverse the list while we place the object into the
709 * bucket; this effectively causes the most recently
710 * freed object(s) to be reused during allocation.
711 */
712 nlist = list->obj_next;
713 list->obj_next = (ccp->cc_objs == 0) ? NULL :
714 ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
715 ccp->cc_filled->bkt_obj[ccp->cc_objs++] = list;
716 ccp->cc_free++;
717
718 if ((list = nlist) != NULL)
719 continue;
720
721 /* We are done; return to caller */
722 MCACHE_UNLOCK(&ccp->cc_lock);
723
724 /* If there is a waiter below, notify it */
725 if (cp->mc_waiter_cnt > 0)
726 mcache_notify(cp, MCN_RETRYALLOC);
727 return;
728 }
729
730 /*
731 * The CPU's filled bucket is full. If the previous filled
732 * bucket was empty, exchange and try again.
733 */
734 if (ccp->cc_pobjs == 0) {
735 mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
736 continue;
737 }
738
739 /*
740 * If the bucket layer is disabled, free to slab. This can
741 * happen either because MCF_NOCPUCACHE is set, or because
742 * the bucket layer is currently being resized.
743 */
744 if (ccp->cc_bktsize == 0)
745 break;
746
747 /*
748 * Both of the CPU's buckets are full; try to get an empty
749 * bucket from the bucket layer. Upon success, empty this
750 * CPU and place any full bucket into the full list.
751 */
752 bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp);
753 if (bkt != NULL) {
754 if (ccp->cc_pfilled != NULL)
755 mcache_bkt_free(cp, &cp->mc_full,
756 ccp->cc_pfilled);
757 mcache_cpu_refill(ccp, bkt, 0);
758 continue;
759 }
760
761 /*
762 * We need an empty bucket to put our freed objects into
763 * but couldn't get an empty bucket from the bucket layer;
764 * attempt to allocate one. We do not want to block for
765 * allocation here, and if the bucket allocation fails
766 * we will simply fall through to the slab layer.
767 */
768 MCACHE_UNLOCK(&ccp->cc_lock);
769 bkt = mcache_alloc(btp->bt_cache, MCR_NOSLEEP);
770 MCACHE_LOCK(&ccp->cc_lock);
771
772 if (bkt != NULL) {
773 /*
774 * We have an empty bucket, but since we dropped the
775 * CPU lock above, the cache's bucket size may have
776 * changed. If so, free the bucket and try again.
777 */
778 if (ccp->cc_bktsize != btp->bt_bktsize) {
779 MCACHE_UNLOCK(&ccp->cc_lock);
780 mcache_free(btp->bt_cache, bkt);
781 MCACHE_LOCK(&ccp->cc_lock);
782 continue;
783 }
784
785 /*
786 * We have an empty bucket of the right size;
787 * add it to the bucket layer and try again.
788 */
789 mcache_bkt_free(cp, &cp->mc_empty, bkt);
790 continue;
791 }
792
793 /*
794 * The bucket layer has no empty buckets; free the
795 * object(s) directly to the slab layer.
796 */
797 break;
798 }
799 MCACHE_UNLOCK(&ccp->cc_lock);
800
801 /* If there is a waiter below, notify it */
802 if (cp->mc_waiter_cnt > 0)
803 mcache_notify(cp, MCN_RETRYALLOC);
804
805 /* Advise the slab layer to purge the object(s) */
806 (*cp->mc_slab_free)(cp->mc_private, list,
807 (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
808 }
809
810 /*
811 * Cache destruction routine.
812 */
813 __private_extern__ void
814 mcache_destroy(mcache_t *cp)
815 {
816 void **pbuf;
817
818 MCACHE_LIST_LOCK();
819 LIST_REMOVE(cp, mc_list);
820 MCACHE_LIST_UNLOCK();
821
822 mcache_bkt_purge(cp);
823
824 /*
825 * This cache is dead; there should be no further transactions.
826 * If it's still invoked, make sure that it induces a fault.
827 */
828 cp->mc_slab_alloc = NULL;
829 cp->mc_slab_free = NULL;
830 cp->mc_slab_audit = NULL;
831
832 lck_attr_free(cp->mc_bkt_lock_attr);
833 lck_grp_free(cp->mc_bkt_lock_grp);
834 lck_grp_attr_free(cp->mc_bkt_lock_grp_attr);
835
836 lck_attr_free(cp->mc_cpu_lock_attr);
837 lck_grp_free(cp->mc_cpu_lock_grp);
838 lck_grp_attr_free(cp->mc_cpu_lock_grp_attr);
839
840 lck_attr_free(cp->mc_sync_lock_attr);
841 lck_grp_free(cp->mc_sync_lock_grp);
842 lck_grp_attr_free(cp->mc_sync_lock_grp_attr);
843
844 /*
845 * TODO: We need to destroy the zone here, but currently there
846 * is no way to do so. Until then, the memory allocated for the
847 * zone structure is leaked. Once zone destruction becomes
848 * possible, uncomment these lines:
849 *
850 * if (cp->mc_slab_zone != NULL) {
851 * zdestroy(cp->mc_slab_zone);
852 * cp->mc_slab_zone = NULL;
853 * }
854 */
855
856 /* Get the original address since we're about to free it */
857 pbuf = (void **)((intptr_t)cp - sizeof (void *));
858
859 zfree(mcache_zone, *pbuf);
860 }
861
862 /*
863 * Internal slab allocator used as a backend for simple caches. The current
864 * implementation uses the zone allocator for simplicity.
865 */
866 static unsigned int
867 mcache_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait)
868 {
869 mcache_t *cp = arg;
870 unsigned int need = num;
871 size_t offset = 0;
872 size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
873 u_int32_t flags = cp->mc_flags;
874 void *buf, *base, **pbuf;
875 mcache_obj_t **list = *plist;
876
877 *list = NULL;
878
879 /*
880 * The address of the object returned to the caller is an
881 * offset from the 64-bit aligned base address only if the
882 * cache's alignment requirement is neither 1 nor 8 bytes.
883 */
884 if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
885 offset = cp->mc_align;
886
887 for (;;) {
888 if (!(wait & MCR_NOSLEEP))
889 buf = zalloc(cp->mc_slab_zone);
890 else
891 buf = zalloc_noblock(cp->mc_slab_zone);
892
893 if (buf == NULL)
894 break;
895
896 /* Get the 64-bit aligned base address for this object */
897 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
898 sizeof (u_int64_t));
899
900 /*
901 * Wind back a pointer size from the aligned base and
902 * save the original address so we can free it later.
903 */
904 pbuf = (void **)((intptr_t)base - sizeof (void *));
905 *pbuf = buf;
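		/*
		 * Chunk layout at this point (illustrative):
		 *
		 *	buf ... | original buf ptr | align pad | object |
		 *	                           ^base       ^base + offset
		 *
		 * base is 64-bit aligned; the pointer-sized slot just below
		 * it holds the original zalloc() address, which is what
		 * mcache_slab_free() later hands back to zfree().
		 */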
906
907 /*
908 * If auditing is enabled, patternize the contents of
909 * the buffer starting from the 64-bit aligned base to
910 * the end of the buffer; the length is rounded up to
911 * the nearest 64-bit multiple; this is because we use
912 * 64-bit memory access to set/check the pattern.
913 */
914 if (flags & MCF_DEBUG) {
915 VERIFY(((intptr_t)base + rsize) <=
916 ((intptr_t)buf + cp->mc_chunksize));
917 mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
918 }
919
920 /*
921 * Fix up the object's address to fulfill the cache's
922 * alignment requirement (if needed) and return this
923 * to the caller.
924 */
925 VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
926 ((intptr_t)buf + cp->mc_chunksize));
927 *list = (mcache_obj_t *)((intptr_t)base + offset);
928
929 (*list)->obj_next = NULL;
930 list = *plist = &(*list)->obj_next;
931
932 /* If we got them all, return to mcache */
933 if (--need == 0)
934 break;
935 }
936
937 return (num - need);
938 }
939
940 /*
941 * Internal slab deallocator used as a backend for simple caches.
942 */
943 static void
944 mcache_slab_free(void *arg, mcache_obj_t *list, __unused boolean_t purged)
945 {
946 mcache_t *cp = arg;
947 mcache_obj_t *nlist;
948 size_t offset = 0;
949 size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
950 u_int32_t flags = cp->mc_flags;
951 void *base;
952 void **pbuf;
953
954 /*
955 * The address of the object is an offset from a 64-bit
956 * aligned base address only if the cache's alignment
957 * requirement is neither 1 nor 8 bytes.
958 */
959 if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
960 offset = cp->mc_align;
961
962 for (;;) {
963 nlist = list->obj_next;
964 list->obj_next = NULL;
965
966 /* Get the 64-bit aligned base address of this object */
967 base = (void *)((intptr_t)list - offset);
968 VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));
969
970 /* Get the original address since we're about to free it */
971 pbuf = (void **)((intptr_t)base - sizeof (void *));
972
973 if (flags & MCF_DEBUG) {
974 VERIFY(((intptr_t)base + rsize) <=
975 ((intptr_t)*pbuf + cp->mc_chunksize));
976 mcache_audit_free_verify(NULL, base, offset, rsize);
977 }
978
979 /* Free it to zone */
980 VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
981 ((intptr_t)*pbuf + cp->mc_chunksize));
982 zfree(cp->mc_slab_zone, *pbuf);
983
984 /* No more objects to free; return to mcache */
985 if ((list = nlist) == NULL)
986 break;
987 }
988 }
989
990 /*
991 * Internal slab auditor for simple caches.
992 */
993 static void
994 mcache_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
995 {
996 mcache_t *cp = arg;
997 size_t offset = 0;
998 size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
999 void *base, **pbuf;
1000
1001 /*
1002 * The address of the object returned to the caller is an
1003 * offset from the 64-bit aligned base address only if the
1004 * cache's alignment requirement is neither 1 nor 8 bytes.
1005 */
1006 if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
1007 offset = cp->mc_align;
1008
1009 while (list != NULL) {
1010 mcache_obj_t *next = list->obj_next;
1011
1012 /* Get the 64-bit aligned base address of this object */
1013 base = (void *)((intptr_t)list - offset);
1014 VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));
1015
1016 /* Get the original address */
1017 pbuf = (void **)((intptr_t)base - sizeof (void *));
1018
1019 VERIFY(((intptr_t)base + rsize) <=
1020 ((intptr_t)*pbuf + cp->mc_chunksize));
1021
1022 if (!alloc)
1023 mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
1024 else
1025 mcache_audit_free_verify_set(NULL, base, offset, rsize);
1026
1027 list = list->obj_next = next;
1028 }
1029 }
1030
1031 /*
1032 * Refill the CPU's filled bucket with bkt and save the previous one.
1033 */
1034 static void
1035 mcache_cpu_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs)
1036 {
1037 ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) ||
1038 (ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize));
1039 ASSERT(ccp->cc_bktsize > 0);
1040
1041 ccp->cc_pfilled = ccp->cc_filled;
1042 ccp->cc_pobjs = ccp->cc_objs;
1043 ccp->cc_filled = bkt;
1044 ccp->cc_objs = objs;
1045 }
1046
1047 /*
1048 * Allocate a bucket from the bucket layer.
1049 */
1050 static mcache_bkt_t *
1051 mcache_bkt_alloc(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkttype_t **btp)
1052 {
1053 mcache_bkt_t *bkt;
1054
1055 if (!MCACHE_LOCK_TRY(&cp->mc_bkt_lock)) {
1056 /*
1057 * The bucket layer lock is held by another CPU; increase
1058 * the contention count so that we can later adjust the
1059 * bucket size accordingly.
1060 */
1061 MCACHE_LOCK(&cp->mc_bkt_lock);
1062 cp->mc_bkt_contention++;
1063 }
1064
1065 if ((bkt = blp->bl_list) != NULL) {
1066 blp->bl_list = bkt->bkt_next;
1067 if (--blp->bl_total < blp->bl_min)
1068 blp->bl_min = blp->bl_total;
1069 blp->bl_alloc++;
1070 }
1071
1072 if (btp != NULL)
1073 *btp = cp->cache_bkttype;
1074
1075 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1076
1077 return (bkt);
1078 }
1079
1080 /*
1081 * Free a bucket to the bucket layer.
1082 */
1083 static void
1084 mcache_bkt_free(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t *bkt)
1085 {
1086 MCACHE_LOCK(&cp->mc_bkt_lock);
1087
1088 bkt->bkt_next = blp->bl_list;
1089 blp->bl_list = bkt;
1090 blp->bl_total++;
1091
1092 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1093 }
1094
1095 /*
1096 * Enable the bucket layer of a cache.
1097 */
1098 static void
1099 mcache_cache_bkt_enable(mcache_t *cp)
1100 {
1101 mcache_cpu_t *ccp;
1102 int cpu;
1103
1104 if (cp->mc_flags & MCF_NOCPUCACHE)
1105 return;
1106
1107 for (cpu = 0; cpu < ncpu; cpu++) {
1108 ccp = &cp->mc_cpu[cpu];
1109 MCACHE_LOCK(&ccp->cc_lock);
1110 ccp->cc_bktsize = cp->cache_bkttype->bt_bktsize;
1111 MCACHE_UNLOCK(&ccp->cc_lock);
1112 }
1113 }
1114
1115 /*
1116 * Purge all buckets from a cache and disable its bucket layer.
1117 */
1118 static void
1119 mcache_bkt_purge(mcache_t *cp)
1120 {
1121 mcache_cpu_t *ccp;
1122 mcache_bkt_t *bp, *pbp;
1123 mcache_bkttype_t *btp;
1124 int cpu, objs, pobjs;
1125
1126 for (cpu = 0; cpu < ncpu; cpu++) {
1127 ccp = &cp->mc_cpu[cpu];
1128
1129 MCACHE_LOCK(&ccp->cc_lock);
1130
1131 btp = cp->cache_bkttype;
1132 bp = ccp->cc_filled;
1133 pbp = ccp->cc_pfilled;
1134 objs = ccp->cc_objs;
1135 pobjs = ccp->cc_pobjs;
1136 ccp->cc_filled = NULL;
1137 ccp->cc_pfilled = NULL;
1138 ccp->cc_objs = -1;
1139 ccp->cc_pobjs = -1;
1140 ccp->cc_bktsize = 0;
1141
1142 MCACHE_UNLOCK(&ccp->cc_lock);
1143
1144 if (bp != NULL)
1145 mcache_bkt_destroy(cp, btp, bp, objs);
1146 if (pbp != NULL)
1147 mcache_bkt_destroy(cp, btp, pbp, pobjs);
1148 }
1149
1150 /*
1151 * Updating the working set back to back essentially sets
1152 * the working set size to zero, so everything is reapable.
1153 */
1154 mcache_bkt_ws_update(cp);
1155 mcache_bkt_ws_update(cp);
1156
1157 mcache_bkt_ws_reap(cp);
1158 }
1159
1160 /*
1161 * Free one or more objects in the bucket to the slab layer,
1162 * and also free the bucket itself.
1163 */
1164 static void
1165 mcache_bkt_destroy(mcache_t *cp, mcache_bkttype_t *btp, mcache_bkt_t *bkt,
1166 int nobjs)
1167 {
1168 if (nobjs > 0) {
1169 mcache_obj_t *top = bkt->bkt_obj[nobjs - 1];
1170
1171 if (cp->mc_flags & MCF_DEBUG) {
1172 mcache_obj_t *o = top;
1173 int cnt = 0;
1174
1175 /*
1176 * Verify that the chain of objects in the bucket is
1177 * valid. Any mismatch here means a mistake when the
1178 * object(s) were freed to the CPU layer, so we panic.
1179 */
1180 while (o != NULL) {
1181 o = o->obj_next;
1182 ++cnt;
1183 }
1184 if (cnt != nobjs) {
1185 panic("mcache_bkt_destroy: %s cp %p corrupted "
1186 "list in bkt %p (nobjs %d actual %d)\n",
1187 cp->mc_name, (void *)cp, (void *)bkt,
1188 nobjs, cnt);
1189 }
1190 }
1191
1192 /* Advise the slab layer to purge the object(s) */
1193 (*cp->mc_slab_free)(cp->mc_private, top,
1194 (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
1195 }
1196 mcache_free(btp->bt_cache, bkt);
1197 }
1198
1199 /*
1200 * Update the bucket layer working set statistics.
1201 */
1202 static void
1203 mcache_bkt_ws_update(mcache_t *cp)
1204 {
1205 MCACHE_LOCK(&cp->mc_bkt_lock);
1206
1207 cp->mc_full.bl_reaplimit = cp->mc_full.bl_min;
1208 cp->mc_full.bl_min = cp->mc_full.bl_total;
1209 cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_min;
1210 cp->mc_empty.bl_min = cp->mc_empty.bl_total;
1211
1212 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1213 }
1214
1215 /*
1216 * Reap all buckets that are beyond the working set.
1217 */
1218 static void
1219 mcache_bkt_ws_reap(mcache_t *cp)
1220 {
1221 long reap;
1222 mcache_bkt_t *bkt;
1223 mcache_bkttype_t *btp;
1224
1225 reap = MIN(cp->mc_full.bl_reaplimit, cp->mc_full.bl_min);
1226 while (reap-- &&
1227 (bkt = mcache_bkt_alloc(cp, &cp->mc_full, &btp)) != NULL)
1228 mcache_bkt_destroy(cp, btp, bkt, btp->bt_bktsize);
1229
1230 reap = MIN(cp->mc_empty.bl_reaplimit, cp->mc_empty.bl_min);
1231 while (reap-- &&
1232 (bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp)) != NULL)
1233 mcache_bkt_destroy(cp, btp, bkt, 0);
1234 }
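/*
 * Worked example (illustrative): suppose a bucket list holds 10 buckets
 * (bl_total == 10) but never shrank below 6 during the last interval
 * (bl_min == 6).  mcache_bkt_ws_update() then records bl_reaplimit = 6
 * and restarts tracking with bl_min = 10; a later mcache_bkt_ws_reap()
 * frees at most MIN(bl_reaplimit, bl_min) buckets, i.e. only the surplus
 * that went unused.  Two back-to-back updates make bl_reaplimit equal to
 * bl_total, which is how mcache_bkt_purge() renders everything reapable.
 */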
1235
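/*
 * Reap throttling (summary of the functions below): mcache_reap() sets
 * the mcache_reaping flag atomically and dispatches mcache_reap_start(),
 * which reaps every cache and then dispatches mcache_reap_done(); that in
 * turn schedules mcache_reap_timeout() to clear the flag after
 * mcache_reap_interval ticks, so at most one reap runs per interval.
 */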
1236 static void
1237 mcache_reap_timeout(void *arg)
1238 {
1239 volatile UInt32 *flag = arg;
1240
1241 ASSERT(flag == &mcache_reaping);
1242
1243 *flag = 0;
1244 }
1245
1246 static void
1247 mcache_reap_done(void *flag)
1248 {
1249 timeout(mcache_reap_timeout, flag, mcache_reap_interval);
1250 }
1251
1252 static void
1253 mcache_reap_start(void *arg)
1254 {
1255 UInt32 *flag = arg;
1256
1257 ASSERT(flag == &mcache_reaping);
1258
1259 mcache_applyall(mcache_cache_reap);
1260 mcache_dispatch(mcache_reap_done, flag);
1261 }
1262
1263 __private_extern__ void
1264 mcache_reap(void)
1265 {
1266 UInt32 *flag = &mcache_reaping;
1267
1268 if (mcache_llock_owner == current_thread() ||
1269 !OSCompareAndSwap(0, 1, flag))
1270 return;
1271
1272 mcache_dispatch(mcache_reap_start, flag);
1273 }
1274
1275 static void
1276 mcache_cache_reap(mcache_t *cp)
1277 {
1278 mcache_bkt_ws_reap(cp);
1279 }
1280
1281 /*
1282 * Performs periodic maintenance on a cache.
1283 */
1284 static void
1285 mcache_cache_update(mcache_t *cp)
1286 {
1287 int need_bkt_resize = 0;
1288 int need_bkt_reenable = 0;
1289
1290 lck_mtx_assert(mcache_llock, LCK_MTX_ASSERT_OWNED);
1291
1292 mcache_bkt_ws_update(cp);
1293
1294 /*
1295 * Cache resize and post-purge reenable are mutually exclusive.
1296 * If the cache was previously purged, there is no point in
1297 * increasing the bucket size as there was an indication of
1298 * memory pressure on the system.
1299 */
1300 lck_mtx_lock_spin(&cp->mc_sync_lock);
1301 if (!(cp->mc_flags & MCF_NOCPUCACHE) && cp->mc_enable_cnt)
1302 need_bkt_reenable = 1;
1303 lck_mtx_unlock(&cp->mc_sync_lock);
1304
1305 MCACHE_LOCK(&cp->mc_bkt_lock);
1306 /*
1307 * If the contention count is greater than the threshold, and if
1308 * we are not already at the maximum bucket size, increase it.
1309 * Otherwise, if this cache was previously purged by the user
1310 * then we simply reenable it.
1311 */
1312 if ((unsigned int)cp->mc_chunksize < cp->cache_bkttype->bt_maxbuf &&
1313 (int)(cp->mc_bkt_contention - cp->mc_bkt_contention_prev) >
1314 mcache_bkt_contention && !need_bkt_reenable)
1315 need_bkt_resize = 1;
1316
1317 cp->mc_bkt_contention_prev = cp->mc_bkt_contention;
1318 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1319
1320 if (need_bkt_resize)
1321 mcache_dispatch(mcache_cache_bkt_resize, cp);
1322 else if (need_bkt_reenable)
1323 mcache_dispatch(mcache_cache_enable, cp);
1324 }
1325
1326 /*
1327 * Recompute a cache's bucket size. This is an expensive operation
1328 * and should not be done frequently; larger buckets provide a higher
1329 * transfer rate between the CPU and bucket layers, while smaller
1330 * buckets reduce memory consumption.
1331 */
1332 static void
1333 mcache_cache_bkt_resize(void *arg)
1334 {
1335 mcache_t *cp = arg;
1336 mcache_bkttype_t *btp = cp->cache_bkttype;
1337
1338 if ((unsigned int)cp->mc_chunksize < btp->bt_maxbuf) {
1339 mcache_bkt_purge(cp);
1340
1341 /*
1342 * Upgrade to the next bucket type with larger bucket size;
1343 * temporarily bias the previous contention snapshot so that the
1344 * computed delta stays negative and no resize is requested.
1345 */
1346 MCACHE_LOCK(&cp->mc_bkt_lock);
1347 cp->cache_bkttype = ++btp;
1348 cp->mc_bkt_contention_prev = cp->mc_bkt_contention + INT_MAX;
1349 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1350
1351 mcache_cache_enable(cp);
1352 }
1353 }
1354
1355 /*
1356 * Reenable a cache that was previously disabled by a purge.
1357 */
1358 static void
1359 mcache_cache_enable(void *arg)
1360 {
1361 mcache_t *cp = arg;
1362
1363 lck_mtx_lock_spin(&cp->mc_sync_lock);
1364 cp->mc_purge_cnt = 0;
1365 cp->mc_enable_cnt = 0;
1366 lck_mtx_unlock(&cp->mc_sync_lock);
1367
1368 mcache_cache_bkt_enable(cp);
1369 }
1370
1371 static void
1372 mcache_update_timeout(__unused void *arg)
1373 {
1374 timeout(mcache_update, NULL, mcache_reap_interval);
1375 }
1376
1377 static void
1378 mcache_update(__unused void *arg)
1379 {
1380 mcache_applyall(mcache_cache_update);
1381 mcache_dispatch(mcache_update_timeout, NULL);
1382 }
1383
1384 static void
1385 mcache_applyall(void (*func)(mcache_t *))
1386 {
1387 mcache_t *cp;
1388
1389 MCACHE_LIST_LOCK();
1390 LIST_FOREACH(cp, &mcache_head, mc_list) {
1391 func(cp);
1392 }
1393 MCACHE_LIST_UNLOCK();
1394 }
1395
1396 static void
1397 mcache_dispatch(void (*func)(void *), void *arg)
1398 {
1399 ASSERT(func != NULL);
1400 timeout(func, arg, hz/1000);
1401 }
1402
1403 __private_extern__ void
1404 mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp)
1405 {
1406 mca->mca_addr = addr;
1407 mca->mca_cache = cp;
1408 mca->mca_pthread = mca->mca_thread;
1409 mca->mca_thread = current_thread();
1410 bcopy(mca->mca_stack, mca->mca_pstack, sizeof (mca->mca_pstack));
1411 mca->mca_pdepth = mca->mca_depth;
1412 bzero(mca->mca_stack, sizeof (mca->mca_stack));
1413 mca->mca_depth = OSBacktrace(mca->mca_stack, MCACHE_STACK_DEPTH);
1414 }
1415
1416 __private_extern__ void
1417 mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size)
1418 {
1419 u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
1420 u_int64_t *buf = (u_int64_t *)buf_arg;
1421
1422 VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
1423 VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));
1424
1425 while (buf < buf_end)
1426 *buf++ = pattern;
1427 }
1428
1429 __private_extern__ void *
1430 mcache_verify_pattern(u_int64_t pattern, void *buf_arg, size_t size)
1431 {
1432 u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
1433 u_int64_t *buf;
1434
1435 VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
1436 VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));
1437
1438 for (buf = buf_arg; buf < buf_end; buf++) {
1439 if (*buf != pattern)
1440 return (buf);
1441 }
1442 return (NULL);
1443 }
1444
1445 __private_extern__ void *
1446 mcache_verify_set_pattern(u_int64_t old, u_int64_t new, void *buf_arg,
1447 size_t size)
1448 {
1449 u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
1450 u_int64_t *buf;
1451
1452 VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
1453 VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));
1454
1455 for (buf = buf_arg; buf < buf_end; buf++) {
1456 if (*buf != old) {
1457 mcache_set_pattern(old, buf_arg,
1458 (uintptr_t)buf - (uintptr_t)buf_arg);
1459 return (buf);
1460 }
1461 *buf = new;
1462 }
1463 return (NULL);
1464 }
1465
1466 __private_extern__ void
1467 mcache_audit_free_verify(mcache_audit_t *mca, void *base, size_t offset,
1468 size_t size)
1469 {
1470 void *addr;
1471 u_int64_t *oaddr64;
1472 mcache_obj_t *next;
1473
1474 addr = (void *)((uintptr_t)base + offset);
1475 next = ((mcache_obj_t *)addr)->obj_next;
1476
1477 /* For the "obj_next" pointer in the buffer */
1478 oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
1479 *oaddr64 = MCACHE_FREE_PATTERN;
1480
1481 if ((oaddr64 = mcache_verify_pattern(MCACHE_FREE_PATTERN,
1482 (caddr_t)base, size)) != NULL) {
1483 mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
1484 (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
1485 /* NOTREACHED */
1486 }
1487 ((mcache_obj_t *)addr)->obj_next = next;
1488 }
1489
1490 __private_extern__ void
1491 mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset,
1492 size_t size)
1493 {
1494 void *addr;
1495 u_int64_t *oaddr64;
1496 mcache_obj_t *next;
1497
1498 addr = (void *)((uintptr_t)base + offset);
1499 next = ((mcache_obj_t *)addr)->obj_next;
1500
1501 /* For the "obj_next" pointer in the buffer */
1502 oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
1503 *oaddr64 = MCACHE_FREE_PATTERN;
1504
1505 if ((oaddr64 = mcache_verify_set_pattern(MCACHE_FREE_PATTERN,
1506 MCACHE_UNINITIALIZED_PATTERN, (caddr_t)base, size)) != NULL) {
1507 mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
1508 (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
1509 /* NOTREACHED */
1510 }
1511 ((mcache_obj_t *)addr)->obj_next = next;
1512 }
1513
1514 #undef panic
1515
1516 __private_extern__ char *
1517 mcache_dump_mca(mcache_audit_t *mca)
1518 {
1519 if (mca_dump_buf == NULL)
1520 return (NULL);
1521
1522 snprintf(mca_dump_buf, DUMP_MCA_BUF_SIZE,
1523 "mca %p: addr %p, cache %p (%s)\n"
1524 "last transaction; thread %p, saved PC stack (%d deep):\n"
1525 "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
1526 "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
1527 "previous transaction; thread %p, saved PC stack (%d deep):\n"
1528 "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
1529 "\t%p, %p, %p, %p, %p, %p, %p, %p\n",
1530 mca, mca->mca_addr, mca->mca_cache,
1531 mca->mca_cache ? mca->mca_cache->mc_name : "?",
1532 mca->mca_thread, mca->mca_depth,
1533 mca->mca_stack[0], mca->mca_stack[1], mca->mca_stack[2],
1534 mca->mca_stack[3], mca->mca_stack[4], mca->mca_stack[5],
1535 mca->mca_stack[6], mca->mca_stack[7], mca->mca_stack[8],
1536 mca->mca_stack[9], mca->mca_stack[10], mca->mca_stack[11],
1537 mca->mca_stack[12], mca->mca_stack[13], mca->mca_stack[14],
1538 mca->mca_stack[15],
1539 mca->mca_pthread, mca->mca_pdepth,
1540 mca->mca_pstack[0], mca->mca_pstack[1], mca->mca_pstack[2],
1541 mca->mca_pstack[3], mca->mca_pstack[4], mca->mca_pstack[5],
1542 mca->mca_pstack[6], mca->mca_pstack[7], mca->mca_pstack[8],
1543 mca->mca_pstack[9], mca->mca_pstack[10], mca->mca_pstack[11],
1544 mca->mca_pstack[12], mca->mca_pstack[13], mca->mca_pstack[14],
1545 mca->mca_pstack[15]);
1546
1547 return (mca_dump_buf);
1548 }
1549
1550 __private_extern__ void
1551 mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset,
1552 int64_t expected, int64_t got)
1553 {
1554 if (mca == NULL) {
1555 panic("mcache_audit: buffer %p modified after free at "
1556 "offset 0x%lx (0x%llx instead of 0x%llx)\n", addr,
1557 offset, got, expected);
1558 /* NOTREACHED */
1559 }
1560
1561 panic("mcache_audit: buffer %p modified after free at offset 0x%lx "
1562 "(0x%llx instead of 0x%llx)\n%s\n",
1563 addr, offset, got, expected, mcache_dump_mca(mca));
1564 /* NOTREACHED */
1565 }
1566
1567 __private_extern__ int
1568 assfail(const char *a, const char *f, int l)
1569 {
1570 panic("assertion failed: %s, file: %s, line: %d", a, f, l);
1571 return (0);
1572 }