[apple/xnu.git] / bsd / kern / mcache.c (xnu-1228)
1 /*
2 * Copyright (c) 2006-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Memory allocator with per-CPU caching, derived from the kmem magazine
31 * concept and implementation as described in the following paper:
32 * http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick.pdf
33 * That implementation is Copyright 2006 Sun Microsystems, Inc. All rights
34 * reserved. Use is subject to license terms.
35 *
36 * There are several major differences between this and the original kmem
37 * magazine: this derivative implementation allows for multiple objects to
38 * be allocated and freed from/to the object cache in one call; in addition,
39 * it provides greater flexibility by allowing the user to define a custom
40 * slab allocator (instead of the default zone allocator). Finally, no
41 * object construction/destruction takes place at the moment, although
42 * this could be added in the future to improve efficiency.
43 */
44
45 #include <sys/param.h>
46 #include <sys/types.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/queue.h>
50 #include <sys/kernel.h>
51 #include <sys/systm.h>
52
53 #include <kern/debug.h>
54 #include <kern/zalloc.h>
55 #include <kern/cpu_number.h>
56 #include <kern/locks.h>
57
58 #include <libkern/libkern.h>
59 #include <libkern/OSAtomic.h>
60 #include <libkern/OSDebug.h>
61
62 #include <mach/vm_param.h>
63 #include <machine/limits.h>
64 #include <machine/machine_routines.h>
65
66 #include <string.h>
67
68 #include <sys/mcache.h>
69
70 #define MCACHE_SIZE(n) \
71 ((size_t)(&((mcache_t *)0)->mc_cpu[n]))
72
73 /* Allocate extra in case we need to manually align the pointer */
74 #define MCACHE_ALLOC_SIZE \
75 (sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_SIZE)
76
77 #define MCACHE_CPU(c) \
78 (mcache_cpu_t *)((char *)(c) + MCACHE_SIZE(cpu_number()))
79
80 /*
81 * MCACHE_LIST_LOCK() and MCACHE_LIST_UNLOCK() are macros used
82 * to serialize accesses to the global list of caches in the system.
83 * They also record the thread currently running in the critical
84 * section, so that we can avoid recursive requests to reap the
85 * caches when memory runs low.
86 */
87 #define MCACHE_LIST_LOCK() { \
88 lck_mtx_lock(mcache_llock); \
89 mcache_llock_owner = current_thread(); \
90 }
91
92 #define MCACHE_LIST_UNLOCK() { \
93 mcache_llock_owner = NULL; \
94 lck_mtx_unlock(mcache_llock); \
95 }
96
97 #define MCACHE_LOCK(l) lck_mtx_lock(l)
98 #define MCACHE_UNLOCK(l) lck_mtx_unlock(l)
99 #define MCACHE_LOCK_TRY(l) lck_mtx_try_lock(l)
100
101 /* This should be in a header file */
102 #define atomic_add_32(a, n) ((void) OSAddAtomic(n, (volatile SInt32 *)a))
103
104 static int ncpu;
105 static lck_mtx_t *mcache_llock;
106 static struct thread *mcache_llock_owner;
107 static lck_attr_t *mcache_llock_attr;
108 static lck_grp_t *mcache_llock_grp;
109 static lck_grp_attr_t *mcache_llock_grp_attr;
110 static struct zone *mcache_zone;
111 static unsigned int mcache_reap_interval;
112 static UInt32 mcache_reaping;
113 static int mcache_ready;
114 static int mcache_updating;
115
116 static int mcache_bkt_contention = 3;
117 #if DEBUG
118 static unsigned int mcache_flags = MCF_DEBUG;
119 #else
120 static unsigned int mcache_flags = 0;
121 #endif
122
123 #define DUMP_MCA_BUF_SIZE 512
124 static char *mca_dump_buf;
125
126 static mcache_bkttype_t mcache_bkttype[] = {
127 { 1, 4096, 32768, NULL },
128 { 3, 2048, 16384, NULL },
129 { 7, 1024, 12288, NULL },
130 { 15, 256, 8192, NULL },
131 { 31, 64, 4096, NULL },
132 { 47, 0, 2048, NULL },
133 { 63, 0, 1024, NULL },
134 { 95, 0, 512, NULL },
135 { 143, 0, 256, NULL },
136 { 165, 0, 0, NULL },
137 };
138
139 static mcache_t *mcache_create_common(const char *, size_t, size_t,
140 mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_notifyfn_t,
141 void *, u_int32_t, int, int);
142 static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***,
143 unsigned int, int);
144 static void mcache_slab_free(void *, mcache_obj_t *, boolean_t);
145 static void mcache_slab_audit(void *, mcache_obj_t *, boolean_t);
146 static void mcache_cpu_refill(mcache_cpu_t *, mcache_bkt_t *, int);
147 static mcache_bkt_t *mcache_bkt_alloc(mcache_t *, mcache_bktlist_t *,
148 mcache_bkttype_t **);
149 static void mcache_bkt_free(mcache_t *, mcache_bktlist_t *, mcache_bkt_t *);
150 static void mcache_cache_bkt_enable(mcache_t *);
151 static void mcache_bkt_purge(mcache_t *);
152 static void mcache_bkt_destroy(mcache_t *, mcache_bkttype_t *,
153 mcache_bkt_t *, int);
154 static void mcache_bkt_ws_update(mcache_t *);
155 static void mcache_bkt_ws_reap(mcache_t *);
156 static void mcache_dispatch(void (*)(void *), void *);
157 static void mcache_cache_reap(mcache_t *);
158 static void mcache_cache_update(mcache_t *);
159 static void mcache_cache_bkt_resize(void *);
160 static void mcache_cache_enable(void *);
161 static void mcache_update(void *);
162 static void mcache_update_timeout(void *);
163 static void mcache_applyall(void (*)(mcache_t *));
164 static void mcache_reap_start(void *);
165 static void mcache_reap_done(void *);
166 static void mcache_reap_timeout(void *);
167 static void mcache_notify(mcache_t *, u_int32_t);
168 static void mcache_purge(void *);
169
170 static LIST_HEAD(, mcache) mcache_head;
171 mcache_t *mcache_audit_cache;
172
173 /*
174 * Initialize the framework; this is currently called as part of BSD init.
175 */
176 __private_extern__ void
177 mcache_init(void)
178 {
179 mcache_bkttype_t *btp;
180 unsigned int i;
181 char name[32];
182
183 ncpu = ml_get_max_cpus();
184
185 mcache_llock_grp_attr = lck_grp_attr_alloc_init();
186 mcache_llock_grp = lck_grp_alloc_init("mcache.list",
187 mcache_llock_grp_attr);
188 mcache_llock_attr = lck_attr_alloc_init();
189 mcache_llock = lck_mtx_alloc_init(mcache_llock_grp, mcache_llock_attr);
190
191 mcache_zone = zinit(MCACHE_ALLOC_SIZE, 256 * MCACHE_ALLOC_SIZE,
192 PAGE_SIZE, "mcache");
193 if (mcache_zone == NULL)
194 panic("mcache_init: failed to allocate mcache zone\n");
195
196 LIST_INIT(&mcache_head);
197
198 for (i = 0; i < sizeof (mcache_bkttype) / sizeof (*btp); i++) {
199 btp = &mcache_bkttype[i];
200 (void) snprintf(name, sizeof (name), "bkt_%d",
201 btp->bt_bktsize);
202 btp->bt_cache = mcache_create(name,
203 (btp->bt_bktsize + 1) * sizeof (void *), 0, 0, MCR_SLEEP);
204 }
205
206 PE_parse_boot_arg("mcache_flags", &mcache_flags);
207 mcache_flags &= MCF_FLAGS_MASK;
208
209 mcache_audit_cache = mcache_create("audit", sizeof (mcache_audit_t),
210 0, 0, MCR_SLEEP);
211
212 mcache_reap_interval = 15 * hz;
213 mcache_applyall(mcache_cache_bkt_enable);
214 mcache_ready = 1;
215 }
216
217 /*
218 * Return the global mcache flags.
219 */
220 __private_extern__ unsigned int
221 mcache_getflags(void)
222 {
223 return (mcache_flags);
224 }
225
226 /*
227 * Create a cache using the zone allocator as the backend slab allocator.
228 * The caller may specify any alignment for the object; if it specifies 0
229 * the default alignment (MCACHE_ALIGN) will be used.
230 */
231 __private_extern__ mcache_t *
232 mcache_create(const char *name, size_t bufsize, size_t align,
233 u_int32_t flags, int wait)
234 {
235 return (mcache_create_common(name, bufsize, align, mcache_slab_alloc,
236 mcache_slab_free, mcache_slab_audit, NULL, NULL, flags, 1, wait));
237 }
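
/*
 * Illustrative sketch of typical client usage of the simple-cache API:
 * create a cache of fixed-size objects with the default alignment,
 * allocate and free a single object, then destroy the cache.  The
 * example_obj_t type and example_cache_demo() name are hypothetical.
 */
typedef struct example_obj {
	u_int32_t	eo_id;
	u_int32_t	eo_flags;
} example_obj_t;

static void
example_cache_demo(void)
{
	mcache_t *cp;
	example_obj_t *eo;

	/* 0 selects the default alignment (MCACHE_ALIGN); MCR_SLEEP may block */
	if ((cp = mcache_create("example", sizeof (example_obj_t),
	    0, 0, MCR_SLEEP)) == NULL)
		return;

	/* Blocking single-object allocation, served from the CPU layer if possible */
	if ((eo = mcache_alloc(cp, MCR_SLEEP)) != NULL) {
		eo->eo_id = 1;
		/* Return the object; it goes to the CPU's filled bucket when enabled */
		mcache_free(cp, eo);
	}

	/* Purge all layers and release the cache */
	mcache_destroy(cp);
}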
238
239 /*
240 * Create a cache using a custom backend slab allocator. Since the caller
241 * is responsible for allocation, no alignment guarantee will be provided
242 * by this framework.
243 */
244 __private_extern__ mcache_t *
245 mcache_create_ext(const char *name, size_t bufsize,
246 mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
247 mcache_notifyfn_t notifyfn, void *arg, u_int32_t flags, int wait)
248 {
249 return (mcache_create_common(name, bufsize, 0, allocfn,
250 freefn, auditfn, notifyfn, arg, flags, 0, wait));
251 }
252
253 /*
254 * Common cache creation routine.
255 */
256 static mcache_t *
257 mcache_create_common(const char *name, size_t bufsize, size_t align,
258 mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
259 mcache_notifyfn_t notifyfn, void *arg, u_int32_t flags, int need_zone,
260 int wait)
261 {
262 mcache_bkttype_t *btp;
263 mcache_t *cp = NULL;
264 size_t chunksize;
265 void *buf, **pbuf;
266 int c;
267 char lck_name[64];
268
269 /* If auditing is on and print buffer is NULL, allocate it now */
270 if ((flags & MCF_AUDIT) && mca_dump_buf == NULL) {
271 int malloc_wait = (wait & MCR_NOSLEEP) ? M_NOWAIT : M_WAITOK;
272 MALLOC(mca_dump_buf, char *, DUMP_MCA_BUF_SIZE, M_TEMP,
273 malloc_wait | M_ZERO);
274 if (mca_dump_buf == NULL)
275 return (NULL);
276 }
277
278 if (!(wait & MCR_NOSLEEP))
279 buf = zalloc(mcache_zone);
280 else
281 buf = zalloc_noblock(mcache_zone);
282
283 if (buf == NULL)
284 goto fail;
285
286 bzero(buf, MCACHE_ALLOC_SIZE);
287
288 /*
289 * If the memory we got is not cache-aligned, round it up
290 * accordingly. This is needed in order to get the rest of the
291 * structure members aligned properly. It also means that the
292 * memory span gets shifted due to the round-up, but that is
293 * fine since we've allocated extra space for this.
294 */
295 cp = (mcache_t *)
296 P2ROUNDUP((intptr_t)buf + sizeof (void *), CPU_CACHE_SIZE);
297 pbuf = (void **)((intptr_t)cp - sizeof (void *));
298 *pbuf = buf;
299
300 /*
301 * Alignment is guaranteed only when we use the internal slab
302 * allocator (currently set to use the zone allocator).
303 */
304 if (!need_zone)
305 align = 1;
306 else if (align == 0)
307 align = MCACHE_ALIGN;
308
309 if ((align & (align - 1)) != 0)
310 panic("mcache_create: bad alignment %lu", align);
311
312 cp->mc_align = align;
313 cp->mc_slab_alloc = allocfn;
314 cp->mc_slab_free = freefn;
315 cp->mc_slab_audit = auditfn;
316 cp->mc_slab_notify = notifyfn;
317 cp->mc_private = need_zone ? cp : arg;
318 cp->mc_bufsize = bufsize;
319 cp->mc_flags = (flags & MCF_FLAGS_MASK) | mcache_flags;
320
321 (void) snprintf(cp->mc_name, sizeof (cp->mc_name), "mcache.%s", name);
322
323 (void) snprintf(lck_name, sizeof (lck_name), "%s.cpu", cp->mc_name);
324 cp->mc_cpu_lock_grp_attr = lck_grp_attr_alloc_init();
325 cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name,
326 cp->mc_cpu_lock_grp_attr);
327 cp->mc_cpu_lock_attr = lck_attr_alloc_init();
328
329 /*
330 * Allocation chunk size is the object's size plus any extra size
331 * needed to satisfy the object's alignment. It is enforced to be
332 * at least the size of an LP64 pointer to simplify auditing and to
333 * handle multiple-element allocation requests, where the elements
334 * returned are linked together in a list.
335 */
336 chunksize = MAX(bufsize, sizeof (u_int64_t));
337 if (need_zone) {
338 /* Enforce 64-bit minimum alignment for zone-based buffers */
339 align = MAX(align, sizeof (u_int64_t));
340 chunksize += sizeof (void *) + align;
341 chunksize = P2ROUNDUP(chunksize, align);
342 if ((cp->mc_slab_zone = zinit(chunksize, 64 * 1024 * ncpu,
343 PAGE_SIZE, cp->mc_name)) == NULL)
344 goto fail;
345 zone_change(cp->mc_slab_zone, Z_EXPAND, TRUE);
346 }
347 cp->mc_chunksize = chunksize;
348
349 /*
350 * Initialize the bucket layer.
351 */
352 (void) snprintf(lck_name, sizeof (lck_name), "%s.bkt", cp->mc_name);
353 cp->mc_bkt_lock_grp_attr = lck_grp_attr_alloc_init();
354 cp->mc_bkt_lock_grp = lck_grp_alloc_init(lck_name,
355 cp->mc_bkt_lock_grp_attr);
356 cp->mc_bkt_lock_attr = lck_attr_alloc_init();
357 lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp,
358 cp->mc_bkt_lock_attr);
359
360 (void) snprintf(lck_name, sizeof (lck_name), "%s.sync", cp->mc_name);
361 cp->mc_sync_lock_grp_attr = lck_grp_attr_alloc_init();
362 cp->mc_sync_lock_grp = lck_grp_alloc_init(lck_name,
363 cp->mc_sync_lock_grp_attr);
364 cp->mc_sync_lock_attr = lck_attr_alloc_init();
365 lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp,
366 cp->mc_sync_lock_attr);
367
368 for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++)
369 continue;
370
371 cp->cache_bkttype = btp;
372
373 /*
374 * Initialize the CPU layer. Each per-CPU structure is aligned
375 * on the CPU cache line boundary to prevent false sharing.
376 */
377 for (c = 0; c < ncpu; c++) {
378 mcache_cpu_t *ccp = &cp->mc_cpu[c];
379
380 VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_SIZE));
381 lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp,
382 cp->mc_cpu_lock_attr);
383 ccp->cc_objs = -1;
384 ccp->cc_pobjs = -1;
385 }
386
387 if (mcache_ready)
388 mcache_cache_bkt_enable(cp);
389
390 /* TODO: dynamically create sysctl for stats */
391
392 MCACHE_LIST_LOCK();
393 LIST_INSERT_HEAD(&mcache_head, cp, mc_list);
394 MCACHE_LIST_UNLOCK();
395
396 /*
397 * If cache buckets are enabled and this is the first cache
398 * created, start the periodic cache update.
399 */
400 if (!(mcache_flags & MCF_NOCPUCACHE) && !mcache_updating) {
401 mcache_updating = 1;
402 mcache_update_timeout(NULL);
403 }
404 if (cp->mc_flags & MCF_DEBUG) {
405 printf("mcache_create: %s (%s) arg %p bufsize %lu align %lu "
406 "chunksize %lu bktsize %d\n", name, need_zone ? "i" : "e",
407 arg, bufsize, cp->mc_align, chunksize, btp->bt_bktsize);
408 }
409 return (cp);
410
411 fail:
412 if (buf != NULL)
413 zfree(mcache_zone, buf);
414 return (NULL);
415 }
416
417 /*
418 * Allocate one or more objects from a cache.
419 */
420 __private_extern__ unsigned int
421 mcache_alloc_ext(mcache_t *cp, mcache_obj_t **list, unsigned int num, int wait)
422 {
423 mcache_cpu_t *ccp;
424 mcache_obj_t **top = &(*list);
425 mcache_bkt_t *bkt;
426 unsigned int need = num;
427 boolean_t nwretry = FALSE;
428
429 /* MCR_NOSLEEP and MCR_FAILOK are mutually exclusive */
430 VERIFY((wait & (MCR_NOSLEEP|MCR_FAILOK)) != (MCR_NOSLEEP|MCR_FAILOK));
431
432 ASSERT(list != NULL);
433 *list = NULL;
434
435 if (num == 0)
436 return (0);
437
438 retry_alloc:
439 /* We may not always be running on the same CPU in case of retries */
440 ccp = MCACHE_CPU(cp);
441
442 MCACHE_LOCK(&ccp->cc_lock);
443 for (;;) {
444 /*
445 * If we have an object in the current CPU's filled bucket,
446 * chain the object to any previous objects and return if
447 * we've satisfied the number of requested objects.
448 */
449 if (ccp->cc_objs > 0) {
450 mcache_obj_t *tail;
451 int objs;
452
453 /*
454 * Objects in the bucket are already linked together
455 * with the most recently freed object at the head of
456 * the list; grab as many objects as we can.
457 */
458 objs = MIN((unsigned int)ccp->cc_objs, need);
459 *list = ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
460 ccp->cc_objs -= objs;
461 ccp->cc_alloc += objs;
462
463 tail = ccp->cc_filled->bkt_obj[ccp->cc_objs];
464 list = &tail->obj_next;
465 *list = NULL;
466
467 /* If we got them all, return to caller */
468 if ((need -= objs) == 0) {
469 MCACHE_UNLOCK(&ccp->cc_lock);
470 if (cp->mc_flags & MCF_DEBUG)
471 goto debug_alloc;
472
473 return (num);
474 }
475 }
476
477 /*
478 * The CPU's filled bucket is empty. If the previous filled
479 * bucket was full, exchange and try again.
480 */
481 if (ccp->cc_pobjs > 0) {
482 mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
483 continue;
484 }
485
486 /*
487 * If the bucket layer is disabled, allocate from slab. This
488 * can happen either because MCF_NOCPUCACHE is set, or because
489 * the bucket layer is currently being resized.
490 */
491 if (ccp->cc_bktsize == 0)
492 break;
493
494 /*
495 * Both of the CPU's buckets are empty; try to get a full
496 * bucket from the bucket layer. Upon success, refill this
497 * CPU and place any empty bucket into the empty list.
498 */
499 bkt = mcache_bkt_alloc(cp, &cp->mc_full, NULL);
500 if (bkt != NULL) {
501 if (ccp->cc_pfilled != NULL)
502 mcache_bkt_free(cp, &cp->mc_empty,
503 ccp->cc_pfilled);
504 mcache_cpu_refill(ccp, bkt, ccp->cc_bktsize);
505 continue;
506 }
507
508 /*
509 * The bucket layer has no full buckets; allocate the
510 * object(s) directly from the slab layer.
511 */
512 break;
513 }
514 MCACHE_UNLOCK(&ccp->cc_lock);
515
516 need -= (*cp->mc_slab_alloc)(cp->mc_private, &list, need, wait);
517
518 /*
519 * If this is a blocking allocation, or if it is non-blocking and
520 * the cache's full bucket list is non-empty, then retry the allocation.
521 */
522 if (need > 0) {
523 if (!(wait & MCR_NONBLOCKING)) {
524 atomic_add_32(&cp->mc_wretry_cnt, 1);
525 goto retry_alloc;
526 } else if ((wait & (MCR_NOSLEEP | MCR_TRYHARD)) &&
527 !mcache_bkt_isempty(cp)) {
528 if (!nwretry)
529 nwretry = TRUE;
530 atomic_add_32(&cp->mc_nwretry_cnt, 1);
531 goto retry_alloc;
532 } else if (nwretry) {
533 atomic_add_32(&cp->mc_nwfail_cnt, 1);
534 }
535 }
536
537 if (!(cp->mc_flags & MCF_DEBUG))
538 return (num - need);
539
540 debug_alloc:
541 if (cp->mc_flags & MCF_VERIFY) {
542 mcache_obj_t **o = top;
543 unsigned int n;
544
545 n = 0;
546 /*
547 * Verify that the chain of objects has the same count as
548 * what we are about to report to the caller. Any mismatch
549 * here means that the object list is insanely broken and
550 * therefore we must panic.
551 */
552 while (*o != NULL) {
553 o = &(*o)->obj_next;
554 ++n;
555 }
556 if (n != (num - need)) {
557 panic("mcache_alloc_ext: %s cp %p corrupted list "
558 "(got %d actual %d)\n", cp->mc_name,
559 (void *)cp, num - need, n);
560 }
561 }
562
563 /* Invoke the slab layer audit callback if auditing is enabled */
564 if ((cp->mc_flags & MCF_AUDIT) && cp->mc_slab_audit != NULL)
565 (*cp->mc_slab_audit)(cp->mc_private, *top, TRUE);
566
567 return (num - need);
568 }
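
/*
 * Illustrative sketch of a batched, non-blocking allocation: up to 32
 * objects are requested in one call and whatever was obtained is freed
 * back in one call.  The objects come back chained through obj_next and
 * the chain is NULL-terminated, so it can be handed straight back to
 * mcache_free_ext().  The example_batch_alloc() name is hypothetical.
 */
static unsigned int
example_batch_alloc(mcache_t *cp)
{
	mcache_obj_t *list, *o;
	unsigned int n, got;

	/* May return fewer than requested (even zero) since MCR_NOSLEEP is set */
	got = mcache_alloc_ext(cp, &list, 32, MCR_NOSLEEP);

	/* Walk the NULL-terminated chain and verify the advertised count */
	for (o = list, n = 0; o != NULL; o = o->obj_next)
		++n;
	ASSERT(n == got);

	/* Return the entire chain to the cache in a single call */
	if (list != NULL)
		mcache_free_ext(cp, list);

	return (got);
}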
569
570 /*
571 * Allocate a single object from a cache.
572 */
573 __private_extern__ void *
574 mcache_alloc(mcache_t *cp, int wait)
575 {
576 mcache_obj_t *buf;
577
578 (void) mcache_alloc_ext(cp, &buf, 1, wait);
579 return (buf);
580 }
581
582 __private_extern__ void
583 mcache_waiter_inc(mcache_t *cp)
584 {
585 atomic_add_32(&cp->mc_waiter_cnt, 1);
586 }
587
588 __private_extern__ void
589 mcache_waiter_dec(mcache_t *cp)
590 {
591 atomic_add_32(&cp->mc_waiter_cnt, -1);
592 }
593
594 __private_extern__ boolean_t
595 mcache_bkt_isempty(mcache_t *cp)
596 {
597 /*
598 * This isn't meant to accurately tell whether there are
599 * any full buckets in the cache; it is simply a way to
600 * obtain "hints" about the state of the cache.
601 */
602 return (cp->mc_full.bl_total == 0);
603 }
604
605 /*
606 * Notify the slab layer about an event.
607 */
608 static void
609 mcache_notify(mcache_t *cp, u_int32_t event)
610 {
611 if (cp->mc_slab_notify != NULL)
612 (*cp->mc_slab_notify)(cp->mc_private, event);
613 }
614
615 /*
616 * Purge the cache and disable its buckets.
617 */
618 static void
619 mcache_purge(void *arg)
620 {
621 mcache_t *cp = arg;
622
623 mcache_bkt_purge(cp);
624 /*
625 * We cannot simply call mcache_cache_bkt_enable() from here as
626 * a bucket resize may be in flight and we would cause the CPU
627 * layers of the cache to point to different sizes. Therefore,
628 * we simply increment the enable count so that during the next
629 * periodic cache update the buckets can be reenabled.
630 */
631 lck_mtx_lock_spin(&cp->mc_sync_lock);
632 cp->mc_enable_cnt++;
633 lck_mtx_unlock(&cp->mc_sync_lock);
634
635 }
636
637 __private_extern__ boolean_t
638 mcache_purge_cache(mcache_t *cp)
639 {
640 /*
641 * Purging a cache that has no per-CPU caches or is already
642 * in the process of being purged is rather pointless.
643 */
644 if (cp->mc_flags & MCF_NOCPUCACHE)
645 return (FALSE);
646
647 lck_mtx_lock_spin(&cp->mc_sync_lock);
648 if (cp->mc_purge_cnt > 0) {
649 lck_mtx_unlock(&cp->mc_sync_lock);
650 return (FALSE);
651 }
652 cp->mc_purge_cnt++;
653 lck_mtx_unlock(&cp->mc_sync_lock);
654
655 mcache_dispatch(mcache_purge, cp);
656
657 return (TRUE);
658 }
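
/*
 * Illustrative sketch of one way a client could react to memory pressure:
 * attempt a targeted purge of its own cache first and, if that cannot be
 * scheduled (no CPU layer, or a purge already in flight), fall back to a
 * global reap of all caches.  The example_pressure_relief() name is
 * hypothetical.
 */
static void
example_pressure_relief(mcache_t *cp)
{
	/* Returns FALSE if the cache has no CPU layer or is already purging */
	if (!mcache_purge_cache(cp))
		mcache_reap();
}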
659
660 /*
661 * Free a single object to a cache.
662 */
663 __private_extern__ void
664 mcache_free(mcache_t *cp, void *buf)
665 {
666 ((mcache_obj_t *)buf)->obj_next = NULL;
667 mcache_free_ext(cp, (mcache_obj_t *)buf);
668 }
669
670 /*
671 * Free one or more objects to a cache.
672 */
673 __private_extern__ void
674 mcache_free_ext(mcache_t *cp, mcache_obj_t *list)
675 {
676 mcache_cpu_t *ccp = MCACHE_CPU(cp);
677 mcache_bkttype_t *btp;
678 mcache_obj_t *nlist;
679 mcache_bkt_t *bkt;
680
681 /* Invoke the slab layer audit callback if auditing is enabled */
682 if ((cp->mc_flags & MCF_AUDIT) && cp->mc_slab_audit != NULL)
683 (*cp->mc_slab_audit)(cp->mc_private, list, FALSE);
684
685 MCACHE_LOCK(&ccp->cc_lock);
686 for (;;) {
687 /*
688 * If there is space in the current CPU's filled bucket, put
689 * the object there and return once all objects are freed.
690 * Note the cast to unsigned integer takes care of the case
691 * where the bucket layer is disabled (when cc_objs is -1).
692 */
693 if ((unsigned int)ccp->cc_objs <
694 (unsigned int)ccp->cc_bktsize) {
695 /*
696 * Reverse the list while we place the object into the
697 * bucket; this effectively causes the most recently
698 * freed object(s) to be reused during allocation.
699 */
700 nlist = list->obj_next;
701 list->obj_next = (ccp->cc_objs == 0) ? NULL :
702 ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
703 ccp->cc_filled->bkt_obj[ccp->cc_objs++] = list;
704 ccp->cc_free++;
705
706 if ((list = nlist) != NULL)
707 continue;
708
709 /* We are done; return to caller */
710 MCACHE_UNLOCK(&ccp->cc_lock);
711
712 /* If there is a waiter below, notify it */
713 if (cp->mc_waiter_cnt > 0)
714 mcache_notify(cp, MCN_RETRYALLOC);
715 return;
716 }
717
718 /*
719 * The CPU's filled bucket is full. If the previous filled
720 * bucket was empty, exchange and try again.
721 */
722 if (ccp->cc_pobjs == 0) {
723 mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
724 continue;
725 }
726
727 /*
728 * If the bucket layer is disabled, free to slab. This can
729 * happen either because MCF_NOCPUCACHE is set, or because
730 * the bucket layer is currently being resized.
731 */
732 if (ccp->cc_bktsize == 0)
733 break;
734
735 /*
736 * Both of the CPU's buckets are full; try to get an empty
737 * bucket from the bucket layer. Upon success, empty this
738 * CPU and place any full bucket into the full list.
739 */
740 bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp);
741 if (bkt != NULL) {
742 if (ccp->cc_pfilled != NULL)
743 mcache_bkt_free(cp, &cp->mc_full,
744 ccp->cc_pfilled);
745 mcache_cpu_refill(ccp, bkt, 0);
746 continue;
747 }
748
749 /*
750 * We need an empty bucket to put our freed objects into
751 * but couldn't get an empty bucket from the bucket layer;
752 * attempt to allocate one. We do not want to block for
753 * allocation here, and if the bucket allocation fails
754 * we will simply fall through to the slab layer.
755 */
756 MCACHE_UNLOCK(&ccp->cc_lock);
757 bkt = mcache_alloc(btp->bt_cache, MCR_NOSLEEP);
758 MCACHE_LOCK(&ccp->cc_lock);
759
760 if (bkt != NULL) {
761 /*
762 * We have an empty bucket, but since we drop the
763 * CPU lock above, the cache's bucket size may have
764 * changed. If so, free the bucket and try again.
765 */
766 if (ccp->cc_bktsize != btp->bt_bktsize) {
767 MCACHE_UNLOCK(&ccp->cc_lock);
768 mcache_free(btp->bt_cache, bkt);
769 MCACHE_LOCK(&ccp->cc_lock);
770 continue;
771 }
772
773 /*
774 * We have an empty bucket of the right size;
775 * add it to the bucket layer and try again.
776 */
777 mcache_bkt_free(cp, &cp->mc_empty, bkt);
778 continue;
779 }
780
781 /*
782 * The bucket layer has no empty buckets; free the
783 * object(s) directly to the slab layer.
784 */
785 break;
786 }
787 MCACHE_UNLOCK(&ccp->cc_lock);
788
789 /* If there is a waiter below, notify it */
790 if (cp->mc_waiter_cnt > 0)
791 mcache_notify(cp, MCN_RETRYALLOC);
792
793 /* Advise the slab layer to purge the object(s) */
794 (*cp->mc_slab_free)(cp->mc_private, list,
795 (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
796 }
797
798 /*
799 * Cache destruction routine.
800 */
801 __private_extern__ void
802 mcache_destroy(mcache_t *cp)
803 {
804 void **pbuf;
805
806 MCACHE_LIST_LOCK();
807 LIST_REMOVE(cp, mc_list);
808 MCACHE_LIST_UNLOCK();
809
810 mcache_bkt_purge(cp);
811
812 /*
813 * This cache is dead; there should be no further transactions.
814 * If it is still used, make sure that any access induces a fault.
815 */
816 cp->mc_slab_alloc = NULL;
817 cp->mc_slab_free = NULL;
818 cp->mc_slab_audit = NULL;
819
820 lck_attr_free(cp->mc_bkt_lock_attr);
821 lck_grp_free(cp->mc_bkt_lock_grp);
822 lck_grp_attr_free(cp->mc_bkt_lock_grp_attr);
823
824 lck_attr_free(cp->mc_cpu_lock_attr);
825 lck_grp_free(cp->mc_cpu_lock_grp);
826 lck_grp_attr_free(cp->mc_cpu_lock_grp_attr);
827
828 lck_attr_free(cp->mc_sync_lock_attr);
829 lck_grp_free(cp->mc_sync_lock_grp);
830 lck_grp_attr_free(cp->mc_sync_lock_grp_attr);
831
832 /*
833 * TODO: We need to destroy the zone here, but there is currently
834 * no way to do so. Until then, the memory allocated for the
835 * zone structure is leaked.
836 * Once zone destruction becomes possible, uncomment these lines:
837 *
838 * if (cp->mc_slab_zone != NULL) {
839 * zdestroy(cp->mc_slab_zone);
840 * cp->mc_slab_zone = NULL;
841 * }
842 */
843
844 /* Get the original address since we're about to free it */
845 pbuf = (void **)((intptr_t)cp - sizeof (void *));
846
847 zfree(mcache_zone, *pbuf);
848 }
849
850 /*
851 * Internal slab allocator used as a backend for simple caches. The current
852 * implementation uses the zone allocator for simplicity.
853 */
854 static unsigned int
855 mcache_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait)
856 {
857 mcache_t *cp = arg;
858 unsigned int need = num;
859 size_t offset = 0;
860 size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
861 u_int32_t flags = cp->mc_flags;
862 void *buf, *base, **pbuf;
863 mcache_obj_t **list = *plist;
864
865 *list = NULL;
866
867 /*
868 * The address of the object returned to the caller is an
869 * offset from the 64-bit aligned base address only if the
870 * cache's alignment requirement is neither 1 nor 8 bytes.
871 */
872 if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
873 offset = cp->mc_align;
874
875 for (;;) {
876 if (!(wait & MCR_NOSLEEP))
877 buf = zalloc(cp->mc_slab_zone);
878 else
879 buf = zalloc_noblock(cp->mc_slab_zone);
880
881 if (buf == NULL)
882 break;
883
884 /* Get the 64-bit aligned base address for this object */
885 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
886 sizeof (u_int64_t));
887
888 /*
889 * Wind back a pointer size from the aligned base and
890 * save the original address so we can free it later.
891 */
892 pbuf = (void **)((intptr_t)base - sizeof (void *));
893 *pbuf = buf;
894
895 /*
896 * If auditing is enabled, patternize the contents of
897 * the buffer starting from the 64-bit aligned base to
898 * the end of the buffer; the length is rounded up to
899 * the nearest 64-bit multiple; this is because we use
900 * 64-bit memory accesses to set/check the pattern.
901 */
902 if (flags & MCF_AUDIT) {
903 VERIFY(((intptr_t)base + rsize) <=
904 ((intptr_t)buf + cp->mc_chunksize));
905 mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
906 }
907
908 /*
909 * Fix up the object's address to fulfill the cache's
910 * alignment requirement (if needed) and return this
911 * to the caller.
912 */
913 VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
914 ((intptr_t)buf + cp->mc_chunksize));
915 *list = (mcache_obj_t *)((intptr_t)base + offset);
916
917 (*list)->obj_next = NULL;
918 list = *plist = &(*list)->obj_next;
919
920 /* If we got them all, return to mcache */
921 if (--need == 0)
922 break;
923 }
924
925 return (num - need);
926 }
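
/*
 * Illustrative sketch of the align-and-stash idiom used by
 * mcache_slab_alloc() above and undone by mcache_slab_free() below: the
 * raw buffer is rounded up to an aligned base with a pointer-sized slot
 * reserved just below it, and that slot records the original address so
 * the free path can recover it.  The example_* helpers are hypothetical;
 * the round-up arithmetic mirrors P2ROUNDUP() for power-of-2 alignments.
 */
static void *
example_align_obj(void *raw, size_t align)
{
	uintptr_t base;
	void **pbuf;

	/* Skip past the stash slot, then round up to the requested boundary */
	base = ((uintptr_t)raw + sizeof (void *) + (align - 1)) &
	    ~((uintptr_t)align - 1);

	/* Stash the original address one pointer below the aligned base */
	pbuf = (void **)(base - sizeof (void *));
	*pbuf = raw;

	return ((void *)base);
}

static void *
example_orig_addr(void *base)
{
	/* Recover the original (unaligned) address saved above */
	return (*(void **)((uintptr_t)base - sizeof (void *)));
}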
927
928 /*
929 * Internal slab deallocator used as a backend for simple caches.
930 */
931 static void
932 mcache_slab_free(void *arg, mcache_obj_t *list, __unused boolean_t purged)
933 {
934 mcache_t *cp = arg;
935 mcache_obj_t *nlist;
936 size_t offset = 0;
937 size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
938 u_int32_t flags = cp->mc_flags;
939 void *base;
940 void **pbuf;
941
942 /*
943 * The address of the object is an offset from a 64-bit
944 * aligned base address only if the cache's alignment
945 * requirement is neither 1 nor 8 bytes.
946 */
947 if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
948 offset = cp->mc_align;
949
950 for (;;) {
951 nlist = list->obj_next;
952 list->obj_next = NULL;
953
954 /* Get the 64-bit aligned base address of this object */
955 base = (void *)((intptr_t)list - offset);
956 VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));
957
958 /* Get the original address since we're about to free it */
959 pbuf = (void **)((intptr_t)base - sizeof (void *));
960
961 if (flags & MCF_AUDIT) {
962 VERIFY(((intptr_t)base + rsize) <=
963 ((intptr_t)*pbuf + cp->mc_chunksize));
964 mcache_audit_free_verify(NULL, base, offset, rsize);
965 }
966
967 /* Free it to zone */
968 VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
969 ((intptr_t)*pbuf + cp->mc_chunksize));
970 zfree(cp->mc_slab_zone, *pbuf);
971
972 /* No more objects to free; return to mcache */
973 if ((list = nlist) == NULL)
974 break;
975 }
976 }
977
978 /*
979 * Internal slab auditor for simple caches.
980 */
981 static void
982 mcache_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
983 {
984 mcache_t *cp = arg;
985 size_t offset = 0;
986 size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
987 void *base, **pbuf;
988
989 /*
990 * The address of the object returned to the caller is an
991 * offset from the 64-bit aligned base address only if the
992 * cache's alignment requirement is neither 1 nor 8 bytes.
993 */
994 if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
995 offset = cp->mc_align;
996
997 while (list != NULL) {
998 mcache_obj_t *next = list->obj_next;
999
1000 /* Get the 64-bit aligned base address of this object */
1001 base = (void *)((intptr_t)list - offset);
1002 VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));
1003
1004 /* Get the original address */
1005 pbuf = (void **)((intptr_t)base - sizeof (void *));
1006
1007 VERIFY(((intptr_t)base + rsize) <=
1008 ((intptr_t)*pbuf + cp->mc_chunksize));
1009
1010 if (!alloc)
1011 mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
1012 else
1013 mcache_audit_free_verify_set(NULL, base, offset, rsize);
1014
1015 list = list->obj_next = next;
1016 }
1017 }
1018
1019 /*
1020 * Refill the CPU's filled bucket with bkt and save the previous one.
1021 */
1022 static void
1023 mcache_cpu_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs)
1024 {
1025 ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) ||
1026 (ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize));
1027 ASSERT(ccp->cc_bktsize > 0);
1028
1029 ccp->cc_pfilled = ccp->cc_filled;
1030 ccp->cc_pobjs = ccp->cc_objs;
1031 ccp->cc_filled = bkt;
1032 ccp->cc_objs = objs;
1033 }
1034
1035 /*
1036 * Allocate a bucket from the bucket layer.
1037 */
1038 static mcache_bkt_t *
1039 mcache_bkt_alloc(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkttype_t **btp)
1040 {
1041 mcache_bkt_t *bkt;
1042
1043 if (!MCACHE_LOCK_TRY(&cp->mc_bkt_lock)) {
1044 /*
1045 * The bucket layer lock is held by another CPU; increase
1046 * the contention count so that we can later adjust the
1047 * bucket size accordingly.
1048 */
1049 MCACHE_LOCK(&cp->mc_bkt_lock);
1050 cp->mc_bkt_contention++;
1051 }
1052
1053 if ((bkt = blp->bl_list) != NULL) {
1054 blp->bl_list = bkt->bkt_next;
1055 if (--blp->bl_total < blp->bl_min)
1056 blp->bl_min = blp->bl_total;
1057 blp->bl_alloc++;
1058 }
1059
1060 if (btp != NULL)
1061 *btp = cp->cache_bkttype;
1062
1063 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1064
1065 return (bkt);
1066 }
1067
1068 /*
1069 * Free a bucket to the bucket layer.
1070 */
1071 static void
1072 mcache_bkt_free(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t *bkt)
1073 {
1074 MCACHE_LOCK(&cp->mc_bkt_lock);
1075
1076 bkt->bkt_next = blp->bl_list;
1077 blp->bl_list = bkt;
1078 blp->bl_total++;
1079
1080 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1081 }
1082
1083 /*
1084 * Enable the bucket layer of a cache.
1085 */
1086 static void
1087 mcache_cache_bkt_enable(mcache_t *cp)
1088 {
1089 mcache_cpu_t *ccp;
1090 int cpu;
1091
1092 if (cp->mc_flags & MCF_NOCPUCACHE)
1093 return;
1094
1095 for (cpu = 0; cpu < ncpu; cpu++) {
1096 ccp = &cp->mc_cpu[cpu];
1097 MCACHE_LOCK(&ccp->cc_lock);
1098 ccp->cc_bktsize = cp->cache_bkttype->bt_bktsize;
1099 MCACHE_UNLOCK(&ccp->cc_lock);
1100 }
1101 }
1102
1103 /*
1104 * Purge all buckets from a cache and disable its bucket layer.
1105 */
1106 static void
1107 mcache_bkt_purge(mcache_t *cp)
1108 {
1109 mcache_cpu_t *ccp;
1110 mcache_bkt_t *bp, *pbp;
1111 mcache_bkttype_t *btp;
1112 int cpu, objs, pobjs;
1113
1114 for (cpu = 0; cpu < ncpu; cpu++) {
1115 ccp = &cp->mc_cpu[cpu];
1116
1117 MCACHE_LOCK(&ccp->cc_lock);
1118
1119 btp = cp->cache_bkttype;
1120 bp = ccp->cc_filled;
1121 pbp = ccp->cc_pfilled;
1122 objs = ccp->cc_objs;
1123 pobjs = ccp->cc_pobjs;
1124 ccp->cc_filled = NULL;
1125 ccp->cc_pfilled = NULL;
1126 ccp->cc_objs = -1;
1127 ccp->cc_pobjs = -1;
1128 ccp->cc_bktsize = 0;
1129
1130 MCACHE_UNLOCK(&ccp->cc_lock);
1131
1132 if (bp != NULL)
1133 mcache_bkt_destroy(cp, btp, bp, objs);
1134 if (pbp != NULL)
1135 mcache_bkt_destroy(cp, btp, pbp, pobjs);
1136 }
1137
1138 /*
1139 * Updating the working set back to back essentially sets
1140 * the working set size to zero, so everything is reapable.
1141 */
1142 mcache_bkt_ws_update(cp);
1143 mcache_bkt_ws_update(cp);
1144
1145 mcache_bkt_ws_reap(cp);
1146 }
1147
1148 /*
1149 * Free one or more objects in the bucket to the slab layer,
1150 * and also free the bucket itself.
1151 */
1152 static void
1153 mcache_bkt_destroy(mcache_t *cp, mcache_bkttype_t *btp, mcache_bkt_t *bkt,
1154 int nobjs)
1155 {
1156 if (nobjs > 0) {
1157 mcache_obj_t *top = bkt->bkt_obj[nobjs - 1];
1158
1159 if (cp->mc_flags & MCF_VERIFY) {
1160 mcache_obj_t *o = top;
1161 int cnt = 0;
1162
1163 /*
1164 * Verify that the chain of objects in the bucket is
1165 * valid. Any mismatch here means a mistake when the
1166 * object(s) were freed to the CPU layer, so we panic.
1167 */
1168 while (o != NULL) {
1169 o = o->obj_next;
1170 ++cnt;
1171 }
1172 if (cnt != nobjs) {
1173 panic("mcache_bkt_destroy: %s cp %p corrupted "
1174 "list in bkt %p (nobjs %d actual %d)\n",
1175 cp->mc_name, (void *)cp, (void *)bkt,
1176 nobjs, cnt);
1177 }
1178 }
1179
1180 /* Advise the slab layer to purge the object(s) */
1181 (*cp->mc_slab_free)(cp->mc_private, top,
1182 (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
1183 }
1184 mcache_free(btp->bt_cache, bkt);
1185 }
1186
1187 /*
1188 * Update the bucket layer working set statistics.
1189 */
1190 static void
1191 mcache_bkt_ws_update(mcache_t *cp)
1192 {
1193 MCACHE_LOCK(&cp->mc_bkt_lock);
1194
1195 cp->mc_full.bl_reaplimit = cp->mc_full.bl_min;
1196 cp->mc_full.bl_min = cp->mc_full.bl_total;
1197 cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_min;
1198 cp->mc_empty.bl_min = cp->mc_empty.bl_total;
1199
1200 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1201 }
1202
1203 /*
1204 * Reap all buckets that are beyond the working set.
1205 */
1206 static void
1207 mcache_bkt_ws_reap(mcache_t *cp)
1208 {
1209 long reap;
1210 mcache_bkt_t *bkt;
1211 mcache_bkttype_t *btp;
1212
1213 reap = MIN(cp->mc_full.bl_reaplimit, cp->mc_full.bl_min);
1214 while (reap-- &&
1215 (bkt = mcache_bkt_alloc(cp, &cp->mc_full, &btp)) != NULL)
1216 mcache_bkt_destroy(cp, btp, bkt, btp->bt_bktsize);
1217
1218 reap = MIN(cp->mc_empty.bl_reaplimit, cp->mc_empty.bl_min);
1219 while (reap-- &&
1220 (bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp)) != NULL)
1221 mcache_bkt_destroy(cp, btp, bkt, 0);
1222 }
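
/*
 * Illustrative sketch of how the working-set fields used above evolve:
 * bl_min tracks the lowest bucket count observed since the last update;
 * mcache_bkt_ws_update() rolls that floor into bl_reaplimit and restarts
 * the observation at bl_total, and the reap pass frees at most
 * MIN(bl_reaplimit, bl_min) buckets, i.e. only buckets left unused across
 * two consecutive intervals.  Two back-to-back updates thus make every
 * bucket reapable, which is what mcache_bkt_purge() relies on.  The
 * example_ws_t type is a hypothetical stand-in for the relevant fields
 * of mcache_bktlist_t.
 */
typedef struct example_ws {
	int	ws_total;	/* buckets currently on the list */
	int	ws_min;		/* low-water mark since the last update */
	int	ws_reaplimit;	/* low-water mark of the previous interval */
} example_ws_t;

static void
example_ws_update(example_ws_t *ws)
{
	/* The same roll-over mcache_bkt_ws_update() performs per bucket list */
	ws->ws_reaplimit = ws->ws_min;
	ws->ws_min = ws->ws_total;
}

static int
example_ws_reapable(example_ws_t *ws)
{
	/* Only buckets idle across both intervals may be reaped */
	return (MIN(ws->ws_reaplimit, ws->ws_min));
}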
1223
1224 static void
1225 mcache_reap_timeout(void *arg)
1226 {
1227 volatile UInt32 *flag = arg;
1228
1229 ASSERT(flag == &mcache_reaping);
1230
1231 *flag = 0;
1232 }
1233
1234 static void
1235 mcache_reap_done(void *flag)
1236 {
1237 timeout(mcache_reap_timeout, flag, mcache_reap_interval);
1238 }
1239
1240 static void
1241 mcache_reap_start(void *arg)
1242 {
1243 UInt32 *flag = arg;
1244
1245 ASSERT(flag == &mcache_reaping);
1246
1247 mcache_applyall(mcache_cache_reap);
1248 mcache_dispatch(mcache_reap_done, flag);
1249 }
1250
1251 __private_extern__ void
1252 mcache_reap(void)
1253 {
1254 UInt32 *flag = &mcache_reaping;
1255
1256 if (mcache_llock_owner == current_thread() ||
1257 !OSCompareAndSwap(0, 1, flag))
1258 return;
1259
1260 mcache_dispatch(mcache_reap_start, flag);
1261 }
1262
1263 static void
1264 mcache_cache_reap(mcache_t *cp)
1265 {
1266 mcache_bkt_ws_reap(cp);
1267 }
1268
1269 /*
1270 * Performs periodic maintenance on a cache.
1271 */
1272 static void
1273 mcache_cache_update(mcache_t *cp)
1274 {
1275 int need_bkt_resize = 0;
1276 int need_bkt_reenable = 0;
1277
1278 lck_mtx_assert(mcache_llock, LCK_MTX_ASSERT_OWNED);
1279
1280 mcache_bkt_ws_update(cp);
1281
1282 /*
1283 * Cache resize and post-purge reenable are mutually exclusive.
1284 * If the cache was previously purged, there is no point in
1285 * increasing the bucket size as there was an indication of
1286 * memory pressure on the system.
1287 */
1288 lck_mtx_lock_spin(&cp->mc_sync_lock);
1289 if (!(cp->mc_flags & MCF_NOCPUCACHE) && cp->mc_enable_cnt)
1290 need_bkt_reenable = 1;
1291 lck_mtx_unlock(&cp->mc_sync_lock);
1292
1293 MCACHE_LOCK(&cp->mc_bkt_lock);
1294 /*
1295 * If the contention count is greater than the threshold, and if
1296 * we are not already at the maximum bucket size, increase it.
1297 * Otherwise, if this cache was previously purged by the user
1298 * then we simply reenable it.
1299 */
1300 if ((unsigned int)cp->mc_chunksize < cp->cache_bkttype->bt_maxbuf &&
1301 (int)(cp->mc_bkt_contention - cp->mc_bkt_contention_prev) >
1302 mcache_bkt_contention && !need_bkt_reenable)
1303 need_bkt_resize = 1;
1304
1305 cp->mc_bkt_contention_prev = cp->mc_bkt_contention;
1306 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1307
1308 if (need_bkt_resize)
1309 mcache_dispatch(mcache_cache_bkt_resize, cp);
1310 else if (need_bkt_reenable)
1311 mcache_dispatch(mcache_cache_enable, cp);
1312 }
1313
1314 /*
1315 * Recompute a cache's bucket size. This is an expensive operation
1316 * and should not be done frequently; larger buckets provide a higher
1317 * transfer rate with the bucket layer, while smaller buckets reduce
1318 * memory consumption.
1319 */
1320 static void
1321 mcache_cache_bkt_resize(void *arg)
1322 {
1323 mcache_t *cp = arg;
1324 mcache_bkttype_t *btp = cp->cache_bkttype;
1325
1326 if ((unsigned int)cp->mc_chunksize < btp->bt_maxbuf) {
1327 mcache_bkt_purge(cp);
1328
1329 /*
1330 * Upgrade to the next bucket type with larger bucket size;
1331 * temporarily set the previous contention snapshot to a
1332 * negative number to prevent an unnecessary resize request.
1333 */
1334 MCACHE_LOCK(&cp->mc_bkt_lock);
1335 cp->cache_bkttype = ++btp;
1336 cp->mc_bkt_contention_prev = cp->mc_bkt_contention + INT_MAX;
1337 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1338
1339 mcache_cache_enable(cp);
1340 }
1341 }
1342
1343 /*
1344 * Reenable a cache that was previously disabled due to a purge.
1345 */
1346 static void
1347 mcache_cache_enable(void *arg)
1348 {
1349 mcache_t *cp = arg;
1350
1351 lck_mtx_lock_spin(&cp->mc_sync_lock);
1352 cp->mc_purge_cnt = 0;
1353 cp->mc_enable_cnt = 0;
1354 lck_mtx_unlock(&cp->mc_sync_lock);
1355
1356 mcache_cache_bkt_enable(cp);
1357 }
1358
1359 static void
1360 mcache_update_timeout(__unused void *arg)
1361 {
1362 timeout(mcache_update, NULL, mcache_reap_interval);
1363 }
1364
1365 static void
1366 mcache_update(__unused void *arg)
1367 {
1368 mcache_applyall(mcache_cache_update);
1369 mcache_dispatch(mcache_update_timeout, NULL);
1370 }
1371
1372 static void
1373 mcache_applyall(void (*func)(mcache_t *))
1374 {
1375 mcache_t *cp;
1376
1377 MCACHE_LIST_LOCK();
1378 LIST_FOREACH(cp, &mcache_head, mc_list) {
1379 func(cp);
1380 }
1381 MCACHE_LIST_UNLOCK();
1382 }
1383
1384 static void
1385 mcache_dispatch(void (*func)(void *), void *arg)
1386 {
1387 ASSERT(func != NULL);
1388 timeout(func, arg, hz/1000);
1389 }
1390
1391 __private_extern__ void
1392 mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp)
1393 {
1394 mca->mca_addr = addr;
1395 mca->mca_cache = cp;
1396 mca->mca_pthread = mca->mca_thread;
1397 mca->mca_thread = current_thread();
1398 bcopy(mca->mca_stack, mca->mca_pstack, sizeof (mca->mca_pstack));
1399 mca->mca_pdepth = mca->mca_depth;
1400 bzero(mca->mca_stack, sizeof (mca->mca_stack));
1401 mca->mca_depth = OSBacktrace(mca->mca_stack, MCACHE_STACK_DEPTH);
1402 }
1403
1404 __private_extern__ void
1405 mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size)
1406 {
1407 u_int64_t *buf_end = (u_int64_t *)((char *)buf_arg + size);
1408 u_int64_t *buf = (u_int64_t *)buf_arg;
1409
1410 VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
1411 VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));
1412
1413 while (buf < buf_end)
1414 *buf++ = pattern;
1415 }
1416
1417 __private_extern__ void *
1418 mcache_verify_pattern(u_int64_t pattern, void *buf_arg, size_t size)
1419 {
1420 u_int64_t *buf_end = (u_int64_t *)((char *)buf_arg + size);
1421 u_int64_t *buf;
1422
1423 VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
1424 VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));
1425
1426 for (buf = buf_arg; buf < buf_end; buf++) {
1427 if (*buf != pattern)
1428 return (buf);
1429 }
1430 return (NULL);
1431 }
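
/*
 * Illustrative sketch of how the two pattern helpers above cooperate:
 * a buffer is filled with MCACHE_FREE_PATTERN when it goes idle, and
 * before reuse the fill is verified, reporting the first modified 64-bit
 * word via mcache_audit_panic() on damage.  Both the base address and
 * the length must be 64-bit aligned.  The example_* names are
 * hypothetical.
 */
static void
example_audit_idle(void *base, size_t rsize)
{
	/* Mark the idle buffer so that any use-after-free shows up as damage */
	mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
}

static void
example_audit_reuse(void *base, size_t rsize)
{
	u_int64_t *oaddr64;

	/* Returns NULL if untouched, else the first modified 64-bit word */
	if ((oaddr64 = mcache_verify_pattern(MCACHE_FREE_PATTERN,
	    base, rsize)) != NULL) {
		mcache_audit_panic(NULL, base,
		    (caddr_t)oaddr64 - (caddr_t)base,
		    (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
		/* NOTREACHED */
	}
}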
1432
1433 __private_extern__ void *
1434 mcache_verify_set_pattern(u_int64_t old, u_int64_t new, void *buf_arg,
1435 size_t size)
1436 {
1437 u_int64_t *buf_end = (u_int64_t *)((char *)buf_arg + size);
1438 u_int64_t *buf;
1439
1440 VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
1441 VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));
1442
1443 for (buf = buf_arg; buf < buf_end; buf++) {
1444 if (*buf != old) {
1445 mcache_set_pattern(old, buf_arg,
1446 (uintptr_t)buf - (uintptr_t)buf_arg);
1447 return (buf);
1448 }
1449 *buf = new;
1450 }
1451 return (NULL);
1452 }
1453
1454 __private_extern__ void
1455 mcache_audit_free_verify(mcache_audit_t *mca, void *base, size_t offset,
1456 size_t size)
1457 {
1458 void *addr;
1459 u_int64_t *oaddr64;
1460 mcache_obj_t *next;
1461
1462 addr = (void *)((uintptr_t)base + offset);
1463 next = ((mcache_obj_t *)addr)->obj_next;
1464
1465 /* For the "obj_next" pointer in the buffer */
1466 oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
1467 *oaddr64 = MCACHE_FREE_PATTERN;
1468
1469 if ((oaddr64 = mcache_verify_pattern(MCACHE_FREE_PATTERN,
1470 (caddr_t)base, size)) != NULL) {
1471 mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
1472 (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
1473 /* NOTREACHED */
1474 }
1475 ((mcache_obj_t *)addr)->obj_next = next;
1476 }
1477
1478 __private_extern__ void
1479 mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset,
1480 size_t size)
1481 {
1482 void *addr;
1483 u_int64_t *oaddr64;
1484 mcache_obj_t *next;
1485
1486 addr = (void *)((uintptr_t)base + offset);
1487 next = ((mcache_obj_t *)addr)->obj_next;
1488
1489 /* For the "obj_next" pointer in the buffer */
1490 oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
1491 *oaddr64 = MCACHE_FREE_PATTERN;
1492
1493 if ((oaddr64 = mcache_verify_set_pattern(MCACHE_FREE_PATTERN,
1494 MCACHE_UNINITIALIZED_PATTERN, (caddr_t)base, size)) != NULL) {
1495 mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
1496 (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
1497 /* NOTREACHED */
1498 }
1499 ((mcache_obj_t *)addr)->obj_next = next;
1500 }
1501
1502 #undef panic
1503
1504 __private_extern__ char *
1505 mcache_dump_mca(mcache_audit_t *mca)
1506 {
1507 if (mca_dump_buf == NULL)
1508 return (NULL);
1509
1510 snprintf(mca_dump_buf, DUMP_MCA_BUF_SIZE,
1511 "mca %p: addr %p, cache %p (%s)\n"
1512 "last transaction; thread %p, saved PC stack (%d deep):\n"
1513 "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
1514 "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
1515 "previous transaction; thread %p, saved PC stack (%d deep):\n"
1516 "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
1517 "\t%p, %p, %p, %p, %p, %p, %p, %p\n",
1518 mca, mca->mca_addr, mca->mca_cache,
1519 mca->mca_cache ? mca->mca_cache->mc_name : "?",
1520 mca->mca_thread, mca->mca_depth,
1521 mca->mca_stack[0], mca->mca_stack[1], mca->mca_stack[2],
1522 mca->mca_stack[3], mca->mca_stack[4], mca->mca_stack[5],
1523 mca->mca_stack[6], mca->mca_stack[7], mca->mca_stack[8],
1524 mca->mca_stack[9], mca->mca_stack[10], mca->mca_stack[11],
1525 mca->mca_stack[12], mca->mca_stack[13], mca->mca_stack[14],
1526 mca->mca_stack[15],
1527 mca->mca_pthread, mca->mca_pdepth,
1528 mca->mca_pstack[0], mca->mca_pstack[1], mca->mca_pstack[2],
1529 mca->mca_pstack[3], mca->mca_pstack[4], mca->mca_pstack[5],
1530 mca->mca_pstack[6], mca->mca_pstack[7], mca->mca_pstack[8],
1531 mca->mca_pstack[9], mca->mca_pstack[10], mca->mca_pstack[11],
1532 mca->mca_pstack[12], mca->mca_pstack[13], mca->mca_pstack[14],
1533 mca->mca_pstack[15]);
1534
1535 return (mca_dump_buf);
1536 }
1537
1538 __private_extern__ void
1539 mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset,
1540 int64_t expected, int64_t got)
1541 {
1542 if (mca == NULL) {
1543 panic("mcache_audit: buffer %p modified after free at "
1544 "offset 0x%lx (0x%llx instead of 0x%llx)\n", addr,
1545 offset, got, expected);
1546 /* NOTREACHED */
1547 }
1548
1549 panic("mcache_audit: buffer %p modified after free at offset 0x%lx "
1550 "(0x%llx instead of 0x%llx)\n%s\n",
1551 addr, offset, got, expected, mcache_dump_mca(mca));
1552 /* NOTREACHED */
1553 }
1554
1555 __private_extern__ int
1556 assfail(const char *a, const char *f, int l)
1557 {
1558 panic("assertion failed: %s, file: %s, line: %d", a, f, l);
1559 return (0);
1560 }