1 /*
2 * Copyright (c) 2006-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Memory allocator with per-CPU caching, derived from the kmem magazine
31 * concept and implementation as described in the following paper:
32 * http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick.pdf
33 * That implementation is Copyright 2006 Sun Microsystems, Inc. All rights
34 * reserved. Use is subject to license terms.
35 *
36 * There are several major differences between this and the original kmem
37 * magazine: this derivative implementation allows for multiple objects to
38 * be allocated and freed from/to the object cache in one call; in addition,
39 * it provides greater flexibility by allowing the user to supply a
40 * custom slab allocator (instead of the default zone allocator). Finally,
41 * no object construction/destruction takes place at the moment, although
42 * this could be added in the future to improve efficiency.
43 */
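
/*
 * A minimal usage sketch (illustrative only; the client cache, object
 * size and call sites below are hypothetical):
 *
 *	static mcache_t *foo_cache;
 *
 *	void
 *	foo_init(void)
 *	{
 *		// Zone-backed cache of 128-byte objects, default alignment
 *		foo_cache = mcache_create("foo", 128, 0, 0, MCR_SLEEP);
 *	}
 *
 *	void *
 *	foo_get(int how)
 *	{
 *		// MCR_SLEEP may block; MCR_NOSLEEP returns NULL on failure
 *		return (mcache_alloc(foo_cache, how));
 *	}
 *
 *	void
 *	foo_put(void *obj)
 *	{
 *		mcache_free(foo_cache, obj);
 *	}
 *
 * Batch transfers via mcache_alloc_ext()/mcache_free_ext() are shown in
 * a separate sketch further below.
 */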
44
45 #include <sys/param.h>
46 #include <sys/types.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/queue.h>
50 #include <sys/kernel.h>
51 #include <sys/systm.h>
52
53 #include <kern/debug.h>
54 #include <kern/zalloc.h>
55 #include <kern/cpu_number.h>
56 #include <kern/locks.h>
57
58 #include <libkern/libkern.h>
59 #include <libkern/OSAtomic.h>
60 #include <libkern/OSDebug.h>
61
62 #include <mach/vm_param.h>
63 #include <machine/limits.h>
64 #include <machine/machine_routines.h>
65
66 #include <string.h>
67
68 #include <sys/mcache.h>
69
70 #define MCACHE_SIZE(n) \
71 ((size_t)(&((mcache_t *)0)->mc_cpu[n]))
72
73 /* Allocate extra in case we need to manually align the pointer */
74 #define MCACHE_ALLOC_SIZE \
75 (sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_LINE_SIZE)
76
77 #define MCACHE_CPU(c) \
78 (mcache_cpu_t *)((void *)((char *)(c) + MCACHE_SIZE(cpu_number())))
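
/*
 * MCACHE_SIZE(n) evaluates to the byte offset of mc_cpu[n] within an
 * mcache_t, i.e. the size of a cache structure trimmed to n per-CPU
 * entries; MCACHE_CPU(c) is then simply &(c)->mc_cpu[cpu_number()],
 * the calling CPU's private layer.  MCACHE_ALLOC_SIZE adds room for a
 * saved back-pointer plus one cache line of slack so that the returned
 * structure can be rounded up to a cache line boundary in
 * mcache_create_common() below.
 */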
79
80 /*
81 * MCACHE_LIST_LOCK() and MCACHE_LIST_UNLOCK() are macros used
82 * to serialize accesses to the global list of caches in the system.
83 * They also record the thread currently running in the critical
84 * section, so that we can avoid recursive requests to reap the
85 * caches when memory runs low.
86 */
87 #define MCACHE_LIST_LOCK() { \
88 lck_mtx_lock(mcache_llock); \
89 mcache_llock_owner = current_thread(); \
90 }
91
92 #define MCACHE_LIST_UNLOCK() { \
93 mcache_llock_owner = NULL; \
94 lck_mtx_unlock(mcache_llock); \
95 }
96
97 #define MCACHE_LOCK(l) lck_mtx_lock(l)
98 #define MCACHE_UNLOCK(l) lck_mtx_unlock(l)
99 #define MCACHE_LOCK_TRY(l) lck_mtx_try_lock(l)
100
101 static int ncpu;
102 static unsigned int cache_line_size;
103 static lck_mtx_t *mcache_llock;
104 static struct thread *mcache_llock_owner;
105 static lck_attr_t *mcache_llock_attr;
106 static lck_grp_t *mcache_llock_grp;
107 static lck_grp_attr_t *mcache_llock_grp_attr;
108 static struct zone *mcache_zone;
109 static unsigned int mcache_reap_interval;
110 static UInt32 mcache_reaping;
111 static int mcache_ready;
112 static int mcache_updating;
113
114 static int mcache_bkt_contention = 3;
115 #if DEBUG
116 static unsigned int mcache_flags = MCF_DEBUG;
117 #else
118 static unsigned int mcache_flags = 0;
119 #endif
120
121 #define DUMP_MCA_BUF_SIZE 512
122 static char *mca_dump_buf;
123
124 static mcache_bkttype_t mcache_bkttype[] = {
125 { 1, 4096, 32768, NULL },
126 { 3, 2048, 16384, NULL },
127 { 7, 1024, 12288, NULL },
128 { 15, 256, 8192, NULL },
129 { 31, 64, 4096, NULL },
130 { 47, 0, 2048, NULL },
131 { 63, 0, 1024, NULL },
132 { 95, 0, 512, NULL },
133 { 143, 0, 256, NULL },
134 { 165, 0, 0, NULL },
135 };
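
/*
 * Reading the table above: each entry pairs a bucket size (bt_bktsize,
 * the number of objects held per bucket) with the chunk-size range it
 * serves.  At creation time the first type whose bt_minbuf is smaller
 * than the cache's chunk size is chosen, so small objects get large
 * buckets and vice versa.  bt_maxbuf caps resizing: a cache may be
 * upgraded to the next (larger) bucket type only while its chunk size
 * is below bt_maxbuf.  bt_cache is the cache supplying buckets of that
 * size, created in mcache_init() below.
 */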
136
137 static mcache_t *mcache_create_common(const char *, size_t, size_t,
138 mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t,
139 mcache_notifyfn_t, void *, u_int32_t, int, int);
140 static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***,
141 unsigned int, int);
142 static void mcache_slab_free(void *, mcache_obj_t *, boolean_t);
143 static void mcache_slab_audit(void *, mcache_obj_t *, boolean_t);
144 static void mcache_cpu_refill(mcache_cpu_t *, mcache_bkt_t *, int);
145 static mcache_bkt_t *mcache_bkt_alloc(mcache_t *, mcache_bktlist_t *,
146 mcache_bkttype_t **);
147 static void mcache_bkt_free(mcache_t *, mcache_bktlist_t *, mcache_bkt_t *);
148 static void mcache_cache_bkt_enable(mcache_t *);
149 static void mcache_bkt_purge(mcache_t *);
150 static void mcache_bkt_destroy(mcache_t *, mcache_bkttype_t *,
151 mcache_bkt_t *, int);
152 static void mcache_bkt_ws_update(mcache_t *);
153 static void mcache_bkt_ws_reap(mcache_t *);
154 static void mcache_dispatch(void (*)(void *), void *);
155 static void mcache_cache_reap(mcache_t *);
156 static void mcache_cache_update(mcache_t *);
157 static void mcache_cache_bkt_resize(void *);
158 static void mcache_cache_enable(void *);
159 static void mcache_update(void *);
160 static void mcache_update_timeout(void *);
161 static void mcache_applyall(void (*)(mcache_t *));
162 static void mcache_reap_start(void *);
163 static void mcache_reap_done(void *);
164 static void mcache_reap_timeout(void *);
165 static void mcache_notify(mcache_t *, u_int32_t);
166 static void mcache_purge(void *);
167
168 static LIST_HEAD(, mcache) mcache_head;
169 mcache_t *mcache_audit_cache;
170
171 /*
172 * Initialize the framework; this is currently called as part of BSD init.
173 */
174 __private_extern__ void
175 mcache_init(void)
176 {
177 mcache_bkttype_t *btp;
178 unsigned int i;
179 char name[32];
180
181 ncpu = ml_get_max_cpus();
182 (void) mcache_cache_line_size(); /* prime it */
183
184 mcache_llock_grp_attr = lck_grp_attr_alloc_init();
185 mcache_llock_grp = lck_grp_alloc_init("mcache.list",
186 mcache_llock_grp_attr);
187 mcache_llock_attr = lck_attr_alloc_init();
188 mcache_llock = lck_mtx_alloc_init(mcache_llock_grp, mcache_llock_attr);
189
190 mcache_zone = zinit(MCACHE_ALLOC_SIZE, 256 * MCACHE_ALLOC_SIZE,
191 PAGE_SIZE, "mcache");
192 if (mcache_zone == NULL)
193 panic("mcache_init: failed to allocate mcache zone\n");
194 zone_change(mcache_zone, Z_CALLERACCT, FALSE);
195
196 LIST_INIT(&mcache_head);
197
198 for (i = 0; i < sizeof (mcache_bkttype) / sizeof (*btp); i++) {
199 btp = &mcache_bkttype[i];
200 (void) snprintf(name, sizeof (name), "bkt_%d",
201 btp->bt_bktsize);
202 btp->bt_cache = mcache_create(name,
203 (btp->bt_bktsize + 1) * sizeof (void *), 0, 0, MCR_SLEEP);
204 }
205
206 PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof (mcache_flags));
207 mcache_flags &= MCF_FLAGS_MASK;
208
209 mcache_audit_cache = mcache_create("audit", sizeof (mcache_audit_t),
210 0, 0, MCR_SLEEP);
211
212 mcache_reap_interval = 15 * hz;
213 mcache_applyall(mcache_cache_bkt_enable);
214 mcache_ready = 1;
215
216 printf("mcache: %d CPU(s), %d bytes CPU cache line size\n",
217 ncpu, CPU_CACHE_LINE_SIZE);
218 }
219
220 /*
221 * Return the global mcache flags.
222 */
223 __private_extern__ unsigned int
224 mcache_getflags(void)
225 {
226 return (mcache_flags);
227 }
228
229 /*
230 * Return the CPU cache line size.
231 */
232 __private_extern__ unsigned int
233 mcache_cache_line_size(void)
234 {
235 if (cache_line_size == 0) {
236 ml_cpu_info_t cpu_info;
237 ml_cpu_get_info(&cpu_info);
238 cache_line_size = cpu_info.cache_line_size;
239 }
240 return (cache_line_size);
241 }
242
243 /*
244 * Create a cache using the zone allocator as the backend slab allocator.
245 * The caller may specify any alignment for the object; if it specifies 0
246 * the default alignment (MCACHE_ALIGN) will be used.
247 */
248 __private_extern__ mcache_t *
249 mcache_create(const char *name, size_t bufsize, size_t align,
250 u_int32_t flags, int wait)
251 {
252 return (mcache_create_common(name, bufsize, align, mcache_slab_alloc,
253 mcache_slab_free, mcache_slab_audit, NULL, NULL, NULL, flags, 1,
254 wait));
255 }
256
257 /*
258 * Create a cache using a custom backend slab allocator. Since the caller
259 * is responsible for allocation, no alignment guarantee will be provided
260 * by this framework.
261 */
262 __private_extern__ mcache_t *
263 mcache_create_ext(const char *name, size_t bufsize,
264 mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
265 mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
266 u_int32_t flags, int wait)
267 {
268 return (mcache_create_common(name, bufsize, 0, allocfn,
269 freefn, auditfn, logfn, notifyfn, arg, flags, 0, wait));
270 }
271
272 /*
273 * Common cache creation routine.
274 */
275 static mcache_t *
276 mcache_create_common(const char *name, size_t bufsize, size_t align,
277 mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
278 mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
279 u_int32_t flags, int need_zone, int wait)
280 {
281 mcache_bkttype_t *btp;
282 mcache_t *cp = NULL;
283 size_t chunksize;
284 void *buf, **pbuf;
285 int c;
286 char lck_name[64];
287
288 /* If auditing is on and print buffer is NULL, allocate it now */
289 if ((flags & MCF_DEBUG) && mca_dump_buf == NULL) {
290 int malloc_wait = (wait & MCR_NOSLEEP) ? M_NOWAIT : M_WAITOK;
291 MALLOC(mca_dump_buf, char *, DUMP_MCA_BUF_SIZE, M_TEMP,
292 malloc_wait | M_ZERO);
293 if (mca_dump_buf == NULL)
294 return (NULL);
295 }
296
297 if (!(wait & MCR_NOSLEEP))
298 buf = zalloc(mcache_zone);
299 else
300 buf = zalloc_noblock(mcache_zone);
301
302 if (buf == NULL)
303 goto fail;
304
305 bzero(buf, MCACHE_ALLOC_SIZE);
306
307 /*
308 * In case we didn't get cache-aligned memory, round the address up
309 * accordingly. This is needed in order to get the rest of the
310 * structure members aligned properly. It also means that
311 * the memory span gets shifted due to the round-up, but that
312 * is okay since we've allocated extra space for this.
313 */
314 cp = (mcache_t *)
315 P2ROUNDUP((intptr_t)buf + sizeof (void *), CPU_CACHE_LINE_SIZE);
316 pbuf = (void **)((intptr_t)cp - sizeof (void *));
317 *pbuf = buf;
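
/*
 * In other words the layout is: `buf' (the raw zone allocation),
 * followed by up to a cache line of padding, then the saved copy of
 * `buf' in the pointer slot at `pbuf', and finally the cache-line
 * aligned mcache_t itself at `cp'.  mcache_destroy() later reads the
 * saved pointer back through the slot just below `cp' in order to
 * return the original allocation to mcache_zone.
 */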
318
319 /*
320 * Alignment is guaranteed only when we use the internal
321 * slab allocator (currently set to use the zone allocator).
322 */
323 if (!need_zone)
324 align = 1;
325 else if (align == 0)
326 align = MCACHE_ALIGN;
327
328 if ((align & (align - 1)) != 0)
329 panic("mcache_create: bad alignment %lu", align);
330
331 cp->mc_align = align;
332 cp->mc_slab_alloc = allocfn;
333 cp->mc_slab_free = freefn;
334 cp->mc_slab_audit = auditfn;
335 cp->mc_slab_log = logfn;
336 cp->mc_slab_notify = notifyfn;
337 cp->mc_private = need_zone ? cp : arg;
338 cp->mc_bufsize = bufsize;
339 cp->mc_flags = (flags & MCF_FLAGS_MASK) | mcache_flags;
340
341 (void) snprintf(cp->mc_name, sizeof (cp->mc_name), "mcache.%s", name);
342
343 (void) snprintf(lck_name, sizeof (lck_name), "%s.cpu", cp->mc_name);
344 cp->mc_cpu_lock_grp_attr = lck_grp_attr_alloc_init();
345 cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name,
346 cp->mc_cpu_lock_grp_attr);
347 cp->mc_cpu_lock_attr = lck_attr_alloc_init();
348
349 /*
350 * Allocation chunk size is the object's size plus any extra size
351 * needed to satisfy the object's alignment. It is enforced to be
352 * at least the size of an LP64 pointer to simplify auditing and to
353 * handle multiple-element allocation requests, where the elements
354 * returned are linked together in a list.
355 */
356 chunksize = MAX(bufsize, sizeof (u_int64_t));
357 if (need_zone) {
358 /* Enforce 64-bit minimum alignment for zone-based buffers */
359 align = MAX(align, sizeof (u_int64_t));
360 chunksize += sizeof (void *) + align;
361 chunksize = P2ROUNDUP(chunksize, align);
362 if ((cp->mc_slab_zone = zinit(chunksize, 64 * 1024 * ncpu,
363 PAGE_SIZE, cp->mc_name)) == NULL)
364 goto fail;
365 zone_change(cp->mc_slab_zone, Z_EXPAND, TRUE);
366 }
367 cp->mc_chunksize = chunksize;
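
/*
 * For example (assuming LP64, where sizeof (void *) is 8): a
 * hypothetical 84-byte object with a requested 32-byte alignment gives
 * chunksize = MAX(84, 8) = 84 and align = MAX(32, 8) = 32; adding
 * sizeof (void *) + align yields 124, which P2ROUNDUP() brings to a
 * final chunk size of 128 bytes in the backing zone.
 */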
368
369 /*
370 * Initialize the bucket layer.
371 */
372 (void) snprintf(lck_name, sizeof (lck_name), "%s.bkt", cp->mc_name);
373 cp->mc_bkt_lock_grp_attr = lck_grp_attr_alloc_init();
374 cp->mc_bkt_lock_grp = lck_grp_alloc_init(lck_name,
375 cp->mc_bkt_lock_grp_attr);
376 cp->mc_bkt_lock_attr = lck_attr_alloc_init();
377 lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp,
378 cp->mc_bkt_lock_attr);
379
380 (void) snprintf(lck_name, sizeof (lck_name), "%s.sync", cp->mc_name);
381 cp->mc_sync_lock_grp_attr = lck_grp_attr_alloc_init();
382 cp->mc_sync_lock_grp = lck_grp_alloc_init(lck_name,
383 cp->mc_sync_lock_grp_attr);
384 cp->mc_sync_lock_attr = lck_attr_alloc_init();
385 lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp,
386 cp->mc_sync_lock_attr);
387
388 for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++)
389 continue;
390
391 cp->cache_bkttype = btp;
392
393 /*
394 * Initialize the CPU layer. Each per-CPU structure is aligned
395 * on the CPU cache line boundary to prevent false sharing.
396 */
397 for (c = 0; c < ncpu; c++) {
398 mcache_cpu_t *ccp = &cp->mc_cpu[c];
399
400 VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_LINE_SIZE));
401 lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp,
402 cp->mc_cpu_lock_attr);
403 ccp->cc_objs = -1;
404 ccp->cc_pobjs = -1;
405 }
406
407 if (mcache_ready)
408 mcache_cache_bkt_enable(cp);
409
410 /* TODO: dynamically create sysctl for stats */
411
412 MCACHE_LIST_LOCK();
413 LIST_INSERT_HEAD(&mcache_head, cp, mc_list);
414 MCACHE_LIST_UNLOCK();
415
416 /*
417 * If cache buckets are enabled and this is the first cache
418 * created, start the periodic cache update.
419 */
420 if (!(mcache_flags & MCF_NOCPUCACHE) && !mcache_updating) {
421 mcache_updating = 1;
422 mcache_update_timeout(NULL);
423 }
424 if (cp->mc_flags & MCF_DEBUG) {
425 printf("mcache_create: %s (%s) arg %p bufsize %lu align %lu "
426 "chunksize %lu bktsize %d\n", name, need_zone ? "i" : "e",
427 arg, bufsize, cp->mc_align, chunksize, btp->bt_bktsize);
428 }
429 return (cp);
430
431 fail:
432 if (buf != NULL)
433 zfree(mcache_zone, buf);
434 return (NULL);
435 }
436
437 /*
438 * Allocate one or more objects from a cache.
439 */
440 __private_extern__ unsigned int
441 mcache_alloc_ext(mcache_t *cp, mcache_obj_t **list, unsigned int num, int wait)
442 {
443 mcache_cpu_t *ccp;
444 mcache_obj_t **top = &(*list);
445 mcache_bkt_t *bkt;
446 unsigned int need = num;
447 boolean_t nwretry = FALSE;
448
449 /* MCR_NOSLEEP and MCR_FAILOK are mutually exclusive */
450 VERIFY((wait & (MCR_NOSLEEP|MCR_FAILOK)) != (MCR_NOSLEEP|MCR_FAILOK));
451
452 ASSERT(list != NULL);
453 *list = NULL;
454
455 if (num == 0)
456 return (0);
457
458 retry_alloc:
459 /* We may not always be running on the same CPU in case of retries */
460 ccp = MCACHE_CPU(cp);
461
462 MCACHE_LOCK(&ccp->cc_lock);
463 for (;;) {
464 /*
465 * If we have an object in the current CPU's filled bucket,
466 * chain the object to any previous objects and return if
467 * we've satisfied the number of requested objects.
468 */
469 if (ccp->cc_objs > 0) {
470 mcache_obj_t *tail;
471 int objs;
472
473 /*
474 * Objects in the bucket are already linked together
475 * with the most recently freed object at the head of
476 * the list; grab as many objects as we can.
477 */
478 objs = MIN((unsigned int)ccp->cc_objs, need);
479 *list = ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
480 ccp->cc_objs -= objs;
481 ccp->cc_alloc += objs;
482
483 tail = ccp->cc_filled->bkt_obj[ccp->cc_objs];
484 list = &tail->obj_next;
485 *list = NULL;
486
487 /* If we got them all, return to caller */
488 if ((need -= objs) == 0) {
489 MCACHE_UNLOCK(&ccp->cc_lock);
490
491 if (!(cp->mc_flags & MCF_NOLEAKLOG) &&
492 cp->mc_slab_log != NULL)
493 (*cp->mc_slab_log)(num, *top, TRUE);
494
495 if (cp->mc_flags & MCF_DEBUG)
496 goto debug_alloc;
497
498 return (num);
499 }
500 }
501
502 /*
503 * The CPU's filled bucket is empty. If the previous filled
504 * bucket was full, exchange and try again.
505 */
506 if (ccp->cc_pobjs > 0) {
507 mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
508 continue;
509 }
510
511 /*
512 * If the bucket layer is disabled, allocate from slab. This
513 * can happen either because MCF_NOCPUCACHE is set, or because
514 * the bucket layer is currently being resized.
515 */
516 if (ccp->cc_bktsize == 0)
517 break;
518
519 /*
520 * Both of the CPU's buckets are empty; try to get a full
521 * bucket from the bucket layer. Upon success, refill this
522 * CPU and place any empty bucket into the empty list.
523 */
524 bkt = mcache_bkt_alloc(cp, &cp->mc_full, NULL);
525 if (bkt != NULL) {
526 if (ccp->cc_pfilled != NULL)
527 mcache_bkt_free(cp, &cp->mc_empty,
528 ccp->cc_pfilled);
529 mcache_cpu_refill(ccp, bkt, ccp->cc_bktsize);
530 continue;
531 }
532
533 /*
534 * The bucket layer has no full buckets; allocate the
535 * object(s) directly from the slab layer.
536 */
537 break;
538 }
539 MCACHE_UNLOCK(&ccp->cc_lock);
540
541 need -= (*cp->mc_slab_alloc)(cp->mc_private, &list, need, wait);
542
543 /*
544 * If this is a blocking allocation, or if it is non-blocking and
545 * the cache's full-bucket list is non-empty, then retry the allocation.
546 */
547 if (need > 0) {
548 if (!(wait & MCR_NONBLOCKING)) {
549 atomic_add_32(&cp->mc_wretry_cnt, 1);
550 goto retry_alloc;
551 } else if ((wait & (MCR_NOSLEEP | MCR_TRYHARD)) &&
552 !mcache_bkt_isempty(cp)) {
553 if (!nwretry)
554 nwretry = TRUE;
555 atomic_add_32(&cp->mc_nwretry_cnt, 1);
556 goto retry_alloc;
557 } else if (nwretry) {
558 atomic_add_32(&cp->mc_nwfail_cnt, 1);
559 }
560 }
561
562 if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL)
563 (*cp->mc_slab_log)((num - need), *top, TRUE);
564
565 if (!(cp->mc_flags & MCF_DEBUG))
566 return (num - need);
567
568 debug_alloc:
569 if (cp->mc_flags & MCF_DEBUG) {
570 mcache_obj_t **o = top;
571 unsigned int n;
572
573 n = 0;
574 /*
575 * Verify that the chain of objects has the same count as
576 * what we are about to report to the caller. Any mismatch
577 * here means that the object list is insanely broken and
578 * therefore we must panic.
579 */
580 while (*o != NULL) {
581 o = &(*o)->obj_next;
582 ++n;
583 }
584 if (n != (num - need)) {
585 panic("mcache_alloc_ext: %s cp %p corrupted list "
586 "(got %d actual %d)\n", cp->mc_name,
587 (void *)cp, num - need, n);
588 }
589 }
590
591 /* Invoke the slab layer audit callback if auditing is enabled */
592 if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL)
593 (*cp->mc_slab_audit)(cp->mc_private, *top, TRUE);
594
595 return (num - need);
596 }
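
/*
 * A minimal batch sketch (illustrative only; foo_cache is the same
 * hypothetical client cache used in the sketch near the top of this
 * file).  Objects come back chained through their obj_next fields:
 *
 *	mcache_obj_t *list = NULL;
 *	unsigned int got;
 *
 *	// Grab up to 32 objects without blocking; fewer (possibly zero)
 *	// may be returned.
 *	got = mcache_alloc_ext(foo_cache, &list, 32, MCR_NOSLEEP);
 *
 *	// ... hand the objects out, keeping the obj_next linkage intact ...
 *
 *	// Return the whole chain to the cache in a single call.
 *	if (got != 0)
 *		mcache_free_ext(foo_cache, list);
 */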
597
598 /*
599 * Allocate a single object from a cache.
600 */
601 __private_extern__ void *
602 mcache_alloc(mcache_t *cp, int wait)
603 {
604 mcache_obj_t *buf;
605
606 (void) mcache_alloc_ext(cp, &buf, 1, wait);
607 return (buf);
608 }
609
610 __private_extern__ void
611 mcache_waiter_inc(mcache_t *cp)
612 {
613 atomic_add_32(&cp->mc_waiter_cnt, 1);
614 }
615
616 __private_extern__ void
617 mcache_waiter_dec(mcache_t *cp)
618 {
619 atomic_add_32(&cp->mc_waiter_cnt, -1);
620 }
621
622 __private_extern__ boolean_t
623 mcache_bkt_isempty(mcache_t *cp)
624 {
625 /*
626 * This isn't meant to accurately tell whether there are
627 * any full buckets in the cache; it is simply a way to
628 * obtain "hints" about the state of the cache.
629 */
630 return (cp->mc_full.bl_total == 0);
631 }
632
633 /*
634 * Notify the slab layer about an event.
635 */
636 static void
637 mcache_notify(mcache_t *cp, u_int32_t event)
638 {
639 if (cp->mc_slab_notify != NULL)
640 (*cp->mc_slab_notify)(cp->mc_private, event);
641 }
642
643 /*
644 * Purge the cache and disable its buckets.
645 */
646 static void
647 mcache_purge(void *arg)
648 {
649 mcache_t *cp = arg;
650
651 mcache_bkt_purge(cp);
652 /*
653 * We cannot simply call mcache_cache_bkt_enable() from here as
654 * a bucket resize may be in flight and we would cause the CPU
655 * layers of the cache to point to different sizes. Therefore,
656 * we simply increment the enable count so that during the next
657 * periodic cache update the buckets can be reenabled.
658 */
659 lck_mtx_lock_spin(&cp->mc_sync_lock);
660 cp->mc_enable_cnt++;
661 lck_mtx_unlock(&cp->mc_sync_lock);
662
663 }
664
665 __private_extern__ boolean_t
666 mcache_purge_cache(mcache_t *cp)
667 {
668 /*
669 * Purging a cache that has no per-CPU caches or is already
670 * in the process of being purged is rather pointless.
671 */
672 if (cp->mc_flags & MCF_NOCPUCACHE)
673 return (FALSE);
674
675 lck_mtx_lock_spin(&cp->mc_sync_lock);
676 if (cp->mc_purge_cnt > 0) {
677 lck_mtx_unlock(&cp->mc_sync_lock);
678 return (FALSE);
679 }
680 cp->mc_purge_cnt++;
681 lck_mtx_unlock(&cp->mc_sync_lock);
682
683 mcache_dispatch(mcache_purge, cp);
684
685 return (TRUE);
686 }
687
688 /*
689 * Free a single object to a cache.
690 */
691 __private_extern__ void
692 mcache_free(mcache_t *cp, void *buf)
693 {
694 ((mcache_obj_t *)buf)->obj_next = NULL;
695 mcache_free_ext(cp, (mcache_obj_t *)buf);
696 }
697
698 /*
699 * Free one or more objects to a cache.
700 */
701 __private_extern__ void
702 mcache_free_ext(mcache_t *cp, mcache_obj_t *list)
703 {
704 mcache_cpu_t *ccp = MCACHE_CPU(cp);
705 mcache_bkttype_t *btp;
706 mcache_obj_t *nlist;
707 mcache_bkt_t *bkt;
708
709 if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL)
710 (*cp->mc_slab_log)(0, list, FALSE);
711
712 /* Invoke the slab layer audit callback if auditing is enabled */
713 if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL)
714 (*cp->mc_slab_audit)(cp->mc_private, list, FALSE);
715
716 MCACHE_LOCK(&ccp->cc_lock);
717 for (;;) {
718 /*
719 * If there is space in the current CPU's filled bucket, put
720 * the object there and return once all objects are freed.
721 * Note the cast to unsigned integer takes care of the case
722 * where the bucket layer is disabled (when cc_objs is -1).
723 */
724 if ((unsigned int)ccp->cc_objs <
725 (unsigned int)ccp->cc_bktsize) {
726 /*
727 * Reverse the list while we place the object into the
728 * bucket; this effectively causes the most recently
729 * freed object(s) to be reused during allocation.
730 */
731 nlist = list->obj_next;
732 list->obj_next = (ccp->cc_objs == 0) ? NULL :
733 ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
734 ccp->cc_filled->bkt_obj[ccp->cc_objs++] = list;
735 ccp->cc_free++;
736
737 if ((list = nlist) != NULL)
738 continue;
739
740 /* We are done; return to caller */
741 MCACHE_UNLOCK(&ccp->cc_lock);
742
743 /* If there is a waiter below, notify it */
744 if (cp->mc_waiter_cnt > 0)
745 mcache_notify(cp, MCN_RETRYALLOC);
746 return;
747 }
748
749 /*
750 * The CPU's filled bucket is full. If the previous filled
751 * bucket was empty, exchange and try again.
752 */
753 if (ccp->cc_pobjs == 0) {
754 mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
755 continue;
756 }
757
758 /*
759 * If the bucket layer is disabled, free to slab. This can
760 * happen either because MCF_NOCPUCACHE is set, or because
761 * the bucket layer is currently being resized.
762 */
763 if (ccp->cc_bktsize == 0)
764 break;
765
766 /*
767 * Both of the CPU's buckets are full; try to get an empty
768 * bucket from the bucket layer. Upon success, empty this
769 * CPU and place any full bucket into the full list.
770 */
771 bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp);
772 if (bkt != NULL) {
773 if (ccp->cc_pfilled != NULL)
774 mcache_bkt_free(cp, &cp->mc_full,
775 ccp->cc_pfilled);
776 mcache_cpu_refill(ccp, bkt, 0);
777 continue;
778 }
779
780 /*
781 * We need an empty bucket to put our freed objects into
782 * but couldn't get an empty bucket from the bucket layer;
783 * attempt to allocate one. We do not want to block for
784 * allocation here, and if the bucket allocation fails
785 * we will simply fall through to the slab layer.
786 */
787 MCACHE_UNLOCK(&ccp->cc_lock);
788 bkt = mcache_alloc(btp->bt_cache, MCR_NOSLEEP);
789 MCACHE_LOCK(&ccp->cc_lock);
790
791 if (bkt != NULL) {
792 /*
793 * We have an empty bucket, but since we drop the
794 * CPU lock above, the cache's bucket size may have
795 * changed. If so, free the bucket and try again.
796 */
797 if (ccp->cc_bktsize != btp->bt_bktsize) {
798 MCACHE_UNLOCK(&ccp->cc_lock);
799 mcache_free(btp->bt_cache, bkt);
800 MCACHE_LOCK(&ccp->cc_lock);
801 continue;
802 }
803
804 /*
805 * We have an empty bucket of the right size;
806 * add it to the bucket layer and try again.
807 */
808 mcache_bkt_free(cp, &cp->mc_empty, bkt);
809 continue;
810 }
811
812 /*
813 * The bucket layer has no empty buckets; free the
814 * object(s) directly to the slab layer.
815 */
816 break;
817 }
818 MCACHE_UNLOCK(&ccp->cc_lock);
819
820 /* If there is a waiter below, notify it */
821 if (cp->mc_waiter_cnt > 0)
822 mcache_notify(cp, MCN_RETRYALLOC);
823
824 /* Advise the slab layer to purge the object(s) */
825 (*cp->mc_slab_free)(cp->mc_private, list,
826 (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
827 }
828
829 /*
830 * Cache destruction routine.
831 */
832 __private_extern__ void
833 mcache_destroy(mcache_t *cp)
834 {
835 void **pbuf;
836
837 MCACHE_LIST_LOCK();
838 LIST_REMOVE(cp, mc_list);
839 MCACHE_LIST_UNLOCK();
840
841 mcache_bkt_purge(cp);
842
843 /*
844 * This cache is dead; there should be no further transactions.
845 * If it is somehow still invoked, make sure that it induces a fault.
846 */
847 cp->mc_slab_alloc = NULL;
848 cp->mc_slab_free = NULL;
849 cp->mc_slab_audit = NULL;
850
851 lck_attr_free(cp->mc_bkt_lock_attr);
852 lck_grp_free(cp->mc_bkt_lock_grp);
853 lck_grp_attr_free(cp->mc_bkt_lock_grp_attr);
854
855 lck_attr_free(cp->mc_cpu_lock_attr);
856 lck_grp_free(cp->mc_cpu_lock_grp);
857 lck_grp_attr_free(cp->mc_cpu_lock_grp_attr);
858
859 lck_attr_free(cp->mc_sync_lock_attr);
860 lck_grp_free(cp->mc_sync_lock_grp);
861 lck_grp_attr_free(cp->mc_sync_lock_grp_attr);
862
863 /*
864 * TODO: We need to destroy the zone here, but there is currently
865 * no way to do so. Until that becomes possible, the memory
866 * allocated for the zone structure is leaked.
867 * Once it becomes achievable, uncomment these lines:
868 *
869 * if (cp->mc_slab_zone != NULL) {
870 * zdestroy(cp->mc_slab_zone);
871 * cp->mc_slab_zone = NULL;
872 * }
873 */
874
875 /* Get the original address since we're about to free it */
876 pbuf = (void **)((intptr_t)cp - sizeof (void *));
877
878 zfree(mcache_zone, *pbuf);
879 }
880
881 /*
882 * Internal slab allocator used as a backend for simple caches. The current
883 * implementation uses the zone allocator for simplicity reasons.
884 */
885 static unsigned int
886 mcache_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait)
887 {
888 mcache_t *cp = arg;
889 unsigned int need = num;
890 size_t offset = 0;
891 size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
892 u_int32_t flags = cp->mc_flags;
893 void *buf, *base, **pbuf;
894 mcache_obj_t **list = *plist;
895
896 *list = NULL;
897
898 /*
899 * The address of the object returned to the caller is an
900 * offset from the 64-bit aligned base address only if the
901 * cache's alignment requirement is neither 1 nor 8 bytes.
902 */
903 if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
904 offset = cp->mc_align;
905
906 for (;;) {
907 if (!(wait & MCR_NOSLEEP))
908 buf = zalloc(cp->mc_slab_zone);
909 else
910 buf = zalloc_noblock(cp->mc_slab_zone);
911
912 if (buf == NULL)
913 break;
914
915 /* Get the 64-bit aligned base address for this object */
916 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
917 sizeof (u_int64_t));
918
919 /*
920 * Wind back a pointer size from the aligned base and
921 * save the original address so we can free it later.
922 */
923 pbuf = (void **)((intptr_t)base - sizeof (void *));
924 *pbuf = buf;
925
926 /*
927 * If auditing is enabled, patternize the contents of
928 * the buffer starting from the 64-bit aligned base to
929 * the end of the buffer; the length is rounded up to
930 * the nearest 64-bit multiple; this is because we use
931 * 64-bit memory accesses to set/check the pattern.
932 */
933 if (flags & MCF_DEBUG) {
934 VERIFY(((intptr_t)base + rsize) <=
935 ((intptr_t)buf + cp->mc_chunksize));
936 mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
937 }
938
939 /*
940 * Fix up the object's address to fulfill the cache's
941 * alignment requirement (if needed) and return this
942 * to the caller.
943 */
944 VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
945 ((intptr_t)buf + cp->mc_chunksize));
946 *list = (mcache_obj_t *)((intptr_t)base + offset);
947
948 (*list)->obj_next = NULL;
949 list = *plist = &(*list)->obj_next;
950
951 /* If we got them all, return to mcache */
952 if (--need == 0)
953 break;
954 }
955
956 return (num - need);
957 }
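
/*
 * To recap the layout produced above: each object handed out is carved
 * from a single zone allocation of mc_chunksize bytes.  The original
 * zone pointer `buf' is stashed in the pointer slot immediately below
 * the 64-bit aligned `base', and the object itself starts at `base'
 * plus the cache's alignment offset (zero for 1- or 8-byte alignment).
 * mcache_slab_free() and mcache_slab_audit() below undo exactly these
 * steps, recovering `base' from the object address and the original
 * zone pointer through the slot below it.
 */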
958
959 /*
960 * Internal slab deallocator used as a backend for simple caches.
961 */
962 static void
963 mcache_slab_free(void *arg, mcache_obj_t *list, __unused boolean_t purged)
964 {
965 mcache_t *cp = arg;
966 mcache_obj_t *nlist;
967 size_t offset = 0;
968 size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
969 u_int32_t flags = cp->mc_flags;
970 void *base;
971 void **pbuf;
972
973 /*
974 * The address of the object is an offset from a 64-bit
975 * aligned base address only if the cache's alignment
976 * requirement is neither 1 nor 8 bytes.
977 */
978 if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
979 offset = cp->mc_align;
980
981 for (;;) {
982 nlist = list->obj_next;
983 list->obj_next = NULL;
984
985 /* Get the 64-bit aligned base address of this object */
986 base = (void *)((intptr_t)list - offset);
987 VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));
988
989 /* Get the original address since we're about to free it */
990 pbuf = (void **)((intptr_t)base - sizeof (void *));
991
992 if (flags & MCF_DEBUG) {
993 VERIFY(((intptr_t)base + rsize) <=
994 ((intptr_t)*pbuf + cp->mc_chunksize));
995 mcache_audit_free_verify(NULL, base, offset, rsize);
996 }
997
998 /* Free it to zone */
999 VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
1000 ((intptr_t)*pbuf + cp->mc_chunksize));
1001 zfree(cp->mc_slab_zone, *pbuf);
1002
1003 /* No more objects to free; return to mcache */
1004 if ((list = nlist) == NULL)
1005 break;
1006 }
1007 }
1008
1009 /*
1010 * Internal slab auditor for simple caches.
1011 */
1012 static void
1013 mcache_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
1014 {
1015 mcache_t *cp = arg;
1016 size_t offset = 0;
1017 size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
1018 void *base, **pbuf;
1019
1020 /*
1021 * The address of the object returned to the caller is an
1022 * offset from the 64-bit aligned base address only if the
1023 * cache's alignment requirement is neither 1 nor 8 bytes.
1024 */
1025 if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
1026 offset = cp->mc_align;
1027
1028 while (list != NULL) {
1029 mcache_obj_t *next = list->obj_next;
1030
1031 /* Get the 64-bit aligned base address of this object */
1032 base = (void *)((intptr_t)list - offset);
1033 VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));
1034
1035 /* Get the original address */
1036 pbuf = (void **)((intptr_t)base - sizeof (void *));
1037
1038 VERIFY(((intptr_t)base + rsize) <=
1039 ((intptr_t)*pbuf + cp->mc_chunksize));
1040
1041 if (!alloc)
1042 mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
1043 else
1044 mcache_audit_free_verify_set(NULL, base, offset, rsize);
1045
1046 list = list->obj_next = next;
1047 }
1048 }
1049
1050 /*
1051 * Refill the CPU's filled bucket with bkt and save the previous one.
1052 */
1053 static void
1054 mcache_cpu_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs)
1055 {
1056 ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) ||
1057 (ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize));
1058 ASSERT(ccp->cc_bktsize > 0);
1059
1060 ccp->cc_pfilled = ccp->cc_filled;
1061 ccp->cc_pobjs = ccp->cc_objs;
1062 ccp->cc_filled = bkt;
1063 ccp->cc_objs = objs;
1064 }
1065
1066 /*
1067 * Allocate a bucket from the bucket layer.
1068 */
1069 static mcache_bkt_t *
1070 mcache_bkt_alloc(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkttype_t **btp)
1071 {
1072 mcache_bkt_t *bkt;
1073
1074 if (!MCACHE_LOCK_TRY(&cp->mc_bkt_lock)) {
1075 /*
1076 * The bucket layer lock is held by another CPU; increase
1077 * the contention count so that we can later adjust the
1078 * bucket size accordingly.
1079 */
1080 MCACHE_LOCK(&cp->mc_bkt_lock);
1081 cp->mc_bkt_contention++;
1082 }
1083
1084 if ((bkt = blp->bl_list) != NULL) {
1085 blp->bl_list = bkt->bkt_next;
1086 if (--blp->bl_total < blp->bl_min)
1087 blp->bl_min = blp->bl_total;
1088 blp->bl_alloc++;
1089 }
1090
1091 if (btp != NULL)
1092 *btp = cp->cache_bkttype;
1093
1094 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1095
1096 return (bkt);
1097 }
1098
1099 /*
1100 * Free a bucket to the bucket layer.
1101 */
1102 static void
1103 mcache_bkt_free(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t *bkt)
1104 {
1105 MCACHE_LOCK(&cp->mc_bkt_lock);
1106
1107 bkt->bkt_next = blp->bl_list;
1108 blp->bl_list = bkt;
1109 blp->bl_total++;
1110
1111 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1112 }
1113
1114 /*
1115 * Enable the bucket layer of a cache.
1116 */
1117 static void
1118 mcache_cache_bkt_enable(mcache_t *cp)
1119 {
1120 mcache_cpu_t *ccp;
1121 int cpu;
1122
1123 if (cp->mc_flags & MCF_NOCPUCACHE)
1124 return;
1125
1126 for (cpu = 0; cpu < ncpu; cpu++) {
1127 ccp = &cp->mc_cpu[cpu];
1128 MCACHE_LOCK(&ccp->cc_lock);
1129 ccp->cc_bktsize = cp->cache_bkttype->bt_bktsize;
1130 MCACHE_UNLOCK(&ccp->cc_lock);
1131 }
1132 }
1133
1134 /*
1135 * Purge all buckets from a cache and disable its bucket layer.
1136 */
1137 static void
1138 mcache_bkt_purge(mcache_t *cp)
1139 {
1140 mcache_cpu_t *ccp;
1141 mcache_bkt_t *bp, *pbp;
1142 mcache_bkttype_t *btp;
1143 int cpu, objs, pobjs;
1144
1145 for (cpu = 0; cpu < ncpu; cpu++) {
1146 ccp = &cp->mc_cpu[cpu];
1147
1148 MCACHE_LOCK(&ccp->cc_lock);
1149
1150 btp = cp->cache_bkttype;
1151 bp = ccp->cc_filled;
1152 pbp = ccp->cc_pfilled;
1153 objs = ccp->cc_objs;
1154 pobjs = ccp->cc_pobjs;
1155 ccp->cc_filled = NULL;
1156 ccp->cc_pfilled = NULL;
1157 ccp->cc_objs = -1;
1158 ccp->cc_pobjs = -1;
1159 ccp->cc_bktsize = 0;
1160
1161 MCACHE_UNLOCK(&ccp->cc_lock);
1162
1163 if (bp != NULL)
1164 mcache_bkt_destroy(cp, btp, bp, objs);
1165 if (pbp != NULL)
1166 mcache_bkt_destroy(cp, btp, pbp, pobjs);
1167 }
1168
1169 /*
1170 * Updating the working set back to back essentially sets
1171 * the working set size to zero, so everything is reapable.
1172 */
1173 mcache_bkt_ws_update(cp);
1174 mcache_bkt_ws_update(cp);
1175
1176 mcache_bkt_ws_reap(cp);
1177 }
1178
1179 /*
1180 * Free one or more objects in the bucket to the slab layer,
1181 * and also free the bucket itself.
1182 */
1183 static void
1184 mcache_bkt_destroy(mcache_t *cp, mcache_bkttype_t *btp, mcache_bkt_t *bkt,
1185 int nobjs)
1186 {
1187 if (nobjs > 0) {
1188 mcache_obj_t *top = bkt->bkt_obj[nobjs - 1];
1189
1190 if (cp->mc_flags & MCF_DEBUG) {
1191 mcache_obj_t *o = top;
1192 int cnt = 0;
1193
1194 /*
1195 * Verify that the chain of objects in the bucket is
1196 * valid. Any mismatch here means a mistake when the
1197 * object(s) were freed to the CPU layer, so we panic.
1198 */
1199 while (o != NULL) {
1200 o = o->obj_next;
1201 ++cnt;
1202 }
1203 if (cnt != nobjs) {
1204 panic("mcache_bkt_destroy: %s cp %p corrupted "
1205 "list in bkt %p (nobjs %d actual %d)\n",
1206 cp->mc_name, (void *)cp, (void *)bkt,
1207 nobjs, cnt);
1208 }
1209 }
1210
1211 /* Advise the slab layer to purge the object(s) */
1212 (*cp->mc_slab_free)(cp->mc_private, top,
1213 (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
1214 }
1215 mcache_free(btp->bt_cache, bkt);
1216 }
1217
1218 /*
1219 * Update the bucket layer working set statistics.
1220 */
1221 static void
1222 mcache_bkt_ws_update(mcache_t *cp)
1223 {
1224 MCACHE_LOCK(&cp->mc_bkt_lock);
1225
1226 cp->mc_full.bl_reaplimit = cp->mc_full.bl_min;
1227 cp->mc_full.bl_min = cp->mc_full.bl_total;
1228 cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_min;
1229 cp->mc_empty.bl_min = cp->mc_empty.bl_total;
1230
1231 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1232 }
1233
1234 /*
1235 * Reap all buckets that are beyond the working set.
1236 */
1237 static void
1238 mcache_bkt_ws_reap(mcache_t *cp)
1239 {
1240 long reap;
1241 mcache_bkt_t *bkt;
1242 mcache_bkttype_t *btp;
1243
1244 reap = MIN(cp->mc_full.bl_reaplimit, cp->mc_full.bl_min);
1245 while (reap-- &&
1246 (bkt = mcache_bkt_alloc(cp, &cp->mc_full, &btp)) != NULL)
1247 mcache_bkt_destroy(cp, btp, bkt, btp->bt_bktsize);
1248
1249 reap = MIN(cp->mc_empty.bl_reaplimit, cp->mc_empty.bl_min);
1250 while (reap-- &&
1251 (bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp)) != NULL)
1252 mcache_bkt_destroy(cp, btp, bkt, 0);
1253 }
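
/*
 * Illustrative trace of the working-set logic above (the numbers are
 * made up): if the full-bucket list never drops below 6 buckets during
 * one update interval (~15 seconds, see mcache_reap_interval), bl_min
 * ends that interval at 6 and mcache_bkt_ws_update() saves it as
 * bl_reaplimit before restarting bl_min at the current total.  If the
 * next interval's low-water mark is 4, mcache_bkt_ws_reap() destroys
 * MIN(6, 4) = 4 full buckets, i.e. only buckets that sat idle across
 * both intervals.  Calling mcache_bkt_ws_update() twice back to back,
 * as mcache_bkt_purge() does, makes both values equal the instantaneous
 * total, so a reap that follows empties the bucket layer entirely.
 */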
1254
1255 static void
1256 mcache_reap_timeout(void *arg)
1257 {
1258 volatile UInt32 *flag = arg;
1259
1260 ASSERT(flag == &mcache_reaping);
1261
1262 *flag = 0;
1263 }
1264
1265 static void
1266 mcache_reap_done(void *flag)
1267 {
1268 timeout(mcache_reap_timeout, flag, mcache_reap_interval);
1269 }
1270
1271 static void
1272 mcache_reap_start(void *arg)
1273 {
1274 UInt32 *flag = arg;
1275
1276 ASSERT(flag == &mcache_reaping);
1277
1278 mcache_applyall(mcache_cache_reap);
1279 mcache_dispatch(mcache_reap_done, flag);
1280 }
1281
1282 __private_extern__ void
1283 mcache_reap(void)
1284 {
1285 UInt32 *flag = &mcache_reaping;
1286
1287 if (mcache_llock_owner == current_thread() ||
1288 !OSCompareAndSwap(0, 1, flag))
1289 return;
1290
1291 mcache_dispatch(mcache_reap_start, flag);
1292 }
1293
1294 static void
1295 mcache_cache_reap(mcache_t *cp)
1296 {
1297 mcache_bkt_ws_reap(cp);
1298 }
1299
1300 /*
1301 * Performs periodic maintenance on a cache.
1302 */
1303 static void
1304 mcache_cache_update(mcache_t *cp)
1305 {
1306 int need_bkt_resize = 0;
1307 int need_bkt_reenable = 0;
1308
1309 lck_mtx_assert(mcache_llock, LCK_MTX_ASSERT_OWNED);
1310
1311 mcache_bkt_ws_update(cp);
1312
1313 /*
1314 * Cache resize and post-purge reenable are mutually exclusive.
1315 * If the cache was previously purged, there is no point in
1316 * increasing the bucket size, as the purge indicates memory
1317 * pressure on the system.
1318 */
1319 lck_mtx_lock_spin(&cp->mc_sync_lock);
1320 if (!(cp->mc_flags & MCF_NOCPUCACHE) && cp->mc_enable_cnt)
1321 need_bkt_reenable = 1;
1322 lck_mtx_unlock(&cp->mc_sync_lock);
1323
1324 MCACHE_LOCK(&cp->mc_bkt_lock);
1325 /*
1326 * If the contention count is greater than the threshold, and if
1327 * we are not already at the maximum bucket size, increase it.
1328 * Otherwise, if this cache was previously purged by the user
1329 * then we simply reenable it.
1330 */
1331 if ((unsigned int)cp->mc_chunksize < cp->cache_bkttype->bt_maxbuf &&
1332 (int)(cp->mc_bkt_contention - cp->mc_bkt_contention_prev) >
1333 mcache_bkt_contention && !need_bkt_reenable)
1334 need_bkt_resize = 1;
1335
1336 cp->mc_bkt_contention_prev = cp->mc_bkt_contention;
1337 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1338
1339 if (need_bkt_resize)
1340 mcache_dispatch(mcache_cache_bkt_resize, cp);
1341 else if (need_bkt_reenable)
1342 mcache_dispatch(mcache_cache_enable, cp);
1343 }
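
/*
 * Illustrative example of the decision above: with the default
 * mcache_bkt_contention threshold of 3, a cache whose bucket lock was
 * contended, say, 5 more times than at the previous update (and whose
 * chunk size is still below the current bucket type's bt_maxbuf) gets
 * an asynchronous mcache_cache_bkt_resize() dispatched, moving it to
 * the next larger bucket size.  A cache that was purged since the last
 * update is instead simply reenabled at its current bucket size.
 */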
1344
1345 /*
1346 * Recompute a cache's bucket size. This is an expensive operation
1347 * and should not be done frequently; larger buckets provide a
1348 * higher transfer rate per bucket exchange, while smaller buckets
1349 * reduce memory consumption.
1350 */
1351 static void
1352 mcache_cache_bkt_resize(void *arg)
1353 {
1354 mcache_t *cp = arg;
1355 mcache_bkttype_t *btp = cp->cache_bkttype;
1356
1357 if ((unsigned int)cp->mc_chunksize < btp->bt_maxbuf) {
1358 mcache_bkt_purge(cp);
1359
1360 /*
1361 * Upgrade to the next bucket type with larger bucket size;
1362 * temporarily set the previous contention snapshot to a
1363 * negative number to prevent unnecessary resize request.
1364 */
1365 MCACHE_LOCK(&cp->mc_bkt_lock);
1366 cp->cache_bkttype = ++btp;
1367 cp->mc_bkt_contention_prev = cp->mc_bkt_contention + INT_MAX;
1368 MCACHE_UNLOCK(&cp->mc_bkt_lock);
1369
1370 mcache_cache_enable(cp);
1371 }
1372 }
1373
1374 /*
1375 * Reenable a previously disabled cache due to purge.
1376 */
1377 static void
1378 mcache_cache_enable(void *arg)
1379 {
1380 mcache_t *cp = arg;
1381
1382 lck_mtx_lock_spin(&cp->mc_sync_lock);
1383 cp->mc_purge_cnt = 0;
1384 cp->mc_enable_cnt = 0;
1385 lck_mtx_unlock(&cp->mc_sync_lock);
1386
1387 mcache_cache_bkt_enable(cp);
1388 }
1389
1390 static void
1391 mcache_update_timeout(__unused void *arg)
1392 {
1393 timeout(mcache_update, NULL, mcache_reap_interval);
1394 }
1395
1396 static void
1397 mcache_update(__unused void *arg)
1398 {
1399 mcache_applyall(mcache_cache_update);
1400 mcache_dispatch(mcache_update_timeout, NULL);
1401 }
1402
1403 static void
1404 mcache_applyall(void (*func)(mcache_t *))
1405 {
1406 mcache_t *cp;
1407
1408 MCACHE_LIST_LOCK();
1409 LIST_FOREACH(cp, &mcache_head, mc_list) {
1410 func(cp);
1411 }
1412 MCACHE_LIST_UNLOCK();
1413 }
1414
1415 static void
1416 mcache_dispatch(void (*func)(void *), void *arg)
1417 {
1418 ASSERT(func != NULL);
1419 timeout(func, arg, hz/1000);
1420 }
1421
1422 __private_extern__ void
1423 mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp,
1424 struct timeval *base_ts)
1425 {
1426 struct timeval now, base = { 0, 0 };
1427 void *stack[MCACHE_STACK_DEPTH + 1];
1428
1429 mca->mca_addr = addr;
1430 mca->mca_cache = cp;
1431 mca->mca_pthread = mca->mca_thread;
1432 mca->mca_thread = current_thread();
1433 bcopy(mca->mca_stack, mca->mca_pstack, sizeof (mca->mca_pstack));
1434 mca->mca_pdepth = mca->mca_depth;
1435 bzero(stack, sizeof (stack));
1436 mca->mca_depth = OSBacktrace(stack, MCACHE_STACK_DEPTH + 1) - 1;
1437 bcopy(&stack[1], mca->mca_stack, sizeof (mca->mca_pstack));
1438
1439 mca->mca_ptstamp = mca->mca_tstamp;
1440 microuptime(&now);
1441 if (base_ts != NULL)
1442 base = *base_ts;
1443 /* tstamp is in ms relative to base_ts */
1444 mca->mca_tstamp = ((now.tv_usec - base.tv_usec) / 1000);
1445 if ((now.tv_sec - base.tv_sec) > 0)
1446 mca->mca_tstamp += ((now.tv_sec - base.tv_sec) * 1000);
1447 }
1448
1449 __private_extern__ void
1450 mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size)
1451 {
1452 u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
1453 u_int64_t *buf = (u_int64_t *)buf_arg;
1454
1455 VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
1456 VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));
1457
1458 while (buf < buf_end)
1459 *buf++ = pattern;
1460 }
1461
1462 __private_extern__ void *
1463 mcache_verify_pattern(u_int64_t pattern, void *buf_arg, size_t size)
1464 {
1465 u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
1466 u_int64_t *buf;
1467
1468 VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
1469 VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));
1470
1471 for (buf = buf_arg; buf < buf_end; buf++) {
1472 if (*buf != pattern)
1473 return (buf);
1474 }
1475 return (NULL);
1476 }
1477
1478 __private_extern__ void *
1479 mcache_verify_set_pattern(u_int64_t old, u_int64_t new, void *buf_arg,
1480 size_t size)
1481 {
1482 u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
1483 u_int64_t *buf;
1484
1485 VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
1486 VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));
1487
1488 for (buf = buf_arg; buf < buf_end; buf++) {
1489 if (*buf != old) {
1490 mcache_set_pattern(old, buf_arg,
1491 (uintptr_t)buf - (uintptr_t)buf_arg);
1492 return (buf);
1493 }
1494 *buf = new;
1495 }
1496 return (NULL);
1497 }
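
/*
 * A minimal sketch of how the pattern helpers are meant to be combined
 * (the 128-byte region below is hypothetical; both the base address
 * and the size must be 64-bit aligned):
 *
 *	void *base = ...;	// some 64-bit aligned, 128-byte buffer
 *	u_int64_t *bad;
 *
 *	// Fill a region that is going onto a free list ...
 *	mcache_set_pattern(MCACHE_FREE_PATTERN, base, 128);
 *
 *	// ... and later confirm nothing wrote to it while it was free;
 *	// NULL means intact, otherwise the first modified 64-bit word.
 *	bad = mcache_verify_pattern(MCACHE_FREE_PATTERN, base, 128);
 *
 *	// On reallocation, verify the free pattern and replace it with
 *	// the uninitialized pattern in a single pass.
 *	bad = mcache_verify_set_pattern(MCACHE_FREE_PATTERN,
 *	    MCACHE_UNINITIALIZED_PATTERN, base, 128);
 */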
1498
1499 __private_extern__ void
1500 mcache_audit_free_verify(mcache_audit_t *mca, void *base, size_t offset,
1501 size_t size)
1502 {
1503 void *addr;
1504 u_int64_t *oaddr64;
1505 mcache_obj_t *next;
1506
1507 addr = (void *)((uintptr_t)base + offset);
1508 next = ((mcache_obj_t *)addr)->obj_next;
1509
1510 /* For the "obj_next" pointer in the buffer */
1511 oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
1512 *oaddr64 = MCACHE_FREE_PATTERN;
1513
1514 if ((oaddr64 = mcache_verify_pattern(MCACHE_FREE_PATTERN,
1515 (caddr_t)base, size)) != NULL) {
1516 mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
1517 (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
1518 /* NOTREACHED */
1519 }
1520 ((mcache_obj_t *)addr)->obj_next = next;
1521 }
1522
1523 __private_extern__ void
1524 mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset,
1525 size_t size)
1526 {
1527 void *addr;
1528 u_int64_t *oaddr64;
1529 mcache_obj_t *next;
1530
1531 addr = (void *)((uintptr_t)base + offset);
1532 next = ((mcache_obj_t *)addr)->obj_next;
1533
1534 /* For the "obj_next" pointer in the buffer */
1535 oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
1536 *oaddr64 = MCACHE_FREE_PATTERN;
1537
1538 if ((oaddr64 = mcache_verify_set_pattern(MCACHE_FREE_PATTERN,
1539 MCACHE_UNINITIALIZED_PATTERN, (caddr_t)base, size)) != NULL) {
1540 mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
1541 (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
1542 /* NOTREACHED */
1543 }
1544 ((mcache_obj_t *)addr)->obj_next = next;
1545 }
1546
1547 #undef panic
1548
1549 __private_extern__ char *
1550 mcache_dump_mca(mcache_audit_t *mca)
1551 {
1552 if (mca_dump_buf == NULL)
1553 return (NULL);
1554
1555 snprintf(mca_dump_buf, DUMP_MCA_BUF_SIZE,
1556 "mca %p: addr %p, cache %p (%s)\n"
1557 "last transaction; thread %p, saved PC stack (%d deep):\n"
1558 "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
1559 "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
1560 "previous transaction; thread %p, saved PC stack (%d deep):\n"
1561 "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
1562 "\t%p, %p, %p, %p, %p, %p, %p, %p\n",
1563 mca, mca->mca_addr, mca->mca_cache,
1564 mca->mca_cache ? mca->mca_cache->mc_name : "?",
1565 mca->mca_thread, mca->mca_depth,
1566 mca->mca_stack[0], mca->mca_stack[1], mca->mca_stack[2],
1567 mca->mca_stack[3], mca->mca_stack[4], mca->mca_stack[5],
1568 mca->mca_stack[6], mca->mca_stack[7], mca->mca_stack[8],
1569 mca->mca_stack[9], mca->mca_stack[10], mca->mca_stack[11],
1570 mca->mca_stack[12], mca->mca_stack[13], mca->mca_stack[14],
1571 mca->mca_stack[15],
1572 mca->mca_pthread, mca->mca_pdepth,
1573 mca->mca_pstack[0], mca->mca_pstack[1], mca->mca_pstack[2],
1574 mca->mca_pstack[3], mca->mca_pstack[4], mca->mca_pstack[5],
1575 mca->mca_pstack[6], mca->mca_pstack[7], mca->mca_pstack[8],
1576 mca->mca_pstack[9], mca->mca_pstack[10], mca->mca_pstack[11],
1577 mca->mca_pstack[12], mca->mca_pstack[13], mca->mca_pstack[14],
1578 mca->mca_pstack[15]);
1579
1580 return (mca_dump_buf);
1581 }
1582
1583 __private_extern__ void
1584 mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset,
1585 int64_t expected, int64_t got)
1586 {
1587 if (mca == NULL) {
1588 panic("mcache_audit: buffer %p modified after free at "
1589 "offset 0x%lx (0x%llx instead of 0x%llx)\n", addr,
1590 offset, got, expected);
1591 /* NOTREACHED */
1592 }
1593
1594 panic("mcache_audit: buffer %p modified after free at offset 0x%lx "
1595 "(0x%llx instead of 0x%llx)\n%s\n",
1596 addr, offset, got, expected, mcache_dump_mca(mca));
1597 /* NOTREACHED */
1598 }
1599
1600 __private_extern__ int
1601 assfail(const char *a, const char *f, int l)
1602 {
1603 panic("assertion failed: %s, file: %s, line: %d", a, f, l);
1604 return (0);
1605 }