+/*
+ * This function turns on mbuf leak detection.
+ *
+ * The sampling factor may be overridden with the "mleak_sample_factor"
+ * boot-arg; a factor of 0 disables leak detection entirely.  When enabled,
+ * it allocates the allocation hash table, the trace hash table and the
+ * exported statistics buffer.  Allocations use M_WAITOK and are VERIFY()ed,
+ * so this must run at init time where blocking is acceptable.
+ */
+static void
+mleak_activate(void)
+{
+ mleak_table.mleak_sample_factor = MLEAK_SAMPLE_FACTOR;
+ PE_parse_boot_argn("mleak_sample_factor",
+ &mleak_table.mleak_sample_factor,
+ sizeof(mleak_table.mleak_sample_factor));
+
+ /* a sample factor of 0 means leak detection is off */
+ if (mleak_table.mleak_sample_factor == 0) {
+ mclfindleak = 0;
+ }
+
+ if (mclfindleak == 0) {
+ return;
+ }
+
+ vm_size_t alloc_size =
+ mleak_alloc_buckets * sizeof(struct mallocation);
+ vm_size_t trace_size = mleak_trace_buckets * sizeof(struct mtrace);
+
+ /* table of sampled outstanding allocations, hashed by address */
+ MALLOC(mleak_allocations, struct mallocation *, alloc_size,
+ M_TEMP, M_WAITOK | M_ZERO);
+ VERIFY(mleak_allocations != NULL);
+
+ /* table of allocation backtraces, hashed by backtrace contents */
+ MALLOC(mleak_traces, struct mtrace *, trace_size,
+ M_TEMP, M_WAITOK | M_ZERO);
+ VERIFY(mleak_traces != NULL);
+
+ /* buffer exported to userland / mbuf_dump() with the top traces */
+ MALLOC(mleak_stat, mleak_stat_t *, MLEAK_STAT_SIZE(MLEAK_NUM_TRACES),
+ M_TEMP, M_WAITOK | M_ZERO);
+ VERIFY(mleak_stat != NULL);
+ mleak_stat->ml_cnt = MLEAK_NUM_TRACES;
+#ifdef __LP64__
+ mleak_stat->ml_isaddr64 = 1;
+#endif /* __LP64__ */
+}
+
+/*
+ * Record an mbuf allocation or free for leak detection.
+ *
+ * Frees are always processed (so stale records get cleared); allocations
+ * are sampled — only every mleak_sample_factor'th capture is logged — to
+ * bound the cost of backtrace collection on the hot path.
+ */
+static void
+mleak_logger(u_int32_t num, mcache_obj_t *addr, boolean_t alloc)
+{
+ int temp;
+
+ if (mclfindleak == 0) {
+ return;
+ }
+
+ if (!alloc) {
+ /*
+ * `return mleak_free(addr);` in a void function is a C
+ * constraint violation (C11 6.8.6.4p1) that only compiles
+ * as an extension; call and return separately instead.
+ */
+ mleak_free(addr);
+ return;
+ }
+
+ temp = atomic_add_32_ov(&mleak_table.mleak_capture, 1);
+
+ /* sample one in every mleak_sample_factor allocations */
+ if ((temp % mleak_table.mleak_sample_factor) == 0 && addr != NULL) {
+ uintptr_t bt[MLEAK_STACK_DEPTH];
+ int logged = backtrace(bt, MLEAK_STACK_DEPTH, NULL);
+ mleak_log(bt, addr, logged, num);
+ }
+}
+
+/*
+ * This function records the allocation in the mleak_allocations table
+ * and the backtrace in the mleak_traces table.  If the allocation slot
+ * is in use, the old allocation is replaced with the new one; if the
+ * trace slot is in use by a different trace, we bail out (or increment
+ * its refcount if it is the same trace).  Returns FALSE only when the
+ * tables could not be locked without contention.
+ */
+static boolean_t
+mleak_log(uintptr_t *bt, mcache_obj_t *addr, uint32_t depth, int num)
+{
+ struct mallocation *allocation;
+ struct mtrace *trace;
+ uint32_t trace_index;
+
+ /* Quit if someone else modifying the tables */
+ if (!lck_mtx_try_lock_spin(mleak_lock)) {
+ mleak_table.total_conflicts++;
+ return FALSE;
+ }
+
+ /* hash the allocation by address and the trace by its contents */
+ allocation = &mleak_allocations[hashaddr((uintptr_t)addr,
+ mleak_alloc_buckets)];
+ trace_index = hashbacktrace(bt, depth, mleak_trace_buckets);
+ trace = &mleak_traces[trace_index];
+
+ VERIFY(allocation <= &mleak_allocations[mleak_alloc_buckets - 1]);
+ VERIFY(trace <= &mleak_traces[mleak_trace_buckets - 1]);
+
+ allocation->hitcount++;
+ trace->hitcount++;
+
+ /*
+ * If the allocation bucket we want is occupied
+ * and the occupier has the same trace, just bail.
+ */
+ if (allocation->element != NULL &&
+ trace_index == allocation->trace_index) {
+ mleak_table.alloc_collisions++;
+ lck_mtx_unlock(mleak_lock);
+ return TRUE;
+ }
+
+ /*
+ * Step 1: Store the backtrace in the traces array;
+ * allocs of zero = trace bucket is free.
+ */
+ if (trace->allocs > 0 &&
+ bcmp(trace->addr, bt, (depth * sizeof(uintptr_t))) != 0) {
+ /* Different, unique trace, but the same hash! Bail out. */
+ trace->collisions++;
+ mleak_table.trace_collisions++;
+ lck_mtx_unlock(mleak_lock);
+ return TRUE;
+ } else if (trace->allocs > 0) {
+ /* Same trace, already added, so increment refcount */
+ trace->allocs++;
+ } else {
+ /* Found an unused trace bucket, so record the trace here */
+ if (trace->depth != 0) {
+ /* this slot previously used but not currently in use */
+ mleak_table.trace_overwrites++;
+ }
+ mleak_table.trace_recorded++;
+ trace->allocs = 1;
+ memcpy(trace->addr, bt, (depth * sizeof(uintptr_t)));
+ trace->depth = depth;
+ trace->collisions = 0;
+ }
+
+ /* Step 2: Store the allocation record in the allocations array */
+ if (allocation->element != NULL) {
+ /*
+ * Replace an existing allocation. No need to preserve
+ * because only a subset of the allocations are being
+ * recorded anyway.
+ */
+ mleak_table.alloc_collisions++;
+ } else if (allocation->trace_index != 0) {
+ /* slot was used before but is currently free */
+ mleak_table.alloc_overwrites++;
+ }
+ allocation->element = addr;
+ allocation->trace_index = trace_index;
+ allocation->count = num;
+ mleak_table.alloc_recorded++;
+ mleak_table.outstanding_allocs++;
+
+ lck_mtx_unlock(mleak_lock);
+ return TRUE;
+}
+
+/*
+ * Remove the leak-detection records for every mbuf in the chain headed
+ * by 'addr'.  Uses a double-checked pattern: a lockless pre-check keeps
+ * the common (untracked) case cheap, and the check is repeated under
+ * mleak_lock before the tables are modified.
+ */
+static void
+mleak_free(mcache_obj_t *addr)
+{
+ while (addr != NULL) {
+ struct mallocation *allocation = &mleak_allocations
+ [hashaddr((uintptr_t)addr, mleak_alloc_buckets)];
+
+ /* unlocked fast-path check; re-validated under the lock below */
+ if (allocation->element == addr &&
+ allocation->trace_index < mleak_trace_buckets) {
+ lck_mtx_lock_spin(mleak_lock);
+ if (allocation->element == addr &&
+ allocation->trace_index < mleak_trace_buckets) {
+ struct mtrace *trace;
+ trace = &mleak_traces[allocation->trace_index];
+ /* allocs = 0 means trace bucket is unused */
+ if (trace->allocs > 0) {
+ trace->allocs--;
+ }
+ if (trace->allocs == 0) {
+ trace->depth = 0;
+ }
+ /* NULL element means alloc bucket is unused */
+ allocation->element = NULL;
+ mleak_table.outstanding_allocs--;
+ }
+ lck_mtx_unlock(mleak_lock);
+ }
+ addr = addr->obj_next;
+ }
+}
+
+/*
+ * Build mleak_top_trace[]: the MLEAK_NUM_TRACES trace buckets with the
+ * most outstanding allocations, sorted in descending order of allocs.
+ *
+ * Phase 1 fills the top array with the first MLEAK_NUM_TRACES in-use
+ * buckets, insertion-sorting each new entry into place.  Phase 2 scans
+ * the remaining buckets; any bucket larger than the current minimum
+ * (slot j) replaces it and is re-sorted into position.
+ *
+ * Fixed: declared with an empty parameter list `()`, which is an
+ * obsolescent non-prototype declaration (removed in C23); use (void).
+ */
+static void
+mleak_sort_traces(void)
+{
+ int i, j, k;
+ struct mtrace *swap;
+
+ for (i = 0; i < MLEAK_NUM_TRACES; i++) {
+ mleak_top_trace[i] = NULL;
+ }
+
+ /* Phase 1: seed the top array with the first in-use buckets */
+ for (i = 0, j = 0; j < MLEAK_NUM_TRACES && i < mleak_trace_buckets; i++) {
+ if (mleak_traces[i].allocs <= 0) {
+ continue;
+ }
+
+ mleak_top_trace[j] = &mleak_traces[i];
+ for (k = j; k > 0; k--) {
+ if (mleak_top_trace[k]->allocs <=
+ mleak_top_trace[k - 1]->allocs) {
+ break;
+ }
+
+ swap = mleak_top_trace[k - 1];
+ mleak_top_trace[k - 1] = mleak_top_trace[k];
+ mleak_top_trace[k] = swap;
+ }
+ j++;
+ }
+
+ /* Phase 2: j now indexes the smallest of the current top traces */
+ j--;
+ for (; i < mleak_trace_buckets; i++) {
+ if (mleak_traces[i].allocs <= mleak_top_trace[j]->allocs) {
+ continue;
+ }
+
+ mleak_top_trace[j] = &mleak_traces[i];
+
+ for (k = j; k > 0; k--) {
+ if (mleak_top_trace[k]->allocs <=
+ mleak_top_trace[k - 1]->allocs) {
+ break;
+ }
+
+ swap = mleak_top_trace[k - 1];
+ mleak_top_trace[k - 1] = mleak_top_trace[k];
+ mleak_top_trace[k] = swap;
+ }
+ }
+}
+
+/*
+ * Refresh the exported mleak_stat buffer from the current top traces.
+ * Sorts the trace table (mleak_sort_traces) and then copies each of the
+ * top MLEAK_NUM_TRACES entries into mleak_stat->ml_trace[].
+ *
+ * Fixed: declared with an empty parameter list `()`, which is an
+ * obsolescent non-prototype declaration (removed in C23); use (void).
+ */
+static void
+mleak_update_stats(void)
+{
+ mleak_trace_stat_t *mltr;
+ int i;
+
+ VERIFY(mleak_stat != NULL);
+#ifdef __LP64__
+ VERIFY(mleak_stat->ml_isaddr64);
+#else
+ VERIFY(!mleak_stat->ml_isaddr64);
+#endif /* !__LP64__ */
+ VERIFY(mleak_stat->ml_cnt == MLEAK_NUM_TRACES);
+
+ mleak_sort_traces();
+
+ /* clear all slots, then fill from the sorted top-trace array */
+ mltr = &mleak_stat->ml_trace[0];
+ bzero(mltr, sizeof(*mltr) * MLEAK_NUM_TRACES);
+ for (i = 0; i < MLEAK_NUM_TRACES; i++) {
+ int j;
+
+ if (mleak_top_trace[i] == NULL ||
+ mleak_top_trace[i]->allocs == 0) {
+ continue;
+ }
+
+ mltr->mltr_collisions = mleak_top_trace[i]->collisions;
+ mltr->mltr_hitcount = mleak_top_trace[i]->hitcount;
+ mltr->mltr_allocs = mleak_top_trace[i]->allocs;
+ mltr->mltr_depth = mleak_top_trace[i]->depth;
+
+ VERIFY(mltr->mltr_depth <= MLEAK_STACK_DEPTH);
+ for (j = 0; j < mltr->mltr_depth; j++) {
+ mltr->mltr_addr[j] = mleak_top_trace[i]->addr[j];
+ }
+
+ mltr++;
+ }
+}
+
+/* Map of mbuf type codes to human-readable names, used by mbuf_dump() */
+static struct mbtypes {
+ int mt_type;
+ const char *mt_name;
+} mbtypes[] = {
+ { MT_DATA, "data" },
+ { MT_OOBDATA, "oob data" },
+ { MT_CONTROL, "ancillary data" },
+ { MT_HEADER, "packet headers" },
+ { MT_SOCKET, "socket structures" },
+ { MT_PCB, "protocol control blocks" },
+ { MT_RTABLE, "routing table entries" },
+ { MT_HTABLE, "IMP host table entries" },
+ { MT_ATABLE, "address resolution tables" },
+ { MT_FTABLE, "fragment reassembly queue headers" },
+ { MT_SONAME, "socket names and addresses" },
+ { MT_SOOPTS, "socket options" },
+ { MT_RIGHTS, "access rights" },
+ { MT_IFADDR, "interface addresses" },
+ { MT_TAG, "packet tags" },
+ { 0, NULL } /* sentinel terminates iteration */
+};
+
+#define MBUF_DUMP_BUF_CHK() { \
+ clen -= k; \
+ if (clen < 1) \
+ goto done; \
+ c += k; \
+}
+
+/*
+ * Render a human-readable snapshot of all mbuf subsystem statistics into
+ * the static mbuf_dump_buf and return it: per-class usage, per-type mbuf
+ * counts, VM allocation failures, worker/drain thread history, large
+ * allocation-failure backtraces, and the leak-detection top traces.
+ * Output is bounded by MBUF_DUMP_BUF_SIZE via MBUF_DUMP_BUF_CHK(), which
+ * jumps to "done" when the buffer fills.
+ *
+ * NOTE(review): dereferences mleak_stat (ml_isaddr64) without a NULL
+ * check; mleak_activate() only allocates it when mclfindleak != 0 —
+ * confirm this path is unreachable with leak detection disabled.
+ */
+static char *
+mbuf_dump(void)
+{
+ unsigned long totmem = 0, totfree = 0, totmbufs, totused, totpct,
+ totreturned = 0;
+ u_int32_t m_mbufs = 0, m_clfree = 0, m_bigclfree = 0;
+ u_int32_t m_mbufclfree = 0, m_mbufbigclfree = 0;
+ u_int32_t m_16kclusters = 0, m_16kclfree = 0, m_mbuf16kclfree = 0;
+ int nmbtypes = sizeof(mbstat.m_mtypes) / sizeof(short);
+ uint8_t seen[256];
+ struct mbtypes *mp;
+ mb_class_stat_t *sp;
+ mleak_trace_stat_t *mltr;
+ char *c = mbuf_dump_buf;
+ int i, j, k, clen = MBUF_DUMP_BUF_SIZE;
+ bool printed_banner = false;
+
+ mbuf_dump_buf[0] = '\0';
+
+ /* synchronize all statistics in the mbuf table */
+ mbuf_stat_sync();
+ mbuf_mtypes_sync(TRUE);
+
+ /* gather per-class active/free counts and memory totals */
+ sp = &mb_stat->mbs_class[0];
+ for (i = 0; i < mb_stat->mbs_cnt; i++, sp++) {
+ u_int32_t mem;
+
+ if (m_class(i) == MC_MBUF) {
+ m_mbufs = sp->mbcl_active;
+ } else if (m_class(i) == MC_CL) {
+ m_clfree = sp->mbcl_total - sp->mbcl_active;
+ } else if (m_class(i) == MC_BIGCL) {
+ m_bigclfree = sp->mbcl_total - sp->mbcl_active;
+ } else if (njcl > 0 && m_class(i) == MC_16KCL) {
+ m_16kclfree = sp->mbcl_total - sp->mbcl_active;
+ m_16kclusters = sp->mbcl_total;
+ } else if (m_class(i) == MC_MBUF_CL) {
+ m_mbufclfree = sp->mbcl_total - sp->mbcl_active;
+ } else if (m_class(i) == MC_MBUF_BIGCL) {
+ m_mbufbigclfree = sp->mbcl_total - sp->mbcl_active;
+ } else if (njcl > 0 && m_class(i) == MC_MBUF_16KCL) {
+ m_mbuf16kclfree = sp->mbcl_total - sp->mbcl_active;
+ }
+
+ mem = sp->mbcl_ctotal * sp->mbcl_size;
+ totmem += mem;
+ totfree += (sp->mbcl_mc_cached + sp->mbcl_infree) *
+ sp->mbcl_size;
+ totreturned += sp->mbcl_release_cnt;
+ }
+
+ /* adjust free counts to include composite caches */
+ m_clfree += m_mbufclfree;
+ m_bigclfree += m_mbufbigclfree;
+ m_16kclfree += m_mbuf16kclfree;
+
+ totmbufs = 0;
+ for (mp = mbtypes; mp->mt_name != NULL; mp++) {
+ totmbufs += mbstat.m_mtypes[mp->mt_type];
+ }
+ /* per-type sums can exceed the active count; clamp for display */
+ if (totmbufs > m_mbufs) {
+ totmbufs = m_mbufs;
+ }
+ k = scnprintf(c, clen, "%lu/%u mbufs in use:\n", totmbufs, m_mbufs);
+ MBUF_DUMP_BUF_CHK();
+
+ /* print named mbuf types, then any unnamed types with nonzero counts */
+ bzero(&seen, sizeof(seen));
+ for (mp = mbtypes; mp->mt_name != NULL; mp++) {
+ if (mbstat.m_mtypes[mp->mt_type] != 0) {
+ seen[mp->mt_type] = 1;
+ k = scnprintf(c, clen, "\t%u mbufs allocated to %s\n",
+ mbstat.m_mtypes[mp->mt_type], mp->mt_name);
+ MBUF_DUMP_BUF_CHK();
+ }
+ }
+ seen[MT_FREE] = 1;
+ for (i = 0; i < nmbtypes; i++) {
+ if (!seen[i] && mbstat.m_mtypes[i] != 0) {
+ k = scnprintf(c, clen, "\t%u mbufs allocated to "
+ "<mbuf type %d>\n", mbstat.m_mtypes[i], i);
+ MBUF_DUMP_BUF_CHK();
+ }
+ }
+ if ((m_mbufs - totmbufs) > 0) {
+ k = scnprintf(c, clen, "\t%lu mbufs allocated to caches\n",
+ m_mbufs - totmbufs);
+ MBUF_DUMP_BUF_CHK();
+ }
+ k = scnprintf(c, clen, "%u/%u mbuf 2KB clusters in use\n"
+ "%u/%u mbuf 4KB clusters in use\n",
+ (unsigned int)(mbstat.m_clusters - m_clfree),
+ (unsigned int)mbstat.m_clusters,
+ (unsigned int)(mbstat.m_bigclusters - m_bigclfree),
+ (unsigned int)mbstat.m_bigclusters);
+ MBUF_DUMP_BUF_CHK();
+
+ if (njcl > 0) {
+ k = scnprintf(c, clen, "%u/%u mbuf %uKB clusters in use\n",
+ m_16kclusters - m_16kclfree, m_16kclusters,
+ njclbytes / 1024);
+ MBUF_DUMP_BUF_CHK();
+ }
+ totused = totmem - totfree;
+ if (totmem == 0) {
+ totpct = 0;
+ } else if (totused < (ULONG_MAX / 100)) {
+ totpct = (totused * 100) / totmem;
+ } else {
+ /* avoid overflow of totused * 100: scale both down first */
+ u_long totmem1 = totmem / 100;
+ u_long totused1 = totused / 100;
+ totpct = (totused1 * 100) / totmem1;
+ }
+ k = scnprintf(c, clen, "%lu KB allocated to network (approx. %lu%% "
+ "in use)\n", totmem / 1024, totpct);
+ MBUF_DUMP_BUF_CHK();
+ k = scnprintf(c, clen, "%lu KB returned to the system\n",
+ totreturned / 1024);
+ MBUF_DUMP_BUF_CHK();
+
+ net_update_uptime();
+ k = scnprintf(c, clen,
+ "VM allocation failures: contiguous %u, normal %u, one page %u\n",
+ mb_kmem_contig_failed, mb_kmem_failed, mb_kmem_one_failed);
+ MBUF_DUMP_BUF_CHK();
+ if (mb_kmem_contig_failed_ts || mb_kmem_failed_ts ||
+ mb_kmem_one_failed_ts) {
+ k = scnprintf(c, clen,
+ "VM allocation failure timestamps: contiguous %llu "
+ "(size %llu), normal %llu (size %llu), one page %llu "
+ "(now %llu)\n",
+ mb_kmem_contig_failed_ts, mb_kmem_contig_failed_size,
+ mb_kmem_failed_ts, mb_kmem_failed_size,
+ mb_kmem_one_failed_ts, net_uptime());
+ MBUF_DUMP_BUF_CHK();
+ k = scnprintf(c, clen,
+ "VM return codes: ");
+ MBUF_DUMP_BUF_CHK();
+ for (i = 0;
+ i < sizeof(mb_kmem_stats) / sizeof(mb_kmem_stats[0]);
+ i++) {
+ k = scnprintf(c, clen, "%s: %u ", mb_kmem_stats_labels[i],
+ mb_kmem_stats[i]);
+ MBUF_DUMP_BUF_CHK();
+ }
+ k = scnprintf(c, clen, "\n");
+ MBUF_DUMP_BUF_CHK();
+ }
+ k = scnprintf(c, clen,
+ "worker thread runs: %u, expansions: %llu, cl %llu/%llu, "
+ "bigcl %llu/%llu, 16k %llu/%llu\n", mbuf_worker_run_cnt,
+ mb_expand_cnt, mb_expand_cl_cnt, mb_expand_cl_total,
+ mb_expand_bigcl_cnt, mb_expand_bigcl_total, mb_expand_16kcl_cnt,
+ mb_expand_16kcl_total);
+ MBUF_DUMP_BUF_CHK();
+ if (mbuf_worker_last_runtime != 0) {
+ k = scnprintf(c, clen, "worker thread last run time: "
+ "%llu (%llu seconds ago)\n",
+ mbuf_worker_last_runtime,
+ net_uptime() - mbuf_worker_last_runtime);
+ MBUF_DUMP_BUF_CHK();
+ }
+ if (mbuf_drain_last_runtime != 0) {
+ k = scnprintf(c, clen, "drain routine last run time: "
+ "%llu (%llu seconds ago)\n",
+ mbuf_drain_last_runtime,
+ net_uptime() - mbuf_drain_last_runtime);
+ MBUF_DUMP_BUF_CHK();
+ }
+
+#if DEBUG || DEVELOPMENT
+ k = scnprintf(c, clen, "\nworker thread log:\n%s\n", mbwdog_logging);
+ MBUF_DUMP_BUF_CHK();
+#endif
+
+ /* backtraces of the largest recorded allocation failures */
+ for (j = 0; j < MTRACELARGE_NUM_TRACES; j++) {
+ struct mtracelarge *trace = &mtracelarge_table[j];
+ if (trace->size == 0 || trace->depth == 0) {
+ continue;
+ }
+ if (printed_banner == false) {
+ k = scnprintf(c, clen,
+ "\nlargest allocation failure backtraces:\n");
+ MBUF_DUMP_BUF_CHK();
+ printed_banner = true;
+ }
+ k = scnprintf(c, clen, "size %llu: < ", trace->size);
+ MBUF_DUMP_BUF_CHK();
+ for (i = 0; i < trace->depth; i++) {
+ if (mleak_stat->ml_isaddr64) {
+ k = scnprintf(c, clen, "0x%0llx ",
+ (uint64_t)VM_KERNEL_UNSLIDE(
+ trace->addr[i]));
+ } else {
+ k = scnprintf(c, clen,
+ "0x%08x ",
+ (uint32_t)VM_KERNEL_UNSLIDE(
+ trace->addr[i]));
+ }
+ MBUF_DUMP_BUF_CHK();
+ }
+ k = scnprintf(c, clen, ">\n");
+ MBUF_DUMP_BUF_CHK();
+ }
+
+ /* mbuf leak detection statistics */
+ mleak_update_stats();
+
+ k = scnprintf(c, clen, "\nmbuf leak detection table:\n");
+ MBUF_DUMP_BUF_CHK();
+ k = scnprintf(c, clen, "\ttotal captured: %u (one per %u)\n",
+ mleak_table.mleak_capture / mleak_table.mleak_sample_factor,
+ mleak_table.mleak_sample_factor);
+ MBUF_DUMP_BUF_CHK();
+ k = scnprintf(c, clen, "\ttotal allocs outstanding: %llu\n",
+ mleak_table.outstanding_allocs);
+ MBUF_DUMP_BUF_CHK();
+ k = scnprintf(c, clen, "\tnew hash recorded: %llu allocs, %llu traces\n",
+ mleak_table.alloc_recorded, mleak_table.trace_recorded);
+ MBUF_DUMP_BUF_CHK();
+ k = scnprintf(c, clen, "\thash collisions: %llu allocs, %llu traces\n",
+ mleak_table.alloc_collisions, mleak_table.trace_collisions);
+ MBUF_DUMP_BUF_CHK();
+ k = scnprintf(c, clen, "\toverwrites: %llu allocs, %llu traces\n",
+ mleak_table.alloc_overwrites, mleak_table.trace_overwrites);
+ MBUF_DUMP_BUF_CHK();
+ k = scnprintf(c, clen, "\tlock conflicts: %llu\n\n",
+ mleak_table.total_conflicts);
+ MBUF_DUMP_BUF_CHK();
+
+ k = scnprintf(c, clen, "top %d outstanding traces:\n",
+ mleak_stat->ml_cnt);
+ MBUF_DUMP_BUF_CHK();
+ for (i = 0; i < mleak_stat->ml_cnt; i++) {
+ mltr = &mleak_stat->ml_trace[i];
+ k = scnprintf(c, clen, "[%d] %llu outstanding alloc(s), "
+ "%llu hit(s), %llu collision(s)\n", (i + 1),
+ mltr->mltr_allocs, mltr->mltr_hitcount,
+ mltr->mltr_collisions);
+ MBUF_DUMP_BUF_CHK();
+ }
+
+ if (mleak_stat->ml_isaddr64) {
+ k = scnprintf(c, clen, MB_LEAK_HDR_64);
+ } else {
+ k = scnprintf(c, clen, MB_LEAK_HDR_32);
+ }
+ MBUF_DUMP_BUF_CHK();
+
+ /* one row per stack depth, one column per top trace */
+ for (i = 0; i < MLEAK_STACK_DEPTH; i++) {
+ k = scnprintf(c, clen, "%2d: ", (i + 1));
+ MBUF_DUMP_BUF_CHK();
+ for (j = 0; j < mleak_stat->ml_cnt; j++) {
+ mltr = &mleak_stat->ml_trace[j];
+ if (i < mltr->mltr_depth) {
+ if (mleak_stat->ml_isaddr64) {
+ k = scnprintf(c, clen, "0x%0llx ",
+ (uint64_t)VM_KERNEL_UNSLIDE(
+ mltr->mltr_addr[i]));
+ } else {
+ k = scnprintf(c, clen,
+ "0x%08x ",
+ (uint32_t)VM_KERNEL_UNSLIDE(
+ mltr->mltr_addr[i]));
+ }
+ } else {
+ if (mleak_stat->ml_isaddr64) {
+ k = scnprintf(c, clen,
+ MB_LEAK_SPACING_64);
+ } else {
+ k = scnprintf(c, clen,
+ MB_LEAK_SPACING_32);
+ }
+ }
+ MBUF_DUMP_BUF_CHK();
+ }
+ k = scnprintf(c, clen, "\n");
+ MBUF_DUMP_BUF_CHK();
+ }
+done:
+ return mbuf_dump_buf;
+}
+
+#undef MBUF_DUMP_BUF_CHK
+
+/*
+ * Convert between a regular and a packet header mbuf. Caller is responsible
+ * for setting or clearing M_PKTHDR; this routine does the rest of the work.
+ *
+ * Returns 0 on success, or EBUSY when converting to a packet header mbuf
+ * would overwrite existing user data in the internal buffer.
+ */
+int
+m_reinit(struct mbuf *m, int hdr)
+{
+ int ret = 0;
+
+ if (hdr) {
+ VERIFY(!(m->m_flags & M_PKTHDR));
+ if (!(m->m_flags & M_EXT) &&
+ (m->m_data != m->m_dat || m->m_len > 0)) {
+ /*
+ * If there's no external cluster attached and the
+ * mbuf appears to contain user data, we cannot
+ * safely convert this to a packet header mbuf,
+ * as the packet header structure might overlap
+ * with the data.
+ */
+ printf("%s: cannot set M_PKTHDR on altered mbuf %llx, "
+ "m_data %llx (expected %llx), "
+ "m_len %d (expected 0)\n",
+ __func__,
+ (uint64_t)VM_KERNEL_ADDRPERM(m),
+ (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
+ (uint64_t)VM_KERNEL_ADDRPERM(m->m_dat), m->m_len);
+ ret = EBUSY;
+ } else {
+ VERIFY((m->m_flags & M_EXT) || m->m_data == m->m_dat);
+ m->m_flags |= M_PKTHDR;
+ MBUF_INIT_PKTHDR(m);
+ }
+ } else {
+ /* Check for scratch area overflow */
+ m_redzone_verify(m);
+ /* Free the aux data and tags if there is any */
+ m_tag_delete_chain(m, NULL);
+ m->m_flags &= ~M_PKTHDR;
+ }
+
+ return ret;
+}
+
+/*
+ * Atomically compare-and-swap the private property word of an mbuf's
+ * external buffer from 'o' to 'n'; returns the result of the CAS.
+ * The mbuf must have an external buffer attached (M_EXT).
+ */
+int
+m_ext_set_prop(struct mbuf *m, uint32_t o, uint32_t n)
+{
+ int swapped;
+
+ ASSERT(m->m_flags & M_EXT);
+ swapped = atomic_test_set_32(&MEXT_PRIV(m), o, n);
+ return swapped;
+}
+
+/*
+ * Read the private property word of an mbuf's external buffer.
+ * The mbuf must have an external buffer attached (M_EXT).
+ */
+uint32_t
+m_ext_get_prop(struct mbuf *m)
+{
+ uint32_t priv;
+
+ ASSERT(m->m_flags & M_EXT);
+ priv = MEXT_PRIV(m);
+ return priv;
+}
+
+/*
+ * Report whether a paired external buffer is currently active, i.e. its
+ * reference count is above the minimum.  Non-paired mbufs are always
+ * considered active.
+ */
+int
+m_ext_paired_is_active(struct mbuf *m)
+{
+ if (!MBUF_IS_PAIRED(m)) {
+ return 1;
+ }
+ return MEXT_PREF(m) > MEXT_MINREF(m);
+}
+
+/*
+ * Re-arm a quiesced paired mbuf for reuse: reinitialize the mbuf and its
+ * external-buffer metadata while preserving the buffer, free routine and
+ * ext_ref, and bump MEXT_PREF above MEXT_MINREF to mark it active.
+ */
+void
+m_ext_paired_activate(struct mbuf *m)
+{
+ struct ext_ref *rfa;
+ int hdr, type;
+ caddr_t extbuf;
+ m_ext_free_func_t extfree;
+ u_int extsize;
+
+ VERIFY(MBUF_IS_PAIRED(m));
+ VERIFY(MEXT_REF(m) == MEXT_MINREF(m));
+ VERIFY(MEXT_PREF(m) == MEXT_MINREF(m));
+
+ /* snapshot everything MBUF_INIT() below would clobber */
+ hdr = (m->m_flags & M_PKTHDR);
+ type = m->m_type;
+ extbuf = m->m_ext.ext_buf;
+ extfree = m_get_ext_free(m);
+ extsize = m->m_ext.ext_size;
+ rfa = m_get_rfa(m);
+
+ VERIFY(extbuf != NULL && rfa != NULL);
+
+ /*
+ * Safe to reinitialize packet header tags, since it's
+ * already taken care of at m_free() time. Similar to
+ * what's done in m_clattach() for the cluster. Bump
+ * up MEXT_PREF to indicate activation.
+ */
+ MBUF_INIT(m, hdr, type);
+ MEXT_INIT(m, extbuf, extsize, extfree, (caddr_t)m, rfa,
+ 1, 1, 2, EXTF_PAIRED, MEXT_PRIV(m), m);
+}
+
+/*
+ * Zero the module-private scratch area of a packet header mbuf.
+ * Panics if the area is currently guarded (PKTF_PRIV_GUARDED), i.e.
+ * owned by another module.
+ */
+void
+m_scratch_init(struct mbuf *m)
+{
+ struct pkthdr *pkt = &m->m_pkthdr;
+
+ VERIFY(m->m_flags & M_PKTHDR);
+
+ /* See comments in <rdar://problem/14040693> */
+ if (pkt->pkt_flags & PKTF_PRIV_GUARDED) {
+ panic_plain("Invalid attempt to modify guarded module-private "
+ "area: mbuf %p, pkt_flags 0x%x\n", m, pkt->pkt_flags);
+ /* NOTREACHED */
+ }
+
+ bzero(&pkt->pkt_mpriv, sizeof(pkt->pkt_mpriv));
+}
+
+/*
+ * This routine is reserved for mbuf_get_driver_scratch(); clients inside
+ * xnu that intend on utilizing the module-private area should directly
+ * refer to the pkt_mpriv structure in the pkthdr. They are also expected
+ * to set and clear PKTF_PRIV_GUARDED, while owning the packet and prior
+ * to handing it off to another module, respectively.
+ *
+ * Returns the size of the scratch area and stores its address in *p.
+ * Panics if the area is currently guarded by another module.
+ */
+u_int32_t
+m_scratch_get(struct mbuf *m, u_int8_t **p)
+{
+ struct pkthdr *pkt = &m->m_pkthdr;
+
+ VERIFY(m->m_flags & M_PKTHDR);
+
+ /* See comments in <rdar://problem/14040693> */
+ if (pkt->pkt_flags & PKTF_PRIV_GUARDED) {
+ panic_plain("Invalid attempt to access guarded module-private "
+ "area: mbuf %p, pkt_flags 0x%x\n", m, pkt->pkt_flags);
+ /* NOTREACHED */
+ }
+
+ /* when cluster tracing is on, record this access in the audit log */
+ if (mcltrace) {
+ mcache_audit_t *mca;
+
+ lck_mtx_lock(mbuf_mlock);
+ mca = mcl_audit_buf2mca(MC_MBUF, (mcache_obj_t *)m);
+ if (mca->mca_uflags & MB_SCVALID) {
+ mcl_audit_scratch(mca);
+ }
+ lck_mtx_unlock(mbuf_mlock);
+ }
+
+ *p = (u_int8_t *)&pkt->pkt_mpriv;
+ return sizeof(pkt->pkt_mpriv);
+}
+
+/*
+ * Stamp the red zone field of a packet header mbuf.  The pattern is
+ * unique per mbuf: the global red zone cookie XORed with the mbuf's
+ * own address, so a copied or corrupted header is detectable.
+ */
+static void
+m_redzone_init(struct mbuf *m)
+{
+ u_int32_t pattern;
+
+ VERIFY(m->m_flags & M_PKTHDR);
+
+ pattern = ((u_int32_t)(uintptr_t)m) ^ mb_redzone_cookie;
+ m->m_pkthdr.redzone = pattern;
+}
+
+/*
+ * Check the red zone field of a packet header mbuf against the value
+ * m_redzone_init() would have stamped; panic on mismatch, which
+ * indicates an overflow into the packet header scratch area.
+ */
+static void
+m_redzone_verify(struct mbuf *m)
+{
+ u_int32_t expected;
+
+ VERIFY(m->m_flags & M_PKTHDR);
+
+ expected = ((u_int32_t)(uintptr_t)m) ^ mb_redzone_cookie;
+ if (m->m_pkthdr.redzone == expected) {
+ return;
+ }
+ panic("mbuf %p redzone violation with value 0x%x "
+ "(instead of 0x%x, using cookie 0x%x)\n",
+ m, m->m_pkthdr.redzone, expected, mb_redzone_cookie);
+ /* NOTREACHED */
+}
+
+/*
+ * Install external-buffer metadata (ext_ref, free routine, free argument)
+ * on an mbuf.  All stored pointers are XOR-obscured as a hardening
+ * measure: the rfa pointer with the global mb_obscure_extref cookie, and
+ * ext_free/ext_arg with a per-buffer token (rfa->ext_token) when an rfa
+ * is present, or with the global mb_obscure_extfree cookie otherwise.
+ * Use m_get_rfa()/m_get_ext_free()/m_get_ext_arg() to read them back.
+ */
+__private_extern__ inline void
+m_set_ext(struct mbuf *m, struct ext_ref *rfa, m_ext_free_func_t ext_free,
+ caddr_t ext_arg)
+{
+ VERIFY(m->m_flags & M_EXT);
+ if (rfa != NULL) {
+ m->m_ext.ext_refflags =
+ (struct ext_ref *)(((uintptr_t)rfa) ^ mb_obscure_extref);
+ if (ext_free != NULL) {
+ /* per-buffer token derived from the rfa's own address */
+ rfa->ext_token = ((uintptr_t)&rfa->ext_token) ^
+ mb_obscure_extfree;
+ m->m_ext.ext_free = (m_ext_free_func_t)
+ (((uintptr_t)ext_free) ^ rfa->ext_token);
+ if (ext_arg != NULL) {
+ m->m_ext.ext_arg =
+ (caddr_t)(((uintptr_t)ext_arg) ^ rfa->ext_token);
+ } else {
+ m->m_ext.ext_arg = NULL;
+ }
+ } else {
+ rfa->ext_token = 0;
+ m->m_ext.ext_free = NULL;
+ m->m_ext.ext_arg = NULL;
+ }
+ } else {
+ /*
+ * If we are going to lose the cookie in ext_token by
+ * resetting the rfa, we should use the global cookie
+ * to obscure the ext_free and ext_arg pointers.
+ */
+ if (ext_free != NULL) {
+ m->m_ext.ext_free =
+ (m_ext_free_func_t)((uintptr_t)ext_free ^
+ mb_obscure_extfree);
+ if (ext_arg != NULL) {
+ m->m_ext.ext_arg =
+ (caddr_t)((uintptr_t)ext_arg ^
+ mb_obscure_extfree);
+ } else {
+ m->m_ext.ext_arg = NULL;
+ }
+ } else {
+ m->m_ext.ext_free = NULL;
+ m->m_ext.ext_arg = NULL;
+ }
+ m->m_ext.ext_refflags = NULL;
+ }
+}
+
+/*
+ * Recover the ext_ref pointer stored by m_set_ext().  A NULL is stored
+ * unobscured; any other value is de-obscured with mb_obscure_extref.
+ */
+__private_extern__ inline struct ext_ref *
+m_get_rfa(struct mbuf *m)
+{
+ uintptr_t obscured = (uintptr_t)m->m_ext.ext_refflags;
+
+ if (obscured == 0) {
+ return NULL;
+ }
+ return (struct ext_ref *)(obscured ^ mb_obscure_extref);
+}
+
+/*
+ * Recover the external-buffer free routine stored by m_set_ext().
+ * The stored pointer is XOR-obscured: with the per-buffer ext_token
+ * when an ext_ref is present, otherwise with the global cookie.
+ */
+__private_extern__ inline m_ext_free_func_t
+m_get_ext_free(struct mbuf *m)
+{
+ struct ext_ref *rfa;
+ uintptr_t token;
+
+ if (m->m_ext.ext_free == NULL) {
+ return NULL;
+ }
+
+ rfa = m_get_rfa(m);
+ token = (rfa == NULL) ? mb_obscure_extfree : rfa->ext_token;
+ return (m_ext_free_func_t)((uintptr_t)m->m_ext.ext_free ^ token);
+}
+
+/*
+ * Recover the external-buffer free argument stored by m_set_ext().
+ * De-obscured the same way as the free routine: per-buffer ext_token
+ * when an ext_ref is present, global cookie otherwise.
+ */
+__private_extern__ inline caddr_t
+m_get_ext_arg(struct mbuf *m)
+{
+ struct ext_ref *rfa;
+ uintptr_t token;
+
+ if (m->m_ext.ext_arg == NULL) {
+ return NULL;
+ }
+
+ rfa = m_get_rfa(m);
+ token = (rfa == NULL) ? mb_obscure_extfree : rfa->ext_token;
+ return (caddr_t)((uintptr_t)m->m_ext.ext_arg ^ token);
+}
+
+/*
+ * Send a report of mbuf usage if the usage is at least 6% of max limit
+ * or if there has been at least 3% increase since the last report.
+ *
+ * The values 6% and 3% are chosen so that we can do simple arithmetic
+ * with shift operations (1/16 and 1/32 respectively).
+ */
+static boolean_t
+mbuf_report_usage(mbuf_class_t cl)
+{
+ /* if a report is already in progress, nothing to do */
+ if (mb_peak_newreport) {
+ return TRUE;
+ }
+
+ /* no new peak since the last report */
+ if (m_total(cl) <= m_peak(cl)) {
+ return FALSE;
+ }
+ /* below 1/16 (~6%) of the class limit */
+ if (m_total(cl) < (m_maxlimit(cl) >> 4)) {
+ return FALSE;
+ }
+ /* report only if grown by at least 1/32 (~3%) over the old peak */
+ if ((m_total(cl) - m_peak(cl)) >= (m_peak(cl) >> 5)) {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+/*
+ * Periodically snapshot peak per-class mbuf usage and push it to nstat.
+ * A report is sent when mb_peak_newreport was set (by mbuf_report_usage)
+ * or, once, after the first week of uptime.
+ *
+ * Fixed: the delta of released memory was stored back into
+ * prevmemreleased, so the "previous" value held the last *delta*
+ * instead of the running total, corrupting every subsequent report.
+ * The running total is now preserved across reports.
+ */
+__private_extern__ void
+mbuf_report_peak_usage(void)
+{
+ int i = 0;
+ u_int64_t uptime;
+ struct nstat_sysinfo_data ns_data;
+ uint32_t memreleased = 0;
+ uint32_t totalreleased = 0;
+ static uint32_t prevmemreleased;
+
+ uptime = net_uptime();
+ lck_mtx_lock(mbuf_mlock);
+
+ /* Generate an initial report after 1 week of uptime */
+ if (!mb_peak_firstreport &&
+ uptime > MBUF_PEAK_FIRST_REPORT_THRESHOLD) {
+ mb_peak_newreport = TRUE;
+ mb_peak_firstreport = TRUE;
+ }
+
+ if (!mb_peak_newreport) {
+ lck_mtx_unlock(mbuf_mlock);
+ return;
+ }
+
+ /*
+ * Since a report is being generated before 1 week,
+ * we do not need to force another one later
+ */
+ if (uptime < MBUF_PEAK_FIRST_REPORT_THRESHOLD) {
+ mb_peak_firstreport = TRUE;
+ }
+
+ for (i = 0; i < NELEM(mbuf_table); i++) {
+ m_peak(m_class(i)) = m_total(m_class(i));
+ memreleased += m_release_cnt(i);
+ }
+ /*
+ * Report the amount released since the previous report, but keep
+ * the running total (not the delta) as the new baseline.
+ */
+ totalreleased = memreleased;
+ memreleased -= prevmemreleased;
+ prevmemreleased = totalreleased;
+ mb_peak_newreport = FALSE;
+ lck_mtx_unlock(mbuf_mlock);
+
+ bzero(&ns_data, sizeof(ns_data));
+ ns_data.flags = NSTAT_SYSINFO_MBUF_STATS;
+ ns_data.u.mb_stats.total_256b = m_peak(MC_MBUF);
+ ns_data.u.mb_stats.total_2kb = m_peak(MC_CL);
+ ns_data.u.mb_stats.total_4kb = m_peak(MC_BIGCL);
+ ns_data.u.mb_stats.total_16kb = m_peak(MC_16KCL);
+ ns_data.u.mb_stats.sbmb_total = total_sbmb_cnt_peak;
+ ns_data.u.mb_stats.sb_atmbuflimit = sbmb_limreached;
+ ns_data.u.mb_stats.draincnt = mbstat.m_drain;
+ ns_data.u.mb_stats.memreleased = memreleased;
+ ns_data.u.mb_stats.sbmb_floor = total_sbmb_cnt_floor;
+
+ nstat_sysinfo_send_data(&ns_data);
+
+ /*
+ * Reset the floor whenever we report a new
+ * peak to track the trend (increase peek usage
+ * is not a leak if mbufs get released
+ * between reports and the floor stays low)
+ */
+ total_sbmb_cnt_floor = total_sbmb_cnt_peak;
+}
+
+/*
+ * Simple routine to avoid taking the lock when we can't run the
+ * mbuf drain.  Returns 1 when draining may proceed, 0 otherwise.
+ */
+static int
+mbuf_drain_checks(boolean_t ignore_waiters)
+{
+ /* draining is disabled via the mb_drain_maxint tunable */
+ if (mb_drain_maxint == 0) {
+ return 0;
+ }
+ /* don't reclaim memory out from under threads waiting for mbufs */
+ if (ignore_waiters || mb_waiters == 0) {
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Called by the VM when there's memory pressure or when we exhausted
+ * the 4k/16k reserved space.
+ *
+ * With mbuf_mlock held: rate-limits itself via mb_drain_maxint, skips
+ * draining if 60% or more of mbuf memory is in use, optionally purges
+ * the mcaches, returns composite-class objects to the slab layer, and
+ * finally frees unused slab pages back to the VM (remapping their IOMMU
+ * entries to a scratch page to catch stray device DMA).
+ */
+static void
+mbuf_drain_locked(boolean_t ignore_waiters)
+{
+ mbuf_class_t mc;
+ mcl_slab_t *sp, *sp_tmp, *nsp;
+ unsigned int num, k, interval, released = 0;
+ unsigned long total_mem = 0, use_mem = 0;
+ boolean_t ret, purge_caches = FALSE;
+ ppnum_t offset;
+ mcache_obj_t *obj;
+ unsigned long per;
+ static unsigned char scratch[32];
+ static ppnum_t scratch_pa = 0;
+
+ LCK_MTX_ASSERT(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
+ if (!mbuf_drain_checks(ignore_waiters)) {
+ return;
+ }
+ if (scratch_pa == 0) {
+ /* first run: resolve the physical page of the scratch buffer */
+ bzero(scratch, sizeof(scratch));
+ scratch_pa = pmap_find_phys(kernel_pmap, (addr64_t)scratch);
+ VERIFY(scratch_pa);
+ } else if (mclverify) {
+ /*
+ * Panic if a driver wrote to our scratch memory.
+ */
+ for (k = 0; k < sizeof(scratch); k++) {
+ if (scratch[k]) {
+ panic("suspect DMA to freed address");
+ }
+ }
+ }
+ /*
+ * Don't free memory too often as that could cause excessive
+ * waiting times for mbufs. Purge caches if we were asked to drain
+ * in the last 5 minutes.
+ */
+ if (mbuf_drain_last_runtime != 0) {
+ interval = net_uptime() - mbuf_drain_last_runtime;
+ if (interval <= mb_drain_maxint) {
+ return;
+ }
+ if (interval <= mb_drain_maxint * 5) {
+ purge_caches = TRUE;
+ }
+ }
+ mbuf_drain_last_runtime = net_uptime();
+ /*
+ * Don't free any memory if we're using 60% or more.
+ */
+ for (mc = 0; mc < NELEM(mbuf_table); mc++) {
+ total_mem += m_total(mc) * m_maxsize(mc);
+ use_mem += m_active(mc) * m_maxsize(mc);
+ }
+ /* NOTE(review): assumes total_mem != 0 here — confirm classes are
+ * always populated before the drain path can run */
+ per = (use_mem * 100) / total_mem;
+ if (per >= 60) {
+ return;
+ }
+ /*
+ * Purge all the caches. This effectively disables
+ * caching for a few seconds, but the mbuf worker thread will
+ * re-enable them again.
+ */
+ if (purge_caches == TRUE) {
+ for (mc = 0; mc < NELEM(mbuf_table); mc++) {
+ if (m_total(mc) < m_avgtotal(mc)) {
+ continue;
+ }
+ /* drop the lock around the blocking cache purge */
+ lck_mtx_unlock(mbuf_mlock);
+ ret = mcache_purge_cache(m_cache(mc), FALSE);
+ lck_mtx_lock(mbuf_mlock);
+ if (ret == TRUE) {
+ m_purge_cnt(mc)++;
+ }
+ }
+ }
+ /*
+ * Move the objects from the composite class freelist to
+ * the rudimentary slabs list, but keep at least 10% of the average
+ * total in the freelist.
+ */
+ for (mc = 0; mc < NELEM(mbuf_table); mc++) {
+ while (m_cobjlist(mc) &&
+ m_total(mc) < m_avgtotal(mc) &&
+ m_infree(mc) > 0.1 * m_avgtotal(mc) + m_minlimit(mc)) {
+ obj = m_cobjlist(mc);
+ m_cobjlist(mc) = obj->obj_next;
+ obj->obj_next = NULL;
+ num = cslab_free(mc, obj, 1);
+ VERIFY(num == 1);
+ m_free_cnt(mc)++;
+ m_infree(mc)--;
+ /* cslab_free() handles m_total */
+ }
+ }
+ /*
+ * Free the buffers present in the slab list up to 10% of the total
+ * average per class.
+ *
+ * We walk the list backwards in an attempt to reduce fragmentation.
+ */
+ for (mc = NELEM(mbuf_table) - 1; (int)mc >= 0; mc--) {
+ TAILQ_FOREACH_SAFE(sp, &m_slablist(mc), sl_link, sp_tmp) {
+ /*
+ * Process only unused slabs occupying memory.
+ */
+ if (sp->sl_refcnt != 0 || sp->sl_len == 0 ||
+ sp->sl_base == NULL) {
+ continue;
+ }
+ if (m_total(mc) < m_avgtotal(mc) ||
+ m_infree(mc) < 0.1 * m_avgtotal(mc) + m_minlimit(mc)) {
+ break;
+ }
+ slab_remove(sp, mc);
+ /* adjust per-class counters by objects-per-page */
+ switch (mc) {
+ case MC_MBUF:
+ m_infree(mc) -= NMBPG;
+ m_total(mc) -= NMBPG;
+ if (mclaudit != NULL) {
+ mcl_audit_free(sp->sl_base, NMBPG);
+ }
+ break;
+ case MC_CL:
+ m_infree(mc) -= NCLPG;
+ m_total(mc) -= NCLPG;
+ if (mclaudit != NULL) {
+ mcl_audit_free(sp->sl_base, NMBPG);
+ }
+ break;
+ case MC_BIGCL:
+ {
+ m_infree(mc) -= NBCLPG;
+ m_total(mc) -= NBCLPG;
+ if (mclaudit != NULL) {
+ mcl_audit_free(sp->sl_base, NMBPG);
+ }
+ break;
+ }
+ case MC_16KCL:
+ m_infree(mc)--;
+ m_total(mc)--;
+ /* a 16K cluster spans NSLABSP16KB slabs; reset the rest */
+ for (nsp = sp, k = 1; k < NSLABSP16KB; k++) {
+ nsp = nsp->sl_next;
+ VERIFY(nsp->sl_refcnt == 0 &&
+ nsp->sl_base != NULL &&
+ nsp->sl_len == 0);
+ slab_init(nsp, 0, 0, NULL, NULL, 0, 0,
+ 0);
+ nsp->sl_flags = 0;
+ }
+ if (mclaudit != NULL) {
+ if (sp->sl_len == PAGE_SIZE) {
+ mcl_audit_free(sp->sl_base,
+ NMBPG);
+ } else {
+ mcl_audit_free(sp->sl_base, 1);
+ }
+ }
+ break;
+ default:
+ /*
+ * The composite classes have their own
+ * freelist (m_cobjlist), so we only
+ * process rudimentary classes here.
+ */
+ VERIFY(0);
+ }
+ m_release_cnt(mc) += m_size(mc);
+ released += m_size(mc);
+ VERIFY(sp->sl_base != NULL &&
+ sp->sl_len >= PAGE_SIZE);
+ offset = MTOPG(sp->sl_base);
+ /*
+ * Make sure the IOMapper points to a valid, but
+ * bogus, address. This should prevent further DMA
+ * accesses to freed memory.
+ */
+ IOMapperInsertPage(mcl_paddr_base, offset, scratch_pa);
+ mcl_paddr[offset] = 0;
+ kmem_free(mb_map, (vm_offset_t)sp->sl_base,
+ sp->sl_len);
+ slab_init(sp, 0, 0, NULL, NULL, 0, 0, 0);
+ sp->sl_flags = 0;
+ }
+ }
+ mbstat.m_drain++;
+ mbstat.m_bigclusters = m_total(MC_BIGCL);
+ mbstat.m_clusters = m_total(MC_CL);
+ mbstat.m_mbufs = m_total(MC_MBUF);
+ mbuf_stat_sync();
+ mbuf_mtypes_sync(TRUE);
+}
+
+/*
+ * Unlocked wrapper around mbuf_drain_locked(): performs the cheap
+ * pre-checks first so the common "nothing to do" case never takes
+ * mbuf_mlock.
+ */
+__private_extern__ void
+mbuf_drain(boolean_t ignore_waiters)
+{
+ LCK_MTX_ASSERT(mbuf_mlock, LCK_MTX_ASSERT_NOTOWNED);
+ if (!mbuf_drain_checks(ignore_waiters)) {
+ return;
+ }
+ lck_mtx_lock(mbuf_mlock);
+ mbuf_drain_locked(ignore_waiters);
+ lck_mtx_unlock(mbuf_mlock);
+}
+
+
+/*
+ * sysctl handler: writing any non-zero value forces an immediate mbuf
+ * drain, ignoring waiters.  Reads return 0 and have no side effects.
+ */
+static int
+m_drain_force_sysctl SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+ int val = 0, err;
+
+ err = sysctl_handle_int(oidp, &val, 0, req);
+ /* bail on error or when this is a read (no new value supplied) */
+ if (err != 0 || req->newptr == USER_ADDR_NULL) {
+ return err;
+ }
+ if (val) {
+ mbuf_drain(TRUE);
+ }
+
+ return err;
+}
+
+#if DEBUG || DEVELOPMENT
+/*
+ * Append a printf-style entry, stamped with time/pid/thread/call-site, to
+ * the in-memory mbuf watchdog log (exported via kern.ipc.mbwdog_log).
+ * Requires mbuf_mlock.  The buffer is lazily allocated; when it fills,
+ * the oldest half is discarded to make room.
+ */
+static void
+_mbwdog_logger(const char *func, const int line, const char *fmt, ...)
+{
+ va_list ap;
+ struct timeval now;
+ char str[384], p[256];
+ int len;
+
+ LCK_MTX_ASSERT(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
+ if (mbwdog_logging == NULL) {
+ /* M_NOWAIT since we hold a mutex; silently drop on failure */
+ mbwdog_logging = _MALLOC(mbwdog_logging_size,
+ M_TEMP, M_ZERO | M_NOWAIT);
+ if (mbwdog_logging == NULL) {
+ return;
+ }
+ }
+ va_start(ap, fmt);
+ vsnprintf(p, sizeof(p), fmt, ap);
+ va_end(ap);
+ microuptime(&now);
+ len = scnprintf(str, sizeof(str),
+ "\n%ld.%d (%d/%llx) %s:%d %s",
+ now.tv_sec, now.tv_usec,
+ current_proc()->p_pid,
+ (uint64_t)VM_KERNEL_ADDRPERM(current_thread()),
+ func, line, p);
+ if (len < 0) {
+ return;
+ }
+ /* out of room: drop the oldest half, keep the newest entries */
+ if (mbwdog_logging_used + len > mbwdog_logging_size) {
+ mbwdog_logging_used = mbwdog_logging_used / 2;
+ memmove(mbwdog_logging, mbwdog_logging + mbwdog_logging_used,
+ mbwdog_logging_size - mbwdog_logging_used);
+ mbwdog_logging[mbwdog_logging_used] = 0;
+ }
+ strlcat(mbwdog_logging, str, mbwdog_logging_size);
+ mbwdog_logging_used += len;
+}
+
+/* sysctl handler: export the raw mbuf watchdog log buffer to userland */
+static int
+sysctl_mbwdog_log SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ return SYSCTL_OUT(req, mbwdog_logging, mbwdog_logging_used);
+}
+SYSCTL_DECL(_kern_ipc);
+/* kern.ipc.mbwdog_log: read-only string view of the watchdog log */
+SYSCTL_PROC(_kern_ipc, OID_AUTO, mbwdog_log,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED,
+ 0, 0, sysctl_mbwdog_log, "A", "");
+
+/* last value written to the kern.ipc.mbtest sysctl */
+static int mbtest_val;
+/* number of mbtest_thread instances still running */
+static int mbtest_running;
+
+/*
+ * mbuf stress-test worker: repeatedly allocates and frees large batches
+ * of 16K, 4K and 2K cluster packets.  Decrements mbtest_running and
+ * wakes the sysctl_mbtest() waiter when done.
+ */
+static void
+mbtest_thread(__unused void *arg)
+{
+ int i;
+ int scale_down = 1;
+ int iterations = 250;
+ int allocations = nmbclusters;
+ iterations = iterations / scale_down;
+ allocations = allocations / scale_down;
+ printf("%s thread starting\n", __func__);
+ for (i = 0; i < iterations; i++) {
+ unsigned int needed = allocations;
+ struct mbuf *m1, *m2, *m3;
+
+ /* 16K clusters only exist when jumbo clusters are configured */
+ if (njcl > 0) {
+ needed = allocations;
+ m3 = m_getpackets_internal(&needed, 0, M_DONTWAIT, 0, M16KCLBYTES);
+ m_freem_list(m3);
+ }
+
+ needed = allocations;
+ m2 = m_getpackets_internal(&needed, 0, M_DONTWAIT, 0, MBIGCLBYTES);
+ m_freem_list(m2);
+
+ m1 = m_getpackets_internal(&needed, 0, M_DONTWAIT, 0, MCLBYTES);
+ m_freem_list(m1);
+ }
+
+ printf("%s thread ending\n", __func__);
+
+ OSDecrementAtomic(&mbtest_running);
+ wakeup_one((caddr_t)&mbtest_running);
+}
+
+/*
+ * Kick off three concurrent mbtest_thread workers and block until all
+ * of them have finished (each decrements mbtest_running on exit).
+ */
+static void
+sysctl_mbtest(void)
+{
+ /* We launch three threads - wait for all of them */
+ OSIncrementAtomic(&mbtest_running);
+ OSIncrementAtomic(&mbtest_running);
+ OSIncrementAtomic(&mbtest_running);
+
+ thread_call_func_delayed((thread_call_func_t)mbtest_thread, NULL, 10);
+ thread_call_func_delayed((thread_call_func_t)mbtest_thread, NULL, 10);
+ thread_call_func_delayed((thread_call_func_t)mbtest_thread, NULL, 10);
+
+ while (mbtest_running) {
+ msleep((caddr_t)&mbtest_running, NULL, PUSER, "mbtest_running", NULL);
+ }
+}
+
+/*
+ * sysctl handler for kern.ipc.mbtest: writing a value different from the
+ * current one runs the (blocking) mbuf stress test.
+ */
+static int
+mbtest SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+ int error = 0, val, oldval = mbtest_val;
+
+ val = oldval;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ /* bail on error or when this is a read (no new value supplied) */
+ if (error || !req->newptr) {
+ return error;
+ }
+
+ if (val != oldval) {
+ sysctl_mbtest();
+ }
+
+ mbtest_val = val;
+
+ return error;
+}
+#endif // DEBUG || DEVELOPMENT
+#endif // DEBUG || DEVELOPMENT
+
+/*
+ * Record the backtrace of a large (failed) allocation of 'size' bytes in
+ * mtracelarge_table, unless an identical size+trace entry already exists.
+ * Otherwise the first table entry smaller than 'size' is overwritten, so
+ * the table retains the largest sizes seen.
+ */
+static void
+mtracelarge_register(size_t size)
+{
+ int i;
+ struct mtracelarge *trace;
+ uintptr_t bt[MLEAK_STACK_DEPTH];
+ unsigned int depth;
+
+ depth = backtrace(bt, MLEAK_STACK_DEPTH, NULL);
+ /* Check if this entry is already on the list. */
+ for (i = 0; i < MTRACELARGE_NUM_TRACES; i++) {
+ trace = &mtracelarge_table[i];
+ if (trace->size == size && trace->depth == depth &&
+ memcmp(bt, trace->addr, depth * sizeof(uintptr_t)) == 0) {
+ return;
+ }
+ }
+ /* replace the first entry with a smaller size than ours */
+ for (i = 0; i < MTRACELARGE_NUM_TRACES; i++) {
+ trace = &mtracelarge_table[i];
+ if (size > trace->size) {
+ trace->depth = depth;
+ memcpy(trace->addr, bt, depth * sizeof(uintptr_t));
+ trace->size = size;
+ break;
+ }
+ }
+}
+
+SYSCTL_DECL(_kern_ipc);
+#if DEBUG || DEVELOPMENT
+/* kern.ipc.mbtest: writing a new value triggers the mbuf stress test */
+SYSCTL_PROC(_kern_ipc, OID_AUTO, mbtest,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &mbtest_val, 0, &mbtest, "I",
+ "Toggle to test mbufs");
+#endif
+SYSCTL_PROC(_kern_ipc, KIPC_MBSTAT, mbstat,
+ CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,