[redis.git] / deps / jemalloc / src / tcache.c

#define	JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"
#ifdef JEMALLOC_TCACHE
/******************************************************************************/
/* Data. */

/* Runtime switch: tcache_boot() only initializes the tcache machinery if set. */
bool	opt_tcache = true;
/*
 * Base-2 log of the largest size class to cache.  tcache_boot() clamps the
 * resulting size into [small_maxclass, arena_maxclass]; a negative value
 * selects small_maxclass.
 */
ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
/*
 * Base-2 log of the GC sweep interval from which tcache_boot() derives
 * tcache_gc_incr; a negative value yields tcache_gc_incr == 0.
 */
ssize_t	opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;

/*
 * Per-bin cache parameters (ncached_max), nhbins entries, allocated and filled
 * in by tcache_boot().
 */
tcache_bin_info_t	*tcache_bin_info;
/*
 * Sum of ncached_max over all bins; tcache_create() uses it to size the
 * pointer stacks that trail each tcache_t.
 */
static unsigned		stack_nelms; /* Total stack elms per tcache. */

/* Map of thread-specific caches. */
#ifndef NO_TLS
__thread tcache_t	*tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
#endif

/*
 * Same contents as tcache, but initialized such that the TSD destructor is
 * called when a thread exits, so that the cache can be cleaned up.
 * tcache_boot() creates this key with tcache_thread_cleanup() as destructor.
 */
pthread_key_t		tcache_tsd;

/* Total number of cache bins: nbins + (tcache_maxclass >> PAGE_SHIFT). */
size_t				nhbins;
/* Largest cached size, as clamped by tcache_boot(). */
size_t				tcache_maxclass;
/* ceil(2^opt_lg_tcache_gc_sweep / nbins), or 0 if the option is negative. */
unsigned			tcache_gc_incr;

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static void	tcache_thread_cleanup(void *arg);

/******************************************************************************/

/*
 * Slow path for a small allocation: ask the tcache's arena to fill tbin via
 * arena_tcache_fill_small(), then retry the fast path.
 *
 * With JEMALLOC_PROF, the byte count accumulated in tcache->prof_accumbytes is
 * passed along with the fill request and then reset to zero.
 *
 * Returns whatever tcache_alloc_easy() yields for the (re)filled bin.
 */
void *
tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
{

#ifdef JEMALLOC_PROF
	arena_tcache_fill_small(tcache->arena, tbin, binind,
	    tcache->prof_accumbytes);
	tcache->prof_accumbytes = 0;
#else
	arena_tcache_fill_small(tcache->arena, tbin, binind);
#endif

	return (tcache_alloc_easy(tbin));
}

/*
 * Flush all but rem objects from the small-object cache bin tbin.
 *
 * The (tbin->ncached - rem) objects at the front of tbin->avail are handed
 * back via arena_dalloc_bin(); the trailing rem objects are kept and slid to
 * the front of the stack.
 *
 * Cached objects may belong to different arenas, so the flush proceeds in
 * passes: each pass locks bin binind of the arena that owns avail[0], frees
 * every pending object owned by that arena, and compacts the others to the
 * front of avail for a later pass.
 *
 * tbin:   bin to flush.
 * binind: index of tbin; must be < nbins.
 * rem:    number of objects to keep; must be <= tbin->ncached.
 * tcache: owning cache (only present with JEMALLOC_STATS or JEMALLOC_PROF),
 *         used to merge request stats / profiling byte counts into
 *         tcache->arena.
 */
void
tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
    , tcache_t *tcache
#endif
    )
{
	void *ptr;
	unsigned i, nflush, ndeferred;
#ifdef JEMALLOC_STATS
	bool merged_stats = false;
#endif

	assert(binind < nbins);
	assert(rem <= tbin->ncached);

	/* Each pass handles one arena; deferred objects form the next pass. */
	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena bin associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    tbin->avail[0]);
		arena_t *arena = chunk->arena;
		arena_bin_t *bin = &arena->bins[binind];

#ifdef JEMALLOC_PROF
		/*
		 * Profiling bytes are accumulated under arena->lock, which is
		 * taken and released before bin->lock is acquired below.
		 */
		if (arena == tcache->arena) {
			malloc_mutex_lock(&arena->lock);
			arena_prof_accum(arena, tcache->prof_accumbytes);
			malloc_mutex_unlock(&arena->lock);
			tcache->prof_accumbytes = 0;
		}
#endif

		malloc_mutex_lock(&bin->lock);
#ifdef JEMALLOC_STATS
		/*
		 * Merge this bin's request count while the lock of the
		 * thread's own arena bin happens to be held.
		 */
		if (arena == tcache->arena) {
			assert(merged_stats == false);
			merged_stats = true;
			bin->stats.nflushes++;
			bin->stats.nrequests += tbin->tstats.nrequests;
			tbin->tstats.nrequests = 0;
		}
#endif
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = tbin->avail[i];
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena) {
				/* Locate the chunk map entry for ptr's page. */
				size_t pageind = ((uintptr_t)ptr -
				    (uintptr_t)chunk) >> PAGE_SHIFT;
				arena_chunk_map_t *mapelm =
				    &chunk->map[pageind-map_bias];
				arena_dalloc_bin(arena, chunk, ptr, mapelm);
			} else {
				/*
				 * This object was allocated via a different
				 * arena bin than the one that is currently
				 * locked.  Stash the object, so that it can be
				 * handled in a future pass.
				 */
				tbin->avail[ndeferred] = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&bin->lock);
	}
#ifdef JEMALLOC_STATS
	if (merged_stats == false) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_bin_t *bin = &tcache->arena->bins[binind];
		malloc_mutex_lock(&bin->lock);
		bin->stats.nflushes++;
		bin->stats.nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(&bin->lock);
	}
#endif

	/* Slide the rem retained objects down to the start of the stack. */
	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
	    rem * sizeof(void *));
	tbin->ncached = rem;
	/* Lower the recorded low-water mark if the bin is now below it. */
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

/*
 * Flush all but rem objects from the large-object cache bin tbin.
 *
 * The (tbin->ncached - rem) objects at the front of tbin->avail are handed
 * back via arena_dalloc_large(); the trailing rem objects are kept and slid to
 * the front of the stack.
 *
 * As cached objects may belong to different arenas, the flush proceeds in
 * passes: each pass locks the arena that owns avail[0], frees every pending
 * object owned by that arena, and compacts the others to the front of avail
 * for a later pass.
 *
 * tbin:   bin to flush.
 * binind: index of tbin; must be < nhbins.  Large stats are indexed by
 *         (binind - nbins).
 * rem:    number of objects to keep; must be <= tbin->ncached.
 * tcache: owning cache (only present with JEMALLOC_STATS or JEMALLOC_PROF),
 *         used to merge request stats / profiling byte counts into
 *         tcache->arena.
 */
void
tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
    , tcache_t *tcache
#endif
    )
{
	void *ptr;
	unsigned i, nflush, ndeferred;
#ifdef JEMALLOC_STATS
	bool merged_stats = false;
#endif

	assert(binind < nhbins);
	assert(rem <= tbin->ncached);

	/* Each pass handles one arena; deferred objects form the next pass. */
	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    tbin->avail[0]);
		arena_t *arena = chunk->arena;

		malloc_mutex_lock(&arena->lock);
		/*
		 * While the thread's own arena is locked, fold in the
		 * profiling byte count and/or the bin's request stats.  The
		 * enclosing "if" only exists when at least one of the two
		 * features is compiled in.
		 */
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
		if (arena == tcache->arena) {
#endif
#ifdef JEMALLOC_PROF
			arena_prof_accum(arena, tcache->prof_accumbytes);
			tcache->prof_accumbytes = 0;
#endif
#ifdef JEMALLOC_STATS
			merged_stats = true;
			arena->stats.nrequests_large += tbin->tstats.nrequests;
			arena->stats.lstats[binind - nbins].nrequests +=
			    tbin->tstats.nrequests;
			tbin->tstats.nrequests = 0;
#endif
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
		}
#endif
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = tbin->avail[i];
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena)
				arena_dalloc_large(arena, chunk, ptr);
			else {
				/*
				 * This object was allocated via a different
				 * arena than the one that is currently locked.
				 * Stash the object, so that it can be handled
				 * in a future pass.
				 */
				tbin->avail[ndeferred] = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&arena->lock);
	}
#ifdef JEMALLOC_STATS
	if (merged_stats == false) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_t *arena = tcache->arena;
		malloc_mutex_lock(&arena->lock);
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		arena->stats.lstats[binind - nbins].nrequests +=
		    tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(&arena->lock);
	}
#endif

	/* Slide the rem retained objects down to the start of the stack. */
	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
	    rem * sizeof(void *));
	tbin->ncached = rem;
	/* Lower the recorded low-water mark if the bin is now below it. */
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

/*
 * Allocate and initialize a thread cache backed by arena.
 *
 * The cache is a single allocation: the tcache_t header with nhbins
 * tcache_bin_t entries, followed by one pointer stack per bin
 * (tcache_bin_info[i].ncached_max slots each, stack_nelms slots in total).
 * The memory is requested zeroed.
 *
 * On success the new cache is installed with TCACHE_SET() and returned; NULL
 * is returned if the allocation fails.
 */
tcache_t *
tcache_create(arena_t *arena)
{
	tcache_t *tcache;
	size_t nbytes, avail_off;
	unsigned binind;

	/* Header and bin array, padded so the pointer stacks are aligned. */
	avail_off = PTR_CEILING(offsetof(tcache_t, tbins) +
	    (sizeof(tcache_bin_t) * nhbins));
	nbytes = avail_off + (stack_nelms * sizeof(void *));
	/*
	 * Pad to a whole number of cachelines so that the cache never shares a
	 * line with a neighbouring allocation.  This mirrors what ipalloc()
	 * would do; ipalloc() itself cannot be called here due to tcache
	 * bootstrapping issues.
	 */
	nbytes = (nbytes + CACHELINE_MASK) & (-CACHELINE);

	/* Pick the allocation path by size; all three request zeroed memory. */
	if (nbytes <= small_maxclass) {
		tcache = (tcache_t *)arena_malloc_small(arena, nbytes, true);
	} else if (nbytes <= tcache_maxclass) {
		tcache = (tcache_t *)arena_malloc_large(arena, nbytes, true);
	} else {
		tcache = (tcache_t *)icalloc(nbytes);
	}

	if (tcache == NULL)
		return (NULL);

#ifdef JEMALLOC_STATS
	/* Register with the arena's list of live tcaches. */
	malloc_mutex_lock(&arena->lock);
	ql_elm_new(tcache, link);
	ql_tail_insert(&arena->tcache_ql, tcache, link);
	malloc_mutex_unlock(&arena->lock);
#endif

	tcache->arena = arena;
	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
	/* Carve the trailing storage into consecutive per-bin stacks. */
	for (binind = 0; binind < nhbins; binind++) {
		tcache_bin_t *tbin = &tcache->tbins[binind];

		tbin->lg_fill_div = 1;
		tbin->avail = (void **)((uintptr_t)tcache +
		    (uintptr_t)avail_off);
		avail_off += tcache_bin_info[binind].ncached_max *
		    sizeof(void *);
	}

	TCACHE_SET(tcache);

	return (tcache);
}

/*
 * Tear down a thread cache.
 *
 * Steps, in order:
 *   1. (JEMALLOC_STATS) unlink the cache from its arena's list and merge its
 *      outstanding request stats into the arena.
 *   2. Flush every small and large bin completely (rem == 0).
 *   3. (JEMALLOC_PROF) hand any remaining profiling byte count to the arena.
 *   4. Free the tcache memory itself, using the deallocation path that matches
 *      the size reported by arena_salloc().
 *
 * tcache must be a valid cache returned by tcache_create(); it must not be
 * used after this call.
 */
void
tcache_destroy(tcache_t *tcache)
{
	unsigned i;
	size_t tcache_size;

#ifdef JEMALLOC_STATS
	/*
	 * Unlink from list of extant tcaches, and merge the cache's stats
	 * while arena->lock is still held.  tcache_stats_merge() updates
	 * arena->stats for the large bins without taking any lock of its own,
	 * whereas every other update of those counters in this file is done
	 * under arena->lock; merging after the unlock would therefore race
	 * with concurrent updaters.  tcache_stats_merge() acquires bin locks
	 * internally, so the nesting here is arena->lock, then bin->lock.
	 */
	malloc_mutex_lock(&tcache->arena->lock);
	ql_remove(&tcache->arena->tcache_ql, tcache, link);
	tcache_stats_merge(tcache, tcache->arena);
	malloc_mutex_unlock(&tcache->arena->lock);
#endif

	/* Empty all small bins. */
	for (i = 0; i < nbins; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_small(tbin, i, 0
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
		    , tcache
#endif
		    );

#ifdef JEMALLOC_STATS
		/* Merge any request count that is still pending. */
		if (tbin->tstats.nrequests != 0) {
			arena_t *arena = tcache->arena;
			arena_bin_t *bin = &arena->bins[i];
			malloc_mutex_lock(&bin->lock);
			bin->stats.nrequests += tbin->tstats.nrequests;
			malloc_mutex_unlock(&bin->lock);
		}
#endif
	}

	/* Empty all large bins; i continues from nbins. */
	for (; i < nhbins; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_large(tbin, i, 0
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
		    , tcache
#endif
		    );

#ifdef JEMALLOC_STATS
		/* Merge any request count that is still pending. */
		if (tbin->tstats.nrequests != 0) {
			arena_t *arena = tcache->arena;
			malloc_mutex_lock(&arena->lock);
			arena->stats.nrequests_large += tbin->tstats.nrequests;
			arena->stats.lstats[i - nbins].nrequests +=
			    tbin->tstats.nrequests;
			malloc_mutex_unlock(&arena->lock);
		}
#endif
	}

#ifdef JEMALLOC_PROF
	/* Don't lose profiling bytes that no flush handed to the arena. */
	if (tcache->prof_accumbytes > 0) {
		malloc_mutex_lock(&tcache->arena->lock);
		arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
		malloc_mutex_unlock(&tcache->arena->lock);
	}
#endif

	/*
	 * Release the tcache itself.  The tcache cannot be used to free its
	 * own memory, so the arena deallocation functions are called directly
	 * under the appropriate lock; the three cases mirror the three
	 * allocation paths in tcache_create().
	 */
	tcache_size = arena_salloc(tcache);
	if (tcache_size <= small_maxclass) {
		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
		arena_t *arena = chunk->arena;
		size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
		    PAGE_SHIFT;
		arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias];
		/*
		 * Step back from the tcache's page by (mapelm->bits >>
		 * PAGE_SHIFT) pages to reach the run header (presumably the
		 * page's offset within its run -- confirm against arena.h).
		 */
		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
		    (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) <<
		    PAGE_SHIFT));
		arena_bin_t *bin = run->bin;

		malloc_mutex_lock(&bin->lock);
		arena_dalloc_bin(arena, chunk, tcache, mapelm);
		malloc_mutex_unlock(&bin->lock);
	} else if (tcache_size <= tcache_maxclass) {
		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
		arena_t *arena = chunk->arena;

		malloc_mutex_lock(&arena->lock);
		arena_dalloc_large(arena, chunk, tcache);
		malloc_mutex_unlock(&arena->lock);
	} else
		idalloc(tcache);
}

/*
 * TSD destructor registered for tcache_tsd by tcache_boot().
 *
 * arg is the value stored under the key when the thread exits.  Besides a real
 * tcache pointer, two sentinel values are recognized:
 *   1 - the cache was already destroyed by a previous destructor call;
 *   2 - an allocator function ran in another destructor after that.
 * A NULL arg is ignored.
 */
static void
tcache_thread_cleanup(void *arg)
{
	tcache_t *tcache = (tcache_t *)arg;

	/* Nothing was ever cached for this thread. */
	if (tcache == NULL)
		return;

	/*
	 * Sentinel 1 was stored by the previous invocation of this destructor
	 * so that other destructors would not cause the tcache to be
	 * re-created.  Leave the key alone this time, so that the destructor
	 * will not be called again.
	 */
	if (tcache == (void *)(uintptr_t)1)
		return;

	/*
	 * Sentinel 2: another destructor called an allocator function after
	 * this destructor had already run.  Go back to sentinel 1 in order to
	 * receive one more callback.
	 */
	if (tcache == (void *)(uintptr_t)2) {
		TCACHE_SET((uintptr_t)1);
		return;
	}

	/* A live cache: destroy it and mark the slot as cleaned up. */
	assert(tcache != (void *)(uintptr_t)1);
	tcache_destroy(tcache);
	TCACHE_SET((uintptr_t)1);
}

#ifdef JEMALLOC_STATS
/*
 * Fold the per-bin request counters of tcache into arena's statistics and
 * reset them to zero in the cache.
 *
 * Small-bin counters are added under the corresponding arena bin lock.
 * Large-bin counters are added to arena->stats without any locking here;
 * NOTE(review): presumably the caller is expected to hold arena->lock --
 * confirm against all call sites.
 */
void
tcache_stats_merge(tcache_t *tcache, arena_t *arena)
{
	unsigned binind;

	/* Small bins: indices [0, nbins). */
	for (binind = 0; binind < nbins; binind++) {
		tcache_bin_t *tbin = &tcache->tbins[binind];
		arena_bin_t *bin = &arena->bins[binind];

		malloc_mutex_lock(&bin->lock);
		bin->stats.nrequests += tbin->tstats.nrequests;
		malloc_mutex_unlock(&bin->lock);
		tbin->tstats.nrequests = 0;
	}

	/* Large bins: the remaining indices up to nhbins. */
	while (binind < nhbins) {
		tcache_bin_t *tbin = &tcache->tbins[binind];

		arena->stats.nrequests_large += tbin->tstats.nrequests;
		arena->stats.lstats[binind - nbins].nrequests +=
		    tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		binind++;
	}
}
#endif

/*
 * One-time initialization of the thread-cache subsystem.
 *
 * If opt_tcache is set, this derives tcache_maxclass and nhbins from
 * opt_lg_tcache_max, allocates and fills tcache_bin_info, computes
 * stack_nelms and tcache_gc_incr, and creates the tcache_tsd key with
 * tcache_thread_cleanup() as its destructor.  If opt_tcache is clear, nothing
 * is done.
 *
 * Returns true on error (tcache_bin_info could not be allocated), false
 * otherwise.  A pthread_key_create() failure is treated as fatal: a message is
 * written and the process aborts.
 */
bool
tcache_boot(void)
{

	if (opt_tcache) {
		unsigned i;

		/*
		 * If necessary, clamp opt_lg_tcache_max, now that
		 * small_maxclass and arena_maxclass are known.
		 *
		 * The power of two is computed in size_t, and only when the
		 * exponent is smaller than the width of size_t: shifting by a
		 * larger amount is undefined behaviour, and shifting a plain
		 * unsigned would already misbehave for exponents at or above
		 * the width of unsigned.  An exponent too large to represent
		 * necessarily denotes a size above arena_maxclass.
		 */
		if (opt_lg_tcache_max < 0)
			tcache_maxclass = small_maxclass;
		else if ((size_t)opt_lg_tcache_max >= (sizeof(size_t) << 3))
			tcache_maxclass = arena_maxclass;
		else {
			size_t maxclass = (size_t)1 << opt_lg_tcache_max;

			if (maxclass < small_maxclass)
				tcache_maxclass = small_maxclass;
			else if (maxclass > arena_maxclass)
				tcache_maxclass = arena_maxclass;
			else
				tcache_maxclass = maxclass;
		}

		/* One large bin per page multiple up to tcache_maxclass. */
		nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);

		/* Initialize tcache_bin_info. */
		tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
		    sizeof(tcache_bin_info_t));
		if (tcache_bin_info == NULL)
			return (true);
		stack_nelms = 0;
		/*
		 * Small bins cache up to twice the number of regions per run,
		 * capped at TCACHE_NSLOTS_SMALL_MAX.
		 */
		for (i = 0; i < nbins; i++) {
			if ((arena_bin_info[i].nregs << 1) <=
			    TCACHE_NSLOTS_SMALL_MAX) {
				tcache_bin_info[i].ncached_max =
				    (arena_bin_info[i].nregs << 1);
			} else {
				tcache_bin_info[i].ncached_max =
				    TCACHE_NSLOTS_SMALL_MAX;
			}
			stack_nelms += tcache_bin_info[i].ncached_max;
		}
		/* Large bins all use the same fixed capacity. */
		for (; i < nhbins; i++) {
			tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
			stack_nelms += tcache_bin_info[i].ncached_max;
		}

		/*
		 * Compute incremental GC event threshold:
		 * ceil(2^opt_lg_tcache_gc_sweep / nbins).
		 *
		 * NOTE(review): the shift below is performed on an unsigned
		 * int, so it is only well defined while
		 * opt_lg_tcache_gc_sweep is smaller than the width of
		 * unsigned; confirm that option parsing enforces this.
		 */
		if (opt_lg_tcache_gc_sweep >= 0) {
			tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
			    nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
			    0) ? 0 : 1);
		} else
			tcache_gc_incr = 0;

		if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
		    0) {
			malloc_write(
			    "<jemalloc>: Error in pthread_key_create()\n");
			abort();
		}
	}

	return (false);
}
/******************************************************************************/
#endif /* JEMALLOC_TCACHE */
Commit	Line	Data
a78e148b	1	#define JEMALLOC_TCACHE_C_
	2	#include "jemalloc/internal/jemalloc_internal.h"
	3	#ifdef JEMALLOC_TCACHE
	4	/******************************************************************************/
	5	/* Data. */
	6
	7	bool opt_tcache = true;
	8	ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
	9	ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
	10
	11	tcache_bin_info_t *tcache_bin_info;
	12	static unsigned stack_nelms; /* Total stack elms per tcache. */
	13
	14	/* Map of thread-specific caches. */
	15	#ifndef NO_TLS
	16	__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
	17	#endif
	18
	19	/*
	20	* Same contents as tcache, but initialized such that the TSD destructor is
	21	* called when a thread exits, so that the cache can be cleaned up.
	22	*/
	23	pthread_key_t tcache_tsd;
	24
	25	size_t nhbins;
	26	size_t tcache_maxclass;
	27	unsigned tcache_gc_incr;
	28
	29	/******************************************************************************/
	30	/* Function prototypes for non-inline static functions. */
	31
	32	static void tcache_thread_cleanup(void *arg);
	33
	34	/******************************************************************************/
	35
	36	void *
	37	tcache_alloc_small_hard(tcache_t tcache, tcache_bin_t tbin, size_t binind)
	38	{
	39	void *ret;
	40
	41	arena_tcache_fill_small(tcache->arena, tbin, binind
	42	#ifdef JEMALLOC_PROF
	43	, tcache->prof_accumbytes
	44	#endif
	45	);
	46	#ifdef JEMALLOC_PROF
	47	tcache->prof_accumbytes = 0;
	48	#endif
	49	ret = tcache_alloc_easy(tbin);
	50
	51	return (ret);
	52	}
	53
	54	void
	55	tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
	56	#if (defined(JEMALLOC_STATS) \|\| defined(JEMALLOC_PROF))
	57	, tcache_t *tcache
	58	#endif
	59	)
	60	{
	61	void *ptr;
	62	unsigned i, nflush, ndeferred;
	63	#ifdef JEMALLOC_STATS
	64	bool merged_stats = false;
65	#endif
66
67	assert(binind < nbins);
68	assert(rem <= tbin->ncached);
69
70	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
71	/* Lock the arena bin associated with the first object. */
72	arena_chunk_t chunk = (arena_chunk_t )CHUNK_ADDR2BASE(
73	tbin->avail[0]);
74	arena_t *arena = chunk->arena;
75	arena_bin_t *bin = &arena->bins[binind];
76
77	#ifdef JEMALLOC_PROF
78	if (arena == tcache->arena) {
79	malloc_mutex_lock(&arena->lock);
80	arena_prof_accum(arena, tcache->prof_accumbytes);
81	malloc_mutex_unlock(&arena->lock);
82	tcache->prof_accumbytes = 0;
83	}
84	#endif
85
86	malloc_mutex_lock(&bin->lock);
87	#ifdef JEMALLOC_STATS
88	if (arena == tcache->arena) {
89	assert(merged_stats == false);
90	merged_stats = true;
91	bin->stats.nflushes++;
92	bin->stats.nrequests += tbin->tstats.nrequests;
93	tbin->tstats.nrequests = 0;
94	}
95	#endif
96	ndeferred = 0;
97	for (i = 0; i < nflush; i++) {
98	ptr = tbin->avail[i];
99	assert(ptr != NULL);
100	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
101	if (chunk->arena == arena) {
102	size_t pageind = ((uintptr_t)ptr -
103	(uintptr_t)chunk) >> PAGE_SHIFT;
104	arena_chunk_map_t *mapelm =
105	&chunk->map[pageind-map_bias];
106	arena_dalloc_bin(arena, chunk, ptr, mapelm);
107	} else {
108	/*
109	* This object was allocated via a different
110	* arena bin than the one that is currently
111	* locked. Stash the object, so that it can be
112	* handled in a future pass.
113	*/
114	tbin->avail[ndeferred] = ptr;
115	ndeferred++;
116	}
117	}
118	malloc_mutex_unlock(&bin->lock);
119	}
120	#ifdef JEMALLOC_STATS
121	if (merged_stats == false) {
122	/*
123	* The flush loop didn't happen to flush to this thread's
124	* arena, so the stats didn't get merged. Manually do so now.
125	*/
126	arena_bin_t *bin = &tcache->arena->bins[binind];
127	malloc_mutex_lock(&bin->lock);
128	bin->stats.nflushes++;
129	bin->stats.nrequests += tbin->tstats.nrequests;
130	tbin->tstats.nrequests = 0;
131	malloc_mutex_unlock(&bin->lock);
132	}
133	#endif
134
135	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
136	rem * sizeof(void *));
137	tbin->ncached = rem;
138	if ((int)tbin->ncached < tbin->low_water)
139	tbin->low_water = tbin->ncached;
140	}
141
142	void
143	tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
144	#if (defined(JEMALLOC_STATS) \|\| defined(JEMALLOC_PROF))
145	, tcache_t *tcache
146	#endif
147	)
148	{
149	void *ptr;
150	unsigned i, nflush, ndeferred;
151	#ifdef JEMALLOC_STATS
152	bool merged_stats = false;
153	#endif
154
155	assert(binind < nhbins);
156	assert(rem <= tbin->ncached);
157
158	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
159	/* Lock the arena associated with the first object. */
160	arena_chunk_t chunk = (arena_chunk_t )CHUNK_ADDR2BASE(
161	tbin->avail[0]);
162	arena_t *arena = chunk->arena;
163
164	malloc_mutex_lock(&arena->lock);
165	#if (defined(JEMALLOC_PROF) \|\| defined(JEMALLOC_STATS))
166	if (arena == tcache->arena) {
167	#endif
168	#ifdef JEMALLOC_PROF
169	arena_prof_accum(arena, tcache->prof_accumbytes);
170	tcache->prof_accumbytes = 0;
171	#endif
172	#ifdef JEMALLOC_STATS
173	merged_stats = true;
174	arena->stats.nrequests_large += tbin->tstats.nrequests;
175	arena->stats.lstats[binind - nbins].nrequests +=
176	tbin->tstats.nrequests;
177	tbin->tstats.nrequests = 0;
178	#endif
179	#if (defined(JEMALLOC_PROF) \|\| defined(JEMALLOC_STATS))
180	}
181	#endif
182	ndeferred = 0;
183	for (i = 0; i < nflush; i++) {
184	ptr = tbin->avail[i];
185	assert(ptr != NULL);
186	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
187	if (chunk->arena == arena)
188	arena_dalloc_large(arena, chunk, ptr);
189	else {
190	/*
191	* This object was allocated via a different
192	* arena than the one that is currently locked.
193	* Stash the object, so that it can be handled
194	* in a future pass.
195	*/
196	tbin->avail[ndeferred] = ptr;
197	ndeferred++;
198	}
199	}
200	malloc_mutex_unlock(&arena->lock);
201	}
202	#ifdef JEMALLOC_STATS
203	if (merged_stats == false) {
204	/*
205	* The flush loop didn't happen to flush to this thread's
206	* arena, so the stats didn't get merged. Manually do so now.
207	*/
208	arena_t *arena = tcache->arena;
209	malloc_mutex_lock(&arena->lock);
210	arena->stats.nrequests_large += tbin->tstats.nrequests;
211	arena->stats.lstats[binind - nbins].nrequests +=
212	tbin->tstats.nrequests;
213	tbin->tstats.nrequests = 0;
214	malloc_mutex_unlock(&arena->lock);
215	}
216	#endif
217
218	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
219	rem * sizeof(void *));
220	tbin->ncached = rem;
221	if ((int)tbin->ncached < tbin->low_water)
222	tbin->low_water = tbin->ncached;
223	}
224
225	tcache_t *
226	tcache_create(arena_t *arena)
227	{
228	tcache_t *tcache;
229	size_t size, stack_offset;
230	unsigned i;
231
232	size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
233	/* Naturally align the pointer stacks. */
234	size = PTR_CEILING(size);
235	stack_offset = size;
236	size += stack_nelms * sizeof(void *);
237	/*
238	* Round up to the nearest multiple of the cacheline size, in order to
239	* avoid the possibility of false cacheline sharing.
240	*
241	* That this works relies on the same logic as in ipalloc(), but we
242	* cannot directly call ipalloc() here due to tcache bootstrapping
243	* issues.
244	*/
245	size = (size + CACHELINE_MASK) & (-CACHELINE);
246
247	if (size <= small_maxclass)
248	tcache = (tcache_t *)arena_malloc_small(arena, size, true);
249	else if (size <= tcache_maxclass)
250	tcache = (tcache_t *)arena_malloc_large(arena, size, true);
251	else
252	tcache = (tcache_t *)icalloc(size);
253
254	if (tcache == NULL)
255	return (NULL);
256
257	#ifdef JEMALLOC_STATS
258	/* Link into list of extant tcaches. */
259	malloc_mutex_lock(&arena->lock);
260	ql_elm_new(tcache, link);
261	ql_tail_insert(&arena->tcache_ql, tcache, link);
262	malloc_mutex_unlock(&arena->lock);
263	#endif
264
265	tcache->arena = arena;
266	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
267	for (i = 0; i < nhbins; i++) {
268	tcache->tbins[i].lg_fill_div = 1;
269	tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
270	(uintptr_t)stack_offset);
271	stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
272	}
273
274	TCACHE_SET(tcache);
275
276	return (tcache);
277	}
278
279	void
280	tcache_destroy(tcache_t *tcache)
281	{
282	unsigned i;
283	size_t tcache_size;
284
285	#ifdef JEMALLOC_STATS
286	/* Unlink from list of extant tcaches. */
287	malloc_mutex_lock(&tcache->arena->lock);
288	ql_remove(&tcache->arena->tcache_ql, tcache, link);
289	malloc_mutex_unlock(&tcache->arena->lock);
290	tcache_stats_merge(tcache, tcache->arena);
291	#endif
292
293	for (i = 0; i < nbins; i++) {
294	tcache_bin_t *tbin = &tcache->tbins[i];
295	tcache_bin_flush_small(tbin, i, 0
296	#if (defined(JEMALLOC_STATS) \|\| defined(JEMALLOC_PROF))
297	, tcache
298	#endif
299	);
300
301	#ifdef JEMALLOC_STATS
302	if (tbin->tstats.nrequests != 0) {
303	arena_t *arena = tcache->arena;
304	arena_bin_t *bin = &arena->bins[i];
305	malloc_mutex_lock(&bin->lock);
306	bin->stats.nrequests += tbin->tstats.nrequests;
307	malloc_mutex_unlock(&bin->lock);
308	}
309	#endif
310	}
311
312	for (; i < nhbins; i++) {
313	tcache_bin_t *tbin = &tcache->tbins[i];
314	tcache_bin_flush_large(tbin, i, 0
315	#if (defined(JEMALLOC_STATS) \|\| defined(JEMALLOC_PROF))
316	, tcache
317	#endif
318	);
319
320	#ifdef JEMALLOC_STATS
321	if (tbin->tstats.nrequests != 0) {
322	arena_t *arena = tcache->arena;
323	malloc_mutex_lock(&arena->lock);
324	arena->stats.nrequests_large += tbin->tstats.nrequests;
325	arena->stats.lstats[i - nbins].nrequests +=
326	tbin->tstats.nrequests;
327	malloc_mutex_unlock(&arena->lock);
328	}
329	#endif
330	}
331
332	#ifdef JEMALLOC_PROF
333	if (tcache->prof_accumbytes > 0) {
334	malloc_mutex_lock(&tcache->arena->lock);
335	arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
336	malloc_mutex_unlock(&tcache->arena->lock);
337	}
338	#endif
339
340	tcache_size = arena_salloc(tcache);
341	if (tcache_size <= small_maxclass) {
342	arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
343	arena_t *arena = chunk->arena;
344	size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
345	PAGE_SHIFT;
346	arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias];
347	arena_run_t run = (arena_run_t )((uintptr_t)chunk +
348	(uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) <<
349	PAGE_SHIFT));
350	arena_bin_t *bin = run->bin;
351
352	malloc_mutex_lock(&bin->lock);
353	arena_dalloc_bin(arena, chunk, tcache, mapelm);
354	malloc_mutex_unlock(&bin->lock);
355	} else if (tcache_size <= tcache_maxclass) {
356	arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
357	arena_t *arena = chunk->arena;
358
359	malloc_mutex_lock(&arena->lock);
360	arena_dalloc_large(arena, chunk, tcache);
361	malloc_mutex_unlock(&arena->lock);
362	} else
363	idalloc(tcache);
364	}
365
366	static void
367	tcache_thread_cleanup(void *arg)
368	{
369	tcache_t tcache = (tcache_t )arg;
370
371	if (tcache == (void *)(uintptr_t)1) {
372	/*
373	* The previous time this destructor was called, we set the key
374	* to 1 so that other destructors wouldn't cause re-creation of
375	* the tcache. This time, do nothing, so that the destructor
376	* will not be called again.
377	*/
378	} else if (tcache == (void *)(uintptr_t)2) {
379	/*
380	* Another destructor called an allocator function after this
381	* destructor was called. Reset tcache to 1 in order to
382	* receive another callback.
383	*/
384	TCACHE_SET((uintptr_t)1);
385	} else if (tcache != NULL) {
386	assert(tcache != (void *)(uintptr_t)1);
387	tcache_destroy(tcache);
388	TCACHE_SET((uintptr_t)1);
389	}
390	}
391
392	#ifdef JEMALLOC_STATS
393	void
394	tcache_stats_merge(tcache_t tcache, arena_t arena)
395	{
396	unsigned i;
397
398	/* Merge and reset tcache stats. */
399	for (i = 0; i < nbins; i++) {
400	arena_bin_t *bin = &arena->bins[i];
401	tcache_bin_t *tbin = &tcache->tbins[i];
402	malloc_mutex_lock(&bin->lock);
403	bin->stats.nrequests += tbin->tstats.nrequests;
404	malloc_mutex_unlock(&bin->lock);
405	tbin->tstats.nrequests = 0;
406	}
407
408	for (; i < nhbins; i++) {
409	malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins];
410	tcache_bin_t *tbin = &tcache->tbins[i];
411	arena->stats.nrequests_large += tbin->tstats.nrequests;
412	lstats->nrequests += tbin->tstats.nrequests;
413	tbin->tstats.nrequests = 0;
414	}
415	}
416	#endif
417
418	bool
419	tcache_boot(void)
420	{
421
422	if (opt_tcache) {
423	unsigned i;
424
425	/*
426	* If necessary, clamp opt_lg_tcache_max, now that
427	* small_maxclass and arena_maxclass are known.
428	*/
429	if (opt_lg_tcache_max < 0 \|\| (1U <<
430	opt_lg_tcache_max) < small_maxclass)
431	tcache_maxclass = small_maxclass;
432	else if ((1U << opt_lg_tcache_max) > arena_maxclass)
433	tcache_maxclass = arena_maxclass;
434	else
435	tcache_maxclass = (1U << opt_lg_tcache_max);
436
437	nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);
438
439	/* Initialize tcache_bin_info. */
440	tcache_bin_info = (tcache_bin_info_t )base_alloc(nhbins
441	sizeof(tcache_bin_info_t));
442	if (tcache_bin_info == NULL)
443	return (true);
444	stack_nelms = 0;
445	for (i = 0; i < nbins; i++) {
446	if ((arena_bin_info[i].nregs << 1) <=
447	TCACHE_NSLOTS_SMALL_MAX) {
448	tcache_bin_info[i].ncached_max =
449	(arena_bin_info[i].nregs << 1);
450	} else {
451	tcache_bin_info[i].ncached_max =
452	TCACHE_NSLOTS_SMALL_MAX;
453	}
454	stack_nelms += tcache_bin_info[i].ncached_max;
455	}
456	for (; i < nhbins; i++) {
457	tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
458	stack_nelms += tcache_bin_info[i].ncached_max;
459	}
460
461	/* Compute incremental GC event threshold. */
462	if (opt_lg_tcache_gc_sweep >= 0) {
463	tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
464	nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
465	0) ? 0 : 1);
466	} else
467	tcache_gc_incr = 0;
468
469	if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
470	0) {
471	malloc_write(
472	"<jemalloc>: Error in pthread_key_create()\n");
473	abort();
474	}
475	}
476
477	return (false);
478	}
479	/******************************************************************************/
480	#endif /* JEMALLOC_TCACHE */