#define JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"
#ifdef JEMALLOC_TCACHE
/******************************************************************************/
/* Data. */

bool opt_tcache = true;
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;

tcache_bin_info_t *tcache_bin_info;
static unsigned stack_nelms; /* Total stack elms per tcache. */

/* Map of thread-specific caches. */
#ifndef NO_TLS
__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
#endif

/*
 * Same contents as tcache, but initialized such that the TSD destructor is
 * called when a thread exits, so that the cache can be cleaned up.
 */
pthread_key_t tcache_tsd;

size_t nhbins;
size_t tcache_maxclass;
unsigned tcache_gc_incr;

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static void tcache_thread_cleanup(void *arg);

/******************************************************************************/
void *
tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
{
    void *ret;

    arena_tcache_fill_small(tcache->arena, tbin, binind
#ifdef JEMALLOC_PROF
        , tcache->prof_accumbytes
#endif
        );
#ifdef JEMALLOC_PROF
    tcache->prof_accumbytes = 0;
#endif
    ret = tcache_alloc_easy(tbin);

    return (ret);
}

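/*
 * Flush the first tbin->ncached - rem objects from a small bin, leaving rem
 * objects cached.  Cached objects may belong to different arenas, but each
 * pass of the flush loop holds only one arena bin lock (that of the first
 * remaining object); objects that belong to other arenas are compacted to
 * the front of avail[] and flushed in a later pass.
 */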
void
tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
    , tcache_t *tcache
#endif
    )
{
    void *ptr;
    unsigned i, nflush, ndeferred;
#ifdef JEMALLOC_STATS
    bool merged_stats = false;
#endif

    assert(binind < nbins);
    assert(rem <= tbin->ncached);

    for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
        /* Lock the arena bin associated with the first object. */
        arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
            tbin->avail[0]);
        arena_t *arena = chunk->arena;
        arena_bin_t *bin = &arena->bins[binind];

#ifdef JEMALLOC_PROF
        if (arena == tcache->arena) {
            malloc_mutex_lock(&arena->lock);
            arena_prof_accum(arena, tcache->prof_accumbytes);
            malloc_mutex_unlock(&arena->lock);
            tcache->prof_accumbytes = 0;
        }
#endif

        malloc_mutex_lock(&bin->lock);
#ifdef JEMALLOC_STATS
        if (arena == tcache->arena) {
            assert(merged_stats == false);
            merged_stats = true;
            bin->stats.nflushes++;
            bin->stats.nrequests += tbin->tstats.nrequests;
            tbin->tstats.nrequests = 0;
        }
#endif
        ndeferred = 0;
        for (i = 0; i < nflush; i++) {
            ptr = tbin->avail[i];
            assert(ptr != NULL);
            chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
            if (chunk->arena == arena) {
                size_t pageind = ((uintptr_t)ptr -
                    (uintptr_t)chunk) >> PAGE_SHIFT;
                arena_chunk_map_t *mapelm =
                    &chunk->map[pageind-map_bias];
                arena_dalloc_bin(arena, chunk, ptr, mapelm);
            } else {
                /*
                 * This object was allocated via a different
                 * arena bin than the one that is currently
                 * locked.  Stash the object, so that it can be
                 * handled in a future pass.
                 */
                tbin->avail[ndeferred] = ptr;
                ndeferred++;
            }
        }
        malloc_mutex_unlock(&bin->lock);
    }
#ifdef JEMALLOC_STATS
    if (merged_stats == false) {
        /*
         * The flush loop didn't happen to flush to this thread's
         * arena, so the stats didn't get merged.  Manually do so now.
         */
        arena_bin_t *bin = &tcache->arena->bins[binind];
        malloc_mutex_lock(&bin->lock);
        bin->stats.nflushes++;
        bin->stats.nrequests += tbin->tstats.nrequests;
        tbin->tstats.nrequests = 0;
        malloc_mutex_unlock(&bin->lock);
    }
#endif

    memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
        rem * sizeof(void *));
    tbin->ncached = rem;
    if ((int)tbin->ncached < tbin->low_water)
        tbin->low_water = tbin->ncached;
}

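/*
 * Large-object analogue of tcache_bin_flush_small().  The algorithm is the
 * same deferred multi-pass flush, except that large deallocation is
 * serialized by the arena lock rather than a per-bin lock.
 */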
void
tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
    , tcache_t *tcache
#endif
    )
{
    void *ptr;
    unsigned i, nflush, ndeferred;
#ifdef JEMALLOC_STATS
    bool merged_stats = false;
#endif

    assert(binind < nhbins);
    assert(rem <= tbin->ncached);

    for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
        /* Lock the arena associated with the first object. */
        arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
            tbin->avail[0]);
        arena_t *arena = chunk->arena;

        malloc_mutex_lock(&arena->lock);
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
        if (arena == tcache->arena) {
#endif
#ifdef JEMALLOC_PROF
            arena_prof_accum(arena, tcache->prof_accumbytes);
            tcache->prof_accumbytes = 0;
#endif
#ifdef JEMALLOC_STATS
            merged_stats = true;
            arena->stats.nrequests_large += tbin->tstats.nrequests;
            arena->stats.lstats[binind - nbins].nrequests +=
                tbin->tstats.nrequests;
            tbin->tstats.nrequests = 0;
#endif
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
        }
#endif
        ndeferred = 0;
        for (i = 0; i < nflush; i++) {
            ptr = tbin->avail[i];
            assert(ptr != NULL);
            chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
            if (chunk->arena == arena)
                arena_dalloc_large(arena, chunk, ptr);
            else {
                /*
                 * This object was allocated via a different
                 * arena than the one that is currently locked.
                 * Stash the object, so that it can be handled
                 * in a future pass.
                 */
                tbin->avail[ndeferred] = ptr;
                ndeferred++;
            }
        }
        malloc_mutex_unlock(&arena->lock);
    }
#ifdef JEMALLOC_STATS
    if (merged_stats == false) {
        /*
         * The flush loop didn't happen to flush to this thread's
         * arena, so the stats didn't get merged.  Manually do so now.
         */
        arena_t *arena = tcache->arena;
        malloc_mutex_lock(&arena->lock);
        arena->stats.nrequests_large += tbin->tstats.nrequests;
        arena->stats.lstats[binind - nbins].nrequests +=
            tbin->tstats.nrequests;
        tbin->tstats.nrequests = 0;
        malloc_mutex_unlock(&arena->lock);
    }
#endif

    memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
        rem * sizeof(void *));
    tbin->ncached = rem;
    if ((int)tbin->ncached < tbin->low_water)
        tbin->low_water = tbin->ncached;
}

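/*
 * Allocate and initialize a tcache for the given arena.  The tcache_t
 * header and all of its per-bin pointer stacks are carved out of a single
 * allocation: the stacks begin at the first pointer-aligned offset past the
 * header and are laid out contiguously, one per bin, ncached_max pointers
 * each.
 */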
tcache_t *
tcache_create(arena_t *arena)
{
    tcache_t *tcache;
    size_t size, stack_offset;
    unsigned i;

    size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
    /* Naturally align the pointer stacks. */
    size = PTR_CEILING(size);
    stack_offset = size;
    size += stack_nelms * sizeof(void *);
    /*
     * Round up to the nearest multiple of the cacheline size, in order to
     * avoid the possibility of false cacheline sharing.
     *
     * That this works relies on the same logic as in ipalloc(), but we
     * cannot directly call ipalloc() here due to tcache bootstrapping
     * issues.
     */
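    /*
     * Note on the idiom below: assuming CACHELINE is a power of two and
     * CACHELINE_MASK == CACHELINE - 1, two's complement gives
     * -CACHELINE == ~CACHELINE_MASK, so adding the mask and then masking
     * with -CACHELINE rounds size up to the next multiple of CACHELINE.
     */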
    size = (size + CACHELINE_MASK) & (-CACHELINE);

    if (size <= small_maxclass)
        tcache = (tcache_t *)arena_malloc_small(arena, size, true);
    else if (size <= tcache_maxclass)
        tcache = (tcache_t *)arena_malloc_large(arena, size, true);
    else
        tcache = (tcache_t *)icalloc(size);

    if (tcache == NULL)
        return (NULL);

#ifdef JEMALLOC_STATS
    /* Link into list of extant tcaches. */
    malloc_mutex_lock(&arena->lock);
    ql_elm_new(tcache, link);
    ql_tail_insert(&arena->tcache_ql, tcache, link);
    malloc_mutex_unlock(&arena->lock);
#endif

    tcache->arena = arena;
    assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
    for (i = 0; i < nhbins; i++) {
        tcache->tbins[i].lg_fill_div = 1;
        tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
            (uintptr_t)stack_offset);
        stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
    }

    TCACHE_SET(tcache);

    return (tcache);
}

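/*
 * Tear down a tcache: flush every bin completely (rem == 0), merge any
 * remaining per-thread stats and profiling bytes into the owning arena,
 * then free the tcache itself via the same size-class cases used to
 * allocate it in tcache_create().
 */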
void
tcache_destroy(tcache_t *tcache)
{
    unsigned i;
    size_t tcache_size;

#ifdef JEMALLOC_STATS
    /* Unlink from list of extant tcaches. */
    malloc_mutex_lock(&tcache->arena->lock);
    ql_remove(&tcache->arena->tcache_ql, tcache, link);
    malloc_mutex_unlock(&tcache->arena->lock);
    tcache_stats_merge(tcache, tcache->arena);
#endif

    for (i = 0; i < nbins; i++) {
        tcache_bin_t *tbin = &tcache->tbins[i];
        tcache_bin_flush_small(tbin, i, 0
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
            , tcache
#endif
            );

#ifdef JEMALLOC_STATS
        if (tbin->tstats.nrequests != 0) {
            arena_t *arena = tcache->arena;
            arena_bin_t *bin = &arena->bins[i];
            malloc_mutex_lock(&bin->lock);
            bin->stats.nrequests += tbin->tstats.nrequests;
            malloc_mutex_unlock(&bin->lock);
        }
#endif
    }

    for (; i < nhbins; i++) {
        tcache_bin_t *tbin = &tcache->tbins[i];
        tcache_bin_flush_large(tbin, i, 0
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
            , tcache
#endif
            );

#ifdef JEMALLOC_STATS
        if (tbin->tstats.nrequests != 0) {
            arena_t *arena = tcache->arena;
            malloc_mutex_lock(&arena->lock);
            arena->stats.nrequests_large += tbin->tstats.nrequests;
            arena->stats.lstats[i - nbins].nrequests +=
                tbin->tstats.nrequests;
            malloc_mutex_unlock(&arena->lock);
        }
#endif
    }

#ifdef JEMALLOC_PROF
    if (tcache->prof_accumbytes > 0) {
        malloc_mutex_lock(&tcache->arena->lock);
        arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
        malloc_mutex_unlock(&tcache->arena->lock);
    }
#endif

    tcache_size = arena_salloc(tcache);
    if (tcache_size <= small_maxclass) {
        arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
        arena_t *arena = chunk->arena;
        size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
            PAGE_SHIFT;
        arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias];
        arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
            (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) <<
            PAGE_SHIFT));
        arena_bin_t *bin = run->bin;

        malloc_mutex_lock(&bin->lock);
        arena_dalloc_bin(arena, chunk, tcache, mapelm);
        malloc_mutex_unlock(&bin->lock);
    } else if (tcache_size <= tcache_maxclass) {
        arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
        arena_t *arena = chunk->arena;

        malloc_mutex_lock(&arena->lock);
        arena_dalloc_large(arena, chunk, tcache);
        malloc_mutex_unlock(&arena->lock);
    } else
        idalloc(tcache);
}

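/*
 * TSD destructor.  The key value doubles as a small state machine: a live
 * tcache pointer means "destroy now", (uintptr_t)1 means the destructor
 * already ran, and (uintptr_t)2 means another TSD destructor used the
 * allocator after cleanup, so the key is re-armed to 1.
 */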
static void
tcache_thread_cleanup(void *arg)
{
    tcache_t *tcache = (tcache_t *)arg;

    if (tcache == (void *)(uintptr_t)1) {
        /*
         * The previous time this destructor was called, we set the key
         * to 1 so that other destructors wouldn't cause re-creation of
         * the tcache.  This time, do nothing, so that the destructor
         * will not be called again.
         */
    } else if (tcache == (void *)(uintptr_t)2) {
        /*
         * Another destructor called an allocator function after this
         * destructor was called.  Reset tcache to 1 in order to
         * receive another callback.
         */
        TCACHE_SET((uintptr_t)1);
    } else if (tcache != NULL) {
        assert(tcache != (void *)(uintptr_t)1);
        tcache_destroy(tcache);
        TCACHE_SET((uintptr_t)1);
    }
}

#ifdef JEMALLOC_STATS
void
tcache_stats_merge(tcache_t *tcache, arena_t *arena)
{
    unsigned i;

    /* Merge and reset tcache stats. */
    for (i = 0; i < nbins; i++) {
        arena_bin_t *bin = &arena->bins[i];
        tcache_bin_t *tbin = &tcache->tbins[i];
        malloc_mutex_lock(&bin->lock);
        bin->stats.nrequests += tbin->tstats.nrequests;
        malloc_mutex_unlock(&bin->lock);
        tbin->tstats.nrequests = 0;
    }

    for (; i < nhbins; i++) {
        malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins];
        tcache_bin_t *tbin = &tcache->tbins[i];
        arena->stats.nrequests_large += tbin->tstats.nrequests;
        lstats->nrequests += tbin->tstats.nrequests;
        tbin->tstats.nrequests = 0;
    }
}
#endif

bool
tcache_boot(void)
{

    if (opt_tcache) {
        unsigned i;

        /*
         * If necessary, clamp opt_lg_tcache_max, now that
         * small_maxclass and arena_maxclass are known.
         */
        if (opt_lg_tcache_max < 0 || (1U <<
            opt_lg_tcache_max) < small_maxclass)
            tcache_maxclass = small_maxclass;
        else if ((1U << opt_lg_tcache_max) > arena_maxclass)
            tcache_maxclass = arena_maxclass;
        else
            tcache_maxclass = (1U << opt_lg_tcache_max);

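        /*
         * One cached bin per small size class, plus one per page
         * multiple up through tcache_maxclass for large objects.
         */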
        nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);

        /* Initialize tcache_bin_info. */
        tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
            sizeof(tcache_bin_info_t));
        if (tcache_bin_info == NULL)
            return (true);
        stack_nelms = 0;
        for (i = 0; i < nbins; i++) {
            if ((arena_bin_info[i].nregs << 1) <=
                TCACHE_NSLOTS_SMALL_MAX) {
                tcache_bin_info[i].ncached_max =
                    (arena_bin_info[i].nregs << 1);
            } else {
                tcache_bin_info[i].ncached_max =
                    TCACHE_NSLOTS_SMALL_MAX;
            }
            stack_nelms += tcache_bin_info[i].ncached_max;
        }
        for (; i < nhbins; i++) {
            tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
            stack_nelms += tcache_bin_info[i].ncached_max;
        }

        /* Compute incremental GC event threshold. */
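        /*
         * A full GC sweep takes 2^opt_lg_tcache_gc_sweep allocation
         * events; tcache_gc_incr is the per-bin share of that budget,
         * i.e. ceil(2^opt_lg_tcache_gc_sweep / nbins).  Zero disables
         * incremental GC.
         */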
        if (opt_lg_tcache_gc_sweep >= 0) {
            tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
                nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
                0) ? 0 : 1);
        } else
            tcache_gc_incr = 0;

        if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
            0) {
            malloc_write(
                "<jemalloc>: Error in pthread_key_create()\n");
            abort();
        }
    }

    return (false);
}
/******************************************************************************/
#endif /* JEMALLOC_TCACHE */