]> git.saurik.com Git - redis.git/blame - deps/jemalloc/src/chunk_swap.c
Merge pull request #208 from jbergstroem/jemalloc-2.2.5
[redis.git] / deps / jemalloc / src / chunk_swap.c
CommitLineData
a78e148b 1#define JEMALLOC_CHUNK_SWAP_C_
2#include "jemalloc/internal/jemalloc_internal.h"
3#ifdef JEMALLOC_SWAP
4/******************************************************************************/
5/* Data. */
6
/*
 * Held while reading or updating swap_base/swap_end/swap_max, the two chunk
 * trees below, and swap_avail.
 */
malloc_mutex_t swap_mtx;
/* Set to true by chunk_swap_enable() once the swap files are mapped. */
bool swap_enabled;
/*
 * Copied from chunk_swap_enable()'s prezeroed argument.  When true,
 * chunk_alloc_swap() reports never-before-used space as already zeroed
 * instead of calling memset() on it.
 */
bool swap_prezeroed;
/* Number of entries in swap_fds. */
size_t swap_nfds;
/* Copy of the fds array passed to chunk_swap_enable() (for mallctl). */
int *swap_fds;
#ifdef JEMALLOC_STATS
/*
 * Bytes of swap space not currently handed out: initialized to the mapped
 * size, decreased on allocation and increased on deallocation.
 */
size_t swap_avail;
#endif

/* Base address of the mmap()ed file(s). */
static void *swap_base;
/* Current end of the space in use (<= swap_max). */
static void *swap_end;
/* Absolute upper limit on file-backed addresses. */
static void *swap_max;

/*
 * Trees of chunks that were previously allocated (trees differ only in node
 * ordering).  These are used when allocating chunks, in an attempt to re-use
 * address space.  Depending on function, different tree orderings are needed,
 * which is why there are two trees with the same contents.
 */
static extent_tree_t swap_chunks_szad;
static extent_tree_t swap_chunks_ad;
31
32/******************************************************************************/
33/* Function prototypes for non-inline static functions. */
34
35static void *chunk_recycle_swap(size_t size, bool *zero);
36static extent_node_t *chunk_dealloc_swap_record(void *chunk, size_t size);
37
38/******************************************************************************/
39
40static void *
41chunk_recycle_swap(size_t size, bool *zero)
42{
43 extent_node_t *node, key;
44
45 key.addr = NULL;
46 key.size = size;
47 malloc_mutex_lock(&swap_mtx);
48 node = extent_tree_szad_nsearch(&swap_chunks_szad, &key);
49 if (node != NULL) {
50 void *ret = node->addr;
51
52 /* Remove node from the tree. */
53 extent_tree_szad_remove(&swap_chunks_szad, node);
54 if (node->size == size) {
55 extent_tree_ad_remove(&swap_chunks_ad, node);
56 base_node_dealloc(node);
57 } else {
58 /*
59 * Insert the remainder of node's address range as a
60 * smaller chunk. Its position within swap_chunks_ad
61 * does not change.
62 */
63 assert(node->size > size);
64 node->addr = (void *)((uintptr_t)node->addr + size);
65 node->size -= size;
66 extent_tree_szad_insert(&swap_chunks_szad, node);
67 }
68#ifdef JEMALLOC_STATS
69 swap_avail -= size;
70#endif
71 malloc_mutex_unlock(&swap_mtx);
72
73 if (*zero)
74 memset(ret, 0, size);
75 return (ret);
76 }
77 malloc_mutex_unlock(&swap_mtx);
78
79 return (NULL);
80}
81
82void *
83chunk_alloc_swap(size_t size, bool *zero)
84{
85 void *ret;
86
87 assert(swap_enabled);
88
89 ret = chunk_recycle_swap(size, zero);
90 if (ret != NULL)
91 return (ret);
92
93 malloc_mutex_lock(&swap_mtx);
94 if ((uintptr_t)swap_end + size <= (uintptr_t)swap_max) {
95 ret = swap_end;
96 swap_end = (void *)((uintptr_t)swap_end + size);
97#ifdef JEMALLOC_STATS
98 swap_avail -= size;
99#endif
100 malloc_mutex_unlock(&swap_mtx);
101
102 if (swap_prezeroed)
103 *zero = true;
104 else if (*zero)
105 memset(ret, 0, size);
106 } else {
107 malloc_mutex_unlock(&swap_mtx);
108 return (NULL);
109 }
110
111 return (ret);
112}
113
114static extent_node_t *
115chunk_dealloc_swap_record(void *chunk, size_t size)
116{
117 extent_node_t *xnode, *node, *prev, key;
118
119 xnode = NULL;
120 while (true) {
121 key.addr = (void *)((uintptr_t)chunk + size);
122 node = extent_tree_ad_nsearch(&swap_chunks_ad, &key);
123 /* Try to coalesce forward. */
124 if (node != NULL && node->addr == key.addr) {
125 /*
126 * Coalesce chunk with the following address range.
127 * This does not change the position within
128 * swap_chunks_ad, so only remove/insert from/into
129 * swap_chunks_szad.
130 */
131 extent_tree_szad_remove(&swap_chunks_szad, node);
132 node->addr = chunk;
133 node->size += size;
134 extent_tree_szad_insert(&swap_chunks_szad, node);
135 break;
136 } else if (xnode == NULL) {
137 /*
138 * It is possible that base_node_alloc() will cause a
139 * new base chunk to be allocated, so take care not to
140 * deadlock on swap_mtx, and recover if another thread
141 * deallocates an adjacent chunk while this one is busy
142 * allocating xnode.
143 */
144 malloc_mutex_unlock(&swap_mtx);
145 xnode = base_node_alloc();
146 malloc_mutex_lock(&swap_mtx);
147 if (xnode == NULL)
148 return (NULL);
149 } else {
150 /* Coalescing forward failed, so insert a new node. */
151 node = xnode;
152 xnode = NULL;
153 node->addr = chunk;
154 node->size = size;
155 extent_tree_ad_insert(&swap_chunks_ad, node);
156 extent_tree_szad_insert(&swap_chunks_szad, node);
157 break;
158 }
159 }
160 /* Discard xnode if it ended up unused do to a race. */
161 if (xnode != NULL)
162 base_node_dealloc(xnode);
163
164 /* Try to coalesce backward. */
165 prev = extent_tree_ad_prev(&swap_chunks_ad, node);
166 if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
167 chunk) {
168 /*
169 * Coalesce chunk with the previous address range. This does
170 * not change the position within swap_chunks_ad, so only
171 * remove/insert node from/into swap_chunks_szad.
172 */
173 extent_tree_szad_remove(&swap_chunks_szad, prev);
174 extent_tree_ad_remove(&swap_chunks_ad, prev);
175
176 extent_tree_szad_remove(&swap_chunks_szad, node);
177 node->addr = prev->addr;
178 node->size += prev->size;
179 extent_tree_szad_insert(&swap_chunks_szad, node);
180
181 base_node_dealloc(prev);
182 }
183
184 return (node);
185}
186
187bool
188chunk_in_swap(void *chunk)
189{
190 bool ret;
191
192 assert(swap_enabled);
193
194 malloc_mutex_lock(&swap_mtx);
195 if ((uintptr_t)chunk >= (uintptr_t)swap_base
196 && (uintptr_t)chunk < (uintptr_t)swap_max)
197 ret = true;
198 else
199 ret = false;
200 malloc_mutex_unlock(&swap_mtx);
201
202 return (ret);
203}
204
205bool
206chunk_dealloc_swap(void *chunk, size_t size)
207{
208 bool ret;
209
210 assert(swap_enabled);
211
212 malloc_mutex_lock(&swap_mtx);
213 if ((uintptr_t)chunk >= (uintptr_t)swap_base
214 && (uintptr_t)chunk < (uintptr_t)swap_max) {
215 extent_node_t *node;
216
217 /* Try to coalesce with other unused chunks. */
218 node = chunk_dealloc_swap_record(chunk, size);
219 if (node != NULL) {
220 chunk = node->addr;
221 size = node->size;
222 }
223
224 /*
225 * Try to shrink the in-use memory if this chunk is at the end
226 * of the in-use memory.
227 */
228 if ((void *)((uintptr_t)chunk + size) == swap_end) {
229 swap_end = (void *)((uintptr_t)swap_end - size);
230
231 if (node != NULL) {
232 extent_tree_szad_remove(&swap_chunks_szad,
233 node);
234 extent_tree_ad_remove(&swap_chunks_ad, node);
235 base_node_dealloc(node);
236 }
237 } else
238 madvise(chunk, size, MADV_DONTNEED);
239
240#ifdef JEMALLOC_STATS
241 swap_avail += size;
242#endif
243 ret = false;
244 goto RETURN;
245 }
246
247 ret = true;
248RETURN:
249 malloc_mutex_unlock(&swap_mtx);
250 return (ret);
251}
252
253bool
254chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed)
255{
256 bool ret;
257 unsigned i;
258 off_t off;
259 void *vaddr;
260 size_t cumsize, voff;
261 size_t sizes[nfds];
262
263 malloc_mutex_lock(&swap_mtx);
264
265 /* Get file sizes. */
266 for (i = 0, cumsize = 0; i < nfds; i++) {
267 off = lseek(fds[i], 0, SEEK_END);
268 if (off == ((off_t)-1)) {
269 ret = true;
270 goto RETURN;
271 }
272 if (PAGE_CEILING(off) != off) {
273 /* Truncate to a multiple of the page size. */
274 off &= ~PAGE_MASK;
275 if (ftruncate(fds[i], off) != 0) {
276 ret = true;
277 goto RETURN;
278 }
279 }
280 sizes[i] = off;
281 if (cumsize + off < cumsize) {
282 /*
283 * Cumulative file size is greater than the total
284 * address space. Bail out while it's still obvious
285 * what the problem is.
286 */
287 ret = true;
288 goto RETURN;
289 }
290 cumsize += off;
291 }
292
293 /* Round down to a multiple of the chunk size. */
294 cumsize &= ~chunksize_mask;
295 if (cumsize == 0) {
296 ret = true;
297 goto RETURN;
298 }
299
300 /*
301 * Allocate a chunk-aligned region of anonymous memory, which will
302 * be the final location for the memory-mapped files.
303 */
304 vaddr = chunk_alloc_mmap_noreserve(cumsize);
305 if (vaddr == NULL) {
306 ret = true;
307 goto RETURN;
308 }
309
310 /* Overlay the files onto the anonymous mapping. */
311 for (i = 0, voff = 0; i < nfds; i++) {
312 void *addr = mmap((void *)((uintptr_t)vaddr + voff), sizes[i],
313 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fds[i], 0);
314 if (addr == MAP_FAILED) {
315 char buf[BUFERROR_BUF];
316
317
318 buferror(errno, buf, sizeof(buf));
319 malloc_write(
320 "<jemalloc>: Error in mmap(..., MAP_FIXED, ...): ");
321 malloc_write(buf);
322 malloc_write("\n");
323 if (opt_abort)
324 abort();
325 if (munmap(vaddr, voff) == -1) {
326 buferror(errno, buf, sizeof(buf));
327 malloc_write("<jemalloc>: Error in munmap(): ");
328 malloc_write(buf);
329 malloc_write("\n");
330 }
331 ret = true;
332 goto RETURN;
333 }
334 assert(addr == (void *)((uintptr_t)vaddr + voff));
335
336 /*
337 * Tell the kernel that the mapping will be accessed randomly,
338 * and that it should not gratuitously sync pages to the
339 * filesystem.
340 */
341#ifdef MADV_RANDOM
342 madvise(addr, sizes[i], MADV_RANDOM);
343#endif
344#ifdef MADV_NOSYNC
345 madvise(addr, sizes[i], MADV_NOSYNC);
346#endif
347
348 voff += sizes[i];
349 }
350
351 swap_prezeroed = prezeroed;
352 swap_base = vaddr;
353 swap_end = swap_base;
354 swap_max = (void *)((uintptr_t)vaddr + cumsize);
355
356 /* Copy the fds array for mallctl purposes. */
357 swap_fds = (int *)base_alloc(nfds * sizeof(int));
358 if (swap_fds == NULL) {
359 ret = true;
360 goto RETURN;
361 }
362 memcpy(swap_fds, fds, nfds * sizeof(int));
363 swap_nfds = nfds;
364
365#ifdef JEMALLOC_STATS
366 swap_avail = cumsize;
367#endif
368
369 swap_enabled = true;
370
371 ret = false;
372RETURN:
373 malloc_mutex_unlock(&swap_mtx);
374 return (ret);
375}
376
377bool
378chunk_swap_boot(void)
379{
380
381 if (malloc_mutex_init(&swap_mtx))
382 return (true);
383
384 swap_enabled = false;
385 swap_prezeroed = false; /* swap.* mallctl's depend on this. */
386 swap_nfds = 0;
387 swap_fds = NULL;
388#ifdef JEMALLOC_STATS
389 swap_avail = 0;
390#endif
391 swap_base = NULL;
392 swap_end = NULL;
393 swap_max = NULL;
394
395 extent_tree_szad_new(&swap_chunks_szad);
396 extent_tree_ad_new(&swap_chunks_ad);
397
398 return (false);
399}
400
401/******************************************************************************/
402#endif /* JEMALLOC_SWAP */