#define	JEMALLOC_CHUNK_SWAP_C_
#include "jemalloc/internal/jemalloc_internal.h"
#ifdef JEMALLOC_SWAP
/******************************************************************************/
/* Data. */

malloc_mutex_t	swap_mtx;
bool		swap_enabled;
bool		swap_prezeroed;
size_t		swap_nfds;
int		*swap_fds;
#ifdef JEMALLOC_STATS
size_t		swap_avail;
#endif

/* Base address of the mmap()ed file(s). */
static void	*swap_base;
/* Current end of the space in use (<= swap_max). */
static void	*swap_end;
/* Absolute upper limit on file-backed addresses. */
static void	*swap_max;

/*
 * Trees of chunks that were previously allocated (trees differ only in node
 * ordering).  These are used when allocating chunks, in an attempt to re-use
 * address space.  Depending on function, different tree orderings are needed,
 * which is why there are two trees with the same contents.
 */
static extent_tree_t	swap_chunks_szad;
static extent_tree_t	swap_chunks_ad;

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static void	*chunk_recycle_swap(size_t size, bool *zero);
static extent_node_t *chunk_dealloc_swap_record(void *chunk, size_t size);

/******************************************************************************/

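/*
 * Attempt to satisfy an allocation by reusing a previously deallocated
 * region of the swap mapping.  The size/address-ordered tree is searched
 * for an extent of at least "size" bytes (best fit by size, then address);
 * if the match is larger than requested, the unused remainder is reinserted
 * as a smaller extent.  Returns NULL if no suitable extent is recorded.
 */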
static void *
chunk_recycle_swap(size_t size, bool *zero)
{
	extent_node_t *node, key;

	key.addr = NULL;
	key.size = size;
	malloc_mutex_lock(&swap_mtx);
	node = extent_tree_szad_nsearch(&swap_chunks_szad, &key);
	if (node != NULL) {
		void *ret = node->addr;

		/* Remove node from the tree. */
		extent_tree_szad_remove(&swap_chunks_szad, node);
		if (node->size == size) {
			extent_tree_ad_remove(&swap_chunks_ad, node);
			base_node_dealloc(node);
		} else {
			/*
			 * Insert the remainder of node's address range as a
			 * smaller chunk.  Its position within swap_chunks_ad
			 * does not change.
			 */
			assert(node->size > size);
			node->addr = (void *)((uintptr_t)node->addr + size);
			node->size -= size;
			extent_tree_szad_insert(&swap_chunks_szad, node);
		}
#ifdef JEMALLOC_STATS
		swap_avail -= size;
#endif
		malloc_mutex_unlock(&swap_mtx);

		if (*zero)
			memset(ret, 0, size);
		return (ret);
	}
	malloc_mutex_unlock(&swap_mtx);

	return (NULL);
}

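/*
 * Allocate a "size"-byte chunk from the swap mapping.  Recycled extents are
 * preferred; otherwise the chunk is carved off the end of the in-use region
 * by advancing swap_end.  On entry, *zero indicates whether the caller
 * requires zeroed memory; on return it is true if the returned memory is
 * known to be zero-filled.
 */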
void *
chunk_alloc_swap(size_t size, bool *zero)
{
	void *ret;

	assert(swap_enabled);

	ret = chunk_recycle_swap(size, zero);
	if (ret != NULL)
		return (ret);

	malloc_mutex_lock(&swap_mtx);
	if ((uintptr_t)swap_end + size <= (uintptr_t)swap_max) {
		ret = swap_end;
		swap_end = (void *)((uintptr_t)swap_end + size);
#ifdef JEMALLOC_STATS
		swap_avail -= size;
#endif
		malloc_mutex_unlock(&swap_mtx);

		if (swap_prezeroed)
			*zero = true;
		else if (*zero)
			memset(ret, 0, size);
	} else {
		malloc_mutex_unlock(&swap_mtx);
		return (NULL);
	}

	return (ret);
}

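/*
 * Record a deallocated region in the trees of unused extents, coalescing it
 * with adjacent recorded regions where possible.  Called with swap_mtx held;
 * the lock is dropped temporarily if a new extent node must be allocated via
 * base_node_alloc().  Returns the (possibly coalesced) node, or NULL if node
 * allocation fails.
 */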
static extent_node_t *
chunk_dealloc_swap_record(void *chunk, size_t size)
{
	extent_node_t *xnode, *node, *prev, key;

	xnode = NULL;
	while (true) {
		key.addr = (void *)((uintptr_t)chunk + size);
		node = extent_tree_ad_nsearch(&swap_chunks_ad, &key);
		/* Try to coalesce forward. */
		if (node != NULL && node->addr == key.addr) {
			/*
			 * Coalesce chunk with the following address range.
			 * This does not change the position within
			 * swap_chunks_ad, so only remove/insert from/into
			 * swap_chunks_szad.
			 */
			extent_tree_szad_remove(&swap_chunks_szad, node);
			node->addr = chunk;
			node->size += size;
			extent_tree_szad_insert(&swap_chunks_szad, node);
			break;
		} else if (xnode == NULL) {
			/*
			 * It is possible that base_node_alloc() will cause a
			 * new base chunk to be allocated, so take care not to
			 * deadlock on swap_mtx, and recover if another thread
			 * deallocates an adjacent chunk while this one is busy
			 * allocating xnode.
			 */
			malloc_mutex_unlock(&swap_mtx);
			xnode = base_node_alloc();
			malloc_mutex_lock(&swap_mtx);
			if (xnode == NULL)
				return (NULL);
		} else {
			/* Coalescing forward failed, so insert a new node. */
			node = xnode;
			xnode = NULL;
			node->addr = chunk;
			node->size = size;
			extent_tree_ad_insert(&swap_chunks_ad, node);
			extent_tree_szad_insert(&swap_chunks_szad, node);
			break;
		}
	}
	/* Discard xnode if it ended up unused due to a race. */
	if (xnode != NULL)
		base_node_dealloc(xnode);

	/* Try to coalesce backward. */
	prev = extent_tree_ad_prev(&swap_chunks_ad, node);
	if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
	    chunk) {
		/*
		 * Coalesce chunk with the previous address range.  This does
		 * not change the position within swap_chunks_ad, so only
		 * remove/insert node from/into swap_chunks_szad.
		 */
		extent_tree_szad_remove(&swap_chunks_szad, prev);
		extent_tree_ad_remove(&swap_chunks_ad, prev);

		extent_tree_szad_remove(&swap_chunks_szad, node);
		node->addr = prev->addr;
		node->size += prev->size;
		extent_tree_szad_insert(&swap_chunks_szad, node);

		base_node_dealloc(prev);
	}

	return (node);
}

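/* Return whether chunk lies within the file-backed (swap) address range. */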
bool
chunk_in_swap(void *chunk)
{
	bool ret;

	assert(swap_enabled);

	malloc_mutex_lock(&swap_mtx);
	if ((uintptr_t)chunk >= (uintptr_t)swap_base
	    && (uintptr_t)chunk < (uintptr_t)swap_max)
		ret = true;
	else
		ret = false;
	malloc_mutex_unlock(&swap_mtx);

	return (ret);
}

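/*
 * Deallocate a chunk if it belongs to the swap mapping: the region is
 * recorded for reuse (coalescing with neighbors), the in-use region is
 * trimmed when the chunk abuts swap_end, and otherwise the pages are
 * released to the kernel via madvise(MADV_DONTNEED).  Returns false if the
 * chunk was handled here, true if it is not swap-backed.
 */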
bool
chunk_dealloc_swap(void *chunk, size_t size)
{
	bool ret;

	assert(swap_enabled);

	malloc_mutex_lock(&swap_mtx);
	if ((uintptr_t)chunk >= (uintptr_t)swap_base
	    && (uintptr_t)chunk < (uintptr_t)swap_max) {
		extent_node_t *node;

		/* Try to coalesce with other unused chunks. */
		node = chunk_dealloc_swap_record(chunk, size);
		if (node != NULL) {
			chunk = node->addr;
			size = node->size;
		}

		/*
		 * Try to shrink the in-use region if this chunk is at its
		 * end.
		 */
		if ((void *)((uintptr_t)chunk + size) == swap_end) {
			swap_end = (void *)((uintptr_t)swap_end - size);

			if (node != NULL) {
				extent_tree_szad_remove(&swap_chunks_szad,
				    node);
				extent_tree_ad_remove(&swap_chunks_ad, node);
				base_node_dealloc(node);
			}
		} else
			madvise(chunk, size, MADV_DONTNEED);

#ifdef JEMALLOC_STATS
		swap_avail += size;
#endif
		ret = false;
		goto RETURN;
	}

	ret = true;
RETURN:
	malloc_mutex_unlock(&swap_mtx);
	return (ret);
}

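/*
 * Enable file-backed ("swap") chunk allocation.  Each file is truncated to a
 * page-size multiple, the cumulative size is rounded down to a chunk-size
 * multiple, a chunk-aligned anonymous region of that size is reserved, and
 * the files are then mapped over it with MAP_SHARED|MAP_FIXED.  "prezeroed"
 * asserts that the file contents are already zero-filled, which lets later
 * allocations skip memset().  Returns false on success, true on error.
 *
 * A minimal usage sketch, assuming this is reached by writing descriptors to
 * the swap.* mallctl interface (error handling omitted; the exact mallctl
 * name and wiring live in ctl.c, not here):
 *
 *	int fds[1];
 *	fds[0] = open("/var/tmp/jemalloc.swap", O_RDWR);
 *	mallctl("swap.fds", NULL, NULL, fds, sizeof(fds));
 */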
bool
chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed)
{
	bool ret;
	unsigned i;
	off_t off;
	void *vaddr;
	size_t cumsize, voff;
	size_t sizes[nfds];

	malloc_mutex_lock(&swap_mtx);

	/* Get file sizes. */
	for (i = 0, cumsize = 0; i < nfds; i++) {
		off = lseek(fds[i], 0, SEEK_END);
		if (off == ((off_t)-1)) {
			ret = true;
			goto RETURN;
		}
		if (PAGE_CEILING(off) != off) {
			/* Truncate to a multiple of the page size. */
			off &= ~PAGE_MASK;
			if (ftruncate(fds[i], off) != 0) {
				ret = true;
				goto RETURN;
			}
		}
		sizes[i] = off;
		if (cumsize + off < cumsize) {
			/*
			 * Cumulative file size is greater than the total
			 * address space.  Bail out while it's still obvious
			 * what the problem is.
			 */
			ret = true;
			goto RETURN;
		}
		cumsize += off;
	}

	/* Round down to a multiple of the chunk size. */
	cumsize &= ~chunksize_mask;
	if (cumsize == 0) {
		ret = true;
		goto RETURN;
	}

	/*
	 * Allocate a chunk-aligned region of anonymous memory, which will
	 * be the final location for the memory-mapped files.
	 */
	vaddr = chunk_alloc_mmap_noreserve(cumsize);
	if (vaddr == NULL) {
		ret = true;
		goto RETURN;
	}

	/* Overlay the files onto the anonymous mapping. */
	for (i = 0, voff = 0; i < nfds; i++) {
		void *addr = mmap((void *)((uintptr_t)vaddr + voff), sizes[i],
		    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fds[i], 0);
		if (addr == MAP_FAILED) {
			char buf[BUFERROR_BUF];

			buferror(errno, buf, sizeof(buf));
			malloc_write(
			    "<jemalloc>: Error in mmap(..., MAP_FIXED, ...): ");
			malloc_write(buf);
			malloc_write("\n");
			if (opt_abort)
				abort();
			if (munmap(vaddr, voff) == -1) {
				buferror(errno, buf, sizeof(buf));
				malloc_write("<jemalloc>: Error in munmap(): ");
				malloc_write(buf);
				malloc_write("\n");
			}
			ret = true;
			goto RETURN;
		}
		assert(addr == (void *)((uintptr_t)vaddr + voff));

		/*
		 * Tell the kernel that the mapping will be accessed randomly,
		 * and that it should not gratuitously sync pages to the
		 * filesystem.
		 */
#ifdef MADV_RANDOM
		madvise(addr, sizes[i], MADV_RANDOM);
#endif
#ifdef MADV_NOSYNC
		madvise(addr, sizes[i], MADV_NOSYNC);
#endif

		voff += sizes[i];
	}

	swap_prezeroed = prezeroed;
	swap_base = vaddr;
	swap_end = swap_base;
	swap_max = (void *)((uintptr_t)vaddr + cumsize);

	/* Copy the fds array for mallctl purposes. */
	swap_fds = (int *)base_alloc(nfds * sizeof(int));
	if (swap_fds == NULL) {
		ret = true;
		goto RETURN;
	}
	memcpy(swap_fds, fds, nfds * sizeof(int));
	swap_nfds = nfds;

#ifdef JEMALLOC_STATS
	swap_avail = cumsize;
#endif

	swap_enabled = true;

	ret = false;
RETURN:
	malloc_mutex_unlock(&swap_mtx);
	return (ret);
}

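/* Initialize the swap module's mutex, globals, and extent trees at startup. */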
bool
chunk_swap_boot(void)
{

	if (malloc_mutex_init(&swap_mtx))
		return (true);

	swap_enabled = false;
	swap_prezeroed = false; /* swap.* mallctls depend on this. */
	swap_nfds = 0;
	swap_fds = NULL;
#ifdef JEMALLOC_STATS
	swap_avail = 0;
#endif
	swap_base = NULL;
	swap_end = NULL;
	swap_max = NULL;

	extent_tree_szad_new(&swap_chunks_szad);
	extent_tree_ad_new(&swap_chunks_ad);

	return (false);
}

/******************************************************************************/
#endif /* JEMALLOC_SWAP */