/* apple/xnu: osfmk/vm/vm_phantom_cache.c (xnu-3789.31.2) */

/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phantom_cache.h>
#include <vm/vm_compressor.h>


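/*
 * Tunables: the length of one sampling/evaluation period and the
 * per-period thrashing thresholds.  As is_thrashing() notes below, the
 * threshold is latency-dependent: the cheaper a re-read is, the more
 * re-reads of recently evicted pages it takes before the traffic counts
 * as thrashing, hence the much higher threshold for SSD-backed objects.
 */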
uint32_t phantom_cache_eval_period_in_msecs = 250;
uint32_t phantom_cache_thrashing_threshold_ssd = 1000;
uint32_t phantom_cache_thrashing_threshold = 100;

/*
 * Number of consecutive thrashing periods required before
 * vm_phantom_cache_check_pressure() returns true.
 */
unsigned phantom_cache_contiguous_periods = 2;

clock_sec_t pc_start_of_eval_period_sec = 0;
clock_nsec_t pc_start_of_eval_period_nsec = 0;
boolean_t pc_need_eval_reset = FALSE;

/* One bit per recent sampling period. Bit 0 = current period. */
uint32_t pc_history = 0;

uint32_t sample_period_ghost_added_count = 0;
uint32_t sample_period_ghost_added_count_ssd = 0;
uint32_t sample_period_ghost_found_count = 0;
uint32_t sample_period_ghost_found_count_ssd = 0;

uint32_t vm_phantom_object_id = 1;
#define VM_PHANTOM_OBJECT_ID_AFTER_WRAP 1000000

vm_ghost_t vm_phantom_cache;
uint32_t vm_phantom_cache_nindx = 1;
uint32_t vm_phantom_cache_num_entries = 0;
uint32_t vm_phantom_cache_size;

typedef uint32_t vm_phantom_hash_entry_t;
vm_phantom_hash_entry_t *vm_phantom_cache_hash;
uint32_t vm_phantom_cache_hash_size;
uint32_t vm_ghost_hash_mask;	/* Mask for hash function */
uint32_t vm_ghost_bucket_hash;	/* Basic bucket hash */


int pg_masks[4] = {
	0x1, 0x2, 0x4, 0x8
};


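/*
 * Illustrative sketch (not compiled): how a page maps to a bit in its
 * ghost entry.  Each vm_ghost_t covers VM_GHOST_PAGES_PER_ENTRY (4)
 * consecutive pages of one object, one g_pages_held bit per page.
 * Assuming 4K pages (PAGE_SHIFT == 12) and VM_GHOST_PAGE_MASK == 0x3,
 * a page at object offset 0x6000 has page index 6, lands in position
 * 6 & 0x3 == 2 within its 4-page group, and is tracked by
 * pg_masks[2] == 0x4.
 */
#if 0	/* example only */
static int
example_pg_mask(vm_object_offset_t offset)
{
	/* select the g_pages_held bit for the page at 'offset' */
	return pg_masks[(offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];
}
#endif
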
#define vm_phantom_hash(obj_id, offset) (\
	( (natural_t)((uintptr_t)obj_id * vm_ghost_bucket_hash) + (offset ^ vm_ghost_bucket_hash)) & vm_ghost_hash_mask)


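/*
 * Worked example of vm_phantom_hash (hypothetical sizes): with a
 * 1024-entry table, vm_ghost_hash_mask == 0x3ff and vm_phantom_cache_init()
 * below derives vm_ghost_bucket_hash == (1 << 5) | (1 << 2) | 1 == 0x25.
 * Then vm_phantom_hash(7, 3) == ((7 * 0x25) + (3 ^ 0x25)) & 0x3ff ==
 * (0x103 + 0x26) & 0x3ff == 0x129.  The odd multiplier scatters object ids
 * across buckets; the XOR'd offset term separates entries of one object.
 */
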
struct phantom_cache_stats {
	uint32_t pcs_wrapped;
	uint32_t pcs_added_page_to_entry;
	uint32_t pcs_added_new_entry;
	uint32_t pcs_replaced_entry;

	uint32_t pcs_lookup_found_page_in_cache;
	uint32_t pcs_lookup_entry_not_in_cache;
	uint32_t pcs_lookup_page_not_in_entry;

	uint32_t pcs_updated_phantom_state;
} phantom_cache_stats;


void
vm_phantom_cache_init()
{
	unsigned int num_entries;
	unsigned int log1;
	unsigned int size;

	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE)
		return;
	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 4) / VM_GHOST_PAGES_PER_ENTRY);
	vm_phantom_cache_num_entries = 1;

	while (vm_phantom_cache_num_entries < num_entries)
		vm_phantom_cache_num_entries <<= 1;

	vm_phantom_cache_size = sizeof(struct vm_ghost) * vm_phantom_cache_num_entries;
	vm_phantom_cache_hash_size = sizeof(vm_phantom_hash_entry_t) * vm_phantom_cache_num_entries;

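	/*
	 * Sizing sketch (illustrative arithmetic, assuming 4K pages and
	 * VM_GHOST_PAGES_PER_ENTRY == 4): with max_mem == 8GB there are
	 * 2,097,152 physical pages; tracking a quarter of them at 4 pages
	 * per entry asks for 524,288 / 4 == 131,072 entries, already a
	 * power of two (2^17), so the loop above settles on 131,072
	 * entries and vm_ghost_hash_mask below becomes 0x1ffff.
	 */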
	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache), vm_phantom_cache_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS)
		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	bzero(vm_phantom_cache, vm_phantom_cache_size);

	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache_hash), vm_phantom_cache_hash_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS)
		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	bzero(vm_phantom_cache_hash, vm_phantom_cache_hash_size);


	vm_ghost_hash_mask = vm_phantom_cache_num_entries - 1;

	/*
	 * Build the multiplicative hash constant for vm_phantom_hash()
	 * from the table size: set bits near the square root and the
	 * fourth root of the number of entries, then force the low bit
	 * so the multiplier is always odd.
	 */
	size = vm_phantom_cache_num_entries;
	for (log1 = 0; size > 1; log1++)
		size /= 2;

	vm_ghost_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
	vm_ghost_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
	vm_ghost_bucket_hash |= 1;			/* Low bit must always be set to ensure a unique series */

	if (vm_ghost_hash_mask & vm_phantom_cache_num_entries)
		printf("vm_phantom_cache_init: WARNING -- strange page hash\n");
}


void
vm_phantom_cache_add_ghost(vm_page_t m)
{
	vm_ghost_t vpce;
	vm_object_t object;
	int ghost_index;
	int pg_mask;
	boolean_t isSSD = FALSE;
	vm_phantom_hash_entry_t ghost_hash_index;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0)
		return;

	pg_mask = pg_masks[(m->offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if (object->phantom_object_id == 0) {

		vnode_pager_get_isSSD(object->pager, &isSSD);

		if (isSSD == TRUE)
			object->phantom_isssd = TRUE;

		object->phantom_object_id = vm_phantom_object_id++;

		if (vm_phantom_object_id == 0)
			vm_phantom_object_id = VM_PHANTOM_OBJECT_ID_AFTER_WRAP;
	} else {
		if ( (vpce = vm_phantom_cache_lookup_ghost(m, 0)) ) {
			vpce->g_pages_held |= pg_mask;

			phantom_cache_stats.pcs_added_page_to_entry++;
			goto done;
		}
	}
	/*
	 * If we get here, this page has no vm_ghost_t in the phantom cache
	 * yet: claim the next slot in the ring (which approximates LRU
	 * order), evicting whatever entry occupies it once the ring has
	 * wrapped.  Slot 0 is never used, since hash chains treat a
	 * g_next_index of 0 as the end-of-chain marker.
	 */
	ghost_index = vm_phantom_cache_nindx++;

	if (vm_phantom_cache_nindx == vm_phantom_cache_num_entries) {
		vm_phantom_cache_nindx = 1;

		phantom_cache_stats.pcs_wrapped++;
	}
	vpce = &vm_phantom_cache[ghost_index];

	if (vpce->g_obj_id) {
		/*
		 * we're going to replace an existing entry
		 * so first remove it from the hash
		 */
		vm_ghost_t nvpce;

		ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);

		nvpce = &vm_phantom_cache[vm_phantom_cache_hash[ghost_hash_index]];

		if (nvpce == vpce) {
			vm_phantom_cache_hash[ghost_hash_index] = vpce->g_next_index;
		} else {
			for (;;) {
				if (nvpce->g_next_index == 0)
					panic("didn't find ghost in hash\n");

				if (&vm_phantom_cache[nvpce->g_next_index] == vpce) {
					nvpce->g_next_index = vpce->g_next_index;
					break;
				}
				nvpce = &vm_phantom_cache[nvpce->g_next_index];
			}
		}
		phantom_cache_stats.pcs_replaced_entry++;
	} else
		phantom_cache_stats.pcs_added_new_entry++;

	vpce->g_pages_held = pg_mask;
	vpce->g_obj_offset = (m->offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
	vpce->g_obj_id = object->phantom_object_id;

	ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
	vpce->g_next_index = vm_phantom_cache_hash[ghost_hash_index];
	vm_phantom_cache_hash[ghost_hash_index] = ghost_index;

done:
	if (object->phantom_isssd)
		OSAddAtomic(1, &sample_period_ghost_added_count_ssd);
	else
		OSAddAtomic(1, &sample_period_ghost_added_count);
}


vm_ghost_t
vm_phantom_cache_lookup_ghost(vm_page_t m, uint32_t pg_mask)
{
	uint64_t g_obj_offset;
	uint32_t g_obj_id;
	uint32_t ghost_index;
	vm_object_t object;

	object = VM_PAGE_OBJECT(m);

	if ((g_obj_id = object->phantom_object_id) == 0) {
		/*
		 * no entries in phantom cache for this object
		 */
		return (NULL);
	}
	g_obj_offset = (m->offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;

	ghost_index = vm_phantom_cache_hash[vm_phantom_hash(g_obj_id, g_obj_offset)];

	while (ghost_index) {
		vm_ghost_t vpce;

		vpce = &vm_phantom_cache[ghost_index];

		if (vpce->g_obj_id == g_obj_id && vpce->g_obj_offset == g_obj_offset) {

			if (pg_mask == 0 || (vpce->g_pages_held & pg_mask)) {
				phantom_cache_stats.pcs_lookup_found_page_in_cache++;

				return (vpce);
			}
			phantom_cache_stats.pcs_lookup_page_not_in_entry++;

			return (NULL);
		}
		ghost_index = vpce->g_next_index;
	}
	phantom_cache_stats.pcs_lookup_entry_not_in_cache++;

	return (NULL);
}


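/*
 * Usage sketch (hypothetical caller, not part of this file): a fault path
 * that brings a file-backed page back in can ask whether that exact page
 * was recently evicted by probing with the page's bit mask.
 * vm_phantom_cache_update() below does just that, and additionally clears
 * the bit so a re-read is only counted once.
 */
#if 0	/* example only */
static boolean_t
example_was_recently_evicted(vm_page_t m)
{
	int pg_mask;

	/* bit for this page within its 4-page ghost entry */
	pg_mask = pg_masks[(m->offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	return (vm_phantom_cache_lookup_ghost(m, pg_mask) != NULL);
}
#endif
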
void
vm_phantom_cache_update(vm_page_t m)
{
	int pg_mask;
	vm_ghost_t vpce;
	vm_object_t object;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0)
		return;

	pg_mask = pg_masks[(m->offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if ( (vpce = vm_phantom_cache_lookup_ghost(m, pg_mask)) ) {

		vpce->g_pages_held &= ~pg_mask;

		phantom_cache_stats.pcs_updated_phantom_state++;

		if (object->phantom_isssd)
			OSAddAtomic(1, &sample_period_ghost_found_count_ssd);
		else
			OSAddAtomic(1, &sample_period_ghost_found_count);
	}
}


#define PHANTOM_CACHE_DEBUG 1

#if PHANTOM_CACHE_DEBUG

int sample_period_ghost_counts_indx = 0;

struct {
	uint32_t added;
	uint32_t found;
	uint32_t added_ssd;
	uint32_t found_ssd;
	uint32_t elapsed_ms;
	boolean_t pressure_detected;
} sample_period_ghost_counts[256];

#endif

/*
 * Determine if the file cache is thrashing from sampling interval statistics.
 *
 * Pages added to the phantom cache = pages evicted from the file cache.
 * Pages found in the phantom cache = reads of pages that were recently evicted.
 * Threshold is the latency-dependent number of reads we consider thrashing.
 */
static boolean_t
is_thrashing(uint32_t added, uint32_t found, uint32_t threshold)
{
	/* Ignore normal activity below the threshold. */
	if (added < threshold || found < threshold)
		return FALSE;

	/*
	 * When thrashing in a way that we can mitigate, most of the pages read
	 * into the file cache were recently evicted, and 'found' will be close
	 * to 'added'.
	 *
	 * When replacing the current working set because a new app is
	 * launched, we see very high read traffic with sporadic phantom cache
	 * hits.
	 *
	 * This is not thrashing, or freeing up memory wouldn't help much
	 * anyway.
	 */
	if (found < added / 2)
		return FALSE;

	return TRUE;
}

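/*
 * Worked examples (hypothetical counts against the spinning-disk threshold
 * of 100): added == 400, found == 250 -> both counts are over threshold and
 * found >= added/2, so the period counts as thrashing.  added == 5000,
 * found == 300 -> heavy read traffic but sporadic phantom hits (the
 * app-launch pattern described above), so it does not.  added == 90,
 * found == 90 -> below threshold, ignored as normal activity.
 */
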
/*
 * This function is never called from multiple threads simultaneously:
 * a condition variable at the compressor level serializes the callers,
 * so the sample processing below needs no locking of its own.
 */
boolean_t
vm_phantom_cache_check_pressure()
{
	clock_sec_t cur_ts_sec;
	clock_nsec_t cur_ts_nsec;
	uint64_t elapsed_msecs_in_eval;
	boolean_t pressure_detected = FALSE;

	clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);

	elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, pc_start_of_eval_period_sec, pc_start_of_eval_period_nsec);

	/*
	 * Reset evaluation period after phantom_cache_eval_period_in_msecs or
	 * whenever vm_phantom_cache_restart_sample has been called.
	 */
	if (elapsed_msecs_in_eval >= phantom_cache_eval_period_in_msecs) {
		pc_need_eval_reset = TRUE;
	}

	if (pc_need_eval_reset == TRUE) {

#if PHANTOM_CACHE_DEBUG
		/*
		 * maintain some info about the last 256 sample periods
		 */
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added = sample_period_ghost_added_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found = sample_period_ghost_found_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added_ssd = sample_period_ghost_added_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found_ssd = sample_period_ghost_found_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].elapsed_ms = (uint32_t)elapsed_msecs_in_eval;

		sample_period_ghost_counts_indx++;

		if (sample_period_ghost_counts_indx >= 256)
			sample_period_ghost_counts_indx = 0;
#endif
		sample_period_ghost_added_count = 0;
		sample_period_ghost_found_count = 0;
		sample_period_ghost_added_count_ssd = 0;
		sample_period_ghost_found_count_ssd = 0;

		pc_start_of_eval_period_sec = cur_ts_sec;
		pc_start_of_eval_period_nsec = cur_ts_nsec;
		pc_history <<= 1;
		pc_need_eval_reset = FALSE;
	} else {
		/*
		 * Since the thrashing rate is really a function of the read
		 * latency of the disk, we have to consider both the SSD and
		 * spinning-disk cases, since the file cache could be backed
		 * by either or even both flavors.  When an object is first
		 * assigned a phantom_object_id, we query the pager to
		 * determine whether the backing media is an SSD and remember
		 * that answer in the vm_object.  We use that info to maintain
		 * separate counts for the SSD and spinning-disk cases.
		 */
		if (is_thrashing(sample_period_ghost_added_count,
				 sample_period_ghost_found_count,
				 phantom_cache_thrashing_threshold) ||
		    is_thrashing(sample_period_ghost_added_count_ssd,
				 sample_period_ghost_found_count_ssd,
				 phantom_cache_thrashing_threshold_ssd)) {
			/* Thrashing in the current period: Set bit 0. */
			pc_history |= 1;
		}
	}

	/*
	 * Declare pressure_detected only after phantom_cache_contiguous_periods
	 * consecutive thrashing periods.
	 *
	 * Create a bitmask with the N low bits set. These bits must all be set
	 * in pc_history. The high bits of pc_history are ignored.
	 */
	uint32_t bitmask = (1u << phantom_cache_contiguous_periods) - 1;
	if ((pc_history & bitmask) == bitmask)
		pressure_detected = TRUE;

	if (vm_page_external_count > ((AVAILABLE_MEMORY) * 50) / 100)
		pressure_detected = FALSE;

#if PHANTOM_CACHE_DEBUG
	sample_period_ghost_counts[sample_period_ghost_counts_indx].pressure_detected = pressure_detected;
#endif
	return (pressure_detected);
}

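/*
 * Worked example of the history check above (hypothetical values): with
 * phantom_cache_contiguous_periods == 2 the bitmask is 0x3, so pressure is
 * declared only when bit 0 (the current period) and bit 1 (the previous
 * period) are both set, i.e. two consecutive sampling periods were
 * thrashing.  A single thrashing period leaves (pc_history & 0x3) == 0x1,
 * which does not match.  Even then, the vm_page_external_count test
 * suppresses the signal while file-backed pages still occupy more than
 * half of AVAILABLE_MEMORY.
 */
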
/*
 * Restart the current sampling because conditions have changed significantly,
 * and we don't want to react to old data.
 *
 * This function can be called from any thread.
 */
void
vm_phantom_cache_restart_sample(void)
{
	pc_need_eval_reset = TRUE;
}