/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phantom_cache.h>
#include <vm/vm_compressor.h>


uint32_t phantom_cache_eval_period_in_msecs = 250;
uint32_t phantom_cache_thrashing_threshold_ssd = 1000;
#if CONFIG_EMBEDDED
uint32_t phantom_cache_thrashing_threshold = 500;
#else
uint32_t phantom_cache_thrashing_threshold = 50;
#endif

/*
 * Number of consecutive thrashing periods required before
 * vm_phantom_cache_check_pressure() returns true.
 */
#if CONFIG_EMBEDDED
unsigned phantom_cache_contiguous_periods = 4;
#else
unsigned phantom_cache_contiguous_periods = 2;
#endif

clock_sec_t pc_start_of_eval_period_sec = 0;
clock_nsec_t pc_start_of_eval_period_nsec = 0;
boolean_t pc_need_eval_reset = FALSE;

/* One bit per recent sampling period. Bit 0 = current period. */
uint32_t pc_history = 0;

uint32_t sample_period_ghost_added_count = 0;
uint32_t sample_period_ghost_added_count_ssd = 0;
uint32_t sample_period_ghost_found_count = 0;
uint32_t sample_period_ghost_found_count_ssd = 0;

uint32_t vm_phantom_object_id = 1;
#define VM_PHANTOM_OBJECT_ID_AFTER_WRAP 1000000
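/*
 * Note: when the 32-bit id wraps, numbering resumes at
 * VM_PHANTOM_OBJECT_ID_AFTER_WRAP rather than at 1, presumably so that
 * the low ids handed out early in boot (likely still attached to
 * long-lived objects) are not immediately reissued.
 */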

vm_ghost_t vm_phantom_cache;
uint32_t vm_phantom_cache_nindx = 1;
uint32_t vm_phantom_cache_num_entries = 0;
uint32_t vm_phantom_cache_size;

typedef uint32_t vm_phantom_hash_entry_t;
vm_phantom_hash_entry_t *vm_phantom_cache_hash;
uint32_t vm_phantom_cache_hash_size;
uint32_t vm_ghost_hash_mask;            /* Mask for hash function */
uint32_t vm_ghost_bucket_hash;          /* Basic bucket hash */

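/*
 * Each vm_ghost entry covers VM_GHOST_PAGES_PER_ENTRY (4) consecutive
 * pages: g_pages_held keeps one bit per page, and pg_masks selects the
 * bit for a page's position within its group of 4.
 */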
int pg_masks[4] = {
	0x1, 0x2, 0x4, 0x8
};

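/*
 * Multiplicative bucket hash: the object id is scattered by multiplying
 * with vm_ghost_bucket_hash (odd by construction, see
 * vm_phantom_cache_init), the offset is mixed in with an XOR, and the
 * result is reduced to a table index with vm_ghost_hash_mask (the table
 * size is a power of 2).
 */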
#define vm_phantom_hash(obj_id, offset) (\
	( (natural_t)((uintptr_t)obj_id * vm_ghost_bucket_hash) + (offset ^ vm_ghost_bucket_hash)) & vm_ghost_hash_mask)


struct phantom_cache_stats {
	uint32_t pcs_wrapped;
	uint32_t pcs_added_page_to_entry;
	uint32_t pcs_added_new_entry;
	uint32_t pcs_replaced_entry;

	uint32_t pcs_lookup_found_page_in_cache;
	uint32_t pcs_lookup_entry_not_in_cache;
	uint32_t pcs_lookup_page_not_in_entry;

	uint32_t pcs_updated_phantom_state;
} phantom_cache_stats;


void
vm_phantom_cache_init()
{
	unsigned int num_entries;
	unsigned int log1;
	unsigned int size;

	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
		return;
	}
#if CONFIG_EMBEDDED
	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 10) / VM_GHOST_PAGES_PER_ENTRY);
#else
	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 4) / VM_GHOST_PAGES_PER_ENTRY);
#endif
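	/*
	 * Worked example, assuming 4 KB pages: with max_mem = 8 GB there
	 * are 2M pages; tracking a quarter of them at 4 pages per entry
	 * gives num_entries = 131072. That is already a power of 2, so
	 * the rounding loop below leaves it unchanged.
	 */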
	vm_phantom_cache_num_entries = 1;

	while (vm_phantom_cache_num_entries < num_entries) {
		vm_phantom_cache_num_entries <<= 1;
	}

	/*
	 * We index this with g_next_index, so don't exceed the width of that bitfield.
	 */
	if (vm_phantom_cache_num_entries > (1 << VM_GHOST_INDEX_BITS)) {
		vm_phantom_cache_num_entries = (1 << VM_GHOST_INDEX_BITS);
	}

	vm_phantom_cache_size = sizeof(struct vm_ghost) * vm_phantom_cache_num_entries;
	vm_phantom_cache_hash_size = sizeof(vm_phantom_hash_entry_t) * vm_phantom_cache_num_entries;

	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache), vm_phantom_cache_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS) {
		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	}
	bzero(vm_phantom_cache, vm_phantom_cache_size);

	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache_hash), vm_phantom_cache_hash_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS) {
		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	}
	bzero(vm_phantom_cache_hash, vm_phantom_cache_hash_size);


	vm_ghost_hash_mask = vm_phantom_cache_num_entries - 1;

	/*
	 * Derive an odd multiplier for the bucket hash from log2 of the
	 * table size: combine (roughly) the square root and fourth root
	 * of the table size and set the low bit. The loop below computes
	 * the log.
	 */
	size = vm_phantom_cache_num_entries;
	for (log1 = 0; size > 1; log1++) {
		size /= 2;
	}

	vm_ghost_bucket_hash = 1 << ((log1 + 1) >> 1);  /* Get (ceiling of sqrt of table size) */
	vm_ghost_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
	vm_ghost_bucket_hash |= 1;                      /* Set the low bit - it must always be 1 to ensure a unique series */

	if (vm_ghost_hash_mask & vm_phantom_cache_num_entries) {
		printf("vm_phantom_cache_init: WARNING -- strange page hash\n");
	}
}


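/*
 * Remember the identity of a file-backed page as it leaves the page
 * cache, so that a later re-read can be recognized as the return of a
 * recently evicted page. The caller must hold the page queues lock and
 * the exclusive object lock, per the asserts below.
 */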
void
vm_phantom_cache_add_ghost(vm_page_t m)
{
	vm_ghost_t vpce;
	vm_object_t object;
	int ghost_index;
	int pg_mask;
	boolean_t isSSD = FALSE;
	vm_phantom_hash_entry_t ghost_hash_index;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0) {
		return;
	}

	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if (object->phantom_object_id == 0) {
		vnode_pager_get_isSSD(object->pager, &isSSD);

		if (isSSD == TRUE) {
			object->phantom_isssd = TRUE;
		}

		object->phantom_object_id = vm_phantom_object_id++;

		if (vm_phantom_object_id == 0) {
			vm_phantom_object_id = VM_PHANTOM_OBJECT_ID_AFTER_WRAP;
		}
	} else {
		if ((vpce = vm_phantom_cache_lookup_ghost(m, 0))) {
			vpce->g_pages_held |= pg_mask;

			phantom_cache_stats.pcs_added_page_to_entry++;
			goto done;
		}
	}
	/*
	 * If we're here, the vm_ghost_t for this vm_page_t is not present
	 * in the phantom cache. Take the next available entry in the LRU
	 * ring, first evicting the existing entry if we've wrapped around.
	 */
	ghost_index = vm_phantom_cache_nindx++;

	if (vm_phantom_cache_nindx == vm_phantom_cache_num_entries) {
		vm_phantom_cache_nindx = 1;

		phantom_cache_stats.pcs_wrapped++;
	}
	vpce = &vm_phantom_cache[ghost_index];

	if (vpce->g_obj_id) {
		/*
		 * we're going to replace an existing entry
		 * so first remove it from the hash
		 */
		vm_ghost_t nvpce;

		ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);

		nvpce = &vm_phantom_cache[vm_phantom_cache_hash[ghost_hash_index]];

		if (nvpce == vpce) {
			vm_phantom_cache_hash[ghost_hash_index] = vpce->g_next_index;
		} else {
			for (;;) {
				if (nvpce->g_next_index == 0) {
					panic("didn't find ghost in hash\n");
				}

				if (&vm_phantom_cache[nvpce->g_next_index] == vpce) {
					nvpce->g_next_index = vpce->g_next_index;
					break;
				}
				nvpce = &vm_phantom_cache[nvpce->g_next_index];
			}
		}
		phantom_cache_stats.pcs_replaced_entry++;
	} else {
		phantom_cache_stats.pcs_added_new_entry++;
	}

	vpce->g_pages_held = pg_mask;
	vpce->g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
	vpce->g_obj_id = object->phantom_object_id;

	ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
	vpce->g_next_index = vm_phantom_cache_hash[ghost_hash_index];
	vm_phantom_cache_hash[ghost_hash_index] = ghost_index;

done:
	vm_pageout_vminfo.vm_phantom_cache_added_ghost++;

	if (object->phantom_isssd) {
		OSAddAtomic(1, &sample_period_ghost_added_count_ssd);
	} else {
		OSAddAtomic(1, &sample_period_ghost_added_count);
	}
}


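/*
 * Find the ghost entry for this page, if any. A pg_mask of 0 matches
 * on the entry alone, regardless of which page bits are still held;
 * otherwise the entry only counts as a hit if the page's own bit is set.
 */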
vm_ghost_t
vm_phantom_cache_lookup_ghost(vm_page_t m, uint32_t pg_mask)
{
	uint64_t g_obj_offset;
	uint32_t g_obj_id;
	uint32_t ghost_index;
	vm_object_t object;

	object = VM_PAGE_OBJECT(m);

	if ((g_obj_id = object->phantom_object_id) == 0) {
		/*
		 * no entries in phantom cache for this object
		 */
		return NULL;
	}
	g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;

	ghost_index = vm_phantom_cache_hash[vm_phantom_hash(g_obj_id, g_obj_offset)];

	while (ghost_index) {
		vm_ghost_t vpce;

		vpce = &vm_phantom_cache[ghost_index];

		if (vpce->g_obj_id == g_obj_id && vpce->g_obj_offset == g_obj_offset) {
			if (pg_mask == 0 || (vpce->g_pages_held & pg_mask)) {
				phantom_cache_stats.pcs_lookup_found_page_in_cache++;

				return vpce;
			}
			phantom_cache_stats.pcs_lookup_page_not_in_entry++;

			return NULL;
		}
		ghost_index = vpce->g_next_index;
	}
	phantom_cache_stats.pcs_lookup_entry_not_in_cache++;

	return NULL;
}


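/*
 * Called as a page comes back into the file cache: if it is still
 * remembered as a recent eviction, clear its bit in the ghost entry and
 * count the re-read (the "found" side of the sampling statistics).
 */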
void
vm_phantom_cache_update(vm_page_t m)
{
	int pg_mask;
	vm_ghost_t vpce;
	vm_object_t object;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0) {
		return;
	}

	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if ((vpce = vm_phantom_cache_lookup_ghost(m, pg_mask))) {
		vpce->g_pages_held &= ~pg_mask;

		phantom_cache_stats.pcs_updated_phantom_state++;
		vm_pageout_vminfo.vm_phantom_cache_found_ghost++;

		if (object->phantom_isssd) {
			OSAddAtomic(1, &sample_period_ghost_found_count_ssd);
		} else {
			OSAddAtomic(1, &sample_period_ghost_found_count);
		}
	}
}


#define PHANTOM_CACHE_DEBUG     1

#if PHANTOM_CACHE_DEBUG

int sample_period_ghost_counts_indx = 0;

struct {
	uint32_t added;
	uint32_t found;
	uint32_t added_ssd;
	uint32_t found_ssd;
	uint32_t elapsed_ms;
	boolean_t pressure_detected;
} sample_period_ghost_counts[256];

#endif

/*
 * Determine if the file cache is thrashing from sampling interval statistics.
 *
 * Pages added to the phantom cache = pages evicted from the file cache.
 * Pages found in the phantom cache = reads of pages that were recently evicted.
 * Threshold is the latency-dependent number of reads we consider thrashing.
 */
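/*
 * Worked example with the non-embedded threshold of 50: a period with
 * added = 400 and found = 300 passes both threshold checks and has
 * found >= added / 2, so it counts as thrashing; the same period with
 * found = 120 looks like working-set replacement and does not count.
 */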
static boolean_t
is_thrashing(uint32_t added, uint32_t found, uint32_t threshold)
{
	/* Ignore normal activity below the threshold. */
	if (added < threshold || found < threshold) {
		return FALSE;
	}

	/*
	 * When thrashing in a way that we can mitigate, most of the pages read
	 * into the file cache were recently evicted, and 'found' will be close
	 * to 'added'.
	 *
	 * When replacing the current working set because a new app is
	 * launched, we see very high read traffic with sporadic phantom cache
	 * hits.
	 *
	 * This is not thrashing, or freeing up memory wouldn't help much
	 * anyway.
	 */
	if (found < added / 2) {
		return FALSE;
	}

	return TRUE;
}

/*
 * The following function is never called from multiple threads
 * simultaneously, due to a condition variable used to serialize at the
 * compressor level... thus there is no need to provide locking for the
 * sample processing.
 */
boolean_t
vm_phantom_cache_check_pressure()
{
	clock_sec_t cur_ts_sec;
	clock_nsec_t cur_ts_nsec;
	uint64_t elapsed_msecs_in_eval;
	boolean_t pressure_detected = FALSE;

	clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);

	elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, pc_start_of_eval_period_sec, pc_start_of_eval_period_nsec);

	/*
	 * Reset evaluation period after phantom_cache_eval_period_in_msecs or
	 * whenever vm_phantom_cache_restart_sample has been called.
	 */
	if (elapsed_msecs_in_eval >= phantom_cache_eval_period_in_msecs) {
		pc_need_eval_reset = TRUE;
	}

	if (pc_need_eval_reset == TRUE) {
#if PHANTOM_CACHE_DEBUG
		/*
		 * maintain some info about the last 256 sample periods
		 */
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added = sample_period_ghost_added_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found = sample_period_ghost_found_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added_ssd = sample_period_ghost_added_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found_ssd = sample_period_ghost_found_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].elapsed_ms = (uint32_t)elapsed_msecs_in_eval;

		sample_period_ghost_counts_indx++;

		if (sample_period_ghost_counts_indx >= 256) {
			sample_period_ghost_counts_indx = 0;
		}
#endif
		sample_period_ghost_added_count = 0;
		sample_period_ghost_found_count = 0;
		sample_period_ghost_added_count_ssd = 0;
		sample_period_ghost_found_count_ssd = 0;

		pc_start_of_eval_period_sec = cur_ts_sec;
		pc_start_of_eval_period_nsec = cur_ts_nsec;
		pc_history <<= 1;
		pc_need_eval_reset = FALSE;
	} else {
460 /*
461 * Since the trashing rate is really a function of the read latency of the disk
462 * we have to consider both the SSD and spinning disk case since the file cache
463 * could be backed by either or even both flavors. When the object is first
464 * assigned a phantom_object_id, we query the pager to determine if the backing
465 * backing media is an SSD and remember that answer in the vm_object. We use
466 * that info to maintains counts for both the SSD and spinning disk cases.
467 */
		if (is_thrashing(sample_period_ghost_added_count,
		    sample_period_ghost_found_count,
		    phantom_cache_thrashing_threshold) ||
		    is_thrashing(sample_period_ghost_added_count_ssd,
		    sample_period_ghost_found_count_ssd,
		    phantom_cache_thrashing_threshold_ssd)) {
			/* Thrashing in the current period: Set bit 0. */
			pc_history |= 1;
		}
	}

	/*
	 * Declare pressure_detected after phantom_cache_contiguous_periods.
	 *
	 * Create a bitmask with the N low bits set. These bits must all be set
	 * in pc_history. The high bits of pc_history are ignored.
	 */
	uint32_t bitmask = (1u << phantom_cache_contiguous_periods) - 1;
	if ((pc_history & bitmask) == bitmask) {
		pressure_detected = TRUE;
	}

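	/*
	 * Regardless of the history bits, don't report pressure while
	 * file-backed pages still occupy more than half of the available
	 * memory: the file cache is not starved in that case.
	 */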
	if (vm_page_external_count > ((AVAILABLE_MEMORY) * 50) / 100) {
		pressure_detected = FALSE;
	}

#if PHANTOM_CACHE_DEBUG
	sample_period_ghost_counts[sample_period_ghost_counts_indx].pressure_detected = pressure_detected;
#endif
	return pressure_detected;
}

/*
 * Restart the current sampling because conditions have changed significantly,
 * and we don't want to react to old data.
 *
 * This function can be called from any thread.
 */
void
vm_phantom_cache_restart_sample(void)
{
	pc_need_eval_reset = TRUE;
}