/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phantom_cache.h>
#include <vm/vm_compressor.h>


uint32_t phantom_cache_eval_period_in_msecs = 250;
uint32_t phantom_cache_thrashing_threshold_ssd = 1000;
#if CONFIG_EMBEDDED
uint32_t phantom_cache_thrashing_threshold = 500;
#else
uint32_t phantom_cache_thrashing_threshold = 50;
#endif

/*
 * Number of consecutive thrashing periods required before
 * vm_phantom_cache_check_pressure() returns true.
 */
#if CONFIG_EMBEDDED
unsigned phantom_cache_contiguous_periods = 4;
#else
unsigned phantom_cache_contiguous_periods = 2;
#endif

clock_sec_t pc_start_of_eval_period_sec = 0;
clock_nsec_t pc_start_of_eval_period_nsec = 0;
boolean_t pc_need_eval_reset = FALSE;

/* One bit per recent sampling period. Bit 0 = current period. */
uint32_t pc_history = 0;

uint32_t sample_period_ghost_added_count = 0;
uint32_t sample_period_ghost_added_count_ssd = 0;
uint32_t sample_period_ghost_found_count = 0;
uint32_t sample_period_ghost_found_count_ssd = 0;

uint32_t vm_phantom_object_id = 1;
#define VM_PHANTOM_OBJECT_ID_AFTER_WRAP 1000000
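
/*
 * When the id counter wraps, restart it well above the ids handed out
 * early in boot: those presumably belong to long-lived objects and may
 * still be present in ghost entries, so reusing them right away could
 * produce false matches in the phantom cache.
 */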

vm_ghost_t vm_phantom_cache;
uint32_t vm_phantom_cache_nindx = 1;
uint32_t vm_phantom_cache_num_entries = 0;
uint32_t vm_phantom_cache_size;

typedef uint32_t vm_phantom_hash_entry_t;
vm_phantom_hash_entry_t *vm_phantom_cache_hash;
uint32_t vm_phantom_cache_hash_size;
uint32_t vm_ghost_hash_mask;   /* Mask for hash function */
uint32_t vm_ghost_bucket_hash; /* Basic bucket hash */


int pg_masks[4] = {
	0x1, 0x2, 0x4, 0x8
};
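
/*
 * Each ghost entry covers VM_GHOST_PAGES_PER_ENTRY consecutive pages;
 * g_pages_held keeps one bit per page and pg_masks selects the bit for
 * a page's slot within its group.  For example (assuming 4K pages and
 * VM_GHOST_PAGE_MASK == 0x3), a page at offset 0x3000 maps to slot
 * (0x3000 >> 12) & 0x3 == 3, i.e. pg_masks[3] == 0x8.
 */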


#define vm_phantom_hash(obj_id, offset) (\
	( (natural_t)((uintptr_t)obj_id * vm_ghost_bucket_hash) + (offset ^ vm_ghost_bucket_hash)) & vm_ghost_hash_mask)
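
/*
 * A sketch of how the hash above behaves, assuming the power-of-two
 * table set up in vm_phantom_cache_init():
 *
 *	bucket = ((obj_id * vm_ghost_bucket_hash)
 *	    + (offset ^ vm_ghost_bucket_hash)) & vm_ghost_hash_mask;
 *
 * The multiply scatters consecutive object ids across distant buckets,
 * the xor mixes the page-group offset into the low bits, and masking
 * with (table size - 1) works because the table size is always a
 * power of two.
 */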


struct phantom_cache_stats {
	uint32_t pcs_wrapped;
	uint32_t pcs_added_page_to_entry;
	uint32_t pcs_added_new_entry;
	uint32_t pcs_replaced_entry;

	uint32_t pcs_lookup_found_page_in_cache;
	uint32_t pcs_lookup_entry_not_in_cache;
	uint32_t pcs_lookup_page_not_in_entry;

	uint32_t pcs_updated_phantom_state;
} phantom_cache_stats;



void
vm_phantom_cache_init(void)
{
	unsigned int num_entries;
	unsigned int log1;
	unsigned int size;

	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
		return;
	}
#if CONFIG_EMBEDDED
	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 10) / VM_GHOST_PAGES_PER_ENTRY);
#else
	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 4) / VM_GHOST_PAGES_PER_ENTRY);
#endif
	vm_phantom_cache_num_entries = 1;

	while (vm_phantom_cache_num_entries < num_entries) {
		vm_phantom_cache_num_entries <<= 1;
	}

	/*
	 * We index this with g_next_index, so don't exceed the width of that bitfield.
	 */
	if (vm_phantom_cache_num_entries > (1 << VM_GHOST_INDEX_BITS)) {
		vm_phantom_cache_num_entries = (1 << VM_GHOST_INDEX_BITS);
	}
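
	/*
	 * Worked example (assuming 4K pages and VM_GHOST_PAGES_PER_ENTRY
	 * == 4): a non-embedded machine with 8GB of memory has 2,097,152
	 * pages, a quarter of which is 524,288 pages, or 131,072 ghost
	 * entries.  That is already a power of two, so the rounding loop
	 * above leaves it unchanged, provided it also fits in
	 * VM_GHOST_INDEX_BITS.
	 */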

	vm_phantom_cache_size = sizeof(struct vm_ghost) * vm_phantom_cache_num_entries;
	vm_phantom_cache_hash_size = sizeof(vm_phantom_hash_entry_t) * vm_phantom_cache_num_entries;

	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache), vm_phantom_cache_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS) {
		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	}
	bzero(vm_phantom_cache, vm_phantom_cache_size);

	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache_hash), vm_phantom_cache_hash_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS) {
		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	}
	bzero(vm_phantom_cache_hash, vm_phantom_cache_hash_size);


	vm_ghost_hash_mask = vm_phantom_cache_num_entries - 1;

	/*
	 * Calculate the multiplier for the hashing algorithm.  With
	 * log1 = log2(table size), build a multiplier with bits set near
	 * the square root and fourth root of the table size, plus the
	 * low bit, so that consecutive object ids are scattered across
	 * the hash buckets.
	 */
	size = vm_phantom_cache_num_entries;
	for (log1 = 0; size > 1; log1++) {
		size /= 2;
	}

	vm_ghost_bucket_hash = 1 << ((log1 + 1) >> 1);  /* Get (ceiling of sqrt of table size) */
	vm_ghost_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
	vm_ghost_bucket_hash |= 1;                      /* Low bit must be set to ensure a unique series */

	if (vm_ghost_hash_mask & vm_phantom_cache_num_entries) {
		printf("vm_phantom_cache_init: WARNING -- strange page hash\n");
	}
}


void
vm_phantom_cache_add_ghost(vm_page_t m)
{
	vm_ghost_t vpce;
	vm_object_t object;
	int ghost_index;
	int pg_mask;
	boolean_t isSSD = FALSE;
	vm_phantom_hash_entry_t ghost_hash_index;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0) {
		return;
	}

	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if (object->phantom_object_id == 0) {
		vnode_pager_get_isSSD(object->pager, &isSSD);

		if (isSSD == TRUE) {
			object->phantom_isssd = TRUE;
		}

		object->phantom_object_id = vm_phantom_object_id++;

		if (vm_phantom_object_id == 0) {
			vm_phantom_object_id = VM_PHANTOM_OBJECT_ID_AFTER_WRAP;
		}
	} else {
		if ((vpce = vm_phantom_cache_lookup_ghost(m, 0))) {
			vpce->g_pages_held |= pg_mask;

			phantom_cache_stats.pcs_added_page_to_entry++;
			goto done;
		}
	}
	/*
	 * If we're here, this vm_page_t has no vm_ghost_t in the phantom
	 * cache.  Take the next available entry in the LRU ring, first
	 * evicting the existing entry if the ring has wrapped.
	 */
	ghost_index = vm_phantom_cache_nindx++;

	if (vm_phantom_cache_nindx == vm_phantom_cache_num_entries) {
		vm_phantom_cache_nindx = 1;

		phantom_cache_stats.pcs_wrapped++;
	}
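
	/*
	 * Note that the ring starts at index 1 and wraps back to 1:
	 * slot 0 is never handed out, so a g_next_index of 0 can safely
	 * terminate a hash bucket chain (see the chain walks here and in
	 * vm_phantom_cache_lookup_ghost).
	 */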
	vpce = &vm_phantom_cache[ghost_index];

	if (vpce->g_obj_id) {
		/*
		 * we're going to replace an existing entry
		 * so first remove it from the hash
		 */
		vm_ghost_t nvpce;

		ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);

		nvpce = &vm_phantom_cache[vm_phantom_cache_hash[ghost_hash_index]];

		if (nvpce == vpce) {
			vm_phantom_cache_hash[ghost_hash_index] = vpce->g_next_index;
		} else {
			for (;;) {
				if (nvpce->g_next_index == 0) {
					panic("didn't find ghost in hash\n");
				}

				if (&vm_phantom_cache[nvpce->g_next_index] == vpce) {
					nvpce->g_next_index = vpce->g_next_index;
					break;
				}
				nvpce = &vm_phantom_cache[nvpce->g_next_index];
			}
		}
		phantom_cache_stats.pcs_replaced_entry++;
	} else {
		phantom_cache_stats.pcs_added_new_entry++;
	}

	vpce->g_pages_held = pg_mask;
	vpce->g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
	vpce->g_obj_id = object->phantom_object_id;

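	/*
	 * hash the entry and insert it at the head of its bucket chain;
	 * the previous head, if any, becomes its g_next_index
	 */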
	ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
	vpce->g_next_index = vm_phantom_cache_hash[ghost_hash_index];
	vm_phantom_cache_hash[ghost_hash_index] = ghost_index;

done:
	vm_pageout_vminfo.vm_phantom_cache_added_ghost++;

	if (object->phantom_isssd) {
		OSAddAtomic(1, &sample_period_ghost_added_count_ssd);
	} else {
		OSAddAtomic(1, &sample_period_ghost_added_count);
	}
}


vm_ghost_t
vm_phantom_cache_lookup_ghost(vm_page_t m, uint32_t pg_mask)
{
	uint64_t g_obj_offset;
	uint32_t g_obj_id;
	uint32_t ghost_index;
	vm_object_t object;

	object = VM_PAGE_OBJECT(m);

	if ((g_obj_id = object->phantom_object_id) == 0) {
		/*
		 * no entries in phantom cache for this object
		 */
		return NULL;
	}
	g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;

	ghost_index = vm_phantom_cache_hash[vm_phantom_hash(g_obj_id, g_obj_offset)];

	while (ghost_index) {
		vm_ghost_t vpce;

		vpce = &vm_phantom_cache[ghost_index];

		if (vpce->g_obj_id == g_obj_id && vpce->g_obj_offset == g_obj_offset) {
			if (pg_mask == 0 || (vpce->g_pages_held & pg_mask)) {
				phantom_cache_stats.pcs_lookup_found_page_in_cache++;

				return vpce;
			}
			phantom_cache_stats.pcs_lookup_page_not_in_entry++;

			return NULL;
		}
		ghost_index = vpce->g_next_index;
	}
	phantom_cache_stats.pcs_lookup_entry_not_in_cache++;

	return NULL;
}



void
vm_phantom_cache_update(vm_page_t m)
{
	int pg_mask;
	vm_ghost_t vpce;
	vm_object_t object;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0) {
		return;
	}

	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if ((vpce = vm_phantom_cache_lookup_ghost(m, pg_mask))) {
		vpce->g_pages_held &= ~pg_mask;

		phantom_cache_stats.pcs_updated_phantom_state++;
		vm_pageout_vminfo.vm_phantom_cache_found_ghost++;

		if (object->phantom_isssd) {
			OSAddAtomic(1, &sample_period_ghost_found_count_ssd);
		} else {
			OSAddAtomic(1, &sample_period_ghost_found_count);
		}
	}
}


#define PHANTOM_CACHE_DEBUG 1

#if PHANTOM_CACHE_DEBUG

int sample_period_ghost_counts_indx = 0;

struct {
	uint32_t added;
	uint32_t found;
	uint32_t added_ssd;
	uint32_t found_ssd;
	uint32_t elapsed_ms;
	boolean_t pressure_detected;
} sample_period_ghost_counts[256];

#endif

/*
 * Determine if the file cache is thrashing from sampling interval statistics.
 *
 * Pages added to the phantom cache = pages evicted from the file cache.
 * Pages found in the phantom cache = reads of pages that were recently evicted.
 * Threshold is the latency-dependent number of reads we consider thrashing.
 */
static boolean_t
is_thrashing(uint32_t added, uint32_t found, uint32_t threshold)
{
	/* Ignore normal activity below the threshold. */
	if (added < threshold || found < threshold) {
		return FALSE;
	}

	/*
	 * When thrashing in a way that we can mitigate, most of the pages read
	 * into the file cache were recently evicted, and 'found' will be close
	 * to 'added'.
	 *
	 * When replacing the current working set because a new app is
	 * launched, we see very high read traffic with sporadic phantom cache
	 * hits.
	 *
	 * This is not thrashing, or freeing up memory wouldn't help much
	 * anyway.
	 */
	if (found < added / 2) {
		return FALSE;
	}

	return TRUE;
}
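
/*
 * Worked example with the SSD threshold of 1000 (hypothetical numbers):
 * a sample period with added == 1500 and found == 1100 counts as
 * thrashing, since both exceed the threshold and 1100 >= 1500 / 2.
 * A period with added == 5000 and found == 1200 does not: despite the
 * heavy read traffic, fewer than half of the incoming pages are recent
 * evictions, which matches the app-launch pattern described above.
 */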

/*
 * The following function is never called from multiple threads
 * simultaneously: a condition variable serializes callers at the
 * compressor level, so the sample processing needs no locking of
 * its own.
 */
boolean_t
vm_phantom_cache_check_pressure(void)
{
	clock_sec_t cur_ts_sec;
	clock_nsec_t cur_ts_nsec;
	uint64_t elapsed_msecs_in_eval;
	boolean_t pressure_detected = FALSE;

	clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);

	elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, pc_start_of_eval_period_sec, pc_start_of_eval_period_nsec);

	/*
	 * Reset evaluation period after phantom_cache_eval_period_in_msecs or
	 * whenever vm_phantom_cache_restart_sample has been called.
	 */
	if (elapsed_msecs_in_eval >= phantom_cache_eval_period_in_msecs) {
		pc_need_eval_reset = TRUE;
	}

	if (pc_need_eval_reset == TRUE) {
#if PHANTOM_CACHE_DEBUG
		/*
		 * maintain some info about the last 256 sample periods
		 */
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added = sample_period_ghost_added_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found = sample_period_ghost_found_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added_ssd = sample_period_ghost_added_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found_ssd = sample_period_ghost_found_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].elapsed_ms = (uint32_t)elapsed_msecs_in_eval;

		sample_period_ghost_counts_indx++;

		if (sample_period_ghost_counts_indx >= 256) {
			sample_period_ghost_counts_indx = 0;
		}
#endif
		sample_period_ghost_added_count = 0;
		sample_period_ghost_found_count = 0;
		sample_period_ghost_added_count_ssd = 0;
		sample_period_ghost_found_count_ssd = 0;

		pc_start_of_eval_period_sec = cur_ts_sec;
		pc_start_of_eval_period_nsec = cur_ts_nsec;
		pc_history <<= 1;
		pc_need_eval_reset = FALSE;
	} else {
		/*
		 * Since the thrashing rate is really a function of the read latency of
		 * the disk, we have to consider both the SSD and spinning disk case,
		 * since the file cache could be backed by either or even both flavors.
		 * When the object is first assigned a phantom_object_id, we query the
		 * pager to determine if the backing media is an SSD and remember that
		 * answer in the vm_object. We use that info to maintain counts for
		 * both the SSD and spinning disk cases.
		 */
		if (is_thrashing(sample_period_ghost_added_count,
		    sample_period_ghost_found_count,
		    phantom_cache_thrashing_threshold) ||
		    is_thrashing(sample_period_ghost_added_count_ssd,
		    sample_period_ghost_found_count_ssd,
		    phantom_cache_thrashing_threshold_ssd)) {
			/* Thrashing in the current period: Set bit 0. */
			pc_history |= 1;
		}
	}

	/*
	 * Declare pressure_detected after phantom_cache_contiguous_periods.
	 *
	 * Create a bitmask with the N low bits set. These bits must all be set
	 * in pc_history. The high bits of pc_history are ignored.
	 */
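	/*
	 * For example, with phantom_cache_contiguous_periods == 2 the
	 * mask is 0x3: the thrashing bit must have been set in both the
	 * current and the previous sample period for pressure to be
	 * declared.
	 */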
	uint32_t bitmask = (1u << phantom_cache_contiguous_periods) - 1;
	if ((pc_history & bitmask) == bitmask) {
		pressure_detected = TRUE;
	}

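	/*
	 * Even with a sustained thrashing history, don't report pressure
	 * while file-backed pages still occupy more than half of the
	 * available memory: presumably the page cache is not yet starved
	 * enough for shrinking other memory to be the right response.
	 */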
	if (vm_page_external_count > ((AVAILABLE_MEMORY) * 50) / 100) {
		pressure_detected = FALSE;
	}

#if PHANTOM_CACHE_DEBUG
	sample_period_ghost_counts[sample_period_ghost_counts_indx].pressure_detected = pressure_detected;
#endif
	return pressure_detected;
}

/*
 * Restart the current sampling because conditions have changed significantly,
 * and we don't want to react to old data.
 *
 * This function can be called from any thread.
 */
void
vm_phantom_cache_restart_sample(void)
{
	pc_need_eval_reset = TRUE;
}