/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phantom_cache.h>
#include <vm/vm_compressor.h>
uint32_t phantom_cache_eval_period_in_msecs = 250;
uint32_t phantom_cache_thrashing_threshold_ssd = 1000;
#if CONFIG_EMBEDDED
uint32_t phantom_cache_thrashing_threshold = 500;
#else
uint32_t phantom_cache_thrashing_threshold = 100;
#endif
/*
 * Number of consecutive thrashing periods required before
 * vm_phantom_cache_check_pressure() returns true.
 */
#if CONFIG_EMBEDDED
unsigned phantom_cache_contiguous_periods = 4;
#else
unsigned phantom_cache_contiguous_periods = 2;
#endif
clock_sec_t	pc_start_of_eval_period_sec = 0;
clock_nsec_t	pc_start_of_eval_period_nsec = 0;
boolean_t	pc_need_eval_reset = FALSE;
/* One bit per recent sampling period. Bit 0 = current period. */
uint32_t	pc_history = 0;
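/*
 * Illustration (hypothetical values): pc_history is a sliding window over
 * sampling periods. At each period boundary the window shifts left
 * (pc_history <<= 1) so that bit 0 describes the new current period, and
 * a period that looks like thrashing sets that bit (pc_history |= 1).
 * Thus (pc_history & 0x3) == 0x3 means the two most recent periods both
 * thrashed.
 */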
uint32_t	sample_period_ghost_added_count = 0;
uint32_t	sample_period_ghost_added_count_ssd = 0;
uint32_t	sample_period_ghost_found_count = 0;
uint32_t	sample_period_ghost_found_count_ssd = 0;

uint32_t	vm_phantom_object_id = 1;
#define	VM_PHANTOM_OBJECT_ID_AFTER_WRAP	1000000
vm_ghost_t	vm_phantom_cache;
uint32_t	vm_phantom_cache_nindx = 1;
uint32_t	vm_phantom_cache_num_entries = 0;
uint32_t	vm_phantom_cache_size;

typedef uint32_t	vm_phantom_hash_entry_t;
vm_phantom_hash_entry_t	*vm_phantom_cache_hash;
uint32_t	vm_phantom_cache_hash_size;
uint32_t	vm_ghost_hash_mask;	/* Mask for hash function */
uint32_t	vm_ghost_bucket_hash;	/* Basic bucket hash */
#define vm_phantom_hash(obj_id, offset) (\
	( (natural_t)((uintptr_t)obj_id * vm_ghost_bucket_hash) + (offset ^ vm_ghost_bucket_hash)) & vm_ghost_hash_mask)
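/*
 * Usage sketch (illustrative): the macro folds an object id and a ghost
 * offset into a bucket index for the chained hash, e.g.
 *
 *	bucket = vm_phantom_hash(object->phantom_object_id, g_obj_offset);
 *	head   = vm_phantom_cache_hash[bucket];		-- 0 means empty bucket
 *
 * Colliding entries are chained through g_next_index; index 0 is reserved
 * as the end-of-chain marker, which is why vm_phantom_cache_nindx starts
 * at 1.
 */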
struct phantom_cache_stats {
	uint32_t	pcs_wrapped;
	uint32_t	pcs_added_page_to_entry;
	uint32_t	pcs_added_new_entry;
	uint32_t	pcs_replaced_entry;

	uint32_t	pcs_lookup_found_page_in_cache;
	uint32_t	pcs_lookup_entry_not_in_cache;
	uint32_t	pcs_lookup_page_not_in_entry;

	uint32_t	pcs_updated_phantom_state;
} phantom_cache_stats;
void
vm_phantom_cache_init()
{
	unsigned int	num_entries;
	unsigned int	log1;
	unsigned int	size;

	if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE)
		return;
#if CONFIG_EMBEDDED
	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 10) / VM_GHOST_PAGES_PER_ENTRY);
#else
	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 4) / VM_GHOST_PAGES_PER_ENTRY);
#endif
	vm_phantom_cache_num_entries = 1;

	while (vm_phantom_cache_num_entries < num_entries)
		vm_phantom_cache_num_entries <<= 1;

	vm_phantom_cache_size = sizeof(struct vm_ghost) * vm_phantom_cache_num_entries;
	vm_phantom_cache_hash_size = sizeof(vm_phantom_hash_entry_t) * vm_phantom_cache_num_entries;
	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache), vm_phantom_cache_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS)
		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	bzero(vm_phantom_cache, vm_phantom_cache_size);

	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache_hash), vm_phantom_cache_hash_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS)
		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	bzero(vm_phantom_cache_hash, vm_phantom_cache_hash_size);
	vm_ghost_hash_mask = vm_phantom_cache_num_entries - 1;

	/*
	 * Calculate object_id shift value for hashing algorithm:
	 *	O = log2(sizeof(struct vm_object))
	 *	B = log2(vm_page_bucket_count)
	 *	hash shifts the object_id left by B - O bits
	 */
	size = vm_phantom_cache_num_entries;
	for (log1 = 0; size > 1; log1++)
		size /= 2;

	vm_ghost_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
	vm_ghost_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
	vm_ghost_bucket_hash |= 1;			/* Set bit and add 1 - always must be 1 to ensure unique series */
	if (vm_ghost_hash_mask & vm_phantom_cache_num_entries)
		printf("vm_phantom_cache_init: WARNING -- strange page hash\n");
}
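/*
 * Sizing example (illustrative, assuming 4K pages and
 * VM_GHOST_PAGES_PER_ENTRY == 4): with 4GB of memory on the non-embedded
 * path, max_mem / PAGE_SIZE == 1048576 pages, a quarter of that is
 * 262144, and dividing by the pages covered per entry gives 65536 ghost
 * entries. That is already a power of two, so the round-up loop leaves
 * it unchanged and vm_ghost_hash_mask == 0xffff.
 */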
void
vm_phantom_cache_add_ghost(vm_page_t m)
{
	vm_ghost_t	vpce;
	vm_object_t	object;
	int		ghost_index;
	int		pg_mask;
	boolean_t	isSSD = FALSE;
	vm_phantom_hash_entry_t	ghost_hash_index;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0)
		return;
	pg_mask = pg_masks[(m->offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if (object->phantom_object_id == 0) {

		vnode_pager_get_isSSD(object->pager, &isSSD);

		if (isSSD == TRUE)
			object->phantom_isssd = TRUE;

		object->phantom_object_id = vm_phantom_object_id++;

		if (vm_phantom_object_id == 0)
			vm_phantom_object_id = VM_PHANTOM_OBJECT_ID_AFTER_WRAP;
	} else {
		if ( (vpce = vm_phantom_cache_lookup_ghost(m, 0)) ) {
			vpce->g_pages_held |= pg_mask;

			phantom_cache_stats.pcs_added_page_to_entry++;
			goto done;
		}
	}
	/*
	 * if we're here then the vm_ghost_t of this vm_page_t
	 * is not present in the phantom cache... take the next
	 * available entry in the LRU first evicting the existing
	 * entry if we've wrapped the ring
	 */
	ghost_index = vm_phantom_cache_nindx++;

	if (vm_phantom_cache_nindx == vm_phantom_cache_num_entries) {
		vm_phantom_cache_nindx = 1;

		phantom_cache_stats.pcs_wrapped++;
	}
	vpce = &vm_phantom_cache[ghost_index];
	if (vpce->g_obj_id) {
		/*
		 * we're going to replace an existing entry
		 * so first remove it from the hash
		 */
		vm_ghost_t	nvpce;

		ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);

		nvpce = &vm_phantom_cache[vm_phantom_cache_hash[ghost_hash_index]];

		if (nvpce == vpce) {
			vm_phantom_cache_hash[ghost_hash_index] = vpce->g_next_index;
		} else {
			for (;;) {
				if (nvpce->g_next_index == 0)
					panic("didn't find ghost in hash\n");

				if (&vm_phantom_cache[nvpce->g_next_index] == vpce) {
					nvpce->g_next_index = vpce->g_next_index;
					break;
				}
				nvpce = &vm_phantom_cache[nvpce->g_next_index];
			}
		}
		phantom_cache_stats.pcs_replaced_entry++;
	} else
		phantom_cache_stats.pcs_added_new_entry++;
	vpce->g_pages_held = pg_mask;
	vpce->g_obj_offset = (m->offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
	vpce->g_obj_id = object->phantom_object_id;

	ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
	vpce->g_next_index = vm_phantom_cache_hash[ghost_hash_index];
	vm_phantom_cache_hash[ghost_hash_index] = ghost_index;

done:
	if (object->phantom_isssd)
		OSAddAtomic(1, &sample_period_ghost_added_count_ssd);
	else
		OSAddAtomic(1, &sample_period_ghost_added_count);
}
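/*
 * Lifecycle sketch (illustrative): vm_phantom_cache_add_ghost() runs as a
 * file-backed page leaves the file cache, recording that the page was
 * recently resident; if the page is later read back in,
 * vm_phantom_cache_update() clears its bit and counts a "found". The
 * added/found counters sampled by vm_phantom_cache_check_pressure() fall
 * out of this pairing.
 */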
vm_ghost_t
vm_phantom_cache_lookup_ghost(vm_page_t m, uint32_t pg_mask)
{
	uint64_t	g_obj_offset;
	uint32_t	g_obj_id;
	uint32_t	ghost_index;
	vm_object_t	object;
	vm_ghost_t	vpce;

	object = VM_PAGE_OBJECT(m);

	if ((g_obj_id = object->phantom_object_id) == 0) {
		/*
		 * no entries in phantom cache for this object
		 */
		return (NULL);
	}
	g_obj_offset = (m->offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;

	ghost_index = vm_phantom_cache_hash[vm_phantom_hash(g_obj_id, g_obj_offset)];

	while (ghost_index) {
		vpce = &vm_phantom_cache[ghost_index];

		if (vpce->g_obj_id == g_obj_id && vpce->g_obj_offset == g_obj_offset) {

			if (pg_mask == 0 || (vpce->g_pages_held & pg_mask)) {
				phantom_cache_stats.pcs_lookup_found_page_in_cache++;

				return (vpce);
			}
			phantom_cache_stats.pcs_lookup_page_not_in_entry++;

			return (NULL);
		}
		ghost_index = vpce->g_next_index;
	}
	phantom_cache_stats.pcs_lookup_entry_not_in_cache++;

	return (NULL);
}
void
vm_phantom_cache_update(vm_page_t m)
{
	int		pg_mask;
	vm_ghost_t	vpce;
	vm_object_t	object;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0)
		return;

	pg_mask = pg_masks[(m->offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if ( (vpce = vm_phantom_cache_lookup_ghost(m, pg_mask)) ) {

		vpce->g_pages_held &= ~pg_mask;

		phantom_cache_stats.pcs_updated_phantom_state++;

		if (object->phantom_isssd)
			OSAddAtomic(1, &sample_period_ghost_found_count_ssd);
		else
			OSAddAtomic(1, &sample_period_ghost_found_count);
	}
}
#define PHANTOM_CACHE_DEBUG	1

#if PHANTOM_CACHE_DEBUG

int	sample_period_ghost_counts_indx = 0;

struct {
	uint32_t	added;
	uint32_t	found;
	uint32_t	added_ssd;
	uint32_t	found_ssd;
	uint32_t	elapsed_ms;
	boolean_t	pressure_detected;
} sample_period_ghost_counts[256];

#endif
/*
 * Determine if the file cache is thrashing from sampling interval statistics.
 *
 * Pages added to the phantom cache = pages evicted from the file cache.
 * Pages found in the phantom cache = reads of pages that were recently evicted.
 * Threshold is the latency-dependent number of reads we consider thrashing.
 */
static boolean_t
is_thrashing(uint32_t added, uint32_t found, uint32_t threshold)
{
	/* Ignore normal activity below the threshold. */
	if (added < threshold || found < threshold)
		return FALSE;

	/*
	 * When thrashing in a way that we can mitigate, most of the pages read
	 * into the file cache were recently evicted, and 'found' will be close
	 * to 'added'.
	 *
	 * When replacing the current working set because a new app is
	 * launched, we see very high read traffic with sporadic phantom cache
	 * hits.
	 *
	 * This is not thrashing, or freeing up memory wouldn't help much
	 * anyway.
	 */
	if (found < added / 2)
		return FALSE;

	return TRUE;
}
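/*
 * Worked example (illustrative): on spinning media with
 * phantom_cache_thrashing_threshold == 100, a sample period with
 * added == 150 and found == 120 passes both tests (both counts >= 100,
 * and 120 >= 150 / 2), so it counts as thrashing. An app launch with
 * added == 5000 but found == 300 is rejected by the found < added / 2
 * test and is ignored.
 */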
/*
 * The following function is never called
 * from multiple threads simultaneously due
 * to a condition variable used to serialize
 * at the compressor level... thus no need
 * to provide locking for the sample processing.
 */
boolean_t
vm_phantom_cache_check_pressure()
{
	clock_sec_t	cur_ts_sec;
	clock_nsec_t	cur_ts_nsec;
	uint64_t	elapsed_msecs_in_eval;
	boolean_t	pressure_detected = FALSE;

	clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);

	elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, pc_start_of_eval_period_sec, pc_start_of_eval_period_nsec);
	/*
	 * Reset evaluation period after phantom_cache_eval_period_in_msecs or
	 * whenever vm_phantom_cache_restart_sample has been called.
	 */
	if (elapsed_msecs_in_eval >= phantom_cache_eval_period_in_msecs) {
		pc_need_eval_reset = TRUE;
	}

	if (pc_need_eval_reset == TRUE) {

#if PHANTOM_CACHE_DEBUG
		/*
		 * maintain some info about the last 256 sample periods
		 */
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added = sample_period_ghost_added_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found = sample_period_ghost_found_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added_ssd = sample_period_ghost_added_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found_ssd = sample_period_ghost_found_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].elapsed_ms = (uint32_t)elapsed_msecs_in_eval;

		sample_period_ghost_counts_indx++;

		if (sample_period_ghost_counts_indx >= 256)
			sample_period_ghost_counts_indx = 0;
#endif
		sample_period_ghost_added_count = 0;
		sample_period_ghost_found_count = 0;
		sample_period_ghost_added_count_ssd = 0;
		sample_period_ghost_found_count_ssd = 0;

		pc_start_of_eval_period_sec = cur_ts_sec;
		pc_start_of_eval_period_nsec = cur_ts_nsec;

		/* Start a new sampling period: shift the history window. */
		pc_history <<= 1;

		pc_need_eval_reset = FALSE;
	} else {
		/*
		 * Since the thrashing rate is really a function of the read latency of the disk
		 * we have to consider both the SSD and spinning disk case since the file cache
		 * could be backed by either or even both flavors. When the object is first
		 * assigned a phantom_object_id, we query the pager to determine if the
		 * backing media is an SSD and remember that answer in the vm_object. We use
		 * that info to maintain counts for both the SSD and spinning disk cases.
		 */
		if (is_thrashing(sample_period_ghost_added_count,
				 sample_period_ghost_found_count,
				 phantom_cache_thrashing_threshold) ||
		    is_thrashing(sample_period_ghost_added_count_ssd,
				 sample_period_ghost_found_count_ssd,
				 phantom_cache_thrashing_threshold_ssd)) {
			/* Thrashing in the current period: Set bit 0. */
			pc_history |= 1;
		}
	}

	/*
	 * Declare pressure_detected after phantom_cache_contiguous_periods.
	 *
	 * Create a bitmask with the N low bits set. These bits must all be set
	 * in pc_history. The high bits of pc_history are ignored.
	 */
	uint32_t bitmask = (1u << phantom_cache_contiguous_periods) - 1;

	if ((pc_history & bitmask) == bitmask)
		pressure_detected = TRUE;
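	/*
	 * For example, with phantom_cache_contiguous_periods == 2 the
	 * bitmask is 0x3: pressure is declared only when bit 0 (the current
	 * period) and bit 1 (the previous period) are both set, i.e. after
	 * two consecutive thrashing samples.
	 */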
	if (vm_page_external_count > ((AVAILABLE_MEMORY) * 50) / 100)
		pressure_detected = FALSE;

#if PHANTOM_CACHE_DEBUG
	sample_period_ghost_counts[sample_period_ghost_counts_indx].pressure_detected = pressure_detected;
#endif

	return (pressure_detected);
}
/*
 * Restart the current sampling because conditions have changed significantly,
 * and we don't want to react to old data.
 *
 * This function can be called from any thread.
 */
void
vm_phantom_cache_restart_sample(void)
{
	pc_need_eval_reset = TRUE;
}
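/*
 * Usage sketch (illustrative): a caller that knows the recent ghost
 * traffic is no longer representative, for example after a large amount
 * of memory has just been freed, can discard the partial sample:
 *
 *	vm_phantom_cache_restart_sample();
 *
 * The next vm_phantom_cache_check_pressure() call then resets the
 * counters and starts a fresh evaluation period instead of acting on
 * stale data.
 */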