2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <vm/vm_page.h>
30 #include <vm/vm_object.h>
31 #include <vm/vm_kern.h>
32 #include <vm/vm_pageout.h>
33 #include <vm/vm_phantom_cache.h>
34 #include <vm/vm_compressor.h>
37 uint32_t phantom_cache_eval_period_in_msecs
= 250;
38 uint32_t phantom_cache_thrashing_threshold_ssd
= 1000;
40 uint32_t phantom_cache_thrashing_threshold
= 500;
42 uint32_t phantom_cache_thrashing_threshold
= 50;
46 * Number of consecutive thrashing periods required before
47 * vm_phantom_cache_check_pressure() returns true.
50 unsigned phantom_cache_contiguous_periods
= 4;
52 unsigned phantom_cache_contiguous_periods
= 2;
55 clock_sec_t pc_start_of_eval_period_sec
= 0;
56 clock_nsec_t pc_start_of_eval_period_nsec
= 0;
57 boolean_t pc_need_eval_reset
= FALSE
;
59 /* One bit per recent sampling period. Bit 0 = current period. */
60 uint32_t pc_history
= 0;
62 uint32_t sample_period_ghost_added_count
= 0;
63 uint32_t sample_period_ghost_added_count_ssd
= 0;
64 uint32_t sample_period_ghost_found_count
= 0;
65 uint32_t sample_period_ghost_found_count_ssd
= 0;
67 uint32_t vm_phantom_object_id
= 1;
68 #define VM_PHANTOM_OBJECT_ID_AFTER_WRAP 1000000
70 vm_ghost_t vm_phantom_cache
;
71 uint32_t vm_phantom_cache_nindx
= 1;
72 uint32_t vm_phantom_cache_num_entries
= 0;
73 uint32_t vm_phantom_cache_size
;
75 typedef uint32_t vm_phantom_hash_entry_t
;
76 vm_phantom_hash_entry_t
*vm_phantom_cache_hash
;
77 uint32_t vm_phantom_cache_hash_size
;
78 uint32_t vm_ghost_hash_mask
; /* Mask for hash function */
79 uint32_t vm_ghost_bucket_hash
; /* Basic bucket hash */
87 #define vm_phantom_hash(obj_id, offset) (\
88 ( (natural_t)((uintptr_t)obj_id * vm_ghost_bucket_hash) + (offset ^ vm_ghost_bucket_hash)) & vm_ghost_hash_mask)
91 struct phantom_cache_stats
{
93 uint32_t pcs_added_page_to_entry
;
94 uint32_t pcs_added_new_entry
;
95 uint32_t pcs_replaced_entry
;
97 uint32_t pcs_lookup_found_page_in_cache
;
98 uint32_t pcs_lookup_entry_not_in_cache
;
99 uint32_t pcs_lookup_page_not_in_entry
;
101 uint32_t pcs_updated_phantom_state
;
102 } phantom_cache_stats
;
107 vm_phantom_cache_init()
109 unsigned int num_entries
;
113 if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE
) {
117 num_entries
= (uint32_t)(((max_mem
/ PAGE_SIZE
) / 10) / VM_GHOST_PAGES_PER_ENTRY
);
119 num_entries
= (uint32_t)(((max_mem
/ PAGE_SIZE
) / 4) / VM_GHOST_PAGES_PER_ENTRY
);
121 vm_phantom_cache_num_entries
= 1;
123 while (vm_phantom_cache_num_entries
< num_entries
) {
124 vm_phantom_cache_num_entries
<<= 1;
128 * We index this with g_next_index, so don't exceed the width of that bitfield.
130 if (vm_phantom_cache_num_entries
> (1 << VM_GHOST_INDEX_BITS
)) {
131 vm_phantom_cache_num_entries
= (1 << VM_GHOST_INDEX_BITS
);
134 vm_phantom_cache_size
= sizeof(struct vm_ghost
) * vm_phantom_cache_num_entries
;
135 vm_phantom_cache_hash_size
= sizeof(vm_phantom_hash_entry_t
) * vm_phantom_cache_num_entries
;
137 if (kernel_memory_allocate(kernel_map
, (vm_offset_t
*)(&vm_phantom_cache
), vm_phantom_cache_size
, 0, KMA_KOBJECT
| KMA_PERMANENT
, VM_KERN_MEMORY_PHANTOM_CACHE
) != KERN_SUCCESS
) {
138 panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
140 bzero(vm_phantom_cache
, vm_phantom_cache_size
);
142 if (kernel_memory_allocate(kernel_map
, (vm_offset_t
*)(&vm_phantom_cache_hash
), vm_phantom_cache_hash_size
, 0, KMA_KOBJECT
| KMA_PERMANENT
, VM_KERN_MEMORY_PHANTOM_CACHE
) != KERN_SUCCESS
) {
143 panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
145 bzero(vm_phantom_cache_hash
, vm_phantom_cache_hash_size
);
148 vm_ghost_hash_mask
= vm_phantom_cache_num_entries
- 1;
151 * Calculate object_id shift value for hashing algorithm:
152 * O = log2(sizeof(struct vm_object))
153 * B = log2(vm_page_bucket_count)
154 * hash shifts the object_id left by
157 size
= vm_phantom_cache_num_entries
;
158 for (log1
= 0; size
> 1; log1
++) {
162 vm_ghost_bucket_hash
= 1 << ((log1
+ 1) >> 1); /* Get (ceiling of sqrt of table size) */
163 vm_ghost_bucket_hash
|= 1 << ((log1
+ 1) >> 2); /* Get (ceiling of quadroot of table size) */
164 vm_ghost_bucket_hash
|= 1; /* Set bit and add 1 - always must be 1 to insure unique series */
166 if (vm_ghost_hash_mask
& vm_phantom_cache_num_entries
) {
167 printf("vm_phantom_cache_init: WARNING -- strange page hash\n");
173 vm_phantom_cache_add_ghost(vm_page_t m
)
179 boolean_t isSSD
= FALSE
;
180 vm_phantom_hash_entry_t ghost_hash_index
;
182 object
= VM_PAGE_OBJECT(m
);
184 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
185 vm_object_lock_assert_exclusive(object
);
187 if (vm_phantom_cache_num_entries
== 0) {
191 pg_mask
= pg_masks
[(m
->vmp_offset
>> PAGE_SHIFT
) & VM_GHOST_PAGE_MASK
];
193 if (object
->phantom_object_id
== 0) {
194 vnode_pager_get_isSSD(object
->pager
, &isSSD
);
197 object
->phantom_isssd
= TRUE
;
200 object
->phantom_object_id
= vm_phantom_object_id
++;
202 if (vm_phantom_object_id
== 0) {
203 vm_phantom_object_id
= VM_PHANTOM_OBJECT_ID_AFTER_WRAP
;
206 if ((vpce
= vm_phantom_cache_lookup_ghost(m
, 0))) {
207 vpce
->g_pages_held
|= pg_mask
;
209 phantom_cache_stats
.pcs_added_page_to_entry
++;
214 * if we're here then the vm_ghost_t of this vm_page_t
215 * is not present in the phantom cache... take the next
216 * available entry in the LRU first evicting the existing
217 * entry if we've wrapped the ring
219 ghost_index
= vm_phantom_cache_nindx
++;
221 if (vm_phantom_cache_nindx
== vm_phantom_cache_num_entries
) {
222 vm_phantom_cache_nindx
= 1;
224 phantom_cache_stats
.pcs_wrapped
++;
226 vpce
= &vm_phantom_cache
[ghost_index
];
228 if (vpce
->g_obj_id
) {
230 * we're going to replace an existing entry
231 * so first remove it from the hash
235 ghost_hash_index
= vm_phantom_hash(vpce
->g_obj_id
, vpce
->g_obj_offset
);
237 nvpce
= &vm_phantom_cache
[vm_phantom_cache_hash
[ghost_hash_index
]];
240 vm_phantom_cache_hash
[ghost_hash_index
] = vpce
->g_next_index
;
243 if (nvpce
->g_next_index
== 0) {
244 panic("didn't find ghost in hash\n");
247 if (&vm_phantom_cache
[nvpce
->g_next_index
] == vpce
) {
248 nvpce
->g_next_index
= vpce
->g_next_index
;
251 nvpce
= &vm_phantom_cache
[nvpce
->g_next_index
];
254 phantom_cache_stats
.pcs_replaced_entry
++;
256 phantom_cache_stats
.pcs_added_new_entry
++;
259 vpce
->g_pages_held
= pg_mask
;
260 vpce
->g_obj_offset
= (m
->vmp_offset
>> (PAGE_SHIFT
+ VM_GHOST_PAGE_SHIFT
)) & VM_GHOST_OFFSET_MASK
;
261 vpce
->g_obj_id
= object
->phantom_object_id
;
263 ghost_hash_index
= vm_phantom_hash(vpce
->g_obj_id
, vpce
->g_obj_offset
);
264 vpce
->g_next_index
= vm_phantom_cache_hash
[ghost_hash_index
];
265 vm_phantom_cache_hash
[ghost_hash_index
] = ghost_index
;
268 vm_pageout_vminfo
.vm_phantom_cache_added_ghost
++;
270 if (object
->phantom_isssd
) {
271 OSAddAtomic(1, &sample_period_ghost_added_count_ssd
);
273 OSAddAtomic(1, &sample_period_ghost_added_count
);
279 vm_phantom_cache_lookup_ghost(vm_page_t m
, uint32_t pg_mask
)
281 uint64_t g_obj_offset
;
283 uint32_t ghost_index
;
286 object
= VM_PAGE_OBJECT(m
);
288 if ((g_obj_id
= object
->phantom_object_id
) == 0) {
290 * no entries in phantom cache for this object
294 g_obj_offset
= (m
->vmp_offset
>> (PAGE_SHIFT
+ VM_GHOST_PAGE_SHIFT
)) & VM_GHOST_OFFSET_MASK
;
296 ghost_index
= vm_phantom_cache_hash
[vm_phantom_hash(g_obj_id
, g_obj_offset
)];
298 while (ghost_index
) {
301 vpce
= &vm_phantom_cache
[ghost_index
];
303 if (vpce
->g_obj_id
== g_obj_id
&& vpce
->g_obj_offset
== g_obj_offset
) {
304 if (pg_mask
== 0 || (vpce
->g_pages_held
& pg_mask
)) {
305 phantom_cache_stats
.pcs_lookup_found_page_in_cache
++;
309 phantom_cache_stats
.pcs_lookup_page_not_in_entry
++;
313 ghost_index
= vpce
->g_next_index
;
315 phantom_cache_stats
.pcs_lookup_entry_not_in_cache
++;
323 vm_phantom_cache_update(vm_page_t m
)
329 object
= VM_PAGE_OBJECT(m
);
331 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
332 vm_object_lock_assert_exclusive(object
);
334 if (vm_phantom_cache_num_entries
== 0) {
338 pg_mask
= pg_masks
[(m
->vmp_offset
>> PAGE_SHIFT
) & VM_GHOST_PAGE_MASK
];
340 if ((vpce
= vm_phantom_cache_lookup_ghost(m
, pg_mask
))) {
341 vpce
->g_pages_held
&= ~pg_mask
;
343 phantom_cache_stats
.pcs_updated_phantom_state
++;
344 vm_pageout_vminfo
.vm_phantom_cache_found_ghost
++;
346 if (object
->phantom_isssd
) {
347 OSAddAtomic(1, &sample_period_ghost_found_count_ssd
);
349 OSAddAtomic(1, &sample_period_ghost_found_count
);
355 #define PHANTOM_CACHE_DEBUG 1
357 #if PHANTOM_CACHE_DEBUG
359 int sample_period_ghost_counts_indx
= 0;
367 boolean_t pressure_detected
;
368 } sample_period_ghost_counts
[256];
373 * Determine if the file cache is thrashing from sampling interval statistics.
375 * Pages added to the phantom cache = pages evicted from the file cache.
376 * Pages found in the phantom cache = reads of pages that were recently evicted.
377 * Threshold is the latency-dependent number of reads we consider thrashing.
380 is_thrashing(uint32_t added
, uint32_t found
, uint32_t threshold
)
382 /* Ignore normal activity below the threshold. */
383 if (added
< threshold
|| found
< threshold
) {
388 * When thrashing in a way that we can mitigate, most of the pages read
389 * into the file cache were recently evicted, and 'found' will be close
392 * When replacing the current working set because a new app is
393 * launched, we see very high read traffic with sporadic phantom cache
396 * This is not thrashing, or freeing up memory wouldn't help much
399 if (found
< added
/ 2) {
407 * the following function is never called
408 * from multiple threads simultaneously due
409 * to a condition variable used to serialize
410 * at the compressor level... thus no need
411 * to provide locking for the sample processing
414 vm_phantom_cache_check_pressure()
416 clock_sec_t cur_ts_sec
;
417 clock_nsec_t cur_ts_nsec
;
418 uint64_t elapsed_msecs_in_eval
;
419 boolean_t pressure_detected
= FALSE
;
421 clock_get_system_nanotime(&cur_ts_sec
, &cur_ts_nsec
);
423 elapsed_msecs_in_eval
= vm_compressor_compute_elapsed_msecs(cur_ts_sec
, cur_ts_nsec
, pc_start_of_eval_period_sec
, pc_start_of_eval_period_nsec
);
426 * Reset evaluation period after phantom_cache_eval_period_in_msecs or
427 * whenever vm_phantom_cache_restart_sample has been called.
429 if (elapsed_msecs_in_eval
>= phantom_cache_eval_period_in_msecs
) {
430 pc_need_eval_reset
= TRUE
;
433 if (pc_need_eval_reset
== TRUE
) {
434 #if PHANTOM_CACHE_DEBUG
436 * maintain some info about the last 256 sample periods
438 sample_period_ghost_counts
[sample_period_ghost_counts_indx
].added
= sample_period_ghost_added_count
;
439 sample_period_ghost_counts
[sample_period_ghost_counts_indx
].found
= sample_period_ghost_found_count
;
440 sample_period_ghost_counts
[sample_period_ghost_counts_indx
].added_ssd
= sample_period_ghost_added_count_ssd
;
441 sample_period_ghost_counts
[sample_period_ghost_counts_indx
].found_ssd
= sample_period_ghost_found_count_ssd
;
442 sample_period_ghost_counts
[sample_period_ghost_counts_indx
].elapsed_ms
= (uint32_t)elapsed_msecs_in_eval
;
444 sample_period_ghost_counts_indx
++;
446 if (sample_period_ghost_counts_indx
>= 256) {
447 sample_period_ghost_counts_indx
= 0;
450 sample_period_ghost_added_count
= 0;
451 sample_period_ghost_found_count
= 0;
452 sample_period_ghost_added_count_ssd
= 0;
453 sample_period_ghost_found_count_ssd
= 0;
455 pc_start_of_eval_period_sec
= cur_ts_sec
;
456 pc_start_of_eval_period_nsec
= cur_ts_nsec
;
458 pc_need_eval_reset
= FALSE
;
461 * Since the trashing rate is really a function of the read latency of the disk
462 * we have to consider both the SSD and spinning disk case since the file cache
463 * could be backed by either or even both flavors. When the object is first
464 * assigned a phantom_object_id, we query the pager to determine if the backing
465 * backing media is an SSD and remember that answer in the vm_object. We use
466 * that info to maintains counts for both the SSD and spinning disk cases.
468 if (is_thrashing(sample_period_ghost_added_count
,
469 sample_period_ghost_found_count
,
470 phantom_cache_thrashing_threshold
) ||
471 is_thrashing(sample_period_ghost_added_count_ssd
,
472 sample_period_ghost_found_count_ssd
,
473 phantom_cache_thrashing_threshold_ssd
)) {
474 /* Thrashing in the current period: Set bit 0. */
480 * Declare pressure_detected after phantom_cache_contiguous_periods.
482 * Create a bitmask with the N low bits set. These bits must all be set
483 * in pc_history. The high bits of pc_history are ignored.
485 uint32_t bitmask
= (1u << phantom_cache_contiguous_periods
) - 1;
486 if ((pc_history
& bitmask
) == bitmask
) {
487 pressure_detected
= TRUE
;
490 if (vm_page_external_count
> ((AVAILABLE_MEMORY
) * 50) / 100) {
491 pressure_detected
= FALSE
;
494 #if PHANTOM_CACHE_DEBUG
495 sample_period_ghost_counts
[sample_period_ghost_counts_indx
].pressure_detected
= pressure_detected
;
497 return pressure_detected
;
501 * Restart the current sampling because conditions have changed significantly,
502 * and we don't want to react to old data.
504 * This function can be called from any thread.
507 vm_phantom_cache_restart_sample(void)
509 pc_need_eval_reset
= TRUE
;