git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000-2013 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28
	29	#include <vm/vm_page.h>
	30	#include <vm/vm_object.h>
	31	#include <vm/vm_kern.h>
	32	#include <vm/vm_pageout.h>
	33	#include <vm/vm_phantom_cache.h>
	34	#include <vm/vm_compressor.h>
	35
	36
	37	uint32_t phantom_cache_eval_period_in_msecs = 250;
	38	uint32_t phantom_cache_thrashing_threshold_ssd = 1000;
	39	#if CONFIG_EMBEDDED
	40	uint32_t phantom_cache_thrashing_threshold = 500;
	41	#else
	42	uint32_t phantom_cache_thrashing_threshold = 50;
	43	#endif
	44
	45	/*
	46	* Number of consecutive thrashing periods required before
	47	* vm_phantom_cache_check_pressure() returns true.
	48	*/
	49	#if CONFIG_EMBEDDED
	50	unsigned phantom_cache_contiguous_periods = 4;
	51	#else
	52	unsigned phantom_cache_contiguous_periods = 2;
	53	#endif
	54
	55	clock_sec_t pc_start_of_eval_period_sec = 0;
	56	clock_nsec_t pc_start_of_eval_period_nsec = 0;
	57	boolean_t pc_need_eval_reset = FALSE;
	58
	59	/* One bit per recent sampling period. Bit 0 = current period. */
	60	uint32_t pc_history = 0;
	61
	62	uint32_t sample_period_ghost_added_count = 0;
	63	uint32_t sample_period_ghost_added_count_ssd = 0;
	64	uint32_t sample_period_ghost_found_count = 0;
	65	uint32_t sample_period_ghost_found_count_ssd = 0;
	66
	67	uint32_t vm_phantom_object_id = 1;
	68	#define VM_PHANTOM_OBJECT_ID_AFTER_WRAP 1000000
	69
	70	vm_ghost_t vm_phantom_cache;
	71	uint32_t vm_phantom_cache_nindx = 1;
	72	uint32_t vm_phantom_cache_num_entries = 0;
	73	uint32_t vm_phantom_cache_size;
	74
	75	typedef uint32_t vm_phantom_hash_entry_t;
	76	vm_phantom_hash_entry_t *vm_phantom_cache_hash;
	77	uint32_t vm_phantom_cache_hash_size;
	78	uint32_t vm_ghost_hash_mask; /* Mask for hash function */
	79	uint32_t vm_ghost_bucket_hash; /* Basic bucket hash */
	80
	81
	82	int pg_masks[4] = {
	83	0x1, 0x2, 0x4, 0x8
	84	};
	85
	86
	87	#define vm_phantom_hash(obj_id, offset) (\
	88	( (natural_t)((uintptr_t)obj_id * vm_ghost_bucket_hash) + (offset ^ vm_ghost_bucket_hash)) & vm_ghost_hash_mask)
	89
	90
	91	struct phantom_cache_stats {
	92	uint32_t pcs_wrapped;
	93	uint32_t pcs_added_page_to_entry;
	94	uint32_t pcs_added_new_entry;
	95	uint32_t pcs_replaced_entry;
	96
	97	uint32_t pcs_lookup_found_page_in_cache;
	98	uint32_t pcs_lookup_entry_not_in_cache;
	99	uint32_t pcs_lookup_page_not_in_entry;
	100
	101	uint32_t pcs_updated_phantom_state;
	102	} phantom_cache_stats;
	103
	104
	105
	106	void
	107	vm_phantom_cache_init()
	108	{
	109	unsigned int num_entries;
	110	unsigned int log1;
	111	unsigned int size;
	112
	113	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
	114	return;
	115	}
	116	#if CONFIG_EMBEDDED
	117	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 10) / VM_GHOST_PAGES_PER_ENTRY);
	118	#else
	119	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 4) / VM_GHOST_PAGES_PER_ENTRY);
	120	#endif
	121	vm_phantom_cache_num_entries = 1;
	122
	123	while (vm_phantom_cache_num_entries < num_entries) {
	124	vm_phantom_cache_num_entries <<= 1;
	125	}
	126
	127	/*
	128	* We index this with g_next_index, so don't exceed the width of that bitfield.
	129	*/
	130	if (vm_phantom_cache_num_entries > (1 << VM_GHOST_INDEX_BITS)) {
	131	vm_phantom_cache_num_entries = (1 << VM_GHOST_INDEX_BITS);
	132	}
	133
	134	vm_phantom_cache_size = sizeof(struct vm_ghost) * vm_phantom_cache_num_entries;
	135	vm_phantom_cache_hash_size = sizeof(vm_phantom_hash_entry_t) * vm_phantom_cache_num_entries;
	136
	137	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache), vm_phantom_cache_size, 0, KMA_KOBJECT \| KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS) {
	138	panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	139	}
	140	bzero(vm_phantom_cache, vm_phantom_cache_size);
	141
	142	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache_hash), vm_phantom_cache_hash_size, 0, KMA_KOBJECT \| KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS) {
	143	panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	144	}
	145	bzero(vm_phantom_cache_hash, vm_phantom_cache_hash_size);
	146
	147
	148	vm_ghost_hash_mask = vm_phantom_cache_num_entries - 1;
	149
	150	/*
	151	* Calculate object_id shift value for hashing algorithm:
	152	* O = log2(sizeof(struct vm_object))
	153	* B = log2(vm_page_bucket_count)
	154	* hash shifts the object_id left by
	155	* B/2 - O
	156	*/
	157	size = vm_phantom_cache_num_entries;
	158	for (log1 = 0; size > 1; log1++) {
	159	size /= 2;
	160	}
	161
	162	vm_ghost_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
	163	vm_ghost_bucket_hash \|= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
	164	vm_ghost_bucket_hash \|= 1; /* Set bit and add 1 - always must be 1 to insure unique series */
	165
	166	if (vm_ghost_hash_mask & vm_phantom_cache_num_entries) {
	167	printf("vm_phantom_cache_init: WARNING -- strange page hash\n");
	168	}
	169	}
	170
	171
	172	void
	173	vm_phantom_cache_add_ghost(vm_page_t m)
	174	{
	175	vm_ghost_t vpce;
	176	vm_object_t object;
	177	int ghost_index;
	178	int pg_mask;
	179	boolean_t isSSD = FALSE;
	180	vm_phantom_hash_entry_t ghost_hash_index;
	181
	182	object = VM_PAGE_OBJECT(m);
	183
	184	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	185	vm_object_lock_assert_exclusive(object);
	186
	187	if (vm_phantom_cache_num_entries == 0) {
	188	return;
	189	}
	190
	191	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];
	192
	193	if (object->phantom_object_id == 0) {
	194	vnode_pager_get_isSSD(object->pager, &isSSD);
	195
	196	if (isSSD == TRUE) {
	197	object->phantom_isssd = TRUE;
	198	}
	199
	200	object->phantom_object_id = vm_phantom_object_id++;
	201
	202	if (vm_phantom_object_id == 0) {
	203	vm_phantom_object_id = VM_PHANTOM_OBJECT_ID_AFTER_WRAP;
	204	}
	205	} else {
	206	if ((vpce = vm_phantom_cache_lookup_ghost(m, 0))) {
	207	vpce->g_pages_held \|= pg_mask;
	208
	209	phantom_cache_stats.pcs_added_page_to_entry++;
	210	goto done;
	211	}
	212	}
	213	/*
	214	* if we're here then the vm_ghost_t of this vm_page_t
	215	* is not present in the phantom cache... take the next
	216	* available entry in the LRU first evicting the existing
	217	* entry if we've wrapped the ring
	218	*/
	219	ghost_index = vm_phantom_cache_nindx++;
	220
	221	if (vm_phantom_cache_nindx == vm_phantom_cache_num_entries) {
	222	vm_phantom_cache_nindx = 1;
	223
	224	phantom_cache_stats.pcs_wrapped++;
	225	}
	226	vpce = &vm_phantom_cache[ghost_index];
	227
	228	if (vpce->g_obj_id) {
	229	/*
	230	* we're going to replace an existing entry
	231	* so first remove it from the hash
	232	*/
	233	vm_ghost_t nvpce;
	234
	235	ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
	236
	237	nvpce = &vm_phantom_cache[vm_phantom_cache_hash[ghost_hash_index]];
	238
	239	if (nvpce == vpce) {
	240	vm_phantom_cache_hash[ghost_hash_index] = vpce->g_next_index;
	241	} else {
	242	for (;;) {
	243	if (nvpce->g_next_index == 0) {
	244	panic("didn't find ghost in hash\n");
	245	}
	246
	247	if (&vm_phantom_cache[nvpce->g_next_index] == vpce) {
	248	nvpce->g_next_index = vpce->g_next_index;
	249	break;
	250	}
	251	nvpce = &vm_phantom_cache[nvpce->g_next_index];
	252	}
	253	}
	254	phantom_cache_stats.pcs_replaced_entry++;
	255	} else {
	256	phantom_cache_stats.pcs_added_new_entry++;
	257	}
	258
	259	vpce->g_pages_held = pg_mask;
	260	vpce->g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
	261	vpce->g_obj_id = object->phantom_object_id;
	262
	263	ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
	264	vpce->g_next_index = vm_phantom_cache_hash[ghost_hash_index];
	265	vm_phantom_cache_hash[ghost_hash_index] = ghost_index;
	266
	267	done:
	268	vm_pageout_vminfo.vm_phantom_cache_added_ghost++;
	269
	270	if (object->phantom_isssd) {
	271	OSAddAtomic(1, &sample_period_ghost_added_count_ssd);
	272	} else {
	273	OSAddAtomic(1, &sample_period_ghost_added_count);
	274	}
	275	}
	276
	277
	278	vm_ghost_t
	279	vm_phantom_cache_lookup_ghost(vm_page_t m, uint32_t pg_mask)
	280	{
	281	uint64_t g_obj_offset;
	282	uint32_t g_obj_id;
	283	uint32_t ghost_index;
	284	vm_object_t object;
	285
	286	object = VM_PAGE_OBJECT(m);
	287
	288	if ((g_obj_id = object->phantom_object_id) == 0) {
	289	/*
	290	* no entries in phantom cache for this object
	291	*/
	292	return NULL;
	293	}
	294	g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
	295
	296	ghost_index = vm_phantom_cache_hash[vm_phantom_hash(g_obj_id, g_obj_offset)];
	297
	298	while (ghost_index) {
	299	vm_ghost_t vpce;
	300
	301	vpce = &vm_phantom_cache[ghost_index];
	302
	303	if (vpce->g_obj_id == g_obj_id && vpce->g_obj_offset == g_obj_offset) {
	304	if (pg_mask == 0 \|\| (vpce->g_pages_held & pg_mask)) {
	305	phantom_cache_stats.pcs_lookup_found_page_in_cache++;
	306
	307	return vpce;
	308	}
	309	phantom_cache_stats.pcs_lookup_page_not_in_entry++;
	310
	311	return NULL;
	312	}
	313	ghost_index = vpce->g_next_index;
	314	}
	315	phantom_cache_stats.pcs_lookup_entry_not_in_cache++;
	316
	317	return NULL;
	318	}
	319
	320
	321
	322	void
	323	vm_phantom_cache_update(vm_page_t m)
	324	{
	325	int pg_mask;
	326	vm_ghost_t vpce;
	327	vm_object_t object;
	328
	329	object = VM_PAGE_OBJECT(m);
	330
	331	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	332	vm_object_lock_assert_exclusive(object);
	333
	334	if (vm_phantom_cache_num_entries == 0) {
	335	return;
	336	}
	337
	338	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];
	339
	340	if ((vpce = vm_phantom_cache_lookup_ghost(m, pg_mask))) {
	341	vpce->g_pages_held &= ~pg_mask;
	342
	343	phantom_cache_stats.pcs_updated_phantom_state++;
	344	vm_pageout_vminfo.vm_phantom_cache_found_ghost++;
	345
	346	if (object->phantom_isssd) {
	347	OSAddAtomic(1, &sample_period_ghost_found_count_ssd);
	348	} else {
	349	OSAddAtomic(1, &sample_period_ghost_found_count);
	350	}
	351	}
	352	}
	353
	354
	355	#define PHANTOM_CACHE_DEBUG 1
	356
	357	#if PHANTOM_CACHE_DEBUG
	358
	359	int sample_period_ghost_counts_indx = 0;
	360
	361	struct {
	362	uint32_t added;
	363	uint32_t found;
	364	uint32_t added_ssd;
	365	uint32_t found_ssd;
	366	uint32_t elapsed_ms;
	367	boolean_t pressure_detected;
	368	} sample_period_ghost_counts[256];
	369
	370	#endif
	371
	372	/*
	373	* Determine if the file cache is thrashing from sampling interval statistics.
	374	*
	375	* Pages added to the phantom cache = pages evicted from the file cache.
	376	* Pages found in the phantom cache = reads of pages that were recently evicted.
	377	* Threshold is the latency-dependent number of reads we consider thrashing.
	378	*/
	379	static boolean_t
	380	is_thrashing(uint32_t added, uint32_t found, uint32_t threshold)
	381	{
	382	/* Ignore normal activity below the threshold. */
	383	if (added < threshold \|\| found < threshold) {
	384	return FALSE;
	385	}
	386
	387	/*
	388	* When thrashing in a way that we can mitigate, most of the pages read
	389	* into the file cache were recently evicted, and 'found' will be close
	390	* to 'added'.
	391	*
	392	* When replacing the current working set because a new app is
	393	* launched, we see very high read traffic with sporadic phantom cache
	394	* hits.
	395	*
	396	* This is not thrashing, or freeing up memory wouldn't help much
	397	* anyway.
	398	*/
	399	if (found < added / 2) {
	400	return FALSE;
	401	}
	402
	403	return TRUE;
	404	}
	405
	406	/*
	407	* the following function is never called
	408	* from multiple threads simultaneously due
	409	* to a condition variable used to serialize
	410	* at the compressor level... thus no need
	411	* to provide locking for the sample processing
	412	*/
	413	boolean_t
	414	vm_phantom_cache_check_pressure()
	415	{
	416	clock_sec_t cur_ts_sec;
	417	clock_nsec_t cur_ts_nsec;
	418	uint64_t elapsed_msecs_in_eval;
	419	boolean_t pressure_detected = FALSE;
	420
	421	clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
	422
	423	elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, pc_start_of_eval_period_sec, pc_start_of_eval_period_nsec);
	424
	425	/*
	426	* Reset evaluation period after phantom_cache_eval_period_in_msecs or
	427	* whenever vm_phantom_cache_restart_sample has been called.
	428	*/
	429	if (elapsed_msecs_in_eval >= phantom_cache_eval_period_in_msecs) {
	430	pc_need_eval_reset = TRUE;
	431	}
	432
	433	if (pc_need_eval_reset == TRUE) {
	434	#if PHANTOM_CACHE_DEBUG
	435	/*
	436	* maintain some info about the last 256 sample periods
	437	*/
	438	sample_period_ghost_counts[sample_period_ghost_counts_indx].added = sample_period_ghost_added_count;
	439	sample_period_ghost_counts[sample_period_ghost_counts_indx].found = sample_period_ghost_found_count;
	440	sample_period_ghost_counts[sample_period_ghost_counts_indx].added_ssd = sample_period_ghost_added_count_ssd;
	441	sample_period_ghost_counts[sample_period_ghost_counts_indx].found_ssd = sample_period_ghost_found_count_ssd;
	442	sample_period_ghost_counts[sample_period_ghost_counts_indx].elapsed_ms = (uint32_t)elapsed_msecs_in_eval;
	443
	444	sample_period_ghost_counts_indx++;
	445
	446	if (sample_period_ghost_counts_indx >= 256) {
	447	sample_period_ghost_counts_indx = 0;
	448	}
	449	#endif
	450	sample_period_ghost_added_count = 0;
	451	sample_period_ghost_found_count = 0;
	452	sample_period_ghost_added_count_ssd = 0;
	453	sample_period_ghost_found_count_ssd = 0;
	454
	455	pc_start_of_eval_period_sec = cur_ts_sec;
	456	pc_start_of_eval_period_nsec = cur_ts_nsec;
	457	pc_history <<= 1;
	458	pc_need_eval_reset = FALSE;
	459	} else {
	460	/*
	461	* Since the trashing rate is really a function of the read latency of the disk
	462	* we have to consider both the SSD and spinning disk case since the file cache
	463	* could be backed by either or even both flavors. When the object is first
	464	* assigned a phantom_object_id, we query the pager to determine if the backing
	465	* backing media is an SSD and remember that answer in the vm_object. We use
	466	* that info to maintains counts for both the SSD and spinning disk cases.
	467	*/
	468	if (is_thrashing(sample_period_ghost_added_count,
	469	sample_period_ghost_found_count,
	470	phantom_cache_thrashing_threshold) \|\|
	471	is_thrashing(sample_period_ghost_added_count_ssd,
	472	sample_period_ghost_found_count_ssd,
	473	phantom_cache_thrashing_threshold_ssd)) {
	474	/* Thrashing in the current period: Set bit 0. */
	475	pc_history \|= 1;
	476	}
	477	}
	478
	479	/*
	480	* Declare pressure_detected after phantom_cache_contiguous_periods.
	481	*
	482	* Create a bitmask with the N low bits set. These bits must all be set
	483	* in pc_history. The high bits of pc_history are ignored.
	484	*/
	485	uint32_t bitmask = (1u << phantom_cache_contiguous_periods) - 1;
	486	if ((pc_history & bitmask) == bitmask) {
	487	pressure_detected = TRUE;
	488	}
	489
	490	if (vm_page_external_count > ((AVAILABLE_MEMORY) * 50) / 100) {
	491	pressure_detected = FALSE;
	492	}
	493
	494	#if PHANTOM_CACHE_DEBUG
	495	sample_period_ghost_counts[sample_period_ghost_counts_indx].pressure_detected = pressure_detected;
	496	#endif
	497	return pressure_detected;
	498	}
	499
	500	/*
	501	* Restart the current sampling because conditions have changed significantly,
	502	* and we don't want to react to old data.
	503	*
	504	* This function can be called from any thread.
	505	*/
	506	void
	507	vm_phantom_cache_restart_sample(void)
	508	{
	509	pc_need_eval_reset = TRUE;
	510	}