1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65#include <debug.h>
66#include <libkern/OSAtomic.h>
67
68#include <mach/clock_types.h>
69#include <mach/vm_prot.h>
70#include <mach/vm_statistics.h>
71#include <mach/sdt.h>
72#include <kern/counters.h>
73#include <kern/sched_prim.h>
74#include <kern/task.h>
75#include <kern/thread.h>
76#include <kern/kalloc.h>
77#include <kern/zalloc.h>
78#include <kern/xpr.h>
79#include <vm/pmap.h>
80#include <vm/vm_init.h>
81#include <vm/vm_map.h>
82#include <vm/vm_page.h>
83#include <vm/vm_pageout.h>
84#include <vm/vm_kern.h> /* kernel_memory_allocate() */
85#include <kern/misc_protos.h>
86#include <zone_debug.h>
87#include <vm/cpm.h>
88#include <pexpert/pexpert.h>
89
90#include <vm/vm_protos.h>
91#include <vm/memory_object.h>
92#include <vm/vm_purgeable_internal.h>
93
94#include <IOKit/IOHibernatePrivate.h>
95
96#include <sys/kdebug.h>
97
98boolean_t hibernate_cleaning_in_progress = FALSE;
99boolean_t vm_page_free_verify = TRUE;
100
101uint32_t vm_lopage_free_count = 0;
102uint32_t vm_lopage_free_limit = 0;
103uint32_t vm_lopage_lowater = 0;
104boolean_t vm_lopage_refill = FALSE;
105boolean_t vm_lopage_needed = FALSE;
106
107lck_mtx_ext_t vm_page_queue_lock_ext;
108lck_mtx_ext_t vm_page_queue_free_lock_ext;
109lck_mtx_ext_t vm_purgeable_queue_lock_ext;
110
111int speculative_age_index = 0;
112int speculative_steal_index = 0;
113struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
114
115
116__private_extern__ void vm_page_init_lck_grp(void);
117
118static void vm_page_free_prepare(vm_page_t page);
119static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
120
121
122
123
124/*
125 * Associated with each page of user-allocatable memory is a
126 * page structure.
127 */
128
129/*
130 * These variables record the values returned by vm_page_bootstrap,
131 * for debugging purposes. The implementation of pmap_steal_memory
132 * and pmap_startup here also uses them internally.
133 */
134
135vm_offset_t virtual_space_start;
136vm_offset_t virtual_space_end;
137uint32_t vm_page_pages;
138
139/*
140 * The vm_page_lookup() routine, which provides for fast
141 * (virtual memory object, offset) to page lookup, employs
142 * the following hash table. The vm_page_{insert,remove}
143 * routines install and remove associations in the table.
144 * [This table is often called the virtual-to-physical,
145 * or VP, table.]
146 */
147typedef struct {
148 vm_page_t pages;
149#if MACH_PAGE_HASH_STATS
150 int cur_count; /* current count */
151 int hi_count; /* high water mark */
152#endif /* MACH_PAGE_HASH_STATS */
153} vm_page_bucket_t;
154
155
156#define BUCKETS_PER_LOCK 16
157
158vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
159unsigned int vm_page_bucket_count = 0; /* How big is array? */
160unsigned int vm_page_hash_mask; /* Mask for hash function */
161unsigned int vm_page_hash_shift; /* Shift for hash function */
162uint32_t vm_page_bucket_hash; /* Basic bucket hash */
163unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
164
165lck_spin_t *vm_page_bucket_locks;
166
167
168#if MACH_PAGE_HASH_STATS
169/* This routine is only for debug. It is intended to be called by
170 * hand by a developer using a kernel debugger. This routine prints
171 * out vm_page_hash table statistics to the kernel debug console.
172 */
173void
174hash_debug(void)
175{
176 int i;
177 int numbuckets = 0;
178 int highsum = 0;
179 int maxdepth = 0;
180
181 for (i = 0; i < vm_page_bucket_count; i++) {
182 if (vm_page_buckets[i].hi_count) {
183 numbuckets++;
184 highsum += vm_page_buckets[i].hi_count;
185 if (vm_page_buckets[i].hi_count > maxdepth)
186 maxdepth = vm_page_buckets[i].hi_count;
187 }
188 }
189 printf("Total number of buckets: %d\n", vm_page_bucket_count);
190 printf("Number used buckets: %d = %d%%\n",
191 numbuckets, 100*numbuckets/vm_page_bucket_count);
192 printf("Number unused buckets: %d = %d%%\n",
193 vm_page_bucket_count - numbuckets,
194 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
195 printf("Sum of bucket max depth: %d\n", highsum);
196 printf("Average bucket depth: %d.%2d\n",
197 highsum/vm_page_bucket_count,
198 highsum%vm_page_bucket_count);
199 printf("Maximum bucket depth: %d\n", maxdepth);
200}
201#endif /* MACH_PAGE_HASH_STATS */
202
203/*
204 * The virtual page size is currently implemented as a runtime
205 * variable, but is constant once initialized using vm_set_page_size.
206 * This initialization must be done in the machine-dependent
207 * bootstrap sequence, before calling other machine-independent
208 * initializations.
209 *
210 * All references to the virtual page size outside this
211 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
212 * constants.
213 */
214vm_size_t page_size = PAGE_SIZE;
215vm_size_t page_mask = PAGE_MASK;
216int page_shift = PAGE_SHIFT;
217
218/*
219 * Resident page structures are initialized from
220 * a template (see vm_page_alloc).
221 *
222 * When adding a new field to the virtual memory
223 * object structure, be sure to add initialization
224 * (see vm_page_bootstrap).
225 */
226struct vm_page vm_page_template;
227
228vm_page_t vm_pages = VM_PAGE_NULL;
229unsigned int vm_pages_count = 0;
230ppnum_t vm_page_lowest = 0;
231
232/*
233 * Resident pages that represent real memory
234 * are allocated from a set of free lists,
235 * one per color.
236 */
237unsigned int vm_colors;
238unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
239unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
240queue_head_t vm_page_queue_free[MAX_COLORS];
241unsigned int vm_page_free_wanted;
242unsigned int vm_page_free_wanted_privileged;
243unsigned int vm_page_free_count;
244unsigned int vm_page_fictitious_count;
245
246unsigned int vm_page_free_count_minimum; /* debugging */
247
248/*
249 * Occasionally, the virtual memory system uses
250 * resident page structures that do not refer to
251 * real pages, for example to leave a page with
252 * important state information in the VP table.
253 *
254 * These page structures are allocated the way
255 * most other kernel structures are.
256 */
257zone_t vm_page_zone;
258vm_locks_array_t vm_page_locks;
259decl_lck_mtx_data(,vm_page_alloc_lock)
260lck_mtx_ext_t vm_page_alloc_lock_ext;
261
262unsigned int io_throttle_zero_fill;
263
264unsigned int vm_page_local_q_count = 0;
265unsigned int vm_page_local_q_soft_limit = 250;
266unsigned int vm_page_local_q_hard_limit = 500;
267struct vplq *vm_page_local_q = NULL;
268
269/* N.B. Guard and fictitious pages must not
270 * be assigned a zero phys_page value.
271 */
272/*
273 * Fictitious pages don't have a physical address,
274 * but we must initialize phys_page to something.
275 * For debugging, this should be a strange value
276 * that the pmap module can recognize in assertions.
277 */
278ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
279
280/*
281 * Guard pages are not accessible so they don't
282 * need a physical address, but we need to enter
283 * one in the pmap.
284 * Let's make it recognizable and make sure that
285 * we don't use a real physical page with that
286 * physical address.
287 */
288ppnum_t vm_page_guard_addr = (ppnum_t) -2;
289
290/*
291 * Resident page structures are also chained on
292 * queues that are used by the page replacement
293 * system (pageout daemon). These queues are
294 * defined here, but are shared by the pageout
295 * module. The inactive queue is broken into
296 * inactive and zf for convenience as the
297 * pageout daemon often assigns a higher
298 * affinity to zf pages.
299 */
300queue_head_t vm_page_queue_active;
301queue_head_t vm_page_queue_inactive;
302queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
303queue_head_t vm_page_queue_throttled;
304
305unsigned int vm_page_active_count;
306unsigned int vm_page_inactive_count;
307unsigned int vm_page_anonymous_count;
308unsigned int vm_page_throttled_count;
309unsigned int vm_page_speculative_count;
310unsigned int vm_page_wire_count;
311unsigned int vm_page_wire_count_initial;
312unsigned int vm_page_gobble_count = 0;
313unsigned int vm_page_wire_count_warning = 0;
314unsigned int vm_page_gobble_count_warning = 0;
315
316unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
317unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
318uint64_t vm_page_purged_count = 0; /* total count of purged pages */
319
320#if DEVELOPMENT || DEBUG
321unsigned int vm_page_speculative_recreated = 0;
322unsigned int vm_page_speculative_created = 0;
323unsigned int vm_page_speculative_used = 0;
324#endif
325
326queue_head_t vm_page_queue_cleaned;
327
328unsigned int vm_page_cleaned_count = 0;
329unsigned int vm_pageout_enqueued_cleaned = 0;
330
331uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
332ppnum_t max_valid_low_ppnum = 0xffffffff;
333
334
335/*
336 * Several page replacement parameters are also
337 * shared with this module, so that page allocation
338 * (done here in vm_page_alloc) can trigger the
339 * pageout daemon.
340 */
341unsigned int vm_page_free_target = 0;
342unsigned int vm_page_free_min = 0;
343unsigned int vm_page_throttle_limit = 0;
344uint32_t vm_page_creation_throttle = 0;
345unsigned int vm_page_inactive_target = 0;
346unsigned int vm_page_anonymous_min = 0;
347unsigned int vm_page_inactive_min = 0;
348unsigned int vm_page_free_reserved = 0;
349unsigned int vm_page_throttle_count = 0;
350
351
352/*
353 * The VM system has a couple of heuristics for deciding
354 * that pages are "uninteresting" and should be placed
355 * on the inactive queue as likely candidates for replacement.
356 * These variables let the heuristics be controlled at run-time
357 * to make experimentation easier.
358 */
359
360boolean_t vm_page_deactivate_hint = TRUE;
361
362struct vm_page_stats_reusable vm_page_stats_reusable;
363
364/*
365 * vm_set_page_size:
366 *
367 * Sets the page size, perhaps based upon the memory
368 * size. Must be called before any use of page-size
369 * dependent functions.
370 *
371 * Sets page_shift and page_mask from page_size.
372 */
373void
374vm_set_page_size(void)
375{
376 page_mask = page_size - 1;
377
378 if ((page_mask & page_size) != 0)
379 panic("vm_set_page_size: page size not a power of two");
380
381 for (page_shift = 0; ; page_shift++)
382 if ((1U << page_shift) == page_size)
383 break;
384}
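/*
 * Editorial note (not in the original source): for the common page_size of
 * 4096 bytes, the loop above yields page_mask = 0xFFF and page_shift = 12,
 * so (addr & page_mask) is the byte offset within a page and
 * (addr >> page_shift) is the page number.
 */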
385
386
387/* Called once during startup, once the cache geometry is known.
388 */
389static void
390vm_page_set_colors( void )
391{
392 unsigned int n, override;
393
394 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
395 n = override;
396 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
397 n = vm_cache_geometry_colors;
398 else n = DEFAULT_COLORS; /* use default if all else fails */
399
400 if ( n == 0 )
401 n = 1;
402 if ( n > MAX_COLORS )
403 n = MAX_COLORS;
404
405 /* the count must be a power of 2 */
406 if ( ( n & (n - 1)) != 0 )
407 panic("vm_page_set_colors");
408
409 vm_colors = n;
410 vm_color_mask = n - 1;
411}
412
413
414lck_grp_t vm_page_lck_grp_free;
415lck_grp_t vm_page_lck_grp_queue;
416lck_grp_t vm_page_lck_grp_local;
417lck_grp_t vm_page_lck_grp_purge;
418lck_grp_t vm_page_lck_grp_alloc;
419lck_grp_t vm_page_lck_grp_bucket;
420lck_grp_attr_t vm_page_lck_grp_attr;
421lck_attr_t vm_page_lck_attr;
422
423
424__private_extern__ void
425vm_page_init_lck_grp(void)
426{
427 /*
428	 * initialize the vm_page lock world
429 */
430 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
431 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
432 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
433 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
434 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
435 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
436 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
437 lck_attr_setdefault(&vm_page_lck_attr);
438 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
439}
440
441void
442vm_page_init_local_q()
443{
444 unsigned int num_cpus;
445 unsigned int i;
446 struct vplq *t_local_q;
447
448 num_cpus = ml_get_max_cpus();
449
450 /*
451 * no point in this for a uni-processor system
452 */
453 if (num_cpus >= 2) {
454 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
455
456 for (i = 0; i < num_cpus; i++) {
457 struct vpl *lq;
458
459 lq = &t_local_q[i].vpl_un.vpl;
460 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
461 queue_init(&lq->vpl_queue);
462 lq->vpl_count = 0;
463 }
464 vm_page_local_q_count = num_cpus;
465
466 vm_page_local_q = (struct vplq *)t_local_q;
467 }
468}
469
470
471/*
472 * vm_page_bootstrap:
473 *
474 * Initializes the resident memory module.
475 *
476 * Allocates memory for the page cells, and
477 * for the object/offset-to-page hash table headers.
478 * Each page cell is initialized and placed on the free list.
479 * Returns the range of available kernel virtual memory.
480 */
481
482void
483vm_page_bootstrap(
484 vm_offset_t *startp,
485 vm_offset_t *endp)
486{
487 register vm_page_t m;
488 unsigned int i;
489 unsigned int log1;
490 unsigned int log2;
491 unsigned int size;
492
493 /*
494 * Initialize the vm_page template.
495 */
496
497 m = &vm_page_template;
498 bzero(m, sizeof (*m));
499
500 m->pageq.next = NULL;
501 m->pageq.prev = NULL;
502 m->listq.next = NULL;
503 m->listq.prev = NULL;
504 m->next = VM_PAGE_NULL;
505
506 m->object = VM_OBJECT_NULL; /* reset later */
507 m->offset = (vm_object_offset_t) -1; /* reset later */
508
509 m->wire_count = 0;
510 m->local = FALSE;
511 m->inactive = FALSE;
512 m->active = FALSE;
513 m->pageout_queue = FALSE;
514 m->speculative = FALSE;
515 m->laundry = FALSE;
516 m->free = FALSE;
517 m->reference = FALSE;
518 m->gobbled = FALSE;
519 m->private = FALSE;
520 m->throttled = FALSE;
521 m->__unused_pageq_bits = 0;
522
523 m->phys_page = 0; /* reset later */
524
525 m->busy = TRUE;
526 m->wanted = FALSE;
527 m->tabled = FALSE;
528 m->fictitious = FALSE;
529 m->pmapped = FALSE;
530 m->wpmapped = FALSE;
531 m->pageout = FALSE;
532 m->absent = FALSE;
533 m->error = FALSE;
534 m->dirty = FALSE;
535 m->cleaning = FALSE;
536 m->precious = FALSE;
537 m->clustered = FALSE;
538 m->overwriting = FALSE;
539 m->restart = FALSE;
540 m->unusual = FALSE;
541 m->encrypted = FALSE;
542 m->encrypted_cleaning = FALSE;
543 m->cs_validated = FALSE;
544 m->cs_tainted = FALSE;
545 m->no_cache = FALSE;
546 m->reusable = FALSE;
547 m->slid = FALSE;
548 m->was_dirty = FALSE;
549 m->__unused_object_bits = 0;
550
551
552 /*
553 * Initialize the page queues.
554 */
555 vm_page_init_lck_grp();
556
557 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
558 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
559 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
560
561 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
562 int group;
563
564 purgeable_queues[i].token_q_head = 0;
565 purgeable_queues[i].token_q_tail = 0;
566 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
567 queue_init(&purgeable_queues[i].objq[group]);
568
569 purgeable_queues[i].type = i;
570 purgeable_queues[i].new_pages = 0;
571#if MACH_ASSERT
572 purgeable_queues[i].debug_count_tokens = 0;
573 purgeable_queues[i].debug_count_objects = 0;
574#endif
575 };
576
577 for (i = 0; i < MAX_COLORS; i++ )
578 queue_init(&vm_page_queue_free[i]);
579
580 queue_init(&vm_lopage_queue_free);
581 queue_init(&vm_page_queue_active);
582 queue_init(&vm_page_queue_inactive);
583 queue_init(&vm_page_queue_cleaned);
584 queue_init(&vm_page_queue_throttled);
585 queue_init(&vm_page_queue_anonymous);
586
587 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
588 queue_init(&vm_page_queue_speculative[i].age_q);
589
590 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
591 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
592 }
593 vm_page_free_wanted = 0;
594 vm_page_free_wanted_privileged = 0;
595
596 vm_page_set_colors();
597
598
599 /*
600 * Steal memory for the map and zone subsystems.
601 */
602 zone_steal_memory();
603 vm_map_steal_memory();
604
605 /*
606 * Allocate (and initialize) the virtual-to-physical
607 * table hash buckets.
608 *
609 * The number of buckets should be a power of two to
610 * get a good hash function. The following computation
611 * chooses the first power of two that is greater
612 * than the number of physical pages in the system.
613 */
614
615 if (vm_page_bucket_count == 0) {
616 unsigned int npages = pmap_free_pages();
617
618 vm_page_bucket_count = 1;
619 while (vm_page_bucket_count < npages)
620 vm_page_bucket_count <<= 1;
621 }
622 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
623
624 vm_page_hash_mask = vm_page_bucket_count - 1;
625
626 /*
627 * Calculate object shift value for hashing algorithm:
628 * O = log2(sizeof(struct vm_object))
629 * B = log2(vm_page_bucket_count)
630 * hash shifts the object left by
631 * B/2 - O
632 */
633 size = vm_page_bucket_count;
634 for (log1 = 0; size > 1; log1++)
635 size /= 2;
636 size = sizeof(struct vm_object);
637 for (log2 = 0; size > 1; log2++)
638 size /= 2;
639 vm_page_hash_shift = log1/2 - log2 + 1;
640
641 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
642 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
643 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
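	/*
	 * Worked example (editorial note, not in the original source; the
	 * object size is an assumption): with vm_page_bucket_count = 2^20
	 * (log1 = 20) and a struct vm_object whose size reduces to log2 = 8,
	 * the computation above gives
	 *	vm_page_hash_shift  = 20/2 - 8 + 1 = 3
	 *	vm_page_bucket_hash = (1 << 10) | (1 << 5) | 1 = 0x421
	 */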
644
645 if (vm_page_hash_mask & vm_page_bucket_count)
646 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
647
648 vm_page_buckets = (vm_page_bucket_t *)
649 pmap_steal_memory(vm_page_bucket_count *
650 sizeof(vm_page_bucket_t));
651
652 vm_page_bucket_locks = (lck_spin_t *)
653 pmap_steal_memory(vm_page_bucket_lock_count *
654 sizeof(lck_spin_t));
655
656 for (i = 0; i < vm_page_bucket_count; i++) {
657 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
658
659 bucket->pages = VM_PAGE_NULL;
660#if MACH_PAGE_HASH_STATS
661 bucket->cur_count = 0;
662 bucket->hi_count = 0;
663#endif /* MACH_PAGE_HASH_STATS */
664 }
665
666 for (i = 0; i < vm_page_bucket_lock_count; i++)
667 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
668
669 /*
670 * Machine-dependent code allocates the resident page table.
671 * It uses vm_page_init to initialize the page frames.
672 * The code also returns to us the virtual space available
673 * to the kernel. We don't trust the pmap module
674 * to get the alignment right.
675 */
676
677 pmap_startup(&virtual_space_start, &virtual_space_end);
678 virtual_space_start = round_page(virtual_space_start);
679 virtual_space_end = trunc_page(virtual_space_end);
680
681 *startp = virtual_space_start;
682 *endp = virtual_space_end;
683
684 /*
685 * Compute the initial "wire" count.
686 * Up until now, the pages which have been set aside are not under
687 * the VM system's control, so although they aren't explicitly
688 * wired, they nonetheless can't be moved. At this moment,
689 * all VM managed pages are "free", courtesy of pmap_startup.
690 */
691 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
692 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
693 vm_page_wire_count_initial = vm_page_wire_count;
694 vm_page_free_count_minimum = vm_page_free_count;
695
696 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
697 vm_page_free_count, vm_page_wire_count);
698
699 simple_lock_init(&vm_paging_lock, 0);
700}
701
702#ifndef MACHINE_PAGES
703/*
704 * We implement pmap_steal_memory and pmap_startup with the help
705 * of two simpler functions, pmap_virtual_space and pmap_next_page.
706 */
707
708void *
709pmap_steal_memory(
710 vm_size_t size)
711{
712 vm_offset_t addr, vaddr;
713 ppnum_t phys_page;
714
715 /*
716 * We round the size up to a multiple of the pointer size.
717 */
718
719 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
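	/*
	 * Editorial example: with an 8-byte pointer size, a request of 13
	 * bytes becomes (13 + 7) & ~7 = 16, so every address handed out
	 * below stays pointer-aligned.
	 */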
720
721 /*
722 * If this is the first call to pmap_steal_memory,
723 * we have to initialize ourself.
724 */
725
726 if (virtual_space_start == virtual_space_end) {
727 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
728
729 /*
730 * The initial values must be aligned properly, and
731 * we don't trust the pmap module to do it right.
732 */
733
734 virtual_space_start = round_page(virtual_space_start);
735 virtual_space_end = trunc_page(virtual_space_end);
736 }
737
738 /*
739 * Allocate virtual memory for this request.
740 */
741
742 addr = virtual_space_start;
743 virtual_space_start += size;
744
745 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
746
747 /*
748 * Allocate and map physical pages to back new virtual pages.
749 */
750
751 for (vaddr = round_page(addr);
752 vaddr < addr + size;
753 vaddr += PAGE_SIZE) {
754
755 if (!pmap_next_page_hi(&phys_page))
756 panic("pmap_steal_memory");
757
758 /*
759 * XXX Logically, these mappings should be wired,
760 * but some pmap modules barf if they are.
761 */
762#if defined(__LP64__)
763 pmap_pre_expand(kernel_pmap, vaddr);
764#endif
765
766 pmap_enter(kernel_pmap, vaddr, phys_page,
767 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
768 VM_WIMG_USE_DEFAULT, FALSE);
769 /*
770 * Account for newly stolen memory
771 */
772 vm_page_wire_count++;
773
774 }
775
776 return (void *) addr;
777}
778
779void
780pmap_startup(
781 vm_offset_t *startp,
782 vm_offset_t *endp)
783{
784 unsigned int i, npages, pages_initialized, fill, fillval;
785 ppnum_t phys_page;
786 addr64_t tmpaddr;
787
788 /*
789 * We calculate how many page frames we will have
790 * and then allocate the page structures in one chunk.
791 */
792
793 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
794 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
795 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
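	/*
	 * Editorial note: each usable page costs PAGE_SIZE bytes of frame
	 * memory plus sizeof(struct vm_page) of bookkeeping, so dividing the
	 * remaining memory by (PAGE_SIZE + sizeof(*vm_pages)) leaves room for
	 * the vm_page array stolen just below.
	 */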
796
797 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
798
799 /*
800 * Initialize the page frames.
801 */
802 for (i = 0, pages_initialized = 0; i < npages; i++) {
803 if (!pmap_next_page(&phys_page))
804 break;
805 if (pages_initialized == 0 || phys_page < vm_page_lowest)
806 vm_page_lowest = phys_page;
807
808 vm_page_init(&vm_pages[i], phys_page, FALSE);
809 vm_page_pages++;
810 pages_initialized++;
811 }
812 vm_pages_count = pages_initialized;
813
814 /*
815 * Check if we want to initialize pages to a known value
816 */
817 fill = 0; /* Assume no fill */
818 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
819#if DEBUG
820 /* This slows down booting the DEBUG kernel, particularly on
821 * large memory systems, but is worthwhile in deterministically
822 * trapping uninitialized memory usage.
823 */
824 if (fill == 0) {
825 fill = 1;
826 fillval = 0xDEB8F177;
827 }
828#endif
829 if (fill)
830 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
831 // -debug code remove
832 if (2 == vm_himemory_mode) {
833 // free low -> high so high is preferred
834 for (i = 1; i <= pages_initialized; i++) {
835 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
836 vm_page_release(&vm_pages[i - 1]);
837 }
838 }
839 else
840 // debug code remove-
841
842 /*
843 * Release pages in reverse order so that physical pages
844 * initially get allocated in ascending addresses. This keeps
845 * the devices (which must address physical memory) happy if
846 * they require several consecutive pages.
847 */
848 for (i = pages_initialized; i > 0; i--) {
849 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
850 vm_page_release(&vm_pages[i - 1]);
851 }
852
853#if 0
854 {
855 vm_page_t xx, xxo, xxl;
856 int i, j, k, l;
857
858 j = 0; /* (BRINGUP) */
859 xxl = 0;
860
861 for( i = 0; i < vm_colors; i++ ) {
862 queue_iterate(&vm_page_queue_free[i],
863 xx,
864 vm_page_t,
865 pageq) { /* BRINGUP */
866 j++; /* (BRINGUP) */
867 if(j > vm_page_free_count) { /* (BRINGUP) */
868 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
869 }
870
871 l = vm_page_free_count - j; /* (BRINGUP) */
872 k = 0; /* (BRINGUP) */
873
874 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
875
876 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
877 k++;
878 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
879 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
880 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
881 }
882 }
883
884 xxl = xx;
885 }
886 }
887
888 if(j != vm_page_free_count) { /* (BRINGUP) */
889 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
890 }
891 }
892#endif
893
894
895 /*
896 * We have to re-align virtual_space_start,
897 * because pmap_steal_memory has been using it.
898 */
899
900 virtual_space_start = round_page(virtual_space_start);
901
902 *startp = virtual_space_start;
903 *endp = virtual_space_end;
904}
905#endif /* MACHINE_PAGES */
906
907/*
908 * Routine: vm_page_module_init
909 * Purpose:
910 * Second initialization pass, to be done after
911 * the basic VM system is ready.
912 */
913void
914vm_page_module_init(void)
915{
916 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
917 0, PAGE_SIZE, "vm pages");
918
919#if ZONE_DEBUG
920 zone_debug_disable(vm_page_zone);
921#endif /* ZONE_DEBUG */
922
923 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
924 zone_change(vm_page_zone, Z_EXPAND, FALSE);
925 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
926 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
927 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
928 /*
929 * Adjust zone statistics to account for the real pages allocated
930 * in vm_page_create(). [Q: is this really what we want?]
931 */
932 vm_page_zone->count += vm_page_pages;
933 vm_page_zone->sum_count += vm_page_pages;
934 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
935}
936
937/*
938 * Routine: vm_page_create
939 * Purpose:
940 * After the VM system is up, machine-dependent code
941 * may stumble across more physical memory. For example,
942 * memory that it was reserving for a frame buffer.
943 * vm_page_create turns this memory into available pages.
944 */
945
946void
947vm_page_create(
948 ppnum_t start,
949 ppnum_t end)
950{
951 ppnum_t phys_page;
952 vm_page_t m;
953
954 for (phys_page = start;
955 phys_page < end;
956 phys_page++) {
957 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
958 == VM_PAGE_NULL)
959 vm_page_more_fictitious();
960
961 m->fictitious = FALSE;
962 pmap_clear_noencrypt(phys_page);
963
964 vm_page_pages++;
965 vm_page_release(m);
966 }
967}
968
969/*
970 * vm_page_hash:
971 *
972 * Distributes the object/offset key pair among hash buckets.
973 *
974 * NOTE: The bucket count must be a power of 2
975 */
976#define vm_page_hash(object, offset) (\
977 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
978 & vm_page_hash_mask)
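
/*
 * Illustrative sketch (editorial addition, compiled out): how the routines
 * below turn a hash value into a bucket and the spin lock covering it.
 * vm_page_hash_example() is a hypothetical helper, not part of this file.
 */
#if 0
static vm_page_t
vm_page_hash_example(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	int			hash_id;
	vm_page_bucket_t	*bucket;
	lck_spin_t		*bucket_lock;
	vm_page_t		mem;

	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);
	for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
		if (mem->object == object && mem->offset == offset)
			break;
	}
	lck_spin_unlock(bucket_lock);

	return mem;
}
#endif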
979
980
981/*
982 * vm_page_insert: [ internal use only ]
983 *
984 * Inserts the given mem entry into the object/object-page
985 * table and object list.
986 *
987 * The object must be locked.
988 */
989void
990vm_page_insert(
991 vm_page_t mem,
992 vm_object_t object,
993 vm_object_offset_t offset)
994{
995 vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
996}
997
998void
999vm_page_insert_internal(
1000 vm_page_t mem,
1001 vm_object_t object,
1002 vm_object_offset_t offset,
1003 boolean_t queues_lock_held,
1004 boolean_t insert_in_hash,
1005 boolean_t batch_pmap_op)
1006{
1007 vm_page_bucket_t *bucket;
1008 lck_spin_t *bucket_lock;
1009 int hash_id;
1010
1011 XPR(XPR_VM_PAGE,
1012 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1013 object, offset, mem, 0,0);
1014#if 0
1015 /*
1016 * we may not hold the page queue lock
1017 * so this check isn't safe to make
1018 */
1019 VM_PAGE_CHECK(mem);
1020#endif
1021
1022 if (object == vm_submap_object) {
1023 /* the vm_submap_object is only a placeholder for submaps */
1024 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1025 }
1026
1027 vm_object_lock_assert_exclusive(object);
1028#if DEBUG
1029 lck_mtx_assert(&vm_page_queue_lock,
1030 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1031 : LCK_MTX_ASSERT_NOTOWNED);
1032#endif /* DEBUG */
1033
1034 if (insert_in_hash == TRUE) {
1035#if DEBUG
1036 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1037 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1038 "already in (obj=%p,off=0x%llx)",
1039 mem, object, offset, mem->object, mem->offset);
1040#endif
1041 assert(!object->internal || offset < object->vo_size);
1042
1043 /* only insert "pageout" pages into "pageout" objects,
1044 * and normal pages into normal objects */
1045 assert(object->pageout == mem->pageout);
1046
1047 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1048
1049 /*
1050 * Record the object/offset pair in this page
1051 */
1052
1053 mem->object = object;
1054 mem->offset = offset;
1055
1056 /*
1057 * Insert it into the object_object/offset hash table
1058 */
1059 hash_id = vm_page_hash(object, offset);
1060 bucket = &vm_page_buckets[hash_id];
1061 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1062
1063 lck_spin_lock(bucket_lock);
1064
1065 mem->next = bucket->pages;
1066 bucket->pages = mem;
1067#if MACH_PAGE_HASH_STATS
1068 if (++bucket->cur_count > bucket->hi_count)
1069 bucket->hi_count = bucket->cur_count;
1070#endif /* MACH_PAGE_HASH_STATS */
1071
1072 lck_spin_unlock(bucket_lock);
1073 }
1074
1075 {
1076 unsigned int cache_attr;
1077
1078 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1079
1080 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1081 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1082 }
1083 }
1084 /*
1085 * Now link into the object's list of backed pages.
1086 */
1087
1088 VM_PAGE_INSERT(mem, object);
1089 mem->tabled = TRUE;
1090
1091 /*
1092 * Show that the object has one more resident page.
1093 */
1094
1095 object->resident_page_count++;
1096 if (VM_PAGE_WIRED(mem)) {
1097 object->wired_page_count++;
1098 }
1099 assert(object->resident_page_count >= object->wired_page_count);
1100
1101 assert(!mem->reusable);
1102
1103 if (object->purgable == VM_PURGABLE_VOLATILE) {
1104 if (VM_PAGE_WIRED(mem)) {
1105 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1106 } else {
1107 OSAddAtomic(1, &vm_page_purgeable_count);
1108 }
1109 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1110 mem->throttled) {
1111 /*
1112 * This page belongs to a purged VM object but hasn't
1113 * been purged (because it was "busy").
1114 * It's in the "throttled" queue and hence not
1115 * visible to vm_pageout_scan(). Move it to a pageable
1116 * queue, so that it can eventually be reclaimed, instead
1117 * of lingering in the "empty" object.
1118 */
1119 if (queues_lock_held == FALSE)
1120 vm_page_lockspin_queues();
1121 vm_page_deactivate(mem);
1122 if (queues_lock_held == FALSE)
1123 vm_page_unlock_queues();
1124 }
1125}
1126
1127/*
1128 * vm_page_replace:
1129 *
1130 * Exactly like vm_page_insert, except that we first
1131 * remove any existing page at the given offset in object.
1132 *
1133 * The object must be locked.
1134 */
1135void
1136vm_page_replace(
1137 register vm_page_t mem,
1138 register vm_object_t object,
1139 register vm_object_offset_t offset)
1140{
1141 vm_page_bucket_t *bucket;
1142 vm_page_t found_m = VM_PAGE_NULL;
1143 lck_spin_t *bucket_lock;
1144 int hash_id;
1145
1146#if 0
1147 /*
1148 * we don't hold the page queue lock
1149 * so this check isn't safe to make
1150 */
1151 VM_PAGE_CHECK(mem);
1152#endif
1153 vm_object_lock_assert_exclusive(object);
1154#if DEBUG
1155 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1156 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1157 "already in (obj=%p,off=0x%llx)",
1158 mem, object, offset, mem->object, mem->offset);
1159 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1160#endif
1161 /*
1162 * Record the object/offset pair in this page
1163 */
1164
1165 mem->object = object;
1166 mem->offset = offset;
1167
1168 /*
1169 * Insert it into the object_object/offset hash table,
1170 * replacing any page that might have been there.
1171 */
1172
1173 hash_id = vm_page_hash(object, offset);
1174 bucket = &vm_page_buckets[hash_id];
1175 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1176
1177 lck_spin_lock(bucket_lock);
1178
1179 if (bucket->pages) {
1180 vm_page_t *mp = &bucket->pages;
1181 vm_page_t m = *mp;
1182
1183 do {
1184 if (m->object == object && m->offset == offset) {
1185 /*
1186 * Remove old page from hash list
1187 */
1188 *mp = m->next;
1189
1190 found_m = m;
1191 break;
1192 }
1193 mp = &m->next;
1194 } while ((m = *mp));
1195
1196 mem->next = bucket->pages;
1197 } else {
1198 mem->next = VM_PAGE_NULL;
1199 }
1200 /*
1201 * insert new page at head of hash list
1202 */
1203 bucket->pages = mem;
1204
1205 lck_spin_unlock(bucket_lock);
1206
1207 if (found_m) {
1208 /*
1209 * there was already a page at the specified
1210 * offset for this object... remove it from
1211 * the object and free it back to the free list
1212 */
1213 vm_page_free_unlocked(found_m, FALSE);
1214 }
1215 vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1216}
1217
1218/*
1219 * vm_page_remove: [ internal use only ]
1220 *
1221 * Removes the given mem entry from the object/offset-page
1222 * table and the object page list.
1223 *
1224 * The object must be locked.
1225 */
1226
1227void
1228vm_page_remove(
1229 vm_page_t mem,
1230 boolean_t remove_from_hash)
1231{
1232 vm_page_bucket_t *bucket;
1233 vm_page_t this;
1234 lck_spin_t *bucket_lock;
1235 int hash_id;
1236
1237 XPR(XPR_VM_PAGE,
1238 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1239 mem->object, mem->offset,
1240 mem, 0,0);
1241
1242 vm_object_lock_assert_exclusive(mem->object);
1243 assert(mem->tabled);
1244 assert(!mem->cleaning);
1245 assert(!mem->laundry);
1246#if 0
1247 /*
1248 * we don't hold the page queue lock
1249 * so this check isn't safe to make
1250 */
1251 VM_PAGE_CHECK(mem);
1252#endif
1253 if (remove_from_hash == TRUE) {
1254 /*
1255 * Remove from the object_object/offset hash table
1256 */
1257 hash_id = vm_page_hash(mem->object, mem->offset);
1258 bucket = &vm_page_buckets[hash_id];
1259 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1260
1261 lck_spin_lock(bucket_lock);
1262
1263 if ((this = bucket->pages) == mem) {
1264 /* optimize for common case */
1265
1266 bucket->pages = mem->next;
1267 } else {
1268 vm_page_t *prev;
1269
1270 for (prev = &this->next;
1271 (this = *prev) != mem;
1272 prev = &this->next)
1273 continue;
1274 *prev = this->next;
1275 }
1276#if MACH_PAGE_HASH_STATS
1277 bucket->cur_count--;
1278#endif /* MACH_PAGE_HASH_STATS */
1279
1280 lck_spin_unlock(bucket_lock);
1281 }
1282 /*
1283 * Now remove from the object's list of backed pages.
1284 */
1285
1286 VM_PAGE_REMOVE(mem);
1287
1288 /*
1289 * And show that the object has one fewer resident
1290 * page.
1291 */
1292
1293 assert(mem->object->resident_page_count > 0);
1294 mem->object->resident_page_count--;
1295
1296 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1297 if (mem->object->resident_page_count == 0)
1298 vm_object_cache_remove(mem->object);
1299 }
1300
1301 if (VM_PAGE_WIRED(mem)) {
1302 assert(mem->object->wired_page_count > 0);
1303 mem->object->wired_page_count--;
1304 }
1305 assert(mem->object->resident_page_count >=
1306 mem->object->wired_page_count);
1307 if (mem->reusable) {
1308 assert(mem->object->reusable_page_count > 0);
1309 mem->object->reusable_page_count--;
1310 assert(mem->object->reusable_page_count <=
1311 mem->object->resident_page_count);
1312 mem->reusable = FALSE;
1313 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1314 vm_page_stats_reusable.reused_remove++;
1315 } else if (mem->object->all_reusable) {
1316 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1317 vm_page_stats_reusable.reused_remove++;
1318 }
1319
1320 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1321 if (VM_PAGE_WIRED(mem)) {
1322 assert(vm_page_purgeable_wired_count > 0);
1323 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1324 } else {
1325 assert(vm_page_purgeable_count > 0);
1326 OSAddAtomic(-1, &vm_page_purgeable_count);
1327 }
1328 }
1329 if (mem->object->set_cache_attr == TRUE)
1330 pmap_set_cache_attributes(mem->phys_page, 0);
1331
1332 mem->tabled = FALSE;
1333 mem->object = VM_OBJECT_NULL;
1334 mem->offset = (vm_object_offset_t) -1;
1335}
1336
1337
1338/*
1339 * vm_page_lookup:
1340 *
1341 * Returns the page associated with the object/offset
1342 * pair specified; if none is found, VM_PAGE_NULL is returned.
1343 *
1344 * The object must be locked. No side effects.
1345 */
1346
1347unsigned long vm_page_lookup_hint = 0;
1348unsigned long vm_page_lookup_hint_next = 0;
1349unsigned long vm_page_lookup_hint_prev = 0;
1350unsigned long vm_page_lookup_hint_miss = 0;
1351unsigned long vm_page_lookup_bucket_NULL = 0;
1352unsigned long vm_page_lookup_miss = 0;
1353
1354
1355vm_page_t
1356vm_page_lookup(
1357 vm_object_t object,
1358 vm_object_offset_t offset)
1359{
1360 vm_page_t mem;
1361 vm_page_bucket_t *bucket;
1362 queue_entry_t qe;
1363 lck_spin_t *bucket_lock;
1364 int hash_id;
1365
1366 vm_object_lock_assert_held(object);
1367 mem = object->memq_hint;
1368
1369 if (mem != VM_PAGE_NULL) {
1370 assert(mem->object == object);
1371
1372 if (mem->offset == offset) {
1373 vm_page_lookup_hint++;
1374 return mem;
1375 }
1376 qe = queue_next(&mem->listq);
1377
1378 if (! queue_end(&object->memq, qe)) {
1379 vm_page_t next_page;
1380
1381 next_page = (vm_page_t) qe;
1382 assert(next_page->object == object);
1383
1384 if (next_page->offset == offset) {
1385 vm_page_lookup_hint_next++;
1386 object->memq_hint = next_page; /* new hint */
1387 return next_page;
1388 }
1389 }
1390 qe = queue_prev(&mem->listq);
1391
1392 if (! queue_end(&object->memq, qe)) {
1393 vm_page_t prev_page;
1394
1395 prev_page = (vm_page_t) qe;
1396 assert(prev_page->object == object);
1397
1398 if (prev_page->offset == offset) {
1399 vm_page_lookup_hint_prev++;
1400 object->memq_hint = prev_page; /* new hint */
1401 return prev_page;
1402 }
1403 }
1404 }
1405 /*
1406 * Search the hash table for this object/offset pair
1407 */
1408 hash_id = vm_page_hash(object, offset);
1409 bucket = &vm_page_buckets[hash_id];
1410
1411 /*
1412 * since we hold the object lock, we are guaranteed that no
1413 * new pages can be inserted into this object... this in turn
1414 * guarantees that the page we're looking for can't exist
1415 * if the bucket it hashes to is currently NULL even when looked
1416 * at outside the scope of the hash bucket lock... this is a
1417 * really cheap optimization to avoid taking the lock
1418 */
1419 if (bucket->pages == VM_PAGE_NULL) {
1420 vm_page_lookup_bucket_NULL++;
1421
1422 return (VM_PAGE_NULL);
1423 }
1424 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1425
1426 lck_spin_lock(bucket_lock);
1427
1428 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1429#if 0
1430 /*
1431 * we don't hold the page queue lock
1432 * so this check isn't safe to make
1433 */
1434 VM_PAGE_CHECK(mem);
1435#endif
1436 if ((mem->object == object) && (mem->offset == offset))
1437 break;
1438 }
1439 lck_spin_unlock(bucket_lock);
1440
1441 if (mem != VM_PAGE_NULL) {
1442 if (object->memq_hint != VM_PAGE_NULL) {
1443 vm_page_lookup_hint_miss++;
1444 }
1445 assert(mem->object == object);
1446 object->memq_hint = mem;
1447 } else
1448 vm_page_lookup_miss++;
1449
1450 return(mem);
1451}
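
/*
 * Editorial usage sketch (compiled out): callers hold the object lock
 * across the lookup and then deal with a possibly busy page themselves.
 * vm_page_lookup_usage_example() is hypothetical and not part of this file.
 */
#if 0
static void
vm_page_lookup_usage_example(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	vm_object_lock(object);
	m = vm_page_lookup(object, offset);
	if (m != VM_PAGE_NULL && !m->busy) {
		/* safe to examine or manipulate the page here */
	}
	vm_object_unlock(object);
}
#endif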
1452
1453
1454/*
1455 * vm_page_rename:
1456 *
1457 * Move the given memory entry from its
1458 * current object to the specified target object/offset.
1459 *
1460 * The object must be locked.
1461 */
1462void
1463vm_page_rename(
1464 register vm_page_t mem,
1465 register vm_object_t new_object,
1466 vm_object_offset_t new_offset,
1467 boolean_t encrypted_ok)
1468{
1469 assert(mem->object != new_object);
1470
1471 /*
1472 * ENCRYPTED SWAP:
1473 * The encryption key is based on the page's memory object
1474 * (aka "pager") and paging offset. Moving the page to
1475 * another VM object changes its "pager" and "paging_offset"
1476 * so it has to be decrypted first, or we would lose the key.
1477 *
1478 * One exception is VM object collapsing, where we transfer pages
1479 * from one backing object to its parent object. This operation also
1480 * transfers the paging information, so the <pager,paging_offset> info
1481 * should remain consistent. The caller (vm_object_do_collapse())
1482 * sets "encrypted_ok" in this case.
1483 */
1484 if (!encrypted_ok && mem->encrypted) {
1485 panic("vm_page_rename: page %p is encrypted\n", mem);
1486 }
1487
1488 XPR(XPR_VM_PAGE,
1489 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1490 new_object, new_offset,
1491 mem, 0,0);
1492
1493 /*
1494 * Changes to mem->object require the page lock because
1495 * the pageout daemon uses that lock to get the object.
1496 */
1497 vm_page_lockspin_queues();
1498
1499 vm_page_remove(mem, TRUE);
1500 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1501
1502 vm_page_unlock_queues();
1503}
1504
1505/*
1506 * vm_page_init:
1507 *
1508 * Initialize the fields in a new page.
1509 * This takes a structure with random values and initializes it
1510 * so that it can be given to vm_page_release or vm_page_insert.
1511 */
1512void
1513vm_page_init(
1514 vm_page_t mem,
1515 ppnum_t phys_page,
1516 boolean_t lopage)
1517{
1518 assert(phys_page);
1519
1520#if DEBUG
1521 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1522 if (!(pmap_valid_page(phys_page))) {
1523 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1524 }
1525 }
1526#endif
1527 *mem = vm_page_template;
1528 mem->phys_page = phys_page;
1529#if 0
1530 /*
1531 * we're leaving this turned off for now... currently pages
1532 * come off the free list and are either immediately dirtied/referenced
1533 * due to zero-fill or COW faults, or are used to read or write files...
1534 * in the file I/O case, the UPL mechanism takes care of clearing
1535 * the state of the HW ref/mod bits in a somewhat fragile way.
1536 * Since we may change the way this works in the future (to toughen it up),
1537 * I'm leaving this as a reminder of where these bits could get cleared
1538 */
1539
1540 /*
1541 * make sure both the h/w referenced and modified bits are
1542 * clear at this point... we are especially dependent on
1543 * not finding a 'stale' h/w modified bit in a number of spots
1544 * once this page goes back into use
1545 */
1546 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1547#endif
1548 mem->lopage = lopage;
1549}
1550
1551/*
1552 * vm_page_grab_fictitious:
1553 *
1554 * Remove a fictitious page from the free list.
1555 * Returns VM_PAGE_NULL if there are no free pages.
1556 */
1557int c_vm_page_grab_fictitious = 0;
1558int c_vm_page_grab_fictitious_failed = 0;
1559int c_vm_page_release_fictitious = 0;
1560int c_vm_page_more_fictitious = 0;
1561
1562vm_page_t
1563vm_page_grab_fictitious_common(
1564 ppnum_t phys_addr)
1565{
1566 vm_page_t m;
1567
1568 if ((m = (vm_page_t)zget(vm_page_zone))) {
1569
1570 vm_page_init(m, phys_addr, FALSE);
1571 m->fictitious = TRUE;
1572
1573 c_vm_page_grab_fictitious++;
1574 } else
1575 c_vm_page_grab_fictitious_failed++;
1576
1577 return m;
1578}
1579
1580vm_page_t
1581vm_page_grab_fictitious(void)
1582{
1583 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1584}
1585
1586vm_page_t
1587vm_page_grab_guard(void)
1588{
1589 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1590}
1591
1592
1593/*
1594 * vm_page_release_fictitious:
1595 *
1596 * Release a fictitious page to the zone pool
1597 */
1598void
1599vm_page_release_fictitious(
1600 vm_page_t m)
1601{
1602 assert(!m->free);
1603 assert(m->fictitious);
1604 assert(m->phys_page == vm_page_fictitious_addr ||
1605 m->phys_page == vm_page_guard_addr);
1606
1607 c_vm_page_release_fictitious++;
1608
1609 zfree(vm_page_zone, m);
1610}
1611
1612/*
1613 * vm_page_more_fictitious:
1614 *
1615 * Add more fictitious pages to the zone.
1616 * Allowed to block. This routine is way intimate
1617 * with the zones code, for several reasons:
1618 * 1. we need to carve some page structures out of physical
1619 * memory before zones work, so they _cannot_ come from
1620 * the zone_map.
1621 * 2. the zone needs to be collectable in order to prevent
1622 * growth without bound. These structures are used by
1623 * the device pager (by the hundreds and thousands), as
1624 * private pages for pageout, and as blocking pages for
1625 * pagein. Temporary bursts in demand should not result in
1626 * permanent allocation of a resource.
1627 * 3. To smooth allocation humps, we allocate single pages
1628 * with kernel_memory_allocate(), and cram them into the
1629 * zone.
1630 */
1631
1632void vm_page_more_fictitious(void)
1633{
1634 vm_offset_t addr;
1635 kern_return_t retval;
1636
1637 c_vm_page_more_fictitious++;
1638
1639 /*
1640 * Allocate a single page from the zone_map. Do not wait if no physical
1641 * pages are immediately available, and do not zero the space. We need
1642 * our own blocking lock here to prevent having multiple,
1643 * simultaneous requests from piling up on the zone_map lock. Exactly
1644 * one (of our) threads should be potentially waiting on the map lock.
1645 * If winner is not vm-privileged, then the page allocation will fail,
1646 * and it will temporarily block here in the vm_page_wait().
1647 */
1648 lck_mtx_lock(&vm_page_alloc_lock);
1649 /*
1650 * If another thread allocated space, just bail out now.
1651 */
1652 if (zone_free_count(vm_page_zone) > 5) {
1653 /*
1654 * The number "5" is a small number that is larger than the
1655 * number of fictitious pages that any single caller will
1656 * attempt to allocate. Otherwise, a thread will attempt to
1657 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1658 * release all of the resources and locks already acquired,
1659 * and then call this routine. This routine finds the pages
1660 * that the caller released, so fails to allocate new space.
1661 * The process repeats infinitely. The largest known number
1662 * of fictitious pages required in this manner is 2. 5 is
1663 * simply a somewhat larger number.
1664 */
1665 lck_mtx_unlock(&vm_page_alloc_lock);
1666 return;
1667 }
1668
1669 retval = kernel_memory_allocate(zone_map,
1670 &addr, PAGE_SIZE, VM_PROT_ALL,
1671 KMA_KOBJECT|KMA_NOPAGEWAIT);
1672 if (retval != KERN_SUCCESS) {
1673 /*
1674 * No page was available. Drop the
1675 * lock to give another thread a chance at it, and
1676 * wait for the pageout daemon to make progress.
1677 */
1678 lck_mtx_unlock(&vm_page_alloc_lock);
1679 vm_page_wait(THREAD_UNINT);
1680 return;
1681 }
1682 zcram(vm_page_zone, addr, PAGE_SIZE);
1683
1684 lck_mtx_unlock(&vm_page_alloc_lock);
1685}
1686
1687
1688/*
1689 * vm_pool_low():
1690 *
1691 * Return true if it is not likely that a non-vm_privileged thread
1692 * can get memory without blocking. Advisory only, since the
1693 * situation may change under us.
1694 */
1695int
1696vm_pool_low(void)
1697{
1698 /* No locking, at worst we will fib. */
1699 return( vm_page_free_count <= vm_page_free_reserved );
1700}
1701
1702
1703
1704/*
1705 * this is an interface to support bring-up of drivers
1706 * on platforms with physical memory > 4G...
1707 */
1708int vm_himemory_mode = 0;
1709
1710
1711/*
1712 * this interface exists to support hardware controllers
1713 * incapable of generating DMAs with more than 32 bits
1714 * of address on platforms with physical memory > 4G...
1715 */
1716unsigned int vm_lopages_allocated_q = 0;
1717unsigned int vm_lopages_allocated_cpm_success = 0;
1718unsigned int vm_lopages_allocated_cpm_failed = 0;
1719queue_head_t vm_lopage_queue_free;
1720
1721vm_page_t
1722vm_page_grablo(void)
1723{
1724 vm_page_t mem;
1725
1726 if (vm_lopage_needed == FALSE)
1727 return (vm_page_grab());
1728
1729 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1730
1731 if ( !queue_empty(&vm_lopage_queue_free)) {
1732 queue_remove_first(&vm_lopage_queue_free,
1733 mem,
1734 vm_page_t,
1735 pageq);
1736 assert(vm_lopage_free_count);
1737
1738 vm_lopage_free_count--;
1739 vm_lopages_allocated_q++;
1740
1741 if (vm_lopage_free_count < vm_lopage_lowater)
1742 vm_lopage_refill = TRUE;
1743
1744 lck_mtx_unlock(&vm_page_queue_free_lock);
1745 } else {
1746 lck_mtx_unlock(&vm_page_queue_free_lock);
1747
1748 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1749
1750 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1751 vm_lopages_allocated_cpm_failed++;
1752 lck_mtx_unlock(&vm_page_queue_free_lock);
1753
1754 return (VM_PAGE_NULL);
1755 }
1756 mem->busy = TRUE;
1757
1758 vm_page_lockspin_queues();
1759
1760 mem->gobbled = FALSE;
1761 vm_page_gobble_count--;
1762 vm_page_wire_count--;
1763
1764 vm_lopages_allocated_cpm_success++;
1765 vm_page_unlock_queues();
1766 }
1767 assert(mem->busy);
1768 assert(!mem->free);
1769 assert(!mem->pmapped);
1770 assert(!mem->wpmapped);
1771 assert(!pmap_is_noencrypt(mem->phys_page));
1772
1773 mem->pageq.next = NULL;
1774 mem->pageq.prev = NULL;
1775
1776 return (mem);
1777}
1778
1779
1780/*
1781 * vm_page_grab:
1782 *
1783 * first try to grab a page from the per-cpu free list...
1784 * this must be done while pre-emption is disabled... if
1785 * a page is available, we're done...
1786 * if no page is available, grab the vm_page_queue_free_lock
1787 * and see if current number of free pages would allow us
1788 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1789 * if there are pages available, disable preemption and
1790 * recheck the state of the per-cpu free list... we could
1791 * have been preempted and moved to a different cpu, or
1792 * some other thread could have re-filled it... if still
1793 * empty, figure out how many pages we can steal from the
1794 * global free queue and move to the per-cpu queue...
1795 * return 1 of these pages when done... only wakeup the
1796 * pageout_scan thread if we moved pages from the global
1797 * list... no need for the wakeup if we've satisfied the
1798 * request from the per-cpu queue.
1799 */
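
/*
 * Editorial usage sketch (compiled out): a typical non-privileged caller
 * loops until a page is available, blocking in VM_PAGE_WAIT() whenever
 * vm_page_grab() returns VM_PAGE_NULL. The helper name is hypothetical.
 */
#if 0
static vm_page_t
vm_page_grab_blocking_example(void)
{
	vm_page_t	mem;

	for (;;) {
		mem = vm_page_grab();
		if (mem != VM_PAGE_NULL)
			break;
		VM_PAGE_WAIT();
	}
	return mem;
}
#endif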
1800
1801#define COLOR_GROUPS_TO_STEAL 4
1802
1803
1804vm_page_t
1805vm_page_grab( void )
1806{
1807 vm_page_t mem;
1808
1809
1810 disable_preemption();
1811
1812 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1813return_page_from_cpu_list:
1814 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1815 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1816 mem->pageq.next = NULL;
1817
1818 enable_preemption();
1819
1820 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1821 assert(mem->tabled == FALSE);
1822 assert(mem->object == VM_OBJECT_NULL);
1823 assert(!mem->laundry);
1824 assert(!mem->free);
1825 assert(pmap_verify_free(mem->phys_page));
1826 assert(mem->busy);
1827 assert(!mem->encrypted);
1828 assert(!mem->pmapped);
1829 assert(!mem->wpmapped);
1830 assert(!mem->active);
1831 assert(!mem->inactive);
1832 assert(!mem->throttled);
1833 assert(!mem->speculative);
1834 assert(!pmap_is_noencrypt(mem->phys_page));
1835
1836 return mem;
1837 }
1838 enable_preemption();
1839
1840
1841 /*
1842 * Optionally produce warnings if the wire or gobble
1843 * counts exceed some threshold.
1844 */
1845 if (vm_page_wire_count_warning > 0
1846 && vm_page_wire_count >= vm_page_wire_count_warning) {
1847 printf("mk: vm_page_grab(): high wired page count of %d\n",
1848 vm_page_wire_count);
1849 assert(vm_page_wire_count < vm_page_wire_count_warning);
1850 }
1851 if (vm_page_gobble_count_warning > 0
1852 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1853 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1854 vm_page_gobble_count);
1855 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1856 }
1857
1858 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1859
1860 /*
1861 * Only let privileged threads (involved in pageout)
1862 * dip into the reserved pool.
1863 */
1864 if ((vm_page_free_count < vm_page_free_reserved) &&
1865 !(current_thread()->options & TH_OPT_VMPRIV)) {
1866 lck_mtx_unlock(&vm_page_queue_free_lock);
1867 mem = VM_PAGE_NULL;
1868 }
1869 else {
1870 vm_page_t head;
1871 vm_page_t tail;
1872 unsigned int pages_to_steal;
1873 unsigned int color;
1874
1875 while ( vm_page_free_count == 0 ) {
1876
1877 lck_mtx_unlock(&vm_page_queue_free_lock);
1878 /*
1879 * must be a privileged thread to be
1880 * in this state since a non-privileged
1881 * thread would have bailed if we were
1882 * under the vm_page_free_reserved mark
1883 */
1884 VM_PAGE_WAIT();
1885 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1886 }
1887
1888 disable_preemption();
1889
1890 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1891 lck_mtx_unlock(&vm_page_queue_free_lock);
1892
1893 /*
1894 * we got preempted and moved to another processor
1895 * or we got preempted and someone else ran and filled the cache
1896 */
1897 goto return_page_from_cpu_list;
1898 }
1899 if (vm_page_free_count <= vm_page_free_reserved)
1900 pages_to_steal = 1;
1901 else {
1902 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1903
1904 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1905 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1906 }
1907 color = PROCESSOR_DATA(current_processor(), start_color);
1908 head = tail = NULL;
1909
1910 while (pages_to_steal--) {
1911 if (--vm_page_free_count < vm_page_free_count_minimum)
1912 vm_page_free_count_minimum = vm_page_free_count;
1913
1914 while (queue_empty(&vm_page_queue_free[color]))
1915 color = (color + 1) & vm_color_mask;
1916
1917 queue_remove_first(&vm_page_queue_free[color],
1918 mem,
1919 vm_page_t,
1920 pageq);
1921 mem->pageq.next = NULL;
1922 mem->pageq.prev = NULL;
1923
1924 assert(!mem->active);
1925 assert(!mem->inactive);
1926 assert(!mem->throttled);
1927 assert(!mem->speculative);
1928
1929 color = (color + 1) & vm_color_mask;
1930
1931 if (head == NULL)
1932 head = mem;
1933 else
1934 tail->pageq.next = (queue_t)mem;
1935 tail = mem;
1936
1937 mem->pageq.prev = NULL;
1938 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1939 assert(mem->tabled == FALSE);
1940 assert(mem->object == VM_OBJECT_NULL);
1941 assert(!mem->laundry);
1942 assert(mem->free);
1943 mem->free = FALSE;
1944
1945 assert(pmap_verify_free(mem->phys_page));
1946 assert(mem->busy);
1947 assert(!mem->free);
1948 assert(!mem->encrypted);
1949 assert(!mem->pmapped);
1950 assert(!mem->wpmapped);
1951 assert(!pmap_is_noencrypt(mem->phys_page));
1952 }
1953 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1954 PROCESSOR_DATA(current_processor(), start_color) = color;
1955
1956 /*
1957 * satisfy this request
1958 */
1959 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1960 mem = head;
1961 mem->pageq.next = NULL;
1962
1963 lck_mtx_unlock(&vm_page_queue_free_lock);
1964
1965 enable_preemption();
1966 }
1967 /*
1968 * Decide if we should poke the pageout daemon.
1969 * We do this if the free count is less than the low
1970 * water mark, or if the free count is less than the high
1971 * water mark (but above the low water mark) and the inactive
1972 * count is less than its target.
1973 *
1974 * We don't have the counts locked ... if they change a little,
1975 * it doesn't really matter.
1976 */
1977 if ((vm_page_free_count < vm_page_free_min) ||
1978 ((vm_page_free_count < vm_page_free_target) &&
1979 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1980 thread_wakeup((event_t) &vm_page_free_wanted);
1981
1982 VM_CHECK_MEMORYSTATUS;
1983
1984// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1985
1986 return mem;
1987}
1988
1989/*
1990 * vm_page_release:
1991 *
1992 * Return a page to the free list.
1993 */
1994
1995void
1996vm_page_release(
1997 register vm_page_t mem)
1998{
1999 unsigned int color;
2000 int need_wakeup = 0;
2001 int need_priv_wakeup = 0;
2002
2003
2004 assert(!mem->private && !mem->fictitious);
2005 if (vm_page_free_verify) {
2006 assert(pmap_verify_free(mem->phys_page));
2007 }
2008// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2009
2010 pmap_clear_noencrypt(mem->phys_page);
2011
2012 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2013#if DEBUG
2014 if (mem->free)
2015 panic("vm_page_release");
2016#endif
2017
2018 assert(mem->busy);
2019 assert(!mem->laundry);
2020 assert(mem->object == VM_OBJECT_NULL);
2021 assert(mem->pageq.next == NULL &&
2022 mem->pageq.prev == NULL);
2023 assert(mem->listq.next == NULL &&
2024 mem->listq.prev == NULL);
2025
2026 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2027 vm_lopage_free_count < vm_lopage_free_limit &&
2028 mem->phys_page < max_valid_low_ppnum) {
2029 /*
2030 * this exists to support hardware controllers
2031 * incapable of generating DMAs with more than 32 bits
2032 * of address on platforms with physical memory > 4G...
2033 */
2034 queue_enter_first(&vm_lopage_queue_free,
2035 mem,
2036 vm_page_t,
2037 pageq);
2038 vm_lopage_free_count++;
2039
2040 if (vm_lopage_free_count >= vm_lopage_free_limit)
2041 vm_lopage_refill = FALSE;
2042
2043 mem->lopage = TRUE;
2044 } else {
2045 mem->lopage = FALSE;
2046 mem->free = TRUE;
2047
2048 color = mem->phys_page & vm_color_mask;
2049 queue_enter_first(&vm_page_queue_free[color],
2050 mem,
2051 vm_page_t,
2052 pageq);
2053 vm_page_free_count++;
2054 /*
2055 * Check if we should wake up someone waiting for a page.
2056 * But don't bother waking them unless they can allocate.
2057 *
2058 * We wake up only one thread, to prevent starvation.
2059 * Because the scheduling system handles wait queues FIFO,
2060 * if we wake up all waiting threads, one greedy thread
2061 * can starve multiple niceguy threads. When the threads
2062 * all wake up, the greedy thread runs first, grabs the page,
2063 * and waits for another page. It will be the first to run
2064 * when the next page is freed.
2065 *
2066 * However, there is a slight danger here.
2067 * The thread we wake might not use the free page.
2068 * Then the other threads could wait indefinitely
2069 * while the page goes unused. To forestall this,
2070 * the pageout daemon will keep making free pages
2071 * as long as vm_page_free_wanted is non-zero.
2072 */
2073
2074 assert(vm_page_free_count > 0);
2075 if (vm_page_free_wanted_privileged > 0) {
2076 vm_page_free_wanted_privileged--;
2077 need_priv_wakeup = 1;
2078 } else if (vm_page_free_wanted > 0 &&
2079 vm_page_free_count > vm_page_free_reserved) {
2080 vm_page_free_wanted--;
2081 need_wakeup = 1;
2082 }
2083 }
2084 lck_mtx_unlock(&vm_page_queue_free_lock);
2085
2086 if (need_priv_wakeup)
2087 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2088 else if (need_wakeup)
2089 thread_wakeup_one((event_t) &vm_page_free_count);
2090
2091 VM_CHECK_MEMORYSTATUS;
2092}
2093
2094/*
2095 * vm_page_wait:
2096 *
2097 * Wait for a page to become available.
2098 * If there are plenty of free pages, then we don't sleep.
2099 *
2100 * Returns:
2101 * TRUE: There may be another page, try again
2102 * FALSE: We were interrupted out of our wait, don't try again
2103 */
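
/*
 * A minimal sketch of the typical caller pattern, pairing vm_page_grab()
 * with vm_page_wait(); this is the same pattern used by
 * vm_page_part_zero_fill() later in this file ('m' is hypothetical):
 *
 *	vm_page_t m;
 *
 *	for (;;) {
 *		m = vm_page_grab();
 *		if (m != VM_PAGE_NULL)
 *			break;
 *		vm_page_wait(THREAD_UNINT);
 *	}
 */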
2104
2105boolean_t
2106vm_page_wait(
2107 int interruptible )
2108{
2109 /*
2110 * We can't use vm_page_free_reserved to make this
2111 * determination. Consider: some thread might
2112 * need to allocate two pages. The first allocation
2113 * succeeds, the second fails. After the first page is freed,
2114 * a call to vm_page_wait must really block.
2115 */
2116 kern_return_t wait_result;
2117 int need_wakeup = 0;
2118 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2119
2120 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2121
2122 if (is_privileged && vm_page_free_count) {
2123 lck_mtx_unlock(&vm_page_queue_free_lock);
2124 return TRUE;
2125 }
2126 if (vm_page_free_count < vm_page_free_target) {
2127
2128 if (is_privileged) {
2129 if (vm_page_free_wanted_privileged++ == 0)
2130 need_wakeup = 1;
2131 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2132 } else {
2133 if (vm_page_free_wanted++ == 0)
2134 need_wakeup = 1;
2135 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2136 }
2137 lck_mtx_unlock(&vm_page_queue_free_lock);
2138 counter(c_vm_page_wait_block++);
2139
2140 if (need_wakeup)
2141 thread_wakeup((event_t)&vm_page_free_wanted);
2142
2143 if (wait_result == THREAD_WAITING)
2144 wait_result = thread_block(THREAD_CONTINUE_NULL);
2145
2146 return(wait_result == THREAD_AWAKENED);
2147 } else {
2148 lck_mtx_unlock(&vm_page_queue_free_lock);
2149 return TRUE;
2150 }
2151}
2152
2153/*
2154 * vm_page_alloc:
2155 *
2156 * Allocate and return a memory cell associated
2157 * with this VM object/offset pair.
2158 *
2159 * Object must be locked.
2160 */
2161
2162vm_page_t
2163vm_page_alloc(
2164 vm_object_t object,
2165 vm_object_offset_t offset)
2166{
2167 register vm_page_t mem;
2168
2169 vm_object_lock_assert_exclusive(object);
2170 mem = vm_page_grab();
2171 if (mem == VM_PAGE_NULL)
2172 return VM_PAGE_NULL;
2173
2174 vm_page_insert(mem, object, offset);
2175
2176 return(mem);
2177}
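
/*
 * A hypothetical calling sequence for vm_page_alloc() (illustrative only):
 * the object must be locked exclusively across the call, and a VM_PAGE_NULL
 * return is normally handled by dropping the object lock, waiting for
 * memory and retrying:
 *
 *	vm_object_lock(object);
 *	while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
 *		vm_object_unlock(object);
 *		VM_PAGE_WAIT();
 *		vm_object_lock(object);
 *	}
 *	...initialize and map the page...
 *	vm_object_unlock(object);
 */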
2178
2179vm_page_t
2180vm_page_alloclo(
2181 vm_object_t object,
2182 vm_object_offset_t offset)
2183{
2184 register vm_page_t mem;
2185
2186 vm_object_lock_assert_exclusive(object);
2187 mem = vm_page_grablo();
2188 if (mem == VM_PAGE_NULL)
2189 return VM_PAGE_NULL;
2190
2191 vm_page_insert(mem, object, offset);
2192
2193 return(mem);
2194}
2195
2196
2197/*
2198 * vm_page_alloc_guard:
2199 *
2200 * Allocate a fictitious page which will be used
2201 * as a guard page. The page will be inserted into
2202 * the object and returned to the caller.
2203 */
2204
2205vm_page_t
2206vm_page_alloc_guard(
2207 vm_object_t object,
2208 vm_object_offset_t offset)
2209{
2210 register vm_page_t mem;
2211
2212 vm_object_lock_assert_exclusive(object);
2213 mem = vm_page_grab_guard();
2214 if (mem == VM_PAGE_NULL)
2215 return VM_PAGE_NULL;
2216
2217 vm_page_insert(mem, object, offset);
2218
2219 return(mem);
2220}
2221
2222
2223counter(unsigned int c_laundry_pages_freed = 0;)
2224
2225/*
2226 * vm_page_free_prepare:
2227 *
2228 * Removes page from any queue it may be on
2229 * and disassociates it from its VM object.
2230 *
2231 * Object and page queues must be locked prior to entry.
2232 */
2233static void
2234vm_page_free_prepare(
2235 vm_page_t mem)
2236{
2237 vm_page_free_prepare_queues(mem);
2238 vm_page_free_prepare_object(mem, TRUE);
2239}
2240
2241
2242void
2243vm_page_free_prepare_queues(
2244 vm_page_t mem)
2245{
2246 VM_PAGE_CHECK(mem);
2247 assert(!mem->free);
2248 assert(!mem->cleaning);
2249#if DEBUG
2250 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2251 if (mem->free)
2252 panic("vm_page_free: freeing page on free list\n");
2253#endif
2254 if (mem->object) {
2255 vm_object_lock_assert_exclusive(mem->object);
2256 }
2257 if (mem->laundry) {
2258 /*
2259 * We may have to free a page while it's being laundered
2260 * if we lost its pager (due to a forced unmount, for example).
2261 * We need to call vm_pageout_steal_laundry() before removing
2262 * the page from its VM object, so that we can remove it
2263 * from its pageout queue and adjust the laundry accounting
2264 */
2265 vm_pageout_steal_laundry(mem, TRUE);
2266 counter(++c_laundry_pages_freed);
2267 }
2268
2269 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2270
2271 if (VM_PAGE_WIRED(mem)) {
2272 if (mem->object) {
2273 assert(mem->object->wired_page_count > 0);
2274 mem->object->wired_page_count--;
2275 assert(mem->object->resident_page_count >=
2276 mem->object->wired_page_count);
2277
2278 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2279 OSAddAtomic(+1, &vm_page_purgeable_count);
2280 assert(vm_page_purgeable_wired_count > 0);
2281 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2282 }
2283 }
2284 if (!mem->private && !mem->fictitious)
2285 vm_page_wire_count--;
2286 mem->wire_count = 0;
2287 assert(!mem->gobbled);
2288 } else if (mem->gobbled) {
2289 if (!mem->private && !mem->fictitious)
2290 vm_page_wire_count--;
2291 vm_page_gobble_count--;
2292 }
2293}
2294
2295
2296void
2297vm_page_free_prepare_object(
2298 vm_page_t mem,
2299 boolean_t remove_from_hash)
2300{
2301 if (mem->tabled)
2302 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2303
2304 PAGE_WAKEUP(mem); /* clears wanted */
2305
2306 if (mem->private) {
2307 mem->private = FALSE;
2308 mem->fictitious = TRUE;
2309 mem->phys_page = vm_page_fictitious_addr;
2310 }
2311 if ( !mem->fictitious) {
2312 vm_page_init(mem, mem->phys_page, mem->lopage);
2313 }
2314}
2315
2316
2317/*
2318 * vm_page_free:
2319 *
2320 * Returns the given page to the free list,
2321 * disassociating it with any VM object.
2322 *
2323 * Object and page queues must be locked prior to entry.
2324 */
2325void
2326vm_page_free(
2327 vm_page_t mem)
2328{
2329 vm_page_free_prepare(mem);
2330
2331 if (mem->fictitious) {
2332 vm_page_release_fictitious(mem);
2333 } else {
2334 vm_page_release(mem);
2335 }
2336}
2337
2338
2339void
2340vm_page_free_unlocked(
2341 vm_page_t mem,
2342 boolean_t remove_from_hash)
2343{
2344 vm_page_lockspin_queues();
2345 vm_page_free_prepare_queues(mem);
2346 vm_page_unlock_queues();
2347
2348 vm_page_free_prepare_object(mem, remove_from_hash);
2349
2350 if (mem->fictitious) {
2351 vm_page_release_fictitious(mem);
2352 } else {
2353 vm_page_release(mem);
2354 }
2355}
2356
2357
2358/*
2359 * Free a list of pages. The list can be up to several hundred pages,
2360 * as blocked up by vm_pageout_scan().
2361 * The big win is not having to take the free list lock once
2362 * per page.
2363 */
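
/*
 * A minimal sketch of how a caller might batch pages for this routine
 * (hypothetical; the usual producer is vm_pageout_scan()). Pages are
 * chained through pageq.next with pageq.prev left NULL, just like the
 * local_freeq chains built further down in this function:
 *
 *	vm_page_t freeq = VM_PAGE_NULL;
 *
 *	...for each page 'm' to be freed...
 *		m->pageq.next = (queue_entry_t) freeq;
 *		freeq = m;
 *
 *	vm_page_free_list(freeq, TRUE);
 */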
2364void
2365vm_page_free_list(
2366 vm_page_t freeq,
2367 boolean_t prepare_object)
2368{
2369 vm_page_t mem;
2370 vm_page_t nxt;
2371 vm_page_t local_freeq;
2372 int pg_count;
2373
2374 while (freeq) {
2375
2376 pg_count = 0;
2377 local_freeq = VM_PAGE_NULL;
2378 mem = freeq;
2379
2380 /*
2381 * break up the processing into smaller chunks so
2382 * that we can 'pipeline' the pages onto the
2383 * free list w/o introducing too much
2384 * contention on the global free queue lock
2385 */
2386 while (mem && pg_count < 64) {
2387
2388 assert(!mem->inactive);
2389 assert(!mem->active);
2390 assert(!mem->throttled);
2391 assert(!mem->free);
2392 assert(!mem->speculative);
2393 assert(!VM_PAGE_WIRED(mem));
2394 assert(mem->pageq.prev == NULL);
2395
2396 nxt = (vm_page_t)(mem->pageq.next);
2397
2398 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2399 assert(pmap_verify_free(mem->phys_page));
2400 }
2401 if (prepare_object == TRUE)
2402 vm_page_free_prepare_object(mem, TRUE);
2403
2404 if (!mem->fictitious) {
2405 assert(mem->busy);
2406
2407 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2408 vm_lopage_free_count < vm_lopage_free_limit &&
2409 mem->phys_page < max_valid_low_ppnum) {
2410 mem->pageq.next = NULL;
2411 vm_page_release(mem);
2412 } else {
2413 /*
2414 * IMPORTANT: we can't set the page "free" here
2415 * because that would make the page eligible for
2416 * a physically-contiguous allocation (see
2417 * vm_page_find_contiguous()) right away (we don't
2418 * hold the vm_page_queue_free lock). That would
2419 * cause trouble because the page is not actually
2420 * in the free queue yet...
2421 */
2422 mem->pageq.next = (queue_entry_t)local_freeq;
2423 local_freeq = mem;
2424 pg_count++;
2425
2426 pmap_clear_noencrypt(mem->phys_page);
2427 }
2428 } else {
2429 assert(mem->phys_page == vm_page_fictitious_addr ||
2430 mem->phys_page == vm_page_guard_addr);
2431 vm_page_release_fictitious(mem);
2432 }
2433 mem = nxt;
2434 }
2435 freeq = mem;
2436
2437 if ( (mem = local_freeq) ) {
2438 unsigned int avail_free_count;
2439 unsigned int need_wakeup = 0;
2440 unsigned int need_priv_wakeup = 0;
2441
2442 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2443
2444 while (mem) {
2445 int color;
2446
2447 nxt = (vm_page_t)(mem->pageq.next);
2448
2449 assert(!mem->free);
2450 assert(mem->busy);
2451 mem->free = TRUE;
2452
2453 color = mem->phys_page & vm_color_mask;
2454 queue_enter_first(&vm_page_queue_free[color],
2455 mem,
2456 vm_page_t,
2457 pageq);
2458 mem = nxt;
2459 }
2460 vm_page_free_count += pg_count;
2461 avail_free_count = vm_page_free_count;
2462
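	/*
	 * Wakeup accounting, worked through with hypothetical numbers: if the
	 * batch just added leaves avail_free_count == 20 with 3 privileged
	 * waiters, 10 regular waiters and vm_page_free_reserved == 4, the
	 * first block below wakes the 3 privileged waiters and charges those
	 * pages against avail_free_count; the second block then compares what
	 * is left against the reserve (20 - 3 - 4 = 13 >= 10) and wakes all
	 * 10 regular waiters.
	 */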
2463 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2464
2465 if (avail_free_count < vm_page_free_wanted_privileged) {
2466 need_priv_wakeup = avail_free_count;
2467 vm_page_free_wanted_privileged -= avail_free_count;
2468 avail_free_count = 0;
2469 } else {
2470 need_priv_wakeup = vm_page_free_wanted_privileged;
2471 vm_page_free_wanted_privileged = 0;
2472 avail_free_count -= need_priv_wakeup;
2473 }
2474 }
2475 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2476 unsigned int available_pages;
2477
2478 available_pages = avail_free_count - vm_page_free_reserved;
2479
2480 if (available_pages >= vm_page_free_wanted) {
2481 need_wakeup = vm_page_free_wanted;
2482 vm_page_free_wanted = 0;
2483 } else {
2484 need_wakeup = available_pages;
2485 vm_page_free_wanted -= available_pages;
2486 }
2487 }
2488 lck_mtx_unlock(&vm_page_queue_free_lock);
2489
2490 if (need_priv_wakeup != 0) {
2491 /*
2492 * There shouldn't be that many VM-privileged threads,
2493 * so let's wake them all up, even if we don't quite
2494 * have enough pages to satisfy them all.
2495 */
2496 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2497 }
2498 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2499 /*
2500 * We don't expect to have any more waiters
2501 * after this, so let's wake them all up at
2502 * once.
2503 */
2504 thread_wakeup((event_t) &vm_page_free_count);
2505 } else for (; need_wakeup != 0; need_wakeup--) {
2506 /*
2507 * Wake up one waiter per page we just released.
2508 */
2509 thread_wakeup_one((event_t) &vm_page_free_count);
2510 }
2511
2512 VM_CHECK_MEMORYSTATUS;
2513 }
2514 }
2515}
2516
2517
2518/*
2519 * vm_page_wire:
2520 *
2521 * Mark this page as wired down by yet
2522 * another map, removing it from paging queues
2523 * as necessary.
2524 *
2525 * The page's object and the page queues must be locked.
2526 */
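
/*
 * Hypothetical calling sequence (illustrative only) showing the locking
 * that the comment above requires:
 *
 *	vm_object_lock(object);		exclusive object lock
 *	vm_page_lockspin_queues();
 *	vm_page_wire(m);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */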
2527void
2528vm_page_wire(
2529 register vm_page_t mem)
2530{
2531
2532// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2533
2534 VM_PAGE_CHECK(mem);
2535 if (mem->object) {
2536 vm_object_lock_assert_exclusive(mem->object);
2537 } else {
2538 /*
2539 * In theory, the page should be in an object before it
2540 * gets wired, since we need to hold the object lock
2541 * to update some fields in the page structure.
2542 * However, some code (i386 pmap, for example) might want
2543 * to wire a page before it gets inserted into an object.
2544 * That's somewhat OK, as long as nobody else can get to
2545 * that page and update it at the same time.
2546 */
2547 }
2548#if DEBUG
2549 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2550#endif
2551 if ( !VM_PAGE_WIRED(mem)) {
2552
2553 if (mem->pageout_queue) {
2554 mem->pageout = FALSE;
2555 vm_pageout_throttle_up(mem);
2556 }
2557 VM_PAGE_QUEUES_REMOVE(mem);
2558
2559 if (mem->object) {
2560 mem->object->wired_page_count++;
2561 assert(mem->object->resident_page_count >=
2562 mem->object->wired_page_count);
2563 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2564 assert(vm_page_purgeable_count > 0);
2565 OSAddAtomic(-1, &vm_page_purgeable_count);
2566 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2567 }
2568 if (mem->object->all_reusable) {
2569 /*
2570 * Wired pages are not counted as "re-usable"
2571 * in "all_reusable" VM objects, so nothing
2572 * to do here.
2573 */
2574 } else if (mem->reusable) {
2575 /*
2576 * This page is not "re-usable" when it's
2577 * wired, so adjust its state and the
2578 * accounting.
2579 */
2580 vm_object_reuse_pages(mem->object,
2581 mem->offset,
2582 mem->offset+PAGE_SIZE_64,
2583 FALSE);
2584 }
2585 }
2586 assert(!mem->reusable);
2587
2588 if (!mem->private && !mem->fictitious && !mem->gobbled)
2589 vm_page_wire_count++;
2590 if (mem->gobbled)
2591 vm_page_gobble_count--;
2592 mem->gobbled = FALSE;
2593
2594 VM_CHECK_MEMORYSTATUS;
2595
2596 /*
2597 * ENCRYPTED SWAP:
2598 * The page could be encrypted, but
2599 * we don't have to decrypt it here
2600 * because we don't guarantee that the
2601 * data is actually valid at this point.
2602 * The page will get decrypted in
2603 * vm_fault_wire() if needed.
2604 */
2605 }
2606 assert(!mem->gobbled);
2607 mem->wire_count++;
2608 VM_PAGE_CHECK(mem);
2609}
2610
2611/*
2612 * vm_page_gobble:
2613 *
2614 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2615 *
2616 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2617 */
2618void
2619vm_page_gobble(
2620 register vm_page_t mem)
2621{
2622 vm_page_lockspin_queues();
2623 VM_PAGE_CHECK(mem);
2624
2625 assert(!mem->gobbled);
2626 assert( !VM_PAGE_WIRED(mem));
2627
2628 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2629 if (!mem->private && !mem->fictitious)
2630 vm_page_wire_count++;
2631 }
2632 vm_page_gobble_count++;
2633 mem->gobbled = TRUE;
2634 vm_page_unlock_queues();
2635}
2636
2637/*
2638 * vm_page_unwire:
2639 *
2640 * Release one wiring of this page, potentially
2641 * enabling it to be paged again.
2642 *
2643 * The page's object and the page queues must be locked.
2644 */
2645void
2646vm_page_unwire(
2647 vm_page_t mem,
2648 boolean_t queueit)
2649{
2650
2651// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2652
2653 VM_PAGE_CHECK(mem);
2654 assert(VM_PAGE_WIRED(mem));
2655 assert(mem->object != VM_OBJECT_NULL);
2656#if DEBUG
2657 vm_object_lock_assert_exclusive(mem->object);
2658 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2659#endif
2660 if (--mem->wire_count == 0) {
2661 assert(!mem->private && !mem->fictitious);
2662 vm_page_wire_count--;
2663 assert(mem->object->wired_page_count > 0);
2664 mem->object->wired_page_count--;
2665 assert(mem->object->resident_page_count >=
2666 mem->object->wired_page_count);
2667 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2668 OSAddAtomic(+1, &vm_page_purgeable_count);
2669 assert(vm_page_purgeable_wired_count > 0);
2670 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2671 }
2672 assert(!mem->laundry);
2673 assert(mem->object != kernel_object);
2674 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2675
2676 if (queueit == TRUE) {
2677 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2678 vm_page_deactivate(mem);
2679 } else {
2680 vm_page_activate(mem);
2681 }
2682 }
2683
2684 VM_CHECK_MEMORYSTATUS;
2685
2686 }
2687 VM_PAGE_CHECK(mem);
2688}
2689
2690/*
2691 * vm_page_deactivate:
2692 *
2693 * Returns the given page to the inactive list,
2694 * indicating that no physical maps have access
2695 * to this page. [Used by the physical mapping system.]
2696 *
2697 * The page queues must be locked.
2698 */
2699void
2700vm_page_deactivate(
2701 vm_page_t m)
2702{
2703 vm_page_deactivate_internal(m, TRUE);
2704}
2705
2706
2707void
2708vm_page_deactivate_internal(
2709 vm_page_t m,
2710 boolean_t clear_hw_reference)
2711{
2712
2713 VM_PAGE_CHECK(m);
2714 assert(m->object != kernel_object);
2715 assert(m->phys_page != vm_page_guard_addr);
2716
2717// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2718#if DEBUG
2719 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2720#endif
2721 /*
2722 * This page is no longer very interesting. If it was
2723 * interesting (active or inactive/referenced), then we
2724 * clear the reference bit and (re)enter it in the
2725 * inactive queue. Note wired pages should not have
2726 * their reference bit cleared.
2727 */
2728 assert ( !(m->absent && !m->unusual));
2729
2730 if (m->gobbled) { /* can this happen? */
2731 assert( !VM_PAGE_WIRED(m));
2732
2733 if (!m->private && !m->fictitious)
2734 vm_page_wire_count--;
2735 vm_page_gobble_count--;
2736 m->gobbled = FALSE;
2737 }
2738 /*
2739 * if this page is currently on the pageout queue, we can't do the
2740 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2741 * and we can't remove it manually since we would need the object lock
2742 * (which is not required here) to decrement the activity_in_progress
2743 * reference which is held on the object while the page is in the pageout queue...
2744 * just let the normal laundry processing proceed
2745 */
2746 if (m->pageout_queue || m->private || m->fictitious || (VM_PAGE_WIRED(m)))
2747 return;
2748
2749 if (!m->absent && clear_hw_reference == TRUE)
2750 pmap_clear_reference(m->phys_page);
2751
2752 m->reference = FALSE;
2753 m->no_cache = FALSE;
2754
2755 if (!m->inactive) {
2756 VM_PAGE_QUEUES_REMOVE(m);
2757
2758 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2759 m->dirty && m->object->internal &&
2760 (m->object->purgable == VM_PURGABLE_DENY ||
2761 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2762 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2763 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2764 m->throttled = TRUE;
2765 vm_page_throttled_count++;
2766 } else {
2767 if (m->object->named && m->object->ref_count == 1) {
2768 vm_page_speculate(m, FALSE);
2769#if DEVELOPMENT || DEBUG
2770 vm_page_speculative_recreated++;
2771#endif
2772 } else {
2773 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2774 }
2775 }
2776 }
2777}
2778
2779/*
2780 * vm_page_enqueue_cleaned
2781 *
2782 * Put the page on the cleaned queue, mark it cleaned, etc.
2783 * Being on the cleaned queue (and having m->clean_queue set)
2784 * does ** NOT ** guarantee that the page is clean!
2785 *
2786 * Call with the queues lock held.
2787 */
2788
2789void vm_page_enqueue_cleaned(vm_page_t m)
2790{
2791 assert(m->phys_page != vm_page_guard_addr);
2792#if DEBUG
2793 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2794#endif
2795 assert( !(m->absent && !m->unusual));
2796
2797 if (m->gobbled) {
2798 assert( !VM_PAGE_WIRED(m));
2799 if (!m->private && !m->fictitious)
2800 vm_page_wire_count--;
2801 vm_page_gobble_count--;
2802 m->gobbled = FALSE;
2803 }
2804 /*
2805 * if this page is currently on the pageout queue, we can't do the
2806 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2807 * and we can't remove it manually since we would need the object lock
2808 * (which is not required here) to decrement the activity_in_progress
2809 * reference which is held on the object while the page is in the pageout queue...
2810 * just let the normal laundry processing proceed
2811 */
2812 if (m->clean_queue || m->pageout_queue || m->private || m->fictitious)
2813 return;
2814
2815 VM_PAGE_QUEUES_REMOVE(m);
2816
2817 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
2818 m->clean_queue = TRUE;
2819 vm_page_cleaned_count++;
2820
2821 m->inactive = TRUE;
2822 vm_page_inactive_count++;
2823
2824 vm_pageout_enqueued_cleaned++;
2825}
2826
2827/*
2828 * vm_page_activate:
2829 *
2830 * Put the specified page on the active list (if appropriate).
2831 *
2832 * The page queues must be locked.
2833 */
2834
2835void
2836vm_page_activate(
2837 register vm_page_t m)
2838{
2839 VM_PAGE_CHECK(m);
2840#ifdef FIXME_4778297
2841 assert(m->object != kernel_object);
2842#endif
2843 assert(m->phys_page != vm_page_guard_addr);
2844#if DEBUG
2845 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2846#endif
2847 assert( !(m->absent && !m->unusual));
2848
2849 if (m->gobbled) {
2850 assert( !VM_PAGE_WIRED(m));
2851 if (!m->private && !m->fictitious)
2852 vm_page_wire_count--;
2853 vm_page_gobble_count--;
2854 m->gobbled = FALSE;
2855 }
2856 /*
2857 * if this page is currently on the pageout queue, we can't do the
2858 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2859 * and we can't remove it manually since we would need the object lock
2860 * (which is not required here) to decrement the activity_in_progress
2861 * reference which is held on the object while the page is in the pageout queue...
2862 * just let the normal laundry processing proceed
2863 */
2864 if (m->pageout_queue || m->private || m->fictitious)
2865 return;
2866
2867#if DEBUG
2868 if (m->active)
2869 panic("vm_page_activate: already active");
2870#endif
2871
2872 if (m->speculative) {
2873 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2874 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2875 }
2876
2877 VM_PAGE_QUEUES_REMOVE(m);
2878
2879 if ( !VM_PAGE_WIRED(m)) {
2880
2881 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2882 m->dirty && m->object->internal &&
2883 (m->object->purgable == VM_PURGABLE_DENY ||
2884 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2885 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2886 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2887 m->throttled = TRUE;
2888 vm_page_throttled_count++;
2889 } else {
2890 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2891 m->active = TRUE;
2892 vm_page_active_count++;
2893 }
2894 m->reference = TRUE;
2895 m->no_cache = FALSE;
2896 }
2897 VM_PAGE_CHECK(m);
2898}
2899
2900
2901/*
2902 * vm_page_speculate:
2903 *
2904 * Put the specified page on the speculative list (if appropriate).
2905 *
2906 * The page queues must be locked.
2907 */
2908void
2909vm_page_speculate(
2910 vm_page_t m,
2911 boolean_t new)
2912{
2913 struct vm_speculative_age_q *aq;
2914
2915 VM_PAGE_CHECK(m);
2916 assert(m->object != kernel_object);
2917 assert(m->phys_page != vm_page_guard_addr);
2918#if DEBUG
2919 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2920#endif
2921 assert( !(m->absent && !m->unusual));
2922
2923 /*
2924 * if this page is currently on the pageout queue, we can't do the
2925 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2926 * and we can't remove it manually since we would need the object lock
2927 * (which is not required here) to decrement the activity_in_progress
2928 * reference which is held on the object while the page is in the pageout queue...
2929 * just let the normal laundry processing proceed
2930 */
2931 if (m->pageout_queue || m->private || m->fictitious)
2932 return;
2933
2934 VM_PAGE_QUEUES_REMOVE(m);
2935
2936 if ( !VM_PAGE_WIRED(m)) {
2937 mach_timespec_t ts;
2938 clock_sec_t sec;
2939 clock_nsec_t nsec;
2940
2941 clock_get_system_nanotime(&sec, &nsec);
2942 ts.tv_sec = (unsigned int) sec;
2943 ts.tv_nsec = nsec;
2944
2945 if (vm_page_speculative_count == 0) {
2946
2947 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2948 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2949
2950 aq = &vm_page_queue_speculative[speculative_age_index];
2951
2952 /*
2953 * set the timer to begin a new group
2954 */
2955 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2956 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
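			/*
			 * e.g. if vm_page_speculative_q_age_ms were 500, this yields
			 * age_ts = { tv_sec = 0, tv_nsec = 500 * 1000 * 1000 }, i.e. 500ms,
			 * which ADD_MACH_TIMESPEC() below turns into an absolute expiry
			 * time relative to the current time in 'ts'.
			 */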
2957
2958 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2959 } else {
2960 aq = &vm_page_queue_speculative[speculative_age_index];
2961
2962 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2963
2964 speculative_age_index++;
2965
2966 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2967 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2968 if (speculative_age_index == speculative_steal_index) {
2969 speculative_steal_index = speculative_age_index + 1;
2970
2971 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2972 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2973 }
2974 aq = &vm_page_queue_speculative[speculative_age_index];
2975
2976 if (!queue_empty(&aq->age_q))
2977 vm_page_speculate_ageit(aq);
2978
2979 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2980 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2981
2982 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2983 }
2984 }
2985 enqueue_tail(&aq->age_q, &m->pageq);
2986 m->speculative = TRUE;
2987 vm_page_speculative_count++;
2988
2989 if (new == TRUE) {
2990 vm_object_lock_assert_exclusive(m->object);
2991
2992 m->object->pages_created++;
2993#if DEVELOPMENT || DEBUG
2994 vm_page_speculative_created++;
2995#endif
2996 }
2997 }
2998 VM_PAGE_CHECK(m);
2999}
3000
3001
3002/*
3003 * move pages from the specified aging bin to
3004 * the speculative bin that pageout_scan claims from
3005 *
3006 * The page queues must be locked.
3007 */
3008void
3009vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3010{
3011 struct vm_speculative_age_q *sq;
3012 vm_page_t t;
3013
3014 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3015
3016 if (queue_empty(&sq->age_q)) {
3017 sq->age_q.next = aq->age_q.next;
3018 sq->age_q.prev = aq->age_q.prev;
3019
3020 t = (vm_page_t)sq->age_q.next;
3021 t->pageq.prev = &sq->age_q;
3022
3023 t = (vm_page_t)sq->age_q.prev;
3024 t->pageq.next = &sq->age_q;
3025 } else {
3026 t = (vm_page_t)sq->age_q.prev;
3027 t->pageq.next = aq->age_q.next;
3028
3029 t = (vm_page_t)aq->age_q.next;
3030 t->pageq.prev = sq->age_q.prev;
3031
3032 t = (vm_page_t)aq->age_q.prev;
3033 t->pageq.next = &sq->age_q;
3034
3035 sq->age_q.prev = aq->age_q.prev;
3036 }
3037 queue_init(&aq->age_q);
3038}
3039
3040
3041void
3042vm_page_lru(
3043 vm_page_t m)
3044{
3045 VM_PAGE_CHECK(m);
3046 assert(m->object != kernel_object);
3047 assert(m->phys_page != vm_page_guard_addr);
3048
3049#if DEBUG
3050 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3051#endif
3052 /*
3053 * if this page is currently on the pageout queue, we can't do the
3054 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3055 * and we can't remove it manually since we would need the object lock
3056 * (which is not required here) to decrement the activity_in_progress
3057 * reference which is held on the object while the page is in the pageout queue...
3058 * just let the normal laundry processing proceed
3059 */
3060 if (m->pageout_queue || m->private || (VM_PAGE_WIRED(m)))
3061 return;
3062
3063 m->no_cache = FALSE;
3064
3065 VM_PAGE_QUEUES_REMOVE(m);
3066
3067 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3068}
3069
3070
3071void
3072vm_page_reactivate_all_throttled(void)
3073{
3074 vm_page_t first_throttled, last_throttled;
3075 vm_page_t first_active;
3076 vm_page_t m;
3077 int extra_active_count;
3078
3079 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3080 return;
3081
3082 extra_active_count = 0;
3083 vm_page_lock_queues();
3084 if (! queue_empty(&vm_page_queue_throttled)) {
3085 /*
3086 * Switch "throttled" pages to "active".
3087 */
3088 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3089 VM_PAGE_CHECK(m);
3090 assert(m->throttled);
3091 assert(!m->active);
3092 assert(!m->inactive);
3093 assert(!m->speculative);
3094 assert(!VM_PAGE_WIRED(m));
3095
3096 extra_active_count++;
3097
3098 m->throttled = FALSE;
3099 m->active = TRUE;
3100 VM_PAGE_CHECK(m);
3101 }
3102
3103 /*
3104 * Transfer the entire throttled queue to the regular LRU page queues.
3105 * We insert it at the head of the active queue, so that these pages
3106 * get re-evaluated by the LRU algorithm first, since they've been
3107 * completely out of it until now.
3108 */
3109 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3110 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3111 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3112 if (queue_empty(&vm_page_queue_active)) {
3113 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3114 } else {
3115 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3116 }
3117 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3118 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3119 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3120
3121#if DEBUG
3122 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3123#endif
3124 queue_init(&vm_page_queue_throttled);
3125 /*
3126 * Adjust the global page counts.
3127 */
3128 vm_page_active_count += extra_active_count;
3129 vm_page_throttled_count = 0;
3130 }
3131 assert(vm_page_throttled_count == 0);
3132 assert(queue_empty(&vm_page_queue_throttled));
3133 vm_page_unlock_queues();
3134}
3135
3136
3137/*
3138 * move pages from the indicated local queue to the global active queue
3139 * it's OK to fail if we're below the hard limit and force == FALSE;
3140 * the nolocks == TRUE case is there to allow this function to be run on
3141 * the hibernate path
3142 */
3143
3144void
3145vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3146{
3147 struct vpl *lq;
3148 vm_page_t first_local, last_local;
3149 vm_page_t first_active;
3150 vm_page_t m;
3151 uint32_t count = 0;
3152
3153 if (vm_page_local_q == NULL)
3154 return;
3155
3156 lq = &vm_page_local_q[lid].vpl_un.vpl;
3157
3158 if (nolocks == FALSE) {
3159 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3160 if ( !vm_page_trylockspin_queues())
3161 return;
3162 } else
3163 vm_page_lockspin_queues();
3164
3165 VPL_LOCK(&lq->vpl_lock);
3166 }
3167 if (lq->vpl_count) {
3168 /*
3169 * Switch "local" pages to "active".
3170 */
3171 assert(!queue_empty(&lq->vpl_queue));
3172
3173 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3174 VM_PAGE_CHECK(m);
3175 assert(m->local);
3176 assert(!m->active);
3177 assert(!m->inactive);
3178 assert(!m->speculative);
3179 assert(!VM_PAGE_WIRED(m));
3180 assert(!m->throttled);
3181 assert(!m->fictitious);
3182
3183 if (m->local_id != lid)
3184 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3185
3186 m->local_id = 0;
3187 m->local = FALSE;
3188 m->active = TRUE;
3189 VM_PAGE_CHECK(m);
3190
3191 count++;
3192 }
3193 if (count != lq->vpl_count)
3194 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3195
3196 /*
3197 * Transfer the entire local queue to the regular LRU page queues.
3198 */
3199 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3200 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3201 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3202
3203 if (queue_empty(&vm_page_queue_active)) {
3204 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3205 } else {
3206 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3207 }
3208 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3209 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3210 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3211
3212 queue_init(&lq->vpl_queue);
3213 /*
3214 * Adjust the global page counts.
3215 */
3216 vm_page_active_count += lq->vpl_count;
3217 lq->vpl_count = 0;
3218 }
3219 assert(queue_empty(&lq->vpl_queue));
3220
3221 if (nolocks == FALSE) {
3222 VPL_UNLOCK(&lq->vpl_lock);
3223 vm_page_unlock_queues();
3224 }
3225}
3226
3227/*
3228 * vm_page_part_zero_fill:
3229 *
3230 * Zero-fill a part of the page.
3231 */
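
/*
 * For example (hypothetical offsets), vm_page_part_zero_fill(m, 0x200, 0x100)
 * zeroes bytes 0x200 through 0x2ff of the page backing 'm' and leaves the
 * rest of the page's contents untouched.
 */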
3232void
3233vm_page_part_zero_fill(
3234 vm_page_t m,
3235 vm_offset_t m_pa,
3236 vm_size_t len)
3237{
3238 vm_page_t tmp;
3239
3240#if 0
3241 /*
3242 * we don't hold the page queue lock
3243 * so this check isn't safe to make
3244 */
3245 VM_PAGE_CHECK(m);
3246#endif
3247
3248#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3249 pmap_zero_part_page(m->phys_page, m_pa, len);
3250#else
3251 while (1) {
3252 tmp = vm_page_grab();
3253 if (tmp == VM_PAGE_NULL) {
3254 vm_page_wait(THREAD_UNINT);
3255 continue;
3256 }
3257 break;
3258 }
3259 vm_page_zero_fill(tmp);
3260 if(m_pa != 0) {
3261 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3262 }
3263 if((m_pa + len) < PAGE_SIZE) {
3264 vm_page_part_copy(m, m_pa + len, tmp,
3265 m_pa + len, PAGE_SIZE - (m_pa + len));
3266 }
3267 vm_page_copy(tmp,m);
3268 VM_PAGE_FREE(tmp);
3269#endif
3270
3271}
3272
3273/*
3274 * vm_page_zero_fill:
3275 *
3276 * Zero-fill the specified page.
3277 */
3278void
3279vm_page_zero_fill(
3280 vm_page_t m)
3281{
3282 XPR(XPR_VM_PAGE,
3283 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3284 m->object, m->offset, m, 0,0);
3285#if 0
3286 /*
3287 * we don't hold the page queue lock
3288 * so this check isn't safe to make
3289 */
3290 VM_PAGE_CHECK(m);
3291#endif
3292
3293// dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3294 pmap_zero_page(m->phys_page);
3295}
3296
3297/*
3298 * vm_page_part_copy:
3299 *
3300 * copy part of one page to another
3301 */
3302
3303void
3304vm_page_part_copy(
3305 vm_page_t src_m,
3306 vm_offset_t src_pa,
3307 vm_page_t dst_m,
3308 vm_offset_t dst_pa,
3309 vm_size_t len)
3310{
3311#if 0
3312 /*
3313 * we don't hold the page queue lock
3314 * so this check isn't safe to make
3315 */
3316 VM_PAGE_CHECK(src_m);
3317 VM_PAGE_CHECK(dst_m);
3318#endif
3319 pmap_copy_part_page(src_m->phys_page, src_pa,
3320 dst_m->phys_page, dst_pa, len);
3321}
3322
3323/*
3324 * vm_page_copy:
3325 *
3326 * Copy one page to another
3327 *
3328 * ENCRYPTED SWAP:
3329 * The source page should not be encrypted. The caller should
3330 * make sure the page is decrypted first, if necessary.
3331 */
3332
3333int vm_page_copy_cs_validations = 0;
3334int vm_page_copy_cs_tainted = 0;
3335
3336void
3337vm_page_copy(
3338 vm_page_t src_m,
3339 vm_page_t dest_m)
3340{
3341 XPR(XPR_VM_PAGE,
3342 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3343 src_m->object, src_m->offset,
3344 dest_m->object, dest_m->offset,
3345 0);
3346#if 0
3347 /*
3348 * we don't hold the page queue lock
3349 * so this check isn't safe to make
3350 */
3351 VM_PAGE_CHECK(src_m);
3352 VM_PAGE_CHECK(dest_m);
3353#endif
3354 vm_object_lock_assert_held(src_m->object);
3355
3356 /*
3357 * ENCRYPTED SWAP:
3358 * The source page should not be encrypted at this point.
3359 * The destination page will therefore not contain encrypted
3360 * data after the copy.
3361 */
3362 if (src_m->encrypted) {
3363 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3364 }
3365 dest_m->encrypted = FALSE;
3366
3367 if (src_m->object != VM_OBJECT_NULL &&
3368 src_m->object->code_signed) {
3369 /*
3370 * We're copying a page from a code-signed object.
3371 * Whoever ends up mapping the copy page might care about
3372 * the original page's integrity, so let's validate the
3373 * source page now.
3374 */
3375 vm_page_copy_cs_validations++;
3376 vm_page_validate_cs(src_m);
3377 }
3378
3379 if (vm_page_is_slideable(src_m)) {
3380 boolean_t was_busy = src_m->busy;
3381 src_m->busy = TRUE;
3382 (void) vm_page_slide(src_m, 0);
3383 assert(src_m->busy);
3384 if (!was_busy) {
3385 PAGE_WAKEUP_DONE(src_m);
3386 }
3387 }
3388
3389 /*
3390 * Propagate the cs_tainted bit to the copy page. Do not propagate
3391 * the cs_validated bit.
3392 */
3393 dest_m->cs_tainted = src_m->cs_tainted;
3394 if (dest_m->cs_tainted) {
3395 vm_page_copy_cs_tainted++;
3396 }
3397 dest_m->slid = src_m->slid;
3398 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3399 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3400}
3401
3402#if MACH_ASSERT
3403static void
3404_vm_page_print(
3405 vm_page_t p)
3406{
3407 printf("vm_page %p: \n", p);
3408 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3409 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3410 printf(" next=%p\n", p->next);
3411 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3412 printf(" wire_count=%u\n", p->wire_count);
3413
3414 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3415 (p->local ? "" : "!"),
3416 (p->inactive ? "" : "!"),
3417 (p->active ? "" : "!"),
3418 (p->pageout_queue ? "" : "!"),
3419 (p->speculative ? "" : "!"),
3420 (p->laundry ? "" : "!"));
3421 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3422 (p->free ? "" : "!"),
3423 (p->reference ? "" : "!"),
3424 (p->gobbled ? "" : "!"),
3425 (p->private ? "" : "!"),
3426 (p->throttled ? "" : "!"));
3427 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3428 (p->busy ? "" : "!"),
3429 (p->wanted ? "" : "!"),
3430 (p->tabled ? "" : "!"),
3431 (p->fictitious ? "" : "!"),
3432 (p->pmapped ? "" : "!"),
3433 (p->wpmapped ? "" : "!"));
3434 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3435 (p->pageout ? "" : "!"),
3436 (p->absent ? "" : "!"),
3437 (p->error ? "" : "!"),
3438 (p->dirty ? "" : "!"),
3439 (p->cleaning ? "" : "!"),
3440 (p->precious ? "" : "!"),
3441 (p->clustered ? "" : "!"));
3442 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3443 (p->overwriting ? "" : "!"),
3444 (p->restart ? "" : "!"),
3445 (p->unusual ? "" : "!"),
3446 (p->encrypted ? "" : "!"),
3447 (p->encrypted_cleaning ? "" : "!"));
3448 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
3449 (p->cs_validated ? "" : "!"),
3450 (p->cs_tainted ? "" : "!"),
3451 (p->no_cache ? "" : "!"));
3452
3453 printf("phys_page=0x%x\n", p->phys_page);
3454}
3455
3456/*
3457 * Check that the list of pages is ordered by
3458 * ascending physical address and has no holes.
3459 */
3460static int
3461vm_page_verify_contiguous(
3462 vm_page_t pages,
3463 unsigned int npages)
3464{
3465 register vm_page_t m;
3466 unsigned int page_count;
3467 vm_offset_t prev_addr;
3468
3469 prev_addr = pages->phys_page;
3470 page_count = 1;
3471 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3472 if (m->phys_page != prev_addr + 1) {
3473 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3474 m, (long)prev_addr, m->phys_page);
3475 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3476 panic("vm_page_verify_contiguous: not contiguous!");
3477 }
3478 prev_addr = m->phys_page;
3479 ++page_count;
3480 }
3481 if (page_count != npages) {
3482 printf("pages %p actual count 0x%x but requested 0x%x\n",
3483 pages, page_count, npages);
3484 panic("vm_page_verify_contiguous: count error");
3485 }
3486 return 1;
3487}
3488
3489
3490/*
3491 * Check the free lists for proper length etc.
3492 */
3493static unsigned int
3494vm_page_verify_free_list(
3495 queue_head_t *vm_page_queue,
3496 unsigned int color,
3497 vm_page_t look_for_page,
3498 boolean_t expect_page)
3499{
3500 unsigned int npages;
3501 vm_page_t m;
3502 vm_page_t prev_m;
3503 boolean_t found_page;
3504
3505 found_page = FALSE;
3506 npages = 0;
3507 prev_m = (vm_page_t) vm_page_queue;
3508 queue_iterate(vm_page_queue,
3509 m,
3510 vm_page_t,
3511 pageq) {
3512
3513 if (m == look_for_page) {
3514 found_page = TRUE;
3515 }
3516 if ((vm_page_t) m->pageq.prev != prev_m)
3517 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3518 color, npages, m, m->pageq.prev, prev_m);
3519 if ( ! m->busy )
3520 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3521 color, npages, m);
3522 if (color != (unsigned int) -1) {
3523 if ((m->phys_page & vm_color_mask) != color)
3524 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3525 color, npages, m, m->phys_page & vm_color_mask, color);
3526 if ( ! m->free )
3527 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3528 color, npages, m);
3529 }
3530 ++npages;
3531 prev_m = m;
3532 }
3533 if (look_for_page != VM_PAGE_NULL) {
3534 unsigned int other_color;
3535
3536 if (expect_page && !found_page) {
3537 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3538 color, npages, look_for_page, look_for_page->phys_page);
3539 _vm_page_print(look_for_page);
3540 for (other_color = 0;
3541 other_color < vm_colors;
3542 other_color++) {
3543 if (other_color == color)
3544 continue;
3545 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3546 other_color, look_for_page, FALSE);
3547 }
3548 if (color == (unsigned int) -1) {
3549 vm_page_verify_free_list(&vm_lopage_queue_free,
3550 (unsigned int) -1, look_for_page, FALSE);
3551 }
3552 panic("vm_page_verify_free_list(color=%u)\n", color);
3553 }
3554 if (!expect_page && found_page) {
3555 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3556 color, npages, look_for_page, look_for_page->phys_page);
3557 }
3558 }
3559 return npages;
3560}
3561
3562static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3563static void
3564vm_page_verify_free_lists( void )
3565{
3566 unsigned int color, npages, nlopages;
3567
3568 if (! vm_page_verify_free_lists_enabled)
3569 return;
3570
3571 npages = 0;
3572
3573 lck_mtx_lock(&vm_page_queue_free_lock);
3574
3575 for( color = 0; color < vm_colors; color++ ) {
3576 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3577 color, VM_PAGE_NULL, FALSE);
3578 }
3579 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3580 (unsigned int) -1,
3581 VM_PAGE_NULL, FALSE);
3582 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3583 panic("vm_page_verify_free_lists: "
3584 "npages %u free_count %d nlopages %u lo_free_count %u",
3585 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3586
3587 lck_mtx_unlock(&vm_page_queue_free_lock);
3588}
3589
3590void
3591vm_page_queues_assert(
3592 vm_page_t mem,
3593 int val)
3594{
3595#if DEBUG
3596 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3597#endif
3598 if (mem->free + mem->active + mem->inactive + mem->speculative +
3599 mem->throttled + mem->pageout_queue > (val)) {
3600 _vm_page_print(mem);
3601 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3602 }
3603 if (VM_PAGE_WIRED(mem)) {
3604 assert(!mem->active);
3605 assert(!mem->inactive);
3606 assert(!mem->speculative);
3607 assert(!mem->throttled);
3608 assert(!mem->pageout_queue);
3609 }
3610}
3611#endif /* MACH_ASSERT */
3612
3613
3614/*
3615 * CONTIGUOUS PAGE ALLOCATION
3616 *
3617 * Find a region large enough to contain at least n pages
3618 * of contiguous physical memory.
3619 *
3620 * This is done by traversing the vm_page_t array in a linear fashion
3621 * we assume that the vm_page_t array has the available physical pages in an
3622 * ordered, ascending list... this is currently true of all our implementations
3623 * and must remain so... there can be 'holes' in the array... we also can
3624 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3625 * which used to happen via 'vm_page_convert'... that function was no longer
3626 * being called and was removed...
3627 *
3628 * The basic flow consists of stabilizing some of the interesting state of
3629 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3630 * sweep at the beginning of the array looking for pages that meet our criteria
3631 * for a 'stealable' page... currently we are pretty conservative... if the page
3632 * meets this criteria and is physically contiguous to the previous page in the 'run'
3633 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3634 * and start to develop a new run... if at this point we've already considered
3635 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3636 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3637 * to other threads trying to acquire free pages (or move pages from q to q),
3638 * and then continue from the spot we left off... we only make 1 pass through the
3639 * array. Once we have a 'run' that is long enough, we'll go into the loop
3640 * which steals the pages from the queues they're currently on... pages on the free
3641 * queue can be stolen directly... pages that are on any of the other queues
3642 * must be removed from the object they are tabled on... this requires taking the
3643 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3644 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3645 * dump the pages we've currently stolen back to the free list, and pick up our
3646 * scan from the point where we aborted the 'current' run.
3647 *
3648 *
3649 * Requirements:
3650 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3651 *
3652 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3653 *
3654 * Algorithm:
3655 */
3656
3657#define MAX_CONSIDERED_BEFORE_YIELD 1000
3658
3659
3660#define RESET_STATE_OF_RUN() \
3661 MACRO_BEGIN \
3662 prevcontaddr = -2; \
3663 start_pnum = -1; \
3664 free_considered = 0; \
3665 substitute_needed = 0; \
3666 npages = 0; \
3667 MACRO_END
3668
3669/*
3670 * Can we steal in-use (i.e. not free) pages when searching for
3671 * physically-contiguous pages ?
3672 */
3673#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3674
3675static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3676#if DEBUG
3677int vm_page_find_contig_debug = 0;
3678#endif
3679
3680static vm_page_t
3681vm_page_find_contiguous(
3682 unsigned int contig_pages,
3683 ppnum_t max_pnum,
3684 ppnum_t pnum_mask,
3685 boolean_t wire,
3686 int flags)
3687{
3688 vm_page_t m = NULL;
3689 ppnum_t prevcontaddr;
3690 ppnum_t start_pnum;
3691 unsigned int npages, considered, scanned;
3692 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3693 unsigned int idx_last_contig_page_found = 0;
3694 int free_considered, free_available;
3695 int substitute_needed;
3696 boolean_t wrapped;
3697#if DEBUG
3698 clock_sec_t tv_start_sec, tv_end_sec;
3699 clock_usec_t tv_start_usec, tv_end_usec;
3700#endif
3701#if MACH_ASSERT
3702 int yielded = 0;
3703 int dumped_run = 0;
3704 int stolen_pages = 0;
3705#endif
3706
3707 if (contig_pages == 0)
3708 return VM_PAGE_NULL;
3709
3710#if MACH_ASSERT
3711 vm_page_verify_free_lists();
3712#endif
3713#if DEBUG
3714 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3715#endif
3716 vm_page_lock_queues();
3717 lck_mtx_lock(&vm_page_queue_free_lock);
3718
3719 RESET_STATE_OF_RUN();
3720
3721 scanned = 0;
3722 considered = 0;
3723 free_available = vm_page_free_count - vm_page_free_reserved;
3724
3725 wrapped = FALSE;
3726
3727 if(flags & KMA_LOMEM)
3728 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3729 else
3730 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3731
3732 orig_last_idx = idx_last_contig_page_found;
3733 last_idx = orig_last_idx;
3734
3735 for (page_idx = last_idx, start_idx = last_idx;
3736 npages < contig_pages && page_idx < vm_pages_count;
3737 page_idx++) {
3738retry:
3739 if (wrapped &&
3740 npages == 0 &&
3741 page_idx >= orig_last_idx) {
3742 /*
3743 * We're back where we started and we haven't
3744 * found any suitable contiguous range. Let's
3745 * give up.
3746 */
3747 break;
3748 }
3749 scanned++;
3750 m = &vm_pages[page_idx];
3751
3752 assert(!m->fictitious);
3753 assert(!m->private);
3754
3755 if (max_pnum && m->phys_page > max_pnum) {
3756 /* no more low pages... */
3757 break;
3758 }
3759 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3760 /*
3761 * not aligned
3762 */
3763 RESET_STATE_OF_RUN();
3764
3765 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3766 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3767 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3768 m->cleaning || m->overwriting || m->restart || m->unusual || m->pageout) {
3769 /*
3770 * page is in a transient state
3771 * or a state we don't want to deal
3772 * with, so don't consider it which
3773 * means starting a new run
3774 */
3775 RESET_STATE_OF_RUN();
3776
3777 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3778 /*
3779 * page needs to be on one of our queues
3780 * in order for it to be stable behind the
3781 * locks we hold at this point...
3782 * if not, don't consider it which
3783 * means starting a new run
3784 */
3785 RESET_STATE_OF_RUN();
3786
3787 } else if (!m->free && (!m->tabled || m->busy)) {
3788 /*
3789 * pages on the free list are always 'busy'
3790 * so we couldn't test for 'busy' in the check
3791 * for the transient states... pages that are
3792 * 'free' are never 'tabled', so we also couldn't
3793 * test for 'tabled'. So we check here to make
3794 * sure that a non-free page is not busy and is
3795 * tabled on an object...
3796 * if not, don't consider it which
3797 * means starting a new run
3798 */
3799 RESET_STATE_OF_RUN();
3800
3801 } else {
3802 if (m->phys_page != prevcontaddr + 1) {
3803 if ((m->phys_page & pnum_mask) != 0) {
3804 RESET_STATE_OF_RUN();
3805 goto did_consider;
3806 } else {
3807 npages = 1;
3808 start_idx = page_idx;
3809 start_pnum = m->phys_page;
3810 }
3811 } else {
3812 npages++;
3813 }
3814 prevcontaddr = m->phys_page;
3815
3816 VM_PAGE_CHECK(m);
3817 if (m->free) {
3818 free_considered++;
3819 } else {
3820 /*
3821 * This page is not free.
3822 * If we can't steal used pages,
3823 * we have to give up this run
3824 * and keep looking.
3825 * Otherwise, we might need to
3826 * move the contents of this page
3827 * into a substitute page.
3828 */
3829#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3830 if (m->pmapped || m->dirty) {
3831 substitute_needed++;
3832 }
3833#else
3834 RESET_STATE_OF_RUN();
3835#endif
3836 }
3837
3838 if ((free_considered + substitute_needed) > free_available) {
3839 /*
3840 * if we let this run continue
3841 * we will end up dropping the vm_page_free_count
3842 * below the reserve limit... we need to abort
3843 * this run, but we can at least re-consider this
3844 * page... thus the jump back to 'retry'
3845 */
3846 RESET_STATE_OF_RUN();
3847
3848 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3849 considered++;
3850 goto retry;
3851 }
3852 /*
3853 * free_available == 0
3854 * so can't consider any free pages... if
3855 * we went to retry in this case, we'd
3856 * get stuck looking at the same page
3857				 * w/o making any forward progress...
3858				 * we also want to take this path if we've already
3859 * reached our limit that controls the lock latency
3860 */
3861 }
3862 }
3863did_consider:
3864 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3865
3866 lck_mtx_unlock(&vm_page_queue_free_lock);
3867 vm_page_unlock_queues();
3868
3869 mutex_pause(0);
3870
3871 vm_page_lock_queues();
3872 lck_mtx_lock(&vm_page_queue_free_lock);
3873
3874 RESET_STATE_OF_RUN();
3875 /*
3876 * reset our free page limit since we
3877 * dropped the lock protecting the vm_page_free_queue
3878 */
3879 free_available = vm_page_free_count - vm_page_free_reserved;
3880 considered = 0;
3881#if MACH_ASSERT
3882 yielded++;
3883#endif
3884 goto retry;
3885 }
3886 considered++;
3887 }
3888 m = VM_PAGE_NULL;
3889
3890 if (npages != contig_pages) {
3891 if (!wrapped) {
3892 /*
3893 * We didn't find a contiguous range but we didn't
3894 * start from the very first page.
3895 * Start again from the very first page.
3896 */
3897 RESET_STATE_OF_RUN();
3898 if( flags & KMA_LOMEM)
3899 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3900 else
3901 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3902 last_idx = 0;
3903 page_idx = last_idx;
3904 wrapped = TRUE;
3905 goto retry;
3906 }
3907 lck_mtx_unlock(&vm_page_queue_free_lock);
3908 } else {
3909 vm_page_t m1;
3910 vm_page_t m2;
3911 unsigned int cur_idx;
3912 unsigned int tmp_start_idx;
3913 vm_object_t locked_object = VM_OBJECT_NULL;
3914 boolean_t abort_run = FALSE;
3915
3916 assert(page_idx - start_idx == contig_pages);
3917
3918 tmp_start_idx = start_idx;
3919
3920		/*
3921		 * first pass through to pull the free pages
3922		 * off of the free queue so that in case we
3923		 * need substitute pages, we won't grab any
3924		 * of the free pages in the run... we clear the
3925		 * 'free' bit right here (not in the 2nd pass), and
3926		 * even in an abort_run case, we'll collect all of the
3927		 * free pages in this run and return them to the free list
3928		 */
3929 while (start_idx < page_idx) {
3930
3931 m1 = &vm_pages[start_idx++];
3932
3933#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3934 assert(m1->free);
3935#endif
3936
3937 if (m1->free) {
3938 unsigned int color;
3939
3940 color = m1->phys_page & vm_color_mask;
3941#if MACH_ASSERT
3942 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
3943#endif
3944 queue_remove(&vm_page_queue_free[color],
3945 m1,
3946 vm_page_t,
3947 pageq);
3948 m1->pageq.next = NULL;
3949 m1->pageq.prev = NULL;
3950#if MACH_ASSERT
3951 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
3952#endif
3953 /*
3954 * Clear the "free" bit so that this page
3955 * does not get considered for another
3956 * concurrent physically-contiguous allocation.
3957 */
3958 m1->free = FALSE;
3959 assert(m1->busy);
3960
3961 vm_page_free_count--;
3962 }
3963 }
3964 /*
3965 * adjust global freelist counts
3966 */
3967 if (vm_page_free_count < vm_page_free_count_minimum)
3968 vm_page_free_count_minimum = vm_page_free_count;
3969
3970 if( flags & KMA_LOMEM)
3971 vm_page_lomem_find_contiguous_last_idx = page_idx;
3972 else
3973 vm_page_find_contiguous_last_idx = page_idx;
3974
3975 /*
3976 * we can drop the free queue lock at this point since
3977		 * we've pulled any 'free' candidates off of the list...
3978		 * we need it dropped so that we can do a vm_page_grab
3979		 * when substituting for pmapped/dirty pages
3980 */
3981 lck_mtx_unlock(&vm_page_queue_free_lock);
3982
3983 start_idx = tmp_start_idx;
3984 cur_idx = page_idx - 1;
3985
3986 while (start_idx++ < page_idx) {
3987 /*
3988 * must go through the list from back to front
3989 * so that the page list is created in the
3990 * correct order - low -> high phys addresses
3991 */
3992 m1 = &vm_pages[cur_idx--];
3993
3994 assert(!m1->free);
3995 if (m1->object == VM_OBJECT_NULL) {
3996 /*
3997 * page has already been removed from
3998 * the free list in the 1st pass
3999 */
4000 assert(m1->offset == (vm_object_offset_t) -1);
4001 assert(m1->busy);
4002 assert(!m1->wanted);
4003 assert(!m1->laundry);
4004 } else {
4005 vm_object_t object;
4006
4007 if (abort_run == TRUE)
4008 continue;
4009
4010 object = m1->object;
4011
4012 if (object != locked_object) {
4013 if (locked_object) {
4014 vm_object_unlock(locked_object);
4015 locked_object = VM_OBJECT_NULL;
4016 }
4017 if (vm_object_lock_try(object))
4018 locked_object = object;
4019 }
4020 if (locked_object == VM_OBJECT_NULL ||
4021 (VM_PAGE_WIRED(m1) || m1->gobbled ||
4022 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
4023 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
4024 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->busy)) {
4025
4026 if (locked_object) {
4027 vm_object_unlock(locked_object);
4028 locked_object = VM_OBJECT_NULL;
4029 }
4030 tmp_start_idx = cur_idx;
4031 abort_run = TRUE;
4032 continue;
4033 }
4034 if (m1->pmapped || m1->dirty) {
4035 int refmod;
4036 vm_object_offset_t offset;
4037
4038 m2 = vm_page_grab();
4039
4040 if (m2 == VM_PAGE_NULL) {
4041 if (locked_object) {
4042 vm_object_unlock(locked_object);
4043 locked_object = VM_OBJECT_NULL;
4044 }
4045 tmp_start_idx = cur_idx;
4046 abort_run = TRUE;
4047 continue;
4048 }
4049 if (m1->pmapped)
4050 refmod = pmap_disconnect(m1->phys_page);
4051 else
4052 refmod = 0;
4053 vm_page_copy(m1, m2);
4054
4055 m2->reference = m1->reference;
4056 m2->dirty = m1->dirty;
4057
4058 if (refmod & VM_MEM_REFERENCED)
4059 m2->reference = TRUE;
4060 if (refmod & VM_MEM_MODIFIED) {
4061 SET_PAGE_DIRTY(m2, TRUE);
4062 }
4063 offset = m1->offset;
4064
4065 /*
4066 * completely cleans up the state
4067 * of the page so that it is ready
4068 * to be put onto the free list, or
4069 * for this purpose it looks like it
4070 * just came off of the free list
4071 */
4072 vm_page_free_prepare(m1);
4073
4074 /*
4075 * make sure we clear the ref/mod state
4076 * from the pmap layer... else we risk
4077 * inheriting state from the last time
4078 * this page was used...
4079 */
4080 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4081 /*
4082 * now put the substitute page on the object
4083 */
4084 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4085
4086 if (m2->reference)
4087 vm_page_activate(m2);
4088 else
4089 vm_page_deactivate(m2);
4090
4091 PAGE_WAKEUP_DONE(m2);
4092
4093 } else {
4094 /*
4095 * completely cleans up the state
4096 * of the page so that it is ready
4097 * to be put onto the free list, or
4098 * for this purpose it looks like it
4099 * just came off of the free list
4100 */
4101 vm_page_free_prepare(m1);
4102 }
4103#if MACH_ASSERT
4104 stolen_pages++;
4105#endif
4106 }
4107 m1->pageq.next = (queue_entry_t) m;
4108 m1->pageq.prev = NULL;
4109 m = m1;
4110 }
4111 if (locked_object) {
4112 vm_object_unlock(locked_object);
4113 locked_object = VM_OBJECT_NULL;
4114 }
4115
4116 if (abort_run == TRUE) {
4117 if (m != VM_PAGE_NULL) {
4118 vm_page_free_list(m, FALSE);
4119 }
4120#if MACH_ASSERT
4121 dumped_run++;
4122#endif
4123			/*
4124			 * tmp_start_idx was captured from cur_idx after
4125			 * its auto-decrement, so it sits 1 below the
4126			 * page that forced the abort... add 2 so the
4127			 * scan resumes on the page just past the one
4128			 * that caused the trouble
4129			 */
4130 page_idx = tmp_start_idx + 2;
4131 if (page_idx >= vm_pages_count) {
4132 if (wrapped)
4133 goto done_scanning;
4134 page_idx = last_idx = 0;
4135 wrapped = TRUE;
4136 }
4137 abort_run = FALSE;
4138
4139			/*
4140			 * We had to abort this run... resume the scan
4141			 * from the index computed above (just past the
4142			 * aborted run, or page 0 if we wrapped around).
4143			 */
4144 RESET_STATE_OF_RUN();
4145
4146 if( flags & KMA_LOMEM)
4147 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4148 else
4149 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4150
4151 last_idx = page_idx;
4152
4153 lck_mtx_lock(&vm_page_queue_free_lock);
4154 /*
4155 * reset our free page limit since we
4156 * dropped the lock protecting the vm_page_free_queue
4157 */
4158 free_available = vm_page_free_count - vm_page_free_reserved;
4159 goto retry;
4160 }
4161
4162 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4163
4164 if (wire == TRUE)
4165 m1->wire_count++;
4166 else
4167 m1->gobbled = TRUE;
4168 }
4169 if (wire == FALSE)
4170 vm_page_gobble_count += npages;
4171
4172 /*
4173 * gobbled pages are also counted as wired pages
4174 */
4175 vm_page_wire_count += npages;
4176
4177 assert(vm_page_verify_contiguous(m, npages));
4178 }
4179done_scanning:
4180 vm_page_unlock_queues();
4181
4182#if DEBUG
4183 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4184
4185 tv_end_sec -= tv_start_sec;
4186 if (tv_end_usec < tv_start_usec) {
4187 tv_end_sec--;
4188 tv_end_usec += 1000000;
4189 }
4190 tv_end_usec -= tv_start_usec;
4191 if (tv_end_usec >= 1000000) {
4192 tv_end_sec++;
4193		tv_end_usec -= 1000000;
4194 }
4195 if (vm_page_find_contig_debug) {
4196 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4197 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4198 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4199 scanned, yielded, dumped_run, stolen_pages);
4200 }
4201
4202#endif
4203#if MACH_ASSERT
4204 vm_page_verify_free_lists();
4205#endif
4206 return m;
4207}
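
/*
 * A minimal sketch of the run-detection logic in the scan above: walk
 * an array of physical page frame numbers and report where a run of
 * 'contig_pages' consecutive frames starts, with the first frame
 * aligned per 'pnum_mask'.  It deliberately ignores the page-state
 * filtering, locking and yielding done by the real routine; the name
 * sketch_find_run and the flat 'frames' array are hypothetical.
 */
#if 0	/* illustrative sketch -- not part of the original source */
static int
sketch_find_run(ppnum_t *frames, unsigned int nframes,
		unsigned int contig_pages, ppnum_t pnum_mask)
{
	unsigned int	idx;
	unsigned int	start_idx = 0;
	unsigned int	npages = 0;
	ppnum_t		prevcontaddr = 0;

	for (idx = 0; idx < nframes && npages < contig_pages; idx++) {
		if (npages != 0 && frames[idx] == prevcontaddr + 1) {
			npages++;		/* frame extends the current run */
		} else if ((frames[idx] & pnum_mask) == 0) {
			npages = 1;		/* aligned frame: start a new run */
			start_idx = idx;
		} else {
			npages = 0;		/* unaligned, non-contiguous: reset */
		}
		prevcontaddr = frames[idx];
	}
	return (npages == contig_pages) ? (int)start_idx : -1;
}
#endif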
4208
4209/*
4210 * Allocate a list of contiguous, wired pages.
4211 */
4212kern_return_t
4213cpm_allocate(
4214 vm_size_t size,
4215 vm_page_t *list,
4216 ppnum_t max_pnum,
4217 ppnum_t pnum_mask,
4218 boolean_t wire,
4219 int flags)
4220{
4221 vm_page_t pages;
4222 unsigned int npages;
4223
4224 if (size % PAGE_SIZE != 0)
4225 return KERN_INVALID_ARGUMENT;
4226
4227 npages = (unsigned int) (size / PAGE_SIZE);
4228 if (npages != size / PAGE_SIZE) {
4229 /* 32-bit overflow */
4230 return KERN_INVALID_ARGUMENT;
4231 }
4232
4233 /*
4234 * Obtain a pointer to a subset of the free
4235 * list large enough to satisfy the request;
4236 * the region will be physically contiguous.
4237 */
4238 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4239
4240 if (pages == VM_PAGE_NULL)
4241 return KERN_NO_SPACE;
4242 /*
4243 * determine need for wakeups
4244 */
4245 if ((vm_page_free_count < vm_page_free_min) ||
4246 ((vm_page_free_count < vm_page_free_target) &&
4247 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4248 thread_wakeup((event_t) &vm_page_free_wanted);
4249
4250 VM_CHECK_MEMORYSTATUS;
4251
4252 /*
4253 * The CPM pages should now be available and
4254 * ordered by ascending physical address.
4255 */
4256 assert(vm_page_verify_contiguous(pages, npages));
4257
4258 *list = pages;
4259 return KERN_SUCCESS;
4260}
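
/*
 * A hedged usage sketch for cpm_allocate(): request 16 physically
 * contiguous, wired pages whose first frame is 64KB-aligned and lies
 * below 4GB.  The helper name sketch_grab_contiguous_run and the
 * particular limits are assumptions for illustration, not values
 * taken from any real caller.
 */
#if 0	/* illustrative sketch -- not part of the original source */
static kern_return_t
sketch_grab_contiguous_run(vm_page_t *run_out)
{
	vm_size_t	size = 16 * PAGE_SIZE;
	/* highest acceptable physical page number: last frame below 4GB */
	ppnum_t		max_pnum = (ppnum_t)((0x100000000ULL >> PAGE_SHIFT) - 1);
	/* alignment mask on the starting frame: 64KB worth of frames - 1 */
	ppnum_t		pnum_mask = (ppnum_t)((0x10000 >> PAGE_SHIFT) - 1);

	return cpm_allocate(size, run_out, max_pnum, pnum_mask, TRUE, 0);
}
#endif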
4261
4262
4263unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4264
4265/*
4266 * When working on a 'run' of pages, it is necessary to hold
4267 * the vm_page_queue_lock (a hot global lock) for certain operations
4268 * on each page; however, the majority of the work can be done
4269 * while merely holding the object lock, and some collections of
4270 * pages don't require any work brokered by the vm_page_queue_lock
4271 * at all.  To mitigate the time spent behind the global lock, use a
4272 * 2 pass algorithm: collect pages up to DELAYED_WORK_LIMIT while
4273 * doing all of the work that doesn't require the vm_page_queue_lock,
4274 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock
4275 * and do the remaining work for each page.  We grab the busy bit on
4276 * the page if it's not already held so that vm_page_do_delayed_work
4277 * can drop the object lock when it can't immediately take the
4278 * vm_page_queue_lock; this lets it compete for the locks in the same
4279 * order that vm_pageout_scan takes them.
4280 * The operation names (DW_*) are modeled after the routines that
4281 * need to be called, to keep the changes obvious in the original loop.
4282 */
4283
4284void
4285vm_page_do_delayed_work(
4286 vm_object_t object,
4287 struct vm_page_delayed_work *dwp,
4288 int dw_count)
4289{
4290 int j;
4291 vm_page_t m;
4292 vm_page_t local_free_q = VM_PAGE_NULL;
4293
4294 /*
4295 * pageout_scan takes the vm_page_lock_queues first
4296 * then tries for the object lock... to avoid what
4297 * is effectively a lock inversion, we'll go to the
4298 * trouble of taking them in that same order... otherwise
4299 * if this object contains the majority of the pages resident
4300 * in the UBC (or a small set of large objects actively being
4301 * worked on contain the majority of the pages), we could
4302 * cause the pageout_scan thread to 'starve' in its attempt
4303 * to find pages to move to the free queue, since it has to
4304 * successfully acquire the object lock of any candidate page
4305 * before it can steal/clean it.
4306 */
4307 if (!vm_page_trylockspin_queues()) {
4308 vm_object_unlock(object);
4309
4310 vm_page_lockspin_queues();
4311
4312 for (j = 0; ; j++) {
4313 if (!vm_object_lock_avoid(object) &&
4314 _vm_object_lock_try(object))
4315 break;
4316 vm_page_unlock_queues();
4317 mutex_pause(j);
4318 vm_page_lockspin_queues();
4319 }
4320 }
4321 for (j = 0; j < dw_count; j++, dwp++) {
4322
4323 m = dwp->dw_m;
4324
4325 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4326 vm_pageout_throttle_up(m);
4327
4328 if (dwp->dw_mask & DW_vm_page_wire)
4329 vm_page_wire(m);
4330 else if (dwp->dw_mask & DW_vm_page_unwire) {
4331 boolean_t queueit;
4332
4333 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4334
4335 vm_page_unwire(m, queueit);
4336 }
4337 if (dwp->dw_mask & DW_vm_page_free) {
4338 vm_page_free_prepare_queues(m);
4339
4340 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4341 /*
4342 * Add this page to our list of reclaimed pages,
4343 * to be freed later.
4344 */
4345 m->pageq.next = (queue_entry_t) local_free_q;
4346 local_free_q = m;
4347 } else {
4348 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4349 vm_page_deactivate_internal(m, FALSE);
4350 else if (dwp->dw_mask & DW_vm_page_activate) {
4351 if (m->active == FALSE) {
4352 vm_page_activate(m);
4353 }
4354 }
4355 else if (dwp->dw_mask & DW_vm_page_speculate)
4356 vm_page_speculate(m, TRUE);
4357 else if (dwp->dw_mask & DW_enqueue_cleaned) {
4358 /*
4359 * if we didn't hold the object lock and did this,
4360 * we might disconnect the page, then someone might
4361 * soft fault it back in, then we would put it on the
4362 * cleaned queue, and so we would have a referenced (maybe even dirty)
4363 * page on that queue, which we don't want
4364 */
4365 int refmod_state = pmap_disconnect(m->phys_page);
4366
4367 if ((refmod_state & VM_MEM_REFERENCED)) {
4368 /*
4369 * this page has been touched since it got cleaned; let's activate it
4370 * if it hasn't already been
4371 */
4372 vm_pageout_enqueued_cleaned++;
4373 vm_pageout_cleaned_reactivated++;
4374 vm_pageout_cleaned_commit_reactivated++;
4375
4376 if (m->active == FALSE)
4377 vm_page_activate(m);
4378 } else {
4379 m->reference = FALSE;
4380 vm_page_enqueue_cleaned(m);
4381 }
4382 }
4383 else if (dwp->dw_mask & DW_vm_page_lru)
4384 vm_page_lru(m);
4385 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4386 if ( !m->pageout_queue)
4387 VM_PAGE_QUEUES_REMOVE(m);
4388 }
4389 if (dwp->dw_mask & DW_set_reference)
4390 m->reference = TRUE;
4391 else if (dwp->dw_mask & DW_clear_reference)
4392 m->reference = FALSE;
4393
4394 if (dwp->dw_mask & DW_move_page) {
4395 if ( !m->pageout_queue) {
4396 VM_PAGE_QUEUES_REMOVE(m);
4397
4398 assert(m->object != kernel_object);
4399
4400 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4401 }
4402 }
4403 if (dwp->dw_mask & DW_clear_busy)
4404 m->busy = FALSE;
4405
4406 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4407 PAGE_WAKEUP(m);
4408 }
4409 }
4410 vm_page_unlock_queues();
4411
4412 if (local_free_q)
4413 vm_page_free_list(local_free_q, TRUE);
4414
4415 VM_CHECK_MEMORYSTATUS;
4416
4417}
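
/*
 * A minimal caller-side sketch of the 2 pass pattern described in the
 * block comment above vm_page_do_delayed_work().  It assumes the caller
 * holds the lock of 'object', that every page in 'page_run' belongs to
 * that object, and that none of the pages are busy or wired; the helper
 * name sketch_deactivate_run and the array-of-pages input are
 * hypothetical, not code taken from a real caller.
 */
#if 0	/* illustrative sketch -- not part of the original source */
static void
sketch_deactivate_run(vm_object_t object, vm_page_t *page_run, int run_len)
{
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;
	int				i;

	for (i = 0; i < run_len; i++) {
		vm_page_t	m = page_run[i];

		/*
		 * pass 1: work that only needs the object lock...
		 * grab the busy bit so the page stays stable if
		 * vm_page_do_delayed_work has to drop the object lock
		 */
		m->busy = TRUE;

		/* record the queue-lock work for pass 2 */
		dwp->dw_m = m;
		dwp->dw_mask = DW_vm_page_deactivate_internal | DW_clear_busy | DW_PAGE_WAKEUP;
		dwp++;
		dw_count++;

		if (dw_count >= DEFAULT_DELAYED_WORK_LIMIT) {
			/* pass 2: one trip behind the vm_page_queue_lock */
			vm_page_do_delayed_work(object, &dw_array[0], dw_count);
			dwp = &dw_array[0];
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
}
#endif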
4418
4419kern_return_t
4420vm_page_alloc_list(
4421 int page_count,
4422 int flags,
4423 vm_page_t *list)
4424{
4425 vm_page_t lo_page_list = VM_PAGE_NULL;
4426 vm_page_t mem;
4427 int i;
4428
4429 if ( !(flags & KMA_LOMEM))
4430 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4431
4432 for (i = 0; i < page_count; i++) {
4433
4434 mem = vm_page_grablo();
4435
4436 if (mem == VM_PAGE_NULL) {
4437 if (lo_page_list)
4438 vm_page_free_list(lo_page_list, FALSE);
4439
4440 *list = VM_PAGE_NULL;
4441
4442 return (KERN_RESOURCE_SHORTAGE);
4443 }
4444 mem->pageq.next = (queue_entry_t) lo_page_list;
4445 lo_page_list = mem;
4446 }
4447 *list = lo_page_list;
4448
4449 return (KERN_SUCCESS);
4450}
4451
4452void
4453vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4454{
4455 page->offset = offset;
4456}
4457
4458vm_page_t
4459vm_page_get_next(vm_page_t page)
4460{
4461 return ((vm_page_t) page->pageq.next);
4462}
4463
4464vm_object_offset_t
4465vm_page_get_offset(vm_page_t page)
4466{
4467 return (page->offset);
4468}
4469
4470ppnum_t
4471vm_page_get_phys_page(vm_page_t page)
4472{
4473 return (page->phys_page);
4474}
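
/*
 * A hedged usage sketch for vm_page_alloc_list() and the accessors
 * above: allocate a list of low-memory pages, walk it, then release it.
 * The helper name sketch_dump_lopage_list and the decision to free the
 * pages immediately are assumptions for illustration only.
 */
#if 0	/* illustrative sketch -- not part of the original source */
static void
sketch_dump_lopage_list(int page_count)
{
	vm_page_t	list;
	vm_page_t	m;

	if (vm_page_alloc_list(page_count, KMA_LOMEM, &list) != KERN_SUCCESS)
		return;

	/* the list is chained through pageq.next; walk it with the accessor */
	for (m = list; m != VM_PAGE_NULL; m = vm_page_get_next(m))
		printf("page %p -> physical page 0x%x\n", m, vm_page_get_phys_page(m));

	vm_page_free_list(list, FALSE);
}
#endif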
4475
4476
4477/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4478
4479#if HIBERNATION
4480
4481static vm_page_t hibernate_gobble_queue;
4482
4483extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4484
4485static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4486static int hibernate_flush_dirty_pages(void);
4487static int hibernate_flush_queue(queue_head_t *, int);
4488
4489void hibernate_flush_wait(void);
4490void hibernate_mark_in_progress(void);
4491void hibernate_clear_in_progress(void);
4492
4493
4494struct hibernate_statistics {
4495 int hibernate_considered;
4496 int hibernate_reentered_on_q;
4497 int hibernate_found_dirty;
4498 int hibernate_skipped_cleaning;
4499 int hibernate_skipped_transient;
4500 int hibernate_skipped_precious;
4501 int hibernate_queue_nolock;
4502 int hibernate_queue_paused;
4503 int hibernate_throttled;
4504 int hibernate_throttle_timeout;
4505 int hibernate_drained;
4506 int hibernate_drain_timeout;
4507 int cd_lock_failed;
4508 int cd_found_precious;
4509 int cd_found_wired;
4510 int cd_found_busy;
4511 int cd_found_unusual;
4512 int cd_found_cleaning;
4513 int cd_found_laundry;
4514 int cd_found_dirty;
4515 int cd_local_free;
4516 int cd_total_free;
4517 int cd_vm_page_wire_count;
4518 int cd_pages;
4519 int cd_discarded;
4520 int cd_count_wire;
4521} hibernate_stats;
4522
4523
4524
4525static int
4526hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4527{
4528 wait_result_t wait_result;
4529
4530 vm_page_lock_queues();
4531
4532 while (q->pgo_laundry) {
4533
4534 q->pgo_draining = TRUE;
4535
4536 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4537
4538 vm_page_unlock_queues();
4539
4540 wait_result = thread_block(THREAD_CONTINUE_NULL);
4541
4542 if (wait_result == THREAD_TIMED_OUT) {
4543 hibernate_stats.hibernate_drain_timeout++;
4544 return (1);
4545 }
4546 vm_page_lock_queues();
4547
4548 hibernate_stats.hibernate_drained++;
4549 }
4550 vm_page_unlock_queues();
4551
4552 return (0);
4553}
4554
4555
4556static int
4557hibernate_flush_queue(queue_head_t *q, int qcount)
4558{
4559 vm_page_t m;
4560 vm_object_t l_object = NULL;
4561 vm_object_t m_object = NULL;
4562 int refmod_state = 0;
4563 int try_failed_count = 0;
4564 int retval = 0;
4565 int current_run = 0;
4566 struct vm_pageout_queue *iq;
4567 struct vm_pageout_queue *eq;
4568 struct vm_pageout_queue *tq;
4569
4570 hibernate_cleaning_in_progress = TRUE;
4571
4572 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4573
4574 iq = &vm_pageout_queue_internal;
4575 eq = &vm_pageout_queue_external;
4576
4577 vm_page_lock_queues();
4578
4579 while (qcount && !queue_empty(q)) {
4580
4581 if (current_run++ == 1000) {
4582 if (hibernate_should_abort()) {
4583 retval = 1;
4584 break;
4585 }
4586 current_run = 0;
4587 }
4588
4589 m = (vm_page_t) queue_first(q);
4590 m_object = m->object;
4591
4592 /*
4593 * check to see if we currently are working
4594 * with the same object... if so, we've
4595 * already got the lock
4596 */
4597 if (m_object != l_object) {
4598 /*
4599 * the object associated with candidate page is
4600 * different from the one we were just working
4601 * with... dump the lock if we still own it
4602 */
4603 if (l_object != NULL) {
4604 vm_object_unlock(l_object);
4605 l_object = NULL;
4606 }
4607 /*
4608			 * Try to lock the object; since we've already got the
4609 * page queues lock, we can only 'try' for this one.
4610 * if the 'try' fails, we need to do a mutex_pause
4611 * to allow the owner of the object lock a chance to
4612 * run...
4613 */
4614 if ( !vm_object_lock_try_scan(m_object)) {
4615
4616 if (try_failed_count > 20) {
4617 hibernate_stats.hibernate_queue_nolock++;
4618
4619 goto reenter_pg_on_q;
4620 }
4621 vm_pageout_scan_wants_object = m_object;
4622
4623 vm_page_unlock_queues();
4624 mutex_pause(try_failed_count++);
4625 vm_page_lock_queues();
4626
4627 hibernate_stats.hibernate_queue_paused++;
4628 continue;
4629 } else {
4630 l_object = m_object;
4631 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4632 }
4633 }
4634 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
4635 /*
4636 * page is not to be cleaned
4637 * put it back on the head of its queue
4638 */
4639 if (m->cleaning)
4640 hibernate_stats.hibernate_skipped_cleaning++;
4641 else
4642 hibernate_stats.hibernate_skipped_transient++;
4643
4644 goto reenter_pg_on_q;
4645 }
4646 if ( !m_object->pager_initialized && m_object->pager_created)
4647 goto reenter_pg_on_q;
4648
4649 if (m_object->copy == VM_OBJECT_NULL) {
4650 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4651 /*
4652 * let the normal hibernate image path
4653 * deal with these
4654 */
4655 goto reenter_pg_on_q;
4656 }
4657 }
4658 if ( !m->dirty && m->pmapped) {
4659 refmod_state = pmap_get_refmod(m->phys_page);
4660
4661 if ((refmod_state & VM_MEM_MODIFIED)) {
4662 SET_PAGE_DIRTY(m, FALSE);
4663 }
4664 } else
4665 refmod_state = 0;
4666
4667 if ( !m->dirty) {
4668 /*
4669 * page is not to be cleaned
4670 * put it back on the head of its queue
4671 */
4672 if (m->precious)
4673 hibernate_stats.hibernate_skipped_precious++;
4674
4675 goto reenter_pg_on_q;
4676 }
4677 tq = NULL;
4678
4679 if (m_object->internal) {
4680 if (VM_PAGE_Q_THROTTLED(iq))
4681 tq = iq;
4682 } else if (VM_PAGE_Q_THROTTLED(eq))
4683 tq = eq;
4684
4685 if (tq != NULL) {
4686 wait_result_t wait_result;
4687 int wait_count = 5;
4688
4689 if (l_object != NULL) {
4690 vm_object_unlock(l_object);
4691 l_object = NULL;
4692 }
4693 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4694
4695 tq->pgo_throttled = TRUE;
4696
4697 while (retval == 0) {
4698
4699 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4700
4701 vm_page_unlock_queues();
4702
4703 wait_result = thread_block(THREAD_CONTINUE_NULL);
4704
4705 vm_page_lock_queues();
4706
4707 if (hibernate_should_abort())
4708 retval = 1;
4709
4710 if (wait_result != THREAD_TIMED_OUT)
4711 break;
4712
4713 if (--wait_count == 0) {
4714 hibernate_stats.hibernate_throttle_timeout++;
4715 retval = 1;
4716 }
4717 }
4718 if (retval)
4719 break;
4720
4721 hibernate_stats.hibernate_throttled++;
4722
4723 continue;
4724 }
4725 /*
4726 * we've already factored out pages in the laundry which
4727 * means this page can't be on the pageout queue so it's
4728 * safe to do the VM_PAGE_QUEUES_REMOVE
4729 */
4730 assert(!m->pageout_queue);
4731
4732 VM_PAGE_QUEUES_REMOVE(m);
4733
4734 vm_pageout_cluster(m, FALSE);
4735
4736 hibernate_stats.hibernate_found_dirty++;
4737
4738 goto next_pg;
4739
4740reenter_pg_on_q:
4741 queue_remove(q, m, vm_page_t, pageq);
4742 queue_enter(q, m, vm_page_t, pageq);
4743
4744 hibernate_stats.hibernate_reentered_on_q++;
4745next_pg:
4746 hibernate_stats.hibernate_considered++;
4747
4748 qcount--;
4749 try_failed_count = 0;
4750 }
4751 if (l_object != NULL) {
4752 vm_object_unlock(l_object);
4753 l_object = NULL;
4754 }
4755 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4756
4757 vm_page_unlock_queues();
4758
4759 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
4760
4761 hibernate_cleaning_in_progress = FALSE;
4762
4763 return (retval);
4764}
4765
4766
4767static int
4768hibernate_flush_dirty_pages()
4769{
4770 struct vm_speculative_age_q *aq;
4771 uint32_t i;
4772
4773 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
4774
4775 if (vm_page_local_q) {
4776 for (i = 0; i < vm_page_local_q_count; i++)
4777 vm_page_reactivate_local(i, TRUE, FALSE);
4778 }
4779
4780 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
4781 int qcount;
4782 vm_page_t m;
4783
4784 aq = &vm_page_queue_speculative[i];
4785
4786 if (queue_empty(&aq->age_q))
4787 continue;
4788 qcount = 0;
4789
4790 vm_page_lockspin_queues();
4791
4792 queue_iterate(&aq->age_q,
4793 m,
4794 vm_page_t,
4795 pageq)
4796 {
4797 qcount++;
4798 }
4799 vm_page_unlock_queues();
4800
4801 if (qcount) {
4802 if (hibernate_flush_queue(&aq->age_q, qcount))
4803 return (1);
4804 }
4805 }
4806 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
4807 return (1);
4808 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
4809 return (1);
4810 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
4811 return (1);
4812 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
4813 return (1);
4814
4815 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
4816 return (1);
4817 return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
4818}
4819
4820
4821extern void IOSleep(unsigned int);
4822extern int sync_internal(void);
4823
4824int
4825hibernate_flush_memory()
4826{
4827 int retval;
4828
4829 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
4830
4831 IOSleep(2 * 1000);
4832
4833 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);
4834
4835 if ((retval = hibernate_flush_dirty_pages()) == 0) {
4836 if (consider_buffer_cache_collect != NULL) {
4837
4838 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);
4839
4840 sync_internal();
4841 (void)(*consider_buffer_cache_collect)(1);
4842 consider_zone_gc(TRUE);
4843
4844 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
4845 }
4846 }
4847 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
4848
4849 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
4850 hibernate_stats.hibernate_considered,
4851 hibernate_stats.hibernate_reentered_on_q,
4852 hibernate_stats.hibernate_found_dirty);
4853 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
4854 hibernate_stats.hibernate_skipped_cleaning,
4855 hibernate_stats.hibernate_skipped_transient,
4856 hibernate_stats.hibernate_skipped_precious,
4857 hibernate_stats.hibernate_queue_nolock);
4858 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
4859 hibernate_stats.hibernate_queue_paused,
4860 hibernate_stats.hibernate_throttled,
4861 hibernate_stats.hibernate_throttle_timeout,
4862 hibernate_stats.hibernate_drained,
4863 hibernate_stats.hibernate_drain_timeout);
4864
4865 return (retval);
4866}
4867
4868
4869static void
4870hibernate_page_list_zero(hibernate_page_list_t *list)
4871{
4872 uint32_t bank;
4873 hibernate_bitmap_t * bitmap;
4874
4875 bitmap = &list->bank_bitmap[0];
4876 for (bank = 0; bank < list->bank_count; bank++)
4877 {
4878 uint32_t last_bit;
4879
4880 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4881 // set out-of-bound bits at end of bitmap.
4882 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4883 if (last_bit)
4884 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4885
4886 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4887 }
4888}
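
/*
 * A sketch of how a single physical page's bit would be located in the
 * banked bitmap layout initialized above: find the bank whose
 * [first_page, last_page] range covers the frame, then set its bit
 * (a set bit marks a page that does not need to be saved, per the
 * comment above hibernate_page_list_setall below).  The helper name
 * sketch_mark_page_not_needed is hypothetical; the real work is done by
 * hibernate_page_bitset(), which is implemented elsewhere.
 */
#if 0	/* illustrative sketch -- not part of the original source */
static void
sketch_mark_page_not_needed(hibernate_page_list_t *list, ppnum_t pn)
{
    uint32_t		bank;
    hibernate_bitmap_t	*bitmap = &list->bank_bitmap[0];

    for (bank = 0; bank < list->bank_count; bank++)
    {
	if (pn >= bitmap->first_page && pn <= bitmap->last_page)
	{
	    uint32_t bit = pn - bitmap->first_page;

	    /* bit 31 of each word covers the lowest-numbered page in that
	       word, matching the out-of-bound fill in hibernate_page_list_zero() */
	    bitmap->bitmap[bit >> 5] |= (0x80000000 >> (bit & 31));
	    return;
	}
	bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
    }
}
#endif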
4889
4890void
4891hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4892{
4893 uint32_t i;
4894 vm_page_t m;
4895 uint64_t start, end, timeout, nsec;
4896 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4897 clock_get_uptime(&start);
4898
4899 for (i = 0; i < gobble_count; i++)
4900 {
4901 while (VM_PAGE_NULL == (m = vm_page_grab()))
4902 {
4903 clock_get_uptime(&end);
4904 if (end >= timeout)
4905 break;
4906 VM_PAGE_WAIT();
4907 }
4908 if (!m)
4909 break;
4910 m->busy = FALSE;
4911 vm_page_gobble(m);
4912
4913 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4914 hibernate_gobble_queue = m;
4915 }
4916
4917 clock_get_uptime(&end);
4918 absolutetime_to_nanoseconds(end - start, &nsec);
4919 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4920}
4921
4922void
4923hibernate_free_gobble_pages(void)
4924{
4925 vm_page_t m, next;
4926 uint32_t count = 0;
4927
4928 m = (vm_page_t) hibernate_gobble_queue;
4929 while(m)
4930 {
4931 next = (vm_page_t) m->pageq.next;
4932 vm_page_free(m);
4933 count++;
4934 m = next;
4935 }
4936 hibernate_gobble_queue = VM_PAGE_NULL;
4937
4938 if (count)
4939 HIBLOG("Freed %d pages\n", count);
4940}
4941
4942static boolean_t
4943hibernate_consider_discard(vm_page_t m)
4944{
4945 vm_object_t object = NULL;
4946 int refmod_state;
4947 boolean_t discard = FALSE;
4948
4949 do
4950 {
4951 if (m->private)
4952 panic("hibernate_consider_discard: private");
4953
4954 if (!vm_object_lock_try(m->object)) {
4955 hibernate_stats.cd_lock_failed++;
4956 break;
4957 }
4958 object = m->object;
4959
4960 if (VM_PAGE_WIRED(m)) {
4961 hibernate_stats.cd_found_wired++;
4962 break;
4963 }
4964 if (m->precious) {
4965 hibernate_stats.cd_found_precious++;
4966 break;
4967 }
4968 if (m->busy || !object->alive) {
4969 /*
4970 * Somebody is playing with this page.
4971 */
4972 hibernate_stats.cd_found_busy++;
4973 break;
4974 }
4975 if (m->absent || m->unusual || m->error) {
4976 /*
4977			 * If it's unusual in any way, ignore it
4978 */
4979 hibernate_stats.cd_found_unusual++;
4980 break;
4981 }
4982 if (m->cleaning) {
4983 hibernate_stats.cd_found_cleaning++;
4984 break;
4985 }
4986 if (m->laundry) {
4987 hibernate_stats.cd_found_laundry++;
4988 break;
4989 }
4990 if (!m->dirty)
4991 {
4992 refmod_state = pmap_get_refmod(m->phys_page);
4993
4994 if (refmod_state & VM_MEM_REFERENCED)
4995 m->reference = TRUE;
4996 if (refmod_state & VM_MEM_MODIFIED) {
4997 SET_PAGE_DIRTY(m, FALSE);
4998 }
4999 }
5000
5001 /*
5002 * If it's clean or purgeable we can discard the page on wakeup.
5003 */
5004 discard = (!m->dirty)
5005 || (VM_PURGABLE_VOLATILE == object->purgable)
5006 || (VM_PURGABLE_EMPTY == object->purgable);
5007
5008 if (discard == FALSE)
5009 hibernate_stats.cd_found_dirty++;
5010 }
5011 while (FALSE);
5012
5013 if (object)
5014 vm_object_unlock(object);
5015
5016 return (discard);
5017}
5018
5019
5020static void
5021hibernate_discard_page(vm_page_t m)
5022{
5023 if (m->absent || m->unusual || m->error)
5024 /*
5025         * If it's unusual in any way, ignore it
5026 */
5027 return;
5028
5029#if DEBUG
5030 vm_object_t object = m->object;
5031 if (!vm_object_lock_try(m->object))
5032 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5033#else
5034 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5035 makes sure these locks are uncontended before sleep */
5036#endif /* !DEBUG */
5037
5038 if (m->pmapped == TRUE)
5039 {
5040 __unused int refmod_state = pmap_disconnect(m->phys_page);
5041 }
5042
5043 if (m->laundry)
5044 panic("hibernate_discard_page(%p) laundry", m);
5045 if (m->private)
5046 panic("hibernate_discard_page(%p) private", m);
5047 if (m->fictitious)
5048 panic("hibernate_discard_page(%p) fictitious", m);
5049
5050 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5051 {
5052 /* object should be on a queue */
5053 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5054 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5055 assert(old_queue);
5056 vm_purgeable_token_delete_first(old_queue);
5057 m->object->purgable = VM_PURGABLE_EMPTY;
5058 }
5059
5060 vm_page_free(m);
5061
5062#if DEBUG
5063 vm_object_unlock(object);
5064#endif /* DEBUG */
5065}
5066
5067/*
5068 Bits zero in the bitmaps => page needs to be saved. All pages default to being saved;
5069 pages known to the VM to not need saving are then subtracted.
5070 Wired pages to be saved are present in page_list_wired, pageable ones in page_list.
5071*/
5072
5073void
5074hibernate_page_list_setall(hibernate_page_list_t * page_list,
5075 hibernate_page_list_t * page_list_wired,
5076 hibernate_page_list_t * page_list_pal,
5077 uint32_t * pagesOut)
5078{
5079 uint64_t start, end, nsec;
5080 vm_page_t m;
5081 uint32_t pages = page_list->page_count;
5082 uint32_t count_zf = 0, count_throttled = 0;
5083 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5084 uint32_t count_wire = pages;
5085 uint32_t count_discard_active = 0;
5086 uint32_t count_discard_inactive = 0;
5087 uint32_t count_discard_cleaned = 0;
5088 uint32_t count_discard_purgeable = 0;
5089 uint32_t count_discard_speculative = 0;
5090 uint32_t i;
5091 uint32_t bank;
5092 hibernate_bitmap_t * bitmap;
5093 hibernate_bitmap_t * bitmap_wired;
5094
5095
5096 HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);
5097
5098#if DEBUG
5099 vm_page_lock_queues();
5100 if (vm_page_local_q) {
5101 for (i = 0; i < vm_page_local_q_count; i++) {
5102 struct vpl *lq;
5103 lq = &vm_page_local_q[i].vpl_un.vpl;
5104 VPL_LOCK(&lq->vpl_lock);
5105 }
5106 }
5107#endif /* DEBUG */
5108
5109
5110 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5111
5112 clock_get_uptime(&start);
5113
5114 hibernate_page_list_zero(page_list);
5115 hibernate_page_list_zero(page_list_wired);
5116 hibernate_page_list_zero(page_list_pal);
5117
5118 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5119 hibernate_stats.cd_pages = pages;
5120
5121 if (vm_page_local_q) {
5122 for (i = 0; i < vm_page_local_q_count; i++)
5123 vm_page_reactivate_local(i, TRUE, TRUE);
5124 }
5125
5126 m = (vm_page_t) hibernate_gobble_queue;
5127 while(m)
5128 {
5129 pages--;
5130 count_wire--;
5131 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5132 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5133 m = (vm_page_t) m->pageq.next;
5134 }
5135
5136 for( i = 0; i < real_ncpus; i++ )
5137 {
5138 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5139 {
5140 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5141 {
5142 pages--;
5143 count_wire--;
5144 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5145 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5146
5147 hibernate_stats.cd_local_free++;
5148 hibernate_stats.cd_total_free++;
5149 }
5150 }
5151 }
5152
5153 for( i = 0; i < vm_colors; i++ )
5154 {
5155 queue_iterate(&vm_page_queue_free[i],
5156 m,
5157 vm_page_t,
5158 pageq)
5159 {
5160 pages--;
5161 count_wire--;
5162 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5163 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5164
5165 hibernate_stats.cd_total_free++;
5166 }
5167 }
5168
5169 queue_iterate(&vm_lopage_queue_free,
5170 m,
5171 vm_page_t,
5172 pageq)
5173 {
5174 pages--;
5175 count_wire--;
5176 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5177 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5178
5179 hibernate_stats.cd_total_free++;
5180 }
5181
5182 queue_iterate( &vm_page_queue_throttled,
5183 m,
5184 vm_page_t,
5185 pageq )
5186 {
5187 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5188 && hibernate_consider_discard(m))
5189 {
5190 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5191 count_discard_inactive++;
5192 }
5193 else
5194 count_throttled++;
5195 count_wire--;
5196 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5197 }
5198
5199 queue_iterate( &vm_page_queue_anonymous,
5200 m,
5201 vm_page_t,
5202 pageq )
5203 {
5204 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5205 && hibernate_consider_discard(m))
5206 {
5207 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5208 if (m->dirty)
5209 count_discard_purgeable++;
5210 else
5211 count_discard_inactive++;
5212 }
5213 else
5214 count_zf++;
5215 count_wire--;
5216 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5217 }
5218
5219 queue_iterate( &vm_page_queue_inactive,
5220 m,
5221 vm_page_t,
5222 pageq )
5223 {
5224 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5225 && hibernate_consider_discard(m))
5226 {
5227 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5228 if (m->dirty)
5229 count_discard_purgeable++;
5230 else
5231 count_discard_inactive++;
5232 }
5233 else
5234 count_inactive++;
5235 count_wire--;
5236 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5237 }
5238
5239 queue_iterate( &vm_page_queue_cleaned,
5240 m,
5241 vm_page_t,
5242 pageq )
5243 {
5244 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5245 && hibernate_consider_discard(m))
5246 {
5247 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5248 if (m->dirty)
5249 count_discard_purgeable++;
5250 else
5251 count_discard_cleaned++;
5252 }
5253 else
5254 count_cleaned++;
5255 count_wire--;
5256 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5257 }
5258
5259 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5260 {
5261 queue_iterate(&vm_page_queue_speculative[i].age_q,
5262 m,
5263 vm_page_t,
5264 pageq)
5265 {
5266 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5267 && hibernate_consider_discard(m))
5268 {
5269 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5270 count_discard_speculative++;
5271 }
5272 else
5273 count_speculative++;
5274 count_wire--;
5275 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5276 }
5277 }
5278
5279 queue_iterate( &vm_page_queue_active,
5280 m,
5281 vm_page_t,
5282 pageq )
5283 {
5284 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5285 && hibernate_consider_discard(m))
5286 {
5287 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5288 if (m->dirty)
5289 count_discard_purgeable++;
5290 else
5291 count_discard_active++;
5292 }
5293 else
5294 count_active++;
5295 count_wire--;
5296 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5297 }
5298
5299    // Remove wired pages from the pageable bitmap: a page to be saved as wired
5299    // (bit clear in page_list_wired) gets its bit set in page_list so it isn't also saved as pageable.
5300
5301 bitmap = &page_list->bank_bitmap[0];
5302 bitmap_wired = &page_list_wired->bank_bitmap[0];
5303 for (bank = 0; bank < page_list->bank_count; bank++)
5304 {
5305 for (i = 0; i < bitmap->bitmapwords; i++)
5306 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5307 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5308 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5309 }
5310
5311 // machine dependent adjustments
5312 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
5313
5314 hibernate_stats.cd_count_wire = count_wire;
5315 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative + count_discard_cleaned;
5316
5317 clock_get_uptime(&end);
5318 absolutetime_to_nanoseconds(end - start, &nsec);
5319 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5320
5321 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d cleaned %d\n",
5322 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_zf, count_throttled,
5323 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5324
5325 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
5326
5327#if DEBUG
5328 if (vm_page_local_q) {
5329 for (i = 0; i < vm_page_local_q_count; i++) {
5330 struct vpl *lq;
5331 lq = &vm_page_local_q[i].vpl_un.vpl;
5332 VPL_UNLOCK(&lq->vpl_lock);
5333 }
5334 }
5335 vm_page_unlock_queues();
5336#endif /* DEBUG */
5337
5338 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5339}
5340
5341void
5342hibernate_page_list_discard(hibernate_page_list_t * page_list)
5343{
5344 uint64_t start, end, nsec;
5345 vm_page_t m;
5346 vm_page_t next;
5347 uint32_t i;
5348 uint32_t count_discard_active = 0;
5349 uint32_t count_discard_inactive = 0;
5350 uint32_t count_discard_purgeable = 0;
5351 uint32_t count_discard_cleaned = 0;
5352 uint32_t count_discard_speculative = 0;
5353
5354#if DEBUG
5355 vm_page_lock_queues();
5356 if (vm_page_local_q) {
5357 for (i = 0; i < vm_page_local_q_count; i++) {
5358 struct vpl *lq;
5359 lq = &vm_page_local_q[i].vpl_un.vpl;
5360 VPL_LOCK(&lq->vpl_lock);
5361 }
5362 }
5363#endif /* DEBUG */
5364
5365 clock_get_uptime(&start);
5366
5367 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5368 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5369 {
5370 next = (vm_page_t) m->pageq.next;
5371 if (hibernate_page_bittst(page_list, m->phys_page))
5372 {
5373 if (m->dirty)
5374 count_discard_purgeable++;
5375 else
5376 count_discard_inactive++;
5377 hibernate_discard_page(m);
5378 }
5379 m = next;
5380 }
5381
5382 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5383 {
5384 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5385 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5386 {
5387 next = (vm_page_t) m->pageq.next;
5388 if (hibernate_page_bittst(page_list, m->phys_page))
5389 {
5390 count_discard_speculative++;
5391 hibernate_discard_page(m);
5392 }
5393 m = next;
5394 }
5395 }
5396
5397 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5398 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5399 {
5400 next = (vm_page_t) m->pageq.next;
5401 if (hibernate_page_bittst(page_list, m->phys_page))
5402 {
5403 if (m->dirty)
5404 count_discard_purgeable++;
5405 else
5406 count_discard_inactive++;
5407 hibernate_discard_page(m);
5408 }
5409 m = next;
5410 }
5411
5412 m = (vm_page_t) queue_first(&vm_page_queue_active);
5413 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5414 {
5415 next = (vm_page_t) m->pageq.next;
5416 if (hibernate_page_bittst(page_list, m->phys_page))
5417 {
5418 if (m->dirty)
5419 count_discard_purgeable++;
5420 else
5421 count_discard_active++;
5422 hibernate_discard_page(m);
5423 }
5424 m = next;
5425 }
5426
5427 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5428 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5429 {
5430 next = (vm_page_t) m->pageq.next;
5431 if (hibernate_page_bittst(page_list, m->phys_page))
5432 {
5433 if (m->dirty)
5434 count_discard_purgeable++;
5435 else
5436 count_discard_cleaned++;
5437 hibernate_discard_page(m);
5438 }
5439 m = next;
5440 }
5441
5442#if DEBUG
5443 if (vm_page_local_q) {
5444 for (i = 0; i < vm_page_local_q_count; i++) {
5445 struct vpl *lq;
5446 lq = &vm_page_local_q[i].vpl_un.vpl;
5447 VPL_UNLOCK(&lq->vpl_lock);
5448 }
5449 }
5450 vm_page_unlock_queues();
5451#endif /* DEBUG */
5452
5453 clock_get_uptime(&end);
5454 absolutetime_to_nanoseconds(end - start, &nsec);
5455 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
5456 nsec / 1000000ULL,
5457 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5458}
5459
5460#endif /* HIBERNATION */
5461
5462/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5463
5464#include <mach_vm_debug.h>
5465#if MACH_VM_DEBUG
5466
5467#include <mach_debug/hash_info.h>
5468#include <vm/vm_debug.h>
5469
5470/*
5471 * Routine: vm_page_info
5472 * Purpose:
5473 * Return information about the global VP table.
5474 * Fills the buffer with as much information as possible
5475 * and returns the desired size of the buffer.
5476 * Conditions:
5477 * Nothing locked. The caller should provide
5478 * possibly-pageable memory.
5479 */
5480
5481unsigned int
5482vm_page_info(
5483 hash_info_bucket_t *info,
5484 unsigned int count)
5485{
5486 unsigned int i;
5487 lck_spin_t *bucket_lock;
5488
5489 if (vm_page_bucket_count < count)
5490 count = vm_page_bucket_count;
5491
5492 for (i = 0; i < count; i++) {
5493 vm_page_bucket_t *bucket = &vm_page_buckets[i];
5494 unsigned int bucket_count = 0;
5495 vm_page_t m;
5496
5497 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
5498 lck_spin_lock(bucket_lock);
5499
5500 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
5501 bucket_count++;
5502
5503 lck_spin_unlock(bucket_lock);
5504
5505 /* don't touch pageable memory while holding locks */
5506 info[i].hib_count = bucket_count;
5507 }
5508
5509 return vm_page_bucket_count;
5510}
5511#endif /* MACH_VM_DEBUG */