apple/xnu (xnu-1228) - osfmk/vm/vm_resident.c
1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65#include <debug.h>
66#include <libkern/OSAtomic.h>
67
68#include <mach/clock_types.h>
69#include <mach/vm_prot.h>
70#include <mach/vm_statistics.h>
71#include <mach/sdt.h>
72#include <kern/counters.h>
73#include <kern/sched_prim.h>
74#include <kern/task.h>
75#include <kern/thread.h>
76#include <kern/zalloc.h>
77#include <kern/xpr.h>
78#include <vm/pmap.h>
79#include <vm/vm_init.h>
80#include <vm/vm_map.h>
81#include <vm/vm_page.h>
82#include <vm/vm_pageout.h>
83#include <vm/vm_kern.h> /* kernel_memory_allocate() */
84#include <kern/misc_protos.h>
85#include <zone_debug.h>
86#include <vm/cpm.h>
87#include <ppc/mappings.h> /* (BRINGUP) */
88#include <pexpert/pexpert.h> /* (BRINGUP) */
89
90#include <vm/vm_protos.h>
91#include <vm/memory_object.h>
92#include <vm/vm_purgeable_internal.h>
93
94#if CONFIG_EMBEDDED
95#include <sys/kern_memorystatus.h>
96#endif
97
98int speculative_age_index = 0;
99int speculative_steal_index = 0;
100
101struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
102
103static void vm_page_insert_internal(vm_page_t, vm_object_t, vm_object_offset_t, boolean_t);
104
105
106/*
107 * Associated with each page of user-allocatable memory is a
108 * page structure.
109 */
110
111/*
112 * These variables record the values returned by vm_page_bootstrap,
113 * for debugging purposes. The implementation of pmap_steal_memory
114 * and pmap_startup here also uses them internally.
115 */
116
117vm_offset_t virtual_space_start;
118vm_offset_t virtual_space_end;
119int vm_page_pages;
120
121/*
122 * The vm_page_lookup() routine, which provides for fast
123 * (virtual memory object, offset) to page lookup, employs
124 * the following hash table. The vm_page_{insert,remove}
125 * routines install and remove associations in the table.
126 * [This table is often called the virtual-to-physical,
127 * or VP, table.]
128 */
129typedef struct {
130 vm_page_t pages;
131#if MACH_PAGE_HASH_STATS
132 int cur_count; /* current count */
133 int hi_count; /* high water mark */
134#endif /* MACH_PAGE_HASH_STATS */
135} vm_page_bucket_t;
136
137vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
138unsigned int vm_page_bucket_count = 0; /* How big is array? */
139unsigned int vm_page_hash_mask; /* Mask for hash function */
140unsigned int vm_page_hash_shift; /* Shift for hash function */
141uint32_t vm_page_bucket_hash; /* Basic bucket hash */
142decl_simple_lock_data(,vm_page_bucket_lock)
143
144
145#if MACH_PAGE_HASH_STATS
146/* This routine is only for debug. It is intended to be called by
147 * hand by a developer using a kernel debugger. This routine prints
148 * out vm_page_hash table statistics to the kernel debug console.
149 */
150void
151hash_debug(void)
152{
153 int i;
154 int numbuckets = 0;
155 int highsum = 0;
156 int maxdepth = 0;
157
158 for (i = 0; i < vm_page_bucket_count; i++) {
159 if (vm_page_buckets[i].hi_count) {
160 numbuckets++;
161 highsum += vm_page_buckets[i].hi_count;
162 if (vm_page_buckets[i].hi_count > maxdepth)
163 maxdepth = vm_page_buckets[i].hi_count;
164 }
165 }
166 printf("Total number of buckets: %d\n", vm_page_bucket_count);
167 printf("Number used buckets: %d = %d%%\n",
168 numbuckets, 100*numbuckets/vm_page_bucket_count);
169 printf("Number unused buckets: %d = %d%%\n",
170 vm_page_bucket_count - numbuckets,
171 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
172 printf("Sum of bucket max depth: %d\n", highsum);
173 printf("Average bucket depth: %d.%2d\n",
174 highsum/vm_page_bucket_count,
175 highsum%vm_page_bucket_count);
176 printf("Maximum bucket depth: %d\n", maxdepth);
177}
178#endif /* MACH_PAGE_HASH_STATS */
179
180/*
181 * The virtual page size is currently implemented as a runtime
182 * variable, but is constant once initialized using vm_set_page_size.
183 * This initialization must be done in the machine-dependent
184 * bootstrap sequence, before calling other machine-independent
185 * initializations.
186 *
187 * All references to the virtual page size outside this
188 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
189 * constants.
190 */
191vm_size_t page_size = PAGE_SIZE;
192vm_size_t page_mask = PAGE_MASK;
193int page_shift = PAGE_SHIFT;
194
195/*
196 * Resident page structures are initialized from
197 * a template (see vm_page_alloc).
198 *
199 * When adding a new field to the virtual memory
200 * object structure, be sure to add initialization
201 * (see vm_page_bootstrap).
202 */
203struct vm_page vm_page_template;
204
205vm_page_t vm_pages = VM_PAGE_NULL;
206unsigned int vm_pages_count = 0;
207
208/*
209 * Resident pages that represent real memory
210 * are allocated from a set of free lists,
211 * one per color.
212 */
213unsigned int vm_colors;
214unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
215unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
216queue_head_t vm_page_queue_free[MAX_COLORS];
217vm_page_t vm_page_queue_fictitious;
218unsigned int vm_page_free_wanted;
219unsigned int vm_page_free_wanted_privileged;
220unsigned int vm_page_free_count;
221unsigned int vm_page_fictitious_count;
222
223unsigned int vm_page_free_count_minimum; /* debugging */
224
225/*
226 * Occasionally, the virtual memory system uses
227 * resident page structures that do not refer to
228 * real pages, for example to leave a page with
229 * important state information in the VP table.
230 *
231 * These page structures are allocated the way
232 * most other kernel structures are.
233 */
234zone_t vm_page_zone;
235decl_mutex_data(,vm_page_alloc_lock)
236unsigned int io_throttle_zero_fill;
237
238/*
239 * Fictitious pages don't have a physical address,
240 * but we must initialize phys_page to something.
241 * For debugging, this should be a strange value
242 * that the pmap module can recognize in assertions.
243 */
244vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
245
246/*
247 * Guard pages are not accessible so they don't
248 * need a physical address, but we need to enter
249 * one in the pmap.
250 * Let's make it recognizable and make sure that
251 * we don't use a real physical page with that
252 * physical address.
253 */
254vm_offset_t vm_page_guard_addr = (vm_offset_t) -2;
255
256/*
257 * Resident page structures are also chained on
258 * queues that are used by the page replacement
259 * system (pageout daemon). These queues are
260 * defined here, but are shared by the pageout
261 * module. The inactive queue is broken into
262 * inactive and zf for convenience as the
263 * pageout daemon often assigns a higher
264 * affinity to zf pages.
265 */
266queue_head_t vm_page_queue_active;
267queue_head_t vm_page_queue_inactive;
268queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
269
270unsigned int vm_page_active_count;
271unsigned int vm_page_inactive_count;
272unsigned int vm_page_throttled_count;
273unsigned int vm_page_speculative_count;
274unsigned int vm_page_wire_count;
275unsigned int vm_page_gobble_count = 0;
276unsigned int vm_page_wire_count_warning = 0;
277unsigned int vm_page_gobble_count_warning = 0;
278
279unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
280uint64_t vm_page_purged_count = 0; /* total count of purged pages */
281
282unsigned int vm_page_speculative_recreated = 0;
283unsigned int vm_page_speculative_created = 0;
284unsigned int vm_page_speculative_used = 0;
285
286ppnum_t vm_lopage_poolstart = 0;
287ppnum_t vm_lopage_poolend = 0;
288int vm_lopage_poolsize = 0;
289uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
290
291
292/*
293 * Several page replacement parameters are also
294 * shared with this module, so that page allocation
295 * (done here in vm_page_alloc) can trigger the
296 * pageout daemon.
297 */
298unsigned int vm_page_free_target = 0;
299unsigned int vm_page_free_min = 0;
300unsigned int vm_page_inactive_target = 0;
301unsigned int vm_page_inactive_min = 0;
302unsigned int vm_page_free_reserved = 0;
303unsigned int vm_page_zfill_throttle_count = 0;
304
305/*
306 * The VM system has a couple of heuristics for deciding
307 * that pages are "uninteresting" and should be placed
308 * on the inactive queue as likely candidates for replacement.
309 * These variables let the heuristics be controlled at run-time
310 * to make experimentation easier.
311 */
312
313boolean_t vm_page_deactivate_hint = TRUE;
314
315/*
316 * vm_set_page_size:
317 *
318 * Sets the page size, perhaps based upon the memory
319 * size. Must be called before any use of page-size
320 * dependent functions.
321 *
322 * Sets page_shift and page_mask from page_size.
323 */
324void
325vm_set_page_size(void)
326{
327 page_mask = page_size - 1;
328
329 if ((page_mask & page_size) != 0)
330 panic("vm_set_page_size: page size not a power of two");
331
332 for (page_shift = 0; ; page_shift++)
333 if ((1U << page_shift) == page_size)
334 break;
335}
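/*
 * Worked example (illustrative only): with page_size == 4096 the loop
 * above yields page_mask == 0xfff and page_shift == 12, since
 * (1U << 12) == 4096.
 */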
336
337
338/* Called once during startup, once the cache geometry is known.
339 */
340static void
341vm_page_set_colors( void )
342{
343 unsigned int n, override;
344
345 if ( PE_parse_boot_arg("colors", &override) ) /* colors specified as a boot-arg? */
346 n = override;
347 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
348 n = vm_cache_geometry_colors;
349 else n = DEFAULT_COLORS; /* use default if all else fails */
350
351 if ( n == 0 )
352 n = 1;
353 if ( n > MAX_COLORS )
354 n = MAX_COLORS;
355
356 /* the count must be a power of 2 */
357 if ( ( n & (n - 1)) !=0 )
358 panic("vm_page_set_colors");
359
360 vm_colors = n;
361 vm_color_mask = n - 1;
362}
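/*
 * Illustrative sketch (disabled; not part of the original source): shows how
 * the color mask computed above selects a free queue, mirroring the
 * "phys_page & vm_color_mask" indexing used later by vm_page_release() and
 * vm_page_grab().  The page number 0x12345 is a made-up example value.
 */
#if 0
static unsigned int
vm_page_color_example(void)
{
	ppnum_t example_phys_page = 0x12345;	/* hypothetical physical page number */

	/* with vm_colors == 32, vm_color_mask == 0x1f, so this returns 0x5 */
	return (example_phys_page & vm_color_mask);
}
#endif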
363
364
365/*
366 * vm_page_bootstrap:
367 *
368 * Initializes the resident memory module.
369 *
370 * Allocates memory for the page cells, and
371 * for the object/offset-to-page hash table headers.
372 * Each page cell is initialized and placed on the free list.
373 * Returns the range of available kernel virtual memory.
374 */
375
376void
377vm_page_bootstrap(
378 vm_offset_t *startp,
379 vm_offset_t *endp)
380{
381 register vm_page_t m;
382 unsigned int i;
383 unsigned int log1;
384 unsigned int log2;
385 unsigned int size;
386
387 /*
388 * Initialize the vm_page template.
389 */
390
391 m = &vm_page_template;
392 m->object = VM_OBJECT_NULL; /* reset later */
393 m->offset = (vm_object_offset_t) -1; /* reset later */
394 m->wire_count = 0;
395
396 m->pageq.next = NULL;
397 m->pageq.prev = NULL;
398 m->listq.next = NULL;
399 m->listq.prev = NULL;
400
401 m->speculative = FALSE;
402 m->throttled = FALSE;
403 m->inactive = FALSE;
404 m->active = FALSE;
405 m->no_cache = FALSE;
406 m->laundry = FALSE;
407 m->free = FALSE;
408 m->pmapped = FALSE;
409 m->reference = FALSE;
410 m->pageout = FALSE;
411 m->dump_cleaning = FALSE;
412 m->list_req_pending = FALSE;
413
414 m->busy = TRUE;
415 m->wanted = FALSE;
416 m->tabled = FALSE;
417 m->fictitious = FALSE;
418 m->private = FALSE;
419 m->absent = FALSE;
420 m->error = FALSE;
421 m->dirty = FALSE;
422 m->cleaning = FALSE;
423 m->precious = FALSE;
424 m->clustered = FALSE;
425 m->unusual = FALSE;
426 m->restart = FALSE;
427 m->zero_fill = FALSE;
428 m->encrypted = FALSE;
429 m->encrypted_cleaning = FALSE;
430 m->deactivated = FALSE;
431
432 m->phys_page = 0; /* reset later */
433
434 /*
435 * Initialize the page queues.
436 */
437
438 mutex_init(&vm_page_queue_free_lock, 0);
439 mutex_init(&vm_page_queue_lock, 0);
440
441 mutex_init(&vm_purgeable_queue_lock, 0);
442
443 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
444 int group;
445
446 purgeable_queues[i].token_q_head = 0;
447 purgeable_queues[i].token_q_tail = 0;
448 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
449 queue_init(&purgeable_queues[i].objq[group]);
450
451 purgeable_queues[i].type = i;
452 purgeable_queues[i].new_pages = 0;
453#if MACH_ASSERT
454 purgeable_queues[i].debug_count_tokens = 0;
455 purgeable_queues[i].debug_count_objects = 0;
456#endif
457 };
458
459 for (i = 0; i < MAX_COLORS; i++ )
460 queue_init(&vm_page_queue_free[i]);
461 queue_init(&vm_lopage_queue_free);
462 vm_page_queue_fictitious = VM_PAGE_NULL;
463 queue_init(&vm_page_queue_active);
464 queue_init(&vm_page_queue_inactive);
465 queue_init(&vm_page_queue_throttled);
466 queue_init(&vm_page_queue_zf);
467
468 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
469 queue_init(&vm_page_queue_speculative[i].age_q);
470
471 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
472 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
473 }
474 vm_page_free_wanted = 0;
475 vm_page_free_wanted_privileged = 0;
476
477 vm_page_set_colors();
478
479
480 /*
481 * Steal memory for the map and zone subsystems.
482 */
483
484 vm_map_steal_memory();
485 zone_steal_memory();
486
487 /*
488 * Allocate (and initialize) the virtual-to-physical
489 * table hash buckets.
490 *
491 * The number of buckets should be a power of two to
492 * get a good hash function. The following computation
493 * chooses the first power of two that is greater than
494 * or equal to the number of physical pages in the system.
495 */
496
497 simple_lock_init(&vm_page_bucket_lock, 0);
498
499 if (vm_page_bucket_count == 0) {
500 unsigned int npages = pmap_free_pages();
501
502 vm_page_bucket_count = 1;
503 while (vm_page_bucket_count < npages)
504 vm_page_bucket_count <<= 1;
505 }
506
507 vm_page_hash_mask = vm_page_bucket_count - 1;
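	/*
	 * Illustrative example: on a hypothetical machine reporting roughly
	 * 1,000,000 free physical pages, the loop above stops at the first
	 * power of two not below that count, so vm_page_bucket_count becomes
	 * (1 << 20) == 1048576 and vm_page_hash_mask becomes 0xfffff.
	 */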
508
509 /*
510 * Calculate object shift value for hashing algorithm:
511 * O = log2(sizeof(struct vm_object))
512 * B = log2(vm_page_bucket_count)
513 * hash shifts the object left by
514 * B/2 - O
515 */
516 size = vm_page_bucket_count;
517 for (log1 = 0; size > 1; log1++)
518 size /= 2;
519 size = sizeof(struct vm_object);
520 for (log2 = 0; size > 1; log2++)
521 size /= 2;
522 vm_page_hash_shift = log1/2 - log2 + 1;
523
524 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
525 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
526 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to insure unique series */
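	/*
	 * Continuing the hypothetical (1 << 20)-bucket example: log1 == 20, so
	 * vm_page_bucket_hash == (1 << 10) | (1 << 5) | 1 == 0x421, roughly the
	 * square root and fourth root of the table size with the low bit forced
	 * on, and vm_page_hash_shift == 10 - log2 + 1, where log2 was computed
	 * above from sizeof(struct vm_object).
	 */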
527
528 if (vm_page_hash_mask & vm_page_bucket_count)
529 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
530
531 vm_page_buckets = (vm_page_bucket_t *)
532 pmap_steal_memory(vm_page_bucket_count *
533 sizeof(vm_page_bucket_t));
534
535 for (i = 0; i < vm_page_bucket_count; i++) {
536 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
537
538 bucket->pages = VM_PAGE_NULL;
539#if MACH_PAGE_HASH_STATS
540 bucket->cur_count = 0;
541 bucket->hi_count = 0;
542#endif /* MACH_PAGE_HASH_STATS */
543 }
544
545 /*
546 * Machine-dependent code allocates the resident page table.
547 * It uses vm_page_init to initialize the page frames.
548 * The code also returns to us the virtual space available
549 * to the kernel. We don't trust the pmap module
550 * to get the alignment right.
551 */
552
553 pmap_startup(&virtual_space_start, &virtual_space_end);
554 virtual_space_start = round_page(virtual_space_start);
555 virtual_space_end = trunc_page(virtual_space_end);
556
557 *startp = virtual_space_start;
558 *endp = virtual_space_end;
559
560 /*
561 * Compute the initial "wire" count.
562 * Up until now, the pages which have been set aside are not under
563 * the VM system's control, so although they aren't explicitly
564 * wired, they nonetheless can't be moved. At this moment,
565 * all VM managed pages are "free", courtesy of pmap_startup.
566 */
567 vm_page_wire_count = atop_64(max_mem) - vm_page_free_count; /* initial value */
568 vm_page_free_count_minimum = vm_page_free_count;
569
570 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
571 vm_page_free_count, vm_page_wire_count);
572
573 simple_lock_init(&vm_paging_lock, 0);
574}
575
576#ifndef MACHINE_PAGES
577/*
578 * We implement pmap_steal_memory and pmap_startup with the help
579 * of two simpler functions, pmap_virtual_space and pmap_next_page.
580 */
581
582void *
583pmap_steal_memory(
584 vm_size_t size)
585{
586 vm_offset_t addr, vaddr;
587 ppnum_t phys_page;
588
589 /*
590 * We round the size up to a multiple of the pointer size.
591 */
592
593 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
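	/*
	 * Illustrative example: with 8-byte pointers a request for 13 bytes
	 * becomes (13 + 7) & ~7 == 16, i.e. the size is rounded up to the next
	 * multiple of sizeof(void *).
	 */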
594
595 /*
596 * If this is the first call to pmap_steal_memory,
597 * we have to initialize ourselves.
598 */
599
600 if (virtual_space_start == virtual_space_end) {
601 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
602
603 /*
604 * The initial values must be aligned properly, and
605 * we don't trust the pmap module to do it right.
606 */
607
608 virtual_space_start = round_page(virtual_space_start);
609 virtual_space_end = trunc_page(virtual_space_end);
610 }
611
612 /*
613 * Allocate virtual memory for this request.
614 */
615
616 addr = virtual_space_start;
617 virtual_space_start += size;
618
619 kprintf("pmap_steal_memory: %08X - %08X; size=%08X\n", addr, virtual_space_start, size); /* (TEST/DEBUG) */
620
621 /*
622 * Allocate and map physical pages to back new virtual pages.
623 */
624
625 for (vaddr = round_page(addr);
626 vaddr < addr + size;
627 vaddr += PAGE_SIZE) {
628 if (!pmap_next_page(&phys_page))
629 panic("pmap_steal_memory");
630
631 /*
632 * XXX Logically, these mappings should be wired,
633 * but some pmap modules barf if they are.
634 */
635
636 pmap_enter(kernel_pmap, vaddr, phys_page,
637 VM_PROT_READ|VM_PROT_WRITE,
638 VM_WIMG_USE_DEFAULT, FALSE);
639 /*
640 * Account for newly stolen memory
641 */
642 vm_page_wire_count++;
643
644 }
645
646 return (void *) addr;
647}
648
649void
650pmap_startup(
651 vm_offset_t *startp,
652 vm_offset_t *endp)
653{
654 unsigned int i, npages, pages_initialized, fill, fillval;
655 ppnum_t phys_page;
656 addr64_t tmpaddr;
657 unsigned int num_of_lopages = 0;
658 unsigned int last_index;
659
660 /*
661 * We calculate how many page frames we will have
662 * and then allocate the page structures in one chunk.
663 */
664
665 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
666 tmpaddr = tmpaddr + (addr64_t)(round_page_32(virtual_space_start) - virtual_space_start); /* Account for any slop */
667 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many pages we can manage, charging each one for its vm_page_t as well */
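	/*
	 * Illustrative arithmetic: each managed page costs PAGE_SIZE bytes of
	 * page frame plus sizeof(struct vm_page) bytes of bookkeeping, so the
	 * division above leaves exactly enough slack to steal the vm_page
	 * array itself below.  For example, with 4 KB pages and a hypothetical
	 * 100-byte struct vm_page, about 97.6% of the remaining memory ends up
	 * as usable page frames.
	 */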
668
669 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
670
671 /*
672 * Initialize the page frames.
673 */
674 for (i = 0, pages_initialized = 0; i < npages; i++) {
675 if (!pmap_next_page(&phys_page))
676 break;
677
678 vm_page_init(&vm_pages[i], phys_page);
679 vm_page_pages++;
680 pages_initialized++;
681 }
682 vm_pages_count = pages_initialized;
683
684 /*
685 * Check if we want to initialize pages to a known value
686 */
687 fill = 0; /* Assume no fill */
688 if (PE_parse_boot_arg("fill", &fillval)) fill = 1; /* Set fill */
689
690
691 /*
692 * if vm_lopage_poolsize is non-zero, then we need to reserve
693 * a pool of pages whose addresses are less than 4G... this pool
694 * is used by drivers whose hardware can't DMA beyond 32 bits...
695 *
696 * note that I'm assuming that the page list is ascending and
697 * ordered with respect to the physical address
698 */
699 for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) {
700 vm_page_t m;
701
702 m = &vm_pages[i];
703
704 if (m->phys_page >= (1 << (32 - PAGE_SHIFT)))
705 panic("couldn't reserve the lopage pool: not enough lo pages\n");
706
707 if (m->phys_page < vm_lopage_poolend)
708 panic("couldn't reserve the lopage pool: page list out of order\n");
709
710 vm_lopage_poolend = m->phys_page;
711
712 if (vm_lopage_poolstart == 0)
713 vm_lopage_poolstart = m->phys_page;
714 else {
715 if (m->phys_page < vm_lopage_poolstart)
716 panic("couldn't reserve the lopage pool: page list out of order\n");
717 }
718
719 if (fill)
720 fillPage(m->phys_page, fillval); /* Fill the page with a known value if requested at boot */
721
722 vm_page_release(m);
723 }
724 last_index = i;
725
726 // -debug code remove
727 if (2 == vm_himemory_mode) {
728 // free low -> high so high is preferred
729 for (i = last_index + 1; i <= pages_initialized; i++) {
730 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
731 vm_page_release(&vm_pages[i - 1]);
732 }
733 }
734 else
735 // debug code remove-
736
737 /*
738 * Release pages in reverse order so that physical pages
739 * initially get allocated in ascending addresses. This keeps
740 * the devices (which must address physical memory) happy if
741 * they require several consecutive pages.
742 */
743 for (i = pages_initialized; i > last_index; i--) {
744 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
745 vm_page_release(&vm_pages[i - 1]);
746 }
747
748#if 0
749 {
750 vm_page_t xx, xxo, xxl;
751 int i, j, k, l;
752
753 j = 0; /* (BRINGUP) */
754 xxl = 0;
755
756 for( i = 0; i < vm_colors; i++ ) {
757 queue_iterate(&vm_page_queue_free[i],
758 xx,
759 vm_page_t,
760 pageq) { /* BRINGUP */
761 j++; /* (BRINGUP) */
762 if(j > vm_page_free_count) { /* (BRINGUP) */
763 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
764 }
765
766 l = vm_page_free_count - j; /* (BRINGUP) */
767 k = 0; /* (BRINGUP) */
768
769 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
770
771 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
772 k++;
773 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
774 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
775 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
776 }
777 }
778
779 xxl = xx;
780 }
781 }
782
783 if(j != vm_page_free_count) { /* (BRINGUP) */
784 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
785 }
786 }
787#endif
788
789
790 /*
791 * We have to re-align virtual_space_start,
792 * because pmap_steal_memory has been using it.
793 */
794
795 virtual_space_start = round_page_32(virtual_space_start);
796
797 *startp = virtual_space_start;
798 *endp = virtual_space_end;
799}
800#endif /* MACHINE_PAGES */
801
802/*
803 * Routine: vm_page_module_init
804 * Purpose:
805 * Second initialization pass, to be done after
806 * the basic VM system is ready.
807 */
808void
809vm_page_module_init(void)
810{
811 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
812 0, PAGE_SIZE, "vm pages");
813
814#if ZONE_DEBUG
815 zone_debug_disable(vm_page_zone);
816#endif /* ZONE_DEBUG */
817
818 zone_change(vm_page_zone, Z_EXPAND, FALSE);
819 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
820 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
821
822 /*
823 * Adjust zone statistics to account for the real pages allocated
824 * in vm_page_create(). [Q: is this really what we want?]
825 */
826 vm_page_zone->count += vm_page_pages;
827 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
828
829 mutex_init(&vm_page_alloc_lock, 0);
830}
831
832/*
833 * Routine: vm_page_create
834 * Purpose:
835 * After the VM system is up, machine-dependent code
836 * may stumble across more physical memory. For example,
837 * memory that it was reserving for a frame buffer.
838 * vm_page_create turns this memory into available pages.
839 */
840
841void
842vm_page_create(
843 ppnum_t start,
844 ppnum_t end)
845{
846 ppnum_t phys_page;
847 vm_page_t m;
848
849 for (phys_page = start;
850 phys_page < end;
851 phys_page++) {
852 while ((m = (vm_page_t) vm_page_grab_fictitious())
853 == VM_PAGE_NULL)
854 vm_page_more_fictitious();
855
856 vm_page_init(m, phys_page);
857 vm_page_pages++;
858 vm_page_release(m);
859 }
860}
861
862/*
863 * vm_page_hash:
864 *
865 * Distributes the object/offset key pair among hash buckets.
866 *
867 * NOTE: The bucket count must be a power of 2
868 */
869#define vm_page_hash(object, offset) (\
870 ( (natural_t)((uint32_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
871 & vm_page_hash_mask)
872
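/*
 * Note on the mask (illustrative): the final "& vm_page_hash_mask" reduces
 * the mixed 32-bit value to a bucket index, and is equivalent to taking it
 * modulo vm_page_bucket_count only because the bucket count is a power of
 * two (mask == count - 1).  E.g. with (1 << 20) buckets the mask is 0xfffff.
 */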
873
874/*
875 * vm_page_insert: [ internal use only ]
876 *
877 * Inserts the given mem entry into the object/offset-page
878 * table and object list.
879 *
880 * The object must be locked.
881 */
882void
883vm_page_insert(
884 vm_page_t mem,
885 vm_object_t object,
886 vm_object_offset_t offset)
887{
888 vm_page_insert_internal(mem, object, offset, FALSE);
889}
890
891
892static void
893vm_page_insert_internal(
894 vm_page_t mem,
895 vm_object_t object,
896 vm_object_offset_t offset,
897 boolean_t queues_lock_held)
898{
899 register vm_page_bucket_t *bucket;
900
901 XPR(XPR_VM_PAGE,
902 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
903 (integer_t)object, (integer_t)offset, (integer_t)mem, 0,0);
904
905 VM_PAGE_CHECK(mem);
906
907 if (object == vm_submap_object) {
908 /* the vm_submap_object is only a placeholder for submaps */
909 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
910 }
911
912 vm_object_lock_assert_exclusive(object);
913#if DEBUG
914 if (mem->tabled || mem->object != VM_OBJECT_NULL)
915 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
916 "already in (obj=%p,off=0x%llx)",
917 mem, object, offset, mem->object, mem->offset);
918#endif
919 assert(!object->internal || offset < object->size);
920
921 /* only insert "pageout" pages into "pageout" objects,
922 * and normal pages into normal objects */
923 assert(object->pageout == mem->pageout);
924
925 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
926
927 /*
928 * Record the object/offset pair in this page
929 */
930
931 mem->object = object;
932 mem->offset = offset;
933
934 /*
935 * Insert it into the object/offset hash table
936 */
937
938 bucket = &vm_page_buckets[vm_page_hash(object, offset)];
939 simple_lock(&vm_page_bucket_lock);
940 mem->next = bucket->pages;
941 bucket->pages = mem;
942#if MACH_PAGE_HASH_STATS
943 if (++bucket->cur_count > bucket->hi_count)
944 bucket->hi_count = bucket->cur_count;
945#endif /* MACH_PAGE_HASH_STATS */
946 simple_unlock(&vm_page_bucket_lock);
947
948 /*
949 * Now link into the object's list of backed pages.
950 */
951
952 VM_PAGE_INSERT(mem, object);
953 mem->tabled = TRUE;
954
955 /*
956 * Show that the object has one more resident page.
957 */
958
959 object->resident_page_count++;
960
961 if (object->purgable == VM_PURGABLE_VOLATILE ||
962 object->purgable == VM_PURGABLE_EMPTY) {
963 if (queues_lock_held == FALSE)
964 vm_page_lockspin_queues();
965
966 vm_page_purgeable_count++;
967
968 if (queues_lock_held == FALSE)
969 vm_page_unlock_queues();
970 }
971}
972
973/*
974 * vm_page_replace:
975 *
976 * Exactly like vm_page_insert, except that we first
977 * remove any existing page at the given offset in object.
978 *
979 * The object and page queues must be locked.
980 */
981
982void
983vm_page_replace(
984 register vm_page_t mem,
985 register vm_object_t object,
986 register vm_object_offset_t offset)
987{
988 vm_page_bucket_t *bucket;
989 vm_page_t found_m = VM_PAGE_NULL;
990
991 VM_PAGE_CHECK(mem);
992 vm_object_lock_assert_exclusive(object);
993#if DEBUG
994 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
995
996 if (mem->tabled || mem->object != VM_OBJECT_NULL)
997 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
998 "already in (obj=%p,off=0x%llx)",
999 mem, object, offset, mem->object, mem->offset);
1000#endif
1001 /*
1002 * Record the object/offset pair in this page
1003 */
1004
1005 mem->object = object;
1006 mem->offset = offset;
1007
1008 /*
1009 * Insert it into the object/offset hash table,
1010 * replacing any page that might have been there.
1011 */
1012
1013 bucket = &vm_page_buckets[vm_page_hash(object, offset)];
1014 simple_lock(&vm_page_bucket_lock);
1015
1016 if (bucket->pages) {
1017 vm_page_t *mp = &bucket->pages;
1018 register vm_page_t m = *mp;
1019
1020 do {
1021 if (m->object == object && m->offset == offset) {
1022 /*
1023 * Remove old page from hash list
1024 */
1025 *mp = m->next;
1026
1027 found_m = m;
1028 break;
1029 }
1030 mp = &m->next;
1031 } while ((m = *mp));
1032
1033 mem->next = bucket->pages;
1034 } else {
1035 mem->next = VM_PAGE_NULL;
1036 }
1037 /*
1038 * insert new page at head of hash list
1039 */
1040 bucket->pages = mem;
1041
1042 simple_unlock(&vm_page_bucket_lock);
1043
1044 if (found_m) {
1045 /*
1046 * there was already a page at the specified
1047 * offset for this object... remove it from
1048 * the object and free it back to the free list
1049 */
1050 VM_PAGE_REMOVE(found_m);
1051 found_m->tabled = FALSE;
1052
1053 found_m->object = VM_OBJECT_NULL;
1054 found_m->offset = (vm_object_offset_t) -1;
1055 object->resident_page_count--;
1056
1057 if (object->purgable == VM_PURGABLE_VOLATILE ||
1058 object->purgable == VM_PURGABLE_EMPTY) {
1059 assert(vm_page_purgeable_count > 0);
1060 vm_page_purgeable_count--;
1061 }
1062
1063 /*
1064 * Return page to the free list.
1065 * Note the page is not tabled now
1066 */
1067 vm_page_free(found_m);
1068 }
1069 /*
1070 * Now link into the object's list of backed pages.
1071 */
1072
1073 VM_PAGE_INSERT(mem, object);
1074 mem->tabled = TRUE;
1075
1076 /*
1077 * And show that the object has one more resident
1078 * page.
1079 */
1080
1081 object->resident_page_count++;
1082
1083 if (object->purgable == VM_PURGABLE_VOLATILE ||
1084 object->purgable == VM_PURGABLE_EMPTY) {
1085 vm_page_purgeable_count++;
1086 }
1087}
1088
1089/*
1090 * vm_page_remove: [ internal use only ]
1091 *
1092 * Removes the given mem entry from the object/offset-page
1093 * table and the object page list.
1094 *
1095 * The object and page queues must be locked.
1096 */
1097
1098void
1099vm_page_remove(
1100 register vm_page_t mem)
1101{
1102 register vm_page_bucket_t *bucket;
1103 register vm_page_t this;
1104
1105 XPR(XPR_VM_PAGE,
1106 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1107 (integer_t)mem->object, (integer_t)mem->offset,
1108 (integer_t)mem, 0,0);
1109#if DEBUG
1110 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
1111#endif
1112 vm_object_lock_assert_exclusive(mem->object);
1113 assert(mem->tabled);
1114 assert(!mem->cleaning);
1115 VM_PAGE_CHECK(mem);
1116
1117
1118 /*
1119 * Remove from the object/offset hash table
1120 */
1121
1122 bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
1123 simple_lock(&vm_page_bucket_lock);
1124 if ((this = bucket->pages) == mem) {
1125 /* optimize for common case */
1126
1127 bucket->pages = mem->next;
1128 } else {
1129 register vm_page_t *prev;
1130
1131 for (prev = &this->next;
1132 (this = *prev) != mem;
1133 prev = &this->next)
1134 continue;
1135 *prev = this->next;
1136 }
1137#if MACH_PAGE_HASH_STATS
1138 bucket->cur_count--;
1139#endif /* MACH_PAGE_HASH_STATS */
1140 simple_unlock(&vm_page_bucket_lock);
1141
1142 /*
1143 * Now remove from the object's list of backed pages.
1144 */
1145
1146 VM_PAGE_REMOVE(mem);
1147
1148 /*
1149 * And show that the object has one fewer resident
1150 * page.
1151 */
1152
1153 mem->object->resident_page_count--;
1154
1155 if (mem->object->purgable == VM_PURGABLE_VOLATILE ||
1156 mem->object->purgable == VM_PURGABLE_EMPTY) {
1157 assert(vm_page_purgeable_count > 0);
1158 vm_page_purgeable_count--;
1159 }
1160 mem->tabled = FALSE;
1161 mem->object = VM_OBJECT_NULL;
1162 mem->offset = (vm_object_offset_t) -1;
1163}
1164
1165/*
1166 * vm_page_lookup:
1167 *
1168 * Returns the page associated with the object/offset
1169 * pair specified; if none is found, VM_PAGE_NULL is returned.
1170 *
1171 * The object must be locked. No side effects.
1172 */
1173
1174unsigned long vm_page_lookup_hint = 0;
1175unsigned long vm_page_lookup_hint_next = 0;
1176unsigned long vm_page_lookup_hint_prev = 0;
1177unsigned long vm_page_lookup_hint_miss = 0;
1178unsigned long vm_page_lookup_bucket_NULL = 0;
1179unsigned long vm_page_lookup_miss = 0;
1180
1181
1182vm_page_t
1183vm_page_lookup(
1184 register vm_object_t object,
1185 register vm_object_offset_t offset)
1186{
1187 register vm_page_t mem;
1188 register vm_page_bucket_t *bucket;
1189 queue_entry_t qe;
1190
1191 vm_object_lock_assert_held(object);
1192 mem = object->memq_hint;
1193
1194 if (mem != VM_PAGE_NULL) {
1195 assert(mem->object == object);
1196
1197 if (mem->offset == offset) {
1198 vm_page_lookup_hint++;
1199 return mem;
1200 }
1201 qe = queue_next(&mem->listq);
1202
1203 if (! queue_end(&object->memq, qe)) {
1204 vm_page_t next_page;
1205
1206 next_page = (vm_page_t) qe;
1207 assert(next_page->object == object);
1208
1209 if (next_page->offset == offset) {
1210 vm_page_lookup_hint_next++;
1211 object->memq_hint = next_page; /* new hint */
1212 return next_page;
1213 }
1214 }
1215 qe = queue_prev(&mem->listq);
1216
1217 if (! queue_end(&object->memq, qe)) {
1218 vm_page_t prev_page;
1219
1220 prev_page = (vm_page_t) qe;
1221 assert(prev_page->object == object);
1222
1223 if (prev_page->offset == offset) {
1224 vm_page_lookup_hint_prev++;
1225 object->memq_hint = prev_page; /* new hint */
1226 return prev_page;
1227 }
1228 }
1229 }
1230 /*
1231 * Search the hash table for this object/offset pair
1232 */
1233 bucket = &vm_page_buckets[vm_page_hash(object, offset)];
1234
1235 /*
1236 * since we hold the object lock, we are guaranteed that no
1237 * new pages can be inserted into this object... this in turn
1238 * guarantees that the page we're looking for can't exist
1239 * if the bucket it hashes to is currently NULL even when looked
1240 * at outside the scope of the hash bucket lock... this is a
1241 * really cheap optimization to avoid taking the lock
1242 */
1243 if (bucket->pages == VM_PAGE_NULL) {
1244 vm_page_lookup_bucket_NULL++;
1245
1246 return (VM_PAGE_NULL);
1247 }
1248 simple_lock(&vm_page_bucket_lock);
1249
1250 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1251 VM_PAGE_CHECK(mem);
1252 if ((mem->object == object) && (mem->offset == offset))
1253 break;
1254 }
1255 simple_unlock(&vm_page_bucket_lock);
1256
1257 if (mem != VM_PAGE_NULL) {
1258 if (object->memq_hint != VM_PAGE_NULL) {
1259 vm_page_lookup_hint_miss++;
1260 }
1261 assert(mem->object == object);
1262 object->memq_hint = mem;
1263 } else
1264 vm_page_lookup_miss++;
1265
1266 return(mem);
1267}
1268
1269
1270/*
1271 * vm_page_rename:
1272 *
1273 * Move the given memory entry from its
1274 * current object to the specified target object/offset.
1275 *
1276 * The object must be locked.
1277 */
1278void
1279vm_page_rename(
1280 register vm_page_t mem,
1281 register vm_object_t new_object,
1282 vm_object_offset_t new_offset,
1283 boolean_t encrypted_ok)
1284{
1285 assert(mem->object != new_object);
1286
1287 /*
1288 * ENCRYPTED SWAP:
1289 * The encryption key is based on the page's memory object
1290 * (aka "pager") and paging offset. Moving the page to
1291 * another VM object changes its "pager" and "paging_offset"
1292 * so it has to be decrypted first, or we would lose the key.
1293 *
1294 * One exception is VM object collapsing, where we transfer pages
1295 * from one backing object to its parent object. This operation also
1296 * transfers the paging information, so the <pager,paging_offset> info
1297 * should remain consistent. The caller (vm_object_do_collapse())
1298 * sets "encrypted_ok" in this case.
1299 */
1300 if (!encrypted_ok && mem->encrypted) {
1301 panic("vm_page_rename: page %p is encrypted\n", mem);
1302 }
1303
1304 /*
1305 * Changes to mem->object require the page lock because
1306 * the pageout daemon uses that lock to get the object.
1307 */
1308
1309 XPR(XPR_VM_PAGE,
1310 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1311 (integer_t)new_object, (integer_t)new_offset,
1312 (integer_t)mem, 0,0);
1313
1314 vm_page_lockspin_queues();
1315 vm_page_remove(mem);
1316 vm_page_insert(mem, new_object, new_offset);
1317 vm_page_unlock_queues();
1318}
1319
1320/*
1321 * vm_page_init:
1322 *
1323 * Initialize the fields in a new page.
1324 * This takes a structure with random values and initializes it
1325 * so that it can be given to vm_page_release or vm_page_insert.
1326 */
1327void
1328vm_page_init(
1329 vm_page_t mem,
1330 ppnum_t phys_page)
1331{
1332 assert(phys_page);
1333 *mem = vm_page_template;
1334 mem->phys_page = phys_page;
1335}
1336
1337/*
1338 * vm_page_grab_fictitious:
1339 *
1340 * Remove a fictitious page from the free list.
1341 * Returns VM_PAGE_NULL if there are no free pages.
1342 */
1343int c_vm_page_grab_fictitious = 0;
1344int c_vm_page_release_fictitious = 0;
1345int c_vm_page_more_fictitious = 0;
1346
1347extern vm_page_t vm_page_grab_fictitious_common(vm_offset_t phys_addr);
1348
1349vm_page_t
1350vm_page_grab_fictitious_common(
1351 vm_offset_t phys_addr)
1352{
1353 register vm_page_t m;
1354
1355 m = (vm_page_t)zget(vm_page_zone);
1356 if (m) {
1357 vm_page_init(m, phys_addr);
1358 m->fictitious = TRUE;
1359 }
1360
1361 c_vm_page_grab_fictitious++;
1362 return m;
1363}
1364
1365vm_page_t
1366vm_page_grab_fictitious(void)
1367{
1368 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1369}
1370
1371vm_page_t
1372vm_page_grab_guard(void)
1373{
1374 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1375}
1376
1377/*
1378 * vm_page_release_fictitious:
1379 *
1380 * Release a fictitious page to the free list.
1381 */
1382
1383void
1384vm_page_release_fictitious(
1385 register vm_page_t m)
1386{
1387 assert(!m->free);
1388 assert(m->busy);
1389 assert(m->fictitious);
1390 assert(m->phys_page == vm_page_fictitious_addr ||
1391 m->phys_page == vm_page_guard_addr);
1392
1393 c_vm_page_release_fictitious++;
1394#if DEBUG
1395 if (m->free)
1396 panic("vm_page_release_fictitious");
1397#endif
1398 m->free = TRUE;
1399 zfree(vm_page_zone, m);
1400}
1401
1402/*
1403 * vm_page_more_fictitious:
1404 *
1405 * Add more fictitious pages to the free list.
1406 * Allowed to block. This routine is way intimate
1407 * with the zones code, for several reasons:
1408 * 1. we need to carve some page structures out of physical
1409 * memory before zones work, so they _cannot_ come from
1410 * the zone_map.
1411 * 2. the zone needs to be collectable in order to prevent
1412 * growth without bound. These structures are used by
1413 * the device pager (by the hundreds and thousands), as
1414 * private pages for pageout, and as blocking pages for
1415 * pagein. Temporary bursts in demand should not result in
1416 * permanent allocation of a resource.
1417 * 3. To smooth allocation humps, we allocate single pages
1418 * with kernel_memory_allocate(), and cram them into the
1419 * zone. This also allows us to initialize the vm_page_t's
1420 * on the way into the zone, so that zget() always returns
1421 * an initialized structure. The zone free element pointer
1422 * and the free page pointer are both the first item in the
1423 * vm_page_t.
1424 * 4. By having the pages in the zone pre-initialized, we need
1425 * not keep 2 levels of lists. The garbage collector simply
1426 * scans our list, and reduces physical memory usage as it
1427 * sees fit.
1428 */
1429
1430void vm_page_more_fictitious(void)
1431{
1432 register vm_page_t m;
1433 vm_offset_t addr;
1434 kern_return_t retval;
1435 int i;
1436
1437 c_vm_page_more_fictitious++;
1438
1439 /*
1440 * Allocate a single page from the zone_map. Do not wait if no physical
1441 * pages are immediately available, and do not zero the space. We need
1442 * our own blocking lock here to prevent having multiple,
1443 * simultaneous requests from piling up on the zone_map lock. Exactly
1444 * one (of our) threads should be potentially waiting on the map lock.
1445 * If winner is not vm-privileged, then the page allocation will fail,
1446 * and it will temporarily block here in the vm_page_wait().
1447 */
1448 mutex_lock(&vm_page_alloc_lock);
1449 /*
1450 * If another thread allocated space, just bail out now.
1451 */
1452 if (zone_free_count(vm_page_zone) > 5) {
1453 /*
1454 * The number "5" is a small number that is larger than the
1455 * number of fictitious pages that any single caller will
1456 * attempt to allocate. Otherwise, a thread will attempt to
1457 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1458 * release all of the resources and locks already acquired,
1459 * and then call this routine. This routine finds the pages
1460 * that the caller released, so fails to allocate new space.
1461 * The process repeats infinitely. The largest known number
1462 * of fictitious pages required in this manner is 2. 5 is
1463 * simply a somewhat larger number.
1464 */
1465 mutex_unlock(&vm_page_alloc_lock);
1466 return;
1467 }
1468
1469 retval = kernel_memory_allocate(zone_map,
1470 &addr, PAGE_SIZE, VM_PROT_ALL,
1471 KMA_KOBJECT|KMA_NOPAGEWAIT);
1472 if (retval != KERN_SUCCESS) {
1473 /*
1474 * No page was available. Tell the pageout daemon, drop the
1475 * lock to give another thread a chance at it, and
1476 * wait for the pageout daemon to make progress.
1477 */
1478 mutex_unlock(&vm_page_alloc_lock);
1479 vm_page_wait(THREAD_UNINT);
1480 return;
1481 }
1482 /*
1483 * Initialize as many vm_page_t's as will fit on this page. This
1484 * depends on the zone code disturbing ONLY the first item of
1485 * each zone element.
1486 */
1487 m = (vm_page_t)addr;
1488 for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
1489 vm_page_init(m, vm_page_fictitious_addr);
1490 m->fictitious = TRUE;
1491 m++;
1492 }
1493 zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
1494 mutex_unlock(&vm_page_alloc_lock);
1495}
1496
1497
1498/*
1499 * vm_pool_low():
1500 *
1501 * Return true if it is not likely that a non-vm_privileged thread
1502 * can get memory without blocking. Advisory only, since the
1503 * situation may change under us.
1504 */
1505int
1506vm_pool_low(void)
1507{
1508 /* No locking, at worst we will fib. */
1509 return( vm_page_free_count < vm_page_free_reserved );
1510}
1511
1512
1513
1514/*
1515 * this is an interface to support bring-up of drivers
1516 * on platforms with physical memory > 4G...
1517 */
1518int vm_himemory_mode = 0;
1519
1520
1521/*
1522 * this interface exists to support hardware controllers
1523 * incapable of generating DMAs with more than 32 bits
1524 * of address on platforms with physical memory > 4G...
1525 */
1526unsigned int vm_lopage_free_count = 0;
1527unsigned int vm_lopage_max_count = 0;
1528queue_head_t vm_lopage_queue_free;
1529
1530vm_page_t
1531vm_page_grablo(void)
1532{
1533 register vm_page_t mem;
1534 unsigned int vm_lopage_alloc_count;
1535
1536 if (vm_lopage_poolsize == 0)
1537 return (vm_page_grab());
1538
1539 mutex_lock(&vm_page_queue_free_lock);
1540
1541 if (! queue_empty(&vm_lopage_queue_free)) {
1542 queue_remove_first(&vm_lopage_queue_free,
1543 mem,
1544 vm_page_t,
1545 pageq);
1546 assert(mem->free);
1547 assert(mem->busy);
1548 assert(!mem->pmapped);
1549
1550 mem->pageq.next = NULL;
1551 mem->pageq.prev = NULL;
1552 mem->free = FALSE;
1553
1554 vm_lopage_free_count--;
1555 vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count;
1556 if (vm_lopage_alloc_count > vm_lopage_max_count)
1557 vm_lopage_max_count = vm_lopage_alloc_count;
1558 } else {
1559 mem = VM_PAGE_NULL;
1560 }
1561 mutex_unlock(&vm_page_queue_free_lock);
1562
1563 return (mem);
1564}
1565
1566
1567/*
1568 * vm_page_grab:
1569 *
1570 * first try to grab a page from the per-cpu free list...
1571 * this must be done while pre-emption is disabled... if
1572 * a page is available, we're done...
1573 * if no page is available, grab the vm_page_queue_free_lock
1574 * and see if current number of free pages would allow us
1575 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1576 * if there are pages available, disable preemption and
1577 * recheck the state of the per-cpu free list... we could
1578 * have been preempted and moved to a different cpu, or
1579 * some other thread could have re-filled it... if still
1580 * empty, figure out how many pages we can steal from the
1581 * global free queue and move to the per-cpu queue...
1582 * return 1 of these pages when done... only wakeup the
1583 * pageout_scan thread if we moved pages from the global
1584 * list... no need for the wakeup if we've satisfied the
1585 * request from the per-cpu queue.
1586 */
1587
1588#define COLOR_GROUPS_TO_STEAL 4
1589
1590
1591vm_page_t
1592vm_page_grab( void )
1593{
1594 vm_page_t mem;
1595
1596
1597 disable_preemption();
1598
1599 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1600return_page_from_cpu_list:
1601 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1602 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1603 mem->pageq.next = NULL;
1604
1605 enable_preemption();
1606
1607 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1608 assert(mem->tabled == FALSE);
1609 assert(mem->object == VM_OBJECT_NULL);
1610 assert(!mem->laundry);
1611 assert(!mem->free);
1612 assert(pmap_verify_free(mem->phys_page));
1613 assert(mem->busy);
1614 assert(!mem->encrypted);
1615 assert(!mem->pmapped);
1616
1617 return mem;
1618 }
1619 enable_preemption();
1620
1621
1622 mutex_lock(&vm_page_queue_free_lock);
1623
1624 /*
1625 * Optionally produce warnings if the wire or gobble
1626 * counts exceed some threshold.
1627 */
1628 if (vm_page_wire_count_warning > 0
1629 && vm_page_wire_count >= vm_page_wire_count_warning) {
1630 printf("mk: vm_page_grab(): high wired page count of %d\n",
1631 vm_page_wire_count);
1632 assert(vm_page_wire_count < vm_page_wire_count_warning);
1633 }
1634 if (vm_page_gobble_count_warning > 0
1635 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1636 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1637 vm_page_gobble_count);
1638 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1639 }
1640
1641 /*
1642 * Only let privileged threads (involved in pageout)
1643 * dip into the reserved pool.
1644 */
1645 if ((vm_page_free_count < vm_page_free_reserved) &&
1646 !(current_thread()->options & TH_OPT_VMPRIV)) {
1647 mutex_unlock(&vm_page_queue_free_lock);
1648 mem = VM_PAGE_NULL;
1649 }
1650 else {
1651 vm_page_t head;
1652 vm_page_t tail;
1653 unsigned int pages_to_steal;
1654 unsigned int color;
1655
1656 while ( vm_page_free_count == 0 ) {
1657
1658 mutex_unlock(&vm_page_queue_free_lock);
1659 /*
1660 * must be a privileged thread to be
1661 * in this state since a non-privileged
1662 * thread would have bailed if we were
1663 * under the vm_page_free_reserved mark
1664 */
1665 VM_PAGE_WAIT();
1666 mutex_lock(&vm_page_queue_free_lock);
1667 }
1668
1669 disable_preemption();
1670
1671 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1672 mutex_unlock(&vm_page_queue_free_lock);
1673
1674 /*
1675 * we got preempted and moved to another processor
1676 * or we got preempted and someone else ran and filled the cache
1677 */
1678 goto return_page_from_cpu_list;
1679 }
1680 if (vm_page_free_count <= vm_page_free_reserved)
1681 pages_to_steal = 1;
1682 else {
1683 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1684
1685 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1686 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1687 }
1688 color = PROCESSOR_DATA(current_processor(), start_color);
1689 head = tail = NULL;
1690
1691 while (pages_to_steal--) {
1692 if (--vm_page_free_count < vm_page_free_count_minimum)
1693 vm_page_free_count_minimum = vm_page_free_count;
1694
1695 while (queue_empty(&vm_page_queue_free[color]))
1696 color = (color + 1) & vm_color_mask;
1697
1698 queue_remove_first(&vm_page_queue_free[color],
1699 mem,
1700 vm_page_t,
1701 pageq);
1702 mem->pageq.next = NULL;
1703 mem->pageq.prev = NULL;
1704
1705 color = (color + 1) & vm_color_mask;
1706
1707 if (head == NULL)
1708 head = mem;
1709 else
1710 tail->pageq.next = (queue_t)mem;
1711 tail = mem;
1712
1713 mem->pageq.prev = NULL;
1714 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1715 assert(mem->tabled == FALSE);
1716 assert(mem->object == VM_OBJECT_NULL);
1717 assert(!mem->laundry);
1718 assert(mem->free);
1719 mem->free = FALSE;
1720
1721 assert(pmap_verify_free(mem->phys_page));
1722 assert(mem->busy);
1723 assert(!mem->free);
1724 assert(!mem->encrypted);
1725 assert(!mem->pmapped);
1726 }
1727 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1728 PROCESSOR_DATA(current_processor(), start_color) = color;
1729
1730 /*
1731 * satisfy this request
1732 */
1733 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1734 mem = head;
1735 mem->pageq.next = NULL;
1736
1737 mutex_unlock(&vm_page_queue_free_lock);
1738
1739 enable_preemption();
1740 }
1741 /*
1742 * Decide if we should poke the pageout daemon.
1743 * We do this if the free count is less than the low
1744 * water mark, or if the free count is less than the high
1745 * water mark (but above the low water mark) and the inactive
1746 * count is less than its target.
1747 *
1748 * We don't have the counts locked ... if they change a little,
1749 * it doesn't really matter.
1750 */
1751 if ((vm_page_free_count < vm_page_free_min) ||
1752 ((vm_page_free_count < vm_page_free_target) &&
1753 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1754 thread_wakeup((event_t) &vm_page_free_wanted);
1755
1756#if CONFIG_EMBEDDED
1757 {
1758 int percent_avail;
1759
1760 /*
1761 * Decide if we need to poke the memorystatus notification thread.
1762 */
1763 percent_avail =
1764 (vm_page_active_count + vm_page_inactive_count +
1765 vm_page_speculative_count + vm_page_free_count +
1766 vm_page_purgeable_count ) * 100 /
1767 atop_64(max_mem);
1768 if (percent_avail <= (kern_memorystatus_level - 5)) {
1769 kern_memorystatus_level = percent_avail;
1770 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1771 }
1772 }
1773#endif
1774
1775// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1776
1777 return mem;
1778}
1779
1780/*
1781 * vm_page_release:
1782 *
1783 * Return a page to the free list.
1784 */
1785
1786void
1787vm_page_release(
1788 register vm_page_t mem)
1789{
1790 unsigned int color;
1791#if 0
1792 unsigned int pindex;
1793 phys_entry *physent;
1794
1795 physent = mapping_phys_lookup(mem->phys_page, &pindex); /* (BRINGUP) */
1796 if(physent->ppLink & ppN) { /* (BRINGUP) */
1797 panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
1798 }
1799 physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */
1800#endif
1801 assert(!mem->private && !mem->fictitious);
1802
1803// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1804
1805 mutex_lock(&vm_page_queue_free_lock);
1806#if DEBUG
1807 if (mem->free)
1808 panic("vm_page_release");
1809#endif
1810 mem->free = TRUE;
1811
1812 assert(mem->busy);
1813 assert(!mem->laundry);
1814 assert(mem->object == VM_OBJECT_NULL);
1815 assert(mem->pageq.next == NULL &&
1816 mem->pageq.prev == NULL);
1817 assert(mem->listq.next == NULL &&
1818 mem->listq.prev == NULL);
1819
1820 if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
1821 /*
1822 * this exists to support hardware controllers
1823 * incapable of generating DMAs with more than 32 bits
1824 * of address on platforms with physical memory > 4G...
1825 */
1826 queue_enter_first(&vm_lopage_queue_free,
1827 mem,
1828 vm_page_t,
1829 pageq);
1830 vm_lopage_free_count++;
1831 } else {
1832 color = mem->phys_page & vm_color_mask;
1833 queue_enter_first(&vm_page_queue_free[color],
1834 mem,
1835 vm_page_t,
1836 pageq);
1837 vm_page_free_count++;
1838 /*
1839 * Check if we should wake up someone waiting for a page.
1840 * But don't bother waking them unless they can allocate.
1841 *
1842 * We wakeup only one thread, to prevent starvation.
1843 * Because the scheduling system handles wait queues FIFO,
1844 * if we wakeup all waiting threads, one greedy thread
1845 * can starve multiple niceguy threads. When the threads
1846 * all wake up, the greedy thread runs first, grabs the page,
1847 * and waits for another page. It will be the first to run
1848 * when the next page is freed.
1849 *
1850 * However, there is a slight danger here.
1851 * The thread we wake might not use the free page.
1852 * Then the other threads could wait indefinitely
1853 * while the page goes unused. To forestall this,
1854 * the pageout daemon will keep making free pages
1855 * as long as vm_page_free_wanted is non-zero.
1856 */
1857
1858 if ((vm_page_free_wanted_privileged > 0) && vm_page_free_count) {
1859 vm_page_free_wanted_privileged--;
1860 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
1861 } else if ((vm_page_free_wanted > 0) &&
1862 (vm_page_free_count >= vm_page_free_reserved)) {
1863 vm_page_free_wanted--;
1864 thread_wakeup_one((event_t) &vm_page_free_count);
1865 }
1866 }
1867 mutex_unlock(&vm_page_queue_free_lock);
1868
1869#if CONFIG_EMBEDDED
1870 {
1871 int percent_avail;
1872
1873 /*
1874 * Decide if we need to poke the memorystatus notification thread.
1875 * Locking is not a big issue, as only a single thread delivers these.
1876 */
1877 percent_avail =
1878 (vm_page_active_count + vm_page_inactive_count +
1879 vm_page_speculative_count + vm_page_free_count +
1880 vm_page_purgeable_count ) * 100 /
1881 atop_64(max_mem);
1882 if (percent_avail >= (kern_memorystatus_level + 5)) {
1883 kern_memorystatus_level = percent_avail;
1884 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1885 }
1886 }
1887#endif
1888}
1889
1890/*
1891 * vm_page_wait:
1892 *
1893 * Wait for a page to become available.
1894 * If there are plenty of free pages, then we don't sleep.
1895 *
1896 * Returns:
1897 * TRUE: There may be another page, try again
1898 * FALSE: We were interrupted out of our wait, don't try again
1899 */
1900
1901boolean_t
1902vm_page_wait(
1903 int interruptible )
1904{
1905 /*
1906 * We can't use vm_page_free_reserved to make this
1907 * determination. Consider: some thread might
1908 * need to allocate two pages. The first allocation
1909 * succeeds, the second fails. After the first page is freed,
1910 * a call to vm_page_wait must really block.
1911 */
1912 kern_return_t wait_result;
1913 int need_wakeup = 0;
1914 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
1915
1916 mutex_lock(&vm_page_queue_free_lock);
1917
1918 if (is_privileged && vm_page_free_count) {
1919 mutex_unlock(&vm_page_queue_free_lock);
1920 return TRUE;
1921 }
1922 if (vm_page_free_count < vm_page_free_target) {
1923
1924 if (is_privileged) {
1925 if (vm_page_free_wanted_privileged++ == 0)
1926 need_wakeup = 1;
1927 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
1928 } else {
1929 if (vm_page_free_wanted++ == 0)
1930 need_wakeup = 1;
1931 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
1932 }
1933 mutex_unlock(&vm_page_queue_free_lock);
1934 counter(c_vm_page_wait_block++);
1935
1936 if (need_wakeup)
1937 thread_wakeup((event_t)&vm_page_free_wanted);
1938
1939 if (wait_result == THREAD_WAITING)
1940 wait_result = thread_block(THREAD_CONTINUE_NULL);
1941
1942 return(wait_result == THREAD_AWAKENED);
1943 } else {
1944 mutex_unlock(&vm_page_queue_free_lock);
1945 return TRUE;
1946 }
1947}
1948
1949/*
1950 * vm_page_alloc:
1951 *
1952 * Allocate and return a memory cell associated
1953 * with this VM object/offset pair.
1954 *
1955 * Object must be locked.
1956 */
1957
1958vm_page_t
1959vm_page_alloc(
1960 vm_object_t object,
1961 vm_object_offset_t offset)
1962{
1963 register vm_page_t mem;
1964
1965 vm_object_lock_assert_exclusive(object);
1966 mem = vm_page_grab();
1967 if (mem == VM_PAGE_NULL)
1968 return VM_PAGE_NULL;
1969
1970 vm_page_insert(mem, object, offset);
1971
1972 return(mem);
1973}
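
/*
 * A minimal sketch of the usual calling pattern for vm_page_alloc() and
 * vm_page_wait() above. The helper name "example_fill_page" and its policy
 * choices are hypothetical; error handling and races with other allocators
 * at the same offset are ignored.
 */
#if 0	/* illustration only -- never compiled */
static kern_return_t
example_fill_page(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	mem;

	vm_object_lock(object);			/* vm_page_alloc() requires the object locked */
	while ((mem = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
		vm_object_unlock(object);	/* never block for pages with the object locked */
		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
			return KERN_ABORTED;	/* interrupted out of the wait */
		vm_object_lock(object);
	}
	vm_page_zero_fill(mem);			/* page comes back busy; initialize it... */
	PAGE_WAKEUP_DONE(mem);			/* ...then clear busy and wake any waiters */
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif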
1974
1975vm_page_t
1976vm_page_alloclo(
1977 vm_object_t object,
1978 vm_object_offset_t offset)
1979{
1980 register vm_page_t mem;
1981
1982 vm_object_lock_assert_exclusive(object);
1983 mem = vm_page_grablo();
1984 if (mem == VM_PAGE_NULL)
1985 return VM_PAGE_NULL;
1986
1987 vm_page_insert(mem, object, offset);
1988
1989 return(mem);
1990}
1991
1992
1993/*
1994 * vm_page_alloc_guard:
1995 *
1996 * Allocate a fictitious page which will be used
1997 * as a guard page. The page will be inserted into
1998 * the object and returned to the caller.
1999 */
2000
2001vm_page_t
2002vm_page_alloc_guard(
2003 vm_object_t object,
2004 vm_object_offset_t offset)
2005{
2006 register vm_page_t mem;
2007
2008 vm_object_lock_assert_exclusive(object);
2009 mem = vm_page_grab_guard();
2010 if (mem == VM_PAGE_NULL)
2011 return VM_PAGE_NULL;
2012
2013 vm_page_insert(mem, object, offset);
2014
2015 return(mem);
2016}
2017
2018
2019counter(unsigned int c_laundry_pages_freed = 0;)
2020
2021boolean_t vm_page_free_verify = TRUE;
2022/*
2023 * vm_page_free:
2024 *
2025 * Returns the given page to the free list,
2026 * disassociating it from any VM object.
2027 *
2028 * Object and page queues must be locked prior to entry.
2029 */
2030void
2031vm_page_free_prepare(
2032 register vm_page_t mem)
2033{
2034 VM_PAGE_CHECK(mem);
2035 assert(!mem->free);
2036 assert(!mem->cleaning);
2037 assert(!mem->pageout);
2038
2039#if DEBUG
2040 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2041 assert(pmap_verify_free(mem->phys_page));
2042 }
2043 if (mem->object)
2044 vm_object_lock_assert_exclusive(mem->object);
2045 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2046
2047 if (mem->free)
2048 panic("vm_page_free: freeing page on free list\n");
2049#endif
2050
2051 if (mem->laundry) {
2052 /*
2053 * We may have to free a page while it's being laundered
2054 * if we lost its pager (due to a forced unmount, for example).
2055 * We need to call vm_pageout_throttle_up() before removing
2056 * the page from its VM object, so that we can find out on
2057 * which pageout queue the page is.
2058 */
2059 vm_pageout_throttle_up(mem);
2060 counter(++c_laundry_pages_freed);
2061 }
2062
2063 if (mem->tabled)
2064 vm_page_remove(mem); /* clears tabled, object, offset */
2065
2066 VM_PAGE_QUEUES_REMOVE(mem); /* clears active/inactive/throttled/speculative */
2067
2068 if (mem->wire_count) {
2069 if (!mem->private && !mem->fictitious)
2070 vm_page_wire_count--;
2071 mem->wire_count = 0;
2072 assert(!mem->gobbled);
2073 } else if (mem->gobbled) {
2074 if (!mem->private && !mem->fictitious)
2075 vm_page_wire_count--;
2076 vm_page_gobble_count--;
2077 }
2078 mem->gobbled = FALSE;
2079
2080 PAGE_WAKEUP(mem); /* clears wanted */
2081
2082 /* Some of these may be unnecessary */
2083 mem->busy = TRUE;
2084 mem->absent = FALSE;
2085 mem->error = FALSE;
2086 mem->dirty = FALSE;
2087 mem->precious = FALSE;
2088 mem->reference = FALSE;
2089 mem->encrypted = FALSE;
2090 mem->encrypted_cleaning = FALSE;
2091 mem->deactivated = FALSE;
2092 mem->pmapped = FALSE;
2093
2094 if (mem->private) {
2095 mem->private = FALSE;
2096 mem->fictitious = TRUE;
2097 mem->phys_page = vm_page_fictitious_addr;
2098 }
2099 if (!mem->fictitious) {
2100 if (mem->zero_fill == TRUE) {
2101 mem->zero_fill = FALSE;
2102 OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
2103 }
2104 vm_page_init(mem, mem->phys_page);
2105 }
2106}
2107
2108void
2109vm_page_free(
2110 vm_page_t mem)
2111{
2112 vm_page_free_prepare(mem);
2113 if (mem->fictitious) {
2114 vm_page_release_fictitious(mem);
2115 } else {
2116 vm_page_release(mem);
2117 }
2118}
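
/*
 * A minimal sketch of the locking a caller of vm_page_free() is expected to
 * hold (the page's object lock plus the page queue lock, per the comment
 * above); the helper name "example_discard_page" is hypothetical.
 */
#if 0	/* illustration only -- never compiled */
static void
example_discard_page(
	vm_object_t	object,
	vm_page_t	mem)
{
	vm_object_lock(object);		/* page is tabled in this object */
	vm_page_lock_queues();		/* vm_page_free() manipulates the paging queues */
	vm_page_free(mem);		/* unhooks from object and queues, then releases */
	vm_page_unlock_queues();
	vm_object_unlock(object);
}
#endif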
2119
2120/*
2121 * Free a list of pages. The list can be up to several hundred pages,
2122 * as blocked up by vm_pageout_scan().
2123 * The big win is not having to take the page q and free list locks once
2124 * per page. We sort the incoming pages into n lists, one for
2125 * each color.
2126 *
2127 * The page queues must be locked, and are kept locked.
2128 */
2129void
2130vm_page_free_list(
2131 vm_page_t mem)
2132{
2133 vm_page_t nxt;
2134 int pg_count = 0;
2135 int color;
2136 int inuse_list_head = -1;
2137
2138 queue_head_t free_list[MAX_COLORS];
2139 int inuse[MAX_COLORS];
2140
2141 for (color = 0; color < (signed) vm_colors; color++) {
2142 queue_init(&free_list[color]);
2143 }
2144
2145#if DEBUG
2146 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2147#endif
2148 while (mem) {
2149#if DEBUG
2150 if (mem->tabled || mem->object)
2151 panic("vm_page_free_list: freeing tabled page\n");
2152 if (mem->inactive || mem->active || mem->throttled || mem->free)
2153 panic("vm_page_free_list: freeing page on list\n");
2154 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2155 assert(pmap_verify_free(mem->phys_page));
2156 }
2157#endif
2158 assert(mem->pageq.prev == NULL);
2159 assert(mem->busy);
2160 assert(!mem->free);
2161 nxt = (vm_page_t)(mem->pageq.next);
2162
2163 if (!mem->fictitious) {
2164 mem->free = TRUE;
2165
2166 color = mem->phys_page & vm_color_mask;
2167 if (queue_empty(&free_list[color])) {
2168 inuse[color] = inuse_list_head;
2169 inuse_list_head = color;
2170 }
2171 queue_enter_first(&free_list[color],
2172 mem,
2173 vm_page_t,
2174 pageq);
2175 pg_count++;
2176 } else {
2177 assert(mem->phys_page == vm_page_fictitious_addr ||
2178 mem->phys_page == vm_page_guard_addr);
2179 vm_page_release_fictitious(mem);
2180 }
2181 mem = nxt;
2182 }
2183 if (pg_count) {
2184 unsigned int avail_free_count;
2185
2186 mutex_lock(&vm_page_queue_free_lock);
2187
2188 color = inuse_list_head;
2189
2190 while( color != -1 ) {
2191 vm_page_t first, last;
2192 vm_page_t first_free;
2193
2194 first = (vm_page_t) queue_first(&free_list[color]);
2195 last = (vm_page_t) queue_last(&free_list[color]);
2196 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2197
2198 if (queue_empty(&vm_page_queue_free[color])) {
2199 queue_last(&vm_page_queue_free[color]) =
2200 (queue_entry_t) last;
2201 } else {
2202 queue_prev(&first_free->pageq) =
2203 (queue_entry_t) last;
2204 }
2205 queue_first(&vm_page_queue_free[color]) =
2206 (queue_entry_t) first;
2207 queue_prev(&first->pageq) =
2208 (queue_entry_t) &vm_page_queue_free[color];
2209 queue_next(&last->pageq) =
2210 (queue_entry_t) first_free;
2211 color = inuse[color];
2212 }
2213
2214 vm_page_free_count += pg_count;
2215 avail_free_count = vm_page_free_count;
2216
2217 while ((vm_page_free_wanted_privileged > 0) && avail_free_count) {
2218 vm_page_free_wanted_privileged--;
2219 avail_free_count--;
2220
2221 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2222 }
2223
2224 if ((vm_page_free_wanted > 0) &&
2225 (avail_free_count >= vm_page_free_reserved)) {
2226 unsigned int available_pages;
2227
2228 if (avail_free_count >= vm_page_free_reserved) {
2229 available_pages = (avail_free_count - vm_page_free_reserved);
2230 } else {
2231 available_pages = 0;
2232 }
2233
2234 if (available_pages >= vm_page_free_wanted) {
2235 vm_page_free_wanted = 0;
2236 thread_wakeup((event_t) &vm_page_free_count);
2237 } else {
2238 while (available_pages--) {
2239 vm_page_free_wanted--;
2240 thread_wakeup_one((event_t) &vm_page_free_count);
2241 }
2242 }
2243 }
2244 mutex_unlock(&vm_page_queue_free_lock);
2245
2246#if CONFIG_EMBEDDED
2247 {
2248 int percent_avail;
2249
2250 /*
2251 * Decide if we need to poke the memorystatus notification thread.
2252 */
2253 percent_avail =
2254 (vm_page_active_count + vm_page_inactive_count +
2255 vm_page_speculative_count + vm_page_free_count +
2256 vm_page_purgeable_count ) * 100 /
2257 atop_64(max_mem);
2258 if (percent_avail >= (kern_memorystatus_level + 5)) {
2259 kern_memorystatus_level = percent_avail;
2260 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2261 }
2262 }
2263#endif
2264 }
2265}
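
/*
 * A minimal sketch of how a batch of pages could be handed to
 * vm_page_free_list(): the pages must already be untabled and off the paging
 * queues (or the DEBUG checks above will panic), linked only through
 * pageq.next with pageq.prev left NULL. The helper name "example_free_batch"
 * is hypothetical.
 */
#if 0	/* illustration only -- never compiled */
static void
example_free_batch(
	vm_page_t	*batch,
	int		count)
{
	vm_page_t	head = VM_PAGE_NULL;
	int		i;

	for (i = 0; i < count; i++) {
		batch[i]->pageq.next = (queue_entry_t) head;	/* push onto singly linked list */
		batch[i]->pageq.prev = NULL;			/* vm_page_free_list() asserts this */
		head = batch[i];
	}
	vm_page_lock_queues();		/* required on entry, kept locked across the call */
	vm_page_free_list(head);
	vm_page_unlock_queues();
}
#endif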
2266
2267
2268/*
2269 * vm_page_wire:
2270 *
2271 * Mark this page as wired down by yet
2272 * another map, removing it from paging queues
2273 * as necessary.
2274 *
2275 * The page's object and the page queues must be locked.
2276 */
2277void
2278vm_page_wire(
2279 register vm_page_t mem)
2280{
2281
2282// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2283
2284 VM_PAGE_CHECK(mem);
2285#if DEBUG
2286 if (mem->object)
2287 vm_object_lock_assert_exclusive(mem->object);
2288 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2289#endif
2290 if (mem->wire_count == 0) {
2291 VM_PAGE_QUEUES_REMOVE(mem);
2292 if (!mem->private && !mem->fictitious && !mem->gobbled)
2293 vm_page_wire_count++;
2294 if (mem->gobbled)
2295 vm_page_gobble_count--;
2296 mem->gobbled = FALSE;
2297 if (mem->zero_fill == TRUE) {
2298 mem->zero_fill = FALSE;
2299 OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
2300 }
2301 /*
2302 * ENCRYPTED SWAP:
2303 * The page could be encrypted, but
2304 * we don't have to decrypt it here
2305 * because we don't guarantee that the
2306 * data is actually valid at this point.
2307 * The page will get decrypted in
2308 * vm_fault_wire() if needed.
2309 */
2310 }
2311 assert(!mem->gobbled);
2312 mem->wire_count++;
2313}
2314
2315/*
2316 * vm_page_gobble:
2317 *
2318 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2319 *
2320 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2321 */
2322void
2323vm_page_gobble(
2324 register vm_page_t mem)
2325{
2326 vm_page_lockspin_queues();
2327 VM_PAGE_CHECK(mem);
2328
2329 assert(!mem->gobbled);
2330 assert(mem->wire_count == 0);
2331
2332 if (!mem->gobbled && mem->wire_count == 0) {
2333 if (!mem->private && !mem->fictitious)
2334 vm_page_wire_count++;
2335 }
2336 vm_page_gobble_count++;
2337 mem->gobbled = TRUE;
2338 vm_page_unlock_queues();
2339}
2340
2341/*
2342 * vm_page_unwire:
2343 *
2344 * Release one wiring of this page, potentially
2345 * enabling it to be paged again.
2346 *
2347 * The page's object and the page queues must be locked.
2348 */
2349void
2350vm_page_unwire(
2351 register vm_page_t mem)
2352{
2353
2354// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2355
2356 VM_PAGE_CHECK(mem);
2357 assert(mem->wire_count > 0);
2358#if DEBUG
2359 if (mem->object)
2360 vm_object_lock_assert_exclusive(mem->object);
2361 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2362#endif
2363 if (--mem->wire_count == 0) {
2364 assert(!mem->private && !mem->fictitious);
2365 vm_page_wire_count--;
2366 assert(!mem->laundry);
2367 assert(mem->object != kernel_object);
2368 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2369 if (!IP_VALID(memory_manager_default) &&
2370 mem->dirty && mem->object->internal &&
2371 (mem->object->purgable == VM_PURGABLE_DENY ||
2372 mem->object->purgable == VM_PURGABLE_NONVOLATILE)) {
2373 queue_enter(&vm_page_queue_throttled, mem, vm_page_t, pageq);
2374 vm_page_throttled_count++;
2375 mem->throttled = TRUE;
2376 } else {
2377 queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
2378 vm_page_active_count++;
2379 mem->active = TRUE;
2380 }
2381 mem->reference = TRUE;
2382 }
2383}
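
/*
 * A minimal sketch of a wire/unwire pairing; the helper name
 * "example_pin_page" is hypothetical. Both calls need the page's object and
 * the page queues locked, per the comments above.
 */
#if 0	/* illustration only -- never compiled */
static void
example_pin_page(
	vm_page_t	mem)
{
	vm_object_lock(mem->object);
	vm_page_lockspin_queues();
	vm_page_wire(mem);		/* pulls the page off the paging queues */
	vm_page_unlock_queues();
	vm_object_unlock(mem->object);

	/* ... the page cannot be paged out while it stays wired ... */

	vm_object_lock(mem->object);
	vm_page_lock_queues();
	vm_page_unwire(mem);		/* last unwire re-enters it on the active/throttled queue */
	vm_page_unlock_queues();
	vm_object_unlock(mem->object);
}
#endif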
2384
2385
2386/*
2387 * vm_page_deactivate:
2388 *
2389 * Returns the given page to the inactive list,
2390 * indicating that no physical maps have access
2391 * to this page. [Used by the physical mapping system.]
2392 *
2393 * The page queues must be locked.
2394 */
2395void
2396vm_page_deactivate(
2397 register vm_page_t m)
2398{
2399 boolean_t rapid_age = FALSE;
2400
2401 VM_PAGE_CHECK(m);
2402 assert(m->object != kernel_object);
2403 assert(m->phys_page != vm_page_guard_addr);
2404
2405// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2406#if DEBUG
2407 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2408#endif
2409 /*
2410 * This page is no longer very interesting. If it was
2411 * interesting (active or inactive/referenced), then we
2412 * clear the reference bit and (re)enter it in the
2413 * inactive queue. Note wired pages should not have
2414 * their reference bit cleared.
2415 */
2416 if (m->gobbled) { /* can this happen? */
2417 assert(m->wire_count == 0);
2418
2419 if (!m->private && !m->fictitious)
2420 vm_page_wire_count--;
2421 vm_page_gobble_count--;
2422 m->gobbled = FALSE;
2423 }
2424 if (m->private || (m->wire_count != 0))
2425 return;
2426
2427 if (m->active && m->deactivated == TRUE) {
2428 if (!pmap_is_referenced(m->phys_page))
2429 rapid_age = TRUE;
2430 }
2431 if (rapid_age == FALSE && !m->fictitious && !m->absent)
2432 pmap_clear_reference(m->phys_page);
2433
2434 m->reference = FALSE;
2435 m->deactivated = FALSE;
2436 m->no_cache = FALSE;
2437
2438 if (!m->inactive) {
2439 VM_PAGE_QUEUES_REMOVE(m);
2440
2441 assert(!m->laundry);
2442 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2443
2444 if (!IP_VALID(memory_manager_default) &&
2445 m->dirty && m->object->internal &&
2446 (m->object->purgable == VM_PURGABLE_DENY ||
2447 m->object->purgable == VM_PURGABLE_NONVOLATILE)) {
2448 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2449 m->throttled = TRUE;
2450 vm_page_throttled_count++;
2451 } else {
2452 if (rapid_age == TRUE ||
2453 (!m->fictitious && m->object->named && m->object->ref_count == 1)) {
2454 vm_page_speculate(m, FALSE);
2455 vm_page_speculative_recreated++;
2456 return;
2457 } else {
2458 if (m->zero_fill) {
2459 queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
2460 vm_zf_queue_count++;
2461 } else {
2462 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2463 }
2464 }
2465 m->inactive = TRUE;
2466 if (!m->fictitious) {
2467 vm_page_inactive_count++;
2468 token_new_pagecount++;
2469 }
2470 }
2471 }
2472}
2473
2474/*
2475 * vm_page_activate:
2476 *
2477 * Put the specified page on the active list (if appropriate).
2478 *
2479 * The page queues must be locked.
2480 */
2481
2482void
2483vm_page_activate(
2484 register vm_page_t m)
2485{
2486 VM_PAGE_CHECK(m);
2487#ifdef FIXME_4778297
2488 assert(m->object != kernel_object);
2489#endif
2490 assert(m->phys_page != vm_page_guard_addr);
2491#if DEBUG
2492 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2493#endif
2494 if (m->gobbled) {
2495 assert(m->wire_count == 0);
2496 if (!m->private && !m->fictitious)
2497 vm_page_wire_count--;
2498 vm_page_gobble_count--;
2499 m->gobbled = FALSE;
2500 }
2501 if (m->private)
2502 return;
2503
2504#if DEBUG
2505 if (m->active)
2506 panic("vm_page_activate: already active");
2507#endif
2508
2509 if (m->speculative) {
2510 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2511 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2512 }
2513
2514 VM_PAGE_QUEUES_REMOVE(m);
2515
2516 if (m->wire_count == 0) {
2517 assert(!m->laundry);
2518 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2519 if (!IP_VALID(memory_manager_default) &&
2520 !m->fictitious && m->dirty && m->object->internal &&
2521 (m->object->purgable == VM_PURGABLE_DENY ||
2522 m->object->purgable == VM_PURGABLE_NONVOLATILE)) {
2523 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2524 m->throttled = TRUE;
2525 vm_page_throttled_count++;
2526 } else {
2527 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2528 m->active = TRUE;
2529 if (!m->fictitious)
2530 vm_page_active_count++;
2531 }
2532 m->reference = TRUE;
2533 m->no_cache = FALSE;
2534 }
2535}
2536
2537
2538/*
2539 * vm_page_speculate:
2540 *
2541 * Put the specified page on the speculative list (if appropriate).
2542 *
2543 * The page queues must be locked.
2544 */
2545void
2546vm_page_speculate(
2547 vm_page_t m,
2548 boolean_t new)
2549{
2550 struct vm_speculative_age_q *aq;
2551
2552 VM_PAGE_CHECK(m);
2553 assert(m->object != kernel_object);
2554 assert(!m->speculative && !m->active && !m->inactive && !m->throttled);
2555 assert(m->phys_page != vm_page_guard_addr);
2556 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2557#if DEBUG
2558 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2559#endif
2560 if (m->wire_count == 0) {
2561 mach_timespec_t ts;
2562
2563 clock_get_system_nanotime(&ts.tv_sec, (unsigned *)&ts.tv_nsec);
2564
2565 if (vm_page_speculative_count == 0) {
2566
2567 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2568 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2569
2570 aq = &vm_page_queue_speculative[speculative_age_index];
2571
2572 /*
2573 * set the timer to begin a new group
2574 */
2575 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2576 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2577
2578 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2579 } else {
2580 aq = &vm_page_queue_speculative[speculative_age_index];
2581
2582 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2583
2584 speculative_age_index++;
2585
2586 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2587 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2588 if (speculative_age_index == speculative_steal_index) {
2589 speculative_steal_index = speculative_age_index + 1;
2590
2591 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2592 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2593 }
2594 aq = &vm_page_queue_speculative[speculative_age_index];
2595
2596 if (!queue_empty(&aq->age_q))
2597 vm_page_speculate_ageit(aq);
2598
2599 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2600 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2601
2602 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2603 }
2604 }
2605 enqueue_tail(&aq->age_q, &m->pageq);
2606 m->speculative = TRUE;
2607 vm_page_speculative_count++;
2608
2609 if (new == TRUE) {
2610 m->object->pages_created++;
2611 vm_page_speculative_created++;
2612 }
2613 }
2614}
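
/*
 * Worked example of the age-timestamp arithmetic used above, split out as a
 * hypothetical helper: the millisecond constant is divided into whole seconds
 * plus a nanosecond remainder before being added to the current time.
 */
#if 0	/* illustration only -- never compiled */
static mach_timespec_t
example_ms_to_timespec(
	unsigned int	ms)
{
	mach_timespec_t	ts;

	ts.tv_sec = ms / 1000;					/* whole seconds */
	ts.tv_nsec = (ms % 1000) * 1000 * NSEC_PER_USEC;	/* leftover ms -> usec -> nsec */

	return ts;
}
#endif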
2615
2616
2617/*
2618 * move pages from the specified aging bin to
2619 * the speculative bin that pageout_scan claims from
2620 *
2621 * The page queues must be locked.
2622 */
2623void
2624vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
2625{
2626 struct vm_speculative_age_q *sq;
2627 vm_page_t t;
2628
2629 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2630
2631 if (queue_empty(&sq->age_q)) {
2632 sq->age_q.next = aq->age_q.next;
2633 sq->age_q.prev = aq->age_q.prev;
2634
2635 t = (vm_page_t)sq->age_q.next;
2636 t->pageq.prev = &sq->age_q;
2637
2638 t = (vm_page_t)sq->age_q.prev;
2639 t->pageq.next = &sq->age_q;
2640 } else {
2641 t = (vm_page_t)sq->age_q.prev;
2642 t->pageq.next = aq->age_q.next;
2643
2644 t = (vm_page_t)aq->age_q.next;
2645 t->pageq.prev = sq->age_q.prev;
2646
2647 t = (vm_page_t)aq->age_q.prev;
2648 t->pageq.next = &sq->age_q;
2649
2650 sq->age_q.prev = aq->age_q.prev;
2651 }
2652 queue_init(&aq->age_q);
2653}
2654
2655
2656void
2657vm_page_lru(
2658 vm_page_t m)
2659{
2660 VM_PAGE_CHECK(m);
2661 assert(m->object != kernel_object);
2662 assert(m->phys_page != vm_page_guard_addr);
2663
2664#if DEBUG
2665 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2666#endif
2667 if (m->active || m->reference)
2668 return;
2669
2670 if (m->private || (m->wire_count != 0))
2671 return;
2672
2673 m->no_cache = FALSE;
2674
2675 VM_PAGE_QUEUES_REMOVE(m);
2676
2677 assert(!m->laundry);
2678 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2679
2680 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2681 m->inactive = TRUE;
2682
2683 vm_page_inactive_count++;
2684 token_new_pagecount++;
2685}
2686
2687
2688/*
2689 * vm_page_part_zero_fill:
2690 *
2691 * Zero-fill a part of the page.
2692 */
2693void
2694vm_page_part_zero_fill(
2695 vm_page_t m,
2696 vm_offset_t m_pa,
2697 vm_size_t len)
2698{
2699 vm_page_t tmp;
2700
2701 VM_PAGE_CHECK(m);
2702#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
2703 pmap_zero_part_page(m->phys_page, m_pa, len);
2704#else
2705 while (1) {
2706 tmp = vm_page_grab();
2707 if (tmp == VM_PAGE_NULL) {
2708 vm_page_wait(THREAD_UNINT);
2709 continue;
2710 }
2711 break;
2712 }
2713 vm_page_zero_fill(tmp);
2714 if(m_pa != 0) {
2715 vm_page_part_copy(m, 0, tmp, 0, m_pa);
2716 }
2717 if((m_pa + len) < PAGE_SIZE) {
2718 vm_page_part_copy(m, m_pa + len, tmp,
2719 m_pa + len, PAGE_SIZE - (m_pa + len));
2720 }
2721 vm_page_copy(tmp,m);
2722 vm_page_lock_queues();
2723 vm_page_free(tmp);
2724 vm_page_unlock_queues();
2725#endif
2726
2727}
2728
2729/*
2730 * vm_page_zero_fill:
2731 *
2732 * Zero-fill the specified page.
2733 */
2734void
2735vm_page_zero_fill(
2736 vm_page_t m)
2737{
2738 XPR(XPR_VM_PAGE,
2739 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
2740 (integer_t)m->object, (integer_t)m->offset, (integer_t)m, 0,0);
2741
2742 VM_PAGE_CHECK(m);
2743
2744// dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
2745 pmap_zero_page(m->phys_page);
2746}
2747
2748/*
2749 * vm_page_part_copy:
2750 *
2751 * copy part of one page to another
2752 */
2753
2754void
2755vm_page_part_copy(
2756 vm_page_t src_m,
2757 vm_offset_t src_pa,
2758 vm_page_t dst_m,
2759 vm_offset_t dst_pa,
2760 vm_size_t len)
2761{
2762 VM_PAGE_CHECK(src_m);
2763 VM_PAGE_CHECK(dst_m);
2764
2765 pmap_copy_part_page(src_m->phys_page, src_pa,
2766 dst_m->phys_page, dst_pa, len);
2767}
2768
2769/*
2770 * vm_page_copy:
2771 *
2772 * Copy one page to another
2773 *
2774 * ENCRYPTED SWAP:
2775 * The source page should not be encrypted. The caller should
2776 * make sure the page is decrypted first, if necessary.
2777 */
2778
2779int vm_page_copy_cs_validations = 0;
2780int vm_page_copy_cs_tainted = 0;
2781
2782void
2783vm_page_copy(
2784 vm_page_t src_m,
2785 vm_page_t dest_m)
2786{
2787 XPR(XPR_VM_PAGE,
2788 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
2789 (integer_t)src_m->object, src_m->offset,
2790 (integer_t)dest_m->object, dest_m->offset,
2791 0);
2792
2793 VM_PAGE_CHECK(src_m);
2794 VM_PAGE_CHECK(dest_m);
2795
2796 /*
2797 * ENCRYPTED SWAP:
2798 * The source page should not be encrypted at this point.
2799 * The destination page will therefore not contain encrypted
2800 * data after the copy.
2801 */
2802 if (src_m->encrypted) {
2803 panic("vm_page_copy: source page %p is encrypted\n", src_m);
2804 }
2805 dest_m->encrypted = FALSE;
2806
2807 if (src_m->object != VM_OBJECT_NULL &&
2808 src_m->object->code_signed &&
2809 !src_m->cs_validated) {
2810 /*
2811 * We're copying a not-yet-validated page from a
2812 * code-signed object.
2813 * Whoever ends up mapping the copy page might care about
2814 * the original page's integrity, so let's validate the
2815 * source page now.
2816 */
2817 vm_page_copy_cs_validations++;
2818 vm_page_validate_cs(src_m);
2819 }
2820 /*
2821 * Propagate the code-signing bits to the copy page.
2822 */
2823 dest_m->cs_validated = src_m->cs_validated;
2824 dest_m->cs_tainted = src_m->cs_tainted;
2825 if (dest_m->cs_tainted) {
2826 assert(dest_m->cs_validated);
2827 vm_page_copy_cs_tainted++;
2828 }
2829
2830 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
2831}
2832
2833#if MACH_ASSERT
2834/*
2835 * Check that the list of pages is ordered by
2836 * ascending physical address and has no holes.
2837 */
2838static int
2839vm_page_verify_contiguous(
2840 vm_page_t pages,
2841 unsigned int npages)
2842{
2843 register vm_page_t m;
2844 unsigned int page_count;
2845 vm_offset_t prev_addr;
2846
2847 prev_addr = pages->phys_page;
2848 page_count = 1;
2849 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
2850 if (m->phys_page != prev_addr + 1) {
2851 printf("m %p prev_addr 0x%x, current addr 0x%x\n",
2852 m, prev_addr, m->phys_page);
2853 printf("pages %p page_count %d\n", pages, page_count);
2854 panic("vm_page_verify_contiguous: not contiguous!");
2855 }
2856 prev_addr = m->phys_page;
2857 ++page_count;
2858 }
2859 if (page_count != npages) {
2860 printf("pages %p actual count 0x%x but requested 0x%x\n",
2861 pages, page_count, npages);
2862 panic("vm_page_verify_contiguous: count error");
2863 }
2864 return 1;
2865}
2866#endif /* MACH_ASSERT */
2867
2868
2869#if MACH_ASSERT
2870/*
2871 * Check the free lists for proper length etc.
2872 */
2873static void
2874vm_page_verify_free_lists( void )
2875{
2876 unsigned int color, npages;
2877 vm_page_t m;
2878 vm_page_t prev_m;
2879
2880 npages = 0;
2881
2882 mutex_lock(&vm_page_queue_free_lock);
2883
2884 for( color = 0; color < vm_colors; color++ ) {
2885 prev_m = (vm_page_t) &vm_page_queue_free[color];
2886 queue_iterate(&vm_page_queue_free[color],
2887 m,
2888 vm_page_t,
2889 pageq) {
2890 if ((vm_page_t) m->pageq.prev != prev_m)
2891 panic("vm_page_verify_free_lists: corrupted prev ptr");
2892 if ( ! m->free )
2893 panic("vm_page_verify_free_lists: not free");
2894 if ( ! m->busy )
2895 panic("vm_page_verify_free_lists: not busy");
2896 if ( (m->phys_page & vm_color_mask) != color)
2897 panic("vm_page_verify_free_lists: wrong color");
2898 ++npages;
2899 prev_m = m;
2900 }
2901 }
2902 if (npages != vm_page_free_count)
2903 panic("vm_page_verify_free_lists: npages %u free_count %d",
2904 npages, vm_page_free_count);
2905
2906 mutex_unlock(&vm_page_queue_free_lock);
2907}
2908#endif /* MACH_ASSERT */
2909
2910
2911
2912/*
2913 * CONTIGUOUS PAGE ALLOCATION
2914 * Additional levels of effort:
2915 * + consider pages that are currently 'pmapped'
2916 * this could be expensive since we'd have
2917 * to ask the pmap layer about their state
2918 * + consider dirty pages
2919 * either clean them or
2920 * copy them to other locations...
2921 *
2922 * Find a region large enough to contain at least n pages
2923 * of contiguous physical memory.
2924 *
2925 * This is done by traversing the vm_page_t array in a linear fashion
2926 * we assume that the vm_page_t array has the available physical pages in an
2927 * ordered, ascending list... this is currently true of all our implementations
2928 * and must remain so... there can be 'holes' in the array... we also can
2929 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
2930 * which use to happen via 'vm_page_convert'... that function was no longer
2931 * being called and was removed...
2932 *
2933 * The basic flow consists of stabilizing some of the interesting state of
2934 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
2935 * sweep at the beginning of the array looking for pages that meet our criteria
2936 * for a 'stealable' page... currently we are pretty conservative... if the page
2937 * meets these criteria and is physically contiguous to the previous page in the 'run'
2938 * we keep developing it. If we hit a page that doesn't fit, we reset our state
2939 * and start to develop a new run... if at this point we've already considered
2940 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
2941 * and mutex_pause (which will yield the processor), to keep the latency low w/r
2942 * to other threads trying to acquire free pages (or move pages from q to q),
2943 * and then continue from the spot we left off... we only make 1 pass through the
2944 * array. Once we have a 'run' that is long enough, we'll go into the loop
2945 * which steals the pages from the queues they're currently on... pages on the free
2946 * queue can be stolen directly... pages that are on any of the other queues
2947 * must be removed from the object they are tabled on... this requires taking the
2948 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
2949 * or if the state of the page behind the vm_object lock is no longer viable, we'll
2950 * dump the pages we've currently stolen back to the free list, and pick up our
2951 * scan from the point where we aborted the 'current' run.
2952 *
2953 *
2954 * Requirements:
2955 * - neither vm_page_queue nor vm_free_list lock can be held on entry
2956 *
2957 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
2958 *
2959 * Algorithm:
2960 */
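
/*
 * A simplified model (hypothetical, for illustration) of the run-detection
 * sweep described above: given an ascending array of physical page numbers,
 * find the first index that starts a run of 'contig' physically contiguous
 * entries. All of the page-state checks, locking and yielding done by the
 * real scan below are omitted.
 */
#if 0	/* illustration only -- never compiled */
static int
example_find_run(
	ppnum_t		*pnum,
	unsigned int	count,
	unsigned int	contig)
{
	unsigned int	idx, start = 0, npages = 0;
	ppnum_t		prevcontaddr = (ppnum_t) -2;

	for (idx = 0; idx < count && npages < contig; idx++) {
		if (pnum[idx] != prevcontaddr + 1) {
			npages = 1;		/* contiguity broken: start a new run here */
			start = idx;
		} else {
			npages++;		/* page extends the current run */
		}
		prevcontaddr = pnum[idx];
	}
	return (npages == contig) ? (int) start : -1;
}
#endif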
2961
2962#define MAX_CONSIDERED_BEFORE_YIELD 1000
2963
2964
2965#define RESET_STATE_OF_RUN() \
2966 MACRO_BEGIN \
2967 prevcontaddr = -2; \
2968 free_considered = 0; \
2969 substitute_needed = 0; \
2970 npages = 0; \
2971 MACRO_END
2972
2973
2974static vm_page_t
2975vm_page_find_contiguous(
2976 unsigned int contig_pages,
2977 ppnum_t max_pnum,
2978 boolean_t wire)
2979{
2980 vm_page_t m = NULL;
2981 ppnum_t prevcontaddr;
2982 unsigned int npages, considered;
2983 unsigned int page_idx, start_idx;
2984 int free_considered, free_available;
2985 int substitute_needed;
2986#if MACH_ASSERT
2987 uint32_t tv_start_sec, tv_start_usec, tv_end_sec, tv_end_usec;
2988 int yielded = 0;
2989 int dumped_run = 0;
2990 int stolen_pages = 0;
2991#endif
2992
2993 if (contig_pages == 0)
2994 return VM_PAGE_NULL;
2995
2996#if MACH_ASSERT
2997 vm_page_verify_free_lists();
2998
2999 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3000#endif
3001 vm_page_lock_queues();
3002 mutex_lock(&vm_page_queue_free_lock);
3003
3004 RESET_STATE_OF_RUN();
3005
3006 considered = 0;
3007 free_available = vm_page_free_count - vm_page_free_reserved;
3008
3009 for (page_idx = 0, start_idx = 0;
3010 npages < contig_pages && page_idx < vm_pages_count;
3011 page_idx++) {
3012retry:
3013 m = &vm_pages[page_idx];
3014
3015 if (max_pnum && m->phys_page > max_pnum) {
3016 /* no more low pages... */
3017 break;
3018 }
3019 if (m->phys_page <= vm_lopage_poolend &&
3020 m->phys_page >= vm_lopage_poolstart) {
3021 /*
3022 * don't want to take pages from our
3023 * reserved pool of low memory
3024 * so don't consider it which
3025 * means starting a new run
3026 */
3027 RESET_STATE_OF_RUN();
3028
3029 } else if (m->wire_count || m->gobbled ||
3030 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3031 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3032 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending) {
3033 /*
3034 * page is in a transient state
3035 * or a state we don't want to deal
3036 * with, so don't consider it which
3037 * means starting a new run
3038 */
3039 RESET_STATE_OF_RUN();
3040
3041 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3042 /*
3043 * page needs to be on one of our queues
3044 * in order for it to be stable behind the
3045 * locks we hold at this point...
3046 * if not, don't consider it which
3047 * means starting a new run
3048 */
3049 RESET_STATE_OF_RUN();
3050
3051 } else if (!m->free && (!m->tabled || m->busy)) {
3052 /*
3053 * pages on the free list are always 'busy'
3054 * so we couldn't test for 'busy' in the check
3055 * for the transient states... pages that are
3056 * 'free' are never 'tabled', so we also couldn't
3057 * test for 'tabled'. So we check here to make
3058 * sure that a non-free page is not busy and is
3059 * tabled on an object...
3060 * if not, don't consider it which
3061 * means starting a new run
3062 */
3063 RESET_STATE_OF_RUN();
3064
3065 } else {
3066 if (m->phys_page != prevcontaddr + 1) {
3067 npages = 1;
3068 start_idx = page_idx;
3069 } else {
3070 npages++;
3071 }
3072 prevcontaddr = m->phys_page;
3073
3074 if (m->pmapped || m->dirty)
3075 substitute_needed++;
3076
3077 if (m->free) {
3078 free_considered++;
3079 }
3080 if ((free_considered + substitute_needed) > free_available) {
3081 /*
3082 * if we let this run continue
3083 * we will end up dropping the vm_page_free_count
3084 * below the reserve limit... we need to abort
3085 * this run, but we can at least re-consider this
3086 * page... thus the jump back to 'retry'
3087 */
3088 RESET_STATE_OF_RUN();
3089
3090 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3091 considered++;
3092 goto retry;
3093 }
3094 /*
3095 * free_available == 0
3096 * so can't consider any free pages... if
3097 * we went to retry in this case, we'd
3098 * get stuck looking at the same page
3099 * w/o making any forward progress
3100 * we also want to take this path if we've already
3101 * reached our limit that controls the lock latency
3102 */
3103 }
3104 }
3105 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3106
3107 mutex_unlock(&vm_page_queue_free_lock);
3108 vm_page_unlock_queues();
3109
3110 mutex_pause(0);
3111
3112 vm_page_lock_queues();
3113 mutex_lock(&vm_page_queue_free_lock);
3114
3115 RESET_STATE_OF_RUN();
3116 /*
3117 * reset our free page limit since we
3118 * dropped the lock protecting the vm_page_free_queue
3119 */
3120 free_available = vm_page_free_count - vm_page_free_reserved;
3121 considered = 0;
3122#if MACH_ASSERT
3123 yielded++;
3124#endif
3125 goto retry;
3126 }
3127 considered++;
3128 }
3129 m = VM_PAGE_NULL;
3130
3131 if (npages != contig_pages)
3132 mutex_unlock(&vm_page_queue_free_lock);
3133 else {
3134 vm_page_t m1;
3135 vm_page_t m2;
3136 unsigned int cur_idx;
3137 unsigned int tmp_start_idx;
3138 vm_object_t locked_object = VM_OBJECT_NULL;
3139 boolean_t abort_run = FALSE;
3140
3141 tmp_start_idx = start_idx;
3142
3143 /*
3144 * first pass through to pull the free pages
3145 * off of the free queue so that in case we
3146 * need substitute pages, we won't grab any
3147 * of the free pages in the run... we'll clear
3148 * the 'free' bit in the 2nd pass, and even in
3149 * an abort_run case, we'll collect all of the
3150 * free pages in this run and return them to the free list
3151 */
3152 while (start_idx < page_idx) {
3153
3154 m1 = &vm_pages[start_idx++];
3155
3156 if (m1->free) {
3157 unsigned int color;
3158
3159 color = m1->phys_page & vm_color_mask;
3160 queue_remove(&vm_page_queue_free[color],
3161 m1,
3162 vm_page_t,
3163 pageq);
3164
3165 vm_page_free_count--;
3166 }
3167 }
3168 /*
3169 * adjust global freelist counts
3170 */
3171 if (vm_page_free_count < vm_page_free_count_minimum)
3172 vm_page_free_count_minimum = vm_page_free_count;
3173
3174 /*
3175 * we can drop the free queue lock at this point since
3176 * we've pulled any 'free' candidates off of the list
3177 * we need it dropped so that we can do a vm_page_grab
3178 * when substituting for pmapped/dirty pages
3179 */
3180 mutex_unlock(&vm_page_queue_free_lock);
3181
3182 start_idx = tmp_start_idx;
3183 cur_idx = page_idx - 1;
3184
3185 while (start_idx++ < page_idx) {
3186 /*
3187 * must go through the list from back to front
3188 * so that the page list is created in the
3189 * correct order - low -> high phys addresses
3190 */
3191 m1 = &vm_pages[cur_idx--];
3192
3193 if (m1->free) {
3194 /*
3195 * pages have already been removed from
3196 * the free list in the 1st pass
3197 */
3198 assert(m1->free);
3199 assert(m1->busy);
3200 assert(!m1->wanted);
3201 assert(!m1->laundry);
3202 m1->free = FALSE;
3203
3204 } else {
3205 vm_object_t object;
3206
3207 if (abort_run == TRUE)
3208 continue;
3209
3210 object = m1->object;
3211
3212 if (object != locked_object) {
3213 if (locked_object) {
3214 vm_object_unlock(locked_object);
3215 locked_object = VM_OBJECT_NULL;
3216 }
3217 if (vm_object_lock_try(object))
3218 locked_object = object;
3219 }
3220 if (locked_object == VM_OBJECT_NULL ||
3221 (m1->wire_count || m1->gobbled ||
3222 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3223 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3224 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3225
3226 if (locked_object) {
3227 vm_object_unlock(locked_object);
3228 locked_object = VM_OBJECT_NULL;
3229 }
3230 tmp_start_idx = cur_idx;
3231 abort_run = TRUE;
3232 continue;
3233 }
3234 if (m1->pmapped || m1->dirty) {
3235 int refmod;
3236 vm_object_offset_t offset;
3237
3238 m2 = vm_page_grab();
3239
3240 if (m2 == VM_PAGE_NULL) {
3241 if (locked_object) {
3242 vm_object_unlock(locked_object);
3243 locked_object = VM_OBJECT_NULL;
3244 }
3245 tmp_start_idx = cur_idx;
3246 abort_run = TRUE;
3247 continue;
3248 }
3249 if (m1->pmapped)
3250 refmod = pmap_disconnect(m1->phys_page);
3251 else
3252 refmod = 0;
3253 vm_page_copy(m1, m2);
3254
3255 m2->reference = m1->reference;
3256 m2->dirty = m1->dirty;
3257
3258 if (refmod & VM_MEM_REFERENCED)
3259 m2->reference = TRUE;
3260 if (refmod & VM_MEM_MODIFIED)
3261 m2->dirty = TRUE;
3262 offset = m1->offset;
3263
3264 /*
3265 * completely cleans up the state
3266 * of the page so that it is ready
3267 * to be put onto the free list, or
3268 * for this purpose it looks like it
3269 * just came off of the free list
3270 */
3271 vm_page_free_prepare(m1);
3272
3273 /*
3274 * make sure we clear the ref/mod state
3275 * from the pmap layer... else we risk
3276 * inheriting state from the last time
3277 * this page was used...
3278 */
3279 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
3280 /*
3281 * now put the substitute page on the object
3282 */
3283 vm_page_insert_internal(m2, locked_object, offset, TRUE);
3284
3285 if (m2->reference)
3286 vm_page_activate(m2);
3287 else
3288 vm_page_deactivate(m2);
3289
3290 PAGE_WAKEUP_DONE(m2);
3291
3292 } else {
3293 /*
3294 * completely cleans up the state
3295 * of the page so that it is ready
3296 * to be put onto the free list, or
3297 * for this purpose it looks like it
3298 * just came off of the free list
3299 */
3300 vm_page_free_prepare(m1);
3301 }
3302#if MACH_ASSERT
3303 stolen_pages++;
3304#endif
3305 }
3306 m1->pageq.next = (queue_entry_t) m;
3307 m1->pageq.prev = NULL;
3308 m = m1;
3309 }
3310 if (locked_object) {
3311 vm_object_unlock(locked_object);
3312 locked_object = VM_OBJECT_NULL;
3313 }
3314
3315 if (abort_run == TRUE) {
3316 if (m != VM_PAGE_NULL) {
3317 vm_page_free_list(m);
3318 }
3319#if MACH_ASSERT
3320 dumped_run++;
3321#endif
3322 /*
3323 * want the index of the last
3324 * page in this run that was
3325 * successfully 'stolen', so back
3326 * it up 1 for the auto-decrement on use
3327 * and 1 more to bump back over this page
3328 */
3329 page_idx = tmp_start_idx + 2;
3330
3331 if (page_idx >= vm_pages_count)
3332 goto done_scanning;
3333
3334 mutex_lock(&vm_page_queue_free_lock);
3335
3336 RESET_STATE_OF_RUN();
3337
3338 /*
3339 * reset our free page limit since we
3340 * dropped the lock protecting the vm_page_free_queue
3341 */
3342 free_available = vm_page_free_count - vm_page_free_reserved;
3343
3344 goto retry;
3345 }
3346
3347 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
3348
3349 if (wire == TRUE)
3350 m1->wire_count++;
3351 else
3352 m1->gobbled = TRUE;
3353 }
3354 if (wire == FALSE)
3355 vm_page_gobble_count += npages;
3356
3357 /*
3358 * gobbled pages are also counted as wired pages
3359 */
3360 vm_page_wire_count += npages;
3361
3362 assert(vm_page_verify_contiguous(m, npages));
3363 }
3364done_scanning:
3365 vm_page_unlock_queues();
3366
3367#if MACH_ASSERT
3368 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
3369
3370 tv_end_sec -= tv_start_sec;
3371 if (tv_end_usec < tv_start_usec) {
3372 tv_end_sec--;
3373 tv_end_usec += 1000000;
3374 }
3375 tv_end_usec -= tv_start_usec;
3376 if (tv_end_usec >= 1000000) {
3377 tv_end_sec++;
3378 tv_end_usec -= 1000000;
3379 }
3380 printf("vm_find_page_contiguous(num=%d,low=%d): found %d pages in %d.%06ds... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
3381 contig_pages, max_pnum, npages, tv_end_sec, tv_end_usec, page_idx, yielded, dumped_run, stolen_pages);
3382
3383 vm_page_verify_free_lists();
3384#endif
3385 return m;
3386}
3387
3388/*
3389 * Allocate a list of contiguous, wired pages.
3390 */
3391kern_return_t
3392cpm_allocate(
3393 vm_size_t size,
3394 vm_page_t *list,
3395 ppnum_t max_pnum,
3396 boolean_t wire)
3397{
3398 vm_page_t pages;
3399 unsigned int npages;
3400
3401 if (size % page_size != 0)
3402 return KERN_INVALID_ARGUMENT;
3403
3404 npages = size / page_size;
3405
3406 /*
3407 * Obtain a pointer to a subset of the free
3408 * list large enough to satisfy the request;
3409 * the region will be physically contiguous.
3410 */
3411 pages = vm_page_find_contiguous(npages, max_pnum, wire);
3412
3413 if (pages == VM_PAGE_NULL)
3414 return KERN_NO_SPACE;
3415 /*
3416 * determine need for wakeups
3417 */
3418 if ((vm_page_free_count < vm_page_free_min) ||
3419 ((vm_page_free_count < vm_page_free_target) &&
3420 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
3421 thread_wakeup((event_t) &vm_page_free_wanted);
3422
3423#if CONFIG_EMBEDDED
3424 {
3425 int percent_avail;
3426
3427 /*
3428 * Decide if we need to poke the memorystatus notification thread.
3429 */
3430 percent_avail =
3431 (vm_page_active_count + vm_page_inactive_count +
3432 vm_page_speculative_count + vm_page_free_count +
3433 vm_page_purgeable_count ) * 100 /
3434 atop_64(max_mem);
3435 if (percent_avail <= (kern_memorystatus_level - 5)) {
3436 kern_memorystatus_level = percent_avail;
3437 thread_wakeup((event_t)&kern_memorystatus_wakeup);
3438 }
3439 }
3440#endif
3441 /*
3442 * The CPM pages should now be available and
3443 * ordered by ascending physical address.
3444 */
3445 assert(vm_page_verify_contiguous(pages, npages));
3446
3447 *list = pages;
3448 return KERN_SUCCESS;
3449}
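
/*
 * A minimal sketch of the cpm_allocate() calling convention; the helper name
 * "example_grab_contiguous" is hypothetical. On success the returned pages
 * are wired (when 'wire' is TRUE), linked via NEXT_PAGE() and ordered by
 * ascending physical address, as asserted above.
 */
#if 0	/* illustration only -- never compiled */
static kern_return_t
example_grab_contiguous(
	vm_size_t	size,		/* must be a multiple of page_size */
	ppnum_t		max_pnum,	/* 0 for no limit, else highest acceptable page */
	vm_page_t	*pages)
{
	kern_return_t	kr;
	vm_page_t	m;
	unsigned int	n = 0;

	kr = cpm_allocate(size, pages, max_pnum, TRUE);
	if (kr != KERN_SUCCESS)
		return kr;		/* KERN_NO_SPACE: no suitable run was found */

	for (m = *pages; m != VM_PAGE_NULL; m = NEXT_PAGE(m))
		n++;			/* each page is physically one past its predecessor */
	assert(n == size / page_size);

	return KERN_SUCCESS;
}
#endif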
3450
3451
3452#include <mach_vm_debug.h>
3453#if MACH_VM_DEBUG
3454
3455#include <mach_debug/hash_info.h>
3456#include <vm/vm_debug.h>
3457
3458/*
3459 * Routine: vm_page_info
3460 * Purpose:
3461 * Return information about the global VP table.
3462 * Fills the buffer with as much information as possible
3463 * and returns the desired size of the buffer.
3464 * Conditions:
3465 * Nothing locked. The caller should provide
3466 * possibly-pageable memory.
3467 */
3468
3469unsigned int
3470vm_page_info(
3471 hash_info_bucket_t *info,
3472 unsigned int count)
3473{
3474 unsigned int i;
3475
3476 if (vm_page_bucket_count < count)
3477 count = vm_page_bucket_count;
3478
3479 for (i = 0; i < count; i++) {
3480 vm_page_bucket_t *bucket = &vm_page_buckets[i];
3481 unsigned int bucket_count = 0;
3482 vm_page_t m;
3483
3484 simple_lock(&vm_page_bucket_lock);
3485 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
3486 bucket_count++;
3487 simple_unlock(&vm_page_bucket_lock);
3488
3489 /* don't touch pageable memory while holding locks */
3490 info[i].hib_count = bucket_count;
3491 }
3492
3493 return vm_page_bucket_count;
3494}
3495#endif /* MACH_VM_DEBUG */
3496
3497#include <mach_kdb.h>
3498#if MACH_KDB
3499
3500#include <ddb/db_output.h>
3501#include <vm/vm_print.h>
3502#define printf kdbprintf
3503
3504/*
3505 * Routine: vm_page_print [exported]
3506 */
3507void
3508vm_page_print(
3509 db_addr_t db_addr)
3510{
3511 vm_page_t p;
3512
3513 p = (vm_page_t) (long) db_addr;
3514
3515 iprintf("page 0x%x\n", p);
3516
3517 db_indent += 2;
3518
3519 iprintf("object=0x%x", p->object);
3520 printf(", offset=0x%x", p->offset);
3521 printf(", wire_count=%d", p->wire_count);
3522
3523 iprintf("%sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
3524 (p->inactive ? "" : "!"),
3525 (p->active ? "" : "!"),
3526 (p->throttled ? "" : "!"),
3527 (p->gobbled ? "" : "!"),
3528 (p->laundry ? "" : "!"),
3529 (p->free ? "" : "!"),
3530 (p->reference ? "" : "!"),
3531 (p->encrypted ? "" : "!"));
3532 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
3533 (p->busy ? "" : "!"),
3534 (p->wanted ? "" : "!"),
3535 (p->tabled ? "" : "!"),
3536 (p->fictitious ? "" : "!"),
3537 (p->private ? "" : "!"),
3538 (p->precious ? "" : "!"));
3539 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
3540 (p->absent ? "" : "!"),
3541 (p->error ? "" : "!"),
3542 (p->dirty ? "" : "!"),
3543 (p->cleaning ? "" : "!"),
3544 (p->pageout ? "" : "!"),
3545 (p->clustered ? "" : "!"));
3546 iprintf("%soverwriting, %srestart, %sunusual\n",
3547 (p->overwriting ? "" : "!"),
3548 (p->restart ? "" : "!"),
3549 (p->unusual ? "" : "!"));
3550
3551 iprintf("phys_page=0x%x", p->phys_page);
3552
3553 db_indent -= 2;
3554}
3555#endif /* MACH_KDB */