git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000-2007 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/*
	29	* @OSF_COPYRIGHT@
	30	*/
	31	/*
	32	* Mach Operating System
	33	* Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
	34	* All Rights Reserved.
	35	*
	36	* Permission to use, copy, modify and distribute this software and its
	37	* documentation is hereby granted, provided that both the copyright
	38	* notice and this permission notice appear in all copies of the
	39	* software, derivative works or modified versions, and any portions
	40	* thereof, and that both notices appear in supporting documentation.
	41	*
	42	* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
	43	* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
	44	* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
	45	*
	46	* Carnegie Mellon requests users of this software to return to
	47	*
	48	* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
	49	* School of Computer Science
	50	* Carnegie Mellon University
	51	* Pittsburgh PA 15213-3890
	52	*
	53	* any improvements or extensions that they make and grant Carnegie Mellon
	54	* the rights to redistribute these changes.
	55	*/
	56	/*
	57	*/
	58	/*
	59	* File: vm/vm_page.c
	60	* Author: Avadis Tevanian, Jr., Michael Wayne Young
	61	*
	62	* Resident memory management module.
	63	*/
	64
	65	#include <debug.h>
	66	#include <libkern/OSAtomic.h>
	67
	68	#include <mach/clock_types.h>
	69	#include <mach/vm_prot.h>
	70	#include <mach/vm_statistics.h>
	71	#include <mach/sdt.h>
	72	#include <kern/counters.h>
	73	#include <kern/sched_prim.h>
	74	#include <kern/task.h>
	75	#include <kern/thread.h>
	76	#include <kern/zalloc.h>
	77	#include <kern/xpr.h>
	78	#include <vm/pmap.h>
	79	#include <vm/vm_init.h>
	80	#include <vm/vm_map.h>
	81	#include <vm/vm_page.h>
	82	#include <vm/vm_pageout.h>
	83	#include <vm/vm_kern.h> /* kernel_memory_allocate() */
	84	#include <kern/misc_protos.h>
	85	#include <zone_debug.h>
	86	#include <vm/cpm.h>
	87	#include <ppc/mappings.h> /* (BRINGUP) */
	88	#include <pexpert/pexpert.h> /* (BRINGUP) */
	89
	90	#include <vm/vm_protos.h>
	91	#include <vm/memory_object.h>
	92	#include <vm/vm_purgeable_internal.h>
	93
	94	#if CONFIG_EMBEDDED
	95	#include <sys/kern_memorystatus.h>
	96	#endif
	97
	/* NOTE(review): presumably indices into vm_page_queue_speculative[] below; neither is used in the visible part of this file -- confirm. */
	98	int speculative_age_index = 0;
	99	int speculative_steal_index = 0;
	100
	/* VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1 entries; vm_page_bootstrap() calls queue_init() on each entry's age_q and zeroes its age_ts. */
	101	struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
	102
	103
	104	/*
	105	* Associated with each page of user-allocatable memory is a
	106	* page structure.
	107	*/
	108
	109	/*
	110	* These variables record the values returned by vm_page_bootstrap,
	111	* for debugging purposes. The implementation of pmap_steal_memory
	112	* and pmap_startup here also uses them internally.
	113	*/
	114
	115	vm_offset_t virtual_space_start; /* next unallocated kernel virtual address; pmap_steal_memory() advances it by each request's size */
	116	vm_offset_t virtual_space_end; /* end of the kernel virtual range; page-truncated wherever this file sets it */
	117	int vm_page_pages; /* incremented once per vm_page_init() call made by pmap_startup() and vm_page_create() */
	118
	119	/*
	120	* The vm_page_lookup() routine, which provides for fast
	121	* (virtual memory object, offset) to page lookup, employs
	122	* the following hash table. The vm_page_{insert,remove}
	123	* routines install and remove associations in the table.
	124	* [This table is often called the virtual-to-physical,
	125	* or VP, table.]
	126	*/
	/* One bucket of the (object, offset) -> page hash table described above. */
	127	typedef struct {
	128	vm_page_t pages; /* head of this bucket's chain; pages are linked through their "next" field */
	129	#if MACH_PAGE_HASH_STATS
	130	int cur_count; /* current count */
	131	int hi_count; /* high water mark */
	132	#endif /* MACH_PAGE_HASH_STATS */
	133	} vm_page_bucket_t;
	134
	135	vm_page_bucket_t vm_page_buckets; / Array of buckets */
	136	unsigned int vm_page_bucket_count = 0; /* How big is array? */
	137	unsigned int vm_page_hash_mask; /* Mask for hash function */
	138	unsigned int vm_page_hash_shift; /* Shift for hash function */
	139	uint32_t vm_page_bucket_hash; /* Basic bucket hash */
	140	decl_simple_lock_data(,vm_page_bucket_lock)
	141
	142
	143	#if MACH_PAGE_HASH_STATS
	144	/* This routine is only for debug. It is intended to be called by
	145	* hand by a developer using a kernel debugger. This routine prints
	146	* out vm_page_hash table statistics to the kernel debug console.
	147	*/
	148	void
	149	hash_debug(void)
	150	{
	151	int i;
	152	int numbuckets = 0;
	153	int highsum = 0;
	154	int maxdepth = 0;
	155
	156	for (i = 0; i < vm_page_bucket_count; i++) {
	157	if (vm_page_buckets[i].hi_count) {
	158	numbuckets++;
	159	highsum += vm_page_buckets[i].hi_count;
	160	if (vm_page_buckets[i].hi_count > maxdepth)
	161	maxdepth = vm_page_buckets[i].hi_count;
	162	}
	163	}
	164	printf("Total number of buckets: %d\n", vm_page_bucket_count);
	165	printf("Number used buckets: %d = %d%%\n",
	166	numbuckets, 100*numbuckets/vm_page_bucket_count);
	167	printf("Number unused buckets: %d = %d%%\n",
	168	vm_page_bucket_count - numbuckets,
	169	100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
	170	printf("Sum of bucket max depth: %d\n", highsum);
	171	printf("Average bucket depth: %d.%2d\n",
	172	highsum/vm_page_bucket_count,
	173	highsum%vm_page_bucket_count);
	174	printf("Maximum bucket depth: %d\n", maxdepth);
	175	}
	176	#endif /* MACH_PAGE_HASH_STATS */
	177
	178	/*
	179	* The virtual page size is currently implemented as a runtime
	180	* variable, but is constant once initialized using vm_set_page_size.
	181	* This initialization must be done in the machine-dependent
	182	* bootstrap sequence, before calling other machine-independent
	183	* initializations.
	184	*
	185	* All references to the virtual page size outside this
	186	* module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
	187	* constants.
	188	*/
	189	vm_size_t page_size = PAGE_SIZE;
	190	vm_size_t page_mask = PAGE_MASK; /* recomputed as page_size - 1 by vm_set_page_size() */
	191	int page_shift = PAGE_SHIFT; /* recomputed by vm_set_page_size() so that (1 << page_shift) == page_size */
	192
	193	/*
	194	* Resident page structures are initialized from
	195	* a template (see vm_page_alloc).
	196	*
	197	* When adding a new field to the resident page
	198	* structure, be sure to add initialization
	199	* (see vm_page_bootstrap).
	200	*/
	201	struct vm_page vm_page_template; /* every field is given its default by vm_page_bootstrap() */
	202
	203	vm_page_t vm_pages = VM_PAGE_NULL; /* array of page structures stolen by pmap_startup() */
	204	unsigned int vm_pages_count = 0; /* number of vm_pages[] entries pmap_startup() initialized */
	205
	206	/*
	207	* Resident pages that represent real memory
	208	* are allocated from a set of free lists,
	209	* one per color.
	210	*/
	211	unsigned int vm_colors; /* number of colors in use; set by vm_page_set_colors() */
	212	unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
	213	unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
	214	queue_head_t vm_page_queue_free[MAX_COLORS]; /* one free queue per color; all MAX_COLORS heads are initialized at bootstrap */
	215	vm_page_t vm_page_queue_fictitious; /* set to VM_PAGE_NULL by vm_page_bootstrap() */
	216	unsigned int vm_page_free_wanted; /* zeroed by vm_page_bootstrap() */
	217	unsigned int vm_page_free_wanted_privileged; /* zeroed by vm_page_bootstrap() */
	218	unsigned int vm_page_free_count;
	219	unsigned int vm_page_fictitious_count;
	220
	221	unsigned int vm_page_free_count_minimum; /* debugging */
	222
	223	/*
	224	* Occasionally, the virtual memory system uses
	225	* resident page structures that do not refer to
	226	* real pages, for example to leave a page with
	227	* important state information in the VP table.
	228	*
	229	* These page structures are allocated the way
	230	* most other kernel structures are.
	231	*/
	232	zone_t vm_page_zone; /* created by vm_page_module_init() */
	233	decl_mutex_data(,vm_page_alloc_lock) /* initialized by vm_page_module_init() */
	234	unsigned int io_throttle_zero_fill; /* NOTE(review): not referenced in the visible part of this file */
	235
	236	/*
	237	* Fictitious pages don't have a physical address,
	238	* but we must initialize phys_page to something.
	239	* For debugging, this should be a strange value
	240	* that the pmap module can recognize in assertions.
	241	*/
	242	vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1; /* all bits set */
	243
	244	/*
	245	* Guard pages are not accessible so they don't
	246	* need a physical address, but we need to enter
	247	* one in the pmap.
	248	* Let's make it recognizable and make sure that
	249	* we don't use a real physical page with that
	250	* physical address.
	251	*/
	252	vm_offset_t vm_page_guard_addr = (vm_offset_t) -2; /* distinct from vm_page_fictitious_addr, which is (vm_offset_t) -1 */
	253
	254	/*
	255	* Resident page structures are also chained on
	256	* queues that are used by the page replacement
	257	* system (pageout daemon). These queues are
	258	* defined here, but are shared by the pageout
	259	* module. The inactive queue is broken into
	260	* inactive and zf for convenience as the
	261	* pageout daemon often assigns a higher
	262	* affinity to zf pages
	263	*/
	264	queue_head_t vm_page_queue_active; /* head initialized by vm_page_bootstrap() */
	265	queue_head_t vm_page_queue_inactive; /* head initialized by vm_page_bootstrap() */
	266	queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
	267
	268	unsigned int vm_page_active_count;
	269	unsigned int vm_page_inactive_count;
	270	unsigned int vm_page_throttled_count;
	271	unsigned int vm_page_speculative_count;
	272	unsigned int vm_page_wire_count; /* bumped per page by pmap_steal_memory(); recomputed at the end of vm_page_bootstrap() */
	273	unsigned int vm_page_gobble_count = 0;
	274	unsigned int vm_page_wire_count_warning = 0;
	275	unsigned int vm_page_gobble_count_warning = 0;
	276
	277	unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
	278	uint64_t vm_page_purged_count = 0; /* total count of purged pages */
	279
	280	unsigned int vm_page_speculative_recreated = 0;
	281	unsigned int vm_page_speculative_created = 0;
	282	unsigned int vm_page_speculative_used = 0;
	283
	284	ppnum_t vm_lopage_poolstart = 0; /* first physical page put in the low-memory pool by pmap_startup() */
	285	ppnum_t vm_lopage_poolend = 0; /* most recent physical page put in the pool by pmap_startup() */
	286	int vm_lopage_poolsize = 0; /* number of pages pmap_startup() tries to reserve; 0 means no pool */
	287	uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
	288
	289
	290	/*
	291	* Several page replacement parameters are also
	292	* shared with this module, so that page allocation
	293	* (done here in vm_page_alloc) can trigger the
	294	* pageout daemon.
	295	*/
	/* All of these start at 0. NOTE(review): nothing in the visible part of this file assigns them -- presumably set elsewhere; confirm. */
	296	unsigned int vm_page_free_target = 0;
	297	unsigned int vm_page_free_min = 0;
	298	unsigned int vm_page_inactive_target = 0;
	299	unsigned int vm_page_inactive_min = 0;
	300	unsigned int vm_page_free_reserved = 0;
	301	unsigned int vm_page_zfill_throttle_count = 0;
	302
	303	/*
	304	* The VM system has a couple of heuristics for deciding
	305	* that pages are "uninteresting" and should be placed
	306	* on the inactive queue as likely candidates for replacement.
	307	* These variables let the heuristics be controlled at run-time
	308	* to make experimentation easier.
	309	*/
	310
	311	boolean_t vm_page_deactivate_hint = TRUE; /* heuristic is enabled by default */
	312
	313	/*
	314	* vm_set_page_size:
	315	*
	316	* Sets the page size, perhaps based upon the memory
	317	* size. Must be called before any use of page-size
	318	* dependent functions.
	319	*
	320	* Sets page_shift and page_mask from page_size.
	321	*/
	322	void
	323	vm_set_page_size(void)
	324	{
	325	page_mask = page_size - 1;
	326
	327	if ((page_mask & page_size) != 0)
	328	panic("vm_set_page_size: page size not a power of two");
	329
	330	for (page_shift = 0; ; page_shift++)
	331	if ((1U << page_shift) == page_size)
	332	break;
	333	}
	334
	335
	336	/* Called once during statup, once the cache geometry is known.
	337	*/
	338	static void
	339	vm_page_set_colors( void )
	340	{
	341	unsigned int n, override;
	342
	343	if ( PE_parse_boot_arg("colors", &override) ) /* colors specified as a boot-arg? */
	344	n = override;
	345	else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
	346	n = vm_cache_geometry_colors;
	347	else n = DEFAULT_COLORS; /* use default if all else fails */
	348
	349	if ( n == 0 )
	350	n = 1;
	351	if ( n > MAX_COLORS )
	352	n = MAX_COLORS;
	353
	354	/* the count must be a power of 2 */
	355	if ( ( n & (n - 1)) !=0 )
	356	panic("vm_page_set_colors");
	357
	358	vm_colors = n;
	359	vm_color_mask = n - 1;
	360	}
	361
	362
	363	/*
	364	* vm_page_bootstrap:
	365	*
	366	* Initializes the resident memory module.
	367	*
	368	* Allocates memory for the page cells, and
	369	* for the object/offset-to-page hash table headers.
	370	* Each page cell is initialized and placed on the free list.
	371	* Returns the range of available kernel virtual memory.
	372	*/
	373
	374	void
	375	vm_page_bootstrap(
	376	vm_offset_t *startp,
	377	vm_offset_t *endp)
	378	{
	379	register vm_page_t m;
	380	unsigned int i;
	381	unsigned int log1;
	382	unsigned int log2;
	383	unsigned int size;
	384
	385	/*
	386	* Initialize the vm_page template.
	387	*/
	388
	389	m = &vm_page_template;
	390	m->object = VM_OBJECT_NULL; /* reset later */
	391	m->offset = (vm_object_offset_t) -1; /* reset later */
	392	m->wire_count = 0;
	393
	394	m->pageq.next = NULL;
	395	m->pageq.prev = NULL;
	396	m->listq.next = NULL;
	397	m->listq.prev = NULL;
	398
	399	m->speculative = FALSE;
	400	m->throttled = FALSE;
	401	m->inactive = FALSE;
	402	m->active = FALSE;
	403	m->no_cache = FALSE;
	404	m->laundry = FALSE;
	405	m->free = FALSE;
	406	m->pmapped = FALSE;
	407	m->wpmapped = FALSE;
	408	m->reference = FALSE;
	409	m->pageout = FALSE;
	410	m->dump_cleaning = FALSE;
	411	m->list_req_pending = FALSE;
	412
	413	m->busy = TRUE;
	414	m->wanted = FALSE;
	415	m->tabled = FALSE;
	416	m->fictitious = FALSE;
	417	m->private = FALSE;
	418	m->absent = FALSE;
	419	m->error = FALSE;
	420	m->dirty = FALSE;
	421	m->cleaning = FALSE;
	422	m->precious = FALSE;
	423	m->clustered = FALSE;
	424	m->unusual = FALSE;
	425	m->restart = FALSE;
	426	m->zero_fill = FALSE;
	427	m->encrypted = FALSE;
	428	m->encrypted_cleaning = FALSE;
	429	m->deactivated = FALSE;
	430
	431	m->phys_page = 0; /* reset later */
	432
	433	/*
	434	* Initialize the page queues.
	435	*/
	436
	437	mutex_init(&vm_page_queue_free_lock, 0);
	438	mutex_init(&vm_page_queue_lock, 0);
	439
	440	mutex_init(&vm_purgeable_queue_lock, 0);
	441
	442	for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
	443	int group;
	444
	445	purgeable_queues[i].token_q_head = 0;
	446	purgeable_queues[i].token_q_tail = 0;
	447	for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
	448	queue_init(&purgeable_queues[i].objq[group]);
	449
	450	purgeable_queues[i].type = i;
	451	purgeable_queues[i].new_pages = 0;
	452	#if MACH_ASSERT
	453	purgeable_queues[i].debug_count_tokens = 0;
	454	purgeable_queues[i].debug_count_objects = 0;
	455	#endif
	456	};
	457
	458	for (i = 0; i < MAX_COLORS; i++ )
	459	queue_init(&vm_page_queue_free[i]);
	460	queue_init(&vm_lopage_queue_free);
	461	vm_page_queue_fictitious = VM_PAGE_NULL;
	462	queue_init(&vm_page_queue_active);
	463	queue_init(&vm_page_queue_inactive);
	464	queue_init(&vm_page_queue_throttled);
	465	queue_init(&vm_page_queue_zf);
	466
	467	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
	468	queue_init(&vm_page_queue_speculative[i].age_q);
	469
	470	vm_page_queue_speculative[i].age_ts.tv_sec = 0;
	471	vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
	472	}
	473	vm_page_free_wanted = 0;
	474	vm_page_free_wanted_privileged = 0;
	475
	476	vm_page_set_colors();
	477
	478
	479	/*
	480	* Steal memory for the map and zone subsystems.
	481	*/
	482
	483	vm_map_steal_memory();
	484	zone_steal_memory();
	485
	486	/*
	487	* Allocate (and initialize) the virtual-to-physical
	488	* table hash buckets.
	489	*
	490	* The number of buckets should be a power of two to
	491	* get a good hash function. The following computation
	492	* chooses the first power of two that is greater
	493	* than the number of physical pages in the system.
	494	*/
	495
	496	simple_lock_init(&vm_page_bucket_lock, 0);
	497
	498	if (vm_page_bucket_count == 0) {
	499	unsigned int npages = pmap_free_pages();
	500
	501	vm_page_bucket_count = 1;
	502	while (vm_page_bucket_count < npages)
	503	vm_page_bucket_count <<= 1;
	504	}
	505
	506	vm_page_hash_mask = vm_page_bucket_count - 1;
	507
	508	/*
	509	* Calculate object shift value for hashing algorithm:
	510	* O = log2(sizeof(struct vm_object))
	511	* B = log2(vm_page_bucket_count)
	512	* hash shifts the object left by
	513	* B/2 - O
	514	*/
	515	size = vm_page_bucket_count;
	516	for (log1 = 0; size > 1; log1++)
	517	size /= 2;
	518	size = sizeof(struct vm_object);
	519	for (log2 = 0; size > 1; log2++)
	520	size /= 2;
	521	vm_page_hash_shift = log1/2 - log2 + 1;
	522
	523	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
	524	vm_page_bucket_hash \|= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
	525	vm_page_bucket_hash \|= 1; /* Set bit and add 1 - always must be 1 to insure unique series */
	526
	527	if (vm_page_hash_mask & vm_page_bucket_count)
	528	printf("vm_page_bootstrap: WARNING -- strange page hash\n");
	529
	530	vm_page_buckets = (vm_page_bucket_t *)
	531	pmap_steal_memory(vm_page_bucket_count *
	532	sizeof(vm_page_bucket_t));
	533
	534	for (i = 0; i < vm_page_bucket_count; i++) {
	535	register vm_page_bucket_t *bucket = &vm_page_buckets[i];
	536
	537	bucket->pages = VM_PAGE_NULL;
	538	#if MACH_PAGE_HASH_STATS
	539	bucket->cur_count = 0;
	540	bucket->hi_count = 0;
	541	#endif /* MACH_PAGE_HASH_STATS */
	542	}
	543
	544	/*
	545	* Machine-dependent code allocates the resident page table.
	546	* It uses vm_page_init to initialize the page frames.
	547	* The code also returns to us the virtual space available
	548	* to the kernel. We don't trust the pmap module
	549	* to get the alignment right.
	550	*/
	551
	552	pmap_startup(&virtual_space_start, &virtual_space_end);
	553	virtual_space_start = round_page(virtual_space_start);
	554	virtual_space_end = trunc_page(virtual_space_end);
	555
	556	*startp = virtual_space_start;
	557	*endp = virtual_space_end;
	558
	559	/*
	560	* Compute the initial "wire" count.
	561	* Up until now, the pages which have been set aside are not under
	562	* the VM system's control, so although they aren't explicitly
	563	* wired, they nonetheless can't be moved. At this moment,
	564	* all VM managed pages are "free", courtesy of pmap_startup.
	565	*/
	566	vm_page_wire_count = atop_64(max_mem) - vm_page_free_count; /* initial value */
	567	vm_page_free_count_minimum = vm_page_free_count;
	568
	569	printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
	570	vm_page_free_count, vm_page_wire_count);
	571
	572	simple_lock_init(&vm_paging_lock, 0);
	573	}
	574
	575	#ifndef MACHINE_PAGES
	576	/*
	577	* We implement pmap_steal_memory and pmap_startup with the help
	578	* of two simpler functions, pmap_virtual_space and pmap_next_page.
	579	*/
	580
	581	void *
	582	pmap_steal_memory(
	583	vm_size_t size)
	584	{
	585	vm_offset_t addr, vaddr;
	586	ppnum_t phys_page;
	587
	588	/*
	589	* We round the size to a round multiple.
	590	*/
	591
	592	size = (size + sizeof (void ) - 1) &~ (sizeof (void ) - 1);
	593
	594	/*
	595	* If this is the first call to pmap_steal_memory,
	596	* we have to initialize ourself.
	597	*/
	598
	599	if (virtual_space_start == virtual_space_end) {
	600	pmap_virtual_space(&virtual_space_start, &virtual_space_end);
	601
	602	/*
	603	* The initial values must be aligned properly, and
	604	* we don't trust the pmap module to do it right.
	605	*/
	606
	607	virtual_space_start = round_page(virtual_space_start);
	608	virtual_space_end = trunc_page(virtual_space_end);
	609	}
	610
	611	/*
	612	* Allocate virtual memory for this request.
	613	*/
	614
	615	addr = virtual_space_start;
	616	virtual_space_start += size;
	617
	618	kprintf("pmap_steal_memory: %08X - %08X; size=%08X\n", addr, virtual_space_start, size); /* (TEST/DEBUG) */
	619
	620	/*
	621	* Allocate and map physical pages to back new virtual pages.
	622	*/
	623
	624	for (vaddr = round_page(addr);
	625	vaddr < addr + size;
	626	vaddr += PAGE_SIZE) {
	627	if (!pmap_next_page(&phys_page))
	628	panic("pmap_steal_memory");
	629
	630	/*
	631	* XXX Logically, these mappings should be wired,
	632	* but some pmap modules barf if they are.
	633	*/
	634
	635	pmap_enter(kernel_pmap, vaddr, phys_page,
	636	VM_PROT_READ\|VM_PROT_WRITE,
	637	VM_WIMG_USE_DEFAULT, FALSE);
	638	/*
	639	* Account for newly stolen memory
	640	*/
	641	vm_page_wire_count++;
	642
	643	}
	644
	645	return (void *) addr;
	646	}
	647
	648	void
	649	pmap_startup(
	650	vm_offset_t *startp,
	651	vm_offset_t *endp)
	652	{
	653	unsigned int i, npages, pages_initialized, fill, fillval;
	654	ppnum_t phys_page;
	655	addr64_t tmpaddr;
	656	unsigned int num_of_lopages = 0;
	657	unsigned int last_index;
	658
	659	/*
	660	* We calculate how many page frames we will have
	661	* and then allocate the page structures in one chunk.
	662	*/
	663
	664	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
	665	tmpaddr = tmpaddr + (addr64_t)(round_page_32(virtual_space_start) - virtual_space_start); /* Account for any slop */
	666	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(vm_pages))); / Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
	667
	668	vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
	669
	670	/*
	671	* Initialize the page frames.
	672	*/
	673	for (i = 0, pages_initialized = 0; i < npages; i++) {
	674	if (!pmap_next_page(&phys_page))
	675	break;
	676
	677	vm_page_init(&vm_pages[i], phys_page);
	678	vm_page_pages++;
	679	pages_initialized++;
	680	}
	681	vm_pages_count = pages_initialized;
	682
	683	/*
	684	* Check if we want to initialize pages to a known value
	685	*/
	686	fill = 0; /* Assume no fill */
	687	if (PE_parse_boot_arg("fill", &fillval)) fill = 1; /* Set fill */
	688
	689
	690	/*
	691	* if vm_lopage_poolsize is non-zero, than we need to reserve
	692	* a pool of pages whose addresess are less than 4G... this pool
	693	* is used by drivers whose hardware can't DMA beyond 32 bits...
	694	*
	695	* note that I'm assuming that the page list is ascending and
	696	* ordered w/r to the physical address
	697	*/
	698	for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) {
	699	vm_page_t m;
	700
	701	m = &vm_pages[i];
	702
	703	if (m->phys_page >= (1 << (32 - PAGE_SHIFT)))
	704	panic("couldn't reserve the lopage pool: not enough lo pages\n");
	705
	706	if (m->phys_page < vm_lopage_poolend)
	707	panic("couldn't reserve the lopage pool: page list out of order\n");
	708
	709	vm_lopage_poolend = m->phys_page;
	710
	711	if (vm_lopage_poolstart == 0)
	712	vm_lopage_poolstart = m->phys_page;
	713	else {
	714	if (m->phys_page < vm_lopage_poolstart)
	715	panic("couldn't reserve the lopage pool: page list out of order\n");
	716	}
	717
	718	if (fill)
	719	fillPage(m->phys_page, fillval); /* Fill the page with a know value if requested at boot */
	720
	721	vm_page_release(m);
	722	}
	723	last_index = i;
	724
	725	// -debug code remove
	726	if (2 == vm_himemory_mode) {
	727	// free low -> high so high is preferred
	728	for (i = last_index + 1; i <= pages_initialized; i++) {
	729	if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */
	730	vm_page_release(&vm_pages[i - 1]);
	731	}
	732	}
	733	else
	734	// debug code remove-
	735
	736	/*
	737	* Release pages in reverse order so that physical pages
	738	* initially get allocated in ascending addresses. This keeps
	739	* the devices (which must address physical memory) happy if
	740	* they require several consecutive pages.
	741	*/
	742	for (i = pages_initialized; i > last_index; i--) {
	743	if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */
	744	vm_page_release(&vm_pages[i - 1]);
	745	}
	746
	747	#if 0
	748	{
	749	vm_page_t xx, xxo, xxl;
	750	int i, j, k, l;
	751
	752	j = 0; /* (BRINGUP) */
	753	xxl = 0;
	754
	755	for( i = 0; i < vm_colors; i++ ) {
	756	queue_iterate(&vm_page_queue_free[i],
	757	xx,
	758	vm_page_t,
	759	pageq) { /* BRINGUP */
	760	j++; /* (BRINGUP) */
	761	if(j > vm_page_free_count) { /* (BRINGUP) */
	762	panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
	763	}
	764
	765	l = vm_page_free_count - j; /* (BRINGUP) */
	766	k = 0; /* (BRINGUP) */
	767
	768	if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
	769
	770	for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
	771	k++;
	772	if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
	773	if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
	774	panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
	775	}
	776	}
	777
	778	xxl = xx;
	779	}
	780	}
	781
	782	if(j != vm_page_free_count) { /* (BRINGUP) */
	783	panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
	784	}
	785	}
	786	#endif
	787
	788
	789	/*
	790	* We have to re-align virtual_space_start,
	791	* because pmap_steal_memory has been using it.
	792	*/
	793
	794	virtual_space_start = round_page_32(virtual_space_start);
	795
	796	*startp = virtual_space_start;
	797	*endp = virtual_space_end;
	798	}
	799	#endif /* MACHINE_PAGES */
	800
	801	/*
	802	* Routine: vm_page_module_init
	803	* Purpose:
	804	* Second initialization pass, to be done after
	805	* the basic VM system is ready.
	806	*/
	807	void
	808	vm_page_module_init(void)
	809	{
	810	vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
	811	0, PAGE_SIZE, "vm pages");
	812
	813	#if ZONE_DEBUG
	814	zone_debug_disable(vm_page_zone);
	815	#endif /* ZONE_DEBUG */
	816
	817	zone_change(vm_page_zone, Z_EXPAND, FALSE);
	818	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
	819	zone_change(vm_page_zone, Z_FOREIGN, TRUE);
	820
	821	/*
	822	* Adjust zone statistics to account for the real pages allocated
	823	* in vm_page_create(). [Q: is this really what we want?]
	824	*/
	825	vm_page_zone->count += vm_page_pages;
	826	vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
	827
	828	mutex_init(&vm_page_alloc_lock, 0);
	829	}
	830
	831	/*
	832	* Routine: vm_page_create
	833	* Purpose:
	834	* After the VM system is up, machine-dependent code
	835	* may stumble across more physical memory. For example,
	836	* memory that it was reserving for a frame buffer.
	837	* vm_page_create turns this memory into available pages.
	838	*/
	839
	840	void
	841	vm_page_create(
	842	ppnum_t start,
	843	ppnum_t end)
	844	{
	845	ppnum_t phys_page;
	846	vm_page_t m;
	847
	848	for (phys_page = start;
	849	phys_page < end;
	850	phys_page++) {
	851	while ((m = (vm_page_t) vm_page_grab_fictitious())
	852	== VM_PAGE_NULL)
	853	vm_page_more_fictitious();
	854
	855	vm_page_init(m, phys_page);
	856	vm_page_pages++;
	857	vm_page_release(m);
	858	}
	859	}
	860
	861	/*
	862	* vm_page_hash:
	863	*
	864	* Distributes the object/offset key pair among hash buckets.
	865	*
	866	* NOTE: The bucket count must be a power of 2
	867	*/
	868	#define vm_page_hash(object, offset) (\
	869	( (natural_t)((uint32_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	870	& vm_page_hash_mask)
	871
	872
	873	/*
	874	* vm_page_insert: [ internal use only ]
	875	*
	876	* Inserts the given mem entry into the object/object-page
	877	* table and object list.
	878	*
	879	* The object must be locked.
	880	*/
	881	void
	882	vm_page_insert(
	883	vm_page_t mem,
	884	vm_object_t object,
	885	vm_object_offset_t offset)
	886	{
	887	vm_page_insert_internal(mem, object, offset, FALSE);
	888	}
	889
	890
	891	void
	892	vm_page_insert_internal(
	893	vm_page_t mem,
	894	vm_object_t object,
	895	vm_object_offset_t offset,
	896	boolean_t queues_lock_held)
	897	{
	898	register vm_page_bucket_t *bucket;
	899
	900	XPR(XPR_VM_PAGE,
	901	"vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
	902	(integer_t)object, (integer_t)offset, (integer_t)mem, 0,0);
	903
	904	VM_PAGE_CHECK(mem);
	905
	906	if (object == vm_submap_object) {
	907	/* the vm_submap_object is only a placeholder for submaps */
	908	panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
	909	}
	910
	911	vm_object_lock_assert_exclusive(object);
	912	#if DEBUG
	913	if (mem->tabled \|\| mem->object != VM_OBJECT_NULL)
	914	panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
	915	"already in (obj=%p,off=0x%llx)",
	916	mem, object, offset, mem->object, mem->offset);
	917	#endif
	918	assert(!object->internal \|\| offset < object->size);
	919
	920	/* only insert "pageout" pages into "pageout" objects,
	921	* and normal pages into normal objects */
	922	assert(object->pageout == mem->pageout);
	923
	924	assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
	925
	926	/*
	927	* Record the object/offset pair in this page
	928	*/
	929
	930	mem->object = object;
	931	mem->offset = offset;
	932
	933	/*
	934	* Insert it into the object_object/offset hash table
	935	*/
	936
	937	bucket = &vm_page_buckets[vm_page_hash(object, offset)];
	938	simple_lock(&vm_page_bucket_lock);
	939	mem->next = bucket->pages;
	940	bucket->pages = mem;
	941	#if MACH_PAGE_HASH_STATS
	942	if (++bucket->cur_count > bucket->hi_count)
	943	bucket->hi_count = bucket->cur_count;
	944	#endif /* MACH_PAGE_HASH_STATS */
	945	simple_unlock(&vm_page_bucket_lock);
	946
	947	/*
	948	* Now link into the object's list of backed pages.
	949	*/
	950
	951	VM_PAGE_INSERT(mem, object);
	952	mem->tabled = TRUE;
	953
	954	/*
	955	* Show that the object has one more resident page.
	956	*/
	957
	958	object->resident_page_count++;
	959
	960	if (object->purgable == VM_PURGABLE_VOLATILE \|\|
	961	object->purgable == VM_PURGABLE_EMPTY) {
	962	if (queues_lock_held == FALSE)
	963	vm_page_lockspin_queues();
	964
	965	vm_page_purgeable_count++;
	966
	967	if (queues_lock_held == FALSE)
	968	vm_page_unlock_queues();
	969	}
	970	}
	971
	972	/*
	973	* vm_page_replace:
	974	*
	975	* Exactly like vm_page_insert, except that we first
	976	* remove any existing page at the given offset in object.
	977	*
	978	* The object and page queues must be locked.
	979	*/
	980
	981	void
	982	vm_page_replace(
	983	register vm_page_t mem,
	984	register vm_object_t object,
	985	register vm_object_offset_t offset)
	986	{
	987	vm_page_bucket_t *bucket;
	988	vm_page_t found_m = VM_PAGE_NULL;
	989
	990	VM_PAGE_CHECK(mem);
	991	vm_object_lock_assert_exclusive(object);
	992	#if DEBUG
	993	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
	994
	995	if (mem->tabled \|\| mem->object != VM_OBJECT_NULL)
	996	panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
	997	"already in (obj=%p,off=0x%llx)",
	998	mem, object, offset, mem->object, mem->offset);
	999	#endif
	1000	/*
	1001	* Record the object/offset pair in this page
	1002	*/
	1003
	1004	mem->object = object;
	1005	mem->offset = offset;
	1006
	1007	/*
	1008	* Insert it into the object_object/offset hash table,
	1009	* replacing any page that might have been there.
	1010	*/
	1011
	1012	bucket = &vm_page_buckets[vm_page_hash(object, offset)];
	1013	simple_lock(&vm_page_bucket_lock);
	1014
	1015	if (bucket->pages) {
	1016	vm_page_t *mp = &bucket->pages;
	1017	register vm_page_t m = *mp;
	1018
	1019	do {
	1020	if (m->object == object && m->offset == offset) {
	1021	/*
	1022	* Remove old page from hash list
	1023	*/
	1024	*mp = m->next;
	1025
	1026	found_m = m;
	1027	break;
	1028	}
	1029	mp = &m->next;
	1030	} while ((m = *mp));
	1031
	1032	mem->next = bucket->pages;
	1033	} else {
	1034	mem->next = VM_PAGE_NULL;
	1035	}
	1036	/*
	1037	* insert new page at head of hash list
	1038	*/
	1039	bucket->pages = mem;
	1040
	1041	simple_unlock(&vm_page_bucket_lock);
	1042
	1043	if (found_m) {
	1044	/*
	1045	* there was already a page at the specified
	1046	* offset for this object... remove it from
	1047	* the object and free it back to the free list
	1048	*/
	1049	VM_PAGE_REMOVE(found_m);
	1050	found_m->tabled = FALSE;
	1051
	1052	found_m->object = VM_OBJECT_NULL;
	1053	found_m->offset = (vm_object_offset_t) -1;
	1054	object->resident_page_count--;
	1055
	1056	if (object->purgable == VM_PURGABLE_VOLATILE \|\|
	1057	object->purgable == VM_PURGABLE_EMPTY) {
	1058	assert(vm_page_purgeable_count > 0);
	1059	vm_page_purgeable_count--;
	1060	}
	1061
	1062	/*
	1063	* Return page to the free list.
	1064	* Note the page is not tabled now
	1065	*/
	1066	vm_page_free(found_m);
	1067	}
	1068	/*
	1069	* Now link into the object's list of backed pages.
	1070	*/
	1071
	1072	VM_PAGE_INSERT(mem, object);
	1073	mem->tabled = TRUE;
	1074
	1075	/*
	1076	* And show that the object has one more resident
	1077	* page.
	1078	*/
	1079
	1080	object->resident_page_count++;
	1081
	1082	if (object->purgable == VM_PURGABLE_VOLATILE \|\|
	1083	object->purgable == VM_PURGABLE_EMPTY) {
	1084	vm_page_purgeable_count++;
	1085	}
	1086	}
	1087
	1088	/*
	1089	* vm_page_remove: [ internal use only ]
	1090	*
	1091	* Removes the given mem entry from the object/offset-page
	1092	* table and the object page list.
	1093	*
	1094	* The object and page queues must be locked.
	1095	*/
	1096
	1097	void
	1098	vm_page_remove(
	1099	register vm_page_t mem)
	1100	{
	1101	register vm_page_bucket_t *bucket;
	1102	register vm_page_t this;
	1103
	1104	XPR(XPR_VM_PAGE,
	1105	"vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
	1106	(integer_t)mem->object, (integer_t)mem->offset,
	1107	(integer_t)mem, 0,0);
	1108	#if DEBUG
	1109	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
	1110	#endif
	1111	vm_object_lock_assert_exclusive(mem->object);
	1112	assert(mem->tabled);
	1113	assert(!mem->cleaning);
	1114	VM_PAGE_CHECK(mem);
	1115
	1116
	1117	/*
	1118	* Remove from the object_object/offset hash table
	1119	*/
	1120
	1121	bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
	1122	simple_lock(&vm_page_bucket_lock);
	1123	if ((this = bucket->pages) == mem) {
	1124	/* optimize for common case */
	1125
	1126	bucket->pages = mem->next;
	1127	} else {
	1128	register vm_page_t *prev;
	1129
	1130	for (prev = &this->next;
	1131	(this = *prev) != mem;
	1132	prev = &this->next)
	1133	continue;
	1134	*prev = this->next;
	1135	}
	1136	#if MACH_PAGE_HASH_STATS
	1137	bucket->cur_count--;
	1138	#endif /* MACH_PAGE_HASH_STATS */
	1139	simple_unlock(&vm_page_bucket_lock);
	1140
	1141	/*
	1142	* Now remove from the object's list of backed pages.
	1143	*/
	1144
	1145	VM_PAGE_REMOVE(mem);
	1146
	1147	/*
	1148	* And show that the object has one fewer resident
	1149	* page.
	1150	*/
	1151
	1152	mem->object->resident_page_count--;
	1153
	1154	if (mem->object->purgable == VM_PURGABLE_VOLATILE \|\|
	1155	mem->object->purgable == VM_PURGABLE_EMPTY) {
	1156	assert(vm_page_purgeable_count > 0);
	1157	vm_page_purgeable_count--;
	1158	}
	1159	mem->tabled = FALSE;
	1160	mem->object = VM_OBJECT_NULL;
	1161	mem->offset = (vm_object_offset_t) -1;
	1162	}
	1163
	1164	/*
	1165	* vm_page_lookup:
	1166	*
	1167	* Returns the page associated with the object/offset
	1168	* pair specified; if none is found, VM_PAGE_NULL is returned.
	1169	*
	1170	* The object must be locked. No side effects.
	1171	*/
	1172
	1173	unsigned long vm_page_lookup_hint = 0;
	1174	unsigned long vm_page_lookup_hint_next = 0;
	1175	unsigned long vm_page_lookup_hint_prev = 0;
	1176	unsigned long vm_page_lookup_hint_miss = 0;
	1177	unsigned long vm_page_lookup_bucket_NULL = 0;
	1178	unsigned long vm_page_lookup_miss = 0;
	1179
	1180
	1181	vm_page_t
	1182	vm_page_lookup(
	1183	register vm_object_t object,
	1184	register vm_object_offset_t offset)
	1185	{
	1186	register vm_page_t mem;
	1187	register vm_page_bucket_t *bucket;
	1188	queue_entry_t qe;
	1189
	1190	vm_object_lock_assert_held(object);
	1191	mem = object->memq_hint;
	1192
	1193	if (mem != VM_PAGE_NULL) {
	1194	assert(mem->object == object);
	1195
	1196	if (mem->offset == offset) {
	1197	vm_page_lookup_hint++;
	1198	return mem;
	1199	}
	1200	qe = queue_next(&mem->listq);
	1201
	1202	if (! queue_end(&object->memq, qe)) {
	1203	vm_page_t next_page;
	1204
	1205	next_page = (vm_page_t) qe;
	1206	assert(next_page->object == object);
	1207
	1208	if (next_page->offset == offset) {
	1209	vm_page_lookup_hint_next++;
	1210	object->memq_hint = next_page; /* new hint */
	1211	return next_page;
	1212	}
	1213	}
	1214	qe = queue_prev(&mem->listq);
	1215
	1216	if (! queue_end(&object->memq, qe)) {
	1217	vm_page_t prev_page;
	1218
	1219	prev_page = (vm_page_t) qe;
	1220	assert(prev_page->object == object);
	1221
	1222	if (prev_page->offset == offset) {
	1223	vm_page_lookup_hint_prev++;
	1224	object->memq_hint = prev_page; /* new hint */
	1225	return prev_page;
	1226	}
	1227	}
	1228	}
	1229	/*
	1230	* Search the hash table for this object/offset pair
	1231	*/
	1232	bucket = &vm_page_buckets[vm_page_hash(object, offset)];
	1233
	1234	/*
	1235	* since we hold the object lock, we are guaranteed that no
	1236	* new pages can be inserted into this object... this in turn
	1237	* guarantess that the page we're looking for can't exist
	1238	* if the bucket it hashes to is currently NULL even when looked
	1239	* at outside the scope of the hash bucket lock... this is a
	1240	* really cheap optimiztion to avoid taking the lock
	1241	*/
	1242	if (bucket->pages == VM_PAGE_NULL) {
	1243	vm_page_lookup_bucket_NULL++;
	1244
	1245	return (VM_PAGE_NULL);
	1246	}
	1247	simple_lock(&vm_page_bucket_lock);
	1248
	1249	for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
	1250	VM_PAGE_CHECK(mem);
	1251	if ((mem->object == object) && (mem->offset == offset))
	1252	break;
	1253	}
	1254	simple_unlock(&vm_page_bucket_lock);
	1255
	1256	if (mem != VM_PAGE_NULL) {
	1257	if (object->memq_hint != VM_PAGE_NULL) {
	1258	vm_page_lookup_hint_miss++;
	1259	}
	1260	assert(mem->object == object);
	1261	object->memq_hint = mem;
	1262	} else
	1263	vm_page_lookup_miss++;
	1264
	1265	return(mem);
	1266	}
	1267
	1268
	1269	/*
	1270	* vm_page_rename:
	1271	*
	1272	* Move the given memory entry from its
	1273	* current object to the specified target object/offset.
	1274	*
	1275	* The object must be locked.
	1276	*/
	1277	void
	1278	vm_page_rename(
	1279	register vm_page_t mem,
	1280	register vm_object_t new_object,
	1281	vm_object_offset_t new_offset,
	1282	boolean_t encrypted_ok)
	1283	{
	1284	assert(mem->object != new_object);
	1285
	1286	/*
	1287	* ENCRYPTED SWAP:
	1288	* The encryption key is based on the page's memory object
	1289	* (aka "pager") and paging offset. Moving the page to
	1290	* another VM object changes its "pager" and "paging_offset"
	1291	* so it has to be decrypted first, or we would lose the key.
	1292	*
	1293	* One exception is VM object collapsing, where we transfer pages
	1294	* from one backing object to its parent object. This operation also
	1295	* transfers the paging information, so the <pager,paging_offset> info
	1296	* should remain consistent. The caller (vm_object_do_collapse())
	1297	* sets "encrypted_ok" in this case.
	1298	*/
	1299	if (!encrypted_ok && mem->encrypted) {
	1300	panic("vm_page_rename: page %p is encrypted\n", mem);
	1301	}
	1302
	1303	/*
	1304	* Changes to mem->object require the page lock because
	1305	* the pageout daemon uses that lock to get the object.
	1306	*/
	1307
	1308	XPR(XPR_VM_PAGE,
	1309	"vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
	1310	(integer_t)new_object, (integer_t)new_offset,
	1311	(integer_t)mem, 0,0);
	1312
	1313	vm_page_lockspin_queues();
	1314	vm_page_remove(mem);
	1315	vm_page_insert(mem, new_object, new_offset);
	1316	vm_page_unlock_queues();
	1317	}
	1318
	1319	/*
	1320	* vm_page_init:
	1321	*
	1322	* Initialize the fields in a new page.
	1323	* This takes a structure with random values and initializes it
	1324	* so that it can be given to vm_page_release or vm_page_insert.
	1325	*/
	1326	void
	1327	vm_page_init(
	1328	vm_page_t mem,
	1329	ppnum_t phys_page)
	1330	{
	1331	assert(phys_page);
	1332	*mem = vm_page_template;
	1333	mem->phys_page = phys_page;
	1334	}
	1335
	1336	/*
	1337	* vm_page_grab_fictitious:
	1338	*
	1339	* Remove a fictitious page from the free list.
	1340	* Returns VM_PAGE_NULL if there are no free pages.
	1341	*/
	1342	int c_vm_page_grab_fictitious = 0;
	1343	int c_vm_page_release_fictitious = 0;
	1344	int c_vm_page_more_fictitious = 0;
	1345
	1346	extern vm_page_t vm_page_grab_fictitious_common(vm_offset_t phys_addr);
	1347
	1348	vm_page_t
	1349	vm_page_grab_fictitious_common(
	1350	vm_offset_t phys_addr)
	1351	{
	1352	register vm_page_t m;
	1353
	1354	m = (vm_page_t)zget(vm_page_zone);
	1355	if (m) {
	1356	vm_page_init(m, phys_addr);
	1357	m->fictitious = TRUE;
	1358	}
	1359
	1360	c_vm_page_grab_fictitious++;
	1361	return m;
	1362	}
	1363
	1364	vm_page_t
	1365	vm_page_grab_fictitious(void)
	1366	{
	1367	return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
	1368	}
	1369
	1370	vm_page_t
	1371	vm_page_grab_guard(void)
	1372	{
	1373	return vm_page_grab_fictitious_common(vm_page_guard_addr);
	1374	}
	1375
	1376	/*
	1377	* vm_page_release_fictitious:
	1378	*
	1379	* Release a fictitious page to the free list.
	1380	*/
	1381
	1382	void
	1383	vm_page_release_fictitious(
	1384	register vm_page_t m)
	1385	{
	1386	assert(!m->free);
	1387	assert(m->busy);
	1388	assert(m->fictitious);
	1389	assert(m->phys_page == vm_page_fictitious_addr \|\|
	1390	m->phys_page == vm_page_guard_addr);
	1391
	1392	c_vm_page_release_fictitious++;
	1393	#if DEBUG
	1394	if (m->free)
	1395	panic("vm_page_release_fictitious");
	1396	#endif
	1397	m->free = TRUE;
	1398	zfree(vm_page_zone, m);
	1399	}
	1400
	1401	/*
	1402	* vm_page_more_fictitious:
	1403	*
	1404	* Add more fictitious pages to the free list.
	1405	* Allowed to block. This routine is way intimate
	1406	* with the zones code, for several reasons:
	1407	* 1. we need to carve some page structures out of physical
	1408	* memory before zones work, so they _cannot_ come from
	1409	* the zone_map.
	1410	* 2. the zone needs to be collectable in order to prevent
	1411	* growth without bound. These structures are used by
	1412	* the device pager (by the hundreds and thousands), as
	1413	* private pages for pageout, and as blocking pages for
	1414	* pagein. Temporary bursts in demand should not result in
	1415	* permanent allocation of a resource.
	1416	* 3. To smooth allocation humps, we allocate single pages
	1417	* with kernel_memory_allocate(), and cram them into the
	1418	* zone. This also allows us to initialize the vm_page_t's
	1419	* on the way into the zone, so that zget() always returns
	1420	* an initialized structure. The zone free element pointer
	1421	* and the free page pointer are both the first item in the
	1422	* vm_page_t.
	1423	* 4. By having the pages in the zone pre-initialized, we need
	1424	* not keep 2 levels of lists. The garbage collector simply
	1425	* scans our list, and reduces physical memory usage as it
	1426	* sees fit.
	1427	*/
	1428
	1429	void vm_page_more_fictitious(void)
	1430	{
	1431	register vm_page_t m;
	1432	vm_offset_t addr;
	1433	kern_return_t retval;
	1434	int i;
	1435
	1436	c_vm_page_more_fictitious++;
	1437
	1438	/*
	1439	* Allocate a single page from the zone_map. Do not wait if no physical
	1440	* pages are immediately available, and do not zero the space. We need
	1441	* our own blocking lock here to prevent having multiple,
	1442	* simultaneous requests from piling up on the zone_map lock. Exactly
	1443	* one (of our) threads should be potentially waiting on the map lock.
	1444	* If winner is not vm-privileged, then the page allocation will fail,
	1445	* and it will temporarily block here in the vm_page_wait().
	1446	*/
	1447	mutex_lock(&vm_page_alloc_lock);
	1448	/*
	1449	* If another thread allocated space, just bail out now.
	1450	*/
	1451	if (zone_free_count(vm_page_zone) > 5) {
	1452	/*
	1453	* The number "5" is a small number that is larger than the
	1454	* number of fictitious pages that any single caller will
	1455	* attempt to allocate. Otherwise, a thread will attempt to
	1456	* acquire a fictitious page (vm_page_grab_fictitious), fail,
	1457	* release all of the resources and locks already acquired,
	1458	* and then call this routine. This routine finds the pages
	1459	* that the caller released, so fails to allocate new space.
	1460	* The process repeats infinitely. The largest known number
	1461	* of fictitious pages required in this manner is 2. 5 is
	1462	* simply a somewhat larger number.
	1463	*/
	1464	mutex_unlock(&vm_page_alloc_lock);
	1465	return;
	1466	}
	1467
	1468	retval = kernel_memory_allocate(zone_map,
	1469	&addr, PAGE_SIZE, VM_PROT_ALL,
	1470	KMA_KOBJECT\|KMA_NOPAGEWAIT);
	1471	if (retval != KERN_SUCCESS) {
	1472	/*
	1473	* No page was available. Tell the pageout daemon, drop the
	1474	* lock to give another thread a chance at it, and
	1475	* wait for the pageout daemon to make progress.
	1476	*/
	1477	mutex_unlock(&vm_page_alloc_lock);
	1478	vm_page_wait(THREAD_UNINT);
	1479	return;
	1480	}
	1481	/*
	1482	* Initialize as many vm_page_t's as will fit on this page. This
	1483	* depends on the zone code disturbing ONLY the first item of
	1484	* each zone element.
	1485	*/
	1486	m = (vm_page_t)addr;
	1487	for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
	1488	vm_page_init(m, vm_page_fictitious_addr);
	1489	m->fictitious = TRUE;
	1490	m++;
	1491	}
	1492	zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
	1493	mutex_unlock(&vm_page_alloc_lock);
	1494	}
	1495
	1496
	1497	/*
	1498	* vm_pool_low():
	1499	*
	1500	* Return true if it is not likely that a non-vm_privileged thread
	1501	* can get memory without blocking. Advisory only, since the
	1502	* situation may change under us.
	1503	*/
	1504	int
	1505	vm_pool_low(void)
	1506	{
	1507	/* No locking, at worst we will fib. */
	1508	return( vm_page_free_count < vm_page_free_reserved );
	1509	}
	1510
	1511
	1512
	1513	/*
	1514	* this is an interface to support bring-up of drivers
	1515	* on platforms with physical memory > 4G...
	1516	*/
	1517	int vm_himemory_mode = 0;
	1518
	1519
	1520	/*
	1521	* this interface exists to support hardware controllers
	1522	* incapable of generating DMAs with more than 32 bits
	1523	* of address on platforms with physical memory > 4G...
	1524	*/
	1525	unsigned int vm_lopage_free_count = 0;
	1526	unsigned int vm_lopage_max_count = 0;
	1527	queue_head_t vm_lopage_queue_free;
	1528
	1529	vm_page_t
	1530	vm_page_grablo(void)
	1531	{
	1532	register vm_page_t mem;
	1533	unsigned int vm_lopage_alloc_count;
	1534
	1535	if (vm_lopage_poolsize == 0)
	1536	return (vm_page_grab());
	1537
	1538	mutex_lock(&vm_page_queue_free_lock);
	1539
	1540	if (! queue_empty(&vm_lopage_queue_free)) {
	1541	queue_remove_first(&vm_lopage_queue_free,
	1542	mem,
	1543	vm_page_t,
	1544	pageq);
	1545	assert(mem->free);
	1546	assert(mem->busy);
	1547	assert(!mem->pmapped);
	1548	assert(!mem->wpmapped);
	1549
	1550	mem->pageq.next = NULL;
	1551	mem->pageq.prev = NULL;
	1552	mem->free = FALSE;
	1553
	1554	vm_lopage_free_count--;
	1555	vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count;
	1556	if (vm_lopage_alloc_count > vm_lopage_max_count)
	1557	vm_lopage_max_count = vm_lopage_alloc_count;
	1558	} else {
	1559	mem = VM_PAGE_NULL;
	1560	}
	1561	mutex_unlock(&vm_page_queue_free_lock);
	1562
	1563	return (mem);
	1564	}
	1565
	1566
	1567	/*
	1568	* vm_page_grab:
	1569	*
	1570	* first try to grab a page from the per-cpu free list...
	1571	* this must be done while pre-emption is disabled... if
	1572	* a page is available, we're done...
	1573	* if no page is available, grab the vm_page_queue_free_lock
	1574	* and see if current number of free pages would allow us
	1575	* to grab at least 1... if not, return VM_PAGE_NULL as before...
	1576	* if there are pages available, disable preemption and
	1577	* recheck the state of the per-cpu free list... we could
	1578	* have been preempted and moved to a different cpu, or
	1579	* some other thread could have re-filled it... if still
	1580	* empty, figure out how many pages we can steal from the
	1581	* global free queue and move to the per-cpu queue...
	1582	* return 1 of these pages when done... only wakeup the
	1583	* pageout_scan thread if we moved pages from the global
	1584	* list... no need for the wakeup if we've satisfied the
	1585	* request from the per-cpu queue.
	1586	*/
	1587
	1588	#define COLOR_GROUPS_TO_STEAL 4
	1589
	1590
	1591	vm_page_t
	1592	vm_page_grab( void )
	1593	{
	1594	vm_page_t mem;
	1595
	1596
	1597	disable_preemption();
	1598
	1599	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
	1600	return_page_from_cpu_list:
	1601	PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
	1602	PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
	1603	mem->pageq.next = NULL;
	1604
	1605	enable_preemption();
	1606
	1607	assert(mem->listq.next == NULL && mem->listq.prev == NULL);
	1608	assert(mem->tabled == FALSE);
	1609	assert(mem->object == VM_OBJECT_NULL);
	1610	assert(!mem->laundry);
	1611	assert(!mem->free);
	1612	assert(pmap_verify_free(mem->phys_page));
	1613	assert(mem->busy);
	1614	assert(!mem->encrypted);
	1615	assert(!mem->pmapped);
	1616	assert(!mem->wpmapped);
	1617
	1618	return mem;
	1619	}
	1620	enable_preemption();
	1621
	1622
	1623	mutex_lock(&vm_page_queue_free_lock);
	1624
	1625	/*
	1626	* Optionally produce warnings if the wire or gobble
	1627	* counts exceed some threshold.
	1628	*/
	1629	if (vm_page_wire_count_warning > 0
	1630	&& vm_page_wire_count >= vm_page_wire_count_warning) {
	1631	printf("mk: vm_page_grab(): high wired page count of %d\n",
	1632	vm_page_wire_count);
	1633	assert(vm_page_wire_count < vm_page_wire_count_warning);
	1634	}
	1635	if (vm_page_gobble_count_warning > 0
	1636	&& vm_page_gobble_count >= vm_page_gobble_count_warning) {
	1637	printf("mk: vm_page_grab(): high gobbled page count of %d\n",
	1638	vm_page_gobble_count);
	1639	assert(vm_page_gobble_count < vm_page_gobble_count_warning);
	1640	}
	1641
	1642	/*
	1643	* Only let privileged threads (involved in pageout)
	1644	* dip into the reserved pool.
	1645	*/
	1646	if ((vm_page_free_count < vm_page_free_reserved) &&
	1647	!(current_thread()->options & TH_OPT_VMPRIV)) {
	1648	mutex_unlock(&vm_page_queue_free_lock);
	1649	mem = VM_PAGE_NULL;
	1650	}
	1651	else {
	1652	vm_page_t head;
	1653	vm_page_t tail;
	1654	unsigned int pages_to_steal;
	1655	unsigned int color;
	1656
	1657	while ( vm_page_free_count == 0 ) {
	1658
	1659	mutex_unlock(&vm_page_queue_free_lock);
	1660	/*
	1661	* must be a privileged thread to be
	1662	* in this state since a non-privileged
	1663	* thread would have bailed if we were
	1664	* under the vm_page_free_reserved mark
	1665	*/
	1666	VM_PAGE_WAIT();
	1667	mutex_lock(&vm_page_queue_free_lock);
	1668	}
	1669
	1670	disable_preemption();
	1671
	1672	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
	1673	mutex_unlock(&vm_page_queue_free_lock);
	1674
	1675	/*
	1676	* we got preempted and moved to another processor
	1677	* or we got preempted and someone else ran and filled the cache
	1678	*/
	1679	goto return_page_from_cpu_list;
	1680	}
	1681	if (vm_page_free_count <= vm_page_free_reserved)
	1682	pages_to_steal = 1;
	1683	else {
	1684	pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
	1685
	1686	if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
	1687	pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
	1688	}
	1689	color = PROCESSOR_DATA(current_processor(), start_color);
	1690	head = tail = NULL;
	1691
	1692	while (pages_to_steal--) {
	1693	if (--vm_page_free_count < vm_page_free_count_minimum)
	1694	vm_page_free_count_minimum = vm_page_free_count;
	1695
	1696	while (queue_empty(&vm_page_queue_free[color]))
	1697	color = (color + 1) & vm_color_mask;
	1698
	1699	queue_remove_first(&vm_page_queue_free[color],
	1700	mem,
	1701	vm_page_t,
	1702	pageq);
	1703	mem->pageq.next = NULL;
	1704	mem->pageq.prev = NULL;
	1705
	1706	color = (color + 1) & vm_color_mask;
	1707
	1708	if (head == NULL)
	1709	head = mem;
	1710	else
	1711	tail->pageq.next = (queue_t)mem;
	1712	tail = mem;
	1713
	1714	mem->pageq.prev = NULL;
	1715	assert(mem->listq.next == NULL && mem->listq.prev == NULL);
	1716	assert(mem->tabled == FALSE);
	1717	assert(mem->object == VM_OBJECT_NULL);
	1718	assert(!mem->laundry);
	1719	assert(mem->free);
	1720	mem->free = FALSE;
	1721
	1722	assert(pmap_verify_free(mem->phys_page));
	1723	assert(mem->busy);
	1724	assert(!mem->free);
	1725	assert(!mem->encrypted);
	1726	assert(!mem->pmapped);
	1727	assert(!mem->wpmapped);
	1728	}
	1729	PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
	1730	PROCESSOR_DATA(current_processor(), start_color) = color;
	1731
	1732	/*
	1733	* satisfy this request
	1734	*/
	1735	PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
	1736	mem = head;
	1737	mem->pageq.next = NULL;
	1738
	1739	mutex_unlock(&vm_page_queue_free_lock);
	1740
	1741	enable_preemption();
	1742	}
	1743	/*
	1744	* Decide if we should poke the pageout daemon.
	1745	* We do this if the free count is less than the low
	1746	* water mark, or if the free count is less than the high
	1747	* water mark (but above the low water mark) and the inactive
	1748	* count is less than its target.
	1749	*
	1750	* We don't have the counts locked ... if they change a little,
	1751	* it doesn't really matter.
	1752	*/
	1753	if ((vm_page_free_count < vm_page_free_min) \|\|
	1754	((vm_page_free_count < vm_page_free_target) &&
	1755	((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
	1756	thread_wakeup((event_t) &vm_page_free_wanted);
	1757
	1758	#if CONFIG_EMBEDDED
	1759	{
	1760	int percent_avail;
	1761
	1762	/*
	1763	* Decide if we need to poke the memorystatus notification thread.
	1764	*/
	1765	percent_avail =
	1766	(vm_page_active_count + vm_page_inactive_count +
	1767	vm_page_speculative_count + vm_page_free_count +
	1768	(IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
	1769	atop_64(max_mem);
	1770	if (percent_avail <= (kern_memorystatus_level - 5)) {
	1771	kern_memorystatus_level = percent_avail;
	1772	thread_wakeup((event_t)&kern_memorystatus_wakeup);
	1773	}
	1774	}
	1775	#endif
	1776
	1777	// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
	1778
	1779	return mem;
	1780	}
	1781
	1782	/*
	1783	* vm_page_release:
	1784	*
	1785	* Return a page to the free list.
	1786	*/
	1787
	1788	void
	1789	vm_page_release(
	1790	register vm_page_t mem)
	1791	{
	1792	unsigned int color;
	1793	#if 0
	1794	unsigned int pindex;
	1795	phys_entry *physent;
	1796
	1797	physent = mapping_phys_lookup(mem->phys_page, &pindex); /* (BRINGUP) */
	1798	if(physent->ppLink & ppN) { /* (BRINGUP) */
	1799	panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
	1800	}
	1801	physent->ppLink = physent->ppLink \| ppN; /* (BRINGUP) */
	1802	#endif
	1803	assert(!mem->private && !mem->fictitious);
	1804
	1805	// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
	1806
	1807	mutex_lock(&vm_page_queue_free_lock);
	1808	#if DEBUG
	1809	if (mem->free)
	1810	panic("vm_page_release");
	1811	#endif
	1812	mem->free = TRUE;
	1813
	1814	assert(mem->busy);
	1815	assert(!mem->laundry);
	1816	assert(mem->object == VM_OBJECT_NULL);
	1817	assert(mem->pageq.next == NULL &&
	1818	mem->pageq.prev == NULL);
	1819	assert(mem->listq.next == NULL &&
	1820	mem->listq.prev == NULL);
	1821
	1822	if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
	1823	/*
	1824	* this exists to support hardware controllers
	1825	* incapable of generating DMAs with more than 32 bits
	1826	* of address on platforms with physical memory > 4G...
	1827	*/
	1828	queue_enter_first(&vm_lopage_queue_free,
	1829	mem,
	1830	vm_page_t,
	1831	pageq);
	1832	vm_lopage_free_count++;
	1833	} else {
	1834	color = mem->phys_page & vm_color_mask;
	1835	queue_enter_first(&vm_page_queue_free[color],
	1836	mem,
	1837	vm_page_t,
	1838	pageq);
	1839	vm_page_free_count++;
	1840	/*
	1841	* Check if we should wake up someone waiting for page.
	1842	* But don't bother waking them unless they can allocate.
	1843	*
	1844	* We wakeup only one thread, to prevent starvation.
	1845	* Because the scheduling system handles wait queues FIFO,
	1846	* if we wakeup all waiting threads, one greedy thread
	1847	* can starve multiple niceguy threads. When the threads
	1848	* all wakeup, the greedy threads runs first, grabs the page,
	1849	* and waits for another page. It will be the first to run
	1850	* when the next page is freed.
	1851	*
	1852	* However, there is a slight danger here.
	1853	* The thread we wake might not use the free page.
	1854	* Then the other threads could wait indefinitely
	1855	* while the page goes unused. To forestall this,
	1856	* the pageout daemon will keep making free pages
	1857	* as long as vm_page_free_wanted is non-zero.
	1858	*/
	1859
	1860	if ((vm_page_free_wanted_privileged > 0) && vm_page_free_count) {
	1861	vm_page_free_wanted_privileged--;
	1862	thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
	1863	} else if ((vm_page_free_wanted > 0) &&
	1864	(vm_page_free_count >= vm_page_free_reserved)) {
	1865	vm_page_free_wanted--;
	1866	thread_wakeup_one((event_t) &vm_page_free_count);
	1867	}
	1868	}
	1869	mutex_unlock(&vm_page_queue_free_lock);
	1870
	1871	#if CONFIG_EMBEDDED
	1872	{
	1873	int percent_avail;
	1874
	1875	/*
	1876	* Decide if we need to poke the memorystatus notification thread.
	1877	* Locking is not a big issue, as only a single thread delivers these.
	1878	*/
	1879	percent_avail =
	1880	(vm_page_active_count + vm_page_inactive_count +
	1881	vm_page_speculative_count + vm_page_free_count +
	1882	(IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
	1883	atop_64(max_mem);
	1884	if (percent_avail >= (kern_memorystatus_level + 5)) {
	1885	kern_memorystatus_level = percent_avail;
	1886	thread_wakeup((event_t)&kern_memorystatus_wakeup);
	1887	}
	1888	}
	1889	#endif
	1890	}
	1891
	1892	/*
	1893	* vm_page_wait:
	1894	*
	1895	* Wait for a page to become available.
	1896	* If there are plenty of free pages, then we don't sleep.
	1897	*
	1898	* Returns:
	1899	* TRUE: There may be another page, try again
	1900	* FALSE: We were interrupted out of our wait, don't try again
	1901	*/
	1902
	1903	boolean_t
	1904	vm_page_wait(
	1905	int interruptible )
	1906	{
	1907	/*
	1908	* We can't use vm_page_free_reserved to make this
	1909	* determination. Consider: some thread might
	1910	* need to allocate two pages. The first allocation
	1911	* succeeds, the second fails. After the first page is freed,
	1912	* a call to vm_page_wait must really block.
	1913	*/
	1914	kern_return_t wait_result;
	1915	int need_wakeup = 0;
	1916	int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
	1917
	1918	mutex_lock(&vm_page_queue_free_lock);
	1919
	1920	if (is_privileged && vm_page_free_count) {
	1921	mutex_unlock(&vm_page_queue_free_lock);
	1922	return TRUE;
	1923	}
	1924	if (vm_page_free_count < vm_page_free_target) {
	1925
	1926	if (is_privileged) {
	1927	if (vm_page_free_wanted_privileged++ == 0)
	1928	need_wakeup = 1;
	1929	wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
	1930	} else {
	1931	if (vm_page_free_wanted++ == 0)
	1932	need_wakeup = 1;
	1933	wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
	1934	}
	1935	mutex_unlock(&vm_page_queue_free_lock);
	1936	counter(c_vm_page_wait_block++);
	1937
	1938	if (need_wakeup)
	1939	thread_wakeup((event_t)&vm_page_free_wanted);
	1940
	1941	if (wait_result == THREAD_WAITING)
	1942	wait_result = thread_block(THREAD_CONTINUE_NULL);
	1943
	1944	return(wait_result == THREAD_AWAKENED);
	1945	} else {
	1946	mutex_unlock(&vm_page_queue_free_lock);
	1947	return TRUE;
	1948	}
	1949	}
	1950
	1951	/*
	1952	* vm_page_alloc:
	1953	*
	1954	* Allocate and return a memory cell associated
	1955	* with this VM object/offset pair.
	1956	*
	1957	* Object must be locked.
	1958	*/
	1959
	1960	vm_page_t
	1961	vm_page_alloc(
	1962	vm_object_t object,
	1963	vm_object_offset_t offset)
	1964	{
	1965	register vm_page_t mem;
	1966
	1967	vm_object_lock_assert_exclusive(object);
	1968	mem = vm_page_grab();
	1969	if (mem == VM_PAGE_NULL)
	1970	return VM_PAGE_NULL;
	1971
	1972	vm_page_insert(mem, object, offset);
	1973
	1974	return(mem);
	1975	}
	1976
	1977	vm_page_t
	1978	vm_page_alloclo(
	1979	vm_object_t object,
	1980	vm_object_offset_t offset)
	1981	{
	1982	register vm_page_t mem;
	1983
	1984	vm_object_lock_assert_exclusive(object);
	1985	mem = vm_page_grablo();
	1986	if (mem == VM_PAGE_NULL)
	1987	return VM_PAGE_NULL;
	1988
	1989	vm_page_insert(mem, object, offset);
	1990
	1991	return(mem);
	1992	}
	1993
	1994
	1995	/*
	1996	* vm_page_alloc_guard:
	1997	*
	1998	* Allocate a ficticious page which will be used
	1999	* as a guard page. The page will be inserted into
	2000	* the object and returned to the caller.
	2001	*/
	2002
	2003	vm_page_t
	2004	vm_page_alloc_guard(
	2005	vm_object_t object,
	2006	vm_object_offset_t offset)
	2007	{
	2008	register vm_page_t mem;
	2009
	2010	vm_object_lock_assert_exclusive(object);
	2011	mem = vm_page_grab_guard();
	2012	if (mem == VM_PAGE_NULL)
	2013	return VM_PAGE_NULL;
	2014
	2015	vm_page_insert(mem, object, offset);
	2016
	2017	return(mem);
	2018	}
	2019
	2020
	2021	counter(unsigned int c_laundry_pages_freed = 0;)
	2022
	2023	boolean_t vm_page_free_verify = TRUE;
	2024	/*
	2025	* vm_page_free:
	2026	*
	2027	* Returns the given page to the free list,
	2028	* disassociating it with any VM object.
	2029	*
	2030	* Object and page queues must be locked prior to entry.
	2031	*/
	2032	void
	2033	vm_page_free_prepare(
	2034	register vm_page_t mem)
	2035	{
	2036	VM_PAGE_CHECK(mem);
	2037	assert(!mem->free);
	2038	assert(!mem->cleaning);
	2039	assert(!mem->pageout);
	2040
	2041	#if DEBUG
	2042	if (vm_page_free_verify && !mem->fictitious && !mem->private) {
	2043	assert(pmap_verify_free(mem->phys_page));
	2044	}
	2045	if (mem->object)
	2046	vm_object_lock_assert_exclusive(mem->object);
	2047	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
	2048
	2049	if (mem->free)
	2050	panic("vm_page_free: freeing page on free list\n");
	2051	#endif
	2052
	2053	if (mem->laundry) {
	2054	/*
	2055	* We may have to free a page while it's being laundered
	2056	* if we lost its pager (due to a forced unmount, for example).
	2057	* We need to call vm_pageout_throttle_up() before removing
	2058	* the page from its VM object, so that we can find out on
	2059	* which pageout queue the page is.
	2060	*/
	2061	vm_pageout_throttle_up(mem);
	2062	counter(++c_laundry_pages_freed);
	2063	}
	2064
	2065	if (mem->tabled)
	2066	vm_page_remove(mem); /* clears tabled, object, offset */
	2067
	2068	VM_PAGE_QUEUES_REMOVE(mem); /* clears active/inactive/throttled/speculative */
	2069
	2070	if (mem->wire_count) {
	2071	if (!mem->private && !mem->fictitious)
	2072	vm_page_wire_count--;
	2073	mem->wire_count = 0;
	2074	assert(!mem->gobbled);
	2075	} else if (mem->gobbled) {
	2076	if (!mem->private && !mem->fictitious)
	2077	vm_page_wire_count--;
	2078	vm_page_gobble_count--;
	2079	}
	2080	mem->gobbled = FALSE;
	2081
	2082	PAGE_WAKEUP(mem); /* clears wanted */
	2083
	2084	/* Some of these may be unnecessary */
	2085	mem->busy = TRUE;
	2086	mem->absent = FALSE;
	2087	mem->error = FALSE;
	2088	mem->dirty = FALSE;
	2089	mem->precious = FALSE;
	2090	mem->reference = FALSE;
	2091	mem->encrypted = FALSE;
	2092	mem->encrypted_cleaning = FALSE;
	2093	mem->deactivated = FALSE;
	2094	mem->pmapped = FALSE;
	2095	mem->wpmapped = FALSE;
	2096
	2097	if (mem->private) {
	2098	mem->private = FALSE;
	2099	mem->fictitious = TRUE;
	2100	mem->phys_page = vm_page_fictitious_addr;
	2101	}
	2102	if (!mem->fictitious) {
	2103	if (mem->zero_fill == TRUE) {
	2104	mem->zero_fill = FALSE;
	2105	OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
	2106	}
	2107	vm_page_init(mem, mem->phys_page);
	2108	}
	2109	}
	2110
	2111	void
	2112	vm_page_free(
	2113	vm_page_t mem)
	2114	{
	2115	vm_page_free_prepare(mem);
	2116	if (mem->fictitious) {
	2117	vm_page_release_fictitious(mem);
	2118	} else {
	2119	vm_page_release(mem);
	2120	}
	2121	}
	2122
	2123	/*
	2124	* Free a list of pages. The list can be up to several hundred pages,
	2125	* as blocked up by vm_pageout_scan().
	2126	* The big win is not having to take the page q and free list locks once
	2127	* per page. We sort the incoming pages into n lists, one for
	2128	* each color.
	2129	*
	2130	* The page queues must be locked, and are kept locked.
	2131	*/
	2132	void
	2133	vm_page_free_list(
	2134	vm_page_t mem)
	2135	{
	2136	vm_page_t nxt;
	2137	int pg_count = 0;
	2138	int color;
	2139	int inuse_list_head = -1;
	2140
	2141	queue_head_t free_list[MAX_COLORS];
	2142	int inuse[MAX_COLORS];
	2143
	2144	for (color = 0; color < (signed) vm_colors; color++) {
	2145	queue_init(&free_list[color]);
	2146	}
	2147
	2148	#if DEBUG
	2149	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
	2150	#endif
	2151	while (mem) {
	2152	#if DEBUG
	2153	if (mem->tabled \|\| mem->object)
	2154	panic("vm_page_free_list: freeing tabled page\n");
	2155	if (mem->inactive \|\| mem->active \|\| mem->throttled \|\| mem->free)
	2156	panic("vm_page_free_list: freeing page on list\n");
	2157	if (vm_page_free_verify && !mem->fictitious && !mem->private) {
	2158	assert(pmap_verify_free(mem->phys_page));
	2159	}
	2160	#endif
	2161	assert(mem->pageq.prev == NULL);
	2162	assert(mem->busy);
	2163	assert(!mem->free);
	2164	nxt = (vm_page_t)(mem->pageq.next);
	2165
	2166	if (!mem->fictitious) {
	2167	mem->free = TRUE;
	2168
	2169	color = mem->phys_page & vm_color_mask;
	2170	if (queue_empty(&free_list[color])) {
	2171	inuse[color] = inuse_list_head;
	2172	inuse_list_head = color;
	2173	}
	2174	queue_enter_first(&free_list[color],
	2175	mem,
	2176	vm_page_t,
	2177	pageq);
	2178	pg_count++;
	2179	} else {
	2180	assert(mem->phys_page == vm_page_fictitious_addr \|\|
	2181	mem->phys_page == vm_page_guard_addr);
	2182	vm_page_release_fictitious(mem);
	2183	}
	2184	mem = nxt;
	2185	}
	2186	if (pg_count) {
	2187	unsigned int avail_free_count;
	2188
	2189	mutex_lock(&vm_page_queue_free_lock);
	2190
	2191	color = inuse_list_head;
	2192
	2193	while( color != -1 ) {
	2194	vm_page_t first, last;
	2195	vm_page_t first_free;
	2196
	2197	first = (vm_page_t) queue_first(&free_list[color]);
	2198	last = (vm_page_t) queue_last(&free_list[color]);
	2199	first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
	2200
	2201	if (queue_empty(&vm_page_queue_free[color])) {
	2202	queue_last(&vm_page_queue_free[color]) =
	2203	(queue_entry_t) last;
	2204	} else {
	2205	queue_prev(&first_free->pageq) =
	2206	(queue_entry_t) last;
	2207	}
	2208	queue_first(&vm_page_queue_free[color]) =
	2209	(queue_entry_t) first;
	2210	queue_prev(&first->pageq) =
	2211	(queue_entry_t) &vm_page_queue_free[color];
	2212	queue_next(&last->pageq) =
	2213	(queue_entry_t) first_free;
	2214	color = inuse[color];
	2215	}
	2216
	2217	vm_page_free_count += pg_count;
	2218	avail_free_count = vm_page_free_count;
	2219
	2220	while ((vm_page_free_wanted_privileged > 0) && avail_free_count) {
	2221	vm_page_free_wanted_privileged--;
	2222	avail_free_count--;
	2223
	2224	thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
	2225	}
	2226
	2227	if ((vm_page_free_wanted > 0) &&
	2228	(avail_free_count >= vm_page_free_reserved)) {
	2229	unsigned int available_pages;
	2230
	2231	if (avail_free_count >= vm_page_free_reserved) {
	2232	available_pages = (avail_free_count - vm_page_free_reserved);
	2233	} else {
	2234	available_pages = 0;
	2235	}
	2236
	2237	if (available_pages >= vm_page_free_wanted) {
	2238	vm_page_free_wanted = 0;
	2239	thread_wakeup((event_t) &vm_page_free_count);
	2240	} else {
	2241	while (available_pages--) {
	2242	vm_page_free_wanted--;
	2243	thread_wakeup_one((event_t) &vm_page_free_count);
	2244	}
	2245	}
	2246	}
	2247	mutex_unlock(&vm_page_queue_free_lock);
	2248
	2249	#if CONFIG_EMBEDDED
	2250	{
	2251	int percent_avail;
	2252
	2253	/*
	2254	* Decide if we need to poke the memorystatus notification thread.
	2255	*/
	2256	percent_avail =
	2257	(vm_page_active_count + vm_page_inactive_count +
	2258	vm_page_speculative_count + vm_page_free_count +
	2259	(IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
	2260	atop_64(max_mem);
	2261	if (percent_avail >= (kern_memorystatus_level + 5)) {
	2262	kern_memorystatus_level = percent_avail;
	2263	thread_wakeup((event_t)&kern_memorystatus_wakeup);
	2264	}
	2265	}
	2266	#endif
	2267	}
	2268	}
	2269
	2270
	2271	/*
	2272	* vm_page_wire:
	2273	*
	2274	* Mark this page as wired down by yet
	2275	* another map, removing it from paging queues
	2276	* as necessary.
	2277	*
	2278	* The page's object and the page queues must be locked.
	2279	*/
	2280	void
	2281	vm_page_wire(
	2282	register vm_page_t mem)
	2283	{
	2284
	2285	// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
	2286
	2287	VM_PAGE_CHECK(mem);
	2288	#if DEBUG
	2289	if (mem->object)
	2290	vm_object_lock_assert_exclusive(mem->object);
	2291	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
	2292	#endif
	2293	if (mem->wire_count == 0) {
	2294	VM_PAGE_QUEUES_REMOVE(mem);
	2295	if (!mem->private && !mem->fictitious && !mem->gobbled)
	2296	vm_page_wire_count++;
	2297	if (mem->gobbled)
	2298	vm_page_gobble_count--;
	2299	mem->gobbled = FALSE;
	2300	if (mem->zero_fill == TRUE) {
	2301	mem->zero_fill = FALSE;
	2302	OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
	2303	}
	2304	/*
	2305	* ENCRYPTED SWAP:
	2306	* The page could be encrypted, but
	2307	* We don't have to decrypt it here
	2308	* because we don't guarantee that the
	2309	* data is actually valid at this point.
	2310	* The page will get decrypted in
	2311	* vm_fault_wire() if needed.
	2312	*/
	2313	}
	2314	assert(!mem->gobbled);
	2315	mem->wire_count++;
	2316	}
	2317
	2318	/*
	2319	* vm_page_gobble:
	2320	*
	2321	* Mark this page as consumed by the vm/ipc/xmm subsystems.
	2322	*
	2323	* Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
	2324	*/
	2325	void
	2326	vm_page_gobble(
	2327	register vm_page_t mem)
	2328	{
	2329	vm_page_lockspin_queues();
	2330	VM_PAGE_CHECK(mem);
	2331
	2332	assert(!mem->gobbled);
	2333	assert(mem->wire_count == 0);
	2334
	2335	if (!mem->gobbled && mem->wire_count == 0) {
	2336	if (!mem->private && !mem->fictitious)
	2337	vm_page_wire_count++;
	2338	}
	2339	vm_page_gobble_count++;
	2340	mem->gobbled = TRUE;
	2341	vm_page_unlock_queues();
	2342	}
	2343
	2344	/*
	2345	* vm_page_unwire:
	2346	*
	2347	* Release one wiring of this page, potentially
	2348	* enabling it to be paged again.
	2349	*
	2350	* The page's object and the page queues must be locked.
	2351	*/
	2352	void
	2353	vm_page_unwire(
	2354	register vm_page_t mem)
	2355	{
	2356
	2357	// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
	2358
	2359	VM_PAGE_CHECK(mem);
	2360	assert(mem->wire_count > 0);
	2361	#if DEBUG
	2362	if (mem->object)
	2363	vm_object_lock_assert_exclusive(mem->object);
	2364	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
	2365	#endif
	2366	if (--mem->wire_count == 0) {
	2367	assert(!mem->private && !mem->fictitious);
	2368	vm_page_wire_count--;
	2369	assert(!mem->laundry);
	2370	assert(mem->object != kernel_object);
	2371	assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
	2372	if (!IP_VALID(memory_manager_default) &&
	2373	mem->dirty && mem->object->internal &&
	2374	(mem->object->purgable == VM_PURGABLE_DENY \|\|
	2375	mem->object->purgable == VM_PURGABLE_NONVOLATILE \|\|
	2376	mem->object->purgable == VM_PURGABLE_VOLATILE)) {
	2377	queue_enter(&vm_page_queue_throttled, mem, vm_page_t, pageq);
	2378	vm_page_throttled_count++;
	2379	mem->throttled = TRUE;
	2380	} else {
	2381	queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
	2382	vm_page_active_count++;
	2383	mem->active = TRUE;
	2384	}
	2385	mem->reference = TRUE;
	2386	}
	2387	}
	2388
	2389
	2390	/*
	2391	* vm_page_deactivate:
	2392	*
	2393	* Returns the given page to the inactive list,
	2394	* indicating that no physical maps have access
	2395	* to this page. [Used by the physical mapping system.]
	2396	*
	2397	* The page queues must be locked.
	2398	*/
	2399	void
	2400	vm_page_deactivate(
	2401	register vm_page_t m)
	2402	{
	2403	boolean_t rapid_age = FALSE;
	2404
	2405	VM_PAGE_CHECK(m);
	2406	assert(m->object != kernel_object);
	2407	assert(m->phys_page != vm_page_guard_addr);
	2408
	2409	// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
	2410	#if DEBUG
	2411	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
	2412	#endif
	2413	/*
	2414	* This page is no longer very interesting. If it was
	2415	* interesting (active or inactive/referenced), then we
	2416	* clear the reference bit and (re)enter it in the
	2417	* inactive queue. Note wired pages should not have
	2418	* their reference bit cleared.
	2419	*/
	2420	if (m->gobbled) { /* can this happen? */
	2421	assert(m->wire_count == 0);
	2422
	2423	if (!m->private && !m->fictitious)
	2424	vm_page_wire_count--;
	2425	vm_page_gobble_count--;
	2426	m->gobbled = FALSE;
	2427	}
	2428	if (m->private \|\| (m->wire_count != 0))
	2429	return;
	2430
	2431	if (m->active && m->deactivated == TRUE) {
	2432	if (!pmap_is_referenced(m->phys_page))
	2433	rapid_age = TRUE;
	2434	}
	2435	if (rapid_age == FALSE && !m->fictitious && !m->absent)
	2436	pmap_clear_reference(m->phys_page);
	2437
	2438	m->reference = FALSE;
	2439	m->deactivated = FALSE;
	2440	m->no_cache = FALSE;
	2441
	2442	if (!m->inactive) {
	2443	VM_PAGE_QUEUES_REMOVE(m);
	2444
	2445	assert(!m->laundry);
	2446	assert(m->pageq.next == NULL && m->pageq.prev == NULL);
	2447
	2448	if (!IP_VALID(memory_manager_default) &&
	2449	m->dirty && m->object->internal &&
	2450	(m->object->purgable == VM_PURGABLE_DENY \|\|
	2451	m->object->purgable == VM_PURGABLE_NONVOLATILE \|\|
	2452	m->object->purgable == VM_PURGABLE_VOLATILE )) {
	2453	queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
	2454	m->throttled = TRUE;
	2455	vm_page_throttled_count++;
	2456	} else {
	2457	if (rapid_age == TRUE \|\|
	2458	(!m->fictitious && m->object->named && m->object->ref_count == 1)) {
	2459	vm_page_speculate(m, FALSE);
	2460	vm_page_speculative_recreated++;
	2461	return;
	2462	} else {
	2463	if (m->zero_fill) {
	2464	queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
	2465	vm_zf_queue_count++;
	2466	} else {
	2467	queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
	2468	}
	2469	}
	2470	m->inactive = TRUE;
	2471	if (!m->fictitious) {
	2472	vm_page_inactive_count++;
	2473	token_new_pagecount++;
	2474	}
	2475	}
	2476	}
	2477	}
	2478
	2479	/*
	2480	* vm_page_activate:
	2481	*
	2482	* Put the specified page on the active list (if appropriate).
	2483	*
	2484	* The page queues must be locked.
	2485	*/
	2486
	2487	void
	2488	vm_page_activate(
	2489	register vm_page_t m)
	2490	{
	2491	VM_PAGE_CHECK(m);
	2492	#ifdef FIXME_4778297
	2493	assert(m->object != kernel_object);
	2494	#endif
	2495	assert(m->phys_page != vm_page_guard_addr);
	2496	#if DEBUG
	2497	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
	2498	#endif
	2499	if (m->gobbled) {
	2500	assert(m->wire_count == 0);
	2501	if (!m->private && !m->fictitious)
	2502	vm_page_wire_count--;
	2503	vm_page_gobble_count--;
	2504	m->gobbled = FALSE;
	2505	}
	2506	if (m->private)
	2507	return;
	2508
	2509	#if DEBUG
	2510	if (m->active)
	2511	panic("vm_page_activate: already active");
	2512	#endif
	2513
	2514	if (m->speculative) {
	2515	DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
	2516	DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
	2517	}
	2518
	2519	VM_PAGE_QUEUES_REMOVE(m);
	2520
	2521	if (m->wire_count == 0) {
	2522	assert(!m->laundry);
	2523	assert(m->pageq.next == NULL && m->pageq.prev == NULL);
	2524	if (!IP_VALID(memory_manager_default) &&
	2525	!m->fictitious && m->dirty && m->object->internal &&
	2526	(m->object->purgable == VM_PURGABLE_DENY \|\|
	2527	m->object->purgable == VM_PURGABLE_NONVOLATILE \|\|
	2528	m->object->purgable == VM_PURGABLE_VOLATILE )) {
	2529	queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
	2530	m->throttled = TRUE;
	2531	vm_page_throttled_count++;
	2532	} else {
	2533	queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
	2534	m->active = TRUE;
	2535	if (!m->fictitious)
	2536	vm_page_active_count++;
	2537	}
	2538	m->reference = TRUE;
	2539	m->no_cache = FALSE;
	2540	}
	2541	}
	2542
	2543
	2544	/*
	2545	* vm_page_speculate:
	2546	*
	2547	* Put the specified page on the speculative list (if appropriate).
	2548	*
	2549	* The page queues must be locked.
	2550	*/
	2551	void
	2552	vm_page_speculate(
	2553	vm_page_t m,
	2554	boolean_t new)
	2555	{
	2556	struct vm_speculative_age_q *aq;
	2557
	2558	VM_PAGE_CHECK(m);
	2559	assert(m->object != kernel_object);
	2560	assert(!m->speculative && !m->active && !m->inactive && !m->throttled);
	2561	assert(m->phys_page != vm_page_guard_addr);
	2562	assert(m->pageq.next == NULL && m->pageq.prev == NULL);
	2563	#if DEBUG
	2564	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
	2565	#endif
	2566	if (m->wire_count == 0) {
	2567	mach_timespec_t ts;
	2568
	2569	clock_get_system_nanotime(&ts.tv_sec, (unsigned *)&ts.tv_nsec);
	2570
	2571	if (vm_page_speculative_count == 0) {
	2572
	2573	speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
	2574	speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
	2575
	2576	aq = &vm_page_queue_speculative[speculative_age_index];
	2577
	2578	/*
	2579	* set the timer to begin a new group
	2580	*/
	2581	aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
	2582	aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
	2583
	2584	ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
	2585	} else {
	2586	aq = &vm_page_queue_speculative[speculative_age_index];
	2587
	2588	if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
	2589
	2590	speculative_age_index++;
	2591
	2592	if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
	2593	speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
	2594	if (speculative_age_index == speculative_steal_index) {
	2595	speculative_steal_index = speculative_age_index + 1;
	2596
	2597	if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
	2598	speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
	2599	}
	2600	aq = &vm_page_queue_speculative[speculative_age_index];
	2601
	2602	if (!queue_empty(&aq->age_q))
	2603	vm_page_speculate_ageit(aq);
	2604
	2605	aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
	2606	aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
	2607
	2608	ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
	2609	}
	2610	}
	2611	enqueue_tail(&aq->age_q, &m->pageq);
	2612	m->speculative = TRUE;
	2613	vm_page_speculative_count++;
	2614
	2615	if (new == TRUE) {
	2616	m->object->pages_created++;
	2617	vm_page_speculative_created++;
	2618	}
	2619	}
	2620	}
	2621
	2622
	2623	/*
	2624	* move pages from the specified aging bin to
	2625	* the speculative bin that pageout_scan claims from
	2626	*
	2627	* The page queues must be locked.
	2628	*/
	2629	void
	2630	vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
	2631	{
	2632	struct vm_speculative_age_q *sq;
	2633	vm_page_t t;
	2634
	2635	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
	2636
	2637	if (queue_empty(&sq->age_q)) {
	2638	sq->age_q.next = aq->age_q.next;
	2639	sq->age_q.prev = aq->age_q.prev;
	2640
	2641	t = (vm_page_t)sq->age_q.next;
	2642	t->pageq.prev = &sq->age_q;
	2643
	2644	t = (vm_page_t)sq->age_q.prev;
	2645	t->pageq.next = &sq->age_q;
	2646	} else {
	2647	t = (vm_page_t)sq->age_q.prev;
	2648	t->pageq.next = aq->age_q.next;
	2649
	2650	t = (vm_page_t)aq->age_q.next;
	2651	t->pageq.prev = sq->age_q.prev;
	2652
	2653	t = (vm_page_t)aq->age_q.prev;
	2654	t->pageq.next = &sq->age_q;
	2655
	2656	sq->age_q.prev = aq->age_q.prev;
	2657	}
	2658	queue_init(&aq->age_q);
	2659	}
	2660
	2661
	2662	void
	2663	vm_page_lru(
	2664	vm_page_t m)
	2665	{
	2666	VM_PAGE_CHECK(m);
	2667	assert(m->object != kernel_object);
	2668	assert(m->phys_page != vm_page_guard_addr);
	2669
	2670	#if DEBUG
	2671	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
	2672	#endif
	2673	if (m->active \|\| m->reference)
	2674	return;
	2675
	2676	if (m->private \|\| (m->wire_count != 0))
	2677	return;
	2678
	2679	m->no_cache = FALSE;
	2680
	2681	VM_PAGE_QUEUES_REMOVE(m);
	2682
	2683	assert(!m->laundry);
	2684	assert(m->pageq.next == NULL && m->pageq.prev == NULL);
	2685
	2686	queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
	2687	m->inactive = TRUE;
	2688
	2689	vm_page_inactive_count++;
	2690	token_new_pagecount++;
	2691	}
	2692
	2693
	2694	/*
	2695	* vm_page_part_zero_fill:
	2696	*
	2697	* Zero-fill a part of the page.
	2698	*/
	2699	void
	2700	vm_page_part_zero_fill(
	2701	vm_page_t m,
	2702	vm_offset_t m_pa,
	2703	vm_size_t len)
	2704	{
	2705	vm_page_t tmp;
	2706
	2707	VM_PAGE_CHECK(m);
	2708	#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
	2709	pmap_zero_part_page(m->phys_page, m_pa, len);
	2710	#else
	2711	while (1) {
	2712	tmp = vm_page_grab();
	2713	if (tmp == VM_PAGE_NULL) {
	2714	vm_page_wait(THREAD_UNINT);
	2715	continue;
	2716	}
	2717	break;
	2718	}
	2719	vm_page_zero_fill(tmp);
	2720	if(m_pa != 0) {
	2721	vm_page_part_copy(m, 0, tmp, 0, m_pa);
	2722	}
	2723	if((m_pa + len) < PAGE_SIZE) {
	2724	vm_page_part_copy(m, m_pa + len, tmp,
	2725	m_pa + len, PAGE_SIZE - (m_pa + len));
	2726	}
	2727	vm_page_copy(tmp,m);
	2728	vm_page_lock_queues();
	2729	vm_page_free(tmp);
	2730	vm_page_unlock_queues();
	2731	#endif
	2732
	2733	}
	2734
	2735	/*
	2736	* vm_page_zero_fill:
	2737	*
	2738	* Zero-fill the specified page.
	2739	*/
	2740	void
	2741	vm_page_zero_fill(
	2742	vm_page_t m)
	2743	{
	2744	XPR(XPR_VM_PAGE,
	2745	"vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
	2746	(integer_t)m->object, (integer_t)m->offset, (integer_t)m, 0,0);
	2747
	2748	VM_PAGE_CHECK(m);
	2749
	2750	// dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
	2751	pmap_zero_page(m->phys_page);
	2752	}
	2753
	2754	/*
	2755	* vm_page_part_copy:
	2756	*
	2757	* copy part of one page to another
	2758	*/
	2759
	2760	void
	2761	vm_page_part_copy(
	2762	vm_page_t src_m,
	2763	vm_offset_t src_pa,
	2764	vm_page_t dst_m,
	2765	vm_offset_t dst_pa,
	2766	vm_size_t len)
	2767	{
	2768	VM_PAGE_CHECK(src_m);
	2769	VM_PAGE_CHECK(dst_m);
	2770
	2771	pmap_copy_part_page(src_m->phys_page, src_pa,
	2772	dst_m->phys_page, dst_pa, len);
	2773	}
	2774
	2775	/*
	2776	* vm_page_copy:
	2777	*
	2778	* Copy one page to another
	2779	*
	2780	* ENCRYPTED SWAP:
	2781	* The source page should not be encrypted. The caller should
	2782	* make sure the page is decrypted first, if necessary.
	2783	*/
	2784
	2785	int vm_page_copy_cs_validations = 0;
	2786	int vm_page_copy_cs_tainted = 0;
	2787
	2788	void
	2789	vm_page_copy(
	2790	vm_page_t src_m,
	2791	vm_page_t dest_m)
	2792	{
	2793	XPR(XPR_VM_PAGE,
	2794	"vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
	2795	(integer_t)src_m->object, src_m->offset,
	2796	(integer_t)dest_m->object, dest_m->offset,
	2797	0);
	2798
	2799	VM_PAGE_CHECK(src_m);
	2800	VM_PAGE_CHECK(dest_m);
	2801
	2802	/*
	2803	* ENCRYPTED SWAP:
	2804	* The source page should not be encrypted at this point.
	2805	* The destination page will therefore not contain encrypted
	2806	* data after the copy.
	2807	*/
	2808	if (src_m->encrypted) {
	2809	panic("vm_page_copy: source page %p is encrypted\n", src_m);
	2810	}
	2811	dest_m->encrypted = FALSE;
	2812
	2813	if (src_m->object != VM_OBJECT_NULL &&
	2814	src_m->object->code_signed) {
	2815	/*
	2816	* We're copying a page from a code-signed object.
	2817	* Whoever ends up mapping the copy page might care about
	2818	* the original page's integrity, so let's validate the
	2819	* source page now.
	2820	*/
	2821	vm_page_copy_cs_validations++;
	2822	vm_page_validate_cs(src_m);
	2823	}
	2824	/*
	2825	* Propagate the code-signing bits to the copy page.
	2826	*/
	2827	dest_m->cs_validated = src_m->cs_validated;
	2828	dest_m->cs_tainted = src_m->cs_tainted;
	2829	if (dest_m->cs_tainted) {
	2830	assert(dest_m->cs_validated);
	2831	vm_page_copy_cs_tainted++;
	2832	}
	2833
	2834	pmap_copy_page(src_m->phys_page, dest_m->phys_page);
	2835	}
	2836
	#if	MACH_ASSERT
	/*
	 *	vm_page_verify_contiguous:
	 *
	 *	Check that the list of pages is ordered by ascending physical
	 *	page number with no holes, and that it holds exactly 'npages'
	 *	pages.  'pages' must not be VM_PAGE_NULL.
	 *
	 *	Returns 1 when the list checks out; panics (after printing the
	 *	offending values) otherwise.
	 *
	 *	The diagnostics cast their arguments to the exact types the
	 *	conversion specifiers call for: %p takes a void pointer, %x
	 *	and %u an unsigned int.
	 */
	static int
	vm_page_verify_contiguous(
		vm_page_t	pages,
		unsigned int	npages)
	{
		register vm_page_t	m;
		unsigned int		page_count;
		vm_offset_t		prev_addr;

		prev_addr = pages->phys_page;
		page_count = 1;
		for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
			if (m->phys_page != prev_addr + 1) {
				printf("m %p prev_addr 0x%x, current addr 0x%x\n",
				       (void *) m,
				       (unsigned int) prev_addr,
				       (unsigned int) m->phys_page);
				printf("pages %p page_count %u\n",
				       (void *) pages, page_count);
				panic("vm_page_verify_contiguous:  not contiguous!");
			}
			prev_addr = m->phys_page;
			++page_count;
		}
		if (page_count != npages) {
			printf("pages %p actual count 0x%x but requested 0x%x\n",
			       (void *) pages, page_count, npages);
			panic("vm_page_verify_contiguous:  count error");
		}
		return 1;
	}
	#endif	/* MACH_ASSERT */
	2871
	2872
	#if	MACH_ASSERT
	/*
	 *	vm_page_verify_free_lists:
	 *
	 *	Walk every per-color free queue under vm_page_queue_free_lock
	 *	and panic if a back pointer is wrong, if a page is not marked
	 *	free and busy, if a page sits on the queue of the wrong color,
	 *	or if the total number of pages found differs from
	 *	vm_page_free_count.
	 */
	static void
	vm_page_verify_free_lists( void )
	{
		unsigned int	color, npages;
		vm_page_t	m;
		vm_page_t	expected_prev;

		npages = 0;

		mutex_lock(&vm_page_queue_free_lock);

		for (color = 0; color < vm_colors; color++) {
			/* the first page must point back at the queue head itself */
			expected_prev = (vm_page_t) &vm_page_queue_free[color];

			queue_iterate(&vm_page_queue_free[color],
				      m,
				      vm_page_t,
				      pageq) {
				if ((vm_page_t) m->pageq.prev != expected_prev)
					panic("vm_page_verify_free_lists: corrupted prev ptr");
				if ( ! m->free )
					panic("vm_page_verify_free_lists: not free");
				if ( ! m->busy )
					panic("vm_page_verify_free_lists: not busy");
				if ( (m->phys_page & vm_color_mask) != color)
					panic("vm_page_verify_free_lists: wrong color");

				expected_prev = m;
				++npages;
			}
		}
		if (npages != vm_page_free_count)
			panic("vm_page_verify_free_lists:  npages %u  free_count %d",
			      npages, vm_page_free_count);

		mutex_unlock(&vm_page_queue_free_lock);
	}
	#endif	/* MACH_ASSERT */
	2913
	2914
	2915
	2916	/*
	2917	* CONTIGUOUS PAGE ALLOCATION
	2918	* Additional levels of effort:
	2919	* + consider pages that are currently 'pmapped'
	2920	* this could be expensive since we'd have
	2921	* to ask the pmap layer about their state
	2922	* + consider dirty pages
	2923	* either clean them or
	2924	* copy them to other locations...
	2925	*
	2926	* Find a region large enough to contain at least n pages
	2927	* of contiguous physical memory.
	2928	*
	2929	* This is done by traversing the vm_page_t array in a linear fashion
	2930	* we assume that the vm_page_t array has the available physical pages in an
	2931	* ordered, ascending list... this is currently true of all our implementations
	2932	* and must remain so... there can be 'holes' in the array... we also can
	2933	* no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
	2934	* which used to happen via 'vm_page_convert'... that function was no longer
	2935	* being called and was removed...
	2936	*
	2937	* The basic flow consists of stabilizing some of the interesting state of
	2938	* a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
	2939	* sweep at the beginning of the array looking for pages that meet our criteria
	2940	* for a 'stealable' page... currently we are pretty conservative... if the page
	2941	* meets these criteria and is physically contiguous to the previous page in the 'run'
	2942	* we keep developing it. If we hit a page that doesn't fit, we reset our state
	2943	* and start to develop a new run... if at this point we've already considered
	2944	* at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
	2945	* and mutex_pause (which will yield the processor), to keep the latency low w/r
	2946	* to other threads trying to acquire free pages (or move pages from q to q),
	2947	* and then continue from the spot we left off... we only make 1 pass through the
	2948	* array. Once we have a 'run' that is long enough, we'll go into the loop which
	2949	* steals the pages from the queues they're currently on... pages on the free
	2950	* queue can be stolen directly... pages that are on any of the other queues
	2951	* must be removed from the object they are tabled on... this requires taking the
	2952	* object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
	2953	* or if the state of the page behind the vm_object lock is no longer viable, we'll
	2954	* dump the pages we've currently stolen back to the free list, and pick up our
	2955	* scan from the point where we aborted the 'current' run.
	2956	*
	2957	*
	2958	* Requirements:
	2959	* - neither vm_page_queue nor vm_free_list lock can be held on entry
	2960	*
	2961	* Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
	2962	*
	2963	* Algorithm:
	2964	*/
	2965
	/*
	 * Once more than this many pages have been considered and no run of
	 * more than one page is in progress, vm_page_find_contiguous() drops
	 * its locks and yields before carrying on.
	 */
	#define MAX_CONSIDERED_BEFORE_YIELD	1000


	/*
	 * Forget the run currently being assembled by
	 * vm_page_find_contiguous().  This works on that function's locals
	 * (npages, substitute_needed, free_considered, prevcontaddr), so it
	 * can only be used where those are in scope.
	 * NOTE(review): prevcontaddr = -2 presumably guarantees that no real
	 * page number equals prevcontaddr + 1, so the next candidate always
	 * opens a new run -- confirm against the width of ppnum_t.
	 */
	#define RESET_STATE_OF_RUN()		\
		MACRO_BEGIN			\
		npages = 0;			\
		substitute_needed = 0;		\
		free_considered = 0;		\
		prevcontaddr = -2;		\
		MACRO_END
	2976
	2977
	2978	static vm_page_t
	2979	vm_page_find_contiguous(
	2980	unsigned int contig_pages,
	2981	ppnum_t max_pnum,
	2982	boolean_t wire)
	2983	{
	2984	vm_page_t m = NULL;
	2985	ppnum_t prevcontaddr;
	2986	unsigned int npages, considered;
	2987	unsigned int page_idx, start_idx;
	2988	int free_considered, free_available;
	2989	int substitute_needed;
	2990	#if MACH_ASSERT
	2991	uint32_t tv_start_sec, tv_start_usec, tv_end_sec, tv_end_usec;
	2992	int yielded = 0;
	2993	int dumped_run = 0;
	2994	int stolen_pages = 0;
	2995	#endif
	2996
	2997	if (contig_pages == 0)
	2998	return VM_PAGE_NULL;
	2999
	3000	#if MACH_ASSERT
	3001	vm_page_verify_free_lists();
	3002
	3003	clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
	3004	#endif
	3005	vm_page_lock_queues();
	3006	mutex_lock(&vm_page_queue_free_lock);
	3007
	3008	RESET_STATE_OF_RUN();
	3009
	3010	considered = 0;
	3011	free_available = vm_page_free_count - vm_page_free_reserved;
	3012
	3013	for (page_idx = 0, start_idx = 0;
	3014	npages < contig_pages && page_idx < vm_pages_count;
	3015	page_idx++) {
	3016	retry:
	3017	m = &vm_pages[page_idx];
	3018
	3019	if (max_pnum && m->phys_page > max_pnum) {
	3020	/* no more low pages... */
	3021	break;
	3022	}
	3023	if (m->phys_page <= vm_lopage_poolend &&
	3024	m->phys_page >= vm_lopage_poolstart) {
	3025	/*
	3026	* don't want to take pages from our
	3027	* reserved pool of low memory
	3028	* so don't consider it which
	3029	* means starting a new run
	3030	*/
	3031	RESET_STATE_OF_RUN();
	3032
	3033	} else if (m->wire_count \|\| m->gobbled \|\|
	3034	m->encrypted \|\| m->encrypted_cleaning \|\| m->cs_validated \|\| m->cs_tainted \|\|
	3035	m->error \|\| m->absent \|\| m->pageout_queue \|\| m->laundry \|\| m->wanted \|\| m->precious \|\|
	3036	m->cleaning \|\| m->overwriting \|\| m->restart \|\| m->unusual \|\| m->list_req_pending) {
	3037	/*
	3038	* page is in a transient state
	3039	* or a state we don't want to deal
	3040	* with, so don't consider it which
	3041	* means starting a new run
	3042	*/
	3043	RESET_STATE_OF_RUN();
	3044
	3045	} else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
	3046	/*
	3047	* page needs to be on one of our queues
	3048	* in order for it to be stable behind the
	3049	* locks we hold at this point...
	3050	* if not, don't consider it which
	3051	* means starting a new run
	3052	*/
	3053	RESET_STATE_OF_RUN();
	3054
	3055	} else if (!m->free && (!m->tabled \|\| m->busy)) {
	3056	/*
	3057	* pages on the free list are always 'busy'
	3058	* so we couldn't test for 'busy' in the check
	3059	* for the transient states... pages that are
	3060	* 'free' are never 'tabled', so we also couldn't
	3061	* test for 'tabled'. So we check here to make
	3062	* sure that a non-free page is not busy and is
	3063	* tabled on an object...
	3064	* if not, don't consider it which
	3065	* means starting a new run
	3066	*/
	3067	RESET_STATE_OF_RUN();
	3068
	3069	} else {
	3070	if (m->phys_page != prevcontaddr + 1) {
	3071	npages = 1;
	3072	start_idx = page_idx;
	3073	} else {
	3074	npages++;
	3075	}
	3076	prevcontaddr = m->phys_page;
	3077
	3078	if (m->pmapped \|\| m->dirty)
	3079	substitute_needed++;
	3080
	3081	if (m->free) {
	3082	free_considered++;
	3083	}
	3084	if ((free_considered + substitute_needed) > free_available) {
	3085	/*
	3086	* if we let this run continue
	3087	* we will end up dropping the vm_page_free_count
	3088	* below the reserve limit... we need to abort
	3089	* this run, but we can at least re-consider this
	3090	* page... thus the jump back to 'retry'
	3091	*/
	3092	RESET_STATE_OF_RUN();
	3093
	3094	if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
	3095	considered++;
	3096	goto retry;
	3097	}
	3098	/*
	3099	* free_available == 0
	3100	* so can't consider any free pages... if
	3101	* we went to retry in this case, we'd
	3102	* get stuck looking at the same page
	3103	* w/o making any forward progress
	3104	* we also want to take this path if we've already
	3105	* reached our limit that controls the lock latency
	3106	*/
	3107	}
	3108	}
	3109	if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
	3110
	3111	mutex_unlock(&vm_page_queue_free_lock);
	3112	vm_page_unlock_queues();
	3113
	3114	mutex_pause(0);
	3115
	3116	vm_page_lock_queues();
	3117	mutex_lock(&vm_page_queue_free_lock);
	3118
	3119	RESET_STATE_OF_RUN();
	3120	/*
	3121	* reset our free page limit since we
	3122	* dropped the lock protecting the vm_page_free_queue
	3123	*/
	3124	free_available = vm_page_free_count - vm_page_free_reserved;
	3125	considered = 0;
	3126	#if MACH_ASSERT
	3127	yielded++;
	3128	#endif
	3129	goto retry;
	3130	}
	3131	considered++;
	3132	}
	3133	m = VM_PAGE_NULL;
	3134
	3135	if (npages != contig_pages)
	3136	mutex_unlock(&vm_page_queue_free_lock);
	3137	else {
	3138	vm_page_t m1;
	3139	vm_page_t m2;
	3140	unsigned int cur_idx;
	3141	unsigned int tmp_start_idx;
	3142	vm_object_t locked_object = VM_OBJECT_NULL;
	3143	boolean_t abort_run = FALSE;
	3144
	3145	tmp_start_idx = start_idx;
	3146
	3147	/*
	3148	* first pass through to pull the free pages
	3149	* off of the free queue so that in case we
	3150	* need substitute pages, we won't grab any
	3151	* of the free pages in the run... we'll clear
	3152	* the 'free' bit in the 2nd pass, and even in
	3153	* an abort_run case, we'll collect all of the
	3154	* free pages in this run and return them to the free list
	3155	*/
	3156	while (start_idx < page_idx) {
	3157
	3158	m1 = &vm_pages[start_idx++];
	3159
	3160	if (m1->free) {
	3161	unsigned int color;
	3162
	3163	color = m1->phys_page & vm_color_mask;
	3164	queue_remove(&vm_page_queue_free[color],
	3165	m1,
	3166	vm_page_t,
	3167	pageq);
	3168
	3169	vm_page_free_count--;
	3170	}
	3171	}
	3172	/*
	3173	* adjust global freelist counts
	3174	*/
	3175	if (vm_page_free_count < vm_page_free_count_minimum)
	3176	vm_page_free_count_minimum = vm_page_free_count;
	3177
	3178	/*
	3179	* we can drop the free queue lock at this point since
	3180	* we've pulled any 'free' candidates off of the list
	3181	* we need it dropped so that we can do a vm_page_grab
	3182	* when substituting for pmapped/dirty pages
	3183	*/
	3184	mutex_unlock(&vm_page_queue_free_lock);
	3185
	3186	start_idx = tmp_start_idx;
	3187	cur_idx = page_idx - 1;
	3188
	3189	while (start_idx++ < page_idx) {
	3190	/*
	3191	* must go through the list from back to front
	3192	* so that the page list is created in the
	3193	* correct order - low -> high phys addresses
	3194	*/
	3195	m1 = &vm_pages[cur_idx--];
	3196
	3197	if (m1->free) {
	3198	/*
	3199	* pages have already been removed from
	3200	* the free list in the 1st pass
	3201	*/
	3202	assert(m1->free);
	3203	assert(m1->busy);
	3204	assert(!m1->wanted);
	3205	assert(!m1->laundry);
	3206	m1->free = FALSE;
	3207
	3208	} else {
	3209	vm_object_t object;
	3210
	3211	if (abort_run == TRUE)
	3212	continue;
	3213
	3214	object = m1->object;
	3215
	3216	if (object != locked_object) {
	3217	if (locked_object) {
	3218	vm_object_unlock(locked_object);
	3219	locked_object = VM_OBJECT_NULL;
	3220	}
	3221	if (vm_object_lock_try(object))
	3222	locked_object = object;
	3223	}
	3224	if (locked_object == VM_OBJECT_NULL \|\|
	3225	(m1->wire_count \|\| m1->gobbled \|\|
	3226	m1->encrypted \|\| m1->encrypted_cleaning \|\| m1->cs_validated \|\| m1->cs_tainted \|\|
	3227	m1->error \|\| m1->absent \|\| m1->pageout_queue \|\| m1->laundry \|\| m1->wanted \|\| m1->precious \|\|
	3228	m1->cleaning \|\| m1->overwriting \|\| m1->restart \|\| m1->unusual \|\| m1->list_req_pending \|\| m1->busy)) {
	3229
	3230	if (locked_object) {
	3231	vm_object_unlock(locked_object);
	3232	locked_object = VM_OBJECT_NULL;
	3233	}
	3234	tmp_start_idx = cur_idx;
	3235	abort_run = TRUE;
	3236	continue;
	3237	}
	3238	if (m1->pmapped \|\| m1->dirty) {
	3239	int refmod;
	3240	vm_object_offset_t offset;
	3241
	3242	m2 = vm_page_grab();
	3243
	3244	if (m2 == VM_PAGE_NULL) {
	3245	if (locked_object) {
	3246	vm_object_unlock(locked_object);
	3247	locked_object = VM_OBJECT_NULL;
	3248	}
	3249	tmp_start_idx = cur_idx;
	3250	abort_run = TRUE;
	3251	continue;
	3252	}
	3253	if (m1->pmapped)
	3254	refmod = pmap_disconnect(m1->phys_page);
	3255	else
	3256	refmod = 0;
	3257	vm_page_copy(m1, m2);
	3258
	3259	m2->reference = m1->reference;
	3260	m2->dirty = m1->dirty;
	3261
	3262	if (refmod & VM_MEM_REFERENCED)
	3263	m2->reference = TRUE;
	3264	if (refmod & VM_MEM_MODIFIED)
	3265	m2->dirty = TRUE;
	3266	offset = m1->offset;
	3267
	3268	/*
	3269	* completely cleans up the state
	3270	* of the page so that it is ready
	3271	* to be put onto the free list, or
	3272	* for this purpose it looks like it
	3273	* just came off of the free list
	3274	*/
	3275	vm_page_free_prepare(m1);
	3276
	3277	/*
	3278	* make sure we clear the ref/mod state
	3279	* from the pmap layer... else we risk
	3280	* inheriting state from the last time
	3281	* this page was used...
	3282	*/
	3283	pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED \| VM_MEM_REFERENCED);
	3284	/*
	3285	* now put the substitute page on the object
	3286	*/
	3287	vm_page_insert_internal(m2, locked_object, offset, TRUE);
	3288
	3289	if (m2->reference)
	3290	vm_page_activate(m2);
	3291	else
	3292	vm_page_deactivate(m2);
	3293
	3294	PAGE_WAKEUP_DONE(m2);
	3295
	3296	} else {
	3297	/*
	3298	* completely cleans up the state
	3299	* of the page so that it is ready
	3300	* to be put onto the free list, or
	3301	* for this purpose it looks like it
	3302	* just came off of the free list
	3303	*/
	3304	vm_page_free_prepare(m1);
	3305	}
	3306	#if MACH_ASSERT
	3307	stolen_pages++;
	3308	#endif
	3309	}
	3310	m1->pageq.next = (queue_entry_t) m;
	3311	m1->pageq.prev = NULL;
	3312	m = m1;
	3313	}
	3314	if (locked_object) {
	3315	vm_object_unlock(locked_object);
	3316	locked_object = VM_OBJECT_NULL;
	3317	}
	3318
	3319	if (abort_run == TRUE) {
	3320	if (m != VM_PAGE_NULL) {
	3321	vm_page_free_list(m);
	3322	}
	3323	#if MACH_ASSERT
	3324	dumped_run++;
	3325	#endif
	3326	/*
	3327	* want the index of the last
	3328	* page in this run that was
	3329	* successfully 'stolen', so back
	3330	* it up 1 for the auto-decrement on use
	3331	* and 1 more to bump back over this page
	3332	*/
	3333	page_idx = tmp_start_idx + 2;
	3334
	3335	if (page_idx >= vm_pages_count)
	3336	goto done_scanning;
	3337
	3338	mutex_lock(&vm_page_queue_free_lock);
	3339
	3340	RESET_STATE_OF_RUN();
	3341
	3342	/*
	3343	* reset our free page limit since we
	3344	* dropped the lock protecting the vm_page_free_queue
	3345	*/
	3346	free_available = vm_page_free_count - vm_page_free_reserved;
	3347
	3348	goto retry;
	3349	}
	3350
	3351	for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
	3352
	3353	if (wire == TRUE)
	3354	m1->wire_count++;
	3355	else
	3356	m1->gobbled = TRUE;
	3357	}
	3358	if (wire == FALSE)
	3359	vm_page_gobble_count += npages;
	3360
	3361	/*
	3362	* gobbled pages are also counted as wired pages
	3363	*/
	3364	vm_page_wire_count += npages;
	3365
	3366	assert(vm_page_verify_contiguous(m, npages));
	3367	}
	3368	done_scanning:
	3369	vm_page_unlock_queues();
	3370
	3371	#if MACH_ASSERT
	3372	clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
	3373
	3374	tv_end_sec -= tv_start_sec;
	3375	if (tv_end_usec < tv_start_usec) {
	3376	tv_end_sec--;
	3377	tv_end_usec += 1000000;
	3378	}
	3379	tv_end_usec -= tv_start_usec;
	3380	if (tv_end_usec >= 1000000) {
	3381	tv_end_sec++;
	3382	tv_end_sec -= 1000000;
	3383	}
	3384	printf("vm_find_page_contiguous(num=%d,low=%d): found %d pages in %d.%06ds... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
	3385	contig_pages, max_pnum, npages, tv_end_sec, tv_end_usec, page_idx, yielded, dumped_run, stolen_pages);
	3386
	3387	vm_page_verify_free_lists();
	3388	#endif
	3389	return m;
	3390	}
	3391
	3392	/*
	3393	* Allocate a list of contiguous, wired pages.
	3394	*/
	3395	kern_return_t
	3396	cpm_allocate(
	3397	vm_size_t size,
	3398	vm_page_t *list,
	3399	ppnum_t max_pnum,
	3400	boolean_t wire)
	3401	{
	3402	vm_page_t pages;
	3403	unsigned int npages;
	3404
	3405	if (size % page_size != 0)
	3406	return KERN_INVALID_ARGUMENT;
	3407
	3408	npages = size / page_size;
	3409
	3410	/*
	3411	* Obtain a pointer to a subset of the free
	3412	* list large enough to satisfy the request;
	3413	* the region will be physically contiguous.
	3414	*/
	3415	pages = vm_page_find_contiguous(npages, max_pnum, wire);
	3416
	3417	if (pages == VM_PAGE_NULL)
	3418	return KERN_NO_SPACE;
	3419	/*
	3420	* determine need for wakeups
	3421	*/
	3422	if ((vm_page_free_count < vm_page_free_min) \|\|
	3423	((vm_page_free_count < vm_page_free_target) &&
	3424	((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
	3425	thread_wakeup((event_t) &vm_page_free_wanted);
	3426
	3427	#if CONFIG_EMBEDDED
	3428	{
	3429	int percent_avail;
	3430
	3431	/*
	3432	* Decide if we need to poke the memorystatus notification thread.
	3433	*/
	3434	percent_avail =
	3435	(vm_page_active_count + vm_page_inactive_count +
	3436	vm_page_speculative_count + vm_page_free_count +
	3437	(IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
	3438	atop_64(max_mem);
	3439	if (percent_avail <= (kern_memorystatus_level - 5)) {
	3440	kern_memorystatus_level = percent_avail;
	3441	thread_wakeup((event_t)&kern_memorystatus_wakeup);
	3442	}
	3443	}
	3444	#endif
	3445	/*
	3446	* The CPM pages should now be available and
	3447	* ordered by ascending physical address.
	3448	*/
	3449	assert(vm_page_verify_contiguous(pages, npages));
	3450
	3451	*list = pages;
	3452	return KERN_SUCCESS;
	3453	}
	3454
	3455
	3456	#include <mach_vm_debug.h>
	3457	#if MACH_VM_DEBUG
	3458
	3459	#include <mach_debug/hash_info.h>
	3460	#include <vm/vm_debug.h>
	3461
	3462	/*
	3463	* Routine: vm_page_info
	3464	* Purpose:
	3465	* Return information about the global VP table.
	3466	* Fills the buffer with as much information as possible
	3467	* and returns the desired size of the buffer.
	3468	* Conditions:
	3469	* Nothing locked. The caller should provide
	3470	* possibly-pageable memory.
	3471	*/
	3472
	3473	unsigned int
	3474	vm_page_info(
	3475	hash_info_bucket_t *info,
	3476	unsigned int count)
	3477	{
	3478	unsigned int i;
	3479
	3480	if (vm_page_bucket_count < count)
	3481	count = vm_page_bucket_count;
	3482
	3483	for (i = 0; i < count; i++) {
	3484	vm_page_bucket_t *bucket = &vm_page_buckets[i];
	3485	unsigned int bucket_count = 0;
	3486	vm_page_t m;
	3487
	3488	simple_lock(&vm_page_bucket_lock);
	3489	for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
	3490	bucket_count++;
	3491	simple_unlock(&vm_page_bucket_lock);
	3492
	3493	/* don't touch pageable memory while holding locks */
	3494	info[i].hib_count = bucket_count;
	3495	}
	3496
	3497	return vm_page_bucket_count;
	3498	}
	3499	#endif /* MACH_VM_DEBUG */
	3500
	3501	#include <mach_kdb.h>
	3502	#if MACH_KDB
	3503
	3504	#include <ddb/db_output.h>
	3505	#include <vm/vm_print.h>
	3506	#define printf kdbprintf
	3507
	3508	/*
	3509	* Routine: vm_page_print [exported]
	3510	*/
	3511	void
	3512	vm_page_print(
	3513	db_addr_t db_addr)
	3514	{
	3515	vm_page_t p;
	3516
	3517	p = (vm_page_t) (long) db_addr;
	3518
	3519	iprintf("page 0x%x\n", p);
	3520
	3521	db_indent += 2;
	3522
	3523	iprintf("object=0x%x", p->object);
	3524	printf(", offset=0x%x", p->offset);
	3525	printf(", wire_count=%d", p->wire_count);
	3526
	3527	iprintf("%sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
	3528	(p->inactive ? "" : "!"),
	3529	(p->active ? "" : "!"),
	3530	(p->throttled ? "" : "!"),
	3531	(p->gobbled ? "" : "!"),
	3532	(p->laundry ? "" : "!"),
	3533	(p->free ? "" : "!"),
	3534	(p->reference ? "" : "!"),
	3535	(p->encrypted ? "" : "!"));
	3536	iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
	3537	(p->busy ? "" : "!"),
	3538	(p->wanted ? "" : "!"),
	3539	(p->tabled ? "" : "!"),
	3540	(p->fictitious ? "" : "!"),
	3541	(p->private ? "" : "!"),
	3542	(p->precious ? "" : "!"));
	3543	iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
	3544	(p->absent ? "" : "!"),
	3545	(p->error ? "" : "!"),
	3546	(p->dirty ? "" : "!"),
	3547	(p->cleaning ? "" : "!"),
	3548	(p->pageout ? "" : "!"),
	3549	(p->clustered ? "" : "!"));
	3550	iprintf("%soverwriting, %srestart, %sunusual\n",
	3551	(p->overwriting ? "" : "!"),
	3552	(p->restart ? "" : "!"),
	3553	(p->unusual ? "" : "!"));
	3554
	3555	iprintf("phys_page=0x%x", p->phys_page);
	3556
	3557	db_indent -= 2;
	3558	}
	3559	#endif /* MACH_KDB */