/*
 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *  File:    vm/vm_pageout.c
 *  Author:  Avadis Tevanian, Jr., Michael Wayne Young
 *  Date:    1985
 *
 *  The proverbial page-out daemon.
 */

#include <stdint.h>

#include <debug.h>
#include <mach_pagemap.h>
#include <mach_cluster_stats.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/mach_host_server.h>
#include <mach/upl.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_statistics.h>
#include <mach/sdt.h>

#include <kern/kern_types.h>
#include <kern/counters.h>
#include <kern/host_statistics.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/sched.h>
#include <kern/thread.h>
#include <kern/xpr.h>
#include <kern/kalloc.h>

#include <machine/vm_tuning.h>
#include <machine/commpage.h>

#include <vm/pmap.h>
#include <vm/vm_compressor_pager.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h> /* must be last */
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_compressor.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif
/*
 * ENCRYPTED SWAP:
 */
#include <libkern/crypto/aes.h>
extern u_int32_t random(void);  /* from <libkern/libkern.h> */

extern int cs_debug;

#if UPL_DEBUG
#include <libkern/OSDebug.h>
#endif

extern void m_drain(void);

#if VM_PRESSURE_EVENTS
extern unsigned int memorystatus_available_pages;
extern unsigned int memorystatus_available_pages_pressure;
extern unsigned int memorystatus_available_pages_critical;
extern unsigned int memorystatus_frozen_count;
extern unsigned int memorystatus_suspended_count;

extern vm_pressure_level_t memorystatus_vm_pressure_level;
int memorystatus_purge_on_warning = 2;
int memorystatus_purge_on_urgent = 5;
int memorystatus_purge_on_critical = 8;

void vm_pressure_response(void);
boolean_t vm_pressure_thread_running = FALSE;
extern void consider_vm_pressure_events(void);

#define MEMORYSTATUS_SUSPENDED_THRESHOLD 4
#endif /* VM_PRESSURE_EVENTS */

boolean_t vm_pressure_changed = FALSE;

#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE   /* maximum iterations of the active queue to move pages to inactive */
#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE   100
#endif

#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
#endif

#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
#define VM_PAGEOUT_DEADLOCK_RELIEF 100     /* number of pages to move to break deadlock */
#endif

#ifndef VM_PAGEOUT_INACTIVE_RELIEF
#define VM_PAGEOUT_INACTIVE_RELIEF 50      /* minimum number of pages to move to the inactive q */
#endif

#ifndef VM_PAGE_LAUNDRY_MAX
#define VM_PAGE_LAUNDRY_MAX 128UL          /* maximum pageouts on a given pageout queue */
#endif /* VM_PAGE_LAUNDRY_MAX */

#ifndef VM_PAGEOUT_BURST_WAIT
#define VM_PAGEOUT_BURST_WAIT 10           /* milliseconds */
#endif /* VM_PAGEOUT_BURST_WAIT */

#ifndef VM_PAGEOUT_EMPTY_WAIT
#define VM_PAGEOUT_EMPTY_WAIT 200          /* milliseconds */
#endif /* VM_PAGEOUT_EMPTY_WAIT */

#ifndef VM_PAGEOUT_DEADLOCK_WAIT
#define VM_PAGEOUT_DEADLOCK_WAIT 300       /* milliseconds */
#endif /* VM_PAGEOUT_DEADLOCK_WAIT */

#ifndef VM_PAGEOUT_IDLE_WAIT
#define VM_PAGEOUT_IDLE_WAIT 10            /* milliseconds */
#endif /* VM_PAGEOUT_IDLE_WAIT */

#ifndef VM_PAGEOUT_SWAP_WAIT
#define VM_PAGEOUT_SWAP_WAIT 50            /* milliseconds */
#endif /* VM_PAGEOUT_SWAP_WAIT */

#ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
#define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED 1000 /* maximum pages considered before we issue a pressure event */
#endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */

#ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
#define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS 5  /* seconds */
#endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */

unsigned int vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
unsigned int vm_page_speculative_percentage = 5;

#ifndef VM_PAGE_SPECULATIVE_TARGET
#define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
#endif /* VM_PAGE_SPECULATIVE_TARGET */
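/*
 * Annotation (illustrative arithmetic, grounded only in the macro above):
 * with the default vm_page_speculative_percentage of 5, a system holding
 * 1,000,000 active+inactive pages gets a speculative target of
 * 1000000 / (100 / 5) = 50,000 pages, i.e. 5%.  Note the integer
 * division in the divisor: percentages that do not divide 100 evenly are
 * only approximated (3 gives 100 / 3 == 33, i.e. a target near 3.03%).
 */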


#ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
#define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
#endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */


/*
 * To obtain a reasonable LRU approximation, the inactive queue
 * needs to be large enough to give pages on it a chance to be
 * referenced a second time.  This macro defines the fraction
 * of active+inactive pages that should be inactive.
 * The pageout daemon uses it to update vm_page_inactive_target.
 *
 * If vm_page_free_count falls below vm_page_free_target and
 * vm_page_inactive_count is below vm_page_inactive_target,
 * then the pageout daemon starts running.
 */

#ifndef VM_PAGE_INACTIVE_TARGET
#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2)
#endif /* VM_PAGE_INACTIVE_TARGET */

/*
 * Once the pageout daemon starts running, it keeps going
 * until vm_page_free_count meets or exceeds vm_page_free_target.
 */

#ifndef VM_PAGE_FREE_TARGET
#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
#endif /* VM_PAGE_FREE_TARGET */
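/*
 * Annotation (illustrative arithmetic): VM_PAGE_FREE_TARGET(250000),
 * roughly 1 GB of 4 KB pages, yields 15 + 250000 / 80 = 3140 pages.
 * The VM_PAGE_FREE_TARGET_LIMIT constant below appears to exist to cap
 * such computed values.
 */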


/*
 * The pageout daemon always starts running once vm_page_free_count
 * falls below vm_page_free_min.
 */

#ifndef VM_PAGE_FREE_MIN
#define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
#endif /* VM_PAGE_FREE_MIN */

#define VM_PAGE_FREE_RESERVED_LIMIT 1700
#define VM_PAGE_FREE_MIN_LIMIT      3500
#define VM_PAGE_FREE_TARGET_LIMIT   4000

/*
 * When vm_page_free_count falls below vm_page_free_reserved,
 * only vm-privileged threads can allocate pages.  vm-privilege
 * allows the pageout daemon and default pager (and any other
 * associated threads needed for default pageout) to continue
 * operation by dipping into the reserved pool of pages.
 */

#ifndef VM_PAGE_FREE_RESERVED
#define VM_PAGE_FREE_RESERVED(n) \
    ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
#endif /* VM_PAGE_FREE_RESERVED */
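/*
 * Annotation (illustrative arithmetic): with VM_PAGE_LAUNDRY_MAX at its
 * default of 128, VM_PAGE_FREE_RESERVED(n) == 6 * 128 + n == 768 + n
 * pages, so a caller passing n = 100 reserves 868 pages.
 * VM_PAGE_FREE_RESERVED_LIMIT above presumably serves as the
 * corresponding cap.
 */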

/*
 * When we dequeue pages from the inactive list, they are
 * reactivated (ie, put back on the active queue) if referenced.
 * However, it is possible to starve the free list if other
 * processors are referencing pages faster than we can turn off
 * the referenced bit.  So we limit the number of reactivations
 * we will make per call of vm_pageout_scan().
 */
#define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
#ifndef VM_PAGE_REACTIVATE_LIMIT
#define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20, VM_PAGE_REACTIVATE_LIMIT_MAX))
#endif /* VM_PAGE_REACTIVATE_LIMIT */
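/*
 * Annotation (illustrative, not original commentary): because of the
 * MAX() above, the limit never falls below VM_PAGE_REACTIVATE_LIMIT_MAX.
 * For example, VM_PAGE_REACTIVATE_LIMIT(1000000) == MAX(50000, 20000)
 * == 50,000 reactivations per vm_pageout_scan() call, while any avail
 * below 400,000 pages yields the 20,000 floor.
 */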
#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 100


extern boolean_t hibernate_cleaning_in_progress;

/*
 * Exported variable used to broadcast the activation of the pageout scan.
 * Working Set uses this to throttle its use of pmap removes.  In this
 * way, code which runs within memory in an uncontested context does
 * not keep encountering soft faults.
 */

unsigned int vm_pageout_scan_event_counter = 0;

/*
 * Forward declarations for internal routines.
 */
struct cq {
    struct vm_pageout_queue *q;
    void                    *current_chead;
    char                    *scratch_buf;
};
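/*
 * Annotation: each struct cq is, by all appearances, the per-thread
 * context handed to vm_pageout_iothread_internal() (declared below):
 * the internal pageout queue that thread services, the compressor's
 * current chunk head, and a scratch buffer for compression work.
 */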


#if VM_PRESSURE_EVENTS
void vm_pressure_thread(void);

boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);

boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
#endif
static void vm_pageout_garbage_collect(int);
static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
static void vm_pageout_iothread_external(void);
static void vm_pageout_iothread_internal(struct cq *cq);
static void vm_pageout_adjust_io_throttles(struct vm_pageout_queue *, struct vm_pageout_queue *, boolean_t);

extern void vm_pageout_continue(void);
extern void vm_pageout_scan(void);

static thread_t vm_pageout_external_iothread = THREAD_NULL;
static thread_t vm_pageout_internal_iothread = THREAD_NULL;

unsigned int vm_pageout_reserved_internal = 0;
unsigned int vm_pageout_reserved_really = 0;

unsigned int vm_pageout_swap_wait = 0;
unsigned int vm_pageout_idle_wait = 0;          /* milliseconds */
unsigned int vm_pageout_empty_wait = 0;         /* milliseconds */
unsigned int vm_pageout_burst_wait = 0;         /* milliseconds */
unsigned int vm_pageout_deadlock_wait = 0;      /* milliseconds */
unsigned int vm_pageout_deadlock_relief = 0;
unsigned int vm_pageout_inactive_relief = 0;
unsigned int vm_pageout_burst_active_throttle = 0;
unsigned int vm_pageout_burst_inactive_throttle = 0;

int vm_upl_wait_for_pages = 0;


/*
 * These variables record the pageout daemon's actions:
 * how many pages it looks at and what happens to those pages.
 * No locking needed because only one thread modifies the variables.
 */

unsigned int vm_pageout_active = 0;                 /* debugging */
unsigned int vm_pageout_active_busy = 0;            /* debugging */
unsigned int vm_pageout_inactive = 0;               /* debugging */
unsigned int vm_pageout_inactive_throttled = 0;     /* debugging */
unsigned int vm_pageout_inactive_forced = 0;        /* debugging */
unsigned int vm_pageout_inactive_nolock = 0;        /* debugging */
unsigned int vm_pageout_inactive_avoid = 0;         /* debugging */
unsigned int vm_pageout_inactive_busy = 0;          /* debugging */
unsigned int vm_pageout_inactive_error = 0;         /* debugging */
unsigned int vm_pageout_inactive_absent = 0;        /* debugging */
unsigned int vm_pageout_inactive_notalive = 0;      /* debugging */
unsigned int vm_pageout_inactive_used = 0;          /* debugging */
unsigned int vm_pageout_cache_evicted = 0;          /* debugging */
unsigned int vm_pageout_inactive_clean = 0;         /* debugging */
unsigned int vm_pageout_speculative_clean = 0;      /* debugging */

unsigned int vm_pageout_freed_from_cleaned = 0;
unsigned int vm_pageout_freed_from_speculative = 0;
unsigned int vm_pageout_freed_from_inactive_clean = 0;

unsigned int vm_pageout_enqueued_cleaned_from_inactive_clean = 0;
unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;

unsigned int vm_pageout_cleaned_reclaimed = 0;          /* debugging; how many cleaned pages are reclaimed by the pageout scan */
unsigned int vm_pageout_cleaned_reactivated = 0;        /* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
unsigned int vm_pageout_cleaned_reference_reactivated = 0;
unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
unsigned int vm_pageout_cleaned_fault_reactivated = 0;
unsigned int vm_pageout_cleaned_commit_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */
unsigned int vm_pageout_cleaned_busy = 0;
unsigned int vm_pageout_cleaned_nolock = 0;

unsigned int vm_pageout_inactive_dirty_internal = 0;    /* debugging */
unsigned int vm_pageout_inactive_dirty_external = 0;    /* debugging */
unsigned int vm_pageout_inactive_deactivated = 0;       /* debugging */
unsigned int vm_pageout_inactive_anonymous = 0;         /* debugging */
unsigned int vm_pageout_dirty_no_pager = 0;             /* debugging */
unsigned int vm_pageout_purged_objects = 0;             /* debugging */
unsigned int vm_stat_discard = 0;                       /* debugging */
unsigned int vm_stat_discard_sent = 0;                  /* debugging */
unsigned int vm_stat_discard_failure = 0;               /* debugging */
unsigned int vm_stat_discard_throttle = 0;              /* debugging */
unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */
unsigned int vm_pageout_catch_ups = 0;                  /* debugging */
unsigned int vm_pageout_inactive_force_reclaim = 0;     /* debugging */

unsigned int vm_pageout_scan_reclaimed_throttled = 0;
unsigned int vm_pageout_scan_active_throttled = 0;
unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
unsigned int vm_pageout_scan_inactive_throttled_external = 0;
unsigned int vm_pageout_scan_throttle = 0;                      /* debugging */
unsigned int vm_pageout_scan_burst_throttle = 0;                /* debugging */
unsigned int vm_pageout_scan_empty_throttle = 0;                /* debugging */
unsigned int vm_pageout_scan_swap_throttle = 0;                 /* debugging */
unsigned int vm_pageout_scan_deadlock_detected = 0;             /* debugging */
unsigned int vm_pageout_scan_active_throttle_success = 0;       /* debugging */
unsigned int vm_pageout_scan_inactive_throttle_success = 0;     /* debugging */
unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0; /* debugging */
unsigned int vm_page_speculative_count_drifts = 0;
unsigned int vm_page_speculative_count_drift_max = 0;


/*
 * Backing store throttle when BS is exhausted
 */
unsigned int vm_backing_store_low = 0;

unsigned int vm_pageout_out_of_line = 0;
unsigned int vm_pageout_in_place = 0;

unsigned int vm_page_steal_pageout_page = 0;

/*
 * ENCRYPTED SWAP:
 * counters and statistics...
 */
unsigned long vm_page_decrypt_counter = 0;
unsigned long vm_page_decrypt_for_upl_counter = 0;
unsigned long vm_page_encrypt_counter = 0;
unsigned long vm_page_encrypt_abort_counter = 0;
unsigned long vm_page_encrypt_already_encrypted_counter = 0;
boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */

struct vm_pageout_queue vm_pageout_queue_internal;
struct vm_pageout_queue vm_pageout_queue_external;

unsigned int vm_page_speculative_target = 0;

vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;

boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;

#if DEVELOPMENT || DEBUG
unsigned long vm_cs_validated_resets = 0;
#endif

int vm_debug_events = 0;

#if CONFIG_MEMORYSTATUS
#if !CONFIG_JETSAM
extern boolean_t memorystatus_idle_exit_from_VM(void);
#endif
extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
extern void memorystatus_on_pageout_scan_end(void);
#endif

boolean_t vm_page_compressions_failing = FALSE;

/*
 * Routine:     vm_backing_store_disable
 * Purpose:
 *      Suspend non-privileged threads wishing to extend
 *      backing store when we are low on backing store
 *      (Synchronized by caller)
 */
void
vm_backing_store_disable(
    boolean_t disable)
{
    if (disable) {
        vm_backing_store_low = 1;
    } else {
        if (vm_backing_store_low) {
            vm_backing_store_low = 0;
            thread_wakeup((event_t) &vm_backing_store_low);
        }
    }
}


#if MACH_CLUSTER_STATS
unsigned long vm_pageout_cluster_dirtied = 0;
unsigned long vm_pageout_cluster_cleaned = 0;
unsigned long vm_pageout_cluster_collisions = 0;
unsigned long vm_pageout_cluster_clusters = 0;
unsigned long vm_pageout_cluster_conversions = 0;
unsigned long vm_pageout_target_collisions = 0;
unsigned long vm_pageout_target_page_dirtied = 0;
unsigned long vm_pageout_target_page_freed = 0;
#define CLUSTER_STAT(clause) clause
#else /* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif /* MACH_CLUSTER_STATS */
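/*
 * Annotation (illustrative): CLUSTER_STAT() is a compile-time switch;
 * its argument is emitted verbatim when MACH_CLUSTER_STATS is configured
 * and dropped entirely otherwise, so a call site such as
 *
 *      CLUSTER_STAT(vm_pageout_target_page_freed++;)
 *
 * (used in vm_pageout_object_terminate() below) costs nothing in a
 * stats-free build.
 */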

/*
 * Routine:     vm_pageout_object_terminate
 * Purpose:
 *      Destroy the pageout_object, and perform all of the
 *      required cleanup actions.
 *
 * In/Out conditions:
 *      The object must be locked, and will be returned locked.
 */
void
vm_pageout_object_terminate(
    vm_object_t object)
{
    vm_object_t shadow_object;

    /*
     * Deal with the deallocation (last reference) of a pageout object
     * (used for cleaning-in-place) by dropping the paging references/
     * freeing pages in the original object.
     */

    assert(object->pageout);
    shadow_object = object->shadow;
    vm_object_lock(shadow_object);

    while (!queue_empty(&object->memq)) {
        vm_page_t          p, m;
        vm_object_offset_t offset;

        p = (vm_page_t) queue_first(&object->memq);

        assert(p->private);
        assert(p->pageout);
        p->pageout = FALSE;
        assert(!p->cleaning);
        assert(!p->laundry);

        offset = p->offset;
        VM_PAGE_FREE(p);
        p = VM_PAGE_NULL;

        m = vm_page_lookup(shadow_object,
                           offset + object->vo_shadow_offset);

        if (m == VM_PAGE_NULL)
            continue;

        assert((m->dirty) || (m->precious) ||
               (m->busy && m->cleaning));

        /*
         * Handle the trusted pager throttle.
         * Also decrement the burst throttle (if external).
         */
        vm_page_lock_queues();
        if (m->pageout_queue)
            vm_pageout_throttle_up(m);

        /*
         * Handle the "target" page(s).  These pages are to be freed if
         * successfully cleaned.  Target pages are always busy, and are
         * wired exactly once.  The initial target pages are not mapped,
         * (so cannot be referenced or modified) but converted target
         * pages may have been modified between the selection as an
         * adjacent page and conversion to a target.
         */
        if (m->pageout) {
            assert(m->busy);
            assert(m->wire_count == 1);
            m->cleaning = FALSE;
            m->encrypted_cleaning = FALSE;
            m->pageout = FALSE;
#if MACH_CLUSTER_STATS
            if (m->wanted) vm_pageout_target_collisions++;
#endif
            /*
             * Revoke all access to the page.  Since the object is
             * locked, and the page is busy, this prevents the page
             * from being dirtied after the pmap_disconnect() call
             * returns.
             *
             * Since the page is left "dirty" but "not modified", we
             * can detect whether the page was redirtied during
             * pageout by checking the modify state.
             */
            if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) {
                SET_PAGE_DIRTY(m, FALSE);
            } else {
                m->dirty = FALSE;
            }

            if (m->dirty) {
                CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
                vm_page_unwire(m, TRUE);    /* reactivates */
                VM_STAT_INCR(reactivations);
                PAGE_WAKEUP_DONE(m);
            } else {
                CLUSTER_STAT(vm_pageout_target_page_freed++;)
                vm_page_free(m);            /* clears busy, etc. */
            }
            vm_page_unlock_queues();
            continue;
        }
        /*
         * Handle the "adjacent" pages.  These pages were cleaned in
         * place, and should be left alone.
         * If prep_pin_count is nonzero, then someone is using the
         * page, so make it active.
         */
        if (!m->active && !m->inactive && !m->throttled && !m->private) {
            if (m->reference)
                vm_page_activate(m);
            else
                vm_page_deactivate(m);
        }
        if (m->overwriting) {
            /*
             * the (COPY_OUT_FROM == FALSE) request_page_list case
             */
            if (m->busy) {
                /*
                 * We do not re-set m->dirty !
                 * The page was busy so no extraneous activity
                 * could have occurred.  COPY_INTO is a read into the
                 * new pages.  CLEAN_IN_PLACE does actually write
                 * out the pages but handling outside of this code
                 * will take care of resetting dirty.  We clear the
                 * modify however for the Programmed I/O case.
                 */
                pmap_clear_modify(m->phys_page);

                m->busy = FALSE;
                m->absent = FALSE;
            } else {
                /*
                 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
                 * Occurs when the original page was wired
                 * at the time of the list request
                 */
                assert(VM_PAGE_WIRED(m));
                vm_page_unwire(m, TRUE);    /* reactivates */
            }
            m->overwriting = FALSE;
        } else {
            /*
             * Set the dirty state according to whether or not the page was
             * modified during the pageout.  Note that we purposefully do
             * NOT call pmap_clear_modify since the page is still mapped.
             * If the page were to be dirtied between the 2 calls, this
             * fact would be lost.  This code is only necessary to
             * maintain statistics, since the pmap module is always
             * consulted if m->dirty is false.
             */
#if MACH_CLUSTER_STATS
            m->dirty = pmap_is_modified(m->phys_page);

            if (m->dirty)  vm_pageout_cluster_dirtied++;
            else           vm_pageout_cluster_cleaned++;
            if (m->wanted) vm_pageout_cluster_collisions++;
#else
            m->dirty = FALSE;
#endif
        }
        if (m->encrypted_cleaning == TRUE) {
            m->encrypted_cleaning = FALSE;
            m->busy = FALSE;
        }
        m->cleaning = FALSE;

        /*
         * Wakeup any thread waiting for the page to be un-cleaning.
         */
        PAGE_WAKEUP(m);
        vm_page_unlock_queues();
    }
    /*
     * Account for the paging reference taken in vm_paging_object_allocate.
     */
    vm_object_activity_end(shadow_object);
    vm_object_unlock(shadow_object);

    assert(object->ref_count == 0);
    assert(object->paging_in_progress == 0);
    assert(object->activity_in_progress == 0);
    assert(object->resident_page_count == 0);
    return;
}

/*
 * Routine:     vm_pageclean_setup
 *
 * Purpose:     setup a page to be cleaned (made non-dirty), but not
 *              necessarily flushed from the VM page cache.
 *              This is accomplished by cleaning in place.
 *
 *              The page must not be busy, and new_object
 *              must be locked.
 *
 */
void
vm_pageclean_setup(
    vm_page_t          m,
    vm_page_t          new_m,
    vm_object_t        new_object,
    vm_object_offset_t new_offset)
{
    assert(!m->busy);
#if 0
    assert(!m->cleaning);
#endif

    XPR(XPR_VM_PAGEOUT,
        "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
        m->object, m->offset, m,
        new_m, new_offset);

    pmap_clear_modify(m->phys_page);

    /*
     * Mark original page as cleaning in place.
     */
    m->cleaning = TRUE;
    SET_PAGE_DIRTY(m, FALSE);
    m->precious = FALSE;

    /*
     * Convert the fictitious page to a private shadow of
     * the real page.
     */
    assert(new_m->fictitious);
    assert(new_m->phys_page == vm_page_fictitious_addr);
    new_m->fictitious = FALSE;
    new_m->private = TRUE;
    new_m->pageout = TRUE;
    new_m->phys_page = m->phys_page;

    vm_page_lockspin_queues();
    vm_page_wire(new_m);
    vm_page_unlock_queues();

    vm_page_insert(new_m, new_object, new_offset);
    assert(!new_m->wanted);
    new_m->busy = FALSE;
}

/*
 * Routine:     vm_pageout_initialize_page
 * Purpose:
 *      Causes the specified page to be initialized in
 *      the appropriate memory object.  This routine is used to push
 *      pages into a copy-object when they are modified in the
 *      permanent object.
 *
 *      The page is moved to a temporary object and paged out.
 *
 * In/out conditions:
 *      The page in question must not be on any pageout queues.
 *      The object to which it belongs must be locked.
 *      The page must be busy, but not hold a paging reference.
 *
 * Implementation:
 *      Move this page to a completely new object.
 */
void
vm_pageout_initialize_page(
    vm_page_t m)
{
    vm_object_t        object;
    vm_object_offset_t paging_offset;
    memory_object_t    pager;

    XPR(XPR_VM_PAGEOUT,
        "vm_pageout_initialize_page, page 0x%X\n",
        m, 0, 0, 0, 0);
    assert(m->busy);

    /*
     * Verify that we really want to clean this page
     */
    assert(!m->absent);
    assert(!m->error);
    assert(m->dirty);

    /*
     * Create a paging reference to let us play with the object.
     */
    object = m->object;
    paging_offset = m->offset + object->paging_offset;

    if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
        VM_PAGE_FREE(m);
        panic("reservation without pageout?"); /* alan */
        vm_object_unlock(object);

        return;
    }

    /*
     * If there's no pager, then we can't clean the page.  This should
     * never happen since this should be a copy object and therefore not
     * an external object, so the pager should always be there.
     */

    pager = object->pager;

    if (pager == MEMORY_OBJECT_NULL) {
        VM_PAGE_FREE(m);
        panic("missing pager for copy object");
        return;
    }

    /*
     * set the page for future call to vm_fault_list_request
     */
    pmap_clear_modify(m->phys_page);
    SET_PAGE_DIRTY(m, FALSE);
    m->pageout = TRUE;

    /*
     * keep the object from collapsing or terminating
     */
    vm_object_paging_begin(object);
    vm_object_unlock(object);

    /*
     * Write the data to its pager.
     * Note that the data is passed by naming the new object,
     * not a virtual address; the pager interface has been
     * manipulated to use the "internal memory" data type.
     * [The object reference from its allocation is donated
     * to the eventual recipient.]
     */
    memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);

    vm_object_lock(object);
    vm_object_paging_end(object);
}

#if MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES 16
struct {
    unsigned long pages_in_cluster;
    unsigned long pages_at_higher_offsets;
    unsigned long pages_at_lower_offsets;
} cluster_stats[MAXCLUSTERPAGES];
#endif /* MACH_CLUSTER_STATS */


/*
 * vm_pageout_cluster:
 *
 * Given a page, queue it to the appropriate I/O thread,
 * which will page it out and attempt to clean adjacent pages
 * in the same operation.
 *
 * The object and queues must be locked.  We will take a
 * paging reference to prevent deallocation or collapse when we
 * release the object lock back at the call site.  The I/O thread
 * is responsible for consuming this reference.
 *
 * The page must not be on any pageout queue.
 */

void
vm_pageout_cluster(vm_page_t m, boolean_t pageout)
{
    vm_object_t object = m->object;
    struct vm_pageout_queue *q;


    XPR(XPR_VM_PAGEOUT,
        "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
        object, m->offset, m, 0, 0);

    VM_PAGE_CHECK(m);
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    vm_object_lock_assert_exclusive(object);

    /*
     * Only a certain kind of page is appreciated here.
     */
    assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
    assert(!m->cleaning && !m->pageout && !m->laundry);
#ifndef CONFIG_FREEZE
    assert(!m->inactive && !m->active);
    assert(!m->throttled);
#endif

    /*
     * protect the object from collapse or termination
     */
    vm_object_activity_begin(object);

    m->pageout = pageout;

    if (object->internal == TRUE) {
        if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
            m->busy = TRUE;

        q = &vm_pageout_queue_internal;
    } else
        q = &vm_pageout_queue_external;

    /*
     * pgo_laundry count is tied to the laundry bit
     */
    m->laundry = TRUE;
    q->pgo_laundry++;

    m->pageout_queue = TRUE;
    queue_enter(&q->pgo_pending, m, vm_page_t, pageq);

    if (q->pgo_idle == TRUE) {
        q->pgo_idle = FALSE;
        thread_wakeup((event_t) &q->pgo_pending);
    }
    VM_PAGE_CHECK(m);
}


unsigned long vm_pageout_throttle_up_count = 0;

/*
 * A page is back from laundry or we are stealing it back from
 * the laundering state.  See if there are some pages waiting to
 * go to laundry and if we can let some of them go now.
 *
 * Object and page queues must be locked.
 */
void
vm_pageout_throttle_up(
    vm_page_t m)
{
    struct vm_pageout_queue *q;

    assert(m->object != VM_OBJECT_NULL);
    assert(m->object != kernel_object);

#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
    vm_object_lock_assert_exclusive(m->object);
#endif

    vm_pageout_throttle_up_count++;

    if (m->object->internal == TRUE)
        q = &vm_pageout_queue_internal;
    else
        q = &vm_pageout_queue_external;

    if (m->pageout_queue == TRUE) {

        queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
        m->pageout_queue = FALSE;

        m->pageq.next = NULL;
        m->pageq.prev = NULL;

        vm_object_activity_end(m->object);
    }
    if (m->laundry == TRUE) {

        m->laundry = FALSE;
        q->pgo_laundry--;

        if (q->pgo_throttled == TRUE) {
            q->pgo_throttled = FALSE;
            thread_wakeup((event_t) &q->pgo_laundry);
        }
        if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
            q->pgo_draining = FALSE;
            thread_wakeup((event_t) (&q->pgo_laundry+1));
        }
    }
}


static void
vm_pageout_throttle_up_batch(
    struct vm_pageout_queue *q,
    int batch_cnt)
{
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif

    vm_pageout_throttle_up_count += batch_cnt;

    q->pgo_laundry -= batch_cnt;

    if (q->pgo_throttled == TRUE) {
        q->pgo_throttled = FALSE;
        thread_wakeup((event_t) &q->pgo_laundry);
    }
    if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
        q->pgo_draining = FALSE;
        thread_wakeup((event_t) (&q->pgo_laundry+1));
    }
}
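/*
 * Annotation (assumption, based on reading the two routines above):
 * vm_pageout_throttle_up_batch() looks like the batched counterpart of
 * the laundry accounting in vm_pageout_throttle_up(), for callers that
 * have already dequeued batch_cnt laundry pages themselves; only the
 * shared pgo_laundry count and the throttle/drain wakeups are handled
 * here, so the wakeup checks run once per batch instead of once per page.
 */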


/*
 * VM memory pressure monitoring.
 *
 * vm_pageout_scan() keeps track of the number of pages it considers and
 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
 *
 * compute_memory_pressure() is called every second from compute_averages()
 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
 * of reclaimed pages in a new vm_pageout_stat[] bucket.
 *
 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
 * The caller provides the number of seconds ("nsecs") worth of statistics
 * it wants, up to 30 seconds.
 * It computes the number of pages reclaimed in the past "nsecs" seconds and
 * also returns the number of pages the system still needs to reclaim at this
 * moment in time.
 */
#define VM_PAGEOUT_STAT_SIZE 31
struct vm_pageout_stat {
    unsigned int considered;
    unsigned int reclaimed;
} vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0}, };
unsigned int vm_pageout_stat_now = 0;
unsigned int vm_memory_pressure = 0;

#define VM_PAGEOUT_STAT_BEFORE(i) \
    (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
#define VM_PAGEOUT_STAT_AFTER(i) \
    (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
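/*
 * Annotation (illustrative): the two macros above step through the
 * 31 one-second buckets as a ring, e.g. VM_PAGEOUT_STAT_BEFORE(0) == 30
 * and VM_PAGEOUT_STAT_AFTER(30) == 0.  One bucket is always the one
 * currently being filled, which leaves the 30 seconds of completed
 * history mentioned in the comment above.
 */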

#if VM_PAGE_BUCKETS_CHECK
int vm_page_buckets_check_interval = 10; /* in seconds */
#endif /* VM_PAGE_BUCKETS_CHECK */

/*
 * Called from compute_averages().
 */
void
compute_memory_pressure(
    __unused void *arg)
{
    unsigned int vm_pageout_next;

#if VM_PAGE_BUCKETS_CHECK
    /* check the consistency of VM page buckets at regular interval */
    static int counter = 0;
    if ((++counter % vm_page_buckets_check_interval) == 0) {
        vm_page_buckets_check();
    }
#endif /* VM_PAGE_BUCKETS_CHECK */

    vm_memory_pressure =
        vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed;

    commpage_set_memory_pressure(vm_memory_pressure);

    /* move "now" forward */
    vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
    vm_pageout_stats[vm_pageout_next].considered = 0;
    vm_pageout_stats[vm_pageout_next].reclaimed = 0;
    vm_pageout_stat_now = vm_pageout_next;
}


/*
 * IMPORTANT
 * mach_vm_ctl_page_free_wanted() is called indirectly, via
 * mach_vm_pressure_monitor(), when taking a stackshot.  Therefore,
 * it must be safe in the restricted stackshot context.  Locks and/or
 * blocking are not allowable.
 */
unsigned int
mach_vm_ctl_page_free_wanted(void)
{
    unsigned int page_free_target, page_free_count, page_free_wanted;

    page_free_target = vm_page_free_target;
    page_free_count = vm_page_free_count;
    if (page_free_target > page_free_count) {
        page_free_wanted = page_free_target - page_free_count;
    } else {
        page_free_wanted = 0;
    }

    return page_free_wanted;
}


/*
 * IMPORTANT:
 * mach_vm_pressure_monitor() is called when taking a stackshot, with
 * wait_for_pressure FALSE, so that code path must remain safe in the
 * restricted stackshot context.  No blocking or locks are allowable
 * on that code path.
 */

kern_return_t
mach_vm_pressure_monitor(
    boolean_t    wait_for_pressure,
    unsigned int nsecs_monitored,
    unsigned int *pages_reclaimed_p,
    unsigned int *pages_wanted_p)
{
    wait_result_t wr;
    unsigned int  vm_pageout_then, vm_pageout_now;
    unsigned int  pages_reclaimed;

    /*
     * We don't take the vm_page_queue_lock here because we don't want
     * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
     * thread when it's trying to reclaim memory.  We don't need fully
     * accurate monitoring anyway...
     */

    if (wait_for_pressure) {
        /* wait until there's memory pressure */
        while (vm_page_free_count >= vm_page_free_target) {
            wr = assert_wait((event_t) &vm_page_free_wanted,
                             THREAD_INTERRUPTIBLE);
            if (wr == THREAD_WAITING) {
                wr = thread_block(THREAD_CONTINUE_NULL);
            }
            if (wr == THREAD_INTERRUPTED) {
                return KERN_ABORTED;
            }
            if (wr == THREAD_AWAKENED) {
                /*
                 * The memory pressure might have already
                 * been relieved but let's not block again
                 * and let's report that there was memory
                 * pressure at some point.
                 */
                break;
            }
        }
    }

    /* provide the number of pages the system wants to reclaim */
    if (pages_wanted_p != NULL) {
        *pages_wanted_p = mach_vm_ctl_page_free_wanted();
    }

    if (pages_reclaimed_p == NULL) {
        return KERN_SUCCESS;
    }

    /* provide number of pages reclaimed in the last "nsecs_monitored" */
    do {
        vm_pageout_now = vm_pageout_stat_now;
        pages_reclaimed = 0;
        for (vm_pageout_then =
                 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
             vm_pageout_then != vm_pageout_now &&
                 nsecs_monitored-- != 0;
             vm_pageout_then =
                 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
            pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed;
        }
    } while (vm_pageout_now != vm_pageout_stat_now);
    *pages_reclaimed_p = pages_reclaimed;

    return KERN_SUCCESS;
}
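/*
 * Annotation on the loop above: it walks backwards through the stat ring
 * from the bucket preceding "now", summing at most nsecs_monitored
 * one-second buckets; e.g. nsecs_monitored = 5 sums the five most recent
 * completed buckets.  The enclosing do/while retries the whole walk if
 * compute_memory_pressure() advanced vm_pageout_stat_now mid-scan, which
 * substitutes for the locking this stackshot-safe path cannot use.
 */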



/*
 * function in BSD to apply I/O throttle to the pageout thread
 */
extern void vm_pageout_io_throttle(void);

/*
 * Page States: Used below to maintain the page state
 * before it's removed from its Q.  This saved state
 * helps us do the right accounting in certain cases
 */
#define PAGE_STATE_SPECULATIVE      1
#define PAGE_STATE_ANONYMOUS        2
#define PAGE_STATE_INACTIVE         3
#define PAGE_STATE_INACTIVE_FIRST   4
#define PAGE_STATE_CLEAN            5


#define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m)                 \
    MACRO_BEGIN                                                 \
    /*                                                          \
     * If a "reusable" page somehow made it back into           \
     * the active queue, it's been re-used and is not           \
     * quite re-usable.                                         \
     * If the VM object was "all_reusable", consider it         \
     * as "all re-used" instead of converting it to             \
     * "partially re-used", which could be expensive.           \
     */                                                         \
    if ((m)->reusable ||                                        \
        (m)->object->all_reusable) {                            \
        vm_object_reuse_pages((m)->object,                      \
                              (m)->offset,                      \
                              (m)->offset + PAGE_SIZE_64,       \
                              FALSE);                           \
    }                                                           \
    MACRO_END


#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT     64
#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024

#define FCS_IDLE              0
#define FCS_DELAYED           1
#define FCS_DEADLOCK_DETECTED 2

struct flow_control {
    int             state;
    mach_timespec_t ts;
};
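/*
 * Annotation: flow_control pairs one of the FCS_* states above with a
 * timestamp for the scan's inactive-queue throttling; vm_pageout_scan()
 * starts it in FCS_IDLE below and evidently uses the timestamp to decide
 * when a stalled laundry has waited long enough to count as a deadlock.
 */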
1201 | ||
1202 | uint32_t vm_pageout_considered_page = 0; | |
1203 | uint32_t vm_page_filecache_min = 0; | |
1204 | ||
1205 | #define ANONS_GRABBED_LIMIT 2 | |
1206 | ||
1207 | /* | |
1208 | * vm_pageout_scan does the dirty work for the pageout daemon. | |
1209 | * It returns with both vm_page_queue_free_lock and vm_page_queue_lock | |
1210 | * held and vm_page_free_wanted == 0. | |
1211 | */ | |
1212 | void | |
1213 | vm_pageout_scan(void) | |
1214 | { | |
1215 | unsigned int loop_count = 0; | |
1216 | unsigned int inactive_burst_count = 0; | |
1217 | unsigned int active_burst_count = 0; | |
1218 | unsigned int reactivated_this_call; | |
1219 | unsigned int reactivate_limit; | |
1220 | vm_page_t local_freeq = NULL; | |
1221 | int local_freed = 0; | |
1222 | int delayed_unlock; | |
1223 | int delayed_unlock_limit = 0; | |
1224 | int refmod_state = 0; | |
1225 | int vm_pageout_deadlock_target = 0; | |
1226 | struct vm_pageout_queue *iq; | |
1227 | struct vm_pageout_queue *eq; | |
1228 | struct vm_speculative_age_q *sq; | |
1229 | struct flow_control flow_control = { 0, { 0, 0 } }; | |
1230 | boolean_t inactive_throttled = FALSE; | |
1231 | boolean_t try_failed; | |
1232 | mach_timespec_t ts; | |
1233 | unsigned int msecs = 0; | |
1234 | vm_object_t object; | |
1235 | vm_object_t last_object_tried; | |
1236 | uint32_t catch_up_count = 0; | |
1237 | uint32_t inactive_reclaim_run; | |
1238 | boolean_t forced_reclaim; | |
1239 | boolean_t exceeded_burst_throttle; | |
1240 | boolean_t grab_anonymous = FALSE; | |
1241 | boolean_t force_anonymous = FALSE; | |
1242 | int anons_grabbed = 0; | |
1243 | int page_prev_state = 0; | |
1244 | int cache_evict_throttle = 0; | |
1245 | uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0; | |
1246 | int force_purge = 0; | |
1247 | ||
1248 | #if VM_PRESSURE_EVENTS | |
1249 | vm_pressure_level_t pressure_level; | |
1250 | #endif /* VM_PRESSURE_EVENTS */ | |
1251 | ||
1252 | VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START, | |
1253 | vm_pageout_speculative_clean, vm_pageout_inactive_clean, | |
1254 | vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external); | |
1255 | ||
1256 | flow_control.state = FCS_IDLE; | |
1257 | iq = &vm_pageout_queue_internal; | |
1258 | eq = &vm_pageout_queue_external; | |
1259 | sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q]; | |
1260 | ||
1261 | ||
1262 | XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0); | |
1263 | ||
1264 | ||
1265 | vm_page_lock_queues(); | |
1266 | delayed_unlock = 1; /* must be nonzero if Qs are locked, 0 if unlocked */ | |
1267 | ||
1268 | /* | |
1269 | * Calculate the max number of referenced pages on the inactive | |
1270 | * queue that we will reactivate. | |
1271 | */ | |
1272 | reactivated_this_call = 0; | |
1273 | reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count + | |
1274 | vm_page_inactive_count); | |
1275 | inactive_reclaim_run = 0; | |
1276 | ||
1277 | vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count; | |
1278 | ||
1279 | /* | |
1280 | * We want to gradually dribble pages from the active queue | |
1281 | * to the inactive queue. If we let the inactive queue get | |
1282 | * very small, and then suddenly dump many pages into it, | |
1283 | * those pages won't get a sufficient chance to be referenced | |
1284 | * before we start taking them from the inactive queue. | |
1285 | * | |
1286 | * We must limit the rate at which we send pages to the pagers | |
1287 | * so that we don't tie up too many pages in the I/O queues. | |
1288 | * We implement a throttling mechanism using the laundry count | |
1289 | * to limit the number of pages outstanding to the default | |
1290 | * and external pagers. We can bypass the throttles and look | |
1291 | * for clean pages if the pageout queues don't drain in a timely | |
1292 | * fashion since this may indicate that the pageout paths are | |
1293 | * stalled waiting for memory, which only we can provide. | |
1294 | */ | |
1295 | ||
1296 | ||
1297 | Restart: | |
1298 | assert(delayed_unlock!=0); | |
1299 | ||
1300 | /* | |
1301 | * Recalculate vm_page_inactivate_target. | |
1302 | */ | |
1303 | vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count + | |
1304 | vm_page_inactive_count + | |
1305 | vm_page_speculative_count); | |
1306 | ||
1307 | vm_page_anonymous_min = vm_page_inactive_target / 20; | |
1308 | ||
1309 | ||
1310 | /* | |
1311 | * don't want to wake the pageout_scan thread up everytime we fall below | |
1312 | * the targets... set a low water mark at 0.25% below the target | |
1313 | */ | |
1314 | vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400); | |
1315 | ||
1316 | if (vm_page_speculative_percentage > 50) | |
1317 | vm_page_speculative_percentage = 50; | |
1318 | else if (vm_page_speculative_percentage <= 0) | |
1319 | vm_page_speculative_percentage = 1; | |
1320 | ||
1321 | vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count + | |
1322 | vm_page_inactive_count); | |
1323 | ||
1324 | object = NULL; | |
1325 | last_object_tried = NULL; | |
1326 | try_failed = FALSE; | |
1327 | ||
1328 | if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count)) | |
1329 | catch_up_count = vm_page_inactive_count + vm_page_speculative_count; | |
1330 | else | |
1331 | catch_up_count = 0; | |
1332 | ||
1333 | for (;;) { | |
1334 | vm_page_t m; | |
1335 | ||
1336 | DTRACE_VM2(rev, int, 1, (uint64_t *), NULL); | |
1337 | ||
1338 | if (delayed_unlock == 0) { | |
1339 | vm_page_lock_queues(); | |
1340 | delayed_unlock = 1; | |
1341 | } | |
1342 | if (vm_upl_wait_for_pages < 0) | |
1343 | vm_upl_wait_for_pages = 0; | |
1344 | ||
1345 | delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages; | |
1346 | ||
1347 | if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX) | |
1348 | delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX; | |
1349 | ||
1350 | /* | |
1351 | * Move pages from active to inactive if we're below the target | |
1352 | */ | |
1353 | /* if we are trying to make clean, we need to make sure we actually have inactive - mj */ | |
1354 | if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target) | |
1355 | goto done_moving_active_pages; | |
1356 | ||
1357 | if (object != NULL) { | |
1358 | vm_object_unlock(object); | |
1359 | object = NULL; | |
1360 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
1361 | } | |
1362 | /* | |
1363 | * Don't sweep through active queue more than the throttle | |
1364 | * which should be kept relatively low | |
1365 | */ | |
1366 | active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count); | |
1367 | ||
1368 | VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START, | |
1369 | vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed); | |
1370 | ||
1371 | VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE, | |
1372 | vm_pageout_speculative_clean, vm_pageout_inactive_clean, | |
1373 | vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external); | |
1374 | memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START); | |
1375 | ||
1376 | ||
1377 | while (!queue_empty(&vm_page_queue_active) && active_burst_count--) { | |
1378 | ||
1379 | vm_pageout_active++; | |
1380 | ||
1381 | m = (vm_page_t) queue_first(&vm_page_queue_active); | |
1382 | ||
1383 | assert(m->active && !m->inactive); | |
1384 | assert(!m->laundry); | |
1385 | assert(m->object != kernel_object); | |
1386 | assert(m->phys_page != vm_page_guard_addr); | |
1387 | ||
1388 | DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); | |
1389 | ||
1390 | /* | |
1391 | * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise... | |
1392 | * | |
1393 | * a TLB flush isn't really needed here since at worst we'll miss the reference bit being | |
1394 | * updated in the PTE if a remote processor still has this mapping cached in its TLB when the | |
1395 | * new reference happens. If no futher references happen on the page after that remote TLB flushes | |
1396 | * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue | |
1397 | * by pageout_scan, which is just fine since the last reference would have happened quite far | |
1398 | * in the past (TLB caches don't hang around for very long), and of course could just as easily | |
1399 | * have happened before we moved the page | |
1400 | */ | |
1401 | pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL); | |
1402 | ||
1403 | /* | |
1404 | * The page might be absent or busy, | |
1405 | * but vm_page_deactivate can handle that. | |
1406 | * FALSE indicates that we don't want a H/W clear reference | |
1407 | */ | |
1408 | vm_page_deactivate_internal(m, FALSE); | |
1409 | ||
1410 | if (delayed_unlock++ > delayed_unlock_limit) { | |
1411 | ||
1412 | if (local_freeq) { | |
1413 | vm_page_unlock_queues(); | |
1414 | ||
1415 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, | |
1416 | vm_page_free_count, local_freed, delayed_unlock_limit, 1); | |
1417 | ||
1418 | vm_page_free_list(local_freeq, TRUE); | |
1419 | ||
1420 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, | |
1421 | vm_page_free_count, 0, 0, 1); | |
1422 | ||
1423 | local_freeq = NULL; | |
1424 | local_freed = 0; | |
1425 | vm_page_lock_queues(); | |
1426 | } else { | |
1427 | lck_mtx_yield(&vm_page_queue_lock); | |
1428 | } | |
1429 | ||
1430 | delayed_unlock = 1; | |
1431 | ||
1432 | /* | |
1433 | * continue the while loop processing | |
1434 | * the active queue... need to hold | |
1435 | * the page queues lock | |
1436 | */ | |
1437 | } | |
1438 | } | |
1439 | ||
1440 | VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END, | |
1441 | vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target); | |
1442 | memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END); | |
1443 | ||
1444 | /********************************************************************** | |
1445 | * above this point we're playing with the active queue | |
1446 | * below this point we're playing with the throttling mechanisms | |
1447 | * and the inactive queue | |
1448 | **********************************************************************/ | |
1449 | ||
1450 | done_moving_active_pages: | |
1451 | ||
1452 | if (vm_page_free_count + local_freed >= vm_page_free_target) { | |
1453 | if (object != NULL) { | |
1454 | vm_object_unlock(object); | |
1455 | object = NULL; | |
1456 | } | |
1457 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
1458 | ||
1459 | if (local_freeq) { | |
1460 | vm_page_unlock_queues(); | |
1461 | ||
1462 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, | |
1463 | vm_page_free_count, local_freed, delayed_unlock_limit, 2); | |
1464 | ||
1465 | vm_page_free_list(local_freeq, TRUE); | |
1466 | ||
1467 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, | |
1468 | vm_page_free_count, local_freed, 0, 2); | |
1469 | ||
1470 | local_freeq = NULL; | |
1471 | local_freed = 0; | |
1472 | vm_page_lock_queues(); | |
1473 | } | |
1474 | /* | |
1475 | * make sure the pageout I/O threads are running | |
1476 | * throttled in case there are still requests | |
1477 | * in the laundry... since we have met our targets | |
1478 | * we don't need the laundry to be cleaned in a timely | |
1479 | * fashion... so let's avoid interfering with foreground | |
1480 | * activity | |
1481 | */ | |
1482 | vm_pageout_adjust_io_throttles(iq, eq, TRUE); | |
1483 | ||
1484 | /* | |
1485 | * recalculate vm_page_inactivate_target | |
1486 | */ | |
1487 | vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count + | |
1488 | vm_page_inactive_count + | |
1489 | vm_page_speculative_count); | |
1490 | if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && | |
1491 | !queue_empty(&vm_page_queue_active)) { | |
1492 | /* | |
1493 | * inactive target still not met... keep going | |
1494 | * until we get the queues balanced... | |
1495 | */ | |
1496 | continue; | |
1497 | } | |
1498 | lck_mtx_lock(&vm_page_queue_free_lock); | |
1499 | ||
1500 | if ((vm_page_free_count >= vm_page_free_target) && | |
1501 | (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) { | |
1502 | /* | |
1503 | * done - we have met our target *and* | |
1504 | * there is no one waiting for a page. | |
1505 | */ | |
1506 | return_from_scan: | |
1507 | assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); | |
1508 | ||
1509 | VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE, | |
1510 | vm_pageout_inactive, vm_pageout_inactive_used, 0, 0); | |
1511 | VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END, | |
1512 | vm_pageout_speculative_clean, vm_pageout_inactive_clean, | |
1513 | vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external); | |
1514 | ||
1515 | return; | |
1516 | } | |
1517 | lck_mtx_unlock(&vm_page_queue_free_lock); | |
1518 | } | |
1519 | ||
1520 | /* | |
1521 | * Before anything, we check if we have any ripe volatile | |
1522 | * objects around. If so, try to purge the first object. | |
1523 | * If the purge fails, fall through to reclaim a page instead. | |
1524 | * If the purge succeeds, go back to the top and re-evaluate | |
1525 | * the new memory situation. | |
1526 | */ | |
1527 | ||
1528 | assert(available_for_purge >= 0); | |
1529 | force_purge = 0; /* no force-purging */ | |
1530 | ||
1531 | #if VM_PRESSURE_EVENTS | |
1532 | pressure_level = memorystatus_vm_pressure_level; | |
1533 | ||
1534 | if (pressure_level > kVMPressureNormal) { | |
1535 | ||
1536 | if (pressure_level >= kVMPressureCritical) { | |
1537 | force_purge = memorystatus_purge_on_critical; | |
1538 | } else if (pressure_level >= kVMPressureUrgent) { | |
1539 | force_purge = memorystatus_purge_on_urgent; | |
1540 | } else if (pressure_level >= kVMPressureWarning) { | |
1541 | force_purge = memorystatus_purge_on_warning; | |
1542 | } | |
1543 | } | |
1544 | #endif /* VM_PRESSURE_EVENTS */ | |
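
/*
 * The cascade above maps the current memorystatus pressure level to a
 * purge budget, with each higher tier overriding the lower ones. A
 * self-contained sketch of that mapping (not XNU code); the enum values
 * are invented and only their relative order matters.
 */
#if 0 /* illustrative sketch -- not compiled */
typedef enum {
	kPressureNormal = 0,
	kPressureWarning,
	kPressureUrgent,
	kPressureCritical
} pressure_t;

static int purge_budget(pressure_t level,
			int on_warning, int on_urgent, int on_critical)
{
	if (level >= kPressureCritical) return on_critical;
	if (level >= kPressureUrgent)   return on_urgent;
	if (level >= kPressureWarning)  return on_warning;
	return 0;       /* normal pressure: no force-purging */
}
#endif
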
1545 | ||
1546 | if (available_for_purge || force_purge) { | |
1547 | ||
1548 | if (object != NULL) { | |
1549 | vm_object_unlock(object); | |
1550 | object = NULL; | |
1551 | } | |
1552 | ||
1553 | memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START); | |
1554 | ||
1555 | VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0); | |
1556 | if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) { | |
1557 | ||
1558 | VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0); | |
1559 | memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END); | |
1560 | continue; | |
1561 | } | |
1562 | VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1); | |
1563 | memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END); | |
1564 | } | |
1565 | ||
1566 | if (queue_empty(&sq->age_q) && vm_page_speculative_count) { | |
1567 | /* | |
1568 | * try to pull pages from the aging bins... | |
1569 | * see vm_page.h for an explanation of how | |
1570 | * this mechanism works | |
1571 | */ | |
1572 | struct vm_speculative_age_q *aq; | |
1573 | mach_timespec_t ts_fully_aged; | |
1574 | boolean_t can_steal = FALSE; | |
1575 | int num_scanned_queues; | |
1576 | ||
1577 | aq = &vm_page_queue_speculative[speculative_steal_index]; | |
1578 | ||
1579 | num_scanned_queues = 0; | |
1580 | while (queue_empty(&aq->age_q) && | |
1581 | num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) { | |
1582 | ||
1583 | speculative_steal_index++; | |
1584 | ||
1585 | if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) | |
1586 | speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q; | |
1587 | ||
1588 | aq = &vm_page_queue_speculative[speculative_steal_index]; | |
1589 | } | |
1590 | ||
1591 | if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) { | |
1592 | /* | |
1593 | * XXX We've scanned all the speculative | |
1594 | * queues but still haven't found one | |
1595 | * that is not empty, even though | |
1596 | * vm_page_speculative_count is not 0. | |
1597 | * | |
1598 | * report the anomaly... | |
1599 | */ | |
1600 | printf("vm_pageout_scan: " | |
1601 | "all speculative queues empty " | |
1602 | "but count=%d. Re-adjusting.\n", | |
1603 | vm_page_speculative_count); | |
1604 | if (vm_page_speculative_count > vm_page_speculative_count_drift_max) | |
1605 | vm_page_speculative_count_drift_max = vm_page_speculative_count; | |
1606 | vm_page_speculative_count_drifts++; | |
1607 | #if 6553678 | |
1608 | Debugger("vm_pageout_scan: no speculative pages"); | |
1609 | #endif | |
1610 | /* readjust... */ | |
1611 | vm_page_speculative_count = 0; | |
1612 | /* ... and continue */ | |
1613 | continue; | |
1614 | } | |
1615 | ||
1616 | if (vm_page_speculative_count > vm_page_speculative_target) | |
1617 | can_steal = TRUE; | |
1618 | else { | |
1619 | ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000; | |
1620 | ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000) | |
1621 | * 1000 * NSEC_PER_USEC; | |
1622 | ||
1623 | ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts); | |
1624 | ||
1625 | clock_sec_t sec; | |
1626 | clock_nsec_t nsec; | |
1627 | clock_get_system_nanotime(&sec, &nsec); | |
1628 | ts.tv_sec = (unsigned int) sec; | |
1629 | ts.tv_nsec = nsec; | |
1630 | ||
1631 | if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0) | |
1632 | can_steal = TRUE; | |
1633 | } | |
1634 | if (can_steal == TRUE) | |
1635 | vm_page_speculate_ageit(aq); | |
1636 | } | |
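
/*
 * Two mechanisms are at work above: the steal index rotates through the
 * speculative age bins looking for a non-empty one, and a bin only becomes
 * stealable once "now" is past its timestamp plus the full aging interval
 * (MAX_Q bins aged every vm_page_speculative_q_age_ms). A sketch of both,
 * not XNU code; the bin count and helper names are invented.
 */
#if 0 /* illustrative sketch -- not compiled */
#include <stdint.h>

#define NSEC_PER_USEC 1000ULL
#define MAX_AGE_Q     16        /* invented bin count */

struct ts { uint32_t tv_sec; int32_t tv_nsec; };

/* split a millisecond interval the way the code above does */
static struct ts ms_to_ts(uint64_t ms)
{
	struct ts t;
	t.tv_sec  = (uint32_t)(ms / 1000);
	t.tv_nsec = (int32_t)((ms % 1000) * 1000 * NSEC_PER_USEC);
	return t;
}

/* rotate through the bins looking for a non-empty one; after MAX_AGE_Q + 1
 * probes every bin has been visited and the caller reports the anomaly */
static int next_nonempty_bin(int idx, int (*bin_empty)(int))
{
	int scanned = 0;

	while (bin_empty(idx) && scanned++ != MAX_AGE_Q) {
		if (++idx > MAX_AGE_Q)
			idx = 0;
	}
	return idx;
}
/* e.g. with bins aged every 500 ms, ms_to_ts(MAX_AGE_Q * 500) == {8, 0}:
 * a bin stamped at time T is stealable once now >= T + 8 s */
#endif
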
1637 | if (queue_empty(&sq->age_q) && cache_evict_throttle == 0) { | |
1638 | int pages_evicted; | |
1639 | ||
1640 | if (object != NULL) { | |
1641 | vm_object_unlock(object); | |
1642 | object = NULL; | |
1643 | } | |
1644 | pages_evicted = vm_object_cache_evict(100, 10); | |
1645 | ||
1646 | if (pages_evicted) { | |
1647 | ||
1648 | vm_pageout_cache_evicted += pages_evicted; | |
1649 | ||
1650 | VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE, | |
1651 | vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0); | |
1652 | memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE); | |
1653 | ||
1654 | /* | |
1655 | * we just freed up to 100 pages, | |
1656 | * so go back to the top of the main loop | |
1657 | * and re-evaluate the memory situation | |
1658 | */ | |
1659 | continue; | |
1660 | } else | |
1661 | cache_evict_throttle = 100; | |
1662 | } | |
1663 | if (cache_evict_throttle) | |
1664 | cache_evict_throttle--; | |
1665 | ||
1666 | /* | |
1667 | * don't let the filecache_min fall below 33% of available memory... | |
1668 | * | |
1669 | * on systems w/o the compressor/swapper, the filecache is always | |
1670 | * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY | |
1671 | * since most (if not all) of the anonymous pages are in the | |
1672 | * throttled queue (which isn't counted as available) which | |
1673 | * effectively disables this filter | |
1674 | */ | |
1675 | vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 3); | |
1676 | ||
1677 | exceeded_burst_throttle = FALSE; | |
1678 | /* | |
1679 | * Sometimes we have to pause: | |
1680 | * 1) No inactive pages - nothing to do. | |
1681 | * 2) Loop control - no acceptable pages found on the inactive queue | |
1682 | * within the last vm_pageout_burst_inactive_throttle iterations | |
1683 | * 3) Flow control - default pageout queue is full | |
1684 | */ | |
1685 | if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_anonymous) && queue_empty(&sq->age_q)) { | |
1686 | vm_pageout_scan_empty_throttle++; | |
1687 | msecs = vm_pageout_empty_wait; | |
1688 | goto vm_pageout_scan_delay; | |
1689 | ||
1690 | } else if (inactive_burst_count >= | |
1691 | MIN(vm_pageout_burst_inactive_throttle, | |
1692 | (vm_page_inactive_count + | |
1693 | vm_page_speculative_count))) { | |
1694 | vm_pageout_scan_burst_throttle++; | |
1695 | msecs = vm_pageout_burst_wait; | |
1696 | ||
1697 | exceeded_burst_throttle = TRUE; | |
1698 | goto vm_pageout_scan_delay; | |
1699 | ||
1700 | } else if (vm_page_free_count > (vm_page_free_reserved / 4) && | |
1701 | VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) { | |
1702 | vm_pageout_scan_swap_throttle++; | |
1703 | msecs = vm_pageout_swap_wait; | |
1704 | goto vm_pageout_scan_delay; | |
1705 | ||
1706 | } else if (VM_PAGE_Q_THROTTLED(iq) && | |
1707 | VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { | |
1708 | clock_sec_t sec; | |
1709 | clock_nsec_t nsec; | |
1710 | ||
1711 | switch (flow_control.state) { | |
1712 | ||
1713 | case FCS_IDLE: | |
1714 | if ((vm_page_free_count + local_freed) < vm_page_free_target) { | |
1715 | ||
1716 | if (vm_page_pageable_external_count > vm_page_filecache_min && !queue_empty(&vm_page_queue_inactive)) { | |
1717 | anons_grabbed = ANONS_GRABBED_LIMIT; | |
1718 | goto consider_inactive; | |
1719 | } | |
1720 | if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count) | |
1721 | continue; | |
1722 | } | |
1723 | reset_deadlock_timer: | |
1724 | ts.tv_sec = vm_pageout_deadlock_wait / 1000; | |
1725 | ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC; | |
1726 | clock_get_system_nanotime(&sec, &nsec); | |
1727 | flow_control.ts.tv_sec = (unsigned int) sec; | |
1728 | flow_control.ts.tv_nsec = nsec; | |
1729 | ADD_MACH_TIMESPEC(&flow_control.ts, &ts); | |
1730 | ||
1731 | flow_control.state = FCS_DELAYED; | |
1732 | msecs = vm_pageout_deadlock_wait; | |
1733 | ||
1734 | break; | |
1735 | ||
1736 | case FCS_DELAYED: | |
1737 | clock_get_system_nanotime(&sec, &nsec); | |
1738 | ts.tv_sec = (unsigned int) sec; | |
1739 | ts.tv_nsec = nsec; | |
1740 | ||
1741 | if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) { | |
1742 | /* | |
1743 | * the pageout thread for the default pager is potentially | |
1744 | * deadlocked since the | |
1745 | * default pager queue has been throttled for more than the | |
1746 | * allowable time... we need to move some clean pages or dirty | |
1747 | * pages belonging to the external pagers if they aren't throttled | |
1748 | * vm_page_free_wanted represents the number of threads currently | |
1749 | * blocked waiting for pages... we'll move one page for each of | |
1750 | * these plus a fixed amount to break the logjam... once we're done | |
1751 | * moving this number of pages, we'll re-enter the FCS_DELAYED state | |
1752 | * with a new timeout target since we have no way of knowing | |
1753 | * whether we've broken the deadlock except through observation | |
1754 | * of the queue associated with the default pager... we need to | |
1755 | * stop moving pages and allow the system to run to see what | |
1756 | * state it settles into. | |
1757 | */ | |
1758 | vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged; | |
1759 | vm_pageout_scan_deadlock_detected++; | |
1760 | flow_control.state = FCS_DEADLOCK_DETECTED; | |
1761 | thread_wakeup((event_t) &vm_pageout_garbage_collect); | |
1762 | goto consider_inactive; | |
1763 | } | |
1764 | /* | |
1765 | * just resniff instead of trying | |
1766 | * to compute a new delay time... we're going to be | |
1767 | * awakened immediately upon a laundry completion, | |
1768 | * so we won't wait any longer than necessary | |
1769 | */ | |
1770 | msecs = vm_pageout_idle_wait; | |
1771 | break; | |
1772 | ||
1773 | case FCS_DEADLOCK_DETECTED: | |
1774 | if (vm_pageout_deadlock_target) | |
1775 | goto consider_inactive; | |
1776 | goto reset_deadlock_timer; | |
1777 | ||
1778 | } | |
1779 | vm_pageout_scan_delay: | |
1780 | if (object != NULL) { | |
1781 | vm_object_unlock(object); | |
1782 | object = NULL; | |
1783 | } | |
1784 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
1785 | ||
1786 | vm_page_unlock_queues(); | |
1787 | ||
1788 | if (local_freeq) { | |
1789 | ||
1790 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, | |
1791 | vm_page_free_count, local_freed, delayed_unlock_limit, 3); | |
1792 | ||
1793 | vm_page_free_list(local_freeq, TRUE); | |
1794 | ||
1795 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, | |
1796 | vm_page_free_count, local_freed, 0, 3); | |
1797 | ||
1798 | local_freeq = NULL; | |
1799 | local_freed = 0; | |
1800 | } | |
1801 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) | |
1802 | vm_consider_waking_compactor_swapper(); | |
1803 | ||
1804 | vm_page_lock_queues(); | |
1805 | ||
1806 | if (flow_control.state == FCS_DELAYED && | |
1807 | !VM_PAGE_Q_THROTTLED(iq)) { | |
1808 | flow_control.state = FCS_IDLE; | |
1809 | goto consider_inactive; | |
1810 | } | |
1811 | ||
1812 | if (vm_page_free_count >= vm_page_free_target) { | |
1813 | /* | |
1814 | * we're here because | |
1815 | * someone else freed up some pages while we had | |
1816 | * the queues unlocked above, | |
1817 | * and we've hit one of the 3 conditions that | |
1818 | * cause us to pause the pageout scan thread | |
1819 | * | |
1820 | * since we already have enough free pages, | |
1821 | * let's avoid stalling and return normally | |
1822 | * | |
1823 | * before we return, make sure the pageout I/O threads | |
1824 | * are running throttled in case there are still requests | |
1825 | * in the laundry... since we have enough free pages | |
1826 | * we don't need the laundry to be cleaned in a timely | |
1827 | * fashion... so let's avoid interfering with foreground | |
1828 | * activity | |
1829 | * | |
1830 | * we don't want to hold vm_page_queue_free_lock when | |
1831 | * calling vm_pageout_adjust_io_throttles (since it | |
1832 | * may cause other locks to be taken), so we do the initial | |
1833 | * check outside of the lock. Once we take the lock, | |
1834 | * we recheck the condition since it may have changed. | |
1835 | * if it has, no problem, we will make the threads | |
1836 | * non-throttled before actually blocking | |
1837 | */ | |
1838 | vm_pageout_adjust_io_throttles(iq, eq, TRUE); | |
1839 | } | |
1840 | lck_mtx_lock(&vm_page_queue_free_lock); | |
1841 | ||
1842 | if (vm_page_free_count >= vm_page_free_target && | |
1843 | (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) { | |
1844 | goto return_from_scan; | |
1845 | } | |
1846 | lck_mtx_unlock(&vm_page_queue_free_lock); | |
1847 | ||
1848 | if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) { | |
1849 | /* | |
1850 | * we're most likely about to block due to one of | |
1851 | * the 3 conditions that cause vm_pageout_scan to | |
1852 | * not be able to make forward progress with respect | |
1853 | * to providing new pages to the free queue, | |
1854 | * so unthrottle the I/O threads in case we | |
1855 | * have laundry to be cleaned... it needs | |
1856 | * to be completed ASAP. | |
1857 | * | |
1858 | * even if we don't block, we want the io threads | |
1859 | * running unthrottled since the sum of free + | |
1860 | * clean pages is still under our free target | |
1861 | */ | |
1862 | vm_pageout_adjust_io_throttles(iq, eq, FALSE); | |
1863 | } | |
1864 | if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) { | |
1865 | /* | |
1866 | * if we get here we're below our free target and | |
1867 | * we're stalling due to a full laundry queue or | |
1868 | * we don't have any inactive pages other than | |
1869 | * those in the clean queue... | |
1870 | * however, we have pages on the clean queue that | |
1871 | * can be moved to the free queue, so let's not | |
1872 | * stall the pageout scan | |
1873 | */ | |
1874 | flow_control.state = FCS_IDLE; | |
1875 | goto consider_inactive; | |
1876 | } | |
1877 | VM_CHECK_MEMORYSTATUS; | |
1878 | ||
1879 | if (flow_control.state != FCS_IDLE) | |
1880 | vm_pageout_scan_throttle++; | |
1881 | iq->pgo_throttled = TRUE; | |
1882 | ||
1883 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) | |
1884 | vm_consider_waking_compactor_swapper(); | |
1885 | ||
1886 | assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC); | |
1887 | counter(c_vm_pageout_scan_block++); | |
1888 | ||
1889 | vm_page_unlock_queues(); | |
1890 | ||
1891 | assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); | |
1892 | ||
1893 | VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START, | |
1894 | iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); | |
1895 | memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START); | |
1896 | ||
1897 | thread_block(THREAD_CONTINUE_NULL); | |
1898 | ||
1899 | VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END, | |
1900 | iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); | |
1901 | memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END); | |
1902 | ||
1903 | vm_page_lock_queues(); | |
1904 | delayed_unlock = 1; | |
1905 | ||
1906 | iq->pgo_throttled = FALSE; | |
1907 | ||
1908 | if (loop_count >= vm_page_inactive_count) | |
1909 | loop_count = 0; | |
1910 | inactive_burst_count = 0; | |
1911 | ||
1912 | goto Restart; | |
1913 | /*NOTREACHED*/ | |
1914 | } | |
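
/*
 * A compact sketch (not XNU code) of the FCS_* flow-control state machine
 * implemented above: IDLE arms a deadline of vm_pageout_deadlock_wait ms
 * and moves to DELAYED; if the deadline passes while the default pageout
 * queue is still throttled, DEADLOCK_DETECTED sets a relief quota of pages
 * to move; once the quota is spent, the timer is re-armed. Names, types
 * and the quota value below are invented.
 */
#if 0 /* illustrative sketch -- not compiled */
#include <stdint.h>

enum fcs { FCS_IDLE, FCS_DELAYED, FCS_DEADLOCK_DETECTED };

struct flow {
	enum fcs state;
	uint64_t deadline_ns;   /* armed on entry to FCS_DELAYED */
};

/* returns 1 when the caller should steal pages to break the suspected
 * deadlock, 0 when it should sleep and re-check */
static int flow_step(struct flow *f, uint64_t now_ns,
		     uint64_t wait_ns, int *relief_target)
{
	switch (f->state) {
	case FCS_IDLE:
		f->deadline_ns = now_ns + wait_ns;  /* reset_deadlock_timer */
		f->state = FCS_DELAYED;
		return 0;
	case FCS_DELAYED:
		if (now_ns >= f->deadline_ns) {     /* throttled too long */
			*relief_target = 100;       /* invented quota */
			f->state = FCS_DEADLOCK_DETECTED;
			return 1;
		}
		return 0;                           /* just re-sniff */
	case FCS_DEADLOCK_DETECTED:
		if (*relief_target > 0)
			return 1;                   /* keep moving pages */
		f->deadline_ns = now_ns + wait_ns;  /* re-arm and observe */
		f->state = FCS_DELAYED;
		return 0;
	}
	return 0;
}
#endif
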
1915 | ||
1916 | ||
1917 | flow_control.state = FCS_IDLE; | |
1918 | consider_inactive: | |
1919 | vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count), | |
1920 | vm_pageout_inactive_external_forced_reactivate_limit); | |
1921 | loop_count++; | |
1922 | inactive_burst_count++; | |
1923 | vm_pageout_inactive++; | |
1924 | ||
1925 | ||
1926 | /* | |
1927 | * Choose a victim. | |
1928 | */ | |
1929 | while (1) { | |
1930 | m = NULL; | |
1931 | ||
1932 | if (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { | |
1933 | assert(vm_page_throttled_count == 0); | |
1934 | assert(queue_empty(&vm_page_queue_throttled)); | |
1935 | } | |
1936 | /* | |
1937 | * The most eligible pages are ones we paged in speculatively, | |
1938 | * but which have not yet been touched. | |
1939 | */ | |
1940 | if (!queue_empty(&sq->age_q) && force_anonymous == FALSE) { | |
1941 | m = (vm_page_t) queue_first(&sq->age_q); | |
1942 | ||
1943 | page_prev_state = PAGE_STATE_SPECULATIVE; | |
1944 | ||
1945 | break; | |
1946 | } | |
1947 | /* | |
1948 | * Try a clean-queue inactive page. | |
1949 | */ | |
1950 | if (!queue_empty(&vm_page_queue_cleaned)) { | |
1951 | m = (vm_page_t) queue_first(&vm_page_queue_cleaned); | |
1952 | ||
1953 | page_prev_state = PAGE_STATE_CLEAN; | |
1954 | ||
1955 | break; | |
1956 | } | |
1957 | ||
1958 | grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min); | |
1959 | ||
1960 | if (vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) { | |
1961 | grab_anonymous = TRUE; | |
1962 | anons_grabbed = 0; | |
1963 | } | |
1964 | ||
1965 | if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) { | |
1966 | ||
1967 | if ( !queue_empty(&vm_page_queue_inactive) ) { | |
1968 | m = (vm_page_t) queue_first(&vm_page_queue_inactive); | |
1969 | ||
1970 | page_prev_state = PAGE_STATE_INACTIVE; | |
1971 | anons_grabbed = 0; | |
1972 | ||
1973 | if (vm_page_pageable_external_count < vm_page_filecache_min) { | |
1974 | if ((++reactivated_this_call % 100)) | |
1975 | goto must_activate_page; | |
1976 | /* | |
1977 | * steal 1% of the file backed pages even if | |
1978 | * we are under the limit that has been set | |
1979 | * for a healthy filecache | |
1980 | */ | |
1981 | } | |
1982 | break; | |
1983 | } | |
1984 | } | |
1985 | if ( !queue_empty(&vm_page_queue_anonymous) ) { | |
1986 | m = (vm_page_t) queue_first(&vm_page_queue_anonymous); | |
1987 | ||
1988 | page_prev_state = PAGE_STATE_ANONYMOUS; | |
1989 | anons_grabbed++; | |
1990 | ||
1991 | break; | |
1992 | } | |
1993 | ||
1994 | /* | |
1995 | * if we've gotten here, we have no victim page. | |
1996 | * if making clean, free the local freed list and return. | |
1997 | * if making free, check to see if we've finished balancing the queues | |
1998 | * yet; if we haven't, just continue, else panic | |
1999 | */ | |
2000 | vm_page_unlock_queues(); | |
2001 | ||
2002 | if (object != NULL) { | |
2003 | vm_object_unlock(object); | |
2004 | object = NULL; | |
2005 | } | |
2006 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
2007 | ||
2008 | if (local_freeq) { | |
2009 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, | |
2010 | vm_page_free_count, local_freed, delayed_unlock_limit, 5); | |
2011 | ||
2012 | vm_page_free_list(local_freeq, TRUE); | |
2013 | ||
2014 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, | |
2015 | vm_page_free_count, local_freed, 0, 5); | |
2016 | ||
2017 | local_freeq = NULL; | |
2018 | local_freed = 0; | |
2019 | } | |
2020 | vm_page_lock_queues(); | |
2021 | delayed_unlock = 1; | |
2022 | ||
2023 | force_anonymous = FALSE; | |
2024 | ||
2025 | if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) | |
2026 | goto Restart; | |
2027 | ||
2028 | if (!queue_empty(&sq->age_q)) | |
2029 | goto Restart; | |
2030 | ||
2031 | panic("vm_pageout: no victim"); | |
2032 | ||
2033 | /* NOTREACHED */ | |
2034 | } | |
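
/*
 * The selection loop above encodes a strict victim priority: speculative
 * pages first (unless force_anonymous), then the cleaned queue, then an
 * interleave of file-backed and anonymous pages that caps consecutive
 * anonymous grabs and protects a minimum filecache. A condensed sketch of
 * that policy (not XNU code); the queue lengths, limit and enum are invented.
 */
#if 0 /* illustrative sketch -- not compiled */
enum src { SRC_NONE, SRC_SPECULATIVE, SRC_CLEANED, SRC_INACTIVE, SRC_ANON };

struct qlens { int spec, cleaned, inactive, anon; };

#define ANONS_GRABBED_LIMIT 4   /* invented value */

static enum src pick_victim(const struct qlens *q, int *anons_grabbed,
			    int grab_anonymous, int force_anonymous)
{
	if (q->spec && !force_anonymous)
		return SRC_SPECULATIVE;         /* untouched readahead */
	if (q->cleaned)
		return SRC_CLEANED;             /* already laundered */
	if ((!grab_anonymous || *anons_grabbed >= ANONS_GRABBED_LIMIT ||
	     q->anon == 0) && q->inactive) {
		*anons_grabbed = 0;
		return SRC_INACTIVE;            /* file-backed page */
	}
	if (q->anon) {
		(*anons_grabbed)++;
		return SRC_ANON;                /* anonymous page */
	}
	return SRC_NONE;        /* caller rebalances or panics */
}
#endif
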
2035 | force_anonymous = FALSE; | |
2036 | ||
2037 | /* | |
2038 | * we just found this page on one of our queues... | |
2039 | * it can't also be on the pageout queue, so safe | |
2040 | * to call VM_PAGE_QUEUES_REMOVE | |
2041 | */ | |
2042 | assert(!m->pageout_queue); | |
2043 | ||
2044 | VM_PAGE_QUEUES_REMOVE(m); | |
2045 | ||
2046 | assert(!m->laundry); | |
2047 | assert(!m->private); | |
2048 | assert(!m->fictitious); | |
2049 | assert(m->object != kernel_object); | |
2050 | assert(m->phys_page != vm_page_guard_addr); | |
2051 | ||
2052 | ||
2053 | if (page_prev_state != PAGE_STATE_SPECULATIVE) | |
2054 | vm_pageout_stats[vm_pageout_stat_now].considered++; | |
2055 | ||
2056 | DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); | |
2057 | ||
2058 | /* | |
2059 | * check to see if we currently are working | |
2060 | * with the same object... if so, we've | |
2061 | * already got the lock | |
2062 | */ | |
2063 | if (m->object != object) { | |
2064 | /* | |
2065 | * the object associated with candidate page is | |
2066 | * different from the one we were just working | |
2067 | * with... dump the lock if we still own it | |
2068 | */ | |
2069 | if (object != NULL) { | |
2070 | vm_object_unlock(object); | |
2071 | object = NULL; | |
2072 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
2073 | } | |
2074 | /* | |
2075 | * Try to lock object; since we've already got the | |
2076 | * page queues lock, we can only 'try' for this one. | |
2077 | * if the 'try' fails, we need to do a mutex_pause | |
2078 | * to allow the owner of the object lock a chance to | |
2079 | * run... otherwise, we're likely to trip over this | |
2080 | * object in the same state as we work our way through | |
2081 | * the queue... clumps of pages associated with the same | |
2082 | * object are fairly typical on the inactive and active queues | |
2083 | */ | |
2084 | if (!vm_object_lock_try_scan(m->object)) { | |
2085 | vm_page_t m_want = NULL; | |
2086 | ||
2087 | vm_pageout_inactive_nolock++; | |
2088 | ||
2089 | if (page_prev_state == PAGE_STATE_CLEAN) | |
2090 | vm_pageout_cleaned_nolock++; | |
2091 | ||
2092 | if (page_prev_state == PAGE_STATE_SPECULATIVE) | |
2093 | page_prev_state = PAGE_STATE_INACTIVE_FIRST; | |
2094 | ||
2095 | pmap_clear_reference(m->phys_page); | |
2096 | m->reference = FALSE; | |
2097 | ||
2098 | /* | |
2099 | * m->object must be stable since we hold the page queues lock... | |
2100 | * we can update the scan_collisions field sans the object lock | |
2101 | * since it is a separate field and this is the only spot that does | |
2102 | * a read-modify-write operation and it is never executed concurrently... | |
2103 | * we can asynchronously set this field to 0 when creating a UPL, so it | |
2104 | * is possible for the value to be a bit non-deterministic, but that's ok | |
2105 | * since it's only used as a hint | |
2106 | */ | |
2107 | m->object->scan_collisions++; | |
2108 | ||
2109 | if ( !queue_empty(&sq->age_q) ) | |
2110 | m_want = (vm_page_t) queue_first(&sq->age_q); | |
2111 | else if ( !queue_empty(&vm_page_queue_cleaned)) | |
2112 | m_want = (vm_page_t) queue_first(&vm_page_queue_cleaned); | |
2113 | else if (anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) | |
2114 | m_want = (vm_page_t) queue_first(&vm_page_queue_inactive); | |
2115 | else if ( !queue_empty(&vm_page_queue_anonymous)) | |
2116 | m_want = (vm_page_t) queue_first(&vm_page_queue_anonymous); | |
2117 | ||
2118 | /* | |
2119 | * this is the next object we're going to be interested in | |
2120 | * try to make sure it's available after the mutex_yield | |
2121 | * returns control | |
2122 | */ | |
2123 | if (m_want) | |
2124 | vm_pageout_scan_wants_object = m_want->object; | |
2125 | ||
2126 | /* | |
2127 | * force us to dump any collected free pages | |
2128 | * and to pause before moving on | |
2129 | */ | |
2130 | try_failed = TRUE; | |
2131 | ||
2132 | goto requeue_page; | |
2133 | } | |
2134 | object = m->object; | |
2135 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
2136 | ||
2137 | try_failed = FALSE; | |
2138 | } | |
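
/*
 * Because the page-queues lock is already held, the object lock above can
 * only be trylocked; on failure the page is requeued, the object's
 * scan_collisions hint is bumped, and the object wanted next is advertised
 * so its holder can yield. A small sketch of that trylock-with-hint shape
 * (not XNU code; all names invented).
 */
#if 0 /* illustrative sketch -- not compiled */
#include <pthread.h>
#include <stddef.h>

struct object { pthread_mutex_t lock; int scan_collisions; };

static struct object *wanted_object;    /* advisory, racy by design */

static int lock_object_for_scan(struct object *o, struct object *next_o)
{
	if (pthread_mutex_trylock(&o->lock) == 0)
		return 1;                   /* got it */
	/* can't block: the (spin) queue lock is still held by the caller */
	o->scan_collisions++;               /* hint: the scan keeps tripping */
	wanted_object = next_o;             /* ask the holder to yield */
	return 0;                           /* requeue the page, pause later */
}
#endif
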
2139 | if (catch_up_count) | |
2140 | catch_up_count--; | |
2141 | ||
2142 | if (m->busy) { | |
2143 | if (m->encrypted_cleaning) { | |
2144 | /* | |
2145 | * ENCRYPTED SWAP: | |
2146 | * if this page has already been picked up as | |
2147 | * part of a page-out cluster, it will be busy | |
2148 | * because it is being encrypted (see | |
2149 | * vm_object_upl_request()). But we still | |
2150 | * want to demote it from "clean-in-place" | |
2151 | * (aka "adjacent") to "clean-and-free" (aka | |
2152 | * "target"), so let's ignore its "busy" bit | |
2153 | * here and proceed to check for "cleaning" a | |
2154 | * little bit below... | |
2155 | * | |
2156 | * CAUTION CAUTION: | |
2157 | * A "busy" page should still be left alone for | |
2158 | * most purposes, so we have to be very careful | |
2159 | * not to process that page too much. | |
2160 | */ | |
2161 | assert(m->cleaning); | |
2162 | goto consider_inactive_page; | |
2163 | } | |
2164 | ||
2165 | /* | |
2166 | * Somebody is already playing with this page. | |
2167 | * Put it back on the appropriate queue | |
2168 | * | |
2169 | */ | |
2170 | vm_pageout_inactive_busy++; | |
2171 | ||
2172 | if (page_prev_state == PAGE_STATE_CLEAN) | |
2173 | vm_pageout_cleaned_busy++; | |
2174 | ||
2175 | requeue_page: | |
2176 | switch (page_prev_state) { | |
2177 | ||
2178 | case PAGE_STATE_SPECULATIVE: | |
2179 | vm_page_speculate(m, FALSE); | |
2180 | break; | |
2181 | ||
2182 | case PAGE_STATE_ANONYMOUS: | |
2183 | case PAGE_STATE_CLEAN: | |
2184 | case PAGE_STATE_INACTIVE: | |
2185 | VM_PAGE_ENQUEUE_INACTIVE(m, FALSE); | |
2186 | break; | |
2187 | ||
2188 | case PAGE_STATE_INACTIVE_FIRST: | |
2189 | VM_PAGE_ENQUEUE_INACTIVE(m, TRUE); | |
2190 | break; | |
2191 | } | |
2192 | goto done_with_inactivepage; | |
2193 | } | |
2194 | ||
2195 | ||
2196 | /* | |
2197 | * If it's absent, in error or the object is no longer alive, | |
2198 | * we can reclaim the page... in the no longer alive case, | |
2199 | * there are 2 states the page can be in that preclude us | |
2200 | * from reclaiming it - busy or cleaning - that we've already | |
2201 | * dealt with | |
2202 | */ | |
2203 | if (m->absent || m->error || !object->alive) { | |
2204 | ||
2205 | if (m->absent) | |
2206 | vm_pageout_inactive_absent++; | |
2207 | else if (!object->alive) | |
2208 | vm_pageout_inactive_notalive++; | |
2209 | else | |
2210 | vm_pageout_inactive_error++; | |
2211 | reclaim_page: | |
2212 | if (vm_pageout_deadlock_target) { | |
2213 | vm_pageout_scan_inactive_throttle_success++; | |
2214 | vm_pageout_deadlock_target--; | |
2215 | } | |
2216 | ||
2217 | DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL); | |
2218 | ||
2219 | if (object->internal) { | |
2220 | DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL); | |
2221 | } else { | |
2222 | DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL); | |
2223 | } | |
2224 | assert(!m->cleaning); | |
2225 | assert(!m->laundry); | |
2226 | ||
2227 | m->busy = TRUE; | |
2228 | ||
2229 | /* | |
2230 | * remove page from object here since we're already | |
2231 | * behind the object lock... defer the rest of the work | |
2232 | * we'd normally do in vm_page_free_prepare_object | |
2233 | * until 'vm_page_free_list' is called | |
2234 | */ | |
2235 | if (m->tabled) | |
2236 | vm_page_remove(m, TRUE); | |
2237 | ||
2238 | assert(m->pageq.next == NULL && | |
2239 | m->pageq.prev == NULL); | |
2240 | m->pageq.next = (queue_entry_t)local_freeq; | |
2241 | local_freeq = m; | |
2242 | local_freed++; | |
2243 | ||
2244 | if (page_prev_state == PAGE_STATE_SPECULATIVE) | |
2245 | vm_pageout_freed_from_speculative++; | |
2246 | else if (page_prev_state == PAGE_STATE_CLEAN) | |
2247 | vm_pageout_freed_from_cleaned++; | |
2248 | else | |
2249 | vm_pageout_freed_from_inactive_clean++; | |
2250 | ||
2251 | if (page_prev_state != PAGE_STATE_SPECULATIVE) | |
2252 | vm_pageout_stats[vm_pageout_stat_now].reclaimed++; | |
2253 | ||
2254 | inactive_burst_count = 0; | |
2255 | goto done_with_inactivepage; | |
2256 | } | |
2257 | /* | |
2258 | * If the object is empty, the page must be reclaimed even | |
2259 | * if dirty or used. | |
2260 | * If the page belongs to a volatile object, we stick it back | |
2261 | * on. | |
2262 | */ | |
2263 | if (object->copy == VM_OBJECT_NULL) { | |
2264 | if (object->purgable == VM_PURGABLE_EMPTY) { | |
2265 | if (m->pmapped == TRUE) { | |
2266 | /* unmap the page */ | |
2267 | refmod_state = pmap_disconnect(m->phys_page); | |
2268 | if (refmod_state & VM_MEM_MODIFIED) { | |
2269 | SET_PAGE_DIRTY(m, FALSE); | |
2270 | } | |
2271 | } | |
2272 | if (m->dirty || m->precious) { | |
2273 | /* we saved the cost of cleaning this page ! */ | |
2274 | vm_page_purged_count++; | |
2275 | } | |
2276 | goto reclaim_page; | |
2277 | } | |
2278 | ||
2279 | if (COMPRESSED_PAGER_IS_ACTIVE) { | |
2280 | /* | |
2281 | * With the VM compressor, the cost of | |
2282 | * reclaiming a page is much lower (no I/O), | |
2283 | * so if we find a "volatile" page, it's better | |
2284 | * to let it get compressed rather than letting | |
2285 | * it occupy a full page until it gets purged. | |
2286 | * So no need to check for "volatile" here. | |
2287 | */ | |
2288 | } else if (object->purgable == VM_PURGABLE_VOLATILE) { | |
2289 | /* | |
2290 | * Avoid cleaning a "volatile" page which might | |
2291 | * be purged soon. | |
2292 | */ | |
2293 | ||
2294 | /* if it's wired, we can't put it on our queue */ | |
2295 | assert(!VM_PAGE_WIRED(m)); | |
2296 | ||
2297 | /* just stick it back on! */ | |
2298 | reactivated_this_call++; | |
2299 | ||
2300 | if (page_prev_state == PAGE_STATE_CLEAN) | |
2301 | vm_pageout_cleaned_volatile_reactivated++; | |
2302 | ||
2303 | goto reactivate_page; | |
2304 | } | |
2305 | } | |
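
/*
 * The purgable-empty path above unmaps the page before reclaiming it so a
 * racing writer can't dirty it unseen: pmap_disconnect() returns the
 * accumulated ref/mod state atomically, and the mod bit is folded back
 * into the page. A stub sketch of that fold (not XNU code; the flag value
 * and stubs are invented).
 */
#if 0 /* illustrative sketch -- not compiled */
#define MEM_MODIFIED 0x1        /* invented flag value */

struct upage { int pmapped; int dirty; unsigned long phys; };

static int disconnect_stub(unsigned long phys) { (void)phys; return 0; }

static void unmap_and_update(struct upage *m)
{
	if (m->pmapped) {
		int refmod = disconnect_stub(m->phys);
		if (refmod & MEM_MODIFIED)
			m->dirty = 1;   /* SET_PAGE_DIRTY equivalent */
	}
}
#endif
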
2306 | ||
2307 | consider_inactive_page: | |
2308 | if (m->busy) { | |
2309 | /* | |
2310 | * CAUTION CAUTION: | |
2311 | * A "busy" page should always be left alone, except... | |
2312 | */ | |
2313 | if (m->cleaning && m->encrypted_cleaning) { | |
2314 | /* | |
2315 | * ENCRYPTED_SWAP: | |
2316 | * We could get here with a "busy" page | |
2317 | * if it's being encrypted during a | |
2318 | * "clean-in-place" operation. We'll deal | |
2319 | * with it right away by testing if it has been | |
2320 | * referenced and either reactivating it or | |
2321 | * promoting it from "clean-in-place" to | |
2322 | * "clean-and-free". | |
2323 | */ | |
2324 | } else { | |
2325 | panic("\"busy\" page considered for pageout\n"); | |
2326 | } | |
2327 | } | |
2328 | ||
2329 | /* | |
2330 | * If it's being used, reactivate. | |
2331 | * (Fictitious pages are either busy or absent.) | |
2332 | * First, update the reference and dirty bits | |
2333 | * to make sure the page is unreferenced. | |
2334 | */ | |
2335 | refmod_state = -1; | |
2336 | ||
2337 | if (m->reference == FALSE && m->pmapped == TRUE) { | |
2338 | refmod_state = pmap_get_refmod(m->phys_page); | |
2339 | ||
2340 | if (refmod_state & VM_MEM_REFERENCED) | |
2341 | m->reference = TRUE; | |
2342 | if (refmod_state & VM_MEM_MODIFIED) { | |
2343 | SET_PAGE_DIRTY(m, FALSE); | |
2344 | } | |
2345 | } | |
2346 | ||
2347 | /* | |
2348 | * if (m->cleaning && !m->pageout) | |
2349 | * If already cleaning this page in place and it hasn't | |
2350 | * been recently referenced, just pull off the queue. | |
2351 | * We can leave the page mapped, and upl_commit_range | |
2352 | * will put it on the clean queue. | |
2353 | * | |
2354 | * note: if m->encrypted_cleaning == TRUE, then | |
2355 | * m->cleaning == TRUE | |
2356 | * and we'll handle it here | |
2357 | * | |
2358 | * if (m->pageout && !m->cleaning) | |
2359 | * an msync INVALIDATE is in progress... | |
2360 | * this page has been marked for destruction | |
2361 | * after it has been cleaned, | |
2362 | * but not yet gathered into a UPL | |
2363 | * where 'cleaning' will be set... | |
2364 | * just leave it off the paging queues | |
2365 | * | |
2366 | * if (m->pageout && m->cleaning) | |
2367 | * an msync INVALIDATE is in progress | |
2368 | * and the UPL has already gathered this page... | |
2369 | * just leave it off the paging queues | |
2370 | */ | |
2371 | ||
2372 | /* | |
2373 | * page with m->pageout and still on the queues means that an | |
2374 | * MS_INVALIDATE is in progress on this page... leave it alone | |
2375 | */ | |
2376 | if (m->pageout) { | |
2377 | goto done_with_inactivepage; | |
2378 | } | |
2379 | ||
2380 | /* if cleaning, reactivate if referenced. otherwise, just pull off queue */ | |
2381 | if (m->cleaning) { | |
2382 | if (m->reference == TRUE) { | |
2383 | reactivated_this_call++; | |
2384 | goto reactivate_page; | |
2385 | } else { | |
2386 | goto done_with_inactivepage; | |
2387 | } | |
2388 | } | |
2389 | ||
2390 | if (m->reference || m->dirty) { | |
2391 | /* deal with a rogue "reusable" page */ | |
2392 | VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m); | |
2393 | } | |
2394 | ||
2395 | if (!m->no_cache && | |
2396 | (m->reference || | |
2397 | (m->xpmapped && !object->internal && (vm_page_xpmapped_external_count < (vm_page_external_count / 4))))) { | |
2398 | /* | |
2399 | * The page we pulled off the inactive list has | |
2400 | * been referenced. It is possible for other | |
2401 | * processors to be touching pages faster than we | |
2402 | * can clear the referenced bit and traverse the | |
2403 | * inactive queue, so we limit the number of | |
2404 | * reactivations. | |
2405 | */ | |
2406 | if (++reactivated_this_call >= reactivate_limit) { | |
2407 | vm_pageout_reactivation_limit_exceeded++; | |
2408 | } else if (catch_up_count) { | |
2409 | vm_pageout_catch_ups++; | |
2410 | } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) { | |
2411 | vm_pageout_inactive_force_reclaim++; | |
2412 | } else { | |
2413 | uint32_t isinuse; | |
2414 | ||
2415 | if (page_prev_state == PAGE_STATE_CLEAN) | |
2416 | vm_pageout_cleaned_reference_reactivated++; | |
2417 | ||
2418 | reactivate_page: | |
2419 | if ( !object->internal && object->pager != MEMORY_OBJECT_NULL && | |
2420 | vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) { | |
2421 | /* | |
2422 | * no explicit mappings of this object exist | |
2423 | * and it's not open via the filesystem | |
2424 | */ | |
2425 | vm_page_deactivate(m); | |
2426 | vm_pageout_inactive_deactivated++; | |
2427 | } else { | |
2428 | must_activate_page: | |
2429 | /* | |
2430 | * The page was/is being used, so put back on active list. | |
2431 | */ | |
2432 | vm_page_activate(m); | |
2433 | VM_STAT_INCR(reactivations); | |
2434 | inactive_burst_count = 0; | |
2435 | } | |
2436 | ||
2437 | if (page_prev_state == PAGE_STATE_CLEAN) | |
2438 | vm_pageout_cleaned_reactivated++; | |
2439 | ||
2440 | vm_pageout_inactive_used++; | |
2441 | ||
2442 | goto done_with_inactivepage; | |
2443 | } | |
2444 | /* | |
2445 | * Make sure we call pmap_get_refmod() if it | |
2446 | * wasn't already called just above, to update | |
2447 | * the dirty bit. | |
2448 | */ | |
2449 | if ((refmod_state == -1) && !m->dirty && m->pmapped) { | |
2450 | refmod_state = pmap_get_refmod(m->phys_page); | |
2451 | if (refmod_state & VM_MEM_MODIFIED) { | |
2452 | SET_PAGE_DIRTY(m, FALSE); | |
2453 | } | |
2454 | } | |
2455 | forced_reclaim = TRUE; | |
2456 | } else { | |
2457 | forced_reclaim = FALSE; | |
2458 | } | |
2459 | ||
2460 | XPR(XPR_VM_PAGEOUT, | |
2461 | "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n", | |
2462 | object, m->offset, m, 0,0); | |
2463 | ||
2464 | /* | |
2465 | * we've got a candidate page to steal... | |
2466 | * | |
2467 | * m->dirty is up to date courtesy of the | |
2468 | * preceding check for m->reference... if | |
2469 | * we get here, then m->reference had to be | |
2470 | * FALSE (or possibly "reactivate_limit" was | |
2471 | * exceeded), but in either case we called | |
2472 | * pmap_get_refmod() and updated both | |
2473 | * m->reference and m->dirty | |
2474 | * | |
2475 | * if it's dirty or precious we need to | |
2476 | * see if the target queue is throttled | |
2477 | * if it is, we need to skip over it by moving it back | |
2478 | * to the end of the inactive queue | |
2479 | */ | |
2480 | ||
2481 | inactive_throttled = FALSE; | |
2482 | ||
2483 | if (m->dirty || m->precious) { | |
2484 | if (object->internal) { | |
2485 | if (VM_PAGE_Q_THROTTLED(iq)) | |
2486 | inactive_throttled = TRUE; | |
2487 | } else if (VM_PAGE_Q_THROTTLED(eq)) { | |
2488 | inactive_throttled = TRUE; | |
2489 | } | |
2490 | } | |
2491 | throttle_inactive: | |
2492 | if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && | |
2493 | object->internal && m->dirty && | |
2494 | (object->purgable == VM_PURGABLE_DENY || | |
2495 | object->purgable == VM_PURGABLE_NONVOLATILE || | |
2496 | object->purgable == VM_PURGABLE_VOLATILE)) { | |
2497 | queue_enter(&vm_page_queue_throttled, m, | |
2498 | vm_page_t, pageq); | |
2499 | m->throttled = TRUE; | |
2500 | vm_page_throttled_count++; | |
2501 | ||
2502 | vm_pageout_scan_reclaimed_throttled++; | |
2503 | ||
2504 | inactive_burst_count = 0; | |
2505 | goto done_with_inactivepage; | |
2506 | } | |
2507 | if (inactive_throttled == TRUE) { | |
2508 | ||
2509 | if (object->internal == FALSE) { | |
2510 | /* | |
2511 | * we need to break up the following potential deadlock case... | |
2512 | * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written. | |
2513 | * b) The thread doing the writing is waiting for pages while holding the truncate lock | |
2514 | * c) Most of the pages in the inactive queue belong to this file. | |
2515 | * | |
2516 | * we are potentially in this deadlock because... | |
2517 | * a) the external pageout queue is throttled | |
2518 | * b) we're done with the active queue and moved on to the inactive queue | |
2519 | * c) we've got a dirty external page | |
2520 | * | |
2521 | * since we don't know the reason for the external pageout queue being throttled we | |
2522 | * must suspect that we are deadlocked, so move the current page onto the active queue | |
2523 | * in an effort to cause a page from the active queue to 'age' to the inactive queue | |
2524 | * | |
2525 | * if we don't have jetsam configured (i.e. we have a dynamic pager), set | |
2526 | * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous | |
2527 | * pool the next time we select a victim page... if we can make enough new free pages, | |
2528 | * the deadlock will break, the external pageout queue will empty and it will no longer | |
2529 | * be throttled | |
2530 | * | |
2531 | * if we have jetsam configured, keep a count of the pages reactivated this way so | |
2532 | * that we can try to find clean pages in the active/inactive queues before | |
2533 | * deciding to jetsam a process | |
2534 | */ | |
2535 | vm_pageout_scan_inactive_throttled_external++; | |
2536 | ||
2537 | queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); | |
2538 | m->active = TRUE; | |
2539 | vm_page_active_count++; | |
2540 | vm_page_pageable_external_count++; | |
2541 | ||
2542 | vm_pageout_adjust_io_throttles(iq, eq, FALSE); | |
2543 | ||
2544 | #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM | |
2545 | vm_pageout_inactive_external_forced_reactivate_limit--; | |
2546 | ||
2547 | if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) { | |
2548 | vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count; | |
2549 | /* | |
2550 | * Possible deadlock scenario so request jetsam action | |
2551 | */ | |
2552 | assert(object); | |
2553 | vm_object_unlock(object); | |
2554 | object = VM_OBJECT_NULL; | |
2555 | vm_page_unlock_queues(); | |
2556 | ||
2557 | VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START, | |
2558 | vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count); | |
2559 | ||
2560 | /* Kill first suitable process */ | |
2561 | if (memorystatus_kill_on_VM_page_shortage(FALSE) == FALSE) { | |
2562 | panic("vm_pageout_scan: Jetsam request failed\n"); | |
2563 | } | |
2564 | ||
2565 | VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0); | |
2566 | ||
2567 | vm_pageout_inactive_external_forced_jetsam_count++; | |
2568 | vm_page_lock_queues(); | |
2569 | delayed_unlock = 1; | |
2570 | } | |
2571 | #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */ | |
2572 | force_anonymous = TRUE; | |
2573 | #endif | |
2574 | inactive_burst_count = 0; | |
2575 | goto done_with_inactivepage; | |
2576 | } else { | |
2577 | if (page_prev_state == PAGE_STATE_SPECULATIVE) | |
2578 | page_prev_state = PAGE_STATE_INACTIVE; | |
2579 | ||
2580 | vm_pageout_scan_inactive_throttled_internal++; | |
2581 | ||
2582 | goto requeue_page; | |
2583 | } | |
2584 | } | |
2585 | ||
2586 | /* | |
2587 | * we've got a page that we can steal... | |
2588 | * eliminate all mappings and make sure | |
2589 | * we have the up-to-date modified state | |
2590 | * | |
2591 | * if we need to do a pmap_disconnect then we | |
2592 | * need to re-evaluate m->dirty since the pmap_disconnect | |
2593 | * provides the true state atomically... the | |
2594 | * page was still mapped up to the pmap_disconnect | |
2595 | * and may have been dirtied at the last microsecond | |
2596 | * | |
2597 | * Note that if 'pmapped' is FALSE then the page is not | |
2598 | * and has not been in any map, so there is no point calling | |
2599 | * pmap_disconnect(). m->dirty could have been set in anticipation | |
2600 | * of likely usage of the page. | |
2601 | */ | |
2602 | if (m->pmapped == TRUE) { | |
2603 | ||
2604 | if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE || object->internal == FALSE) { | |
2605 | /* | |
2606 | * Don't count this page as going into the compressor if any of these are true: | |
2607 | * 1) We have the dynamic pager i.e. no compressed pager | |
2608 | * 2) Freezer enabled device with a freezer file to hold the app data i.e. no compressed pager | |
2609 | * 3) Freezer enabled device with compressed pager backend (exclusive use) i.e. most of the VM system | |
2610 | * (including vm_pageout_scan) has no knowledge of the compressor | |
2611 | * 4) This page belongs to a file and hence will not be sent into the compressor | |
2612 | */ | |
2613 | ||
2614 | refmod_state = pmap_disconnect_options(m->phys_page, 0, NULL); | |
2615 | } else { | |
2616 | refmod_state = pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL); | |
2617 | } | |
2618 | ||
2619 | if (refmod_state & VM_MEM_MODIFIED) { | |
2620 | SET_PAGE_DIRTY(m, FALSE); | |
2621 | } | |
2622 | } | |
2623 | /* | |
2624 | * reset our count of pages that have been reclaimed | |
2625 | * since the last page was 'stolen' | |
2626 | */ | |
2627 | inactive_reclaim_run = 0; | |
2628 | ||
2629 | /* | |
2630 | * If it's clean and not precious, we can free the page. | |
2631 | */ | |
2632 | if (!m->dirty && !m->precious) { | |
2633 | ||
2634 | if (page_prev_state == PAGE_STATE_SPECULATIVE) | |
2635 | vm_pageout_speculative_clean++; | |
2636 | else { | |
2637 | if (page_prev_state == PAGE_STATE_ANONYMOUS) | |
2638 | vm_pageout_inactive_anonymous++; | |
2639 | else if (page_prev_state == PAGE_STATE_CLEAN) | |
2640 | vm_pageout_cleaned_reclaimed++; | |
2641 | ||
2642 | vm_pageout_inactive_clean++; | |
2643 | } | |
2644 | ||
2645 | /* | |
2646 | * OK, at this point we have found a page we are going to free. | |
2647 | */ | |
2648 | #if CONFIG_PHANTOM_CACHE | |
2649 | if (!object->internal) | |
2650 | vm_phantom_cache_add_ghost(m); | |
2651 | #endif | |
2652 | goto reclaim_page; | |
2653 | } | |
2654 | ||
2655 | /* | |
2656 | * The page may have been dirtied since the last check | |
2657 | * for a throttled target queue (which may have been skipped | |
2658 | * if the page was clean then). With the dirty page | |
2659 | * disconnected here, we can make one final check. | |
2660 | */ | |
2661 | if (object->internal) { | |
2662 | if (VM_PAGE_Q_THROTTLED(iq)) | |
2663 | inactive_throttled = TRUE; | |
2664 | } else if (VM_PAGE_Q_THROTTLED(eq)) { | |
2665 | inactive_throttled = TRUE; | |
2666 | } | |
2667 | ||
2668 | if (inactive_throttled == TRUE) | |
2669 | goto throttle_inactive; | |
2670 | ||
2671 | #if VM_PRESSURE_EVENTS | |
2672 | #if CONFIG_JETSAM | |
2673 | ||
2674 | /* | |
2675 | * If Jetsam is enabled, then the sending | |
2676 | * of memory pressure notifications is handled | |
2677 | * from the same thread that takes care of high-water | |
2678 | * and other jetsams i.e. the memorystatus_thread. | |
2679 | */ | |
2680 | ||
2681 | #else /* CONFIG_JETSAM */ | |
2682 | ||
2683 | vm_pressure_response(); | |
2684 | ||
2685 | #endif /* CONFIG_JETSAM */ | |
2686 | #endif /* VM_PRESSURE_EVENTS */ | |
2687 | ||
2688 | /* | |
2689 | * do NOT set the pageout bit! | |
2690 | * sure, we might need free pages, but this page is going to take time to become free | |
2691 | * anyway, so we may as well put it on the clean queue first and take it from there later | |
2692 | * if necessary. that way, we'll ensure we don't free up too much. -mj | |
2693 | */ | |
2694 | vm_pageout_cluster(m, FALSE); | |
2695 | ||
2696 | if (page_prev_state == PAGE_STATE_ANONYMOUS) | |
2697 | vm_pageout_inactive_anonymous++; | |
2698 | if (object->internal) | |
2699 | vm_pageout_inactive_dirty_internal++; | |
2700 | else | |
2701 | vm_pageout_inactive_dirty_external++; | |
2702 | ||
2703 | ||
2704 | done_with_inactivepage: | |
2705 | ||
2706 | if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) { | |
2707 | boolean_t need_delay = TRUE; | |
2708 | ||
2709 | if (object != NULL) { | |
2710 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
2711 | vm_object_unlock(object); | |
2712 | object = NULL; | |
2713 | } | |
2714 | vm_page_unlock_queues(); | |
2715 | ||
2716 | if (local_freeq) { | |
2717 | ||
2718 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, | |
2719 | vm_page_free_count, local_freed, delayed_unlock_limit, 4); | |
2720 | ||
2721 | vm_page_free_list(local_freeq, TRUE); | |
2722 | ||
2723 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, | |
2724 | vm_page_free_count, local_freed, 0, 4); | |
2725 | ||
2726 | local_freeq = NULL; | |
2727 | local_freed = 0; | |
2728 | need_delay = FALSE; | |
2729 | } | |
2730 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { | |
2731 | vm_consider_waking_compactor_swapper(); | |
2732 | need_delay = FALSE; | |
2733 | } | |
2734 | vm_page_lock_queues(); | |
2735 | ||
2736 | if (need_delay == TRUE) | |
2737 | lck_mtx_yield(&vm_page_queue_lock); | |
2738 | ||
2739 | delayed_unlock = 1; | |
2740 | } | |
2741 | vm_pageout_considered_page++; | |
2742 | ||
2743 | /* | |
2744 | * back to top of pageout scan loop | |
2745 | */ | |
2746 | } | |
2747 | } | |
2748 | ||
2749 | ||
2750 | int vm_page_free_count_init; | |
2751 | ||
2752 | void | |
2753 | vm_page_free_reserve( | |
2754 | int pages) | |
2755 | { | |
2756 | int free_after_reserve; | |
2757 | ||
2758 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { | |
2759 | ||
2760 | if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT)) | |
2761 | vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT; | |
2762 | else | |
2763 | vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT); | |
2764 | ||
2765 | } else { | |
2766 | if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT) | |
2767 | vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT; | |
2768 | else | |
2769 | vm_page_free_reserved += pages; | |
2770 | } | |
2771 | free_after_reserve = vm_page_free_count_init - vm_page_free_reserved; | |
2772 | ||
2773 | vm_page_free_min = vm_page_free_reserved + | |
2774 | VM_PAGE_FREE_MIN(free_after_reserve); | |
2775 | ||
2776 | if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT) | |
2777 | vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT; | |
2778 | ||
2779 | vm_page_free_target = vm_page_free_reserved + | |
2780 | VM_PAGE_FREE_TARGET(free_after_reserve); | |
2781 | ||
2782 | if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT) | |
2783 | vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT; | |
2784 | ||
2785 | if (vm_page_free_target < vm_page_free_min + 5) | |
2786 | vm_page_free_target = vm_page_free_min + 5; | |
2787 | ||
2788 | vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 3); | |
2789 | } | |
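
/*
 * A worked numeric sketch of the threshold derivation above (not XNU
 * code). The real VM_PAGE_FREE_MIN/VM_PAGE_FREE_TARGET macros are ratios
 * of the pool left after the reserve; the 1% and 2% stand-ins below are
 * invented, as are the input values.
 */
#if 0 /* illustrative sketch -- not compiled */
#include <stdio.h>

#define FREE_MIN(avail)    ((avail) / 100)  /* stand-in ratio */
#define FREE_TARGET(avail) ((avail) / 50)   /* stand-in ratio */

int main(void)
{
	int free_count_init = 100000;   /* pages at boot (invented) */
	int reserved = 400;             /* result of the reserve step */
	int after = free_count_init - reserved;         /* 99600 */

	int min    = reserved + FREE_MIN(after);        /* 400 + 996  = 1396 */
	int target = reserved + FREE_TARGET(after);     /* 400 + 1992 = 2392 */
	if (target < min + 5)
		target = min + 5;
	int throttle = target - target / 3;             /* 2392 - 797 = 1595 */

	printf("min=%d target=%d throttle=%d\n", min, target, throttle);
	return 0;
}
#endif
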
2790 | ||
2791 | /* | |
2792 | * vm_pageout is the high level pageout daemon. | |
2793 | */ | |
2794 | ||
2795 | void | |
2796 | vm_pageout_continue(void) | |
2797 | { | |
2798 | DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL); | |
2799 | vm_pageout_scan_event_counter++; | |
2800 | ||
2801 | vm_pageout_scan(); | |
2802 | /* | |
2803 | * we hold both the vm_page_queue_free_lock | |
2804 | * and the vm_page_queues_lock at this point | |
2805 | */ | |
2806 | assert(vm_page_free_wanted == 0); | |
2807 | assert(vm_page_free_wanted_privileged == 0); | |
2808 | assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT); | |
2809 | ||
2810 | lck_mtx_unlock(&vm_page_queue_free_lock); | |
2811 | vm_page_unlock_queues(); | |
2812 | ||
2813 | counter(c_vm_pageout_block++); | |
2814 | thread_block((thread_continue_t)vm_pageout_continue); | |
2815 | /*NOTREACHED*/ | |
2816 | } | |
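
/*
 * vm_pageout_continue() never returns: it blocks with itself as the
 * continuation, so on wakeup the daemon restarts from the top on a fresh
 * stack instead of resuming a saved one. A toy model of that shape (not
 * XNU code; the stubs are invented and the direct call only stands in for
 * Mach's stack-discarding thread_block()).
 */
#if 0 /* illustrative sketch -- not compiled */
typedef void (*continuation_t)(void);

static void do_one_scan_pass(void) { /* ... */ }
static void prepare_to_wait(void)  { /* assert_wait() equivalent */ }

static void thread_block_stub(continuation_t cont) { cont(); }

static void daemon_continue(void)
{
	do_one_scan_pass();
	prepare_to_wait();
	thread_block_stub(daemon_continue);
	/* NOTREACHED */
}
#endif
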
2817 | ||
2818 | ||
2819 | #ifdef FAKE_DEADLOCK | |
2820 | ||
2821 | #define FAKE_COUNT 5000 | |
2822 | ||
2823 | int internal_count = 0; | |
2824 | int fake_deadlock = 0; | |
2825 | ||
2826 | #endif | |
2827 | ||
2828 | static void | |
2829 | vm_pageout_iothread_continue(struct vm_pageout_queue *q) | |
2830 | { | |
2831 | vm_page_t m = NULL; | |
2832 | vm_object_t object; | |
2833 | vm_object_offset_t offset; | |
2834 | memory_object_t pager; | |
2835 | thread_t self = current_thread(); | |
2836 | ||
2837 | if ((vm_pageout_internal_iothread != THREAD_NULL) | |
2838 | && (self == vm_pageout_external_iothread ) | |
2839 | && (self->options & TH_OPT_VMPRIV)) | |
2840 | self->options &= ~TH_OPT_VMPRIV; | |
2841 | ||
2842 | vm_page_lockspin_queues(); | |
2843 | ||
2844 | while ( !queue_empty(&q->pgo_pending) ) { | |
2845 | ||
2846 | q->pgo_busy = TRUE; | |
2847 | queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); | |
2848 | if (m->object->object_slid) { | |
2849 | panic("slid page %p not allowed on this path\n", m); | |
2850 | } | |
2851 | VM_PAGE_CHECK(m); | |
2852 | m->pageout_queue = FALSE; | |
2853 | m->pageq.next = NULL; | |
2854 | m->pageq.prev = NULL; | |
2855 | ||
2856 | /* | |
2857 | * grab a snapshot of the object and offset this | |
2858 | * page is tabled in so that we can relookup this | |
2859 | * page after we've taken the object lock - these | |
2860 | * fields are stable while we hold the page queues lock | |
2861 | * but as soon as we drop it, there is nothing to keep | |
2862 | * this page in this object... we hold an activity_in_progress | |
2863 | * on this object which will keep it from terminating | |
2864 | */ | |
2865 | object = m->object; | |
2866 | offset = m->offset; | |
2867 | ||
2868 | vm_page_unlock_queues(); | |
2869 | ||
2870 | #ifdef FAKE_DEADLOCK | |
2871 | if (q == &vm_pageout_queue_internal) { | |
2872 | vm_offset_t addr; | |
2873 | int pg_count; | |
2874 | ||
2875 | internal_count++; | |
2876 | ||
2877 | if ((internal_count == FAKE_COUNT)) { | |
2878 | ||
2879 | pg_count = vm_page_free_count + vm_page_free_reserved; | |
2880 | ||
2881 | if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) { | |
2882 | kmem_free(kernel_map, addr, PAGE_SIZE * pg_count); | |
2883 | } | |
2884 | internal_count = 0; | |
2885 | fake_deadlock++; | |
2886 | } | |
2887 | } | |
2888 | #endif | |
2889 | vm_object_lock(object); | |
2890 | ||
2891 | m = vm_page_lookup(object, offset); | |
2892 | ||
2893 | if (m == NULL || | |
2894 | m->busy || m->cleaning || m->pageout_queue || !m->laundry) { | |
2895 | /* | |
2896 | * it's either the same page that someone else has | |
2897 | * started cleaning (or it's finished cleaning or | |
2898 | * been put back on the pageout queue), or | |
2899 | * the page has been freed or we have found a | |
2900 | * new page at this offset... in all of these cases | |
2901 | * we merely need to release the activity_in_progress | |
2902 | * we took when we put the page on the pageout queue | |
2903 | */ | |
2904 | vm_object_activity_end(object); | |
2905 | vm_object_unlock(object); | |
2906 | ||
2907 | vm_page_lockspin_queues(); | |
2908 | continue; | |
2909 | } | |
2910 | if (!object->pager_initialized) { | |
2911 | ||
2912 | /* | |
2913 | * If there is no memory object for the page, create | |
2914 | * one and hand it to the default pager. | |
2915 | */ | |
2916 | ||
2917 | if (!object->pager_initialized) | |
2918 | vm_object_collapse(object, | |
2919 | (vm_object_offset_t) 0, | |
2920 | TRUE); | |
2921 | if (!object->pager_initialized) | |
2922 | vm_object_pager_create(object); | |
2923 | if (!object->pager_initialized) { | |
2924 | /* | |
2925 | * Still no pager for the object. | |
2926 | * Reactivate the page. | |
2927 | * | |
2928 | * Should only happen if there is no | |
2929 | * default pager. | |
2930 | */ | |
2931 | m->pageout = FALSE; | |
2932 | ||
2933 | vm_page_lockspin_queues(); | |
2934 | ||
2935 | vm_pageout_throttle_up(m); | |
2936 | vm_page_activate(m); | |
2937 | vm_pageout_dirty_no_pager++; | |
2938 | ||
2939 | vm_page_unlock_queues(); | |
2940 | ||
2941 | /* | |
2942 | * And we are done with it. | |
2943 | */ | |
2944 | vm_object_activity_end(object); | |
2945 | vm_object_unlock(object); | |
2946 | ||
2947 | vm_page_lockspin_queues(); | |
2948 | continue; | |
2949 | } | |
2950 | } | |
2951 | pager = object->pager; | |
2952 | ||
2953 | if (pager == MEMORY_OBJECT_NULL) { | |
2954 | /* | |
2955 | * This pager has been destroyed by either | |
2956 | * memory_object_destroy or vm_object_destroy, and | |
2957 | * so there is nowhere for the page to go. | |
2958 | */ | |
2959 | if (m->pageout) { | |
2960 | /* | |
2961 | * Just free the page... VM_PAGE_FREE takes | |
2962 | * care of cleaning up all the state... | |
2963 | * including doing the vm_pageout_throttle_up | |
2964 | */ | |
2965 | VM_PAGE_FREE(m); | |
2966 | } else { | |
2967 | vm_page_lockspin_queues(); | |
2968 | ||
2969 | vm_pageout_throttle_up(m); | |
2970 | vm_page_activate(m); | |
2971 | ||
2972 | vm_page_unlock_queues(); | |
2973 | ||
2974 | /* | |
2975 | * And we are done with it. | |
2976 | */ | |
2977 | } | |
2978 | vm_object_activity_end(object); | |
2979 | vm_object_unlock(object); | |
2980 | ||
2981 | vm_page_lockspin_queues(); | |
2982 | continue; | |
2983 | } | |
2984 | #if 0 | |
2985 | /* | |
2986 | * we don't hold the page queue lock | |
2987 | * so this check isn't safe to make | |
2988 | */ | |
2989 | VM_PAGE_CHECK(m); | |
2990 | #endif | |
2991 | /* | |
2992 | * give back the activity_in_progress reference we | |
2993 | * took when we queued up this page and replace it | |
2994 | * with a paging_in_progress reference that will | |
2995 | * also keep the paging offset from changing and | |
2996 | * prevent the object from terminating | |
2997 | */ | |
2998 | vm_object_activity_end(object); | |
2999 | vm_object_paging_begin(object); | |
3000 | vm_object_unlock(object); | |
3001 | ||
3002 | /* | |
3003 | * Send the data to the pager. | |
3004 | * any pageout clustering happens there | |
3005 | */ | |
3006 | memory_object_data_return(pager, | |
3007 | m->offset + object->paging_offset, | |
3008 | PAGE_SIZE, | |
3009 | NULL, | |
3010 | NULL, | |
3011 | FALSE, | |
3012 | FALSE, | |
3013 | 0); | |
3014 | ||
3015 | vm_object_lock(object); | |
3016 | vm_object_paging_end(object); | |
3017 | vm_object_unlock(object); | |
3018 | ||
3019 | vm_pageout_io_throttle(); | |
3020 | ||
3021 | vm_page_lockspin_queues(); | |
3022 | } | |
3023 | q->pgo_busy = FALSE; | |
3024 | q->pgo_idle = TRUE; | |
3025 | ||
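| /* | |
|  * park until more work is queued; thread_block_parameter() resumes | |
|  * execution at the top of this function via the continuation, so no | |
|  * local state survives the block | |
|  */ | |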
3026 | assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); | |
3027 | vm_page_unlock_queues(); | |
3028 | ||
3029 | thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) q); | |
3030 | /*NOTREACHED*/ | |
3031 | } | |
3032 | ||
3033 | ||
3034 | static void | |
3035 | vm_pageout_iothread_external_continue(struct vm_pageout_queue *q) | |
3036 | { | |
3037 | vm_page_t m = NULL; | |
3038 | vm_object_t object; | |
3039 | vm_object_offset_t offset; | |
3040 | memory_object_t pager; | |
3041 | ||
3042 | ||
3043 | if (vm_pageout_internal_iothread != THREAD_NULL) | |
3044 | current_thread()->options &= ~TH_OPT_VMPRIV; | |
3045 | ||
3046 | vm_page_lockspin_queues(); | |
3047 | ||
3048 | while ( !queue_empty(&q->pgo_pending) ) { | |
3049 | ||
3050 | q->pgo_busy = TRUE; | |
3051 | queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); | |
3052 | if (m->object->object_slid) { | |
3053 | panic("slid page %p not allowed on this path\n", m); | |
3054 | } | |
3055 | VM_PAGE_CHECK(m); | |
3056 | m->pageout_queue = FALSE; | |
3057 | m->pageq.next = NULL; | |
3058 | m->pageq.prev = NULL; | |
3059 | ||
3060 | /* | |
3061 | * grab a snapshot of the object and offset this | |
3062 | * page is tabled in so that we can relookup this | |
3063 | * page after we've taken the object lock - these | |
3064 | * fields are stable while we hold the page queues lock | |
3065 | * but as soon as we drop it, there is nothing to keep | |
3066 | * this page in this object... we hold an activity_in_progress | |
3067 | * on this object which will keep it from terminating | |
3068 | */ | |
3069 | object = m->object; | |
3070 | offset = m->offset; | |
3071 | ||
3072 | vm_page_unlock_queues(); | |
3073 | ||
3074 | vm_object_lock(object); | |
3075 | ||
3076 | m = vm_page_lookup(object, offset); | |
3077 | ||
3078 | if (m == NULL || | |
3079 | m->busy || m->cleaning || m->pageout_queue || !m->laundry) { | |
3080 | /* | |
3081 | * it's either the same page that someone else has | |
3082 | * started cleaning (or it's finished cleaning or | |
3083 | * been put back on the pageout queue), or | |
3084 | * the page has been freed or we have found a | |
3085 | * new page at this offset... in all of these cases | |
3086 | * we merely need to release the activity_in_progress | |
3087 | * we took when we put the page on the pageout queue | |
3088 | */ | |
3089 | vm_object_activity_end(object); | |
3090 | vm_object_unlock(object); | |
3091 | ||
3092 | vm_page_lockspin_queues(); | |
3093 | continue; | |
3094 | } | |
3095 | pager = object->pager; | |
3096 | ||
3097 | if (pager == MEMORY_OBJECT_NULL) { | |
3098 | /* | |
3099 | * This pager has been destroyed by either | |
3100 | * memory_object_destroy or vm_object_destroy, and | |
3101 | * so there is nowhere for the page to go. | |
3102 | */ | |
3103 | if (m->pageout) { | |
3104 | /* | |
3105 | * Just free the page... VM_PAGE_FREE takes | |
3106 | * care of cleaning up all the state... | |
3107 | * including doing the vm_pageout_throttle_up | |
3108 | */ | |
3109 | VM_PAGE_FREE(m); | |
3110 | } else { | |
3111 | vm_page_lockspin_queues(); | |
3112 | ||
3113 | vm_pageout_throttle_up(m); | |
3114 | vm_page_activate(m); | |
3115 | ||
3116 | vm_page_unlock_queues(); | |
3117 | ||
3118 | /* | |
3119 | * And we are done with it. | |
3120 | */ | |
3121 | } | |
3122 | vm_object_activity_end(object); | |
3123 | vm_object_unlock(object); | |
3124 | ||
3125 | vm_page_lockspin_queues(); | |
3126 | continue; | |
3127 | } | |
3128 | #if 0 | |
3129 | /* | |
3130 | * we don't hold the page queue lock | |
3131 | * so this check isn't safe to make | |
3132 | */ | |
3133 | VM_PAGE_CHECK(m); | |
3134 | #endif | |
3135 | /* | |
3136 | * give back the activity_in_progress reference we | |
3137 | * took when we queued up this page and replace it | |
3138 | * with a paging_in_progress reference that will | |
3139 | * also keep the paging offset from changing and | |
3140 | * prevent the object from terminating | |
3141 | */ | |
3142 | vm_object_activity_end(object); | |
3143 | vm_object_paging_begin(object); | |
3144 | vm_object_unlock(object); | |
3145 | ||
3146 | /* | |
3147 | * Send the data to the pager. | |
3148 | * any pageout clustering happens there | |
3149 | */ | |
3150 | memory_object_data_return(pager, | |
3151 | m->offset + object->paging_offset, | |
3152 | PAGE_SIZE, | |
3153 | NULL, | |
3154 | NULL, | |
3155 | FALSE, | |
3156 | FALSE, | |
3157 | 0); | |
3158 | ||
3159 | vm_object_lock(object); | |
3160 | vm_object_paging_end(object); | |
3161 | vm_object_unlock(object); | |
3162 | ||
3163 | vm_pageout_io_throttle(); | |
3164 | ||
3165 | vm_page_lockspin_queues(); | |
3166 | } | |
3167 | q->pgo_busy = FALSE; | |
3168 | q->pgo_idle = TRUE; | |
3169 | ||
3170 | assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); | |
3171 | vm_page_unlock_queues(); | |
3172 | ||
3173 | thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q); | |
3174 | /*NOTREACHED*/ | |
3175 | } | |
3176 | ||
3177 | ||
3178 | uint32_t vm_compressor_failed; | |
3179 | ||
3180 | static void | |
3181 | vm_pageout_iothread_internal_continue(struct cq *cq) | |
3182 | { | |
3183 | struct vm_pageout_queue *q; | |
3184 | vm_page_t m = NULL; | |
3185 | vm_object_t object; | |
3186 | memory_object_t pager; | |
3187 | boolean_t pgo_draining; | |
3188 | vm_page_t local_q; | |
3189 | int local_cnt; | |
3190 | vm_page_t local_freeq = NULL; | |
3191 | int local_freed = 0; | |
3192 | int local_batch_size; | |
3193 | kern_return_t retval; | |
3194 | int compressed_count_delta; | |
3195 | ||
3196 | ||
3197 | KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
3198 | ||
3199 | q = cq->q; | |
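| /* | |
|  * size each pass so the compressor threads share the queue's maximum | |
|  * laundry: each thread grabs at most 1/(4 * nthreads) of pgo_maxlaundry | |
|  * per trip, which also keeps page queues lock hold times short | |
|  */ | |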
3200 | local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 4); | |
3201 | ||
3202 | while (TRUE) { | |
3203 | ||
3204 | local_cnt = 0; | |
3205 | local_q = NULL; | |
3206 | ||
3207 | KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0); | |
3208 | ||
3209 | vm_page_lock_queues(); | |
3210 | ||
3211 | KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
3212 | ||
3213 | KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, 0, 0, 0, 0, 0); | |
3214 | ||
3215 | while ( !queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) { | |
3216 | ||
3217 | queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); | |
3218 | ||
3219 | VM_PAGE_CHECK(m); | |
3220 | ||
3221 | m->pageout_queue = FALSE; | |
3222 | m->pageq.prev = NULL; | |
3223 | ||
3224 | m->pageq.next = (queue_entry_t)local_q; | |
3225 | local_q = m; | |
3226 | local_cnt++; | |
3227 | } | |
3228 | if (local_q == NULL) | |
3229 | break; | |
3230 | ||
3231 | q->pgo_busy = TRUE; | |
3232 | ||
3233 | if ((pgo_draining = q->pgo_draining) == FALSE) | |
3234 | vm_pageout_throttle_up_batch(q, local_cnt); | |
3235 | ||
3236 | vm_page_unlock_queues(); | |
3237 | ||
3238 | KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
3239 | ||
3240 | while (local_q) { | |
3241 | ||
3242 | m = local_q; | |
3243 | local_q = (vm_page_t)m->pageq.next; | |
3244 | m->pageq.next = NULL; | |
3245 | ||
3246 | if (m->object->object_slid) { | |
3247 | panic("slid page %p not allowed on this path\n", m); | |
3248 | } | |
3249 | ||
3250 | object = m->object; | |
3251 | pager = object->pager; | |
3252 | ||
3253 | if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) { | |
3254 | ||
3255 | KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0); | |
3256 | ||
3257 | vm_object_lock(object); | |
3258 | ||
3259 | /* | |
3260 | * If there is no memory object for the page, create | |
3261 | * one and hand it to the compression pager. | |
3262 | */ | |
3263 | ||
3264 | if (!object->pager_initialized) | |
3265 | vm_object_collapse(object, (vm_object_offset_t) 0, TRUE); | |
3266 | if (!object->pager_initialized) | |
3267 | vm_object_compressor_pager_create(object); | |
3268 | ||
3269 | if (!object->pager_initialized) { | |
3270 | /* | |
3271 | * Still no pager for the object. | |
3272 | * Reactivate the page. | |
3273 | * | |
3274 | * Should only happen if there is no | |
3275 | * compression pager | |
3276 | */ | |
3277 | m->pageout = FALSE; | |
3278 | m->laundry = FALSE; | |
3279 | PAGE_WAKEUP_DONE(m); | |
3280 | ||
3281 | vm_page_lockspin_queues(); | |
3282 | vm_page_activate(m); | |
3283 | vm_pageout_dirty_no_pager++; | |
3284 | vm_page_unlock_queues(); | |
3285 | ||
3286 | /* | |
3287 | * And we are done with it. | |
3288 | */ | |
3289 | vm_object_activity_end(object); | |
3290 | vm_object_unlock(object); | |
3291 | ||
3292 | continue; | |
3293 | } | |
3294 | pager = object->pager; | |
3295 | ||
3296 | if (pager == MEMORY_OBJECT_NULL) { | |
3297 | /* | |
3298 | * This pager has been destroyed by either | |
3299 | * memory_object_destroy or vm_object_destroy, and | |
3300 | * so there is nowhere for the page to go. | |
3301 | */ | |
3302 | if (m->pageout) { | |
3303 | /* | |
3304 | * Just free the page... VM_PAGE_FREE takes | |
3305 | * care of cleaning up all the state... | |
3306 | * including doing the vm_pageout_throttle_up | |
3307 | */ | |
3308 | VM_PAGE_FREE(m); | |
3309 | } else { | |
3310 | m->laundry = FALSE; | |
3311 | PAGE_WAKEUP_DONE(m); | |
3312 | ||
3313 | vm_page_lockspin_queues(); | |
3314 | vm_page_activate(m); | |
3315 | vm_page_unlock_queues(); | |
3316 | ||
3317 | /* | |
3318 | * And we are done with it. | |
3319 | */ | |
3320 | } | |
3321 | vm_object_activity_end(object); | |
3322 | vm_object_unlock(object); | |
3323 | ||
3324 | continue; | |
3325 | } | |
3326 | vm_object_unlock(object); | |
3327 | ||
3328 | KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0); | |
3329 | } | |
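| /* | |
|  * don't let this (privileged) thread drive the free list below the | |
|  * compressor's reserve floor: first give our local free list back to | |
|  * the global pool; if that isn't enough, record ourselves as a | |
|  * privileged waiter and block until pages are freed | |
|  */ | |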
3330 | while (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) { | |
3331 | kern_return_t wait_result; | |
3332 | int need_wakeup = 0; | |
3333 | ||
3334 | if (local_freeq) { | |
3335 | vm_page_free_list(local_freeq, TRUE); | |
3336 | ||
3337 | local_freeq = NULL; | |
3338 | local_freed = 0; | |
3339 | ||
3340 | continue; | |
3341 | } | |
3342 | lck_mtx_lock_spin(&vm_page_queue_free_lock); | |
3343 | ||
3344 | if (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) { | |
3345 | ||
3346 | if (vm_page_free_wanted_privileged++ == 0) | |
3347 | need_wakeup = 1; | |
3348 | wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT); | |
3349 | ||
3350 | lck_mtx_unlock(&vm_page_queue_free_lock); | |
3351 | ||
3352 | if (need_wakeup) | |
3353 | thread_wakeup((event_t)&vm_page_free_wanted); | |
3354 | ||
3355 | if (wait_result == THREAD_WAITING) | |
3356 | thread_block(THREAD_CONTINUE_NULL); | |
3357 | } else | |
3358 | lck_mtx_unlock(&vm_page_queue_free_lock); | |
3359 | } | |
3360 | ||
3361 | assert(object->activity_in_progress > 0); | |
3362 | ||
3363 | retval = vm_compressor_pager_put( | |
3364 | pager, | |
3365 | m->offset + object->paging_offset, | |
3366 | m->phys_page, | |
3367 | &cq->current_chead, | |
3368 | cq->scratch_buf, | |
3369 | &compressed_count_delta); | |
3370 | ||
3371 | vm_object_lock(object); | |
3372 | assert(object->activity_in_progress > 0); | |
3373 | ||
3374 | assert(m->object == object); | |
3375 | ||
3376 | vm_compressor_pager_count(pager, | |
3377 | compressed_count_delta, | |
3378 | FALSE, /* shared_lock */ | |
3379 | object); | |
3380 | ||
3381 | m->laundry = FALSE; | |
3382 | m->pageout = FALSE; | |
3383 | ||
3384 | if (retval == KERN_SUCCESS) { | |
3385 | /* | |
3386 | * If the object is purgeable, its owner's | |
3387 | * purgeable ledgers will be updated in | |
3388 | * vm_page_remove() but the page still | |
3389 | * contributes to the owner's memory footprint, | |
3390 | * so account for it as such. | |
3391 | */ | |
3392 | if (object->purgable != VM_PURGABLE_DENY && | |
3393 | object->vo_purgeable_owner != NULL) { | |
3394 | /* one more compressed purgeable page */ | |
3395 | vm_purgeable_compressed_update(object, | |
3396 | +1); | |
3397 | } | |
3398 | ||
3399 | vm_page_compressions_failing = FALSE; | |
3400 | ||
3401 | VM_STAT_INCR(compressions); | |
3402 | ||
3403 | if (m->tabled) | |
3404 | vm_page_remove(m, TRUE); | |
3405 | vm_object_activity_end(object); | |
3406 | vm_object_unlock(object); | |
3407 | ||
3408 | m->pageq.next = (queue_entry_t)local_freeq; | |
3409 | local_freeq = m; | |
3410 | local_freed++; | |
3411 | ||
3412 | } else { | |
3413 | PAGE_WAKEUP_DONE(m); | |
3414 | ||
3415 | vm_page_lockspin_queues(); | |
3416 | ||
3417 | vm_page_activate(m); | |
3418 | vm_compressor_failed++; | |
3419 | ||
3420 | vm_page_compressions_failing = TRUE; | |
3421 | ||
3422 | vm_page_unlock_queues(); | |
3423 | ||
3424 | vm_object_activity_end(object); | |
3425 | vm_object_unlock(object); | |
3426 | } | |
3427 | } | |
3428 | if (local_freeq) { | |
3429 | vm_page_free_list(local_freeq, TRUE); | |
3430 | ||
3431 | local_freeq = NULL; | |
3432 | local_freed = 0; | |
3433 | } | |
3434 | if (pgo_draining == TRUE) { | |
3435 | vm_page_lockspin_queues(); | |
3436 | vm_pageout_throttle_up_batch(q, local_cnt); | |
3437 | vm_page_unlock_queues(); | |
3438 | } | |
3439 | } | |
3440 | KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0); | |
3441 | ||
3442 | /* | |
3443 | * queue lock is held and our q is empty | |
3444 | */ | |
3445 | q->pgo_busy = FALSE; | |
3446 | q->pgo_idle = TRUE; | |
3447 | ||
3448 | assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); | |
3449 | vm_page_unlock_queues(); | |
3450 | ||
3451 | KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
3452 | ||
3453 | thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq); | |
3454 | /*NOTREACHED*/ | |
3455 | } | |
3456 | ||
3457 | ||
3458 | ||
3459 | static void | |
3460 | vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_queue *eq, boolean_t req_lowpriority) | |
3461 | { | |
3462 | uint32_t policy; | |
3463 | boolean_t set_iq = FALSE; | |
3464 | boolean_t set_eq = FALSE; | |
3465 | ||
3466 | if (hibernate_cleaning_in_progress == TRUE) | |
3467 | req_lowpriority = FALSE; | |
3468 | ||
3469 | if ((DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) && iq->pgo_inited == TRUE && iq->pgo_lowpriority != req_lowpriority) | |
3470 | set_iq = TRUE; | |
3471 | ||
3472 | if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) | |
3473 | set_eq = TRUE; | |
3474 | ||
3475 | if (set_iq == TRUE || set_eq == TRUE) { | |
3476 | ||
3477 | vm_page_unlock_queues(); | |
3478 | ||
3479 | if (req_lowpriority == TRUE) { | |
3480 | policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED; | |
3481 | DTRACE_VM(laundrythrottle); | |
3482 | } else { | |
3483 | policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED; | |
3484 | DTRACE_VM(laundryunthrottle); | |
3485 | } | |
3486 | if (set_iq == TRUE) { | |
3487 | proc_set_task_policy_thread(kernel_task, iq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy); | |
3488 | ||
3489 | iq->pgo_lowpriority = req_lowpriority; | |
3490 | } | |
3491 | if (set_eq == TRUE) { | |
3492 | proc_set_task_policy_thread(kernel_task, eq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy); | |
3493 | ||
3494 | eq->pgo_lowpriority = req_lowpriority; | |
3495 | } | |
3496 | vm_page_lock_queues(); | |
3497 | } | |
3498 | } | |
3499 | ||
3500 | ||
3501 | static void | |
3502 | vm_pageout_iothread_external(void) | |
3503 | { | |
3504 | thread_t self = current_thread(); | |
3505 | ||
3506 | self->options |= TH_OPT_VMPRIV; | |
3507 | ||
3508 | DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); | |
3509 | ||
3510 | proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL, | |
3511 | TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED); | |
3512 | ||
3513 | vm_page_lock_queues(); | |
3514 | ||
3515 | vm_pageout_queue_external.pgo_tid = self->thread_id; | |
3516 | vm_pageout_queue_external.pgo_lowpriority = TRUE; | |
3517 | vm_pageout_queue_external.pgo_inited = TRUE; | |
3518 | ||
3519 | vm_page_unlock_queues(); | |
3520 | ||
3521 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) | |
3522 | vm_pageout_iothread_external_continue(&vm_pageout_queue_external); | |
3523 | else | |
3524 | vm_pageout_iothread_continue(&vm_pageout_queue_external); | |
3525 | ||
3526 | /*NOTREACHED*/ | |
3527 | } | |
3528 | ||
3529 | ||
3530 | static void | |
3531 | vm_pageout_iothread_internal(struct cq *cq) | |
3532 | { | |
3533 | thread_t self = current_thread(); | |
3534 | ||
3535 | self->options |= TH_OPT_VMPRIV; | |
3536 | ||
3537 | if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) { | |
3538 | DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); | |
3539 | ||
3540 | proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL, | |
3541 | TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED); | |
3542 | } | |
3543 | vm_page_lock_queues(); | |
3544 | ||
3545 | vm_pageout_queue_internal.pgo_tid = self->thread_id; | |
3546 | vm_pageout_queue_internal.pgo_lowpriority = TRUE; | |
3547 | vm_pageout_queue_internal.pgo_inited = TRUE; | |
3548 | ||
3549 | vm_page_unlock_queues(); | |
3550 | ||
3551 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { | |
3552 | cq->q = &vm_pageout_queue_internal; | |
3553 | cq->current_chead = NULL; | |
3554 | cq->scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE); | |
3555 | ||
3556 | vm_pageout_iothread_internal_continue(cq); | |
3557 | } else | |
3558 | vm_pageout_iothread_continue(&vm_pageout_queue_internal); | |
3559 | ||
3560 | /*NOTREACHED*/ | |
3561 | } | |
3562 | ||
3563 | kern_return_t | |
3564 | vm_set_buffer_cleanup_callout(boolean_t (*func)(int)) | |
3565 | { | |
3566 | if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) { | |
3567 | return KERN_SUCCESS; | |
3568 | } else { | |
3569 | return KERN_FAILURE; /* Already set */ | |
3570 | } | |
3571 | } | |
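| ||
| /* | |
|  * A minimal usage sketch (my_buffer_cache_gc is hypothetical; in practice | |
|  * the BSD buffer cache layer registers its collector here once at init, | |
|  * and any second registration fails with KERN_FAILURE): | |
|  * | |
|  *	static boolean_t my_buffer_cache_gc(int all);	// returns TRUE if large zone elements were freed | |
|  *	... | |
|  *	(void) vm_set_buffer_cleanup_callout(my_buffer_cache_gc); | |
|  */ | |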
3572 | ||
3573 | extern boolean_t memorystatus_manual_testing_on; | |
3574 | extern unsigned int memorystatus_level; | |
3575 | ||
3576 | ||
3577 | #if VM_PRESSURE_EVENTS | |
3578 | ||
3579 | boolean_t vm_pressure_events_enabled = FALSE; | |
3580 | ||
3581 | void | |
3582 | vm_pressure_response(void) | |
3583 | { | |
3584 | ||
3585 | vm_pressure_level_t old_level = kVMPressureNormal; | |
3586 | int new_level = -1; | |
3587 | ||
3588 | uint64_t available_memory = 0; | |
3589 | ||
3590 | if (vm_pressure_events_enabled == FALSE) | |
3591 | return; | |
3592 | ||
3593 | ||
3594 | available_memory = (((uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY) * 100); | |
3595 | ||
3596 | ||
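| /* | |
|  * express available (non-compressed) memory as a percentage of all | |
|  * physical pages: available_memory was pre-scaled by 100 above, so | |
|  * dividing by the total page count yields a value in [0, 100] | |
|  */ | |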
3597 | memorystatus_level = (unsigned int) (available_memory / atop_64(max_mem)); | |
3598 | ||
3599 | if (memorystatus_manual_testing_on) { | |
3600 | return; | |
3601 | } | |
3602 | ||
3603 | old_level = memorystatus_vm_pressure_level; | |
3604 | ||
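| /* | |
|  * single-step state machine: Normal can rise to Warning or jump straight | |
|  * to Critical; Warning can fall back to Normal or rise to Critical; | |
|  * Critical can fall to Warning or all the way back to Normal | |
|  */ | |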
3605 | switch (memorystatus_vm_pressure_level) { | |
3606 | ||
3607 | case kVMPressureNormal: | |
3608 | { | |
3609 | if (VM_PRESSURE_WARNING_TO_CRITICAL()) { | |
3610 | new_level = kVMPressureCritical; | |
3611 | } else if (VM_PRESSURE_NORMAL_TO_WARNING()) { | |
3612 | new_level = kVMPressureWarning; | |
3613 | } | |
3614 | break; | |
3615 | } | |
3616 | ||
3617 | case kVMPressureWarning: | |
3618 | case kVMPressureUrgent: | |
3619 | { | |
3620 | if (VM_PRESSURE_WARNING_TO_NORMAL()) { | |
3621 | new_level = kVMPressureNormal; | |
3622 | } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) { | |
3623 | new_level = kVMPressureCritical; | |
3624 | } | |
3625 | break; | |
3626 | } | |
3627 | ||
3628 | case kVMPressureCritical: | |
3629 | { | |
3630 | if (VM_PRESSURE_WARNING_TO_NORMAL()) { | |
3631 | new_level = kVMPressureNormal; | |
3632 | } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) { | |
3633 | new_level = kVMPressureWarning; | |
3634 | } | |
3635 | break; | |
3636 | } | |
3637 | ||
3638 | default: | |
3639 | return; | |
3640 | } | |
3641 | ||
3642 | if (new_level != -1) { | |
3643 | memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level; | |
3644 | ||
3645 | if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != new_level)) { | |
3646 | if (vm_pressure_thread_running == FALSE) { | |
3647 | thread_wakeup(&vm_pressure_thread); | |
3648 | } | |
3649 | ||
3650 | if (old_level != new_level) { | |
3651 | thread_wakeup(&vm_pressure_changed); | |
3652 | } | |
3653 | } | |
3654 | } | |
3655 | ||
3656 | } | |
3657 | #endif /* VM_PRESSURE_EVENTS */ | |
3658 | ||
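| /* | |
|  * *pressure_level carries the caller's last observed level in and the | |
|  * current level out; with wait_for_pressure == TRUE the call blocks | |
|  * (interruptibly) until the level differs from the one passed in. | |
|  * A sketch of a hypothetical polling client: | |
|  * | |
|  *	unsigned int level = kVMPressureNormal; | |
|  *	while (mach_vm_pressure_level_monitor(TRUE, &level) == KERN_SUCCESS) | |
|  *		react_to_pressure(level);	// react_to_pressure is hypothetical | |
|  */ | |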
3659 | kern_return_t | |
3660 | mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) | |
| { | |
3661 | ||
3662 | #if !VM_PRESSURE_EVENTS | |
3663 | ||
3664 | return KERN_FAILURE; | |
3665 | ||
3666 | #else /* VM_PRESSURE_EVENTS */ | |
3667 | ||
3668 | kern_return_t kr = KERN_SUCCESS; | |
3669 | ||
3670 | if (pressure_level != NULL) { | |
3671 | ||
3672 | vm_pressure_level_t old_level = memorystatus_vm_pressure_level; | |
3673 | ||
3674 | if (wait_for_pressure == TRUE) { | |
3675 | wait_result_t wr = 0; | |
3676 | ||
3677 | while (old_level == *pressure_level) { | |
3678 | wr = assert_wait((event_t) &vm_pressure_changed, | |
3679 | THREAD_INTERRUPTIBLE); | |
3680 | if (wr == THREAD_WAITING) { | |
3681 | wr = thread_block(THREAD_CONTINUE_NULL); | |
3682 | } | |
3683 | if (wr == THREAD_INTERRUPTED) { | |
3684 | return KERN_ABORTED; | |
3685 | } | |
3686 | if (wr == THREAD_AWAKENED) { | |
3687 | ||
3688 | old_level = memorystatus_vm_pressure_level; | |
3689 | ||
3690 | if (old_level != *pressure_level) { | |
3691 | break; | |
3692 | } | |
3693 | } | |
3694 | } | |
3695 | } | |
3696 | ||
3697 | *pressure_level = old_level; | |
3698 | kr = KERN_SUCCESS; | |
3699 | } else { | |
3700 | kr = KERN_INVALID_ARGUMENT; | |
3701 | } | |
3702 | ||
3703 | return kr; | |
3704 | #endif /* VM_PRESSURE_EVENTS */ | |
3705 | } | |
3706 | ||
3707 | #if VM_PRESSURE_EVENTS | |
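| /* | |
|  * the first pass through here only parks the thread; each subsequent | |
|  * wakeup (from vm_pressure_response) runs consider_vm_pressure_events() | |
|  * and parks again | |
|  */ | |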
3708 | void | |
3709 | vm_pressure_thread(void) | |
| { | |
3710 | static boolean_t thread_initialized = FALSE; | |
3711 | ||
3712 | if (thread_initialized == TRUE) { | |
3713 | vm_pressure_thread_running = TRUE; | |
3714 | consider_vm_pressure_events(); | |
3715 | vm_pressure_thread_running = FALSE; | |
3716 | } | |
3717 | ||
3718 | thread_initialized = TRUE; | |
3719 | assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT); | |
3720 | thread_block((thread_continue_t)vm_pressure_thread); | |
3721 | } | |
3722 | #endif /* VM_PRESSURE_EVENTS */ | |
3723 | ||
3724 | ||
3725 | uint32_t vm_pageout_considered_page_last = 0; | |
3726 | ||
3727 | /* | |
3728 | * called once per-second via "compute_averages" | |
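|  * and wakes the garbage collector only if vm_pageout_considered_page | |
|  * has advanced since the previous tick | |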
3729 | */ | |
3730 | void | |
3731 | compute_pageout_gc_throttle() | |
3732 | { | |
3733 | if (vm_pageout_considered_page != vm_pageout_considered_page_last) { | |
3734 | ||
3735 | vm_pageout_considered_page_last = vm_pageout_considered_page; | |
3736 | ||
3737 | thread_wakeup((event_t) &vm_pageout_garbage_collect); | |
3738 | } | |
3739 | } | |
3740 | ||
3741 | ||
3742 | static void | |
3743 | vm_pageout_garbage_collect(int collect) | |
3744 | { | |
3745 | ||
3746 | if (collect) { | |
3747 | boolean_t buf_large_zfree = FALSE; | |
3748 | boolean_t first_try = TRUE; | |
3749 | ||
3750 | stack_collect(); | |
3751 | ||
3752 | consider_machine_collect(); | |
3753 | m_drain(); | |
3754 | ||
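| /* | |
|  * ask the buffer cache to shed memory, then run the zone garbage | |
|  * collector on the first pass or whenever the buffer cache freed large | |
|  * zone elements; keep looping while that is still producing memory and | |
|  * we remain below the free target | |
|  */ | |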
3755 | do { | |
3756 | if (consider_buffer_cache_collect != NULL) { | |
3757 | buf_large_zfree = (*consider_buffer_cache_collect)(0); | |
3758 | } | |
3759 | if (first_try == TRUE || buf_large_zfree == TRUE) { | |
3760 | /* | |
3761 | * consider_zone_gc should be last, because the other operations | |
3762 | * might return memory to zones. | |
3763 | */ | |
3764 | consider_zone_gc(buf_large_zfree); | |
3765 | } | |
3766 | first_try = FALSE; | |
3767 | ||
3768 | } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target); | |
3769 | ||
3770 | consider_machine_adjust(); | |
3771 | } | |
3772 | assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT); | |
3773 | ||
3774 | thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1); | |
3775 | /*NOTREACHED*/ | |
3776 | } | |
3777 | ||
3778 | ||
3779 | void vm_pageout_reinit_tuneables(void); | |
3780 | ||
3781 | void | |
3782 | vm_pageout_reinit_tuneables(void) | |
3783 | { | |
3784 | vm_compressor_minorcompact_threshold_divisor = 18; | |
3785 | vm_compressor_majorcompact_threshold_divisor = 22; | |
3786 | vm_compressor_unthrottle_threshold_divisor = 32; | |
3787 | } | |
3788 | ||
3789 | ||
3790 | #if VM_PAGE_BUCKETS_CHECK | |
3791 | #if VM_PAGE_FAKE_BUCKETS | |
3792 | extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end; | |
3793 | #endif /* VM_PAGE_FAKE_BUCKETS */ | |
3794 | #endif /* VM_PAGE_BUCKETS_CHECK */ | |
3795 | ||
3796 | #define FBDP_TEST_COLLAPSE_COMPRESSOR 0 | |
3797 | #if FBDP_TEST_COLLAPSE_COMPRESSOR | |
3798 | extern boolean_t vm_object_collapse_compressor_allowed; | |
3799 | #include <IOKit/IOLib.h> | |
3800 | #endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */ | |
3801 | ||
3802 | #define FBDP_TEST_WIRE_AND_EXTRACT 0 | |
3803 | #if FBDP_TEST_WIRE_AND_EXTRACT | |
3804 | extern ledger_template_t task_ledger_template; | |
3805 | #include <mach/mach_vm.h> | |
3806 | extern ppnum_t vm_map_get_phys_page(vm_map_t map, | |
3807 | vm_offset_t offset); | |
3808 | #endif /* FBDP_TEST_WIRE_AND_EXTRACT */ | |
3809 | ||
3810 | void | |
3811 | vm_pageout(void) | |
3812 | { | |
3813 | thread_t self = current_thread(); | |
3814 | thread_t thread; | |
3815 | kern_return_t result; | |
3816 | spl_t s; | |
3817 | ||
3818 | /* | |
3819 | * Set thread privileges. | |
3820 | */ | |
3821 | s = splsched(); | |
3822 | thread_lock(self); | |
3823 | self->priority = BASEPRI_PREEMPT - 1; | |
3824 | set_sched_pri(self, self->priority); | |
3825 | thread_unlock(self); | |
3826 | ||
3827 | if (!self->reserved_stack) | |
3828 | self->reserved_stack = self->kernel_stack; | |
3829 | ||
3830 | splx(s); | |
3831 | ||
3832 | /* | |
3833 | * Initialize some paging parameters. | |
3834 | */ | |
3835 | ||
3836 | if (vm_pageout_swap_wait == 0) | |
3837 | vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT; | |
3838 | ||
3839 | if (vm_pageout_idle_wait == 0) | |
3840 | vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT; | |
3841 | ||
3842 | if (vm_pageout_burst_wait == 0) | |
3843 | vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT; | |
3844 | ||
3845 | if (vm_pageout_empty_wait == 0) | |
3846 | vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT; | |
3847 | ||
3848 | if (vm_pageout_deadlock_wait == 0) | |
3849 | vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT; | |
3850 | ||
3851 | if (vm_pageout_deadlock_relief == 0) | |
3852 | vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF; | |
3853 | ||
3854 | if (vm_pageout_inactive_relief == 0) | |
3855 | vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF; | |
3856 | ||
3857 | if (vm_pageout_burst_active_throttle == 0) | |
3858 | vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE; | |
3859 | ||
3860 | if (vm_pageout_burst_inactive_throttle == 0) | |
3861 | vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE; | |
3862 | ||
3863 | /* | |
3864 | * Set kernel task to low backing store privileged | |
3865 | * status | |
3866 | */ | |
3867 | task_lock(kernel_task); | |
3868 | kernel_task->priv_flags |= VM_BACKING_STORE_PRIV; | |
3869 | task_unlock(kernel_task); | |
3870 | ||
3871 | vm_page_free_count_init = vm_page_free_count; | |
3872 | ||
3873 | /* | |
3874 | * even if we've already called vm_page_free_reserve | |
3875 | * call it again here to insure that the targets are | |
3876 | * accurately calculated (it uses vm_page_free_count_init) | |
3877 | * calling it with an arg of 0 will not change the reserve | |
3878 | * but will re-calculate free_min and free_target | |
3879 | */ | |
3880 | if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) { | |
3881 | vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved); | |
3882 | } else | |
3883 | vm_page_free_reserve(0); | |
3884 | ||
3885 | ||
3886 | queue_init(&vm_pageout_queue_external.pgo_pending); | |
3887 | vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; | |
3888 | vm_pageout_queue_external.pgo_laundry = 0; | |
3889 | vm_pageout_queue_external.pgo_idle = FALSE; | |
3890 | vm_pageout_queue_external.pgo_busy = FALSE; | |
3891 | vm_pageout_queue_external.pgo_throttled = FALSE; | |
3892 | vm_pageout_queue_external.pgo_draining = FALSE; | |
3893 | vm_pageout_queue_external.pgo_lowpriority = FALSE; | |
3894 | vm_pageout_queue_external.pgo_tid = -1; | |
3895 | vm_pageout_queue_external.pgo_inited = FALSE; | |
3896 | ||
3897 | ||
3898 | queue_init(&vm_pageout_queue_internal.pgo_pending); | |
3899 | vm_pageout_queue_internal.pgo_maxlaundry = 0; | |
3900 | vm_pageout_queue_internal.pgo_laundry = 0; | |
3901 | vm_pageout_queue_internal.pgo_idle = FALSE; | |
3902 | vm_pageout_queue_internal.pgo_busy = FALSE; | |
3903 | vm_pageout_queue_internal.pgo_throttled = FALSE; | |
3904 | vm_pageout_queue_internal.pgo_draining = FALSE; | |
3905 | vm_pageout_queue_internal.pgo_lowpriority = FALSE; | |
3906 | vm_pageout_queue_internal.pgo_tid = -1; | |
3907 | vm_pageout_queue_internal.pgo_inited = FALSE; | |
3908 | ||
3909 | /* internal pageout thread started when default pager registered first time */ | |
3910 | /* external pageout and garbage collection threads started here */ | |
3911 | ||
3912 | result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, | |
3913 | BASEPRI_PREEMPT - 1, | |
3914 | &vm_pageout_external_iothread); | |
3915 | if (result != KERN_SUCCESS) | |
3916 | panic("vm_pageout_iothread_external: create failed"); | |
3917 | ||
3918 | thread_deallocate(vm_pageout_external_iothread); | |
3919 | ||
3920 | result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, | |
3921 | BASEPRI_DEFAULT, | |
3922 | &thread); | |
3923 | if (result != KERN_SUCCESS) | |
3924 | panic("vm_pageout_garbage_collect: create failed"); | |
3925 | ||
3926 | thread_deallocate(thread); | |
3927 | ||
3928 | #if VM_PRESSURE_EVENTS | |
3929 | result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL, | |
3930 | BASEPRI_DEFAULT, | |
3931 | &thread); | |
3932 | ||
3933 | if (result != KERN_SUCCESS) | |
3934 | panic("vm_pressure_thread: create failed"); | |
3935 | ||
3936 | thread_deallocate(thread); | |
3937 | #endif | |
3938 | ||
3939 | vm_object_reaper_init(); | |
3940 | ||
3941 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) | |
3942 | vm_compressor_pager_init(); | |
3943 | ||
3944 | #if VM_PRESSURE_EVENTS | |
3945 | vm_pressure_events_enabled = TRUE; | |
3946 | #endif /* VM_PRESSURE_EVENTS */ | |
3947 | ||
3948 | #if CONFIG_PHANTOM_CACHE | |
3949 | vm_phantom_cache_init(); | |
3950 | #endif | |
3951 | #if VM_PAGE_BUCKETS_CHECK | |
3952 | #if VM_PAGE_FAKE_BUCKETS | |
3953 | printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n", | |
3954 | (uint64_t) vm_page_fake_buckets_start, | |
3955 | (uint64_t) vm_page_fake_buckets_end); | |
3956 | pmap_protect(kernel_pmap, | |
3957 | vm_page_fake_buckets_start, | |
3958 | vm_page_fake_buckets_end, | |
3959 | VM_PROT_READ); | |
3960 | // *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */ | |
3961 | #endif /* VM_PAGE_FAKE_BUCKETS */ | |
3962 | #endif /* VM_PAGE_BUCKETS_CHECK */ | |
3963 | ||
3964 | #if VM_OBJECT_TRACKING | |
3965 | vm_object_tracking_init(); | |
3966 | #endif /* VM_OBJECT_TRACKING */ | |
3967 | ||
3968 | ||
3969 | #if FBDP_TEST_COLLAPSE_COMPRESSOR | |
3970 | vm_object_size_t backing_size, top_size; | |
3971 | vm_object_t backing_object, top_object; | |
3972 | vm_map_offset_t backing_offset, top_offset; | |
3973 | unsigned char *backing_address, *top_address; | |
3974 | kern_return_t kr; | |
3975 | ||
3976 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR:\n"); | |
3977 | ||
3978 | /* create backing object */ | |
3979 | backing_size = 15 * PAGE_SIZE; | |
3980 | backing_object = vm_object_allocate(backing_size); | |
3981 | assert(backing_object != VM_OBJECT_NULL); | |
3982 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n", | |
3983 | backing_object); | |
3984 | /* map backing object */ | |
3985 | backing_offset = 0; | |
3986 | kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0, | |
3987 | VM_FLAGS_ANYWHERE, backing_object, 0, FALSE, | |
3988 | VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT); | |
3989 | assert(kr == KERN_SUCCESS); | |
3990 | backing_address = (unsigned char *) backing_offset; | |
3991 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
3992 | "mapped backing object %p at 0x%llx\n", | |
3993 | backing_object, (uint64_t) backing_offset); | |
3994 | /* populate with pages to be compressed in backing object */ | |
3995 | backing_address[0x1*PAGE_SIZE] = 0xB1; | |
3996 | backing_address[0x4*PAGE_SIZE] = 0xB4; | |
3997 | backing_address[0x7*PAGE_SIZE] = 0xB7; | |
3998 | backing_address[0xa*PAGE_SIZE] = 0xBA; | |
3999 | backing_address[0xd*PAGE_SIZE] = 0xBD; | |
4000 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4001 | "populated pages to be compressed in " | |
4002 | "backing_object %p\n", backing_object); | |
4003 | /* compress backing object */ | |
4004 | vm_object_pageout(backing_object); | |
4005 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n", | |
4006 | backing_object); | |
4007 | /* wait for all the pages to be gone */ | |
4008 | while (*(volatile int *)&backing_object->resident_page_count != 0) | |
4009 | IODelay(10); | |
4010 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n", | |
4011 | backing_object); | |
4012 | /* populate with pages to be resident in backing object */ | |
4013 | backing_address[0x0*PAGE_SIZE] = 0xB0; | |
4014 | backing_address[0x3*PAGE_SIZE] = 0xB3; | |
4015 | backing_address[0x6*PAGE_SIZE] = 0xB6; | |
4016 | backing_address[0x9*PAGE_SIZE] = 0xB9; | |
4017 | backing_address[0xc*PAGE_SIZE] = 0xBC; | |
4018 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4019 | "populated pages to be resident in " | |
4020 | "backing_object %p\n", backing_object); | |
4021 | /* leave the other pages absent */ | |
4022 | /* mess with the paging_offset of the backing_object */ | |
4023 | assert(backing_object->paging_offset == 0); | |
4024 | backing_object->paging_offset = 0x3000; | |
4025 | ||
4026 | /* create top object */ | |
4027 | top_size = 9 * PAGE_SIZE; | |
4028 | top_object = vm_object_allocate(top_size); | |
4029 | assert(top_object != VM_OBJECT_NULL); | |
4030 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created top object %p\n", | |
4031 | top_object); | |
4032 | /* map top object */ | |
4033 | top_offset = 0; | |
4034 | kr = vm_map_enter(kernel_map, &top_offset, top_size, 0, | |
4035 | VM_FLAGS_ANYWHERE, top_object, 0, FALSE, | |
4036 | VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT); | |
4037 | assert(kr == KERN_SUCCESS); | |
4038 | top_address = (unsigned char *) top_offset; | |
4039 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4040 | "mapped top object %p at 0x%llx\n", | |
4041 | top_object, (uint64_t) top_offset); | |
4042 | /* populate with pages to be compressed in top object */ | |
4043 | top_address[0x3*PAGE_SIZE] = 0xA3; | |
4044 | top_address[0x4*PAGE_SIZE] = 0xA4; | |
4045 | top_address[0x5*PAGE_SIZE] = 0xA5; | |
4046 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4047 | "populated pages to be compressed in " | |
4048 | "top_object %p\n", top_object); | |
4049 | /* compress top object */ | |
4050 | vm_object_pageout(top_object); | |
4051 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n", | |
4052 | top_object); | |
4053 | /* wait for all the pages to be gone */ | |
4054 | while (*(volatile int *)&top_object->resident_page_count != 0) | |
| IODelay(10); | |
4055 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n", | |
4056 | top_object); | |
4057 | /* populate with pages to be resident in top object */ | |
4058 | top_address[0x0*PAGE_SIZE] = 0xA0; | |
4059 | top_address[0x1*PAGE_SIZE] = 0xA1; | |
4060 | top_address[0x2*PAGE_SIZE] = 0xA2; | |
4061 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4062 | "populated pages to be resident in " | |
4063 | "top_object %p\n", top_object); | |
4064 | /* leave the other pages absent */ | |
4065 | ||
4066 | /* link the 2 objects */ | |
4067 | vm_object_reference(backing_object); | |
4068 | top_object->shadow = backing_object; | |
4069 | top_object->vo_shadow_offset = 0x3000; | |
4070 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n", | |
4071 | top_object, backing_object); | |
4072 | ||
4073 | /* unmap backing object */ | |
4074 | vm_map_remove(kernel_map, | |
4075 | backing_offset, | |
4076 | backing_offset + backing_size, | |
4077 | 0); | |
4078 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4079 | "unmapped backing_object %p [0x%llx:0x%llx]\n", | |
4080 | backing_object, | |
4081 | (uint64_t) backing_offset, | |
4082 | (uint64_t) (backing_offset + backing_size)); | |
4083 | ||
4084 | /* collapse */ | |
4085 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object); | |
4086 | vm_object_lock(top_object); | |
4087 | vm_object_collapse(top_object, 0, FALSE); | |
4088 | vm_object_unlock(top_object); | |
4089 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object); | |
4090 | ||
4091 | /* did it work? */ | |
4092 | if (top_object->shadow != VM_OBJECT_NULL) { | |
4093 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: not collapsed\n"); | |
4094 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n"); | |
4095 | if (vm_object_collapse_compressor_allowed) { | |
4096 | panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n"); | |
4097 | } | |
4098 | } else { | |
4099 | /* check the contents of the mapping */ | |
4100 | unsigned char expect[9] = | |
4101 | { 0xA0, 0xA1, 0xA2, /* resident in top */ | |
4102 | 0xA3, 0xA4, 0xA5, /* compressed in top */ | |
4103 | 0xB9, /* resident in backing + shadow_offset */ | |
4104 | 0xBD, /* compressed in backing + shadow_offset + paging_offset */ | |
4105 | 0x00 }; /* absent in both */ | |
4106 | unsigned char actual[9]; | |
4107 | unsigned int i, errors; | |
4108 | ||
4109 | errors = 0; | |
4110 | for (i = 0; i < sizeof (actual); i++) { | |
4111 | actual[i] = (unsigned char) top_address[i*PAGE_SIZE]; | |
4112 | if (actual[i] != expect[i]) { | |
4113 | errors++; | |
4114 | } | |
4115 | } | |
4116 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4117 | "actual [%x %x %x %x %x %x %x %x %x] " | |
4118 | "expect [%x %x %x %x %x %x %x %x %x] " | |
4119 | "%d errors\n", | |
4120 | actual[0], actual[1], actual[2], actual[3], | |
4121 | actual[4], actual[5], actual[6], actual[7], | |
4122 | actual[8], | |
4123 | expect[0], expect[1], expect[2], expect[3], | |
4124 | expect[4], expect[5], expect[6], expect[7], | |
4125 | expect[8], | |
4126 | errors); | |
4127 | if (errors) { | |
4128 | panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n"); | |
4129 | } else { | |
4130 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: PASS\n"); | |
4131 | } | |
4132 | } | |
4133 | #endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */ | |
4134 | ||
4135 | #if FBDP_TEST_WIRE_AND_EXTRACT | |
4136 | ledger_t ledger; | |
4137 | vm_map_t user_map, wire_map; | |
4138 | mach_vm_address_t user_addr, wire_addr; | |
4139 | mach_vm_size_t user_size, wire_size; | |
4140 | mach_vm_offset_t cur_offset; | |
4141 | vm_prot_t cur_prot, max_prot; | |
4142 | ppnum_t user_ppnum, wire_ppnum; | |
4143 | kern_return_t kr; | |
4144 | ||
4145 | ledger = ledger_instantiate(task_ledger_template, | |
4146 | LEDGER_CREATE_ACTIVE_ENTRIES); | |
4147 | user_map = vm_map_create(pmap_create(ledger, 0, TRUE), | |
4148 | 0x100000000ULL, | |
4149 | 0x200000000ULL, | |
4150 | TRUE); | |
4151 | wire_map = vm_map_create(NULL, | |
4152 | 0x100000000ULL, | |
4153 | 0x200000000ULL, | |
4154 | TRUE); | |
4155 | user_addr = 0; | |
4156 | user_size = 0x10000; | |
4157 | kr = mach_vm_allocate(user_map, | |
4158 | &user_addr, | |
4159 | user_size, | |
4160 | VM_FLAGS_ANYWHERE); | |
4161 | assert(kr == KERN_SUCCESS); | |
4162 | wire_addr = 0; | |
4163 | wire_size = user_size; | |
4164 | kr = mach_vm_remap(wire_map, | |
4165 | &wire_addr, | |
4166 | wire_size, | |
4167 | 0, | |
4168 | VM_FLAGS_ANYWHERE, | |
4169 | user_map, | |
4170 | user_addr, | |
4171 | FALSE, | |
4172 | &cur_prot, | |
4173 | &max_prot, | |
4174 | VM_INHERIT_NONE); | |
4175 | assert(kr == KERN_SUCCESS); | |
4176 | for (cur_offset = 0; | |
4177 | cur_offset < wire_size; | |
4178 | cur_offset += PAGE_SIZE) { | |
4179 | kr = vm_map_wire_and_extract(wire_map, | |
4180 | wire_addr + cur_offset, | |
4181 | VM_PROT_DEFAULT, | |
4182 | TRUE, | |
4183 | &wire_ppnum); | |
4184 | assert(kr == KERN_SUCCESS); | |
4185 | user_ppnum = vm_map_get_phys_page(user_map, | |
4186 | user_addr + cur_offset); | |
4187 | printf("FBDP_TEST_WIRE_AND_EXTRACT: kr=0x%x " | |
4188 | "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n", | |
4189 | kr, | |
4190 | user_map, user_addr + cur_offset, user_ppnum, | |
4191 | wire_map, wire_addr + cur_offset, wire_ppnum); | |
4192 | if (kr != KERN_SUCCESS || | |
4193 | wire_ppnum == 0 || | |
4194 | wire_ppnum != user_ppnum) { | |
4195 | panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n"); | |
4196 | } | |
4197 | } | |
4198 | cur_offset -= PAGE_SIZE; | |
4199 | kr = vm_map_wire_and_extract(wire_map, | |
4200 | wire_addr + cur_offset, | |
4201 | VM_PROT_DEFAULT, | |
4202 | TRUE, | |
4203 | &wire_ppnum); | |
4204 | assert(kr == KERN_SUCCESS); | |
4205 | printf("FBDP_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x " | |
4206 | "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n", | |
4207 | kr, | |
4208 | user_map, user_addr + cur_offset, user_ppnum, | |
4209 | wire_map, wire_addr + cur_offset, wire_ppnum); | |
4210 | if (kr != KERN_SUCCESS || | |
4211 | wire_ppnum == 0 || | |
4212 | wire_ppnum != user_ppnum) { | |
4213 | panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n"); | |
4214 | } | |
4215 | ||
4216 | printf("FBDP_TEST_WIRE_AND_EXTRACT: PASS\n"); | |
4217 | #endif /* FBDP_TEST_WIRE_AND_EXTRACT */ | |
4218 | ||
4219 | ||
4220 | vm_pageout_continue(); | |
4221 | ||
4222 | /* | |
4223 | * Unreached code! | |
4224 | * | |
4225 | * The vm_pageout_continue() call above never returns, so the code below is never | |
4226 | * executed. We take advantage of this to declare several DTrace VM related probe | |
4227 | * points that our kernel doesn't have an analog for. These are probe points that | |
4228 | * exist in Solaris and are in the DTrace documentation, so people may have written | |
4229 | * scripts that use them. Declaring the probe points here means their scripts will | |
4230 | * compile and execute which we want for portability of the scripts, but since this | |
4231 | * section of code is never reached, the probe points will simply never fire. Yes, | |
4232 | * this is basically a hack. The problem is the DTrace probe points were chosen with | |
4233 | * Solaris specific VM events in mind, not portability to different VM implementations. | |
4234 | */ | |
4235 | ||
4236 | DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL); | |
4237 | DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL); | |
4238 | DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL); | |
4239 | DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL); | |
4240 | DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL); | |
4241 | DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL); | |
4242 | DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL); | |
4243 | /*NOTREACHED*/ | |
4244 | } | |
4245 | ||
4246 | ||
4247 | ||
4248 | #define MAX_COMPRESSOR_THREAD_COUNT 8 | |
4249 | ||
4250 | struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT]; | |
4251 | ||
4252 | int vm_compressor_thread_count = 2; | |
4253 | ||
4254 | kern_return_t | |
4255 | vm_pageout_internal_start(void) | |
4256 | { | |
4257 | kern_return_t result; | |
4258 | int i; | |
4259 | host_basic_info_data_t hinfo; | |
4260 | ||
4261 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { | |
4262 | mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; | |
4263 | #define BSD_HOST 1 | |
4264 | host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); | |
4265 | ||
4266 | assert(hinfo.max_cpus > 0); | |
4267 | ||
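| /* | |
|  * clamp the compressor thread count to the range | |
|  * [1, min(max_cpus - 1, MAX_COMPRESSOR_THREAD_COUNT)] | |
|  */ | |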
4268 | if (vm_compressor_thread_count >= hinfo.max_cpus) | |
4269 | vm_compressor_thread_count = hinfo.max_cpus - 1; | |
4270 | if (vm_compressor_thread_count <= 0) | |
4271 | vm_compressor_thread_count = 1; | |
4272 | else if (vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT) | |
4273 | vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT; | |
4274 | ||
4275 | vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX; | |
4276 | } else { | |
4277 | vm_compressor_thread_count = 1; | |
4278 | vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; | |
4279 | } | |
4280 | ||
4281 | for (i = 0; i < vm_compressor_thread_count; i++) { | |
4282 | ||
4283 | result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread); | |
4284 | if (result == KERN_SUCCESS) | |
4285 | thread_deallocate(vm_pageout_internal_iothread); | |
4286 | else | |
4287 | break; | |
4288 | } | |
4289 | return result; | |
4290 | } | |
4291 | ||
4292 | #if CONFIG_IOSCHED | |
4293 | /* | |
4294 | * To support I/O Expedite for compressed files we mark the upls with special flags. | |
4295 | * The way decmpfs works is that we create a big upl which marks all the pages needed to | |
4296 | * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs | |
4297 | * then issues smaller I/Os for the compressed data, inflates (decompresses) it and puts the data into the pages | |
4298 | * being held in the big original UPL. We mark each of these smaller UPLs with the flag | |
4299 | * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the | |
4300 | * decmp_io_upl field (in the upl structure). This link is protected in the forward direction | |
4301 | * by the req upl lock (the reverse link doesn't need synchronization since we never inspect this link | |
4302 | * unless the real I/O upl is being destroyed). | |
4303 | */ | |
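| ||
| /* | |
|  * Illustrative lifecycle, pieced together from the helpers in this file | |
|  * (the decmpfs call sites live elsewhere, so the exact sequencing is an | |
|  * assumption): | |
|  * | |
|  *	upl_mark_decmp(req_upl);	// tag the big UPL, remember it on the thread | |
|  *	// expedite-capable UPLs created while the thread's decmp_upl is set | |
|  *	// are linked back to req_upl by upl_set_decmp_info() in upl_create() | |
|  *	upl_unmark_decmp(req_upl);	// clear the thread's pointer when done | |
|  *	// upl_destroy() of a real I/O UPL severs the link and drops the | |
|  *	// reference it took on req_upl | |
|  */ | |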
4304 | ||
4305 | ||
4306 | static void | |
4307 | upl_set_decmp_info(upl_t upl, upl_t src_upl) | |
4308 | { | |
4309 | assert((src_upl->flags & UPL_DECMP_REQ) != 0); | |
4310 | ||
4311 | upl_lock(src_upl); | |
4312 | if (src_upl->decmp_io_upl) { | |
4313 | /* | |
4314 | * If there is already an alive real I/O UPL, ignore this new UPL. | |
4315 | * This case should rarely happen and even if it does, it just means | |
4316 | * that we might issue a spurious expedite which the driver is expected | |
4317 | * to handle. | |
4318 | */ | |
4319 | upl_unlock(src_upl); | |
4320 | return; | |
4321 | } | |
4322 | src_upl->decmp_io_upl = (void *)upl; | |
4323 | src_upl->ref_count++; | |
4324 | ||
4325 | upl->flags |= UPL_DECMP_REAL_IO; | |
4326 | upl->decmp_io_upl = (void *)src_upl; | |
4327 | upl_unlock(src_upl); | |
4328 | } | |
4329 | #endif /* CONFIG_IOSCHED */ | |
4330 | ||
4331 | #if UPL_DEBUG | |
4332 | int upl_debug_enabled = 1; | |
4333 | #else | |
4334 | int upl_debug_enabled = 0; | |
4335 | #endif | |
4336 | ||
4337 | static upl_t | |
4338 | upl_create(int type, int flags, upl_size_t size) | |
4339 | { | |
4340 | upl_t upl; | |
4341 | vm_size_t page_field_size = 0; | |
4342 | int upl_flags = 0; | |
4343 | vm_size_t upl_size = sizeof(struct upl); | |
4344 | ||
4345 | size = round_page_32(size); | |
4346 | ||
4347 | if (type & UPL_CREATE_LITE) { | |
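| /* | |
|  * lite list: one bit per page, rounded up to whole bytes and then | |
|  * padded to a 4-byte boundary | |
|  */ | |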
4348 | page_field_size = (atop(size) + 7) >> 3; | |
4349 | page_field_size = (page_field_size + 3) & 0xFFFFFFFC; | |
4350 | ||
4351 | upl_flags |= UPL_LITE; | |
4352 | } | |
4353 | if (type & UPL_CREATE_INTERNAL) { | |
4354 | upl_size += sizeof(struct upl_page_info) * atop(size); | |
4355 | ||
4356 | upl_flags |= UPL_INTERNAL; | |
4357 | } | |
4358 | upl = (upl_t)kalloc(upl_size + page_field_size); | |
4359 | ||
4360 | if (page_field_size) | |
4361 | bzero((char *)upl + upl_size, page_field_size); | |
4362 | ||
4363 | upl->flags = upl_flags | flags; | |
4364 | upl->src_object = NULL; | |
4365 | upl->kaddr = (vm_offset_t)0; | |
4366 | upl->size = 0; | |
4367 | upl->map_object = NULL; | |
4368 | upl->ref_count = 1; | |
4369 | upl->ext_ref_count = 0; | |
4370 | upl->highest_page = 0; | |
4371 | upl_lock_init(upl); | |
4372 | upl->vector_upl = NULL; | |
4373 | #if CONFIG_IOSCHED | |
4374 | if (type & UPL_CREATE_IO_TRACKING) { | |
4375 | upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO); | |
4376 | } | |
4377 | ||
4378 | upl->upl_reprio_info = 0; | |
4379 | upl->decmp_io_upl = 0; | |
4380 | if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) { | |
4381 | /* Only support expedite on internal UPLs */ | |
4382 | thread_t curthread = current_thread(); | |
4383 | upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size)); | |
4384 | bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size))); | |
4385 | upl->flags |= UPL_EXPEDITE_SUPPORTED; | |
4386 | if (curthread->decmp_upl != NULL) | |
4387 | upl_set_decmp_info(upl, curthread->decmp_upl); | |
4388 | } | |
4389 | #endif | |
4390 | #if CONFIG_IOSCHED || UPL_DEBUG | |
4391 | if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) { | |
4392 | upl->upl_creator = current_thread(); | |
4393 | upl->uplq.next = 0; | |
4394 | upl->uplq.prev = 0; | |
4395 | upl->flags |= UPL_TRACKED_BY_OBJECT; | |
4396 | } | |
4397 | #endif | |
4398 | ||
4399 | #if UPL_DEBUG | |
4400 | upl->ubc_alias1 = 0; | |
4401 | upl->ubc_alias2 = 0; | |
4402 | ||
4403 | upl->upl_state = 0; | |
4404 | upl->upl_commit_index = 0; | |
4405 | bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records)); | |
4406 | ||
4407 | (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES); | |
4408 | #endif /* UPL_DEBUG */ | |
4409 | ||
4410 | return(upl); | |
4411 | } | |
4412 | ||
4413 | static void | |
4414 | upl_destroy(upl_t upl) | |
4415 | { | |
4416 | int page_field_size; /* bit field in word size buf */ | |
4417 | int size; | |
4418 | ||
4419 | if (upl->ext_ref_count) { | |
4420 | panic("upl(%p) ext_ref_count", upl); | |
4421 | } | |
4422 | ||
4423 | #if CONFIG_IOSCHED | |
4424 | if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) { | |
4425 | upl_t src_upl; | |
4426 | src_upl = upl->decmp_io_upl; | |
4427 | assert((src_upl->flags & UPL_DECMP_REQ) != 0); | |
4428 | upl_lock(src_upl); | |
4429 | src_upl->decmp_io_upl = NULL; | |
4430 | upl_unlock(src_upl); | |
4431 | upl_deallocate(src_upl); | |
4432 | } | |
4433 | #endif /* CONFIG_IOSCHED */ | |
4434 | ||
4435 | #if CONFIG_IOSCHED || UPL_DEBUG | |
4436 | if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) { | |
4437 | vm_object_t object; | |
4438 | ||
4439 | if (upl->flags & UPL_SHADOWED) { | |
4440 | object = upl->map_object->shadow; | |
4441 | } else { | |
4442 | object = upl->map_object; | |
4443 | } | |
4444 | ||
4445 | vm_object_lock(object); | |
4446 | queue_remove(&object->uplq, upl, upl_t, uplq); | |
4447 | vm_object_activity_end(object); | |
4448 | vm_object_collapse(object, 0, TRUE); | |
4449 | vm_object_unlock(object); | |
4450 | } | |
4451 | #endif | |
4452 | /* | |
4453 | * drop a reference on the map_object whether or | |
4454 | * not a pageout object is inserted | |
4455 | */ | |
4456 | if (upl->flags & UPL_SHADOWED) | |
4457 | vm_object_deallocate(upl->map_object); | |
4458 | ||
4459 | if (upl->flags & UPL_DEVICE_MEMORY) | |
4460 | size = PAGE_SIZE; | |
4461 | else | |
4462 | size = upl->size; | |
4463 | page_field_size = 0; | |
4464 | ||
4465 | if (upl->flags & UPL_LITE) { | |
4466 | page_field_size = ((size/PAGE_SIZE) + 7) >> 3; | |
4467 | page_field_size = (page_field_size + 3) & 0xFFFFFFFC; | |
4468 | } | |
4469 | upl_lock_destroy(upl); | |
4470 | upl->vector_upl = (vector_upl_t) 0xfeedbeef; | |
4471 | ||
4472 | #if CONFIG_IOSCHED | |
4473 | if (upl->flags & UPL_EXPEDITE_SUPPORTED) | |
4474 | kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE)); | |
4475 | #endif | |
4476 | ||
4477 | if (upl->flags & UPL_INTERNAL) { | |
4478 | kfree(upl, | |
4479 | sizeof(struct upl) + | |
4480 | (sizeof(struct upl_page_info) * (size/PAGE_SIZE)) | |
4481 | + page_field_size); | |
4482 | } else { | |
4483 | kfree(upl, sizeof(struct upl) + page_field_size); | |
4484 | } | |
4485 | } | |
4486 | ||
4487 | void | |
4488 | upl_deallocate(upl_t upl) | |
4489 | { | |
4490 | upl_lock(upl); | |
4491 | if (--upl->ref_count == 0) { | |
4492 | if (vector_upl_is_valid(upl)) | |
4493 | vector_upl_deallocate(upl); | |
4494 | upl_unlock(upl); | |
4495 | upl_destroy(upl); | |
4496 | } | |
4497 | else | |
4498 | upl_unlock(upl); | |
4499 | } | |
4500 | ||
4501 | #if CONFIG_IOSCHED | |
4502 | void | |
4503 | upl_mark_decmp(upl_t upl) | |
4504 | { | |
4505 | if (upl->flags & UPL_TRACKED_BY_OBJECT) { | |
4506 | upl->flags |= UPL_DECMP_REQ; | |
4507 | upl->upl_creator->decmp_upl = (void *)upl; | |
4508 | } | |
4509 | } | |
4510 | ||
4511 | void | |
4512 | upl_unmark_decmp(upl_t upl) | |
4513 | { | |
4514 | if (upl && (upl->flags & UPL_DECMP_REQ)) { | |
4515 | upl->upl_creator->decmp_upl = NULL; | |
4516 | } | |
4517 | } | |
4518 | ||
4519 | #endif /* CONFIG_IOSCHED */ | |
4520 | ||
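| /* | |
|  * a pageout queue is considered to be backing up once its laundry count | |
|  * reaches 80% of its maximum | |
|  */ | |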
4521 | #define VM_PAGE_Q_BACKING_UP(q) \ | |
4522 | ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10)) | |
4523 | ||
4524 | boolean_t must_throttle_writes(void); | |
4525 | ||
4526 | boolean_t | |
4527 | must_throttle_writes() | |
4528 | { | |
4529 | if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) && | |
4530 | vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10) | |
4531 | return (TRUE); | |
4532 | ||
4533 | return (FALSE); | |
4534 | } | |
4535 | ||
4536 | ||
4537 | #if DEVELOPMENT || DEBUG | |
4538 | /* | |
4539 | * Statistics about UPL enforcement of copy-on-write obligations. | |
4540 | */ | |
4541 | unsigned long upl_cow = 0; | |
4542 | unsigned long upl_cow_again = 0; | |
4543 | unsigned long upl_cow_pages = 0; | |
4544 | unsigned long upl_cow_again_pages = 0; | |
4545 | ||
4546 | unsigned long iopl_cow = 0; | |
4547 | unsigned long iopl_cow_pages = 0; | |
4548 | #endif | |
4549 | ||
4550 | /* | |
4551 | * Routine: vm_object_upl_request | |
4552 | * Purpose: | |
4553 | * Cause the population of a portion of a vm_object. | |
4554 | * Depending on the nature of the request, the pages | |
4555 | * returned may be contain valid data or be uninitialized. | |
4556 | * A page list structure, listing the physical pages | |
4557 | * will be returned upon request. | |
4558 | * This function is called by the file system or any other | |
4559 | * supplier of backing store to a pager. | |
4560 | * IMPORTANT NOTE: The caller must still respect the relationship | |
4561 | * between the vm_object and its backing memory object. The | |
4562 | * caller MUST NOT substitute changes in the backing file | |
4563 | * without first doing a memory_object_lock_request on the | |
4564 | * target range unless it is known that the pages are not | |
4565 | * shared with another entity at the pager level. | |
4566 | * Copy_in_to: | |
4567 | * if a page list structure is present | |
4568 | * return the mapped physical pages; where a | |
4569 | * page is not present, return a non-initialized | |
4570 | * one. If the no_sync bit is turned on, don't | |
4571 | * call the pager unlock to synchronize with other | |
4572 | * possible copies of the page. Leave pages busy | |
4573 | * in the original object, if a page list structure | |
4574 | * was specified. When a commit of the page list | |
4575 | * pages is done, the dirty bit will be set for each one. | |
4576 | * Copy_out_from: | |
4577 | * If a page list structure is present, return | |
4578 | * all mapped pages. Where a page does not exist | |
4579 | * map a zero filled one. Leave pages busy in | |
4580 | * the original object. If a page list structure | |
4581 | * is not specified, this call is a no-op. | |
4582 | * | |
4583 | * Note: access of default pager objects has a rather interesting | |
4584 | * twist. The caller of this routine, presumably the file system | |
4585 | * page cache handling code, will never actually make a request | |
4586 | * against a default pager backed object. Only the default | |
4587 | * pager will make requests on backing store related vm_objects. | 
4588 | * In this way the default pager can maintain the relationship | 
4589 | * between backing store files (abstract memory objects) and | 
4590 | * the vm_objects (cache objects) they support. | 
4591 | * | |
4592 | */ | |
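 | /* | 
 | * Rough usage sketch (illustrative only: the object/offset/size are | 
 | * hypothetical and error handling is omitted): | 
 | * | 
 | *	upl_t		upl = NULL; | 
 | *	boolean_t	empty; | 
 | *	unsigned int	count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT; | 
 | * | 
 | *	kr = vm_object_upl_request(object, offset, 8 * PAGE_SIZE, | 
 | *				   &upl, NULL, &count, | 
 | *				   UPL_SET_INTERNAL | UPL_SET_LITE); | 
 | *	... issue I/O against the returned page list ... | 
 | *	upl_commit_range(upl, 0, 8 * PAGE_SIZE, 0, NULL, 0, &empty); | 
 | *	upl_deallocate(upl); | 
 | */ | 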
4593 | ||
4594 | __private_extern__ kern_return_t | |
4595 | vm_object_upl_request( | |
4596 | vm_object_t object, | |
4597 | vm_object_offset_t offset, | |
4598 | upl_size_t size, | |
4599 | upl_t *upl_ptr, | |
4600 | upl_page_info_array_t user_page_list, | |
4601 | unsigned int *page_list_count, | |
4602 | int cntrl_flags) | |
4603 | { | |
4604 | vm_page_t dst_page = VM_PAGE_NULL; | |
4605 | vm_object_offset_t dst_offset; | |
4606 | upl_size_t xfer_size; | |
4607 | unsigned int size_in_pages; | |
4608 | boolean_t dirty; | |
4609 | boolean_t hw_dirty; | |
4610 | upl_t upl = NULL; | |
4611 | unsigned int entry; | |
4612 | #if MACH_CLUSTER_STATS | |
4613 | boolean_t encountered_lrp = FALSE; | |
4614 | #endif | |
4615 | vm_page_t alias_page = NULL; | |
4616 | int refmod_state = 0; | |
4617 | wpl_array_t lite_list = NULL; | |
4618 | vm_object_t last_copy_object; | |
4619 | struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; | |
4620 | struct vm_page_delayed_work *dwp; | |
4621 | int dw_count; | |
4622 | int dw_limit; | |
4623 | int io_tracking_flag = 0; | |
4624 | ||
4625 | if (cntrl_flags & ~UPL_VALID_FLAGS) { | |
4626 | /* | |
4627 | * For forward compatibility's sake, | |
4628 | * reject any unknown flag. | |
4629 | */ | |
4630 | return KERN_INVALID_VALUE; | |
4631 | } | |
4632 | if ( (!object->internal) && (object->paging_offset != 0) ) | |
4633 | panic("vm_object_upl_request: external object with non-zero paging offset\n"); | |
4634 | if (object->phys_contiguous) | |
4635 | panic("vm_object_upl_request: contiguous object specified\n"); | |
4636 | ||
4637 | ||
4638 | if (size > MAX_UPL_SIZE_BYTES) | |
4639 | size = MAX_UPL_SIZE_BYTES; | |
4640 | ||
4641 | if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL) | |
4642 | *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT; | |
4643 | ||
4644 | #if CONFIG_IOSCHED || UPL_DEBUG | |
4645 | if (object->io_tracking || upl_debug_enabled) | |
4646 | io_tracking_flag |= UPL_CREATE_IO_TRACKING; | |
4647 | #endif | |
4648 | #if CONFIG_IOSCHED | |
4649 | if (object->io_tracking) | |
4650 | io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP; | |
4651 | #endif | |
4652 | ||
4653 | if (cntrl_flags & UPL_SET_INTERNAL) { | |
4654 | if (cntrl_flags & UPL_SET_LITE) { | |
4655 | ||
4656 | upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size); | |
4657 | ||
4658 | user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); | |
4659 | lite_list = (wpl_array_t) | |
4660 | (((uintptr_t)user_page_list) + | |
4661 | ((size/PAGE_SIZE) * sizeof(upl_page_info_t))); | |
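 | /* | 
 | * Layout of an INTERNAL + LITE upl as allocated by upl_create(): | 
 | * | 
 | *	[ struct upl ][ upl_page_info_t x (size/PAGE_SIZE) ][ lite bitmap ] | 
 | * | 
 | * user_page_list points just past the upl header, and the lite_list | 
 | * bitmap (one bit per page) follows the page info array; upl_destroy() | 
 | * frees all three pieces with a single kfree. | 
 | */ | 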
4662 | if (size == 0) { | |
4663 | user_page_list = NULL; | |
4664 | lite_list = NULL; | |
4665 | } | |
4666 | } else { | |
4667 | upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size); | |
4668 | ||
4669 | user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); | |
4670 | if (size == 0) { | |
4671 | user_page_list = NULL; | |
4672 | } | |
4673 | } | |
4674 | } else { | |
4675 | if (cntrl_flags & UPL_SET_LITE) { | |
4676 | ||
4677 | upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size); | |
4678 | ||
4679 | lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); | |
4680 | if (size == 0) { | |
4681 | lite_list = NULL; | |
4682 | } | |
4683 | } else { | |
4684 | upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size); | |
4685 | } | |
4686 | } | |
4687 | *upl_ptr = upl; | |
4688 | ||
4689 | if (user_page_list) | |
4690 | user_page_list[0].device = FALSE; | |
4691 | ||
4692 | if (cntrl_flags & UPL_SET_LITE) { | |
4693 | upl->map_object = object; | |
4694 | } else { | |
4695 | upl->map_object = vm_object_allocate(size); | |
4696 | /* | |
4697 | * No need to lock the new object: nobody else knows | 
4698 | * about it yet, so it's all ours so far. | |
4699 | */ | |
4700 | upl->map_object->shadow = object; | |
4701 | upl->map_object->pageout = TRUE; | |
4702 | upl->map_object->can_persist = FALSE; | |
4703 | upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; | |
4704 | upl->map_object->vo_shadow_offset = offset; | |
4705 | upl->map_object->wimg_bits = object->wimg_bits; | |
4706 | ||
4707 | VM_PAGE_GRAB_FICTITIOUS(alias_page); | |
4708 | ||
4709 | upl->flags |= UPL_SHADOWED; | |
4710 | } | |
4711 | /* | |
4712 | * ENCRYPTED SWAP: | |
4713 | * Just mark the UPL as "encrypted" here. | |
4714 | * We'll actually encrypt the pages later, | |
4715 | * in upl_encrypt(), when the caller has | |
4716 | * selected which pages need to go to swap. | |
4717 | */ | |
4718 | if (cntrl_flags & UPL_ENCRYPT) | |
4719 | upl->flags |= UPL_ENCRYPTED; | |
4720 | ||
4721 | if (cntrl_flags & UPL_FOR_PAGEOUT) | |
4722 | upl->flags |= UPL_PAGEOUT; | |
4723 | ||
4724 | vm_object_lock(object); | |
4725 | vm_object_activity_begin(object); | |
4726 | ||
4727 | /* | |
4728 | * we can lock in the paging_offset once paging_in_progress is set | |
4729 | */ | |
4730 | upl->size = size; | |
4731 | upl->offset = offset + object->paging_offset; | |
4732 | ||
4733 | #if CONFIG_IOSCHED || UPL_DEBUG | |
4734 | if (object->io_tracking || upl_debug_enabled) { | |
4735 | vm_object_activity_begin(object); | |
4736 | queue_enter(&object->uplq, upl, upl_t, uplq); | |
4737 | } | |
4738 | #endif | |
4739 | if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) { | |
4740 | /* | |
4741 | * Honor copy-on-write obligations | |
4742 | * | |
4743 | * The caller is gathering these pages and | |
4744 | * might modify their contents. We need to | |
4745 | * make sure that the copy object has its own | |
4746 | * private copies of these pages before we let | |
4747 | * the caller modify them. | |
4748 | */ | |
4749 | vm_object_update(object, | |
4750 | offset, | |
4751 | size, | |
4752 | NULL, | |
4753 | NULL, | |
4754 | FALSE, /* should_return */ | |
4755 | MEMORY_OBJECT_COPY_SYNC, | |
4756 | VM_PROT_NO_CHANGE); | |
4757 | #if DEVELOPMENT || DEBUG | |
4758 | upl_cow++; | |
4759 | upl_cow_pages += size >> PAGE_SHIFT; | |
4760 | #endif | |
4761 | } | |
4762 | /* | |
4763 | * remember which copy object we synchronized with | |
4764 | */ | |
4765 | last_copy_object = object->copy; | |
4766 | entry = 0; | |
4767 | ||
4768 | xfer_size = size; | |
4769 | dst_offset = offset; | |
4770 | size_in_pages = size / PAGE_SIZE; | |
4771 | ||
4772 | dwp = &dw_array[0]; | |
4773 | dw_count = 0; | |
4774 | dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); | |
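 | /* | 
 | * dw_array batches deferred page-queue operations (activations, | 
 | * wirings, reference-bit updates) so that vm_page_do_delayed_work() | 
 | * takes the page queues lock once per batch rather than once per | 
 | * page; the batch is flushed whenever dw_count reaches dw_limit. | 
 | */ | 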
4775 | ||
4776 | if (vm_page_free_count > (vm_page_free_target + size_in_pages) || | |
4777 | object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT)) | |
4778 | object->scan_collisions = 0; | |
4779 | ||
4780 | if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) { | |
4781 | boolean_t isSSD = FALSE; | |
4782 | ||
4783 | vnode_pager_get_isSSD(object->pager, &isSSD); | |
4784 | vm_object_unlock(object); | |
4785 | ||
4786 | OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); | |
4787 | ||
4788 | if (isSSD == TRUE) | |
4789 | delay(1000 * size_in_pages); | |
4790 | else | |
4791 | delay(5000 * size_in_pages); | |
4792 | OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); | |
4793 | ||
4794 | vm_object_lock(object); | |
4795 | } | |
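 | /* | 
 | * Illustrative timing (delay() takes microseconds): a 32 page write | 
 | * arriving while the external pageout queue is backed up stalls for | 
 | * roughly 32ms against an SSD backed object (1000us per page) versus | 
 | * 160ms against rotating media (5000us per page). | 
 | */ | 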
4796 | ||
4797 | while (xfer_size) { | |
4798 | ||
4799 | dwp->dw_mask = 0; | |
4800 | ||
4801 | if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) { | |
4802 | vm_object_unlock(object); | |
4803 | VM_PAGE_GRAB_FICTITIOUS(alias_page); | |
4804 | vm_object_lock(object); | |
4805 | } | |
4806 | if (cntrl_flags & UPL_COPYOUT_FROM) { | |
4807 | upl->flags |= UPL_PAGE_SYNC_DONE; | |
4808 | ||
4809 | if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) || | |
4810 | dst_page->fictitious || | |
4811 | dst_page->absent || | |
4812 | dst_page->error || | |
4813 | dst_page->cleaning || | |
4814 | (VM_PAGE_WIRED(dst_page))) { | |
4815 | ||
4816 | if (user_page_list) | |
4817 | user_page_list[entry].phys_addr = 0; | |
4818 | ||
4819 | goto try_next_page; | |
4820 | } | |
4821 | /* | |
4822 | * grab this up front... | |
4823 | * a high percentage of the time we're going to | 
4824 | * need the hardware modification state a bit later | |
4825 | * anyway... so we can eliminate an extra call into | |
4826 | * the pmap layer by grabbing it here and recording it | |
4827 | */ | |
4828 | if (dst_page->pmapped) | |
4829 | refmod_state = pmap_get_refmod(dst_page->phys_page); | |
4830 | else | |
4831 | refmod_state = 0; | |
4832 | ||
4833 | if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) { | |
4834 | /* | |
4835 | * page is on inactive list and referenced... | |
4836 | * reactivate it now... this gets it out of the | |
4837 | * way of vm_pageout_scan which would have to | |
4838 | * reactivate it upon tripping over it | |
4839 | */ | |
4840 | dwp->dw_mask |= DW_vm_page_activate; | |
4841 | } | |
4842 | if (cntrl_flags & UPL_RET_ONLY_DIRTY) { | |
4843 | /* | |
4844 | * we're only asking for DIRTY pages to be returned | |
4845 | */ | |
4846 | if (dst_page->laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) { | |
4847 | /* | |
4848 | * if this is the page that vm_pageout_scan stole to be | 
4849 | * cleaned (as opposed to a buddy being clustered in), | 
4850 | * or this request is not being driven by a PAGEOUT cluster, | 
4851 | * then we only need to check for the page being dirty or | 
4852 | * precious to decide whether to return it | 
4853 | */ | |
4854 | if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED)) | |
4855 | goto check_busy; | |
4856 | goto dont_return; | |
4857 | } | |
4858 | /* | |
4859 | * this is a request for a PAGEOUT cluster and this page | |
4860 | * is merely along for the ride as a 'buddy'... not only | |
4861 | * does it have to be dirty to be returned, but it also | |
4862 | * can't have been referenced recently... | |
4863 | */ | |
4864 | if ( (hibernate_cleaning_in_progress == TRUE || | |
4865 | (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) || dst_page->throttled)) && | |
4866 | ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) { | |
4867 | goto check_busy; | |
4868 | } | |
4869 | dont_return: | |
4870 | /* | |
4871 | * if we reach here, we're not to return | |
4872 | * the page... go on to the next one | |
4873 | */ | |
4874 | if (dst_page->laundry == TRUE) { | |
4875 | /* | |
4876 | * if we get here, the page is not 'cleaning' (filtered out above). | |
4877 | * since it has been referenced, remove it from the laundry | |
4878 | * so we don't pay the cost of an I/O to clean a page | |
4879 | * we're just going to take back | |
4880 | */ | |
4881 | vm_page_lockspin_queues(); | |
4882 | ||
4883 | vm_pageout_steal_laundry(dst_page, TRUE); | |
4884 | vm_page_activate(dst_page); | |
4885 | ||
4886 | vm_page_unlock_queues(); | |
4887 | } | |
4888 | if (user_page_list) | |
4889 | user_page_list[entry].phys_addr = 0; | |
4890 | ||
4891 | goto try_next_page; | |
4892 | } | |
4893 | check_busy: | |
4894 | if (dst_page->busy) { | |
4895 | if (cntrl_flags & UPL_NOBLOCK) { | |
4896 | if (user_page_list) | |
4897 | user_page_list[entry].phys_addr = 0; | |
4898 | ||
4899 | goto try_next_page; | |
4900 | } | |
4901 | /* | |
4902 | * someone else is playing with the | |
4903 | * page. We will have to wait. | |
4904 | */ | |
4905 | PAGE_SLEEP(object, dst_page, THREAD_UNINT); | |
4906 | ||
4907 | continue; | |
4908 | } | |
4909 | /* | |
4910 | * ENCRYPTED SWAP: | |
4911 | * The caller is gathering this page and might | |
4912 | * access its contents later on. Decrypt the | |
4913 | * page before adding it to the UPL, so that | |
4914 | * the caller never sees encrypted data. | |
4915 | */ | |
4916 | if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) { | |
4917 | int was_busy; | |
4918 | ||
4919 | /* | |
4920 | * save the current state of busy | |
4921 | * mark page as busy while decrypt | |
4922 | * is in progress since it will drop | |
4923 | * the object lock... | |
4924 | */ | |
4925 | was_busy = dst_page->busy; | |
4926 | dst_page->busy = TRUE; | |
4927 | ||
4928 | vm_page_decrypt(dst_page, 0); | |
4929 | vm_page_decrypt_for_upl_counter++; | |
4930 | /* | |
4931 | * restore to original busy state | |
4932 | */ | |
4933 | dst_page->busy = was_busy; | |
4934 | } | |
4935 | if (dst_page->pageout_queue == TRUE) { | |
4936 | ||
4937 | vm_page_lockspin_queues(); | |
4938 | ||
4939 | if (dst_page->pageout_queue == TRUE) { | |
4940 | /* | |
4941 | * we've buddied up a page for a clustered pageout | |
4942 | * that has already been moved to the pageout | |
4943 | * queue by pageout_scan... we need to remove | |
4944 | * it from the queue and drop the laundry count | |
4945 | * on that queue | |
4946 | */ | |
4947 | vm_pageout_throttle_up(dst_page); | |
4948 | } | |
4949 | vm_page_unlock_queues(); | |
4950 | } | |
4951 | #if MACH_CLUSTER_STATS | |
4952 | /* | |
4953 | * pageout statistics gathering. count | |
4954 | * all the pages we will page out that | |
4955 | * were not counted in the initial | |
4956 | * vm_pageout_scan work | |
4957 | */ | |
4958 | if (dst_page->pageout) | |
4959 | encountered_lrp = TRUE; | |
4960 | if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious))) { | |
4961 | if (encountered_lrp) | |
4962 | CLUSTER_STAT(pages_at_higher_offsets++;) | |
4963 | else | |
4964 | CLUSTER_STAT(pages_at_lower_offsets++;) | |
4965 | } | |
4966 | #endif | |
4967 | hw_dirty = refmod_state & VM_MEM_MODIFIED; | |
4968 | dirty = hw_dirty ? TRUE : dst_page->dirty; | |
4969 | ||
4970 | if (dst_page->phys_page > upl->highest_page) | |
4971 | upl->highest_page = dst_page->phys_page; | |
4972 | ||
4973 | if (cntrl_flags & UPL_SET_LITE) { | |
4974 | unsigned int pg_num; | |
4975 | ||
4976 | pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); | |
4977 | assert(pg_num == (dst_offset-offset)/PAGE_SIZE); | |
4978 | lite_list[pg_num>>5] |= 1 << (pg_num & 31); | |
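 | /* | 
 | * Worked example: for the page at index 40 (pg_num == 40), | 
 | * pg_num >> 5 selects word 1 of the 32-bit lite_list bitmap and | 
 | * 1 << (pg_num & 31) sets bit 8 within that word, marking the | 
 | * page as covered by this lite UPL. | 
 | */ | 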
4979 | ||
4980 | if (hw_dirty) | |
4981 | pmap_clear_modify(dst_page->phys_page); | |
4982 | ||
4983 | /* | |
4984 | * Mark original page as cleaning | |
4985 | * in place. | |
4986 | */ | |
4987 | dst_page->cleaning = TRUE; | |
4988 | dst_page->precious = FALSE; | |
4989 | } else { | |
4990 | /* | |
4991 | * use pageclean setup, it is more | |
4992 | * convenient even for the pageout | |
4993 | * cases here | |
4994 | */ | |
4995 | vm_object_lock(upl->map_object); | |
4996 | vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); | |
4997 | vm_object_unlock(upl->map_object); | |
4998 | ||
4999 | alias_page->absent = FALSE; | |
5000 | alias_page = NULL; | |
5001 | } | |
5002 | #if MACH_PAGEMAP | |
5003 | /* | |
5004 | * Record that this page has been | |
5005 | * written out | |
5006 | */ | |
5007 | vm_external_state_set(object->existence_map, dst_page->offset); | |
5008 | #endif /*MACH_PAGEMAP*/ | |
5009 | if (dirty) { | |
5010 | SET_PAGE_DIRTY(dst_page, FALSE); | |
5011 | } else { | |
5012 | dst_page->dirty = FALSE; | |
5013 | } | |
5014 | ||
5015 | if (!dirty) | |
5016 | dst_page->precious = TRUE; | |
5017 | ||
5018 | if ( (cntrl_flags & UPL_ENCRYPT) ) { | |
5019 | /* | |
5020 | * ENCRYPTED SWAP: | |
5021 | * We want to deny access to the target page | |
5022 | * because its contents are about to be | |
5023 | * encrypted and the user would be very | |
5024 | * confused to see encrypted data instead | |
5025 | * of their data. | |
5026 | * We also set "encrypted_cleaning" to allow | |
5027 | * vm_pageout_scan() to demote that page | |
5028 | * from "adjacent/clean-in-place" to | |
5029 | * "target/clean-and-free" if it bumps into | |
5030 | * this page during its scanning while we're | |
5031 | * still processing this cluster. | |
5032 | */ | |
5033 | dst_page->busy = TRUE; | |
5034 | dst_page->encrypted_cleaning = TRUE; | |
5035 | } | |
5036 | if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) { | |
5037 | if ( !VM_PAGE_WIRED(dst_page)) | |
5038 | dst_page->pageout = TRUE; | |
5039 | } | |
5040 | } else { | |
5041 | if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) { | |
5042 | /* | |
5043 | * Honor copy-on-write obligations | |
5044 | * | |
5045 | * The copy object has changed since we | |
5046 | * last synchronized for copy-on-write. | |
5047 | * Another copy object might have been | |
5048 | * inserted while we released the object's | |
5049 | * lock. Since someone could have seen the | |
5050 | * original contents of the remaining pages | |
5051 | * through that new object, we have to | |
5052 | * synchronize with it again for the remaining | |
5053 | * pages only. The previous pages are "busy" | |
5054 | * so they can not be seen through the new | |
5055 | * mapping. The new mapping will see our | |
5056 | * upcoming changes for those previous pages, | |
5057 | * but that's OK since they couldn't see what | |
5058 | * was there before. It's just a race anyway | |
5059 | * and there's no guarantee of consistency or | |
5060 | * atomicity. We just don't want new mappings | |
5061 | * to see both the *before* and *after* pages. | |
5062 | */ | |
5063 | if (object->copy != VM_OBJECT_NULL) { | |
5064 | vm_object_update( | |
5065 | object, | |
5066 | dst_offset,/* current offset */ | |
5067 | xfer_size, /* remaining size */ | |
5068 | NULL, | |
5069 | NULL, | |
5070 | FALSE, /* should_return */ | |
5071 | MEMORY_OBJECT_COPY_SYNC, | |
5072 | VM_PROT_NO_CHANGE); | |
5073 | ||
5074 | #if DEVELOPMENT || DEBUG | |
5075 | upl_cow_again++; | |
5076 | upl_cow_again_pages += xfer_size >> PAGE_SHIFT; | |
5077 | #endif | |
5078 | } | |
5079 | /* | |
5080 | * remember the copy object we synced with | |
5081 | */ | |
5082 | last_copy_object = object->copy; | |
5083 | } | |
5084 | dst_page = vm_page_lookup(object, dst_offset); | |
5085 | ||
5086 | if (dst_page != VM_PAGE_NULL) { | |
5087 | ||
5088 | if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) { | |
5089 | /* | |
5090 | * skip over pages already present in the cache | |
5091 | */ | |
5092 | if (user_page_list) | |
5093 | user_page_list[entry].phys_addr = 0; | |
5094 | ||
5095 | goto try_next_page; | |
5096 | } | |
5097 | if (dst_page->fictitious) { | |
5098 | panic("need corner case for fictitious page"); | |
5099 | } | |
5100 | ||
5101 | if (dst_page->busy || dst_page->cleaning) { | |
5102 | /* | |
5103 | * someone else is playing with the | |
5104 | * page. We will have to wait. | |
5105 | */ | |
5106 | PAGE_SLEEP(object, dst_page, THREAD_UNINT); | |
5107 | ||
5108 | continue; | |
5109 | } | |
5110 | if (dst_page->laundry) { | |
5111 | dst_page->pageout = FALSE; | |
5112 | ||
5113 | vm_pageout_steal_laundry(dst_page, FALSE); | |
5114 | } | |
5115 | } else { | |
5116 | if (object->private) { | |
5117 | /* | |
5118 | * This is a nasty wrinkle for users | 
5119 | * of upl who encounter device or | 
5120 | * private memory.  However, it is | 
5121 | * unavoidable: only a fault can | 
5122 | * resolve the actual backing | 
5123 | * physical page by asking the | 
5124 | * backing device. | 
5125 | */ | |
5126 | if (user_page_list) | |
5127 | user_page_list[entry].phys_addr = 0; | |
5128 | ||
5129 | goto try_next_page; | |
5130 | } | |
5131 | if (object->scan_collisions) { | |
5132 | /* | |
5133 | * the pageout_scan thread is trying to steal | |
5134 | * pages from this object, but has run into our | |
5135 | * lock... grab 2 pages from the head of the object... | |
5136 | * the first is freed on behalf of pageout_scan, the | |
5137 | * 2nd is for our own use... we use vm_object_page_grab | |
5138 | * in both cases to avoid taking pages from the free | |
5139 | * list since we are under memory pressure and our | |
5140 | * lock on this object is getting in the way of | |
5141 | * relieving it | |
5142 | */ | |
5143 | dst_page = vm_object_page_grab(object); | |
5144 | ||
5145 | if (dst_page != VM_PAGE_NULL) | |
5146 | vm_page_release(dst_page); | |
5147 | ||
5148 | dst_page = vm_object_page_grab(object); | |
5149 | } | |
5150 | if (dst_page == VM_PAGE_NULL) { | |
5151 | /* | |
5152 | * need to allocate a page | |
5153 | */ | |
5154 | dst_page = vm_page_grab(); | |
5155 | } | |
5156 | if (dst_page == VM_PAGE_NULL) { | |
5157 | if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) { | |
5158 | /* | |
5159 | * we don't want to stall waiting for pages to come onto the free list | |
5160 | * while we're already holding absent pages in this UPL... | 
5161 | * the caller will deal with the empty slots | |
5162 | */ | |
5163 | if (user_page_list) | |
5164 | user_page_list[entry].phys_addr = 0; | |
5165 | ||
5166 | goto try_next_page; | |
5167 | } | |
5168 | /* | |
5169 | * no pages available... wait | |
5170 | * then try again for the same | |
5171 | * offset... | |
5172 | */ | |
5173 | vm_object_unlock(object); | |
5174 | ||
5175 | OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); | |
5176 | ||
5177 | VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); | |
5178 | ||
5179 | VM_PAGE_WAIT(); | |
5180 | OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); | |
5181 | ||
5182 | VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); | |
5183 | ||
5184 | vm_object_lock(object); | |
5185 | ||
5186 | continue; | |
5187 | } | |
5188 | vm_page_insert(dst_page, object, dst_offset); | |
5189 | ||
5190 | dst_page->absent = TRUE; | |
5191 | dst_page->busy = FALSE; | |
5192 | ||
5193 | if (cntrl_flags & UPL_RET_ONLY_ABSENT) { | |
5194 | /* | |
5195 | * if UPL_RET_ONLY_ABSENT was specified, | |
5196 | * then we're definitely setting up a | 
5197 | * upl for a clustered read/pagein | |
5198 | * operation... mark the pages as clustered | |
5199 | * so upl_commit_range can put them on the | |
5200 | * speculative list | |
5201 | */ | |
5202 | dst_page->clustered = TRUE; | |
5203 | ||
5204 | if ( !(cntrl_flags & UPL_FILE_IO)) | |
5205 | VM_STAT_INCR(pageins); | |
5206 | } | |
5207 | } | |
5208 | /* | |
5209 | * ENCRYPTED SWAP: | |
5210 | */ | |
5211 | if (cntrl_flags & UPL_ENCRYPT) { | |
5212 | /* | |
5213 | * The page is going to be encrypted when we | |
5214 | * get it from the pager, so mark it so. | |
5215 | */ | |
5216 | dst_page->encrypted = TRUE; | |
5217 | } else { | |
5218 | /* | |
5219 | * Otherwise, the page will not contain | |
5220 | * encrypted data. | |
5221 | */ | |
5222 | dst_page->encrypted = FALSE; | |
5223 | } | |
5224 | dst_page->overwriting = TRUE; | |
5225 | ||
5226 | if (dst_page->pmapped) { | |
5227 | if ( !(cntrl_flags & UPL_FILE_IO)) | |
5228 | /* | |
5229 | * eliminate all mappings from the | |
5230 | * original object and its progeny | 
5231 | */ | |
5232 | refmod_state = pmap_disconnect(dst_page->phys_page); | |
5233 | else | |
5234 | refmod_state = pmap_get_refmod(dst_page->phys_page); | |
5235 | } else | |
5236 | refmod_state = 0; | |
5237 | ||
5238 | hw_dirty = refmod_state & VM_MEM_MODIFIED; | |
5239 | dirty = hw_dirty ? TRUE : dst_page->dirty; | |
5240 | ||
5241 | if (cntrl_flags & UPL_SET_LITE) { | |
5242 | unsigned int pg_num; | |
5243 | ||
5244 | pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); | |
5245 | assert(pg_num == (dst_offset-offset)/PAGE_SIZE); | |
5246 | lite_list[pg_num>>5] |= 1 << (pg_num & 31); | |
5247 | ||
5248 | if (hw_dirty) | |
5249 | pmap_clear_modify(dst_page->phys_page); | |
5250 | ||
5251 | /* | |
5252 | * Mark original page as cleaning | |
5253 | * in place. | |
5254 | */ | |
5255 | dst_page->cleaning = TRUE; | |
5256 | dst_page->precious = FALSE; | |
5257 | } else { | |
5258 | /* | |
5259 | * use pageclean setup, it is more | |
5260 | * convenient even for the pageout | |
5261 | * cases here | |
5262 | */ | |
5263 | vm_object_lock(upl->map_object); | |
5264 | vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); | |
5265 | vm_object_unlock(upl->map_object); | |
5266 | ||
5267 | alias_page->absent = FALSE; | |
5268 | alias_page = NULL; | |
5269 | } | |
5270 | ||
5271 | if (cntrl_flags & UPL_REQUEST_SET_DIRTY) { | |
5272 | upl->flags &= ~UPL_CLEAR_DIRTY; | |
5273 | upl->flags |= UPL_SET_DIRTY; | |
5274 | dirty = TRUE; | 
5276 | } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) { | |
5277 | /* | |
5278 | * clean in place for read implies | |
5279 | * that a write will be done on all | |
5280 | * the pages that are dirty before | |
5281 | * a upl commit is done. The caller | |
5282 | * is obligated to preserve the | |
5283 | * contents of all pages marked dirty | |
5284 | */ | |
5285 | upl->flags |= UPL_CLEAR_DIRTY; | |
5286 | } | |
5287 | dst_page->dirty = dirty; | |
5288 | ||
5289 | if (!dirty) | |
5290 | dst_page->precious = TRUE; | |
5291 | ||
5292 | if ( !VM_PAGE_WIRED(dst_page)) { | |
5293 | /* | |
5294 | * deny access to the target page while | |
5295 | * it is being worked on | |
5296 | */ | |
5297 | dst_page->busy = TRUE; | |
5298 | } else | |
5299 | dwp->dw_mask |= DW_vm_page_wire; | |
5300 | ||
5301 | /* | |
5302 | * We might be about to satisfy a fault which has been | |
5303 | * requested. So no need for the "restart" bit. | |
5304 | */ | |
5305 | dst_page->restart = FALSE; | |
5306 | if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) { | |
5307 | /* | |
5308 | * expect the page to be used | |
5309 | */ | |
5310 | dwp->dw_mask |= DW_set_reference; | |
5311 | } | |
5312 | if (cntrl_flags & UPL_PRECIOUS) { | |
5313 | if (dst_page->object->internal) { | |
5314 | SET_PAGE_DIRTY(dst_page, FALSE); | |
5315 | dst_page->precious = FALSE; | |
5316 | } else { | |
5317 | dst_page->precious = TRUE; | |
5318 | } | |
5319 | } else { | |
5320 | dst_page->precious = FALSE; | |
5321 | } | |
5322 | } | |
5323 | if (dst_page->busy) | |
5324 | upl->flags |= UPL_HAS_BUSY; | |
5325 | ||
5326 | if (dst_page->phys_page > upl->highest_page) | |
5327 | upl->highest_page = dst_page->phys_page; | |
5328 | if (user_page_list) { | |
5329 | user_page_list[entry].phys_addr = dst_page->phys_page; | |
5330 | user_page_list[entry].pageout = dst_page->pageout; | |
5331 | user_page_list[entry].absent = dst_page->absent; | |
5332 | user_page_list[entry].dirty = dst_page->dirty; | |
5333 | user_page_list[entry].precious = dst_page->precious; | |
5334 | user_page_list[entry].device = FALSE; | |
5335 | user_page_list[entry].needed = FALSE; | |
5336 | if (dst_page->clustered == TRUE) | |
5337 | user_page_list[entry].speculative = dst_page->speculative; | |
5338 | else | |
5339 | user_page_list[entry].speculative = FALSE; | |
5340 | user_page_list[entry].cs_validated = dst_page->cs_validated; | |
5341 | user_page_list[entry].cs_tainted = dst_page->cs_tainted; | |
5342 | } | |
5343 | /* | |
5344 | * if UPL_RET_ONLY_ABSENT is set, then | |
5345 | * we are working with a fresh page and we've | |
5346 | * just set the clustered flag on it to | |
5347 | * indicate that it was dragged in as part of a | 
5348 | * speculative cluster... so leave it alone | |
5349 | */ | |
5350 | if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) { | |
5351 | /* | |
5352 | * someone is explicitly grabbing this page... | |
5353 | * update clustered and speculative state | |
5354 | * | |
5355 | */ | |
5356 | if (dst_page->clustered) | |
5357 | VM_PAGE_CONSUME_CLUSTERED(dst_page); | |
5358 | } | |
5359 | try_next_page: | |
5360 | if (dwp->dw_mask) { | |
5361 | if (dwp->dw_mask & DW_vm_page_activate) | |
5362 | VM_STAT_INCR(reactivations); | |
5363 | ||
5364 | VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count); | |
5365 | ||
5366 | if (dw_count >= dw_limit) { | |
5367 | vm_page_do_delayed_work(object, &dw_array[0], dw_count); | |
5368 | ||
5369 | dwp = &dw_array[0]; | |
5370 | dw_count = 0; | |
5371 | } | |
5372 | } | |
5373 | entry++; | |
5374 | dst_offset += PAGE_SIZE_64; | |
5375 | xfer_size -= PAGE_SIZE; | |
5376 | } | |
5377 | if (dw_count) | |
5378 | vm_page_do_delayed_work(object, &dw_array[0], dw_count); | |
5379 | ||
5380 | if (alias_page != NULL) { | |
5381 | VM_PAGE_FREE(alias_page); | |
5382 | } | |
5383 | ||
5384 | if (page_list_count != NULL) { | |
5385 | if (upl->flags & UPL_INTERNAL) | |
5386 | *page_list_count = 0; | |
5387 | else if (*page_list_count > entry) | |
5388 | *page_list_count = entry; | |
5389 | } | |
5390 | #if UPL_DEBUG | |
5391 | upl->upl_state = 1; | |
5392 | #endif | |
5393 | vm_object_unlock(object); | |
5394 | ||
5395 | return KERN_SUCCESS; | |
5396 | } | |
5397 | ||
5398 | /* JMM - Backward compatibility for now */ | 
5399 | kern_return_t | |
5400 | vm_fault_list_request( /* forward */ | |
5401 | memory_object_control_t control, | |
5402 | vm_object_offset_t offset, | |
5403 | upl_size_t size, | |
5404 | upl_t *upl_ptr, | |
5405 | upl_page_info_t **user_page_list_ptr, | |
5406 | unsigned int page_list_count, | |
5407 | int cntrl_flags); | |
5408 | kern_return_t | |
5409 | vm_fault_list_request( | |
5410 | memory_object_control_t control, | |
5411 | vm_object_offset_t offset, | |
5412 | upl_size_t size, | |
5413 | upl_t *upl_ptr, | |
5414 | upl_page_info_t **user_page_list_ptr, | |
5415 | unsigned int page_list_count, | |
5416 | int cntrl_flags) | |
5417 | { | |
5418 | unsigned int local_list_count; | |
5419 | upl_page_info_t *user_page_list; | |
5420 | kern_return_t kr; | |
5421 | ||
5422 | if((cntrl_flags & UPL_VECTOR)==UPL_VECTOR) | |
5423 | return KERN_INVALID_ARGUMENT; | |
5424 | ||
5425 | if (user_page_list_ptr != NULL) { | |
5426 | local_list_count = page_list_count; | |
5427 | user_page_list = *user_page_list_ptr; | |
5428 | } else { | |
5429 | local_list_count = 0; | |
5430 | user_page_list = NULL; | |
5431 | } | |
5432 | kr = memory_object_upl_request(control, | |
5433 | offset, | |
5434 | size, | |
5435 | upl_ptr, | |
5436 | user_page_list, | |
5437 | &local_list_count, | |
5438 | cntrl_flags); | |
5439 | ||
5440 | if(kr != KERN_SUCCESS) | |
5441 | return kr; | |
5442 | ||
5443 | if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) { | |
5444 | *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr); | |
5445 | } | |
5446 | ||
5447 | return KERN_SUCCESS; | |
5448 | } | |
5449 | ||
5450 | ||
5451 | ||
5452 | /* | |
5453 | * Routine: vm_object_super_upl_request | |
5454 | * Purpose: | |
5455 | * Cause the population of a portion of a vm_object | |
5456 | * in much the same way as memory_object_upl_request. | |
5457 | * Depending on the nature of the request, the pages | |
5458 | * returned may contain valid data or be uninitialized. | 
5459 | * However, the region may be expanded up to the super | |
5460 | * cluster size provided. | |
5461 | */ | |
5462 | ||
5463 | __private_extern__ kern_return_t | |
5464 | vm_object_super_upl_request( | |
5465 | vm_object_t object, | |
5466 | vm_object_offset_t offset, | |
5467 | upl_size_t size, | |
5468 | upl_size_t super_cluster, | |
5469 | upl_t *upl, | |
5470 | upl_page_info_t *user_page_list, | |
5471 | unsigned int *page_list_count, | |
5472 | int cntrl_flags) | |
5473 | { | |
5474 | if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR)) | |
5475 | return KERN_FAILURE; | |
5476 | ||
5477 | assert(object->paging_in_progress); | |
5478 | offset = offset - object->paging_offset; | |
5479 | ||
5480 | if (super_cluster > size) { | |
5481 | ||
5482 | vm_object_offset_t base_offset; | |
5483 | upl_size_t super_size; | |
5484 | vm_object_size_t super_size_64; | |
5485 | ||
5486 | base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1)); | |
5487 | super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster; | |
5488 | super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size; | |
5489 | super_size = (upl_size_t) super_size_64; | |
5490 | assert(super_size == super_size_64); | |
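 | /* | 
 | * Worked example (hypothetical values): offset 0x7000, size 0x2000, | 
 | * super_cluster 0x8000.  base_offset = 0x7000 & ~0x7fff = 0; since | 
 | * offset + size (0x9000) spills past base_offset + super_cluster | 
 | * (0x8000), super_size doubles to 0x10000 so that the expanded | 
 | * request still covers the original range. | 
 | */ | 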
5491 | ||
5492 | if (offset > (base_offset + super_size)) { | |
5493 | panic("vm_object_super_upl_request: Missed target pageout" | |
5494 | " %#llx,%#llx, %#x, %#x, %#x, %#llx\n", | |
5495 | offset, base_offset, super_size, super_cluster, | |
5496 | size, object->paging_offset); | |
5497 | } | |
5498 | /* | |
5499 | * apparently there is a case where the vm requests a | |
5500 | * page to be written out whose offset is beyond the | 
5501 | * object size | |
5502 | */ | |
5503 | if ((offset + size) > (base_offset + super_size)) { | |
5504 | super_size_64 = (offset + size) - base_offset; | |
5505 | super_size = (upl_size_t) super_size_64; | |
5506 | assert(super_size == super_size_64); | |
5507 | } | |
5508 | ||
5509 | offset = base_offset; | |
5510 | size = super_size; | |
5511 | } | |
5512 | return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags); | |
5513 | } | |
5514 | ||
5515 | ||
5516 | kern_return_t | |
5517 | vm_map_create_upl( | |
5518 | vm_map_t map, | |
5519 | vm_map_address_t offset, | |
5520 | upl_size_t *upl_size, | |
5521 | upl_t *upl, | |
5522 | upl_page_info_array_t page_list, | |
5523 | unsigned int *count, | |
5524 | int *flags) | |
5525 | { | |
5526 | vm_map_entry_t entry; | |
5527 | int caller_flags; | |
5528 | int force_data_sync; | |
5529 | int sync_cow_data; | |
5530 | vm_object_t local_object; | |
5531 | vm_map_offset_t local_offset; | |
5532 | vm_map_offset_t local_start; | |
5533 | kern_return_t ret; | |
5534 | ||
5535 | caller_flags = *flags; | |
5536 | ||
5537 | if (caller_flags & ~UPL_VALID_FLAGS) { | |
5538 | /* | |
5539 | * For forward compatibility's sake, | |
5540 | * reject any unknown flag. | |
5541 | */ | |
5542 | return KERN_INVALID_VALUE; | |
5543 | } | |
5544 | force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC); | |
5545 | sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM); | |
5546 | ||
5547 | if (upl == NULL) | |
5548 | return KERN_INVALID_ARGUMENT; | |
5549 | ||
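 | /* | 
 | * Lock-upgrade retry pattern: the map is searched under a read lock, | 
 | * and whenever the entry must be mutated (allocating a backing | 
 | * object, clipping for copy-on-write) vm_map_lock_read_to_write() | 
 | * attempts an in-place upgrade.  A failed upgrade drops the lock, | 
 | * so control jumps back to REDISCOVER_ENTRY and looks the entry up | 
 | * again rather than trusting a stale pointer. | 
 | */ | 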
5550 | REDISCOVER_ENTRY: | |
5551 | vm_map_lock_read(map); | |
5552 | ||
5553 | if (vm_map_lookup_entry(map, offset, &entry)) { | |
5554 | ||
5555 | if ((entry->vme_end - offset) < *upl_size) { | |
5556 | *upl_size = (upl_size_t) (entry->vme_end - offset); | |
5557 | assert(*upl_size == entry->vme_end - offset); | |
5558 | } | |
5559 | ||
5560 | if (caller_flags & UPL_QUERY_OBJECT_TYPE) { | |
5561 | *flags = 0; | |
5562 | ||
5563 | if ( !entry->is_sub_map && entry->object.vm_object != VM_OBJECT_NULL) { | |
5564 | if (entry->object.vm_object->private) | |
5565 | *flags = UPL_DEV_MEMORY; | |
5566 | ||
5567 | if (entry->object.vm_object->phys_contiguous) | |
5568 | *flags |= UPL_PHYS_CONTIG; | |
5569 | } | |
5570 | vm_map_unlock_read(map); | |
5571 | ||
5572 | return KERN_SUCCESS; | |
5573 | } | |
5574 | ||
5575 | if (entry->is_sub_map) { | |
5576 | vm_map_t submap; | |
5577 | ||
5578 | submap = entry->object.sub_map; | |
5579 | local_start = entry->vme_start; | |
5580 | local_offset = entry->offset; | |
5581 | ||
5582 | vm_map_reference(submap); | |
5583 | vm_map_unlock_read(map); | |
5584 | ||
5585 | ret = vm_map_create_upl(submap, | |
5586 | local_offset + (offset - local_start), | |
5587 | upl_size, upl, page_list, count, flags); | |
5588 | vm_map_deallocate(submap); | |
5589 | ||
5590 | return ret; | |
5591 | } | |
5592 | ||
5593 | if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) { | |
5594 | if (*upl_size > MAX_UPL_SIZE_BYTES) | |
5595 | *upl_size = MAX_UPL_SIZE_BYTES; | |
5596 | } | |
5597 | /* | |
5598 | * Create an object if necessary. | |
5599 | */ | |
5600 | if (entry->object.vm_object == VM_OBJECT_NULL) { | |
5601 | ||
5602 | if (vm_map_lock_read_to_write(map)) | |
5603 | goto REDISCOVER_ENTRY; | |
5604 | ||
5605 | entry->object.vm_object = vm_object_allocate((vm_size_t)(entry->vme_end - entry->vme_start)); | |
5606 | entry->offset = 0; | |
5607 | ||
5608 | vm_map_lock_write_to_read(map); | |
5609 | } | |
5610 | if (!(caller_flags & UPL_COPYOUT_FROM)) { | |
5611 | if (!(entry->protection & VM_PROT_WRITE)) { | |
5612 | vm_map_unlock_read(map); | |
5613 | return KERN_PROTECTION_FAILURE; | |
5614 | } | |
5615 | } | |
5616 | ||
5617 | local_object = entry->object.vm_object; | |
5618 | if (vm_map_entry_should_cow_for_true_share(entry) && | |
5619 | local_object->vo_size > *upl_size && | |
5620 | *upl_size != 0) { | |
5621 | vm_prot_t prot; | |
5622 | ||
5623 | /* | |
5624 | * Set up the targeted range for copy-on-write to avoid | |
5625 | * applying true_share/copy_delay to the entire object. | |
5626 | */ | |
5627 | ||
5628 | if (vm_map_lock_read_to_write(map)) { | |
5629 | goto REDISCOVER_ENTRY; | |
5630 | } | |
5631 | ||
5632 | vm_map_clip_start(map, | |
5633 | entry, | |
5634 | vm_map_trunc_page(offset, | |
5635 | VM_MAP_PAGE_MASK(map))); | |
5636 | vm_map_clip_end(map, | |
5637 | entry, | |
5638 | vm_map_round_page(offset + *upl_size, | |
5639 | VM_MAP_PAGE_MASK(map))); | |
5640 | if ((entry->vme_end - offset) < *upl_size) { | |
5641 | *upl_size = (upl_size_t) (entry->vme_end - offset); | |
5642 | assert(*upl_size == entry->vme_end - offset); | |
5643 | } | |
5644 | ||
5645 | prot = entry->protection & ~VM_PROT_WRITE; | |
5646 | if (override_nx(map, entry->alias) && prot) | |
5647 | prot |= VM_PROT_EXECUTE; | |
5648 | vm_object_pmap_protect(local_object, | |
5649 | entry->offset, | |
5650 | entry->vme_end - entry->vme_start, | |
5651 | ((entry->is_shared || map->mapped_in_other_pmaps) | |
5652 | ? PMAP_NULL | |
5653 | : map->pmap), | |
5654 | entry->vme_start, | |
5655 | prot); | |
5656 | entry->needs_copy = TRUE; | |
5657 | ||
5658 | vm_map_lock_write_to_read(map); | |
5659 | } | |
5660 | ||
5661 | if (entry->needs_copy) { | |
5662 | /* | |
5663 | * Honor copy-on-write for COPY_SYMMETRIC | |
5664 | * strategy. | |
5665 | */ | |
5666 | vm_map_t local_map; | |
5667 | vm_object_t object; | |
5668 | vm_object_offset_t new_offset; | |
5669 | vm_prot_t prot; | |
5670 | boolean_t wired; | |
5671 | vm_map_version_t version; | |
5672 | vm_map_t real_map; | |
5673 | vm_prot_t fault_type; | |
5674 | ||
5675 | local_map = map; | |
5676 | ||
5677 | if (caller_flags & UPL_COPYOUT_FROM) { | |
5678 | fault_type = VM_PROT_READ | VM_PROT_COPY; | |
5679 | vm_counters.create_upl_extra_cow++; | |
5680 | vm_counters.create_upl_extra_cow_pages += (entry->vme_end - entry->vme_start) / PAGE_SIZE; | |
5681 | } else { | |
5682 | fault_type = VM_PROT_WRITE; | |
5683 | } | |
5684 | if (vm_map_lookup_locked(&local_map, | |
5685 | offset, fault_type, | |
5686 | OBJECT_LOCK_EXCLUSIVE, | |
5687 | &version, &object, | |
5688 | &new_offset, &prot, &wired, | |
5689 | NULL, | |
5690 | &real_map) != KERN_SUCCESS) { | |
5691 | if (fault_type == VM_PROT_WRITE) { | |
5692 | vm_counters.create_upl_lookup_failure_write++; | |
5693 | } else { | |
5694 | vm_counters.create_upl_lookup_failure_copy++; | |
5695 | } | |
5696 | vm_map_unlock_read(local_map); | |
5697 | return KERN_FAILURE; | |
5698 | } | |
5699 | if (real_map != map) | |
5700 | vm_map_unlock(real_map); | |
5701 | vm_map_unlock_read(local_map); | |
5702 | ||
5703 | vm_object_unlock(object); | |
5704 | ||
5705 | goto REDISCOVER_ENTRY; | |
5706 | } | |
5707 | ||
5708 | if (sync_cow_data) { | |
5709 | if (entry->object.vm_object->shadow || entry->object.vm_object->copy) { | |
5710 | local_object = entry->object.vm_object; | |
5711 | local_start = entry->vme_start; | |
5712 | local_offset = entry->offset; | |
5713 | ||
5714 | vm_object_reference(local_object); | |
5715 | vm_map_unlock_read(map); | |
5716 | ||
5717 | if (local_object->shadow && local_object->copy) { | |
5718 | vm_object_lock_request( | |
5719 | local_object->shadow, | |
5720 | (vm_object_offset_t) | |
5721 | ((offset - local_start) + | |
5722 | local_offset) + | |
5723 | local_object->vo_shadow_offset, | |
5724 | *upl_size, FALSE, | |
5725 | MEMORY_OBJECT_DATA_SYNC, | |
5726 | VM_PROT_NO_CHANGE); | |
5727 | } | |
5728 | sync_cow_data = FALSE; | |
5729 | vm_object_deallocate(local_object); | |
5730 | ||
5731 | goto REDISCOVER_ENTRY; | |
5732 | } | |
5733 | } | |
5734 | if (force_data_sync) { | |
5735 | local_object = entry->object.vm_object; | |
5736 | local_start = entry->vme_start; | |
5737 | local_offset = entry->offset; | |
5738 | ||
5739 | vm_object_reference(local_object); | |
5740 | vm_map_unlock_read(map); | |
5741 | ||
5742 | vm_object_lock_request( | |
5743 | local_object, | |
5744 | (vm_object_offset_t) | |
5745 | ((offset - local_start) + local_offset), | |
5746 | (vm_object_size_t)*upl_size, FALSE, | |
5747 | MEMORY_OBJECT_DATA_SYNC, | |
5748 | VM_PROT_NO_CHANGE); | |
5749 | ||
5750 | force_data_sync = FALSE; | |
5751 | vm_object_deallocate(local_object); | |
5752 | ||
5753 | goto REDISCOVER_ENTRY; | |
5754 | } | |
5755 | if (entry->object.vm_object->private) | |
5756 | *flags = UPL_DEV_MEMORY; | |
5757 | else | |
5758 | *flags = 0; | |
5759 | ||
5760 | if (entry->object.vm_object->phys_contiguous) | |
5761 | *flags |= UPL_PHYS_CONTIG; | |
5762 | ||
5763 | local_object = entry->object.vm_object; | |
5764 | local_offset = entry->offset; | |
5765 | local_start = entry->vme_start; | |
5766 | ||
5767 | vm_object_reference(local_object); | |
5768 | vm_map_unlock_read(map); | |
5769 | ||
5770 | ret = vm_object_iopl_request(local_object, | |
5771 | (vm_object_offset_t) ((offset - local_start) + local_offset), | |
5772 | *upl_size, | |
5773 | upl, | |
5774 | page_list, | |
5775 | count, | |
5776 | caller_flags); | |
5777 | vm_object_deallocate(local_object); | |
5778 | ||
5779 | return(ret); | |
5780 | } | |
5781 | vm_map_unlock_read(map); | |
5782 | ||
5783 | return(KERN_FAILURE); | |
5784 | } | |
5785 | ||
5786 | /* | |
5787 | * Internal routine to enter a UPL into a VM map. | |
5788 | * | |
5789 | * JMM - This should just be doable through the standard | |
5790 | * vm_map_enter() API. | |
5791 | */ | |
5792 | kern_return_t | |
5793 | vm_map_enter_upl( | |
5794 | vm_map_t map, | |
5795 | upl_t upl, | |
5796 | vm_map_offset_t *dst_addr) | |
5797 | { | |
5798 | vm_map_size_t size; | |
5799 | vm_object_offset_t offset; | |
5800 | vm_map_offset_t addr; | |
5801 | vm_page_t m; | |
5802 | kern_return_t kr; | |
5803 | int isVectorUPL = 0, curr_upl=0; | |
5804 | upl_t vector_upl = NULL; | |
5805 | vm_offset_t vector_upl_dst_addr = 0; | |
5806 | vm_map_t vector_upl_submap = NULL; | |
5807 | upl_offset_t subupl_offset = 0; | |
5808 | upl_size_t subupl_size = 0; | |
5809 | ||
5810 | if (upl == UPL_NULL) | |
5811 | return KERN_INVALID_ARGUMENT; | |
5812 | ||
5813 | if((isVectorUPL = vector_upl_is_valid(upl))) { | |
5814 | int mapped=0,valid_upls=0; | |
5815 | vector_upl = upl; | |
5816 | ||
5817 | upl_lock(vector_upl); | |
5818 | for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) { | |
5819 | upl = vector_upl_subupl_byindex(vector_upl, curr_upl ); | |
5820 | if(upl == NULL) | |
5821 | continue; | |
5822 | valid_upls++; | |
5823 | if (UPL_PAGE_LIST_MAPPED & upl->flags) | |
5824 | mapped++; | |
5825 | } | |
5826 | ||
5827 | if(mapped) { | |
5828 | if(mapped != valid_upls) | |
5829 | panic("Only %d of the %d sub-upls within the Vector UPL are already mapped\n", mapped, valid_upls); | 
5830 | else { | |
5831 | upl_unlock(vector_upl); | |
5832 | return KERN_FAILURE; | |
5833 | } | |
5834 | } | |
5835 | ||
5836 | kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE, VM_FLAGS_ANYWHERE, &vector_upl_submap); | |
5837 | if( kr != KERN_SUCCESS ) | |
5838 | panic("Vector UPL submap allocation failed\n"); | |
5839 | map = vector_upl_submap; | |
5840 | vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr); | |
5841 | curr_upl=0; | |
5842 | } | |
5843 | else | |
5844 | upl_lock(upl); | |
5845 | ||
5846 | process_upl_to_enter: | |
5847 | if(isVectorUPL){ | |
5848 | if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) { | |
5849 | *dst_addr = vector_upl_dst_addr; | |
5850 | upl_unlock(vector_upl); | |
5851 | return KERN_SUCCESS; | |
5852 | } | |
5853 | upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ ); | |
5854 | if(upl == NULL) | |
5855 | goto process_upl_to_enter; | |
5856 | ||
5857 | vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size); | |
5858 | *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset); | |
5859 | } else { | |
5860 | /* | |
5861 | * check to see if already mapped | |
5862 | */ | |
5863 | if (UPL_PAGE_LIST_MAPPED & upl->flags) { | |
5864 | upl_unlock(upl); | |
5865 | return KERN_FAILURE; | |
5866 | } | |
5867 | } | |
5868 | if ((!(upl->flags & UPL_SHADOWED)) && | |
5869 | ((upl->flags & UPL_HAS_BUSY) || | |
5870 | !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) { | |
5871 | ||
5872 | vm_object_t object; | |
5873 | vm_page_t alias_page; | |
5874 | vm_object_offset_t new_offset; | |
5875 | unsigned int pg_num; | |
5876 | wpl_array_t lite_list; | |
5877 | ||
5878 | if (upl->flags & UPL_INTERNAL) { | |
5879 | lite_list = (wpl_array_t) | |
5880 | ((((uintptr_t)upl) + sizeof(struct upl)) | |
5881 | + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); | |
5882 | } else { | |
5883 | lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl)); | |
5884 | } | |
5885 | object = upl->map_object; | |
5886 | upl->map_object = vm_object_allocate(upl->size); | |
5887 | ||
5888 | vm_object_lock(upl->map_object); | |
5889 | ||
5890 | upl->map_object->shadow = object; | |
5891 | upl->map_object->pageout = TRUE; | |
5892 | upl->map_object->can_persist = FALSE; | |
5893 | upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; | |
5894 | upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset; | |
5895 | upl->map_object->wimg_bits = object->wimg_bits; | |
5896 | offset = upl->map_object->vo_shadow_offset; | |
5897 | new_offset = 0; | |
5898 | size = upl->size; | |
5899 | ||
5900 | upl->flags |= UPL_SHADOWED; | |
5901 | ||
5902 | while (size) { | |
5903 | pg_num = (unsigned int) (new_offset / PAGE_SIZE); | |
5904 | assert(pg_num == new_offset / PAGE_SIZE); | |
5905 | ||
5906 | if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { | |
5907 | ||
5908 | VM_PAGE_GRAB_FICTITIOUS(alias_page); | |
5909 | ||
5910 | vm_object_lock(object); | |
5911 | ||
5912 | m = vm_page_lookup(object, offset); | |
5913 | if (m == VM_PAGE_NULL) { | |
5914 | panic("vm_upl_map: page missing\n"); | |
5915 | } | |
5916 | ||
5917 | /* | |
5918 | * Convert the fictitious page to a private | |
5919 | * shadow of the real page. | |
5920 | */ | |
5921 | assert(alias_page->fictitious); | |
5922 | alias_page->fictitious = FALSE; | |
5923 | alias_page->private = TRUE; | |
5924 | alias_page->pageout = TRUE; | |
5925 | /* | |
5926 | * since m is a page in the upl it must | |
5927 | * already be wired or BUSY, so it's | |
5928 | * safe to assign the underlying physical | |
5929 | * page to the alias | |
5930 | */ | |
5931 | alias_page->phys_page = m->phys_page; | |
5932 | ||
5933 | vm_object_unlock(object); | |
5934 | ||
5935 | vm_page_lockspin_queues(); | |
5936 | vm_page_wire(alias_page); | |
5937 | vm_page_unlock_queues(); | |
5938 | ||
5939 | /* | |
5940 | * ENCRYPTED SWAP: | |
5941 | * The virtual page ("m") has to be wired in some way | |
5942 | * here or its physical page ("m->phys_page") could | |
5943 | * be recycled at any time. | |
5944 | * Assuming this is enforced by the caller, we can't | |
5945 | * get an encrypted page here. Since the encryption | |
5946 | * key depends on the VM page's "pager" object and | |
5947 | * the "paging_offset", we couldn't handle 2 pageable | |
5948 | * VM pages (with different pagers and paging_offsets) | |
5949 | * sharing the same physical page: we could end up | |
5950 | * encrypting with one key (via one VM page) and | |
5951 | * decrypting with another key (via the alias VM page). | |
5952 | */ | |
5953 | ASSERT_PAGE_DECRYPTED(m); | |
5954 | ||
5955 | vm_page_insert(alias_page, upl->map_object, new_offset); | |
5956 | ||
5957 | assert(!alias_page->wanted); | |
5958 | alias_page->busy = FALSE; | |
5959 | alias_page->absent = FALSE; | |
5960 | } | |
5961 | size -= PAGE_SIZE; | |
5962 | offset += PAGE_SIZE_64; | |
5963 | new_offset += PAGE_SIZE_64; | |
5964 | } | |
5965 | vm_object_unlock(upl->map_object); | |
5966 | } | |
5967 | if (upl->flags & UPL_SHADOWED) | |
5968 | offset = 0; | |
5969 | else | |
5970 | offset = upl->offset - upl->map_object->paging_offset; | |
5971 | ||
5972 | size = upl->size; | |
5973 | ||
5974 | vm_object_reference(upl->map_object); | |
5975 | ||
5976 | if(!isVectorUPL) { | |
5977 | *dst_addr = 0; | |
5978 | /* | |
5979 | * NEED A UPL_MAP ALIAS | |
5980 | */ | |
5981 | kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, | |
5982 | VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE, | |
5983 | VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); | |
5984 | ||
5985 | if (kr != KERN_SUCCESS) { | |
5986 | upl_unlock(upl); | |
5987 | return(kr); | |
5988 | } | |
5989 | } | |
5990 | else { | |
5991 | kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, | |
5992 | VM_FLAGS_FIXED, upl->map_object, offset, FALSE, | |
5993 | VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); | |
5994 | if(kr) | |
5995 | panic("vm_map_enter failed for a Vector UPL\n"); | |
5996 | } | |
5997 | vm_object_lock(upl->map_object); | |
5998 | ||
5999 | for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) { | |
6000 | m = vm_page_lookup(upl->map_object, offset); | |
6001 | ||
6002 | if (m) { | |
6003 | m->pmapped = TRUE; | |
6004 | ||
6005 | /* CODE SIGNING ENFORCEMENT: page has been wpmapped, | |
6006 | * but only in kernel space. If this was on a user map, | |
6007 | * we'd have to set the wpmapped bit. */ | |
6008 | /* m->wpmapped = TRUE; */ | |
6009 | assert(map->pmap == kernel_pmap); | |
6010 | ||
6011 | PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE); | |
6012 | } | |
6013 | offset += PAGE_SIZE_64; | |
6014 | } | |
6015 | vm_object_unlock(upl->map_object); | |
6016 | ||
6017 | /* | |
6018 | * hold a reference for the mapping | |
6019 | */ | |
6020 | upl->ref_count++; | |
6021 | upl->flags |= UPL_PAGE_LIST_MAPPED; | |
6022 | upl->kaddr = (vm_offset_t) *dst_addr; | |
6023 | assert(upl->kaddr == *dst_addr); | |
6024 | ||
6025 | if(isVectorUPL) | |
6026 | goto process_upl_to_enter; | |
6027 | ||
6028 | upl_unlock(upl); | |
6029 | ||
6030 | return KERN_SUCCESS; | |
6031 | } | |
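 | /* | 
 | * Rough usage sketch (illustrative; the UPL is assumed to exist and | 
 | * errors are not handled): | 
 | * | 
 | *	vm_map_offset_t kaddr = 0; | 
 | * | 
 | *	kr = vm_map_enter_upl(kernel_map, upl, &kaddr); | 
 | *	... access the pages through kaddr ... | 
 | *	kr = vm_map_remove_upl(kernel_map, upl); | 
 | * | 
 | * The mapping holds its own UPL reference, so the UPL stays alive | 
 | * until vm_map_remove_upl() drops that reference. | 
 | */ | 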
6032 | ||
6033 | /* | |
6034 | * Internal routine to remove a UPL mapping from a VM map. | |
6035 | * | |
6036 | * XXX - This should just be doable through a standard | |
6037 | * vm_map_remove() operation. Otherwise, implicit clean-up | |
6038 | * of the target map won't be able to correctly remove | |
6039 | * these (and release the reference on the UPL). Having | |
6040 | * to do this means we can't map these into user-space | |
6041 | * maps yet. | |
6042 | */ | |
6043 | kern_return_t | |
6044 | vm_map_remove_upl( | |
6045 | vm_map_t map, | |
6046 | upl_t upl) | |
6047 | { | |
6048 | vm_address_t addr; | |
6049 | upl_size_t size; | |
6050 | int isVectorUPL = 0, curr_upl = 0; | |
6051 | upl_t vector_upl = NULL; | |
6052 | ||
6053 | if (upl == UPL_NULL) | |
6054 | return KERN_INVALID_ARGUMENT; | |
6055 | ||
6056 | if((isVectorUPL = vector_upl_is_valid(upl))) { | |
6057 | int unmapped=0, valid_upls=0; | |
6058 | vector_upl = upl; | |
6059 | upl_lock(vector_upl); | |
6060 | for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) { | |
6061 | upl = vector_upl_subupl_byindex(vector_upl, curr_upl ); | |
6062 | if(upl == NULL) | |
6063 | continue; | |
6064 | valid_upls++; | |
6065 | if (!(UPL_PAGE_LIST_MAPPED & upl->flags)) | |
6066 | unmapped++; | |
6067 | } | |
6068 | ||
6069 | if(unmapped) { | |
6070 | if(unmapped != valid_upls) | |
6071 | panic("%d of the %d sub-upls within the Vector UPL are not mapped\n", unmapped, valid_upls); | 
6072 | else { | |
6073 | upl_unlock(vector_upl); | |
6074 | return KERN_FAILURE; | |
6075 | } | |
6076 | } | |
6077 | curr_upl=0; | |
6078 | } | |
6079 | else | |
6080 | upl_lock(upl); | |
6081 | ||
6082 | process_upl_to_remove: | |
6083 | if(isVectorUPL) { | |
6084 | if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) { | |
6085 | vm_map_t v_upl_submap; | |
6086 | vm_offset_t v_upl_submap_dst_addr; | |
6087 | vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr); | |
6088 | ||
6089 | vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS); | |
6090 | vm_map_deallocate(v_upl_submap); | |
6091 | upl_unlock(vector_upl); | |
6092 | return KERN_SUCCESS; | |
6093 | } | |
6094 | ||
6095 | upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ ); | |
6096 | if(upl == NULL) | |
6097 | goto process_upl_to_remove; | |
6098 | } | |
6099 | ||
6100 | if (upl->flags & UPL_PAGE_LIST_MAPPED) { | |
6101 | addr = upl->kaddr; | |
6102 | size = upl->size; | |
6103 | ||
6104 | assert(upl->ref_count > 1); | |
6105 | upl->ref_count--; /* removing mapping ref */ | |
6106 | ||
6107 | upl->flags &= ~UPL_PAGE_LIST_MAPPED; | |
6108 | upl->kaddr = (vm_offset_t) 0; | |
6109 | ||
6110 | if(!isVectorUPL) { | |
6111 | upl_unlock(upl); | |
6112 | ||
6113 | vm_map_remove( | |
6114 | map, | |
6115 | vm_map_trunc_page(addr, | |
6116 | VM_MAP_PAGE_MASK(map)), | |
6117 | vm_map_round_page(addr + size, | |
6118 | VM_MAP_PAGE_MASK(map)), | |
6119 | VM_MAP_NO_FLAGS); | |
6120 | ||
6121 | return KERN_SUCCESS; | |
6122 | } | |
6123 | else { | |
6124 | /* | |
6125 | * If it's a Vectored UPL, we'll be removing the entire | |
6126 | * submap anyway, so no need to remove individual UPL | 
6127 | * element mappings from within the submap | |
6128 | */ | |
6129 | goto process_upl_to_remove; | |
6130 | } | |
6131 | } | |
6132 | upl_unlock(upl); | |
6133 | ||
6134 | return KERN_FAILURE; | |
6135 | } | |
6136 | ||
6137 | kern_return_t | |
6138 | upl_commit_range( | |
6139 | upl_t upl, | |
6140 | upl_offset_t offset, | |
6141 | upl_size_t size, | |
6142 | int flags, | |
6143 | upl_page_info_t *page_list, | |
6144 | mach_msg_type_number_t count, | |
6145 | boolean_t *empty) | |
6146 | { | |
6147 | upl_size_t xfer_size, subupl_size = size; | |
6148 | vm_object_t shadow_object; | |
6149 | vm_object_t object; | |
6150 | vm_object_offset_t target_offset; | |
6151 | upl_offset_t subupl_offset = offset; | |
6152 | int entry; | |
6153 | wpl_array_t lite_list; | |
6154 | int occupied; | |
6155 | int clear_refmod = 0; | |
6156 | int pgpgout_count = 0; | |
6157 | struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; | |
6158 | struct vm_page_delayed_work *dwp; | |
6159 | int dw_count; | |
6160 | int dw_limit; | |
6161 | int isVectorUPL = 0; | |
6162 | upl_t vector_upl = NULL; | |
6163 | boolean_t should_be_throttled = FALSE; | |
6164 | ||
6165 | vm_page_t nxt_page = VM_PAGE_NULL; | |
6166 | int fast_path_possible = 0; | |
6167 | int fast_path_full_commit = 0; | |
6168 | int throttle_page = 0; | |
6169 | int unwired_count = 0; | |
6170 | int local_queue_count = 0; | |
6171 | queue_head_t local_queue; | |
6172 | ||
6173 | *empty = FALSE; | |
6174 | ||
6175 | if (upl == UPL_NULL) | |
6176 | return KERN_INVALID_ARGUMENT; | |
6177 | ||
6178 | if (count == 0) | |
6179 | page_list = NULL; | |
6180 | ||
6181 | if((isVectorUPL = vector_upl_is_valid(upl))) { | |
6182 | vector_upl = upl; | |
6183 | upl_lock(vector_upl); | |
6184 | } | |
6185 | else | |
6186 | upl_lock(upl); | |
6187 | ||
6188 | process_upl_to_commit: | |
6189 | ||
6190 | if(isVectorUPL) { | |
6191 | size = subupl_size; | |
6192 | offset = subupl_offset; | |
6193 | if(size == 0) { | |
6194 | upl_unlock(vector_upl); | |
6195 | return KERN_SUCCESS; | |
6196 | } | |
6197 | upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); | |
6198 | if(upl == NULL) { | |
6199 | upl_unlock(vector_upl); | |
6200 | return KERN_FAILURE; | |
6201 | } | |
6202 | page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl); | |
6203 | subupl_size -= size; | |
6204 | subupl_offset += size; | |
6205 | } | |
6206 | ||
6207 | #if UPL_DEBUG | |
6208 | if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { | |
6209 | (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES); | |
6210 | ||
6211 | upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; | |
6212 | upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); | |
6213 | ||
6214 | upl->upl_commit_index++; | |
6215 | } | |
6216 | #endif | |
6217 | if (upl->flags & UPL_DEVICE_MEMORY) | |
6218 | xfer_size = 0; | |
6219 | else if ((offset + size) <= upl->size) | |
6220 | xfer_size = size; | |
6221 | else { | |
6222 | if(!isVectorUPL) | |
6223 | upl_unlock(upl); | |
6224 | else { | |
6225 | upl_unlock(vector_upl); | |
6226 | } | |
6227 | return KERN_FAILURE; | |
6228 | } | |
6229 | if (upl->flags & UPL_SET_DIRTY) | |
6230 | flags |= UPL_COMMIT_SET_DIRTY; | |
6231 | if (upl->flags & UPL_CLEAR_DIRTY) | |
6232 | flags |= UPL_COMMIT_CLEAR_DIRTY; | |
6233 | ||
6234 | if (upl->flags & UPL_INTERNAL) | |
6235 | lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl)) | |
6236 | + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); | |
6237 | else | |
6238 | lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); | |
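| ||
| /* | |
|  * Layout sketch (added for clarity): an INTERNAL upl is allocated as | |
|  * one contiguous block, so the "lite list" bitmap sits after the | |
|  * page-info array, while a non-INTERNAL upl has no page-info array | |
|  * and the bitmap starts right after the header: | |
|  * | |
|  * INTERNAL: [ struct upl ][ upl_page_info_t x (size/PAGE_SIZE) ][ bitmap ] | |
|  * else: [ struct upl ][ bitmap ] | |
|  * | |
|  * The pointer arithmetic above simply steps past those regions. | |
|  */ | |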
6239 | ||
6240 | object = upl->map_object; | |
6241 | ||
6242 | if (upl->flags & UPL_SHADOWED) { | |
6243 | vm_object_lock(object); | |
6244 | shadow_object = object->shadow; | |
6245 | } else { | |
6246 | shadow_object = object; | |
6247 | } | |
6248 | entry = offset/PAGE_SIZE; | |
6249 | target_offset = (vm_object_offset_t)offset; | |
6250 | ||
6251 | if (upl->flags & UPL_KERNEL_OBJECT) | |
6252 | vm_object_lock_shared(shadow_object); | |
6253 | else | |
6254 | vm_object_lock(shadow_object); | |
6255 | ||
6256 | if (upl->flags & UPL_ACCESS_BLOCKED) { | |
6257 | assert(shadow_object->blocked_access); | |
6258 | shadow_object->blocked_access = FALSE; | |
6259 | vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); | |
6260 | } | |
6261 | ||
6262 | if (shadow_object->code_signed) { | |
6263 | /* | |
6264 | * CODE SIGNING: | |
6265 | * If the object is code-signed, do not let this UPL tell | |
6266 | * us if the pages are valid or not. Let the pages be | |
6267 | * validated by VM the normal way (when they get mapped or | |
6268 | * copied). | |
6269 | */ | |
6270 | flags &= ~UPL_COMMIT_CS_VALIDATED; | |
6271 | } | |
6272 | if (! page_list) { | |
6273 | /* | |
6274 | * No page list to get the code-signing info from !? | |
6275 | */ | |
6276 | flags &= ~UPL_COMMIT_CS_VALIDATED; | |
6277 | } | |
6278 | if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal) | |
6279 | should_be_throttled = TRUE; | |
6280 | ||
6281 | dwp = &dw_array[0]; | |
6282 | dw_count = 0; | |
6283 | dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); | |
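| ||
| /* | |
|  * Note (added for clarity): page state changes below are not applied | |
|  * one at a time. They are accumulated in dw_array and flushed through | |
|  * vm_page_do_delayed_work() whenever dw_count reaches dw_limit, so | |
|  * the page-queues lock is taken once per batch instead of once per | |
|  * page. | |
|  */ | |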
6284 | ||
6285 | if ((upl->flags & UPL_IO_WIRE) && | |
6286 | !(flags & UPL_COMMIT_FREE_ABSENT) && | |
6287 | !isVectorUPL && | |
6288 | shadow_object->purgable != VM_PURGABLE_VOLATILE && | |
6289 | shadow_object->purgable != VM_PURGABLE_EMPTY) { | |
6290 | ||
6291 | if (!queue_empty(&shadow_object->memq)) { | |
6292 | queue_init(&local_queue); | |
6293 | if (size == shadow_object->vo_size) { | |
6294 | nxt_page = (vm_page_t)queue_first(&shadow_object->memq); | |
6295 | fast_path_full_commit = 1; | |
6296 | } | |
6297 | fast_path_possible = 1; | |
6298 | ||
6299 | if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal && | |
6300 | (shadow_object->purgable == VM_PURGABLE_DENY || | |
6301 | shadow_object->purgable == VM_PURGABLE_NONVOLATILE || | |
6302 | shadow_object->purgable == VM_PURGABLE_VOLATILE)) { | |
6303 | throttle_page = 1; | |
6304 | } | |
6305 | } | |
6306 | } | |
6307 | ||
6308 | while (xfer_size) { | |
6309 | vm_page_t t, m; | |
6310 | ||
6311 | dwp->dw_mask = 0; | |
6312 | clear_refmod = 0; | |
6313 | ||
6314 | m = VM_PAGE_NULL; | |
6315 | ||
6316 | if (upl->flags & UPL_LITE) { | |
6317 | unsigned int pg_num; | |
6318 | ||
6319 | if (nxt_page != VM_PAGE_NULL) { | |
6320 | m = nxt_page; | |
6321 | nxt_page = (vm_page_t)queue_next(&nxt_page->listq); | |
6322 | target_offset = m->offset; | |
6323 | } | |
6324 | pg_num = (unsigned int) (target_offset/PAGE_SIZE); | |
6325 | assert(pg_num == target_offset/PAGE_SIZE); | |
6326 | ||
6327 | if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { | |
6328 | lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); | |
6329 | ||
6330 | if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL) | |
6331 | m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset)); | |
6332 | } else | |
6333 | m = NULL; | |
6334 | } | |
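| /* | |
|  * Worked example (added for clarity): the lite list is an array of | |
|  * 32-bit words with one bit per page. For pg_num == 70, | |
|  * pg_num >> 5 == 2 selects the word (70 / 32) and | |
|  * 1 << (pg_num & 31) == 1 << 6 selects the bit within it, so the | |
|  * test above checks bit 6 of lite_list[2] and the clear removes the | |
|  * page from the set still to be committed. | |
|  */ | |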
6335 | if (upl->flags & UPL_SHADOWED) { | |
6336 | if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { | |
6337 | ||
6338 | t->pageout = FALSE; | |
6339 | ||
6340 | VM_PAGE_FREE(t); | |
6341 | ||
6342 | if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL) | |
6343 | m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset); | |
6344 | } | |
6345 | } | |
6346 | if (m == VM_PAGE_NULL) | |
6347 | goto commit_next_page; | |
6348 | ||
6349 | if (m->compressor) { | |
6350 | assert(m->busy); | |
6351 | ||
6352 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); | |
6353 | goto commit_next_page; | |
6354 | } | |
6355 | ||
6356 | if (flags & UPL_COMMIT_CS_VALIDATED) { | |
6357 | /* | |
6358 | * CODE SIGNING: | |
6359 | * Set the code signing bits according to | |
6360 | * what the UPL says they should be. | |
6361 | */ | |
6362 | m->cs_validated = page_list[entry].cs_validated; | |
6363 | m->cs_tainted = page_list[entry].cs_tainted; | |
6364 | } | |
6365 | if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL) | |
6366 | m->written_by_kernel = TRUE; | |
6367 | ||
6368 | if (upl->flags & UPL_IO_WIRE) { | |
6369 | ||
6370 | if (page_list) | |
6371 | page_list[entry].phys_addr = 0; | |
6372 | ||
6373 | if (flags & UPL_COMMIT_SET_DIRTY) { | |
6374 | SET_PAGE_DIRTY(m, FALSE); | |
6375 | } else if (flags & UPL_COMMIT_CLEAR_DIRTY) { | |
6376 | m->dirty = FALSE; | |
6377 | ||
6378 | if (! (flags & UPL_COMMIT_CS_VALIDATED) && | |
6379 | m->cs_validated && !m->cs_tainted) { | |
6380 | /* | |
6381 | * CODE SIGNING: | |
6382 | * This page is no longer dirty | |
6383 | * but could have been modified, | |
6384 | * so it will need to be | |
6385 | * re-validated. | |
6386 | */ | |
6387 | if (m->slid) { | |
6388 | panic("upl_commit_range(%p): page %p was slid\n", | |
6389 | upl, m); | |
6390 | } | |
6391 | assert(!m->slid); | |
6392 | m->cs_validated = FALSE; | |
6393 | #if DEVELOPMENT || DEBUG | |
6394 | vm_cs_validated_resets++; | |
6395 | #endif | |
6396 | pmap_disconnect(m->phys_page); | |
6397 | } | |
6398 | clear_refmod |= VM_MEM_MODIFIED; | |
6399 | } | |
6400 | if (upl->flags & UPL_ACCESS_BLOCKED) { | |
6401 | /* | |
6402 | * We blocked access to the pages in this UPL. | |
6403 | * Clear the "busy" bit and wake up any waiter | |
6404 | * for this page. | |
6405 | */ | |
6406 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); | |
6407 | } | |
6408 | if (fast_path_possible) { | |
6409 | assert(m->object->purgable != VM_PURGABLE_EMPTY); | |
6410 | assert(m->object->purgable != VM_PURGABLE_VOLATILE); | |
6411 | if (m->absent) { | |
6412 | assert(m->wire_count == 0); | |
6413 | assert(m->busy); | |
6414 | ||
6415 | m->absent = FALSE; | |
6416 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); | |
6417 | } else { | |
6418 | if (m->wire_count == 0) | |
6419 | panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object); | |
6420 | ||
6421 | /* | |
6422 | * XXX FBDP need to update some other | |
6423 | * counters here (purgeable_wired_count) | |
6424 | * (ledgers), ... | |
6425 | */ | |
6426 | assert(m->wire_count); | |
6427 | m->wire_count--; | |
6428 | ||
6429 | if (m->wire_count == 0) | |
6430 | unwired_count++; | |
6431 | } | |
6432 | if (m->wire_count == 0) { | |
6433 | queue_enter(&local_queue, m, vm_page_t, pageq); | |
6434 | local_queue_count++; | |
6435 | ||
6436 | if (throttle_page) { | |
6437 | m->throttled = TRUE; | |
6438 | } else { | |
6439 | if (flags & UPL_COMMIT_INACTIVATE) | |
6440 | m->inactive = TRUE; | |
6441 | else | |
6442 | m->active = TRUE; | |
6443 | } | |
6444 | } | |
6445 | } else { | |
6446 | if (flags & UPL_COMMIT_INACTIVATE) { | |
6447 | dwp->dw_mask |= DW_vm_page_deactivate_internal; | |
6448 | clear_refmod |= VM_MEM_REFERENCED; | |
6449 | } | |
6450 | if (m->absent) { | |
6451 | if (flags & UPL_COMMIT_FREE_ABSENT) | |
6452 | dwp->dw_mask |= DW_vm_page_free; | |
6453 | else { | |
6454 | m->absent = FALSE; | |
6455 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); | |
6456 | ||
6457 | if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal)) | |
6458 | dwp->dw_mask |= DW_vm_page_activate; | |
6459 | } | |
6460 | } else | |
6461 | dwp->dw_mask |= DW_vm_page_unwire; | |
6462 | } | |
6463 | goto commit_next_page; | |
6464 | } | |
6465 | assert(!m->compressor); | |
6466 | ||
6467 | if (page_list) | |
6468 | page_list[entry].phys_addr = 0; | |
6469 | ||
6470 | /* | |
6471 | * make sure to clear the hardware | |
6472 | * modify or reference bits before | |
6473 | * releasing the BUSY bit on this page | |
6474 | * otherwise we risk losing a legitimate | |
6475 | * change of state | |
6476 | */ | |
6477 | if (flags & UPL_COMMIT_CLEAR_DIRTY) { | |
6478 | m->dirty = FALSE; | |
6479 | ||
6480 | clear_refmod |= VM_MEM_MODIFIED; | |
6481 | } | |
6482 | if (m->laundry) | |
6483 | dwp->dw_mask |= DW_vm_pageout_throttle_up; | |
6484 | ||
6485 | if (VM_PAGE_WIRED(m)) | |
6486 | m->pageout = FALSE; | |
6487 | ||
6488 | if (! (flags & UPL_COMMIT_CS_VALIDATED) && | |
6489 | m->cs_validated && !m->cs_tainted) { | |
6490 | /* | |
6491 | * CODE SIGNING: | |
6492 | * This page is no longer dirty | |
6493 | * but could have been modified, | |
6494 | * so it will need to be | |
6495 | * re-validated. | |
6496 | */ | |
6497 | if (m->slid) { | |
6498 | panic("upl_commit_range(%p): page %p was slid\n", | |
6499 | upl, m); | |
6500 | } | |
6501 | assert(!m->slid); | |
6502 | m->cs_validated = FALSE; | |
6503 | #if DEVELOPMENT || DEBUG | |
6504 | vm_cs_validated_resets++; | |
6505 | #endif | |
6506 | pmap_disconnect(m->phys_page); | |
6507 | } | |
6508 | if (m->overwriting) { | |
6509 | /* | |
6510 | * the (COPY_OUT_FROM == FALSE) request_page_list case | |
6511 | */ | |
6512 | if (m->busy) { | |
6513 | #if CONFIG_PHANTOM_CACHE | |
6514 | if (m->absent && !m->object->internal) | |
6515 | dwp->dw_mask |= DW_vm_phantom_cache_update; | |
6516 | #endif | |
6517 | m->absent = FALSE; | |
6518 | ||
6519 | dwp->dw_mask |= DW_clear_busy; | |
6520 | } else { | |
6521 | /* | |
6522 | * alternate (COPY_OUT_FROM == FALSE) page_list case | |
6523 | * Occurs when the original page was wired | |
6524 | * at the time of the list request | |
6525 | */ | |
6526 | assert(VM_PAGE_WIRED(m)); | |
6527 | ||
6528 | dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */ | |
6529 | } | |
6530 | m->overwriting = FALSE; | |
6531 | } | |
6532 | if (m->encrypted_cleaning == TRUE) { | |
6533 | m->encrypted_cleaning = FALSE; | |
6534 | ||
6535 | dwp->dw_mask |= DW_clear_busy | DW_PAGE_WAKEUP; | |
6536 | } | |
6537 | m->cleaning = FALSE; | |
6538 | ||
6539 | if (m->pageout) { | |
6540 | /* | |
6541 | * With the clean queue enabled, UPL_PAGEOUT should | |
6542 | * no longer set the pageout bit. Its pages now go | |
6543 | * to the clean queue. | |
6544 | */ | |
6545 | assert(!(flags & UPL_PAGEOUT)); | |
6546 | ||
6547 | m->pageout = FALSE; | |
6548 | #if MACH_CLUSTER_STATS | |
6549 | if (m->wanted) vm_pageout_target_collisions++; | |
6550 | #endif | |
6551 | if ((flags & UPL_COMMIT_SET_DIRTY) || | |
6552 | (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))) { | |
6553 | /* | |
6554 | * page was re-dirtied after we started | |
6555 | * the pageout... reactivate it since | |
6556 | * we don't know whether the on-disk | |
6557 | * copy matches what is now in memory | |
6558 | */ | |
6559 | SET_PAGE_DIRTY(m, FALSE); | |
6560 | ||
6561 | dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP; | |
6562 | ||
6563 | if (upl->flags & UPL_PAGEOUT) { | |
6564 | CLUSTER_STAT(vm_pageout_target_page_dirtied++;) | |
6565 | VM_STAT_INCR(reactivations); | |
6566 | DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL); | |
6567 | } | |
6568 | } else { | |
6569 | /* | |
6570 | * page has been successfully cleaned | |
6571 | * go ahead and free it for other use | |
6572 | */ | |
6573 | if (m->object->internal) { | |
6574 | DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL); | |
6575 | } else { | |
6576 | DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL); | |
6577 | } | |
6578 | m->dirty = FALSE; | |
6579 | m->busy = TRUE; | |
6580 | ||
6581 | dwp->dw_mask |= DW_vm_page_free; | |
6582 | } | |
6583 | goto commit_next_page; | |
6584 | } | |
6585 | #if MACH_CLUSTER_STATS | |
6586 | if (m->wpmapped) | |
6587 | m->dirty = pmap_is_modified(m->phys_page); | |
6588 | ||
6589 | if (m->dirty) vm_pageout_cluster_dirtied++; | |
6590 | else vm_pageout_cluster_cleaned++; | |
6591 | if (m->wanted) vm_pageout_cluster_collisions++; | |
6592 | #endif | |
6593 | /* | |
6594 | * It is part of the semantics of COPYOUT_FROM | |
6595 | * UPLs that a commit implies a cache sync | |
6596 | * between the vm page and the backing store; | |
6597 | * this can be used to strip the precious bit | |
6598 | * as well as to clean the page. | |
6599 | */ | |
6600 | if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS)) | |
6601 | m->precious = FALSE; | |
6602 | ||
6603 | if (flags & UPL_COMMIT_SET_DIRTY) { | |
6604 | SET_PAGE_DIRTY(m, FALSE); | |
6605 | } else { | |
6606 | m->dirty = FALSE; | |
6607 | } | |
6608 | ||
6609 | /* with the clean queue on, move *all* cleaned pages to the clean queue */ | |
6610 | if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) { | |
6611 | pgpgout_count++; | |
6612 | ||
6613 | VM_STAT_INCR(pageouts); | |
6614 | DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL); | |
6615 | ||
6616 | dwp->dw_mask |= DW_enqueue_cleaned; | |
6617 | vm_pageout_enqueued_cleaned_from_inactive_dirty++; | |
6618 | } else if (should_be_throttled == TRUE && !m->active && !m->inactive && !m->speculative && !m->throttled) { | |
6619 | /* | |
6620 | * page coming back in from being 'frozen'... | |
6621 | * it was dirty before it was frozen, so keep it dirty so | |
6622 | * that vm_page_activate will notice that it really belongs | |
6623 | * on the throttle queue and put it there | |
6624 | */ | |
6625 | SET_PAGE_DIRTY(m, FALSE); | |
6626 | dwp->dw_mask |= DW_vm_page_activate; | |
6627 | ||
6628 | } else { | |
6629 | if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) { | |
6630 | dwp->dw_mask |= DW_vm_page_deactivate_internal; | |
6631 | clear_refmod |= VM_MEM_REFERENCED; | |
6632 | } else if (!m->active && !m->inactive && !m->speculative) { | |
6633 | ||
6634 | if (m->clustered || (flags & UPL_COMMIT_SPECULATE)) | |
6635 | dwp->dw_mask |= DW_vm_page_speculate; | |
6636 | else if (m->reference) | |
6637 | dwp->dw_mask |= DW_vm_page_activate; | |
6638 | else { | |
6639 | dwp->dw_mask |= DW_vm_page_deactivate_internal; | |
6640 | clear_refmod |= VM_MEM_REFERENCED; | |
6641 | } | |
6642 | } | |
6643 | } | |
6644 | if (upl->flags & UPL_ACCESS_BLOCKED) { | |
6645 | /* | |
6646 | * We blocked access to the pages in this UPL. | |
6647 | * Clear the "busy" bit on this page before we | |
6648 | * wake up any waiter. | |
6649 | */ | |
6650 | dwp->dw_mask |= DW_clear_busy; | |
6651 | } | |
6652 | /* | |
6653 | * Wake up any thread waiting for this page to finish cleaning. | |
6654 | */ | |
6655 | dwp->dw_mask |= DW_PAGE_WAKEUP; | |
6656 | ||
6657 | commit_next_page: | |
6658 | if (clear_refmod) | |
6659 | pmap_clear_refmod(m->phys_page, clear_refmod); | |
6660 | ||
6661 | target_offset += PAGE_SIZE_64; | |
6662 | xfer_size -= PAGE_SIZE; | |
6663 | entry++; | |
6664 | ||
6665 | if (dwp->dw_mask) { | |
6666 | if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { | |
6667 | VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); | |
6668 | ||
6669 | if (dw_count >= dw_limit) { | |
6670 | vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); | |
6671 | ||
6672 | dwp = &dw_array[0]; | |
6673 | dw_count = 0; | |
6674 | } | |
6675 | } else { | |
6676 | if (dwp->dw_mask & DW_clear_busy) | |
6677 | m->busy = FALSE; | |
6678 | ||
6679 | if (dwp->dw_mask & DW_PAGE_WAKEUP) | |
6680 | PAGE_WAKEUP(m); | |
6681 | } | |
6682 | } | |
6683 | } | |
6684 | if (dw_count) | |
6685 | vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); | |
6686 | ||
6687 | if (fast_path_possible) { | |
6688 | ||
6689 | assert(shadow_object->purgable != VM_PURGABLE_VOLATILE); | |
6690 | assert(shadow_object->purgable != VM_PURGABLE_EMPTY); | |
6691 | ||
6692 | if (local_queue_count || unwired_count) { | |
6693 | ||
6694 | if (local_queue_count) { | |
6695 | vm_page_t first_local, last_local; | |
6696 | vm_page_t first_target; | |
6697 | queue_head_t *target_queue; | |
6698 | ||
6699 | if (throttle_page) | |
6700 | target_queue = &vm_page_queue_throttled; | |
6701 | else { | |
6702 | if (flags & UPL_COMMIT_INACTIVATE) { | |
6703 | if (shadow_object->internal) | |
6704 | target_queue = &vm_page_queue_anonymous; | |
6705 | else | |
6706 | target_queue = &vm_page_queue_inactive; | |
6707 | } else | |
6708 | target_queue = &vm_page_queue_active; | |
6709 | } | |
6710 | /* | |
6711 | * Transfer the entire local queue to the appropriate regular LRU page queue. | |
6712 | */ | |
6713 | first_local = (vm_page_t) queue_first(&local_queue); | |
6714 | last_local = (vm_page_t) queue_last(&local_queue); | |
6715 | ||
6716 | vm_page_lockspin_queues(); | |
6717 | ||
6718 | first_target = (vm_page_t) queue_first(target_queue); | |
6719 | ||
6720 | if (queue_empty(target_queue)) | |
6721 | queue_last(target_queue) = (queue_entry_t) last_local; | |
6722 | else | |
6723 | queue_prev(&first_target->pageq) = (queue_entry_t) last_local; | |
6724 | ||
6725 | queue_first(target_queue) = (queue_entry_t) first_local; | |
6726 | queue_prev(&first_local->pageq) = (queue_entry_t) target_queue; | |
6727 | queue_next(&last_local->pageq) = (queue_entry_t) first_target; | |
6728 | ||
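| /* | |
|  * Splice sketch (added for clarity): the pointer surgery above links | |
|  * the whole local queue ahead of target_queue in O(1): | |
|  * | |
|  * before: target_queue <-> first_target <-> ... | |
|  * after: target_queue <-> first_local .. last_local <-> first_target <-> ... | |
|  * | |
|  * Only the queue head, the two local endpoints and the old first | |
|  * element have their links rewritten, so the cost is constant no | |
|  * matter how many pages were batched locally. | |
|  */ | |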
6729 | /* | |
6730 | * Adjust the global page counts. | |
6731 | */ | |
6732 | if (throttle_page) { | |
6733 | vm_page_throttled_count += local_queue_count; | |
6734 | } else { | |
6735 | if (flags & UPL_COMMIT_INACTIVATE) { | |
6736 | if (shadow_object->internal) | |
6737 | vm_page_anonymous_count += local_queue_count; | |
6738 | vm_page_inactive_count += local_queue_count; | |
6739 | ||
6740 | token_new_pagecount += local_queue_count; | |
6741 | } else | |
6742 | vm_page_active_count += local_queue_count; | |
6743 | ||
6744 | if (shadow_object->internal) | |
6745 | vm_page_pageable_internal_count += local_queue_count; | |
6746 | else | |
6747 | vm_page_pageable_external_count += local_queue_count; | |
6748 | } | |
6749 | } else { | |
6750 | vm_page_lockspin_queues(); | |
6751 | } | |
6752 | if (unwired_count) { | |
6753 | vm_page_wire_count -= unwired_count; | |
6754 | VM_CHECK_MEMORYSTATUS; | |
6755 | } | |
6756 | vm_page_unlock_queues(); | |
6757 | ||
6758 | shadow_object->wired_page_count -= unwired_count; | |
6759 | } | |
6760 | } | |
6761 | occupied = 1; | |
6762 | ||
6763 | if (upl->flags & UPL_DEVICE_MEMORY) { | |
6764 | occupied = 0; | |
6765 | } else if (upl->flags & UPL_LITE) { | |
6766 | int pg_num; | |
6767 | int i; | |
6768 | ||
6769 | occupied = 0; | |
6770 | ||
6771 | if (!fast_path_full_commit) { | |
6772 | pg_num = upl->size/PAGE_SIZE; | |
6773 | pg_num = (pg_num + 31) >> 5; | |
6774 | ||
6775 | for (i = 0; i < pg_num; i++) { | |
6776 | if (lite_list[i] != 0) { | |
6777 | occupied = 1; | |
6778 | break; | |
6779 | } | |
6780 | } | |
6781 | } | |
6782 | } else { | |
6783 | if (queue_empty(&upl->map_object->memq)) | |
6784 | occupied = 0; | |
6785 | } | |
6786 | if (occupied == 0) { | |
6787 | /* | |
6788 | * If this UPL element belongs to a Vector UPL and is | |
6789 | * empty, then this is the right function to deallocate | |
6790 | * it. So go ahead and set the *empty variable. The flag | |
6791 | * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view | |
6792 | * should be considered relevant for the Vector UPL and not | |
6793 | * the internal UPLs. | |
6794 | */ | |
6795 | if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) | |
6796 | *empty = TRUE; | |
6797 | ||
6798 | if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { | |
6799 | /* | |
6800 | * this is not a paging object | |
6801 | * so we need to drop the paging reference | |
6802 | * that was taken when we created the UPL | |
6803 | * against this object | |
6804 | */ | |
6805 | vm_object_activity_end(shadow_object); | |
6806 | vm_object_collapse(shadow_object, 0, TRUE); | |
6807 | } else { | |
6808 | /* | |
6809 | * we donated the paging reference to | |
6810 | * the map object... vm_pageout_object_terminate | |
6811 | * will drop this reference | |
6812 | */ | |
6813 | } | |
6814 | } | |
6815 | vm_object_unlock(shadow_object); | |
6816 | if (object != shadow_object) | |
6817 | vm_object_unlock(object); | |
6818 | ||
6819 | if(!isVectorUPL) | |
6820 | upl_unlock(upl); | |
6821 | else { | |
6822 | /* | |
6823 | * If we completed our operations on a UPL that is | |
6824 | * part of a Vectored UPL and if empty is TRUE, then | |
6825 | * we should go ahead and deallocate this UPL element. | |
6826 | * Then we check if this was the last of the UPL elements | |
6827 | * within that Vectored UPL. If so, set empty to TRUE | |
6828 | * so that in ubc_upl_commit_range or ubc_upl_commit, we | |
6829 | * can go ahead and deallocate the Vector UPL too. | |
6830 | */ | |
6831 | if(*empty==TRUE) { | |
6832 | *empty = vector_upl_set_subupl(vector_upl, upl, 0); | |
6833 | upl_deallocate(upl); | |
6834 | } | |
6835 | goto process_upl_to_commit; | |
6836 | } | |
6837 | ||
6838 | if (pgpgout_count) { | |
6839 | DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL); | |
6840 | } | |
6841 | ||
6842 | return KERN_SUCCESS; | |
6843 | } | |
6844 | ||
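| /* | |
|  * Usage sketch (added for illustration; not from the original | |
|  * source): callers typically commit a UPL in sub-ranges and stop as | |
|  * soon as the UPL reports itself empty, then free it. The chunk size | |
|  * and helper name here are hypothetical. | |
|  */ | |
| #if 0 /* illustrative only -- never compiled */ | |
| static void | |
| upl_commit_in_chunks(upl_t upl, upl_size_t total_size) | |
| { | |
| upl_offset_t offset = 0; | |
| boolean_t empty = FALSE; | |
| ||
| while (offset < total_size && !empty) { | |
| (void) upl_commit_range(upl, offset, PAGE_SIZE, | |
| 0, NULL, 0, &empty); | |
| offset += PAGE_SIZE; | |
| } | |
| if (empty) | |
| upl_deallocate(upl); | |
| } | |
| #endif | |
| ||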
6845 | kern_return_t | |
6846 | upl_abort_range( | |
6847 | upl_t upl, | |
6848 | upl_offset_t offset, | |
6849 | upl_size_t size, | |
6850 | int error, | |
6851 | boolean_t *empty) | |
6852 | { | |
6853 | upl_page_info_t *user_page_list = NULL; | |
6854 | upl_size_t xfer_size, subupl_size = size; | |
6855 | vm_object_t shadow_object; | |
6856 | vm_object_t object; | |
6857 | vm_object_offset_t target_offset; | |
6858 | upl_offset_t subupl_offset = offset; | |
6859 | int entry; | |
6860 | wpl_array_t lite_list; | |
6861 | int occupied; | |
6862 | struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; | |
6863 | struct vm_page_delayed_work *dwp; | |
6864 | int dw_count; | |
6865 | int dw_limit; | |
6866 | int isVectorUPL = 0; | |
6867 | upl_t vector_upl = NULL; | |
6868 | ||
6869 | *empty = FALSE; | |
6870 | ||
6871 | if (upl == UPL_NULL) | |
6872 | return KERN_INVALID_ARGUMENT; | |
6873 | ||
6874 | if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) ) | |
6875 | return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty); | |
6876 | ||
6877 | if((isVectorUPL = vector_upl_is_valid(upl))) { | |
6878 | vector_upl = upl; | |
6879 | upl_lock(vector_upl); | |
6880 | } | |
6881 | else | |
6882 | upl_lock(upl); | |
6883 | ||
6884 | process_upl_to_abort: | |
6885 | if(isVectorUPL) { | |
6886 | size = subupl_size; | |
6887 | offset = subupl_offset; | |
6888 | if(size == 0) { | |
6889 | upl_unlock(vector_upl); | |
6890 | return KERN_SUCCESS; | |
6891 | } | |
6892 | upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); | |
6893 | if(upl == NULL) { | |
6894 | upl_unlock(vector_upl); | |
6895 | return KERN_FAILURE; | |
6896 | } | |
6897 | subupl_size -= size; | |
6898 | subupl_offset += size; | |
6899 | } | |
6900 | ||
6901 | *empty = FALSE; | |
6902 | ||
6903 | #if UPL_DEBUG | |
6904 | if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { | |
6905 | (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES); | |
6906 | ||
6907 | upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; | |
6908 | upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); | |
6909 | upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1; | |
6910 | ||
6911 | upl->upl_commit_index++; | |
6912 | } | |
6913 | #endif | |
6914 | if (upl->flags & UPL_DEVICE_MEMORY) | |
6915 | xfer_size = 0; | |
6916 | else if ((offset + size) <= upl->size) | |
6917 | xfer_size = size; | |
6918 | else { | |
6919 | if(!isVectorUPL) | |
6920 | upl_unlock(upl); | |
6921 | else { | |
6922 | upl_unlock(vector_upl); | |
6923 | } | |
6924 | ||
6925 | return KERN_FAILURE; | |
6926 | } | |
6927 | if (upl->flags & UPL_INTERNAL) { | |
6928 | lite_list = (wpl_array_t) | |
6929 | ((((uintptr_t)upl) + sizeof(struct upl)) | |
6930 | + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); | |
6931 | ||
6932 | user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); | |
6933 | } else { | |
6934 | lite_list = (wpl_array_t) | |
6935 | (((uintptr_t)upl) + sizeof(struct upl)); | |
6936 | } | |
6937 | object = upl->map_object; | |
6938 | ||
6939 | if (upl->flags & UPL_SHADOWED) { | |
6940 | vm_object_lock(object); | |
6941 | shadow_object = object->shadow; | |
6942 | } else | |
6943 | shadow_object = object; | |
6944 | ||
6945 | entry = offset/PAGE_SIZE; | |
6946 | target_offset = (vm_object_offset_t)offset; | |
6947 | ||
6948 | if (upl->flags & UPL_KERNEL_OBJECT) | |
6949 | vm_object_lock_shared(shadow_object); | |
6950 | else | |
6951 | vm_object_lock(shadow_object); | |
6952 | ||
6953 | if (upl->flags & UPL_ACCESS_BLOCKED) { | |
6954 | assert(shadow_object->blocked_access); | |
6955 | shadow_object->blocked_access = FALSE; | |
6956 | vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); | |
6957 | } | |
6958 | ||
6959 | dwp = &dw_array[0]; | |
6960 | dw_count = 0; | |
6961 | dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); | |
6962 | ||
6963 | if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT)) | |
6964 | panic("upl_abort_range: kernel_object being DUMPED"); | |
6965 | ||
6966 | while (xfer_size) { | |
6967 | vm_page_t t, m; | |
6968 | unsigned int pg_num; | |
6969 | boolean_t needed; | |
6970 | ||
6971 | pg_num = (unsigned int) (target_offset/PAGE_SIZE); | |
6972 | assert(pg_num == target_offset/PAGE_SIZE); | |
6973 | ||
6974 | needed = FALSE; | |
6975 | ||
6976 | if (user_page_list) | |
6977 | needed = user_page_list[pg_num].needed; | |
6978 | ||
6979 | dwp->dw_mask = 0; | |
6980 | m = VM_PAGE_NULL; | |
6981 | ||
6982 | if (upl->flags & UPL_LITE) { | |
6983 | ||
6984 | if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { | |
6985 | lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); | |
6986 | ||
6987 | if ( !(upl->flags & UPL_KERNEL_OBJECT)) | |
6988 | m = vm_page_lookup(shadow_object, target_offset + | |
6989 | (upl->offset - shadow_object->paging_offset)); | |
6990 | } | |
6991 | } | |
6992 | if (upl->flags & UPL_SHADOWED) { | |
6993 | if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { | |
6994 | t->pageout = FALSE; | |
6995 | ||
6996 | VM_PAGE_FREE(t); | |
6997 | ||
6998 | if (m == VM_PAGE_NULL) | |
6999 | m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset); | |
7000 | } | |
7001 | } | |
7002 | if ((upl->flags & UPL_KERNEL_OBJECT)) | |
7003 | goto abort_next_page; | |
7004 | ||
7005 | if (m != VM_PAGE_NULL) { | |
7006 | ||
7007 | assert(!m->compressor); | |
7008 | ||
7009 | if (m->absent) { | |
7010 | boolean_t must_free = TRUE; | |
7011 | ||
7012 | /* | |
7013 | * COPYOUT = FALSE case | |
7014 | * check for error conditions which must | |
7015 | * be passed back to the page's customer | |
7016 | */ | |
7017 | if (error & UPL_ABORT_RESTART) { | |
7018 | m->restart = TRUE; | |
7019 | m->absent = FALSE; | |
7020 | m->unusual = TRUE; | |
7021 | must_free = FALSE; | |
7022 | } else if (error & UPL_ABORT_UNAVAILABLE) { | |
7023 | m->restart = FALSE; | |
7024 | m->unusual = TRUE; | |
7025 | must_free = FALSE; | |
7026 | } else if (error & UPL_ABORT_ERROR) { | |
7027 | m->restart = FALSE; | |
7028 | m->absent = FALSE; | |
7029 | m->error = TRUE; | |
7030 | m->unusual = TRUE; | |
7031 | must_free = FALSE; | |
7032 | } | |
7033 | if (m->clustered && needed == FALSE) { | |
7034 | /* | |
7035 | * This page was a part of a speculative | |
7036 | * read-ahead initiated by the kernel | |
7037 | * itself. No one is expecting this | |
7038 | * page and no one will clean up its | |
7039 | * error state if it ever becomes valid | |
7040 | * in the future. | |
7041 | * We have to free it here. | |
7042 | */ | |
7043 | must_free = TRUE; | |
7044 | } | |
7045 | ||
7046 | /* | |
7047 | * ENCRYPTED SWAP: | |
7048 | * If the page was already encrypted, | |
7049 | * we don't really need to decrypt it | |
7050 | * now. It will get decrypted later, | |
7051 | * on demand, as soon as someone needs | |
7052 | * to access its contents. | |
7053 | */ | |
7054 | ||
7055 | m->cleaning = FALSE; | |
7056 | m->encrypted_cleaning = FALSE; | |
7057 | ||
7058 | if (m->overwriting && !m->busy) { | |
7059 | /* | |
7060 | * this shouldn't happen since | |
7061 | * this is an 'absent' page, but | |
7062 | * it doesn't hurt to check for | |
7063 | * the 'alternate' method of | |
7064 | * stabilizing the page... | |
7065 | * we will mark 'busy' to be cleared | |
7066 | * in the following code which will | |
7067 | * take care of the primary stabilization | |
7068 | * method (i.e. setting 'busy' to TRUE) | |
7069 | */ | |
7070 | dwp->dw_mask |= DW_vm_page_unwire; | |
7071 | } | |
7072 | m->overwriting = FALSE; | |
7073 | ||
7074 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); | |
7075 | ||
7076 | if (must_free == TRUE) | |
7077 | dwp->dw_mask |= DW_vm_page_free; | |
7078 | else | |
7079 | dwp->dw_mask |= DW_vm_page_activate; | |
7080 | } else { | |
7081 | /* | |
7082 | * Handle the trusted pager throttle. | |
7083 | */ | |
7084 | if (m->laundry) | |
7085 | dwp->dw_mask |= DW_vm_pageout_throttle_up; | |
7086 | ||
7087 | if (upl->flags & UPL_ACCESS_BLOCKED) { | |
7088 | /* | |
7089 | * We blocked access to the pages in this UPL. | |
7090 | * Clear the "busy" bit and wake up any waiter | |
7091 | * for this page. | |
7092 | */ | |
7093 | dwp->dw_mask |= DW_clear_busy; | |
7094 | } | |
7095 | if (m->overwriting) { | |
7096 | if (m->busy) | |
7097 | dwp->dw_mask |= DW_clear_busy; | |
7098 | else { | |
7099 | /* | |
7100 | * deal with the 'alternate' method | |
7101 | * of stabilizing the page... | |
7102 | * we will either free the page | |
7103 | * or mark 'busy' to be cleared | |
7104 | * in the following code which will | |
7105 | * take care of the primary stabilization | |
7106 | * method (i.e. setting 'busy' to TRUE) | |
7107 | */ | |
7108 | dwp->dw_mask |= DW_vm_page_unwire; | |
7109 | } | |
7110 | m->overwriting = FALSE; | |
7111 | } | |
7112 | if (m->encrypted_cleaning == TRUE) { | |
7113 | m->encrypted_cleaning = FALSE; | |
7114 | ||
7115 | dwp->dw_mask |= DW_clear_busy; | |
7116 | } | |
7117 | m->pageout = FALSE; | |
7118 | m->cleaning = FALSE; | |
7119 | #if MACH_PAGEMAP | |
7120 | vm_external_state_clr(m->object->existence_map, m->offset); | |
7121 | #endif /* MACH_PAGEMAP */ | |
7122 | if (error & UPL_ABORT_DUMP_PAGES) { | |
7123 | pmap_disconnect(m->phys_page); | |
7124 | ||
7125 | dwp->dw_mask |= DW_vm_page_free; | |
7126 | } else { | |
7127 | if (!(dwp->dw_mask & DW_vm_page_unwire)) { | |
7128 | if (error & UPL_ABORT_REFERENCE) { | |
7129 | /* | |
7130 | * we've been told to explicitly | |
7131 | * reference this page... for | |
7132 | * file I/O, this is done by | |
7133 | * implementing an LRU on the inactive q | |
7134 | */ | |
7135 | dwp->dw_mask |= DW_vm_page_lru; | |
7136 | ||
7137 | } else if (!m->active && !m->inactive && !m->speculative) | |
7138 | dwp->dw_mask |= DW_vm_page_deactivate_internal; | |
7139 | } | |
7140 | dwp->dw_mask |= DW_PAGE_WAKEUP; | |
7141 | } | |
7142 | } | |
7143 | } | |
7144 | abort_next_page: | |
7145 | target_offset += PAGE_SIZE_64; | |
7146 | xfer_size -= PAGE_SIZE; | |
7147 | entry++; | |
7148 | ||
7149 | if (dwp->dw_mask) { | |
7150 | if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { | |
7151 | VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); | |
7152 | ||
7153 | if (dw_count >= dw_limit) { | |
7154 | vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); | |
7155 | ||
7156 | dwp = &dw_array[0]; | |
7157 | dw_count = 0; | |
7158 | } | |
7159 | } else { | |
7160 | if (dwp->dw_mask & DW_clear_busy) | |
7161 | m->busy = FALSE; | |
7162 | ||
7163 | if (dwp->dw_mask & DW_PAGE_WAKEUP) | |
7164 | PAGE_WAKEUP(m); | |
7165 | } | |
7166 | } | |
7167 | } | |
7168 | if (dw_count) | |
7169 | vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); | |
7170 | ||
7171 | occupied = 1; | |
7172 | ||
7173 | if (upl->flags & UPL_DEVICE_MEMORY) { | |
7174 | occupied = 0; | |
7175 | } else if (upl->flags & UPL_LITE) { | |
7176 | int pg_num; | |
7177 | int i; | |
7178 | ||
7179 | pg_num = upl->size/PAGE_SIZE; | |
7180 | pg_num = (pg_num + 31) >> 5; | |
7181 | occupied = 0; | |
7182 | ||
7183 | for (i = 0; i < pg_num; i++) { | |
7184 | if (lite_list[i] != 0) { | |
7185 | occupied = 1; | |
7186 | break; | |
7187 | } | |
7188 | } | |
7189 | } else { | |
7190 | if (queue_empty(&upl->map_object->memq)) | |
7191 | occupied = 0; | |
7192 | } | |
7193 | if (occupied == 0) { | |
7194 | /* | |
7195 | * If this UPL element belongs to a Vector UPL and is | |
7196 | * empty, then this is the right function to deallocate | |
7197 | * it. So go ahead and set the *empty variable. The flag | |
7198 | * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view | |
7199 | * should be considered relevant for the Vector UPL and | |
7200 | * not the internal UPLs. | |
7201 | */ | |
7202 | if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) | |
7203 | *empty = TRUE; | |
7204 | ||
7205 | if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { | |
7206 | /* | |
7207 | * this is not a paging object | |
7208 | * so we need to drop the paging reference | |
7209 | * that was taken when we created the UPL | |
7210 | * against this object | |
7211 | */ | |
7212 | vm_object_activity_end(shadow_object); | |
7213 | vm_object_collapse(shadow_object, 0, TRUE); | |
7214 | } else { | |
7215 | /* | |
7216 | * we donated the paging reference to | |
7217 | * the map object... vm_pageout_object_terminate | |
7218 | * will drop this reference | |
7219 | */ | |
7220 | } | |
7221 | } | |
7222 | vm_object_unlock(shadow_object); | |
7223 | if (object != shadow_object) | |
7224 | vm_object_unlock(object); | |
7225 | ||
7226 | if(!isVectorUPL) | |
7227 | upl_unlock(upl); | |
7228 | else { | |
7229 | /* | |
7230 | * If we completed our operations on a UPL that is | |
7231 | * part of a Vectored UPL and if empty is TRUE, then | |
7232 | * we should go ahead and deallocate this UPL element. | |
7233 | * Then we check if this was the last of the UPL elements | |
7234 | * within that Vectored UPL. If so, set empty to TRUE | |
7235 | * so that in ubc_upl_abort_range or ubc_upl_abort, we | |
7236 | * can go ahead and deallocate the Vector UPL too. | |
7237 | */ | |
7238 | if(*empty == TRUE) { | |
7239 | *empty = vector_upl_set_subupl(vector_upl, upl,0); | |
7240 | upl_deallocate(upl); | |
7241 | } | |
7242 | goto process_upl_to_abort; | |
7243 | } | |
7244 | ||
7245 | return KERN_SUCCESS; | |
7246 | } | |
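| ||
| /* | |
|  * Flag sketch (added for clarity): the 'error' argument above selects | |
|  * how each absent page is resolved, e.g. | |
|  * | |
|  * upl_abort_range(upl, 0, size, UPL_ABORT_ERROR, &empty); | |
|  * marks the pages in error but keeps them resident, while | |
|  * upl_abort_range(upl, 0, size, UPL_ABORT_DUMP_PAGES, &empty); | |
|  * disconnects and frees them outright. UPL_ABORT_RESTART and | |
|  * UPL_ABORT_UNAVAILABLE map onto the m->restart / m->unusual bits | |
|  * set in the loop above. | |
|  */ | |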
7247 | ||
7248 | ||
7249 | kern_return_t | |
7250 | upl_abort( | |
7251 | upl_t upl, | |
7252 | int error) | |
7253 | { | |
7254 | boolean_t empty; | |
7255 | ||
7256 | return upl_abort_range(upl, 0, upl->size, error, &empty); | |
7257 | } | |
7258 | ||
7259 | ||
7260 | /* an option on commit should be wire */ | |
7261 | kern_return_t | |
7262 | upl_commit( | |
7263 | upl_t upl, | |
7264 | upl_page_info_t *page_list, | |
7265 | mach_msg_type_number_t count) | |
7266 | { | |
7267 | boolean_t empty; | |
7268 | ||
7269 | return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty); | |
7270 | } | |
7271 | ||
7272 | ||
7273 | void | |
7274 | iopl_valid_data( | |
7275 | upl_t upl) | |
7276 | { | |
7277 | vm_object_t object; | |
7278 | vm_offset_t offset; | |
7279 | vm_page_t m, nxt_page = VM_PAGE_NULL; | |
7280 | upl_size_t size; | |
7281 | int wired_count = 0; | |
7282 | ||
7283 | if (upl == NULL) | |
7284 | panic("iopl_valid_data: NULL upl"); | |
7285 | if (vector_upl_is_valid(upl)) | |
7286 | panic("iopl_valid_data: vector upl"); | |
7287 | if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE) | |
7288 | panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags); | |
7289 | ||
7290 | object = upl->map_object; | |
7291 | ||
7292 | if (object == kernel_object || object == compressor_object) | |
7293 | panic("iopl_valid_data: object == kernel or compressor"); | |
7294 | ||
7295 | if (object->purgable == VM_PURGABLE_VOLATILE) | |
7296 | panic("iopl_valid_data: object == VM_PURGABLE_VOLATILE"); | |
7297 | ||
7298 | size = upl->size; | |
7299 | ||
7300 | vm_object_lock(object); | |
7301 | ||
7302 | if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE)) | |
7303 | nxt_page = (vm_page_t)queue_first(&object->memq); | |
7304 | else | |
7305 | offset = 0 + upl->offset - object->paging_offset; | |
7306 | ||
7307 | while (size) { | |
7308 | ||
7309 | if (nxt_page != VM_PAGE_NULL) { | |
7310 | m = nxt_page; | |
7311 | nxt_page = (vm_page_t)queue_next(&nxt_page->listq); | |
7312 | } else { | |
7313 | m = vm_page_lookup(object, offset); | |
7314 | offset += PAGE_SIZE; | |
7315 | ||
7316 | if (m == VM_PAGE_NULL) | |
7317 | panic("iopl_valid_data: missing expected page at offset %lx", (long)offset); | |
7318 | } | |
7319 | if (m->busy) { | |
7320 | if (!m->absent) | |
7321 | panic("iopl_valid_data: busy page w/o absent"); | |
7322 | ||
7323 | if (m->pageq.next || m->pageq.prev) | |
7324 | panic("iopl_valid_data: busy+absent page on page queue"); | |
7325 | ||
7326 | m->absent = FALSE; | |
7327 | m->dirty = TRUE; | |
7328 | m->wire_count++; | |
7329 | wired_count++; | |
7330 | ||
7331 | PAGE_WAKEUP_DONE(m); | |
7332 | } | |
7333 | size -= PAGE_SIZE; | |
7334 | } | |
7335 | if (wired_count) { | |
7336 | object->wired_page_count += wired_count; | |
7337 | ||
7338 | vm_page_lockspin_queues(); | |
7339 | vm_page_wire_count += wired_count; | |
7340 | vm_page_unlock_queues(); | |
7341 | } | |
7342 | vm_object_unlock(object); | |
7343 | } | |
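| ||
| /* | |
|  * Usage sketch (added for illustration): iopl_valid_data() is meant | |
|  * for a driver that created an IOPL with UPL_NOZEROFILLIO -- leaving | |
|  * the freshly grabbed pages absent and busy -- and has since filled | |
|  * them, e.g. by DMA. The flag combination below is an example only; | |
|  * the panics above enforce which UPL types are actually accepted. | |
|  * | |
|  * (void) vm_object_iopl_request(object, 0, size, &upl, NULL, | |
|  * &count, UPL_SET_IO_WIRE | UPL_SET_LITE | UPL_NOZEROFILLIO); | |
|  * ... device fills the pages ... | |
|  * iopl_valid_data(upl); | |
|  */ | |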
7344 | ||
7345 | ||
7346 | ||
7347 | ||
7348 | void | |
7349 | vm_object_set_pmap_cache_attr( | |
7350 | vm_object_t object, | |
7351 | upl_page_info_array_t user_page_list, | |
7352 | unsigned int num_pages, | |
7353 | boolean_t batch_pmap_op) | |
7354 | { | |
7355 | unsigned int cache_attr = 0; | |
7356 | ||
7357 | cache_attr = object->wimg_bits & VM_WIMG_MASK; | |
7358 | assert(user_page_list); | |
7359 | if (cache_attr != VM_WIMG_USE_DEFAULT) { | |
7360 | PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op); | |
7361 | } | |
7362 | } | |
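| ||
| /* | |
|  * Note (added for clarity): VM_WIMG_MASK extracts the cacheability | |
|  * bits (VM_WIMG_USE_DEFAULT, VM_WIMG_IO, write-combined, etc.) that | |
|  * were stored in object->wimg_bits. A typical call, once all pages | |
|  * of an IOPL have been gathered, looks like | |
|  * | |
|  * vm_object_set_pmap_cache_attr(object, user_page_list, entry, TRUE); | |
|  * | |
|  * where batch_pmap_op == TRUE lets the pmap layer apply the attribute | |
|  * to all pages in one pass instead of per page. | |
|  */ | |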
7363 | ||
7364 | unsigned int vm_object_iopl_request_sleep_for_cleaning = 0; | |
7365 | ||
7366 | kern_return_t | |
7367 | vm_object_iopl_request( | |
7368 | vm_object_t object, | |
7369 | vm_object_offset_t offset, | |
7370 | upl_size_t size, | |
7371 | upl_t *upl_ptr, | |
7372 | upl_page_info_array_t user_page_list, | |
7373 | unsigned int *page_list_count, | |
7374 | int cntrl_flags) | |
7375 | { | |
7376 | vm_page_t dst_page; | |
7377 | vm_object_offset_t dst_offset; | |
7378 | upl_size_t xfer_size; | |
7379 | upl_t upl = NULL; | |
7380 | unsigned int entry; | |
7381 | wpl_array_t lite_list = NULL; | |
7382 | int no_zero_fill = FALSE; | |
7383 | unsigned int size_in_pages; | |
7384 | u_int32_t psize; | |
7385 | kern_return_t ret; | |
7386 | vm_prot_t prot; | |
7387 | struct vm_object_fault_info fault_info; | |
7388 | struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; | |
7389 | struct vm_page_delayed_work *dwp; | |
7390 | int dw_count; | |
7391 | int dw_limit; | |
7392 | int dw_index; | |
7393 | boolean_t caller_lookup; | |
7394 | int io_tracking_flag = 0; | |
7395 | int interruptible; | |
7396 | ||
7397 | boolean_t set_cache_attr_needed = FALSE; | |
7398 | boolean_t free_wired_pages = FALSE; | |
7399 | int fast_path_possible = 0; | |
7400 | ||
7401 | ||
7402 | if (cntrl_flags & ~UPL_VALID_FLAGS) { | |
7403 | /* | |
7404 | * For forward compatibility's sake, | |
7405 | * reject any unknown flag. | |
7406 | */ | |
7407 | return KERN_INVALID_VALUE; | |
7408 | } | |
7409 | if (vm_lopage_needed == FALSE) | |
7410 | cntrl_flags &= ~UPL_NEED_32BIT_ADDR; | |
7411 | ||
7412 | if (cntrl_flags & UPL_NEED_32BIT_ADDR) { | |
7413 | if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE)) | |
7414 | return KERN_INVALID_VALUE; | |
7415 | ||
7416 | if (object->phys_contiguous) { | |
7417 | if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address) | |
7418 | return KERN_INVALID_ADDRESS; | |
7419 | ||
7420 | if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address) | |
7421 | return KERN_INVALID_ADDRESS; | |
7422 | } | |
7423 | } | |
7424 | ||
7425 | if (cntrl_flags & UPL_ENCRYPT) { | |
7426 | /* | |
7427 | * ENCRYPTED SWAP: | |
7428 | * The paging path doesn't use this interface, | |
7429 | * so we don't support the UPL_ENCRYPT flag | |
7430 | * here. We won't encrypt the pages. | |
7431 | */ | |
7432 | assert(! (cntrl_flags & UPL_ENCRYPT)); | |
7433 | } | |
7434 | if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO)) | |
7435 | no_zero_fill = TRUE; | |
7436 | ||
7437 | if (cntrl_flags & UPL_COPYOUT_FROM) | |
7438 | prot = VM_PROT_READ; | |
7439 | else | |
7440 | prot = VM_PROT_READ | VM_PROT_WRITE; | |
7441 | ||
7442 | if ((!object->internal) && (object->paging_offset != 0)) | |
7443 | panic("vm_object_iopl_request: external object with non-zero paging offset\n"); | |
7444 | ||
7445 | #if CONFIG_IOSCHED || UPL_DEBUG | |
7446 | if ((object->io_tracking && object != kernel_object) || upl_debug_enabled) | |
7447 | io_tracking_flag |= UPL_CREATE_IO_TRACKING; | |
7448 | #endif | |
7449 | ||
7450 | #if CONFIG_IOSCHED | |
7451 | if (object->io_tracking) { | |
7452 | /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */ | |
7453 | if (object != kernel_object) | |
7454 | io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP; | |
7455 | } | |
7456 | #endif | |
7457 | ||
7458 | if (object->phys_contiguous) | |
7459 | psize = PAGE_SIZE; | |
7460 | else | |
7461 | psize = size; | |
7462 | ||
7463 | if (cntrl_flags & UPL_SET_INTERNAL) { | |
7464 | upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize); | |
7465 | ||
7466 | user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); | |
7467 | lite_list = (wpl_array_t) (((uintptr_t)user_page_list) + | |
7468 | ((psize / PAGE_SIZE) * sizeof(upl_page_info_t))); | |
7469 | if (size == 0) { | |
7470 | user_page_list = NULL; | |
7471 | lite_list = NULL; | |
7472 | } | |
7473 | } else { | |
7474 | upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize); | |
7475 | ||
7476 | lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); | |
7477 | if (size == 0) { | |
7478 | lite_list = NULL; | |
7479 | } | |
7480 | } | |
7481 | if (user_page_list) | |
7482 | user_page_list[0].device = FALSE; | |
7483 | *upl_ptr = upl; | |
7484 | ||
7485 | upl->map_object = object; | |
7486 | upl->size = size; | |
7487 | ||
7488 | size_in_pages = size / PAGE_SIZE; | |
7489 | ||
7490 | if (object == kernel_object && | |
7491 | !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) { | |
7492 | upl->flags |= UPL_KERNEL_OBJECT; | |
7493 | #if UPL_DEBUG | |
7494 | vm_object_lock(object); | |
7495 | #else | |
7496 | vm_object_lock_shared(object); | |
7497 | #endif | |
7498 | } else { | |
7499 | vm_object_lock(object); | |
7500 | vm_object_activity_begin(object); | |
7501 | } | |
7502 | /* | |
7503 | * paging in progress also protects the paging_offset | |
7504 | */ | |
7505 | upl->offset = offset + object->paging_offset; | |
7506 | ||
7507 | if (cntrl_flags & UPL_BLOCK_ACCESS) { | |
7508 | /* | |
7509 | * The user requested that access to the pages in this UPL | |
7510 | * be blocked until the UPL is committed or aborted. | |
7511 | */ | |
7512 | upl->flags |= UPL_ACCESS_BLOCKED; | |
7513 | } | |
7514 | ||
7515 | if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) && | |
7516 | object->purgable != VM_PURGABLE_VOLATILE && | |
7517 | object->purgable != VM_PURGABLE_EMPTY && | |
7518 | object->copy == NULL && | |
7519 | size == object->vo_size && | |
7520 | offset == 0 && | |
7521 | object->resident_page_count == 0 && | |
7522 | object->shadow == NULL && | |
7523 | object->pager == NULL) | |
7524 | { | |
7525 | fast_path_possible = 1; | |
7526 | set_cache_attr_needed = TRUE; | |
7527 | } | |
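| ||
| /* | |
|  * Note (added for clarity): the conditions above guarantee the object | |
|  * is brand new -- no resident pages, no pager, no copy or shadow | |
|  * chain -- so the fast path further down can grab fresh pages, wire | |
|  * them and insert them directly without ever calling | |
|  * vm_fault_page(). Any blocking or 32-bit DMA requirement forces the | |
|  * slow path. | |
|  */ | |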
7528 | ||
7529 | #if CONFIG_IOSCHED || UPL_DEBUG | |
7530 | if (upl->flags & UPL_TRACKED_BY_OBJECT) { | |
7531 | vm_object_activity_begin(object); | |
7532 | queue_enter(&object->uplq, upl, upl_t, uplq); | |
7533 | } | |
7534 | #endif | |
7535 | ||
7536 | if (object->phys_contiguous) { | |
7537 | ||
7538 | if (upl->flags & UPL_ACCESS_BLOCKED) { | |
7539 | assert(!object->blocked_access); | |
7540 | object->blocked_access = TRUE; | |
7541 | } | |
7542 | ||
7543 | vm_object_unlock(object); | |
7544 | ||
7545 | /* | |
7546 | * don't need any shadow mappings for this one | |
7547 | * since it is already I/O memory | |
7548 | */ | |
7549 | upl->flags |= UPL_DEVICE_MEMORY; | |
7550 | ||
7551 | upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT); | |
7552 | ||
7553 | if (user_page_list) { | |
7554 | user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT); | |
7555 | user_page_list[0].device = TRUE; | |
7556 | } | |
7557 | if (page_list_count != NULL) { | |
7558 | if (upl->flags & UPL_INTERNAL) | |
7559 | *page_list_count = 0; | |
7560 | else | |
7561 | *page_list_count = 1; | |
7562 | } | |
7563 | return KERN_SUCCESS; | |
7564 | } | |
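| /* | |
|  * Worked example (added for clarity; hypothetical numbers): for a | |
|  * physically contiguous object backing a 1MB window at physical | |
|  * address 0x80000000, the single page-list entry above records | |
|  * phys_addr == (0x80000000 >> PAGE_SHIFT) == 0x80000 with a 4K page | |
|  * size, and highest_page covers the last page of the window. No | |
|  * per-page state is needed, hence UPL_DEVICE_MEMORY. | |
|  */ | |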
7565 | if (object != kernel_object && object != compressor_object) { | |
7566 | /* | |
7567 | * Protect user space from future COW operations | |
7568 | */ | |
7569 | #if VM_OBJECT_TRACKING_OP_TRUESHARE | |
7570 | if (!object->true_share && | |
7571 | vm_object_tracking_inited) { | |
7572 | void *bt[VM_OBJECT_TRACKING_BTDEPTH]; | |
7573 | int num = 0; | |
7574 | ||
7575 | num = OSBacktrace(bt, | |
7576 | VM_OBJECT_TRACKING_BTDEPTH); | |
7577 | btlog_add_entry(vm_object_tracking_btlog, | |
7578 | object, | |
7579 | VM_OBJECT_TRACKING_OP_TRUESHARE, | |
7580 | bt, | |
7581 | num); | |
7582 | } | |
7583 | #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ | |
7584 | ||
7585 | object->true_share = TRUE; | |
7586 | ||
7587 | if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) | |
7588 | object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; | |
7589 | } | |
7590 | ||
7591 | if (!(cntrl_flags & UPL_COPYOUT_FROM) && | |
7592 | object->copy != VM_OBJECT_NULL) { | |
7593 | /* | |
7594 | * Honor copy-on-write obligations | |
7595 | * | |
7596 | * The caller is gathering these pages and | |
7597 | * might modify their contents. We need to | |
7598 | * make sure that the copy object has its own | |
7599 | * private copies of these pages before we let | |
7600 | * the caller modify them. | |
7601 | * | |
7602 | * NOTE: someone else could map the original object | |
7603 | * after we've done this copy-on-write here, and they | |
7604 | * could then see an inconsistent picture of the memory | |
7605 | * while it's being modified via the UPL. To prevent this, | |
7606 | * we would have to block access to these pages until the | |
7607 | * UPL is released. We could use the UPL_BLOCK_ACCESS | |
7608 | * code path for that... | |
7609 | */ | |
7610 | vm_object_update(object, | |
7611 | offset, | |
7612 | size, | |
7613 | NULL, | |
7614 | NULL, | |
7615 | FALSE, /* should_return */ | |
7616 | MEMORY_OBJECT_COPY_SYNC, | |
7617 | VM_PROT_NO_CHANGE); | |
7618 | #if DEVELOPMENT || DEBUG | |
7619 | iopl_cow++; | |
7620 | iopl_cow_pages += size >> PAGE_SHIFT; | |
7621 | #endif | |
7622 | } | |
7623 | if (cntrl_flags & UPL_SET_INTERRUPTIBLE) | |
7624 | interruptible = THREAD_ABORTSAFE; | |
7625 | else | |
7626 | interruptible = THREAD_UNINT; | |
7627 | ||
7628 | entry = 0; | |
7629 | ||
7630 | xfer_size = size; | |
7631 | dst_offset = offset; | |
7632 | dw_count = 0; | |
7633 | ||
7634 | if (fast_path_possible) { | |
7635 | int wired_count = 0; | |
7636 | ||
7637 | while (xfer_size) { | |
7638 | ||
7639 | while ( (dst_page = vm_page_grab()) == VM_PAGE_NULL) { | |
7640 | OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); | |
7641 | ||
7642 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); | |
7643 | ||
7644 | if (vm_page_wait(interruptible) == FALSE) { | |
7645 | /* | |
7646 | * interrupted case | |
7647 | */ | |
7648 | OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); | |
7649 | ||
7650 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1); | |
7651 | ||
7652 | if (wired_count) { | |
7653 | vm_page_lockspin_queues(); | |
7654 | vm_page_wire_count += wired_count; | |
7655 | vm_page_unlock_queues(); | |
7656 | ||
7657 | free_wired_pages = TRUE; | |
7658 | } | |
7659 | ret = MACH_SEND_INTERRUPTED; | |
7660 | ||
7661 | goto return_err; | |
7662 | } | |
7663 | OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); | |
7664 | ||
7665 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); | |
7666 | } | |
7667 | if (no_zero_fill == FALSE) | |
7668 | vm_page_zero_fill(dst_page); | |
7669 | else | |
7670 | dst_page->absent = TRUE; | |
7671 | ||
7672 | dst_page->reference = TRUE; | |
7673 | ||
7674 | if (!(cntrl_flags & UPL_COPYOUT_FROM)) { | |
7675 | SET_PAGE_DIRTY(dst_page, FALSE); | |
7676 | } | |
7677 | if (dst_page->absent == FALSE) { | |
7678 | assert(object->purgable != VM_PURGABLE_VOLATILE); | |
7679 | assert(object->purgable != VM_PURGABLE_EMPTY); | |
7680 | dst_page->wire_count++; | |
7681 | wired_count++; | |
7682 | ||
7683 | PAGE_WAKEUP_DONE(dst_page); | |
7684 | } | |
7685 | vm_page_insert_internal(dst_page, object, dst_offset, FALSE, TRUE, TRUE); | |
7686 | ||
7687 | lite_list[entry>>5] |= 1 << (entry & 31); | |
7688 | ||
7689 | if (dst_page->phys_page > upl->highest_page) | |
7690 | upl->highest_page = dst_page->phys_page; | |
7691 | ||
7692 | if (user_page_list) { | |
7693 | user_page_list[entry].phys_addr = dst_page->phys_page; | |
7694 | user_page_list[entry].absent = dst_page->absent; | |
7695 | user_page_list[entry].dirty = dst_page->dirty; | |
7696 | user_page_list[entry].precious = FALSE; | |
7697 | user_page_list[entry].pageout = FALSE; | |
7698 | user_page_list[entry].device = FALSE; | |
7699 | user_page_list[entry].needed = FALSE; | |
7700 | user_page_list[entry].speculative = FALSE; | |
7701 | user_page_list[entry].cs_validated = FALSE; | |
7702 | user_page_list[entry].cs_tainted = FALSE; | |
7703 | } | |
7704 | entry++; | |
7705 | dst_offset += PAGE_SIZE_64; | |
7706 | xfer_size -= PAGE_SIZE; | |
7707 | size_in_pages--; | |
7708 | } | |
7709 | if (wired_count) { | |
7710 | vm_page_lockspin_queues(); | |
7711 | vm_page_wire_count += wired_count; | |
7712 | vm_page_unlock_queues(); | |
7713 | } | |
7714 | goto finish; | |
7715 | } | |
7716 | ||
7717 | fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; | |
7718 | fault_info.user_tag = 0; | |
7719 | fault_info.lo_offset = offset; | |
7720 | fault_info.hi_offset = offset + xfer_size; | |
7721 | fault_info.no_cache = FALSE; | |
7722 | fault_info.stealth = FALSE; | |
7723 | fault_info.io_sync = FALSE; | |
7724 | fault_info.cs_bypass = FALSE; | |
7725 | fault_info.mark_zf_absent = TRUE; | |
7726 | fault_info.interruptible = interruptible; | |
7727 | fault_info.batch_pmap_op = TRUE; | |
7728 | ||
7729 | dwp = &dw_array[0]; | |
7730 | dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); | |
7731 | ||
7732 | while (xfer_size) { | |
7733 | vm_fault_return_t result; | |
7734 | unsigned int pg_num; | |
7735 | ||
7736 | dwp->dw_mask = 0; | |
7737 | ||
7738 | dst_page = vm_page_lookup(object, dst_offset); | |
7739 | ||
7740 | /* | |
7741 | * ENCRYPTED SWAP: | |
7742 | * If the page is encrypted, we need to decrypt it, | |
7743 | * so force a soft page fault. | |
7744 | */ | |
7745 | if (dst_page == VM_PAGE_NULL || | |
7746 | dst_page->busy || | |
7747 | dst_page->encrypted || | |
7748 | dst_page->error || | |
7749 | dst_page->restart || | |
7750 | dst_page->absent || | |
7751 | dst_page->fictitious) { | |
7752 | ||
7753 | if (object == kernel_object) | |
7754 | panic("vm_object_iopl_request: missing/bad page in kernel object\n"); | |
7755 | if (object == compressor_object) | |
7756 | panic("vm_object_iopl_request: missing/bad page in compressor object\n"); | |
7757 | ||
7758 | if (cntrl_flags & UPL_REQUEST_NO_FAULT) { | |
7759 | ret = KERN_MEMORY_ERROR; | |
7760 | goto return_err; | |
7761 | } | |
7762 | set_cache_attr_needed = TRUE; | |
7763 | ||
7764 | /* | |
7765 | * We just looked up the page and the result remains valid | |
7766 | * until the object lock is released, so send it to | |
7767 | * vm_fault_page() (as "dst_page"), to avoid having to | |
7768 | * look it up again there. | |
7769 | */ | |
7770 | caller_lookup = TRUE; | |
7771 | ||
7772 | do { | |
7773 | vm_page_t top_page; | |
7774 | kern_return_t error_code; | |
7775 | ||
7776 | fault_info.cluster_size = xfer_size; | |
7777 | ||
7778 | vm_object_paging_begin(object); | |
7779 | ||
7780 | result = vm_fault_page(object, dst_offset, | |
7781 | prot | VM_PROT_WRITE, FALSE, | |
7782 | caller_lookup, | |
7783 | &prot, &dst_page, &top_page, | |
7784 | (int *)0, | |
7785 | &error_code, no_zero_fill, | |
7786 | FALSE, &fault_info); | |
7787 | ||
7788 | /* our lookup is no longer valid at this point */ | |
7789 | caller_lookup = FALSE; | |
7790 | ||
7791 | switch (result) { | |
7792 | ||
7793 | case VM_FAULT_SUCCESS: | |
7794 | ||
7795 | if ( !dst_page->absent) { | |
7796 | PAGE_WAKEUP_DONE(dst_page); | |
7797 | } else { | |
7798 | /* | |
7799 | * we only get back an absent page if we | |
7800 | * requested that it not be zero-filled | |
7801 | * because we are about to fill it via I/O | |
7802 | * | |
7803 | * absent pages should be left BUSY | |
7804 | * to prevent them from being faulted | |
7805 | * into an address space before we've | |
7806 | * had a chance to complete the I/O on | |
7807 | * them since they may contain info that | |
7808 | * shouldn't be seen by the faulting task | |
7809 | */ | |
7810 | } | |
7811 | /* | |
7812 | * Release paging references and | |
7813 | * top-level placeholder page, if any. | |
7814 | */ | |
7815 | if (top_page != VM_PAGE_NULL) { | |
7816 | vm_object_t local_object; | |
7817 | ||
7818 | local_object = top_page->object; | |
7819 | ||
7820 | if (top_page->object != dst_page->object) { | |
7821 | vm_object_lock(local_object); | |
7822 | VM_PAGE_FREE(top_page); | |
7823 | vm_object_paging_end(local_object); | |
7824 | vm_object_unlock(local_object); | |
7825 | } else { | |
7826 | VM_PAGE_FREE(top_page); | |
7827 | vm_object_paging_end(local_object); | |
7828 | } | |
7829 | } | |
7830 | vm_object_paging_end(object); | |
7831 | break; | |
7832 | ||
7833 | case VM_FAULT_RETRY: | |
7834 | vm_object_lock(object); | |
7835 | break; | |
7836 | ||
7837 | case VM_FAULT_MEMORY_SHORTAGE: | |
7838 | OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); | |
7839 | ||
7840 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); | |
7841 | ||
7842 | if (vm_page_wait(interruptible)) { | |
7843 | OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); | |
7844 | ||
7845 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); | |
7846 | vm_object_lock(object); | |
7847 | ||
7848 | break; | |
7849 | } | |
7850 | OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); | |
7851 | ||
7852 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1); | |
7853 | ||
7854 | /* fall thru */ | |
7855 | ||
7856 | case VM_FAULT_INTERRUPTED: | |
7857 | error_code = MACH_SEND_INTERRUPTED; | |
7858 | case VM_FAULT_MEMORY_ERROR: | |
7859 | memory_error: | |
7860 | ret = (error_code ? error_code: KERN_MEMORY_ERROR); | |
7861 | ||
7862 | vm_object_lock(object); | |
7863 | goto return_err; | |
7864 | ||
7865 | case VM_FAULT_SUCCESS_NO_VM_PAGE: | |
7866 | /* success but no page: fail */ | |
7867 | vm_object_paging_end(object); | |
7868 | vm_object_unlock(object); | |
7869 | goto memory_error; | |
7870 | ||
7871 | default: | |
7872 | panic("vm_object_iopl_request: unexpected error" | |
7873 | " 0x%x from vm_fault_page()\n", result); | |
7874 | } | |
7875 | } while (result != VM_FAULT_SUCCESS); | |
7876 | ||
7877 | } | |
7878 | if (upl->flags & UPL_KERNEL_OBJECT) | |
7879 | goto record_phys_addr; | |
7880 | ||
7881 | if (dst_page->compressor) { | |
7882 | dst_page->busy = TRUE; | |
7883 | goto record_phys_addr; | |
7884 | } | |
7885 | ||
7886 | if (dst_page->cleaning) { | |
7887 | /* | |
7888 | * Someone else is cleaning this page in place. | |
7889 | * In theory, we should be able to proceed and use this | |
7890 | * page, but they'll probably end up clearing the "busy" | |
7891 | * bit on it in upl_commit_range() even though they didn't | |
7892 | * set it, which would clear our "busy" bit and open | |
7893 | * us to race conditions. | |
7894 | * We'd better wait for the cleaning to complete and | |
7895 | * then try again. | |
7896 | */ | |
7897 | vm_object_iopl_request_sleep_for_cleaning++; | |
7898 | PAGE_SLEEP(object, dst_page, THREAD_UNINT); | |
7899 | continue; | |
7900 | } | |
7901 | if (dst_page->laundry) { | |
7902 | dst_page->pageout = FALSE; | |
7903 | ||
7904 | vm_pageout_steal_laundry(dst_page, FALSE); | |
7905 | } | |
7906 | if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) && | |
7907 | dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) { | |
7908 | vm_page_t low_page; | |
7909 | int refmod; | |
7910 | ||
7911 | /* | |
7912 | * support devices that can't DMA above 32 bits | |
7913 | * by substituting pages from a pool of low address | |
7914 | * memory for any pages we find above the 4G mark... | |
7915 | * we can't substitute if the page is already wired because | |
7916 | * we don't know whether that physical address has been | |
7917 | * handed out to some other 64-bit capable DMA device to use | |
7918 | */ | |
7919 | if (VM_PAGE_WIRED(dst_page)) { | |
7920 | ret = KERN_PROTECTION_FAILURE; | |
7921 | goto return_err; | |
7922 | } | |
7923 | low_page = vm_page_grablo(); | |
7924 | ||
7925 | if (low_page == VM_PAGE_NULL) { | |
7926 | ret = KERN_RESOURCE_SHORTAGE; | |
7927 | goto return_err; | |
7928 | } | |
7929 | /* | |
7930 | * from here until the vm_page_replace completes | |
7931 | * we mustn't drop the object lock... we don't | |
7932 | * want anyone refaulting this page in and using | |
7933 | * it after we disconnect it... we want the fault | |
7934 | * to find the new page being substituted. | |
7935 | */ | |
7936 | if (dst_page->pmapped) | |
7937 | refmod = pmap_disconnect(dst_page->phys_page); | |
7938 | else | |
7939 | refmod = 0; | |
7940 | ||
7941 | if (!dst_page->absent) | |
7942 | vm_page_copy(dst_page, low_page); | |
7943 | ||
7944 | low_page->reference = dst_page->reference; | |
7945 | low_page->dirty = dst_page->dirty; | |
7946 | low_page->absent = dst_page->absent; | |
7947 | ||
7948 | if (refmod & VM_MEM_REFERENCED) | |
7949 | low_page->reference = TRUE; | |
7950 | if (refmod & VM_MEM_MODIFIED) { | |
7951 | SET_PAGE_DIRTY(low_page, FALSE); | |
7952 | } | |
7953 | ||
7954 | vm_page_replace(low_page, object, dst_offset); | |
7955 | ||
7956 | dst_page = low_page; | |
7957 | /* | |
7958 | * vm_page_grablo returned the page marked | |
7959 | * BUSY... we don't need a PAGE_WAKEUP_DONE | |
7960 | * here, because we've never dropped the object lock | |
7961 | */ | |
7962 | if ( !dst_page->absent) | |
7963 | dst_page->busy = FALSE; | |
7964 | } | |
7965 | if ( !dst_page->busy) | |
7966 | dwp->dw_mask |= DW_vm_page_wire; | |
7967 | ||
7968 | if (cntrl_flags & UPL_BLOCK_ACCESS) { | |
7969 | /* | |
7970 | * Mark the page "busy" to block any future page fault | |
7971 | * on this page in addition to wiring it. | |
7972 | * We'll also remove the mapping | |
7973 | * of all these pages before leaving this routine. | |
7974 | */ | |
7975 | assert(!dst_page->fictitious); | |
7976 | dst_page->busy = TRUE; | |
7977 | } | |
7978 | /* | |
7979 | * expect the page to be used | |
7980 | * page queues lock must be held to set 'reference' | |
7981 | */ | |
7982 | dwp->dw_mask |= DW_set_reference; | |
7983 | ||
7984 | if (!(cntrl_flags & UPL_COPYOUT_FROM)) { | |
7985 | SET_PAGE_DIRTY(dst_page, TRUE); | |
7986 | } | |
7987 | if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) { | |
7988 | pmap_sync_page_attributes_phys(dst_page->phys_page); | |
7989 | dst_page->written_by_kernel = FALSE; | |
7990 | } | |
7991 | ||
7992 | record_phys_addr: | |
7993 | if (dst_page->busy) | |
7994 | upl->flags |= UPL_HAS_BUSY; | |
7995 | ||
7996 | pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); | |
7997 | assert(pg_num == (dst_offset-offset)/PAGE_SIZE); | |
7998 | lite_list[pg_num>>5] |= 1 << (pg_num & 31); | |
7999 | ||
8000 | if (dst_page->phys_page > upl->highest_page) | |
8001 | upl->highest_page = dst_page->phys_page; | |
8002 | ||
8003 | if (user_page_list) { | |
8004 | user_page_list[entry].phys_addr = dst_page->phys_page; | |
8005 | user_page_list[entry].pageout = dst_page->pageout; | |
8006 | user_page_list[entry].absent = dst_page->absent; | |
8007 | user_page_list[entry].dirty = dst_page->dirty; | |
8008 | user_page_list[entry].precious = dst_page->precious; | |
8009 | user_page_list[entry].device = FALSE; | |
8010 | user_page_list[entry].needed = FALSE; | |
8011 | if (dst_page->clustered == TRUE) | |
8012 | user_page_list[entry].speculative = dst_page->speculative; | |
8013 | else | |
8014 | user_page_list[entry].speculative = FALSE; | |
8015 | user_page_list[entry].cs_validated = dst_page->cs_validated; | |
8016 | user_page_list[entry].cs_tainted = dst_page->cs_tainted; | |
8017 | } | |
8018 | if (object != kernel_object && object != compressor_object) { | |
8019 | /* | |
8020 | * someone is explicitly grabbing this page... | |
8021 | * update clustered and speculative state | |
8022 | * | |
8023 | */ | |
8024 | if (dst_page->clustered) | |
8025 | VM_PAGE_CONSUME_CLUSTERED(dst_page); | |
8026 | } | |
8027 | entry++; | |
8028 | dst_offset += PAGE_SIZE_64; | |
8029 | xfer_size -= PAGE_SIZE; | |
8030 | size_in_pages--; | |
8031 | ||
8032 | if (dwp->dw_mask) { | |
8033 | VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count); | |
8034 | ||
8035 | if (dw_count >= dw_limit) { | |
8036 | vm_page_do_delayed_work(object, &dw_array[0], dw_count); | |
8037 | ||
8038 | dwp = &dw_array[0]; | |
8039 | dw_count = 0; | |
8040 | } | |
8041 | } | |
8042 | } | |
8043 | if (dw_count) | |
8044 | vm_page_do_delayed_work(object, &dw_array[0], dw_count); | |
8045 | ||
8046 | finish: | |
8047 | if (user_page_list && set_cache_attr_needed == TRUE) | |
8048 | vm_object_set_pmap_cache_attr(object, user_page_list, entry, TRUE); | |
8049 | ||
8050 | if (page_list_count != NULL) { | |
8051 | if (upl->flags & UPL_INTERNAL) | |
8052 | *page_list_count = 0; | |
8053 | else if (*page_list_count > entry) | |
8054 | *page_list_count = entry; | |
8055 | } | |
8056 | vm_object_unlock(object); | |
8057 | ||
8058 | if (cntrl_flags & UPL_BLOCK_ACCESS) { | |
8059 | /* | |
8060 | * We've marked all the pages "busy" so that future | |
8061 | * page faults will block. | |
8062 | * Now remove the mapping for these pages, so that they | |
8063 | * can't be accessed without causing a page fault. | |
8064 | */ | |
8065 | vm_object_pmap_protect(object, offset, (vm_object_size_t)size, | |
8066 | PMAP_NULL, 0, VM_PROT_NONE); | |
8067 | assert(!object->blocked_access); | |
8068 | object->blocked_access = TRUE; | |
8069 | } | |
8070 | return KERN_SUCCESS; | |
8071 | ||
8072 | return_err: | |
8073 | dw_index = 0; | |
8074 | ||
8075 | for (; offset < dst_offset; offset += PAGE_SIZE) { | |
8076 | boolean_t need_unwire; | |
8077 | ||
8078 | dst_page = vm_page_lookup(object, offset); | |
8079 | ||
8080 | if (dst_page == VM_PAGE_NULL) | |
8081 | panic("vm_object_iopl_request: Wired page missing.\n"); | |
8082 | ||
8083 | /* | |
8084 | * if we've already processed this page in an earlier | |
8085 | * dw_do_work, we need to undo the wiring... we will | |
8086 | * leave the dirty and reference bits on if they | |
8087 | * were set, since we don't have a good way of knowing | |
8088 | * what the previous state was and we won't get here | |
8089 | * under any normal circumstances... we will always | |
8090 | * clear BUSY and wakeup any waiters via vm_page_free | |
8091 | * or PAGE_WAKEUP_DONE | |
8092 | */ | |
8093 | need_unwire = TRUE; | |
8094 | ||
8095 | if (dw_count) { | |
8096 | if (dw_array[dw_index].dw_m == dst_page) { | |
8097 | /* | |
8098 | * still in the deferred work list | |
8099 | * which means we haven't yet called | |
8100 | * vm_page_wire on this page | |
8101 | */ | |
8102 | need_unwire = FALSE; | |
8103 | ||
8104 | dw_index++; | |
8105 | dw_count--; | |
8106 | } | |
8107 | } | |
8108 | vm_page_lock_queues(); | |
8109 | ||
8110 | if (dst_page->absent || free_wired_pages == TRUE) { | |
8111 | vm_page_free(dst_page); | |
8112 | ||
8113 | need_unwire = FALSE; | |
8114 | } else { | |
8115 | if (need_unwire == TRUE) | |
8116 | vm_page_unwire(dst_page, TRUE); | |
8117 | ||
8118 | PAGE_WAKEUP_DONE(dst_page); | |
8119 | } | |
8120 | vm_page_unlock_queues(); | |
8121 | ||
8122 | if (need_unwire == TRUE) | |
8123 | VM_STAT_INCR(reactivations); | |
8124 | } | |
8125 | #if UPL_DEBUG | |
8126 | upl->upl_state = 2; | |
8127 | #endif | |
8128 | if (! (upl->flags & UPL_KERNEL_OBJECT)) { | |
8129 | vm_object_activity_end(object); | |
8130 | vm_object_collapse(object, 0, TRUE); | |
8131 | } | |
8132 | vm_object_unlock(object); | |
8133 | upl_destroy(upl); | |
8134 | ||
8135 | return ret; | |
8136 | } | |
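/*
 * Illustrative sketch (not from the original source): the "lite list"
 * filled in by vm_object_iopl_request() above packs one bit per page
 * into 32-bit words, so page N lives in word N >> 5 at bit N & 31,
 * exactly as in the inline update at the "record_phys_addr" step.
 * A minimal set/test pair over an array sized to (npages + 31) / 32
 * words would look like this; the helper names are hypothetical.
 */
static inline void
lite_list_set(uint32_t *lite_list, unsigned int pg_num)
{
	lite_list[pg_num >> 5] |= 1U << (pg_num & 31);
}

static inline boolean_t
lite_list_test(const uint32_t *lite_list, unsigned int pg_num)
{
	return (lite_list[pg_num >> 5] & (1U << (pg_num & 31))) ? TRUE : FALSE;
}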
8137 | ||
8138 | kern_return_t | |
8139 | upl_transpose( | |
8140 | upl_t upl1, | |
8141 | upl_t upl2) | |
8142 | { | |
8143 | kern_return_t retval; | |
8144 | boolean_t upls_locked; | |
8145 | vm_object_t object1, object2; | |
8146 | ||
8147 | if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) { | |
8148 | return KERN_INVALID_ARGUMENT; | |
8149 | } | |
8150 | ||
8151 | upls_locked = FALSE; | |
8152 | ||
8153 | /* | |
8154 | * Since we need to lock both UPLs at the same time, | |
8155 | * avoid deadlocks by always taking locks in the same order. | |
8156 | */ | |
8157 | if (upl1 < upl2) { | |
8158 | upl_lock(upl1); | |
8159 | upl_lock(upl2); | |
8160 | } else { | |
8161 | upl_lock(upl2); | |
8162 | upl_lock(upl1); | |
8163 | } | |
8164 | upls_locked = TRUE; /* the UPLs will need to be unlocked */ | |
8165 | ||
8166 | object1 = upl1->map_object; | |
8167 | object2 = upl2->map_object; | |
8168 | ||
8169 | if (upl1->offset != 0 || upl2->offset != 0 || | |
8170 | upl1->size != upl2->size) { | |
8171 | /* | |
8172 | * We deal only with full objects, not subsets. | |
8173 | * That's because we exchange the entire backing store info | |
8174 | * for the objects: pager, resident pages, etc... We can't do | |
8175 | * only part of it. | |
8176 | */ | |
8177 | retval = KERN_INVALID_VALUE; | |
8178 | goto done; | |
8179 | } | |
8180 | ||
8181 | /* | |
8182 | * Transpose the VM objects' backing store. | |
8183 | */ | |
8184 | retval = vm_object_transpose(object1, object2, | |
8185 | (vm_object_size_t) upl1->size); | |
8186 | ||
8187 | if (retval == KERN_SUCCESS) { | |
8188 | /* | |
8189 | * Make each UPL point to the correct VM object, i.e. the | |
8190 | * object holding the pages that the UPL refers to... | |
8191 | */ | |
8192 | #if CONFIG_IOSCHED || UPL_DEBUG | |
8193 | if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) { | |
8194 | vm_object_lock(object1); | |
8195 | vm_object_lock(object2); | |
8196 | } | |
8197 | if (upl1->flags & UPL_TRACKED_BY_OBJECT) | |
8198 | queue_remove(&object1->uplq, upl1, upl_t, uplq); | |
8199 | if (upl2->flags & UPL_TRACKED_BY_OBJECT) | |
8200 | queue_remove(&object2->uplq, upl2, upl_t, uplq); | |
8201 | #endif | |
8202 | upl1->map_object = object2; | |
8203 | upl2->map_object = object1; | |
8204 | ||
8205 | #if CONFIG_IOSCHED || UPL_DEBUG | |
8206 | if (upl1->flags & UPL_TRACKED_BY_OBJECT) | |
8207 | queue_enter(&object2->uplq, upl1, upl_t, uplq); | |
8208 | if (upl2->flags & UPL_TRACKED_BY_OBJECT) | |
8209 | queue_enter(&object1->uplq, upl2, upl_t, uplq); | |
8210 | if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) { | |
8211 | vm_object_unlock(object2); | |
8212 | vm_object_unlock(object1); | |
8213 | } | |
8214 | #endif | |
8215 | } | |
8216 | ||
8217 | done: | |
8218 | /* | |
8219 | * Cleanup. | |
8220 | */ | |
8221 | if (upls_locked) { | |
8222 | upl_unlock(upl1); | |
8223 | upl_unlock(upl2); | |
8224 | upls_locked = FALSE; | |
8225 | } | |
8226 | ||
8227 | return retval; | |
8228 | } | |
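/*
 * Illustrative sketch (not from the original source): the deadlock
 * avoidance used by upl_transpose() above generalizes to any pair of
 * locks -- always acquire in ascending address order, so that two
 * threads locking the same pair can never each hold one lock while
 * waiting for the other.  Shown here with UPL locks; the helper name
 * is hypothetical.
 */
static void
upl_lock_pair_ordered(upl_t a, upl_t b)
{
	/* assumes a != b; equal pointers would double-lock */
	if (a < b) {
		upl_lock(a);
		upl_lock(b);
	} else {
		upl_lock(b);
		upl_lock(a);
	}
}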
8229 | ||
8230 | void | |
8231 | upl_range_needed( | |
8232 | upl_t upl, | |
8233 | int index, | |
8234 | int count) | |
8235 | { | |
8236 | upl_page_info_t *user_page_list; | |
8237 | int size_in_pages; | |
8238 | ||
8239 | if ( !(upl->flags & UPL_INTERNAL) || count <= 0) | |
8240 | return; | |
8241 | ||
8242 | size_in_pages = upl->size / PAGE_SIZE; | |
8243 | ||
8244 | user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); | |
8245 | ||
8246 | while (count-- && index < size_in_pages) | |
8247 | user_page_list[index++].needed = TRUE; | |
8248 | } | |
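/*
 * Illustrative sketch (not from the original source): for an internal
 * UPL, the upl_page_info_t array is co-allocated directly behind the
 * struct upl header, so it is reached by pointer arithmetic rather
 * than through a separate field -- the same computation
 * upl_range_needed() performs above.  The helper name is hypothetical.
 */
static inline upl_page_info_t *
upl_internal_page_list(upl_t upl)
{
	/* only meaningful when (upl->flags & UPL_INTERNAL) is set */
	return (upl_page_info_t *)(((uintptr_t)upl) + sizeof(struct upl));
}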
8249 | ||
8250 | ||
8251 | /* | |
8252 | * ENCRYPTED SWAP: | |
8253 | * | |
8254 | * Rationale: the user might have some encrypted data on disk (via | |
8255 | * FileVault or any other mechanism). That data is then decrypted in | |
8256 | * memory, which is safe as long as the machine is secure. But that | |
8257 | * decrypted data in memory could be paged out to disk by the default | |
8258 | * pager. The data would then be stored on disk in clear (not encrypted) | |
8259 | * and it could be accessed by anyone who gets physical access to the | |
8260 | * disk (if the laptop or the disk gets stolen for example). This weakens | |
8261 | * the security offered by FileVault. | |
8262 | * | |
8263 | * Solution: the default pager will optionally request that all the | |
8264 | * pages it gathers for pageout be encrypted, via the UPL interfaces, | |
8265 | * before it sends this UPL to disk via the vnode_pageout() path. | |
8266 | * | |
8267 | * Notes: | |
8268 | * | |
8269 | * To avoid disrupting the VM LRU algorithms, we want to keep the | |
8270 | * clean-in-place mechanisms, which allow us to send some extra pages to | |
8271 | * swap (clustering) without actually removing them from the user's | |
8272 | * address space. We don't want the user to unknowingly access encrypted | |
8273 | * data, so we have to actually remove the encrypted pages from the page | |
8274 | * table. When the user accesses the data, the hardware will fail to | |
8275 | * locate the virtual page in its page table and will trigger a page | |
8276 | * fault. We can then decrypt the page and enter it in the page table | |
8277 | * again. Whenever we allow the user to access the contents of a page, | |
8278 | * we have to make sure it's not encrypted. | |
8279 | * | |
8280 | * | |
8281 | */ | |
8282 | /* | |
8283 | * ENCRYPTED SWAP: | |
8284 | * Reserve of virtual addresses in the kernel address space. | |
8285 | * We need to map the physical pages in the kernel, so that we | |
8286 | * can call the encryption/decryption routines with a kernel | |
8287 | * virtual address. We keep this pool of pre-allocated kernel | |
8288 | * virtual addresses so that we don't have to scan the kernel's | |
8289 | * virtual address space each time we need to encrypt or decrypt | |
8290 | * a physical page. | |
8291 | * It would be nice to be able to encrypt and decrypt in physical | |
8292 | * mode but that might not always be more efficient... | |
8293 | */ | |
8294 | decl_simple_lock_data(,vm_paging_lock) | |
8295 | #define VM_PAGING_NUM_PAGES 64 | |
8296 | vm_map_offset_t vm_paging_base_address = 0; | |
8297 | boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, }; | |
8298 | int vm_paging_max_index = 0; | |
8299 | int vm_paging_page_waiter = 0; | |
8300 | int vm_paging_page_waiter_total = 0; | |
8301 | unsigned long vm_paging_no_kernel_page = 0; | |
8302 | unsigned long vm_paging_objects_mapped = 0; | |
8303 | unsigned long vm_paging_pages_mapped = 0; | |
8304 | unsigned long vm_paging_objects_mapped_slow = 0; | |
8305 | unsigned long vm_paging_pages_mapped_slow = 0; | |
8306 | ||
8307 | void | |
8308 | vm_paging_map_init(void) | |
8309 | { | |
8310 | kern_return_t kr; | |
8311 | vm_map_offset_t page_map_offset; | |
8312 | vm_map_entry_t map_entry; | |
8313 | ||
8314 | assert(vm_paging_base_address == 0); | |
8315 | ||
8316 | /* | |
8317 | * Initialize our pool of pre-allocated kernel | |
8318 | * virtual addresses. | |
8319 | */ | |
8320 | page_map_offset = 0; | |
8321 | kr = vm_map_find_space(kernel_map, | |
8322 | &page_map_offset, | |
8323 | VM_PAGING_NUM_PAGES * PAGE_SIZE, | |
8324 | 0, | |
8325 | 0, | |
8326 | &map_entry); | |
8327 | if (kr != KERN_SUCCESS) { | |
8328 | panic("vm_paging_map_init: kernel_map full\n"); | |
8329 | } | |
8330 | map_entry->object.vm_object = kernel_object; | |
8331 | map_entry->offset = page_map_offset; | |
8332 | map_entry->protection = VM_PROT_NONE; | |
8333 | map_entry->max_protection = VM_PROT_NONE; | |
8334 | map_entry->permanent = TRUE; | |
8335 | vm_object_reference(kernel_object); | |
8336 | vm_map_unlock(kernel_map); | |
8337 | ||
8338 | assert(vm_paging_base_address == 0); | |
8339 | vm_paging_base_address = page_map_offset; | |
8340 | } | |
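/*
 * Illustrative sketch (not from the original source): the pool set up
 * by vm_paging_map_init() is one contiguous run of kernel virtual
 * pages, so a slot index and its virtual address convert back and
 * forth with simple arithmetic.  These hypothetical helpers mirror
 * the computations done inline in vm_paging_map_object() and
 * vm_paging_unmap_object() below.
 */
static inline vm_map_offset_t
vm_paging_slot_to_addr(int i)
{
	assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
	return vm_paging_base_address + ((vm_map_offset_t)i * PAGE_SIZE);
}

static inline int
vm_paging_addr_to_slot(vm_map_offset_t addr)
{
	assert(addr >= vm_paging_base_address &&
	       addr < vm_paging_base_address +
	       (VM_PAGING_NUM_PAGES * PAGE_SIZE));
	return (int)((addr - vm_paging_base_address) >> PAGE_SHIFT);
}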
8341 | ||
8342 | /* | |
8343 | * ENCRYPTED SWAP: | |
8344 | * vm_paging_map_object: | |
8345 | * Maps part of a VM object's pages in the kernel | |
8346 | * virtual address space, using the pre-allocated | |
8347 | * kernel virtual addresses, if possible. | |
8348 | * Context: | |
8349 | * The VM object is locked. This lock will get | |
8350 | * dropped and re-acquired though, so the caller | |
8351 | * must make sure the VM object is kept alive | |
8352 | * (by holding a VM map that has a reference | |
8353 | * on it, for example, or taking an extra reference). | |
8354 | * The page should also be kept busy to prevent | |
8355 | * it from being reclaimed. | |
8356 | */ | |
8357 | kern_return_t | |
8358 | vm_paging_map_object( | |
8359 | vm_page_t page, | |
8360 | vm_object_t object, | |
8361 | vm_object_offset_t offset, | |
8362 | vm_prot_t protection, | |
8363 | boolean_t can_unlock_object, | |
8364 | vm_map_size_t *size, /* IN/OUT */ | |
8365 | vm_map_offset_t *address, /* OUT */ | |
8366 | boolean_t *need_unmap) /* OUT */ | |
8367 | { | |
8368 | kern_return_t kr; | |
8369 | vm_map_offset_t page_map_offset; | |
8370 | vm_map_size_t map_size; | |
8371 | vm_object_offset_t object_offset; | |
8372 | int i; | |
8373 | ||
8374 | if (page != VM_PAGE_NULL && *size == PAGE_SIZE) { | |
8375 | /* use permanent 1-to-1 kernel mapping of physical memory ? */ | |
8376 | #if __x86_64__ | |
8377 | *address = (vm_map_offset_t) | |
8378 | PHYSMAP_PTOV((pmap_paddr_t)page->phys_page << | |
8379 | PAGE_SHIFT); | |
8380 | *need_unmap = FALSE; | |
8381 | return KERN_SUCCESS; | |
8382 | #else | |
8383 | #warning "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..." | |
8384 | #endif | |
8385 | ||
8386 | assert(page->busy); | |
8387 | /* | |
8388 | * Use one of the pre-allocated kernel virtual addresses | |
8389 | * and just enter the VM page in the kernel address space | |
8390 | * at that virtual address. | |
8391 | */ | |
8392 | simple_lock(&vm_paging_lock); | |
8393 | ||
8394 | /* | |
8395 | * Try and find an available kernel virtual address | |
8396 | * from our pre-allocated pool. | |
8397 | */ | |
8398 | page_map_offset = 0; | |
8399 | for (;;) { | |
8400 | for (i = 0; i < VM_PAGING_NUM_PAGES; i++) { | |
8401 | if (vm_paging_page_inuse[i] == FALSE) { | |
8402 | page_map_offset = | |
8403 | vm_paging_base_address + | |
8404 | (i * PAGE_SIZE); | |
8405 | break; | |
8406 | } | |
8407 | } | |
8408 | if (page_map_offset != 0) { | |
8409 | /* found a space to map our page ! */ | |
8410 | break; | |
8411 | } | |
8412 | ||
8413 | if (can_unlock_object) { | |
8414 | /* | |
8415 | * If we can afford to unlock the VM object, | |
8416 | * let's take the slow path now... | |
8417 | */ | |
8418 | break; | |
8419 | } | |
8420 | /* | |
8421 | * We can't afford to unlock the VM object, so | |
8422 | * let's wait for a space to become available... | |
8423 | */ | |
8424 | vm_paging_page_waiter_total++; | |
8425 | vm_paging_page_waiter++; | |
8426 | kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT); | |
8427 | if (kr == THREAD_WAITING) { | |
8428 | simple_unlock(&vm_paging_lock); | |
8429 | kr = thread_block(THREAD_CONTINUE_NULL); | |
8430 | simple_lock(&vm_paging_lock); | |
8431 | } | |
8432 | vm_paging_page_waiter--; | |
8433 | /* ... and try again */ | |
8434 | } | |
8435 | ||
8436 | if (page_map_offset != 0) { | |
8437 | /* | |
8438 | * We found a kernel virtual address; | |
8439 | * map the physical page to that virtual address. | |
8440 | */ | |
8441 | if (i > vm_paging_max_index) { | |
8442 | vm_paging_max_index = i; | |
8443 | } | |
8444 | vm_paging_page_inuse[i] = TRUE; | |
8445 | simple_unlock(&vm_paging_lock); | |
8446 | ||
8447 | page->pmapped = TRUE; | |
8448 | ||
8449 | /* | |
8450 | * Keep the VM object locked over the PMAP_ENTER | |
8451 | * and the actual use of the page by the kernel, | |
8452 | * or this pmap mapping might get undone by a | |
8453 | * vm_object_pmap_protect() call... | |
8454 | */ | |
8455 | PMAP_ENTER(kernel_pmap, | |
8456 | page_map_offset, | |
8457 | page, | |
8458 | protection, | |
8459 | VM_PROT_NONE, | |
8460 | 0, | |
8461 | TRUE); | |
8462 | vm_paging_objects_mapped++; | |
8463 | vm_paging_pages_mapped++; | |
8464 | *address = page_map_offset; | |
8465 | *need_unmap = TRUE; | |
8466 | ||
8467 | /* all done and mapped, ready to use ! */ | |
8468 | return KERN_SUCCESS; | |
8469 | } | |
8470 | ||
8471 | /* | |
8472 | * We ran out of pre-allocated kernel virtual | |
8473 | * addresses. Just map the page in the kernel | |
8474 | * the slow and regular way. | |
8475 | */ | |
8476 | vm_paging_no_kernel_page++; | |
8477 | simple_unlock(&vm_paging_lock); | |
8478 | } | |
8479 | ||
8480 | if (! can_unlock_object) { | |
8481 | *address = 0; | |
8482 | *size = 0; | |
8483 | *need_unmap = FALSE; | |
8484 | return KERN_NOT_SUPPORTED; | |
8485 | } | |
8486 | ||
8487 | object_offset = vm_object_trunc_page(offset); | |
8488 | map_size = vm_map_round_page(*size, | |
8489 | VM_MAP_PAGE_MASK(kernel_map)); | |
8490 | ||
8491 | /* | |
8492 | * Try and map the required range of the object | |
8493 | * in the kernel_map | |
8494 | */ | |
8495 | ||
8496 | vm_object_reference_locked(object); /* for the map entry */ | |
8497 | vm_object_unlock(object); | |
8498 | ||
8499 | kr = vm_map_enter(kernel_map, | |
8500 | address, | |
8501 | map_size, | |
8502 | 0, | |
8503 | VM_FLAGS_ANYWHERE, | |
8504 | object, | |
8505 | object_offset, | |
8506 | FALSE, | |
8507 | protection, | |
8508 | VM_PROT_ALL, | |
8509 | VM_INHERIT_NONE); | |
8510 | if (kr != KERN_SUCCESS) { | |
8511 | *address = 0; | |
8512 | *size = 0; | |
8513 | *need_unmap = FALSE; | |
8514 | vm_object_deallocate(object); /* for the map entry */ | |
8515 | vm_object_lock(object); | |
8516 | return kr; | |
8517 | } | |
8518 | ||
8519 | *size = map_size; | |
8520 | ||
8521 | /* | |
8522 | * Enter the mapped pages in the page table now. | |
8523 | */ | |
8524 | vm_object_lock(object); | |
8525 | /* | |
8526 | * VM object must be kept locked from before PMAP_ENTER() | |
8527 | * until after the kernel is done accessing the page(s). | |
8528 | * Otherwise, the pmap mappings in the kernel could be | |
8529 | * undone by a call to vm_object_pmap_protect(). | |
8530 | */ | |
8531 | ||
8532 | for (page_map_offset = 0; | |
8533 | map_size != 0; | |
8534 | map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) { | |
8535 | ||
8536 | page = vm_page_lookup(object, offset + page_map_offset); | |
8537 | if (page == VM_PAGE_NULL) { | |
8538 | printf("vm_paging_map_object: no page !?"); | |
8539 | vm_object_unlock(object); | |
8540 | kr = vm_map_remove(kernel_map, *address, *size, | |
8541 | VM_MAP_NO_FLAGS); | |
8542 | assert(kr == KERN_SUCCESS); | |
8543 | *address = 0; | |
8544 | *size = 0; | |
8545 | *need_unmap = FALSE; | |
8546 | vm_object_lock(object); | |
8547 | return KERN_MEMORY_ERROR; | |
8548 | } | |
8549 | page->pmapped = TRUE; | |
8550 | ||
8551 | //assert(pmap_verify_free(page->phys_page)); | |
8552 | PMAP_ENTER(kernel_pmap, | |
8553 | *address + page_map_offset, | |
8554 | page, | |
8555 | protection, | |
8556 | VM_PROT_NONE, | |
8557 | 0, | |
8558 | TRUE); | |
8559 | } | |
8560 | ||
8561 | vm_paging_objects_mapped_slow++; | |
8562 | vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64); | |
8563 | ||
8564 | *need_unmap = TRUE; | |
8565 | ||
8566 | return KERN_SUCCESS; | |
8567 | } | |
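/*
 * Illustrative sketch (not from the original source): the x86_64 fast
 * path at the top of vm_paging_map_object() avoids mapping anything
 * at all, because the physmap keeps all of physical memory permanently
 * mapped in the kernel; the page's virtual address is a pure function
 * of its physical page number.  The helper name is hypothetical.
 */
#if __x86_64__
static inline vm_map_offset_t
vm_paging_physmap_addr(vm_page_t page)
{
	return (vm_map_offset_t)PHYSMAP_PTOV(
		(pmap_paddr_t)page->phys_page << PAGE_SHIFT);
}
#endif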
8568 | ||
8569 | /* | |
8570 | * ENCRYPTED SWAP: | |
8571 | * vm_paging_unmap_object: | |
8572 | * Unmaps part of a VM object's pages from the kernel | |
8573 | * virtual address space. | |
8574 | * Context: | |
8575 | * The VM object is locked. This lock will get | |
8576 | * dropped and re-acquired though. | |
8577 | */ | |
8578 | void | |
8579 | vm_paging_unmap_object( | |
8580 | vm_object_t object, | |
8581 | vm_map_offset_t start, | |
8582 | vm_map_offset_t end) | |
8583 | { | |
8584 | kern_return_t kr; | |
8585 | int i; | |
8586 | ||
8587 | if ((vm_paging_base_address == 0) || | |
8588 | (start < vm_paging_base_address) || | |
8589 | (end > (vm_paging_base_address | |
8590 | + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) { | |
8591 | /* | |
8592 | * We didn't use our pre-allocated pool of | |
8593 | * kernel virtual addresses. Deallocate the | |
8594 | * virtual memory. | |
8595 | */ | |
8596 | if (object != VM_OBJECT_NULL) { | |
8597 | vm_object_unlock(object); | |
8598 | } | |
8599 | kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS); | |
8600 | if (object != VM_OBJECT_NULL) { | |
8601 | vm_object_lock(object); | |
8602 | } | |
8603 | assert(kr == KERN_SUCCESS); | |
8604 | } else { | |
8605 | /* | |
8606 | * We used a kernel virtual address from our | |
8607 | * pre-allocated pool. Put it back in the pool | |
8608 | * for next time. | |
8609 | */ | |
8610 | assert(end - start == PAGE_SIZE); | |
8611 | i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT); | |
8612 | assert(i >= 0 && i < VM_PAGING_NUM_PAGES); | |
8613 | ||
8614 | /* undo the pmap mapping */ | |
8615 | pmap_remove(kernel_pmap, start, end); | |
8616 | ||
8617 | simple_lock(&vm_paging_lock); | |
8618 | vm_paging_page_inuse[i] = FALSE; | |
8619 | if (vm_paging_page_waiter) { | |
8620 | thread_wakeup(&vm_paging_page_waiter); | |
8621 | } | |
8622 | simple_unlock(&vm_paging_lock); | |
8623 | } | |
8624 | } | |
8625 | ||
8626 | #if ENCRYPTED_SWAP | |
8627 | /* | |
8628 | * Encryption data. | |
8629 | * "iv" is the "initial vector". Ideally, we want to | |
8630 | * have a different one for each page we encrypt, so that | |
8631 | * crackers can't find encryption patterns too easily. | |
8632 | */ | |
8633 | #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */ | |
8634 | boolean_t swap_crypt_ctx_initialized = FALSE; | |
8635 | uint32_t swap_crypt_key[8]; /* big enough for a 256-bit key */ | |
8636 | aes_ctx swap_crypt_ctx; | |
8637 | const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, }; | |
8638 | ||
8639 | #if DEBUG | |
8640 | boolean_t swap_crypt_ctx_tested = FALSE; | |
8641 | unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096))); | |
8642 | unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096))); | |
8643 | unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096))); | |
8644 | #endif /* DEBUG */ | |
8645 | ||
8646 | /* | |
8647 | * Initialize the encryption context: key and key size. | |
8648 | */ | |
8649 | void swap_crypt_ctx_initialize(void); /* forward */ | |
8650 | void | |
8651 | swap_crypt_ctx_initialize(void) | |
8652 | { | |
8653 | unsigned int i; | |
8654 | ||
8655 | /* | |
8656 | * No need for locking to protect swap_crypt_ctx_initialized | |
8657 | * because the first use of encryption will come from the | |
8658 | * pageout thread (we won't pagein before there's been a pageout) | |
8659 | * and there's only one pageout thread. | |
8660 | */ | |
8661 | if (swap_crypt_ctx_initialized == FALSE) { | |
8662 | for (i = 0; | |
8663 | i < (sizeof (swap_crypt_key) / | |
8664 | sizeof (swap_crypt_key[0])); | |
8665 | i++) { | |
8666 | swap_crypt_key[i] = random(); | |
8667 | } | |
8668 | aes_encrypt_key((const unsigned char *) swap_crypt_key, | |
8669 | SWAP_CRYPT_AES_KEY_SIZE, | |
8670 | &swap_crypt_ctx.encrypt); | |
8671 | aes_decrypt_key((const unsigned char *) swap_crypt_key, | |
8672 | SWAP_CRYPT_AES_KEY_SIZE, | |
8673 | &swap_crypt_ctx.decrypt); | |
8674 | swap_crypt_ctx_initialized = TRUE; | |
8675 | } | |
8676 | ||
8677 | #if DEBUG | |
8678 | /* | |
8679 | * Validate the encryption algorithms. | |
8680 | */ | |
8681 | if (swap_crypt_ctx_tested == FALSE) { | |
8682 | /* initialize */ | |
8683 | for (i = 0; i < 4096; i++) { | |
8684 | swap_crypt_test_page_ref[i] = (char) i; | |
8685 | } | |
8686 | /* encrypt */ | |
8687 | aes_encrypt_cbc(swap_crypt_test_page_ref, | |
8688 | swap_crypt_null_iv, | |
8689 | PAGE_SIZE / AES_BLOCK_SIZE, | |
8690 | swap_crypt_test_page_encrypt, | |
8691 | &swap_crypt_ctx.encrypt); | |
8692 | /* decrypt */ | |
8693 | aes_decrypt_cbc(swap_crypt_test_page_encrypt, | |
8694 | swap_crypt_null_iv, | |
8695 | PAGE_SIZE / AES_BLOCK_SIZE, | |
8696 | swap_crypt_test_page_decrypt, | |
8697 | &swap_crypt_ctx.decrypt); | |
8698 | /* compare result with original */ | |
8699 | for (i = 0; i < 4096; i ++) { | |
8700 | if (swap_crypt_test_page_decrypt[i] != | |
8701 | swap_crypt_test_page_ref[i]) { | |
8702 | panic("encryption test failed"); | |
8703 | } | |
8704 | } | |
8705 | ||
8706 | /* encrypt again */ | |
8707 | aes_encrypt_cbc(swap_crypt_test_page_decrypt, | |
8708 | swap_crypt_null_iv, | |
8709 | PAGE_SIZE / AES_BLOCK_SIZE, | |
8710 | swap_crypt_test_page_decrypt, | |
8711 | &swap_crypt_ctx.encrypt); | |
8712 | /* decrypt in place */ | |
8713 | aes_decrypt_cbc(swap_crypt_test_page_decrypt, | |
8714 | swap_crypt_null_iv, | |
8715 | PAGE_SIZE / AES_BLOCK_SIZE, | |
8716 | swap_crypt_test_page_decrypt, | |
8717 | &swap_crypt_ctx.decrypt); | |
8718 | for (i = 0; i < 4096; i ++) { | |
8719 | if (swap_crypt_test_page_decrypt[i] != | |
8720 | swap_crypt_test_page_ref[i]) { | |
8721 | panic("in place encryption test failed"); | |
8722 | } | |
8723 | } | |
8724 | ||
8725 | swap_crypt_ctx_tested = TRUE; | |
8726 | } | |
8727 | #endif /* DEBUG */ | |
8728 | } | |
8729 | ||
8730 | /* | |
8731 | * ENCRYPTED SWAP: | |
8732 | * vm_page_encrypt: | |
8733 | * Encrypt the given page, for secure paging. | |
8734 | * The page might already be mapped at kernel virtual | |
8735 | * address "kernel_mapping_offset". Otherwise, we need | |
8736 | * to map it. | |
8737 | * | |
8738 | * Context: | |
8739 | * The page's object is locked, but this lock will be released | |
8740 | * and re-acquired. | |
8741 | * The page is busy and not accessible by users (not entered in any pmap). | |
8742 | */ | |
8743 | void | |
8744 | vm_page_encrypt( | |
8745 | vm_page_t page, | |
8746 | vm_map_offset_t kernel_mapping_offset) | |
8747 | { | |
8748 | kern_return_t kr; | |
8749 | vm_map_size_t kernel_mapping_size; | |
8750 | boolean_t kernel_mapping_needs_unmap; | |
8751 | vm_offset_t kernel_vaddr; | |
8752 | union { | |
8753 | unsigned char aes_iv[AES_BLOCK_SIZE]; | |
8754 | struct { | |
8755 | memory_object_t pager_object; | |
8756 | vm_object_offset_t paging_offset; | |
8757 | } vm; | |
8758 | } encrypt_iv; | |
8759 | ||
8760 | if (! vm_pages_encrypted) { | |
8761 | vm_pages_encrypted = TRUE; | |
8762 | } | |
8763 | ||
8764 | assert(page->busy); | |
8765 | ||
8766 | if (page->encrypted) { | |
8767 | /* | |
8768 | * Already encrypted: no need to do it again. | |
8769 | */ | |
8770 | vm_page_encrypt_already_encrypted_counter++; | |
8771 | return; | |
8772 | } | |
8773 | assert(page->dirty || page->precious); | |
8774 | ||
8775 | ASSERT_PAGE_DECRYPTED(page); | |
8776 | ||
8777 | /* | |
8778 | * Take a paging-in-progress reference to keep the object | |
8779 | * alive even if we have to unlock it (in vm_paging_map_object() | |
8780 | * for example)... | |
8781 | */ | |
8782 | vm_object_paging_begin(page->object); | |
8783 | ||
8784 | if (kernel_mapping_offset == 0) { | |
8785 | /* | |
8786 | * The page hasn't already been mapped in kernel space | |
8787 | * by the caller. Map it now, so that we can access | |
8788 | * its contents and encrypt them. | |
8789 | */ | |
8790 | kernel_mapping_size = PAGE_SIZE; | |
8791 | kernel_mapping_needs_unmap = FALSE; | |
8792 | kr = vm_paging_map_object(page, | |
8793 | page->object, | |
8794 | page->offset, | |
8795 | VM_PROT_READ | VM_PROT_WRITE, | |
8796 | FALSE, | |
8797 | &kernel_mapping_size, | |
8798 | &kernel_mapping_offset, | |
8799 | &kernel_mapping_needs_unmap); | |
8800 | if (kr != KERN_SUCCESS) { | |
8801 | panic("vm_page_encrypt: " | |
8802 | "could not map page in kernel: 0x%x\n", | |
8803 | kr); | |
8804 | } | |
8805 | } else { | |
8806 | kernel_mapping_size = 0; | |
8807 | kernel_mapping_needs_unmap = FALSE; | |
8808 | } | |
8809 | kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); | |
8810 | ||
8811 | if (swap_crypt_ctx_initialized == FALSE) { | |
8812 | swap_crypt_ctx_initialize(); | |
8813 | } | |
8814 | assert(swap_crypt_ctx_initialized); | |
8815 | ||
8816 | /* | |
8817 | * Prepare an "initial vector" for the encryption. | |
8818 | * We use the "pager" and the "paging_offset" for that | |
8819 | * page to obfuscate the encrypted data a bit more and | |
8820 | * prevent crackers from finding patterns that they could | |
8821 | * use to break the key. | |
8822 | */ | |
8823 | bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv)); | |
8824 | encrypt_iv.vm.pager_object = page->object->pager; | |
8825 | encrypt_iv.vm.paging_offset = | |
8826 | page->object->paging_offset + page->offset; | |
8827 | ||
8828 | /* encrypt the "initial vector" */ | |
8829 | aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0], | |
8830 | swap_crypt_null_iv, | |
8831 | 1, | |
8832 | &encrypt_iv.aes_iv[0], | |
8833 | &swap_crypt_ctx.encrypt); | |
8834 | ||
8835 | /* | |
8836 | * Encrypt the page. | |
8837 | */ | |
8838 | aes_encrypt_cbc((const unsigned char *) kernel_vaddr, | |
8839 | &encrypt_iv.aes_iv[0], | |
8840 | PAGE_SIZE / AES_BLOCK_SIZE, | |
8841 | (unsigned char *) kernel_vaddr, | |
8842 | &swap_crypt_ctx.encrypt); | |
8843 | ||
8844 | vm_page_encrypt_counter++; | |
8845 | ||
8846 | /* | |
8847 | * Unmap the page from the kernel's address space, | |
8848 | * if we had to map it ourselves. Otherwise, let | |
8849 | * the caller undo the mapping if needed. | |
8850 | */ | |
8851 | if (kernel_mapping_needs_unmap) { | |
8852 | vm_paging_unmap_object(page->object, | |
8853 | kernel_mapping_offset, | |
8854 | kernel_mapping_offset + kernel_mapping_size); | |
8855 | } | |
8856 | ||
8857 | /* | |
8858 | * Clear the "reference" and "modified" bits. | |
8859 | * This should clean up any impact the encryption had | |
8860 | * on them. | |
8861 | * The page was kept busy and disconnected from all pmaps, | |
8862 | * so it can't have been referenced or modified from user | |
8863 | * space. | |
8864 | * The software bits will be reset later after the I/O | |
8865 | * has completed (in upl_commit_range()). | |
8866 | */ | |
8867 | pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED); | |
8868 | ||
8869 | page->encrypted = TRUE; | |
8870 | ||
8871 | vm_object_paging_end(page->object); | |
8872 | } | |
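/*
 * Illustrative sketch (not from the original source): both
 * vm_page_encrypt() and vm_page_decrypt() derive the per-page AES-CBC
 * "initial vector" the same way -- zero a block, overlay the page's
 * {pager, paging offset} pair, and run one CBC block over it with the
 * constant null IV, so every page gets a distinct IV without storing
 * one per page.  Factored out here as a hypothetical helper:
 */
static void
swap_crypt_derive_iv(vm_page_t page, unsigned char iv_out[AES_BLOCK_SIZE])
{
	union {
		unsigned char aes_iv[AES_BLOCK_SIZE];
		struct {
			memory_object_t pager_object;
			vm_object_offset_t paging_offset;
		} vm;
	} iv;

	bzero(&iv.aes_iv[0], sizeof (iv.aes_iv));
	iv.vm.pager_object = page->object->pager;
	iv.vm.paging_offset = page->object->paging_offset + page->offset;

	/* one CBC block over the tuple yields a distinct IV per page */
	aes_encrypt_cbc((const unsigned char *) &iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			iv_out,
			&swap_crypt_ctx.encrypt);
}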
8873 | ||
8874 | /* | |
8875 | * ENCRYPTED SWAP: | |
8876 | * vm_page_decrypt: | |
8877 | * Decrypt the given page. | |
8878 | * The page might already be mapped at kernel virtual | |
8879 | * address "kernel_mapping_offset". Otherwise, we need | |
8880 | * to map it. | |
8881 | * | |
8882 | * Context: | |
8883 | * The page's VM object is locked but will be unlocked and relocked. | |
8884 | * The page is busy and not accessible by users (not entered in any pmap). | |
8885 | */ | |
8886 | void | |
8887 | vm_page_decrypt( | |
8888 | vm_page_t page, | |
8889 | vm_map_offset_t kernel_mapping_offset) | |
8890 | { | |
8891 | kern_return_t kr; | |
8892 | vm_map_size_t kernel_mapping_size; | |
8893 | vm_offset_t kernel_vaddr; | |
8894 | boolean_t kernel_mapping_needs_unmap; | |
8895 | union { | |
8896 | unsigned char aes_iv[AES_BLOCK_SIZE]; | |
8897 | struct { | |
8898 | memory_object_t pager_object; | |
8899 | vm_object_offset_t paging_offset; | |
8900 | } vm; | |
8901 | } decrypt_iv; | |
8902 | boolean_t was_dirty; | |
8903 | ||
8904 | assert(page->busy); | |
8905 | assert(page->encrypted); | |
8906 | ||
8907 | was_dirty = page->dirty; | |
8908 | ||
8909 | /* | |
8910 | * Take a paging-in-progress reference to keep the object | |
8911 | * alive even if we have to unlock it (in vm_paging_map_object() | |
8912 | * for example)... | |
8913 | */ | |
8914 | vm_object_paging_begin(page->object); | |
8915 | ||
8916 | if (kernel_mapping_offset == 0) { | |
8917 | /* | |
8918 | * The page hasn't already been mapped in kernel space | |
8919 | * by the caller. Map it now, so that we can access | |
8920 | * its contents and decrypt them. | |
8921 | */ | |
8922 | kernel_mapping_size = PAGE_SIZE; | |
8923 | kernel_mapping_needs_unmap = FALSE; | |
8924 | kr = vm_paging_map_object(page, | |
8925 | page->object, | |
8926 | page->offset, | |
8927 | VM_PROT_READ | VM_PROT_WRITE, | |
8928 | FALSE, | |
8929 | &kernel_mapping_size, | |
8930 | &kernel_mapping_offset, | |
8931 | &kernel_mapping_needs_unmap); | |
8932 | if (kr != KERN_SUCCESS) { | |
8933 | panic("vm_page_decrypt: " | |
8934 | "could not map page in kernel: 0x%x\n", | |
8935 | kr); | |
8936 | } | |
8937 | } else { | |
8938 | kernel_mapping_size = 0; | |
8939 | kernel_mapping_needs_unmap = FALSE; | |
8940 | } | |
8941 | kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); | |
8942 | ||
8943 | assert(swap_crypt_ctx_initialized); | |
8944 | ||
8945 | /* | |
8946 | * Prepare an "initial vector" for the decryption. | |
8947 | * It has to be the same as the "initial vector" we | |
8948 | * used to encrypt that page. | |
8949 | */ | |
8950 | bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv)); | |
8951 | decrypt_iv.vm.pager_object = page->object->pager; | |
8952 | decrypt_iv.vm.paging_offset = | |
8953 | page->object->paging_offset + page->offset; | |
8954 | ||
8955 | /* encrypt the "initial vector" */ | |
8956 | aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0], | |
8957 | swap_crypt_null_iv, | |
8958 | 1, | |
8959 | &decrypt_iv.aes_iv[0], | |
8960 | &swap_crypt_ctx.encrypt); | |
8961 | ||
8962 | /* | |
8963 | * Decrypt the page. | |
8964 | */ | |
8965 | aes_decrypt_cbc((const unsigned char *) kernel_vaddr, | |
8966 | &decrypt_iv.aes_iv[0], | |
8967 | PAGE_SIZE / AES_BLOCK_SIZE, | |
8968 | (unsigned char *) kernel_vaddr, | |
8969 | &swap_crypt_ctx.decrypt); | |
8970 | vm_page_decrypt_counter++; | |
8971 | ||
8972 | /* | |
8973 | * Unmap the page from the kernel's address space, | |
8974 | * if we had to map it ourselves. Otherwise, let | |
8975 | * the caller undo the mapping if needed. | |
8976 | */ | |
8977 | if (kernel_mapping_needs_unmap) { | |
8978 | vm_paging_unmap_object(page->object, | |
8979 | kernel_vaddr, | |
8980 | kernel_vaddr + PAGE_SIZE); | |
8981 | } | |
8982 | ||
8983 | if (was_dirty) { | |
8984 | /* | |
8985 | * The pager did not specify that the page would be | |
8986 | * clean when it got paged in, so let's not clean it here | |
8987 | * either. | |
8988 | */ | |
8989 | } else { | |
8990 | /* | |
8991 | * After decryption, the page is actually still clean. | |
8992 | * It was encrypted as part of paging, which "cleans" | |
8993 | * the "dirty" pages. | |
8994 | * No one could access it after it was encrypted, | |
8995 | * and the decryption doesn't count. | |
8996 | */ | |
8997 | page->dirty = FALSE; | |
8998 | assert (page->cs_validated == FALSE); | |
8999 | pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED); | |
9000 | } | |
9001 | page->encrypted = FALSE; | |
9002 | ||
9003 | /* | |
9004 | * We've just modified the page's contents via the data cache and part | |
9005 | * of the new contents might still be in the cache and not yet in RAM. | |
9006 | * Since the page is now available and might get gathered in a UPL to | |
9007 | * be part of a DMA transfer from a driver that expects the memory to | |
9008 | * be coherent at this point, we have to flush the data cache. | |
9009 | */ | |
9010 | pmap_sync_page_attributes_phys(page->phys_page); | |
9011 | /* | |
9012 | * Since the page is not mapped yet, some code might assume that it | |
9013 | * doesn't need to invalidate the instruction cache when writing to | |
9014 | * that page. That code relies on "pmapped" being FALSE, so that the | |
9015 | * caches get synchronized when the page is first mapped. | |
9016 | */ | |
9017 | assert(pmap_verify_free(page->phys_page)); | |
9018 | page->pmapped = FALSE; | |
9019 | page->wpmapped = FALSE; | |
9020 | ||
9021 | vm_object_paging_end(page->object); | |
9022 | } | |
9023 | ||
9024 | #if DEVELOPMENT || DEBUG | |
9025 | unsigned long upl_encrypt_upls = 0; | |
9026 | unsigned long upl_encrypt_pages = 0; | |
9027 | #endif | |
9028 | ||
9029 | /* | |
9030 | * ENCRYPTED SWAP: | |
9031 | * | |
9032 | * upl_encrypt: | |
9033 | * Encrypts all the pages in the UPL, within the specified range. | |
9034 | * | |
9035 | */ | |
9036 | void | |
9037 | upl_encrypt( | |
9038 | upl_t upl, | |
9039 | upl_offset_t crypt_offset, | |
9040 | upl_size_t crypt_size) | |
9041 | { | |
9042 | upl_size_t upl_size, subupl_size=crypt_size; | |
9043 | upl_offset_t offset_in_upl, subupl_offset=crypt_offset; | |
9044 | vm_object_t upl_object; | |
9045 | vm_object_offset_t upl_offset; | |
9046 | vm_page_t page; | |
9047 | vm_object_t shadow_object; | |
9048 | vm_object_offset_t shadow_offset; | |
9049 | vm_object_offset_t paging_offset; | |
9050 | vm_object_offset_t base_offset; | |
9051 | int isVectorUPL = 0; | |
9052 | upl_t vector_upl = NULL; | |
9053 | ||
9054 | if((isVectorUPL = vector_upl_is_valid(upl))) | |
9055 | vector_upl = upl; | |
9056 | ||
9057 | process_upl_to_encrypt: | |
9058 | if(isVectorUPL) { | |
9059 | crypt_size = subupl_size; | |
9060 | crypt_offset = subupl_offset; | |
9061 | upl = vector_upl_subupl_byoffset(vector_upl, &crypt_offset, &crypt_size); | |
9062 | if(upl == NULL) | |
9063 | panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n"); | |
9064 | subupl_size -= crypt_size; | |
9065 | subupl_offset += crypt_size; | |
9066 | } | |
9067 | ||
9068 | #if DEVELOPMENT || DEBUG | |
9069 | upl_encrypt_upls++; | |
9070 | upl_encrypt_pages += crypt_size / PAGE_SIZE; | |
9071 | #endif | |
9072 | upl_object = upl->map_object; | |
9073 | upl_offset = upl->offset; | |
9074 | upl_size = upl->size; | |
9075 | ||
9076 | vm_object_lock(upl_object); | |
9077 | ||
9078 | /* | |
9079 | * Find the VM object that contains the actual pages. | |
9080 | */ | |
9081 | if (upl_object->pageout) { | |
9082 | shadow_object = upl_object->shadow; | |
9083 | /* | |
9084 | * The offset in the shadow object is actually also | |
9085 | * accounted for in upl->offset. It possibly shouldn't be | |
9086 | * this way, but for now don't account for it twice. | |
9087 | */ | |
9088 | shadow_offset = 0; | |
9089 | assert(upl_object->paging_offset == 0); /* XXX ? */ | |
9090 | vm_object_lock(shadow_object); | |
9091 | } else { | |
9092 | shadow_object = upl_object; | |
9093 | shadow_offset = 0; | |
9094 | } | |
9095 | ||
9096 | paging_offset = shadow_object->paging_offset; | |
9097 | vm_object_paging_begin(shadow_object); | |
9098 | ||
9099 | if (shadow_object != upl_object) | |
9100 | vm_object_unlock(upl_object); | |
9101 | ||
9102 | ||
9103 | base_offset = shadow_offset; | |
9104 | base_offset += upl_offset; | |
9105 | base_offset += crypt_offset; | |
9106 | base_offset -= paging_offset; | |
9107 | ||
9108 | assert(crypt_offset + crypt_size <= upl_size); | |
9109 | ||
9110 | for (offset_in_upl = 0; | |
9111 | offset_in_upl < crypt_size; | |
9112 | offset_in_upl += PAGE_SIZE) { | |
9113 | page = vm_page_lookup(shadow_object, | |
9114 | base_offset + offset_in_upl); | |
9115 | if (page == VM_PAGE_NULL) { | |
9116 | panic("upl_encrypt: " | |
9117 | "no page for (obj=%p,off=0x%llx+0x%x)!\n", | |
9118 | shadow_object, | |
9119 | base_offset, | |
9120 | offset_in_upl); | |
9121 | } | |
9122 | /* | |
9123 | * Disconnect the page from all pmaps, so that nobody can | |
9124 | * access it while it's encrypted. After that point, all | |
9125 | * accesses to this page will cause a page fault and block | |
9126 | * while the page is busy being encrypted. After the | |
9127 | * encryption completes, any access will cause a | |
9128 | * page fault and the page gets decrypted at that time. | |
9129 | */ | |
9130 | pmap_disconnect(page->phys_page); | |
9131 | vm_page_encrypt(page, 0); | |
9132 | ||
9133 | if (vm_object_lock_avoid(shadow_object)) { | |
9134 | /* | |
9135 | * Give vm_pageout_scan() a chance to convert more | |
9136 | * pages from "clean-in-place" to "clean-and-free", | |
9137 | * if it's interested in the same pages we selected | |
9138 | * in this cluster. | |
9139 | */ | |
9140 | vm_object_unlock(shadow_object); | |
9141 | mutex_pause(2); | |
9142 | vm_object_lock(shadow_object); | |
9143 | } | |
9144 | } | |
9145 | ||
9146 | vm_object_paging_end(shadow_object); | |
9147 | vm_object_unlock(shadow_object); | |
9148 | ||
9149 | if(isVectorUPL && subupl_size) | |
9150 | goto process_upl_to_encrypt; | |
9151 | } | |
9152 | ||
9153 | #else /* ENCRYPTED_SWAP */ | |
9154 | void | |
9155 | upl_encrypt( | |
9156 | __unused upl_t upl, | |
9157 | __unused upl_offset_t crypt_offset, | |
9158 | __unused upl_size_t crypt_size) | |
9159 | { | |
9160 | } | |
9161 | ||
9162 | void | |
9163 | vm_page_encrypt( | |
9164 | __unused vm_page_t page, | |
9165 | __unused vm_map_offset_t kernel_mapping_offset) | |
9166 | { | |
9167 | } | |
9168 | ||
9169 | void | |
9170 | vm_page_decrypt( | |
9171 | __unused vm_page_t page, | |
9172 | __unused vm_map_offset_t kernel_mapping_offset) | |
9173 | { | |
9174 | } | |
9175 | ||
9176 | #endif /* ENCRYPTED_SWAP */ | |
9177 | ||
9178 | /* | |
9179 | * page->object must be locked | |
9180 | */ | |
9181 | void | |
9182 | vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked) | |
9183 | { | |
9184 | if (!queues_locked) { | |
9185 | vm_page_lockspin_queues(); | |
9186 | } | |
9187 | ||
9188 | /* | |
9189 | * need to drop the laundry count... | |
9190 | * we may also need to remove it | |
9191 | * from the I/O paging queue... | |
9192 | * vm_pageout_throttle_up handles both cases | |
9193 | * | |
9194 | * the laundry and pageout_queue flags are cleared... | |
9195 | */ | |
9196 | vm_pageout_throttle_up(page); | |
9197 | ||
9198 | vm_page_steal_pageout_page++; | |
9199 | ||
9200 | if (!queues_locked) { | |
9201 | vm_page_unlock_queues(); | |
9202 | } | |
9203 | } | |
9204 | ||
9205 | upl_t | |
9206 | vector_upl_create(vm_offset_t upl_offset) | |
9207 | { | |
9208 | int vector_upl_size = sizeof(struct _vector_upl); | |
9209 | int i=0; | |
9210 | upl_t upl; | |
9211 | vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size); | |
9212 | ||
9213 | upl = upl_create(0,UPL_VECTOR,0); | |
9214 | upl->vector_upl = vector_upl; | |
9215 | upl->offset = upl_offset; | |
9216 | vector_upl->size = 0; | |
9217 | vector_upl->offset = upl_offset; | |
9218 | vector_upl->invalid_upls=0; | |
9219 | vector_upl->num_upls=0; | |
9220 | vector_upl->pagelist = NULL; | |
9221 | ||
9222 | for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) { | |
9223 | vector_upl->upl_iostates[i].size = 0; | |
9224 | vector_upl->upl_iostates[i].offset = 0; | |
9225 | ||
9226 | } | |
9227 | return upl; | |
9228 | } | |
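/*
 * Illustrative usage sketch (not from the original source, assuming
 * the vector UPL prototypes from the UPL headers are in scope): a
 * vectored UPL is assembled by creating the container, attaching each
 * sub-UPL together with its I/O size, and then materializing the
 * combined page list.  Passing an io_size of 0 to
 * vector_upl_set_subupl() instead removes a previously attached
 * sub-UPL.  The helper name is hypothetical.
 */
static upl_t
vector_upl_assemble_pair(vm_offset_t offset,
			 upl_t sub1, uint32_t size1,
			 upl_t sub2, uint32_t size2)
{
	upl_t vupl = vector_upl_create(offset);

	vector_upl_set_subupl(vupl, sub1, size1);
	vector_upl_set_subupl(vupl, sub2, size2);
	vector_upl_set_pagelist(vupl);

	return vupl;
}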
9229 | ||
9230 | void | |
9231 | vector_upl_deallocate(upl_t upl) | |
9232 | { | |
9233 | if(upl) { | |
9234 | vector_upl_t vector_upl = upl->vector_upl; | |
9235 | if(vector_upl) { | |
9236 | if(vector_upl->invalid_upls != vector_upl->num_upls) | |
9237 | panic("Deallocating non-empty Vectored UPL\n"); | |
9238 | kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE))); | |
9239 | vector_upl->invalid_upls=0; | |
9240 | vector_upl->num_upls = 0; | |
9241 | vector_upl->pagelist = NULL; | |
9242 | vector_upl->size = 0; | |
9243 | vector_upl->offset = 0; | |
9244 | kfree(vector_upl, sizeof(struct _vector_upl)); | |
9245 | vector_upl = (vector_upl_t)0xfeedfeed; | |
9246 | } | |
9247 | else | |
9248 | panic("vector_upl_deallocate was passed a non-vectored upl\n"); | |
9249 | } | |
9250 | else | |
9251 | panic("vector_upl_deallocate was passed a NULL upl\n"); | |
9252 | } | |
9253 | ||
9254 | boolean_t | |
9255 | vector_upl_is_valid(upl_t upl) | |
9256 | { | |
9257 | if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) { | |
9258 | vector_upl_t vector_upl = upl->vector_upl; | |
9259 | if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef) | |
9260 | return FALSE; | |
9261 | else | |
9262 | return TRUE; | |
9263 | } | |
9264 | return FALSE; | |
9265 | } | |
9266 | ||
9267 | boolean_t | |
9268 | vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size) | |
9269 | { | |
9270 | if(vector_upl_is_valid(upl)) { | |
9271 | vector_upl_t vector_upl = upl->vector_upl; | |
9272 | ||
9273 | if(vector_upl) { | |
9274 | if(subupl) { | |
9275 | if(io_size) { | |
9276 | if(io_size < PAGE_SIZE) | |
9277 | io_size = PAGE_SIZE; | |
9278 | subupl->vector_upl = (void*)vector_upl; | |
9279 | vector_upl->upl_elems[vector_upl->num_upls++] = subupl; | |
9280 | vector_upl->size += io_size; | |
9281 | upl->size += io_size; | |
9282 | } | |
9283 | else { | |
9284 | uint32_t i=0,invalid_upls=0; | |
9285 | for(i = 0; i < vector_upl->num_upls; i++) { | |
9286 | if(vector_upl->upl_elems[i] == subupl) | |
9287 | break; | |
9288 | } | |
9289 | if(i == vector_upl->num_upls) | |
9290 | panic("Trying to remove sub-upl when none exists"); | |
9291 | ||
9292 | vector_upl->upl_elems[i] = NULL; | |
9293 | invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1); | |
9294 | if(invalid_upls == vector_upl->num_upls) | |
9295 | return TRUE; | |
9296 | else | |
9297 | return FALSE; | |
9298 | } | |
9299 | } | |
9300 | else | |
9301 | panic("vector_upl_set_subupl was passed a NULL upl element\n"); | |
9302 | } | |
9303 | else | |
9304 | panic("vector_upl_set_subupl was passed a non-vectored upl\n"); | |
9305 | } | |
9306 | else | |
9307 | panic("vector_upl_set_subupl was passed a NULL upl\n"); | |
9308 | ||
9309 | return FALSE; | |
9310 | } | |
9311 | ||
9312 | void | |
9313 | vector_upl_set_pagelist(upl_t upl) | |
9314 | { | |
9315 | if(vector_upl_is_valid(upl)) { | |
9316 | uint32_t i=0; | |
9317 | vector_upl_t vector_upl = upl->vector_upl; | |
9318 | ||
9319 | if(vector_upl) { | |
9320 | vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0; | |
9321 | ||
9322 | vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)); | |
9323 | ||
9324 | for(i=0; i < vector_upl->num_upls; i++) { | |
9325 | cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE; | |
9326 | bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size); | |
9327 | pagelist_size += cur_upl_pagelist_size; | |
9328 | if(vector_upl->upl_elems[i]->highest_page > upl->highest_page) | |
9329 | upl->highest_page = vector_upl->upl_elems[i]->highest_page; | |
9330 | } | |
9331 | assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) ); | |
9332 | } | |
9333 | else | |
9334 | panic("vector_upl_set_pagelist was passed a non-vectored upl\n"); | |
9335 | } | |
9336 | else | |
9337 | panic("vector_upl_set_pagelist was passed a NULL upl\n"); | |
9338 | ||
9339 | } | |
9340 | ||
9341 | upl_t | |
9342 | vector_upl_subupl_byindex(upl_t upl, uint32_t index) | |
9343 | { | |
9344 | if(vector_upl_is_valid(upl)) { | |
9345 | vector_upl_t vector_upl = upl->vector_upl; | |
9346 | if(vector_upl) { | |
9347 | if(index < vector_upl->num_upls) | |
9348 | return vector_upl->upl_elems[index]; | |
9349 | } | |
9350 | else | |
9351 | panic("vector_upl_subupl_byindex was passed a non-vectored upl\n"); | |
9352 | } | |
9353 | return NULL; | |
9354 | } | |
9355 | ||
9356 | upl_t | |
9357 | vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size) | |
9358 | { | |
9359 | if(vector_upl_is_valid(upl)) { | |
9360 | uint32_t i=0; | |
9361 | vector_upl_t vector_upl = upl->vector_upl; | |
9362 | ||
9363 | if(vector_upl) { | |
9364 | upl_t subupl = NULL; | |
9365 | vector_upl_iostates_t subupl_state; | |
9366 | ||
9367 | for(i=0; i < vector_upl->num_upls; i++) { | |
9368 | subupl = vector_upl->upl_elems[i]; | |
9369 | subupl_state = vector_upl->upl_iostates[i]; | |
9370 | if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) { | |
9371 | /* We could have been passed an offset/size pair that belongs | |
9372 | * to a UPL element that has already been committed/aborted. | |
9373 | * If so, return NULL. | |
9374 | */ | |
9375 | if(subupl == NULL) | |
9376 | return NULL; | |
9377 | if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) { | |
9378 | *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset; | |
9379 | if(*upl_size > subupl_state.size) | |
9380 | *upl_size = subupl_state.size; | |
9381 | } | |
9382 | if(*upl_offset >= subupl_state.offset) | |
9383 | *upl_offset -= subupl_state.offset; | |
9384 | else if(i) | |
9385 | panic("Vector UPL offset miscalculation\n"); | |
9386 | return subupl; | |
9387 | } | |
9388 | } | |
9389 | } | |
9390 | else | |
9391 | panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n"); | |
9392 | } | |
9393 | return NULL; | |
9394 | } | |
9395 | ||
9396 | void | |
9397 | vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr) | |
9398 | { | |
9399 | *v_upl_submap = NULL; | |
9400 | ||
9401 | if(vector_upl_is_valid(upl)) { | |
9402 | vector_upl_t vector_upl = upl->vector_upl; | |
9403 | if(vector_upl) { | |
9404 | *v_upl_submap = vector_upl->submap; | |
9405 | *submap_dst_addr = vector_upl->submap_dst_addr; | |
9406 | } | |
9407 | else | |
9408 | panic("vector_upl_get_submap was passed a non-vectored UPL\n"); | |
9409 | } | |
9410 | else | |
9411 | panic("vector_upl_get_submap was passed a null UPL\n"); | |
9412 | } | |
9413 | ||
9414 | void | |
9415 | vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr) | |
9416 | { | |
9417 | if(vector_upl_is_valid(upl)) { | |
9418 | vector_upl_t vector_upl = upl->vector_upl; | |
9419 | if(vector_upl) { | |
9420 | vector_upl->submap = submap; | |
9421 | vector_upl->submap_dst_addr = submap_dst_addr; | |
9422 | } | |
9423 | else | |
9424 | panic("vector_upl_get_submap was passed a non-vectored UPL\n"); | |
9425 | } | |
9426 | else | |
9427 | panic("vector_upl_get_submap was passed a NULL UPL\n"); | |
9428 | } | |
9429 | ||
9430 | void | |
9431 | vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size) | |
9432 | { | |
9433 | if(vector_upl_is_valid(upl)) { | |
9434 | uint32_t i = 0; | |
9435 | vector_upl_t vector_upl = upl->vector_upl; | |
9436 | ||
9437 | if(vector_upl) { | |
9438 | for(i = 0; i < vector_upl->num_upls; i++) { | |
9439 | if(vector_upl->upl_elems[i] == subupl) | |
9440 | break; | |
9441 | } | |
9442 | ||
9443 | if(i == vector_upl->num_upls) | |
9444 | panic("setting sub-upl iostate when none exists"); | |
9445 | ||
9446 | vector_upl->upl_iostates[i].offset = offset; | |
9447 | if(size < PAGE_SIZE) | |
9448 | size = PAGE_SIZE; | |
9449 | vector_upl->upl_iostates[i].size = size; | |
9450 | } | |
9451 | else | |
9452 | panic("vector_upl_set_iostate was passed a non-vectored UPL\n"); | |
9453 | } | |
9454 | else | |
9455 | panic("vector_upl_set_iostate was passed a NULL UPL\n"); | |
9456 | } | |
9457 | ||
void
vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
{
	if(vector_upl_is_valid(upl)) {
		uint32_t i = 0;
		vector_upl_t vector_upl = upl->vector_upl;

		if(vector_upl) {
			for(i = 0; i < vector_upl->num_upls; i++) {
				if(vector_upl->upl_elems[i] == subupl)
					break;
			}

			if(i == vector_upl->num_upls)
				panic("getting sub-upl iostate when none exists");

			*offset = vector_upl->upl_iostates[i].offset;
			*size = vector_upl->upl_iostates[i].size;
		}
		else
			panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
	}
	else
		panic("vector_upl_get_iostate was passed a NULL UPL\n");
}

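/*
 * Like vector_upl_get_iostate(), but looked up by index into the
 * vector; returns a zero offset/size for an out-of-range index.
 */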
void
vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
{
	if(vector_upl_is_valid(upl)) {
		vector_upl_t vector_upl = upl->vector_upl;
		if(vector_upl) {
			if(index < vector_upl->num_upls) {
				*offset = vector_upl->upl_iostates[index].offset;
				*size = vector_upl->upl_iostates[index].size;
			}
			else
				*offset = *size = 0;
		}
		else
			panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
	}
	else
		panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
}

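/*
 * Accessors for a vector UPL's aggregated page list, for the opaque
 * vector_upl structure itself, and for the offset at which the
 * internal page list follows struct upl.
 */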
upl_page_info_t *
upl_get_internal_vectorupl_pagelist(upl_t upl)
{
	return ((vector_upl_t)(upl->vector_upl))->pagelist;
}

void *
upl_get_internal_vectorupl(upl_t upl)
{
	return upl->vector_upl;
}

vm_size_t
upl_get_internal_pagelist_offset(void)
{
	return sizeof(struct upl);
}

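/*
 * Set or clear UPL_CLEAR_DIRTY on the UPL; when set, the pages'
 * dirty state is cleared when the UPL is committed.
 */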
void
upl_clear_dirty(
	upl_t		upl,
	boolean_t	value)
{
	if (value) {
		upl->flags |= UPL_CLEAR_DIRTY;
	} else {
		upl->flags &= ~UPL_CLEAR_DIRTY;
	}
}

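/*
 * Take (value == TRUE) or release (value == FALSE) an external
 * reference on the UPL, panicking on a release with no reference
 * outstanding.
 */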
void
upl_set_referenced(
	upl_t		upl,
	boolean_t	value)
{
	upl_lock(upl);
	if (value) {
		upl->ext_ref_count++;
	} else {
		if (!upl->ext_ref_count) {
			panic("upl_set_referenced: ext_ref_count underflow on upl %p\n", upl);
		}
		upl->ext_ref_count--;
	}
	upl_unlock(upl);
}

#if CONFIG_IOSCHED
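/*
 * Record the disk block number covering each page of this I/O in the
 * UPL's reprioritization info, so the I/O scheduler can later expedite
 * the request; a no-op unless the UPL was created with expedite support.
 */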
void
upl_set_blkno(
	upl_t		upl,
	vm_offset_t	upl_offset,
	int		io_size,
	int64_t		blkno)
{
	int i, j;
	if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0)
		return;

	assert(upl->upl_reprio_info != 0);
	for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
		UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
	}
}
#endif /* CONFIG_IOSCHED */

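/*
 * Return TRUE if this page is eligible for sliding: its object must
 * be the one object that was slid, slide info must be present, the
 * page must fall inside the slide region, and it must not have been
 * slid already.
 */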
boolean_t
vm_page_is_slideable(vm_page_t m)
{
	boolean_t result = FALSE;
	vm_shared_region_slide_info_t si;

	vm_object_lock_assert_held(m->object);

	/* make sure our page belongs to the one object allowed to do this */
	if (!m->object->object_slid) {
		goto done;
	}

	si = m->object->vo_slide_info;
	if (si == NULL) {
		goto done;
	}

	if(!m->slid && (si->start <= m->offset && si->end > m->offset)) {
		result = TRUE;
	}

done:
	return result;
}

int vm_page_slide_counter = 0;
int vm_page_slide_errors = 0;
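/*
 * Apply the shared-region slide to the given page.  If the caller
 * has not already mapped the page into the kernel, map it here,
 * slide the contents in place, then unmap it.  On success the page
 * is marked slid and clean; on failure it is marked with an error.
 */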
kern_return_t
vm_page_slide(
	vm_page_t	page,
	vm_map_offset_t	kernel_mapping_offset)
{
	kern_return_t		kr;
	vm_map_size_t		kernel_mapping_size;
	boolean_t		kernel_mapping_needs_unmap;
	vm_offset_t		kernel_vaddr;
	uint32_t		pageIndex = 0;

	assert(!page->slid);
	assert(page->object->object_slid);
	vm_object_lock_assert_exclusive(page->object);

	if (page->error)
		return KERN_FAILURE;

	/*
	 * Take a paging-in-progress reference to keep the object
	 * alive even if we have to unlock it (in vm_paging_map_object()
	 * for example)...
	 */
	vm_object_paging_begin(page->object);

	if (kernel_mapping_offset == 0) {
		/*
		 * The page hasn't already been mapped in kernel space
		 * by the caller.  Map it now, so that we can access
		 * its contents and slide them.
		 */
		kernel_mapping_size = PAGE_SIZE;
		kernel_mapping_needs_unmap = FALSE;
		kr = vm_paging_map_object(page,
					  page->object,
					  page->offset,
					  VM_PROT_READ | VM_PROT_WRITE,
					  FALSE,
					  &kernel_mapping_size,
					  &kernel_mapping_offset,
					  &kernel_mapping_needs_unmap);
		if (kr != KERN_SUCCESS) {
			panic("vm_page_slide: "
			      "could not map page in kernel: 0x%x\n",
			      kr);
		}
	} else {
		kernel_mapping_size = 0;
		kernel_mapping_needs_unmap = FALSE;
	}
	kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);

	/*
	 * Slide the pointers on the page.
	 */

	/* assert that slide_file_info.start/end are page-aligned? */

	assert(!page->slid);
	assert(page->object->object_slid);

	/* on some platforms this is an extern int, on others it's a cpp macro */
	__unreachable_ok_push
	/* TODO: Consider this */
	if (!TEST_PAGE_SIZE_4K) {
		for (int i = 0; i < 4; i++) {
			pageIndex = (uint32_t)((page->offset - page->object->vo_slide_info->start)/0x1000);
			kr = vm_shared_region_slide_page(page->object->vo_slide_info, kernel_vaddr + (0x1000*i), pageIndex + i);
		}
	} else {
		pageIndex = (uint32_t)((page->offset - page->object->vo_slide_info->start)/PAGE_SIZE);
		kr = vm_shared_region_slide_page(page->object->vo_slide_info, kernel_vaddr, pageIndex);
	}
	__unreachable_ok_pop

	vm_page_slide_counter++;

	/*
	 * Unmap the page from the kernel's address space.
	 */
	if (kernel_mapping_needs_unmap) {
		vm_paging_unmap_object(page->object,
				       kernel_vaddr,
				       kernel_vaddr + PAGE_SIZE);
	}

	page->dirty = FALSE;
	pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);

	if (kr != KERN_SUCCESS || cs_debug > 1) {
		printf("vm_page_slide(%p): "
		       "obj %p off 0x%llx mobj %p moff 0x%llx\n",
		       page,
		       page->object, page->offset,
		       page->object->pager,
		       page->offset + page->object->paging_offset);
	}

	if (kr == KERN_SUCCESS) {
		page->slid = TRUE;
	} else {
		page->error = TRUE;
		vm_page_slide_errors++;
	}

	vm_object_paging_end(page->object);

	return kr;
}

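/*
 * Record a VM-pressure kdebug event annotated with the current
 * page-queue counts; a no-op unless vm_debug_events is set.
 */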
inline void
memoryshot(unsigned int event, unsigned int control)
{
	if (vm_debug_events) {
		KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
				       vm_page_active_count, vm_page_inactive_count,
				       vm_page_free_count, vm_page_speculative_count,
				       vm_page_throttled_count);
	} else {
		(void) event;
		(void) control;
	}
}

#ifdef MACH_BSD

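/*
 * Thin wrappers exporting the UPL_*_PAGE() page-list accessor macros
 * to BSD-side consumers.
 */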
boolean_t upl_device_page(upl_page_info_t *upl)
{
	return(UPL_DEVICE_PAGE(upl));
}
boolean_t upl_page_present(upl_page_info_t *upl, int index)
{
	return(UPL_PAGE_PRESENT(upl, index));
}
boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
{
	return(UPL_SPECULATIVE_PAGE(upl, index));
}
boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
{
	return(UPL_DIRTY_PAGE(upl, index));
}
boolean_t upl_valid_page(upl_page_info_t *upl, int index)
{
	return(UPL_VALID_PAGE(upl, index));
}
ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
{
	return(UPL_PHYS_PAGE(upl, index));
}

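/*
 * Debugging aid: walk the inactive, throttled, and anonymous queues,
 * then the active queue, printing counts of dirty, pageout, and
 * precious pages found on each pass.
 */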
void
vm_countdirtypages(void)
{
	vm_page_t m;
	int dpages;
	int pgopages;
	int precpages;

	dpages = 0;
	pgopages = 0;
	precpages = 0;

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	do {
		if (m == (vm_page_t)0) break;

		if (m->dirty) dpages++;
		if (m->pageout) pgopages++;
		if (m->precious) precpages++;

		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m));
	vm_page_unlock_queues();

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_throttled);
	do {
		if (m == (vm_page_t)0) break;

		dpages++;
		assert(m->dirty);
		assert(!m->pageout);
		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_throttled, (queue_entry_t) m));
	vm_page_unlock_queues();

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
	do {
		if (m == (vm_page_t)0) break;

		if (m->dirty) dpages++;
		if (m->pageout) pgopages++;
		if (m->precious) precpages++;

		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_anonymous, (queue_entry_t) m));
	vm_page_unlock_queues();

	printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);

	dpages = 0;
	pgopages = 0;
	precpages = 0;

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_active);

	do {
		if (m == (vm_page_t)0) break;
		if (m->dirty) dpages++;
		if (m->pageout) pgopages++;
		if (m->precious) precpages++;

		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_active, (queue_entry_t) m));
	vm_page_unlock_queues();

	printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);

}
#endif /* MACH_BSD */

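/*
 * Trivial accessors for the highest physical page number seen in the
 * UPL and for the UPL's size in bytes.
 */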
ppnum_t upl_get_highest_page(
	upl_t	upl)
{
	return upl->highest_page;
}

upl_size_t upl_get_size(
	upl_t	upl)
{
	return upl->size;
}

#if UPL_DEBUG
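/*
 * Debug-only bookkeeping: stash and retrieve two caller-supplied
 * alias words on a UPL.
 */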
kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
{
	upl->ubc_alias1 = alias1;
	upl->ubc_alias2 = alias2;
	return KERN_SUCCESS;
}
int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
{
	if(al)
		*al = upl->ubc_alias1;
	if(al2)
		*al2 = upl->ubc_alias2;
	return KERN_SUCCESS;
}
#endif /* UPL_DEBUG */

#if VM_PRESSURE_EVENTS
/*
 * Upward trajectory.
 */
extern boolean_t vm_compressor_low_on_space(void);

boolean_t
VM_PRESSURE_NORMAL_TO_WARNING(void) {

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {

		/* Available pages below our threshold */
		if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
			/* No frozen processes to kill */
			if (memorystatus_frozen_count == 0) {
				/* Not enough suspended processes available. */
				if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
					return TRUE;
				}
			}
		}
		return FALSE;

	} else {
		return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? TRUE : FALSE);
	}
}

boolean_t
VM_PRESSURE_WARNING_TO_CRITICAL(void) {

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
		/* Available pages below our threshold */
		if (memorystatus_available_pages < memorystatus_available_pages_critical) {
			return TRUE;
		}
		return FALSE;
	} else {
		return ((vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10))) ? TRUE : FALSE);
	}
}

/*
 * Downward trajectory.
 */
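/*
 * Note: the downward transitions require available pages to rise 15%
 * above the corresponding upward threshold (and use wider compressor
 * margins), providing hysteresis so the pressure level does not flap
 * around a single threshold.
 */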
boolean_t
VM_PRESSURE_WARNING_TO_NORMAL(void) {

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
		/* Available pages above our threshold */
		unsigned int target_threshold = memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100);
		if (memorystatus_available_pages > target_threshold) {
			return TRUE;
		}
		return FALSE;
	} else {
		return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? TRUE : FALSE);
	}
}

boolean_t
VM_PRESSURE_CRITICAL_TO_WARNING(void) {

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
		/* Available pages above our threshold */
		unsigned int target_threshold = memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100);
		if (memorystatus_available_pages > target_threshold) {
			return TRUE;
		}
		return FALSE;
	} else {
		return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? TRUE : FALSE);
	}
}
#endif /* VM_PRESSURE_EVENTS */