1 | /* | |
2 | * Copyright (c) 2000-2014 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* | |
29 | * @OSF_COPYRIGHT@ | |
30 | */ | |
31 | /* | |
32 | * Mach Operating System | |
33 | * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University | |
34 | * All Rights Reserved. | |
35 | * | |
36 | * Permission to use, copy, modify and distribute this software and its | |
37 | * documentation is hereby granted, provided that both the copyright | |
38 | * notice and this permission notice appear in all copies of the | |
39 | * software, derivative works or modified versions, and any portions | |
40 | * thereof, and that both notices appear in supporting documentation. | |
41 | * | |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
45 | * | |
46 | * Carnegie Mellon requests users of this software to return to | |
47 | * | |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
49 | * School of Computer Science | |
50 | * Carnegie Mellon University | |
51 | * Pittsburgh PA 15213-3890 | |
52 | * | |
53 | * any improvements or extensions that they make and grant Carnegie Mellon | |
54 | * the rights to redistribute these changes. | |
55 | */ | |
56 | /* | |
57 | */ | |
58 | /* | |
59 | * File: vm/vm_pageout.c | |
60 | * Author: Avadis Tevanian, Jr., Michael Wayne Young | |
61 | * Date: 1985 | |
62 | * | |
63 | * The proverbial page-out daemon. | |
64 | */ | |
65 | ||
66 | #include <stdint.h> | |
67 | ||
68 | #include <debug.h> | |
69 | #include <mach_pagemap.h> | |
70 | #include <mach_cluster_stats.h> | |
71 | ||
72 | #include <mach/mach_types.h> | |
73 | #include <mach/memory_object.h> | |
74 | #include <mach/memory_object_default.h> | |
75 | #include <mach/memory_object_control_server.h> | |
76 | #include <mach/mach_host_server.h> | |
77 | #include <mach/upl.h> | |
78 | #include <mach/vm_map.h> | |
79 | #include <mach/vm_param.h> | |
80 | #include <mach/vm_statistics.h> | |
81 | #include <mach/sdt.h> | |
82 | ||
83 | #include <kern/kern_types.h> | |
84 | #include <kern/counters.h> | |
85 | #include <kern/host_statistics.h> | |
86 | #include <kern/machine.h> | |
87 | #include <kern/misc_protos.h> | |
88 | #include <kern/sched.h> | |
89 | #include <kern/thread.h> | |
90 | #include <kern/xpr.h> | |
91 | #include <kern/kalloc.h> | |
92 | ||
93 | #include <machine/vm_tuning.h> | |
94 | #include <machine/commpage.h> | |
95 | ||
96 | #include <vm/pmap.h> | |
97 | #include <vm/vm_compressor_pager.h> | |
98 | #include <vm/vm_fault.h> | |
99 | #include <vm/vm_map.h> | |
100 | #include <vm/vm_object.h> | |
101 | #include <vm/vm_page.h> | |
102 | #include <vm/vm_pageout.h> | |
103 | #include <vm/vm_protos.h> /* must be last */ | |
104 | #include <vm/memory_object.h> | |
105 | #include <vm/vm_purgeable_internal.h> | |
106 | #include <vm/vm_shared_region.h> | |
107 | #include <vm/vm_compressor.h> | |
108 | ||
109 | #if CONFIG_PHANTOM_CACHE | |
110 | #include <vm/vm_phantom_cache.h> | |
111 | #endif | |
112 | /* | |
113 | * ENCRYPTED SWAP: | |
114 | */ | |
115 | #include <libkern/crypto/aes.h> | |
116 | extern u_int32_t random(void); /* from <libkern/libkern.h> */ | |
117 | ||
118 | extern int cs_debug; | |
119 | ||
120 | #if UPL_DEBUG | |
121 | #include <libkern/OSDebug.h> | |
122 | #endif | |
123 | ||
124 | extern void m_drain(void); | |
125 | ||
126 | #if VM_PRESSURE_EVENTS | |
127 | extern unsigned int memorystatus_available_pages; | |
128 | extern unsigned int memorystatus_available_pages_pressure; | |
129 | extern unsigned int memorystatus_available_pages_critical; | |
130 | extern unsigned int memorystatus_frozen_count; | |
131 | extern unsigned int memorystatus_suspended_count; | |
132 | ||
133 | extern vm_pressure_level_t memorystatus_vm_pressure_level; | |
134 | int memorystatus_purge_on_warning = 2; | |
135 | int memorystatus_purge_on_urgent = 5; | |
136 | int memorystatus_purge_on_critical = 8; | |
137 | ||
138 | void vm_pressure_response(void); | |
139 | boolean_t vm_pressure_thread_running = FALSE; | |
140 | extern void consider_vm_pressure_events(void); | |
141 | ||
142 | #define MEMORYSTATUS_SUSPENDED_THRESHOLD 4 | |
143 | #endif /* VM_PRESSURE_EVENTS */ | |
144 | ||
145 | boolean_t vm_pressure_changed = FALSE; | |
146 | ||
147 | #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */ | |
148 | #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100 | |
149 | #endif | |
150 | ||
151 | #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */ | |
152 | #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096 | |
153 | #endif | |
154 | ||
155 | #ifndef VM_PAGEOUT_DEADLOCK_RELIEF | |
156 | #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */ | |
157 | #endif | |
158 | ||
159 | #ifndef VM_PAGEOUT_INACTIVE_RELIEF | |
160 | #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */ | |
161 | #endif | |
162 | ||
163 | #ifndef VM_PAGE_LAUNDRY_MAX | |
164 | #define VM_PAGE_LAUNDRY_MAX 128UL /* maximum pageouts on a given pageout queue */ | |
165 | #endif /* VM_PAGE_LAUNDRY_MAX */ | |
166 | ||
167 | #ifndef VM_PAGEOUT_BURST_WAIT | |
168 | #define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds */ | |
169 | #endif /* VM_PAGEOUT_BURST_WAIT */ | |
170 | ||
171 | #ifndef VM_PAGEOUT_EMPTY_WAIT | |
172 | #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */ | |
173 | #endif /* VM_PAGEOUT_EMPTY_WAIT */ | |
174 | ||
175 | #ifndef VM_PAGEOUT_DEADLOCK_WAIT | |
176 | #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */ | |
177 | #endif /* VM_PAGEOUT_DEADLOCK_WAIT */ | |
178 | ||
179 | #ifndef VM_PAGEOUT_IDLE_WAIT | |
180 | #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */ | |
181 | #endif /* VM_PAGEOUT_IDLE_WAIT */ | |
182 | ||
183 | #ifndef VM_PAGEOUT_SWAP_WAIT | |
184 | #define VM_PAGEOUT_SWAP_WAIT 50 /* milliseconds */ | |
185 | #endif /* VM_PAGEOUT_SWAP_WAIT */ | |
186 | ||
187 | #ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED | |
188 | #define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED 1000 /* maximum pages considered before we issue a pressure event */ | |
189 | #endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */ | |
190 | ||
191 | #ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS | |
192 | #define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS 5 /* seconds */ | |
193 | #endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */ | |
194 | ||
195 | unsigned int vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS; | |
196 | unsigned int vm_page_speculative_percentage = 5; | |
197 | ||
198 | #ifndef VM_PAGE_SPECULATIVE_TARGET | |
199 | #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage)) | |
200 | #endif /* VM_PAGE_SPECULATIVE_TARGET */ | |
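/*
 * Illustrative arithmetic (not part of the original source): with the
 * default vm_page_speculative_percentage of 5, the target works out to
 * total / (100 / 5) = total / 20, i.e. 5% of the active + inactive
 * page count.  vm_pageout_scan() clamps the percentage to the range
 * 1..50 before using it.
 */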
201 | ||
202 | ||
203 | #ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT | |
204 | #define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200) | |
205 | #endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */ | |
206 | ||
207 | ||
208 | /* | |
209 | * To obtain a reasonable LRU approximation, the inactive queue | |
210 | * needs to be large enough to give pages on it a chance to be | |
211 | * referenced a second time. This macro defines the fraction | |
212 | * of active+inactive pages that should be inactive. | |
213 | * The pageout daemon uses it to update vm_page_inactive_target. | |
214 | * | |
215 | * If vm_page_free_count falls below vm_page_free_target and | |
216 | * vm_page_inactive_count is below vm_page_inactive_target, | |
217 | * then the pageout daemon starts running. | |
218 | */ | |
219 | ||
220 | #ifndef VM_PAGE_INACTIVE_TARGET | |
221 | #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2) | |
222 | #endif /* VM_PAGE_INACTIVE_TARGET */ | |
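/*
 * Worked example (illustrative only, not from the original source):
 * with the default VM_PAGE_INACTIVE_TARGET of (avail) / 2, a system
 * holding 120,000 active + inactive + speculative pages gets an
 * inactive target of 60,000 pages.  vm_pageout_scan() recomputes it on
 * each restart:
 *
 *	vm_page_inactive_target =
 *		VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
 *					vm_page_inactive_count +
 *					vm_page_speculative_count);
 */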
223 | ||
224 | /* | |
225 | * Once the pageout daemon starts running, it keeps going | |
226 | * until vm_page_free_count meets or exceeds vm_page_free_target. | |
227 | */ | |
228 | ||
229 | #ifndef VM_PAGE_FREE_TARGET | |
230 | #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80) | |
231 | #endif /* VM_PAGE_FREE_TARGET */ | |
232 | ||
233 | ||
234 | /* | |
235 | * The pageout daemon always starts running once vm_page_free_count | |
236 | * falls below vm_page_free_min. | |
237 | */ | |
238 | ||
239 | #ifndef VM_PAGE_FREE_MIN | |
240 | #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100) | |
241 | #endif /* VM_PAGE_FREE_MIN */ | |
242 | ||
243 | #define VM_PAGE_FREE_RESERVED_LIMIT 1700 | |
244 | #define VM_PAGE_FREE_MIN_LIMIT 3500 | |
245 | #define VM_PAGE_FREE_TARGET_LIMIT 4000 | |
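/*
 * Worked example (illustrative only): for a pool of 80,000 free-able
 * pages,
 *
 *	VM_PAGE_FREE_TARGET(80000) = 15 + 80000 / 80  = 1015 pages
 *	VM_PAGE_FREE_MIN(80000)    = 10 + 80000 / 100 =  810 pages
 *
 * The *_LIMIT constants above presumably serve as upper bounds applied
 * elsewhere in this file when these thresholds are initialized.
 */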
246 | ||
247 | /* | |
248 | * When vm_page_free_count falls below vm_page_free_reserved, | |
249 | * only vm-privileged threads can allocate pages. vm-privilege | |
250 | * allows the pageout daemon and default pager (and any other | |
251 | * associated threads needed for default pageout) to continue | |
252 | * operation by dipping into the reserved pool of pages. | |
253 | */ | |
254 | ||
255 | #ifndef VM_PAGE_FREE_RESERVED | |
256 | #define VM_PAGE_FREE_RESERVED(n) \ | |
257 | ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n)) | |
258 | #endif /* VM_PAGE_FREE_RESERVED */ | |
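/*
 * Illustrative arithmetic (not from the original source): with
 * VM_PAGE_LAUNDRY_MAX of 128, VM_PAGE_FREE_RESERVED(n) evaluates to
 * 6 * 128 + n = 768 + n pages.  Once vm_page_free_count drops below
 * this reserve, only vm-privileged threads (the pageout daemon, the
 * default pager and their helper threads) may allocate pages from it.
 */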
259 | ||
260 | /* | |
261 | * When we dequeue pages from the inactive list, they are | |
262 | * reactivated (ie, put back on the active queue) if referenced. | |
263 | * However, it is possible to starve the free list if other | |
264 | * processors are referencing pages faster than we can turn off | |
265 | * the referenced bit. So we limit the number of reactivations | |
266 | * we will make per call of vm_pageout_scan(). | |
267 | */ | |
268 | #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000 | |
269 | #ifndef VM_PAGE_REACTIVATE_LIMIT | |
270 | #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX)) | |
271 | #endif /* VM_PAGE_REACTIVATE_LIMIT */ | |
272 | #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000 | |
273 | ||
274 | ||
275 | extern boolean_t hibernate_cleaning_in_progress; | |
276 | ||
277 | /* | |
278 | * Exported variable used to broadcast the activation of the pageout scan | |
279 | * Working Set uses this to throttle its use of pmap removes. In this | |
280 | * way, code which runs within memory in an uncontested context does | |
281 | * not keep encountering soft faults. | |
282 | */ | |
283 | ||
284 | unsigned int vm_pageout_scan_event_counter = 0; | |
285 | ||
286 | /* | |
287 | * Forward declarations for internal routines. | |
288 | */ | |
289 | struct cq { | |
290 | struct vm_pageout_queue *q; | |
291 | void *current_chead; | |
292 | char *scratch_buf; | |
293 | int id; | |
294 | }; | |
295 | #define MAX_COMPRESSOR_THREAD_COUNT 8 | |
296 | ||
297 | struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT]; | |
298 | ||
299 | void *vm_pageout_immediate_chead; | |
300 | char *vm_pageout_immediate_scratch_buf; | |
301 | ||
302 | ||
303 | #if VM_PRESSURE_EVENTS | |
304 | void vm_pressure_thread(void); | |
305 | ||
306 | boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void); | |
307 | boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void); | |
308 | ||
309 | boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void); | |
310 | boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void); | |
311 | #endif | |
312 | static void vm_pageout_garbage_collect(int); | |
313 | static void vm_pageout_iothread_continue(struct vm_pageout_queue *); | |
314 | static void vm_pageout_iothread_external(void); | |
315 | static void vm_pageout_iothread_internal(struct cq *cq); | |
316 | static void vm_pageout_adjust_io_throttles(struct vm_pageout_queue *, struct vm_pageout_queue *, boolean_t); | |
317 | ||
318 | extern void vm_pageout_continue(void); | |
319 | extern void vm_pageout_scan(void); | |
320 | ||
321 | static void vm_pageout_immediate(vm_page_t, boolean_t); | |
322 | boolean_t vm_compressor_immediate_preferred = FALSE; | |
323 | boolean_t vm_compressor_immediate_preferred_override = FALSE; | |
324 | boolean_t vm_restricted_to_single_processor = FALSE; | |
325 | static boolean_t vm_pageout_waiter = FALSE; | |
326 | static boolean_t vm_pageout_running = FALSE; | |
327 | ||
328 | ||
329 | static thread_t vm_pageout_external_iothread = THREAD_NULL; | |
330 | static thread_t vm_pageout_internal_iothread = THREAD_NULL; | |
331 | ||
332 | unsigned int vm_pageout_reserved_internal = 0; | |
333 | unsigned int vm_pageout_reserved_really = 0; | |
334 | ||
335 | unsigned int vm_pageout_swap_wait = 0; | |
336 | unsigned int vm_pageout_idle_wait = 0; /* milliseconds */ | |
337 | unsigned int vm_pageout_empty_wait = 0; /* milliseconds */ | |
338 | unsigned int vm_pageout_burst_wait = 0; /* milliseconds */ | |
339 | unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */ | |
340 | unsigned int vm_pageout_deadlock_relief = 0; | |
341 | unsigned int vm_pageout_inactive_relief = 0; | |
342 | unsigned int vm_pageout_burst_active_throttle = 0; | |
343 | unsigned int vm_pageout_burst_inactive_throttle = 0; | |
344 | ||
345 | int vm_upl_wait_for_pages = 0; | |
346 | ||
347 | ||
348 | /* | |
349 | * These variables record the pageout daemon's actions: | |
350 | * how many pages it looks at and what happens to those pages. | |
351 | * No locking needed because only one thread modifies the variables. | |
352 | */ | |
353 | ||
354 | unsigned int vm_pageout_active = 0; /* debugging */ | |
355 | unsigned int vm_pageout_inactive = 0; /* debugging */ | |
356 | unsigned int vm_pageout_inactive_throttled = 0; /* debugging */ | |
357 | unsigned int vm_pageout_inactive_forced = 0; /* debugging */ | |
358 | unsigned int vm_pageout_inactive_nolock = 0; /* debugging */ | |
359 | unsigned int vm_pageout_inactive_avoid = 0; /* debugging */ | |
360 | unsigned int vm_pageout_inactive_busy = 0; /* debugging */ | |
361 | unsigned int vm_pageout_inactive_error = 0; /* debugging */ | |
362 | unsigned int vm_pageout_inactive_absent = 0; /* debugging */ | |
363 | unsigned int vm_pageout_inactive_notalive = 0; /* debugging */ | |
364 | unsigned int vm_pageout_inactive_used = 0; /* debugging */ | |
365 | unsigned int vm_pageout_cache_evicted = 0; /* debugging */ | |
366 | unsigned int vm_pageout_inactive_clean = 0; /* debugging */ | |
367 | unsigned int vm_pageout_speculative_clean = 0; /* debugging */ | |
368 | ||
369 | unsigned int vm_pageout_freed_from_cleaned = 0; | |
370 | unsigned int vm_pageout_freed_from_speculative = 0; | |
371 | unsigned int vm_pageout_freed_from_inactive_clean = 0; | |
372 | ||
373 | unsigned int vm_pageout_enqueued_cleaned_from_inactive_clean = 0; | |
374 | unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0; | |
375 | ||
376 | unsigned int vm_pageout_cleaned_reclaimed = 0; /* debugging; how many cleaned pages are reclaimed by the pageout scan */ | |
377 | unsigned int vm_pageout_cleaned_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */ | |
378 | unsigned int vm_pageout_cleaned_reference_reactivated = 0; | |
379 | unsigned int vm_pageout_cleaned_volatile_reactivated = 0; | |
380 | unsigned int vm_pageout_cleaned_fault_reactivated = 0; | |
381 | unsigned int vm_pageout_cleaned_commit_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */ | |
382 | unsigned int vm_pageout_cleaned_busy = 0; | |
383 | unsigned int vm_pageout_cleaned_nolock = 0; | |
384 | ||
385 | unsigned int vm_pageout_inactive_dirty_internal = 0; /* debugging */ | |
386 | unsigned int vm_pageout_inactive_dirty_external = 0; /* debugging */ | |
387 | unsigned int vm_pageout_inactive_deactivated = 0; /* debugging */ | |
388 | unsigned int vm_pageout_inactive_anonymous = 0; /* debugging */ | |
389 | unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */ | |
390 | unsigned int vm_pageout_purged_objects = 0; /* used for sysctl vm stats */ | |
391 | unsigned int vm_stat_discard = 0; /* debugging */ | |
392 | unsigned int vm_stat_discard_sent = 0; /* debugging */ | |
393 | unsigned int vm_stat_discard_failure = 0; /* debugging */ | |
394 | unsigned int vm_stat_discard_throttle = 0; /* debugging */ | |
395 | unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */ | |
396 | unsigned int vm_pageout_catch_ups = 0; /* debugging */ | |
397 | unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */ | |
398 | ||
399 | unsigned int vm_pageout_scan_reclaimed_throttled = 0; | |
400 | unsigned int vm_pageout_scan_active_throttled = 0; | |
401 | unsigned int vm_pageout_scan_inactive_throttled_internal = 0; | |
402 | unsigned int vm_pageout_scan_inactive_throttled_external = 0; | |
403 | unsigned int vm_pageout_scan_throttle = 0; /* debugging */ | |
404 | unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */ | |
405 | unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */ | |
406 | unsigned int vm_pageout_scan_swap_throttle = 0; /* debugging */ | |
407 | unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */ | |
408 | unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */ | |
409 | unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */ | |
410 | unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0; /* debugging */ | |
411 | unsigned int vm_pageout_scan_throttle_deferred = 0; /* debugging */ | |
412 | unsigned int vm_pageout_scan_yield_unthrottled = 0; /* debugging */ | |
413 | unsigned int vm_page_speculative_count_drifts = 0; | |
414 | unsigned int vm_page_speculative_count_drift_max = 0; | |
415 | ||
416 | ||
417 | /* | |
418 | * Backing store throttle when BS is exhausted | |
419 | */ | |
420 | unsigned int vm_backing_store_low = 0; | |
421 | ||
422 | unsigned int vm_pageout_out_of_line = 0; | |
423 | unsigned int vm_pageout_in_place = 0; | |
424 | ||
425 | unsigned int vm_page_steal_pageout_page = 0; | |
426 | ||
427 | /* | |
428 | * ENCRYPTED SWAP: | |
429 | * counters and statistics... | |
430 | */ | |
431 | unsigned long vm_page_decrypt_counter = 0; | |
432 | unsigned long vm_page_decrypt_for_upl_counter = 0; | |
433 | unsigned long vm_page_encrypt_counter = 0; | |
434 | unsigned long vm_page_encrypt_abort_counter = 0; | |
435 | unsigned long vm_page_encrypt_already_encrypted_counter = 0; | |
436 | boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */ | |
437 | ||
438 | struct vm_pageout_queue vm_pageout_queue_internal; | |
439 | struct vm_pageout_queue vm_pageout_queue_external; | |
440 | ||
441 | unsigned int vm_page_speculative_target = 0; | |
442 | ||
443 | vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
444 | ||
445 | boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL; | |
446 | ||
447 | #if DEVELOPMENT || DEBUG | |
448 | unsigned long vm_cs_validated_resets = 0; | |
449 | #endif | |
450 | ||
451 | int vm_debug_events = 0; | |
452 | ||
453 | #if CONFIG_MEMORYSTATUS | |
454 | #if !CONFIG_JETSAM | |
455 | extern boolean_t memorystatus_idle_exit_from_VM(void); | |
456 | #endif | |
457 | extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async); | |
458 | extern void memorystatus_on_pageout_scan_end(void); | |
459 | #endif | |
460 | ||
461 | /* | |
462 | * Routine: vm_backing_store_disable | |
463 | * Purpose: | |
464 | * Suspend non-privileged threads wishing to extend | |
465 | * backing store when we are low on backing store | |
466 | * (Synchronized by caller) | |
467 | */ | |
468 | void | |
469 | vm_backing_store_disable( | |
470 | boolean_t disable) | |
471 | { | |
472 | if(disable) { | |
473 | vm_backing_store_low = 1; | |
474 | } else { | |
475 | if(vm_backing_store_low) { | |
476 | vm_backing_store_low = 0; | |
477 | thread_wakeup((event_t) &vm_backing_store_low); | |
478 | } | |
479 | } | |
480 | } | |
481 | ||
482 | ||
483 | #if MACH_CLUSTER_STATS | |
484 | unsigned long vm_pageout_cluster_dirtied = 0; | |
485 | unsigned long vm_pageout_cluster_cleaned = 0; | |
486 | unsigned long vm_pageout_cluster_collisions = 0; | |
487 | unsigned long vm_pageout_cluster_clusters = 0; | |
488 | unsigned long vm_pageout_cluster_conversions = 0; | |
489 | unsigned long vm_pageout_target_collisions = 0; | |
490 | unsigned long vm_pageout_target_page_dirtied = 0; | |
491 | unsigned long vm_pageout_target_page_freed = 0; | |
492 | #define CLUSTER_STAT(clause) clause | |
493 | #else /* MACH_CLUSTER_STATS */ | |
494 | #define CLUSTER_STAT(clause) | |
495 | #endif /* MACH_CLUSTER_STATS */ | |
496 | ||
497 | /* | |
498 | * Routine: vm_pageout_object_terminate | |
499 | * Purpose: | |
500 | * Destroy the pageout_object, and perform all of the | |
501 | * required cleanup actions. | |
502 | * | |
503 | * In/Out conditions: | |
504 | * The object must be locked, and will be returned locked. | |
505 | */ | |
506 | void | |
507 | vm_pageout_object_terminate( | |
508 | vm_object_t object) | |
509 | { | |
510 | vm_object_t shadow_object; | |
511 | ||
512 | /* | |
513 | * Deal with the deallocation (last reference) of a pageout object | |
514 | * (used for cleaning-in-place) by dropping the paging references/ | |
515 | * freeing pages in the original object. | |
516 | */ | |
517 | ||
518 | assert(object->pageout); | |
519 | shadow_object = object->shadow; | |
520 | vm_object_lock(shadow_object); | |
521 | ||
522 | while (!queue_empty(&object->memq)) { | |
523 | vm_page_t p, m; | |
524 | vm_object_offset_t offset; | |
525 | ||
526 | p = (vm_page_t) queue_first(&object->memq); | |
527 | ||
528 | assert(p->private); | |
529 | assert(p->pageout); | |
530 | p->pageout = FALSE; | |
531 | assert(!p->cleaning); | |
532 | assert(!p->laundry); | |
533 | ||
534 | offset = p->offset; | |
535 | VM_PAGE_FREE(p); | |
536 | p = VM_PAGE_NULL; | |
537 | ||
538 | m = vm_page_lookup(shadow_object, | |
539 | offset + object->vo_shadow_offset); | |
540 | ||
541 | if(m == VM_PAGE_NULL) | |
542 | continue; | |
543 | ||
544 | assert((m->dirty) || (m->precious) || | |
545 | (m->busy && m->cleaning)); | |
546 | ||
547 | /* | |
548 | * Handle the trusted pager throttle. | |
549 | * Also decrement the burst throttle (if external). | |
550 | */ | |
551 | vm_page_lock_queues(); | |
552 | if (m->pageout_queue) | |
553 | vm_pageout_throttle_up(m); | |
554 | ||
555 | /* | |
556 | * Handle the "target" page(s). These pages are to be freed if | |
557 | * successfully cleaned. Target pages are always busy, and are | |
558 | * wired exactly once. The initial target pages are not mapped, | |
559 | * (so cannot be referenced or modified) but converted target | |
560 | * pages may have been modified between the selection as an | |
561 | * adjacent page and conversion to a target. | |
562 | */ | |
563 | if (m->pageout) { | |
564 | assert(m->busy); | |
565 | assert(m->wire_count == 1); | |
566 | m->cleaning = FALSE; | |
567 | m->encrypted_cleaning = FALSE; | |
568 | m->pageout = FALSE; | |
569 | #if MACH_CLUSTER_STATS | |
570 | if (m->wanted) vm_pageout_target_collisions++; | |
571 | #endif | |
572 | /* | |
573 | * Revoke all access to the page. Since the object is | |
574 | * locked, and the page is busy, this prevents the page | |
575 | * from being dirtied after the pmap_disconnect() call | |
576 | * returns. | |
577 | * | |
578 | * Since the page is left "dirty" but "not modified", we | |
579 | * can detect whether the page was redirtied during | |
580 | * pageout by checking the modify state. | |
581 | */ | |
582 | if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) { | |
583 | SET_PAGE_DIRTY(m, FALSE); | |
584 | } else { | |
585 | m->dirty = FALSE; | |
586 | } | |
587 | ||
588 | if (m->dirty) { | |
589 | CLUSTER_STAT(vm_pageout_target_page_dirtied++;) | |
590 | vm_page_unwire(m, TRUE); /* reactivates */ | |
591 | VM_STAT_INCR(reactivations); | |
592 | PAGE_WAKEUP_DONE(m); | |
593 | } else { | |
594 | CLUSTER_STAT(vm_pageout_target_page_freed++;) | |
595 | vm_page_free(m);/* clears busy, etc. */ | |
596 | } | |
597 | vm_page_unlock_queues(); | |
598 | continue; | |
599 | } | |
600 | /* | |
601 | * Handle the "adjacent" pages. These pages were cleaned in | |
602 | * place, and should be left alone. | |
603 | * If prep_pin_count is nonzero, then someone is using the | |
604 | * page, so make it active. | |
605 | */ | |
606 | if (!m->active && !m->inactive && !m->throttled && !m->private) { | |
607 | if (m->reference) | |
608 | vm_page_activate(m); | |
609 | else | |
610 | vm_page_deactivate(m); | |
611 | } | |
612 | if (m->overwriting) { | |
613 | /* | |
614 | * the (COPY_OUT_FROM == FALSE) request_page_list case | |
615 | */ | |
616 | if (m->busy) { | |
617 | /* | |
618 | * We do not re-set m->dirty ! | |
619 | * The page was busy so no extraneous activity | |
620 | * could have occurred. COPY_INTO is a read into the | |
621 | * new pages. CLEAN_IN_PLACE does actually write | |
622 | * out the pages but handling outside of this code | |
623 | * will take care of resetting dirty. We clear the | |
624 | * modify however for the Programmed I/O case. | |
625 | */ | |
626 | pmap_clear_modify(m->phys_page); | |
627 | ||
628 | m->busy = FALSE; | |
629 | m->absent = FALSE; | |
630 | } else { | |
631 | /* | |
632 | * alternate (COPY_OUT_FROM == FALSE) request_page_list case | |
633 | * Occurs when the original page was wired | |
634 | * at the time of the list request | |
635 | */ | |
636 | assert(VM_PAGE_WIRED(m)); | |
637 | vm_page_unwire(m, TRUE); /* reactivates */ | |
638 | } | |
639 | m->overwriting = FALSE; | |
640 | } else { | |
641 | /* | |
642 | * Set the dirty state according to whether or not the page was | |
643 | * modified during the pageout. Note that we purposefully do | |
644 | * NOT call pmap_clear_modify since the page is still mapped. | |
645 | * If the page were to be dirtied between the 2 calls, this | |
646 | * fact would be lost. This code is only necessary to | |
647 | * maintain statistics, since the pmap module is always | |
648 | * consulted if m->dirty is false. | |
649 | */ | |
650 | #if MACH_CLUSTER_STATS | |
651 | m->dirty = pmap_is_modified(m->phys_page); | |
652 | ||
653 | if (m->dirty) vm_pageout_cluster_dirtied++; | |
654 | else vm_pageout_cluster_cleaned++; | |
655 | if (m->wanted) vm_pageout_cluster_collisions++; | |
656 | #else | |
657 | m->dirty = FALSE; | |
658 | #endif | |
659 | } | |
660 | if (m->encrypted_cleaning == TRUE) { | |
661 | m->encrypted_cleaning = FALSE; | |
662 | m->busy = FALSE; | |
663 | } | |
664 | m->cleaning = FALSE; | |
665 | ||
666 | /* | |
667 | * Wakeup any thread waiting for the page to be un-cleaning. | |
668 | */ | |
669 | PAGE_WAKEUP(m); | |
670 | vm_page_unlock_queues(); | |
671 | } | |
672 | /* | |
673 | * Account for the paging reference taken in vm_paging_object_allocate. | |
674 | */ | |
675 | vm_object_activity_end(shadow_object); | |
676 | vm_object_unlock(shadow_object); | |
677 | ||
678 | assert(object->ref_count == 0); | |
679 | assert(object->paging_in_progress == 0); | |
680 | assert(object->activity_in_progress == 0); | |
681 | assert(object->resident_page_count == 0); | |
682 | return; | |
683 | } | |
684 | ||
685 | /* | |
686 | * Routine: vm_pageclean_setup | |
687 | * | |
688 | * Purpose: setup a page to be cleaned (made non-dirty), but not | |
689 | * necessarily flushed from the VM page cache. | |
690 | * This is accomplished by cleaning in place. | |
691 | * | |
692 | * The page must not be busy, and new_object | |
693 | * must be locked. | |
694 | * | |
695 | */ | |
696 | static void | |
697 | vm_pageclean_setup( | |
698 | vm_page_t m, | |
699 | vm_page_t new_m, | |
700 | vm_object_t new_object, | |
701 | vm_object_offset_t new_offset) | |
702 | { | |
703 | assert(!m->busy); | |
704 | #if 0 | |
705 | assert(!m->cleaning); | |
706 | #endif | |
707 | ||
708 | XPR(XPR_VM_PAGEOUT, | |
709 | "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n", | |
710 | m->object, m->offset, m, | |
711 | new_m, new_offset); | |
712 | ||
713 | pmap_clear_modify(m->phys_page); | |
714 | ||
715 | /* | |
716 | * Mark original page as cleaning in place. | |
717 | */ | |
718 | m->cleaning = TRUE; | |
719 | SET_PAGE_DIRTY(m, FALSE); | |
720 | m->precious = FALSE; | |
721 | ||
722 | /* | |
723 | * Convert the fictitious page to a private shadow of | |
724 | * the real page. | |
725 | */ | |
726 | assert(new_m->fictitious); | |
727 | assert(new_m->phys_page == vm_page_fictitious_addr); | |
728 | new_m->fictitious = FALSE; | |
729 | new_m->private = TRUE; | |
730 | new_m->pageout = TRUE; | |
731 | new_m->phys_page = m->phys_page; | |
732 | ||
733 | vm_page_lockspin_queues(); | |
734 | vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE); | |
735 | vm_page_unlock_queues(); | |
736 | ||
737 | vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE); | |
738 | assert(!new_m->wanted); | |
739 | new_m->busy = FALSE; | |
740 | } | |
741 | ||
742 | /* | |
743 | * Routine: vm_pageout_initialize_page | |
744 | * Purpose: | |
745 | * Causes the specified page to be initialized in | |
746 | * the appropriate memory object. This routine is used to push | |
747 | * pages into a copy-object when they are modified in the | |
748 | * permanent object. | |
749 | * | |
750 | * The page is moved to a temporary object and paged out. | |
751 | * | |
752 | * In/out conditions: | |
753 | * The page in question must not be on any pageout queues. | |
754 | * The object to which it belongs must be locked. | |
755 | * The page must be busy, but not hold a paging reference. | |
756 | * | |
757 | * Implementation: | |
758 | * Move this page to a completely new object. | |
759 | */ | |
760 | void | |
761 | vm_pageout_initialize_page( | |
762 | vm_page_t m) | |
763 | { | |
764 | vm_object_t object; | |
765 | vm_object_offset_t paging_offset; | |
766 | memory_object_t pager; | |
767 | ||
768 | XPR(XPR_VM_PAGEOUT, | |
769 | "vm_pageout_initialize_page, page 0x%X\n", | |
770 | m, 0, 0, 0, 0); | |
771 | assert(m->busy); | |
772 | ||
773 | /* | |
774 | * Verify that we really want to clean this page | |
775 | */ | |
776 | assert(!m->absent); | |
777 | assert(!m->error); | |
778 | assert(m->dirty); | |
779 | ||
780 | /* | |
781 | * Create a paging reference to let us play with the object. | |
782 | */ | |
783 | object = m->object; | |
784 | paging_offset = m->offset + object->paging_offset; | |
785 | ||
786 | if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) { | |
787 | VM_PAGE_FREE(m); | |
788 | panic("reservation without pageout?"); /* alan */ | |
789 | vm_object_unlock(object); | |
790 | ||
791 | return; | |
792 | } | |
793 | ||
794 | /* | |
795 | * If there's no pager, then we can't clean the page. This should | |
796 | * never happen since this should be a copy object and therefore not | |
797 | * an external object, so the pager should always be there. | |
798 | */ | |
799 | ||
800 | pager = object->pager; | |
801 | ||
802 | if (pager == MEMORY_OBJECT_NULL) { | |
803 | VM_PAGE_FREE(m); | |
804 | panic("missing pager for copy object"); | |
805 | return; | |
806 | } | |
807 | ||
808 | /* | |
809 | * set the page for future call to vm_fault_list_request | |
810 | */ | |
811 | pmap_clear_modify(m->phys_page); | |
812 | SET_PAGE_DIRTY(m, FALSE); | |
813 | m->pageout = TRUE; | |
814 | ||
815 | /* | |
816 | * keep the object from collapsing or terminating | |
817 | */ | |
818 | vm_object_paging_begin(object); | |
819 | vm_object_unlock(object); | |
820 | ||
821 | /* | |
822 | * Write the data to its pager. | |
823 | * Note that the data is passed by naming the new object, | |
824 | * not a virtual address; the pager interface has been | |
825 | * manipulated to use the "internal memory" data type. | |
826 | * [The object reference from its allocation is donated | |
827 | * to the eventual recipient.] | |
828 | */ | |
829 | memory_object_data_initialize(pager, paging_offset, PAGE_SIZE); | |
830 | ||
831 | vm_object_lock(object); | |
832 | vm_object_paging_end(object); | |
833 | } | |
834 | ||
835 | #if MACH_CLUSTER_STATS | |
836 | #define MAXCLUSTERPAGES 16 | |
837 | struct { | |
838 | unsigned long pages_in_cluster; | |
839 | unsigned long pages_at_higher_offsets; | |
840 | unsigned long pages_at_lower_offsets; | |
841 | } cluster_stats[MAXCLUSTERPAGES]; | |
842 | #endif /* MACH_CLUSTER_STATS */ | |
843 | ||
844 | ||
845 | /* | |
846 | * vm_pageout_cluster: | |
847 | * | |
848 | * Given a page, queue it to the appropriate I/O thread, | |
849 | * which will page it out and attempt to clean adjacent pages | |
850 | * in the same operation. | |
851 | * | |
852 | * The object and queues must be locked. We will take a | |
853 | * paging reference to prevent deallocation or collapse when we | |
854 | * release the object lock back at the call site. The I/O thread | |
855 | * is responsible for consuming this reference | |
856 | * | |
857 | * The page must not be on any pageout queue. | |
858 | */ | |
859 | ||
860 | int | |
861 | vm_pageout_cluster(vm_page_t m, boolean_t pageout, boolean_t immediate_ok, boolean_t keep_object_locked) | |
862 | { | |
863 | vm_object_t object = m->object; | |
864 | struct vm_pageout_queue *q; | |
865 | ||
866 | ||
867 | XPR(XPR_VM_PAGEOUT, | |
868 | "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n", | |
869 | object, m->offset, m, 0, 0); | |
870 | ||
871 | VM_PAGE_CHECK(m); | |
872 | #if DEBUG | |
873 | lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); | |
874 | #endif | |
875 | vm_object_lock_assert_exclusive(object); | |
876 | ||
877 | /* | |
878 | * Only a certain kind of page is appreciated here. | |
879 | */ | |
880 | assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m))); | |
881 | assert(!m->cleaning && !m->pageout && !m->laundry); | |
882 | #ifndef CONFIG_FREEZE | |
883 | assert(!m->inactive && !m->active); | |
884 | assert(!m->throttled); | |
885 | #endif | |
886 | ||
887 | /* | |
888 | * protect the object from collapse or termination | |
889 | */ | |
890 | vm_object_activity_begin(object); | |
891 | ||
892 | m->pageout = pageout; | |
893 | ||
894 | if (object->internal == TRUE) { | |
895 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { | |
896 | m->busy = TRUE; | |
897 | ||
898 | if (vm_compressor_immediate_preferred == TRUE && immediate_ok == TRUE) { | |
899 | if (keep_object_locked == FALSE) | |
900 | vm_object_unlock(object); | |
901 | vm_page_unlock_queues(); | |
902 | ||
903 | vm_pageout_immediate(m, keep_object_locked); | |
904 | ||
905 | return (1); | |
906 | } | |
907 | } | |
908 | q = &vm_pageout_queue_internal; | |
909 | } else | |
910 | q = &vm_pageout_queue_external; | |
911 | ||
912 | /* | |
913 | * pgo_laundry count is tied to the laundry bit | |
914 | */ | |
915 | m->laundry = TRUE; | |
916 | q->pgo_laundry++; | |
917 | ||
918 | m->pageout_queue = TRUE; | |
919 | queue_enter(&q->pgo_pending, m, vm_page_t, pageq); | |
920 | ||
921 | if (q->pgo_idle == TRUE) { | |
922 | q->pgo_idle = FALSE; | |
923 | thread_wakeup((event_t) &q->pgo_pending); | |
924 | } | |
925 | VM_PAGE_CHECK(m); | |
926 | ||
927 | return (0); | |
928 | } | |
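/*
 * Illustrative caller pattern (not part of the original source): the
 * scan code invokes this with both the page queues and the object
 * locked, e.g.
 *
 *	vm_pageout_cluster(m, TRUE, FALSE, FALSE);
 *
 * as vm_pageout_page_queue() does below.  A return of 1 means the page
 * was handed straight to the compressor via vm_pageout_immediate() and
 * the page queues lock was dropped; a return of 0 means the page was
 * queued for the internal or external pageout I/O thread.
 */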
929 | ||
930 | ||
931 | unsigned long vm_pageout_throttle_up_count = 0; | |
932 | ||
933 | /* | |
934 | * A page is back from laundry or we are stealing it back from | |
935 | * the laundering state. See if there are some pages waiting to | |
936 | * go to laundry and if we can let some of them go now. | |
937 | * | |
938 | * Object and page queues must be locked. | |
939 | */ | |
940 | void | |
941 | vm_pageout_throttle_up( | |
942 | vm_page_t m) | |
943 | { | |
944 | struct vm_pageout_queue *q; | |
945 | ||
946 | assert(m->object != VM_OBJECT_NULL); | |
947 | assert(m->object != kernel_object); | |
948 | ||
949 | #if DEBUG | |
950 | lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); | |
951 | vm_object_lock_assert_exclusive(m->object); | |
952 | #endif | |
953 | ||
954 | vm_pageout_throttle_up_count++; | |
955 | ||
956 | if (m->object->internal == TRUE) | |
957 | q = &vm_pageout_queue_internal; | |
958 | else | |
959 | q = &vm_pageout_queue_external; | |
960 | ||
961 | if (m->pageout_queue == TRUE) { | |
962 | ||
963 | queue_remove(&q->pgo_pending, m, vm_page_t, pageq); | |
964 | m->pageout_queue = FALSE; | |
965 | ||
966 | m->pageq.next = NULL; | |
967 | m->pageq.prev = NULL; | |
968 | ||
969 | vm_object_activity_end(m->object); | |
970 | } | |
971 | if (m->laundry == TRUE) { | |
972 | ||
973 | m->laundry = FALSE; | |
974 | q->pgo_laundry--; | |
975 | ||
976 | if (q->pgo_throttled == TRUE) { | |
977 | q->pgo_throttled = FALSE; | |
978 | thread_wakeup((event_t) &q->pgo_laundry); | |
979 | } | |
980 | if (q->pgo_draining == TRUE && q->pgo_laundry == 0) { | |
981 | q->pgo_draining = FALSE; | |
982 | thread_wakeup((event_t) (&q->pgo_laundry+1)); | |
983 | } | |
984 | } | |
985 | } | |
986 | ||
987 | ||
988 | static void | |
989 | vm_pageout_throttle_up_batch( | |
990 | struct vm_pageout_queue *q, | |
991 | int batch_cnt) | |
992 | { | |
993 | #if DEBUG | |
994 | lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); | |
995 | #endif | |
996 | ||
997 | vm_pageout_throttle_up_count += batch_cnt; | |
998 | ||
999 | q->pgo_laundry -= batch_cnt; | |
1000 | ||
1001 | if (q->pgo_throttled == TRUE) { | |
1002 | q->pgo_throttled = FALSE; | |
1003 | thread_wakeup((event_t) &q->pgo_laundry); | |
1004 | } | |
1005 | if (q->pgo_draining == TRUE && q->pgo_laundry == 0) { | |
1006 | q->pgo_draining = FALSE; | |
1007 | thread_wakeup((event_t) (&q->pgo_laundry+1)); | |
1008 | } | |
1009 | } | |
1010 | ||
1011 | ||
1012 | ||
1013 | /* | |
1014 | * VM memory pressure monitoring. | |
1015 | * | |
1016 | * vm_pageout_scan() keeps track of the number of pages it considers and | |
1017 | * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now]. | |
1018 | * | |
1019 | * compute_memory_pressure() is called every second from compute_averages() | |
1020 | * and moves "vm_pageout_stat_now" forward, to start accumulating the number | |
1021 | * of reclaimed pages in a new vm_pageout_stat[] bucket. | |
1022 | * | |
1023 | * mach_vm_pressure_monitor() collects past statistics about memory pressure. | |
1024 | * The caller provides the number of seconds ("nsecs") worth of statistics | |
1025 | * it wants, up to 30 seconds. | |
1026 | * It computes the number of pages reclaimed in the past "nsecs" seconds and | |
1027 | * also returns the number of pages the system still needs to reclaim at this | |
1028 | * moment in time. | |
1029 | */ | |
1030 | #define VM_PAGEOUT_STAT_SIZE 31 | |
1031 | struct vm_pageout_stat { | |
1032 | unsigned int considered; | |
1033 | unsigned int reclaimed; | |
1034 | } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0}, }; | |
1035 | unsigned int vm_pageout_stat_now = 0; | |
1036 | unsigned int vm_memory_pressure = 0; | |
1037 | ||
1038 | #define VM_PAGEOUT_STAT_BEFORE(i) \ | |
1039 | (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1) | |
1040 | #define VM_PAGEOUT_STAT_AFTER(i) \ | |
1041 | (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1) | |
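/*
 * Sketch of the ring-buffer bookkeeping above (illustrative only):
 * vm_pageout_stat_now advances once per second and wraps at
 * VM_PAGEOUT_STAT_SIZE (31), so the array holds roughly the last 30
 * seconds of "considered"/"reclaimed" counts.  For example:
 *
 *	VM_PAGEOUT_STAT_BEFORE(0)  == 30	(previous second, wrapped)
 *	VM_PAGEOUT_STAT_AFTER(30)  == 0		(next second, wrapped)
 *
 * mach_vm_pressure_monitor() walks backwards with
 * VM_PAGEOUT_STAT_BEFORE() to sum reclaimed pages over the requested
 * number of seconds.
 */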
1042 | ||
1043 | #if VM_PAGE_BUCKETS_CHECK | |
1044 | int vm_page_buckets_check_interval = 10; /* in seconds */ | |
1045 | #endif /* VM_PAGE_BUCKETS_CHECK */ | |
1046 | ||
1047 | /* | |
1048 | * Called from compute_averages(). | |
1049 | */ | |
1050 | void | |
1051 | compute_memory_pressure( | |
1052 | __unused void *arg) | |
1053 | { | |
1054 | unsigned int vm_pageout_next; | |
1055 | ||
1056 | #if VM_PAGE_BUCKETS_CHECK | |
1057 | /* check the consistency of VM page buckets at regular interval */ | |
1058 | static int counter = 0; | |
1059 | if ((++counter % vm_page_buckets_check_interval) == 0) { | |
1060 | vm_page_buckets_check(); | |
1061 | } | |
1062 | #endif /* VM_PAGE_BUCKETS_CHECK */ | |
1063 | ||
1064 | vm_memory_pressure = | |
1065 | vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed; | |
1066 | ||
1067 | commpage_set_memory_pressure( vm_memory_pressure ); | |
1068 | ||
1069 | /* move "now" forward */ | |
1070 | vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now); | |
1071 | vm_pageout_stats[vm_pageout_next].considered = 0; | |
1072 | vm_pageout_stats[vm_pageout_next].reclaimed = 0; | |
1073 | vm_pageout_stat_now = vm_pageout_next; | |
1074 | } | |
1075 | ||
1076 | ||
1077 | /* | |
1078 | * IMPORTANT | |
1079 | * mach_vm_ctl_page_free_wanted() is called indirectly, via | |
1080 | * mach_vm_pressure_monitor(), when taking a stackshot. Therefore, | |
1081 | * it must be safe in the restricted stackshot context. Locks and/or | |
1082 | * blocking are not allowable. | |
1083 | */ | |
1084 | unsigned int | |
1085 | mach_vm_ctl_page_free_wanted(void) | |
1086 | { | |
1087 | unsigned int page_free_target, page_free_count, page_free_wanted; | |
1088 | ||
1089 | page_free_target = vm_page_free_target; | |
1090 | page_free_count = vm_page_free_count; | |
1091 | if (page_free_target > page_free_count) { | |
1092 | page_free_wanted = page_free_target - page_free_count; | |
1093 | } else { | |
1094 | page_free_wanted = 0; | |
1095 | } | |
1096 | ||
1097 | return page_free_wanted; | |
1098 | } | |
1099 | ||
1100 | ||
1101 | /* | |
1102 | * IMPORTANT: | |
1103 | * mach_vm_pressure_monitor() is called when taking a stackshot, with | |
1104 | * wait_for_pressure FALSE, so that code path must remain safe in the | |
1105 | * restricted stackshot context. No blocking or locks are allowable | |
1106 | * on that code path. | |
1107 | */ | |
1108 | ||
1109 | kern_return_t | |
1110 | mach_vm_pressure_monitor( | |
1111 | boolean_t wait_for_pressure, | |
1112 | unsigned int nsecs_monitored, | |
1113 | unsigned int *pages_reclaimed_p, | |
1114 | unsigned int *pages_wanted_p) | |
1115 | { | |
1116 | wait_result_t wr; | |
1117 | unsigned int vm_pageout_then, vm_pageout_now; | |
1118 | unsigned int pages_reclaimed; | |
1119 | ||
1120 | /* | |
1121 | * We don't take the vm_page_queue_lock here because we don't want | |
1122 | * vm_pressure_monitor() to get in the way of the vm_pageout_scan() | |
1123 | * thread when it's trying to reclaim memory. We don't need fully | |
1124 | * accurate monitoring anyway... | |
1125 | */ | |
1126 | ||
1127 | if (wait_for_pressure) { | |
1128 | /* wait until there's memory pressure */ | |
1129 | while (vm_page_free_count >= vm_page_free_target) { | |
1130 | wr = assert_wait((event_t) &vm_page_free_wanted, | |
1131 | THREAD_INTERRUPTIBLE); | |
1132 | if (wr == THREAD_WAITING) { | |
1133 | wr = thread_block(THREAD_CONTINUE_NULL); | |
1134 | } | |
1135 | if (wr == THREAD_INTERRUPTED) { | |
1136 | return KERN_ABORTED; | |
1137 | } | |
1138 | if (wr == THREAD_AWAKENED) { | |
1139 | /* | |
1140 | * The memory pressure might have already | |
1141 | * been relieved but let's not block again | |
1142 | * and let's report that there was memory | |
1143 | * pressure at some point. | |
1144 | */ | |
1145 | break; | |
1146 | } | |
1147 | } | |
1148 | } | |
1149 | ||
1150 | /* provide the number of pages the system wants to reclaim */ | |
1151 | if (pages_wanted_p != NULL) { | |
1152 | *pages_wanted_p = mach_vm_ctl_page_free_wanted(); | |
1153 | } | |
1154 | ||
1155 | if (pages_reclaimed_p == NULL) { | |
1156 | return KERN_SUCCESS; | |
1157 | } | |
1158 | ||
1159 | /* provide number of pages reclaimed in the last "nsecs_monitored" */ | |
1160 | do { | |
1161 | vm_pageout_now = vm_pageout_stat_now; | |
1162 | pages_reclaimed = 0; | |
1163 | for (vm_pageout_then = | |
1164 | VM_PAGEOUT_STAT_BEFORE(vm_pageout_now); | |
1165 | vm_pageout_then != vm_pageout_now && | |
1166 | nsecs_monitored-- != 0; | |
1167 | vm_pageout_then = | |
1168 | VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) { | |
1169 | pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed; | |
1170 | } | |
1171 | } while (vm_pageout_now != vm_pageout_stat_now); | |
1172 | *pages_reclaimed_p = pages_reclaimed; | |
1173 | ||
1174 | return KERN_SUCCESS; | |
1175 | } | |
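/*
 * Hypothetical caller (illustrative only, not part of this file):
 * fetch the pages reclaimed over the last 10 seconds without blocking,
 * plus the current shortfall.
 *
 *	unsigned int reclaimed, wanted;
 *
 *	if (mach_vm_pressure_monitor(FALSE, 10,
 *				     &reclaimed, &wanted) == KERN_SUCCESS) {
 *		// reclaimed: pages freed by the pageout daemon recently
 *		// wanted: pages still needed to reach vm_page_free_target
 *	}
 */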
1176 | ||
1177 | ||
1178 | ||
1179 | static void | |
1180 | vm_pageout_page_queue(queue_head_t *, int); | |
1181 | ||
1182 | /* | |
1183 | * condition variable used to make sure there is | |
1184 | * only a single sweep going on at a time | |
1185 | */ | |
1186 | boolean_t vm_pageout_anonymous_pages_active = FALSE; | |
1187 | ||
1188 | ||
1189 | void | |
1190 | vm_pageout_anonymous_pages() | |
1191 | { | |
1192 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { | |
1193 | ||
1194 | vm_page_lock_queues(); | |
1195 | ||
1196 | if (vm_pageout_anonymous_pages_active == TRUE) { | |
1197 | vm_page_unlock_queues(); | |
1198 | return; | |
1199 | } | |
1200 | vm_pageout_anonymous_pages_active = TRUE; | |
1201 | vm_page_unlock_queues(); | |
1202 | ||
1203 | vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count); | |
1204 | vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count); | |
1205 | vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count); | |
1206 | ||
1207 | vm_consider_swapping(); | |
1208 | ||
1209 | vm_page_lock_queues(); | |
1210 | vm_pageout_anonymous_pages_active = FALSE; | |
1211 | vm_page_unlock_queues(); | |
1212 | } | |
1213 | } | |
1214 | ||
1215 | ||
1216 | void | |
1217 | vm_pageout_page_queue(queue_head_t *q, int qcount) | |
1218 | { | |
1219 | vm_page_t m; | |
1220 | vm_object_t t_object = NULL; | |
1221 | vm_object_t l_object = NULL; | |
1222 | vm_object_t m_object = NULL; | |
1223 | int delayed_unlock = 0; | |
1224 | int try_failed_count = 0; | |
1225 | int refmod_state; | |
1226 | int pmap_options; | |
1227 | struct vm_pageout_queue *iq; | |
1228 | ||
1229 | ||
1230 | iq = &vm_pageout_queue_internal; | |
1231 | ||
1232 | vm_page_lock_queues(); | |
1233 | ||
1234 | while (qcount && !queue_empty(q)) { | |
1235 | ||
1236 | lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); | |
1237 | ||
1238 | if (VM_PAGE_Q_THROTTLED(iq)) { | |
1239 | ||
1240 | if (l_object != NULL) { | |
1241 | vm_object_unlock(l_object); | |
1242 | l_object = NULL; | |
1243 | } | |
1244 | iq->pgo_draining = TRUE; | |
1245 | ||
1246 | assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE); | |
1247 | vm_page_unlock_queues(); | |
1248 | ||
1249 | thread_block(THREAD_CONTINUE_NULL); | |
1250 | ||
1251 | vm_page_lock_queues(); | |
1252 | delayed_unlock = 0; | |
1253 | continue; | |
1254 | } | |
1255 | m = (vm_page_t) queue_first(q); | |
1256 | m_object = m->object; | |
1257 | ||
1258 | /* | |
1259 | * check to see if we currently are working | |
1260 | * with the same object... if so, we've | |
1261 | * already got the lock | |
1262 | */ | |
1263 | if (m_object != l_object) { | |
1264 | if ( !m_object->internal) | |
1265 | goto reenter_pg_on_q; | |
1266 | ||
1267 | /* | |
1268 | * the object associated with candidate page is | |
1269 | * different from the one we were just working | |
1270 | * with... dump the lock if we still own it | |
1271 | */ | |
1272 | if (l_object != NULL) { | |
1273 | vm_object_unlock(l_object); | |
1274 | l_object = NULL; | |
1275 | } | |
1276 | if (m_object != t_object) | |
1277 | try_failed_count = 0; | |
1278 | ||
1279 | /* | |
1280 | * Try to lock object; since we've already got the | |
1281 | * page queues lock, we can only 'try' for this one. | |
1282 | * if the 'try' fails, we need to do a mutex_pause | |
1283 | * to allow the owner of the object lock a chance to | |
1284 | * run... | |
1285 | */ | |
1286 | if ( !vm_object_lock_try_scan(m_object)) { | |
1287 | ||
1288 | if (try_failed_count > 20) { | |
1289 | goto reenter_pg_on_q; | |
1290 | } | |
1291 | vm_page_unlock_queues(); | |
1292 | mutex_pause(try_failed_count++); | |
1293 | vm_page_lock_queues(); | |
1294 | delayed_unlock = 0; | |
1295 | ||
1296 | t_object = m_object; | |
1297 | continue; | |
1298 | } | |
1299 | l_object = m_object; | |
1300 | } | |
1301 | if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error || m->pageout) { | |
1302 | /* | |
1303 | * page is not to be cleaned | |
1304 | * put it back on the head of its queue | |
1305 | */ | |
1306 | goto reenter_pg_on_q; | |
1307 | } | |
1308 | if (m->reference == FALSE && m->pmapped == TRUE) { | |
1309 | refmod_state = pmap_get_refmod(m->phys_page); | |
1310 | ||
1311 | if (refmod_state & VM_MEM_REFERENCED) | |
1312 | m->reference = TRUE; | |
1313 | if (refmod_state & VM_MEM_MODIFIED) { | |
1314 | SET_PAGE_DIRTY(m, FALSE); | |
1315 | } | |
1316 | } | |
1317 | if (m->reference == TRUE) { | |
1318 | m->reference = FALSE; | |
1319 | pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL); | |
1320 | goto reenter_pg_on_q; | |
1321 | } | |
1322 | if (m->pmapped == TRUE) { | |
1323 | if (m->dirty || m->precious) { | |
1324 | pmap_options = PMAP_OPTIONS_COMPRESSOR; | |
1325 | } else { | |
1326 | pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED; | |
1327 | } | |
1328 | refmod_state = pmap_disconnect_options(m->phys_page, pmap_options, NULL); | |
1329 | if (refmod_state & VM_MEM_MODIFIED) { | |
1330 | SET_PAGE_DIRTY(m, FALSE); | |
1331 | } | |
1332 | } | |
1333 | if ( !m->dirty && !m->precious) { | |
1334 | vm_page_unlock_queues(); | |
1335 | VM_PAGE_FREE(m); | |
1336 | vm_page_lock_queues(); | |
1337 | delayed_unlock = 0; | |
1338 | ||
1339 | goto next_pg; | |
1340 | } | |
1341 | if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) { | |
1342 | ||
1343 | if (!m_object->pager_initialized) { | |
1344 | ||
1345 | vm_page_unlock_queues(); | |
1346 | ||
1347 | vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE); | |
1348 | ||
1349 | if (!m_object->pager_initialized) | |
1350 | vm_object_compressor_pager_create(m_object); | |
1351 | ||
1352 | vm_page_lock_queues(); | |
1353 | delayed_unlock = 0; | |
1354 | } | |
1355 | if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) | |
1356 | goto reenter_pg_on_q; | |
1357 | /* | |
1358 | * vm_object_compressor_pager_create will drop the object lock | |
1359 | * which means 'm' may no longer be valid to use | |
1360 | */ | |
1361 | continue; | |
1362 | } | |
1363 | /* | |
1364 | * we've already factored out pages in the laundry which | |
1365 | * means this page can't be on the pageout queue so it's | |
1366 | * safe to do the vm_page_queues_remove | |
1367 | */ | |
1368 | assert(!m->pageout_queue); | |
1369 | ||
1370 | vm_page_queues_remove(m); | |
1371 | ||
1372 | lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); | |
1373 | ||
1374 | vm_pageout_cluster(m, TRUE, FALSE, FALSE); | |
1375 | ||
1376 | goto next_pg; | |
1377 | ||
1378 | reenter_pg_on_q: | |
1379 | queue_remove(q, m, vm_page_t, pageq); | |
1380 | queue_enter(q, m, vm_page_t, pageq); | |
1381 | next_pg: | |
1382 | qcount--; | |
1383 | try_failed_count = 0; | |
1384 | ||
1385 | if (delayed_unlock++ > 128) { | |
1386 | ||
1387 | if (l_object != NULL) { | |
1388 | vm_object_unlock(l_object); | |
1389 | l_object = NULL; | |
1390 | } | |
1391 | lck_mtx_yield(&vm_page_queue_lock); | |
1392 | delayed_unlock = 0; | |
1393 | } | |
1394 | } | |
1395 | if (l_object != NULL) { | |
1396 | vm_object_unlock(l_object); | |
1397 | l_object = NULL; | |
1398 | } | |
1399 | vm_page_unlock_queues(); | |
1400 | } | |
1401 | ||
1402 | ||
1403 | ||
1404 | /* | |
1405 | * function in BSD to apply I/O throttle to the pageout thread | |
1406 | */ | |
1407 | extern void vm_pageout_io_throttle(void); | |
1408 | ||
1409 | /* | |
1410 | * Page States: Used below to maintain the page state | |
1411 | * before it's removed from its Q. This saved state | |
1412 | * helps us do the right accounting in certain cases | |
1413 | */ | |
1414 | #define PAGE_STATE_SPECULATIVE 1 | |
1415 | #define PAGE_STATE_ANONYMOUS 2 | |
1416 | #define PAGE_STATE_INACTIVE 3 | |
1417 | #define PAGE_STATE_INACTIVE_FIRST 4 | |
1418 | #define PAGE_STATE_CLEAN 5 | |
1419 | ||
1420 | ||
1421 | #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m) \ | |
1422 | MACRO_BEGIN \ | |
1423 | /* \ | |
1424 | * If a "reusable" page somehow made it back into \ | |
1425 | * the active queue, it's been re-used and is not \ | |
1426 | * quite re-usable. \ | |
1427 | * If the VM object was "all_reusable", consider it \ | |
1428 | * as "all re-used" instead of converting it to \ | |
1429 | * "partially re-used", which could be expensive. \ | |
1430 | */ \ | |
1431 | if ((m)->reusable || \ | |
1432 | (m)->object->all_reusable) { \ | |
1433 | vm_object_reuse_pages((m)->object, \ | |
1434 | (m)->offset, \ | |
1435 | (m)->offset + PAGE_SIZE_64, \ | |
1436 | FALSE); \ | |
1437 | } \ | |
1438 | MACRO_END | |
1439 | ||
1440 | ||
1441 | #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64 | |
1442 | #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024 | |
1443 | ||
1444 | #define FCS_IDLE 0 | |
1445 | #define FCS_DELAYED 1 | |
1446 | #define FCS_DEADLOCK_DETECTED 2 | |
1447 | ||
1448 | struct flow_control { | |
1449 | int state; | |
1450 | mach_timespec_t ts; | |
1451 | }; | |
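/*
 * Rough sketch of how this flow-control state is used later in the
 * file (a hedged summary based on the constants above, not a verbatim
 * description): vm_pageout_scan() moves from FCS_IDLE to FCS_DELAYED
 * when the internal pageout queue is throttled, stamping "ts" with a
 * deadline roughly VM_PAGEOUT_DEADLOCK_WAIT milliseconds away.  If the
 * laundry has not drained by then, the state becomes
 * FCS_DEADLOCK_DETECTED and the scan forcibly reclaims
 * VM_PAGEOUT_DEADLOCK_RELIEF pages to break the deadlock.
 */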
1452 | ||
1453 | uint32_t vm_pageout_considered_page = 0; | |
1454 | uint32_t vm_page_filecache_min = 0; | |
1455 | ||
1456 | #define ANONS_GRABBED_LIMIT 2 | |
1457 | ||
1458 | /* | |
1459 | * vm_pageout_scan does the dirty work for the pageout daemon. | |
1460 | * It returns with both vm_page_queue_free_lock and vm_page_queue_lock | |
1461 | * held and vm_page_free_wanted == 0. | |
1462 | */ | |
1463 | void | |
1464 | vm_pageout_scan(void) | |
1465 | { | |
1466 | unsigned int loop_count = 0; | |
1467 | unsigned int inactive_burst_count = 0; | |
1468 | unsigned int active_burst_count = 0; | |
1469 | unsigned int reactivated_this_call; | |
1470 | unsigned int reactivate_limit; | |
1471 | vm_page_t local_freeq = NULL; | |
1472 | int local_freed = 0; | |
1473 | int delayed_unlock; | |
1474 | int delayed_unlock_limit = 0; | |
1475 | int refmod_state = 0; | |
1476 | int vm_pageout_deadlock_target = 0; | |
1477 | struct vm_pageout_queue *iq; | |
1478 | struct vm_pageout_queue *eq; | |
1479 | struct vm_speculative_age_q *sq; | |
1480 | struct flow_control flow_control = { 0, { 0, 0 } }; | |
1481 | boolean_t inactive_throttled = FALSE; | |
1482 | boolean_t try_failed; | |
1483 | mach_timespec_t ts; | |
1484 | unsigned int msecs = 0; | |
1485 | vm_object_t object; | |
1486 | vm_object_t last_object_tried; | |
1487 | uint32_t catch_up_count = 0; | |
1488 | uint32_t inactive_reclaim_run; | |
1489 | boolean_t forced_reclaim; | |
1490 | boolean_t exceeded_burst_throttle; | |
1491 | boolean_t grab_anonymous = FALSE; | |
1492 | boolean_t force_anonymous = FALSE; | |
1493 | int anons_grabbed = 0; | |
1494 | int page_prev_state = 0; | |
1495 | int cache_evict_throttle = 0; | |
1496 | uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0; | |
1497 | int force_purge = 0; | |
1498 | #define DELAY_SPECULATIVE_AGE 1000 | |
1499 | int delay_speculative_age = 0; | |
1500 | ||
1501 | #if VM_PRESSURE_EVENTS | |
1502 | vm_pressure_level_t pressure_level; | |
1503 | #endif /* VM_PRESSURE_EVENTS */ | |
1504 | ||
1505 | VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START, | |
1506 | vm_pageout_speculative_clean, vm_pageout_inactive_clean, | |
1507 | vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external); | |
1508 | ||
1509 | flow_control.state = FCS_IDLE; | |
1510 | iq = &vm_pageout_queue_internal; | |
1511 | eq = &vm_pageout_queue_external; | |
1512 | sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q]; | |
1513 | ||
1514 | ||
1515 | XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0); | |
1516 | ||
1517 | ||
1518 | vm_page_lock_queues(); | |
1519 | delayed_unlock = 1; /* must be nonzero if Qs are locked, 0 if unlocked */ | |
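| /* | |
| * delayed_unlock also counts the pages processed while the page queues | |
| * lock is held; once it exceeds delayed_unlock_limit, the lock is dropped | |
| * (and any locally collected free pages are released) so that other | |
| * threads get a chance at the lock. | |
| */ | |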
1520 | ||
1521 | /* | |
1522 | * Calculate the max number of referenced pages on the inactive | |
1523 | * queue that we will reactivate. | |
1524 | */ | |
1525 | reactivated_this_call = 0; | |
1526 | reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count + | |
1527 | vm_page_inactive_count); | |
1528 | inactive_reclaim_run = 0; | |
1529 | ||
1530 | vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count; | |
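| /* | |
| * this limit caps how many dirty, file-backed pages can be force- | |
| * reactivated (see throttle_inactive below) while the external pageout | |
| * queue is throttled; on CONFIG_JETSAM systems, exhausting it triggers | |
| * a jetsam kill instead of reactivating indefinitely | |
| */ | |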
1531 | ||
1532 | /* | |
1533 | * We want to gradually dribble pages from the active queue | |
1534 | * to the inactive queue. If we let the inactive queue get | |
1535 | * very small, and then suddenly dump many pages into it, | |
1536 | * those pages won't get a sufficient chance to be referenced | |
1537 | * before we start taking them from the inactive queue. | |
1538 | * | |
1539 | * We must limit the rate at which we send pages to the pagers | |
1540 | * so that we don't tie up too many pages in the I/O queues. | |
1541 | * We implement a throttling mechanism using the laundry count | |
1542 | * to limit the number of pages outstanding to the default | |
1543 | * and external pagers. We can bypass the throttles and look | |
1544 | * for clean pages if the pageout queues don't drain in a timely | |
1545 | * fashion since this may indicate that the pageout paths are | |
1546 | * stalled waiting for memory, which only we can provide. | |
1547 | */ | |
1548 | ||
1549 | ||
1550 | Restart: | |
1551 | assert(delayed_unlock!=0); | |
1552 | ||
1553 | /* | |
1554 | * Recalculate vm_page_inactive_target. | |
1555 | */ | |
1556 | vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count + | |
1557 | vm_page_inactive_count + | |
1558 | vm_page_speculative_count); | |
1559 | ||
1560 | vm_page_anonymous_min = vm_page_inactive_target / 20; | |
1561 | ||
1562 | ||
1563 | /* | |
1564 | * don't want to wake the pageout_scan thread up every time we fall below | |
1565 | * the targets... set a low water mark at 0.25% below the target | |
1566 | */ | |
1567 | vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400); | |
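| /* e.g. with an inactive target of 100000 pages, the low water mark is | |
| * 100000 - (100000 / 400) = 99750, i.e. 0.25% below the target */ | |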
1568 | ||
1569 | if (vm_page_speculative_percentage > 50) | |
1570 | vm_page_speculative_percentage = 50; | |
1571 | else if (vm_page_speculative_percentage <= 0) | |
1572 | vm_page_speculative_percentage = 1; | |
1573 | ||
1574 | vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count + | |
1575 | vm_page_inactive_count); | |
1576 | ||
1577 | object = NULL; | |
1578 | last_object_tried = NULL; | |
1579 | try_failed = FALSE; | |
1580 | ||
1581 | if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count)) | |
1582 | catch_up_count = vm_page_inactive_count + vm_page_speculative_count; | |
1583 | else | |
1584 | catch_up_count = 0; | |
1585 | ||
1586 | for (;;) { | |
1587 | vm_page_t m; | |
1588 | ||
1589 | DTRACE_VM2(rev, int, 1, (uint64_t *), NULL); | |
1590 | ||
1591 | assert(delayed_unlock); | |
1592 | ||
1593 | if (vm_upl_wait_for_pages < 0) | |
1594 | vm_upl_wait_for_pages = 0; | |
1595 | ||
1596 | delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages; | |
1597 | ||
1598 | if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX) | |
1599 | delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX; | |
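| /* | |
| * i.e. hold the page queues lock for at most 64 + vm_upl_wait_for_pages | |
| * iterations (capped at 1024) before dropping it; for example, 200 pages | |
| * pending in UPLs would allow 264 iterations per lock hold | |
| */ | |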
1600 | ||
1601 | /* | |
1602 | * Move pages from active to inactive if we're below the target | |
1603 | */ | |
1604 | /* if we are trying to make clean, we need to make sure we actually have inactive - mj */ | |
1605 | if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target) | |
1606 | goto done_moving_active_pages; | |
1607 | ||
1608 | if (object != NULL) { | |
1609 | vm_object_unlock(object); | |
1610 | object = NULL; | |
1611 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
1612 | } | |
1613 | /* | |
1614 | * Don't sweep through active queue more than the throttle | |
1615 | * which should be kept relatively low | |
1616 | */ | |
1617 | active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count); | |
1618 | ||
1619 | VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START, | |
1620 | vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed); | |
1621 | ||
1622 | VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE, | |
1623 | vm_pageout_speculative_clean, vm_pageout_inactive_clean, | |
1624 | vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external); | |
1625 | memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START); | |
1626 | ||
1627 | ||
1628 | while (!queue_empty(&vm_page_queue_active) && active_burst_count--) { | |
1629 | ||
1630 | vm_pageout_active++; | |
1631 | ||
1632 | m = (vm_page_t) queue_first(&vm_page_queue_active); | |
1633 | ||
1634 | assert(m->active && !m->inactive); | |
1635 | assert(!m->laundry); | |
1636 | assert(m->object != kernel_object); | |
1637 | assert(m->phys_page != vm_page_guard_addr); | |
1638 | ||
1639 | DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); | |
1640 | ||
1641 | /* | |
1642 | * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise... | |
1643 | * | |
1644 | * a TLB flush isn't really needed here since at worst we'll miss the reference bit being | |
1645 | * updated in the PTE if a remote processor still has this mapping cached in its TLB when the | |
1646 | * new reference happens. If no further references happen on the page after that remote TLB flushes | |
1647 | * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue | |
1648 | * by pageout_scan, which is just fine since the last reference would have happened quite far | |
1649 | * in the past (TLB caches don't hang around for very long), and of course could just as easily | |
1650 | * have happened before we moved the page | |
1651 | */ | |
1652 | pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL); | |
1653 | ||
1654 | /* | |
1655 | * The page might be absent or busy, | |
1656 | * but vm_page_deactivate can handle that. | |
1657 | * FALSE indicates that we don't want a H/W clear reference | |
1658 | */ | |
1659 | vm_page_deactivate_internal(m, FALSE); | |
1660 | ||
1661 | if (delayed_unlock++ > delayed_unlock_limit) { | |
1662 | ||
1663 | if (local_freeq) { | |
1664 | vm_page_unlock_queues(); | |
1665 | ||
1666 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, | |
1667 | vm_page_free_count, local_freed, delayed_unlock_limit, 1); | |
1668 | ||
1669 | vm_page_free_list(local_freeq, TRUE); | |
1670 | ||
1671 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, | |
1672 | vm_page_free_count, 0, 0, 1); | |
1673 | ||
1674 | local_freeq = NULL; | |
1675 | local_freed = 0; | |
1676 | vm_page_lock_queues(); | |
1677 | } else { | |
1678 | lck_mtx_yield(&vm_page_queue_lock); | |
1679 | } | |
1680 | ||
1681 | delayed_unlock = 1; | |
1682 | ||
1683 | /* | |
1684 | * continue the while loop processing | |
1685 | * the active queue... need to hold | |
1686 | * the page queues lock | |
1687 | */ | |
1688 | } | |
1689 | } | |
1690 | ||
1691 | VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END, | |
1692 | vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target); | |
1693 | memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END); | |
1694 | ||
1695 | /********************************************************************** | |
1696 | * above this point we're playing with the active queue | |
1697 | * below this point we're playing with the throttling mechanisms | |
1698 | * and the inactive queue | |
1699 | **********************************************************************/ | |
1700 | ||
1701 | done_moving_active_pages: | |
1702 | ||
1703 | if (vm_page_free_count + local_freed >= vm_page_free_target) { | |
1704 | if (object != NULL) { | |
1705 | vm_object_unlock(object); | |
1706 | object = NULL; | |
1707 | } | |
1708 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
1709 | ||
1710 | vm_page_unlock_queues(); | |
1711 | ||
1712 | if (local_freeq) { | |
1713 | ||
1714 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, | |
1715 | vm_page_free_count, local_freed, delayed_unlock_limit, 2); | |
1716 | ||
1717 | vm_page_free_list(local_freeq, TRUE); | |
1718 | ||
1719 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, | |
1720 | vm_page_free_count, local_freed, 0, 2); | |
1721 | ||
1722 | local_freeq = NULL; | |
1723 | local_freed = 0; | |
1724 | } | |
1725 | vm_consider_waking_compactor_swapper(); | |
1726 | ||
1727 | vm_page_lock_queues(); | |
1728 | ||
1729 | /* | |
1730 | * make sure the pageout I/O threads are running | |
1731 | * throttled in case there are still requests | |
1732 | * in the laundry... since we have met our targets | |
1733 | * we don't need the laundry to be cleaned in a timely | |
1734 | * fashion... so let's avoid interfering with foreground | |
1735 | * activity | |
1736 | */ | |
1737 | vm_pageout_adjust_io_throttles(iq, eq, TRUE); | |
1738 | ||
1739 | /* | |
1740 | * recalculate vm_page_inactive_target | |
1741 | */ | |
1742 | vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count + | |
1743 | vm_page_inactive_count + | |
1744 | vm_page_speculative_count); | |
1745 | if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && | |
1746 | !queue_empty(&vm_page_queue_active)) { | |
1747 | /* | |
1748 | * inactive target still not met... keep going | |
1749 | * until we get the queues balanced... | |
1750 | */ | |
1751 | continue; | |
1752 | } | |
1753 | lck_mtx_lock(&vm_page_queue_free_lock); | |
1754 | ||
1755 | if ((vm_page_free_count >= vm_page_free_target) && | |
1756 | (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) { | |
1757 | /* | |
1758 | * done - we have met our target *and* | |
1759 | * there is no one waiting for a page. | |
1760 | */ | |
1761 | return_from_scan: | |
1762 | assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); | |
1763 | ||
1764 | VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE, | |
1765 | vm_pageout_inactive, vm_pageout_inactive_used, 0, 0); | |
1766 | VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END, | |
1767 | vm_pageout_speculative_clean, vm_pageout_inactive_clean, | |
1768 | vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external); | |
1769 | ||
1770 | return; | |
1771 | } | |
1772 | lck_mtx_unlock(&vm_page_queue_free_lock); | |
1773 | } | |
1774 | ||
1775 | /* | |
1776 | * Before anything, we check if we have any ripe volatile | |
1777 | * objects around. If so, try to purge the first object. | |
1778 | * If the purge fails, fall through to reclaim a page instead. | |
1779 | * If the purge succeeds, go back to the top and re-evaluate | |
1780 | * the new memory situation. | |
1781 | */ | |
1782 | ||
1783 | assert (available_for_purge>=0); | |
1784 | force_purge = 0; /* no force-purging */ | |
1785 | ||
1786 | #if VM_PRESSURE_EVENTS | |
1787 | pressure_level = memorystatus_vm_pressure_level; | |
1788 | ||
1789 | if (pressure_level > kVMPressureNormal) { | |
1790 | ||
1791 | if (pressure_level >= kVMPressureCritical) { | |
1792 | force_purge = memorystatus_purge_on_critical; | |
1793 | } else if (pressure_level >= kVMPressureUrgent) { | |
1794 | force_purge = memorystatus_purge_on_urgent; | |
1795 | } else if (pressure_level >= kVMPressureWarning) { | |
1796 | force_purge = memorystatus_purge_on_warning; | |
1797 | } | |
1798 | } | |
1799 | #endif /* VM_PRESSURE_EVENTS */ | |
1800 | ||
1801 | if (available_for_purge || force_purge) { | |
1802 | ||
1803 | if (object != NULL) { | |
1804 | vm_object_unlock(object); | |
1805 | object = NULL; | |
1806 | } | |
1807 | ||
1808 | memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START); | |
1809 | ||
1810 | VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0); | |
1811 | if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) { | |
1812 | vm_pageout_purged_objects++; | |
1813 | VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0); | |
1814 | memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END); | |
1815 | continue; | |
1816 | } | |
1817 | VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1); | |
1818 | memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END); | |
1819 | } | |
1820 | ||
1821 | if (queue_empty(&sq->age_q) && vm_page_speculative_count) { | |
1822 | /* | |
1823 | * try to pull pages from the aging bins... | |
1824 | * see vm_page.h for an explanation of how | |
1825 | * this mechanism works | |
1826 | */ | |
1827 | struct vm_speculative_age_q *aq; | |
1828 | boolean_t can_steal = FALSE; | |
1829 | int num_scanned_queues; | |
1830 | ||
1831 | aq = &vm_page_queue_speculative[speculative_steal_index]; | |
1832 | ||
1833 | num_scanned_queues = 0; | |
1834 | while (queue_empty(&aq->age_q) && | |
1835 | num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) { | |
1836 | ||
1837 | speculative_steal_index++; | |
1838 | ||
1839 | if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) | |
1840 | speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q; | |
1841 | ||
1842 | aq = &vm_page_queue_speculative[speculative_steal_index]; | |
1843 | } | |
1844 | ||
1845 | if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) { | |
1846 | /* | |
1847 | * XXX We've scanned all the speculative | |
1848 | * queues but still haven't found one | |
1849 | * that is not empty, even though | |
1850 | * vm_page_speculative_count is not 0. | |
1851 | * | |
1852 | * report the anomaly... | |
1853 | */ | |
1854 | printf("vm_pageout_scan: " | |
1855 | "all speculative queues empty " | |
1856 | "but count=%d. Re-adjusting.\n", | |
1857 | vm_page_speculative_count); | |
1858 | if (vm_page_speculative_count > vm_page_speculative_count_drift_max) | |
1859 | vm_page_speculative_count_drift_max = vm_page_speculative_count; | |
1860 | vm_page_speculative_count_drifts++; | |
1861 | #if 6553678 | |
1862 | Debugger("vm_pageout_scan: no speculative pages"); | |
1863 | #endif | |
1864 | /* readjust... */ | |
1865 | vm_page_speculative_count = 0; | |
1866 | /* ... and continue */ | |
1867 | continue; | |
1868 | } | |
1869 | ||
1870 | if (vm_page_speculative_count > vm_page_speculative_target) | |
1871 | can_steal = TRUE; | |
1872 | else { | |
1873 | if (!delay_speculative_age) { | |
1874 | mach_timespec_t ts_fully_aged; | |
1875 | ||
1876 | ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000; | |
1877 | ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000) | |
1878 | * 1000 * NSEC_PER_USEC; | |
1879 | ||
1880 | ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts); | |
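| /* | |
| * ts_fully_aged is now the queue's birth time plus the full aging | |
| * interval. For illustration only: if there were 10 aging bins of | |
| * 500 ms each (the real values come from VM_PAGE_MAX_SPECULATIVE_AGE_Q | |
| * and vm_page_speculative_q_age_ms), the 5000 ms interval would split | |
| * into tv_sec = 5 and tv_nsec = 0. | |
| */ | |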
1881 | ||
1882 | clock_sec_t sec; | |
1883 | clock_nsec_t nsec; | |
1884 | clock_get_system_nanotime(&sec, &nsec); | |
1885 | ts.tv_sec = (unsigned int) sec; | |
1886 | ts.tv_nsec = nsec; | |
1887 | ||
1888 | if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0) | |
1889 | can_steal = TRUE; | |
1890 | else | |
1891 | delay_speculative_age++; | |
1892 | } else { | |
1893 | delay_speculative_age++; | |
1894 | if (delay_speculative_age == DELAY_SPECULATIVE_AGE) | |
1895 | delay_speculative_age = 0; | |
1896 | } | |
1897 | } | |
1898 | if (can_steal == TRUE) | |
1899 | vm_page_speculate_ageit(aq); | |
1900 | } | |
1901 | if (queue_empty(&sq->age_q) && cache_evict_throttle == 0) { | |
1902 | int pages_evicted; | |
1903 | ||
1904 | if (object != NULL) { | |
1905 | vm_object_unlock(object); | |
1906 | object = NULL; | |
1907 | } | |
1908 | pages_evicted = vm_object_cache_evict(100, 10); | |
1909 | ||
1910 | if (pages_evicted) { | |
1911 | ||
1912 | vm_pageout_cache_evicted += pages_evicted; | |
1913 | ||
1914 | VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE, | |
1915 | vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0); | |
1916 | memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE); | |
1917 | ||
1918 | /* | |
1919 | * we just freed up to 100 pages, | |
1920 | * so go back to the top of the main loop | |
1921 | * and re-evaluate the memory situation | |
1922 | */ | |
1923 | continue; | |
1924 | } else | |
1925 | cache_evict_throttle = 100; | |
1926 | } | |
1927 | if (cache_evict_throttle) | |
1928 | cache_evict_throttle--; | |
1929 | ||
1930 | #if CONFIG_JETSAM | |
1931 | /* | |
1932 | * don't let the filecache_min fall below roughly 15% (1/7th) of available memory | |
1933 | * on systems with an active compressor that isn't nearing its | |
1934 | * limits w/r to accepting new data | |
1935 | * | |
1936 | * on systems w/o the compressor/swapper, the filecache is always | |
1937 | * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY | |
1938 | * since most (if not all) of the anonymous pages are in the | |
1939 | * throttled queue (which isn't counted as available) which | |
1940 | * effectively disables this filter | |
1941 | */ | |
1942 | if (vm_compressor_low_on_space()) | |
1943 | vm_page_filecache_min = 0; | |
1944 | else | |
1945 | vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 7); | |
1946 | #else | |
1947 | /* | |
1948 | * don't let the filecache_min fall below 33% of available memory... | |
1949 | */ | |
1950 | vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 3); | |
1951 | #endif | |
1952 | ||
1953 | exceeded_burst_throttle = FALSE; | |
1954 | /* | |
1955 | * Sometimes we have to pause: | |
1956 | * 1) No inactive pages - nothing to do. | |
1957 | * 2) Loop control - no acceptable pages found on the inactive queue | |
1958 | * within the last vm_pageout_burst_inactive_throttle iterations | |
1959 | * 3) Flow control - default pageout queue is full | |
1960 | */ | |
1961 | if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_anonymous) && queue_empty(&sq->age_q)) { | |
1962 | vm_pageout_scan_empty_throttle++; | |
1963 | msecs = vm_pageout_empty_wait; | |
1964 | goto vm_pageout_scan_delay; | |
1965 | ||
1966 | } else if (inactive_burst_count >= | |
1967 | MIN(vm_pageout_burst_inactive_throttle, | |
1968 | (vm_page_inactive_count + | |
1969 | vm_page_speculative_count))) { | |
1970 | vm_pageout_scan_burst_throttle++; | |
1971 | msecs = vm_pageout_burst_wait; | |
1972 | ||
1973 | exceeded_burst_throttle = TRUE; | |
1974 | goto vm_pageout_scan_delay; | |
1975 | ||
1976 | } else if (vm_page_free_count > (vm_page_free_reserved / 4) && | |
1977 | VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) { | |
1978 | vm_pageout_scan_swap_throttle++; | |
1979 | msecs = vm_pageout_swap_wait; | |
1980 | goto vm_pageout_scan_delay; | |
1981 | ||
1982 | } else if (VM_PAGE_Q_THROTTLED(iq) && | |
1983 | VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { | |
1984 | clock_sec_t sec; | |
1985 | clock_nsec_t nsec; | |
1986 | ||
1987 | switch (flow_control.state) { | |
1988 | ||
1989 | case FCS_IDLE: | |
1990 | if ((vm_page_free_count + local_freed) < vm_page_free_target) { | |
1991 | ||
1992 | if (object != NULL) { | |
1993 | vm_object_unlock(object); | |
1994 | object = NULL; | |
1995 | } | |
1996 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
1997 | ||
1998 | vm_page_unlock_queues(); | |
1999 | ||
2000 | if (local_freeq) { | |
2001 | ||
2002 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, | |
2003 | vm_page_free_count, local_freed, delayed_unlock_limit, 3); | |
2004 | ||
2005 | vm_page_free_list(local_freeq, TRUE); | |
2006 | ||
2007 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, | |
2008 | vm_page_free_count, local_freed, 0, 3); | |
2009 | ||
2010 | local_freeq = NULL; | |
2011 | local_freed = 0; | |
2012 | } | |
2013 | thread_yield_internal(1); | |
2014 | ||
2015 | vm_page_lock_queues(); | |
2016 | ||
2017 | if (!VM_PAGE_Q_THROTTLED(iq)) { | |
2018 | vm_pageout_scan_yield_unthrottled++; | |
2019 | continue; | |
2020 | } | |
2021 | if (vm_page_pageable_external_count > vm_page_filecache_min && !queue_empty(&vm_page_queue_inactive)) { | |
2022 | anons_grabbed = ANONS_GRABBED_LIMIT; | |
2023 | vm_pageout_scan_throttle_deferred++; | |
2024 | goto consider_inactive; | |
2025 | } | |
2026 | if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count) | |
2027 | continue; | |
2028 | } | |
2029 | reset_deadlock_timer: | |
2030 | ts.tv_sec = vm_pageout_deadlock_wait / 1000; | |
2031 | ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC; | |
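| /* | |
| * vm_pageout_deadlock_wait is in milliseconds; for example, a 300 ms | |
| * wait (a value used here purely for illustration) yields tv_sec = 0 | |
| * and tv_nsec = 300 * 1000 * NSEC_PER_USEC = 300000000. | |
| */ | |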
2032 | clock_get_system_nanotime(&sec, &nsec); | |
2033 | flow_control.ts.tv_sec = (unsigned int) sec; | |
2034 | flow_control.ts.tv_nsec = nsec; | |
2035 | ADD_MACH_TIMESPEC(&flow_control.ts, &ts); | |
2036 | ||
2037 | flow_control.state = FCS_DELAYED; | |
2038 | msecs = vm_pageout_deadlock_wait; | |
2039 | ||
2040 | break; | |
2041 | ||
2042 | case FCS_DELAYED: | |
2043 | clock_get_system_nanotime(&sec, &nsec); | |
2044 | ts.tv_sec = (unsigned int) sec; | |
2045 | ts.tv_nsec = nsec; | |
2046 | ||
2047 | if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) { | |
2048 | /* | |
2049 | * the pageout thread for the default pager is potentially | |
2050 | * deadlocked since the | |
2051 | * default pager queue has been throttled for more than the | |
2052 | * allowable time... we need to move some clean pages or dirty | |
2053 | * pages belonging to the external pagers if they aren't throttled | |
2054 | * vm_page_free_wanted represents the number of threads currently | |
2055 | * blocked waiting for pages... we'll move one page for each of | |
2056 | * these plus a fixed amount to break the logjam... once we're done | |
2057 | * moving this number of pages, we'll re-enter the FCS_DELAYED state | |
2058 | * with a new timeout target since we have no way of knowing | |
2059 | * whether we've broken the deadlock except through observation | |
2060 | * of the queue associated with the default pager... we need to | |
2061 | * stop moving pages and allow the system to run to see what | |
2062 | * state it settles into. | |
2063 | */ | |
2064 | vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged; | |
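| /* | |
| * the relief budget is a fixed amount plus one page per thread currently | |
| * waiting for memory; it is decremented each time a page is reclaimed at | |
| * reclaim_page below, and once it reaches zero the FCS_DEADLOCK_DETECTED | |
| * case falls through to reset_deadlock_timer to re-arm the timeout | |
| */ | |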
2065 | vm_pageout_scan_deadlock_detected++; | |
2066 | flow_control.state = FCS_DEADLOCK_DETECTED; | |
2067 | thread_wakeup((event_t) &vm_pageout_garbage_collect); | |
2068 | goto consider_inactive; | |
2069 | } | |
2070 | /* | |
2071 | * just resniff instead of trying | |
2072 | * to compute a new delay time... we're going to be | |
2073 | * awakened immediately upon a laundry completion, | |
2074 | * so we won't wait any longer than necessary | |
2075 | */ | |
2076 | msecs = vm_pageout_idle_wait; | |
2077 | break; | |
2078 | ||
2079 | case FCS_DEADLOCK_DETECTED: | |
2080 | if (vm_pageout_deadlock_target) | |
2081 | goto consider_inactive; | |
2082 | goto reset_deadlock_timer; | |
2083 | ||
2084 | } | |
2085 | vm_pageout_scan_delay: | |
2086 | if (object != NULL) { | |
2087 | vm_object_unlock(object); | |
2088 | object = NULL; | |
2089 | } | |
2090 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
2091 | ||
2092 | vm_page_unlock_queues(); | |
2093 | ||
2094 | if (local_freeq) { | |
2095 | ||
2096 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, | |
2097 | vm_page_free_count, local_freed, delayed_unlock_limit, 3); | |
2098 | ||
2099 | vm_page_free_list(local_freeq, TRUE); | |
2100 | ||
2101 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, | |
2102 | vm_page_free_count, local_freed, 0, 3); | |
2103 | ||
2104 | local_freeq = NULL; | |
2105 | local_freed = 0; | |
2106 | } | |
2107 | vm_consider_waking_compactor_swapper(); | |
2108 | ||
2109 | vm_page_lock_queues(); | |
2110 | ||
2111 | if (flow_control.state == FCS_DELAYED && | |
2112 | !VM_PAGE_Q_THROTTLED(iq)) { | |
2113 | flow_control.state = FCS_IDLE; | |
2114 | goto consider_inactive; | |
2115 | } | |
2116 | ||
2117 | if (vm_page_free_count >= vm_page_free_target) { | |
2118 | /* | |
2119 | * we're here because | |
2120 | * 1) someone else freed up some pages while we had | |
2121 | * the queues unlocked above | |
2122 | * and we've hit one of the 3 conditions that | |
2123 | * cause us to pause the pageout scan thread | |
2124 | * | |
2125 | * since we already have enough free pages, | |
2126 | * let's avoid stalling and return normally | |
2127 | * | |
2128 | * before we return, make sure the pageout I/O threads | |
2129 | * are running throttled in case there are still requests | |
2130 | * in the laundry... since we have enough free pages | |
2131 | * we don't need the laundry to be cleaned in a timely | |
2132 | * fashion... so let's avoid interfering with foreground | |
2133 | * activity | |
2134 | * | |
2135 | * we don't want to hold vm_page_queue_free_lock when | |
2136 | * calling vm_pageout_adjust_io_throttles (since it | |
2137 | * may cause other locks to be taken), we do the initial | |
2138 | * check outside of the lock. Once we take the lock, | |
2139 | * we recheck the condition since it may have changed. | |
2140 | * if it has, no problem, we will make the threads | |
2141 | * non-throttled before actually blocking | |
2142 | */ | |
2143 | vm_pageout_adjust_io_throttles(iq, eq, TRUE); | |
2144 | } | |
2145 | lck_mtx_lock(&vm_page_queue_free_lock); | |
2146 | ||
2147 | if (vm_page_free_count >= vm_page_free_target && | |
2148 | (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) { | |
2149 | goto return_from_scan; | |
2150 | } | |
2151 | lck_mtx_unlock(&vm_page_queue_free_lock); | |
2152 | ||
2153 | if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) { | |
2154 | /* | |
2155 | * we're most likely about to block due to one of | |
2156 | * the 3 conditions that cause vm_pageout_scan to | |
2157 | * not be able to make forward progress w/r | |
2158 | * to providing new pages to the free queue, | |
2159 | * so unthrottle the I/O threads in case we | |
2160 | * have laundry to be cleaned... it needs | |
2161 | * to be completed ASAP. | |
2162 | * | |
2163 | * even if we don't block, we want the io threads | |
2164 | * running unthrottled since the sum of free + | |
2165 | * clean pages is still under our free target | |
2166 | */ | |
2167 | vm_pageout_adjust_io_throttles(iq, eq, FALSE); | |
2168 | } | |
2169 | if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) { | |
2170 | /* | |
2171 | * if we get here we're below our free target and | |
2172 | * we're stalling due to a full laundry queue or | |
2173 | * we don't have any inactive pages other than | |
2174 | * those in the clean queue... | |
2175 | * however, we have pages on the clean queue that | |
2176 | * can be moved to the free queue, so let's not | |
2177 | * stall the pageout scan | |
2178 | */ | |
2179 | flow_control.state = FCS_IDLE; | |
2180 | goto consider_inactive; | |
2181 | } | |
2182 | VM_CHECK_MEMORYSTATUS; | |
2183 | ||
2184 | if (flow_control.state != FCS_IDLE) | |
2185 | vm_pageout_scan_throttle++; | |
2186 | iq->pgo_throttled = TRUE; | |
2187 | ||
2188 | assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC); | |
2189 | counter(c_vm_pageout_scan_block++); | |
2190 | ||
2191 | vm_page_unlock_queues(); | |
2192 | ||
2193 | assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); | |
2194 | ||
2195 | VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START, | |
2196 | iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); | |
2197 | memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START); | |
2198 | ||
2199 | thread_block(THREAD_CONTINUE_NULL); | |
2200 | ||
2201 | VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END, | |
2202 | iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); | |
2203 | memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END); | |
2204 | ||
2205 | vm_page_lock_queues(); | |
2206 | delayed_unlock = 1; | |
2207 | ||
2208 | iq->pgo_throttled = FALSE; | |
2209 | ||
2210 | if (loop_count >= vm_page_inactive_count) | |
2211 | loop_count = 0; | |
2212 | inactive_burst_count = 0; | |
2213 | ||
2214 | goto Restart; | |
2215 | /*NOTREACHED*/ | |
2216 | } | |
2217 | ||
2218 | ||
2219 | flow_control.state = FCS_IDLE; | |
2220 | consider_inactive: | |
2221 | vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count), | |
2222 | vm_pageout_inactive_external_forced_reactivate_limit); | |
2223 | loop_count++; | |
2224 | inactive_burst_count++; | |
2225 | vm_pageout_inactive++; | |
2226 | ||
2227 | ||
2228 | /* | |
2229 | * Choose a victim. | |
2230 | */ | |
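| /* | |
| * Victim selection order, as implemented by the checks below: | |
| * 1) an aged speculative page (skipped if force_anonymous is set), | |
| * 2) a page from the cleaned queue, | |
| * 3) a file-backed inactive page, unless the anonymous/file balance | |
| * below says to grab anonymous pages instead, | |
| * 4) an anonymous page (at most ANONS_GRABBED_LIMIT in a row). | |
| * If none of these yields a page, rebalance the queues or panic. | |
| */ | |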
2231 | while (1) { | |
2232 | uint32_t inactive_external_count; | |
2233 | ||
2234 | m = NULL; | |
2235 | ||
2236 | if (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { | |
2237 | assert(vm_page_throttled_count == 0); | |
2238 | assert(queue_empty(&vm_page_queue_throttled)); | |
2239 | } | |
2240 | /* | |
2241 | * The most eligible pages are ones we paged in speculatively, | |
2242 | * but which have not yet been touched. | |
2243 | */ | |
2244 | if (!queue_empty(&sq->age_q) && force_anonymous == FALSE) { | |
2245 | m = (vm_page_t) queue_first(&sq->age_q); | |
2246 | ||
2247 | page_prev_state = PAGE_STATE_SPECULATIVE; | |
2248 | ||
2249 | break; | |
2250 | } | |
2251 | /* | |
2252 | * Try a clean-queue inactive page. | |
2253 | */ | |
2254 | if (!queue_empty(&vm_page_queue_cleaned)) { | |
2255 | m = (vm_page_t) queue_first(&vm_page_queue_cleaned); | |
2256 | ||
2257 | page_prev_state = PAGE_STATE_CLEAN; | |
2258 | ||
2259 | break; | |
2260 | } | |
2261 | ||
2262 | grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min); | |
2263 | inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count; | |
2264 | ||
2265 | if ((vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) || | |
2266 | ((inactive_external_count < vm_page_anonymous_count) && (inactive_external_count < (vm_page_pageable_external_count / 3)))) { | |
2267 | grab_anonymous = TRUE; | |
2268 | anons_grabbed = 0; | |
2269 | } | |
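| /* | |
| * i.e. prefer anonymous pages when the file cache has dropped below | |
| * vm_page_filecache_min, when force_anonymous was set to break an | |
| * external-pager deadlock, or when file-backed inactive pages are both | |
| * outnumbered by anonymous ones and make up less than a third of the | |
| * pageable file-backed pages | |
| */ | |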
2270 | ||
2271 | if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) { | |
2272 | ||
2273 | if ( !queue_empty(&vm_page_queue_inactive) ) { | |
2274 | m = (vm_page_t) queue_first(&vm_page_queue_inactive); | |
2275 | ||
2276 | page_prev_state = PAGE_STATE_INACTIVE; | |
2277 | anons_grabbed = 0; | |
2278 | ||
2279 | if (vm_page_pageable_external_count < vm_page_filecache_min) { | |
2280 | if ((++reactivated_this_call % 100)) | |
2281 | goto must_activate_page; | |
2282 | /* | |
2283 | * steal 1% of the file backed pages even if | |
2284 | * we are under the limit that has been set | |
2285 | * for a healthy filecache | |
2286 | */ | |
2287 | } | |
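| /* | |
| * (++reactivated_this_call % 100) is non-zero for 99 of every 100 | |
| * file-backed candidates, so those are reactivated above; only every | |
| * 100th one falls through to the break below and is actually stolen | |
| */ | |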
2288 | break; | |
2289 | } | |
2290 | } | |
2291 | if ( !queue_empty(&vm_page_queue_anonymous) ) { | |
2292 | m = (vm_page_t) queue_first(&vm_page_queue_anonymous); | |
2293 | ||
2294 | page_prev_state = PAGE_STATE_ANONYMOUS; | |
2295 | anons_grabbed++; | |
2296 | ||
2297 | break; | |
2298 | } | |
2299 | ||
2300 | /* | |
2301 | * if we've gotten here, we have no victim page. | |
2302 | * if making clean, free the local freed list and return. | |
2303 | * if making free, check to see if we've finished balancing the queues | |
2304 | * yet; if we haven't, just continue, otherwise panic | |
2305 | */ | |
2306 | vm_page_unlock_queues(); | |
2307 | ||
2308 | if (object != NULL) { | |
2309 | vm_object_unlock(object); | |
2310 | object = NULL; | |
2311 | } | |
2312 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
2313 | ||
2314 | if (local_freeq) { | |
2315 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, | |
2316 | vm_page_free_count, local_freed, delayed_unlock_limit, 5); | |
2317 | ||
2318 | vm_page_free_list(local_freeq, TRUE); | |
2319 | ||
2320 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, | |
2321 | vm_page_free_count, local_freed, 0, 5); | |
2322 | ||
2323 | local_freeq = NULL; | |
2324 | local_freed = 0; | |
2325 | } | |
2326 | vm_page_lock_queues(); | |
2327 | delayed_unlock = 1; | |
2328 | ||
2329 | force_anonymous = FALSE; | |
2330 | ||
2331 | if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) | |
2332 | goto Restart; | |
2333 | ||
2334 | if (!queue_empty(&sq->age_q)) | |
2335 | goto Restart; | |
2336 | ||
2337 | panic("vm_pageout: no victim"); | |
2338 | ||
2339 | /* NOTREACHED */ | |
2340 | } | |
2341 | force_anonymous = FALSE; | |
2342 | ||
2343 | /* | |
2344 | * we just found this page on one of our queues... | |
2345 | * it can't also be on the pageout queue, so safe | |
2346 | * to call vm_page_queues_remove | |
2347 | */ | |
2348 | assert(!m->pageout_queue); | |
2349 | ||
2350 | vm_page_queues_remove(m); | |
2351 | ||
2352 | assert(!m->laundry); | |
2353 | assert(!m->private); | |
2354 | assert(!m->fictitious); | |
2355 | assert(m->object != kernel_object); | |
2356 | assert(m->phys_page != vm_page_guard_addr); | |
2357 | ||
2358 | ||
2359 | if (page_prev_state != PAGE_STATE_SPECULATIVE) | |
2360 | vm_pageout_stats[vm_pageout_stat_now].considered++; | |
2361 | ||
2362 | DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); | |
2363 | ||
2364 | /* | |
2365 | * check to see if we currently are working | |
2366 | * with the same object... if so, we've | |
2367 | * already got the lock | |
2368 | */ | |
2369 | if (m->object != object) { | |
2370 | /* | |
2371 | * the object associated with candidate page is | |
2372 | * different from the one we were just working | |
2373 | * with... dump the lock if we still own it | |
2374 | */ | |
2375 | if (object != NULL) { | |
2376 | vm_object_unlock(object); | |
2377 | object = NULL; | |
2378 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
2379 | } | |
2380 | /* | |
2381 | * Try to lock object; since we've already got the | |
2382 | * page queues lock, we can only 'try' for this one. | |
2383 | * if the 'try' fails, we need to do a mutex_pause | |
2384 | * to allow the owner of the object lock a chance to | |
2385 | * run... otherwise, we're likely to trip over this | |
2386 | * object in the same state as we work our way through | |
2387 | * the queue... clumps of pages associated with the same | |
2388 | * object are fairly typical on the inactive and active queues | |
2389 | */ | |
2390 | if (!vm_object_lock_try_scan(m->object)) { | |
2391 | vm_page_t m_want = NULL; | |
2392 | ||
2393 | vm_pageout_inactive_nolock++; | |
2394 | ||
2395 | if (page_prev_state == PAGE_STATE_CLEAN) | |
2396 | vm_pageout_cleaned_nolock++; | |
2397 | ||
2398 | if (page_prev_state == PAGE_STATE_SPECULATIVE) | |
2399 | page_prev_state = PAGE_STATE_INACTIVE_FIRST; | |
2400 | ||
2401 | pmap_clear_reference(m->phys_page); | |
2402 | m->reference = FALSE; | |
2403 | ||
2404 | /* | |
2405 | * m->object must be stable since we hold the page queues lock... | |
2406 | * we can update the scan_collisions field sans the object lock | |
2407 | * since it is a separate field and this is the only spot that does | |
2408 | * a read-modify-write operation and it is never executed concurrently... | |
2409 | * we can asynchronously set this field to 0 when creating a UPL, so it | |
2410 | * is possible for the value to be a bit non-deterministic, but that's ok | |
2411 | * since it's only used as a hint | |
2412 | */ | |
2413 | m->object->scan_collisions = 1; | |
2414 | ||
2415 | if ( !queue_empty(&sq->age_q) ) | |
2416 | m_want = (vm_page_t) queue_first(&sq->age_q); | |
2417 | else if ( !queue_empty(&vm_page_queue_cleaned)) | |
2418 | m_want = (vm_page_t) queue_first(&vm_page_queue_cleaned); | |
2419 | else if (anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) | |
2420 | m_want = (vm_page_t) queue_first(&vm_page_queue_inactive); | |
2421 | else if ( !queue_empty(&vm_page_queue_anonymous)) | |
2422 | m_want = (vm_page_t) queue_first(&vm_page_queue_anonymous); | |
2423 | ||
2424 | /* | |
2425 | * this is the next object we're going to be interested in | |
2426 | * try to make sure it's available after the mutex_yield | |
2427 | * returns control | |
2428 | */ | |
2429 | if (m_want) | |
2430 | vm_pageout_scan_wants_object = m_want->object; | |
2431 | ||
2432 | /* | |
2433 | * force us to dump any collected free pages | |
2434 | * and to pause before moving on | |
2435 | */ | |
2436 | try_failed = TRUE; | |
2437 | ||
2438 | goto requeue_page; | |
2439 | } | |
2440 | object = m->object; | |
2441 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
2442 | ||
2443 | try_failed = FALSE; | |
2444 | } | |
2445 | if (catch_up_count) | |
2446 | catch_up_count--; | |
2447 | ||
2448 | if (m->busy) { | |
2449 | if (m->encrypted_cleaning) { | |
2450 | /* | |
2451 | * ENCRYPTED SWAP: | |
2452 | * if this page has already been picked up as | |
2453 | * part of a page-out cluster, it will be busy | |
2454 | * because it is being encrypted (see | |
2455 | * vm_object_upl_request()). But we still | |
2456 | * want to demote it from "clean-in-place" | |
2457 | * (aka "adjacent") to "clean-and-free" (aka | |
2458 | * "target"), so let's ignore its "busy" bit | |
2459 | * here and proceed to check for "cleaning" a | |
2460 | * little bit below... | |
2461 | * | |
2462 | * CAUTION CAUTION: | |
2463 | * A "busy" page should still be left alone for | |
2464 | * most purposes, so we have to be very careful | |
2465 | * not to process that page too much. | |
2466 | */ | |
2467 | assert(m->cleaning); | |
2468 | goto consider_inactive_page; | |
2469 | } | |
2470 | ||
2471 | /* | |
2472 | * Somebody is already playing with this page. | |
2473 | * Put it back on the appropriate queue | |
2474 | * | |
2475 | */ | |
2476 | vm_pageout_inactive_busy++; | |
2477 | ||
2478 | if (page_prev_state == PAGE_STATE_CLEAN) | |
2479 | vm_pageout_cleaned_busy++; | |
2480 | ||
2481 | requeue_page: | |
2482 | switch (page_prev_state) { | |
2483 | ||
2484 | case PAGE_STATE_SPECULATIVE: | |
2485 | case PAGE_STATE_ANONYMOUS: | |
2486 | case PAGE_STATE_CLEAN: | |
2487 | case PAGE_STATE_INACTIVE: | |
2488 | vm_page_enqueue_inactive(m, FALSE); | |
2489 | break; | |
2490 | ||
2491 | case PAGE_STATE_INACTIVE_FIRST: | |
2492 | vm_page_enqueue_inactive(m, TRUE); | |
2493 | break; | |
2494 | } | |
2495 | goto done_with_inactivepage; | |
2496 | } | |
2497 | ||
2498 | ||
2499 | /* | |
2500 | * If it's absent, in error or the object is no longer alive, | |
2501 | * we can reclaim the page... in the no longer alive case, | |
2502 | * there are 2 states the page can be in that preclude us | |
2503 | * from reclaiming it - busy or cleaning - that we've already | |
2504 | * dealt with | |
2505 | */ | |
2506 | if (m->absent || m->error || !object->alive) { | |
2507 | ||
2508 | if (m->absent) | |
2509 | vm_pageout_inactive_absent++; | |
2510 | else if (!object->alive) | |
2511 | vm_pageout_inactive_notalive++; | |
2512 | else | |
2513 | vm_pageout_inactive_error++; | |
2514 | reclaim_page: | |
2515 | if (vm_pageout_deadlock_target) { | |
2516 | vm_pageout_scan_inactive_throttle_success++; | |
2517 | vm_pageout_deadlock_target--; | |
2518 | } | |
2519 | ||
2520 | DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL); | |
2521 | ||
2522 | if (object->internal) { | |
2523 | DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL); | |
2524 | } else { | |
2525 | DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL); | |
2526 | } | |
2527 | assert(!m->cleaning); | |
2528 | assert(!m->laundry); | |
2529 | ||
2530 | m->busy = TRUE; | |
2531 | ||
2532 | /* | |
2533 | * remove page from object here since we're already | |
2534 | * behind the object lock... defer the rest of the work | |
2535 | * we'd normally do in vm_page_free_prepare_object | |
2536 | * until 'vm_page_free_list' is called | |
2537 | */ | |
2538 | if (m->tabled) | |
2539 | vm_page_remove(m, TRUE); | |
2540 | ||
2541 | assert(m->pageq.next == NULL && | |
2542 | m->pageq.prev == NULL); | |
2543 | m->pageq.next = (queue_entry_t)local_freeq; | |
2544 | local_freeq = m; | |
2545 | local_freed++; | |
2546 | ||
2547 | if (page_prev_state == PAGE_STATE_SPECULATIVE) | |
2548 | vm_pageout_freed_from_speculative++; | |
2549 | else if (page_prev_state == PAGE_STATE_CLEAN) | |
2550 | vm_pageout_freed_from_cleaned++; | |
2551 | else | |
2552 | vm_pageout_freed_from_inactive_clean++; | |
2553 | ||
2554 | if (page_prev_state != PAGE_STATE_SPECULATIVE) | |
2555 | vm_pageout_stats[vm_pageout_stat_now].reclaimed++; | |
2556 | ||
2557 | inactive_burst_count = 0; | |
2558 | goto done_with_inactivepage; | |
2559 | } | |
2560 | /* | |
2561 | * If the object is empty, the page must be reclaimed even | |
2562 | * if dirty or used. | |
2563 | * If the page belongs to a volatile object, we stick it back | |
2564 | * on. | |
2565 | */ | |
2566 | if (object->copy == VM_OBJECT_NULL) { | |
2567 | if (object->purgable == VM_PURGABLE_EMPTY) { | |
2568 | if (m->pmapped == TRUE) { | |
2569 | /* unmap the page */ | |
2570 | refmod_state = pmap_disconnect(m->phys_page); | |
2571 | if (refmod_state & VM_MEM_MODIFIED) { | |
2572 | SET_PAGE_DIRTY(m, FALSE); | |
2573 | } | |
2574 | } | |
2575 | if (m->dirty || m->precious) { | |
2576 | /* we saved the cost of cleaning this page ! */ | |
2577 | vm_page_purged_count++; | |
2578 | } | |
2579 | goto reclaim_page; | |
2580 | } | |
2581 | ||
2582 | if (COMPRESSED_PAGER_IS_ACTIVE) { | |
2583 | /* | |
2584 | * With the VM compressor, the cost of | |
2585 | * reclaiming a page is much lower (no I/O), | |
2586 | * so if we find a "volatile" page, it's better | |
2587 | * to let it get compressed rather than letting | |
2588 | * it occupy a full page until it gets purged. | |
2589 | * So no need to check for "volatile" here. | |
2590 | */ | |
2591 | } else if (object->purgable == VM_PURGABLE_VOLATILE) { | |
2592 | /* | |
2593 | * Avoid cleaning a "volatile" page which might | |
2594 | * be purged soon. | |
2595 | */ | |
2596 | ||
2597 | /* if it's wired, we can't put it on our queue */ | |
2598 | assert(!VM_PAGE_WIRED(m)); | |
2599 | ||
2600 | /* just stick it back on! */ | |
2601 | reactivated_this_call++; | |
2602 | ||
2603 | if (page_prev_state == PAGE_STATE_CLEAN) | |
2604 | vm_pageout_cleaned_volatile_reactivated++; | |
2605 | ||
2606 | goto reactivate_page; | |
2607 | } | |
2608 | } | |
2609 | ||
2610 | consider_inactive_page: | |
2611 | if (m->busy) { | |
2612 | /* | |
2613 | * CAUTION CAUTION: | |
2614 | * A "busy" page should always be left alone, except... | |
2615 | */ | |
2616 | if (m->cleaning && m->encrypted_cleaning) { | |
2617 | /* | |
2618 | * ENCRYPTED_SWAP: | |
2619 | * We could get here with a "busy" page | |
2620 | * if it's being encrypted during a | |
2621 | * "clean-in-place" operation. We'll deal | |
2622 | * with it right away by testing if it has been | |
2623 | * referenced and either reactivating it or | |
2624 | * promoting it from "clean-in-place" to | |
2625 | * "clean-and-free". | |
2626 | */ | |
2627 | } else { | |
2628 | panic("\"busy\" page considered for pageout\n"); | |
2629 | } | |
2630 | } | |
2631 | ||
2632 | /* | |
2633 | * If it's being used, reactivate. | |
2634 | * (Fictitious pages are either busy or absent.) | |
2635 | * First, update the reference and dirty bits | |
2636 | * to make sure the page is unreferenced. | |
2637 | */ | |
2638 | refmod_state = -1; | |
2639 | ||
2640 | if (m->reference == FALSE && m->pmapped == TRUE) { | |
2641 | refmod_state = pmap_get_refmod(m->phys_page); | |
2642 | ||
2643 | if (refmod_state & VM_MEM_REFERENCED) | |
2644 | m->reference = TRUE; | |
2645 | if (refmod_state & VM_MEM_MODIFIED) { | |
2646 | SET_PAGE_DIRTY(m, FALSE); | |
2647 | } | |
2648 | } | |
2649 | ||
2650 | /* | |
2651 | * if (m->cleaning && !m->pageout) | |
2652 | * If already cleaning this page in place and it hasn't | |
2653 | * been recently referenced, just pull off the queue. | |
2654 | * We can leave the page mapped, and upl_commit_range | |
2655 | * will put it on the clean queue. | |
2656 | * | |
2657 | * note: if m->encrypted_cleaning == TRUE, then | |
2658 | * m->cleaning == TRUE | |
2659 | * and we'll handle it here | |
2660 | * | |
2661 | * if (m->pageout && !m->cleaning) | |
2662 | * an msync INVALIDATE is in progress... | |
2663 | * this page has been marked for destruction | |
2664 | * after it has been cleaned, | |
2665 | * but not yet gathered into a UPL | |
2666 | * where 'cleaning' will be set... | |
2667 | * just leave it off the paging queues | |
2668 | * | |
2669 | * if (m->pageout && m->cleaning) | |
2670 | * an msync INVALIDATE is in progress | |
2671 | * and the UPL has already gathered this page... | |
2672 | * just leave it off the paging queues | |
2673 | */ | |
2674 | ||
2675 | /* | |
2676 | * page with m->pageout and still on the queues means that an | |
2677 | * MS_INVALIDATE is in progress on this page... leave it alone | |
2678 | */ | |
2679 | if (m->pageout) { | |
2680 | goto done_with_inactivepage; | |
2681 | } | |
2682 | ||
2683 | /* if cleaning, reactivate if referenced. otherwise, just pull off queue */ | |
2684 | if (m->cleaning) { | |
2685 | if (m->reference == TRUE) { | |
2686 | reactivated_this_call++; | |
2687 | goto reactivate_page; | |
2688 | } else { | |
2689 | goto done_with_inactivepage; | |
2690 | } | |
2691 | } | |
2692 | ||
2693 | if (m->reference || m->dirty) { | |
2694 | /* deal with a rogue "reusable" page */ | |
2695 | VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m); | |
2696 | } | |
2697 | ||
2698 | if (!m->no_cache && | |
2699 | (m->reference || | |
2700 | (m->xpmapped && !object->internal && (vm_page_xpmapped_external_count < (vm_page_external_count / 4))))) { | |
2701 | /* | |
2702 | * The page we pulled off the inactive list has | |
2703 | * been referenced. It is possible for other | |
2704 | * processors to be touching pages faster than we | |
2705 | * can clear the referenced bit and traverse the | |
2706 | * inactive queue, so we limit the number of | |
2707 | * reactivations. | |
2708 | */ | |
2709 | if (++reactivated_this_call >= reactivate_limit) { | |
2710 | vm_pageout_reactivation_limit_exceeded++; | |
2711 | } else if (catch_up_count) { | |
2712 | vm_pageout_catch_ups++; | |
2713 | } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) { | |
2714 | vm_pageout_inactive_force_reclaim++; | |
2715 | } else { | |
2716 | uint32_t isinuse; | |
2717 | ||
2718 | if (page_prev_state == PAGE_STATE_CLEAN) | |
2719 | vm_pageout_cleaned_reference_reactivated++; | |
2720 | ||
2721 | reactivate_page: | |
2722 | if ( !object->internal && object->pager != MEMORY_OBJECT_NULL && | |
2723 | vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) { | |
2724 | /* | |
2725 | * no explicit mappings of this object exist | |
2726 | * and it's not open via the filesystem | |
2727 | */ | |
2728 | vm_page_deactivate(m); | |
2729 | vm_pageout_inactive_deactivated++; | |
2730 | } else { | |
2731 | must_activate_page: | |
2732 | /* | |
2733 | * The page was/is being used, so put back on active list. | |
2734 | */ | |
2735 | vm_page_activate(m); | |
2736 | VM_STAT_INCR(reactivations); | |
2737 | inactive_burst_count = 0; | |
2738 | } | |
2739 | ||
2740 | if (page_prev_state == PAGE_STATE_CLEAN) | |
2741 | vm_pageout_cleaned_reactivated++; | |
2742 | ||
2743 | vm_pageout_inactive_used++; | |
2744 | ||
2745 | goto done_with_inactivepage; | |
2746 | } | |
2747 | /* | |
2748 | * Make sure we call pmap_get_refmod() if it | |
2749 | * wasn't already called just above, to update | |
2750 | * the dirty bit. | |
2751 | */ | |
2752 | if ((refmod_state == -1) && !m->dirty && m->pmapped) { | |
2753 | refmod_state = pmap_get_refmod(m->phys_page); | |
2754 | if (refmod_state & VM_MEM_MODIFIED) { | |
2755 | SET_PAGE_DIRTY(m, FALSE); | |
2756 | } | |
2757 | } | |
2758 | forced_reclaim = TRUE; | |
2759 | } else { | |
2760 | forced_reclaim = FALSE; | |
2761 | } | |
2762 | ||
2763 | XPR(XPR_VM_PAGEOUT, | |
2764 | "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n", | |
2765 | object, m->offset, m, 0,0); | |
2766 | ||
2767 | /* | |
2768 | * we've got a candidate page to steal... | |
2769 | * | |
2770 | * m->dirty is up to date courtesy of the | |
2771 | * preceding check for m->reference... if | |
2772 | * we get here, then m->reference had to be | |
2773 | * FALSE (or possibly "reactivate_limit" was | |
2774 | * exceeded), but in either case we called | |
2775 | * pmap_get_refmod() and updated both | |
2776 | * m->reference and m->dirty | |
2777 | * | |
2778 | * if it's dirty or precious we need to | |
2779 | * see if the target queue is throttled | |
2780 | * if it is, we need to skip over it by moving it back | |
2781 | * to the end of the inactive queue | |
2782 | */ | |
2783 | ||
2784 | inactive_throttled = FALSE; | |
2785 | ||
2786 | if (m->dirty || m->precious) { | |
2787 | if (object->internal) { | |
2788 | if (VM_PAGE_Q_THROTTLED(iq)) | |
2789 | inactive_throttled = TRUE; | |
2790 | } else if (VM_PAGE_Q_THROTTLED(eq)) { | |
2791 | inactive_throttled = TRUE; | |
2792 | } | |
2793 | } | |
2794 | throttle_inactive: | |
2795 | if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && | |
2796 | object->internal && m->dirty && | |
2797 | (object->purgable == VM_PURGABLE_DENY || | |
2798 | object->purgable == VM_PURGABLE_NONVOLATILE || | |
2799 | object->purgable == VM_PURGABLE_VOLATILE)) { | |
2800 | vm_page_check_pageable_safe(m); | |
2801 | queue_enter(&vm_page_queue_throttled, m, | |
2802 | vm_page_t, pageq); | |
2803 | m->throttled = TRUE; | |
2804 | vm_page_throttled_count++; | |
2805 | ||
2806 | vm_pageout_scan_reclaimed_throttled++; | |
2807 | ||
2808 | inactive_burst_count = 0; | |
2809 | goto done_with_inactivepage; | |
2810 | } | |
2811 | if (inactive_throttled == TRUE) { | |
2812 | ||
2813 | if (object->internal == FALSE) { | |
2814 | /* | |
2815 | * we need to break up the following potential deadlock case... | |
2816 | * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written. | |
2817 | * b) The thread doing the writing is waiting for pages while holding the truncate lock | |
2818 | * c) Most of the pages in the inactive queue belong to this file. | |
2819 | * | |
2820 | * we are potentially in this deadlock because... | |
2821 | * a) the external pageout queue is throttled | |
2822 | * b) we're done with the active queue and moved on to the inactive queue | |
2823 | * c) we've got a dirty external page | |
2824 | * | |
2825 | * since we don't know the reason for the external pageout queue being throttled we | |
2826 | * must suspect that we are deadlocked, so move the current page onto the active queue | |
2827 | * in an effort to cause a page from the active queue to 'age' to the inactive queue | |
2828 | * | |
2829 | * if we don't have jetsam configured (i.e. we have a dynamic pager), set | |
2830 | * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous | |
2831 | * pool the next time we select a victim page... if we can make enough new free pages, | |
2832 | * the deadlock will break, the external pageout queue will empty and it will no longer | |
2833 | * be throttled | |
2834 | * | |
2835 | * if we have jetsam configured, keep a count of the pages reactivated this way so | |
2836 | * that we can try to find clean pages in the active/inactive queues before | |
2837 | * deciding to jetsam a process | |
2838 | */ | |
2839 | vm_pageout_scan_inactive_throttled_external++; | |
2840 | ||
2841 | vm_page_check_pageable_safe(m); | |
2842 | queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); | |
2843 | m->active = TRUE; | |
2844 | vm_page_active_count++; | |
2845 | vm_page_pageable_external_count++; | |
2846 | ||
2847 | vm_pageout_adjust_io_throttles(iq, eq, FALSE); | |
2848 | ||
2849 | #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM | |
2850 | vm_pageout_inactive_external_forced_reactivate_limit--; | |
2851 | ||
2852 | if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) { | |
2853 | vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count; | |
2854 | /* | |
2855 | * Possible deadlock scenario so request jetsam action | |
2856 | */ | |
2857 | assert(object); | |
2858 | vm_object_unlock(object); | |
2859 | object = VM_OBJECT_NULL; | |
2860 | vm_page_unlock_queues(); | |
2861 | ||
2862 | VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START, | |
2863 | vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count); | |
2864 | ||
2865 | /* Kill first suitable process */ | |
2866 | if (memorystatus_kill_on_VM_page_shortage(FALSE) == FALSE) { | |
2867 | panic("vm_pageout_scan: Jetsam request failed\n"); | |
2868 | } | |
2869 | ||
2870 | VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0); | |
2871 | ||
2872 | vm_pageout_inactive_external_forced_jetsam_count++; | |
2873 | vm_page_lock_queues(); | |
2874 | delayed_unlock = 1; | |
2875 | } | |
2876 | #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */ | |
2877 | force_anonymous = TRUE; | |
2878 | #endif | |
2879 | inactive_burst_count = 0; | |
2880 | goto done_with_inactivepage; | |
2881 | } else { | |
2882 | if (page_prev_state == PAGE_STATE_SPECULATIVE) | |
2883 | page_prev_state = PAGE_STATE_INACTIVE; | |
2884 | ||
2885 | vm_pageout_scan_inactive_throttled_internal++; | |
2886 | ||
2887 | goto must_activate_page; | |
2888 | } | |
2889 | } | |
2890 | ||
2891 | /* | |
2892 | * we've got a page that we can steal... | |
2893 | * eliminate all mappings and make sure | |
2894 | * we have the up-to-date modified state | |
2895 | * | |
2896 | * if we need to do a pmap_disconnect then we | |
2897 | * need to re-evaluate m->dirty since the pmap_disconnect | |
2898 | * provides the true state atomically... the | |
2899 | * page was still mapped up to the pmap_disconnect | |
2900 | * and may have been dirtied at the last microsecond | |
2901 | * | |
2902 | * Note that if 'pmapped' is FALSE then the page is not, and never | |
2903 | * has been, in any map, so there is no point calling | |
2904 | * pmap_disconnect(). m->dirty could have been set in anticipation | |
2905 | * of likely usage of the page. | |
2906 | */ | |
2907 | if (m->pmapped == TRUE) { | |
2908 | int pmap_options; | |
2909 | ||
2910 | /* | |
2911 | * Don't count this page as going into the compressor | |
2912 | * if any of these are true: | |
2913 | * 1) We have the dynamic pager i.e. no compressed pager | |
2914 | * 2) Freezer enabled device with a freezer file to | |
2915 | * hold the app data i.e. no compressed pager | |
2916 | * 3) Freezer enabled device with compressed pager | |
2917 | * backend (exclusive use) i.e. most of the VM system | |
2918 | * (including vm_pageout_scan) has no knowledge of | |
2919 | * the compressor | |
2920 | * 4) This page belongs to a file and hence will not be | |
2921 | * sent into the compressor | |
2922 | */ | |
2923 | if (DEFAULT_PAGER_IS_ACTIVE || | |
2924 | DEFAULT_FREEZER_IS_ACTIVE || | |
2925 | DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS || | |
2926 | object->internal == FALSE) { | |
2927 | pmap_options = 0; | |
2928 | } else if (m->dirty || m->precious) { | |
2929 | /* | |
2930 | * VM knows that this page is dirty (or | |
2931 | * precious) and needs to be compressed | |
2932 | * rather than freed. | |
2933 | * Tell the pmap layer to count this page | |
2934 | * as "compressed". | |
2935 | */ | |
2936 | pmap_options = PMAP_OPTIONS_COMPRESSOR; | |
2937 | } else { | |
2938 | /* | |
2939 | * VM does not know if the page needs to | |
2940 | * be preserved but the pmap layer might tell | |
2941 | * us if any mapping has "modified" it. | |
2942 | * Let the pmap layer count this page | |
2943 | * as compressed if and only if it has been | |
2944 | * modified. | |
2945 | */ | |
2946 | pmap_options = | |
2947 | PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED; | |
2948 | } | |
2949 | refmod_state = pmap_disconnect_options(m->phys_page, | |
2950 | pmap_options, | |
2951 | NULL); | |
2952 | if (refmod_state & VM_MEM_MODIFIED) { | |
2953 | SET_PAGE_DIRTY(m, FALSE); | |
2954 | } | |
2955 | } | |
2956 | /* | |
2957 | * reset our count of pages that have been reclaimed | |
2958 | * since the last page was 'stolen' | |
2959 | */ | |
2960 | inactive_reclaim_run = 0; | |
2961 | ||
2962 | /* | |
2963 | * If it's clean and not precious, we can free the page. | |
2964 | */ | |
2965 | if (!m->dirty && !m->precious) { | |
2966 | ||
2967 | if (page_prev_state == PAGE_STATE_SPECULATIVE) | |
2968 | vm_pageout_speculative_clean++; | |
2969 | else { | |
2970 | if (page_prev_state == PAGE_STATE_ANONYMOUS) | |
2971 | vm_pageout_inactive_anonymous++; | |
2972 | else if (page_prev_state == PAGE_STATE_CLEAN) | |
2973 | vm_pageout_cleaned_reclaimed++; | |
2974 | ||
2975 | vm_pageout_inactive_clean++; | |
2976 | } | |
2977 | ||
2978 | /* | |
2979 | * OK, at this point we have found a page we are going to free. | |
2980 | */ | |
2981 | #if CONFIG_PHANTOM_CACHE | |
2982 | if (!object->internal) | |
2983 | vm_phantom_cache_add_ghost(m); | |
2984 | #endif | |
2985 | goto reclaim_page; | |
2986 | } | |
2987 | ||
2988 | /* | |
2989 | * The page may have been dirtied since the last check | |
2990 | * for a throttled target queue (which may have been skipped | |
2991 | * if the page was clean then). With the dirty page | |
2992 | * disconnected here, we can make one final check. | |
2993 | */ | |
2994 | if (object->internal) { | |
2995 | if (VM_PAGE_Q_THROTTLED(iq)) | |
2996 | inactive_throttled = TRUE; | |
2997 | } else if (VM_PAGE_Q_THROTTLED(eq)) { | |
2998 | inactive_throttled = TRUE; | |
2999 | } | |
3000 | ||
3001 | if (inactive_throttled == TRUE) | |
3002 | goto throttle_inactive; | |
3003 | ||
3004 | #if VM_PRESSURE_EVENTS | |
3005 | #if CONFIG_JETSAM | |
3006 | ||
3007 | /* | |
3008 | * If Jetsam is enabled, then the sending | |
3009 | * of memory pressure notifications is handled | |
3010 | * from the same thread that takes care of high-water | |
3011 | * and other jetsams i.e. the memorystatus_thread. | |
3012 | */ | |
3013 | ||
3014 | #else /* CONFIG_JETSAM */ | |
3015 | ||
3016 | vm_pressure_response(); | |
3017 | ||
3018 | #endif /* CONFIG_JETSAM */ | |
3019 | #endif /* VM_PRESSURE_EVENTS */ | |
3020 | ||
3021 | if (page_prev_state == PAGE_STATE_ANONYMOUS) | |
3022 | vm_pageout_inactive_anonymous++; | |
3023 | if (object->internal) | |
3024 | vm_pageout_inactive_dirty_internal++; | |
3025 | else | |
3026 | vm_pageout_inactive_dirty_external++; | |
3027 | ||
3028 | /* | |
3029 | * do NOT set the pageout bit! | |
3030 | * sure, we might need free pages, but this page is going to take time to become free | |
3031 | * anyway, so we may as well put it on the clean queue first and take it from there later | |
3032 | * if necessary. that way, we'll ensure we don't free up too much. -mj | |
3033 | */ | |
3034 | vm_pageout_cluster(m, FALSE, FALSE, FALSE); | |
3035 | ||
3036 | done_with_inactivepage: | |
3037 | ||
3038 | if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) { | |
3039 | boolean_t need_delay = TRUE; | |
3040 | ||
3041 | if (object != NULL) { | |
3042 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; | |
3043 | vm_object_unlock(object); | |
3044 | object = NULL; | |
3045 | } | |
3046 | vm_page_unlock_queues(); | |
3047 | ||
3048 | if (local_freeq) { | |
3049 | ||
3050 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, | |
3051 | vm_page_free_count, local_freed, delayed_unlock_limit, 4); | |
3052 | ||
3053 | vm_page_free_list(local_freeq, TRUE); | |
3054 | ||
3055 | VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, | |
3056 | vm_page_free_count, local_freed, 0, 4); | |
3057 | ||
3058 | local_freeq = NULL; | |
3059 | local_freed = 0; | |
3060 | need_delay = FALSE; | |
3061 | } | |
3062 | vm_consider_waking_compactor_swapper(); | |
3063 | ||
3064 | vm_page_lock_queues(); | |
3065 | ||
3066 | if (need_delay == TRUE) | |
3067 | lck_mtx_yield(&vm_page_queue_lock); | |
3068 | ||
3069 | delayed_unlock = 1; | |
3070 | } | |
3071 | vm_pageout_considered_page++; | |
3072 | ||
3073 | /* | |
3074 | * back to top of pageout scan loop | |
3075 | */ | |
3076 | } | |
3077 | } | |
3078 | ||
3079 | ||
3080 | int vm_page_free_count_init; | |
3081 | ||
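/*
 * vm_page_free_reserve:
 *
 * Bump the reserved-page pool by 'pages' (plus COMPRESSOR_FREE_RESERVED_LIMIT
 * when a compressed pager is active), clamping it at the corresponding
 * reserved limit, then re-derive the free-page watermarks from the
 * boot-time free count: vm_page_free_min and vm_page_free_target are
 * recomputed (capped at their limits, with free_target kept at least
 * free_min + 5) and vm_page_throttle_limit is set to roughly half of
 * free_target.
 */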
3082 | void | |
3083 | vm_page_free_reserve( | |
3084 | int pages) | |
3085 | { | |
3086 | int free_after_reserve; | |
3087 | ||
3088 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { | |
3089 | ||
3090 | if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT)) | |
3091 | vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT; | |
3092 | else | |
3093 | vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT); | |
3094 | ||
3095 | } else { | |
3096 | if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT) | |
3097 | vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT; | |
3098 | else | |
3099 | vm_page_free_reserved += pages; | |
3100 | } | |
3101 | free_after_reserve = vm_page_free_count_init - vm_page_free_reserved; | |
3102 | ||
3103 | vm_page_free_min = vm_page_free_reserved + | |
3104 | VM_PAGE_FREE_MIN(free_after_reserve); | |
3105 | ||
3106 | if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT) | |
3107 | vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT; | |
3108 | ||
3109 | vm_page_free_target = vm_page_free_reserved + | |
3110 | VM_PAGE_FREE_TARGET(free_after_reserve); | |
3111 | ||
3112 | if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT) | |
3113 | vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT; | |
3114 | ||
3115 | if (vm_page_free_target < vm_page_free_min + 5) | |
3116 | vm_page_free_target = vm_page_free_min + 5; | |
3117 | ||
3118 | vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2); | |
3119 | } | |
3120 | ||
3121 | /* | |
3122 | * vm_pageout is the high level pageout daemon. | |
3123 | */ | |
3124 | ||
3125 | void | |
3126 | vm_pageout_continue(void) | |
3127 | { | |
3128 | DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL); | |
3129 | vm_pageout_scan_event_counter++; | |
3130 | ||
3131 | lck_mtx_lock(&vm_page_queue_free_lock); | |
3132 | vm_pageout_running = TRUE; | |
3133 | lck_mtx_unlock(&vm_page_queue_free_lock); | |
3134 | ||
3135 | vm_pageout_scan(); | |
3136 | /* | |
3137 | * we hold both the vm_page_queue_free_lock | |
3138 | * and the vm_page_queues_lock at this point | |
3139 | */ | |
3140 | assert(vm_page_free_wanted == 0); | |
3141 | assert(vm_page_free_wanted_privileged == 0); | |
3142 | assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT); | |
3143 | ||
3144 | vm_pageout_running = FALSE; | |
3145 | if (vm_pageout_waiter) { | |
3146 | vm_pageout_waiter = FALSE; | |
3147 | thread_wakeup((event_t)&vm_pageout_waiter); | |
3148 | } | |
3149 | ||
3150 | lck_mtx_unlock(&vm_page_queue_free_lock); | |
3151 | vm_page_unlock_queues(); | |
3152 | ||
3153 | counter(c_vm_pageout_block++); | |
3154 | thread_block((thread_continue_t)vm_pageout_continue); | |
3155 | /*NOTREACHED*/ | |
3156 | } | |
3157 | ||
3158 | kern_return_t | |
3159 | vm_pageout_wait(uint64_t deadline) | |
3160 | { | |
3161 | kern_return_t kr; | |
3162 | ||
3163 | lck_mtx_lock(&vm_page_queue_free_lock); | |
3164 | for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) { | |
3165 | vm_pageout_waiter = TRUE; | |
3166 | if (THREAD_AWAKENED != lck_mtx_sleep_deadline( | |
3167 | &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT, | |
3168 | (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) { | |
3169 | kr = KERN_OPERATION_TIMED_OUT; | |
3170 | } | |
3171 | } | |
3172 | lck_mtx_unlock(&vm_page_queue_free_lock); | |
3173 | ||
3174 | return (kr); | |
3175 | } | |
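/*
 * Minimal usage sketch for vm_pageout_wait() (illustrative only; the
 * 100ms interval and the wrapping function are hypothetical, not part
 * of this file): block until the in-flight pageout pass completes or
 * the deadline expires.
 */
#if 0
static void
vm_pageout_wait_example(void)
{
	uint64_t	deadline;

	clock_interval_to_deadline(100, 1000 * 1000 /* ns per ms */, &deadline);

	if (vm_pageout_wait(deadline) == KERN_OPERATION_TIMED_OUT) {
		/* vm_pageout_scan was still running when the deadline expired */
	}
}
#endif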
3176 | ||
3177 | ||
3178 | #ifdef FAKE_DEADLOCK | |
3179 | ||
3180 | #define FAKE_COUNT 5000 | |
3181 | ||
3182 | int internal_count = 0; | |
3183 | int fake_deadlock = 0; | |
3184 | ||
3185 | #endif | |
3186 | ||
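/*
 * vm_pageout_iothread_continue:
 *
 * Pageout I/O thread loop used with the default (non-compressed) pager:
 * pop each page off the queue's pgo_pending list, re-look it up under the
 * object lock, create a default pager for the object if it doesn't have
 * one yet, and push the page out via memory_object_data_return(),
 * throttling I/O between pages.  Pages whose pager has been destroyed are
 * freed or reactivated instead.
 */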
3187 | static void | |
3188 | vm_pageout_iothread_continue(struct vm_pageout_queue *q) | |
3189 | { | |
3190 | vm_page_t m = NULL; | |
3191 | vm_object_t object; | |
3192 | vm_object_offset_t offset; | |
3193 | memory_object_t pager; | |
3194 | thread_t self = current_thread(); | |
3195 | ||
3196 | if ((vm_pageout_internal_iothread != THREAD_NULL) | |
3197 | && (self == vm_pageout_external_iothread ) | |
3198 | && (self->options & TH_OPT_VMPRIV)) | |
3199 | self->options &= ~TH_OPT_VMPRIV; | |
3200 | ||
3201 | vm_page_lockspin_queues(); | |
3202 | ||
3203 | while ( !queue_empty(&q->pgo_pending) ) { | |
3204 | ||
3205 | q->pgo_busy = TRUE; | |
3206 | queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); | |
3207 | if (m->object->object_slid) { | |
3208 | panic("slid page %p not allowed on this path\n", m); | |
3209 | } | |
3210 | VM_PAGE_CHECK(m); | |
3211 | m->pageout_queue = FALSE; | |
3212 | m->pageq.next = NULL; | |
3213 | m->pageq.prev = NULL; | |
3214 | ||
3215 | /* | |
3216 | * grab a snapshot of the object and offset this | |
3217 | * page is tabled in so that we can relookup this | |
3218 | * page after we've taken the object lock - these | |
3219 | * fields are stable while we hold the page queues lock | |
3220 | * but as soon as we drop it, there is nothing to keep | |
3221 | * this page in this object... we hold an activity_in_progress | |
3222 | * on this object which will keep it from terminating | |
3223 | */ | |
3224 | object = m->object; | |
3225 | offset = m->offset; | |
3226 | ||
3227 | vm_page_unlock_queues(); | |
3228 | ||
3229 | #ifdef FAKE_DEADLOCK | |
3230 | if (q == &vm_pageout_queue_internal) { | |
3231 | vm_offset_t addr; | |
3232 | int pg_count; | |
3233 | ||
3234 | internal_count++; | |
3235 | ||
3236 | if ((internal_count == FAKE_COUNT)) { | |
3237 | ||
3238 | pg_count = vm_page_free_count + vm_page_free_reserved; | |
3239 | ||
3240 | if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) { | |
3241 | kmem_free(kernel_map, addr, PAGE_SIZE * pg_count); | |
3242 | } | |
3243 | internal_count = 0; | |
3244 | fake_deadlock++; | |
3245 | } | |
3246 | } | |
3247 | #endif | |
3248 | vm_object_lock(object); | |
3249 | ||
3250 | m = vm_page_lookup(object, offset); | |
3251 | ||
3252 | if (m == NULL || | |
3253 | m->busy || m->cleaning || m->pageout_queue || !m->laundry) { | |
3254 | /* | |
3255 | * it's either the same page that someone else has | |
3256 | * started cleaning (or it's finished cleaning or | |
3257 | * been put back on the pageout queue), or | |
3258 | * the page has been freed or we have found a | |
3259 | * new page at this offset... in all of these cases | |
3260 | * we merely need to release the activity_in_progress | |
3261 | * we took when we put the page on the pageout queue | |
3262 | */ | |
3263 | vm_object_activity_end(object); | |
3264 | vm_object_unlock(object); | |
3265 | ||
3266 | vm_page_lockspin_queues(); | |
3267 | continue; | |
3268 | } | |
3269 | if (!object->pager_initialized) { | |
3270 | ||
3271 | /* | |
3272 | * If there is no memory object for the page, create | |
3273 | * one and hand it to the default pager. | |
3274 | */ | |
3275 | ||
3276 | if (!object->pager_initialized) | |
3277 | vm_object_collapse(object, | |
3278 | (vm_object_offset_t) 0, | |
3279 | TRUE); | |
3280 | if (!object->pager_initialized) | |
3281 | vm_object_pager_create(object); | |
3282 | if (!object->pager_initialized) { | |
3283 | /* | |
3284 | * Still no pager for the object. | |
3285 | * Reactivate the page. | |
3286 | * | |
3287 | * Should only happen if there is no | |
3288 | * default pager. | |
3289 | */ | |
3290 | m->pageout = FALSE; | |
3291 | ||
3292 | vm_page_lockspin_queues(); | |
3293 | ||
3294 | vm_pageout_throttle_up(m); | |
3295 | vm_page_activate(m); | |
3296 | vm_pageout_dirty_no_pager++; | |
3297 | ||
3298 | vm_page_unlock_queues(); | |
3299 | ||
3300 | /* | |
3301 | * And we are done with it. | |
3302 | */ | |
3303 | vm_object_activity_end(object); | |
3304 | vm_object_unlock(object); | |
3305 | ||
3306 | vm_page_lockspin_queues(); | |
3307 | continue; | |
3308 | } | |
3309 | } | |
3310 | pager = object->pager; | |
3311 | ||
3312 | if (pager == MEMORY_OBJECT_NULL) { | |
3313 | /* | |
3314 | * This pager has been destroyed by either | |
3315 | * memory_object_destroy or vm_object_destroy, and | |
3316 | * so there is nowhere for the page to go. | |
3317 | */ | |
3318 | if (m->pageout) { | |
3319 | /* | |
3320 | * Just free the page... VM_PAGE_FREE takes | |
3321 | * care of cleaning up all the state... | |
3322 | * including doing the vm_pageout_throttle_up | |
3323 | */ | |
3324 | VM_PAGE_FREE(m); | |
3325 | } else { | |
3326 | vm_page_lockspin_queues(); | |
3327 | ||
3328 | vm_pageout_throttle_up(m); | |
3329 | vm_page_activate(m); | |
3330 | ||
3331 | vm_page_unlock_queues(); | |
3332 | ||
3333 | /* | |
3334 | * And we are done with it. | |
3335 | */ | |
3336 | } | |
3337 | vm_object_activity_end(object); | |
3338 | vm_object_unlock(object); | |
3339 | ||
3340 | vm_page_lockspin_queues(); | |
3341 | continue; | |
3342 | } | |
3343 | #if 0 | |
3344 | /* | |
3345 | * we don't hold the page queue lock | |
3346 | * so this check isn't safe to make | |
3347 | */ | |
3348 | VM_PAGE_CHECK(m); | |
3349 | #endif | |
3350 | /* | |
3351 | * give back the activity_in_progress reference we | |
3352 | * took when we queued up this page and replace it | |
3353 | * with a paging_in_progress reference that will | |
3354 | * also keep the paging offset from changing and | |
3355 | * prevent the object from terminating | |
3356 | */ | |
3357 | vm_object_activity_end(object); | |
3358 | vm_object_paging_begin(object); | |
3359 | vm_object_unlock(object); | |
3360 | ||
3361 | /* | |
3362 | * Send the data to the pager. | |
3363 | * any pageout clustering happens there | |
3364 | */ | |
3365 | memory_object_data_return(pager, | |
3366 | m->offset + object->paging_offset, | |
3367 | PAGE_SIZE, | |
3368 | NULL, | |
3369 | NULL, | |
3370 | FALSE, | |
3371 | FALSE, | |
3372 | 0); | |
3373 | ||
3374 | vm_object_lock(object); | |
3375 | vm_object_paging_end(object); | |
3376 | vm_object_unlock(object); | |
3377 | ||
3378 | vm_pageout_io_throttle(); | |
3379 | ||
3380 | vm_page_lockspin_queues(); | |
3381 | } | |
3382 | q->pgo_busy = FALSE; | |
3383 | q->pgo_idle = TRUE; | |
3384 | ||
3385 | assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); | |
3386 | vm_page_unlock_queues(); | |
3387 | ||
3388 | thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) q); | |
3389 | /*NOTREACHED*/ | |
3390 | } | |
3391 | ||
3392 | ||
3393 | static void | |
3394 | vm_pageout_iothread_external_continue(struct vm_pageout_queue *q) | |
3395 | { | |
3396 | vm_page_t m = NULL; | |
3397 | vm_object_t object; | |
3398 | vm_object_offset_t offset; | |
3399 | memory_object_t pager; | |
3400 | ||
3401 | ||
3402 | if (vm_pageout_internal_iothread != THREAD_NULL) | |
3403 | current_thread()->options &= ~TH_OPT_VMPRIV; | |
3404 | ||
3405 | vm_page_lockspin_queues(); | |
3406 | ||
3407 | while ( !queue_empty(&q->pgo_pending) ) { | |
3408 | ||
3409 | q->pgo_busy = TRUE; | |
3410 | queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); | |
3411 | if (m->object->object_slid) { | |
3412 | panic("slid page %p not allowed on this path\n", m); | |
3413 | } | |
3414 | VM_PAGE_CHECK(m); | |
3415 | m->pageout_queue = FALSE; | |
3416 | m->pageq.next = NULL; | |
3417 | m->pageq.prev = NULL; | |
3418 | ||
3419 | /* | |
3420 | * grab a snapshot of the object and offset this | |
3421 | * page is tabled in so that we can relookup this | |
3422 | * page after we've taken the object lock - these | |
3423 | * fields are stable while we hold the page queues lock | |
3424 | * but as soon as we drop it, there is nothing to keep | |
3425 | * this page in this object... we hold an activity_in_progress | |
3426 | * on this object which will keep it from terminating | |
3427 | */ | |
3428 | object = m->object; | |
3429 | offset = m->offset; | |
3430 | ||
3431 | vm_page_unlock_queues(); | |
3432 | ||
3433 | vm_object_lock(object); | |
3434 | ||
3435 | m = vm_page_lookup(object, offset); | |
3436 | ||
3437 | if (m == NULL || | |
3438 | m->busy || m->cleaning || m->pageout_queue || !m->laundry) { | |
3439 | /* | |
3440 | * it's either the same page that someone else has | |
3441 | * started cleaning (or it's finished cleaning or | |
3442 | * been put back on the pageout queue), or | |
3443 | * the page has been freed or we have found a | |
3444 | * new page at this offset... in all of these cases | |
3445 | * we merely need to release the activity_in_progress | |
3446 | * we took when we put the page on the pageout queue | |
3447 | */ | |
3448 | vm_object_activity_end(object); | |
3449 | vm_object_unlock(object); | |
3450 | ||
3451 | vm_page_lockspin_queues(); | |
3452 | continue; | |
3453 | } | |
3454 | pager = object->pager; | |
3455 | ||
3456 | if (pager == MEMORY_OBJECT_NULL) { | |
3457 | /* | |
3458 | * This pager has been destroyed by either | |
3459 | * memory_object_destroy or vm_object_destroy, and | |
3460 | * so there is nowhere for the page to go. | |
3461 | */ | |
3462 | if (m->pageout) { | |
3463 | /* | |
3464 | * Just free the page... VM_PAGE_FREE takes | |
3465 | * care of cleaning up all the state... | |
3466 | * including doing the vm_pageout_throttle_up | |
3467 | */ | |
3468 | VM_PAGE_FREE(m); | |
3469 | } else { | |
3470 | vm_page_lockspin_queues(); | |
3471 | ||
3472 | vm_pageout_throttle_up(m); | |
3473 | vm_page_activate(m); | |
3474 | ||
3475 | vm_page_unlock_queues(); | |
3476 | ||
3477 | /* | |
3478 | * And we are done with it. | |
3479 | */ | |
3480 | } | |
3481 | vm_object_activity_end(object); | |
3482 | vm_object_unlock(object); | |
3483 | ||
3484 | vm_page_lockspin_queues(); | |
3485 | continue; | |
3486 | } | |
3487 | #if 0 | |
3488 | /* | |
3489 | * we don't hold the page queue lock | |
3490 | * so this check isn't safe to make | |
3491 | */ | |
3492 | VM_PAGE_CHECK(m); | |
3493 | #endif | |
3494 | /* | |
3495 | * give back the activity_in_progress reference we | |
3496 | * took when we queued up this page and replace it | |
3497 | * with a paging_in_progress reference that will | |
3498 | * also keep the paging offset from changing and | |
3499 | * prevent the object from terminating | |
3500 | */ | |
3501 | vm_object_activity_end(object); | |
3502 | vm_object_paging_begin(object); | |
3503 | vm_object_unlock(object); | |
3504 | ||
3505 | /* | |
3506 | * Send the data to the pager. | |
3507 | * any pageout clustering happens there | |
3508 | */ | |
3509 | memory_object_data_return(pager, | |
3510 | m->offset + object->paging_offset, | |
3511 | PAGE_SIZE, | |
3512 | NULL, | |
3513 | NULL, | |
3514 | FALSE, | |
3515 | FALSE, | |
3516 | 0); | |
3517 | ||
3518 | vm_object_lock(object); | |
3519 | vm_object_paging_end(object); | |
3520 | vm_object_unlock(object); | |
3521 | ||
3522 | vm_pageout_io_throttle(); | |
3523 | ||
3524 | vm_page_lockspin_queues(); | |
3525 | } | |
3526 | q->pgo_busy = FALSE; | |
3527 | q->pgo_idle = TRUE; | |
3528 | ||
3529 | assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); | |
3530 | vm_page_unlock_queues(); | |
3531 | ||
3532 | thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q); | |
3533 | /*NOTREACHED*/ | |
3534 | } | |
3535 | ||
3536 | ||
3537 | uint32_t vm_compressor_failed; | |
3538 | ||
3539 | #define MAX_FREE_BATCH 32 | |
3540 | ||
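/*
 * vm_pageout_iothread_internal_continue:
 *
 * Per-compressor-thread work loop: grab up to pgo_maxlaundry / (threads * 2)
 * pages at a time from the internal pageout queue, compress each one via
 * vm_pageout_compress_page(), and batch the freed pages (at most
 * MAX_FREE_BATCH at a time) back onto the free list.  If enough work is
 * left on the queue, the next compressor thread is woken to run in
 * parallel; once the queue is drained this thread parks on its
 * pgo_pending event.
 */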
3541 | static void | |
3542 | vm_pageout_iothread_internal_continue(struct cq *cq) | |
3543 | { | |
3544 | struct vm_pageout_queue *q; | |
3545 | vm_page_t m = NULL; | |
3546 | boolean_t pgo_draining; | |
3547 | vm_page_t local_q; | |
3548 | int local_cnt; | |
3549 | vm_page_t local_freeq = NULL; | |
3550 | int local_freed = 0; | |
3551 | int local_batch_size; | |
3552 | ||
3553 | ||
3554 | KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
3555 | ||
3556 | q = cq->q; | |
3557 | local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 2); | |
3558 | ||
3559 | #if RECORD_THE_COMPRESSED_DATA | |
3560 | if (q->pgo_laundry) | |
3561 | c_compressed_record_init(); | |
3562 | #endif | |
3563 | while (TRUE) { | |
3564 | int pages_left_on_q = 0; | |
3565 | ||
3566 | local_cnt = 0; | |
3567 | local_q = NULL; | |
3568 | ||
3569 | KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0); | |
3570 | ||
3571 | vm_page_lock_queues(); | |
3572 | ||
3573 | KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
3574 | ||
3575 | KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0); | |
3576 | ||
3577 | while ( !queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) { | |
3578 | ||
3579 | queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); | |
3580 | ||
3581 | VM_PAGE_CHECK(m); | |
3582 | ||
3583 | m->pageout_queue = FALSE; | |
3584 | m->pageq.prev = NULL; | |
3585 | ||
3586 | m->pageq.next = (queue_entry_t)local_q; | |
3587 | local_q = m; | |
3588 | local_cnt++; | |
3589 | } | |
3590 | if (local_q == NULL) | |
3591 | break; | |
3592 | ||
3593 | q->pgo_busy = TRUE; | |
3594 | ||
3595 | if ((pgo_draining = q->pgo_draining) == FALSE) { | |
3596 | vm_pageout_throttle_up_batch(q, local_cnt); | |
3597 | pages_left_on_q = q->pgo_laundry; | |
3598 | } else | |
3599 | pages_left_on_q = q->pgo_laundry - local_cnt; | |
3600 | ||
3601 | vm_page_unlock_queues(); | |
3602 | ||
3603 | #if !RECORD_THE_COMPRESSED_DATA | |
3604 | if (pages_left_on_q >= local_batch_size && cq->id < (vm_compressor_thread_count - 1)) | |
3605 | thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + 1)); | |
3606 | #endif | |
3607 | KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0); | |
3608 | ||
3609 | while (local_q) { | |
3610 | ||
3611 | KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0); | |
3612 | ||
3613 | m = local_q; | |
3614 | local_q = (vm_page_t)m->pageq.next; | |
3615 | m->pageq.next = NULL; | |
3616 | ||
3617 | if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m, FALSE) == KERN_SUCCESS) { | |
3618 | ||
3619 | m->pageq.next = (queue_entry_t)local_freeq; | |
3620 | local_freeq = m; | |
3621 | local_freed++; | |
3622 | ||
3623 | if (local_freed >= MAX_FREE_BATCH) { | |
3624 | ||
3625 | vm_page_free_list(local_freeq, TRUE); | |
3626 | local_freeq = NULL; | |
3627 | local_freed = 0; | |
3628 | } | |
3629 | } | |
3630 | #if !CONFIG_JETSAM | |
3631 | while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) { | |
3632 | kern_return_t wait_result; | |
3633 | int need_wakeup = 0; | |
3634 | ||
3635 | if (local_freeq) { | |
3636 | vm_page_free_list(local_freeq, TRUE); | |
3637 | ||
3638 | local_freeq = NULL; | |
3639 | local_freed = 0; | |
3640 | ||
3641 | continue; | |
3642 | } | |
3643 | lck_mtx_lock_spin(&vm_page_queue_free_lock); | |
3644 | ||
3645 | if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) { | |
3646 | ||
3647 | if (vm_page_free_wanted_privileged++ == 0) | |
3648 | need_wakeup = 1; | |
3649 | wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT); | |
3650 | ||
3651 | lck_mtx_unlock(&vm_page_queue_free_lock); | |
3652 | ||
3653 | if (need_wakeup) | |
3654 | thread_wakeup((event_t)&vm_page_free_wanted); | |
3655 | ||
3656 | if (wait_result == THREAD_WAITING) | |
3658 | thread_block(THREAD_CONTINUE_NULL); | |
3659 | } else | |
3660 | lck_mtx_unlock(&vm_page_queue_free_lock); | |
3661 | } | |
3662 | #endif | |
3663 | } | |
3664 | if (local_freeq) { | |
3665 | vm_page_free_list(local_freeq, TRUE); | |
3666 | ||
3667 | local_freeq = NULL; | |
3668 | local_freed = 0; | |
3669 | } | |
3670 | if (pgo_draining == TRUE) { | |
3671 | vm_page_lockspin_queues(); | |
3672 | vm_pageout_throttle_up_batch(q, local_cnt); | |
3673 | vm_page_unlock_queues(); | |
3674 | } | |
3675 | } | |
3676 | KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0); | |
3677 | ||
3678 | /* | |
3679 | * queue lock is held and our q is empty | |
3680 | */ | |
3681 | q->pgo_busy = FALSE; | |
3682 | q->pgo_idle = TRUE; | |
3683 | ||
3684 | assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT); | |
3685 | vm_page_unlock_queues(); | |
3686 | ||
3687 | KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
3688 | ||
3689 | thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq); | |
3690 | /*NOTREACHED*/ | |
3691 | } | |
3692 | ||
3693 | ||
3694 | ||
3695 | static void | |
3696 | vm_pageout_immediate(vm_page_t m, boolean_t object_locked_by_caller) | |
3697 | { | |
3698 | assert(vm_pageout_immediate_scratch_buf); | |
3699 | ||
3700 | if (vm_pageout_compress_page(&vm_pageout_immediate_chead, vm_pageout_immediate_scratch_buf, m, object_locked_by_caller) == KERN_SUCCESS) { | |
3701 | ||
3702 | vm_page_free_prepare_object(m, TRUE); | |
3703 | vm_page_release(m); | |
3704 | } | |
3705 | } | |
3706 | ||
3707 | ||
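/*
 * vm_pageout_compress_page:
 *
 * Hand a single page to the compressor.  If the owning object has no
 * pager yet (and the caller didn't lock the object), a compressor pager
 * is created first; if that fails the page is reactivated and
 * KERN_FAILURE is returned.  On a successful vm_compressor_pager_put()
 * the compressed-page counts and purgeable ledgers are updated and the
 * page is removed from its object (the caller is expected to free it);
 * on failure the page is reactivated and vm_compressor_failed is bumped.
 */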
3708 | kern_return_t | |
3709 | vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m, boolean_t object_locked_by_caller) | |
3710 | { | |
3711 | vm_object_t object; | |
3712 | memory_object_t pager; | |
3713 | int compressed_count_delta; | |
3714 | kern_return_t retval; | |
3715 | ||
3716 | if (m->object->object_slid) { | |
3717 | panic("slid page %p not allowed on this path\n", m); | |
3718 | } | |
3719 | ||
3720 | object = m->object; | |
3721 | pager = object->pager; | |
3722 | ||
3723 | if (object_locked_by_caller == FALSE && (!object->pager_initialized || pager == MEMORY_OBJECT_NULL)) { | |
3724 | ||
3725 | KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0); | |
3726 | ||
3727 | vm_object_lock(object); | |
3728 | ||
3729 | /* | |
3730 | * If there is no memory object for the page, create | |
3731 | * one and hand it to the compression pager. | |
3732 | */ | |
3733 | ||
3734 | if (!object->pager_initialized) | |
3735 | vm_object_collapse(object, (vm_object_offset_t) 0, TRUE); | |
3736 | if (!object->pager_initialized) | |
3737 | vm_object_compressor_pager_create(object); | |
3738 | ||
3739 | if (!object->pager_initialized) { | |
3740 | /* | |
3741 | * Still no pager for the object. | |
3742 | * Reactivate the page. | |
3743 | * | |
3744 | * Should only happen if there is no | |
3745 | * compression pager | |
3746 | */ | |
3747 | m->pageout = FALSE; | |
3748 | m->laundry = FALSE; | |
3749 | PAGE_WAKEUP_DONE(m); | |
3750 | ||
3751 | vm_page_lockspin_queues(); | |
3752 | vm_page_activate(m); | |
3753 | vm_pageout_dirty_no_pager++; | |
3754 | vm_page_unlock_queues(); | |
3755 | ||
3756 | /* | |
3757 | * And we are done with it. | |
3758 | */ | |
3759 | vm_object_activity_end(object); | |
3760 | vm_object_unlock(object); | |
3761 | ||
3762 | return KERN_FAILURE; | |
3763 | } | |
3764 | pager = object->pager; | |
3765 | ||
3766 | if (pager == MEMORY_OBJECT_NULL) { | |
3767 | /* | |
3768 | * This pager has been destroyed by either | |
3769 | * memory_object_destroy or vm_object_destroy, and | |
3770 | * so there is nowhere for the page to go. | |
3771 | */ | |
3772 | if (m->pageout) { | |
3773 | /* | |
3774 | * Just free the page... VM_PAGE_FREE takes | |
3775 | * care of cleaning up all the state... | |
3776 | * including doing the vm_pageout_throttle_up | |
3777 | */ | |
3778 | VM_PAGE_FREE(m); | |
3779 | } else { | |
3780 | m->laundry = FALSE; | |
3781 | PAGE_WAKEUP_DONE(m); | |
3782 | ||
3783 | vm_page_lockspin_queues(); | |
3784 | vm_page_activate(m); | |
3785 | vm_page_unlock_queues(); | |
3786 | ||
3787 | /* | |
3788 | * And we are done with it. | |
3789 | */ | |
3790 | } | |
3791 | vm_object_activity_end(object); | |
3792 | vm_object_unlock(object); | |
3793 | ||
3794 | return KERN_FAILURE; | |
3795 | } | |
3796 | vm_object_unlock(object); | |
3797 | ||
3798 | KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0); | |
3799 | } | |
3800 | assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL); | |
3801 | ||
3802 | if (object_locked_by_caller == FALSE) | |
3803 | assert(object->activity_in_progress > 0); | |
3804 | ||
3805 | retval = vm_compressor_pager_put( | |
3806 | pager, | |
3807 | m->offset + object->paging_offset, | |
3808 | m->phys_page, | |
3809 | current_chead, | |
3810 | scratch_buf, | |
3811 | &compressed_count_delta); | |
3812 | ||
3813 | if (object_locked_by_caller == FALSE) { | |
3814 | vm_object_lock(object); | |
3815 | ||
3816 | assert(object->activity_in_progress > 0); | |
3817 | assert(m->object == object); | |
3818 | } | |
3819 | ||
3820 | vm_compressor_pager_count(pager, | |
3821 | compressed_count_delta, | |
3822 | FALSE, /* shared_lock */ | |
3823 | object); | |
3824 | ||
3825 | m->laundry = FALSE; | |
3826 | m->pageout = FALSE; | |
3827 | ||
3828 | if (retval == KERN_SUCCESS) { | |
3829 | /* | |
3830 | * If the object is purgeable, its owner's | |
3831 | * purgeable ledgers will be updated in | |
3832 | * vm_page_remove() but the page still | |
3833 | * contributes to the owner's memory footprint, | |
3834 | * so account for it as such. | |
3835 | */ | |
3836 | if (object->purgable != VM_PURGABLE_DENY && | |
3837 | object->vo_purgeable_owner != NULL) { | |
3838 | /* one more compressed purgeable page */ | |
3839 | vm_purgeable_compressed_update(object, | |
3840 | +1); | |
3841 | } | |
3842 | VM_STAT_INCR(compressions); | |
3843 | ||
3844 | if (m->tabled) | |
3845 | vm_page_remove(m, TRUE); | |
3846 | ||
3847 | } else { | |
3848 | PAGE_WAKEUP_DONE(m); | |
3849 | ||
3850 | vm_page_lockspin_queues(); | |
3851 | ||
3852 | vm_page_activate(m); | |
3853 | vm_compressor_failed++; | |
3854 | ||
3855 | vm_page_unlock_queues(); | |
3856 | } | |
3857 | if (object_locked_by_caller == FALSE) { | |
3858 | vm_object_activity_end(object); | |
3859 | vm_object_unlock(object); | |
3860 | } | |
3861 | return retval; | |
3862 | } | |
3863 | ||
3864 | ||
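/*
 * vm_pageout_adjust_io_throttles:
 *
 * Move the internal and/or external pageout I/O threads between the
 * throttled and unthrottled I/O tiers.  Throttling is never applied while
 * hibernation cleanup is in progress, and the internal queue is only
 * adjusted when a default or freezer pager is active.  Called with the
 * page queues lock held; the lock is dropped around the policy update.
 */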
3865 | static void | |
3866 | vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_queue *eq, boolean_t req_lowpriority) | |
3867 | { | |
3868 | uint32_t policy; | |
3869 | boolean_t set_iq = FALSE; | |
3870 | boolean_t set_eq = FALSE; | |
3871 | ||
3872 | if (hibernate_cleaning_in_progress == TRUE) | |
3873 | req_lowpriority = FALSE; | |
3874 | ||
3875 | if ((DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) && iq->pgo_inited == TRUE && iq->pgo_lowpriority != req_lowpriority) | |
3876 | set_iq = TRUE; | |
3877 | ||
3878 | if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) | |
3879 | set_eq = TRUE; | |
3880 | ||
3881 | if (set_iq == TRUE || set_eq == TRUE) { | |
3882 | ||
3883 | vm_page_unlock_queues(); | |
3884 | ||
3885 | if (req_lowpriority == TRUE) { | |
3886 | policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED; | |
3887 | DTRACE_VM(laundrythrottle); | |
3888 | } else { | |
3889 | policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED; | |
3890 | DTRACE_VM(laundryunthrottle); | |
3891 | } | |
3892 | if (set_iq == TRUE) { | |
3893 | proc_set_task_policy_thread(kernel_task, iq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy); | |
3894 | ||
3895 | iq->pgo_lowpriority = req_lowpriority; | |
3896 | } | |
3897 | if (set_eq == TRUE) { | |
3898 | proc_set_task_policy_thread(kernel_task, eq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy); | |
3899 | ||
3900 | eq->pgo_lowpriority = req_lowpriority; | |
3901 | } | |
3902 | vm_page_lock_queues(); | |
3903 | } | |
3904 | } | |
3905 | ||
3906 | ||
3907 | static void | |
3908 | vm_pageout_iothread_external(void) | |
3909 | { | |
3910 | thread_t self = current_thread(); | |
3911 | ||
3912 | self->options |= TH_OPT_VMPRIV; | |
3913 | ||
3914 | DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); | |
3915 | ||
3916 | proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL, | |
3917 | TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED); | |
3918 | ||
3919 | vm_page_lock_queues(); | |
3920 | ||
3921 | vm_pageout_queue_external.pgo_tid = self->thread_id; | |
3922 | vm_pageout_queue_external.pgo_lowpriority = TRUE; | |
3923 | vm_pageout_queue_external.pgo_inited = TRUE; | |
3924 | ||
3925 | vm_page_unlock_queues(); | |
3926 | ||
3927 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) | |
3928 | vm_pageout_iothread_external_continue(&vm_pageout_queue_external); | |
3929 | else | |
3930 | vm_pageout_iothread_continue(&vm_pageout_queue_external); | |
3931 | ||
3932 | /*NOTREACHED*/ | |
3933 | } | |
3934 | ||
3935 | ||
3936 | static void | |
3937 | vm_pageout_iothread_internal(struct cq *cq) | |
3938 | { | |
3939 | thread_t self = current_thread(); | |
3940 | ||
3941 | self->options |= TH_OPT_VMPRIV; | |
3942 | ||
3943 | if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) { | |
3944 | DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); | |
3945 | ||
3946 | proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL, | |
3947 | TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED); | |
3948 | } | |
3949 | vm_page_lock_queues(); | |
3950 | ||
3951 | vm_pageout_queue_internal.pgo_tid = self->thread_id; | |
3952 | vm_pageout_queue_internal.pgo_lowpriority = TRUE; | |
3953 | vm_pageout_queue_internal.pgo_inited = TRUE; | |
3954 | ||
3955 | vm_page_unlock_queues(); | |
3956 | ||
3957 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { | |
3958 | ||
3959 | if (vm_restricted_to_single_processor == TRUE) | |
3960 | thread_vm_bind_group_add(); | |
3961 | ||
3962 | vm_pageout_iothread_internal_continue(cq); | |
3963 | } else | |
3964 | vm_pageout_iothread_continue(&vm_pageout_queue_internal); | |
3965 | ||
3966 | /*NOTREACHED*/ | |
3967 | } | |
3968 | ||
3969 | kern_return_t | |
3970 | vm_set_buffer_cleanup_callout(boolean_t (*func)(int)) | |
3971 | { | |
3972 | if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) { | |
3973 | return KERN_SUCCESS; | |
3974 | } else { | |
3975 | return KERN_FAILURE; /* Already set */ | |
3976 | } | |
3977 | } | |
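/*
 * Usage sketch (the callback and wrapper below are hypothetical): the
 * callout can only be installed once, since the compare-and-swap above
 * succeeds only while consider_buffer_cache_collect is still NULL.
 */
#if 0
extern boolean_t my_bufcache_collect(int);	/* hypothetical callback */

static void
register_bufcache_collect_example(void)
{
	if (vm_set_buffer_cleanup_callout(my_bufcache_collect) == KERN_FAILURE) {
		/* another subsystem already installed its callout */
	}
}
#endif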
3978 | ||
3979 | extern boolean_t memorystatus_manual_testing_on; | |
3980 | extern unsigned int memorystatus_level; | |
3981 | ||
3982 | ||
3983 | #if VM_PRESSURE_EVENTS | |
3984 | ||
3985 | boolean_t vm_pressure_events_enabled = FALSE; | |
3986 | ||
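/*
 * vm_pressure_response:
 *
 * Recompute memorystatus_level as the percentage of physical memory that
 * is still available non-compressed, then walk memorystatus_vm_pressure_level
 * through the normal / warning / critical states using the
 * VM_PRESSURE_*_TO_*() predicates.  When the level changes, or a
 * non-normal level is re-asserted, the pressure thread is woken; waiters
 * on vm_pressure_changed are woken only on an actual level change.
 * Returns early (after updating memorystatus_level) while manual
 * memorystatus testing is enabled, and does nothing at all while
 * pressure events are disabled.
 */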
3987 | void | |
3988 | vm_pressure_response(void) | |
3989 | { | |
3990 | ||
3991 | vm_pressure_level_t old_level = kVMPressureNormal; | |
3992 | int new_level = -1; | |
3993 | ||
3994 | uint64_t available_memory = 0; | |
3995 | ||
3996 | if (vm_pressure_events_enabled == FALSE) | |
3997 | return; | |
3998 | ||
3999 | ||
4000 | available_memory = (((uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY) * 100); | |
4001 | ||
4002 | ||
4003 | memorystatus_level = (unsigned int) (available_memory / atop_64(max_mem)); | |
4004 | ||
4005 | if (memorystatus_manual_testing_on) { | |
4006 | return; | |
4007 | } | |
4008 | ||
4009 | old_level = memorystatus_vm_pressure_level; | |
4010 | ||
4011 | switch (memorystatus_vm_pressure_level) { | |
4012 | ||
4013 | case kVMPressureNormal: | |
4014 | { | |
4015 | if (VM_PRESSURE_WARNING_TO_CRITICAL()) { | |
4016 | new_level = kVMPressureCritical; | |
4017 | } else if (VM_PRESSURE_NORMAL_TO_WARNING()) { | |
4018 | new_level = kVMPressureWarning; | |
4019 | } | |
4020 | break; | |
4021 | } | |
4022 | ||
4023 | case kVMPressureWarning: | |
4024 | case kVMPressureUrgent: | |
4025 | { | |
4026 | if (VM_PRESSURE_WARNING_TO_NORMAL()) { | |
4027 | new_level = kVMPressureNormal; | |
4028 | } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) { | |
4029 | new_level = kVMPressureCritical; | |
4030 | } | |
4031 | break; | |
4032 | } | |
4033 | ||
4034 | case kVMPressureCritical: | |
4035 | { | |
4036 | if (VM_PRESSURE_WARNING_TO_NORMAL()) { | |
4037 | new_level = kVMPressureNormal; | |
4038 | } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) { | |
4039 | new_level = kVMPressureWarning; | |
4040 | } | |
4041 | break; | |
4042 | } | |
4043 | ||
4044 | default: | |
4045 | return; | |
4046 | } | |
4047 | ||
4048 | if (new_level != -1) { | |
4049 | memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level; | |
4050 | ||
4051 | if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != new_level)) { | |
4052 | if (vm_pressure_thread_running == FALSE) { | |
4053 | thread_wakeup(&vm_pressure_thread); | |
4054 | } | |
4055 | ||
4056 | if (old_level != new_level) { | |
4057 | thread_wakeup(&vm_pressure_changed); | |
4058 | } | |
4059 | } | |
4060 | } | |
4061 | ||
4062 | } | |
4063 | #endif /* VM_PRESSURE_EVENTS */ | |
4064 | ||
4065 | kern_return_t | |
4066 | mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) { | |
4067 | ||
4068 | #if !VM_PRESSURE_EVENTS | |
4069 | ||
4070 | return KERN_FAILURE; | |
4071 | ||
4072 | #else /* VM_PRESSURE_EVENTS */ | |
4073 | ||
4074 | kern_return_t kr = KERN_SUCCESS; | |
4075 | ||
4076 | if (pressure_level != NULL) { | |
4077 | ||
4078 | vm_pressure_level_t old_level = memorystatus_vm_pressure_level; | |
4079 | ||
4080 | if (wait_for_pressure == TRUE) { | |
4081 | wait_result_t wr = 0; | |
4082 | ||
4083 | while (old_level == *pressure_level) { | |
4084 | wr = assert_wait((event_t) &vm_pressure_changed, | |
4085 | THREAD_INTERRUPTIBLE); | |
4086 | if (wr == THREAD_WAITING) { | |
4087 | wr = thread_block(THREAD_CONTINUE_NULL); | |
4088 | } | |
4089 | if (wr == THREAD_INTERRUPTED) { | |
4090 | return KERN_ABORTED; | |
4091 | } | |
4092 | if (wr == THREAD_AWAKENED) { | |
4093 | ||
4094 | old_level = memorystatus_vm_pressure_level; | |
4095 | ||
4096 | if (old_level != *pressure_level) { | |
4097 | break; | |
4098 | } | |
4099 | } | |
4100 | } | |
4101 | } | |
4102 | ||
4103 | *pressure_level = old_level; | |
4104 | kr = KERN_SUCCESS; | |
4105 | } else { | |
4106 | kr = KERN_INVALID_ARGUMENT; | |
4107 | } | |
4108 | ||
4109 | return kr; | |
4110 | #endif /* VM_PRESSURE_EVENTS */ | |
4111 | } | |
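/*
 * Usage sketch (illustrative; the wrapper function is hypothetical):
 * snapshot the current pressure level, then block until it changes
 * from that value.
 */
#if 0
static void
vm_pressure_level_wait_example(void)
{
	unsigned int	level = 0;

	(void) mach_vm_pressure_level_monitor(FALSE, &level);	/* current level */

	if (mach_vm_pressure_level_monitor(TRUE, &level) == KERN_SUCCESS) {
		/* 'level' now holds the new, different pressure level */
	}
}
#endif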
4112 | ||
4113 | #if VM_PRESSURE_EVENTS | |
4114 | void | |
4115 | vm_pressure_thread(void) { | |
4116 | static boolean_t thread_initialized = FALSE; | |
4117 | ||
4118 | if (thread_initialized == TRUE) { | |
4119 | vm_pressure_thread_running = TRUE; | |
4120 | consider_vm_pressure_events(); | |
4121 | vm_pressure_thread_running = FALSE; | |
4122 | } | |
4123 | ||
4124 | thread_initialized = TRUE; | |
4125 | assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT); | |
4126 | thread_block((thread_continue_t)vm_pressure_thread); | |
4127 | } | |
4128 | #endif /* VM_PRESSURE_EVENTS */ | |
4129 | ||
4130 | ||
4131 | uint32_t vm_pageout_considered_page_last = 0; | |
4132 | ||
4133 | /* | |
4134 | * called once per-second via "compute_averages" | |
4135 | */ | |
4136 | void | |
4137 | compute_pageout_gc_throttle() | |
4138 | { | |
4139 | if (vm_pageout_considered_page != vm_pageout_considered_page_last) { | |
4140 | ||
4141 | vm_pageout_considered_page_last = vm_pageout_considered_page; | |
4142 | ||
4143 | thread_wakeup((event_t) &vm_pageout_garbage_collect); | |
4144 | } | |
4145 | } | |
4146 | ||
4147 | ||
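/*
 * vm_pageout_garbage_collect:
 *
 * Kernel garbage-collection thread, woken by compute_pageout_gc_throttle()
 * above whenever pageout has made progress.  Each pass collects kernel
 * stacks, runs consider_machine_collect() and m_drain(), invokes the
 * buffer-cache callout (if one was registered via
 * vm_set_buffer_cleanup_callout()), and then garbage-collects the zone
 * allocator, repeating while the buffer cache keeps returning large zone
 * elements and the free-page count stays below target.
 */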
4148 | static void | |
4149 | vm_pageout_garbage_collect(int collect) | |
4150 | { | |
4151 | ||
4152 | if (collect) { | |
4153 | boolean_t buf_large_zfree = FALSE; | |
4154 | boolean_t first_try = TRUE; | |
4155 | ||
4156 | stack_collect(); | |
4157 | ||
4158 | consider_machine_collect(); | |
4159 | m_drain(); | |
4160 | ||
4161 | do { | |
4162 | if (consider_buffer_cache_collect != NULL) { | |
4163 | buf_large_zfree = (*consider_buffer_cache_collect)(0); | |
4164 | } | |
4165 | if (first_try == TRUE || buf_large_zfree == TRUE) { | |
4166 | /* | |
4167 | * consider_zone_gc should be last, because the other operations | |
4168 | * might return memory to zones. | |
4169 | */ | |
4170 | consider_zone_gc(buf_large_zfree); | |
4171 | } | |
4172 | first_try = FALSE; | |
4173 | ||
4174 | } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target); | |
4175 | ||
4176 | consider_machine_adjust(); | |
4177 | } | |
4178 | assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT); | |
4179 | ||
4180 | thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1); | |
4181 | /*NOTREACHED*/ | |
4182 | } | |
4183 | ||
4184 | ||
4185 | void vm_pageout_reinit_tuneables(void); | |
4186 | ||
4187 | void | |
4188 | vm_pageout_reinit_tuneables(void) | |
4189 | { | |
4190 | ||
4191 | vm_compressor_minorcompact_threshold_divisor = 18; | |
4192 | vm_compressor_majorcompact_threshold_divisor = 22; | |
4193 | vm_compressor_unthrottle_threshold_divisor = 32; | |
4194 | } | |
4195 | ||
4196 | ||
4197 | #if VM_PAGE_BUCKETS_CHECK | |
4198 | #if VM_PAGE_FAKE_BUCKETS | |
4199 | extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end; | |
4200 | #endif /* VM_PAGE_FAKE_BUCKETS */ | |
4201 | #endif /* VM_PAGE_BUCKETS_CHECK */ | |
4202 | ||
4203 | #define FBDP_TEST_COLLAPSE_COMPRESSOR 0 | |
4204 | #if FBDP_TEST_COLLAPSE_COMPRESSOR | |
4205 | extern boolean_t vm_object_collapse_compressor_allowed; | |
4206 | #include <IOKit/IOLib.h> | |
4207 | #endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */ | |
4208 | ||
4209 | #define FBDP_TEST_WIRE_AND_EXTRACT 0 | |
4210 | #if FBDP_TEST_WIRE_AND_EXTRACT | |
4211 | extern ledger_template_t task_ledger_template; | |
4212 | #include <mach/mach_vm.h> | |
4213 | extern ppnum_t vm_map_get_phys_page(vm_map_t map, | |
4214 | vm_offset_t offset); | |
4215 | #endif /* FBDP_TEST_WIRE_AND_EXTRACT */ | |
4216 | ||
4217 | ||
4218 | void | |
4219 | vm_set_restrictions() | |
4220 | { | |
4221 | host_basic_info_data_t hinfo; | |
4222 | mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; | |
4223 | ||
4224 | #define BSD_HOST 1 | |
4225 | host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); | |
4226 | ||
4227 | assert(hinfo.max_cpus > 0); | |
4228 | ||
4229 | if (hinfo.max_cpus <= 3) { | |
4230 | /* | |
4231 | * on systems with a limited number of CPUs, bind the | |
4232 | * 4 major threads that can free memory and that tend to use | |
4233 | * a fair bit of CPU under pressured conditions to a single processor. | |
4234 | * This ensures that these threads don't hog all of the available CPUs | |
4235 | * (important for camera launch), while allowing them to run independently | |
4236 | * w/r to locks... the 4 threads are | |
4237 | * vm_pageout_scan, vm_pageout_iothread_internal (compressor), | |
4238 | * vm_compressor_swap_trigger_thread (minor and major compactions), | |
4239 | * memorystatus_thread (jetsams). | |
4240 | * | |
4241 | * the first time the thread is run, it is responsible for checking the | |
4242 | * state of vm_restricted_to_single_processor, and if TRUE it calls | |
4243 | * thread_bind_master... someday this should be replaced with a group | |
4244 | * scheduling mechanism and KPI. | |
4245 | */ | |
4246 | vm_restricted_to_single_processor = TRUE; | |
4247 | } | |
4248 | } | |
4249 | ||
4250 | ||
4251 | void | |
4252 | vm_pageout(void) | |
4253 | { | |
4254 | thread_t self = current_thread(); | |
4255 | thread_t thread; | |
4256 | kern_return_t result; | |
4257 | spl_t s; | |
4258 | ||
4259 | /* | |
4260 | * Set thread privileges. | |
4261 | */ | |
4262 | s = splsched(); | |
4263 | ||
4264 | thread_lock(self); | |
4265 | self->options |= TH_OPT_VMPRIV; | |
4266 | sched_set_thread_base_priority(self, BASEPRI_PREEMPT - 1); | |
4267 | thread_unlock(self); | |
4268 | ||
4269 | if (!self->reserved_stack) | |
4270 | self->reserved_stack = self->kernel_stack; | |
4271 | ||
4272 | if (vm_restricted_to_single_processor == TRUE) | |
4273 | thread_vm_bind_group_add(); | |
4274 | ||
4275 | splx(s); | |
4276 | ||
4277 | /* | |
4278 | * Initialize some paging parameters. | |
4279 | */ | |
4280 | ||
4281 | if (vm_pageout_swap_wait == 0) | |
4282 | vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT; | |
4283 | ||
4284 | if (vm_pageout_idle_wait == 0) | |
4285 | vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT; | |
4286 | ||
4287 | if (vm_pageout_burst_wait == 0) | |
4288 | vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT; | |
4289 | ||
4290 | if (vm_pageout_empty_wait == 0) | |
4291 | vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT; | |
4292 | ||
4293 | if (vm_pageout_deadlock_wait == 0) | |
4294 | vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT; | |
4295 | ||
4296 | if (vm_pageout_deadlock_relief == 0) | |
4297 | vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF; | |
4298 | ||
4299 | if (vm_pageout_inactive_relief == 0) | |
4300 | vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF; | |
4301 | ||
4302 | if (vm_pageout_burst_active_throttle == 0) | |
4303 | vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE; | |
4304 | ||
4305 | if (vm_pageout_burst_inactive_throttle == 0) | |
4306 | vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE; | |
4307 | ||
4308 | /* | |
4309 | * Set kernel task to low backing store privileged | |
4310 | * status | |
4311 | */ | |
4312 | task_lock(kernel_task); | |
4313 | kernel_task->priv_flags |= VM_BACKING_STORE_PRIV; | |
4314 | task_unlock(kernel_task); | |
4315 | ||
4316 | vm_page_free_count_init = vm_page_free_count; | |
4317 | ||
4318 | /* | |
4319 | * even if we've already called vm_page_free_reserve | |
4320 | * call it again here to ensure that the targets are | |
4321 | * accurately calculated (it uses vm_page_free_count_init) | |
4322 | * calling it with an arg of 0 will not change the reserve | |
4323 | * but will re-calculate free_min and free_target | |
4324 | */ | |
4325 | if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) { | |
4326 | vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved); | |
4327 | } else | |
4328 | vm_page_free_reserve(0); | |
4329 | ||
4330 | ||
4331 | queue_init(&vm_pageout_queue_external.pgo_pending); | |
4332 | vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; | |
4333 | vm_pageout_queue_external.pgo_laundry = 0; | |
4334 | vm_pageout_queue_external.pgo_idle = FALSE; | |
4335 | vm_pageout_queue_external.pgo_busy = FALSE; | |
4336 | vm_pageout_queue_external.pgo_throttled = FALSE; | |
4337 | vm_pageout_queue_external.pgo_draining = FALSE; | |
4338 | vm_pageout_queue_external.pgo_lowpriority = FALSE; | |
4339 | vm_pageout_queue_external.pgo_tid = -1; | |
4340 | vm_pageout_queue_external.pgo_inited = FALSE; | |
4341 | ||
4342 | queue_init(&vm_pageout_queue_internal.pgo_pending); | |
4343 | vm_pageout_queue_internal.pgo_maxlaundry = 0; | |
4344 | vm_pageout_queue_internal.pgo_laundry = 0; | |
4345 | vm_pageout_queue_internal.pgo_idle = FALSE; | |
4346 | vm_pageout_queue_internal.pgo_busy = FALSE; | |
4347 | vm_pageout_queue_internal.pgo_throttled = FALSE; | |
4348 | vm_pageout_queue_internal.pgo_draining = FALSE; | |
4349 | vm_pageout_queue_internal.pgo_lowpriority = FALSE; | |
4350 | vm_pageout_queue_internal.pgo_tid = -1; | |
4351 | vm_pageout_queue_internal.pgo_inited = FALSE; | |
4352 | ||
4353 | /* internal pageout thread started when default pager registered first time */ | |
4354 | /* external pageout and garbage collection threads started here */ | |
4355 | ||
4356 | result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, | |
4357 | BASEPRI_PREEMPT - 1, | |
4358 | &vm_pageout_external_iothread); | |
4359 | if (result != KERN_SUCCESS) | |
4360 | panic("vm_pageout_iothread_external: create failed"); | |
4361 | ||
4362 | thread_deallocate(vm_pageout_external_iothread); | |
4363 | ||
4364 | result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, | |
4365 | BASEPRI_DEFAULT, | |
4366 | &thread); | |
4367 | if (result != KERN_SUCCESS) | |
4368 | panic("vm_pageout_garbage_collect: create failed"); | |
4369 | ||
4370 | thread_deallocate(thread); | |
4371 | ||
4372 | #if VM_PRESSURE_EVENTS | |
4373 | result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL, | |
4374 | BASEPRI_DEFAULT, | |
4375 | &thread); | |
4376 | ||
4377 | if (result != KERN_SUCCESS) | |
4378 | panic("vm_pressure_thread: create failed"); | |
4379 | ||
4380 | thread_deallocate(thread); | |
4381 | #endif | |
4382 | ||
4383 | vm_object_reaper_init(); | |
4384 | ||
4385 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) | |
4386 | vm_compressor_pager_init(); | |
4387 | ||
4388 | #if VM_PRESSURE_EVENTS | |
4389 | vm_pressure_events_enabled = TRUE; | |
4390 | #endif /* VM_PRESSURE_EVENTS */ | |
4391 | ||
4392 | #if CONFIG_PHANTOM_CACHE | |
4393 | vm_phantom_cache_init(); | |
4394 | #endif | |
4395 | #if VM_PAGE_BUCKETS_CHECK | |
4396 | #if VM_PAGE_FAKE_BUCKETS | |
4397 | printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n", | |
4398 | (uint64_t) vm_page_fake_buckets_start, | |
4399 | (uint64_t) vm_page_fake_buckets_end); | |
4400 | pmap_protect(kernel_pmap, | |
4401 | vm_page_fake_buckets_start, | |
4402 | vm_page_fake_buckets_end, | |
4403 | VM_PROT_READ); | |
4404 | // *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */ | |
4405 | #endif /* VM_PAGE_FAKE_BUCKETS */ | |
4406 | #endif /* VM_PAGE_BUCKETS_CHECK */ | |
4407 | ||
4408 | #if VM_OBJECT_TRACKING | |
4409 | vm_object_tracking_init(); | |
4410 | #endif /* VM_OBJECT_TRACKING */ | |
4411 | ||
4412 | ||
4413 | #if FBDP_TEST_COLLAPSE_COMPRESSOR | |
4414 | vm_object_size_t backing_size, top_size; | |
4415 | vm_object_t backing_object, top_object; | |
4416 | vm_map_offset_t backing_offset, top_offset; | |
4417 | unsigned char *backing_address, *top_address; | |
4418 | kern_return_t kr; | |
4419 | ||
4420 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR:\n"); | |
4421 | ||
4422 | /* create backing object */ | |
4423 | backing_size = 15 * PAGE_SIZE; | |
4424 | backing_object = vm_object_allocate(backing_size); | |
4425 | assert(backing_object != VM_OBJECT_NULL); | |
4426 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n", | |
4427 | backing_object); | |
4428 | /* map backing object */ | |
4429 | backing_offset = 0; | |
4430 | kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0, | |
4431 | VM_FLAGS_ANYWHERE, backing_object, 0, FALSE, | |
4432 | VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT); | |
4433 | assert(kr == KERN_SUCCESS); | |
4434 | backing_address = (unsigned char *) backing_offset; | |
4435 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4436 | "mapped backing object %p at 0x%llx\n", | |
4437 | backing_object, (uint64_t) backing_offset); | |
4438 | /* populate with pages to be compressed in backing object */ | |
4439 | backing_address[0x1*PAGE_SIZE] = 0xB1; | |
4440 | backing_address[0x4*PAGE_SIZE] = 0xB4; | |
4441 | backing_address[0x7*PAGE_SIZE] = 0xB7; | |
4442 | backing_address[0xa*PAGE_SIZE] = 0xBA; | |
4443 | backing_address[0xd*PAGE_SIZE] = 0xBD; | |
4444 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4445 | "populated pages to be compressed in " | |
4446 | "backing_object %p\n", backing_object); | |
4447 | /* compress backing object */ | |
4448 | vm_object_pageout(backing_object); | |
4449 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n", | |
4450 | backing_object); | |
4451 | /* wait for all the pages to be gone */ | |
4452 | while (*(volatile int *)&backing_object->resident_page_count != 0) | |
4453 | IODelay(10); | |
4454 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n", | |
4455 | backing_object); | |
4456 | /* populate with pages to be resident in backing object */ | |
4457 | backing_address[0x0*PAGE_SIZE] = 0xB0; | |
4458 | backing_address[0x3*PAGE_SIZE] = 0xB3; | |
4459 | backing_address[0x6*PAGE_SIZE] = 0xB6; | |
4460 | backing_address[0x9*PAGE_SIZE] = 0xB9; | |
4461 | backing_address[0xc*PAGE_SIZE] = 0xBC; | |
4462 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4463 | "populated pages to be resident in " | |
4464 | "backing_object %p\n", backing_object); | |
4465 | /* leave the other pages absent */ | |
4466 | /* mess with the paging_offset of the backing_object */ | |
4467 | assert(backing_object->paging_offset == 0); | |
4468 | backing_object->paging_offset = 0x3000; | |
4469 | ||
4470 | /* create top object */ | |
4471 | top_size = 9 * PAGE_SIZE; | |
4472 | top_object = vm_object_allocate(top_size); | |
4473 | assert(top_object != VM_OBJECT_NULL); | |
4474 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created top object %p\n", | |
4475 | top_object); | |
4476 | /* map top object */ | |
4477 | top_offset = 0; | |
4478 | kr = vm_map_enter(kernel_map, &top_offset, top_size, 0, | |
4479 | VM_FLAGS_ANYWHERE, top_object, 0, FALSE, | |
4480 | VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT); | |
4481 | assert(kr == KERN_SUCCESS); | |
4482 | top_address = (unsigned char *) top_offset; | |
4483 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4484 | "mapped top object %p at 0x%llx\n", | |
4485 | top_object, (uint64_t) top_offset); | |
4486 | /* populate with pages to be compressed in top object */ | |
4487 | top_address[0x3*PAGE_SIZE] = 0xA3; | |
4488 | top_address[0x4*PAGE_SIZE] = 0xA4; | |
4489 | top_address[0x5*PAGE_SIZE] = 0xA5; | |
4490 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4491 | "populated pages to be compressed in " | |
4492 | "top_object %p\n", top_object); | |
4493 | /* compress top object */ | |
4494 | vm_object_pageout(top_object); | |
4495 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n", | |
4496 | top_object); | |
4497 | /* wait for all the pages to be gone */ | |
4498 | while (top_object->resident_page_count != 0); | |
4499 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n", | |
4500 | top_object); | |
4501 | /* populate with pages to be resident in top object */ | |
4502 | top_address[0x0*PAGE_SIZE] = 0xA0; | |
4503 | top_address[0x1*PAGE_SIZE] = 0xA1; | |
4504 | top_address[0x2*PAGE_SIZE] = 0xA2; | |
4505 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4506 | "populated pages to be resident in " | |
4507 | "top_object %p\n", top_object); | |
4508 | /* leave the other pages absent */ | |
4509 | ||
4510 | /* link the 2 objects */ | |
4511 | vm_object_reference(backing_object); | |
4512 | top_object->shadow = backing_object; | |
4513 | top_object->vo_shadow_offset = 0x3000; | |
4514 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n", | |
4515 | top_object, backing_object); | |
4516 | ||
4517 | /* unmap backing object */ | |
4518 | vm_map_remove(kernel_map, | |
4519 | backing_offset, | |
4520 | backing_offset + backing_size, | |
4521 | 0); | |
4522 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4523 | "unmapped backing_object %p [0x%llx:0x%llx]\n", | |
4524 | backing_object, | |
4525 | (uint64_t) backing_offset, | |
4526 | (uint64_t) (backing_offset + backing_size)); | |
4527 | ||
4528 | /* collapse */ | |
4529 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object); | |
4530 | vm_object_lock(top_object); | |
4531 | vm_object_collapse(top_object, 0, FALSE); | |
4532 | vm_object_unlock(top_object); | |
4533 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object); | |
4534 | ||
4535 | /* did it work? */ | |
4536 | if (top_object->shadow != VM_OBJECT_NULL) { | |
4537 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: not collapsed\n"); | |
4538 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n"); | |
4539 | if (vm_object_collapse_compressor_allowed) { | |
4540 | panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n"); | |
4541 | } | |
4542 | } else { | |
4543 | /* check the contents of the mapping */ | |
4544 | unsigned char expect[9] = | |
4545 | { 0xA0, 0xA1, 0xA2, /* resident in top */ | |
4546 | 0xA3, 0xA4, 0xA5, /* compressed in top */ | |
4547 | 0xB9, /* resident in backing + shadow_offset */ | |
4548 | 0xBD, /* compressed in backing + shadow_offset + paging_offset */ | |
4549 | 0x00 }; /* absent in both */ | |
4550 | unsigned char actual[9]; | |
4551 | unsigned int i, errors; | |
4552 | ||
4553 | errors = 0; | |
4554 | for (i = 0; i < sizeof (actual); i++) { | |
4555 | actual[i] = (unsigned char) top_address[i*PAGE_SIZE]; | |
4556 | if (actual[i] != expect[i]) { | |
4557 | errors++; | |
4558 | } | |
4559 | } | |
4560 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " | |
4561 | "actual [%x %x %x %x %x %x %x %x %x] " | |
4562 | "expect [%x %x %x %x %x %x %x %x %x] " | |
4563 | "%d errors\n", | |
4564 | actual[0], actual[1], actual[2], actual[3], | |
4565 | actual[4], actual[5], actual[6], actual[7], | |
4566 | actual[8], | |
4567 | expect[0], expect[1], expect[2], expect[3], | |
4568 | expect[4], expect[5], expect[6], expect[7], | |
4569 | expect[8], | |
4570 | errors); | |
4571 | if (errors) { | |
4572 | panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n"); | |
4573 | } else { | |
4574 | printf("FBDP_TEST_COLLAPSE_COMPRESSOR: PASS\n"); | |
4575 | } | |
4576 | } | |
4577 | #endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */ | |
4578 | ||
4579 | #if FBDP_TEST_WIRE_AND_EXTRACT | |
4580 | ledger_t ledger; | |
4581 | vm_map_t user_map, wire_map; | |
4582 | mach_vm_address_t user_addr, wire_addr; | |
4583 | mach_vm_size_t user_size, wire_size; | |
4584 | mach_vm_offset_t cur_offset; | |
4585 | vm_prot_t cur_prot, max_prot; | |
4586 | ppnum_t user_ppnum, wire_ppnum; | |
4587 | kern_return_t kr; | |
4588 | ||
4589 | ledger = ledger_instantiate(task_ledger_template, | |
4590 | LEDGER_CREATE_ACTIVE_ENTRIES); | |
4591 | user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT), | |
4592 | 0x100000000ULL, | |
4593 | 0x200000000ULL, | |
4594 | TRUE); | |
4595 | wire_map = vm_map_create(NULL, | |
4596 | 0x100000000ULL, | |
4597 | 0x200000000ULL, | |
4598 | TRUE); | |
4599 | user_addr = 0; | |
4600 | user_size = 0x10000; | |
4601 | kr = mach_vm_allocate(user_map, | |
4602 | &user_addr, | |
4603 | user_size, | |
4604 | VM_FLAGS_ANYWHERE); | |
4605 | assert(kr == KERN_SUCCESS); | |
4606 | wire_addr = 0; | |
4607 | wire_size = user_size; | |
4608 | kr = mach_vm_remap(wire_map, | |
4609 | &wire_addr, | |
4610 | wire_size, | |
4611 | 0, | |
4612 | VM_FLAGS_ANYWHERE, | |
4613 | user_map, | |
4614 | user_addr, | |
4615 | FALSE, | |
4616 | &cur_prot, | |
4617 | &max_prot, | |
4618 | VM_INHERIT_NONE); | |
4619 | assert(kr == KERN_SUCCESS); | |
4620 | for (cur_offset = 0; | |
4621 | cur_offset < wire_size; | |
4622 | cur_offset += PAGE_SIZE) { | |
4623 | kr = vm_map_wire_and_extract(wire_map, | |
4624 | wire_addr + cur_offset, | |
4625 | VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK), | |
4626 | TRUE, | |
4627 | &wire_ppnum); | |
4628 | assert(kr == KERN_SUCCESS); | |
4629 | user_ppnum = vm_map_get_phys_page(user_map, | |
4630 | user_addr + cur_offset); | |
4631 | printf("FBDP_TEST_WIRE_AND_EXTRACT: kr=0x%x " | |
4632 | "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n", | |
4633 | kr, | |
4634 | user_map, user_addr + cur_offset, user_ppnum, | |
4635 | wire_map, wire_addr + cur_offset, wire_ppnum); | |
4636 | if (kr != KERN_SUCCESS || | |
4637 | wire_ppnum == 0 || | |
4638 | wire_ppnum != user_ppnum) { | |
4639 | panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n"); | |
4640 | } | |
4641 | } | |
4642 | cur_offset -= PAGE_SIZE; | |
4643 | kr = vm_map_wire_and_extract(wire_map, | |
4644 | wire_addr + cur_offset, | |
4645 | VM_PROT_DEFAULT, | |
4646 | TRUE, | |
4647 | &wire_ppnum); | |
4648 | assert(kr == KERN_SUCCESS); | |
4649 | printf("FBDP_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x " | |
4650 | "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n", | |
4651 | kr, | |
4652 | user_map, user_addr + cur_offset, user_ppnum, | |
4653 | wire_map, wire_addr + cur_offset, wire_ppnum); | |
4654 | if (kr != KERN_SUCCESS || | |
4655 | wire_ppnum == 0 || | |
4656 | wire_ppnum != user_ppnum) { | |
4657 | panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n"); | |
4658 | } | |
4659 | ||
4660 | printf("FBDP_TEST_WIRE_AND_EXTRACT: PASS\n"); | |
4661 | #endif /* FBDP_TEST_WIRE_AND_EXTRACT */ | |
4662 | ||
4663 | vm_pageout_continue(); | |
4664 | ||
4665 | /* | |
4666 | * Unreached code! | |
4667 | * | |
4668 | * The vm_pageout_continue() call above never returns, so the code below is never | |
4669 | * executed. We take advantage of this to declare several DTrace VM related probe | |
4670 | * points that our kernel doesn't have an analog for. These are probe points that | |
4671 | * exist in Solaris and are in the DTrace documentation, so people may have written | |
4672 | * scripts that use them. Declaring the probe points here means their scripts will | |
4673 | * compile and execute which we want for portability of the scripts, but since this | |
4674 | * section of code is never reached, the probe points will simply never fire. Yes, | |
4675 | * this is basically a hack. The problem is the DTrace probe points were chosen with | |
4676 | * Solaris specific VM events in mind, not portability to different VM implementations. | |
4677 | */ | |
4678 | ||
4679 | DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL); | |
4680 | DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL); | |
4681 | DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL); | |
4682 | DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL); | |
4683 | DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL); | |
4684 | DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL); | |
4685 | DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL); | |
4686 | /*NOTREACHED*/ | |
4687 | } | |
4688 | ||
4689 | ||
4690 | ||
4691 | int vm_compressor_thread_count = 2; | |
4692 | ||
4693 | kern_return_t | |
4694 | vm_pageout_internal_start(void) | |
4695 | { | |
4696 | kern_return_t result; | |
4697 | int i; | |
4698 | host_basic_info_data_t hinfo; | |
4699 | int thread_count; | |
4700 | ||
4701 | ||
4702 | if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { | |
4703 | mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; | |
4704 | #define BSD_HOST 1 | |
4705 | host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); | |
4706 | ||
4707 | assert(hinfo.max_cpus > 0); | |
4708 | ||
4709 | if (vm_compressor_thread_count >= hinfo.max_cpus) | |
4710 | vm_compressor_thread_count = hinfo.max_cpus - 1; | |
4711 | if (vm_compressor_thread_count <= 0) | |
4712 | vm_compressor_thread_count = 1; | |
4713 | else if (vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT) | |
4714 | vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT; | |
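 | /* | |
 |  * For example, with the default vm_compressor_thread_count of 2: on a | |
 |  * 4-CPU system the value stays at 2, while on a single-CPU system it is | |
 |  * first reduced to max_cpus - 1 == 0 and then clamped back up to 1 | |
 |  * (assuming MAX_COMPRESSOR_THREAD_COUNT >= 2). | |
 |  */ | |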
4715 | ||
4716 | if (vm_compressor_immediate_preferred == TRUE) { | |
4717 | vm_pageout_immediate_chead = NULL; | |
4718 | vm_pageout_immediate_scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE); | |
4719 | ||
4720 | vm_compressor_thread_count = 1; | |
4721 | } | |
4722 | thread_count = vm_compressor_thread_count; | |
4723 | ||
4724 | vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX; | |
4725 | } else { | |
4726 | vm_compressor_thread_count = 0; | |
4727 | thread_count = 1; | |
4728 | vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; | |
4729 | } | |
4730 | ||
4731 | for (i = 0; i < vm_compressor_thread_count; i++) { | |
4732 | ciq[i].id = i; | |
4733 | ciq[i].q = &vm_pageout_queue_internal; | |
4734 | ciq[i].current_chead = NULL; | |
4735 | ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE); | |
4736 | } | |
4737 | for (i = 0; i < thread_count; i++) { | |
4738 | result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread); | |
4739 | ||
4740 | if (result == KERN_SUCCESS) | |
4741 | thread_deallocate(vm_pageout_internal_iothread); | |
4742 | else | |
4743 | break; | |
4744 | } | |
4745 | return result; | |
4746 | } | |
4747 | ||
4748 | #if CONFIG_IOSCHED | |
4749 | /* | |
4750 | * To support I/O Expedite for compressed files we mark the upls with special flags. | |
4751 | * The way decmpfs works is that we create a big upl which marks all the pages needed to | |
4752 | * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs | |
4753 | * then issues smaller I/Os for the compressed data, decompresses it and puts the data into the pages | |
4754 | * being held in the big original UPL. We mark each of these smaller UPLs with the flag | |
4755 | * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the | |
4756 | * decmp_io_upl field (in the upl structure). This link is protected in the forward direction | |
4757 | * by the req upl lock (the reverse link doesn't need synchronization since we never inspect this link | |
4758 | * unless the real I/O upl is being destroyed). | |
4759 | */ | |
4760 | ||
4761 | ||
4762 | static void | |
4763 | upl_set_decmp_info(upl_t upl, upl_t src_upl) | |
4764 | { | |
4765 | assert((src_upl->flags & UPL_DECMP_REQ) != 0); | |
4766 | ||
4767 | upl_lock(src_upl); | |
4768 | if (src_upl->decmp_io_upl) { | |
4769 | /* | |
4770 | * If there is already an alive real I/O UPL, ignore this new UPL. | |
4771 | * This case should rarely happen and even if it does, it just means | |
4772 | * that we might issue a spurious expedite which the driver is expected | |
4773 | * to handle. | |
4774 | */ | |
4775 | upl_unlock(src_upl); | |
4776 | return; | |
4777 | } | |
4778 | src_upl->decmp_io_upl = (void *)upl; | |
4779 | src_upl->ref_count++; | |
4780 | ||
4781 | upl->flags |= UPL_DECMP_REAL_IO; | |
4782 | upl->decmp_io_upl = (void *)src_upl; | |
4783 | upl_unlock(src_upl); | |
4784 | } | |
4785 | #endif /* CONFIG_IOSCHED */ | |
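 | /* | |
 |  * Summary of the resulting linkage (illustrative diagram only, derived | |
 |  * from upl_set_decmp_info() above and from upl_destroy() and | |
 |  * upl_mark_decmp() below): | |
 |  * | |
 |  *	request UPL (UPL_DECMP_REQ)         real I/O UPL (UPL_DECMP_REAL_IO) | |
 |  *	    decmp_io_upl  ------------------------>  (this UPL) | |
 |  *	    ref_count++   <------------------------  decmp_io_upl (back link) | |
 |  * | |
 |  * Both links are set while holding the request UPL's lock; when the real | |
 |  * I/O UPL is destroyed, upl_destroy() clears the forward link under the | |
 |  * same lock and drops the extra reference with upl_deallocate(). | |
 |  */ | |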
4786 | ||
4787 | #if UPL_DEBUG | |
4788 | int upl_debug_enabled = 1; | |
4789 | #else | |
4790 | int upl_debug_enabled = 0; | |
4791 | #endif | |
4792 | ||
4793 | static upl_t | |
4794 | upl_create(int type, int flags, upl_size_t size) | |
4795 | { | |
4796 | upl_t upl; | |
4797 | vm_size_t page_field_size = 0; | |
4798 | int upl_flags = 0; | |
4799 | vm_size_t upl_size = sizeof(struct upl); | |
4800 | ||
4801 | size = round_page_32(size); | |
4802 | ||
4803 | if (type & UPL_CREATE_LITE) { | |
4804 | page_field_size = (atop(size) + 7) >> 3; | |
4805 | page_field_size = (page_field_size + 3) & 0xFFFFFFFC; | |
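 | /* | |
 |  * Example (assuming 4KB pages): a 1MB lite UPL covers 256 pages, so the | |
 |  * bitmap needs (256 + 7) >> 3 = 32 bytes, which is already a multiple | |
 |  * of 4, so page_field_size ends up as 32 bytes. | |
 |  */ | |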
4806 | ||
4807 | upl_flags |= UPL_LITE; | |
4808 | } | |
4809 | if (type & UPL_CREATE_INTERNAL) { | |
4810 | upl_size += sizeof(struct upl_page_info) * atop(size); | |
4811 | ||
4812 | upl_flags |= UPL_INTERNAL; | |
4813 | } | |
4814 | upl = (upl_t)kalloc(upl_size + page_field_size); | |
4815 | ||
4816 | if (page_field_size) | |
4817 | bzero((char *)upl + upl_size, page_field_size); | |
4818 | ||
4819 | upl->flags = upl_flags | flags; | |
4820 | upl->src_object = NULL; | |
4821 | upl->kaddr = (vm_offset_t)0; | |
4822 | upl->size = 0; | |
4823 | upl->map_object = NULL; | |
4824 | upl->ref_count = 1; | |
4825 | upl->ext_ref_count = 0; | |
4826 | upl->highest_page = 0; | |
4827 | upl_lock_init(upl); | |
4828 | upl->vector_upl = NULL; | |
4829 | upl->associated_upl = NULL; | |
4830 | #if CONFIG_IOSCHED | |
4831 | if (type & UPL_CREATE_IO_TRACKING) { | |
4832 | upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO); | |
4833 | } | |
4834 | ||
4835 | upl->upl_reprio_info = 0; | |
4836 | upl->decmp_io_upl = 0; | |
4837 | if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) { | |
4838 | /* Only support expedite on internal UPLs */ | |
4839 | thread_t curthread = current_thread(); | |
4840 | upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size)); | |
4841 | bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size))); | |
4842 | upl->flags |= UPL_EXPEDITE_SUPPORTED; | |
4843 | if (curthread->decmp_upl != NULL) | |
4844 | upl_set_decmp_info(upl, curthread->decmp_upl); | |
4845 | } | |
4846 | #endif | |
4847 | #if CONFIG_IOSCHED || UPL_DEBUG | |
4848 | if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) { | |
4849 | upl->upl_creator = current_thread(); | |
4850 | upl->uplq.next = 0; | |
4851 | upl->uplq.prev = 0; | |
4852 | upl->flags |= UPL_TRACKED_BY_OBJECT; | |
4853 | } | |
4854 | #endif | |
4855 | ||
4856 | #if UPL_DEBUG | |
4857 | upl->ubc_alias1 = 0; | |
4858 | upl->ubc_alias2 = 0; | |
4859 | ||
4860 | upl->upl_state = 0; | |
4861 | upl->upl_commit_index = 0; | |
4862 | bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records)); | |
4863 | ||
4864 | (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES); | |
4865 | #endif /* UPL_DEBUG */ | |
4866 | ||
4867 | return(upl); | |
4868 | } | |
4869 | ||
4870 | static void | |
4871 | upl_destroy(upl_t upl) | |
4872 | { | |
4873 | int page_field_size; /* bit field in word size buf */ | |
4874 | int size; | |
4875 | ||
4876 | if (upl->ext_ref_count) { | |
4877 | panic("upl(%p) ext_ref_count", upl); | |
4878 | } | |
4879 | ||
4880 | #if CONFIG_IOSCHED | |
4881 | if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) { | |
4882 | upl_t src_upl; | |
4883 | src_upl = upl->decmp_io_upl; | |
4884 | assert((src_upl->flags & UPL_DECMP_REQ) != 0); | |
4885 | upl_lock(src_upl); | |
4886 | src_upl->decmp_io_upl = NULL; | |
4887 | upl_unlock(src_upl); | |
4888 | upl_deallocate(src_upl); | |
4889 | } | |
4890 | #endif /* CONFIG_IOSCHED */ | |
4891 | ||
4892 | #if CONFIG_IOSCHED || UPL_DEBUG | |
4893 | if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) { | |
4894 | vm_object_t object; | |
4895 | ||
4896 | if (upl->flags & UPL_SHADOWED) { | |
4897 | object = upl->map_object->shadow; | |
4898 | } else { | |
4899 | object = upl->map_object; | |
4900 | } | |
4901 | ||
4902 | vm_object_lock(object); | |
4903 | queue_remove(&object->uplq, upl, upl_t, uplq); | |
4904 | vm_object_activity_end(object); | |
4905 | vm_object_collapse(object, 0, TRUE); | |
4906 | vm_object_unlock(object); | |
4907 | } | |
4908 | #endif | |
4909 | /* | |
4910 | * drop a reference on the map_object whether or | |
4911 | * not a pageout object is inserted | |
4912 | */ | |
4913 | if (upl->flags & UPL_SHADOWED) | |
4914 | vm_object_deallocate(upl->map_object); | |
4915 | ||
4916 | if (upl->flags & UPL_DEVICE_MEMORY) | |
4917 | size = PAGE_SIZE; | |
4918 | else | |
4919 | size = upl->size; | |
4920 | page_field_size = 0; | |
4921 | ||
4922 | if (upl->flags & UPL_LITE) { | |
4923 | page_field_size = ((size/PAGE_SIZE) + 7) >> 3; | |
4924 | page_field_size = (page_field_size + 3) & 0xFFFFFFFC; | |
4925 | } | |
4926 | upl_lock_destroy(upl); | |
4927 | upl->vector_upl = (vector_upl_t) 0xfeedbeef; | |
4928 | ||
4929 | #if CONFIG_IOSCHED | |
4930 | if (upl->flags & UPL_EXPEDITE_SUPPORTED) | |
4931 | kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE)); | |
4932 | #endif | |
4933 | ||
4934 | if (upl->flags & UPL_INTERNAL) { | |
4935 | kfree(upl, | |
4936 | sizeof(struct upl) + | |
4937 | (sizeof(struct upl_page_info) * (size/PAGE_SIZE)) | |
4938 | + page_field_size); | |
4939 | } else { | |
4940 | kfree(upl, sizeof(struct upl) + page_field_size); | |
4941 | } | |
4942 | } | |
4943 | ||
4944 | void | |
4945 | upl_deallocate(upl_t upl) | |
4946 | { | |
4947 | upl_lock(upl); | |
4948 | if (--upl->ref_count == 0) { | |
4949 | if(vector_upl_is_valid(upl)) | |
4950 | vector_upl_deallocate(upl); | |
4951 | upl_unlock(upl); | |
4952 | upl_destroy(upl); | |
4953 | } | |
4954 | else | |
4955 | upl_unlock(upl); | |
4956 | } | |
4957 | ||
4958 | #if CONFIG_IOSCHED | |
4959 | void | |
4960 | upl_mark_decmp(upl_t upl) | |
4961 | { | |
4962 | if (upl->flags & UPL_TRACKED_BY_OBJECT) { | |
4963 | upl->flags |= UPL_DECMP_REQ; | |
4964 | upl->upl_creator->decmp_upl = (void *)upl; | |
4965 | } | |
4966 | } | |
4967 | ||
4968 | void | |
4969 | upl_unmark_decmp(upl_t upl) | |
4970 | { | |
4971 | if(upl && (upl->flags & UPL_DECMP_REQ)) { | |
4972 | upl->upl_creator->decmp_upl = NULL; | |
4973 | } | |
4974 | } | |
4975 | ||
4976 | #endif /* CONFIG_IOSCHED */ | |
4977 | ||
4978 | #define VM_PAGE_Q_BACKING_UP(q) \ | |
4979 | ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10)) | |
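 | /* | |
 |  * e.g. with pgo_maxlaundry == 128, the queue is considered to be backing | |
 |  * up once pgo_laundry reaches (128 * 8) / 10 == 102 pages. | |
 |  */ | |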
4980 | ||
4981 | boolean_t must_throttle_writes(void); | |
4982 | ||
4983 | boolean_t | |
4984 | must_throttle_writes() | |
4985 | { | |
4986 | if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) && | |
4987 | vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10) | |
4988 | return (TRUE); | |
4989 | ||
4990 | return (FALSE); | |
4991 | } | |
4992 | ||
4993 | ||
4994 | #if DEVELOPMENT || DEBUG | |
4995 | /* | |
4996 | * Statistics about UPL enforcement of copy-on-write obligations. | |
4997 | */ | |
4998 | unsigned long upl_cow = 0; | |
4999 | unsigned long upl_cow_again = 0; | |
5000 | unsigned long upl_cow_pages = 0; | |
5001 | unsigned long upl_cow_again_pages = 0; | |
5002 | ||
5003 | unsigned long iopl_cow = 0; | |
5004 | unsigned long iopl_cow_pages = 0; | |
5005 | #endif | |
5006 | ||
5007 | /* | |
5008 | * Routine: vm_object_upl_request | |
5009 | * Purpose: | |
5010 | * Cause the population of a portion of a vm_object. | |
5011 | * Depending on the nature of the request, the pages | |
5012 | * returned may contain valid data or be uninitialized. | |
5013 | * A page list structure, listing the physical pages, | |
5014 | * will be returned upon request. | |
5015 | * This function is called by the file system or any other | |
5016 | * supplier of backing store to a pager. | |
5017 | * IMPORTANT NOTE: The caller must still respect the relationship | |
5018 | * between the vm_object and its backing memory object. The | |
5019 | * caller MUST NOT substitute changes in the backing file | |
5020 | * without first doing a memory_object_lock_request on the | |
5021 | * target range unless it is known that the pages are not | |
5022 | * shared with another entity at the pager level. | |
5023 | * Copy_in_to: | |
5024 | * if a page list structure is present | |
5025 | * return the mapped physical pages, where a | |
5026 | * page is not present, return a non-initialized | |
5027 | * one. If the no_sync bit is turned on, don't | |
5028 | * call the pager unlock to synchronize with other | |
5029 | * possible copies of the page. Leave pages busy | |
5030 | * in the original object, if a page list structure | |
5031 | * was specified. When a commit of the page list | |
5032 | * pages is done, the dirty bit will be set for each one. | |
5033 | * Copy_out_from: | |
5034 | * If a page list structure is present, return | |
5035 | * all mapped pages. Where a page does not exist | |
5036 | * map a zero filled one. Leave pages busy in | |
5037 | * the original object. If a page list structure | |
5038 | * is not specified, this call is a no-op. | |
5039 | * | |
5040 | * Note: access of default pager objects has a rather interesting | |
5041 | * twist. The caller of this routine, presumably the file system | |
5042 | * page cache handling code, will never actually make a request | |
5043 | * against a default pager backed object. Only the default | |
5044 | * pager will make requests on backing store related vm_objects. | |
5045 | * In this way the default pager can maintain the relationship | |
5046 | * between backing store files (abstract memory objects) and | |
5047 | * the vm_objects (cache objects) they support. | |
5048 | * | |
5049 | */ | |
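 | /* | |
 |  * Illustrative sketch only (a hypothetical caller; nothing below is taken | |
 |  * from an actual in-tree user): a pager-side client gathering dirty pages | |
 |  * for pageout might drive this routine roughly like this, using the flag | |
 |  * combinations handled in the code that follows: | |
 |  * | |
 |  *	upl_t		upl; | |
 |  *	unsigned int	pl_count = 0; | |
 |  *	kern_return_t	kr; | |
 |  * | |
 |  *	kr = vm_object_upl_request(object, offset, size, | |
 |  *				   &upl, NULL, &pl_count, | |
 |  *				   UPL_SET_INTERNAL | UPL_SET_LITE | | |
 |  *				   UPL_COPYOUT_FROM | UPL_RET_ONLY_DIRTY); | |
 |  *	if (kr == KERN_SUCCESS) { | |
 |  *		// issue the I/O against the now-busy pages, then... | |
 |  *		upl_commit(upl, NULL, 0); | |
 |  *		upl_deallocate(upl); | |
 |  *	} | |
 |  */ | |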
5050 | ||
5051 | __private_extern__ kern_return_t | |
5052 | vm_object_upl_request( | |
5053 | vm_object_t object, | |
5054 | vm_object_offset_t offset, | |
5055 | upl_size_t size, | |
5056 | upl_t *upl_ptr, | |
5057 | upl_page_info_array_t user_page_list, | |
5058 | unsigned int *page_list_count, | |
5059 | upl_control_flags_t cntrl_flags) | |
5060 | { | |
5061 | vm_page_t dst_page = VM_PAGE_NULL; | |
5062 | vm_object_offset_t dst_offset; | |
5063 | upl_size_t xfer_size; | |
5064 | unsigned int size_in_pages; | |
5065 | boolean_t dirty; | |
5066 | boolean_t hw_dirty; | |
5067 | upl_t upl = NULL; | |
5068 | unsigned int entry; | |
5069 | #if MACH_CLUSTER_STATS | |
5070 | boolean_t encountered_lrp = FALSE; | |
5071 | #endif | |
5072 | vm_page_t alias_page = NULL; | |
5073 | int refmod_state = 0; | |
5074 | wpl_array_t lite_list = NULL; | |
5075 | vm_object_t last_copy_object; | |
5076 | struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; | |
5077 | struct vm_page_delayed_work *dwp; | |
5078 | int dw_count; | |
5079 | int dw_limit; | |
5080 | int io_tracking_flag = 0; | |
5081 | ||
5082 | if (cntrl_flags & ~UPL_VALID_FLAGS) { | |
5083 | /* | |
5084 | * For forward compatibility's sake, | |
5085 | * reject any unknown flag. | |
5086 | */ | |
5087 | return KERN_INVALID_VALUE; | |
5088 | } | |
5089 | if ( (!object->internal) && (object->paging_offset != 0) ) | |
5090 | panic("vm_object_upl_request: external object with non-zero paging offset\n"); | |
5091 | if (object->phys_contiguous) | |
5092 | panic("vm_object_upl_request: contiguous object specified\n"); | |
5093 | ||
5094 | ||
5095 | if (size > MAX_UPL_SIZE_BYTES) | |
5096 | size = MAX_UPL_SIZE_BYTES; | |
5097 | ||
5098 | if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL) | |
5099 | *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT; | |
5100 | ||
5101 | #if CONFIG_IOSCHED || UPL_DEBUG | |
5102 | if (object->io_tracking || upl_debug_enabled) | |
5103 | io_tracking_flag |= UPL_CREATE_IO_TRACKING; | |
5104 | #endif | |
5105 | #if CONFIG_IOSCHED | |
5106 | if (object->io_tracking) | |
5107 | io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP; | |
5108 | #endif | |
5109 | ||
5110 | if (cntrl_flags & UPL_SET_INTERNAL) { | |
5111 | if (cntrl_flags & UPL_SET_LITE) { | |
5112 | ||
5113 | upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size); | |
5114 | ||
5115 | user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); | |
5116 | lite_list = (wpl_array_t) | |
5117 | (((uintptr_t)user_page_list) + | |
5118 | ((size/PAGE_SIZE) * sizeof(upl_page_info_t))); | |
5119 | if (size == 0) { | |
5120 | user_page_list = NULL; | |
5121 | lite_list = NULL; | |
5122 | } | |
5123 | } else { | |
5124 | upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size); | |
5125 | ||
5126 | user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); | |
5127 | if (size == 0) { | |
5128 | user_page_list = NULL; | |
5129 | } | |
5130 | } | |
5131 | } else { | |
5132 | if (cntrl_flags & UPL_SET_LITE) { | |
5133 | ||
5134 | upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size); | |
5135 | ||
5136 | lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); | |
5137 | if (size == 0) { | |
5138 | lite_list = NULL; | |
5139 | } | |
5140 | } else { | |
5141 | upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size); | |
5142 | } | |
5143 | } | |
5144 | *upl_ptr = upl; | |
5145 | ||
5146 | if (user_page_list) | |
5147 | user_page_list[0].device = FALSE; | |
5148 | ||
5149 | if (cntrl_flags & UPL_SET_LITE) { | |
5150 | upl->map_object = object; | |
5151 | } else { | |
5152 | upl->map_object = vm_object_allocate(size); | |
5153 | /* | |
5154 | * No need to lock the new object: nobody else knows | |
5155 | * about it yet, so it's all ours so far. | |
5156 | */ | |
5157 | upl->map_object->shadow = object; | |
5158 | upl->map_object->pageout = TRUE; | |
5159 | upl->map_object->can_persist = FALSE; | |
5160 | upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; | |
5161 | upl->map_object->vo_shadow_offset = offset; | |
5162 | upl->map_object->wimg_bits = object->wimg_bits; | |
5163 | ||
5164 | VM_PAGE_GRAB_FICTITIOUS(alias_page); | |
5165 | ||
5166 | upl->flags |= UPL_SHADOWED; | |
5167 | } | |
5168 | /* | |
5169 | * ENCRYPTED SWAP: | |
5170 | * Just mark the UPL as "encrypted" here. | |
5171 | * We'll actually encrypt the pages later, | |
5172 | * in upl_encrypt(), when the caller has | |
5173 | * selected which pages need to go to swap. | |
5174 | */ | |
5175 | if (cntrl_flags & UPL_ENCRYPT) | |
5176 | upl->flags |= UPL_ENCRYPTED; | |
5177 | ||
5178 | if (cntrl_flags & UPL_FOR_PAGEOUT) | |
5179 | upl->flags |= UPL_PAGEOUT; | |
5180 | ||
5181 | vm_object_lock(object); | |
5182 | vm_object_activity_begin(object); | |
5183 | ||
5184 | /* | |
5185 | * we can lock in the paging_offset once paging_in_progress is set | |
5186 | */ | |
5187 | upl->size = size; | |
5188 | upl->offset = offset + object->paging_offset; | |
5189 | ||
5190 | #if CONFIG_IOSCHED || UPL_DEBUG | |
5191 | if (object->io_tracking || upl_debug_enabled) { | |
5192 | vm_object_activity_begin(object); | |
5193 | queue_enter(&object->uplq, upl, upl_t, uplq); | |
5194 | } | |
5195 | #endif | |
5196 | if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) { | |
5197 | /* | |
5198 | * Honor copy-on-write obligations | |
5199 | * | |
5200 | * The caller is gathering these pages and | |
5201 | * might modify their contents. We need to | |
5202 | * make sure that the copy object has its own | |
5203 | * private copies of these pages before we let | |
5204 | * the caller modify them. | |
5205 | */ | |
5206 | vm_object_update(object, | |
5207 | offset, | |
5208 | size, | |
5209 | NULL, | |
5210 | NULL, | |
5211 | FALSE, /* should_return */ | |
5212 | MEMORY_OBJECT_COPY_SYNC, | |
5213 | VM_PROT_NO_CHANGE); | |
5214 | #if DEVELOPMENT || DEBUG | |
5215 | upl_cow++; | |
5216 | upl_cow_pages += size >> PAGE_SHIFT; | |
5217 | #endif | |
5218 | } | |
5219 | /* | |
5220 | * remember which copy object we synchronized with | |
5221 | */ | |
5222 | last_copy_object = object->copy; | |
5223 | entry = 0; | |
5224 | ||
5225 | xfer_size = size; | |
5226 | dst_offset = offset; | |
5227 | size_in_pages = size / PAGE_SIZE; | |
5228 | ||
5229 | dwp = &dw_array[0]; | |
5230 | dw_count = 0; | |
5231 | dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); | |
5232 | ||
5233 | if (vm_page_free_count > (vm_page_free_target + size_in_pages) || | |
5234 | object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT)) | |
5235 | object->scan_collisions = 0; | |
5236 | ||
5237 | if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) { | |
5238 | boolean_t isSSD = FALSE; | |
5239 | ||
5240 | vnode_pager_get_isSSD(object->pager, &isSSD); | |
5241 | vm_object_unlock(object); | |
5242 | ||
5243 | OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); | |
5244 | ||
5245 | if (isSSD == TRUE) | |
5246 | delay(1000 * size_in_pages); | |
5247 | else | |
5248 | delay(5000 * size_in_pages); | |
5249 | OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); | |
5250 | ||
5251 | vm_object_lock(object); | |
5252 | } | |
5253 | ||
5254 | while (xfer_size) { | |
5255 | ||
5256 | dwp->dw_mask = 0; | |
5257 | ||
5258 | if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) { | |
5259 | vm_object_unlock(object); | |
5260 | VM_PAGE_GRAB_FICTITIOUS(alias_page); | |
5261 | vm_object_lock(object); | |
5262 | } | |
5263 | if (cntrl_flags & UPL_COPYOUT_FROM) { | |
5264 | upl->flags |= UPL_PAGE_SYNC_DONE; | |
5265 | ||
5266 | if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) || | |
5267 | dst_page->fictitious || | |
5268 | dst_page->absent || | |
5269 | dst_page->error || | |
5270 | dst_page->cleaning || | |
5271 | (VM_PAGE_WIRED(dst_page))) { | |
5272 | ||
5273 | if (user_page_list) | |
5274 | user_page_list[entry].phys_addr = 0; | |
5275 | ||
5276 | goto try_next_page; | |
5277 | } | |
5278 | /* | |
5279 | * grab this up front... | |
5280 | * a high percentage of the time we're going to | |
5281 | * need the hardware modification state a bit later | |
5282 | * anyway... so we can eliminate an extra call into | |
5283 | * the pmap layer by grabbing it here and recording it | |
5284 | */ | |
5285 | if (dst_page->pmapped) | |
5286 | refmod_state = pmap_get_refmod(dst_page->phys_page); | |
5287 | else | |
5288 | refmod_state = 0; | |
5289 | ||
5290 | if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) { | |
5291 | /* | |
5292 | * page is on inactive list and referenced... | |
5293 | * reactivate it now... this gets it out of the | |
5294 | * way of vm_pageout_scan which would have to | |
5295 | * reactivate it upon tripping over it | |
5296 | */ | |
5297 | dwp->dw_mask |= DW_vm_page_activate; | |
5298 | } | |
5299 | if (cntrl_flags & UPL_RET_ONLY_DIRTY) { | |
5300 | /* | |
5301 | * we're only asking for DIRTY pages to be returned | |
5302 | */ | |
5303 | if (dst_page->laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) { | |
5304 | /* | |
5305 | * if this page was stolen by vm_pageout_scan to be | |
5306 | * cleaned (as opposed to a buddy being clustered in), | |
5307 | * or this request is not being driven by a PAGEOUT cluster, | |
5308 | * then we only need to check for the page being dirty or | |
5309 | * precious to decide whether to return it | |
5310 | */ | |
5311 | if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED)) | |
5312 | goto check_busy; | |
5313 | goto dont_return; | |
5314 | } | |
5315 | /* | |
5316 | * this is a request for a PAGEOUT cluster and this page | |
5317 | * is merely along for the ride as a 'buddy'... not only | |
5318 | * does it have to be dirty to be returned, but it also | |
5319 | * can't have been referenced recently... | |
5320 | */ | |
5321 | if ( (hibernate_cleaning_in_progress == TRUE || | |
5322 | (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) || dst_page->throttled)) && | |
5323 | ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) { | |
5324 | goto check_busy; | |
5325 | } | |
5326 | dont_return: | |
5327 | /* | |
5328 | * if we reach here, we're not to return | |
5329 | * the page... go on to the next one | |
5330 | */ | |
5331 | if (dst_page->laundry == TRUE) { | |
5332 | /* | |
5333 | * if we get here, the page is not 'cleaning' (filtered out above). | |
5334 | * since it has been referenced, remove it from the laundry | |
5335 | * so we don't pay the cost of an I/O to clean a page | |
5336 | * we're just going to take back | |
5337 | */ | |
5338 | vm_page_lockspin_queues(); | |
5339 | ||
5340 | vm_pageout_steal_laundry(dst_page, TRUE); | |
5341 | vm_page_activate(dst_page); | |
5342 | ||
5343 | vm_page_unlock_queues(); | |
5344 | } | |
5345 | if (user_page_list) | |
5346 | user_page_list[entry].phys_addr = 0; | |
5347 | ||
5348 | goto try_next_page; | |
5349 | } | |
5350 | check_busy: | |
5351 | if (dst_page->busy) { | |
5352 | if (cntrl_flags & UPL_NOBLOCK) { | |
5353 | if (user_page_list) | |
5354 | user_page_list[entry].phys_addr = 0; | |
5355 | ||
5356 | goto try_next_page; | |
5357 | } | |
5358 | /* | |
5359 | * someone else is playing with the | |
5360 | * page. We will have to wait. | |
5361 | */ | |
5362 | PAGE_SLEEP(object, dst_page, THREAD_UNINT); | |
5363 | ||
5364 | continue; | |
5365 | } | |
5366 | /* | |
5367 | * ENCRYPTED SWAP: | |
5368 | * The caller is gathering this page and might | |
5369 | * access its contents later on. Decrypt the | |
5370 | * page before adding it to the UPL, so that | |
5371 | * the caller never sees encrypted data. | |
5372 | */ | |
5373 | if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) { | |
5374 | int was_busy; | |
5375 | ||
5376 | /* | |
5377 | * save the current state of busy | |
5378 | * mark page as busy while decrypt | |
5379 | * is in progress since it will drop | |
5380 | * the object lock... | |
5381 | */ | |
5382 | was_busy = dst_page->busy; | |
5383 | dst_page->busy = TRUE; | |
5384 | ||
5385 | vm_page_decrypt(dst_page, 0); | |
5386 | vm_page_decrypt_for_upl_counter++; | |
5387 | /* | |
5388 | * restore to original busy state | |
5389 | */ | |
5390 | dst_page->busy = was_busy; | |
5391 | } | |
5392 | if (dst_page->pageout_queue == TRUE) { | |
5393 | ||
5394 | vm_page_lockspin_queues(); | |
5395 | ||
5396 | if (dst_page->pageout_queue == TRUE) { | |
5397 | /* | |
5398 | * we've buddied up a page for a clustered pageout | |
5399 | * that has already been moved to the pageout | |
5400 | * queue by pageout_scan... we need to remove | |
5401 | * it from the queue and drop the laundry count | |
5402 | * on that queue | |
5403 | */ | |
5404 | vm_pageout_throttle_up(dst_page); | |
5405 | } | |
5406 | vm_page_unlock_queues(); | |
5407 | } | |
5408 | #if MACH_CLUSTER_STATS | |
5409 | /* | |
5410 | * pageout statistics gathering. count | |
5411 | * all the pages we will page out that | |
5412 | * were not counted in the initial | |
5413 | * vm_pageout_scan work | |
5414 | */ | |
5415 | if (dst_page->pageout) | |
5416 | encountered_lrp = TRUE; | |
5417 | if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious))) { | |
5418 | if (encountered_lrp) | |
5419 | CLUSTER_STAT(pages_at_higher_offsets++;) | |
5420 | else | |
5421 | CLUSTER_STAT(pages_at_lower_offsets++;) | |
5422 | } | |
5423 | #endif | |
5424 | hw_dirty = refmod_state & VM_MEM_MODIFIED; | |
5425 | dirty = hw_dirty ? TRUE : dst_page->dirty; | |
5426 | ||
5427 | if (dst_page->phys_page > upl->highest_page) | |
5428 | upl->highest_page = dst_page->phys_page; | |
5429 | ||
5430 | assert (!pmap_is_noencrypt(dst_page->phys_page)); | |
5431 | ||
5432 | if (cntrl_flags & UPL_SET_LITE) { | |
5433 | unsigned int pg_num; | |
5434 | ||
5435 | pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); | |
5436 | assert(pg_num == (dst_offset-offset)/PAGE_SIZE); | |
5437 | lite_list[pg_num>>5] |= 1 << (pg_num & 31); | |
5438 | ||
5439 | if (hw_dirty) | |
5440 | pmap_clear_modify(dst_page->phys_page); | |
5441 | ||
5442 | /* | |
5443 | * Mark original page as cleaning | |
5444 | * in place. | |
5445 | */ | |
5446 | dst_page->cleaning = TRUE; | |
5447 | dst_page->precious = FALSE; | |
5448 | } else { | |
5449 | /* | |
5450 | * use pageclean setup, it is more | |
5451 | * convenient even for the pageout | |
5452 | * cases here | |
5453 | */ | |
5454 | vm_object_lock(upl->map_object); | |
5455 | vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); | |
5456 | vm_object_unlock(upl->map_object); | |
5457 | ||
5458 | alias_page->absent = FALSE; | |
5459 | alias_page = NULL; | |
5460 | } | |
5461 | #if MACH_PAGEMAP | |
5462 | /* | |
5463 | * Record that this page has been | |
5464 | * written out | |
5465 | */ | |
5466 | vm_external_state_set(object->existence_map, dst_page->offset); | |
5467 | #endif /*MACH_PAGEMAP*/ | |
5468 | if (dirty) { | |
5469 | SET_PAGE_DIRTY(dst_page, FALSE); | |
5470 | } else { | |
5471 | dst_page->dirty = FALSE; | |
5472 | } | |
5473 | ||
5474 | if (!dirty) | |
5475 | dst_page->precious = TRUE; | |
5476 | ||
5477 | if ( (cntrl_flags & UPL_ENCRYPT) ) { | |
5478 | /* | |
5479 | * ENCRYPTED SWAP: | |
5480 | * We want to deny access to the target page | |
5481 | * because its contents are about to be | |
5482 | * encrypted and the user would be very | |
5483 | * confused to see encrypted data instead | |
5484 | * of their data. | |
5485 | * We also set "encrypted_cleaning" to allow | |
5486 | * vm_pageout_scan() to demote that page | |
5487 | * from "adjacent/clean-in-place" to | |
5488 | * "target/clean-and-free" if it bumps into | |
5489 | * this page during its scanning while we're | |
5490 | * still processing this cluster. | |
5491 | */ | |
5492 | dst_page->busy = TRUE; | |
5493 | dst_page->encrypted_cleaning = TRUE; | |
5494 | } | |
5495 | if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) { | |
5496 | if ( !VM_PAGE_WIRED(dst_page)) | |
5497 | dst_page->pageout = TRUE; | |
5498 | } | |
5499 | } else { | |
5500 | if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) { | |
5501 | /* | |
5502 | * Honor copy-on-write obligations | |
5503 | * | |
5504 | * The copy object has changed since we | |
5505 | * last synchronized for copy-on-write. | |
5506 | * Another copy object might have been | |
5507 | * inserted while we released the object's | |
5508 | * lock. Since someone could have seen the | |
5509 | * original contents of the remaining pages | |
5510 | * through that new object, we have to | |
5511 | * synchronize with it again for the remaining | |
5512 | * pages only. The previous pages are "busy" | |
5513 | * so they can not be seen through the new | |
5514 | * mapping. The new mapping will see our | |
5515 | * upcoming changes for those previous pages, | |
5516 | * but that's OK since they couldn't see what | |
5517 | * was there before. It's just a race anyway | |
5518 | * and there's no guarantee of consistency or | |
5519 | * atomicity. We just don't want new mappings | |
5520 | * to see both the *before* and *after* pages. | |
5521 | */ | |
5522 | if (object->copy != VM_OBJECT_NULL) { | |
5523 | vm_object_update( | |
5524 | object, | |
5525 | dst_offset,/* current offset */ | |
5526 | xfer_size, /* remaining size */ | |
5527 | NULL, | |
5528 | NULL, | |
5529 | FALSE, /* should_return */ | |
5530 | MEMORY_OBJECT_COPY_SYNC, | |
5531 | VM_PROT_NO_CHANGE); | |
5532 | ||
5533 | #if DEVELOPMENT || DEBUG | |
5534 | upl_cow_again++; | |
5535 | upl_cow_again_pages += xfer_size >> PAGE_SHIFT; | |
5536 | #endif | |
5537 | } | |
5538 | /* | |
5539 | * remember the copy object we synced with | |
5540 | */ | |
5541 | last_copy_object = object->copy; | |
5542 | } | |
5543 | dst_page = vm_page_lookup(object, dst_offset); | |
5544 | ||
5545 | if (dst_page != VM_PAGE_NULL) { | |
5546 | ||
5547 | if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) { | |
5548 | /* | |
5549 | * skip over pages already present in the cache | |
5550 | */ | |
5551 | if (user_page_list) | |
5552 | user_page_list[entry].phys_addr = 0; | |
5553 | ||
5554 | goto try_next_page; | |
5555 | } | |
5556 | if (dst_page->fictitious) { | |
5557 | panic("need corner case for fictitious page"); | |
5558 | } | |
5559 | ||
5560 | if (dst_page->busy || dst_page->cleaning) { | |
5561 | /* | |
5562 | * someone else is playing with the | |
5563 | * page. We will have to wait. | |
5564 | */ | |
5565 | PAGE_SLEEP(object, dst_page, THREAD_UNINT); | |
5566 | ||
5567 | continue; | |
5568 | } | |
5569 | if (dst_page->laundry) { | |
5570 | dst_page->pageout = FALSE; | |
5571 | ||
5572 | vm_pageout_steal_laundry(dst_page, FALSE); | |
5573 | } | |
5574 | } else { | |
5575 | if (object->private) { | |
5576 | /* | |
5577 | * This is a nasty wrinkle for users | |
5578 | * of upl who encounter device or | |
5579 | * private memory; however, it is | |
5580 | * unavoidable: only a fault can | |
5581 | * resolve the actual backing | |
5582 | * physical page by asking the | |
5583 | * backing device. | |
5584 | */ | |
5585 | if (user_page_list) | |
5586 | user_page_list[entry].phys_addr = 0; | |
5587 | ||
5588 | goto try_next_page; | |
5589 | } | |
5590 | if (object->scan_collisions) { | |
5591 | /* | |
5592 | * the pageout_scan thread is trying to steal | |
5593 | * pages from this object, but has run into our | |
5594 | * lock... grab 2 pages from the head of the object... | |
5595 | * the first is freed on behalf of pageout_scan, the | |
5596 | * 2nd is for our own use... we use vm_object_page_grab | |
5597 | * in both cases to avoid taking pages from the free | |
5598 | * list since we are under memory pressure and our | |
5599 | * lock on this object is getting in the way of | |
5600 | * relieving it | |
5601 | */ | |
5602 | dst_page = vm_object_page_grab(object); | |
5603 | ||
5604 | if (dst_page != VM_PAGE_NULL) | |
5605 | vm_page_release(dst_page); | |
5606 | ||
5607 | dst_page = vm_object_page_grab(object); | |
5608 | } | |
5609 | if (dst_page == VM_PAGE_NULL) { | |
5610 | /* | |
5611 | * need to allocate a page | |
5612 | */ | |
5613 | dst_page = vm_page_grab(); | |
5614 | } | |
5615 | if (dst_page == VM_PAGE_NULL) { | |
5616 | if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) { | |
5617 | /* | |
5618 | * we don't want to stall waiting for pages to come onto the free list | |
5619 | * while we're already holding absent pages in this UPL | |
5620 | * the caller will deal with the empty slots | |
5621 | */ | |
5622 | if (user_page_list) | |
5623 | user_page_list[entry].phys_addr = 0; | |
5624 | ||
5625 | goto try_next_page; | |
5626 | } | |
5627 | /* | |
5628 | * no pages available... wait | |
5629 | * then try again for the same | |
5630 | * offset... | |
5631 | */ | |
5632 | vm_object_unlock(object); | |
5633 | ||
5634 | OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); | |
5635 | ||
5636 | VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); | |
5637 | ||
5638 | VM_PAGE_WAIT(); | |
5639 | OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); | |
5640 | ||
5641 | VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); | |
5642 | ||
5643 | vm_object_lock(object); | |
5644 | ||
5645 | continue; | |
5646 | } | |
5647 | vm_page_insert(dst_page, object, dst_offset); | |
5648 | ||
5649 | dst_page->absent = TRUE; | |
5650 | dst_page->busy = FALSE; | |
5651 | ||
5652 | if (cntrl_flags & UPL_RET_ONLY_ABSENT) { | |
5653 | /* | |
5654 | * if UPL_RET_ONLY_ABSENT was specified, | |
5655 | * then we're definitely setting up a | |
5656 | * upl for a clustered read/pagein | |
5657 | * operation... mark the pages as clustered | |
5658 | * so upl_commit_range can put them on the | |
5659 | * speculative list | |
5660 | */ | |
5661 | dst_page->clustered = TRUE; | |
5662 | ||
5663 | if ( !(cntrl_flags & UPL_FILE_IO)) | |
5664 | VM_STAT_INCR(pageins); | |
5665 | } | |
5666 | } | |
5667 | /* | |
5668 | * ENCRYPTED SWAP: | |
5669 | */ | |
5670 | if (cntrl_flags & UPL_ENCRYPT) { | |
5671 | /* | |
5672 | * The page is going to be encrypted when we | |
5673 | * get it from the pager, so mark it so. | |
5674 | */ | |
5675 | dst_page->encrypted = TRUE; | |
5676 | } else { | |
5677 | /* | |
5678 | * Otherwise, the page will not contain | |
5679 | * encrypted data. | |
5680 | */ | |
5681 | dst_page->encrypted = FALSE; | |
5682 | } | |
5683 | dst_page->overwriting = TRUE; | |
5684 | ||
5685 | if (dst_page->pmapped) { | |
5686 | if ( !(cntrl_flags & UPL_FILE_IO)) | |
5687 | /* | |
5688 | * eliminate all mappings from the | |
5689 | * original object and its progeny | |
5690 | */ | |
5691 | refmod_state = pmap_disconnect(dst_page->phys_page); | |
5692 | else | |
5693 | refmod_state = pmap_get_refmod(dst_page->phys_page); | |
5694 | } else | |
5695 | refmod_state = 0; | |
5696 | ||
5697 | hw_dirty = refmod_state & VM_MEM_MODIFIED; | |
5698 | dirty = hw_dirty ? TRUE : dst_page->dirty; | |
5699 | ||
5700 | if (cntrl_flags & UPL_SET_LITE) { | |
5701 | unsigned int pg_num; | |
5702 | ||
5703 | pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); | |
5704 | assert(pg_num == (dst_offset-offset)/PAGE_SIZE); | |
5705 | lite_list[pg_num>>5] |= 1 << (pg_num & 31); | |
5706 | ||
5707 | if (hw_dirty) | |
5708 | pmap_clear_modify(dst_page->phys_page); | |
5709 | ||
5710 | /* | |
5711 | * Mark original page as cleaning | |
5712 | * in place. | |
5713 | */ | |
5714 | dst_page->cleaning = TRUE; | |
5715 | dst_page->precious = FALSE; | |
5716 | } else { | |
5717 | /* | |
5718 | * use pageclean setup, it is more | |
5719 | * convenient even for the pageout | |
5720 | * cases here | |
5721 | */ | |
5722 | vm_object_lock(upl->map_object); | |
5723 | vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); | |
5724 | vm_object_unlock(upl->map_object); | |
5725 | ||
5726 | alias_page->absent = FALSE; | |
5727 | alias_page = NULL; | |
5728 | } | |
5729 | ||
5730 | if (cntrl_flags & UPL_REQUEST_SET_DIRTY) { | |
5731 | upl->flags &= ~UPL_CLEAR_DIRTY; | |
5732 | upl->flags |= UPL_SET_DIRTY; | |
5733 | dirty = TRUE; | |
5735 | } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) { | |
5736 | /* | |
5737 | * clean in place for read implies | |
5738 | * that a write will be done on all | |
5739 | * the pages that are dirty before | |
5740 | * a upl commit is done. The caller | |
5741 | * is obligated to preserve the | |
5742 | * contents of all pages marked dirty | |
5743 | */ | |
5744 | upl->flags |= UPL_CLEAR_DIRTY; | |
5745 | } | |
5746 | dst_page->dirty = dirty; | |
5747 | ||
5748 | if (!dirty) | |
5749 | dst_page->precious = TRUE; | |
5750 | ||
5751 | if ( !VM_PAGE_WIRED(dst_page)) { | |
5752 | /* | |
5753 | * deny access to the target page while | |
5754 | * it is being worked on | |
5755 | */ | |
5756 | dst_page->busy = TRUE; | |
5757 | } else | |
5758 | dwp->dw_mask |= DW_vm_page_wire; | |
5759 | ||
5760 | /* | |
5761 | * We might be about to satisfy a fault which has been | |
5762 | * requested. So no need for the "restart" bit. | |
5763 | */ | |
5764 | dst_page->restart = FALSE; | |
5765 | if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) { | |
5766 | /* | |
5767 | * expect the page to be used | |
5768 | */ | |
5769 | dwp->dw_mask |= DW_set_reference; | |
5770 | } | |
5771 | if (cntrl_flags & UPL_PRECIOUS) { | |
5772 | if (dst_page->object->internal) { | |
5773 | SET_PAGE_DIRTY(dst_page, FALSE); | |
5774 | dst_page->precious = FALSE; | |
5775 | } else { | |
5776 | dst_page->precious = TRUE; | |
5777 | } | |
5778 | } else { | |
5779 | dst_page->precious = FALSE; | |
5780 | } | |
5781 | } | |
5782 | if (dst_page->busy) | |
5783 | upl->flags |= UPL_HAS_BUSY; | |
5784 | ||
5785 | if (dst_page->phys_page > upl->highest_page) | |
5786 | upl->highest_page = dst_page->phys_page; | |
5787 | assert (!pmap_is_noencrypt(dst_page->phys_page)); | |
5788 | if (user_page_list) { | |
5789 | user_page_list[entry].phys_addr = dst_page->phys_page; | |
5790 | user_page_list[entry].pageout = dst_page->pageout; | |
5791 | user_page_list[entry].absent = dst_page->absent; | |
5792 | user_page_list[entry].dirty = dst_page->dirty; | |
5793 | user_page_list[entry].precious = dst_page->precious; | |
5794 | user_page_list[entry].device = FALSE; | |
5795 | user_page_list[entry].needed = FALSE; | |
5796 | if (dst_page->clustered == TRUE) | |
5797 | user_page_list[entry].speculative = dst_page->speculative; | |
5798 | else | |
5799 | user_page_list[entry].speculative = FALSE; | |
5800 | user_page_list[entry].cs_validated = dst_page->cs_validated; | |
5801 | user_page_list[entry].cs_tainted = dst_page->cs_tainted; | |
5802 | user_page_list[entry].cs_nx = dst_page->cs_nx; | |
5803 | user_page_list[entry].mark = FALSE; | |
5804 | } | |
5805 | /* | |
5806 | * if UPL_RET_ONLY_ABSENT is set, then | |
5807 | * we are working with a fresh page and we've | |
5808 | * just set the clustered flag on it to | |
5809 | * indicate that it was dragged in as part of a | |
5810 | * speculative cluster... so leave it alone | |
5811 | */ | |
5812 | if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) { | |
5813 | /* | |
5814 | * someone is explicitly grabbing this page... | |
5815 | * update clustered and speculative state | |
5816 | * | |
5817 | */ | |
5818 | if (dst_page->clustered) | |
5819 | VM_PAGE_CONSUME_CLUSTERED(dst_page); | |
5820 | } | |
5821 | try_next_page: | |
5822 | if (dwp->dw_mask) { | |
5823 | if (dwp->dw_mask & DW_vm_page_activate) | |
5824 | VM_STAT_INCR(reactivations); | |
5825 | ||
5826 | VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count); | |
5827 | ||
5828 | if (dw_count >= dw_limit) { | |
5829 | vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count); | |
5830 | ||
5831 | dwp = &dw_array[0]; | |
5832 | dw_count = 0; | |
5833 | } | |
5834 | } | |
5835 | entry++; | |
5836 | dst_offset += PAGE_SIZE_64; | |
5837 | xfer_size -= PAGE_SIZE; | |
5838 | } | |
5839 | if (dw_count) | |
5840 | vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count); | |
5841 | ||
5842 | if (alias_page != NULL) { | |
5843 | VM_PAGE_FREE(alias_page); | |
5844 | } | |
5845 | ||
5846 | if (page_list_count != NULL) { | |
5847 | if (upl->flags & UPL_INTERNAL) | |
5848 | *page_list_count = 0; | |
5849 | else if (*page_list_count > entry) | |
5850 | *page_list_count = entry; | |
5851 | } | |
5852 | #if UPL_DEBUG | |
5853 | upl->upl_state = 1; | |
5854 | #endif | |
5855 | vm_object_unlock(object); | |
5856 | ||
5857 | return KERN_SUCCESS; | |
5858 | } | |
5859 | ||
5860 | /* | |
5861 | * Routine: vm_object_super_upl_request | |
5862 | * Purpose: | |
5863 | * Cause the population of a portion of a vm_object | |
5864 | * in much the same way as memory_object_upl_request. | |
5865 | * Depending on the nature of the request, the pages | |
5866 | * returned may contain valid data or be uninitialized. | |
5867 | * However, the region may be expanded up to the super | |
5868 | * cluster size provided. | |
5869 | */ | |
5870 | ||
5871 | __private_extern__ kern_return_t | |
5872 | vm_object_super_upl_request( | |
5873 | vm_object_t object, | |
5874 | vm_object_offset_t offset, | |
5875 | upl_size_t size, | |
5876 | upl_size_t super_cluster, | |
5877 | upl_t *upl, | |
5878 | upl_page_info_t *user_page_list, | |
5879 | unsigned int *page_list_count, | |
5880 | upl_control_flags_t cntrl_flags) | |
5881 | { | |
5882 | if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR)) | |
5883 | return KERN_FAILURE; | |
5884 | ||
5885 | assert(object->paging_in_progress); | |
5886 | offset = offset - object->paging_offset; | |
5887 | ||
5888 | if (super_cluster > size) { | |
5889 | ||
5890 | vm_object_offset_t base_offset; | |
5891 | upl_size_t super_size; | |
5892 | vm_object_size_t super_size_64; | |
5893 | ||
5894 | base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1)); | |
5895 | super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster; | |
5896 | super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size; | |
5897 | super_size = (upl_size_t) super_size_64; | |
5898 | assert(super_size == super_size_64); | |
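 | /* | |
 |  * Worked example (illustrative values only): offset 0x7000, size 0x2000, | |
 |  * super_cluster 0x8000 gives base_offset = 0x7000 & ~0x7fff = 0; since | |
 |  * 0x7000 + 0x2000 > 0x8000, the cluster doubles to super_size = 0x10000, | |
 |  * which is then clipped against the object size. | |
 |  */ | |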
5899 | ||
5900 | if (offset > (base_offset + super_size)) { | |
5901 | panic("vm_object_super_upl_request: Missed target pageout" | |
5902 | " %#llx,%#llx, %#x, %#x, %#x, %#llx\n", | |
5903 | offset, base_offset, super_size, super_cluster, | |
5904 | size, object->paging_offset); | |
5905 | } | |
5906 | /* | |
5907 | * apparently there is a case where the vm requests a | |
5908 | * page to be written out whose offset is beyond the | |
5909 | * object size | |
5910 | */ | |
5911 | if ((offset + size) > (base_offset + super_size)) { | |
5912 | super_size_64 = (offset + size) - base_offset; | |
5913 | super_size = (upl_size_t) super_size_64; | |
5914 | assert(super_size == super_size_64); | |
5915 | } | |
5916 | ||
5917 | offset = base_offset; | |
5918 | size = super_size; | |
5919 | } | |
5920 | return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags); | |
5921 | } | |
5922 | ||
5923 | ||
5924 | kern_return_t | |
5925 | vm_map_create_upl( | |
5926 | vm_map_t map, | |
5927 | vm_map_address_t offset, | |
5928 | upl_size_t *upl_size, | |
5929 | upl_t *upl, | |
5930 | upl_page_info_array_t page_list, | |
5931 | unsigned int *count, | |
5932 | upl_control_flags_t *flags) | |
5933 | { | |
5934 | vm_map_entry_t entry; | |
5935 | upl_control_flags_t caller_flags; | |
5936 | int force_data_sync; | |
5937 | int sync_cow_data; | |
5938 | vm_object_t local_object; | |
5939 | vm_map_offset_t local_offset; | |
5940 | vm_map_offset_t local_start; | |
5941 | kern_return_t ret; | |
5942 | ||
5943 | caller_flags = *flags; | |
5944 | ||
5945 | if (caller_flags & ~UPL_VALID_FLAGS) { | |
5946 | /* | |
5947 | * For forward compatibility's sake, | |
5948 | * reject any unknown flag. | |
5949 | */ | |
5950 | return KERN_INVALID_VALUE; | |
5951 | } | |
5952 | force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC); | |
5953 | sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM); | |
5954 | ||
5955 | if (upl == NULL) | |
5956 | return KERN_INVALID_ARGUMENT; | |
5957 | ||
5958 | REDISCOVER_ENTRY: | |
5959 | vm_map_lock_read(map); | |
5960 | ||
5961 | if (!vm_map_lookup_entry(map, offset, &entry)) { | |
5962 | vm_map_unlock_read(map); | |
5963 | return KERN_FAILURE; | |
5964 | } | |
5965 | ||
5966 | if ((entry->vme_end - offset) < *upl_size) { | |
5967 | *upl_size = (upl_size_t) (entry->vme_end - offset); | |
5968 | assert(*upl_size == entry->vme_end - offset); | |
5969 | } | |
5970 | ||
5971 | if (caller_flags & UPL_QUERY_OBJECT_TYPE) { | |
5972 | *flags = 0; | |
5973 | ||
5974 | if (!entry->is_sub_map && | |
5975 | VME_OBJECT(entry) != VM_OBJECT_NULL) { | |
5976 | if (VME_OBJECT(entry)->private) | |
5977 | *flags = UPL_DEV_MEMORY; | |
5978 | ||
5979 | if (VME_OBJECT(entry)->phys_contiguous) | |
5980 | *flags |= UPL_PHYS_CONTIG; | |
5981 | } | |
5982 | vm_map_unlock_read(map); | |
5983 | return KERN_SUCCESS; | |
5984 | } | |
5985 | ||
5986 | if (entry->is_sub_map) { | |
5987 | vm_map_t submap; | |
5988 | ||
5989 | submap = VME_SUBMAP(entry); | |
5990 | local_start = entry->vme_start; | |
5991 | local_offset = VME_OFFSET(entry); | |
5992 | ||
5993 | vm_map_reference(submap); | |
5994 | vm_map_unlock_read(map); | |
5995 | ||
5996 | ret = vm_map_create_upl(submap, | |
5997 | local_offset + (offset - local_start), | |
5998 | upl_size, upl, page_list, count, flags); | |
5999 | vm_map_deallocate(submap); | |
6000 | ||
6001 | return ret; | |
6002 | } | |
6003 | ||
6004 | if (VME_OBJECT(entry) == VM_OBJECT_NULL || | |
6005 | !VME_OBJECT(entry)->phys_contiguous) { | |
6006 | if (*upl_size > MAX_UPL_SIZE_BYTES) | |
6007 | *upl_size = MAX_UPL_SIZE_BYTES; | |
6008 | } | |
6009 | ||
6010 | /* | |
6011 | * Create an object if necessary. | |
6012 | */ | |
6013 | if (VME_OBJECT(entry) == VM_OBJECT_NULL) { | |
6014 | ||
6015 | if (vm_map_lock_read_to_write(map)) | |
6016 | goto REDISCOVER_ENTRY; | |
6017 | ||
6018 | VME_OBJECT_SET(entry, | |
6019 | vm_object_allocate((vm_size_t) | |
6020 | (entry->vme_end - | |
6021 | entry->vme_start))); | |
6022 | VME_OFFSET_SET(entry, 0); | |
6023 | ||
6024 | vm_map_lock_write_to_read(map); | |
6025 | } | |
6026 | ||
6027 | if (!(caller_flags & UPL_COPYOUT_FROM) && | |
6028 | !(entry->protection & VM_PROT_WRITE)) { | |
6029 | vm_map_unlock_read(map); | |
6030 | return KERN_PROTECTION_FAILURE; | |
6031 | } | |
6032 | ||
6033 | local_object = VME_OBJECT(entry); | |
6034 | assert(local_object != VM_OBJECT_NULL); | |
6035 | ||
6036 | if (*upl_size != 0 && | |
6037 | local_object->vo_size > *upl_size && /* partial UPL */ | |
6038 | entry->wired_count == 0 && /* No COW for entries that are wired */ | |
6039 | (map->pmap != kernel_pmap) && /* alias checks */ | |
6040 | (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */ | |
6041 | || | |
6042 | (!entry->needs_copy && /* case 2 */ | |
6043 | local_object->internal && | |
6044 | (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) && | |
6045 | local_object->ref_count > 1))) { | |
6046 | vm_prot_t prot; | |
6047 | ||
6048 | /* | |
6049 | * Case 1: | |
6050 | * Set up the targeted range for copy-on-write to avoid | |
6051 | * applying true_share/copy_delay to the entire object. | |
6052 | * | |
6053 | * Case 2: | |
6054 | * This map entry covers only part of an internal | |
6055 | * object. There could be other map entries covering | |
6056 | * other areas of this object and some of these map | |
6057 | * entries could be marked as "needs_copy", which | |
6058 | * assumes that the object is COPY_SYMMETRIC. | |
6059 | * To avoid marking this object as COPY_DELAY and | |
6060 | * "true_share", let's shadow it and mark the new | |
6061 | * (smaller) object as "true_share" and COPY_DELAY. | |
6062 | */ | |
6063 | ||
6064 | if (vm_map_lock_read_to_write(map)) { | |
6065 | goto REDISCOVER_ENTRY; | |
6066 | } | |
6067 | vm_map_lock_assert_exclusive(map); | |
6068 | assert(VME_OBJECT(entry) == local_object); | |
6069 | ||
6070 | vm_map_clip_start(map, | |
6071 | entry, | |
6072 | vm_map_trunc_page(offset, | |
6073 | VM_MAP_PAGE_MASK(map))); | |
6074 | vm_map_clip_end(map, | |
6075 | entry, | |
6076 | vm_map_round_page(offset + *upl_size, | |
6077 | VM_MAP_PAGE_MASK(map))); | |
6078 | if ((entry->vme_end - offset) < *upl_size) { | |
6079 | *upl_size = (upl_size_t) (entry->vme_end - offset); | |
6080 | assert(*upl_size == entry->vme_end - offset); | |
6081 | } | |
6082 | ||
6083 | prot = entry->protection & ~VM_PROT_WRITE; | |
6084 | if (override_nx(map, VME_ALIAS(entry)) && prot) | |
6085 | prot |= VM_PROT_EXECUTE; | |
6086 | vm_object_pmap_protect(local_object, | |
6087 | VME_OFFSET(entry), | |
6088 | entry->vme_end - entry->vme_start, | |
6089 | ((entry->is_shared || | |
6090 | map->mapped_in_other_pmaps) | |
6091 | ? PMAP_NULL | |
6092 | : map->pmap), | |
6093 | entry->vme_start, | |
6094 | prot); | |
6095 | ||
6096 | assert(entry->wired_count == 0); | |
6097 | ||
6098 | /* | |
6099 | * Lock the VM object and re-check its status: if it's mapped | |
6100 | * in another address space, we could still be racing with | |
6101 | * another thread holding that other VM map exclusively. | |
6102 | */ | |
6103 | vm_object_lock(local_object); | |
6104 | if (local_object->true_share) { | |
6105 | /* object is already in proper state: no COW needed */ | |
6106 | assert(local_object->copy_strategy != | |
6107 | MEMORY_OBJECT_COPY_SYMMETRIC); | |
6108 | } else { | |
6109 | /* not true_share: ask for copy-on-write below */ | |
6110 | assert(local_object->copy_strategy == | |
6111 | MEMORY_OBJECT_COPY_SYMMETRIC); | |
6112 | entry->needs_copy = TRUE; | |
6113 | } | |
6114 | vm_object_unlock(local_object); | |
6115 | ||
6116 | vm_map_lock_write_to_read(map); | |
6117 | } | |
6118 | ||
6119 | if (entry->needs_copy) { | |
6120 | /* | |
6121 | * Honor copy-on-write for COPY_SYMMETRIC | |
6122 | * strategy. | |
6123 | */ | |
6124 | vm_map_t local_map; | |
6125 | vm_object_t object; | |
6126 | vm_object_offset_t new_offset; | |
6127 | vm_prot_t prot; | |
6128 | boolean_t wired; | |
6129 | vm_map_version_t version; | |
6130 | vm_map_t real_map; | |
6131 | vm_prot_t fault_type; | |
6132 | ||
6133 | local_map = map; | |
6134 | ||
6135 | if (caller_flags & UPL_COPYOUT_FROM) { | |
6136 | fault_type = VM_PROT_READ | VM_PROT_COPY; | |
6137 | vm_counters.create_upl_extra_cow++; | |
6138 | vm_counters.create_upl_extra_cow_pages += | |
6139 | (entry->vme_end - entry->vme_start) / PAGE_SIZE; | |
6140 | } else { | |
6141 | fault_type = VM_PROT_WRITE; | |
6142 | } | |
6143 | if (vm_map_lookup_locked(&local_map, | |
6144 | offset, fault_type, | |
6145 | OBJECT_LOCK_EXCLUSIVE, | |
6146 | &version, &object, | |
6147 | &new_offset, &prot, &wired, | |
6148 | NULL, | |
6149 | &real_map) != KERN_SUCCESS) { | |
6150 | if (fault_type == VM_PROT_WRITE) { | |
6151 | vm_counters.create_upl_lookup_failure_write++; | |
6152 | } else { | |
6153 | vm_counters.create_upl_lookup_failure_copy++; | |
6154 | } | |
6155 | vm_map_unlock_read(local_map); | |
6156 | return KERN_FAILURE; | |
6157 | } | |
6158 | if (real_map != map) | |
6159 | vm_map_unlock(real_map); | |
6160 | vm_map_unlock_read(local_map); | |
6161 | ||
6162 | vm_object_unlock(object); | |
6163 | ||
6164 | goto REDISCOVER_ENTRY; | |
6165 | } | |
6166 | ||
6167 | if (sync_cow_data && | |
6168 | (VME_OBJECT(entry)->shadow || | |
6169 | VME_OBJECT(entry)->copy)) { | |
6170 | local_object = VME_OBJECT(entry); | |
6171 | local_start = entry->vme_start; | |
6172 | local_offset = VME_OFFSET(entry); | |
6173 | ||
6174 | vm_object_reference(local_object); | |
6175 | vm_map_unlock_read(map); | |
6176 | ||
6177 | if (local_object->shadow && local_object->copy) { | |
6178 | vm_object_lock_request(local_object->shadow, | |
6179 | ((vm_object_offset_t) | |
6180 | ((offset - local_start) + | |
6181 | local_offset) + | |
6182 | local_object->vo_shadow_offset), | |
6183 | *upl_size, FALSE, | |
6184 | MEMORY_OBJECT_DATA_SYNC, | |
6185 | VM_PROT_NO_CHANGE); | |
6186 | } | |
6187 | sync_cow_data = FALSE; | |
6188 | vm_object_deallocate(local_object); | |
6189 | ||
6190 | goto REDISCOVER_ENTRY; | |
6191 | } | |
6192 | if (force_data_sync) { | |
6193 | local_object = VME_OBJECT(entry); | |
6194 | local_start = entry->vme_start; | |
6195 | local_offset = VME_OFFSET(entry); | |
6196 | ||
6197 | vm_object_reference(local_object); | |
6198 | vm_map_unlock_read(map); | |
6199 | ||
6200 | vm_object_lock_request(local_object, | |
6201 | ((vm_object_offset_t) | |
6202 | ((offset - local_start) + | |
6203 | local_offset)), | |
6204 | (vm_object_size_t)*upl_size, | |
6205 | FALSE, | |
6206 | MEMORY_OBJECT_DATA_SYNC, | |
6207 | VM_PROT_NO_CHANGE); | |
6208 | ||
6209 | force_data_sync = FALSE; | |
6210 | vm_object_deallocate(local_object); | |
6211 | ||
6212 | goto REDISCOVER_ENTRY; | |
6213 | } | |
6214 | if (VME_OBJECT(entry)->private) | |
6215 | *flags = UPL_DEV_MEMORY; | |
6216 | else | |
6217 | *flags = 0; | |
6218 | ||
6219 | if (VME_OBJECT(entry)->phys_contiguous) | |
6220 | *flags |= UPL_PHYS_CONTIG; | |
6221 | ||
6222 | local_object = VME_OBJECT(entry); | |
6223 | local_offset = VME_OFFSET(entry); | |
6224 | local_start = entry->vme_start; | |
6225 | ||
6226 | vm_object_lock(local_object); | |
6227 | ||
6228 | /* | |
6229 | * Ensure that this object is "true_share" and "copy_delay" now, | |
6230 | * while we're still holding the VM map lock. After we unlock the map, | |
6231 | * anything could happen to that mapping, including some copy-on-write | |
6232 | * activity. We need to make sure that the IOPL will point at the | |
6233 | * same memory as the mapping. | |
6234 | */ | |
6235 | if (local_object->true_share) { | |
6236 | assert(local_object->copy_strategy != | |
6237 | MEMORY_OBJECT_COPY_SYMMETRIC); | |
6238 | } else if (local_object != kernel_object && | |
6239 | local_object != compressor_object && | |
6240 | !local_object->phys_contiguous) { | |
6241 | #if VM_OBJECT_TRACKING_OP_TRUESHARE | |
6242 | if (!local_object->true_share && | |
6243 | vm_object_tracking_inited) { | |
6244 | void *bt[VM_OBJECT_TRACKING_BTDEPTH]; | |
6245 | int num = 0; | |
6246 | num = OSBacktrace(bt, | |
6247 | VM_OBJECT_TRACKING_BTDEPTH); | |
6248 | btlog_add_entry(vm_object_tracking_btlog, | |
6249 | local_object, | |
6250 | VM_OBJECT_TRACKING_OP_TRUESHARE, | |
6251 | bt, | |
6252 | num); | |
6253 | } | |
6254 | #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ | |
6255 | local_object->true_share = TRUE; | |
6256 | if (local_object->copy_strategy == | |
6257 | MEMORY_OBJECT_COPY_SYMMETRIC) { | |
6258 | local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; | |
6259 | } | |
6260 | } | |
6261 | ||
6262 | vm_object_reference_locked(local_object); | |
6263 | vm_object_unlock(local_object); | |
6264 | ||
6265 | vm_map_unlock_read(map); | |
6266 | ||
6267 | ret = vm_object_iopl_request(local_object, | |
6268 | ((vm_object_offset_t) | |
6269 | ((offset - local_start) + local_offset)), | |
6270 | *upl_size, | |
6271 | upl, | |
6272 | page_list, | |
6273 | count, | |
6274 | caller_flags); | |
6275 | vm_object_deallocate(local_object); | |
6276 | ||
6277 | return ret; | |
6278 | } | |
6279 | ||
6280 | /* | |
6281 | * Internal routine to enter a UPL into a VM map. | |
6282 | * | |
6283 | * JMM - This should just be doable through the standard | |
6284 | * vm_map_enter() API. | |
6285 | */ | |
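/*
 * Illustrative usage sketch (assumes a kernel-internal caller that
 * already holds a UPL reference): map the UPL, touch the pages
 * through the returned address, then tear the mapping down again.
 *
 *	vm_map_offset_t kaddr;
 *	kern_return_t   kr;
 *
 *	kr = vm_map_enter_upl(kernel_map, upl, &kaddr);
 *	if (kr == KERN_SUCCESS) {
 *		...access the wired pages via kaddr...
 *		(void) vm_map_remove_upl(kernel_map, upl);
 *	}
 *
 * Only kernel maps are supported here (note the map->pmap == kernel_pmap
 * assertion in vm_map_enter_upl below).
 */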
6286 | kern_return_t | |
6287 | vm_map_enter_upl( | |
6288 | vm_map_t map, | |
6289 | upl_t upl, | |
6290 | vm_map_offset_t *dst_addr) | |
6291 | { | |
6292 | vm_map_size_t size; | |
6293 | vm_object_offset_t offset; | |
6294 | vm_map_offset_t addr; | |
6295 | vm_page_t m; | |
6296 | kern_return_t kr; | |
6297 | int isVectorUPL = 0, curr_upl=0; | |
6298 | upl_t vector_upl = NULL; | |
6299 | vm_offset_t vector_upl_dst_addr = 0; | |
6300 | vm_map_t vector_upl_submap = NULL; | |
6301 | upl_offset_t subupl_offset = 0; | |
6302 | upl_size_t subupl_size = 0; | |
6303 | ||
6304 | if (upl == UPL_NULL) | |
6305 | return KERN_INVALID_ARGUMENT; | |
6306 | ||
6307 | if((isVectorUPL = vector_upl_is_valid(upl))) { | |
6308 | int mapped=0,valid_upls=0; | |
6309 | vector_upl = upl; | |
6310 | ||
6311 | upl_lock(vector_upl); | |
6312 | for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) { | |
6313 | upl = vector_upl_subupl_byindex(vector_upl, curr_upl ); | |
6314 | if(upl == NULL) | |
6315 | continue; | |
6316 | valid_upls++; | |
6317 | if (UPL_PAGE_LIST_MAPPED & upl->flags) | |
6318 | mapped++; | |
6319 | } | |
6320 | ||
6321 | if(mapped) { | |
6322 | if(mapped != valid_upls) | |
6323 | panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls); | |
6324 | else { | |
6325 | upl_unlock(vector_upl); | |
6326 | return KERN_FAILURE; | |
6327 | } | |
6328 | } | |
6329 | ||
6330 | kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE, VM_FLAGS_ANYWHERE, &vector_upl_submap); | |
6331 | if( kr != KERN_SUCCESS ) | |
6332 | panic("Vector UPL submap allocation failed\n"); | |
6333 | map = vector_upl_submap; | |
6334 | vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr); | |
6335 | curr_upl=0; | |
6336 | } | |
6337 | else | |
6338 | upl_lock(upl); | |
6339 | ||
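/*
 * For a vector UPL each pass through this label maps the next valid
 * sub-UPL into the sub-map at the I/O offset recorded for it; a
 * plain UPL goes through exactly once.
 */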
6340 | process_upl_to_enter: | |
6341 | if(isVectorUPL){ | |
6342 | if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) { | |
6343 | *dst_addr = vector_upl_dst_addr; | |
6344 | upl_unlock(vector_upl); | |
6345 | return KERN_SUCCESS; | |
6346 | } | |
6347 | upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ ); | |
6348 | if(upl == NULL) | |
6349 | goto process_upl_to_enter; | |
6350 | ||
6351 | vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size); | |
6352 | *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset); | |
6353 | } else { | |
6354 | /* | |
6355 | * check to see if already mapped | |
6356 | */ | |
6357 | if (UPL_PAGE_LIST_MAPPED & upl->flags) { | |
6358 | upl_unlock(upl); | |
6359 | return KERN_FAILURE; | |
6360 | } | |
6361 | } | |
6362 | if ((!(upl->flags & UPL_SHADOWED)) && | |
6363 | ((upl->flags & UPL_HAS_BUSY) || | |
6364 | !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) { | |
6365 | ||
6366 | vm_object_t object; | |
6367 | vm_page_t alias_page; | |
6368 | vm_object_offset_t new_offset; | |
6369 | unsigned int pg_num; | |
6370 | wpl_array_t lite_list; | |
6371 | ||
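/*
 * The "lite list" bitmap lives in the same allocation as the UPL:
 * an internal UPL is laid out as
 *	[ struct upl ][ upl_page_info_t array ][ wpl bitmap ]
 * while a non-internal UPL has no page-info array, so the bitmap
 * immediately follows the upl structure.
 */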
6372 | if (upl->flags & UPL_INTERNAL) { | |
6373 | lite_list = (wpl_array_t) | |
6374 | ((((uintptr_t)upl) + sizeof(struct upl)) | |
6375 | + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); | |
6376 | } else { | |
6377 | lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl)); | |
6378 | } | |
6379 | object = upl->map_object; | |
6380 | upl->map_object = vm_object_allocate(upl->size); | |
6381 | ||
6382 | vm_object_lock(upl->map_object); | |
6383 | ||
6384 | upl->map_object->shadow = object; | |
6385 | upl->map_object->pageout = TRUE; | |
6386 | upl->map_object->can_persist = FALSE; | |
6387 | upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; | |
6388 | upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset; | |
6389 | upl->map_object->wimg_bits = object->wimg_bits; | |
6390 | offset = upl->map_object->vo_shadow_offset; | |
6391 | new_offset = 0; | |
6392 | size = upl->size; | |
6393 | ||
6394 | upl->flags |= UPL_SHADOWED; | |
6395 | ||
6396 | while (size) { | |
6397 | pg_num = (unsigned int) (new_offset / PAGE_SIZE); | |
6398 | assert(pg_num == new_offset / PAGE_SIZE); | |
6399 | ||
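/*
 * One bit per page in the lite list: pg_num >> 5 selects the 32-bit
 * word, (pg_num & 31) selects the bit within that word.
 */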
6400 | if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { | |
6401 | ||
6402 | VM_PAGE_GRAB_FICTITIOUS(alias_page); | |
6403 | ||
6404 | vm_object_lock(object); | |
6405 | ||
6406 | m = vm_page_lookup(object, offset); | |
6407 | if (m == VM_PAGE_NULL) { | |
6408 | panic("vm_upl_map: page missing\n"); | |
6409 | } | |
6410 | ||
6411 | /* | |
6412 | * Convert the fictitious page to a private | |
6413 | * shadow of the real page. | |
6414 | */ | |
6415 | assert(alias_page->fictitious); | |
6416 | alias_page->fictitious = FALSE; | |
6417 | alias_page->private = TRUE; | |
6418 | alias_page->pageout = TRUE; | |
6419 | /* | |
6420 | * since m is a page in the upl it must | |
6421 | * already be wired or BUSY, so it's | |
6422 | * safe to assign the underlying physical | |
6423 | * page to the alias | |
6424 | */ | |
6425 | alias_page->phys_page = m->phys_page; | |
6426 | ||
6427 | vm_object_unlock(object); | |
6428 | ||
6429 | vm_page_lockspin_queues(); | |
6430 | vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE); | |
6431 | vm_page_unlock_queues(); | |
6432 | ||
6433 | /* | |
6434 | * ENCRYPTED SWAP: | |
6435 | * The virtual page ("m") has to be wired in some way | |
6436 | * here or its physical page ("m->phys_page") could | |
6437 | * be recycled at any time. | |
6438 | * Assuming this is enforced by the caller, we can't | |
6439 | * get an encrypted page here. Since the encryption | |
6440 | * key depends on the VM page's "pager" object and | |
6441 | * the "paging_offset", we couldn't handle 2 pageable | |
6442 | * VM pages (with different pagers and paging_offsets) | |
6443 | * sharing the same physical page: we could end up | |
6444 | * encrypting with one key (via one VM page) and | |
6445 | * decrypting with another key (via the alias VM page). | |
6446 | */ | |
6447 | ASSERT_PAGE_DECRYPTED(m); | |
6448 | ||
6449 | vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE); | |
6450 | ||
6451 | assert(!alias_page->wanted); | |
6452 | alias_page->busy = FALSE; | |
6453 | alias_page->absent = FALSE; | |
6454 | } | |
6455 | size -= PAGE_SIZE; | |
6456 | offset += PAGE_SIZE_64; | |
6457 | new_offset += PAGE_SIZE_64; | |
6458 | } | |
6459 | vm_object_unlock(upl->map_object); | |
6460 | } | |
6461 | if (upl->flags & UPL_SHADOWED) | |
6462 | offset = 0; | |
6463 | else | |
6464 | offset = upl->offset - upl->map_object->paging_offset; | |
6465 | ||
6466 | size = upl->size; | |
6467 | ||
6468 | vm_object_reference(upl->map_object); | |
6469 | ||
6470 | if(!isVectorUPL) { | |
6471 | *dst_addr = 0; | |
6472 | /* | |
6473 | * NEED A UPL_MAP ALIAS | |
6474 | */ | |
6475 | kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, | |
6476 | VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK), | |
6477 | upl->map_object, offset, FALSE, | |
6478 | VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); | |
6479 | ||
6480 | if (kr != KERN_SUCCESS) { | |
6481 | upl_unlock(upl); | |
6482 | return(kr); | |
6483 | } | |
6484 | } | |
6485 | else { | |
6486 | kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, | |
6487 | VM_FLAGS_FIXED | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK), | |
6488 | upl->map_object, offset, FALSE, | |
6489 | VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); | |
6490 | if(kr) | |
6491 | panic("vm_map_enter failed for a Vector UPL\n"); | |
6492 | } | |
6493 | vm_object_lock(upl->map_object); | |
6494 | ||
6495 | for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) { | |
6496 | m = vm_page_lookup(upl->map_object, offset); | |
6497 | ||
6498 | if (m) { | |
6499 | m->pmapped = TRUE; | |
6500 | ||
6501 | /* CODE SIGNING ENFORCEMENT: page has been wpmapped, | |
6502 | * but only in kernel space. If this was on a user map, | |
6503 | * we'd have to set the wpmapped bit. */ | |
6504 | /* m->wpmapped = TRUE; */ | |
6505 | assert(map->pmap == kernel_pmap); | |
6506 | ||
6507 | PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE); | |
6508 | } | |
6509 | offset += PAGE_SIZE_64; | |
6510 | } | |
6511 | vm_object_unlock(upl->map_object); | |
6512 | ||
6513 | /* | |
6514 | * hold a reference for the mapping | |
6515 | */ | |
6516 | upl->ref_count++; | |
6517 | upl->flags |= UPL_PAGE_LIST_MAPPED; | |
6518 | upl->kaddr = (vm_offset_t) *dst_addr; | |
6519 | assert(upl->kaddr == *dst_addr); | |
6520 | ||
6521 | if(isVectorUPL) | |
6522 | goto process_upl_to_enter; | |
6523 | ||
6524 | upl_unlock(upl); | |
6525 | ||
6526 | return KERN_SUCCESS; | |
6527 | } | |
6528 | ||
6529 | /* | |
6530 | * Internal routine to remove a UPL mapping from a VM map. | |
6531 | * | |
6532 | * XXX - This should just be doable through a standard | |
6533 | * vm_map_remove() operation. Otherwise, implicit clean-up | |
6534 | * of the target map won't be able to correctly remove | |
6535 | * these (and release the reference on the UPL). Having | |
6536 | * to do this means we can't map these into user-space | |
6537 | * maps yet. | |
6538 | */ | |
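/*
 * Note: this drops the mapping reference taken by vm_map_enter_upl()
 * ("upl->ref_count++") and removes the UPL's kernel virtual range
 * from the given map.
 */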
6539 | kern_return_t | |
6540 | vm_map_remove_upl( | |
6541 | vm_map_t map, | |
6542 | upl_t upl) | |
6543 | { | |
6544 | vm_address_t addr; | |
6545 | upl_size_t size; | |
6546 | int isVectorUPL = 0, curr_upl = 0; | |
6547 | upl_t vector_upl = NULL; | |
6548 | ||
6549 | if (upl == UPL_NULL) | |
6550 | return KERN_INVALID_ARGUMENT; | |
6551 | ||
6552 | if((isVectorUPL = vector_upl_is_valid(upl))) { | |
6553 | int unmapped=0, valid_upls=0; | |
6554 | vector_upl = upl; | |
6555 | upl_lock(vector_upl); | |
6556 | for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) { | |
6557 | upl = vector_upl_subupl_byindex(vector_upl, curr_upl ); | |
6558 | if(upl == NULL) | |
6559 | continue; | |
6560 | valid_upls++; | |
6561 | if (!(UPL_PAGE_LIST_MAPPED & upl->flags)) | |
6562 | unmapped++; | |
6563 | } | |
6564 | ||
6565 | if(unmapped) { | |
6566 | if(unmapped != valid_upls) | |
6567 | panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls); | |
6568 | else { | |
6569 | upl_unlock(vector_upl); | |
6570 | return KERN_FAILURE; | |
6571 | } | |
6572 | } | |
6573 | curr_upl=0; | |
6574 | } | |
6575 | else | |
6576 | upl_lock(upl); | |
6577 | ||
6578 | process_upl_to_remove: | |
6579 | if(isVectorUPL) { | |
6580 | if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) { | |
6581 | vm_map_t v_upl_submap; | |
6582 | vm_offset_t v_upl_submap_dst_addr; | |
6583 | vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr); | |
6584 | ||
6585 | vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS); | |
6586 | vm_map_deallocate(v_upl_submap); | |
6587 | upl_unlock(vector_upl); | |
6588 | return KERN_SUCCESS; | |
6589 | } | |
6590 | ||
6591 | upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ ); | |
6592 | if(upl == NULL) | |
6593 | goto process_upl_to_remove; | |
6594 | } | |
6595 | ||
6596 | if (upl->flags & UPL_PAGE_LIST_MAPPED) { | |
6597 | addr = upl->kaddr; | |
6598 | size = upl->size; | |
6599 | ||
6600 | assert(upl->ref_count > 1); | |
6601 | upl->ref_count--; /* removing mapping ref */ | |
6602 | ||
6603 | upl->flags &= ~UPL_PAGE_LIST_MAPPED; | |
6604 | upl->kaddr = (vm_offset_t) 0; | |
6605 | ||
6606 | if(!isVectorUPL) { | |
6607 | upl_unlock(upl); | |
6608 | ||
6609 | vm_map_remove( | |
6610 | map, | |
6611 | vm_map_trunc_page(addr, | |
6612 | VM_MAP_PAGE_MASK(map)), | |
6613 | vm_map_round_page(addr + size, | |
6614 | VM_MAP_PAGE_MASK(map)), | |
6615 | VM_MAP_NO_FLAGS); | |
6616 | ||
6617 | return KERN_SUCCESS; | |
6618 | } | |
6619 | else { | |
6620 | /* | |
6621 | * If it's a Vectored UPL, we'll be removing the entire | |
6622 | * submap anyways, so no need to remove individual UPL | |
6623 | * element mappings from within the submap | |
6624 | */ | |
6625 | goto process_upl_to_remove; | |
6626 | } | |
6627 | } | |
6628 | upl_unlock(upl); | |
6629 | ||
6630 | return KERN_FAILURE; | |
6631 | } | |
6632 | ||
6633 | kern_return_t | |
6634 | upl_commit_range( | |
6635 | upl_t upl, | |
6636 | upl_offset_t offset, | |
6637 | upl_size_t size, | |
6638 | int flags, | |
6639 | upl_page_info_t *page_list, | |
6640 | mach_msg_type_number_t count, | |
6641 | boolean_t *empty) | |
6642 | { | |
6643 | upl_size_t xfer_size, subupl_size = size; | |
6644 | vm_object_t shadow_object; | |
6645 | vm_object_t object; | |
6646 | vm_object_offset_t target_offset; | |
6647 | upl_offset_t subupl_offset = offset; | |
6648 | int entry; | |
6649 | wpl_array_t lite_list; | |
6650 | int occupied; | |
6651 | int clear_refmod = 0; | |
6652 | int pgpgout_count = 0; | |
6653 | struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; | |
6654 | struct vm_page_delayed_work *dwp; | |
6655 | int dw_count; | |
6656 | int dw_limit; | |
6657 | int isVectorUPL = 0; | |
6658 | upl_t vector_upl = NULL; | |
6659 | boolean_t should_be_throttled = FALSE; | |
6660 | ||
6661 | vm_page_t nxt_page = VM_PAGE_NULL; | |
6662 | int fast_path_possible = 0; | |
6663 | int fast_path_full_commit = 0; | |
6664 | int throttle_page = 0; | |
6665 | int unwired_count = 0; | |
6666 | int local_queue_count = 0; | |
6667 | queue_head_t local_queue; | |
6668 | ||
6669 | *empty = FALSE; | |
6670 | ||
6671 | if (upl == UPL_NULL) | |
6672 | return KERN_INVALID_ARGUMENT; | |
6673 | ||
6674 | if (count == 0) | |
6675 | page_list = NULL; | |
6676 | ||
6677 | if((isVectorUPL = vector_upl_is_valid(upl))) { | |
6678 | vector_upl = upl; | |
6679 | upl_lock(vector_upl); | |
6680 | } | |
6681 | else | |
6682 | upl_lock(upl); | |
6683 | ||
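/*
 * For a vector UPL this label is revisited once per sub-UPL:
 * vector_upl_subupl_byoffset() returns the sub-UPL covering the
 * current (offset, size) window, and subupl_offset/subupl_size are
 * advanced until the whole requested range has been committed.
 */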
6684 | process_upl_to_commit: | |
6685 | ||
6686 | if(isVectorUPL) { | |
6687 | size = subupl_size; | |
6688 | offset = subupl_offset; | |
6689 | if(size == 0) { | |
6690 | upl_unlock(vector_upl); | |
6691 | return KERN_SUCCESS; | |
6692 | } | |
6693 | upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); | |
6694 | if(upl == NULL) { | |
6695 | upl_unlock(vector_upl); | |
6696 | return KERN_FAILURE; | |
6697 | } | |
6698 | page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl); | |
6699 | subupl_size -= size; | |
6700 | subupl_offset += size; | |
6701 | } | |
6702 | ||
6703 | #if UPL_DEBUG | |
6704 | if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { | |
6705 | (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES); | |
6706 | ||
6707 | upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; | |
6708 | upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); | |
6709 | ||
6710 | upl->upl_commit_index++; | |
6711 | } | |
6712 | #endif | |
6713 | if (upl->flags & UPL_DEVICE_MEMORY) | |
6714 | xfer_size = 0; | |
6715 | else if ((offset + size) <= upl->size) | |
6716 | xfer_size = size; | |
6717 | else { | |
6718 | if(!isVectorUPL) | |
6719 | upl_unlock(upl); | |
6720 | else { | |
6721 | upl_unlock(vector_upl); | |
6722 | } | |
6723 | return KERN_FAILURE; | |
6724 | } | |
6725 | if (upl->flags & UPL_SET_DIRTY) | |
6726 | flags |= UPL_COMMIT_SET_DIRTY; | |
6727 | if (upl->flags & UPL_CLEAR_DIRTY) | |
6728 | flags |= UPL_COMMIT_CLEAR_DIRTY; | |
6729 | ||
6730 | if (upl->flags & UPL_INTERNAL) | |
6731 | lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl)) | |
6732 | + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); | |
6733 | else | |
6734 | lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); | |
6735 | ||
6736 | object = upl->map_object; | |
6737 | ||
6738 | if (upl->flags & UPL_SHADOWED) { | |
6739 | vm_object_lock(object); | |
6740 | shadow_object = object->shadow; | |
6741 | } else { | |
6742 | shadow_object = object; | |
6743 | } | |
6744 | entry = offset/PAGE_SIZE; | |
6745 | target_offset = (vm_object_offset_t)offset; | |
6746 | ||
6747 | assert(!(target_offset & PAGE_MASK)); | |
6748 | assert(!(xfer_size & PAGE_MASK)); | |
6749 | ||
6750 | if (upl->flags & UPL_KERNEL_OBJECT) | |
6751 | vm_object_lock_shared(shadow_object); | |
6752 | else | |
6753 | vm_object_lock(shadow_object); | |
6754 | ||
6755 | if (upl->flags & UPL_ACCESS_BLOCKED) { | |
6756 | assert(shadow_object->blocked_access); | |
6757 | shadow_object->blocked_access = FALSE; | |
6758 | vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); | |
6759 | } | |
6760 | ||
6761 | if (shadow_object->code_signed) { | |
6762 | /* | |
6763 | * CODE SIGNING: | |
6764 | * If the object is code-signed, do not let this UPL tell | |
6765 | * us if the pages are valid or not. Let the pages be | |
6766 | * validated by VM the normal way (when they get mapped or | |
6767 | * copied). | |
6768 | */ | |
6769 | flags &= ~UPL_COMMIT_CS_VALIDATED; | |
6770 | } | |
6771 | if (! page_list) { | |
6772 | /* | |
6773 | * No page list to get the code-signing info from !? | |
6774 | */ | |
6775 | flags &= ~UPL_COMMIT_CS_VALIDATED; | |
6776 | } | |
6777 | if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal) | |
6778 | should_be_throttled = TRUE; | |
6779 | ||
6780 | dwp = &dw_array[0]; | |
6781 | dw_count = 0; | |
6782 | dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); | |
6783 | ||
6784 | if ((upl->flags & UPL_IO_WIRE) && | |
6785 | !(flags & UPL_COMMIT_FREE_ABSENT) && | |
6786 | !isVectorUPL && | |
6787 | shadow_object->purgable != VM_PURGABLE_VOLATILE && | |
6788 | shadow_object->purgable != VM_PURGABLE_EMPTY) { | |
6789 | ||
6790 | if (!queue_empty(&shadow_object->memq)) { | |
6791 | queue_init(&local_queue); | |
6792 | if (size == shadow_object->vo_size) { | |
6793 | nxt_page = (vm_page_t)queue_first(&shadow_object->memq); | |
6794 | fast_path_full_commit = 1; | |
6795 | } | |
6796 | fast_path_possible = 1; | |
6797 | ||
6798 | if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal && | |
6799 | (shadow_object->purgable == VM_PURGABLE_DENY || | |
6800 | shadow_object->purgable == VM_PURGABLE_NONVOLATILE || | |
6801 | shadow_object->purgable == VM_PURGABLE_VOLATILE)) { | |
6802 | throttle_page = 1; | |
6803 | } | |
6804 | } | |
6805 | } | |
6806 | ||
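/*
 * Main commit loop: rather than updating each page immediately, the
 * required operations are accumulated as DW_* bits in dwp->dw_mask
 * and flushed in batches of up to dw_limit entries via
 * vm_page_do_delayed_work(), which lets the page-queue work be
 * batched instead of done one page at a time.
 */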
6807 | while (xfer_size) { | |
6808 | vm_page_t t, m; | |
6809 | ||
6810 | dwp->dw_mask = 0; | |
6811 | clear_refmod = 0; | |
6812 | ||
6813 | m = VM_PAGE_NULL; | |
6814 | ||
6815 | if (upl->flags & UPL_LITE) { | |
6816 | unsigned int pg_num; | |
6817 | ||
6818 | if (nxt_page != VM_PAGE_NULL) { | |
6819 | m = nxt_page; | |
6820 | nxt_page = (vm_page_t)queue_next(&nxt_page->listq); | |
6821 | target_offset = m->offset; | |
6822 | } | |
6823 | pg_num = (unsigned int) (target_offset/PAGE_SIZE); | |
6824 | assert(pg_num == target_offset/PAGE_SIZE); | |
6825 | ||
6826 | if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { | |
6827 | lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); | |
6828 | ||
6829 | if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL) | |
6830 | m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset)); | |
6831 | } else | |
6832 | m = NULL; | |
6833 | } | |
6834 | if (upl->flags & UPL_SHADOWED) { | |
6835 | if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { | |
6836 | ||
6837 | t->pageout = FALSE; | |
6838 | ||
6839 | VM_PAGE_FREE(t); | |
6840 | ||
6841 | if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL) | |
6842 | m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset); | |
6843 | } | |
6844 | } | |
6845 | if (m == VM_PAGE_NULL) | |
6846 | goto commit_next_page; | |
6847 | ||
6848 | if (m->compressor) { | |
6849 | assert(m->busy); | |
6850 | ||
6851 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); | |
6852 | goto commit_next_page; | |
6853 | } | |
6854 | ||
6855 | if (flags & UPL_COMMIT_CS_VALIDATED) { | |
6856 | /* | |
6857 | * CODE SIGNING: | |
6858 | * Set the code signing bits according to | |
6859 | * what the UPL says they should be. | |
6860 | */ | |
6861 | m->cs_validated = page_list[entry].cs_validated; | |
6862 | m->cs_tainted = page_list[entry].cs_tainted; | |
6863 | m->cs_nx = page_list[entry].cs_nx; | |
6864 | } | |
6865 | if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL) | |
6866 | m->written_by_kernel = TRUE; | |
6867 | ||
6868 | if (upl->flags & UPL_IO_WIRE) { | |
6869 | ||
6870 | if (page_list) | |
6871 | page_list[entry].phys_addr = 0; | |
6872 | ||
6873 | if (flags & UPL_COMMIT_SET_DIRTY) { | |
6874 | SET_PAGE_DIRTY(m, FALSE); | |
6875 | } else if (flags & UPL_COMMIT_CLEAR_DIRTY) { | |
6876 | m->dirty = FALSE; | |
6877 | ||
6878 | if (! (flags & UPL_COMMIT_CS_VALIDATED) && | |
6879 | m->cs_validated && !m->cs_tainted) { | |
6880 | /* | |
6881 | * CODE SIGNING: | |
6882 | * This page is no longer dirty | |
6883 | * but could have been modified, | |
6884 | * so it will need to be | |
6885 | * re-validated. | |
6886 | */ | |
6887 | if (m->slid) { | |
6888 | panic("upl_commit_range(%p): page %p was slid\n", | |
6889 | upl, m); | |
6890 | } | |
6891 | assert(!m->slid); | |
6892 | m->cs_validated = FALSE; | |
6893 | #if DEVELOPMENT || DEBUG | |
6894 | vm_cs_validated_resets++; | |
6895 | #endif | |
6896 | pmap_disconnect(m->phys_page); | |
6897 | } | |
6898 | clear_refmod |= VM_MEM_MODIFIED; | |
6899 | } | |
6900 | if (upl->flags & UPL_ACCESS_BLOCKED) { | |
6901 | /* | |
6902 | * We blocked access to the pages in this UPL. | |
6903 | * Clear the "busy" bit and wake up any waiter | |
6904 | * for this page. | |
6905 | */ | |
6906 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); | |
6907 | } | |
6908 | if (fast_path_possible) { | |
6909 | assert(m->object->purgable != VM_PURGABLE_EMPTY); | |
6910 | assert(m->object->purgable != VM_PURGABLE_VOLATILE); | |
6911 | if (m->absent) { | |
6912 | assert(m->wire_count == 0); | |
6913 | assert(m->busy); | |
6914 | ||
6915 | m->absent = FALSE; | |
6916 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); | |
6917 | } else { | |
6918 | if (m->wire_count == 0) | |
6919 | panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object); | |
6920 | ||
6921 | /* | |
6922 | * XXX FBDP need to update some other | |
6923 | * counters here (purgeable_wired_count) | |
6924 | * (ledgers), ... | |
6925 | */ | |
6926 | assert(m->wire_count); | |
6927 | m->wire_count--; | |
6928 | ||
6929 | if (m->wire_count == 0) | |
6930 | unwired_count++; | |
6931 | } | |
6932 | if (m->wire_count == 0) { | |
6933 | queue_enter(&local_queue, m, vm_page_t, pageq); | |
6934 | local_queue_count++; | |
6935 | ||
6936 | if (throttle_page) { | |
6937 | m->throttled = TRUE; | |
6938 | } else { | |
6939 | if (flags & UPL_COMMIT_INACTIVATE) | |
6940 | m->inactive = TRUE; | |
6941 | else | |
6942 | m->active = TRUE; | |
6943 | } | |
6944 | } | |
6945 | } else { | |
6946 | if (flags & UPL_COMMIT_INACTIVATE) { | |
6947 | dwp->dw_mask |= DW_vm_page_deactivate_internal; | |
6948 | clear_refmod |= VM_MEM_REFERENCED; | |
6949 | } | |
6950 | if (m->absent) { | |
6951 | if (flags & UPL_COMMIT_FREE_ABSENT) | |
6952 | dwp->dw_mask |= DW_vm_page_free; | |
6953 | else { | |
6954 | m->absent = FALSE; | |
6955 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); | |
6956 | ||
6957 | if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal)) | |
6958 | dwp->dw_mask |= DW_vm_page_activate; | |
6959 | } | |
6960 | } else | |
6961 | dwp->dw_mask |= DW_vm_page_unwire; | |
6962 | } | |
6963 | goto commit_next_page; | |
6964 | } | |
6965 | assert(!m->compressor); | |
6966 | ||
6967 | if (page_list) | |
6968 | page_list[entry].phys_addr = 0; | |
6969 | ||
6970 | /* | |
6971 | * make sure to clear the hardware | |
6972 | * modify or reference bits before | |
6973 | * releasing the BUSY bit on this page | |
6974 | * otherwise we risk losing a legitimate | |
6975 | * change of state | |
6976 | */ | |
6977 | if (flags & UPL_COMMIT_CLEAR_DIRTY) { | |
6978 | m->dirty = FALSE; | |
6979 | ||
6980 | clear_refmod |= VM_MEM_MODIFIED; | |
6981 | } | |
6982 | if (m->laundry) | |
6983 | dwp->dw_mask |= DW_vm_pageout_throttle_up; | |
6984 | ||
6985 | if (VM_PAGE_WIRED(m)) | |
6986 | m->pageout = FALSE; | |
6987 | ||
6988 | if (! (flags & UPL_COMMIT_CS_VALIDATED) && | |
6989 | m->cs_validated && !m->cs_tainted) { | |
6990 | /* | |
6991 | * CODE SIGNING: | |
6992 | * This page is no longer dirty | |
6993 | * but could have been modified, | |
6994 | * so it will need to be | |
6995 | * re-validated. | |
6996 | */ | |
6997 | if (m->slid) { | |
6998 | panic("upl_commit_range(%p): page %p was slid\n", | |
6999 | upl, m); | |
7000 | } | |
7001 | assert(!m->slid); | |
7002 | m->cs_validated = FALSE; | |
7003 | #if DEVELOPMENT || DEBUG | |
7004 | vm_cs_validated_resets++; | |
7005 | #endif | |
7006 | pmap_disconnect(m->phys_page); | |
7007 | } | |
7008 | if (m->overwriting) { | |
7009 | /* | |
7010 | * the (COPY_OUT_FROM == FALSE) request_page_list case | |
7011 | */ | |
7012 | if (m->busy) { | |
7013 | #if CONFIG_PHANTOM_CACHE | |
7014 | if (m->absent && !m->object->internal) | |
7015 | dwp->dw_mask |= DW_vm_phantom_cache_update; | |
7016 | #endif | |
7017 | m->absent = FALSE; | |
7018 | ||
7019 | dwp->dw_mask |= DW_clear_busy; | |
7020 | } else { | |
7021 | /* | |
7022 | * alternate (COPY_OUT_FROM == FALSE) page_list case | |
7023 | * Occurs when the original page was wired | |
7024 | * at the time of the list request | |
7025 | */ | |
7026 | assert(VM_PAGE_WIRED(m)); | |
7027 | ||
7028 | dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */ | |
7029 | } | |
7030 | m->overwriting = FALSE; | |
7031 | } | |
7032 | if (m->encrypted_cleaning == TRUE) { | |
7033 | m->encrypted_cleaning = FALSE; | |
7034 | ||
7035 | dwp->dw_mask |= DW_clear_busy | DW_PAGE_WAKEUP; | |
7036 | } | |
7037 | m->cleaning = FALSE; | |
7038 | ||
7039 | if (m->pageout) { | |
7040 | /* | |
7041 | * With the clean queue enabled, UPL_PAGEOUT should | |
7042 | * no longer set the pageout bit. Its pages now go | |
7043 | * to the clean queue. | |
7044 | */ | |
7045 | assert(!(flags & UPL_PAGEOUT)); | |
7046 | ||
7047 | m->pageout = FALSE; | |
7048 | #if MACH_CLUSTER_STATS | |
7049 | if (m->wanted) vm_pageout_target_collisions++; | |
7050 | #endif | |
7051 | if ((flags & UPL_COMMIT_SET_DIRTY) || | |
7052 | (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))) { | |
7053 | /* | |
7054 | * page was re-dirtied after we started | |
7055 | * the pageout... reactivate it since | |
7056 | * we don't know whether the on-disk | |
7057 | * copy matches what is now in memory | |
7058 | */ | |
7059 | SET_PAGE_DIRTY(m, FALSE); | |
7060 | ||
7061 | dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP; | |
7062 | ||
7063 | if (upl->flags & UPL_PAGEOUT) { | |
7064 | CLUSTER_STAT(vm_pageout_target_page_dirtied++;) | |
7065 | VM_STAT_INCR(reactivations); | |
7066 | DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL); | |
7067 | } | |
7068 | } else { | |
7069 | /* | |
7070 | * page has been successfully cleaned | |
7071 | * go ahead and free it for other use | |
7072 | */ | |
7073 | if (m->object->internal) { | |
7074 | DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL); | |
7075 | } else { | |
7076 | DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL); | |
7077 | } | |
7078 | m->dirty = FALSE; | |
7079 | m->busy = TRUE; | |
7080 | ||
7081 | dwp->dw_mask |= DW_vm_page_free; | |
7082 | } | |
7083 | goto commit_next_page; | |
7084 | } | |
7085 | #if MACH_CLUSTER_STATS | |
7086 | if (m->wpmapped) | |
7087 | m->dirty = pmap_is_modified(m->phys_page); | |
7088 | ||
7089 | if (m->dirty) vm_pageout_cluster_dirtied++; | |
7090 | else vm_pageout_cluster_cleaned++; | |
7091 | if (m->wanted) vm_pageout_cluster_collisions++; | |
7092 | #endif | |
7093 | /* | |
7094 | * It is a part of the semantic of COPYOUT_FROM | |
7095 | * UPLs that a commit implies cache sync | |
7096 | * between the vm page and the backing store | |
7097 | * this can be used to strip the precious bit | |
7098 | * as well as clean | |
7099 | */ | |
7100 | if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS)) | |
7101 | m->precious = FALSE; | |
7102 | ||
7103 | if (flags & UPL_COMMIT_SET_DIRTY) { | |
7104 | SET_PAGE_DIRTY(m, FALSE); | |
7105 | } else { | |
7106 | m->dirty = FALSE; | |
7107 | } | |
7108 | ||
7109 | /* with the clean queue on, move *all* cleaned pages to the clean queue */ | |
7110 | if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) { | |
7111 | pgpgout_count++; | |
7112 | ||
7113 | VM_STAT_INCR(pageouts); | |
7114 | DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL); | |
7115 | ||
7116 | dwp->dw_mask |= DW_enqueue_cleaned; | |
7117 | vm_pageout_enqueued_cleaned_from_inactive_dirty++; | |
7118 | } else if (should_be_throttled == TRUE && !m->active && !m->inactive && !m->speculative && !m->throttled) { | |
7119 | /* | |
7120 | * page coming back in from being 'frozen'... | |
7121 | * it was dirty before it was frozen, so keep it so | |
7122 | * the vm_page_activate will notice that it really belongs | |
7123 | * on the throttle queue and put it there | |
7124 | */ | |
7125 | SET_PAGE_DIRTY(m, FALSE); | |
7126 | dwp->dw_mask |= DW_vm_page_activate; | |
7127 | ||
7128 | } else { | |
7129 | if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) { | |
7130 | dwp->dw_mask |= DW_vm_page_deactivate_internal; | |
7131 | clear_refmod |= VM_MEM_REFERENCED; | |
7132 | } else if (!m->active && !m->inactive && !m->speculative) { | |
7133 | ||
7134 | if (m->clustered || (flags & UPL_COMMIT_SPECULATE)) | |
7135 | dwp->dw_mask |= DW_vm_page_speculate; | |
7136 | else if (m->reference) | |
7137 | dwp->dw_mask |= DW_vm_page_activate; | |
7138 | else { | |
7139 | dwp->dw_mask |= DW_vm_page_deactivate_internal; | |
7140 | clear_refmod |= VM_MEM_REFERENCED; | |
7141 | } | |
7142 | } | |
7143 | } | |
7144 | if (upl->flags & UPL_ACCESS_BLOCKED) { | |
7145 | /* | |
7146 | * We blocked access to the pages in this UPL. | |
7147 | * Clear the "busy" bit on this page before we | |
7148 | * wake up any waiter. | |
7149 | */ | |
7150 | dwp->dw_mask |= DW_clear_busy; | |
7151 | } | |
7152 | /* | |
7153 | * Wake up any thread waiting for the page to finish cleaning. | |
7154 | */ | |
7155 | dwp->dw_mask |= DW_PAGE_WAKEUP; | |
7156 | ||
7157 | commit_next_page: | |
7158 | if (clear_refmod) | |
7159 | pmap_clear_refmod(m->phys_page, clear_refmod); | |
7160 | ||
7161 | target_offset += PAGE_SIZE_64; | |
7162 | xfer_size -= PAGE_SIZE; | |
7163 | entry++; | |
7164 | ||
7165 | if (dwp->dw_mask) { | |
7166 | if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { | |
7167 | VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); | |
7168 | ||
7169 | if (dw_count >= dw_limit) { | |
7170 | vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count); | |
7171 | ||
7172 | dwp = &dw_array[0]; | |
7173 | dw_count = 0; | |
7174 | } | |
7175 | } else { | |
7176 | if (dwp->dw_mask & DW_clear_busy) | |
7177 | m->busy = FALSE; | |
7178 | ||
7179 | if (dwp->dw_mask & DW_PAGE_WAKEUP) | |
7180 | PAGE_WAKEUP(m); | |
7181 | } | |
7182 | } | |
7183 | } | |
7184 | if (dw_count) | |
7185 | vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count); | |
7186 | ||
7187 | if (fast_path_possible) { | |
7188 | ||
7189 | assert(shadow_object->purgable != VM_PURGABLE_VOLATILE); | |
7190 | assert(shadow_object->purgable != VM_PURGABLE_EMPTY); | |
7191 | ||
7192 | if (local_queue_count || unwired_count) { | |
7193 | ||
7194 | if (local_queue_count) { | |
7195 | vm_page_t first_local, last_local; | |
7196 | vm_page_t first_target; | |
7197 | queue_head_t *target_queue; | |
7198 | ||
7199 | if (throttle_page) | |
7200 | target_queue = &vm_page_queue_throttled; | |
7201 | else { | |
7202 | if (flags & UPL_COMMIT_INACTIVATE) { | |
7203 | if (shadow_object->internal) | |
7204 | target_queue = &vm_page_queue_anonymous; | |
7205 | else | |
7206 | target_queue = &vm_page_queue_inactive; | |
7207 | } else | |
7208 | target_queue = &vm_page_queue_active; | |
7209 | } | |
7210 | /* | |
7211 | * Transfer the entire local queue to a regular LRU page queue. | |
7212 | */ | |
7213 | first_local = (vm_page_t) queue_first(&local_queue); | |
7214 | last_local = (vm_page_t) queue_last(&local_queue); | |
7215 | ||
7216 | vm_page_lockspin_queues(); | |
7217 | ||
7218 | first_target = (vm_page_t) queue_first(target_queue); | |
7219 | ||
7220 | if (queue_empty(target_queue)) | |
7221 | queue_last(target_queue) = (queue_entry_t) last_local; | |
7222 | else | |
7223 | queue_prev(&first_target->pageq) = (queue_entry_t) last_local; | |
7224 | ||
7225 | queue_first(target_queue) = (queue_entry_t) first_local; | |
7226 | queue_prev(&first_local->pageq) = (queue_entry_t) target_queue; | |
7227 | queue_next(&last_local->pageq) = (queue_entry_t) first_target; | |
7228 | ||
7229 | /* | |
7230 | * Adjust the global page counts. | |
7231 | */ | |
7232 | if (throttle_page) { | |
7233 | vm_page_throttled_count += local_queue_count; | |
7234 | } else { | |
7235 | if (flags & UPL_COMMIT_INACTIVATE) { | |
7236 | if (shadow_object->internal) | |
7237 | vm_page_anonymous_count += local_queue_count; | |
7238 | vm_page_inactive_count += local_queue_count; | |
7239 | ||
7240 | token_new_pagecount += local_queue_count; | |
7241 | } else | |
7242 | vm_page_active_count += local_queue_count; | |
7243 | ||
7244 | if (shadow_object->internal) | |
7245 | vm_page_pageable_internal_count += local_queue_count; | |
7246 | else | |
7247 | vm_page_pageable_external_count += local_queue_count; | |
7248 | } | |
7249 | } else { | |
7250 | vm_page_lockspin_queues(); | |
7251 | } | |
7252 | if (unwired_count) { | |
7253 | vm_page_wire_count -= unwired_count; | |
7254 | VM_CHECK_MEMORYSTATUS; | |
7255 | } | |
7256 | vm_page_unlock_queues(); | |
7257 | ||
7258 | shadow_object->wired_page_count -= unwired_count; | |
7259 | ||
7260 | if (!shadow_object->wired_page_count) { | |
7261 | VM_OBJECT_UNWIRED(shadow_object); | |
7262 | } | |
7263 | } | |
7264 | } | |
7265 | occupied = 1; | |
7266 | ||
7267 | if (upl->flags & UPL_DEVICE_MEMORY) { | |
7268 | occupied = 0; | |
7269 | } else if (upl->flags & UPL_LITE) { | |
7270 | int pg_num; | |
7271 | int i; | |
7272 | ||
7273 | occupied = 0; | |
7274 | ||
7275 | if (!fast_path_full_commit) { | |
7276 | pg_num = upl->size/PAGE_SIZE; | |
7277 | pg_num = (pg_num + 31) >> 5; | |
7278 | ||
7279 | for (i = 0; i < pg_num; i++) { | |
7280 | if (lite_list[i] != 0) { | |
7281 | occupied = 1; | |
7282 | break; | |
7283 | } | |
7284 | } | |
7285 | } | |
7286 | } else { | |
7287 | if (queue_empty(&upl->map_object->memq)) | |
7288 | occupied = 0; | |
7289 | } | |
7290 | if (occupied == 0) { | |
7291 | /* | |
7292 | * If this UPL element belongs to a Vector UPL and is | |
7293 | * empty, then this is the right function to deallocate | |
7294 | * it. So go ahead and set the *empty variable. The flag | |
7295 | * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view | |
7296 | * should be considered relevant for the Vector UPL and not | |
7297 | * the internal UPLs. | |
7298 | */ | |
7299 | if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) | |
7300 | *empty = TRUE; | |
7301 | ||
7302 | if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { | |
7303 | /* | |
7304 | * this is not a paging object | |
7305 | * so we need to drop the paging reference | |
7306 | * that was taken when we created the UPL | |
7307 | * against this object | |
7308 | */ | |
7309 | vm_object_activity_end(shadow_object); | |
7310 | vm_object_collapse(shadow_object, 0, TRUE); | |
7311 | } else { | |
7312 | /* | |
7313 | * we donated the paging reference to | |
7314 | * the map object... vm_pageout_object_terminate | |
7315 | * will drop this reference | |
7316 | */ | |
7317 | } | |
7318 | } | |
7319 | vm_object_unlock(shadow_object); | |
7320 | if (object != shadow_object) | |
7321 | vm_object_unlock(object); | |
7322 | ||
7323 | if(!isVectorUPL) | |
7324 | upl_unlock(upl); | |
7325 | else { | |
7326 | /* | |
7327 | * If we completed our operations on an UPL that is | |
7328 | * part of a Vectored UPL and if empty is TRUE, then | |
7329 | * we should go ahead and deallocate this UPL element. | |
7330 | * Then we check if this was the last of the UPL elements | |
7331 | * within that Vectored UPL. If so, set empty to TRUE | |
7332 | * so that in ubc_upl_commit_range or ubc_upl_commit, we | |
7333 | * can go ahead and deallocate the Vector UPL too. | |
7334 | */ | |
7335 | if(*empty==TRUE) { | |
7336 | *empty = vector_upl_set_subupl(vector_upl, upl, 0); | |
7337 | upl_deallocate(upl); | |
7338 | } | |
7339 | goto process_upl_to_commit; | |
7340 | } | |
7341 | ||
7342 | if (pgpgout_count) { | |
7343 | DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL); | |
7344 | } | |
7345 | ||
7346 | return KERN_SUCCESS; | |
7347 | } | |
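/*
 * Typical caller pattern (illustrative sketch only): commit the whole
 * UPL and drop it once the commit reports it empty.  Note that *empty
 * is only set for a sub-UPL of a vector UPL or when the UPL carries
 * UPL_COMMIT_NOTIFY_EMPTY (see the "occupied == 0" handling above).
 *
 *	boolean_t empty;
 *
 *	if (upl_commit_range(upl, 0, upl->size, 0,
 *			     page_list, count, &empty) == KERN_SUCCESS &&
 *	    empty == TRUE)
 *		upl_deallocate(upl);
 */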
7348 | ||
7349 | kern_return_t | |
7350 | upl_abort_range( | |
7351 | upl_t upl, | |
7352 | upl_offset_t offset, | |
7353 | upl_size_t size, | |
7354 | int error, | |
7355 | boolean_t *empty) | |
7356 | { | |
7357 | upl_page_info_t *user_page_list = NULL; | |
7358 | upl_size_t xfer_size, subupl_size = size; | |
7359 | vm_object_t shadow_object; | |
7360 | vm_object_t object; | |
7361 | vm_object_offset_t target_offset; | |
7362 | upl_offset_t subupl_offset = offset; | |
7363 | int entry; | |
7364 | wpl_array_t lite_list; | |
7365 | int occupied; | |
7366 | struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; | |
7367 | struct vm_page_delayed_work *dwp; | |
7368 | int dw_count; | |
7369 | int dw_limit; | |
7370 | int isVectorUPL = 0; | |
7371 | upl_t vector_upl = NULL; | |
7372 | ||
7373 | *empty = FALSE; | |
7374 | ||
7375 | if (upl == UPL_NULL) | |
7376 | return KERN_INVALID_ARGUMENT; | |
7377 | ||
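/*
 * For an IO-wired UPL, an abort that does not dump the pages is
 * equivalent to a commit that frees any still-absent pages, so the
 * request is redirected to upl_commit_range() with
 * UPL_COMMIT_FREE_ABSENT.
 */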
7378 | if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) ) | |
7379 | return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty); | |
7380 | ||
7381 | if((isVectorUPL = vector_upl_is_valid(upl))) { | |
7382 | vector_upl = upl; | |
7383 | upl_lock(vector_upl); | |
7384 | } | |
7385 | else | |
7386 | upl_lock(upl); | |
7387 | ||
7388 | process_upl_to_abort: | |
7389 | if(isVectorUPL) { | |
7390 | size = subupl_size; | |
7391 | offset = subupl_offset; | |
7392 | if(size == 0) { | |
7393 | upl_unlock(vector_upl); | |
7394 | return KERN_SUCCESS; | |
7395 | } | |
7396 | upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); | |
7397 | if(upl == NULL) { | |
7398 | upl_unlock(vector_upl); | |
7399 | return KERN_FAILURE; | |
7400 | } | |
7401 | subupl_size -= size; | |
7402 | subupl_offset += size; | |
7403 | } | |
7404 | ||
7405 | *empty = FALSE; | |
7406 | ||
7407 | #if UPL_DEBUG | |
7408 | if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { | |
7409 | (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES); | |
7410 | ||
7411 | upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; | |
7412 | upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); | |
7413 | upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1; | |
7414 | ||
7415 | upl->upl_commit_index++; | |
7416 | } | |
7417 | #endif | |
7418 | if (upl->flags & UPL_DEVICE_MEMORY) | |
7419 | xfer_size = 0; | |
7420 | else if ((offset + size) <= upl->size) | |
7421 | xfer_size = size; | |
7422 | else { | |
7423 | if(!isVectorUPL) | |
7424 | upl_unlock(upl); | |
7425 | else { | |
7426 | upl_unlock(vector_upl); | |
7427 | } | |
7428 | ||
7429 | return KERN_FAILURE; | |
7430 | } | |
7431 | if (upl->flags & UPL_INTERNAL) { | |
7432 | lite_list = (wpl_array_t) | |
7433 | ((((uintptr_t)upl) + sizeof(struct upl)) | |
7434 | + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); | |
7435 | ||
7436 | user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); | |
7437 | } else { | |
7438 | lite_list = (wpl_array_t) | |
7439 | (((uintptr_t)upl) + sizeof(struct upl)); | |
7440 | } | |
7441 | object = upl->map_object; | |
7442 | ||
7443 | if (upl->flags & UPL_SHADOWED) { | |
7444 | vm_object_lock(object); | |
7445 | shadow_object = object->shadow; | |
7446 | } else | |
7447 | shadow_object = object; | |
7448 | ||
7449 | entry = offset/PAGE_SIZE; | |
7450 | target_offset = (vm_object_offset_t)offset; | |
7451 | ||
7452 | assert(!(target_offset & PAGE_MASK)); | |
7453 | assert(!(xfer_size & PAGE_MASK)); | |
7454 | ||
7455 | if (upl->flags & UPL_KERNEL_OBJECT) | |
7456 | vm_object_lock_shared(shadow_object); | |
7457 | else | |
7458 | vm_object_lock(shadow_object); | |
7459 | ||
7460 | if (upl->flags & UPL_ACCESS_BLOCKED) { | |
7461 | assert(shadow_object->blocked_access); | |
7462 | shadow_object->blocked_access = FALSE; | |
7463 | vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); | |
7464 | } | |
7465 | ||
7466 | dwp = &dw_array[0]; | |
7467 | dw_count = 0; | |
7468 | dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); | |
7469 | ||
7470 | if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT)) | |
7471 | panic("upl_abort_range: kernel_object being DUMPED"); | |
7472 | ||
7473 | while (xfer_size) { | |
7474 | vm_page_t t, m; | |
7475 | unsigned int pg_num; | |
7476 | boolean_t needed; | |
7477 | ||
7478 | pg_num = (unsigned int) (target_offset/PAGE_SIZE); | |
7479 | assert(pg_num == target_offset/PAGE_SIZE); | |
7480 | ||
7481 | needed = FALSE; | |
7482 | ||
7483 | if (user_page_list) | |
7484 | needed = user_page_list[pg_num].needed; | |
7485 | ||
7486 | dwp->dw_mask = 0; | |
7487 | m = VM_PAGE_NULL; | |
7488 | ||
7489 | if (upl->flags & UPL_LITE) { | |
7490 | ||
7491 | if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { | |
7492 | lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); | |
7493 | ||
7494 | if ( !(upl->flags & UPL_KERNEL_OBJECT)) | |
7495 | m = vm_page_lookup(shadow_object, target_offset + | |
7496 | (upl->offset - shadow_object->paging_offset)); | |
7497 | } | |
7498 | } | |
7499 | if (upl->flags & UPL_SHADOWED) { | |
7500 | if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { | |
7501 | t->pageout = FALSE; | |
7502 | ||
7503 | VM_PAGE_FREE(t); | |
7504 | ||
7505 | if (m == VM_PAGE_NULL) | |
7506 | m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset); | |
7507 | } | |
7508 | } | |
7509 | if ((upl->flags & UPL_KERNEL_OBJECT)) | |
7510 | goto abort_next_page; | |
7511 | ||
7512 | if (m != VM_PAGE_NULL) { | |
7513 | ||
7514 | assert(!m->compressor); | |
7515 | ||
7516 | if (m->absent) { | |
7517 | boolean_t must_free = TRUE; | |
7518 | ||
7519 | /* | |
7520 | * COPYOUT = FALSE case | |
7521 | * check for error conditions which must | |
7522 | * be passed back to the page's customer | |
7523 | */ | |
7524 | if (error & UPL_ABORT_RESTART) { | |
7525 | m->restart = TRUE; | |
7526 | m->absent = FALSE; | |
7527 | m->unusual = TRUE; | |
7528 | must_free = FALSE; | |
7529 | } else if (error & UPL_ABORT_UNAVAILABLE) { | |
7530 | m->restart = FALSE; | |
7531 | m->unusual = TRUE; | |
7532 | must_free = FALSE; | |
7533 | } else if (error & UPL_ABORT_ERROR) { | |
7534 | m->restart = FALSE; | |
7535 | m->absent = FALSE; | |
7536 | m->error = TRUE; | |
7537 | m->unusual = TRUE; | |
7538 | must_free = FALSE; | |
7539 | } | |
7540 | if (m->clustered && needed == FALSE) { | |
7541 | /* | |
7542 | * This page was a part of a speculative | |
7543 | * read-ahead initiated by the kernel | |
7544 | * itself. No one is expecting this | |
7545 | * page and no one will clean up its | |
7546 | * error state if it ever becomes valid | |
7547 | * in the future. | |
7548 | * We have to free it here. | |
7549 | */ | |
7550 | must_free = TRUE; | |
7551 | } | |
7552 | ||
7553 | /* | |
7554 | * ENCRYPTED SWAP: | |
7555 | * If the page was already encrypted, | |
7556 | * we don't really need to decrypt it | |
7557 | * now. It will get decrypted later, | |
7558 | * on demand, as soon as someone needs | |
7559 | * to access its contents. | |
7560 | */ | |
7561 | ||
7562 | m->cleaning = FALSE; | |
7563 | m->encrypted_cleaning = FALSE; | |
7564 | ||
7565 | if (m->overwriting && !m->busy) { | |
7566 | /* | |
7567 | * this shouldn't happen since | |
7568 | * this is an 'absent' page, but | |
7569 | * it doesn't hurt to check for | |
7570 | * the 'alternate' method of | |
7571 | * stabilizing the page... | |
7572 | * we will mark 'busy' to be cleared | |
7573 | * in the following code which will | |
7574 | * take care of the primary stabilization | |
7575 | * method (i.e. setting 'busy' to TRUE) | |
7576 | */ | |
7577 | dwp->dw_mask |= DW_vm_page_unwire; | |
7578 | } | |
7579 | m->overwriting = FALSE; | |
7580 | ||
7581 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); | |
7582 | ||
7583 | if (must_free == TRUE) | |
7584 | dwp->dw_mask |= DW_vm_page_free; | |
7585 | else | |
7586 | dwp->dw_mask |= DW_vm_page_activate; | |
7587 | } else { | |
7588 | /* | |
7589 | * Handle the trusted pager throttle. | |
7590 | */ | |
7591 | if (m->laundry) | |
7592 | dwp->dw_mask |= DW_vm_pageout_throttle_up; | |
7593 | ||
7594 | if (upl->flags & UPL_ACCESS_BLOCKED) { | |
7595 | /* | |
7596 | * We blocked access to the pages in this UPL. | |
7597 | * Clear the "busy" bit and wake up any waiter | |
7598 | * for this page. | |
7599 | */ | |
7600 | dwp->dw_mask |= DW_clear_busy; | |
7601 | } | |
7602 | if (m->overwriting) { | |
7603 | if (m->busy) | |
7604 | dwp->dw_mask |= DW_clear_busy; | |
7605 | else { | |
7606 | /* | |
7607 | * deal with the 'alternate' method | |
7608 | * of stabilizing the page... | |
7609 | * we will either free the page | |
7610 | * or mark 'busy' to be cleared | |
7611 | * in the following code which will | |
7612 | * take care of the primary stabilization | |
7613 | * method (i.e. setting 'busy' to TRUE) | |
7614 | */ | |
7615 | dwp->dw_mask |= DW_vm_page_unwire; | |
7616 | } | |
7617 | m->overwriting = FALSE; | |
7618 | } | |
7619 | if (m->encrypted_cleaning == TRUE) { | |
7620 | m->encrypted_cleaning = FALSE; | |
7621 | ||
7622 | dwp->dw_mask |= DW_clear_busy; | |
7623 | } | |
7624 | m->pageout = FALSE; | |
7625 | m->cleaning = FALSE; | |
7626 | #if MACH_PAGEMAP | |
7627 | vm_external_state_clr(m->object->existence_map, m->offset); | |
7628 | #endif /* MACH_PAGEMAP */ | |
7629 | if (error & UPL_ABORT_DUMP_PAGES) { | |
7630 | pmap_disconnect(m->phys_page); | |
7631 | ||
7632 | dwp->dw_mask |= DW_vm_page_free; | |
7633 | } else { | |
7634 | if (!(dwp->dw_mask & DW_vm_page_unwire)) { | |
7635 | if (error & UPL_ABORT_REFERENCE) { | |
7636 | /* | |
7637 | * we've been told to explicitly | |
7638 | * reference this page... for | |
7639 | * file I/O, this is done by | |
7640 | * implementing an LRU on the inactive q | |
7641 | */ | |
7642 | dwp->dw_mask |= DW_vm_page_lru; | |
7643 | ||
7644 | } else if (!m->active && !m->inactive && !m->speculative) | |
7645 | dwp->dw_mask |= DW_vm_page_deactivate_internal; | |
7646 | } | |
7647 | dwp->dw_mask |= DW_PAGE_WAKEUP; | |
7648 | } | |
7649 | } | |
7650 | } | |
7651 | abort_next_page: | |
7652 | target_offset += PAGE_SIZE_64; | |
7653 | xfer_size -= PAGE_SIZE; | |
7654 | entry++; | |
7655 | ||
7656 | if (dwp->dw_mask) { | |
7657 | if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { | |
7658 | VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); | |
7659 | ||
7660 | if (dw_count >= dw_limit) { | |
7661 | vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count); | |
7662 | ||
7663 | dwp = &dw_array[0]; | |
7664 | dw_count = 0; | |
7665 | } | |
7666 | } else { | |
7667 | if (dwp->dw_mask & DW_clear_busy) | |
7668 | m->busy = FALSE; | |
7669 | ||
7670 | if (dwp->dw_mask & DW_PAGE_WAKEUP) | |
7671 | PAGE_WAKEUP(m); | |
7672 | } | |
7673 | } | |
7674 | } | |
7675 | if (dw_count) | |
7676 | vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count); | |
7677 | ||
7678 | occupied = 1; | |
7679 | ||
7680 | if (upl->flags & UPL_DEVICE_MEMORY) { | |
7681 | occupied = 0; | |
7682 | } else if (upl->flags & UPL_LITE) { | |
7683 | int pg_num; | |
7684 | int i; | |
7685 | ||
7686 | pg_num = upl->size/PAGE_SIZE; | |
7687 | pg_num = (pg_num + 31) >> 5; | |
7688 | occupied = 0; | |
7689 | ||
7690 | for (i = 0; i < pg_num; i++) { | |
7691 | if (lite_list[i] != 0) { | |
7692 | occupied = 1; | |
7693 | break; | |
7694 | } | |
7695 | } | |
7696 | } else { | |
7697 | if (queue_empty(&upl->map_object->memq)) | |
7698 | occupied = 0; | |
7699 | } | |
7700 | if (occupied == 0) { | |
7701 | /* | |
7702 | * If this UPL element belongs to a Vector UPL and is | |
7703 | * empty, then this is the right function to deallocate | |
7704 | * it. So go ahead and set the *empty variable. The flag | |
7705 | * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view | |
7706 | * should be considered relevant for the Vector UPL and | |
7707 | * not the internal UPLs. | |
7708 | */ | |
7709 | if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) | |
7710 | *empty = TRUE; | |
7711 | ||
7712 | if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { | |
7713 | /* | |
7714 | * this is not a paging object | |
7715 | * so we need to drop the paging reference | |
7716 | * that was taken when we created the UPL | |
7717 | * against this object | |
7718 | */ | |
7719 | vm_object_activity_end(shadow_object); | |
7720 | vm_object_collapse(shadow_object, 0, TRUE); | |
7721 | } else { | |
7722 | /* | |
7723 | * we donated the paging reference to | |
7724 | * the map object... vm_pageout_object_terminate | |
7725 | * will drop this reference | |
7726 | */ | |
7727 | } | |
7728 | } | |
7729 | vm_object_unlock(shadow_object); | |
7730 | if (object != shadow_object) | |
7731 | vm_object_unlock(object); | |
7732 | ||
7733 | if(!isVectorUPL) | |
7734 | upl_unlock(upl); | |
7735 | else { | |
7736 | /* | |
7737 | * If we completed our operations on an UPL that is | |
7738 | * part of a Vectored UPL and if empty is TRUE, then | |
7739 | * we should go ahead and deallocate this UPL element. | |
7740 | * Then we check if this was the last of the UPL elements | |
7741 | * within that Vectored UPL. If so, set empty to TRUE | |
7742 | * so that in ubc_upl_abort_range or ubc_upl_abort, we | |
7743 | * can go ahead and deallocate the Vector UPL too. | |
7744 | */ | |
7745 | if(*empty == TRUE) { | |
7746 | *empty = vector_upl_set_subupl(vector_upl, upl,0); | |
7747 | upl_deallocate(upl); | |
7748 | } | |
7749 | goto process_upl_to_abort; | |
7750 | } | |
7751 | ||
7752 | return KERN_SUCCESS; | |
7753 | } | |
7754 | ||
7755 | ||
7756 | kern_return_t | |
7757 | upl_abort( | |
7758 | upl_t upl, | |
7759 | int error) | |
7760 | { | |
7761 | boolean_t empty; | |
7762 | ||
7763 | if (upl == UPL_NULL) | |
7764 | return KERN_INVALID_ARGUMENT; | |
7765 | ||
7766 | return upl_abort_range(upl, 0, upl->size, error, &empty); | |
7767 | } | |
7768 | ||
7769 | ||
7770 | /* an option on commit should be wire */ | |
7771 | kern_return_t | |
7772 | upl_commit( | |
7773 | upl_t upl, | |
7774 | upl_page_info_t *page_list, | |
7775 | mach_msg_type_number_t count) | |
7776 | { | |
7777 | boolean_t empty; | |
7778 | ||
7779 | if (upl == UPL_NULL) | |
7780 | return KERN_INVALID_ARGUMENT; | |
7781 | ||
7782 | return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty); | |
7783 | } | |
7784 | ||
7785 | ||
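/*
 * iopl_valid_data: the absent pages grabbed for an IO-wired UPL are
 * marked valid here (presumably after the caller has filled them):
 * their absent/busy state is cleared, they are dirtied and wired,
 * and the object and global wire counts are updated to match.
 */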
7786 | void | |
7787 | iopl_valid_data( | |
7788 | upl_t upl) | |
7789 | { | |
7790 | vm_object_t object; | |
7791 | vm_offset_t offset; | |
7792 | vm_page_t m, nxt_page = VM_PAGE_NULL; | |
7793 | upl_size_t size; | |
7794 | int wired_count = 0; | |
7795 | ||
7796 | if (upl == NULL) | |
7797 | panic("iopl_valid_data: NULL upl"); | |
7798 | if (vector_upl_is_valid(upl)) | |
7799 | panic("iopl_valid_data: vector upl"); | |
7800 | if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE) | |
7801 | panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags); | |
7802 | ||
7803 | object = upl->map_object; | |
7804 | ||
7805 | if (object == kernel_object || object == compressor_object) | |
7806 | panic("iopl_valid_data: object == kernel or compressor"); | |
7807 | ||
7808 | if (object->purgable == VM_PURGABLE_VOLATILE) | |
7809 | panic("iopl_valid_data: object == VM_PURGABLE_VOLATILE"); | |
7810 | ||
7811 | size = upl->size; | |
7812 | ||
7813 | vm_object_lock(object); | |
7814 | ||
7815 | if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE)) | |
7816 | nxt_page = (vm_page_t)queue_first(&object->memq); | |
7817 | else | |
7818 | offset = 0 + upl->offset - object->paging_offset; | |
7819 | ||
7820 | while (size) { | |
7821 | ||
7822 | if (nxt_page != VM_PAGE_NULL) { | |
7823 | m = nxt_page; | |
7824 | nxt_page = (vm_page_t)queue_next(&nxt_page->listq); | |
7825 | } else { | |
7826 | m = vm_page_lookup(object, offset); | |
7827 | offset += PAGE_SIZE; | |
7828 | ||
7829 | if (m == VM_PAGE_NULL) | |
7830 | panic("iopl_valid_data: missing expected page at offset %lx", (long)offset); | |
7831 | } | |
7832 | if (m->busy) { | |
7833 | if (!m->absent) | |
7834 | panic("iopl_valid_data: busy page w/o absent"); | |
7835 | ||
7836 | if (m->pageq.next || m->pageq.prev) | |
7837 | panic("iopl_valid_data: busy+absent page on page queue"); | |
7838 | ||
7839 | m->absent = FALSE; | |
7840 | m->dirty = TRUE; | |
7841 | m->wire_count++; | |
7842 | wired_count++; | |
7843 | ||
7844 | PAGE_WAKEUP_DONE(m); | |
7845 | } | |
7846 | size -= PAGE_SIZE; | |
7847 | } | |
7848 | if (wired_count) { | |
7849 | ||
7850 | if (!object->wired_page_count) { | |
7851 | VM_OBJECT_WIRED(object); | |
7852 | } | |
7853 | object->wired_page_count += wired_count; | |
7854 | ||
7855 | vm_page_lockspin_queues(); | |
7856 | vm_page_wire_count += wired_count; | |
7857 | vm_page_unlock_queues(); | |
7858 | } | |
7859 | vm_object_unlock(object); | |
7860 | } | |
7861 | ||
7862 | void | |
7863 | vm_object_set_pmap_cache_attr( | |
7864 | vm_object_t object, | |
7865 | upl_page_info_array_t user_page_list, | |
7866 | unsigned int num_pages, | |
7867 | boolean_t batch_pmap_op) | |
7868 | { | |
7869 | unsigned int cache_attr = 0; | |
7870 | ||
7871 | cache_attr = object->wimg_bits & VM_WIMG_MASK; | |
7872 | assert(user_page_list); | |
7873 | if (cache_attr != VM_WIMG_USE_DEFAULT) { | |
7874 | PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op); | |
7875 | } | |
7876 | } | |
7877 | ||
7878 | ||
7879 | boolean_t vm_object_iopl_wire_full(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t); | |
7880 | kern_return_t vm_object_iopl_wire_empty(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_object_offset_t *, int); | |
7881 | ||
7882 | ||
7883 | ||
7884 | boolean_t | |
7885 | vm_object_iopl_wire_full(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list, | |
7886 | wpl_array_t lite_list, upl_control_flags_t cntrl_flags) | |
7887 | { | |
7888 | vm_page_t dst_page; | |
7889 | vm_tag_t tag; | |
7890 | unsigned int entry; | |
7891 | int page_count; | |
7892 | int delayed_unlock = 0; | |
7893 | boolean_t retval = TRUE; | |
7894 | ||
7895 | vm_object_lock_assert_exclusive(object); | |
7896 | assert(object->purgable != VM_PURGABLE_VOLATILE); | |
7897 | assert(object->purgable != VM_PURGABLE_EMPTY); | |
7898 | assert(object->pager == NULL); | |
7899 | assert(object->copy == NULL); | |
7900 | assert(object->shadow == NULL); | |
7901 | ||
7902 | tag = UPL_MEMORY_TAG(cntrl_flags); | |
7903 | page_count = object->resident_page_count; | |
7904 | dst_page = (vm_page_t)queue_first(&object->memq); | |
7905 | ||
7906 | vm_page_lock_queues(); | |
7907 | ||
7908 | while (page_count--) { | |
7909 | ||
7910 | if (dst_page->busy || | |
7911 | dst_page->fictitious || | |
7912 | dst_page->absent || | |
7913 | dst_page->error || | |
7914 | dst_page->cleaning || | |
7915 | dst_page->restart || | |
7916 | dst_page->encrypted || | |
7917 | dst_page->laundry) { | |
7918 | retval = FALSE; | |
7919 | goto done; | |
7920 | } | |
7921 | if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) { | |
7922 | retval = FALSE; | |
7923 | goto done; | |
7924 | } | |
7925 | dst_page->reference = TRUE; | |
7926 | ||
7927 | vm_page_wire(dst_page, tag, FALSE); | |
7928 | ||
7929 | if (!(cntrl_flags & UPL_COPYOUT_FROM)) { | |
7930 | SET_PAGE_DIRTY(dst_page, FALSE); | |
7931 | } | |
7932 | entry = (unsigned int)(dst_page->offset / PAGE_SIZE); | |
7933 | assert(entry >= 0 && entry < object->resident_page_count); | |
7934 | lite_list[entry>>5] |= 1 << (entry & 31); | |
7935 | ||
7936 | if (dst_page->phys_page > upl->highest_page) | |
7937 | upl->highest_page = dst_page->phys_page; | |
7938 | ||
7939 | if (user_page_list) { | |
7940 | user_page_list[entry].phys_addr = dst_page->phys_page; | |
7941 | user_page_list[entry].absent = dst_page->absent; | |
7942 | user_page_list[entry].dirty = dst_page->dirty; | |
7943 | user_page_list[entry].pageout = dst_page->pageout; | |
7944 | user_page_list[entry].precious = dst_page->precious; | |
7945 | user_page_list[entry].device = FALSE; | |
7946 | user_page_list[entry].speculative = FALSE; | |
7947 | user_page_list[entry].cs_validated = FALSE; | |
7948 | user_page_list[entry].cs_tainted = FALSE; | |
7949 | user_page_list[entry].cs_nx = FALSE; | |
7950 | user_page_list[entry].needed = FALSE; | |
7951 | user_page_list[entry].mark = FALSE; | |
7952 | } | |
7953 | if (delayed_unlock++ > 256) { | |
7954 | delayed_unlock = 0; | |
7955 | lck_mtx_yield(&vm_page_queue_lock); | |
7956 | ||
7957 | VM_CHECK_MEMORYSTATUS; | |
7958 | } | |
7959 | dst_page = (vm_page_t)queue_next(&dst_page->listq); | |
7960 | } | |
7961 | done: | |
7962 | vm_page_unlock_queues(); | |
7963 | ||
7964 | VM_CHECK_MEMORYSTATUS; | |
7965 | ||
7966 | return (retval); | |
7967 | } | |
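/*
 * Illustrative sketch (not part of this file): the lite_list bitmap math used
 * above.  Each 32-bit word of the wpl_array covers 32 pages of the UPL, so
 * page index 'entry' lives in word (entry >> 5) at bit (entry & 31); for
 * example, entry 70 maps to word 2, bit 6.  The hypothetical helpers below
 * simply restate the expressions used in vm_object_iopl_wire_full().
 */
static inline void
lite_list_set(wpl_array_t lite_list, unsigned int entry)
{
	lite_list[entry >> 5] |= 1 << (entry & 31);
}

static inline boolean_t
lite_list_test(wpl_array_t lite_list, unsigned int entry)
{
	return (lite_list[entry >> 5] & (1 << (entry & 31))) ? TRUE : FALSE;
}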
7968 | ||
7969 | ||
7970 | kern_return_t | |
7971 | vm_object_iopl_wire_empty(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list, | |
7972 | wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_object_offset_t *dst_offset, int page_count) | |
7973 | { | |
7974 | vm_page_t dst_page; | |
7975 | vm_tag_t tag; | |
7976 | boolean_t no_zero_fill = FALSE; | |
7977 | int interruptible; | |
7978 | int pages_wired = 0; | |
7979 | int pages_inserted = 0; | |
7980 | int entry = 0; | |
7981 | uint64_t delayed_ledger_update = 0; | |
7982 | kern_return_t ret = KERN_SUCCESS; | |
7983 | ||
7984 | vm_object_lock_assert_exclusive(object); | |
7985 | assert(object->purgable != VM_PURGABLE_VOLATILE); | |
7986 | assert(object->purgable != VM_PURGABLE_EMPTY); | |
7987 | assert(object->pager == NULL); | |
7988 | assert(object->copy == NULL); | |
7989 | assert(object->shadow == NULL); | |
7990 | ||
7991 | if (cntrl_flags & UPL_SET_INTERRUPTIBLE) | |
7992 | interruptible = THREAD_ABORTSAFE; | |
7993 | else | |
7994 | interruptible = THREAD_UNINT; | |
7995 | ||
7996 | if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO)) | |
7997 | no_zero_fill = TRUE; | |
7998 | ||
7999 | tag = UPL_MEMORY_TAG(cntrl_flags); | |
8000 | ||
8001 | while (page_count--) { | |
8002 | ||
8003 | while ( (dst_page = vm_page_grab()) == VM_PAGE_NULL) { | |
8004 | ||
8005 | OSAddAtomic(page_count, &vm_upl_wait_for_pages); | |
8006 | ||
8007 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); | |
8008 | ||
8009 | if (vm_page_wait(interruptible) == FALSE) { | |
8010 | /* | |
8011 | * interrupted case | |
8012 | */ | |
8013 | OSAddAtomic(-page_count, &vm_upl_wait_for_pages); | |
8014 | ||
8015 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1); | |
8016 | ||
8017 | ret = MACH_SEND_INTERRUPTED; | |
8018 | goto done; | |
8019 | } | |
8020 | OSAddAtomic(-page_count, &vm_upl_wait_for_pages); | |
8021 | ||
8022 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); | |
8023 | } | |
8024 | if (no_zero_fill == FALSE) | |
8025 | vm_page_zero_fill(dst_page); | |
8026 | else | |
8027 | dst_page->absent = TRUE; | |
8028 | ||
8029 | dst_page->reference = TRUE; | |
8030 | ||
8031 | if (!(cntrl_flags & UPL_COPYOUT_FROM)) { | |
8032 | SET_PAGE_DIRTY(dst_page, FALSE); | |
8033 | } | |
8034 | if (dst_page->absent == FALSE) { | |
8035 | dst_page->wire_count++; | |
8036 | pages_wired++; | |
8037 | PAGE_WAKEUP_DONE(dst_page); | |
8038 | } | |
8039 | pages_inserted++; | |
8040 | ||
8041 | vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update); | |
8042 | ||
8043 | lite_list[entry>>5] |= 1 << (entry & 31); | |
8044 | ||
8045 | if (dst_page->phys_page > upl->highest_page) | |
8046 | upl->highest_page = dst_page->phys_page; | |
8047 | ||
8048 | if (user_page_list) { | |
8049 | user_page_list[entry].phys_addr = dst_page->phys_page; | |
8050 | user_page_list[entry].absent = dst_page->absent; | |
8051 | user_page_list[entry].dirty = dst_page->dirty; | |
8052 | user_page_list[entry].pageout = FALSE; | |
8053 | user_page_list[entry].precious = FALSE; | |
8054 | user_page_list[entry].device = FALSE; | |
8055 | user_page_list[entry].speculative = FALSE; | |
8056 | user_page_list[entry].cs_validated = FALSE; | |
8057 | user_page_list[entry].cs_tainted = FALSE; | |
8058 | user_page_list[entry].cs_nx = FALSE; | |
8059 | user_page_list[entry].needed = FALSE; | |
8060 | user_page_list[entry].mark = FALSE; | |
8061 | } | |
8062 | entry++; | |
8063 | *dst_offset += PAGE_SIZE_64; | |
8064 | } | |
8065 | done: | |
8066 | if (pages_wired) { | |
8067 | vm_page_lockspin_queues(); | |
8068 | vm_page_wire_count += pages_wired; | |
8069 | vm_page_unlock_queues(); | |
8070 | } | |
8071 | if (pages_inserted) { | |
8072 | if (object->internal) { | |
8073 | OSAddAtomic(pages_inserted, &vm_page_internal_count); | |
8074 | } else { | |
8075 | OSAddAtomic(pages_inserted, &vm_page_external_count); | |
8076 | } | |
8077 | } | |
8078 | if (delayed_ledger_update) { | |
8079 | task_t owner; | |
8080 | ||
8081 | owner = object->vo_purgeable_owner; | |
8082 | assert(owner); | |
8083 | ||
8084 | /* more non-volatile bytes */ | |
8085 | ledger_credit(owner->ledger, | |
8086 | task_ledgers.purgeable_nonvolatile, | |
8087 | delayed_ledger_update); | |
8088 | /* more footprint */ | |
8089 | ledger_credit(owner->ledger, | |
8090 | task_ledgers.phys_footprint, | |
8091 | delayed_ledger_update); | |
8092 | } | |
8093 | return (ret); | |
8094 | } | |
8095 | ||
8096 | ||
8097 | unsigned int vm_object_iopl_request_sleep_for_cleaning = 0; | |
8098 | ||
8099 | ||
8100 | kern_return_t | |
8101 | vm_object_iopl_request( | |
8102 | vm_object_t object, | |
8103 | vm_object_offset_t offset, | |
8104 | upl_size_t size, | |
8105 | upl_t *upl_ptr, | |
8106 | upl_page_info_array_t user_page_list, | |
8107 | unsigned int *page_list_count, | |
8108 | upl_control_flags_t cntrl_flags) | |
8109 | { | |
8110 | vm_page_t dst_page; | |
8111 | vm_object_offset_t dst_offset; | |
8112 | upl_size_t xfer_size; | |
8113 | upl_t upl = NULL; | |
8114 | unsigned int entry; | |
8115 | wpl_array_t lite_list = NULL; | |
8116 | int no_zero_fill = FALSE; | |
8117 | unsigned int size_in_pages; | |
8118 | u_int32_t psize; | |
8119 | kern_return_t ret; | |
8120 | vm_prot_t prot; | |
8121 | struct vm_object_fault_info fault_info; | |
8122 | struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; | |
8123 | struct vm_page_delayed_work *dwp; | |
8124 | int dw_count; | |
8125 | int dw_limit; | |
8126 | int dw_index; | |
8127 | boolean_t caller_lookup; | |
8128 | int io_tracking_flag = 0; | |
8129 | int interruptible; | |
8130 | ||
8131 | boolean_t set_cache_attr_needed = FALSE; | |
8132 | boolean_t free_wired_pages = FALSE; | |
8133 | boolean_t fast_path_empty_req = FALSE; | |
8134 | boolean_t fast_path_full_req = FALSE; | |
8135 | ||
8136 | if (cntrl_flags & ~UPL_VALID_FLAGS) { | |
8137 | /* | |
8138 | * For forward compatibility's sake, | |
8139 | * reject any unknown flag. | |
8140 | */ | |
8141 | return KERN_INVALID_VALUE; | |
8142 | } | |
8143 | if (vm_lopage_needed == FALSE) | |
8144 | cntrl_flags &= ~UPL_NEED_32BIT_ADDR; | |
8145 | ||
8146 | if (cntrl_flags & UPL_NEED_32BIT_ADDR) { | |
8147 | if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE)) | |
8148 | return KERN_INVALID_VALUE; | |
8149 | ||
8150 | if (object->phys_contiguous) { | |
8151 | if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address) | |
8152 | return KERN_INVALID_ADDRESS; | |
8153 | ||
8154 | if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address) | |
8155 | return KERN_INVALID_ADDRESS; | |
8156 | } | |
8157 | } | |
8158 | ||
8159 | if (cntrl_flags & UPL_ENCRYPT) { | |
8160 | /* | |
8161 | * ENCRYPTED SWAP: | |
8162 | * The paging path doesn't use this interface, | |
8163 | * so we don't support the UPL_ENCRYPT flag | |
8164 | * here. We won't encrypt the pages. | |
8165 | */ | |
8166 | assert(! (cntrl_flags & UPL_ENCRYPT)); | |
8167 | } | |
8168 | if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO)) | |
8169 | no_zero_fill = TRUE; | |
8170 | ||
8171 | if (cntrl_flags & UPL_COPYOUT_FROM) | |
8172 | prot = VM_PROT_READ; | |
8173 | else | |
8174 | prot = VM_PROT_READ | VM_PROT_WRITE; | |
8175 | ||
8176 | if ((!object->internal) && (object->paging_offset != 0)) | |
8177 | panic("vm_object_iopl_request: external object with non-zero paging offset\n"); | |
8178 | ||
8179 | #if CONFIG_IOSCHED || UPL_DEBUG | |
8180 | if ((object->io_tracking && object != kernel_object) || upl_debug_enabled) | |
8181 | io_tracking_flag |= UPL_CREATE_IO_TRACKING; | |
8182 | #endif | |
8183 | ||
8184 | #if CONFIG_IOSCHED | |
8185 | if (object->io_tracking) { | |
8186 | /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */ | |
8187 | if (object != kernel_object) | |
8188 | io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP; | |
8189 | } | |
8190 | #endif | |
8191 | ||
8192 | if (object->phys_contiguous) | |
8193 | psize = PAGE_SIZE; | |
8194 | else | |
8195 | psize = size; | |
8196 | ||
8197 | if (cntrl_flags & UPL_SET_INTERNAL) { | |
8198 | upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize); | |
8199 | ||
8200 | user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); | |
8201 | lite_list = (wpl_array_t) (((uintptr_t)user_page_list) + | |
8202 | ((psize / PAGE_SIZE) * sizeof(upl_page_info_t))); | |
8203 | if (size == 0) { | |
8204 | user_page_list = NULL; | |
8205 | lite_list = NULL; | |
8206 | } | |
8207 | } else { | |
8208 | upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize); | |
8209 | ||
8210 | lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); | |
8211 | if (size == 0) { | |
8212 | lite_list = NULL; | |
8213 | } | |
8214 | } | |
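	/*
	 * Illustrative note (not code from this file): for an internal UPL the
	 * pointer arithmetic above lays the allocation out as
	 *
	 *	[ struct upl ][ upl_page_info_t list, psize/PAGE_SIZE entries ][ wpl_array bitmap ]
	 *
	 * so the page list and the "lite list" bitmap both live in the same
	 * allocation directly behind the upl structure; for an external UPL
	 * only the bitmap follows the structure.
	 */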
8215 | if (user_page_list) | |
8216 | user_page_list[0].device = FALSE; | |
8217 | *upl_ptr = upl; | |
8218 | ||
8219 | upl->map_object = object; | |
8220 | upl->size = size; | |
8221 | ||
8222 | size_in_pages = size / PAGE_SIZE; | |
8223 | ||
8224 | if (object == kernel_object && | |
8225 | !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) { | |
8226 | upl->flags |= UPL_KERNEL_OBJECT; | |
8227 | #if UPL_DEBUG | |
8228 | vm_object_lock(object); | |
8229 | #else | |
8230 | vm_object_lock_shared(object); | |
8231 | #endif | |
8232 | } else { | |
8233 | vm_object_lock(object); | |
8234 | vm_object_activity_begin(object); | |
8235 | } | |
8236 | /* | |
8237 | * paging in progress also protects the paging_offset | |
8238 | */ | |
8239 | upl->offset = offset + object->paging_offset; | |
8240 | ||
8241 | if (cntrl_flags & UPL_BLOCK_ACCESS) { | |
8242 | /* | |
8243 | * The user requested that access to the pages in this UPL | |
8244 | * be blocked until the UPL is committed or aborted. | |
8245 | */ | |
8246 | upl->flags |= UPL_ACCESS_BLOCKED; | |
8247 | } | |
8248 | ||
8249 | #if CONFIG_IOSCHED || UPL_DEBUG | |
8250 | if (upl->flags & UPL_TRACKED_BY_OBJECT) { | |
8251 | vm_object_activity_begin(object); | |
8252 | queue_enter(&object->uplq, upl, upl_t, uplq); | |
8253 | } | |
8254 | #endif | |
8255 | ||
8256 | if (object->phys_contiguous) { | |
8257 | ||
8258 | if (upl->flags & UPL_ACCESS_BLOCKED) { | |
8259 | assert(!object->blocked_access); | |
8260 | object->blocked_access = TRUE; | |
8261 | } | |
8262 | ||
8263 | vm_object_unlock(object); | |
8264 | ||
8265 | /* | |
8266 | * don't need any shadow mappings for this one | |
8267 | * since it is already I/O memory | |
8268 | */ | |
8269 | upl->flags |= UPL_DEVICE_MEMORY; | |
8270 | ||
8271 | upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT); | |
8272 | ||
8273 | if (user_page_list) { | |
8274 | user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT); | |
8275 | user_page_list[0].device = TRUE; | |
8276 | } | |
8277 | if (page_list_count != NULL) { | |
8278 | if (upl->flags & UPL_INTERNAL) | |
8279 | *page_list_count = 0; | |
8280 | else | |
8281 | *page_list_count = 1; | |
8282 | } | |
8283 | return KERN_SUCCESS; | |
8284 | } | |
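	/*
	 * Illustrative worked example (not code from this file), assuming
	 * PAGE_SHIFT == 12: with offset 0x3000, vo_shadow_offset 0x100000 and
	 * size 0x2000, the device-memory path above records a first physical
	 * page number of (0x3000 + 0x100000) >> 12 == 0x103 and a
	 * highest_page of (0x3000 + 0x100000 + 0x2000 - 1) >> 12 == 0x104,
	 * i.e. the last physical page the UPL spans.
	 */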
8285 | if (object != kernel_object && object != compressor_object) { | |
8286 | /* | |
8287 | * Protect user space from future COW operations | |
8288 | */ | |
8289 | #if VM_OBJECT_TRACKING_OP_TRUESHARE | |
8290 | if (!object->true_share && | |
8291 | vm_object_tracking_inited) { | |
8292 | void *bt[VM_OBJECT_TRACKING_BTDEPTH]; | |
8293 | int num = 0; | |
8294 | ||
8295 | num = OSBacktrace(bt, | |
8296 | VM_OBJECT_TRACKING_BTDEPTH); | |
8297 | btlog_add_entry(vm_object_tracking_btlog, | |
8298 | object, | |
8299 | VM_OBJECT_TRACKING_OP_TRUESHARE, | |
8300 | bt, | |
8301 | num); | |
8302 | } | |
8303 | #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ | |
8304 | ||
8305 | object->true_share = TRUE; | |
8306 | ||
8307 | if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) | |
8308 | object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; | |
8309 | } | |
8310 | ||
8311 | if (!(cntrl_flags & UPL_COPYOUT_FROM) && | |
8312 | object->copy != VM_OBJECT_NULL) { | |
8313 | /* | |
8314 | * Honor copy-on-write obligations | |
8315 | * | |
8316 | * The caller is gathering these pages and | |
8317 | * might modify their contents. We need to | |
8318 | * make sure that the copy object has its own | |
8319 | * private copies of these pages before we let | |
8320 | * the caller modify them. | |
8321 | * | |
8322 | * NOTE: someone else could map the original object | |
8323 | * after we've done this copy-on-write here, and they | |
8324 | * could then see an inconsistent picture of the memory | |
8325 | * while it's being modified via the UPL. To prevent this, | |
8326 | * we would have to block access to these pages until the | |
8327 | * UPL is released. We could use the UPL_BLOCK_ACCESS | |
8328 | * code path for that... | |
8329 | */ | |
8330 | vm_object_update(object, | |
8331 | offset, | |
8332 | size, | |
8333 | NULL, | |
8334 | NULL, | |
8335 | FALSE, /* should_return */ | |
8336 | MEMORY_OBJECT_COPY_SYNC, | |
8337 | VM_PROT_NO_CHANGE); | |
8338 | #if DEVELOPMENT || DEBUG | |
8339 | iopl_cow++; | |
8340 | iopl_cow_pages += size >> PAGE_SHIFT; | |
8341 | #endif | |
8342 | } | |
8343 | if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) && | |
8344 | object->purgable != VM_PURGABLE_VOLATILE && | |
8345 | object->purgable != VM_PURGABLE_EMPTY && | |
8346 | object->copy == NULL && | |
8347 | size == object->vo_size && | |
8348 | offset == 0 && | |
8349 | object->shadow == NULL && | |
8350 | object->pager == NULL) | |
8351 | { | |
8352 | if (object->resident_page_count == size_in_pages) | |
8353 | { | |
8354 | assert(object != compressor_object); | |
8355 | assert(object != kernel_object); | |
8356 | fast_path_full_req = TRUE; | |
8357 | } | |
8358 | else if (object->resident_page_count == 0) | |
8359 | { | |
8360 | assert(object != compressor_object); | |
8361 | assert(object != kernel_object); | |
8362 | fast_path_empty_req = TRUE; | |
8363 | set_cache_attr_needed = TRUE; | |
8364 | } | |
8365 | } | |
8366 | ||
8367 | if (cntrl_flags & UPL_SET_INTERRUPTIBLE) | |
8368 | interruptible = THREAD_ABORTSAFE; | |
8369 | else | |
8370 | interruptible = THREAD_UNINT; | |
8371 | ||
8372 | entry = 0; | |
8373 | ||
8374 | xfer_size = size; | |
8375 | dst_offset = offset; | |
8376 | dw_count = 0; | |
8377 | ||
8378 | if (fast_path_full_req) { | |
8379 | ||
8380 | if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags) == TRUE) | |
8381 | goto finish; | |
8382 | /* | |
8383 | * we couldn't complete the processing of this request on the fast path | |
8384 | * so fall through to the slow path and finish up | |
8385 | */ | |
8386 | ||
8387 | } else if (fast_path_empty_req) { | |
8388 | ||
8389 | if (cntrl_flags & UPL_REQUEST_NO_FAULT) { | |
8390 | ret = KERN_MEMORY_ERROR; | |
8391 | goto return_err; | |
8392 | } | |
8393 | ret = vm_object_iopl_wire_empty(object, upl, user_page_list, lite_list, cntrl_flags, &dst_offset, size_in_pages); | |
8394 | ||
8395 | if (ret) { | |
8396 | free_wired_pages = TRUE; | |
8397 | goto return_err; | |
8398 | } | |
8399 | goto finish; | |
8400 | } | |
8401 | ||
8402 | fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; | |
8403 | fault_info.user_tag = 0; | |
8404 | fault_info.lo_offset = offset; | |
8405 | fault_info.hi_offset = offset + xfer_size; | |
8406 | fault_info.no_cache = FALSE; | |
8407 | fault_info.stealth = FALSE; | |
8408 | fault_info.io_sync = FALSE; | |
8409 | fault_info.cs_bypass = FALSE; | |
8410 | fault_info.mark_zf_absent = TRUE; | |
8411 | fault_info.interruptible = interruptible; | |
8412 | fault_info.batch_pmap_op = TRUE; | |
8413 | ||
8414 | dwp = &dw_array[0]; | |
8415 | dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); | |
8416 | ||
8417 | while (xfer_size) { | |
8418 | vm_fault_return_t result; | |
8419 | ||
8420 | dwp->dw_mask = 0; | |
8421 | ||
8422 | if (fast_path_full_req) { | |
8423 | /* | |
8424 | * if we get here, it means that we ran into a page | |
8425 | * state we couldn't handle in the fast path and | |
8426 | * bailed out to the slow path... since the order | |
8427 | * we look at pages is different between the 2 paths, | |
8428 | * the following check is needed to determine whether | |
8429 | * this page was already processed in the fast path | |
8430 | */ | |
8431 | if (lite_list[entry>>5] & (1 << (entry & 31))) | |
8432 | goto skip_page; | |
8433 | } | |
8434 | dst_page = vm_page_lookup(object, dst_offset); | |
8435 | ||
8436 | /* | |
8437 | * ENCRYPTED SWAP: | |
8438 | * If the page is encrypted, we need to decrypt it, | |
8439 | * so force a soft page fault. | |
8440 | */ | |
8441 | if (dst_page == VM_PAGE_NULL || | |
8442 | dst_page->busy || | |
8443 | dst_page->encrypted || | |
8444 | dst_page->error || | |
8445 | dst_page->restart || | |
8446 | dst_page->absent || | |
8447 | dst_page->fictitious) { | |
8448 | ||
8449 | if (object == kernel_object) | |
8450 | panic("vm_object_iopl_request: missing/bad page in kernel object\n"); | |
8451 | if (object == compressor_object) | |
8452 | panic("vm_object_iopl_request: missing/bad page in compressor object\n"); | |
8453 | ||
8454 | if (cntrl_flags & UPL_REQUEST_NO_FAULT) { | |
8455 | ret = KERN_MEMORY_ERROR; | |
8456 | goto return_err; | |
8457 | } | |
8458 | set_cache_attr_needed = TRUE; | |
8459 | ||
8460 | /* | |
8461 | * We just looked up the page and the result remains valid | |
8462 | * until the object lock is released, so send it to | |
8463 | * vm_fault_page() (as "dst_page"), to avoid having to | |
8464 | * look it up again there. | |
8465 | */ | |
8466 | caller_lookup = TRUE; | |
8467 | ||
8468 | do { | |
8469 | vm_page_t top_page; | |
8470 | kern_return_t error_code; | |
8471 | ||
8472 | fault_info.cluster_size = xfer_size; | |
8473 | ||
8474 | vm_object_paging_begin(object); | |
8475 | ||
8476 | result = vm_fault_page(object, dst_offset, | |
8477 | prot | VM_PROT_WRITE, FALSE, | |
8478 | caller_lookup, | |
8479 | &prot, &dst_page, &top_page, | |
8480 | (int *)0, | |
8481 | &error_code, no_zero_fill, | |
8482 | FALSE, &fault_info); | |
8483 | ||
8484 | /* our lookup is no longer valid at this point */ | |
8485 | caller_lookup = FALSE; | |
8486 | ||
8487 | switch (result) { | |
8488 | ||
8489 | case VM_FAULT_SUCCESS: | |
8490 | ||
8491 | if ( !dst_page->absent) { | |
8492 | PAGE_WAKEUP_DONE(dst_page); | |
8493 | } else { | |
8494 | /* | |
8495 | * we only get back an absent page if we | |
8496 | * requested that it not be zero-filled | |
8497 | * because we are about to fill it via I/O | |
8498 | * | |
8499 | * absent pages should be left BUSY | |
8500 | * to prevent them from being faulted | |
8501 | * into an address space before we've | |
8502 | * had a chance to complete the I/O on | |
8503 | * them since they may contain info that | |
8504 | * shouldn't be seen by the faulting task | |
8505 | */ | |
8506 | } | |
8507 | /* | |
8508 | * Release paging references and | |
8509 | * top-level placeholder page, if any. | |
8510 | */ | |
8511 | if (top_page != VM_PAGE_NULL) { | |
8512 | vm_object_t local_object; | |
8513 | ||
8514 | local_object = top_page->object; | |
8515 | ||
8516 | if (top_page->object != dst_page->object) { | |
8517 | vm_object_lock(local_object); | |
8518 | VM_PAGE_FREE(top_page); | |
8519 | vm_object_paging_end(local_object); | |
8520 | vm_object_unlock(local_object); | |
8521 | } else { | |
8522 | VM_PAGE_FREE(top_page); | |
8523 | vm_object_paging_end(local_object); | |
8524 | } | |
8525 | } | |
8526 | vm_object_paging_end(object); | |
8527 | break; | |
8528 | ||
8529 | case VM_FAULT_RETRY: | |
8530 | vm_object_lock(object); | |
8531 | break; | |
8532 | ||
8533 | case VM_FAULT_MEMORY_SHORTAGE: | |
8534 | OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages); | |
8535 | ||
8536 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); | |
8537 | ||
8538 | if (vm_page_wait(interruptible)) { | |
8539 | OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages); | |
8540 | ||
8541 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); | |
8542 | vm_object_lock(object); | |
8543 | ||
8544 | break; | |
8545 | } | |
8546 | OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages); | |
8547 | ||
8548 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1); | |
8549 | ||
8550 | /* fall thru */ | |
8551 | ||
8552 | case VM_FAULT_INTERRUPTED: | |
8553 | error_code = MACH_SEND_INTERRUPTED; | |
8554 | case VM_FAULT_MEMORY_ERROR: | |
8555 | memory_error: | |
8556 | ret = (error_code ? error_code: KERN_MEMORY_ERROR); | |
8557 | ||
8558 | vm_object_lock(object); | |
8559 | goto return_err; | |
8560 | ||
8561 | case VM_FAULT_SUCCESS_NO_VM_PAGE: | |
8562 | /* success but no page: fail */ | |
8563 | vm_object_paging_end(object); | |
8564 | vm_object_unlock(object); | |
8565 | goto memory_error; | |
8566 | ||
8567 | default: | |
8568 | panic("vm_object_iopl_request: unexpected error" | |
8569 | " 0x%x from vm_fault_page()\n", result); | |
8570 | } | |
8571 | } while (result != VM_FAULT_SUCCESS); | |
8572 | ||
8573 | } | |
8574 | if (upl->flags & UPL_KERNEL_OBJECT) | |
8575 | goto record_phys_addr; | |
8576 | ||
8577 | if (dst_page->compressor) { | |
8578 | dst_page->busy = TRUE; | |
8579 | goto record_phys_addr; | |
8580 | } | |
8581 | ||
8582 | if (dst_page->cleaning) { | |
8583 | /* | |
8584 | * Someone else is cleaning this page in place. | |
8585 | * In theory, we should be able to proceed and use this | |
8586 | * page, but they'll probably end up clearing the "busy" | |
8587 | * bit on it in upl_commit_range(); since they didn't set | |
8588 | * that bit, they would clear our "busy" bit and open | |
8589 | * us to race conditions. | |
8590 | * We'd better wait for the cleaning to complete and | |
8591 | * then try again. | |
8592 | */ | |
8593 | vm_object_iopl_request_sleep_for_cleaning++; | |
8594 | PAGE_SLEEP(object, dst_page, THREAD_UNINT); | |
8595 | continue; | |
8596 | } | |
8597 | if (dst_page->laundry) { | |
8598 | dst_page->pageout = FALSE; | |
8599 | ||
8600 | vm_pageout_steal_laundry(dst_page, FALSE); | |
8601 | } | |
8602 | if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) && | |
8603 | dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) { | |
8604 | vm_page_t low_page; | |
8605 | int refmod; | |
8606 | ||
8607 | /* | |
8608 | * support devices that can't DMA above 32 bits | |
8609 | * by substituting pages from a pool of low address | |
8610 | * memory for any pages we find above the 4G mark. | |
8611 | * We can't substitute if the page is already wired, because | |
8612 | * we don't know whether that physical address has been | |
8613 | * handed out to some other 64-bit-capable DMA device to use. | |
8614 | */ | |
8615 | if (VM_PAGE_WIRED(dst_page)) { | |
8616 | ret = KERN_PROTECTION_FAILURE; | |
8617 | goto return_err; | |
8618 | } | |
8619 | low_page = vm_page_grablo(); | |
8620 | ||
8621 | if (low_page == VM_PAGE_NULL) { | |
8622 | ret = KERN_RESOURCE_SHORTAGE; | |
8623 | goto return_err; | |
8624 | } | |
8625 | /* | |
8626 | * from here until the vm_page_replace completes | |
8627 | * we mustn't drop the object lock... we don't | |
8628 | * want anyone refaulting this page in and using | |
8629 | * it after we disconnect it... we want the fault | |
8630 | * to find the new page being substituted. | |
8631 | */ | |
8632 | if (dst_page->pmapped) | |
8633 | refmod = pmap_disconnect(dst_page->phys_page); | |
8634 | else | |
8635 | refmod = 0; | |
8636 | ||
8637 | if (!dst_page->absent) | |
8638 | vm_page_copy(dst_page, low_page); | |
8639 | ||
8640 | low_page->reference = dst_page->reference; | |
8641 | low_page->dirty = dst_page->dirty; | |
8642 | low_page->absent = dst_page->absent; | |
8643 | ||
8644 | if (refmod & VM_MEM_REFERENCED) | |
8645 | low_page->reference = TRUE; | |
8646 | if (refmod & VM_MEM_MODIFIED) { | |
8647 | SET_PAGE_DIRTY(low_page, FALSE); | |
8648 | } | |
8649 | ||
8650 | vm_page_replace(low_page, object, dst_offset); | |
8651 | ||
8652 | dst_page = low_page; | |
8653 | /* | |
8654 | * vm_page_grablo returned the page marked | |
8655 | * BUSY... we don't need a PAGE_WAKEUP_DONE | |
8656 | * here, because we've never dropped the object lock | |
8657 | */ | |
8658 | if ( !dst_page->absent) | |
8659 | dst_page->busy = FALSE; | |
8660 | } | |
8661 | if ( !dst_page->busy) | |
8662 | dwp->dw_mask |= DW_vm_page_wire; | |
8663 | ||
8664 | if (cntrl_flags & UPL_BLOCK_ACCESS) { | |
8665 | /* | |
8666 | * Mark the page "busy" to block any future page fault | |
8667 | * on this page in addition to wiring it. | |
8668 | * We'll also remove the mapping | |
8669 | * of all these pages before leaving this routine. | |
8670 | */ | |
8671 | assert(!dst_page->fictitious); | |
8672 | dst_page->busy = TRUE; | |
8673 | } | |
8674 | /* | |
8675 | * expect the page to be used | |
8676 | * page queues lock must be held to set 'reference' | |
8677 | */ | |
8678 | dwp->dw_mask |= DW_set_reference; | |
8679 | ||
8680 | if (!(cntrl_flags & UPL_COPYOUT_FROM)) { | |
8681 | SET_PAGE_DIRTY(dst_page, TRUE); | |
8682 | } | |
8683 | if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) { | |
8684 | pmap_sync_page_attributes_phys(dst_page->phys_page); | |
8685 | dst_page->written_by_kernel = FALSE; | |
8686 | } | |
8687 | ||
8688 | record_phys_addr: | |
8689 | if (dst_page->busy) | |
8690 | upl->flags |= UPL_HAS_BUSY; | |
8691 | ||
8692 | lite_list[entry>>5] |= 1 << (entry & 31); | |
8693 | ||
8694 | if (dst_page->phys_page > upl->highest_page) | |
8695 | upl->highest_page = dst_page->phys_page; | |
8696 | ||
8697 | if (user_page_list) { | |
8698 | user_page_list[entry].phys_addr = dst_page->phys_page; | |
8699 | user_page_list[entry].pageout = dst_page->pageout; | |
8700 | user_page_list[entry].absent = dst_page->absent; | |
8701 | user_page_list[entry].dirty = dst_page->dirty; | |
8702 | user_page_list[entry].precious = dst_page->precious; | |
8703 | user_page_list[entry].device = FALSE; | |
8704 | user_page_list[entry].needed = FALSE; | |
8705 | if (dst_page->clustered == TRUE) | |
8706 | user_page_list[entry].speculative = dst_page->speculative; | |
8707 | else | |
8708 | user_page_list[entry].speculative = FALSE; | |
8709 | user_page_list[entry].cs_validated = dst_page->cs_validated; | |
8710 | user_page_list[entry].cs_tainted = dst_page->cs_tainted; | |
8711 | user_page_list[entry].cs_nx = dst_page->cs_nx; | |
8712 | user_page_list[entry].mark = FALSE; | |
8713 | } | |
8714 | if (object != kernel_object && object != compressor_object) { | |
8715 | /* | |
8716 | * someone is explicitly grabbing this page... | |
8717 | * update clustered and speculative state | |
8718 | * | |
8719 | */ | |
8720 | if (dst_page->clustered) | |
8721 | VM_PAGE_CONSUME_CLUSTERED(dst_page); | |
8722 | } | |
8723 | skip_page: | |
8724 | entry++; | |
8725 | dst_offset += PAGE_SIZE_64; | |
8726 | xfer_size -= PAGE_SIZE; | |
8727 | ||
8728 | if (dwp->dw_mask) { | |
8729 | VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count); | |
8730 | ||
8731 | if (dw_count >= dw_limit) { | |
8732 | vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count); | |
8733 | ||
8734 | dwp = &dw_array[0]; | |
8735 | dw_count = 0; | |
8736 | } | |
8737 | } | |
8738 | } | |
8739 | assert(entry == size_in_pages); | |
8740 | ||
8741 | if (dw_count) | |
8742 | vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count); | |
8743 | finish: | |
8744 | if (user_page_list && set_cache_attr_needed == TRUE) | |
8745 | vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE); | |
8746 | ||
8747 | if (page_list_count != NULL) { | |
8748 | if (upl->flags & UPL_INTERNAL) | |
8749 | *page_list_count = 0; | |
8750 | else if (*page_list_count > size_in_pages) | |
8751 | *page_list_count = size_in_pages; | |
8752 | } | |
8753 | vm_object_unlock(object); | |
8754 | ||
8755 | if (cntrl_flags & UPL_BLOCK_ACCESS) { | |
8756 | /* | |
8757 | * We've marked all the pages "busy" so that future | |
8758 | * page faults will block. | |
8759 | * Now remove the mapping for these pages, so that they | |
8760 | * can't be accessed without causing a page fault. | |
8761 | */ | |
8762 | vm_object_pmap_protect(object, offset, (vm_object_size_t)size, | |
8763 | PMAP_NULL, 0, VM_PROT_NONE); | |
8764 | assert(!object->blocked_access); | |
8765 | object->blocked_access = TRUE; | |
8766 | } | |
8767 | ||
8768 | return KERN_SUCCESS; | |
8769 | ||
8770 | return_err: | |
8771 | dw_index = 0; | |
8772 | ||
8773 | for (; offset < dst_offset; offset += PAGE_SIZE) { | |
8774 | boolean_t need_unwire; | |
8775 | ||
8776 | dst_page = vm_page_lookup(object, offset); | |
8777 | ||
8778 | if (dst_page == VM_PAGE_NULL) | |
8779 | panic("vm_object_iopl_request: Wired page missing. \n"); | |
8780 | ||
8781 | /* | |
8782 | * if we've already processed this page in an earlier | |
8783 | * dw_do_work, we need to undo the wiring... we will | |
8784 | * leave the dirty and reference bits on if they | |
8785 | * were set, since we don't have a good way of knowing | |
8786 | * what the previous state was and we won't get here | |
8787 | * under any normal circumstances... we will always | |
8788 | * clear BUSY and wakeup any waiters via vm_page_free | |
8789 | * or PAGE_WAKEUP_DONE | |
8790 | */ | |
8791 | need_unwire = TRUE; | |
8792 | ||
8793 | if (dw_count) { | |
8794 | if (dw_array[dw_index].dw_m == dst_page) { | |
8795 | /* | |
8796 | * still in the deferred work list | |
8797 | * which means we haven't yet called | |
8798 | * vm_page_wire on this page | |
8799 | */ | |
8800 | need_unwire = FALSE; | |
8801 | ||
8802 | dw_index++; | |
8803 | dw_count--; | |
8804 | } | |
8805 | } | |
8806 | vm_page_lock_queues(); | |
8807 | ||
8808 | if (dst_page->absent || free_wired_pages == TRUE) { | |
8809 | vm_page_free(dst_page); | |
8810 | ||
8811 | need_unwire = FALSE; | |
8812 | } else { | |
8813 | if (need_unwire == TRUE) | |
8814 | vm_page_unwire(dst_page, TRUE); | |
8815 | ||
8816 | PAGE_WAKEUP_DONE(dst_page); | |
8817 | } | |
8818 | vm_page_unlock_queues(); | |
8819 | ||
8820 | if (need_unwire == TRUE) | |
8821 | VM_STAT_INCR(reactivations); | |
8822 | } | |
8823 | #if UPL_DEBUG | |
8824 | upl->upl_state = 2; | |
8825 | #endif | |
8826 | if (! (upl->flags & UPL_KERNEL_OBJECT)) { | |
8827 | vm_object_activity_end(object); | |
8828 | vm_object_collapse(object, 0, TRUE); | |
8829 | } | |
8830 | vm_object_unlock(object); | |
8831 | upl_destroy(upl); | |
8832 | ||
8833 | return ret; | |
8834 | } | |
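/*
 * Illustrative sketch (not part of this file): the delayed-work batching
 * pattern used by vm_object_iopl_request() above.  Per-page queue operations
 * (wire, set-reference, ...) are recorded in a small on-stack array and only
 * flushed to vm_page_do_delayed_work() once the array fills, so the page
 * queues lock is taken once per batch instead of once per page.  The object
 * is assumed locked, as in the function above; the function name and the
 * pages/npages parameters are illustrative only.
 */
static void
example_delayed_work_batch(vm_object_t object, upl_control_flags_t cntrl_flags,
			   vm_page_t *pages, int npages)
{
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;
	int				dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
	int				i;

	for (i = 0; i < npages; i++) {
		/* record the per-page work instead of doing it immediately */
		dwp->dw_mask = DW_vm_page_wire | DW_set_reference;

		VM_PAGE_ADD_DELAYED_WORK(dwp, pages[i], dw_count);

		if (dw_count >= dw_limit) {
			/* flush a full batch under a single queues-lock acquisition */
			vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
			dwp = &dw_array[0];
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
}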
8835 | ||
8836 | kern_return_t | |
8837 | upl_transpose( | |
8838 | upl_t upl1, | |
8839 | upl_t upl2) | |
8840 | { | |
8841 | kern_return_t retval; | |
8842 | boolean_t upls_locked; | |
8843 | vm_object_t object1, object2; | |
8844 | ||
8845 | if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) { | |
8846 | return KERN_INVALID_ARGUMENT; | |
8847 | } | |
8848 | ||
8849 | upls_locked = FALSE; | |
8850 | ||
8851 | /* | |
8852 | * Since we need to lock both UPLs at the same time, | |
8853 | * avoid deadlocks by always taking locks in the same order. | |
8854 | */ | |
8855 | if (upl1 < upl2) { | |
8856 | upl_lock(upl1); | |
8857 | upl_lock(upl2); | |
8858 | } else { | |
8859 | upl_lock(upl2); | |
8860 | upl_lock(upl1); | |
8861 | } | |
8862 | upls_locked = TRUE; /* the UPLs will need to be unlocked */ | |
8863 | ||
8864 | object1 = upl1->map_object; | |
8865 | object2 = upl2->map_object; | |
8866 | ||
8867 | if (upl1->offset != 0 || upl2->offset != 0 || | |
8868 | upl1->size != upl2->size) { | |
8869 | /* | |
8870 | * We deal only with full objects, not subsets. | |
8871 | * That's because we exchange the entire backing store info | |
8872 | * for the objects: pager, resident pages, etc... We can't do | |
8873 | * only part of it. | |
8874 | */ | |
8875 | retval = KERN_INVALID_VALUE; | |
8876 | goto done; | |
8877 | } | |
8878 | ||
8879 | /* | |
8880 | * Transpose the VM objects' backing store. | |
8881 | */ | |
8882 | retval = vm_object_transpose(object1, object2, | |
8883 | (vm_object_size_t) upl1->size); | |
8884 | ||
8885 | if (retval == KERN_SUCCESS) { | |
8886 | /* | |
8887 | * Make each UPL point to the correct VM object, i.e. the | |
8888 | * object holding the pages that the UPL refers to... | |
8889 | */ | |
8890 | #if CONFIG_IOSCHED || UPL_DEBUG | |
8891 | if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) { | |
8892 | vm_object_lock(object1); | |
8893 | vm_object_lock(object2); | |
8894 | } | |
8895 | if (upl1->flags & UPL_TRACKED_BY_OBJECT) | |
8896 | queue_remove(&object1->uplq, upl1, upl_t, uplq); | |
8897 | if (upl2->flags & UPL_TRACKED_BY_OBJECT) | |
8898 | queue_remove(&object2->uplq, upl2, upl_t, uplq); | |
8899 | #endif | |
8900 | upl1->map_object = object2; | |
8901 | upl2->map_object = object1; | |
8902 | ||
8903 | #if CONFIG_IOSCHED || UPL_DEBUG | |
8904 | if (upl1->flags & UPL_TRACKED_BY_OBJECT) | |
8905 | queue_enter(&object2->uplq, upl1, upl_t, uplq); | |
8906 | if (upl2->flags & UPL_TRACKED_BY_OBJECT) | |
8907 | queue_enter(&object1->uplq, upl2, upl_t, uplq); | |
8908 | if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) { | |
8909 | vm_object_unlock(object2); | |
8910 | vm_object_unlock(object1); | |
8911 | } | |
8912 | #endif | |
8913 | } | |
8914 | ||
8915 | done: | |
8916 | /* | |
8917 | * Cleanup. | |
8918 | */ | |
8919 | if (upls_locked) { | |
8920 | upl_unlock(upl1); | |
8921 | upl_unlock(upl2); | |
8922 | upls_locked = FALSE; | |
8923 | } | |
8924 | ||
8925 | return retval; | |
8926 | } | |
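/*
 * Illustrative sketch (not part of this file): the address-ordered locking
 * idiom used by upl_transpose() above, factored into a hypothetical helper.
 * Taking both locks in a single global order (here, ascending pointer value)
 * guarantees that two threads transposing the same pair of UPLs can never
 * each hold one lock while waiting for the other.
 */
static void
example_upl_lock_pair(upl_t a, upl_t b)
{
	if (a < b) {
		upl_lock(a);
		upl_lock(b);
	} else {
		upl_lock(b);
		upl_lock(a);
	}
}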
8927 | ||
8928 | void | |
8929 | upl_range_needed( | |
8930 | upl_t upl, | |
8931 | int index, | |
8932 | int count) | |
8933 | { | |
8934 | upl_page_info_t *user_page_list; | |
8935 | int size_in_pages; | |
8936 | ||
8937 | if ( !(upl->flags & UPL_INTERNAL) || count <= 0) | |
8938 | return; | |
8939 | ||
8940 | size_in_pages = upl->size / PAGE_SIZE; | |
8941 | ||
8942 | user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); | |
8943 | ||
8944 | while (count-- && index < size_in_pages) | |
8945 | user_page_list[index++].needed = TRUE; | |
8946 | } | |
8947 | ||
8948 | ||
8949 | /* | |
8950 | * ENCRYPTED SWAP: | |
8951 | * | |
8952 | * Rationale: the user might have some encrypted data on disk (via | |
8953 | * FileVault or any other mechanism). That data is then decrypted in | |
8954 | * memory, which is safe as long as the machine is secure. But that | |
8955 | * decrypted data in memory could be paged out to disk by the default | |
8956 | * pager. The data would then be stored on disk in the clear (not encrypted) | |
8957 | * and it could be accessed by anyone who gets physical access to the | |
8958 | * disk (if the laptop or the disk gets stolen for example). This weakens | |
8959 | * the security offered by FileVault. | |
8960 | * | |
8961 | * Solution: the default pager will optionally request that all the | |
8962 | * pages it gathers for pageout be encrypted, via the UPL interfaces, | |
8963 | * before it sends this UPL to disk via the vnode_pageout() path. | |
8964 | * | |
8965 | * Notes: | |
8966 | * | |
8967 | * To avoid disrupting the VM LRU algorithms, we want to keep the | |
8968 | * clean-in-place mechanisms, which allow us to send some extra pages to | |
8969 | * swap (clustering) without actually removing them from the user's | |
8970 | * address space. We don't want the user to unknowingly access encrypted | |
8971 | * data, so we have to actually remove the encrypted pages from the page | |
8972 | * table. When the user accesses the data, the hardware will fail to | |
8973 | * locate the virtual page in its page table and will trigger a page | |
8974 | * fault. We can then decrypt the page and enter it in the page table | |
8975 | * again. Whenever we allow the user to access the contents of a page, | |
8976 | * we have to make sure it's not encrypted. | |
8977 | * | |
8978 | * | |
8979 | */ | |
8980 | /* | |
8981 | * ENCRYPTED SWAP: | |
8982 | * Reserve of virtual addresses in the kernel address space. | |
8983 | * We need to map the physical pages in the kernel, so that we | |
8984 | * can call the encryption/decryption routines with a kernel | |
8985 | * virtual address. We keep this pool of pre-allocated kernel | |
8986 | * virtual addresses so that we don't have to scan the kernel's | |
8987 | * virtual address space each time we need to encrypt or decrypt | |
8988 | * a physical page. | |
8989 | * It would be nice to be able to encrypt and decrypt in physical | |
8990 | * mode but that might not always be more efficient... | |
8991 | */ | |
8992 | decl_simple_lock_data(,vm_paging_lock) | |
8993 | #define VM_PAGING_NUM_PAGES 64 | |
8994 | vm_map_offset_t vm_paging_base_address = 0; | |
8995 | boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, }; | |
8996 | int vm_paging_max_index = 0; | |
8997 | int vm_paging_page_waiter = 0; | |
8998 | int vm_paging_page_waiter_total = 0; | |
8999 | unsigned long vm_paging_no_kernel_page = 0; | |
9000 | unsigned long vm_paging_objects_mapped = 0; | |
9001 | unsigned long vm_paging_pages_mapped = 0; | |
9002 | unsigned long vm_paging_objects_mapped_slow = 0; | |
9003 | unsigned long vm_paging_pages_mapped_slow = 0; | |
9004 | ||
9005 | void | |
9006 | vm_paging_map_init(void) | |
9007 | { | |
9008 | kern_return_t kr; | |
9009 | vm_map_offset_t page_map_offset; | |
9010 | vm_map_entry_t map_entry; | |
9011 | ||
9012 | assert(vm_paging_base_address == 0); | |
9013 | ||
9014 | /* | |
9015 | * Initialize our pool of pre-allocated kernel | |
9016 | * virtual addresses. | |
9017 | */ | |
9018 | page_map_offset = 0; | |
9019 | kr = vm_map_find_space(kernel_map, | |
9020 | &page_map_offset, | |
9021 | VM_PAGING_NUM_PAGES * PAGE_SIZE, | |
9022 | 0, | |
9023 | 0, | |
9024 | &map_entry); | |
9025 | if (kr != KERN_SUCCESS) { | |
9026 | panic("vm_paging_map_init: kernel_map full\n"); | |
9027 | } | |
9028 | VME_OBJECT_SET(map_entry, kernel_object); | |
9029 | VME_OFFSET_SET(map_entry, page_map_offset); | |
9030 | map_entry->protection = VM_PROT_NONE; | |
9031 | map_entry->max_protection = VM_PROT_NONE; | |
9032 | map_entry->permanent = TRUE; | |
9033 | vm_object_reference(kernel_object); | |
9034 | vm_map_unlock(kernel_map); | |
9035 | ||
9036 | assert(vm_paging_base_address == 0); | |
9037 | vm_paging_base_address = page_map_offset; | |
9038 | } | |
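/*
 * Illustrative sketch (not part of this file): the arithmetic that ties a
 * slot index in vm_paging_page_inuse[] to a kernel virtual address inside the
 * range reserved by vm_paging_map_init() above.  Slot i maps to
 * vm_paging_base_address + i * PAGE_SIZE, and vm_paging_unmap_object()
 * inverts this with (start - vm_paging_base_address) >> PAGE_SHIFT.
 * The helper names are hypothetical.
 */
static inline vm_map_offset_t
example_paging_slot_to_addr(int i)
{
	assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
	return vm_paging_base_address + ((vm_map_offset_t)i * PAGE_SIZE);
}

static inline int
example_paging_addr_to_slot(vm_map_offset_t addr)
{
	return (int)((addr - vm_paging_base_address) >> PAGE_SHIFT);
}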
9039 | ||
9040 | /* | |
9041 | * ENCRYPTED SWAP: | |
9042 | * vm_paging_map_object: | |
9043 | * Maps part of a VM object's pages in the kernel | |
9044 | * virtual address space, using the pre-allocated | |
9045 | * kernel virtual addresses, if possible. | |
9046 | * Context: | |
9047 | * The VM object is locked. This lock will get | |
9048 | * dropped and re-acquired though, so the caller | |
9049 | * must make sure the VM object is kept alive | |
9050 | * (by holding a VM map that has a reference | |
9051 | * on it, for example, or taking an extra reference). | |
9052 | * The page should also be kept busy to prevent | |
9053 | * it from being reclaimed. | |
9054 | */ | |
9055 | kern_return_t | |
9056 | vm_paging_map_object( | |
9057 | vm_page_t page, | |
9058 | vm_object_t object, | |
9059 | vm_object_offset_t offset, | |
9060 | vm_prot_t protection, | |
9061 | boolean_t can_unlock_object, | |
9062 | vm_map_size_t *size, /* IN/OUT */ | |
9063 | vm_map_offset_t *address, /* OUT */ | |
9064 | boolean_t *need_unmap) /* OUT */ | |
9065 | { | |
9066 | kern_return_t kr; | |
9067 | vm_map_offset_t page_map_offset; | |
9068 | vm_map_size_t map_size; | |
9069 | vm_object_offset_t object_offset; | |
9070 | int i; | |
9071 | ||
9072 | if (page != VM_PAGE_NULL && *size == PAGE_SIZE) { | |
9073 | /* use permanent 1-to-1 kernel mapping of physical memory ? */ | |
9074 | #if __x86_64__ | |
9075 | *address = (vm_map_offset_t) | |
9076 | PHYSMAP_PTOV((pmap_paddr_t)page->phys_page << | |
9077 | PAGE_SHIFT); | |
9078 | *need_unmap = FALSE; | |
9079 | return KERN_SUCCESS; | |
9080 | #else | |
9081 | #warning "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..." | |
9082 | #endif | |
9083 | ||
9084 | assert(page->busy); | |
9085 | /* | |
9086 | * Use one of the pre-allocated kernel virtual addresses | |
9087 | * and just enter the VM page in the kernel address space | |
9088 | * at that virtual address. | |
9089 | */ | |
9090 | simple_lock(&vm_paging_lock); | |
9091 | ||
9092 | /* | |
9093 | * Try and find an available kernel virtual address | |
9094 | * from our pre-allocated pool. | |
9095 | */ | |
9096 | page_map_offset = 0; | |
9097 | for (;;) { | |
9098 | for (i = 0; i < VM_PAGING_NUM_PAGES; i++) { | |
9099 | if (vm_paging_page_inuse[i] == FALSE) { | |
9100 | page_map_offset = | |
9101 | vm_paging_base_address + | |
9102 | (i * PAGE_SIZE); | |
9103 | break; | |
9104 | } | |
9105 | } | |
9106 | if (page_map_offset != 0) { | |
9107 | /* found a space to map our page ! */ | |
9108 | break; | |
9109 | } | |
9110 | ||
9111 | if (can_unlock_object) { | |
9112 | /* | |
9113 | * If we can afford to unlock the VM object, | |
9114 | * let's take the slow path now... | |
9115 | */ | |
9116 | break; | |
9117 | } | |
9118 | /* | |
9119 | * We can't afford to unlock the VM object, so | |
9120 | * let's wait for a space to become available... | |
9121 | */ | |
9122 | vm_paging_page_waiter_total++; | |
9123 | vm_paging_page_waiter++; | |
9124 | kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT); | |
9125 | if (kr == THREAD_WAITING) { | |
9126 | simple_unlock(&vm_paging_lock); | |
9127 | kr = thread_block(THREAD_CONTINUE_NULL); | |
9128 | simple_lock(&vm_paging_lock); | |
9129 | } | |
9130 | vm_paging_page_waiter--; | |
9131 | /* ... and try again */ | |
9132 | } | |
9133 | ||
9134 | if (page_map_offset != 0) { | |
9135 | /* | |
9136 | * We found a kernel virtual address; | |
9137 | * map the physical page to that virtual address. | |
9138 | */ | |
9139 | if (i > vm_paging_max_index) { | |
9140 | vm_paging_max_index = i; | |
9141 | } | |
9142 | vm_paging_page_inuse[i] = TRUE; | |
9143 | simple_unlock(&vm_paging_lock); | |
9144 | ||
9145 | page->pmapped = TRUE; | |
9146 | ||
9147 | /* | |
9148 | * Keep the VM object locked over the PMAP_ENTER | |
9149 | * and the actual use of the page by the kernel, | |
9150 | * or this pmap mapping might get undone by a | |
9151 | * vm_object_pmap_protect() call... | |
9152 | */ | |
9153 | PMAP_ENTER(kernel_pmap, | |
9154 | page_map_offset, | |
9155 | page, | |
9156 | protection, | |
9157 | VM_PROT_NONE, | |
9158 | 0, | |
9159 | TRUE); | |
9160 | vm_paging_objects_mapped++; | |
9161 | vm_paging_pages_mapped++; | |
9162 | *address = page_map_offset; | |
9163 | *need_unmap = TRUE; | |
9164 | ||
9165 | /* all done and mapped, ready to use ! */ | |
9166 | return KERN_SUCCESS; | |
9167 | } | |
9168 | ||
9169 | /* | |
9170 | * We ran out of pre-allocated kernel virtual | |
9171 | * addresses. Just map the page in the kernel | |
9172 | * the slow and regular way. | |
9173 | */ | |
9174 | vm_paging_no_kernel_page++; | |
9175 | simple_unlock(&vm_paging_lock); | |
9176 | } | |
9177 | ||
9178 | if (! can_unlock_object) { | |
9179 | *address = 0; | |
9180 | *size = 0; | |
9181 | *need_unmap = FALSE; | |
9182 | return KERN_NOT_SUPPORTED; | |
9183 | } | |
9184 | ||
9185 | object_offset = vm_object_trunc_page(offset); | |
9186 | map_size = vm_map_round_page(*size, | |
9187 | VM_MAP_PAGE_MASK(kernel_map)); | |
9188 | ||
9189 | /* | |
9190 | * Try and map the required range of the object | |
9191 | * in the kernel_map | |
9192 | */ | |
9193 | ||
9194 | vm_object_reference_locked(object); /* for the map entry */ | |
9195 | vm_object_unlock(object); | |
9196 | ||
9197 | kr = vm_map_enter(kernel_map, | |
9198 | address, | |
9199 | map_size, | |
9200 | 0, | |
9201 | VM_FLAGS_ANYWHERE, | |
9202 | object, | |
9203 | object_offset, | |
9204 | FALSE, | |
9205 | protection, | |
9206 | VM_PROT_ALL, | |
9207 | VM_INHERIT_NONE); | |
9208 | if (kr != KERN_SUCCESS) { | |
9209 | *address = 0; | |
9210 | *size = 0; | |
9211 | *need_unmap = FALSE; | |
9212 | vm_object_deallocate(object); /* for the map entry */ | |
9213 | vm_object_lock(object); | |
9214 | return kr; | |
9215 | } | |
9216 | ||
9217 | *size = map_size; | |
9218 | ||
9219 | /* | |
9220 | * Enter the mapped pages in the page table now. | |
9221 | */ | |
9222 | vm_object_lock(object); | |
9223 | /* | |
9224 | * VM object must be kept locked from before PMAP_ENTER() | |
9225 | * until after the kernel is done accessing the page(s). | |
9226 | * Otherwise, the pmap mappings in the kernel could be | |
9227 | * undone by a call to vm_object_pmap_protect(). | |
9228 | */ | |
9229 | ||
9230 | for (page_map_offset = 0; | |
9231 | map_size != 0; | |
9232 | map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) { | |
9233 | ||
9234 | page = vm_page_lookup(object, offset + page_map_offset); | |
9235 | if (page == VM_PAGE_NULL) { | |
9236 | printf("vm_paging_map_object: no page !?"); | |
9237 | vm_object_unlock(object); | |
9238 | kr = vm_map_remove(kernel_map, *address, *size, | |
9239 | VM_MAP_NO_FLAGS); | |
9240 | assert(kr == KERN_SUCCESS); | |
9241 | *address = 0; | |
9242 | *size = 0; | |
9243 | *need_unmap = FALSE; | |
9244 | vm_object_lock(object); | |
9245 | return KERN_MEMORY_ERROR; | |
9246 | } | |
9247 | page->pmapped = TRUE; | |
9248 | ||
9249 | //assert(pmap_verify_free(page->phys_page)); | |
9250 | PMAP_ENTER(kernel_pmap, | |
9251 | *address + page_map_offset, | |
9252 | page, | |
9253 | protection, | |
9254 | VM_PROT_NONE, | |
9255 | 0, | |
9256 | TRUE); | |
9257 | } | |
9258 | ||
9259 | vm_paging_objects_mapped_slow++; | |
9260 | vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64); | |
9261 | ||
9262 | *need_unmap = TRUE; | |
9263 | ||
9264 | return KERN_SUCCESS; | |
9265 | } | |
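/*
 * Illustrative sketch (not part of this file): the map / use / unmap pairing
 * expected by the routine above and vm_paging_unmap_object() below.  The
 * caller holds the object lock and keeps the page busy; it only unmaps when
 * need_unmap was reported TRUE (on x86_64 the physmap fast path returns a
 * direct-mapped address that must not be unmapped).  The function name and
 * the "access the contents" step are hypothetical.
 */
static kern_return_t
example_touch_page(vm_object_t object, vm_page_t page)
{
	vm_map_size_t	size = PAGE_SIZE;
	vm_map_offset_t	vaddr = 0;
	boolean_t	need_unmap = FALSE;
	kern_return_t	kr;

	kr = vm_paging_map_object(page, object, page->offset,
				  VM_PROT_READ | VM_PROT_WRITE,
				  FALSE,		/* can_unlock_object */
				  &size, &vaddr, &need_unmap);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... access the page contents through 'vaddr' here ... */

	if (need_unmap)
		vm_paging_unmap_object(object, vaddr, vaddr + size);
	return KERN_SUCCESS;
}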
9266 | ||
9267 | /* | |
9268 | * ENCRYPTED SWAP: | |
9269 | * vm_paging_unmap_object: | |
9270 | * Unmaps part of a VM object's pages from the kernel | |
9271 | * virtual address space. | |
9272 | * Context: | |
9273 | * The VM object is locked. This lock will get | |
9274 | * dropped and re-acquired though. | |
9275 | */ | |
9276 | void | |
9277 | vm_paging_unmap_object( | |
9278 | vm_object_t object, | |
9279 | vm_map_offset_t start, | |
9280 | vm_map_offset_t end) | |
9281 | { | |
9282 | kern_return_t kr; | |
9283 | int i; | |
9284 | ||
9285 | if ((vm_paging_base_address == 0) || | |
9286 | (start < vm_paging_base_address) || | |
9287 | (end > (vm_paging_base_address | |
9288 | + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) { | |
9289 | /* | |
9290 | * We didn't use our pre-allocated pool of | |
9291 | * kernel virtual addresses. Deallocate the | |
9292 | * virtual memory. | |
9293 | */ | |
9294 | if (object != VM_OBJECT_NULL) { | |
9295 | vm_object_unlock(object); | |
9296 | } | |
9297 | kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS); | |
9298 | if (object != VM_OBJECT_NULL) { | |
9299 | vm_object_lock(object); | |
9300 | } | |
9301 | assert(kr == KERN_SUCCESS); | |
9302 | } else { | |
9303 | /* | |
9304 | * We used a kernel virtual address from our | |
9305 | * pre-allocated pool. Put it back in the pool | |
9306 | * for next time. | |
9307 | */ | |
9308 | assert(end - start == PAGE_SIZE); | |
9309 | i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT); | |
9310 | assert(i >= 0 && i < VM_PAGING_NUM_PAGES); | |
9311 | ||
9312 | /* undo the pmap mapping */ | |
9313 | pmap_remove(kernel_pmap, start, end); | |
9314 | ||
9315 | simple_lock(&vm_paging_lock); | |
9316 | vm_paging_page_inuse[i] = FALSE; | |
9317 | if (vm_paging_page_waiter) { | |
9318 | thread_wakeup(&vm_paging_page_waiter); | |
9319 | } | |
9320 | simple_unlock(&vm_paging_lock); | |
9321 | } | |
9322 | } | |
9323 | ||
9324 | #if ENCRYPTED_SWAP | |
9325 | /* | |
9326 | * Encryption data. | |
9327 | * "iv" is the "initial vector". Ideally, we want to | |
9328 | * have a different one for each page we encrypt, so that | |
9329 | * crackers can't find encryption patterns too easily. | |
9330 | */ | |
9331 | #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */ | |
9332 | boolean_t swap_crypt_ctx_initialized = FALSE; | |
9333 | uint32_t swap_crypt_key[8]; /* big enough for a 256 key */ | |
9334 | aes_ctx swap_crypt_ctx; | |
9335 | const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, }; | |
9336 | ||
9337 | #if DEBUG | |
9338 | boolean_t swap_crypt_ctx_tested = FALSE; | |
9339 | unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096))); | |
9340 | unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096))); | |
9341 | unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096))); | |
9342 | #endif /* DEBUG */ | |
9343 | ||
9344 | /* | |
9345 | * Initialize the encryption context: key and key size. | |
9346 | */ | |
9347 | void swap_crypt_ctx_initialize(void); /* forward */ | |
9348 | void | |
9349 | swap_crypt_ctx_initialize(void) | |
9350 | { | |
9351 | unsigned int i; | |
9352 | ||
9353 | /* | |
9354 | * No need for locking to protect swap_crypt_ctx_initialized | |
9355 | * because the first use of encryption will come from the | |
9356 | * pageout thread (we won't pagein before there's been a pageout) | |
9357 | * and there's only one pageout thread. | |
9358 | */ | |
9359 | if (swap_crypt_ctx_initialized == FALSE) { | |
9360 | for (i = 0; | |
9361 | i < (sizeof (swap_crypt_key) / | |
9362 | sizeof (swap_crypt_key[0])); | |
9363 | i++) { | |
9364 | swap_crypt_key[i] = random(); | |
9365 | } | |
9366 | aes_encrypt_key((const unsigned char *) swap_crypt_key, | |
9367 | SWAP_CRYPT_AES_KEY_SIZE, | |
9368 | &swap_crypt_ctx.encrypt); | |
9369 | aes_decrypt_key((const unsigned char *) swap_crypt_key, | |
9370 | SWAP_CRYPT_AES_KEY_SIZE, | |
9371 | &swap_crypt_ctx.decrypt); | |
9372 | swap_crypt_ctx_initialized = TRUE; | |
9373 | } | |
9374 | ||
9375 | #if DEBUG | |
9376 | /* | |
9377 | * Validate the encryption algorithms. | |
9378 | */ | |
9379 | if (swap_crypt_ctx_tested == FALSE) { | |
9380 | /* initialize */ | |
9381 | for (i = 0; i < 4096; i++) { | |
9382 | swap_crypt_test_page_ref[i] = (char) i; | |
9383 | } | |
9384 | /* encrypt */ | |
9385 | aes_encrypt_cbc(swap_crypt_test_page_ref, | |
9386 | swap_crypt_null_iv, | |
9387 | PAGE_SIZE / AES_BLOCK_SIZE, | |
9388 | swap_crypt_test_page_encrypt, | |
9389 | &swap_crypt_ctx.encrypt); | |
9390 | /* decrypt */ | |
9391 | aes_decrypt_cbc(swap_crypt_test_page_encrypt, | |
9392 | swap_crypt_null_iv, | |
9393 | PAGE_SIZE / AES_BLOCK_SIZE, | |
9394 | swap_crypt_test_page_decrypt, | |
9395 | &swap_crypt_ctx.decrypt); | |
9396 | /* compare result with original */ | |
9397 | for (i = 0; i < 4096; i ++) { | |
9398 | if (swap_crypt_test_page_decrypt[i] != | |
9399 | swap_crypt_test_page_ref[i]) { | |
9400 | panic("encryption test failed"); | |
9401 | } | |
9402 | } | |
9403 | ||
9404 | /* encrypt again */ | |
9405 | aes_encrypt_cbc(swap_crypt_test_page_decrypt, | |
9406 | swap_crypt_null_iv, | |
9407 | PAGE_SIZE / AES_BLOCK_SIZE, | |
9408 | swap_crypt_test_page_decrypt, | |
9409 | &swap_crypt_ctx.encrypt); | |
9410 | /* decrypt in place */ | |
9411 | aes_decrypt_cbc(swap_crypt_test_page_decrypt, | |
9412 | swap_crypt_null_iv, | |
9413 | PAGE_SIZE / AES_BLOCK_SIZE, | |
9414 | swap_crypt_test_page_decrypt, | |
9415 | &swap_crypt_ctx.decrypt); | |
9416 | for (i = 0; i < 4096; i ++) { | |
9417 | if (swap_crypt_test_page_decrypt[i] != | |
9418 | swap_crypt_test_page_ref[i]) { | |
9419 | panic("in place encryption test failed"); | |
9420 | } | |
9421 | } | |
9422 | ||
9423 | swap_crypt_ctx_tested = TRUE; | |
9424 | } | |
9425 | #endif /* DEBUG */ | |
9426 | } | |
9427 | ||
9428 | /* | |
9429 | * ENCRYPTED SWAP: | |
9430 | * vm_page_encrypt: | |
9431 | * Encrypt the given page, for secure paging. | |
9432 | * The page might already be mapped at kernel virtual | |
9433 | * address "kernel_mapping_offset". Otherwise, we need | |
9434 | * to map it. | |
9435 | * | |
9436 | * Context: | |
9437 | * The page's object is locked, but this lock will be released | |
9438 | * and re-acquired. | |
9439 | * The page is busy and not accessible by users (not entered in any pmap). | |
9440 | */ | |
9441 | void | |
9442 | vm_page_encrypt( | |
9443 | vm_page_t page, | |
9444 | vm_map_offset_t kernel_mapping_offset) | |
9445 | { | |
9446 | kern_return_t kr; | |
9447 | vm_map_size_t kernel_mapping_size; | |
9448 | boolean_t kernel_mapping_needs_unmap; | |
9449 | vm_offset_t kernel_vaddr; | |
9450 | union { | |
9451 | unsigned char aes_iv[AES_BLOCK_SIZE]; | |
9452 | struct { | |
9453 | memory_object_t pager_object; | |
9454 | vm_object_offset_t paging_offset; | |
9455 | } vm; | |
9456 | } encrypt_iv; | |
9457 | ||
9458 | if (! vm_pages_encrypted) { | |
9459 | vm_pages_encrypted = TRUE; | |
9460 | } | |
9461 | ||
9462 | assert(page->busy); | |
9463 | ||
9464 | if (page->encrypted) { | |
9465 | /* | |
9466 | * Already encrypted: no need to do it again. | |
9467 | */ | |
9468 | vm_page_encrypt_already_encrypted_counter++; | |
9469 | return; | |
9470 | } | |
9471 | assert(page->dirty || page->precious); | |
9472 | ||
9473 | ASSERT_PAGE_DECRYPTED(page); | |
9474 | ||
9475 | /* | |
9476 | * Take a paging-in-progress reference to keep the object | |
9477 | * alive even if we have to unlock it (in vm_paging_map_object() | |
9478 | * for example)... | |
9479 | */ | |
9480 | vm_object_paging_begin(page->object); | |
9481 | ||
9482 | if (kernel_mapping_offset == 0) { | |
9483 | /* | |
9484 | * The page hasn't already been mapped in kernel space | |
9485 | * by the caller. Map it now, so that we can access | |
9486 | * its contents and encrypt them. | |
9487 | */ | |
9488 | kernel_mapping_size = PAGE_SIZE; | |
9489 | kernel_mapping_needs_unmap = FALSE; | |
9490 | kr = vm_paging_map_object(page, | |
9491 | page->object, | |
9492 | page->offset, | |
9493 | VM_PROT_READ | VM_PROT_WRITE, | |
9494 | FALSE, | |
9495 | &kernel_mapping_size, | |
9496 | &kernel_mapping_offset, | |
9497 | &kernel_mapping_needs_unmap); | |
9498 | if (kr != KERN_SUCCESS) { | |
9499 | panic("vm_page_encrypt: " | |
9500 | "could not map page in kernel: 0x%x\n", | |
9501 | kr); | |
9502 | } | |
9503 | } else { | |
9504 | kernel_mapping_size = 0; | |
9505 | kernel_mapping_needs_unmap = FALSE; | |
9506 | } | |
9507 | kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); | |
9508 | ||
9509 | if (swap_crypt_ctx_initialized == FALSE) { | |
9510 | swap_crypt_ctx_initialize(); | |
9511 | } | |
9512 | assert(swap_crypt_ctx_initialized); | |
9513 | ||
9514 | /* | |
9515 | * Prepare an "initial vector" for the encryption. | |
9516 | * We use the "pager" and the "paging_offset" for that | |
9517 | * page, so that two pages with identical contents do not | |
9518 | * encrypt to identical ciphertext and an attacker cannot | |
9519 | * spot such patterns across the swapped-out data. | |
9520 | */ | |
9521 | bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv)); | |
9522 | encrypt_iv.vm.pager_object = page->object->pager; | |
9523 | encrypt_iv.vm.paging_offset = | |
9524 | page->object->paging_offset + page->offset; | |
9525 | ||
9526 | /* encrypt the "initial vector" */ | |
9527 | aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0], | |
9528 | swap_crypt_null_iv, | |
9529 | 1, | |
9530 | &encrypt_iv.aes_iv[0], | |
9531 | &swap_crypt_ctx.encrypt); | |
9532 | ||
9533 | /* | |
9534 | * Encrypt the page. | |
9535 | */ | |
9536 | aes_encrypt_cbc((const unsigned char *) kernel_vaddr, | |
9537 | &encrypt_iv.aes_iv[0], | |
9538 | PAGE_SIZE / AES_BLOCK_SIZE, | |
9539 | (unsigned char *) kernel_vaddr, | |
9540 | &swap_crypt_ctx.encrypt); | |
9541 | ||
9542 | vm_page_encrypt_counter++; | |
9543 | ||
9544 | /* | |
9545 | * Unmap the page from the kernel's address space, | |
9546 | * if we had to map it ourselves. Otherwise, let | |
9547 | * the caller undo the mapping if needed. | |
9548 | */ | |
9549 | if (kernel_mapping_needs_unmap) { | |
9550 | vm_paging_unmap_object(page->object, | |
9551 | kernel_mapping_offset, | |
9552 | kernel_mapping_offset + kernel_mapping_size); | |
9553 | } | |
9554 | ||
9555 | /* | |
9556 | * Clear the "reference" and "modified" bits. | |
9557 | * This should clean up any impact the encryption had | |
9558 | * on them. | |
9559 | * The page was kept busy and disconnected from all pmaps, | |
9560 | * so it can't have been referenced or modified from user | |
9561 | * space. | |
9562 | * The software bits will be reset later after the I/O | |
9563 | * has completed (in upl_commit_range()). | |
9564 | */ | |
9565 | pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED); | |
9566 | ||
9567 | page->encrypted = TRUE; | |
9568 | ||
9569 | vm_object_paging_end(page->object); | |
9570 | } | |
9571 | ||
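/*
 * Illustrative sketch, compiled out: the per-page IV construction used
 * by vm_page_encrypt() above (and matched by vm_page_decrypt() below),
 * factored into a helper.  The pager and the page's offset in pager
 * space are packed into one AES block, and that block is encrypted
 * with the swap key, so identical plaintext pages still produce
 * different ciphertext.  The helper name is hypothetical; the kernel
 * does this inline.
 */
#if 0
static void
swap_crypt_derive_iv(
	vm_page_t	page,
	unsigned char	iv[AES_BLOCK_SIZE])
{
	union {
		unsigned char aes_iv[AES_BLOCK_SIZE];
		struct {
			memory_object_t		pager_object;
			vm_object_offset_t	paging_offset;
		} vm;
	} seed;

	/* pack the page's identity into one AES block */
	bzero(&seed.aes_iv[0], sizeof (seed.aes_iv));
	seed.vm.pager_object = page->object->pager;
	seed.vm.paging_offset =
		page->object->paging_offset + page->offset;

	/* encrypt that block with the swap key to get the actual IV */
	aes_encrypt_cbc((const unsigned char *) &seed.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&iv[0],
			&swap_crypt_ctx.encrypt);
}
#endif
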
9572 | /* | |
9573 | * ENCRYPTED SWAP: | |
9574 | * vm_page_decrypt: | |
9575 | * Decrypt the given page. | |
9576 | * The page might already be mapped at kernel virtual | |
9577 | * address "kernel_mapping_offset". Otherwise, we need | |
9578 | * to map it. | |
9579 | * | |
9580 | * Context: | |
9581 | * The page's VM object is locked but will be unlocked and relocked. | |
9582 | * The page is busy and not accessible by users (not entered in any pmap). | |
9583 | */ | |
9584 | void | |
9585 | vm_page_decrypt( | |
9586 | vm_page_t page, | |
9587 | vm_map_offset_t kernel_mapping_offset) | |
9588 | { | |
9589 | kern_return_t kr; | |
9590 | vm_map_size_t kernel_mapping_size; | |
9591 | vm_offset_t kernel_vaddr; | |
9592 | boolean_t kernel_mapping_needs_unmap; | |
9593 | union { | |
9594 | unsigned char aes_iv[AES_BLOCK_SIZE]; | |
9595 | struct { | |
9596 | memory_object_t pager_object; | |
9597 | vm_object_offset_t paging_offset; | |
9598 | } vm; | |
9599 | } decrypt_iv; | |
9600 | boolean_t was_dirty; | |
9601 | ||
9602 | assert(page->busy); | |
9603 | assert(page->encrypted); | |
9604 | ||
9605 | was_dirty = page->dirty; | |
9606 | ||
9607 | /* | |
9608 | * Take a paging-in-progress reference to keep the object | |
9609 | * alive even if we have to unlock it (in vm_paging_map_object() | |
9610 | * for example)... | |
9611 | */ | |
9612 | vm_object_paging_begin(page->object); | |
9613 | ||
9614 | if (kernel_mapping_offset == 0) { | |
9615 | /* | |
9616 | * The page hasn't already been mapped in kernel space | |
9617 | * by the caller. Map it now, so that we can access | |
9618 | * its contents and decrypt them. | |
9619 | */ | |
9620 | kernel_mapping_size = PAGE_SIZE; | |
9621 | kernel_mapping_needs_unmap = FALSE; | |
9622 | kr = vm_paging_map_object(page, | |
9623 | page->object, | |
9624 | page->offset, | |
9625 | VM_PROT_READ | VM_PROT_WRITE, | |
9626 | FALSE, | |
9627 | &kernel_mapping_size, | |
9628 | &kernel_mapping_offset, | |
9629 | &kernel_mapping_needs_unmap); | |
9630 | if (kr != KERN_SUCCESS) { | |
9631 | panic("vm_page_decrypt: " | |
9632 | "could not map page in kernel: 0x%x\n", | |
9633 | kr); | |
9634 | } | |
9635 | } else { | |
9636 | kernel_mapping_size = 0; | |
9637 | kernel_mapping_needs_unmap = FALSE; | |
9638 | } | |
9639 | kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); | |
9640 | ||
9641 | assert(swap_crypt_ctx_initialized); | |
9642 | ||
9643 | /* | |
9644 | * Prepare an "initial vector" for the decryption. | |
9645 | * It has to be the same as the "initial vector" we | |
9646 | * used to encrypt that page. | |
9647 | */ | |
9648 | bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv)); | |
9649 | decrypt_iv.vm.pager_object = page->object->pager; | |
9650 | decrypt_iv.vm.paging_offset = | |
9651 | page->object->paging_offset + page->offset; | |
9652 | ||
9653 | /* encrypt the "initial vector" */ | |
9654 | aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0], | |
9655 | swap_crypt_null_iv, | |
9656 | 1, | |
9657 | &decrypt_iv.aes_iv[0], | |
9658 | &swap_crypt_ctx.encrypt); | |
9659 | ||
9660 | /* | |
9661 | * Decrypt the page. | |
9662 | */ | |
9663 | aes_decrypt_cbc((const unsigned char *) kernel_vaddr, | |
9664 | &decrypt_iv.aes_iv[0], | |
9665 | PAGE_SIZE / AES_BLOCK_SIZE, | |
9666 | (unsigned char *) kernel_vaddr, | |
9667 | &swap_crypt_ctx.decrypt); | |
9668 | vm_page_decrypt_counter++; | |
9669 | ||
9670 | /* | |
9671 | * Unmap the page from the kernel's address space, | |
9672 | * if we had to map it ourselves. Otherwise, let | |
9673 | * the caller undo the mapping if needed. | |
9674 | */ | |
9675 | if (kernel_mapping_needs_unmap) { | |
9676 | vm_paging_unmap_object(page->object, | |
9677 | kernel_vaddr, | |
9678 | kernel_vaddr + PAGE_SIZE); | |
9679 | } | |
9680 | ||
9681 | if (was_dirty) { | |
9682 | /* | |
9683 | * The pager did not specify that the page would be | |
9684 | * clean when it got paged in, so let's not clean it here | |
9685 | * either. | |
9686 | */ | |
9687 | } else { | |
9688 | /* | |
9689 | * After decryption, the page is actually still clean. | |
9690 | * It was encrypted as part of paging, which "cleans" | |
9691 | * the "dirty" pages. | |
9692 | * No one could have accessed it after it was encrypted, | |
9693 | * and the decryption itself doesn't count as a modification. | |
9694 | */ | |
9695 | page->dirty = FALSE; | |
9696 | assert (page->cs_validated == FALSE); | |
9697 | pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED); | |
9698 | } | |
9699 | page->encrypted = FALSE; | |
9700 | ||
9701 | /* | |
9702 | * We've just modified the page's contents via the data cache and part | |
9703 | * of the new contents might still be in the cache and not yet in RAM. | |
9704 | * Since the page is now available and might get gathered in a UPL to | |
9705 | * be part of a DMA transfer from a driver that expects the memory to | |
9706 | * be coherent at this point, we have to flush the data cache. | |
9707 | */ | |
9708 | pmap_sync_page_attributes_phys(page->phys_page); | |
9709 | /* | |
9710 | * Since the page is not mapped yet, some code might assume that it | |
9711 | * doesn't need to invalidate the instruction cache when writing to | |
9712 | * that page. That code relies on "pmapped" being FALSE, so that the | |
9713 | * caches get synchronized when the page is first mapped. | |
9714 | */ | |
9715 | assert(pmap_verify_free(page->phys_page)); | |
9716 | page->pmapped = FALSE; | |
9717 | page->wpmapped = FALSE; | |
9718 | ||
9719 | vm_object_paging_end(page->object); | |
9720 | } | |
9721 | ||
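/*
 * Illustrative sketch, compiled out: the page-in side of the scheme.
 * The real logic lives in the page-fault path; a page that was
 * encrypted on its way out must be decrypted, while still busy and
 * not entered in any pmap, before user space can touch it again.
 * "m" is a hypothetical vm_page_t whose object is locked.
 */
#if 0
	if (m->encrypted) {
		assert(m->busy);
		vm_page_decrypt(m, 0);	/* 0: let it map the page for us */
		assert(!m->encrypted);
	}
#endif
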
9722 | #if DEVELOPMENT || DEBUG | |
9723 | unsigned long upl_encrypt_upls = 0; | |
9724 | unsigned long upl_encrypt_pages = 0; | |
9725 | #endif | |
9726 | ||
9727 | /* | |
9728 | * ENCRYPTED SWAP: | |
9729 | * | |
9730 | * upl_encrypt: | |
9731 | * Encrypts all the pages in the UPL, within the specified range. | |
9732 | * | |
9733 | */ | |
9734 | void | |
9735 | upl_encrypt( | |
9736 | upl_t upl, | |
9737 | upl_offset_t crypt_offset, | |
9738 | upl_size_t crypt_size) | |
9739 | { | |
9740 | upl_size_t upl_size, subupl_size=crypt_size; | |
9741 | upl_offset_t offset_in_upl, subupl_offset=crypt_offset; | |
9742 | vm_object_t upl_object; | |
9743 | vm_object_offset_t upl_offset; | |
9744 | vm_page_t page; | |
9745 | vm_object_t shadow_object; | |
9746 | vm_object_offset_t shadow_offset; | |
9747 | vm_object_offset_t paging_offset; | |
9748 | vm_object_offset_t base_offset; | |
9749 | int isVectorUPL = 0; | |
9750 | upl_t vector_upl = NULL; | |
9751 | ||
9752 | if((isVectorUPL = vector_upl_is_valid(upl))) | |
9753 | vector_upl = upl; | |
9754 | ||
9755 | process_upl_to_encrypt: | |
9756 | if(isVectorUPL) { | |
9757 | crypt_size = subupl_size; | |
9758 | crypt_offset = subupl_offset; | |
9759 | upl = vector_upl_subupl_byoffset(vector_upl, &crypt_offset, &crypt_size); | |
9760 | if(upl == NULL) | |
9761 | panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n"); | |
9762 | subupl_size -= crypt_size; | |
9763 | subupl_offset += crypt_size; | |
9764 | } | |
9765 | ||
9766 | #if DEVELOPMENT || DEBUG | |
9767 | upl_encrypt_upls++; | |
9768 | upl_encrypt_pages += crypt_size / PAGE_SIZE; | |
9769 | #endif | |
9770 | upl_object = upl->map_object; | |
9771 | upl_offset = upl->offset; | |
9772 | upl_size = upl->size; | |
9773 | ||
9774 | vm_object_lock(upl_object); | |
9775 | ||
9776 | /* | |
9777 | * Find the VM object that contains the actual pages. | |
9778 | */ | |
9779 | if (upl_object->pageout) { | |
9780 | shadow_object = upl_object->shadow; | |
9781 | /* | |
9782 | * The offset in the shadow object is actually also | |
9783 | * accounted for in upl->offset. It possibly shouldn't be | |
9784 | * this way, but for now don't account for it twice. | |
9785 | */ | |
9786 | shadow_offset = 0; | |
9787 | assert(upl_object->paging_offset == 0); /* XXX ? */ | |
9788 | vm_object_lock(shadow_object); | |
9789 | } else { | |
9790 | shadow_object = upl_object; | |
9791 | shadow_offset = 0; | |
9792 | } | |
9793 | ||
9794 | paging_offset = shadow_object->paging_offset; | |
9795 | vm_object_paging_begin(shadow_object); | |
9796 | ||
9797 | if (shadow_object != upl_object) | |
9798 | vm_object_unlock(upl_object); | |
9799 | ||
9800 | ||
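	/*
	 * "upl->offset" is expressed in the object's pager space (it
	 * includes the object's paging_offset), so subtracting
	 * "paging_offset" converts the UPL offset plus the caller's
	 * offset within the UPL back into an offset inside
	 * "shadow_object", where the pages can be looked up.
	 */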
9801 | base_offset = shadow_offset; | |
9802 | base_offset += upl_offset; | |
9803 | base_offset += crypt_offset; | |
9804 | base_offset -= paging_offset; | |
9805 | ||
9806 | assert(crypt_offset + crypt_size <= upl_size); | |
9807 | ||
9808 | for (offset_in_upl = 0; | |
9809 | offset_in_upl < crypt_size; | |
9810 | offset_in_upl += PAGE_SIZE) { | |
9811 | page = vm_page_lookup(shadow_object, | |
9812 | base_offset + offset_in_upl); | |
9813 | if (page == VM_PAGE_NULL) { | |
9814 | panic("upl_encrypt: " | |
9815 | "no page for (obj=%p,off=0x%llx+0x%x)!\n", | |
9816 | shadow_object, | |
9817 | base_offset, | |
9818 | offset_in_upl); | |
9819 | } | |
9820 | /* | |
9821 | * Disconnect the page from all pmaps, so that nobody can | |
9822 | * access it while it's encrypted. After that point, all | |
9823 | * accesses to this page will cause a page fault and block | |
9824 | * while the page is busy being encrypted. After the | |
9825 | * encryption completes, any access will cause a | |
9826 | * page fault and the page gets decrypted at that time. | |
9827 | */ | |
9828 | pmap_disconnect(page->phys_page); | |
9829 | vm_page_encrypt(page, 0); | |
9830 | ||
9831 | if (vm_object_lock_avoid(shadow_object)) { | |
9832 | /* | |
9833 | * Give vm_pageout_scan() a chance to convert more | |
9834 | * pages from "clean-in-place" to "clean-and-free", | |
9835 | * if it's interested in the same pages we selected | |
9836 | * in this cluster. | |
9837 | */ | |
9838 | vm_object_unlock(shadow_object); | |
9839 | mutex_pause(2); | |
9840 | vm_object_lock(shadow_object); | |
9841 | } | |
9842 | } | |
9843 | ||
9844 | vm_object_paging_end(shadow_object); | |
9845 | vm_object_unlock(shadow_object); | |
9846 | ||
9847 | if(isVectorUPL && subupl_size) | |
9848 | goto process_upl_to_encrypt; | |
9849 | } | |
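
/*
 * Illustrative sketch, compiled out: a pageout path that wants the
 * whole UPL encrypted before it is handed to the pager simply covers
 * the full range.  "upl" is a hypothetical, fully populated UPL.
 */
#if 0
	upl_encrypt(upl, (upl_offset_t) 0, upl_get_size(upl));
#endif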
9850 | ||
9851 | #else /* ENCRYPTED_SWAP */ | |
9852 | void | |
9853 | upl_encrypt( | |
9854 | __unused upl_t upl, | |
9855 | __unused upl_offset_t crypt_offset, | |
9856 | __unused upl_size_t crypt_size) | |
9857 | { | |
9858 | } | |
9859 | ||
9860 | void | |
9861 | vm_page_encrypt( | |
9862 | __unused vm_page_t page, | |
9863 | __unused vm_map_offset_t kernel_mapping_offset) | |
9864 | { | |
9865 | } | |
9866 | ||
9867 | void | |
9868 | vm_page_decrypt( | |
9869 | __unused vm_page_t page, | |
9870 | __unused vm_map_offset_t kernel_mapping_offset) | |
9871 | { | |
9872 | } | |
9873 | ||
9874 | #endif /* ENCRYPTED_SWAP */ | |
9875 | ||
9876 | /* | |
9877 | * page->object must be locked | |
9878 | */ | |
9879 | void | |
9880 | vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked) | |
9881 | { | |
9882 | if (!queues_locked) { | |
9883 | vm_page_lockspin_queues(); | |
9884 | } | |
9885 | ||
9886 | /* | |
9887 | * need to drop the laundry count... | |
9888 | * we may also need to remove it | |
9889 | * from the I/O paging queue... | |
9890 | * vm_pageout_throttle_up handles both cases | |
9891 | * | |
9892 | * the laundry and pageout_queue flags are cleared... | |
9893 | */ | |
9894 | vm_pageout_throttle_up(page); | |
9895 | ||
9896 | vm_page_steal_pageout_page++; | |
9897 | ||
9898 | if (!queues_locked) { | |
9899 | vm_page_unlock_queues(); | |
9900 | } | |
9901 | } | |
9902 | ||
9903 | upl_t | |
9904 | vector_upl_create(vm_offset_t upl_offset) | |
9905 | { | |
9906 | int vector_upl_size = sizeof(struct _vector_upl); | |
9907 | int i=0; | |
9908 | upl_t upl; | |
9909 | vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size); | |
9910 | ||
9911 | upl = upl_create(0,UPL_VECTOR,0); | |
9912 | upl->vector_upl = vector_upl; | |
9913 | upl->offset = upl_offset; | |
9914 | vector_upl->size = 0; | |
9915 | vector_upl->offset = upl_offset; | |
9916 | vector_upl->invalid_upls=0; | |
9917 | vector_upl->num_upls=0; | |
9918 | vector_upl->pagelist = NULL; | |
9919 | ||
9920 | for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) { | |
9921 | vector_upl->upl_iostates[i].size = 0; | |
9922 | vector_upl->upl_iostates[i].offset = 0; | |
9923 | ||
9924 | } | |
9925 | return upl; | |
9926 | } | |
9927 | ||
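/*
 * Illustrative sketch, compiled out: assembling a vector UPL from two
 * already-created sub-UPLs and recording their I/O states.  The names
 * "subupl_a" and "subupl_b" and the sizes are hypothetical; the
 * routines used are the ones defined in this file.
 */
#if 0
	upl_t	vupl;

	vupl = vector_upl_create(0);

	/* each sub-UPL contributes its I/O size to the vector UPL */
	vector_upl_set_subupl(vupl, subupl_a, 2 * PAGE_SIZE);
	vector_upl_set_subupl(vupl, subupl_b, 1 * PAGE_SIZE);

	/* record where each sub-UPL sits within the vector UPL */
	vector_upl_set_iostate(vupl, subupl_a, 0, 2 * PAGE_SIZE);
	vector_upl_set_iostate(vupl, subupl_b, 2 * PAGE_SIZE, 1 * PAGE_SIZE);

	/* build the combined page list once all sub-UPLs are in place */
	vector_upl_set_pagelist(vupl);
#endif
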
9928 | void | |
9929 | vector_upl_deallocate(upl_t upl) | |
9930 | { | |
9931 | if(upl) { | |
9932 | vector_upl_t vector_upl = upl->vector_upl; | |
9933 | if(vector_upl) { | |
9934 | if(vector_upl->invalid_upls != vector_upl->num_upls) | |
9935 | panic("Deallocating non-empty Vectored UPL\n"); | |
9936 | kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE))); | |
9937 | vector_upl->invalid_upls=0; | |
9938 | vector_upl->num_upls = 0; | |
9939 | vector_upl->pagelist = NULL; | |
9940 | vector_upl->size = 0; | |
9941 | vector_upl->offset = 0; | |
9942 | kfree(vector_upl, sizeof(struct _vector_upl)); | |
9943 | vector_upl = (vector_upl_t)0xfeedfeed; | |
9944 | } | |
9945 | else | |
9946 | panic("vector_upl_deallocate was passed a non-vectored upl\n"); | |
9947 | } | |
9948 | else | |
9949 | panic("vector_upl_deallocate was passed a NULL upl\n"); | |
9950 | } | |
9951 | ||
9952 | boolean_t | |
9953 | vector_upl_is_valid(upl_t upl) | |
9954 | { | |
9955 | if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) { | |
9956 | vector_upl_t vector_upl = upl->vector_upl; | |
9957 | if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef) | |
9958 | return FALSE; | |
9959 | else | |
9960 | return TRUE; | |
9961 | } | |
9962 | return FALSE; | |
9963 | } | |
9964 | ||
9965 | boolean_t | |
9966 | vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size) | |
9967 | { | |
9968 | if(vector_upl_is_valid(upl)) { | |
9969 | vector_upl_t vector_upl = upl->vector_upl; | |
9970 | ||
9971 | if(vector_upl) { | |
9972 | if(subupl) { | |
9973 | if(io_size) { | |
9974 | if(io_size < PAGE_SIZE) | |
9975 | io_size = PAGE_SIZE; | |
9976 | subupl->vector_upl = (void*)vector_upl; | |
9977 | vector_upl->upl_elems[vector_upl->num_upls++] = subupl; | |
9978 | vector_upl->size += io_size; | |
9979 | upl->size += io_size; | |
9980 | } | |
9981 | else { | |
9982 | uint32_t i=0,invalid_upls=0; | |
9983 | for(i = 0; i < vector_upl->num_upls; i++) { | |
9984 | if(vector_upl->upl_elems[i] == subupl) | |
9985 | break; | |
9986 | } | |
9987 | if(i == vector_upl->num_upls) | |
9988 | panic("Trying to remove sub-upl when none exists"); | |
9989 | ||
9990 | vector_upl->upl_elems[i] = NULL; | |
9991 | invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1); | |
9992 | if(invalid_upls == vector_upl->num_upls) | |
9993 | return TRUE; | |
9994 | else | |
9995 | return FALSE; | |
9996 | } | |
9997 | } | |
9998 | else | |
9999 | panic("vector_upl_set_subupl was passed a NULL upl element\n"); | |
10000 | } | |
10001 | else | |
10002 | panic("vector_upl_set_subupl was passed a non-vectored upl\n"); | |
10003 | } | |
10004 | else | |
10005 | panic("vector_upl_set_subupl was passed a NULL upl\n"); | |
10006 | ||
10007 | return FALSE; | |
10008 | } | |
10009 | ||
10010 | void | |
10011 | vector_upl_set_pagelist(upl_t upl) | |
10012 | { | |
10013 | if(vector_upl_is_valid(upl)) { | |
10014 | uint32_t i=0; | |
10015 | vector_upl_t vector_upl = upl->vector_upl; | |
10016 | ||
10017 | if(vector_upl) { | |
10018 | vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0; | |
10019 | ||
10020 | vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)); | |
10021 | ||
10022 | for(i=0; i < vector_upl->num_upls; i++) { | |
10023 | cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE; | |
10024 | bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size); | |
10025 | pagelist_size += cur_upl_pagelist_size; | |
10026 | if(vector_upl->upl_elems[i]->highest_page > upl->highest_page) | |
10027 | upl->highest_page = vector_upl->upl_elems[i]->highest_page; | |
10028 | } | |
10029 | assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) ); | |
10030 | } | |
10031 | else | |
10032 | panic("vector_upl_set_pagelist was passed a non-vectored upl\n"); | |
10033 | } | |
10034 | else | |
10035 | panic("vector_upl_set_pagelist was passed a NULL upl\n"); | |
10036 | ||
10037 | } | |
10038 | ||
10039 | upl_t | |
10040 | vector_upl_subupl_byindex(upl_t upl, uint32_t index) | |
10041 | { | |
10042 | if(vector_upl_is_valid(upl)) { | |
10043 | vector_upl_t vector_upl = upl->vector_upl; | |
10044 | if(vector_upl) { | |
10045 | if(index < vector_upl->num_upls) | |
10046 | return vector_upl->upl_elems[index]; | |
10047 | } | |
10048 | else | |
10049 | panic("vector_upl_subupl_byindex was passed a non-vectored upl\n"); | |
10050 | } | |
10051 | return NULL; | |
10052 | } | |
10053 | ||
10054 | upl_t | |
10055 | vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size) | |
10056 | { | |
10057 | if(vector_upl_is_valid(upl)) { | |
10058 | uint32_t i=0; | |
10059 | vector_upl_t vector_upl = upl->vector_upl; | |
10060 | ||
10061 | if(vector_upl) { | |
10062 | upl_t subupl = NULL; | |
10063 | vector_upl_iostates_t subupl_state; | |
10064 | ||
10065 | for(i=0; i < vector_upl->num_upls; i++) { | |
10066 | subupl = vector_upl->upl_elems[i]; | |
10067 | subupl_state = vector_upl->upl_iostates[i]; | |
10068 | if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) { | |
10069 | /* We could have been passed an offset/size pair that belongs | |
10070 | * to a UPL element that has already been committed/aborted. | |
10071 | * If so, return NULL. | |
10072 | */ | |
10073 | if(subupl == NULL) | |
10074 | return NULL; | |
10075 | if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) { | |
10076 | *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset; | |
10077 | if(*upl_size > subupl_state.size) | |
10078 | *upl_size = subupl_state.size; | |
10079 | } | |
10080 | if(*upl_offset >= subupl_state.offset) | |
10081 | *upl_offset -= subupl_state.offset; | |
10082 | else if(i) | |
10083 | panic("Vector UPL offset miscalculation\n"); | |
10084 | return subupl; | |
10085 | } | |
10086 | } | |
10087 | } | |
10088 | else | |
10089 | panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n"); | |
10090 | } | |
10091 | return NULL; | |
10092 | } | |
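
/*
 * Worked example (using the hypothetical layout from the
 * vector_upl_create() sketch above): with sub-UPL "a" covering
 * iostate [0, 2*PAGE_SIZE) and sub-UPL "b" covering
 * [2*PAGE_SIZE, 3*PAGE_SIZE), a lookup at offset 2*PAGE_SIZE for
 * PAGE_SIZE bytes skips "a", matches "b", and comes back with the
 * offset rebased to 0 within "b" and the size left at PAGE_SIZE.
 */
#if 0
	upl_offset_t	off  = 2 * PAGE_SIZE;
	upl_size_t	size = PAGE_SIZE;
	upl_t		sub;

	sub = vector_upl_subupl_byoffset(vupl, &off, &size);
	/* sub == subupl_b, off == 0, size == PAGE_SIZE */
#endif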
10093 | ||
10094 | void | |
10095 | vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr) | |
10096 | { | |
10097 | *v_upl_submap = NULL; | |
10098 | ||
10099 | if(vector_upl_is_valid(upl)) { | |
10100 | vector_upl_t vector_upl = upl->vector_upl; | |
10101 | if(vector_upl) { | |
10102 | *v_upl_submap = vector_upl->submap; | |
10103 | *submap_dst_addr = vector_upl->submap_dst_addr; | |
10104 | } | |
10105 | else | |
10106 | panic("vector_upl_get_submap was passed a non-vectored UPL\n"); | |
10107 | } | |
10108 | else | |
10109 | panic("vector_upl_get_submap was passed a null UPL\n"); | |
10110 | } | |
10111 | ||
10112 | void | |
10113 | vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr) | |
10114 | { | |
10115 | if(vector_upl_is_valid(upl)) { | |
10116 | vector_upl_t vector_upl = upl->vector_upl; | |
10117 | if(vector_upl) { | |
10118 | vector_upl->submap = submap; | |
10119 | vector_upl->submap_dst_addr = submap_dst_addr; | |
10120 | } | |
10121 | else | |
10122 | panic("vector_upl_get_submap was passed a non-vectored UPL\n"); | |
10123 | } | |
10124 | else | |
10125 | panic("vector_upl_get_submap was passed a NULL UPL\n"); | |
10126 | } | |
10127 | ||
10128 | void | |
10129 | vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size) | |
10130 | { | |
10131 | if(vector_upl_is_valid(upl)) { | |
10132 | uint32_t i = 0; | |
10133 | vector_upl_t vector_upl = upl->vector_upl; | |
10134 | ||
10135 | if(vector_upl) { | |
10136 | for(i = 0; i < vector_upl->num_upls; i++) { | |
10137 | if(vector_upl->upl_elems[i] == subupl) | |
10138 | break; | |
10139 | } | |
10140 | ||
10141 | if(i == vector_upl->num_upls) | |
10142 | panic("setting sub-upl iostate when none exists"); | |
10143 | ||
10144 | vector_upl->upl_iostates[i].offset = offset; | |
10145 | if(size < PAGE_SIZE) | |
10146 | size = PAGE_SIZE; | |
10147 | vector_upl->upl_iostates[i].size = size; | |
10148 | } | |
10149 | else | |
10150 | panic("vector_upl_set_iostate was passed a non-vectored UPL\n"); | |
10151 | } | |
10152 | else | |
10153 | panic("vector_upl_set_iostate was passed a NULL UPL\n"); | |
10154 | } | |
10155 | ||
10156 | void | |
10157 | vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size) | |
10158 | { | |
10159 | if(vector_upl_is_valid(upl)) { | |
10160 | uint32_t i = 0; | |
10161 | vector_upl_t vector_upl = upl->vector_upl; | |
10162 | ||
10163 | if(vector_upl) { | |
10164 | for(i = 0; i < vector_upl->num_upls; i++) { | |
10165 | if(vector_upl->upl_elems[i] == subupl) | |
10166 | break; | |
10167 | } | |
10168 | ||
10169 | if(i == vector_upl->num_upls) | |
10170 | panic("getting sub-upl iostate when none exists"); | |
10171 | ||
10172 | *offset = vector_upl->upl_iostates[i].offset; | |
10173 | *size = vector_upl->upl_iostates[i].size; | |
10174 | } | |
10175 | else | |
10176 | panic("vector_upl_get_iostate was passed a non-vectored UPL\n"); | |
10177 | } | |
10178 | else | |
10179 | panic("vector_upl_get_iostate was passed a NULL UPL\n"); | |
10180 | } | |
10181 | ||
10182 | void | |
10183 | vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size) | |
10184 | { | |
10185 | if(vector_upl_is_valid(upl)) { | |
10186 | vector_upl_t vector_upl = upl->vector_upl; | |
10187 | if(vector_upl) { | |
10188 | if(index < vector_upl->num_upls) { | |
10189 | *offset = vector_upl->upl_iostates[index].offset; | |
10190 | *size = vector_upl->upl_iostates[index].size; | |
10191 | } | |
10192 | else | |
10193 | *offset = *size = 0; | |
10194 | } | |
10195 | else | |
10196 | panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n"); | |
10197 | } | |
10198 | else | |
10199 | panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n"); | |
10200 | } | |
10201 | ||
10202 | upl_page_info_t * | |
10203 | upl_get_internal_vectorupl_pagelist(upl_t upl) | |
10204 | { | |
10205 | return ((vector_upl_t)(upl->vector_upl))->pagelist; | |
10206 | } | |
10207 | ||
10208 | void * | |
10209 | upl_get_internal_vectorupl(upl_t upl) | |
10210 | { | |
10211 | return upl->vector_upl; | |
10212 | } | |
10213 | ||
10214 | vm_size_t | |
10215 | upl_get_internal_pagelist_offset(void) | |
10216 | { | |
10217 | return sizeof(struct upl); | |
10218 | } | |
10219 | ||
10220 | void | |
10221 | upl_clear_dirty( | |
10222 | upl_t upl, | |
10223 | boolean_t value) | |
10224 | { | |
10225 | if (value) { | |
10226 | upl->flags |= UPL_CLEAR_DIRTY; | |
10227 | } else { | |
10228 | upl->flags &= ~UPL_CLEAR_DIRTY; | |
10229 | } | |
10230 | } | |
10231 | ||
10232 | void | |
10233 | upl_set_referenced( | |
10234 | upl_t upl, | |
10235 | boolean_t value) | |
10236 | { | |
10237 | upl_lock(upl); | |
10238 | if (value) { | |
10239 | upl->ext_ref_count++; | |
10240 | } else { | |
10241 | if (!upl->ext_ref_count) { | |
10242 | panic("upl_set_referenced not %p\n", upl); | |
10243 | } | |
10244 | upl->ext_ref_count--; | |
10245 | } | |
10246 | upl_unlock(upl); | |
10247 | } | |
10248 | ||
10249 | #if CONFIG_IOSCHED | |
10250 | void | |
10251 | upl_set_blkno( | |
10252 | upl_t upl, | |
10253 | vm_offset_t upl_offset, | |
10254 | int io_size, | |
10255 | int64_t blkno) | |
10256 | { | |
10257 | int i,j; | |
10258 | if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0) | |
10259 | return; | |
10260 | ||
10261 | assert(upl->upl_reprio_info != 0); | |
10262 | for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) { | |
10263 | UPL_SET_REPRIO_INFO(upl, i, blkno, io_size); | |
10264 | } | |
10265 | } | |
10266 | #endif | |
10267 | ||
10268 | boolean_t | |
10269 | vm_page_is_slideable(vm_page_t m) | |
10270 | { | |
10271 | boolean_t result = FALSE; | |
10272 | vm_shared_region_slide_info_t si; | |
10273 | ||
10274 | vm_object_lock_assert_held(m->object); | |
10275 | ||
10276 | /* make sure our page belongs to the one object allowed to do this */ | |
10277 | if (!m->object->object_slid) { | |
10278 | goto done; | |
10279 | } | |
10280 | ||
10281 | si = m->object->vo_slide_info; | |
10282 | if (si == NULL) { | |
10283 | goto done; | |
10284 | } | |
10285 | ||
10286 | if(!m->slid && (si->start <= m->offset && si->end > m->offset)) { | |
10287 | result = TRUE; | |
10288 | } | |
10289 | ||
10290 | done: | |
10291 | return result; | |
10292 | } | |
10293 | ||
10294 | int vm_page_slide_counter = 0; | |
10295 | int vm_page_slide_errors = 0; | |
10296 | kern_return_t | |
10297 | vm_page_slide( | |
10298 | vm_page_t page, | |
10299 | vm_map_offset_t kernel_mapping_offset) | |
10300 | { | |
10301 | kern_return_t kr; | |
10302 | vm_map_size_t kernel_mapping_size; | |
10303 | boolean_t kernel_mapping_needs_unmap; | |
10304 | vm_offset_t kernel_vaddr; | |
10305 | uint32_t pageIndex; | |
10306 | uint32_t slide_chunk; | |
10307 | ||
10308 | assert(!page->slid); | |
10309 | assert(page->object->object_slid); | |
10310 | vm_object_lock_assert_exclusive(page->object); | |
10311 | ||
10312 | if (page->error) | |
10313 | return KERN_FAILURE; | |
10314 | ||
10315 | /* | |
10316 | * Take a paging-in-progress reference to keep the object | |
10317 | * alive even if we have to unlock it (in vm_paging_map_object() | |
10318 | * for example)... | |
10319 | */ | |
10320 | vm_object_paging_begin(page->object); | |
10321 | ||
10322 | if (kernel_mapping_offset == 0) { | |
10323 | /* | |
10324 | * The page hasn't already been mapped in kernel space | |
10325 | * by the caller. Map it now, so that we can access | |
10326 | * its contents and slide them. | |
10327 | */ | |
10328 | kernel_mapping_size = PAGE_SIZE; | |
10329 | kernel_mapping_needs_unmap = FALSE; | |
10330 | kr = vm_paging_map_object(page, | |
10331 | page->object, | |
10332 | page->offset, | |
10333 | VM_PROT_READ | VM_PROT_WRITE, | |
10334 | FALSE, | |
10335 | &kernel_mapping_size, | |
10336 | &kernel_mapping_offset, | |
10337 | &kernel_mapping_needs_unmap); | |
10338 | if (kr != KERN_SUCCESS) { | |
10339 | panic("vm_page_slide: " | |
10340 | "could not map page in kernel: 0x%x\n", | |
10341 | kr); | |
10342 | } | |
10343 | } else { | |
10344 | kernel_mapping_size = 0; | |
10345 | kernel_mapping_needs_unmap = FALSE; | |
10346 | } | |
10347 | kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); | |
10348 | ||
10349 | /* | |
10350 | * Slide the pointers on the page. | |
10351 | */ | |
10352 | ||
10353 | /*assert that slide_file_info.start/end are page-aligned?*/ | |
10354 | ||
10355 | assert(!page->slid); | |
10356 | assert(page->object->object_slid); | |
10357 | ||
10358 | #define PAGE_SIZE_FOR_SR_SLIDE 4096 | |
10359 | pageIndex = (uint32_t)((page->offset - | |
10360 | page->object->vo_slide_info->start) / | |
10361 | PAGE_SIZE_FOR_SR_SLIDE); | |
10362 | for (slide_chunk = 0; | |
10363 | slide_chunk < PAGE_SIZE / PAGE_SIZE_FOR_SR_SLIDE; | |
10364 | slide_chunk++) { | |
10365 | kr = vm_shared_region_slide_page(page->object->vo_slide_info, | |
10366 | (kernel_vaddr + | |
10367 | (slide_chunk * | |
10368 | PAGE_SIZE_FOR_SR_SLIDE)), | |
10369 | (pageIndex + slide_chunk)); | |
10370 | if (kr != KERN_SUCCESS) { | |
10371 | break; | |
10372 | } | |
10373 | } | |
10374 | ||
10375 | vm_page_slide_counter++; | |
10376 | ||
10377 | /* | |
10378 | * Unmap the page from the kernel's address space, | |
10379 | */ | |
10380 | if (kernel_mapping_needs_unmap) { | |
10381 | vm_paging_unmap_object(page->object, | |
10382 | kernel_vaddr, | |
10383 | kernel_vaddr + PAGE_SIZE); | |
10384 | } | |
10385 | ||
10386 | page->dirty = FALSE; | |
10387 | pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED); | |
10388 | ||
10389 | if (kr != KERN_SUCCESS || cs_debug > 1) { | |
10390 | printf("vm_page_slide(%p): " | |
10391 | "obj %p off 0x%llx mobj %p moff 0x%llx\n", | |
10392 | page, | |
10393 | page->object, page->offset, | |
10394 | page->object->pager, | |
10395 | page->offset + page->object->paging_offset); | |
10396 | } | |
10397 | ||
10398 | if (kr == KERN_SUCCESS) { | |
10399 | page->slid = TRUE; | |
10400 | } else { | |
10401 | page->error = TRUE; | |
10402 | vm_page_slide_errors++; | |
10403 | } | |
10404 | ||
10405 | vm_object_paging_end(page->object); | |
10406 | ||
10407 | return kr; | |
10408 | } | |
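
/*
 * A note on the chunking above: shared-region slide information is
 * recorded in PAGE_SIZE_FOR_SR_SLIDE (4 KB) units, so a VM page is
 * slid one 4 KB chunk at a time.  With 4 KB VM pages the loop runs
 * once; with 16 KB VM pages, for example, "pageIndex" gives the index
 * of the page's first 4 KB sub-page and "slide_chunk" walks all four.
 */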
10409 | ||
10410 | void inline memoryshot(unsigned int event, unsigned int control) | |
10411 | { | |
10412 | if (vm_debug_events) { | |
10413 | KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control, | |
10414 | vm_page_active_count, vm_page_inactive_count, | |
10415 | vm_page_free_count, vm_page_speculative_count, | |
10416 | vm_page_throttled_count); | |
10417 | } else { | |
10418 | (void) event; | |
10419 | (void) control; | |
10420 | } | |
10421 | ||
10422 | } | |
10423 | ||
10424 | #ifdef MACH_BSD | |
10425 | ||
10426 | boolean_t upl_device_page(upl_page_info_t *upl) | |
10427 | { | |
10428 | return(UPL_DEVICE_PAGE(upl)); | |
10429 | } | |
10430 | boolean_t upl_page_present(upl_page_info_t *upl, int index) | |
10431 | { | |
10432 | return(UPL_PAGE_PRESENT(upl, index)); | |
10433 | } | |
10434 | boolean_t upl_speculative_page(upl_page_info_t *upl, int index) | |
10435 | { | |
10436 | return(UPL_SPECULATIVE_PAGE(upl, index)); | |
10437 | } | |
10438 | boolean_t upl_dirty_page(upl_page_info_t *upl, int index) | |
10439 | { | |
10440 | return(UPL_DIRTY_PAGE(upl, index)); | |
10441 | } | |
10442 | boolean_t upl_valid_page(upl_page_info_t *upl, int index) | |
10443 | { | |
10444 | return(UPL_VALID_PAGE(upl, index)); | |
10445 | } | |
10446 | ppnum_t upl_phys_page(upl_page_info_t *upl, int index) | |
10447 | { | |
10448 | return(UPL_PHYS_PAGE(upl, index)); | |
10449 | } | |
10450 | ||
10451 | void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v) | |
10452 | { | |
10453 | upl[index].mark = v; | |
10454 | } | |
10455 | ||
10456 | boolean_t upl_page_get_mark(upl_page_info_t *upl, int index) | |
10457 | { | |
10458 | return upl[index].mark; | |
10459 | } | |
10460 | ||
10461 | void | |
10462 | vm_countdirtypages(void) | |
10463 | { | |
10464 | vm_page_t m; | |
10465 | int dpages; | |
10466 | int pgopages; | |
10467 | int precpages; | |
10468 | ||
10469 | ||
10470 | dpages=0; | |
10471 | pgopages=0; | |
10472 | precpages=0; | |
10473 | ||
10474 | vm_page_lock_queues(); | |
10475 | m = (vm_page_t) queue_first(&vm_page_queue_inactive); | |
10476 | do { | |
10477 | if (m ==(vm_page_t )0) break; | |
10478 | ||
10479 | if(m->dirty) dpages++; | |
10480 | if(m->pageout) pgopages++; | |
10481 | if(m->precious) precpages++; | |
10482 | ||
10483 | assert(m->object != kernel_object); | |
10484 | m = (vm_page_t) queue_next(&m->pageq); | |
10485 | if (m ==(vm_page_t )0) break; | |
10486 | ||
10487 | } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m)); | |
10488 | vm_page_unlock_queues(); | |
10489 | ||
10490 | vm_page_lock_queues(); | |
10491 | m = (vm_page_t) queue_first(&vm_page_queue_throttled); | |
10492 | do { | |
10493 | if (m ==(vm_page_t )0) break; | |
10494 | ||
10495 | dpages++; | |
10496 | assert(m->dirty); | |
10497 | assert(!m->pageout); | |
10498 | assert(m->object != kernel_object); | |
10499 | m = (vm_page_t) queue_next(&m->pageq); | |
10500 | if (m ==(vm_page_t )0) break; | |
10501 | ||
10502 | } while (!queue_end(&vm_page_queue_throttled,(queue_entry_t) m)); | |
10503 | vm_page_unlock_queues(); | |
10504 | ||
10505 | vm_page_lock_queues(); | |
10506 | m = (vm_page_t) queue_first(&vm_page_queue_anonymous); | |
10507 | do { | |
10508 | if (m ==(vm_page_t )0) break; | |
10509 | ||
10510 | if(m->dirty) dpages++; | |
10511 | if(m->pageout) pgopages++; | |
10512 | if(m->precious) precpages++; | |
10513 | ||
10514 | assert(m->object != kernel_object); | |
10515 | m = (vm_page_t) queue_next(&m->pageq); | |
10516 | if (m ==(vm_page_t )0) break; | |
10517 | ||
10518 | } while (!queue_end(&vm_page_queue_anonymous,(queue_entry_t) m)); | |
10519 | vm_page_unlock_queues(); | |
10520 | ||
10521 | printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages); | |
10522 | ||
10523 | dpages=0; | |
10524 | pgopages=0; | |
10525 | precpages=0; | |
10526 | ||
10527 | vm_page_lock_queues(); | |
10528 | m = (vm_page_t) queue_first(&vm_page_queue_active); | |
10529 | ||
10530 | do { | |
10531 | if(m == (vm_page_t )0) break; | |
10532 | if(m->dirty) dpages++; | |
10533 | if(m->pageout) pgopages++; | |
10534 | if(m->precious) precpages++; | |
10535 | ||
10536 | assert(m->object != kernel_object); | |
10537 | m = (vm_page_t) queue_next(&m->pageq); | |
10538 | if(m == (vm_page_t )0) break; | |
10539 | ||
10540 | } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m)); | |
10541 | vm_page_unlock_queues(); | |
10542 | ||
10543 | printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages); | |
10544 | ||
10545 | } | |
10546 | #endif /* MACH_BSD */ | |
10547 | ||
10548 | ppnum_t upl_get_highest_page( | |
10549 | upl_t upl) | |
10550 | { | |
10551 | return upl->highest_page; | |
10552 | } | |
10553 | ||
10554 | upl_size_t upl_get_size( | |
10555 | upl_t upl) | |
10556 | { | |
10557 | return upl->size; | |
10558 | } | |
10559 | ||
10560 | upl_t upl_associated_upl(upl_t upl) | |
10561 | { | |
10562 | return upl->associated_upl; | |
10563 | } | |
10564 | ||
10565 | void upl_set_associated_upl(upl_t upl, upl_t associated_upl) | |
10566 | { | |
10567 | upl->associated_upl = associated_upl; | |
10568 | } | |
10569 | ||
10570 | #if UPL_DEBUG | |
10571 | kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2) | |
10572 | { | |
10573 | upl->ubc_alias1 = alias1; | |
10574 | upl->ubc_alias2 = alias2; | |
10575 | return KERN_SUCCESS; | |
10576 | } | |
10577 | int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2) | |
10578 | { | |
10579 | if(al) | |
10580 | *al = upl->ubc_alias1; | |
10581 | if(al2) | |
10582 | *al2 = upl->ubc_alias2; | |
10583 | return KERN_SUCCESS; | |
10584 | } | |
10585 | #endif /* UPL_DEBUG */ | |
10586 | ||
10587 | #if VM_PRESSURE_EVENTS | |
10588 | /* | |
10589 | * Upward trajectory. | |
10590 | */ | |
10591 | extern boolean_t vm_compressor_low_on_space(void); | |
10592 | ||
10593 | boolean_t | |
10594 | VM_PRESSURE_NORMAL_TO_WARNING(void) { | |
10595 | ||
10596 | if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) { | |
10597 | ||
10598 | /* Available pages below our threshold */ | |
10599 | if (memorystatus_available_pages < memorystatus_available_pages_pressure) { | |
10600 | /* No frozen processes to kill */ | |
10601 | if (memorystatus_frozen_count == 0) { | |
10602 | /* Not enough suspended processes available. */ | |
10603 | if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) { | |
10604 | return TRUE; | |
10605 | } | |
10606 | } | |
10607 | } | |
10608 | return FALSE; | |
10609 | ||
10610 | } else { | |
10611 | return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0); | |
10612 | } | |
10613 | } | |
10614 | ||
10615 | boolean_t | |
10616 | VM_PRESSURE_WARNING_TO_CRITICAL(void) { | |
10617 | ||
10618 | if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) { | |
10619 | /* Available pages below our threshold */ | |
10620 | if (memorystatus_available_pages < memorystatus_available_pages_critical) { | |
10621 | return TRUE; | |
10622 | } | |
10623 | return FALSE; | |
10624 | } else { | |
10625 | return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0); | |
10626 | } | |
10627 | } | |
10628 | ||
10629 | /* | |
10630 | * Downward trajectory. | |
10631 | */ | |
10632 | boolean_t | |
10633 | VM_PRESSURE_WARNING_TO_NORMAL(void) { | |
10634 | ||
10635 | if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) { | |
10636 | /* Available pages above our threshold */ | |
10637 | unsigned int target_threshold = memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100); | |
10638 | if (memorystatus_available_pages > target_threshold) { | |
10639 | return TRUE; | |
10640 | } | |
10641 | return FALSE; | |
10642 | } else { | |
10643 | return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0); | |
10644 | } | |
10645 | } | |
10646 | ||
10647 | boolean_t | |
10648 | VM_PRESSURE_CRITICAL_TO_WARNING(void) { | |
10649 | ||
10650 | if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) { | |
10651 | /* Available pages above our threshold */ | |
10652 | unsigned int target_threshold = memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100); | |
10653 | if (memorystatus_available_pages > target_threshold) { | |
10654 | return TRUE; | |
10655 | } | |
10656 | return FALSE; | |
10657 | } else { | |
10658 | return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0); | |
10659 | } | |
10660 | } | |
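
/*
 * When the default pager or freezer is active, the downward
 * transitions above build in roughly 15% of hysteresis: "warning"
 * only clears once available pages exceed the pressure threshold by
 * 15%, and "critical" only falls back to "warning" 15% above the
 * critical threshold.  For example, with a hypothetical
 * memorystatus_available_pages_pressure of 4000 pages,
 * target_threshold is 4000 + (15 * 4000) / 100 = 4600 pages.
 * In the compressor case, the corresponding margins are the 12/10
 * and 14/10 ratios of the compact and swap-unthrottle thresholds.
 */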
10661 | #endif /* VM_PRESSURE_EVENTS */ | |
10662 |