]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_pageout.c
xnu-3248.60.10.tar.gz
[apple/xnu.git] / osfmk / vm / vm_pageout.c
CommitLineData
1c79356b 1/*
fe8ab488 2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
1c79356b 65
91447636
A
66#include <stdint.h>
67
68#include <debug.h>
1c79356b
A
69#include <mach_pagemap.h>
70#include <mach_cluster_stats.h>
1c79356b
A
71
72#include <mach/mach_types.h>
73#include <mach/memory_object.h>
74#include <mach/memory_object_default.h>
0b4e3aa0 75#include <mach/memory_object_control_server.h>
1c79356b 76#include <mach/mach_host_server.h>
91447636
A
77#include <mach/upl.h>
78#include <mach/vm_map.h>
1c79356b
A
79#include <mach/vm_param.h>
80#include <mach/vm_statistics.h>
2d21ac55 81#include <mach/sdt.h>
91447636
A
82
83#include <kern/kern_types.h>
1c79356b 84#include <kern/counters.h>
91447636
A
85#include <kern/host_statistics.h>
86#include <kern/machine.h>
87#include <kern/misc_protos.h>
b0d623f7 88#include <kern/sched.h>
1c79356b 89#include <kern/thread.h>
1c79356b 90#include <kern/xpr.h>
91447636
A
91#include <kern/kalloc.h>
92
93#include <machine/vm_tuning.h>
b0d623f7 94#include <machine/commpage.h>
91447636 95
1c79356b 96#include <vm/pmap.h>
39236c6e 97#include <vm/vm_compressor_pager.h>
55e303ae 98#include <vm/vm_fault.h>
1c79356b
A
99#include <vm/vm_map.h>
100#include <vm/vm_object.h>
101#include <vm/vm_page.h>
102#include <vm/vm_pageout.h>
91447636 103#include <vm/vm_protos.h> /* must be last */
2d21ac55
A
104#include <vm/memory_object.h>
105#include <vm/vm_purgeable_internal.h>
6d2010ae 106#include <vm/vm_shared_region.h>
39236c6e
A
107#include <vm/vm_compressor.h>
108
fe8ab488
A
109#if CONFIG_PHANTOM_CACHE
110#include <vm/vm_phantom_cache.h>
111#endif
91447636
A
112/*
113 * ENCRYPTED SWAP:
114 */
316670eb 115#include <libkern/crypto/aes.h>
b0d623f7 116extern u_int32_t random(void); /* from <libkern/libkern.h> */
55e303ae 117
316670eb
A
118extern int cs_debug;
119
b0d623f7
A
120#if UPL_DEBUG
121#include <libkern/OSDebug.h>
122#endif
91447636 123
fe8ab488
A
124extern void m_drain(void);
125
126#if VM_PRESSURE_EVENTS
127extern unsigned int memorystatus_available_pages;
128extern unsigned int memorystatus_available_pages_pressure;
129extern unsigned int memorystatus_available_pages_critical;
130extern unsigned int memorystatus_frozen_count;
131extern unsigned int memorystatus_suspended_count;
132
39236c6e
A
133extern vm_pressure_level_t memorystatus_vm_pressure_level;
134int memorystatus_purge_on_warning = 2;
135int memorystatus_purge_on_urgent = 5;
136int memorystatus_purge_on_critical = 8;
137
39236c6e
A
138void vm_pressure_response(void);
139boolean_t vm_pressure_thread_running = FALSE;
316670eb 140extern void consider_vm_pressure_events(void);
fe8ab488
A
141
142#define MEMORYSTATUS_SUSPENDED_THRESHOLD 4
143#endif /* VM_PRESSURE_EVENTS */
144
39236c6e 145boolean_t vm_pressure_changed = FALSE;
6d2010ae 146
2d21ac55 147#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */
2d21ac55
A
148#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100
149#endif
91447636 150
2d21ac55 151#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
2d21ac55
A
152#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
153#endif
91447636
A
154
155#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
156#define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
157#endif
158
159#ifndef VM_PAGEOUT_INACTIVE_RELIEF
160#define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
161#endif
162
1c79356b 163#ifndef VM_PAGE_LAUNDRY_MAX
6d2010ae 164#define VM_PAGE_LAUNDRY_MAX 128UL /* maximum pageouts on a given pageout queue */
1c79356b
A
165#endif /* VM_PAGEOUT_LAUNDRY_MAX */
166
1c79356b 167#ifndef VM_PAGEOUT_BURST_WAIT
fe8ab488 168#define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds */
1c79356b
A
169#endif /* VM_PAGEOUT_BURST_WAIT */
170
171#ifndef VM_PAGEOUT_EMPTY_WAIT
172#define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
173#endif /* VM_PAGEOUT_EMPTY_WAIT */
174
91447636
A
175#ifndef VM_PAGEOUT_DEADLOCK_WAIT
176#define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
177#endif /* VM_PAGEOUT_DEADLOCK_WAIT */
178
179#ifndef VM_PAGEOUT_IDLE_WAIT
180#define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
181#endif /* VM_PAGEOUT_IDLE_WAIT */
182
39236c6e
A
183#ifndef VM_PAGEOUT_SWAP_WAIT
184#define VM_PAGEOUT_SWAP_WAIT 50 /* milliseconds */
185#endif /* VM_PAGEOUT_SWAP_WAIT */
186
316670eb
A
187#ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
188#define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED 1000 /* maximum pages considered before we issue a pressure event */
189#endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */
190
191#ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
192#define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS 5 /* seconds */
193#endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */
194
6d2010ae
A
195unsigned int vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
196unsigned int vm_page_speculative_percentage = 5;
197
2d21ac55 198#ifndef VM_PAGE_SPECULATIVE_TARGET
6d2010ae 199#define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
2d21ac55
A
200#endif /* VM_PAGE_SPECULATIVE_TARGET */
201
6d2010ae 202
2d21ac55
A
203#ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
204#define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
205#endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */
206
91447636 207
1c79356b
A
208/*
209 * To obtain a reasonable LRU approximation, the inactive queue
210 * needs to be large enough to give pages on it a chance to be
211 * referenced a second time. This macro defines the fraction
212 * of active+inactive pages that should be inactive.
213 * The pageout daemon uses it to update vm_page_inactive_target.
214 *
215 * If vm_page_free_count falls below vm_page_free_target and
216 * vm_page_inactive_count is below vm_page_inactive_target,
217 * then the pageout daemon starts running.
218 */
219
220#ifndef VM_PAGE_INACTIVE_TARGET
316670eb 221#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2)
1c79356b
A
222#endif /* VM_PAGE_INACTIVE_TARGET */
223
224/*
225 * Once the pageout daemon starts running, it keeps going
226 * until vm_page_free_count meets or exceeds vm_page_free_target.
227 */
228
229#ifndef VM_PAGE_FREE_TARGET
230#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
231#endif /* VM_PAGE_FREE_TARGET */
232
39236c6e 233
1c79356b
A
234/*
235 * The pageout daemon always starts running once vm_page_free_count
236 * falls below vm_page_free_min.
237 */
238
239#ifndef VM_PAGE_FREE_MIN
2d21ac55 240#define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
1c79356b
A
241#endif /* VM_PAGE_FREE_MIN */
242
fe8ab488
A
243#define VM_PAGE_FREE_RESERVED_LIMIT 1700
244#define VM_PAGE_FREE_MIN_LIMIT 3500
245#define VM_PAGE_FREE_TARGET_LIMIT 4000
2d21ac55 246
1c79356b
A
247/*
248 * When vm_page_free_count falls below vm_page_free_reserved,
249 * only vm-privileged threads can allocate pages. vm-privilege
250 * allows the pageout daemon and default pager (and any other
251 * associated threads needed for default pageout) to continue
252 * operation by dipping into the reserved pool of pages.
253 */
254
255#ifndef VM_PAGE_FREE_RESERVED
91447636 256#define VM_PAGE_FREE_RESERVED(n) \
b0d623f7 257 ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
1c79356b
A
258#endif /* VM_PAGE_FREE_RESERVED */
259
2d21ac55
A
260/*
261 * When we dequeue pages from the inactive list, they are
262 * reactivated (ie, put back on the active queue) if referenced.
263 * However, it is possible to starve the free list if other
264 * processors are referencing pages faster than we can turn off
265 * the referenced bit. So we limit the number of reactivations
266 * we will make per call of vm_pageout_scan().
267 */
268#define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
269#ifndef VM_PAGE_REACTIVATE_LIMIT
2d21ac55 270#define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
2d21ac55 271#endif /* VM_PAGE_REACTIVATE_LIMIT */
3e170ce0 272#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000
2d21ac55 273
91447636 274
316670eb
A
275extern boolean_t hibernate_cleaning_in_progress;
276
0b4e3aa0
A
277/*
278 * Exported variable used to broadcast the activation of the pageout scan
279 * Working Set uses this to throttle its use of pmap removes. In this
280 * way, code which runs within memory in an uncontested context does
281 * not keep encountering soft faults.
282 */
283
284unsigned int vm_pageout_scan_event_counter = 0;
1c79356b
A
285
286/*
287 * Forward declarations for internal routines.
288 */
39236c6e
A
289struct cq {
290 struct vm_pageout_queue *q;
291 void *current_chead;
292 char *scratch_buf;
3e170ce0 293 int id;
39236c6e 294};
3e170ce0
A
295#define MAX_COMPRESSOR_THREAD_COUNT 8
296
297struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
298
299void *vm_pageout_immediate_chead;
300char *vm_pageout_immediate_scratch_buf;
39236c6e 301
91447636 302
39236c6e
A
303#if VM_PRESSURE_EVENTS
304void vm_pressure_thread(void);
fe8ab488
A
305
306boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
307boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
308
309boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
310boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
39236c6e 311#endif
91447636
A
312static void vm_pageout_garbage_collect(int);
313static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
314static void vm_pageout_iothread_external(void);
39236c6e 315static void vm_pageout_iothread_internal(struct cq *cq);
316670eb 316static void vm_pageout_adjust_io_throttles(struct vm_pageout_queue *, struct vm_pageout_queue *, boolean_t);
91447636 317
1c79356b
A
318extern void vm_pageout_continue(void);
319extern void vm_pageout_scan(void);
1c79356b 320
3e170ce0
A
321static void vm_pageout_immediate(vm_page_t, boolean_t);
322boolean_t vm_compressor_immediate_preferred = FALSE;
323boolean_t vm_compressor_immediate_preferred_override = FALSE;
324boolean_t vm_restricted_to_single_processor = FALSE;
4bd07ac2
A
325static boolean_t vm_pageout_waiter = FALSE;
326static boolean_t vm_pageout_running = FALSE;
327
3e170ce0 328
2d21ac55
A
329static thread_t vm_pageout_external_iothread = THREAD_NULL;
330static thread_t vm_pageout_internal_iothread = THREAD_NULL;
331
1c79356b
A
332unsigned int vm_pageout_reserved_internal = 0;
333unsigned int vm_pageout_reserved_really = 0;
334
39236c6e 335unsigned int vm_pageout_swap_wait = 0;
91447636 336unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
55e303ae 337unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
91447636
A
338unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
339unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
340unsigned int vm_pageout_deadlock_relief = 0;
341unsigned int vm_pageout_inactive_relief = 0;
342unsigned int vm_pageout_burst_active_throttle = 0;
343unsigned int vm_pageout_burst_inactive_throttle = 0;
1c79356b 344
6d2010ae
A
345int vm_upl_wait_for_pages = 0;
346
b0d623f7 347
1c79356b
A
348/*
349 * These variables record the pageout daemon's actions:
350 * how many pages it looks at and what happens to those pages.
351 * No locking needed because only one thread modifies the variables.
352 */
353
354unsigned int vm_pageout_active = 0; /* debugging */
355unsigned int vm_pageout_inactive = 0; /* debugging */
356unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
357unsigned int vm_pageout_inactive_forced = 0; /* debugging */
358unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
359unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
360unsigned int vm_pageout_inactive_busy = 0; /* debugging */
6d2010ae 361unsigned int vm_pageout_inactive_error = 0; /* debugging */
1c79356b 362unsigned int vm_pageout_inactive_absent = 0; /* debugging */
6d2010ae 363unsigned int vm_pageout_inactive_notalive = 0; /* debugging */
1c79356b 364unsigned int vm_pageout_inactive_used = 0; /* debugging */
6d2010ae 365unsigned int vm_pageout_cache_evicted = 0; /* debugging */
1c79356b 366unsigned int vm_pageout_inactive_clean = 0; /* debugging */
6d2010ae 367unsigned int vm_pageout_speculative_clean = 0; /* debugging */
316670eb
A
368
369unsigned int vm_pageout_freed_from_cleaned = 0;
370unsigned int vm_pageout_freed_from_speculative = 0;
371unsigned int vm_pageout_freed_from_inactive_clean = 0;
372
373unsigned int vm_pageout_enqueued_cleaned_from_inactive_clean = 0;
374unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;
375
376unsigned int vm_pageout_cleaned_reclaimed = 0; /* debugging; how many cleaned pages are reclaimed by the pageout scan */
377unsigned int vm_pageout_cleaned_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
378unsigned int vm_pageout_cleaned_reference_reactivated = 0;
379unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
380unsigned int vm_pageout_cleaned_fault_reactivated = 0;
381unsigned int vm_pageout_cleaned_commit_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */
382unsigned int vm_pageout_cleaned_busy = 0;
383unsigned int vm_pageout_cleaned_nolock = 0;
384
6d2010ae
A
385unsigned int vm_pageout_inactive_dirty_internal = 0; /* debugging */
386unsigned int vm_pageout_inactive_dirty_external = 0; /* debugging */
b0d623f7 387unsigned int vm_pageout_inactive_deactivated = 0; /* debugging */
316670eb 388unsigned int vm_pageout_inactive_anonymous = 0; /* debugging */
1c79356b 389unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
3e170ce0 390unsigned int vm_pageout_purged_objects = 0; /* used for sysctl vm stats */
1c79356b
A
391unsigned int vm_stat_discard = 0; /* debugging */
392unsigned int vm_stat_discard_sent = 0; /* debugging */
393unsigned int vm_stat_discard_failure = 0; /* debugging */
394unsigned int vm_stat_discard_throttle = 0; /* debugging */
2d21ac55
A
395unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */
396unsigned int vm_pageout_catch_ups = 0; /* debugging */
397unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */
1c79356b 398
6d2010ae 399unsigned int vm_pageout_scan_reclaimed_throttled = 0;
91447636 400unsigned int vm_pageout_scan_active_throttled = 0;
6d2010ae
A
401unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
402unsigned int vm_pageout_scan_inactive_throttled_external = 0;
91447636
A
403unsigned int vm_pageout_scan_throttle = 0; /* debugging */
404unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
405unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
39236c6e 406unsigned int vm_pageout_scan_swap_throttle = 0; /* debugging */
91447636
A
407unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
408unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
409unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
316670eb 410unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0; /* debugging */
3e170ce0
A
411unsigned int vm_pageout_scan_throttle_deferred = 0; /* debugging */
412unsigned int vm_pageout_scan_yield_unthrottled = 0; /* debugging */
b0d623f7
A
413unsigned int vm_page_speculative_count_drifts = 0;
414unsigned int vm_page_speculative_count_drift_max = 0;
415
316670eb 416
55e303ae
A
417/*
418 * Backing store throttle when BS is exhausted
419 */
420unsigned int vm_backing_store_low = 0;
1c79356b
A
421
422unsigned int vm_pageout_out_of_line = 0;
423unsigned int vm_pageout_in_place = 0;
55e303ae 424
b0d623f7
A
425unsigned int vm_page_steal_pageout_page = 0;
426
91447636
A
427/*
428 * ENCRYPTED SWAP:
429 * counters and statistics...
430 */
431unsigned long vm_page_decrypt_counter = 0;
432unsigned long vm_page_decrypt_for_upl_counter = 0;
433unsigned long vm_page_encrypt_counter = 0;
434unsigned long vm_page_encrypt_abort_counter = 0;
435unsigned long vm_page_encrypt_already_encrypted_counter = 0;
436boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */
437
91447636
A
438struct vm_pageout_queue vm_pageout_queue_internal;
439struct vm_pageout_queue vm_pageout_queue_external;
440
2d21ac55
A
441unsigned int vm_page_speculative_target = 0;
442
443vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
444
0b4c1975 445boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
b0d623f7
A
446
447#if DEVELOPMENT || DEBUG
4a3eedf9 448unsigned long vm_cs_validated_resets = 0;
b0d623f7 449#endif
55e303ae 450
6d2010ae
A
451int vm_debug_events = 0;
452
316670eb 453#if CONFIG_MEMORYSTATUS
39236c6e
A
454#if !CONFIG_JETSAM
455extern boolean_t memorystatus_idle_exit_from_VM(void);
316670eb 456#endif
39236c6e
A
457extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
458extern void memorystatus_on_pageout_scan_end(void);
316670eb 459#endif
6d2010ae 460
55e303ae
A
461/*
462 * Routine: vm_backing_store_disable
463 * Purpose:
464 * Suspend non-privileged threads wishing to extend
465 * backing store when we are low on backing store
466 * (Synchronized by caller)
467 */
468void
469vm_backing_store_disable(
470 boolean_t disable)
471{
472 if(disable) {
473 vm_backing_store_low = 1;
474 } else {
475 if(vm_backing_store_low) {
476 vm_backing_store_low = 0;
477 thread_wakeup((event_t) &vm_backing_store_low);
478 }
479 }
480}
481
482
1c79356b
A
483#if MACH_CLUSTER_STATS
484unsigned long vm_pageout_cluster_dirtied = 0;
485unsigned long vm_pageout_cluster_cleaned = 0;
486unsigned long vm_pageout_cluster_collisions = 0;
487unsigned long vm_pageout_cluster_clusters = 0;
488unsigned long vm_pageout_cluster_conversions = 0;
489unsigned long vm_pageout_target_collisions = 0;
490unsigned long vm_pageout_target_page_dirtied = 0;
491unsigned long vm_pageout_target_page_freed = 0;
1c79356b
A
492#define CLUSTER_STAT(clause) clause
493#else /* MACH_CLUSTER_STATS */
494#define CLUSTER_STAT(clause)
495#endif /* MACH_CLUSTER_STATS */
496
497/*
498 * Routine: vm_pageout_object_terminate
499 * Purpose:
2d21ac55 500 * Destroy the pageout_object, and perform all of the
1c79356b
A
501 * required cleanup actions.
502 *
503 * In/Out conditions:
504 * The object must be locked, and will be returned locked.
505 */
506void
507vm_pageout_object_terminate(
508 vm_object_t object)
509{
510 vm_object_t shadow_object;
511
512 /*
513 * Deal with the deallocation (last reference) of a pageout object
514 * (used for cleaning-in-place) by dropping the paging references/
515 * freeing pages in the original object.
516 */
517
518 assert(object->pageout);
519 shadow_object = object->shadow;
520 vm_object_lock(shadow_object);
521
522 while (!queue_empty(&object->memq)) {
523 vm_page_t p, m;
524 vm_object_offset_t offset;
525
526 p = (vm_page_t) queue_first(&object->memq);
527
528 assert(p->private);
529 assert(p->pageout);
530 p->pageout = FALSE;
531 assert(!p->cleaning);
316670eb 532 assert(!p->laundry);
1c79356b
A
533
534 offset = p->offset;
535 VM_PAGE_FREE(p);
536 p = VM_PAGE_NULL;
537
538 m = vm_page_lookup(shadow_object,
6d2010ae 539 offset + object->vo_shadow_offset);
1c79356b
A
540
541 if(m == VM_PAGE_NULL)
542 continue;
1c79356b 543
1c79356b
A
544 assert((m->dirty) || (m->precious) ||
545 (m->busy && m->cleaning));
546
547 /*
548 * Handle the trusted pager throttle.
55e303ae 549 * Also decrement the burst throttle (if external).
1c79356b
A
550 */
551 vm_page_lock_queues();
39236c6e 552 if (m->pageout_queue)
91447636 553 vm_pageout_throttle_up(m);
1c79356b
A
554
555 /*
556 * Handle the "target" page(s). These pages are to be freed if
557 * successfully cleaned. Target pages are always busy, and are
558 * wired exactly once. The initial target pages are not mapped,
559 * (so cannot be referenced or modified) but converted target
560 * pages may have been modified between the selection as an
561 * adjacent page and conversion to a target.
562 */
563 if (m->pageout) {
564 assert(m->busy);
565 assert(m->wire_count == 1);
566 m->cleaning = FALSE;
2d21ac55 567 m->encrypted_cleaning = FALSE;
1c79356b
A
568 m->pageout = FALSE;
569#if MACH_CLUSTER_STATS
570 if (m->wanted) vm_pageout_target_collisions++;
571#endif
572 /*
573 * Revoke all access to the page. Since the object is
574 * locked, and the page is busy, this prevents the page
91447636 575 * from being dirtied after the pmap_disconnect() call
1c79356b 576 * returns.
91447636 577 *
1c79356b
A
578 * Since the page is left "dirty" but "not modifed", we
579 * can detect whether the page was redirtied during
580 * pageout by checking the modify state.
581 */
316670eb
A
582 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) {
583 SET_PAGE_DIRTY(m, FALSE);
584 } else {
585 m->dirty = FALSE;
586 }
1c79356b
A
587
588 if (m->dirty) {
589 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
0b4c1975 590 vm_page_unwire(m, TRUE); /* reactivates */
2d21ac55 591 VM_STAT_INCR(reactivations);
1c79356b 592 PAGE_WAKEUP_DONE(m);
1c79356b
A
593 } else {
594 CLUSTER_STAT(vm_pageout_target_page_freed++;)
595 vm_page_free(m);/* clears busy, etc. */
596 }
597 vm_page_unlock_queues();
598 continue;
599 }
600 /*
601 * Handle the "adjacent" pages. These pages were cleaned in
602 * place, and should be left alone.
603 * If prep_pin_count is nonzero, then someone is using the
604 * page, so make it active.
605 */
2d21ac55 606 if (!m->active && !m->inactive && !m->throttled && !m->private) {
0b4e3aa0 607 if (m->reference)
1c79356b
A
608 vm_page_activate(m);
609 else
610 vm_page_deactivate(m);
611 }
6d2010ae
A
612 if (m->overwriting) {
613 /*
614 * the (COPY_OUT_FROM == FALSE) request_page_list case
615 */
616 if (m->busy) {
617 /*
618 * We do not re-set m->dirty !
619 * The page was busy so no extraneous activity
620 * could have occurred. COPY_INTO is a read into the
621 * new pages. CLEAN_IN_PLACE does actually write
622 * out the pages but handling outside of this code
623 * will take care of resetting dirty. We clear the
624 * modify however for the Programmed I/O case.
625 */
626 pmap_clear_modify(m->phys_page);
2d21ac55 627
6d2010ae
A
628 m->busy = FALSE;
629 m->absent = FALSE;
630 } else {
631 /*
632 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
633 * Occurs when the original page was wired
634 * at the time of the list request
635 */
636 assert(VM_PAGE_WIRED(m));
637 vm_page_unwire(m, TRUE); /* reactivates */
638 }
1c79356b
A
639 m->overwriting = FALSE;
640 } else {
6d2010ae
A
641 /*
642 * Set the dirty state according to whether or not the page was
643 * modified during the pageout. Note that we purposefully do
644 * NOT call pmap_clear_modify since the page is still mapped.
645 * If the page were to be dirtied between the 2 calls, this
646 * this fact would be lost. This code is only necessary to
647 * maintain statistics, since the pmap module is always
648 * consulted if m->dirty is false.
649 */
1c79356b 650#if MACH_CLUSTER_STATS
55e303ae 651 m->dirty = pmap_is_modified(m->phys_page);
1c79356b
A
652
653 if (m->dirty) vm_pageout_cluster_dirtied++;
654 else vm_pageout_cluster_cleaned++;
655 if (m->wanted) vm_pageout_cluster_collisions++;
656#else
316670eb 657 m->dirty = FALSE;
1c79356b
A
658#endif
659 }
6d2010ae
A
660 if (m->encrypted_cleaning == TRUE) {
661 m->encrypted_cleaning = FALSE;
662 m->busy = FALSE;
663 }
1c79356b
A
664 m->cleaning = FALSE;
665
1c79356b
A
666 /*
667 * Wakeup any thread waiting for the page to be un-cleaning.
668 */
669 PAGE_WAKEUP(m);
670 vm_page_unlock_queues();
671 }
672 /*
673 * Account for the paging reference taken in vm_paging_object_allocate.
674 */
b0d623f7 675 vm_object_activity_end(shadow_object);
1c79356b
A
676 vm_object_unlock(shadow_object);
677
678 assert(object->ref_count == 0);
679 assert(object->paging_in_progress == 0);
b0d623f7 680 assert(object->activity_in_progress == 0);
1c79356b
A
681 assert(object->resident_page_count == 0);
682 return;
683}
684
1c79356b
A
685/*
686 * Routine: vm_pageclean_setup
687 *
688 * Purpose: setup a page to be cleaned (made non-dirty), but not
689 * necessarily flushed from the VM page cache.
690 * This is accomplished by cleaning in place.
691 *
b0d623f7
A
692 * The page must not be busy, and new_object
693 * must be locked.
694 *
1c79356b 695 */
3e170ce0 696static void
1c79356b
A
697vm_pageclean_setup(
698 vm_page_t m,
699 vm_page_t new_m,
700 vm_object_t new_object,
701 vm_object_offset_t new_offset)
702{
1c79356b 703 assert(!m->busy);
2d21ac55 704#if 0
1c79356b 705 assert(!m->cleaning);
2d21ac55 706#endif
1c79356b
A
707
708 XPR(XPR_VM_PAGEOUT,
709 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
b0d623f7
A
710 m->object, m->offset, m,
711 new_m, new_offset);
1c79356b 712
55e303ae 713 pmap_clear_modify(m->phys_page);
1c79356b
A
714
715 /*
716 * Mark original page as cleaning in place.
717 */
718 m->cleaning = TRUE;
316670eb 719 SET_PAGE_DIRTY(m, FALSE);
1c79356b
A
720 m->precious = FALSE;
721
722 /*
723 * Convert the fictitious page to a private shadow of
724 * the real page.
725 */
726 assert(new_m->fictitious);
2d21ac55 727 assert(new_m->phys_page == vm_page_fictitious_addr);
1c79356b
A
728 new_m->fictitious = FALSE;
729 new_m->private = TRUE;
730 new_m->pageout = TRUE;
55e303ae 731 new_m->phys_page = m->phys_page;
b0d623f7
A
732
733 vm_page_lockspin_queues();
3e170ce0 734 vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
b0d623f7 735 vm_page_unlock_queues();
1c79356b 736
3e170ce0 737 vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
1c79356b
A
738 assert(!new_m->wanted);
739 new_m->busy = FALSE;
740}
741
1c79356b
A
742/*
743 * Routine: vm_pageout_initialize_page
744 * Purpose:
745 * Causes the specified page to be initialized in
746 * the appropriate memory object. This routine is used to push
747 * pages into a copy-object when they are modified in the
748 * permanent object.
749 *
750 * The page is moved to a temporary object and paged out.
751 *
752 * In/out conditions:
753 * The page in question must not be on any pageout queues.
754 * The object to which it belongs must be locked.
755 * The page must be busy, but not hold a paging reference.
756 *
757 * Implementation:
758 * Move this page to a completely new object.
759 */
760void
761vm_pageout_initialize_page(
762 vm_page_t m)
763{
1c79356b
A
764 vm_object_t object;
765 vm_object_offset_t paging_offset;
2d21ac55 766 memory_object_t pager;
1c79356b
A
767
768 XPR(XPR_VM_PAGEOUT,
769 "vm_pageout_initialize_page, page 0x%X\n",
b0d623f7 770 m, 0, 0, 0, 0);
1c79356b
A
771 assert(m->busy);
772
773 /*
774 * Verify that we really want to clean this page
775 */
776 assert(!m->absent);
777 assert(!m->error);
778 assert(m->dirty);
779
780 /*
781 * Create a paging reference to let us play with the object.
782 */
783 object = m->object;
784 paging_offset = m->offset + object->paging_offset;
2d21ac55
A
785
786 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
1c79356b
A
787 VM_PAGE_FREE(m);
788 panic("reservation without pageout?"); /* alan */
2d21ac55
A
789 vm_object_unlock(object);
790
791 return;
792 }
793
794 /*
795 * If there's no pager, then we can't clean the page. This should
796 * never happen since this should be a copy object and therefore not
797 * an external object, so the pager should always be there.
798 */
799
800 pager = object->pager;
801
802 if (pager == MEMORY_OBJECT_NULL) {
803 VM_PAGE_FREE(m);
804 panic("missing pager for copy object");
1c79356b
A
805 return;
806 }
807
316670eb
A
808 /*
809 * set the page for future call to vm_fault_list_request
810 */
55e303ae 811 pmap_clear_modify(m->phys_page);
316670eb 812 SET_PAGE_DIRTY(m, FALSE);
1c79356b 813 m->pageout = TRUE;
b0d623f7 814
316670eb
A
815 /*
816 * keep the object from collapsing or terminating
817 */
818 vm_object_paging_begin(object);
55e303ae 819 vm_object_unlock(object);
1c79356b
A
820
821 /*
822 * Write the data to its pager.
823 * Note that the data is passed by naming the new object,
824 * not a virtual address; the pager interface has been
825 * manipulated to use the "internal memory" data type.
826 * [The object reference from its allocation is donated
827 * to the eventual recipient.]
828 */
2d21ac55 829 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
1c79356b
A
830
831 vm_object_lock(object);
2d21ac55 832 vm_object_paging_end(object);
1c79356b
A
833}
834
835#if MACH_CLUSTER_STATS
836#define MAXCLUSTERPAGES 16
837struct {
838 unsigned long pages_in_cluster;
839 unsigned long pages_at_higher_offsets;
840 unsigned long pages_at_lower_offsets;
841} cluster_stats[MAXCLUSTERPAGES];
842#endif /* MACH_CLUSTER_STATS */
843
1c79356b
A
844
845/*
846 * vm_pageout_cluster:
847 *
91447636
A
848 * Given a page, queue it to the appropriate I/O thread,
849 * which will page it out and attempt to clean adjacent pages
1c79356b
A
850 * in the same operation.
851 *
39236c6e 852 * The object and queues must be locked. We will take a
55e303ae 853 * paging reference to prevent deallocation or collapse when we
91447636
A
854 * release the object lock back at the call site. The I/O thread
855 * is responsible for consuming this reference
55e303ae
A
856 *
857 * The page must not be on any pageout queue.
1c79356b 858 */
91447636 859
3e170ce0
A
860int
861vm_pageout_cluster(vm_page_t m, boolean_t pageout, boolean_t immediate_ok, boolean_t keep_object_locked)
1c79356b
A
862{
863 vm_object_t object = m->object;
91447636
A
864 struct vm_pageout_queue *q;
865
1c79356b
A
866
867 XPR(XPR_VM_PAGEOUT,
868 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7
A
869 object, m->offset, m, 0, 0);
870
871 VM_PAGE_CHECK(m);
6d2010ae
A
872#if DEBUG
873 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
874#endif
875 vm_object_lock_assert_exclusive(object);
1c79356b 876
91447636
A
877 /*
878 * Only a certain kind of page is appreciated here.
879 */
316670eb
A
880 assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
881 assert(!m->cleaning && !m->pageout && !m->laundry);
6d2010ae
A
882#ifndef CONFIG_FREEZE
883 assert(!m->inactive && !m->active);
2d21ac55 884 assert(!m->throttled);
6d2010ae 885#endif
55e303ae
A
886
887 /*
316670eb 888 * protect the object from collapse or termination
55e303ae 889 */
316670eb 890 vm_object_activity_begin(object);
55e303ae 891
316670eb 892 m->pageout = pageout;
1c79356b 893
39236c6e 894 if (object->internal == TRUE) {
3e170ce0 895 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
39236c6e
A
896 m->busy = TRUE;
897
3e170ce0
A
898 if (vm_compressor_immediate_preferred == TRUE && immediate_ok == TRUE) {
899 if (keep_object_locked == FALSE)
900 vm_object_unlock(object);
901 vm_page_unlock_queues();
902
903 vm_pageout_immediate(m, keep_object_locked);
904
905 return (1);
906 }
907 }
91447636 908 q = &vm_pageout_queue_internal;
39236c6e 909 } else
91447636 910 q = &vm_pageout_queue_external;
d1ecb069 911
39236c6e 912 /*
d1ecb069
A
913 * pgo_laundry count is tied to the laundry bit
914 */
6d2010ae 915 m->laundry = TRUE;
91447636 916 q->pgo_laundry++;
1c79356b 917
91447636
A
918 m->pageout_queue = TRUE;
919 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
920
921 if (q->pgo_idle == TRUE) {
39236c6e
A
922 q->pgo_idle = FALSE;
923 thread_wakeup((event_t) &q->pgo_pending);
1c79356b 924 }
b0d623f7 925 VM_PAGE_CHECK(m);
3e170ce0
A
926
927 return (0);
1c79356b
A
928}
929
55e303ae 930
91447636 931unsigned long vm_pageout_throttle_up_count = 0;
1c79356b
A
932
933/*
b0d623f7
A
934 * A page is back from laundry or we are stealing it back from
935 * the laundering state. See if there are some pages waiting to
91447636 936 * go to laundry and if we can let some of them go now.
1c79356b 937 *
91447636 938 * Object and page queues must be locked.
1c79356b 939 */
91447636
A
940void
941vm_pageout_throttle_up(
6d2010ae 942 vm_page_t m)
1c79356b 943{
6d2010ae 944 struct vm_pageout_queue *q;
1c79356b 945
6d2010ae
A
946 assert(m->object != VM_OBJECT_NULL);
947 assert(m->object != kernel_object);
1c79356b 948
316670eb
A
949#if DEBUG
950 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
951 vm_object_lock_assert_exclusive(m->object);
952#endif
953
6d2010ae 954 vm_pageout_throttle_up_count++;
0b4c1975 955
6d2010ae
A
956 if (m->object->internal == TRUE)
957 q = &vm_pageout_queue_internal;
958 else
959 q = &vm_pageout_queue_external;
d1ecb069 960
6d2010ae 961 if (m->pageout_queue == TRUE) {
0b4c1975 962
6d2010ae
A
963 queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
964 m->pageout_queue = FALSE;
1c79356b 965
6d2010ae
A
966 m->pageq.next = NULL;
967 m->pageq.prev = NULL;
91447636 968
316670eb 969 vm_object_activity_end(m->object);
6d2010ae 970 }
316670eb 971 if (m->laundry == TRUE) {
91447636 972
6d2010ae
A
973 m->laundry = FALSE;
974 q->pgo_laundry--;
91447636 975
6d2010ae
A
976 if (q->pgo_throttled == TRUE) {
977 q->pgo_throttled = FALSE;
978 thread_wakeup((event_t) &q->pgo_laundry);
979 }
980 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
981 q->pgo_draining = FALSE;
982 thread_wakeup((event_t) (&q->pgo_laundry+1));
983 }
984 }
985}
91447636 986
b0d623f7 987
39236c6e
A
988static void
989vm_pageout_throttle_up_batch(
990 struct vm_pageout_queue *q,
991 int batch_cnt)
992{
993#if DEBUG
994 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
995#endif
996
997 vm_pageout_throttle_up_count += batch_cnt;
998
999 q->pgo_laundry -= batch_cnt;
1000
1001 if (q->pgo_throttled == TRUE) {
1002 q->pgo_throttled = FALSE;
1003 thread_wakeup((event_t) &q->pgo_laundry);
1004 }
1005 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
1006 q->pgo_draining = FALSE;
1007 thread_wakeup((event_t) (&q->pgo_laundry+1));
1008 }
1009}
1010
1011
1012
b0d623f7
A
1013/*
1014 * VM memory pressure monitoring.
1015 *
1016 * vm_pageout_scan() keeps track of the number of pages it considers and
1017 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
1018 *
1019 * compute_memory_pressure() is called every second from compute_averages()
1020 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
1021 * of recalimed pages in a new vm_pageout_stat[] bucket.
1022 *
1023 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
1024 * The caller provides the number of seconds ("nsecs") worth of statistics
1025 * it wants, up to 30 seconds.
1026 * It computes the number of pages reclaimed in the past "nsecs" seconds and
1027 * also returns the number of pages the system still needs to reclaim at this
1028 * moment in time.
1029 */
1030#define VM_PAGEOUT_STAT_SIZE 31
1031struct vm_pageout_stat {
1032 unsigned int considered;
1033 unsigned int reclaimed;
1034} vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0}, };
1035unsigned int vm_pageout_stat_now = 0;
1036unsigned int vm_memory_pressure = 0;
1037
1038#define VM_PAGEOUT_STAT_BEFORE(i) \
1039 (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
1040#define VM_PAGEOUT_STAT_AFTER(i) \
1041 (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
1042
15129b1c
A
1043#if VM_PAGE_BUCKETS_CHECK
1044int vm_page_buckets_check_interval = 10; /* in seconds */
1045#endif /* VM_PAGE_BUCKETS_CHECK */
1046
b0d623f7
A
1047/*
1048 * Called from compute_averages().
1049 */
1050void
1051compute_memory_pressure(
1052 __unused void *arg)
1053{
1054 unsigned int vm_pageout_next;
1055
15129b1c
A
1056#if VM_PAGE_BUCKETS_CHECK
1057 /* check the consistency of VM page buckets at regular interval */
1058 static int counter = 0;
1059 if ((++counter % vm_page_buckets_check_interval) == 0) {
1060 vm_page_buckets_check();
1061 }
1062#endif /* VM_PAGE_BUCKETS_CHECK */
1063
b0d623f7
A
1064 vm_memory_pressure =
1065 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed;
1066
1067 commpage_set_memory_pressure( vm_memory_pressure );
1068
1069 /* move "now" forward */
1070 vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
1071 vm_pageout_stats[vm_pageout_next].considered = 0;
1072 vm_pageout_stats[vm_pageout_next].reclaimed = 0;
1073 vm_pageout_stat_now = vm_pageout_next;
1074}
1075
316670eb
A
1076
1077/*
1078 * IMPORTANT
1079 * mach_vm_ctl_page_free_wanted() is called indirectly, via
1080 * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
1081 * it must be safe in the restricted stackshot context. Locks and/or
1082 * blocking are not allowable.
1083 */
b0d623f7
A
1084unsigned int
1085mach_vm_ctl_page_free_wanted(void)
1086{
1087 unsigned int page_free_target, page_free_count, page_free_wanted;
1088
1089 page_free_target = vm_page_free_target;
1090 page_free_count = vm_page_free_count;
1091 if (page_free_target > page_free_count) {
1092 page_free_wanted = page_free_target - page_free_count;
1093 } else {
1094 page_free_wanted = 0;
1095 }
1096
1097 return page_free_wanted;
1098}
1099
316670eb
A
1100
1101/*
1102 * IMPORTANT:
1103 * mach_vm_pressure_monitor() is called when taking a stackshot, with
1104 * wait_for_pressure FALSE, so that code path must remain safe in the
1105 * restricted stackshot context. No blocking or locks are allowable.
1106 * on that code path.
1107 */
1108
b0d623f7
A
1109kern_return_t
1110mach_vm_pressure_monitor(
1111 boolean_t wait_for_pressure,
1112 unsigned int nsecs_monitored,
1113 unsigned int *pages_reclaimed_p,
1114 unsigned int *pages_wanted_p)
1115{
1116 wait_result_t wr;
1117 unsigned int vm_pageout_then, vm_pageout_now;
1118 unsigned int pages_reclaimed;
1119
1120 /*
1121 * We don't take the vm_page_queue_lock here because we don't want
1122 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
1123 * thread when it's trying to reclaim memory. We don't need fully
1124 * accurate monitoring anyway...
1125 */
1126
1127 if (wait_for_pressure) {
1128 /* wait until there's memory pressure */
1129 while (vm_page_free_count >= vm_page_free_target) {
1130 wr = assert_wait((event_t) &vm_page_free_wanted,
1131 THREAD_INTERRUPTIBLE);
1132 if (wr == THREAD_WAITING) {
1133 wr = thread_block(THREAD_CONTINUE_NULL);
1134 }
1135 if (wr == THREAD_INTERRUPTED) {
1136 return KERN_ABORTED;
1137 }
1138 if (wr == THREAD_AWAKENED) {
1139 /*
1140 * The memory pressure might have already
1141 * been relieved but let's not block again
1142 * and let's report that there was memory
1143 * pressure at some point.
1144 */
1145 break;
1146 }
1147 }
1148 }
1149
1150 /* provide the number of pages the system wants to reclaim */
1151 if (pages_wanted_p != NULL) {
1152 *pages_wanted_p = mach_vm_ctl_page_free_wanted();
1153 }
1154
1155 if (pages_reclaimed_p == NULL) {
1156 return KERN_SUCCESS;
1157 }
1158
1159 /* provide number of pages reclaimed in the last "nsecs_monitored" */
1160 do {
1161 vm_pageout_now = vm_pageout_stat_now;
1162 pages_reclaimed = 0;
1163 for (vm_pageout_then =
1164 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1165 vm_pageout_then != vm_pageout_now &&
1166 nsecs_monitored-- != 0;
1167 vm_pageout_then =
1168 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1169 pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed;
1170 }
1171 } while (vm_pageout_now != vm_pageout_stat_now);
1172 *pages_reclaimed_p = pages_reclaimed;
1173
1174 return KERN_SUCCESS;
1175}
1176
b0d623f7 1177
316670eb 1178
3e170ce0
A
1179static void
1180vm_pageout_page_queue(queue_head_t *, int);
1181
1182/*
1183 * condition variable used to make sure there is
1184 * only a single sweep going on at a time
1185 */
1186boolean_t vm_pageout_anonymous_pages_active = FALSE;
1187
1188
1189void
1190vm_pageout_anonymous_pages()
1191{
1192 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
1193
1194 vm_page_lock_queues();
1195
1196 if (vm_pageout_anonymous_pages_active == TRUE) {
1197 vm_page_unlock_queues();
1198 return;
1199 }
1200 vm_pageout_anonymous_pages_active = TRUE;
1201 vm_page_unlock_queues();
1202
1203 vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1204 vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1205 vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);
1206
1207 vm_consider_swapping();
1208
1209 vm_page_lock_queues();
1210 vm_pageout_anonymous_pages_active = FALSE;
1211 vm_page_unlock_queues();
1212 }
1213}
1214
1215
1216void
1217vm_pageout_page_queue(queue_head_t *q, int qcount)
1218{
1219 vm_page_t m;
1220 vm_object_t t_object = NULL;
1221 vm_object_t l_object = NULL;
1222 vm_object_t m_object = NULL;
1223 int delayed_unlock = 0;
1224 int try_failed_count = 0;
1225 int refmod_state;
1226 int pmap_options;
1227 struct vm_pageout_queue *iq;
1228
1229
1230 iq = &vm_pageout_queue_internal;
1231
1232 vm_page_lock_queues();
1233
1234 while (qcount && !queue_empty(q)) {
1235
1236 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1237
1238 if (VM_PAGE_Q_THROTTLED(iq)) {
1239
1240 if (l_object != NULL) {
1241 vm_object_unlock(l_object);
1242 l_object = NULL;
1243 }
1244 iq->pgo_draining = TRUE;
1245
1246 assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
1247 vm_page_unlock_queues();
1248
1249 thread_block(THREAD_CONTINUE_NULL);
1250
1251 vm_page_lock_queues();
1252 delayed_unlock = 0;
1253 continue;
1254 }
1255 m = (vm_page_t) queue_first(q);
1256 m_object = m->object;
1257
1258 /*
1259 * check to see if we currently are working
1260 * with the same object... if so, we've
1261 * already got the lock
1262 */
1263 if (m_object != l_object) {
1264 if ( !m_object->internal)
1265 goto reenter_pg_on_q;
1266
1267 /*
1268 * the object associated with candidate page is
1269 * different from the one we were just working
1270 * with... dump the lock if we still own it
1271 */
1272 if (l_object != NULL) {
1273 vm_object_unlock(l_object);
1274 l_object = NULL;
1275 }
1276 if (m_object != t_object)
1277 try_failed_count = 0;
1278
1279 /*
1280 * Try to lock object; since we've alread got the
1281 * page queues lock, we can only 'try' for this one.
1282 * if the 'try' fails, we need to do a mutex_pause
1283 * to allow the owner of the object lock a chance to
1284 * run...
1285 */
1286 if ( !vm_object_lock_try_scan(m_object)) {
1287
1288 if (try_failed_count > 20) {
1289 goto reenter_pg_on_q;
1290 }
1291 vm_page_unlock_queues();
1292 mutex_pause(try_failed_count++);
1293 vm_page_lock_queues();
1294 delayed_unlock = 0;
1295
1296 t_object = m_object;
1297 continue;
1298 }
1299 l_object = m_object;
1300 }
1301 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error || m->pageout) {
1302 /*
1303 * page is not to be cleaned
1304 * put it back on the head of its queue
1305 */
1306 goto reenter_pg_on_q;
1307 }
1308 if (m->reference == FALSE && m->pmapped == TRUE) {
1309 refmod_state = pmap_get_refmod(m->phys_page);
1310
1311 if (refmod_state & VM_MEM_REFERENCED)
1312 m->reference = TRUE;
1313 if (refmod_state & VM_MEM_MODIFIED) {
1314 SET_PAGE_DIRTY(m, FALSE);
1315 }
1316 }
1317 if (m->reference == TRUE) {
1318 m->reference = FALSE;
1319 pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1320 goto reenter_pg_on_q;
1321 }
1322 if (m->pmapped == TRUE) {
1323 if (m->dirty || m->precious) {
1324 pmap_options = PMAP_OPTIONS_COMPRESSOR;
1325 } else {
1326 pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1327 }
1328 refmod_state = pmap_disconnect_options(m->phys_page, pmap_options, NULL);
1329 if (refmod_state & VM_MEM_MODIFIED) {
1330 SET_PAGE_DIRTY(m, FALSE);
1331 }
1332 }
1333 if ( !m->dirty && !m->precious) {
1334 vm_page_unlock_queues();
1335 VM_PAGE_FREE(m);
1336 vm_page_lock_queues();
1337 delayed_unlock = 0;
1338
1339 goto next_pg;
1340 }
1341 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
1342
1343 if (!m_object->pager_initialized) {
1344
1345 vm_page_unlock_queues();
1346
1347 vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);
1348
1349 if (!m_object->pager_initialized)
1350 vm_object_compressor_pager_create(m_object);
1351
1352 vm_page_lock_queues();
1353 delayed_unlock = 0;
1354 }
1355 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)
1356 goto reenter_pg_on_q;
1357 /*
1358 * vm_object_compressor_pager_create will drop the object lock
1359 * which means 'm' may no longer be valid to use
1360 */
1361 continue;
1362 }
1363 /*
1364 * we've already factored out pages in the laundry which
1365 * means this page can't be on the pageout queue so it's
1366 * safe to do the vm_page_queues_remove
1367 */
1368 assert(!m->pageout_queue);
1369
1370 vm_page_queues_remove(m);
1371
1372 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1373
1374 vm_pageout_cluster(m, TRUE, FALSE, FALSE);
1375
1376 goto next_pg;
1377
1378reenter_pg_on_q:
1379 queue_remove(q, m, vm_page_t, pageq);
1380 queue_enter(q, m, vm_page_t, pageq);
1381next_pg:
1382 qcount--;
1383 try_failed_count = 0;
1384
1385 if (delayed_unlock++ > 128) {
1386
1387 if (l_object != NULL) {
1388 vm_object_unlock(l_object);
1389 l_object = NULL;
1390 }
1391 lck_mtx_yield(&vm_page_queue_lock);
1392 delayed_unlock = 0;
1393 }
1394 }
1395 if (l_object != NULL) {
1396 vm_object_unlock(l_object);
1397 l_object = NULL;
1398 }
1399 vm_page_unlock_queues();
1400}
1401
1402
1403
316670eb
A
1404/*
1405 * function in BSD to apply I/O throttle to the pageout thread
1406 */
1407extern void vm_pageout_io_throttle(void);
1408
316670eb
A
1409/*
1410 * Page States: Used below to maintain the page state
1411 * before it's removed from it's Q. This saved state
1412 * helps us do the right accounting in certain cases
1413 */
6d2010ae 1414#define PAGE_STATE_SPECULATIVE 1
316670eb 1415#define PAGE_STATE_ANONYMOUS 2
6d2010ae
A
1416#define PAGE_STATE_INACTIVE 3
1417#define PAGE_STATE_INACTIVE_FIRST 4
316670eb 1418#define PAGE_STATE_CLEAN 5
b0d623f7 1419
39236c6e
A
1420
1421#define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m) \
1422 MACRO_BEGIN \
1423 /* \
1424 * If a "reusable" page somehow made it back into \
1425 * the active queue, it's been re-used and is not \
1426 * quite re-usable. \
1427 * If the VM object was "all_reusable", consider it \
1428 * as "all re-used" instead of converting it to \
1429 * "partially re-used", which could be expensive. \
1430 */ \
1431 if ((m)->reusable || \
1432 (m)->object->all_reusable) { \
1433 vm_object_reuse_pages((m)->object, \
1434 (m)->offset, \
1435 (m)->offset + PAGE_SIZE_64, \
1436 FALSE); \
1437 } \
1438 MACRO_END
1439
1440
1441#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64
6d2010ae
A
1442#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024
1443
1444#define FCS_IDLE 0
1445#define FCS_DELAYED 1
1446#define FCS_DEADLOCK_DETECTED 2
1447
1448struct flow_control {
1449 int state;
1450 mach_timespec_t ts;
1451};
1452
316670eb 1453uint32_t vm_pageout_considered_page = 0;
39236c6e 1454uint32_t vm_page_filecache_min = 0;
316670eb 1455
39236c6e 1456#define ANONS_GRABBED_LIMIT 2
6d2010ae
A
1457
1458/*
1459 * vm_pageout_scan does the dirty work for the pageout daemon.
316670eb
A
1460 * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
1461 * held and vm_page_free_wanted == 0.
6d2010ae 1462 */
1c79356b
A
1463void
1464vm_pageout_scan(void)
1465{
91447636
A
1466 unsigned int loop_count = 0;
1467 unsigned int inactive_burst_count = 0;
1468 unsigned int active_burst_count = 0;
2d21ac55
A
1469 unsigned int reactivated_this_call;
1470 unsigned int reactivate_limit;
1471 vm_page_t local_freeq = NULL;
55e303ae 1472 int local_freed = 0;
2d21ac55 1473 int delayed_unlock;
6d2010ae 1474 int delayed_unlock_limit = 0;
91447636
A
1475 int refmod_state = 0;
1476 int vm_pageout_deadlock_target = 0;
1477 struct vm_pageout_queue *iq;
1478 struct vm_pageout_queue *eq;
2d21ac55 1479 struct vm_speculative_age_q *sq;
b0d623f7 1480 struct flow_control flow_control = { 0, { 0, 0 } };
91447636 1481 boolean_t inactive_throttled = FALSE;
2d21ac55 1482 boolean_t try_failed;
6d2010ae
A
1483 mach_timespec_t ts;
1484 unsigned int msecs = 0;
91447636 1485 vm_object_t object;
2d21ac55 1486 vm_object_t last_object_tried;
2d21ac55
A
1487 uint32_t catch_up_count = 0;
1488 uint32_t inactive_reclaim_run;
1489 boolean_t forced_reclaim;
316670eb
A
1490 boolean_t exceeded_burst_throttle;
1491 boolean_t grab_anonymous = FALSE;
39236c6e
A
1492 boolean_t force_anonymous = FALSE;
1493 int anons_grabbed = 0;
b0d623f7 1494 int page_prev_state = 0;
6d2010ae
A
1495 int cache_evict_throttle = 0;
1496 uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0;
fe8ab488 1497 int force_purge = 0;
3e170ce0
A
1498#define DELAY_SPECULATIVE_AGE 1000
1499 int delay_speculative_age = 0;
fe8ab488
A
1500
1501#if VM_PRESSURE_EVENTS
39236c6e 1502 vm_pressure_level_t pressure_level;
fe8ab488 1503#endif /* VM_PRESSURE_EVENTS */
6d2010ae 1504
3e170ce0 1505 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
6d2010ae
A
1506 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
1507 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
91447636
A
1508
1509 flow_control.state = FCS_IDLE;
1510 iq = &vm_pageout_queue_internal;
1511 eq = &vm_pageout_queue_external;
2d21ac55
A
1512 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1513
1c79356b
A
1514
1515 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1516
2d21ac55
A
1517
1518 vm_page_lock_queues();
1519 delayed_unlock = 1; /* must be nonzero if Qs are locked, 0 if unlocked */
1520
1521 /*
1522 * Calculate the max number of referenced pages on the inactive
1523 * queue that we will reactivate.
1524 */
1525 reactivated_this_call = 0;
1526 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1527 vm_page_inactive_count);
1528 inactive_reclaim_run = 0;
1529
316670eb 1530 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
2d21ac55 1531
6d2010ae 1532 /*
1c79356b
A
1533 * We want to gradually dribble pages from the active queue
1534 * to the inactive queue. If we let the inactive queue get
1535 * very small, and then suddenly dump many pages into it,
1536 * those pages won't get a sufficient chance to be referenced
1537 * before we start taking them from the inactive queue.
1538 *
6d2010ae
A
1539 * We must limit the rate at which we send pages to the pagers
1540 * so that we don't tie up too many pages in the I/O queues.
1541 * We implement a throttling mechanism using the laundry count
1542 * to limit the number of pages outstanding to the default
1543 * and external pagers. We can bypass the throttles and look
1544 * for clean pages if the pageout queues don't drain in a timely
1545 * fashion since this may indicate that the pageout paths are
1546 * stalled waiting for memory, which only we can provide.
1c79356b 1547 */
91447636 1548
1c79356b 1549
91447636 1550Restart:
2d21ac55 1551 assert(delayed_unlock!=0);
39236c6e 1552
91447636
A
1553 /*
1554 * Recalculate vm_page_inactivate_target.
1555 */
1556 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
2d21ac55
A
1557 vm_page_inactive_count +
1558 vm_page_speculative_count);
316670eb 1559
39236c6e
A
1560 vm_page_anonymous_min = vm_page_inactive_target / 20;
1561
316670eb 1562
2d21ac55
A
1563 /*
1564 * don't want to wake the pageout_scan thread up everytime we fall below
1565 * the targets... set a low water mark at 0.25% below the target
1566 */
1567 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);
1c79356b 1568
6d2010ae
A
1569 if (vm_page_speculative_percentage > 50)
1570 vm_page_speculative_percentage = 50;
1571 else if (vm_page_speculative_percentage <= 0)
1572 vm_page_speculative_percentage = 1;
1573
2d21ac55
A
1574 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1575 vm_page_inactive_count);
6d2010ae 1576
2d21ac55
A
1577 object = NULL;
1578 last_object_tried = NULL;
1579 try_failed = FALSE;
1580
1581 if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count))
1582 catch_up_count = vm_page_inactive_count + vm_page_speculative_count;
1583 else
1584 catch_up_count = 0;
39236c6e 1585
55e303ae 1586 for (;;) {
91447636 1587 vm_page_t m;
1c79356b 1588
2d21ac55 1589 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1c79356b 1590
3e170ce0
A
1591 assert(delayed_unlock);
1592
6d2010ae
A
1593 if (vm_upl_wait_for_pages < 0)
1594 vm_upl_wait_for_pages = 0;
91447636 1595
6d2010ae
A
1596 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
1597
1598 if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
1599 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
91447636 1600
1c79356b 1601 /*
6d2010ae 1602 * Move pages from active to inactive if we're below the target
1c79356b 1603 */
316670eb 1604 /* if we are trying to make clean, we need to make sure we actually have inactive - mj */
b0d623f7 1605 if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
316670eb 1606 goto done_moving_active_pages;
2d21ac55 1607
6d2010ae
A
1608 if (object != NULL) {
1609 vm_object_unlock(object);
1610 object = NULL;
1611 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1612 }
1613 /*
1614 * Don't sweep through active queue more than the throttle
1615 * which should be kept relatively low
1616 */
39236c6e 1617 active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count);
6d2010ae
A
1618
1619 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START,
1620 vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed);
1621
1622 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE,
1623 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
1624 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
39236c6e
A
1625 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START);
1626
2d21ac55 1627
6d2010ae 1628 while (!queue_empty(&vm_page_queue_active) && active_burst_count--) {
1c79356b 1629
1c79356b 1630 vm_pageout_active++;
55e303ae 1631
1c79356b 1632 m = (vm_page_t) queue_first(&vm_page_queue_active);
91447636
A
1633
1634 assert(m->active && !m->inactive);
1635 assert(!m->laundry);
1636 assert(m->object != kernel_object);
2d21ac55
A
1637 assert(m->phys_page != vm_page_guard_addr);
1638
1639 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1c79356b 1640
fe8ab488
A
1641 /*
1642 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
1643 *
1644 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
1645 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
1646 * new reference happens. If no futher references happen on the page after that remote TLB flushes
1647 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
1648 * by pageout_scan, which is just fine since the last reference would have happened quite far
1649 * in the past (TLB caches don't hang around for very long), and of course could just as easily
1650 * have happened before we moved the page
1651 */
1652 pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
2d21ac55 1653
fe8ab488
A
1654 /*
1655 * The page might be absent or busy,
1656 * but vm_page_deactivate can handle that.
1657 * FALSE indicates that we don't want a H/W clear reference
1658 */
1659 vm_page_deactivate_internal(m, FALSE);
1c79356b 1660
fe8ab488 1661 if (delayed_unlock++ > delayed_unlock_limit) {
6d2010ae 1662
fe8ab488
A
1663 if (local_freeq) {
1664 vm_page_unlock_queues();
91447636 1665
fe8ab488
A
1666 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1667 vm_page_free_count, local_freed, delayed_unlock_limit, 1);
1668
1669 vm_page_free_list(local_freeq, TRUE);
1670
1671 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1672 vm_page_free_count, 0, 0, 1);
6d2010ae 1673
fe8ab488
A
1674 local_freeq = NULL;
1675 local_freed = 0;
b0d623f7 1676 vm_page_lock_queues();
fe8ab488
A
1677 } else {
1678 lck_mtx_yield(&vm_page_queue_lock);
39236c6e 1679 }
fe8ab488
A
1680
1681 delayed_unlock = 1;
91447636 1682
91447636 1683 /*
fe8ab488
A
1684 * continue the while loop processing
1685 * the active queue... need to hold
1686 * the page queues lock
91447636 1687 */
55e303ae 1688 }
1c79356b 1689 }
91447636 1690
6d2010ae
A
1691 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END,
1692 vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target);
39236c6e 1693 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END);
91447636
A
1694
1695 /**********************************************************************
1696 * above this point we're playing with the active queue
1697 * below this point we're playing with the throttling mechanisms
1698 * and the inactive queue
1699 **********************************************************************/
1700
2d21ac55 1701done_moving_active_pages:
91447636 1702
55e303ae 1703 if (vm_page_free_count + local_freed >= vm_page_free_target) {
91447636
A
1704 if (object != NULL) {
1705 vm_object_unlock(object);
1706 object = NULL;
1707 }
2d21ac55
A
1708 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1709
3e170ce0
A
1710 vm_page_unlock_queues();
1711
55e303ae 1712 if (local_freeq) {
6d2010ae
A
1713
1714 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1715 vm_page_free_count, local_freed, delayed_unlock_limit, 2);
1716
316670eb 1717 vm_page_free_list(local_freeq, TRUE);
55e303ae 1718
6d2010ae
A
1719 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1720 vm_page_free_count, local_freed, 0, 2);
1721
2d21ac55 1722 local_freeq = NULL;
55e303ae
A
1723 local_freed = 0;
1724 }
3e170ce0
A
1725 vm_consider_waking_compactor_swapper();
1726
1727 vm_page_lock_queues();
1728
316670eb
A
1729 /*
1730 * make sure the pageout I/O threads are running
1731 * throttled in case there are still requests
1732 * in the laundry... since we have met our targets
1733 * we don't need the laundry to be cleaned in a timely
1734 * fashion... so let's avoid interfering with foreground
1735 * activity
1736 */
1737 vm_pageout_adjust_io_throttles(iq, eq, TRUE);
1738
2d21ac55 1739 /*
6d2010ae 1740 * recalculate vm_page_inactivate_target
593a1d5f
A
1741 */
1742 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1743 vm_page_inactive_count +
1744 vm_page_speculative_count);
2d21ac55 1745 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
6d2010ae
A
1746 !queue_empty(&vm_page_queue_active)) {
1747 /*
1748 * inactive target still not met... keep going
1749 * until we get the queues balanced...
1750 */
2d21ac55 1751 continue;
6d2010ae 1752 }
b0d623f7 1753 lck_mtx_lock(&vm_page_queue_free_lock);
55e303ae 1754
0b4e3aa0 1755 if ((vm_page_free_count >= vm_page_free_target) &&
2d21ac55 1756 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
6d2010ae
A
1757 /*
1758 * done - we have met our target *and*
1759 * there is no one waiting for a page.
1760 */
316670eb 1761return_from_scan:
2d21ac55
A
1762 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1763
3e170ce0 1764 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
39236c6e 1765 vm_pageout_inactive, vm_pageout_inactive_used, 0, 0);
3e170ce0 1766 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
6d2010ae
A
1767 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
1768 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
1769
91447636 1770 return;
0b4e3aa0 1771 }
b0d623f7 1772 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 1773 }
b0d623f7 1774
2d21ac55 1775 /*
b0d623f7
A
1776 * Before anything, we check if we have any ripe volatile
1777 * objects around. If so, try to purge the first object.
1778 * If the purge fails, fall through to reclaim a page instead.
1779 * If the purge succeeds, go back to the top and reevalute
1780 * the new memory situation.
2d21ac55 1781 */
fe8ab488 1782
2d21ac55 1783 assert (available_for_purge>=0);
fe8ab488 1784 force_purge = 0; /* no force-purging */
39236c6e 1785
fe8ab488
A
1786#if VM_PRESSURE_EVENTS
1787 pressure_level = memorystatus_vm_pressure_level;
6d2010ae 1788
fe8ab488 1789 if (pressure_level > kVMPressureNormal) {
39236c6e 1790
39236c6e
A
1791 if (pressure_level >= kVMPressureCritical) {
1792 force_purge = memorystatus_purge_on_critical;
1793 } else if (pressure_level >= kVMPressureUrgent) {
1794 force_purge = memorystatus_purge_on_urgent;
1795 } else if (pressure_level >= kVMPressureWarning) {
1796 force_purge = memorystatus_purge_on_warning;
39236c6e 1797 }
fe8ab488
A
1798 }
1799#endif /* VM_PRESSURE_EVENTS */
1800
1801 if (available_for_purge || force_purge) {
1802
1803 if (object != NULL) {
1804 vm_object_unlock(object);
1805 object = NULL;
1806 }
1807
1808 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
1809
1810 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
1811 if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
3e170ce0 1812 vm_pageout_purged_objects++;
6d2010ae 1813 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
39236c6e 1814 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
b0d623f7
A
1815 continue;
1816 }
6d2010ae 1817 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
39236c6e 1818 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2d21ac55 1819 }
fe8ab488 1820
2d21ac55
A
1821 if (queue_empty(&sq->age_q) && vm_page_speculative_count) {
1822 /*
6d2010ae 1823 * try to pull pages from the aging bins...
2d21ac55
A
1824 * see vm_page.h for an explanation of how
1825 * this mechanism works
1826 */
1827 struct vm_speculative_age_q *aq;
2d21ac55 1828 boolean_t can_steal = FALSE;
b0d623f7 1829 int num_scanned_queues;
2d21ac55
A
1830
1831 aq = &vm_page_queue_speculative[speculative_steal_index];
1832
b0d623f7
A
1833 num_scanned_queues = 0;
1834 while (queue_empty(&aq->age_q) &&
1835 num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
2d21ac55
A
1836
1837 speculative_steal_index++;
1838
1839 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
1840 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
1841
1842 aq = &vm_page_queue_speculative[speculative_steal_index];
1843 }
b0d623f7 1844
6d2010ae 1845 if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
b0d623f7
A
1846 /*
1847 * XXX We've scanned all the speculative
1848 * queues but still haven't found one
1849 * that is not empty, even though
1850 * vm_page_speculative_count is not 0.
6d2010ae
A
1851 *
1852 * report the anomaly...
b0d623f7 1853 */
b0d623f7
A
1854 printf("vm_pageout_scan: "
1855 "all speculative queues empty "
1856 "but count=%d. Re-adjusting.\n",
1857 vm_page_speculative_count);
6d2010ae 1858 if (vm_page_speculative_count > vm_page_speculative_count_drift_max)
b0d623f7
A
1859 vm_page_speculative_count_drift_max = vm_page_speculative_count;
1860 vm_page_speculative_count_drifts++;
1861#if 6553678
1862 Debugger("vm_pageout_scan: no speculative pages");
1863#endif
1864 /* readjust... */
1865 vm_page_speculative_count = 0;
1866 /* ... and continue */
1867 continue;
1868 }
1869
2d21ac55
A
1870 if (vm_page_speculative_count > vm_page_speculative_target)
1871 can_steal = TRUE;
1872 else {
3e170ce0
A
1873 if (!delay_speculative_age) {
1874 mach_timespec_t ts_fully_aged;
2d21ac55 1875
3e170ce0
A
1876 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
1877 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
1878 * 1000 * NSEC_PER_USEC;
55e303ae 1879
3e170ce0
A
1880 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
1881
1882 clock_sec_t sec;
1883 clock_nsec_t nsec;
1884 clock_get_system_nanotime(&sec, &nsec);
1885 ts.tv_sec = (unsigned int) sec;
1886 ts.tv_nsec = nsec;
2d21ac55 1887
3e170ce0
A
1888 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
1889 can_steal = TRUE;
1890 else
1891 delay_speculative_age++;
1892 } else {
1893 delay_speculative_age++;
1894 if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
1895 delay_speculative_age = 0;
1896 }
2d21ac55
A
1897 }
1898 if (can_steal == TRUE)
3e170ce0 1899 vm_page_speculate_ageit(aq);
2d21ac55 1900 }
6d2010ae
A
1901 if (queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
1902 int pages_evicted;
1903
1904 if (object != NULL) {
1905 vm_object_unlock(object);
1906 object = NULL;
1907 }
1908 pages_evicted = vm_object_cache_evict(100, 10);
1909
1910 if (pages_evicted) {
1911
1912 vm_pageout_cache_evicted += pages_evicted;
1913
1914 VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
1915 vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0);
39236c6e 1916 memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
6d2010ae
A
1917
1918 /*
1919 * we just freed up to 100 pages,
1920 * so go back to the top of the main loop
1921 * and re-evaulate the memory situation
1922 */
1923 continue;
1924 } else
1925 cache_evict_throttle = 100;
1926 }
1927 if (cache_evict_throttle)
1928 cache_evict_throttle--;
1929
3e170ce0 1930#if CONFIG_JETSAM
04b8595b 1931 /*
3e170ce0
A
1932 * don't let the filecache_min fall below 15% of available memory
1933 * on systems with an active compressor that isn't nearing its
1934 * limits w/r to accepting new data
04b8595b
A
1935 *
1936 * on systems w/o the compressor/swapper, the filecache is always
1937 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
1938 * since most (if not all) of the anonymous pages are in the
1939 * throttled queue (which isn't counted as available) which
1940 * effectively disables this filter
1941 */
3e170ce0
A
1942 if (vm_compressor_low_on_space())
1943 vm_page_filecache_min = 0;
1944 else
1945 vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 7);
1946#else
1947 /*
1948 * don't let the filecache_min fall below 33% of available memory...
1949 */
04b8595b 1950 vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 3);
3e170ce0 1951#endif
91447636 1952
316670eb 1953 exceeded_burst_throttle = FALSE;
1c79356b
A
1954 /*
1955 * Sometimes we have to pause:
1956 * 1) No inactive pages - nothing to do.
316670eb 1957 * 2) Loop control - no acceptable pages found on the inactive queue
91447636 1958 * within the last vm_pageout_burst_inactive_throttle iterations
316670eb 1959 * 3) Flow control - default pageout queue is full
1c79356b 1960 */
316670eb 1961 if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_anonymous) && queue_empty(&sq->age_q)) {
91447636
A
1962 vm_pageout_scan_empty_throttle++;
1963 msecs = vm_pageout_empty_wait;
1964 goto vm_pageout_scan_delay;
1965
b0d623f7 1966 } else if (inactive_burst_count >=
593a1d5f
A
1967 MIN(vm_pageout_burst_inactive_throttle,
1968 (vm_page_inactive_count +
1969 vm_page_speculative_count))) {
91447636
A
1970 vm_pageout_scan_burst_throttle++;
1971 msecs = vm_pageout_burst_wait;
316670eb
A
1972
1973 exceeded_burst_throttle = TRUE;
91447636
A
1974 goto vm_pageout_scan_delay;
1975
39236c6e
A
1976 } else if (vm_page_free_count > (vm_page_free_reserved / 4) &&
1977 VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
1978 vm_pageout_scan_swap_throttle++;
1979 msecs = vm_pageout_swap_wait;
1980 goto vm_pageout_scan_delay;
1981
6d2010ae
A
1982 } else if (VM_PAGE_Q_THROTTLED(iq) &&
1983 VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
b0d623f7
A
1984 clock_sec_t sec;
1985 clock_nsec_t nsec;
91447636
A
1986
1987 switch (flow_control.state) {
1988
1989 case FCS_IDLE:
316670eb 1990 if ((vm_page_free_count + local_freed) < vm_page_free_target) {
39236c6e 1991
3e170ce0
A
1992 if (object != NULL) {
1993 vm_object_unlock(object);
1994 object = NULL;
1995 }
1996 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1997
1998 vm_page_unlock_queues();
1999
2000 if (local_freeq) {
2001
2002 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
2003 vm_page_free_count, local_freed, delayed_unlock_limit, 3);
2004
2005 vm_page_free_list(local_freeq, TRUE);
2006
2007 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
2008 vm_page_free_count, local_freed, 0, 3);
2009
2010 local_freeq = NULL;
2011 local_freed = 0;
2012 }
2013 thread_yield_internal(1);
2014
2015 vm_page_lock_queues();
2016
2017 if (!VM_PAGE_Q_THROTTLED(iq)) {
2018 vm_pageout_scan_yield_unthrottled++;
2019 continue;
2020 }
39236c6e
A
2021 if (vm_page_pageable_external_count > vm_page_filecache_min && !queue_empty(&vm_page_queue_inactive)) {
2022 anons_grabbed = ANONS_GRABBED_LIMIT;
3e170ce0 2023 vm_pageout_scan_throttle_deferred++;
316670eb
A
2024 goto consider_inactive;
2025 }
39236c6e 2026 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count)
316670eb
A
2027 continue;
2028 }
91447636
A
2029reset_deadlock_timer:
2030 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
2031 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
b0d623f7
A
2032 clock_get_system_nanotime(&sec, &nsec);
2033 flow_control.ts.tv_sec = (unsigned int) sec;
2034 flow_control.ts.tv_nsec = nsec;
91447636
A
2035 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
2036
2037 flow_control.state = FCS_DELAYED;
2038 msecs = vm_pageout_deadlock_wait;
1c79356b 2039
91447636
A
2040 break;
2041
2042 case FCS_DELAYED:
b0d623f7
A
2043 clock_get_system_nanotime(&sec, &nsec);
2044 ts.tv_sec = (unsigned int) sec;
2045 ts.tv_nsec = nsec;
91447636
A
2046
2047 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
2048 /*
2049 * the pageout thread for the default pager is potentially
2050 * deadlocked since the
2051 * default pager queue has been throttled for more than the
2052 * allowable time... we need to move some clean pages or dirty
2053 * pages belonging to the external pagers if they aren't throttled
2054 * vm_page_free_wanted represents the number of threads currently
2055 * blocked waiting for pages... we'll move one page for each of
2056 * these plus a fixed amount to break the logjam... once we're done
2057 * moving this number of pages, we'll re-enter the FSC_DELAYED state
2058 * with a new timeout target since we have no way of knowing
2059 * whether we've broken the deadlock except through observation
2060 * of the queue associated with the default pager... we need to
2d21ac55 2061 * stop moving pages and allow the system to run to see what
91447636
A
2062 * state it settles into.
2063 */
2d21ac55 2064 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
91447636
A
2065 vm_pageout_scan_deadlock_detected++;
2066 flow_control.state = FCS_DEADLOCK_DETECTED;
91447636
A
2067 thread_wakeup((event_t) &vm_pageout_garbage_collect);
2068 goto consider_inactive;
2069 }
2070 /*
2071 * just resniff instead of trying
2072 * to compute a new delay time... we're going to be
2073 * awakened immediately upon a laundry completion,
2074 * so we won't wait any longer than necessary
2075 */
2076 msecs = vm_pageout_idle_wait;
2077 break;
1c79356b 2078
91447636
A
2079 case FCS_DEADLOCK_DETECTED:
2080 if (vm_pageout_deadlock_target)
2081 goto consider_inactive;
2082 goto reset_deadlock_timer;
55e303ae 2083
91447636 2084 }
91447636
A
2085vm_pageout_scan_delay:
2086 if (object != NULL) {
2087 vm_object_unlock(object);
2088 object = NULL;
2089 }
2d21ac55
A
2090 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2091
fe8ab488
A
2092 vm_page_unlock_queues();
2093
55e303ae 2094 if (local_freeq) {
6d2010ae
A
2095
2096 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
2097 vm_page_free_count, local_freed, delayed_unlock_limit, 3);
2098
316670eb 2099 vm_page_free_list(local_freeq, TRUE);
55e303ae 2100
6d2010ae
A
2101 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
2102 vm_page_free_count, local_freed, 0, 3);
2103
2d21ac55 2104 local_freeq = NULL;
55e303ae 2105 local_freed = 0;
fe8ab488 2106 }
3e170ce0 2107 vm_consider_waking_compactor_swapper();
b0d623f7 2108
fe8ab488
A
2109 vm_page_lock_queues();
2110
2111 if (flow_control.state == FCS_DELAYED &&
2112 !VM_PAGE_Q_THROTTLED(iq)) {
2113 flow_control.state = FCS_IDLE;
2114 goto consider_inactive;
55e303ae 2115 }
316670eb
A
2116
2117 if (vm_page_free_count >= vm_page_free_target) {
2118 /*
39236c6e 2119 * we're here because
316670eb 2120 * 1) someone else freed up some pages while we had
39236c6e 2121 * the queues unlocked above
316670eb
A
2122 * and we've hit one of the 3 conditions that
2123 * cause us to pause the pageout scan thread
2124 *
2125 * since we already have enough free pages,
2126 * let's avoid stalling and return normally
2127 *
2128 * before we return, make sure the pageout I/O threads
2129 * are running throttled in case there are still requests
2130 * in the laundry... since we have enough free pages
2131 * we don't need the laundry to be cleaned in a timely
2132 * fashion... so let's avoid interfering with foreground
2133 * activity
2134 *
2135 * we don't want to hold vm_page_queue_free_lock when
2136 * calling vm_pageout_adjust_io_throttles (since it
2137 * may cause other locks to be taken), we do the intitial
2138 * check outside of the lock. Once we take the lock,
2139 * we recheck the condition since it may have changed.
2140 * if it has, no problem, we will make the threads
2141 * non-throttled before actually blocking
2142 */
2143 vm_pageout_adjust_io_throttles(iq, eq, TRUE);
2144 }
2145 lck_mtx_lock(&vm_page_queue_free_lock);
0b4e3aa0 2146
39236c6e
A
2147 if (vm_page_free_count >= vm_page_free_target &&
2148 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
316670eb
A
2149 goto return_from_scan;
2150 }
2151 lck_mtx_unlock(&vm_page_queue_free_lock);
2152
2153 if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
2154 /*
2155 * we're most likely about to block due to one of
2156 * the 3 conditions that cause vm_pageout_scan to
2157 * not be able to make forward progress w/r
2158 * to providing new pages to the free queue,
2159 * so unthrottle the I/O threads in case we
2160 * have laundry to be cleaned... it needs
2161 * to be completed ASAP.
2162 *
2163 * even if we don't block, we want the io threads
2164 * running unthrottled since the sum of free +
2165 * clean pages is still under our free target
2166 */
2167 vm_pageout_adjust_io_throttles(iq, eq, FALSE);
2168 }
2169 if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
2170 /*
2171 * if we get here we're below our free target and
2172 * we're stalling due to a full laundry queue or
2173 * we don't have any inactive pages other then
2174 * those in the clean queue...
2175 * however, we have pages on the clean queue that
2176 * can be moved to the free queue, so let's not
2177 * stall the pageout scan
2178 */
2179 flow_control.state = FCS_IDLE;
2180 goto consider_inactive;
2181 }
6d2010ae
A
2182 VM_CHECK_MEMORYSTATUS;
2183
316670eb
A
2184 if (flow_control.state != FCS_IDLE)
2185 vm_pageout_scan_throttle++;
2186 iq->pgo_throttled = TRUE;
2187
2d21ac55 2188 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
2d21ac55 2189 counter(c_vm_pageout_scan_block++);
1c79356b 2190
91447636 2191 vm_page_unlock_queues();
2d21ac55
A
2192
2193 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
b0d623f7 2194
6d2010ae
A
2195 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
2196 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
39236c6e 2197 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
6d2010ae 2198
91447636
A
2199 thread_block(THREAD_CONTINUE_NULL);
2200
6d2010ae
A
2201 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
2202 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
39236c6e 2203 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
6d2010ae 2204
91447636
A
2205 vm_page_lock_queues();
2206 delayed_unlock = 1;
2207
2208 iq->pgo_throttled = FALSE;
0b4e3aa0 2209
2d21ac55 2210 if (loop_count >= vm_page_inactive_count)
55e303ae 2211 loop_count = 0;
91447636
A
2212 inactive_burst_count = 0;
2213
1c79356b
A
2214 goto Restart;
2215 /*NOTREACHED*/
2216 }
2217
91447636
A
2218
2219 flow_control.state = FCS_IDLE;
2220consider_inactive:
6d2010ae
A
2221 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
2222 vm_pageout_inactive_external_forced_reactivate_limit);
91447636
A
2223 loop_count++;
2224 inactive_burst_count++;
1c79356b 2225 vm_pageout_inactive++;
39236c6e 2226
316670eb
A
2227
2228 /*
2229 * Choose a victim.
2230 */
39236c6e 2231 while (1) {
3e170ce0
A
2232 uint32_t inactive_external_count;
2233
2d21ac55 2234 m = NULL;
91447636 2235
6d2010ae 2236 if (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
b0d623f7
A
2237 assert(vm_page_throttled_count == 0);
2238 assert(queue_empty(&vm_page_queue_throttled));
91447636 2239 }
2d21ac55 2240 /*
39236c6e
A
2241 * The most eligible pages are ones we paged in speculatively,
2242 * but which have not yet been touched.
2d21ac55 2243 */
fe8ab488 2244 if (!queue_empty(&sq->age_q) && force_anonymous == FALSE) {
39236c6e 2245 m = (vm_page_t) queue_first(&sq->age_q);
6d2010ae 2246
39236c6e 2247 page_prev_state = PAGE_STATE_SPECULATIVE;
316670eb 2248
39236c6e
A
2249 break;
2250 }
2251 /*
2252 * Try a clean-queue inactive page.
2253 */
2254 if (!queue_empty(&vm_page_queue_cleaned)) {
2255 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
316670eb 2256
39236c6e 2257 page_prev_state = PAGE_STATE_CLEAN;
316670eb 2258
39236c6e
A
2259 break;
2260 }
316670eb 2261
39236c6e 2262 grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
3e170ce0 2263 inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
316670eb 2264
3e170ce0
A
2265 if ((vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) ||
2266 ((inactive_external_count < vm_page_anonymous_count) && (inactive_external_count < (vm_page_pageable_external_count / 3)))) {
39236c6e
A
2267 grab_anonymous = TRUE;
2268 anons_grabbed = 0;
9bccf70c 2269 }
6d2010ae 2270
39236c6e
A
2271 if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) {
2272
2273 if ( !queue_empty(&vm_page_queue_inactive) ) {
2274 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
2275
2276 page_prev_state = PAGE_STATE_INACTIVE;
2277 anons_grabbed = 0;
2278
04b8595b
A
2279 if (vm_page_pageable_external_count < vm_page_filecache_min) {
2280 if ((++reactivated_this_call % 100))
2281 goto must_activate_page;
2282 /*
2283 * steal 1% of the file backed pages even if
2284 * we are under the limit that has been set
2285 * for a healthy filecache
2286 */
2287 }
2d21ac55
A
2288 break;
2289 }
2290 }
39236c6e
A
2291 if ( !queue_empty(&vm_page_queue_anonymous) ) {
2292 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
2293
2294 page_prev_state = PAGE_STATE_ANONYMOUS;
2295 anons_grabbed++;
2296
2297 break;
2298 }
316670eb 2299
2d21ac55 2300 /*
316670eb
A
2301 * if we've gotten here, we have no victim page.
2302 * if making clean, free the local freed list and return.
2303 * if making free, check to see if we've finished balancing the queues
2304 * yet, if we haven't just continue, else panic
2d21ac55 2305 */
316670eb 2306 vm_page_unlock_queues();
6d2010ae 2307
316670eb
A
2308 if (object != NULL) {
2309 vm_object_unlock(object);
2310 object = NULL;
2311 }
2312 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2313
2314 if (local_freeq) {
2315 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
2316 vm_page_free_count, local_freed, delayed_unlock_limit, 5);
2317
2318 vm_page_free_list(local_freeq, TRUE);
2319
2320 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
2321 vm_page_free_count, local_freed, 0, 5);
2322
2323 local_freeq = NULL;
2324 local_freed = 0;
2325 }
2326 vm_page_lock_queues();
2327 delayed_unlock = 1;
2328
fe8ab488
A
2329 force_anonymous = FALSE;
2330
316670eb
A
2331 if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target)
2332 goto Restart;
2333
fe8ab488
A
2334 if (!queue_empty(&sq->age_q))
2335 goto Restart;
2336
316670eb
A
2337 panic("vm_pageout: no victim");
2338
2339 /* NOTREACHED */
9bccf70c 2340 }
39236c6e 2341 force_anonymous = FALSE;
316670eb
A
2342
2343 /*
2344 * we just found this page on one of our queues...
2345 * it can't also be on the pageout queue, so safe
3e170ce0 2346 * to call vm_page_queues_remove
316670eb
A
2347 */
2348 assert(!m->pageout_queue);
2349
3e170ce0 2350 vm_page_queues_remove(m);
2d21ac55 2351
91447636 2352 assert(!m->laundry);
6d2010ae
A
2353 assert(!m->private);
2354 assert(!m->fictitious);
91447636 2355 assert(m->object != kernel_object);
2d21ac55
A
2356 assert(m->phys_page != vm_page_guard_addr);
2357
6d2010ae
A
2358
2359 if (page_prev_state != PAGE_STATE_SPECULATIVE)
b0d623f7 2360 vm_pageout_stats[vm_pageout_stat_now].considered++;
b0d623f7 2361
2d21ac55 2362 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1c79356b 2363
91447636 2364 /*
2d21ac55
A
2365 * check to see if we currently are working
2366 * with the same object... if so, we've
2367 * already got the lock
91447636
A
2368 */
2369 if (m->object != object) {
2d21ac55
A
2370 /*
2371 * the object associated with candidate page is
2372 * different from the one we were just working
2373 * with... dump the lock if we still own it
2374 */
91447636
A
2375 if (object != NULL) {
2376 vm_object_unlock(object);
2377 object = NULL;
2d21ac55 2378 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
91447636 2379 }
2d21ac55
A
2380 /*
2381 * Try to lock object; since we've alread got the
2382 * page queues lock, we can only 'try' for this one.
2383 * if the 'try' fails, we need to do a mutex_pause
2384 * to allow the owner of the object lock a chance to
2385 * run... otherwise, we're likely to trip over this
2386 * object in the same state as we work our way through
2387 * the queue... clumps of pages associated with the same
2388 * object are fairly typical on the inactive and active queues
2389 */
2390 if (!vm_object_lock_try_scan(m->object)) {
6d2010ae
A
2391 vm_page_t m_want = NULL;
2392
b0d623f7
A
2393 vm_pageout_inactive_nolock++;
2394
316670eb
A
2395 if (page_prev_state == PAGE_STATE_CLEAN)
2396 vm_pageout_cleaned_nolock++;
2397
6d2010ae
A
2398 if (page_prev_state == PAGE_STATE_SPECULATIVE)
2399 page_prev_state = PAGE_STATE_INACTIVE_FIRST;
2d21ac55 2400
2d21ac55
A
2401 pmap_clear_reference(m->phys_page);
2402 m->reference = FALSE;
2403
6d2010ae
A
2404 /*
2405 * m->object must be stable since we hold the page queues lock...
2406 * we can update the scan_collisions field sans the object lock
2407 * since it is a separate field and this is the only spot that does
2408 * a read-modify-write operation and it is never executed concurrently...
2409 * we can asynchronously set this field to 0 when creating a UPL, so it
2410 * is possible for the value to be a bit non-determistic, but that's ok
2411 * since it's only used as a hint
2412 */
3e170ce0 2413 m->object->scan_collisions = 1;
6d2010ae 2414
39236c6e
A
2415 if ( !queue_empty(&sq->age_q) )
2416 m_want = (vm_page_t) queue_first(&sq->age_q);
2417 else if ( !queue_empty(&vm_page_queue_cleaned))
2418 m_want = (vm_page_t) queue_first(&vm_page_queue_cleaned);
2419 else if (anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous))
2420 m_want = (vm_page_t) queue_first(&vm_page_queue_inactive);
2421 else if ( !queue_empty(&vm_page_queue_anonymous))
2422 m_want = (vm_page_t) queue_first(&vm_page_queue_anonymous);
2423
2d21ac55
A
2424 /*
2425 * this is the next object we're going to be interested in
2426 * try to make sure its available after the mutex_yield
2427 * returns control
2428 */
6d2010ae
A
2429 if (m_want)
2430 vm_pageout_scan_wants_object = m_want->object;
2d21ac55 2431
91447636
A
2432 /*
2433 * force us to dump any collected free pages
2434 * and to pause before moving on
2435 */
2d21ac55 2436 try_failed = TRUE;
55e303ae 2437
6d2010ae 2438 goto requeue_page;
1c79356b 2439 }
91447636 2440 object = m->object;
2d21ac55 2441 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
0b4e3aa0 2442
2d21ac55 2443 try_failed = FALSE;
1c79356b 2444 }
6d2010ae
A
2445 if (catch_up_count)
2446 catch_up_count--;
1c79356b 2447
6d2010ae
A
2448 if (m->busy) {
2449 if (m->encrypted_cleaning) {
2450 /*
2451 * ENCRYPTED SWAP:
2452 * if this page has already been picked up as
2453 * part of a page-out cluster, it will be busy
2454 * because it is being encrypted (see
2455 * vm_object_upl_request()). But we still
2456 * want to demote it from "clean-in-place"
2457 * (aka "adjacent") to "clean-and-free" (aka
2458 * "target"), so let's ignore its "busy" bit
2459 * here and proceed to check for "cleaning" a
2460 * little bit below...
2461 *
2462 * CAUTION CAUTION:
2463 * A "busy" page should still be left alone for
2464 * most purposes, so we have to be very careful
2465 * not to process that page too much.
2466 */
2467 assert(m->cleaning);
2468 goto consider_inactive_page;
2d21ac55 2469 }
2d21ac55 2470
1c79356b
A
2471 /*
2472 * Somebody is already playing with this page.
6d2010ae 2473 * Put it back on the appropriate queue
2d21ac55 2474 *
1c79356b 2475 */
1c79356b 2476 vm_pageout_inactive_busy++;
316670eb
A
2477
2478 if (page_prev_state == PAGE_STATE_CLEAN)
2479 vm_pageout_cleaned_busy++;
2480
6d2010ae
A
2481requeue_page:
2482 switch (page_prev_state) {
2483
2484 case PAGE_STATE_SPECULATIVE:
316670eb
A
2485 case PAGE_STATE_ANONYMOUS:
2486 case PAGE_STATE_CLEAN:
6d2010ae 2487 case PAGE_STATE_INACTIVE:
3e170ce0 2488 vm_page_enqueue_inactive(m, FALSE);
6d2010ae
A
2489 break;
2490
2491 case PAGE_STATE_INACTIVE_FIRST:
3e170ce0 2492 vm_page_enqueue_inactive(m, TRUE);
6d2010ae
A
2493 break;
2494 }
91447636 2495 goto done_with_inactivepage;
1c79356b
A
2496 }
2497
6d2010ae 2498
1c79356b 2499 /*
6d2010ae
A
2500 * If it's absent, in error or the object is no longer alive,
2501 * we can reclaim the page... in the no longer alive case,
2502 * there are 2 states the page can be in that preclude us
2503 * from reclaiming it - busy or cleaning - that we've already
2504 * dealt with
1c79356b 2505 */
6d2010ae 2506 if (m->absent || m->error || !object->alive) {
1c79356b 2507
6d2010ae
A
2508 if (m->absent)
2509 vm_pageout_inactive_absent++;
2510 else if (!object->alive)
2511 vm_pageout_inactive_notalive++;
2512 else
2513 vm_pageout_inactive_error++;
316670eb 2514reclaim_page:
91447636
A
2515 if (vm_pageout_deadlock_target) {
2516 vm_pageout_scan_inactive_throttle_success++;
2517 vm_pageout_deadlock_target--;
2518 }
2d21ac55
A
2519
2520 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
2521
b0d623f7 2522 if (object->internal) {
2d21ac55
A
2523 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
2524 } else {
2525 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
2526 }
316670eb
A
2527 assert(!m->cleaning);
2528 assert(!m->laundry);
2529
2530 m->busy = TRUE;
2d21ac55 2531
b0d623f7
A
2532 /*
2533 * remove page from object here since we're already
2534 * behind the object lock... defer the rest of the work
2535 * we'd normally do in vm_page_free_prepare_object
2536 * until 'vm_page_free_list' is called
2537 */
2538 if (m->tabled)
2539 vm_page_remove(m, TRUE);
55e303ae 2540
91447636
A
2541 assert(m->pageq.next == NULL &&
2542 m->pageq.prev == NULL);
55e303ae
A
2543 m->pageq.next = (queue_entry_t)local_freeq;
2544 local_freeq = m;
91447636 2545 local_freed++;
316670eb
A
2546
2547 if (page_prev_state == PAGE_STATE_SPECULATIVE)
2548 vm_pageout_freed_from_speculative++;
2549 else if (page_prev_state == PAGE_STATE_CLEAN)
2550 vm_pageout_freed_from_cleaned++;
2551 else
2552 vm_pageout_freed_from_inactive_clean++;
55e303ae 2553
6d2010ae 2554 if (page_prev_state != PAGE_STATE_SPECULATIVE)
b0d623f7 2555 vm_pageout_stats[vm_pageout_stat_now].reclaimed++;
b0d623f7 2556
fe8ab488 2557 inactive_burst_count = 0;
91447636 2558 goto done_with_inactivepage;
1c79356b 2559 }
b0d623f7
A
2560 /*
2561 * If the object is empty, the page must be reclaimed even
2562 * if dirty or used.
2563 * If the page belongs to a volatile object, we stick it back
2564 * on.
2565 */
2566 if (object->copy == VM_OBJECT_NULL) {
2567 if (object->purgable == VM_PURGABLE_EMPTY) {
b0d623f7
A
2568 if (m->pmapped == TRUE) {
2569 /* unmap the page */
2570 refmod_state = pmap_disconnect(m->phys_page);
2571 if (refmod_state & VM_MEM_MODIFIED) {
316670eb 2572 SET_PAGE_DIRTY(m, FALSE);
b0d623f7
A
2573 }
2574 }
2575 if (m->dirty || m->precious) {
2576 /* we saved the cost of cleaning this page ! */
2577 vm_page_purged_count++;
2578 }
2579 goto reclaim_page;
2580 }
39236c6e 2581
fe8ab488 2582 if (COMPRESSED_PAGER_IS_ACTIVE) {
39236c6e
A
2583 /*
2584 * With the VM compressor, the cost of
2585 * reclaiming a page is much lower (no I/O),
2586 * so if we find a "volatile" page, it's better
2587 * to let it get compressed rather than letting
2588 * it occupy a full page until it gets purged.
2589 * So no need to check for "volatile" here.
2590 */
2591 } else if (object->purgable == VM_PURGABLE_VOLATILE) {
2592 /*
2593 * Avoid cleaning a "volatile" page which might
2594 * be purged soon.
2595 */
2596
b0d623f7
A
2597 /* if it's wired, we can't put it on our queue */
2598 assert(!VM_PAGE_WIRED(m));
6d2010ae 2599
b0d623f7 2600 /* just stick it back on! */
6d2010ae 2601 reactivated_this_call++;
316670eb
A
2602
2603 if (page_prev_state == PAGE_STATE_CLEAN)
2604 vm_pageout_cleaned_volatile_reactivated++;
2605
b0d623f7
A
2606 goto reactivate_page;
2607 }
2608 }
2609
316670eb 2610consider_inactive_page:
6d2010ae
A
2611 if (m->busy) {
2612 /*
2613 * CAUTION CAUTION:
2614 * A "busy" page should always be left alone, except...
2615 */
2616 if (m->cleaning && m->encrypted_cleaning) {
2617 /*
2618 * ENCRYPTED_SWAP:
2619 * We could get here with a "busy" page
2620 * if it's being encrypted during a
2621 * "clean-in-place" operation. We'll deal
2622 * with it right away by testing if it has been
2623 * referenced and either reactivating it or
2624 * promoting it from "clean-in-place" to
2625 * "clean-and-free".
2626 */
2627 } else {
2628 panic("\"busy\" page considered for pageout\n");
2629 }
2630 }
2631
1c79356b
A
2632 /*
2633 * If it's being used, reactivate.
2634 * (Fictitious pages are either busy or absent.)
2d21ac55
A
2635 * First, update the reference and dirty bits
2636 * to make sure the page is unreferenced.
1c79356b 2637 */
2d21ac55
A
2638 refmod_state = -1;
2639
2640 if (m->reference == FALSE && m->pmapped == TRUE) {
91447636
A
2641 refmod_state = pmap_get_refmod(m->phys_page);
2642
2643 if (refmod_state & VM_MEM_REFERENCED)
2644 m->reference = TRUE;
316670eb
A
2645 if (refmod_state & VM_MEM_MODIFIED) {
2646 SET_PAGE_DIRTY(m, FALSE);
2647 }
91447636 2648 }
316670eb 2649
6d2010ae 2650 /*
39236c6e 2651 * if (m->cleaning && !m->pageout)
6d2010ae 2652 * If already cleaning this page in place and it hasn't
39236c6e
A
2653 * been recently referenced, just pull off the queue.
2654 * We can leave the page mapped, and upl_commit_range
2655 * will put it on the clean queue.
6d2010ae
A
2656 *
2657 * note: if m->encrypted_cleaning == TRUE, then
2658 * m->cleaning == TRUE
2659 * and we'll handle it here
316670eb
A
2660 *
2661 * if (m->pageout && !m->cleaning)
2662 * an msync INVALIDATE is in progress...
2663 * this page has been marked for destruction
2664 * after it has been cleaned,
2665 * but not yet gathered into a UPL
2666 * where 'cleaning' will be set...
2667 * just leave it off the paging queues
2668 *
2669 * if (m->pageout && m->clenaing)
2670 * an msync INVALIDATE is in progress
2671 * and the UPL has already gathered this page...
2672 * just leave it off the paging queues
6d2010ae 2673 */
316670eb
A
2674
2675 /*
2676 * page with m->pageout and still on the queues means that an
39236c6e 2677 * MS_INVALIDATE is in progress on this page... leave it alone
316670eb
A
2678 */
2679 if (m->pageout) {
316670eb
A
2680 goto done_with_inactivepage;
2681 }
2682
2683 /* if cleaning, reactivate if referenced. otherwise, just pull off queue */
6d2010ae 2684 if (m->cleaning) {
6d2010ae
A
2685 if (m->reference == TRUE) {
2686 reactivated_this_call++;
2687 goto reactivate_page;
316670eb 2688 } else {
316670eb 2689 goto done_with_inactivepage;
6d2010ae 2690 }
6d2010ae
A
2691 }
2692
39236c6e
A
2693 if (m->reference || m->dirty) {
2694 /* deal with a rogue "reusable" page */
2695 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m);
2696 }
b0d623f7 2697
fe8ab488
A
2698 if (!m->no_cache &&
2699 (m->reference ||
2700 (m->xpmapped && !object->internal && (vm_page_xpmapped_external_count < (vm_page_external_count / 4))))) {
2d21ac55
A
2701 /*
2702 * The page we pulled off the inactive list has
2703 * been referenced. It is possible for other
2704 * processors to be touching pages faster than we
2705 * can clear the referenced bit and traverse the
2706 * inactive queue, so we limit the number of
2707 * reactivations.
2708 */
2709 if (++reactivated_this_call >= reactivate_limit) {
2710 vm_pageout_reactivation_limit_exceeded++;
2711 } else if (catch_up_count) {
2712 vm_pageout_catch_ups++;
2713 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
2714 vm_pageout_inactive_force_reclaim++;
2715 } else {
b0d623f7 2716 uint32_t isinuse;
316670eb
A
2717
2718 if (page_prev_state == PAGE_STATE_CLEAN)
2719 vm_pageout_cleaned_reference_reactivated++;
2720
2d21ac55 2721reactivate_page:
b0d623f7
A
2722 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
2723 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
2724 /*
2725 * no explict mappings of this object exist
2726 * and it's not open via the filesystem
2727 */
2728 vm_page_deactivate(m);
2729 vm_pageout_inactive_deactivated++;
2730 } else {
04b8595b 2731must_activate_page:
b0d623f7
A
2732 /*
2733 * The page was/is being used, so put back on active list.
2734 */
2735 vm_page_activate(m);
2736 VM_STAT_INCR(reactivations);
fe8ab488 2737 inactive_burst_count = 0;
b0d623f7 2738 }
316670eb
A
2739
2740 if (page_prev_state == PAGE_STATE_CLEAN)
2741 vm_pageout_cleaned_reactivated++;
2742
2d21ac55 2743 vm_pageout_inactive_used++;
55e303ae 2744
2d21ac55
A
2745 goto done_with_inactivepage;
2746 }
2747 /*
2748 * Make sure we call pmap_get_refmod() if it
2749 * wasn't already called just above, to update
2750 * the dirty bit.
2751 */
2752 if ((refmod_state == -1) && !m->dirty && m->pmapped) {
2753 refmod_state = pmap_get_refmod(m->phys_page);
316670eb
A
2754 if (refmod_state & VM_MEM_MODIFIED) {
2755 SET_PAGE_DIRTY(m, FALSE);
2756 }
2d21ac55
A
2757 }
2758 forced_reclaim = TRUE;
2759 } else {
2760 forced_reclaim = FALSE;
1c79356b
A
2761 }
2762
91447636
A
2763 XPR(XPR_VM_PAGEOUT,
2764 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
b0d623f7 2765 object, m->offset, m, 0,0);
0b4e3aa0 2766
91447636
A
2767 /*
2768 * we've got a candidate page to steal...
2769 *
2770 * m->dirty is up to date courtesy of the
2771 * preceding check for m->reference... if
2772 * we get here, then m->reference had to be
2d21ac55
A
2773 * FALSE (or possibly "reactivate_limit" was
2774 * exceeded), but in either case we called
2775 * pmap_get_refmod() and updated both
2776 * m->reference and m->dirty
91447636
A
2777 *
2778 * if it's dirty or precious we need to
2779 * see if the target queue is throtttled
2780 * it if is, we need to skip over it by moving it back
2781 * to the end of the inactive queue
2782 */
b0d623f7 2783
91447636
A
2784 inactive_throttled = FALSE;
2785
2786 if (m->dirty || m->precious) {
2787 if (object->internal) {
2d21ac55 2788 if (VM_PAGE_Q_THROTTLED(iq))
91447636
A
2789 inactive_throttled = TRUE;
2790 } else if (VM_PAGE_Q_THROTTLED(eq)) {
2d21ac55 2791 inactive_throttled = TRUE;
1c79356b 2792 }
91447636 2793 }
2d21ac55 2794throttle_inactive:
6d2010ae
A
2795 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2796 object->internal && m->dirty &&
2797 (object->purgable == VM_PURGABLE_DENY ||
2798 object->purgable == VM_PURGABLE_NONVOLATILE ||
2799 object->purgable == VM_PURGABLE_VOLATILE)) {
3e170ce0 2800 vm_page_check_pageable_safe(m);
6d2010ae
A
2801 queue_enter(&vm_page_queue_throttled, m,
2802 vm_page_t, pageq);
2803 m->throttled = TRUE;
2804 vm_page_throttled_count++;
2805
2806 vm_pageout_scan_reclaimed_throttled++;
2807
fe8ab488 2808 inactive_burst_count = 0;
6d2010ae
A
2809 goto done_with_inactivepage;
2810 }
2811 if (inactive_throttled == TRUE) {
2812
39236c6e
A
2813 if (object->internal == FALSE) {
2814 /*
2815 * we need to break up the following potential deadlock case...
2816 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
2817 * b) The thread doing the writing is waiting for pages while holding the truncate lock
2818 * c) Most of the pages in the inactive queue belong to this file.
2819 *
2820 * we are potentially in this deadlock because...
2821 * a) the external pageout queue is throttled
2822 * b) we're done with the active queue and moved on to the inactive queue
2823 * c) we've got a dirty external page
6d2010ae 2824 *
39236c6e
A
2825 * since we don't know the reason for the external pageout queue being throttled we
2826 * must suspect that we are deadlocked, so move the current page onto the active queue
2827 * in an effort to cause a page from the active queue to 'age' to the inactive queue
2828 *
2829 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
2830 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
2831 * pool the next time we select a victim page... if we can make enough new free pages,
2832 * the deadlock will break, the external pageout queue will empty and it will no longer
2833 * be throttled
2834 *
2835 * if we have jestam configured, keep a count of the pages reactivated this way so
2836 * that we can try to find clean pages in the active/inactive queues before
2837 * deciding to jetsam a process
6d2010ae 2838 */
3e170ce0 2839 vm_pageout_scan_inactive_throttled_external++;
39236c6e 2840
3e170ce0 2841 vm_page_check_pageable_safe(m);
316670eb
A
2842 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2843 m->active = TRUE;
2844 vm_page_active_count++;
fe8ab488 2845 vm_page_pageable_external_count++;
316670eb
A
2846
2847 vm_pageout_adjust_io_throttles(iq, eq, FALSE);
2848
39236c6e 2849#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
6d2010ae
A
2850 vm_pageout_inactive_external_forced_reactivate_limit--;
2851
39236c6e 2852 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
6d2010ae 2853 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
6d2010ae
A
2854 /*
2855 * Possible deadlock scenario so request jetsam action
2856 */
2857 assert(object);
2858 vm_object_unlock(object);
2859 object = VM_OBJECT_NULL;
2860 vm_page_unlock_queues();
39236c6e 2861
3e170ce0 2862 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
39236c6e 2863 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
6d2010ae 2864
39236c6e
A
2865 /* Kill first suitable process */
2866 if (memorystatus_kill_on_VM_page_shortage(FALSE) == FALSE) {
6d2010ae
A
2867 panic("vm_pageout_scan: Jetsam request failed\n");
2868 }
39236c6e 2869
3e170ce0 2870 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0);
6d2010ae 2871
316670eb 2872 vm_pageout_inactive_external_forced_jetsam_count++;
6d2010ae
A
2873 vm_page_lock_queues();
2874 delayed_unlock = 1;
2d21ac55 2875 }
39236c6e
A
2876#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
2877 force_anonymous = TRUE;
2878#endif
fe8ab488 2879 inactive_burst_count = 0;
6d2010ae
A
2880 goto done_with_inactivepage;
2881 } else {
39236c6e
A
2882 if (page_prev_state == PAGE_STATE_SPECULATIVE)
2883 page_prev_state = PAGE_STATE_INACTIVE;
2884
2885 vm_pageout_scan_inactive_throttled_internal++;
2886
3e170ce0 2887 goto must_activate_page;
1c79356b 2888 }
1c79356b 2889 }
2d21ac55 2890
1c79356b 2891 /*
91447636
A
2892 * we've got a page that we can steal...
2893 * eliminate all mappings and make sure
2894 * we have the up-to-date modified state
316670eb 2895 *
91447636
A
2896 * if we need to do a pmap_disconnect then we
2897 * need to re-evaluate m->dirty since the pmap_disconnect
2898 * provides the true state atomically... the
2899 * page was still mapped up to the pmap_disconnect
2900 * and may have been dirtied at the last microsecond
2901 *
2d21ac55
A
2902 * Note that if 'pmapped' is FALSE then the page is not
2903 * and has not been in any map, so there is no point calling
39236c6e
A
2904 * pmap_disconnect(). m->dirty could have been set in anticipation
2905 * of likely usage of the page.
91447636 2906 */
2d21ac55 2907 if (m->pmapped == TRUE) {
3e170ce0 2908 int pmap_options;
0b4e3aa0 2909
3e170ce0
A
2910 /*
2911 * Don't count this page as going into the compressor
2912 * if any of these are true:
2913 * 1) We have the dynamic pager i.e. no compressed pager
2914 * 2) Freezer enabled device with a freezer file to
2915 * hold the app data i.e. no compressed pager
2916 * 3) Freezer enabled device with compressed pager
2917 * backend (exclusive use) i.e. most of the VM system
2918 * (including vm_pageout_scan) has no knowledge of
2919 * the compressor
2920 * 4) This page belongs to a file and hence will not be
2921 * sent into the compressor
2922 */
2923 if (DEFAULT_PAGER_IS_ACTIVE ||
2924 DEFAULT_FREEZER_IS_ACTIVE ||
2925 DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS ||
2926 object->internal == FALSE) {
2927 pmap_options = 0;
2928 } else if (m->dirty || m->precious) {
fe8ab488 2929 /*
3e170ce0
A
2930 * VM knows that this page is dirty (or
2931 * precious) and needs to be compressed
2932 * rather than freed.
2933 * Tell the pmap layer to count this page
2934 * as "compressed".
fe8ab488 2935 */
3e170ce0 2936 pmap_options = PMAP_OPTIONS_COMPRESSOR;
39236c6e 2937 } else {
3e170ce0
A
2938 /*
2939 * VM does not know if the page needs to
2940 * be preserved but the pmap layer might tell
2941 * us if any mapping has "modified" it.
2942 * Let's the pmap layer to count this page
2943 * as compressed if and only if it has been
2944 * modified.
2945 */
2946 pmap_options =
2947 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
316670eb 2948 }
3e170ce0
A
2949 refmod_state = pmap_disconnect_options(m->phys_page,
2950 pmap_options,
2951 NULL);
39236c6e
A
2952 if (refmod_state & VM_MEM_MODIFIED) {
2953 SET_PAGE_DIRTY(m, FALSE);
91447636
A
2954 }
2955 }
2d21ac55
A
2956 /*
2957 * reset our count of pages that have been reclaimed
2958 * since the last page was 'stolen'
2959 */
2960 inactive_reclaim_run = 0;
2961
1c79356b
A
2962 /*
2963 * If it's clean and not precious, we can free the page.
2964 */
1c79356b 2965 if (!m->dirty && !m->precious) {
b0d623f7 2966
6d2010ae
A
2967 if (page_prev_state == PAGE_STATE_SPECULATIVE)
2968 vm_pageout_speculative_clean++;
2969 else {
316670eb
A
2970 if (page_prev_state == PAGE_STATE_ANONYMOUS)
2971 vm_pageout_inactive_anonymous++;
2972 else if (page_prev_state == PAGE_STATE_CLEAN)
2973 vm_pageout_cleaned_reclaimed++;
2974
6d2010ae
A
2975 vm_pageout_inactive_clean++;
2976 }
316670eb 2977
316670eb
A
2978 /*
2979 * OK, at this point we have found a page we are going to free.
2980 */
fe8ab488
A
2981#if CONFIG_PHANTOM_CACHE
2982 if (!object->internal)
2983 vm_phantom_cache_add_ghost(m);
2984#endif
1c79356b
A
2985 goto reclaim_page;
2986 }
2d21ac55
A
2987
2988 /*
2989 * The page may have been dirtied since the last check
2990 * for a throttled target queue (which may have been skipped
2991 * if the page was clean then). With the dirty page
2992 * disconnected here, we can make one final check.
2993 */
6d2010ae
A
2994 if (object->internal) {
2995 if (VM_PAGE_Q_THROTTLED(iq))
2996 inactive_throttled = TRUE;
2997 } else if (VM_PAGE_Q_THROTTLED(eq)) {
2998 inactive_throttled = TRUE;
2999 }
2d21ac55 3000
316670eb 3001 if (inactive_throttled == TRUE)
6d2010ae 3002 goto throttle_inactive;
39236c6e 3003
fe8ab488
A
3004#if VM_PRESSURE_EVENTS
3005#if CONFIG_JETSAM
3006
3007 /*
3008 * If Jetsam is enabled, then the sending
3009 * of memory pressure notifications is handled
3010 * from the same thread that takes care of high-water
3011 * and other jetsams i.e. the memorystatus_thread.
3012 */
3013
3014#else /* CONFIG_JETSAM */
3015
39236c6e 3016 vm_pressure_response();
fe8ab488
A
3017
3018#endif /* CONFIG_JETSAM */
39236c6e 3019#endif /* VM_PRESSURE_EVENTS */
316670eb 3020
316670eb
A
3021 if (page_prev_state == PAGE_STATE_ANONYMOUS)
3022 vm_pageout_inactive_anonymous++;
6d2010ae
A
3023 if (object->internal)
3024 vm_pageout_inactive_dirty_internal++;
3025 else
3026 vm_pageout_inactive_dirty_external++;
39236c6e 3027
3e170ce0
A
3028 /*
3029 * do NOT set the pageout bit!
3030 * sure, we might need free pages, but this page is going to take time to become free
3031 * anyway, so we may as well put it on the clean queue first and take it from there later
3032 * if necessary. that way, we'll ensure we don't free up too much. -mj
3033 */
3034 vm_pageout_cluster(m, FALSE, FALSE, FALSE);
1c79356b 3035
91447636 3036done_with_inactivepage:
39236c6e 3037
6d2010ae 3038 if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) {
fe8ab488 3039 boolean_t need_delay = TRUE;
1c79356b 3040
91447636 3041 if (object != NULL) {
b0d623f7 3042 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
91447636
A
3043 vm_object_unlock(object);
3044 object = NULL;
3045 }
fe8ab488
A
3046 vm_page_unlock_queues();
3047
91447636 3048 if (local_freeq) {
6d2010ae
A
3049
3050 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
3051 vm_page_free_count, local_freed, delayed_unlock_limit, 4);
316670eb
A
3052
3053 vm_page_free_list(local_freeq, TRUE);
91447636 3054
6d2010ae
A
3055 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
3056 vm_page_free_count, local_freed, 0, 4);
3057
2d21ac55 3058 local_freeq = NULL;
91447636 3059 local_freed = 0;
fe8ab488
A
3060 need_delay = FALSE;
3061 }
3e170ce0
A
3062 vm_consider_waking_compactor_swapper();
3063
fe8ab488
A
3064 vm_page_lock_queues();
3065
3066 if (need_delay == TRUE)
b0d623f7 3067 lck_mtx_yield(&vm_page_queue_lock);
2d21ac55
A
3068
3069 delayed_unlock = 1;
1c79356b 3070 }
316670eb 3071 vm_pageout_considered_page++;
39236c6e 3072
91447636
A
3073 /*
3074 * back to top of pageout scan loop
3075 */
1c79356b 3076 }
1c79356b
A
3077}
3078
1c79356b 3079
1c79356b
A
3080int vm_page_free_count_init;
3081
3082void
3083vm_page_free_reserve(
3084 int pages)
3085{
3086 int free_after_reserve;
3087
39236c6e 3088 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
1c79356b 3089
39236c6e
A
3090 if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
3091 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
3092 else
3093 vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
6d2010ae 3094
39236c6e
A
3095 } else {
3096 if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
3097 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
3098 else
3099 vm_page_free_reserved += pages;
3100 }
1c79356b
A
3101 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
3102
3103 vm_page_free_min = vm_page_free_reserved +
3104 VM_PAGE_FREE_MIN(free_after_reserve);
3105
2d21ac55
A
3106 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
3107 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
3108
1c79356b
A
3109 vm_page_free_target = vm_page_free_reserved +
3110 VM_PAGE_FREE_TARGET(free_after_reserve);
3111
2d21ac55
A
3112 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
3113 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
3114
1c79356b
A
3115 if (vm_page_free_target < vm_page_free_min + 5)
3116 vm_page_free_target = vm_page_free_min + 5;
2d21ac55 3117
3e170ce0 3118 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
1c79356b
A
3119}
3120
3121/*
3122 * vm_pageout is the high level pageout daemon.
3123 */
3124
55e303ae
A
3125void
3126vm_pageout_continue(void)
3127{
2d21ac55 3128 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
55e303ae 3129 vm_pageout_scan_event_counter++;
316670eb 3130
4bd07ac2
A
3131 lck_mtx_lock(&vm_page_queue_free_lock);
3132 vm_pageout_running = TRUE;
3133 lck_mtx_unlock(&vm_page_queue_free_lock);
3134
55e303ae 3135 vm_pageout_scan();
316670eb
A
3136 /*
3137 * we hold both the vm_page_queue_free_lock
3138 * and the vm_page_queues_lock at this point
3139 */
55e303ae 3140 assert(vm_page_free_wanted == 0);
2d21ac55 3141 assert(vm_page_free_wanted_privileged == 0);
55e303ae 3142 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
316670eb 3143
4bd07ac2
A
3144 vm_pageout_running = FALSE;
3145 if (vm_pageout_waiter) {
3146 vm_pageout_waiter = FALSE;
3147 thread_wakeup((event_t)&vm_pageout_waiter);
3148 }
3149
b0d623f7 3150 lck_mtx_unlock(&vm_page_queue_free_lock);
316670eb 3151 vm_page_unlock_queues();
55e303ae
A
3152
3153 counter(c_vm_pageout_block++);
91447636 3154 thread_block((thread_continue_t)vm_pageout_continue);
55e303ae
A
3155 /*NOTREACHED*/
3156}
1c79356b 3157
4bd07ac2
A
3158kern_return_t
3159vm_pageout_wait(uint64_t deadline)
3160{
3161 kern_return_t kr;
3162
3163 lck_mtx_lock(&vm_page_queue_free_lock);
3164 for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
3165 vm_pageout_waiter = TRUE;
3166 if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
3167 &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
3168 (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3169 kr = KERN_OPERATION_TIMED_OUT;
3170 }
3171 }
3172 lck_mtx_unlock(&vm_page_queue_free_lock);
3173
3174 return (kr);
3175}
3176
91447636 3177
91447636 3178#ifdef FAKE_DEADLOCK
1c79356b 3179
91447636
A
3180#define FAKE_COUNT 5000
3181
3182int internal_count = 0;
3183int fake_deadlock = 0;
3184
3185#endif
3186
3187static void
3188vm_pageout_iothread_continue(struct vm_pageout_queue *q)
3189{
3190 vm_page_t m = NULL;
3191 vm_object_t object;
316670eb 3192 vm_object_offset_t offset;
2d21ac55
A
3193 memory_object_t pager;
3194 thread_t self = current_thread();
91447636 3195
2d21ac55
A
3196 if ((vm_pageout_internal_iothread != THREAD_NULL)
3197 && (self == vm_pageout_external_iothread )
3198 && (self->options & TH_OPT_VMPRIV))
3199 self->options &= ~TH_OPT_VMPRIV;
3200
3201 vm_page_lockspin_queues();
91447636
A
3202
3203 while ( !queue_empty(&q->pgo_pending) ) {
3204
3205 q->pgo_busy = TRUE;
3206 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
39236c6e 3207 if (m->object->object_slid) {
6d2010ae
A
3208 panic("slid page %p not allowed on this path\n", m);
3209 }
b0d623f7 3210 VM_PAGE_CHECK(m);
91447636 3211 m->pageout_queue = FALSE;
91447636
A
3212 m->pageq.next = NULL;
3213 m->pageq.prev = NULL;
316670eb
A
3214
3215 /*
3216 * grab a snapshot of the object and offset this
3217 * page is tabled in so that we can relookup this
3218 * page after we've taken the object lock - these
3219 * fields are stable while we hold the page queues lock
3220 * but as soon as we drop it, there is nothing to keep
3221 * this page in this object... we hold an activity_in_progress
3222 * on this object which will keep it from terminating
3223 */
3224 object = m->object;
3225 offset = m->offset;
3226
b0d623f7
A
3227 vm_page_unlock_queues();
3228
91447636
A
3229#ifdef FAKE_DEADLOCK
3230 if (q == &vm_pageout_queue_internal) {
3231 vm_offset_t addr;
3232 int pg_count;
3233
3234 internal_count++;
3235
3236 if ((internal_count == FAKE_COUNT)) {
3237
3238 pg_count = vm_page_free_count + vm_page_free_reserved;
3239
3240 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
3241 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
3242 }
3243 internal_count = 0;
3244 fake_deadlock++;
3245 }
3246 }
3247#endif
2d21ac55
A
3248 vm_object_lock(object);
3249
316670eb
A
3250 m = vm_page_lookup(object, offset);
3251
3252 if (m == NULL ||
3253 m->busy || m->cleaning || m->pageout_queue || !m->laundry) {
3254 /*
3255 * it's either the same page that someone else has
3256 * started cleaning (or it's finished cleaning or
3257 * been put back on the pageout queue), or
3258 * the page has been freed or we have found a
3259 * new page at this offset... in all of these cases
3260 * we merely need to release the activity_in_progress
3261 * we took when we put the page on the pageout queue
3262 */
3263 vm_object_activity_end(object);
3264 vm_object_unlock(object);
3265
3266 vm_page_lockspin_queues();
3267 continue;
3268 }
91447636 3269 if (!object->pager_initialized) {
91447636
A
3270
3271 /*
3272 * If there is no memory object for the page, create
3273 * one and hand it to the default pager.
3274 */
3275
3276 if (!object->pager_initialized)
0c530ab8
A
3277 vm_object_collapse(object,
3278 (vm_object_offset_t) 0,
3279 TRUE);
91447636
A
3280 if (!object->pager_initialized)
3281 vm_object_pager_create(object);
3282 if (!object->pager_initialized) {
3283 /*
3284 * Still no pager for the object.
3285 * Reactivate the page.
3286 *
3287 * Should only happen if there is no
3288 * default pager.
3289 */
316670eb
A
3290 m->pageout = FALSE;
3291
39236c6e
A
3292 vm_page_lockspin_queues();
3293
3294 vm_pageout_throttle_up(m);
3295 vm_page_activate(m);
3296 vm_pageout_dirty_no_pager++;
3297
3298 vm_page_unlock_queues();
3299
3300 /*
3301 * And we are done with it.
3302 */
3303 vm_object_activity_end(object);
3304 vm_object_unlock(object);
3305
3306 vm_page_lockspin_queues();
3307 continue;
3308 }
3309 }
3310 pager = object->pager;
3311
3312 if (pager == MEMORY_OBJECT_NULL) {
3313 /*
3314 * This pager has been destroyed by either
3315 * memory_object_destroy or vm_object_destroy, and
3316 * so there is nowhere for the page to go.
3317 */
3318 if (m->pageout) {
3319 /*
3320 * Just free the page... VM_PAGE_FREE takes
3321 * care of cleaning up all the state...
3322 * including doing the vm_pageout_throttle_up
3323 */
3324 VM_PAGE_FREE(m);
3325 } else {
3326 vm_page_lockspin_queues();
3327
3328 vm_pageout_throttle_up(m);
3329 vm_page_activate(m);
3330
3331 vm_page_unlock_queues();
3332
3333 /*
3334 * And we are done with it.
3335 */
3336 }
3337 vm_object_activity_end(object);
3338 vm_object_unlock(object);
3339
3340 vm_page_lockspin_queues();
3341 continue;
3342 }
3343#if 0
3344 /*
3345 * we don't hold the page queue lock
3346 * so this check isn't safe to make
3347 */
3348 VM_PAGE_CHECK(m);
3349#endif
3350 /*
3351 * give back the activity_in_progress reference we
3352 * took when we queued up this page and replace it
3353 * it with a paging_in_progress reference that will
3354 * also hold the paging offset from changing and
3355 * prevent the object from terminating
3356 */
3357 vm_object_activity_end(object);
3358 vm_object_paging_begin(object);
3359 vm_object_unlock(object);
3360
3361 /*
3362 * Send the data to the pager.
3363 * any pageout clustering happens there
3364 */
3365 memory_object_data_return(pager,
3366 m->offset + object->paging_offset,
3367 PAGE_SIZE,
3368 NULL,
3369 NULL,
3370 FALSE,
3371 FALSE,
3372 0);
3373
3374 vm_object_lock(object);
3375 vm_object_paging_end(object);
3376 vm_object_unlock(object);
3377
3378 vm_pageout_io_throttle();
3379
3380 vm_page_lockspin_queues();
3381 }
3382 q->pgo_busy = FALSE;
3383 q->pgo_idle = TRUE;
3384
3385 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3386 vm_page_unlock_queues();
3387
3388 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) q);
3389 /*NOTREACHED*/
3390}
3391
3392
3393static void
3394vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
3395{
3396 vm_page_t m = NULL;
3397 vm_object_t object;
3398 vm_object_offset_t offset;
3399 memory_object_t pager;
3400
3401
3402 if (vm_pageout_internal_iothread != THREAD_NULL)
3403 current_thread()->options &= ~TH_OPT_VMPRIV;
3404
3405 vm_page_lockspin_queues();
3406
3407 while ( !queue_empty(&q->pgo_pending) ) {
3408
3409 q->pgo_busy = TRUE;
3410 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3411 if (m->object->object_slid) {
3412 panic("slid page %p not allowed on this path\n", m);
3413 }
3414 VM_PAGE_CHECK(m);
3415 m->pageout_queue = FALSE;
3416 m->pageq.next = NULL;
3417 m->pageq.prev = NULL;
3418
3419 /*
3420 * grab a snapshot of the object and offset this
3421 * page is tabled in so that we can relookup this
3422 * page after we've taken the object lock - these
3423 * fields are stable while we hold the page queues lock
3424 * but as soon as we drop it, there is nothing to keep
3425 * this page in this object... we hold an activity_in_progress
3426 * on this object which will keep it from terminating
3427 */
3428 object = m->object;
3429 offset = m->offset;
3430
3431 vm_page_unlock_queues();
3432
3433 vm_object_lock(object);
3434
3435 m = vm_page_lookup(object, offset);
3436
3437 if (m == NULL ||
3438 m->busy || m->cleaning || m->pageout_queue || !m->laundry) {
3439 /*
3440 * it's either the same page that someone else has
3441 * started cleaning (or it's finished cleaning or
3442 * been put back on the pageout queue), or
3443 * the page has been freed or we have found a
3444 * new page at this offset... in all of these cases
3445 * we merely need to release the activity_in_progress
3446 * we took when we put the page on the pageout queue
3447 */
3448 vm_object_activity_end(object);
3449 vm_object_unlock(object);
3450
3451 vm_page_lockspin_queues();
3452 continue;
3453 }
3454 pager = object->pager;
3455
3456 if (pager == MEMORY_OBJECT_NULL) {
3457 /*
3458 * This pager has been destroyed by either
3459 * memory_object_destroy or vm_object_destroy, and
3460 * so there is nowhere for the page to go.
3461 */
3462 if (m->pageout) {
3463 /*
3464 * Just free the page... VM_PAGE_FREE takes
3465 * care of cleaning up all the state...
3466 * including doing the vm_pageout_throttle_up
3467 */
3468 VM_PAGE_FREE(m);
3469 } else {
3470 vm_page_lockspin_queues();
3471
3472 vm_pageout_throttle_up(m);
3473 vm_page_activate(m);
3474
3475 vm_page_unlock_queues();
3476
3477 /*
3478 * And we are done with it.
3479 */
3480 }
3481 vm_object_activity_end(object);
3482 vm_object_unlock(object);
3483
3484 vm_page_lockspin_queues();
3485 continue;
3486 }
3487#if 0
3488 /*
3489 * we don't hold the page queue lock
3490 * so this check isn't safe to make
3491 */
3492 VM_PAGE_CHECK(m);
3493#endif
3494 /*
3495 * give back the activity_in_progress reference we
3496 * took when we queued up this page and replace it
3497 * it with a paging_in_progress reference that will
3498 * also hold the paging offset from changing and
3499 * prevent the object from terminating
3500 */
3501 vm_object_activity_end(object);
3502 vm_object_paging_begin(object);
3503 vm_object_unlock(object);
3504
3505 /*
3506 * Send the data to the pager.
3507 * any pageout clustering happens there
3508 */
3509 memory_object_data_return(pager,
3510 m->offset + object->paging_offset,
3511 PAGE_SIZE,
3512 NULL,
3513 NULL,
3514 FALSE,
3515 FALSE,
3516 0);
3517
3518 vm_object_lock(object);
3519 vm_object_paging_end(object);
3520 vm_object_unlock(object);
3521
3522 vm_pageout_io_throttle();
3523
3524 vm_page_lockspin_queues();
3525 }
3526 q->pgo_busy = FALSE;
3527 q->pgo_idle = TRUE;
3528
3529 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3530 vm_page_unlock_queues();
3531
3532 thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q);
3533 /*NOTREACHED*/
3534}
3535
3536
3537uint32_t vm_compressor_failed;
3538
3e170ce0
A
3539#define MAX_FREE_BATCH 32
3540
39236c6e
A
3541static void
3542vm_pageout_iothread_internal_continue(struct cq *cq)
3543{
3544 struct vm_pageout_queue *q;
3545 vm_page_t m = NULL;
39236c6e
A
3546 boolean_t pgo_draining;
3547 vm_page_t local_q;
3548 int local_cnt;
3549 vm_page_t local_freeq = NULL;
3550 int local_freed = 0;
3551 int local_batch_size;
39236c6e
A
3552
3553
3554 KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);
3555
3556 q = cq->q;
3e170ce0 3557 local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 2);
39236c6e 3558
3e170ce0
A
3559#if RECORD_THE_COMPRESSED_DATA
3560 if (q->pgo_laundry)
3561 c_compressed_record_init();
3562#endif
39236c6e 3563 while (TRUE) {
3e170ce0 3564 int pages_left_on_q = 0;
39236c6e
A
3565
3566 local_cnt = 0;
3567 local_q = NULL;
3568
3569 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0);
3570
3571 vm_page_lock_queues();
3572
3573 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3574
3e170ce0 3575 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0);
39236c6e
A
3576
3577 while ( !queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {
3578
3579 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3580
3581 VM_PAGE_CHECK(m);
3582
3583 m->pageout_queue = FALSE;
3584 m->pageq.prev = NULL;
3585
3586 m->pageq.next = (queue_entry_t)local_q;
3587 local_q = m;
3588 local_cnt++;
3589 }
3590 if (local_q == NULL)
3591 break;
3592
3593 q->pgo_busy = TRUE;
3594
3e170ce0 3595 if ((pgo_draining = q->pgo_draining) == FALSE) {
39236c6e 3596 vm_pageout_throttle_up_batch(q, local_cnt);
3e170ce0
A
3597 pages_left_on_q = q->pgo_laundry;
3598 } else
3599 pages_left_on_q = q->pgo_laundry - local_cnt;
39236c6e
A
3600
3601 vm_page_unlock_queues();
3602
3e170ce0
A
3603#if !RECORD_THE_COMPRESSED_DATA
3604 if (pages_left_on_q >= local_batch_size && cq->id < (vm_compressor_thread_count - 1))
3605 thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + 1));
3606#endif
3607 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0);
39236c6e
A
3608
3609 while (local_q) {
3e170ce0
A
3610
3611 KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0);
3612
39236c6e
A
3613 m = local_q;
3614 local_q = (vm_page_t)m->pageq.next;
3615 m->pageq.next = NULL;
3616
3e170ce0 3617 if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m, FALSE) == KERN_SUCCESS) {
39236c6e 3618
3e170ce0
A
3619 m->pageq.next = (queue_entry_t)local_freeq;
3620 local_freeq = m;
3621 local_freed++;
39236c6e 3622
3e170ce0 3623 if (local_freed >= MAX_FREE_BATCH) {
39236c6e 3624
3e170ce0
A
3625 vm_page_free_list(local_freeq, TRUE);
3626 local_freeq = NULL;
3627 local_freed = 0;
39236c6e 3628 }
39236c6e 3629 }
3e170ce0
A
3630#if !CONFIG_JETSAM
3631 while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
39236c6e
A
3632 kern_return_t wait_result;
3633 int need_wakeup = 0;
3634
3635 if (local_freeq) {
3636 vm_page_free_list(local_freeq, TRUE);
3637
3638 local_freeq = NULL;
3639 local_freed = 0;
b0d623f7 3640
39236c6e
A
3641 continue;
3642 }
3643 lck_mtx_lock_spin(&vm_page_queue_free_lock);
b0d623f7 3644
3e170ce0
A
3645 if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3646
39236c6e
A
3647 if (vm_page_free_wanted_privileged++ == 0)
3648 need_wakeup = 1;
3649 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
91447636 3650
39236c6e 3651 lck_mtx_unlock(&vm_page_queue_free_lock);
91447636 3652
39236c6e
A
3653 if (need_wakeup)
3654 thread_wakeup((event_t)&vm_page_free_wanted);
316670eb 3655
39236c6e 3656 if (wait_result == THREAD_WAITING)
3e170ce0 3657
39236c6e
A
3658 thread_block(THREAD_CONTINUE_NULL);
3659 } else
3660 lck_mtx_unlock(&vm_page_queue_free_lock);
3661 }
3e170ce0 3662#endif
39236c6e
A
3663 }
3664 if (local_freeq) {
3665 vm_page_free_list(local_freeq, TRUE);
3666
3667 local_freeq = NULL;
3668 local_freed = 0;
3669 }
3670 if (pgo_draining == TRUE) {
3671 vm_page_lockspin_queues();
3672 vm_pageout_throttle_up_batch(q, local_cnt);
3673 vm_page_unlock_queues();
3674 }
0b4c1975 3675 }
39236c6e
A
3676 KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0);
3677
3678 /*
3679 * queue lock is held and our q is empty
3680 */
91447636
A
3681 q->pgo_busy = FALSE;
3682 q->pgo_idle = TRUE;
316670eb 3683
3e170ce0 3684 assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT);
91447636
A
3685 vm_page_unlock_queues();
3686
39236c6e
A
3687 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3688
3689 thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
91447636
A
3690 /*NOTREACHED*/
3691}
3692
3693
316670eb 3694
3e170ce0
A
3695static void
3696vm_pageout_immediate(vm_page_t m, boolean_t object_locked_by_caller)
3697{
3698 assert(vm_pageout_immediate_scratch_buf);
3699
3700 if (vm_pageout_compress_page(&vm_pageout_immediate_chead, vm_pageout_immediate_scratch_buf, m, object_locked_by_caller) == KERN_SUCCESS) {
3701
3702 vm_page_free_prepare_object(m, TRUE);
3703 vm_page_release(m);
3704 }
3705}
3706
3707
3708kern_return_t
3709vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m, boolean_t object_locked_by_caller)
3710{
3711 vm_object_t object;
3712 memory_object_t pager;
3713 int compressed_count_delta;
3714 kern_return_t retval;
3715
3716 if (m->object->object_slid) {
3717 panic("slid page %p not allowed on this path\n", m);
3718 }
3719
3720 object = m->object;
3721 pager = object->pager;
3722
3723 if (object_locked_by_caller == FALSE && (!object->pager_initialized || pager == MEMORY_OBJECT_NULL)) {
3724
3725 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);
3726
3727 vm_object_lock(object);
3728
3729 /*
3730 * If there is no memory object for the page, create
3731 * one and hand it to the compression pager.
3732 */
3733
3734 if (!object->pager_initialized)
3735 vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
3736 if (!object->pager_initialized)
3737 vm_object_compressor_pager_create(object);
3738
3739 if (!object->pager_initialized) {
3740 /*
3741 * Still no pager for the object.
3742 * Reactivate the page.
3743 *
3744 * Should only happen if there is no
3745 * compression pager
3746 */
3747 m->pageout = FALSE;
3748 m->laundry = FALSE;
3749 PAGE_WAKEUP_DONE(m);
3750
3751 vm_page_lockspin_queues();
3752 vm_page_activate(m);
3753 vm_pageout_dirty_no_pager++;
3754 vm_page_unlock_queues();
3755
3756 /*
3757 * And we are done with it.
3758 */
3759 vm_object_activity_end(object);
3760 vm_object_unlock(object);
3761
3762 return KERN_FAILURE;
3763 }
3764 pager = object->pager;
3765
3766 if (pager == MEMORY_OBJECT_NULL) {
3767 /*
3768 * This pager has been destroyed by either
3769 * memory_object_destroy or vm_object_destroy, and
3770 * so there is nowhere for the page to go.
3771 */
3772 if (m->pageout) {
3773 /*
3774 * Just free the page... VM_PAGE_FREE takes
3775 * care of cleaning up all the state...
3776 * including doing the vm_pageout_throttle_up
3777 */
3778 VM_PAGE_FREE(m);
3779 } else {
3780 m->laundry = FALSE;
3781 PAGE_WAKEUP_DONE(m);
3782
3783 vm_page_lockspin_queues();
3784 vm_page_activate(m);
3785 vm_page_unlock_queues();
3786
3787 /*
3788 * And we are done with it.
3789 */
3790 }
3791 vm_object_activity_end(object);
3792 vm_object_unlock(object);
3793
3794 return KERN_FAILURE;
3795 }
3796 vm_object_unlock(object);
3797
3798 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
3799 }
3800 assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
3801
3802 if (object_locked_by_caller == FALSE)
3803 assert(object->activity_in_progress > 0);
3804
3805 retval = vm_compressor_pager_put(
3806 pager,
3807 m->offset + object->paging_offset,
3808 m->phys_page,
3809 current_chead,
3810 scratch_buf,
3811 &compressed_count_delta);
3812
3813 if (object_locked_by_caller == FALSE) {
3814 vm_object_lock(object);
3815
3816 assert(object->activity_in_progress > 0);
3817 assert(m->object == object);
3818 }
3819
3820 vm_compressor_pager_count(pager,
3821 compressed_count_delta,
3822 FALSE, /* shared_lock */
3823 object);
3824
3825 m->laundry = FALSE;
3826 m->pageout = FALSE;
3827
3828 if (retval == KERN_SUCCESS) {
3829 /*
3830 * If the object is purgeable, its owner's
3831 * purgeable ledgers will be updated in
3832 * vm_page_remove() but the page still
3833 * contributes to the owner's memory footprint,
3834 * so account for it as such.
3835 */
3836 if (object->purgable != VM_PURGABLE_DENY &&
3837 object->vo_purgeable_owner != NULL) {
3838 /* one more compressed purgeable page */
3839 vm_purgeable_compressed_update(object,
3840 +1);
3841 }
3842 VM_STAT_INCR(compressions);
3843
3844 if (m->tabled)
3845 vm_page_remove(m, TRUE);
3846
3847 } else {
3848 PAGE_WAKEUP_DONE(m);
3849
3850 vm_page_lockspin_queues();
3851
3852 vm_page_activate(m);
3853 vm_compressor_failed++;
3854
3855 vm_page_unlock_queues();
3856 }
3857 if (object_locked_by_caller == FALSE) {
3858 vm_object_activity_end(object);
3859 vm_object_unlock(object);
3860 }
3861 return retval;
3862}
3863
3864
316670eb
A
3865static void
3866vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_queue *eq, boolean_t req_lowpriority)
3867{
3868 uint32_t policy;
3869 boolean_t set_iq = FALSE;
3870 boolean_t set_eq = FALSE;
3871
3872 if (hibernate_cleaning_in_progress == TRUE)
3873 req_lowpriority = FALSE;
3874
39236c6e 3875 if ((DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) && iq->pgo_inited == TRUE && iq->pgo_lowpriority != req_lowpriority)
316670eb
A
3876 set_iq = TRUE;
3877
3878 if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority)
3879 set_eq = TRUE;
3880
3881 if (set_iq == TRUE || set_eq == TRUE) {
3882
3883 vm_page_unlock_queues();
3884
3885 if (req_lowpriority == TRUE) {
39236c6e 3886 policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
316670eb
A
3887 DTRACE_VM(laundrythrottle);
3888 } else {
39236c6e 3889 policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
316670eb
A
3890 DTRACE_VM(laundryunthrottle);
3891 }
3892 if (set_iq == TRUE) {
39236c6e
A
3893 proc_set_task_policy_thread(kernel_task, iq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
3894
316670eb
A
3895 iq->pgo_lowpriority = req_lowpriority;
3896 }
3897 if (set_eq == TRUE) {
39236c6e
A
3898 proc_set_task_policy_thread(kernel_task, eq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
3899
316670eb
A
3900 eq->pgo_lowpriority = req_lowpriority;
3901 }
3902 vm_page_lock_queues();
3903 }
3904}
3905
3906
91447636
A
3907static void
3908vm_pageout_iothread_external(void)
3909{
2d21ac55
A
3910 thread_t self = current_thread();
3911
3912 self->options |= TH_OPT_VMPRIV;
91447636 3913
316670eb 3914 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
39236c6e
A
3915
3916 proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL,
3917 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
316670eb
A
3918
3919 vm_page_lock_queues();
3920
3921 vm_pageout_queue_external.pgo_tid = self->thread_id;
3922 vm_pageout_queue_external.pgo_lowpriority = TRUE;
3923 vm_pageout_queue_external.pgo_inited = TRUE;
3924
3925 vm_page_unlock_queues();
3926
39236c6e
A
3927 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
3928 vm_pageout_iothread_external_continue(&vm_pageout_queue_external);
3929 else
3930 vm_pageout_iothread_continue(&vm_pageout_queue_external);
316670eb 3931
91447636
A
3932 /*NOTREACHED*/
3933}
3934
39236c6e 3935
91447636 3936static void
39236c6e 3937vm_pageout_iothread_internal(struct cq *cq)
91447636
A
3938{
3939 thread_t self = current_thread();
3940
3941 self->options |= TH_OPT_VMPRIV;
3942
39236c6e
A
3943 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
3944 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
316670eb 3945
39236c6e
A
3946 proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL,
3947 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
3948 }
316670eb
A
3949 vm_page_lock_queues();
3950
3951 vm_pageout_queue_internal.pgo_tid = self->thread_id;
3952 vm_pageout_queue_internal.pgo_lowpriority = TRUE;
3953 vm_pageout_queue_internal.pgo_inited = TRUE;
3954
3955 vm_page_unlock_queues();
3956
39236c6e 3957 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
3e170ce0
A
3958
3959 if (vm_restricted_to_single_processor == TRUE)
3960 thread_vm_bind_group_add();
39236c6e
A
3961
3962 vm_pageout_iothread_internal_continue(cq);
3963 } else
3964 vm_pageout_iothread_continue(&vm_pageout_queue_internal);
316670eb 3965
91447636
A
3966 /*NOTREACHED*/
3967}
3968
b0d623f7 3969kern_return_t
0b4c1975 3970vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
b0d623f7
A
3971{
3972 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
3973 return KERN_SUCCESS;
3974 } else {
3975 return KERN_FAILURE; /* Already set */
3976 }
3977}
3978
39236c6e
A
3979extern boolean_t memorystatus_manual_testing_on;
3980extern unsigned int memorystatus_level;
3981
3982
39236c6e
A
3983#if VM_PRESSURE_EVENTS
3984
fe8ab488
A
3985boolean_t vm_pressure_events_enabled = FALSE;
3986
39236c6e
A
3987void
3988vm_pressure_response(void)
3989{
3990
39236c6e
A
3991 vm_pressure_level_t old_level = kVMPressureNormal;
3992 int new_level = -1;
3993
fe8ab488
A
3994 uint64_t available_memory = 0;
3995
3996 if (vm_pressure_events_enabled == FALSE)
3997 return;
3998
3999
4000 available_memory = (((uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY) * 100);
4001
39236c6e
A
4002
4003 memorystatus_level = (unsigned int) (available_memory / atop_64(max_mem));
4004
4005 if (memorystatus_manual_testing_on) {
4006 return;
4007 }
4008
4009 old_level = memorystatus_vm_pressure_level;
4010
4011 switch (memorystatus_vm_pressure_level) {
4012
4013 case kVMPressureNormal:
4014 {
4015 if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4016 new_level = kVMPressureCritical;
4017 } else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
4018 new_level = kVMPressureWarning;
4019 }
4020 break;
4021 }
4022
4023 case kVMPressureWarning:
4024 case kVMPressureUrgent:
4025 {
4026 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4027 new_level = kVMPressureNormal;
4028 } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4029 new_level = kVMPressureCritical;
4030 }
4031 break;
4032 }
4033
4034 case kVMPressureCritical:
4035 {
4036 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4037 new_level = kVMPressureNormal;
4038 } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
4039 new_level = kVMPressureWarning;
4040 }
4041 break;
4042 }
4043
4044 default:
4045 return;
4046 }
4047
4048 if (new_level != -1) {
4049 memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;
4050
fe8ab488 4051 if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != new_level)) {
39236c6e
A
4052 if (vm_pressure_thread_running == FALSE) {
4053 thread_wakeup(&vm_pressure_thread);
4054 }
fe8ab488
A
4055
4056 if (old_level != new_level) {
4057 thread_wakeup(&vm_pressure_changed);
4058 }
39236c6e
A
4059 }
4060 }
4061
4062}
4063#endif /* VM_PRESSURE_EVENTS */
4064
4065kern_return_t
4066mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {
4067
4068#if !VM_PRESSURE_EVENTS
fe8ab488 4069
39236c6e
A
4070 return KERN_FAILURE;
4071
4072#else /* VM_PRESSURE_EVENTS */
4073
4074 kern_return_t kr = KERN_SUCCESS;
4075
4076 if (pressure_level != NULL) {
4077
4078 vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
4079
4080 if (wait_for_pressure == TRUE) {
4081 wait_result_t wr = 0;
4082
4083 while (old_level == *pressure_level) {
4084 wr = assert_wait((event_t) &vm_pressure_changed,
4085 THREAD_INTERRUPTIBLE);
4086 if (wr == THREAD_WAITING) {
4087 wr = thread_block(THREAD_CONTINUE_NULL);
4088 }
4089 if (wr == THREAD_INTERRUPTED) {
4090 return KERN_ABORTED;
4091 }
4092 if (wr == THREAD_AWAKENED) {
4093
4094 old_level = memorystatus_vm_pressure_level;
4095
4096 if (old_level != *pressure_level) {
4097 break;
4098 }
4099 }
4100 }
4101 }
4102
4103 *pressure_level = old_level;
4104 kr = KERN_SUCCESS;
4105 } else {
4106 kr = KERN_INVALID_ARGUMENT;
4107 }
4108
4109 return kr;
4110#endif /* VM_PRESSURE_EVENTS */
4111}
4112
4113#if VM_PRESSURE_EVENTS
4114void
316670eb 4115vm_pressure_thread(void) {
fe8ab488 4116 static boolean_t thread_initialized = FALSE;
316670eb 4117
fe8ab488 4118 if (thread_initialized == TRUE) {
39236c6e 4119 vm_pressure_thread_running = TRUE;
316670eb 4120 consider_vm_pressure_events();
39236c6e 4121 vm_pressure_thread_running = FALSE;
316670eb
A
4122 }
4123
fe8ab488 4124 thread_initialized = TRUE;
316670eb
A
4125 assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
4126 thread_block((thread_continue_t)vm_pressure_thread);
4127}
39236c6e
A
4128#endif /* VM_PRESSURE_EVENTS */
4129
316670eb
A
4130
4131uint32_t vm_pageout_considered_page_last = 0;
4132
4133/*
4134 * called once per-second via "compute_averages"
4135 */
4136void
4137compute_pageout_gc_throttle()
4138{
4139 if (vm_pageout_considered_page != vm_pageout_considered_page_last) {
4140
4141 vm_pageout_considered_page_last = vm_pageout_considered_page;
4142
4143 thread_wakeup((event_t) &vm_pageout_garbage_collect);
4144 }
4145}
4146
4147
91447636
A
4148static void
4149vm_pageout_garbage_collect(int collect)
4150{
316670eb 4151
91447636 4152 if (collect) {
b0d623f7 4153 boolean_t buf_large_zfree = FALSE;
316670eb
A
4154 boolean_t first_try = TRUE;
4155
91447636
A
4156 stack_collect();
4157
91447636 4158 consider_machine_collect();
fe8ab488 4159 m_drain();
316670eb
A
4160
4161 do {
4162 if (consider_buffer_cache_collect != NULL) {
4163 buf_large_zfree = (*consider_buffer_cache_collect)(0);
4164 }
4165 if (first_try == TRUE || buf_large_zfree == TRUE) {
4166 /*
4167 * consider_zone_gc should be last, because the other operations
4168 * might return memory to zones.
4169 */
4170 consider_zone_gc(buf_large_zfree);
4171 }
4172 first_try = FALSE;
4173
4174 } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
91447636
A
4175
4176 consider_machine_adjust();
4177 }
91447636
A
4178 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
4179
4180 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
4181 /*NOTREACHED*/
4182}
4183
4184
fe8ab488
A
4185void vm_pageout_reinit_tuneables(void);
4186
4187void
4188vm_pageout_reinit_tuneables(void)
4189{
3e170ce0 4190
fe8ab488
A
4191 vm_compressor_minorcompact_threshold_divisor = 18;
4192 vm_compressor_majorcompact_threshold_divisor = 22;
4193 vm_compressor_unthrottle_threshold_divisor = 32;
4194}
4195
4196
15129b1c
A
4197#if VM_PAGE_BUCKETS_CHECK
4198#if VM_PAGE_FAKE_BUCKETS
4199extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
4200#endif /* VM_PAGE_FAKE_BUCKETS */
4201#endif /* VM_PAGE_BUCKETS_CHECK */
91447636 4202
fe8ab488
A
4203#define FBDP_TEST_COLLAPSE_COMPRESSOR 0
4204#if FBDP_TEST_COLLAPSE_COMPRESSOR
4205extern boolean_t vm_object_collapse_compressor_allowed;
4206#include <IOKit/IOLib.h>
4207#endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */
4208
4209#define FBDP_TEST_WIRE_AND_EXTRACT 0
4210#if FBDP_TEST_WIRE_AND_EXTRACT
4211extern ledger_template_t task_ledger_template;
4212#include <mach/mach_vm.h>
4213extern ppnum_t vm_map_get_phys_page(vm_map_t map,
4214 vm_offset_t offset);
4215#endif /* FBDP_TEST_WIRE_AND_EXTRACT */
4216
3e170ce0
A
4217
4218void
4219vm_set_restrictions()
4220{
4221 host_basic_info_data_t hinfo;
4222 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4223
4224#define BSD_HOST 1
4225 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4226
4227 assert(hinfo.max_cpus > 0);
4228
4229 if (hinfo.max_cpus <= 3) {
4230 /*
4231 * on systems with a limited number of CPUS, bind the
4232 * 4 major threads that can free memory and that tend to use
4233 * a fair bit of CPU under pressured conditions to a single processor.
4234 * This insures that these threads don't hog all of the available CPUs
4235 * (important for camera launch), while allowing them to run independently
4236 * w/r to locks... the 4 threads are
4237 * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
4238 * vm_compressor_swap_trigger_thread (minor and major compactions),
4239 * memorystatus_thread (jetsams).
4240 *
4241 * the first time the thread is run, it is responsible for checking the
4242 * state of vm_restricted_to_single_processor, and if TRUE it calls
4243 * thread_bind_master... someday this should be replaced with a group
4244 * scheduling mechanism and KPI.
4245 */
4246 vm_restricted_to_single_processor = TRUE;
4247 }
4248}
4249
4250
91447636
A
4251void
4252vm_pageout(void)
4253{
4254 thread_t self = current_thread();
4255 thread_t thread;
4256 kern_return_t result;
4257 spl_t s;
4258
4259 /*
4260 * Set thread privileges.
4261 */
4262 s = splsched();
3e170ce0 4263
91447636 4264 thread_lock(self);
3e170ce0
A
4265 self->options |= TH_OPT_VMPRIV;
4266 sched_set_thread_base_priority(self, BASEPRI_PREEMPT - 1);
91447636 4267 thread_unlock(self);
2d21ac55
A
4268
4269 if (!self->reserved_stack)
4270 self->reserved_stack = self->kernel_stack;
4271
3e170ce0
A
4272 if (vm_restricted_to_single_processor == TRUE)
4273 thread_vm_bind_group_add();
4274
91447636
A
4275 splx(s);
4276
4277 /*
4278 * Initialize some paging parameters.
4279 */
4280
39236c6e
A
4281 if (vm_pageout_swap_wait == 0)
4282 vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
4283
91447636
A
4284 if (vm_pageout_idle_wait == 0)
4285 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
4286
4287 if (vm_pageout_burst_wait == 0)
4288 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
4289
4290 if (vm_pageout_empty_wait == 0)
4291 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
4292
4293 if (vm_pageout_deadlock_wait == 0)
4294 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
4295
4296 if (vm_pageout_deadlock_relief == 0)
4297 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
4298
4299 if (vm_pageout_inactive_relief == 0)
4300 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
4301
4302 if (vm_pageout_burst_active_throttle == 0)
4303 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
4304
4305 if (vm_pageout_burst_inactive_throttle == 0)
4306 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
4307
4308 /*
4309 * Set kernel task to low backing store privileged
55e303ae
A
4310 * status
4311 */
4312 task_lock(kernel_task);
4313 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
4314 task_unlock(kernel_task);
4315
1c79356b 4316 vm_page_free_count_init = vm_page_free_count;
2d21ac55 4317
1c79356b
A
4318 /*
4319 * even if we've already called vm_page_free_reserve
4320 * call it again here to insure that the targets are
4321 * accurately calculated (it uses vm_page_free_count_init)
4322 * calling it with an arg of 0 will not change the reserve
4323 * but will re-calculate free_min and free_target
4324 */
91447636
A
4325 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
4326 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
55e303ae 4327 } else
1c79356b
A
4328 vm_page_free_reserve(0);
4329
55e303ae 4330
91447636
A
4331 queue_init(&vm_pageout_queue_external.pgo_pending);
4332 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4333 vm_pageout_queue_external.pgo_laundry = 0;
4334 vm_pageout_queue_external.pgo_idle = FALSE;
4335 vm_pageout_queue_external.pgo_busy = FALSE;
4336 vm_pageout_queue_external.pgo_throttled = FALSE;
0b4c1975 4337 vm_pageout_queue_external.pgo_draining = FALSE;
316670eb
A
4338 vm_pageout_queue_external.pgo_lowpriority = FALSE;
4339 vm_pageout_queue_external.pgo_tid = -1;
4340 vm_pageout_queue_external.pgo_inited = FALSE;
4341
91447636 4342 queue_init(&vm_pageout_queue_internal.pgo_pending);
2d21ac55 4343 vm_pageout_queue_internal.pgo_maxlaundry = 0;
91447636
A
4344 vm_pageout_queue_internal.pgo_laundry = 0;
4345 vm_pageout_queue_internal.pgo_idle = FALSE;
4346 vm_pageout_queue_internal.pgo_busy = FALSE;
4347 vm_pageout_queue_internal.pgo_throttled = FALSE;
0b4c1975 4348 vm_pageout_queue_internal.pgo_draining = FALSE;
316670eb
A
4349 vm_pageout_queue_internal.pgo_lowpriority = FALSE;
4350 vm_pageout_queue_internal.pgo_tid = -1;
4351 vm_pageout_queue_internal.pgo_inited = FALSE;
55e303ae 4352
2d21ac55
A
4353 /* internal pageout thread started when default pager registered first time */
4354 /* external pageout and garbage collection threads started here */
55e303ae 4355
2d21ac55
A
4356 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
4357 BASEPRI_PREEMPT - 1,
4358 &vm_pageout_external_iothread);
91447636
A
4359 if (result != KERN_SUCCESS)
4360 panic("vm_pageout_iothread_external: create failed");
55e303ae 4361
2d21ac55 4362 thread_deallocate(vm_pageout_external_iothread);
9bccf70c 4363
2d21ac55 4364 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
316670eb 4365 BASEPRI_DEFAULT,
2d21ac55 4366 &thread);
91447636
A
4367 if (result != KERN_SUCCESS)
4368 panic("vm_pageout_garbage_collect: create failed");
55e303ae 4369
91447636 4370 thread_deallocate(thread);
55e303ae 4371
39236c6e 4372#if VM_PRESSURE_EVENTS
316670eb
A
4373 result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
4374 BASEPRI_DEFAULT,
4375 &thread);
4376
4377 if (result != KERN_SUCCESS)
4378 panic("vm_pressure_thread: create failed");
4379
4380 thread_deallocate(thread);
39236c6e 4381#endif
316670eb 4382
8f6c56a5 4383 vm_object_reaper_init();
39236c6e
A
4384
4385 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
4386 vm_compressor_pager_init();
2d21ac55 4387
fe8ab488
A
4388#if VM_PRESSURE_EVENTS
4389 vm_pressure_events_enabled = TRUE;
4390#endif /* VM_PRESSURE_EVENTS */
4391
4392#if CONFIG_PHANTOM_CACHE
4393 vm_phantom_cache_init();
4394#endif
15129b1c
A
4395#if VM_PAGE_BUCKETS_CHECK
4396#if VM_PAGE_FAKE_BUCKETS
4397 printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
fe8ab488
A
4398 (uint64_t) vm_page_fake_buckets_start,
4399 (uint64_t) vm_page_fake_buckets_end);
15129b1c
A
4400 pmap_protect(kernel_pmap,
4401 vm_page_fake_buckets_start,
4402 vm_page_fake_buckets_end,
4403 VM_PROT_READ);
4404// *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */
4405#endif /* VM_PAGE_FAKE_BUCKETS */
4406#endif /* VM_PAGE_BUCKETS_CHECK */
4407
fe8ab488
A
4408#if VM_OBJECT_TRACKING
4409 vm_object_tracking_init();
4410#endif /* VM_OBJECT_TRACKING */
4411
4412
4413#if FBDP_TEST_COLLAPSE_COMPRESSOR
4414 vm_object_size_t backing_size, top_size;
4415 vm_object_t backing_object, top_object;
4416 vm_map_offset_t backing_offset, top_offset;
4417 unsigned char *backing_address, *top_address;
4418 kern_return_t kr;
4419
4420 printf("FBDP_TEST_COLLAPSE_COMPRESSOR:\n");
4421
4422 /* create backing object */
4423 backing_size = 15 * PAGE_SIZE;
4424 backing_object = vm_object_allocate(backing_size);
4425 assert(backing_object != VM_OBJECT_NULL);
4426 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
4427 backing_object);
4428 /* map backing object */
4429 backing_offset = 0;
4430 kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0,
4431 VM_FLAGS_ANYWHERE, backing_object, 0, FALSE,
4432 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
4433 assert(kr == KERN_SUCCESS);
4434 backing_address = (unsigned char *) backing_offset;
4435 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4436 "mapped backing object %p at 0x%llx\n",
4437 backing_object, (uint64_t) backing_offset);
4438 /* populate with pages to be compressed in backing object */
4439 backing_address[0x1*PAGE_SIZE] = 0xB1;
4440 backing_address[0x4*PAGE_SIZE] = 0xB4;
4441 backing_address[0x7*PAGE_SIZE] = 0xB7;
4442 backing_address[0xa*PAGE_SIZE] = 0xBA;
4443 backing_address[0xd*PAGE_SIZE] = 0xBD;
4444 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4445 "populated pages to be compressed in "
4446 "backing_object %p\n", backing_object);
4447 /* compress backing object */
4448 vm_object_pageout(backing_object);
4449 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
4450 backing_object);
4451 /* wait for all the pages to be gone */
4452 while (*(volatile int *)&backing_object->resident_page_count != 0)
4453 IODelay(10);
4454 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
4455 backing_object);
4456 /* populate with pages to be resident in backing object */
4457 backing_address[0x0*PAGE_SIZE] = 0xB0;
4458 backing_address[0x3*PAGE_SIZE] = 0xB3;
4459 backing_address[0x6*PAGE_SIZE] = 0xB6;
4460 backing_address[0x9*PAGE_SIZE] = 0xB9;
4461 backing_address[0xc*PAGE_SIZE] = 0xBC;
4462 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4463 "populated pages to be resident in "
4464 "backing_object %p\n", backing_object);
4465 /* leave the other pages absent */
4466 /* mess with the paging_offset of the backing_object */
4467 assert(backing_object->paging_offset == 0);
4468 backing_object->paging_offset = 0x3000;
4469
4470 /* create top object */
4471 top_size = 9 * PAGE_SIZE;
4472 top_object = vm_object_allocate(top_size);
4473 assert(top_object != VM_OBJECT_NULL);
4474 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
4475 top_object);
4476 /* map top object */
4477 top_offset = 0;
4478 kr = vm_map_enter(kernel_map, &top_offset, top_size, 0,
4479 VM_FLAGS_ANYWHERE, top_object, 0, FALSE,
4480 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
4481 assert(kr == KERN_SUCCESS);
4482 top_address = (unsigned char *) top_offset;
4483 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4484 "mapped top object %p at 0x%llx\n",
4485 top_object, (uint64_t) top_offset);
4486 /* populate with pages to be compressed in top object */
4487 top_address[0x3*PAGE_SIZE] = 0xA3;
4488 top_address[0x4*PAGE_SIZE] = 0xA4;
4489 top_address[0x5*PAGE_SIZE] = 0xA5;
4490 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4491 "populated pages to be compressed in "
4492 "top_object %p\n", top_object);
4493 /* compress top object */
4494 vm_object_pageout(top_object);
4495 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
4496 top_object);
4497 /* wait for all the pages to be gone */
4498 while (top_object->resident_page_count != 0);
4499 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
4500 top_object);
4501 /* populate with pages to be resident in top object */
4502 top_address[0x0*PAGE_SIZE] = 0xA0;
4503 top_address[0x1*PAGE_SIZE] = 0xA1;
4504 top_address[0x2*PAGE_SIZE] = 0xA2;
4505 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4506 "populated pages to be resident in "
4507 "top_object %p\n", top_object);
4508 /* leave the other pages absent */
4509
4510 /* link the 2 objects */
4511 vm_object_reference(backing_object);
4512 top_object->shadow = backing_object;
4513 top_object->vo_shadow_offset = 0x3000;
4514 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
4515 top_object, backing_object);
4516
4517 /* unmap backing object */
4518 vm_map_remove(kernel_map,
4519 backing_offset,
4520 backing_offset + backing_size,
4521 0);
4522 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4523 "unmapped backing_object %p [0x%llx:0x%llx]\n",
4524 backing_object,
4525 (uint64_t) backing_offset,
4526 (uint64_t) (backing_offset + backing_size));
4527
4528 /* collapse */
4529 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object);
4530 vm_object_lock(top_object);
4531 vm_object_collapse(top_object, 0, FALSE);
4532 vm_object_unlock(top_object);
4533 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object);
4534
4535 /* did it work? */
4536 if (top_object->shadow != VM_OBJECT_NULL) {
4537 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
4538 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4539 if (vm_object_collapse_compressor_allowed) {
4540 panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4541 }
4542 } else {
4543 /* check the contents of the mapping */
4544 unsigned char expect[9] =
4545 { 0xA0, 0xA1, 0xA2, /* resident in top */
4546 0xA3, 0xA4, 0xA5, /* compressed in top */
4547 0xB9, /* resident in backing + shadow_offset */
4548 0xBD, /* compressed in backing + shadow_offset + paging_offset */
4549 0x00 }; /* absent in both */
4550 unsigned char actual[9];
4551 unsigned int i, errors;
4552
4553 errors = 0;
4554 for (i = 0; i < sizeof (actual); i++) {
4555 actual[i] = (unsigned char) top_address[i*PAGE_SIZE];
4556 if (actual[i] != expect[i]) {
4557 errors++;
4558 }
4559 }
4560 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4561 "actual [%x %x %x %x %x %x %x %x %x] "
4562 "expect [%x %x %x %x %x %x %x %x %x] "
4563 "%d errors\n",
4564 actual[0], actual[1], actual[2], actual[3],
4565 actual[4], actual[5], actual[6], actual[7],
4566 actual[8],
4567 expect[0], expect[1], expect[2], expect[3],
4568 expect[4], expect[5], expect[6], expect[7],
4569 expect[8],
4570 errors);
4571 if (errors) {
4572 panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4573 } else {
4574 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: PASS\n");
4575 }
4576 }
4577#endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */
4578
4579#if FBDP_TEST_WIRE_AND_EXTRACT
4580 ledger_t ledger;
4581 vm_map_t user_map, wire_map;
4582 mach_vm_address_t user_addr, wire_addr;
4583 mach_vm_size_t user_size, wire_size;
4584 mach_vm_offset_t cur_offset;
4585 vm_prot_t cur_prot, max_prot;
4586 ppnum_t user_ppnum, wire_ppnum;
4587 kern_return_t kr;
4588
4589 ledger = ledger_instantiate(task_ledger_template,
4590 LEDGER_CREATE_ACTIVE_ENTRIES);
3e170ce0 4591 user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT),
fe8ab488
A
4592 0x100000000ULL,
4593 0x200000000ULL,
4594 TRUE);
4595 wire_map = vm_map_create(NULL,
4596 0x100000000ULL,
4597 0x200000000ULL,
4598 TRUE);
4599 user_addr = 0;
4600 user_size = 0x10000;
4601 kr = mach_vm_allocate(user_map,
4602 &user_addr,
4603 user_size,
4604 VM_FLAGS_ANYWHERE);
4605 assert(kr == KERN_SUCCESS);
4606 wire_addr = 0;
4607 wire_size = user_size;
4608 kr = mach_vm_remap(wire_map,
4609 &wire_addr,
4610 wire_size,
4611 0,
4612 VM_FLAGS_ANYWHERE,
4613 user_map,
4614 user_addr,
4615 FALSE,
4616 &cur_prot,
4617 &max_prot,
4618 VM_INHERIT_NONE);
4619 assert(kr == KERN_SUCCESS);
4620 for (cur_offset = 0;
4621 cur_offset < wire_size;
4622 cur_offset += PAGE_SIZE) {
4623 kr = vm_map_wire_and_extract(wire_map,
4624 wire_addr + cur_offset,
3e170ce0 4625 VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK)),
fe8ab488
A
4626 TRUE,
4627 &wire_ppnum);
4628 assert(kr == KERN_SUCCESS);
4629 user_ppnum = vm_map_get_phys_page(user_map,
4630 user_addr + cur_offset);
4631 printf("FBDP_TEST_WIRE_AND_EXTRACT: kr=0x%x "
4632 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
4633 kr,
4634 user_map, user_addr + cur_offset, user_ppnum,
4635 wire_map, wire_addr + cur_offset, wire_ppnum);
4636 if (kr != KERN_SUCCESS ||
4637 wire_ppnum == 0 ||
4638 wire_ppnum != user_ppnum) {
4639 panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n");
4640 }
4641 }
4642 cur_offset -= PAGE_SIZE;
4643 kr = vm_map_wire_and_extract(wire_map,
4644 wire_addr + cur_offset,
4645 VM_PROT_DEFAULT,
4646 TRUE,
4647 &wire_ppnum);
4648 assert(kr == KERN_SUCCESS);
4649 printf("FBDP_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
4650 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
4651 kr,
4652 user_map, user_addr + cur_offset, user_ppnum,
4653 wire_map, wire_addr + cur_offset, wire_ppnum);
4654 if (kr != KERN_SUCCESS ||
4655 wire_ppnum == 0 ||
4656 wire_ppnum != user_ppnum) {
4657 panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n");
4658 }
4659
4660 printf("FBDP_TEST_WIRE_AND_EXTRACT: PASS\n");
4661#endif /* FBDP_TEST_WIRE_AND_EXTRACT */
4662
91447636 4663 vm_pageout_continue();
2d21ac55
A
4664
4665 /*
4666 * Unreached code!
4667 *
4668 * The vm_pageout_continue() call above never returns, so the code below is never
4669 * executed. We take advantage of this to declare several DTrace VM related probe
4670 * points that our kernel doesn't have an analog for. These are probe points that
4671 * exist in Solaris and are in the DTrace documentation, so people may have written
4672 * scripts that use them. Declaring the probe points here means their scripts will
4673 * compile and execute which we want for portability of the scripts, but since this
4674 * section of code is never reached, the probe points will simply never fire. Yes,
4675 * this is basically a hack. The problem is the DTrace probe points were chosen with
4676 * Solaris specific VM events in mind, not portability to different VM implementations.
4677 */
4678
4679 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
4680 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
4681 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
4682 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
4683 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
4684 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
4685 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
91447636 4686 /*NOTREACHED*/
9bccf70c
A
4687}
4688
39236c6e
A
4689
4690
39236c6e
A
4691int vm_compressor_thread_count = 2;
4692
2d21ac55
A
4693kern_return_t
4694vm_pageout_internal_start(void)
4695{
39236c6e
A
4696 kern_return_t result;
4697 int i;
4698 host_basic_info_data_t hinfo;
3e170ce0
A
4699 int thread_count;
4700
39236c6e
A
4701
4702 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
4703 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4704#define BSD_HOST 1
4705 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4706
4707 assert(hinfo.max_cpus > 0);
4708
4709 if (vm_compressor_thread_count >= hinfo.max_cpus)
4710 vm_compressor_thread_count = hinfo.max_cpus - 1;
4711 if (vm_compressor_thread_count <= 0)
4712 vm_compressor_thread_count = 1;
3e170ce0
A
4713 else if (vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
4714 vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
4715
4716 if (vm_compressor_immediate_preferred == TRUE) {
4717 vm_pageout_immediate_chead = NULL;
4718 vm_pageout_immediate_scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
4719
4720 vm_compressor_thread_count = 1;
4721 }
4722 thread_count = vm_compressor_thread_count;
39236c6e
A
4723
4724 vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
4725 } else {
3e170ce0
A
4726 vm_compressor_thread_count = 0;
4727 thread_count = 1;
39236c6e
A
4728 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4729 }
2d21ac55 4730
39236c6e 4731 for (i = 0; i < vm_compressor_thread_count; i++) {
3e170ce0
A
4732 ciq[i].id = i;
4733 ciq[i].q = &vm_pageout_queue_internal;
4734 ciq[i].current_chead = NULL;
4735 ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
4736 }
4737 for (i = 0; i < thread_count; i++) {
39236c6e 4738 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread);
3e170ce0 4739
39236c6e
A
4740 if (result == KERN_SUCCESS)
4741 thread_deallocate(vm_pageout_internal_iothread);
4742 else
4743 break;
4744 }
2d21ac55
A
4745 return result;
4746}
4747
fe8ab488
A
4748#if CONFIG_IOSCHED
4749/*
4750 * To support I/O Expedite for compressed files we mark the upls with special flags.
4751 * The way decmpfs works is that we create a big upl which marks all the pages needed to
4752 * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
4753 * then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages
4754 * being held in the big original UPL. We mark each of these smaller UPLs with the flag
4755 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
4756 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
4757 * by the req upl lock (the reverse link doesnt need synch. since we never inspect this link
4758 * unless the real I/O upl is being destroyed).
4759 */
4760
4761
4762static void
4763upl_set_decmp_info(upl_t upl, upl_t src_upl)
4764{
4765 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4766
4767 upl_lock(src_upl);
4768 if (src_upl->decmp_io_upl) {
4769 /*
4770 * If there is already an alive real I/O UPL, ignore this new UPL.
4771 * This case should rarely happen and even if it does, it just means
4772 * that we might issue a spurious expedite which the driver is expected
4773 * to handle.
4774 */
4775 upl_unlock(src_upl);
4776 return;
4777 }
4778 src_upl->decmp_io_upl = (void *)upl;
4779 src_upl->ref_count++;
fe8ab488
A
4780
4781 upl->flags |= UPL_DECMP_REAL_IO;
4782 upl->decmp_io_upl = (void *)src_upl;
04b8595b 4783 upl_unlock(src_upl);
fe8ab488
A
4784}
4785#endif /* CONFIG_IOSCHED */
4786
4787#if UPL_DEBUG
4788int upl_debug_enabled = 1;
4789#else
4790int upl_debug_enabled = 0;
4791#endif
1c79356b 4792
b0d623f7
A
4793static upl_t
4794upl_create(int type, int flags, upl_size_t size)
0b4e3aa0
A
4795{
4796 upl_t upl;
39236c6e 4797 vm_size_t page_field_size = 0;
2d21ac55 4798 int upl_flags = 0;
39236c6e 4799 vm_size_t upl_size = sizeof(struct upl);
0b4e3aa0 4800
b0d623f7
A
4801 size = round_page_32(size);
4802
2d21ac55 4803 if (type & UPL_CREATE_LITE) {
b0d623f7 4804 page_field_size = (atop(size) + 7) >> 3;
55e303ae 4805 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2d21ac55
A
4806
4807 upl_flags |= UPL_LITE;
55e303ae 4808 }
2d21ac55 4809 if (type & UPL_CREATE_INTERNAL) {
39236c6e 4810 upl_size += sizeof(struct upl_page_info) * atop(size);
2d21ac55
A
4811
4812 upl_flags |= UPL_INTERNAL;
0b4e3aa0 4813 }
2d21ac55
A
4814 upl = (upl_t)kalloc(upl_size + page_field_size);
4815
4816 if (page_field_size)
4817 bzero((char *)upl + upl_size, page_field_size);
4818
4819 upl->flags = upl_flags | flags;
0b4e3aa0
A
4820 upl->src_object = NULL;
4821 upl->kaddr = (vm_offset_t)0;
4822 upl->size = 0;
4823 upl->map_object = NULL;
4824 upl->ref_count = 1;
6d2010ae 4825 upl->ext_ref_count = 0;
0c530ab8 4826 upl->highest_page = 0;
0b4e3aa0 4827 upl_lock_init(upl);
b0d623f7 4828 upl->vector_upl = NULL;
3e170ce0 4829 upl->associated_upl = NULL;
fe8ab488
A
4830#if CONFIG_IOSCHED
4831 if (type & UPL_CREATE_IO_TRACKING) {
4832 upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
4833 }
4834
4835 upl->upl_reprio_info = 0;
4836 upl->decmp_io_upl = 0;
4837 if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
4838 /* Only support expedite on internal UPLs */
4839 thread_t curthread = current_thread();
4840 upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size));
4841 bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size)));
4842 upl->flags |= UPL_EXPEDITE_SUPPORTED;
4843 if (curthread->decmp_upl != NULL)
4844 upl_set_decmp_info(upl, curthread->decmp_upl);
4845 }
4846#endif
4847#if CONFIG_IOSCHED || UPL_DEBUG
4848 if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
4849 upl->upl_creator = current_thread();
4850 upl->uplq.next = 0;
4851 upl->uplq.prev = 0;
4852 upl->flags |= UPL_TRACKED_BY_OBJECT;
4853 }
4854#endif
4855
b0d623f7 4856#if UPL_DEBUG
0b4e3aa0
A
4857 upl->ubc_alias1 = 0;
4858 upl->ubc_alias2 = 0;
b0d623f7 4859
b0d623f7
A
4860 upl->upl_state = 0;
4861 upl->upl_commit_index = 0;
4862 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
4863
4864 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
91447636 4865#endif /* UPL_DEBUG */
b0d623f7 4866
0b4e3aa0
A
4867 return(upl);
4868}
4869
4870static void
2d21ac55 4871upl_destroy(upl_t upl)
0b4e3aa0 4872{
55e303ae 4873 int page_field_size; /* bit field in word size buf */
2d21ac55 4874 int size;
0b4e3aa0 4875
6d2010ae
A
4876 if (upl->ext_ref_count) {
4877 panic("upl(%p) ext_ref_count", upl);
4878 }
4879
fe8ab488
A
4880#if CONFIG_IOSCHED
4881 if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
4882 upl_t src_upl;
4883 src_upl = upl->decmp_io_upl;
4884 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4885 upl_lock(src_upl);
4886 src_upl->decmp_io_upl = NULL;
4887 upl_unlock(src_upl);
4888 upl_deallocate(src_upl);
4889 }
4890#endif /* CONFIG_IOSCHED */
4891
4892#if CONFIG_IOSCHED || UPL_DEBUG
4893 if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
55e303ae 4894 vm_object_t object;
2d21ac55
A
4895
4896 if (upl->flags & UPL_SHADOWED) {
55e303ae
A
4897 object = upl->map_object->shadow;
4898 } else {
4899 object = upl->map_object;
4900 }
fe8ab488 4901
55e303ae 4902 vm_object_lock(object);
2d21ac55 4903 queue_remove(&object->uplq, upl, upl_t, uplq);
316670eb
A
4904 vm_object_activity_end(object);
4905 vm_object_collapse(object, 0, TRUE);
55e303ae 4906 vm_object_unlock(object);
0b4e3aa0 4907 }
fe8ab488 4908#endif
2d21ac55
A
4909 /*
4910 * drop a reference on the map_object whether or
4911 * not a pageout object is inserted
4912 */
4913 if (upl->flags & UPL_SHADOWED)
0b4e3aa0 4914 vm_object_deallocate(upl->map_object);
55e303ae 4915
2d21ac55
A
4916 if (upl->flags & UPL_DEVICE_MEMORY)
4917 size = PAGE_SIZE;
4918 else
4919 size = upl->size;
55e303ae 4920 page_field_size = 0;
2d21ac55 4921
55e303ae 4922 if (upl->flags & UPL_LITE) {
2d21ac55 4923 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
55e303ae
A
4924 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4925 }
b0d623f7
A
4926 upl_lock_destroy(upl);
4927 upl->vector_upl = (vector_upl_t) 0xfeedbeef;
316670eb 4928
fe8ab488
A
4929#if CONFIG_IOSCHED
4930 if (upl->flags & UPL_EXPEDITE_SUPPORTED)
4931 kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE));
4932#endif
4933
2d21ac55 4934 if (upl->flags & UPL_INTERNAL) {
91447636
A
4935 kfree(upl,
4936 sizeof(struct upl) +
2d21ac55 4937 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
91447636 4938 + page_field_size);
0b4e3aa0 4939 } else {
91447636 4940 kfree(upl, sizeof(struct upl) + page_field_size);
0b4e3aa0
A
4941 }
4942}
4943
0b4e3aa0 4944void
2d21ac55 4945upl_deallocate(upl_t upl)
0b4e3aa0 4946{
fe8ab488 4947 upl_lock(upl);
b0d623f7
A
4948 if (--upl->ref_count == 0) {
4949 if(vector_upl_is_valid(upl))
4950 vector_upl_deallocate(upl);
fe8ab488 4951 upl_unlock(upl);
0b4e3aa0 4952 upl_destroy(upl);
b0d623f7 4953 }
fe8ab488
A
4954 else
4955 upl_unlock(upl);
4956}
4957
4958#if CONFIG_IOSCHED
4959void
4960upl_mark_decmp(upl_t upl)
4961{
4962 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
4963 upl->flags |= UPL_DECMP_REQ;
4964 upl->upl_creator->decmp_upl = (void *)upl;
4965 }
4966}
4967
4968void
4969upl_unmark_decmp(upl_t upl)
4970{
4971 if(upl && (upl->flags & UPL_DECMP_REQ)) {
4972 upl->upl_creator->decmp_upl = NULL;
4973 }
4974}
4975
4976#endif /* CONFIG_IOSCHED */
4977
4978#define VM_PAGE_Q_BACKING_UP(q) \
4979 ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
4980
4981boolean_t must_throttle_writes(void);
4982
4983boolean_t
4984must_throttle_writes()
4985{
4986 if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
4987 vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10)
4988 return (TRUE);
4989
4990 return (FALSE);
0b4e3aa0 4991}
1c79356b 4992
fe8ab488 4993
b0d623f7
A
4994#if DEVELOPMENT || DEBUG
4995/*/*
91447636
A
4996 * Statistics about UPL enforcement of copy-on-write obligations.
4997 */
4998unsigned long upl_cow = 0;
4999unsigned long upl_cow_again = 0;
91447636
A
5000unsigned long upl_cow_pages = 0;
5001unsigned long upl_cow_again_pages = 0;
b0d623f7
A
5002
5003unsigned long iopl_cow = 0;
5004unsigned long iopl_cow_pages = 0;
5005#endif
91447636 5006
1c79356b 5007/*
0b4e3aa0 5008 * Routine: vm_object_upl_request
1c79356b
A
5009 * Purpose:
5010 * Cause the population of a portion of a vm_object.
5011 * Depending on the nature of the request, the pages
5012 * returned may be contain valid data or be uninitialized.
5013 * A page list structure, listing the physical pages
5014 * will be returned upon request.
5015 * This function is called by the file system or any other
5016 * supplier of backing store to a pager.
5017 * IMPORTANT NOTE: The caller must still respect the relationship
5018 * between the vm_object and its backing memory object. The
5019 * caller MUST NOT substitute changes in the backing file
5020 * without first doing a memory_object_lock_request on the
5021 * target range unless it is know that the pages are not
5022 * shared with another entity at the pager level.
5023 * Copy_in_to:
5024 * if a page list structure is present
5025 * return the mapped physical pages, where a
5026 * page is not present, return a non-initialized
5027 * one. If the no_sync bit is turned on, don't
5028 * call the pager unlock to synchronize with other
5029 * possible copies of the page. Leave pages busy
5030 * in the original object, if a page list structure
5031 * was specified. When a commit of the page list
5032 * pages is done, the dirty bit will be set for each one.
5033 * Copy_out_from:
5034 * If a page list structure is present, return
5035 * all mapped pages. Where a page does not exist
5036 * map a zero filled one. Leave pages busy in
5037 * the original object. If a page list structure
5038 * is not specified, this call is a no-op.
5039 *
5040 * Note: access of default pager objects has a rather interesting
5041 * twist. The caller of this routine, presumably the file system
5042 * page cache handling code, will never actually make a request
5043 * against a default pager backed object. Only the default
5044 * pager will make requests on backing store related vm_objects
5045 * In this way the default pager can maintain the relationship
5046 * between backing store files (abstract memory objects) and
5047 * the vm_objects (cache objects), they support.
5048 *
5049 */
91447636 5050
0b4e3aa0
A
5051__private_extern__ kern_return_t
5052vm_object_upl_request(
1c79356b 5053 vm_object_t object,
91447636
A
5054 vm_object_offset_t offset,
5055 upl_size_t size,
1c79356b 5056 upl_t *upl_ptr,
0b4e3aa0
A
5057 upl_page_info_array_t user_page_list,
5058 unsigned int *page_list_count,
3e170ce0 5059 upl_control_flags_t cntrl_flags)
1c79356b 5060{
91447636 5061 vm_page_t dst_page = VM_PAGE_NULL;
2d21ac55
A
5062 vm_object_offset_t dst_offset;
5063 upl_size_t xfer_size;
6d2010ae 5064 unsigned int size_in_pages;
1c79356b 5065 boolean_t dirty;
55e303ae 5066 boolean_t hw_dirty;
1c79356b 5067 upl_t upl = NULL;
91447636
A
5068 unsigned int entry;
5069#if MACH_CLUSTER_STATS
1c79356b 5070 boolean_t encountered_lrp = FALSE;
91447636 5071#endif
1c79356b 5072 vm_page_t alias_page = NULL;
2d21ac55 5073 int refmod_state = 0;
91447636
A
5074 wpl_array_t lite_list = NULL;
5075 vm_object_t last_copy_object;
6d2010ae
A
5076 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
5077 struct vm_page_delayed_work *dwp;
b0d623f7 5078 int dw_count;
6d2010ae 5079 int dw_limit;
fe8ab488 5080 int io_tracking_flag = 0;
91447636
A
5081
5082 if (cntrl_flags & ~UPL_VALID_FLAGS) {
5083 /*
5084 * For forward compatibility's sake,
5085 * reject any unknown flag.
5086 */
5087 return KERN_INVALID_VALUE;
5088 }
2d21ac55
A
5089 if ( (!object->internal) && (object->paging_offset != 0) )
5090 panic("vm_object_upl_request: external object with non-zero paging offset\n");
5091 if (object->phys_contiguous)
5092 panic("vm_object_upl_request: contiguous object specified\n");
0b4e3aa0 5093
0b4e3aa0 5094
fe8ab488
A
5095 if (size > MAX_UPL_SIZE_BYTES)
5096 size = MAX_UPL_SIZE_BYTES;
1c79356b 5097
2d21ac55 5098 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
fe8ab488
A
5099 *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
5100
5101#if CONFIG_IOSCHED || UPL_DEBUG
5102 if (object->io_tracking || upl_debug_enabled)
5103 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
5104#endif
5105#if CONFIG_IOSCHED
5106 if (object->io_tracking)
5107 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
5108#endif
1c79356b 5109
2d21ac55
A
5110 if (cntrl_flags & UPL_SET_INTERNAL) {
5111 if (cntrl_flags & UPL_SET_LITE) {
55e303ae 5112
fe8ab488 5113 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
91447636 5114
2d21ac55
A
5115 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
5116 lite_list = (wpl_array_t)
91447636 5117 (((uintptr_t)user_page_list) +
2d21ac55 5118 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
b0d623f7
A
5119 if (size == 0) {
5120 user_page_list = NULL;
5121 lite_list = NULL;
5122 }
1c79356b 5123 } else {
fe8ab488 5124 upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
55e303ae 5125
2d21ac55 5126 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
b0d623f7
A
5127 if (size == 0) {
5128 user_page_list = NULL;
5129 }
55e303ae 5130 }
2d21ac55
A
5131 } else {
5132 if (cntrl_flags & UPL_SET_LITE) {
91447636 5133
fe8ab488 5134 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
55e303ae 5135
2d21ac55 5136 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
b0d623f7
A
5137 if (size == 0) {
5138 lite_list = NULL;
5139 }
55e303ae 5140 } else {
fe8ab488 5141 upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
0b4e3aa0 5142 }
55e303ae 5143 }
2d21ac55
A
5144 *upl_ptr = upl;
5145
5146 if (user_page_list)
5147 user_page_list[0].device = FALSE;
91447636 5148
2d21ac55
A
5149 if (cntrl_flags & UPL_SET_LITE) {
5150 upl->map_object = object;
5151 } else {
5152 upl->map_object = vm_object_allocate(size);
5153 /*
5154 * No neeed to lock the new object: nobody else knows
5155 * about it yet, so it's all ours so far.
5156 */
5157 upl->map_object->shadow = object;
5158 upl->map_object->pageout = TRUE;
5159 upl->map_object->can_persist = FALSE;
5160 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
6d2010ae 5161 upl->map_object->vo_shadow_offset = offset;
2d21ac55
A
5162 upl->map_object->wimg_bits = object->wimg_bits;
5163
5164 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5165
5166 upl->flags |= UPL_SHADOWED;
5167 }
5168 /*
91447636
A
5169 * ENCRYPTED SWAP:
5170 * Just mark the UPL as "encrypted" here.
5171 * We'll actually encrypt the pages later,
5172 * in upl_encrypt(), when the caller has
5173 * selected which pages need to go to swap.
5174 */
2d21ac55 5175 if (cntrl_flags & UPL_ENCRYPT)
91447636 5176 upl->flags |= UPL_ENCRYPTED;
2d21ac55
A
5177
5178 if (cntrl_flags & UPL_FOR_PAGEOUT)
91447636 5179 upl->flags |= UPL_PAGEOUT;
2d21ac55 5180
55e303ae 5181 vm_object_lock(object);
b0d623f7 5182 vm_object_activity_begin(object);
2d21ac55
A
5183
5184 /*
5185 * we can lock in the paging_offset once paging_in_progress is set
5186 */
5187 upl->size = size;
5188 upl->offset = offset + object->paging_offset;
55e303ae 5189
fe8ab488
A
5190#if CONFIG_IOSCHED || UPL_DEBUG
5191 if (object->io_tracking || upl_debug_enabled) {
5192 vm_object_activity_begin(object);
5193 queue_enter(&object->uplq, upl, upl_t, uplq);
5194 }
5195#endif
2d21ac55 5196 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
91447636 5197 /*
2d21ac55
A
5198 * Honor copy-on-write obligations
5199 *
91447636
A
5200 * The caller is gathering these pages and
5201 * might modify their contents. We need to
5202 * make sure that the copy object has its own
5203 * private copies of these pages before we let
5204 * the caller modify them.
5205 */
5206 vm_object_update(object,
5207 offset,
5208 size,
5209 NULL,
5210 NULL,
5211 FALSE, /* should_return */
5212 MEMORY_OBJECT_COPY_SYNC,
5213 VM_PROT_NO_CHANGE);
b0d623f7 5214#if DEVELOPMENT || DEBUG
91447636
A
5215 upl_cow++;
5216 upl_cow_pages += size >> PAGE_SHIFT;
b0d623f7 5217#endif
55e303ae 5218 }
2d21ac55
A
5219 /*
5220 * remember which copy object we synchronized with
5221 */
91447636 5222 last_copy_object = object->copy;
1c79356b 5223 entry = 0;
55e303ae 5224
2d21ac55
A
5225 xfer_size = size;
5226 dst_offset = offset;
6d2010ae 5227 size_in_pages = size / PAGE_SIZE;
2d21ac55 5228
b0d623f7
A
5229 dwp = &dw_array[0];
5230 dw_count = 0;
6d2010ae
A
5231 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
5232
5233 if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
fe8ab488 5234 object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT))
6d2010ae 5235 object->scan_collisions = 0;
b0d623f7 5236
fe8ab488
A
5237 if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
5238 boolean_t isSSD = FALSE;
5239
5240 vnode_pager_get_isSSD(object->pager, &isSSD);
5241 vm_object_unlock(object);
5242
5243 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5244
5245 if (isSSD == TRUE)
5246 delay(1000 * size_in_pages);
5247 else
5248 delay(5000 * size_in_pages);
5249 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5250
5251 vm_object_lock(object);
5252 }
5253
2d21ac55
A
5254 while (xfer_size) {
5255
b0d623f7
A
5256 dwp->dw_mask = 0;
5257
2d21ac55 5258 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
2d21ac55
A
5259 vm_object_unlock(object);
5260 VM_PAGE_GRAB_FICTITIOUS(alias_page);
b0d623f7 5261 vm_object_lock(object);
4a3eedf9 5262 }
2d21ac55
A
5263 if (cntrl_flags & UPL_COPYOUT_FROM) {
5264 upl->flags |= UPL_PAGE_SYNC_DONE;
5265
91447636 5266 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
1c79356b
A
5267 dst_page->fictitious ||
5268 dst_page->absent ||
5269 dst_page->error ||
316670eb
A
5270 dst_page->cleaning ||
5271 (VM_PAGE_WIRED(dst_page))) {
5272
91447636 5273 if (user_page_list)
1c79356b 5274 user_page_list[entry].phys_addr = 0;
2d21ac55 5275
b0d623f7 5276 goto try_next_page;
2d21ac55
A
5277 }
5278 /*
5279 * grab this up front...
5280 * a high percentange of the time we're going to
5281 * need the hardware modification state a bit later
5282 * anyway... so we can eliminate an extra call into
5283 * the pmap layer by grabbing it here and recording it
5284 */
5285 if (dst_page->pmapped)
5286 refmod_state = pmap_get_refmod(dst_page->phys_page);
5287 else
5288 refmod_state = 0;
5289
5290 if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) {
91447636 5291 /*
2d21ac55
A
5292 * page is on inactive list and referenced...
5293 * reactivate it now... this gets it out of the
5294 * way of vm_pageout_scan which would have to
5295 * reactivate it upon tripping over it
91447636 5296 */
b0d623f7 5297 dwp->dw_mask |= DW_vm_page_activate;
2d21ac55
A
5298 }
5299 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
5300 /*
5301 * we're only asking for DIRTY pages to be returned
5302 */
39236c6e 5303 if (dst_page->laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
91447636 5304 /*
2d21ac55
A
5305 * if we were the page stolen by vm_pageout_scan to be
5306 * cleaned (as opposed to a buddy being clustered in
5307 * or this request is not being driven by a PAGEOUT cluster
5308 * then we only need to check for the page being dirty or
5309 * precious to decide whether to return it
91447636 5310 */
2d21ac55 5311 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED))
91447636 5312 goto check_busy;
2d21ac55 5313 goto dont_return;
1c79356b 5314 }
2d21ac55
A
5315 /*
5316 * this is a request for a PAGEOUT cluster and this page
5317 * is merely along for the ride as a 'buddy'... not only
5318 * does it have to be dirty to be returned, but it also
316670eb 5319 * can't have been referenced recently...
2d21ac55 5320 */
316670eb
A
5321 if ( (hibernate_cleaning_in_progress == TRUE ||
5322 (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) || dst_page->throttled)) &&
5323 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
2d21ac55 5324 goto check_busy;
1c79356b 5325 }
2d21ac55
A
5326dont_return:
5327 /*
5328 * if we reach here, we're not to return
5329 * the page... go on to the next one
5330 */
316670eb
A
5331 if (dst_page->laundry == TRUE) {
5332 /*
5333 * if we get here, the page is not 'cleaning' (filtered out above).
5334 * since it has been referenced, remove it from the laundry
5335 * so we don't pay the cost of an I/O to clean a page
5336 * we're just going to take back
5337 */
5338 vm_page_lockspin_queues();
5339
5340 vm_pageout_steal_laundry(dst_page, TRUE);
5341 vm_page_activate(dst_page);
5342
5343 vm_page_unlock_queues();
5344 }
2d21ac55
A
5345 if (user_page_list)
5346 user_page_list[entry].phys_addr = 0;
55e303ae 5347
b0d623f7 5348 goto try_next_page;
2d21ac55
A
5349 }
5350check_busy:
316670eb
A
5351 if (dst_page->busy) {
5352 if (cntrl_flags & UPL_NOBLOCK) {
5353 if (user_page_list)
2d21ac55 5354 user_page_list[entry].phys_addr = 0;
55e303ae 5355
b0d623f7 5356 goto try_next_page;
1c79356b 5357 }
2d21ac55
A
5358 /*
5359 * someone else is playing with the
5360 * page. We will have to wait.
5361 */
2d21ac55 5362 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
1c79356b 5363
316670eb 5364 continue;
2d21ac55
A
5365 }
5366 /*
5367 * ENCRYPTED SWAP:
5368 * The caller is gathering this page and might
5369 * access its contents later on. Decrypt the
5370 * page before adding it to the UPL, so that
5371 * the caller never sees encrypted data.
5372 */
5373 if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) {
5374 int was_busy;
91447636
A
5375
5376 /*
2d21ac55
A
5377 * save the current state of busy
5378 * mark page as busy while decrypt
5379 * is in progress since it will drop
5380 * the object lock...
91447636 5381 */
2d21ac55
A
5382 was_busy = dst_page->busy;
5383 dst_page->busy = TRUE;
91447636 5384
2d21ac55
A
5385 vm_page_decrypt(dst_page, 0);
5386 vm_page_decrypt_for_upl_counter++;
5387 /*
5388 * restore to original busy state
5389 */
5390 dst_page->busy = was_busy;
b0d623f7
A
5391 }
5392 if (dst_page->pageout_queue == TRUE) {
91447636 5393
b0d623f7
A
5394 vm_page_lockspin_queues();
5395
6d2010ae 5396 if (dst_page->pageout_queue == TRUE) {
b0d623f7
A
5397 /*
5398 * we've buddied up a page for a clustered pageout
5399 * that has already been moved to the pageout
5400 * queue by pageout_scan... we need to remove
5401 * it from the queue and drop the laundry count
5402 * on that queue
5403 */
5404 vm_pageout_throttle_up(dst_page);
5405 }
5406 vm_page_unlock_queues();
91447636 5407 }
2d21ac55
A
5408#if MACH_CLUSTER_STATS
5409 /*
5410 * pageout statistics gathering. count
5411 * all the pages we will page out that
5412 * were not counted in the initial
5413 * vm_pageout_scan work
5414 */
316670eb 5415 if (dst_page->pageout)
2d21ac55 5416 encountered_lrp = TRUE;
316670eb 5417 if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious))) {
2d21ac55
A
5418 if (encountered_lrp)
5419 CLUSTER_STAT(pages_at_higher_offsets++;)
5420 else
5421 CLUSTER_STAT(pages_at_lower_offsets++;)
5422 }
5423#endif
2d21ac55
A
5424 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5425 dirty = hw_dirty ? TRUE : dst_page->dirty;
5426
5427 if (dst_page->phys_page > upl->highest_page)
5428 upl->highest_page = dst_page->phys_page;
5429
3e170ce0
A
5430 assert (!pmap_is_noencrypt(dst_page->phys_page));
5431
2d21ac55 5432 if (cntrl_flags & UPL_SET_LITE) {
b0d623f7 5433 unsigned int pg_num;
2d21ac55 5434
b0d623f7
A
5435 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5436 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
2d21ac55
A
5437 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5438
5439 if (hw_dirty)
5440 pmap_clear_modify(dst_page->phys_page);
5441
5442 /*
5443 * Mark original page as cleaning
5444 * in place.
5445 */
5446 dst_page->cleaning = TRUE;
5447 dst_page->precious = FALSE;
5448 } else {
5449 /*
5450 * use pageclean setup, it is more
5451 * convenient even for the pageout
5452 * cases here
5453 */
5454 vm_object_lock(upl->map_object);
5455 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5456 vm_object_unlock(upl->map_object);
5457
5458 alias_page->absent = FALSE;
5459 alias_page = NULL;
1c79356b 5460 }
2d21ac55
A
5461#if MACH_PAGEMAP
5462 /*
5463 * Record that this page has been
5464 * written out
5465 */
5466 vm_external_state_set(object->existence_map, dst_page->offset);
5467#endif /*MACH_PAGEMAP*/
316670eb
A
5468 if (dirty) {
5469 SET_PAGE_DIRTY(dst_page, FALSE);
5470 } else {
5471 dst_page->dirty = FALSE;
5472 }
55e303ae 5473
2d21ac55
A
5474 if (!dirty)
5475 dst_page->precious = TRUE;
91447636 5476
2d21ac55
A
5477 if ( (cntrl_flags & UPL_ENCRYPT) ) {
5478 /*
5479 * ENCRYPTED SWAP:
5480 * We want to deny access to the target page
5481 * because its contents are about to be
5482 * encrypted and the user would be very
5483 * confused to see encrypted data instead
5484 * of their data.
5485 * We also set "encrypted_cleaning" to allow
5486 * vm_pageout_scan() to demote that page
5487 * from "adjacent/clean-in-place" to
5488 * "target/clean-and-free" if it bumps into
5489 * this page during its scanning while we're
5490 * still processing this cluster.
5491 */
5492 dst_page->busy = TRUE;
5493 dst_page->encrypted_cleaning = TRUE;
5494 }
5495 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
316670eb 5496 if ( !VM_PAGE_WIRED(dst_page))
2d21ac55 5497 dst_page->pageout = TRUE;
2d21ac55
A
5498 }
5499 } else {
5500 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
91447636 5501 /*
2d21ac55
A
5502 * Honor copy-on-write obligations
5503 *
91447636
A
5504 * The copy object has changed since we
5505 * last synchronized for copy-on-write.
5506 * Another copy object might have been
5507 * inserted while we released the object's
5508 * lock. Since someone could have seen the
5509 * original contents of the remaining pages
5510 * through that new object, we have to
5511 * synchronize with it again for the remaining
5512 * pages only. The previous pages are "busy"
5513 * so they can not be seen through the new
5514 * mapping. The new mapping will see our
5515 * upcoming changes for those previous pages,
5516 * but that's OK since they couldn't see what
5517 * was there before. It's just a race anyway
5518 * and there's no guarantee of consistency or
5519 * atomicity. We just don't want new mappings
5520 * to see both the *before* and *after* pages.
5521 */
5522 if (object->copy != VM_OBJECT_NULL) {
5523 vm_object_update(
5524 object,
5525 dst_offset,/* current offset */
5526 xfer_size, /* remaining size */
5527 NULL,
5528 NULL,
5529 FALSE, /* should_return */
5530 MEMORY_OBJECT_COPY_SYNC,
5531 VM_PROT_NO_CHANGE);
2d21ac55 5532
b0d623f7 5533#if DEVELOPMENT || DEBUG
91447636 5534 upl_cow_again++;
2d21ac55 5535 upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
b0d623f7 5536#endif
91447636 5537 }
2d21ac55
A
5538 /*
5539 * remember the copy object we synced with
5540 */
91447636
A
5541 last_copy_object = object->copy;
5542 }
91447636
A
5543 dst_page = vm_page_lookup(object, dst_offset);
5544
2d21ac55 5545 if (dst_page != VM_PAGE_NULL) {
b0d623f7
A
5546
5547 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
316670eb
A
5548 /*
5549 * skip over pages already present in the cache
5550 */
5551 if (user_page_list)
5552 user_page_list[entry].phys_addr = 0;
b0d623f7 5553
316670eb
A
5554 goto try_next_page;
5555 }
5556 if (dst_page->fictitious) {
5557 panic("need corner case for fictitious page");
b0d623f7 5558 }
2d21ac55 5559
316670eb
A
5560 if (dst_page->busy || dst_page->cleaning) {
5561 /*
5562 * someone else is playing with the
5563 * page. We will have to wait.
5564 */
5565 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
b0d623f7 5566
316670eb
A
5567 continue;
5568 }
5569 if (dst_page->laundry) {
5570 dst_page->pageout = FALSE;
b0d623f7 5571
316670eb 5572 vm_pageout_steal_laundry(dst_page, FALSE);
0b4e3aa0 5573 }
316670eb 5574 } else {
2d21ac55 5575 if (object->private) {
0b4e3aa0
A
5576 /*
5577 * This is a nasty wrinkle for users
5578 * of upl who encounter device or
5579 * private memory however, it is
5580 * unavoidable, only a fault can
2d21ac55 5581 * resolve the actual backing
0b4e3aa0
A
5582 * physical page by asking the
5583 * backing device.
5584 */
2d21ac55 5585 if (user_page_list)
55e303ae 5586 user_page_list[entry].phys_addr = 0;
2d21ac55 5587
b0d623f7 5588 goto try_next_page;
0b4e3aa0 5589 }
6d2010ae
A
5590 if (object->scan_collisions) {
5591 /*
5592 * the pageout_scan thread is trying to steal
5593 * pages from this object, but has run into our
5594 * lock... grab 2 pages from the head of the object...
5595 * the first is freed on behalf of pageout_scan, the
5596 * 2nd is for our own use... we use vm_object_page_grab
5597 * in both cases to avoid taking pages from the free
5598 * list since we are under memory pressure and our
5599 * lock on this object is getting in the way of
5600 * relieving it
5601 */
5602 dst_page = vm_object_page_grab(object);
5603
5604 if (dst_page != VM_PAGE_NULL)
5605 vm_page_release(dst_page);
2d21ac55 5606
6d2010ae
A
5607 dst_page = vm_object_page_grab(object);
5608 }
5609 if (dst_page == VM_PAGE_NULL) {
5610 /*
5611 * need to allocate a page
5612 */
5613 dst_page = vm_page_grab();
5614 }
1c79356b 5615 if (dst_page == VM_PAGE_NULL) {
2d21ac55
A
5616 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
5617 /*
5618 * we don't want to stall waiting for pages to come onto the free list
5619 * while we're already holding absent pages in this UPL
5620 * the caller will deal with the empty slots
5621 */
5622 if (user_page_list)
5623 user_page_list[entry].phys_addr = 0;
5624
5625 goto try_next_page;
5626 }
5627 /*
5628 * no pages available... wait
5629 * then try again for the same
5630 * offset...
5631 */
0b4e3aa0 5632 vm_object_unlock(object);
6d2010ae
A
5633
5634 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5635
5636 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
5637
0b4e3aa0 5638 VM_PAGE_WAIT();
6d2010ae
A
5639 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5640
5641 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
5642
b0d623f7 5643 vm_object_lock(object);
2d21ac55 5644
0b4e3aa0 5645 continue;
1c79356b 5646 }
b0d623f7 5647 vm_page_insert(dst_page, object, dst_offset);
4a3eedf9 5648
2d21ac55 5649 dst_page->absent = TRUE;
4a3eedf9 5650 dst_page->busy = FALSE;
2d21ac55
A
5651
5652 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
91447636
A
5653 /*
5654 * if UPL_RET_ONLY_ABSENT was specified,
5655 * than we're definitely setting up a
5656 * upl for a clustered read/pagein
5657 * operation... mark the pages as clustered
2d21ac55
A
5658 * so upl_commit_range can put them on the
5659 * speculative list
91447636
A
5660 */
5661 dst_page->clustered = TRUE;
fe8ab488
A
5662
5663 if ( !(cntrl_flags & UPL_FILE_IO))
5664 VM_STAT_INCR(pageins);
91447636 5665 }
1c79356b 5666 }
91447636
A
5667 /*
5668 * ENCRYPTED SWAP:
5669 */
5670 if (cntrl_flags & UPL_ENCRYPT) {
5671 /*
5672 * The page is going to be encrypted when we
5673 * get it from the pager, so mark it so.
5674 */
5675 dst_page->encrypted = TRUE;
5676 } else {
5677 /*
5678 * Otherwise, the page will not contain
5679 * encrypted data.
5680 */
5681 dst_page->encrypted = FALSE;
5682 }
1c79356b 5683 dst_page->overwriting = TRUE;
2d21ac55 5684
2d21ac55
A
5685 if (dst_page->pmapped) {
5686 if ( !(cntrl_flags & UPL_FILE_IO))
5687 /*
5688 * eliminate all mappings from the
5689 * original object and its prodigy
55e303ae 5690 */
2d21ac55
A
5691 refmod_state = pmap_disconnect(dst_page->phys_page);
5692 else
5693 refmod_state = pmap_get_refmod(dst_page->phys_page);
5694 } else
5695 refmod_state = 0;
55e303ae 5696
2d21ac55
A
5697 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5698 dirty = hw_dirty ? TRUE : dst_page->dirty;
1c79356b 5699
2d21ac55 5700 if (cntrl_flags & UPL_SET_LITE) {
b0d623f7 5701 unsigned int pg_num;
1c79356b 5702
b0d623f7
A
5703 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5704 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
2d21ac55 5705 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
91447636 5706
2d21ac55
A
5707 if (hw_dirty)
5708 pmap_clear_modify(dst_page->phys_page);
0b4e3aa0 5709
2d21ac55
A
5710 /*
5711 * Mark original page as cleaning
5712 * in place.
5713 */
5714 dst_page->cleaning = TRUE;
5715 dst_page->precious = FALSE;
5716 } else {
5717 /*
5718 * use pageclean setup, it is more
5719 * convenient even for the pageout
5720 * cases here
5721 */
5722 vm_object_lock(upl->map_object);
5723 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5724 vm_object_unlock(upl->map_object);
0b4e3aa0 5725
2d21ac55
A
5726 alias_page->absent = FALSE;
5727 alias_page = NULL;
5728 }
1c79356b 5729
6d2010ae
A
5730 if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
5731 upl->flags &= ~UPL_CLEAR_DIRTY;
5732 upl->flags |= UPL_SET_DIRTY;
5733 dirty = TRUE;
5734 upl->flags |= UPL_SET_DIRTY;
5735 } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
2d21ac55
A
5736 /*
5737 * clean in place for read implies
5738 * that a write will be done on all
5739 * the pages that are dirty before
5740 * a upl commit is done. The caller
5741 * is obligated to preserve the
5742 * contents of all pages marked dirty
5743 */
5744 upl->flags |= UPL_CLEAR_DIRTY;
5745 }
5746 dst_page->dirty = dirty;
91447636 5747
2d21ac55
A
5748 if (!dirty)
5749 dst_page->precious = TRUE;
5750
b0d623f7 5751 if ( !VM_PAGE_WIRED(dst_page)) {
2d21ac55
A
5752 /*
5753 * deny access to the target page while
5754 * it is being worked on
5755 */
5756 dst_page->busy = TRUE;
5757 } else
b0d623f7 5758 dwp->dw_mask |= DW_vm_page_wire;
2d21ac55 5759
b0d623f7
A
5760 /*
5761 * We might be about to satisfy a fault which has been
5762 * requested. So no need for the "restart" bit.
5763 */
5764 dst_page->restart = FALSE;
5765 if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
2d21ac55
A
5766 /*
5767 * expect the page to be used
5768 */
b0d623f7 5769 dwp->dw_mask |= DW_set_reference;
2d21ac55 5770 }
6d2010ae
A
5771 if (cntrl_flags & UPL_PRECIOUS) {
5772 if (dst_page->object->internal) {
316670eb 5773 SET_PAGE_DIRTY(dst_page, FALSE);
6d2010ae
A
5774 dst_page->precious = FALSE;
5775 } else {
5776 dst_page->precious = TRUE;
5777 }
5778 } else {
5779 dst_page->precious = FALSE;
5780 }
2d21ac55 5781 }
d41d1dae
A
5782 if (dst_page->busy)
5783 upl->flags |= UPL_HAS_BUSY;
5784
2d21ac55
A
5785 if (dst_page->phys_page > upl->highest_page)
5786 upl->highest_page = dst_page->phys_page;
3e170ce0 5787 assert (!pmap_is_noencrypt(dst_page->phys_page));
2d21ac55
A
5788 if (user_page_list) {
5789 user_page_list[entry].phys_addr = dst_page->phys_page;
2d21ac55
A
5790 user_page_list[entry].pageout = dst_page->pageout;
5791 user_page_list[entry].absent = dst_page->absent;
593a1d5f 5792 user_page_list[entry].dirty = dst_page->dirty;
2d21ac55 5793 user_page_list[entry].precious = dst_page->precious;
593a1d5f 5794 user_page_list[entry].device = FALSE;
316670eb 5795 user_page_list[entry].needed = FALSE;
2d21ac55
A
5796 if (dst_page->clustered == TRUE)
5797 user_page_list[entry].speculative = dst_page->speculative;
5798 else
5799 user_page_list[entry].speculative = FALSE;
593a1d5f
A
5800 user_page_list[entry].cs_validated = dst_page->cs_validated;
5801 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
c18c124e 5802 user_page_list[entry].cs_nx = dst_page->cs_nx;
3e170ce0 5803 user_page_list[entry].mark = FALSE;
2d21ac55
A
5804 }
5805 /*
5806 * if UPL_RET_ONLY_ABSENT is set, then
5807 * we are working with a fresh page and we've
5808 * just set the clustered flag on it to
5809 * indicate that it was drug in as part of a
5810 * speculative cluster... so leave it alone
5811 */
5812 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5813 /*
5814 * someone is explicitly grabbing this page...
5815 * update clustered and speculative state
5816 *
5817 */
fe8ab488
A
5818 if (dst_page->clustered)
5819 VM_PAGE_CONSUME_CLUSTERED(dst_page);
2d21ac55 5820 }
b0d623f7
A
5821try_next_page:
5822 if (dwp->dw_mask) {
5823 if (dwp->dw_mask & DW_vm_page_activate)
5824 VM_STAT_INCR(reactivations);
4a3eedf9 5825
6d2010ae 5826 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
b0d623f7 5827
6d2010ae 5828 if (dw_count >= dw_limit) {
3e170ce0 5829 vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
b0d623f7
A
5830
5831 dwp = &dw_array[0];
5832 dw_count = 0;
4a3eedf9 5833 }
2d21ac55 5834 }
2d21ac55
A
5835 entry++;
5836 dst_offset += PAGE_SIZE_64;
5837 xfer_size -= PAGE_SIZE;
5838 }
b0d623f7 5839 if (dw_count)
3e170ce0 5840 vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
b0d623f7 5841
2d21ac55 5842 if (alias_page != NULL) {
b0d623f7 5843 VM_PAGE_FREE(alias_page);
1c79356b 5844 }
91447636 5845
2d21ac55
A
5846 if (page_list_count != NULL) {
5847 if (upl->flags & UPL_INTERNAL)
5848 *page_list_count = 0;
5849 else if (*page_list_count > entry)
5850 *page_list_count = entry;
5851 }
b0d623f7
A
5852#if UPL_DEBUG
5853 upl->upl_state = 1;
5854#endif
1c79356b 5855 vm_object_unlock(object);
2d21ac55 5856
1c79356b
A
5857 return KERN_SUCCESS;
5858}
5859
0b4e3aa0
A
5860/*
5861 * Routine: vm_object_super_upl_request
5862 * Purpose:
5863 * Cause the population of a portion of a vm_object
5864 * in much the same way as memory_object_upl_request.
5865 * Depending on the nature of the request, the pages
5866 * returned may be contain valid data or be uninitialized.
5867 * However, the region may be expanded up to the super
5868 * cluster size provided.
5869 */
5870
5871__private_extern__ kern_return_t
5872vm_object_super_upl_request(
5873 vm_object_t object,
5874 vm_object_offset_t offset,
91447636
A
5875 upl_size_t size,
5876 upl_size_t super_cluster,
0b4e3aa0
A
5877 upl_t *upl,
5878 upl_page_info_t *user_page_list,
5879 unsigned int *page_list_count,
3e170ce0 5880 upl_control_flags_t cntrl_flags)
0b4e3aa0 5881{
b0d623f7 5882 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
1c79356b 5883 return KERN_FAILURE;
0b4e3aa0 5884
55e303ae 5885 assert(object->paging_in_progress);
1c79356b 5886 offset = offset - object->paging_offset;
91447636 5887
91447636 5888 if (super_cluster > size) {
1c79356b
A
5889
5890 vm_object_offset_t base_offset;
91447636 5891 upl_size_t super_size;
b0d623f7 5892 vm_object_size_t super_size_64;
1c79356b 5893
2d21ac55
A
5894 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
5895 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
6d2010ae 5896 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
b0d623f7
A
5897 super_size = (upl_size_t) super_size_64;
5898 assert(super_size == super_size_64);
2d21ac55
A
5899
5900 if (offset > (base_offset + super_size)) {
5901 panic("vm_object_super_upl_request: Missed target pageout"
5902 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
5903 offset, base_offset, super_size, super_cluster,
5904 size, object->paging_offset);
5905 }
91447636
A
5906 /*
5907 * apparently there is a case where the vm requests a
5908 * page to be written out who's offset is beyond the
5909 * object size
5910 */
b0d623f7
A
5911 if ((offset + size) > (base_offset + super_size)) {
5912 super_size_64 = (offset + size) - base_offset;
5913 super_size = (upl_size_t) super_size_64;
5914 assert(super_size == super_size_64);
5915 }
1c79356b
A
5916
5917 offset = base_offset;
5918 size = super_size;
5919 }
2d21ac55 5920 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags);
1c79356b
A
5921}
5922
b0d623f7 5923
91447636
A
5924kern_return_t
5925vm_map_create_upl(
5926 vm_map_t map,
5927 vm_map_address_t offset,
5928 upl_size_t *upl_size,
5929 upl_t *upl,
5930 upl_page_info_array_t page_list,
5931 unsigned int *count,
3e170ce0 5932 upl_control_flags_t *flags)
91447636 5933{
3e170ce0
A
5934 vm_map_entry_t entry;
5935 upl_control_flags_t caller_flags;
5936 int force_data_sync;
5937 int sync_cow_data;
5938 vm_object_t local_object;
5939 vm_map_offset_t local_offset;
5940 vm_map_offset_t local_start;
5941 kern_return_t ret;
91447636
A
5942
5943 caller_flags = *flags;
5944
5945 if (caller_flags & ~UPL_VALID_FLAGS) {
5946 /*
5947 * For forward compatibility's sake,
5948 * reject any unknown flag.
5949 */
5950 return KERN_INVALID_VALUE;
5951 }
91447636
A
5952 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
5953 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
5954
2d21ac55 5955 if (upl == NULL)
91447636
A
5956 return KERN_INVALID_ARGUMENT;
5957
91447636 5958REDISCOVER_ENTRY:
b0d623f7 5959 vm_map_lock_read(map);
2d21ac55 5960
3e170ce0
A
5961 if (!vm_map_lookup_entry(map, offset, &entry)) {
5962 vm_map_unlock_read(map);
5963 return KERN_FAILURE;
5964 }
2d21ac55 5965
3e170ce0
A
5966 if ((entry->vme_end - offset) < *upl_size) {
5967 *upl_size = (upl_size_t) (entry->vme_end - offset);
5968 assert(*upl_size == entry->vme_end - offset);
5969 }
5970
5971 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
5972 *flags = 0;
5973
5974 if (!entry->is_sub_map &&
5975 VME_OBJECT(entry) != VM_OBJECT_NULL) {
5976 if (VME_OBJECT(entry)->private)
5977 *flags = UPL_DEV_MEMORY;
5978
5979 if (VME_OBJECT(entry)->phys_contiguous)
5980 *flags |= UPL_PHYS_CONTIG;
b0d623f7 5981 }
3e170ce0
A
5982 vm_map_unlock_read(map);
5983 return KERN_SUCCESS;
5984 }
2d21ac55 5985
3e170ce0
A
5986 if (entry->is_sub_map) {
5987 vm_map_t submap;
2d21ac55 5988
3e170ce0
A
5989 submap = VME_SUBMAP(entry);
5990 local_start = entry->vme_start;
5991 local_offset = VME_OFFSET(entry);
2d21ac55 5992
3e170ce0
A
5993 vm_map_reference(submap);
5994 vm_map_unlock_read(map);
2d21ac55 5995
3e170ce0
A
5996 ret = vm_map_create_upl(submap,
5997 local_offset + (offset - local_start),
5998 upl_size, upl, page_list, count, flags);
5999 vm_map_deallocate(submap);
e2d2fc5c 6000
3e170ce0
A
6001 return ret;
6002 }
e2d2fc5c 6003
3e170ce0
A
6004 if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
6005 !VME_OBJECT(entry)->phys_contiguous) {
6006 if (*upl_size > MAX_UPL_SIZE_BYTES)
6007 *upl_size = MAX_UPL_SIZE_BYTES;
6008 }
e2d2fc5c 6009
3e170ce0
A
6010 /*
6011 * Create an object if necessary.
6012 */
6013 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
e2d2fc5c 6014
3e170ce0
A
6015 if (vm_map_lock_read_to_write(map))
6016 goto REDISCOVER_ENTRY;
e2d2fc5c 6017
3e170ce0
A
6018 VME_OBJECT_SET(entry,
6019 vm_object_allocate((vm_size_t)
6020 (entry->vme_end -
6021 entry->vme_start)));
6022 VME_OFFSET_SET(entry, 0);
e2d2fc5c 6023
3e170ce0
A
6024 vm_map_lock_write_to_read(map);
6025 }
b0d623f7 6026
3e170ce0
A
6027 if (!(caller_flags & UPL_COPYOUT_FROM) &&
6028 !(entry->protection & VM_PROT_WRITE)) {
6029 vm_map_unlock_read(map);
6030 return KERN_PROTECTION_FAILURE;
6031 }
6032
6033 local_object = VME_OBJECT(entry);
6034 assert(local_object != VM_OBJECT_NULL);
6035
6036 if (*upl_size != 0 &&
6037 local_object->vo_size > *upl_size && /* partial UPL */
6038 entry->wired_count == 0 && /* No COW for entries that are wired */
6039 (map->pmap != kernel_pmap) && /* alias checks */
6040 (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */
6041 ||
6042 (!entry->needs_copy && /* case 2 */
6043 local_object->internal &&
6044 (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
6045 local_object->ref_count > 1))) {
6046 vm_prot_t prot;
b0d623f7 6047
3e170ce0
A
6048 /*
6049 * Case 1:
6050 * Set up the targeted range for copy-on-write to avoid
6051 * applying true_share/copy_delay to the entire object.
6052 *
6053 * Case 2:
6054 * This map entry covers only part of an internal
6055 * object. There could be other map entries covering
6056 * other areas of this object and some of these map
6057 * entries could be marked as "needs_copy", which
6058 * assumes that the object is COPY_SYMMETRIC.
6059 * To avoid marking this object as COPY_DELAY and
6060 * "true_share", let's shadow it and mark the new
6061 * (smaller) object as "true_share" and COPY_DELAY.
6062 */
b0d623f7 6063
3e170ce0
A
6064 if (vm_map_lock_read_to_write(map)) {
6065 goto REDISCOVER_ENTRY;
91447636 6066 }
3e170ce0
A
6067 vm_map_lock_assert_exclusive(map);
6068 assert(VME_OBJECT(entry) == local_object);
6069
6070 vm_map_clip_start(map,
6071 entry,
6072 vm_map_trunc_page(offset,
6073 VM_MAP_PAGE_MASK(map)));
6074 vm_map_clip_end(map,
6075 entry,
6076 vm_map_round_page(offset + *upl_size,
6077 VM_MAP_PAGE_MASK(map)));
6078 if ((entry->vme_end - offset) < *upl_size) {
6079 *upl_size = (upl_size_t) (entry->vme_end - offset);
6080 assert(*upl_size == entry->vme_end - offset);
fe8ab488 6081 }
e2d2fc5c 6082
3e170ce0
A
6083 prot = entry->protection & ~VM_PROT_WRITE;
6084 if (override_nx(map, VME_ALIAS(entry)) && prot)
6085 prot |= VM_PROT_EXECUTE;
6086 vm_object_pmap_protect(local_object,
6087 VME_OFFSET(entry),
6088 entry->vme_end - entry->vme_start,
6089 ((entry->is_shared ||
6090 map->mapped_in_other_pmaps)
6091 ? PMAP_NULL
6092 : map->pmap),
6093 entry->vme_start,
6094 prot);
e2d2fc5c 6095
3e170ce0 6096 assert(entry->wired_count == 0);
e2d2fc5c 6097
3e170ce0
A
6098 /*
6099 * Lock the VM object and re-check its status: if it's mapped
6100 * in another address space, we could still be racing with
6101 * another thread holding that other VM map exclusively.
6102 */
6103 vm_object_lock(local_object);
6104 if (local_object->true_share) {
6105 /* object is already in proper state: no COW needed */
6106 assert(local_object->copy_strategy !=
6107 MEMORY_OBJECT_COPY_SYMMETRIC);
6108 } else {
6109 /* not true_share: ask for copy-on-write below */
6110 assert(local_object->copy_strategy ==
6111 MEMORY_OBJECT_COPY_SYMMETRIC);
fe8ab488 6112 entry->needs_copy = TRUE;
fe8ab488 6113 }
3e170ce0 6114 vm_object_unlock(local_object);
fe8ab488 6115
3e170ce0
A
6116 vm_map_lock_write_to_read(map);
6117 }
6118
6119 if (entry->needs_copy) {
6120 /*
6121 * Honor copy-on-write for COPY_SYMMETRIC
6122 * strategy.
6123 */
6124 vm_map_t local_map;
6125 vm_object_t object;
6126 vm_object_offset_t new_offset;
6127 vm_prot_t prot;
6128 boolean_t wired;
6129 vm_map_version_t version;
6130 vm_map_t real_map;
6131 vm_prot_t fault_type;
6132
6133 local_map = map;
6134
6135 if (caller_flags & UPL_COPYOUT_FROM) {
6136 fault_type = VM_PROT_READ | VM_PROT_COPY;
6137 vm_counters.create_upl_extra_cow++;
6138 vm_counters.create_upl_extra_cow_pages +=
6139 (entry->vme_end - entry->vme_start) / PAGE_SIZE;
6140 } else {
6141 fault_type = VM_PROT_WRITE;
6142 }
6143 if (vm_map_lookup_locked(&local_map,
6144 offset, fault_type,
6145 OBJECT_LOCK_EXCLUSIVE,
6146 &version, &object,
6147 &new_offset, &prot, &wired,
6148 NULL,
6149 &real_map) != KERN_SUCCESS) {
6150 if (fault_type == VM_PROT_WRITE) {
6151 vm_counters.create_upl_lookup_failure_write++;
fe8ab488 6152 } else {
3e170ce0 6153 vm_counters.create_upl_lookup_failure_copy++;
fe8ab488 6154 }
fe8ab488 6155 vm_map_unlock_read(local_map);
3e170ce0 6156 return KERN_FAILURE;
91447636 6157 }
3e170ce0
A
6158 if (real_map != map)
6159 vm_map_unlock(real_map);
6160 vm_map_unlock_read(local_map);
fe8ab488 6161
3e170ce0 6162 vm_object_unlock(object);
2d21ac55 6163
3e170ce0
A
6164 goto REDISCOVER_ENTRY;
6165 }
2d21ac55 6166
3e170ce0
A
6167 if (sync_cow_data &&
6168 (VME_OBJECT(entry)->shadow ||
6169 VME_OBJECT(entry)->copy)) {
6170 local_object = VME_OBJECT(entry);
6171 local_start = entry->vme_start;
6172 local_offset = VME_OFFSET(entry);
6173
6174 vm_object_reference(local_object);
6175 vm_map_unlock_read(map);
91447636 6176
3e170ce0
A
6177 if (local_object->shadow && local_object->copy) {
6178 vm_object_lock_request(local_object->shadow,
6179 ((vm_object_offset_t)
6180 ((offset - local_start) +
6181 local_offset) +
6182 local_object->vo_shadow_offset),
6183 *upl_size, FALSE,
2d21ac55
A
6184 MEMORY_OBJECT_DATA_SYNC,
6185 VM_PROT_NO_CHANGE);
91447636 6186 }
3e170ce0
A
6187 sync_cow_data = FALSE;
6188 vm_object_deallocate(local_object);
91447636 6189
3e170ce0
A
6190 goto REDISCOVER_ENTRY;
6191 }
6192 if (force_data_sync) {
6193 local_object = VME_OBJECT(entry);
91447636 6194 local_start = entry->vme_start;
3e170ce0 6195 local_offset = VME_OFFSET(entry);
2d21ac55 6196
91447636 6197 vm_object_reference(local_object);
b0d623f7 6198 vm_map_unlock_read(map);
2d21ac55 6199
3e170ce0
A
6200 vm_object_lock_request(local_object,
6201 ((vm_object_offset_t)
6202 ((offset - local_start) +
6203 local_offset)),
6204 (vm_object_size_t)*upl_size,
6205 FALSE,
6206 MEMORY_OBJECT_DATA_SYNC,
6207 VM_PROT_NO_CHANGE);
6208
6209 force_data_sync = FALSE;
91447636 6210 vm_object_deallocate(local_object);
2d21ac55 6211
3e170ce0
A
6212 goto REDISCOVER_ENTRY;
6213 }
6214 if (VME_OBJECT(entry)->private)
6215 *flags = UPL_DEV_MEMORY;
6216 else
6217 *flags = 0;
6218
6219 if (VME_OBJECT(entry)->phys_contiguous)
6220 *flags |= UPL_PHYS_CONTIG;
6221
6222 local_object = VME_OBJECT(entry);
6223 local_offset = VME_OFFSET(entry);
6224 local_start = entry->vme_start;
6225
6226 vm_object_lock(local_object);
6227
6228 /*
6229 * Ensure that this object is "true_share" and "copy_delay" now,
6230 * while we're still holding the VM map lock. After we unlock the map,
6231 * anything could happen to that mapping, including some copy-on-write
6232 * activity. We need to make sure that the IOPL will point at the
6233 * same memory as the mapping.
6234 */
6235 if (local_object->true_share) {
6236 assert(local_object->copy_strategy !=
6237 MEMORY_OBJECT_COPY_SYMMETRIC);
6238 } else if (local_object != kernel_object &&
6239 local_object != compressor_object &&
6240 !local_object->phys_contiguous) {
6241#if VM_OBJECT_TRACKING_OP_TRUESHARE
6242 if (!local_object->true_share &&
6243 vm_object_tracking_inited) {
6244 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
6245 int num = 0;
6246 num = OSBacktrace(bt,
6247 VM_OBJECT_TRACKING_BTDEPTH);
6248 btlog_add_entry(vm_object_tracking_btlog,
6249 local_object,
6250 VM_OBJECT_TRACKING_OP_TRUESHARE,
6251 bt,
6252 num);
6253 }
6254#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
6255 local_object->true_share = TRUE;
6256 if (local_object->copy_strategy ==
6257 MEMORY_OBJECT_COPY_SYMMETRIC) {
6258 local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6259 }
6260 }
6261
6262 vm_object_reference_locked(local_object);
6263 vm_object_unlock(local_object);
6264
b0d623f7 6265 vm_map_unlock_read(map);
1c79356b 6266
3e170ce0
A
6267 ret = vm_object_iopl_request(local_object,
6268 ((vm_object_offset_t)
6269 ((offset - local_start) + local_offset)),
6270 *upl_size,
6271 upl,
6272 page_list,
6273 count,
6274 caller_flags);
6275 vm_object_deallocate(local_object);
6276
6277 return ret;
91447636
A
6278}
6279
6280/*
6281 * Internal routine to enter a UPL into a VM map.
6282 *
6283 * JMM - This should just be doable through the standard
6284 * vm_map_enter() API.
6285 */
1c79356b 6286kern_return_t
91447636
A
6287vm_map_enter_upl(
6288 vm_map_t map,
6289 upl_t upl,
b0d623f7 6290 vm_map_offset_t *dst_addr)
1c79356b 6291{
91447636 6292 vm_map_size_t size;
1c79356b 6293 vm_object_offset_t offset;
91447636 6294 vm_map_offset_t addr;
1c79356b
A
6295 vm_page_t m;
6296 kern_return_t kr;
b0d623f7
A
6297 int isVectorUPL = 0, curr_upl=0;
6298 upl_t vector_upl = NULL;
6299 vm_offset_t vector_upl_dst_addr = 0;
6300 vm_map_t vector_upl_submap = NULL;
6301 upl_offset_t subupl_offset = 0;
6302 upl_size_t subupl_size = 0;
1c79356b 6303
0b4e3aa0
A
6304 if (upl == UPL_NULL)
6305 return KERN_INVALID_ARGUMENT;
6306
b0d623f7
A
6307 if((isVectorUPL = vector_upl_is_valid(upl))) {
6308 int mapped=0,valid_upls=0;
6309 vector_upl = upl;
6310
6311 upl_lock(vector_upl);
6312 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6313 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6314 if(upl == NULL)
6315 continue;
6316 valid_upls++;
6317 if (UPL_PAGE_LIST_MAPPED & upl->flags)
6318 mapped++;
6319 }
6320
6321 if(mapped) {
6322 if(mapped != valid_upls)
6323 panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls);
6324 else {
6325 upl_unlock(vector_upl);
6326 return KERN_FAILURE;
6327 }
6328 }
6329
6330 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE, VM_FLAGS_ANYWHERE, &vector_upl_submap);
6331 if( kr != KERN_SUCCESS )
6332 panic("Vector UPL submap allocation failed\n");
6333 map = vector_upl_submap;
6334 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
6335 curr_upl=0;
6336 }
6337 else
6338 upl_lock(upl);
6339
6340process_upl_to_enter:
6341 if(isVectorUPL){
6342 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6343 *dst_addr = vector_upl_dst_addr;
6344 upl_unlock(vector_upl);
6345 return KERN_SUCCESS;
6346 }
6347 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6348 if(upl == NULL)
6349 goto process_upl_to_enter;
6d2010ae 6350
b0d623f7
A
6351 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
6352 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
d41d1dae
A
6353 } else {
6354 /*
6355 * check to see if already mapped
6356 */
6357 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
6358 upl_unlock(upl);
6359 return KERN_FAILURE;
6360 }
b0d623f7 6361 }
d41d1dae
A
6362 if ((!(upl->flags & UPL_SHADOWED)) &&
6363 ((upl->flags & UPL_HAS_BUSY) ||
6364 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
0b4e3aa0 6365
55e303ae
A
6366 vm_object_t object;
6367 vm_page_t alias_page;
6368 vm_object_offset_t new_offset;
b0d623f7 6369 unsigned int pg_num;
55e303ae
A
6370 wpl_array_t lite_list;
6371
2d21ac55 6372 if (upl->flags & UPL_INTERNAL) {
55e303ae 6373 lite_list = (wpl_array_t)
91447636 6374 ((((uintptr_t)upl) + sizeof(struct upl))
2d21ac55 6375 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
55e303ae 6376 } else {
2d21ac55 6377 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
55e303ae
A
6378 }
6379 object = upl->map_object;
6380 upl->map_object = vm_object_allocate(upl->size);
2d21ac55 6381
55e303ae 6382 vm_object_lock(upl->map_object);
2d21ac55 6383
55e303ae
A
6384 upl->map_object->shadow = object;
6385 upl->map_object->pageout = TRUE;
6386 upl->map_object->can_persist = FALSE;
2d21ac55 6387 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
6d2010ae 6388 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
55e303ae 6389 upl->map_object->wimg_bits = object->wimg_bits;
6d2010ae 6390 offset = upl->map_object->vo_shadow_offset;
55e303ae
A
6391 new_offset = 0;
6392 size = upl->size;
91447636 6393
2d21ac55 6394 upl->flags |= UPL_SHADOWED;
91447636 6395
2d21ac55 6396 while (size) {
b0d623f7
A
6397 pg_num = (unsigned int) (new_offset / PAGE_SIZE);
6398 assert(pg_num == new_offset / PAGE_SIZE);
55e303ae 6399
2d21ac55 6400 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
55e303ae 6401
2d21ac55 6402 VM_PAGE_GRAB_FICTITIOUS(alias_page);
91447636 6403
2d21ac55 6404 vm_object_lock(object);
91447636 6405
2d21ac55
A
6406 m = vm_page_lookup(object, offset);
6407 if (m == VM_PAGE_NULL) {
6408 panic("vm_upl_map: page missing\n");
6409 }
55e303ae 6410
2d21ac55
A
6411 /*
6412 * Convert the fictitious page to a private
6413 * shadow of the real page.
6414 */
6415 assert(alias_page->fictitious);
6416 alias_page->fictitious = FALSE;
6417 alias_page->private = TRUE;
6418 alias_page->pageout = TRUE;
6419 /*
6420 * since m is a page in the upl it must
6421 * already be wired or BUSY, so it's
6422 * safe to assign the underlying physical
6423 * page to the alias
6424 */
6425 alias_page->phys_page = m->phys_page;
6426
6427 vm_object_unlock(object);
6428
6429 vm_page_lockspin_queues();
3e170ce0 6430 vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
2d21ac55
A
6431 vm_page_unlock_queues();
6432
6433 /*
6434 * ENCRYPTED SWAP:
6435 * The virtual page ("m") has to be wired in some way
6436 * here or its physical page ("m->phys_page") could
6437 * be recycled at any time.
6438 * Assuming this is enforced by the caller, we can't
6439 * get an encrypted page here. Since the encryption
6440 * key depends on the VM page's "pager" object and
6441 * the "paging_offset", we couldn't handle 2 pageable
6442 * VM pages (with different pagers and paging_offsets)
6443 * sharing the same physical page: we could end up
6444 * encrypting with one key (via one VM page) and
6445 * decrypting with another key (via the alias VM page).
6446 */
6447 ASSERT_PAGE_DECRYPTED(m);
55e303ae 6448
3e170ce0 6449 vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
2d21ac55
A
6450
6451 assert(!alias_page->wanted);
6452 alias_page->busy = FALSE;
6453 alias_page->absent = FALSE;
6454 }
6455 size -= PAGE_SIZE;
6456 offset += PAGE_SIZE_64;
6457 new_offset += PAGE_SIZE_64;
55e303ae 6458 }
91447636 6459 vm_object_unlock(upl->map_object);
55e303ae 6460 }
d41d1dae 6461 if (upl->flags & UPL_SHADOWED)
55e303ae 6462 offset = 0;
d41d1dae
A
6463 else
6464 offset = upl->offset - upl->map_object->paging_offset;
6d2010ae 6465
1c79356b
A
6466 size = upl->size;
6467
2d21ac55 6468 vm_object_reference(upl->map_object);
1c79356b 6469
b0d623f7
A
6470 if(!isVectorUPL) {
6471 *dst_addr = 0;
6472 /*
6473 * NEED A UPL_MAP ALIAS
6474 */
6475 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
3e170ce0
A
6476 VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK),
6477 upl->map_object, offset, FALSE,
b0d623f7 6478 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
d41d1dae
A
6479
6480 if (kr != KERN_SUCCESS) {
6481 upl_unlock(upl);
6482 return(kr);
6483 }
b0d623f7
A
6484 }
6485 else {
6486 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
3e170ce0
A
6487 VM_FLAGS_FIXED | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK),
6488 upl->map_object, offset, FALSE,
b0d623f7
A
6489 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6490 if(kr)
6491 panic("vm_map_enter failed for a Vector UPL\n");
6492 }
91447636
A
6493 vm_object_lock(upl->map_object);
6494
2d21ac55 6495 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
1c79356b 6496 m = vm_page_lookup(upl->map_object, offset);
2d21ac55
A
6497
6498 if (m) {
2d21ac55 6499 m->pmapped = TRUE;
b0d623f7
A
6500
6501 /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
6502 * but only in kernel space. If this was on a user map,
6503 * we'd have to set the wpmapped bit. */
6504 /* m->wpmapped = TRUE; */
fe8ab488 6505 assert(map->pmap == kernel_pmap);
9bccf70c 6506
fe8ab488 6507 PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE);
1c79356b 6508 }
2d21ac55 6509 offset += PAGE_SIZE_64;
1c79356b 6510 }
91447636
A
6511 vm_object_unlock(upl->map_object);
6512
2d21ac55
A
6513 /*
6514 * hold a reference for the mapping
6515 */
6516 upl->ref_count++;
1c79356b 6517 upl->flags |= UPL_PAGE_LIST_MAPPED;
b0d623f7
A
6518 upl->kaddr = (vm_offset_t) *dst_addr;
6519 assert(upl->kaddr == *dst_addr);
6520
d41d1dae 6521 if(isVectorUPL)
b0d623f7 6522 goto process_upl_to_enter;
2d21ac55 6523
d41d1dae
A
6524 upl_unlock(upl);
6525
1c79356b
A
6526 return KERN_SUCCESS;
6527}
6528
91447636
A
6529/*
6530 * Internal routine to remove a UPL mapping from a VM map.
6531 *
6532 * XXX - This should just be doable through a standard
6533 * vm_map_remove() operation. Otherwise, implicit clean-up
6534 * of the target map won't be able to correctly remove
6535 * these (and release the reference on the UPL). Having
6536 * to do this means we can't map these into user-space
6537 * maps yet.
6538 */
1c79356b 6539kern_return_t
91447636 6540vm_map_remove_upl(
1c79356b
A
6541 vm_map_t map,
6542 upl_t upl)
6543{
0b4e3aa0 6544 vm_address_t addr;
91447636 6545 upl_size_t size;
b0d623f7
A
6546 int isVectorUPL = 0, curr_upl = 0;
6547 upl_t vector_upl = NULL;
1c79356b 6548
0b4e3aa0
A
6549 if (upl == UPL_NULL)
6550 return KERN_INVALID_ARGUMENT;
6551
b0d623f7
A
6552 if((isVectorUPL = vector_upl_is_valid(upl))) {
6553 int unmapped=0, valid_upls=0;
6554 vector_upl = upl;
6555 upl_lock(vector_upl);
6556 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6557 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6558 if(upl == NULL)
6559 continue;
6560 valid_upls++;
6561 if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
6562 unmapped++;
6563 }
6564
6565 if(unmapped) {
6566 if(unmapped != valid_upls)
6567 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
6568 else {
6569 upl_unlock(vector_upl);
6570 return KERN_FAILURE;
6571 }
6572 }
6573 curr_upl=0;
6574 }
6575 else
6576 upl_lock(upl);
6577
6578process_upl_to_remove:
6579 if(isVectorUPL) {
6580 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6581 vm_map_t v_upl_submap;
6582 vm_offset_t v_upl_submap_dst_addr;
6583 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
6584
6585 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS);
6586 vm_map_deallocate(v_upl_submap);
6587 upl_unlock(vector_upl);
6588 return KERN_SUCCESS;
6589 }
6590
6591 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6592 if(upl == NULL)
6593 goto process_upl_to_remove;
6594 }
2d21ac55
A
6595
6596 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
0b4e3aa0 6597 addr = upl->kaddr;
1c79356b 6598 size = upl->size;
2d21ac55 6599
0b4e3aa0
A
6600 assert(upl->ref_count > 1);
6601 upl->ref_count--; /* removing mapping ref */
2d21ac55 6602
1c79356b
A
6603 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
6604 upl->kaddr = (vm_offset_t) 0;
b0d623f7
A
6605
6606 if(!isVectorUPL) {
6607 upl_unlock(upl);
6608
39236c6e
A
6609 vm_map_remove(
6610 map,
6611 vm_map_trunc_page(addr,
6612 VM_MAP_PAGE_MASK(map)),
6613 vm_map_round_page(addr + size,
6614 VM_MAP_PAGE_MASK(map)),
b0d623f7
A
6615 VM_MAP_NO_FLAGS);
6616
6617 return KERN_SUCCESS;
6618 }
6619 else {
6620 /*
6621 * If it's a Vectored UPL, we'll be removing the entire
6622 * submap anyways, so no need to remove individual UPL
6623 * element mappings from within the submap
6624 */
6625 goto process_upl_to_remove;
6626 }
1c79356b 6627 }
0b4e3aa0 6628 upl_unlock(upl);
2d21ac55 6629
0b4e3aa0 6630 return KERN_FAILURE;
1c79356b
A
6631}
6632
6633kern_return_t
0b4e3aa0 6634upl_commit_range(
1c79356b 6635 upl_t upl,
91447636
A
6636 upl_offset_t offset,
6637 upl_size_t size,
1c79356b 6638 int flags,
0b4e3aa0
A
6639 upl_page_info_t *page_list,
6640 mach_msg_type_number_t count,
6641 boolean_t *empty)
1c79356b 6642{
b0d623f7 6643 upl_size_t xfer_size, subupl_size = size;
55e303ae 6644 vm_object_t shadow_object;
2d21ac55 6645 vm_object_t object;
1c79356b 6646 vm_object_offset_t target_offset;
b0d623f7 6647 upl_offset_t subupl_offset = offset;
1c79356b 6648 int entry;
55e303ae
A
6649 wpl_array_t lite_list;
6650 int occupied;
91447636 6651 int clear_refmod = 0;
2d21ac55 6652 int pgpgout_count = 0;
6d2010ae
A
6653 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
6654 struct vm_page_delayed_work *dwp;
6655 int dw_count;
6656 int dw_limit;
6657 int isVectorUPL = 0;
b0d623f7 6658 upl_t vector_upl = NULL;
6d2010ae 6659 boolean_t should_be_throttled = FALSE;
1c79356b 6660
fe8ab488
A
6661 vm_page_t nxt_page = VM_PAGE_NULL;
6662 int fast_path_possible = 0;
6663 int fast_path_full_commit = 0;
6664 int throttle_page = 0;
6665 int unwired_count = 0;
6666 int local_queue_count = 0;
6667 queue_head_t local_queue;
6668
0b4e3aa0
A
6669 *empty = FALSE;
6670
6671 if (upl == UPL_NULL)
6672 return KERN_INVALID_ARGUMENT;
6673
6674 if (count == 0)
6675 page_list = NULL;
6676
b0d623f7
A
6677 if((isVectorUPL = vector_upl_is_valid(upl))) {
6678 vector_upl = upl;
6679 upl_lock(vector_upl);
6680 }
6681 else
6682 upl_lock(upl);
6683
6684process_upl_to_commit:
6685
6686 if(isVectorUPL) {
6687 size = subupl_size;
6688 offset = subupl_offset;
6689 if(size == 0) {
6690 upl_unlock(vector_upl);
6691 return KERN_SUCCESS;
6692 }
6693 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
6694 if(upl == NULL) {
6695 upl_unlock(vector_upl);
6696 return KERN_FAILURE;
6697 }
6698 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
6699 subupl_size -= size;
6700 subupl_offset += size;
6701 }
6702
6703#if UPL_DEBUG
6704 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
6705 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
6706
6707 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
6708 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
6709
6710 upl->upl_commit_index++;
6711 }
6712#endif
2d21ac55
A
6713 if (upl->flags & UPL_DEVICE_MEMORY)
6714 xfer_size = 0;
6715 else if ((offset + size) <= upl->size)
6716 xfer_size = size;
b0d623f7
A
6717 else {
6718 if(!isVectorUPL)
6719 upl_unlock(upl);
6720 else {
6721 upl_unlock(vector_upl);
6722 }
2d21ac55 6723 return KERN_FAILURE;
91447636 6724 }
6d2010ae
A
6725 if (upl->flags & UPL_SET_DIRTY)
6726 flags |= UPL_COMMIT_SET_DIRTY;
55e303ae
A
6727 if (upl->flags & UPL_CLEAR_DIRTY)
6728 flags |= UPL_COMMIT_CLEAR_DIRTY;
6729
2d21ac55
A
6730 if (upl->flags & UPL_INTERNAL)
6731 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
6732 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6733 else
6734 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
1c79356b 6735
2d21ac55
A
6736 object = upl->map_object;
6737
6738 if (upl->flags & UPL_SHADOWED) {
6739 vm_object_lock(object);
6740 shadow_object = object->shadow;
55e303ae 6741 } else {
2d21ac55 6742 shadow_object = object;
55e303ae 6743 }
1c79356b
A
6744 entry = offset/PAGE_SIZE;
6745 target_offset = (vm_object_offset_t)offset;
55e303ae 6746
3e170ce0
A
6747 assert(!(target_offset & PAGE_MASK));
6748 assert(!(xfer_size & PAGE_MASK));
6749
b0d623f7
A
6750 if (upl->flags & UPL_KERNEL_OBJECT)
6751 vm_object_lock_shared(shadow_object);
6752 else
6753 vm_object_lock(shadow_object);
4a3eedf9 6754
b0d623f7
A
6755 if (upl->flags & UPL_ACCESS_BLOCKED) {
6756 assert(shadow_object->blocked_access);
6757 shadow_object->blocked_access = FALSE;
6758 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
4a3eedf9 6759 }
4a3eedf9 6760
593a1d5f
A
6761 if (shadow_object->code_signed) {
6762 /*
6763 * CODE SIGNING:
6764 * If the object is code-signed, do not let this UPL tell
6765 * us if the pages are valid or not. Let the pages be
6766 * validated by VM the normal way (when they get mapped or
6767 * copied).
6768 */
6769 flags &= ~UPL_COMMIT_CS_VALIDATED;
6770 }
6771 if (! page_list) {
6772 /*
6773 * No page list to get the code-signing info from !?
6774 */
6775 flags &= ~UPL_COMMIT_CS_VALIDATED;
6776 }
6d2010ae
A
6777 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal)
6778 should_be_throttled = TRUE;
593a1d5f 6779
b0d623f7
A
6780 dwp = &dw_array[0];
6781 dw_count = 0;
6d2010ae 6782 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
b0d623f7 6783
fe8ab488
A
6784 if ((upl->flags & UPL_IO_WIRE) &&
6785 !(flags & UPL_COMMIT_FREE_ABSENT) &&
6786 !isVectorUPL &&
6787 shadow_object->purgable != VM_PURGABLE_VOLATILE &&
6788 shadow_object->purgable != VM_PURGABLE_EMPTY) {
6789
6790 if (!queue_empty(&shadow_object->memq)) {
6791 queue_init(&local_queue);
6792 if (size == shadow_object->vo_size) {
6793 nxt_page = (vm_page_t)queue_first(&shadow_object->memq);
6794 fast_path_full_commit = 1;
6795 }
6796 fast_path_possible = 1;
6797
6798 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal &&
6799 (shadow_object->purgable == VM_PURGABLE_DENY ||
6800 shadow_object->purgable == VM_PURGABLE_NONVOLATILE ||
6801 shadow_object->purgable == VM_PURGABLE_VOLATILE)) {
6802 throttle_page = 1;
6803 }
6804 }
6805 }
6806
91447636 6807 while (xfer_size) {
2d21ac55
A
6808 vm_page_t t, m;
6809
b0d623f7
A
6810 dwp->dw_mask = 0;
6811 clear_refmod = 0;
6812
55e303ae 6813 m = VM_PAGE_NULL;
d7e50217 6814
55e303ae 6815 if (upl->flags & UPL_LITE) {
b0d623f7 6816 unsigned int pg_num;
55e303ae 6817
fe8ab488
A
6818 if (nxt_page != VM_PAGE_NULL) {
6819 m = nxt_page;
6820 nxt_page = (vm_page_t)queue_next(&nxt_page->listq);
6821 target_offset = m->offset;
6822 }
b0d623f7
A
6823 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
6824 assert(pg_num == target_offset/PAGE_SIZE);
55e303ae
A
6825
6826 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6827 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
2d21ac55 6828
fe8ab488 6829 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
b0d623f7 6830 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
fe8ab488
A
6831 } else
6832 m = NULL;
55e303ae 6833 }
2d21ac55
A
6834 if (upl->flags & UPL_SHADOWED) {
6835 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
6836
55e303ae
A
6837 t->pageout = FALSE;
6838
b0d623f7 6839 VM_PAGE_FREE(t);
55e303ae 6840
fe8ab488 6841 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6d2010ae 6842 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
55e303ae
A
6843 }
6844 }
fe8ab488 6845 if (m == VM_PAGE_NULL)
593a1d5f 6846 goto commit_next_page;
55e303ae 6847
39236c6e
A
6848 if (m->compressor) {
6849 assert(m->busy);
6850
6851 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6852 goto commit_next_page;
6853 }
6854
593a1d5f
A
6855 if (flags & UPL_COMMIT_CS_VALIDATED) {
6856 /*
6857 * CODE SIGNING:
6858 * Set the code signing bits according to
6859 * what the UPL says they should be.
6860 */
6861 m->cs_validated = page_list[entry].cs_validated;
6862 m->cs_tainted = page_list[entry].cs_tainted;
c18c124e 6863 m->cs_nx = page_list[entry].cs_nx;
593a1d5f 6864 }
15129b1c 6865 if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL)
fe8ab488 6866 m->written_by_kernel = TRUE;
15129b1c 6867
593a1d5f 6868 if (upl->flags & UPL_IO_WIRE) {
55e303ae 6869
593a1d5f
A
6870 if (page_list)
6871 page_list[entry].phys_addr = 0;
2d21ac55 6872
6d2010ae 6873 if (flags & UPL_COMMIT_SET_DIRTY) {
316670eb 6874 SET_PAGE_DIRTY(m, FALSE);
6d2010ae 6875 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
593a1d5f 6876 m->dirty = FALSE;
b0d623f7 6877
593a1d5f
A
6878 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6879 m->cs_validated && !m->cs_tainted) {
4a3eedf9
A
6880 /*
6881 * CODE SIGNING:
6882 * This page is no longer dirty
6883 * but could have been modified,
6884 * so it will need to be
6885 * re-validated.
6886 */
fe8ab488 6887 if (m->slid) {
15129b1c
A
6888 panic("upl_commit_range(%p): page %p was slid\n",
6889 upl, m);
6890 }
6891 assert(!m->slid);
4a3eedf9 6892 m->cs_validated = FALSE;
b0d623f7 6893#if DEVELOPMENT || DEBUG
4a3eedf9 6894 vm_cs_validated_resets++;
b0d623f7
A
6895#endif
6896 pmap_disconnect(m->phys_page);
4a3eedf9 6897 }
91447636 6898 clear_refmod |= VM_MEM_MODIFIED;
55e303ae 6899 }
b0d623f7 6900 if (upl->flags & UPL_ACCESS_BLOCKED) {
593a1d5f
A
6901 /*
6902 * We blocked access to the pages in this UPL.
6903 * Clear the "busy" bit and wake up any waiter
6904 * for this page.
6905 */
b0d623f7 6906 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
593a1d5f 6907 }
fe8ab488
A
6908 if (fast_path_possible) {
6909 assert(m->object->purgable != VM_PURGABLE_EMPTY);
6910 assert(m->object->purgable != VM_PURGABLE_VOLATILE);
6911 if (m->absent) {
6912 assert(m->wire_count == 0);
6913 assert(m->busy);
6914
0b4c1975 6915 m->absent = FALSE;
d41d1dae 6916 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
fe8ab488
A
6917 } else {
6918 if (m->wire_count == 0)
6919 panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object);
6920
6921 /*
6922 * XXX FBDP need to update some other
6923 * counters here (purgeable_wired_count)
6924 * (ledgers), ...
6925 */
6926 assert(m->wire_count);
6927 m->wire_count--;
7ddcb079 6928
fe8ab488
A
6929 if (m->wire_count == 0)
6930 unwired_count++;
d41d1dae 6931 }
fe8ab488
A
6932 if (m->wire_count == 0) {
6933 queue_enter(&local_queue, m, vm_page_t, pageq);
6934 local_queue_count++;
d41d1dae 6935
fe8ab488
A
6936 if (throttle_page) {
6937 m->throttled = TRUE;
6938 } else {
6939 if (flags & UPL_COMMIT_INACTIVATE)
6940 m->inactive = TRUE;
6941 else
6942 m->active = TRUE;
6943 }
6944 }
6945 } else {
6946 if (flags & UPL_COMMIT_INACTIVATE) {
6947 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6948 clear_refmod |= VM_MEM_REFERENCED;
6949 }
6950 if (m->absent) {
6951 if (flags & UPL_COMMIT_FREE_ABSENT)
6952 dwp->dw_mask |= DW_vm_page_free;
6953 else {
6954 m->absent = FALSE;
6955 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6956
6957 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal))
6958 dwp->dw_mask |= DW_vm_page_activate;
6959 }
6960 } else
6961 dwp->dw_mask |= DW_vm_page_unwire;
6962 }
593a1d5f
A
6963 goto commit_next_page;
6964 }
39236c6e
A
6965 assert(!m->compressor);
6966
316670eb
A
6967 if (page_list)
6968 page_list[entry].phys_addr = 0;
6969
593a1d5f
A
6970 /*
6971 * make sure to clear the hardware
6972 * modify or reference bits before
6973 * releasing the BUSY bit on this page
6974 * otherwise we risk losing a legitimate
6975 * change of state
6976 */
6977 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6978 m->dirty = FALSE;
2d21ac55 6979
593a1d5f
A
6980 clear_refmod |= VM_MEM_MODIFIED;
6981 }
316670eb
A
6982 if (m->laundry)
6983 dwp->dw_mask |= DW_vm_pageout_throttle_up;
b0d623f7 6984
316670eb
A
6985 if (VM_PAGE_WIRED(m))
6986 m->pageout = FALSE;
6987
6988 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6989 m->cs_validated && !m->cs_tainted) {
6990 /*
6991 * CODE SIGNING:
6992 * This page is no longer dirty
6993 * but could have been modified,
6994 * so it will need to be
6995 * re-validated.
6996 */
fe8ab488 6997 if (m->slid) {
15129b1c
A
6998 panic("upl_commit_range(%p): page %p was slid\n",
6999 upl, m);
7000 }
7001 assert(!m->slid);
316670eb
A
7002 m->cs_validated = FALSE;
7003#if DEVELOPMENT || DEBUG
7004 vm_cs_validated_resets++;
7005#endif
7006 pmap_disconnect(m->phys_page);
7007 }
7008 if (m->overwriting) {
7009 /*
7010 * the (COPY_OUT_FROM == FALSE) request_page_list case
7011 */
7012 if (m->busy) {
fe8ab488
A
7013#if CONFIG_PHANTOM_CACHE
7014 if (m->absent && !m->object->internal)
7015 dwp->dw_mask |= DW_vm_phantom_cache_update;
7016#endif
593a1d5f 7017 m->absent = FALSE;
b0d623f7 7018
316670eb
A
7019 dwp->dw_mask |= DW_clear_busy;
7020 } else {
7021 /*
7022 * alternate (COPY_OUT_FROM == FALSE) page_list case
7023 * Occurs when the original page was wired
7024 * at the time of the list request
7025 */
7026 assert(VM_PAGE_WIRED(m));
7027
7028 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
593a1d5f 7029 }
316670eb 7030 m->overwriting = FALSE;
593a1d5f 7031 }
316670eb
A
7032 if (m->encrypted_cleaning == TRUE) {
7033 m->encrypted_cleaning = FALSE;
2d21ac55 7034
316670eb
A
7035 dwp->dw_mask |= DW_clear_busy | DW_PAGE_WAKEUP;
7036 }
7037 m->cleaning = FALSE;
91447636 7038
593a1d5f 7039 if (m->pageout) {
316670eb
A
7040 /*
7041 * With the clean queue enabled, UPL_PAGEOUT should
7042 * no longer set the pageout bit. It's pages now go
7043 * to the clean queue.
7044 */
7045 assert(!(flags & UPL_PAGEOUT));
7046
593a1d5f 7047 m->pageout = FALSE;
1c79356b 7048#if MACH_CLUSTER_STATS
593a1d5f 7049 if (m->wanted) vm_pageout_target_collisions++;
1c79356b 7050#endif
b0d623f7 7051 if ((flags & UPL_COMMIT_SET_DIRTY) ||
316670eb 7052 (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))) {
593a1d5f
A
7053 /*
7054 * page was re-dirtied after we started
7055 * the pageout... reactivate it since
7056 * we don't know whether the on-disk
7057 * copy matches what is now in memory
2d21ac55 7058 */
316670eb
A
7059 SET_PAGE_DIRTY(m, FALSE);
7060
7061 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
b0d623f7 7062
593a1d5f
A
7063 if (upl->flags & UPL_PAGEOUT) {
7064 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
7065 VM_STAT_INCR(reactivations);
7066 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
7067 }
593a1d5f
A
7068 } else {
7069 /*
7070 * page has been successfully cleaned
7071 * go ahead and free it for other use
2d21ac55 7072 */
593a1d5f
A
7073 if (m->object->internal) {
7074 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
7075 } else {
7076 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
7077 }
316670eb
A
7078 m->dirty = FALSE;
7079 m->busy = TRUE;
b0d623f7 7080
316670eb 7081 dwp->dw_mask |= DW_vm_page_free;
de355530 7082 }
593a1d5f
A
7083 goto commit_next_page;
7084 }
7085#if MACH_CLUSTER_STATS
7086 if (m->wpmapped)
7087 m->dirty = pmap_is_modified(m->phys_page);
7088
7089 if (m->dirty) vm_pageout_cluster_dirtied++;
7090 else vm_pageout_cluster_cleaned++;
7091 if (m->wanted) vm_pageout_cluster_collisions++;
7092#endif
593a1d5f
A
7093 /*
7094 * It is a part of the semantic of COPYOUT_FROM
7095 * UPLs that a commit implies cache sync
7096 * between the vm page and the backing store
7097 * this can be used to strip the precious bit
7098 * as well as clean
7099 */
b0d623f7 7100 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
593a1d5f 7101 m->precious = FALSE;
b0d623f7 7102
316670eb
A
7103 if (flags & UPL_COMMIT_SET_DIRTY) {
7104 SET_PAGE_DIRTY(m, FALSE);
7105 } else {
7106 m->dirty = FALSE;
7107 }
7108
7109 /* with the clean queue on, move *all* cleaned pages to the clean queue */
7110 if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) {
7111 pgpgout_count++;
7112
fe8ab488
A
7113 VM_STAT_INCR(pageouts);
7114 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
b0d623f7 7115
316670eb
A
7116 dwp->dw_mask |= DW_enqueue_cleaned;
7117 vm_pageout_enqueued_cleaned_from_inactive_dirty++;
7118 } else if (should_be_throttled == TRUE && !m->active && !m->inactive && !m->speculative && !m->throttled) {
6d2010ae
A
7119 /*
7120 * page coming back in from being 'frozen'...
7121 * it was dirty before it was frozen, so keep it so
7122 * the vm_page_activate will notice that it really belongs
7123 * on the throttle queue and put it there
7124 */
316670eb 7125 SET_PAGE_DIRTY(m, FALSE);
6d2010ae 7126 dwp->dw_mask |= DW_vm_page_activate;
b0d623f7 7127
6d2010ae
A
7128 } else {
7129 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
b0d623f7
A
7130 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7131 clear_refmod |= VM_MEM_REFERENCED;
6d2010ae
A
7132 } else if (!m->active && !m->inactive && !m->speculative) {
7133
7134 if (m->clustered || (flags & UPL_COMMIT_SPECULATE))
7135 dwp->dw_mask |= DW_vm_page_speculate;
7136 else if (m->reference)
7137 dwp->dw_mask |= DW_vm_page_activate;
7138 else {
7139 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7140 clear_refmod |= VM_MEM_REFERENCED;
7141 }
b0d623f7 7142 }
593a1d5f 7143 }
b0d623f7 7144 if (upl->flags & UPL_ACCESS_BLOCKED) {
2d21ac55 7145 /*
593a1d5f
A
7146 * We blocked access to the pages in this URL.
7147 * Clear the "busy" bit on this page before we
7148 * wake up any waiter.
2d21ac55 7149 */
b0d623f7 7150 dwp->dw_mask |= DW_clear_busy;
1c79356b 7151 }
593a1d5f
A
7152 /*
7153 * Wakeup any thread waiting for the page to be un-cleaning.
7154 */
b0d623f7 7155 dwp->dw_mask |= DW_PAGE_WAKEUP;
593a1d5f 7156
2d21ac55 7157commit_next_page:
b0d623f7
A
7158 if (clear_refmod)
7159 pmap_clear_refmod(m->phys_page, clear_refmod);
7160
1c79356b
A
7161 target_offset += PAGE_SIZE_64;
7162 xfer_size -= PAGE_SIZE;
7163 entry++;
2d21ac55 7164
b0d623f7
A
7165 if (dwp->dw_mask) {
7166 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
6d2010ae 7167 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
4a3eedf9 7168
6d2010ae 7169 if (dw_count >= dw_limit) {
3e170ce0 7170 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
b0d623f7
A
7171
7172 dwp = &dw_array[0];
7173 dw_count = 0;
7174 }
7175 } else {
7176 if (dwp->dw_mask & DW_clear_busy)
7177 m->busy = FALSE;
7178
7179 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7180 PAGE_WAKEUP(m);
4a3eedf9 7181 }
2d21ac55 7182 }
1c79356b 7183 }
b0d623f7 7184 if (dw_count)
3e170ce0 7185 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
55e303ae 7186
fe8ab488
A
7187 if (fast_path_possible) {
7188
7189 assert(shadow_object->purgable != VM_PURGABLE_VOLATILE);
7190 assert(shadow_object->purgable != VM_PURGABLE_EMPTY);
7191
7192 if (local_queue_count || unwired_count) {
7193
7194 if (local_queue_count) {
7195 vm_page_t first_local, last_local;
7196 vm_page_t first_target;
7197 queue_head_t *target_queue;
7198
7199 if (throttle_page)
7200 target_queue = &vm_page_queue_throttled;
7201 else {
7202 if (flags & UPL_COMMIT_INACTIVATE) {
7203 if (shadow_object->internal)
7204 target_queue = &vm_page_queue_anonymous;
7205 else
7206 target_queue = &vm_page_queue_inactive;
7207 } else
7208 target_queue = &vm_page_queue_active;
7209 }
7210 /*
7211 * Transfer the entire local queue to a regular LRU page queues.
7212 */
7213 first_local = (vm_page_t) queue_first(&local_queue);
7214 last_local = (vm_page_t) queue_last(&local_queue);
7215
7216 vm_page_lockspin_queues();
7217
7218 first_target = (vm_page_t) queue_first(target_queue);
7219
7220 if (queue_empty(target_queue))
7221 queue_last(target_queue) = (queue_entry_t) last_local;
7222 else
7223 queue_prev(&first_target->pageq) = (queue_entry_t) last_local;
7224
7225 queue_first(target_queue) = (queue_entry_t) first_local;
7226 queue_prev(&first_local->pageq) = (queue_entry_t) target_queue;
7227 queue_next(&last_local->pageq) = (queue_entry_t) first_target;
7228
7229 /*
7230 * Adjust the global page counts.
7231 */
7232 if (throttle_page) {
7233 vm_page_throttled_count += local_queue_count;
7234 } else {
7235 if (flags & UPL_COMMIT_INACTIVATE) {
7236 if (shadow_object->internal)
7237 vm_page_anonymous_count += local_queue_count;
7238 vm_page_inactive_count += local_queue_count;
7239
7240 token_new_pagecount += local_queue_count;
7241 } else
7242 vm_page_active_count += local_queue_count;
7243
7244 if (shadow_object->internal)
7245 vm_page_pageable_internal_count += local_queue_count;
7246 else
7247 vm_page_pageable_external_count += local_queue_count;
7248 }
7249 } else {
7250 vm_page_lockspin_queues();
7251 }
7252 if (unwired_count) {
7253 vm_page_wire_count -= unwired_count;
7254 VM_CHECK_MEMORYSTATUS;
7255 }
7256 vm_page_unlock_queues();
7257
7258 shadow_object->wired_page_count -= unwired_count;
3e170ce0
A
7259
7260 if (!shadow_object->wired_page_count) {
7261 VM_OBJECT_UNWIRED(shadow_object);
7262 }
fe8ab488
A
7263 }
7264 }
55e303ae
A
7265 occupied = 1;
7266
7267 if (upl->flags & UPL_DEVICE_MEMORY) {
7268 occupied = 0;
7269 } else if (upl->flags & UPL_LITE) {
7270 int pg_num;
7271 int i;
2d21ac55 7272
55e303ae 7273 occupied = 0;
2d21ac55 7274
fe8ab488
A
7275 if (!fast_path_full_commit) {
7276 pg_num = upl->size/PAGE_SIZE;
7277 pg_num = (pg_num + 31) >> 5;
7278
7279 for (i = 0; i < pg_num; i++) {
7280 if (lite_list[i] != 0) {
7281 occupied = 1;
7282 break;
7283 }
55e303ae
A
7284 }
7285 }
7286 } else {
2d21ac55 7287 if (queue_empty(&upl->map_object->memq))
55e303ae 7288 occupied = 0;
55e303ae 7289 }
2d21ac55 7290 if (occupied == 0) {
b0d623f7
A
7291 /*
7292 * If this UPL element belongs to a Vector UPL and is
7293 * empty, then this is the right function to deallocate
7294 * it. So go ahead set the *empty variable. The flag
7295 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7296 * should be considered relevant for the Vector UPL and not
7297 * the internal UPLs.
7298 */
7299 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
0b4e3aa0 7300 *empty = TRUE;
2d21ac55 7301
b0d623f7 7302 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
2d21ac55
A
7303 /*
7304 * this is not a paging object
7305 * so we need to drop the paging reference
7306 * that was taken when we created the UPL
7307 * against this object
7308 */
b0d623f7 7309 vm_object_activity_end(shadow_object);
316670eb 7310 vm_object_collapse(shadow_object, 0, TRUE);
2d21ac55
A
7311 } else {
7312 /*
7313 * we dontated the paging reference to
7314 * the map object... vm_pageout_object_terminate
7315 * will drop this reference
7316 */
7317 }
1c79356b 7318 }
55e303ae 7319 vm_object_unlock(shadow_object);
91447636
A
7320 if (object != shadow_object)
7321 vm_object_unlock(object);
b0d623f7
A
7322
7323 if(!isVectorUPL)
7324 upl_unlock(upl);
7325 else {
7326 /*
7327 * If we completed our operations on an UPL that is
7328 * part of a Vectored UPL and if empty is TRUE, then
7329 * we should go ahead and deallocate this UPL element.
7330 * Then we check if this was the last of the UPL elements
7331 * within that Vectored UPL. If so, set empty to TRUE
7332 * so that in ubc_upl_commit_range or ubc_upl_commit, we
7333 * can go ahead and deallocate the Vector UPL too.
7334 */
7335 if(*empty==TRUE) {
7336 *empty = vector_upl_set_subupl(vector_upl, upl, 0);
7337 upl_deallocate(upl);
7338 }
7339 goto process_upl_to_commit;
7340 }
0b4e3aa0 7341
2d21ac55
A
7342 if (pgpgout_count) {
7343 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
7344 }
7345
1c79356b
A
7346 return KERN_SUCCESS;
7347}
7348
0b4e3aa0
A
7349kern_return_t
7350upl_abort_range(
1c79356b 7351 upl_t upl,
91447636
A
7352 upl_offset_t offset,
7353 upl_size_t size,
0b4e3aa0
A
7354 int error,
7355 boolean_t *empty)
1c79356b 7356{
316670eb 7357 upl_page_info_t *user_page_list = NULL;
b0d623f7 7358 upl_size_t xfer_size, subupl_size = size;
55e303ae 7359 vm_object_t shadow_object;
2d21ac55 7360 vm_object_t object;
1c79356b 7361 vm_object_offset_t target_offset;
b0d623f7 7362 upl_offset_t subupl_offset = offset;
1c79356b 7363 int entry;
55e303ae
A
7364 wpl_array_t lite_list;
7365 int occupied;
6d2010ae
A
7366 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7367 struct vm_page_delayed_work *dwp;
7368 int dw_count;
7369 int dw_limit;
7370 int isVectorUPL = 0;
b0d623f7 7371 upl_t vector_upl = NULL;
1c79356b 7372
0b4e3aa0
A
7373 *empty = FALSE;
7374
7375 if (upl == UPL_NULL)
7376 return KERN_INVALID_ARGUMENT;
7377
2d21ac55 7378 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
0b4c1975 7379 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
55e303ae 7380
b0d623f7
A
7381 if((isVectorUPL = vector_upl_is_valid(upl))) {
7382 vector_upl = upl;
7383 upl_lock(vector_upl);
7384 }
7385 else
7386 upl_lock(upl);
7387
7388process_upl_to_abort:
7389 if(isVectorUPL) {
7390 size = subupl_size;
7391 offset = subupl_offset;
7392 if(size == 0) {
7393 upl_unlock(vector_upl);
7394 return KERN_SUCCESS;
7395 }
7396 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
7397 if(upl == NULL) {
7398 upl_unlock(vector_upl);
7399 return KERN_FAILURE;
7400 }
7401 subupl_size -= size;
7402 subupl_offset += size;
7403 }
7404
7405 *empty = FALSE;
7406
7407#if UPL_DEBUG
7408 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
7409 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
7410
7411 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
7412 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
7413 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1;
7414
7415 upl->upl_commit_index++;
7416 }
7417#endif
2d21ac55 7418 if (upl->flags & UPL_DEVICE_MEMORY)
1c79356b 7419 xfer_size = 0;
2d21ac55
A
7420 else if ((offset + size) <= upl->size)
7421 xfer_size = size;
b0d623f7
A
7422 else {
7423 if(!isVectorUPL)
7424 upl_unlock(upl);
7425 else {
7426 upl_unlock(vector_upl);
7427 }
55e303ae 7428
b0d623f7
A
7429 return KERN_FAILURE;
7430 }
2d21ac55 7431 if (upl->flags & UPL_INTERNAL) {
55e303ae 7432 lite_list = (wpl_array_t)
91447636 7433 ((((uintptr_t)upl) + sizeof(struct upl))
55e303ae 7434 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
316670eb
A
7435
7436 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
55e303ae
A
7437 } else {
7438 lite_list = (wpl_array_t)
91447636 7439 (((uintptr_t)upl) + sizeof(struct upl));
55e303ae 7440 }
2d21ac55
A
7441 object = upl->map_object;
7442
7443 if (upl->flags & UPL_SHADOWED) {
7444 vm_object_lock(object);
7445 shadow_object = object->shadow;
7446 } else
7447 shadow_object = object;
7448
1c79356b
A
7449 entry = offset/PAGE_SIZE;
7450 target_offset = (vm_object_offset_t)offset;
2d21ac55 7451
3e170ce0
A
7452 assert(!(target_offset & PAGE_MASK));
7453 assert(!(xfer_size & PAGE_MASK));
7454
b0d623f7
A
7455 if (upl->flags & UPL_KERNEL_OBJECT)
7456 vm_object_lock_shared(shadow_object);
7457 else
7458 vm_object_lock(shadow_object);
4a3eedf9 7459
b0d623f7
A
7460 if (upl->flags & UPL_ACCESS_BLOCKED) {
7461 assert(shadow_object->blocked_access);
7462 shadow_object->blocked_access = FALSE;
7463 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
4a3eedf9 7464 }
b0d623f7
A
7465
7466 dwp = &dw_array[0];
7467 dw_count = 0;
6d2010ae 7468 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
b0d623f7
A
7469
7470 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
7471 panic("upl_abort_range: kernel_object being DUMPED");
4a3eedf9 7472
2d21ac55
A
7473 while (xfer_size) {
7474 vm_page_t t, m;
316670eb
A
7475 unsigned int pg_num;
7476 boolean_t needed;
2d21ac55 7477
316670eb
A
7478 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
7479 assert(pg_num == target_offset/PAGE_SIZE);
7480
7481 needed = FALSE;
b0d623f7 7482
316670eb
A
7483 if (user_page_list)
7484 needed = user_page_list[pg_num].needed;
7485
7486 dwp->dw_mask = 0;
55e303ae 7487 m = VM_PAGE_NULL;
2d21ac55
A
7488
7489 if (upl->flags & UPL_LITE) {
2d21ac55
A
7490
7491 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
55e303ae 7492 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
2d21ac55 7493
b0d623f7
A
7494 if ( !(upl->flags & UPL_KERNEL_OBJECT))
7495 m = vm_page_lookup(shadow_object, target_offset +
7496 (upl->offset - shadow_object->paging_offset));
55e303ae
A
7497 }
7498 }
2d21ac55
A
7499 if (upl->flags & UPL_SHADOWED) {
7500 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
7501 t->pageout = FALSE;
7502
b0d623f7 7503 VM_PAGE_FREE(t);
2d21ac55
A
7504
7505 if (m == VM_PAGE_NULL)
6d2010ae 7506 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
55e303ae
A
7507 }
7508 }
b0d623f7
A
7509 if ((upl->flags & UPL_KERNEL_OBJECT))
7510 goto abort_next_page;
7511
2d21ac55
A
7512 if (m != VM_PAGE_NULL) {
7513
39236c6e
A
7514 assert(!m->compressor);
7515
2d21ac55 7516 if (m->absent) {
91447636
A
7517 boolean_t must_free = TRUE;
7518
2d21ac55
A
7519 /*
7520 * COPYOUT = FALSE case
7521 * check for error conditions which must
7522 * be passed back to the pages customer
7523 */
7524 if (error & UPL_ABORT_RESTART) {
1c79356b
A
7525 m->restart = TRUE;
7526 m->absent = FALSE;
2d21ac55 7527 m->unusual = TRUE;
91447636 7528 must_free = FALSE;
2d21ac55 7529 } else if (error & UPL_ABORT_UNAVAILABLE) {
1c79356b
A
7530 m->restart = FALSE;
7531 m->unusual = TRUE;
91447636 7532 must_free = FALSE;
2d21ac55 7533 } else if (error & UPL_ABORT_ERROR) {
1c79356b
A
7534 m->restart = FALSE;
7535 m->absent = FALSE;
1c79356b 7536 m->error = TRUE;
2d21ac55 7537 m->unusual = TRUE;
91447636 7538 must_free = FALSE;
1c79356b 7539 }
316670eb 7540 if (m->clustered && needed == FALSE) {
6d2010ae
A
7541 /*
7542 * This page was a part of a speculative
7543 * read-ahead initiated by the kernel
7544 * itself. No one is expecting this
7545 * page and no one will clean up its
7546 * error state if it ever becomes valid
7547 * in the future.
7548 * We have to free it here.
7549 */
7550 must_free = TRUE;
7551 }
91447636
A
7552
7553 /*
7554 * ENCRYPTED SWAP:
7555 * If the page was already encrypted,
7556 * we don't really need to decrypt it
7557 * now. It will get decrypted later,
7558 * on demand, as soon as someone needs
7559 * to access its contents.
7560 */
1c79356b
A
7561
7562 m->cleaning = FALSE;
2d21ac55 7563 m->encrypted_cleaning = FALSE;
6d2010ae
A
7564
7565 if (m->overwriting && !m->busy) {
7566 /*
7567 * this shouldn't happen since
7568 * this is an 'absent' page, but
7569 * it doesn't hurt to check for
7570 * the 'alternate' method of
7571 * stabilizing the page...
7572 * we will mark 'busy' to be cleared
7573 * in the following code which will
7574 * take care of the primary stabilzation
7575 * method (i.e. setting 'busy' to TRUE)
7576 */
7577 dwp->dw_mask |= DW_vm_page_unwire;
7578 }
1c79356b 7579 m->overwriting = FALSE;
b0d623f7
A
7580
7581 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
91447636 7582
2d21ac55 7583 if (must_free == TRUE)
b0d623f7 7584 dwp->dw_mask |= DW_vm_page_free;
2d21ac55 7585 else
b0d623f7 7586 dwp->dw_mask |= DW_vm_page_activate;
2d21ac55
A
7587 } else {
7588 /*
7589 * Handle the trusted pager throttle.
7590 */
7591 if (m->laundry)
b0d623f7 7592 dwp->dw_mask |= DW_vm_pageout_throttle_up;
2d21ac55 7593
6d2010ae
A
7594 if (upl->flags & UPL_ACCESS_BLOCKED) {
7595 /*
7596 * We blocked access to the pages in this UPL.
7597 * Clear the "busy" bit and wake up any waiter
7598 * for this page.
7599 */
7600 dwp->dw_mask |= DW_clear_busy;
7601 }
6d2010ae
A
7602 if (m->overwriting) {
7603 if (m->busy)
7604 dwp->dw_mask |= DW_clear_busy;
7605 else {
7606 /*
7607 * deal with the 'alternate' method
7608 * of stabilizing the page...
7609 * we will either free the page
7610 * or mark 'busy' to be cleared
7611 * in the following code which will
7612 * take care of the primary stabilzation
7613 * method (i.e. setting 'busy' to TRUE)
7614 */
7615 dwp->dw_mask |= DW_vm_page_unwire;
7616 }
7617 m->overwriting = FALSE;
7618 }
7619 if (m->encrypted_cleaning == TRUE) {
7620 m->encrypted_cleaning = FALSE;
7621
7622 dwp->dw_mask |= DW_clear_busy;
1c79356b 7623 }
316670eb 7624 m->pageout = FALSE;
2d21ac55 7625 m->cleaning = FALSE;
1c79356b 7626#if MACH_PAGEMAP
2d21ac55 7627 vm_external_state_clr(m->object->existence_map, m->offset);
1c79356b 7628#endif /* MACH_PAGEMAP */
2d21ac55
A
7629 if (error & UPL_ABORT_DUMP_PAGES) {
7630 pmap_disconnect(m->phys_page);
b0d623f7
A
7631
7632 dwp->dw_mask |= DW_vm_page_free;
2d21ac55 7633 } else {
316670eb
A
7634 if (!(dwp->dw_mask & DW_vm_page_unwire)) {
7635 if (error & UPL_ABORT_REFERENCE) {
7636 /*
7637 * we've been told to explictly
7638 * reference this page... for
7639 * file I/O, this is done by
7640 * implementing an LRU on the inactive q
7641 */
7642 dwp->dw_mask |= DW_vm_page_lru;
7643
7644 } else if (!m->active && !m->inactive && !m->speculative)
7645 dwp->dw_mask |= DW_vm_page_deactivate_internal;
2d21ac55 7646 }
6d2010ae 7647 dwp->dw_mask |= DW_PAGE_WAKEUP;
2d21ac55 7648 }
1c79356b 7649 }
2d21ac55 7650 }
b0d623f7 7651abort_next_page:
55e303ae
A
7652 target_offset += PAGE_SIZE_64;
7653 xfer_size -= PAGE_SIZE;
7654 entry++;
b0d623f7
A
7655
7656 if (dwp->dw_mask) {
7657 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
6d2010ae 7658 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
b0d623f7 7659
6d2010ae 7660 if (dw_count >= dw_limit) {
3e170ce0 7661 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
b0d623f7
A
7662
7663 dwp = &dw_array[0];
7664 dw_count = 0;
7665 }
7666 } else {
7667 if (dwp->dw_mask & DW_clear_busy)
7668 m->busy = FALSE;
7669
7670 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7671 PAGE_WAKEUP(m);
7672 }
7673 }
d7e50217 7674 }
b0d623f7 7675 if (dw_count)
3e170ce0 7676 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
2d21ac55 7677
55e303ae 7678 occupied = 1;
2d21ac55 7679
55e303ae
A
7680 if (upl->flags & UPL_DEVICE_MEMORY) {
7681 occupied = 0;
7682 } else if (upl->flags & UPL_LITE) {
7683 int pg_num;
7684 int i;
2d21ac55 7685
55e303ae
A
7686 pg_num = upl->size/PAGE_SIZE;
7687 pg_num = (pg_num + 31) >> 5;
7688 occupied = 0;
2d21ac55
A
7689
7690 for (i = 0; i < pg_num; i++) {
7691 if (lite_list[i] != 0) {
55e303ae
A
7692 occupied = 1;
7693 break;
7694 }
7695 }
7696 } else {
2d21ac55 7697 if (queue_empty(&upl->map_object->memq))
55e303ae 7698 occupied = 0;
55e303ae 7699 }
2d21ac55 7700 if (occupied == 0) {
b0d623f7
A
7701 /*
7702 * If this UPL element belongs to a Vector UPL and is
7703 * empty, then this is the right function to deallocate
7704 * it. So go ahead set the *empty variable. The flag
7705 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7706 * should be considered relevant for the Vector UPL and
7707 * not the internal UPLs.
7708 */
7709 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
0b4e3aa0 7710 *empty = TRUE;
2d21ac55 7711
b0d623f7 7712 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
2d21ac55
A
7713 /*
7714 * this is not a paging object
7715 * so we need to drop the paging reference
7716 * that was taken when we created the UPL
7717 * against this object
7718 */
b0d623f7 7719 vm_object_activity_end(shadow_object);
316670eb 7720 vm_object_collapse(shadow_object, 0, TRUE);
2d21ac55
A
7721 } else {
7722 /*
7723 * we dontated the paging reference to
7724 * the map object... vm_pageout_object_terminate
7725 * will drop this reference
7726 */
7727 }
1c79356b 7728 }
55e303ae 7729 vm_object_unlock(shadow_object);
91447636
A
7730 if (object != shadow_object)
7731 vm_object_unlock(object);
b0d623f7
A
7732
7733 if(!isVectorUPL)
7734 upl_unlock(upl);
7735 else {
7736 /*
7737 * If we completed our operations on an UPL that is
7738 * part of a Vectored UPL and if empty is TRUE, then
7739 * we should go ahead and deallocate this UPL element.
7740 * Then we check if this was the last of the UPL elements
7741 * within that Vectored UPL. If so, set empty to TRUE
7742 * so that in ubc_upl_abort_range or ubc_upl_abort, we
7743 * can go ahead and deallocate the Vector UPL too.
7744 */
7745 if(*empty == TRUE) {
7746 *empty = vector_upl_set_subupl(vector_upl, upl,0);
7747 upl_deallocate(upl);
7748 }
7749 goto process_upl_to_abort;
7750 }
55e303ae 7751
1c79356b
A
7752 return KERN_SUCCESS;
7753}
7754
2d21ac55 7755
1c79356b 7756kern_return_t
0b4e3aa0 7757upl_abort(
1c79356b
A
7758 upl_t upl,
7759 int error)
2d21ac55
A
7760{
7761 boolean_t empty;
7762
7e41aa88
A
7763 if (upl == UPL_NULL)
7764 return KERN_INVALID_ARGUMENT;
7765
2d21ac55 7766 return upl_abort_range(upl, 0, upl->size, error, &empty);
1c79356b
A
7767}
7768
55e303ae 7769
2d21ac55
A
7770/* an option on commit should be wire */
7771kern_return_t
7772upl_commit(
7773 upl_t upl,
7774 upl_page_info_t *page_list,
7775 mach_msg_type_number_t count)
7776{
7777 boolean_t empty;
7778
7e41aa88
A
7779 if (upl == UPL_NULL)
7780 return KERN_INVALID_ARGUMENT;
7781
2d21ac55
A
7782 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
7783}
7784
fe8ab488
A
7785
7786void
7787iopl_valid_data(
7788 upl_t upl)
7789{
7790 vm_object_t object;
7791 vm_offset_t offset;
7792 vm_page_t m, nxt_page = VM_PAGE_NULL;
7793 upl_size_t size;
7794 int wired_count = 0;
7795
7796 if (upl == NULL)
7797 panic("iopl_valid_data: NULL upl");
7798 if (vector_upl_is_valid(upl))
7799 panic("iopl_valid_data: vector upl");
7800 if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE)
7801 panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
7802
7803 object = upl->map_object;
7804
7805 if (object == kernel_object || object == compressor_object)
7806 panic("iopl_valid_data: object == kernel or compressor");
7807
7808 if (object->purgable == VM_PURGABLE_VOLATILE)
7809 panic("iopl_valid_data: object == VM_PURGABLE_VOLATILE");
7810
7811 size = upl->size;
7812
7813 vm_object_lock(object);
7814
7815 if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE))
7816 nxt_page = (vm_page_t)queue_first(&object->memq);
7817 else
7818 offset = 0 + upl->offset - object->paging_offset;
7819
7820 while (size) {
7821
7822 if (nxt_page != VM_PAGE_NULL) {
7823 m = nxt_page;
7824 nxt_page = (vm_page_t)queue_next(&nxt_page->listq);
7825 } else {
7826 m = vm_page_lookup(object, offset);
7827 offset += PAGE_SIZE;
7828
7829 if (m == VM_PAGE_NULL)
7830 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
7831 }
7832 if (m->busy) {
7833 if (!m->absent)
7834 panic("iopl_valid_data: busy page w/o absent");
7835
7836 if (m->pageq.next || m->pageq.prev)
7837 panic("iopl_valid_data: busy+absent page on page queue");
7838
7839 m->absent = FALSE;
7840 m->dirty = TRUE;
7841 m->wire_count++;
7842 wired_count++;
7843
7844 PAGE_WAKEUP_DONE(m);
7845 }
7846 size -= PAGE_SIZE;
7847 }
7848 if (wired_count) {
3e170ce0
A
7849
7850 if (!object->wired_page_count) {
7851 VM_OBJECT_WIRED(object);
7852 }
fe8ab488
A
7853 object->wired_page_count += wired_count;
7854
7855 vm_page_lockspin_queues();
7856 vm_page_wire_count += wired_count;
7857 vm_page_unlock_queues();
7858 }
7859 vm_object_unlock(object);
7860}
7861
316670eb
A
7862void
7863vm_object_set_pmap_cache_attr(
7864 vm_object_t object,
7865 upl_page_info_array_t user_page_list,
7866 unsigned int num_pages,
7867 boolean_t batch_pmap_op)
7868{
7869 unsigned int cache_attr = 0;
7870
7871 cache_attr = object->wimg_bits & VM_WIMG_MASK;
7872 assert(user_page_list);
7873 if (cache_attr != VM_WIMG_USE_DEFAULT) {
7874 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
7875 }
7876}
55e303ae 7877
3e170ce0
A
7878
7879boolean_t vm_object_iopl_wire_full(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t);
7880kern_return_t vm_object_iopl_wire_empty(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_object_offset_t *, int);
7881
7882
7883
7884boolean_t
7885vm_object_iopl_wire_full(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7886 wpl_array_t lite_list, upl_control_flags_t cntrl_flags)
7887{
7888 vm_page_t dst_page;
7889 vm_tag_t tag;
7890 unsigned int entry;
7891 int page_count;
7892 int delayed_unlock = 0;
7893 boolean_t retval = TRUE;
7894
7895 vm_object_lock_assert_exclusive(object);
7896 assert(object->purgable != VM_PURGABLE_VOLATILE);
7897 assert(object->purgable != VM_PURGABLE_EMPTY);
7898 assert(object->pager == NULL);
7899 assert(object->copy == NULL);
7900 assert(object->shadow == NULL);
7901
7902 tag = UPL_MEMORY_TAG(cntrl_flags);
7903 page_count = object->resident_page_count;
7904 dst_page = (vm_page_t)queue_first(&object->memq);
7905
7906 vm_page_lock_queues();
7907
7908 while (page_count--) {
7909
7910 if (dst_page->busy ||
7911 dst_page->fictitious ||
7912 dst_page->absent ||
7913 dst_page->error ||
7914 dst_page->cleaning ||
7915 dst_page->restart ||
7916 dst_page->encrypted ||
7917 dst_page->laundry) {
7918 retval = FALSE;
7919 goto done;
7920 }
7921 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
7922 retval = FALSE;
7923 goto done;
7924 }
7925 dst_page->reference = TRUE;
7926
7927 vm_page_wire(dst_page, tag, FALSE);
7928
7929 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7930 SET_PAGE_DIRTY(dst_page, FALSE);
7931 }
7932 entry = (unsigned int)(dst_page->offset / PAGE_SIZE);
7933 assert(entry >= 0 && entry < object->resident_page_count);
7934 lite_list[entry>>5] |= 1 << (entry & 31);
7935
7936 if (dst_page->phys_page > upl->highest_page)
7937 upl->highest_page = dst_page->phys_page;
7938
7939 if (user_page_list) {
7940 user_page_list[entry].phys_addr = dst_page->phys_page;
7941 user_page_list[entry].absent = dst_page->absent;
7942 user_page_list[entry].dirty = dst_page->dirty;
7943 user_page_list[entry].pageout = dst_page->pageout;;
7944 user_page_list[entry].precious = dst_page->precious;
7945 user_page_list[entry].device = FALSE;
7946 user_page_list[entry].speculative = FALSE;
7947 user_page_list[entry].cs_validated = FALSE;
7948 user_page_list[entry].cs_tainted = FALSE;
7949 user_page_list[entry].cs_nx = FALSE;
7950 user_page_list[entry].needed = FALSE;
7951 user_page_list[entry].mark = FALSE;
7952 }
7953 if (delayed_unlock++ > 256) {
7954 delayed_unlock = 0;
7955 lck_mtx_yield(&vm_page_queue_lock);
7956
7957 VM_CHECK_MEMORYSTATUS;
7958 }
7959 dst_page = (vm_page_t)queue_next(&dst_page->listq);
7960 }
7961done:
7962 vm_page_unlock_queues();
7963
7964 VM_CHECK_MEMORYSTATUS;
7965
7966 return (retval);
7967}
7968
7969
7970kern_return_t
7971vm_object_iopl_wire_empty(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7972 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_object_offset_t *dst_offset, int page_count)
7973{
7974 vm_page_t dst_page;
7975 vm_tag_t tag;
7976 boolean_t no_zero_fill = FALSE;
7977 int interruptible;
7978 int pages_wired = 0;
7979 int pages_inserted = 0;
7980 int entry = 0;
7981 uint64_t delayed_ledger_update = 0;
7982 kern_return_t ret = KERN_SUCCESS;
7983
7984 vm_object_lock_assert_exclusive(object);
7985 assert(object->purgable != VM_PURGABLE_VOLATILE);
7986 assert(object->purgable != VM_PURGABLE_EMPTY);
7987 assert(object->pager == NULL);
7988 assert(object->copy == NULL);
7989 assert(object->shadow == NULL);
7990
7991 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
7992 interruptible = THREAD_ABORTSAFE;
7993 else
7994 interruptible = THREAD_UNINT;
7995
7996 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
7997 no_zero_fill = TRUE;
7998
7999 tag = UPL_MEMORY_TAG(cntrl_flags);
8000
8001 while (page_count--) {
8002
8003 while ( (dst_page = vm_page_grab()) == VM_PAGE_NULL) {
8004
8005 OSAddAtomic(page_count, &vm_upl_wait_for_pages);
8006
8007 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8008
8009 if (vm_page_wait(interruptible) == FALSE) {
8010 /*
8011 * interrupted case
8012 */
8013 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
8014
8015 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8016
8017 ret = MACH_SEND_INTERRUPTED;
8018 goto done;
8019 }
8020 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
8021
8022 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8023 }
8024 if (no_zero_fill == FALSE)
8025 vm_page_zero_fill(dst_page);
8026 else
8027 dst_page->absent = TRUE;
8028
8029 dst_page->reference = TRUE;
8030
8031 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8032 SET_PAGE_DIRTY(dst_page, FALSE);
8033 }
8034 if (dst_page->absent == FALSE) {
8035 dst_page->wire_count++;
8036 pages_wired++;
8037 PAGE_WAKEUP_DONE(dst_page);
8038 }
8039 pages_inserted++;
8040
8041 vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
8042
8043 lite_list[entry>>5] |= 1 << (entry & 31);
8044
8045 if (dst_page->phys_page > upl->highest_page)
8046 upl->highest_page = dst_page->phys_page;
8047
8048 if (user_page_list) {
8049 user_page_list[entry].phys_addr = dst_page->phys_page;
8050 user_page_list[entry].absent = dst_page->absent;
8051 user_page_list[entry].dirty = dst_page->dirty;
8052 user_page_list[entry].pageout = FALSE;
8053 user_page_list[entry].precious = FALSE;
8054 user_page_list[entry].device = FALSE;
8055 user_page_list[entry].speculative = FALSE;
8056 user_page_list[entry].cs_validated = FALSE;
8057 user_page_list[entry].cs_tainted = FALSE;
8058 user_page_list[entry].cs_nx = FALSE;
8059 user_page_list[entry].needed = FALSE;
8060 user_page_list[entry].mark = FALSE;
8061 }
8062 entry++;
8063 *dst_offset += PAGE_SIZE_64;
8064 }
8065done:
8066 if (pages_wired) {
8067 vm_page_lockspin_queues();
8068 vm_page_wire_count += pages_wired;
8069 vm_page_unlock_queues();
8070 }
8071 if (pages_inserted) {
8072 if (object->internal) {
8073 OSAddAtomic(pages_inserted, &vm_page_internal_count);
8074 } else {
8075 OSAddAtomic(pages_inserted, &vm_page_external_count);
8076 }
8077 }
8078 if (delayed_ledger_update) {
8079 task_t owner;
8080
8081 owner = object->vo_purgeable_owner;
8082 assert(owner);
8083
8084 /* more non-volatile bytes */
8085 ledger_credit(owner->ledger,
8086 task_ledgers.purgeable_nonvolatile,
8087 delayed_ledger_update);
8088 /* more footprint */
8089 ledger_credit(owner->ledger,
8090 task_ledgers.phys_footprint,
8091 delayed_ledger_update);
8092 }
8093 return (ret);
8094}
8095
8096
b0d623f7
A
8097unsigned int vm_object_iopl_request_sleep_for_cleaning = 0;
8098
3e170ce0 8099
55e303ae
A
8100kern_return_t
8101vm_object_iopl_request(
8102 vm_object_t object,
8103 vm_object_offset_t offset,
91447636 8104 upl_size_t size,
55e303ae
A
8105 upl_t *upl_ptr,
8106 upl_page_info_array_t user_page_list,
8107 unsigned int *page_list_count,
3e170ce0 8108 upl_control_flags_t cntrl_flags)
55e303ae
A
8109{
8110 vm_page_t dst_page;
2d21ac55
A
8111 vm_object_offset_t dst_offset;
8112 upl_size_t xfer_size;
55e303ae 8113 upl_t upl = NULL;
91447636
A
8114 unsigned int entry;
8115 wpl_array_t lite_list = NULL;
91447636 8116 int no_zero_fill = FALSE;
6d2010ae 8117 unsigned int size_in_pages;
2d21ac55 8118 u_int32_t psize;
55e303ae
A
8119 kern_return_t ret;
8120 vm_prot_t prot;
2d21ac55 8121 struct vm_object_fault_info fault_info;
6d2010ae
A
8122 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
8123 struct vm_page_delayed_work *dwp;
b0d623f7 8124 int dw_count;
6d2010ae 8125 int dw_limit;
b0d623f7 8126 int dw_index;
39236c6e 8127 boolean_t caller_lookup;
fe8ab488
A
8128 int io_tracking_flag = 0;
8129 int interruptible;
8130
8131 boolean_t set_cache_attr_needed = FALSE;
8132 boolean_t free_wired_pages = FALSE;
3e170ce0
A
8133 boolean_t fast_path_empty_req = FALSE;
8134 boolean_t fast_path_full_req = FALSE;
55e303ae 8135
91447636
A
8136 if (cntrl_flags & ~UPL_VALID_FLAGS) {
8137 /*
8138 * For forward compatibility's sake,
8139 * reject any unknown flag.
8140 */
8141 return KERN_INVALID_VALUE;
8142 }
0b4c1975 8143 if (vm_lopage_needed == FALSE)
0c530ab8
A
8144 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
8145
8146 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
8147 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
8148 return KERN_INVALID_VALUE;
8149
8150 if (object->phys_contiguous) {
6d2010ae 8151 if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
0c530ab8 8152 return KERN_INVALID_ADDRESS;
2d21ac55 8153
6d2010ae 8154 if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
0c530ab8
A
8155 return KERN_INVALID_ADDRESS;
8156 }
8157 }
91447636
A
8158
8159 if (cntrl_flags & UPL_ENCRYPT) {
8160 /*
8161 * ENCRYPTED SWAP:
8162 * The paging path doesn't use this interface,
8163 * so we don't support the UPL_ENCRYPT flag
8164 * here. We won't encrypt the pages.
8165 */
8166 assert(! (cntrl_flags & UPL_ENCRYPT));
8167 }
39236c6e 8168 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
91447636
A
8169 no_zero_fill = TRUE;
8170
8171 if (cntrl_flags & UPL_COPYOUT_FROM)
55e303ae 8172 prot = VM_PROT_READ;
91447636 8173 else
55e303ae 8174 prot = VM_PROT_READ | VM_PROT_WRITE;
55e303ae 8175
2d21ac55
A
8176 if ((!object->internal) && (object->paging_offset != 0))
8177 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
8178
fe8ab488
A
8179#if CONFIG_IOSCHED || UPL_DEBUG
8180 if ((object->io_tracking && object != kernel_object) || upl_debug_enabled)
8181 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
8182#endif
8183
8184#if CONFIG_IOSCHED
8185 if (object->io_tracking) {
8186 /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8187 if (object != kernel_object)
8188 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
8189 }
8190#endif
2d21ac55
A
8191
8192 if (object->phys_contiguous)
8193 psize = PAGE_SIZE;
8194 else
8195 psize = size;
8196
8197 if (cntrl_flags & UPL_SET_INTERNAL) {
fe8ab488 8198 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
2d21ac55
A
8199
8200 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8201 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
8202 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
b0d623f7
A
8203 if (size == 0) {
8204 user_page_list = NULL;
8205 lite_list = NULL;
8206 }
2d21ac55 8207 } else {
fe8ab488 8208 upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
55e303ae 8209
2d21ac55 8210 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
b0d623f7
A
8211 if (size == 0) {
8212 lite_list = NULL;
8213 }
55e303ae 8214 }
2d21ac55
A
8215 if (user_page_list)
8216 user_page_list[0].device = FALSE;
8217 *upl_ptr = upl;
55e303ae 8218
2d21ac55
A
8219 upl->map_object = object;
8220 upl->size = size;
8221
6d2010ae
A
8222 size_in_pages = size / PAGE_SIZE;
8223
b0d623f7
A
8224 if (object == kernel_object &&
8225 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
8226 upl->flags |= UPL_KERNEL_OBJECT;
8227#if UPL_DEBUG
8228 vm_object_lock(object);
8229#else
8230 vm_object_lock_shared(object);
8231#endif
8232 } else {
8233 vm_object_lock(object);
8234 vm_object_activity_begin(object);
8235 }
2d21ac55
A
8236 /*
8237 * paging in progress also protects the paging_offset
8238 */
8239 upl->offset = offset + object->paging_offset;
55e303ae 8240
b0d623f7
A
8241 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8242 /*
316670eb 8243 * The user requested that access to the pages in this UPL
b0d623f7
A
8244 * be blocked until the UPL is commited or aborted.
8245 */
8246 upl->flags |= UPL_ACCESS_BLOCKED;
8247 }
8248
fe8ab488
A
8249#if CONFIG_IOSCHED || UPL_DEBUG
8250 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
316670eb 8251 vm_object_activity_begin(object);
2d21ac55 8252 queue_enter(&object->uplq, upl, upl_t, uplq);
fe8ab488
A
8253 }
8254#endif
8255
8256 if (object->phys_contiguous) {
55e303ae 8257
b0d623f7
A
8258 if (upl->flags & UPL_ACCESS_BLOCKED) {
8259 assert(!object->blocked_access);
8260 object->blocked_access = TRUE;
8261 }
8262
2d21ac55 8263 vm_object_unlock(object);
55e303ae 8264
2d21ac55
A
8265 /*
8266 * don't need any shadow mappings for this one
8267 * since it is already I/O memory
8268 */
8269 upl->flags |= UPL_DEVICE_MEMORY;
55e303ae 8270
6d2010ae 8271 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT);
2d21ac55
A
8272
8273 if (user_page_list) {
6d2010ae 8274 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT);
2d21ac55 8275 user_page_list[0].device = TRUE;
55e303ae 8276 }
2d21ac55
A
8277 if (page_list_count != NULL) {
8278 if (upl->flags & UPL_INTERNAL)
8279 *page_list_count = 0;
8280 else
8281 *page_list_count = 1;
55e303ae 8282 }
2d21ac55 8283 return KERN_SUCCESS;
55e303ae 8284 }
39236c6e 8285 if (object != kernel_object && object != compressor_object) {
b0d623f7
A
8286 /*
8287 * Protect user space from future COW operations
8288 */
fe8ab488
A
8289#if VM_OBJECT_TRACKING_OP_TRUESHARE
8290 if (!object->true_share &&
8291 vm_object_tracking_inited) {
8292 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
8293 int num = 0;
8294
8295 num = OSBacktrace(bt,
8296 VM_OBJECT_TRACKING_BTDEPTH);
8297 btlog_add_entry(vm_object_tracking_btlog,
8298 object,
8299 VM_OBJECT_TRACKING_OP_TRUESHARE,
8300 bt,
8301 num);
8302 }
8303#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8304
b0d623f7 8305 object->true_share = TRUE;
55e303ae 8306
b0d623f7
A
8307 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
8308 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8309 }
91447636 8310
b0d623f7
A
8311 if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
8312 object->copy != VM_OBJECT_NULL) {
91447636 8313 /*
b0d623f7
A
8314 * Honor copy-on-write obligations
8315 *
8316 * The caller is gathering these pages and
8317 * might modify their contents. We need to
8318 * make sure that the copy object has its own
8319 * private copies of these pages before we let
8320 * the caller modify them.
8321 *
8322 * NOTE: someone else could map the original object
8323 * after we've done this copy-on-write here, and they
8324 * could then see an inconsistent picture of the memory
8325 * while it's being modified via the UPL. To prevent this,
8326 * we would have to block access to these pages until the
8327 * UPL is released. We could use the UPL_BLOCK_ACCESS
8328 * code path for that...
91447636 8329 */
b0d623f7
A
8330 vm_object_update(object,
8331 offset,
8332 size,
8333 NULL,
8334 NULL,
8335 FALSE, /* should_return */
8336 MEMORY_OBJECT_COPY_SYNC,
8337 VM_PROT_NO_CHANGE);
8338#if DEVELOPMENT || DEBUG
8339 iopl_cow++;
8340 iopl_cow_pages += size >> PAGE_SHIFT;
8341#endif
55e303ae 8342 }
3e170ce0
A
8343 if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
8344 object->purgable != VM_PURGABLE_VOLATILE &&
8345 object->purgable != VM_PURGABLE_EMPTY &&
8346 object->copy == NULL &&
8347 size == object->vo_size &&
8348 offset == 0 &&
8349 object->shadow == NULL &&
8350 object->pager == NULL)
8351 {
8352 if (object->resident_page_count == size_in_pages)
8353 {
8354 assert(object != compressor_object);
8355 assert(object != kernel_object);
8356 fast_path_full_req = TRUE;
8357 }
8358 else if (object->resident_page_count == 0)
8359 {
8360 assert(object != compressor_object);
8361 assert(object != kernel_object);
8362 fast_path_empty_req = TRUE;
8363 set_cache_attr_needed = TRUE;
8364 }
8365 }
8366
fe8ab488
A
8367 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
8368 interruptible = THREAD_ABORTSAFE;
8369 else
8370 interruptible = THREAD_UNINT;
b0d623f7 8371
55e303ae 8372 entry = 0;
2d21ac55
A
8373
8374 xfer_size = size;
8375 dst_offset = offset;
fe8ab488
A
8376 dw_count = 0;
8377
3e170ce0 8378 if (fast_path_full_req) {
fe8ab488 8379
3e170ce0
A
8380 if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags) == TRUE)
8381 goto finish;
8382 /*
8383 * we couldn't complete the processing of this request on the fast path
8384 * so fall through to the slow path and finish up
8385 */
fe8ab488 8386
3e170ce0 8387 } else if (fast_path_empty_req) {
fe8ab488 8388
3e170ce0
A
8389 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8390 ret = KERN_MEMORY_ERROR;
8391 goto return_err;
fe8ab488 8392 }
3e170ce0
A
8393 ret = vm_object_iopl_wire_empty(object, upl, user_page_list, lite_list, cntrl_flags, &dst_offset, size_in_pages);
8394
8395 if (ret) {
8396 free_wired_pages = TRUE;
8397 goto return_err;
fe8ab488
A
8398 }
8399 goto finish;
8400 }
2d21ac55
A
8401
8402 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
8403 fault_info.user_tag = 0;
8404 fault_info.lo_offset = offset;
8405 fault_info.hi_offset = offset + xfer_size;
8406 fault_info.no_cache = FALSE;
b0d623f7 8407 fault_info.stealth = FALSE;
6d2010ae
A
8408 fault_info.io_sync = FALSE;
8409 fault_info.cs_bypass = FALSE;
fe8ab488
A
8410 fault_info.mark_zf_absent = TRUE;
8411 fault_info.interruptible = interruptible;
8412 fault_info.batch_pmap_op = TRUE;
b0d623f7
A
8413
8414 dwp = &dw_array[0];
6d2010ae 8415 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
2d21ac55 8416
55e303ae 8417 while (xfer_size) {
2d21ac55 8418 vm_fault_return_t result;
b0d623f7
A
8419
8420 dwp->dw_mask = 0;
2d21ac55 8421
3e170ce0
A
8422 if (fast_path_full_req) {
8423 /*
8424 * if we get here, it means that we ran into a page
8425 * state we couldn't handle in the fast path and
8426 * bailed out to the slow path... since the order
8427 * we look at pages is different between the 2 paths,
8428 * the following check is needed to determine whether
8429 * this page was already processed in the fast path
8430 */
8431 if (lite_list[entry>>5] & (1 << (entry & 31)))
8432 goto skip_page;
8433 }
55e303ae
A
8434 dst_page = vm_page_lookup(object, dst_offset);
8435
91447636
A
8436 /*
8437 * ENCRYPTED SWAP:
8438 * If the page is encrypted, we need to decrypt it,
8439 * so force a soft page fault.
8440 */
b0d623f7
A
8441 if (dst_page == VM_PAGE_NULL ||
8442 dst_page->busy ||
8443 dst_page->encrypted ||
8444 dst_page->error ||
8445 dst_page->restart ||
8446 dst_page->absent ||
8447 dst_page->fictitious) {
8448
8449 if (object == kernel_object)
8450 panic("vm_object_iopl_request: missing/bad page in kernel object\n");
39236c6e
A
8451 if (object == compressor_object)
8452 panic("vm_object_iopl_request: missing/bad page in compressor object\n");
8453
8454 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8455 ret = KERN_MEMORY_ERROR;
8456 goto return_err;
8457 }
fe8ab488 8458 set_cache_attr_needed = TRUE;
39236c6e
A
8459
8460 /*
8461 * We just looked up the page and the result remains valid
8462 * until the object lock is release, so send it to
8463 * vm_fault_page() (as "dst_page"), to avoid having to
8464 * look it up again there.
8465 */
8466 caller_lookup = TRUE;
2d21ac55 8467
55e303ae
A
8468 do {
8469 vm_page_t top_page;
8470 kern_return_t error_code;
2d21ac55 8471
2d21ac55 8472 fault_info.cluster_size = xfer_size;
55e303ae 8473
b0d623f7
A
8474 vm_object_paging_begin(object);
8475
55e303ae 8476 result = vm_fault_page(object, dst_offset,
39236c6e
A
8477 prot | VM_PROT_WRITE, FALSE,
8478 caller_lookup,
2d21ac55
A
8479 &prot, &dst_page, &top_page,
8480 (int *)0,
8481 &error_code, no_zero_fill,
8482 FALSE, &fault_info);
8483
39236c6e
A
8484 /* our lookup is no longer valid at this point */
8485 caller_lookup = FALSE;
8486
2d21ac55
A
8487 switch (result) {
8488
55e303ae
A
8489 case VM_FAULT_SUCCESS:
8490
d41d1dae
A
8491 if ( !dst_page->absent) {
8492 PAGE_WAKEUP_DONE(dst_page);
8493 } else {
8494 /*
8495 * we only get back an absent page if we
8496 * requested that it not be zero-filled
8497 * because we are about to fill it via I/O
8498 *
8499 * absent pages should be left BUSY
8500 * to prevent them from being faulted
8501 * into an address space before we've
8502 * had a chance to complete the I/O on
8503 * them since they may contain info that
8504 * shouldn't be seen by the faulting task
8505 */
8506 }
55e303ae
A
8507 /*
8508 * Release paging references and
8509 * top-level placeholder page, if any.
8510 */
2d21ac55 8511 if (top_page != VM_PAGE_NULL) {
55e303ae 8512 vm_object_t local_object;
2d21ac55
A
8513
8514 local_object = top_page->object;
8515
8516 if (top_page->object != dst_page->object) {
8517 vm_object_lock(local_object);
55e303ae 8518 VM_PAGE_FREE(top_page);
2d21ac55
A
8519 vm_object_paging_end(local_object);
8520 vm_object_unlock(local_object);
55e303ae
A
8521 } else {
8522 VM_PAGE_FREE(top_page);
2d21ac55 8523 vm_object_paging_end(local_object);
55e303ae
A
8524 }
8525 }
b0d623f7 8526 vm_object_paging_end(object);
55e303ae
A
8527 break;
8528
55e303ae
A
8529 case VM_FAULT_RETRY:
8530 vm_object_lock(object);
55e303ae
A
8531 break;
8532
6d2010ae 8533 case VM_FAULT_MEMORY_SHORTAGE:
3e170ce0 8534 OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
2d21ac55 8535
6d2010ae 8536 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
55e303ae 8537
55e303ae 8538 if (vm_page_wait(interruptible)) {
3e170ce0 8539 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
6d2010ae
A
8540
8541 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
55e303ae 8542 vm_object_lock(object);
6d2010ae 8543
55e303ae
A
8544 break;
8545 }
3e170ce0 8546 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
6d2010ae
A
8547
8548 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8549
55e303ae
A
8550 /* fall thru */
8551
8552 case VM_FAULT_INTERRUPTED:
8553 error_code = MACH_SEND_INTERRUPTED;
8554 case VM_FAULT_MEMORY_ERROR:
b0d623f7 8555 memory_error:
2d21ac55 8556 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
0c530ab8 8557
2d21ac55 8558 vm_object_lock(object);
0c530ab8 8559 goto return_err;
b0d623f7
A
8560
8561 case VM_FAULT_SUCCESS_NO_VM_PAGE:
8562 /* success but no page: fail */
8563 vm_object_paging_end(object);
8564 vm_object_unlock(object);
8565 goto memory_error;
8566
8567 default:
8568 panic("vm_object_iopl_request: unexpected error"
8569 " 0x%x from vm_fault_page()\n", result);
55e303ae 8570 }
2d21ac55 8571 } while (result != VM_FAULT_SUCCESS);
b0d623f7 8572
55e303ae 8573 }
b0d623f7
A
8574 if (upl->flags & UPL_KERNEL_OBJECT)
8575 goto record_phys_addr;
8576
39236c6e
A
8577 if (dst_page->compressor) {
8578 dst_page->busy = TRUE;
8579 goto record_phys_addr;
8580 }
8581
b0d623f7
A
8582 if (dst_page->cleaning) {
8583 /*
316670eb 8584 * Someone else is cleaning this page in place.
b0d623f7
A
8585 * In theory, we should be able to proceed and use this
8586 * page but they'll probably end up clearing the "busy"
8587 * bit on it in upl_commit_range() but they didn't set
8588 * it, so they would clear our "busy" bit and open
8589 * us to race conditions.
8590 * We'd better wait for the cleaning to complete and
8591 * then try again.
8592 */
8593 vm_object_iopl_request_sleep_for_cleaning++;
8594 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
8595 continue;
8596 }
316670eb
A
8597 if (dst_page->laundry) {
8598 dst_page->pageout = FALSE;
8599
8600 vm_pageout_steal_laundry(dst_page, FALSE);
8601 }
0c530ab8
A
8602 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
8603 dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
8604 vm_page_t low_page;
8605 int refmod;
8606
8607 /*
8608 * support devices that can't DMA above 32 bits
8609 * by substituting pages from a pool of low address
8610 * memory for any pages we find above the 4G mark
8611 * can't substitute if the page is already wired because
8612 * we don't know whether that physical address has been
8613 * handed out to some other 64 bit capable DMA device to use
8614 */
b0d623f7 8615 if (VM_PAGE_WIRED(dst_page)) {
0c530ab8
A
8616 ret = KERN_PROTECTION_FAILURE;
8617 goto return_err;
8618 }
0c530ab8
A
8619 low_page = vm_page_grablo();
8620
8621 if (low_page == VM_PAGE_NULL) {
8622 ret = KERN_RESOURCE_SHORTAGE;
8623 goto return_err;
8624 }
8625 /*
8626 * from here until the vm_page_replace completes
8627 * we musn't drop the object lock... we don't
8628 * want anyone refaulting this page in and using
8629 * it after we disconnect it... we want the fault
8630 * to find the new page being substituted.
8631 */
2d21ac55
A
8632 if (dst_page->pmapped)
8633 refmod = pmap_disconnect(dst_page->phys_page);
8634 else
8635 refmod = 0;
d41d1dae 8636
6d2010ae 8637 if (!dst_page->absent)
d41d1dae 8638 vm_page_copy(dst_page, low_page);
2d21ac55 8639
0c530ab8
A
8640 low_page->reference = dst_page->reference;
8641 low_page->dirty = dst_page->dirty;
d41d1dae 8642 low_page->absent = dst_page->absent;
0c530ab8
A
8643
8644 if (refmod & VM_MEM_REFERENCED)
8645 low_page->reference = TRUE;
316670eb
A
8646 if (refmod & VM_MEM_MODIFIED) {
8647 SET_PAGE_DIRTY(low_page, FALSE);
8648 }
0c530ab8 8649
0c530ab8 8650 vm_page_replace(low_page, object, dst_offset);
0c530ab8
A
8651
8652 dst_page = low_page;
8653 /*
8654 * vm_page_grablo returned the page marked
8655 * BUSY... we don't need a PAGE_WAKEUP_DONE
8656 * here, because we've never dropped the object lock
8657 */
d41d1dae
A
8658 if ( !dst_page->absent)
8659 dst_page->busy = FALSE;
0c530ab8 8660 }
d41d1dae
A
8661 if ( !dst_page->busy)
8662 dwp->dw_mask |= DW_vm_page_wire;
55e303ae 8663
91447636
A
8664 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8665 /*
8666 * Mark the page "busy" to block any future page fault
6d2010ae
A
8667 * on this page in addition to wiring it.
8668 * We'll also remove the mapping
91447636
A
8669 * of all these pages before leaving this routine.
8670 */
8671 assert(!dst_page->fictitious);
8672 dst_page->busy = TRUE;
8673 }
2d21ac55
A
8674 /*
8675 * expect the page to be used
8676 * page queues lock must be held to set 'reference'
8677 */
b0d623f7 8678 dwp->dw_mask |= DW_set_reference;
55e303ae 8679
316670eb
A
8680 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8681 SET_PAGE_DIRTY(dst_page, TRUE);
8682 }
15129b1c
A
8683 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
8684 pmap_sync_page_attributes_phys(dst_page->phys_page);
8685 dst_page->written_by_kernel = FALSE;
8686 }
8687
b0d623f7 8688record_phys_addr:
d41d1dae
A
8689 if (dst_page->busy)
8690 upl->flags |= UPL_HAS_BUSY;
8691
3e170ce0 8692 lite_list[entry>>5] |= 1 << (entry & 31);
55e303ae 8693
2d21ac55
A
8694 if (dst_page->phys_page > upl->highest_page)
8695 upl->highest_page = dst_page->phys_page;
55e303ae 8696
2d21ac55
A
8697 if (user_page_list) {
8698 user_page_list[entry].phys_addr = dst_page->phys_page;
2d21ac55
A
8699 user_page_list[entry].pageout = dst_page->pageout;
8700 user_page_list[entry].absent = dst_page->absent;
593a1d5f 8701 user_page_list[entry].dirty = dst_page->dirty;
2d21ac55 8702 user_page_list[entry].precious = dst_page->precious;
593a1d5f 8703 user_page_list[entry].device = FALSE;
316670eb 8704 user_page_list[entry].needed = FALSE;
2d21ac55
A
8705 if (dst_page->clustered == TRUE)
8706 user_page_list[entry].speculative = dst_page->speculative;
8707 else
8708 user_page_list[entry].speculative = FALSE;
593a1d5f
A
8709 user_page_list[entry].cs_validated = dst_page->cs_validated;
8710 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
c18c124e 8711 user_page_list[entry].cs_nx = dst_page->cs_nx;
3e170ce0 8712 user_page_list[entry].mark = FALSE;
55e303ae 8713 }
39236c6e 8714 if (object != kernel_object && object != compressor_object) {
b0d623f7
A
8715 /*
8716 * someone is explicitly grabbing this page...
8717 * update clustered and speculative state
8718 *
8719 */
fe8ab488
A
8720 if (dst_page->clustered)
8721 VM_PAGE_CONSUME_CLUSTERED(dst_page);
55e303ae 8722 }
3e170ce0 8723skip_page:
55e303ae
A
8724 entry++;
8725 dst_offset += PAGE_SIZE_64;
8726 xfer_size -= PAGE_SIZE;
b0d623f7
A
8727
8728 if (dwp->dw_mask) {
6d2010ae 8729 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
b0d623f7 8730
6d2010ae 8731 if (dw_count >= dw_limit) {
3e170ce0 8732 vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
b0d623f7
A
8733
8734 dwp = &dw_array[0];
8735 dw_count = 0;
8736 }
8737 }
55e303ae 8738 }
3e170ce0 8739 assert(entry == size_in_pages);
55e303ae 8740
3e170ce0
A
8741 if (dw_count)
8742 vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
fe8ab488
A
8743finish:
8744 if (user_page_list && set_cache_attr_needed == TRUE)
3e170ce0 8745 vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
316670eb 8746
2d21ac55
A
8747 if (page_list_count != NULL) {
8748 if (upl->flags & UPL_INTERNAL)
55e303ae 8749 *page_list_count = 0;
3e170ce0
A
8750 else if (*page_list_count > size_in_pages)
8751 *page_list_count = size_in_pages;
55e303ae 8752 }
55e303ae 8753 vm_object_unlock(object);
55e303ae 8754
91447636
A
8755 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8756 /*
8757 * We've marked all the pages "busy" so that future
8758 * page faults will block.
8759 * Now remove the mapping for these pages, so that they
8760 * can't be accessed without causing a page fault.
8761 */
8762 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
8763 PMAP_NULL, 0, VM_PROT_NONE);
b0d623f7
A
8764 assert(!object->blocked_access);
8765 object->blocked_access = TRUE;
91447636 8766 }
3e170ce0 8767
91447636 8768 return KERN_SUCCESS;
0c530ab8 8769
0c530ab8 8770return_err:
b0d623f7 8771 dw_index = 0;
0c530ab8
A
8772
8773 for (; offset < dst_offset; offset += PAGE_SIZE) {
0b4c1975
A
8774 boolean_t need_unwire;
8775
0c530ab8
A
8776 dst_page = vm_page_lookup(object, offset);
8777
8778 if (dst_page == VM_PAGE_NULL)
d41d1dae 8779 panic("vm_object_iopl_request: Wired page missing. \n");
2d21ac55 8780
0b4c1975
A
8781 /*
8782 * if we've already processed this page in an earlier
8783 * dw_do_work, we need to undo the wiring... we will
8784 * leave the dirty and reference bits on if they
8785 * were set, since we don't have a good way of knowing
8786 * what the previous state was and we won't get here
8787 * under any normal circumstances... we will always
8788 * clear BUSY and wakeup any waiters via vm_page_free
8789 * or PAGE_WAKEUP_DONE
8790 */
8791 need_unwire = TRUE;
8792
b0d623f7
A
8793 if (dw_count) {
8794 if (dw_array[dw_index].dw_m == dst_page) {
0b4c1975
A
8795 /*
8796 * still in the deferred work list
8797 * which means we haven't yet called
8798 * vm_page_wire on this page
8799 */
8800 need_unwire = FALSE;
d41d1dae
A
8801
8802 dw_index++;
8803 dw_count--;
b0d623f7
A
8804 }
8805 }
0b4c1975
A
8806 vm_page_lock_queues();
8807
fe8ab488 8808 if (dst_page->absent || free_wired_pages == TRUE) {
d41d1dae 8809 vm_page_free(dst_page);
0b4c1975 8810
d41d1dae
A
8811 need_unwire = FALSE;
8812 } else {
8813 if (need_unwire == TRUE)
8814 vm_page_unwire(dst_page, TRUE);
0b4c1975 8815
0b4c1975 8816 PAGE_WAKEUP_DONE(dst_page);
6d2010ae 8817 }
0c530ab8 8818 vm_page_unlock_queues();
2d21ac55 8819
0b4c1975
A
8820 if (need_unwire == TRUE)
8821 VM_STAT_INCR(reactivations);
0c530ab8 8822 }
b0d623f7
A
8823#if UPL_DEBUG
8824 upl->upl_state = 2;
8825#endif
8826 if (! (upl->flags & UPL_KERNEL_OBJECT)) {
8827 vm_object_activity_end(object);
316670eb 8828 vm_object_collapse(object, 0, TRUE);
b0d623f7 8829 }
0c530ab8
A
8830 vm_object_unlock(object);
8831 upl_destroy(upl);
8832
8833 return ret;
1c79356b
A
8834}
8835
91447636
A
8836kern_return_t
8837upl_transpose(
8838 upl_t upl1,
8839 upl_t upl2)
1c79356b 8840{
91447636
A
8841 kern_return_t retval;
8842 boolean_t upls_locked;
8843 vm_object_t object1, object2;
1c79356b 8844
b0d623f7 8845 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
91447636
A
8846 return KERN_INVALID_ARGUMENT;
8847 }
8848
8849 upls_locked = FALSE;
1c79356b 8850
91447636
A
8851 /*
8852 * Since we need to lock both UPLs at the same time,
8853 * avoid deadlocks by always taking locks in the same order.
8854 */
8855 if (upl1 < upl2) {
8856 upl_lock(upl1);
8857 upl_lock(upl2);
8858 } else {
8859 upl_lock(upl2);
8860 upl_lock(upl1);
8861 }
8862 upls_locked = TRUE; /* the UPLs will need to be unlocked */
8863
8864 object1 = upl1->map_object;
8865 object2 = upl2->map_object;
8866
8867 if (upl1->offset != 0 || upl2->offset != 0 ||
8868 upl1->size != upl2->size) {
8869 /*
8870 * We deal only with full objects, not subsets.
8871 * That's because we exchange the entire backing store info
8872 * for the objects: pager, resident pages, etc... We can't do
8873 * only part of it.
8874 */
8875 retval = KERN_INVALID_VALUE;
8876 goto done;
8877 }
8878
8879 /*
8880 * Tranpose the VM objects' backing store.
8881 */
8882 retval = vm_object_transpose(object1, object2,
8883 (vm_object_size_t) upl1->size);
8884
8885 if (retval == KERN_SUCCESS) {
8886 /*
8887 * Make each UPL point to the correct VM object, i.e. the
8888 * object holding the pages that the UPL refers to...
8889 */
fe8ab488
A
8890#if CONFIG_IOSCHED || UPL_DEBUG
8891 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8892 vm_object_lock(object1);
8893 vm_object_lock(object2);
8894 }
8895 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8896 queue_remove(&object1->uplq, upl1, upl_t, uplq);
8897 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8898 queue_remove(&object2->uplq, upl2, upl_t, uplq);
2d21ac55 8899#endif
91447636
A
8900 upl1->map_object = object2;
8901 upl2->map_object = object1;
fe8ab488
A
8902
8903#if CONFIG_IOSCHED || UPL_DEBUG
8904 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8905 queue_enter(&object2->uplq, upl1, upl_t, uplq);
8906 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8907 queue_enter(&object1->uplq, upl2, upl_t, uplq);
8908 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8909 vm_object_unlock(object2);
8910 vm_object_unlock(object1);
8911 }
2d21ac55 8912#endif
91447636
A
8913 }
8914
8915done:
8916 /*
8917 * Cleanup.
8918 */
8919 if (upls_locked) {
8920 upl_unlock(upl1);
8921 upl_unlock(upl2);
8922 upls_locked = FALSE;
8923 }
8924
8925 return retval;
8926}
8927
316670eb
A
8928void
8929upl_range_needed(
8930 upl_t upl,
8931 int index,
8932 int count)
8933{
8934 upl_page_info_t *user_page_list;
8935 int size_in_pages;
8936
8937 if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
8938 return;
8939
8940 size_in_pages = upl->size / PAGE_SIZE;
8941
8942 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8943
8944 while (count-- && index < size_in_pages)
8945 user_page_list[index++].needed = TRUE;
8946}
8947
8948
91447636
A
8949/*
8950 * ENCRYPTED SWAP:
8951 *
8952 * Rationale: the user might have some encrypted data on disk (via
8953 * FileVault or any other mechanism). That data is then decrypted in
8954 * memory, which is safe as long as the machine is secure. But that
8955 * decrypted data in memory could be paged out to disk by the default
8956 * pager. The data would then be stored on disk in clear (not encrypted)
8957 * and it could be accessed by anyone who gets physical access to the
8958 * disk (if the laptop or the disk gets stolen for example). This weakens
8959 * the security offered by FileVault.
8960 *
8961 * Solution: the default pager will optionally request that all the
8962 * pages it gathers for pageout be encrypted, via the UPL interfaces,
8963 * before it sends this UPL to disk via the vnode_pageout() path.
8964 *
8965 * Notes:
8966 *
8967 * To avoid disrupting the VM LRU algorithms, we want to keep the
8968 * clean-in-place mechanisms, which allow us to send some extra pages to
8969 * swap (clustering) without actually removing them from the user's
8970 * address space. We don't want the user to unknowingly access encrypted
8971 * data, so we have to actually remove the encrypted pages from the page
8972 * table. When the user accesses the data, the hardware will fail to
8973 * locate the virtual page in its page table and will trigger a page
8974 * fault. We can then decrypt the page and enter it in the page table
8975 * again. Whenever we allow the user to access the contents of a page,
8976 * we have to make sure it's not encrypted.
8977 *
8978 *
8979 */
8980/*
8981 * ENCRYPTED SWAP:
8982 * Reserve of virtual addresses in the kernel address space.
8983 * We need to map the physical pages in the kernel, so that we
8984 * can call the encryption/decryption routines with a kernel
8985 * virtual address. We keep this pool of pre-allocated kernel
8986 * virtual addresses so that we don't have to scan the kernel's
316670eb 8987 * virtaul address space each time we need to encrypt or decrypt
91447636
A
8988 * a physical page.
8989 * It would be nice to be able to encrypt and decrypt in physical
8990 * mode but that might not always be more efficient...
8991 */
8992decl_simple_lock_data(,vm_paging_lock)
8993#define VM_PAGING_NUM_PAGES 64
8994vm_map_offset_t vm_paging_base_address = 0;
8995boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
8996int vm_paging_max_index = 0;
2d21ac55
A
8997int vm_paging_page_waiter = 0;
8998int vm_paging_page_waiter_total = 0;
91447636
A
8999unsigned long vm_paging_no_kernel_page = 0;
9000unsigned long vm_paging_objects_mapped = 0;
9001unsigned long vm_paging_pages_mapped = 0;
9002unsigned long vm_paging_objects_mapped_slow = 0;
9003unsigned long vm_paging_pages_mapped_slow = 0;
9004
2d21ac55
A
9005void
9006vm_paging_map_init(void)
9007{
9008 kern_return_t kr;
9009 vm_map_offset_t page_map_offset;
9010 vm_map_entry_t map_entry;
9011
9012 assert(vm_paging_base_address == 0);
9013
9014 /*
9015 * Initialize our pool of pre-allocated kernel
9016 * virtual addresses.
9017 */
9018 page_map_offset = 0;
9019 kr = vm_map_find_space(kernel_map,
9020 &page_map_offset,
9021 VM_PAGING_NUM_PAGES * PAGE_SIZE,
9022 0,
9023 0,
9024 &map_entry);
9025 if (kr != KERN_SUCCESS) {
9026 panic("vm_paging_map_init: kernel_map full\n");
9027 }
3e170ce0
A
9028 VME_OBJECT_SET(map_entry, kernel_object);
9029 VME_OFFSET_SET(map_entry, page_map_offset);
6d2010ae
A
9030 map_entry->protection = VM_PROT_NONE;
9031 map_entry->max_protection = VM_PROT_NONE;
9032 map_entry->permanent = TRUE;
2d21ac55
A
9033 vm_object_reference(kernel_object);
9034 vm_map_unlock(kernel_map);
9035
9036 assert(vm_paging_base_address == 0);
9037 vm_paging_base_address = page_map_offset;
9038}
9039
91447636
A
9040/*
9041 * ENCRYPTED SWAP:
9042 * vm_paging_map_object:
9043 * Maps part of a VM object's pages in the kernel
9044 * virtual address space, using the pre-allocated
9045 * kernel virtual addresses, if possible.
9046 * Context:
9047 * The VM object is locked. This lock will get
2d21ac55
A
9048 * dropped and re-acquired though, so the caller
9049 * must make sure the VM object is kept alive
9050 * (by holding a VM map that has a reference
9051 * on it, for example, or taking an extra reference).
9052 * The page should also be kept busy to prevent
9053 * it from being reclaimed.
91447636
A
9054 */
9055kern_return_t
9056vm_paging_map_object(
91447636
A
9057 vm_page_t page,
9058 vm_object_t object,
9059 vm_object_offset_t offset,
593a1d5f 9060 vm_prot_t protection,
39236c6e
A
9061 boolean_t can_unlock_object,
9062 vm_map_size_t *size, /* IN/OUT */
9063 vm_map_offset_t *address, /* OUT */
9064 boolean_t *need_unmap) /* OUT */
91447636
A
9065{
9066 kern_return_t kr;
9067 vm_map_offset_t page_map_offset;
9068 vm_map_size_t map_size;
9069 vm_object_offset_t object_offset;
91447636 9070 int i;
91447636 9071
91447636 9072 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
39236c6e
A
9073 /* use permanent 1-to-1 kernel mapping of physical memory ? */
9074#if __x86_64__
9075 *address = (vm_map_offset_t)
9076 PHYSMAP_PTOV((pmap_paddr_t)page->phys_page <<
9077 PAGE_SHIFT);
9078 *need_unmap = FALSE;
9079 return KERN_SUCCESS;
9080#else
9081#warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
9082#endif
9083
2d21ac55 9084 assert(page->busy);
91447636 9085 /*
91447636
A
9086 * Use one of the pre-allocated kernel virtual addresses
9087 * and just enter the VM page in the kernel address space
9088 * at that virtual address.
9089 */
91447636
A
9090 simple_lock(&vm_paging_lock);
9091
91447636
A
9092 /*
9093 * Try and find an available kernel virtual address
9094 * from our pre-allocated pool.
9095 */
9096 page_map_offset = 0;
2d21ac55
A
9097 for (;;) {
9098 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
9099 if (vm_paging_page_inuse[i] == FALSE) {
9100 page_map_offset =
9101 vm_paging_base_address +
9102 (i * PAGE_SIZE);
9103 break;
9104 }
9105 }
9106 if (page_map_offset != 0) {
9107 /* found a space to map our page ! */
9108 break;
9109 }
9110
9111 if (can_unlock_object) {
9112 /*
9113 * If we can afford to unlock the VM object,
9114 * let's take the slow path now...
9115 */
91447636
A
9116 break;
9117 }
2d21ac55
A
9118 /*
9119 * We can't afford to unlock the VM object, so
9120 * let's wait for a space to become available...
9121 */
9122 vm_paging_page_waiter_total++;
9123 vm_paging_page_waiter++;
fe8ab488
A
9124 kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
9125 if (kr == THREAD_WAITING) {
9126 simple_unlock(&vm_paging_lock);
9127 kr = thread_block(THREAD_CONTINUE_NULL);
9128 simple_lock(&vm_paging_lock);
9129 }
2d21ac55
A
9130 vm_paging_page_waiter--;
9131 /* ... and try again */
91447636
A
9132 }
9133
9134 if (page_map_offset != 0) {
9135 /*
9136 * We found a kernel virtual address;
9137 * map the physical page to that virtual address.
9138 */
9139 if (i > vm_paging_max_index) {
9140 vm_paging_max_index = i;
9141 }
9142 vm_paging_page_inuse[i] = TRUE;
9143 simple_unlock(&vm_paging_lock);
2d21ac55 9144
2d21ac55
A
9145 page->pmapped = TRUE;
9146
9147 /*
9148 * Keep the VM object locked over the PMAP_ENTER
9149 * and the actual use of the page by the kernel,
9150 * or this pmap mapping might get undone by a
9151 * vm_object_pmap_protect() call...
9152 */
0c530ab8
A
9153 PMAP_ENTER(kernel_pmap,
9154 page_map_offset,
9155 page,
593a1d5f 9156 protection,
316670eb 9157 VM_PROT_NONE,
6d2010ae 9158 0,
0c530ab8 9159 TRUE);
91447636
A
9160 vm_paging_objects_mapped++;
9161 vm_paging_pages_mapped++;
9162 *address = page_map_offset;
39236c6e 9163 *need_unmap = TRUE;
91447636
A
9164
9165 /* all done and mapped, ready to use ! */
9166 return KERN_SUCCESS;
9167 }
9168
9169 /*
9170 * We ran out of pre-allocated kernel virtual
9171 * addresses. Just map the page in the kernel
9172 * the slow and regular way.
9173 */
9174 vm_paging_no_kernel_page++;
9175 simple_unlock(&vm_paging_lock);
2d21ac55
A
9176 }
9177
9178 if (! can_unlock_object) {
39236c6e
A
9179 *address = 0;
9180 *size = 0;
9181 *need_unmap = FALSE;
2d21ac55 9182 return KERN_NOT_SUPPORTED;
91447636 9183 }
91447636
A
9184
9185 object_offset = vm_object_trunc_page(offset);
39236c6e
A
9186 map_size = vm_map_round_page(*size,
9187 VM_MAP_PAGE_MASK(kernel_map));
91447636
A
9188
9189 /*
9190 * Try and map the required range of the object
9191 * in the kernel_map
9192 */
9193
91447636
A
9194 vm_object_reference_locked(object); /* for the map entry */
9195 vm_object_unlock(object);
9196
9197 kr = vm_map_enter(kernel_map,
9198 address,
9199 map_size,
9200 0,
9201 VM_FLAGS_ANYWHERE,
9202 object,
9203 object_offset,
9204 FALSE,
593a1d5f 9205 protection,
91447636
A
9206 VM_PROT_ALL,
9207 VM_INHERIT_NONE);
9208 if (kr != KERN_SUCCESS) {
9209 *address = 0;
9210 *size = 0;
39236c6e 9211 *need_unmap = FALSE;
91447636 9212 vm_object_deallocate(object); /* for the map entry */
2d21ac55 9213 vm_object_lock(object);
91447636
A
9214 return kr;
9215 }
9216
9217 *size = map_size;
9218
9219 /*
9220 * Enter the mapped pages in the page table now.
9221 */
9222 vm_object_lock(object);
2d21ac55
A
9223 /*
9224 * VM object must be kept locked from before PMAP_ENTER()
9225 * until after the kernel is done accessing the page(s).
9226 * Otherwise, the pmap mappings in the kernel could be
9227 * undone by a call to vm_object_pmap_protect().
9228 */
9229
91447636
A
9230 for (page_map_offset = 0;
9231 map_size != 0;
9232 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
91447636
A
9233
9234 page = vm_page_lookup(object, offset + page_map_offset);
9235 if (page == VM_PAGE_NULL) {
2d21ac55
A
9236 printf("vm_paging_map_object: no page !?");
9237 vm_object_unlock(object);
9238 kr = vm_map_remove(kernel_map, *address, *size,
9239 VM_MAP_NO_FLAGS);
9240 assert(kr == KERN_SUCCESS);
9241 *address = 0;
9242 *size = 0;
39236c6e 9243 *need_unmap = FALSE;
2d21ac55
A
9244 vm_object_lock(object);
9245 return KERN_MEMORY_ERROR;
91447636 9246 }
2d21ac55 9247 page->pmapped = TRUE;
91447636 9248
2d21ac55 9249 //assert(pmap_verify_free(page->phys_page));
91447636
A
9250 PMAP_ENTER(kernel_pmap,
9251 *address + page_map_offset,
9252 page,
593a1d5f 9253 protection,
316670eb 9254 VM_PROT_NONE,
6d2010ae 9255 0,
0c530ab8 9256 TRUE);
91447636
A
9257 }
9258
9259 vm_paging_objects_mapped_slow++;
b0d623f7 9260 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
91447636 9261
39236c6e
A
9262 *need_unmap = TRUE;
9263
91447636
A
9264 return KERN_SUCCESS;
9265}
9266
9267/*
9268 * ENCRYPTED SWAP:
9269 * vm_paging_unmap_object:
9270 * Unmaps part of a VM object's pages from the kernel
9271 * virtual address space.
9272 * Context:
9273 * The VM object is locked. This lock will get
9274 * dropped and re-acquired though.
9275 */
9276void
9277vm_paging_unmap_object(
9278 vm_object_t object,
9279 vm_map_offset_t start,
9280 vm_map_offset_t end)
9281{
9282 kern_return_t kr;
91447636 9283 int i;
91447636 9284
0c530ab8 9285 if ((vm_paging_base_address == 0) ||
8f6c56a5
A
9286 (start < vm_paging_base_address) ||
9287 (end > (vm_paging_base_address
2d21ac55 9288 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
91447636
A
9289 /*
9290 * We didn't use our pre-allocated pool of
9291 * kernel virtual address. Deallocate the
9292 * virtual memory.
9293 */
9294 if (object != VM_OBJECT_NULL) {
9295 vm_object_unlock(object);
9296 }
9297 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
9298 if (object != VM_OBJECT_NULL) {
9299 vm_object_lock(object);
9300 }
9301 assert(kr == KERN_SUCCESS);
9302 } else {
9303 /*
9304 * We used a kernel virtual address from our
9305 * pre-allocated pool. Put it back in the pool
9306 * for next time.
9307 */
91447636 9308 assert(end - start == PAGE_SIZE);
b0d623f7
A
9309 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
9310 assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
91447636
A
9311
9312 /* undo the pmap mapping */
0c530ab8 9313 pmap_remove(kernel_pmap, start, end);
91447636
A
9314
9315 simple_lock(&vm_paging_lock);
9316 vm_paging_page_inuse[i] = FALSE;
2d21ac55
A
9317 if (vm_paging_page_waiter) {
9318 thread_wakeup(&vm_paging_page_waiter);
9319 }
91447636 9320 simple_unlock(&vm_paging_lock);
91447636
A
9321 }
9322}
9323
fe8ab488 9324#if ENCRYPTED_SWAP
91447636
A
9325/*
9326 * Encryption data.
9327 * "iv" is the "initial vector". Ideally, we want to
9328 * have a different one for each page we encrypt, so that
9329 * crackers can't find encryption patterns too easily.
9330 */
9331#define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
9332boolean_t swap_crypt_ctx_initialized = FALSE;
316670eb 9333uint32_t swap_crypt_key[8]; /* big enough for a 256 key */
91447636
A
9334aes_ctx swap_crypt_ctx;
9335const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
9336
9337#if DEBUG
9338boolean_t swap_crypt_ctx_tested = FALSE;
9339unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
9340unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
9341unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
9342#endif /* DEBUG */
9343
91447636
A
9344/*
9345 * Initialize the encryption context: key and key size.
9346 */
9347void swap_crypt_ctx_initialize(void); /* forward */
9348void
9349swap_crypt_ctx_initialize(void)
9350{
9351 unsigned int i;
9352
9353 /*
9354 * No need for locking to protect swap_crypt_ctx_initialized
9355 * because the first use of encryption will come from the
9356 * pageout thread (we won't pagein before there's been a pageout)
9357 * and there's only one pageout thread.
9358 */
9359 if (swap_crypt_ctx_initialized == FALSE) {
9360 for (i = 0;
9361 i < (sizeof (swap_crypt_key) /
9362 sizeof (swap_crypt_key[0]));
9363 i++) {
9364 swap_crypt_key[i] = random();
9365 }
9366 aes_encrypt_key((const unsigned char *) swap_crypt_key,
9367 SWAP_CRYPT_AES_KEY_SIZE,
9368 &swap_crypt_ctx.encrypt);
9369 aes_decrypt_key((const unsigned char *) swap_crypt_key,
9370 SWAP_CRYPT_AES_KEY_SIZE,
9371 &swap_crypt_ctx.decrypt);
9372 swap_crypt_ctx_initialized = TRUE;
9373 }
9374
9375#if DEBUG
9376 /*
9377 * Validate the encryption algorithms.
9378 */
9379 if (swap_crypt_ctx_tested == FALSE) {
9380 /* initialize */
9381 for (i = 0; i < 4096; i++) {
9382 swap_crypt_test_page_ref[i] = (char) i;
9383 }
9384 /* encrypt */
9385 aes_encrypt_cbc(swap_crypt_test_page_ref,
9386 swap_crypt_null_iv,
9387 PAGE_SIZE / AES_BLOCK_SIZE,
9388 swap_crypt_test_page_encrypt,
9389 &swap_crypt_ctx.encrypt);
9390 /* decrypt */
9391 aes_decrypt_cbc(swap_crypt_test_page_encrypt,
9392 swap_crypt_null_iv,
9393 PAGE_SIZE / AES_BLOCK_SIZE,
9394 swap_crypt_test_page_decrypt,
9395 &swap_crypt_ctx.decrypt);
9396 /* compare result with original */
9397 for (i = 0; i < 4096; i ++) {
9398 if (swap_crypt_test_page_decrypt[i] !=
9399 swap_crypt_test_page_ref[i]) {
9400 panic("encryption test failed");
9401 }
9402 }
9403
9404 /* encrypt again */
9405 aes_encrypt_cbc(swap_crypt_test_page_decrypt,
9406 swap_crypt_null_iv,
9407 PAGE_SIZE / AES_BLOCK_SIZE,
9408 swap_crypt_test_page_decrypt,
9409 &swap_crypt_ctx.encrypt);
9410 /* decrypt in place */
9411 aes_decrypt_cbc(swap_crypt_test_page_decrypt,
9412 swap_crypt_null_iv,
9413 PAGE_SIZE / AES_BLOCK_SIZE,
9414 swap_crypt_test_page_decrypt,
9415 &swap_crypt_ctx.decrypt);
9416 for (i = 0; i < 4096; i ++) {
9417 if (swap_crypt_test_page_decrypt[i] !=
9418 swap_crypt_test_page_ref[i]) {
9419 panic("in place encryption test failed");
9420 }
9421 }
9422
9423 swap_crypt_ctx_tested = TRUE;
9424 }
9425#endif /* DEBUG */
9426}
9427
9428/*
9429 * ENCRYPTED SWAP:
9430 * vm_page_encrypt:
9431 * Encrypt the given page, for secure paging.
9432 * The page might already be mapped at kernel virtual
9433 * address "kernel_mapping_offset". Otherwise, we need
9434 * to map it.
9435 *
9436 * Context:
9437 * The page's object is locked, but this lock will be released
9438 * and re-acquired.
9439 * The page is busy and not accessible by users (not entered in any pmap).
9440 */
9441void
9442vm_page_encrypt(
9443 vm_page_t page,
9444 vm_map_offset_t kernel_mapping_offset)
9445{
91447636 9446 kern_return_t kr;
91447636 9447 vm_map_size_t kernel_mapping_size;
39236c6e 9448 boolean_t kernel_mapping_needs_unmap;
91447636
A
9449 vm_offset_t kernel_vaddr;
9450 union {
9451 unsigned char aes_iv[AES_BLOCK_SIZE];
9452 struct {
9453 memory_object_t pager_object;
9454 vm_object_offset_t paging_offset;
9455 } vm;
9456 } encrypt_iv;
9457
9458 if (! vm_pages_encrypted) {
9459 vm_pages_encrypted = TRUE;
9460 }
9461
9462 assert(page->busy);
91447636
A
9463
9464 if (page->encrypted) {
9465 /*
9466 * Already encrypted: no need to do it again.
9467 */
9468 vm_page_encrypt_already_encrypted_counter++;
9469 return;
9470 }
316670eb
A
9471 assert(page->dirty || page->precious);
9472
91447636
A
9473 ASSERT_PAGE_DECRYPTED(page);
9474
9475 /*
2d21ac55
A
9476 * Take a paging-in-progress reference to keep the object
9477 * alive even if we have to unlock it (in vm_paging_map_object()
9478 * for example)...
91447636 9479 */
2d21ac55 9480 vm_object_paging_begin(page->object);
91447636
A
9481
9482 if (kernel_mapping_offset == 0) {
9483 /*
9484 * The page hasn't already been mapped in kernel space
9485 * by the caller. Map it now, so that we can access
9486 * its contents and encrypt them.
9487 */
9488 kernel_mapping_size = PAGE_SIZE;
39236c6e
A
9489 kernel_mapping_needs_unmap = FALSE;
9490 kr = vm_paging_map_object(page,
91447636
A
9491 page->object,
9492 page->offset,
593a1d5f 9493 VM_PROT_READ | VM_PROT_WRITE,
39236c6e
A
9494 FALSE,
9495 &kernel_mapping_size,
9496 &kernel_mapping_offset,
9497 &kernel_mapping_needs_unmap);
91447636
A
9498 if (kr != KERN_SUCCESS) {
9499 panic("vm_page_encrypt: "
9500 "could not map page in kernel: 0x%x\n",
9501 kr);
9502 }
9503 } else {
9504 kernel_mapping_size = 0;
39236c6e 9505 kernel_mapping_needs_unmap = FALSE;
91447636
A
9506 }
9507 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
9508
9509 if (swap_crypt_ctx_initialized == FALSE) {
9510 swap_crypt_ctx_initialize();
9511 }
9512 assert(swap_crypt_ctx_initialized);
9513
9514 /*
9515 * Prepare an "initial vector" for the encryption.
9516 * We use the "pager" and the "paging_offset" for that
9517 * page to obfuscate the encrypted data a bit more and
9518 * prevent crackers from finding patterns that they could
9519 * use to break the key.
9520 */
9521 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
9522 encrypt_iv.vm.pager_object = page->object->pager;
9523 encrypt_iv.vm.paging_offset =
9524 page->object->paging_offset + page->offset;
9525
91447636
A
9526 /* encrypt the "initial vector" */
9527 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
9528 swap_crypt_null_iv,
9529 1,
9530 &encrypt_iv.aes_iv[0],
9531 &swap_crypt_ctx.encrypt);
9532
9533 /*
9534 * Encrypt the page.
9535 */
9536 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
9537 &encrypt_iv.aes_iv[0],
9538 PAGE_SIZE / AES_BLOCK_SIZE,
9539 (unsigned char *) kernel_vaddr,
9540 &swap_crypt_ctx.encrypt);
9541
9542 vm_page_encrypt_counter++;
9543
91447636
A
9544 /*
9545 * Unmap the page from the kernel's address space,
9546 * if we had to map it ourselves. Otherwise, let
9547 * the caller undo the mapping if needed.
9548 */
39236c6e 9549 if (kernel_mapping_needs_unmap) {
91447636
A
9550 vm_paging_unmap_object(page->object,
9551 kernel_mapping_offset,
9552 kernel_mapping_offset + kernel_mapping_size);
9553 }
9554
9555 /*
2d21ac55 9556 * Clear the "reference" and "modified" bits.
91447636
A
9557 * This should clean up any impact the encryption had
9558 * on them.
2d21ac55
A
9559 * The page was kept busy and disconnected from all pmaps,
9560 * so it can't have been referenced or modified from user
9561 * space.
9562 * The software bits will be reset later after the I/O
9563 * has completed (in upl_commit_range()).
91447636 9564 */
2d21ac55 9565 pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
91447636
A
9566
9567 page->encrypted = TRUE;
2d21ac55
A
9568
9569 vm_object_paging_end(page->object);
91447636
A
9570}
9571
9572/*
9573 * ENCRYPTED SWAP:
9574 * vm_page_decrypt:
9575 * Decrypt the given page.
9576 * The page might already be mapped at kernel virtual
9577 * address "kernel_mapping_offset". Otherwise, we need
9578 * to map it.
9579 *
9580 * Context:
9581 * The page's VM object is locked but will be unlocked and relocked.
9582 * The page is busy and not accessible by users (not entered in any pmap).
9583 */
9584void
9585vm_page_decrypt(
9586 vm_page_t page,
9587 vm_map_offset_t kernel_mapping_offset)
9588{
91447636
A
9589 kern_return_t kr;
9590 vm_map_size_t kernel_mapping_size;
9591 vm_offset_t kernel_vaddr;
39236c6e 9592 boolean_t kernel_mapping_needs_unmap;
91447636
A
9593 union {
9594 unsigned char aes_iv[AES_BLOCK_SIZE];
9595 struct {
9596 memory_object_t pager_object;
9597 vm_object_offset_t paging_offset;
9598 } vm;
9599 } decrypt_iv;
6d2010ae 9600 boolean_t was_dirty;
91447636
A
9601
9602 assert(page->busy);
9603 assert(page->encrypted);
9604
6d2010ae
A
9605 was_dirty = page->dirty;
9606
91447636 9607 /*
2d21ac55
A
9608 * Take a paging-in-progress reference to keep the object
9609 * alive even if we have to unlock it (in vm_paging_map_object()
9610 * for example)...
91447636 9611 */
2d21ac55 9612 vm_object_paging_begin(page->object);
91447636
A
9613
9614 if (kernel_mapping_offset == 0) {
9615 /*
9616 * The page hasn't already been mapped in kernel space
9617 * by the caller. Map it now, so that we can access
9618 * its contents and decrypt them.
9619 */
9620 kernel_mapping_size = PAGE_SIZE;
39236c6e
A
9621 kernel_mapping_needs_unmap = FALSE;
9622 kr = vm_paging_map_object(page,
91447636
A
9623 page->object,
9624 page->offset,
593a1d5f 9625 VM_PROT_READ | VM_PROT_WRITE,
39236c6e
A
9626 FALSE,
9627 &kernel_mapping_size,
9628 &kernel_mapping_offset,
9629 &kernel_mapping_needs_unmap);
91447636
A
9630 if (kr != KERN_SUCCESS) {
9631 panic("vm_page_decrypt: "
2d21ac55
A
9632 "could not map page in kernel: 0x%x\n",
9633 kr);
91447636
A
9634 }
9635 } else {
9636 kernel_mapping_size = 0;
39236c6e 9637 kernel_mapping_needs_unmap = FALSE;
91447636
A
9638 }
9639 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
9640
9641 assert(swap_crypt_ctx_initialized);
9642
9643 /*
9644 * Prepare an "initial vector" for the decryption.
9645 * It has to be the same as the "initial vector" we
9646 * used to encrypt that page.
9647 */
9648 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
9649 decrypt_iv.vm.pager_object = page->object->pager;
9650 decrypt_iv.vm.paging_offset =
9651 page->object->paging_offset + page->offset;
9652
91447636
A
9653 /* encrypt the "initial vector" */
9654 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
9655 swap_crypt_null_iv,
9656 1,
9657 &decrypt_iv.aes_iv[0],
9658 &swap_crypt_ctx.encrypt);
9659
9660 /*
9661 * Decrypt the page.
9662 */
9663 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
9664 &decrypt_iv.aes_iv[0],
9665 PAGE_SIZE / AES_BLOCK_SIZE,
9666 (unsigned char *) kernel_vaddr,
9667 &swap_crypt_ctx.decrypt);
9668 vm_page_decrypt_counter++;
9669
91447636
A
9670 /*
9671 * Unmap the page from the kernel's address space,
9672 * if we had to map it ourselves. Otherwise, let
9673 * the caller undo the mapping if needed.
9674 */
39236c6e 9675 if (kernel_mapping_needs_unmap) {
91447636
A
9676 vm_paging_unmap_object(page->object,
9677 kernel_vaddr,
9678 kernel_vaddr + PAGE_SIZE);
9679 }
9680
6d2010ae
A
9681 if (was_dirty) {
9682 /*
9683 * The pager did not specify that the page would be
9684 * clean when it got paged in, so let's not clean it here
9685 * either.
9686 */
9687 } else {
9688 /*
9689 * After decryption, the page is actually still clean.
9690 * It was encrypted as part of paging, which "cleans"
9691 * the "dirty" pages.
9692 * Noone could access it after it was encrypted
9693 * and the decryption doesn't count.
9694 */
9695 page->dirty = FALSE;
9696 assert (page->cs_validated == FALSE);
9697 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
9698 }
91447636
A
9699 page->encrypted = FALSE;
9700
9701 /*
9702 * We've just modified the page's contents via the data cache and part
9703 * of the new contents might still be in the cache and not yet in RAM.
9704 * Since the page is now available and might get gathered in a UPL to
9705 * be part of a DMA transfer from a driver that expects the memory to
9706 * be coherent at this point, we have to flush the data cache.
9707 */
0c530ab8 9708 pmap_sync_page_attributes_phys(page->phys_page);
91447636
A
9709 /*
9710 * Since the page is not mapped yet, some code might assume that it
9711 * doesn't need to invalidate the instruction cache when writing to
2d21ac55
A
9712 * that page. That code relies on "pmapped" being FALSE, so that the
9713 * caches get synchronized when the page is first mapped.
91447636 9714 */
2d21ac55
A
9715 assert(pmap_verify_free(page->phys_page));
9716 page->pmapped = FALSE;
4a3eedf9 9717 page->wpmapped = FALSE;
2d21ac55
A
9718
9719 vm_object_paging_end(page->object);
91447636
A
9720}
9721
b0d623f7 9722#if DEVELOPMENT || DEBUG
91447636
A
9723unsigned long upl_encrypt_upls = 0;
9724unsigned long upl_encrypt_pages = 0;
b0d623f7 9725#endif
91447636
A
9726
9727/*
9728 * ENCRYPTED SWAP:
9729 *
9730 * upl_encrypt:
9731 * Encrypts all the pages in the UPL, within the specified range.
9732 *
9733 */
9734void
9735upl_encrypt(
9736 upl_t upl,
9737 upl_offset_t crypt_offset,
9738 upl_size_t crypt_size)
9739{
b0d623f7
A
9740 upl_size_t upl_size, subupl_size=crypt_size;
9741 upl_offset_t offset_in_upl, subupl_offset=crypt_offset;
91447636 9742 vm_object_t upl_object;
b0d623f7 9743 vm_object_offset_t upl_offset;
91447636
A
9744 vm_page_t page;
9745 vm_object_t shadow_object;
9746 vm_object_offset_t shadow_offset;
9747 vm_object_offset_t paging_offset;
9748 vm_object_offset_t base_offset;
b0d623f7
A
9749 int isVectorUPL = 0;
9750 upl_t vector_upl = NULL;
9751
9752 if((isVectorUPL = vector_upl_is_valid(upl)))
9753 vector_upl = upl;
9754
9755process_upl_to_encrypt:
9756 if(isVectorUPL) {
9757 crypt_size = subupl_size;
9758 crypt_offset = subupl_offset;
9759 upl = vector_upl_subupl_byoffset(vector_upl, &crypt_offset, &crypt_size);
9760 if(upl == NULL)
9761 panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n");
9762 subupl_size -= crypt_size;
9763 subupl_offset += crypt_size;
9764 }
91447636 9765
b0d623f7 9766#if DEVELOPMENT || DEBUG
91447636
A
9767 upl_encrypt_upls++;
9768 upl_encrypt_pages += crypt_size / PAGE_SIZE;
b0d623f7 9769#endif
91447636
A
9770 upl_object = upl->map_object;
9771 upl_offset = upl->offset;
9772 upl_size = upl->size;
9773
91447636
A
9774 vm_object_lock(upl_object);
9775
9776 /*
9777 * Find the VM object that contains the actual pages.
9778 */
9779 if (upl_object->pageout) {
9780 shadow_object = upl_object->shadow;
9781 /*
9782 * The offset in the shadow object is actually also
9783 * accounted for in upl->offset. It possibly shouldn't be
9784 * this way, but for now don't account for it twice.
9785 */
9786 shadow_offset = 0;
9787 assert(upl_object->paging_offset == 0); /* XXX ? */
9788 vm_object_lock(shadow_object);
9789 } else {
9790 shadow_object = upl_object;
9791 shadow_offset = 0;
9792 }
9793
9794 paging_offset = shadow_object->paging_offset;
9795 vm_object_paging_begin(shadow_object);
9796
2d21ac55
A
9797 if (shadow_object != upl_object)
9798 vm_object_unlock(upl_object);
9799
91447636
A
9800
9801 base_offset = shadow_offset;
9802 base_offset += upl_offset;
9803 base_offset += crypt_offset;
9804 base_offset -= paging_offset;
91447636 9805
2d21ac55 9806 assert(crypt_offset + crypt_size <= upl_size);
91447636 9807
b0d623f7
A
9808 for (offset_in_upl = 0;
9809 offset_in_upl < crypt_size;
9810 offset_in_upl += PAGE_SIZE) {
91447636 9811 page = vm_page_lookup(shadow_object,
b0d623f7 9812 base_offset + offset_in_upl);
91447636
A
9813 if (page == VM_PAGE_NULL) {
9814 panic("upl_encrypt: "
6d2010ae 9815 "no page for (obj=%p,off=0x%llx+0x%x)!\n",
91447636
A
9816 shadow_object,
9817 base_offset,
b0d623f7 9818 offset_in_upl);
91447636 9819 }
2d21ac55
A
9820 /*
9821 * Disconnect the page from all pmaps, so that nobody can
9822 * access it while it's encrypted. After that point, all
9823 * accesses to this page will cause a page fault and block
9824 * while the page is busy being encrypted. After the
9825 * encryption completes, any access will cause a
9826 * page fault and the page gets decrypted at that time.
9827 */
9828 pmap_disconnect(page->phys_page);
91447636 9829 vm_page_encrypt(page, 0);
2d21ac55 9830
b0d623f7 9831 if (vm_object_lock_avoid(shadow_object)) {
2d21ac55
A
9832 /*
9833 * Give vm_pageout_scan() a chance to convert more
9834 * pages from "clean-in-place" to "clean-and-free",
9835 * if it's interested in the same pages we selected
9836 * in this cluster.
9837 */
9838 vm_object_unlock(shadow_object);
b0d623f7 9839 mutex_pause(2);
2d21ac55
A
9840 vm_object_lock(shadow_object);
9841 }
91447636
A
9842 }
9843
9844 vm_object_paging_end(shadow_object);
9845 vm_object_unlock(shadow_object);
b0d623f7
A
9846
9847 if(isVectorUPL && subupl_size)
9848 goto process_upl_to_encrypt;
91447636
A
9849}
9850
fe8ab488 9851#else /* ENCRYPTED_SWAP */
2d21ac55
A
9852void
9853upl_encrypt(
9854 __unused upl_t upl,
9855 __unused upl_offset_t crypt_offset,
9856 __unused upl_size_t crypt_size)
9857{
9858}
9859
9860void
9861vm_page_encrypt(
9862 __unused vm_page_t page,
9863 __unused vm_map_offset_t kernel_mapping_offset)
9864{
9865}
9866
9867void
9868vm_page_decrypt(
9869 __unused vm_page_t page,
9870 __unused vm_map_offset_t kernel_mapping_offset)
9871{
9872}
9873
fe8ab488 9874#endif /* ENCRYPTED_SWAP */
2d21ac55 9875
316670eb
A
9876/*
9877 * page->object must be locked
9878 */
b0d623f7 9879void
316670eb 9880vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
b0d623f7 9881{
b0d623f7
A
9882 if (!queues_locked) {
9883 vm_page_lockspin_queues();
9884 }
9885
9886 /*
9887 * need to drop the laundry count...
9888 * we may also need to remove it
9889 * from the I/O paging queue...
9890 * vm_pageout_throttle_up handles both cases
9891 *
9892 * the laundry and pageout_queue flags are cleared...
9893 */
9894 vm_pageout_throttle_up(page);
b0d623f7
A
9895
9896 vm_page_steal_pageout_page++;
9897
9898 if (!queues_locked) {
9899 vm_page_unlock_queues();
9900 }
9901}
9902
9903upl_t
9904vector_upl_create(vm_offset_t upl_offset)
9905{
9906 int vector_upl_size = sizeof(struct _vector_upl);
9907 int i=0;
9908 upl_t upl;
9909 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
9910
9911 upl = upl_create(0,UPL_VECTOR,0);
9912 upl->vector_upl = vector_upl;
9913 upl->offset = upl_offset;
9914 vector_upl->size = 0;
9915 vector_upl->offset = upl_offset;
9916 vector_upl->invalid_upls=0;
9917 vector_upl->num_upls=0;
9918 vector_upl->pagelist = NULL;
9919
9920 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
9921 vector_upl->upl_iostates[i].size = 0;
9922 vector_upl->upl_iostates[i].offset = 0;
9923
9924 }
9925 return upl;
9926}
9927
9928void
9929vector_upl_deallocate(upl_t upl)
9930{
9931 if(upl) {
9932 vector_upl_t vector_upl = upl->vector_upl;
9933 if(vector_upl) {
9934 if(vector_upl->invalid_upls != vector_upl->num_upls)
9935 panic("Deallocating non-empty Vectored UPL\n");
9936 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
9937 vector_upl->invalid_upls=0;
9938 vector_upl->num_upls = 0;
9939 vector_upl->pagelist = NULL;
9940 vector_upl->size = 0;
9941 vector_upl->offset = 0;
9942 kfree(vector_upl, sizeof(struct _vector_upl));
316670eb 9943 vector_upl = (vector_upl_t)0xfeedfeed;
b0d623f7
A
9944 }
9945 else
9946 panic("vector_upl_deallocate was passed a non-vectored upl\n");
9947 }
9948 else
9949 panic("vector_upl_deallocate was passed a NULL upl\n");
9950}
9951
9952boolean_t
9953vector_upl_is_valid(upl_t upl)
9954{
9955 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
9956 vector_upl_t vector_upl = upl->vector_upl;
316670eb 9957 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef)
b0d623f7
A
9958 return FALSE;
9959 else
9960 return TRUE;
9961 }
9962 return FALSE;
9963}
9964
9965boolean_t
9966vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
9967{
9968 if(vector_upl_is_valid(upl)) {
9969 vector_upl_t vector_upl = upl->vector_upl;
9970
9971 if(vector_upl) {
9972 if(subupl) {
9973 if(io_size) {
9974 if(io_size < PAGE_SIZE)
9975 io_size = PAGE_SIZE;
9976 subupl->vector_upl = (void*)vector_upl;
9977 vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
9978 vector_upl->size += io_size;
9979 upl->size += io_size;
9980 }
9981 else {
9982 uint32_t i=0,invalid_upls=0;
9983 for(i = 0; i < vector_upl->num_upls; i++) {
9984 if(vector_upl->upl_elems[i] == subupl)
9985 break;
9986 }
9987 if(i == vector_upl->num_upls)
9988 panic("Trying to remove sub-upl when none exists");
9989
9990 vector_upl->upl_elems[i] = NULL;
9991 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
9992 if(invalid_upls == vector_upl->num_upls)
9993 return TRUE;
9994 else
9995 return FALSE;
9996 }
9997 }
9998 else
9999 panic("vector_upl_set_subupl was passed a NULL upl element\n");
10000 }
10001 else
10002 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
10003 }
10004 else
10005 panic("vector_upl_set_subupl was passed a NULL upl\n");
10006
10007 return FALSE;
10008}
10009
10010void
10011vector_upl_set_pagelist(upl_t upl)
10012{
10013 if(vector_upl_is_valid(upl)) {
10014 uint32_t i=0;
10015 vector_upl_t vector_upl = upl->vector_upl;
10016
10017 if(vector_upl) {
10018 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0;
10019
10020 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
10021
10022 for(i=0; i < vector_upl->num_upls; i++) {
10023 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
10024 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
10025 pagelist_size += cur_upl_pagelist_size;
10026 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
10027 upl->highest_page = vector_upl->upl_elems[i]->highest_page;
10028 }
10029 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
10030 }
10031 else
10032 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
10033 }
10034 else
10035 panic("vector_upl_set_pagelist was passed a NULL upl\n");
10036
10037}
10038
10039upl_t
10040vector_upl_subupl_byindex(upl_t upl, uint32_t index)
10041{
10042 if(vector_upl_is_valid(upl)) {
10043 vector_upl_t vector_upl = upl->vector_upl;
10044 if(vector_upl) {
10045 if(index < vector_upl->num_upls)
10046 return vector_upl->upl_elems[index];
10047 }
10048 else
10049 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
10050 }
10051 return NULL;
10052}
10053
10054upl_t
10055vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
10056{
10057 if(vector_upl_is_valid(upl)) {
10058 uint32_t i=0;
10059 vector_upl_t vector_upl = upl->vector_upl;
10060
10061 if(vector_upl) {
10062 upl_t subupl = NULL;
10063 vector_upl_iostates_t subupl_state;
10064
10065 for(i=0; i < vector_upl->num_upls; i++) {
10066 subupl = vector_upl->upl_elems[i];
10067 subupl_state = vector_upl->upl_iostates[i];
10068 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
10069 /* We could have been passed an offset/size pair that belongs
10070 * to an UPL element that has already been committed/aborted.
10071 * If so, return NULL.
10072 */
10073 if(subupl == NULL)
10074 return NULL;
10075 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
10076 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
10077 if(*upl_size > subupl_state.size)
10078 *upl_size = subupl_state.size;
10079 }
10080 if(*upl_offset >= subupl_state.offset)
10081 *upl_offset -= subupl_state.offset;
10082 else if(i)
10083 panic("Vector UPL offset miscalculation\n");
10084 return subupl;
10085 }
10086 }
10087 }
10088 else
10089 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
10090 }
10091 return NULL;
10092}
10093
10094void
10095vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
10096{
10097 *v_upl_submap = NULL;
10098
10099 if(vector_upl_is_valid(upl)) {
10100 vector_upl_t vector_upl = upl->vector_upl;
10101 if(vector_upl) {
10102 *v_upl_submap = vector_upl->submap;
10103 *submap_dst_addr = vector_upl->submap_dst_addr;
10104 }
10105 else
10106 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
10107 }
10108 else
10109 panic("vector_upl_get_submap was passed a null UPL\n");
10110}
10111
10112void
10113vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
10114{
10115 if(vector_upl_is_valid(upl)) {
10116 vector_upl_t vector_upl = upl->vector_upl;
10117 if(vector_upl) {
10118 vector_upl->submap = submap;
10119 vector_upl->submap_dst_addr = submap_dst_addr;
10120 }
10121 else
10122 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
10123 }
10124 else
10125 panic("vector_upl_get_submap was passed a NULL UPL\n");
10126}
10127
10128void
10129vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
10130{
10131 if(vector_upl_is_valid(upl)) {
10132 uint32_t i = 0;
10133 vector_upl_t vector_upl = upl->vector_upl;
10134
10135 if(vector_upl) {
10136 for(i = 0; i < vector_upl->num_upls; i++) {
10137 if(vector_upl->upl_elems[i] == subupl)
10138 break;
10139 }
10140
10141 if(i == vector_upl->num_upls)
10142 panic("setting sub-upl iostate when none exists");
10143
10144 vector_upl->upl_iostates[i].offset = offset;
10145 if(size < PAGE_SIZE)
10146 size = PAGE_SIZE;
10147 vector_upl->upl_iostates[i].size = size;
10148 }
10149 else
10150 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
10151 }
10152 else
10153 panic("vector_upl_set_iostate was passed a NULL UPL\n");
10154}
10155
10156void
10157vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
10158{
10159 if(vector_upl_is_valid(upl)) {
10160 uint32_t i = 0;
10161 vector_upl_t vector_upl = upl->vector_upl;
10162
10163 if(vector_upl) {
10164 for(i = 0; i < vector_upl->num_upls; i++) {
10165 if(vector_upl->upl_elems[i] == subupl)
10166 break;
10167 }
10168
10169 if(i == vector_upl->num_upls)
10170 panic("getting sub-upl iostate when none exists");
10171
10172 *offset = vector_upl->upl_iostates[i].offset;
10173 *size = vector_upl->upl_iostates[i].size;
10174 }
10175 else
10176 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
10177 }
10178 else
10179 panic("vector_upl_get_iostate was passed a NULL UPL\n");
10180}
10181
10182void
10183vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
10184{
10185 if(vector_upl_is_valid(upl)) {
10186 vector_upl_t vector_upl = upl->vector_upl;
10187 if(vector_upl) {
10188 if(index < vector_upl->num_upls) {
10189 *offset = vector_upl->upl_iostates[index].offset;
10190 *size = vector_upl->upl_iostates[index].size;
10191 }
10192 else
10193 *offset = *size = 0;
10194 }
10195 else
10196 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
10197 }
10198 else
10199 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
10200}
10201
10202upl_page_info_t *
10203upl_get_internal_vectorupl_pagelist(upl_t upl)
10204{
10205 return ((vector_upl_t)(upl->vector_upl))->pagelist;
10206}
10207
10208void *
10209upl_get_internal_vectorupl(upl_t upl)
10210{
10211 return upl->vector_upl;
10212}
10213
91447636
A
10214vm_size_t
10215upl_get_internal_pagelist_offset(void)
10216{
10217 return sizeof(struct upl);
10218}
10219
91447636
A
10220void
10221upl_clear_dirty(
0c530ab8
A
10222 upl_t upl,
10223 boolean_t value)
91447636 10224{
0c530ab8
A
10225 if (value) {
10226 upl->flags |= UPL_CLEAR_DIRTY;
10227 } else {
10228 upl->flags &= ~UPL_CLEAR_DIRTY;
10229 }
91447636
A
10230}
10231
6d2010ae
A
10232void
10233upl_set_referenced(
10234 upl_t upl,
10235 boolean_t value)
10236{
10237 upl_lock(upl);
10238 if (value) {
10239 upl->ext_ref_count++;
10240 } else {
10241 if (!upl->ext_ref_count) {
10242 panic("upl_set_referenced not %p\n", upl);
10243 }
10244 upl->ext_ref_count--;
10245 }
10246 upl_unlock(upl);
10247}
10248
fe8ab488
A
10249#if CONFIG_IOSCHED
10250void
10251upl_set_blkno(
10252 upl_t upl,
10253 vm_offset_t upl_offset,
10254 int io_size,
10255 int64_t blkno)
10256{
10257 int i,j;
10258 if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0)
10259 return;
10260
10261 assert(upl->upl_reprio_info != 0);
10262 for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
10263 UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
10264 }
10265}
10266#endif
10267
6d2010ae
A
10268boolean_t
10269vm_page_is_slideable(vm_page_t m)
10270{
10271 boolean_t result = FALSE;
39236c6e
A
10272 vm_shared_region_slide_info_t si;
10273
10274 vm_object_lock_assert_held(m->object);
6d2010ae
A
10275
10276 /* make sure our page belongs to the one object allowed to do this */
39236c6e
A
10277 if (!m->object->object_slid) {
10278 goto done;
6d2010ae
A
10279 }
10280
39236c6e
A
10281 si = m->object->vo_slide_info;
10282 if (si == NULL) {
10283 goto done;
6d2010ae
A
10284 }
10285
39236c6e 10286 if(!m->slid && (si->start <= m->offset && si->end > m->offset)) {
6d2010ae
A
10287 result = TRUE;
10288 }
39236c6e
A
10289
10290done:
6d2010ae
A
10291 return result;
10292}
10293
10294int vm_page_slide_counter = 0;
10295int vm_page_slide_errors = 0;
10296kern_return_t
10297vm_page_slide(
10298 vm_page_t page,
10299 vm_map_offset_t kernel_mapping_offset)
10300{
10301 kern_return_t kr;
10302 vm_map_size_t kernel_mapping_size;
39236c6e 10303 boolean_t kernel_mapping_needs_unmap;
6d2010ae 10304 vm_offset_t kernel_vaddr;
3e170ce0
A
10305 uint32_t pageIndex;
10306 uint32_t slide_chunk;
6d2010ae
A
10307
10308 assert(!page->slid);
39236c6e
A
10309 assert(page->object->object_slid);
10310 vm_object_lock_assert_exclusive(page->object);
316670eb
A
10311
10312 if (page->error)
10313 return KERN_FAILURE;
6d2010ae
A
10314
10315 /*
10316 * Take a paging-in-progress reference to keep the object
10317 * alive even if we have to unlock it (in vm_paging_map_object()
10318 * for example)...
10319 */
10320 vm_object_paging_begin(page->object);
10321
10322 if (kernel_mapping_offset == 0) {
10323 /*
10324 * The page hasn't already been mapped in kernel space
10325 * by the caller. Map it now, so that we can access
10326 * its contents and decrypt them.
10327 */
10328 kernel_mapping_size = PAGE_SIZE;
39236c6e
A
10329 kernel_mapping_needs_unmap = FALSE;
10330 kr = vm_paging_map_object(page,
6d2010ae
A
10331 page->object,
10332 page->offset,
6d2010ae 10333 VM_PROT_READ | VM_PROT_WRITE,
39236c6e
A
10334 FALSE,
10335 &kernel_mapping_size,
10336 &kernel_mapping_offset,
10337 &kernel_mapping_needs_unmap);
6d2010ae
A
10338 if (kr != KERN_SUCCESS) {
10339 panic("vm_page_slide: "
10340 "could not map page in kernel: 0x%x\n",
10341 kr);
10342 }
10343 } else {
10344 kernel_mapping_size = 0;
39236c6e 10345 kernel_mapping_needs_unmap = FALSE;
6d2010ae
A
10346 }
10347 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
10348
10349 /*
10350 * Slide the pointers on the page.
10351 */
10352
10353 /*assert that slide_file_info.start/end are page-aligned?*/
10354
39236c6e
A
10355 assert(!page->slid);
10356 assert(page->object->object_slid);
10357
3e170ce0
A
10358#define PAGE_SIZE_FOR_SR_SLIDE 4096
10359 pageIndex = (uint32_t)((page->offset -
10360 page->object->vo_slide_info->start) /
10361 PAGE_SIZE_FOR_SR_SLIDE);
10362 for (slide_chunk = 0;
10363 slide_chunk < PAGE_SIZE / PAGE_SIZE_FOR_SR_SLIDE;
10364 slide_chunk++) {
10365 kr = vm_shared_region_slide_page(page->object->vo_slide_info,
10366 (kernel_vaddr +
10367 (slide_chunk *
10368 PAGE_SIZE_FOR_SR_SLIDE)),
10369 (pageIndex + slide_chunk));
10370 if (kr != KERN_SUCCESS) {
10371 break;
fe8ab488 10372 }
fe8ab488 10373 }
fe8ab488 10374
6d2010ae
A
10375 vm_page_slide_counter++;
10376
10377 /*
10378 * Unmap the page from the kernel's address space,
10379 */
39236c6e 10380 if (kernel_mapping_needs_unmap) {
6d2010ae
A
10381 vm_paging_unmap_object(page->object,
10382 kernel_vaddr,
10383 kernel_vaddr + PAGE_SIZE);
10384 }
10385
10386 page->dirty = FALSE;
10387 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
316670eb
A
10388
10389 if (kr != KERN_SUCCESS || cs_debug > 1) {
10390 printf("vm_page_slide(%p): "
10391 "obj %p off 0x%llx mobj %p moff 0x%llx\n",
10392 page,
10393 page->object, page->offset,
10394 page->object->pager,
10395 page->offset + page->object->paging_offset);
10396 }
6d2010ae
A
10397
10398 if (kr == KERN_SUCCESS) {
10399 page->slid = TRUE;
10400 } else {
10401 page->error = TRUE;
10402 vm_page_slide_errors++;
10403 }
10404
10405 vm_object_paging_end(page->object);
10406
10407 return kr;
10408}
10409
39236c6e
A
10410void inline memoryshot(unsigned int event, unsigned int control)
10411{
10412 if (vm_debug_events) {
10413 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
10414 vm_page_active_count, vm_page_inactive_count,
10415 vm_page_free_count, vm_page_speculative_count,
10416 vm_page_throttled_count);
10417 } else {
10418 (void) event;
10419 (void) control;
10420 }
10421
10422}
91447636
A
10423
10424#ifdef MACH_BSD
1c79356b 10425
2d21ac55
A
10426boolean_t upl_device_page(upl_page_info_t *upl)
10427{
10428 return(UPL_DEVICE_PAGE(upl));
10429}
1c79356b
A
10430boolean_t upl_page_present(upl_page_info_t *upl, int index)
10431{
10432 return(UPL_PAGE_PRESENT(upl, index));
10433}
2d21ac55
A
10434boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
10435{
10436 return(UPL_SPECULATIVE_PAGE(upl, index));
10437}
1c79356b
A
10438boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
10439{
10440 return(UPL_DIRTY_PAGE(upl, index));
10441}
10442boolean_t upl_valid_page(upl_page_info_t *upl, int index)
10443{
10444 return(UPL_VALID_PAGE(upl, index));
10445}
91447636 10446ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
1c79356b 10447{
91447636 10448 return(UPL_PHYS_PAGE(upl, index));
1c79356b
A
10449}
10450
3e170ce0
A
10451void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
10452{
10453 upl[index].mark = v;
10454}
10455
10456boolean_t upl_page_get_mark(upl_page_info_t *upl, int index)
10457{
10458 return upl[index].mark;
10459}
10460
0b4e3aa0
A
10461void
10462vm_countdirtypages(void)
1c79356b
A
10463{
10464 vm_page_t m;
10465 int dpages;
10466 int pgopages;
10467 int precpages;
10468
10469
10470 dpages=0;
10471 pgopages=0;
10472 precpages=0;
10473
10474 vm_page_lock_queues();
10475 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
10476 do {
10477 if (m ==(vm_page_t )0) break;
10478
10479 if(m->dirty) dpages++;
10480 if(m->pageout) pgopages++;
10481 if(m->precious) precpages++;
10482
91447636 10483 assert(m->object != kernel_object);
1c79356b
A
10484 m = (vm_page_t) queue_next(&m->pageq);
10485 if (m ==(vm_page_t )0) break;
10486
10487 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
10488 vm_page_unlock_queues();
9bccf70c 10489
2d21ac55
A
10490 vm_page_lock_queues();
10491 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
10492 do {
10493 if (m ==(vm_page_t )0) break;
10494
10495 dpages++;
10496 assert(m->dirty);
10497 assert(!m->pageout);
10498 assert(m->object != kernel_object);
10499 m = (vm_page_t) queue_next(&m->pageq);
10500 if (m ==(vm_page_t )0) break;
10501
10502 } while (!queue_end(&vm_page_queue_throttled,(queue_entry_t) m));
10503 vm_page_unlock_queues();
10504
9bccf70c 10505 vm_page_lock_queues();
316670eb 10506 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
9bccf70c
A
10507 do {
10508 if (m ==(vm_page_t )0) break;
10509
10510 if(m->dirty) dpages++;
10511 if(m->pageout) pgopages++;
10512 if(m->precious) precpages++;
10513
91447636 10514 assert(m->object != kernel_object);
9bccf70c
A
10515 m = (vm_page_t) queue_next(&m->pageq);
10516 if (m ==(vm_page_t )0) break;
10517
316670eb 10518 } while (!queue_end(&vm_page_queue_anonymous,(queue_entry_t) m));
9bccf70c 10519 vm_page_unlock_queues();
1c79356b
A
10520
10521 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
10522
10523 dpages=0;
10524 pgopages=0;
10525 precpages=0;
10526
10527 vm_page_lock_queues();
10528 m = (vm_page_t) queue_first(&vm_page_queue_active);
10529
10530 do {
10531 if(m == (vm_page_t )0) break;
10532 if(m->dirty) dpages++;
10533 if(m->pageout) pgopages++;
10534 if(m->precious) precpages++;
10535
91447636 10536 assert(m->object != kernel_object);
1c79356b
A
10537 m = (vm_page_t) queue_next(&m->pageq);
10538 if(m == (vm_page_t )0) break;
10539
10540 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
10541 vm_page_unlock_queues();
10542
10543 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
10544
10545}
10546#endif /* MACH_BSD */
10547
0c530ab8 10548ppnum_t upl_get_highest_page(
2d21ac55 10549 upl_t upl)
0c530ab8 10550{
2d21ac55 10551 return upl->highest_page;
0c530ab8
A
10552}
10553
b0d623f7
A
10554upl_size_t upl_get_size(
10555 upl_t upl)
10556{
10557 return upl->size;
10558}
10559
3e170ce0
A
10560upl_t upl_associated_upl(upl_t upl)
10561{
10562 return upl->associated_upl;
10563}
10564
10565void upl_set_associated_upl(upl_t upl, upl_t associated_upl)
10566{
10567 upl->associated_upl = associated_upl;
10568}
10569
b0d623f7
A
10570#if UPL_DEBUG
10571kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
1c79356b
A
10572{
10573 upl->ubc_alias1 = alias1;
10574 upl->ubc_alias2 = alias2;
10575 return KERN_SUCCESS;
10576}
b0d623f7 10577int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
1c79356b
A
10578{
10579 if(al)
10580 *al = upl->ubc_alias1;
10581 if(al2)
10582 *al2 = upl->ubc_alias2;
10583 return KERN_SUCCESS;
10584}
91447636 10585#endif /* UPL_DEBUG */
fe8ab488
A
10586
10587#if VM_PRESSURE_EVENTS
10588/*
10589 * Upward trajectory.
10590 */
10591extern boolean_t vm_compressor_low_on_space(void);
10592
10593boolean_t
10594VM_PRESSURE_NORMAL_TO_WARNING(void) {
10595
10596 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
10597
10598 /* Available pages below our threshold */
10599 if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
10600 /* No frozen processes to kill */
10601 if (memorystatus_frozen_count == 0) {
10602 /* Not enough suspended processes available. */
10603 if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
10604 return TRUE;
10605 }
10606 }
10607 }
10608 return FALSE;
10609
10610 } else {
10611 return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
10612 }
10613}
10614
10615boolean_t
10616VM_PRESSURE_WARNING_TO_CRITICAL(void) {
10617
10618 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
10619 /* Available pages below our threshold */
10620 if (memorystatus_available_pages < memorystatus_available_pages_critical) {
10621 return TRUE;
10622 }
10623 return FALSE;
10624 } else {
10625 return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
10626 }
10627}
10628
10629/*
10630 * Downward trajectory.
10631 */
10632boolean_t
10633VM_PRESSURE_WARNING_TO_NORMAL(void) {
10634
10635 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
10636 /* Available pages above our threshold */
10637 unsigned int target_threshold = memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100);
10638 if (memorystatus_available_pages > target_threshold) {
10639 return TRUE;
10640 }
10641 return FALSE;
10642 } else {
10643 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
10644 }
10645}
10646
10647boolean_t
10648VM_PRESSURE_CRITICAL_TO_WARNING(void) {
10649
10650 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
10651 /* Available pages above our threshold */
10652 unsigned int target_threshold = memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100);
10653 if (memorystatus_available_pages > target_threshold) {
10654 return TRUE;
10655 }
10656 return FALSE;
10657 } else {
10658 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
10659 }
10660}
10661#endif /* VM_PRESSURE_EVENTS */
10662