1 /*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71
72 #include <mach/mach_types.h>
73 #include <mach/memory_object.h>
74 #include <mach/memory_object_default.h>
75 #include <mach/memory_object_control_server.h>
76 #include <mach/mach_host_server.h>
77 #include <mach/upl.h>
78 #include <mach/vm_map.h>
79 #include <mach/vm_param.h>
80 #include <mach/vm_statistics.h>
81 #include <mach/sdt.h>
82
83 #include <kern/kern_types.h>
84 #include <kern/counters.h>
85 #include <kern/host_statistics.h>
86 #include <kern/machine.h>
87 #include <kern/misc_protos.h>
88 #include <kern/sched.h>
89 #include <kern/thread.h>
90 #include <kern/xpr.h>
91 #include <kern/kalloc.h>
92
93 #include <machine/vm_tuning.h>
94 #include <machine/commpage.h>
95
96 #include <vm/pmap.h>
97 #include <vm/vm_compressor_pager.h>
98 #include <vm/vm_fault.h>
99 #include <vm/vm_map.h>
100 #include <vm/vm_object.h>
101 #include <vm/vm_page.h>
102 #include <vm/vm_pageout.h>
103 #include <vm/vm_protos.h> /* must be last */
104 #include <vm/memory_object.h>
105 #include <vm/vm_purgeable_internal.h>
106 #include <vm/vm_shared_region.h>
107 #include <vm/vm_compressor.h>
108
109 #if CONFIG_PHANTOM_CACHE
110 #include <vm/vm_phantom_cache.h>
111 #endif
112 /*
113 * ENCRYPTED SWAP:
114 */
115 #include <libkern/crypto/aes.h>
116 extern u_int32_t random(void); /* from <libkern/libkern.h> */
117
118 extern int cs_debug;
119
120 #if UPL_DEBUG
121 #include <libkern/OSDebug.h>
122 #endif
123
124 extern void m_drain(void);
125
126 #if VM_PRESSURE_EVENTS
127 extern unsigned int memorystatus_available_pages;
128 extern unsigned int memorystatus_available_pages_pressure;
129 extern unsigned int memorystatus_available_pages_critical;
130 extern unsigned int memorystatus_frozen_count;
131 extern unsigned int memorystatus_suspended_count;
132
133 extern vm_pressure_level_t memorystatus_vm_pressure_level;
134 int memorystatus_purge_on_warning = 2;
135 int memorystatus_purge_on_urgent = 5;
136 int memorystatus_purge_on_critical = 8;
137
138 void vm_pressure_response(void);
139 boolean_t vm_pressure_thread_running = FALSE;
140 extern void consider_vm_pressure_events(void);
141
142 #define MEMORYSTATUS_SUSPENDED_THRESHOLD 4
143 #endif /* VM_PRESSURE_EVENTS */
144
145 boolean_t vm_pressure_changed = FALSE;
146
147 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */
148 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100
149 #endif
150
151 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
152 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
153 #endif
154
155 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
156 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
157 #endif
158
159 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
160 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
161 #endif
162
163 #ifndef VM_PAGE_LAUNDRY_MAX
164 #define VM_PAGE_LAUNDRY_MAX 128UL /* maximum pageouts on a given pageout queue */
165 #endif /* VM_PAGE_LAUNDRY_MAX */
166
167 #ifndef VM_PAGEOUT_BURST_WAIT
168 #define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds */
169 #endif /* VM_PAGEOUT_BURST_WAIT */
170
171 #ifndef VM_PAGEOUT_EMPTY_WAIT
172 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
173 #endif /* VM_PAGEOUT_EMPTY_WAIT */
174
175 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
176 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
177 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
178
179 #ifndef VM_PAGEOUT_IDLE_WAIT
180 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
181 #endif /* VM_PAGEOUT_IDLE_WAIT */
182
183 #ifndef VM_PAGEOUT_SWAP_WAIT
184 #define VM_PAGEOUT_SWAP_WAIT 50 /* milliseconds */
185 #endif /* VM_PAGEOUT_SWAP_WAIT */
186
187 #ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
188 #define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED 1000 /* maximum pages considered before we issue a pressure event */
189 #endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */
190
191 #ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
192 #define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS 5 /* seconds */
193 #endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */
194
195 unsigned int vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
196 unsigned int vm_page_speculative_percentage = 5;
197
198 #ifndef VM_PAGE_SPECULATIVE_TARGET
199 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
200 #endif /* VM_PAGE_SPECULATIVE_TARGET */
201
202
203 #ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
204 #define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
205 #endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */
206
207
208 /*
209 * To obtain a reasonable LRU approximation, the inactive queue
210 * needs to be large enough to give pages on it a chance to be
211 * referenced a second time. This macro defines the fraction
212 * of active+inactive pages that should be inactive.
213 * The pageout daemon uses it to update vm_page_inactive_target.
214 *
215 * If vm_page_free_count falls below vm_page_free_target and
216 * vm_page_inactive_count is below vm_page_inactive_target,
217 * then the pageout daemon starts running.
218 */
219
220 #ifndef VM_PAGE_INACTIVE_TARGET
221 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2)
222 #endif /* VM_PAGE_INACTIVE_TARGET */
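/*
 * A worked example of the macro above, with hypothetical page counts
 * (illustrative only): for 1,000,000 active+inactive pages,
 * VM_PAGE_INACTIVE_TARGET(1000000) = 1000000 * 1 / 2 = 500,000 pages,
 * i.e. roughly half of the pageable pages are kept inactive so each
 * gets a chance at a second reference before being reclaimed.
 */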
223
224 /*
225 * Once the pageout daemon starts running, it keeps going
226 * until vm_page_free_count meets or exceeds vm_page_free_target.
227 */
228
229 #ifndef VM_PAGE_FREE_TARGET
230 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
231 #endif /* VM_PAGE_FREE_TARGET */
232
233
234 /*
235 * The pageout daemon always starts running once vm_page_free_count
236 * falls below vm_page_free_min.
237 */
238
239 #ifndef VM_PAGE_FREE_MIN
240 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
241 #endif /* VM_PAGE_FREE_MIN */
242
243 #define VM_PAGE_FREE_RESERVED_LIMIT 1700
244 #define VM_PAGE_FREE_MIN_LIMIT 3500
245 #define VM_PAGE_FREE_TARGET_LIMIT 4000
246
247 /*
248 * When vm_page_free_count falls below vm_page_free_reserved,
249 * only vm-privileged threads can allocate pages. vm-privilege
250 * allows the pageout daemon and default pager (and any other
251 * associated threads needed for default pageout) to continue
252 * operation by dipping into the reserved pool of pages.
253 */
254
255 #ifndef VM_PAGE_FREE_RESERVED
256 #define VM_PAGE_FREE_RESERVED(n) \
257 ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
258 #endif /* VM_PAGE_FREE_RESERVED */
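/*
 * A worked example, assuming the default VM_PAGE_LAUNDRY_MAX of 128:
 * VM_PAGE_FREE_RESERVED(n) = 6 * 128 + n = 768 + n pages held back for
 * vm-privileged threads (the value of "n" is supplied by the caller).
 */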
259
260 /*
261 * When we dequeue pages from the inactive list, they are
262 * reactivated (ie, put back on the active queue) if referenced.
263 * However, it is possible to starve the free list if other
264 * processors are referencing pages faster than we can turn off
265 * the referenced bit. So we limit the number of reactivations
266 * we will make per call of vm_pageout_scan().
267 */
268 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
269 #ifndef VM_PAGE_REACTIVATE_LIMIT
270 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
271 #endif /* VM_PAGE_REACTIVATE_LIMIT */
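/*
 * A worked example with a hypothetical count: for 2,000,000 available
 * pages, VM_PAGE_REACTIVATE_LIMIT(2000000) = MAX(2000000 / 20, 20000)
 * = 100,000 reactivations per call of vm_pageout_scan(); the MAX()
 * keeps the limit from dropping below VM_PAGE_REACTIVATE_LIMIT_MAX on
 * smaller configurations.
 */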
272 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000
273
274
275 extern boolean_t hibernate_cleaning_in_progress;
276
277 /*
278 * Exported variable used to broadcast the activation of the pageout scan.
279 * Working Set uses this to throttle its use of pmap removes. In this
280 * way, code which runs within memory in an uncontested context does
281 * not keep encountering soft faults.
282 */
283
284 unsigned int vm_pageout_scan_event_counter = 0;
285
286 /*
287 * Forward declarations for internal routines.
288 */
289 struct cq {
290 struct vm_pageout_queue *q;
291 void *current_chead;
292 char *scratch_buf;
293 int id;
294 };
295 #define MAX_COMPRESSOR_THREAD_COUNT 8
296
297 struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
298
299 void *vm_pageout_immediate_chead;
300 char *vm_pageout_immediate_scratch_buf;
301
302
303 #if VM_PRESSURE_EVENTS
304 void vm_pressure_thread(void);
305
306 boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
307 boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
308
309 boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
310 boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
311 #endif
312 static void vm_pageout_garbage_collect(int);
313 static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
314 static void vm_pageout_iothread_external(void);
315 static void vm_pageout_iothread_internal(struct cq *cq);
316 static void vm_pageout_adjust_io_throttles(struct vm_pageout_queue *, struct vm_pageout_queue *, boolean_t);
317
318 extern void vm_pageout_continue(void);
319 extern void vm_pageout_scan(void);
320
321 static void vm_pageout_immediate(vm_page_t, boolean_t);
322 boolean_t vm_compressor_immediate_preferred = FALSE;
323 boolean_t vm_compressor_immediate_preferred_override = FALSE;
324 boolean_t vm_restricted_to_single_processor = FALSE;
325 static boolean_t vm_pageout_waiter = FALSE;
326 static boolean_t vm_pageout_running = FALSE;
327
328
329 static thread_t vm_pageout_external_iothread = THREAD_NULL;
330 static thread_t vm_pageout_internal_iothread = THREAD_NULL;
331
332 unsigned int vm_pageout_reserved_internal = 0;
333 unsigned int vm_pageout_reserved_really = 0;
334
335 unsigned int vm_pageout_swap_wait = 0;
336 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
337 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
338 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
339 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
340 unsigned int vm_pageout_deadlock_relief = 0;
341 unsigned int vm_pageout_inactive_relief = 0;
342 unsigned int vm_pageout_burst_active_throttle = 0;
343 unsigned int vm_pageout_burst_inactive_throttle = 0;
344
345 int vm_upl_wait_for_pages = 0;
346
347
348 /*
349 * These variables record the pageout daemon's actions:
350 * how many pages it looks at and what happens to those pages.
351 * No locking needed because only one thread modifies the variables.
352 */
353
354 unsigned int vm_pageout_active = 0; /* debugging */
355 unsigned int vm_pageout_inactive = 0; /* debugging */
356 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
357 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
358 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
359 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
360 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
361 unsigned int vm_pageout_inactive_error = 0; /* debugging */
362 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
363 unsigned int vm_pageout_inactive_notalive = 0; /* debugging */
364 unsigned int vm_pageout_inactive_used = 0; /* debugging */
365 unsigned int vm_pageout_cache_evicted = 0; /* debugging */
366 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
367 unsigned int vm_pageout_speculative_clean = 0; /* debugging */
368
369 unsigned int vm_pageout_freed_from_cleaned = 0;
370 unsigned int vm_pageout_freed_from_speculative = 0;
371 unsigned int vm_pageout_freed_from_inactive_clean = 0;
372
373 unsigned int vm_pageout_enqueued_cleaned_from_inactive_clean = 0;
374 unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;
375
376 unsigned int vm_pageout_cleaned_reclaimed = 0; /* debugging; how many cleaned pages are reclaimed by the pageout scan */
377 unsigned int vm_pageout_cleaned_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
378 unsigned int vm_pageout_cleaned_reference_reactivated = 0;
379 unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
380 unsigned int vm_pageout_cleaned_fault_reactivated = 0;
381 unsigned int vm_pageout_cleaned_commit_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */
382 unsigned int vm_pageout_cleaned_busy = 0;
383 unsigned int vm_pageout_cleaned_nolock = 0;
384
385 unsigned int vm_pageout_inactive_dirty_internal = 0; /* debugging */
386 unsigned int vm_pageout_inactive_dirty_external = 0; /* debugging */
387 unsigned int vm_pageout_inactive_deactivated = 0; /* debugging */
388 unsigned int vm_pageout_inactive_anonymous = 0; /* debugging */
389 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
390 unsigned int vm_pageout_purged_objects = 0; /* used for sysctl vm stats */
391 unsigned int vm_stat_discard = 0; /* debugging */
392 unsigned int vm_stat_discard_sent = 0; /* debugging */
393 unsigned int vm_stat_discard_failure = 0; /* debugging */
394 unsigned int vm_stat_discard_throttle = 0; /* debugging */
395 unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */
396 unsigned int vm_pageout_catch_ups = 0; /* debugging */
397 unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */
398
399 unsigned int vm_pageout_scan_reclaimed_throttled = 0;
400 unsigned int vm_pageout_scan_active_throttled = 0;
401 unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
402 unsigned int vm_pageout_scan_inactive_throttled_external = 0;
403 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
404 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
405 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
406 unsigned int vm_pageout_scan_swap_throttle = 0; /* debugging */
407 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
408 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
409 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
410 unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0; /* debugging */
411 unsigned int vm_pageout_scan_throttle_deferred = 0; /* debugging */
412 unsigned int vm_pageout_scan_yield_unthrottled = 0; /* debugging */
413 unsigned int vm_page_speculative_count_drifts = 0;
414 unsigned int vm_page_speculative_count_drift_max = 0;
415
416
417 /*
418 * Backing store throttle when BS is exhausted
419 */
420 unsigned int vm_backing_store_low = 0;
421
422 unsigned int vm_pageout_out_of_line = 0;
423 unsigned int vm_pageout_in_place = 0;
424
425 unsigned int vm_page_steal_pageout_page = 0;
426
427 /*
428 * ENCRYPTED SWAP:
429 * counters and statistics...
430 */
431 unsigned long vm_page_decrypt_counter = 0;
432 unsigned long vm_page_decrypt_for_upl_counter = 0;
433 unsigned long vm_page_encrypt_counter = 0;
434 unsigned long vm_page_encrypt_abort_counter = 0;
435 unsigned long vm_page_encrypt_already_encrypted_counter = 0;
436 boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */
437
438 struct vm_pageout_queue vm_pageout_queue_internal;
439 struct vm_pageout_queue vm_pageout_queue_external;
440
441 unsigned int vm_page_speculative_target = 0;
442
443 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
444
445 boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
446
447 #if DEVELOPMENT || DEBUG
448 unsigned long vm_cs_validated_resets = 0;
449 #endif
450
451 int vm_debug_events = 0;
452
453 #if CONFIG_MEMORYSTATUS
454 #if !CONFIG_JETSAM
455 extern boolean_t memorystatus_idle_exit_from_VM(void);
456 #endif
457 extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
458 extern void memorystatus_on_pageout_scan_end(void);
459 #endif
460
461 /*
462 * Routine: vm_backing_store_disable
463 * Purpose:
464 * Suspend non-privileged threads wishing to extend
465 * backing store when we are low on backing store
466 * (Synchronized by caller)
467 */
468 void
469 vm_backing_store_disable(
470 boolean_t disable)
471 {
472 if(disable) {
473 vm_backing_store_low = 1;
474 } else {
475 if(vm_backing_store_low) {
476 vm_backing_store_low = 0;
477 thread_wakeup((event_t) &vm_backing_store_low);
478 }
479 }
480 }
481
482
483 #if MACH_CLUSTER_STATS
484 unsigned long vm_pageout_cluster_dirtied = 0;
485 unsigned long vm_pageout_cluster_cleaned = 0;
486 unsigned long vm_pageout_cluster_collisions = 0;
487 unsigned long vm_pageout_cluster_clusters = 0;
488 unsigned long vm_pageout_cluster_conversions = 0;
489 unsigned long vm_pageout_target_collisions = 0;
490 unsigned long vm_pageout_target_page_dirtied = 0;
491 unsigned long vm_pageout_target_page_freed = 0;
492 #define CLUSTER_STAT(clause) clause
493 #else /* MACH_CLUSTER_STATS */
494 #define CLUSTER_STAT(clause)
495 #endif /* MACH_CLUSTER_STATS */
496
497 /*
498 * Routine: vm_pageout_object_terminate
499 * Purpose:
500 * Destroy the pageout_object, and perform all of the
501 * required cleanup actions.
502 *
503 * In/Out conditions:
504 * The object must be locked, and will be returned locked.
505 */
506 void
507 vm_pageout_object_terminate(
508 vm_object_t object)
509 {
510 vm_object_t shadow_object;
511
512 /*
513 * Deal with the deallocation (last reference) of a pageout object
514 * (used for cleaning-in-place) by dropping the paging references/
515 * freeing pages in the original object.
516 */
517
518 assert(object->pageout);
519 shadow_object = object->shadow;
520 vm_object_lock(shadow_object);
521
522 while (!queue_empty(&object->memq)) {
523 vm_page_t p, m;
524 vm_object_offset_t offset;
525
526 p = (vm_page_t) queue_first(&object->memq);
527
528 assert(p->private);
529 assert(p->pageout);
530 p->pageout = FALSE;
531 assert(!p->cleaning);
532 assert(!p->laundry);
533
534 offset = p->offset;
535 VM_PAGE_FREE(p);
536 p = VM_PAGE_NULL;
537
538 m = vm_page_lookup(shadow_object,
539 offset + object->vo_shadow_offset);
540
541 if(m == VM_PAGE_NULL)
542 continue;
543
544 assert((m->dirty) || (m->precious) ||
545 (m->busy && m->cleaning));
546
547 /*
548 * Handle the trusted pager throttle.
549 * Also decrement the burst throttle (if external).
550 */
551 vm_page_lock_queues();
552 if (m->pageout_queue)
553 vm_pageout_throttle_up(m);
554
555 /*
556 * Handle the "target" page(s). These pages are to be freed if
557 * successfully cleaned. Target pages are always busy, and are
558 * wired exactly once. The initial target pages are not mapped,
559 * (so cannot be referenced or modified) but converted target
560 * pages may have been modified between the selection as an
561 * adjacent page and conversion to a target.
562 */
563 if (m->pageout) {
564 assert(m->busy);
565 assert(m->wire_count == 1);
566 m->cleaning = FALSE;
567 m->encrypted_cleaning = FALSE;
568 m->pageout = FALSE;
569 #if MACH_CLUSTER_STATS
570 if (m->wanted) vm_pageout_target_collisions++;
571 #endif
572 /*
573 * Revoke all access to the page. Since the object is
574 * locked, and the page is busy, this prevents the page
575 * from being dirtied after the pmap_disconnect() call
576 * returns.
577 *
578 * Since the page is left "dirty" but "not modified", we
579 * can detect whether the page was redirtied during
580 * pageout by checking the modify state.
581 */
582 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) {
583 SET_PAGE_DIRTY(m, FALSE);
584 } else {
585 m->dirty = FALSE;
586 }
587
588 if (m->dirty) {
589 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
590 vm_page_unwire(m, TRUE); /* reactivates */
591 VM_STAT_INCR(reactivations);
592 PAGE_WAKEUP_DONE(m);
593 } else {
594 CLUSTER_STAT(vm_pageout_target_page_freed++;)
595 vm_page_free(m);/* clears busy, etc. */
596 }
597 vm_page_unlock_queues();
598 continue;
599 }
600 /*
601 * Handle the "adjacent" pages. These pages were cleaned in
602 * place, and should be left alone.
603 * If prep_pin_count is nonzero, then someone is using the
604 * page, so make it active.
605 */
606 if (!m->active && !m->inactive && !m->throttled && !m->private) {
607 if (m->reference)
608 vm_page_activate(m);
609 else
610 vm_page_deactivate(m);
611 }
612 if (m->overwriting) {
613 /*
614 * the (COPY_OUT_FROM == FALSE) request_page_list case
615 */
616 if (m->busy) {
617 /*
618 * We do not re-set m->dirty !
619 * The page was busy so no extraneous activity
620 * could have occurred. COPY_INTO is a read into the
621 * new pages. CLEAN_IN_PLACE does actually write
622 * out the pages but handling outside of this code
623 * will take care of resetting dirty. We clear the
624 * modify however for the Programmed I/O case.
625 */
626 pmap_clear_modify(m->phys_page);
627
628 m->busy = FALSE;
629 m->absent = FALSE;
630 } else {
631 /*
632 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
633 * Occurs when the original page was wired
634 * at the time of the list request
635 */
636 assert(VM_PAGE_WIRED(m));
637 vm_page_unwire(m, TRUE); /* reactivates */
638 }
639 m->overwriting = FALSE;
640 } else {
641 /*
642 * Set the dirty state according to whether or not the page was
643 * modified during the pageout. Note that we purposefully do
644 * NOT call pmap_clear_modify since the page is still mapped.
645 * If the page were to be dirtied between the 2 calls, this
646 * fact would be lost. This code is only necessary to
647 * maintain statistics, since the pmap module is always
648 * consulted if m->dirty is false.
649 */
650 #if MACH_CLUSTER_STATS
651 m->dirty = pmap_is_modified(m->phys_page);
652
653 if (m->dirty) vm_pageout_cluster_dirtied++;
654 else vm_pageout_cluster_cleaned++;
655 if (m->wanted) vm_pageout_cluster_collisions++;
656 #else
657 m->dirty = FALSE;
658 #endif
659 }
660 if (m->encrypted_cleaning == TRUE) {
661 m->encrypted_cleaning = FALSE;
662 m->busy = FALSE;
663 }
664 m->cleaning = FALSE;
665
666 /*
667 * Wakeup any thread waiting for the page to be un-cleaning.
668 */
669 PAGE_WAKEUP(m);
670 vm_page_unlock_queues();
671 }
672 /*
673 * Account for the paging reference taken in vm_paging_object_allocate.
674 */
675 vm_object_activity_end(shadow_object);
676 vm_object_unlock(shadow_object);
677
678 assert(object->ref_count == 0);
679 assert(object->paging_in_progress == 0);
680 assert(object->activity_in_progress == 0);
681 assert(object->resident_page_count == 0);
682 return;
683 }
684
685 /*
686 * Routine: vm_pageclean_setup
687 *
688 * Purpose: setup a page to be cleaned (made non-dirty), but not
689 * necessarily flushed from the VM page cache.
690 * This is accomplished by cleaning in place.
691 *
692 * The page must not be busy, and new_object
693 * must be locked.
694 *
695 */
696 static void
697 vm_pageclean_setup(
698 vm_page_t m,
699 vm_page_t new_m,
700 vm_object_t new_object,
701 vm_object_offset_t new_offset)
702 {
703 assert(!m->busy);
704 #if 0
705 assert(!m->cleaning);
706 #endif
707
708 XPR(XPR_VM_PAGEOUT,
709 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
710 m->object, m->offset, m,
711 new_m, new_offset);
712
713 pmap_clear_modify(m->phys_page);
714
715 /*
716 * Mark original page as cleaning in place.
717 */
718 m->cleaning = TRUE;
719 SET_PAGE_DIRTY(m, FALSE);
720 m->precious = FALSE;
721
722 /*
723 * Convert the fictitious page to a private shadow of
724 * the real page.
725 */
726 assert(new_m->fictitious);
727 assert(new_m->phys_page == vm_page_fictitious_addr);
728 new_m->fictitious = FALSE;
729 new_m->private = TRUE;
730 new_m->pageout = TRUE;
731 new_m->phys_page = m->phys_page;
732
733 vm_page_lockspin_queues();
734 vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
735 vm_page_unlock_queues();
736
737 vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
738 assert(!new_m->wanted);
739 new_m->busy = FALSE;
740 }
741
742 /*
743 * Routine: vm_pageout_initialize_page
744 * Purpose:
745 * Causes the specified page to be initialized in
746 * the appropriate memory object. This routine is used to push
747 * pages into a copy-object when they are modified in the
748 * permanent object.
749 *
750 * The page is moved to a temporary object and paged out.
751 *
752 * In/out conditions:
753 * The page in question must not be on any pageout queues.
754 * The object to which it belongs must be locked.
755 * The page must be busy, but not hold a paging reference.
756 *
757 * Implementation:
758 * Move this page to a completely new object.
759 */
760 void
761 vm_pageout_initialize_page(
762 vm_page_t m)
763 {
764 vm_object_t object;
765 vm_object_offset_t paging_offset;
766 memory_object_t pager;
767
768 XPR(XPR_VM_PAGEOUT,
769 "vm_pageout_initialize_page, page 0x%X\n",
770 m, 0, 0, 0, 0);
771 assert(m->busy);
772
773 /*
774 * Verify that we really want to clean this page
775 */
776 assert(!m->absent);
777 assert(!m->error);
778 assert(m->dirty);
779
780 /*
781 * Create a paging reference to let us play with the object.
782 */
783 object = m->object;
784 paging_offset = m->offset + object->paging_offset;
785
786 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
787 VM_PAGE_FREE(m);
788 panic("reservation without pageout?"); /* alan */
789 vm_object_unlock(object);
790
791 return;
792 }
793
794 /*
795 * If there's no pager, then we can't clean the page. This should
796 * never happen since this should be a copy object and therefore not
797 * an external object, so the pager should always be there.
798 */
799
800 pager = object->pager;
801
802 if (pager == MEMORY_OBJECT_NULL) {
803 VM_PAGE_FREE(m);
804 panic("missing pager for copy object");
805 return;
806 }
807
808 /*
809 * set the page for future call to vm_fault_list_request
810 */
811 pmap_clear_modify(m->phys_page);
812 SET_PAGE_DIRTY(m, FALSE);
813 m->pageout = TRUE;
814
815 /*
816 * keep the object from collapsing or terminating
817 */
818 vm_object_paging_begin(object);
819 vm_object_unlock(object);
820
821 /*
822 * Write the data to its pager.
823 * Note that the data is passed by naming the new object,
824 * not a virtual address; the pager interface has been
825 * manipulated to use the "internal memory" data type.
826 * [The object reference from its allocation is donated
827 * to the eventual recipient.]
828 */
829 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
830
831 vm_object_lock(object);
832 vm_object_paging_end(object);
833 }
834
835 #if MACH_CLUSTER_STATS
836 #define MAXCLUSTERPAGES 16
837 struct {
838 unsigned long pages_in_cluster;
839 unsigned long pages_at_higher_offsets;
840 unsigned long pages_at_lower_offsets;
841 } cluster_stats[MAXCLUSTERPAGES];
842 #endif /* MACH_CLUSTER_STATS */
843
844
845 /*
846 * vm_pageout_cluster:
847 *
848 * Given a page, queue it to the appropriate I/O thread,
849 * which will page it out and attempt to clean adjacent pages
850 * in the same operation.
851 *
852 * The object and queues must be locked. We will take a
853 * paging reference to prevent deallocation or collapse when we
854 * release the object lock back at the call site. The I/O thread
855 * is responsible for consuming this reference.
856 *
857 * The page must not be on any pageout queue.
858 */
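/*
 * A usage sketch (see the real call site in vm_pageout_page_queue()
 * below): with the page queues lock and the page's object lock held,
 * and a dirty or precious page that is not wired and not already
 * being laundered:
 *
 *	vm_page_queues_remove(m);
 *	vm_pageout_cluster(m, TRUE, FALSE, FALSE);
 *
 * A return of 0 means the page was queued for one of the pageout I/O
 * threads; a return of 1 means it was pushed synchronously to the
 * compressor via vm_pageout_immediate() and the page queues lock was
 * dropped (only possible when immediate_ok is TRUE).
 */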
859
860 int
861 vm_pageout_cluster(vm_page_t m, boolean_t pageout, boolean_t immediate_ok, boolean_t keep_object_locked)
862 {
863 vm_object_t object = m->object;
864 struct vm_pageout_queue *q;
865
866
867 XPR(XPR_VM_PAGEOUT,
868 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
869 object, m->offset, m, 0, 0);
870
871 VM_PAGE_CHECK(m);
872 #if DEBUG
873 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
874 #endif
875 vm_object_lock_assert_exclusive(object);
876
877 /*
878 * Only a certain kind of page is appreciated here.
879 */
880 assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
881 assert(!m->cleaning && !m->pageout && !m->laundry);
882 #ifndef CONFIG_FREEZE
883 assert(!m->inactive && !m->active);
884 assert(!m->throttled);
885 #endif
886
887 /*
888 * protect the object from collapse or termination
889 */
890 vm_object_activity_begin(object);
891
892 m->pageout = pageout;
893
894 if (object->internal == TRUE) {
895 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
896 m->busy = TRUE;
897
898 if (vm_compressor_immediate_preferred == TRUE && immediate_ok == TRUE) {
899 if (keep_object_locked == FALSE)
900 vm_object_unlock(object);
901 vm_page_unlock_queues();
902
903 vm_pageout_immediate(m, keep_object_locked);
904
905 return (1);
906 }
907 }
908 q = &vm_pageout_queue_internal;
909 } else
910 q = &vm_pageout_queue_external;
911
912 /*
913 * pgo_laundry count is tied to the laundry bit
914 */
915 m->laundry = TRUE;
916 q->pgo_laundry++;
917
918 m->pageout_queue = TRUE;
919 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
920
921 if (q->pgo_idle == TRUE) {
922 q->pgo_idle = FALSE;
923 thread_wakeup((event_t) &q->pgo_pending);
924 }
925 VM_PAGE_CHECK(m);
926
927 return (0);
928 }
929
930
931 unsigned long vm_pageout_throttle_up_count = 0;
932
933 /*
934 * A page is back from laundry or we are stealing it back from
935 * the laundering state. See if there are some pages waiting to
936 * go to laundry and if we can let some of them go now.
937 *
938 * Object and page queues must be locked.
939 */
940 void
941 vm_pageout_throttle_up(
942 vm_page_t m)
943 {
944 struct vm_pageout_queue *q;
945
946 assert(m->object != VM_OBJECT_NULL);
947 assert(m->object != kernel_object);
948
949 #if DEBUG
950 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
951 vm_object_lock_assert_exclusive(m->object);
952 #endif
953
954 vm_pageout_throttle_up_count++;
955
956 if (m->object->internal == TRUE)
957 q = &vm_pageout_queue_internal;
958 else
959 q = &vm_pageout_queue_external;
960
961 if (m->pageout_queue == TRUE) {
962
963 queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
964 m->pageout_queue = FALSE;
965
966 m->pageq.next = NULL;
967 m->pageq.prev = NULL;
968
969 vm_object_activity_end(m->object);
970 }
971 if (m->laundry == TRUE) {
972
973 m->laundry = FALSE;
974 q->pgo_laundry--;
975
976 if (q->pgo_throttled == TRUE) {
977 q->pgo_throttled = FALSE;
978 thread_wakeup((event_t) &q->pgo_laundry);
979 }
980 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
981 q->pgo_draining = FALSE;
982 thread_wakeup((event_t) (&q->pgo_laundry+1));
983 }
984 }
985 }
986
987
988 static void
989 vm_pageout_throttle_up_batch(
990 struct vm_pageout_queue *q,
991 int batch_cnt)
992 {
993 #if DEBUG
994 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
995 #endif
996
997 vm_pageout_throttle_up_count += batch_cnt;
998
999 q->pgo_laundry -= batch_cnt;
1000
1001 if (q->pgo_throttled == TRUE) {
1002 q->pgo_throttled = FALSE;
1003 thread_wakeup((event_t) &q->pgo_laundry);
1004 }
1005 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
1006 q->pgo_draining = FALSE;
1007 thread_wakeup((event_t) (&q->pgo_laundry+1));
1008 }
1009 }
1010
1011
1012
1013 /*
1014 * VM memory pressure monitoring.
1015 *
1016 * vm_pageout_scan() keeps track of the number of pages it considers and
1017 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
1018 *
1019 * compute_memory_pressure() is called every second from compute_averages()
1020 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
1021 * of reclaimed pages in a new vm_pageout_stat[] bucket.
1022 *
1023 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
1024 * The caller provides the number of seconds ("nsecs") worth of statistics
1025 * it wants, up to 30 seconds.
1026 * It computes the number of pages reclaimed in the past "nsecs" seconds and
1027 * also returns the number of pages the system still needs to reclaim at this
1028 * moment in time.
1029 */
1030 #define VM_PAGEOUT_STAT_SIZE 31
1031 struct vm_pageout_stat {
1032 unsigned int considered;
1033 unsigned int reclaimed;
1034 } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0}, };
1035 unsigned int vm_pageout_stat_now = 0;
1036 unsigned int vm_memory_pressure = 0;
1037
1038 #define VM_PAGEOUT_STAT_BEFORE(i) \
1039 (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
1040 #define VM_PAGEOUT_STAT_AFTER(i) \
1041 (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
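/*
 * Example of the ring-buffer arithmetic above: with
 * VM_PAGEOUT_STAT_SIZE == 31, VM_PAGEOUT_STAT_BEFORE(0) == 30 and
 * VM_PAGEOUT_STAT_AFTER(30) == 0, so vm_pageout_stat_now advances
 * through the 31 one-second buckets and mach_vm_pressure_monitor()
 * can walk backwards over up to 30 seconds of history.
 */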
1042
1043 #if VM_PAGE_BUCKETS_CHECK
1044 int vm_page_buckets_check_interval = 10; /* in seconds */
1045 #endif /* VM_PAGE_BUCKETS_CHECK */
1046
1047 /*
1048 * Called from compute_averages().
1049 */
1050 void
1051 compute_memory_pressure(
1052 __unused void *arg)
1053 {
1054 unsigned int vm_pageout_next;
1055
1056 #if VM_PAGE_BUCKETS_CHECK
1057 /* check the consistency of VM page buckets at regular interval */
1058 static int counter = 0;
1059 if ((++counter % vm_page_buckets_check_interval) == 0) {
1060 vm_page_buckets_check();
1061 }
1062 #endif /* VM_PAGE_BUCKETS_CHECK */
1063
1064 vm_memory_pressure =
1065 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed;
1066
1067 commpage_set_memory_pressure( vm_memory_pressure );
1068
1069 /* move "now" forward */
1070 vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
1071 vm_pageout_stats[vm_pageout_next].considered = 0;
1072 vm_pageout_stats[vm_pageout_next].reclaimed = 0;
1073 vm_pageout_stat_now = vm_pageout_next;
1074 }
1075
1076
1077 /*
1078 * IMPORTANT
1079 * mach_vm_ctl_page_free_wanted() is called indirectly, via
1080 * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
1081 * it must be safe in the restricted stackshot context. Locks and/or
1082 * blocking are not allowable.
1083 */
1084 unsigned int
1085 mach_vm_ctl_page_free_wanted(void)
1086 {
1087 unsigned int page_free_target, page_free_count, page_free_wanted;
1088
1089 page_free_target = vm_page_free_target;
1090 page_free_count = vm_page_free_count;
1091 if (page_free_target > page_free_count) {
1092 page_free_wanted = page_free_target - page_free_count;
1093 } else {
1094 page_free_wanted = 0;
1095 }
1096
1097 return page_free_wanted;
1098 }
1099
1100
1101 /*
1102 * IMPORTANT:
1103 * mach_vm_pressure_monitor() is called when taking a stackshot, with
1104 * wait_for_pressure FALSE, so that code path must remain safe in the
1105 * restricted stackshot context. No blocking or locks are allowable
1106 * on that code path.
1107 */
1108
1109 kern_return_t
1110 mach_vm_pressure_monitor(
1111 boolean_t wait_for_pressure,
1112 unsigned int nsecs_monitored,
1113 unsigned int *pages_reclaimed_p,
1114 unsigned int *pages_wanted_p)
1115 {
1116 wait_result_t wr;
1117 unsigned int vm_pageout_then, vm_pageout_now;
1118 unsigned int pages_reclaimed;
1119
1120 /*
1121 * We don't take the vm_page_queue_lock here because we don't want
1122 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
1123 * thread when it's trying to reclaim memory. We don't need fully
1124 * accurate monitoring anyway...
1125 */
1126
1127 if (wait_for_pressure) {
1128 /* wait until there's memory pressure */
1129 while (vm_page_free_count >= vm_page_free_target) {
1130 wr = assert_wait((event_t) &vm_page_free_wanted,
1131 THREAD_INTERRUPTIBLE);
1132 if (wr == THREAD_WAITING) {
1133 wr = thread_block(THREAD_CONTINUE_NULL);
1134 }
1135 if (wr == THREAD_INTERRUPTED) {
1136 return KERN_ABORTED;
1137 }
1138 if (wr == THREAD_AWAKENED) {
1139 /*
1140 * The memory pressure might have already
1141 * been relieved but let's not block again
1142 * and let's report that there was memory
1143 * pressure at some point.
1144 */
1145 break;
1146 }
1147 }
1148 }
1149
1150 /* provide the number of pages the system wants to reclaim */
1151 if (pages_wanted_p != NULL) {
1152 *pages_wanted_p = mach_vm_ctl_page_free_wanted();
1153 }
1154
1155 if (pages_reclaimed_p == NULL) {
1156 return KERN_SUCCESS;
1157 }
1158
1159 /* provide number of pages reclaimed in the last "nsecs_monitored" */
1160 do {
1161 vm_pageout_now = vm_pageout_stat_now;
1162 pages_reclaimed = 0;
1163 for (vm_pageout_then =
1164 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1165 vm_pageout_then != vm_pageout_now &&
1166 nsecs_monitored-- != 0;
1167 vm_pageout_then =
1168 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1169 pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed;
1170 }
1171 } while (vm_pageout_now != vm_pageout_stat_now);
1172 *pages_reclaimed_p = pages_reclaimed;
1173
1174 return KERN_SUCCESS;
1175 }
1176
1177
1178
1179 static void
1180 vm_pageout_page_queue(queue_head_t *, int);
1181
1182 /*
1183 * condition variable used to make sure there is
1184 * only a single sweep going on at a time
1185 */
1186 boolean_t vm_pageout_anonymous_pages_active = FALSE;
1187
1188
1189 void
1190 vm_pageout_anonymous_pages()
1191 {
1192 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
1193
1194 vm_page_lock_queues();
1195
1196 if (vm_pageout_anonymous_pages_active == TRUE) {
1197 vm_page_unlock_queues();
1198 return;
1199 }
1200 vm_pageout_anonymous_pages_active = TRUE;
1201 vm_page_unlock_queues();
1202
1203 vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1204 vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1205 vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);
1206
1207 vm_consider_swapping();
1208
1209 vm_page_lock_queues();
1210 vm_pageout_anonymous_pages_active = FALSE;
1211 vm_page_unlock_queues();
1212 }
1213 }
1214
1215
1216 void
1217 vm_pageout_page_queue(queue_head_t *q, int qcount)
1218 {
1219 vm_page_t m;
1220 vm_object_t t_object = NULL;
1221 vm_object_t l_object = NULL;
1222 vm_object_t m_object = NULL;
1223 int delayed_unlock = 0;
1224 int try_failed_count = 0;
1225 int refmod_state;
1226 int pmap_options;
1227 struct vm_pageout_queue *iq;
1228
1229
1230 iq = &vm_pageout_queue_internal;
1231
1232 vm_page_lock_queues();
1233
1234 while (qcount && !queue_empty(q)) {
1235
1236 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1237
1238 if (VM_PAGE_Q_THROTTLED(iq)) {
1239
1240 if (l_object != NULL) {
1241 vm_object_unlock(l_object);
1242 l_object = NULL;
1243 }
1244 iq->pgo_draining = TRUE;
1245
1246 assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
1247 vm_page_unlock_queues();
1248
1249 thread_block(THREAD_CONTINUE_NULL);
1250
1251 vm_page_lock_queues();
1252 delayed_unlock = 0;
1253 continue;
1254 }
1255 m = (vm_page_t) queue_first(q);
1256 m_object = m->object;
1257
1258 /*
1259 * check to see if we currently are working
1260 * with the same object... if so, we've
1261 * already got the lock
1262 */
1263 if (m_object != l_object) {
1264 if ( !m_object->internal)
1265 goto reenter_pg_on_q;
1266
1267 /*
1268 * the object associated with candidate page is
1269 * different from the one we were just working
1270 * with... dump the lock if we still own it
1271 */
1272 if (l_object != NULL) {
1273 vm_object_unlock(l_object);
1274 l_object = NULL;
1275 }
1276 if (m_object != t_object)
1277 try_failed_count = 0;
1278
1279 /*
1280 * Try to lock object; since we've already got the
1281 * page queues lock, we can only 'try' for this one.
1282 * if the 'try' fails, we need to do a mutex_pause
1283 * to allow the owner of the object lock a chance to
1284 * run...
1285 */
1286 if ( !vm_object_lock_try_scan(m_object)) {
1287
1288 if (try_failed_count > 20) {
1289 goto reenter_pg_on_q;
1290 }
1291 vm_page_unlock_queues();
1292 mutex_pause(try_failed_count++);
1293 vm_page_lock_queues();
1294 delayed_unlock = 0;
1295
1296 t_object = m_object;
1297 continue;
1298 }
1299 l_object = m_object;
1300 }
1301 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error || m->pageout) {
1302 /*
1303 * page is not to be cleaned
1304 * put it back on the head of its queue
1305 */
1306 goto reenter_pg_on_q;
1307 }
1308 if (m->reference == FALSE && m->pmapped == TRUE) {
1309 refmod_state = pmap_get_refmod(m->phys_page);
1310
1311 if (refmod_state & VM_MEM_REFERENCED)
1312 m->reference = TRUE;
1313 if (refmod_state & VM_MEM_MODIFIED) {
1314 SET_PAGE_DIRTY(m, FALSE);
1315 }
1316 }
1317 if (m->reference == TRUE) {
1318 m->reference = FALSE;
1319 pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1320 goto reenter_pg_on_q;
1321 }
1322 if (m->pmapped == TRUE) {
1323 if (m->dirty || m->precious) {
1324 pmap_options = PMAP_OPTIONS_COMPRESSOR;
1325 } else {
1326 pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1327 }
1328 refmod_state = pmap_disconnect_options(m->phys_page, pmap_options, NULL);
1329 if (refmod_state & VM_MEM_MODIFIED) {
1330 SET_PAGE_DIRTY(m, FALSE);
1331 }
1332 }
1333 if ( !m->dirty && !m->precious) {
1334 vm_page_unlock_queues();
1335 VM_PAGE_FREE(m);
1336 vm_page_lock_queues();
1337 delayed_unlock = 0;
1338
1339 goto next_pg;
1340 }
1341 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
1342
1343 if (!m_object->pager_initialized) {
1344
1345 vm_page_unlock_queues();
1346
1347 vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);
1348
1349 if (!m_object->pager_initialized)
1350 vm_object_compressor_pager_create(m_object);
1351
1352 vm_page_lock_queues();
1353 delayed_unlock = 0;
1354 }
1355 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)
1356 goto reenter_pg_on_q;
1357 /*
1358 * vm_object_compressor_pager_create will drop the object lock
1359 * which means 'm' may no longer be valid to use
1360 */
1361 continue;
1362 }
1363 /*
1364 * we've already factored out pages in the laundry which
1365 * means this page can't be on the pageout queue so it's
1366 * safe to do the vm_page_queues_remove
1367 */
1368 assert(!m->pageout_queue);
1369
1370 vm_page_queues_remove(m);
1371
1372 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1373
1374 vm_pageout_cluster(m, TRUE, FALSE, FALSE);
1375
1376 goto next_pg;
1377
1378 reenter_pg_on_q:
1379 queue_remove(q, m, vm_page_t, pageq);
1380 queue_enter(q, m, vm_page_t, pageq);
1381 next_pg:
1382 qcount--;
1383 try_failed_count = 0;
1384
1385 if (delayed_unlock++ > 128) {
1386
1387 if (l_object != NULL) {
1388 vm_object_unlock(l_object);
1389 l_object = NULL;
1390 }
1391 lck_mtx_yield(&vm_page_queue_lock);
1392 delayed_unlock = 0;
1393 }
1394 }
1395 if (l_object != NULL) {
1396 vm_object_unlock(l_object);
1397 l_object = NULL;
1398 }
1399 vm_page_unlock_queues();
1400 }
1401
1402
1403
1404 /*
1405 * function in BSD to apply I/O throttle to the pageout thread
1406 */
1407 extern void vm_pageout_io_throttle(void);
1408
1409 /*
1410 * Page States: Used below to maintain the page state
1411 * before it's removed from its Q. This saved state
1412 * helps us do the right accounting in certain cases
1413 */
1414 #define PAGE_STATE_SPECULATIVE 1
1415 #define PAGE_STATE_ANONYMOUS 2
1416 #define PAGE_STATE_INACTIVE 3
1417 #define PAGE_STATE_INACTIVE_FIRST 4
1418 #define PAGE_STATE_CLEAN 5
1419
1420
1421 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m) \
1422 MACRO_BEGIN \
1423 /* \
1424 * If a "reusable" page somehow made it back into \
1425 * the active queue, it's been re-used and is not \
1426 * quite re-usable. \
1427 * If the VM object was "all_reusable", consider it \
1428 * as "all re-used" instead of converting it to \
1429 * "partially re-used", which could be expensive. \
1430 */ \
1431 if ((m)->reusable || \
1432 (m)->object->all_reusable) { \
1433 vm_object_reuse_pages((m)->object, \
1434 (m)->offset, \
1435 (m)->offset + PAGE_SIZE_64, \
1436 FALSE); \
1437 } \
1438 MACRO_END
1439
1440
1441 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64
1442 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024
1443
1444 #define FCS_IDLE 0
1445 #define FCS_DELAYED 1
1446 #define FCS_DEADLOCK_DETECTED 2
1447
1448 struct flow_control {
1449 int state;
1450 mach_timespec_t ts;
1451 };
1452
1453 uint32_t vm_pageout_considered_page = 0;
1454 uint32_t vm_page_filecache_min = 0;
1455
1456 #define ANONS_GRABBED_LIMIT 2
1457
1458 /*
1459 * vm_pageout_scan does the dirty work for the pageout daemon.
1460 * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
1461 * held and vm_page_free_wanted == 0.
1462 */
1463 void
1464 vm_pageout_scan(void)
1465 {
1466 unsigned int loop_count = 0;
1467 unsigned int inactive_burst_count = 0;
1468 unsigned int active_burst_count = 0;
1469 unsigned int reactivated_this_call;
1470 unsigned int reactivate_limit;
1471 vm_page_t local_freeq = NULL;
1472 int local_freed = 0;
1473 int delayed_unlock;
1474 int delayed_unlock_limit = 0;
1475 int refmod_state = 0;
1476 int vm_pageout_deadlock_target = 0;
1477 struct vm_pageout_queue *iq;
1478 struct vm_pageout_queue *eq;
1479 struct vm_speculative_age_q *sq;
1480 struct flow_control flow_control = { 0, { 0, 0 } };
1481 boolean_t inactive_throttled = FALSE;
1482 boolean_t try_failed;
1483 mach_timespec_t ts;
1484 unsigned int msecs = 0;
1485 vm_object_t object;
1486 vm_object_t last_object_tried;
1487 uint32_t catch_up_count = 0;
1488 uint32_t inactive_reclaim_run;
1489 boolean_t forced_reclaim;
1490 boolean_t exceeded_burst_throttle;
1491 boolean_t grab_anonymous = FALSE;
1492 boolean_t force_anonymous = FALSE;
1493 int anons_grabbed = 0;
1494 int page_prev_state = 0;
1495 int cache_evict_throttle = 0;
1496 uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0;
1497 int force_purge = 0;
1498 #define DELAY_SPECULATIVE_AGE 1000
1499 int delay_speculative_age = 0;
1500
1501 #if VM_PRESSURE_EVENTS
1502 vm_pressure_level_t pressure_level;
1503 #endif /* VM_PRESSURE_EVENTS */
1504
1505 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
1506 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
1507 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
1508
1509 flow_control.state = FCS_IDLE;
1510 iq = &vm_pageout_queue_internal;
1511 eq = &vm_pageout_queue_external;
1512 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1513
1514
1515 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1516
1517
1518 vm_page_lock_queues();
1519 delayed_unlock = 1; /* must be nonzero if Qs are locked, 0 if unlocked */
1520
1521 /*
1522 * Calculate the max number of referenced pages on the inactive
1523 * queue that we will reactivate.
1524 */
1525 reactivated_this_call = 0;
1526 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1527 vm_page_inactive_count);
1528 inactive_reclaim_run = 0;
1529
1530 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
1531
1532 /*
1533 * We want to gradually dribble pages from the active queue
1534 * to the inactive queue. If we let the inactive queue get
1535 * very small, and then suddenly dump many pages into it,
1536 * those pages won't get a sufficient chance to be referenced
1537 * before we start taking them from the inactive queue.
1538 *
1539 * We must limit the rate at which we send pages to the pagers
1540 * so that we don't tie up too many pages in the I/O queues.
1541 * We implement a throttling mechanism using the laundry count
1542 * to limit the number of pages outstanding to the default
1543 * and external pagers. We can bypass the throttles and look
1544 * for clean pages if the pageout queues don't drain in a timely
1545 * fashion since this may indicate that the pageout paths are
1546 * stalled waiting for memory, which only we can provide.
1547 */
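/*
 * Concretely (using the defaults defined above): at most
 * VM_PAGE_LAUNDRY_MAX (128) pageouts are kept outstanding per queue,
 * and the scan sleeps on the order of VM_PAGEOUT_BURST_WAIT (10 ms)
 * to VM_PAGEOUT_EMPTY_WAIT (200 ms) waiting for the laundry to drain
 * before bypassing the throttles to look for clean pages.
 */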
1548
1549
1550 Restart:
1551 assert(delayed_unlock!=0);
1552
1553 /*
1554 * Recalculate vm_page_inactive_target.
1555 */
1556 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1557 vm_page_inactive_count +
1558 vm_page_speculative_count);
1559
1560 vm_page_anonymous_min = vm_page_inactive_target / 20;
1561
1562
1563 /*
1564 * don't want to wake the pageout_scan thread up every time we fall below
1565 * the targets... set a low water mark at 0.25% below the target
1566 */
1567 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);
1568
1569 if (vm_page_speculative_percentage > 50)
1570 vm_page_speculative_percentage = 50;
1571 else if (vm_page_speculative_percentage <= 0)
1572 vm_page_speculative_percentage = 1;
1573
1574 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1575 vm_page_inactive_count);
1576
1577 object = NULL;
1578 last_object_tried = NULL;
1579 try_failed = FALSE;
1580
1581 if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count))
1582 catch_up_count = vm_page_inactive_count + vm_page_speculative_count;
1583 else
1584 catch_up_count = 0;
1585
1586 for (;;) {
1587 vm_page_t m;
1588
1589 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1590
1591 assert(delayed_unlock);
1592
1593 if (vm_upl_wait_for_pages < 0)
1594 vm_upl_wait_for_pages = 0;
1595
1596 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
1597
1598 if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
1599 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
1600
1601 /*
1602 * Move pages from active to inactive if we're below the target
1603 */
1604 /* if we are trying to make clean, we need to make sure we actually have inactive - mj */
1605 if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
1606 goto done_moving_active_pages;
1607
1608 if (object != NULL) {
1609 vm_object_unlock(object);
1610 object = NULL;
1611 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1612 }
1613 /*
1614 * Don't sweep through active queue more than the throttle
1615 * which should be kept relatively low
1616 */
1617 active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count);
1618
1619 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START,
1620 vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed);
1621
1622 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE,
1623 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
1624 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
1625 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START);
1626
1627
1628 while (!queue_empty(&vm_page_queue_active) && active_burst_count--) {
1629
1630 vm_pageout_active++;
1631
1632 m = (vm_page_t) queue_first(&vm_page_queue_active);
1633
1634 assert(m->active && !m->inactive);
1635 assert(!m->laundry);
1636 assert(m->object != kernel_object);
1637 assert(m->phys_page != vm_page_guard_addr);
1638
1639 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1640
1641 /*
1642 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
1643 *
1644 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
1645 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
1646 * new reference happens. If no further references happen on the page after that remote TLB flushes
1647 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
1648 * by pageout_scan, which is just fine since the last reference would have happened quite far
1649 * in the past (TLB caches don't hang around for very long), and of course could just as easily
1650 * have happened before we moved the page
1651 */
1652 pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1653
1654 /*
1655 * The page might be absent or busy,
1656 * but vm_page_deactivate can handle that.
1657 * FALSE indicates that we don't want a H/W clear reference
1658 */
1659 vm_page_deactivate_internal(m, FALSE);
1660
1661 if (delayed_unlock++ > delayed_unlock_limit) {
1662
1663 if (local_freeq) {
1664 vm_page_unlock_queues();
1665
1666 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1667 vm_page_free_count, local_freed, delayed_unlock_limit, 1);
1668
1669 vm_page_free_list(local_freeq, TRUE);
1670
1671 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1672 vm_page_free_count, 0, 0, 1);
1673
1674 local_freeq = NULL;
1675 local_freed = 0;
1676 vm_page_lock_queues();
1677 } else {
1678 lck_mtx_yield(&vm_page_queue_lock);
1679 }
1680
1681 delayed_unlock = 1;
1682
1683 /*
1684 * continue the while loop processing
1685 * the active queue... need to hold
1686 * the page queues lock
1687 */
1688 }
1689 }
1690
1691 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END,
1692 vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target);
1693 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END);
1694
1695 /**********************************************************************
1696 * above this point we're playing with the active queue
1697 * below this point we're playing with the throttling mechanisms
1698 * and the inactive queue
1699 **********************************************************************/
1700
1701 done_moving_active_pages:
1702
1703 if (vm_page_free_count + local_freed >= vm_page_free_target) {
1704 if (object != NULL) {
1705 vm_object_unlock(object);
1706 object = NULL;
1707 }
1708 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1709
1710 vm_page_unlock_queues();
1711
1712 if (local_freeq) {
1713
1714 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1715 vm_page_free_count, local_freed, delayed_unlock_limit, 2);
1716
1717 vm_page_free_list(local_freeq, TRUE);
1718
1719 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1720 vm_page_free_count, local_freed, 0, 2);
1721
1722 local_freeq = NULL;
1723 local_freed = 0;
1724 }
1725 vm_consider_waking_compactor_swapper();
1726
1727 vm_page_lock_queues();
1728
1729 /*
1730 * make sure the pageout I/O threads are running
1731 * throttled in case there are still requests
1732 * in the laundry... since we have met our targets
1733 * we don't need the laundry to be cleaned in a timely
1734 * fashion... so let's avoid interfering with foreground
1735 * activity
1736 */
1737 vm_pageout_adjust_io_throttles(iq, eq, TRUE);
1738
1739 /*
1740 * recalculate vm_page_inactive_target
1741 */
1742 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1743 vm_page_inactive_count +
1744 vm_page_speculative_count);
1745 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
1746 !queue_empty(&vm_page_queue_active)) {
1747 /*
1748 * inactive target still not met... keep going
1749 * until we get the queues balanced...
1750 */
1751 continue;
1752 }
1753 lck_mtx_lock(&vm_page_queue_free_lock);
1754
1755 if ((vm_page_free_count >= vm_page_free_target) &&
1756 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
1757 /*
1758 * done - we have met our target *and*
1759 * there is no one waiting for a page.
1760 */
1761 return_from_scan:
1762 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1763
1764 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
1765 vm_pageout_inactive, vm_pageout_inactive_used, 0, 0);
1766 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
1767 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
1768 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
1769
1770 return;
1771 }
1772 lck_mtx_unlock(&vm_page_queue_free_lock);
1773 }
1774
1775 /*
1776 * Before anything, we check if we have any ripe volatile
1777 * objects around. If so, try to purge the first object.
1778 * If the purge fails, fall through to reclaim a page instead.
1779 * If the purge succeeds, go back to the top and reevaluate
1780 * the new memory situation.
1781 */
1782
1783 assert(available_for_purge >= 0);
1784 force_purge = 0; /* no force-purging */
1785
1786 #if VM_PRESSURE_EVENTS
1787 pressure_level = memorystatus_vm_pressure_level;
1788
1789 if (pressure_level > kVMPressureNormal) {
1790
1791 if (pressure_level >= kVMPressureCritical) {
1792 force_purge = memorystatus_purge_on_critical;
1793 } else if (pressure_level >= kVMPressureUrgent) {
1794 force_purge = memorystatus_purge_on_urgent;
1795 } else if (pressure_level >= kVMPressureWarning) {
1796 force_purge = memorystatus_purge_on_warning;
1797 }
1798 }
1799 #endif /* VM_PRESSURE_EVENTS */
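/*
 * Editorial note (added, not in the original source): force_purge is handed
 * to vm_purgeable_object_purge_one() below as a purge-group threshold, so a
 * deeper pressure level makes lower-priority volatile objects eligible for
 * purging.  The memorystatus_purge_on_* tunables are assumed here to be
 * small integers that grow with severity (e.g. something like 2/5/8 on a
 * typical config); the exact defaults are not taken from this file.
 */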
1800
1801 if (available_for_purge || force_purge) {
1802
1803 if (object != NULL) {
1804 vm_object_unlock(object);
1805 object = NULL;
1806 }
1807
1808 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
1809
1810 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
1811 if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
1812 vm_pageout_purged_objects++;
1813 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
1814 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
1815 continue;
1816 }
1817 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
1818 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
1819 }
1820
1821 if (queue_empty(&sq->age_q) && vm_page_speculative_count) {
1822 /*
1823 * try to pull pages from the aging bins...
1824 * see vm_page.h for an explanation of how
1825 * this mechanism works
1826 */
1827 struct vm_speculative_age_q *aq;
1828 boolean_t can_steal = FALSE;
1829 int num_scanned_queues;
1830
1831 aq = &vm_page_queue_speculative[speculative_steal_index];
1832
1833 num_scanned_queues = 0;
1834 while (queue_empty(&aq->age_q) &&
1835 num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
1836
1837 speculative_steal_index++;
1838
1839 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
1840 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
1841
1842 aq = &vm_page_queue_speculative[speculative_steal_index];
1843 }
1844
1845 if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
1846 /*
1847 * XXX We've scanned all the speculative
1848 * queues but still haven't found one
1849 * that is not empty, even though
1850 * vm_page_speculative_count is not 0.
1851 *
1852 * report the anomaly...
1853 */
1854 printf("vm_pageout_scan: "
1855 "all speculative queues empty "
1856 "but count=%d. Re-adjusting.\n",
1857 vm_page_speculative_count);
1858 if (vm_page_speculative_count > vm_page_speculative_count_drift_max)
1859 vm_page_speculative_count_drift_max = vm_page_speculative_count;
1860 vm_page_speculative_count_drifts++;
1861 #if 6553678
1862 Debugger("vm_pageout_scan: no speculative pages");
1863 #endif
1864 /* readjust... */
1865 vm_page_speculative_count = 0;
1866 /* ... and continue */
1867 continue;
1868 }
1869
1870 if (vm_page_speculative_count > vm_page_speculative_target)
1871 can_steal = TRUE;
1872 else {
1873 if (!delay_speculative_age) {
1874 mach_timespec_t ts_fully_aged;
1875
1876 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
1877 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
1878 * 1000 * NSEC_PER_USEC;
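/*
 * Worked example (added): assuming the common defaults of
 * VM_PAGE_MAX_SPECULATIVE_AGE_Q == 10 and vm_page_speculative_q_age_ms == 500
 * (assumptions, check vm_page.h), 10 * 500 == 5000 ms, so ts_fully_aged is
 * { .tv_sec = 5, .tv_nsec = 0 }: the oldest speculative bin is considered
 * fully aged 5 seconds after aq->age_ts.
 */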
1879
1880 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
1881
1882 clock_sec_t sec;
1883 clock_nsec_t nsec;
1884 clock_get_system_nanotime(&sec, &nsec);
1885 ts.tv_sec = (unsigned int) sec;
1886 ts.tv_nsec = nsec;
1887
1888 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
1889 can_steal = TRUE;
1890 else
1891 delay_speculative_age++;
1892 } else {
1893 delay_speculative_age++;
1894 if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
1895 delay_speculative_age = 0;
1896 }
1897 }
1898 if (can_steal == TRUE)
1899 vm_page_speculate_ageit(aq);
1900 }
1901 if (queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
1902 int pages_evicted;
1903
1904 if (object != NULL) {
1905 vm_object_unlock(object);
1906 object = NULL;
1907 }
1908 pages_evicted = vm_object_cache_evict(100, 10);
1909
1910 if (pages_evicted) {
1911
1912 vm_pageout_cache_evicted += pages_evicted;
1913
1914 VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
1915 vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0);
1916 memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
1917
1918 /*
1919 * we just freed up to 100 pages,
1920 * so go back to the top of the main loop
1921 * and re-evaluate the memory situation
1922 */
1923 continue;
1924 } else
1925 cache_evict_throttle = 100;
1926 }
1927 if (cache_evict_throttle)
1928 cache_evict_throttle--;
1929
1930 #if CONFIG_JETSAM
1931 /*
1932 * don't let the filecache_min fall below roughly 1/7th (~14%) of available memory
1933 * on systems with an active compressor that isn't nearing its
1934 * limits w/r to accepting new data
1935 *
1936 * on systems w/o the compressor/swapper, the filecache is always
1937 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
1938 * since most (if not all) of the anonymous pages are in the
1939 * throttled queue (which isn't counted as available) which
1940 * effectively disables this filter
1941 */
1942 if (vm_compressor_low_on_space())
1943 vm_page_filecache_min = 0;
1944 else
1945 vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 7);
1946 #else
1947 /*
1948 * don't let the filecache_min fall below 33% of available memory...
1949 */
1950 vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 3);
1951 #endif
1952
1953 exceeded_burst_throttle = FALSE;
1954 /*
1955 * Sometimes we have to pause:
1956 * 1) No inactive pages - nothing to do.
1957 * 2) Loop control - no acceptable pages found on the inactive queue
1958 * within the last vm_pageout_burst_inactive_throttle iterations
1959 * 3) Flow control - default pageout queue is full
1960 */
1961 if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_anonymous) && queue_empty(&sq->age_q)) {
1962 vm_pageout_scan_empty_throttle++;
1963 msecs = vm_pageout_empty_wait;
1964 goto vm_pageout_scan_delay;
1965
1966 } else if (inactive_burst_count >=
1967 MIN(vm_pageout_burst_inactive_throttle,
1968 (vm_page_inactive_count +
1969 vm_page_speculative_count))) {
1970 vm_pageout_scan_burst_throttle++;
1971 msecs = vm_pageout_burst_wait;
1972
1973 exceeded_burst_throttle = TRUE;
1974 goto vm_pageout_scan_delay;
1975
1976 } else if (vm_page_free_count > (vm_page_free_reserved / 4) &&
1977 VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
1978 vm_pageout_scan_swap_throttle++;
1979 msecs = vm_pageout_swap_wait;
1980 goto vm_pageout_scan_delay;
1981
1982 } else if (VM_PAGE_Q_THROTTLED(iq) &&
1983 VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
1984 clock_sec_t sec;
1985 clock_nsec_t nsec;
1986
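/*
 * Descriptive note (added): flow_control.state is a small state machine:
 *   FCS_IDLE              - no flow control in effect yet; arm the deadlock
 *                           timer (reset_deadlock_timer) if we're still short
 *   FCS_DELAYED           - waiting for the default pageout queue to drain;
 *                           if the timer expires, suspect a deadlock
 *   FCS_DEADLOCK_DETECTED - push vm_pageout_deadlock_target pages through,
 *                           then re-arm the timer
 */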
1987 switch (flow_control.state) {
1988
1989 case FCS_IDLE:
1990 if ((vm_page_free_count + local_freed) < vm_page_free_target) {
1991
1992 if (object != NULL) {
1993 vm_object_unlock(object);
1994 object = NULL;
1995 }
1996 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1997
1998 vm_page_unlock_queues();
1999
2000 if (local_freeq) {
2001
2002 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
2003 vm_page_free_count, local_freed, delayed_unlock_limit, 3);
2004
2005 vm_page_free_list(local_freeq, TRUE);
2006
2007 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
2008 vm_page_free_count, local_freed, 0, 3);
2009
2010 local_freeq = NULL;
2011 local_freed = 0;
2012 }
2013 thread_yield_internal(1);
2014
2015 vm_page_lock_queues();
2016
2017 if (!VM_PAGE_Q_THROTTLED(iq)) {
2018 vm_pageout_scan_yield_unthrottled++;
2019 continue;
2020 }
2021 if (vm_page_pageable_external_count > vm_page_filecache_min && !queue_empty(&vm_page_queue_inactive)) {
2022 anons_grabbed = ANONS_GRABBED_LIMIT;
2023 vm_pageout_scan_throttle_deferred++;
2024 goto consider_inactive;
2025 }
2026 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count)
2027 continue;
2028 }
2029 reset_deadlock_timer:
2030 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
2031 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
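/*
 * Illustrative arithmetic (added): assuming vm_pageout_deadlock_wait keeps
 * its usual 300 ms default (an assumption), tv_sec = 300 / 1000 = 0 and
 * tv_nsec = (300 % 1000) * 1000 * NSEC_PER_USEC = 300,000,000 ns, so the
 * deadlock timer fires 0.3 seconds after the current time is added below.
 */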
2032 clock_get_system_nanotime(&sec, &nsec);
2033 flow_control.ts.tv_sec = (unsigned int) sec;
2034 flow_control.ts.tv_nsec = nsec;
2035 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
2036
2037 flow_control.state = FCS_DELAYED;
2038 msecs = vm_pageout_deadlock_wait;
2039
2040 break;
2041
2042 case FCS_DELAYED:
2043 clock_get_system_nanotime(&sec, &nsec);
2044 ts.tv_sec = (unsigned int) sec;
2045 ts.tv_nsec = nsec;
2046
2047 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
2048 /*
2049 * the pageout thread for the default pager is potentially
2050 * deadlocked since the
2051 * default pager queue has been throttled for more than the
2052 * allowable time... we need to move some clean pages or dirty
2053 * pages belonging to the external pagers if they aren't throttled
2054 * vm_page_free_wanted represents the number of threads currently
2055 * blocked waiting for pages... we'll move one page for each of
2056 * these plus a fixed amount to break the logjam... once we're done
2057 * moving this number of pages, we'll re-enter the FCS_DELAYED state
2058 * with a new timeout target since we have no way of knowing
2059 * whether we've broken the deadlock except through observation
2060 * of the queue associated with the default pager... we need to
2061 * stop moving pages and allow the system to run to see what
2062 * state it settles into.
2063 */
2064 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
2065 vm_pageout_scan_deadlock_detected++;
2066 flow_control.state = FCS_DEADLOCK_DETECTED;
2067 thread_wakeup((event_t) &vm_pageout_garbage_collect);
2068 goto consider_inactive;
2069 }
2070 /*
2071 * just resniff instead of trying
2072 * to compute a new delay time... we're going to be
2073 * awakened immediately upon a laundry completion,
2074 * so we won't wait any longer than necessary
2075 */
2076 msecs = vm_pageout_idle_wait;
2077 break;
2078
2079 case FCS_DEADLOCK_DETECTED:
2080 if (vm_pageout_deadlock_target)
2081 goto consider_inactive;
2082 goto reset_deadlock_timer;
2083
2084 }
2085 vm_pageout_scan_delay:
2086 if (object != NULL) {
2087 vm_object_unlock(object);
2088 object = NULL;
2089 }
2090 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2091
2092 vm_page_unlock_queues();
2093
2094 if (local_freeq) {
2095
2096 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
2097 vm_page_free_count, local_freed, delayed_unlock_limit, 3);
2098
2099 vm_page_free_list(local_freeq, TRUE);
2100
2101 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
2102 vm_page_free_count, local_freed, 0, 3);
2103
2104 local_freeq = NULL;
2105 local_freed = 0;
2106 }
2107 vm_consider_waking_compactor_swapper();
2108
2109 vm_page_lock_queues();
2110
2111 if (flow_control.state == FCS_DELAYED &&
2112 !VM_PAGE_Q_THROTTLED(iq)) {
2113 flow_control.state = FCS_IDLE;
2114 goto consider_inactive;
2115 }
2116
2117 if (vm_page_free_count >= vm_page_free_target) {
2118 /*
2119 * we're here because
2120 * 1) someone else freed up some pages while we had
2121 * the queues unlocked above
2122 * and we've hit one of the 3 conditions that
2123 * cause us to pause the pageout scan thread
2124 *
2125 * since we already have enough free pages,
2126 * let's avoid stalling and return normally
2127 *
2128 * before we return, make sure the pageout I/O threads
2129 * are running throttled in case there are still requests
2130 * in the laundry... since we have enough free pages
2131 * we don't need the laundry to be cleaned in a timely
2132 * fashion... so let's avoid interfering with foreground
2133 * activity
2134 *
2135 * we don't want to hold vm_page_queue_free_lock when
2136 * calling vm_pageout_adjust_io_throttles (since it
2137 * may cause other locks to be taken), we do the initial
2138 * check outside of the lock. Once we take the lock,
2139 * we recheck the condition since it may have changed.
2140 * if it has, no problem, we will make the threads
2141 * non-throttled before actually blocking
2142 */
2143 vm_pageout_adjust_io_throttles(iq, eq, TRUE);
2144 }
2145 lck_mtx_lock(&vm_page_queue_free_lock);
2146
2147 if (vm_page_free_count >= vm_page_free_target &&
2148 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2149 goto return_from_scan;
2150 }
2151 lck_mtx_unlock(&vm_page_queue_free_lock);
2152
2153 if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
2154 /*
2155 * we're most likely about to block due to one of
2156 * the 3 conditions that cause vm_pageout_scan to
2157 * not be able to make forward progress w/r
2158 * to providing new pages to the free queue,
2159 * so unthrottle the I/O threads in case we
2160 * have laundry to be cleaned... it needs
2161 * to be completed ASAP.
2162 *
2163 * even if we don't block, we want the io threads
2164 * running unthrottled since the sum of free +
2165 * clean pages is still under our free target
2166 */
2167 vm_pageout_adjust_io_throttles(iq, eq, FALSE);
2168 }
2169 if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
2170 /*
2171 * if we get here we're below our free target and
2172 * we're stalling due to a full laundry queue or
2173 * we don't have any inactive pages other than
2174 * those in the clean queue...
2175 * however, we have pages on the clean queue that
2176 * can be moved to the free queue, so let's not
2177 * stall the pageout scan
2178 */
2179 flow_control.state = FCS_IDLE;
2180 goto consider_inactive;
2181 }
2182 VM_CHECK_MEMORYSTATUS;
2183
2184 if (flow_control.state != FCS_IDLE)
2185 vm_pageout_scan_throttle++;
2186 iq->pgo_throttled = TRUE;
2187
2188 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
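/*
 * Note (added): the timeout is only a backstop; vm_pageout_throttle_up()
 * posts a wakeup on &iq->pgo_laundry as laundry completes, so this thread
 * normally resumes well before 'msecs' elapses.
 */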
2189 counter(c_vm_pageout_scan_block++);
2190
2191 vm_page_unlock_queues();
2192
2193 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2194
2195 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
2196 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2197 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
2198
2199 thread_block(THREAD_CONTINUE_NULL);
2200
2201 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
2202 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2203 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
2204
2205 vm_page_lock_queues();
2206 delayed_unlock = 1;
2207
2208 iq->pgo_throttled = FALSE;
2209
2210 if (loop_count >= vm_page_inactive_count)
2211 loop_count = 0;
2212 inactive_burst_count = 0;
2213
2214 goto Restart;
2215 /*NOTREACHED*/
2216 }
2217
2218
2219 flow_control.state = FCS_IDLE;
2220 consider_inactive:
2221 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
2222 vm_pageout_inactive_external_forced_reactivate_limit);
2223 loop_count++;
2224 inactive_burst_count++;
2225 vm_pageout_inactive++;
2226
2227
2228 /*
2229 * Choose a victim.
2230 */
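/*
 * Summary (added, derived from the checks below): victims are considered in
 * this order:
 *   1) the speculative age queue (skipped if force_anonymous is set)
 *   2) the cleaned queue
 *   3) the file-backed inactive queue vs. the anonymous queue, balanced via
 *      vm_page_filecache_min, grab_anonymous and ANONS_GRABBED_LIMIT
 * If no victim is found, the scan either restarts or panics.
 */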
2231 while (1) {
2232 uint32_t inactive_external_count;
2233
2234 m = NULL;
2235
2236 if (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
2237 assert(vm_page_throttled_count == 0);
2238 assert(queue_empty(&vm_page_queue_throttled));
2239 }
2240 /*
2241 * The most eligible pages are ones we paged in speculatively,
2242 * but which have not yet been touched.
2243 */
2244 if (!queue_empty(&sq->age_q) && force_anonymous == FALSE) {
2245 m = (vm_page_t) queue_first(&sq->age_q);
2246
2247 page_prev_state = PAGE_STATE_SPECULATIVE;
2248
2249 break;
2250 }
2251 /*
2252 * Try a clean-queue inactive page.
2253 */
2254 if (!queue_empty(&vm_page_queue_cleaned)) {
2255 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
2256
2257 page_prev_state = PAGE_STATE_CLEAN;
2258
2259 break;
2260 }
2261
2262 grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
2263 inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2264
2265 if ((vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) ||
2266 ((inactive_external_count < vm_page_anonymous_count) && (inactive_external_count < (vm_page_pageable_external_count / 3)))) {
2267 grab_anonymous = TRUE;
2268 anons_grabbed = 0;
2269 }
2270
2271 if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) {
2272
2273 if ( !queue_empty(&vm_page_queue_inactive) ) {
2274 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
2275
2276 page_prev_state = PAGE_STATE_INACTIVE;
2277 anons_grabbed = 0;
2278
2279 if (vm_page_pageable_external_count < vm_page_filecache_min) {
2280 if ((++reactivated_this_call % 100))
2281 goto must_activate_page;
2282 /*
2283 * steal 1% of the file backed pages even if
2284 * we are under the limit that has been set
2285 * for a healthy filecache
2286 */
2287 }
2288 break;
2289 }
2290 }
2291 if ( !queue_empty(&vm_page_queue_anonymous) ) {
2292 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
2293
2294 page_prev_state = PAGE_STATE_ANONYMOUS;
2295 anons_grabbed++;
2296
2297 break;
2298 }
2299
2300 /*
2301 * if we've gotten here, we have no victim page.
2302 * if making clean, free the local freed list and return.
2303 * if making free, check to see if we've finished balancing the queues
2304 * yet; if we haven't, just continue, else panic
2305 */
2306 vm_page_unlock_queues();
2307
2308 if (object != NULL) {
2309 vm_object_unlock(object);
2310 object = NULL;
2311 }
2312 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2313
2314 if (local_freeq) {
2315 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
2316 vm_page_free_count, local_freed, delayed_unlock_limit, 5);
2317
2318 vm_page_free_list(local_freeq, TRUE);
2319
2320 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
2321 vm_page_free_count, local_freed, 0, 5);
2322
2323 local_freeq = NULL;
2324 local_freed = 0;
2325 }
2326 vm_page_lock_queues();
2327 delayed_unlock = 1;
2328
2329 force_anonymous = FALSE;
2330
2331 if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target)
2332 goto Restart;
2333
2334 if (!queue_empty(&sq->age_q))
2335 goto Restart;
2336
2337 panic("vm_pageout: no victim");
2338
2339 /* NOTREACHED */
2340 }
2341 force_anonymous = FALSE;
2342
2343 /*
2344 * we just found this page on one of our queues...
2345 * it can't also be on the pageout queue, so safe
2346 * to call vm_page_queues_remove
2347 */
2348 assert(!m->pageout_queue);
2349
2350 vm_page_queues_remove(m);
2351
2352 assert(!m->laundry);
2353 assert(!m->private);
2354 assert(!m->fictitious);
2355 assert(m->object != kernel_object);
2356 assert(m->phys_page != vm_page_guard_addr);
2357
2358
2359 if (page_prev_state != PAGE_STATE_SPECULATIVE)
2360 vm_pageout_stats[vm_pageout_stat_now].considered++;
2361
2362 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2363
2364 /*
2365 * check to see if we currently are working
2366 * with the same object... if so, we've
2367 * already got the lock
2368 */
2369 if (m->object != object) {
2370 /*
2371 * the object associated with candidate page is
2372 * different from the one we were just working
2373 * with... dump the lock if we still own it
2374 */
2375 if (object != NULL) {
2376 vm_object_unlock(object);
2377 object = NULL;
2378 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2379 }
2380 /*
2381 * Try to lock object; since we've already got the
2382 * page queues lock, we can only 'try' for this one.
2383 * if the 'try' fails, we need to do a mutex_pause
2384 * to allow the owner of the object lock a chance to
2385 * run... otherwise, we're likely to trip over this
2386 * object in the same state as we work our way through
2387 * the queue... clumps of pages associated with the same
2388 * object are fairly typical on the inactive and active queues
2389 */
2390 if (!vm_object_lock_try_scan(m->object)) {
2391 vm_page_t m_want = NULL;
2392
2393 vm_pageout_inactive_nolock++;
2394
2395 if (page_prev_state == PAGE_STATE_CLEAN)
2396 vm_pageout_cleaned_nolock++;
2397
2398 if (page_prev_state == PAGE_STATE_SPECULATIVE)
2399 page_prev_state = PAGE_STATE_INACTIVE_FIRST;
2400
2401 pmap_clear_reference(m->phys_page);
2402 m->reference = FALSE;
2403
2404 /*
2405 * m->object must be stable since we hold the page queues lock...
2406 * we can update the scan_collisions field sans the object lock
2407 * since it is a separate field and this is the only spot that does
2408 * a read-modify-write operation and it is never executed concurrently...
2409 * we can asynchronously set this field to 0 when creating a UPL, so it
2410 * is possible for the value to be a bit non-deterministic, but that's ok
2411 * since it's only used as a hint
2412 */
2413 m->object->scan_collisions = 1;
2414
2415 if ( !queue_empty(&sq->age_q) )
2416 m_want = (vm_page_t) queue_first(&sq->age_q);
2417 else if ( !queue_empty(&vm_page_queue_cleaned))
2418 m_want = (vm_page_t) queue_first(&vm_page_queue_cleaned);
2419 else if (anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous))
2420 m_want = (vm_page_t) queue_first(&vm_page_queue_inactive);
2421 else if ( !queue_empty(&vm_page_queue_anonymous))
2422 m_want = (vm_page_t) queue_first(&vm_page_queue_anonymous);
2423
2424 /*
2425 * this is the next object we're going to be interested in
2426 * try to make sure it's available after the mutex_yield
2427 * returns control
2428 */
2429 if (m_want)
2430 vm_pageout_scan_wants_object = m_want->object;
2431
2432 /*
2433 * force us to dump any collected free pages
2434 * and to pause before moving on
2435 */
2436 try_failed = TRUE;
2437
2438 goto requeue_page;
2439 }
2440 object = m->object;
2441 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2442
2443 try_failed = FALSE;
2444 }
2445 if (catch_up_count)
2446 catch_up_count--;
2447
2448 if (m->busy) {
2449 if (m->encrypted_cleaning) {
2450 /*
2451 * ENCRYPTED SWAP:
2452 * if this page has already been picked up as
2453 * part of a page-out cluster, it will be busy
2454 * because it is being encrypted (see
2455 * vm_object_upl_request()). But we still
2456 * want to demote it from "clean-in-place"
2457 * (aka "adjacent") to "clean-and-free" (aka
2458 * "target"), so let's ignore its "busy" bit
2459 * here and proceed to check for "cleaning" a
2460 * little bit below...
2461 *
2462 * CAUTION CAUTION:
2463 * A "busy" page should still be left alone for
2464 * most purposes, so we have to be very careful
2465 * not to process that page too much.
2466 */
2467 assert(m->cleaning);
2468 goto consider_inactive_page;
2469 }
2470
2471 /*
2472 * Somebody is already playing with this page.
2473 * Put it back on the appropriate queue
2474 *
2475 */
2476 vm_pageout_inactive_busy++;
2477
2478 if (page_prev_state == PAGE_STATE_CLEAN)
2479 vm_pageout_cleaned_busy++;
2480
2481 requeue_page:
2482 switch (page_prev_state) {
2483
2484 case PAGE_STATE_SPECULATIVE:
2485 case PAGE_STATE_ANONYMOUS:
2486 case PAGE_STATE_CLEAN:
2487 case PAGE_STATE_INACTIVE:
2488 vm_page_enqueue_inactive(m, FALSE);
2489 break;
2490
2491 case PAGE_STATE_INACTIVE_FIRST:
2492 vm_page_enqueue_inactive(m, TRUE);
2493 break;
2494 }
2495 goto done_with_inactivepage;
2496 }
2497
2498
2499 /*
2500 * If it's absent, in error or the object is no longer alive,
2501 * we can reclaim the page... in the no longer alive case,
2502 * there are 2 states the page can be in that preclude us
2503 * from reclaiming it - busy or cleaning - that we've already
2504 * dealt with
2505 */
2506 if (m->absent || m->error || !object->alive) {
2507
2508 if (m->absent)
2509 vm_pageout_inactive_absent++;
2510 else if (!object->alive)
2511 vm_pageout_inactive_notalive++;
2512 else
2513 vm_pageout_inactive_error++;
2514 reclaim_page:
2515 if (vm_pageout_deadlock_target) {
2516 vm_pageout_scan_inactive_throttle_success++;
2517 vm_pageout_deadlock_target--;
2518 }
2519
2520 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
2521
2522 if (object->internal) {
2523 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
2524 } else {
2525 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
2526 }
2527 assert(!m->cleaning);
2528 assert(!m->laundry);
2529
2530 m->busy = TRUE;
2531
2532 /*
2533 * remove page from object here since we're already
2534 * behind the object lock... defer the rest of the work
2535 * we'd normally do in vm_page_free_prepare_object
2536 * until 'vm_page_free_list' is called
2537 */
2538 if (m->tabled)
2539 vm_page_remove(m, TRUE);
2540
2541 assert(m->pageq.next == NULL &&
2542 m->pageq.prev == NULL);
2543 m->pageq.next = (queue_entry_t)local_freeq;
2544 local_freeq = m;
2545 local_freed++;
2546
2547 if (page_prev_state == PAGE_STATE_SPECULATIVE)
2548 vm_pageout_freed_from_speculative++;
2549 else if (page_prev_state == PAGE_STATE_CLEAN)
2550 vm_pageout_freed_from_cleaned++;
2551 else
2552 vm_pageout_freed_from_inactive_clean++;
2553
2554 if (page_prev_state != PAGE_STATE_SPECULATIVE)
2555 vm_pageout_stats[vm_pageout_stat_now].reclaimed++;
2556
2557 inactive_burst_count = 0;
2558 goto done_with_inactivepage;
2559 }
2560 /*
2561 * If the object has been emptied (i.e. is VM_PURGABLE_EMPTY), the page
2562 * must be reclaimed even if dirty or used.
2563 * If the page belongs to a volatile object, we stick it back
2564 * on (i.e. reactivate it).
2565 */
2566 if (object->copy == VM_OBJECT_NULL) {
2567 if (object->purgable == VM_PURGABLE_EMPTY) {
2568 if (m->pmapped == TRUE) {
2569 /* unmap the page */
2570 refmod_state = pmap_disconnect(m->phys_page);
2571 if (refmod_state & VM_MEM_MODIFIED) {
2572 SET_PAGE_DIRTY(m, FALSE);
2573 }
2574 }
2575 if (m->dirty || m->precious) {
2576 /* we saved the cost of cleaning this page ! */
2577 vm_page_purged_count++;
2578 }
2579 goto reclaim_page;
2580 }
2581
2582 if (COMPRESSED_PAGER_IS_ACTIVE) {
2583 /*
2584 * With the VM compressor, the cost of
2585 * reclaiming a page is much lower (no I/O),
2586 * so if we find a "volatile" page, it's better
2587 * to let it get compressed rather than letting
2588 * it occupy a full page until it gets purged.
2589 * So no need to check for "volatile" here.
2590 */
2591 } else if (object->purgable == VM_PURGABLE_VOLATILE) {
2592 /*
2593 * Avoid cleaning a "volatile" page which might
2594 * be purged soon.
2595 */
2596
2597 /* if it's wired, we can't put it on our queue */
2598 assert(!VM_PAGE_WIRED(m));
2599
2600 /* just stick it back on! */
2601 reactivated_this_call++;
2602
2603 if (page_prev_state == PAGE_STATE_CLEAN)
2604 vm_pageout_cleaned_volatile_reactivated++;
2605
2606 goto reactivate_page;
2607 }
2608 }
2609
2610 consider_inactive_page:
2611 if (m->busy) {
2612 /*
2613 * CAUTION CAUTION:
2614 * A "busy" page should always be left alone, except...
2615 */
2616 if (m->cleaning && m->encrypted_cleaning) {
2617 /*
2618 * ENCRYPTED_SWAP:
2619 * We could get here with a "busy" page
2620 * if it's being encrypted during a
2621 * "clean-in-place" operation. We'll deal
2622 * with it right away by testing if it has been
2623 * referenced and either reactivating it or
2624 * promoting it from "clean-in-place" to
2625 * "clean-and-free".
2626 */
2627 } else {
2628 panic("\"busy\" page considered for pageout\n");
2629 }
2630 }
2631
2632 /*
2633 * If it's being used, reactivate.
2634 * (Fictitious pages are either busy or absent.)
2635 * First, update the reference and dirty bits
2636 * to make sure the page is unreferenced.
2637 */
2638 refmod_state = -1;
2639
2640 if (m->reference == FALSE && m->pmapped == TRUE) {
2641 refmod_state = pmap_get_refmod(m->phys_page);
2642
2643 if (refmod_state & VM_MEM_REFERENCED)
2644 m->reference = TRUE;
2645 if (refmod_state & VM_MEM_MODIFIED) {
2646 SET_PAGE_DIRTY(m, FALSE);
2647 }
2648 }
2649
2650 /*
2651 * if (m->cleaning && !m->pageout)
2652 * If already cleaning this page in place and it hasn't
2653 * been recently referenced, just pull off the queue.
2654 * We can leave the page mapped, and upl_commit_range
2655 * will put it on the clean queue.
2656 *
2657 * note: if m->encrypted_cleaning == TRUE, then
2658 * m->cleaning == TRUE
2659 * and we'll handle it here
2660 *
2661 * if (m->pageout && !m->cleaning)
2662 * an msync INVALIDATE is in progress...
2663 * this page has been marked for destruction
2664 * after it has been cleaned,
2665 * but not yet gathered into a UPL
2666 * where 'cleaning' will be set...
2667 * just leave it off the paging queues
2668 *
2669 * if (m->pageout && m->cleaning)
2670 * an msync INVALIDATE is in progress
2671 * and the UPL has already gathered this page...
2672 * just leave it off the paging queues
2673 */
2674
2675 /*
2676 * page with m->pageout and still on the queues means that an
2677 * MS_INVALIDATE is in progress on this page... leave it alone
2678 */
2679 if (m->pageout) {
2680 goto done_with_inactivepage;
2681 }
2682
2683 /* if cleaning, reactivate if referenced. otherwise, just pull off queue */
2684 if (m->cleaning) {
2685 if (m->reference == TRUE) {
2686 reactivated_this_call++;
2687 goto reactivate_page;
2688 } else {
2689 goto done_with_inactivepage;
2690 }
2691 }
2692
2693 if (m->reference || m->dirty) {
2694 /* deal with a rogue "reusable" page */
2695 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m);
2696 }
2697
2698 if (!m->no_cache &&
2699 (m->reference ||
2700 (m->xpmapped && !object->internal && (vm_page_xpmapped_external_count < (vm_page_external_count / 4))))) {
2701 /*
2702 * The page we pulled off the inactive list has
2703 * been referenced. It is possible for other
2704 * processors to be touching pages faster than we
2705 * can clear the referenced bit and traverse the
2706 * inactive queue, so we limit the number of
2707 * reactivations.
2708 */
2709 if (++reactivated_this_call >= reactivate_limit) {
2710 vm_pageout_reactivation_limit_exceeded++;
2711 } else if (catch_up_count) {
2712 vm_pageout_catch_ups++;
2713 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
2714 vm_pageout_inactive_force_reclaim++;
2715 } else {
2716 uint32_t isinuse;
2717
2718 if (page_prev_state == PAGE_STATE_CLEAN)
2719 vm_pageout_cleaned_reference_reactivated++;
2720
2721 reactivate_page:
2722 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
2723 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
2724 /*
2725 * no explicit mappings of this object exist
2726 * and it's not open via the filesystem
2727 */
2728 vm_page_deactivate(m);
2729 vm_pageout_inactive_deactivated++;
2730 } else {
2731 must_activate_page:
2732 /*
2733 * The page was/is being used, so put back on active list.
2734 */
2735 vm_page_activate(m);
2736 VM_STAT_INCR(reactivations);
2737 inactive_burst_count = 0;
2738 }
2739
2740 if (page_prev_state == PAGE_STATE_CLEAN)
2741 vm_pageout_cleaned_reactivated++;
2742
2743 vm_pageout_inactive_used++;
2744
2745 goto done_with_inactivepage;
2746 }
2747 /*
2748 * Make sure we call pmap_get_refmod() if it
2749 * wasn't already called just above, to update
2750 * the dirty bit.
2751 */
2752 if ((refmod_state == -1) && !m->dirty && m->pmapped) {
2753 refmod_state = pmap_get_refmod(m->phys_page);
2754 if (refmod_state & VM_MEM_MODIFIED) {
2755 SET_PAGE_DIRTY(m, FALSE);
2756 }
2757 }
2758 forced_reclaim = TRUE;
2759 } else {
2760 forced_reclaim = FALSE;
2761 }
2762
2763 XPR(XPR_VM_PAGEOUT,
2764 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
2765 object, m->offset, m, 0,0);
2766
2767 /*
2768 * we've got a candidate page to steal...
2769 *
2770 * m->dirty is up to date courtesy of the
2771 * preceding check for m->reference... if
2772 * we get here, then m->reference had to be
2773 * FALSE (or possibly "reactivate_limit" was
2774 * exceeded), but in either case we called
2775 * pmap_get_refmod() and updated both
2776 * m->reference and m->dirty
2777 *
2778 * if it's dirty or precious we need to
2779 * see if the target queue is throttled
2780 * if it is, we need to skip over it by moving it back
2781 * to the end of the inactive queue
2782 */
2783
2784 inactive_throttled = FALSE;
2785
2786 if (m->dirty || m->precious) {
2787 if (object->internal) {
2788 if (VM_PAGE_Q_THROTTLED(iq))
2789 inactive_throttled = TRUE;
2790 } else if (VM_PAGE_Q_THROTTLED(eq)) {
2791 inactive_throttled = TRUE;
2792 }
2793 }
2794 throttle_inactive:
2795 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2796 object->internal && m->dirty &&
2797 (object->purgable == VM_PURGABLE_DENY ||
2798 object->purgable == VM_PURGABLE_NONVOLATILE ||
2799 object->purgable == VM_PURGABLE_VOLATILE)) {
2800 vm_page_check_pageable_safe(m);
2801 queue_enter(&vm_page_queue_throttled, m,
2802 vm_page_t, pageq);
2803 m->throttled = TRUE;
2804 vm_page_throttled_count++;
2805
2806 vm_pageout_scan_reclaimed_throttled++;
2807
2808 inactive_burst_count = 0;
2809 goto done_with_inactivepage;
2810 }
2811 if (inactive_throttled == TRUE) {
2812
2813 if (object->internal == FALSE) {
2814 /*
2815 * we need to break up the following potential deadlock case...
2816 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
2817 * b) The thread doing the writing is waiting for pages while holding the truncate lock
2818 * c) Most of the pages in the inactive queue belong to this file.
2819 *
2820 * we are potentially in this deadlock because...
2821 * a) the external pageout queue is throttled
2822 * b) we're done with the active queue and moved on to the inactive queue
2823 * c) we've got a dirty external page
2824 *
2825 * since we don't know the reason for the external pageout queue being throttled we
2826 * must suspect that we are deadlocked, so move the current page onto the active queue
2827 * in an effort to cause a page from the active queue to 'age' to the inactive queue
2828 *
2829 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
2830 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
2831 * pool the next time we select a victim page... if we can make enough new free pages,
2832 * the deadlock will break, the external pageout queue will empty and it will no longer
2833 * be throttled
2834 *
2835 * if we have jetsam configured, keep a count of the pages reactivated this way so
2836 * that we can try to find clean pages in the active/inactive queues before
2837 * deciding to jetsam a process
2838 */
2839 vm_pageout_scan_inactive_throttled_external++;
2840
2841 vm_page_check_pageable_safe(m);
2842 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2843 m->active = TRUE;
2844 vm_page_active_count++;
2845 vm_page_pageable_external_count++;
2846
2847 vm_pageout_adjust_io_throttles(iq, eq, FALSE);
2848
2849 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
2850 vm_pageout_inactive_external_forced_reactivate_limit--;
2851
2852 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
2853 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
2854 /*
2855 * Possible deadlock scenario so request jetsam action
2856 */
2857 assert(object);
2858 vm_object_unlock(object);
2859 object = VM_OBJECT_NULL;
2860 vm_page_unlock_queues();
2861
2862 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
2863 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
2864
2865 /* Kill first suitable process */
2866 if (memorystatus_kill_on_VM_page_shortage(FALSE) == FALSE) {
2867 panic("vm_pageout_scan: Jetsam request failed\n");
2868 }
2869
2870 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0);
2871
2872 vm_pageout_inactive_external_forced_jetsam_count++;
2873 vm_page_lock_queues();
2874 delayed_unlock = 1;
2875 }
2876 #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
2877 force_anonymous = TRUE;
2878 #endif
2879 inactive_burst_count = 0;
2880 goto done_with_inactivepage;
2881 } else {
2882 if (page_prev_state == PAGE_STATE_SPECULATIVE)
2883 page_prev_state = PAGE_STATE_INACTIVE;
2884
2885 vm_pageout_scan_inactive_throttled_internal++;
2886
2887 goto must_activate_page;
2888 }
2889 }
2890
2891 /*
2892 * we've got a page that we can steal...
2893 * eliminate all mappings and make sure
2894 * we have the up-to-date modified state
2895 *
2896 * if we need to do a pmap_disconnect then we
2897 * need to re-evaluate m->dirty since the pmap_disconnect
2898 * provides the true state atomically... the
2899 * page was still mapped up to the pmap_disconnect
2900 * and may have been dirtied at the last microsecond
2901 *
2902 * Note that if 'pmapped' is FALSE then the page is not
2903 * and has not been in any map, so there is no point calling
2904 * pmap_disconnect(). m->dirty could have been set in anticipation
2905 * of likely usage of the page.
2906 */
2907 if (m->pmapped == TRUE) {
2908 int pmap_options;
2909
2910 /*
2911 * Don't count this page as going into the compressor
2912 * if any of these are true:
2913 * 1) We have the dynamic pager i.e. no compressed pager
2914 * 2) Freezer enabled device with a freezer file to
2915 * hold the app data i.e. no compressed pager
2916 * 3) Freezer enabled device with compressed pager
2917 * backend (exclusive use) i.e. most of the VM system
2918 * (including vm_pageout_scan) has no knowledge of
2919 * the compressor
2920 * 4) This page belongs to a file and hence will not be
2921 * sent into the compressor
2922 */
2923 if (DEFAULT_PAGER_IS_ACTIVE ||
2924 DEFAULT_FREEZER_IS_ACTIVE ||
2925 DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS ||
2926 object->internal == FALSE) {
2927 pmap_options = 0;
2928 } else if (m->dirty || m->precious) {
2929 /*
2930 * VM knows that this page is dirty (or
2931 * precious) and needs to be compressed
2932 * rather than freed.
2933 * Tell the pmap layer to count this page
2934 * as "compressed".
2935 */
2936 pmap_options = PMAP_OPTIONS_COMPRESSOR;
2937 } else {
2938 /*
2939 * VM does not know if the page needs to
2940 * be preserved but the pmap layer might tell
2941 * us if any mapping has "modified" it.
2942 * Let the pmap layer count this page
2943 * as compressed if and only if it has been
2944 * modified.
2945 */
2946 pmap_options =
2947 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
2948 }
2949 refmod_state = pmap_disconnect_options(m->phys_page,
2950 pmap_options,
2951 NULL);
2952 if (refmod_state & VM_MEM_MODIFIED) {
2953 SET_PAGE_DIRTY(m, FALSE);
2954 }
2955 }
2956 /*
2957 * reset our count of pages that have been reclaimed
2958 * since the last page was 'stolen'
2959 */
2960 inactive_reclaim_run = 0;
2961
2962 /*
2963 * If it's clean and not precious, we can free the page.
2964 */
2965 if (!m->dirty && !m->precious) {
2966
2967 if (page_prev_state == PAGE_STATE_SPECULATIVE)
2968 vm_pageout_speculative_clean++;
2969 else {
2970 if (page_prev_state == PAGE_STATE_ANONYMOUS)
2971 vm_pageout_inactive_anonymous++;
2972 else if (page_prev_state == PAGE_STATE_CLEAN)
2973 vm_pageout_cleaned_reclaimed++;
2974
2975 vm_pageout_inactive_clean++;
2976 }
2977
2978 /*
2979 * OK, at this point we have found a page we are going to free.
2980 */
2981 #if CONFIG_PHANTOM_CACHE
2982 if (!object->internal)
2983 vm_phantom_cache_add_ghost(m);
2984 #endif
2985 goto reclaim_page;
2986 }
2987
2988 /*
2989 * The page may have been dirtied since the last check
2990 * for a throttled target queue (which may have been skipped
2991 * if the page was clean then). With the dirty page
2992 * disconnected here, we can make one final check.
2993 */
2994 if (object->internal) {
2995 if (VM_PAGE_Q_THROTTLED(iq))
2996 inactive_throttled = TRUE;
2997 } else if (VM_PAGE_Q_THROTTLED(eq)) {
2998 inactive_throttled = TRUE;
2999 }
3000
3001 if (inactive_throttled == TRUE)
3002 goto throttle_inactive;
3003
3004 #if VM_PRESSURE_EVENTS
3005 #if CONFIG_JETSAM
3006
3007 /*
3008 * If Jetsam is enabled, then the sending
3009 * of memory pressure notifications is handled
3010 * from the same thread that takes care of high-water
3011 * and other jetsams i.e. the memorystatus_thread.
3012 */
3013
3014 #else /* CONFIG_JETSAM */
3015
3016 vm_pressure_response();
3017
3018 #endif /* CONFIG_JETSAM */
3019 #endif /* VM_PRESSURE_EVENTS */
3020
3021 if (page_prev_state == PAGE_STATE_ANONYMOUS)
3022 vm_pageout_inactive_anonymous++;
3023 if (object->internal)
3024 vm_pageout_inactive_dirty_internal++;
3025 else
3026 vm_pageout_inactive_dirty_external++;
3027
3028 /*
3029 * do NOT set the pageout bit!
3030 * sure, we might need free pages, but this page is going to take time to become free
3031 * anyway, so we may as well put it on the clean queue first and take it from there later
3032 * if necessary. that way, we'll ensure we don't free up too much. -mj
3033 */
3034 vm_pageout_cluster(m, FALSE, FALSE, FALSE);
3035
3036 done_with_inactivepage:
3037
3038 if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) {
3039 boolean_t need_delay = TRUE;
3040
3041 if (object != NULL) {
3042 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
3043 vm_object_unlock(object);
3044 object = NULL;
3045 }
3046 vm_page_unlock_queues();
3047
3048 if (local_freeq) {
3049
3050 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
3051 vm_page_free_count, local_freed, delayed_unlock_limit, 4);
3052
3053 vm_page_free_list(local_freeq, TRUE);
3054
3055 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
3056 vm_page_free_count, local_freed, 0, 4);
3057
3058 local_freeq = NULL;
3059 local_freed = 0;
3060 need_delay = FALSE;
3061 }
3062 vm_consider_waking_compactor_swapper();
3063
3064 vm_page_lock_queues();
3065
3066 if (need_delay == TRUE)
3067 lck_mtx_yield(&vm_page_queue_lock);
3068
3069 delayed_unlock = 1;
3070 }
3071 vm_pageout_considered_page++;
3072
3073 /*
3074 * back to top of pageout scan loop
3075 */
3076 }
3077 }
3078
3079
3080 int vm_page_free_count_init;
3081
3082 void
3083 vm_page_free_reserve(
3084 int pages)
3085 {
3086 int free_after_reserve;
3087
3088 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
3089
3090 if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
3091 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
3092 else
3093 vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
3094
3095 } else {
3096 if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
3097 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
3098 else
3099 vm_page_free_reserved += pages;
3100 }
3101 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
3102
3103 vm_page_free_min = vm_page_free_reserved +
3104 VM_PAGE_FREE_MIN(free_after_reserve);
3105
3106 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
3107 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
3108
3109 vm_page_free_target = vm_page_free_reserved +
3110 VM_PAGE_FREE_TARGET(free_after_reserve);
3111
3112 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
3113 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
3114
3115 if (vm_page_free_target < vm_page_free_min + 5)
3116 vm_page_free_target = vm_page_free_min + 5;
3117
3118 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
3119 }
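/*
 * Worked sketch (added): assuming VM_PAGE_FREE_MIN() and VM_PAGE_FREE_TARGET()
 * evaluate to roughly 10% and 15% of their argument (an assumption, see the
 * macro definitions earlier in this file), a machine that booted with
 * vm_page_free_count_init == 100000 pages and computed a reserve of 1000
 * would get free_after_reserve == 99000, hence approximately
 *   vm_page_free_min    == 1000 +  9900 == 10900
 *   vm_page_free_target == 1000 + 14850 == 15850
 * before the VM_PAGE_FREE_MIN_LIMIT / VM_PAGE_FREE_TARGET_LIMIT clamps are
 * applied, and vm_page_throttle_limit ends up at half of the final target.
 */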
3120
3121 /*
3122 * vm_pageout is the high level pageout daemon.
3123 */
3124
3125 void
3126 vm_pageout_continue(void)
3127 {
3128 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
3129 vm_pageout_scan_event_counter++;
3130
3131 lck_mtx_lock(&vm_page_queue_free_lock);
3132 vm_pageout_running = TRUE;
3133 lck_mtx_unlock(&vm_page_queue_free_lock);
3134
3135 vm_pageout_scan();
3136 /*
3137 * we hold both the vm_page_queue_free_lock
3138 * and the vm_page_queue_lock at this point
3139 */
3140 assert(vm_page_free_wanted == 0);
3141 assert(vm_page_free_wanted_privileged == 0);
3142 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
3143
3144 vm_pageout_running = FALSE;
3145 if (vm_pageout_waiter) {
3146 vm_pageout_waiter = FALSE;
3147 thread_wakeup((event_t)&vm_pageout_waiter);
3148 }
3149
3150 lck_mtx_unlock(&vm_page_queue_free_lock);
3151 vm_page_unlock_queues();
3152
3153 counter(c_vm_pageout_block++);
3154 thread_block((thread_continue_t)vm_pageout_continue);
3155 /*NOTREACHED*/
3156 }
3157
3158 kern_return_t
3159 vm_pageout_wait(uint64_t deadline)
3160 {
3161 kern_return_t kr;
3162
3163 lck_mtx_lock(&vm_page_queue_free_lock);
3164 for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
3165 vm_pageout_waiter = TRUE;
3166 if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
3167 &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
3168 (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3169 kr = KERN_OPERATION_TIMED_OUT;
3170 }
3171 }
3172 lck_mtx_unlock(&vm_page_queue_free_lock);
3173
3174 return (kr);
3175 }
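/*
 * Hypothetical usage sketch (added, not part of the original file): a caller
 * that wants to bound how long it waits for the in-flight pageout pass might
 * do something like the following; the 30 second interval is an arbitrary
 * example value.
 */
#if 0
static kern_return_t
example_wait_for_pageout(void)
{
	uint64_t deadline;

	/* convert a 30 second interval into an absolute-time deadline */
	clock_interval_to_deadline(30, NSEC_PER_SEC, &deadline);

	/* KERN_OPERATION_TIMED_OUT if vm_pageout is still running at the deadline */
	return (vm_pageout_wait(deadline));
}
#endif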
3176
3177
3178 #ifdef FAKE_DEADLOCK
3179
3180 #define FAKE_COUNT 5000
3181
3182 int internal_count = 0;
3183 int fake_deadlock = 0;
3184
3185 #endif
3186
3187 static void
3188 vm_pageout_iothread_continue(struct vm_pageout_queue *q)
3189 {
3190 vm_page_t m = NULL;
3191 vm_object_t object;
3192 vm_object_offset_t offset;
3193 memory_object_t pager;
3194 thread_t self = current_thread();
3195
3196 if ((vm_pageout_internal_iothread != THREAD_NULL)
3197 && (self == vm_pageout_external_iothread )
3198 && (self->options & TH_OPT_VMPRIV))
3199 self->options &= ~TH_OPT_VMPRIV;
3200
3201 vm_page_lockspin_queues();
3202
3203 while ( !queue_empty(&q->pgo_pending) ) {
3204
3205 q->pgo_busy = TRUE;
3206 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3207 if (m->object->object_slid) {
3208 panic("slid page %p not allowed on this path\n", m);
3209 }
3210 VM_PAGE_CHECK(m);
3211 m->pageout_queue = FALSE;
3212 m->pageq.next = NULL;
3213 m->pageq.prev = NULL;
3214
3215 /*
3216 * grab a snapshot of the object and offset this
3217 * page is tabled in so that we can relookup this
3218 * page after we've taken the object lock - these
3219 * fields are stable while we hold the page queues lock
3220 * but as soon as we drop it, there is nothing to keep
3221 * this page in this object... we hold an activity_in_progress
3222 * on this object which will keep it from terminating
3223 */
3224 object = m->object;
3225 offset = m->offset;
3226
3227 vm_page_unlock_queues();
3228
3229 #ifdef FAKE_DEADLOCK
3230 if (q == &vm_pageout_queue_internal) {
3231 vm_offset_t addr;
3232 int pg_count;
3233
3234 internal_count++;
3235
3236 if ((internal_count == FAKE_COUNT)) {
3237
3238 pg_count = vm_page_free_count + vm_page_free_reserved;
3239
3240 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
3241 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
3242 }
3243 internal_count = 0;
3244 fake_deadlock++;
3245 }
3246 }
3247 #endif
3248 vm_object_lock(object);
3249
3250 m = vm_page_lookup(object, offset);
3251
3252 if (m == NULL ||
3253 m->busy || m->cleaning || m->pageout_queue || !m->laundry) {
3254 /*
3255 * it's either the same page that someone else has
3256 * started cleaning (or it's finished cleaning or
3257 * been put back on the pageout queue), or
3258 * the page has been freed or we have found a
3259 * new page at this offset... in all of these cases
3260 * we merely need to release the activity_in_progress
3261 * we took when we put the page on the pageout queue
3262 */
3263 vm_object_activity_end(object);
3264 vm_object_unlock(object);
3265
3266 vm_page_lockspin_queues();
3267 continue;
3268 }
3269 if (!object->pager_initialized) {
3270
3271 /*
3272 * If there is no memory object for the page, create
3273 * one and hand it to the default pager.
3274 */
3275
3276 if (!object->pager_initialized)
3277 vm_object_collapse(object,
3278 (vm_object_offset_t) 0,
3279 TRUE);
3280 if (!object->pager_initialized)
3281 vm_object_pager_create(object);
3282 if (!object->pager_initialized) {
3283 /*
3284 * Still no pager for the object.
3285 * Reactivate the page.
3286 *
3287 * Should only happen if there is no
3288 * default pager.
3289 */
3290 m->pageout = FALSE;
3291
3292 vm_page_lockspin_queues();
3293
3294 vm_pageout_throttle_up(m);
3295 vm_page_activate(m);
3296 vm_pageout_dirty_no_pager++;
3297
3298 vm_page_unlock_queues();
3299
3300 /*
3301 * And we are done with it.
3302 */
3303 vm_object_activity_end(object);
3304 vm_object_unlock(object);
3305
3306 vm_page_lockspin_queues();
3307 continue;
3308 }
3309 }
3310 pager = object->pager;
3311
3312 if (pager == MEMORY_OBJECT_NULL) {
3313 /*
3314 * This pager has been destroyed by either
3315 * memory_object_destroy or vm_object_destroy, and
3316 * so there is nowhere for the page to go.
3317 */
3318 if (m->pageout) {
3319 /*
3320 * Just free the page... VM_PAGE_FREE takes
3321 * care of cleaning up all the state...
3322 * including doing the vm_pageout_throttle_up
3323 */
3324 VM_PAGE_FREE(m);
3325 } else {
3326 vm_page_lockspin_queues();
3327
3328 vm_pageout_throttle_up(m);
3329 vm_page_activate(m);
3330
3331 vm_page_unlock_queues();
3332
3333 /*
3334 * And we are done with it.
3335 */
3336 }
3337 vm_object_activity_end(object);
3338 vm_object_unlock(object);
3339
3340 vm_page_lockspin_queues();
3341 continue;
3342 }
3343 #if 0
3344 /*
3345 * we don't hold the page queue lock
3346 * so this check isn't safe to make
3347 */
3348 VM_PAGE_CHECK(m);
3349 #endif
3350 /*
3351 * give back the activity_in_progress reference we
3352 * took when we queued up this page and replace it
3353 * with a paging_in_progress reference that will
3354 * also keep the paging offset from changing and
3355 * prevent the object from terminating
3356 */
3357 vm_object_activity_end(object);
3358 vm_object_paging_begin(object);
3359 vm_object_unlock(object);
3360
3361 /*
3362 * Send the data to the pager.
3363 * any pageout clustering happens there
3364 */
3365 memory_object_data_return(pager,
3366 m->offset + object->paging_offset,
3367 PAGE_SIZE,
3368 NULL,
3369 NULL,
3370 FALSE,
3371 FALSE,
3372 0);
3373
3374 vm_object_lock(object);
3375 vm_object_paging_end(object);
3376 vm_object_unlock(object);
3377
3378 vm_pageout_io_throttle();
3379
3380 vm_page_lockspin_queues();
3381 }
3382 q->pgo_busy = FALSE;
3383 q->pgo_idle = TRUE;
3384
3385 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3386 vm_page_unlock_queues();
3387
3388 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) q);
3389 /*NOTREACHED*/
3390 }
3391
3392
3393 static void
3394 vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
3395 {
3396 vm_page_t m = NULL;
3397 vm_object_t object;
3398 vm_object_offset_t offset;
3399 memory_object_t pager;
3400
3401
3402 if (vm_pageout_internal_iothread != THREAD_NULL)
3403 current_thread()->options &= ~TH_OPT_VMPRIV;
3404
3405 vm_page_lockspin_queues();
3406
3407 while ( !queue_empty(&q->pgo_pending) ) {
3408
3409 q->pgo_busy = TRUE;
3410 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3411 if (m->object->object_slid) {
3412 panic("slid page %p not allowed on this path\n", m);
3413 }
3414 VM_PAGE_CHECK(m);
3415 m->pageout_queue = FALSE;
3416 m->pageq.next = NULL;
3417 m->pageq.prev = NULL;
3418
3419 /*
3420 * grab a snapshot of the object and offset this
3421 * page is tabled in so that we can relookup this
3422 * page after we've taken the object lock - these
3423 * fields are stable while we hold the page queues lock
3424 * but as soon as we drop it, there is nothing to keep
3425 * this page in this object... we hold an activity_in_progress
3426 * on this object which will keep it from terminating
3427 */
3428 object = m->object;
3429 offset = m->offset;
3430
3431 vm_page_unlock_queues();
3432
3433 vm_object_lock(object);
3434
3435 m = vm_page_lookup(object, offset);
3436
3437 if (m == NULL ||
3438 m->busy || m->cleaning || m->pageout_queue || !m->laundry) {
3439 /*
3440 * it's either the same page that someone else has
3441 * started cleaning (or it's finished cleaning or
3442 * been put back on the pageout queue), or
3443 * the page has been freed or we have found a
3444 * new page at this offset... in all of these cases
3445 * we merely need to release the activity_in_progress
3446 * we took when we put the page on the pageout queue
3447 */
3448 vm_object_activity_end(object);
3449 vm_object_unlock(object);
3450
3451 vm_page_lockspin_queues();
3452 continue;
3453 }
3454 pager = object->pager;
3455
3456 if (pager == MEMORY_OBJECT_NULL) {
3457 /*
3458 * This pager has been destroyed by either
3459 * memory_object_destroy or vm_object_destroy, and
3460 * so there is nowhere for the page to go.
3461 */
3462 if (m->pageout) {
3463 /*
3464 * Just free the page... VM_PAGE_FREE takes
3465 * care of cleaning up all the state...
3466 * including doing the vm_pageout_throttle_up
3467 */
3468 VM_PAGE_FREE(m);
3469 } else {
3470 vm_page_lockspin_queues();
3471
3472 vm_pageout_throttle_up(m);
3473 vm_page_activate(m);
3474
3475 vm_page_unlock_queues();
3476
3477 /*
3478 * And we are done with it.
3479 */
3480 }
3481 vm_object_activity_end(object);
3482 vm_object_unlock(object);
3483
3484 vm_page_lockspin_queues();
3485 continue;
3486 }
3487 #if 0
3488 /*
3489 * we don't hold the page queue lock
3490 * so this check isn't safe to make
3491 */
3492 VM_PAGE_CHECK(m);
3493 #endif
3494 /*
3495 * give back the activity_in_progress reference we
3496 * took when we queued up this page and replace it
3497 * with a paging_in_progress reference that will
3498 * also keep the paging offset from changing and
3499 * prevent the object from terminating
3500 */
3501 vm_object_activity_end(object);
3502 vm_object_paging_begin(object);
3503 vm_object_unlock(object);
3504
3505 /*
3506 * Send the data to the pager.
3507 * any pageout clustering happens there
3508 */
3509 memory_object_data_return(pager,
3510 m->offset + object->paging_offset,
3511 PAGE_SIZE,
3512 NULL,
3513 NULL,
3514 FALSE,
3515 FALSE,
3516 0);
3517
3518 vm_object_lock(object);
3519 vm_object_paging_end(object);
3520 vm_object_unlock(object);
3521
3522 vm_pageout_io_throttle();
3523
3524 vm_page_lockspin_queues();
3525 }
3526 q->pgo_busy = FALSE;
3527 q->pgo_idle = TRUE;
3528
3529 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3530 vm_page_unlock_queues();
3531
3532 thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q);
3533 /*NOTREACHED*/
3534 }
3535
3536
3537 uint32_t vm_compressor_failed;
3538
3539 #define MAX_FREE_BATCH 32
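/*
 * Note (added): pages successfully handed to the compressor below are
 * collected on a local list and released via vm_page_free_list() in batches
 * of at most MAX_FREE_BATCH, so the global free-list machinery is entered
 * once per batch rather than once per page.
 */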
3540
3541 static void
3542 vm_pageout_iothread_internal_continue(struct cq *cq)
3543 {
3544 struct vm_pageout_queue *q;
3545 vm_page_t m = NULL;
3546 boolean_t pgo_draining;
3547 vm_page_t local_q;
3548 int local_cnt;
3549 vm_page_t local_freeq = NULL;
3550 int local_freed = 0;
3551 int local_batch_size;
3552
3553
3554 KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);
3555
3556 q = cq->q;
3557 local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 2);
3558
3559 #if RECORD_THE_COMPRESSED_DATA
3560 if (q->pgo_laundry)
3561 c_compressed_record_init();
3562 #endif
3563 while (TRUE) {
3564 int pages_left_on_q = 0;
3565
3566 local_cnt = 0;
3567 local_q = NULL;
3568
3569 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0);
3570
3571 vm_page_lock_queues();
3572
3573 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3574
3575 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0);
3576
3577 while ( !queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {
3578
3579 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3580
3581 VM_PAGE_CHECK(m);
3582
3583 m->pageout_queue = FALSE;
3584 m->pageq.prev = NULL;
3585
3586 m->pageq.next = (queue_entry_t)local_q;
3587 local_q = m;
3588 local_cnt++;
3589 }
3590 if (local_q == NULL)
3591 break;
3592
3593 q->pgo_busy = TRUE;
3594
3595 if ((pgo_draining = q->pgo_draining) == FALSE) {
3596 vm_pageout_throttle_up_batch(q, local_cnt);
3597 pages_left_on_q = q->pgo_laundry;
3598 } else
3599 pages_left_on_q = q->pgo_laundry - local_cnt;
3600
3601 vm_page_unlock_queues();
3602
3603 #if !RECORD_THE_COMPRESSED_DATA
3604 if (pages_left_on_q >= local_batch_size && cq->id < (vm_compressor_thread_count - 1))
3605 thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + 1));
3606 #endif
3607 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0);
3608
3609 while (local_q) {
3610
3611 KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0);
3612
3613 m = local_q;
3614 local_q = (vm_page_t)m->pageq.next;
3615 m->pageq.next = NULL;
3616
3617 if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m, FALSE) == KERN_SUCCESS) {
3618
3619 m->pageq.next = (queue_entry_t)local_freeq;
3620 local_freeq = m;
3621 local_freed++;
3622
3623 if (local_freed >= MAX_FREE_BATCH) {
3624
3625 vm_page_free_list(local_freeq, TRUE);
3626 local_freeq = NULL;
3627 local_freed = 0;
3628 }
3629 }
3630 #if !CONFIG_JETSAM
3631 while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3632 kern_return_t wait_result;
3633 int need_wakeup = 0;
3634
3635 if (local_freeq) {
3636 vm_page_free_list(local_freeq, TRUE);
3637
3638 local_freeq = NULL;
3639 local_freed = 0;
3640
3641 continue;
3642 }
3643 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3644
3645 if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3646
3647 if (vm_page_free_wanted_privileged++ == 0)
3648 need_wakeup = 1;
3649 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
3650
3651 lck_mtx_unlock(&vm_page_queue_free_lock);
3652
3653 if (need_wakeup)
3654 thread_wakeup((event_t)&vm_page_free_wanted);
3655
3656 if (wait_result == THREAD_WAITING)
3657
3658 thread_block(THREAD_CONTINUE_NULL);
3659 } else
3660 lck_mtx_unlock(&vm_page_queue_free_lock);
3661 }
3662 #endif
3663 }
3664 if (local_freeq) {
3665 vm_page_free_list(local_freeq, TRUE);
3666
3667 local_freeq = NULL;
3668 local_freed = 0;
3669 }
3670 if (pgo_draining == TRUE) {
3671 vm_page_lockspin_queues();
3672 vm_pageout_throttle_up_batch(q, local_cnt);
3673 vm_page_unlock_queues();
3674 }
3675 }
3676 KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0);
3677
3678 /*
3679 * queue lock is held and our q is empty
3680 */
3681 q->pgo_busy = FALSE;
3682 q->pgo_idle = TRUE;
3683
3684 assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT);
3685 vm_page_unlock_queues();
3686
3687 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3688
3689 thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
3690 /*NOTREACHED*/
3691 }
3692
3693
3694
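/*
 * vm_pageout_immediate:
 *
 * Compress a single page synchronously on the caller's thread using the
 * preallocated vm_pageout_immediate_chead / vm_pageout_immediate_scratch_buf
 * (set up in vm_pageout_internal_start when vm_compressor_immediate_preferred
 * is TRUE).  On success the page is stripped from its object and released
 * to the free list; on failure vm_pageout_compress_page has already
 * reactivated it.
 */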
3695 static void
3696 vm_pageout_immediate(vm_page_t m, boolean_t object_locked_by_caller)
3697 {
3698 assert(vm_pageout_immediate_scratch_buf);
3699
3700 if (vm_pageout_compress_page(&vm_pageout_immediate_chead, vm_pageout_immediate_scratch_buf, m, object_locked_by_caller) == KERN_SUCCESS) {
3701
3702 vm_page_free_prepare_object(m, TRUE);
3703 vm_page_release(m);
3704 }
3705 }
3706
3707
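/*
 * vm_pageout_compress_page:
 *
 * Hand one dirty page to the compressor pager.  If the caller does not
 * already hold the object lock, the object's compressor pager is created
 * on demand (collapse first, then vm_object_compressor_pager_create);
 * if no pager can be obtained the page is reactivated and KERN_FAILURE
 * is returned.  Otherwise the page contents are pushed through
 * vm_compressor_pager_put(), the object's compressed page count and any
 * purgeable ledgers are updated, and on success the page is removed from
 * its object (the caller is expected to free it).  On failure the page is
 * reactivated and vm_compressor_failed is incremented.  Slid pages are
 * not allowed on this path and cause a panic.
 */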
3708 kern_return_t
3709 vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m, boolean_t object_locked_by_caller)
3710 {
3711 vm_object_t object;
3712 memory_object_t pager;
3713 int compressed_count_delta;
3714 kern_return_t retval;
3715
3716 if (m->object->object_slid) {
3717 panic("slid page %p not allowed on this path\n", m);
3718 }
3719
3720 object = m->object;
3721 pager = object->pager;
3722
3723 if (object_locked_by_caller == FALSE && (!object->pager_initialized || pager == MEMORY_OBJECT_NULL)) {
3724
3725 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);
3726
3727 vm_object_lock(object);
3728
3729 /*
3730 * If there is no memory object for the page, create
3731 * one and hand it to the compression pager.
3732 */
3733
3734 if (!object->pager_initialized)
3735 vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
3736 if (!object->pager_initialized)
3737 vm_object_compressor_pager_create(object);
3738
3739 if (!object->pager_initialized) {
3740 /*
3741 * Still no pager for the object.
3742 * Reactivate the page.
3743 *
3744 * Should only happen if there is no
3745 * compression pager
3746 */
3747 m->pageout = FALSE;
3748 m->laundry = FALSE;
3749 PAGE_WAKEUP_DONE(m);
3750
3751 vm_page_lockspin_queues();
3752 vm_page_activate(m);
3753 vm_pageout_dirty_no_pager++;
3754 vm_page_unlock_queues();
3755
3756 /*
3757 * And we are done with it.
3758 */
3759 vm_object_activity_end(object);
3760 vm_object_unlock(object);
3761
3762 return KERN_FAILURE;
3763 }
3764 pager = object->pager;
3765
3766 if (pager == MEMORY_OBJECT_NULL) {
3767 /*
3768 * This pager has been destroyed by either
3769 * memory_object_destroy or vm_object_destroy, and
3770 * so there is nowhere for the page to go.
3771 */
3772 if (m->pageout) {
3773 /*
3774 * Just free the page... VM_PAGE_FREE takes
3775 * care of cleaning up all the state...
3776 * including doing the vm_pageout_throttle_up
3777 */
3778 VM_PAGE_FREE(m);
3779 } else {
3780 m->laundry = FALSE;
3781 PAGE_WAKEUP_DONE(m);
3782
3783 vm_page_lockspin_queues();
3784 vm_page_activate(m);
3785 vm_page_unlock_queues();
3786
3787 /*
3788 * And we are done with it.
3789 */
3790 }
3791 vm_object_activity_end(object);
3792 vm_object_unlock(object);
3793
3794 return KERN_FAILURE;
3795 }
3796 vm_object_unlock(object);
3797
3798 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
3799 }
3800 assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
3801
3802 if (object_locked_by_caller == FALSE)
3803 assert(object->activity_in_progress > 0);
3804
3805 retval = vm_compressor_pager_put(
3806 pager,
3807 m->offset + object->paging_offset,
3808 m->phys_page,
3809 current_chead,
3810 scratch_buf,
3811 &compressed_count_delta);
3812
3813 if (object_locked_by_caller == FALSE) {
3814 vm_object_lock(object);
3815
3816 assert(object->activity_in_progress > 0);
3817 assert(m->object == object);
3818 }
3819
3820 vm_compressor_pager_count(pager,
3821 compressed_count_delta,
3822 FALSE, /* shared_lock */
3823 object);
3824
3825 m->laundry = FALSE;
3826 m->pageout = FALSE;
3827
3828 if (retval == KERN_SUCCESS) {
3829 /*
3830 * If the object is purgeable, its owner's
3831 * purgeable ledgers will be updated in
3832 * vm_page_remove() but the page still
3833 * contributes to the owner's memory footprint,
3834 * so account for it as such.
3835 */
3836 if (object->purgable != VM_PURGABLE_DENY &&
3837 object->vo_purgeable_owner != NULL) {
3838 /* one more compressed purgeable page */
3839 vm_purgeable_compressed_update(object,
3840 +1);
3841 }
3842 VM_STAT_INCR(compressions);
3843
3844 if (m->tabled)
3845 vm_page_remove(m, TRUE);
3846
3847 } else {
3848 PAGE_WAKEUP_DONE(m);
3849
3850 vm_page_lockspin_queues();
3851
3852 vm_page_activate(m);
3853 vm_compressor_failed++;
3854
3855 vm_page_unlock_queues();
3856 }
3857 if (object_locked_by_caller == FALSE) {
3858 vm_object_activity_end(object);
3859 vm_object_unlock(object);
3860 }
3861 return retval;
3862 }
3863
3864
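/*
 * vm_pageout_adjust_io_throttles:
 *
 * Called with the page queue lock held.  Moves the internal (iq) and/or
 * external (eq) pageout threads between the THROTTLE_LEVEL_PAGEOUT_THROTTLED
 * and THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED I/O tiers via
 * proc_set_task_policy_thread(), dropping and retaking the page queue lock
 * around the policy change.  While hibernate cleaning is in progress the
 * request is forced to the unthrottled tier.
 */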
3865 static void
3866 vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_queue *eq, boolean_t req_lowpriority)
3867 {
3868 uint32_t policy;
3869 boolean_t set_iq = FALSE;
3870 boolean_t set_eq = FALSE;
3871
3872 if (hibernate_cleaning_in_progress == TRUE)
3873 req_lowpriority = FALSE;
3874
3875 if ((DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) && iq->pgo_inited == TRUE && iq->pgo_lowpriority != req_lowpriority)
3876 set_iq = TRUE;
3877
3878 if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority)
3879 set_eq = TRUE;
3880
3881 if (set_iq == TRUE || set_eq == TRUE) {
3882
3883 vm_page_unlock_queues();
3884
3885 if (req_lowpriority == TRUE) {
3886 policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
3887 DTRACE_VM(laundrythrottle);
3888 } else {
3889 policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
3890 DTRACE_VM(laundryunthrottle);
3891 }
3892 if (set_iq == TRUE) {
3893 proc_set_task_policy_thread(kernel_task, iq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
3894
3895 iq->pgo_lowpriority = req_lowpriority;
3896 }
3897 if (set_eq == TRUE) {
3898 proc_set_task_policy_thread(kernel_task, eq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
3899
3900 eq->pgo_lowpriority = req_lowpriority;
3901 }
3902 vm_page_lock_queues();
3903 }
3904 }
3905
3906
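/*
 * vm_pageout_iothread_external:
 *
 * Startup for the external pageout I/O thread: marks itself VM-privileged,
 * places its own I/O in the pageout-throttled tier, records its thread id
 * and low-priority state in vm_pageout_queue_external, and then enters the
 * appropriate continuation (the compressed-pager variant when a compressed
 * pager is active).  Never returns.
 */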
3907 static void
3908 vm_pageout_iothread_external(void)
3909 {
3910 thread_t self = current_thread();
3911
3912 self->options |= TH_OPT_VMPRIV;
3913
3914 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
3915
3916 proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL,
3917 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
3918
3919 vm_page_lock_queues();
3920
3921 vm_pageout_queue_external.pgo_tid = self->thread_id;
3922 vm_pageout_queue_external.pgo_lowpriority = TRUE;
3923 vm_pageout_queue_external.pgo_inited = TRUE;
3924
3925 vm_page_unlock_queues();
3926
3927 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
3928 vm_pageout_iothread_external_continue(&vm_pageout_queue_external);
3929 else
3930 vm_pageout_iothread_continue(&vm_pageout_queue_external);
3931
3932 /*NOTREACHED*/
3933 }
3934
3935
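/*
 * vm_pageout_iothread_internal:
 *
 * Startup for one internal (compressor) pageout thread.  It mirrors the
 * external setup above, except that the throttled I/O tier is only applied
 * when a default pager or default freezer is active; when
 * vm_restricted_to_single_processor is set it also joins the bind group,
 * and it then enters vm_pageout_iothread_internal_continue() with its
 * per-thread struct cq.  Never returns.
 */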
3936 static void
3937 vm_pageout_iothread_internal(struct cq *cq)
3938 {
3939 thread_t self = current_thread();
3940
3941 self->options |= TH_OPT_VMPRIV;
3942
3943 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
3944 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
3945
3946 proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL,
3947 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
3948 }
3949 vm_page_lock_queues();
3950
3951 vm_pageout_queue_internal.pgo_tid = self->thread_id;
3952 vm_pageout_queue_internal.pgo_lowpriority = TRUE;
3953 vm_pageout_queue_internal.pgo_inited = TRUE;
3954
3955 vm_page_unlock_queues();
3956
3957 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
3958
3959 if (vm_restricted_to_single_processor == TRUE)
3960 thread_vm_bind_group_add();
3961
3962 vm_pageout_iothread_internal_continue(cq);
3963 } else
3964 vm_pageout_iothread_continue(&vm_pageout_queue_internal);
3965
3966 /*NOTREACHED*/
3967 }
3968
3969 kern_return_t
3970 vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
3971 {
3972 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
3973 return KERN_SUCCESS;
3974 } else {
3975 return KERN_FAILURE; /* Already set */
3976 }
3977 }
3978
3979 extern boolean_t memorystatus_manual_testing_on;
3980 extern unsigned int memorystatus_level;
3981
3982
3983 #if VM_PRESSURE_EVENTS
3984
3985 boolean_t vm_pressure_events_enabled = FALSE;
3986
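/*
 * vm_pressure_response:
 *
 * Recomputes memorystatus_level as the percentage of physical memory that
 * is still available uncompressed:
 *
 *	memorystatus_level = (AVAILABLE_NON_COMPRESSED_MEMORY * 100) / atop_64(max_mem)
 *
 * (purely illustrative: 250,000 available pages on a machine with
 * 1,000,000 physical pages yields a level of 25).  It then advances the
 * memorystatus_vm_pressure_level state machine (Normal <-> Warning <->
 * Critical) using the VM_PRESSURE_*_TO_*() predicates, waking
 * vm_pressure_thread and any vm_pressure_changed waiters when the level
 * changes.  Manual memorystatus testing suppresses the state machine.
 */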
3987 void
3988 vm_pressure_response(void)
3989 {
3990
3991 vm_pressure_level_t old_level = kVMPressureNormal;
3992 int new_level = -1;
3993
3994 uint64_t available_memory = 0;
3995
3996 if (vm_pressure_events_enabled == FALSE)
3997 return;
3998
3999
4000 available_memory = (((uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY) * 100);
4001
4002
4003 memorystatus_level = (unsigned int) (available_memory / atop_64(max_mem));
4004
4005 if (memorystatus_manual_testing_on) {
4006 return;
4007 }
4008
4009 old_level = memorystatus_vm_pressure_level;
4010
4011 switch (memorystatus_vm_pressure_level) {
4012
4013 case kVMPressureNormal:
4014 {
4015 if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4016 new_level = kVMPressureCritical;
4017 } else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
4018 new_level = kVMPressureWarning;
4019 }
4020 break;
4021 }
4022
4023 case kVMPressureWarning:
4024 case kVMPressureUrgent:
4025 {
4026 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4027 new_level = kVMPressureNormal;
4028 } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4029 new_level = kVMPressureCritical;
4030 }
4031 break;
4032 }
4033
4034 case kVMPressureCritical:
4035 {
4036 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4037 new_level = kVMPressureNormal;
4038 } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
4039 new_level = kVMPressureWarning;
4040 }
4041 break;
4042 }
4043
4044 default:
4045 return;
4046 }
4047
4048 if (new_level != -1) {
4049 memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;
4050
4051 if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != new_level)) {
4052 if (vm_pressure_thread_running == FALSE) {
4053 thread_wakeup(&vm_pressure_thread);
4054 }
4055
4056 if (old_level != new_level) {
4057 thread_wakeup(&vm_pressure_changed);
4058 }
4059 }
4060 }
4061
4062 }
4063 #endif /* VM_PRESSURE_EVENTS */
4064
4065 kern_return_t
4066 mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {
4067
4068 #if !VM_PRESSURE_EVENTS
4069
4070 return KERN_FAILURE;
4071
4072 #else /* VM_PRESSURE_EVENTS */
4073
4074 kern_return_t kr = KERN_SUCCESS;
4075
4076 if (pressure_level != NULL) {
4077
4078 vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
4079
4080 if (wait_for_pressure == TRUE) {
4081 wait_result_t wr = 0;
4082
4083 while (old_level == *pressure_level) {
4084 wr = assert_wait((event_t) &vm_pressure_changed,
4085 THREAD_INTERRUPTIBLE);
4086 if (wr == THREAD_WAITING) {
4087 wr = thread_block(THREAD_CONTINUE_NULL);
4088 }
4089 if (wr == THREAD_INTERRUPTED) {
4090 return KERN_ABORTED;
4091 }
4092 if (wr == THREAD_AWAKENED) {
4093
4094 old_level = memorystatus_vm_pressure_level;
4095
4096 if (old_level != *pressure_level) {
4097 break;
4098 }
4099 }
4100 }
4101 }
4102
4103 *pressure_level = old_level;
4104 kr = KERN_SUCCESS;
4105 } else {
4106 kr = KERN_INVALID_ARGUMENT;
4107 }
4108
4109 return kr;
4110 #endif /* VM_PRESSURE_EVENTS */
4111 }
4112
4113 #if VM_PRESSURE_EVENTS
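/*
 * vm_pressure_thread:
 *
 * Note the first-call quirk: the initial invocation only records that the
 * thread has been initialized and parks on &vm_pressure_thread.  Each
 * subsequent wakeup runs consider_vm_pressure_events() with
 * vm_pressure_thread_running set around it, then re-arms the wait using
 * itself as the continuation.
 */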
4114 void
4115 vm_pressure_thread(void) {
4116 static boolean_t thread_initialized = FALSE;
4117
4118 if (thread_initialized == TRUE) {
4119 vm_pressure_thread_running = TRUE;
4120 consider_vm_pressure_events();
4121 vm_pressure_thread_running = FALSE;
4122 }
4123
4124 thread_initialized = TRUE;
4125 assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
4126 thread_block((thread_continue_t)vm_pressure_thread);
4127 }
4128 #endif /* VM_PRESSURE_EVENTS */
4129
4130
4131 uint32_t vm_pageout_considered_page_last = 0;
4132
4133 /*
4134 * called once per second via "compute_averages"
4135 */
4136 void
4137 compute_pageout_gc_throttle()
4138 {
4139 if (vm_pageout_considered_page != vm_pageout_considered_page_last) {
4140
4141 vm_pageout_considered_page_last = vm_pageout_considered_page;
4142
4143 thread_wakeup((event_t) &vm_pageout_garbage_collect);
4144 }
4145 }
4146
4147
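/*
 * vm_pageout_garbage_collect:
 *
 * Body of the garbage-collection thread.  When woken with a non-zero
 * argument it drains kernel stacks, machine-level caches and mbufs
 * (m_drain), invokes the registered buffer-cache collect callout, and runs
 * consider_zone_gc() last so that memory released by the earlier steps is
 * visible to the zone collector.  It repeats while the buffer cache keeps
 * releasing large zone elements and vm_page_free_count remains below
 * vm_page_free_target, then re-arms its wait and blocks with an argument
 * of 1 for the next wakeup.
 */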
4148 static void
4149 vm_pageout_garbage_collect(int collect)
4150 {
4151
4152 if (collect) {
4153 boolean_t buf_large_zfree = FALSE;
4154 boolean_t first_try = TRUE;
4155
4156 stack_collect();
4157
4158 consider_machine_collect();
4159 m_drain();
4160
4161 do {
4162 if (consider_buffer_cache_collect != NULL) {
4163 buf_large_zfree = (*consider_buffer_cache_collect)(0);
4164 }
4165 if (first_try == TRUE || buf_large_zfree == TRUE) {
4166 /*
4167 * consider_zone_gc should be last, because the other operations
4168 * might return memory to zones.
4169 */
4170 consider_zone_gc(buf_large_zfree);
4171 }
4172 first_try = FALSE;
4173
4174 } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
4175
4176 consider_machine_adjust();
4177 }
4178 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
4179
4180 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
4181 /*NOTREACHED*/
4182 }
4183
4184
4185 void vm_pageout_reinit_tuneables(void);
4186
4187 void
4188 vm_pageout_reinit_tuneables(void)
4189 {
4190
4191 vm_compressor_minorcompact_threshold_divisor = 18;
4192 vm_compressor_majorcompact_threshold_divisor = 22;
4193 vm_compressor_unthrottle_threshold_divisor = 32;
4194 }
4195
4196
4197 #if VM_PAGE_BUCKETS_CHECK
4198 #if VM_PAGE_FAKE_BUCKETS
4199 extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
4200 #endif /* VM_PAGE_FAKE_BUCKETS */
4201 #endif /* VM_PAGE_BUCKETS_CHECK */
4202
4203 #define FBDP_TEST_COLLAPSE_COMPRESSOR 0
4204 #if FBDP_TEST_COLLAPSE_COMPRESSOR
4205 extern boolean_t vm_object_collapse_compressor_allowed;
4206 #include <IOKit/IOLib.h>
4207 #endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */
4208
4209 #define FBDP_TEST_WIRE_AND_EXTRACT 0
4210 #if FBDP_TEST_WIRE_AND_EXTRACT
4211 extern ledger_template_t task_ledger_template;
4212 #include <mach/mach_vm.h>
4213 extern ppnum_t vm_map_get_phys_page(vm_map_t map,
4214 vm_offset_t offset);
4215 #endif /* FBDP_TEST_WIRE_AND_EXTRACT */
4216
4217
4218 void
4219 vm_set_restrictions()
4220 {
4221 host_basic_info_data_t hinfo;
4222 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4223
4224 #define BSD_HOST 1
4225 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4226
4227 assert(hinfo.max_cpus > 0);
4228
4229 if (hinfo.max_cpus <= 3) {
4230 /*
4231 * on systems with a limited number of CPUS, bind the
4232 * 4 major threads that can free memory and that tend to use
4233 * a fair bit of CPU under pressured conditions to a single processor.
4234 * This ensures that these threads don't hog all of the available CPUs
4235 * (important for camera launch), while allowing them to run independently
4236 * with respect to locks... the 4 threads are
4237 * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
4238 * vm_compressor_swap_trigger_thread (minor and major compactions),
4239 * memorystatus_thread (jetsams).
4240 *
4241 * the first time the thread is run, it is responsible for checking the
4242 * state of vm_restricted_to_single_processor, and if TRUE it calls
4243 * thread_bind_master... someday this should be replaced with a group
4244 * scheduling mechanism and KPI.
4245 */
4246 vm_restricted_to_single_processor = TRUE;
4247 }
4248 }
4249
4250
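/*
 * vm_pageout:
 *
 * Entry point for the pageout daemon thread.  It raises itself to
 * BASEPRI_PREEMPT - 1 with TH_OPT_VMPRIV, reserves its kernel stack,
 * optionally joins the single-processor bind group, fills in any pageout
 * wait/throttle tunables that were not overridden, marks kernel_task as
 * backing-store privileged, recomputes the free-page reserve, initializes
 * the external and internal pageout queues, starts the external I/O,
 * garbage-collect and (if enabled) pressure threads, initializes the
 * compressor pager and optional debug self-tests, and finally calls
 * vm_pageout_continue(), which never returns.
 */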
4251 void
4252 vm_pageout(void)
4253 {
4254 thread_t self = current_thread();
4255 thread_t thread;
4256 kern_return_t result;
4257 spl_t s;
4258
4259 /*
4260 * Set thread privileges.
4261 */
4262 s = splsched();
4263
4264 thread_lock(self);
4265 self->options |= TH_OPT_VMPRIV;
4266 sched_set_thread_base_priority(self, BASEPRI_PREEMPT - 1);
4267 thread_unlock(self);
4268
4269 if (!self->reserved_stack)
4270 self->reserved_stack = self->kernel_stack;
4271
4272 if (vm_restricted_to_single_processor == TRUE)
4273 thread_vm_bind_group_add();
4274
4275 splx(s);
4276
4277 /*
4278 * Initialize some paging parameters.
4279 */
4280
4281 if (vm_pageout_swap_wait == 0)
4282 vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
4283
4284 if (vm_pageout_idle_wait == 0)
4285 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
4286
4287 if (vm_pageout_burst_wait == 0)
4288 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
4289
4290 if (vm_pageout_empty_wait == 0)
4291 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
4292
4293 if (vm_pageout_deadlock_wait == 0)
4294 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
4295
4296 if (vm_pageout_deadlock_relief == 0)
4297 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
4298
4299 if (vm_pageout_inactive_relief == 0)
4300 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
4301
4302 if (vm_pageout_burst_active_throttle == 0)
4303 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
4304
4305 if (vm_pageout_burst_inactive_throttle == 0)
4306 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
4307
4308 /*
4309 * Set kernel task to low backing store privileged
4310 * status
4311 */
4312 task_lock(kernel_task);
4313 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
4314 task_unlock(kernel_task);
4315
4316 vm_page_free_count_init = vm_page_free_count;
4317
4318 /*
4319 * even if we've already called vm_page_free_reserve
4320 * call it again here to ensure that the targets are
4321 * accurately calculated (it uses vm_page_free_count_init)
4322 * calling it with an arg of 0 will not change the reserve
4323 * but will re-calculate free_min and free_target
4324 */
4325 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
4326 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
4327 } else
4328 vm_page_free_reserve(0);
4329
4330
4331 queue_init(&vm_pageout_queue_external.pgo_pending);
4332 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4333 vm_pageout_queue_external.pgo_laundry = 0;
4334 vm_pageout_queue_external.pgo_idle = FALSE;
4335 vm_pageout_queue_external.pgo_busy = FALSE;
4336 vm_pageout_queue_external.pgo_throttled = FALSE;
4337 vm_pageout_queue_external.pgo_draining = FALSE;
4338 vm_pageout_queue_external.pgo_lowpriority = FALSE;
4339 vm_pageout_queue_external.pgo_tid = -1;
4340 vm_pageout_queue_external.pgo_inited = FALSE;
4341
4342 queue_init(&vm_pageout_queue_internal.pgo_pending);
4343 vm_pageout_queue_internal.pgo_maxlaundry = 0;
4344 vm_pageout_queue_internal.pgo_laundry = 0;
4345 vm_pageout_queue_internal.pgo_idle = FALSE;
4346 vm_pageout_queue_internal.pgo_busy = FALSE;
4347 vm_pageout_queue_internal.pgo_throttled = FALSE;
4348 vm_pageout_queue_internal.pgo_draining = FALSE;
4349 vm_pageout_queue_internal.pgo_lowpriority = FALSE;
4350 vm_pageout_queue_internal.pgo_tid = -1;
4351 vm_pageout_queue_internal.pgo_inited = FALSE;
4352
4353 /* internal pageout thread started when default pager registered first time */
4354 /* external pageout and garbage collection threads started here */
4355
4356 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
4357 BASEPRI_PREEMPT - 1,
4358 &vm_pageout_external_iothread);
4359 if (result != KERN_SUCCESS)
4360 panic("vm_pageout_iothread_external: create failed");
4361
4362 thread_deallocate(vm_pageout_external_iothread);
4363
4364 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
4365 BASEPRI_DEFAULT,
4366 &thread);
4367 if (result != KERN_SUCCESS)
4368 panic("vm_pageout_garbage_collect: create failed");
4369
4370 thread_deallocate(thread);
4371
4372 #if VM_PRESSURE_EVENTS
4373 result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
4374 BASEPRI_DEFAULT,
4375 &thread);
4376
4377 if (result != KERN_SUCCESS)
4378 panic("vm_pressure_thread: create failed");
4379
4380 thread_deallocate(thread);
4381 #endif
4382
4383 vm_object_reaper_init();
4384
4385 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
4386 vm_compressor_pager_init();
4387
4388 #if VM_PRESSURE_EVENTS
4389 vm_pressure_events_enabled = TRUE;
4390 #endif /* VM_PRESSURE_EVENTS */
4391
4392 #if CONFIG_PHANTOM_CACHE
4393 vm_phantom_cache_init();
4394 #endif
4395 #if VM_PAGE_BUCKETS_CHECK
4396 #if VM_PAGE_FAKE_BUCKETS
4397 printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
4398 (uint64_t) vm_page_fake_buckets_start,
4399 (uint64_t) vm_page_fake_buckets_end);
4400 pmap_protect(kernel_pmap,
4401 vm_page_fake_buckets_start,
4402 vm_page_fake_buckets_end,
4403 VM_PROT_READ);
4404 // *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */
4405 #endif /* VM_PAGE_FAKE_BUCKETS */
4406 #endif /* VM_PAGE_BUCKETS_CHECK */
4407
4408 #if VM_OBJECT_TRACKING
4409 vm_object_tracking_init();
4410 #endif /* VM_OBJECT_TRACKING */
4411
4412
4413 #if FBDP_TEST_COLLAPSE_COMPRESSOR
4414 vm_object_size_t backing_size, top_size;
4415 vm_object_t backing_object, top_object;
4416 vm_map_offset_t backing_offset, top_offset;
4417 unsigned char *backing_address, *top_address;
4418 kern_return_t kr;
4419
4420 printf("FBDP_TEST_COLLAPSE_COMPRESSOR:\n");
4421
4422 /* create backing object */
4423 backing_size = 15 * PAGE_SIZE;
4424 backing_object = vm_object_allocate(backing_size);
4425 assert(backing_object != VM_OBJECT_NULL);
4426 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
4427 backing_object);
4428 /* map backing object */
4429 backing_offset = 0;
4430 kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0,
4431 VM_FLAGS_ANYWHERE, backing_object, 0, FALSE,
4432 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
4433 assert(kr == KERN_SUCCESS);
4434 backing_address = (unsigned char *) backing_offset;
4435 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4436 "mapped backing object %p at 0x%llx\n",
4437 backing_object, (uint64_t) backing_offset);
4438 /* populate with pages to be compressed in backing object */
4439 backing_address[0x1*PAGE_SIZE] = 0xB1;
4440 backing_address[0x4*PAGE_SIZE] = 0xB4;
4441 backing_address[0x7*PAGE_SIZE] = 0xB7;
4442 backing_address[0xa*PAGE_SIZE] = 0xBA;
4443 backing_address[0xd*PAGE_SIZE] = 0xBD;
4444 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4445 "populated pages to be compressed in "
4446 "backing_object %p\n", backing_object);
4447 /* compress backing object */
4448 vm_object_pageout(backing_object);
4449 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
4450 backing_object);
4451 /* wait for all the pages to be gone */
4452 while (*(volatile int *)&backing_object->resident_page_count != 0)
4453 IODelay(10);
4454 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
4455 backing_object);
4456 /* populate with pages to be resident in backing object */
4457 backing_address[0x0*PAGE_SIZE] = 0xB0;
4458 backing_address[0x3*PAGE_SIZE] = 0xB3;
4459 backing_address[0x6*PAGE_SIZE] = 0xB6;
4460 backing_address[0x9*PAGE_SIZE] = 0xB9;
4461 backing_address[0xc*PAGE_SIZE] = 0xBC;
4462 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4463 "populated pages to be resident in "
4464 "backing_object %p\n", backing_object);
4465 /* leave the other pages absent */
4466 /* mess with the paging_offset of the backing_object */
4467 assert(backing_object->paging_offset == 0);
4468 backing_object->paging_offset = 0x3000;
4469
4470 /* create top object */
4471 top_size = 9 * PAGE_SIZE;
4472 top_object = vm_object_allocate(top_size);
4473 assert(top_object != VM_OBJECT_NULL);
4474 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
4475 top_object);
4476 /* map top object */
4477 top_offset = 0;
4478 kr = vm_map_enter(kernel_map, &top_offset, top_size, 0,
4479 VM_FLAGS_ANYWHERE, top_object, 0, FALSE,
4480 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
4481 assert(kr == KERN_SUCCESS);
4482 top_address = (unsigned char *) top_offset;
4483 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4484 "mapped top object %p at 0x%llx\n",
4485 top_object, (uint64_t) top_offset);
4486 /* populate with pages to be compressed in top object */
4487 top_address[0x3*PAGE_SIZE] = 0xA3;
4488 top_address[0x4*PAGE_SIZE] = 0xA4;
4489 top_address[0x5*PAGE_SIZE] = 0xA5;
4490 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4491 "populated pages to be compressed in "
4492 "top_object %p\n", top_object);
4493 /* compress top object */
4494 vm_object_pageout(top_object);
4495 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
4496 top_object);
4497 /* wait for all the pages to be gone */
4498 while (*(volatile int *)&top_object->resident_page_count != 0);
4499 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
4500 top_object);
4501 /* populate with pages to be resident in top object */
4502 top_address[0x0*PAGE_SIZE] = 0xA0;
4503 top_address[0x1*PAGE_SIZE] = 0xA1;
4504 top_address[0x2*PAGE_SIZE] = 0xA2;
4505 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4506 "populated pages to be resident in "
4507 "top_object %p\n", top_object);
4508 /* leave the other pages absent */
4509
4510 /* link the 2 objects */
4511 vm_object_reference(backing_object);
4512 top_object->shadow = backing_object;
4513 top_object->vo_shadow_offset = 0x3000;
4514 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
4515 top_object, backing_object);
4516
4517 /* unmap backing object */
4518 vm_map_remove(kernel_map,
4519 backing_offset,
4520 backing_offset + backing_size,
4521 0);
4522 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4523 "unmapped backing_object %p [0x%llx:0x%llx]\n",
4524 backing_object,
4525 (uint64_t) backing_offset,
4526 (uint64_t) (backing_offset + backing_size));
4527
4528 /* collapse */
4529 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object);
4530 vm_object_lock(top_object);
4531 vm_object_collapse(top_object, 0, FALSE);
4532 vm_object_unlock(top_object);
4533 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object);
4534
4535 /* did it work? */
4536 if (top_object->shadow != VM_OBJECT_NULL) {
4537 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
4538 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4539 if (vm_object_collapse_compressor_allowed) {
4540 panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4541 }
4542 } else {
4543 /* check the contents of the mapping */
4544 unsigned char expect[9] =
4545 { 0xA0, 0xA1, 0xA2, /* resident in top */
4546 0xA3, 0xA4, 0xA5, /* compressed in top */
4547 0xB9, /* resident in backing + shadow_offset */
4548 0xBD, /* compressed in backing + shadow_offset + paging_offset */
4549 0x00 }; /* absent in both */
4550 unsigned char actual[9];
4551 unsigned int i, errors;
4552
4553 errors = 0;
4554 for (i = 0; i < sizeof (actual); i++) {
4555 actual[i] = (unsigned char) top_address[i*PAGE_SIZE];
4556 if (actual[i] != expect[i]) {
4557 errors++;
4558 }
4559 }
4560 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4561 "actual [%x %x %x %x %x %x %x %x %x] "
4562 "expect [%x %x %x %x %x %x %x %x %x] "
4563 "%d errors\n",
4564 actual[0], actual[1], actual[2], actual[3],
4565 actual[4], actual[5], actual[6], actual[7],
4566 actual[8],
4567 expect[0], expect[1], expect[2], expect[3],
4568 expect[4], expect[5], expect[6], expect[7],
4569 expect[8],
4570 errors);
4571 if (errors) {
4572 panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4573 } else {
4574 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: PASS\n");
4575 }
4576 }
4577 #endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */
4578
4579 #if FBDP_TEST_WIRE_AND_EXTRACT
4580 ledger_t ledger;
4581 vm_map_t user_map, wire_map;
4582 mach_vm_address_t user_addr, wire_addr;
4583 mach_vm_size_t user_size, wire_size;
4584 mach_vm_offset_t cur_offset;
4585 vm_prot_t cur_prot, max_prot;
4586 ppnum_t user_ppnum, wire_ppnum;
4587 kern_return_t kr;
4588
4589 ledger = ledger_instantiate(task_ledger_template,
4590 LEDGER_CREATE_ACTIVE_ENTRIES);
4591 user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT),
4592 0x100000000ULL,
4593 0x200000000ULL,
4594 TRUE);
4595 wire_map = vm_map_create(NULL,
4596 0x100000000ULL,
4597 0x200000000ULL,
4598 TRUE);
4599 user_addr = 0;
4600 user_size = 0x10000;
4601 kr = mach_vm_allocate(user_map,
4602 &user_addr,
4603 user_size,
4604 VM_FLAGS_ANYWHERE);
4605 assert(kr == KERN_SUCCESS);
4606 wire_addr = 0;
4607 wire_size = user_size;
4608 kr = mach_vm_remap(wire_map,
4609 &wire_addr,
4610 wire_size,
4611 0,
4612 VM_FLAGS_ANYWHERE,
4613 user_map,
4614 user_addr,
4615 FALSE,
4616 &cur_prot,
4617 &max_prot,
4618 VM_INHERIT_NONE);
4619 assert(kr == KERN_SUCCESS);
4620 for (cur_offset = 0;
4621 cur_offset < wire_size;
4622 cur_offset += PAGE_SIZE) {
4623 kr = vm_map_wire_and_extract(wire_map,
4624 wire_addr + cur_offset,
4625 VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
4626 TRUE,
4627 &wire_ppnum);
4628 assert(kr == KERN_SUCCESS);
4629 user_ppnum = vm_map_get_phys_page(user_map,
4630 user_addr + cur_offset);
4631 printf("FBDP_TEST_WIRE_AND_EXTRACT: kr=0x%x "
4632 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
4633 kr,
4634 user_map, user_addr + cur_offset, user_ppnum,
4635 wire_map, wire_addr + cur_offset, wire_ppnum);
4636 if (kr != KERN_SUCCESS ||
4637 wire_ppnum == 0 ||
4638 wire_ppnum != user_ppnum) {
4639 panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n");
4640 }
4641 }
4642 cur_offset -= PAGE_SIZE;
4643 kr = vm_map_wire_and_extract(wire_map,
4644 wire_addr + cur_offset,
4645 VM_PROT_DEFAULT,
4646 TRUE,
4647 &wire_ppnum);
4648 assert(kr == KERN_SUCCESS);
4649 printf("FBDP_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
4650 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
4651 kr,
4652 user_map, user_addr + cur_offset, user_ppnum,
4653 wire_map, wire_addr + cur_offset, wire_ppnum);
4654 if (kr != KERN_SUCCESS ||
4655 wire_ppnum == 0 ||
4656 wire_ppnum != user_ppnum) {
4657 panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n");
4658 }
4659
4660 printf("FBDP_TEST_WIRE_AND_EXTRACT: PASS\n");
4661 #endif /* FBDP_TEST_WIRE_AND_EXTRACT */
4662
4663 vm_pageout_continue();
4664
4665 /*
4666 * Unreached code!
4667 *
4668 * The vm_pageout_continue() call above never returns, so the code below is never
4669 * executed. We take advantage of this to declare several DTrace VM related probe
4670 * points that our kernel doesn't have an analog for. These are probe points that
4671 * exist in Solaris and are in the DTrace documentation, so people may have written
4672 * scripts that use them. Declaring the probe points here means their scripts will
4673 * compile and execute which we want for portability of the scripts, but since this
4674 * section of code is never reached, the probe points will simply never fire. Yes,
4675 * this is basically a hack. The problem is the DTrace probe points were chosen with
4676 * Solaris specific VM events in mind, not portability to different VM implementations.
4677 */
4678
4679 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
4680 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
4681 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
4682 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
4683 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
4684 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
4685 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
4686 /*NOTREACHED*/
4687 }
4688
4689
4690
4691 int vm_compressor_thread_count = 2;
4692
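/*
 * vm_pageout_internal_start:
 *
 * Creates the internal (compressor) pageout threads.  The thread count is
 * clamped to max_cpus - 1 and MAX_COMPRESSOR_THREAD_COUNT, and forced to 1
 * when vm_compressor_immediate_preferred is set (in which case the
 * immediate scratch buffer is allocated here as well).
 * vm_pageout_queue_internal.pgo_maxlaundry scales with the thread count:
 * 4 * VM_PAGE_LAUNDRY_MAX per compressor thread, e.g. 8 * VM_PAGE_LAUNDRY_MAX
 * with the default of 2 threads.  Each thread gets its own struct cq with a
 * scratch buffer and is started at BASEPRI_PREEMPT - 1.
 */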
4693 kern_return_t
4694 vm_pageout_internal_start(void)
4695 {
4696 kern_return_t result;
4697 int i;
4698 host_basic_info_data_t hinfo;
4699 int thread_count;
4700
4701
4702 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
4703 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4704 #define BSD_HOST 1
4705 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4706
4707 assert(hinfo.max_cpus > 0);
4708
4709 if (vm_compressor_thread_count >= hinfo.max_cpus)
4710 vm_compressor_thread_count = hinfo.max_cpus - 1;
4711 if (vm_compressor_thread_count <= 0)
4712 vm_compressor_thread_count = 1;
4713 else if (vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
4714 vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
4715
4716 if (vm_compressor_immediate_preferred == TRUE) {
4717 vm_pageout_immediate_chead = NULL;
4718 vm_pageout_immediate_scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
4719
4720 vm_compressor_thread_count = 1;
4721 }
4722 thread_count = vm_compressor_thread_count;
4723
4724 vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
4725 } else {
4726 vm_compressor_thread_count = 0;
4727 thread_count = 1;
4728 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4729 }
4730
4731 for (i = 0; i < vm_compressor_thread_count; i++) {
4732 ciq[i].id = i;
4733 ciq[i].q = &vm_pageout_queue_internal;
4734 ciq[i].current_chead = NULL;
4735 ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
4736 }
4737 for (i = 0; i < thread_count; i++) {
4738 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread);
4739
4740 if (result == KERN_SUCCESS)
4741 thread_deallocate(vm_pageout_internal_iothread);
4742 else
4743 break;
4744 }
4745 return result;
4746 }
4747
4748 #if CONFIG_IOSCHED
4749 /*
4750 * To support I/O Expedite for compressed files we mark the upls with special flags.
4751 * The way decmpfs works is that we create a big upl which marks all the pages needed to
4752 * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
4753 * then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages
4754 * being held in the big original UPL. We mark each of these smaller UPLs with the flag
4755 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
4756 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
4757 * by the req upl lock (the reverse link doesn't need synchronization since we never inspect this link
4758 * unless the real I/O upl is being destroyed).
4759 */
4760
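/*
 * The linkage described above is established as follows (all in this file):
 * decmpfs marks the big request UPL with upl_mark_decmp(), which also
 * stashes it in the creating thread's decmp_upl; when that thread later
 * creates an expedite-capable internal UPL, upl_create() calls
 * upl_set_decmp_info() below to link the two and take an extra reference
 * on the request UPL; upl_destroy() of the real-I/O UPL severs the link
 * and drops that reference via upl_deallocate().
 */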
4761
4762 static void
4763 upl_set_decmp_info(upl_t upl, upl_t src_upl)
4764 {
4765 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4766
4767 upl_lock(src_upl);
4768 if (src_upl->decmp_io_upl) {
4769 /*
4770 * If there is already an alive real I/O UPL, ignore this new UPL.
4771 * This case should rarely happen and even if it does, it just means
4772 * that we might issue a spurious expedite which the driver is expected
4773 * to handle.
4774 */
4775 upl_unlock(src_upl);
4776 return;
4777 }
4778 src_upl->decmp_io_upl = (void *)upl;
4779 src_upl->ref_count++;
4780
4781 upl->flags |= UPL_DECMP_REAL_IO;
4782 upl->decmp_io_upl = (void *)src_upl;
4783 upl_unlock(src_upl);
4784 }
4785 #endif /* CONFIG_IOSCHED */
4786
4787 #if UPL_DEBUG
4788 int upl_debug_enabled = 1;
4789 #else
4790 int upl_debug_enabled = 0;
4791 #endif
4792
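/*
 * upl_create:
 *
 * Allocates and initializes a UPL.  UPL_CREATE_LITE appends a bitmap with
 * one bit per page, rounded up to whole 32-bit words; UPL_CREATE_INTERNAL
 * appends an array of upl_page_info structures.  For example, a 1MB
 * (256-page, assuming 4K pages) lite+internal UPL carries a 32-byte bitmap
 * ((256 + 7) >> 3) plus 256 upl_page_info entries.  I/O-tracking UPLs
 * record the creating thread and its effective I/O policy; expedite-capable
 * internal UPLs additionally get a per-page reprio array and, if the
 * creating thread has a pending decmp request UPL, are linked to it via
 * upl_set_decmp_info().
 */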
4793 static upl_t
4794 upl_create(int type, int flags, upl_size_t size)
4795 {
4796 upl_t upl;
4797 vm_size_t page_field_size = 0;
4798 int upl_flags = 0;
4799 vm_size_t upl_size = sizeof(struct upl);
4800
4801 size = round_page_32(size);
4802
4803 if (type & UPL_CREATE_LITE) {
4804 page_field_size = (atop(size) + 7) >> 3;
4805 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4806
4807 upl_flags |= UPL_LITE;
4808 }
4809 if (type & UPL_CREATE_INTERNAL) {
4810 upl_size += sizeof(struct upl_page_info) * atop(size);
4811
4812 upl_flags |= UPL_INTERNAL;
4813 }
4814 upl = (upl_t)kalloc(upl_size + page_field_size);
4815
4816 if (page_field_size)
4817 bzero((char *)upl + upl_size, page_field_size);
4818
4819 upl->flags = upl_flags | flags;
4820 upl->src_object = NULL;
4821 upl->kaddr = (vm_offset_t)0;
4822 upl->size = 0;
4823 upl->map_object = NULL;
4824 upl->ref_count = 1;
4825 upl->ext_ref_count = 0;
4826 upl->highest_page = 0;
4827 upl_lock_init(upl);
4828 upl->vector_upl = NULL;
4829 upl->associated_upl = NULL;
4830 #if CONFIG_IOSCHED
4831 if (type & UPL_CREATE_IO_TRACKING) {
4832 upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
4833 }
4834
4835 upl->upl_reprio_info = 0;
4836 upl->decmp_io_upl = 0;
4837 if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
4838 /* Only support expedite on internal UPLs */
4839 thread_t curthread = current_thread();
4840 upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size));
4841 bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size)));
4842 upl->flags |= UPL_EXPEDITE_SUPPORTED;
4843 if (curthread->decmp_upl != NULL)
4844 upl_set_decmp_info(upl, curthread->decmp_upl);
4845 }
4846 #endif
4847 #if CONFIG_IOSCHED || UPL_DEBUG
4848 if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
4849 upl->upl_creator = current_thread();
4850 upl->uplq.next = 0;
4851 upl->uplq.prev = 0;
4852 upl->flags |= UPL_TRACKED_BY_OBJECT;
4853 }
4854 #endif
4855
4856 #if UPL_DEBUG
4857 upl->ubc_alias1 = 0;
4858 upl->ubc_alias2 = 0;
4859
4860 upl->upl_state = 0;
4861 upl->upl_commit_index = 0;
4862 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
4863
4864 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
4865 #endif /* UPL_DEBUG */
4866
4867 return(upl);
4868 }
4869
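/*
 * upl_destroy:
 *
 * Final teardown, reached from upl_deallocate() when the last reference is
 * dropped.  Panics if external references remain, severs any decmp
 * real-I/O link, removes the UPL from its object's uplq (collapsing the
 * object afterwards), releases the map_object reference for shadowed UPLs,
 * and frees the upl together with its page-info array, lite bitmap and
 * reprio array as appropriate.
 */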
4870 static void
4871 upl_destroy(upl_t upl)
4872 {
4873 int page_field_size; /* bit field in word size buf */
4874 int size;
4875
4876 if (upl->ext_ref_count) {
4877 panic("upl(%p) ext_ref_count", upl);
4878 }
4879
4880 #if CONFIG_IOSCHED
4881 if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
4882 upl_t src_upl;
4883 src_upl = upl->decmp_io_upl;
4884 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4885 upl_lock(src_upl);
4886 src_upl->decmp_io_upl = NULL;
4887 upl_unlock(src_upl);
4888 upl_deallocate(src_upl);
4889 }
4890 #endif /* CONFIG_IOSCHED */
4891
4892 #if CONFIG_IOSCHED || UPL_DEBUG
4893 if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
4894 vm_object_t object;
4895
4896 if (upl->flags & UPL_SHADOWED) {
4897 object = upl->map_object->shadow;
4898 } else {
4899 object = upl->map_object;
4900 }
4901
4902 vm_object_lock(object);
4903 queue_remove(&object->uplq, upl, upl_t, uplq);
4904 vm_object_activity_end(object);
4905 vm_object_collapse(object, 0, TRUE);
4906 vm_object_unlock(object);
4907 }
4908 #endif
4909 /*
4910 * drop a reference on the map_object whether or
4911 * not a pageout object is inserted
4912 */
4913 if (upl->flags & UPL_SHADOWED)
4914 vm_object_deallocate(upl->map_object);
4915
4916 if (upl->flags & UPL_DEVICE_MEMORY)
4917 size = PAGE_SIZE;
4918 else
4919 size = upl->size;
4920 page_field_size = 0;
4921
4922 if (upl->flags & UPL_LITE) {
4923 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4924 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4925 }
4926 upl_lock_destroy(upl);
4927 upl->vector_upl = (vector_upl_t) 0xfeedbeef;
4928
4929 #if CONFIG_IOSCHED
4930 if (upl->flags & UPL_EXPEDITE_SUPPORTED)
4931 kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE));
4932 #endif
4933
4934 if (upl->flags & UPL_INTERNAL) {
4935 kfree(upl,
4936 sizeof(struct upl) +
4937 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
4938 + page_field_size);
4939 } else {
4940 kfree(upl, sizeof(struct upl) + page_field_size);
4941 }
4942 }
4943
4944 void
4945 upl_deallocate(upl_t upl)
4946 {
4947 upl_lock(upl);
4948 if (--upl->ref_count == 0) {
4949 if(vector_upl_is_valid(upl))
4950 vector_upl_deallocate(upl);
4951 upl_unlock(upl);
4952 upl_destroy(upl);
4953 }
4954 else
4955 upl_unlock(upl);
4956 }
4957
4958 #if CONFIG_IOSCHED
4959 void
4960 upl_mark_decmp(upl_t upl)
4961 {
4962 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
4963 upl->flags |= UPL_DECMP_REQ;
4964 upl->upl_creator->decmp_upl = (void *)upl;
4965 }
4966 }
4967
4968 void
4969 upl_unmark_decmp(upl_t upl)
4970 {
4971 if(upl && (upl->flags & UPL_DECMP_REQ)) {
4972 upl->upl_creator->decmp_upl = NULL;
4973 }
4974 }
4975
4976 #endif /* CONFIG_IOSCHED */
4977
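/*
 * A pageout queue is considered to be "backing up" once its laundry count
 * reaches 80% of pgo_maxlaundry (e.g. 102 or more laundry pages when
 * pgo_maxlaundry is 128).  must_throttle_writes() reports TRUE only when
 * the external queue is backing up and, in addition, externally-backed
 * pageable pages exceed 60% of the currently available non-compressed
 * memory.
 */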
4978 #define VM_PAGE_Q_BACKING_UP(q) \
4979 ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
4980
4981 boolean_t must_throttle_writes(void);
4982
4983 boolean_t
4984 must_throttle_writes()
4985 {
4986 if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
4987 vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10)
4988 return (TRUE);
4989
4990 return (FALSE);
4991 }
4992
4993
4994 #if DEVELOPMENT || DEBUG
4995 /*
4996 * Statistics about UPL enforcement of copy-on-write obligations.
4997 */
4998 unsigned long upl_cow = 0;
4999 unsigned long upl_cow_again = 0;
5000 unsigned long upl_cow_pages = 0;
5001 unsigned long upl_cow_again_pages = 0;
5002
5003 unsigned long iopl_cow = 0;
5004 unsigned long iopl_cow_pages = 0;
5005 #endif
5006
5007 /*
5008 * Routine: vm_object_upl_request
5009 * Purpose:
5010 * Cause the population of a portion of a vm_object.
5011 * Depending on the nature of the request, the pages
5012 * returned may contain valid data or be uninitialized.
5013 * A page list structure listing the physical pages
5014 * will be returned upon request.
5015 * This function is called by the file system or any other
5016 * supplier of backing store to a pager.
5017 * IMPORTANT NOTE: The caller must still respect the relationship
5018 * between the vm_object and its backing memory object. The
5019 * caller MUST NOT substitute changes in the backing file
5020 * without first doing a memory_object_lock_request on the
5021 * target range unless it is known that the pages are not
5022 * shared with another entity at the pager level.
5023 * Copy_in_to:
5024 * if a page list structure is present
5025 * return the mapped physical pages, where a
5026 * page is not present, return a non-initialized
5027 * one. If the no_sync bit is turned on, don't
5028 * call the pager unlock to synchronize with other
5029 * possible copies of the page. Leave pages busy
5030 * in the original object, if a page list structure
5031 * was specified. When a commit of the page list
5032 * pages is done, the dirty bit will be set for each one.
5033 * Copy_out_from:
5034 * If a page list structure is present, return
5035 * all mapped pages. Where a page does not exist
5036 * map a zero filled one. Leave pages busy in
5037 * the original object. If a page list structure
5038 * is not specified, this call is a no-op.
5039 *
5040 * Note: access of default pager objects has a rather interesting
5041 * twist. The caller of this routine, presumably the file system
5042 * page cache handling code, will never actually make a request
5043 * against a default pager backed object. Only the default
5044 * pager will make requests on backing store related vm_objects
5045 * In this way the default pager can maintain the relationship
5046 * between backing store files (abstract memory objects) and
5047 * the vm_objects (cache objects), they support.
5048 *
5049 */
5050
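/*
 * Flag combinations exercised below: UPL_SET_INTERNAL / UPL_SET_LITE select
 * the in-line page-info and bitmap layout created by upl_create();
 * UPL_COPYOUT_FROM selects the pageout-oriented path (optionally filtered
 * by UPL_RET_ONLY_DIRTY); UPL_WILL_MODIFY forces copy-on-write
 * synchronization with any copy object and may throttle the caller when
 * the external pageout queue is backing up; UPL_ENCRYPT marks the UPL for
 * later encryption by upl_encrypt().
 */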
5051 __private_extern__ kern_return_t
5052 vm_object_upl_request(
5053 vm_object_t object,
5054 vm_object_offset_t offset,
5055 upl_size_t size,
5056 upl_t *upl_ptr,
5057 upl_page_info_array_t user_page_list,
5058 unsigned int *page_list_count,
5059 upl_control_flags_t cntrl_flags)
5060 {
5061 vm_page_t dst_page = VM_PAGE_NULL;
5062 vm_object_offset_t dst_offset;
5063 upl_size_t xfer_size;
5064 unsigned int size_in_pages;
5065 boolean_t dirty;
5066 boolean_t hw_dirty;
5067 upl_t upl = NULL;
5068 unsigned int entry;
5069 #if MACH_CLUSTER_STATS
5070 boolean_t encountered_lrp = FALSE;
5071 #endif
5072 vm_page_t alias_page = NULL;
5073 int refmod_state = 0;
5074 wpl_array_t lite_list = NULL;
5075 vm_object_t last_copy_object;
5076 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
5077 struct vm_page_delayed_work *dwp;
5078 int dw_count;
5079 int dw_limit;
5080 int io_tracking_flag = 0;
5081
5082 if (cntrl_flags & ~UPL_VALID_FLAGS) {
5083 /*
5084 * For forward compatibility's sake,
5085 * reject any unknown flag.
5086 */
5087 return KERN_INVALID_VALUE;
5088 }
5089 if ( (!object->internal) && (object->paging_offset != 0) )
5090 panic("vm_object_upl_request: external object with non-zero paging offset\n");
5091 if (object->phys_contiguous)
5092 panic("vm_object_upl_request: contiguous object specified\n");
5093
5094
5095 if (size > MAX_UPL_SIZE_BYTES)
5096 size = MAX_UPL_SIZE_BYTES;
5097
5098 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
5099 *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
5100
5101 #if CONFIG_IOSCHED || UPL_DEBUG
5102 if (object->io_tracking || upl_debug_enabled)
5103 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
5104 #endif
5105 #if CONFIG_IOSCHED
5106 if (object->io_tracking)
5107 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
5108 #endif
5109
5110 if (cntrl_flags & UPL_SET_INTERNAL) {
5111 if (cntrl_flags & UPL_SET_LITE) {
5112
5113 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5114
5115 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
5116 lite_list = (wpl_array_t)
5117 (((uintptr_t)user_page_list) +
5118 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
5119 if (size == 0) {
5120 user_page_list = NULL;
5121 lite_list = NULL;
5122 }
5123 } else {
5124 upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
5125
5126 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
5127 if (size == 0) {
5128 user_page_list = NULL;
5129 }
5130 }
5131 } else {
5132 if (cntrl_flags & UPL_SET_LITE) {
5133
5134 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5135
5136 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
5137 if (size == 0) {
5138 lite_list = NULL;
5139 }
5140 } else {
5141 upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
5142 }
5143 }
5144 *upl_ptr = upl;
5145
5146 if (user_page_list)
5147 user_page_list[0].device = FALSE;
5148
5149 if (cntrl_flags & UPL_SET_LITE) {
5150 upl->map_object = object;
5151 } else {
5152 upl->map_object = vm_object_allocate(size);
5153 /*
5154 * No need to lock the new object: nobody else knows
5155 * about it yet, so it's all ours so far.
5156 */
5157 upl->map_object->shadow = object;
5158 upl->map_object->pageout = TRUE;
5159 upl->map_object->can_persist = FALSE;
5160 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
5161 upl->map_object->vo_shadow_offset = offset;
5162 upl->map_object->wimg_bits = object->wimg_bits;
5163
5164 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5165
5166 upl->flags |= UPL_SHADOWED;
5167 }
5168 /*
5169 * ENCRYPTED SWAP:
5170 * Just mark the UPL as "encrypted" here.
5171 * We'll actually encrypt the pages later,
5172 * in upl_encrypt(), when the caller has
5173 * selected which pages need to go to swap.
5174 */
5175 if (cntrl_flags & UPL_ENCRYPT)
5176 upl->flags |= UPL_ENCRYPTED;
5177
5178 if (cntrl_flags & UPL_FOR_PAGEOUT)
5179 upl->flags |= UPL_PAGEOUT;
5180
5181 vm_object_lock(object);
5182 vm_object_activity_begin(object);
5183
5184 /*
5185 * we can lock in the paging_offset once paging_in_progress is set
5186 */
5187 upl->size = size;
5188 upl->offset = offset + object->paging_offset;
5189
5190 #if CONFIG_IOSCHED || UPL_DEBUG
5191 if (object->io_tracking || upl_debug_enabled) {
5192 vm_object_activity_begin(object);
5193 queue_enter(&object->uplq, upl, upl_t, uplq);
5194 }
5195 #endif
5196 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
5197 /*
5198 * Honor copy-on-write obligations
5199 *
5200 * The caller is gathering these pages and
5201 * might modify their contents. We need to
5202 * make sure that the copy object has its own
5203 * private copies of these pages before we let
5204 * the caller modify them.
5205 */
5206 vm_object_update(object,
5207 offset,
5208 size,
5209 NULL,
5210 NULL,
5211 FALSE, /* should_return */
5212 MEMORY_OBJECT_COPY_SYNC,
5213 VM_PROT_NO_CHANGE);
5214 #if DEVELOPMENT || DEBUG
5215 upl_cow++;
5216 upl_cow_pages += size >> PAGE_SHIFT;
5217 #endif
5218 }
5219 /*
5220 * remember which copy object we synchronized with
5221 */
5222 last_copy_object = object->copy;
5223 entry = 0;
5224
5225 xfer_size = size;
5226 dst_offset = offset;
5227 size_in_pages = size / PAGE_SIZE;
5228
5229 dwp = &dw_array[0];
5230 dw_count = 0;
5231 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
5232
5233 if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
5234 object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT))
5235 object->scan_collisions = 0;
5236
5237 if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
5238 boolean_t isSSD = FALSE;
5239
5240 vnode_pager_get_isSSD(object->pager, &isSSD);
5241 vm_object_unlock(object);
5242
5243 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5244
5245 if (isSSD == TRUE)
5246 delay(1000 * size_in_pages);
5247 else
5248 delay(5000 * size_in_pages);
5249 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5250
5251 vm_object_lock(object);
5252 }
5253
5254 while (xfer_size) {
5255
5256 dwp->dw_mask = 0;
5257
5258 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
5259 vm_object_unlock(object);
5260 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5261 vm_object_lock(object);
5262 }
5263 if (cntrl_flags & UPL_COPYOUT_FROM) {
5264 upl->flags |= UPL_PAGE_SYNC_DONE;
5265
5266 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
5267 dst_page->fictitious ||
5268 dst_page->absent ||
5269 dst_page->error ||
5270 dst_page->cleaning ||
5271 (VM_PAGE_WIRED(dst_page))) {
5272
5273 if (user_page_list)
5274 user_page_list[entry].phys_addr = 0;
5275
5276 goto try_next_page;
5277 }
5278 /*
5279 * grab this up front...
5280 * a high percentage of the time we're going to
5281 * need the hardware modification state a bit later
5282 * anyway... so we can eliminate an extra call into
5283 * the pmap layer by grabbing it here and recording it
5284 */
5285 if (dst_page->pmapped)
5286 refmod_state = pmap_get_refmod(dst_page->phys_page);
5287 else
5288 refmod_state = 0;
5289
5290 if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) {
5291 /*
5292 * page is on inactive list and referenced...
5293 * reactivate it now... this gets it out of the
5294 * way of vm_pageout_scan which would have to
5295 * reactivate it upon tripping over it
5296 */
5297 dwp->dw_mask |= DW_vm_page_activate;
5298 }
5299 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
5300 /*
5301 * we're only asking for DIRTY pages to be returned
5302 */
5303 if (dst_page->laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
5304 /*
5305 * if this is the page stolen by vm_pageout_scan to be
5306 * cleaned (as opposed to a buddy being clustered in),
5307 * or this request is not being driven by a PAGEOUT cluster,
5308 * then we only need to check for the page being dirty or
5309 * precious to decide whether to return it
5310 */
5311 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED))
5312 goto check_busy;
5313 goto dont_return;
5314 }
5315 /*
5316 * this is a request for a PAGEOUT cluster and this page
5317 * is merely along for the ride as a 'buddy'... not only
5318 * does it have to be dirty to be returned, but it also
5319 * can't have been referenced recently...
5320 */
5321 if ( (hibernate_cleaning_in_progress == TRUE ||
5322 (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) || dst_page->throttled)) &&
5323 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
5324 goto check_busy;
5325 }
5326 dont_return:
5327 /*
5328 * if we reach here, we're not to return
5329 * the page... go on to the next one
5330 */
5331 if (dst_page->laundry == TRUE) {
5332 /*
5333 * if we get here, the page is not 'cleaning' (filtered out above).
5334 * since it has been referenced, remove it from the laundry
5335 * so we don't pay the cost of an I/O to clean a page
5336 * we're just going to take back
5337 */
5338 vm_page_lockspin_queues();
5339
5340 vm_pageout_steal_laundry(dst_page, TRUE);
5341 vm_page_activate(dst_page);
5342
5343 vm_page_unlock_queues();
5344 }
5345 if (user_page_list)
5346 user_page_list[entry].phys_addr = 0;
5347
5348 goto try_next_page;
5349 }
5350 check_busy:
5351 if (dst_page->busy) {
5352 if (cntrl_flags & UPL_NOBLOCK) {
5353 if (user_page_list)
5354 user_page_list[entry].phys_addr = 0;
5355
5356 goto try_next_page;
5357 }
5358 /*
5359 * someone else is playing with the
5360 * page. We will have to wait.
5361 */
5362 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5363
5364 continue;
5365 }
5366 /*
5367 * ENCRYPTED SWAP:
5368 * The caller is gathering this page and might
5369 * access its contents later on. Decrypt the
5370 * page before adding it to the UPL, so that
5371 * the caller never sees encrypted data.
5372 */
5373 if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) {
5374 int was_busy;
5375
5376 /*
5377 * save the current state of busy
5378 * mark page as busy while decrypt
5379 * is in progress since it will drop
5380 * the object lock...
5381 */
5382 was_busy = dst_page->busy;
5383 dst_page->busy = TRUE;
5384
5385 vm_page_decrypt(dst_page, 0);
5386 vm_page_decrypt_for_upl_counter++;
5387 /*
5388 * restore to original busy state
5389 */
5390 dst_page->busy = was_busy;
5391 }
5392 if (dst_page->pageout_queue == TRUE) {
5393
5394 vm_page_lockspin_queues();
5395
5396 if (dst_page->pageout_queue == TRUE) {
5397 /*
5398 * we've buddied up a page for a clustered pageout
5399 * that has already been moved to the pageout
5400 * queue by pageout_scan... we need to remove
5401 * it from the queue and drop the laundry count
5402 * on that queue
5403 */
5404 vm_pageout_throttle_up(dst_page);
5405 }
5406 vm_page_unlock_queues();
5407 }
5408 #if MACH_CLUSTER_STATS
5409 /*
5410 * pageout statistics gathering. count
5411 * all the pages we will page out that
5412 * were not counted in the initial
5413 * vm_pageout_scan work
5414 */
5415 if (dst_page->pageout)
5416 encountered_lrp = TRUE;
5417 if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious))) {
5418 if (encountered_lrp)
5419 CLUSTER_STAT(pages_at_higher_offsets++;)
5420 else
5421 CLUSTER_STAT(pages_at_lower_offsets++;)
5422 }
5423 #endif
5424 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5425 dirty = hw_dirty ? TRUE : dst_page->dirty;
5426
5427 if (dst_page->phys_page > upl->highest_page)
5428 upl->highest_page = dst_page->phys_page;
5429
5430 assert (!pmap_is_noencrypt(dst_page->phys_page));
5431
5432 if (cntrl_flags & UPL_SET_LITE) {
5433 unsigned int pg_num;
5434
5435 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5436 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5437 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5438
5439 if (hw_dirty)
5440 pmap_clear_modify(dst_page->phys_page);
5441
5442 /*
5443 * Mark original page as cleaning
5444 * in place.
5445 */
5446 dst_page->cleaning = TRUE;
5447 dst_page->precious = FALSE;
5448 } else {
5449 /*
5450 * use pageclean setup, it is more
5451 * convenient even for the pageout
5452 * cases here
5453 */
5454 vm_object_lock(upl->map_object);
5455 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5456 vm_object_unlock(upl->map_object);
5457
5458 alias_page->absent = FALSE;
5459 alias_page = NULL;
5460 }
5461 #if MACH_PAGEMAP
5462 /*
5463 * Record that this page has been
5464 * written out
5465 */
5466 vm_external_state_set(object->existence_map, dst_page->offset);
5467 #endif /*MACH_PAGEMAP*/
5468 if (dirty) {
5469 SET_PAGE_DIRTY(dst_page, FALSE);
5470 } else {
5471 dst_page->dirty = FALSE;
5472 }
5473
5474 if (!dirty)
5475 dst_page->precious = TRUE;
5476
5477 if ( (cntrl_flags & UPL_ENCRYPT) ) {
5478 /*
5479 * ENCRYPTED SWAP:
5480 * We want to deny access to the target page
5481 * because its contents are about to be
5482 * encrypted and the user would be very
5483 * confused to see encrypted data instead
5484 * of their data.
5485 * We also set "encrypted_cleaning" to allow
5486 * vm_pageout_scan() to demote that page
5487 * from "adjacent/clean-in-place" to
5488 * "target/clean-and-free" if it bumps into
5489 * this page during its scanning while we're
5490 * still processing this cluster.
5491 */
5492 dst_page->busy = TRUE;
5493 dst_page->encrypted_cleaning = TRUE;
5494 }
5495 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
5496 if ( !VM_PAGE_WIRED(dst_page))
5497 dst_page->pageout = TRUE;
5498 }
5499 } else {
5500 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
5501 /*
5502 * Honor copy-on-write obligations
5503 *
5504 * The copy object has changed since we
5505 * last synchronized for copy-on-write.
5506 * Another copy object might have been
5507 * inserted while we released the object's
5508 * lock. Since someone could have seen the
5509 * original contents of the remaining pages
5510 * through that new object, we have to
5511 * synchronize with it again for the remaining
5512 * pages only. The previous pages are "busy"
5513 * so they can not be seen through the new
5514 * mapping. The new mapping will see our
5515 * upcoming changes for those previous pages,
5516 * but that's OK since they couldn't see what
5517 * was there before. It's just a race anyway
5518 * and there's no guarantee of consistency or
5519 * atomicity. We just don't want new mappings
5520 * to see both the *before* and *after* pages.
5521 */
5522 if (object->copy != VM_OBJECT_NULL) {
5523 vm_object_update(
5524 object,
5525 dst_offset,/* current offset */
5526 xfer_size, /* remaining size */
5527 NULL,
5528 NULL,
5529 FALSE, /* should_return */
5530 MEMORY_OBJECT_COPY_SYNC,
5531 VM_PROT_NO_CHANGE);
5532
5533 #if DEVELOPMENT || DEBUG
5534 upl_cow_again++;
5535 upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
5536 #endif
5537 }
5538 /*
5539 * remember the copy object we synced with
5540 */
5541 last_copy_object = object->copy;
5542 }
5543 dst_page = vm_page_lookup(object, dst_offset);
5544
5545 if (dst_page != VM_PAGE_NULL) {
5546
5547 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5548 /*
5549 * skip over pages already present in the cache
5550 */
5551 if (user_page_list)
5552 user_page_list[entry].phys_addr = 0;
5553
5554 goto try_next_page;
5555 }
5556 if (dst_page->fictitious) {
5557 panic("need corner case for fictitious page");
5558 }
5559
5560 if (dst_page->busy || dst_page->cleaning) {
5561 /*
5562 * someone else is playing with the
5563 * page. We will have to wait.
5564 */
5565 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5566
5567 continue;
5568 }
5569 if (dst_page->laundry) {
5570 dst_page->pageout = FALSE;
5571
5572 vm_pageout_steal_laundry(dst_page, FALSE);
5573 }
5574 } else {
5575 if (object->private) {
5576 /*
5577 * This is a nasty wrinkle for users
5578 * of upl who encounter device or
5579 * private memory; however, it is
5580 * unavoidable: only a fault can
5581 * resolve the actual backing
5582 * physical page by asking the
5583 * backing device.
5584 */
5585 if (user_page_list)
5586 user_page_list[entry].phys_addr = 0;
5587
5588 goto try_next_page;
5589 }
5590 if (object->scan_collisions) {
5591 /*
5592 * the pageout_scan thread is trying to steal
5593 * pages from this object, but has run into our
5594 * lock... grab 2 pages from the head of the object...
5595 * the first is freed on behalf of pageout_scan, the
5596 * 2nd is for our own use... we use vm_object_page_grab
5597 * in both cases to avoid taking pages from the free
5598 * list since we are under memory pressure and our
5599 * lock on this object is getting in the way of
5600 * relieving it
5601 */
5602 dst_page = vm_object_page_grab(object);
5603
5604 if (dst_page != VM_PAGE_NULL)
5605 vm_page_release(dst_page);
5606
5607 dst_page = vm_object_page_grab(object);
5608 }
5609 if (dst_page == VM_PAGE_NULL) {
5610 /*
5611 * need to allocate a page
5612 */
5613 dst_page = vm_page_grab();
5614 }
5615 if (dst_page == VM_PAGE_NULL) {
5616 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
5617 /*
5618 * we don't want to stall waiting for pages to come onto the free list
5619 * while we're already holding absent pages in this UPL...
5620 * the caller will deal with the empty slots
5621 */
5622 if (user_page_list)
5623 user_page_list[entry].phys_addr = 0;
5624
5625 goto try_next_page;
5626 }
5627 /*
5628 * no pages available... wait
5629 * then try again for the same
5630 * offset...
5631 */
5632 vm_object_unlock(object);
5633
5634 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5635
5636 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
5637
5638 VM_PAGE_WAIT();
5639 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5640
5641 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
5642
5643 vm_object_lock(object);
5644
5645 continue;
5646 }
5647 vm_page_insert(dst_page, object, dst_offset);
5648
5649 dst_page->absent = TRUE;
5650 dst_page->busy = FALSE;
5651
5652 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
5653 /*
5654 * if UPL_RET_ONLY_ABSENT was specified,
5655 * then we're definitely setting up a
5656 * upl for a clustered read/pagein
5657 * operation... mark the pages as clustered
5658 * so upl_commit_range can put them on the
5659 * speculative list
5660 */
5661 dst_page->clustered = TRUE;
5662
5663 if ( !(cntrl_flags & UPL_FILE_IO))
5664 VM_STAT_INCR(pageins);
5665 }
5666 }
5667 /*
5668 * ENCRYPTED SWAP:
5669 */
5670 if (cntrl_flags & UPL_ENCRYPT) {
5671 /*
5672 * The page is going to be encrypted when we
5673 * get it from the pager, so mark it so.
5674 */
5675 dst_page->encrypted = TRUE;
5676 } else {
5677 /*
5678 * Otherwise, the page will not contain
5679 * encrypted data.
5680 */
5681 dst_page->encrypted = FALSE;
5682 }
5683 dst_page->overwriting = TRUE;
5684
5685 if (dst_page->pmapped) {
5686 if ( !(cntrl_flags & UPL_FILE_IO))
5687 /*
5688 * eliminate all mappings from the
5689 * original object and its progeny
5690 */
5691 refmod_state = pmap_disconnect(dst_page->phys_page);
5692 else
5693 refmod_state = pmap_get_refmod(dst_page->phys_page);
5694 } else
5695 refmod_state = 0;
5696
5697 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5698 dirty = hw_dirty ? TRUE : dst_page->dirty;
5699
5700 if (cntrl_flags & UPL_SET_LITE) {
5701 unsigned int pg_num;
5702
5703 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5704 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5705 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5706
5707 if (hw_dirty)
5708 pmap_clear_modify(dst_page->phys_page);
5709
5710 /*
5711 * Mark original page as cleaning
5712 * in place.
5713 */
5714 dst_page->cleaning = TRUE;
5715 dst_page->precious = FALSE;
5716 } else {
5717 /*
5718 * use pageclean setup, it is more
5719 * convenient even for the pageout
5720 * cases here
5721 */
5722 vm_object_lock(upl->map_object);
5723 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5724 vm_object_unlock(upl->map_object);
5725
5726 alias_page->absent = FALSE;
5727 alias_page = NULL;
5728 }
5729
5730 if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
5731 upl->flags &= ~UPL_CLEAR_DIRTY;
5732 upl->flags |= UPL_SET_DIRTY;
5733 dirty = TRUE;
5734
5735 } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
5736 /*
5737 * clean in place for read implies
5738 * that a write will be done on all
5739 * the pages that are dirty before
5740 * a upl commit is done. The caller
5741 * is obligated to preserve the
5742 * contents of all pages marked dirty
5743 */
5744 upl->flags |= UPL_CLEAR_DIRTY;
5745 }
5746 dst_page->dirty = dirty;
5747
5748 if (!dirty)
5749 dst_page->precious = TRUE;
5750
5751 if ( !VM_PAGE_WIRED(dst_page)) {
5752 /*
5753 * deny access to the target page while
5754 * it is being worked on
5755 */
5756 dst_page->busy = TRUE;
5757 } else
5758 dwp->dw_mask |= DW_vm_page_wire;
5759
5760 /*
5761 * We might be about to satisfy a fault which has been
5762 * requested. So no need for the "restart" bit.
5763 */
5764 dst_page->restart = FALSE;
5765 if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
5766 /*
5767 * expect the page to be used
5768 */
5769 dwp->dw_mask |= DW_set_reference;
5770 }
5771 if (cntrl_flags & UPL_PRECIOUS) {
5772 if (dst_page->object->internal) {
5773 SET_PAGE_DIRTY(dst_page, FALSE);
5774 dst_page->precious = FALSE;
5775 } else {
5776 dst_page->precious = TRUE;
5777 }
5778 } else {
5779 dst_page->precious = FALSE;
5780 }
5781 }
5782 if (dst_page->busy)
5783 upl->flags |= UPL_HAS_BUSY;
5784
5785 if (dst_page->phys_page > upl->highest_page)
5786 upl->highest_page = dst_page->phys_page;
5787 assert (!pmap_is_noencrypt(dst_page->phys_page));
5788 if (user_page_list) {
5789 user_page_list[entry].phys_addr = dst_page->phys_page;
5790 user_page_list[entry].pageout = dst_page->pageout;
5791 user_page_list[entry].absent = dst_page->absent;
5792 user_page_list[entry].dirty = dst_page->dirty;
5793 user_page_list[entry].precious = dst_page->precious;
5794 user_page_list[entry].device = FALSE;
5795 user_page_list[entry].needed = FALSE;
5796 if (dst_page->clustered == TRUE)
5797 user_page_list[entry].speculative = dst_page->speculative;
5798 else
5799 user_page_list[entry].speculative = FALSE;
5800 user_page_list[entry].cs_validated = dst_page->cs_validated;
5801 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
5802 user_page_list[entry].cs_nx = dst_page->cs_nx;
5803 user_page_list[entry].mark = FALSE;
5804 }
5805 /*
5806 * if UPL_RET_ONLY_ABSENT is set, then
5807 * we are working with a fresh page and we've
5808 * just set the clustered flag on it to
5809 * indicate that it was dragged in as part of a
5810 * speculative cluster... so leave it alone
5811 */
5812 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5813 /*
5814 * someone is explicitly grabbing this page...
5815 * update clustered and speculative state
5816 *
5817 */
5818 if (dst_page->clustered)
5819 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5820 }
5821 try_next_page:
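/*
 * Delayed-work batching: each page's queue manipulations are
 * encoded in dwp->dw_mask and accumulated in dw_array[]; once
 * dw_count reaches dw_limit, vm_page_do_delayed_work() applies
 * the whole batch at once instead of handling each page
 * separately.
 */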
5822 if (dwp->dw_mask) {
5823 if (dwp->dw_mask & DW_vm_page_activate)
5824 VM_STAT_INCR(reactivations);
5825
5826 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
5827
5828 if (dw_count >= dw_limit) {
5829 vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
5830
5831 dwp = &dw_array[0];
5832 dw_count = 0;
5833 }
5834 }
5835 entry++;
5836 dst_offset += PAGE_SIZE_64;
5837 xfer_size -= PAGE_SIZE;
5838 }
5839 if (dw_count)
5840 vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
5841
5842 if (alias_page != NULL) {
5843 VM_PAGE_FREE(alias_page);
5844 }
5845
5846 if (page_list_count != NULL) {
5847 if (upl->flags & UPL_INTERNAL)
5848 *page_list_count = 0;
5849 else if (*page_list_count > entry)
5850 *page_list_count = entry;
5851 }
5852 #if UPL_DEBUG
5853 upl->upl_state = 1;
5854 #endif
5855 vm_object_unlock(object);
5856
5857 return KERN_SUCCESS;
5858 }
5859
5860 /*
5861 * Routine: vm_object_super_upl_request
5862 * Purpose:
5863 * Cause the population of a portion of a vm_object
5864 * in much the same way as memory_object_upl_request.
5865 * Depending on the nature of the request, the pages
5866 * returned may contain valid data or be uninitialized.
5867 * However, the region may be expanded up to the super
5868 * cluster size provided.
5869 */
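/*
 * Rough example of the expansion below (the numbers are only
 * illustrative): with super_cluster = 0x10000, a request for
 * offset = 0x2e000, size = 0x4000 is aligned down to
 * base_offset = 0x20000; since 0x2e000 + 0x4000 crosses
 * 0x30000, super_size doubles to 0x20000, and the result is
 * then clipped against the object's size.
 */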
5870
5871 __private_extern__ kern_return_t
5872 vm_object_super_upl_request(
5873 vm_object_t object,
5874 vm_object_offset_t offset,
5875 upl_size_t size,
5876 upl_size_t super_cluster,
5877 upl_t *upl,
5878 upl_page_info_t *user_page_list,
5879 unsigned int *page_list_count,
5880 upl_control_flags_t cntrl_flags)
5881 {
5882 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
5883 return KERN_FAILURE;
5884
5885 assert(object->paging_in_progress);
5886 offset = offset - object->paging_offset;
5887
5888 if (super_cluster > size) {
5889
5890 vm_object_offset_t base_offset;
5891 upl_size_t super_size;
5892 vm_object_size_t super_size_64;
5893
5894 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
5895 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
5896 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
5897 super_size = (upl_size_t) super_size_64;
5898 assert(super_size == super_size_64);
5899
5900 if (offset > (base_offset + super_size)) {
5901 panic("vm_object_super_upl_request: Missed target pageout"
5902 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
5903 offset, base_offset, super_size, super_cluster,
5904 size, object->paging_offset);
5905 }
5906 /*
5907 * apparently there is a case where the vm requests a
5908 * page to be written out whose offset is beyond the
5909 * object size
5910 */
5911 if ((offset + size) > (base_offset + super_size)) {
5912 super_size_64 = (offset + size) - base_offset;
5913 super_size = (upl_size_t) super_size_64;
5914 assert(super_size == super_size_64);
5915 }
5916
5917 offset = base_offset;
5918 size = super_size;
5919 }
5920 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags);
5921 }
5922
5923
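/*
 * Routine:	vm_map_create_upl
 * Purpose:
 *	Build a UPL for a range of a VM map: look up the map entry,
 *	recurse through submaps, honor copy-on-write and data-sync
 *	requirements, and then hand the underlying VM object to
 *	vm_object_iopl_request().
 */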
5924 kern_return_t
5925 vm_map_create_upl(
5926 vm_map_t map,
5927 vm_map_address_t offset,
5928 upl_size_t *upl_size,
5929 upl_t *upl,
5930 upl_page_info_array_t page_list,
5931 unsigned int *count,
5932 upl_control_flags_t *flags)
5933 {
5934 vm_map_entry_t entry;
5935 upl_control_flags_t caller_flags;
5936 int force_data_sync;
5937 int sync_cow_data;
5938 vm_object_t local_object;
5939 vm_map_offset_t local_offset;
5940 vm_map_offset_t local_start;
5941 kern_return_t ret;
5942
5943 caller_flags = *flags;
5944
5945 if (caller_flags & ~UPL_VALID_FLAGS) {
5946 /*
5947 * For forward compatibility's sake,
5948 * reject any unknown flag.
5949 */
5950 return KERN_INVALID_VALUE;
5951 }
5952 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
5953 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
5954
5955 if (upl == NULL)
5956 return KERN_INVALID_ARGUMENT;
5957
5958 REDISCOVER_ENTRY:
5959 vm_map_lock_read(map);
5960
5961 if (!vm_map_lookup_entry(map, offset, &entry)) {
5962 vm_map_unlock_read(map);
5963 return KERN_FAILURE;
5964 }
5965
5966 if ((entry->vme_end - offset) < *upl_size) {
5967 *upl_size = (upl_size_t) (entry->vme_end - offset);
5968 assert(*upl_size == entry->vme_end - offset);
5969 }
5970
5971 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
5972 *flags = 0;
5973
5974 if (!entry->is_sub_map &&
5975 VME_OBJECT(entry) != VM_OBJECT_NULL) {
5976 if (VME_OBJECT(entry)->private)
5977 *flags = UPL_DEV_MEMORY;
5978
5979 if (VME_OBJECT(entry)->phys_contiguous)
5980 *flags |= UPL_PHYS_CONTIG;
5981 }
5982 vm_map_unlock_read(map);
5983 return KERN_SUCCESS;
5984 }
5985
5986 if (entry->is_sub_map) {
5987 vm_map_t submap;
5988
5989 submap = VME_SUBMAP(entry);
5990 local_start = entry->vme_start;
5991 local_offset = VME_OFFSET(entry);
5992
5993 vm_map_reference(submap);
5994 vm_map_unlock_read(map);
5995
5996 ret = vm_map_create_upl(submap,
5997 local_offset + (offset - local_start),
5998 upl_size, upl, page_list, count, flags);
5999 vm_map_deallocate(submap);
6000
6001 return ret;
6002 }
6003
6004 if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
6005 !VME_OBJECT(entry)->phys_contiguous) {
6006 if (*upl_size > MAX_UPL_SIZE_BYTES)
6007 *upl_size = MAX_UPL_SIZE_BYTES;
6008 }
6009
6010 /*
6011 * Create an object if necessary.
6012 */
6013 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6014
6015 if (vm_map_lock_read_to_write(map))
6016 goto REDISCOVER_ENTRY;
6017
6018 VME_OBJECT_SET(entry,
6019 vm_object_allocate((vm_size_t)
6020 (entry->vme_end -
6021 entry->vme_start)));
6022 VME_OFFSET_SET(entry, 0);
6023
6024 vm_map_lock_write_to_read(map);
6025 }
6026
6027 if (!(caller_flags & UPL_COPYOUT_FROM) &&
6028 !(entry->protection & VM_PROT_WRITE)) {
6029 vm_map_unlock_read(map);
6030 return KERN_PROTECTION_FAILURE;
6031 }
6032
6033 local_object = VME_OBJECT(entry);
6034 assert(local_object != VM_OBJECT_NULL);
6035
6036 if (*upl_size != 0 &&
6037 local_object->vo_size > *upl_size && /* partial UPL */
6038 entry->wired_count == 0 && /* No COW for entries that are wired */
6039 (map->pmap != kernel_pmap) && /* alias checks */
6040 (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */
6041 ||
6042 (!entry->needs_copy && /* case 2 */
6043 local_object->internal &&
6044 (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
6045 local_object->ref_count > 1))) {
6046 vm_prot_t prot;
6047
6048 /*
6049 * Case 1:
6050 * Set up the targeted range for copy-on-write to avoid
6051 * applying true_share/copy_delay to the entire object.
6052 *
6053 * Case 2:
6054 * This map entry covers only part of an internal
6055 * object. There could be other map entries covering
6056 * other areas of this object and some of these map
6057 * entries could be marked as "needs_copy", which
6058 * assumes that the object is COPY_SYMMETRIC.
6059 * To avoid marking this object as COPY_DELAY and
6060 * "true_share", let's shadow it and mark the new
6061 * (smaller) object as "true_share" and COPY_DELAY.
6062 */
6063
6064 if (vm_map_lock_read_to_write(map)) {
6065 goto REDISCOVER_ENTRY;
6066 }
6067 vm_map_lock_assert_exclusive(map);
6068 assert(VME_OBJECT(entry) == local_object);
6069
6070 vm_map_clip_start(map,
6071 entry,
6072 vm_map_trunc_page(offset,
6073 VM_MAP_PAGE_MASK(map)));
6074 vm_map_clip_end(map,
6075 entry,
6076 vm_map_round_page(offset + *upl_size,
6077 VM_MAP_PAGE_MASK(map)));
6078 if ((entry->vme_end - offset) < *upl_size) {
6079 *upl_size = (upl_size_t) (entry->vme_end - offset);
6080 assert(*upl_size == entry->vme_end - offset);
6081 }
6082
6083 prot = entry->protection & ~VM_PROT_WRITE;
6084 if (override_nx(map, VME_ALIAS(entry)) && prot)
6085 prot |= VM_PROT_EXECUTE;
6086 vm_object_pmap_protect(local_object,
6087 VME_OFFSET(entry),
6088 entry->vme_end - entry->vme_start,
6089 ((entry->is_shared ||
6090 map->mapped_in_other_pmaps)
6091 ? PMAP_NULL
6092 : map->pmap),
6093 entry->vme_start,
6094 prot);
6095
6096 assert(entry->wired_count == 0);
6097
6098 /*
6099 * Lock the VM object and re-check its status: if it's mapped
6100 * in another address space, we could still be racing with
6101 * another thread holding that other VM map exclusively.
6102 */
6103 vm_object_lock(local_object);
6104 if (local_object->true_share) {
6105 /* object is already in proper state: no COW needed */
6106 assert(local_object->copy_strategy !=
6107 MEMORY_OBJECT_COPY_SYMMETRIC);
6108 } else {
6109 /* not true_share: ask for copy-on-write below */
6110 assert(local_object->copy_strategy ==
6111 MEMORY_OBJECT_COPY_SYMMETRIC);
6112 entry->needs_copy = TRUE;
6113 }
6114 vm_object_unlock(local_object);
6115
6116 vm_map_lock_write_to_read(map);
6117 }
6118
6119 if (entry->needs_copy) {
6120 /*
6121 * Honor copy-on-write for COPY_SYMMETRIC
6122 * strategy.
6123 */
6124 vm_map_t local_map;
6125 vm_object_t object;
6126 vm_object_offset_t new_offset;
6127 vm_prot_t prot;
6128 boolean_t wired;
6129 vm_map_version_t version;
6130 vm_map_t real_map;
6131 vm_prot_t fault_type;
6132
6133 local_map = map;
6134
6135 if (caller_flags & UPL_COPYOUT_FROM) {
6136 fault_type = VM_PROT_READ | VM_PROT_COPY;
6137 vm_counters.create_upl_extra_cow++;
6138 vm_counters.create_upl_extra_cow_pages +=
6139 (entry->vme_end - entry->vme_start) / PAGE_SIZE;
6140 } else {
6141 fault_type = VM_PROT_WRITE;
6142 }
6143 if (vm_map_lookup_locked(&local_map,
6144 offset, fault_type,
6145 OBJECT_LOCK_EXCLUSIVE,
6146 &version, &object,
6147 &new_offset, &prot, &wired,
6148 NULL,
6149 &real_map) != KERN_SUCCESS) {
6150 if (fault_type == VM_PROT_WRITE) {
6151 vm_counters.create_upl_lookup_failure_write++;
6152 } else {
6153 vm_counters.create_upl_lookup_failure_copy++;
6154 }
6155 vm_map_unlock_read(local_map);
6156 return KERN_FAILURE;
6157 }
6158 if (real_map != map)
6159 vm_map_unlock(real_map);
6160 vm_map_unlock_read(local_map);
6161
6162 vm_object_unlock(object);
6163
6164 goto REDISCOVER_ENTRY;
6165 }
6166
6167 if (sync_cow_data &&
6168 (VME_OBJECT(entry)->shadow ||
6169 VME_OBJECT(entry)->copy)) {
6170 local_object = VME_OBJECT(entry);
6171 local_start = entry->vme_start;
6172 local_offset = VME_OFFSET(entry);
6173
6174 vm_object_reference(local_object);
6175 vm_map_unlock_read(map);
6176
6177 if (local_object->shadow && local_object->copy) {
6178 vm_object_lock_request(local_object->shadow,
6179 ((vm_object_offset_t)
6180 ((offset - local_start) +
6181 local_offset) +
6182 local_object->vo_shadow_offset),
6183 *upl_size, FALSE,
6184 MEMORY_OBJECT_DATA_SYNC,
6185 VM_PROT_NO_CHANGE);
6186 }
6187 sync_cow_data = FALSE;
6188 vm_object_deallocate(local_object);
6189
6190 goto REDISCOVER_ENTRY;
6191 }
6192 if (force_data_sync) {
6193 local_object = VME_OBJECT(entry);
6194 local_start = entry->vme_start;
6195 local_offset = VME_OFFSET(entry);
6196
6197 vm_object_reference(local_object);
6198 vm_map_unlock_read(map);
6199
6200 vm_object_lock_request(local_object,
6201 ((vm_object_offset_t)
6202 ((offset - local_start) +
6203 local_offset)),
6204 (vm_object_size_t)*upl_size,
6205 FALSE,
6206 MEMORY_OBJECT_DATA_SYNC,
6207 VM_PROT_NO_CHANGE);
6208
6209 force_data_sync = FALSE;
6210 vm_object_deallocate(local_object);
6211
6212 goto REDISCOVER_ENTRY;
6213 }
6214 if (VME_OBJECT(entry)->private)
6215 *flags = UPL_DEV_MEMORY;
6216 else
6217 *flags = 0;
6218
6219 if (VME_OBJECT(entry)->phys_contiguous)
6220 *flags |= UPL_PHYS_CONTIG;
6221
6222 local_object = VME_OBJECT(entry);
6223 local_offset = VME_OFFSET(entry);
6224 local_start = entry->vme_start;
6225
6226 vm_object_lock(local_object);
6227
6228 /*
6229 * Ensure that this object is "true_share" and "copy_delay" now,
6230 * while we're still holding the VM map lock. After we unlock the map,
6231 * anything could happen to that mapping, including some copy-on-write
6232 * activity. We need to make sure that the IOPL will point at the
6233 * same memory as the mapping.
6234 */
6235 if (local_object->true_share) {
6236 assert(local_object->copy_strategy !=
6237 MEMORY_OBJECT_COPY_SYMMETRIC);
6238 } else if (local_object != kernel_object &&
6239 local_object != compressor_object &&
6240 !local_object->phys_contiguous) {
6241 #if VM_OBJECT_TRACKING_OP_TRUESHARE
6242 if (!local_object->true_share &&
6243 vm_object_tracking_inited) {
6244 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
6245 int num = 0;
6246 num = OSBacktrace(bt,
6247 VM_OBJECT_TRACKING_BTDEPTH);
6248 btlog_add_entry(vm_object_tracking_btlog,
6249 local_object,
6250 VM_OBJECT_TRACKING_OP_TRUESHARE,
6251 bt,
6252 num);
6253 }
6254 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
6255 local_object->true_share = TRUE;
6256 if (local_object->copy_strategy ==
6257 MEMORY_OBJECT_COPY_SYMMETRIC) {
6258 local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6259 }
6260 }
6261
6262 vm_object_reference_locked(local_object);
6263 vm_object_unlock(local_object);
6264
6265 vm_map_unlock_read(map);
6266
6267 ret = vm_object_iopl_request(local_object,
6268 ((vm_object_offset_t)
6269 ((offset - local_start) + local_offset)),
6270 *upl_size,
6271 upl,
6272 page_list,
6273 count,
6274 caller_flags);
6275 vm_object_deallocate(local_object);
6276
6277 return ret;
6278 }
6279
6280 /*
6281 * Internal routine to enter a UPL into a VM map.
6282 *
6283 * JMM - This should just be doable through the standard
6284 * vm_map_enter() API.
6285 */
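/*
 * Typical lifecycle, in outline: a UPL is created via
 * vm_map_create_upl() or vm_object_upl_request(), optionally
 * mapped with vm_map_enter_upl(), unmapped again with
 * vm_map_remove_upl(), and finally completed with
 * upl_commit_range() or upl_abort_range().
 */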
6286 kern_return_t
6287 vm_map_enter_upl(
6288 vm_map_t map,
6289 upl_t upl,
6290 vm_map_offset_t *dst_addr)
6291 {
6292 vm_map_size_t size;
6293 vm_object_offset_t offset;
6294 vm_map_offset_t addr;
6295 vm_page_t m;
6296 kern_return_t kr;
6297 int isVectorUPL = 0, curr_upl=0;
6298 upl_t vector_upl = NULL;
6299 vm_offset_t vector_upl_dst_addr = 0;
6300 vm_map_t vector_upl_submap = NULL;
6301 upl_offset_t subupl_offset = 0;
6302 upl_size_t subupl_size = 0;
6303
6304 if (upl == UPL_NULL)
6305 return KERN_INVALID_ARGUMENT;
6306
6307 if((isVectorUPL = vector_upl_is_valid(upl))) {
6308 int mapped=0,valid_upls=0;
6309 vector_upl = upl;
6310
6311 upl_lock(vector_upl);
6312 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6313 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6314 if(upl == NULL)
6315 continue;
6316 valid_upls++;
6317 if (UPL_PAGE_LIST_MAPPED & upl->flags)
6318 mapped++;
6319 }
6320
6321 if(mapped) {
6322 if(mapped != valid_upls)
6323 panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls);
6324 else {
6325 upl_unlock(vector_upl);
6326 return KERN_FAILURE;
6327 }
6328 }
6329
6330 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE, VM_FLAGS_ANYWHERE, &vector_upl_submap);
6331 if( kr != KERN_SUCCESS )
6332 panic("Vector UPL submap allocation failed\n");
6333 map = vector_upl_submap;
6334 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
6335 curr_upl=0;
6336 }
6337 else
6338 upl_lock(upl);
6339
6340 process_upl_to_enter:
6341 if(isVectorUPL){
6342 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6343 *dst_addr = vector_upl_dst_addr;
6344 upl_unlock(vector_upl);
6345 return KERN_SUCCESS;
6346 }
6347 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6348 if(upl == NULL)
6349 goto process_upl_to_enter;
6350
6351 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
6352 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
6353 } else {
6354 /*
6355 * check to see if already mapped
6356 */
6357 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
6358 upl_unlock(upl);
6359 return KERN_FAILURE;
6360 }
6361 }
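/*
 * Roughly: if this UPL has not been shadowed yet and either
 * holds busy pages or is not a device/IO-wire/physically
 * contiguous UPL, build a shadow map_object populated with
 * private alias pages so the mapping established below
 * references the UPL's pages without disturbing the original
 * object.
 */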
6362 if ((!(upl->flags & UPL_SHADOWED)) &&
6363 ((upl->flags & UPL_HAS_BUSY) ||
6364 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
6365
6366 vm_object_t object;
6367 vm_page_t alias_page;
6368 vm_object_offset_t new_offset;
6369 unsigned int pg_num;
6370 wpl_array_t lite_list;
6371
6372 if (upl->flags & UPL_INTERNAL) {
6373 lite_list = (wpl_array_t)
6374 ((((uintptr_t)upl) + sizeof(struct upl))
6375 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6376 } else {
6377 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
6378 }
6379 object = upl->map_object;
6380 upl->map_object = vm_object_allocate(upl->size);
6381
6382 vm_object_lock(upl->map_object);
6383
6384 upl->map_object->shadow = object;
6385 upl->map_object->pageout = TRUE;
6386 upl->map_object->can_persist = FALSE;
6387 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
6388 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
6389 upl->map_object->wimg_bits = object->wimg_bits;
6390 offset = upl->map_object->vo_shadow_offset;
6391 new_offset = 0;
6392 size = upl->size;
6393
6394 upl->flags |= UPL_SHADOWED;
6395
6396 while (size) {
6397 pg_num = (unsigned int) (new_offset / PAGE_SIZE);
6398 assert(pg_num == new_offset / PAGE_SIZE);
6399
6400 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6401
6402 VM_PAGE_GRAB_FICTITIOUS(alias_page);
6403
6404 vm_object_lock(object);
6405
6406 m = vm_page_lookup(object, offset);
6407 if (m == VM_PAGE_NULL) {
6408 panic("vm_upl_map: page missing\n");
6409 }
6410
6411 /*
6412 * Convert the fictitious page to a private
6413 * shadow of the real page.
6414 */
6415 assert(alias_page->fictitious);
6416 alias_page->fictitious = FALSE;
6417 alias_page->private = TRUE;
6418 alias_page->pageout = TRUE;
6419 /*
6420 * since m is a page in the upl it must
6421 * already be wired or BUSY, so it's
6422 * safe to assign the underlying physical
6423 * page to the alias
6424 */
6425 alias_page->phys_page = m->phys_page;
6426
6427 vm_object_unlock(object);
6428
6429 vm_page_lockspin_queues();
6430 vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
6431 vm_page_unlock_queues();
6432
6433 /*
6434 * ENCRYPTED SWAP:
6435 * The virtual page ("m") has to be wired in some way
6436 * here or its physical page ("m->phys_page") could
6437 * be recycled at any time.
6438 * Assuming this is enforced by the caller, we can't
6439 * get an encrypted page here. Since the encryption
6440 * key depends on the VM page's "pager" object and
6441 * the "paging_offset", we couldn't handle 2 pageable
6442 * VM pages (with different pagers and paging_offsets)
6443 * sharing the same physical page: we could end up
6444 * encrypting with one key (via one VM page) and
6445 * decrypting with another key (via the alias VM page).
6446 */
6447 ASSERT_PAGE_DECRYPTED(m);
6448
6449 vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
6450
6451 assert(!alias_page->wanted);
6452 alias_page->busy = FALSE;
6453 alias_page->absent = FALSE;
6454 }
6455 size -= PAGE_SIZE;
6456 offset += PAGE_SIZE_64;
6457 new_offset += PAGE_SIZE_64;
6458 }
6459 vm_object_unlock(upl->map_object);
6460 }
6461 if (upl->flags & UPL_SHADOWED)
6462 offset = 0;
6463 else
6464 offset = upl->offset - upl->map_object->paging_offset;
6465
6466 size = upl->size;
6467
6468 vm_object_reference(upl->map_object);
6469
6470 if(!isVectorUPL) {
6471 *dst_addr = 0;
6472 /*
6473 * NEED A UPL_MAP ALIAS
6474 */
6475 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6476 VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK),
6477 upl->map_object, offset, FALSE,
6478 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6479
6480 if (kr != KERN_SUCCESS) {
6481 upl_unlock(upl);
6482 return(kr);
6483 }
6484 }
6485 else {
6486 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6487 VM_FLAGS_FIXED | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK),
6488 upl->map_object, offset, FALSE,
6489 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6490 if(kr)
6491 panic("vm_map_enter failed for a Vector UPL\n");
6492 }
6493 vm_object_lock(upl->map_object);
6494
6495 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
6496 m = vm_page_lookup(upl->map_object, offset);
6497
6498 if (m) {
6499 m->pmapped = TRUE;
6500
6501 /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
6502 * but only in kernel space. If this was on a user map,
6503 * we'd have to set the wpmapped bit. */
6504 /* m->wpmapped = TRUE; */
6505 assert(map->pmap == kernel_pmap);
6506
6507 PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE);
6508 }
6509 offset += PAGE_SIZE_64;
6510 }
6511 vm_object_unlock(upl->map_object);
6512
6513 /*
6514 * hold a reference for the mapping
6515 */
6516 upl->ref_count++;
6517 upl->flags |= UPL_PAGE_LIST_MAPPED;
6518 upl->kaddr = (vm_offset_t) *dst_addr;
6519 assert(upl->kaddr == *dst_addr);
6520
6521 if(isVectorUPL)
6522 goto process_upl_to_enter;
6523
6524 upl_unlock(upl);
6525
6526 return KERN_SUCCESS;
6527 }
6528
6529 /*
6530 * Internal routine to remove a UPL mapping from a VM map.
6531 *
6532 * XXX - This should just be doable through a standard
6533 * vm_map_remove() operation. Otherwise, implicit clean-up
6534 * of the target map won't be able to correctly remove
6535 * these (and release the reference on the UPL). Having
6536 * to do this means we can't map these into user-space
6537 * maps yet.
6538 */
6539 kern_return_t
6540 vm_map_remove_upl(
6541 vm_map_t map,
6542 upl_t upl)
6543 {
6544 vm_address_t addr;
6545 upl_size_t size;
6546 int isVectorUPL = 0, curr_upl = 0;
6547 upl_t vector_upl = NULL;
6548
6549 if (upl == UPL_NULL)
6550 return KERN_INVALID_ARGUMENT;
6551
6552 if((isVectorUPL = vector_upl_is_valid(upl))) {
6553 int unmapped=0, valid_upls=0;
6554 vector_upl = upl;
6555 upl_lock(vector_upl);
6556 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6557 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6558 if(upl == NULL)
6559 continue;
6560 valid_upls++;
6561 if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
6562 unmapped++;
6563 }
6564
6565 if(unmapped) {
6566 if(unmapped != valid_upls)
6567 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
6568 else {
6569 upl_unlock(vector_upl);
6570 return KERN_FAILURE;
6571 }
6572 }
6573 curr_upl=0;
6574 }
6575 else
6576 upl_lock(upl);
6577
6578 process_upl_to_remove:
6579 if(isVectorUPL) {
6580 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6581 vm_map_t v_upl_submap;
6582 vm_offset_t v_upl_submap_dst_addr;
6583 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
6584
6585 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS);
6586 vm_map_deallocate(v_upl_submap);
6587 upl_unlock(vector_upl);
6588 return KERN_SUCCESS;
6589 }
6590
6591 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6592 if(upl == NULL)
6593 goto process_upl_to_remove;
6594 }
6595
6596 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
6597 addr = upl->kaddr;
6598 size = upl->size;
6599
6600 assert(upl->ref_count > 1);
6601 upl->ref_count--; /* removing mapping ref */
6602
6603 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
6604 upl->kaddr = (vm_offset_t) 0;
6605
6606 if(!isVectorUPL) {
6607 upl_unlock(upl);
6608
6609 vm_map_remove(
6610 map,
6611 vm_map_trunc_page(addr,
6612 VM_MAP_PAGE_MASK(map)),
6613 vm_map_round_page(addr + size,
6614 VM_MAP_PAGE_MASK(map)),
6615 VM_MAP_NO_FLAGS);
6616
6617 return KERN_SUCCESS;
6618 }
6619 else {
6620 /*
6621 * If it's a Vectored UPL, we'll be removing the entire
6622 * submap anyway, so no need to remove individual UPL
6623 * element mappings from within the submap
6624 */
6625 goto process_upl_to_remove;
6626 }
6627 }
6628 upl_unlock(upl);
6629
6630 return KERN_FAILURE;
6631 }
6632
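/*
 * Routine:	upl_commit_range
 * Purpose:
 *	Commit a range of a UPL: propagate the state recorded in
 *	the UPL (dirty, code-signing, absent, ...) back to the
 *	underlying pages, release any wirings taken on the UPL's
 *	behalf and wake up waiters.  *empty is set once the UPL
 *	no longer holds any pages and either UPL_COMMIT_NOTIFY_EMPTY
 *	was requested or the UPL is part of a Vector UPL.
 */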
6633 kern_return_t
6634 upl_commit_range(
6635 upl_t upl,
6636 upl_offset_t offset,
6637 upl_size_t size,
6638 int flags,
6639 upl_page_info_t *page_list,
6640 mach_msg_type_number_t count,
6641 boolean_t *empty)
6642 {
6643 upl_size_t xfer_size, subupl_size = size;
6644 vm_object_t shadow_object;
6645 vm_object_t object;
6646 vm_object_offset_t target_offset;
6647 upl_offset_t subupl_offset = offset;
6648 int entry;
6649 wpl_array_t lite_list;
6650 int occupied;
6651 int clear_refmod = 0;
6652 int pgpgout_count = 0;
6653 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
6654 struct vm_page_delayed_work *dwp;
6655 int dw_count;
6656 int dw_limit;
6657 int isVectorUPL = 0;
6658 upl_t vector_upl = NULL;
6659 boolean_t should_be_throttled = FALSE;
6660
6661 vm_page_t nxt_page = VM_PAGE_NULL;
6662 int fast_path_possible = 0;
6663 int fast_path_full_commit = 0;
6664 int throttle_page = 0;
6665 int unwired_count = 0;
6666 int local_queue_count = 0;
6667 queue_head_t local_queue;
6668
6669 *empty = FALSE;
6670
6671 if (upl == UPL_NULL)
6672 return KERN_INVALID_ARGUMENT;
6673
6674 if (count == 0)
6675 page_list = NULL;
6676
6677 if((isVectorUPL = vector_upl_is_valid(upl))) {
6678 vector_upl = upl;
6679 upl_lock(vector_upl);
6680 }
6681 else
6682 upl_lock(upl);
6683
6684 process_upl_to_commit:
6685
6686 if(isVectorUPL) {
6687 size = subupl_size;
6688 offset = subupl_offset;
6689 if(size == 0) {
6690 upl_unlock(vector_upl);
6691 return KERN_SUCCESS;
6692 }
6693 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
6694 if(upl == NULL) {
6695 upl_unlock(vector_upl);
6696 return KERN_FAILURE;
6697 }
6698 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
6699 subupl_size -= size;
6700 subupl_offset += size;
6701 }
6702
6703 #if UPL_DEBUG
6704 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
6705 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
6706
6707 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
6708 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
6709
6710 upl->upl_commit_index++;
6711 }
6712 #endif
6713 if (upl->flags & UPL_DEVICE_MEMORY)
6714 xfer_size = 0;
6715 else if ((offset + size) <= upl->size)
6716 xfer_size = size;
6717 else {
6718 if(!isVectorUPL)
6719 upl_unlock(upl);
6720 else {
6721 upl_unlock(vector_upl);
6722 }
6723 return KERN_FAILURE;
6724 }
6725 if (upl->flags & UPL_SET_DIRTY)
6726 flags |= UPL_COMMIT_SET_DIRTY;
6727 if (upl->flags & UPL_CLEAR_DIRTY)
6728 flags |= UPL_COMMIT_CLEAR_DIRTY;
6729
6730 if (upl->flags & UPL_INTERNAL)
6731 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
6732 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6733 else
6734 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
6735
6736 object = upl->map_object;
6737
6738 if (upl->flags & UPL_SHADOWED) {
6739 vm_object_lock(object);
6740 shadow_object = object->shadow;
6741 } else {
6742 shadow_object = object;
6743 }
6744 entry = offset/PAGE_SIZE;
6745 target_offset = (vm_object_offset_t)offset;
6746
6747 assert(!(target_offset & PAGE_MASK));
6748 assert(!(xfer_size & PAGE_MASK));
6749
6750 if (upl->flags & UPL_KERNEL_OBJECT)
6751 vm_object_lock_shared(shadow_object);
6752 else
6753 vm_object_lock(shadow_object);
6754
6755 if (upl->flags & UPL_ACCESS_BLOCKED) {
6756 assert(shadow_object->blocked_access);
6757 shadow_object->blocked_access = FALSE;
6758 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
6759 }
6760
6761 if (shadow_object->code_signed) {
6762 /*
6763 * CODE SIGNING:
6764 * If the object is code-signed, do not let this UPL tell
6765 * us if the pages are valid or not. Let the pages be
6766 * validated by VM the normal way (when they get mapped or
6767 * copied).
6768 */
6769 flags &= ~UPL_COMMIT_CS_VALIDATED;
6770 }
6771 if (! page_list) {
6772 /*
6773 * No page list to get the code-signing info from !?
6774 */
6775 flags &= ~UPL_COMMIT_CS_VALIDATED;
6776 }
6777 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal)
6778 should_be_throttled = TRUE;
6779
6780 dwp = &dw_array[0];
6781 dw_count = 0;
6782 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
6783
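/*
 * Fast-path check: for a plain IO-wire UPL (non-vector, no
 * absent-page freeing) against a non-volatile object, pages can
 * be collected on a local queue and spliced onto the global
 * page queues in bulk further down, instead of going through
 * the per-page delayed-work path.
 */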
6784 if ((upl->flags & UPL_IO_WIRE) &&
6785 !(flags & UPL_COMMIT_FREE_ABSENT) &&
6786 !isVectorUPL &&
6787 shadow_object->purgable != VM_PURGABLE_VOLATILE &&
6788 shadow_object->purgable != VM_PURGABLE_EMPTY) {
6789
6790 if (!queue_empty(&shadow_object->memq)) {
6791 queue_init(&local_queue);
6792 if (size == shadow_object->vo_size) {
6793 nxt_page = (vm_page_t)queue_first(&shadow_object->memq);
6794 fast_path_full_commit = 1;
6795 }
6796 fast_path_possible = 1;
6797
6798 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal &&
6799 (shadow_object->purgable == VM_PURGABLE_DENY ||
6800 shadow_object->purgable == VM_PURGABLE_NONVOLATILE ||
6801 shadow_object->purgable == VM_PURGABLE_VOLATILE)) {
6802 throttle_page = 1;
6803 }
6804 }
6805 }
6806
6807 while (xfer_size) {
6808 vm_page_t t, m;
6809
6810 dwp->dw_mask = 0;
6811 clear_refmod = 0;
6812
6813 m = VM_PAGE_NULL;
6814
6815 if (upl->flags & UPL_LITE) {
6816 unsigned int pg_num;
6817
6818 if (nxt_page != VM_PAGE_NULL) {
6819 m = nxt_page;
6820 nxt_page = (vm_page_t)queue_next(&nxt_page->listq);
6821 target_offset = m->offset;
6822 }
6823 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
6824 assert(pg_num == target_offset/PAGE_SIZE);
6825
6826 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6827 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
6828
6829 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6830 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
6831 } else
6832 m = NULL;
6833 }
6834 if (upl->flags & UPL_SHADOWED) {
6835 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
6836
6837 t->pageout = FALSE;
6838
6839 VM_PAGE_FREE(t);
6840
6841 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6842 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
6843 }
6844 }
6845 if (m == VM_PAGE_NULL)
6846 goto commit_next_page;
6847
6848 if (m->compressor) {
6849 assert(m->busy);
6850
6851 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6852 goto commit_next_page;
6853 }
6854
6855 if (flags & UPL_COMMIT_CS_VALIDATED) {
6856 /*
6857 * CODE SIGNING:
6858 * Set the code signing bits according to
6859 * what the UPL says they should be.
6860 */
6861 m->cs_validated = page_list[entry].cs_validated;
6862 m->cs_tainted = page_list[entry].cs_tainted;
6863 m->cs_nx = page_list[entry].cs_nx;
6864 }
6865 if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL)
6866 m->written_by_kernel = TRUE;
6867
6868 if (upl->flags & UPL_IO_WIRE) {
6869
6870 if (page_list)
6871 page_list[entry].phys_addr = 0;
6872
6873 if (flags & UPL_COMMIT_SET_DIRTY) {
6874 SET_PAGE_DIRTY(m, FALSE);
6875 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6876 m->dirty = FALSE;
6877
6878 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6879 m->cs_validated && !m->cs_tainted) {
6880 /*
6881 * CODE SIGNING:
6882 * This page is no longer dirty
6883 * but could have been modified,
6884 * so it will need to be
6885 * re-validated.
6886 */
6887 if (m->slid) {
6888 panic("upl_commit_range(%p): page %p was slid\n",
6889 upl, m);
6890 }
6891 assert(!m->slid);
6892 m->cs_validated = FALSE;
6893 #if DEVELOPMENT || DEBUG
6894 vm_cs_validated_resets++;
6895 #endif
6896 pmap_disconnect(m->phys_page);
6897 }
6898 clear_refmod |= VM_MEM_MODIFIED;
6899 }
6900 if (upl->flags & UPL_ACCESS_BLOCKED) {
6901 /*
6902 * We blocked access to the pages in this UPL.
6903 * Clear the "busy" bit and wake up any waiter
6904 * for this page.
6905 */
6906 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6907 }
6908 if (fast_path_possible) {
6909 assert(m->object->purgable != VM_PURGABLE_EMPTY);
6910 assert(m->object->purgable != VM_PURGABLE_VOLATILE);
6911 if (m->absent) {
6912 assert(m->wire_count == 0);
6913 assert(m->busy);
6914
6915 m->absent = FALSE;
6916 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6917 } else {
6918 if (m->wire_count == 0)
6919 panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object);
6920
6921 /*
6922 * XXX FBDP need to update some other
6923 * counters here (purgeable_wired_count)
6924 * (ledgers), ...
6925 */
6926 assert(m->wire_count);
6927 m->wire_count--;
6928
6929 if (m->wire_count == 0)
6930 unwired_count++;
6931 }
6932 if (m->wire_count == 0) {
6933 queue_enter(&local_queue, m, vm_page_t, pageq);
6934 local_queue_count++;
6935
6936 if (throttle_page) {
6937 m->throttled = TRUE;
6938 } else {
6939 if (flags & UPL_COMMIT_INACTIVATE)
6940 m->inactive = TRUE;
6941 else
6942 m->active = TRUE;
6943 }
6944 }
6945 } else {
6946 if (flags & UPL_COMMIT_INACTIVATE) {
6947 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6948 clear_refmod |= VM_MEM_REFERENCED;
6949 }
6950 if (m->absent) {
6951 if (flags & UPL_COMMIT_FREE_ABSENT)
6952 dwp->dw_mask |= DW_vm_page_free;
6953 else {
6954 m->absent = FALSE;
6955 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6956
6957 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal))
6958 dwp->dw_mask |= DW_vm_page_activate;
6959 }
6960 } else
6961 dwp->dw_mask |= DW_vm_page_unwire;
6962 }
6963 goto commit_next_page;
6964 }
6965 assert(!m->compressor);
6966
6967 if (page_list)
6968 page_list[entry].phys_addr = 0;
6969
6970 /*
6971 * make sure to clear the hardware
6972 * modify or reference bits before
6973 * releasing the BUSY bit on this page
6974 * otherwise we risk losing a legitimate
6975 * change of state
6976 */
6977 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6978 m->dirty = FALSE;
6979
6980 clear_refmod |= VM_MEM_MODIFIED;
6981 }
6982 if (m->laundry)
6983 dwp->dw_mask |= DW_vm_pageout_throttle_up;
6984
6985 if (VM_PAGE_WIRED(m))
6986 m->pageout = FALSE;
6987
6988 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6989 m->cs_validated && !m->cs_tainted) {
6990 /*
6991 * CODE SIGNING:
6992 * This page is no longer dirty
6993 * but could have been modified,
6994 * so it will need to be
6995 * re-validated.
6996 */
6997 if (m->slid) {
6998 panic("upl_commit_range(%p): page %p was slid\n",
6999 upl, m);
7000 }
7001 assert(!m->slid);
7002 m->cs_validated = FALSE;
7003 #if DEVELOPMENT || DEBUG
7004 vm_cs_validated_resets++;
7005 #endif
7006 pmap_disconnect(m->phys_page);
7007 }
7008 if (m->overwriting) {
7009 /*
7010 * the (COPY_OUT_FROM == FALSE) request_page_list case
7011 */
7012 if (m->busy) {
7013 #if CONFIG_PHANTOM_CACHE
7014 if (m->absent && !m->object->internal)
7015 dwp->dw_mask |= DW_vm_phantom_cache_update;
7016 #endif
7017 m->absent = FALSE;
7018
7019 dwp->dw_mask |= DW_clear_busy;
7020 } else {
7021 /*
7022 * alternate (COPY_OUT_FROM == FALSE) page_list case
7023 * Occurs when the original page was wired
7024 * at the time of the list request
7025 */
7026 assert(VM_PAGE_WIRED(m));
7027
7028 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
7029 }
7030 m->overwriting = FALSE;
7031 }
7032 if (m->encrypted_cleaning == TRUE) {
7033 m->encrypted_cleaning = FALSE;
7034
7035 dwp->dw_mask |= DW_clear_busy | DW_PAGE_WAKEUP;
7036 }
7037 m->cleaning = FALSE;
7038
7039 if (m->pageout) {
7040 /*
7041 * With the clean queue enabled, UPL_PAGEOUT should
7042 * no longer set the pageout bit. Its pages now go
7043 * to the clean queue.
7044 */
7045 assert(!(flags & UPL_PAGEOUT));
7046
7047 m->pageout = FALSE;
7048 #if MACH_CLUSTER_STATS
7049 if (m->wanted) vm_pageout_target_collisions++;
7050 #endif
7051 if ((flags & UPL_COMMIT_SET_DIRTY) ||
7052 (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))) {
7053 /*
7054 * page was re-dirtied after we started
7055 * the pageout... reactivate it since
7056 * we don't know whether the on-disk
7057 * copy matches what is now in memory
7058 */
7059 SET_PAGE_DIRTY(m, FALSE);
7060
7061 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
7062
7063 if (upl->flags & UPL_PAGEOUT) {
7064 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
7065 VM_STAT_INCR(reactivations);
7066 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
7067 }
7068 } else {
7069 /*
7070 * page has been successfully cleaned
7071 * go ahead and free it for other use
7072 */
7073 if (m->object->internal) {
7074 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
7075 } else {
7076 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
7077 }
7078 m->dirty = FALSE;
7079 m->busy = TRUE;
7080
7081 dwp->dw_mask |= DW_vm_page_free;
7082 }
7083 goto commit_next_page;
7084 }
7085 #if MACH_CLUSTER_STATS
7086 if (m->wpmapped)
7087 m->dirty = pmap_is_modified(m->phys_page);
7088
7089 if (m->dirty) vm_pageout_cluster_dirtied++;
7090 else vm_pageout_cluster_cleaned++;
7091 if (m->wanted) vm_pageout_cluster_collisions++;
7092 #endif
7093 /*
7094 * It is part of the semantics of COPYOUT_FROM
7095 * UPLs that a commit implies cache sync
7096 * between the vm page and the backing store...
7097 * this can be used to strip the precious bit
7098 * as well as clean
7099 */
7100 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
7101 m->precious = FALSE;
7102
7103 if (flags & UPL_COMMIT_SET_DIRTY) {
7104 SET_PAGE_DIRTY(m, FALSE);
7105 } else {
7106 m->dirty = FALSE;
7107 }
7108
7109 /* with the clean queue on, move *all* cleaned pages to the clean queue */
7110 if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) {
7111 pgpgout_count++;
7112
7113 VM_STAT_INCR(pageouts);
7114 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
7115
7116 dwp->dw_mask |= DW_enqueue_cleaned;
7117 vm_pageout_enqueued_cleaned_from_inactive_dirty++;
7118 } else if (should_be_throttled == TRUE && !m->active && !m->inactive && !m->speculative && !m->throttled) {
7119 /*
7120 * page coming back in from being 'frozen'...
7121 * it was dirty before it was frozen, so keep it dirty so that
7122 * vm_page_activate will notice that it really belongs
7123 * on the throttle queue and put it there
7124 */
7125 SET_PAGE_DIRTY(m, FALSE);
7126 dwp->dw_mask |= DW_vm_page_activate;
7127
7128 } else {
7129 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
7130 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7131 clear_refmod |= VM_MEM_REFERENCED;
7132 } else if (!m->active && !m->inactive && !m->speculative) {
7133
7134 if (m->clustered || (flags & UPL_COMMIT_SPECULATE))
7135 dwp->dw_mask |= DW_vm_page_speculate;
7136 else if (m->reference)
7137 dwp->dw_mask |= DW_vm_page_activate;
7138 else {
7139 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7140 clear_refmod |= VM_MEM_REFERENCED;
7141 }
7142 }
7143 }
7144 if (upl->flags & UPL_ACCESS_BLOCKED) {
7145 /*
7146 * We blocked access to the pages in this UPL.
7147 * Clear the "busy" bit on this page before we
7148 * wake up any waiter.
7149 */
7150 dwp->dw_mask |= DW_clear_busy;
7151 }
7152 /*
7153 * Wake up any thread waiting for the page to finish cleaning.
7154 */
7155 dwp->dw_mask |= DW_PAGE_WAKEUP;
7156
7157 commit_next_page:
7158 if (clear_refmod)
7159 pmap_clear_refmod(m->phys_page, clear_refmod);
7160
7161 target_offset += PAGE_SIZE_64;
7162 xfer_size -= PAGE_SIZE;
7163 entry++;
7164
7165 if (dwp->dw_mask) {
7166 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7167 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7168
7169 if (dw_count >= dw_limit) {
7170 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7171
7172 dwp = &dw_array[0];
7173 dw_count = 0;
7174 }
7175 } else {
7176 if (dwp->dw_mask & DW_clear_busy)
7177 m->busy = FALSE;
7178
7179 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7180 PAGE_WAKEUP(m);
7181 }
7182 }
7183 }
7184 if (dw_count)
7185 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7186
7187 if (fast_path_possible) {
7188
7189 assert(shadow_object->purgable != VM_PURGABLE_VOLATILE);
7190 assert(shadow_object->purgable != VM_PURGABLE_EMPTY);
7191
7192 if (local_queue_count || unwired_count) {
7193
7194 if (local_queue_count) {
7195 vm_page_t first_local, last_local;
7196 vm_page_t first_target;
7197 queue_head_t *target_queue;
7198
7199 if (throttle_page)
7200 target_queue = &vm_page_queue_throttled;
7201 else {
7202 if (flags & UPL_COMMIT_INACTIVATE) {
7203 if (shadow_object->internal)
7204 target_queue = &vm_page_queue_anonymous;
7205 else
7206 target_queue = &vm_page_queue_inactive;
7207 } else
7208 target_queue = &vm_page_queue_active;
7209 }
7210 /*
7211 * Transfer the entire local queue to a regular LRU page queue.
7212 */
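/*
 * The pointer surgery below links the entire local chain onto
 * the head of target_queue with a constant number of updates,
 * rather than enqueueing each page one at a time.
 */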
7213 first_local = (vm_page_t) queue_first(&local_queue);
7214 last_local = (vm_page_t) queue_last(&local_queue);
7215
7216 vm_page_lockspin_queues();
7217
7218 first_target = (vm_page_t) queue_first(target_queue);
7219
7220 if (queue_empty(target_queue))
7221 queue_last(target_queue) = (queue_entry_t) last_local;
7222 else
7223 queue_prev(&first_target->pageq) = (queue_entry_t) last_local;
7224
7225 queue_first(target_queue) = (queue_entry_t) first_local;
7226 queue_prev(&first_local->pageq) = (queue_entry_t) target_queue;
7227 queue_next(&last_local->pageq) = (queue_entry_t) first_target;
7228
7229 /*
7230 * Adjust the global page counts.
7231 */
7232 if (throttle_page) {
7233 vm_page_throttled_count += local_queue_count;
7234 } else {
7235 if (flags & UPL_COMMIT_INACTIVATE) {
7236 if (shadow_object->internal)
7237 vm_page_anonymous_count += local_queue_count;
7238 vm_page_inactive_count += local_queue_count;
7239
7240 token_new_pagecount += local_queue_count;
7241 } else
7242 vm_page_active_count += local_queue_count;
7243
7244 if (shadow_object->internal)
7245 vm_page_pageable_internal_count += local_queue_count;
7246 else
7247 vm_page_pageable_external_count += local_queue_count;
7248 }
7249 } else {
7250 vm_page_lockspin_queues();
7251 }
7252 if (unwired_count) {
7253 vm_page_wire_count -= unwired_count;
7254 VM_CHECK_MEMORYSTATUS;
7255 }
7256 vm_page_unlock_queues();
7257
7258 shadow_object->wired_page_count -= unwired_count;
7259
7260 if (!shadow_object->wired_page_count) {
7261 VM_OBJECT_UNWIRED(shadow_object);
7262 }
7263 }
7264 }
7265 occupied = 1;
7266
7267 if (upl->flags & UPL_DEVICE_MEMORY) {
7268 occupied = 0;
7269 } else if (upl->flags & UPL_LITE) {
7270 int pg_num;
7271 int i;
7272
7273 occupied = 0;
7274
7275 if (!fast_path_full_commit) {
7276 pg_num = upl->size/PAGE_SIZE;
7277 pg_num = (pg_num + 31) >> 5;
7278
7279 for (i = 0; i < pg_num; i++) {
7280 if (lite_list[i] != 0) {
7281 occupied = 1;
7282 break;
7283 }
7284 }
7285 }
7286 } else {
7287 if (queue_empty(&upl->map_object->memq))
7288 occupied = 0;
7289 }
7290 if (occupied == 0) {
7291 /*
7292 * If this UPL element belongs to a Vector UPL and is
7293 * empty, then this is the right function to deallocate
7294 * it. So go ahead and set the *empty variable. The flag
7295 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view,
7296 * should be considered relevant for the Vector UPL and not
7297 * the internal UPLs.
7298 */
7299 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7300 *empty = TRUE;
7301
7302 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7303 /*
7304 * this is not a paging object
7305 * so we need to drop the paging reference
7306 * that was taken when we created the UPL
7307 * against this object
7308 */
7309 vm_object_activity_end(shadow_object);
7310 vm_object_collapse(shadow_object, 0, TRUE);
7311 } else {
7312 /*
7313 * we donated the paging reference to
7314 * the map object... vm_pageout_object_terminate
7315 * will drop this reference
7316 */
7317 }
7318 }
7319 vm_object_unlock(shadow_object);
7320 if (object != shadow_object)
7321 vm_object_unlock(object);
7322
7323 if(!isVectorUPL)
7324 upl_unlock(upl);
7325 else {
7326 /*
7327 * If we completed our operations on an UPL that is
7328 * part of a Vectored UPL and if empty is TRUE, then
7329 * we should go ahead and deallocate this UPL element.
7330 * Then we check if this was the last of the UPL elements
7331 * within that Vectored UPL. If so, set empty to TRUE
7332 * so that in ubc_upl_commit_range or ubc_upl_commit, we
7333 * can go ahead and deallocate the Vector UPL too.
7334 */
7335 if(*empty==TRUE) {
7336 *empty = vector_upl_set_subupl(vector_upl, upl, 0);
7337 upl_deallocate(upl);
7338 }
7339 goto process_upl_to_commit;
7340 }
7341
7342 if (pgpgout_count) {
7343 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
7344 }
7345
7346 return KERN_SUCCESS;
7347 }
7348
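/*
 * Routine:	upl_abort_range
 * Purpose:
 *	Abort a range of a UPL: undo the state set up when the
 *	pages were gathered, propagating the given error flags
 *	(restart / unavailable / error) to absent pages and
 *	cleaning up the remaining pages as appropriate.
 */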
7349 kern_return_t
7350 upl_abort_range(
7351 upl_t upl,
7352 upl_offset_t offset,
7353 upl_size_t size,
7354 int error,
7355 boolean_t *empty)
7356 {
7357 upl_page_info_t *user_page_list = NULL;
7358 upl_size_t xfer_size, subupl_size = size;
7359 vm_object_t shadow_object;
7360 vm_object_t object;
7361 vm_object_offset_t target_offset;
7362 upl_offset_t subupl_offset = offset;
7363 int entry;
7364 wpl_array_t lite_list;
7365 int occupied;
7366 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7367 struct vm_page_delayed_work *dwp;
7368 int dw_count;
7369 int dw_limit;
7370 int isVectorUPL = 0;
7371 upl_t vector_upl = NULL;
7372
7373 *empty = FALSE;
7374
7375 if (upl == UPL_NULL)
7376 return KERN_INVALID_ARGUMENT;
7377
7378 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
7379 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
7380
7381 if((isVectorUPL = vector_upl_is_valid(upl))) {
7382 vector_upl = upl;
7383 upl_lock(vector_upl);
7384 }
7385 else
7386 upl_lock(upl);
7387
7388 process_upl_to_abort:
7389 if(isVectorUPL) {
7390 size = subupl_size;
7391 offset = subupl_offset;
7392 if(size == 0) {
7393 upl_unlock(vector_upl);
7394 return KERN_SUCCESS;
7395 }
7396 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
7397 if(upl == NULL) {
7398 upl_unlock(vector_upl);
7399 return KERN_FAILURE;
7400 }
7401 subupl_size -= size;
7402 subupl_offset += size;
7403 }
7404
7405 *empty = FALSE;
7406
7407 #if UPL_DEBUG
7408 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
7409 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
7410
7411 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
7412 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
7413 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1;
7414
7415 upl->upl_commit_index++;
7416 }
7417 #endif
7418 if (upl->flags & UPL_DEVICE_MEMORY)
7419 xfer_size = 0;
7420 else if ((offset + size) <= upl->size)
7421 xfer_size = size;
7422 else {
7423 if(!isVectorUPL)
7424 upl_unlock(upl);
7425 else {
7426 upl_unlock(vector_upl);
7427 }
7428
7429 return KERN_FAILURE;
7430 }
7431 if (upl->flags & UPL_INTERNAL) {
7432 lite_list = (wpl_array_t)
7433 ((((uintptr_t)upl) + sizeof(struct upl))
7434 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
7435
7436 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
7437 } else {
7438 lite_list = (wpl_array_t)
7439 (((uintptr_t)upl) + sizeof(struct upl));
7440 }
7441 object = upl->map_object;
7442
7443 if (upl->flags & UPL_SHADOWED) {
7444 vm_object_lock(object);
7445 shadow_object = object->shadow;
7446 } else
7447 shadow_object = object;
7448
7449 entry = offset/PAGE_SIZE;
7450 target_offset = (vm_object_offset_t)offset;
7451
7452 assert(!(target_offset & PAGE_MASK));
7453 assert(!(xfer_size & PAGE_MASK));
7454
7455 if (upl->flags & UPL_KERNEL_OBJECT)
7456 vm_object_lock_shared(shadow_object);
7457 else
7458 vm_object_lock(shadow_object);
7459
7460 if (upl->flags & UPL_ACCESS_BLOCKED) {
7461 assert(shadow_object->blocked_access);
7462 shadow_object->blocked_access = FALSE;
7463 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
7464 }
7465
7466 dwp = &dw_array[0];
7467 dw_count = 0;
7468 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
7469
7470 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
7471 panic("upl_abort_range: kernel_object being DUMPED");
7472
7473 while (xfer_size) {
7474 vm_page_t t, m;
7475 unsigned int pg_num;
7476 boolean_t needed;
7477
7478 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
7479 assert(pg_num == target_offset/PAGE_SIZE);
7480
7481 needed = FALSE;
7482
7483 if (user_page_list)
7484 needed = user_page_list[pg_num].needed;
7485
7486 dwp->dw_mask = 0;
7487 m = VM_PAGE_NULL;
7488
7489 if (upl->flags & UPL_LITE) {
7490
7491 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
7492 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
7493
7494 if ( !(upl->flags & UPL_KERNEL_OBJECT))
7495 m = vm_page_lookup(shadow_object, target_offset +
7496 (upl->offset - shadow_object->paging_offset));
7497 }
7498 }
7499 if (upl->flags & UPL_SHADOWED) {
7500 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
7501 t->pageout = FALSE;
7502
7503 VM_PAGE_FREE(t);
7504
7505 if (m == VM_PAGE_NULL)
7506 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
7507 }
7508 }
7509 if ((upl->flags & UPL_KERNEL_OBJECT))
7510 goto abort_next_page;
7511
7512 if (m != VM_PAGE_NULL) {
7513
7514 assert(!m->compressor);
7515
7516 if (m->absent) {
7517 boolean_t must_free = TRUE;
7518
7519 /*
7520 * COPYOUT = FALSE case
7521 * check for error conditions which must
7522 * be passed back to the pages customer
7523 */
7524 if (error & UPL_ABORT_RESTART) {
7525 m->restart = TRUE;
7526 m->absent = FALSE;
7527 m->unusual = TRUE;
7528 must_free = FALSE;
7529 } else if (error & UPL_ABORT_UNAVAILABLE) {
7530 m->restart = FALSE;
7531 m->unusual = TRUE;
7532 must_free = FALSE;
7533 } else if (error & UPL_ABORT_ERROR) {
7534 m->restart = FALSE;
7535 m->absent = FALSE;
7536 m->error = TRUE;
7537 m->unusual = TRUE;
7538 must_free = FALSE;
7539 }
7540 if (m->clustered && needed == FALSE) {
7541 /*
7542 * This page was a part of a speculative
7543 * read-ahead initiated by the kernel
7544 * itself. No one is expecting this
7545 * page and no one will clean up its
7546 * error state if it ever becomes valid
7547 * in the future.
7548 * We have to free it here.
7549 */
7550 must_free = TRUE;
7551 }
7552
7553 /*
7554 * ENCRYPTED SWAP:
7555 * If the page was already encrypted,
7556 * we don't really need to decrypt it
7557 * now. It will get decrypted later,
7558 * on demand, as soon as someone needs
7559 * to access its contents.
7560 */
7561
7562 m->cleaning = FALSE;
7563 m->encrypted_cleaning = FALSE;
7564
7565 if (m->overwriting && !m->busy) {
7566 /*
7567 * this shouldn't happen since
7568 * this is an 'absent' page, but
7569 * it doesn't hurt to check for
7570 * the 'alternate' method of
7571 * stabilizing the page...
7572 * we will mark 'busy' to be cleared
7573 * in the following code which will
7574 * take care of the primary stabilization
7575 * method (i.e. setting 'busy' to TRUE)
7576 */
7577 dwp->dw_mask |= DW_vm_page_unwire;
7578 }
7579 m->overwriting = FALSE;
7580
7581 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
7582
7583 if (must_free == TRUE)
7584 dwp->dw_mask |= DW_vm_page_free;
7585 else
7586 dwp->dw_mask |= DW_vm_page_activate;
7587 } else {
7588 /*
7589 * Handle the trusted pager throttle.
7590 */
7591 if (m->laundry)
7592 dwp->dw_mask |= DW_vm_pageout_throttle_up;
7593
7594 if (upl->flags & UPL_ACCESS_BLOCKED) {
7595 /*
7596 * We blocked access to the pages in this UPL.
7597 * Clear the "busy" bit and wake up any waiter
7598 * for this page.
7599 */
7600 dwp->dw_mask |= DW_clear_busy;
7601 }
7602 if (m->overwriting) {
7603 if (m->busy)
7604 dwp->dw_mask |= DW_clear_busy;
7605 else {
7606 /*
7607 * deal with the 'alternate' method
7608 * of stabilizing the page...
7609 * we will either free the page
7610 * or mark 'busy' to be cleared
7611 * in the following code which will
7612 * take care of the primary stabilization
7613 * method (i.e. setting 'busy' to TRUE)
7614 */
7615 dwp->dw_mask |= DW_vm_page_unwire;
7616 }
7617 m->overwriting = FALSE;
7618 }
7619 if (m->encrypted_cleaning == TRUE) {
7620 m->encrypted_cleaning = FALSE;
7621
7622 dwp->dw_mask |= DW_clear_busy;
7623 }
7624 m->pageout = FALSE;
7625 m->cleaning = FALSE;
7626 #if MACH_PAGEMAP
7627 vm_external_state_clr(m->object->existence_map, m->offset);
7628 #endif /* MACH_PAGEMAP */
7629 if (error & UPL_ABORT_DUMP_PAGES) {
7630 pmap_disconnect(m->phys_page);
7631
7632 dwp->dw_mask |= DW_vm_page_free;
7633 } else {
7634 if (!(dwp->dw_mask & DW_vm_page_unwire)) {
7635 if (error & UPL_ABORT_REFERENCE) {
7636 /*
7637 * we've been told to explicitly
7638 * reference this page... for
7639 * file I/O, this is done by
7640 * implementing an LRU on the inactive q
7641 */
7642 dwp->dw_mask |= DW_vm_page_lru;
7643
7644 } else if (!m->active && !m->inactive && !m->speculative)
7645 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7646 }
7647 dwp->dw_mask |= DW_PAGE_WAKEUP;
7648 }
7649 }
7650 }
7651 abort_next_page:
7652 target_offset += PAGE_SIZE_64;
7653 xfer_size -= PAGE_SIZE;
7654 entry++;
7655
7656 if (dwp->dw_mask) {
7657 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7658 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7659
7660 if (dw_count >= dw_limit) {
7661 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7662
7663 dwp = &dw_array[0];
7664 dw_count = 0;
7665 }
7666 } else {
7667 if (dwp->dw_mask & DW_clear_busy)
7668 m->busy = FALSE;
7669
7670 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7671 PAGE_WAKEUP(m);
7672 }
7673 }
7674 }
7675 if (dw_count)
7676 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7677
7678 occupied = 1;
7679
7680 if (upl->flags & UPL_DEVICE_MEMORY) {
7681 occupied = 0;
7682 } else if (upl->flags & UPL_LITE) {
7683 int pg_num;
7684 int i;
7685
7686 pg_num = upl->size/PAGE_SIZE;
7687 pg_num = (pg_num + 31) >> 5;
7688 occupied = 0;
7689
7690 for (i = 0; i < pg_num; i++) {
7691 if (lite_list[i] != 0) {
7692 occupied = 1;
7693 break;
7694 }
7695 }
7696 } else {
7697 if (queue_empty(&upl->map_object->memq))
7698 occupied = 0;
7699 }
7700 if (occupied == 0) {
7701 /*
7702 * If this UPL element belongs to a Vector UPL and is
7703 * empty, then this is the right function to deallocate
7704 * it. So go ahead and set the *empty variable. The flag
7705 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7706 * should be considered relevant for the Vector UPL and
7707 * not the internal UPLs.
7708 */
7709 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7710 *empty = TRUE;
7711
7712 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7713 /*
7714 * this is not a paging object
7715 * so we need to drop the paging reference
7716 * that was taken when we created the UPL
7717 * against this object
7718 */
7719 vm_object_activity_end(shadow_object);
7720 vm_object_collapse(shadow_object, 0, TRUE);
7721 } else {
7722 /*
7723 * we donated the paging reference to
7724 * the map object... vm_pageout_object_terminate
7725 * will drop this reference
7726 */
7727 }
7728 }
7729 vm_object_unlock(shadow_object);
7730 if (object != shadow_object)
7731 vm_object_unlock(object);
7732
7733 if(!isVectorUPL)
7734 upl_unlock(upl);
7735 else {
7736 /*
7737 * If we completed our operations on an UPL that is
7738 * part of a Vectored UPL and if empty is TRUE, then
7739 * we should go ahead and deallocate this UPL element.
7740 * Then we check if this was the last of the UPL elements
7741 * within that Vectored UPL. If so, set empty to TRUE
7742 * so that in ubc_upl_abort_range or ubc_upl_abort, we
7743 * can go ahead and deallocate the Vector UPL too.
7744 */
7745 if(*empty == TRUE) {
7746 *empty = vector_upl_set_subupl(vector_upl, upl,0);
7747 upl_deallocate(upl);
7748 }
7749 goto process_upl_to_abort;
7750 }
7751
7752 return KERN_SUCCESS;
7753 }
7754
7755
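/*
 * upl_abort:
 * Abort the entire UPL in one call: every page covered by the UPL is
 * handled as in upl_abort_range() with an offset of 0 and a size of
 * upl->size. A caller whose I/O failed might, for example, call
 * upl_abort(upl, UPL_ABORT_ERROR) to flag the affected pages.
 */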
7756 kern_return_t
7757 upl_abort(
7758 upl_t upl,
7759 int error)
7760 {
7761 boolean_t empty;
7762
7763 return upl_abort_range(upl, 0, upl->size, error, &empty);
7764 }
7765
7766
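/*
 * upl_commit:
 * Commit the entire UPL in one call: equivalent to upl_commit_range()
 * with an offset of 0, a size of upl->size and no commit flags.
 */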
7767 /* an option on commit should be wire */
7768 kern_return_t
7769 upl_commit(
7770 upl_t upl,
7771 upl_page_info_t *page_list,
7772 mach_msg_type_number_t count)
7773 {
7774 boolean_t empty;
7775
7776 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
7777 }
7778
7779
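/*
 * iopl_valid_data:
 * Called when the I/O that populated a UPL_IO_WIRE (IOPL) UPL has
 * produced valid data: any page of the backing object that is still
 * busy/absent is marked valid and dirty, wired, and woken up, and the
 * object's and the global wired-page counts are updated accordingly.
 */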
7780 void
7781 iopl_valid_data(
7782 upl_t upl)
7783 {
7784 vm_object_t object;
7785 vm_offset_t offset;
7786 vm_page_t m, nxt_page = VM_PAGE_NULL;
7787 upl_size_t size;
7788 int wired_count = 0;
7789
7790 if (upl == NULL)
7791 panic("iopl_valid_data: NULL upl");
7792 if (vector_upl_is_valid(upl))
7793 panic("iopl_valid_data: vector upl");
7794 if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE)
7795 panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
7796
7797 object = upl->map_object;
7798
7799 if (object == kernel_object || object == compressor_object)
7800 panic("iopl_valid_data: object == kernel or compressor");
7801
7802 if (object->purgable == VM_PURGABLE_VOLATILE)
7803 panic("iopl_valid_data: object == VM_PURGABLE_VOLATILE");
7804
7805 size = upl->size;
7806
7807 vm_object_lock(object);
7808
7809 if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE))
7810 nxt_page = (vm_page_t)queue_first(&object->memq);
7811 else
7812 offset = 0 + upl->offset - object->paging_offset;
7813
7814 while (size) {
7815
7816 if (nxt_page != VM_PAGE_NULL) {
7817 m = nxt_page;
7818 nxt_page = (vm_page_t)queue_next(&nxt_page->listq);
7819 } else {
7820 m = vm_page_lookup(object, offset);
7821 offset += PAGE_SIZE;
7822
7823 if (m == VM_PAGE_NULL)
7824 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
7825 }
7826 if (m->busy) {
7827 if (!m->absent)
7828 panic("iopl_valid_data: busy page w/o absent");
7829
7830 if (m->pageq.next || m->pageq.prev)
7831 panic("iopl_valid_data: busy+absent page on page queue");
7832
7833 m->absent = FALSE;
7834 m->dirty = TRUE;
7835 m->wire_count++;
7836 wired_count++;
7837
7838 PAGE_WAKEUP_DONE(m);
7839 }
7840 size -= PAGE_SIZE;
7841 }
7842 if (wired_count) {
7843
7844 if (!object->wired_page_count) {
7845 VM_OBJECT_WIRED(object);
7846 }
7847 object->wired_page_count += wired_count;
7848
7849 vm_page_lockspin_queues();
7850 vm_page_wire_count += wired_count;
7851 vm_page_unlock_queues();
7852 }
7853 vm_object_unlock(object);
7854 }
7855
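/*
 * vm_object_set_pmap_cache_attr:
 * Push the object's WIMG cache attributes down to the pmap layer for
 * the pages described by 'user_page_list', unless the object uses the
 * default cacheability (VM_WIMG_USE_DEFAULT).
 */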
7856 void
7857 vm_object_set_pmap_cache_attr(
7858 vm_object_t object,
7859 upl_page_info_array_t user_page_list,
7860 unsigned int num_pages,
7861 boolean_t batch_pmap_op)
7862 {
7863 unsigned int cache_attr = 0;
7864
7865 cache_attr = object->wimg_bits & VM_WIMG_MASK;
7866 assert(user_page_list);
7867 if (cache_attr != VM_WIMG_USE_DEFAULT) {
7868 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
7869 }
7870 }
7871
7872
7873 boolean_t vm_object_iopl_wire_full(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t);
7874 kern_return_t vm_object_iopl_wire_empty(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_object_offset_t *, int);
7875
7876
7877
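/*
 * vm_object_iopl_wire_full:
 * Fast path for vm_object_iopl_request() when every page of the object
 * is resident: wire each resident page and record it in the lite list
 * and (optional) page list.  Returns FALSE if a page is found in a
 * state that can't be handled here; the caller then falls back to the
 * slow path, which skips the pages already processed on this path.
 */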
7878 boolean_t
7879 vm_object_iopl_wire_full(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7880 wpl_array_t lite_list, upl_control_flags_t cntrl_flags)
7881 {
7882 vm_page_t dst_page;
7883 vm_tag_t tag;
7884 unsigned int entry;
7885 int page_count;
7886 int delayed_unlock = 0;
7887 boolean_t retval = TRUE;
7888
7889 vm_object_lock_assert_exclusive(object);
7890 assert(object->purgable != VM_PURGABLE_VOLATILE);
7891 assert(object->purgable != VM_PURGABLE_EMPTY);
7892 assert(object->pager == NULL);
7893 assert(object->copy == NULL);
7894 assert(object->shadow == NULL);
7895
7896 tag = UPL_MEMORY_TAG(cntrl_flags);
7897 page_count = object->resident_page_count;
7898 dst_page = (vm_page_t)queue_first(&object->memq);
7899
7900 vm_page_lock_queues();
7901
7902 while (page_count--) {
7903
7904 if (dst_page->busy ||
7905 dst_page->fictitious ||
7906 dst_page->absent ||
7907 dst_page->error ||
7908 dst_page->cleaning ||
7909 dst_page->restart ||
7910 dst_page->encrypted ||
7911 dst_page->laundry) {
7912 retval = FALSE;
7913 goto done;
7914 }
7915 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
7916 retval = FALSE;
7917 goto done;
7918 }
7919 dst_page->reference = TRUE;
7920
7921 vm_page_wire(dst_page, tag, FALSE);
7922
7923 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7924 SET_PAGE_DIRTY(dst_page, FALSE);
7925 }
7926 entry = (unsigned int)(dst_page->offset / PAGE_SIZE);
7927 assert(entry >= 0 && entry < object->resident_page_count);
7928 lite_list[entry>>5] |= 1 << (entry & 31);
7929
7930 if (dst_page->phys_page > upl->highest_page)
7931 upl->highest_page = dst_page->phys_page;
7932
7933 if (user_page_list) {
7934 user_page_list[entry].phys_addr = dst_page->phys_page;
7935 user_page_list[entry].absent = dst_page->absent;
7936 user_page_list[entry].dirty = dst_page->dirty;
7937 user_page_list[entry].pageout = dst_page->pageout;
7938 user_page_list[entry].precious = dst_page->precious;
7939 user_page_list[entry].device = FALSE;
7940 user_page_list[entry].speculative = FALSE;
7941 user_page_list[entry].cs_validated = FALSE;
7942 user_page_list[entry].cs_tainted = FALSE;
7943 user_page_list[entry].cs_nx = FALSE;
7944 user_page_list[entry].needed = FALSE;
7945 user_page_list[entry].mark = FALSE;
7946 }
7947 if (delayed_unlock++ > 256) {
7948 delayed_unlock = 0;
7949 lck_mtx_yield(&vm_page_queue_lock);
7950
7951 VM_CHECK_MEMORYSTATUS;
7952 }
7953 dst_page = (vm_page_t)queue_next(&dst_page->listq);
7954 }
7955 done:
7956 vm_page_unlock_queues();
7957
7958 VM_CHECK_MEMORYSTATUS;
7959
7960 return (retval);
7961 }
7962
7963
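/*
 * vm_object_iopl_wire_empty:
 * Fast path for vm_object_iopl_request() when the object has no
 * resident pages: grab fresh pages (zero-filled unless UPL_NOZEROFILL
 * or UPL_NOZEROFILLIO was requested), insert them into the object,
 * wire the non-absent ones and fill in the lite list and (optional)
 * page list.  Returns MACH_SEND_INTERRUPTED if an interruptible wait
 * for free pages was interrupted.
 */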
7964 kern_return_t
7965 vm_object_iopl_wire_empty(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7966 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_object_offset_t *dst_offset, int page_count)
7967 {
7968 vm_page_t dst_page;
7969 vm_tag_t tag;
7970 boolean_t no_zero_fill = FALSE;
7971 int interruptible;
7972 int pages_wired = 0;
7973 int pages_inserted = 0;
7974 int entry = 0;
7975 uint64_t delayed_ledger_update = 0;
7976 kern_return_t ret = KERN_SUCCESS;
7977
7978 vm_object_lock_assert_exclusive(object);
7979 assert(object->purgable != VM_PURGABLE_VOLATILE);
7980 assert(object->purgable != VM_PURGABLE_EMPTY);
7981 assert(object->pager == NULL);
7982 assert(object->copy == NULL);
7983 assert(object->shadow == NULL);
7984
7985 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
7986 interruptible = THREAD_ABORTSAFE;
7987 else
7988 interruptible = THREAD_UNINT;
7989
7990 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
7991 no_zero_fill = TRUE;
7992
7993 tag = UPL_MEMORY_TAG(cntrl_flags);
7994
7995 while (page_count--) {
7996
7997 while ( (dst_page = vm_page_grab()) == VM_PAGE_NULL) {
7998
7999 OSAddAtomic(page_count, &vm_upl_wait_for_pages);
8000
8001 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8002
8003 if (vm_page_wait(interruptible) == FALSE) {
8004 /*
8005 * interrupted case
8006 */
8007 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
8008
8009 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8010
8011 ret = MACH_SEND_INTERRUPTED;
8012 goto done;
8013 }
8014 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
8015
8016 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8017 }
8018 if (no_zero_fill == FALSE)
8019 vm_page_zero_fill(dst_page);
8020 else
8021 dst_page->absent = TRUE;
8022
8023 dst_page->reference = TRUE;
8024
8025 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8026 SET_PAGE_DIRTY(dst_page, FALSE);
8027 }
8028 if (dst_page->absent == FALSE) {
8029 dst_page->wire_count++;
8030 pages_wired++;
8031 PAGE_WAKEUP_DONE(dst_page);
8032 }
8033 pages_inserted++;
8034
8035 vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
8036
8037 lite_list[entry>>5] |= 1 << (entry & 31);
8038
8039 if (dst_page->phys_page > upl->highest_page)
8040 upl->highest_page = dst_page->phys_page;
8041
8042 if (user_page_list) {
8043 user_page_list[entry].phys_addr = dst_page->phys_page;
8044 user_page_list[entry].absent = dst_page->absent;
8045 user_page_list[entry].dirty = dst_page->dirty;
8046 user_page_list[entry].pageout = FALSE;
8047 user_page_list[entry].precious = FALSE;
8048 user_page_list[entry].device = FALSE;
8049 user_page_list[entry].speculative = FALSE;
8050 user_page_list[entry].cs_validated = FALSE;
8051 user_page_list[entry].cs_tainted = FALSE;
8052 user_page_list[entry].cs_nx = FALSE;
8053 user_page_list[entry].needed = FALSE;
8054 user_page_list[entry].mark = FALSE;
8055 }
8056 entry++;
8057 *dst_offset += PAGE_SIZE_64;
8058 }
8059 done:
8060 if (pages_wired) {
8061 vm_page_lockspin_queues();
8062 vm_page_wire_count += pages_wired;
8063 vm_page_unlock_queues();
8064 }
8065 if (pages_inserted) {
8066 if (object->internal) {
8067 OSAddAtomic(pages_inserted, &vm_page_internal_count);
8068 } else {
8069 OSAddAtomic(pages_inserted, &vm_page_external_count);
8070 }
8071 }
8072 if (delayed_ledger_update) {
8073 task_t owner;
8074
8075 owner = object->vo_purgeable_owner;
8076 assert(owner);
8077
8078 /* more non-volatile bytes */
8079 ledger_credit(owner->ledger,
8080 task_ledgers.purgeable_nonvolatile,
8081 delayed_ledger_update);
8082 /* more footprint */
8083 ledger_credit(owner->ledger,
8084 task_ledgers.phys_footprint,
8085 delayed_ledger_update);
8086 }
8087 return (ret);
8088 }
8089
8090
8091 unsigned int vm_object_iopl_request_sleep_for_cleaning = 0;
8092
8093
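/*
 * vm_object_iopl_request:
 * Build an I/O pagelist (UPL_IO_WIRE UPL) directly against 'object':
 * fault in (if needed) and wire the pages covering [offset, offset +
 * size), recording their physical addresses in the UPL so that a
 * device driver can perform I/O against them.  With UPL_BLOCK_ACCESS,
 * the pages are also made inaccessible to user mappings until the UPL
 * is committed or aborted.
 */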
8094 kern_return_t
8095 vm_object_iopl_request(
8096 vm_object_t object,
8097 vm_object_offset_t offset,
8098 upl_size_t size,
8099 upl_t *upl_ptr,
8100 upl_page_info_array_t user_page_list,
8101 unsigned int *page_list_count,
8102 upl_control_flags_t cntrl_flags)
8103 {
8104 vm_page_t dst_page;
8105 vm_object_offset_t dst_offset;
8106 upl_size_t xfer_size;
8107 upl_t upl = NULL;
8108 unsigned int entry;
8109 wpl_array_t lite_list = NULL;
8110 int no_zero_fill = FALSE;
8111 unsigned int size_in_pages;
8112 u_int32_t psize;
8113 kern_return_t ret;
8114 vm_prot_t prot;
8115 struct vm_object_fault_info fault_info;
8116 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
8117 struct vm_page_delayed_work *dwp;
8118 int dw_count;
8119 int dw_limit;
8120 int dw_index;
8121 boolean_t caller_lookup;
8122 int io_tracking_flag = 0;
8123 int interruptible;
8124
8125 boolean_t set_cache_attr_needed = FALSE;
8126 boolean_t free_wired_pages = FALSE;
8127 boolean_t fast_path_empty_req = FALSE;
8128 boolean_t fast_path_full_req = FALSE;
8129
8130 if (cntrl_flags & ~UPL_VALID_FLAGS) {
8131 /*
8132 * For forward compatibility's sake,
8133 * reject any unknown flag.
8134 */
8135 return KERN_INVALID_VALUE;
8136 }
8137 if (vm_lopage_needed == FALSE)
8138 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
8139
8140 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
8141 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
8142 return KERN_INVALID_VALUE;
8143
8144 if (object->phys_contiguous) {
8145 if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
8146 return KERN_INVALID_ADDRESS;
8147
8148 if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
8149 return KERN_INVALID_ADDRESS;
8150 }
8151 }
8152
8153 if (cntrl_flags & UPL_ENCRYPT) {
8154 /*
8155 * ENCRYPTED SWAP:
8156 * The paging path doesn't use this interface,
8157 * so we don't support the UPL_ENCRYPT flag
8158 * here. We won't encrypt the pages.
8159 */
8160 assert(! (cntrl_flags & UPL_ENCRYPT));
8161 }
8162 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
8163 no_zero_fill = TRUE;
8164
8165 if (cntrl_flags & UPL_COPYOUT_FROM)
8166 prot = VM_PROT_READ;
8167 else
8168 prot = VM_PROT_READ | VM_PROT_WRITE;
8169
8170 if ((!object->internal) && (object->paging_offset != 0))
8171 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
8172
8173 #if CONFIG_IOSCHED || UPL_DEBUG
8174 if ((object->io_tracking && object != kernel_object) || upl_debug_enabled)
8175 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
8176 #endif
8177
8178 #if CONFIG_IOSCHED
8179 if (object->io_tracking) {
8180 /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8181 if (object != kernel_object)
8182 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
8183 }
8184 #endif
8185
8186 if (object->phys_contiguous)
8187 psize = PAGE_SIZE;
8188 else
8189 psize = size;
8190
8191 if (cntrl_flags & UPL_SET_INTERNAL) {
8192 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8193
8194 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8195 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
8196 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
8197 if (size == 0) {
8198 user_page_list = NULL;
8199 lite_list = NULL;
8200 }
8201 } else {
8202 upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8203
8204 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
8205 if (size == 0) {
8206 lite_list = NULL;
8207 }
8208 }
8209 if (user_page_list)
8210 user_page_list[0].device = FALSE;
8211 *upl_ptr = upl;
8212
8213 upl->map_object = object;
8214 upl->size = size;
8215
8216 size_in_pages = size / PAGE_SIZE;
8217
8218 if (object == kernel_object &&
8219 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
8220 upl->flags |= UPL_KERNEL_OBJECT;
8221 #if UPL_DEBUG
8222 vm_object_lock(object);
8223 #else
8224 vm_object_lock_shared(object);
8225 #endif
8226 } else {
8227 vm_object_lock(object);
8228 vm_object_activity_begin(object);
8229 }
8230 /*
8231 * paging in progress also protects the paging_offset
8232 */
8233 upl->offset = offset + object->paging_offset;
8234
8235 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8236 /*
8237 * The user requested that access to the pages in this UPL
8238 * be blocked until the UPL is committed or aborted.
8239 */
8240 upl->flags |= UPL_ACCESS_BLOCKED;
8241 }
8242
8243 #if CONFIG_IOSCHED || UPL_DEBUG
8244 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
8245 vm_object_activity_begin(object);
8246 queue_enter(&object->uplq, upl, upl_t, uplq);
8247 }
8248 #endif
8249
8250 if (object->phys_contiguous) {
8251
8252 if (upl->flags & UPL_ACCESS_BLOCKED) {
8253 assert(!object->blocked_access);
8254 object->blocked_access = TRUE;
8255 }
8256
8257 vm_object_unlock(object);
8258
8259 /*
8260 * don't need any shadow mappings for this one
8261 * since it is already I/O memory
8262 */
8263 upl->flags |= UPL_DEVICE_MEMORY;
8264
8265 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT);
8266
8267 if (user_page_list) {
8268 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT);
8269 user_page_list[0].device = TRUE;
8270 }
8271 if (page_list_count != NULL) {
8272 if (upl->flags & UPL_INTERNAL)
8273 *page_list_count = 0;
8274 else
8275 *page_list_count = 1;
8276 }
8277 return KERN_SUCCESS;
8278 }
8279 if (object != kernel_object && object != compressor_object) {
8280 /*
8281 * Protect user space from future COW operations
8282 */
8283 #if VM_OBJECT_TRACKING_OP_TRUESHARE
8284 if (!object->true_share &&
8285 vm_object_tracking_inited) {
8286 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
8287 int num = 0;
8288
8289 num = OSBacktrace(bt,
8290 VM_OBJECT_TRACKING_BTDEPTH);
8291 btlog_add_entry(vm_object_tracking_btlog,
8292 object,
8293 VM_OBJECT_TRACKING_OP_TRUESHARE,
8294 bt,
8295 num);
8296 }
8297 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8298
8299 object->true_share = TRUE;
8300
8301 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
8302 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8303 }
8304
8305 if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
8306 object->copy != VM_OBJECT_NULL) {
8307 /*
8308 * Honor copy-on-write obligations
8309 *
8310 * The caller is gathering these pages and
8311 * might modify their contents. We need to
8312 * make sure that the copy object has its own
8313 * private copies of these pages before we let
8314 * the caller modify them.
8315 *
8316 * NOTE: someone else could map the original object
8317 * after we've done this copy-on-write here, and they
8318 * could then see an inconsistent picture of the memory
8319 * while it's being modified via the UPL. To prevent this,
8320 * we would have to block access to these pages until the
8321 * UPL is released. We could use the UPL_BLOCK_ACCESS
8322 * code path for that...
8323 */
8324 vm_object_update(object,
8325 offset,
8326 size,
8327 NULL,
8328 NULL,
8329 FALSE, /* should_return */
8330 MEMORY_OBJECT_COPY_SYNC,
8331 VM_PROT_NO_CHANGE);
8332 #if DEVELOPMENT || DEBUG
8333 iopl_cow++;
8334 iopl_cow_pages += size >> PAGE_SHIFT;
8335 #endif
8336 }
8337 if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
8338 object->purgable != VM_PURGABLE_VOLATILE &&
8339 object->purgable != VM_PURGABLE_EMPTY &&
8340 object->copy == NULL &&
8341 size == object->vo_size &&
8342 offset == 0 &&
8343 object->shadow == NULL &&
8344 object->pager == NULL)
8345 {
8346 if (object->resident_page_count == size_in_pages)
8347 {
8348 assert(object != compressor_object);
8349 assert(object != kernel_object);
8350 fast_path_full_req = TRUE;
8351 }
8352 else if (object->resident_page_count == 0)
8353 {
8354 assert(object != compressor_object);
8355 assert(object != kernel_object);
8356 fast_path_empty_req = TRUE;
8357 set_cache_attr_needed = TRUE;
8358 }
8359 }
8360
8361 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
8362 interruptible = THREAD_ABORTSAFE;
8363 else
8364 interruptible = THREAD_UNINT;
8365
8366 entry = 0;
8367
8368 xfer_size = size;
8369 dst_offset = offset;
8370 dw_count = 0;
8371
8372 if (fast_path_full_req) {
8373
8374 if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags) == TRUE)
8375 goto finish;
8376 /*
8377 * we couldn't complete the processing of this request on the fast path
8378 * so fall through to the slow path and finish up
8379 */
8380
8381 } else if (fast_path_empty_req) {
8382
8383 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8384 ret = KERN_MEMORY_ERROR;
8385 goto return_err;
8386 }
8387 ret = vm_object_iopl_wire_empty(object, upl, user_page_list, lite_list, cntrl_flags, &dst_offset, size_in_pages);
8388
8389 if (ret) {
8390 free_wired_pages = TRUE;
8391 goto return_err;
8392 }
8393 goto finish;
8394 }
8395
8396 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
8397 fault_info.user_tag = 0;
8398 fault_info.lo_offset = offset;
8399 fault_info.hi_offset = offset + xfer_size;
8400 fault_info.no_cache = FALSE;
8401 fault_info.stealth = FALSE;
8402 fault_info.io_sync = FALSE;
8403 fault_info.cs_bypass = FALSE;
8404 fault_info.mark_zf_absent = TRUE;
8405 fault_info.interruptible = interruptible;
8406 fault_info.batch_pmap_op = TRUE;
8407
8408 dwp = &dw_array[0];
8409 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
8410
8411 while (xfer_size) {
8412 vm_fault_return_t result;
8413
8414 dwp->dw_mask = 0;
8415
8416 if (fast_path_full_req) {
8417 /*
8418 * if we get here, it means that we ran into a page
8419 * state we couldn't handle in the fast path and
8420 * bailed out to the slow path... since the order
8421 * we look at pages is different between the 2 paths,
8422 * the following check is needed to determine whether
8423 * this page was already processed in the fast path
8424 */
8425 if (lite_list[entry>>5] & (1 << (entry & 31)))
8426 goto skip_page;
8427 }
8428 dst_page = vm_page_lookup(object, dst_offset);
8429
8430 /*
8431 * ENCRYPTED SWAP:
8432 * If the page is encrypted, we need to decrypt it,
8433 * so force a soft page fault.
8434 */
8435 if (dst_page == VM_PAGE_NULL ||
8436 dst_page->busy ||
8437 dst_page->encrypted ||
8438 dst_page->error ||
8439 dst_page->restart ||
8440 dst_page->absent ||
8441 dst_page->fictitious) {
8442
8443 if (object == kernel_object)
8444 panic("vm_object_iopl_request: missing/bad page in kernel object\n");
8445 if (object == compressor_object)
8446 panic("vm_object_iopl_request: missing/bad page in compressor object\n");
8447
8448 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8449 ret = KERN_MEMORY_ERROR;
8450 goto return_err;
8451 }
8452 set_cache_attr_needed = TRUE;
8453
8454 /*
8455 * We just looked up the page and the result remains valid
8456 * until the object lock is released, so send it to
8457 * vm_fault_page() (as "dst_page"), to avoid having to
8458 * look it up again there.
8459 */
8460 caller_lookup = TRUE;
8461
8462 do {
8463 vm_page_t top_page;
8464 kern_return_t error_code;
8465
8466 fault_info.cluster_size = xfer_size;
8467
8468 vm_object_paging_begin(object);
8469
8470 result = vm_fault_page(object, dst_offset,
8471 prot | VM_PROT_WRITE, FALSE,
8472 caller_lookup,
8473 &prot, &dst_page, &top_page,
8474 (int *)0,
8475 &error_code, no_zero_fill,
8476 FALSE, &fault_info);
8477
8478 /* our lookup is no longer valid at this point */
8479 caller_lookup = FALSE;
8480
8481 switch (result) {
8482
8483 case VM_FAULT_SUCCESS:
8484
8485 if ( !dst_page->absent) {
8486 PAGE_WAKEUP_DONE(dst_page);
8487 } else {
8488 /*
8489 * we only get back an absent page if we
8490 * requested that it not be zero-filled
8491 * because we are about to fill it via I/O
8492 *
8493 * absent pages should be left BUSY
8494 * to prevent them from being faulted
8495 * into an address space before we've
8496 * had a chance to complete the I/O on
8497 * them since they may contain info that
8498 * shouldn't be seen by the faulting task
8499 */
8500 }
8501 /*
8502 * Release paging references and
8503 * top-level placeholder page, if any.
8504 */
8505 if (top_page != VM_PAGE_NULL) {
8506 vm_object_t local_object;
8507
8508 local_object = top_page->object;
8509
8510 if (top_page->object != dst_page->object) {
8511 vm_object_lock(local_object);
8512 VM_PAGE_FREE(top_page);
8513 vm_object_paging_end(local_object);
8514 vm_object_unlock(local_object);
8515 } else {
8516 VM_PAGE_FREE(top_page);
8517 vm_object_paging_end(local_object);
8518 }
8519 }
8520 vm_object_paging_end(object);
8521 break;
8522
8523 case VM_FAULT_RETRY:
8524 vm_object_lock(object);
8525 break;
8526
8527 case VM_FAULT_MEMORY_SHORTAGE:
8528 OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
8529
8530 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8531
8532 if (vm_page_wait(interruptible)) {
8533 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8534
8535 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8536 vm_object_lock(object);
8537
8538 break;
8539 }
8540 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8541
8542 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8543
8544 /* fall thru */
8545
8546 case VM_FAULT_INTERRUPTED:
8547 error_code = MACH_SEND_INTERRUPTED;
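/* fall through */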
8548 case VM_FAULT_MEMORY_ERROR:
8549 memory_error:
8550 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
8551
8552 vm_object_lock(object);
8553 goto return_err;
8554
8555 case VM_FAULT_SUCCESS_NO_VM_PAGE:
8556 /* success but no page: fail */
8557 vm_object_paging_end(object);
8558 vm_object_unlock(object);
8559 goto memory_error;
8560
8561 default:
8562 panic("vm_object_iopl_request: unexpected error"
8563 " 0x%x from vm_fault_page()\n", result);
8564 }
8565 } while (result != VM_FAULT_SUCCESS);
8566
8567 }
8568 if (upl->flags & UPL_KERNEL_OBJECT)
8569 goto record_phys_addr;
8570
8571 if (dst_page->compressor) {
8572 dst_page->busy = TRUE;
8573 goto record_phys_addr;
8574 }
8575
8576 if (dst_page->cleaning) {
8577 /*
8578 * Someone else is cleaning this page in place.
8579 * In theory, we should be able to proceed and use this
8580 * page, but they'll probably end up clearing the "busy"
8581 * bit on it in upl_commit_range() even though they didn't
8582 * set it, so they would clear our "busy" bit and open
8583 * us to race conditions.
8584 * We'd better wait for the cleaning to complete and
8585 * then try again.
8586 */
8587 vm_object_iopl_request_sleep_for_cleaning++;
8588 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
8589 continue;
8590 }
8591 if (dst_page->laundry) {
8592 dst_page->pageout = FALSE;
8593
8594 vm_pageout_steal_laundry(dst_page, FALSE);
8595 }
8596 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
8597 dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
8598 vm_page_t low_page;
8599 int refmod;
8600
8601 /*
8602 * support devices that can't DMA above 32 bits
8603 * by substituting pages from a pool of low address
8604 * memory for any pages we find above the 4G mark...
8605 * we can't substitute if the page is already wired because
8606 * we don't know whether that physical address has been
8607 * handed out to some other 64 bit capable DMA device to use
8608 */
8609 if (VM_PAGE_WIRED(dst_page)) {
8610 ret = KERN_PROTECTION_FAILURE;
8611 goto return_err;
8612 }
8613 low_page = vm_page_grablo();
8614
8615 if (low_page == VM_PAGE_NULL) {
8616 ret = KERN_RESOURCE_SHORTAGE;
8617 goto return_err;
8618 }
8619 /*
8620 * from here until the vm_page_replace completes
8621 * we mustn't drop the object lock... we don't
8622 * want anyone refaulting this page in and using
8623 * it after we disconnect it... we want the fault
8624 * to find the new page being substituted.
8625 */
8626 if (dst_page->pmapped)
8627 refmod = pmap_disconnect(dst_page->phys_page);
8628 else
8629 refmod = 0;
8630
8631 if (!dst_page->absent)
8632 vm_page_copy(dst_page, low_page);
8633
8634 low_page->reference = dst_page->reference;
8635 low_page->dirty = dst_page->dirty;
8636 low_page->absent = dst_page->absent;
8637
8638 if (refmod & VM_MEM_REFERENCED)
8639 low_page->reference = TRUE;
8640 if (refmod & VM_MEM_MODIFIED) {
8641 SET_PAGE_DIRTY(low_page, FALSE);
8642 }
8643
8644 vm_page_replace(low_page, object, dst_offset);
8645
8646 dst_page = low_page;
8647 /*
8648 * vm_page_grablo returned the page marked
8649 * BUSY... we don't need a PAGE_WAKEUP_DONE
8650 * here, because we've never dropped the object lock
8651 */
8652 if ( !dst_page->absent)
8653 dst_page->busy = FALSE;
8654 }
8655 if ( !dst_page->busy)
8656 dwp->dw_mask |= DW_vm_page_wire;
8657
8658 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8659 /*
8660 * Mark the page "busy" to block any future page fault
8661 * on this page in addition to wiring it.
8662 * We'll also remove the mapping
8663 * of all these pages before leaving this routine.
8664 */
8665 assert(!dst_page->fictitious);
8666 dst_page->busy = TRUE;
8667 }
8668 /*
8669 * expect the page to be used
8670 * page queues lock must be held to set 'reference'
8671 */
8672 dwp->dw_mask |= DW_set_reference;
8673
8674 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8675 SET_PAGE_DIRTY(dst_page, TRUE);
8676 }
8677 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
8678 pmap_sync_page_attributes_phys(dst_page->phys_page);
8679 dst_page->written_by_kernel = FALSE;
8680 }
8681
8682 record_phys_addr:
8683 if (dst_page->busy)
8684 upl->flags |= UPL_HAS_BUSY;
8685
8686 lite_list[entry>>5] |= 1 << (entry & 31);
8687
8688 if (dst_page->phys_page > upl->highest_page)
8689 upl->highest_page = dst_page->phys_page;
8690
8691 if (user_page_list) {
8692 user_page_list[entry].phys_addr = dst_page->phys_page;
8693 user_page_list[entry].pageout = dst_page->pageout;
8694 user_page_list[entry].absent = dst_page->absent;
8695 user_page_list[entry].dirty = dst_page->dirty;
8696 user_page_list[entry].precious = dst_page->precious;
8697 user_page_list[entry].device = FALSE;
8698 user_page_list[entry].needed = FALSE;
8699 if (dst_page->clustered == TRUE)
8700 user_page_list[entry].speculative = dst_page->speculative;
8701 else
8702 user_page_list[entry].speculative = FALSE;
8703 user_page_list[entry].cs_validated = dst_page->cs_validated;
8704 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
8705 user_page_list[entry].cs_nx = dst_page->cs_nx;
8706 user_page_list[entry].mark = FALSE;
8707 }
8708 if (object != kernel_object && object != compressor_object) {
8709 /*
8710 * someone is explicitly grabbing this page...
8711 * update clustered and speculative state
8712 *
8713 */
8714 if (dst_page->clustered)
8715 VM_PAGE_CONSUME_CLUSTERED(dst_page);
8716 }
8717 skip_page:
8718 entry++;
8719 dst_offset += PAGE_SIZE_64;
8720 xfer_size -= PAGE_SIZE;
8721
8722 if (dwp->dw_mask) {
8723 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
8724
8725 if (dw_count >= dw_limit) {
8726 vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
8727
8728 dwp = &dw_array[0];
8729 dw_count = 0;
8730 }
8731 }
8732 }
8733 assert(entry == size_in_pages);
8734
8735 if (dw_count)
8736 vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
8737 finish:
8738 if (user_page_list && set_cache_attr_needed == TRUE)
8739 vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
8740
8741 if (page_list_count != NULL) {
8742 if (upl->flags & UPL_INTERNAL)
8743 *page_list_count = 0;
8744 else if (*page_list_count > size_in_pages)
8745 *page_list_count = size_in_pages;
8746 }
8747 vm_object_unlock(object);
8748
8749 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8750 /*
8751 * We've marked all the pages "busy" so that future
8752 * page faults will block.
8753 * Now remove the mapping for these pages, so that they
8754 * can't be accessed without causing a page fault.
8755 */
8756 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
8757 PMAP_NULL, 0, VM_PROT_NONE);
8758 assert(!object->blocked_access);
8759 object->blocked_access = TRUE;
8760 }
8761
8762 return KERN_SUCCESS;
8763
8764 return_err:
8765 dw_index = 0;
8766
8767 for (; offset < dst_offset; offset += PAGE_SIZE) {
8768 boolean_t need_unwire;
8769
8770 dst_page = vm_page_lookup(object, offset);
8771
8772 if (dst_page == VM_PAGE_NULL)
8773 panic("vm_object_iopl_request: Wired page missing. \n");
8774
8775 /*
8776 * if we've already processed this page in an earlier
8777 * dw_do_work, we need to undo the wiring... we will
8778 * leave the dirty and reference bits on if they
8779 * were set, since we don't have a good way of knowing
8780 * what the previous state was and we won't get here
8781 * under any normal circumstances... we will always
8782 * clear BUSY and wakeup any waiters via vm_page_free
8783 * or PAGE_WAKEUP_DONE
8784 */
8785 need_unwire = TRUE;
8786
8787 if (dw_count) {
8788 if (dw_array[dw_index].dw_m == dst_page) {
8789 /*
8790 * still in the deferred work list
8791 * which means we haven't yet called
8792 * vm_page_wire on this page
8793 */
8794 need_unwire = FALSE;
8795
8796 dw_index++;
8797 dw_count--;
8798 }
8799 }
8800 vm_page_lock_queues();
8801
8802 if (dst_page->absent || free_wired_pages == TRUE) {
8803 vm_page_free(dst_page);
8804
8805 need_unwire = FALSE;
8806 } else {
8807 if (need_unwire == TRUE)
8808 vm_page_unwire(dst_page, TRUE);
8809
8810 PAGE_WAKEUP_DONE(dst_page);
8811 }
8812 vm_page_unlock_queues();
8813
8814 if (need_unwire == TRUE)
8815 VM_STAT_INCR(reactivations);
8816 }
8817 #if UPL_DEBUG
8818 upl->upl_state = 2;
8819 #endif
8820 if (! (upl->flags & UPL_KERNEL_OBJECT)) {
8821 vm_object_activity_end(object);
8822 vm_object_collapse(object, 0, TRUE);
8823 }
8824 vm_object_unlock(object);
8825 upl_destroy(upl);
8826
8827 return ret;
8828 }
8829
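/*
 * upl_transpose:
 * Exchange the backing VM objects of two (non-vector) UPLs, so that
 * each UPL ends up pointing at the object that now holds its pages.
 * Both UPLs must start at offset 0 and have the same size, since the
 * objects' backing store is transposed in its entirety.
 */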
8830 kern_return_t
8831 upl_transpose(
8832 upl_t upl1,
8833 upl_t upl2)
8834 {
8835 kern_return_t retval;
8836 boolean_t upls_locked;
8837 vm_object_t object1, object2;
8838
8839 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
8840 return KERN_INVALID_ARGUMENT;
8841 }
8842
8843 upls_locked = FALSE;
8844
8845 /*
8846 * Since we need to lock both UPLs at the same time,
8847 * avoid deadlocks by always taking locks in the same order.
8848 */
8849 if (upl1 < upl2) {
8850 upl_lock(upl1);
8851 upl_lock(upl2);
8852 } else {
8853 upl_lock(upl2);
8854 upl_lock(upl1);
8855 }
8856 upls_locked = TRUE; /* the UPLs will need to be unlocked */
8857
8858 object1 = upl1->map_object;
8859 object2 = upl2->map_object;
8860
8861 if (upl1->offset != 0 || upl2->offset != 0 ||
8862 upl1->size != upl2->size) {
8863 /*
8864 * We deal only with full objects, not subsets.
8865 * That's because we exchange the entire backing store info
8866 * for the objects: pager, resident pages, etc... We can't do
8867 * only part of it.
8868 */
8869 retval = KERN_INVALID_VALUE;
8870 goto done;
8871 }
8872
8873 /*
8874 * Transpose the VM objects' backing store.
8875 */
8876 retval = vm_object_transpose(object1, object2,
8877 (vm_object_size_t) upl1->size);
8878
8879 if (retval == KERN_SUCCESS) {
8880 /*
8881 * Make each UPL point to the correct VM object, i.e. the
8882 * object holding the pages that the UPL refers to...
8883 */
8884 #if CONFIG_IOSCHED || UPL_DEBUG
8885 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8886 vm_object_lock(object1);
8887 vm_object_lock(object2);
8888 }
8889 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8890 queue_remove(&object1->uplq, upl1, upl_t, uplq);
8891 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8892 queue_remove(&object2->uplq, upl2, upl_t, uplq);
8893 #endif
8894 upl1->map_object = object2;
8895 upl2->map_object = object1;
8896
8897 #if CONFIG_IOSCHED || UPL_DEBUG
8898 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8899 queue_enter(&object2->uplq, upl1, upl_t, uplq);
8900 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8901 queue_enter(&object1->uplq, upl2, upl_t, uplq);
8902 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8903 vm_object_unlock(object2);
8904 vm_object_unlock(object1);
8905 }
8906 #endif
8907 }
8908
8909 done:
8910 /*
8911 * Cleanup.
8912 */
8913 if (upls_locked) {
8914 upl_unlock(upl1);
8915 upl_unlock(upl2);
8916 upls_locked = FALSE;
8917 }
8918
8919 return retval;
8920 }
8921
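/*
 * upl_range_needed:
 * Mark 'count' pages of an internal UPL, starting at page 'index', as
 * "needed" in its page list.  upl_abort_range() uses this hint to keep
 * (rather than free) speculatively read-ahead pages that a caller
 * still expects to use.
 */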
8922 void
8923 upl_range_needed(
8924 upl_t upl,
8925 int index,
8926 int count)
8927 {
8928 upl_page_info_t *user_page_list;
8929 int size_in_pages;
8930
8931 if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
8932 return;
8933
8934 size_in_pages = upl->size / PAGE_SIZE;
8935
8936 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8937
8938 while (count-- && index < size_in_pages)
8939 user_page_list[index++].needed = TRUE;
8940 }
8941
8942
8943 /*
8944 * ENCRYPTED SWAP:
8945 *
8946 * Rationale: the user might have some encrypted data on disk (via
8947 * FileVault or any other mechanism). That data is then decrypted in
8948 * memory, which is safe as long as the machine is secure. But that
8949 * decrypted data in memory could be paged out to disk by the default
8950 * pager. The data would then be stored on disk in clear (not encrypted)
8951 * and it could be accessed by anyone who gets physical access to the
8952 * disk (if the laptop or the disk gets stolen for example). This weakens
8953 * the security offered by FileVault.
8954 *
8955 * Solution: the default pager will optionally request that all the
8956 * pages it gathers for pageout be encrypted, via the UPL interfaces,
8957 * before it sends this UPL to disk via the vnode_pageout() path.
8958 *
8959 * Notes:
8960 *
8961 * To avoid disrupting the VM LRU algorithms, we want to keep the
8962 * clean-in-place mechanisms, which allow us to send some extra pages to
8963 * swap (clustering) without actually removing them from the user's
8964 * address space. We don't want the user to unknowingly access encrypted
8965 * data, so we have to actually remove the encrypted pages from the page
8966 * table. When the user accesses the data, the hardware will fail to
8967 * locate the virtual page in its page table and will trigger a page
8968 * fault. We can then decrypt the page and enter it in the page table
8969 * again. Whenever we allow the user to access the contents of a page,
8970 * we have to make sure it's not encrypted.
8971 *
8972 *
8973 */
8974 /*
8975 * ENCRYPTED SWAP:
8976 * Reserve of virtual addresses in the kernel address space.
8977 * We need to map the physical pages in the kernel, so that we
8978 * can call the encryption/decryption routines with a kernel
8979 * virtual address. We keep this pool of pre-allocated kernel
8980 * virtual addresses so that we don't have to scan the kernel's
8981 * virtual address space each time we need to encrypt or decrypt
8982 * a physical page.
8983 * It would be nice to be able to encrypt and decrypt in physical
8984 * mode but that might not always be more efficient...
8985 */
8986 decl_simple_lock_data(,vm_paging_lock)
8987 #define VM_PAGING_NUM_PAGES 64
8988 vm_map_offset_t vm_paging_base_address = 0;
8989 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
8990 int vm_paging_max_index = 0;
8991 int vm_paging_page_waiter = 0;
8992 int vm_paging_page_waiter_total = 0;
8993 unsigned long vm_paging_no_kernel_page = 0;
8994 unsigned long vm_paging_objects_mapped = 0;
8995 unsigned long vm_paging_pages_mapped = 0;
8996 unsigned long vm_paging_objects_mapped_slow = 0;
8997 unsigned long vm_paging_pages_mapped_slow = 0;
8998
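/*
 * vm_paging_map_init:
 * Reserve VM_PAGING_NUM_PAGES pages worth of kernel virtual address
 * space in the kernel_map; vm_paging_map_object() hands these out one
 * page at a time for temporary kernel mappings (e.g. for encryption
 * and decryption of pages).
 */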
8999 void
9000 vm_paging_map_init(void)
9001 {
9002 kern_return_t kr;
9003 vm_map_offset_t page_map_offset;
9004 vm_map_entry_t map_entry;
9005
9006 assert(vm_paging_base_address == 0);
9007
9008 /*
9009 * Initialize our pool of pre-allocated kernel
9010 * virtual addresses.
9011 */
9012 page_map_offset = 0;
9013 kr = vm_map_find_space(kernel_map,
9014 &page_map_offset,
9015 VM_PAGING_NUM_PAGES * PAGE_SIZE,
9016 0,
9017 0,
9018 &map_entry);
9019 if (kr != KERN_SUCCESS) {
9020 panic("vm_paging_map_init: kernel_map full\n");
9021 }
9022 VME_OBJECT_SET(map_entry, kernel_object);
9023 VME_OFFSET_SET(map_entry, page_map_offset);
9024 map_entry->protection = VM_PROT_NONE;
9025 map_entry->max_protection = VM_PROT_NONE;
9026 map_entry->permanent = TRUE;
9027 vm_object_reference(kernel_object);
9028 vm_map_unlock(kernel_map);
9029
9030 assert(vm_paging_base_address == 0);
9031 vm_paging_base_address = page_map_offset;
9032 }
9033
9034 /*
9035 * ENCRYPTED SWAP:
9036 * vm_paging_map_object:
9037 * Maps part of a VM object's pages in the kernel
9038 * virtual address space, using the pre-allocated
9039 * kernel virtual addresses, if possible.
9040 * Context:
9041 * The VM object is locked. This lock will get
9042 * dropped and re-acquired though, so the caller
9043 * must make sure the VM object is kept alive
9044 * (by holding a VM map that has a reference
9045 * on it, for example, or taking an extra reference).
9046 * The page should also be kept busy to prevent
9047 * it from being reclaimed.
9048 */
9049 kern_return_t
9050 vm_paging_map_object(
9051 vm_page_t page,
9052 vm_object_t object,
9053 vm_object_offset_t offset,
9054 vm_prot_t protection,
9055 boolean_t can_unlock_object,
9056 vm_map_size_t *size, /* IN/OUT */
9057 vm_map_offset_t *address, /* OUT */
9058 boolean_t *need_unmap) /* OUT */
9059 {
9060 kern_return_t kr;
9061 vm_map_offset_t page_map_offset;
9062 vm_map_size_t map_size;
9063 vm_object_offset_t object_offset;
9064 int i;
9065
9066 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
9067 /* use permanent 1-to-1 kernel mapping of physical memory ? */
9068 #if __x86_64__
9069 *address = (vm_map_offset_t)
9070 PHYSMAP_PTOV((pmap_paddr_t)page->phys_page <<
9071 PAGE_SHIFT);
9072 *need_unmap = FALSE;
9073 return KERN_SUCCESS;
9074 #else
9075 #warning "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
9076 #endif
9077
9078 assert(page->busy);
9079 /*
9080 * Use one of the pre-allocated kernel virtual addresses
9081 * and just enter the VM page in the kernel address space
9082 * at that virtual address.
9083 */
9084 simple_lock(&vm_paging_lock);
9085
9086 /*
9087 * Try and find an available kernel virtual address
9088 * from our pre-allocated pool.
9089 */
9090 page_map_offset = 0;
9091 for (;;) {
9092 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
9093 if (vm_paging_page_inuse[i] == FALSE) {
9094 page_map_offset =
9095 vm_paging_base_address +
9096 (i * PAGE_SIZE);
9097 break;
9098 }
9099 }
9100 if (page_map_offset != 0) {
9101 /* found a space to map our page ! */
9102 break;
9103 }
9104
9105 if (can_unlock_object) {
9106 /*
9107 * If we can afford to unlock the VM object,
9108 * let's take the slow path now...
9109 */
9110 break;
9111 }
9112 /*
9113 * We can't afford to unlock the VM object, so
9114 * let's wait for a space to become available...
9115 */
9116 vm_paging_page_waiter_total++;
9117 vm_paging_page_waiter++;
9118 kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
9119 if (kr == THREAD_WAITING) {
9120 simple_unlock(&vm_paging_lock);
9121 kr = thread_block(THREAD_CONTINUE_NULL);
9122 simple_lock(&vm_paging_lock);
9123 }
9124 vm_paging_page_waiter--;
9125 /* ... and try again */
9126 }
9127
9128 if (page_map_offset != 0) {
9129 /*
9130 * We found a kernel virtual address;
9131 * map the physical page to that virtual address.
9132 */
9133 if (i > vm_paging_max_index) {
9134 vm_paging_max_index = i;
9135 }
9136 vm_paging_page_inuse[i] = TRUE;
9137 simple_unlock(&vm_paging_lock);
9138
9139 page->pmapped = TRUE;
9140
9141 /*
9142 * Keep the VM object locked over the PMAP_ENTER
9143 * and the actual use of the page by the kernel,
9144 * or this pmap mapping might get undone by a
9145 * vm_object_pmap_protect() call...
9146 */
9147 PMAP_ENTER(kernel_pmap,
9148 page_map_offset,
9149 page,
9150 protection,
9151 VM_PROT_NONE,
9152 0,
9153 TRUE);
9154 vm_paging_objects_mapped++;
9155 vm_paging_pages_mapped++;
9156 *address = page_map_offset;
9157 *need_unmap = TRUE;
9158
9159 /* all done and mapped, ready to use ! */
9160 return KERN_SUCCESS;
9161 }
9162
9163 /*
9164 * We ran out of pre-allocated kernel virtual
9165 * addresses. Just map the page in the kernel
9166 * the slow and regular way.
9167 */
9168 vm_paging_no_kernel_page++;
9169 simple_unlock(&vm_paging_lock);
9170 }
9171
9172 if (! can_unlock_object) {
9173 *address = 0;
9174 *size = 0;
9175 *need_unmap = FALSE;
9176 return KERN_NOT_SUPPORTED;
9177 }
9178
9179 object_offset = vm_object_trunc_page(offset);
9180 map_size = vm_map_round_page(*size,
9181 VM_MAP_PAGE_MASK(kernel_map));
9182
9183 /*
9184 * Try and map the required range of the object
9185 * in the kernel_map
9186 */
9187
9188 vm_object_reference_locked(object); /* for the map entry */
9189 vm_object_unlock(object);
9190
9191 kr = vm_map_enter(kernel_map,
9192 address,
9193 map_size,
9194 0,
9195 VM_FLAGS_ANYWHERE,
9196 object,
9197 object_offset,
9198 FALSE,
9199 protection,
9200 VM_PROT_ALL,
9201 VM_INHERIT_NONE);
9202 if (kr != KERN_SUCCESS) {
9203 *address = 0;
9204 *size = 0;
9205 *need_unmap = FALSE;
9206 vm_object_deallocate(object); /* for the map entry */
9207 vm_object_lock(object);
9208 return kr;
9209 }
9210
9211 *size = map_size;
9212
9213 /*
9214 * Enter the mapped pages in the page table now.
9215 */
9216 vm_object_lock(object);
9217 /*
9218 * VM object must be kept locked from before PMAP_ENTER()
9219 * until after the kernel is done accessing the page(s).
9220 * Otherwise, the pmap mappings in the kernel could be
9221 * undone by a call to vm_object_pmap_protect().
9222 */
9223
9224 for (page_map_offset = 0;
9225 map_size != 0;
9226 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
9227
9228 page = vm_page_lookup(object, offset + page_map_offset);
9229 if (page == VM_PAGE_NULL) {
9230 printf("vm_paging_map_object: no page !?");
9231 vm_object_unlock(object);
9232 kr = vm_map_remove(kernel_map, *address, *size,
9233 VM_MAP_NO_FLAGS);
9234 assert(kr == KERN_SUCCESS);
9235 *address = 0;
9236 *size = 0;
9237 *need_unmap = FALSE;
9238 vm_object_lock(object);
9239 return KERN_MEMORY_ERROR;
9240 }
9241 page->pmapped = TRUE;
9242
9243 //assert(pmap_verify_free(page->phys_page));
9244 PMAP_ENTER(kernel_pmap,
9245 *address + page_map_offset,
9246 page,
9247 protection,
9248 VM_PROT_NONE,
9249 0,
9250 TRUE);
9251 }
9252
9253 vm_paging_objects_mapped_slow++;
9254 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
9255
9256 *need_unmap = TRUE;
9257
9258 return KERN_SUCCESS;
9259 }
9260
9261 /*
9262 * ENCRYPTED SWAP:
9263 * vm_paging_unmap_object:
9264 * Unmaps part of a VM object's pages from the kernel
9265 * virtual address space.
9266 * Context:
9267 * The VM object is locked. This lock will get
9268 * dropped and re-acquired though.
9269 */
9270 void
9271 vm_paging_unmap_object(
9272 vm_object_t object,
9273 vm_map_offset_t start,
9274 vm_map_offset_t end)
9275 {
9276 kern_return_t kr;
9277 int i;
9278
9279 if ((vm_paging_base_address == 0) ||
9280 (start < vm_paging_base_address) ||
9281 (end > (vm_paging_base_address
9282 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
9283 /*
9284 * We didn't use our pre-allocated pool of
9285 * kernel virtual addresses. Deallocate the
9286 * virtual memory.
9287 */
9288 if (object != VM_OBJECT_NULL) {
9289 vm_object_unlock(object);
9290 }
9291 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
9292 if (object != VM_OBJECT_NULL) {
9293 vm_object_lock(object);
9294 }
9295 assert(kr == KERN_SUCCESS);
9296 } else {
9297 /*
9298 * We used a kernel virtual address from our
9299 * pre-allocated pool. Put it back in the pool
9300 * for next time.
9301 */
9302 assert(end - start == PAGE_SIZE);
9303 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
9304 assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
9305
9306 /* undo the pmap mapping */
9307 pmap_remove(kernel_pmap, start, end);
9308
9309 simple_lock(&vm_paging_lock);
9310 vm_paging_page_inuse[i] = FALSE;
9311 if (vm_paging_page_waiter) {
9312 thread_wakeup(&vm_paging_page_waiter);
9313 }
9314 simple_unlock(&vm_paging_lock);
9315 }
9316 }
9317
9318 #if ENCRYPTED_SWAP
9319 /*
9320 * Encryption data.
9321 * "iv" is the "initial vector". Ideally, we want to
9322 * have a different one for each page we encrypt, so that
9323 * crackers can't find encryption patterns too easily.
9324 */
9325 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
9326 boolean_t swap_crypt_ctx_initialized = FALSE;
9327 uint32_t swap_crypt_key[8]; /* big enough for a 256-bit key */
9328 aes_ctx swap_crypt_ctx;
9329 const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
9330
9331 #if DEBUG
9332 boolean_t swap_crypt_ctx_tested = FALSE;
9333 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
9334 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
9335 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
9336 #endif /* DEBUG */
9337
9338 /*
9339 * Initialize the encryption context: key and key size.
9340 */
9341 void swap_crypt_ctx_initialize(void); /* forward */
9342 void
9343 swap_crypt_ctx_initialize(void)
9344 {
9345 unsigned int i;
9346
9347 /*
9348 * No need for locking to protect swap_crypt_ctx_initialized
9349 * because the first use of encryption will come from the
9350 * pageout thread (we won't pagein before there's been a pageout)
9351 * and there's only one pageout thread.
9352 */
9353 if (swap_crypt_ctx_initialized == FALSE) {
9354 for (i = 0;
9355 i < (sizeof (swap_crypt_key) /
9356 sizeof (swap_crypt_key[0]));
9357 i++) {
9358 swap_crypt_key[i] = random();
9359 }
9360 aes_encrypt_key((const unsigned char *) swap_crypt_key,
9361 SWAP_CRYPT_AES_KEY_SIZE,
9362 &swap_crypt_ctx.encrypt);
9363 aes_decrypt_key((const unsigned char *) swap_crypt_key,
9364 SWAP_CRYPT_AES_KEY_SIZE,
9365 &swap_crypt_ctx.decrypt);
9366 swap_crypt_ctx_initialized = TRUE;
9367 }
9368
9369 #if DEBUG
9370 /*
9371 * Validate the encryption algorithms.
9372 */
9373 if (swap_crypt_ctx_tested == FALSE) {
9374 /* initialize */
9375 for (i = 0; i < 4096; i++) {
9376 swap_crypt_test_page_ref[i] = (char) i;
9377 }
9378 /* encrypt */
9379 aes_encrypt_cbc(swap_crypt_test_page_ref,
9380 swap_crypt_null_iv,
9381 PAGE_SIZE / AES_BLOCK_SIZE,
9382 swap_crypt_test_page_encrypt,
9383 &swap_crypt_ctx.encrypt);
9384 /* decrypt */
9385 aes_decrypt_cbc(swap_crypt_test_page_encrypt,
9386 swap_crypt_null_iv,
9387 PAGE_SIZE / AES_BLOCK_SIZE,
9388 swap_crypt_test_page_decrypt,
9389 &swap_crypt_ctx.decrypt);
9390 /* compare result with original */
9391 for (i = 0; i < 4096; i ++) {
9392 if (swap_crypt_test_page_decrypt[i] !=
9393 swap_crypt_test_page_ref[i]) {
9394 panic("encryption test failed");
9395 }
9396 }
9397
9398 /* encrypt again */
9399 aes_encrypt_cbc(swap_crypt_test_page_decrypt,
9400 swap_crypt_null_iv,
9401 PAGE_SIZE / AES_BLOCK_SIZE,
9402 swap_crypt_test_page_decrypt,
9403 &swap_crypt_ctx.encrypt);
9404 /* decrypt in place */
9405 aes_decrypt_cbc(swap_crypt_test_page_decrypt,
9406 swap_crypt_null_iv,
9407 PAGE_SIZE / AES_BLOCK_SIZE,
9408 swap_crypt_test_page_decrypt,
9409 &swap_crypt_ctx.decrypt);
9410 for (i = 0; i < 4096; i ++) {
9411 if (swap_crypt_test_page_decrypt[i] !=
9412 swap_crypt_test_page_ref[i]) {
9413 panic("in-place encryption test failed");
9414 }
9415 }
9416
9417 swap_crypt_ctx_tested = TRUE;
9418 }
9419 #endif /* DEBUG */
9420 }
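/*
 * Illustrative arithmetic: SWAP_CRYPT_AES_KEY_SIZE is expressed in bits, so
 * aes_encrypt_key()/aes_decrypt_key() above consume only the first
 * 128 / 8 == 16 bytes, i.e. the first 4 of the 8 uint32_t words of random
 * material in swap_crypt_key[]; the array is sized for a 256-bit key even
 * though only a 128-bit key is usable today (see the XXX note on
 * SWAP_CRYPT_AES_KEY_SIZE above).
 */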
9421
9422 /*
9423 * ENCRYPTED SWAP:
9424 * vm_page_encrypt:
9425 * Encrypt the given page, for secure paging.
9426 * The page might already be mapped at kernel virtual
9427 * address "kernel_mapping_offset". Otherwise, we need
9428 * to map it.
9429 *
9430 * Context:
9431 * The page's object is locked, but this lock will be released
9432 * and re-acquired.
9433 * The page is busy and not accessible by users (not entered in any pmap).
9434 */
9435 void
9436 vm_page_encrypt(
9437 vm_page_t page,
9438 vm_map_offset_t kernel_mapping_offset)
9439 {
9440 kern_return_t kr;
9441 vm_map_size_t kernel_mapping_size;
9442 boolean_t kernel_mapping_needs_unmap;
9443 vm_offset_t kernel_vaddr;
9444 union {
9445 unsigned char aes_iv[AES_BLOCK_SIZE];
9446 struct {
9447 memory_object_t pager_object;
9448 vm_object_offset_t paging_offset;
9449 } vm;
9450 } encrypt_iv;
9451
9452 if (! vm_pages_encrypted) {
9453 vm_pages_encrypted = TRUE;
9454 }
9455
9456 assert(page->busy);
9457
9458 if (page->encrypted) {
9459 /*
9460 * Already encrypted: no need to do it again.
9461 */
9462 vm_page_encrypt_already_encrypted_counter++;
9463 return;
9464 }
9465 assert(page->dirty || page->precious);
9466
9467 ASSERT_PAGE_DECRYPTED(page);
9468
9469 /*
9470 * Take a paging-in-progress reference to keep the object
9471 * alive even if we have to unlock it (in vm_paging_map_object()
9472 * for example)...
9473 */
9474 vm_object_paging_begin(page->object);
9475
9476 if (kernel_mapping_offset == 0) {
9477 /*
9478 * The page hasn't already been mapped in kernel space
9479 * by the caller. Map it now, so that we can access
9480 * its contents and encrypt them.
9481 */
9482 kernel_mapping_size = PAGE_SIZE;
9483 kernel_mapping_needs_unmap = FALSE;
9484 kr = vm_paging_map_object(page,
9485 page->object,
9486 page->offset,
9487 VM_PROT_READ | VM_PROT_WRITE,
9488 FALSE,
9489 &kernel_mapping_size,
9490 &kernel_mapping_offset,
9491 &kernel_mapping_needs_unmap);
9492 if (kr != KERN_SUCCESS) {
9493 panic("vm_page_encrypt: "
9494 "could not map page in kernel: 0x%x\n",
9495 kr);
9496 }
9497 } else {
9498 kernel_mapping_size = 0;
9499 kernel_mapping_needs_unmap = FALSE;
9500 }
9501 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
9502
9503 if (swap_crypt_ctx_initialized == FALSE) {
9504 swap_crypt_ctx_initialize();
9505 }
9506 assert(swap_crypt_ctx_initialized);
9507
9508 /*
9509 * Prepare an "initial vector" for the encryption.
9510 * We use the "pager" and the "paging_offset" for that
9511 * page to obfuscate the encrypted data a bit more and
9512 * prevent crackers from finding patterns that they could
9513 * use to break the key.
9514 */
9515 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
9516 encrypt_iv.vm.pager_object = page->object->pager;
9517 encrypt_iv.vm.paging_offset =
9518 page->object->paging_offset + page->offset;
9519
9520 /* encrypt the "initial vector" */
9521 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
9522 swap_crypt_null_iv,
9523 1,
9524 &encrypt_iv.aes_iv[0],
9525 &swap_crypt_ctx.encrypt);
9526
9527 /*
9528 * Encrypt the page.
9529 */
9530 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
9531 &encrypt_iv.aes_iv[0],
9532 PAGE_SIZE / AES_BLOCK_SIZE,
9533 (unsigned char *) kernel_vaddr,
9534 &swap_crypt_ctx.encrypt);
9535
9536 vm_page_encrypt_counter++;
9537
9538 /*
9539 * Unmap the page from the kernel's address space,
9540 * if we had to map it ourselves. Otherwise, let
9541 * the caller undo the mapping if needed.
9542 */
9543 if (kernel_mapping_needs_unmap) {
9544 vm_paging_unmap_object(page->object,
9545 kernel_mapping_offset,
9546 kernel_mapping_offset + kernel_mapping_size);
9547 }
9548
9549 /*
9550 * Clear the "reference" and "modified" bits.
9551 * This should clean up any impact the encryption had
9552 * on them.
9553 * The page was kept busy and disconnected from all pmaps,
9554 * so it can't have been referenced or modified from user
9555 * space.
9556 * The software bits will be reset later after the I/O
9557 * has completed (in upl_commit_range()).
9558 */
9559 pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
9560
9561 page->encrypted = TRUE;
9562
9563 vm_object_paging_end(page->object);
9564 }
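/*
 * Illustrative sketch (not compiled; "swap_crypt_derive_iv" is a hypothetical
 * name, not part of this file): the per-page "initial vector" built above is
 * just the (pager, paging offset) pair placed in a zeroed AES block and run
 * once through AES-CBC with the constant null IV, so those raw values never
 * appear directly in the swapped-out data.  Factored out, the derivation
 * would look roughly like this:
 */
#if 0
static void
swap_crypt_derive_iv(
	vm_page_t	page,
	unsigned char	iv_out[AES_BLOCK_SIZE])
{
	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		struct {
			memory_object_t		pager_object;
			vm_object_offset_t	paging_offset;
		} vm;
	} iv;

	bzero(&iv.aes_iv[0], sizeof (iv.aes_iv));
	iv.vm.pager_object = page->object->pager;
	iv.vm.paging_offset = page->object->paging_offset + page->offset;

	/* one single-block CBC pass with the constant null IV */
	aes_encrypt_cbc((const unsigned char *) &iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);

	memcpy(iv_out, &iv.aes_iv[0], AES_BLOCK_SIZE);
}
#endif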
9565
9566 /*
9567 * ENCRYPTED SWAP:
9568 * vm_page_decrypt:
9569 * Decrypt the given page.
9570 * The page might already be mapped at kernel virtual
9571 * address "kernel_mapping_offset". Otherwise, we need
9572 * to map it.
9573 *
9574 * Context:
9575 * The page's VM object is locked but will be unlocked and relocked.
9576 * The page is busy and not accessible by users (not entered in any pmap).
9577 */
9578 void
9579 vm_page_decrypt(
9580 vm_page_t page,
9581 vm_map_offset_t kernel_mapping_offset)
9582 {
9583 kern_return_t kr;
9584 vm_map_size_t kernel_mapping_size;
9585 vm_offset_t kernel_vaddr;
9586 boolean_t kernel_mapping_needs_unmap;
9587 union {
9588 unsigned char aes_iv[AES_BLOCK_SIZE];
9589 struct {
9590 memory_object_t pager_object;
9591 vm_object_offset_t paging_offset;
9592 } vm;
9593 } decrypt_iv;
9594 boolean_t was_dirty;
9595
9596 assert(page->busy);
9597 assert(page->encrypted);
9598
9599 was_dirty = page->dirty;
9600
9601 /*
9602 * Take a paging-in-progress reference to keep the object
9603 * alive even if we have to unlock it (in vm_paging_map_object()
9604 * for example)...
9605 */
9606 vm_object_paging_begin(page->object);
9607
9608 if (kernel_mapping_offset == 0) {
9609 /*
9610 * The page hasn't already been mapped in kernel space
9611 * by the caller. Map it now, so that we can access
9612 * its contents and decrypt them.
9613 */
9614 kernel_mapping_size = PAGE_SIZE;
9615 kernel_mapping_needs_unmap = FALSE;
9616 kr = vm_paging_map_object(page,
9617 page->object,
9618 page->offset,
9619 VM_PROT_READ | VM_PROT_WRITE,
9620 FALSE,
9621 &kernel_mapping_size,
9622 &kernel_mapping_offset,
9623 &kernel_mapping_needs_unmap);
9624 if (kr != KERN_SUCCESS) {
9625 panic("vm_page_decrypt: "
9626 "could not map page in kernel: 0x%x\n",
9627 kr);
9628 }
9629 } else {
9630 kernel_mapping_size = 0;
9631 kernel_mapping_needs_unmap = FALSE;
9632 }
9633 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
9634
9635 assert(swap_crypt_ctx_initialized);
9636
9637 /*
9638 * Prepare an "initial vector" for the decryption.
9639 * It has to be the same as the "initial vector" we
9640 * used to encrypt that page.
9641 */
9642 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
9643 decrypt_iv.vm.pager_object = page->object->pager;
9644 decrypt_iv.vm.paging_offset =
9645 page->object->paging_offset + page->offset;
9646
9647 /* encrypt the "initial vector" */
9648 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
9649 swap_crypt_null_iv,
9650 1,
9651 &decrypt_iv.aes_iv[0],
9652 &swap_crypt_ctx.encrypt);
9653
9654 /*
9655 * Decrypt the page.
9656 */
9657 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
9658 &decrypt_iv.aes_iv[0],
9659 PAGE_SIZE / AES_BLOCK_SIZE,
9660 (unsigned char *) kernel_vaddr,
9661 &swap_crypt_ctx.decrypt);
9662 vm_page_decrypt_counter++;
9663
9664 /*
9665 * Unmap the page from the kernel's address space,
9666 * if we had to map it ourselves. Otherwise, let
9667 * the caller undo the mapping if needed.
9668 */
9669 if (kernel_mapping_needs_unmap) {
9670 vm_paging_unmap_object(page->object,
9671 kernel_vaddr,
9672 kernel_vaddr + PAGE_SIZE);
9673 }
9674
9675 if (was_dirty) {
9676 /*
9677 * The pager did not specify that the page would be
9678 * clean when it got paged in, so let's not clean it here
9679 * either.
9680 */
9681 } else {
9682 /*
9683 * After decryption, the page is actually still clean.
9684 * It was encrypted as part of paging, which "cleans"
9685 * the "dirty" pages.
9686 * No one could access it after it was encrypted
9687 * and the decryption doesn't count.
9688 */
9689 page->dirty = FALSE;
9690 assert (page->cs_validated == FALSE);
9691 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
9692 }
9693 page->encrypted = FALSE;
9694
9695 /*
9696 * We've just modified the page's contents via the data cache and part
9697 * of the new contents might still be in the cache and not yet in RAM.
9698 * Since the page is now available and might get gathered in a UPL to
9699 * be part of a DMA transfer from a driver that expects the memory to
9700 * be coherent at this point, we have to flush the data cache.
9701 */
9702 pmap_sync_page_attributes_phys(page->phys_page);
9703 /*
9704 * Since the page is not mapped yet, some code might assume that it
9705 * doesn't need to invalidate the instruction cache when writing to
9706 * that page. That code relies on "pmapped" being FALSE, so that the
9707 * caches get synchronized when the page is first mapped.
9708 */
9709 assert(pmap_verify_free(page->phys_page));
9710 page->pmapped = FALSE;
9711 page->wpmapped = FALSE;
9712
9713 vm_object_paging_end(page->object);
9714 }
9715
9716 #if DEVELOPMENT || DEBUG
9717 unsigned long upl_encrypt_upls = 0;
9718 unsigned long upl_encrypt_pages = 0;
9719 #endif
9720
9721 /*
9722 * ENCRYPTED SWAP:
9723 *
9724 * upl_encrypt:
9725 * Encrypts all the pages in the UPL, within the specified range.
9726 *
9727 */
9728 void
9729 upl_encrypt(
9730 upl_t upl,
9731 upl_offset_t crypt_offset,
9732 upl_size_t crypt_size)
9733 {
9734 upl_size_t upl_size, subupl_size=crypt_size;
9735 upl_offset_t offset_in_upl, subupl_offset=crypt_offset;
9736 vm_object_t upl_object;
9737 vm_object_offset_t upl_offset;
9738 vm_page_t page;
9739 vm_object_t shadow_object;
9740 vm_object_offset_t shadow_offset;
9741 vm_object_offset_t paging_offset;
9742 vm_object_offset_t base_offset;
9743 int isVectorUPL = 0;
9744 upl_t vector_upl = NULL;
9745
9746 if((isVectorUPL = vector_upl_is_valid(upl)))
9747 vector_upl = upl;
9748
9749 process_upl_to_encrypt:
9750 if(isVectorUPL) {
9751 crypt_size = subupl_size;
9752 crypt_offset = subupl_offset;
9753 upl = vector_upl_subupl_byoffset(vector_upl, &crypt_offset, &crypt_size);
9754 if(upl == NULL)
9755 panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n");
9756 subupl_size -= crypt_size;
9757 subupl_offset += crypt_size;
9758 }
9759
9760 #if DEVELOPMENT || DEBUG
9761 upl_encrypt_upls++;
9762 upl_encrypt_pages += crypt_size / PAGE_SIZE;
9763 #endif
9764 upl_object = upl->map_object;
9765 upl_offset = upl->offset;
9766 upl_size = upl->size;
9767
9768 vm_object_lock(upl_object);
9769
9770 /*
9771 * Find the VM object that contains the actual pages.
9772 */
9773 if (upl_object->pageout) {
9774 shadow_object = upl_object->shadow;
9775 /*
9776 * The offset in the shadow object is actually also
9777 * accounted for in upl->offset. It possibly shouldn't be
9778 * this way, but for now don't account for it twice.
9779 */
9780 shadow_offset = 0;
9781 assert(upl_object->paging_offset == 0); /* XXX ? */
9782 vm_object_lock(shadow_object);
9783 } else {
9784 shadow_object = upl_object;
9785 shadow_offset = 0;
9786 }
9787
9788 paging_offset = shadow_object->paging_offset;
9789 vm_object_paging_begin(shadow_object);
9790
9791 if (shadow_object != upl_object)
9792 vm_object_unlock(upl_object);
9793
9794
9795 base_offset = shadow_offset;
9796 base_offset += upl_offset;
9797 base_offset += crypt_offset;
9798 base_offset -= paging_offset;
9799
9800 assert(crypt_offset + crypt_size <= upl_size);
9801
9802 for (offset_in_upl = 0;
9803 offset_in_upl < crypt_size;
9804 offset_in_upl += PAGE_SIZE) {
9805 page = vm_page_lookup(shadow_object,
9806 base_offset + offset_in_upl);
9807 if (page == VM_PAGE_NULL) {
9808 panic("upl_encrypt: "
9809 "no page for (obj=%p,off=0x%llx+0x%x)!\n",
9810 shadow_object,
9811 base_offset,
9812 offset_in_upl);
9813 }
9814 /*
9815 * Disconnect the page from all pmaps, so that nobody can
9816 * access it while it's encrypted. After that point, all
9817 * accesses to this page will cause a page fault and block
9818 * while the page is busy being encrypted. After the
9819 * encryption completes, any access will cause a
9820 * page fault and the page gets decrypted at that time.
9821 */
9822 pmap_disconnect(page->phys_page);
9823 vm_page_encrypt(page, 0);
9824
9825 if (vm_object_lock_avoid(shadow_object)) {
9826 /*
9827 * Give vm_pageout_scan() a chance to convert more
9828 * pages from "clean-in-place" to "clean-and-free",
9829 * if it's interested in the same pages we selected
9830 * in this cluster.
9831 */
9832 vm_object_unlock(shadow_object);
9833 mutex_pause(2);
9834 vm_object_lock(shadow_object);
9835 }
9836 }
9837
9838 vm_object_paging_end(shadow_object);
9839 vm_object_unlock(shadow_object);
9840
9841 if(isVectorUPL && subupl_size)
9842 goto process_upl_to_encrypt;
9843 }
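/*
 * Illustrative arithmetic (assumed values, not from this file): for a pageout
 * UPL with upl->offset == 0x4000, a crypt_offset of 0x1000, a shadow_offset
 * of 0 and a shadow object paging_offset of 0x2000, the loop above starts
 * looking up pages at shadow-object offset 0 + 0x4000 + 0x1000 - 0x2000 ==
 * 0x3000 and walks forward one PAGE_SIZE per iteration until crypt_size is
 * covered.
 */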
9844
9845 #else /* ENCRYPTED_SWAP */
9846 void
9847 upl_encrypt(
9848 __unused upl_t upl,
9849 __unused upl_offset_t crypt_offset,
9850 __unused upl_size_t crypt_size)
9851 {
9852 }
9853
9854 void
9855 vm_page_encrypt(
9856 __unused vm_page_t page,
9857 __unused vm_map_offset_t kernel_mapping_offset)
9858 {
9859 }
9860
9861 void
9862 vm_page_decrypt(
9863 __unused vm_page_t page,
9864 __unused vm_map_offset_t kernel_mapping_offset)
9865 {
9866 }
9867
9868 #endif /* ENCRYPTED_SWAP */
9869
9870 /*
9871 * page->object must be locked
9872 */
9873 void
9874 vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
9875 {
9876 if (!queues_locked) {
9877 vm_page_lockspin_queues();
9878 }
9879
9880 /*
9881 * need to drop the laundry count...
9882 * we may also need to remove it
9883 * from the I/O paging queue...
9884 * vm_pageout_throttle_up handles both cases
9885 *
9886 * the laundry and pageout_queue flags are cleared...
9887 */
9888 vm_pageout_throttle_up(page);
9889
9890 vm_page_steal_pageout_page++;
9891
9892 if (!queues_locked) {
9893 vm_page_unlock_queues();
9894 }
9895 }
9896
9897 upl_t
9898 vector_upl_create(vm_offset_t upl_offset)
9899 {
9900 int vector_upl_size = sizeof(struct _vector_upl);
9901 int i=0;
9902 upl_t upl;
9903 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
9904
9905 upl = upl_create(0,UPL_VECTOR,0);
9906 upl->vector_upl = vector_upl;
9907 upl->offset = upl_offset;
9908 vector_upl->size = 0;
9909 vector_upl->offset = upl_offset;
9910 vector_upl->invalid_upls=0;
9911 vector_upl->num_upls=0;
9912 vector_upl->pagelist = NULL;
9913
9914 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
9915 vector_upl->upl_iostates[i].size = 0;
9916 vector_upl->upl_iostates[i].offset = 0;
9917
9918 }
9919 return upl;
9920 }
9921
9922 void
9923 vector_upl_deallocate(upl_t upl)
9924 {
9925 if(upl) {
9926 vector_upl_t vector_upl = upl->vector_upl;
9927 if(vector_upl) {
9928 if(vector_upl->invalid_upls != vector_upl->num_upls)
9929 panic("Deallocating non-empty Vectored UPL\n");
9930 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
9931 vector_upl->invalid_upls=0;
9932 vector_upl->num_upls = 0;
9933 vector_upl->pagelist = NULL;
9934 vector_upl->size = 0;
9935 vector_upl->offset = 0;
9936 kfree(vector_upl, sizeof(struct _vector_upl));
9937 vector_upl = (vector_upl_t)0xfeedfeed;
9938 }
9939 else
9940 panic("vector_upl_deallocate was passed a non-vectored upl\n");
9941 }
9942 else
9943 panic("vector_upl_deallocate was passed a NULL upl\n");
9944 }
9945
9946 boolean_t
9947 vector_upl_is_valid(upl_t upl)
9948 {
9949 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
9950 vector_upl_t vector_upl = upl->vector_upl;
9951 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef)
9952 return FALSE;
9953 else
9954 return TRUE;
9955 }
9956 return FALSE;
9957 }
9958
9959 boolean_t
9960 vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
9961 {
9962 if(vector_upl_is_valid(upl)) {
9963 vector_upl_t vector_upl = upl->vector_upl;
9964
9965 if(vector_upl) {
9966 if(subupl) {
9967 if(io_size) {
9968 if(io_size < PAGE_SIZE)
9969 io_size = PAGE_SIZE;
9970 subupl->vector_upl = (void*)vector_upl;
9971 vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
9972 vector_upl->size += io_size;
9973 upl->size += io_size;
9974 }
9975 else {
9976 uint32_t i=0,invalid_upls=0;
9977 for(i = 0; i < vector_upl->num_upls; i++) {
9978 if(vector_upl->upl_elems[i] == subupl)
9979 break;
9980 }
9981 if(i == vector_upl->num_upls)
9982 panic("Trying to remove sub-upl when none exists");
9983
9984 vector_upl->upl_elems[i] = NULL;
9985 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
9986 if(invalid_upls == vector_upl->num_upls)
9987 return TRUE;
9988 else
9989 return FALSE;
9990 }
9991 }
9992 else
9993 panic("vector_upl_set_subupl was passed a NULL upl element\n");
9994 }
9995 else
9996 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
9997 }
9998 else
9999 panic("vector_upl_set_subupl was passed a NULL upl\n");
10000
10001 return FALSE;
10002 }
10003
10004 void
10005 vector_upl_set_pagelist(upl_t upl)
10006 {
10007 if(vector_upl_is_valid(upl)) {
10008 uint32_t i=0;
10009 vector_upl_t vector_upl = upl->vector_upl;
10010
10011 if(vector_upl) {
10012 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0;
10013
10014 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
10015
10016 for(i=0; i < vector_upl->num_upls; i++) {
10017 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
10018 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
10019 pagelist_size += cur_upl_pagelist_size;
10020 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
10021 upl->highest_page = vector_upl->upl_elems[i]->highest_page;
10022 }
10023 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
10024 }
10025 else
10026 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
10027 }
10028 else
10029 panic("vector_upl_set_pagelist was passed a NULL upl\n");
10030
10031 }
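/*
 * Illustrative sketch (not compiled): a typical life cycle for the vector-UPL
 * helpers above.  "upl_a", "upl_b", "io_size_a", "io_size_b" and "upl_offset"
 * are hypothetical values owned by the caller (normally the cluster I/O
 * layer), not names from this file.
 */
#if 0
{
	upl_t	vupl = vector_upl_create(upl_offset);

	/* attach each sub-UPL and record its cumulative I/O window */
	vector_upl_set_subupl(vupl, upl_a, io_size_a);
	vector_upl_set_iostate(vupl, upl_a, 0, io_size_a);

	vector_upl_set_subupl(vupl, upl_b, io_size_b);
	vector_upl_set_iostate(vupl, upl_b, io_size_a, io_size_b);

	/* aggregate the sub-UPL page lists once everything is attached */
	vector_upl_set_pagelist(vupl);

	/* ... issue the I/O ... */

	/* an io_size of 0 detaches a sub-UPL; TRUE means all are now invalid */
	(void) vector_upl_set_subupl(vupl, upl_a, 0);
	if (vector_upl_set_subupl(vupl, upl_b, 0))
		vector_upl_deallocate(vupl);
}
#endif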
10032
10033 upl_t
10034 vector_upl_subupl_byindex(upl_t upl, uint32_t index)
10035 {
10036 if(vector_upl_is_valid(upl)) {
10037 vector_upl_t vector_upl = upl->vector_upl;
10038 if(vector_upl) {
10039 if(index < vector_upl->num_upls)
10040 return vector_upl->upl_elems[index];
10041 }
10042 else
10043 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
10044 }
10045 return NULL;
10046 }
10047
10048 upl_t
10049 vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
10050 {
10051 if(vector_upl_is_valid(upl)) {
10052 uint32_t i=0;
10053 vector_upl_t vector_upl = upl->vector_upl;
10054
10055 if(vector_upl) {
10056 upl_t subupl = NULL;
10057 vector_upl_iostates_t subupl_state;
10058
10059 for(i=0; i < vector_upl->num_upls; i++) {
10060 subupl = vector_upl->upl_elems[i];
10061 subupl_state = vector_upl->upl_iostates[i];
10062 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
10063 /* We could have been passed an offset/size pair that belongs
10064 * to a UPL element that has already been committed/aborted.
10065 * If so, return NULL.
10066 */
10067 if(subupl == NULL)
10068 return NULL;
10069 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
10070 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
10071 if(*upl_size > subupl_state.size)
10072 *upl_size = subupl_state.size;
10073 }
10074 if(*upl_offset >= subupl_state.offset)
10075 *upl_offset -= subupl_state.offset;
10076 else if(i)
10077 panic("Vector UPL offset miscalculation\n");
10078 return subupl;
10079 }
10080 }
10081 }
10082 else
10083 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
10084 }
10085 return NULL;
10086 }
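/*
 * Illustrative example (assumed values): with two sub-UPLs whose iostates are
 * { offset 0, size 0x10000 } and { offset 0x10000, size 0x10000 }, a lookup
 * with *upl_offset == 0x12000 and *upl_size == 0x2000 falls inside the second
 * element; the offset is rebased to 0x12000 - 0x10000 == 0x2000 within that
 * sub-UPL and the size stays 0x2000 because the request already fits in the
 * element's I/O window.
 */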
10087
10088 void
10089 vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
10090 {
10091 *v_upl_submap = NULL;
10092
10093 if(vector_upl_is_valid(upl)) {
10094 vector_upl_t vector_upl = upl->vector_upl;
10095 if(vector_upl) {
10096 *v_upl_submap = vector_upl->submap;
10097 *submap_dst_addr = vector_upl->submap_dst_addr;
10098 }
10099 else
10100 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
10101 }
10102 else
10103 panic("vector_upl_get_submap was passed a null UPL\n");
10104 }
10105
10106 void
10107 vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
10108 {
10109 if(vector_upl_is_valid(upl)) {
10110 vector_upl_t vector_upl = upl->vector_upl;
10111 if(vector_upl) {
10112 vector_upl->submap = submap;
10113 vector_upl->submap_dst_addr = submap_dst_addr;
10114 }
10115 else
10116 panic("vector_upl_set_submap was passed a non-vectored UPL\n");
10117 }
10118 else
10119 panic("vector_upl_set_submap was passed a NULL UPL\n");
10120 }
10121
10122 void
10123 vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
10124 {
10125 if(vector_upl_is_valid(upl)) {
10126 uint32_t i = 0;
10127 vector_upl_t vector_upl = upl->vector_upl;
10128
10129 if(vector_upl) {
10130 for(i = 0; i < vector_upl->num_upls; i++) {
10131 if(vector_upl->upl_elems[i] == subupl)
10132 break;
10133 }
10134
10135 if(i == vector_upl->num_upls)
10136 panic("setting sub-upl iostate when none exists");
10137
10138 vector_upl->upl_iostates[i].offset = offset;
10139 if(size < PAGE_SIZE)
10140 size = PAGE_SIZE;
10141 vector_upl->upl_iostates[i].size = size;
10142 }
10143 else
10144 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
10145 }
10146 else
10147 panic("vector_upl_set_iostate was passed a NULL UPL\n");
10148 }
10149
10150 void
10151 vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
10152 {
10153 if(vector_upl_is_valid(upl)) {
10154 uint32_t i = 0;
10155 vector_upl_t vector_upl = upl->vector_upl;
10156
10157 if(vector_upl) {
10158 for(i = 0; i < vector_upl->num_upls; i++) {
10159 if(vector_upl->upl_elems[i] == subupl)
10160 break;
10161 }
10162
10163 if(i == vector_upl->num_upls)
10164 panic("getting sub-upl iostate when none exists");
10165
10166 *offset = vector_upl->upl_iostates[i].offset;
10167 *size = vector_upl->upl_iostates[i].size;
10168 }
10169 else
10170 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
10171 }
10172 else
10173 panic("vector_upl_get_iostate was passed a NULL UPL\n");
10174 }
10175
10176 void
10177 vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
10178 {
10179 if(vector_upl_is_valid(upl)) {
10180 vector_upl_t vector_upl = upl->vector_upl;
10181 if(vector_upl) {
10182 if(index < vector_upl->num_upls) {
10183 *offset = vector_upl->upl_iostates[index].offset;
10184 *size = vector_upl->upl_iostates[index].size;
10185 }
10186 else
10187 *offset = *size = 0;
10188 }
10189 else
10190 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
10191 }
10192 else
10193 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
10194 }
10195
10196 upl_page_info_t *
10197 upl_get_internal_vectorupl_pagelist(upl_t upl)
10198 {
10199 return ((vector_upl_t)(upl->vector_upl))->pagelist;
10200 }
10201
10202 void *
10203 upl_get_internal_vectorupl(upl_t upl)
10204 {
10205 return upl->vector_upl;
10206 }
10207
10208 vm_size_t
10209 upl_get_internal_pagelist_offset(void)
10210 {
10211 return sizeof(struct upl);
10212 }
10213
10214 void
10215 upl_clear_dirty(
10216 upl_t upl,
10217 boolean_t value)
10218 {
10219 if (value) {
10220 upl->flags |= UPL_CLEAR_DIRTY;
10221 } else {
10222 upl->flags &= ~UPL_CLEAR_DIRTY;
10223 }
10224 }
10225
10226 void
10227 upl_set_referenced(
10228 upl_t upl,
10229 boolean_t value)
10230 {
10231 upl_lock(upl);
10232 if (value) {
10233 upl->ext_ref_count++;
10234 } else {
10235 if (!upl->ext_ref_count) {
10236 panic("upl_set_referenced: %p has no external reference to drop\n", upl);
10237 }
10238 upl->ext_ref_count--;
10239 }
10240 upl_unlock(upl);
10241 }
10242
10243 #if CONFIG_IOSCHED
10244 void
10245 upl_set_blkno(
10246 upl_t upl,
10247 vm_offset_t upl_offset,
10248 int io_size,
10249 int64_t blkno)
10250 {
10251 int i,j;
10252 if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0)
10253 return;
10254
10255 assert(upl->upl_reprio_info != 0);
10256 for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
10257 UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
10258 }
10259 }
10260 #endif
10261
10262 boolean_t
10263 vm_page_is_slideable(vm_page_t m)
10264 {
10265 boolean_t result = FALSE;
10266 vm_shared_region_slide_info_t si;
10267
10268 vm_object_lock_assert_held(m->object);
10269
10270 /* make sure our page belongs to the one object allowed to do this */
10271 if (!m->object->object_slid) {
10272 goto done;
10273 }
10274
10275 si = m->object->vo_slide_info;
10276 if (si == NULL) {
10277 goto done;
10278 }
10279
10280 if(!m->slid && (si->start <= m->offset && si->end > m->offset)) {
10281 result = TRUE;
10282 }
10283
10284 done:
10285 return result;
10286 }
10287
10288 int vm_page_slide_counter = 0;
10289 int vm_page_slide_errors = 0;
10290 kern_return_t
10291 vm_page_slide(
10292 vm_page_t page,
10293 vm_map_offset_t kernel_mapping_offset)
10294 {
10295 kern_return_t kr;
10296 vm_map_size_t kernel_mapping_size;
10297 boolean_t kernel_mapping_needs_unmap;
10298 vm_offset_t kernel_vaddr;
10299 uint32_t pageIndex;
10300 uint32_t slide_chunk;
10301
10302 assert(!page->slid);
10303 assert(page->object->object_slid);
10304 vm_object_lock_assert_exclusive(page->object);
10305
10306 if (page->error)
10307 return KERN_FAILURE;
10308
10309 /*
10310 * Take a paging-in-progress reference to keep the object
10311 * alive even if we have to unlock it (in vm_paging_map_object()
10312 * for example)...
10313 */
10314 vm_object_paging_begin(page->object);
10315
10316 if (kernel_mapping_offset == 0) {
10317 /*
10318 * The page hasn't already been mapped in kernel space
10319 * by the caller. Map it now, so that we can access
10320 * its contents and slide them.
10321 */
10322 kernel_mapping_size = PAGE_SIZE;
10323 kernel_mapping_needs_unmap = FALSE;
10324 kr = vm_paging_map_object(page,
10325 page->object,
10326 page->offset,
10327 VM_PROT_READ | VM_PROT_WRITE,
10328 FALSE,
10329 &kernel_mapping_size,
10330 &kernel_mapping_offset,
10331 &kernel_mapping_needs_unmap);
10332 if (kr != KERN_SUCCESS) {
10333 panic("vm_page_slide: "
10334 "could not map page in kernel: 0x%x\n",
10335 kr);
10336 }
10337 } else {
10338 kernel_mapping_size = 0;
10339 kernel_mapping_needs_unmap = FALSE;
10340 }
10341 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
10342
10343 /*
10344 * Slide the pointers on the page.
10345 */
10346
10347 /*assert that slide_file_info.start/end are page-aligned?*/
10348
10349 assert(!page->slid);
10350 assert(page->object->object_slid);
10351
10352 #define PAGE_SIZE_FOR_SR_SLIDE 4096
10353 pageIndex = (uint32_t)((page->offset -
10354 page->object->vo_slide_info->start) /
10355 PAGE_SIZE_FOR_SR_SLIDE);
10356 for (slide_chunk = 0;
10357 slide_chunk < PAGE_SIZE / PAGE_SIZE_FOR_SR_SLIDE;
10358 slide_chunk++) {
10359 kr = vm_shared_region_slide_page(page->object->vo_slide_info,
10360 (kernel_vaddr +
10361 (slide_chunk *
10362 PAGE_SIZE_FOR_SR_SLIDE)),
10363 (pageIndex + slide_chunk));
10364 if (kr != KERN_SUCCESS) {
10365 break;
10366 }
10367 }
10368
10369 vm_page_slide_counter++;
10370
10371 /*
10372 * Unmap the page from the kernel's address space,
10373 */
10374 if (kernel_mapping_needs_unmap) {
10375 vm_paging_unmap_object(page->object,
10376 kernel_vaddr,
10377 kernel_vaddr + PAGE_SIZE);
10378 }
10379
10380 page->dirty = FALSE;
10381 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
10382
10383 if (kr != KERN_SUCCESS || cs_debug > 1) {
10384 printf("vm_page_slide(%p): "
10385 "obj %p off 0x%llx mobj %p moff 0x%llx\n",
10386 page,
10387 page->object, page->offset,
10388 page->object->pager,
10389 page->offset + page->object->paging_offset);
10390 }
10391
10392 if (kr == KERN_SUCCESS) {
10393 page->slid = TRUE;
10394 } else {
10395 page->error = TRUE;
10396 vm_page_slide_errors++;
10397 }
10398
10399 vm_object_paging_end(page->object);
10400
10401 return kr;
10402 }
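/*
 * Illustrative arithmetic (assumed 16 KB VM pages): with PAGE_SIZE == 16384
 * the loop above slides 16384 / PAGE_SIZE_FOR_SR_SLIDE == 4 sub-page chunks;
 * for a page 0x8000 bytes past vo_slide_info->start, pageIndex is
 * 0x8000 / 4096 == 8, so chunk indices 8 through 11 are passed to
 * vm_shared_region_slide_page().  With 4 KB VM pages the loop runs exactly
 * once.
 */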
10403
10404 inline void memoryshot(unsigned int event, unsigned int control)
10405 {
10406 if (vm_debug_events) {
10407 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
10408 vm_page_active_count, vm_page_inactive_count,
10409 vm_page_free_count, vm_page_speculative_count,
10410 vm_page_throttled_count);
10411 } else {
10412 (void) event;
10413 (void) control;
10414 }
10415
10416 }
10417
10418 #ifdef MACH_BSD
10419
10420 boolean_t upl_device_page(upl_page_info_t *upl)
10421 {
10422 return(UPL_DEVICE_PAGE(upl));
10423 }
10424 boolean_t upl_page_present(upl_page_info_t *upl, int index)
10425 {
10426 return(UPL_PAGE_PRESENT(upl, index));
10427 }
10428 boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
10429 {
10430 return(UPL_SPECULATIVE_PAGE(upl, index));
10431 }
10432 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
10433 {
10434 return(UPL_DIRTY_PAGE(upl, index));
10435 }
10436 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
10437 {
10438 return(UPL_VALID_PAGE(upl, index));
10439 }
10440 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
10441 {
10442 return(UPL_PHYS_PAGE(upl, index));
10443 }
10444
10445 void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
10446 {
10447 upl[index].mark = v;
10448 }
10449
10450 boolean_t upl_page_get_mark(upl_page_info_t *upl, int index)
10451 {
10452 return upl[index].mark;
10453 }
10454
10455 void
10456 vm_countdirtypages(void)
10457 {
10458 vm_page_t m;
10459 int dpages;
10460 int pgopages;
10461 int precpages;
10462
10463
10464 dpages=0;
10465 pgopages=0;
10466 precpages=0;
10467
10468 vm_page_lock_queues();
10469 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
10470 do {
10471 if (m ==(vm_page_t )0) break;
10472
10473 if(m->dirty) dpages++;
10474 if(m->pageout) pgopages++;
10475 if(m->precious) precpages++;
10476
10477 assert(m->object != kernel_object);
10478 m = (vm_page_t) queue_next(&m->pageq);
10479 if (m ==(vm_page_t )0) break;
10480
10481 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
10482 vm_page_unlock_queues();
10483
10484 vm_page_lock_queues();
10485 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
10486 do {
10487 if (m ==(vm_page_t )0) break;
10488
10489 dpages++;
10490 assert(m->dirty);
10491 assert(!m->pageout);
10492 assert(m->object != kernel_object);
10493 m = (vm_page_t) queue_next(&m->pageq);
10494 if (m ==(vm_page_t )0) break;
10495
10496 } while (!queue_end(&vm_page_queue_throttled,(queue_entry_t) m));
10497 vm_page_unlock_queues();
10498
10499 vm_page_lock_queues();
10500 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
10501 do {
10502 if (m ==(vm_page_t )0) break;
10503
10504 if(m->dirty) dpages++;
10505 if(m->pageout) pgopages++;
10506 if(m->precious) precpages++;
10507
10508 assert(m->object != kernel_object);
10509 m = (vm_page_t) queue_next(&m->pageq);
10510 if (m ==(vm_page_t )0) break;
10511
10512 } while (!queue_end(&vm_page_queue_anonymous,(queue_entry_t) m));
10513 vm_page_unlock_queues();
10514
10515 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
10516
10517 dpages=0;
10518 pgopages=0;
10519 precpages=0;
10520
10521 vm_page_lock_queues();
10522 m = (vm_page_t) queue_first(&vm_page_queue_active);
10523
10524 do {
10525 if(m == (vm_page_t )0) break;
10526 if(m->dirty) dpages++;
10527 if(m->pageout) pgopages++;
10528 if(m->precious) precpages++;
10529
10530 assert(m->object != kernel_object);
10531 m = (vm_page_t) queue_next(&m->pageq);
10532 if(m == (vm_page_t )0) break;
10533
10534 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
10535 vm_page_unlock_queues();
10536
10537 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
10538
10539 }
10540 #endif /* MACH_BSD */
10541
10542 ppnum_t upl_get_highest_page(
10543 upl_t upl)
10544 {
10545 return upl->highest_page;
10546 }
10547
10548 upl_size_t upl_get_size(
10549 upl_t upl)
10550 {
10551 return upl->size;
10552 }
10553
10554 upl_t upl_associated_upl(upl_t upl)
10555 {
10556 return upl->associated_upl;
10557 }
10558
10559 void upl_set_associated_upl(upl_t upl, upl_t associated_upl)
10560 {
10561 upl->associated_upl = associated_upl;
10562 }
10563
10564 #if UPL_DEBUG
10565 kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
10566 {
10567 upl->ubc_alias1 = alias1;
10568 upl->ubc_alias2 = alias2;
10569 return KERN_SUCCESS;
10570 }
10571 int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
10572 {
10573 if(al)
10574 *al = upl->ubc_alias1;
10575 if(al2)
10576 *al2 = upl->ubc_alias2;
10577 return KERN_SUCCESS;
10578 }
10579 #endif /* UPL_DEBUG */
10580
10581 #if VM_PRESSURE_EVENTS
10582 /*
10583 * Upward trajectory.
10584 */
10585 extern boolean_t vm_compressor_low_on_space(void);
10586
10587 boolean_t
10588 VM_PRESSURE_NORMAL_TO_WARNING(void) {
10589
10590 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
10591
10592 /* Available pages below our threshold */
10593 if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
10594 /* No frozen processes to kill */
10595 if (memorystatus_frozen_count == 0) {
10596 /* Not enough suspended processes available. */
10597 if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
10598 return TRUE;
10599 }
10600 }
10601 }
10602 return FALSE;
10603
10604 } else {
10605 return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
10606 }
10607 }
10608
10609 boolean_t
10610 VM_PRESSURE_WARNING_TO_CRITICAL(void) {
10611
10612 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
10613 /* Available pages below our threshold */
10614 if (memorystatus_available_pages < memorystatus_available_pages_critical) {
10615 return TRUE;
10616 }
10617 return FALSE;
10618 } else {
10619 return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
10620 }
10621 }
10622
10623 /*
10624 * Downward trajectory.
10625 */
10626 boolean_t
10627 VM_PRESSURE_WARNING_TO_NORMAL(void) {
10628
10629 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
10630 /* Available pages above our threshold */
10631 unsigned int target_threshold = memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100);
10632 if (memorystatus_available_pages > target_threshold) {
10633 return TRUE;
10634 }
10635 return FALSE;
10636 } else {
10637 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
10638 }
10639 }
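/*
 * Illustrative arithmetic (assumed value): if
 * memorystatus_available_pages_pressure is 10000 pages, the transition back
 * to normal above only fires once more than 10000 + (15 * 10000) / 100 ==
 * 11500 pages are available, i.e. a 15% hysteresis band that keeps the
 * pressure level from flapping.  The same 15% band is applied to
 * memorystatus_available_pages_critical in VM_PRESSURE_CRITICAL_TO_WARNING()
 * below.
 */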
10640
10641 boolean_t
10642 VM_PRESSURE_CRITICAL_TO_WARNING(void) {
10643
10644 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
10645 /* Available pages above our threshold */
10646 unsigned int target_threshold = memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100);
10647 if (memorystatus_available_pages > target_threshold) {
10648 return TRUE;
10649 }
10650 return FALSE;
10651 } else {
10652 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
10653 }
10654 }
10655 #endif /* VM_PRESSURE_EVENTS */
10656