1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71 #include <mach_kdb.h>
72 #include <advisory_pageout.h>
73
74 #include <mach/mach_types.h>
75 #include <mach/memory_object.h>
76 #include <mach/memory_object_default.h>
77 #include <mach/memory_object_control_server.h>
78 #include <mach/mach_host_server.h>
79 #include <mach/upl.h>
80 #include <mach/vm_map.h>
81 #include <mach/vm_param.h>
82 #include <mach/vm_statistics.h>
83 #include <mach/sdt.h>
84
85 #include <kern/kern_types.h>
86 #include <kern/counters.h>
87 #include <kern/host_statistics.h>
88 #include <kern/machine.h>
89 #include <kern/misc_protos.h>
90 #include <kern/thread.h>
91 #include <kern/xpr.h>
92 #include <kern/kalloc.h>
93
94 #include <machine/vm_tuning.h>
95
96 #if CONFIG_EMBEDDED
97 #include <sys/kern_memorystatus.h>
98 #endif
99
100 #include <vm/pmap.h>
101 #include <vm/vm_fault.h>
102 #include <vm/vm_map.h>
103 #include <vm/vm_object.h>
104 #include <vm/vm_page.h>
105 #include <vm/vm_pageout.h>
106 #include <vm/vm_protos.h> /* must be last */
107 #include <vm/memory_object.h>
108 #include <vm/vm_purgeable_internal.h>
109
110 /*
111 * ENCRYPTED SWAP:
112 */
113 #include <../bsd/crypto/aes/aes.h>
114
115
116 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */
117 #ifdef CONFIG_EMBEDDED
118 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 2048
119 #else
120 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100
121 #endif
122 #endif
123
124 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
125 #ifdef CONFIG_EMBEDDED
126 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
127 #else
128 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
129 #endif
130 #endif
131
132 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
133 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
134 #endif
135
136 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
137 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
138 #endif
139
140 #ifndef VM_PAGE_LAUNDRY_MAX
141 #define VM_PAGE_LAUNDRY_MAX 16UL /* maximum pageouts on a given pageout queue */
142 #endif /* VM_PAGE_LAUNDRY_MAX */
143
144 #ifndef VM_PAGEOUT_BURST_WAIT
145 #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
146 #endif /* VM_PAGEOUT_BURST_WAIT */
147
148 #ifndef VM_PAGEOUT_EMPTY_WAIT
149 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
150 #endif /* VM_PAGEOUT_EMPTY_WAIT */
151
152 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
153 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
154 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
155
156 #ifndef VM_PAGEOUT_IDLE_WAIT
157 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
158 #endif /* VM_PAGEOUT_IDLE_WAIT */
159
160 #ifndef VM_PAGE_SPECULATIVE_TARGET
161 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / 20)
162 #endif /* VM_PAGE_SPECULATIVE_TARGET */
163
164 #ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
165 #define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
166 #endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */
167
168
169 /*
170 * To obtain a reasonable LRU approximation, the inactive queue
171 * needs to be large enough to give pages on it a chance to be
172 * referenced a second time. This macro defines the fraction
173 * of active+inactive pages that should be inactive.
174 * The pageout daemon uses it to update vm_page_inactive_target.
175 *
176 * If vm_page_free_count falls below vm_page_free_target and
177 * vm_page_inactive_count is below vm_page_inactive_target,
178 * then the pageout daemon starts running.
179 */
180
181 #ifndef VM_PAGE_INACTIVE_TARGET
182 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
183 #endif /* VM_PAGE_INACTIVE_TARGET */
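/*
 * Worked example (illustrative only, not part of the build): with a
 * hypothetical pool of 120000 active + inactive + speculative pages,
 * VM_PAGE_INACTIVE_TARGET(120000) = 120000 / 3 = 40000, i.e. roughly a
 * third of the pageable pages are kept inactive so they get a chance
 * to be referenced again before being reclaimed.
 */
#if 0	/* compile-time spot check of the arithmetic above */
typedef char vm_page_inactive_target_example[
	(VM_PAGE_INACTIVE_TARGET(120000) == 40000) ? 1 : -1];
#endif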
184
185 /*
186 * Once the pageout daemon starts running, it keeps going
187 * until vm_page_free_count meets or exceeds vm_page_free_target.
188 */
189
190 #ifndef VM_PAGE_FREE_TARGET
191 #ifdef CONFIG_EMBEDDED
192 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
193 #else
194 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
195 #endif
196 #endif /* VM_PAGE_FREE_TARGET */
197
198 /*
199 * The pageout daemon always starts running once vm_page_free_count
200 * falls below vm_page_free_min.
201 */
202
203 #ifndef VM_PAGE_FREE_MIN
204 #ifdef CONFIG_EMBEDDED
205 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
206 #else
207 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
208 #endif
209 #endif /* VM_PAGE_FREE_MIN */
210
211 #define VM_PAGE_FREE_MIN_LIMIT 1500
212 #define VM_PAGE_FREE_TARGET_LIMIT 2000
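/*
 * Worked example (illustrative only, not part of the build): on the
 * non-embedded configuration, a hypothetical argument of 8000 pages
 * gives VM_PAGE_FREE_TARGET(8000) = 15 + 8000 / 80 = 115 and
 * VM_PAGE_FREE_MIN(8000) = 10 + 8000 / 100 = 90; VM_PAGE_FREE_MIN_LIMIT
 * and VM_PAGE_FREE_TARGET_LIMIT above presumably cap the computed
 * values on large memory configurations.
 */
#if 0	/* compile-time spot check of the non-embedded arithmetic */
typedef char vm_page_free_target_example[
	(VM_PAGE_FREE_TARGET(8000) == 115 &&
	 VM_PAGE_FREE_MIN(8000) == 90) ? 1 : -1];
#endif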
213
214
215 /*
216 * When vm_page_free_count falls below vm_page_free_reserved,
217 * only vm-privileged threads can allocate pages. vm-privilege
218 * allows the pageout daemon and default pager (and any other
219 * associated threads needed for default pageout) to continue
220 * operation by dipping into the reserved pool of pages.
221 */
222
223 #ifndef VM_PAGE_FREE_RESERVED
224 #define VM_PAGE_FREE_RESERVED(n) \
225 ((6 * VM_PAGE_LAUNDRY_MAX) + (n))
226 #endif /* VM_PAGE_FREE_RESERVED */
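/*
 * Worked example (illustrative only, not part of the build): with
 * VM_PAGE_LAUNDRY_MAX at 16 and a hypothetical n of 100,
 * VM_PAGE_FREE_RESERVED(100) = (6 * 16) + 100 = 196 pages are held
 * back so the pageout daemon and default pager can keep making
 * progress from the reserved pool when free memory is exhausted.
 */
#if 0	/* compile-time spot check of the arithmetic above */
typedef char vm_page_free_reserved_example[
	(VM_PAGE_FREE_RESERVED(100) == 196) ? 1 : -1];
#endif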
227
228 /*
229 * When we dequeue pages from the inactive list, they are
230 * reactivated (ie, put back on the active queue) if referenced.
231 * However, it is possible to starve the free list if other
232 * processors are referencing pages faster than we can turn off
233 * the referenced bit. So we limit the number of reactivations
234 * we will make per call of vm_pageout_scan().
235 */
236 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
237 #ifndef VM_PAGE_REACTIVATE_LIMIT
238 #ifdef CONFIG_EMBEDDED
239 #define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
240 #else
241 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
242 #endif
243 #endif /* VM_PAGE_REACTIVATE_LIMIT */
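/*
 * Worked example (illustrative only, not part of the build): on the
 * non-embedded configuration, a hypothetical 200000 active + inactive
 * pages gives (avail) * 1 / 20 = 10000, so
 * VM_PAGE_REACTIVATE_LIMIT(200000) = MAX(10000, 20000) = 20000
 * referenced pages may be reactivated in a single call of
 * vm_pageout_scan() before the limit takes effect.
 */
#if 0	/* compile-time spot check of the non-embedded arithmetic */
typedef char vm_page_reactivate_limit_example[
	(VM_PAGE_REACTIVATE_LIMIT(200000) == 20000) ? 1 : -1];
#endif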
244 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 100
245
246
247 /*
248 * must hold the page queues lock to
249 * manipulate this structure
250 */
251 struct vm_pageout_queue {
252 queue_head_t pgo_pending; /* laundry pages to be processed by pager's iothread */
253 unsigned int pgo_laundry; /* current count of laundry pages on queue or in flight */
254 unsigned int pgo_maxlaundry;
255
256 unsigned int pgo_idle:1, /* iothread is blocked waiting for work to do */
257 pgo_busy:1, /* iothread is currently processing request from pgo_pending */
258 pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */
259 :0;
260 };
261
262 #define VM_PAGE_Q_THROTTLED(q) \
263 ((q)->pgo_laundry >= (q)->pgo_maxlaundry)
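/*
 * Minimal user-space model of the laundry throttle above (illustrative
 * sketch only, never compiled here; the model_/MODEL_ names are invented
 * for the example): vm_pageout_cluster() bumps pgo_laundry when it queues
 * a page, vm_pageout_throttle_up() drops it when the pager finishes, and
 * vm_pageout_scan() backs off while VM_PAGE_Q_THROTTLED() is true.
 */
#if 0
#include <stdio.h>

struct model_pageout_queue {
	unsigned int pgo_laundry;	/* pages queued or in flight */
	unsigned int pgo_maxlaundry;	/* throttle point */
};

#define MODEL_Q_THROTTLED(q)	((q)->pgo_laundry >= (q)->pgo_maxlaundry)

int
main(void)
{
	struct model_pageout_queue q = { 0, 16 };	/* 16 == VM_PAGE_LAUNDRY_MAX */
	unsigned int queued;

	for (queued = 0; !MODEL_Q_THROTTLED(&q); queued++)
		q.pgo_laundry++;		/* vm_pageout_cluster() queues a page */

	printf("throttled after queueing %u pages\n", queued);

	q.pgo_laundry--;			/* vm_pageout_throttle_up(): one came back */
	printf("throttled now? %s\n", MODEL_Q_THROTTLED(&q) ? "yes" : "no");
	return 0;
}
#endif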
264
265
266 /*
267 * Exported variable used to broadcast the activation of the pageout scan.
268 * Working Set uses this to throttle its use of pmap removes. In this
269 * way, code which runs within memory in an uncontested context does
270 * not keep encountering soft faults.
271 */
272
273 unsigned int vm_pageout_scan_event_counter = 0;
274
275 /*
276 * Forward declarations for internal routines.
277 */
278
279 static void vm_pageout_garbage_collect(int);
280 static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
281 static void vm_pageout_iothread_external(void);
282 static void vm_pageout_iothread_internal(void);
283 static void vm_pageout_queue_steal(vm_page_t);
284
285 extern void vm_pageout_continue(void);
286 extern void vm_pageout_scan(void);
287
288 static thread_t vm_pageout_external_iothread = THREAD_NULL;
289 static thread_t vm_pageout_internal_iothread = THREAD_NULL;
290
291 unsigned int vm_pageout_reserved_internal = 0;
292 unsigned int vm_pageout_reserved_really = 0;
293
294 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
295 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
296 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
297 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
298 unsigned int vm_pageout_deadlock_relief = 0;
299 unsigned int vm_pageout_inactive_relief = 0;
300 unsigned int vm_pageout_burst_active_throttle = 0;
301 unsigned int vm_pageout_burst_inactive_throttle = 0;
302
303 /*
304 * Protection against zero fill flushing live working sets derived
305 * from existing backing store and files
306 */
307 unsigned int vm_accellerate_zf_pageout_trigger = 400;
308 unsigned int zf_queue_min_count = 100;
309 unsigned int vm_zf_count = 0;
310 unsigned int vm_zf_queue_count = 0;
311
312 /*
313 * These variables record the pageout daemon's actions:
314 * how many pages it looks at and what happens to those pages.
315 * No locking needed because only one thread modifies the variables.
316 */
317
318 unsigned int vm_pageout_active = 0; /* debugging */
319 unsigned int vm_pageout_inactive = 0; /* debugging */
320 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
321 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
322 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
323 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
324 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
325 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
326 unsigned int vm_pageout_inactive_used = 0; /* debugging */
327 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
328 unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
329 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
330 unsigned int vm_pageout_purged_objects = 0; /* debugging */
331 unsigned int vm_stat_discard = 0; /* debugging */
332 unsigned int vm_stat_discard_sent = 0; /* debugging */
333 unsigned int vm_stat_discard_failure = 0; /* debugging */
334 unsigned int vm_stat_discard_throttle = 0; /* debugging */
335 unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */
336 unsigned int vm_pageout_catch_ups = 0; /* debugging */
337 unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */
338
339 unsigned int vm_pageout_scan_active_throttled = 0;
340 unsigned int vm_pageout_scan_inactive_throttled = 0;
341 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
342 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
343 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
344 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
345 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
346 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
347 /*
348 * Backing store throttle when BS is exhausted
349 */
350 unsigned int vm_backing_store_low = 0;
351
352 unsigned int vm_pageout_out_of_line = 0;
353 unsigned int vm_pageout_in_place = 0;
354
355 /*
356 * ENCRYPTED SWAP:
357 * counters and statistics...
358 */
359 unsigned long vm_page_decrypt_counter = 0;
360 unsigned long vm_page_decrypt_for_upl_counter = 0;
361 unsigned long vm_page_encrypt_counter = 0;
362 unsigned long vm_page_encrypt_abort_counter = 0;
363 unsigned long vm_page_encrypt_already_encrypted_counter = 0;
364 boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */
365
366 struct vm_pageout_queue vm_pageout_queue_internal;
367 struct vm_pageout_queue vm_pageout_queue_external;
368
369 unsigned int vm_page_speculative_target = 0;
370
371 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
372
373 unsigned long vm_cs_validated_resets = 0;
374
375 /*
376 * Routine: vm_backing_store_disable
377 * Purpose:
378 * Suspend non-privileged threads wishing to extend
379 * backing store when we are low on backing store
380 * (Synchronized by caller)
381 */
382 void
383 vm_backing_store_disable(
384 boolean_t disable)
385 {
386 if(disable) {
387 vm_backing_store_low = 1;
388 } else {
389 if(vm_backing_store_low) {
390 vm_backing_store_low = 0;
391 thread_wakeup((event_t) &vm_backing_store_low);
392 }
393 }
394 }
395
396
397 #if MACH_CLUSTER_STATS
398 unsigned long vm_pageout_cluster_dirtied = 0;
399 unsigned long vm_pageout_cluster_cleaned = 0;
400 unsigned long vm_pageout_cluster_collisions = 0;
401 unsigned long vm_pageout_cluster_clusters = 0;
402 unsigned long vm_pageout_cluster_conversions = 0;
403 unsigned long vm_pageout_target_collisions = 0;
404 unsigned long vm_pageout_target_page_dirtied = 0;
405 unsigned long vm_pageout_target_page_freed = 0;
406 #define CLUSTER_STAT(clause) clause
407 #else /* MACH_CLUSTER_STATS */
408 #define CLUSTER_STAT(clause)
409 #endif /* MACH_CLUSTER_STATS */
410
411 /*
412 * Routine: vm_pageout_object_terminate
413 * Purpose:
414 * Destroy the pageout_object, and perform all of the
415 * required cleanup actions.
416 *
417 * In/Out conditions:
418 * The object must be locked, and will be returned locked.
419 */
420 void
421 vm_pageout_object_terminate(
422 vm_object_t object)
423 {
424 vm_object_t shadow_object;
425
426 /*
427 * Deal with the deallocation (last reference) of a pageout object
428 * (used for cleaning-in-place) by dropping the paging references/
429 * freeing pages in the original object.
430 */
431
432 assert(object->pageout);
433 shadow_object = object->shadow;
434 vm_object_lock(shadow_object);
435
436 while (!queue_empty(&object->memq)) {
437 vm_page_t p, m;
438 vm_object_offset_t offset;
439
440 p = (vm_page_t) queue_first(&object->memq);
441
442 assert(p->private);
443 assert(p->pageout);
444 p->pageout = FALSE;
445 assert(!p->cleaning);
446
447 offset = p->offset;
448 VM_PAGE_FREE(p);
449 p = VM_PAGE_NULL;
450
451 m = vm_page_lookup(shadow_object,
452 offset + object->shadow_offset);
453
454 if(m == VM_PAGE_NULL)
455 continue;
456 assert(m->cleaning);
457 /* used as a trigger on upl_commit etc to recognize the */
458 /* pageout daemon's subsequent desire to pageout a cleaning */
459 /* page. When the bit is on the upl commit code will */
460 /* respect the pageout bit in the target page over the */
461 /* caller's page list indication */
462 m->dump_cleaning = FALSE;
463
464 assert((m->dirty) || (m->precious) ||
465 (m->busy && m->cleaning));
466
467 /*
468 * Handle the trusted pager throttle.
469 * Also decrement the burst throttle (if external).
470 */
471 vm_page_lock_queues();
472 if (m->laundry) {
473 vm_pageout_throttle_up(m);
474 }
475
476 /*
477 * Handle the "target" page(s). These pages are to be freed if
478 * successfully cleaned. Target pages are always busy, and are
479 * wired exactly once. The initial target pages are not mapped,
480 * (so cannot be referenced or modified) but converted target
481 * pages may have been modified between the selection as an
482 * adjacent page and conversion to a target.
483 */
484 if (m->pageout) {
485 assert(m->busy);
486 assert(m->wire_count == 1);
487 m->cleaning = FALSE;
488 m->encrypted_cleaning = FALSE;
489 m->pageout = FALSE;
490 #if MACH_CLUSTER_STATS
491 if (m->wanted) vm_pageout_target_collisions++;
492 #endif
493 /*
494 * Revoke all access to the page. Since the object is
495 * locked, and the page is busy, this prevents the page
496 * from being dirtied after the pmap_disconnect() call
497 * returns.
498 *
499 * Since the page is left "dirty" but "not modified", we
500 * can detect whether the page was redirtied during
501 * pageout by checking the modify state.
502 */
503 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
504 m->dirty = TRUE;
505 else
506 m->dirty = FALSE;
507
508 if (m->dirty) {
509 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
510 vm_page_unwire(m);/* reactivates */
511 VM_STAT_INCR(reactivations);
512 PAGE_WAKEUP_DONE(m);
513 } else {
514 CLUSTER_STAT(vm_pageout_target_page_freed++;)
515 vm_page_free(m);/* clears busy, etc. */
516 }
517 vm_page_unlock_queues();
518 continue;
519 }
520 /*
521 * Handle the "adjacent" pages. These pages were cleaned in
522 * place, and should be left alone.
523 * If prep_pin_count is nonzero, then someone is using the
524 * page, so make it active.
525 */
526 if (!m->active && !m->inactive && !m->throttled && !m->private) {
527 if (m->reference)
528 vm_page_activate(m);
529 else
530 vm_page_deactivate(m);
531 }
532 if((m->busy) && (m->cleaning)) {
533
534 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
535 m->busy = FALSE;
536
537 /* We do not re-set m->dirty ! */
538 /* The page was busy so no extraneous activity */
539 /* could have occurred. COPY_INTO is a read into the */
540 /* new pages. CLEAN_IN_PLACE does actually write */
541 /* out the pages but handling outside of this code */
542 /* will take care of resetting dirty. We clear the */
543 /* modify however for the Programmed I/O case. */
544 pmap_clear_modify(m->phys_page);
545
546 m->absent = FALSE;
547 m->overwriting = FALSE;
548 } else if (m->overwriting) {
549 /* alternate request page list, write to page_list */
550 /* case. Occurs when the original page was wired */
551 /* at the time of the list request */
552 assert(m->wire_count != 0);
553 vm_page_unwire(m);/* reactivates */
554 m->overwriting = FALSE;
555 } else {
556 /*
557 * Set the dirty state according to whether or not the page was
558 * modified during the pageout. Note that we purposefully do
559 * NOT call pmap_clear_modify since the page is still mapped.
560 * If the page were to be dirtied between the 2 calls, this
561 * fact would be lost. This code is only necessary to
562 * maintain statistics, since the pmap module is always
563 * consulted if m->dirty is false.
564 */
565 #if MACH_CLUSTER_STATS
566 m->dirty = pmap_is_modified(m->phys_page);
567
568 if (m->dirty) vm_pageout_cluster_dirtied++;
569 else vm_pageout_cluster_cleaned++;
570 if (m->wanted) vm_pageout_cluster_collisions++;
571 #else
572 m->dirty = 0;
573 #endif
574 }
575 m->cleaning = FALSE;
576 m->encrypted_cleaning = FALSE;
577
578 /*
579 * Wakeup any thread waiting for the page to be un-cleaning.
580 */
581 PAGE_WAKEUP(m);
582 vm_page_unlock_queues();
583 }
584 /*
585 * Account for the paging reference taken in vm_paging_object_allocate.
586 */
587 vm_object_paging_end(shadow_object);
588 vm_object_unlock(shadow_object);
589
590 assert(object->ref_count == 0);
591 assert(object->paging_in_progress == 0);
592 assert(object->resident_page_count == 0);
593 return;
594 }
595
596 /*
597 * Routine: vm_pageclean_setup
598 *
599 * Purpose: set up a page to be cleaned (made non-dirty), but not
600 * necessarily flushed from the VM page cache.
601 * This is accomplished by cleaning in place.
602 *
603 * The page must not be busy, and the object and page
604 * queues must be locked.
605 *
606 */
607 void
608 vm_pageclean_setup(
609 vm_page_t m,
610 vm_page_t new_m,
611 vm_object_t new_object,
612 vm_object_offset_t new_offset)
613 {
614 assert(!m->busy);
615 #if 0
616 assert(!m->cleaning);
617 #endif
618
619 XPR(XPR_VM_PAGEOUT,
620 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
621 (integer_t)m->object, m->offset, (integer_t)m,
622 (integer_t)new_m, new_offset);
623
624 pmap_clear_modify(m->phys_page);
625
626 /*
627 * Mark original page as cleaning in place.
628 */
629 m->cleaning = TRUE;
630 m->dirty = TRUE;
631 m->precious = FALSE;
632
633 /*
634 * Convert the fictitious page to a private shadow of
635 * the real page.
636 */
637 assert(new_m->fictitious);
638 assert(new_m->phys_page == vm_page_fictitious_addr);
639 new_m->fictitious = FALSE;
640 new_m->private = TRUE;
641 new_m->pageout = TRUE;
642 new_m->phys_page = m->phys_page;
643 vm_page_wire(new_m);
644
645 vm_page_insert(new_m, new_object, new_offset);
646 assert(!new_m->wanted);
647 new_m->busy = FALSE;
648 }
649
650 /*
651 * Routine: vm_pageout_initialize_page
652 * Purpose:
653 * Causes the specified page to be initialized in
654 * the appropriate memory object. This routine is used to push
655 * pages into a copy-object when they are modified in the
656 * permanent object.
657 *
658 * The page is moved to a temporary object and paged out.
659 *
660 * In/out conditions:
661 * The page in question must not be on any pageout queues.
662 * The object to which it belongs must be locked.
663 * The page must be busy, but not hold a paging reference.
664 *
665 * Implementation:
666 * Move this page to a completely new object.
667 */
668 void
669 vm_pageout_initialize_page(
670 vm_page_t m)
671 {
672 vm_object_t object;
673 vm_object_offset_t paging_offset;
674 vm_page_t holding_page;
675 memory_object_t pager;
676
677 XPR(XPR_VM_PAGEOUT,
678 "vm_pageout_initialize_page, page 0x%X\n",
679 (integer_t)m, 0, 0, 0, 0);
680 assert(m->busy);
681
682 /*
683 * Verify that we really want to clean this page
684 */
685 assert(!m->absent);
686 assert(!m->error);
687 assert(m->dirty);
688
689 /*
690 * Create a paging reference to let us play with the object.
691 */
692 object = m->object;
693 paging_offset = m->offset + object->paging_offset;
694
695 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
696 VM_PAGE_FREE(m);
697 panic("reservation without pageout?"); /* alan */
698 vm_object_unlock(object);
699
700 return;
701 }
702
703 /*
704 * If there's no pager, then we can't clean the page. This should
705 * never happen since this should be a copy object and therefore not
706 * an external object, so the pager should always be there.
707 */
708
709 pager = object->pager;
710
711 if (pager == MEMORY_OBJECT_NULL) {
712 VM_PAGE_FREE(m);
713 panic("missing pager for copy object");
714 return;
715 }
716
717 /* set the page for future call to vm_fault_list_request */
718 vm_object_paging_begin(object);
719 holding_page = NULL;
720 vm_page_lock_queues();
721 pmap_clear_modify(m->phys_page);
722 m->dirty = TRUE;
723 m->busy = TRUE;
724 m->list_req_pending = TRUE;
725 m->cleaning = TRUE;
726 m->pageout = TRUE;
727 vm_page_wire(m);
728 vm_page_unlock_queues();
729 vm_object_unlock(object);
730
731 /*
732 * Write the data to its pager.
733 * Note that the data is passed by naming the new object,
734 * not a virtual address; the pager interface has been
735 * manipulated to use the "internal memory" data type.
736 * [The object reference from its allocation is donated
737 * to the eventual recipient.]
738 */
739 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
740
741 vm_object_lock(object);
742 vm_object_paging_end(object);
743 }
744
745 #if MACH_CLUSTER_STATS
746 #define MAXCLUSTERPAGES 16
747 struct {
748 unsigned long pages_in_cluster;
749 unsigned long pages_at_higher_offsets;
750 unsigned long pages_at_lower_offsets;
751 } cluster_stats[MAXCLUSTERPAGES];
752 #endif /* MACH_CLUSTER_STATS */
753
754
755 /*
756 * vm_pageout_cluster:
757 *
758 * Given a page, queue it to the appropriate I/O thread,
759 * which will page it out and attempt to clean adjacent pages
760 * in the same operation.
761 *
762 * The page must be busy, and the object and queues locked. We will take a
763 * paging reference to prevent deallocation or collapse when we
764 * release the object lock back at the call site. The I/O thread
765 * is responsible for consuming this reference.
766 *
767 * The page must not be on any pageout queue.
768 */
769
770 void
771 vm_pageout_cluster(vm_page_t m)
772 {
773 vm_object_t object = m->object;
774 struct vm_pageout_queue *q;
775
776
777 XPR(XPR_VM_PAGEOUT,
778 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
779 (integer_t)object, m->offset, (integer_t)m, 0, 0);
780
781 /*
782 * Only a certain kind of page is appreciated here.
783 */
784 assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
785 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
786 assert(!m->throttled);
787
788 /*
789 * protect the object from collapse -
790 * locking in the object's paging_offset.
791 */
792 vm_object_paging_begin(object);
793
794 /*
795 * set the page for future call to vm_fault_list_request
796 * page should already be marked busy
797 */
798 vm_page_wire(m);
799 m->list_req_pending = TRUE;
800 m->cleaning = TRUE;
801 m->pageout = TRUE;
802 m->laundry = TRUE;
803
804 if (object->internal == TRUE)
805 q = &vm_pageout_queue_internal;
806 else
807 q = &vm_pageout_queue_external;
808 q->pgo_laundry++;
809
810 m->pageout_queue = TRUE;
811 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
812
813 if (q->pgo_idle == TRUE) {
814 q->pgo_idle = FALSE;
815 thread_wakeup((event_t) &q->pgo_pending);
816 }
817 }
818
819
820 unsigned long vm_pageout_throttle_up_count = 0;
821
822 /*
823 * A page is back from laundry. See if there are some pages waiting to
824 * go to laundry and if we can let some of them go now.
825 *
826 * Object and page queues must be locked.
827 */
828 void
829 vm_pageout_throttle_up(
830 vm_page_t m)
831 {
832 struct vm_pageout_queue *q;
833
834 vm_pageout_throttle_up_count++;
835
836 assert(m->laundry);
837 assert(m->object != VM_OBJECT_NULL);
838 assert(m->object != kernel_object);
839
840 if (m->object->internal == TRUE)
841 q = &vm_pageout_queue_internal;
842 else
843 q = &vm_pageout_queue_external;
844
845 m->laundry = FALSE;
846 q->pgo_laundry--;
847
848 if (q->pgo_throttled == TRUE) {
849 q->pgo_throttled = FALSE;
850 thread_wakeup((event_t) &q->pgo_laundry);
851 }
852 }
853
854
855 /*
856 * vm_pageout_scan does the dirty work for the pageout daemon.
857 * It returns with vm_page_queue_free_lock held and
858 * vm_page_free_wanted == 0.
859 */
860
861 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT (3 * MAX_UPL_TRANSFER)
862
863 #define FCS_IDLE 0
864 #define FCS_DELAYED 1
865 #define FCS_DEADLOCK_DETECTED 2
866
867 struct flow_control {
868 int state;
869 mach_timespec_t ts;
870 };
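/*
 * Illustrative sketch (never compiled here) of how vm_pageout_scan()
 * drives this state machine when the default-pager queue stays
 * throttled: idle until throttled, arm a deadlock timer, and start
 * force-moving pages once that timer expires with no laundry completing.
 * The model_* / MODEL_* names are invented for the example; the timers
 * and the actual page movement are elided.
 */
#if 0
enum model_fcs { MODEL_FCS_IDLE, MODEL_FCS_DELAYED, MODEL_FCS_DEADLOCK };

static enum model_fcs
model_flow_control_step(enum model_fcs state, int queue_throttled,
    int timer_expired, int relief_pages_remaining)
{
	switch (state) {
	case MODEL_FCS_IDLE:
		/* FCS_IDLE: arm the deadlock timer once throttled */
		return queue_throttled ? MODEL_FCS_DELAYED : MODEL_FCS_IDLE;
	case MODEL_FCS_DELAYED:
		if (!queue_throttled)
			return MODEL_FCS_IDLE;
		/* FCS_DELAYED: deadline hit with no laundry completions */
		return timer_expired ? MODEL_FCS_DEADLOCK : MODEL_FCS_DELAYED;
	case MODEL_FCS_DEADLOCK:
	default:
		/* FCS_DEADLOCK_DETECTED: keep moving pages until the
		 * relief target drains, then re-arm the timer */
		return relief_pages_remaining ? MODEL_FCS_DEADLOCK
					      : MODEL_FCS_DELAYED;
	}
}

int
main(void)
{
	enum model_fcs s = MODEL_FCS_IDLE;

	s = model_flow_control_step(s, 1, 0, 0);	/* throttled      -> DELAYED  */
	s = model_flow_control_step(s, 1, 1, 0);	/* timer expired  -> DEADLOCK */
	s = model_flow_control_step(s, 1, 0, 0);	/* relief drained -> DELAYED  */
	return (s == MODEL_FCS_DELAYED) ? 0 : 1;
}
#endif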
871
872 void
873 vm_pageout_scan(void)
874 {
875 unsigned int loop_count = 0;
876 unsigned int inactive_burst_count = 0;
877 unsigned int active_burst_count = 0;
878 unsigned int reactivated_this_call;
879 unsigned int reactivate_limit;
880 vm_page_t local_freeq = NULL;
881 int local_freed = 0;
882 int delayed_unlock;
883 int need_internal_inactive = 0;
884 int refmod_state = 0;
885 int vm_pageout_deadlock_target = 0;
886 struct vm_pageout_queue *iq;
887 struct vm_pageout_queue *eq;
888 struct vm_speculative_age_q *sq;
889 struct flow_control flow_control;
890 boolean_t inactive_throttled = FALSE;
891 boolean_t try_failed;
892 mach_timespec_t ts;
893 unsigned int msecs = 0;
894 vm_object_t object;
895 vm_object_t last_object_tried;
896 int zf_ratio;
897 int zf_run_count;
898 uint32_t catch_up_count = 0;
899 uint32_t inactive_reclaim_run;
900 boolean_t forced_reclaim;
901
902 flow_control.state = FCS_IDLE;
903 iq = &vm_pageout_queue_internal;
904 eq = &vm_pageout_queue_external;
905 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
906
907
908 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
909
910
911 vm_page_lock_queues();
912 delayed_unlock = 1; /* must be nonzero if Qs are locked, 0 if unlocked */
913
914 /*
915 * Calculate the max number of referenced pages on the inactive
916 * queue that we will reactivate.
917 */
918 reactivated_this_call = 0;
919 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
920 vm_page_inactive_count);
921 inactive_reclaim_run = 0;
922
923
924 /*???*/ /*
925 * We want to gradually dribble pages from the active queue
926 * to the inactive queue. If we let the inactive queue get
927 * very small, and then suddenly dump many pages into it,
928 * those pages won't get a sufficient chance to be referenced
929 * before we start taking them from the inactive queue.
930 *
931 * We must limit the rate at which we send pages to the pagers.
932 * data_write messages consume memory, for message buffers and
933 * for map-copy objects. If we get too far ahead of the pagers,
934 * we can potentially run out of memory.
935 *
936 * We can use the laundry count to limit directly the number
937 * of pages outstanding to the default pager. A similar
938 * strategy for external pagers doesn't work, because
939 * external pagers don't have to deallocate the pages sent them,
940 * and because we might have to send pages to external pagers
941 * even if they aren't processing writes. So we also
942 * use a burst count to limit writes to external pagers.
943 *
944 * When memory is very tight, we can't rely on external pagers to
945 * clean pages. They probably aren't running, because they
946 * aren't vm-privileged. If we kept sending dirty pages to them,
947 * we could exhaust the free list.
948 */
949
950
951 Restart:
952 assert(delayed_unlock!=0);
953
954 /*
955 * A page is "zero-filled" if it was not paged in from somewhere,
956 * and it belongs to an object at least VM_ZF_OBJECT_SIZE_THRESHOLD big.
957 * Recalculate the zero-filled page ratio. We use this to apportion
958 * victimized pages between the normal and zero-filled inactive
959 * queues according to their relative abundance in memory. Thus if a task
960 * is flooding memory with zf pages, we begin to hunt them down.
961 * It would be better to throttle greedy tasks at a higher level,
962 * but at the moment mach vm cannot do this.
963 */
964 {
965 uint32_t total = vm_page_active_count + vm_page_inactive_count;
966 uint32_t normal = total - vm_zf_count;
967
968 /* zf_ratio is the number of zf pages we victimize per normal page */
969
970 if (vm_zf_count < vm_accellerate_zf_pageout_trigger)
971 zf_ratio = 0;
972 else if ((vm_zf_count <= normal) || (normal == 0))
973 zf_ratio = 1;
974 else
975 zf_ratio = vm_zf_count / normal;
976
977 zf_run_count = 0;
978 }
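/*
 * Worked example (illustrative only): with a hypothetical 50000
 * zero-fill pages out of 80000 active + inactive pages, normal is
 * 30000 and zf_ratio = 50000 / 30000 = 1, so the victim selection
 * below alternates roughly one zero-fill page per normal inactive
 * page; if zf pages outnumbered normal pages four to one, zf_ratio
 * would be 4 and up to four zf pages would be taken for each normal
 * page (subject to zf_queue_min_count).
 */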
979
980 /*
981 * Recalculate vm_page_inactive_target.
982 */
983 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
984 vm_page_inactive_count +
985 vm_page_speculative_count);
986 /*
987 * don't want to wake the pageout_scan thread up every time we fall below
988 * the targets... set a low water mark at 0.25% below the target
989 */
990 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);
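/*
 * Worked example (illustrative only): with an inactive target of
 * 40000 pages the low water mark is 40000 - 40000 / 400 = 39900,
 * i.e. 0.25% of slack before the scan thread is woken again.
 */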
991
992 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
993 vm_page_inactive_count);
994 object = NULL;
995 last_object_tried = NULL;
996 try_failed = FALSE;
997
998 if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count))
999 catch_up_count = vm_page_inactive_count + vm_page_speculative_count;
1000 else
1001 catch_up_count = 0;
1002
1003 for (;;) {
1004 vm_page_t m;
1005
1006 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1007
1008 if (delayed_unlock == 0) {
1009 vm_page_lock_queues();
1010 delayed_unlock = 1;
1011 }
1012
1013 /*
1014 * Don't sweep through active queue more than the throttle
1015 * which should be kept relatively low
1016 */
1017 active_burst_count = vm_pageout_burst_active_throttle;
1018
1019 /*
1020 * Move pages from active to inactive.
1021 */
1022 if (need_internal_inactive == 0 && (vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
1023 goto done_moving_active_pages;
1024
1025 while (!queue_empty(&vm_page_queue_active) &&
1026 (need_internal_inactive || active_burst_count)) {
1027
1028 if (active_burst_count)
1029 active_burst_count--;
1030
1031 vm_pageout_active++;
1032
1033 m = (vm_page_t) queue_first(&vm_page_queue_active);
1034
1035 assert(m->active && !m->inactive);
1036 assert(!m->laundry);
1037 assert(m->object != kernel_object);
1038 assert(m->phys_page != vm_page_guard_addr);
1039
1040 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1041
1042 /*
1043 * Try to lock object; since we've already got the
1044 * page queues lock, we can only 'try' for this one.
1045 * if the 'try' fails, we need to do a mutex_pause
1046 * to allow the owner of the object lock a chance to
1047 * run... otherwise, we're likely to trip over this
1048 * object in the same state as we work our way through
1049 * the queue... clumps of pages associated with the same
1050 * object are fairly typical on the inactive and active queues
1051 */
1052 if (m->object != object) {
1053 if (object != NULL) {
1054 vm_object_unlock(object);
1055 object = NULL;
1056 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1057 }
1058 if (!vm_object_lock_try_scan(m->object)) {
1059 /*
1060 * move page to end of active queue and continue
1061 */
1062 queue_remove(&vm_page_queue_active, m,
1063 vm_page_t, pageq);
1064 queue_enter(&vm_page_queue_active, m,
1065 vm_page_t, pageq);
1066
1067 try_failed = TRUE;
1068
1069 m = (vm_page_t) queue_first(&vm_page_queue_active);
1070 /*
1071 * this is the next object we're going to be interested in
1072 * try to make sure it's available after the mutex_yield
1073 * returns control
1074 */
1075 vm_pageout_scan_wants_object = m->object;
1076
1077 goto done_with_activepage;
1078 }
1079 object = m->object;
1080
1081 try_failed = FALSE;
1082 }
1083
1084 /*
1085 * if the page is BUSY, then we pull it
1086 * off the active queue and leave it alone.
1087 * when BUSY is cleared, it will get stuck
1088 * back on the appropriate queue
1089 */
1090 if (m->busy) {
1091 queue_remove(&vm_page_queue_active, m,
1092 vm_page_t, pageq);
1093 m->pageq.next = NULL;
1094 m->pageq.prev = NULL;
1095
1096 if (!m->fictitious)
1097 vm_page_active_count--;
1098 m->active = FALSE;
1099
1100 goto done_with_activepage;
1101 }
1102
1103 /*
1104 * Deactivate the page while holding the object
1105 * locked, so we know the page is still not busy.
1106 * This should prevent races between pmap_enter
1107 * and pmap_clear_reference. The page might be
1108 * absent or fictitious, but vm_page_deactivate
1109 * can handle that.
1110 */
1111 vm_page_deactivate(m);
1112
1113 if (need_internal_inactive) {
1114 vm_pageout_scan_active_throttle_success++;
1115 need_internal_inactive--;
1116 }
1117 done_with_activepage:
1118 if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) {
1119
1120 if (object != NULL) {
1121 vm_object_unlock(object);
1122 object = NULL;
1123 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1124 }
1125 if (local_freeq) {
1126 vm_page_free_list(local_freeq);
1127
1128 local_freeq = NULL;
1129 local_freed = 0;
1130 }
1131 mutex_yield(&vm_page_queue_lock);
1132
1133 delayed_unlock = 1;
1134
1135 /*
1136 * continue the while loop processing
1137 * the active queue... need to hold
1138 * the page queues lock
1139 */
1140 }
1141 }
1142
1143
1144
1145 /**********************************************************************
1146 * above this point we're playing with the active queue
1147 * below this point we're playing with the throttling mechanisms
1148 * and the inactive queue
1149 **********************************************************************/
1150
1151 done_moving_active_pages:
1152
1153 /*
1154 * We are done if we have met our target *and*
1155 * nobody is still waiting for a page.
1156 */
1157 if (vm_page_free_count + local_freed >= vm_page_free_target) {
1158 if (object != NULL) {
1159 vm_object_unlock(object);
1160 object = NULL;
1161 }
1162 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1163
1164 if (local_freeq) {
1165 vm_page_free_list(local_freeq);
1166
1167 local_freeq = NULL;
1168 local_freed = 0;
1169 }
1170 /*
1171 * inactive target still not met... keep going
1172 * until we get the queues balanced
1173 */
1174 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
1175 !queue_empty(&vm_page_queue_active))
1176 continue;
1177
1178 mutex_lock(&vm_page_queue_free_lock);
1179
1180 if ((vm_page_free_count >= vm_page_free_target) &&
1181 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
1182
1183 vm_page_unlock_queues();
1184
1185 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1186
1187 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1188
1189 return;
1190 }
1191 mutex_unlock(&vm_page_queue_free_lock);
1192 }
1193 /*
1194 * Before anything, we check if we have any ripe volatile objects around.
1195 * If so, purge the first and see what it gives us.
1196 */
1197 assert (available_for_purge>=0);
1198 if (available_for_purge)
1199 {
1200 if (object != NULL) {
1201 vm_object_unlock(object);
1202 object = NULL;
1203 }
1204 vm_purgeable_object_purge_one();
1205 continue;
1206 }
1207
1208 if (queue_empty(&sq->age_q) && vm_page_speculative_count) {
1209 /*
1210 * try to pull pages from the aging bins...
1211 * see vm_page.h for an explanation of how
1212 * this mechanism works
1213 */
1214 struct vm_speculative_age_q *aq;
1215 mach_timespec_t ts_fully_aged;
1216 boolean_t can_steal = FALSE;
1217
1218 aq = &vm_page_queue_speculative[speculative_steal_index];
1219
1220 while (queue_empty(&aq->age_q)) {
1221
1222 speculative_steal_index++;
1223
1224 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
1225 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
1226
1227 aq = &vm_page_queue_speculative[speculative_steal_index];
1228 }
1229 if (vm_page_speculative_count > vm_page_speculative_target)
1230 can_steal = TRUE;
1231 else {
1232 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) / 1000;
1233 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) % 1000)
1234 * 1000 * NSEC_PER_USEC;
1235
1236 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
1237
1238 clock_get_system_nanotime(&ts.tv_sec, (unsigned *)&ts.tv_nsec);
1239
1240 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
1241 can_steal = TRUE;
1242 }
1243 if (can_steal == TRUE)
1244 vm_page_speculate_ageit(aq);
1245 }
1246
1247 /*
1248 * Sometimes we have to pause:
1249 * 1) No inactive pages - nothing to do.
1250 * 2) Flow control - default pageout queue is full
1251 * 3) Loop control - no acceptable pages found on the inactive queue
1252 * within the last vm_pageout_burst_inactive_throttle iterations
1253 */
1254 if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf) && queue_empty(&sq->age_q) &&
1255 (VM_PAGE_Q_THROTTLED(iq) || queue_empty(&vm_page_queue_throttled))) {
1256 vm_pageout_scan_empty_throttle++;
1257 msecs = vm_pageout_empty_wait;
1258 goto vm_pageout_scan_delay;
1259
1260 } else if (inactive_burst_count >= vm_pageout_burst_inactive_throttle) {
1261 vm_pageout_scan_burst_throttle++;
1262 msecs = vm_pageout_burst_wait;
1263 goto vm_pageout_scan_delay;
1264
1265 } else if (VM_PAGE_Q_THROTTLED(iq) && IP_VALID(memory_manager_default)) {
1266
1267 switch (flow_control.state) {
1268
1269 case FCS_IDLE:
1270 reset_deadlock_timer:
1271 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
1272 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
1273 clock_get_system_nanotime(&flow_control.ts.tv_sec,
1274 (unsigned *)&flow_control.ts.tv_nsec);
1275 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
1276
1277 flow_control.state = FCS_DELAYED;
1278 msecs = vm_pageout_deadlock_wait;
1279
1280 break;
1281
1282 case FCS_DELAYED:
1283 clock_get_system_nanotime(&ts.tv_sec,
1284 (unsigned *)&ts.tv_nsec);
1285
1286 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
1287 /*
1288 * the pageout thread for the default pager is potentially
1289 * deadlocked since the
1290 * default pager queue has been throttled for more than the
1291 * allowable time... we need to move some clean pages or dirty
1292 * pages belonging to the external pagers if they aren't throttled
1293 * vm_page_free_wanted represents the number of threads currently
1294 * blocked waiting for pages... we'll move one page for each of
1295 * these plus a fixed amount to break the logjam... once we're done
1296 * moving this number of pages, we'll re-enter the FCS_DELAYED state
1297 * with a new timeout target since we have no way of knowing
1298 * whether we've broken the deadlock except through observation
1299 * of the queue associated with the default pager... we need to
1300 * stop moving pages and allow the system to run to see what
1301 * state it settles into.
1302 */
1303 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
1304 vm_pageout_scan_deadlock_detected++;
1305 flow_control.state = FCS_DEADLOCK_DETECTED;
1306
1307 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1308 goto consider_inactive;
1309 }
1310 /*
1311 * just resniff instead of trying
1312 * to compute a new delay time... we're going to be
1313 * awakened immediately upon a laundry completion,
1314 * so we won't wait any longer than necessary
1315 */
1316 msecs = vm_pageout_idle_wait;
1317 break;
1318
1319 case FCS_DEADLOCK_DETECTED:
1320 if (vm_pageout_deadlock_target)
1321 goto consider_inactive;
1322 goto reset_deadlock_timer;
1323
1324 }
1325 vm_pageout_scan_throttle++;
1326 iq->pgo_throttled = TRUE;
1327 vm_pageout_scan_delay:
1328 if (object != NULL) {
1329 vm_object_unlock(object);
1330 object = NULL;
1331 }
1332 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1333
1334 if (local_freeq) {
1335 vm_page_free_list(local_freeq);
1336
1337 local_freeq = NULL;
1338 local_freed = 0;
1339 }
1340 #if CONFIG_EMBEDDED
1341 {
1342 int percent_avail;
1343
1344 /*
1345 * Decide if we need to send a memory status notification.
1346 */
1347 percent_avail =
1348 (vm_page_active_count + vm_page_inactive_count +
1349 vm_page_speculative_count + vm_page_free_count +
1350 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
1351 atop_64(max_mem);
1352 if (percent_avail >= (kern_memorystatus_level + 5) ||
1353 percent_avail <= (kern_memorystatus_level - 5)) {
1354 kern_memorystatus_level = percent_avail;
1355 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1356 }
1357 }
1358 #endif
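/*
 * Worked example for the CONFIG_EMBEDDED notification above
 * (illustrative only): with a hypothetical 60000 available pages
 * (active + inactive + speculative + free, plus purgeable when no
 * default pager is registered) out of 250000 total pages of physical
 * memory, percent_avail = 60000 * 100 / 250000 = 24, and the wakeup
 * fires only when that figure drifts 5 or more points away from the
 * last reported kern_memorystatus_level.
 */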
1359 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
1360
1361 counter(c_vm_pageout_scan_block++);
1362
1363 vm_page_unlock_queues();
1364
1365 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1366
1367 thread_block(THREAD_CONTINUE_NULL);
1368
1369 vm_page_lock_queues();
1370 delayed_unlock = 1;
1371
1372 iq->pgo_throttled = FALSE;
1373
1374 if (loop_count >= vm_page_inactive_count)
1375 loop_count = 0;
1376 inactive_burst_count = 0;
1377
1378 goto Restart;
1379 /*NOTREACHED*/
1380 }
1381
1382
1383 flow_control.state = FCS_IDLE;
1384 consider_inactive:
1385 loop_count++;
1386 inactive_burst_count++;
1387 vm_pageout_inactive++;
1388
1389 /* Choose a victim. */
1390
1391 while (1) {
1392 m = NULL;
1393
1394 /*
1395 * the most eligible pages are ones that were throttled because the
1396 * pager wasn't ready at the time. If a pager is ready now,
1397 * see if one of these is useful.
1398 */
1399 if (!VM_PAGE_Q_THROTTLED(iq) && !queue_empty(&vm_page_queue_throttled)) {
1400 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
1401 break;
1402 }
1403
1404 /*
1405 * The second most eligible pages are ones we paged in speculatively,
1406 * but which have not yet been touched.
1407 */
1408 if ( !queue_empty(&sq->age_q) ) {
1409 m = (vm_page_t) queue_first(&sq->age_q);
1410 break;
1411 }
1412 /*
1413 * Time for a zero-filled inactive page?
1414 */
1415 if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) ||
1416 queue_empty(&vm_page_queue_inactive)) {
1417 if ( !queue_empty(&vm_page_queue_zf) ) {
1418 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1419 zf_run_count++;
1420 break;
1421 }
1422 }
1423 /*
1424 * It's either a normal inactive page or nothing.
1425 */
1426 if ( !queue_empty(&vm_page_queue_inactive) ) {
1427 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1428 zf_run_count = 0;
1429 break;
1430 }
1431
1432 panic("vm_pageout: no victim");
1433 }
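/*
 * Recap of the victim selection above (descriptive only): throttled
 * pages whose pager has since become ready are taken first, then
 * speculative pages that aged out untouched, then zero-fill pages
 * while zf_run_count is below zf_ratio (or when the normal inactive
 * queue is empty), and finally ordinary inactive pages; if every
 * queue is empty the scan panics, since the pause checks above
 * should already have caught that case.
 */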
1434
1435 assert(!m->active && (m->inactive || m->speculative || m->throttled));
1436 assert(!m->laundry);
1437 assert(m->object != kernel_object);
1438 assert(m->phys_page != vm_page_guard_addr);
1439
1440 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1441
1442 /*
1443 * check to see if we currently are working
1444 * with the same object... if so, we've
1445 * already got the lock
1446 */
1447 if (m->object != object) {
1448 /*
1449 * the object associated with candidate page is
1450 * different from the one we were just working
1451 * with... dump the lock if we still own it
1452 */
1453 if (object != NULL) {
1454 vm_object_unlock(object);
1455 object = NULL;
1456 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1457 }
1458 /*
1459 * Try to lock object; since we've already got the
1460 * page queues lock, we can only 'try' for this one.
1461 * if the 'try' fails, we need to do a mutex_pause
1462 * to allow the owner of the object lock a chance to
1463 * run... otherwise, we're likely to trip over this
1464 * object in the same state as we work our way through
1465 * the queue... clumps of pages associated with the same
1466 * object are fairly typical on the inactive and active queues
1467 */
1468 if (!vm_object_lock_try_scan(m->object)) {
1469 /*
1470 * Move page to end and continue.
1471 * Don't re-issue ticket
1472 */
1473 if (m->zero_fill) {
1474 queue_remove(&vm_page_queue_zf, m,
1475 vm_page_t, pageq);
1476 queue_enter(&vm_page_queue_zf, m,
1477 vm_page_t, pageq);
1478 } else if (m->speculative) {
1479 remque(&m->pageq);
1480 m->speculative = FALSE;
1481 vm_page_speculative_count--;
1482
1483 /*
1484 * move to the tail of the inactive queue
1485 * to get it out of the way... the speculative
1486 * queue is generally too small to depend
1487 * on there being enough pages from other
1488 * objects to make cycling it back on the
1489 * same queue a winning proposition
1490 */
1491 queue_enter(&vm_page_queue_inactive, m,
1492 vm_page_t, pageq);
1493 m->inactive = TRUE;
1494 vm_page_inactive_count++;
1495 token_new_pagecount++;
1496 } else if (m->throttled) {
1497 queue_remove(&vm_page_queue_throttled, m,
1498 vm_page_t, pageq);
1499 m->throttled = FALSE;
1500 vm_page_throttled_count--;
1501
1502 /*
1503 * not throttled any more, so can stick
1504 * it on the inactive queue.
1505 */
1506 queue_enter(&vm_page_queue_inactive, m,
1507 vm_page_t, pageq);
1508 m->inactive = TRUE;
1509 vm_page_inactive_count++;
1510 token_new_pagecount++;
1511 } else {
1512 queue_remove(&vm_page_queue_inactive, m,
1513 vm_page_t, pageq);
1514 #if MACH_ASSERT
1515 vm_page_inactive_count--; /* balance for purgeable queue asserts */
1516 #endif
1517 vm_purgeable_q_advance_all();
1518
1519 queue_enter(&vm_page_queue_inactive, m,
1520 vm_page_t, pageq);
1521 #if MACH_ASSERT
1522 vm_page_inactive_count++; /* balance for purgeable queue asserts */
1523 #endif
1524 token_new_pagecount++;
1525 }
1526 pmap_clear_reference(m->phys_page);
1527 m->reference = FALSE;
1528
1529 vm_pageout_inactive_nolock++;
1530
1531 if ( !queue_empty(&sq->age_q) )
1532 m = (vm_page_t) queue_first(&sq->age_q);
1533 else if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) ||
1534 queue_empty(&vm_page_queue_inactive)) {
1535 if ( !queue_empty(&vm_page_queue_zf) )
1536 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1537 } else if ( !queue_empty(&vm_page_queue_inactive) ) {
1538 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1539 }
1540 /*
1541 * this is the next object we're going to be interested in
1542 * try to make sure it's available after the mutex_yield
1543 * returns control
1544 */
1545 vm_pageout_scan_wants_object = m->object;
1546
1547 /*
1548 * force us to dump any collected free pages
1549 * and to pause before moving on
1550 */
1551 try_failed = TRUE;
1552
1553 goto done_with_inactivepage;
1554 }
1555 object = m->object;
1556 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1557
1558 try_failed = FALSE;
1559 }
1560
1561 /*
1562 * Paging out pages of external objects which
1563 * are currently being created must be avoided.
1564 * The pager may need to allocate memory, which could lead to a
1565 * deadlock between it and the pageout thread if such pages
1566 * were chosen. The remaining assumption is that there will
1567 * eventually be enough available pages in the inactive pool
1568 * to page out to satisfy all memory claimed by the thread
1569 * which concurrently creates the pager.
1570 */
1571 if (!object->pager_initialized && object->pager_created) {
1572 /*
1573 * Move page to end and continue, hoping that
1574 * there will be enough other inactive pages to
1575 * page out so that the thread which currently
1576 * initializes the pager will succeed.
1577 * Don't re-grant the ticket; the page should
1578 * be pulled from the queue and paged out whenever
1579 * one of its logically adjacent fellows is
1580 * targeted.
1581 *
1582 * Pages found on the speculative list can never be
1583 * in this state... they always have a pager associated
1584 * with them.
1585 */
1586 assert(!m->speculative);
1587
1588 if (m->zero_fill) {
1589 queue_remove(&vm_page_queue_zf, m,
1590 vm_page_t, pageq);
1591 queue_enter(&vm_page_queue_zf, m,
1592 vm_page_t, pageq);
1593 } else {
1594 queue_remove(&vm_page_queue_inactive, m,
1595 vm_page_t, pageq);
1596 #if MACH_ASSERT
1597 vm_page_inactive_count--; /* balance for purgeable queue asserts */
1598 #endif
1599 vm_purgeable_q_advance_all();
1600
1601 queue_enter(&vm_page_queue_inactive, m,
1602 vm_page_t, pageq);
1603 #if MACH_ASSERT
1604 vm_page_inactive_count++; /* balance for purgeable queue asserts */
1605 #endif
1606 token_new_pagecount++;
1607 }
1608 vm_pageout_inactive_avoid++;
1609
1610 goto done_with_inactivepage;
1611 }
1612 /*
1613 * Remove the page from its list.
1614 */
1615 if (m->speculative) {
1616 remque(&m->pageq);
1617 m->speculative = FALSE;
1618 vm_page_speculative_count--;
1619 } else if (m->throttled) {
1620 queue_remove(&vm_page_queue_throttled, m, vm_page_t, pageq);
1621 m->throttled = FALSE;
1622 vm_page_throttled_count--;
1623 } else {
1624 if (m->zero_fill) {
1625 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
1626 vm_zf_queue_count--;
1627 } else {
1628 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1629 }
1630 m->inactive = FALSE;
1631 if (!m->fictitious)
1632 vm_page_inactive_count--;
1633 vm_purgeable_q_advance_all();
1634 }
1635
1636 /* If the object is empty, the page must be reclaimed even if dirty or used. */
1637 /* If the page belongs to a volatile object, we stick it back on. */
1638 if (object->copy == VM_OBJECT_NULL) {
1639 if(object->purgable == VM_PURGABLE_EMPTY && !m->cleaning) {
1640 m->busy = TRUE;
1641 if (m->pmapped == TRUE) {
1642 /* unmap the page */
1643 refmod_state = pmap_disconnect(m->phys_page);
1644 if (refmod_state & VM_MEM_MODIFIED) {
1645 m->dirty = TRUE;
1646 }
1647 }
1648 if (m->dirty || m->precious) {
1649 /* we saved the cost of cleaning this page ! */
1650 vm_page_purged_count++;
1651 }
1652 goto reclaim_page;
1653 }
1654 if (object->purgable == VM_PURGABLE_VOLATILE) {
1655 /* if it's wired, we can't put it on our queue */
1656 assert(m->wire_count == 0);
1657 /* just stick it back on! */
1658 goto reactivate_page;
1659 }
1660 }
1661 m->pageq.next = NULL;
1662 m->pageq.prev = NULL;
1663
1664 if ( !m->fictitious && catch_up_count)
1665 catch_up_count--;
1666
1667 /*
1668 * ENCRYPTED SWAP:
1669 * if this page has already been picked up as part of a
1670 * page-out cluster, it will be busy because it is being
1671 * encrypted (see vm_object_upl_request()). But we still
1672 * want to demote it from "clean-in-place" (aka "adjacent")
1673 * to "clean-and-free" (aka "target"), so let's ignore its
1674 * "busy" bit here and proceed to check for "cleaning" a
1675 * little bit below...
1676 */
1677 if ( !m->encrypted_cleaning && (m->busy || !object->alive)) {
1678 /*
1679 * Somebody is already playing with this page.
1680 * Leave it off the pageout queues.
1681 *
1682 */
1683 vm_pageout_inactive_busy++;
1684
1685 goto done_with_inactivepage;
1686 }
1687
1688 /*
1689 * If it's absent or in error, we can reclaim the page.
1690 */
1691
1692 if (m->absent || m->error) {
1693 vm_pageout_inactive_absent++;
1694 reclaim_page:
1695 if (vm_pageout_deadlock_target) {
1696 vm_pageout_scan_inactive_throttle_success++;
1697 vm_pageout_deadlock_target--;
1698 }
1699
1700 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
1701
1702 if (m->object->internal) {
1703 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
1704 } else {
1705 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
1706 }
1707
1708 vm_page_free_prepare(m);
1709
1710 assert(m->pageq.next == NULL &&
1711 m->pageq.prev == NULL);
1712 m->pageq.next = (queue_entry_t)local_freeq;
1713 local_freeq = m;
1714 local_freed++;
1715
1716 inactive_burst_count = 0;
1717
1718 goto done_with_inactivepage;
1719 }
1720
1721 assert(!m->private);
1722 assert(!m->fictitious);
1723
1724 /*
1725 * If already cleaning this page in place, convert from
1726 * "adjacent" to "target". We can leave the page mapped,
1727 * and vm_pageout_object_terminate will determine whether
1728 * to free or reactivate.
1729 */
1730
1731 if (m->cleaning) {
1732 m->busy = TRUE;
1733 m->pageout = TRUE;
1734 m->dump_cleaning = TRUE;
1735 vm_page_wire(m);
1736
1737 CLUSTER_STAT(vm_pageout_cluster_conversions++);
1738
1739 inactive_burst_count = 0;
1740
1741 goto done_with_inactivepage;
1742 }
1743
1744 /*
1745 * If it's being used, reactivate.
1746 * (Fictitious pages are either busy or absent.)
1747 * First, update the reference and dirty bits
1748 * to make sure the page is unreferenced.
1749 */
1750 refmod_state = -1;
1751
1752 if (m->reference == FALSE && m->pmapped == TRUE) {
1753 refmod_state = pmap_get_refmod(m->phys_page);
1754
1755 if (refmod_state & VM_MEM_REFERENCED)
1756 m->reference = TRUE;
1757 if (refmod_state & VM_MEM_MODIFIED)
1758 m->dirty = TRUE;
1759 }
1760 if (m->reference && !m->no_cache) {
1761 /*
1762 * The page we pulled off the inactive list has
1763 * been referenced. It is possible for other
1764 * processors to be touching pages faster than we
1765 * can clear the referenced bit and traverse the
1766 * inactive queue, so we limit the number of
1767 * reactivations.
1768 */
1769 if (++reactivated_this_call >= reactivate_limit) {
1770 vm_pageout_reactivation_limit_exceeded++;
1771 } else if (catch_up_count) {
1772 vm_pageout_catch_ups++;
1773 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
1774 vm_pageout_inactive_force_reclaim++;
1775 } else {
1776 /*
1777 * The page was being used, so put back on active list.
1778 */
1779 reactivate_page:
1780 vm_page_activate(m);
1781 VM_STAT_INCR(reactivations);
1782
1783 vm_pageout_inactive_used++;
1784 inactive_burst_count = 0;
1785
1786 goto done_with_inactivepage;
1787 }
1788 /*
1789 * Make sure we call pmap_get_refmod() if it
1790 * wasn't already called just above, to update
1791 * the dirty bit.
1792 */
1793 if ((refmod_state == -1) && !m->dirty && m->pmapped) {
1794 refmod_state = pmap_get_refmod(m->phys_page);
1795 if (refmod_state & VM_MEM_MODIFIED)
1796 m->dirty = TRUE;
1797 }
1798 forced_reclaim = TRUE;
1799 } else {
1800 forced_reclaim = FALSE;
1801 }
1802
1803 XPR(XPR_VM_PAGEOUT,
1804 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
1805 (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
1806
1807 /*
1808 * we've got a candidate page to steal...
1809 *
1810 * m->dirty is up to date courtesy of the
1811 * preceding check for m->reference... if
1812 * we get here, then m->reference had to be
1813 * FALSE (or possibly "reactivate_limit" was
1814 * exceeded), but in either case we called
1815 * pmap_get_refmod() and updated both
1816 * m->reference and m->dirty
1817 *
1818 * if it's dirty or precious we need to
1819 * see if the target queue is throttled
1820 * if it is, we need to skip over it by moving it back
1821 * to the end of the inactive queue
1822 */
1823 inactive_throttled = FALSE;
1824
1825 if (m->dirty || m->precious) {
1826 if (object->internal) {
1827 if (VM_PAGE_Q_THROTTLED(iq))
1828 inactive_throttled = TRUE;
1829 } else if (VM_PAGE_Q_THROTTLED(eq)) {
1830 inactive_throttled = TRUE;
1831 }
1832 }
1833 if (inactive_throttled == TRUE) {
1834 throttle_inactive:
1835 if (!IP_VALID(memory_manager_default) &&
1836 object->internal &&
1837 (object->purgable == VM_PURGABLE_DENY ||
1838 object->purgable == VM_PURGABLE_NONVOLATILE ||
1839 object->purgable == VM_PURGABLE_VOLATILE )) {
1840 queue_enter(&vm_page_queue_throttled, m,
1841 vm_page_t, pageq);
1842 m->throttled = TRUE;
1843 vm_page_throttled_count++;
1844 } else {
1845 if (m->zero_fill) {
1846 queue_enter(&vm_page_queue_zf, m,
1847 vm_page_t, pageq);
1848 vm_zf_queue_count++;
1849 } else
1850 queue_enter(&vm_page_queue_inactive, m,
1851 vm_page_t, pageq);
1852 m->inactive = TRUE;
1853 if (!m->fictitious) {
1854 vm_page_inactive_count++;
1855 token_new_pagecount++;
1856 }
1857 }
1858 vm_pageout_scan_inactive_throttled++;
1859 goto done_with_inactivepage;
1860 }
1861
1862 /*
1863 * we've got a page that we can steal...
1864 * eliminate all mappings and make sure
1865 * we have the up-to-date modified state
1866 * first take the page BUSY, so that no new
1867 * mappings can be made
1868 */
1869 m->busy = TRUE;
1870
1871 /*
1872 * if we need to do a pmap_disconnect then we
1873 * need to re-evaluate m->dirty since the pmap_disconnect
1874 * provides the true state atomically... the
1875 * page was still mapped up to the pmap_disconnect
1876 * and may have been dirtied at the last microsecond
1877 *
1878 * we also check for the page being referenced 'late'
1879 * if it was, we first need to do a WAKEUP_DONE on it
1880 * since we already set m->busy = TRUE, before
1881 * going off to reactivate it
1882 *
1883 * Note that if 'pmapped' is FALSE then the page is not,
1884 * and has not been, in any map, so there is no point in calling
1885 * pmap_disconnect(). m->dirty and/or m->reference could
1886 * have been set in anticipation of likely usage of the page.
1887 */
1888 if (m->pmapped == TRUE) {
1889 refmod_state = pmap_disconnect(m->phys_page);
1890
1891 if (refmod_state & VM_MEM_MODIFIED)
1892 m->dirty = TRUE;
1893 if (refmod_state & VM_MEM_REFERENCED) {
1894
1895 /* If m->reference is already set, this page must have
1896 * already failed the reactivate_limit test, so don't
1897 * bump the counts twice.
1898 */
1899 if ( ! m->reference ) {
1900 m->reference = TRUE;
1901 if (forced_reclaim ||
1902 ++reactivated_this_call >= reactivate_limit)
1903 vm_pageout_reactivation_limit_exceeded++;
1904 else {
1905 PAGE_WAKEUP_DONE(m);
1906 goto reactivate_page;
1907 }
1908 }
1909 }
1910 }
1911 /*
1912 * reset our count of pages that have been reclaimed
1913 * since the last page was 'stolen'
1914 */
1915 inactive_reclaim_run = 0;
1916
1917 /*
1918 * If it's clean and not precious, we can free the page.
1919 */
1920 if (!m->dirty && !m->precious) {
1921 vm_pageout_inactive_clean++;
1922 goto reclaim_page;
1923 }
1924
1925 /*
1926 * The page may have been dirtied since the last check
1927 * for a throttled target queue (which may have been skipped
1928 * if the page was clean then). With the dirty page
1929 * disconnected here, we can make one final check.
1930 */
1931 {
1932 boolean_t disconnect_throttled = FALSE;
1933 if (object->internal) {
1934 if (VM_PAGE_Q_THROTTLED(iq))
1935 disconnect_throttled = TRUE;
1936 } else if (VM_PAGE_Q_THROTTLED(eq)) {
1937 disconnect_throttled = TRUE;
1938 }
1939
1940 if (disconnect_throttled == TRUE) {
1941 PAGE_WAKEUP_DONE(m);
1942 goto throttle_inactive;
1943 }
1944 }
1945
1946 vm_pageout_cluster(m);
1947
1948 vm_pageout_inactive_dirty++;
1949
1950 inactive_burst_count = 0;
1951
1952 done_with_inactivepage:
1953 if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) {
1954
1955 if (object != NULL) {
1956 vm_object_unlock(object);
1957 object = NULL;
1958 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1959 }
1960 if (local_freeq) {
1961 vm_page_free_list(local_freeq);
1962
1963 local_freeq = NULL;
1964 local_freed = 0;
1965 }
1966 mutex_yield(&vm_page_queue_lock);
1967
1968 delayed_unlock = 1;
1969 }
1970 /*
1971 * back to top of pageout scan loop
1972 */
1973 }
1974 }
1975
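/*
 * Summary note (descriptive addition, not original source text): for each
 * page pulled off the inactive queue, the scan above works through a fixed
 * ladder of checks:
 *   1. busy page or dead object         -> leave it off the pageout queues
 *   2. absent or error                  -> reclaim (free) it immediately
 *   3. already being cleaned            -> convert "adjacent" to "target"
 *   4. recently referenced              -> reactivate, subject to the
 *                                          reactivate_limit / catch-up checks
 *   5. dirty or precious + throttled    -> requeue it and move on
 *   6. pmap_disconnect() if mapped, then recheck dirty/reference state
 *   7. clean and not precious           -> reclaim; otherwise hand the dirty
 *                                          page to vm_pageout_cluster()
 */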
1976
1977 int vm_page_free_count_init;
1978
1979 void
1980 vm_page_free_reserve(
1981 int pages)
1982 {
1983 int free_after_reserve;
1984
1985 vm_page_free_reserved += pages;
1986
1987 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
1988
1989 vm_page_free_min = vm_page_free_reserved +
1990 VM_PAGE_FREE_MIN(free_after_reserve);
1991
1992 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
1993 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
1994
1995 vm_page_free_target = vm_page_free_reserved +
1996 VM_PAGE_FREE_TARGET(free_after_reserve);
1997
1998 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
1999 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
2000
2001 if (vm_page_free_target < vm_page_free_min + 5)
2002 vm_page_free_target = vm_page_free_min + 5;
2003
2004 }
2005
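/*
 * Illustrative sketch (not part of the build): how the thresholds computed
 * by vm_page_free_reserve() relate to one another.  The real
 * VM_PAGE_FREE_MIN()/VM_PAGE_FREE_TARGET() macros are defined elsewhere;
 * the divisors below are hypothetical stand-ins used only to show the
 * clamping order enforced above.
 */
#if 0
static void
vm_page_free_reserve_example(int free_count_init, int reserved)
{
	int free_after_reserve = free_count_init - reserved;

	/* hypothetical stand-ins for VM_PAGE_FREE_MIN()/VM_PAGE_FREE_TARGET() */
	int example_min    = reserved + (free_after_reserve / 100);
	int example_target = reserved + (free_after_reserve / 50);

	/* same clamping order as vm_page_free_reserve() above */
	if (example_min > VM_PAGE_FREE_MIN_LIMIT)
		example_min = VM_PAGE_FREE_MIN_LIMIT;
	if (example_target > VM_PAGE_FREE_TARGET_LIMIT)
		example_target = VM_PAGE_FREE_TARGET_LIMIT;
	if (example_target < example_min + 5)
		example_target = example_min + 5;

	/* invariant after the clamps: example_target >= example_min + 5 */
	(void) example_target;
}
#endif
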
2006 /*
2007 * vm_pageout is the high level pageout daemon.
2008 */
2009
2010 void
2011 vm_pageout_continue(void)
2012 {
2013 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
2014 vm_pageout_scan_event_counter++;
2015 vm_pageout_scan();
2016 /* we hold vm_page_queue_free_lock now */
2017 assert(vm_page_free_wanted == 0);
2018 assert(vm_page_free_wanted_privileged == 0);
2019 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
2020 mutex_unlock(&vm_page_queue_free_lock);
2021
2022 counter(c_vm_pageout_block++);
2023 thread_block((thread_continue_t)vm_pageout_continue);
2024 /*NOTREACHED*/
2025 }
2026
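/*
 * Sketch (not part of the build): the continuation pattern used by
 * vm_pageout_continue() above.  The daemon asserts a wait on the
 * &vm_page_free_wanted event and names itself as the continuation, so each
 * wakeup restarts the thread at the top of vm_pageout_continue().  The
 * wakeup side lives in the page-allocation path (not shown in this file);
 * the fragment below only illustrates the assumed pairing.
 */
#if 0
static void
example_wake_pageout_daemon(void)
{
	/* hypothetical wakeup site: a thread wants free pages */
	thread_wakeup((event_t) &vm_page_free_wanted);
}
#endif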
2027
2028 /*
2029 * must be called with the
2030 * queues and object locks held
2031 */
2032 static void
2033 vm_pageout_queue_steal(vm_page_t m)
2034 {
2035 struct vm_pageout_queue *q;
2036
2037 if (m->object->internal == TRUE)
2038 q = &vm_pageout_queue_internal;
2039 else
2040 q = &vm_pageout_queue_external;
2041
2042 m->laundry = FALSE;
2043 m->pageout_queue = FALSE;
2044 queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
2045
2046 m->pageq.next = NULL;
2047 m->pageq.prev = NULL;
2048
2049 vm_object_paging_end(m->object);
2050
2051 q->pgo_laundry--;
2052 }
2053
2054
2055 #ifdef FAKE_DEADLOCK
2056
2057 #define FAKE_COUNT 5000
2058
2059 int internal_count = 0;
2060 int fake_deadlock = 0;
2061
2062 #endif
2063
2064 static void
2065 vm_pageout_iothread_continue(struct vm_pageout_queue *q)
2066 {
2067 vm_page_t m = NULL;
2068 vm_object_t object;
2069 boolean_t need_wakeup;
2070 memory_object_t pager;
2071 thread_t self = current_thread();
2072
2073 if ((vm_pageout_internal_iothread != THREAD_NULL)
2074 && (self == vm_pageout_external_iothread )
2075 && (self->options & TH_OPT_VMPRIV))
2076 self->options &= ~TH_OPT_VMPRIV;
2077
2078 vm_page_lockspin_queues();
2079
2080 while ( !queue_empty(&q->pgo_pending) ) {
2081
2082 q->pgo_busy = TRUE;
2083 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
2084 m->pageout_queue = FALSE;
2085 vm_page_unlock_queues();
2086
2087 m->pageq.next = NULL;
2088 m->pageq.prev = NULL;
2089 #ifdef FAKE_DEADLOCK
2090 if (q == &vm_pageout_queue_internal) {
2091 vm_offset_t addr;
2092 int pg_count;
2093
2094 internal_count++;
2095
2096 if ((internal_count == FAKE_COUNT)) {
2097
2098 pg_count = vm_page_free_count + vm_page_free_reserved;
2099
2100 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
2101 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
2102 }
2103 internal_count = 0;
2104 fake_deadlock++;
2105 }
2106 }
2107 #endif
2108 object = m->object;
2109
2110 vm_object_lock(object);
2111
2112 if (!object->pager_initialized) {
2113
2114 /*
2115 * If there is no memory object for the page, create
2116 * one and hand it to the default pager.
2117 */
2118
2119 if (!object->pager_initialized)
2120 vm_object_collapse(object,
2121 (vm_object_offset_t) 0,
2122 TRUE);
2123 if (!object->pager_initialized)
2124 vm_object_pager_create(object);
2125 if (!object->pager_initialized) {
2126 /*
2127 * Still no pager for the object.
2128 * Reactivate the page.
2129 *
2130 * Should only happen if there is no
2131 * default pager.
2132 */
2133 m->list_req_pending = FALSE;
2134 m->cleaning = FALSE;
2135 m->pageout = FALSE;
2136
2137 vm_page_lockspin_queues();
2138 vm_page_unwire(m);
2139 vm_pageout_throttle_up(m);
2140 vm_pageout_dirty_no_pager++;
2141 vm_page_activate(m);
2142 vm_page_unlock_queues();
2143
2144 /*
2145 * And we are done with it.
2146 */
2147 PAGE_WAKEUP_DONE(m);
2148
2149 vm_object_paging_end(object);
2150 vm_object_unlock(object);
2151
2152 vm_page_lockspin_queues();
2153 continue;
2154 }
2155 }
2156 pager = object->pager;
2157 if (pager == MEMORY_OBJECT_NULL) {
2158 /*
2159 * This pager has been destroyed by either
2160 * memory_object_destroy or vm_object_destroy, and
2161 * so there is nowhere for the page to go.
2162 * Just free the page... VM_PAGE_FREE takes
2163 * care of cleaning up all the state...
2164 * including doing the vm_pageout_throttle_up
2165 */
2166
2167 VM_PAGE_FREE(m);
2168
2169 vm_object_paging_end(object);
2170 vm_object_unlock(object);
2171
2172 vm_page_lockspin_queues();
2173 continue;
2174 }
2175 vm_object_unlock(object);
2176 /*
2177 * we expect the paging_in_progress reference to have
2178 * already been taken on the object before it was added
2179 * to the appropriate pageout I/O queue... this will
2180 * keep the object from being terminated and/or the
2181 * paging_offset from changing until the I/O has
2182 * completed... therefore no need to lock the object to
2183 * pull the paging_offset from it.
2184 *
2185 * Send the data to the pager.
2186 * any pageout clustering happens there
2187 */
2188 memory_object_data_return(pager,
2189 m->offset + object->paging_offset,
2190 PAGE_SIZE,
2191 NULL,
2192 NULL,
2193 FALSE,
2194 FALSE,
2195 0);
2196
2197 vm_object_lock(object);
2198 vm_object_paging_end(object);
2199 vm_object_unlock(object);
2200
2201 vm_page_lockspin_queues();
2202 }
2203 assert_wait((event_t) q, THREAD_UNINT);
2204
2205
2206 if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) {
2207 q->pgo_throttled = FALSE;
2208 need_wakeup = TRUE;
2209 } else
2210 need_wakeup = FALSE;
2211
2212 q->pgo_busy = FALSE;
2213 q->pgo_idle = TRUE;
2214 vm_page_unlock_queues();
2215
2216 if (need_wakeup == TRUE)
2217 thread_wakeup((event_t) &q->pgo_laundry);
2218
2219 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending);
2220 /*NOTREACHED*/
2221 }
2222
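/*
 * Sketch (not part of the build): the producer side of q->pgo_pending that
 * the loop above drains.  Pages are actually queued elsewhere (presumably
 * vm_pageout_cluster()); the helper below is an assumption-labelled
 * illustration of the handshake the iothread expects: enqueue the page,
 * account for it in pgo_laundry, and wake the iothread on the event it
 * blocked on.
 */
#if 0
static void
example_enqueue_for_pageout(struct vm_pageout_queue *q, vm_page_t m)
{
	m->laundry = TRUE;
	m->pageout_queue = TRUE;
	queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
	q->pgo_laundry++;

	if (q->pgo_idle == TRUE) {
		q->pgo_idle = FALSE;
		thread_wakeup((event_t) q);	/* assumed wakeup event */
	}
}
#endif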
2223
2224 static void
2225 vm_pageout_iothread_external(void)
2226 {
2227 thread_t self = current_thread();
2228
2229 self->options |= TH_OPT_VMPRIV;
2230
2231 vm_pageout_iothread_continue(&vm_pageout_queue_external);
2232 /*NOTREACHED*/
2233 }
2234
2235
2236 static void
2237 vm_pageout_iothread_internal(void)
2238 {
2239 thread_t self = current_thread();
2240
2241 self->options |= TH_OPT_VMPRIV;
2242
2243 vm_pageout_iothread_continue(&vm_pageout_queue_internal);
2244 /*NOTREACHED*/
2245 }
2246
2247 static void
2248 vm_pageout_garbage_collect(int collect)
2249 {
2250 if (collect) {
2251 stack_collect();
2252
2253 /*
2254 * consider_zone_gc should be last, because the other operations
2255 * might return memory to zones.
2256 */
2257 consider_machine_collect();
2258 consider_zone_gc();
2259
2260 consider_machine_adjust();
2261 }
2262
2263 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
2264
2265 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
2266 /*NOTREACHED*/
2267 }
2268
2269
2270
2271 void
2272 vm_pageout(void)
2273 {
2274 thread_t self = current_thread();
2275 thread_t thread;
2276 kern_return_t result;
2277 spl_t s;
2278
2279 /*
2280 * Set thread privileges.
2281 */
2282 s = splsched();
2283 thread_lock(self);
2284 self->priority = BASEPRI_PREEMPT - 1;
2285 set_sched_pri(self, self->priority);
2286 thread_unlock(self);
2287
2288 if (!self->reserved_stack)
2289 self->reserved_stack = self->kernel_stack;
2290
2291 splx(s);
2292
2293 /*
2294 * Initialize some paging parameters.
2295 */
2296
2297 if (vm_pageout_idle_wait == 0)
2298 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
2299
2300 if (vm_pageout_burst_wait == 0)
2301 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
2302
2303 if (vm_pageout_empty_wait == 0)
2304 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
2305
2306 if (vm_pageout_deadlock_wait == 0)
2307 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
2308
2309 if (vm_pageout_deadlock_relief == 0)
2310 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
2311
2312 if (vm_pageout_inactive_relief == 0)
2313 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
2314
2315 if (vm_pageout_burst_active_throttle == 0)
2316 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
2317
2318 if (vm_pageout_burst_inactive_throttle == 0)
2319 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
2320
2321 /*
2322 * Set kernel task to low backing store privileged
2323 * status
2324 */
2325 task_lock(kernel_task);
2326 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
2327 task_unlock(kernel_task);
2328
2329 vm_page_free_count_init = vm_page_free_count;
2330
2331 /*
2332 * even if we've already called vm_page_free_reserve
2333 * call it again here to ensure that the targets are
2334 * accurately calculated (it uses vm_page_free_count_init)...
2335 * calling it with an arg of 0 will not change the reserve
2336 * but will re-calculate free_min and free_target
2337 */
2338 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
2339 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
2340 } else
2341 vm_page_free_reserve(0);
2342
2343
2344 queue_init(&vm_pageout_queue_external.pgo_pending);
2345 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2346 vm_pageout_queue_external.pgo_laundry = 0;
2347 vm_pageout_queue_external.pgo_idle = FALSE;
2348 vm_pageout_queue_external.pgo_busy = FALSE;
2349 vm_pageout_queue_external.pgo_throttled = FALSE;
2350
2351 queue_init(&vm_pageout_queue_internal.pgo_pending);
2352 vm_pageout_queue_internal.pgo_maxlaundry = 0;
2353 vm_pageout_queue_internal.pgo_laundry = 0;
2354 vm_pageout_queue_internal.pgo_idle = FALSE;
2355 vm_pageout_queue_internal.pgo_busy = FALSE;
2356 vm_pageout_queue_internal.pgo_throttled = FALSE;
2357
2358
2359 /* internal pageout thread started when default pager registered first time */
2360 /* external pageout and garbage collection threads started here */
2361
2362 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
2363 BASEPRI_PREEMPT - 1,
2364 &vm_pageout_external_iothread);
2365 if (result != KERN_SUCCESS)
2366 panic("vm_pageout_iothread_external: create failed");
2367
2368 thread_deallocate(vm_pageout_external_iothread);
2369
2370 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
2371 MINPRI_KERNEL,
2372 &thread);
2373 if (result != KERN_SUCCESS)
2374 panic("vm_pageout_garbage_collect: create failed");
2375
2376 thread_deallocate(thread);
2377
2378 vm_object_reaper_init();
2379
2380
2381 vm_pageout_continue();
2382
2383 /*
2384 * Unreached code!
2385 *
2386 * The vm_pageout_continue() call above never returns, so the code below is never
2387 * executed. We take advantage of this to declare several DTrace VM related probe
2388 * points that our kernel doesn't have an analog for. These are probe points that
2389 * exist in Solaris and are in the DTrace documentation, so people may have written
2390 * scripts that use them. Declaring the probe points here means their scripts will
2391 * compile and execute which we want for portability of the scripts, but since this
2392 * section of code is never reached, the probe points will simply never fire. Yes,
2393 * this is basically a hack. The problem is the DTrace probe points were chosen with
2394 * Solaris specific VM events in mind, not portability to different VM implementations.
2395 */
2396
2397 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
2398 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
2399 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
2400 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
2401 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
2402 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
2403 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
2404 /*NOTREACHED*/
2405 }
2406
2407 kern_return_t
2408 vm_pageout_internal_start(void)
2409 {
2410 kern_return_t result;
2411
2412 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2413 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread);
2414 if (result == KERN_SUCCESS)
2415 thread_deallocate(vm_pageout_internal_iothread);
2416 return result;
2417 }
2418
2419 #define UPL_DELAYED_UNLOCK_LIMIT (MAX_UPL_TRANSFER / 2)
2420
2421 static upl_t
2422 upl_create(int type, int flags, upl_size_t size)
2423 {
2424 upl_t upl;
2425 int page_field_size = 0;
2426 int upl_flags = 0;
2427 int upl_size = sizeof(struct upl);
2428
2429 if (type & UPL_CREATE_LITE) {
2430 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2431 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2432
2433 upl_flags |= UPL_LITE;
2434 }
2435 if (type & UPL_CREATE_INTERNAL) {
2436 upl_size += sizeof(struct upl_page_info) * (size/PAGE_SIZE);
2437
2438 upl_flags |= UPL_INTERNAL;
2439 }
2440 upl = (upl_t)kalloc(upl_size + page_field_size);
2441
2442 if (page_field_size)
2443 bzero((char *)upl + upl_size, page_field_size);
2444
2445 upl->flags = upl_flags | flags;
2446 upl->src_object = NULL;
2447 upl->kaddr = (vm_offset_t)0;
2448 upl->size = 0;
2449 upl->map_object = NULL;
2450 upl->ref_count = 1;
2451 upl->highest_page = 0;
2452 upl_lock_init(upl);
2453 #ifdef UPL_DEBUG
2454 upl->ubc_alias1 = 0;
2455 upl->ubc_alias2 = 0;
2456 #endif /* UPL_DEBUG */
2457 return(upl);
2458 }
2459
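/*
 * Sketch (not part of the build): the memory layout upl_create() builds for
 * an INTERNAL + LITE UPL, and the pointer arithmetic the rest of this file
 * uses to find the embedded page-info array and the "lite" bitmap (see
 * vm_object_upl_request() and vm_map_enter_upl() below).  The helper name
 * is ours.
 *
 *   [ struct upl | upl_page_info_t[size/PAGE_SIZE] | lite bitmap ]
 *
 * one upl_page_info_t per page, followed by one bit per page, rounded up
 * to a 4-byte boundary.
 */
#if 0
static void
example_upl_layout(upl_t upl, upl_size_t size)
{
	upl_page_info_t	*user_page_list;
	wpl_array_t	lite_list;

	user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
	lite_list = (wpl_array_t)
		(((uintptr_t)user_page_list) +
		 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));

	(void) user_page_list;
	(void) lite_list;
}
#endif
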
2460 static void
2461 upl_destroy(upl_t upl)
2462 {
2463 int page_field_size; /* bit field in word size buf */
2464 int size;
2465
2466 #ifdef UPL_DEBUG
2467 {
2468 vm_object_t object;
2469
2470 if (upl->flags & UPL_SHADOWED) {
2471 object = upl->map_object->shadow;
2472 } else {
2473 object = upl->map_object;
2474 }
2475 vm_object_lock(object);
2476 queue_remove(&object->uplq, upl, upl_t, uplq);
2477 vm_object_unlock(object);
2478 }
2479 #endif /* UPL_DEBUG */
2480 /*
2481 * drop a reference on the map_object whether or
2482 * not a pageout object is inserted
2483 */
2484 if (upl->flags & UPL_SHADOWED)
2485 vm_object_deallocate(upl->map_object);
2486
2487 if (upl->flags & UPL_DEVICE_MEMORY)
2488 size = PAGE_SIZE;
2489 else
2490 size = upl->size;
2491 page_field_size = 0;
2492
2493 if (upl->flags & UPL_LITE) {
2494 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2495 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2496 }
2497 if (upl->flags & UPL_INTERNAL) {
2498 kfree(upl,
2499 sizeof(struct upl) +
2500 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
2501 + page_field_size);
2502 } else {
2503 kfree(upl, sizeof(struct upl) + page_field_size);
2504 }
2505 }
2506
2507 void uc_upl_dealloc(upl_t upl);
2508 __private_extern__ void
2509 uc_upl_dealloc(upl_t upl)
2510 {
2511 if (--upl->ref_count == 0)
2512 upl_destroy(upl);
2513 }
2514
2515 void
2516 upl_deallocate(upl_t upl)
2517 {
2518 if (--upl->ref_count == 0)
2519 upl_destroy(upl);
2520 }
2521
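/*
 * Note (descriptive addition): the UPL reference-count lifecycle as it
 * appears in this file.  upl_create() starts ref_count at 1, the mapping
 * path (vm_map_enter_upl() below) takes an extra reference and sets
 * UPL_PAGE_LIST_MAPPED, and upl_deallocate() destroys the UPL once the
 * count drops back to zero.  A minimal sketch:
 */
#if 0
static void
example_upl_refcount(void)
{
	upl_t	upl;

	upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, 0, PAGE_SIZE);
	/* ... populate / map / commit the UPL ... */
	upl_deallocate(upl);		/* ref_count 1 -> 0: upl_destroy() */
}
#endif
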
2522 /*
2523 * Statistics about UPL enforcement of copy-on-write obligations.
2524 */
2525 unsigned long upl_cow = 0;
2526 unsigned long upl_cow_again = 0;
2527 unsigned long upl_cow_contiguous = 0;
2528 unsigned long upl_cow_pages = 0;
2529 unsigned long upl_cow_again_pages = 0;
2530 unsigned long upl_cow_contiguous_pages = 0;
2531
2532 /*
2533 * Routine: vm_object_upl_request
2534 * Purpose:
2535 * Cause the population of a portion of a vm_object.
2536 * Depending on the nature of the request, the pages
2537 * returned may contain valid data or be uninitialized.
2538 * A page list structure, listing the physical pages,
2539 * will be returned upon request.
2540 * This function is called by the file system or any other
2541 * supplier of backing store to a pager.
2542 * IMPORTANT NOTE: The caller must still respect the relationship
2543 * between the vm_object and its backing memory object. The
2544 * caller MUST NOT substitute changes in the backing file
2545 * without first doing a memory_object_lock_request on the
2546 * target range unless it is known that the pages are not
2547 * shared with another entity at the pager level.
2548 * Copy_in_to:
2549 * if a page list structure is present
2550 * return the mapped physical pages; where a
2551 * page is not present, return a non-initialized
2552 * one. If the no_sync bit is turned on, don't
2553 * call the pager unlock to synchronize with other
2554 * possible copies of the page. Leave pages busy
2555 * in the original object, if a page list structure
2556 * was specified. When a commit of the page list
2557 * pages is done, the dirty bit will be set for each one.
2558 * Copy_out_from:
2559 * If a page list structure is present, return
2560 * all mapped pages. Where a page does not exist,
2561 * map a zero-filled one. Leave pages busy in
2562 * the original object. If a page list structure
2563 * is not specified, this call is a no-op.
2564 *
2565 * Note: access of default pager objects has a rather interesting
2566 * twist. The caller of this routine, presumably the file system
2567 * page cache handling code, will never actually make a request
2568 * against a default pager backed object. Only the default
2569 * pager will make requests on backing store related vm_objects.
2570 * In this way the default pager can maintain the relationship
2571 * between backing store files (abstract memory objects) and
2572 * the vm_objects (cache objects) they support.
2573 *
2574 */
2575
2576 __private_extern__ kern_return_t
2577 vm_object_upl_request(
2578 vm_object_t object,
2579 vm_object_offset_t offset,
2580 upl_size_t size,
2581 upl_t *upl_ptr,
2582 upl_page_info_array_t user_page_list,
2583 unsigned int *page_list_count,
2584 int cntrl_flags)
2585 {
2586 vm_page_t dst_page = VM_PAGE_NULL;
2587 vm_object_offset_t dst_offset;
2588 upl_size_t xfer_size;
2589 boolean_t dirty;
2590 boolean_t hw_dirty;
2591 upl_t upl = NULL;
2592 unsigned int entry;
2593 #if MACH_CLUSTER_STATS
2594 boolean_t encountered_lrp = FALSE;
2595 #endif
2596 vm_page_t alias_page = NULL;
2597 int refmod_state = 0;
2598 wpl_array_t lite_list = NULL;
2599 vm_object_t last_copy_object;
2600 int delayed_unlock = 0;
2601 int j;
2602
2603 if (cntrl_flags & ~UPL_VALID_FLAGS) {
2604 /*
2605 * For forward compatibility's sake,
2606 * reject any unknown flag.
2607 */
2608 return KERN_INVALID_VALUE;
2609 }
2610 if ( (!object->internal) && (object->paging_offset != 0) )
2611 panic("vm_object_upl_request: external object with non-zero paging offset\n");
2612 if (object->phys_contiguous)
2613 panic("vm_object_upl_request: contiguous object specified\n");
2614
2615
2616 if ((size / PAGE_SIZE) > MAX_UPL_SIZE)
2617 size = MAX_UPL_SIZE * PAGE_SIZE;
2618
2619 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
2620 *page_list_count = MAX_UPL_SIZE;
2621
2622 if (cntrl_flags & UPL_SET_INTERNAL) {
2623 if (cntrl_flags & UPL_SET_LITE) {
2624
2625 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, 0, size);
2626
2627 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
2628 lite_list = (wpl_array_t)
2629 (((uintptr_t)user_page_list) +
2630 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
2631 } else {
2632 upl = upl_create(UPL_CREATE_INTERNAL, 0, size);
2633
2634 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
2635 }
2636 } else {
2637 if (cntrl_flags & UPL_SET_LITE) {
2638
2639 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE, 0, size);
2640
2641 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
2642 } else {
2643 upl = upl_create(UPL_CREATE_EXTERNAL, 0, size);
2644 }
2645 }
2646 *upl_ptr = upl;
2647
2648 if (user_page_list)
2649 user_page_list[0].device = FALSE;
2650
2651 if (cntrl_flags & UPL_SET_LITE) {
2652 upl->map_object = object;
2653 } else {
2654 upl->map_object = vm_object_allocate(size);
2655 /*
2656 * No need to lock the new object: nobody else knows
2657 * about it yet, so it's all ours so far.
2658 */
2659 upl->map_object->shadow = object;
2660 upl->map_object->pageout = TRUE;
2661 upl->map_object->can_persist = FALSE;
2662 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2663 upl->map_object->shadow_offset = offset;
2664 upl->map_object->wimg_bits = object->wimg_bits;
2665
2666 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2667
2668 upl->flags |= UPL_SHADOWED;
2669 }
2670 /*
2671 * ENCRYPTED SWAP:
2672 * Just mark the UPL as "encrypted" here.
2673 * We'll actually encrypt the pages later,
2674 * in upl_encrypt(), when the caller has
2675 * selected which pages need to go to swap.
2676 */
2677 if (cntrl_flags & UPL_ENCRYPT)
2678 upl->flags |= UPL_ENCRYPTED;
2679
2680 if (cntrl_flags & UPL_FOR_PAGEOUT)
2681 upl->flags |= UPL_PAGEOUT;
2682
2683 vm_object_lock(object);
2684 vm_object_paging_begin(object);
2685
2686 /*
2687 * we can lock in the paging_offset once paging_in_progress is set
2688 */
2689 upl->size = size;
2690 upl->offset = offset + object->paging_offset;
2691
2692 #ifdef UPL_DEBUG
2693 queue_enter(&object->uplq, upl, upl_t, uplq);
2694 #endif /* UPL_DEBUG */
2695
2696 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
2697 /*
2698 * Honor copy-on-write obligations
2699 *
2700 * The caller is gathering these pages and
2701 * might modify their contents. We need to
2702 * make sure that the copy object has its own
2703 * private copies of these pages before we let
2704 * the caller modify them.
2705 */
2706 vm_object_update(object,
2707 offset,
2708 size,
2709 NULL,
2710 NULL,
2711 FALSE, /* should_return */
2712 MEMORY_OBJECT_COPY_SYNC,
2713 VM_PROT_NO_CHANGE);
2714 upl_cow++;
2715 upl_cow_pages += size >> PAGE_SHIFT;
2716 }
2717 /*
2718 * remember which copy object we synchronized with
2719 */
2720 last_copy_object = object->copy;
2721 entry = 0;
2722
2723 xfer_size = size;
2724 dst_offset = offset;
2725
2726 while (xfer_size) {
2727
2728 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
2729 if (delayed_unlock) {
2730 delayed_unlock = 0;
2731 vm_page_unlock_queues();
2732 }
2733 vm_object_unlock(object);
2734 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2735 goto relock;
2736 }
2737 if (delayed_unlock == 0) {
2738 /*
2739 * pageout_scan takes the vm_page_lock_queues first
2740 * then tries for the object lock... to avoid what
2741 * is effectively a lock inversion, we'll go to the
2742 * trouble of taking them in that same order... otherwise
2743 * if this object contains the majority of the pages resident
2744 * in the UBC (or a small set of large objects actively being
2745 * worked on contain the majority of the pages), we could
2746 * cause the pageout_scan thread to 'starve' in its attempt
2747 * to find pages to move to the free queue, since it has to
2748 * successfully acquire the object lock of any candidate page
2749 * before it can steal/clean it.
2750 */
2751 vm_object_unlock(object);
2752 relock:
2753 for (j = 0; ; j++) {
2754 vm_page_lock_queues();
2755
2756 if (vm_object_lock_try(object))
2757 break;
2758 vm_page_unlock_queues();
2759 mutex_pause(j);
2760 }
2761 delayed_unlock = 1;
2762 }
2763 if (cntrl_flags & UPL_COPYOUT_FROM) {
2764 upl->flags |= UPL_PAGE_SYNC_DONE;
2765
2766 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
2767 dst_page->fictitious ||
2768 dst_page->absent ||
2769 dst_page->error ||
2770 (dst_page->wire_count && !dst_page->pageout && !dst_page->list_req_pending)) {
2771
2772 if (user_page_list)
2773 user_page_list[entry].phys_addr = 0;
2774
2775 goto delay_unlock_queues;
2776 }
2777 /*
2778 * grab this up front...
2779 * a high percentage of the time we're going to
2780 * need the hardware modification state a bit later
2781 * anyway... so we can eliminate an extra call into
2782 * the pmap layer by grabbing it here and recording it
2783 */
2784 if (dst_page->pmapped)
2785 refmod_state = pmap_get_refmod(dst_page->phys_page);
2786 else
2787 refmod_state = 0;
2788
2789 if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) {
2790 /*
2791 * page is on inactive list and referenced...
2792 * reactivate it now... this gets it out of the
2793 * way of vm_pageout_scan which would have to
2794 * reactivate it upon tripping over it
2795 */
2796 vm_page_activate(dst_page);
2797 VM_STAT_INCR(reactivations);
2798 }
2799 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
2800 /*
2801 * we're only asking for DIRTY pages to be returned
2802 */
2803 if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
2804 /*
2805 * if we were the page stolen by vm_pageout_scan to be
2806 * cleaned (as opposed to a buddy being clustered in),
2807 * or this request is not being driven by a PAGEOUT cluster,
2808 * then we only need to check for the page being dirty or
2809 * precious to decide whether to return it
2810 */
2811 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED))
2812 goto check_busy;
2813 goto dont_return;
2814 }
2815 /*
2816 * this is a request for a PAGEOUT cluster and this page
2817 * is merely along for the ride as a 'buddy'... not only
2818 * does it have to be dirty to be returned, but it also
2819 * can't have been referenced recently... note that we've
2820 * already filtered above based on whether this page is
2821 * currently on the inactive queue or it meets the page
2822 * ticket (generation count) check
2823 */
2824 if ( !(refmod_state & VM_MEM_REFERENCED) &&
2825 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
2826 goto check_busy;
2827 }
2828 dont_return:
2829 /*
2830 * if we reach here, we're not to return
2831 * the page... go on to the next one
2832 */
2833 if (user_page_list)
2834 user_page_list[entry].phys_addr = 0;
2835
2836 goto delay_unlock_queues;
2837 }
2838 check_busy:
2839 if (dst_page->busy && (!(dst_page->list_req_pending && dst_page->pageout))) {
2840 if (cntrl_flags & UPL_NOBLOCK) {
2841 if (user_page_list)
2842 user_page_list[entry].phys_addr = 0;
2843
2844 goto delay_unlock_queues;
2845 }
2846 /*
2847 * someone else is playing with the
2848 * page. We will have to wait.
2849 */
2850 delayed_unlock = 0;
2851 vm_page_unlock_queues();
2852
2853 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
2854
2855 continue;
2856 }
2857 /*
2858 * Someone else already cleaning the page?
2859 */
2860 if ((dst_page->cleaning || dst_page->absent || dst_page->wire_count != 0) && !dst_page->list_req_pending) {
2861 if (user_page_list)
2862 user_page_list[entry].phys_addr = 0;
2863
2864 goto delay_unlock_queues;
2865 }
2866 /*
2867 * ENCRYPTED SWAP:
2868 * The caller is gathering this page and might
2869 * access its contents later on. Decrypt the
2870 * page before adding it to the UPL, so that
2871 * the caller never sees encrypted data.
2872 */
2873 if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) {
2874 int was_busy;
2875
2876 delayed_unlock = 0;
2877 vm_page_unlock_queues();
2878 /*
2879 * save the current state of busy, then
2880 * mark the page as busy while decryption
2881 * is in progress, since it will drop
2882 * the object lock...
2883 */
2884 was_busy = dst_page->busy;
2885 dst_page->busy = TRUE;
2886
2887 vm_page_decrypt(dst_page, 0);
2888 vm_page_decrypt_for_upl_counter++;
2889 /*
2890 * restore to original busy state
2891 */
2892 dst_page->busy = was_busy;
2893
2894 vm_page_lock_queues();
2895 delayed_unlock = 1;
2896 }
2897 if (dst_page->pageout_queue == TRUE)
2898 /*
2899 * we've buddied up a page for a clustered pageout
2900 * that has already been moved to the pageout
2901 * queue by pageout_scan... we need to remove
2902 * it from the queue and drop the laundry count
2903 * on that queue
2904 */
2905 vm_pageout_queue_steal(dst_page);
2906 #if MACH_CLUSTER_STATS
2907 /*
2908 * pageout statistics gathering. count
2909 * all the pages we will page out that
2910 * were not counted in the initial
2911 * vm_pageout_scan work
2912 */
2913 if (dst_page->list_req_pending)
2914 encountered_lrp = TRUE;
2915 if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious)) && !dst_page->list_req_pending) {
2916 if (encountered_lrp)
2917 CLUSTER_STAT(pages_at_higher_offsets++;)
2918 else
2919 CLUSTER_STAT(pages_at_lower_offsets++;)
2920 }
2921 #endif
2922 /*
2923 * Turn off busy indication on pending
2924 * pageout. Note: we can only get here
2925 * in the request pending case.
2926 */
2927 dst_page->list_req_pending = FALSE;
2928 dst_page->busy = FALSE;
2929
2930 hw_dirty = refmod_state & VM_MEM_MODIFIED;
2931 dirty = hw_dirty ? TRUE : dst_page->dirty;
2932
2933 if (dst_page->phys_page > upl->highest_page)
2934 upl->highest_page = dst_page->phys_page;
2935
2936 if (cntrl_flags & UPL_SET_LITE) {
2937 int pg_num;
2938
2939 pg_num = (dst_offset-offset)/PAGE_SIZE;
2940 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
2941
2942 if (hw_dirty)
2943 pmap_clear_modify(dst_page->phys_page);
2944
2945 /*
2946 * Mark original page as cleaning
2947 * in place.
2948 */
2949 dst_page->cleaning = TRUE;
2950 dst_page->precious = FALSE;
2951 } else {
2952 /*
2953 * use pageclean setup, it is more
2954 * convenient even for the pageout
2955 * cases here
2956 */
2957 vm_object_lock(upl->map_object);
2958 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
2959 vm_object_unlock(upl->map_object);
2960
2961 alias_page->absent = FALSE;
2962 alias_page = NULL;
2963 }
2964 #if MACH_PAGEMAP
2965 /*
2966 * Record that this page has been
2967 * written out
2968 */
2969 vm_external_state_set(object->existence_map, dst_page->offset);
2970 #endif /*MACH_PAGEMAP*/
2971 dst_page->dirty = dirty;
2972
2973 if (!dirty)
2974 dst_page->precious = TRUE;
2975
2976 if (dst_page->pageout)
2977 dst_page->busy = TRUE;
2978
2979 if ( (cntrl_flags & UPL_ENCRYPT) ) {
2980 /*
2981 * ENCRYPTED SWAP:
2982 * We want to deny access to the target page
2983 * because its contents are about to be
2984 * encrypted and the user would be very
2985 * confused to see encrypted data instead
2986 * of their data.
2987 * We also set "encrypted_cleaning" to allow
2988 * vm_pageout_scan() to demote that page
2989 * from "adjacent/clean-in-place" to
2990 * "target/clean-and-free" if it bumps into
2991 * this page during its scanning while we're
2992 * still processing this cluster.
2993 */
2994 dst_page->busy = TRUE;
2995 dst_page->encrypted_cleaning = TRUE;
2996 }
2997 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
2998 /*
2999 * deny access to the target page
3000 * while it is being worked on
3001 */
3002 if ((!dst_page->pageout) && (dst_page->wire_count == 0)) {
3003 dst_page->busy = TRUE;
3004 dst_page->pageout = TRUE;
3005 vm_page_wire(dst_page);
3006 }
3007 }
3008 } else {
3009 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
3010 /*
3011 * Honor copy-on-write obligations
3012 *
3013 * The copy object has changed since we
3014 * last synchronized for copy-on-write.
3015 * Another copy object might have been
3016 * inserted while we released the object's
3017 * lock. Since someone could have seen the
3018 * original contents of the remaining pages
3019 * through that new object, we have to
3020 * synchronize with it again for the remaining
3021 * pages only. The previous pages are "busy"
3022 * so they can not be seen through the new
3023 * mapping. The new mapping will see our
3024 * upcoming changes for those previous pages,
3025 * but that's OK since they couldn't see what
3026 * was there before. It's just a race anyway
3027 * and there's no guarantee of consistency or
3028 * atomicity. We just don't want new mappings
3029 * to see both the *before* and *after* pages.
3030 */
3031 if (object->copy != VM_OBJECT_NULL) {
3032 delayed_unlock = 0;
3033 vm_page_unlock_queues();
3034
3035 vm_object_update(
3036 object,
3037 dst_offset,/* current offset */
3038 xfer_size, /* remaining size */
3039 NULL,
3040 NULL,
3041 FALSE, /* should_return */
3042 MEMORY_OBJECT_COPY_SYNC,
3043 VM_PROT_NO_CHANGE);
3044
3045 upl_cow_again++;
3046 upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
3047
3048 vm_page_lock_queues();
3049 delayed_unlock = 1;
3050 }
3051 /*
3052 * remember the copy object we synced with
3053 */
3054 last_copy_object = object->copy;
3055 }
3056 dst_page = vm_page_lookup(object, dst_offset);
3057
3058 if (dst_page != VM_PAGE_NULL) {
3059 if ( !(dst_page->list_req_pending) ) {
3060 if ((cntrl_flags & UPL_RET_ONLY_ABSENT) && !dst_page->absent) {
3061 /*
3062 * skip over pages already present in the cache
3063 */
3064 if (user_page_list)
3065 user_page_list[entry].phys_addr = 0;
3066
3067 goto delay_unlock_queues;
3068 }
3069 if (dst_page->cleaning) {
3070 /*
3071 * someone else is writing to the page... wait...
3072 */
3073 delayed_unlock = 0;
3074 vm_page_unlock_queues();
3075
3076 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3077
3078 continue;
3079 }
3080 } else {
3081 if (dst_page->fictitious &&
3082 dst_page->phys_page == vm_page_fictitious_addr) {
3083 assert( !dst_page->speculative);
3084 /*
3085 * dump the fictitious page
3086 */
3087 dst_page->list_req_pending = FALSE;
3088
3089 vm_page_free(dst_page);
3090
3091 dst_page = NULL;
3092 } else if (dst_page->absent) {
3093 /*
3094 * the default_pager case
3095 */
3096 dst_page->list_req_pending = FALSE;
3097 dst_page->busy = FALSE;
3098 }
3099 }
3100 }
3101 if (dst_page == VM_PAGE_NULL) {
3102 if (object->private) {
3103 /*
3104 * This is a nasty wrinkle for users
3105 * of upl who encounter device or
3106 * private memory; however, it is
3107 * unavoidable: only a fault can
3108 * resolve the actual backing
3109 * physical page by asking the
3110 * backing device.
3111 */
3112 if (user_page_list)
3113 user_page_list[entry].phys_addr = 0;
3114
3115 goto delay_unlock_queues;
3116 }
3117 /*
3118 * need to allocate a page
3119 */
3120 dst_page = vm_page_grab();
3121
3122 if (dst_page == VM_PAGE_NULL) {
3123 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
3124 /*
3125 * we don't want to stall waiting for pages to come onto the free list
3126 * while we're already holding absent pages in this UPL
3127 * the caller will deal with the empty slots
3128 */
3129 if (user_page_list)
3130 user_page_list[entry].phys_addr = 0;
3131
3132 goto try_next_page;
3133 }
3134 /*
3135 * no pages available... wait
3136 * then try again for the same
3137 * offset...
3138 */
3139 delayed_unlock = 0;
3140 vm_page_unlock_queues();
3141
3142 vm_object_unlock(object);
3143 VM_PAGE_WAIT();
3144
3145 /*
3146 * pageout_scan takes the vm_page_lock_queues first
3147 * then tries for the object lock... to avoid what
3148 * is effectively a lock inversion, we'll go to the
3149 * trouble of taking them in that same order... otherwise
3150 * if this object contains the majority of the pages resident
3151 * in the UBC (or a small set of large objects actively being
3152 * worked on contain the majority of the pages), we could
3153 * cause the pageout_scan thread to 'starve' in its attempt
3154 * to find pages to move to the free queue, since it has to
3155 * successfully acquire the object lock of any candidate page
3156 * before it can steal/clean it.
3157 */
3158 for (j = 0; ; j++) {
3159 vm_page_lock_queues();
3160
3161 if (vm_object_lock_try(object))
3162 break;
3163 vm_page_unlock_queues();
3164 mutex_pause(j);
3165 }
3166 delayed_unlock = 1;
3167
3168 continue;
3169 }
3170 vm_page_insert_internal(dst_page, object, dst_offset, TRUE);
3171
3172 dst_page->absent = TRUE;
3173 dst_page->busy = FALSE;
3174
3175 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
3176 /*
3177 * if UPL_RET_ONLY_ABSENT was specified,
3178 * then we're definitely setting up a
3179 * upl for a clustered read/pagein
3180 * operation... mark the pages as clustered
3181 * so upl_commit_range can put them on the
3182 * speculative list
3183 */
3184 dst_page->clustered = TRUE;
3185 }
3186 }
3187 /*
3188 * ENCRYPTED SWAP:
3189 */
3190 if (cntrl_flags & UPL_ENCRYPT) {
3191 /*
3192 * The page is going to be encrypted when we
3193 * get it from the pager, so mark it so.
3194 */
3195 dst_page->encrypted = TRUE;
3196 } else {
3197 /*
3198 * Otherwise, the page will not contain
3199 * encrypted data.
3200 */
3201 dst_page->encrypted = FALSE;
3202 }
3203 dst_page->overwriting = TRUE;
3204
3205 if (dst_page->fictitious) {
3206 panic("need corner case for fictitious page");
3207 }
3208 if (dst_page->busy) {
3209 /*
3210 * someone else is playing with the
3211 * page. We will have to wait.
3212 */
3213 delayed_unlock = 0;
3214 vm_page_unlock_queues();
3215
3216 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3217
3218 continue;
3219 }
3220 if (dst_page->pmapped) {
3221 if ( !(cntrl_flags & UPL_FILE_IO))
3222 /*
3223 * eliminate all mappings from the
3224 * original object and its progeny
3225 */
3226 refmod_state = pmap_disconnect(dst_page->phys_page);
3227 else
3228 refmod_state = pmap_get_refmod(dst_page->phys_page);
3229 } else
3230 refmod_state = 0;
3231
3232 hw_dirty = refmod_state & VM_MEM_MODIFIED;
3233 dirty = hw_dirty ? TRUE : dst_page->dirty;
3234
3235 if (cntrl_flags & UPL_SET_LITE) {
3236 int pg_num;
3237
3238 pg_num = (dst_offset-offset)/PAGE_SIZE;
3239 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
3240
3241 if (hw_dirty)
3242 pmap_clear_modify(dst_page->phys_page);
3243
3244 /*
3245 * Mark original page as cleaning
3246 * in place.
3247 */
3248 dst_page->cleaning = TRUE;
3249 dst_page->precious = FALSE;
3250 } else {
3251 /*
3252 * use pageclean setup, it is more
3253 * convenient even for the pageout
3254 * cases here
3255 */
3256 vm_object_lock(upl->map_object);
3257 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
3258 vm_object_unlock(upl->map_object);
3259
3260 alias_page->absent = FALSE;
3261 alias_page = NULL;
3262 }
3263
3264 if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
3265 /*
3266 * clean in place for read implies
3267 * that a write will be done on all
3268 * the pages that are dirty before
3269 * a upl commit is done. The caller
3270 * is obligated to preserve the
3271 * contents of all pages marked dirty
3272 */
3273 upl->flags |= UPL_CLEAR_DIRTY;
3274 }
3275 dst_page->dirty = dirty;
3276
3277 if (!dirty)
3278 dst_page->precious = TRUE;
3279
3280 if (dst_page->wire_count == 0) {
3281 /*
3282 * deny access to the target page while
3283 * it is being worked on
3284 */
3285 dst_page->busy = TRUE;
3286 } else
3287 vm_page_wire(dst_page);
3288
3289 if (dst_page->clustered) {
3290 /*
3291 * expect the page not to be used
3292 * since it's coming in as part
3293 * of a speculative cluster...
3294 * pages that are 'consumed' will
3295 * get a hardware reference
3296 */
3297 dst_page->reference = FALSE;
3298 } else {
3299 /*
3300 * expect the page to be used
3301 */
3302 dst_page->reference = TRUE;
3303 }
3304 dst_page->precious = (cntrl_flags & UPL_PRECIOUS) ? TRUE : FALSE;
3305 }
3306 if (dst_page->phys_page > upl->highest_page)
3307 upl->highest_page = dst_page->phys_page;
3308 if (user_page_list) {
3309 user_page_list[entry].phys_addr = dst_page->phys_page;
3310 user_page_list[entry].dirty = dst_page->dirty;
3311 user_page_list[entry].pageout = dst_page->pageout;
3312 user_page_list[entry].absent = dst_page->absent;
3313 user_page_list[entry].precious = dst_page->precious;
3314
3315 if (dst_page->clustered == TRUE)
3316 user_page_list[entry].speculative = dst_page->speculative;
3317 else
3318 user_page_list[entry].speculative = FALSE;
3319 }
3320 /*
3321 * if UPL_RET_ONLY_ABSENT is set, then
3322 * we are working with a fresh page and we've
3323 * just set the clustered flag on it to
3324 * indicate that it was dragged in as part of a
3325 * speculative cluster... so leave it alone
3326 */
3327 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
3328 /*
3329 * someone is explicitly grabbing this page...
3330 * update clustered and speculative state
3331 *
3332 */
3333 VM_PAGE_CONSUME_CLUSTERED(dst_page);
3334 }
3335 delay_unlock_queues:
3336 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
3337 /*
3338 * pageout_scan takes the vm_page_lock_queues first
3339 * then tries for the object lock... to avoid what
3340 * is effectively a lock inversion, we'll go to the
3341 * trouble of taking them in that same order... otherwise
3342 * if this object contains the majority of the pages resident
3343 * in the UBC (or a small set of large objects actively being
3344 * worked on contain the majority of the pages), we could
3345 * cause the pageout_scan thread to 'starve' in its attempt
3346 * to find pages to move to the free queue, since it has to
3347 * successfully acquire the object lock of any candidate page
3348 * before it can steal/clean it.
3349 */
3350 vm_object_unlock(object);
3351 mutex_yield(&vm_page_queue_lock);
3352
3353 for (j = 0; ; j++) {
3354 if (vm_object_lock_try(object))
3355 break;
3356 vm_page_unlock_queues();
3357 mutex_pause(j);
3358 vm_page_lock_queues();
3359 }
3360 delayed_unlock = 1;
3361 }
3362 try_next_page:
3363 entry++;
3364 dst_offset += PAGE_SIZE_64;
3365 xfer_size -= PAGE_SIZE;
3366 }
3367 if (alias_page != NULL) {
3368 if (delayed_unlock == 0) {
3369 vm_page_lock_queues();
3370 delayed_unlock = 1;
3371 }
3372 vm_page_free(alias_page);
3373 }
3374 if (delayed_unlock)
3375 vm_page_unlock_queues();
3376
3377 if (page_list_count != NULL) {
3378 if (upl->flags & UPL_INTERNAL)
3379 *page_list_count = 0;
3380 else if (*page_list_count > entry)
3381 *page_list_count = entry;
3382 }
3383 vm_object_unlock(object);
3384
3385 return KERN_SUCCESS;
3386 }
3387
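/*
 * Sketch (not part of the build): the "lite list" indexing used above.
 * Each page covered by the UPL is tracked by a single bit, 32 bits to a
 * wpl_array_t word, so page N lives in word (N >> 5) at bit (N & 31).
 */
#if 0
static void
example_upl_lite_set(wpl_array_t lite_list, int pg_num)
{
	lite_list[pg_num >> 5] |= 1 << (pg_num & 31);
}

static boolean_t
example_upl_lite_test(wpl_array_t lite_list, int pg_num)
{
	return (lite_list[pg_num >> 5] & (1 << (pg_num & 31))) ? TRUE : FALSE;
}
#endif
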
3388 /* JMM - Backward compatibility for now */
3389 kern_return_t
3390 vm_fault_list_request( /* forward */
3391 memory_object_control_t control,
3392 vm_object_offset_t offset,
3393 upl_size_t size,
3394 upl_t *upl_ptr,
3395 upl_page_info_t **user_page_list_ptr,
3396 unsigned int page_list_count,
3397 int cntrl_flags);
3398 kern_return_t
3399 vm_fault_list_request(
3400 memory_object_control_t control,
3401 vm_object_offset_t offset,
3402 upl_size_t size,
3403 upl_t *upl_ptr,
3404 upl_page_info_t **user_page_list_ptr,
3405 unsigned int page_list_count,
3406 int cntrl_flags)
3407 {
3408 unsigned int local_list_count;
3409 upl_page_info_t *user_page_list;
3410 kern_return_t kr;
3411
3412 if (user_page_list_ptr != NULL) {
3413 local_list_count = page_list_count;
3414 user_page_list = *user_page_list_ptr;
3415 } else {
3416 local_list_count = 0;
3417 user_page_list = NULL;
3418 }
3419 kr = memory_object_upl_request(control,
3420 offset,
3421 size,
3422 upl_ptr,
3423 user_page_list,
3424 &local_list_count,
3425 cntrl_flags);
3426
3427 if(kr != KERN_SUCCESS)
3428 return kr;
3429
3430 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
3431 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
3432 }
3433
3434 return KERN_SUCCESS;
3435 }
3436
3437
3438
3439 /*
3440 * Routine: vm_object_super_upl_request
3441 * Purpose:
3442 * Cause the population of a portion of a vm_object
3443 * in much the same way as memory_object_upl_request.
3444 * Depending on the nature of the request, the pages
3445 * returned may contain valid data or be uninitialized.
3446 * However, the region may be expanded up to the super
3447 * cluster size provided.
3448 */
3449
3450 __private_extern__ kern_return_t
3451 vm_object_super_upl_request(
3452 vm_object_t object,
3453 vm_object_offset_t offset,
3454 upl_size_t size,
3455 upl_size_t super_cluster,
3456 upl_t *upl,
3457 upl_page_info_t *user_page_list,
3458 unsigned int *page_list_count,
3459 int cntrl_flags)
3460 {
3461 if (object->paging_offset > offset)
3462 return KERN_FAILURE;
3463
3464 assert(object->paging_in_progress);
3465 offset = offset - object->paging_offset;
3466
3467 if (super_cluster > size) {
3468
3469 vm_object_offset_t base_offset;
3470 upl_size_t super_size;
3471
3472 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
3473 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
3474 super_size = ((base_offset + super_size) > object->size) ? (object->size - base_offset) : super_size;
3475
3476 if (offset > (base_offset + super_size)) {
3477 panic("vm_object_super_upl_request: Missed target pageout"
3478 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
3479 offset, base_offset, super_size, super_cluster,
3480 size, object->paging_offset);
3481 }
3482 /*
3483 * apparently there is a case where the vm requests a
3484 * page to be written out whose offset is beyond the
3485 * object size
3486 */
3487 if ((offset + size) > (base_offset + super_size))
3488 super_size = (offset + size) - base_offset;
3489
3490 offset = base_offset;
3491 size = super_size;
3492 }
3493 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags);
3494 }
3495
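/*
 * Worked example (illustrative numbers only): with super_cluster = 0x10000
 * (64KB), a request for offset 0x13000, size 0x2000 rounds base_offset down
 * to 0x10000; since 0x13000 + 0x2000 does not cross base_offset +
 * super_cluster (0x20000), super_size stays at 0x10000 and the expanded
 * request covers [0x10000, 0x20000).  A request at offset 0x1F000, size
 * 0x2000 does cross that boundary, so super_size doubles to 0x20000 before
 * the object-size clamp above is applied.
 */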
3496
3497 kern_return_t
3498 vm_map_create_upl(
3499 vm_map_t map,
3500 vm_map_address_t offset,
3501 upl_size_t *upl_size,
3502 upl_t *upl,
3503 upl_page_info_array_t page_list,
3504 unsigned int *count,
3505 int *flags)
3506 {
3507 vm_map_entry_t entry;
3508 int caller_flags;
3509 int force_data_sync;
3510 int sync_cow_data;
3511 vm_object_t local_object;
3512 vm_map_offset_t local_offset;
3513 vm_map_offset_t local_start;
3514 kern_return_t ret;
3515
3516 caller_flags = *flags;
3517
3518 if (caller_flags & ~UPL_VALID_FLAGS) {
3519 /*
3520 * For forward compatibility's sake,
3521 * reject any unknown flag.
3522 */
3523 return KERN_INVALID_VALUE;
3524 }
3525 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
3526 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
3527
3528 if (upl == NULL)
3529 return KERN_INVALID_ARGUMENT;
3530
3531 REDISCOVER_ENTRY:
3532 vm_map_lock(map);
3533
3534 if (vm_map_lookup_entry(map, offset, &entry)) {
3535
3536 if ((entry->vme_end - offset) < *upl_size)
3537 *upl_size = entry->vme_end - offset;
3538
3539 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
3540 *flags = 0;
3541
3542 if (entry->object.vm_object != VM_OBJECT_NULL) {
3543 if (entry->object.vm_object->private)
3544 *flags = UPL_DEV_MEMORY;
3545
3546 if (entry->object.vm_object->phys_contiguous)
3547 *flags |= UPL_PHYS_CONTIG;
3548 }
3549 vm_map_unlock(map);
3550
3551 return KERN_SUCCESS;
3552 }
3553 if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) {
3554 if ((*upl_size/page_size) > MAX_UPL_SIZE)
3555 *upl_size = MAX_UPL_SIZE * page_size;
3556 }
3557 /*
3558 * Create an object if necessary.
3559 */
3560 if (entry->object.vm_object == VM_OBJECT_NULL) {
3561 entry->object.vm_object = vm_object_allocate((vm_size_t)(entry->vme_end - entry->vme_start));
3562 entry->offset = 0;
3563 }
3564 if (!(caller_flags & UPL_COPYOUT_FROM)) {
3565 if (!(entry->protection & VM_PROT_WRITE)) {
3566 vm_map_unlock(map);
3567 return KERN_PROTECTION_FAILURE;
3568 }
3569 if (entry->needs_copy) {
3570 vm_map_t local_map;
3571 vm_object_t object;
3572 vm_object_offset_t new_offset;
3573 vm_prot_t prot;
3574 boolean_t wired;
3575 vm_map_version_t version;
3576 vm_map_t real_map;
3577
3578 local_map = map;
3579 vm_map_lock_write_to_read(map);
3580
3581 if (vm_map_lookup_locked(&local_map,
3582 offset, VM_PROT_WRITE,
3583 OBJECT_LOCK_EXCLUSIVE,
3584 &version, &object,
3585 &new_offset, &prot, &wired,
3586 NULL,
3587 &real_map)) {
3588 vm_map_unlock(local_map);
3589 return KERN_FAILURE;
3590 }
3591 if (real_map != map)
3592 vm_map_unlock(real_map);
3593 vm_object_unlock(object);
3594 vm_map_unlock(local_map);
3595
3596 goto REDISCOVER_ENTRY;
3597 }
3598 }
3599 if (entry->is_sub_map) {
3600 vm_map_t submap;
3601
3602 submap = entry->object.sub_map;
3603 local_start = entry->vme_start;
3604 local_offset = entry->offset;
3605
3606 vm_map_reference(submap);
3607 vm_map_unlock(map);
3608
3609 ret = vm_map_create_upl(submap,
3610 local_offset + (offset - local_start),
3611 upl_size, upl, page_list, count, flags);
3612 vm_map_deallocate(submap);
3613
3614 return ret;
3615 }
3616 if (sync_cow_data) {
3617 if (entry->object.vm_object->shadow || entry->object.vm_object->copy) {
3618 local_object = entry->object.vm_object;
3619 local_start = entry->vme_start;
3620 local_offset = entry->offset;
3621
3622 vm_object_reference(local_object);
3623 vm_map_unlock(map);
3624
3625 if (entry->object.vm_object->shadow && entry->object.vm_object->copy) {
3626 vm_object_lock_request(
3627 local_object->shadow,
3628 (vm_object_offset_t)
3629 ((offset - local_start) +
3630 local_offset) +
3631 local_object->shadow_offset,
3632 *upl_size, FALSE,
3633 MEMORY_OBJECT_DATA_SYNC,
3634 VM_PROT_NO_CHANGE);
3635 }
3636 sync_cow_data = FALSE;
3637 vm_object_deallocate(local_object);
3638
3639 goto REDISCOVER_ENTRY;
3640 }
3641 }
3642 if (force_data_sync) {
3643 local_object = entry->object.vm_object;
3644 local_start = entry->vme_start;
3645 local_offset = entry->offset;
3646
3647 vm_object_reference(local_object);
3648 vm_map_unlock(map);
3649
3650 vm_object_lock_request(
3651 local_object,
3652 (vm_object_offset_t)
3653 ((offset - local_start) + local_offset),
3654 (vm_object_size_t)*upl_size, FALSE,
3655 MEMORY_OBJECT_DATA_SYNC,
3656 VM_PROT_NO_CHANGE);
3657
3658 force_data_sync = FALSE;
3659 vm_object_deallocate(local_object);
3660
3661 goto REDISCOVER_ENTRY;
3662 }
3663 if (entry->object.vm_object->private)
3664 *flags = UPL_DEV_MEMORY;
3665 else
3666 *flags = 0;
3667
3668 if (entry->object.vm_object->phys_contiguous)
3669 *flags |= UPL_PHYS_CONTIG;
3670
3671 local_object = entry->object.vm_object;
3672 local_offset = entry->offset;
3673 local_start = entry->vme_start;
3674
3675 vm_object_reference(local_object);
3676 vm_map_unlock(map);
3677
3678 ret = vm_object_iopl_request(local_object,
3679 (vm_object_offset_t) ((offset - local_start) + local_offset),
3680 *upl_size,
3681 upl,
3682 page_list,
3683 count,
3684 caller_flags);
3685 vm_object_deallocate(local_object);
3686
3687 return(ret);
3688 }
3689 vm_map_unlock(map);
3690
3691 return(KERN_FAILURE);
3692 }
3693
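/*
 * Sketch (not part of the build): the UPL_QUERY_OBJECT_TYPE path above lets
 * a caller probe what kind of memory backs a mapping without building a
 * UPL; only *flags is filled in before the query branch returns
 * KERN_SUCCESS.  The helper name below is ours.
 */
#if 0
static boolean_t
example_range_is_device_memory(vm_map_t map, vm_map_address_t offset)
{
	upl_size_t	upl_size = PAGE_SIZE;
	upl_t		upl = NULL;
	unsigned int	count = 0;
	int		flags = UPL_QUERY_OBJECT_TYPE;

	if (vm_map_create_upl(map, offset, &upl_size, &upl,
			      NULL, &count, &flags) != KERN_SUCCESS)
		return FALSE;

	return (flags & UPL_DEV_MEMORY) ? TRUE : FALSE;
}
#endif
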
3694 /*
3695 * Internal routine to enter a UPL into a VM map.
3696 *
3697 * JMM - This should just be doable through the standard
3698 * vm_map_enter() API.
3699 */
3700 kern_return_t
3701 vm_map_enter_upl(
3702 vm_map_t map,
3703 upl_t upl,
3704 vm_map_offset_t *dst_addr)
3705 {
3706 vm_map_size_t size;
3707 vm_object_offset_t offset;
3708 vm_map_offset_t addr;
3709 vm_page_t m;
3710 kern_return_t kr;
3711
3712 if (upl == UPL_NULL)
3713 return KERN_INVALID_ARGUMENT;
3714
3715 upl_lock(upl);
3716
3717 /*
3718 * check to see if already mapped
3719 */
3720 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
3721 upl_unlock(upl);
3722 return KERN_FAILURE;
3723 }
3724
3725 if ((!(upl->flags & UPL_SHADOWED)) && !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) ||
3726 (upl->map_object->phys_contiguous))) {
3727 vm_object_t object;
3728 vm_page_t alias_page;
3729 vm_object_offset_t new_offset;
3730 int pg_num;
3731 wpl_array_t lite_list;
3732
3733 if (upl->flags & UPL_INTERNAL) {
3734 lite_list = (wpl_array_t)
3735 ((((uintptr_t)upl) + sizeof(struct upl))
3736 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3737 } else {
3738 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
3739 }
3740 object = upl->map_object;
3741 upl->map_object = vm_object_allocate(upl->size);
3742
3743 vm_object_lock(upl->map_object);
3744
3745 upl->map_object->shadow = object;
3746 upl->map_object->pageout = TRUE;
3747 upl->map_object->can_persist = FALSE;
3748 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3749 upl->map_object->shadow_offset = upl->offset - object->paging_offset;
3750 upl->map_object->wimg_bits = object->wimg_bits;
3751 offset = upl->map_object->shadow_offset;
3752 new_offset = 0;
3753 size = upl->size;
3754
3755 upl->flags |= UPL_SHADOWED;
3756
3757 while (size) {
3758 pg_num = (new_offset)/PAGE_SIZE;
3759
3760 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3761
3762 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3763
3764 vm_object_lock(object);
3765
3766 m = vm_page_lookup(object, offset);
3767 if (m == VM_PAGE_NULL) {
3768 panic("vm_upl_map: page missing\n");
3769 }
3770
3771 /*
3772 * Convert the fictitious page to a private
3773 * shadow of the real page.
3774 */
3775 assert(alias_page->fictitious);
3776 alias_page->fictitious = FALSE;
3777 alias_page->private = TRUE;
3778 alias_page->pageout = TRUE;
3779 /*
3780 * since m is a page in the upl it must
3781 * already be wired or BUSY, so it's
3782 * safe to assign the underlying physical
3783 * page to the alias
3784 */
3785 alias_page->phys_page = m->phys_page;
3786
3787 vm_object_unlock(object);
3788
3789 vm_page_lockspin_queues();
3790 vm_page_wire(alias_page);
3791 vm_page_unlock_queues();
3792
3793 /*
3794 * ENCRYPTED SWAP:
3795 * The virtual page ("m") has to be wired in some way
3796 * here or its physical page ("m->phys_page") could
3797 * be recycled at any time.
3798 * Assuming this is enforced by the caller, we can't
3799 * get an encrypted page here. Since the encryption
3800 * key depends on the VM page's "pager" object and
3801 * the "paging_offset", we couldn't handle 2 pageable
3802 * VM pages (with different pagers and paging_offsets)
3803 * sharing the same physical page: we could end up
3804 * encrypting with one key (via one VM page) and
3805 * decrypting with another key (via the alias VM page).
3806 */
3807 ASSERT_PAGE_DECRYPTED(m);
3808
3809 vm_page_insert(alias_page, upl->map_object, new_offset);
3810
3811 assert(!alias_page->wanted);
3812 alias_page->busy = FALSE;
3813 alias_page->absent = FALSE;
3814 }
3815 size -= PAGE_SIZE;
3816 offset += PAGE_SIZE_64;
3817 new_offset += PAGE_SIZE_64;
3818 }
3819 vm_object_unlock(upl->map_object);
3820 }
3821 if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous)
3822 offset = upl->offset - upl->map_object->paging_offset;
3823 else
3824 offset = 0;
3825 size = upl->size;
3826
3827 vm_object_reference(upl->map_object);
3828
3829 *dst_addr = 0;
3830 /*
3831 * NEED A UPL_MAP ALIAS
3832 */
3833 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
3834 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE,
3835 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
3836
3837 if (kr != KERN_SUCCESS) {
3838 upl_unlock(upl);
3839 return(kr);
3840 }
3841 vm_object_lock(upl->map_object);
3842
3843 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
3844 m = vm_page_lookup(upl->map_object, offset);
3845
3846 if (m) {
3847 unsigned int cache_attr;
3848 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3849
3850 m->pmapped = TRUE;
3851 m->wpmapped = TRUE;
3852
3853 PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, cache_attr, TRUE);
3854 }
3855 offset += PAGE_SIZE_64;
3856 }
3857 vm_object_unlock(upl->map_object);
3858
3859 /*
3860 * hold a reference for the mapping
3861 */
3862 upl->ref_count++;
3863 upl->flags |= UPL_PAGE_LIST_MAPPED;
3864 upl->kaddr = *dst_addr;
3865 upl_unlock(upl);
3866
3867 return KERN_SUCCESS;
3868 }
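/*
 * Usage sketch (illustrative only; not taken from any particular caller):
 * a kernel client that needs a CPU-visible mapping of a UPL would pair
 * vm_map_enter_upl() with vm_map_remove_upl(), e.g.
 *
 *	vm_map_offset_t	kaddr;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter_upl(kernel_map, upl, &kaddr);
 *	if (kr == KERN_SUCCESS) {
 *		... access the UPL's pages through 'kaddr' ...
 *		(void) vm_map_remove_upl(kernel_map, upl);
 *	}
 *
 * The extra upl->ref_count taken here is the one that
 * vm_map_remove_upl() drops when it tears the mapping down.
 */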
3869
3870 /*
3871 * Internal routine to remove a UPL mapping from a VM map.
3872 *
3873 * XXX - This should just be doable through a standard
3874 * vm_map_remove() operation. Otherwise, implicit clean-up
3875 * of the target map won't be able to correctly remove
3876 * these (and release the reference on the UPL). Having
3877 * to do this means we can't map these into user-space
3878 * maps yet.
3879 */
3880 kern_return_t
3881 vm_map_remove_upl(
3882 vm_map_t map,
3883 upl_t upl)
3884 {
3885 vm_address_t addr;
3886 upl_size_t size;
3887
3888 if (upl == UPL_NULL)
3889 return KERN_INVALID_ARGUMENT;
3890
3891 upl_lock(upl);
3892
3893 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
3894 addr = upl->kaddr;
3895 size = upl->size;
3896
3897 assert(upl->ref_count > 1);
3898 upl->ref_count--; /* removing mapping ref */
3899
3900 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
3901 upl->kaddr = (vm_offset_t) 0;
3902 upl_unlock(upl);
3903
3904 vm_map_remove(map,
3905 vm_map_trunc_page(addr),
3906 vm_map_round_page(addr + size),
3907 VM_MAP_NO_FLAGS);
3908
3909 return KERN_SUCCESS;
3910 }
3911 upl_unlock(upl);
3912
3913 return KERN_FAILURE;
3914 }
3915
3916 kern_return_t
3917 upl_commit_range(
3918 upl_t upl,
3919 upl_offset_t offset,
3920 upl_size_t size,
3921 int flags,
3922 upl_page_info_t *page_list,
3923 mach_msg_type_number_t count,
3924 boolean_t *empty)
3925 {
3926 upl_size_t xfer_size;
3927 vm_object_t shadow_object;
3928 vm_object_t object;
3929 vm_object_offset_t target_offset;
3930 int entry;
3931 wpl_array_t lite_list;
3932 int occupied;
3933 int delayed_unlock = 0;
3934 int clear_refmod = 0;
3935 int pgpgout_count = 0;
3936 int j;
3937
3938 *empty = FALSE;
3939
3940 if (upl == UPL_NULL)
3941 return KERN_INVALID_ARGUMENT;
3942
3943 if (count == 0)
3944 page_list = NULL;
3945
3946 if (upl->flags & UPL_DEVICE_MEMORY)
3947 xfer_size = 0;
3948 else if ((offset + size) <= upl->size)
3949 xfer_size = size;
3950 else
3951 return KERN_FAILURE;
3952
3953 upl_lock(upl);
3954
3955 if (upl->flags & UPL_ACCESS_BLOCKED) {
3956 /*
3957 * We used this UPL to block access to the pages by marking
3958 * them "busy". Now we need to clear the "busy" bit to allow
3959 * access to these pages again.
3960 */
3961 flags |= UPL_COMMIT_ALLOW_ACCESS;
3962 }
3963 if (upl->flags & UPL_CLEAR_DIRTY)
3964 flags |= UPL_COMMIT_CLEAR_DIRTY;
3965
3966 if (upl->flags & UPL_INTERNAL)
3967 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
3968 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3969 else
3970 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
3971
3972 object = upl->map_object;
3973
3974 if (upl->flags & UPL_SHADOWED) {
3975 vm_object_lock(object);
3976 shadow_object = object->shadow;
3977 } else {
3978 shadow_object = object;
3979 }
3980 entry = offset/PAGE_SIZE;
3981 target_offset = (vm_object_offset_t)offset;
3982
3983 /*
3984 * pageout_scan takes the vm_page_lock_queues first
3985 * then tries for the object lock... to avoid what
3986 * is effectively a lock inversion, we'll go to the
3987 * trouble of taking them in that same order... otherwise
3988 * if this object contains the majority of the pages resident
3989 * in the UBC (or a small set of large objects actively being
3990 * worked on contain the majority of the pages), we could
3991 * cause the pageout_scan thread to 'starve' in its attempt
3992 * to find pages to move to the free queue, since it has to
3993 * successfully acquire the object lock of any candidate page
3994 * before it can steal/clean it.
3995 */
3996 for (j = 0; ; j++) {
3997 vm_page_lock_queues();
3998
3999 if (vm_object_lock_try(shadow_object))
4000 break;
4001 vm_page_unlock_queues();
4002 mutex_pause(j);
4003 }
4004 delayed_unlock = 1;
4005
4006 while (xfer_size) {
4007 vm_page_t t, m;
4008
4009 m = VM_PAGE_NULL;
4010
4011 if (upl->flags & UPL_LITE) {
4012 int pg_num;
4013
4014 pg_num = target_offset/PAGE_SIZE;
4015
4016 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4017 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4018
4019 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
4020 }
4021 }
4022 if (upl->flags & UPL_SHADOWED) {
4023 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
4024
4025 t->pageout = FALSE;
4026
4027 vm_page_free(t);
4028
4029 if (m == VM_PAGE_NULL)
4030 m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
4031 }
4032 }
4033 if (m != VM_PAGE_NULL) {
4034
4035 clear_refmod = 0;
4036
4037 if (upl->flags & UPL_IO_WIRE) {
4038
4039 vm_page_unwire(m);
4040
4041 if (page_list)
4042 page_list[entry].phys_addr = 0;
4043
4044 if (flags & UPL_COMMIT_SET_DIRTY)
4045 m->dirty = TRUE;
4046 else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
4047 m->dirty = FALSE;
4048 if (m->cs_validated && !m->cs_tainted) {
4049 /*
4050 * CODE SIGNING:
4051 * This page is no longer dirty
4052 * but could have been modified,
4053 * so it will need to be
4054 * re-validated.
4055 */
4056 m->cs_validated = FALSE;
4057 vm_cs_validated_resets++;
4058 }
4059 clear_refmod |= VM_MEM_MODIFIED;
4060 }
4061 if (flags & UPL_COMMIT_INACTIVATE)
4062 vm_page_deactivate(m);
4063
4064 if (clear_refmod)
4065 pmap_clear_refmod(m->phys_page, clear_refmod);
4066
4067 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
4068 /*
4069 * We blocked access to the pages in this UPL.
4070 * Clear the "busy" bit and wake up any waiter
4071 * for this page.
4072 */
4073 PAGE_WAKEUP_DONE(m);
4074 }
4075 goto commit_next_page;
4076 }
4077 /*
4078 * make sure to clear the hardware
4079 * modify or reference bits before
4080 * releasing the BUSY bit on this page
4081 * otherwise we risk losing a legitimate
4082 * change of state
4083 */
4084 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
4085 m->dirty = FALSE;
4086 if (m->cs_validated && !m->cs_tainted) {
4087 /*
4088 * CODE SIGNING:
4089 * This page is no longer dirty
4090 * but could have been modified,
4091 * so it will need to be
4092 * re-validated.
4093 */
4094 m->cs_validated = FALSE;
4095 vm_cs_validated_resets++;
4096 }
4097 clear_refmod |= VM_MEM_MODIFIED;
4098 }
4099 if (clear_refmod)
4100 pmap_clear_refmod(m->phys_page, clear_refmod);
4101
4102 if (page_list) {
4103 upl_page_info_t *p;
4104
4105 p = &(page_list[entry]);
4106
4107 if (p->phys_addr && p->pageout && !m->pageout) {
4108 m->busy = TRUE;
4109 m->pageout = TRUE;
4110 vm_page_wire(m);
4111 } else if (p->phys_addr &&
4112 !p->pageout && m->pageout &&
4113 !m->dump_cleaning) {
4114 m->pageout = FALSE;
4115 m->absent = FALSE;
4116 m->overwriting = FALSE;
4117 vm_page_unwire(m);
4118
4119 PAGE_WAKEUP_DONE(m);
4120 }
4121 page_list[entry].phys_addr = 0;
4122 }
4123 m->dump_cleaning = FALSE;
4124
4125 if (m->laundry)
4126 vm_pageout_throttle_up(m);
4127
4128 if (m->pageout) {
4129 m->cleaning = FALSE;
4130 m->encrypted_cleaning = FALSE;
4131 m->pageout = FALSE;
4132 #if MACH_CLUSTER_STATS
4133 if (m->wanted) vm_pageout_target_collisions++;
4134 #endif
4135 m->dirty = FALSE;
4136 if (m->cs_validated && !m->cs_tainted) {
4137 /*
4138 * CODE SIGNING:
4139 * This page is no longer dirty
4140 * but could have been modified,
4141 * so it will need to be
4142 * re-validated.
4143 */
4144 m->cs_validated = FALSE;
4145 vm_cs_validated_resets++;
4146 }
4147
4148 if (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))
4149 m->dirty = TRUE;
4150
4151 if (m->dirty) {
4152 /*
4153 * page was re-dirtied after we started
4154 * the pageout... reactivate it since
4155 * we don't know whether the on-disk
4156 * copy matches what is now in memory
4157 */
4158 vm_page_unwire(m);
4159
4160 if (upl->flags & UPL_PAGEOUT) {
4161 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
4162 VM_STAT_INCR(reactivations);
4163 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4164 }
4165 PAGE_WAKEUP_DONE(m);
4166 } else {
4167 /*
4168 * page has been successfully cleaned
4169 * go ahead and free it for other use
4170 */
4171
4172 if (m->object->internal) {
4173 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
4174 } else {
4175 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
4176 }
4177
4178 vm_page_free(m);
4179
4180 if (upl->flags & UPL_PAGEOUT) {
4181 CLUSTER_STAT(vm_pageout_target_page_freed++;)
4182
4183 if (page_list[entry].dirty) {
4184 VM_STAT_INCR(pageouts);
4185 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
4186 pgpgout_count++;
4187 }
4188 }
4189 }
4190 goto commit_next_page;
4191 }
4192 #if MACH_CLUSTER_STATS
4193 if (m->wpmapped)
4194 m->dirty = pmap_is_modified(m->phys_page);
4195
4196 if (m->dirty) vm_pageout_cluster_dirtied++;
4197 else vm_pageout_cluster_cleaned++;
4198 if (m->wanted) vm_pageout_cluster_collisions++;
4199 #endif
4200 m->dirty = FALSE;
4201 if (m->cs_validated && !m->cs_tainted) {
4202 /*
4203 * CODE SIGNING:
4204 * This page is no longer dirty
4205 * but could have been modified,
4206 * so it will need to be
4207 * re-validated.
4208 */
4209 m->cs_validated = FALSE;
4210 vm_cs_validated_resets++;
4211 }
4212
4213 if ((m->busy) && (m->cleaning)) {
4214 /*
4215 * the request_page_list case
4216 */
4217 m->absent = FALSE;
4218 m->overwriting = FALSE;
4219 m->busy = FALSE;
4220 } else if (m->overwriting) {
4221 /*
4222 * alternate request page list, write to
4223 * page_list case. Occurs when the original
4224 * page was wired at the time of the list
4225 * request
4226 */
4227 assert(m->wire_count != 0);
4228 vm_page_unwire(m);/* reactivates */
4229 m->overwriting = FALSE;
4230 }
4231 m->cleaning = FALSE;
4232 m->encrypted_cleaning = FALSE;
4233
4234 /*
4235 * It is part of the semantics of COPYOUT_FROM
4236 * UPLs that a commit implies a cache sync
4237 * between the VM page and the backing store;
4238 * this can be used to strip the precious bit
4239 * as well as to clean
4240 */
4241 if (upl->flags & UPL_PAGE_SYNC_DONE)
4242 m->precious = FALSE;
4243
4244 if (flags & UPL_COMMIT_SET_DIRTY)
4245 m->dirty = TRUE;
4246
4247 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
4248 vm_page_deactivate(m);
4249 } else if (!m->active && !m->inactive && !m->speculative) {
4250
4251 if (m->clustered)
4252 vm_page_speculate(m, TRUE);
4253 else if (m->reference)
4254 vm_page_activate(m);
4255 else
4256 vm_page_deactivate(m);
4257 }
4258 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
4259 /*
4260 * We blocked access to the pages in this UPL.
4261 * Clear the "busy" bit on this page before we
4262 * wake up any waiter.
4263 */
4264 m->busy = FALSE;
4265 }
4266 /*
4267 * Wake up any thread waiting for this page to finish cleaning.
4268 */
4269 PAGE_WAKEUP(m);
4270 }
4271 commit_next_page:
4272 target_offset += PAGE_SIZE_64;
4273 xfer_size -= PAGE_SIZE;
4274 entry++;
4275
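/*
 * Every UPL_DELAYED_UNLOCK_LIMIT pages, briefly drop the object lock and
 * yield the page queues lock so that neither lock is held across the
 * entire commit of a large UPL.
 */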
4276 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
4277 /*
4278 * pageout_scan takes the vm_page_lock_queues first
4279 * then tries for the object lock... to avoid what
4280 * is effectively a lock inversion, we'll go to the
4281 * trouble of taking them in that same order... otherwise
4282 * if this object contains the majority of the pages resident
4283 * in the UBC (or a small set of large objects actively being
4284 * worked on contain the majority of the pages), we could
4285 * cause the pageout_scan thread to 'starve' in its attempt
4286 * to find pages to move to the free queue, since it has to
4287 * successfully acquire the object lock of any candidate page
4288 * before it can steal/clean it.
4289 */
4290 vm_object_unlock(shadow_object);
4291 mutex_yield(&vm_page_queue_lock);
4292
4293 for (j = 0; ; j++) {
4294 if (vm_object_lock_try(shadow_object))
4295 break;
4296 vm_page_unlock_queues();
4297 mutex_pause(j);
4298 vm_page_lock_queues();
4299 }
4300 delayed_unlock = 1;
4301 }
4302 }
4303 if (delayed_unlock)
4304 vm_page_unlock_queues();
4305
4306 occupied = 1;
4307
4308 if (upl->flags & UPL_DEVICE_MEMORY) {
4309 occupied = 0;
4310 } else if (upl->flags & UPL_LITE) {
4311 int pg_num;
4312 int i;
4313
4314 pg_num = upl->size/PAGE_SIZE;
4315 pg_num = (pg_num + 31) >> 5;
4316 occupied = 0;
4317
4318 for (i = 0; i < pg_num; i++) {
4319 if (lite_list[i] != 0) {
4320 occupied = 1;
4321 break;
4322 }
4323 }
4324 } else {
4325 if (queue_empty(&upl->map_object->memq))
4326 occupied = 0;
4327 }
4328 if (occupied == 0) {
4329 if (upl->flags & UPL_COMMIT_NOTIFY_EMPTY)
4330 *empty = TRUE;
4331
4332 if (object == shadow_object) {
4333 /*
4334 * this is not a paging object
4335 * so we need to drop the paging reference
4336 * that was taken when we created the UPL
4337 * against this object
4338 */
4339 vm_object_paging_end(shadow_object);
4340 } else {
4341 /*
4342 * we donated the paging reference to
4343 * the map object... vm_pageout_object_terminate
4344 * will drop this reference
4345 */
4346 }
4347 }
4348 vm_object_unlock(shadow_object);
4349 if (object != shadow_object)
4350 vm_object_unlock(object);
4351 upl_unlock(upl);
4352
4353 if (pgpgout_count) {
4354 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
4355 }
4356
4357 return KERN_SUCCESS;
4358 }
4359
4360 kern_return_t
4361 upl_abort_range(
4362 upl_t upl,
4363 upl_offset_t offset,
4364 upl_size_t size,
4365 int error,
4366 boolean_t *empty)
4367 {
4368 upl_size_t xfer_size;
4369 vm_object_t shadow_object;
4370 vm_object_t object;
4371 vm_object_offset_t target_offset;
4372 int entry;
4373 wpl_array_t lite_list;
4374 int occupied;
4375 int delayed_unlock = 0;
4376 int j;
4377
4378 *empty = FALSE;
4379
4380 if (upl == UPL_NULL)
4381 return KERN_INVALID_ARGUMENT;
4382
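/*
 * Aborting a wired-I/O UPL without UPL_ABORT_DUMP_PAGES only needs to
 * unwire the pages, which is what a commit with no flags does, so hand
 * the work off to upl_commit_range().
 */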
4383 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
4384 return upl_commit_range(upl, offset, size, 0, NULL, 0, empty);
4385
4386 if (upl->flags & UPL_DEVICE_MEMORY)
4387 xfer_size = 0;
4388 else if ((offset + size) <= upl->size)
4389 xfer_size = size;
4390 else
4391 return KERN_FAILURE;
4392
4393 upl_lock(upl);
4394
4395 if (upl->flags & UPL_INTERNAL) {
4396 lite_list = (wpl_array_t)
4397 ((((uintptr_t)upl) + sizeof(struct upl))
4398 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4399 } else {
4400 lite_list = (wpl_array_t)
4401 (((uintptr_t)upl) + sizeof(struct upl));
4402 }
4403 object = upl->map_object;
4404
4405 if (upl->flags & UPL_SHADOWED) {
4406 vm_object_lock(object);
4407 shadow_object = object->shadow;
4408 } else
4409 shadow_object = object;
4410
4411 entry = offset/PAGE_SIZE;
4412 target_offset = (vm_object_offset_t)offset;
4413
4414 /*
4415 * pageout_scan takes the vm_page_lock_queues first
4416 * then tries for the object lock... to avoid what
4417 * is effectively a lock inversion, we'll go to the
4418 * trouble of taking them in that same order... otherwise
4419 * if this object contains the majority of the pages resident
4420 * in the UBC (or a small set of large objects actively being
4421 * worked on contain the majority of the pages), we could
4422 * cause the pageout_scan thread to 'starve' in its attempt
4423 * to find pages to move to the free queue, since it has to
4424 * successfully acquire the object lock of any candidate page
4425 * before it can steal/clean it.
4426 */
4427 for (j = 0; ; j++) {
4428 vm_page_lock_queues();
4429
4430 if (vm_object_lock_try(shadow_object))
4431 break;
4432 vm_page_unlock_queues();
4433 mutex_pause(j);
4434 }
4435 delayed_unlock = 1;
4436
4437 while (xfer_size) {
4438 vm_page_t t, m;
4439
4440 m = VM_PAGE_NULL;
4441
4442 if (upl->flags & UPL_LITE) {
4443 int pg_num;
4444 pg_num = target_offset/PAGE_SIZE;
4445
4446 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4447 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4448
4449 m = vm_page_lookup(shadow_object, target_offset +
4450 (upl->offset - shadow_object->paging_offset));
4451 }
4452 }
4453 if (upl->flags & UPL_SHADOWED) {
4454 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
4455 t->pageout = FALSE;
4456
4457 vm_page_free(t);
4458
4459 if (m == VM_PAGE_NULL)
4460 m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
4461 }
4462 }
4463 if (m != VM_PAGE_NULL) {
4464
4465 if (m->absent) {
4466 boolean_t must_free = TRUE;
4467
4468 m->clustered = FALSE;
4469 /*
4470 * COPYOUT = FALSE case
4471 * check for error conditions which must
4472 * be passed back to the pages customer
4473 */
4474 if (error & UPL_ABORT_RESTART) {
4475 m->restart = TRUE;
4476 m->absent = FALSE;
4477 m->error = TRUE;
4478 m->unusual = TRUE;
4479 must_free = FALSE;
4480 } else if (error & UPL_ABORT_UNAVAILABLE) {
4481 m->restart = FALSE;
4482 m->unusual = TRUE;
4483 must_free = FALSE;
4484 } else if (error & UPL_ABORT_ERROR) {
4485 m->restart = FALSE;
4486 m->absent = FALSE;
4487 m->error = TRUE;
4488 m->unusual = TRUE;
4489 must_free = FALSE;
4490 }
4491
4492 /*
4493 * ENCRYPTED SWAP:
4494 * If the page was already encrypted,
4495 * we don't really need to decrypt it
4496 * now. It will get decrypted later,
4497 * on demand, as soon as someone needs
4498 * to access its contents.
4499 */
4500
4501 m->cleaning = FALSE;
4502 m->encrypted_cleaning = FALSE;
4503 m->overwriting = FALSE;
4504 PAGE_WAKEUP_DONE(m);
4505
4506 if (must_free == TRUE)
4507 vm_page_free(m);
4508 else
4509 vm_page_activate(m);
4510 } else {
4511 /*
4512 * Handle the trusted pager throttle.
4513 */
4514 if (m->laundry)
4515 vm_pageout_throttle_up(m);
4516
4517 if (m->pageout) {
4518 assert(m->busy);
4519 assert(m->wire_count == 1);
4520 m->pageout = FALSE;
4521 vm_page_unwire(m);
4522 }
4523 m->dump_cleaning = FALSE;
4524 m->cleaning = FALSE;
4525 m->encrypted_cleaning = FALSE;
4526 m->overwriting = FALSE;
4527 #if MACH_PAGEMAP
4528 vm_external_state_clr(m->object->existence_map, m->offset);
4529 #endif /* MACH_PAGEMAP */
4530 if (error & UPL_ABORT_DUMP_PAGES) {
4531 pmap_disconnect(m->phys_page);
4532 vm_page_free(m);
4533 } else {
4534 if (error & UPL_ABORT_REFERENCE) {
4535 /*
4536 * we've been told to explicitly
4537 * reference this page... for
4538 * file I/O, this is done by
4539 * implementing an LRU on the inactive q
4540 */
4541 vm_page_lru(m);
4542 }
4543 PAGE_WAKEUP_DONE(m);
4544 }
4545 }
4546 }
4547 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
4548 /*
4549 * pageout_scan takes the vm_page_lock_queues first
4550 * then tries for the object lock... to avoid what
4551 * is effectively a lock inversion, we'll go to the
4552 * trouble of taking them in that same order... otherwise
4553 * if this object contains the majority of the pages resident
4554 * in the UBC (or a small set of large objects actively being
4555 * worked on contain the majority of the pages), we could
4556 * cause the pageout_scan thread to 'starve' in its attempt
4557 * to find pages to move to the free queue, since it has to
4558 * successfully acquire the object lock of any candidate page
4559 * before it can steal/clean it.
4560 */
4561 vm_object_unlock(shadow_object);
4562 mutex_yield(&vm_page_queue_lock);
4563
4564 for (j = 0; ; j++) {
4565 if (vm_object_lock_try(shadow_object))
4566 break;
4567 vm_page_unlock_queues();
4568 mutex_pause(j);
4569 vm_page_lock_queues();
4570 }
4571 delayed_unlock = 1;
4572 }
4573 target_offset += PAGE_SIZE_64;
4574 xfer_size -= PAGE_SIZE;
4575 entry++;
4576 }
4577 if (delayed_unlock)
4578 vm_page_unlock_queues();
4579
4580 occupied = 1;
4581
4582 if (upl->flags & UPL_DEVICE_MEMORY) {
4583 occupied = 0;
4584 } else if (upl->flags & UPL_LITE) {
4585 int pg_num;
4586 int i;
4587
4588 pg_num = upl->size/PAGE_SIZE;
4589 pg_num = (pg_num + 31) >> 5;
4590 occupied = 0;
4591
4592 for (i = 0; i < pg_num; i++) {
4593 if (lite_list[i] != 0) {
4594 occupied = 1;
4595 break;
4596 }
4597 }
4598 } else {
4599 if (queue_empty(&upl->map_object->memq))
4600 occupied = 0;
4601 }
4602 if (occupied == 0) {
4603 if (upl->flags & UPL_COMMIT_NOTIFY_EMPTY)
4604 *empty = TRUE;
4605
4606 if (object == shadow_object) {
4607 /*
4608 * this is not a paging object
4609 * so we need to drop the paging reference
4610 * that was taken when we created the UPL
4611 * against this object
4612 */
4613 vm_object_paging_end(shadow_object);
4614 } else {
4615 /*
4616 * we donated the paging reference to
4617 * the map object... vm_pageout_object_terminate
4618 * will drop this reference
4619 */
4620 }
4621 }
4622 vm_object_unlock(shadow_object);
4623 if (object != shadow_object)
4624 vm_object_unlock(object);
4625 upl_unlock(upl);
4626
4627 return KERN_SUCCESS;
4628 }
4629
4630
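/*
 * upl_abort() and upl_commit() below are whole-UPL conveniences built on
 * the *_range routines above; the 'empty' result they compute is simply
 * discarded.
 */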
4631 kern_return_t
4632 upl_abort(
4633 upl_t upl,
4634 int error)
4635 {
4636 boolean_t empty;
4637
4638 return upl_abort_range(upl, 0, upl->size, error, &empty);
4639 }
4640
4641
4642 /* an option on commit should be wire */
4643 kern_return_t
4644 upl_commit(
4645 upl_t upl,
4646 upl_page_info_t *page_list,
4647 mach_msg_type_number_t count)
4648 {
4649 boolean_t empty;
4650
4651 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
4652 }
4653
4654
4655 kern_return_t
4656 vm_object_iopl_request(
4657 vm_object_t object,
4658 vm_object_offset_t offset,
4659 upl_size_t size,
4660 upl_t *upl_ptr,
4661 upl_page_info_array_t user_page_list,
4662 unsigned int *page_list_count,
4663 int cntrl_flags)
4664 {
4665 vm_page_t dst_page;
4666 vm_object_offset_t dst_offset;
4667 upl_size_t xfer_size;
4668 upl_t upl = NULL;
4669 unsigned int entry;
4670 wpl_array_t lite_list = NULL;
4671 int delayed_unlock = 0;
4672 int no_zero_fill = FALSE;
4673 u_int32_t psize;
4674 kern_return_t ret;
4675 vm_prot_t prot;
4676 struct vm_object_fault_info fault_info;
4677
4678
4679 if (cntrl_flags & ~UPL_VALID_FLAGS) {
4680 /*
4681 * For forward compatibility's sake,
4682 * reject any unknown flag.
4683 */
4684 return KERN_INVALID_VALUE;
4685 }
4686 if (vm_lopage_poolsize == 0)
4687 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
4688
4689 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
4690 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
4691 return KERN_INVALID_VALUE;
4692
4693 if (object->phys_contiguous) {
4694 if ((offset + object->shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
4695 return KERN_INVALID_ADDRESS;
4696
4697 if (((offset + object->shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
4698 return KERN_INVALID_ADDRESS;
4699 }
4700 }
4701
4702 if (cntrl_flags & UPL_ENCRYPT) {
4703 /*
4704 * ENCRYPTED SWAP:
4705 * The paging path doesn't use this interface,
4706 * so we don't support the UPL_ENCRYPT flag
4707 * here. We won't encrypt the pages.
4708 */
4709 assert(! (cntrl_flags & UPL_ENCRYPT));
4710 }
4711 if (cntrl_flags & UPL_NOZEROFILL)
4712 no_zero_fill = TRUE;
4713
4714 if (cntrl_flags & UPL_COPYOUT_FROM)
4715 prot = VM_PROT_READ;
4716 else
4717 prot = VM_PROT_READ | VM_PROT_WRITE;
4718
4719 if (((size/page_size) > MAX_UPL_SIZE) && !object->phys_contiguous)
4720 size = MAX_UPL_SIZE * page_size;
4721
4722 if (cntrl_flags & UPL_SET_INTERNAL) {
4723 if (page_list_count != NULL)
4724 *page_list_count = MAX_UPL_SIZE;
4725 }
4726 if (((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
4727 ((page_list_count != NULL) && (*page_list_count != 0) && *page_list_count < (size/page_size)))
4728 return KERN_INVALID_ARGUMENT;
4729
4730 if ((!object->internal) && (object->paging_offset != 0))
4731 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
4732
4733
4734 if (object->phys_contiguous)
4735 psize = PAGE_SIZE;
4736 else
4737 psize = size;
4738
4739 if (cntrl_flags & UPL_SET_INTERNAL) {
4740 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, UPL_IO_WIRE, psize);
4741
4742 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
4743 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
4744 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
4745 } else {
4746 upl = upl_create(UPL_CREATE_LITE, UPL_IO_WIRE, psize);
4747
4748 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
4749 }
4750 if (user_page_list)
4751 user_page_list[0].device = FALSE;
4752 *upl_ptr = upl;
4753
4754 upl->map_object = object;
4755 upl->size = size;
4756
4757 vm_object_lock(object);
4758 vm_object_paging_begin(object);
4759 /*
4760 * paging in progress also protects the paging_offset
4761 */
4762 upl->offset = offset + object->paging_offset;
4763
4764 if (object->phys_contiguous) {
4765 #ifdef UPL_DEBUG
4766 queue_enter(&object->uplq, upl, upl_t, uplq);
4767 #endif /* UPL_DEBUG */
4768
4769 vm_object_unlock(object);
4770
4771 /*
4772 * don't need any shadow mappings for this one
4773 * since it is already I/O memory
4774 */
4775 upl->flags |= UPL_DEVICE_MEMORY;
4776
4777 upl->highest_page = (offset + object->shadow_offset + size - 1)>>PAGE_SHIFT;
4778
4779 if (user_page_list) {
4780 user_page_list[0].phys_addr = (offset + object->shadow_offset)>>PAGE_SHIFT;
4781 user_page_list[0].device = TRUE;
4782 }
4783 if (page_list_count != NULL) {
4784 if (upl->flags & UPL_INTERNAL)
4785 *page_list_count = 0;
4786 else
4787 *page_list_count = 1;
4788 }
4789 return KERN_SUCCESS;
4790 }
4791 /*
4792 * Protect user space from future COW operations
4793 */
4794 object->true_share = TRUE;
4795
4796 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
4797 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4798
4799 #ifdef UPL_DEBUG
4800 queue_enter(&object->uplq, upl, upl_t, uplq);
4801 #endif /* UPL_DEBUG */
4802
4803 if (cntrl_flags & UPL_BLOCK_ACCESS) {
4804 /*
4805 * The user requested that access to the pages in this UPL
4806 * be blocked until the UPL is committed or aborted.
4807 */
4808 upl->flags |= UPL_ACCESS_BLOCKED;
4809 }
4810 entry = 0;
4811
4812 xfer_size = size;
4813 dst_offset = offset;
4814
4815 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
4816 fault_info.user_tag = 0;
4817 fault_info.lo_offset = offset;
4818 fault_info.hi_offset = offset + xfer_size;
4819 fault_info.no_cache = FALSE;
4820
4821 while (xfer_size) {
4822 vm_fault_return_t result;
4823 int pg_num;
4824
4825 dst_page = vm_page_lookup(object, dst_offset);
4826
4827 /*
4828 * ENCRYPTED SWAP:
4829 * If the page is encrypted, we need to decrypt it,
4830 * so force a soft page fault.
4831 */
4832 if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
4833 (dst_page->encrypted) ||
4834 (dst_page->unusual && (dst_page->error ||
4835 dst_page->restart ||
4836 dst_page->absent ||
4837 dst_page->fictitious))) {
4838
4839 do {
4840 vm_page_t top_page;
4841 kern_return_t error_code;
4842 int interruptible;
4843
4844 if (delayed_unlock) {
4845 delayed_unlock = 0;
4846 vm_page_unlock_queues();
4847 }
4848 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
4849 interruptible = THREAD_ABORTSAFE;
4850 else
4851 interruptible = THREAD_UNINT;
4852
4853 fault_info.interruptible = interruptible;
4854 fault_info.cluster_size = xfer_size;
4855
4856 result = vm_fault_page(object, dst_offset,
4857 prot | VM_PROT_WRITE, FALSE,
4858 &prot, &dst_page, &top_page,
4859 (int *)0,
4860 &error_code, no_zero_fill,
4861 FALSE, &fault_info);
4862
4863 switch (result) {
4864
4865 case VM_FAULT_SUCCESS:
4866
4867 PAGE_WAKEUP_DONE(dst_page);
4868 /*
4869 * Release paging references and
4870 * top-level placeholder page, if any.
4871 */
4872 if (top_page != VM_PAGE_NULL) {
4873 vm_object_t local_object;
4874
4875 local_object = top_page->object;
4876
4877 if (top_page->object != dst_page->object) {
4878 vm_object_lock(local_object);
4879 VM_PAGE_FREE(top_page);
4880 vm_object_paging_end(local_object);
4881 vm_object_unlock(local_object);
4882 } else {
4883 VM_PAGE_FREE(top_page);
4884 vm_object_paging_end(local_object);
4885 }
4886 }
4887 break;
4888
4889 case VM_FAULT_RETRY:
4890 vm_object_lock(object);
4891 vm_object_paging_begin(object);
4892 break;
4893
4894 case VM_FAULT_FICTITIOUS_SHORTAGE:
4895 vm_page_more_fictitious();
4896
4897 vm_object_lock(object);
4898 vm_object_paging_begin(object);
4899 break;
4900
4901 case VM_FAULT_MEMORY_SHORTAGE:
4902 if (vm_page_wait(interruptible)) {
4903 vm_object_lock(object);
4904 vm_object_paging_begin(object);
4905 break;
4906 }
4907 /* fall thru */
4908
4909 case VM_FAULT_INTERRUPTED:
4910 error_code = MACH_SEND_INTERRUPTED;
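/* fall thru */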
4911 case VM_FAULT_MEMORY_ERROR:
4912 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
4913
4914 vm_object_lock(object);
4915 vm_object_paging_begin(object);
4916 goto return_err;
4917 }
4918 } while (result != VM_FAULT_SUCCESS);
4919 }
4920
4921 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
4922 dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
4923 vm_page_t low_page;
4924 int refmod;
4925
4926 /*
4927 * support devices that can't DMA above 32 bits
4928 * by substituting pages from a pool of low-address
4929 * memory for any pages we find above the 4G mark...
4930 * we can't substitute if the page is already wired, because
4931 * we don't know whether that physical address has been
4932 * handed out to some other 64-bit-capable DMA device to use
4933 */
4934 if (dst_page->wire_count) {
4935 ret = KERN_PROTECTION_FAILURE;
4936 goto return_err;
4937 }
4938 if (delayed_unlock) {
4939 delayed_unlock = 0;
4940 vm_page_unlock_queues();
4941 }
4942 low_page = vm_page_grablo();
4943
4944 if (low_page == VM_PAGE_NULL) {
4945 ret = KERN_RESOURCE_SHORTAGE;
4946 goto return_err;
4947 }
4948 /*
4949 * from here until the vm_page_replace completes
4950 * we mustn't drop the object lock... we don't
4951 * want anyone refaulting this page in and using
4952 * it after we disconnect it... we want the fault
4953 * to find the new page being substituted.
4954 */
4955 if (dst_page->pmapped)
4956 refmod = pmap_disconnect(dst_page->phys_page);
4957 else
4958 refmod = 0;
4959 vm_page_copy(dst_page, low_page);
4960
4961 low_page->reference = dst_page->reference;
4962 low_page->dirty = dst_page->dirty;
4963
4964 if (refmod & VM_MEM_REFERENCED)
4965 low_page->reference = TRUE;
4966 if (refmod & VM_MEM_MODIFIED)
4967 low_page->dirty = TRUE;
4968
4969 vm_page_lock_queues();
4970 vm_page_replace(low_page, object, dst_offset);
4971 /*
4972 * keep the queue lock since we're going to
4973 * need it immediately
4974 */
4975 delayed_unlock = 1;
4976
4977 dst_page = low_page;
4978 /*
4979 * vm_page_grablo returned the page marked
4980 * BUSY... we don't need a PAGE_WAKEUP_DONE
4981 * here, because we've never dropped the object lock
4982 */
4983 dst_page->busy = FALSE;
4984 }
4985 if (delayed_unlock == 0)
4986 vm_page_lock_queues();
4987
4988 vm_page_wire(dst_page);
4989
4990 if (cntrl_flags & UPL_BLOCK_ACCESS) {
4991 /*
4992 * Mark the page "busy" to block any future page fault
4993 * on this page. We'll also remove the mapping
4994 * of all these pages before leaving this routine.
4995 */
4996 assert(!dst_page->fictitious);
4997 dst_page->busy = TRUE;
4998 }
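/*
 * Record this page in the lite list: bit (pg_num & 31) of 32-bit word
 * (pg_num >> 5) marks page 'pg_num' of the UPL as wired by this request.
 */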
4999 pg_num = (dst_offset-offset)/PAGE_SIZE;
5000 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5001
5002 /*
5003 * expect the page to be used
5004 * page queues lock must be held to set 'reference'
5005 */
5006 dst_page->reference = TRUE;
5007
5008 if (!(cntrl_flags & UPL_COPYOUT_FROM))
5009 dst_page->dirty = TRUE;
5010
5011 if (dst_page->phys_page > upl->highest_page)
5012 upl->highest_page = dst_page->phys_page;
5013
5014 if (user_page_list) {
5015 user_page_list[entry].phys_addr = dst_page->phys_page;
5016 user_page_list[entry].dirty = dst_page->dirty;
5017 user_page_list[entry].pageout = dst_page->pageout;
5018 user_page_list[entry].absent = dst_page->absent;
5019 user_page_list[entry].precious = dst_page->precious;
5020
5021 if (dst_page->clustered == TRUE)
5022 user_page_list[entry].speculative = dst_page->speculative;
5023 else
5024 user_page_list[entry].speculative = FALSE;
5025 }
5026 /*
5027 * someone is explicitly grabbing this page...
5028 * update clustered and speculative state
5029 *
5030 */
5031 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5032
5033 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
5034 mutex_yield(&vm_page_queue_lock);
5035 delayed_unlock = 1;
5036 }
5037 entry++;
5038 dst_offset += PAGE_SIZE_64;
5039 xfer_size -= PAGE_SIZE;
5040 }
5041 if (delayed_unlock)
5042 vm_page_unlock_queues();
5043
5044 if (page_list_count != NULL) {
5045 if (upl->flags & UPL_INTERNAL)
5046 *page_list_count = 0;
5047 else if (*page_list_count > entry)
5048 *page_list_count = entry;
5049 }
5050 vm_object_unlock(object);
5051
5052 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5053 /*
5054 * We've marked all the pages "busy" so that future
5055 * page faults will block.
5056 * Now remove the mapping for these pages, so that they
5057 * can't be accessed without causing a page fault.
5058 */
5059 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
5060 PMAP_NULL, 0, VM_PROT_NONE);
5061 }
5062 return KERN_SUCCESS;
5063
5064 return_err:
5065 if (delayed_unlock)
5066 vm_page_unlock_queues();
5067
5068 for (; offset < dst_offset; offset += PAGE_SIZE) {
5069 dst_page = vm_page_lookup(object, offset);
5070
5071 if (dst_page == VM_PAGE_NULL)
5072 panic("vm_object_iopl_request: Wired pages missing. \n");
5073
5074 vm_page_lockspin_queues();
5075 vm_page_unwire(dst_page);
5076 vm_page_unlock_queues();
5077
5078 VM_STAT_INCR(reactivations);
5079 }
5080 vm_object_paging_end(object);
5081 vm_object_unlock(object);
5082 upl_destroy(upl);
5083
5084 return ret;
5085 }
5086
5087 kern_return_t
5088 upl_transpose(
5089 upl_t upl1,
5090 upl_t upl2)
5091 {
5092 kern_return_t retval;
5093 boolean_t upls_locked;
5094 vm_object_t object1, object2;
5095
5096 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) {
5097 return KERN_INVALID_ARGUMENT;
5098 }
5099
5100 upls_locked = FALSE;
5101
5102 /*
5103 * Since we need to lock both UPLs at the same time,
5104 * avoid deadlocks by always taking locks in the same order.
5105 */
5106 if (upl1 < upl2) {
5107 upl_lock(upl1);
5108 upl_lock(upl2);
5109 } else {
5110 upl_lock(upl2);
5111 upl_lock(upl1);
5112 }
5113 upls_locked = TRUE; /* the UPLs will need to be unlocked */
5114
5115 object1 = upl1->map_object;
5116 object2 = upl2->map_object;
5117
5118 if (upl1->offset != 0 || upl2->offset != 0 ||
5119 upl1->size != upl2->size) {
5120 /*
5121 * We deal only with full objects, not subsets.
5122 * That's because we exchange the entire backing store info
5123 * for the objects: pager, resident pages, etc... We can't do
5124 * only part of it.
5125 */
5126 retval = KERN_INVALID_VALUE;
5127 goto done;
5128 }
5129
5130 /*
5131 * Transpose the VM objects' backing store.
5132 */
5133 retval = vm_object_transpose(object1, object2,
5134 (vm_object_size_t) upl1->size);
5135
5136 if (retval == KERN_SUCCESS) {
5137 /*
5138 * Make each UPL point to the correct VM object, i.e. the
5139 * object holding the pages that the UPL refers to...
5140 */
5141 #ifdef UPL_DEBUG
5142 queue_remove(&object1->uplq, upl1, upl_t, uplq);
5143 queue_remove(&object2->uplq, upl2, upl_t, uplq);
5144 #endif
5145 upl1->map_object = object2;
5146 upl2->map_object = object1;
5147 #ifdef UPL_DEBUG
5148 queue_enter(&object1->uplq, upl2, upl_t, uplq);
5149 queue_enter(&object2->uplq, upl1, upl_t, uplq);
5150 #endif
5151 }
5152
5153 done:
5154 /*
5155 * Cleanup.
5156 */
5157 if (upls_locked) {
5158 upl_unlock(upl1);
5159 upl_unlock(upl2);
5160 upls_locked = FALSE;
5161 }
5162
5163 return retval;
5164 }
5165
5166 /*
5167 * ENCRYPTED SWAP:
5168 *
5169 * Rationale: the user might have some encrypted data on disk (via
5170 * FileVault or any other mechanism). That data is then decrypted in
5171 * memory, which is safe as long as the machine is secure. But that
5172 * decrypted data in memory could be paged out to disk by the default
5173 * pager. The data would then be stored on disk in clear (not encrypted)
5174 * and it could be accessed by anyone who gets physical access to the
5175 * disk (if the laptop or the disk gets stolen for example). This weakens
5176 * the security offered by FileVault.
5177 *
5178 * Solution: the default pager will optionally request that all the
5179 * pages it gathers for pageout be encrypted, via the UPL interfaces,
5180 * before it sends this UPL to disk via the vnode_pageout() path.
5181 *
5182 * Notes:
5183 *
5184 * To avoid disrupting the VM LRU algorithms, we want to keep the
5185 * clean-in-place mechanisms, which allow us to send some extra pages to
5186 * swap (clustering) without actually removing them from the user's
5187 * address space. We don't want the user to unknowingly access encrypted
5188 * data, so we have to actually remove the encrypted pages from the page
5189 * table. When the user accesses the data, the hardware will fail to
5190 * locate the virtual page in its page table and will trigger a page
5191 * fault. We can then decrypt the page and enter it in the page table
5192 * again. Whenever we allow the user to access the contents of a page,
5193 * we have to make sure it's not encrypted.
5194 *
5195 *
5196 */
5197 /*
5198 * ENCRYPTED SWAP:
5199 * Reserve of virtual addresses in the kernel address space.
5200 * We need to map the physical pages in the kernel, so that we
5201 * can call the encryption/decryption routines with a kernel
5202 * virtual address. We keep this pool of pre-allocated kernel
5203 * virtual addresses so that we don't have to scan the kernel's
5204 * virtual address space each time we need to encrypt or decrypt
5205 * a physical page.
5206 * It would be nice to be able to encrypt and decrypt in physical
5207 * mode but that might not always be more efficient...
5208 */
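/*
 * With VM_PAGING_NUM_PAGES == 64, the reserve below spans 64 pages of
 * kernel virtual space (256KB assuming 4KB pages); slot 'i' corresponds
 * to the address vm_paging_base_address + (i * PAGE_SIZE).
 */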
5209 decl_simple_lock_data(,vm_paging_lock)
5210 #define VM_PAGING_NUM_PAGES 64
5211 vm_map_offset_t vm_paging_base_address = 0;
5212 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
5213 int vm_paging_max_index = 0;
5214 int vm_paging_page_waiter = 0;
5215 int vm_paging_page_waiter_total = 0;
5216 unsigned long vm_paging_no_kernel_page = 0;
5217 unsigned long vm_paging_objects_mapped = 0;
5218 unsigned long vm_paging_pages_mapped = 0;
5219 unsigned long vm_paging_objects_mapped_slow = 0;
5220 unsigned long vm_paging_pages_mapped_slow = 0;
5221
5222 void
5223 vm_paging_map_init(void)
5224 {
5225 kern_return_t kr;
5226 vm_map_offset_t page_map_offset;
5227 vm_map_entry_t map_entry;
5228
5229 assert(vm_paging_base_address == 0);
5230
5231 /*
5232 * Initialize our pool of pre-allocated kernel
5233 * virtual addresses.
5234 */
5235 page_map_offset = 0;
5236 kr = vm_map_find_space(kernel_map,
5237 &page_map_offset,
5238 VM_PAGING_NUM_PAGES * PAGE_SIZE,
5239 0,
5240 0,
5241 &map_entry);
5242 if (kr != KERN_SUCCESS) {
5243 panic("vm_paging_map_init: kernel_map full\n");
5244 }
5245 map_entry->object.vm_object = kernel_object;
5246 map_entry->offset =
5247 page_map_offset - VM_MIN_KERNEL_ADDRESS;
5248 vm_object_reference(kernel_object);
5249 vm_map_unlock(kernel_map);
5250
5251 assert(vm_paging_base_address == 0);
5252 vm_paging_base_address = page_map_offset;
5253 }
5254
5255 /*
5256 * ENCRYPTED SWAP:
5257 * vm_paging_map_object:
5258 * Maps part of a VM object's pages in the kernel
5259 * virtual address space, using the pre-allocated
5260 * kernel virtual addresses, if possible.
5261 * Context:
5262 * The VM object is locked. This lock will get
5263 * dropped and re-acquired though, so the caller
5264 * must make sure the VM object is kept alive
5265 * (by holding a VM map that has a reference
5266 * on it, for example, or taking an extra reference).
5267 * The page should also be kept busy to prevent
5268 * it from being reclaimed.
5269 */
5270 kern_return_t
5271 vm_paging_map_object(
5272 vm_map_offset_t *address,
5273 vm_page_t page,
5274 vm_object_t object,
5275 vm_object_offset_t offset,
5276 vm_map_size_t *size,
5277 boolean_t can_unlock_object)
5278 {
5279 kern_return_t kr;
5280 vm_map_offset_t page_map_offset;
5281 vm_map_size_t map_size;
5282 vm_object_offset_t object_offset;
5283 int i;
5284
5285
5286 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
5287 assert(page->busy);
5288 /*
5289 * Use one of the pre-allocated kernel virtual addresses
5290 * and just enter the VM page in the kernel address space
5291 * at that virtual address.
5292 */
5293 simple_lock(&vm_paging_lock);
5294
5295 /*
5296 * Try and find an available kernel virtual address
5297 * from our pre-allocated pool.
5298 */
5299 page_map_offset = 0;
5300 for (;;) {
5301 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
5302 if (vm_paging_page_inuse[i] == FALSE) {
5303 page_map_offset =
5304 vm_paging_base_address +
5305 (i * PAGE_SIZE);
5306 break;
5307 }
5308 }
5309 if (page_map_offset != 0) {
5310 /* found a space to map our page! */
5311 break;
5312 }
5313
5314 if (can_unlock_object) {
5315 /*
5316 * If we can afford to unlock the VM object,
5317 * let's take the slow path now...
5318 */
5319 break;
5320 }
5321 /*
5322 * We can't afford to unlock the VM object, so
5323 * let's wait for a space to become available...
5324 */
5325 vm_paging_page_waiter_total++;
5326 vm_paging_page_waiter++;
5327 thread_sleep_fast_usimple_lock(&vm_paging_page_waiter,
5328 &vm_paging_lock,
5329 THREAD_UNINT);
5330 vm_paging_page_waiter--;
5331 /* ... and try again */
5332 }
5333
5334 if (page_map_offset != 0) {
5335 /*
5336 * We found a kernel virtual address;
5337 * map the physical page to that virtual address.
5338 */
5339 if (i > vm_paging_max_index) {
5340 vm_paging_max_index = i;
5341 }
5342 vm_paging_page_inuse[i] = TRUE;
5343 simple_unlock(&vm_paging_lock);
5344
5345 if (page->pmapped == FALSE) {
5346 pmap_sync_page_data_phys(page->phys_page);
5347 }
5348 page->pmapped = TRUE;
5349
5350 /*
5351 * Keep the VM object locked over the PMAP_ENTER
5352 * and the actual use of the page by the kernel,
5353 * or this pmap mapping might get undone by a
5354 * vm_object_pmap_protect() call...
5355 */
5356 PMAP_ENTER(kernel_pmap,
5357 page_map_offset,
5358 page,
5359 VM_PROT_DEFAULT,
5360 ((int) page->object->wimg_bits &
5361 VM_WIMG_MASK),
5362 TRUE);
5363 vm_paging_objects_mapped++;
5364 vm_paging_pages_mapped++;
5365 *address = page_map_offset;
5366
5367 /* all done and mapped, ready to use! */
5368 return KERN_SUCCESS;
5369 }
5370
5371 /*
5372 * We ran out of pre-allocated kernel virtual
5373 * addresses. Just map the page in the kernel
5374 * the slow and regular way.
5375 */
5376 vm_paging_no_kernel_page++;
5377 simple_unlock(&vm_paging_lock);
5378 }
5379
5380 if (! can_unlock_object) {
5381 return KERN_NOT_SUPPORTED;
5382 }
5383
5384 object_offset = vm_object_trunc_page(offset);
5385 map_size = vm_map_round_page(*size);
5386
5387 /*
5388 * Try and map the required range of the object
5389 * in the kernel_map
5390 */
5391
5392 vm_object_reference_locked(object); /* for the map entry */
5393 vm_object_unlock(object);
5394
5395 kr = vm_map_enter(kernel_map,
5396 address,
5397 map_size,
5398 0,
5399 VM_FLAGS_ANYWHERE,
5400 object,
5401 object_offset,
5402 FALSE,
5403 VM_PROT_DEFAULT,
5404 VM_PROT_ALL,
5405 VM_INHERIT_NONE);
5406 if (kr != KERN_SUCCESS) {
5407 *address = 0;
5408 *size = 0;
5409 vm_object_deallocate(object); /* for the map entry */
5410 vm_object_lock(object);
5411 return kr;
5412 }
5413
5414 *size = map_size;
5415
5416 /*
5417 * Enter the mapped pages in the page table now.
5418 */
5419 vm_object_lock(object);
5420 /*
5421 * VM object must be kept locked from before PMAP_ENTER()
5422 * until after the kernel is done accessing the page(s).
5423 * Otherwise, the pmap mappings in the kernel could be
5424 * undone by a call to vm_object_pmap_protect().
5425 */
5426
5427 for (page_map_offset = 0;
5428 map_size != 0;
5429 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
5430 unsigned int cache_attr;
5431
5432 page = vm_page_lookup(object, offset + page_map_offset);
5433 if (page == VM_PAGE_NULL) {
5434 printf("vm_paging_map_object: no page !?");
5435 vm_object_unlock(object);
5436 kr = vm_map_remove(kernel_map, *address, *size,
5437 VM_MAP_NO_FLAGS);
5438 assert(kr == KERN_SUCCESS);
5439 *address = 0;
5440 *size = 0;
5441 vm_object_lock(object);
5442 return KERN_MEMORY_ERROR;
5443 }
5444 if (page->pmapped == FALSE) {
5445 pmap_sync_page_data_phys(page->phys_page);
5446 }
5447 page->pmapped = TRUE;
5448 page->wpmapped = TRUE;
5449 cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
5450
5451 //assert(pmap_verify_free(page->phys_page));
5452 PMAP_ENTER(kernel_pmap,
5453 *address + page_map_offset,
5454 page,
5455 VM_PROT_DEFAULT,
5456 cache_attr,
5457 TRUE);
5458 }
5459
5460 vm_paging_objects_mapped_slow++;
5461 vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64;
5462
5463 return KERN_SUCCESS;
5464 }
5465
5466 /*
5467 * ENCRYPTED SWAP:
5468 * vm_paging_unmap_object:
5469 * Unmaps part of a VM object's pages from the kernel
5470 * virtual address space.
5471 * Context:
5472 * The VM object is locked. This lock will get
5473 * dropped and re-acquired though.
5474 */
5475 void
5476 vm_paging_unmap_object(
5477 vm_object_t object,
5478 vm_map_offset_t start,
5479 vm_map_offset_t end)
5480 {
5481 kern_return_t kr;
5482 int i;
5483
5484 if ((vm_paging_base_address == 0) ||
5485 (start < vm_paging_base_address) ||
5486 (end > (vm_paging_base_address
5487 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
5488 /*
5489 * We didn't use our pre-allocated pool of
5490 * kernel virtual addresses. Deallocate the
5491 * virtual memory.
5492 */
5493 if (object != VM_OBJECT_NULL) {
5494 vm_object_unlock(object);
5495 }
5496 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
5497 if (object != VM_OBJECT_NULL) {
5498 vm_object_lock(object);
5499 }
5500 assert(kr == KERN_SUCCESS);
5501 } else {
5502 /*
5503 * We used a kernel virtual address from our
5504 * pre-allocated pool. Put it back in the pool
5505 * for next time.
5506 */
5507 assert(end - start == PAGE_SIZE);
5508 i = (start - vm_paging_base_address) >> PAGE_SHIFT;
5509
5510 /* undo the pmap mapping */
5511 pmap_remove(kernel_pmap, start, end);
5512
5513 simple_lock(&vm_paging_lock);
5514 vm_paging_page_inuse[i] = FALSE;
5515 if (vm_paging_page_waiter) {
5516 thread_wakeup(&vm_paging_page_waiter);
5517 }
5518 simple_unlock(&vm_paging_lock);
5519 }
5520 }
5521
5522 #if CRYPTO
5523 /*
5524 * Encryption data.
5525 * "iv" is the "initial vector". Ideally, we want to
5526 * have a different one for each page we encrypt, so that
5527 * crackers can't find encryption patterns too easily.
5528 */
5529 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
5530 boolean_t swap_crypt_ctx_initialized = FALSE;
5531 aes_32t swap_crypt_key[8]; /* big enough for a 256 key */
5532 aes_ctx swap_crypt_ctx;
5533 const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
5534
5535 #if DEBUG
5536 boolean_t swap_crypt_ctx_tested = FALSE;
5537 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
5538 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
5539 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
5540 #endif /* DEBUG */
5541
5542 extern u_long random(void);
5543
5544 /*
5545 * Initialize the encryption context: key and key size.
5546 */
5547 void swap_crypt_ctx_initialize(void); /* forward */
5548 void
5549 swap_crypt_ctx_initialize(void)
5550 {
5551 unsigned int i;
5552
5553 /*
5554 * No need for locking to protect swap_crypt_ctx_initialized
5555 * because the first use of encryption will come from the
5556 * pageout thread (we won't pagein before there's been a pageout)
5557 * and there's only one pageout thread.
5558 */
5559 if (swap_crypt_ctx_initialized == FALSE) {
5560 for (i = 0;
5561 i < (sizeof (swap_crypt_key) /
5562 sizeof (swap_crypt_key[0]));
5563 i++) {
5564 swap_crypt_key[i] = random();
5565 }
5566 aes_encrypt_key((const unsigned char *) swap_crypt_key,
5567 SWAP_CRYPT_AES_KEY_SIZE,
5568 &swap_crypt_ctx.encrypt);
5569 aes_decrypt_key((const unsigned char *) swap_crypt_key,
5570 SWAP_CRYPT_AES_KEY_SIZE,
5571 &swap_crypt_ctx.decrypt);
5572 swap_crypt_ctx_initialized = TRUE;
5573 }
5574
5575 #if DEBUG
5576 /*
5577 * Validate the encryption algorithms.
5578 */
5579 if (swap_crypt_ctx_tested == FALSE) {
5580 /* initialize */
5581 for (i = 0; i < 4096; i++) {
5582 swap_crypt_test_page_ref[i] = (char) i;
5583 }
5584 /* encrypt */
5585 aes_encrypt_cbc(swap_crypt_test_page_ref,
5586 swap_crypt_null_iv,
5587 PAGE_SIZE / AES_BLOCK_SIZE,
5588 swap_crypt_test_page_encrypt,
5589 &swap_crypt_ctx.encrypt);
5590 /* decrypt */
5591 aes_decrypt_cbc(swap_crypt_test_page_encrypt,
5592 swap_crypt_null_iv,
5593 PAGE_SIZE / AES_BLOCK_SIZE,
5594 swap_crypt_test_page_decrypt,
5595 &swap_crypt_ctx.decrypt);
5596 /* compare result with original */
5597 for (i = 0; i < 4096; i ++) {
5598 if (swap_crypt_test_page_decrypt[i] !=
5599 swap_crypt_test_page_ref[i]) {
5600 panic("encryption test failed");
5601 }
5602 }
5603
5604 /* encrypt again */
5605 aes_encrypt_cbc(swap_crypt_test_page_decrypt,
5606 swap_crypt_null_iv,
5607 PAGE_SIZE / AES_BLOCK_SIZE,
5608 swap_crypt_test_page_decrypt,
5609 &swap_crypt_ctx.encrypt);
5610 /* decrypt in place */
5611 aes_decrypt_cbc(swap_crypt_test_page_decrypt,
5612 swap_crypt_null_iv,
5613 PAGE_SIZE / AES_BLOCK_SIZE,
5614 swap_crypt_test_page_decrypt,
5615 &swap_crypt_ctx.decrypt);
5616 for (i = 0; i < 4096; i ++) {
5617 if (swap_crypt_test_page_decrypt[i] !=
5618 swap_crypt_test_page_ref[i]) {
5619 panic("in place encryption test failed");
5620 }
5621 }
5622
5623 swap_crypt_ctx_tested = TRUE;
5624 }
5625 #endif /* DEBUG */
5626 }
5627
5628 /*
5629 * ENCRYPTED SWAP:
5630 * vm_page_encrypt:
5631 * Encrypt the given page, for secure paging.
5632 * The page might already be mapped at kernel virtual
5633 * address "kernel_mapping_offset". Otherwise, we need
5634 * to map it.
5635 *
5636 * Context:
5637 * The page's object is locked, but this lock will be released
5638 * and re-acquired.
5639 * The page is busy and not accessible by users (not entered in any pmap).
5640 */
5641 void
5642 vm_page_encrypt(
5643 vm_page_t page,
5644 vm_map_offset_t kernel_mapping_offset)
5645 {
5646 kern_return_t kr;
5647 vm_map_size_t kernel_mapping_size;
5648 vm_offset_t kernel_vaddr;
5649 union {
5650 unsigned char aes_iv[AES_BLOCK_SIZE];
5651 struct {
5652 memory_object_t pager_object;
5653 vm_object_offset_t paging_offset;
5654 } vm;
5655 } encrypt_iv;
5656
5657 if (! vm_pages_encrypted) {
5658 vm_pages_encrypted = TRUE;
5659 }
5660
5661 assert(page->busy);
5662 assert(page->dirty || page->precious);
5663
5664 if (page->encrypted) {
5665 /*
5666 * Already encrypted: no need to do it again.
5667 */
5668 vm_page_encrypt_already_encrypted_counter++;
5669 return;
5670 }
5671 ASSERT_PAGE_DECRYPTED(page);
5672
5673 /*
5674 * Take a paging-in-progress reference to keep the object
5675 * alive even if we have to unlock it (in vm_paging_map_object()
5676 * for example)...
5677 */
5678 vm_object_paging_begin(page->object);
5679
5680 if (kernel_mapping_offset == 0) {
5681 /*
5682 * The page hasn't already been mapped in kernel space
5683 * by the caller. Map it now, so that we can access
5684 * its contents and encrypt them.
5685 */
5686 kernel_mapping_size = PAGE_SIZE;
5687 kr = vm_paging_map_object(&kernel_mapping_offset,
5688 page,
5689 page->object,
5690 page->offset,
5691 &kernel_mapping_size,
5692 FALSE);
5693 if (kr != KERN_SUCCESS) {
5694 panic("vm_page_encrypt: "
5695 "could not map page in kernel: 0x%x\n",
5696 kr);
5697 }
5698 } else {
5699 kernel_mapping_size = 0;
5700 }
5701 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5702
5703 if (swap_crypt_ctx_initialized == FALSE) {
5704 swap_crypt_ctx_initialize();
5705 }
5706 assert(swap_crypt_ctx_initialized);
5707
5708 /*
5709 * Prepare an "initial vector" for the encryption.
5710 * We use the "pager" and the "paging_offset" for that
5711 * page to obfuscate the encrypted data a bit more and
5712 * prevent crackers from finding patterns that they could
5713 * use to break the key.
5714 */
5715 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
5716 encrypt_iv.vm.pager_object = page->object->pager;
5717 encrypt_iv.vm.paging_offset =
5718 page->object->paging_offset + page->offset;
5719
5720 /* encrypt the "initial vector" */
5721 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
5722 swap_crypt_null_iv,
5723 1,
5724 &encrypt_iv.aes_iv[0],
5725 &swap_crypt_ctx.encrypt);
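/*
 * The per-page IV is thus AES-CBC(pager, paging_offset) under the swap
 * key with the fixed swap_crypt_null_iv, so pages with identical
 * contents still encrypt to different ciphertexts.
 */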
5726
5727 /*
5728 * Encrypt the page.
5729 */
5730 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
5731 &encrypt_iv.aes_iv[0],
5732 PAGE_SIZE / AES_BLOCK_SIZE,
5733 (unsigned char *) kernel_vaddr,
5734 &swap_crypt_ctx.encrypt);
5735
5736 vm_page_encrypt_counter++;
5737
5738 /*
5739 * Unmap the page from the kernel's address space,
5740 * if we had to map it ourselves. Otherwise, let
5741 * the caller undo the mapping if needed.
5742 */
5743 if (kernel_mapping_size != 0) {
5744 vm_paging_unmap_object(page->object,
5745 kernel_mapping_offset,
5746 kernel_mapping_offset + kernel_mapping_size);
5747 }
5748
5749 /*
5750 * Clear the "reference" and "modified" bits.
5751 * This should clean up any impact the encryption had
5752 * on them.
5753 * The page was kept busy and disconnected from all pmaps,
5754 * so it can't have been referenced or modified from user
5755 * space.
5756 * The software bits will be reset later after the I/O
5757 * has completed (in upl_commit_range()).
5758 */
5759 pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
5760
5761 page->encrypted = TRUE;
5762
5763 vm_object_paging_end(page->object);
5764 }
5765
5766 /*
5767 * ENCRYPTED SWAP:
5768 * vm_page_decrypt:
5769 * Decrypt the given page.
5770 * The page might already be mapped at kernel virtual
5771 * address "kernel_mapping_offset". Otherwise, we need
5772 * to map it.
5773 *
5774 * Context:
5775 * The page's VM object is locked but will be unlocked and relocked.
5776 * The page is busy and not accessible by users (not entered in any pmap).
5777 */
5778 void
5779 vm_page_decrypt(
5780 vm_page_t page,
5781 vm_map_offset_t kernel_mapping_offset)
5782 {
5783 kern_return_t kr;
5784 vm_map_size_t kernel_mapping_size;
5785 vm_offset_t kernel_vaddr;
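/*
 * Overlays the page's pager and paging offset on a single AES block,
 * so that the same per-page "initial vector" that was used when the
 * page was encrypted can be reconstructed below.
 */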
5786 union {
5787 unsigned char aes_iv[AES_BLOCK_SIZE];
5788 struct {
5789 memory_object_t pager_object;
5790 vm_object_offset_t paging_offset;
5791 } vm;
5792 } decrypt_iv;
5793
5794 assert(page->busy);
5795 assert(page->encrypted);
5796
5797 /*
5798 * Take a paging-in-progress reference to keep the object
5799 * alive even if we have to unlock it (in vm_paging_map_object()
5800 * for example)...
5801 */
5802 vm_object_paging_begin(page->object);
5803
5804 if (kernel_mapping_offset == 0) {
5805 /*
5806 * The page hasn't already been mapped in kernel space
5807 * by the caller. Map it now, so that we can access
5808 * its contents and decrypt them.
5809 */
5810 kernel_mapping_size = PAGE_SIZE;
5811 kr = vm_paging_map_object(&kernel_mapping_offset,
5812 page,
5813 page->object,
5814 page->offset,
5815 &kernel_mapping_size,
5816 FALSE);
5817 if (kr != KERN_SUCCESS) {
5818 panic("vm_page_decrypt: "
5819 "could not map page in kernel: 0x%x\n",
5820 kr);
5821 }
5822 } else {
5823 kernel_mapping_size = 0;
5824 }
5825 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5826
5827 assert(swap_crypt_ctx_initialized);
5828
5829 /*
5830 * Prepare an "initial vector" for the decryption.
5831 * It has to be the same as the "initial vector" we
5832 * used to encrypt that page.
5833 */
5834 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
5835 decrypt_iv.vm.pager_object = page->object->pager;
5836 decrypt_iv.vm.paging_offset =
5837 page->object->paging_offset + page->offset;
5838
5839 /* encrypt the "initial vector", exactly as was done at encryption time */
5840 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
5841 swap_crypt_null_iv,
5842 1,
5843 &decrypt_iv.aes_iv[0],
5844 &swap_crypt_ctx.encrypt);
5845
5846 /*
5847 * Decrypt the page.
5848 */
5849 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
5850 &decrypt_iv.aes_iv[0],
5851 PAGE_SIZE / AES_BLOCK_SIZE,
5852 (unsigned char *) kernel_vaddr,
5853 &swap_crypt_ctx.decrypt);
5854 vm_page_decrypt_counter++;
5855
5856 /*
5857 * Unmap the page from the kernel's address space,
5858 * if we had to map it ourselves. Otherwise, let
5859 * the caller undo the mapping if needed.
5860 */
5861 if (kernel_mapping_size != 0) {
5862 vm_paging_unmap_object(page->object,
5863 kernel_vaddr,
5864 kernel_vaddr + PAGE_SIZE);
5865 }
5866
5867 /*
5868 * After decryption, the page is actually clean.
5869 * It was encrypted as part of paging, which "cleans"
5870 * the "dirty" pages.
5871 * No one could have accessed it after it was encrypted,
5872 * and the decryption itself does not count as a modification.
5873 */
5874 page->dirty = FALSE;
5875 if (page->cs_validated && !page->cs_tainted) {
5876 /*
5877 * CODE SIGNING:
5878 * This page is no longer dirty
5879 * but could have been modified,
5880 * so it will need to be
5881 * re-validated.
5882 */
5883 page->cs_validated = FALSE;
5884 vm_cs_validated_resets++;
5885 }
5886 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
5887
5888 page->encrypted = FALSE;
5889
5890 /*
5891 * We've just modified the page's contents via the data cache and part
5892 * of the new contents might still be in the cache and not yet in RAM.
5893 * Since the page is now available and might get gathered in a UPL to
5894 * be part of a DMA transfer from a driver that expects the memory to
5895 * be coherent at this point, we have to flush the data cache.
5896 */
5897 pmap_sync_page_attributes_phys(page->phys_page);
5898 /*
5899 * Since the page is not mapped yet, code that writes to it may skip
5900 * invalidating the instruction cache and instead rely on "pmapped"
5901 * being FALSE, so that the caches get synchronized when the page is
5902 * first mapped. Reset the flags below to preserve that guarantee.
5903 */
5904 assert(pmap_verify_free(page->phys_page));
5905 page->pmapped = FALSE;
5906 page->wpmapped = FALSE;
5907
5908 vm_object_paging_end(page->object);
5909 }
5910
5911 unsigned long upl_encrypt_upls = 0;
5912 unsigned long upl_encrypt_pages = 0;
5913
5914 /*
5915 * ENCRYPTED SWAP:
5916 *
5917 * upl_encrypt:
5918 * Encrypts all the pages in the UPL, within the specified range.
5919 *
5920 */
5921 void
5922 upl_encrypt(
5923 upl_t upl,
5924 upl_offset_t crypt_offset,
5925 upl_size_t crypt_size)
5926 {
5927 upl_size_t upl_size;
5928 upl_offset_t upl_offset;
5929 vm_object_t upl_object;
5930 vm_page_t page;
5931 vm_object_t shadow_object;
5932 vm_object_offset_t shadow_offset;
5933 vm_object_offset_t paging_offset;
5934 vm_object_offset_t base_offset;
5935
5936 upl_encrypt_upls++;
5937 upl_encrypt_pages += crypt_size / PAGE_SIZE;
5938
5939 upl_object = upl->map_object;
5940 upl_offset = upl->offset;
5941 upl_size = upl->size;
5942
5943 vm_object_lock(upl_object);
5944
5945 /*
5946 * Find the VM object that contains the actual pages.
5947 */
5948 if (upl_object->pageout) {
5949 shadow_object = upl_object->shadow;
5950 /*
5951 * The offset in the shadow object is actually also
5952 * accounted for in upl->offset. It possibly shouldn't be
5953 * this way, but for now don't account for it twice.
5954 */
5955 shadow_offset = 0;
5956 assert(upl_object->paging_offset == 0); /* XXX ? */
5957 vm_object_lock(shadow_object);
5958 } else {
5959 shadow_object = upl_object;
5960 shadow_offset = 0;
5961 }
5962
5963 paging_offset = shadow_object->paging_offset;
5964 vm_object_paging_begin(shadow_object);
5965
5966 if (shadow_object != upl_object)
5967 vm_object_unlock(upl_object);
5968
5969
5970 base_offset = shadow_offset;
5971 base_offset += upl_offset;
5972 base_offset += crypt_offset;
5973 base_offset -= paging_offset;
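/*
 * base_offset now expresses crypt_offset as an offset within the
 * shadow object: upl->offset appears to be pager-relative, so the
 * object's paging_offset is subtracted back out before the page
 * lookups below.
 */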
5974
5975 assert(crypt_offset + crypt_size <= upl_size);
5976
5977 for (upl_offset = 0;
5978 upl_offset < crypt_size;
5979 upl_offset += PAGE_SIZE) {
5980 page = vm_page_lookup(shadow_object,
5981 base_offset + upl_offset);
5982 if (page == VM_PAGE_NULL) {
5983 panic("upl_encrypt: "
5984 "no page for (obj=%p,off=%lld+%d)!\n",
5985 shadow_object,
5986 base_offset,
5987 upl_offset);
5988 }
5989 /*
5990 * Disconnect the page from all pmaps, so that nobody can
5991 * access it while it's encrypted. After that point, all
5992 * accesses to this page will cause a page fault and block
5993 * while the page is busy being encrypted. After the
5994 * encryption completes, any access will cause a
5995 * page fault and the page gets decrypted at that time.
5996 */
5997 pmap_disconnect(page->phys_page);
5998 vm_page_encrypt(page, 0);
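/*
 * A kernel_mapping_offset of 0 tells vm_page_encrypt() to set up
 * (and tear down) its own temporary kernel mapping for the page.
 */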
5999
6000 if (shadow_object == vm_pageout_scan_wants_object) {
6001 /*
6002 * Give vm_pageout_scan() a chance to convert more
6003 * pages from "clean-in-place" to "clean-and-free",
6004 * if it's interested in the same pages we selected
6005 * in this cluster.
6006 */
6007 vm_object_unlock(shadow_object);
6008 vm_object_lock(shadow_object);
6009 }
6010 }
6011
6012 vm_object_paging_end(shadow_object);
6013 vm_object_unlock(shadow_object);
6014 }
6015
6016 #else /* CRYPTO */
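/*
 * Without CRYPTO configured there is no encrypted swap: keep the
 * entry points so callers still link, but make them no-ops.
 */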
6017 void
6018 upl_encrypt(
6019 __unused upl_t upl,
6020 __unused upl_offset_t crypt_offset,
6021 __unused upl_size_t crypt_size)
6022 {
6023 }
6024
6025 void
6026 vm_page_encrypt(
6027 __unused vm_page_t page,
6028 __unused vm_map_offset_t kernel_mapping_offset)
6029 {
6030 }
6031
6032 void
6033 vm_page_decrypt(
6034 __unused vm_page_t page,
6035 __unused vm_map_offset_t kernel_mapping_offset)
6036 {
6037 }
6038
6039 #endif /* CRYPTO */
6040
6041 vm_size_t
6042 upl_get_internal_pagelist_offset(void)
6043 {
6044 return sizeof(struct upl);
6045 }
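/*
 * For internal UPLs the upl_page_info_t array is allocated
 * immediately after the struct upl itself, so a caller can
 * (illustratively) locate it with:
 *
 *	(upl_page_info_t *)((vm_offset_t)upl +
 *	    upl_get_internal_pagelist_offset())
 */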
6046
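/*
 * Set or clear UPL_CLEAR_DIRTY on a UPL. When the flag is set, the
 * dirty state of the pages is expected to be cleared when the UPL
 * is committed (see upl_commit_range()), rather than left intact.
 */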
6047 void
6048 upl_clear_dirty(
6049 upl_t upl,
6050 boolean_t value)
6051 {
6052 if (value) {
6053 upl->flags |= UPL_CLEAR_DIRTY;
6054 } else {
6055 upl->flags &= ~UPL_CLEAR_DIRTY;
6056 }
6057 }
6058
6059
6060 #ifdef MACH_BSD
6061
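/*
 * Thin accessors exported to the BSD side: each simply wraps the
 * corresponding UPL_*() page-info macro so that callers which
 * cannot see the macro definitions can still query per-page state.
 */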
6062 boolean_t upl_device_page(upl_page_info_t *upl)
6063 {
6064 return(UPL_DEVICE_PAGE(upl));
6065 }
6066 boolean_t upl_page_present(upl_page_info_t *upl, int index)
6067 {
6068 return(UPL_PAGE_PRESENT(upl, index));
6069 }
6070 boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
6071 {
6072 return(UPL_SPECULATIVE_PAGE(upl, index));
6073 }
6074 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
6075 {
6076 return(UPL_DIRTY_PAGE(upl, index));
6077 }
6078 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
6079 {
6080 return(UPL_VALID_PAGE(upl, index));
6081 }
6082 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
6083 {
6084 return(UPL_PHYS_PAGE(upl, index));
6085 }
6086
6087
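/*
 * Debug helper: walk the inactive, throttled, zero-fill and active
 * queues and print how many pages are dirty, being paged out, or
 * precious.  "IN Q" covers the first three queues, "AC Q" covers
 * the active queue.
 */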
6088 void
6089 vm_countdirtypages(void)
6090 {
6091 vm_page_t m;
6092 int dpages;
6093 int pgopages;
6094 int precpages;
6095
6096
6097 dpages=0;
6098 pgopages=0;
6099 precpages=0;
6100
6101 vm_page_lock_queues();
6102 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6103 do {
6104 if (m == VM_PAGE_NULL) break;
6105
6106 if (m->dirty) dpages++;
6107 if (m->pageout) pgopages++;
6108 if (m->precious) precpages++;
6109
6110 assert(m->object != kernel_object);
6111 m = (vm_page_t) queue_next(&m->pageq);
6112 if (m == VM_PAGE_NULL) break;
6113
6114 } while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m));
6115 vm_page_unlock_queues();
6116
6117 vm_page_lock_queues();
6118 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
6119 do {
6120 if (m == VM_PAGE_NULL) break;
6121
6122 dpages++;
6123 assert(m->dirty);
6124 assert(!m->pageout);
6125 assert(m->object != kernel_object);
6126 m = (vm_page_t) queue_next(&m->pageq);
6127 if (m == VM_PAGE_NULL) break;
6128
6129 } while (!queue_end(&vm_page_queue_throttled, (queue_entry_t) m));
6130 vm_page_unlock_queues();
6131
6132 vm_page_lock_queues();
6133 m = (vm_page_t) queue_first(&vm_page_queue_zf);
6134 do {
6135 if (m == VM_PAGE_NULL) break;
6136
6137 if (m->dirty) dpages++;
6138 if (m->pageout) pgopages++;
6139 if (m->precious) precpages++;
6140
6141 assert(m->object != kernel_object);
6142 m = (vm_page_t) queue_next(&m->pageq);
6143 if (m == VM_PAGE_NULL) break;
6144
6145 } while (!queue_end(&vm_page_queue_zf, (queue_entry_t) m));
6146 vm_page_unlock_queues();
6147
6148 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
6149
6150 dpages=0;
6151 pgopages=0;
6152 precpages=0;
6153
6154 vm_page_lock_queues();
6155 m = (vm_page_t) queue_first(&vm_page_queue_active);
6156
6157 do {
6158 if (m == VM_PAGE_NULL) break;
6159 if (m->dirty) dpages++;
6160 if (m->pageout) pgopages++;
6161 if (m->precious) precpages++;
6162
6163 assert(m->object != kernel_object);
6164 m = (vm_page_t) queue_next(&m->pageq);
6165 if (m == VM_PAGE_NULL) break;
6166
6167 } while (!queue_end(&vm_page_queue_active, (queue_entry_t) m));
6168 vm_page_unlock_queues();
6169
6170 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
6171
6172 }
6173 #endif /* MACH_BSD */
6174
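/*
 * Return the highest physical page number of any page in the UPL,
 * presumably so that I/O layers can tell whether the pages fall
 * below a device's physical addressing limit.
 */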
6175 ppnum_t upl_get_highest_page(
6176 upl_t upl)
6177 {
6178 return upl->highest_page;
6179 }
6180
6181 #ifdef UPL_DEBUG
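/*
 * Debug-only tagging: callers can stamp a UPL with two opaque alias
 * words (named for the UBC layer, which appears to be the main user)
 * to make leaked or misattributed UPLs easier to track down.
 */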
6182 kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
6183 {
6184 upl->ubc_alias1 = alias1;
6185 upl->ubc_alias2 = alias2;
6186 return KERN_SUCCESS;
6187 }
6188 int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
6189 {
6190 if (al)
6191 *al = upl->ubc_alias1;
6192 if (al2)
6193 *al2 = upl->ubc_alias2;
6194 return KERN_SUCCESS;
6195 }
6196 #endif /* UPL_DEBUG */
6197
6198
6199
6200 #if MACH_KDB
6201 #include <ddb/db_output.h>
6202 #include <ddb/db_print.h>
6203 #include <vm/vm_print.h>
6204
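/*
 * Kernel debugger (ddb) display routines.  printf is redirected to
 * kdbprintf below so that the output goes to the debugger console.
 */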
6205 #define printf kdbprintf
6206 void db_pageout(void);
6207
6208 void
6209 db_vm(void)
6210 {
6211
6212 iprintf("VM Statistics:\n");
6213 db_indent += 2;
6214 iprintf("pages:\n");
6215 db_indent += 2;
6216 iprintf("activ %5d inact %5d free %5d",
6217 vm_page_active_count, vm_page_inactive_count,
6218 vm_page_free_count);
6219 printf(" wire %5d gobbl %5d\n",
6220 vm_page_wire_count, vm_page_gobble_count);
6221 db_indent -= 2;
6222 iprintf("target:\n");
6223 db_indent += 2;
6224 iprintf("min %5d inact %5d free %5d",
6225 vm_page_free_min, vm_page_inactive_target,
6226 vm_page_free_target);
6227 printf(" resrv %5d\n", vm_page_free_reserved);
6228 db_indent -= 2;
6229 iprintf("pause:\n");
6230 db_pageout();
6231 db_indent -= 2;
6232 }
6233
6234 #if MACH_COUNTERS
6235 extern int c_laundry_pages_freed;
6236 #endif /* MACH_COUNTERS */
6237
6238 void
6239 db_pageout(void)
6240 {
6241 iprintf("Pageout Statistics:\n");
6242 db_indent += 2;
6243 iprintf("active %5d inactv %5d\n",
6244 vm_pageout_active, vm_pageout_inactive);
6245 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
6246 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
6247 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
6248 iprintf("used %5d clean %5d dirty %5d\n",
6249 vm_pageout_inactive_used, vm_pageout_inactive_clean,
6250 vm_pageout_inactive_dirty);
6251 #if MACH_COUNTERS
6252 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
6253 #endif /* MACH_COUNTERS */
6254 #if MACH_CLUSTER_STATS
6255 iprintf("Cluster Statistics:\n");
6256 db_indent += 2;
6257 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
6258 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
6259 vm_pageout_cluster_collisions);
6260 iprintf("clusters %5d conversions %5d\n",
6261 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
6262 db_indent -= 2;
6263 iprintf("Target Statistics:\n");
6264 db_indent += 2;
6265 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
6266 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
6267 vm_pageout_target_page_freed);
6268 db_indent -= 2;
6269 #endif /* MACH_CLUSTER_STATS */
6270 db_indent -= 2;
6271 }
6272
6273 #endif /* MACH_KDB */