1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71 #include <mach_kdb.h>
72 #include <advisory_pageout.h>
73
74 #include <mach/mach_types.h>
75 #include <mach/memory_object.h>
76 #include <mach/memory_object_default.h>
77 #include <mach/memory_object_control_server.h>
78 #include <mach/mach_host_server.h>
79 #include <mach/upl.h>
80 #include <mach/vm_map.h>
81 #include <mach/vm_param.h>
82 #include <mach/vm_statistics.h>
83 #include <mach/sdt.h>
84
85 #include <kern/kern_types.h>
86 #include <kern/counters.h>
87 #include <kern/host_statistics.h>
88 #include <kern/machine.h>
89 #include <kern/misc_protos.h>
90 #include <kern/thread.h>
91 #include <kern/xpr.h>
92 #include <kern/kalloc.h>
93
94 #include <machine/vm_tuning.h>
95
96 #if CONFIG_EMBEDDED
97 #include <sys/kern_memorystatus.h>
98 #endif
99
100 #include <vm/pmap.h>
101 #include <vm/vm_fault.h>
102 #include <vm/vm_map.h>
103 #include <vm/vm_object.h>
104 #include <vm/vm_page.h>
105 #include <vm/vm_pageout.h>
106 #include <vm/vm_protos.h> /* must be last */
107 #include <vm/memory_object.h>
108 #include <vm/vm_purgeable_internal.h>
109
110 /*
111 * ENCRYPTED SWAP:
112 */
113 #include <../bsd/crypto/aes/aes.h>
114
115
116 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */
117 #ifdef CONFIG_EMBEDDED
118 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 2048
119 #else
120 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100
121 #endif
122 #endif
123
124 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
125 #ifdef CONFIG_EMBEDDED
126 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
127 #else
128 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
129 #endif
130 #endif
131
132 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
133 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
134 #endif
135
136 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
137 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
138 #endif
139
140 #ifndef VM_PAGE_LAUNDRY_MAX
141 #define VM_PAGE_LAUNDRY_MAX 16UL /* maximum pageouts on a given pageout queue */
142 #endif /* VM_PAGE_LAUNDRY_MAX */
143
144 #ifndef VM_PAGEOUT_BURST_WAIT
145 #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
146 #endif /* VM_PAGEOUT_BURST_WAIT */
147
148 #ifndef VM_PAGEOUT_EMPTY_WAIT
149 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
150 #endif /* VM_PAGEOUT_EMPTY_WAIT */
151
152 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
153 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
154 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
155
156 #ifndef VM_PAGEOUT_IDLE_WAIT
157 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
158 #endif /* VM_PAGEOUT_IDLE_WAIT */
159
160 #ifndef VM_PAGE_SPECULATIVE_TARGET
161 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / 20)
162 #endif /* VM_PAGE_SPECULATIVE_TARGET */
163
164 #ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
165 #define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
166 #endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */
167
168
169 /*
170 * To obtain a reasonable LRU approximation, the inactive queue
171 * needs to be large enough to give pages on it a chance to be
172 * referenced a second time. This macro defines the fraction
173 * of active+inactive pages that should be inactive.
174 * The pageout daemon uses it to update vm_page_inactive_target.
175 *
176 * If vm_page_free_count falls below vm_page_free_target and
177 * vm_page_inactive_count is below vm_page_inactive_target,
178 * then the pageout daemon starts running.
179 */
180
181 #ifndef VM_PAGE_INACTIVE_TARGET
182 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
183 #endif /* VM_PAGE_INACTIVE_TARGET */
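/*
 * Example (illustrative): with 300000 pages on the active + inactive +
 * speculative queues, VM_PAGE_INACTIVE_TARGET(300000) = 100000, i.e.
 * roughly one third of those pages should be kept on the inactive
 * queue for the LRU approximation to work.
 */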
184
185 /*
186 * Once the pageout daemon starts running, it keeps going
187 * until vm_page_free_count meets or exceeds vm_page_free_target.
188 */
189
190 #ifndef VM_PAGE_FREE_TARGET
191 #ifdef CONFIG_EMBEDDED
192 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
193 #else
194 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
195 #endif
196 #endif /* VM_PAGE_FREE_TARGET */
197
198 /*
199 * The pageout daemon always starts running once vm_page_free_count
200 * falls below vm_page_free_min.
201 */
202
203 #ifndef VM_PAGE_FREE_MIN
204 #ifdef CONFIG_EMBEDDED
205 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
206 #else
207 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
208 #endif
209 #endif /* VM_PAGE_FREE_MIN */
210
211 #define VM_PAGE_FREE_MIN_LIMIT 1500
212 #define VM_PAGE_FREE_TARGET_LIMIT 2000
213
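/*
 * Example (illustrative, non-embedded values): seeded with a free count
 * of 100000 pages,
 *
 *	VM_PAGE_FREE_TARGET(100000) = 15 + 100000/80  = 1265 pages
 *	VM_PAGE_FREE_MIN(100000)    = 10 + 100000/100 = 1010 pages
 *
 * The VM_PAGE_FREE_MIN_LIMIT / VM_PAGE_FREE_TARGET_LIMIT constants above
 * are presumably used by the startup code to cap these values on
 * large-memory configurations.
 */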
214
215 /*
216 * When vm_page_free_count falls below vm_page_free_reserved,
217 * only vm-privileged threads can allocate pages. vm-privilege
218 * allows the pageout daemon and default pager (and any other
219 * associated threads needed for default pageout) to continue
220 * operation by dipping into the reserved pool of pages.
221 */
222
223 #ifndef VM_PAGE_FREE_RESERVED
224 #define VM_PAGE_FREE_RESERVED(n) \
225 ((6 * VM_PAGE_LAUNDRY_MAX) + (n))
226 #endif /* VM_PAGE_FREE_RESERVED */
227
228 /*
229 * When we dequeue pages from the inactive list, they are
230 * reactivated (ie, put back on the active queue) if referenced.
231 * However, it is possible to starve the free list if other
232 * processors are referencing pages faster than we can turn off
233 * the referenced bit. So we limit the number of reactivations
234 * we will make per call of vm_pageout_scan().
235 */
236 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
237 #ifndef VM_PAGE_REACTIVATE_LIMIT
238 #ifdef CONFIG_EMBEDDED
239 #define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
240 #else
241 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
242 #endif
243 #endif /* VM_PAGE_REACTIVATE_LIMIT */
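/*
 * Example (illustrative): on a non-embedded configuration with
 * avail = 200000 active + inactive pages, (avail)/20 = 10000 and the
 * MAX() lifts the limit to VM_PAGE_REACTIVATE_LIMIT_MAX, so up to 20000
 * referenced pages may be reactivated per call of vm_pageout_scan().
 * On CONFIG_EMBEDDED the limit is half the inactive target, i.e.
 * avail / 6.
 */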
244 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 100
245
246
247 /*
248 * must hold the page queues lock to
249 * manipulate this structure
250 */
251 struct vm_pageout_queue {
252 queue_head_t pgo_pending; /* laundry pages to be processed by pager's iothread */
253 unsigned int pgo_laundry; /* current count of laundry pages on queue or in flight */
254 unsigned int pgo_maxlaundry;
255
256 unsigned int pgo_idle:1, /* iothread is blocked waiting for work to do */
257 pgo_busy:1, /* iothread is currently processing request from pgo_pending */
258 pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */
259 :0;
260 };
261
262 #define VM_PAGE_Q_THROTTLED(q) \
263 ((q)->pgo_laundry >= (q)->pgo_maxlaundry)
264
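/*
 * Example (illustrative, assuming pgo_maxlaundry is initialized to
 * VM_PAGE_LAUNDRY_MAX, i.e. 16): the queue counts as throttled once 16
 * laundry pages are outstanding; vm_pageout_scan() then sets
 * pgo_throttled and waits on &q->pgo_laundry, and
 * vm_pageout_throttle_up() issues the wakeup as pages come back from
 * the laundry.
 */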
265
266 /*
267 * Exported variable used to broadcast the activation of the pageout scan
268 * Working Set uses this to throttle its use of pmap removes. In this
269 * way, code which runs within memory in an uncontested context does
270 * not keep encountering soft faults.
271 */
272
273 unsigned int vm_pageout_scan_event_counter = 0;
274
275 /*
276 * Forward declarations for internal routines.
277 */
278
279 static void vm_pageout_garbage_collect(int);
280 static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
281 static void vm_pageout_iothread_external(void);
282 static void vm_pageout_iothread_internal(void);
283 static void vm_pageout_queue_steal(vm_page_t);
284
285 extern void vm_pageout_continue(void);
286 extern void vm_pageout_scan(void);
287
288 static thread_t vm_pageout_external_iothread = THREAD_NULL;
289 static thread_t vm_pageout_internal_iothread = THREAD_NULL;
290
291 unsigned int vm_pageout_reserved_internal = 0;
292 unsigned int vm_pageout_reserved_really = 0;
293
294 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
295 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
296 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
297 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
298 unsigned int vm_pageout_deadlock_relief = 0;
299 unsigned int vm_pageout_inactive_relief = 0;
300 unsigned int vm_pageout_burst_active_throttle = 0;
301 unsigned int vm_pageout_burst_inactive_throttle = 0;
302
303 /*
304 * Protection against zero fill flushing live working sets derived
305 * from existing backing store and files
306 */
307 unsigned int vm_accellerate_zf_pageout_trigger = 400;
308 unsigned int zf_queue_min_count = 100;
309 unsigned int vm_zf_count = 0;
310 unsigned int vm_zf_queue_count = 0;
311
312 /*
313 * These variables record the pageout daemon's actions:
314 * how many pages it looks at and what happens to those pages.
315 * No locking needed because only one thread modifies the variables.
316 */
317
318 unsigned int vm_pageout_active = 0; /* debugging */
319 unsigned int vm_pageout_inactive = 0; /* debugging */
320 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
321 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
322 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
323 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
324 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
325 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
326 unsigned int vm_pageout_inactive_used = 0; /* debugging */
327 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
328 unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
329 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
330 unsigned int vm_pageout_purged_objects = 0; /* debugging */
331 unsigned int vm_stat_discard = 0; /* debugging */
332 unsigned int vm_stat_discard_sent = 0; /* debugging */
333 unsigned int vm_stat_discard_failure = 0; /* debugging */
334 unsigned int vm_stat_discard_throttle = 0; /* debugging */
335 unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */
336 unsigned int vm_pageout_catch_ups = 0; /* debugging */
337 unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */
338
339 unsigned int vm_pageout_scan_active_throttled = 0;
340 unsigned int vm_pageout_scan_inactive_throttled = 0;
341 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
342 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
343 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
344 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
345 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
346 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
347 /*
348 * Backing store throttle when BS is exhausted
349 */
350 unsigned int vm_backing_store_low = 0;
351
352 unsigned int vm_pageout_out_of_line = 0;
353 unsigned int vm_pageout_in_place = 0;
354
355 /*
356 * ENCRYPTED SWAP:
357 * counters and statistics...
358 */
359 unsigned long vm_page_decrypt_counter = 0;
360 unsigned long vm_page_decrypt_for_upl_counter = 0;
361 unsigned long vm_page_encrypt_counter = 0;
362 unsigned long vm_page_encrypt_abort_counter = 0;
363 unsigned long vm_page_encrypt_already_encrypted_counter = 0;
364 boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */
365
366 struct vm_pageout_queue vm_pageout_queue_internal;
367 struct vm_pageout_queue vm_pageout_queue_external;
368
369 unsigned int vm_page_speculative_target = 0;
370
371 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
372
373 unsigned long vm_cs_validated_resets = 0;
374
375 /*
376 * Routine: vm_backing_store_disable
377 * Purpose:
378 * Suspend non-privileged threads wishing to extend
379 * backing store when we are low on backing store
380 * (Synchronized by caller)
381 */
382 void
383 vm_backing_store_disable(
384 boolean_t disable)
385 {
386 if(disable) {
387 vm_backing_store_low = 1;
388 } else {
389 if(vm_backing_store_low) {
390 vm_backing_store_low = 0;
391 thread_wakeup((event_t) &vm_backing_store_low);
392 }
393 }
394 }
395
396
397 #if MACH_CLUSTER_STATS
398 unsigned long vm_pageout_cluster_dirtied = 0;
399 unsigned long vm_pageout_cluster_cleaned = 0;
400 unsigned long vm_pageout_cluster_collisions = 0;
401 unsigned long vm_pageout_cluster_clusters = 0;
402 unsigned long vm_pageout_cluster_conversions = 0;
403 unsigned long vm_pageout_target_collisions = 0;
404 unsigned long vm_pageout_target_page_dirtied = 0;
405 unsigned long vm_pageout_target_page_freed = 0;
406 #define CLUSTER_STAT(clause) clause
407 #else /* MACH_CLUSTER_STATS */
408 #define CLUSTER_STAT(clause)
409 #endif /* MACH_CLUSTER_STATS */
410
411 /*
412 * Routine: vm_pageout_object_terminate
413 * Purpose:
414 * Destroy the pageout_object, and perform all of the
415 * required cleanup actions.
416 *
417 * In/Out conditions:
418 * The object must be locked, and will be returned locked.
419 */
420 void
421 vm_pageout_object_terminate(
422 vm_object_t object)
423 {
424 vm_object_t shadow_object;
425
426 /*
427 * Deal with the deallocation (last reference) of a pageout object
428 * (used for cleaning-in-place) by dropping the paging references/
429 * freeing pages in the original object.
430 */
431
432 assert(object->pageout);
433 shadow_object = object->shadow;
434 vm_object_lock(shadow_object);
435
436 while (!queue_empty(&object->memq)) {
437 vm_page_t p, m;
438 vm_object_offset_t offset;
439
440 p = (vm_page_t) queue_first(&object->memq);
441
442 assert(p->private);
443 assert(p->pageout);
444 p->pageout = FALSE;
445 assert(!p->cleaning);
446
447 offset = p->offset;
448 VM_PAGE_FREE(p);
449 p = VM_PAGE_NULL;
450
451 m = vm_page_lookup(shadow_object,
452 offset + object->shadow_offset);
453
454 if(m == VM_PAGE_NULL)
455 continue;
456 assert(m->cleaning);
457 /* used as a trigger on upl_commit etc to recognize the */
458 /* pageout daemon's subsequent desire to pageout a cleaning */
459 /* page. When the bit is on the upl commit code will */
460 /* respect the pageout bit in the target page over the */
461 /* caller's page list indication */
462 m->dump_cleaning = FALSE;
463
464 assert((m->dirty) || (m->precious) ||
465 (m->busy && m->cleaning));
466
467 /*
468 * Handle the trusted pager throttle.
469 * Also decrement the burst throttle (if external).
470 */
471 vm_page_lock_queues();
472 if (m->laundry) {
473 vm_pageout_throttle_up(m);
474 }
475
476 /*
477 * Handle the "target" page(s). These pages are to be freed if
478 * successfully cleaned. Target pages are always busy, and are
479 * wired exactly once. The initial target pages are not mapped,
480 * (so cannot be referenced or modified) but converted target
481 * pages may have been modified between the selection as an
482 * adjacent page and conversion to a target.
483 */
484 if (m->pageout) {
485 assert(m->busy);
486 assert(m->wire_count == 1);
487 m->cleaning = FALSE;
488 m->encrypted_cleaning = FALSE;
489 m->pageout = FALSE;
490 #if MACH_CLUSTER_STATS
491 if (m->wanted) vm_pageout_target_collisions++;
492 #endif
493 /*
494 * Revoke all access to the page. Since the object is
495 * locked, and the page is busy, this prevents the page
496 * from being dirtied after the pmap_disconnect() call
497 * returns.
498 *
499 * Since the page is left "dirty" but "not modified", we
500 * can detect whether the page was redirtied during
501 * pageout by checking the modify state.
502 */
503 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
504 m->dirty = TRUE;
505 else
506 m->dirty = FALSE;
507
508 if (m->dirty) {
509 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
510 vm_page_unwire(m);/* reactivates */
511 VM_STAT_INCR(reactivations);
512 PAGE_WAKEUP_DONE(m);
513 } else {
514 CLUSTER_STAT(vm_pageout_target_page_freed++;)
515 vm_page_free(m);/* clears busy, etc. */
516 }
517 vm_page_unlock_queues();
518 continue;
519 }
520 /*
521 * Handle the "adjacent" pages. These pages were cleaned in
522 * place, and should be left alone.
523 * If prep_pin_count is nonzero, then someone is using the
524 * page, so make it active.
525 */
526 if (!m->active && !m->inactive && !m->throttled && !m->private) {
527 if (m->reference)
528 vm_page_activate(m);
529 else
530 vm_page_deactivate(m);
531 }
532 if((m->busy) && (m->cleaning)) {
533
534 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
535 m->busy = FALSE;
536
537 /* We do not re-set m->dirty ! */
538 /* The page was busy so no extraneous activity */
539 /* could have occurred. COPY_INTO is a read into the */
540 /* new pages. CLEAN_IN_PLACE does actually write */
541 /* out the pages but handling outside of this code */
542 /* will take care of resetting dirty. We clear the */
543 /* modify however for the Programmed I/O case. */
544 pmap_clear_modify(m->phys_page);
545
546 m->absent = FALSE;
547 m->overwriting = FALSE;
548 } else if (m->overwriting) {
549 /* alternate request page list, write to page_list */
550 /* case. Occurs when the original page was wired */
551 /* at the time of the list request */
552 assert(m->wire_count != 0);
553 vm_page_unwire(m);/* reactivates */
554 m->overwriting = FALSE;
555 } else {
556 /*
557 * Set the dirty state according to whether or not the page was
558 * modified during the pageout. Note that we purposefully do
559 * NOT call pmap_clear_modify since the page is still mapped.
560 * If the page were to be dirtied between the 2 calls, this
561 * fact would be lost. This code is only necessary to
562 * maintain statistics, since the pmap module is always
563 * consulted if m->dirty is false.
564 */
565 #if MACH_CLUSTER_STATS
566 m->dirty = pmap_is_modified(m->phys_page);
567
568 if (m->dirty) vm_pageout_cluster_dirtied++;
569 else vm_pageout_cluster_cleaned++;
570 if (m->wanted) vm_pageout_cluster_collisions++;
571 #else
572 m->dirty = 0;
573 #endif
574 }
575 m->cleaning = FALSE;
576 m->encrypted_cleaning = FALSE;
577
578 /*
579 * Wakeup any thread waiting for the page to be un-cleaning.
580 */
581 PAGE_WAKEUP(m);
582 vm_page_unlock_queues();
583 }
584 /*
585 * Account for the paging reference taken in vm_paging_object_allocate.
586 */
587 vm_object_paging_end(shadow_object);
588 vm_object_unlock(shadow_object);
589
590 assert(object->ref_count == 0);
591 assert(object->paging_in_progress == 0);
592 assert(object->resident_page_count == 0);
593 return;
594 }
595
596 /*
597 * Routine: vm_pageclean_setup
598 *
599 * Purpose: setup a page to be cleaned (made non-dirty), but not
600 * necessarily flushed from the VM page cache.
601 * This is accomplished by cleaning in place.
602 *
603 * The page must not be busy, and the object and page
604 * queues must be locked.
605 *
606 */
607 void
608 vm_pageclean_setup(
609 vm_page_t m,
610 vm_page_t new_m,
611 vm_object_t new_object,
612 vm_object_offset_t new_offset)
613 {
614 assert(!m->busy);
615 #if 0
616 assert(!m->cleaning);
617 #endif
618
619 XPR(XPR_VM_PAGEOUT,
620 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
621 (integer_t)m->object, m->offset, (integer_t)m,
622 (integer_t)new_m, new_offset);
623
624 pmap_clear_modify(m->phys_page);
625
626 /*
627 * Mark original page as cleaning in place.
628 */
629 m->cleaning = TRUE;
630 m->dirty = TRUE;
631 m->precious = FALSE;
632
633 /*
634 * Convert the fictitious page to a private shadow of
635 * the real page.
636 */
637 assert(new_m->fictitious);
638 assert(new_m->phys_page == vm_page_fictitious_addr);
639 new_m->fictitious = FALSE;
640 new_m->private = TRUE;
641 new_m->pageout = TRUE;
642 new_m->phys_page = m->phys_page;
643 vm_page_wire(new_m);
644
645 vm_page_insert(new_m, new_object, new_offset);
646 assert(!new_m->wanted);
647 new_m->busy = FALSE;
648 }
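/*
 * Net effect of vm_pageclean_setup(): the original page m is marked as
 * being cleaned in place (cleaning + dirty, with its modify bit cleared
 * in the pmap), while new_m becomes a wired, private pageout shadow that
 * aliases m's physical page and is inserted, no longer busy, at
 * new_offset in new_object.
 */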
649
650 /*
651 * Routine: vm_pageout_initialize_page
652 * Purpose:
653 * Causes the specified page to be initialized in
654 * the appropriate memory object. This routine is used to push
655 * pages into a copy-object when they are modified in the
656 * permanent object.
657 *
658 * The page is moved to a temporary object and paged out.
659 *
660 * In/out conditions:
661 * The page in question must not be on any pageout queues.
662 * The object to which it belongs must be locked.
663 * The page must be busy, but not hold a paging reference.
664 *
665 * Implementation:
666 * Move this page to a completely new object.
667 */
668 void
669 vm_pageout_initialize_page(
670 vm_page_t m)
671 {
672 vm_object_t object;
673 vm_object_offset_t paging_offset;
674 vm_page_t holding_page;
675 memory_object_t pager;
676
677 XPR(XPR_VM_PAGEOUT,
678 "vm_pageout_initialize_page, page 0x%X\n",
679 (integer_t)m, 0, 0, 0, 0);
680 assert(m->busy);
681
682 /*
683 * Verify that we really want to clean this page
684 */
685 assert(!m->absent);
686 assert(!m->error);
687 assert(m->dirty);
688
689 /*
690 * Create a paging reference to let us play with the object.
691 */
692 object = m->object;
693 paging_offset = m->offset + object->paging_offset;
694
695 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
696 VM_PAGE_FREE(m);
697 panic("reservation without pageout?"); /* alan */
698 vm_object_unlock(object);
699
700 return;
701 }
702
703 /*
704 * If there's no pager, then we can't clean the page. This should
705 * never happen since this should be a copy object and therefore not
706 * an external object, so the pager should always be there.
707 */
708
709 pager = object->pager;
710
711 if (pager == MEMORY_OBJECT_NULL) {
712 VM_PAGE_FREE(m);
713 panic("missing pager for copy object");
714 return;
715 }
716
717 /* set the page for future call to vm_fault_list_request */
718 vm_object_paging_begin(object);
719 holding_page = NULL;
720 vm_page_lock_queues();
721 pmap_clear_modify(m->phys_page);
722 m->dirty = TRUE;
723 m->busy = TRUE;
724 m->list_req_pending = TRUE;
725 m->cleaning = TRUE;
726 m->pageout = TRUE;
727 vm_page_wire(m);
728 vm_page_unlock_queues();
729 vm_object_unlock(object);
730
731 /*
732 * Write the data to its pager.
733 * Note that the data is passed by naming the new object,
734 * not a virtual address; the pager interface has been
735 * manipulated to use the "internal memory" data type.
736 * [The object reference from its allocation is donated
737 * to the eventual recipient.]
738 */
739 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
740
741 vm_object_lock(object);
742 vm_object_paging_end(object);
743 }
744
745 #if MACH_CLUSTER_STATS
746 #define MAXCLUSTERPAGES 16
747 struct {
748 unsigned long pages_in_cluster;
749 unsigned long pages_at_higher_offsets;
750 unsigned long pages_at_lower_offsets;
751 } cluster_stats[MAXCLUSTERPAGES];
752 #endif /* MACH_CLUSTER_STATS */
753
754
755 /*
756 * vm_pageout_cluster:
757 *
758 * Given a page, queue it to the appropriate I/O thread,
759 * which will page it out and attempt to clean adjacent pages
760 * in the same operation.
761 *
762 * The page must be busy, and the object and queues locked. We will take a
763 * paging reference to prevent deallocation or collapse when we
764 * release the object lock back at the call site. The I/O thread
765 * is responsible for consuming this reference
766 *
767 * The page must not be on any pageout queue.
768 */
769
770 void
771 vm_pageout_cluster(vm_page_t m)
772 {
773 vm_object_t object = m->object;
774 struct vm_pageout_queue *q;
775
776
777 XPR(XPR_VM_PAGEOUT,
778 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
779 (integer_t)object, m->offset, (integer_t)m, 0, 0);
780
781 /*
782 * Only a certain kind of page is appreciated here.
783 */
784 assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
785 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
786 assert(!m->throttled);
787
788 /*
789 * protect the object from collapse -
790 * locking in the object's paging_offset.
791 */
792 vm_object_paging_begin(object);
793
794 /*
795 * set the page for future call to vm_fault_list_request
796 * page should already be marked busy
797 */
798 vm_page_wire(m);
799 m->list_req_pending = TRUE;
800 m->cleaning = TRUE;
801 m->pageout = TRUE;
802 m->laundry = TRUE;
803
804 if (object->internal == TRUE)
805 q = &vm_pageout_queue_internal;
806 else
807 q = &vm_pageout_queue_external;
808 q->pgo_laundry++;
809
810 m->pageout_queue = TRUE;
811 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
812
813 if (q->pgo_idle == TRUE) {
814 q->pgo_idle = FALSE;
815 thread_wakeup((event_t) &q->pgo_pending);
816 }
817 }
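/*
 * Illustrative calling sequence (a sketch, not taken from a real call
 * site): the caller holds the object lock and the page queues lock and
 * has already marked the page busy; the paging reference taken here is
 * consumed later by the pageout iothread.
 *
 *	vm_object_lock(object);
 *	vm_page_lock_queues();
 *	m->busy = TRUE;
 *	vm_pageout_cluster(m);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */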
818
819
820 unsigned long vm_pageout_throttle_up_count = 0;
821
822 /*
823 * A page is back from laundry. See if there are some pages waiting to
824 * go to laundry and if we can let some of them go now.
825 *
826 * Object and page queues must be locked.
827 */
828 void
829 vm_pageout_throttle_up(
830 vm_page_t m)
831 {
832 struct vm_pageout_queue *q;
833
834 vm_pageout_throttle_up_count++;
835
836 assert(m->laundry);
837 assert(m->object != VM_OBJECT_NULL);
838 assert(m->object != kernel_object);
839
840 if (m->object->internal == TRUE)
841 q = &vm_pageout_queue_internal;
842 else
843 q = &vm_pageout_queue_external;
844
845 m->laundry = FALSE;
846 q->pgo_laundry--;
847
848 if (q->pgo_throttled == TRUE) {
849 q->pgo_throttled = FALSE;
850 thread_wakeup((event_t) &q->pgo_laundry);
851 }
852 }
853
854
855 /*
856 * vm_pageout_scan does the dirty work for the pageout daemon.
857 * It returns with vm_page_queue_free_lock held and
858 * vm_page_free_wanted == 0.
859 */
860
861 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT (3 * MAX_UPL_TRANSFER)
862
863 #define FCS_IDLE 0
864 #define FCS_DELAYED 1
865 #define FCS_DEADLOCK_DETECTED 2
866
867 struct flow_control {
868 int state;
869 mach_timespec_t ts;
870 };
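/*
 * Flow control states used below when the default pager's queue is
 * throttled:
 *
 *	FCS_IDLE              -> FCS_DELAYED
 *		arm a deadlock timer of vm_pageout_deadlock_wait msecs
 *	FCS_DELAYED           -> FCS_DEADLOCK_DETECTED
 *		timer expired with the queue still throttled; move
 *		vm_pageout_deadlock_target pages to try to break the jam
 *	FCS_DEADLOCK_DETECTED -> FCS_DELAYED
 *		relief target consumed; re-arm the timer and observe
 */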
871
872 void
873 vm_pageout_scan(void)
874 {
875 unsigned int loop_count = 0;
876 unsigned int inactive_burst_count = 0;
877 unsigned int active_burst_count = 0;
878 unsigned int reactivated_this_call;
879 unsigned int reactivate_limit;
880 vm_page_t local_freeq = NULL;
881 int local_freed = 0;
882 int delayed_unlock;
883 int need_internal_inactive = 0;
884 int refmod_state = 0;
885 int vm_pageout_deadlock_target = 0;
886 struct vm_pageout_queue *iq;
887 struct vm_pageout_queue *eq;
888 struct vm_speculative_age_q *sq;
889 struct flow_control flow_control;
890 boolean_t inactive_throttled = FALSE;
891 boolean_t try_failed;
892 mach_timespec_t ts;
893 unsigned int msecs = 0;
894 vm_object_t object;
895 vm_object_t last_object_tried;
896 int zf_ratio;
897 int zf_run_count;
898 uint32_t catch_up_count = 0;
899 uint32_t inactive_reclaim_run;
900 boolean_t forced_reclaim;
901
902 flow_control.state = FCS_IDLE;
903 iq = &vm_pageout_queue_internal;
904 eq = &vm_pageout_queue_external;
905 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
906
907
908 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
909
910
911 vm_page_lock_queues();
912 delayed_unlock = 1; /* must be nonzero if Qs are locked, 0 if unlocked */
913
914 /*
915 * Calculate the max number of referenced pages on the inactive
916 * queue that we will reactivate.
917 */
918 reactivated_this_call = 0;
919 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
920 vm_page_inactive_count);
921 inactive_reclaim_run = 0;
922
923
924 /*???*/ /*
925 * We want to gradually dribble pages from the active queue
926 * to the inactive queue. If we let the inactive queue get
927 * very small, and then suddenly dump many pages into it,
928 * those pages won't get a sufficient chance to be referenced
929 * before we start taking them from the inactive queue.
930 *
931 * We must limit the rate at which we send pages to the pagers.
932 * data_write messages consume memory, for message buffers and
933 * for map-copy objects. If we get too far ahead of the pagers,
934 * we can potentially run out of memory.
935 *
936 * We can use the laundry count to limit directly the number
937 * of pages outstanding to the default pager. A similar
938 * strategy for external pagers doesn't work, because
939 * external pagers don't have to deallocate the pages sent them,
940 * and because we might have to send pages to external pagers
941 * even if they aren't processing writes. So we also
942 * use a burst count to limit writes to external pagers.
943 *
944 * When memory is very tight, we can't rely on external pagers to
945 * clean pages. They probably aren't running, because they
946 * aren't vm-privileged. If we kept sending dirty pages to them,
947 * we could exhaust the free list.
948 */
949
950
951 Restart:
952 assert(delayed_unlock!=0);
953
954 /*
955 * A page is "zero-filled" if it was not paged in from somewhere,
956 * and it belongs to an object at least VM_ZF_OBJECT_SIZE_THRESHOLD big.
957 * Recalculate the zero-filled page ratio. We use this to apportion
958 * victimized pages between the normal and zero-filled inactive
959 * queues according to their relative abundance in memory. Thus if a task
960 * is flooding memory with zf pages, we begin to hunt them down.
961 * It would be better to throttle greedy tasks at a higher level,
962 * but at the moment mach vm cannot do this.
963 */
964 {
965 uint32_t total = vm_page_active_count + vm_page_inactive_count;
966 uint32_t normal = total - vm_zf_count;
967
968 /* zf_ratio is the number of zf pages we victimize per normal page */
969
970 if (vm_zf_count < vm_accellerate_zf_pageout_trigger)
971 zf_ratio = 0;
972 else if ((vm_zf_count <= normal) || (normal == 0))
973 zf_ratio = 1;
974 else
975 zf_ratio = vm_zf_count / normal;
976
977 zf_run_count = 0;
978 }
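/*
 * Example (illustrative): with 16000 active + inactive pages of which
 * vm_zf_count = 12000 are zero-filled, normal = 4000 and zf_ratio = 3,
 * so the victim selection below will take up to three zero-fill pages
 * for every normal inactive page (provided the zf queue holds at least
 * zf_queue_min_count pages).
 */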
979
980 /*
981 * Recalculate vm_page_inactive_target.
982 */
983 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
984 vm_page_inactive_count +
985 vm_page_speculative_count);
986 /*
987 * don't want to wake the pageout_scan thread up every time we fall below
988 * the targets... set a low water mark at 0.25% below the target
989 */
990 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);
991
992 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
993 vm_page_inactive_count);
994 object = NULL;
995 last_object_tried = NULL;
996 try_failed = FALSE;
997
998 if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count))
999 catch_up_count = vm_page_inactive_count + vm_page_speculative_count;
1000 else
1001 catch_up_count = 0;
1002
1003 for (;;) {
1004 vm_page_t m;
1005
1006 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1007
1008 if (delayed_unlock == 0) {
1009 vm_page_lock_queues();
1010 delayed_unlock = 1;
1011 }
1012
1013 /*
1014 * Don't sweep through active queue more than the throttle
1015 * which should be kept relatively low
1016 */
1017 active_burst_count = vm_pageout_burst_active_throttle;
1018
1019 /*
1020 * Move pages from active to inactive.
1021 */
1022 if (need_internal_inactive == 0 && (vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
1023 goto done_moving_active_pages;
1024
1025 while (!queue_empty(&vm_page_queue_active) &&
1026 (need_internal_inactive || active_burst_count)) {
1027
1028 if (active_burst_count)
1029 active_burst_count--;
1030
1031 vm_pageout_active++;
1032
1033 m = (vm_page_t) queue_first(&vm_page_queue_active);
1034
1035 assert(m->active && !m->inactive);
1036 assert(!m->laundry);
1037 assert(m->object != kernel_object);
1038 assert(m->phys_page != vm_page_guard_addr);
1039
1040 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1041
1042 /*
1043 * Try to lock object; since we've already got the
1044 * page queues lock, we can only 'try' for this one.
1045 * if the 'try' fails, we need to do a mutex_pause
1046 * to allow the owner of the object lock a chance to
1047 * run... otherwise, we're likely to trip over this
1048 * object in the same state as we work our way through
1049 * the queue... clumps of pages associated with the same
1050 * object are fairly typical on the inactive and active queues
1051 */
1052 if (m->object != object) {
1053 if (object != NULL) {
1054 vm_object_unlock(object);
1055 object = NULL;
1056 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1057 }
1058 if (!vm_object_lock_try_scan(m->object)) {
1059 /*
1060 * move page to end of active queue and continue
1061 */
1062 queue_remove(&vm_page_queue_active, m,
1063 vm_page_t, pageq);
1064 queue_enter(&vm_page_queue_active, m,
1065 vm_page_t, pageq);
1066
1067 try_failed = TRUE;
1068
1069 m = (vm_page_t) queue_first(&vm_page_queue_active);
1070 /*
1071 * this is the next object we're going to be interested in
1072 * try to make sure it's available after the mutex_yield
1073 * returns control
1074 */
1075 vm_pageout_scan_wants_object = m->object;
1076
1077 goto done_with_activepage;
1078 }
1079 object = m->object;
1080
1081 try_failed = FALSE;
1082 }
1083
1084 /*
1085 * if the page is BUSY, then we pull it
1086 * off the active queue and leave it alone.
1087 * when BUSY is cleared, it will get stuck
1088 * back on the appropriate queue
1089 */
1090 if (m->busy) {
1091 queue_remove(&vm_page_queue_active, m,
1092 vm_page_t, pageq);
1093 m->pageq.next = NULL;
1094 m->pageq.prev = NULL;
1095
1096 if (!m->fictitious)
1097 vm_page_active_count--;
1098 m->active = FALSE;
1099
1100 goto done_with_activepage;
1101 }
1102
1103 /*
1104 * Deactivate the page while holding the object
1105 * locked, so we know the page is still not busy.
1106 * This should prevent races between pmap_enter
1107 * and pmap_clear_reference. The page might be
1108 * absent or fictitious, but vm_page_deactivate
1109 * can handle that.
1110 */
1111 vm_page_deactivate(m);
1112
1113 if (need_internal_inactive) {
1114 vm_pageout_scan_active_throttle_success++;
1115 need_internal_inactive--;
1116 }
1117 done_with_activepage:
1118 if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) {
1119
1120 if (object != NULL) {
1121 vm_object_unlock(object);
1122 object = NULL;
1123 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1124 }
1125 if (local_freeq) {
1126 vm_page_free_list(local_freeq);
1127
1128 local_freeq = NULL;
1129 local_freed = 0;
1130 }
1131 mutex_yield(&vm_page_queue_lock);
1132
1133 delayed_unlock = 1;
1134
1135 /*
1136 * continue the while loop processing
1137 * the active queue... need to hold
1138 * the page queues lock
1139 */
1140 }
1141 }
1142
1143
1144
1145 /**********************************************************************
1146 * above this point we're playing with the active queue
1147 * below this point we're playing with the throttling mechanisms
1148 * and the inactive queue
1149 **********************************************************************/
1150
1151 done_moving_active_pages:
1152
1153 /*
1154 * We are done if we have met our target *and*
1155 * nobody is still waiting for a page.
1156 */
1157 if (vm_page_free_count + local_freed >= vm_page_free_target) {
1158 if (object != NULL) {
1159 vm_object_unlock(object);
1160 object = NULL;
1161 }
1162 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1163
1164 if (local_freeq) {
1165 vm_page_free_list(local_freeq);
1166
1167 local_freeq = NULL;
1168 local_freed = 0;
1169 }
1170 /*
1171 * inactive target still not met... keep going
1172 * until we get the queues balanced
1173 */
1174 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
1175 !queue_empty(&vm_page_queue_active))
1176 continue;
1177
1178 mutex_lock(&vm_page_queue_free_lock);
1179
1180 if ((vm_page_free_count >= vm_page_free_target) &&
1181 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
1182
1183 vm_page_unlock_queues();
1184
1185 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1186
1187 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1188
1189 return;
1190 }
1191 mutex_unlock(&vm_page_queue_free_lock);
1192 }
1193 /*
1194 * Before anything, we check if we have any ripe volatile objects around.
1195 * If so, purge the first and see what it gives us.
1196 */
1197 assert (available_for_purge>=0);
1198 if (available_for_purge)
1199 {
1200 if (object != NULL) {
1201 vm_object_unlock(object);
1202 object = NULL;
1203 }
1204 vm_purgeable_object_purge_one();
1205 continue;
1206 }
1207
1208 if (queue_empty(&sq->age_q) && vm_page_speculative_count) {
1209 /*
1210 * try to pull pages from the aging bins
1211 * see vm_page.h for an explanation of how
1212 * this mechanism works
1213 */
1214 struct vm_speculative_age_q *aq;
1215 mach_timespec_t ts_fully_aged;
1216 boolean_t can_steal = FALSE;
1217
1218 aq = &vm_page_queue_speculative[speculative_steal_index];
1219
1220 while (queue_empty(&aq->age_q)) {
1221
1222 speculative_steal_index++;
1223
1224 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
1225 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
1226
1227 aq = &vm_page_queue_speculative[speculative_steal_index];
1228 }
1229 if (vm_page_speculative_count > vm_page_speculative_target)
1230 can_steal = TRUE;
1231 else {
1232 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) / 1000;
1233 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) % 1000)
1234 * 1000 * NSEC_PER_USEC;
1235
1236 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
1237
1238 clock_get_system_nanotime(&ts.tv_sec, (unsigned *)&ts.tv_nsec);
1239
1240 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
1241 can_steal = TRUE;
1242 }
1243 if (can_steal == TRUE)
1244 vm_page_speculate_ageit(aq);
1245 }
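/*
 * Illustrative timing (the actual constants are defined in vm_page.h):
 * if, say, VM_PAGE_MAX_SPECULATIVE_AGE_Q were 10 bins aged at
 * VM_PAGE_SPECULATIVE_Q_AGE_MS = 500 ms, ts_fully_aged above would be
 * the bin's timestamp plus roughly 5 seconds, after which its pages
 * become stealable even though vm_page_speculative_count is still
 * below vm_page_speculative_target.
 */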
1246
1247 /*
1248 * Sometimes we have to pause:
1249 * 1) No inactive pages - nothing to do.
1250 * 2) Flow control - default pageout queue is full
1251 * 3) Loop control - no acceptable pages found on the inactive queue
1252 * within the last vm_pageout_burst_inactive_throttle iterations
1253 */
1254 if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf) && queue_empty(&sq->age_q) &&
1255 (VM_PAGE_Q_THROTTLED(iq) || queue_empty(&vm_page_queue_throttled))) {
1256 vm_pageout_scan_empty_throttle++;
1257 msecs = vm_pageout_empty_wait;
1258 goto vm_pageout_scan_delay;
1259
1260 } else if (inactive_burst_count >= vm_pageout_burst_inactive_throttle) {
1261 vm_pageout_scan_burst_throttle++;
1262 msecs = vm_pageout_burst_wait;
1263 goto vm_pageout_scan_delay;
1264
1265 } else if (VM_PAGE_Q_THROTTLED(iq) && IP_VALID(memory_manager_default)) {
1266
1267 switch (flow_control.state) {
1268
1269 case FCS_IDLE:
1270 reset_deadlock_timer:
1271 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
1272 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
1273 clock_get_system_nanotime(&flow_control.ts.tv_sec,
1274 (unsigned *)&flow_control.ts.tv_nsec);
1275 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
1276
1277 flow_control.state = FCS_DELAYED;
1278 msecs = vm_pageout_deadlock_wait;
1279
1280 break;
1281
1282 case FCS_DELAYED:
1283 clock_get_system_nanotime(&ts.tv_sec,
1284 (unsigned *)&ts.tv_nsec);
1285
1286 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
1287 /*
1288 * the pageout thread for the default pager is potentially
1289 * deadlocked since the
1290 * default pager queue has been throttled for more than the
1291 * allowable time... we need to move some clean pages or dirty
1292 * pages belonging to the external pagers if they aren't throttled
1293 * vm_page_free_wanted represents the number of threads currently
1294 * blocked waiting for pages... we'll move one page for each of
1295 * these plus a fixed amount to break the logjam... once we're done
1296 * moving this number of pages, we'll re-enter the FCS_DELAYED state
1297 * with a new timeout target since we have no way of knowing
1298 * whether we've broken the deadlock except through observation
1299 * of the queue associated with the default pager... we need to
1300 * stop moving pages and allow the system to run to see what
1301 * state it settles into.
1302 */
1303 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
1304 vm_pageout_scan_deadlock_detected++;
1305 flow_control.state = FCS_DEADLOCK_DETECTED;
1306
1307 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1308 goto consider_inactive;
1309 }
1310 /*
1311 * just resniff instead of trying
1312 * to compute a new delay time... we're going to be
1313 * awakened immediately upon a laundry completion,
1314 * so we won't wait any longer than necessary
1315 */
1316 msecs = vm_pageout_idle_wait;
1317 break;
1318
1319 case FCS_DEADLOCK_DETECTED:
1320 if (vm_pageout_deadlock_target)
1321 goto consider_inactive;
1322 goto reset_deadlock_timer;
1323
1324 }
1325 vm_pageout_scan_throttle++;
1326 iq->pgo_throttled = TRUE;
1327 vm_pageout_scan_delay:
1328 if (object != NULL) {
1329 vm_object_unlock(object);
1330 object = NULL;
1331 }
1332 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1333
1334 if (local_freeq) {
1335 vm_page_free_list(local_freeq);
1336
1337 local_freeq = NULL;
1338 local_freed = 0;
1339 }
1340 #if CONFIG_EMBEDDED
1341 {
1342 int percent_avail;
1343
1344 /*
1345 * Decide if we need to send a memory status notification.
1346 */
1347 percent_avail =
1348 (vm_page_active_count + vm_page_inactive_count +
1349 vm_page_speculative_count + vm_page_free_count +
1350 vm_page_purgeable_count ) * 100 /
1351 atop_64(max_mem);
1352 if (percent_avail >= (kern_memorystatus_level + 5) ||
1353 percent_avail <= (kern_memorystatus_level - 5)) {
1354 kern_memorystatus_level = percent_avail;
1355 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1356 }
1357 }
1358 #endif
1359 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
1360
1361 counter(c_vm_pageout_scan_block++);
1362
1363 vm_page_unlock_queues();
1364
1365 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1366
1367 thread_block(THREAD_CONTINUE_NULL);
1368
1369 vm_page_lock_queues();
1370 delayed_unlock = 1;
1371
1372 iq->pgo_throttled = FALSE;
1373
1374 if (loop_count >= vm_page_inactive_count)
1375 loop_count = 0;
1376 inactive_burst_count = 0;
1377
1378 goto Restart;
1379 /*NOTREACHED*/
1380 }
1381
1382
1383 flow_control.state = FCS_IDLE;
1384 consider_inactive:
1385 loop_count++;
1386 inactive_burst_count++;
1387 vm_pageout_inactive++;
1388
1389 /* Choose a victim. */
1390
1391 while (1) {
1392 m = NULL;
1393
1394 /*
1395 * the most eligible pages are ones that were throttled because the
1396 * pager wasn't ready at the time. If a pager is ready now,
1397 * see if one of these is useful.
1398 */
1399 if (!VM_PAGE_Q_THROTTLED(iq) && !queue_empty(&vm_page_queue_throttled)) {
1400 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
1401 break;
1402 }
1403
1404 /*
1405 * The second most eligible pages are ones we paged in speculatively,
1406 * but which have not yet been touched.
1407 */
1408 if ( !queue_empty(&sq->age_q) ) {
1409 m = (vm_page_t) queue_first(&sq->age_q);
1410 break;
1411 }
1412 /*
1413 * Time for a zero-filled inactive page?
1414 */
1415 if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) ||
1416 queue_empty(&vm_page_queue_inactive)) {
1417 if ( !queue_empty(&vm_page_queue_zf) ) {
1418 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1419 zf_run_count++;
1420 break;
1421 }
1422 }
1423 /*
1424 * It's either a normal inactive page or nothing.
1425 */
1426 if ( !queue_empty(&vm_page_queue_inactive) ) {
1427 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1428 zf_run_count = 0;
1429 break;
1430 }
1431
1432 panic("vm_pageout: no victim");
1433 }
1434
1435 assert(!m->active && (m->inactive || m->speculative || m->throttled));
1436 assert(!m->laundry);
1437 assert(m->object != kernel_object);
1438 assert(m->phys_page != vm_page_guard_addr);
1439
1440 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1441
1442 /*
1443 * check to see if we currently are working
1444 * with the same object... if so, we've
1445 * already got the lock
1446 */
1447 if (m->object != object) {
1448 /*
1449 * the object associated with candidate page is
1450 * different from the one we were just working
1451 * with... dump the lock if we still own it
1452 */
1453 if (object != NULL) {
1454 vm_object_unlock(object);
1455 object = NULL;
1456 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1457 }
1458 /*
1459 * Try to lock object; since we've already got the
1460 * page queues lock, we can only 'try' for this one.
1461 * if the 'try' fails, we need to do a mutex_pause
1462 * to allow the owner of the object lock a chance to
1463 * run... otherwise, we're likely to trip over this
1464 * object in the same state as we work our way through
1465 * the queue... clumps of pages associated with the same
1466 * object are fairly typical on the inactive and active queues
1467 */
1468 if (!vm_object_lock_try_scan(m->object)) {
1469 /*
1470 * Move page to end and continue.
1471 * Don't re-issue ticket
1472 */
1473 if (m->zero_fill) {
1474 queue_remove(&vm_page_queue_zf, m,
1475 vm_page_t, pageq);
1476 queue_enter(&vm_page_queue_zf, m,
1477 vm_page_t, pageq);
1478 } else if (m->speculative) {
1479 remque(&m->pageq);
1480 m->speculative = FALSE;
1481 vm_page_speculative_count--;
1482
1483 /*
1484 * move to the tail of the inactive queue
1485 * to get it out of the way... the speculative
1486 * queue is generally too small to depend
1487 * on there being enough pages from other
1488 * objects to make cycling it back on the
1489 * same queue a winning proposition
1490 */
1491 queue_enter(&vm_page_queue_inactive, m,
1492 vm_page_t, pageq);
1493 m->inactive = TRUE;
1494 vm_page_inactive_count++;
1495 token_new_pagecount++;
1496 } else if (m->throttled) {
1497 queue_remove(&vm_page_queue_throttled, m,
1498 vm_page_t, pageq);
1499 m->throttled = FALSE;
1500 vm_page_throttled_count--;
1501
1502 /*
1503 * not throttled any more, so can stick
1504 * it on the inactive queue.
1505 */
1506 queue_enter(&vm_page_queue_inactive, m,
1507 vm_page_t, pageq);
1508 m->inactive = TRUE;
1509 vm_page_inactive_count++;
1510 token_new_pagecount++;
1511 } else {
1512 queue_remove(&vm_page_queue_inactive, m,
1513 vm_page_t, pageq);
1514 #if MACH_ASSERT
1515 vm_page_inactive_count--; /* balance for purgeable queue asserts */
1516 #endif
1517 vm_purgeable_q_advance_all(1);
1518
1519 queue_enter(&vm_page_queue_inactive, m,
1520 vm_page_t, pageq);
1521 #if MACH_ASSERT
1522 vm_page_inactive_count++; /* balance for purgeable queue asserts */
1523 #endif
1524 token_new_pagecount++;
1525 }
1526 pmap_clear_reference(m->phys_page);
1527 m->reference = FALSE;
1528
1529 vm_pageout_inactive_nolock++;
1530
1531 if ( !queue_empty(&sq->age_q) )
1532 m = (vm_page_t) queue_first(&sq->age_q);
1533 else if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) ||
1534 queue_empty(&vm_page_queue_inactive)) {
1535 if ( !queue_empty(&vm_page_queue_zf) )
1536 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1537 } else if ( !queue_empty(&vm_page_queue_inactive) ) {
1538 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1539 }
1540 /*
1541 * this is the next object we're going to be interested in
1542 * try to make sure it's available after the mutex_yield
1543 * returns control
1544 */
1545 vm_pageout_scan_wants_object = m->object;
1546
1547 /*
1548 * force us to dump any collected free pages
1549 * and to pause before moving on
1550 */
1551 try_failed = TRUE;
1552
1553 goto done_with_inactivepage;
1554 }
1555 object = m->object;
1556 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1557
1558 try_failed = FALSE;
1559 }
1560
1561 /*
1562 * Paging out pages of external objects which
1563 * are currently being created must be avoided.
1564 * The pager may claim memory, thus leading to a
1565 * possible deadlock between it and the pageout thread,
1566 * if such pages are finally chosen. The remaining assumption
1567 * is that there will finally be enough available pages in the
1568 * inactive pool to page out in order to satisfy all memory
1569 * claimed by the thread which concurrently creates the pager.
1570 */
1571 if (!object->pager_initialized && object->pager_created) {
1572 /*
1573 * Move page to end and continue, hoping that
1574 * there will be enough other inactive pages to
1575 * page out so that the thread which currently
1576 * initializes the pager will succeed.
1577 * Don't re-grant the ticket, the page should
1578 * pulled from the queue and paged out whenever
1579 * one of its logically adjacent fellows is
1580 * targeted.
1581 *
1582 * Pages found on the speculative list can never be
1583 * in this state... they always have a pager associated
1584 * with them.
1585 */
1586 assert(!m->speculative);
1587
1588 if (m->zero_fill) {
1589 queue_remove(&vm_page_queue_zf, m,
1590 vm_page_t, pageq);
1591 queue_enter(&vm_page_queue_zf, m,
1592 vm_page_t, pageq);
1593 } else {
1594 queue_remove(&vm_page_queue_inactive, m,
1595 vm_page_t, pageq);
1596 #if MACH_ASSERT
1597 vm_page_inactive_count--; /* balance for purgeable queue asserts */
1598 #endif
1599 vm_purgeable_q_advance_all(1);
1600
1601 queue_enter(&vm_page_queue_inactive, m,
1602 vm_page_t, pageq);
1603 #if MACH_ASSERT
1604 vm_page_inactive_count++; /* balance for purgeable queue asserts */
1605 #endif
1606 token_new_pagecount++;
1607 }
1608 vm_pageout_inactive_avoid++;
1609
1610 goto done_with_inactivepage;
1611 }
1612 /*
1613 * Remove the page from its list.
1614 */
1615 if (m->speculative) {
1616 remque(&m->pageq);
1617 m->speculative = FALSE;
1618 vm_page_speculative_count--;
1619 } else if (m->throttled) {
1620 queue_remove(&vm_page_queue_throttled, m, vm_page_t, pageq);
1621 m->throttled = FALSE;
1622 vm_page_throttled_count--;
1623 } else {
1624 if (m->zero_fill) {
1625 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
1626 vm_zf_queue_count--;
1627 } else {
1628 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1629 }
1630 m->inactive = FALSE;
1631 if (!m->fictitious)
1632 vm_page_inactive_count--;
1633 vm_purgeable_q_advance_all(1);
1634 }
1635
1636 /* If the object is empty, the page must be reclaimed even if dirty or used. */
1637 /* If the page belongs to a volatile object, we stick it back on. */
1638 if (object->copy == VM_OBJECT_NULL) {
1639 if(object->purgable == VM_PURGABLE_EMPTY && !m->cleaning) {
1640 m->busy = TRUE;
1641 if (m->pmapped == TRUE) {
1642 /* unmap the page */
1643 refmod_state = pmap_disconnect(m->phys_page);
1644 if (refmod_state & VM_MEM_MODIFIED) {
1645 m->dirty = TRUE;
1646 }
1647 }
1648 if (m->dirty || m->precious) {
1649 /* we saved the cost of cleaning this page ! */
1650 vm_page_purged_count++;
1651 }
1652 goto reclaim_page;
1653 }
1654 if (object->purgable == VM_PURGABLE_VOLATILE) {
1655 /* if it's wired, we can't put it on our queue */
1656 assert(m->wire_count == 0);
1657 /* just stick it back on! */
1658 goto reactivate_page;
1659 }
1660 }
1661 m->pageq.next = NULL;
1662 m->pageq.prev = NULL;
1663
1664 if ( !m->fictitious && catch_up_count)
1665 catch_up_count--;
1666
1667 /*
1668 * ENCRYPTED SWAP:
1669 * if this page has already been picked up as part of a
1670 * page-out cluster, it will be busy because it is being
1671 * encrypted (see vm_object_upl_request()). But we still
1672 * want to demote it from "clean-in-place" (aka "adjacent")
1673 * to "clean-and-free" (aka "target"), so let's ignore its
1674 * "busy" bit here and proceed to check for "cleaning" a
1675 * little bit below...
1676 */
1677 if ( !m->encrypted_cleaning && (m->busy || !object->alive)) {
1678 /*
1679 * Somebody is already playing with this page.
1680 * Leave it off the pageout queues.
1681 *
1682 */
1683 vm_pageout_inactive_busy++;
1684
1685 goto done_with_inactivepage;
1686 }
1687
1688 /*
1689 * If it's absent or in error, we can reclaim the page.
1690 */
1691
1692 if (m->absent || m->error) {
1693 vm_pageout_inactive_absent++;
1694 reclaim_page:
1695 if (vm_pageout_deadlock_target) {
1696 vm_pageout_scan_inactive_throttle_success++;
1697 vm_pageout_deadlock_target--;
1698 }
1699
1700 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
1701
1702 if (m->object->internal) {
1703 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
1704 } else {
1705 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
1706 }
1707
1708 vm_page_free_prepare(m);
1709
1710 assert(m->pageq.next == NULL &&
1711 m->pageq.prev == NULL);
1712 m->pageq.next = (queue_entry_t)local_freeq;
1713 local_freeq = m;
1714 local_freed++;
1715
1716 inactive_burst_count = 0;
1717
1718 goto done_with_inactivepage;
1719 }
1720
1721 assert(!m->private);
1722 assert(!m->fictitious);
1723
1724 /*
1725 * If already cleaning this page in place, convert from
1726 * "adjacent" to "target". We can leave the page mapped,
1727 * and vm_pageout_object_terminate will determine whether
1728 * to free or reactivate.
1729 */
1730
1731 if (m->cleaning) {
1732 m->busy = TRUE;
1733 m->pageout = TRUE;
1734 m->dump_cleaning = TRUE;
1735 vm_page_wire(m);
1736
1737 CLUSTER_STAT(vm_pageout_cluster_conversions++);
1738
1739 inactive_burst_count = 0;
1740
1741 goto done_with_inactivepage;
1742 }
1743
1744 /*
1745 * If it's being used, reactivate.
1746 * (Fictitious pages are either busy or absent.)
1747 * First, update the reference and dirty bits
1748 * to make sure the page is unreferenced.
1749 */
1750 refmod_state = -1;
1751
1752 if (m->reference == FALSE && m->pmapped == TRUE) {
1753 refmod_state = pmap_get_refmod(m->phys_page);
1754
1755 if (refmod_state & VM_MEM_REFERENCED)
1756 m->reference = TRUE;
1757 if (refmod_state & VM_MEM_MODIFIED)
1758 m->dirty = TRUE;
1759 }
1760 if (m->reference && !m->no_cache) {
1761 /*
1762 * The page we pulled off the inactive list has
1763 * been referenced. It is possible for other
1764 * processors to be touching pages faster than we
1765 * can clear the referenced bit and traverse the
1766 * inactive queue, so we limit the number of
1767 * reactivations.
1768 */
1769 if (++reactivated_this_call >= reactivate_limit) {
1770 vm_pageout_reactivation_limit_exceeded++;
1771 } else if (catch_up_count) {
1772 vm_pageout_catch_ups++;
1773 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
1774 vm_pageout_inactive_force_reclaim++;
1775 } else {
1776 /*
1777 * The page was being used, so put back on active list.
1778 */
1779 reactivate_page:
1780 vm_page_activate(m);
1781 VM_STAT_INCR(reactivations);
1782
1783 vm_pageout_inactive_used++;
1784 inactive_burst_count = 0;
1785
1786 goto done_with_inactivepage;
1787 }
1788 /*
1789 * Make sure we call pmap_get_refmod() if it
1790 * wasn't already called just above, to update
1791 * the dirty bit.
1792 */
1793 if ((refmod_state == -1) && !m->dirty && m->pmapped) {
1794 refmod_state = pmap_get_refmod(m->phys_page);
1795 if (refmod_state & VM_MEM_MODIFIED)
1796 m->dirty = TRUE;
1797 }
1798 forced_reclaim = TRUE;
1799 } else {
1800 forced_reclaim = FALSE;
1801 }
1802
1803 XPR(XPR_VM_PAGEOUT,
1804 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
1805 (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
1806
1807 /*
1808 * we've got a candidate page to steal...
1809 *
1810 * m->dirty is up to date courtesy of the
1811 * preceding check for m->reference... if
1812 * we get here, then m->reference had to be
1813 * FALSE (or possibly "reactivate_limit" was
1814 * exceeded), but in either case we called
1815 * pmap_get_refmod() and updated both
1816 * m->reference and m->dirty
1817 *
1818 * if it's dirty or precious we need to
1819 * see if the target queue is throttled;
1820 * if it is, we need to skip over it by moving it back
1821 * to the end of the inactive queue
1822 */
1823 inactive_throttled = FALSE;
1824
1825 if (m->dirty || m->precious) {
1826 if (object->internal) {
1827 if (VM_PAGE_Q_THROTTLED(iq))
1828 inactive_throttled = TRUE;
1829 } else if (VM_PAGE_Q_THROTTLED(eq)) {
1830 inactive_throttled = TRUE;
1831 }
1832 }
1833 if (inactive_throttled == TRUE) {
1834 throttle_inactive:
1835 if (!IP_VALID(memory_manager_default) &&
1836 object->internal &&
1837 (object->purgable == VM_PURGABLE_DENY ||
1838 object->purgable == VM_PURGABLE_NONVOLATILE)) {
1839 queue_enter(&vm_page_queue_throttled, m,
1840 vm_page_t, pageq);
1841 m->throttled = TRUE;
1842 vm_page_throttled_count++;
1843 } else {
1844 if (m->zero_fill) {
1845 queue_enter(&vm_page_queue_zf, m,
1846 vm_page_t, pageq);
1847 vm_zf_queue_count++;
1848 } else
1849 queue_enter(&vm_page_queue_inactive, m,
1850 vm_page_t, pageq);
1851 m->inactive = TRUE;
1852 if (!m->fictitious) {
1853 vm_page_inactive_count++;
1854 token_new_pagecount++;
1855 }
1856 }
1857 vm_pageout_scan_inactive_throttled++;
1858 goto done_with_inactivepage;
1859 }
1860
1861 /*
1862 * we've got a page that we can steal...
1863 * eliminate all mappings and make sure
1864 * we have the up-to-date modified state;
1865 * first take the page BUSY, so that no new
1866 * mappings can be made
1867 */
1868 m->busy = TRUE;
1869
1870 /*
1871 * if we need to do a pmap_disconnect then we
1872 * need to re-evaluate m->dirty since the pmap_disconnect
1873 * provides the true state atomically... the
1874 * page was still mapped up to the pmap_disconnect
1875 * and may have been dirtied at the last microsecond
1876 *
1877 * we also check for the page being referenced 'late'
1878 * if it was, we first need to do a WAKEUP_DONE on it
1879 * since we already set m->busy = TRUE, before
1880 * going off to reactivate it
1881 *
1882 * Note that if 'pmapped' is FALSE then the page is not in,
1883 * and has never been in, any map, so there is no point calling
1884 * pmap_disconnect(). m->dirty and/or m->reference could
1885 * have been set in anticipation of likely usage of the page.
1886 */
1887 if (m->pmapped == TRUE) {
1888 refmod_state = pmap_disconnect(m->phys_page);
1889
1890 if (refmod_state & VM_MEM_MODIFIED)
1891 m->dirty = TRUE;
1892 if (refmod_state & VM_MEM_REFERENCED) {
1893
1894 /* If m->reference is already set, this page must have
1895 * already failed the reactivate_limit test, so don't
1896 * bump the counts twice.
1897 */
1898 if ( ! m->reference ) {
1899 m->reference = TRUE;
1900 if (forced_reclaim ||
1901 ++reactivated_this_call >= reactivate_limit)
1902 vm_pageout_reactivation_limit_exceeded++;
1903 else {
1904 PAGE_WAKEUP_DONE(m);
1905 goto reactivate_page;
1906 }
1907 }
1908 }
1909 }
1910 /*
1911 * reset our count of pages that have been reclaimed
1912 * since the last page was 'stolen'
1913 */
1914 inactive_reclaim_run = 0;
1915
1916 /*
1917 * If it's clean and not precious, we can free the page.
1918 */
1919 if (!m->dirty && !m->precious) {
1920 vm_pageout_inactive_clean++;
1921 goto reclaim_page;
1922 }
1923
1924 /*
1925 * The page may have been dirtied since the last check
1926 * for a throttled target queue (which may have been skipped
1927 * if the page was clean then). With the dirty page
1928 * disconnected here, we can make one final check.
1929 */
1930 {
1931 boolean_t disconnect_throttled = FALSE;
1932 if (object->internal) {
1933 if (VM_PAGE_Q_THROTTLED(iq))
1934 disconnect_throttled = TRUE;
1935 } else if (VM_PAGE_Q_THROTTLED(eq)) {
1936 disconnect_throttled = TRUE;
1937 }
1938
1939 if (disconnect_throttled == TRUE) {
1940 PAGE_WAKEUP_DONE(m);
1941 goto throttle_inactive;
1942 }
1943 }
1944
1945 vm_pageout_cluster(m);
1946
1947 vm_pageout_inactive_dirty++;
1948
1949 inactive_burst_count = 0;
1950
1951 done_with_inactivepage:
1952 if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) {
1953
1954 if (object != NULL) {
1955 vm_object_unlock(object);
1956 object = NULL;
1957 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1958 }
1959 if (local_freeq) {
1960 vm_page_free_list(local_freeq);
1961
1962 local_freeq = NULL;
1963 local_freed = 0;
1964 }
1965 mutex_yield(&vm_page_queue_lock);
1966
1967 delayed_unlock = 1;
1968 }
1969 /*
1970 * back to top of pageout scan loop
1971 */
1972 }
1973 }
1974
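/*
 * Illustrative sketch (guarded out of the build): the scan loop above batches
 * reclaimed pages on 'local_freeq' by chaining them through pageq.next, then
 * hands the whole chain to vm_page_free_list() once the delayed-unlock limit
 * trips.  The stand-in type below is hypothetical; it only models the
 * batch-and-flush shape of that code.
 */
#if 0	/* sketch only -- hypothetical types, never compiled */
struct sketch_page {
	struct sketch_page	*next;		/* stands in for m->pageq.next */
};

static struct sketch_page	*sketch_freeq = NULL;
static int			sketch_freed = 0;

static void
sketch_batch_page(struct sketch_page *m)	/* mirrors the reclaim_page: path */
{
	m->next = sketch_freeq;
	sketch_freeq = m;
	sketch_freed++;
}

static void
sketch_flush_batch(void (*free_list)(struct sketch_page *))	/* mirrors done_with_inactivepage */
{
	if (sketch_freeq != NULL) {
		free_list(sketch_freeq);	/* vm_page_free_list() analogue */
		sketch_freeq = NULL;
		sketch_freed = 0;
	}
}
#endif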
1975
1976 int vm_page_free_count_init;
1977
1978 void
1979 vm_page_free_reserve(
1980 int pages)
1981 {
1982 int free_after_reserve;
1983
1984 vm_page_free_reserved += pages;
1985
1986 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
1987
1988 vm_page_free_min = vm_page_free_reserved +
1989 VM_PAGE_FREE_MIN(free_after_reserve);
1990
1991 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
1992 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
1993
1994 vm_page_free_target = vm_page_free_reserved +
1995 VM_PAGE_FREE_TARGET(free_after_reserve);
1996
1997 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
1998 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
1999
2000 if (vm_page_free_target < vm_page_free_min + 5)
2001 vm_page_free_target = vm_page_free_min + 5;
2002
2003 }
2004
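/*
 * Illustrative sketch (guarded out of the build): vm_page_free_reserve()
 * derives free_min and free_target from the boot-time free count minus the
 * reserve.  VM_PAGE_FREE_MIN()/VM_PAGE_FREE_TARGET() are defined elsewhere,
 * so the percentages below are hypothetical placeholders used only to make
 * the arithmetic concrete; the "+ 5" floor matches the code above.
 */
#if 0	/* sketch only -- placeholder percentages, never compiled */
static void
sketch_free_reserve(int free_count_init, int reserved)
{
	int free_after_reserve = free_count_init - reserved;

	/* hypothetical stand-ins for VM_PAGE_FREE_MIN()/VM_PAGE_FREE_TARGET() */
	int free_min    = reserved + (free_after_reserve / 100);	/* ~1% assumed */
	int free_target = reserved + (free_after_reserve / 25);		/* ~4% assumed */

	if (free_target < free_min + 5)
		free_target = free_min + 5;
}
#endif
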
2005 /*
2006 * vm_pageout is the high level pageout daemon.
2007 */
2008
2009 void
2010 vm_pageout_continue(void)
2011 {
2012 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
2013 vm_pageout_scan_event_counter++;
2014 vm_pageout_scan();
2015 /* we hold vm_page_queue_free_lock now */
2016 assert(vm_page_free_wanted == 0);
2017 assert(vm_page_free_wanted_privileged == 0);
2018 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
2019 mutex_unlock(&vm_page_queue_free_lock);
2020
2021 counter(c_vm_pageout_block++);
2022 thread_block((thread_continue_t)vm_pageout_continue);
2023 /*NOTREACHED*/
2024 }
2025
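/*
 * Illustrative sketch (guarded out of the build): vm_pageout_continue() never
 * returns -- it queues a wait on &vm_page_free_wanted and then blocks with
 * itself as the continuation, so each wakeup re-enters the routine at the top
 * on a fresh stack.  The same assert_wait()/thread_block() idiom, reduced to
 * its skeleton with a hypothetical event:
 */
#if 0	/* sketch only -- never compiled */
static int	sketch_event;

static void
sketch_continuation(void)
{
	/* ... do one pass of work ... */

	assert_wait((event_t) &sketch_event, THREAD_UNINT);
	thread_block((thread_continue_t) sketch_continuation);
	/*NOTREACHED*/
}
#endif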
2026
2027 /*
2028 * must be called with the
2029 * queues and object locks held
2030 */
2031 static void
2032 vm_pageout_queue_steal(vm_page_t m)
2033 {
2034 struct vm_pageout_queue *q;
2035
2036 if (m->object->internal == TRUE)
2037 q = &vm_pageout_queue_internal;
2038 else
2039 q = &vm_pageout_queue_external;
2040
2041 m->laundry = FALSE;
2042 m->pageout_queue = FALSE;
2043 queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
2044
2045 m->pageq.next = NULL;
2046 m->pageq.prev = NULL;
2047
2048 vm_object_paging_end(m->object);
2049
2050 q->pgo_laundry--;
2051 }
2052
2053
2054 #ifdef FAKE_DEADLOCK
2055
2056 #define FAKE_COUNT 5000
2057
2058 int internal_count = 0;
2059 int fake_deadlock = 0;
2060
2061 #endif
2062
2063 static void
2064 vm_pageout_iothread_continue(struct vm_pageout_queue *q)
2065 {
2066 vm_page_t m = NULL;
2067 vm_object_t object;
2068 boolean_t need_wakeup;
2069 memory_object_t pager;
2070 thread_t self = current_thread();
2071
2072 if ((vm_pageout_internal_iothread != THREAD_NULL)
2073 && (self == vm_pageout_external_iothread )
2074 && (self->options & TH_OPT_VMPRIV))
2075 self->options &= ~TH_OPT_VMPRIV;
2076
2077 vm_page_lockspin_queues();
2078
2079 while ( !queue_empty(&q->pgo_pending) ) {
2080
2081 q->pgo_busy = TRUE;
2082 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
2083 m->pageout_queue = FALSE;
2084 vm_page_unlock_queues();
2085
2086 m->pageq.next = NULL;
2087 m->pageq.prev = NULL;
2088 #ifdef FAKE_DEADLOCK
2089 if (q == &vm_pageout_queue_internal) {
2090 vm_offset_t addr;
2091 int pg_count;
2092
2093 internal_count++;
2094
2095 if (internal_count == FAKE_COUNT) {
2096
2097 pg_count = vm_page_free_count + vm_page_free_reserved;
2098
2099 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
2100 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
2101 }
2102 internal_count = 0;
2103 fake_deadlock++;
2104 }
2105 }
2106 #endif
2107 object = m->object;
2108
2109 vm_object_lock(object);
2110
2111 if (!object->pager_initialized) {
2112
2113 /*
2114 * If there is no memory object for the page, create
2115 * one and hand it to the default pager.
2116 */
2117
2118 if (!object->pager_initialized)
2119 vm_object_collapse(object,
2120 (vm_object_offset_t) 0,
2121 TRUE);
2122 if (!object->pager_initialized)
2123 vm_object_pager_create(object);
2124 if (!object->pager_initialized) {
2125 /*
2126 * Still no pager for the object.
2127 * Reactivate the page.
2128 *
2129 * Should only happen if there is no
2130 * default pager.
2131 */
2132 m->list_req_pending = FALSE;
2133 m->cleaning = FALSE;
2134 m->pageout = FALSE;
2135
2136 vm_page_lockspin_queues();
2137 vm_page_unwire(m);
2138 vm_pageout_throttle_up(m);
2139 vm_pageout_dirty_no_pager++;
2140 vm_page_activate(m);
2141 vm_page_unlock_queues();
2142
2143 /*
2144 * And we are done with it.
2145 */
2146 PAGE_WAKEUP_DONE(m);
2147
2148 vm_object_paging_end(object);
2149 vm_object_unlock(object);
2150
2151 vm_page_lockspin_queues();
2152 continue;
2153 }
2154 }
2155 pager = object->pager;
2156 if (pager == MEMORY_OBJECT_NULL) {
2157 /*
2158 * This pager has been destroyed by either
2159 * memory_object_destroy or vm_object_destroy, and
2160 * so there is nowhere for the page to go.
2161 * Just free the page... VM_PAGE_FREE takes
2162 * care of cleaning up all the state...
2163 * including doing the vm_pageout_throttle_up
2164 */
2165
2166 VM_PAGE_FREE(m);
2167
2168 vm_object_paging_end(object);
2169 vm_object_unlock(object);
2170
2171 vm_page_lockspin_queues();
2172 continue;
2173 }
2174 vm_object_unlock(object);
2175 /*
2176 * we expect the paging_in_progress reference to have
2177 * already been taken on the object before it was added
2178 * to the appropriate pageout I/O queue... this will
2179 * keep the object from being terminated and/or the
2180 * paging_offset from changing until the I/O has
2181 * completed... therefore no need to lock the object to
2182 * pull the paging_offset from it.
2183 *
2184 * Send the data to the pager.
2185 * any pageout clustering happens there
2186 */
2187 memory_object_data_return(pager,
2188 m->offset + object->paging_offset,
2189 PAGE_SIZE,
2190 NULL,
2191 NULL,
2192 FALSE,
2193 FALSE,
2194 0);
2195
2196 vm_object_lock(object);
2197 vm_object_paging_end(object);
2198 vm_object_unlock(object);
2199
2200 vm_page_lockspin_queues();
2201 }
2202 assert_wait((event_t) q, THREAD_UNINT);
2203
2204
2205 if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) {
2206 q->pgo_throttled = FALSE;
2207 need_wakeup = TRUE;
2208 } else
2209 need_wakeup = FALSE;
2210
2211 q->pgo_busy = FALSE;
2212 q->pgo_idle = TRUE;
2213 vm_page_unlock_queues();
2214
2215 if (need_wakeup == TRUE)
2216 thread_wakeup((event_t) &q->pgo_laundry);
2217
2218 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending);
2219 /*NOTREACHED*/
2220 }
2221
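/*
 * Illustrative sketch (guarded out of the build): before blocking, the
 * iothread above records whether the pageout queue was throttled, clears the
 * flag and marks the queue idle while still holding the page-queue lock, and
 * only issues the wakeup on pgo_laundry after dropping that lock, so the
 * woken thread never immediately contends for it.  The handshake in
 * isolation:
 */
#if 0	/* sketch only -- never compiled */
static void
sketch_throttle_handshake(struct vm_pageout_queue *q)
{
	boolean_t	do_wakeup;

	vm_page_lockspin_queues();

	do_wakeup = (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q));
	if (do_wakeup)
		q->pgo_throttled = FALSE;

	q->pgo_busy = FALSE;
	q->pgo_idle = TRUE;

	vm_page_unlock_queues();

	if (do_wakeup)
		thread_wakeup((event_t) &q->pgo_laundry);
}
#endif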
2222
2223 static void
2224 vm_pageout_iothread_external(void)
2225 {
2226 thread_t self = current_thread();
2227
2228 self->options |= TH_OPT_VMPRIV;
2229
2230 vm_pageout_iothread_continue(&vm_pageout_queue_external);
2231 /*NOTREACHED*/
2232 }
2233
2234
2235 static void
2236 vm_pageout_iothread_internal(void)
2237 {
2238 thread_t self = current_thread();
2239
2240 self->options |= TH_OPT_VMPRIV;
2241
2242 vm_pageout_iothread_continue(&vm_pageout_queue_internal);
2243 /*NOTREACHED*/
2244 }
2245
2246 static void
2247 vm_pageout_garbage_collect(int collect)
2248 {
2249 if (collect) {
2250 stack_collect();
2251
2252 /*
2253 * consider_zone_gc should be last, because the other operations
2254 * might return memory to zones.
2255 */
2256 consider_machine_collect();
2257 consider_zone_gc();
2258
2259 consider_machine_adjust();
2260 }
2261
2262 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
2263
2264 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
2265 /*NOTREACHED*/
2266 }
2267
2268
2269
2270 void
2271 vm_pageout(void)
2272 {
2273 thread_t self = current_thread();
2274 thread_t thread;
2275 kern_return_t result;
2276 spl_t s;
2277
2278 /*
2279 * Set thread privileges.
2280 */
2281 s = splsched();
2282 thread_lock(self);
2283 self->priority = BASEPRI_PREEMPT - 1;
2284 set_sched_pri(self, self->priority);
2285 thread_unlock(self);
2286
2287 if (!self->reserved_stack)
2288 self->reserved_stack = self->kernel_stack;
2289
2290 splx(s);
2291
2292 /*
2293 * Initialize some paging parameters.
2294 */
2295
2296 if (vm_pageout_idle_wait == 0)
2297 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
2298
2299 if (vm_pageout_burst_wait == 0)
2300 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
2301
2302 if (vm_pageout_empty_wait == 0)
2303 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
2304
2305 if (vm_pageout_deadlock_wait == 0)
2306 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
2307
2308 if (vm_pageout_deadlock_relief == 0)
2309 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
2310
2311 if (vm_pageout_inactive_relief == 0)
2312 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
2313
2314 if (vm_pageout_burst_active_throttle == 0)
2315 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
2316
2317 if (vm_pageout_burst_inactive_throttle == 0)
2318 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
2319
2320 /*
2321 * Set kernel task to low backing store privileged
2322 * status
2323 */
2324 task_lock(kernel_task);
2325 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
2326 task_unlock(kernel_task);
2327
2328 vm_page_free_count_init = vm_page_free_count;
2329
2330 /*
2331 * even if we've already called vm_page_free_reserve
2332 * call it again here to ensure that the targets are
2333 * accurately calculated (it uses vm_page_free_count_init)
2334 * calling it with an arg of 0 will not change the reserve
2335 * but will re-calculate free_min and free_target
2336 */
2337 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
2338 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
2339 } else
2340 vm_page_free_reserve(0);
2341
2342
2343 queue_init(&vm_pageout_queue_external.pgo_pending);
2344 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2345 vm_pageout_queue_external.pgo_laundry = 0;
2346 vm_pageout_queue_external.pgo_idle = FALSE;
2347 vm_pageout_queue_external.pgo_busy = FALSE;
2348 vm_pageout_queue_external.pgo_throttled = FALSE;
2349
2350 queue_init(&vm_pageout_queue_internal.pgo_pending);
2351 vm_pageout_queue_internal.pgo_maxlaundry = 0;
2352 vm_pageout_queue_internal.pgo_laundry = 0;
2353 vm_pageout_queue_internal.pgo_idle = FALSE;
2354 vm_pageout_queue_internal.pgo_busy = FALSE;
2355 vm_pageout_queue_internal.pgo_throttled = FALSE;
2356
2357
2358 /* internal pageout thread started when default pager registered first time */
2359 /* external pageout and garbage collection threads started here */
2360
2361 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
2362 BASEPRI_PREEMPT - 1,
2363 &vm_pageout_external_iothread);
2364 if (result != KERN_SUCCESS)
2365 panic("vm_pageout_iothread_external: create failed");
2366
2367 thread_deallocate(vm_pageout_external_iothread);
2368
2369 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
2370 MINPRI_KERNEL,
2371 &thread);
2372 if (result != KERN_SUCCESS)
2373 panic("vm_pageout_garbage_collect: create failed");
2374
2375 thread_deallocate(thread);
2376
2377 vm_object_reaper_init();
2378
2379
2380 vm_pageout_continue();
2381
2382 /*
2383 * Unreached code!
2384 *
2385 * The vm_pageout_continue() call above never returns, so the code below is never
2386 * executed. We take advantage of this to declare several DTrace VM related probe
2387 * points that our kernel doesn't have an analog for. These are probe points that
2388 * exist in Solaris and are in the DTrace documentation, so people may have written
2389 * scripts that use them. Declaring the probe points here means their scripts will
2390 * compile and execute which we want for portability of the scripts, but since this
2391 * section of code is never reached, the probe points will simply never fire. Yes,
2392 * this is basically a hack. The problem is the DTrace probe points were chosen with
2393 * Solaris specific VM events in mind, not portability to different VM implementations.
2394 */
2395
2396 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
2397 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
2398 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
2399 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
2400 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
2401 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
2402 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
2403 /*NOTREACHED*/
2404 }
2405
2406 kern_return_t
2407 vm_pageout_internal_start(void)
2408 {
2409 kern_return_t result;
2410
2411 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2412 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread);
2413 if (result == KERN_SUCCESS)
2414 thread_deallocate(vm_pageout_internal_iothread);
2415 return result;
2416 }
2417
2418 #define UPL_DELAYED_UNLOCK_LIMIT (MAX_UPL_TRANSFER / 2)
2419
2420 static upl_t
2421 upl_create(int type, int flags, upl_size_t size)
2422 {
2423 upl_t upl;
2424 int page_field_size = 0;
2425 int upl_flags = 0;
2426 int upl_size = sizeof(struct upl);
2427
2428 if (type & UPL_CREATE_LITE) {
2429 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2430 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2431
2432 upl_flags |= UPL_LITE;
2433 }
2434 if (type & UPL_CREATE_INTERNAL) {
2435 upl_size += sizeof(struct upl_page_info) * (size/PAGE_SIZE);
2436
2437 upl_flags |= UPL_INTERNAL;
2438 }
2439 upl = (upl_t)kalloc(upl_size + page_field_size);
2440
2441 if (page_field_size)
2442 bzero((char *)upl + upl_size, page_field_size);
2443
2444 upl->flags = upl_flags | flags;
2445 upl->src_object = NULL;
2446 upl->kaddr = (vm_offset_t)0;
2447 upl->size = 0;
2448 upl->map_object = NULL;
2449 upl->ref_count = 1;
2450 upl->highest_page = 0;
2451 upl_lock_init(upl);
2452 #ifdef UPL_DEBUG
2453 upl->ubc_alias1 = 0;
2454 upl->ubc_alias2 = 0;
2455 #endif /* UPL_DEBUG */
2456 return(upl);
2457 }
2458
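/*
 * Illustrative sketch (guarded out of the build): upl_create() appends an
 * optional upl_page_info array (UPL_CREATE_INTERNAL) and an optional lite-list
 * bitmap -- one bit per page, rounded up to a 32-bit word boundary
 * (UPL_CREATE_LITE) -- to the base struct upl.  The size computation on its
 * own:
 */
#if 0	/* sketch only -- never compiled */
static size_t
sketch_upl_alloc_size(int type, upl_size_t size)
{
	size_t	alloc = sizeof(struct upl);
	size_t	bitmap = 0;

	if (type & UPL_CREATE_LITE) {
		bitmap = ((size / PAGE_SIZE) + 7) >> 3;		/* one bit per page */
		bitmap = (bitmap + 3) & ~((size_t) 3);		/* round to 32-bit words */
	}
	if (type & UPL_CREATE_INTERNAL)
		alloc += sizeof(struct upl_page_info) * (size / PAGE_SIZE);

	return (alloc + bitmap);
}
#endif
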
2459 static void
2460 upl_destroy(upl_t upl)
2461 {
2462 int page_field_size; /* bit field in word size buf */
2463 int size;
2464
2465 #ifdef UPL_DEBUG
2466 {
2467 vm_object_t object;
2468
2469 if (upl->flags & UPL_SHADOWED) {
2470 object = upl->map_object->shadow;
2471 } else {
2472 object = upl->map_object;
2473 }
2474 vm_object_lock(object);
2475 queue_remove(&object->uplq, upl, upl_t, uplq);
2476 vm_object_unlock(object);
2477 }
2478 #endif /* UPL_DEBUG */
2479 /*
2480 * drop a reference on the map_object whether or
2481 * not a pageout object is inserted
2482 */
2483 if (upl->flags & UPL_SHADOWED)
2484 vm_object_deallocate(upl->map_object);
2485
2486 if (upl->flags & UPL_DEVICE_MEMORY)
2487 size = PAGE_SIZE;
2488 else
2489 size = upl->size;
2490 page_field_size = 0;
2491
2492 if (upl->flags & UPL_LITE) {
2493 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2494 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2495 }
2496 if (upl->flags & UPL_INTERNAL) {
2497 kfree(upl,
2498 sizeof(struct upl) +
2499 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
2500 + page_field_size);
2501 } else {
2502 kfree(upl, sizeof(struct upl) + page_field_size);
2503 }
2504 }
2505
2506 void uc_upl_dealloc(upl_t upl);
2507 __private_extern__ void
2508 uc_upl_dealloc(upl_t upl)
2509 {
2510 if (--upl->ref_count == 0)
2511 upl_destroy(upl);
2512 }
2513
2514 void
2515 upl_deallocate(upl_t upl)
2516 {
2517 if (--upl->ref_count == 0)
2518 upl_destroy(upl);
2519 }
2520
2521 /*
2522 * Statistics about UPL enforcement of copy-on-write obligations.
2523 */
2524 unsigned long upl_cow = 0;
2525 unsigned long upl_cow_again = 0;
2526 unsigned long upl_cow_contiguous = 0;
2527 unsigned long upl_cow_pages = 0;
2528 unsigned long upl_cow_again_pages = 0;
2529 unsigned long upl_cow_contiguous_pages = 0;
2530
2531 /*
2532 * Routine: vm_object_upl_request
2533 * Purpose:
2534 * Cause the population of a portion of a vm_object.
2535 * Depending on the nature of the request, the pages
2536 * returned may contain valid data or be uninitialized.
2537 * A page list structure, listing the physical pages,
2538 * will be returned upon request.
2539 * This function is called by the file system or any other
2540 * supplier of backing store to a pager.
2541 * IMPORTANT NOTE: The caller must still respect the relationship
2542 * between the vm_object and its backing memory object. The
2543 * caller MUST NOT substitute changes in the backing file
2544 * without first doing a memory_object_lock_request on the
2545 * target range unless it is known that the pages are not
2546 * shared with another entity at the pager level.
2547 * Copy_in_to:
2548 * if a page list structure is present
2549 * return the mapped physical pages, where a
2550 * page is not present, return an uninitialized
2551 * one. If the no_sync bit is turned on, don't
2552 * call the pager unlock to synchronize with other
2553 * possible copies of the page. Leave pages busy
2554 * in the original object, if a page list structure
2555 * was specified. When a commit of the page list
2556 * pages is done, the dirty bit will be set for each one.
2557 * Copy_out_from:
2558 * If a page list structure is present, return
2559 * all mapped pages. Where a page does not exist
2560 * map a zero filled one. Leave pages busy in
2561 * the original object. If a page list structure
2562 * is not specified, this call is a no-op.
2563 *
2564 * Note: access of default pager objects has a rather interesting
2565 * twist. The caller of this routine, presumably the file system
2566 * page cache handling code, will never actually make a request
2567 * against a default pager backed object. Only the default
2568 * pager will make requests on backing store related vm_objects.
2569 * In this way the default pager can maintain the relationship
2570 * between backing store files (abstract memory objects) and
2571 * the vm_objects (cache objects) they support.
2572 *
2573 */
2574
2575 __private_extern__ kern_return_t
2576 vm_object_upl_request(
2577 vm_object_t object,
2578 vm_object_offset_t offset,
2579 upl_size_t size,
2580 upl_t *upl_ptr,
2581 upl_page_info_array_t user_page_list,
2582 unsigned int *page_list_count,
2583 int cntrl_flags)
2584 {
2585 vm_page_t dst_page = VM_PAGE_NULL;
2586 vm_object_offset_t dst_offset;
2587 upl_size_t xfer_size;
2588 boolean_t dirty;
2589 boolean_t hw_dirty;
2590 upl_t upl = NULL;
2591 unsigned int entry;
2592 #if MACH_CLUSTER_STATS
2593 boolean_t encountered_lrp = FALSE;
2594 #endif
2595 vm_page_t alias_page = NULL;
2596 int refmod_state = 0;
2597 wpl_array_t lite_list = NULL;
2598 vm_object_t last_copy_object;
2599 int delayed_unlock = 0;
2600 int j;
2601
2602 if (cntrl_flags & ~UPL_VALID_FLAGS) {
2603 /*
2604 * For forward compatibility's sake,
2605 * reject any unknown flag.
2606 */
2607 return KERN_INVALID_VALUE;
2608 }
2609 if ( (!object->internal) && (object->paging_offset != 0) )
2610 panic("vm_object_upl_request: external object with non-zero paging offset\n");
2611 if (object->phys_contiguous)
2612 panic("vm_object_upl_request: contiguous object specified\n");
2613
2614
2615 if ((size / PAGE_SIZE) > MAX_UPL_TRANSFER)
2616 size = MAX_UPL_TRANSFER * PAGE_SIZE;
2617
2618 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
2619 *page_list_count = MAX_UPL_TRANSFER;
2620
2621 if (cntrl_flags & UPL_SET_INTERNAL) {
2622 if (cntrl_flags & UPL_SET_LITE) {
2623
2624 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, 0, size);
2625
2626 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
2627 lite_list = (wpl_array_t)
2628 (((uintptr_t)user_page_list) +
2629 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
2630 } else {
2631 upl = upl_create(UPL_CREATE_INTERNAL, 0, size);
2632
2633 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
2634 }
2635 } else {
2636 if (cntrl_flags & UPL_SET_LITE) {
2637
2638 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE, 0, size);
2639
2640 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
2641 } else {
2642 upl = upl_create(UPL_CREATE_EXTERNAL, 0, size);
2643 }
2644 }
2645 *upl_ptr = upl;
2646
2647 if (user_page_list)
2648 user_page_list[0].device = FALSE;
2649
2650 if (cntrl_flags & UPL_SET_LITE) {
2651 upl->map_object = object;
2652 } else {
2653 upl->map_object = vm_object_allocate(size);
2654 /*
2655 * No need to lock the new object: nobody else knows
2656 * about it yet, so it's all ours so far.
2657 */
2658 upl->map_object->shadow = object;
2659 upl->map_object->pageout = TRUE;
2660 upl->map_object->can_persist = FALSE;
2661 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2662 upl->map_object->shadow_offset = offset;
2663 upl->map_object->wimg_bits = object->wimg_bits;
2664
2665 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2666
2667 upl->flags |= UPL_SHADOWED;
2668 }
2669 /*
2670 * ENCRYPTED SWAP:
2671 * Just mark the UPL as "encrypted" here.
2672 * We'll actually encrypt the pages later,
2673 * in upl_encrypt(), when the caller has
2674 * selected which pages need to go to swap.
2675 */
2676 if (cntrl_flags & UPL_ENCRYPT)
2677 upl->flags |= UPL_ENCRYPTED;
2678
2679 if (cntrl_flags & UPL_FOR_PAGEOUT)
2680 upl->flags |= UPL_PAGEOUT;
2681
2682 vm_object_lock(object);
2683 vm_object_paging_begin(object);
2684
2685 /*
2686 * we can lock in the paging_offset once paging_in_progress is set
2687 */
2688 upl->size = size;
2689 upl->offset = offset + object->paging_offset;
2690
2691 #ifdef UPL_DEBUG
2692 queue_enter(&object->uplq, upl, upl_t, uplq);
2693 #endif /* UPL_DEBUG */
2694
2695 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
2696 /*
2697 * Honor copy-on-write obligations
2698 *
2699 * The caller is gathering these pages and
2700 * might modify their contents. We need to
2701 * make sure that the copy object has its own
2702 * private copies of these pages before we let
2703 * the caller modify them.
2704 */
2705 vm_object_update(object,
2706 offset,
2707 size,
2708 NULL,
2709 NULL,
2710 FALSE, /* should_return */
2711 MEMORY_OBJECT_COPY_SYNC,
2712 VM_PROT_NO_CHANGE);
2713 upl_cow++;
2714 upl_cow_pages += size >> PAGE_SHIFT;
2715 }
2716 /*
2717 * remember which copy object we synchronized with
2718 */
2719 last_copy_object = object->copy;
2720 entry = 0;
2721
2722 xfer_size = size;
2723 dst_offset = offset;
2724
2725 while (xfer_size) {
2726
2727 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
2728 if (delayed_unlock) {
2729 delayed_unlock = 0;
2730 vm_page_unlock_queues();
2731 }
2732 vm_object_unlock(object);
2733 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2734 goto relock;
2735 }
2736 if (delayed_unlock == 0) {
2737 /*
2738 * pageout_scan takes the vm_page_lock_queues first
2739 * then tries for the object lock... to avoid what
2740 * is effectively a lock inversion, we'll go to the
2741 * trouble of taking them in that same order... otherwise
2742 * if this object contains the majority of the pages resident
2743 * in the UBC (or a small set of large objects actively being
2744 * worked on contain the majority of the pages), we could
2745 * cause the pageout_scan thread to 'starve' in its attempt
2746 * to find pages to move to the free queue, since it has to
2747 * successfully acquire the object lock of any candidate page
2748 * before it can steal/clean it.
2749 */
2750 vm_object_unlock(object);
2751 relock:
2752 for (j = 0; ; j++) {
2753 vm_page_lock_queues();
2754
2755 if (vm_object_lock_try(object))
2756 break;
2757 vm_page_unlock_queues();
2758 mutex_pause(j);
2759 }
2760 delayed_unlock = 1;
2761 }
2762 if (cntrl_flags & UPL_COPYOUT_FROM) {
2763 upl->flags |= UPL_PAGE_SYNC_DONE;
2764
2765 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
2766 dst_page->fictitious ||
2767 dst_page->absent ||
2768 dst_page->error ||
2769 (dst_page->wire_count && !dst_page->pageout && !dst_page->list_req_pending)) {
2770
2771 if (user_page_list)
2772 user_page_list[entry].phys_addr = 0;
2773
2774 goto delay_unlock_queues;
2775 }
2776 /*
2777 * grab this up front...
2778 * a high percentage of the time we're going to
2779 * need the hardware modification state a bit later
2780 * anyway... so we can eliminate an extra call into
2781 * the pmap layer by grabbing it here and recording it
2782 */
2783 if (dst_page->pmapped)
2784 refmod_state = pmap_get_refmod(dst_page->phys_page);
2785 else
2786 refmod_state = 0;
2787
2788 if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) {
2789 /*
2790 * page is on inactive list and referenced...
2791 * reactivate it now... this gets it out of the
2792 * way of vm_pageout_scan which would have to
2793 * reactivate it upon tripping over it
2794 */
2795 vm_page_activate(dst_page);
2796 VM_STAT_INCR(reactivations);
2797 }
2798 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
2799 /*
2800 * we're only asking for DIRTY pages to be returned
2801 */
2802 if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
2803 /*
2804 * if this is the page stolen by vm_pageout_scan to be
2805 * cleaned (as opposed to a buddy being clustered in),
2806 * or this request is not being driven by a PAGEOUT cluster,
2807 * then we only need to check for the page being dirty or
2808 * precious to decide whether to return it
2809 */
2810 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED))
2811 goto check_busy;
2812 goto dont_return;
2813 }
2814 /*
2815 * this is a request for a PAGEOUT cluster and this page
2816 * is merely along for the ride as a 'buddy'... not only
2817 * does it have to be dirty to be returned, but it also
2818 * can't have been referenced recently... note that we've
2819 * already filtered above based on whether this page is
2820 * currently on the inactive queue or it meets the page
2821 * ticket (generation count) check
2822 */
2823 if ( !(refmod_state & VM_MEM_REFERENCED) &&
2824 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
2825 goto check_busy;
2826 }
2827 dont_return:
2828 /*
2829 * if we reach here, we're not to return
2830 * the page... go on to the next one
2831 */
2832 if (user_page_list)
2833 user_page_list[entry].phys_addr = 0;
2834
2835 goto delay_unlock_queues;
2836 }
2837 check_busy:
2838 if (dst_page->busy && (!(dst_page->list_req_pending && dst_page->pageout))) {
2839 if (cntrl_flags & UPL_NOBLOCK) {
2840 if (user_page_list)
2841 user_page_list[entry].phys_addr = 0;
2842
2843 goto delay_unlock_queues;
2844 }
2845 /*
2846 * someone else is playing with the
2847 * page. We will have to wait.
2848 */
2849 delayed_unlock = 0;
2850 vm_page_unlock_queues();
2851
2852 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
2853
2854 continue;
2855 }
2856 /*
2857 * Someone else already cleaning the page?
2858 */
2859 if ((dst_page->cleaning || dst_page->absent || dst_page->wire_count != 0) && !dst_page->list_req_pending) {
2860 if (user_page_list)
2861 user_page_list[entry].phys_addr = 0;
2862
2863 goto delay_unlock_queues;
2864 }
2865 /*
2866 * ENCRYPTED SWAP:
2867 * The caller is gathering this page and might
2868 * access its contents later on. Decrypt the
2869 * page before adding it to the UPL, so that
2870 * the caller never sees encrypted data.
2871 */
2872 if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) {
2873 int was_busy;
2874
2875 delayed_unlock = 0;
2876 vm_page_unlock_queues();
2877 /*
2878 * save the current state of busy
2879 * mark page as busy while decrypt
2880 * is in progress since it will drop
2881 * the object lock...
2882 */
2883 was_busy = dst_page->busy;
2884 dst_page->busy = TRUE;
2885
2886 vm_page_decrypt(dst_page, 0);
2887 vm_page_decrypt_for_upl_counter++;
2888 /*
2889 * restore to original busy state
2890 */
2891 dst_page->busy = was_busy;
2892
2893 vm_page_lock_queues();
2894 delayed_unlock = 1;
2895 }
2896 if (dst_page->pageout_queue == TRUE)
2897 /*
2898 * we've buddied up a page for a clustered pageout
2899 * that has already been moved to the pageout
2900 * queue by pageout_scan... we need to remove
2901 * it from the queue and drop the laundry count
2902 * on that queue
2903 */
2904 vm_pageout_queue_steal(dst_page);
2905 #if MACH_CLUSTER_STATS
2906 /*
2907 * pageout statistics gathering. count
2908 * all the pages we will page out that
2909 * were not counted in the initial
2910 * vm_pageout_scan work
2911 */
2912 if (dst_page->list_req_pending)
2913 encountered_lrp = TRUE;
2914 if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious)) && !dst_page->list_req_pending) {
2915 if (encountered_lrp)
2916 CLUSTER_STAT(pages_at_higher_offsets++;)
2917 else
2918 CLUSTER_STAT(pages_at_lower_offsets++;)
2919 }
2920 #endif
2921 /*
2922 * Turn off busy indication on pending
2923 * pageout. Note: we can only get here
2924 * in the request pending case.
2925 */
2926 dst_page->list_req_pending = FALSE;
2927 dst_page->busy = FALSE;
2928
2929 hw_dirty = refmod_state & VM_MEM_MODIFIED;
2930 dirty = hw_dirty ? TRUE : dst_page->dirty;
2931
2932 if (dst_page->phys_page > upl->highest_page)
2933 upl->highest_page = dst_page->phys_page;
2934
2935 if (cntrl_flags & UPL_SET_LITE) {
2936 int pg_num;
2937
2938 pg_num = (dst_offset-offset)/PAGE_SIZE;
2939 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
2940
2941 if (hw_dirty)
2942 pmap_clear_modify(dst_page->phys_page);
2943
2944 /*
2945 * Mark original page as cleaning
2946 * in place.
2947 */
2948 dst_page->cleaning = TRUE;
2949 dst_page->precious = FALSE;
2950 } else {
2951 /*
2952 * use pageclean setup, it is more
2953 * convenient even for the pageout
2954 * cases here
2955 */
2956 vm_object_lock(upl->map_object);
2957 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
2958 vm_object_unlock(upl->map_object);
2959
2960 alias_page->absent = FALSE;
2961 alias_page = NULL;
2962 }
2963 #if MACH_PAGEMAP
2964 /*
2965 * Record that this page has been
2966 * written out
2967 */
2968 vm_external_state_set(object->existence_map, dst_page->offset);
2969 #endif /*MACH_PAGEMAP*/
2970 dst_page->dirty = dirty;
2971
2972 if (!dirty)
2973 dst_page->precious = TRUE;
2974
2975 if (dst_page->pageout)
2976 dst_page->busy = TRUE;
2977
2978 if ( (cntrl_flags & UPL_ENCRYPT) ) {
2979 /*
2980 * ENCRYPTED SWAP:
2981 * We want to deny access to the target page
2982 * because its contents are about to be
2983 * encrypted and the user would be very
2984 * confused to see encrypted data instead
2985 * of their data.
2986 * We also set "encrypted_cleaning" to allow
2987 * vm_pageout_scan() to demote that page
2988 * from "adjacent/clean-in-place" to
2989 * "target/clean-and-free" if it bumps into
2990 * this page during its scanning while we're
2991 * still processing this cluster.
2992 */
2993 dst_page->busy = TRUE;
2994 dst_page->encrypted_cleaning = TRUE;
2995 }
2996 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
2997 /*
2998 * deny access to the target page
2999 * while it is being worked on
3000 */
3001 if ((!dst_page->pageout) && (dst_page->wire_count == 0)) {
3002 dst_page->busy = TRUE;
3003 dst_page->pageout = TRUE;
3004 vm_page_wire(dst_page);
3005 }
3006 }
3007 } else {
3008 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
3009 /*
3010 * Honor copy-on-write obligations
3011 *
3012 * The copy object has changed since we
3013 * last synchronized for copy-on-write.
3014 * Another copy object might have been
3015 * inserted while we released the object's
3016 * lock. Since someone could have seen the
3017 * original contents of the remaining pages
3018 * through that new object, we have to
3019 * synchronize with it again for the remaining
3020 * pages only. The previous pages are "busy"
3021 * so they can not be seen through the new
3022 * mapping. The new mapping will see our
3023 * upcoming changes for those previous pages,
3024 * but that's OK since they couldn't see what
3025 * was there before. It's just a race anyway
3026 * and there's no guarantee of consistency or
3027 * atomicity. We just don't want new mappings
3028 * to see both the *before* and *after* pages.
3029 */
3030 if (object->copy != VM_OBJECT_NULL) {
3031 delayed_unlock = 0;
3032 vm_page_unlock_queues();
3033
3034 vm_object_update(
3035 object,
3036 dst_offset,/* current offset */
3037 xfer_size, /* remaining size */
3038 NULL,
3039 NULL,
3040 FALSE, /* should_return */
3041 MEMORY_OBJECT_COPY_SYNC,
3042 VM_PROT_NO_CHANGE);
3043
3044 upl_cow_again++;
3045 upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
3046
3047 vm_page_lock_queues();
3048 delayed_unlock = 1;
3049 }
3050 /*
3051 * remember the copy object we synced with
3052 */
3053 last_copy_object = object->copy;
3054 }
3055 dst_page = vm_page_lookup(object, dst_offset);
3056
3057 if (dst_page != VM_PAGE_NULL) {
3058 if ( !(dst_page->list_req_pending) ) {
3059 if ((cntrl_flags & UPL_RET_ONLY_ABSENT) && !dst_page->absent) {
3060 /*
3061 * skip over pages already present in the cache
3062 */
3063 if (user_page_list)
3064 user_page_list[entry].phys_addr = 0;
3065
3066 goto delay_unlock_queues;
3067 }
3068 if (dst_page->cleaning) {
3069 /*
3070 * someone else is writing to the page... wait...
3071 */
3072 delayed_unlock = 0;
3073 vm_page_unlock_queues();
3074
3075 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3076
3077 continue;
3078 }
3079 } else {
3080 if (dst_page->fictitious &&
3081 dst_page->phys_page == vm_page_fictitious_addr) {
3082 assert( !dst_page->speculative);
3083 /*
3084 * dump the fictitious page
3085 */
3086 dst_page->list_req_pending = FALSE;
3087
3088 vm_page_free(dst_page);
3089
3090 dst_page = NULL;
3091 } else if (dst_page->absent) {
3092 /*
3093 * the default_pager case
3094 */
3095 dst_page->list_req_pending = FALSE;
3096 dst_page->busy = FALSE;
3097 }
3098 }
3099 }
3100 if (dst_page == VM_PAGE_NULL) {
3101 if (object->private) {
3102 /*
3103 * This is a nasty wrinkle for users
3104 * of upl who encounter device or
3105 * private memory; however, it is
3106 * unavoidable: only a fault can
3107 * resolve the actual backing
3108 * physical page by asking the
3109 * backing device.
3110 */
3111 if (user_page_list)
3112 user_page_list[entry].phys_addr = 0;
3113
3114 goto delay_unlock_queues;
3115 }
3116 /*
3117 * need to allocate a page
3118 */
3119 dst_page = vm_page_grab();
3120
3121 if (dst_page == VM_PAGE_NULL) {
3122 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
3123 /*
3124 * we don't want to stall waiting for pages to come onto the free list
3125 * while we're already holding absent pages in this UPL
3126 * the caller will deal with the empty slots
3127 */
3128 if (user_page_list)
3129 user_page_list[entry].phys_addr = 0;
3130
3131 goto try_next_page;
3132 }
3133 /*
3134 * no pages available... wait
3135 * then try again for the same
3136 * offset...
3137 */
3138 delayed_unlock = 0;
3139 vm_page_unlock_queues();
3140
3141 vm_object_unlock(object);
3142 VM_PAGE_WAIT();
3143
3144 /*
3145 * pageout_scan takes the vm_page_lock_queues first
3146 * then tries for the object lock... to avoid what
3147 * is effectively a lock inversion, we'll go to the
3148 * trouble of taking them in that same order... otherwise
3149 * if this object contains the majority of the pages resident
3150 * in the UBC (or a small set of large objects actively being
3151 * worked on contain the majority of the pages), we could
3152 * cause the pageout_scan thread to 'starve' in its attempt
3153 * to find pages to move to the free queue, since it has to
3154 * successfully acquire the object lock of any candidate page
3155 * before it can steal/clean it.
3156 */
3157 for (j = 0; ; j++) {
3158 vm_page_lock_queues();
3159
3160 if (vm_object_lock_try(object))
3161 break;
3162 vm_page_unlock_queues();
3163 mutex_pause(j);
3164 }
3165 delayed_unlock = 1;
3166
3167 continue;
3168 }
3169 vm_page_insert_internal(dst_page, object, dst_offset, TRUE);
3170
3171 dst_page->absent = TRUE;
3172 dst_page->busy = FALSE;
3173
3174 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
3175 /*
3176 * if UPL_RET_ONLY_ABSENT was specified,
3177 * then we're definitely setting up a
3178 * UPL for a clustered read/pagein
3179 * operation... mark the pages as clustered
3180 * so upl_commit_range can put them on the
3181 * speculative list
3182 */
3183 dst_page->clustered = TRUE;
3184 }
3185 }
3186 /*
3187 * ENCRYPTED SWAP:
3188 */
3189 if (cntrl_flags & UPL_ENCRYPT) {
3190 /*
3191 * The page is going to be encrypted when we
3192 * get it from the pager, so mark it so.
3193 */
3194 dst_page->encrypted = TRUE;
3195 } else {
3196 /*
3197 * Otherwise, the page will not contain
3198 * encrypted data.
3199 */
3200 dst_page->encrypted = FALSE;
3201 }
3202 dst_page->overwriting = TRUE;
3203
3204 if (dst_page->fictitious) {
3205 panic("need corner case for fictitious page");
3206 }
3207 if (dst_page->busy) {
3208 /*
3209 * someone else is playing with the
3210 * page. We will have to wait.
3211 */
3212 delayed_unlock = 0;
3213 vm_page_unlock_queues();
3214
3215 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3216
3217 continue;
3218 }
3219 if (dst_page->pmapped) {
3220 if ( !(cntrl_flags & UPL_FILE_IO))
3221 /*
3222 * eliminate all mappings from the
3223 * original object and its progeny
3224 */
3225 refmod_state = pmap_disconnect(dst_page->phys_page);
3226 else
3227 refmod_state = pmap_get_refmod(dst_page->phys_page);
3228 } else
3229 refmod_state = 0;
3230
3231 hw_dirty = refmod_state & VM_MEM_MODIFIED;
3232 dirty = hw_dirty ? TRUE : dst_page->dirty;
3233
3234 if (cntrl_flags & UPL_SET_LITE) {
3235 int pg_num;
3236
3237 pg_num = (dst_offset-offset)/PAGE_SIZE;
3238 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
3239
3240 if (hw_dirty)
3241 pmap_clear_modify(dst_page->phys_page);
3242
3243 /*
3244 * Mark original page as cleaning
3245 * in place.
3246 */
3247 dst_page->cleaning = TRUE;
3248 dst_page->precious = FALSE;
3249 } else {
3250 /*
3251 * use pageclean setup, it is more
3252 * convenient even for the pageout
3253 * cases here
3254 */
3255 vm_object_lock(upl->map_object);
3256 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
3257 vm_object_unlock(upl->map_object);
3258
3259 alias_page->absent = FALSE;
3260 alias_page = NULL;
3261 }
3262
3263 if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
3264 /*
3265 * clean in place for read implies
3266 * that a write will be done on all
3267 * the pages that are dirty before
3268 * a upl commit is done. The caller
3269 * is obligated to preserve the
3270 * contents of all pages marked dirty
3271 */
3272 upl->flags |= UPL_CLEAR_DIRTY;
3273 }
3274 dst_page->dirty = dirty;
3275
3276 if (!dirty)
3277 dst_page->precious = TRUE;
3278
3279 if (dst_page->wire_count == 0) {
3280 /*
3281 * deny access to the target page while
3282 * it is being worked on
3283 */
3284 dst_page->busy = TRUE;
3285 } else
3286 vm_page_wire(dst_page);
3287
3288 if (dst_page->clustered) {
3289 /*
3290 * expect the page not to be used
3291 * since it's coming in as part
3292 * of a speculative cluster...
3293 * pages that are 'consumed' will
3294 * get a hardware reference
3295 */
3296 dst_page->reference = FALSE;
3297 } else {
3298 /*
3299 * expect the page to be used
3300 */
3301 dst_page->reference = TRUE;
3302 }
3303 dst_page->precious = (cntrl_flags & UPL_PRECIOUS) ? TRUE : FALSE;
3304 }
3305 if (dst_page->phys_page > upl->highest_page)
3306 upl->highest_page = dst_page->phys_page;
3307 if (user_page_list) {
3308 user_page_list[entry].phys_addr = dst_page->phys_page;
3309 user_page_list[entry].dirty = dst_page->dirty;
3310 user_page_list[entry].pageout = dst_page->pageout;
3311 user_page_list[entry].absent = dst_page->absent;
3312 user_page_list[entry].precious = dst_page->precious;
3313
3314 if (dst_page->clustered == TRUE)
3315 user_page_list[entry].speculative = dst_page->speculative;
3316 else
3317 user_page_list[entry].speculative = FALSE;
3318 }
3319 /*
3320 * if UPL_RET_ONLY_ABSENT is set, then
3321 * we are working with a fresh page and we've
3322 * just set the clustered flag on it to
3323 * indicate that it was dragged in as part of a
3324 * speculative cluster... so leave it alone
3325 */
3326 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
3327 /*
3328 * someone is explicitly grabbing this page...
3329 * update clustered and speculative state
3330 *
3331 */
3332 VM_PAGE_CONSUME_CLUSTERED(dst_page);
3333 }
3334 delay_unlock_queues:
3335 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
3336 /*
3337 * pageout_scan takes the vm_page_lock_queues first
3338 * then tries for the object lock... to avoid what
3339 * is effectively a lock inversion, we'll go to the
3340 * trouble of taking them in that same order... otherwise
3341 * if this object contains the majority of the pages resident
3342 * in the UBC (or a small set of large objects actively being
3343 * worked on contain the majority of the pages), we could
3344 * cause the pageout_scan thread to 'starve' in its attempt
3345 * to find pages to move to the free queue, since it has to
3346 * successfully acquire the object lock of any candidate page
3347 * before it can steal/clean it.
3348 */
3349 vm_object_unlock(object);
3350 mutex_yield(&vm_page_queue_lock);
3351
3352 for (j = 0; ; j++) {
3353 if (vm_object_lock_try(object))
3354 break;
3355 vm_page_unlock_queues();
3356 mutex_pause(j);
3357 vm_page_lock_queues();
3358 }
3359 delayed_unlock = 1;
3360 }
3361 try_next_page:
3362 entry++;
3363 dst_offset += PAGE_SIZE_64;
3364 xfer_size -= PAGE_SIZE;
3365 }
3366 if (alias_page != NULL) {
3367 if (delayed_unlock == 0) {
3368 vm_page_lock_queues();
3369 delayed_unlock = 1;
3370 }
3371 vm_page_free(alias_page);
3372 }
3373 if (delayed_unlock)
3374 vm_page_unlock_queues();
3375
3376 if (page_list_count != NULL) {
3377 if (upl->flags & UPL_INTERNAL)
3378 *page_list_count = 0;
3379 else if (*page_list_count > entry)
3380 *page_list_count = entry;
3381 }
3382 vm_object_unlock(object);
3383
3384 return KERN_SUCCESS;
3385 }
3386
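/*
 * Illustrative sketch (guarded out of the build): with UPL_SET_LITE, each page
 * gathered above is recorded in the lite list by setting bit (pg_num & 31) of
 * 32-bit word (pg_num >> 5), where pg_num is the page index relative to the
 * start of the UPL.  The marking step in isolation, with hypothetical
 * parameter names:
 */
#if 0	/* sketch only -- never compiled */
static void
sketch_lite_list_mark(wpl_array_t lite_list,
		      vm_object_offset_t dst_offset,
		      vm_object_offset_t upl_base_offset)
{
	int	pg_num;

	pg_num = (int) ((dst_offset - upl_base_offset) / PAGE_SIZE);

	lite_list[pg_num >> 5] |= 1 << (pg_num & 31);
}
#endif
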
3387 /* JMM - Backward compatibility for now */
3388 kern_return_t
3389 vm_fault_list_request( /* forward */
3390 memory_object_control_t control,
3391 vm_object_offset_t offset,
3392 upl_size_t size,
3393 upl_t *upl_ptr,
3394 upl_page_info_t **user_page_list_ptr,
3395 unsigned int page_list_count,
3396 int cntrl_flags);
3397 kern_return_t
3398 vm_fault_list_request(
3399 memory_object_control_t control,
3400 vm_object_offset_t offset,
3401 upl_size_t size,
3402 upl_t *upl_ptr,
3403 upl_page_info_t **user_page_list_ptr,
3404 unsigned int page_list_count,
3405 int cntrl_flags)
3406 {
3407 unsigned int local_list_count;
3408 upl_page_info_t *user_page_list;
3409 kern_return_t kr;
3410
3411 if (user_page_list_ptr != NULL) {
3412 local_list_count = page_list_count;
3413 user_page_list = *user_page_list_ptr;
3414 } else {
3415 local_list_count = 0;
3416 user_page_list = NULL;
3417 }
3418 kr = memory_object_upl_request(control,
3419 offset,
3420 size,
3421 upl_ptr,
3422 user_page_list,
3423 &local_list_count,
3424 cntrl_flags);
3425
3426 if(kr != KERN_SUCCESS)
3427 return kr;
3428
3429 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
3430 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
3431 }
3432
3433 return KERN_SUCCESS;
3434 }
3435
3436
3437
3438 /*
3439 * Routine: vm_object_super_upl_request
3440 * Purpose:
3441 * Cause the population of a portion of a vm_object
3442 * in much the same way as memory_object_upl_request.
3443 * Depending on the nature of the request, the pages
3444 * returned may contain valid data or be uninitialized.
3445 * However, the region may be expanded up to the super
3446 * cluster size provided.
3447 */
3448
3449 __private_extern__ kern_return_t
3450 vm_object_super_upl_request(
3451 vm_object_t object,
3452 vm_object_offset_t offset,
3453 upl_size_t size,
3454 upl_size_t super_cluster,
3455 upl_t *upl,
3456 upl_page_info_t *user_page_list,
3457 unsigned int *page_list_count,
3458 int cntrl_flags)
3459 {
3460 if (object->paging_offset > offset)
3461 return KERN_FAILURE;
3462
3463 assert(object->paging_in_progress);
3464 offset = offset - object->paging_offset;
3465
3466 if (super_cluster > size) {
3467
3468 vm_object_offset_t base_offset;
3469 upl_size_t super_size;
3470
3471 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
3472 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
3473 super_size = ((base_offset + super_size) > object->size) ? (object->size - base_offset) : super_size;
3474
3475 if (offset > (base_offset + super_size)) {
3476 panic("vm_object_super_upl_request: Missed target pageout"
3477 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
3478 offset, base_offset, super_size, super_cluster,
3479 size, object->paging_offset);
3480 }
3481 /*
3482 * apparently there is a case where the vm requests a
3483 * page to be written out whose offset is beyond the
3484 * object size
3485 */
3486 if ((offset + size) > (base_offset + super_size))
3487 super_size = (offset + size) - base_offset;
3488
3489 offset = base_offset;
3490 size = super_size;
3491 }
3492 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags);
3493 }
3494
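/*
 * Illustrative sketch (guarded out of the build): assuming super_cluster is a
 * power of two, vm_object_super_upl_request() rounds the request down to a
 * cluster boundary, widens it to one or two clusters, clips it to the object
 * size, and finally re-expands it if the original request still spills past
 * the clipped size.  The rounding arithmetic on its own:
 */
#if 0	/* sketch only -- assumes a power-of-two super_cluster, never compiled */
static void
sketch_super_cluster(vm_object_offset_t *offset, upl_size_t *size,
		     upl_size_t super_cluster, vm_object_size_t object_size)
{
	vm_object_offset_t	base;
	upl_size_t		super_size;

	base = *offset & ~((vm_object_offset_t) super_cluster - 1);
	super_size = ((*offset + *size) > (base + super_cluster)) ?
	    super_cluster << 1 : super_cluster;

	if ((base + super_size) > object_size)
		super_size = (upl_size_t) (object_size - base);
	if ((*offset + *size) > (base + super_size))
		super_size = (upl_size_t) ((*offset + *size) - base);

	*offset = base;
	*size = super_size;
}
#endif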
3495
3496 kern_return_t
3497 vm_map_create_upl(
3498 vm_map_t map,
3499 vm_map_address_t offset,
3500 upl_size_t *upl_size,
3501 upl_t *upl,
3502 upl_page_info_array_t page_list,
3503 unsigned int *count,
3504 int *flags)
3505 {
3506 vm_map_entry_t entry;
3507 int caller_flags;
3508 int force_data_sync;
3509 int sync_cow_data;
3510 vm_object_t local_object;
3511 vm_map_offset_t local_offset;
3512 vm_map_offset_t local_start;
3513 kern_return_t ret;
3514
3515 caller_flags = *flags;
3516
3517 if (caller_flags & ~UPL_VALID_FLAGS) {
3518 /*
3519 * For forward compatibility's sake,
3520 * reject any unknown flag.
3521 */
3522 return KERN_INVALID_VALUE;
3523 }
3524 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
3525 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
3526
3527 if (upl == NULL)
3528 return KERN_INVALID_ARGUMENT;
3529
3530 REDISCOVER_ENTRY:
3531 vm_map_lock(map);
3532
3533 if (vm_map_lookup_entry(map, offset, &entry)) {
3534
3535 if ((entry->vme_end - offset) < *upl_size)
3536 *upl_size = entry->vme_end - offset;
3537
3538 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
3539 *flags = 0;
3540
3541 if (entry->object.vm_object != VM_OBJECT_NULL) {
3542 if (entry->object.vm_object->private)
3543 *flags = UPL_DEV_MEMORY;
3544
3545 if (entry->object.vm_object->phys_contiguous)
3546 *flags |= UPL_PHYS_CONTIG;
3547 }
3548 vm_map_unlock(map);
3549
3550 return KERN_SUCCESS;
3551 }
3552 if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) {
3553 if ((*upl_size/page_size) > MAX_UPL_TRANSFER)
3554 *upl_size = MAX_UPL_TRANSFER * page_size;
3555 }
3556 /*
3557 * Create an object if necessary.
3558 */
3559 if (entry->object.vm_object == VM_OBJECT_NULL) {
3560 entry->object.vm_object = vm_object_allocate((vm_size_t)(entry->vme_end - entry->vme_start));
3561 entry->offset = 0;
3562 }
3563 if (!(caller_flags & UPL_COPYOUT_FROM)) {
3564 if (!(entry->protection & VM_PROT_WRITE)) {
3565 vm_map_unlock(map);
3566 return KERN_PROTECTION_FAILURE;
3567 }
3568 if (entry->needs_copy) {
3569 vm_map_t local_map;
3570 vm_object_t object;
3571 vm_object_offset_t new_offset;
3572 vm_prot_t prot;
3573 boolean_t wired;
3574 vm_map_version_t version;
3575 vm_map_t real_map;
3576
3577 local_map = map;
3578 vm_map_lock_write_to_read(map);
3579
3580 if (vm_map_lookup_locked(&local_map,
3581 offset, VM_PROT_WRITE,
3582 OBJECT_LOCK_EXCLUSIVE,
3583 &version, &object,
3584 &new_offset, &prot, &wired,
3585 NULL,
3586 &real_map)) {
3587 vm_map_unlock(local_map);
3588 return KERN_FAILURE;
3589 }
3590 if (real_map != map)
3591 vm_map_unlock(real_map);
3592 vm_object_unlock(object);
3593 vm_map_unlock(local_map);
3594
3595 goto REDISCOVER_ENTRY;
3596 }
3597 }
3598 if (entry->is_sub_map) {
3599 vm_map_t submap;
3600
3601 submap = entry->object.sub_map;
3602 local_start = entry->vme_start;
3603 local_offset = entry->offset;
3604
3605 vm_map_reference(submap);
3606 vm_map_unlock(map);
3607
3608 ret = vm_map_create_upl(submap,
3609 local_offset + (offset - local_start),
3610 upl_size, upl, page_list, count, flags);
3611 vm_map_deallocate(submap);
3612
3613 return ret;
3614 }
3615 if (sync_cow_data) {
3616 if (entry->object.vm_object->shadow || entry->object.vm_object->copy) {
3617 local_object = entry->object.vm_object;
3618 local_start = entry->vme_start;
3619 local_offset = entry->offset;
3620
3621 vm_object_reference(local_object);
3622 vm_map_unlock(map);
3623
3624 if (entry->object.vm_object->shadow && entry->object.vm_object->copy) {
3625 vm_object_lock_request(
3626 local_object->shadow,
3627 (vm_object_offset_t)
3628 ((offset - local_start) +
3629 local_offset) +
3630 local_object->shadow_offset,
3631 *upl_size, FALSE,
3632 MEMORY_OBJECT_DATA_SYNC,
3633 VM_PROT_NO_CHANGE);
3634 }
3635 sync_cow_data = FALSE;
3636 vm_object_deallocate(local_object);
3637
3638 goto REDISCOVER_ENTRY;
3639 }
3640 }
3641 if (force_data_sync) {
3642 local_object = entry->object.vm_object;
3643 local_start = entry->vme_start;
3644 local_offset = entry->offset;
3645
3646 vm_object_reference(local_object);
3647 vm_map_unlock(map);
3648
3649 vm_object_lock_request(
3650 local_object,
3651 (vm_object_offset_t)
3652 ((offset - local_start) + local_offset),
3653 (vm_object_size_t)*upl_size, FALSE,
3654 MEMORY_OBJECT_DATA_SYNC,
3655 VM_PROT_NO_CHANGE);
3656
3657 force_data_sync = FALSE;
3658 vm_object_deallocate(local_object);
3659
3660 goto REDISCOVER_ENTRY;
3661 }
3662 if (entry->object.vm_object->private)
3663 *flags = UPL_DEV_MEMORY;
3664 else
3665 *flags = 0;
3666
3667 if (entry->object.vm_object->phys_contiguous)
3668 *flags |= UPL_PHYS_CONTIG;
3669
3670 local_object = entry->object.vm_object;
3671 local_offset = entry->offset;
3672 local_start = entry->vme_start;
3673
3674 vm_object_reference(local_object);
3675 vm_map_unlock(map);
3676
3677 ret = vm_object_iopl_request(local_object,
3678 (vm_object_offset_t) ((offset - local_start) + local_offset),
3679 *upl_size,
3680 upl,
3681 page_list,
3682 count,
3683 caller_flags);
3684 vm_object_deallocate(local_object);
3685
3686 return(ret);
3687 }
3688 vm_map_unlock(map);
3689
3690 return(KERN_FAILURE);
3691 }
3692
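/*
 * Illustrative sketch (guarded out of the build): a hypothetical caller hands
 * vm_map_create_upl() a map-relative offset and the desired length; the length
 * may come back clipped to the map entry or to MAX_UPL_TRANSFER pages, and
 * *flags reports UPL_DEV_MEMORY / UPL_PHYS_CONTIG for device or physically
 * contiguous backing.  The control flags below are chosen only for
 * illustration:
 */
#if 0	/* sketch only -- hypothetical caller, never compiled */
static kern_return_t
sketch_create_upl(vm_map_t map, vm_map_address_t offset, upl_size_t len)
{
	upl_t			upl;
	upl_page_info_t		pl[MAX_UPL_TRANSFER];
	unsigned int		count = MAX_UPL_TRANSFER;
	int			flags = UPL_COPYOUT_FROM | UPL_SET_LITE;
	kern_return_t		kr;

	kr = vm_map_create_upl(map, offset, &len, &upl, pl, &count, &flags);

	if (kr == KERN_SUCCESS)
		upl_deallocate(upl);	/* drop the reference taken at creation */

	return (kr);
}
#endif
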
3693 /*
3694 * Internal routine to enter a UPL into a VM map.
3695 *
3696 * JMM - This should just be doable through the standard
3697 * vm_map_enter() API.
3698 */
3699 kern_return_t
3700 vm_map_enter_upl(
3701 vm_map_t map,
3702 upl_t upl,
3703 vm_map_offset_t *dst_addr)
3704 {
3705 vm_map_size_t size;
3706 vm_object_offset_t offset;
3707 vm_map_offset_t addr;
3708 vm_page_t m;
3709 kern_return_t kr;
3710
3711 if (upl == UPL_NULL)
3712 return KERN_INVALID_ARGUMENT;
3713
3714 upl_lock(upl);
3715
3716 /*
3717 * check to see if already mapped
3718 */
3719 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
3720 upl_unlock(upl);
3721 return KERN_FAILURE;
3722 }
3723
3724 if ((!(upl->flags & UPL_SHADOWED)) && !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) ||
3725 (upl->map_object->phys_contiguous))) {
3726 vm_object_t object;
3727 vm_page_t alias_page;
3728 vm_object_offset_t new_offset;
3729 int pg_num;
3730 wpl_array_t lite_list;
3731
3732 if (upl->flags & UPL_INTERNAL) {
3733 lite_list = (wpl_array_t)
3734 ((((uintptr_t)upl) + sizeof(struct upl))
3735 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3736 } else {
3737 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
3738 }
3739 object = upl->map_object;
3740 upl->map_object = vm_object_allocate(upl->size);
3741
3742 vm_object_lock(upl->map_object);
3743
3744 upl->map_object->shadow = object;
3745 upl->map_object->pageout = TRUE;
3746 upl->map_object->can_persist = FALSE;
3747 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3748 upl->map_object->shadow_offset = upl->offset - object->paging_offset;
3749 upl->map_object->wimg_bits = object->wimg_bits;
3750 offset = upl->map_object->shadow_offset;
3751 new_offset = 0;
3752 size = upl->size;
3753
3754 upl->flags |= UPL_SHADOWED;
3755
3756 while (size) {
3757 pg_num = (new_offset)/PAGE_SIZE;
3758
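/*
 * The lite list is a bitmap with one bit per page of the UPL:
 * pg_num >> 5 picks the 32-bit word, (pg_num & 31) the bit within
 * it (e.g. page 37 lands in word 1, bit 5). Only pages actually
 * covered by the UPL get an alias page set up below.
 */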
3759 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3760
3761 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3762
3763 vm_object_lock(object);
3764
3765 m = vm_page_lookup(object, offset);
3766 if (m == VM_PAGE_NULL) {
3767 panic("vm_upl_map: page missing\n");
3768 }
3769
3770 /*
3771 * Convert the fictitious page to a private
3772 * shadow of the real page.
3773 */
3774 assert(alias_page->fictitious);
3775 alias_page->fictitious = FALSE;
3776 alias_page->private = TRUE;
3777 alias_page->pageout = TRUE;
3778 /*
3779 * since m is a page in the upl it must
3780 * already be wired or BUSY, so it's
3781 * safe to assign the underlying physical
3782 * page to the alias
3783 */
3784 alias_page->phys_page = m->phys_page;
3785
3786 vm_object_unlock(object);
3787
3788 vm_page_lockspin_queues();
3789 vm_page_wire(alias_page);
3790 vm_page_unlock_queues();
3791
3792 /*
3793 * ENCRYPTED SWAP:
3794 * The virtual page ("m") has to be wired in some way
3795 * here or its physical page ("m->phys_page") could
3796 * be recycled at any time.
3797 * Assuming this is enforced by the caller, we can't
3798 * get an encrypted page here. Since the encryption
3799 * key depends on the VM page's "pager" object and
3800 * the "paging_offset", we couldn't handle 2 pageable
3801 * VM pages (with different pagers and paging_offsets)
3802 * sharing the same physical page: we could end up
3803 * encrypting with one key (via one VM page) and
3804 * decrypting with another key (via the alias VM page).
3805 */
3806 ASSERT_PAGE_DECRYPTED(m);
3807
3808 vm_page_insert(alias_page, upl->map_object, new_offset);
3809
3810 assert(!alias_page->wanted);
3811 alias_page->busy = FALSE;
3812 alias_page->absent = FALSE;
3813 }
3814 size -= PAGE_SIZE;
3815 offset += PAGE_SIZE_64;
3816 new_offset += PAGE_SIZE_64;
3817 }
3818 vm_object_unlock(upl->map_object);
3819 }
3820 if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous)
3821 offset = upl->offset - upl->map_object->paging_offset;
3822 else
3823 offset = 0;
3824 size = upl->size;
3825
3826 vm_object_reference(upl->map_object);
3827
3828 *dst_addr = 0;
3829 /*
3830 * NEED A UPL_MAP ALIAS
3831 */
3832 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
3833 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE,
3834 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
3835
3836 if (kr != KERN_SUCCESS) {
3837 upl_unlock(upl);
3838 return(kr);
3839 }
3840 vm_object_lock(upl->map_object);
3841
3842 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
3843 m = vm_page_lookup(upl->map_object, offset);
3844
3845 if (m) {
3846 unsigned int cache_attr;
3847 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3848
3849 m->pmapped = TRUE;
3850 m->wpmapped = TRUE;
3851
3852 PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, cache_attr, TRUE);
3853 }
3854 offset += PAGE_SIZE_64;
3855 }
3856 vm_object_unlock(upl->map_object);
3857
3858 /*
3859 * hold a reference for the mapping
3860 */
3861 upl->ref_count++;
3862 upl->flags |= UPL_PAGE_LIST_MAPPED;
3863 upl->kaddr = *dst_addr;
3864 upl_unlock(upl);
3865
3866 return KERN_SUCCESS;
3867 }
3868
3869 /*
3870 * Internal routine to remove a UPL mapping from a VM map.
3871 *
3872 * XXX - This should just be doable through a standard
3873 * vm_map_remove() operation. Otherwise, implicit clean-up
3874 * of the target map won't be able to correctly remove
3875 * these (and release the reference on the UPL). Having
3876 * to do this means we can't map these into user-space
3877 * maps yet.
3878 */
3879 kern_return_t
3880 vm_map_remove_upl(
3881 vm_map_t map,
3882 upl_t upl)
3883 {
3884 vm_address_t addr;
3885 upl_size_t size;
3886
3887 if (upl == UPL_NULL)
3888 return KERN_INVALID_ARGUMENT;
3889
3890 upl_lock(upl);
3891
3892 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
3893 addr = upl->kaddr;
3894 size = upl->size;
3895
3896 assert(upl->ref_count > 1);
3897 upl->ref_count--; /* removing mapping ref */
3898
3899 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
3900 upl->kaddr = (vm_offset_t) 0;
3901 upl_unlock(upl);
3902
3903 vm_map_remove(map,
3904 vm_map_trunc_page(addr),
3905 vm_map_round_page(addr + size),
3906 VM_MAP_NO_FLAGS);
3907
3908 return KERN_SUCCESS;
3909 }
3910 upl_unlock(upl);
3911
3912 return KERN_FAILURE;
3913 }
3914
3915 kern_return_t
3916 upl_commit_range(
3917 upl_t upl,
3918 upl_offset_t offset,
3919 upl_size_t size,
3920 int flags,
3921 upl_page_info_t *page_list,
3922 mach_msg_type_number_t count,
3923 boolean_t *empty)
3924 {
3925 upl_size_t xfer_size;
3926 vm_object_t shadow_object;
3927 vm_object_t object;
3928 vm_object_offset_t target_offset;
3929 int entry;
3930 wpl_array_t lite_list;
3931 int occupied;
3932 int delayed_unlock = 0;
3933 int clear_refmod = 0;
3934 int pgpgout_count = 0;
3935 int j;
3936
3937 *empty = FALSE;
3938
3939 if (upl == UPL_NULL)
3940 return KERN_INVALID_ARGUMENT;
3941
3942 if (count == 0)
3943 page_list = NULL;
3944
3945 if (upl->flags & UPL_DEVICE_MEMORY)
3946 xfer_size = 0;
3947 else if ((offset + size) <= upl->size)
3948 xfer_size = size;
3949 else
3950 return KERN_FAILURE;
3951
3952 upl_lock(upl);
3953
3954 if (upl->flags & UPL_ACCESS_BLOCKED) {
3955 /*
3956 * We used this UPL to block access to the pages by marking
3957 * them "busy". Now we need to clear the "busy" bit to allow
3958 * access to these pages again.
3959 */
3960 flags |= UPL_COMMIT_ALLOW_ACCESS;
3961 }
3962 if (upl->flags & UPL_CLEAR_DIRTY)
3963 flags |= UPL_COMMIT_CLEAR_DIRTY;
3964
3965 if (upl->flags & UPL_INTERNAL)
3966 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
3967 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3968 else
3969 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
3970
3971 object = upl->map_object;
3972
3973 if (upl->flags & UPL_SHADOWED) {
3974 vm_object_lock(object);
3975 shadow_object = object->shadow;
3976 } else {
3977 shadow_object = object;
3978 }
3979 entry = offset/PAGE_SIZE;
3980 target_offset = (vm_object_offset_t)offset;
3981
3982 /*
3983 * pageout_scan takes the vm_page_lock_queues first
3984 * then tries for the object lock... to avoid what
3985 * is effectively a lock inversion, we'll go to the
3986 * trouble of taking them in that same order... otherwise
3987 * if this object contains the majority of the pages resident
3988 * in the UBC (or a small set of large objects actively being
3989 * worked on contain the majority of the pages), we could
3990 * cause the pageout_scan thread to 'starve' in its attempt
3991 * to find pages to move to the free queue, since it has to
3992 * successfully acquire the object lock of any candidate page
3993 * before it can steal/clean it.
3994 */
3995 for (j = 0; ; j++) {
3996 vm_page_lock_queues();
3997
3998 if (vm_object_lock_try(shadow_object))
3999 break;
4000 vm_page_unlock_queues();
4001 mutex_pause(j);
4002 }
4003 delayed_unlock = 1;
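/*
 * delayed_unlock counts how many pages we process while holding the
 * page-queue and object locks; once it passes UPL_DELAYED_UNLOCK_LIMIT
 * the loop below drops the object lock and yields the page-queue lock
 * before re-taking both, so pageout_scan isn't starved for too long.
 */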
4004
4005 while (xfer_size) {
4006 vm_page_t t, m;
4007
4008 m = VM_PAGE_NULL;
4009
4010 if (upl->flags & UPL_LITE) {
4011 int pg_num;
4012
4013 pg_num = target_offset/PAGE_SIZE;
4014
4015 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4016 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4017
4018 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
4019 }
4020 }
4021 if (upl->flags & UPL_SHADOWED) {
4022 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
4023
4024 t->pageout = FALSE;
4025
4026 vm_page_free(t);
4027
4028 if (m == VM_PAGE_NULL)
4029 m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
4030 }
4031 }
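/*
 * At this point "m", if found, is the real page in the backing
 * (shadow) object; any "t" located above was only the placeholder
 * page in the UPL's own map object and has just been freed.
 */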
4032 if (m != VM_PAGE_NULL) {
4033
4034 clear_refmod = 0;
4035
4036 if (upl->flags & UPL_IO_WIRE) {
4037
4038 vm_page_unwire(m);
4039
4040 if (page_list)
4041 page_list[entry].phys_addr = 0;
4042
4043 if (flags & UPL_COMMIT_SET_DIRTY)
4044 m->dirty = TRUE;
4045 else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
4046 m->dirty = FALSE;
4047 if (m->cs_validated && !m->cs_tainted) {
4048 /*
4049 * CODE SIGNING:
4050 * This page is no longer dirty
4051 * but could have been modified,
4052 * so it will need to be
4053 * re-validated.
4054 */
4055 m->cs_validated = FALSE;
4056 vm_cs_validated_resets++;
4057 }
4058 clear_refmod |= VM_MEM_MODIFIED;
4059 }
4060 if (flags & UPL_COMMIT_INACTIVATE)
4061 vm_page_deactivate(m);
4062
4063 if (clear_refmod)
4064 pmap_clear_refmod(m->phys_page, clear_refmod);
4065
4066 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
4067 /*
4068 * We blocked access to the pages in this UPL.
4069 * Clear the "busy" bit and wake up any waiter
4070 * for this page.
4071 */
4072 PAGE_WAKEUP_DONE(m);
4073 }
4074 goto commit_next_page;
4075 }
4076 /*
4077 * make sure to clear the hardware
4078 * modify or reference bits before
4079 * releasing the BUSY bit on this page
4080 * otherwise we risk losing a legitimate
4081 * change of state
4082 */
4083 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
4084 m->dirty = FALSE;
4085 if (m->cs_validated && !m->cs_tainted) {
4086 /*
4087 * CODE SIGNING:
4088 * This page is no longer dirty
4089 * but could have been modified,
4090 * so it will need to be
4091 * re-validated.
4092 */
4093 m->cs_validated = FALSE;
4094 vm_cs_validated_resets++;
4095 }
4096 clear_refmod |= VM_MEM_MODIFIED;
4097 }
4098 if (clear_refmod)
4099 pmap_clear_refmod(m->phys_page, clear_refmod);
4100
4101 if (page_list) {
4102 upl_page_info_t *p;
4103
4104 p = &(page_list[entry]);
4105
4106 if (p->phys_addr && p->pageout && !m->pageout) {
4107 m->busy = TRUE;
4108 m->pageout = TRUE;
4109 vm_page_wire(m);
4110 } else if (p->phys_addr &&
4111 !p->pageout && m->pageout &&
4112 !m->dump_cleaning) {
4113 m->pageout = FALSE;
4114 m->absent = FALSE;
4115 m->overwriting = FALSE;
4116 vm_page_unwire(m);
4117
4118 PAGE_WAKEUP_DONE(m);
4119 }
4120 page_list[entry].phys_addr = 0;
4121 }
4122 m->dump_cleaning = FALSE;
4123
4124 if (m->laundry)
4125 vm_pageout_throttle_up(m);
4126
4127 if (m->pageout) {
4128 m->cleaning = FALSE;
4129 m->encrypted_cleaning = FALSE;
4130 m->pageout = FALSE;
4131 #if MACH_CLUSTER_STATS
4132 if (m->wanted) vm_pageout_target_collisions++;
4133 #endif
4134 m->dirty = FALSE;
4135 if (m->cs_validated && !m->cs_tainted) {
4136 /*
4137 * CODE SIGNING:
4138 * This page is no longer dirty
4139 * but could have been modified,
4140 * so it will need to be
4141 * re-validated.
4142 */
4143 m->cs_validated = FALSE;
4144 vm_cs_validated_resets++;
4145 }
4146
4147 if (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))
4148 m->dirty = TRUE;
4149
4150 if (m->dirty) {
4151 /*
4152 * page was re-dirtied after we started
4153 * the pageout... reactivate it since
4154 * we don't know whether the on-disk
4155 * copy matches what is now in memory
4156 */
4157 vm_page_unwire(m);
4158
4159 if (upl->flags & UPL_PAGEOUT) {
4160 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
4161 VM_STAT_INCR(reactivations);
4162 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4163 }
4164 PAGE_WAKEUP_DONE(m);
4165 } else {
4166 /*
4167 * page has been successfully cleaned
4168 * go ahead and free it for other use
4169 */
4170
4171 if (m->object->internal) {
4172 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
4173 } else {
4174 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
4175 }
4176
4177 vm_page_free(m);
4178
4179 if (upl->flags & UPL_PAGEOUT) {
4180 CLUSTER_STAT(vm_pageout_target_page_freed++;)
4181
4182 if (page_list[entry].dirty) {
4183 VM_STAT_INCR(pageouts);
4184 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
4185 pgpgout_count++;
4186 }
4187 }
4188 }
4189 goto commit_next_page;
4190 }
4191 #if MACH_CLUSTER_STATS
4192 if (m->wpmapped)
4193 m->dirty = pmap_is_modified(m->phys_page);
4194
4195 if (m->dirty) vm_pageout_cluster_dirtied++;
4196 else vm_pageout_cluster_cleaned++;
4197 if (m->wanted) vm_pageout_cluster_collisions++;
4198 #endif
4199 m->dirty = FALSE;
4200 if (m->cs_validated && !m->cs_tainted) {
4201 /*
4202 * CODE SIGNING:
4203 * This page is no longer dirty
4204 * but could have been modified,
4205 * so it will need to be
4206 * re-validated.
4207 */
4208 m->cs_validated = FALSE;
4209 vm_cs_validated_resets++;
4210 }
4211
4212 if ((m->busy) && (m->cleaning)) {
4213 /*
4214 * the request_page_list case
4215 */
4216 m->absent = FALSE;
4217 m->overwriting = FALSE;
4218 m->busy = FALSE;
4219 } else if (m->overwriting) {
4220 /*
4221 * alternate request page list, write to
4222 * page_list case. Occurs when the original
4223 * page was wired at the time of the list
4224 * request
4225 */
4226 assert(m->wire_count != 0);
4227 vm_page_unwire(m);/* reactivates */
4228 m->overwriting = FALSE;
4229 }
4230 m->cleaning = FALSE;
4231 m->encrypted_cleaning = FALSE;
4232
4233 /*
4234 * It is a part of the semantic of COPYOUT_FROM
4235 * UPLs that a commit implies cache sync
4236 * between the vm page and the backing store
4237 * this can be used to strip the precious bit
4238 * as well as clean
4239 */
4240 if (upl->flags & UPL_PAGE_SYNC_DONE)
4241 m->precious = FALSE;
4242
4243 if (flags & UPL_COMMIT_SET_DIRTY)
4244 m->dirty = TRUE;
4245
4246 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
4247 vm_page_deactivate(m);
4248 } else if (!m->active && !m->inactive && !m->speculative) {
4249
4250 if (m->clustered)
4251 vm_page_speculate(m, TRUE);
4252 else if (m->reference)
4253 vm_page_activate(m);
4254 else
4255 vm_page_deactivate(m);
4256 }
4257 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
4258 /*
4259 * We blocked access to the pages in this UPL.
4260 * Clear the "busy" bit on this page before we
4261 * wake up any waiter.
4262 */
4263 m->busy = FALSE;
4264 }
4265 /*
4266 * Wakeup any thread waiting for the page to be un-cleaning.
4267 */
4268 PAGE_WAKEUP(m);
4269 }
4270 commit_next_page:
4271 target_offset += PAGE_SIZE_64;
4272 xfer_size -= PAGE_SIZE;
4273 entry++;
4274
4275 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
4276 /*
4277 * pageout_scan takes the vm_page_lock_queues first
4278 * then tries for the object lock... to avoid what
4279 * is effectively a lock inversion, we'll go to the
4280 * trouble of taking them in that same order... otherwise
4281 * if this object contains the majority of the pages resident
4282 * in the UBC (or a small set of large objects actively being
4283 * worked on contain the majority of the pages), we could
4284 * cause the pageout_scan thread to 'starve' in its attempt
4285 * to find pages to move to the free queue, since it has to
4286 * successfully acquire the object lock of any candidate page
4287 * before it can steal/clean it.
4288 */
4289 vm_object_unlock(shadow_object);
4290 mutex_yield(&vm_page_queue_lock);
4291
4292 for (j = 0; ; j++) {
4293 if (vm_object_lock_try(shadow_object))
4294 break;
4295 vm_page_unlock_queues();
4296 mutex_pause(j);
4297 vm_page_lock_queues();
4298 }
4299 delayed_unlock = 1;
4300 }
4301 }
4302 if (delayed_unlock)
4303 vm_page_unlock_queues();
4304
4305 occupied = 1;
4306
4307 if (upl->flags & UPL_DEVICE_MEMORY) {
4308 occupied = 0;
4309 } else if (upl->flags & UPL_LITE) {
4310 int pg_num;
4311 int i;
4312
4313 pg_num = upl->size/PAGE_SIZE;
4314 pg_num = (pg_num + 31) >> 5;
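/*
 * (pg_num + 31) >> 5 rounds up to the number of 32-bit words in the
 * lite bitmap (e.g. 33 pages -> 2 words); the UPL still counts as
 * occupied if any bit in any of those words remains set.
 */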
4315 occupied = 0;
4316
4317 for (i = 0; i < pg_num; i++) {
4318 if (lite_list[i] != 0) {
4319 occupied = 1;
4320 break;
4321 }
4322 }
4323 } else {
4324 if (queue_empty(&upl->map_object->memq))
4325 occupied = 0;
4326 }
4327 if (occupied == 0) {
4328 if (upl->flags & UPL_COMMIT_NOTIFY_EMPTY)
4329 *empty = TRUE;
4330
4331 if (object == shadow_object) {
4332 /*
4333 * this is not a paging object
4334 * so we need to drop the paging reference
4335 * that was taken when we created the UPL
4336 * against this object
4337 */
4338 vm_object_paging_end(shadow_object);
4339 } else {
4340 /*
4341 * we donated the paging reference to
4342 * the map object... vm_pageout_object_terminate
4343 * will drop this reference
4344 */
4345 }
4346 }
4347 vm_object_unlock(shadow_object);
4348 if (object != shadow_object)
4349 vm_object_unlock(object);
4350 upl_unlock(upl);
4351
4352 if (pgpgout_count) {
4353 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
4354 }
4355
4356 return KERN_SUCCESS;
4357 }
4358
4359 kern_return_t
4360 upl_abort_range(
4361 upl_t upl,
4362 upl_offset_t offset,
4363 upl_size_t size,
4364 int error,
4365 boolean_t *empty)
4366 {
4367 upl_size_t xfer_size;
4368 vm_object_t shadow_object;
4369 vm_object_t object;
4370 vm_object_offset_t target_offset;
4371 int entry;
4372 wpl_array_t lite_list;
4373 int occupied;
4374 int delayed_unlock = 0;
4375 int j;
4376
4377 *empty = FALSE;
4378
4379 if (upl == UPL_NULL)
4380 return KERN_INVALID_ARGUMENT;
4381
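/*
 * Aborting an I/O-wired UPL (unless the caller explicitly asked for
 * the pages to be dumped) leaves nothing to undo beyond what a commit
 * already does, so it is simply redirected to upl_commit_range().
 */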
4382 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
4383 return upl_commit_range(upl, offset, size, 0, NULL, 0, empty);
4384
4385 if (upl->flags & UPL_DEVICE_MEMORY)
4386 xfer_size = 0;
4387 else if ((offset + size) <= upl->size)
4388 xfer_size = size;
4389 else
4390 return KERN_FAILURE;
4391
4392 upl_lock(upl);
4393
4394 if (upl->flags & UPL_INTERNAL) {
4395 lite_list = (wpl_array_t)
4396 ((((uintptr_t)upl) + sizeof(struct upl))
4397 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4398 } else {
4399 lite_list = (wpl_array_t)
4400 (((uintptr_t)upl) + sizeof(struct upl));
4401 }
4402 object = upl->map_object;
4403
4404 if (upl->flags & UPL_SHADOWED) {
4405 vm_object_lock(object);
4406 shadow_object = object->shadow;
4407 } else
4408 shadow_object = object;
4409
4410 entry = offset/PAGE_SIZE;
4411 target_offset = (vm_object_offset_t)offset;
4412
4413 /*
4414 * pageout_scan takes the vm_page_lock_queues first
4415 * then tries for the object lock... to avoid what
4416 * is effectively a lock inversion, we'll go to the
4417 * trouble of taking them in that same order... otherwise
4418 * if this object contains the majority of the pages resident
4419 * in the UBC (or a small set of large objects actively being
4420 * worked on contain the majority of the pages), we could
4421 * cause the pageout_scan thread to 'starve' in its attempt
4422 * to find pages to move to the free queue, since it has to
4423 * successfully acquire the object lock of any candidate page
4424 * before it can steal/clean it.
4425 */
4426 for (j = 0; ; j++) {
4427 vm_page_lock_queues();
4428
4429 if (vm_object_lock_try(shadow_object))
4430 break;
4431 vm_page_unlock_queues();
4432 mutex_pause(j);
4433 }
4434 delayed_unlock = 1;
4435
4436 while (xfer_size) {
4437 vm_page_t t, m;
4438
4439 m = VM_PAGE_NULL;
4440
4441 if (upl->flags & UPL_LITE) {
4442 int pg_num;
4443 pg_num = target_offset/PAGE_SIZE;
4444
4445 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4446 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4447
4448 m = vm_page_lookup(shadow_object, target_offset +
4449 (upl->offset - shadow_object->paging_offset));
4450 }
4451 }
4452 if (upl->flags & UPL_SHADOWED) {
4453 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
4454 t->pageout = FALSE;
4455
4456 vm_page_free(t);
4457
4458 if (m == VM_PAGE_NULL)
4459 m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
4460 }
4461 }
4462 if (m != VM_PAGE_NULL) {
4463
4464 if (m->absent) {
4465 boolean_t must_free = TRUE;
4466
4467 m->clustered = FALSE;
4468 /*
4469 * COPYOUT = FALSE case
4470 * check for error conditions which must
4471 * be passed back to the page's customer
4472 */
4473 if (error & UPL_ABORT_RESTART) {
4474 m->restart = TRUE;
4475 m->absent = FALSE;
4476 m->error = TRUE;
4477 m->unusual = TRUE;
4478 must_free = FALSE;
4479 } else if (error & UPL_ABORT_UNAVAILABLE) {
4480 m->restart = FALSE;
4481 m->unusual = TRUE;
4482 must_free = FALSE;
4483 } else if (error & UPL_ABORT_ERROR) {
4484 m->restart = FALSE;
4485 m->absent = FALSE;
4486 m->error = TRUE;
4487 m->unusual = TRUE;
4488 must_free = FALSE;
4489 }
4490
4491 /*
4492 * ENCRYPTED SWAP:
4493 * If the page was already encrypted,
4494 * we don't really need to decrypt it
4495 * now. It will get decrypted later,
4496 * on demand, as soon as someone needs
4497 * to access its contents.
4498 */
4499
4500 m->cleaning = FALSE;
4501 m->encrypted_cleaning = FALSE;
4502 m->overwriting = FALSE;
4503 PAGE_WAKEUP_DONE(m);
4504
4505 if (must_free == TRUE)
4506 vm_page_free(m);
4507 else
4508 vm_page_activate(m);
4509 } else {
4510 /*
4511 * Handle the trusted pager throttle.
4512 */
4513 if (m->laundry)
4514 vm_pageout_throttle_up(m);
4515
4516 if (m->pageout) {
4517 assert(m->busy);
4518 assert(m->wire_count == 1);
4519 m->pageout = FALSE;
4520 vm_page_unwire(m);
4521 }
4522 m->dump_cleaning = FALSE;
4523 m->cleaning = FALSE;
4524 m->encrypted_cleaning = FALSE;
4525 m->overwriting = FALSE;
4526 #if MACH_PAGEMAP
4527 vm_external_state_clr(m->object->existence_map, m->offset);
4528 #endif /* MACH_PAGEMAP */
4529 if (error & UPL_ABORT_DUMP_PAGES) {
4530 pmap_disconnect(m->phys_page);
4531 vm_page_free(m);
4532 } else {
4533 if (error & UPL_ABORT_REFERENCE) {
4534 /*
4535 * we've been told to explicitly
4536 * reference this page... for
4537 * file I/O, this is done by
4538 * implementing an LRU on the inactive q
4539 */
4540 vm_page_lru(m);
4541 }
4542 PAGE_WAKEUP_DONE(m);
4543 }
4544 }
4545 }
4546 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
4547 /*
4548 * pageout_scan takes the vm_page_lock_queues first
4549 * then tries for the object lock... to avoid what
4550 * is effectively a lock inversion, we'll go to the
4551 * trouble of taking them in that same order... otherwise
4552 * if this object contains the majority of the pages resident
4553 * in the UBC (or a small set of large objects actively being
4554 * worked on contain the majority of the pages), we could
4555 * cause the pageout_scan thread to 'starve' in its attempt
4556 * to find pages to move to the free queue, since it has to
4557 * successfully acquire the object lock of any candidate page
4558 * before it can steal/clean it.
4559 */
4560 vm_object_unlock(shadow_object);
4561 mutex_yield(&vm_page_queue_lock);
4562
4563 for (j = 0; ; j++) {
4564 if (vm_object_lock_try(shadow_object))
4565 break;
4566 vm_page_unlock_queues();
4567 mutex_pause(j);
4568 vm_page_lock_queues();
4569 }
4570 delayed_unlock = 1;
4571 }
4572 target_offset += PAGE_SIZE_64;
4573 xfer_size -= PAGE_SIZE;
4574 entry++;
4575 }
4576 if (delayed_unlock)
4577 vm_page_unlock_queues();
4578
4579 occupied = 1;
4580
4581 if (upl->flags & UPL_DEVICE_MEMORY) {
4582 occupied = 0;
4583 } else if (upl->flags & UPL_LITE) {
4584 int pg_num;
4585 int i;
4586
4587 pg_num = upl->size/PAGE_SIZE;
4588 pg_num = (pg_num + 31) >> 5;
4589 occupied = 0;
4590
4591 for (i = 0; i < pg_num; i++) {
4592 if (lite_list[i] != 0) {
4593 occupied = 1;
4594 break;
4595 }
4596 }
4597 } else {
4598 if (queue_empty(&upl->map_object->memq))
4599 occupied = 0;
4600 }
4601 if (occupied == 0) {
4602 if (upl->flags & UPL_COMMIT_NOTIFY_EMPTY)
4603 *empty = TRUE;
4604
4605 if (object == shadow_object) {
4606 /*
4607 * this is not a paging object
4608 * so we need to drop the paging reference
4609 * that was taken when we created the UPL
4610 * against this object
4611 */
4612 vm_object_paging_end(shadow_object);
4613 } else {
4614 /*
4615 * we donated the paging reference to
4616 * the map object... vm_pageout_object_terminate
4617 * will drop this reference
4618 */
4619 }
4620 }
4621 vm_object_unlock(shadow_object);
4622 if (object != shadow_object)
4623 vm_object_unlock(object);
4624 upl_unlock(upl);
4625
4626 return KERN_SUCCESS;
4627 }
4628
4629
4630 kern_return_t
4631 upl_abort(
4632 upl_t upl,
4633 int error)
4634 {
4635 boolean_t empty;
4636
4637 return upl_abort_range(upl, 0, upl->size, error, &empty);
4638 }
4639
4640
4641 /* an option on commit should be wire */
4642 kern_return_t
4643 upl_commit(
4644 upl_t upl,
4645 upl_page_info_t *page_list,
4646 mach_msg_type_number_t count)
4647 {
4648 boolean_t empty;
4649
4650 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
4651 }
4652
4653
4654 kern_return_t
4655 vm_object_iopl_request(
4656 vm_object_t object,
4657 vm_object_offset_t offset,
4658 upl_size_t size,
4659 upl_t *upl_ptr,
4660 upl_page_info_array_t user_page_list,
4661 unsigned int *page_list_count,
4662 int cntrl_flags)
4663 {
4664 vm_page_t dst_page;
4665 vm_object_offset_t dst_offset;
4666 upl_size_t xfer_size;
4667 upl_t upl = NULL;
4668 unsigned int entry;
4669 wpl_array_t lite_list = NULL;
4670 int delayed_unlock = 0;
4671 int no_zero_fill = FALSE;
4672 u_int32_t psize;
4673 kern_return_t ret;
4674 vm_prot_t prot;
4675 struct vm_object_fault_info fault_info;
4676
4677
4678 if (cntrl_flags & ~UPL_VALID_FLAGS) {
4679 /*
4680 * For forward compatibility's sake,
4681 * reject any unknown flag.
4682 */
4683 return KERN_INVALID_VALUE;
4684 }
4685 if (vm_lopage_poolsize == 0)
4686 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
4687
4688 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
4689 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
4690 return KERN_INVALID_VALUE;
4691
4692 if (object->phys_contiguous) {
4693 if ((offset + object->shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
4694 return KERN_INVALID_ADDRESS;
4695
4696 if (((offset + object->shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
4697 return KERN_INVALID_ADDRESS;
4698 }
4699 }
4700
4701 if (cntrl_flags & UPL_ENCRYPT) {
4702 /*
4703 * ENCRYPTED SWAP:
4704 * The paging path doesn't use this interface,
4705 * so we don't support the UPL_ENCRYPT flag
4706 * here. We won't encrypt the pages.
4707 */
4708 assert(! (cntrl_flags & UPL_ENCRYPT));
4709 }
4710 if (cntrl_flags & UPL_NOZEROFILL)
4711 no_zero_fill = TRUE;
4712
4713 if (cntrl_flags & UPL_COPYOUT_FROM)
4714 prot = VM_PROT_READ;
4715 else
4716 prot = VM_PROT_READ | VM_PROT_WRITE;
4717
4718 if (((size/page_size) > MAX_UPL_TRANSFER) && !object->phys_contiguous)
4719 size = MAX_UPL_TRANSFER * page_size;
4720
4721 if (cntrl_flags & UPL_SET_INTERNAL) {
4722 if (page_list_count != NULL)
4723 *page_list_count = MAX_UPL_TRANSFER;
4724 }
4725 if (((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
4726 ((page_list_count != NULL) && (*page_list_count != 0) && *page_list_count < (size/page_size)))
4727 return KERN_INVALID_ARGUMENT;
4728
4729 if ((!object->internal) && (object->paging_offset != 0))
4730 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
4731
4732
4733 if (object->phys_contiguous)
4734 psize = PAGE_SIZE;
4735 else
4736 psize = size;
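/*
 * A physically contiguous object only needs a single page's worth of
 * list space: it takes the UPL_DEVICE_MEMORY shortcut below, which
 * only ever fills in user_page_list[0].
 */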
4737
4738 if (cntrl_flags & UPL_SET_INTERNAL) {
4739 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, UPL_IO_WIRE, psize);
4740
4741 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
4742 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
4743 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
4744 } else {
4745 upl = upl_create(UPL_CREATE_LITE, UPL_IO_WIRE, psize);
4746
4747 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
4748 }
4749 if (user_page_list)
4750 user_page_list[0].device = FALSE;
4751 *upl_ptr = upl;
4752
4753 upl->map_object = object;
4754 upl->size = size;
4755
4756 vm_object_lock(object);
4757 vm_object_paging_begin(object);
4758 /*
4759 * paging in progress also protects the paging_offset
4760 */
4761 upl->offset = offset + object->paging_offset;
4762
4763 if (object->phys_contiguous) {
4764 #ifdef UPL_DEBUG
4765 queue_enter(&object->uplq, upl, upl_t, uplq);
4766 #endif /* UPL_DEBUG */
4767
4768 vm_object_unlock(object);
4769
4770 /*
4771 * don't need any shadow mappings for this one
4772 * since it is already I/O memory
4773 */
4774 upl->flags |= UPL_DEVICE_MEMORY;
4775
4776 upl->highest_page = (offset + object->shadow_offset + size - 1)>>PAGE_SHIFT;
4777
4778 if (user_page_list) {
4779 user_page_list[0].phys_addr = (offset + object->shadow_offset)>>PAGE_SHIFT;
4780 user_page_list[0].device = TRUE;
4781 }
4782 if (page_list_count != NULL) {
4783 if (upl->flags & UPL_INTERNAL)
4784 *page_list_count = 0;
4785 else
4786 *page_list_count = 1;
4787 }
4788 return KERN_SUCCESS;
4789 }
4790 /*
4791 * Protect user space from future COW operations
4792 */
4793 object->true_share = TRUE;
4794
4795 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
4796 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4797
4798 #ifdef UPL_DEBUG
4799 queue_enter(&object->uplq, upl, upl_t, uplq);
4800 #endif /* UPL_DEBUG */
4801
4802 if (cntrl_flags & UPL_BLOCK_ACCESS) {
4803 /*
4804 * The user requested that access to the pages in this UPL
4805 * be blocked until the UPL is committed or aborted.
4806 */
4807 upl->flags |= UPL_ACCESS_BLOCKED;
4808 }
4809 entry = 0;
4810
4811 xfer_size = size;
4812 dst_offset = offset;
4813
4814 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
4815 fault_info.user_tag = 0;
4816 fault_info.lo_offset = offset;
4817 fault_info.hi_offset = offset + xfer_size;
4818 fault_info.no_cache = FALSE;
4819
4820 while (xfer_size) {
4821 vm_fault_return_t result;
4822 int pg_num;
4823
4824 dst_page = vm_page_lookup(object, dst_offset);
4825
4826 /*
4827 * ENCRYPTED SWAP:
4828 * If the page is encrypted, we need to decrypt it,
4829 * so force a soft page fault.
4830 */
4831 if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
4832 (dst_page->encrypted) ||
4833 (dst_page->unusual && (dst_page->error ||
4834 dst_page->restart ||
4835 dst_page->absent ||
4836 dst_page->fictitious))) {
4837
4838 do {
4839 vm_page_t top_page;
4840 kern_return_t error_code;
4841 int interruptible;
4842
4843 if (delayed_unlock) {
4844 delayed_unlock = 0;
4845 vm_page_unlock_queues();
4846 }
4847 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
4848 interruptible = THREAD_ABORTSAFE;
4849 else
4850 interruptible = THREAD_UNINT;
4851
4852 fault_info.interruptible = interruptible;
4853 fault_info.cluster_size = xfer_size;
4854
4855 result = vm_fault_page(object, dst_offset,
4856 prot | VM_PROT_WRITE, FALSE,
4857 &prot, &dst_page, &top_page,
4858 (int *)0,
4859 &error_code, no_zero_fill,
4860 FALSE, &fault_info);
4861
4862 switch (result) {
4863
4864 case VM_FAULT_SUCCESS:
4865
4866 PAGE_WAKEUP_DONE(dst_page);
4867 /*
4868 * Release paging references and
4869 * top-level placeholder page, if any.
4870 */
4871 if (top_page != VM_PAGE_NULL) {
4872 vm_object_t local_object;
4873
4874 local_object = top_page->object;
4875
4876 if (top_page->object != dst_page->object) {
4877 vm_object_lock(local_object);
4878 VM_PAGE_FREE(top_page);
4879 vm_object_paging_end(local_object);
4880 vm_object_unlock(local_object);
4881 } else {
4882 VM_PAGE_FREE(top_page);
4883 vm_object_paging_end(local_object);
4884 }
4885 }
4886 break;
4887
4888 case VM_FAULT_RETRY:
4889 vm_object_lock(object);
4890 vm_object_paging_begin(object);
4891 break;
4892
4893 case VM_FAULT_FICTITIOUS_SHORTAGE:
4894 vm_page_more_fictitious();
4895
4896 vm_object_lock(object);
4897 vm_object_paging_begin(object);
4898 break;
4899
4900 case VM_FAULT_MEMORY_SHORTAGE:
4901 if (vm_page_wait(interruptible)) {
4902 vm_object_lock(object);
4903 vm_object_paging_begin(object);
4904 break;
4905 }
4906 /* fall thru */
4907
4908 case VM_FAULT_INTERRUPTED:
4909 error_code = MACH_SEND_INTERRUPTED;
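/* fall through: report the interruption as the error code */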
4910 case VM_FAULT_MEMORY_ERROR:
4911 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
4912
4913 vm_object_lock(object);
4914 vm_object_paging_begin(object);
4915 goto return_err;
4916 }
4917 } while (result != VM_FAULT_SUCCESS);
4918 }
4919
4920 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
4921 dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
4922 vm_page_t low_page;
4923 int refmod;
4924
4925 /*
4926 * support devices that can't DMA above 32 bits
4927 * by substituting pages from a pool of low address
4928 * memory for any pages we find above the 4G mark
4929 * can't substitute if the page is already wired because
4930 * we don't know whether that physical address has been
4931 * handed out to some other 64 bit capable DMA device to use
4932 */
4933 if (dst_page->wire_count) {
4934 ret = KERN_PROTECTION_FAILURE;
4935 goto return_err;
4936 }
4937 if (delayed_unlock) {
4938 delayed_unlock = 0;
4939 vm_page_unlock_queues();
4940 }
4941 low_page = vm_page_grablo();
4942
4943 if (low_page == VM_PAGE_NULL) {
4944 ret = KERN_RESOURCE_SHORTAGE;
4945 goto return_err;
4946 }
4947 /*
4948 * from here until the vm_page_replace completes
4949 * we mustn't drop the object lock... we don't
4950 * want anyone refaulting this page in and using
4951 * it after we disconnect it... we want the fault
4952 * to find the new page being substituted.
4953 */
4954 if (dst_page->pmapped)
4955 refmod = pmap_disconnect(dst_page->phys_page);
4956 else
4957 refmod = 0;
4958 vm_page_copy(dst_page, low_page);
4959
4960 low_page->reference = dst_page->reference;
4961 low_page->dirty = dst_page->dirty;
4962
4963 if (refmod & VM_MEM_REFERENCED)
4964 low_page->reference = TRUE;
4965 if (refmod & VM_MEM_MODIFIED)
4966 low_page->dirty = TRUE;
4967
4968 vm_page_lock_queues();
4969 vm_page_replace(low_page, object, dst_offset);
4970 /*
4971 * keep the queue lock since we're going to
4972 * need it immediately
4973 */
4974 delayed_unlock = 1;
4975
4976 dst_page = low_page;
4977 /*
4978 * vm_page_grablo returned the page marked
4979 * BUSY... we don't need a PAGE_WAKEUP_DONE
4980 * here, because we've never dropped the object lock
4981 */
4982 dst_page->busy = FALSE;
4983 }
4984 if (delayed_unlock == 0)
4985 vm_page_lock_queues();
4986
4987 vm_page_wire(dst_page);
4988
4989 if (cntrl_flags & UPL_BLOCK_ACCESS) {
4990 /*
4991 * Mark the page "busy" to block any future page fault
4992 * on this page. We'll also remove the mapping
4993 * of all these pages before leaving this routine.
4994 */
4995 assert(!dst_page->fictitious);
4996 dst_page->busy = TRUE;
4997 }
4998 pg_num = (dst_offset-offset)/PAGE_SIZE;
4999 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
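/*
 * record this page in the lite bitmap (one bit per page, 32 pages
 * per word); upl_commit_range()/upl_abort_range() clear the same
 * bit when the page is eventually released.
 */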
5000
5001 /*
5002 * expect the page to be used
5003 * page queues lock must be held to set 'reference'
5004 */
5005 dst_page->reference = TRUE;
5006
5007 if (!(cntrl_flags & UPL_COPYOUT_FROM))
5008 dst_page->dirty = TRUE;
5009
5010 if (dst_page->phys_page > upl->highest_page)
5011 upl->highest_page = dst_page->phys_page;
5012
5013 if (user_page_list) {
5014 user_page_list[entry].phys_addr = dst_page->phys_page;
5015 user_page_list[entry].dirty = dst_page->dirty;
5016 user_page_list[entry].pageout = dst_page->pageout;
5017 user_page_list[entry].absent = dst_page->absent;
5018 user_page_list[entry].precious = dst_page->precious;
5019
5020 if (dst_page->clustered == TRUE)
5021 user_page_list[entry].speculative = dst_page->speculative;
5022 else
5023 user_page_list[entry].speculative = FALSE;
5024 }
5025 /*
5026 * someone is explicitly grabbing this page...
5027 * update clustered and speculative state
5028 *
5029 */
5030 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5031
5032 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
5033 mutex_yield(&vm_page_queue_lock);
5034 delayed_unlock = 1;
5035 }
5036 entry++;
5037 dst_offset += PAGE_SIZE_64;
5038 xfer_size -= PAGE_SIZE;
5039 }
5040 if (delayed_unlock)
5041 vm_page_unlock_queues();
5042
5043 if (page_list_count != NULL) {
5044 if (upl->flags & UPL_INTERNAL)
5045 *page_list_count = 0;
5046 else if (*page_list_count > entry)
5047 *page_list_count = entry;
5048 }
5049 vm_object_unlock(object);
5050
5051 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5052 /*
5053 * We've marked all the pages "busy" so that future
5054 * page faults will block.
5055 * Now remove the mapping for these pages, so that they
5056 * can't be accessed without causing a page fault.
5057 */
5058 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
5059 PMAP_NULL, 0, VM_PROT_NONE);
5060 }
5061 return KERN_SUCCESS;
5062
5063 return_err:
5064 if (delayed_unlock)
5065 vm_page_unlock_queues();
5066
5067 for (; offset < dst_offset; offset += PAGE_SIZE) {
5068 dst_page = vm_page_lookup(object, offset);
5069
5070 if (dst_page == VM_PAGE_NULL)
5071 panic("vm_object_iopl_request: Wired pages missing. \n");
5072
5073 vm_page_lockspin_queues();
5074 vm_page_unwire(dst_page);
5075 vm_page_unlock_queues();
5076
5077 VM_STAT_INCR(reactivations);
5078 }
5079 vm_object_paging_end(object);
5080 vm_object_unlock(object);
5081 upl_destroy(upl);
5082
5083 return ret;
5084 }
5085
5086 kern_return_t
5087 upl_transpose(
5088 upl_t upl1,
5089 upl_t upl2)
5090 {
5091 kern_return_t retval;
5092 boolean_t upls_locked;
5093 vm_object_t object1, object2;
5094
5095 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) {
5096 return KERN_INVALID_ARGUMENT;
5097 }
5098
5099 upls_locked = FALSE;
5100
5101 /*
5102 * Since we need to lock both UPLs at the same time,
5103 * avoid deadlocks by always taking locks in the same order.
5104 */
5105 if (upl1 < upl2) {
5106 upl_lock(upl1);
5107 upl_lock(upl2);
5108 } else {
5109 upl_lock(upl2);
5110 upl_lock(upl1);
5111 }
5112 upls_locked = TRUE; /* the UPLs will need to be unlocked */
5113
5114 object1 = upl1->map_object;
5115 object2 = upl2->map_object;
5116
5117 if (upl1->offset != 0 || upl2->offset != 0 ||
5118 upl1->size != upl2->size) {
5119 /*
5120 * We deal only with full objects, not subsets.
5121 * That's because we exchange the entire backing store info
5122 * for the objects: pager, resident pages, etc... We can't do
5123 * only part of it.
5124 */
5125 retval = KERN_INVALID_VALUE;
5126 goto done;
5127 }
5128
5129 /*
5130 * Transpose the VM objects' backing store.
5131 */
5132 retval = vm_object_transpose(object1, object2,
5133 (vm_object_size_t) upl1->size);
5134
5135 if (retval == KERN_SUCCESS) {
5136 /*
5137 * Make each UPL point to the correct VM object, i.e. the
5138 * object holding the pages that the UPL refers to...
5139 */
5140 #ifdef UPL_DEBUG
5141 queue_remove(&object1->uplq, upl1, upl_t, uplq);
5142 queue_remove(&object2->uplq, upl2, upl_t, uplq);
5143 #endif
5144 upl1->map_object = object2;
5145 upl2->map_object = object1;
5146 #ifdef UPL_DEBUG
5147 queue_enter(&object1->uplq, upl2, upl_t, uplq);
5148 queue_enter(&object2->uplq, upl1, upl_t, uplq);
5149 #endif
5150 }
5151
5152 done:
5153 /*
5154 * Cleanup.
5155 */
5156 if (upls_locked) {
5157 upl_unlock(upl1);
5158 upl_unlock(upl2);
5159 upls_locked = FALSE;
5160 }
5161
5162 return retval;
5163 }
5164
5165 /*
5166 * ENCRYPTED SWAP:
5167 *
5168 * Rationale: the user might have some encrypted data on disk (via
5169 * FileVault or any other mechanism). That data is then decrypted in
5170 * memory, which is safe as long as the machine is secure. But that
5171 * decrypted data in memory could be paged out to disk by the default
5172 * pager. The data would then be stored on disk in clear (not encrypted)
5173 * and it could be accessed by anyone who gets physical access to the
5174 * disk (if the laptop or the disk gets stolen for example). This weakens
5175 * the security offered by FileVault.
5176 *
5177 * Solution: the default pager will optionally request that all the
5178 * pages it gathers for pageout be encrypted, via the UPL interfaces,
5179 * before it sends this UPL to disk via the vnode_pageout() path.
5180 *
5181 * Notes:
5182 *
5183 * To avoid disrupting the VM LRU algorithms, we want to keep the
5184 * clean-in-place mechanisms, which allow us to send some extra pages to
5185 * swap (clustering) without actually removing them from the user's
5186 * address space. We don't want the user to unknowingly access encrypted
5187 * data, so we have to actually remove the encrypted pages from the page
5188 * table. When the user accesses the data, the hardware will fail to
5189 * locate the virtual page in its page table and will trigger a page
5190 * fault. We can then decrypt the page and enter it in the page table
5191 * again. Whenever we allow the user to access the contents of a page,
5192 * we have to make sure it's not encrypted.
5193 *
5194 *
5195 */
5196 /*
5197 * ENCRYPTED SWAP:
5198 * Reserve of virtual addresses in the kernel address space.
5199 * We need to map the physical pages in the kernel, so that we
5200 * can call the encryption/decryption routines with a kernel
5201 * virtual address. We keep this pool of pre-allocated kernel
5202 * virtual addresses so that we don't have to scan the kernel's
5203 * virtual address space each time we need to encrypt or decrypt
5204 * a physical page.
5205 * It would be nice to be able to encrypt and decrypt in physical
5206 * mode but that might not always be more efficient...
5207 */
5208 decl_simple_lock_data(,vm_paging_lock)
5209 #define VM_PAGING_NUM_PAGES 64
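/*
 * With 4KB pages this reserves a 64-page (256KB) window of kernel
 * virtual address space, carved out once in vm_paging_map_init().
 */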
5210 vm_map_offset_t vm_paging_base_address = 0;
5211 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
5212 int vm_paging_max_index = 0;
5213 int vm_paging_page_waiter = 0;
5214 int vm_paging_page_waiter_total = 0;
5215 unsigned long vm_paging_no_kernel_page = 0;
5216 unsigned long vm_paging_objects_mapped = 0;
5217 unsigned long vm_paging_pages_mapped = 0;
5218 unsigned long vm_paging_objects_mapped_slow = 0;
5219 unsigned long vm_paging_pages_mapped_slow = 0;
5220
5221 void
5222 vm_paging_map_init(void)
5223 {
5224 kern_return_t kr;
5225 vm_map_offset_t page_map_offset;
5226 vm_map_entry_t map_entry;
5227
5228 assert(vm_paging_base_address == 0);
5229
5230 /*
5231 * Initialize our pool of pre-allocated kernel
5232 * virtual addresses.
5233 */
5234 page_map_offset = 0;
5235 kr = vm_map_find_space(kernel_map,
5236 &page_map_offset,
5237 VM_PAGING_NUM_PAGES * PAGE_SIZE,
5238 0,
5239 0,
5240 &map_entry);
5241 if (kr != KERN_SUCCESS) {
5242 panic("vm_paging_map_init: kernel_map full\n");
5243 }
5244 map_entry->object.vm_object = kernel_object;
5245 map_entry->offset =
5246 page_map_offset - VM_MIN_KERNEL_ADDRESS;
5247 vm_object_reference(kernel_object);
5248 vm_map_unlock(kernel_map);
5249
5250 assert(vm_paging_base_address == 0);
5251 vm_paging_base_address = page_map_offset;
5252 }
5253
5254 /*
5255 * ENCRYPTED SWAP:
5256 * vm_paging_map_object:
5257 * Maps part of a VM object's pages in the kernel
5258 * virtual address space, using the pre-allocated
5259 * kernel virtual addresses, if possible.
5260 * Context:
5261 * The VM object is locked. This lock will get
5262 * dropped and re-acquired though, so the caller
5263 * must make sure the VM object is kept alive
5264 * (by holding a VM map that has a reference
5265 * on it, for example, or taking an extra reference).
5266 * The page should also be kept busy to prevent
5267 * it from being reclaimed.
5268 */
5269 kern_return_t
5270 vm_paging_map_object(
5271 vm_map_offset_t *address,
5272 vm_page_t page,
5273 vm_object_t object,
5274 vm_object_offset_t offset,
5275 vm_map_size_t *size,
5276 boolean_t can_unlock_object)
5277 {
5278 kern_return_t kr;
5279 vm_map_offset_t page_map_offset;
5280 vm_map_size_t map_size;
5281 vm_object_offset_t object_offset;
5282 int i;
5283
5284
5285 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
5286 assert(page->busy);
5287 /*
5288 * Use one of the pre-allocated kernel virtual addresses
5289 * and just enter the VM page in the kernel address space
5290 * at that virtual address.
5291 */
5292 simple_lock(&vm_paging_lock);
5293
5294 /*
5295 * Try and find an available kernel virtual address
5296 * from our pre-allocated pool.
5297 */
5298 page_map_offset = 0;
5299 for (;;) {
5300 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
5301 if (vm_paging_page_inuse[i] == FALSE) {
5302 page_map_offset =
5303 vm_paging_base_address +
5304 (i * PAGE_SIZE);
5305 break;
5306 }
5307 }
5308 if (page_map_offset != 0) {
5309 /* found a space to map our page ! */
5310 break;
5311 }
5312
5313 if (can_unlock_object) {
5314 /*
5315 * If we can afford to unlock the VM object,
5316 * let's take the slow path now...
5317 */
5318 break;
5319 }
5320 /*
5321 * We can't afford to unlock the VM object, so
5322 * let's wait for a space to become available...
5323 */
5324 vm_paging_page_waiter_total++;
5325 vm_paging_page_waiter++;
5326 thread_sleep_fast_usimple_lock(&vm_paging_page_waiter,
5327 &vm_paging_lock,
5328 THREAD_UNINT);
5329 vm_paging_page_waiter--;
5330 /* ... and try again */
5331 }
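/*
 * Leaving the loop with page_map_offset == 0 means no slot was found
 * (only possible when can_unlock_object is TRUE), so we fall through
 * to the slow vm_map_enter() path further down.
 */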
5332
5333 if (page_map_offset != 0) {
5334 /*
5335 * We found a kernel virtual address;
5336 * map the physical page to that virtual address.
5337 */
5338 if (i > vm_paging_max_index) {
5339 vm_paging_max_index = i;
5340 }
5341 vm_paging_page_inuse[i] = TRUE;
5342 simple_unlock(&vm_paging_lock);
5343
5344 if (page->pmapped == FALSE) {
5345 pmap_sync_page_data_phys(page->phys_page);
5346 }
5347 page->pmapped = TRUE;
5348
5349 /*
5350 * Keep the VM object locked over the PMAP_ENTER
5351 * and the actual use of the page by the kernel,
5352 * or this pmap mapping might get undone by a
5353 * vm_object_pmap_protect() call...
5354 */
5355 PMAP_ENTER(kernel_pmap,
5356 page_map_offset,
5357 page,
5358 VM_PROT_DEFAULT,
5359 ((int) page->object->wimg_bits &
5360 VM_WIMG_MASK),
5361 TRUE);
5362 vm_paging_objects_mapped++;
5363 vm_paging_pages_mapped++;
5364 *address = page_map_offset;
5365
5366 /* all done and mapped, ready to use ! */
5367 return KERN_SUCCESS;
5368 }
5369
5370 /*
5371 * We ran out of pre-allocated kernel virtual
5372 * addresses. Just map the page in the kernel
5373 * the slow and regular way.
5374 */
5375 vm_paging_no_kernel_page++;
5376 simple_unlock(&vm_paging_lock);
5377 }
5378
5379 if (! can_unlock_object) {
5380 return KERN_NOT_SUPPORTED;
5381 }
5382
5383 object_offset = vm_object_trunc_page(offset);
5384 map_size = vm_map_round_page(*size);
5385
5386 /*
5387 * Try and map the required range of the object
5388 * in the kernel_map
5389 */
5390
5391 vm_object_reference_locked(object); /* for the map entry */
5392 vm_object_unlock(object);
5393
5394 kr = vm_map_enter(kernel_map,
5395 address,
5396 map_size,
5397 0,
5398 VM_FLAGS_ANYWHERE,
5399 object,
5400 object_offset,
5401 FALSE,
5402 VM_PROT_DEFAULT,
5403 VM_PROT_ALL,
5404 VM_INHERIT_NONE);
5405 if (kr != KERN_SUCCESS) {
5406 *address = 0;
5407 *size = 0;
5408 vm_object_deallocate(object); /* for the map entry */
5409 vm_object_lock(object);
5410 return kr;
5411 }
5412
5413 *size = map_size;
5414
5415 /*
5416 * Enter the mapped pages in the page table now.
5417 */
5418 vm_object_lock(object);
5419 /*
5420 * VM object must be kept locked from before PMAP_ENTER()
5421 * until after the kernel is done accessing the page(s).
5422 * Otherwise, the pmap mappings in the kernel could be
5423 * undone by a call to vm_object_pmap_protect().
5424 */
5425
5426 for (page_map_offset = 0;
5427 map_size != 0;
5428 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
5429 unsigned int cache_attr;
5430
5431 page = vm_page_lookup(object, offset + page_map_offset);
5432 if (page == VM_PAGE_NULL) {
5433 printf("vm_paging_map_object: no page !?");
5434 vm_object_unlock(object);
5435 kr = vm_map_remove(kernel_map, *address, *size,
5436 VM_MAP_NO_FLAGS);
5437 assert(kr == KERN_SUCCESS);
5438 *address = 0;
5439 *size = 0;
5440 vm_object_lock(object);
5441 return KERN_MEMORY_ERROR;
5442 }
5443 if (page->pmapped == FALSE) {
5444 pmap_sync_page_data_phys(page->phys_page);
5445 }
5446 page->pmapped = TRUE;
5447 page->wpmapped = TRUE;
5448 cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
5449
5450 //assert(pmap_verify_free(page->phys_page));
5451 PMAP_ENTER(kernel_pmap,
5452 *address + page_map_offset,
5453 page,
5454 VM_PROT_DEFAULT,
5455 cache_attr,
5456 TRUE);
5457 }
5458
5459 vm_paging_objects_mapped_slow++;
5460 vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64;
5461
5462 return KERN_SUCCESS;
5463 }
5464
5465 /*
5466 * ENCRYPTED SWAP:
5467 * vm_paging_unmap_object:
5468 * Unmaps part of a VM object's pages from the kernel
5469 * virtual address space.
5470 * Context:
5471 * The VM object is locked. This lock will get
5472 * dropped and re-acquired though.
5473 */
5474 void
5475 vm_paging_unmap_object(
5476 vm_object_t object,
5477 vm_map_offset_t start,
5478 vm_map_offset_t end)
5479 {
5480 kern_return_t kr;
5481 int i;
5482
5483 if ((vm_paging_base_address == 0) ||
5484 (start < vm_paging_base_address) ||
5485 (end > (vm_paging_base_address
5486 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
5487 /*
5488 * We didn't use our pre-allocated pool of
5489 * kernel virtual address. Deallocate the
5490 * virtual memory.
5491 */
5492 if (object != VM_OBJECT_NULL) {
5493 vm_object_unlock(object);
5494 }
5495 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
5496 if (object != VM_OBJECT_NULL) {
5497 vm_object_lock(object);
5498 }
5499 assert(kr == KERN_SUCCESS);
5500 } else {
5501 /*
5502 * We used a kernel virtual address from our
5503 * pre-allocated pool. Put it back in the pool
5504 * for next time.
5505 */
5506 assert(end - start == PAGE_SIZE);
5507 i = (start - vm_paging_base_address) >> PAGE_SHIFT;
5508
5509 /* undo the pmap mapping */
5510 pmap_remove(kernel_pmap, start, end);
5511
5512 simple_lock(&vm_paging_lock);
5513 vm_paging_page_inuse[i] = FALSE;
5514 if (vm_paging_page_waiter) {
5515 thread_wakeup(&vm_paging_page_waiter);
5516 }
5517 simple_unlock(&vm_paging_lock);
5518 }
5519 }
5520
5521 #if CRYPTO
5522 /*
5523 * Encryption data.
5524 * "iv" is the "initial vector". Ideally, we want to
5525 * have a different one for each page we encrypt, so that
5526 * crackers can't find encryption patterns too easily.
5527 */
5528 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
5529 boolean_t swap_crypt_ctx_initialized = FALSE;
5530 aes_32t swap_crypt_key[8]; /* big enough for a 256 key */
5531 aes_ctx swap_crypt_ctx;
5532 const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
5533
5534 #if DEBUG
5535 boolean_t swap_crypt_ctx_tested = FALSE;
5536 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
5537 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
5538 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
5539 #endif /* DEBUG */
5540
5541 extern u_long random(void);
5542
5543 /*
5544 * Initialize the encryption context: key and key size.
5545 */
5546 void swap_crypt_ctx_initialize(void); /* forward */
5547 void
5548 swap_crypt_ctx_initialize(void)
5549 {
5550 unsigned int i;
5551
5552 /*
5553 * No need for locking to protect swap_crypt_ctx_initialized
5554 * because the first use of encryption will come from the
5555 * pageout thread (we won't pagein before there's been a pageout)
5556 * and there's only one pageout thread.
5557 */
5558 if (swap_crypt_ctx_initialized == FALSE) {
5559 for (i = 0;
5560 i < (sizeof (swap_crypt_key) /
5561 sizeof (swap_crypt_key[0]));
5562 i++) {
5563 swap_crypt_key[i] = random();
5564 }
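/*
 * swap_crypt_key[] is filled with 256 bits of random() output, but
 * only SWAP_CRYPT_AES_KEY_SIZE (128) bits of it are handed to the
 * key-schedule calls below.
 */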
5565 aes_encrypt_key((const unsigned char *) swap_crypt_key,
5566 SWAP_CRYPT_AES_KEY_SIZE,
5567 &swap_crypt_ctx.encrypt);
5568 aes_decrypt_key((const unsigned char *) swap_crypt_key,
5569 SWAP_CRYPT_AES_KEY_SIZE,
5570 &swap_crypt_ctx.decrypt);
5571 swap_crypt_ctx_initialized = TRUE;
5572 }
5573
5574 #if DEBUG
5575 /*
5576 * Validate the encryption algorithms.
5577 */
5578 if (swap_crypt_ctx_tested == FALSE) {
5579 /* initialize */
5580 for (i = 0; i < 4096; i++) {
5581 swap_crypt_test_page_ref[i] = (char) i;
5582 }
5583 /* encrypt */
5584 aes_encrypt_cbc(swap_crypt_test_page_ref,
5585 swap_crypt_null_iv,
5586 PAGE_SIZE / AES_BLOCK_SIZE,
5587 swap_crypt_test_page_encrypt,
5588 &swap_crypt_ctx.encrypt);
5589 /* decrypt */
5590 aes_decrypt_cbc(swap_crypt_test_page_encrypt,
5591 swap_crypt_null_iv,
5592 PAGE_SIZE / AES_BLOCK_SIZE,
5593 swap_crypt_test_page_decrypt,
5594 &swap_crypt_ctx.decrypt);
5595 /* compare result with original */
5596 for (i = 0; i < 4096; i ++) {
5597 if (swap_crypt_test_page_decrypt[i] !=
5598 swap_crypt_test_page_ref[i]) {
5599 panic("encryption test failed");
5600 }
5601 }
5602
5603 /* encrypt again */
5604 aes_encrypt_cbc(swap_crypt_test_page_decrypt,
5605 swap_crypt_null_iv,
5606 PAGE_SIZE / AES_BLOCK_SIZE,
5607 swap_crypt_test_page_decrypt,
5608 &swap_crypt_ctx.encrypt);
5609 /* decrypt in place */
5610 aes_decrypt_cbc(swap_crypt_test_page_decrypt,
5611 swap_crypt_null_iv,
5612 PAGE_SIZE / AES_BLOCK_SIZE,
5613 swap_crypt_test_page_decrypt,
5614 &swap_crypt_ctx.decrypt);
5615 for (i = 0; i < 4096; i ++) {
5616 if (swap_crypt_test_page_decrypt[i] !=
5617 swap_crypt_test_page_ref[i]) {
5618 panic("in place encryption test failed");
5619 }
5620 }
5621
5622 swap_crypt_ctx_tested = TRUE;
5623 }
5624 #endif /* DEBUG */
5625 }
5626
5627 /*
5628 * ENCRYPTED SWAP:
5629 * vm_page_encrypt:
5630 * Encrypt the given page, for secure paging.
5631 * The page might already be mapped at kernel virtual
5632 * address "kernel_mapping_offset". Otherwise, we need
5633 * to map it.
5634 *
5635 * Context:
5636 * The page's object is locked, but this lock will be released
5637 * and re-acquired.
5638 * The page is busy and not accessible by users (not entered in any pmap).
5639 */
5640 void
5641 vm_page_encrypt(
5642 vm_page_t page,
5643 vm_map_offset_t kernel_mapping_offset)
5644 {
5645 kern_return_t kr;
5646 vm_map_size_t kernel_mapping_size;
5647 vm_offset_t kernel_vaddr;
5648 union {
5649 unsigned char aes_iv[AES_BLOCK_SIZE];
5650 struct {
5651 memory_object_t pager_object;
5652 vm_object_offset_t paging_offset;
5653 } vm;
5654 } encrypt_iv;
5655
5656 if (! vm_pages_encrypted) {
5657 vm_pages_encrypted = TRUE;
5658 }
5659
5660 assert(page->busy);
5661 assert(page->dirty || page->precious);
5662
5663 if (page->encrypted) {
5664 /*
5665 * Already encrypted: no need to do it again.
5666 */
5667 vm_page_encrypt_already_encrypted_counter++;
5668 return;
5669 }
5670 ASSERT_PAGE_DECRYPTED(page);
5671
5672 /*
5673 * Take a paging-in-progress reference to keep the object
5674 * alive even if we have to unlock it (in vm_paging_map_object()
5675 * for example)...
5676 */
5677 vm_object_paging_begin(page->object);
5678
5679 if (kernel_mapping_offset == 0) {
5680 /*
5681 * The page hasn't already been mapped in kernel space
5682 * by the caller. Map it now, so that we can access
5683 * its contents and encrypt them.
5684 */
5685 kernel_mapping_size = PAGE_SIZE;
5686 kr = vm_paging_map_object(&kernel_mapping_offset,
5687 page,
5688 page->object,
5689 page->offset,
5690 &kernel_mapping_size,
5691 FALSE);
5692 if (kr != KERN_SUCCESS) {
5693 panic("vm_page_encrypt: "
5694 "could not map page in kernel: 0x%x\n",
5695 kr);
5696 }
5697 } else {
5698 kernel_mapping_size = 0;
5699 }
5700 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5701
5702 if (swap_crypt_ctx_initialized == FALSE) {
5703 swap_crypt_ctx_initialize();
5704 }
5705 assert(swap_crypt_ctx_initialized);
5706
5707 /*
5708 * Prepare an "initial vector" for the encryption.
5709 * We use the "pager" and the "paging_offset" for that
5710 * page to obfuscate the encrypted data a bit more and
5711 * prevent crackers from finding patterns that they could
5712 * use to break the key.
5713 */
5714 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
5715 encrypt_iv.vm.pager_object = page->object->pager;
5716 encrypt_iv.vm.paging_offset =
5717 page->object->paging_offset + page->offset;
5718
5719 /* encrypt the "initial vector" */
5720 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
5721 swap_crypt_null_iv,
5722 1,
5723 &encrypt_iv.aes_iv[0],
5724 &swap_crypt_ctx.encrypt);
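/*
 * encrypt_iv now holds the per-page IV: the (pager, paging_offset)
 * pair run through AES-CBC with a null IV. vm_page_decrypt() below
 * recomputes the identical value from the same inputs.
 */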
5725
5726 /*
5727 * Encrypt the page.
5728 */
5729 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
5730 &encrypt_iv.aes_iv[0],
5731 PAGE_SIZE / AES_BLOCK_SIZE,
5732 (unsigned char *) kernel_vaddr,
5733 &swap_crypt_ctx.encrypt);
5734
5735 vm_page_encrypt_counter++;
5736
5737 /*
5738 * Unmap the page from the kernel's address space,
5739 * if we had to map it ourselves. Otherwise, let
5740 * the caller undo the mapping if needed.
5741 */
5742 if (kernel_mapping_size != 0) {
5743 vm_paging_unmap_object(page->object,
5744 kernel_mapping_offset,
5745 kernel_mapping_offset + kernel_mapping_size);
5746 }
5747
5748 /*
5749 * Clear the "reference" and "modified" bits.
5750 * This should clean up any impact the encryption had
5751 * on them.
5752 * The page was kept busy and disconnected from all pmaps,
5753 * so it can't have been referenced or modified from user
5754 * space.
5755 * The software bits will be reset later after the I/O
5756 * has completed (in upl_commit_range()).
5757 */
5758 pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
5759
5760 page->encrypted = TRUE;
5761
5762 vm_object_paging_end(page->object);
5763 }
5764
5765 /*
5766 * ENCRYPTED SWAP:
5767 * vm_page_decrypt:
5768 * Decrypt the given page.
5769 * The page might already be mapped at kernel virtual
5770 * address "kernel_mapping_offset". Otherwise, we need
5771 * to map it.
5772 *
5773 * Context:
5774 * The page's VM object is locked but will be unlocked and relocked.
5775 * The page is busy and not accessible by users (not entered in any pmap).
5776 */
5777 void
5778 vm_page_decrypt(
5779 vm_page_t page,
5780 vm_map_offset_t kernel_mapping_offset)
5781 {
5782 kern_return_t kr;
5783 vm_map_size_t kernel_mapping_size;
5784 vm_offset_t kernel_vaddr;
5785 union {
5786 unsigned char aes_iv[AES_BLOCK_SIZE];
5787 struct {
5788 memory_object_t pager_object;
5789 vm_object_offset_t paging_offset;
5790 } vm;
5791 } decrypt_iv;
5792
5793 assert(page->busy);
5794 assert(page->encrypted);
5795
5796 /*
5797 * Take a paging-in-progress reference to keep the object
5798 * alive even if we have to unlock it (in vm_paging_map_object()
5799 * for example)...
5800 */
5801 vm_object_paging_begin(page->object);
5802
5803 if (kernel_mapping_offset == 0) {
5804 /*
5805 * The page hasn't already been mapped in kernel space
5806 * by the caller. Map it now, so that we can access
5807 * its contents and decrypt them.
5808 */
5809 kernel_mapping_size = PAGE_SIZE;
5810 kr = vm_paging_map_object(&kernel_mapping_offset,
5811 page,
5812 page->object,
5813 page->offset,
5814 &kernel_mapping_size,
5815 FALSE);
5816 if (kr != KERN_SUCCESS) {
5817 panic("vm_page_decrypt: "
5818 "could not map page in kernel: 0x%x\n",
5819 kr);
5820 }
5821 } else {
5822 kernel_mapping_size = 0;
5823 }
5824 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5825
5826 assert(swap_crypt_ctx_initialized);
5827
5828 /*
5829 * Prepare the "initialization vector" (IV) for the decryption.
5830 * It has to be the same IV we used when this page was
5831 * encrypted.
5832 */
5833 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
5834 decrypt_iv.vm.pager_object = page->object->pager;
5835 decrypt_iv.vm.paging_offset =
5836 page->object->paging_offset + page->offset;
5837
5838 /* derive the IV exactly as the encryption path did: encrypt it with the swap key */
5839 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
5840 swap_crypt_null_iv,
5841 1,
5842 &decrypt_iv.aes_iv[0],
5843 &swap_crypt_ctx.encrypt);
5844
5845 /*
5846 * Decrypt the page.
5847 */
5848 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
5849 &decrypt_iv.aes_iv[0],
5850 PAGE_SIZE / AES_BLOCK_SIZE,
5851 (unsigned char *) kernel_vaddr,
5852 &swap_crypt_ctx.decrypt);
5853 vm_page_decrypt_counter++;
5854
5855 /*
5856 * Unmap the page from the kernel's address space,
5857 * if we had to map it ourselves. Otherwise, let
5858 * the caller undo the mapping if needed.
5859 */
5860 if (kernel_mapping_size != 0) {
5861 vm_paging_unmap_object(page->object,
5862 kernel_vaddr,
5863 kernel_vaddr + PAGE_SIZE);
5864 }
5865
5866 /*
5867 * After decryption, the page is actually clean.
5868 * It was encrypted as part of paging, which "cleans"
5869 * the "dirty" pages.
5870 * No one could have accessed it after it was encrypted,
5871 * and the decryption itself does not count as a modification.
5872 */
5873 page->dirty = FALSE;
5874 if (page->cs_validated && !page->cs_tainted) {
5875 /*
5876 * CODE SIGNING:
5877 * This page is no longer dirty
5878 * but could have been modified,
5879 * so it will need to be
5880 * re-validated.
5881 */
5882 page->cs_validated = FALSE;
5883 vm_cs_validated_resets++;
5884 }
5885 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
5886
5887 page->encrypted = FALSE;
5888
5889 /*
5890 * We've just modified the page's contents via the data cache and part
5891 * of the new contents might still be in the cache and not yet in RAM.
5892 * Since the page is now available and might get gathered in a UPL to
5893 * be part of a DMA transfer from a driver that expects the memory to
5894 * be coherent at this point, we have to flush the data cache.
5895 */
5896 pmap_sync_page_attributes_phys(page->phys_page);
5897 /*
5898 * Since the page is not mapped yet, some code might assume that it
5899 * doesn't need to invalidate the instruction cache when writing to
5900 * that page. That code relies on "pmapped" being FALSE, so that the
5901 * caches get synchronized when the page is first mapped.
5902 */
5903 assert(pmap_verify_free(page->phys_page));
5904 page->pmapped = FALSE;
5905 page->wpmapped = FALSE;
5906
5907 vm_object_paging_end(page->object);
5908 }
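/*
 * Usage sketch (illustrative; "m" is a hypothetical name): the fault
 * path is the natural consumer of vm_page_decrypt().  When it finds an
 * encrypted resident page it must decrypt it, with the page busy and
 * its object locked, before the page can be entered into any pmap:
 *
 *	if (m->encrypted) {
 *		assert(m->busy);
 *		vm_page_decrypt(m, 0);	(0: let it map the page itself)
 *		assert(!m->encrypted);
 *	}
 */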
5909
5910 unsigned long upl_encrypt_upls = 0;
5911 unsigned long upl_encrypt_pages = 0;
5912
5913 /*
5914 * ENCRYPTED SWAP:
5915 *
5916 * upl_encrypt:
5917 * Encrypts all the pages of the UPL that fall within the specified range.
5918 *
5919 */
5920 void
5921 upl_encrypt(
5922 upl_t upl,
5923 upl_offset_t crypt_offset,
5924 upl_size_t crypt_size)
5925 {
5926 upl_size_t upl_size;
5927 upl_offset_t upl_offset;
5928 vm_object_t upl_object;
5929 vm_page_t page;
5930 vm_object_t shadow_object;
5931 vm_object_offset_t shadow_offset;
5932 vm_object_offset_t paging_offset;
5933 vm_object_offset_t base_offset;
5934
5935 upl_encrypt_upls++;
5936 upl_encrypt_pages += crypt_size / PAGE_SIZE;
5937
5938 upl_object = upl->map_object;
5939 upl_offset = upl->offset;
5940 upl_size = upl->size;
5941
5942 vm_object_lock(upl_object);
5943
5944 /*
5945 * Find the VM object that contains the actual pages.
5946 */
5947 if (upl_object->pageout) {
5948 shadow_object = upl_object->shadow;
5949 /*
5950 * The offset in the shadow object is actually also
5951 * accounted for in upl->offset. It possibly shouldn't be
5952 * this way, but for now don't account for it twice.
5953 */
5954 shadow_offset = 0;
5955 assert(upl_object->paging_offset == 0); /* XXX ? */
5956 vm_object_lock(shadow_object);
5957 } else {
5958 shadow_object = upl_object;
5959 shadow_offset = 0;
5960 }
5961
5962 paging_offset = shadow_object->paging_offset;
5963 vm_object_paging_begin(shadow_object);
5964
5965 if (shadow_object != upl_object)
5966 vm_object_unlock(upl_object);
5967
5968
5969 base_offset = shadow_offset;
5970 base_offset += upl_offset;
5971 base_offset += crypt_offset;
5972 base_offset -= paging_offset;
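	/*
	 * base_offset converts offsets within the crypt range into offsets
	 * in shadow_object: crypt_offset is relative to the start of the
	 * UPL, upl->offset places the UPL in the pager's address space, and
	 * subtracting paging_offset maps that back into object space for
	 * the vm_page_lookup() below.
	 */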
5973
5974 assert(crypt_offset + crypt_size <= upl_size);
5975
5976 for (upl_offset = 0;
5977 upl_offset < crypt_size;
5978 upl_offset += PAGE_SIZE) {
5979 page = vm_page_lookup(shadow_object,
5980 base_offset + upl_offset);
5981 if (page == VM_PAGE_NULL) {
5982 panic("upl_encrypt: "
5983 "no page for (obj=%p,off=%lld+%d)!\n",
5984 shadow_object,
5985 base_offset,
5986 upl_offset);
5987 }
5988 /*
5989 * Disconnect the page from all pmaps, so that nobody can
5990 * access it while it's encrypted. After that point, all
5991 * accesses to this page will cause a page fault and block
5992 * while the page is busy being encrypted. After the
5993 * encryption completes, any access will cause a
5994 * page fault and the page gets decrypted at that time.
5995 */
5996 pmap_disconnect(page->phys_page);
5997 vm_page_encrypt(page, 0);
5998
5999 if (shadow_object == vm_pageout_scan_wants_object) {
6000 /*
6001 * Give vm_pageout_scan() a chance to convert more
6002 * pages from "clean-in-place" to "clean-and-free",
6003 * if it's interested in the same pages we selected
6004 * in this cluster.
6005 */
6006 vm_object_unlock(shadow_object);
6007 vm_object_lock(shadow_object);
6008 }
6009 }
6010
6011 vm_object_paging_end(shadow_object);
6012 vm_object_unlock(shadow_object);
6013 }
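/*
 * Usage sketch (illustrative): a pageout path that wants the whole UPL
 * protected before handing it to the pager could encrypt every page it
 * covers in one call:
 *
 *	upl_encrypt(upl, 0, upl->size);
 *
 * The crypt_offset/crypt_size pair lets a caller restrict the work to a
 * sub-range of the UPL, as checked by the assert above
 * (crypt_offset + crypt_size <= upl_size).
 */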
6014
6015 #else /* CRYPTO */
6016 void
6017 upl_encrypt(
6018 __unused upl_t upl,
6019 __unused upl_offset_t crypt_offset,
6020 __unused upl_size_t crypt_size)
6021 {
6022 }
6023
6024 void
6025 vm_page_encrypt(
6026 __unused vm_page_t page,
6027 __unused vm_map_offset_t kernel_mapping_offset)
6028 {
6029 }
6030
6031 void
6032 vm_page_decrypt(
6033 __unused vm_page_t page,
6034 __unused vm_map_offset_t kernel_mapping_offset)
6035 {
6036 }
6037
6038 #endif /* CRYPTO */
6039
6040 vm_size_t
6041 upl_get_internal_pagelist_offset(void)
6042 {
6043 return sizeof(struct upl);
6044 }
6045
6046 void
6047 upl_clear_dirty(
6048 upl_t upl,
6049 boolean_t value)
6050 {
6051 if (value) {
6052 upl->flags |= UPL_CLEAR_DIRTY;
6053 } else {
6054 upl->flags &= ~UPL_CLEAR_DIRTY;
6055 }
6056 }
6057
6058
6059 #ifdef MACH_BSD
6060
6061 boolean_t upl_device_page(upl_page_info_t *upl)
6062 {
6063 return(UPL_DEVICE_PAGE(upl));
6064 }
6065 boolean_t upl_page_present(upl_page_info_t *upl, int index)
6066 {
6067 return(UPL_PAGE_PRESENT(upl, index));
6068 }
6069 boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
6070 {
6071 return(UPL_SPECULATIVE_PAGE(upl, index));
6072 }
6073 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
6074 {
6075 return(UPL_DIRTY_PAGE(upl, index));
6076 }
6077 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
6078 {
6079 return(UPL_VALID_PAGE(upl, index));
6080 }
6081 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
6082 {
6083 return(UPL_PHYS_PAGE(upl, index));
6084 }
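/*
 * Usage sketch (illustrative; "pl", "count" and process_dirty_page() are
 * hypothetical): a BSD-side caller holding the upl_page_info_t array for
 * a UPL can use the accessors above to walk its resident dirty pages:
 *
 *	int i;
 *
 *	for (i = 0; i < count; i++) {
 *		if (!upl_page_present(pl, i) || !upl_valid_page(pl, i))
 *			continue;
 *		if (upl_dirty_page(pl, i))
 *			process_dirty_page(upl_phys_page(pl, i));
 *	}
 */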
6085
6086
6087 void
6088 vm_countdirtypages(void)
6089 {
6090 vm_page_t m;
6091 int dpages;
6092 int pgopages;
6093 int precpages;
6094
6095
6096 dpages=0;
6097 pgopages=0;
6098 precpages=0;
6099
6100 vm_page_lock_queues();
6101 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6102 do {
6103 if (m ==(vm_page_t )0) break;
6104
6105 if(m->dirty) dpages++;
6106 if(m->pageout) pgopages++;
6107 if(m->precious) precpages++;
6108
6109 assert(m->object != kernel_object);
6110 m = (vm_page_t) queue_next(&m->pageq);
6111 if (m ==(vm_page_t )0) break;
6112
6113 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
6114 vm_page_unlock_queues();
6115
6116 vm_page_lock_queues();
6117 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
6118 do {
6119 if (m ==(vm_page_t )0) break;
6120
6121 dpages++;
6122 assert(m->dirty);
6123 assert(!m->pageout);
6124 assert(m->object != kernel_object);
6125 m = (vm_page_t) queue_next(&m->pageq);
6126 if (m ==(vm_page_t )0) break;
6127
6128 } while (!queue_end(&vm_page_queue_throttled,(queue_entry_t) m));
6129 vm_page_unlock_queues();
6130
6131 vm_page_lock_queues();
6132 m = (vm_page_t) queue_first(&vm_page_queue_zf);
6133 do {
6134 if (m ==(vm_page_t )0) break;
6135
6136 if(m->dirty) dpages++;
6137 if(m->pageout) pgopages++;
6138 if(m->precious) precpages++;
6139
6140 assert(m->object != kernel_object);
6141 m = (vm_page_t) queue_next(&m->pageq);
6142 if (m ==(vm_page_t )0) break;
6143
6144 } while (!queue_end(&vm_page_queue_zf,(queue_entry_t) m));
6145 vm_page_unlock_queues();
6146
6147 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
6148
6149 dpages=0;
6150 pgopages=0;
6151 precpages=0;
6152
6153 vm_page_lock_queues();
6154 m = (vm_page_t) queue_first(&vm_page_queue_active);
6155
6156 do {
6157 if(m == (vm_page_t )0) break;
6158 if(m->dirty) dpages++;
6159 if(m->pageout) pgopages++;
6160 if(m->precious) precpages++;
6161
6162 assert(m->object != kernel_object);
6163 m = (vm_page_t) queue_next(&m->pageq);
6164 if(m == (vm_page_t )0) break;
6165
6166 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
6167 vm_page_unlock_queues();
6168
6169 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
6170
6171 }
6172 #endif /* MACH_BSD */
6173
6174 ppnum_t upl_get_highest_page(
6175 upl_t upl)
6176 {
6177 return upl->highest_page;
6178 }
6179
6180 #ifdef UPL_DEBUG
6181 kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
6182 {
6183 upl->ubc_alias1 = alias1;
6184 upl->ubc_alias2 = alias2;
6185 return KERN_SUCCESS;
6186 }
6187 int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
6188 {
6189 if(al)
6190 *al = upl->ubc_alias1;
6191 if(al2)
6192 *al2 = upl->ubc_alias2;
6193 return KERN_SUCCESS;
6194 }
6195 #endif /* UPL_DEBUG */
6196
6197
6198
6199 #if MACH_KDB
6200 #include <ddb/db_output.h>
6201 #include <ddb/db_print.h>
6202 #include <vm/vm_print.h>
6203
6204 #define printf kdbprintf
6205 void db_pageout(void);
6206
6207 void
6208 db_vm(void)
6209 {
6210
6211 iprintf("VM Statistics:\n");
6212 db_indent += 2;
6213 iprintf("pages:\n");
6214 db_indent += 2;
6215 iprintf("activ %5d inact %5d free %5d",
6216 vm_page_active_count, vm_page_inactive_count,
6217 vm_page_free_count);
6218 printf(" wire %5d gobbl %5d\n",
6219 vm_page_wire_count, vm_page_gobble_count);
6220 db_indent -= 2;
6221 iprintf("target:\n");
6222 db_indent += 2;
6223 iprintf("min %5d inact %5d free %5d",
6224 vm_page_free_min, vm_page_inactive_target,
6225 vm_page_free_target);
6226 printf(" resrv %5d\n", vm_page_free_reserved);
6227 db_indent -= 2;
6228 iprintf("pause:\n");
6229 db_pageout();
6230 db_indent -= 2;
6231 }
6232
6233 #if MACH_COUNTERS
6234 extern int c_laundry_pages_freed;
6235 #endif /* MACH_COUNTERS */
6236
6237 void
6238 db_pageout(void)
6239 {
6240 iprintf("Pageout Statistics:\n");
6241 db_indent += 2;
6242 iprintf("active %5d inactv %5d\n",
6243 vm_pageout_active, vm_pageout_inactive);
6244 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
6245 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
6246 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
6247 iprintf("used %5d clean %5d dirty %5d\n",
6248 vm_pageout_inactive_used, vm_pageout_inactive_clean,
6249 vm_pageout_inactive_dirty);
6250 #if MACH_COUNTERS
6251 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
6252 #endif /* MACH_COUNTERS */
6253 #if MACH_CLUSTER_STATS
6254 iprintf("Cluster Statistics:\n");
6255 db_indent += 2;
6256 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
6257 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
6258 vm_pageout_cluster_collisions);
6259 iprintf("clusters %5d conversions %5d\n",
6260 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
6261 db_indent -= 2;
6262 iprintf("Target Statistics:\n");
6263 db_indent += 2;
6264 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
6265 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
6266 vm_pageout_target_page_freed);
6267 db_indent -= 2;
6268 #endif /* MACH_CLUSTER_STATS */
6269 db_indent -= 2;
6270 }
6271
6272 #endif /* MACH_KDB */