1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71 #include <mach_kdb.h>
72 #include <advisory_pageout.h>
73
74 #include <mach/mach_types.h>
75 #include <mach/memory_object.h>
76 #include <mach/memory_object_default.h>
77 #include <mach/memory_object_control_server.h>
78 #include <mach/mach_host_server.h>
79 #include <mach/upl.h>
80 #include <mach/vm_map.h>
81 #include <mach/vm_param.h>
82 #include <mach/vm_statistics.h>
83 #include <mach/sdt.h>
84
85 #include <kern/kern_types.h>
86 #include <kern/counters.h>
87 #include <kern/host_statistics.h>
88 #include <kern/machine.h>
89 #include <kern/misc_protos.h>
90 #include <kern/thread.h>
91 #include <kern/xpr.h>
92 #include <kern/kalloc.h>
93
94 #include <machine/vm_tuning.h>
95
96 #if CONFIG_EMBEDDED
97 #include <sys/kern_memorystatus.h>
98 #endif
99
100 #include <vm/pmap.h>
101 #include <vm/vm_fault.h>
102 #include <vm/vm_map.h>
103 #include <vm/vm_object.h>
104 #include <vm/vm_page.h>
105 #include <vm/vm_pageout.h>
106 #include <vm/vm_protos.h> /* must be last */
107 #include <vm/memory_object.h>
108 #include <vm/vm_purgeable_internal.h>
109
110 /*
111 * ENCRYPTED SWAP:
112 */
113 #include <../bsd/crypto/aes/aes.h>
114
115
116 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */
117 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100
118 #endif
119
120 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
121 #ifdef CONFIG_EMBEDDED
122 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
123 #else
124 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
125 #endif
126 #endif
127
128 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
129 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
130 #endif
131
132 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
133 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
134 #endif
135
136 #ifndef VM_PAGE_LAUNDRY_MAX
137 #define VM_PAGE_LAUNDRY_MAX 16UL /* maximum pageouts on a given pageout queue */
138 #endif /* VM_PAGE_LAUNDRY_MAX */
139
140 #ifndef VM_PAGEOUT_BURST_WAIT
141 #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
142 #endif /* VM_PAGEOUT_BURST_WAIT */
143
144 #ifndef VM_PAGEOUT_EMPTY_WAIT
145 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
146 #endif /* VM_PAGEOUT_EMPTY_WAIT */
147
148 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
149 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
150 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
151
152 #ifndef VM_PAGEOUT_IDLE_WAIT
153 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
154 #endif /* VM_PAGEOUT_IDLE_WAIT */
155
156 #ifndef VM_PAGE_SPECULATIVE_TARGET
157 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / 20)
158 #endif /* VM_PAGE_SPECULATIVE_TARGET */
159
160 #ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
161 #define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
162 #endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */
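/*
 * Illustrative arithmetic only (the page count below is hypothetical,
 * not taken from this source): for an argument of 400000 pages,
 *
 *	VM_PAGE_SPECULATIVE_TARGET(400000)     = 400000 / 20  = 20000 pages (5%)
 *	VM_PAGE_INACTIVE_HEALTHY_LIMIT(400000) = 400000 / 200 =  2000 pages (0.5%)
 */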
163
164
165 /*
166 * To obtain a reasonable LRU approximation, the inactive queue
167 * needs to be large enough to give pages on it a chance to be
168 * referenced a second time. This macro defines the fraction
169 * of active+inactive pages that should be inactive.
170 * The pageout daemon uses it to update vm_page_inactive_target.
171 *
172 * If vm_page_free_count falls below vm_page_free_target and
173 * vm_page_inactive_count is below vm_page_inactive_target,
174 * then the pageout daemon starts running.
175 */
176
177 #ifndef VM_PAGE_INACTIVE_TARGET
178 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
179 #endif /* VM_PAGE_INACTIVE_TARGET */
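/*
 * Worked example (hypothetical counts): if active + inactive +
 * speculative pages add up to 120000, then
 *
 *	VM_PAGE_INACTIVE_TARGET(120000) = 120000 / 3 = 40000 pages
 *
 * i.e. roughly one third of the available pages should sit on the
 * inactive queue, so a second reference has a chance to be observed
 * before a page is stolen.
 */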
180
181 /*
182 * Once the pageout daemon starts running, it keeps going
183 * until vm_page_free_count meets or exceeds vm_page_free_target.
184 */
185
186 #ifndef VM_PAGE_FREE_TARGET
187 #ifdef CONFIG_EMBEDDED
188 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
189 #else
190 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
191 #endif
192 #endif /* VM_PAGE_FREE_TARGET */
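/*
 * Worked example (hypothetical free-page count of 8000):
 *
 *	embedded:  VM_PAGE_FREE_TARGET(8000) = 15 + 8000 / 100 =  95 pages
 *	otherwise: VM_PAGE_FREE_TARGET(8000) = 15 + 8000 / 80  = 115 pages
 *
 * so the non-embedded configuration keeps a slightly larger cushion of
 * free pages before the pageout daemon stops running.
 */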
193
194 /*
195 * The pageout daemon always starts running once vm_page_free_count
196 * falls below vm_page_free_min.
197 */
198
199 #ifndef VM_PAGE_FREE_MIN
200 #ifdef CONFIG_EMBEDDED
201 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
202 #else
203 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
204 #endif
205 #endif /* VM_PAGE_FREE_MIN */
206
207 #define VM_PAGE_FREE_MIN_LIMIT 1500
208 #define VM_PAGE_FREE_TARGET_LIMIT 2000
209
210
211 /*
212 * When vm_page_free_count falls below vm_page_free_reserved,
213 * only vm-privileged threads can allocate pages. vm-privilege
214 * allows the pageout daemon and default pager (and any other
215 * associated threads needed for default pageout) to continue
216 * operation by dipping into the reserved pool of pages.
217 */
218
219 #ifndef VM_PAGE_FREE_RESERVED
220 #define VM_PAGE_FREE_RESERVED(n) \
221 ((6 * VM_PAGE_LAUNDRY_MAX) + (n))
222 #endif /* VM_PAGE_FREE_RESERVED */
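/*
 * Worked example: with VM_PAGE_LAUNDRY_MAX at its default of 16,
 *
 *	VM_PAGE_FREE_RESERVED(n) = (6 * 16) + n = 96 + n pages
 *
 * where n is whatever additional slack the caller passes in; once the
 * free count drops below this value only vm-privileged threads may
 * allocate pages.
 */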
223
224 /*
225 * When we dequeue pages from the inactive list, they are
226 * reactivated (i.e., put back on the active queue) if referenced.
227 * However, it is possible to starve the free list if other
228 * processors are referencing pages faster than we can turn off
229 * the referenced bit. So we limit the number of reactivations
230 * we will make per call of vm_pageout_scan().
231 */
232 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
233 #ifndef VM_PAGE_REACTIVATE_LIMIT
234 #ifdef CONFIG_EMBEDDED
235 #define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
236 #else
237 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
238 #endif
239 #endif /* VM_PAGE_REACTIVATE_LIMIT */
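/*
 * Worked example (hypothetical 200000 active+inactive pages as the
 * 'avail' argument):
 *
 *	embedded:  VM_PAGE_INACTIVE_TARGET(200000) / 2 = 66666 / 2 = 33333
 *	otherwise: MAX(200000 / 20, 20000) = MAX(10000, 20000)    = 20000
 *
 * i.e. at most that many referenced pages are reactivated per call to
 * vm_pageout_scan() before the reactivation limit takes effect.
 */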
240 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 100
241
242
243 /*
244 * must hold the page queues lock to
245 * manipulate this structure
246 */
247 struct vm_pageout_queue {
248 queue_head_t pgo_pending; /* laundry pages to be processed by pager's iothread */
249 unsigned int pgo_laundry; /* current count of laundry pages on queue or in flight */
250 unsigned int pgo_maxlaundry;
251
252 unsigned int pgo_idle:1, /* iothread is blocked waiting for work to do */
253 pgo_busy:1, /* iothread is currently processing request from pgo_pending */
254 pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */
255 :0;
256 };
257
258 #define VM_PAGE_Q_THROTTLED(q) \
259 ((q)->pgo_laundry >= (q)->pgo_maxlaundry)
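/*
 * Example of the throttle test above: if a queue were initialized with
 * pgo_maxlaundry equal to VM_PAGE_LAUNDRY_MAX (16 by default -- used
 * here purely for illustration, the real values are assigned at queue
 * initialization elsewhere in this file), VM_PAGE_Q_THROTTLED(q)
 * becomes true once 16 laundry pages are outstanding on that queue,
 * and vm_pageout_scan must back off until vm_pageout_throttle_up()
 * drops pgo_laundry again.
 */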
260
261
262 /*
263 * Exported variable used to broadcast the activation of the pageout scan.
264 * Working Set uses this to throttle its use of pmap removes. In this
265 * way, code which runs within memory in an uncontested context does
266 * not keep encountering soft faults.
267 */
268
269 unsigned int vm_pageout_scan_event_counter = 0;
270
271 /*
272 * Forward declarations for internal routines.
273 */
274
275 static void vm_pageout_garbage_collect(int);
276 static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
277 static void vm_pageout_iothread_external(void);
278 static void vm_pageout_iothread_internal(void);
279 static void vm_pageout_queue_steal(vm_page_t);
280
281 extern void vm_pageout_continue(void);
282 extern void vm_pageout_scan(void);
283
284 static thread_t vm_pageout_external_iothread = THREAD_NULL;
285 static thread_t vm_pageout_internal_iothread = THREAD_NULL;
286
287 unsigned int vm_pageout_reserved_internal = 0;
288 unsigned int vm_pageout_reserved_really = 0;
289
290 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
291 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
292 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
293 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
294 unsigned int vm_pageout_deadlock_relief = 0;
295 unsigned int vm_pageout_inactive_relief = 0;
296 unsigned int vm_pageout_burst_active_throttle = 0;
297 unsigned int vm_pageout_burst_inactive_throttle = 0;
298
299 /*
300 * Protection against zero fill flushing live working sets derived
301 * from existing backing store and files
302 */
303 unsigned int vm_accellerate_zf_pageout_trigger = 400;
304 unsigned int zf_queue_min_count = 100;
305 unsigned int vm_zf_count = 0;
306 unsigned int vm_zf_queue_count = 0;
307
308 /*
309 * These variables record the pageout daemon's actions:
310 * how many pages it looks at and what happens to those pages.
311 * No locking needed because only one thread modifies the variables.
312 */
313
314 unsigned int vm_pageout_active = 0; /* debugging */
315 unsigned int vm_pageout_inactive = 0; /* debugging */
316 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
317 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
318 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
319 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
320 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
321 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
322 unsigned int vm_pageout_inactive_used = 0; /* debugging */
323 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
324 unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
325 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
326 unsigned int vm_pageout_purged_objects = 0; /* debugging */
327 unsigned int vm_stat_discard = 0; /* debugging */
328 unsigned int vm_stat_discard_sent = 0; /* debugging */
329 unsigned int vm_stat_discard_failure = 0; /* debugging */
330 unsigned int vm_stat_discard_throttle = 0; /* debugging */
331 unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */
332 unsigned int vm_pageout_catch_ups = 0; /* debugging */
333 unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */
334
335 unsigned int vm_pageout_scan_active_throttled = 0;
336 unsigned int vm_pageout_scan_inactive_throttled = 0;
337 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
338 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
339 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
340 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
341 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
342 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
343 /*
344 * Backing store throttle when BS is exhausted
345 */
346 unsigned int vm_backing_store_low = 0;
347
348 unsigned int vm_pageout_out_of_line = 0;
349 unsigned int vm_pageout_in_place = 0;
350
351 /*
352 * ENCRYPTED SWAP:
353 * counters and statistics...
354 */
355 unsigned long vm_page_decrypt_counter = 0;
356 unsigned long vm_page_decrypt_for_upl_counter = 0;
357 unsigned long vm_page_encrypt_counter = 0;
358 unsigned long vm_page_encrypt_abort_counter = 0;
359 unsigned long vm_page_encrypt_already_encrypted_counter = 0;
360 boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */
361
362 struct vm_pageout_queue vm_pageout_queue_internal;
363 struct vm_pageout_queue vm_pageout_queue_external;
364
365 unsigned int vm_page_speculative_target = 0;
366
367 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
368
369 unsigned long vm_cs_validated_resets = 0;
370
371 /*
372 * Routine: vm_backing_store_disable
373 * Purpose:
374 * Suspend non-privileged threads wishing to extend
375 * backing store when we are low on backing store
376 * (Synchronized by caller)
377 */
378 void
379 vm_backing_store_disable(
380 boolean_t disable)
381 {
382 if(disable) {
383 vm_backing_store_low = 1;
384 } else {
385 if(vm_backing_store_low) {
386 vm_backing_store_low = 0;
387 thread_wakeup((event_t) &vm_backing_store_low);
388 }
389 }
390 }
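/*
 * Illustrative call pattern for the routine above. The function name
 * below is invented purely for the sketch and is not part of this
 * file; the block is disabled so it is never built.
 */
#if 0
static void
backing_store_space_changed_example(boolean_t exhausted)
{
	if (exhausted) {
		/* flag exhaustion: non-privileged extenders get suspended */
		vm_backing_store_disable(TRUE);
	} else {
		/*
		 * space is available again: clears the flag and wakes any
		 * threads blocked on &vm_backing_store_low
		 */
		vm_backing_store_disable(FALSE);
	}
}
#endif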
391
392
393 #if MACH_CLUSTER_STATS
394 unsigned long vm_pageout_cluster_dirtied = 0;
395 unsigned long vm_pageout_cluster_cleaned = 0;
396 unsigned long vm_pageout_cluster_collisions = 0;
397 unsigned long vm_pageout_cluster_clusters = 0;
398 unsigned long vm_pageout_cluster_conversions = 0;
399 unsigned long vm_pageout_target_collisions = 0;
400 unsigned long vm_pageout_target_page_dirtied = 0;
401 unsigned long vm_pageout_target_page_freed = 0;
402 #define CLUSTER_STAT(clause) clause
403 #else /* MACH_CLUSTER_STATS */
404 #define CLUSTER_STAT(clause)
405 #endif /* MACH_CLUSTER_STATS */
406
407 /*
408 * Routine: vm_pageout_object_terminate
409 * Purpose:
410 * Destroy the pageout_object, and perform all of the
411 * required cleanup actions.
412 *
413 * In/Out conditions:
414 * The object must be locked, and will be returned locked.
415 */
416 void
417 vm_pageout_object_terminate(
418 vm_object_t object)
419 {
420 vm_object_t shadow_object;
421
422 /*
423 * Deal with the deallocation (last reference) of a pageout object
424 * (used for cleaning-in-place) by dropping the paging references/
425 * freeing pages in the original object.
426 */
427
428 assert(object->pageout);
429 shadow_object = object->shadow;
430 vm_object_lock(shadow_object);
431
432 while (!queue_empty(&object->memq)) {
433 vm_page_t p, m;
434 vm_object_offset_t offset;
435
436 p = (vm_page_t) queue_first(&object->memq);
437
438 assert(p->private);
439 assert(p->pageout);
440 p->pageout = FALSE;
441 assert(!p->cleaning);
442
443 offset = p->offset;
444 VM_PAGE_FREE(p);
445 p = VM_PAGE_NULL;
446
447 m = vm_page_lookup(shadow_object,
448 offset + object->shadow_offset);
449
450 if(m == VM_PAGE_NULL)
451 continue;
452 assert(m->cleaning);
453 /* used as a trigger on upl_commit etc to recognize the */
454 /* pageout daemon's subsequent desire to pageout a cleaning */
455 /* page. When the bit is on the upl commit code will */
456 /* respect the pageout bit in the target page over the */
457 /* caller's page list indication */
458 m->dump_cleaning = FALSE;
459
460 assert((m->dirty) || (m->precious) ||
461 (m->busy && m->cleaning));
462
463 /*
464 * Handle the trusted pager throttle.
465 * Also decrement the burst throttle (if external).
466 */
467 vm_page_lock_queues();
468 if (m->laundry) {
469 vm_pageout_throttle_up(m);
470 }
471
472 /*
473 * Handle the "target" page(s). These pages are to be freed if
474 * successfully cleaned. Target pages are always busy, and are
475 * wired exactly once. The initial target pages are not mapped,
476 * (so cannot be referenced or modified) but converted target
477 * pages may have been modified between the selection as an
478 * adjacent page and conversion to a target.
479 */
480 if (m->pageout) {
481 assert(m->busy);
482 assert(m->wire_count == 1);
483 m->cleaning = FALSE;
484 m->encrypted_cleaning = FALSE;
485 m->pageout = FALSE;
486 #if MACH_CLUSTER_STATS
487 if (m->wanted) vm_pageout_target_collisions++;
488 #endif
489 /*
490 * Revoke all access to the page. Since the object is
491 * locked, and the page is busy, this prevents the page
492 * from being dirtied after the pmap_disconnect() call
493 * returns.
494 *
495 * Since the page is left "dirty" but "not modified", we
496 * can detect whether the page was redirtied during
497 * pageout by checking the modify state.
498 */
499 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
500 m->dirty = TRUE;
501 else
502 m->dirty = FALSE;
503
504 if (m->dirty) {
505 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
506 vm_page_unwire(m);/* reactivates */
507 VM_STAT_INCR(reactivations);
508 PAGE_WAKEUP_DONE(m);
509 } else {
510 CLUSTER_STAT(vm_pageout_target_page_freed++;)
511 vm_page_free(m);/* clears busy, etc. */
512 }
513 vm_page_unlock_queues();
514 continue;
515 }
516 /*
517 * Handle the "adjacent" pages. These pages were cleaned in
518 * place, and should be left alone.
519 * If prep_pin_count is nonzero, then someone is using the
520 * page, so make it active.
521 */
522 if (!m->active && !m->inactive && !m->throttled && !m->private) {
523 if (m->reference)
524 vm_page_activate(m);
525 else
526 vm_page_deactivate(m);
527 }
528 if((m->busy) && (m->cleaning)) {
529
530 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
531 m->busy = FALSE;
532
533 /* We do not re-set m->dirty ! */
534 /* The page was busy so no extraneous activity */
535 /* could have occurred. COPY_INTO is a read into the */
536 /* new pages. CLEAN_IN_PLACE does actually write */
537 /* out the pages but handling outside of this code */
538 /* will take care of resetting dirty. We clear the */
539 /* modify however for the Programmed I/O case. */
540 pmap_clear_modify(m->phys_page);
541
542 m->absent = FALSE;
543 m->overwriting = FALSE;
544 } else if (m->overwriting) {
545 /* alternate request page list, write to page_list */
546 /* case. Occurs when the original page was wired */
547 /* at the time of the list request */
548 assert(m->wire_count != 0);
549 vm_page_unwire(m);/* reactivates */
550 m->overwriting = FALSE;
551 } else {
552 /*
553 * Set the dirty state according to whether or not the page was
554 * modified during the pageout. Note that we purposefully do
555 * NOT call pmap_clear_modify since the page is still mapped.
556 * If the page were to be dirtied between the 2 calls, this
557 * fact would be lost. This code is only necessary to
558 * maintain statistics, since the pmap module is always
559 * consulted if m->dirty is false.
560 */
561 #if MACH_CLUSTER_STATS
562 m->dirty = pmap_is_modified(m->phys_page);
563
564 if (m->dirty) vm_pageout_cluster_dirtied++;
565 else vm_pageout_cluster_cleaned++;
566 if (m->wanted) vm_pageout_cluster_collisions++;
567 #else
568 m->dirty = 0;
569 #endif
570 }
571 m->cleaning = FALSE;
572 m->encrypted_cleaning = FALSE;
573
574 /*
575 * Wakeup any thread waiting for the page to be un-cleaning.
576 */
577 PAGE_WAKEUP(m);
578 vm_page_unlock_queues();
579 }
580 /*
581 * Account for the paging reference taken in vm_paging_object_allocate.
582 */
583 vm_object_paging_end(shadow_object);
584 vm_object_unlock(shadow_object);
585
586 assert(object->ref_count == 0);
587 assert(object->paging_in_progress == 0);
588 assert(object->resident_page_count == 0);
589 return;
590 }
591
592 /*
593 * Routine: vm_pageclean_setup
594 *
595 * Purpose: setup a page to be cleaned (made non-dirty), but not
596 * necessarily flushed from the VM page cache.
597 * This is accomplished by cleaning in place.
598 *
599 * The page must not be busy, and the object and page
600 * queues must be locked.
601 *
602 */
603 void
604 vm_pageclean_setup(
605 vm_page_t m,
606 vm_page_t new_m,
607 vm_object_t new_object,
608 vm_object_offset_t new_offset)
609 {
610 assert(!m->busy);
611 #if 0
612 assert(!m->cleaning);
613 #endif
614
615 XPR(XPR_VM_PAGEOUT,
616 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
617 (integer_t)m->object, m->offset, (integer_t)m,
618 (integer_t)new_m, new_offset);
619
620 pmap_clear_modify(m->phys_page);
621
622 /*
623 * Mark original page as cleaning in place.
624 */
625 m->cleaning = TRUE;
626 m->dirty = TRUE;
627 m->precious = FALSE;
628
629 /*
630 * Convert the fictitious page to a private shadow of
631 * the real page.
632 */
633 assert(new_m->fictitious);
634 assert(new_m->phys_page == vm_page_fictitious_addr);
635 new_m->fictitious = FALSE;
636 new_m->private = TRUE;
637 new_m->pageout = TRUE;
638 new_m->phys_page = m->phys_page;
639 vm_page_wire(new_m);
640
641 vm_page_insert(new_m, new_object, new_offset);
642 assert(!new_m->wanted);
643 new_m->busy = FALSE;
644 }
645
646 /*
647 * Routine: vm_pageout_initialize_page
648 * Purpose:
649 * Causes the specified page to be initialized in
650 * the appropriate memory object. This routine is used to push
651 * pages into a copy-object when they are modified in the
652 * permanent object.
653 *
654 * The page is moved to a temporary object and paged out.
655 *
656 * In/out conditions:
657 * The page in question must not be on any pageout queues.
658 * The object to which it belongs must be locked.
659 * The page must be busy, but not hold a paging reference.
660 *
661 * Implementation:
662 * Move this page to a completely new object.
663 */
664 void
665 vm_pageout_initialize_page(
666 vm_page_t m)
667 {
668 vm_object_t object;
669 vm_object_offset_t paging_offset;
670 vm_page_t holding_page;
671 memory_object_t pager;
672
673 XPR(XPR_VM_PAGEOUT,
674 "vm_pageout_initialize_page, page 0x%X\n",
675 (integer_t)m, 0, 0, 0, 0);
676 assert(m->busy);
677
678 /*
679 * Verify that we really want to clean this page
680 */
681 assert(!m->absent);
682 assert(!m->error);
683 assert(m->dirty);
684
685 /*
686 * Create a paging reference to let us play with the object.
687 */
688 object = m->object;
689 paging_offset = m->offset + object->paging_offset;
690
691 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
692 VM_PAGE_FREE(m);
693 panic("reservation without pageout?"); /* alan */
694 vm_object_unlock(object);
695
696 return;
697 }
698
699 /*
700 * If there's no pager, then we can't clean the page. This should
701 * never happen since this should be a copy object and therefore not
702 * an external object, so the pager should always be there.
703 */
704
705 pager = object->pager;
706
707 if (pager == MEMORY_OBJECT_NULL) {
708 VM_PAGE_FREE(m);
709 panic("missing pager for copy object");
710 return;
711 }
712
713 /* set the page for future call to vm_fault_list_request */
714 vm_object_paging_begin(object);
715 holding_page = NULL;
716 vm_page_lock_queues();
717 pmap_clear_modify(m->phys_page);
718 m->dirty = TRUE;
719 m->busy = TRUE;
720 m->list_req_pending = TRUE;
721 m->cleaning = TRUE;
722 m->pageout = TRUE;
723 vm_page_wire(m);
724 vm_page_unlock_queues();
725 vm_object_unlock(object);
726
727 /*
728 * Write the data to its pager.
729 * Note that the data is passed by naming the new object,
730 * not a virtual address; the pager interface has been
731 * manipulated to use the "internal memory" data type.
732 * [The object reference from its allocation is donated
733 * to the eventual recipient.]
734 */
735 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
736
737 vm_object_lock(object);
738 vm_object_paging_end(object);
739 }
740
741 #if MACH_CLUSTER_STATS
742 #define MAXCLUSTERPAGES 16
743 struct {
744 unsigned long pages_in_cluster;
745 unsigned long pages_at_higher_offsets;
746 unsigned long pages_at_lower_offsets;
747 } cluster_stats[MAXCLUSTERPAGES];
748 #endif /* MACH_CLUSTER_STATS */
749
750
751 /*
752 * vm_pageout_cluster:
753 *
754 * Given a page, queue it to the appropriate I/O thread,
755 * which will page it out and attempt to clean adjacent pages
756 * in the same operation.
757 *
758 * The page must be busy, and the object and queues locked. We will take a
759 * paging reference to prevent deallocation or collapse when we
760 * release the object lock back at the call site. The I/O thread
761 * is responsible for consuming this reference
762 *
763 * The page must not be on any pageout queue.
764 */
765
766 void
767 vm_pageout_cluster(vm_page_t m)
768 {
769 vm_object_t object = m->object;
770 struct vm_pageout_queue *q;
771
772
773 XPR(XPR_VM_PAGEOUT,
774 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
775 (integer_t)object, m->offset, (integer_t)m, 0, 0);
776
777 /*
778 * Only a certain kind of page is appreciated here.
779 */
780 assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
781 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
782 assert(!m->throttled);
783
784 /*
785 * protect the object from collapse -
786 * locking in the object's paging_offset.
787 */
788 vm_object_paging_begin(object);
789
790 /*
791 * set the page for future call to vm_fault_list_request
792 * page should already be marked busy
793 */
794 vm_page_wire(m);
795 m->list_req_pending = TRUE;
796 m->cleaning = TRUE;
797 m->pageout = TRUE;
798 m->laundry = TRUE;
799
800 if (object->internal == TRUE)
801 q = &vm_pageout_queue_internal;
802 else
803 q = &vm_pageout_queue_external;
804 q->pgo_laundry++;
805
806 m->pageout_queue = TRUE;
807 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
808
809 if (q->pgo_idle == TRUE) {
810 q->pgo_idle = FALSE;
811 thread_wakeup((event_t) &q->pgo_pending);
812 }
813 }
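/*
 * Summary of the state transitions performed by vm_pageout_cluster()
 * above, for a page m on an internal object (external objects differ
 * only in the queue chosen); this merely restates the code:
 *
 *	vm_object_paging_begin(object)    protects against collapse
 *	vm_page_wire(m)                   pins the page for the I/O
 *	m->list_req_pending = TRUE
 *	m->cleaning = m->pageout = TRUE
 *	m->laundry = TRUE                 vm_pageout_queue_internal.pgo_laundry++
 *	m->pageout_queue = TRUE           page enqueued on pgo_pending
 *
 * followed by a thread_wakeup() on &q->pgo_pending if the iothread
 * was idle.
 */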
814
815
816 unsigned long vm_pageout_throttle_up_count = 0;
817
818 /*
819 * A page is back from laundry. See if there are some pages waiting to
820 * go to laundry and if we can let some of them go now.
821 *
822 * Object and page queues must be locked.
823 */
824 void
825 vm_pageout_throttle_up(
826 vm_page_t m)
827 {
828 struct vm_pageout_queue *q;
829
830 vm_pageout_throttle_up_count++;
831
832 assert(m->laundry);
833 assert(m->object != VM_OBJECT_NULL);
834 assert(m->object != kernel_object);
835
836 if (m->object->internal == TRUE)
837 q = &vm_pageout_queue_internal;
838 else
839 q = &vm_pageout_queue_external;
840
841 m->laundry = FALSE;
842 q->pgo_laundry--;
843
844 if (q->pgo_throttled == TRUE) {
845 q->pgo_throttled = FALSE;
846 thread_wakeup((event_t) &q->pgo_laundry);
847 }
848 }
849
850
851 /*
852 * vm_pageout_scan does the dirty work for the pageout daemon.
853 * It returns with vm_page_queue_free_lock held and
854 * vm_page_free_wanted == 0.
855 */
856
857 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT (3 * MAX_UPL_TRANSFER)
858
859 #define FCS_IDLE 0
860 #define FCS_DELAYED 1
861 #define FCS_DEADLOCK_DETECTED 2
862
863 struct flow_control {
864 int state;
865 mach_timespec_t ts;
866 };
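/*
 * The flow_control state machine used in vm_pageout_scan, in outline
 * (derived from the FCS_* cases below):
 *
 *	FCS_IDLE
 *	    default pager queue throttled --> arm a timer of
 *	    vm_pageout_deadlock_wait ms and go to FCS_DELAYED
 *	FCS_DELAYED
 *	    timer not yet expired --> keep waiting on pgo_laundry
 *	    timer expired         --> compute vm_pageout_deadlock_target
 *	                              and go to FCS_DEADLOCK_DETECTED
 *	FCS_DEADLOCK_DETECTED
 *	    relief pages remaining --> keep stealing pages
 *	    relief target reached  --> re-arm the timer (FCS_DELAYED)
 *
 * The state drops back to FCS_IDLE as soon as the scan gets past the
 * throttle checks, i.e. the default pager queue is no longer throttled.
 */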
867
868 void
869 vm_pageout_scan(void)
870 {
871 unsigned int loop_count = 0;
872 unsigned int inactive_burst_count = 0;
873 unsigned int active_burst_count = 0;
874 unsigned int reactivated_this_call;
875 unsigned int reactivate_limit;
876 vm_page_t local_freeq = NULL;
877 int local_freed = 0;
878 int delayed_unlock;
879 int need_internal_inactive = 0;
880 int refmod_state = 0;
881 int vm_pageout_deadlock_target = 0;
882 struct vm_pageout_queue *iq;
883 struct vm_pageout_queue *eq;
884 struct vm_speculative_age_q *sq;
885 struct flow_control flow_control;
886 boolean_t inactive_throttled = FALSE;
887 boolean_t try_failed;
888 mach_timespec_t ts;
889 unsigned int msecs = 0;
890 vm_object_t object;
891 vm_object_t last_object_tried;
892 int zf_ratio;
893 int zf_run_count;
894 uint32_t catch_up_count = 0;
895 uint32_t inactive_reclaim_run;
896 boolean_t forced_reclaim;
897
898 flow_control.state = FCS_IDLE;
899 iq = &vm_pageout_queue_internal;
900 eq = &vm_pageout_queue_external;
901 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
902
903
904 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
905
906
907 vm_page_lock_queues();
908 delayed_unlock = 1; /* must be nonzero if Qs are locked, 0 if unlocked */
909
910 /*
911 * Calculate the max number of referenced pages on the inactive
912 * queue that we will reactivate.
913 */
914 reactivated_this_call = 0;
915 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
916 vm_page_inactive_count);
917 inactive_reclaim_run = 0;
918
919
920 /*???*/ /*
921 * We want to gradually dribble pages from the active queue
922 * to the inactive queue. If we let the inactive queue get
923 * very small, and then suddenly dump many pages into it,
924 * those pages won't get a sufficient chance to be referenced
925 * before we start taking them from the inactive queue.
926 *
927 * We must limit the rate at which we send pages to the pagers.
928 * data_write messages consume memory, for message buffers and
929 * for map-copy objects. If we get too far ahead of the pagers,
930 * we can potentially run out of memory.
931 *
932 * We can use the laundry count to limit directly the number
933 * of pages outstanding to the default pager. A similar
934 * strategy for external pagers doesn't work, because
935 * external pagers don't have to deallocate the pages sent them,
936 * and because we might have to send pages to external pagers
937 * even if they aren't processing writes. So we also
938 * use a burst count to limit writes to external pagers.
939 *
940 * When memory is very tight, we can't rely on external pagers to
941 * clean pages. They probably aren't running, because they
942 * aren't vm-privileged. If we kept sending dirty pages to them,
943 * we could exhaust the free list.
944 */
945
946
947 Restart:
948 assert(delayed_unlock!=0);
949
950 /*
951 * A page is "zero-filled" if it was not paged in from somewhere,
952 * and it belongs to an object at least VM_ZF_OBJECT_SIZE_THRESHOLD big.
953 * Recalculate the zero-filled page ratio. We use this to apportion
954 * victimized pages between the normal and zero-filled inactive
955 * queues according to their relative abundance in memory. Thus if a task
956 * is flooding memory with zf pages, we begin to hunt them down.
957 * It would be better to throttle greedy tasks at a higher level,
958 * but at the moment mach vm cannot do this.
959 */
960 {
961 uint32_t total = vm_page_active_count + vm_page_inactive_count;
962 uint32_t normal = total - vm_zf_count;
963
964 /* zf_ratio is the number of zf pages we victimize per normal page */
965
966 if (vm_zf_count < vm_accellerate_zf_pageout_trigger)
967 zf_ratio = 0;
968 else if ((vm_zf_count <= normal) || (normal == 0))
969 zf_ratio = 1;
970 else
971 zf_ratio = vm_zf_count / normal;
972
973 zf_run_count = 0;
974 }
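	/*
	 * Worked example of the ratio above (hypothetical counts): with
	 * 100000 active+inactive pages of which vm_zf_count = 80000 are
	 * zero-filled, normal = 20000 and
	 *
	 *	zf_ratio = 80000 / 20000 = 4
	 *
	 * so the victim-selection loop below may take up to four
	 * zero-filled pages for every normal inactive page (zf_run_count
	 * tracks the current run). Were vm_zf_count below the 400-page
	 * trigger, zf_ratio would be 0 and zero-filled pages would get
	 * no special treatment.
	 */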
975
976 /*
977 * Recalculate vm_page_inactive_target.
978 */
979 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
980 vm_page_inactive_count +
981 vm_page_speculative_count);
982 /*
983 * don't want to wake the pageout_scan thread up every time we fall below
984 * the targets... set a low water mark at 0.25% below the target
985 */
986 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);
987
988 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
989 vm_page_inactive_count);
990 object = NULL;
991 last_object_tried = NULL;
992 try_failed = FALSE;
993
994 if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count))
995 catch_up_count = vm_page_inactive_count + vm_page_speculative_count;
996 else
997 catch_up_count = 0;
998
999 for (;;) {
1000 vm_page_t m;
1001
1002 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1003
1004 if (delayed_unlock == 0) {
1005 vm_page_lock_queues();
1006 delayed_unlock = 1;
1007 }
1008
1009 /*
1010 * Don't sweep through active queue more than the throttle
1011 * which should be kept relatively low
1012 */
1013 active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count);
1014
1015 /*
1016 * Move pages from active to inactive.
1017 */
1018 if (need_internal_inactive == 0 && (vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
1019 goto done_moving_active_pages;
1020
1021 while (!queue_empty(&vm_page_queue_active) &&
1022 (need_internal_inactive || active_burst_count)) {
1023
1024 if (active_burst_count)
1025 active_burst_count--;
1026
1027 vm_pageout_active++;
1028
1029 m = (vm_page_t) queue_first(&vm_page_queue_active);
1030
1031 assert(m->active && !m->inactive);
1032 assert(!m->laundry);
1033 assert(m->object != kernel_object);
1034 assert(m->phys_page != vm_page_guard_addr);
1035
1036 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1037
1038 /*
1039 * Try to lock object; since we've already got the
1040 * page queues lock, we can only 'try' for this one.
1041 * if the 'try' fails, we need to do a mutex_pause
1042 * to allow the owner of the object lock a chance to
1043 * run... otherwise, we're likely to trip over this
1044 * object in the same state as we work our way through
1045 * the queue... clumps of pages associated with the same
1046 * object are fairly typical on the inactive and active queues
1047 */
1048 if (m->object != object) {
1049 if (object != NULL) {
1050 vm_object_unlock(object);
1051 object = NULL;
1052 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1053 }
1054 if (!vm_object_lock_try_scan(m->object)) {
1055 /*
1056 * move page to end of active queue and continue
1057 */
1058 queue_remove(&vm_page_queue_active, m,
1059 vm_page_t, pageq);
1060 queue_enter(&vm_page_queue_active, m,
1061 vm_page_t, pageq);
1062
1063 try_failed = TRUE;
1064
1065 m = (vm_page_t) queue_first(&vm_page_queue_active);
1066 /*
1067 * this is the next object we're going to be interested in
1068 * try to make sure it's available after the mutex_yield
1069 * returns control
1070 */
1071 vm_pageout_scan_wants_object = m->object;
1072
1073 goto done_with_activepage;
1074 }
1075 object = m->object;
1076
1077 try_failed = FALSE;
1078 }
1079
1080 /*
1081 * if the page is BUSY, then we pull it
1082 * off the active queue and leave it alone.
1083 * when BUSY is cleared, it will get stuck
1084 * back on the appropriate queue
1085 */
1086 if (m->busy) {
1087 queue_remove(&vm_page_queue_active, m,
1088 vm_page_t, pageq);
1089 m->pageq.next = NULL;
1090 m->pageq.prev = NULL;
1091
1092 if (!m->fictitious)
1093 vm_page_active_count--;
1094 m->active = FALSE;
1095
1096 goto done_with_activepage;
1097 }
1098
1099 /*
1100 * Deactivate the page while holding the object
1101 * locked, so we know the page is still not busy.
1102 * This should prevent races between pmap_enter
1103 * and pmap_clear_reference. The page might be
1104 * absent or fictitious, but vm_page_deactivate
1105 * can handle that.
1106 */
1107 vm_page_deactivate(m);
1108
1109 if (need_internal_inactive) {
1110 vm_pageout_scan_active_throttle_success++;
1111 need_internal_inactive--;
1112 }
1113 done_with_activepage:
1114 if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) {
1115
1116 if (object != NULL) {
1117 vm_object_unlock(object);
1118 object = NULL;
1119 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1120 }
1121 if (local_freeq) {
1122 vm_page_free_list(local_freeq);
1123
1124 local_freeq = NULL;
1125 local_freed = 0;
1126 }
1127 mutex_yield(&vm_page_queue_lock);
1128
1129 delayed_unlock = 1;
1130
1131 /*
1132 * continue the while loop processing
1133 * the active queue... need to hold
1134 * the page queues lock
1135 */
1136 }
1137 }
1138
1139
1140
1141 /**********************************************************************
1142 * above this point we're playing with the active queue
1143 * below this point we're playing with the throttling mechanisms
1144 * and the inactive queue
1145 **********************************************************************/
1146
1147 done_moving_active_pages:
1148
1149 /*
1150 * We are done if we have met our target *and*
1151 * nobody is still waiting for a page.
1152 */
1153 if (vm_page_free_count + local_freed >= vm_page_free_target) {
1154 if (object != NULL) {
1155 vm_object_unlock(object);
1156 object = NULL;
1157 }
1158 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1159
1160 if (local_freeq) {
1161 vm_page_free_list(local_freeq);
1162
1163 local_freeq = NULL;
1164 local_freed = 0;
1165 }
1166 /*
1167 * inactive target still not met... keep going
1168 * until we get the queues balanced
1169 */
1170
1171 /*
1172 * Recalculate vm_page_inactive_target.
1173 */
1174 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1175 vm_page_inactive_count +
1176 vm_page_speculative_count);
1177
1178 #ifndef CONFIG_EMBEDDED
1179 /*
1180 * XXX: if no active pages can be reclaimed, pageout scan can be stuck trying
1181 * to balance the queues
1182 */
1183 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
1184 !queue_empty(&vm_page_queue_active))
1185 continue;
1186 #endif
1187
1188 mutex_lock(&vm_page_queue_free_lock);
1189
1190 if ((vm_page_free_count >= vm_page_free_target) &&
1191 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
1192
1193 vm_page_unlock_queues();
1194
1195 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1196
1197 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1198
1199 return;
1200 }
1201 mutex_unlock(&vm_page_queue_free_lock);
1202 }
1203 /*
1204 * Before anything, we check if we have any ripe volatile objects around.
1205 * If so, purge the first and see what it gives us.
1206 */
1207 assert (available_for_purge>=0);
1208 if (available_for_purge)
1209 {
1210 if (object != NULL) {
1211 vm_object_unlock(object);
1212 object = NULL;
1213 }
1214 vm_purgeable_object_purge_one();
1215 continue;
1216 }
1217
1218 if (queue_empty(&sq->age_q) && vm_page_speculative_count) {
1219 /*
1220 * try to pull pages from the aging bins
1221 * see vm_page.h for an explanation of how
1222 * this mechanism works
1223 */
1224 struct vm_speculative_age_q *aq;
1225 mach_timespec_t ts_fully_aged;
1226 boolean_t can_steal = FALSE;
1227
1228 aq = &vm_page_queue_speculative[speculative_steal_index];
1229
1230 while (queue_empty(&aq->age_q)) {
1231
1232 speculative_steal_index++;
1233
1234 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
1235 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
1236
1237 aq = &vm_page_queue_speculative[speculative_steal_index];
1238 }
1239 if (vm_page_speculative_count > vm_page_speculative_target)
1240 can_steal = TRUE;
1241 else {
1242 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) / 1000;
1243 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) % 1000)
1244 * 1000 * NSEC_PER_USEC;
1245
1246 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
1247
1248 clock_get_system_nanotime(&ts.tv_sec, (unsigned *)&ts.tv_nsec);
1249
1250 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
1251 can_steal = TRUE;
1252 }
1253 if (can_steal == TRUE)
1254 vm_page_speculate_ageit(aq);
1255 }
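		/*
		 * Aging example: the constants are defined in vm_page.h and
		 * the values quoted here are only an assumption for
		 * illustration. If VM_PAGE_MAX_SPECULATIVE_AGE_Q were 10 and
		 * VM_PAGE_SPECULATIVE_Q_AGE_MS were 1000, ts_fully_aged would
		 * land 10 seconds past the bin's age_ts, so a speculative
		 * page untouched for roughly that long becomes eligible for
		 * stealing even while the speculative count is below target.
		 */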
1256
1257 /*
1258 * Sometimes we have to pause:
1259 * 1) No inactive pages - nothing to do.
1260 * 2) Flow control - default pageout queue is full
1261 * 3) Loop control - no acceptable pages found on the inactive queue
1262 * within the last vm_pageout_burst_inactive_throttle iterations
1263 */
1264 if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf) && queue_empty(&sq->age_q) &&
1265 (VM_PAGE_Q_THROTTLED(iq) || queue_empty(&vm_page_queue_throttled))) {
1266 vm_pageout_scan_empty_throttle++;
1267 msecs = vm_pageout_empty_wait;
1268 goto vm_pageout_scan_delay;
1269
1270 } else if (inactive_burst_count >=
1271 MIN(vm_pageout_burst_inactive_throttle,
1272 (vm_page_inactive_count +
1273 vm_page_speculative_count))) {
1274 vm_pageout_scan_burst_throttle++;
1275 msecs = vm_pageout_burst_wait;
1276 goto vm_pageout_scan_delay;
1277
1278 } else if (VM_PAGE_Q_THROTTLED(iq) && IP_VALID(memory_manager_default)) {
1279
1280 switch (flow_control.state) {
1281
1282 case FCS_IDLE:
1283 reset_deadlock_timer:
1284 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
1285 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
1286 clock_get_system_nanotime(&flow_control.ts.tv_sec,
1287 (unsigned *)&flow_control.ts.tv_nsec);
1288 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
1289
1290 flow_control.state = FCS_DELAYED;
1291 msecs = vm_pageout_deadlock_wait;
1292
1293 break;
1294
1295 case FCS_DELAYED:
1296 clock_get_system_nanotime(&ts.tv_sec,
1297 (unsigned *)&ts.tv_nsec);
1298
1299 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
1300 /*
1301 * the pageout thread for the default pager is potentially
1302 * deadlocked since the
1303 * default pager queue has been throttled for more than the
1304 * allowable time... we need to move some clean pages or dirty
1305 * pages belonging to the external pagers if they aren't throttled
1306 * vm_page_free_wanted represents the number of threads currently
1307 * blocked waiting for pages... we'll move one page for each of
1308 * these plus a fixed amount to break the logjam... once we're done
1309 * moving this number of pages, we'll re-enter the FCS_DELAYED state
1310 * with a new timeout target since we have no way of knowing
1311 * whether we've broken the deadlock except through observation
1312 * of the queue associated with the default pager... we need to
1313 * stop moving pages and allow the system to run to see what
1314 * state it settles into.
1315 */
1316 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
1317 vm_pageout_scan_deadlock_detected++;
1318 flow_control.state = FCS_DEADLOCK_DETECTED;
1319
1320 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1321 goto consider_inactive;
1322 }
1323 /*
1324 * just resniff instead of trying
1325 * to compute a new delay time... we're going to be
1326 * awakened immediately upon a laundry completion,
1327 * so we won't wait any longer than necessary
1328 */
1329 msecs = vm_pageout_idle_wait;
1330 break;
1331
1332 case FCS_DEADLOCK_DETECTED:
1333 if (vm_pageout_deadlock_target)
1334 goto consider_inactive;
1335 goto reset_deadlock_timer;
1336
1337 }
1338 vm_pageout_scan_throttle++;
1339 iq->pgo_throttled = TRUE;
1340 vm_pageout_scan_delay:
1341 if (object != NULL) {
1342 vm_object_unlock(object);
1343 object = NULL;
1344 }
1345 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1346
1347 if (local_freeq) {
1348 vm_page_free_list(local_freeq);
1349
1350 local_freeq = NULL;
1351 local_freed = 0;
1352 }
1353 #if CONFIG_EMBEDDED
1354 {
1355 int percent_avail;
1356
1357 /*
1358 * Decide if we need to send a memory status notification.
1359 */
1360 percent_avail =
1361 (vm_page_active_count + vm_page_inactive_count +
1362 vm_page_speculative_count + vm_page_free_count +
1363 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
1364 atop_64(max_mem);
1365 if (percent_avail >= (kern_memorystatus_level + 5) ||
1366 percent_avail <= (kern_memorystatus_level - 5)) {
1367 kern_memorystatus_level = percent_avail;
1368 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1369 }
1370 }
1371 #endif
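		/*
		 * Example of the calculation above (hypothetical counts): on
		 * a machine whose max_mem corresponds to 120000 pages, 24000
		 * of which are active+inactive+speculative+free (plus
		 * purgeable when there is no default pager),
		 *
		 *	percent_avail = 24000 * 100 / 120000 = 20
		 *
		 * and the memorystatus notification only fires when that
		 * figure drifts 5 or more points away from the previously
		 * published kern_memorystatus_level.
		 */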
1372 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
1373
1374 counter(c_vm_pageout_scan_block++);
1375
1376 vm_page_unlock_queues();
1377
1378 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1379
1380 thread_block(THREAD_CONTINUE_NULL);
1381
1382 vm_page_lock_queues();
1383 delayed_unlock = 1;
1384
1385 iq->pgo_throttled = FALSE;
1386
1387 if (loop_count >= vm_page_inactive_count)
1388 loop_count = 0;
1389 inactive_burst_count = 0;
1390
1391 goto Restart;
1392 /*NOTREACHED*/
1393 }
1394
1395
1396 flow_control.state = FCS_IDLE;
1397 consider_inactive:
1398 loop_count++;
1399 inactive_burst_count++;
1400 vm_pageout_inactive++;
1401
1402 /* Choose a victim. */
1403
1404 while (1) {
1405 m = NULL;
1406
1407 /*
1408 * the most eligible pages are ones that were throttled because the
1409 * pager wasn't ready at the time. If a pager is ready now,
1410 * see if one of these is useful.
1411 */
1412 if (!VM_PAGE_Q_THROTTLED(iq) && !queue_empty(&vm_page_queue_throttled)) {
1413 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
1414 break;
1415 }
1416
1417 /*
1418 * The second most eligible pages are ones we paged in speculatively,
1419 * but which have not yet been touched.
1420 */
1421 if ( !queue_empty(&sq->age_q) ) {
1422 m = (vm_page_t) queue_first(&sq->age_q);
1423 break;
1424 }
1425 /*
1426 * Time for a zero-filled inactive page?
1427 */
1428 if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) ||
1429 queue_empty(&vm_page_queue_inactive)) {
1430 if ( !queue_empty(&vm_page_queue_zf) ) {
1431 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1432 zf_run_count++;
1433 break;
1434 }
1435 }
1436 /*
1437 * It's either a normal inactive page or nothing.
1438 */
1439 if ( !queue_empty(&vm_page_queue_inactive) ) {
1440 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1441 zf_run_count = 0;
1442 break;
1443 }
1444
1445 panic("vm_pageout: no victim");
1446 }
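		/*
		 * Victim priority, as implemented by the loop above:
		 *
		 *	1. pages on vm_page_queue_throttled, once the default
		 *	   pager (internal) queue is no longer throttled
		 *	2. fully aged speculative pages (sq->age_q)
		 *	3. zero-filled pages, while zf_run_count < zf_ratio and
		 *	   the zf queue is at least zf_queue_min_count deep (or
		 *	   the normal inactive queue is empty)
		 *	4. ordinary inactive pages
		 *
		 * If none of the queues yields a page the loop panics, since
		 * the empty/throttle checks above should have blocked instead.
		 */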
1447
1448 assert(!m->active && (m->inactive || m->speculative || m->throttled));
1449 assert(!m->laundry);
1450 assert(m->object != kernel_object);
1451 assert(m->phys_page != vm_page_guard_addr);
1452
1453 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1454
1455 /*
1456 * check to see if we currently are working
1457 * with the same object... if so, we've
1458 * already got the lock
1459 */
1460 if (m->object != object) {
1461 /*
1462 * the object associated with candidate page is
1463 * different from the one we were just working
1464 * with... dump the lock if we still own it
1465 */
1466 if (object != NULL) {
1467 vm_object_unlock(object);
1468 object = NULL;
1469 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1470 }
1471 /*
1472 * Try to lock object; since we've already got the
1473 * page queues lock, we can only 'try' for this one.
1474 * if the 'try' fails, we need to do a mutex_pause
1475 * to allow the owner of the object lock a chance to
1476 * run... otherwise, we're likely to trip over this
1477 * object in the same state as we work our way through
1478 * the queue... clumps of pages associated with the same
1479 * object are fairly typical on the inactive and active queues
1480 */
1481 if (!vm_object_lock_try_scan(m->object)) {
1482 /*
1483 * Move page to end and continue.
1484 * Don't re-issue ticket
1485 */
1486 if (m->zero_fill) {
1487 queue_remove(&vm_page_queue_zf, m,
1488 vm_page_t, pageq);
1489 queue_enter(&vm_page_queue_zf, m,
1490 vm_page_t, pageq);
1491 } else if (m->speculative) {
1492 remque(&m->pageq);
1493 m->speculative = FALSE;
1494 vm_page_speculative_count--;
1495
1496 /*
1497 * move to the tail of the inactive queue
1498 * to get it out of the way... the speculative
1499 * queue is generally too small to depend
1500 * on there being enough pages from other
1501 * objects to make cycling it back on the
1502 * same queue a winning proposition
1503 */
1504 queue_enter(&vm_page_queue_inactive, m,
1505 vm_page_t, pageq);
1506 m->inactive = TRUE;
1507 vm_page_inactive_count++;
1508 token_new_pagecount++;
1509 } else if (m->throttled) {
1510 queue_remove(&vm_page_queue_throttled, m,
1511 vm_page_t, pageq);
1512 m->throttled = FALSE;
1513 vm_page_throttled_count--;
1514
1515 /*
1516 * not throttled any more, so can stick
1517 * it on the inactive queue.
1518 */
1519 queue_enter(&vm_page_queue_inactive, m,
1520 vm_page_t, pageq);
1521 m->inactive = TRUE;
1522 vm_page_inactive_count++;
1523 token_new_pagecount++;
1524 } else {
1525 queue_remove(&vm_page_queue_inactive, m,
1526 vm_page_t, pageq);
1527 #if MACH_ASSERT
1528 vm_page_inactive_count--; /* balance for purgeable queue asserts */
1529 #endif
1530 vm_purgeable_q_advance_all();
1531
1532 queue_enter(&vm_page_queue_inactive, m,
1533 vm_page_t, pageq);
1534 #if MACH_ASSERT
1535 vm_page_inactive_count++; /* balance for purgeable queue asserts */
1536 #endif
1537 token_new_pagecount++;
1538 }
1539 pmap_clear_reference(m->phys_page);
1540 m->reference = FALSE;
1541
1542 vm_pageout_inactive_nolock++;
1543
1544 if ( !queue_empty(&sq->age_q) )
1545 m = (vm_page_t) queue_first(&sq->age_q);
1546 else if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) ||
1547 queue_empty(&vm_page_queue_inactive)) {
1548 if ( !queue_empty(&vm_page_queue_zf) )
1549 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1550 } else if ( !queue_empty(&vm_page_queue_inactive) ) {
1551 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1552 }
1553 /*
1554 * this is the next object we're going to be interested in
1555 * try to make sure it's available after the mutex_yield
1556 * returns control
1557 */
1558 vm_pageout_scan_wants_object = m->object;
1559
1560 /*
1561 * force us to dump any collected free pages
1562 * and to pause before moving on
1563 */
1564 try_failed = TRUE;
1565
1566 goto done_with_inactivepage;
1567 }
1568 object = m->object;
1569 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1570
1571 try_failed = FALSE;
1572 }
1573
1574 /*
1575 * Paging out pages of external objects which
1576 * are currently being created must be avoided.
1577 * The pager may need to claim memory, which could lead to a
1578 * deadlock between it and the pageout thread if such pages
1579 * are finally chosen. The remaining assumption is that there
1580 * will eventually be enough available pages in the inactive
1581 * pool to page out and thereby satisfy all memory claimed by
1582 * the thread which concurrently creates the pager.
1583 */
1584 if (!object->pager_initialized && object->pager_created) {
1585 /*
1586 * Move page to end and continue, hoping that
1587 * there will be enough other inactive pages to
1588 * page out so that the thread which currently
1589 * initializes the pager will succeed.
1590 * Don't re-grant the ticket; the page should be
1591 * pulled from the queue and paged out whenever
1592 * one of its logically adjacent fellows is
1593 * targeted.
1594 *
1595 * Pages found on the speculative list can never be
1596 * in this state... they always have a pager associated
1597 * with them.
1598 */
1599 assert(!m->speculative);
1600
1601 if (m->zero_fill) {
1602 queue_remove(&vm_page_queue_zf, m,
1603 vm_page_t, pageq);
1604 queue_enter(&vm_page_queue_zf, m,
1605 vm_page_t, pageq);
1606 } else {
1607 queue_remove(&vm_page_queue_inactive, m,
1608 vm_page_t, pageq);
1609 #if MACH_ASSERT
1610 vm_page_inactive_count--; /* balance for purgeable queue asserts */
1611 #endif
1612 vm_purgeable_q_advance_all();
1613
1614 queue_enter(&vm_page_queue_inactive, m,
1615 vm_page_t, pageq);
1616 #if MACH_ASSERT
1617 vm_page_inactive_count++; /* balance for purgeable queue asserts */
1618 #endif
1619 token_new_pagecount++;
1620 }
1621 vm_pageout_inactive_avoid++;
1622
1623 goto done_with_inactivepage;
1624 }
1625 /*
1626 * Remove the page from its list.
1627 */
1628 if (m->speculative) {
1629 remque(&m->pageq);
1630 m->speculative = FALSE;
1631 vm_page_speculative_count--;
1632 } else if (m->throttled) {
1633 queue_remove(&vm_page_queue_throttled, m, vm_page_t, pageq);
1634 m->throttled = FALSE;
1635 vm_page_throttled_count--;
1636 } else {
1637 if (m->zero_fill) {
1638 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
1639 vm_zf_queue_count--;
1640 } else {
1641 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1642 }
1643 m->inactive = FALSE;
1644 if (!m->fictitious)
1645 vm_page_inactive_count--;
1646 vm_purgeable_q_advance_all();
1647 }
1648
1649 /* If the object is empty, the page must be reclaimed even if dirty or used. */
1650 /* If the page belongs to a volatile object, we stick it back on. */
1651 if (object->copy == VM_OBJECT_NULL) {
1652 if(object->purgable == VM_PURGABLE_EMPTY && !m->cleaning) {
1653 m->busy = TRUE;
1654 if (m->pmapped == TRUE) {
1655 /* unmap the page */
1656 refmod_state = pmap_disconnect(m->phys_page);
1657 if (refmod_state & VM_MEM_MODIFIED) {
1658 m->dirty = TRUE;
1659 }
1660 }
1661 if (m->dirty || m->precious) {
1662 /* we saved the cost of cleaning this page ! */
1663 vm_page_purged_count++;
1664 }
1665 goto reclaim_page;
1666 }
1667 if (object->purgable == VM_PURGABLE_VOLATILE) {
1668 /* if it's wired, we can't put it on our queue */
1669 assert(m->wire_count == 0);
1670 /* just stick it back on! */
1671 goto reactivate_page;
1672 }
1673 }
1674 m->pageq.next = NULL;
1675 m->pageq.prev = NULL;
1676
1677 if ( !m->fictitious && catch_up_count)
1678 catch_up_count--;
1679
1680 /*
1681 * ENCRYPTED SWAP:
1682 * if this page has already been picked up as part of a
1683 * page-out cluster, it will be busy because it is being
1684 * encrypted (see vm_object_upl_request()). But we still
1685 * want to demote it from "clean-in-place" (aka "adjacent")
1686 * to "clean-and-free" (aka "target"), so let's ignore its
1687 * "busy" bit here and proceed to check for "cleaning" a
1688 * little bit below...
1689 */
1690 if ( !m->encrypted_cleaning && (m->busy || !object->alive)) {
1691 /*
1692 * Somebody is already playing with this page.
1693 * Leave it off the pageout queues.
1694 *
1695 */
1696 vm_pageout_inactive_busy++;
1697
1698 goto done_with_inactivepage;
1699 }
1700
1701 /*
1702 * If it's absent or in error, we can reclaim the page.
1703 */
1704
1705 if (m->absent || m->error) {
1706 vm_pageout_inactive_absent++;
1707 reclaim_page:
1708 if (vm_pageout_deadlock_target) {
1709 vm_pageout_scan_inactive_throttle_success++;
1710 vm_pageout_deadlock_target--;
1711 }
1712
1713 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
1714
1715 if (m->object->internal) {
1716 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
1717 } else {
1718 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
1719 }
1720
1721 vm_page_free_prepare(m);
1722
1723 assert(m->pageq.next == NULL &&
1724 m->pageq.prev == NULL);
1725 m->pageq.next = (queue_entry_t)local_freeq;
1726 local_freeq = m;
1727 local_freed++;
1728
1729 inactive_burst_count = 0;
1730
1731 goto done_with_inactivepage;
1732 }
1733
1734 assert(!m->private);
1735 assert(!m->fictitious);
1736
1737 /*
1738 * If already cleaning this page in place, convert from
1739 * "adjacent" to "target". We can leave the page mapped,
1740 * and vm_pageout_object_terminate will determine whether
1741 * to free or reactivate.
1742 */
1743
1744 if (m->cleaning) {
1745 m->busy = TRUE;
1746 m->pageout = TRUE;
1747 m->dump_cleaning = TRUE;
1748 vm_page_wire(m);
1749
1750 CLUSTER_STAT(vm_pageout_cluster_conversions++);
1751
1752 inactive_burst_count = 0;
1753
1754 goto done_with_inactivepage;
1755 }
1756
1757 /*
1758 * If it's being used, reactivate.
1759 * (Fictitious pages are either busy or absent.)
1760 * First, update the reference and dirty bits
1761 * to make sure the page is unreferenced.
1762 */
1763 refmod_state = -1;
1764
1765 if (m->reference == FALSE && m->pmapped == TRUE) {
1766 refmod_state = pmap_get_refmod(m->phys_page);
1767
1768 if (refmod_state & VM_MEM_REFERENCED)
1769 m->reference = TRUE;
1770 if (refmod_state & VM_MEM_MODIFIED)
1771 m->dirty = TRUE;
1772 }
1773 if (m->reference && !m->no_cache) {
1774 /*
1775 * The page we pulled off the inactive list has
1776 * been referenced. It is possible for other
1777 * processors to be touching pages faster than we
1778 * can clear the referenced bit and traverse the
1779 * inactive queue, so we limit the number of
1780 * reactivations.
1781 */
1782 if (++reactivated_this_call >= reactivate_limit) {
1783 vm_pageout_reactivation_limit_exceeded++;
1784 } else if (catch_up_count) {
1785 vm_pageout_catch_ups++;
1786 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
1787 vm_pageout_inactive_force_reclaim++;
1788 } else {
1789 /*
1790 * The page was being used, so put back on active list.
1791 */
1792 reactivate_page:
1793 vm_page_activate(m);
1794 VM_STAT_INCR(reactivations);
1795
1796 vm_pageout_inactive_used++;
1797 inactive_burst_count = 0;
1798
1799 goto done_with_inactivepage;
1800 }
1801 /*
1802 * Make sure we call pmap_get_refmod() if it
1803 * wasn't already called just above, to update
1804 * the dirty bit.
1805 */
1806 if ((refmod_state == -1) && !m->dirty && m->pmapped) {
1807 refmod_state = pmap_get_refmod(m->phys_page);
1808 if (refmod_state & VM_MEM_MODIFIED)
1809 m->dirty = TRUE;
1810 }
1811 forced_reclaim = TRUE;
1812 } else {
1813 forced_reclaim = FALSE;
1814 }
1815
1816 XPR(XPR_VM_PAGEOUT,
1817 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
1818 (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
1819
1820 /*
1821 * we've got a candidate page to steal...
1822 *
1823 * m->dirty is up to date courtesy of the
1824 * preceding check for m->reference... if
1825 * we get here, then m->reference had to be
1826 * FALSE (or possibly "reactivate_limit" was
1827 * exceeded), but in either case we called
1828 * pmap_get_refmod() and updated both
1829 * m->reference and m->dirty
1830 *
1831 * if it's dirty or precious we need to
1832 * see if the target queue is throttled
1833 * if it is, we need to skip over it by moving it back
1834 * to the end of the inactive queue
1835 */
1836 inactive_throttled = FALSE;
1837
1838 if (m->dirty || m->precious) {
1839 if (object->internal) {
1840 if (VM_PAGE_Q_THROTTLED(iq))
1841 inactive_throttled = TRUE;
1842 } else if (VM_PAGE_Q_THROTTLED(eq)) {
1843 inactive_throttled = TRUE;
1844 }
1845 }
1846 if (inactive_throttled == TRUE) {
1847 throttle_inactive:
1848 if (!IP_VALID(memory_manager_default) &&
1849 object->internal &&
1850 (object->purgable == VM_PURGABLE_DENY ||
1851 object->purgable == VM_PURGABLE_NONVOLATILE ||
1852 object->purgable == VM_PURGABLE_VOLATILE )) {
1853 queue_enter(&vm_page_queue_throttled, m,
1854 vm_page_t, pageq);
1855 m->throttled = TRUE;
1856 vm_page_throttled_count++;
1857 } else {
1858 if (m->zero_fill) {
1859 queue_enter(&vm_page_queue_zf, m,
1860 vm_page_t, pageq);
1861 vm_zf_queue_count++;
1862 } else
1863 queue_enter(&vm_page_queue_inactive, m,
1864 vm_page_t, pageq);
1865 m->inactive = TRUE;
1866 if (!m->fictitious) {
1867 vm_page_inactive_count++;
1868 token_new_pagecount++;
1869 }
1870 }
1871 vm_pageout_scan_inactive_throttled++;
1872 goto done_with_inactivepage;
1873 }
1874
1875 /*
1876 * we've got a page that we can steal...
1877 * eliminate all mappings and make sure
1878 * we have the up-to-date modified state
1879 * first take the page BUSY, so that no new
1880 * mappings can be made
1881 */
1882 m->busy = TRUE;
1883
1884 /*
1885 * if we need to do a pmap_disconnect then we
1886 * need to re-evaluate m->dirty since the pmap_disconnect
1887 * provides the true state atomically... the
1888 * page was still mapped up to the pmap_disconnect
1889 * and may have been dirtied at the last microsecond
1890 *
1891 * we also check for the page being referenced 'late'
1892 * if it was, we first need to do a WAKEUP_DONE on it
1893 * since we already set m->busy = TRUE, before
1894 * going off to reactivate it
1895 *
1896 * Note that if 'pmapped' is FALSE then the page is not in,
1897 * and has never been in, any pmap, so there is no point calling
1898 * pmap_disconnect(). m->dirty and/or m->reference could
1899 * have been set in anticipation of likely usage of the page.
1900 */
1901 if (m->pmapped == TRUE) {
1902 refmod_state = pmap_disconnect(m->phys_page);
1903
1904 if (refmod_state & VM_MEM_MODIFIED)
1905 m->dirty = TRUE;
1906 if (refmod_state & VM_MEM_REFERENCED) {
1907
1908 /* If m->reference is already set, this page must have
1909 * already failed the reactivate_limit test, so don't
1910 * bump the counts twice.
1911 */
1912 if ( ! m->reference ) {
1913 m->reference = TRUE;
1914 if (forced_reclaim ||
1915 ++reactivated_this_call >= reactivate_limit)
1916 vm_pageout_reactivation_limit_exceeded++;
1917 else {
1918 PAGE_WAKEUP_DONE(m);
1919 goto reactivate_page;
1920 }
1921 }
1922 }
1923 }
1924 /*
1925 * reset our count of pages that have been reclaimed
1926 * since the last page was 'stolen'
1927 */
1928 inactive_reclaim_run = 0;
1929
1930 /*
1931 * If it's clean and not precious, we can free the page.
1932 */
1933 if (!m->dirty && !m->precious) {
1934 vm_pageout_inactive_clean++;
1935 goto reclaim_page;
1936 }
1937
1938 /*
1939 * The page may have been dirtied since the last check
1940 * for a throttled target queue (which may have been skipped
1941 * if the page was clean then). With the dirty page
1942 * disconnected here, we can make one final check.
1943 */
1944 {
1945 boolean_t disconnect_throttled = FALSE;
1946 if (object->internal) {
1947 if (VM_PAGE_Q_THROTTLED(iq))
1948 disconnect_throttled = TRUE;
1949 } else if (VM_PAGE_Q_THROTTLED(eq)) {
1950 disconnect_throttled = TRUE;
1951 }
1952
1953 if (disconnect_throttled == TRUE) {
1954 PAGE_WAKEUP_DONE(m);
1955 goto throttle_inactive;
1956 }
1957 }
1958
1959 vm_pageout_cluster(m);
1960
1961 vm_pageout_inactive_dirty++;
1962
1963 inactive_burst_count = 0;
1964
1965 done_with_inactivepage:
1966 if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) {
1967
1968 if (object != NULL) {
1969 vm_object_unlock(object);
1970 object = NULL;
1971 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1972 }
1973 if (local_freeq) {
1974 vm_page_free_list(local_freeq);
1975
1976 local_freeq = NULL;
1977 local_freed = 0;
1978 }
1979 mutex_yield(&vm_page_queue_lock);
1980
1981 delayed_unlock = 1;
1982 }
1983 /*
1984 * back to top of pageout scan loop
1985 */
1986 }
1987 }
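
/*
 * Illustrative sketch: a condensed, user-space model of the per-page
 * decision sequence that the inactive-queue scan above walks through.
 * The struct, the enum and the function name below are hypothetical
 * simplifications for illustration only -- they mirror a subset of the
 * vm_page fields used above but are not the kernel's definitions, and
 * the reactivate_budget parameter stands in for the reactivate_limit
 * accounting done by the real scan.
 */
#include <stdbool.h>

struct sketch_page {
	bool busy, absent, error, cleaning, reference, no_cache;
	bool dirty, precious, pmapped;
};

enum scan_action {
	SKIP,			/* leave it off the pageout queues      */
	RECLAIM,		/* free it                              */
	CONVERT_TO_TARGET,	/* already cleaning: clean-and-free     */
	REACTIVATE,		/* recently used: back to active queue  */
	REQUEUE_THROTTLED,	/* pager backed up: try again later     */
	CLUSTER_PAGEOUT		/* dirty/precious: hand to pageout      */
};

static enum scan_action
classify_inactive_page(const struct sketch_page *m, bool object_alive,
		       bool queue_throttled, int *reactivate_budget)
{
	if (m->busy || !object_alive)
		return SKIP;			/* someone else owns the page */
	if (m->absent || m->error)
		return RECLAIM;			/* nothing worth keeping */
	if (m->cleaning)
		return CONVERT_TO_TARGET;	/* demote "adjacent" to "target" */
	if (m->reference && !m->no_cache && (*reactivate_budget)-- > 0)
		return REACTIVATE;		/* referenced since last pass */
	if (!m->dirty && !m->precious)
		return RECLAIM;			/* clean page: free it */
	if (queue_throttled)
		return REQUEUE_THROTTLED;	/* target queue is full */
	return CLUSTER_PAGEOUT;			/* dirty: vm_pageout_cluster() */
}
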
1988
1989
1990 int vm_page_free_count_init;
1991
1992 void
1993 vm_page_free_reserve(
1994 int pages)
1995 {
1996 int free_after_reserve;
1997
1998 vm_page_free_reserved += pages;
1999
2000 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
2001
2002 vm_page_free_min = vm_page_free_reserved +
2003 VM_PAGE_FREE_MIN(free_after_reserve);
2004
2005 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
2006 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
2007
2008 vm_page_free_target = vm_page_free_reserved +
2009 VM_PAGE_FREE_TARGET(free_after_reserve);
2010
2011 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
2012 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
2013
2014 if (vm_page_free_target < vm_page_free_min + 5)
2015 vm_page_free_target = vm_page_free_min + 5;
2016
2017 }
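
/*
 * Illustrative sketch: the shape of the free_min / free_target
 * computation in vm_page_free_reserve() above, pulled out in isolation.
 * The two SKETCH_* macros are placeholders -- the real
 * VM_PAGE_FREE_MIN/TARGET definitions live elsewhere in the VM headers --
 * so the constants are illustrative only; only the arithmetic mirrors
 * the routine above.
 */
#define SKETCH_FREE_MIN(free)		((free) / 20)	/* placeholder policy */
#define SKETCH_FREE_TARGET(free)	((free) / 10)	/* placeholder policy */

static void
sketch_free_reserve(int free_count_init, int reserved,
		    int *free_min, int *free_target)
{
	int free_after_reserve = free_count_init - reserved;

	*free_min    = reserved + SKETCH_FREE_MIN(free_after_reserve);
	*free_target = reserved + SKETCH_FREE_TARGET(free_after_reserve);

	/* the real routine also clamps both values to compile-time
	 * limits before keeping free_target at least free_min + 5 */
	if (*free_target < *free_min + 5)
		*free_target = *free_min + 5;
}
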
2018
2019 /*
2020 * vm_pageout is the high level pageout daemon.
2021 */
2022
2023 void
2024 vm_pageout_continue(void)
2025 {
2026 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
2027 vm_pageout_scan_event_counter++;
2028 vm_pageout_scan();
2029 /* we hold vm_page_queue_free_lock now */
2030 assert(vm_page_free_wanted == 0);
2031 assert(vm_page_free_wanted_privileged == 0);
2032 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
2033 mutex_unlock(&vm_page_queue_free_lock);
2034
2035 counter(c_vm_pageout_block++);
2036 thread_block((thread_continue_t)vm_pageout_continue);
2037 /*NOTREACHED*/
2038 }
2039
2040
2041 /*
2042 * must be called with the
2043 * queues and object locks held
2044 */
2045 static void
2046 vm_pageout_queue_steal(vm_page_t m)
2047 {
2048 struct vm_pageout_queue *q;
2049
2050 if (m->object->internal == TRUE)
2051 q = &vm_pageout_queue_internal;
2052 else
2053 q = &vm_pageout_queue_external;
2054
2055 m->laundry = FALSE;
2056 m->pageout_queue = FALSE;
2057 queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
2058
2059 m->pageq.next = NULL;
2060 m->pageq.prev = NULL;
2061
2062 vm_object_paging_end(m->object);
2063
2064 q->pgo_laundry--;
2065 }
2066
2067
2068 #ifdef FAKE_DEADLOCK
2069
2070 #define FAKE_COUNT 5000
2071
2072 int internal_count = 0;
2073 int fake_deadlock = 0;
2074
2075 #endif
2076
2077 static void
2078 vm_pageout_iothread_continue(struct vm_pageout_queue *q)
2079 {
2080 vm_page_t m = NULL;
2081 vm_object_t object;
2082 boolean_t need_wakeup;
2083 memory_object_t pager;
2084 thread_t self = current_thread();
2085
2086 if ((vm_pageout_internal_iothread != THREAD_NULL)
2087 && (self == vm_pageout_external_iothread )
2088 && (self->options & TH_OPT_VMPRIV))
2089 self->options &= ~TH_OPT_VMPRIV;
2090
2091 vm_page_lockspin_queues();
2092
2093 while ( !queue_empty(&q->pgo_pending) ) {
2094
2095 q->pgo_busy = TRUE;
2096 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
2097 m->pageout_queue = FALSE;
2098 vm_page_unlock_queues();
2099
2100 m->pageq.next = NULL;
2101 m->pageq.prev = NULL;
2102 #ifdef FAKE_DEADLOCK
2103 if (q == &vm_pageout_queue_internal) {
2104 vm_offset_t addr;
2105 int pg_count;
2106
2107 internal_count++;
2108
2109 if ((internal_count == FAKE_COUNT)) {
2110
2111 pg_count = vm_page_free_count + vm_page_free_reserved;
2112
2113 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
2114 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
2115 }
2116 internal_count = 0;
2117 fake_deadlock++;
2118 }
2119 }
2120 #endif
2121 object = m->object;
2122
2123 vm_object_lock(object);
2124
2125 if (!object->pager_initialized) {
2126
2127 /*
2128 * If there is no memory object for the page, create
2129 * one and hand it to the default pager.
2130 */
2131
2132 if (!object->pager_initialized)
2133 vm_object_collapse(object,
2134 (vm_object_offset_t) 0,
2135 TRUE);
2136 if (!object->pager_initialized)
2137 vm_object_pager_create(object);
2138 if (!object->pager_initialized) {
2139 /*
2140 * Still no pager for the object.
2141 * Reactivate the page.
2142 *
2143 * Should only happen if there is no
2144 * default pager.
2145 */
2146 m->list_req_pending = FALSE;
2147 m->cleaning = FALSE;
2148 m->pageout = FALSE;
2149
2150 vm_page_lockspin_queues();
2151 vm_page_unwire(m);
2152 vm_pageout_throttle_up(m);
2153 vm_pageout_dirty_no_pager++;
2154 vm_page_activate(m);
2155 vm_page_unlock_queues();
2156
2157 /*
2158 * And we are done with it.
2159 */
2160 PAGE_WAKEUP_DONE(m);
2161
2162 vm_object_paging_end(object);
2163 vm_object_unlock(object);
2164
2165 vm_page_lockspin_queues();
2166 continue;
2167 }
2168 }
2169 pager = object->pager;
2170 if (pager == MEMORY_OBJECT_NULL) {
2171 /*
2172 * This pager has been destroyed by either
2173 * memory_object_destroy or vm_object_destroy, and
2174 * so there is nowhere for the page to go.
2175 * Just free the page... VM_PAGE_FREE takes
2176 * care of cleaning up all the state...
2177 * including doing the vm_pageout_throttle_up
2178 */
2179
2180 VM_PAGE_FREE(m);
2181
2182 vm_object_paging_end(object);
2183 vm_object_unlock(object);
2184
2185 vm_page_lockspin_queues();
2186 continue;
2187 }
2188 vm_object_unlock(object);
2189 /*
2190 * we expect the paging_in_progress reference to have
2191 * already been taken on the object before it was added
2192 * to the appropriate pageout I/O queue... this will
2193 * keep the object from being terminated and/or the
2194 * paging_offset from changing until the I/O has
2195 * completed... therefore no need to lock the object to
2196 * pull the paging_offset from it.
2197 *
2198 * Send the data to the pager.
2199 * any pageout clustering happens there
2200 */
2201 memory_object_data_return(pager,
2202 m->offset + object->paging_offset,
2203 PAGE_SIZE,
2204 NULL,
2205 NULL,
2206 FALSE,
2207 FALSE,
2208 0);
2209
2210 vm_object_lock(object);
2211 vm_object_paging_end(object);
2212 vm_object_unlock(object);
2213
2214 vm_page_lockspin_queues();
2215 }
2216 assert_wait((event_t) q, THREAD_UNINT);
2217
2218
2219 if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) {
2220 q->pgo_throttled = FALSE;
2221 need_wakeup = TRUE;
2222 } else
2223 need_wakeup = FALSE;
2224
2225 q->pgo_busy = FALSE;
2226 q->pgo_idle = TRUE;
2227 vm_page_unlock_queues();
2228
2229 if (need_wakeup == TRUE)
2230 thread_wakeup((event_t) &q->pgo_laundry);
2231
2232 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending);
2233 /*NOTREACHED*/
2234 }
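
/*
 * Illustrative sketch: a user-space analog of the I/O thread's drain loop
 * above -- pop work under the queue lock, drop the lock for the actual
 * I/O, retake it, and sleep when the queue is empty.  The names
 * (work_item, io_worker, process_item) are hypothetical; the kernel
 * version "sleeps" by blocking with vm_pageout_iothread_continue as its
 * continuation rather than returning into a loop, and its I/O step is
 * memory_object_data_return().
 */
#include <pthread.h>
#include <stddef.h>

struct work_item {
	struct work_item *next;
};

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  queue_cv   = PTHREAD_COND_INITIALIZER;
static struct work_item *pending  = NULL;

static void
process_item(struct work_item *item)
{
	(void)item;	/* stand-in for handing the page to the pager */
}

static void *
io_worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&queue_lock);
	for (;;) {
		while (pending != NULL) {
			struct work_item *item = pending;
			pending = item->next;

			/* do the I/O without holding the queue lock, just as
			 * the iothread drops the page-queues lock around the
			 * call into the pager */
			pthread_mutex_unlock(&queue_lock);
			process_item(item);
			pthread_mutex_lock(&queue_lock);
		}
		/* queue drained: wait for more work to be enqueued */
		pthread_cond_wait(&queue_cv, &queue_lock);
	}
}
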
2235
2236
2237 static void
2238 vm_pageout_iothread_external(void)
2239 {
2240 thread_t self = current_thread();
2241
2242 self->options |= TH_OPT_VMPRIV;
2243
2244 vm_pageout_iothread_continue(&vm_pageout_queue_external);
2245 /*NOTREACHED*/
2246 }
2247
2248
2249 static void
2250 vm_pageout_iothread_internal(void)
2251 {
2252 thread_t self = current_thread();
2253
2254 self->options |= TH_OPT_VMPRIV;
2255
2256 vm_pageout_iothread_continue(&vm_pageout_queue_internal);
2257 /*NOTREACHED*/
2258 }
2259
2260 static void
2261 vm_pageout_garbage_collect(int collect)
2262 {
2263 if (collect) {
2264 stack_collect();
2265
2266 /*
2267 * consider_zone_gc should be last, because the other operations
2268 * might return memory to zones.
2269 */
2270 consider_machine_collect();
2271 consider_zone_gc();
2272
2273 consider_machine_adjust();
2274 }
2275
2276 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
2277
2278 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
2279 /*NOTREACHED*/
2280 }
2281
2282
2283
2284 void
2285 vm_pageout(void)
2286 {
2287 thread_t self = current_thread();
2288 thread_t thread;
2289 kern_return_t result;
2290 spl_t s;
2291
2292 /*
2293 * Set thread privileges.
2294 */
2295 s = splsched();
2296 thread_lock(self);
2297 self->priority = BASEPRI_PREEMPT - 1;
2298 set_sched_pri(self, self->priority);
2299 thread_unlock(self);
2300
2301 if (!self->reserved_stack)
2302 self->reserved_stack = self->kernel_stack;
2303
2304 splx(s);
2305
2306 /*
2307 * Initialize some paging parameters.
2308 */
2309
2310 if (vm_pageout_idle_wait == 0)
2311 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
2312
2313 if (vm_pageout_burst_wait == 0)
2314 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
2315
2316 if (vm_pageout_empty_wait == 0)
2317 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
2318
2319 if (vm_pageout_deadlock_wait == 0)
2320 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
2321
2322 if (vm_pageout_deadlock_relief == 0)
2323 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
2324
2325 if (vm_pageout_inactive_relief == 0)
2326 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
2327
2328 if (vm_pageout_burst_active_throttle == 0)
2329 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
2330
2331 if (vm_pageout_burst_inactive_throttle == 0)
2332 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
2333
2334 /*
2335 * Set kernel task to low backing store privileged
2336 * status
2337 */
2338 task_lock(kernel_task);
2339 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
2340 task_unlock(kernel_task);
2341
2342 vm_page_free_count_init = vm_page_free_count;
2343
2344 /*
2345 * even if we've already called vm_page_free_reserve
2346 * call it again here to ensure that the targets are
2347 * accurately calculated (it uses vm_page_free_count_init)
2348 * calling it with an arg of 0 will not change the reserve
2349 * but will re-calculate free_min and free_target
2350 */
2351 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
2352 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
2353 } else
2354 vm_page_free_reserve(0);
2355
2356
2357 queue_init(&vm_pageout_queue_external.pgo_pending);
2358 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2359 vm_pageout_queue_external.pgo_laundry = 0;
2360 vm_pageout_queue_external.pgo_idle = FALSE;
2361 vm_pageout_queue_external.pgo_busy = FALSE;
2362 vm_pageout_queue_external.pgo_throttled = FALSE;
2363
2364 queue_init(&vm_pageout_queue_internal.pgo_pending);
2365 vm_pageout_queue_internal.pgo_maxlaundry = 0;
2366 vm_pageout_queue_internal.pgo_laundry = 0;
2367 vm_pageout_queue_internal.pgo_idle = FALSE;
2368 vm_pageout_queue_internal.pgo_busy = FALSE;
2369 vm_pageout_queue_internal.pgo_throttled = FALSE;
2370
2371
2372 /* internal pageout thread started when default pager registered first time */
2373 /* external pageout and garbage collection threads started here */
2374
2375 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
2376 BASEPRI_PREEMPT - 1,
2377 &vm_pageout_external_iothread);
2378 if (result != KERN_SUCCESS)
2379 panic("vm_pageout_iothread_external: create failed");
2380
2381 thread_deallocate(vm_pageout_external_iothread);
2382
2383 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
2384 MINPRI_KERNEL,
2385 &thread);
2386 if (result != KERN_SUCCESS)
2387 panic("vm_pageout_garbage_collect: create failed");
2388
2389 thread_deallocate(thread);
2390
2391 vm_object_reaper_init();
2392
2393
2394 vm_pageout_continue();
2395
2396 /*
2397 * Unreached code!
2398 *
2399 * The vm_pageout_continue() call above never returns, so the code below is never
2400 * executed. We take advantage of this to declare several DTrace VM related probe
2401 * points that our kernel doesn't have an analog for. These are probe points that
2402 * exist in Solaris and are in the DTrace documentation, so people may have written
2403 * scripts that use them. Declaring the probe points here means their scripts will
2404 * compile and execute which we want for portability of the scripts, but since this
2405 * section of code is never reached, the probe points will simply never fire. Yes,
2406 * this is basically a hack. The problem is the DTrace probe points were chosen with
2407 * Solaris specific VM events in mind, not portability to different VM implementations.
2408 */
2409
2410 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
2411 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
2412 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
2413 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
2414 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
2415 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
2416 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
2417 /*NOTREACHED*/
2418 }
2419
2420 kern_return_t
2421 vm_pageout_internal_start(void)
2422 {
2423 kern_return_t result;
2424
2425 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2426 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread);
2427 if (result == KERN_SUCCESS)
2428 thread_deallocate(vm_pageout_internal_iothread);
2429 return result;
2430 }
2431
2432 #define UPL_DELAYED_UNLOCK_LIMIT (MAX_UPL_TRANSFER / 2)
2433
2434 static upl_t
2435 upl_create(int type, int flags, upl_size_t size)
2436 {
2437 upl_t upl;
2438 int page_field_size = 0;
2439 int upl_flags = 0;
2440 int upl_size = sizeof(struct upl);
2441
2442 if (type & UPL_CREATE_LITE) {
2443 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2444 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2445
2446 upl_flags |= UPL_LITE;
2447 }
2448 if (type & UPL_CREATE_INTERNAL) {
2449 upl_size += sizeof(struct upl_page_info) * (size/PAGE_SIZE);
2450
2451 upl_flags |= UPL_INTERNAL;
2452 }
2453 upl = (upl_t)kalloc(upl_size + page_field_size);
2454
2455 if (page_field_size)
2456 bzero((char *)upl + upl_size, page_field_size);
2457
2458 upl->flags = upl_flags | flags;
2459 upl->src_object = NULL;
2460 upl->kaddr = (vm_offset_t)0;
2461 upl->size = 0;
2462 upl->map_object = NULL;
2463 upl->ref_count = 1;
2464 upl->highest_page = 0;
2465 upl_lock_init(upl);
2466 #ifdef UPL_DEBUG
2467 upl->ubc_alias1 = 0;
2468 upl->ubc_alias2 = 0;
2469 #endif /* UPL_DEBUG */
2470 return(upl);
2471 }
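
/*
 * Illustrative sketch: the sizing arithmetic upl_create() uses for an
 * INTERNAL + LITE UPL, plus the bit-per-page "lite list" bookkeeping that
 * vm_object_upl_request() performs later on.  The structure sizes passed
 * to main() are hypothetical; only the arithmetic is taken from the code
 * above.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096u

static size_t
sketch_upl_alloc_size(size_t upl_struct_size, size_t page_info_size,
		      size_t request_size)
{
	size_t pages = request_size / SKETCH_PAGE_SIZE;

	/* one bit per page for the lite working bitmap, padded to 4 bytes */
	size_t page_field_size = (pages + 7) >> 3;
	page_field_size = (page_field_size + 3) & ~(size_t)3;

	/* the INTERNAL flavour appends one upl_page_info per page */
	return upl_struct_size + page_info_size * pages + page_field_size;
}

/* mark page 'pg_num' of the UPL in its lite list (an array of 32-bit words) */
static void
sketch_lite_list_mark(uint32_t *lite_list, unsigned int pg_num)
{
	lite_list[pg_num >> 5] |= 1u << (pg_num & 31);
}

int
main(void)
{
	uint32_t lite_list[8] = { 0 };

	/* e.g. a 1 MB request, with hypothetical 96-byte upl and
	 * 16-byte upl_page_info structures */
	printf("kalloc size: %zu bytes\n",
	       sketch_upl_alloc_size(96, 16, 1024 * 1024));

	sketch_lite_list_mark(lite_list, 42);
	printf("word 1 = 0x%08x\n", lite_list[1]);	/* bit 10 set */
	return 0;
}
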
2472
2473 static void
2474 upl_destroy(upl_t upl)
2475 {
2476 int page_field_size; /* bit field in word size buf */
2477 int size;
2478
2479 #ifdef UPL_DEBUG
2480 {
2481 vm_object_t object;
2482
2483 if (upl->flags & UPL_SHADOWED) {
2484 object = upl->map_object->shadow;
2485 } else {
2486 object = upl->map_object;
2487 }
2488 vm_object_lock(object);
2489 queue_remove(&object->uplq, upl, upl_t, uplq);
2490 vm_object_unlock(object);
2491 }
2492 #endif /* UPL_DEBUG */
2493 /*
2494 * drop a reference on the map_object whether or
2495 * not a pageout object is inserted
2496 */
2497 if (upl->flags & UPL_SHADOWED)
2498 vm_object_deallocate(upl->map_object);
2499
2500 if (upl->flags & UPL_DEVICE_MEMORY)
2501 size = PAGE_SIZE;
2502 else
2503 size = upl->size;
2504 page_field_size = 0;
2505
2506 if (upl->flags & UPL_LITE) {
2507 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2508 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2509 }
2510 if (upl->flags & UPL_INTERNAL) {
2511 kfree(upl,
2512 sizeof(struct upl) +
2513 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
2514 + page_field_size);
2515 } else {
2516 kfree(upl, sizeof(struct upl) + page_field_size);
2517 }
2518 }
2519
2520 void uc_upl_dealloc(upl_t upl);
2521 __private_extern__ void
2522 uc_upl_dealloc(upl_t upl)
2523 {
2524 if (--upl->ref_count == 0)
2525 upl_destroy(upl);
2526 }
2527
2528 void
2529 upl_deallocate(upl_t upl)
2530 {
2531 if (--upl->ref_count == 0)
2532 upl_destroy(upl);
2533 }
2534
2535 /*
2536 * Statistics about UPL enforcement of copy-on-write obligations.
2537 */
2538 unsigned long upl_cow = 0;
2539 unsigned long upl_cow_again = 0;
2540 unsigned long upl_cow_contiguous = 0;
2541 unsigned long upl_cow_pages = 0;
2542 unsigned long upl_cow_again_pages = 0;
2543 unsigned long upl_cow_contiguous_pages = 0;
2544
2545 /*
2546 * Routine: vm_object_upl_request
2547 * Purpose:
2548 * Cause the population of a portion of a vm_object.
2549 * Depending on the nature of the request, the pages
2550 * returned may contain valid data or be uninitialized.
2551 * A page list structure, listing the physical pages,
2552 * will be returned upon request.
2553 * This function is called by the file system or any other
2554 * supplier of backing store to a pager.
2555 * IMPORTANT NOTE: The caller must still respect the relationship
2556 * between the vm_object and its backing memory object. The
2557 * caller MUST NOT substitute changes in the backing file
2558 * without first doing a memory_object_lock_request on the
2559 * target range unless it is known that the pages are not
2560 * shared with another entity at the pager level.
2561 * Copy_in_to:
2562 * if a page list structure is present
2563 * return the mapped physical pages; where a
2564 * page is not present, return a non-initialized
2565 * one. If the no_sync bit is turned on, don't
2566 * call the pager unlock to synchronize with other
2567 * possible copies of the page. Leave pages busy
2568 * in the original object, if a page list structure
2569 * was specified. When a commit of the page list
2570 * pages is done, the dirty bit will be set for each one.
2571 * Copy_out_from:
2572 * If a page list structure is present, return
2573 * all mapped pages. Where a page does not exist
2574 * map a zero filled one. Leave pages busy in
2575 * the original object. If a page list structure
2576 * is not specified, this call is a no-op.
2577 *
2578 * Note: access of default pager objects has a rather interesting
2579 * twist. The caller of this routine, presumably the file system
2580 * page cache handling code, will never actually make a request
2581 * against a default pager backed object. Only the default
2582 * pager will make requests on backing store related vm_objects.
2583 * In this way the default pager can maintain the relationship
2584 * between backing store files (abstract memory objects) and
2585 * the vm_objects (cache objects) they support.
2586 *
2587 */
2588
2589 __private_extern__ kern_return_t
2590 vm_object_upl_request(
2591 vm_object_t object,
2592 vm_object_offset_t offset,
2593 upl_size_t size,
2594 upl_t *upl_ptr,
2595 upl_page_info_array_t user_page_list,
2596 unsigned int *page_list_count,
2597 int cntrl_flags)
2598 {
2599 vm_page_t dst_page = VM_PAGE_NULL;
2600 vm_object_offset_t dst_offset;
2601 upl_size_t xfer_size;
2602 boolean_t dirty;
2603 boolean_t hw_dirty;
2604 upl_t upl = NULL;
2605 unsigned int entry;
2606 #if MACH_CLUSTER_STATS
2607 boolean_t encountered_lrp = FALSE;
2608 #endif
2609 vm_page_t alias_page = NULL;
2610 int refmod_state = 0;
2611 wpl_array_t lite_list = NULL;
2612 vm_object_t last_copy_object;
2613 int delayed_unlock = 0;
2614 int j;
2615
2616 if (cntrl_flags & ~UPL_VALID_FLAGS) {
2617 /*
2618 * For forward compatibility's sake,
2619 * reject any unknown flag.
2620 */
2621 return KERN_INVALID_VALUE;
2622 }
2623 if ( (!object->internal) && (object->paging_offset != 0) )
2624 panic("vm_object_upl_request: external object with non-zero paging offset\n");
2625 if (object->phys_contiguous)
2626 panic("vm_object_upl_request: contiguous object specified\n");
2627
2628
2629 if ((size / PAGE_SIZE) > MAX_UPL_SIZE)
2630 size = MAX_UPL_SIZE * PAGE_SIZE;
2631
2632 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
2633 *page_list_count = MAX_UPL_SIZE;
2634
2635 if (cntrl_flags & UPL_SET_INTERNAL) {
2636 if (cntrl_flags & UPL_SET_LITE) {
2637
2638 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, 0, size);
2639
2640 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
2641 lite_list = (wpl_array_t)
2642 (((uintptr_t)user_page_list) +
2643 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
2644 } else {
2645 upl = upl_create(UPL_CREATE_INTERNAL, 0, size);
2646
2647 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
2648 }
2649 } else {
2650 if (cntrl_flags & UPL_SET_LITE) {
2651
2652 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE, 0, size);
2653
2654 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
2655 } else {
2656 upl = upl_create(UPL_CREATE_EXTERNAL, 0, size);
2657 }
2658 }
2659 *upl_ptr = upl;
2660
2661 if (user_page_list)
2662 user_page_list[0].device = FALSE;
2663
2664 if (cntrl_flags & UPL_SET_LITE) {
2665 upl->map_object = object;
2666 } else {
2667 upl->map_object = vm_object_allocate(size);
2668 /*
2669 * No need to lock the new object: nobody else knows
2670 * about it yet, so it's all ours so far.
2671 */
2672 upl->map_object->shadow = object;
2673 upl->map_object->pageout = TRUE;
2674 upl->map_object->can_persist = FALSE;
2675 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2676 upl->map_object->shadow_offset = offset;
2677 upl->map_object->wimg_bits = object->wimg_bits;
2678
2679 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2680
2681 upl->flags |= UPL_SHADOWED;
2682 }
2683 /*
2684 * ENCRYPTED SWAP:
2685 * Just mark the UPL as "encrypted" here.
2686 * We'll actually encrypt the pages later,
2687 * in upl_encrypt(), when the caller has
2688 * selected which pages need to go to swap.
2689 */
2690 if (cntrl_flags & UPL_ENCRYPT)
2691 upl->flags |= UPL_ENCRYPTED;
2692
2693 if (cntrl_flags & UPL_FOR_PAGEOUT)
2694 upl->flags |= UPL_PAGEOUT;
2695
2696 vm_object_lock(object);
2697 vm_object_paging_begin(object);
2698
2699 /*
2700 * we can lock in the paging_offset once paging_in_progress is set
2701 */
2702 upl->size = size;
2703 upl->offset = offset + object->paging_offset;
2704
2705 #ifdef UPL_DEBUG
2706 queue_enter(&object->uplq, upl, upl_t, uplq);
2707 #endif /* UPL_DEBUG */
2708
2709 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
2710 /*
2711 * Honor copy-on-write obligations
2712 *
2713 * The caller is gathering these pages and
2714 * might modify their contents. We need to
2715 * make sure that the copy object has its own
2716 * private copies of these pages before we let
2717 * the caller modify them.
2718 */
2719 vm_object_update(object,
2720 offset,
2721 size,
2722 NULL,
2723 NULL,
2724 FALSE, /* should_return */
2725 MEMORY_OBJECT_COPY_SYNC,
2726 VM_PROT_NO_CHANGE);
2727 upl_cow++;
2728 upl_cow_pages += size >> PAGE_SHIFT;
2729 }
2730 /*
2731 * remember which copy object we synchronized with
2732 */
2733 last_copy_object = object->copy;
2734 entry = 0;
2735
2736 xfer_size = size;
2737 dst_offset = offset;
2738
2739 while (xfer_size) {
2740
2741 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
2742 if (delayed_unlock) {
2743 delayed_unlock = 0;
2744 vm_page_unlock_queues();
2745 }
2746 vm_object_unlock(object);
2747 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2748 goto relock;
2749 }
2750 if (delayed_unlock == 0) {
2751 /*
2752 * pageout_scan takes the vm_page_lock_queues first
2753 * then tries for the object lock... to avoid what
2754 * is effectively a lock inversion, we'll go to the
2755 * trouble of taking them in that same order... otherwise
2756 * if this object contains the majority of the pages resident
2757 * in the UBC (or a small set of large objects actively being
2758 * worked on contain the majority of the pages), we could
2759 * cause the pageout_scan thread to 'starve' in its attempt
2760 * to find pages to move to the free queue, since it has to
2761 * successfully acquire the object lock of any candidate page
2762 * before it can steal/clean it.
2763 */
2764 vm_object_unlock(object);
2765 relock:
2766 for (j = 0; ; j++) {
2767 vm_page_lock_queues();
2768
2769 if (vm_object_lock_try(object))
2770 break;
2771 vm_page_unlock_queues();
2772 mutex_pause(j);
2773 }
2774 delayed_unlock = 1;
2775 }
2776 if (cntrl_flags & UPL_COPYOUT_FROM) {
2777 upl->flags |= UPL_PAGE_SYNC_DONE;
2778
2779 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
2780 dst_page->fictitious ||
2781 dst_page->absent ||
2782 dst_page->error ||
2783 (dst_page->wire_count && !dst_page->pageout && !dst_page->list_req_pending)) {
2784
2785 if (user_page_list)
2786 user_page_list[entry].phys_addr = 0;
2787
2788 goto delay_unlock_queues;
2789 }
2790 /*
2791 * grab this up front...
2792 * a high percentage of the time we're going to
2793 * need the hardware modification state a bit later
2794 * anyway... so we can eliminate an extra call into
2795 * the pmap layer by grabbing it here and recording it
2796 */
2797 if (dst_page->pmapped)
2798 refmod_state = pmap_get_refmod(dst_page->phys_page);
2799 else
2800 refmod_state = 0;
2801
2802 if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) {
2803 /*
2804 * page is on inactive list and referenced...
2805 * reactivate it now... this gets it out of the
2806 * way of vm_pageout_scan which would have to
2807 * reactivate it upon tripping over it
2808 */
2809 vm_page_activate(dst_page);
2810 VM_STAT_INCR(reactivations);
2811 }
2812 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
2813 /*
2814 * we're only asking for DIRTY pages to be returned
2815 */
2816 if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
2817 /*
2818 * if this is the page stolen by vm_pageout_scan to be
2819 * cleaned (as opposed to a buddy being clustered in),
2820 * or this request is not being driven by a PAGEOUT cluster,
2821 * then we only need to check for the page being dirty or
2822 * precious to decide whether to return it
2823 */
2824 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED))
2825 goto check_busy;
2826 goto dont_return;
2827 }
2828 /*
2829 * this is a request for a PAGEOUT cluster and this page
2830 * is merely along for the ride as a 'buddy'... not only
2831 * does it have to be dirty to be returned, but it also
2832 * can't have been referenced recently... note that we've
2833 * already filtered above based on whether this page is
2834 * currently on the inactive queue or it meets the page
2835 * ticket (generation count) check
2836 */
2837 if ( !(refmod_state & VM_MEM_REFERENCED) &&
2838 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
2839 goto check_busy;
2840 }
2841 dont_return:
2842 /*
2843 * if we reach here, we're not to return
2844 * the page... go on to the next one
2845 */
2846 if (user_page_list)
2847 user_page_list[entry].phys_addr = 0;
2848
2849 goto delay_unlock_queues;
2850 }
2851 check_busy:
2852 if (dst_page->busy && (!(dst_page->list_req_pending && dst_page->pageout))) {
2853 if (cntrl_flags & UPL_NOBLOCK) {
2854 if (user_page_list)
2855 user_page_list[entry].phys_addr = 0;
2856
2857 goto delay_unlock_queues;
2858 }
2859 /*
2860 * someone else is playing with the
2861 * page. We will have to wait.
2862 */
2863 delayed_unlock = 0;
2864 vm_page_unlock_queues();
2865
2866 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
2867
2868 continue;
2869 }
2870 /*
2871 * Someone else already cleaning the page?
2872 */
2873 if ((dst_page->cleaning || dst_page->absent || dst_page->wire_count != 0) && !dst_page->list_req_pending) {
2874 if (user_page_list)
2875 user_page_list[entry].phys_addr = 0;
2876
2877 goto delay_unlock_queues;
2878 }
2879 /*
2880 * ENCRYPTED SWAP:
2881 * The caller is gathering this page and might
2882 * access its contents later on. Decrypt the
2883 * page before adding it to the UPL, so that
2884 * the caller never sees encrypted data.
2885 */
2886 if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) {
2887 int was_busy;
2888
2889 delayed_unlock = 0;
2890 vm_page_unlock_queues();
2891 /*
2892 * save the current state of busy
2893 * mark page as busy while decrypt
2894 * is in progress since it will drop
2895 * the object lock...
2896 */
2897 was_busy = dst_page->busy;
2898 dst_page->busy = TRUE;
2899
2900 vm_page_decrypt(dst_page, 0);
2901 vm_page_decrypt_for_upl_counter++;
2902 /*
2903 * restore to original busy state
2904 */
2905 dst_page->busy = was_busy;
2906
2907 vm_page_lock_queues();
2908 delayed_unlock = 1;
2909 }
2910 if (dst_page->pageout_queue == TRUE)
2911 /*
2912 * we've buddied up a page for a clustered pageout
2913 * that has already been moved to the pageout
2914 * queue by pageout_scan... we need to remove
2915 * it from the queue and drop the laundry count
2916 * on that queue
2917 */
2918 vm_pageout_queue_steal(dst_page);
2919 #if MACH_CLUSTER_STATS
2920 /*
2921 * pageout statistics gathering. count
2922 * all the pages we will page out that
2923 * were not counted in the initial
2924 * vm_pageout_scan work
2925 */
2926 if (dst_page->list_req_pending)
2927 encountered_lrp = TRUE;
2928 if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious)) && !dst_page->list_req_pending) {
2929 if (encountered_lrp)
2930 CLUSTER_STAT(pages_at_higher_offsets++;)
2931 else
2932 CLUSTER_STAT(pages_at_lower_offsets++;)
2933 }
2934 #endif
2935 /*
2936 * Turn off busy indication on pending
2937 * pageout. Note: we can only get here
2938 * in the request pending case.
2939 */
2940 dst_page->list_req_pending = FALSE;
2941 dst_page->busy = FALSE;
2942
2943 hw_dirty = refmod_state & VM_MEM_MODIFIED;
2944 dirty = hw_dirty ? TRUE : dst_page->dirty;
2945
2946 if (dst_page->phys_page > upl->highest_page)
2947 upl->highest_page = dst_page->phys_page;
2948
2949 if (cntrl_flags & UPL_SET_LITE) {
2950 int pg_num;
2951
2952 pg_num = (dst_offset-offset)/PAGE_SIZE;
2953 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
2954
2955 if (hw_dirty)
2956 pmap_clear_modify(dst_page->phys_page);
2957
2958 /*
2959 * Mark original page as cleaning
2960 * in place.
2961 */
2962 dst_page->cleaning = TRUE;
2963 dst_page->precious = FALSE;
2964 } else {
2965 /*
2966 * use pageclean setup, it is more
2967 * convenient even for the pageout
2968 * cases here
2969 */
2970 vm_object_lock(upl->map_object);
2971 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
2972 vm_object_unlock(upl->map_object);
2973
2974 alias_page->absent = FALSE;
2975 alias_page = NULL;
2976 }
2977 #if MACH_PAGEMAP
2978 /*
2979 * Record that this page has been
2980 * written out
2981 */
2982 vm_external_state_set(object->existence_map, dst_page->offset);
2983 #endif /*MACH_PAGEMAP*/
2984 dst_page->dirty = dirty;
2985
2986 if (!dirty)
2987 dst_page->precious = TRUE;
2988
2989 if (dst_page->pageout)
2990 dst_page->busy = TRUE;
2991
2992 if ( (cntrl_flags & UPL_ENCRYPT) ) {
2993 /*
2994 * ENCRYPTED SWAP:
2995 * We want to deny access to the target page
2996 * because its contents are about to be
2997 * encrypted and the user would be very
2998 * confused to see encrypted data instead
2999 * of their data.
3000 * We also set "encrypted_cleaning" to allow
3001 * vm_pageout_scan() to demote that page
3002 * from "adjacent/clean-in-place" to
3003 * "target/clean-and-free" if it bumps into
3004 * this page during its scanning while we're
3005 * still processing this cluster.
3006 */
3007 dst_page->busy = TRUE;
3008 dst_page->encrypted_cleaning = TRUE;
3009 }
3010 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
3011 /*
3012 * deny access to the target page
3013 * while it is being worked on
3014 */
3015 if ((!dst_page->pageout) && (dst_page->wire_count == 0)) {
3016 dst_page->busy = TRUE;
3017 dst_page->pageout = TRUE;
3018 vm_page_wire(dst_page);
3019 }
3020 }
3021 } else {
3022 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
3023 /*
3024 * Honor copy-on-write obligations
3025 *
3026 * The copy object has changed since we
3027 * last synchronized for copy-on-write.
3028 * Another copy object might have been
3029 * inserted while we released the object's
3030 * lock. Since someone could have seen the
3031 * original contents of the remaining pages
3032 * through that new object, we have to
3033 * synchronize with it again for the remaining
3034 * pages only. The previous pages are "busy"
3035 * so they can not be seen through the new
3036 * mapping. The new mapping will see our
3037 * upcoming changes for those previous pages,
3038 * but that's OK since they couldn't see what
3039 * was there before. It's just a race anyway
3040 * and there's no guarantee of consistency or
3041 * atomicity. We just don't want new mappings
3042 * to see both the *before* and *after* pages.
3043 */
3044 if (object->copy != VM_OBJECT_NULL) {
3045 delayed_unlock = 0;
3046 vm_page_unlock_queues();
3047
3048 vm_object_update(
3049 object,
3050 dst_offset,/* current offset */
3051 xfer_size, /* remaining size */
3052 NULL,
3053 NULL,
3054 FALSE, /* should_return */
3055 MEMORY_OBJECT_COPY_SYNC,
3056 VM_PROT_NO_CHANGE);
3057
3058 upl_cow_again++;
3059 upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
3060
3061 vm_page_lock_queues();
3062 delayed_unlock = 1;
3063 }
3064 /*
3065 * remember the copy object we synced with
3066 */
3067 last_copy_object = object->copy;
3068 }
3069 dst_page = vm_page_lookup(object, dst_offset);
3070
3071 if (dst_page != VM_PAGE_NULL) {
3072 if ( !(dst_page->list_req_pending) ) {
3073 if ((cntrl_flags & UPL_RET_ONLY_ABSENT) && !dst_page->absent) {
3074 /*
3075 * skip over pages already present in the cache
3076 */
3077 if (user_page_list)
3078 user_page_list[entry].phys_addr = 0;
3079
3080 goto delay_unlock_queues;
3081 }
3082 if (dst_page->cleaning) {
3083 /*
3084 * someone else is writing to the page... wait...
3085 */
3086 delayed_unlock = 0;
3087 vm_page_unlock_queues();
3088
3089 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3090
3091 continue;
3092 }
3093 } else {
3094 if (dst_page->fictitious &&
3095 dst_page->phys_page == vm_page_fictitious_addr) {
3096 assert( !dst_page->speculative);
3097 /*
3098 * dump the fictitious page
3099 */
3100 dst_page->list_req_pending = FALSE;
3101
3102 vm_page_free(dst_page);
3103
3104 dst_page = NULL;
3105 } else if (dst_page->absent) {
3106 /*
3107 * the default_pager case
3108 */
3109 dst_page->list_req_pending = FALSE;
3110 dst_page->busy = FALSE;
3111 }
3112 }
3113 }
3114 if (dst_page == VM_PAGE_NULL) {
3115 if (object->private) {
3116 /*
3117 * This is a nasty wrinkle for users
3118 * of upl who encounter device or
3119 * private memory; however, it is
3120 * unavoidable, since only a fault can
3121 * resolve the actual backing
3122 * physical page by asking the
3123 * backing device.
3124 */
3125 if (user_page_list)
3126 user_page_list[entry].phys_addr = 0;
3127
3128 goto delay_unlock_queues;
3129 }
3130 /*
3131 * need to allocate a page
3132 */
3133 dst_page = vm_page_grab();
3134
3135 if (dst_page == VM_PAGE_NULL) {
3136 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
3137 /*
3138 * we don't want to stall waiting for pages to come onto the free list
3139 * while we're already holding absent pages in this UPL
3140 * the caller will deal with the empty slots
3141 */
3142 if (user_page_list)
3143 user_page_list[entry].phys_addr = 0;
3144
3145 goto try_next_page;
3146 }
3147 /*
3148 * no pages available... wait
3149 * then try again for the same
3150 * offset...
3151 */
3152 delayed_unlock = 0;
3153 vm_page_unlock_queues();
3154
3155 vm_object_unlock(object);
3156 VM_PAGE_WAIT();
3157
3158 /*
3159 * pageout_scan takes the vm_page_lock_queues first
3160 * then tries for the object lock... to avoid what
3161 * is effectively a lock inversion, we'll go to the
3162 * trouble of taking them in that same order... otherwise
3163 * if this object contains the majority of the pages resident
3164 * in the UBC (or a small set of large objects actively being
3165 * worked on contain the majority of the pages), we could
3166 * cause the pageout_scan thread to 'starve' in its attempt
3167 * to find pages to move to the free queue, since it has to
3168 * successfully acquire the object lock of any candidate page
3169 * before it can steal/clean it.
3170 */
3171 for (j = 0; ; j++) {
3172 vm_page_lock_queues();
3173
3174 if (vm_object_lock_try(object))
3175 break;
3176 vm_page_unlock_queues();
3177 mutex_pause(j);
3178 }
3179 delayed_unlock = 1;
3180
3181 continue;
3182 }
3183 vm_page_insert_internal(dst_page, object, dst_offset, TRUE);
3184
3185 dst_page->absent = TRUE;
3186 dst_page->busy = FALSE;
3187
3188 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
3189 /*
3190 * if UPL_RET_ONLY_ABSENT was specified,
3191 * then we're definitely setting up a
3192 * upl for a clustered read/pagein
3193 * operation... mark the pages as clustered
3194 * so upl_commit_range can put them on the
3195 * speculative list
3196 */
3197 dst_page->clustered = TRUE;
3198 }
3199 }
3200 /*
3201 * ENCRYPTED SWAP:
3202 */
3203 if (cntrl_flags & UPL_ENCRYPT) {
3204 /*
3205 * The page is going to be encrypted when we
3206 * get it from the pager, so mark it so.
3207 */
3208 dst_page->encrypted = TRUE;
3209 } else {
3210 /*
3211 * Otherwise, the page will not contain
3212 * encrypted data.
3213 */
3214 dst_page->encrypted = FALSE;
3215 }
3216 dst_page->overwriting = TRUE;
3217
3218 if (dst_page->fictitious) {
3219 panic("need corner case for fictitious page");
3220 }
3221 if (dst_page->busy) {
3222 /*
3223 * someone else is playing with the
3224 * page. We will have to wait.
3225 */
3226 delayed_unlock = 0;
3227 vm_page_unlock_queues();
3228
3229 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3230
3231 continue;
3232 }
3233 if (dst_page->pmapped) {
3234 if ( !(cntrl_flags & UPL_FILE_IO))
3235 /*
3236 * eliminate all mappings from the
3237 * original object and its progeny
3238 */
3239 refmod_state = pmap_disconnect(dst_page->phys_page);
3240 else
3241 refmod_state = pmap_get_refmod(dst_page->phys_page);
3242 } else
3243 refmod_state = 0;
3244
3245 hw_dirty = refmod_state & VM_MEM_MODIFIED;
3246 dirty = hw_dirty ? TRUE : dst_page->dirty;
3247
3248 if (cntrl_flags & UPL_SET_LITE) {
3249 int pg_num;
3250
3251 pg_num = (dst_offset-offset)/PAGE_SIZE;
3252 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
3253
3254 if (hw_dirty)
3255 pmap_clear_modify(dst_page->phys_page);
3256
3257 /*
3258 * Mark original page as cleaning
3259 * in place.
3260 */
3261 dst_page->cleaning = TRUE;
3262 dst_page->precious = FALSE;
3263 } else {
3264 /*
3265 * use pageclean setup, it is more
3266 * convenient even for the pageout
3267 * cases here
3268 */
3269 vm_object_lock(upl->map_object);
3270 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
3271 vm_object_unlock(upl->map_object);
3272
3273 alias_page->absent = FALSE;
3274 alias_page = NULL;
3275 }
3276
3277 if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
3278 /*
3279 * clean in place for read implies
3280 * that a write will be done on all
3281 * the pages that are dirty before
3282 * a upl commit is done. The caller
3283 * is obligated to preserve the
3284 * contents of all pages marked dirty
3285 */
3286 upl->flags |= UPL_CLEAR_DIRTY;
3287 }
3288 dst_page->dirty = dirty;
3289
3290 if (!dirty)
3291 dst_page->precious = TRUE;
3292
3293 if (dst_page->wire_count == 0) {
3294 /*
3295 * deny access to the target page while
3296 * it is being worked on
3297 */
3298 dst_page->busy = TRUE;
3299 } else
3300 vm_page_wire(dst_page);
3301
3302 if (dst_page->clustered) {
3303 /*
3304 * expect the page not to be used
3305 * since it's coming in as part
3306 * of a speculative cluster...
3307 * pages that are 'consumed' will
3308 * get a hardware reference
3309 */
3310 dst_page->reference = FALSE;
3311 } else {
3312 /*
3313 * expect the page to be used
3314 */
3315 dst_page->reference = TRUE;
3316 }
3317 dst_page->precious = (cntrl_flags & UPL_PRECIOUS) ? TRUE : FALSE;
3318 }
3319 if (dst_page->phys_page > upl->highest_page)
3320 upl->highest_page = dst_page->phys_page;
3321 if (user_page_list) {
3322 user_page_list[entry].phys_addr = dst_page->phys_page;
3323 user_page_list[entry].pageout = dst_page->pageout;
3324 user_page_list[entry].absent = dst_page->absent;
3325 user_page_list[entry].dirty = dst_page->dirty;
3326 user_page_list[entry].precious = dst_page->precious;
3327 user_page_list[entry].device = FALSE;
3328 if (dst_page->clustered == TRUE)
3329 user_page_list[entry].speculative = dst_page->speculative;
3330 else
3331 user_page_list[entry].speculative = FALSE;
3332 user_page_list[entry].cs_validated = dst_page->cs_validated;
3333 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
3334 }
3335 /*
3336 * if UPL_RET_ONLY_ABSENT is set, then
3337 * we are working with a fresh page and we've
3338 * just set the clustered flag on it to
3339 * indicate that it was dragged in as part of a
3340 * speculative cluster... so leave it alone
3341 */
3342 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
3343 /*
3344 * someone is explicitly grabbing this page...
3345 * update clustered and speculative state
3346 *
3347 */
3348 VM_PAGE_CONSUME_CLUSTERED(dst_page);
3349 }
3350 delay_unlock_queues:
3351 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
3352 /*
3353 * pageout_scan takes the vm_page_lock_queues first
3354 * then tries for the object lock... to avoid what
3355 * is effectively a lock inversion, we'll go to the
3356 * trouble of taking them in that same order... otherwise
3357 * if this object contains the majority of the pages resident
3358 * in the UBC (or a small set of large objects actively being
3359 * worked on contain the majority of the pages), we could
3360 * cause the pageout_scan thread to 'starve' in its attempt
3361 * to find pages to move to the free queue, since it has to
3362 * successfully acquire the object lock of any candidate page
3363 * before it can steal/clean it.
3364 */
3365 vm_object_unlock(object);
3366 mutex_yield(&vm_page_queue_lock);
3367
3368 for (j = 0; ; j++) {
3369 if (vm_object_lock_try(object))
3370 break;
3371 vm_page_unlock_queues();
3372 mutex_pause(j);
3373 vm_page_lock_queues();
3374 }
3375 delayed_unlock = 1;
3376 }
3377 try_next_page:
3378 entry++;
3379 dst_offset += PAGE_SIZE_64;
3380 xfer_size -= PAGE_SIZE;
3381 }
3382 if (alias_page != NULL) {
3383 if (delayed_unlock == 0) {
3384 vm_page_lock_queues();
3385 delayed_unlock = 1;
3386 }
3387 vm_page_free(alias_page);
3388 }
3389 if (delayed_unlock)
3390 vm_page_unlock_queues();
3391
3392 if (page_list_count != NULL) {
3393 if (upl->flags & UPL_INTERNAL)
3394 *page_list_count = 0;
3395 else if (*page_list_count > entry)
3396 *page_list_count = entry;
3397 }
3398 vm_object_unlock(object);
3399
3400 return KERN_SUCCESS;
3401 }
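
/*
 * Illustrative sketch: the lock-ordering dance that vm_object_upl_request()
 * repeats above (the "relock" and "delay_unlock_queues" paths), as a
 * user-space pthread analog.  pageout_scan takes the page-queues lock
 * before the object lock, so other paths take the queues lock first and
 * only *try* the object lock, backing off and retrying rather than
 * blocking with the queues lock held.  The lock names and the function
 * below are hypothetical stand-ins, not kernel primitives.
 */
#include <pthread.h>
#include <sched.h>

static pthread_mutex_t page_queues_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t object_lock      = PTHREAD_MUTEX_INITIALIZER;

static void
lock_queues_then_object(void)
{
	for (;;) {
		pthread_mutex_lock(&page_queues_lock);
		if (pthread_mutex_trylock(&object_lock) == 0)
			return;			/* both locks held, in scan order */
		pthread_mutex_unlock(&page_queues_lock);
		sched_yield();			/* analog of mutex_pause(j) */
	}
}
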
3402
3403 /* JMM - Backward compatibility for now */
3404 kern_return_t
3405 vm_fault_list_request( /* forward */
3406 memory_object_control_t control,
3407 vm_object_offset_t offset,
3408 upl_size_t size,
3409 upl_t *upl_ptr,
3410 upl_page_info_t **user_page_list_ptr,
3411 unsigned int page_list_count,
3412 int cntrl_flags);
3413 kern_return_t
3414 vm_fault_list_request(
3415 memory_object_control_t control,
3416 vm_object_offset_t offset,
3417 upl_size_t size,
3418 upl_t *upl_ptr,
3419 upl_page_info_t **user_page_list_ptr,
3420 unsigned int page_list_count,
3421 int cntrl_flags)
3422 {
3423 unsigned int local_list_count;
3424 upl_page_info_t *user_page_list;
3425 kern_return_t kr;
3426
3427 if (user_page_list_ptr != NULL) {
3428 local_list_count = page_list_count;
3429 user_page_list = *user_page_list_ptr;
3430 } else {
3431 local_list_count = 0;
3432 user_page_list = NULL;
3433 }
3434 kr = memory_object_upl_request(control,
3435 offset,
3436 size,
3437 upl_ptr,
3438 user_page_list,
3439 &local_list_count,
3440 cntrl_flags);
3441
3442 if(kr != KERN_SUCCESS)
3443 return kr;
3444
3445 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
3446 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
3447 }
3448
3449 return KERN_SUCCESS;
3450 }
3451
3452
3453
3454 /*
3455 * Routine: vm_object_super_upl_request
3456 * Purpose:
3457 * Cause the population of a portion of a vm_object
3458 * in much the same way as memory_object_upl_request.
3459 * Depending on the nature of the request, the pages
3460 * returned may contain valid data or be uninitialized.
3461 * However, the region may be expanded up to the super
3462 * cluster size provided.
3463 */
3464
3465 __private_extern__ kern_return_t
3466 vm_object_super_upl_request(
3467 vm_object_t object,
3468 vm_object_offset_t offset,
3469 upl_size_t size,
3470 upl_size_t super_cluster,
3471 upl_t *upl,
3472 upl_page_info_t *user_page_list,
3473 unsigned int *page_list_count,
3474 int cntrl_flags)
3475 {
3476 if (object->paging_offset > offset)
3477 return KERN_FAILURE;
3478
3479 assert(object->paging_in_progress);
3480 offset = offset - object->paging_offset;
3481
3482 if (super_cluster > size) {
3483
3484 vm_object_offset_t base_offset;
3485 upl_size_t super_size;
3486
3487 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
3488 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
3489 super_size = ((base_offset + super_size) > object->size) ? (object->size - base_offset) : super_size;
3490
3491 if (offset > (base_offset + super_size)) {
3492 panic("vm_object_super_upl_request: Missed target pageout"
3493 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
3494 offset, base_offset, super_size, super_cluster,
3495 size, object->paging_offset);
3496 }
3497 /*
3498 * apparently there is a case where the vm requests a
3499 * page to be written out whose offset is beyond the
3500 * object size
3501 */
3502 if ((offset + size) > (base_offset + super_size))
3503 super_size = (offset + size) - base_offset;
3504
3505 offset = base_offset;
3506 size = super_size;
3507 }
3508 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags);
3509 }
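
/*
 * Illustrative sketch: the cluster-expansion arithmetic performed by
 * vm_object_super_upl_request() above, pulled out so the rounding is
 * easier to follow.  super_cluster is assumed to be a power of two, as it
 * is for the real callers; the panic check on a missed target is omitted.
 */
#include <stdint.h>

static void
sketch_expand_cluster(uint64_t offset, uint32_t size,
		      uint32_t super_cluster, uint64_t object_size,
		      uint64_t *out_offset, uint32_t *out_size)
{
	/* round the start down to a super_cluster boundary */
	uint64_t base_offset = offset & ~((uint64_t)super_cluster - 1);

	/* double the cluster if the original request spills past it */
	uint64_t super_size = (offset + size) > (base_offset + super_cluster)
	    ? (uint64_t)super_cluster << 1 : super_cluster;

	/* never run past the end of the object... */
	if (base_offset + super_size > object_size)
		super_size = object_size - base_offset;

	/* ...but always cover the caller's original range */
	if ((offset + size) > (base_offset + super_size))
		super_size = (offset + size) - base_offset;

	*out_offset = base_offset;
	*out_size = (uint32_t)super_size;
}
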
3510
3511
3512 kern_return_t
3513 vm_map_create_upl(
3514 vm_map_t map,
3515 vm_map_address_t offset,
3516 upl_size_t *upl_size,
3517 upl_t *upl,
3518 upl_page_info_array_t page_list,
3519 unsigned int *count,
3520 int *flags)
3521 {
3522 vm_map_entry_t entry;
3523 int caller_flags;
3524 int force_data_sync;
3525 int sync_cow_data;
3526 vm_object_t local_object;
3527 vm_map_offset_t local_offset;
3528 vm_map_offset_t local_start;
3529 kern_return_t ret;
3530
3531 caller_flags = *flags;
3532
3533 if (caller_flags & ~UPL_VALID_FLAGS) {
3534 /*
3535 * For forward compatibility's sake,
3536 * reject any unknown flag.
3537 */
3538 return KERN_INVALID_VALUE;
3539 }
3540 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
3541 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
3542
3543 if (upl == NULL)
3544 return KERN_INVALID_ARGUMENT;
3545
3546 REDISCOVER_ENTRY:
3547 vm_map_lock(map);
3548
3549 if (vm_map_lookup_entry(map, offset, &entry)) {
3550
3551 if ((entry->vme_end - offset) < *upl_size)
3552 *upl_size = entry->vme_end - offset;
3553
3554 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
3555 *flags = 0;
3556
3557 if (entry->object.vm_object != VM_OBJECT_NULL) {
3558 if (entry->object.vm_object->private)
3559 *flags = UPL_DEV_MEMORY;
3560
3561 if (entry->object.vm_object->phys_contiguous)
3562 *flags |= UPL_PHYS_CONTIG;
3563 }
3564 vm_map_unlock(map);
3565
3566 return KERN_SUCCESS;
3567 }
3568 if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) {
3569 if ((*upl_size/page_size) > MAX_UPL_SIZE)
3570 *upl_size = MAX_UPL_SIZE * page_size;
3571 }
3572 /*
3573 * Create an object if necessary.
3574 */
3575 if (entry->object.vm_object == VM_OBJECT_NULL) {
3576 entry->object.vm_object = vm_object_allocate((vm_size_t)(entry->vme_end - entry->vme_start));
3577 entry->offset = 0;
3578 }
3579 if (!(caller_flags & UPL_COPYOUT_FROM)) {
3580 if (!(entry->protection & VM_PROT_WRITE)) {
3581 vm_map_unlock(map);
3582 return KERN_PROTECTION_FAILURE;
3583 }
3584 if (entry->needs_copy) {
3585 vm_map_t local_map;
3586 vm_object_t object;
3587 vm_object_offset_t new_offset;
3588 vm_prot_t prot;
3589 boolean_t wired;
3590 vm_map_version_t version;
3591 vm_map_t real_map;
3592
3593 local_map = map;
3594 vm_map_lock_write_to_read(map);
3595
3596 if (vm_map_lookup_locked(&local_map,
3597 offset, VM_PROT_WRITE,
3598 OBJECT_LOCK_EXCLUSIVE,
3599 &version, &object,
3600 &new_offset, &prot, &wired,
3601 NULL,
3602 &real_map)) {
3603 vm_map_unlock(local_map);
3604 return KERN_FAILURE;
3605 }
3606 if (real_map != map)
3607 vm_map_unlock(real_map);
3608 vm_object_unlock(object);
3609 vm_map_unlock(local_map);
3610
3611 goto REDISCOVER_ENTRY;
3612 }
3613 }
3614 if (entry->is_sub_map) {
3615 vm_map_t submap;
3616
3617 submap = entry->object.sub_map;
3618 local_start = entry->vme_start;
3619 local_offset = entry->offset;
3620
3621 vm_map_reference(submap);
3622 vm_map_unlock(map);
3623
3624 ret = vm_map_create_upl(submap,
3625 local_offset + (offset - local_start),
3626 upl_size, upl, page_list, count, flags);
3627 vm_map_deallocate(submap);
3628
3629 return ret;
3630 }
3631 if (sync_cow_data) {
3632 if (entry->object.vm_object->shadow || entry->object.vm_object->copy) {
3633 local_object = entry->object.vm_object;
3634 local_start = entry->vme_start;
3635 local_offset = entry->offset;
3636
3637 vm_object_reference(local_object);
3638 vm_map_unlock(map);
3639
3640 if (entry->object.vm_object->shadow && entry->object.vm_object->copy) {
3641 vm_object_lock_request(
3642 local_object->shadow,
3643 (vm_object_offset_t)
3644 ((offset - local_start) +
3645 local_offset) +
3646 local_object->shadow_offset,
3647 *upl_size, FALSE,
3648 MEMORY_OBJECT_DATA_SYNC,
3649 VM_PROT_NO_CHANGE);
3650 }
3651 sync_cow_data = FALSE;
3652 vm_object_deallocate(local_object);
3653
3654 goto REDISCOVER_ENTRY;
3655 }
3656 }
3657 if (force_data_sync) {
3658 local_object = entry->object.vm_object;
3659 local_start = entry->vme_start;
3660 local_offset = entry->offset;
3661
3662 vm_object_reference(local_object);
3663 vm_map_unlock(map);
3664
3665 vm_object_lock_request(
3666 local_object,
3667 (vm_object_offset_t)
3668 ((offset - local_start) + local_offset),
3669 (vm_object_size_t)*upl_size, FALSE,
3670 MEMORY_OBJECT_DATA_SYNC,
3671 VM_PROT_NO_CHANGE);
3672
3673 force_data_sync = FALSE;
3674 vm_object_deallocate(local_object);
3675
3676 goto REDISCOVER_ENTRY;
3677 }
3678 if (entry->object.vm_object->private)
3679 *flags = UPL_DEV_MEMORY;
3680 else
3681 *flags = 0;
3682
3683 if (entry->object.vm_object->phys_contiguous)
3684 *flags |= UPL_PHYS_CONTIG;
3685
3686 local_object = entry->object.vm_object;
3687 local_offset = entry->offset;
3688 local_start = entry->vme_start;
3689
3690 vm_object_reference(local_object);
3691 vm_map_unlock(map);
3692
3693 ret = vm_object_iopl_request(local_object,
3694 (vm_object_offset_t) ((offset - local_start) + local_offset),
3695 *upl_size,
3696 upl,
3697 page_list,
3698 count,
3699 caller_flags);
3700 vm_object_deallocate(local_object);
3701
3702 return(ret);
3703 }
3704 vm_map_unlock(map);
3705
3706 return(KERN_FAILURE);
3707 }
3708
3709 /*
3710 * Internal routine to enter a UPL into a VM map.
3711 *
3712 * JMM - This should just be doable through the standard
3713 * vm_map_enter() API.
3714 */
3715 kern_return_t
3716 vm_map_enter_upl(
3717 vm_map_t map,
3718 upl_t upl,
3719 vm_map_offset_t *dst_addr)
3720 {
3721 vm_map_size_t size;
3722 vm_object_offset_t offset;
3723 vm_map_offset_t addr;
3724 vm_page_t m;
3725 kern_return_t kr;
3726
3727 if (upl == UPL_NULL)
3728 return KERN_INVALID_ARGUMENT;
3729
3730 upl_lock(upl);
3731
3732 /*
3733 * check to see if already mapped
3734 */
3735 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
3736 upl_unlock(upl);
3737 return KERN_FAILURE;
3738 }
3739
3740 if ((!(upl->flags & UPL_SHADOWED)) && !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) ||
3741 (upl->map_object->phys_contiguous))) {
3742 vm_object_t object;
3743 vm_page_t alias_page;
3744 vm_object_offset_t new_offset;
3745 int pg_num;
3746 wpl_array_t lite_list;
3747
3748 if (upl->flags & UPL_INTERNAL) {
3749 lite_list = (wpl_array_t)
3750 ((((uintptr_t)upl) + sizeof(struct upl))
3751 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3752 } else {
3753 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
3754 }
3755 object = upl->map_object;
3756 upl->map_object = vm_object_allocate(upl->size);
3757
3758 vm_object_lock(upl->map_object);
3759
3760 upl->map_object->shadow = object;
3761 upl->map_object->pageout = TRUE;
3762 upl->map_object->can_persist = FALSE;
3763 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3764 upl->map_object->shadow_offset = upl->offset - object->paging_offset;
3765 upl->map_object->wimg_bits = object->wimg_bits;
3766 offset = upl->map_object->shadow_offset;
3767 new_offset = 0;
3768 size = upl->size;
3769
3770 upl->flags |= UPL_SHADOWED;
3771
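/*
 * Walk the lite list one page at a time: each page of the UPL is
 * a single bit, with (pg_num >> 5) selecting the 32-bit word and
 * (pg_num & 31) selecting the bit within it. For every bit that is
 * set, grab a fictitious page, turn it into a private alias of the
 * real page's physical page, wire it, and insert it into the new
 * map_object at the same relative offset.
 */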
3772 while (size) {
3773 pg_num = (new_offset)/PAGE_SIZE;
3774
3775 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3776
3777 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3778
3779 vm_object_lock(object);
3780
3781 m = vm_page_lookup(object, offset);
3782 if (m == VM_PAGE_NULL) {
3783 panic("vm_upl_map: page missing\n");
3784 }
3785
3786 /*
3787 * Convert the fictitious page to a private
3788 * shadow of the real page.
3789 */
3790 assert(alias_page->fictitious);
3791 alias_page->fictitious = FALSE;
3792 alias_page->private = TRUE;
3793 alias_page->pageout = TRUE;
3794 /*
3795 * since m is a page in the upl it must
3796 * already be wired or BUSY, so it's
3797 * safe to assign the underlying physical
3798 * page to the alias
3799 */
3800 alias_page->phys_page = m->phys_page;
3801
3802 vm_object_unlock(object);
3803
3804 vm_page_lockspin_queues();
3805 vm_page_wire(alias_page);
3806 vm_page_unlock_queues();
3807
3808 /*
3809 * ENCRYPTED SWAP:
3810 * The virtual page ("m") has to be wired in some way
3811 * here or its physical page ("m->phys_page") could
3812 * be recycled at any time.
3813 * Assuming this is enforced by the caller, we can't
3814 * get an encrypted page here. Since the encryption
3815 * key depends on the VM page's "pager" object and
3816 * the "paging_offset", we couldn't handle 2 pageable
3817 * VM pages (with different pagers and paging_offsets)
3818 * sharing the same physical page: we could end up
3819 * encrypting with one key (via one VM page) and
3820 * decrypting with another key (via the alias VM page).
3821 */
3822 ASSERT_PAGE_DECRYPTED(m);
3823
3824 vm_page_insert(alias_page, upl->map_object, new_offset);
3825
3826 assert(!alias_page->wanted);
3827 alias_page->busy = FALSE;
3828 alias_page->absent = FALSE;
3829 }
3830 size -= PAGE_SIZE;
3831 offset += PAGE_SIZE_64;
3832 new_offset += PAGE_SIZE_64;
3833 }
3834 vm_object_unlock(upl->map_object);
3835 }
3836 if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous)
3837 offset = upl->offset - upl->map_object->paging_offset;
3838 else
3839 offset = 0;
3840 size = upl->size;
3841
3842 vm_object_reference(upl->map_object);
3843
3844 *dst_addr = 0;
3845 /*
3846 * NEED A UPL_MAP ALIAS
3847 */
3848 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
3849 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE,
3850 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
3851
3852 if (kr != KERN_SUCCESS) {
3853 upl_unlock(upl);
3854 return(kr);
3855 }
3856 vm_object_lock(upl->map_object);
3857
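/*
 * With the map entry in place, run through the mapped range page by
 * page: look up each resident page in the map_object, mark it
 * pmapped/wpmapped, and enter it into the target map's pmap using
 * the object's cache attributes.
 */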
3858 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
3859 m = vm_page_lookup(upl->map_object, offset);
3860
3861 if (m) {
3862 unsigned int cache_attr;
3863 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3864
3865 m->pmapped = TRUE;
3866 m->wpmapped = TRUE;
3867
3868 PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, cache_attr, TRUE);
3869 }
3870 offset += PAGE_SIZE_64;
3871 }
3872 vm_object_unlock(upl->map_object);
3873
3874 /*
3875 * hold a reference for the mapping
3876 */
3877 upl->ref_count++;
3878 upl->flags |= UPL_PAGE_LIST_MAPPED;
3879 upl->kaddr = *dst_addr;
3880 upl_unlock(upl);
3881
3882 return KERN_SUCCESS;
3883 }
3884
3885 /*
3886 * Internal routine to remove a UPL mapping from a VM map.
3887 *
3888 * XXX - This should just be doable through a standard
3889 * vm_map_remove() operation. Otherwise, implicit clean-up
3890 * of the target map won't be able to correctly remove
3891 * these (and release the reference on the UPL). Having
3892 * to do this means we can't map these into user-space
3893 * maps yet.
3894 */
3895 kern_return_t
3896 vm_map_remove_upl(
3897 vm_map_t map,
3898 upl_t upl)
3899 {
3900 vm_address_t addr;
3901 upl_size_t size;
3902
3903 if (upl == UPL_NULL)
3904 return KERN_INVALID_ARGUMENT;
3905
3906 upl_lock(upl);
3907
3908 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
3909 addr = upl->kaddr;
3910 size = upl->size;
3911
3912 assert(upl->ref_count > 1);
3913 upl->ref_count--; /* removing mapping ref */
3914
3915 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
3916 upl->kaddr = (vm_offset_t) 0;
3917 upl_unlock(upl);
3918
3919 vm_map_remove(map,
3920 vm_map_trunc_page(addr),
3921 vm_map_round_page(addr + size),
3922 VM_MAP_NO_FLAGS);
3923
3924 return KERN_SUCCESS;
3925 }
3926 upl_unlock(upl);
3927
3928 return KERN_FAILURE;
3929 }
3930
3931 kern_return_t
3932 upl_commit_range(
3933 upl_t upl,
3934 upl_offset_t offset,
3935 upl_size_t size,
3936 int flags,
3937 upl_page_info_t *page_list,
3938 mach_msg_type_number_t count,
3939 boolean_t *empty)
3940 {
3941 upl_size_t xfer_size;
3942 vm_object_t shadow_object;
3943 vm_object_t object;
3944 vm_object_offset_t target_offset;
3945 int entry;
3946 wpl_array_t lite_list;
3947 int occupied;
3948 int delayed_unlock = 0;
3949 int clear_refmod = 0;
3950 int pgpgout_count = 0;
3951 int j;
3952
3953 *empty = FALSE;
3954
3955 if (upl == UPL_NULL)
3956 return KERN_INVALID_ARGUMENT;
3957
3958 if (count == 0)
3959 page_list = NULL;
3960
3961 if (upl->flags & UPL_DEVICE_MEMORY)
3962 xfer_size = 0;
3963 else if ((offset + size) <= upl->size)
3964 xfer_size = size;
3965 else
3966 return KERN_FAILURE;
3967
3968 upl_lock(upl);
3969
3970 if (upl->flags & UPL_ACCESS_BLOCKED) {
3971 /*
3972 * We used this UPL to block access to the pages by marking
3973 * them "busy". Now we need to clear the "busy" bit to allow
3974 * access to these pages again.
3975 */
3976 flags |= UPL_COMMIT_ALLOW_ACCESS;
3977 }
3978 if (upl->flags & UPL_CLEAR_DIRTY)
3979 flags |= UPL_COMMIT_CLEAR_DIRTY;
3980
3981 if (upl->flags & UPL_INTERNAL)
3982 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
3983 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3984 else
3985 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
3986
3987 object = upl->map_object;
3988
3989 if (upl->flags & UPL_SHADOWED) {
3990 vm_object_lock(object);
3991 shadow_object = object->shadow;
3992 } else {
3993 shadow_object = object;
3994 }
3995 entry = offset/PAGE_SIZE;
3996 target_offset = (vm_object_offset_t)offset;
3997
3998 /*
3999 * pageout_scan takes the vm_page_lock_queues first
4000 * then tries for the object lock... to avoid what
4001 * is effectively a lock inversion, we'll go to the
4002 * trouble of taking them in that same order... otherwise
4003 * if this object contains the majority of the pages resident
4004 * in the UBC (or a small set of large objects actively being
4005 * worked on contain the majority of the pages), we could
4006 * cause the pageout_scan thread to 'starve' in its attempt
4007 * to find pages to move to the free queue, since it has to
4008 * successfully acquire the object lock of any candidate page
4009 * before it can steal/clean it.
4010 */
4011 for (j = 0; ; j++) {
4012 vm_page_lock_queues();
4013
4014 if (vm_object_lock_try(shadow_object))
4015 break;
4016 vm_page_unlock_queues();
4017 mutex_pause(j);
4018 }
4019 delayed_unlock = 1;
4020
4021 if (shadow_object->code_signed) {
4022 /*
4023 * CODE SIGNING:
4024 * If the object is code-signed, do not let this UPL tell
4025 * us if the pages are valid or not. Let the pages be
4026 * validated by VM the normal way (when they get mapped or
4027 * copied).
4028 */
4029 flags &= ~UPL_COMMIT_CS_VALIDATED;
4030 }
4031 if (! page_list) {
4032 /*
4033 * No page list to get the code-signing info from !?
4034 */
4035 flags &= ~UPL_COMMIT_CS_VALIDATED;
4036 }
4037
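/*
 * Commit the range one page at a time. For a lite UPL the page is
 * located via the lite-list bitmap; for a shadowed UPL the alias
 * page in the map_object is freed and the real page is looked up
 * in the shadow object. The per-page work below updates the
 * dirty/reference state, finishes the pageout cases, and wakes any
 * waiters on the page.
 */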
4038 while (xfer_size) {
4039 vm_page_t t, m;
4040
4041 m = VM_PAGE_NULL;
4042
4043 if (upl->flags & UPL_LITE) {
4044 int pg_num;
4045
4046 pg_num = target_offset/PAGE_SIZE;
4047
4048 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4049 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4050
4051 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
4052 }
4053 }
4054 if (upl->flags & UPL_SHADOWED) {
4055 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
4056
4057 t->pageout = FALSE;
4058
4059 vm_page_free(t);
4060
4061 if (m == VM_PAGE_NULL)
4062 m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
4063 }
4064 }
4065 if (m == VM_PAGE_NULL) {
4066 goto commit_next_page;
4067 }
4068
4069 clear_refmod = 0;
4070
4071 if (flags & UPL_COMMIT_CS_VALIDATED) {
4072 /*
4073 * CODE SIGNING:
4074 * Set the code signing bits according to
4075 * what the UPL says they should be.
4076 */
4077 m->cs_validated = page_list[entry].cs_validated;
4078 m->cs_tainted = page_list[entry].cs_tainted;
4079 }
4080 if (upl->flags & UPL_IO_WIRE) {
4081
4082 vm_page_unwire(m);
4083
4084 if (page_list)
4085 page_list[entry].phys_addr = 0;
4086
4087 if (flags & UPL_COMMIT_SET_DIRTY)
4088 m->dirty = TRUE;
4089 else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
4090 m->dirty = FALSE;
4091 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
4092 m->cs_validated && !m->cs_tainted) {
4093 /*
4094 * CODE SIGNING:
4095 * This page is no longer dirty
4096 * but could have been modified,
4097 * so it will need to be
4098 * re-validated.
4099 */
4100 m->cs_validated = FALSE;
4101 vm_cs_validated_resets++;
4102 }
4103 clear_refmod |= VM_MEM_MODIFIED;
4104 }
4105
4106 if (flags & UPL_COMMIT_INACTIVATE)
4107 vm_page_deactivate(m);
4108
4109 if (clear_refmod)
4110 pmap_clear_refmod(m->phys_page, clear_refmod);
4111
4112 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
4113 /*
4114 * We blocked access to the pages in this UPL.
4115 * Clear the "busy" bit and wake up any waiter
4116 * for this page.
4117 */
4118 PAGE_WAKEUP_DONE(m);
4119 }
4120 goto commit_next_page;
4121 }
4122 /*
4123 * make sure to clear the hardware
4124 * modify or reference bits before
4125 * releasing the BUSY bit on this page
4126 * otherwise we risk losing a legitimate
4127 * change of state
4128 */
4129 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
4130 m->dirty = FALSE;
4131
4132 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
4133 m->cs_validated && !m->cs_tainted) {
4134 /*
4135 * CODE SIGNING:
4136 * This page is no longer dirty
4137 * but could have been modified,
4138 * so it will need to be
4139 * re-validated.
4140 */
4141 m->cs_validated = FALSE;
4142 #if DEVELOPMENT || DEBUG
4143 vm_cs_validated_resets++;
4144 #endif
4145 }
4146 clear_refmod |= VM_MEM_MODIFIED;
4147 }
4148 if (clear_refmod)
4149 pmap_clear_refmod(m->phys_page, clear_refmod);
4150
4151 if (page_list) {
4152 upl_page_info_t *p;
4153
4154 p = &(page_list[entry]);
4155
4156 if (p->phys_addr && p->pageout && !m->pageout) {
4157 m->busy = TRUE;
4158 m->pageout = TRUE;
4159 vm_page_wire(m);
4160 } else if (p->phys_addr &&
4161 !p->pageout && m->pageout &&
4162 !m->dump_cleaning) {
4163 m->pageout = FALSE;
4164 m->absent = FALSE;
4165 m->overwriting = FALSE;
4166 vm_page_unwire(m);
4167
4168 PAGE_WAKEUP_DONE(m);
4169 }
4170 page_list[entry].phys_addr = 0;
4171 }
4172 m->dump_cleaning = FALSE;
4173
4174 if (m->laundry)
4175 vm_pageout_throttle_up(m);
4176
4177 if (m->pageout) {
4178 m->cleaning = FALSE;
4179 m->encrypted_cleaning = FALSE;
4180 m->pageout = FALSE;
4181 #if MACH_CLUSTER_STATS
4182 if (m->wanted) vm_pageout_target_collisions++;
4183 #endif
4184 m->dirty = FALSE;
4185
4186 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
4187 m->cs_validated && !m->cs_tainted) {
4188 /*
4189 * CODE SIGNING:
4190 * This page is no longer dirty
4191 * but could have been modified,
4192 * so it will need to be
4193 * re-validated.
4194 */
4195 m->cs_validated = FALSE;
4196 #if DEVELOPMENT || DEBUG
4197 vm_cs_validated_resets++;
4198 #endif
4199 }
4200
4201 if (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))
4202 m->dirty = TRUE;
4203
4204 if (m->dirty) {
4205 /*
4206 * page was re-dirtied after we started
4207 * the pageout... reactivate it since
4208 * we don't know whether the on-disk
4209 * copy matches what is now in memory
4210 */
4211 vm_page_unwire(m);
4212
4213 if (upl->flags & UPL_PAGEOUT) {
4214 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
4215 VM_STAT_INCR(reactivations);
4216 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4217 }
4218 PAGE_WAKEUP_DONE(m);
4219 } else {
4220 /*
4221 * page has been successfully cleaned
4222 * go ahead and free it for other use
4223 */
4224
4225 if (m->object->internal) {
4226 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
4227 } else {
4228 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
4229 }
4230
4231 vm_page_free(m);
4232
4233 if (upl->flags & UPL_PAGEOUT) {
4234 CLUSTER_STAT(vm_pageout_target_page_freed++;)
4235
4236 if (page_list[entry].dirty) {
4237 VM_STAT_INCR(pageouts);
4238 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
4239 pgpgout_count++;
4240 }
4241 }
4242 }
4243 goto commit_next_page;
4244 }
4245 #if MACH_CLUSTER_STATS
4246 if (m->wpmapped)
4247 m->dirty = pmap_is_modified(m->phys_page);
4248
4249 if (m->dirty) vm_pageout_cluster_dirtied++;
4250 else vm_pageout_cluster_cleaned++;
4251 if (m->wanted) vm_pageout_cluster_collisions++;
4252 #endif
4253 m->dirty = FALSE;
4254
4255 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
4256 m->cs_validated && !m->cs_tainted) {
4257 /*
4258 * CODE SIGNING:
4259 * This page is no longer dirty
4260 * but could have been modified,
4261 * so it will need to be
4262 * re-validated.
4263 */
4264 m->cs_validated = FALSE;
4265 #if DEVELOPMENT || DEBUG
4266 vm_cs_validated_resets++;
4267 #endif
4268 }
4269
4270 if ((m->busy) && (m->cleaning)) {
4271 /*
4272 * the request_page_list case
4273 */
4274 m->absent = FALSE;
4275 m->overwriting = FALSE;
4276 m->busy = FALSE;
4277 } else if (m->overwriting) {
4278 /*
4279 * alternate request page list, write to
4280 * page_list case. Occurs when the original
4281 * page was wired at the time of the list
4282 * request
4283 */
4284 assert(m->wire_count != 0);
4285 vm_page_unwire(m);/* reactivates */
4286 m->overwriting = FALSE;
4287 }
4288 m->cleaning = FALSE;
4289 m->encrypted_cleaning = FALSE;
4290
4291 /*
4292 * It is part of the semantics of COPYOUT_FROM
4293 * UPLs that a commit implies a cache sync
4294 * between the vm page and the backing store;
4295 * this can be used to strip the precious bit
4296 * as well as to clean
4297 */
4298 if (upl->flags & UPL_PAGE_SYNC_DONE)
4299 m->precious = FALSE;
4300
4301 if (flags & UPL_COMMIT_SET_DIRTY)
4302 m->dirty = TRUE;
4303
4304 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
4305 vm_page_deactivate(m);
4306 } else if (!m->active && !m->inactive && !m->speculative) {
4307
4308 if (m->clustered)
4309 vm_page_speculate(m, TRUE);
4310 else if (m->reference)
4311 vm_page_activate(m);
4312 else
4313 vm_page_deactivate(m);
4314 }
4315 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
4316 /*
4317 * We blocked access to the pages in this UPL.
4318 * Clear the "busy" bit on this page before we
4319 * wake up any waiter.
4320 */
4321 m->busy = FALSE;
4322 }
4323 /*
4324 * Wake up any thread waiting for the page to be done cleaning.
4325 */
4326 PAGE_WAKEUP(m);
4327
4328 commit_next_page:
4329 target_offset += PAGE_SIZE_64;
4330 xfer_size -= PAGE_SIZE;
4331 entry++;
4332
4333 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
4334 /*
4335 * pageout_scan takes the vm_page_lock_queues first
4336 * then tries for the object lock... to avoid what
4337 * is effectively a lock inversion, we'll go to the
4338 * trouble of taking them in that same order... otherwise
4339 * if this object contains the majority of the pages resident
4340 * in the UBC (or a small set of large objects actively being
4341 * worked on contain the majority of the pages), we could
4342 * cause the pageout_scan thread to 'starve' in its attempt
4343 * to find pages to move to the free queue, since it has to
4344 * successfully acquire the object lock of any candidate page
4345 * before it can steal/clean it.
4346 */
4347 vm_object_unlock(shadow_object);
4348 mutex_yield(&vm_page_queue_lock);
4349
4350 for (j = 0; ; j++) {
4351 if (vm_object_lock_try(shadow_object))
4352 break;
4353 vm_page_unlock_queues();
4354 mutex_pause(j);
4355 vm_page_lock_queues();
4356 }
4357 delayed_unlock = 1;
4358 }
4359 }
4360 if (delayed_unlock)
4361 vm_page_unlock_queues();
4362
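/*
 * Decide whether the UPL still has pages associated with it:
 * device-memory UPLs never do, a lite UPL is empty once no bits
 * remain set in its lite list, and a shadowed UPL is empty once the
 * map_object's memq is empty. When the UPL is empty, drop the
 * paging reference taken at UPL creation, unless the map object is
 * a distinct shadow whose reference vm_pageout_object_terminate
 * will drop instead.
 */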
4363 occupied = 1;
4364
4365 if (upl->flags & UPL_DEVICE_MEMORY) {
4366 occupied = 0;
4367 } else if (upl->flags & UPL_LITE) {
4368 int pg_num;
4369 int i;
4370
4371 pg_num = upl->size/PAGE_SIZE;
4372 pg_num = (pg_num + 31) >> 5;
4373 occupied = 0;
4374
4375 for (i = 0; i < pg_num; i++) {
4376 if (lite_list[i] != 0) {
4377 occupied = 1;
4378 break;
4379 }
4380 }
4381 } else {
4382 if (queue_empty(&upl->map_object->memq))
4383 occupied = 0;
4384 }
4385 if (occupied == 0) {
4386 if (upl->flags & UPL_COMMIT_NOTIFY_EMPTY)
4387 *empty = TRUE;
4388
4389 if (object == shadow_object) {
4390 /*
4391 * this is not a paging object
4392 * so we need to drop the paging reference
4393 * that was taken when we created the UPL
4394 * against this object
4395 */
4396 vm_object_paging_end(shadow_object);
4397 } else {
4398 /*
4399 * we donated the paging reference to
4400 * the map object... vm_pageout_object_terminate
4401 * will drop this reference
4402 */
4403 }
4404 }
4405 vm_object_unlock(shadow_object);
4406 if (object != shadow_object)
4407 vm_object_unlock(object);
4408 upl_unlock(upl);
4409
4410 if (pgpgout_count) {
4411 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
4412 }
4413
4414 return KERN_SUCCESS;
4415 }
4416
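/*
 * Abort a range of a UPL: undo the busy/cleaning state set up when
 * the pages were gathered, and either free (dump), re-reference or
 * simply release the pages according to the error flags. An
 * I/O-wired UPL without UPL_ABORT_DUMP_PAGES is committed instead.
 */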
4417 kern_return_t
4418 upl_abort_range(
4419 upl_t upl,
4420 upl_offset_t offset,
4421 upl_size_t size,
4422 int error,
4423 boolean_t *empty)
4424 {
4425 upl_size_t xfer_size;
4426 vm_object_t shadow_object;
4427 vm_object_t object;
4428 vm_object_offset_t target_offset;
4429 int entry;
4430 wpl_array_t lite_list;
4431 int occupied;
4432 int delayed_unlock = 0;
4433 int j;
4434
4435 *empty = FALSE;
4436
4437 if (upl == UPL_NULL)
4438 return KERN_INVALID_ARGUMENT;
4439
4440 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
4441 return upl_commit_range(upl, offset, size, 0, NULL, 0, empty);
4442
4443 if (upl->flags & UPL_DEVICE_MEMORY)
4444 xfer_size = 0;
4445 else if ((offset + size) <= upl->size)
4446 xfer_size = size;
4447 else
4448 return KERN_FAILURE;
4449
4450 upl_lock(upl);
4451
4452 if (upl->flags & UPL_INTERNAL) {
4453 lite_list = (wpl_array_t)
4454 ((((uintptr_t)upl) + sizeof(struct upl))
4455 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4456 } else {
4457 lite_list = (wpl_array_t)
4458 (((uintptr_t)upl) + sizeof(struct upl));
4459 }
4460 object = upl->map_object;
4461
4462 if (upl->flags & UPL_SHADOWED) {
4463 vm_object_lock(object);
4464 shadow_object = object->shadow;
4465 } else
4466 shadow_object = object;
4467
4468 entry = offset/PAGE_SIZE;
4469 target_offset = (vm_object_offset_t)offset;
4470
4471 /*
4472 * pageout_scan takes the vm_page_lock_queues first
4473 * then tries for the object lock... to avoid what
4474 * is effectively a lock inversion, we'll go to the
4475 * trouble of taking them in that same order... otherwise
4476 * if this object contains the majority of the pages resident
4477 * in the UBC (or a small set of large objects actively being
4478 * worked on contain the majority of the pages), we could
4479 * cause the pageout_scan thread to 'starve' in its attempt
4480 * to find pages to move to the free queue, since it has to
4481 * successfully acquire the object lock of any candidate page
4482 * before it can steal/clean it.
4483 */
4484 for (j = 0; ; j++) {
4485 vm_page_lock_queues();
4486
4487 if (vm_object_lock_try(shadow_object))
4488 break;
4489 vm_page_unlock_queues();
4490 mutex_pause(j);
4491 }
4492 delayed_unlock = 1;
4493
4494 while (xfer_size) {
4495 vm_page_t t, m;
4496
4497 m = VM_PAGE_NULL;
4498
4499 if (upl->flags & UPL_LITE) {
4500 int pg_num;
4501 pg_num = target_offset/PAGE_SIZE;
4502
4503 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4504 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4505
4506 m = vm_page_lookup(shadow_object, target_offset +
4507 (upl->offset - shadow_object->paging_offset));
4508 }
4509 }
4510 if (upl->flags & UPL_SHADOWED) {
4511 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
4512 t->pageout = FALSE;
4513
4514 vm_page_free(t);
4515
4516 if (m == VM_PAGE_NULL)
4517 m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
4518 }
4519 }
4520 if (m != VM_PAGE_NULL) {
4521
4522 if (m->absent) {
4523 boolean_t must_free = TRUE;
4524
4525 m->clustered = FALSE;
4526 /*
4527 * COPYOUT = FALSE case
4528 * check for error conditions which must
4529 * be passed back to the page's customer
4530 */
4531 if (error & UPL_ABORT_RESTART) {
4532 m->restart = TRUE;
4533 m->absent = FALSE;
4534 m->error = TRUE;
4535 m->unusual = TRUE;
4536 must_free = FALSE;
4537 } else if (error & UPL_ABORT_UNAVAILABLE) {
4538 m->restart = FALSE;
4539 m->unusual = TRUE;
4540 must_free = FALSE;
4541 } else if (error & UPL_ABORT_ERROR) {
4542 m->restart = FALSE;
4543 m->absent = FALSE;
4544 m->error = TRUE;
4545 m->unusual = TRUE;
4546 must_free = FALSE;
4547 }
4548
4549 /*
4550 * ENCRYPTED SWAP:
4551 * If the page was already encrypted,
4552 * we don't really need to decrypt it
4553 * now. It will get decrypted later,
4554 * on demand, as soon as someone needs
4555 * to access its contents.
4556 */
4557
4558 m->cleaning = FALSE;
4559 m->encrypted_cleaning = FALSE;
4560 m->overwriting = FALSE;
4561 PAGE_WAKEUP_DONE(m);
4562
4563 if (must_free == TRUE)
4564 vm_page_free(m);
4565 else
4566 vm_page_activate(m);
4567 } else {
4568 /*
4569 * Handle the trusted pager throttle.
4570 */
4571 if (m->laundry)
4572 vm_pageout_throttle_up(m);
4573
4574 if (m->pageout) {
4575 assert(m->busy);
4576 assert(m->wire_count == 1);
4577 m->pageout = FALSE;
4578 vm_page_unwire(m);
4579 }
4580 m->dump_cleaning = FALSE;
4581 m->cleaning = FALSE;
4582 m->encrypted_cleaning = FALSE;
4583 m->overwriting = FALSE;
4584 #if MACH_PAGEMAP
4585 vm_external_state_clr(m->object->existence_map, m->offset);
4586 #endif /* MACH_PAGEMAP */
4587 if (error & UPL_ABORT_DUMP_PAGES) {
4588 pmap_disconnect(m->phys_page);
4589 vm_page_free(m);
4590 } else {
4591 if (error & UPL_ABORT_REFERENCE) {
4592 /*
4593 * we've been told to explicitly
4594 * reference this page... for
4595 * file I/O, this is done by
4596 * implementing an LRU on the inactive q
4597 */
4598 vm_page_lru(m);
4599 }
4600 PAGE_WAKEUP_DONE(m);
4601 }
4602 }
4603 }
4604 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
4605 /*
4606 * pageout_scan takes the vm_page_lock_queues first
4607 * then tries for the object lock... to avoid what
4608 * is effectively a lock inversion, we'll go to the
4609 * trouble of taking them in that same order... otherwise
4610 * if this object contains the majority of the pages resident
4611 * in the UBC (or a small set of large objects actively being
4612 * worked on contain the majority of the pages), we could
4613 * cause the pageout_scan thread to 'starve' in its attempt
4614 * to find pages to move to the free queue, since it has to
4615 * successfully acquire the object lock of any candidate page
4616 * before it can steal/clean it.
4617 */
4618 vm_object_unlock(shadow_object);
4619 mutex_yield(&vm_page_queue_lock);
4620
4621 for (j = 0; ; j++) {
4622 if (vm_object_lock_try(shadow_object))
4623 break;
4624 vm_page_unlock_queues();
4625 mutex_pause(j);
4626 vm_page_lock_queues();
4627 }
4628 delayed_unlock = 1;
4629 }
4630 target_offset += PAGE_SIZE_64;
4631 xfer_size -= PAGE_SIZE;
4632 entry++;
4633 }
4634 if (delayed_unlock)
4635 vm_page_unlock_queues();
4636
4637 occupied = 1;
4638
4639 if (upl->flags & UPL_DEVICE_MEMORY) {
4640 occupied = 0;
4641 } else if (upl->flags & UPL_LITE) {
4642 int pg_num;
4643 int i;
4644
4645 pg_num = upl->size/PAGE_SIZE;
4646 pg_num = (pg_num + 31) >> 5;
4647 occupied = 0;
4648
4649 for (i = 0; i < pg_num; i++) {
4650 if (lite_list[i] != 0) {
4651 occupied = 1;
4652 break;
4653 }
4654 }
4655 } else {
4656 if (queue_empty(&upl->map_object->memq))
4657 occupied = 0;
4658 }
4659 if (occupied == 0) {
4660 if (upl->flags & UPL_COMMIT_NOTIFY_EMPTY)
4661 *empty = TRUE;
4662
4663 if (object == shadow_object) {
4664 /*
4665 * this is not a paging object
4666 * so we need to drop the paging reference
4667 * that was taken when we created the UPL
4668 * against this object
4669 */
4670 vm_object_paging_end(shadow_object);
4671 } else {
4672 /*
4673 * we donated the paging reference to
4674 * the map object... vm_pageout_object_terminate
4675 * will drop this reference
4676 */
4677 }
4678 }
4679 vm_object_unlock(shadow_object);
4680 if (object != shadow_object)
4681 vm_object_unlock(object);
4682 upl_unlock(upl);
4683
4684 return KERN_SUCCESS;
4685 }
4686
4687
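/*
 * Convenience wrapper: abort the entire UPL with the given error
 * disposition by calling upl_abort_range() over [0, upl->size).
 */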
4688 kern_return_t
4689 upl_abort(
4690 upl_t upl,
4691 int error)
4692 {
4693 boolean_t empty;
4694
4695 return upl_abort_range(upl, 0, upl->size, error, &empty);
4696 }
4697
4698
4699 /* an option on commit should be wire */
4700 kern_return_t
4701 upl_commit(
4702 upl_t upl,
4703 upl_page_info_t *page_list,
4704 mach_msg_type_number_t count)
4705 {
4706 boolean_t empty;
4707
4708 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
4709 }
4710
4711
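/*
 * Create an I/O-wired UPL directly against a VM object: fault in
 * (if necessary) and wire every page in the requested range, record
 * each page in the UPL's lite list and optional page list, and, if
 * UPL_BLOCK_ACCESS was requested, leave the pages busy and remove
 * their pmap mappings. A physically contiguous object is handled as
 * device memory without touching individual pages.
 */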
4712 kern_return_t
4713 vm_object_iopl_request(
4714 vm_object_t object,
4715 vm_object_offset_t offset,
4716 upl_size_t size,
4717 upl_t *upl_ptr,
4718 upl_page_info_array_t user_page_list,
4719 unsigned int *page_list_count,
4720 int cntrl_flags)
4721 {
4722 vm_page_t dst_page;
4723 vm_object_offset_t dst_offset;
4724 upl_size_t xfer_size;
4725 upl_t upl = NULL;
4726 unsigned int entry;
4727 wpl_array_t lite_list = NULL;
4728 int delayed_unlock = 0;
4729 int no_zero_fill = FALSE;
4730 u_int32_t psize;
4731 kern_return_t ret;
4732 vm_prot_t prot;
4733 struct vm_object_fault_info fault_info;
4734
4735
4736 if (cntrl_flags & ~UPL_VALID_FLAGS) {
4737 /*
4738 * For forward compatibility's sake,
4739 * reject any unknown flag.
4740 */
4741 return KERN_INVALID_VALUE;
4742 }
4743 if (vm_lopage_poolsize == 0)
4744 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
4745
4746 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
4747 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
4748 return KERN_INVALID_VALUE;
4749
4750 if (object->phys_contiguous) {
4751 if ((offset + object->shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
4752 return KERN_INVALID_ADDRESS;
4753
4754 if (((offset + object->shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
4755 return KERN_INVALID_ADDRESS;
4756 }
4757 }
4758
4759 if (cntrl_flags & UPL_ENCRYPT) {
4760 /*
4761 * ENCRYPTED SWAP:
4762 * The paging path doesn't use this interface,
4763 * so we don't support the UPL_ENCRYPT flag
4764 * here. We won't encrypt the pages.
4765 */
4766 assert(! (cntrl_flags & UPL_ENCRYPT));
4767 }
4768 if (cntrl_flags & UPL_NOZEROFILL)
4769 no_zero_fill = TRUE;
4770
4771 if (cntrl_flags & UPL_COPYOUT_FROM)
4772 prot = VM_PROT_READ;
4773 else
4774 prot = VM_PROT_READ | VM_PROT_WRITE;
4775
4776 if (((size/page_size) > MAX_UPL_SIZE) && !object->phys_contiguous)
4777 size = MAX_UPL_SIZE * page_size;
4778
4779 if (cntrl_flags & UPL_SET_INTERNAL) {
4780 if (page_list_count != NULL)
4781 *page_list_count = MAX_UPL_SIZE;
4782 }
4783 if (((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
4784 ((page_list_count != NULL) && (*page_list_count != 0) && *page_list_count < (size/page_size)))
4785 return KERN_INVALID_ARGUMENT;
4786
4787 if ((!object->internal) && (object->paging_offset != 0))
4788 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
4789
4790
4791 if (object->phys_contiguous)
4792 psize = PAGE_SIZE;
4793 else
4794 psize = size;
4795
4796 if (cntrl_flags & UPL_SET_INTERNAL) {
4797 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, UPL_IO_WIRE, psize);
4798
4799 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
4800 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
4801 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
4802 } else {
4803 upl = upl_create(UPL_CREATE_LITE, UPL_IO_WIRE, psize);
4804
4805 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
4806 }
4807 if (user_page_list)
4808 user_page_list[0].device = FALSE;
4809 *upl_ptr = upl;
4810
4811 upl->map_object = object;
4812 upl->size = size;
4813
4814 vm_object_lock(object);
4815 vm_object_paging_begin(object);
4816 /*
4817 * paging in progress also protects the paging_offset
4818 */
4819 upl->offset = offset + object->paging_offset;
4820
4821 if (object->phys_contiguous) {
4822 #ifdef UPL_DEBUG
4823 queue_enter(&object->uplq, upl, upl_t, uplq);
4824 #endif /* UPL_DEBUG */
4825
4826 vm_object_unlock(object);
4827
4828 /*
4829 * don't need any shadow mappings for this one
4830 * since it is already I/O memory
4831 */
4832 upl->flags |= UPL_DEVICE_MEMORY;
4833
4834 upl->highest_page = (offset + object->shadow_offset + size - 1)>>PAGE_SHIFT;
4835
4836 if (user_page_list) {
4837 user_page_list[0].phys_addr = (offset + object->shadow_offset)>>PAGE_SHIFT;
4838 user_page_list[0].device = TRUE;
4839 }
4840 if (page_list_count != NULL) {
4841 if (upl->flags & UPL_INTERNAL)
4842 *page_list_count = 0;
4843 else
4844 *page_list_count = 1;
4845 }
4846 return KERN_SUCCESS;
4847 }
4848 /*
4849 * Protect user space from future COW operations
4850 */
4851 object->true_share = TRUE;
4852
4853 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
4854 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4855
4856 #ifdef UPL_DEBUG
4857 queue_enter(&object->uplq, upl, upl_t, uplq);
4858 #endif /* UPL_DEBUG */
4859
4860 if (cntrl_flags & UPL_BLOCK_ACCESS) {
4861 /*
4862 * The user requested that access to the pages in this UPL
4863 * be blocked until the UPL is committed or aborted.
4864 */
4865 upl->flags |= UPL_ACCESS_BLOCKED;
4866 }
4867 entry = 0;
4868
4869 xfer_size = size;
4870 dst_offset = offset;
4871
4872 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
4873 fault_info.user_tag = 0;
4874 fault_info.lo_offset = offset;
4875 fault_info.hi_offset = offset + xfer_size;
4876 fault_info.no_cache = FALSE;
4877
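/*
 * Main loop: look up each page in the range and, if it is missing,
 * busy, encrypted, or otherwise unusable, call vm_fault_page()
 * until a usable resident page is obtained. When UPL_NEED_32BIT_ADDR
 * is set, pages at or above max_valid_dma_address are replaced with
 * pages from the low-memory pool. Each page is then wired, its bit
 * is set in the lite list, and its state is copied into the
 * caller's page list, if one was supplied.
 */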
4878 while (xfer_size) {
4879 vm_fault_return_t result;
4880 int pg_num;
4881
4882 dst_page = vm_page_lookup(object, dst_offset);
4883
4884 /*
4885 * ENCRYPTED SWAP:
4886 * If the page is encrypted, we need to decrypt it,
4887 * so force a soft page fault.
4888 */
4889 if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
4890 (dst_page->encrypted) ||
4891 (dst_page->unusual && (dst_page->error ||
4892 dst_page->restart ||
4893 dst_page->absent ||
4894 dst_page->fictitious))) {
4895
4896 do {
4897 vm_page_t top_page;
4898 kern_return_t error_code;
4899 int interruptible;
4900
4901 if (delayed_unlock) {
4902 delayed_unlock = 0;
4903 vm_page_unlock_queues();
4904 }
4905 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
4906 interruptible = THREAD_ABORTSAFE;
4907 else
4908 interruptible = THREAD_UNINT;
4909
4910 fault_info.interruptible = interruptible;
4911 fault_info.cluster_size = xfer_size;
4912
4913 result = vm_fault_page(object, dst_offset,
4914 prot | VM_PROT_WRITE, FALSE,
4915 &prot, &dst_page, &top_page,
4916 (int *)0,
4917 &error_code, no_zero_fill,
4918 FALSE, &fault_info);
4919
4920 switch (result) {
4921
4922 case VM_FAULT_SUCCESS:
4923
4924 PAGE_WAKEUP_DONE(dst_page);
4925 /*
4926 * Release paging references and
4927 * top-level placeholder page, if any.
4928 */
4929 if (top_page != VM_PAGE_NULL) {
4930 vm_object_t local_object;
4931
4932 local_object = top_page->object;
4933
4934 if (top_page->object != dst_page->object) {
4935 vm_object_lock(local_object);
4936 VM_PAGE_FREE(top_page);
4937 vm_object_paging_end(local_object);
4938 vm_object_unlock(local_object);
4939 } else {
4940 VM_PAGE_FREE(top_page);
4941 vm_object_paging_end(local_object);
4942 }
4943 }
4944 break;
4945
4946 case VM_FAULT_RETRY:
4947 vm_object_lock(object);
4948 vm_object_paging_begin(object);
4949 break;
4950
4951 case VM_FAULT_FICTITIOUS_SHORTAGE:
4952 vm_page_more_fictitious();
4953
4954 vm_object_lock(object);
4955 vm_object_paging_begin(object);
4956 break;
4957
4958 case VM_FAULT_MEMORY_SHORTAGE:
4959 if (vm_page_wait(interruptible)) {
4960 vm_object_lock(object);
4961 vm_object_paging_begin(object);
4962 break;
4963 }
4964 /* fall thru */
4965
4966 case VM_FAULT_INTERRUPTED:
4967 error_code = MACH_SEND_INTERRUPTED;
4968 case VM_FAULT_MEMORY_ERROR:
4969 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
4970
4971 vm_object_lock(object);
4972 vm_object_paging_begin(object);
4973 goto return_err;
4974 }
4975 } while (result != VM_FAULT_SUCCESS);
4976 }
4977
4978 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
4979 dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
4980 vm_page_t low_page;
4981 int refmod;
4982
4983 /*
4984 * support devices that can't DMA above 32 bits
4985 * by substituting pages from a pool of low address
4986 * memory for any pages we find above the 4G mark...
4987 * we can't substitute if the page is already wired because
4988 * we don't know whether that physical address has been
4989 * handed out to some other 64-bit capable DMA device to use
4990 */
4991 if (dst_page->wire_count) {
4992 ret = KERN_PROTECTION_FAILURE;
4993 goto return_err;
4994 }
4995 if (delayed_unlock) {
4996 delayed_unlock = 0;
4997 vm_page_unlock_queues();
4998 }
4999 low_page = vm_page_grablo();
5000
5001 if (low_page == VM_PAGE_NULL) {
5002 ret = KERN_RESOURCE_SHORTAGE;
5003 goto return_err;
5004 }
5005 /*
5006 * from here until the vm_page_replace completes
5007 * we mustn't drop the object lock... we don't
5008 * want anyone refaulting this page in and using
5009 * it after we disconnect it... we want the fault
5010 * to find the new page being substituted.
5011 */
5012 if (dst_page->pmapped)
5013 refmod = pmap_disconnect(dst_page->phys_page);
5014 else
5015 refmod = 0;
5016 vm_page_copy(dst_page, low_page);
5017
5018 low_page->reference = dst_page->reference;
5019 low_page->dirty = dst_page->dirty;
5020
5021 if (refmod & VM_MEM_REFERENCED)
5022 low_page->reference = TRUE;
5023 if (refmod & VM_MEM_MODIFIED)
5024 low_page->dirty = TRUE;
5025
5026 vm_page_lock_queues();
5027 vm_page_replace(low_page, object, dst_offset);
5028 /*
5029 * keep the queue lock since we're going to
5030 * need it immediately
5031 */
5032 delayed_unlock = 1;
5033
5034 dst_page = low_page;
5035 /*
5036 * vm_page_grablo returned the page marked
5037 * BUSY... we don't need a PAGE_WAKEUP_DONE
5038 * here, because we've never dropped the object lock
5039 */
5040 dst_page->busy = FALSE;
5041 }
5042 if (delayed_unlock == 0)
5043 vm_page_lock_queues();
5044
5045 vm_page_wire(dst_page);
5046
5047 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5048 /*
5049 * Mark the page "busy" to block any future page fault
5050 * on this page. We'll also remove the mapping
5051 * of all these pages before leaving this routine.
5052 */
5053 assert(!dst_page->fictitious);
5054 dst_page->busy = TRUE;
5055 }
5056 pg_num = (dst_offset-offset)/PAGE_SIZE;
5057 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5058
5059 /*
5060 * expect the page to be used
5061 * page queues lock must be held to set 'reference'
5062 */
5063 dst_page->reference = TRUE;
5064
5065 if (!(cntrl_flags & UPL_COPYOUT_FROM))
5066 dst_page->dirty = TRUE;
5067
5068 if (dst_page->phys_page > upl->highest_page)
5069 upl->highest_page = dst_page->phys_page;
5070
5071 if (user_page_list) {
5072 user_page_list[entry].phys_addr = dst_page->phys_page;
5073 user_page_list[entry].pageout = dst_page->pageout;
5074 user_page_list[entry].absent = dst_page->absent;
5075 user_page_list[entry].dirty = dst_page->dirty;
5076 user_page_list[entry].precious = dst_page->precious;
5077 user_page_list[entry].device = FALSE;
5078 if (dst_page->clustered == TRUE)
5079 user_page_list[entry].speculative = dst_page->speculative;
5080 else
5081 user_page_list[entry].speculative = FALSE;
5082 user_page_list[entry].cs_validated = dst_page->cs_validated;
5083 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
5084 }
5085 /*
5086 * someone is explicitly grabbing this page...
5087 * update clustered and speculative state
5088 *
5089 */
5090 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5091
5092 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
5093 mutex_yield(&vm_page_queue_lock);
5094 delayed_unlock = 1;
5095 }
5096 entry++;
5097 dst_offset += PAGE_SIZE_64;
5098 xfer_size -= PAGE_SIZE;
5099 }
5100 if (delayed_unlock)
5101 vm_page_unlock_queues();
5102
5103 if (page_list_count != NULL) {
5104 if (upl->flags & UPL_INTERNAL)
5105 *page_list_count = 0;
5106 else if (*page_list_count > entry)
5107 *page_list_count = entry;
5108 }
5109 vm_object_unlock(object);
5110
5111 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5112 /*
5113 * We've marked all the pages "busy" so that future
5114 * page faults will block.
5115 * Now remove the mapping for these pages, so that they
5116 * can't be accessed without causing a page fault.
5117 */
5118 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
5119 PMAP_NULL, 0, VM_PROT_NONE);
5120 }
5121 return KERN_SUCCESS;
5122
5123 return_err:
5124 if (delayed_unlock)
5125 vm_page_unlock_queues();
5126
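/*
 * Error path: unwire every page that was wired before the failure
 * (everything between the original offset and dst_offset), then
 * drop the paging reference and destroy the partially built UPL.
 */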
5127 for (; offset < dst_offset; offset += PAGE_SIZE) {
5128 dst_page = vm_page_lookup(object, offset);
5129
5130 if (dst_page == VM_PAGE_NULL)
5131 panic("vm_object_iopl_request: Wired pages missing. \n");
5132
5133 vm_page_lockspin_queues();
5134 vm_page_unwire(dst_page);
5135 vm_page_unlock_queues();
5136
5137 VM_STAT_INCR(reactivations);
5138 }
5139 vm_object_paging_end(object);
5140 vm_object_unlock(object);
5141 upl_destroy(upl);
5142
5143 return ret;
5144 }
5145
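/*
 * Swap the backing VM objects of two UPLs of identical size and
 * zero offset, so that each UPL ends up pointing at the object that
 * holds its pages after the vm_object_transpose().
 */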
5146 kern_return_t
5147 upl_transpose(
5148 upl_t upl1,
5149 upl_t upl2)
5150 {
5151 kern_return_t retval;
5152 boolean_t upls_locked;
5153 vm_object_t object1, object2;
5154
5155 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) {
5156 return KERN_INVALID_ARGUMENT;
5157 }
5158
5159 upls_locked = FALSE;
5160
5161 /*
5162 * Since we need to lock both UPLs at the same time,
5163 * avoid deadlocks by always taking locks in the same order.
5164 */
5165 if (upl1 < upl2) {
5166 upl_lock(upl1);
5167 upl_lock(upl2);
5168 } else {
5169 upl_lock(upl2);
5170 upl_lock(upl1);
5171 }
5172 upls_locked = TRUE; /* the UPLs will need to be unlocked */
5173
5174 object1 = upl1->map_object;
5175 object2 = upl2->map_object;
5176
5177 if (upl1->offset != 0 || upl2->offset != 0 ||
5178 upl1->size != upl2->size) {
5179 /*
5180 * We deal only with full objects, not subsets.
5181 * That's because we exchange the entire backing store info
5182 * for the objects: pager, resident pages, etc... We can't do
5183 * only part of it.
5184 */
5185 retval = KERN_INVALID_VALUE;
5186 goto done;
5187 }
5188
5189 /*
5190 * Transpose the VM objects' backing store.
5191 */
5192 retval = vm_object_transpose(object1, object2,
5193 (vm_object_size_t) upl1->size);
5194
5195 if (retval == KERN_SUCCESS) {
5196 /*
5197 * Make each UPL point to the correct VM object, i.e. the
5198 * object holding the pages that the UPL refers to...
5199 */
5200 #ifdef UPL_DEBUG
5201 queue_remove(&object1->uplq, upl1, upl_t, uplq);
5202 queue_remove(&object2->uplq, upl2, upl_t, uplq);
5203 #endif
5204 upl1->map_object = object2;
5205 upl2->map_object = object1;
5206 #ifdef UPL_DEBUG
5207 queue_enter(&object1->uplq, upl2, upl_t, uplq);
5208 queue_enter(&object2->uplq, upl1, upl_t, uplq);
5209 #endif
5210 }
5211
5212 done:
5213 /*
5214 * Cleanup.
5215 */
5216 if (upls_locked) {
5217 upl_unlock(upl1);
5218 upl_unlock(upl2);
5219 upls_locked = FALSE;
5220 }
5221
5222 return retval;
5223 }
5224
5225 /*
5226 * ENCRYPTED SWAP:
5227 *
5228 * Rationale: the user might have some encrypted data on disk (via
5229 * FileVault or any other mechanism). That data is then decrypted in
5230 * memory, which is safe as long as the machine is secure. But that
5231 * decrypted data in memory could be paged out to disk by the default
5232 * pager. The data would then be stored on disk in clear (not encrypted)
5233 * and it could be accessed by anyone who gets physical access to the
5234 * disk (if the laptop or the disk gets stolen for example). This weakens
5235 * the security offered by FileVault.
5236 *
5237 * Solution: the default pager will optionally request that all the
5238 * pages it gathers for pageout be encrypted, via the UPL interfaces,
5239 * before it sends this UPL to disk via the vnode_pageout() path.
5240 *
5241 * Notes:
5242 *
5243 * To avoid disrupting the VM LRU algorithms, we want to keep the
5244 * clean-in-place mechanisms, which allow us to send some extra pages to
5245 * swap (clustering) without actually removing them from the user's
5246 * address space. We don't want the user to unknowingly access encrypted
5247 * data, so we have to actually remove the encrypted pages from the page
5248 * table. When the user accesses the data, the hardware will fail to
5249 * locate the virtual page in its page table and will trigger a page
5250 * fault. We can then decrypt the page and enter it in the page table
5251 * again. Whenever we allow the user to access the contents of a page,
5252 * we have to make sure it's not encrypted.
5253 *
5254 *
5255 */
5256 /*
5257 * ENCRYPTED SWAP:
5258 * Reserve of virtual addresses in the kernel address space.
5259 * We need to map the physical pages in the kernel, so that we
5260 * can call the encryption/decryption routines with a kernel
5261 * virtual address. We keep this pool of pre-allocated kernel
5262 * virtual addresses so that we don't have to scan the kernel's
5263 * virtual address space each time we need to encrypt or decrypt
5264 * a physical page.
5265 * It would be nice to be able to encrypt and decrypt in physical
5266 * mode but that might not always be more efficient...
5267 */
5268 decl_simple_lock_data(,vm_paging_lock)
5269 #define VM_PAGING_NUM_PAGES 64
5270 vm_map_offset_t vm_paging_base_address = 0;
5271 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
5272 int vm_paging_max_index = 0;
5273 int vm_paging_page_waiter = 0;
5274 int vm_paging_page_waiter_total = 0;
5275 unsigned long vm_paging_no_kernel_page = 0;
5276 unsigned long vm_paging_objects_mapped = 0;
5277 unsigned long vm_paging_pages_mapped = 0;
5278 unsigned long vm_paging_objects_mapped_slow = 0;
5279 unsigned long vm_paging_pages_mapped_slow = 0;
5280
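/*
 * Carve out VM_PAGING_NUM_PAGES pages of kernel virtual address
 * space, backed by the kernel object, to serve as the pre-allocated
 * mapping pool used by vm_paging_map_object() below.
 */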
5281 void
5282 vm_paging_map_init(void)
5283 {
5284 kern_return_t kr;
5285 vm_map_offset_t page_map_offset;
5286 vm_map_entry_t map_entry;
5287
5288 assert(vm_paging_base_address == 0);
5289
5290 /*
5291 * Initialize our pool of pre-allocated kernel
5292 * virtual addresses.
5293 */
5294 page_map_offset = 0;
5295 kr = vm_map_find_space(kernel_map,
5296 &page_map_offset,
5297 VM_PAGING_NUM_PAGES * PAGE_SIZE,
5298 0,
5299 0,
5300 &map_entry);
5301 if (kr != KERN_SUCCESS) {
5302 panic("vm_paging_map_init: kernel_map full\n");
5303 }
5304 map_entry->object.vm_object = kernel_object;
5305 map_entry->offset =
5306 page_map_offset - VM_MIN_KERNEL_ADDRESS;
5307 vm_object_reference(kernel_object);
5308 vm_map_unlock(kernel_map);
5309
5310 assert(vm_paging_base_address == 0);
5311 vm_paging_base_address = page_map_offset;
5312 }
5313
5314 /*
5315 * ENCRYPTED SWAP:
5316 * vm_paging_map_object:
5317 * Maps part of a VM object's pages in the kernel
5318 * virtual address space, using the pre-allocated
5319 * kernel virtual addresses, if possible.
5320 * Context:
5321 * The VM object is locked. This lock will get
5322 * dropped and re-acquired though, so the caller
5323 * must make sure the VM object is kept alive
5324 * (by holding a VM map that has a reference
5325 * on it, for example, or taking an extra reference).
5326 * The page should also be kept busy to prevent
5327 * it from being reclaimed.
5328 */
5329 kern_return_t
5330 vm_paging_map_object(
5331 vm_map_offset_t *address,
5332 vm_page_t page,
5333 vm_object_t object,
5334 vm_object_offset_t offset,
5335 vm_map_size_t *size,
5336 vm_prot_t protection,
5337 boolean_t can_unlock_object)
5338 {
5339 kern_return_t kr;
5340 vm_map_offset_t page_map_offset;
5341 vm_map_size_t map_size;
5342 vm_object_offset_t object_offset;
5343 int i;
5344
5345
5346 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
5347 assert(page->busy);
5348 /*
5349 * Use one of the pre-allocated kernel virtual addresses
5350 * and just enter the VM page in the kernel address space
5351 * at that virtual address.
5352 */
5353 simple_lock(&vm_paging_lock);
5354
5355 /*
5356 * Try and find an available kernel virtual address
5357 * from our pre-allocated pool.
5358 */
5359 page_map_offset = 0;
5360 for (;;) {
5361 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
5362 if (vm_paging_page_inuse[i] == FALSE) {
5363 page_map_offset =
5364 vm_paging_base_address +
5365 (i * PAGE_SIZE);
5366 break;
5367 }
5368 }
5369 if (page_map_offset != 0) {
5370 /* found a space to map our page ! */
5371 break;
5372 }
5373
5374 if (can_unlock_object) {
5375 /*
5376 * If we can afford to unlock the VM object,
5377 * let's take the slow path now...
5378 */
5379 break;
5380 }
5381 /*
5382 * We can't afford to unlock the VM object, so
5383 * let's wait for a space to become available...
5384 */
5385 vm_paging_page_waiter_total++;
5386 vm_paging_page_waiter++;
5387 thread_sleep_fast_usimple_lock(&vm_paging_page_waiter,
5388 &vm_paging_lock,
5389 THREAD_UNINT);
5390 vm_paging_page_waiter--;
5391 /* ... and try again */
5392 }
5393
5394 if (page_map_offset != 0) {
5395 /*
5396 * We found a kernel virtual address;
5397 * map the physical page to that virtual address.
5398 */
5399 if (i > vm_paging_max_index) {
5400 vm_paging_max_index = i;
5401 }
5402 vm_paging_page_inuse[i] = TRUE;
5403 simple_unlock(&vm_paging_lock);
5404
5405 if (page->pmapped == FALSE) {
5406 pmap_sync_page_data_phys(page->phys_page);
5407 }
5408 page->pmapped = TRUE;
5409
5410 /*
5411 * Keep the VM object locked over the PMAP_ENTER
5412 * and the actual use of the page by the kernel,
5413 * or this pmap mapping might get undone by a
5414 * vm_object_pmap_protect() call...
5415 */
5416 PMAP_ENTER(kernel_pmap,
5417 page_map_offset,
5418 page,
5419 protection,
5420 ((int) page->object->wimg_bits &
5421 VM_WIMG_MASK),
5422 TRUE);
5423 vm_paging_objects_mapped++;
5424 vm_paging_pages_mapped++;
5425 *address = page_map_offset;
5426
5427 /* all done and mapped, ready to use ! */
5428 return KERN_SUCCESS;
5429 }
5430
5431 /*
5432 * We ran out of pre-allocated kernel virtual
5433 * addresses. Just map the page in the kernel
5434 * the slow and regular way.
5435 */
5436 vm_paging_no_kernel_page++;
5437 simple_unlock(&vm_paging_lock);
5438 }
5439
5440 if (! can_unlock_object) {
5441 return KERN_NOT_SUPPORTED;
5442 }
5443
5444 object_offset = vm_object_trunc_page(offset);
5445 map_size = vm_map_round_page(*size);
5446
5447 /*
5448 * Try and map the required range of the object
5449 * in the kernel_map
5450 */
5451
5452 vm_object_reference_locked(object); /* for the map entry */
5453 vm_object_unlock(object);
5454
5455 kr = vm_map_enter(kernel_map,
5456 address,
5457 map_size,
5458 0,
5459 VM_FLAGS_ANYWHERE,
5460 object,
5461 object_offset,
5462 FALSE,
5463 protection,
5464 VM_PROT_ALL,
5465 VM_INHERIT_NONE);
5466 if (kr != KERN_SUCCESS) {
5467 *address = 0;
5468 *size = 0;
5469 vm_object_deallocate(object); /* for the map entry */
5470 vm_object_lock(object);
5471 return kr;
5472 }
5473
5474 *size = map_size;
5475
5476 /*
5477 * Enter the mapped pages in the page table now.
5478 */
5479 vm_object_lock(object);
5480 /*
5481 * VM object must be kept locked from before PMAP_ENTER()
5482 * until after the kernel is done accessing the page(s).
5483 * Otherwise, the pmap mappings in the kernel could be
5484 * undone by a call to vm_object_pmap_protect().
5485 */
5486
5487 for (page_map_offset = 0;
5488 map_size != 0;
5489 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
5490 unsigned int cache_attr;
5491
5492 page = vm_page_lookup(object, offset + page_map_offset);
5493 if (page == VM_PAGE_NULL) {
5494 printf("vm_paging_map_object: no page !?");
5495 vm_object_unlock(object);
5496 kr = vm_map_remove(kernel_map, *address, *size,
5497 VM_MAP_NO_FLAGS);
5498 assert(kr == KERN_SUCCESS);
5499 *address = 0;
5500 *size = 0;
5501 vm_object_lock(object);
5502 return KERN_MEMORY_ERROR;
5503 }
5504 if (page->pmapped == FALSE) {
5505 pmap_sync_page_data_phys(page->phys_page);
5506 }
5507 page->pmapped = TRUE;
5508 cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
5509
5510 //assert(pmap_verify_free(page->phys_page));
5511 PMAP_ENTER(kernel_pmap,
5512 *address + page_map_offset,
5513 page,
5514 protection,
5515 cache_attr,
5516 TRUE);
5517 }
5518
5519 vm_paging_objects_mapped_slow++;
5520 vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64;
5521
5522 return KERN_SUCCESS;
5523 }
5524
5525 /*
5526 * ENCRYPTED SWAP:
5527 * vm_paging_unmap_object:
5528 * Unmaps part of a VM object's pages from the kernel
5529 * virtual address space.
5530 * Context:
5531 * The VM object is locked. This lock will get
5532 * dropped and re-acquired though.
5533 */
5534 void
5535 vm_paging_unmap_object(
5536 vm_object_t object,
5537 vm_map_offset_t start,
5538 vm_map_offset_t end)
5539 {
5540 kern_return_t kr;
5541 int i;
5542
5543 if ((vm_paging_base_address == 0) ||
5544 (start < vm_paging_base_address) ||
5545 (end > (vm_paging_base_address
5546 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
5547 /*
5548 * We didn't use our pre-allocated pool of
5549 * kernel virtual addresses. Deallocate the
5550 * virtual memory.
5551 */
5552 if (object != VM_OBJECT_NULL) {
5553 vm_object_unlock(object);
5554 }
5555 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
5556 if (object != VM_OBJECT_NULL) {
5557 vm_object_lock(object);
5558 }
5559 assert(kr == KERN_SUCCESS);
5560 } else {
5561 /*
5562 * We used a kernel virtual address from our
5563 * pre-allocated pool. Put it back in the pool
5564 * for next time.
5565 */
5566 assert(end - start == PAGE_SIZE);
5567 i = (start - vm_paging_base_address) >> PAGE_SHIFT;
5568
5569 /* undo the pmap mapping */
5570 pmap_remove(kernel_pmap, start, end);
5571
5572 simple_lock(&vm_paging_lock);
5573 vm_paging_page_inuse[i] = FALSE;
5574 if (vm_paging_page_waiter) {
5575 thread_wakeup(&vm_paging_page_waiter);
5576 }
5577 simple_unlock(&vm_paging_lock);
5578 }
5579 }
5580
5581 #if CRYPTO
5582 /*
5583 * Encryption data.
5584 * "iv" is the "initial vector". Ideally, we want to
5585 * have a different one for each page we encrypt, so that
5586 * crackers can't find encryption patterns too easily.
5587 */
5588 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
5589 boolean_t swap_crypt_ctx_initialized = FALSE;
5590 aes_32t swap_crypt_key[8]; /* big enough for a 256 key */
5591 aes_ctx swap_crypt_ctx;
5592 const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
5593
5594 #if DEBUG
5595 boolean_t swap_crypt_ctx_tested = FALSE;
5596 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
5597 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
5598 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
5599 #endif /* DEBUG */
5600
5601 extern u_long random(void);
5602
5603 /*
5604 * Initialize the encryption context: key and key size.
5605 */
5606 void swap_crypt_ctx_initialize(void); /* forward */
5607 void
5608 swap_crypt_ctx_initialize(void)
5609 {
5610 unsigned int i;
5611
5612 /*
5613 * No need for locking to protect swap_crypt_ctx_initialized
5614 * because the first use of encryption will come from the
5615 * pageout thread (we won't pagein before there's been a pageout)
5616 * and there's only one pageout thread.
5617 */
5618 if (swap_crypt_ctx_initialized == FALSE) {
5619 for (i = 0;
5620 i < (sizeof (swap_crypt_key) /
5621 sizeof (swap_crypt_key[0]));
5622 i++) {
5623 swap_crypt_key[i] = random();
5624 }
5625 aes_encrypt_key((const unsigned char *) swap_crypt_key,
5626 SWAP_CRYPT_AES_KEY_SIZE,
5627 &swap_crypt_ctx.encrypt);
5628 aes_decrypt_key((const unsigned char *) swap_crypt_key,
5629 SWAP_CRYPT_AES_KEY_SIZE,
5630 &swap_crypt_ctx.decrypt);
5631 swap_crypt_ctx_initialized = TRUE;
5632 }
5633
5634 #if DEBUG
5635 /*
5636 * Validate the encryption algorithms.
5637 */
5638 if (swap_crypt_ctx_tested == FALSE) {
5639 /* initialize */
5640 for (i = 0; i < 4096; i++) {
5641 swap_crypt_test_page_ref[i] = (char) i;
5642 }
5643 /* encrypt */
5644 aes_encrypt_cbc(swap_crypt_test_page_ref,
5645 swap_crypt_null_iv,
5646 PAGE_SIZE / AES_BLOCK_SIZE,
5647 swap_crypt_test_page_encrypt,
5648 &swap_crypt_ctx.encrypt);
5649 /* decrypt */
5650 aes_decrypt_cbc(swap_crypt_test_page_encrypt,
5651 swap_crypt_null_iv,
5652 PAGE_SIZE / AES_BLOCK_SIZE,
5653 swap_crypt_test_page_decrypt,
5654 &swap_crypt_ctx.decrypt);
5655 /* compare result with original */
5656 for (i = 0; i < 4096; i ++) {
5657 if (swap_crypt_test_page_decrypt[i] !=
5658 swap_crypt_test_page_ref[i]) {
5659 panic("encryption test failed");
5660 }
5661 }
5662
5663 /* encrypt again */
5664 aes_encrypt_cbc(swap_crypt_test_page_decrypt,
5665 swap_crypt_null_iv,
5666 PAGE_SIZE / AES_BLOCK_SIZE,
5667 swap_crypt_test_page_decrypt,
5668 &swap_crypt_ctx.encrypt);
5669 /* decrypt in place */
5670 aes_decrypt_cbc(swap_crypt_test_page_decrypt,
5671 swap_crypt_null_iv,
5672 PAGE_SIZE / AES_BLOCK_SIZE,
5673 swap_crypt_test_page_decrypt,
5674 &swap_crypt_ctx.decrypt);
5675 for (i = 0; i < 4096; i ++) {
5676 if (swap_crypt_test_page_decrypt[i] !=
5677 swap_crypt_test_page_ref[i]) {
5678 panic("in place encryption test failed");
5679 }
5680 }
5681
5682 swap_crypt_ctx_tested = TRUE;
5683 }
5684 #endif /* DEBUG */
5685 }
5686
5687 /*
5688 * ENCRYPTED SWAP:
5689 * vm_page_encrypt:
5690 * Encrypt the given page, for secure paging.
5691 * The page might already be mapped at kernel virtual
5692 * address "kernel_mapping_offset". Otherwise, we need
5693 * to map it.
5694 *
5695 * Context:
5696 * The page's object is locked, but this lock will be released
5697 * and re-acquired.
5698 * The page is busy and not accessible by users (not entered in any pmap).
5699 */
5700 void
5701 vm_page_encrypt(
5702 vm_page_t page,
5703 vm_map_offset_t kernel_mapping_offset)
5704 {
5705 kern_return_t kr;
5706 vm_map_size_t kernel_mapping_size;
5707 vm_offset_t kernel_vaddr;
5708 union {
5709 unsigned char aes_iv[AES_BLOCK_SIZE];
5710 struct {
5711 memory_object_t pager_object;
5712 vm_object_offset_t paging_offset;
5713 } vm;
5714 } encrypt_iv;
5715
5716 if (! vm_pages_encrypted) {
5717 vm_pages_encrypted = TRUE;
5718 }
5719
5720 assert(page->busy);
5721 assert(page->dirty || page->precious);
5722
5723 if (page->encrypted) {
5724 /*
5725 * Already encrypted: no need to do it again.
5726 */
5727 vm_page_encrypt_already_encrypted_counter++;
5728 return;
5729 }
5730 ASSERT_PAGE_DECRYPTED(page);
5731
5732 /*
5733 * Take a paging-in-progress reference to keep the object
5734 * alive even if we have to unlock it (in vm_paging_map_object()
5735 * for example)...
5736 */
5737 vm_object_paging_begin(page->object);
5738
5739 if (kernel_mapping_offset == 0) {
5740 /*
5741 * The page hasn't already been mapped in kernel space
5742 * by the caller. Map it now, so that we can access
5743 * its contents and encrypt them.
5744 */
5745 kernel_mapping_size = PAGE_SIZE;
5746 kr = vm_paging_map_object(&kernel_mapping_offset,
5747 page,
5748 page->object,
5749 page->offset,
5750 &kernel_mapping_size,
5751 VM_PROT_READ | VM_PROT_WRITE,
5752 FALSE);
5753 if (kr != KERN_SUCCESS) {
5754 panic("vm_page_encrypt: "
5755 "could not map page in kernel: 0x%x\n",
5756 kr);
5757 }
5758 } else {
5759 kernel_mapping_size = 0;
5760 }
5761 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5762
5763 if (swap_crypt_ctx_initialized == FALSE) {
5764 swap_crypt_ctx_initialize();
5765 }
5766 assert(swap_crypt_ctx_initialized);
5767
5768 /*
5769 * Prepare an "initial vector" for the encryption.
5770 * We use the "pager" and the "paging_offset" for that
5771 * page to obfuscate the encrypted data a bit more and
5772 * prevent crackers from finding patterns that they could
5773 * use to break the key.
5774 */
5775 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
5776 encrypt_iv.vm.pager_object = page->object->pager;
5777 encrypt_iv.vm.paging_offset =
5778 page->object->paging_offset + page->offset;
5779
5780 /* encrypt the "initial vector" */
5781 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
5782 swap_crypt_null_iv,
5783 1,
5784 &encrypt_iv.aes_iv[0],
5785 &swap_crypt_ctx.encrypt);
5786
5787 /*
5788 * Encrypt the page.
5789 */
5790 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
5791 &encrypt_iv.aes_iv[0],
5792 PAGE_SIZE / AES_BLOCK_SIZE,
5793 (unsigned char *) kernel_vaddr,
5794 &swap_crypt_ctx.encrypt);
5795
5796 vm_page_encrypt_counter++;
5797
5798 /*
5799 * Unmap the page from the kernel's address space,
5800 * if we had to map it ourselves. Otherwise, let
5801 * the caller undo the mapping if needed.
5802 */
5803 if (kernel_mapping_size != 0) {
5804 vm_paging_unmap_object(page->object,
5805 kernel_mapping_offset,
5806 kernel_mapping_offset + kernel_mapping_size);
5807 }
5808
5809 /*
5810 * Clear the "reference" and "modified" bits.
5811 * This should clean up any impact the encryption had
5812 * on them.
5813 * The page was kept busy and disconnected from all pmaps,
5814 * so it can't have been referenced or modified from user
5815 * space.
5816 * The software bits will be reset later after the I/O
5817 * has completed (in upl_commit_range()).
5818 */
5819 pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
5820
5821 page->encrypted = TRUE;
5822
5823 vm_object_paging_end(page->object);
5824 }
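/*
 * Typical call pattern (see upl_encrypt() below): the caller keeps the
 * page busy, disconnects it from all pmaps, and passes 0 for
 * kernel_mapping_offset so that vm_page_encrypt() creates and tears
 * down its own temporary kernel mapping:
 *
 *	pmap_disconnect(page->phys_page);
 *	vm_page_encrypt(page, 0);
 */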
5825
5826 /*
5827 * ENCRYPTED SWAP:
5828 * vm_page_decrypt:
5829 * Decrypt the given page.
5830 * The page might already be mapped at kernel virtual
5831 * address "kernel_mapping_offset". Otherwise, we need
5832 * to map it.
5833 *
5834 * Context:
5835 * The page's VM object is locked but will be unlocked and relocked.
5836 * The page is busy and not accessible by users (not entered in any pmap).
5837 */
5838 void
5839 vm_page_decrypt(
5840 vm_page_t page,
5841 vm_map_offset_t kernel_mapping_offset)
5842 {
5843 kern_return_t kr;
5844 vm_map_size_t kernel_mapping_size;
5845 vm_offset_t kernel_vaddr;
5846 union {
5847 unsigned char aes_iv[AES_BLOCK_SIZE];
5848 struct {
5849 memory_object_t pager_object;
5850 vm_object_offset_t paging_offset;
5851 } vm;
5852 } decrypt_iv;
5853
5854 assert(page->busy);
5855 assert(page->encrypted);
5856
5857 /*
5858 * Take a paging-in-progress reference to keep the object
5859 * alive even if we have to unlock it (in vm_paging_map_object()
5860 * for example)...
5861 */
5862 vm_object_paging_begin(page->object);
5863
5864 if (kernel_mapping_offset == 0) {
5865 /*
5866 * The page hasn't already been mapped in kernel space
5867 * by the caller. Map it now, so that we can access
5868 * its contents and decrypt them.
5869 */
5870 kernel_mapping_size = PAGE_SIZE;
5871 kr = vm_paging_map_object(&kernel_mapping_offset,
5872 page,
5873 page->object,
5874 page->offset,
5875 &kernel_mapping_size,
5876 VM_PROT_READ | VM_PROT_WRITE,
5877 FALSE);
5878 if (kr != KERN_SUCCESS) {
5879 panic("vm_page_decrypt: "
5880 "could not map page in kernel: 0x%x\n",
5881 kr);
5882 }
5883 } else {
5884 kernel_mapping_size = 0;
5885 }
5886 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5887
5888 assert(swap_crypt_ctx_initialized);
5889
5890 /*
5891 * Prepare an "initial vector" for the decryption.
5892 * It has to be the same as the "initial vector" we
5893 * used to encrypt that page.
5894 */
5895 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
5896 decrypt_iv.vm.pager_object = page->object->pager;
5897 decrypt_iv.vm.paging_offset =
5898 page->object->paging_offset + page->offset;
5899
5900 	/* encrypt the "initial vector", exactly as at encryption time, to regenerate the same IV */
5901 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
5902 swap_crypt_null_iv,
5903 1,
5904 &decrypt_iv.aes_iv[0],
5905 &swap_crypt_ctx.encrypt);
5906
5907 /*
5908 * Decrypt the page.
5909 */
5910 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
5911 &decrypt_iv.aes_iv[0],
5912 PAGE_SIZE / AES_BLOCK_SIZE,
5913 (unsigned char *) kernel_vaddr,
5914 &swap_crypt_ctx.decrypt);
5915 vm_page_decrypt_counter++;
5916
5917 /*
5918 * Unmap the page from the kernel's address space,
5919 * if we had to map it ourselves. Otherwise, let
5920 * the caller undo the mapping if needed.
5921 */
5922 if (kernel_mapping_size != 0) {
5923 vm_paging_unmap_object(page->object,
5924 kernel_vaddr,
5925 kernel_vaddr + PAGE_SIZE);
5926 }
5927
5928 /*
5929 * After decryption, the page is actually clean.
5930 * It was encrypted as part of paging, which "cleans"
5931 * the "dirty" pages.
5932 	 * No one could have accessed it after it was encrypted,
5933 	 * and the decryption itself doesn't count as a modification.
5934 */
5935 page->dirty = FALSE;
5936 if (page->cs_validated && !page->cs_tainted) {
5937 /*
5938 * CODE SIGNING:
5939 * This page is no longer dirty
5940 * but could have been modified,
5941 * so it will need to be
5942 * re-validated.
5943 */
5944 page->cs_validated = FALSE;
5945 vm_cs_validated_resets++;
5946 }
5947 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
5948
5949 page->encrypted = FALSE;
5950
5951 /*
5952 * We've just modified the page's contents via the data cache and part
5953 * of the new contents might still be in the cache and not yet in RAM.
5954 * Since the page is now available and might get gathered in a UPL to
5955 * be part of a DMA transfer from a driver that expects the memory to
5956 * be coherent at this point, we have to flush the data cache.
5957 */
5958 pmap_sync_page_attributes_phys(page->phys_page);
5959 /*
5960 * Since the page is not mapped yet, some code might assume that it
5961 * doesn't need to invalidate the instruction cache when writing to
5962 * that page. That code relies on "pmapped" being FALSE, so that the
5963 * caches get synchronized when the page is first mapped.
5964 */
5965 assert(pmap_verify_free(page->phys_page));
5966 page->pmapped = FALSE;
5967 page->wpmapped = FALSE;
5968
5969 vm_object_paging_end(page->object);
5970 }
5971
5972 unsigned long upl_encrypt_upls = 0;
5973 unsigned long upl_encrypt_pages = 0;
5974
5975 /*
5976 * ENCRYPTED SWAP:
5977 *
5978 * upl_encrypt:
5979 	 *	Encrypts all the pages in the UPL within the specified range.
5980 *
5981 */
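/*
 * crypt_offset and crypt_size are byte offsets/lengths within the UPL;
 * the loop below advances one PAGE_SIZE at a time and asserts that the
 * range fits inside upl->size, so callers are expected to pass
 * page-aligned values.
 */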
5982 void
5983 upl_encrypt(
5984 upl_t upl,
5985 upl_offset_t crypt_offset,
5986 upl_size_t crypt_size)
5987 {
5988 upl_size_t upl_size;
5989 upl_offset_t upl_offset;
5990 vm_object_t upl_object;
5991 vm_page_t page;
5992 vm_object_t shadow_object;
5993 vm_object_offset_t shadow_offset;
5994 vm_object_offset_t paging_offset;
5995 vm_object_offset_t base_offset;
5996
5997 upl_encrypt_upls++;
5998 upl_encrypt_pages += crypt_size / PAGE_SIZE;
5999
6000 upl_object = upl->map_object;
6001 upl_offset = upl->offset;
6002 upl_size = upl->size;
6003
6004 vm_object_lock(upl_object);
6005
6006 /*
6007 * Find the VM object that contains the actual pages.
6008 */
6009 if (upl_object->pageout) {
6010 shadow_object = upl_object->shadow;
6011 /*
6012 * The offset in the shadow object is actually also
6013 * accounted for in upl->offset. It possibly shouldn't be
6014 * this way, but for now don't account for it twice.
6015 */
6016 shadow_offset = 0;
6017 assert(upl_object->paging_offset == 0); /* XXX ? */
6018 vm_object_lock(shadow_object);
6019 } else {
6020 shadow_object = upl_object;
6021 shadow_offset = 0;
6022 }
6023
6024 paging_offset = shadow_object->paging_offset;
6025 vm_object_paging_begin(shadow_object);
6026
6027 if (shadow_object != upl_object)
6028 vm_object_unlock(upl_object);
6029
6030
6031 base_offset = shadow_offset;
6032 base_offset += upl_offset;
6033 base_offset += crypt_offset;
6034 base_offset -= paging_offset;
6035
6036 assert(crypt_offset + crypt_size <= upl_size);
6037
6038 for (upl_offset = 0;
6039 upl_offset < crypt_size;
6040 upl_offset += PAGE_SIZE) {
6041 page = vm_page_lookup(shadow_object,
6042 base_offset + upl_offset);
6043 if (page == VM_PAGE_NULL) {
6044 panic("upl_encrypt: "
6045 "no page for (obj=%p,off=%lld+%d)!\n",
6046 shadow_object,
6047 base_offset,
6048 upl_offset);
6049 }
6050 /*
6051 * Disconnect the page from all pmaps, so that nobody can
6052 * access it while it's encrypted. After that point, all
6053 * accesses to this page will cause a page fault and block
6054 * while the page is busy being encrypted. After the
6055 * encryption completes, any access will cause a
6056 * page fault and the page gets decrypted at that time.
6057 */
6058 pmap_disconnect(page->phys_page);
6059 vm_page_encrypt(page, 0);
6060
6061 if (shadow_object == vm_pageout_scan_wants_object) {
6062 /*
6063 * Give vm_pageout_scan() a chance to convert more
6064 * pages from "clean-in-place" to "clean-and-free",
6065 * if it's interested in the same pages we selected
6066 * in this cluster.
6067 */
6068 vm_object_unlock(shadow_object);
6069 vm_object_lock(shadow_object);
6070 }
6071 }
6072
6073 vm_object_paging_end(shadow_object);
6074 vm_object_unlock(shadow_object);
6075 }
6076
6077 #else /* CRYPTO */
6078 void
6079 upl_encrypt(
6080 __unused upl_t upl,
6081 __unused upl_offset_t crypt_offset,
6082 __unused upl_size_t crypt_size)
6083 {
6084 }
6085
6086 void
6087 vm_page_encrypt(
6088 __unused vm_page_t page,
6089 __unused vm_map_offset_t kernel_mapping_offset)
6090 {
6091 }
6092
6093 void
6094 vm_page_decrypt(
6095 __unused vm_page_t page,
6096 __unused vm_map_offset_t kernel_mapping_offset)
6097 {
6098 }
6099
6100 #endif /* CRYPTO */
6101
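/*
 * For internal UPLs, the upl_page_info array is laid out immediately
 * after the upl structure itself; this returns that offset from the
 * start of the UPL so callers can locate the page-info array.
 */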
6102 vm_size_t
6103 upl_get_internal_pagelist_offset(void)
6104 {
6105 return sizeof(struct upl);
6106 }
6107
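/*
 * upl_clear_dirty:
 *	Set or clear the UPL_CLEAR_DIRTY flag on this UPL, which asks the
 *	commit path to clear (rather than preserve) the pages' dirty bits
 *	when the UPL is committed.
 */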
6108 void
6109 upl_clear_dirty(
6110 upl_t upl,
6111 boolean_t value)
6112 {
6113 if (value) {
6114 upl->flags |= UPL_CLEAR_DIRTY;
6115 } else {
6116 upl->flags &= ~UPL_CLEAR_DIRTY;
6117 }
6118 }
6119
6120
6121 #ifdef MACH_BSD
6122
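/*
 * Out-of-line wrappers around the UPL_* page-info macros, for callers
 * built under MACH_BSD that work with upl_page_info_t arrays.
 */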
6123 boolean_t upl_device_page(upl_page_info_t *upl)
6124 {
6125 return(UPL_DEVICE_PAGE(upl));
6126 }
6127 boolean_t upl_page_present(upl_page_info_t *upl, int index)
6128 {
6129 return(UPL_PAGE_PRESENT(upl, index));
6130 }
6131 boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
6132 {
6133 return(UPL_SPECULATIVE_PAGE(upl, index));
6134 }
6135 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
6136 {
6137 return(UPL_DIRTY_PAGE(upl, index));
6138 }
6139 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
6140 {
6141 return(UPL_VALID_PAGE(upl, index));
6142 }
6143 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
6144 {
6145 return(UPL_PHYS_PAGE(upl, index));
6146 }
6147
6148
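/*
 * vm_countdirtypages:
 *	Walk the inactive, throttled and zero-fill queues, then the active
 *	queue, counting dirty, pageout and precious pages, and print the
 *	totals for the two groups ("IN Q" and "AC Q").
 */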
6149 void
6150 vm_countdirtypages(void)
6151 {
6152 vm_page_t m;
6153 int dpages;
6154 int pgopages;
6155 int precpages;
6156
6157
6158 dpages=0;
6159 pgopages=0;
6160 precpages=0;
6161
6162 vm_page_lock_queues();
6163 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6164 do {
6165 if (m ==(vm_page_t )0) break;
6166
6167 if(m->dirty) dpages++;
6168 if(m->pageout) pgopages++;
6169 if(m->precious) precpages++;
6170
6171 assert(m->object != kernel_object);
6172 m = (vm_page_t) queue_next(&m->pageq);
6173 if (m ==(vm_page_t )0) break;
6174
6175 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
6176 vm_page_unlock_queues();
6177
6178 vm_page_lock_queues();
6179 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
6180 do {
6181 if (m ==(vm_page_t )0) break;
6182
6183 dpages++;
6184 assert(m->dirty);
6185 assert(!m->pageout);
6186 assert(m->object != kernel_object);
6187 m = (vm_page_t) queue_next(&m->pageq);
6188 if (m ==(vm_page_t )0) break;
6189
6190 } while (!queue_end(&vm_page_queue_throttled,(queue_entry_t) m));
6191 vm_page_unlock_queues();
6192
6193 vm_page_lock_queues();
6194 m = (vm_page_t) queue_first(&vm_page_queue_zf);
6195 do {
6196 if (m ==(vm_page_t )0) break;
6197
6198 if(m->dirty) dpages++;
6199 if(m->pageout) pgopages++;
6200 if(m->precious) precpages++;
6201
6202 assert(m->object != kernel_object);
6203 m = (vm_page_t) queue_next(&m->pageq);
6204 if (m ==(vm_page_t )0) break;
6205
6206 } while (!queue_end(&vm_page_queue_zf,(queue_entry_t) m));
6207 vm_page_unlock_queues();
6208
6209 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
6210
6211 dpages=0;
6212 pgopages=0;
6213 precpages=0;
6214
6215 vm_page_lock_queues();
6216 m = (vm_page_t) queue_first(&vm_page_queue_active);
6217
6218 do {
6219 if(m == (vm_page_t )0) break;
6220 if(m->dirty) dpages++;
6221 if(m->pageout) pgopages++;
6222 if(m->precious) precpages++;
6223
6224 assert(m->object != kernel_object);
6225 m = (vm_page_t) queue_next(&m->pageq);
6226 if(m == (vm_page_t )0) break;
6227
6228 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
6229 vm_page_unlock_queues();
6230
6231 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
6232
6233 }
6234 #endif /* MACH_BSD */
6235
6236 ppnum_t upl_get_highest_page(
6237 upl_t upl)
6238 {
6239 return upl->highest_page;
6240 }
6241
6242 #ifdef UPL_DEBUG
6243 kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
6244 {
6245 upl->ubc_alias1 = alias1;
6246 upl->ubc_alias2 = alias2;
6247 return KERN_SUCCESS;
6248 }
6249 int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
6250 {
6251 if(al)
6252 *al = upl->ubc_alias1;
6253 if(al2)
6254 *al2 = upl->ubc_alias2;
6255 return KERN_SUCCESS;
6256 }
6257 #endif /* UPL_DEBUG */
6258
6259
6260
6261 #if MACH_KDB
6262 #include <ddb/db_output.h>
6263 #include <ddb/db_print.h>
6264 #include <vm/vm_print.h>
6265
6266 #define printf kdbprintf
6267 void db_pageout(void);
6268
6269 void
6270 db_vm(void)
6271 {
6272
6273 iprintf("VM Statistics:\n");
6274 db_indent += 2;
6275 iprintf("pages:\n");
6276 db_indent += 2;
6277 iprintf("activ %5d inact %5d free %5d",
6278 vm_page_active_count, vm_page_inactive_count,
6279 vm_page_free_count);
6280 printf(" wire %5d gobbl %5d\n",
6281 vm_page_wire_count, vm_page_gobble_count);
6282 db_indent -= 2;
6283 iprintf("target:\n");
6284 db_indent += 2;
6285 iprintf("min %5d inact %5d free %5d",
6286 vm_page_free_min, vm_page_inactive_target,
6287 vm_page_free_target);
6288 printf(" resrv %5d\n", vm_page_free_reserved);
6289 db_indent -= 2;
6290 iprintf("pause:\n");
6291 db_pageout();
6292 db_indent -= 2;
6293 }
6294
6295 #if MACH_COUNTERS
6296 extern int c_laundry_pages_freed;
6297 #endif /* MACH_COUNTERS */
6298
6299 void
6300 db_pageout(void)
6301 {
6302 iprintf("Pageout Statistics:\n");
6303 db_indent += 2;
6304 iprintf("active %5d inactv %5d\n",
6305 vm_pageout_active, vm_pageout_inactive);
6306 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
6307 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
6308 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
6309 iprintf("used %5d clean %5d dirty %5d\n",
6310 vm_pageout_inactive_used, vm_pageout_inactive_clean,
6311 vm_pageout_inactive_dirty);
6312 #if MACH_COUNTERS
6313 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
6314 #endif /* MACH_COUNTERS */
6315 #if MACH_CLUSTER_STATS
6316 iprintf("Cluster Statistics:\n");
6317 db_indent += 2;
6318 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
6319 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
6320 vm_pageout_cluster_collisions);
6321 iprintf("clusters %5d conversions %5d\n",
6322 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
6323 db_indent -= 2;
6324 iprintf("Target Statistics:\n");
6325 db_indent += 2;
6326 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
6327 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
6328 vm_pageout_target_page_freed);
6329 db_indent -= 2;
6330 #endif /* MACH_CLUSTER_STATS */
6331 db_indent -= 2;
6332 }
6333
6334 #endif /* MACH_KDB */