1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71 #include <mach_kdb.h>
72 #include <advisory_pageout.h>
73
74 #include <mach/mach_types.h>
75 #include <mach/memory_object.h>
76 #include <mach/memory_object_default.h>
77 #include <mach/memory_object_control_server.h>
78 #include <mach/mach_host_server.h>
79 #include <mach/upl.h>
80 #include <mach/vm_map.h>
81 #include <mach/vm_param.h>
82 #include <mach/vm_statistics.h>
83 #include <mach/sdt.h>
84
85 #include <kern/kern_types.h>
86 #include <kern/counters.h>
87 #include <kern/host_statistics.h>
88 #include <kern/machine.h>
89 #include <kern/misc_protos.h>
90 #include <kern/sched.h>
91 #include <kern/thread.h>
92 #include <kern/xpr.h>
93 #include <kern/kalloc.h>
94
95 #include <machine/vm_tuning.h>
96 #include <machine/commpage.h>
97
98 #if CONFIG_EMBEDDED
99 #include <sys/kern_memorystatus.h>
100 #endif
101
102 #include <vm/pmap.h>
103 #include <vm/vm_fault.h>
104 #include <vm/vm_map.h>
105 #include <vm/vm_object.h>
106 #include <vm/vm_page.h>
107 #include <vm/vm_pageout.h>
108 #include <vm/vm_protos.h> /* must be last */
109 #include <vm/memory_object.h>
110 #include <vm/vm_purgeable_internal.h>
111
112 /*
113 * ENCRYPTED SWAP:
114 */
115 #include <../bsd/crypto/aes/aes.h>
116 extern u_int32_t random(void); /* from <libkern/libkern.h> */
117
118 #if UPL_DEBUG
119 #include <libkern/OSDebug.h>
120 #endif
121
122 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */
123 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100
124 #endif
125
126 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
127 #ifdef CONFIG_EMBEDDED
128 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
129 #else
130 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
131 #endif
132 #endif
133
134 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
135 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
136 #endif
137
138 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
139 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
140 #endif
141
142 #ifndef VM_PAGE_LAUNDRY_MAX
143 #define VM_PAGE_LAUNDRY_MAX 16UL /* maximum pageouts on a given pageout queue */
144 #endif /* VM_PAGE_LAUNDRY_MAX */
145
146 #ifndef VM_PAGEOUT_BURST_WAIT
147 #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
148 #endif /* VM_PAGEOUT_BURST_WAIT */
149
150 #ifndef VM_PAGEOUT_EMPTY_WAIT
151 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
152 #endif /* VM_PAGEOUT_EMPTY_WAIT */
153
154 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
155 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
156 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
157
158 #ifndef VM_PAGEOUT_IDLE_WAIT
159 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
160 #endif /* VM_PAGEOUT_IDLE_WAIT */
161
162 #ifndef VM_PAGE_SPECULATIVE_TARGET
163 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / 20)
164 #endif /* VM_PAGE_SPECULATIVE_TARGET */
165
166 #ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
167 #define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
168 #endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */
169
170
171 /*
172 * To obtain a reasonable LRU approximation, the inactive queue
173 * needs to be large enough to give pages on it a chance to be
174 * referenced a second time. This macro defines the fraction
175 * of active+inactive pages that should be inactive.
176 * The pageout daemon uses it to update vm_page_inactive_target.
177 *
178 * If vm_page_free_count falls below vm_page_free_target and
179 * vm_page_inactive_count is below vm_page_inactive_target,
180 * then the pageout daemon starts running.
181 */
182
183 #ifndef VM_PAGE_INACTIVE_TARGET
184 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
185 #endif /* VM_PAGE_INACTIVE_TARGET */
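/*
 * Illustrative arithmetic (hypothetical numbers, not from this file):
 * with roughly 900,000 active+inactive pages,
 * VM_PAGE_INACTIVE_TARGET(900000) evaluates to 300,000, i.e. the
 * daemon tries to keep about one third of that pool on the inactive
 * queue so pages get a second chance to be referenced before they
 * are reclaimed.
 */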
186
187 /*
188 * Once the pageout daemon starts running, it keeps going
189 * until vm_page_free_count meets or exceeds vm_page_free_target.
190 */
191
192 #ifndef VM_PAGE_FREE_TARGET
193 #ifdef CONFIG_EMBEDDED
194 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
195 #else
196 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
197 #endif
198 #endif /* VM_PAGE_FREE_TARGET */
199
200 /*
201 * The pageout daemon always starts running once vm_page_free_count
202 * falls below vm_page_free_min.
203 */
204
205 #ifndef VM_PAGE_FREE_MIN
206 #ifdef CONFIG_EMBEDDED
207 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
208 #else
209 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
210 #endif
211 #endif /* VM_PAGE_FREE_MIN */
212
213 #define VM_PAGE_FREE_MIN_LIMIT 1500
214 #define VM_PAGE_FREE_TARGET_LIMIT 2000
215
216
217 /*
218 * When vm_page_free_count falls below vm_page_free_reserved,
219 * only vm-privileged threads can allocate pages. vm-privilege
220 * allows the pageout daemon and default pager (and any other
221 * associated threads needed for default pageout) to continue
222 * operation by dipping into the reserved pool of pages.
223 */
224
225 #ifndef VM_PAGE_FREE_RESERVED
226 #define VM_PAGE_FREE_RESERVED(n) \
227 ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
228 #endif /* VM_PAGE_FREE_RESERVED */
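/*
 * Illustrative arithmetic (hypothetical argument, not from this file):
 * with VM_PAGE_LAUNDRY_MAX == 16, VM_PAGE_FREE_RESERVED(n) evaluates
 * to 96 + n, so VM_PAGE_FREE_RESERVED(4) reserves 100 pages that only
 * vm-privileged threads (the pageout daemon and default pager) may
 * allocate from.
 */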
229
230 /*
231 * When we dequeue pages from the inactive list, they are
232 * reactivated (ie, put back on the active queue) if referenced.
233 * However, it is possible to starve the free list if other
234 * processors are referencing pages faster than we can turn off
235 * the referenced bit. So we limit the number of reactivations
236 * we will make per call of vm_pageout_scan().
237 */
238 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
239 #ifndef VM_PAGE_REACTIVATE_LIMIT
240 #ifdef CONFIG_EMBEDDED
241 #define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
242 #else
243 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
244 #endif
245 #endif /* VM_PAGE_REACTIVATE_LIMIT */
246 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 100
247
248
249 /*
250 * Exported variable used to broadcast the activation of the pageout scan.
251 * Working Set uses this to throttle its use of pmap removes. In this
252 * way, code which runs within memory in an uncontested context does
253 * not keep encountering soft faults.
254 */
255
256 unsigned int vm_pageout_scan_event_counter = 0;
257
258 /*
259 * Forward declarations for internal routines.
260 */
261
262 static void vm_pageout_garbage_collect(int);
263 static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
264 static void vm_pageout_iothread_external(void);
265 static void vm_pageout_iothread_internal(void);
266
267 extern void vm_pageout_continue(void);
268 extern void vm_pageout_scan(void);
269
270 static thread_t vm_pageout_external_iothread = THREAD_NULL;
271 static thread_t vm_pageout_internal_iothread = THREAD_NULL;
272
273 unsigned int vm_pageout_reserved_internal = 0;
274 unsigned int vm_pageout_reserved_really = 0;
275
276 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
277 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
278 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
279 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
280 unsigned int vm_pageout_deadlock_relief = 0;
281 unsigned int vm_pageout_inactive_relief = 0;
282 unsigned int vm_pageout_burst_active_throttle = 0;
283 unsigned int vm_pageout_burst_inactive_throttle = 0;
284
285 /*
286 * Protection against zero fill flushing live working sets derived
287 * from existing backing store and files
288 */
289 unsigned int vm_accellerate_zf_pageout_trigger = 400;
290 unsigned int zf_queue_min_count = 100;
291 unsigned int vm_zf_queue_count = 0;
292
293 #if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */
294 unsigned int vm_zf_count = 0;
295 #else
296 uint64_t vm_zf_count __attribute__((aligned(8))) = 0;
297 #endif
298
299 /*
300 * These variables record the pageout daemon's actions:
301 * how many pages it looks at and what happens to those pages.
302 * No locking needed because only one thread modifies the variables.
303 */
304
305 unsigned int vm_pageout_active = 0; /* debugging */
306 unsigned int vm_pageout_inactive = 0; /* debugging */
307 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
308 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
309 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
310 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
311 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
312 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
313 unsigned int vm_pageout_inactive_used = 0; /* debugging */
314 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
315 unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
316 unsigned int vm_pageout_inactive_deactivated = 0; /* debugging */
317 unsigned int vm_pageout_inactive_zf = 0; /* debugging */
318 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
319 unsigned int vm_pageout_purged_objects = 0; /* debugging */
320 unsigned int vm_stat_discard = 0; /* debugging */
321 unsigned int vm_stat_discard_sent = 0; /* debugging */
322 unsigned int vm_stat_discard_failure = 0; /* debugging */
323 unsigned int vm_stat_discard_throttle = 0; /* debugging */
324 unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */
325 unsigned int vm_pageout_catch_ups = 0; /* debugging */
326 unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */
327
328 unsigned int vm_pageout_scan_active_throttled = 0;
329 unsigned int vm_pageout_scan_inactive_throttled = 0;
330 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
331 unsigned int vm_pageout_scan_throttle_aborted = 0; /* debugging */
332 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
333 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
334 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
335 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
336 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
337
338 unsigned int vm_page_speculative_count_drifts = 0;
339 unsigned int vm_page_speculative_count_drift_max = 0;
340
341 /*
342 * Backing store throttle when BS is exhausted
343 */
344 unsigned int vm_backing_store_low = 0;
345
346 unsigned int vm_pageout_out_of_line = 0;
347 unsigned int vm_pageout_in_place = 0;
348
349 unsigned int vm_page_steal_pageout_page = 0;
350
351 /*
352 * ENCRYPTED SWAP:
353 * counters and statistics...
354 */
355 unsigned long vm_page_decrypt_counter = 0;
356 unsigned long vm_page_decrypt_for_upl_counter = 0;
357 unsigned long vm_page_encrypt_counter = 0;
358 unsigned long vm_page_encrypt_abort_counter = 0;
359 unsigned long vm_page_encrypt_already_encrypted_counter = 0;
360 boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */
361
362 struct vm_pageout_queue vm_pageout_queue_internal;
363 struct vm_pageout_queue vm_pageout_queue_external;
364
365 unsigned int vm_page_speculative_target = 0;
366
367 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
368
369 boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
370
371 #if DEVELOPMENT || DEBUG
372 unsigned long vm_cs_validated_resets = 0;
373 #endif
374
375 /*
376 * Routine: vm_backing_store_disable
377 * Purpose:
378 * Suspend non-privileged threads wishing to extend
379 * backing store when we are low on backing store
380 * (Synchronized by caller)
381 */
382 void
383 vm_backing_store_disable(
384 boolean_t disable)
385 {
386 if(disable) {
387 vm_backing_store_low = 1;
388 } else {
389 if(vm_backing_store_low) {
390 vm_backing_store_low = 0;
391 thread_wakeup((event_t) &vm_backing_store_low);
392 }
393 }
394 }
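/*
 * Hypothetical waiter-side sketch (not taken from this file): a
 * non-privileged thread that finds vm_backing_store_low set would
 * block on the same event that vm_backing_store_disable(FALSE) wakes
 * up.  The helper name below is illustrative only.
 */
#if 0
static void
backing_store_wait_example(void)
{
	while (vm_backing_store_low) {
		assert_wait((event_t) &vm_backing_store_low, THREAD_UNINT);
		if (vm_backing_store_low)
			thread_block(THREAD_CONTINUE_NULL);
		else
			clear_wait(current_thread(), THREAD_AWAKENED);
	}
}
#endif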
395
396
397 #if MACH_CLUSTER_STATS
398 unsigned long vm_pageout_cluster_dirtied = 0;
399 unsigned long vm_pageout_cluster_cleaned = 0;
400 unsigned long vm_pageout_cluster_collisions = 0;
401 unsigned long vm_pageout_cluster_clusters = 0;
402 unsigned long vm_pageout_cluster_conversions = 0;
403 unsigned long vm_pageout_target_collisions = 0;
404 unsigned long vm_pageout_target_page_dirtied = 0;
405 unsigned long vm_pageout_target_page_freed = 0;
406 #define CLUSTER_STAT(clause) clause
407 #else /* MACH_CLUSTER_STATS */
408 #define CLUSTER_STAT(clause)
409 #endif /* MACH_CLUSTER_STATS */
410
411 /*
412 * Routine: vm_pageout_object_terminate
413 * Purpose:
414 * Destroy the pageout_object, and perform all of the
415 * required cleanup actions.
416 *
417 * In/Out conditions:
418 * The object must be locked, and will be returned locked.
419 */
420 void
421 vm_pageout_object_terminate(
422 vm_object_t object)
423 {
424 vm_object_t shadow_object;
425
426 /*
427 * Deal with the deallocation (last reference) of a pageout object
428 * (used for cleaning-in-place) by dropping the paging references/
429 * freeing pages in the original object.
430 */
431
432 assert(object->pageout);
433 shadow_object = object->shadow;
434 vm_object_lock(shadow_object);
435
436 while (!queue_empty(&object->memq)) {
437 vm_page_t p, m;
438 vm_object_offset_t offset;
439
440 p = (vm_page_t) queue_first(&object->memq);
441
442 assert(p->private);
443 assert(p->pageout);
444 p->pageout = FALSE;
445 assert(!p->cleaning);
446
447 offset = p->offset;
448 VM_PAGE_FREE(p);
449 p = VM_PAGE_NULL;
450
451 m = vm_page_lookup(shadow_object,
452 offset + object->shadow_offset);
453
454 if(m == VM_PAGE_NULL)
455 continue;
456 assert(m->cleaning);
457 /* used as a trigger on upl_commit etc to recognize the */
458 /* pageout daemon's subsequent desire to pageout a cleaning */
459 /* page. When the bit is on the upl commit code will */
460 /* respect the pageout bit in the target page over the */
461 /* caller's page list indication */
462 m->dump_cleaning = FALSE;
463
464 assert((m->dirty) || (m->precious) ||
465 (m->busy && m->cleaning));
466
467 /*
468 * Handle the trusted pager throttle.
469 * Also decrement the burst throttle (if external).
470 */
471 vm_page_lock_queues();
472 if (m->laundry) {
473 vm_pageout_throttle_up(m);
474 }
475
476 /*
477 * Handle the "target" page(s). These pages are to be freed if
478 * successfully cleaned. Target pages are always busy, and are
479 * wired exactly once. The initial target pages are not mapped,
480 * (so cannot be referenced or modified) but converted target
481 * pages may have been modified between the selection as an
482 * adjacent page and conversion to a target.
483 */
484 if (m->pageout) {
485 assert(m->busy);
486 assert(m->wire_count == 1);
487 m->cleaning = FALSE;
488 m->encrypted_cleaning = FALSE;
489 m->pageout = FALSE;
490 #if MACH_CLUSTER_STATS
491 if (m->wanted) vm_pageout_target_collisions++;
492 #endif
493 /*
494 * Revoke all access to the page. Since the object is
495 * locked, and the page is busy, this prevents the page
496 * from being dirtied after the pmap_disconnect() call
497 * returns.
498 *
499 * Since the page is left "dirty" but "not modified", we
500 * can detect whether the page was redirtied during
501 * pageout by checking the modify state.
502 */
503 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
504 m->dirty = TRUE;
505 else
506 m->dirty = FALSE;
507
508 if (m->dirty) {
509 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
510 vm_page_unwire(m, TRUE); /* reactivates */
511 VM_STAT_INCR(reactivations);
512 PAGE_WAKEUP_DONE(m);
513 } else {
514 CLUSTER_STAT(vm_pageout_target_page_freed++;)
515 vm_page_free(m);/* clears busy, etc. */
516 }
517 vm_page_unlock_queues();
518 continue;
519 }
520 /*
521 * Handle the "adjacent" pages. These pages were cleaned in
522 * place, and should be left alone.
523 * If prep_pin_count is nonzero, then someone is using the
524 * page, so make it active.
525 */
526 if (!m->active && !m->inactive && !m->throttled && !m->private) {
527 if (m->reference)
528 vm_page_activate(m);
529 else
530 vm_page_deactivate(m);
531 }
532 if((m->busy) && (m->cleaning)) {
533
534 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
535 m->busy = FALSE;
536
537 /* We do not re-set m->dirty ! */
538 /* The page was busy so no extraneous activity */
539 /* could have occurred. COPY_INTO is a read into the */
540 /* new pages. CLEAN_IN_PLACE does actually write */
541 /* out the pages but handling outside of this code */
542 /* will take care of resetting dirty. We clear the */
543 /* modify however for the Programmed I/O case. */
544 pmap_clear_modify(m->phys_page);
545
546 m->absent = FALSE;
547 m->overwriting = FALSE;
548 } else if (m->overwriting) {
549 /* alternate request page list, write to page_list */
550 /* case. Occurs when the original page was wired */
551 /* at the time of the list request */
552 assert(VM_PAGE_WIRED(m));
553 vm_page_unwire(m, TRUE); /* reactivates */
554 m->overwriting = FALSE;
555 } else {
556 /*
557 * Set the dirty state according to whether or not the page was
558 * modified during the pageout. Note that we purposefully do
559 * NOT call pmap_clear_modify since the page is still mapped.
560 * If the page were to be dirtied between the 2 calls, this
561 * fact would be lost. This code is only necessary to
562 * maintain statistics, since the pmap module is always
563 * consulted if m->dirty is false.
564 */
565 #if MACH_CLUSTER_STATS
566 m->dirty = pmap_is_modified(m->phys_page);
567
568 if (m->dirty) vm_pageout_cluster_dirtied++;
569 else vm_pageout_cluster_cleaned++;
570 if (m->wanted) vm_pageout_cluster_collisions++;
571 #else
572 m->dirty = 0;
573 #endif
574 }
575 m->cleaning = FALSE;
576 m->encrypted_cleaning = FALSE;
577
578 /*
579 * Wakeup any thread waiting for the page to be un-cleaning.
580 */
581 PAGE_WAKEUP(m);
582 vm_page_unlock_queues();
583 }
584 /*
585 * Account for the paging reference taken in vm_paging_object_allocate.
586 */
587 vm_object_activity_end(shadow_object);
588 vm_object_unlock(shadow_object);
589
590 assert(object->ref_count == 0);
591 assert(object->paging_in_progress == 0);
592 assert(object->activity_in_progress == 0);
593 assert(object->resident_page_count == 0);
594 return;
595 }
596
597 /*
598 * Routine: vm_pageclean_setup
599 *
600 * Purpose: setup a page to be cleaned (made non-dirty), but not
601 * necessarily flushed from the VM page cache.
602 * This is accomplished by cleaning in place.
603 *
604 * The page must not be busy, and new_object
605 * must be locked.
606 *
607 */
608 void
609 vm_pageclean_setup(
610 vm_page_t m,
611 vm_page_t new_m,
612 vm_object_t new_object,
613 vm_object_offset_t new_offset)
614 {
615 assert(!m->busy);
616 #if 0
617 assert(!m->cleaning);
618 #endif
619
620 XPR(XPR_VM_PAGEOUT,
621 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
622 m->object, m->offset, m,
623 new_m, new_offset);
624
625 pmap_clear_modify(m->phys_page);
626
627 /*
628 * Mark original page as cleaning in place.
629 */
630 m->cleaning = TRUE;
631 m->dirty = TRUE;
632 m->precious = FALSE;
633
634 /*
635 * Convert the fictitious page to a private shadow of
636 * the real page.
637 */
638 assert(new_m->fictitious);
639 assert(new_m->phys_page == vm_page_fictitious_addr);
640 new_m->fictitious = FALSE;
641 new_m->private = TRUE;
642 new_m->pageout = TRUE;
643 new_m->phys_page = m->phys_page;
644
645 vm_page_lockspin_queues();
646 vm_page_wire(new_m);
647 vm_page_unlock_queues();
648
649 vm_page_insert(new_m, new_object, new_offset);
650 assert(!new_m->wanted);
651 new_m->busy = FALSE;
652 }
653
654 /*
655 * Routine: vm_pageout_initialize_page
656 * Purpose:
657 * Causes the specified page to be initialized in
658 * the appropriate memory object. This routine is used to push
659 * pages into a copy-object when they are modified in the
660 * permanent object.
661 *
662 * The page is moved to a temporary object and paged out.
663 *
664 * In/out conditions:
665 * The page in question must not be on any pageout queues.
666 * The object to which it belongs must be locked.
667 * The page must be busy, but not hold a paging reference.
668 *
669 * Implementation:
670 * Move this page to a completely new object.
671 */
672 void
673 vm_pageout_initialize_page(
674 vm_page_t m)
675 {
676 vm_object_t object;
677 vm_object_offset_t paging_offset;
678 vm_page_t holding_page;
679 memory_object_t pager;
680
681 XPR(XPR_VM_PAGEOUT,
682 "vm_pageout_initialize_page, page 0x%X\n",
683 m, 0, 0, 0, 0);
684 assert(m->busy);
685
686 /*
687 * Verify that we really want to clean this page
688 */
689 assert(!m->absent);
690 assert(!m->error);
691 assert(m->dirty);
692
693 /*
694 * Create a paging reference to let us play with the object.
695 */
696 object = m->object;
697 paging_offset = m->offset + object->paging_offset;
698
699 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
700 VM_PAGE_FREE(m);
701 panic("reservation without pageout?"); /* alan */
702 vm_object_unlock(object);
703
704 return;
705 }
706
707 /*
708 * If there's no pager, then we can't clean the page. This should
709 * never happen since this should be a copy object and therefore not
710 * an external object, so the pager should always be there.
711 */
712
713 pager = object->pager;
714
715 if (pager == MEMORY_OBJECT_NULL) {
716 VM_PAGE_FREE(m);
717 panic("missing pager for copy object");
718 return;
719 }
720
721 /* set the page for future call to vm_fault_list_request */
722 vm_object_paging_begin(object);
723 holding_page = NULL;
724
725 pmap_clear_modify(m->phys_page);
726 m->dirty = TRUE;
727 m->busy = TRUE;
728 m->list_req_pending = TRUE;
729 m->cleaning = TRUE;
730 m->pageout = TRUE;
731
732 vm_page_lockspin_queues();
733 vm_page_wire(m);
734 vm_page_unlock_queues();
735
736 vm_object_unlock(object);
737
738 /*
739 * Write the data to its pager.
740 * Note that the data is passed by naming the new object,
741 * not a virtual address; the pager interface has been
742 * manipulated to use the "internal memory" data type.
743 * [The object reference from its allocation is donated
744 * to the eventual recipient.]
745 */
746 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
747
748 vm_object_lock(object);
749 vm_object_paging_end(object);
750 }
751
752 #if MACH_CLUSTER_STATS
753 #define MAXCLUSTERPAGES 16
754 struct {
755 unsigned long pages_in_cluster;
756 unsigned long pages_at_higher_offsets;
757 unsigned long pages_at_lower_offsets;
758 } cluster_stats[MAXCLUSTERPAGES];
759 #endif /* MACH_CLUSTER_STATS */
760
761
762 /*
763 * vm_pageout_cluster:
764 *
765 * Given a page, queue it to the appropriate I/O thread,
766 * which will page it out and attempt to clean adjacent pages
767 * in the same operation.
768 *
769 * The page must be busy, and the object and queues locked. We will take a
770 * paging reference to prevent deallocation or collapse when we
771 * release the object lock back at the call site. The I/O thread
772 * is responsible for consuming this reference
773 *
774 * The page must not be on any pageout queue.
775 */
776
777 void
778 vm_pageout_cluster(vm_page_t m)
779 {
780 vm_object_t object = m->object;
781 struct vm_pageout_queue *q;
782
783
784 XPR(XPR_VM_PAGEOUT,
785 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
786 object, m->offset, m, 0, 0);
787
788 VM_PAGE_CHECK(m);
789
790 /*
791 * Only a certain kind of page is appreciated here.
792 */
793 assert(m->busy && (m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
794 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
795 assert(!m->throttled);
796
797 /*
798 * protect the object from collapse -
799 * locking in the object's paging_offset.
800 */
801 vm_object_paging_begin(object);
802
803 /*
804 * set the page for future call to vm_fault_list_request
805 * page should already be marked busy
806 */
807 vm_page_wire(m);
808 m->list_req_pending = TRUE;
809 m->cleaning = TRUE;
810 m->pageout = TRUE;
811
812 if (object->internal == TRUE)
813 q = &vm_pageout_queue_internal;
814 else
815 q = &vm_pageout_queue_external;
816
817 /*
818 * pgo_laundry count is tied to the laundry bit
819 */
820 m->laundry = TRUE;
821 q->pgo_laundry++;
822
823 m->pageout_queue = TRUE;
824 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
825
826 if (q->pgo_idle == TRUE) {
827 q->pgo_idle = FALSE;
828 thread_wakeup((event_t) &q->pgo_pending);
829 }
830
831 VM_PAGE_CHECK(m);
832 }
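/*
 * Note: the laundry accounting established here (m->laundry = TRUE,
 * q->pgo_laundry++) is reversed by vm_pageout_throttle_up() below when
 * the page comes back from the I/O thread or is stolen back;
 * pgo_laundry is also what vm_pageout_scan() consults to decide
 * whether a pageout queue is throttled.
 */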
833
834
835 unsigned long vm_pageout_throttle_up_count = 0;
836
837 /*
838 * A page is back from laundry or we are stealing it back from
839 * the laundering state. See if there are some pages waiting to
840 * go to laundry and if we can let some of them go now.
841 *
842 * Object and page queues must be locked.
843 */
844 void
845 vm_pageout_throttle_up(
846 vm_page_t m)
847 {
848 struct vm_pageout_queue *q;
849
850 assert(m->object != VM_OBJECT_NULL);
851 assert(m->object != kernel_object);
852
853 vm_pageout_throttle_up_count++;
854
855 if (m->object->internal == TRUE)
856 q = &vm_pageout_queue_internal;
857 else
858 q = &vm_pageout_queue_external;
859
860 if (m->pageout_queue == TRUE) {
861
862 queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
863 m->pageout_queue = FALSE;
864
865 m->pageq.next = NULL;
866 m->pageq.prev = NULL;
867
868 vm_object_paging_end(m->object);
869 }
870 if (m->laundry == TRUE) {
871 m->laundry = FALSE;
872 q->pgo_laundry--;
873
874 if (q->pgo_throttled == TRUE) {
875 q->pgo_throttled = FALSE;
876 thread_wakeup((event_t) &q->pgo_laundry);
877 }
878 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
879 q->pgo_draining = FALSE;
880 thread_wakeup((event_t) (&q->pgo_laundry+1));
881 }
882 }
883 }
884
885
886 /*
887 * vm_pageout_scan does the dirty work for the pageout daemon.
888 * It returns with vm_page_queue_free_lock held and
889 * vm_page_free_wanted == 0.
890 */
891
892 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT (3 * MAX_UPL_TRANSFER)
893
894 #define FCS_IDLE 0
895 #define FCS_DELAYED 1
896 #define FCS_DEADLOCK_DETECTED 2
897
898 struct flow_control {
899 int state;
900 mach_timespec_t ts;
901 };
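/*
 * Note: flow_control is a small state machine used by vm_pageout_scan()
 * when the default-pager (internal) pageout queue stays throttled:
 *
 *	FCS_IDLE:	queue just became throttled; arm "ts" to
 *			now + vm_pageout_deadlock_wait, go to FCS_DELAYED.
 *	FCS_DELAYED:	still throttled; once the deadline in "ts" passes,
 *			assume a potential deadlock, compute a relief target
 *			of pages to move and go to FCS_DEADLOCK_DETECTED.
 *	FCS_DEADLOCK_DETECTED: keep moving pages until the relief target
 *			is consumed, then re-arm the timer.
 *
 * The state drops back to FCS_IDLE once the queue is no longer throttled.
 */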
902
903
904 /*
905 * VM memory pressure monitoring.
906 *
907 * vm_pageout_scan() keeps track of the number of pages it considers and
908 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
909 *
910 * compute_memory_pressure() is called every second from compute_averages()
911 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
912 * of reclaimed pages in a new vm_pageout_stat[] bucket.
913 *
914 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
915 * The caller provides the number of seconds ("nsecs") worth of statistics
916 * it wants, up to 30 seconds.
917 * It computes the number of pages reclaimed in the past "nsecs" seconds and
918 * also returns the number of pages the system still needs to reclaim at this
919 * moment in time.
920 */
921 #define VM_PAGEOUT_STAT_SIZE 31
922 struct vm_pageout_stat {
923 unsigned int considered;
924 unsigned int reclaimed;
925 } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0}, };
926 unsigned int vm_pageout_stat_now = 0;
927 unsigned int vm_memory_pressure = 0;
928
929 #define VM_PAGEOUT_STAT_BEFORE(i) \
930 (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
931 #define VM_PAGEOUT_STAT_AFTER(i) \
932 (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
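/*
 * Illustrative sketch (hypothetical helper, not part of this file):
 * with VM_PAGEOUT_STAT_SIZE == 31, VM_PAGEOUT_STAT_BEFORE(0) == 30 and
 * VM_PAGEOUT_STAT_AFTER(30) == 0, so the ring always holds the 30 most
 * recently completed seconds plus the bucket currently being filled.
 * Summing the last "nsecs" buckets mirrors the loop in
 * mach_vm_pressure_monitor() below.
 */
#if 0
static unsigned int
vm_pageout_stat_ring_sum(unsigned int nsecs)
{
	unsigned int i, sum = 0;

	for (i = VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now);
	     i != vm_pageout_stat_now && nsecs-- != 0;
	     i = VM_PAGEOUT_STAT_BEFORE(i))
		sum += vm_pageout_stats[i].reclaimed;

	return sum;
}
#endif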
933
934 /*
935 * Called from compute_averages().
936 */
937 void
938 compute_memory_pressure(
939 __unused void *arg)
940 {
941 unsigned int vm_pageout_next;
942
943 vm_memory_pressure =
944 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed;
945
946 commpage_set_memory_pressure( vm_memory_pressure );
947
948 /* move "now" forward */
949 vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
950 vm_pageout_stats[vm_pageout_next].considered = 0;
951 vm_pageout_stats[vm_pageout_next].reclaimed = 0;
952 vm_pageout_stat_now = vm_pageout_next;
953 }
954
955 unsigned int
956 mach_vm_ctl_page_free_wanted(void)
957 {
958 unsigned int page_free_target, page_free_count, page_free_wanted;
959
960 page_free_target = vm_page_free_target;
961 page_free_count = vm_page_free_count;
962 if (page_free_target > page_free_count) {
963 page_free_wanted = page_free_target - page_free_count;
964 } else {
965 page_free_wanted = 0;
966 }
967
968 return page_free_wanted;
969 }
970
971 kern_return_t
972 mach_vm_pressure_monitor(
973 boolean_t wait_for_pressure,
974 unsigned int nsecs_monitored,
975 unsigned int *pages_reclaimed_p,
976 unsigned int *pages_wanted_p)
977 {
978 wait_result_t wr;
979 unsigned int vm_pageout_then, vm_pageout_now;
980 unsigned int pages_reclaimed;
981
982 /*
983 * We don't take the vm_page_queue_lock here because we don't want
984 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
985 * thread when it's trying to reclaim memory. We don't need fully
986 * accurate monitoring anyway...
987 */
988
989 if (wait_for_pressure) {
990 /* wait until there's memory pressure */
991 while (vm_page_free_count >= vm_page_free_target) {
992 wr = assert_wait((event_t) &vm_page_free_wanted,
993 THREAD_INTERRUPTIBLE);
994 if (wr == THREAD_WAITING) {
995 wr = thread_block(THREAD_CONTINUE_NULL);
996 }
997 if (wr == THREAD_INTERRUPTED) {
998 return KERN_ABORTED;
999 }
1000 if (wr == THREAD_AWAKENED) {
1001 /*
1002 * The memory pressure might have already
1003 * been relieved but let's not block again
1004 * and let's report that there was memory
1005 * pressure at some point.
1006 */
1007 break;
1008 }
1009 }
1010 }
1011
1012 /* provide the number of pages the system wants to reclaim */
1013 if (pages_wanted_p != NULL) {
1014 *pages_wanted_p = mach_vm_ctl_page_free_wanted();
1015 }
1016
1017 if (pages_reclaimed_p == NULL) {
1018 return KERN_SUCCESS;
1019 }
1020
1021 /* provide number of pages reclaimed in the last "nsecs_monitored" */
1022 do {
1023 vm_pageout_now = vm_pageout_stat_now;
1024 pages_reclaimed = 0;
1025 for (vm_pageout_then =
1026 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1027 vm_pageout_then != vm_pageout_now &&
1028 nsecs_monitored-- != 0;
1029 vm_pageout_then =
1030 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1031 pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed;
1032 }
1033 } while (vm_pageout_now != vm_pageout_stat_now);
1034 *pages_reclaimed_p = pages_reclaimed;
1035
1036 return KERN_SUCCESS;
1037 }
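/*
 * Hypothetical usage sketch (caller name, 10-second window and printf
 * are illustrative only, not from this file): poll the monitor without
 * blocking and report recent reclaim activity.
 */
#if 0
static void
vm_pressure_poll_example(void)
{
	unsigned int reclaimed = 0, wanted = 0;

	if (mach_vm_pressure_monitor(FALSE,	/* don't block waiting for pressure */
				     10,	/* last 10 seconds of statistics */
				     &reclaimed,
				     &wanted) == KERN_SUCCESS)
		printf("reclaimed %u pages in the last 10s, %u still wanted\n",
		       reclaimed, wanted);
}
#endif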
1038
1039 /* Page States: Used below to maintain the page state
1040 before it's removed from it's Q. This saved state
1041 helps us do the right accounting in certain cases
1042 */
1043
1044 #define PAGE_STATE_SPECULATIVE 1
1045 #define PAGE_STATE_THROTTLED 2
1046 #define PAGE_STATE_ZEROFILL 3
1047 #define PAGE_STATE_INACTIVE 4
1048
1049 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m) \
1050 MACRO_BEGIN \
1051 /* \
1052 * If a "reusable" page somehow made it back into \
1053 * the active queue, it's been re-used and is not \
1054 * quite re-usable. \
1055 * If the VM object was "all_reusable", consider it \
1056 * as "all re-used" instead of converting it to \
1057 * "partially re-used", which could be expensive. \
1058 */ \
1059 if ((m)->reusable || \
1060 (m)->object->all_reusable) { \
1061 vm_object_reuse_pages((m)->object, \
1062 (m)->offset, \
1063 (m)->offset + PAGE_SIZE_64, \
1064 FALSE); \
1065 } \
1066 MACRO_END
1067
1068 void
1069 vm_pageout_scan(void)
1070 {
1071 unsigned int loop_count = 0;
1072 unsigned int inactive_burst_count = 0;
1073 unsigned int active_burst_count = 0;
1074 unsigned int reactivated_this_call;
1075 unsigned int reactivate_limit;
1076 vm_page_t local_freeq = NULL;
1077 int local_freed = 0;
1078 int delayed_unlock;
1079 int refmod_state = 0;
1080 int vm_pageout_deadlock_target = 0;
1081 struct vm_pageout_queue *iq;
1082 struct vm_pageout_queue *eq;
1083 struct vm_speculative_age_q *sq;
1084 struct flow_control flow_control = { 0, { 0, 0 } };
1085 boolean_t inactive_throttled = FALSE;
1086 boolean_t try_failed;
1087 mach_timespec_t ts;
1088 unsigned int msecs = 0;
1089 vm_object_t object;
1090 vm_object_t last_object_tried;
1091 #if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */
1092 unsigned int zf_ratio;
1093 unsigned int zf_run_count;
1094 #else
1095 uint64_t zf_ratio;
1096 uint64_t zf_run_count;
1097 #endif
1098 uint32_t catch_up_count = 0;
1099 uint32_t inactive_reclaim_run;
1100 boolean_t forced_reclaim;
1101 int page_prev_state = 0;
1102
1103 flow_control.state = FCS_IDLE;
1104 iq = &vm_pageout_queue_internal;
1105 eq = &vm_pageout_queue_external;
1106 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1107
1108
1109 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1110
1111
1112 vm_page_lock_queues();
1113 delayed_unlock = 1; /* must be nonzero if Qs are locked, 0 if unlocked */
1114
1115 /*
1116 * Calculate the max number of referenced pages on the inactive
1117 * queue that we will reactivate.
1118 */
1119 reactivated_this_call = 0;
1120 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1121 vm_page_inactive_count);
1122 inactive_reclaim_run = 0;
1123
1124
1125 /*???*/ /*
1126 * We want to gradually dribble pages from the active queue
1127 * to the inactive queue. If we let the inactive queue get
1128 * very small, and then suddenly dump many pages into it,
1129 * those pages won't get a sufficient chance to be referenced
1130 * before we start taking them from the inactive queue.
1131 *
1132 * We must limit the rate at which we send pages to the pagers.
1133 * data_write messages consume memory, for message buffers and
1134 * for map-copy objects. If we get too far ahead of the pagers,
1135 * we can potentially run out of memory.
1136 *
1137 * We can use the laundry count to limit directly the number
1138 * of pages outstanding to the default pager. A similar
1139 * strategy for external pagers doesn't work, because
1140 * external pagers don't have to deallocate the pages sent them,
1141 * and because we might have to send pages to external pagers
1142 * even if they aren't processing writes. So we also
1143 * use a burst count to limit writes to external pagers.
1144 *
1145 * When memory is very tight, we can't rely on external pagers to
1146 * clean pages. They probably aren't running, because they
1147 * aren't vm-privileged. If we kept sending dirty pages to them,
1148 * we could exhaust the free list.
1149 */
1150
1151
1152 Restart:
1153 assert(delayed_unlock!=0);
1154
1155 /*
1156 * A page is "zero-filled" if it was not paged in from somewhere,
1157 * and it belongs to an object at least VM_ZF_OBJECT_SIZE_THRESHOLD big.
1158 * Recalculate the zero-filled page ratio. We use this to apportion
1159 * victimized pages between the normal and zero-filled inactive
1160 * queues according to their relative abundance in memory. Thus if a task
1161 * is flooding memory with zf pages, we begin to hunt them down.
1162 * It would be better to throttle greedy tasks at a higher level,
1163 * but at the moment mach vm cannot do this.
1164 */
1165 {
1166 #if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */
1167 uint32_t total = vm_page_active_count + vm_page_inactive_count;
1168 uint32_t normal = total - vm_zf_count;
1169 #else
1170 uint64_t total = vm_page_active_count + vm_page_inactive_count;
1171 uint64_t normal = total - vm_zf_count;
1172 #endif
1173
1174 /* zf_ratio is the number of zf pages we victimize per normal page */
1175
1176 if (vm_zf_count < vm_accellerate_zf_pageout_trigger)
1177 zf_ratio = 0;
1178 else if ((vm_zf_count <= normal) || (normal == 0))
1179 zf_ratio = 1;
1180 else
1181 zf_ratio = vm_zf_count / normal;
1182
1183 zf_run_count = 0;
1184 }
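		/*
		 * Illustrative arithmetic (hypothetical counts): with
		 * vm_zf_count == 6000 and normal == 2000, zf_ratio == 3,
		 * so the victim selection below takes up to three
		 * zero-filled pages for every normal inactive page
		 * (provided the zf queue holds at least
		 * zf_queue_min_count pages).  With fewer than
		 * vm_accellerate_zf_pageout_trigger (400) zf pages in
		 * the system, zf_ratio == 0 and zf pages are only taken
		 * when the normal inactive queue is empty.
		 */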
1185
1186 /*
1187 * Recalculate vm_page_inactive_target.
1188 */
1189 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1190 vm_page_inactive_count +
1191 vm_page_speculative_count);
1192 /*
1193 * don't want to wake the pageout_scan thread up every time we fall below
1194 * the targets... set a low water mark at 0.25% below the target
1195 */
1196 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);
1197
1198 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1199 vm_page_inactive_count);
1200 object = NULL;
1201 last_object_tried = NULL;
1202 try_failed = FALSE;
1203
1204 if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count))
1205 catch_up_count = vm_page_inactive_count + vm_page_speculative_count;
1206 else
1207 catch_up_count = 0;
1208
1209 for (;;) {
1210 vm_page_t m;
1211
1212 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1213
1214 if (delayed_unlock == 0) {
1215 vm_page_lock_queues();
1216 delayed_unlock = 1;
1217 }
1218
1219 /*
1220 * Don't sweep through active queue more than the throttle
1221 * which should be kept relatively low
1222 */
1223 active_burst_count = MIN(vm_pageout_burst_active_throttle,
1224 vm_page_active_count);
1225
1226 /*
1227 * Move pages from active to inactive.
1228 */
1229 if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
1230 goto done_moving_active_pages;
1231
1232 while (!queue_empty(&vm_page_queue_active) && active_burst_count) {
1233
1234 if (active_burst_count)
1235 active_burst_count--;
1236
1237 vm_pageout_active++;
1238
1239 m = (vm_page_t) queue_first(&vm_page_queue_active);
1240
1241 assert(m->active && !m->inactive);
1242 assert(!m->laundry);
1243 assert(m->object != kernel_object);
1244 assert(m->phys_page != vm_page_guard_addr);
1245
1246 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1247
1248 /*
1249 * Try to lock object; since we've already got the
1250 * page queues lock, we can only 'try' for this one.
1251 * if the 'try' fails, we need to do a mutex_pause
1252 * to allow the owner of the object lock a chance to
1253 * run... otherwise, we're likely to trip over this
1254 * object in the same state as we work our way through
1255 * the queue... clumps of pages associated with the same
1256 * object are fairly typical on the inactive and active queues
1257 */
1258 if (m->object != object) {
1259 if (object != NULL) {
1260 vm_object_unlock(object);
1261 object = NULL;
1262 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1263 }
1264 if (!vm_object_lock_try_scan(m->object)) {
1265 /*
1266 * move page to end of active queue and continue
1267 */
1268 queue_remove(&vm_page_queue_active, m,
1269 vm_page_t, pageq);
1270 queue_enter(&vm_page_queue_active, m,
1271 vm_page_t, pageq);
1272
1273 try_failed = TRUE;
1274
1275 m = (vm_page_t) queue_first(&vm_page_queue_active);
1276 /*
1277 * this is the next object we're going to be interested in
1278 * try to make sure it's available after the mutex_yield
1279 * returns control
1280 */
1281 vm_pageout_scan_wants_object = m->object;
1282
1283 goto done_with_activepage;
1284 }
1285 object = m->object;
1286
1287 try_failed = FALSE;
1288 }
1289
1290 /*
1291 * if the page is BUSY, then we pull it
1292 * off the active queue and leave it alone.
1293 * when BUSY is cleared, it will get stuck
1294 * back on the appropriate queue
1295 */
1296 if (m->busy) {
1297 queue_remove(&vm_page_queue_active, m,
1298 vm_page_t, pageq);
1299 m->pageq.next = NULL;
1300 m->pageq.prev = NULL;
1301
1302 if (!m->fictitious)
1303 vm_page_active_count--;
1304 m->active = FALSE;
1305
1306 goto done_with_activepage;
1307 }
1308
1309 /* deal with a rogue "reusable" page */
1310 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m);
1311
1312 /*
1313 * Deactivate the page while holding the object
1314 * locked, so we know the page is still not busy.
1315 * This should prevent races between pmap_enter
1316 * and pmap_clear_reference. The page might be
1317 * absent or fictitious, but vm_page_deactivate
1318 * can handle that.
1319 */
1320 vm_page_deactivate(m);
1321
1322 done_with_activepage:
1323 if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) {
1324
1325 if (object != NULL) {
1326 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1327 vm_object_unlock(object);
1328 object = NULL;
1329 }
1330 if (local_freeq) {
1331 vm_page_unlock_queues();
1332 vm_page_free_list(local_freeq, TRUE);
1333
1334 local_freeq = NULL;
1335 local_freed = 0;
1336 vm_page_lock_queues();
1337 } else
1338 lck_mtx_yield(&vm_page_queue_lock);
1339
1340 delayed_unlock = 1;
1341
1342 /*
1343 * continue the while loop processing
1344 * the active queue... need to hold
1345 * the page queues lock
1346 */
1347 }
1348 }
1349
1350
1351
1352 /**********************************************************************
1353 * above this point we're playing with the active queue
1354 * below this point we're playing with the throttling mechanisms
1355 * and the inactive queue
1356 **********************************************************************/
1357
1358 done_moving_active_pages:
1359
1360 /*
1361 * We are done if we have met our target *and*
1362 * nobody is still waiting for a page.
1363 */
1364 if (vm_page_free_count + local_freed >= vm_page_free_target) {
1365 if (object != NULL) {
1366 vm_object_unlock(object);
1367 object = NULL;
1368 }
1369 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1370
1371 if (local_freeq) {
1372 vm_page_unlock_queues();
1373 vm_page_free_list(local_freeq, TRUE);
1374
1375 local_freeq = NULL;
1376 local_freed = 0;
1377 vm_page_lock_queues();
1378 }
1379 /*
1380 * inactive target still not met... keep going
1381 * until we get the queues balanced
1382 */
1383
1384 /*
1385 * Recalculate vm_page_inactive_target.
1386 */
1387 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1388 vm_page_inactive_count +
1389 vm_page_speculative_count);
1390
1391 #ifndef CONFIG_EMBEDDED
1392 /*
1393 * XXX: if no active pages can be reclaimed, pageout scan can be stuck trying
1394 * to balance the queues
1395 */
1396 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
1397 !queue_empty(&vm_page_queue_active))
1398 continue;
1399 #endif
1400
1401 lck_mtx_lock(&vm_page_queue_free_lock);
1402
1403 if ((vm_page_free_count >= vm_page_free_target) &&
1404 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
1405
1406 vm_page_unlock_queues();
1407
1408 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1409
1410 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1411
1412 return;
1413 }
1414 lck_mtx_unlock(&vm_page_queue_free_lock);
1415 }
1416
1417 /*
1418 * Before anything, we check if we have any ripe volatile
1419 * objects around. If so, try to purge the first object.
1420 * If the purge fails, fall through to reclaim a page instead.
1421 * If the purge succeeds, go back to the top and reevaluate
1422 * the new memory situation.
1423 */
1424 assert (available_for_purge>=0);
1425 if (available_for_purge)
1426 {
1427 if (object != NULL) {
1428 vm_object_unlock(object);
1429 object = NULL;
1430 }
1431 if(TRUE == vm_purgeable_object_purge_one()) {
1432 continue;
1433 }
1434 }
1435
1436 if (queue_empty(&sq->age_q) && vm_page_speculative_count) {
1437 /*
1438 * try to pull pages from the aging bins
1439 * see vm_page.h for an explanation of how
1440 * this mechanism works
1441 */
1442 struct vm_speculative_age_q *aq;
1443 mach_timespec_t ts_fully_aged;
1444 boolean_t can_steal = FALSE;
1445 int num_scanned_queues;
1446
1447 aq = &vm_page_queue_speculative[speculative_steal_index];
1448
1449 num_scanned_queues = 0;
1450 while (queue_empty(&aq->age_q) &&
1451 num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
1452
1453 speculative_steal_index++;
1454
1455 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
1456 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
1457
1458 aq = &vm_page_queue_speculative[speculative_steal_index];
1459 }
1460
1461 if (num_scanned_queues ==
1462 VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
1463 /*
1464 * XXX We've scanned all the speculative
1465 * queues but still haven't found one
1466 * that is not empty, even though
1467 * vm_page_speculative_count is not 0.
1468 */
1469 /* report the anomaly... */
1470 printf("vm_pageout_scan: "
1471 "all speculative queues empty "
1472 "but count=%d. Re-adjusting.\n",
1473 vm_page_speculative_count);
1474 if (vm_page_speculative_count >
1475 vm_page_speculative_count_drift_max)
1476 vm_page_speculative_count_drift_max = vm_page_speculative_count;
1477 vm_page_speculative_count_drifts++;
1478 #if 6553678
1479 Debugger("vm_pageout_scan: no speculative pages");
1480 #endif
1481 /* readjust... */
1482 vm_page_speculative_count = 0;
1483 /* ... and continue */
1484 continue;
1485 }
1486
1487 if (vm_page_speculative_count > vm_page_speculative_target)
1488 can_steal = TRUE;
1489 else {
1490 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) / 1000;
1491 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) % 1000)
1492 * 1000 * NSEC_PER_USEC;
1493
1494 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
1495
1496 clock_sec_t sec;
1497 clock_nsec_t nsec;
1498 clock_get_system_nanotime(&sec, &nsec);
1499 ts.tv_sec = (unsigned int) sec;
1500 ts.tv_nsec = nsec;
1501
1502 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
1503 can_steal = TRUE;
1504 }
1505 if (can_steal == TRUE)
1506 vm_page_speculate_ageit(aq);
1507 }
1508
1509 /*
1510 * Sometimes we have to pause:
1511 * 1) No inactive pages - nothing to do.
1512 * 2) Flow control - default pageout queue is full
1513 * 3) Loop control - no acceptable pages found on the inactive queue
1514 * within the last vm_pageout_burst_inactive_throttle iterations
1515 */
1516 if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf) && queue_empty(&sq->age_q) &&
1517 (VM_PAGE_Q_THROTTLED(iq) || queue_empty(&vm_page_queue_throttled))) {
1518 vm_pageout_scan_empty_throttle++;
1519 msecs = vm_pageout_empty_wait;
1520 goto vm_pageout_scan_delay;
1521
1522 } else if (inactive_burst_count >=
1523 MIN(vm_pageout_burst_inactive_throttle,
1524 (vm_page_inactive_count +
1525 vm_page_speculative_count))) {
1526 vm_pageout_scan_burst_throttle++;
1527 msecs = vm_pageout_burst_wait;
1528 goto vm_pageout_scan_delay;
1529
1530 } else if (VM_PAGE_Q_THROTTLED(iq) && IP_VALID(memory_manager_default)) {
1531 clock_sec_t sec;
1532 clock_nsec_t nsec;
1533
1534 switch (flow_control.state) {
1535
1536 case FCS_IDLE:
1537 reset_deadlock_timer:
1538 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
1539 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
1540 clock_get_system_nanotime(&sec, &nsec);
1541 flow_control.ts.tv_sec = (unsigned int) sec;
1542 flow_control.ts.tv_nsec = nsec;
1543 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
1544
1545 flow_control.state = FCS_DELAYED;
1546 msecs = vm_pageout_deadlock_wait;
1547
1548 break;
1549
1550 case FCS_DELAYED:
1551 clock_get_system_nanotime(&sec, &nsec);
1552 ts.tv_sec = (unsigned int) sec;
1553 ts.tv_nsec = nsec;
1554
1555 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
1556 /*
1557 * the pageout thread for the default pager is potentially
1558 * deadlocked since the
1559 * default pager queue has been throttled for more than the
1560 * allowable time... we need to move some clean pages or dirty
1561 * pages belonging to the external pagers if they aren't throttled
1562 * vm_page_free_wanted represents the number of threads currently
1563 * blocked waiting for pages... we'll move one page for each of
1564 * these plus a fixed amount to break the logjam... once we're done
1565 * moving this number of pages, we'll re-enter the FCS_DELAYED state
1566 * with a new timeout target since we have no way of knowing
1567 * whether we've broken the deadlock except through observation
1568 * of the queue associated with the default pager... we need to
1569 * stop moving pages and allow the system to run to see what
1570 * state it settles into.
1571 */
1572 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
1573 vm_pageout_scan_deadlock_detected++;
1574 flow_control.state = FCS_DEADLOCK_DETECTED;
1575
1576 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1577 goto consider_inactive;
1578 }
1579 /*
1580 * just resniff instead of trying
1581 * to compute a new delay time... we're going to be
1582 * awakened immediately upon a laundry completion,
1583 * so we won't wait any longer than necessary
1584 */
1585 msecs = vm_pageout_idle_wait;
1586 break;
1587
1588 case FCS_DEADLOCK_DETECTED:
1589 if (vm_pageout_deadlock_target)
1590 goto consider_inactive;
1591 goto reset_deadlock_timer;
1592
1593 }
1594 vm_pageout_scan_throttle++;
1595 iq->pgo_throttled = TRUE;
1596 vm_pageout_scan_delay:
1597 if (object != NULL) {
1598 vm_object_unlock(object);
1599 object = NULL;
1600 }
1601 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1602
1603 if (local_freeq) {
1604 vm_page_unlock_queues();
1605 vm_page_free_list(local_freeq, TRUE);
1606
1607 local_freeq = NULL;
1608 local_freed = 0;
1609 vm_page_lock_queues();
1610
1611 if (flow_control.state == FCS_DELAYED &&
1612 !VM_PAGE_Q_THROTTLED(iq)) {
1613 flow_control.state = FCS_IDLE;
1614 vm_pageout_scan_throttle_aborted++;
1615 goto consider_inactive;
1616 }
1617 }
1618 #if CONFIG_EMBEDDED
1619 {
1620 int percent_avail;
1621
1622 /*
1623 * Decide if we need to send a memory status notification.
1624 */
1625 percent_avail =
1626 (vm_page_active_count + vm_page_inactive_count +
1627 vm_page_speculative_count + vm_page_free_count +
1628 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
1629 atop_64(max_mem);
1630 if (percent_avail >= (kern_memorystatus_level + 5) ||
1631 percent_avail <= (kern_memorystatus_level - 5)) {
1632 kern_memorystatus_level = percent_avail;
1633 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1634 }
1635 }
1636 #endif
1637 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
1638 counter(c_vm_pageout_scan_block++);
1639
1640 vm_page_unlock_queues();
1641
1642 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1643
1644 thread_block(THREAD_CONTINUE_NULL);
1645
1646 vm_page_lock_queues();
1647 delayed_unlock = 1;
1648
1649 iq->pgo_throttled = FALSE;
1650
1651 if (loop_count >= vm_page_inactive_count)
1652 loop_count = 0;
1653 inactive_burst_count = 0;
1654
1655 goto Restart;
1656 /*NOTREACHED*/
1657 }
1658
1659
1660 flow_control.state = FCS_IDLE;
1661 consider_inactive:
1662 loop_count++;
1663 inactive_burst_count++;
1664 vm_pageout_inactive++;
1665
1666 /* Choose a victim. */
1667
1668 while (1) {
1669 m = NULL;
1670
1671 if (IP_VALID(memory_manager_default)) {
1672 assert(vm_page_throttled_count == 0);
1673 assert(queue_empty(&vm_page_queue_throttled));
1674 }
1675
1676 /*
1677 * The most eligible pages are ones we paged in speculatively,
1678 * but which have not yet been touched.
1679 */
1680 if ( !queue_empty(&sq->age_q) ) {
1681 m = (vm_page_t) queue_first(&sq->age_q);
1682 break;
1683 }
1684 /*
1685 * Time for a zero-filled inactive page?
1686 */
1687 if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) ||
1688 queue_empty(&vm_page_queue_inactive)) {
1689 if ( !queue_empty(&vm_page_queue_zf) ) {
1690 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1691 zf_run_count++;
1692 break;
1693 }
1694 }
1695 /*
1696 * It's either a normal inactive page or nothing.
1697 */
1698 if ( !queue_empty(&vm_page_queue_inactive) ) {
1699 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1700 zf_run_count = 0;
1701 break;
1702 }
1703
1704 panic("vm_pageout: no victim");
1705 }
1706
1707 assert(!m->active && (m->inactive || m->speculative || m->throttled));
1708 assert(!m->laundry);
1709 assert(m->object != kernel_object);
1710 assert(m->phys_page != vm_page_guard_addr);
1711
1712 if (!m->speculative) {
1713 vm_pageout_stats[vm_pageout_stat_now].considered++;
1714 }
1715
1716 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1717
1718 /*
1719 * check to see if we currently are working
1720 * with the same object... if so, we've
1721 * already got the lock
1722 */
1723 if (m->object != object) {
1724 /*
1725 * the object associated with candidate page is
1726 * different from the one we were just working
1727 * with... dump the lock if we still own it
1728 */
1729 if (object != NULL) {
1730 vm_object_unlock(object);
1731 object = NULL;
1732 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1733 }
1734 /*
1735 * Try to lock object; since we've already got the
1736 * page queues lock, we can only 'try' for this one.
1737 * if the 'try' fails, we need to do a mutex_pause
1738 * to allow the owner of the object lock a chance to
1739 * run... otherwise, we're likely to trip over this
1740 * object in the same state as we work our way through
1741 * the queue... clumps of pages associated with the same
1742 * object are fairly typical on the inactive and active queues
1743 */
1744 if (!vm_object_lock_try_scan(m->object)) {
1745 vm_pageout_inactive_nolock++;
1746
1747 requeue_page:
1748 /*
1749 * Move page to end and continue.
1750 * Don't re-issue ticket
1751 */
1752 if (m->zero_fill) {
1753 if (m->speculative) {
1754 panic("vm_pageout_scan(): page %p speculative and zero-fill !?\n", m);
1755 }
1756 assert(!m->speculative);
1757 queue_remove(&vm_page_queue_zf, m,
1758 vm_page_t, pageq);
1759 queue_enter(&vm_page_queue_zf, m,
1760 vm_page_t, pageq);
1761 } else if (m->speculative) {
1762 remque(&m->pageq);
1763 m->speculative = FALSE;
1764 vm_page_speculative_count--;
1765
1766 /*
1767 * move to the head of the inactive queue
1768 * to get it out of the way... the speculative
1769 * queue is generally too small to depend
1770 * on there being enough pages from other
1771 * objects to make cycling it back on the
1772 * same queue a winning proposition
1773 */
1774 queue_enter_first(&vm_page_queue_inactive, m,
1775 vm_page_t, pageq);
1776 m->inactive = TRUE;
1777 vm_page_inactive_count++;
1778 token_new_pagecount++;
1779 } else if (m->throttled) {
1780 queue_remove(&vm_page_queue_throttled, m,
1781 vm_page_t, pageq);
1782 m->throttled = FALSE;
1783 vm_page_throttled_count--;
1784
1785 /*
1786 * not throttled any more, so can stick
1787 * it on the inactive queue.
1788 */
1789 queue_enter(&vm_page_queue_inactive, m,
1790 vm_page_t, pageq);
1791 m->inactive = TRUE;
1792 vm_page_inactive_count++;
1793 token_new_pagecount++;
1794 } else {
1795 queue_remove(&vm_page_queue_inactive, m,
1796 vm_page_t, pageq);
1797 #if MACH_ASSERT
1798 vm_page_inactive_count--; /* balance for purgeable queue asserts */
1799 #endif
1800 vm_purgeable_q_advance_all();
1801
1802 queue_enter(&vm_page_queue_inactive, m,
1803 vm_page_t, pageq);
1804 #if MACH_ASSERT
1805 vm_page_inactive_count++; /* balance for purgeable queue asserts */
1806 #endif
1807 token_new_pagecount++;
1808 }
1809 pmap_clear_reference(m->phys_page);
1810 m->reference = FALSE;
1811
1812 if ( !queue_empty(&sq->age_q) )
1813 m = (vm_page_t) queue_first(&sq->age_q);
1814 else if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) ||
1815 queue_empty(&vm_page_queue_inactive)) {
1816 if ( !queue_empty(&vm_page_queue_zf) )
1817 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1818 } else if ( !queue_empty(&vm_page_queue_inactive) ) {
1819 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1820 }
1821 /*
1822 * this is the next object we're going to be interested in
1823 * try to make sure it's available after the mutex_yield
1824 * returns control
1825 */
1826 vm_pageout_scan_wants_object = m->object;
1827
1828 /*
1829 * force us to dump any collected free pages
1830 * and to pause before moving on
1831 */
1832 try_failed = TRUE;
1833
1834 goto done_with_inactivepage;
1835 }
1836 object = m->object;
1837 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1838
1839 try_failed = FALSE;
1840 }
1841
1842 /*
1843 * Paging out pages of external objects which
1844 * are currently being created must be avoided.
1845 * The pager may claim memory, leading to a possible
1846 * deadlock between it and the pageout thread if such
1847 * pages are chosen. The assumption is that there will
1848 * eventually be enough available pages in the inactive
1849 * pool to page out to satisfy all memory claimed by
1850 * the thread which concurrently creates the pager.
1851 */
1852 if (!object->pager_initialized && object->pager_created) {
1853 /*
1854 * Move page to end and continue, hoping that
1855 * there will be enough other inactive pages to
1856 * page out so that the thread which currently
1857 * initializes the pager will succeed.
1858 * Don't re-grant the ticket; the page should be
1859 * pulled from the queue and paged out whenever
1860 * one of its logically adjacent fellows is
1861 * targeted.
1862 */
1863 vm_pageout_inactive_avoid++;
1864 goto requeue_page;
1865 }
1866 /*
1867 * Remove the page from its list.
1868 */
1869 if (m->speculative) {
1870 remque(&m->pageq);
1871 page_prev_state = PAGE_STATE_SPECULATIVE;
1872 m->speculative = FALSE;
1873 vm_page_speculative_count--;
1874 } else if (m->throttled) {
1875 queue_remove(&vm_page_queue_throttled, m, vm_page_t, pageq);
1876 page_prev_state = PAGE_STATE_THROTTLED;
1877 m->throttled = FALSE;
1878 vm_page_throttled_count--;
1879 } else {
1880 if (m->zero_fill) {
1881 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
1882 page_prev_state = PAGE_STATE_ZEROFILL;
1883 vm_zf_queue_count--;
1884 } else {
1885 page_prev_state = PAGE_STATE_INACTIVE;
1886 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1887 }
1888 m->inactive = FALSE;
1889 if (!m->fictitious)
1890 vm_page_inactive_count--;
1891 vm_purgeable_q_advance_all();
1892 }
1893
1894 m->pageq.next = NULL;
1895 m->pageq.prev = NULL;
1896
1897 if ( !m->fictitious && catch_up_count)
1898 catch_up_count--;
1899
1900 /*
1901 * ENCRYPTED SWAP:
1902 * if this page has already been picked up as part of a
1903 * page-out cluster, it will be busy because it is being
1904 * encrypted (see vm_object_upl_request()). But we still
1905 * want to demote it from "clean-in-place" (aka "adjacent")
1906 * to "clean-and-free" (aka "target"), so let's ignore its
1907 * "busy" bit here and proceed to check for "cleaning" a
1908 * little bit below...
1909 */
1910 if ( !m->encrypted_cleaning && (m->busy || !object->alive)) {
1911 /*
1912 * Somebody is already playing with this page.
1913 * Leave it off the pageout queues.
1914 *
1915 */
1916 vm_pageout_inactive_busy++;
1917
1918 goto done_with_inactivepage;
1919 }
1920
1921 /*
1922 * If it's absent or in error, we can reclaim the page.
1923 */
1924
1925 if (m->absent || m->error) {
1926 vm_pageout_inactive_absent++;
1927 reclaim_page:
1928 if (vm_pageout_deadlock_target) {
1929 vm_pageout_scan_inactive_throttle_success++;
1930 vm_pageout_deadlock_target--;
1931 }
1932
1933 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
1934
1935 if (object->internal) {
1936 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
1937 } else {
1938 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
1939 }
1940 vm_page_free_prepare_queues(m);
1941
1942 /*
1943 * remove page from object here since we're already
1944 * behind the object lock... defer the rest of the work
1945 * we'd normally do in vm_page_free_prepare_object
1946 * until 'vm_page_free_list' is called
1947 */
1948 if (m->tabled)
1949 vm_page_remove(m, TRUE);
1950
1951 assert(m->pageq.next == NULL &&
1952 m->pageq.prev == NULL);
1953 m->pageq.next = (queue_entry_t)local_freeq;
1954 local_freeq = m;
1955 local_freed++;
1956
1957 inactive_burst_count = 0;
1958
1959 if(page_prev_state != PAGE_STATE_SPECULATIVE) {
1960 vm_pageout_stats[vm_pageout_stat_now].reclaimed++;
1961 page_prev_state = 0;
1962 }
1963
1964 goto done_with_inactivepage;
1965 }
1966
1967 assert(!m->private);
1968 assert(!m->fictitious);
1969
1970 /*
1971 * If already cleaning this page in place, convert from
1972 * "adjacent" to "target". We can leave the page mapped,
1973 * and vm_pageout_object_terminate will determine whether
1974 * to free or reactivate.
1975 */
1976
1977 if (m->cleaning) {
1978 m->busy = TRUE;
1979 m->pageout = TRUE;
1980 m->dump_cleaning = TRUE;
1981 vm_page_wire(m);
1982
1983 CLUSTER_STAT(vm_pageout_cluster_conversions++);
1984
1985 inactive_burst_count = 0;
1986
1987 goto done_with_inactivepage;
1988 }
1989
1990 /*
1991 * If the object is empty, the page must be reclaimed even
1992 * if dirty or used.
1993 * If the page belongs to a volatile object, we stick it back
1994 * on.
1995 */
1996 if (object->copy == VM_OBJECT_NULL) {
1997 if (object->purgable == VM_PURGABLE_EMPTY) {
1998 m->busy = TRUE;
1999 if (m->pmapped == TRUE) {
2000 /* unmap the page */
2001 refmod_state = pmap_disconnect(m->phys_page);
2002 if (refmod_state & VM_MEM_MODIFIED) {
2003 m->dirty = TRUE;
2004 }
2005 }
2006 if (m->dirty || m->precious) {
2007 /* we saved the cost of cleaning this page ! */
2008 vm_page_purged_count++;
2009 }
2010 goto reclaim_page;
2011 }
2012 if (object->purgable == VM_PURGABLE_VOLATILE) {
2013 /* if it's wired, we can't put it on our queue */
2014 assert(!VM_PAGE_WIRED(m));
2015 /* just stick it back on! */
2016 goto reactivate_page;
2017 }
2018 }
2019
2020 /*
2021 * If it's being used, reactivate.
2022 * (Fictitious pages are either busy or absent.)
2023 * First, update the reference and dirty bits
2024 * to make sure the page is unreferenced.
2025 */
2026 refmod_state = -1;
2027
2028 if (m->reference == FALSE && m->pmapped == TRUE) {
2029 refmod_state = pmap_get_refmod(m->phys_page);
2030
2031 if (refmod_state & VM_MEM_REFERENCED)
2032 m->reference = TRUE;
2033 if (refmod_state & VM_MEM_MODIFIED)
2034 m->dirty = TRUE;
2035 }
2036
2037 if (m->reference || m->dirty) {
2038 /* deal with a rogue "reusable" page */
2039 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m);
2040 }
2041
2042 if (m->reference && !m->no_cache) {
2043 /*
2044 * The page we pulled off the inactive list has
2045 * been referenced. It is possible for other
2046 * processors to be touching pages faster than we
2047 * can clear the referenced bit and traverse the
2048 * inactive queue, so we limit the number of
2049 * reactivations.
2050 */
2051 if (++reactivated_this_call >= reactivate_limit) {
2052 vm_pageout_reactivation_limit_exceeded++;
2053 } else if (catch_up_count) {
2054 vm_pageout_catch_ups++;
2055 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
2056 vm_pageout_inactive_force_reclaim++;
2057 } else {
2058 uint32_t isinuse;
2059 reactivate_page:
2060 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
2061 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
2062 /*
2063 * no explicit mappings of this object exist
2064 * and it's not open via the filesystem
2065 */
2066 vm_page_deactivate(m);
2067 vm_pageout_inactive_deactivated++;
2068 } else {
2069 /*
2070 * The page was/is being used, so put back on active list.
2071 */
2072 vm_page_activate(m);
2073 VM_STAT_INCR(reactivations);
2074 }
2075 vm_pageout_inactive_used++;
2076 inactive_burst_count = 0;
2077
2078 goto done_with_inactivepage;
2079 }
2080 /*
2081 * Make sure we call pmap_get_refmod() if it
2082 * wasn't already called just above, to update
2083 * the dirty bit.
2084 */
2085 if ((refmod_state == -1) && !m->dirty && m->pmapped) {
2086 refmod_state = pmap_get_refmod(m->phys_page);
2087 if (refmod_state & VM_MEM_MODIFIED)
2088 m->dirty = TRUE;
2089 }
2090 forced_reclaim = TRUE;
2091 } else {
2092 forced_reclaim = FALSE;
2093 }
2094
2095 XPR(XPR_VM_PAGEOUT,
2096 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
2097 object, m->offset, m, 0,0);
2098
2099 /*
2100 * we've got a candidate page to steal...
2101 *
2102 * m->dirty is up to date courtesy of the
2103 * preceding check for m->reference... if
2104 * we get here, then m->reference had to be
2105 * FALSE (or possibly "reactivate_limit" was
2106 * exceeded), but in either case we called
2107 * pmap_get_refmod() and updated both
2108 * m->reference and m->dirty
2109 *
2110 * if it's dirty or precious we need to
2111 * see if the target queue is throttled...
2112 * if it is, we need to skip over it by moving it back
2113 * to the end of the inactive queue
2114 */
2115
2116 inactive_throttled = FALSE;
2117
2118 if (m->dirty || m->precious) {
2119 if (object->internal) {
2120 if (VM_PAGE_Q_THROTTLED(iq))
2121 inactive_throttled = TRUE;
2122 } else if (VM_PAGE_Q_THROTTLED(eq)) {
2123 inactive_throttled = TRUE;
2124 }
2125 }
2126 if (inactive_throttled == TRUE) {
2127 throttle_inactive:
2128 if (!IP_VALID(memory_manager_default) &&
2129 object->internal && m->dirty &&
2130 (object->purgable == VM_PURGABLE_DENY ||
2131 object->purgable == VM_PURGABLE_NONVOLATILE ||
2132 object->purgable == VM_PURGABLE_VOLATILE)) {
2133 queue_enter(&vm_page_queue_throttled, m,
2134 vm_page_t, pageq);
2135 m->throttled = TRUE;
2136 vm_page_throttled_count++;
2137 } else {
2138 if (m->zero_fill) {
2139 queue_enter(&vm_page_queue_zf, m,
2140 vm_page_t, pageq);
2141 vm_zf_queue_count++;
2142 } else
2143 queue_enter(&vm_page_queue_inactive, m,
2144 vm_page_t, pageq);
2145 m->inactive = TRUE;
2146 if (!m->fictitious) {
2147 vm_page_inactive_count++;
2148 token_new_pagecount++;
2149 }
2150 }
2151 vm_pageout_scan_inactive_throttled++;
2152 goto done_with_inactivepage;
2153 }
2154
2155 /*
2156 * we've got a page that we can steal...
2157 * eliminate all mappings and make sure
2158 * we have the up-to-date modified state...
2159 * first take the page BUSY, so that no new
2160 * mappings can be made
2161 */
2162 m->busy = TRUE;
2163
2164 /*
2165 * if we need to do a pmap_disconnect then we
2166 * need to re-evaluate m->dirty since the pmap_disconnect
2167 * provides the true state atomically... the
2168 * page was still mapped up to the pmap_disconnect
2169 * and may have been dirtied at the last microsecond
2170 *
2171 * we also check for the page being referenced 'late'
2172 * if it was, we first need to do a WAKEUP_DONE on it
2173 * since we already set m->busy = TRUE, before
2174 * going off to reactivate it
2175 *
2176 * Note that if 'pmapped' is FALSE then the page is not,
2177 * and has not been, in any map, so there is no point calling
2178 * pmap_disconnect(). m->dirty and/or m->reference could
2179 * have been set in anticipation of likely usage of the page.
2180 */
2181 if (m->pmapped == TRUE) {
2182 refmod_state = pmap_disconnect(m->phys_page);
2183
2184 if (refmod_state & VM_MEM_MODIFIED)
2185 m->dirty = TRUE;
2186 if (refmod_state & VM_MEM_REFERENCED) {
2187
2188 /* If m->reference is already set, this page must have
2189 * already failed the reactivate_limit test, so don't
2190 * bump the counts twice.
2191 */
2192 if ( ! m->reference ) {
2193 m->reference = TRUE;
2194 if (forced_reclaim ||
2195 ++reactivated_this_call >= reactivate_limit)
2196 vm_pageout_reactivation_limit_exceeded++;
2197 else {
2198 PAGE_WAKEUP_DONE(m);
2199 goto reactivate_page;
2200 }
2201 }
2202 }
2203 }
2204 /*
2205 * reset our count of pages that have been reclaimed
2206 * since the last page was 'stolen'
2207 */
2208 inactive_reclaim_run = 0;
2209
2210 /*
2211 * If it's clean and not precious, we can free the page.
2212 */
2213 if (!m->dirty && !m->precious) {
2214 if (m->zero_fill)
2215 vm_pageout_inactive_zf++;
2216 vm_pageout_inactive_clean++;
2217
2218 goto reclaim_page;
2219 }
2220
2221 /*
2222 * The page may have been dirtied since the last check
2223 * for a throttled target queue (which may have been skipped
2224 * if the page was clean then). With the dirty page
2225 * disconnected here, we can make one final check.
2226 */
2227 {
2228 boolean_t disconnect_throttled = FALSE;
2229 if (object->internal) {
2230 if (VM_PAGE_Q_THROTTLED(iq))
2231 disconnect_throttled = TRUE;
2232 } else if (VM_PAGE_Q_THROTTLED(eq)) {
2233 disconnect_throttled = TRUE;
2234 }
2235
2236 if (disconnect_throttled == TRUE) {
2237 PAGE_WAKEUP_DONE(m);
2238 goto throttle_inactive;
2239 }
2240 }
2241
2242 vm_pageout_stats[vm_pageout_stat_now].reclaimed++;
2243
2244 vm_pageout_cluster(m);
2245
2246 if (m->zero_fill)
2247 vm_pageout_inactive_zf++;
2248 vm_pageout_inactive_dirty++;
2249
2250 inactive_burst_count = 0;
2251
2252 done_with_inactivepage:
2253 if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) {
2254
2255 if (object != NULL) {
2256 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2257 vm_object_unlock(object);
2258 object = NULL;
2259 }
2260 if (local_freeq) {
2261 vm_page_unlock_queues();
2262 vm_page_free_list(local_freeq, TRUE);
2263
2264 local_freeq = NULL;
2265 local_freed = 0;
2266 vm_page_lock_queues();
2267 } else
2268 lck_mtx_yield(&vm_page_queue_lock);
2269
2270 delayed_unlock = 1;
2271 }
2272 /*
2273 * back to top of pageout scan loop
2274 */
2275 }
2276 }
2277
2278
2279 int vm_page_free_count_init;
2280
2281 void
2282 vm_page_free_reserve(
2283 int pages)
2284 {
2285 int free_after_reserve;
2286
2287 vm_page_free_reserved += pages;
2288
2289 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
2290
2291 vm_page_free_min = vm_page_free_reserved +
2292 VM_PAGE_FREE_MIN(free_after_reserve);
2293
2294 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
2295 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
2296
2297 vm_page_free_target = vm_page_free_reserved +
2298 VM_PAGE_FREE_TARGET(free_after_reserve);
2299
2300 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
2301 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
2302
2303 if (vm_page_free_target < vm_page_free_min + 5)
2304 vm_page_free_target = vm_page_free_min + 5;
2305
2306 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 3);
2307 vm_page_creation_throttle = vm_page_free_target / 2;
2308 }
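/*
 * For illustration only (the exact VM_PAGE_FREE_MIN/TARGET scaling comes
 * from macros defined elsewhere), the relationships established above are:
 *
 *	vm_page_free_target       >= vm_page_free_min + 5
 *	vm_page_throttle_limit     = free_target - free_target/3   (~2/3 of target)
 *	vm_page_creation_throttle  = free_target / 2
 *
 * e.g. if free_target works out to 3000 pages, the throttle limit is
 * 2000 pages and the creation throttle is 1500 pages.
 */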
2309
2310 /*
2311 * vm_pageout is the high level pageout daemon.
2312 */
2313
2314 void
2315 vm_pageout_continue(void)
2316 {
2317 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
2318 vm_pageout_scan_event_counter++;
2319 vm_pageout_scan();
2320 /* we hold vm_page_queue_free_lock now */
2321 assert(vm_page_free_wanted == 0);
2322 assert(vm_page_free_wanted_privileged == 0);
2323 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
2324 lck_mtx_unlock(&vm_page_queue_free_lock);
2325
2326 counter(c_vm_pageout_block++);
2327 thread_block((thread_continue_t)vm_pageout_continue);
2328 /*NOTREACHED*/
2329 }
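/*
 * Note: because thread_block() is given vm_pageout_continue itself as the
 * continuation, the daemon's kernel stack can be discarded while it sleeps
 * and, on a free-page wakeup, execution restarts at the top of
 * vm_pageout_continue(), re-running vm_pageout_scan().
 */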
2330
2331
2332 #ifdef FAKE_DEADLOCK
2333
2334 #define FAKE_COUNT 5000
2335
2336 int internal_count = 0;
2337 int fake_deadlock = 0;
2338
2339 #endif
2340
2341 static void
2342 vm_pageout_iothread_continue(struct vm_pageout_queue *q)
2343 {
2344 vm_page_t m = NULL;
2345 vm_object_t object;
2346 memory_object_t pager;
2347 thread_t self = current_thread();
2348
2349 if ((vm_pageout_internal_iothread != THREAD_NULL)
2350 && (self == vm_pageout_external_iothread )
2351 && (self->options & TH_OPT_VMPRIV))
2352 self->options &= ~TH_OPT_VMPRIV;
2353
2354 vm_page_lockspin_queues();
2355
2356 while ( !queue_empty(&q->pgo_pending) ) {
2357
2358 q->pgo_busy = TRUE;
2359 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
2360 VM_PAGE_CHECK(m);
2361 m->pageout_queue = FALSE;
2362 m->pageq.next = NULL;
2363 m->pageq.prev = NULL;
2364 vm_page_unlock_queues();
2365
2366 #ifdef FAKE_DEADLOCK
2367 if (q == &vm_pageout_queue_internal) {
2368 vm_offset_t addr;
2369 int pg_count;
2370
2371 internal_count++;
2372
2373 if ((internal_count == FAKE_COUNT)) {
2374
2375 pg_count = vm_page_free_count + vm_page_free_reserved;
2376
2377 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
2378 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
2379 }
2380 internal_count = 0;
2381 fake_deadlock++;
2382 }
2383 }
2384 #endif
2385 object = m->object;
2386
2387 vm_object_lock(object);
2388
2389 if (!object->pager_initialized) {
2390
2391 /*
2392 * If there is no memory object for the page, create
2393 * one and hand it to the default pager.
2394 */
2395
2396 if (!object->pager_initialized)
2397 vm_object_collapse(object,
2398 (vm_object_offset_t) 0,
2399 TRUE);
2400 if (!object->pager_initialized)
2401 vm_object_pager_create(object);
2402 if (!object->pager_initialized) {
2403 /*
2404 * Still no pager for the object.
2405 * Reactivate the page.
2406 *
2407 * Should only happen if there is no
2408 * default pager.
2409 */
2410 vm_page_lockspin_queues();
2411
2412 vm_pageout_queue_steal(m, TRUE);
2413 vm_pageout_dirty_no_pager++;
2414 vm_page_activate(m);
2415
2416 vm_page_unlock_queues();
2417
2418 /*
2419 * And we are done with it.
2420 */
2421 PAGE_WAKEUP_DONE(m);
2422
2423 vm_object_paging_end(object);
2424 vm_object_unlock(object);
2425
2426 vm_page_lockspin_queues();
2427 continue;
2428 }
2429 }
2430 pager = object->pager;
2431 if (pager == MEMORY_OBJECT_NULL) {
2432 /*
2433 * This pager has been destroyed by either
2434 * memory_object_destroy or vm_object_destroy, and
2435 * so there is nowhere for the page to go.
2436 */
2437 if (m->pageout) {
2438 /*
2439 * Just free the page... VM_PAGE_FREE takes
2440 * care of cleaning up all the state...
2441 * including doing the vm_pageout_throttle_up
2442 */
2443 VM_PAGE_FREE(m);
2444 } else {
2445 vm_page_lockspin_queues();
2446
2447 vm_pageout_queue_steal(m, TRUE);
2448 vm_page_activate(m);
2449
2450 vm_page_unlock_queues();
2451
2452 /*
2453 * And we are done with it.
2454 */
2455 PAGE_WAKEUP_DONE(m);
2456 }
2457 vm_object_paging_end(object);
2458 vm_object_unlock(object);
2459
2460 vm_page_lockspin_queues();
2461 continue;
2462 }
2463 VM_PAGE_CHECK(m);
2464 vm_object_unlock(object);
2465 /*
2466 * we expect the paging_in_progress reference to have
2467 * already been taken on the object before it was added
2468 * to the appropriate pageout I/O queue... this will
2469 * keep the object from being terminated and/or the
2470 * paging_offset from changing until the I/O has
2471 * completed... therefore no need to lock the object to
2472 * pull the paging_offset from it.
2473 *
2474 * Send the data to the pager.
2475 * any pageout clustering happens there
2476 */
2477 memory_object_data_return(pager,
2478 m->offset + object->paging_offset,
2479 PAGE_SIZE,
2480 NULL,
2481 NULL,
2482 FALSE,
2483 FALSE,
2484 0);
2485
2486 vm_object_lock(object);
2487 vm_object_paging_end(object);
2488 vm_object_unlock(object);
2489
2490 vm_page_lockspin_queues();
2491 }
2492 assert_wait((event_t) q, THREAD_UNINT);
2493
2494 if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) {
2495 q->pgo_throttled = FALSE;
2496 thread_wakeup((event_t) &q->pgo_laundry);
2497 }
2498 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
2499 q->pgo_draining = FALSE;
2500 thread_wakeup((event_t) (&q->pgo_laundry+1));
2501 }
2502 q->pgo_busy = FALSE;
2503 q->pgo_idle = TRUE;
2504 vm_page_unlock_queues();
2505
2506 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending);
2507 /*NOTREACHED*/
2508 }
2509
2510
2511 static void
2512 vm_pageout_iothread_external(void)
2513 {
2514 thread_t self = current_thread();
2515
2516 self->options |= TH_OPT_VMPRIV;
2517
2518 vm_pageout_iothread_continue(&vm_pageout_queue_external);
2519 /*NOTREACHED*/
2520 }
2521
2522
2523 static void
2524 vm_pageout_iothread_internal(void)
2525 {
2526 thread_t self = current_thread();
2527
2528 self->options |= TH_OPT_VMPRIV;
2529
2530 vm_pageout_iothread_continue(&vm_pageout_queue_internal);
2531 /*NOTREACHED*/
2532 }
2533
2534 kern_return_t
2535 vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
2536 {
2537 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
2538 return KERN_SUCCESS;
2539 } else {
2540 return KERN_FAILURE; /* Already set */
2541 }
2542 }
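/*
 * A sketch of the intended registration (the callback name below is
 * hypothetical; in practice the buffer cache layer registers its own
 * routine once during startup):
 *
 *	extern boolean_t my_buffer_cache_gc(int);	/* hypothetical */
 *
 *	if (vm_set_buffer_cleanup_callout(my_buffer_cache_gc) != KERN_SUCCESS) {
 *		/* a callout was already registered; only the first caller wins */
 *	}
 */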
2543
2544 static void
2545 vm_pageout_garbage_collect(int collect)
2546 {
2547 if (collect) {
2548 boolean_t buf_large_zfree = FALSE;
2549 stack_collect();
2550
2551 /*
2552 * consider_zone_gc should be last, because the other operations
2553 * might return memory to zones.
2554 */
2555 consider_machine_collect();
2556 if (consider_buffer_cache_collect != NULL) {
2557 buf_large_zfree = (*consider_buffer_cache_collect)(0);
2558 }
2559 consider_zone_gc(buf_large_zfree);
2560
2561 consider_machine_adjust();
2562 }
2563
2564 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
2565
2566 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
2567 /*NOTREACHED*/
2568 }
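/*
 * Note: the continuation above is re-entered with a parameter of 1, so every
 * wakeup delivered on &vm_pageout_garbage_collect runs with 'collect'
 * non-zero and performs a full collection pass; only the initial invocation
 * (started with a NULL parameter in vm_pageout() below) sees collect == 0.
 */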
2569
2570
2571
2572 void
2573 vm_pageout(void)
2574 {
2575 thread_t self = current_thread();
2576 thread_t thread;
2577 kern_return_t result;
2578 spl_t s;
2579
2580 /*
2581 * Set thread privileges.
2582 */
2583 s = splsched();
2584 thread_lock(self);
2585 self->priority = BASEPRI_PREEMPT - 1;
2586 set_sched_pri(self, self->priority);
2587 thread_unlock(self);
2588
2589 if (!self->reserved_stack)
2590 self->reserved_stack = self->kernel_stack;
2591
2592 splx(s);
2593
2594 /*
2595 * Initialize some paging parameters.
2596 */
2597
2598 if (vm_pageout_idle_wait == 0)
2599 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
2600
2601 if (vm_pageout_burst_wait == 0)
2602 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
2603
2604 if (vm_pageout_empty_wait == 0)
2605 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
2606
2607 if (vm_pageout_deadlock_wait == 0)
2608 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
2609
2610 if (vm_pageout_deadlock_relief == 0)
2611 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
2612
2613 if (vm_pageout_inactive_relief == 0)
2614 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
2615
2616 if (vm_pageout_burst_active_throttle == 0)
2617 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
2618
2619 if (vm_pageout_burst_inactive_throttle == 0)
2620 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
2621
2622 /*
2623 * Set kernel task to low backing store privileged
2624 * status
2625 */
2626 task_lock(kernel_task);
2627 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
2628 task_unlock(kernel_task);
2629
2630 vm_page_free_count_init = vm_page_free_count;
2631
2632 /*
2633 * even if we've already called vm_page_free_reserve,
2634 * call it again here to ensure that the targets are
2635 * accurately calculated (it uses vm_page_free_count_init)...
2636 * calling it with an arg of 0 will not change the reserve
2637 * but will re-calculate free_min and free_target
2638 */
2639 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
2640 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
2641 } else
2642 vm_page_free_reserve(0);
2643
2644
2645 queue_init(&vm_pageout_queue_external.pgo_pending);
2646 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2647 vm_pageout_queue_external.pgo_laundry = 0;
2648 vm_pageout_queue_external.pgo_idle = FALSE;
2649 vm_pageout_queue_external.pgo_busy = FALSE;
2650 vm_pageout_queue_external.pgo_throttled = FALSE;
2651 vm_pageout_queue_external.pgo_draining = FALSE;
2652
2653 queue_init(&vm_pageout_queue_internal.pgo_pending);
2654 vm_pageout_queue_internal.pgo_maxlaundry = 0;
2655 vm_pageout_queue_internal.pgo_laundry = 0;
2656 vm_pageout_queue_internal.pgo_idle = FALSE;
2657 vm_pageout_queue_internal.pgo_busy = FALSE;
2658 vm_pageout_queue_internal.pgo_throttled = FALSE;
2659 vm_pageout_queue_internal.pgo_draining = FALSE;
2660
2661
2662 /* internal pageout thread is started when the default pager registers for the first time */
2663 /* external pageout and garbage collection threads started here */
2664
2665 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
2666 BASEPRI_PREEMPT - 1,
2667 &vm_pageout_external_iothread);
2668 if (result != KERN_SUCCESS)
2669 panic("vm_pageout_iothread_external: create failed");
2670
2671 thread_deallocate(vm_pageout_external_iothread);
2672
2673 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
2674 MINPRI_KERNEL,
2675 &thread);
2676 if (result != KERN_SUCCESS)
2677 panic("vm_pageout_garbage_collect: create failed");
2678
2679 thread_deallocate(thread);
2680
2681 vm_object_reaper_init();
2682
2683
2684 vm_pageout_continue();
2685
2686 /*
2687 * Unreached code!
2688 *
2689 * The vm_pageout_continue() call above never returns, so the code below is never
2690 * executed. We take advantage of this to declare several DTrace VM related probe
2691 * points that our kernel doesn't have an analog for. These are probe points that
2692 * exist in Solaris and are in the DTrace documentation, so people may have written
2693 * scripts that use them. Declaring the probe points here means their scripts will
2694 * compile and execute which we want for portability of the scripts, but since this
2695 * section of code is never reached, the probe points will simply never fire. Yes,
2696 * this is basically a hack. The problem is the DTrace probe points were chosen with
2697 * Solaris specific VM events in mind, not portability to different VM implementations.
2698 */
2699
2700 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
2701 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
2702 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
2703 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
2704 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
2705 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
2706 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
2707 /*NOTREACHED*/
2708 }
2709
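/*
 * Called once the default pager has registered (see the note in vm_pageout()
 * above); only then does the internal queue get a real pgo_maxlaundry and
 * its own I/O thread.
 */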
2710 kern_return_t
2711 vm_pageout_internal_start(void)
2712 {
2713 kern_return_t result;
2714
2715 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2716 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread);
2717 if (result == KERN_SUCCESS)
2718 thread_deallocate(vm_pageout_internal_iothread);
2719 return result;
2720 }
2721
2722
2723 /*
2724 * when marshalling pages into a UPL and subsequently committing
2725 * or aborting them, it is necessary to hold
2726 * the vm_page_queue_lock (a hot global lock) for certain operations
2727 * on the page... however, the majority of the work can be done
2728 * while merely holding the object lock... in fact there are certain
2729 * collections of pages that don't require any work brokered by the
2730 * vm_page_queue_lock... to mitigate the time spent behind the global
2731 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
2732 * while doing all of the work that doesn't require the vm_page_queue_lock...
2733 * then call dw_do_work to acquire the vm_page_queue_lock and do the
2734 * necessary work for each page... we will grab the busy bit on the page
2735 * if it's not already held so that dw_do_work can drop the object lock
2736 * if it can't immediately take the vm_page_queue_lock in order to compete
2737 * for the locks in the same order that vm_pageout_scan takes them.
2738 * the operation names are modeled after the names of the routines that
2739 * need to be called in order to make the changes very obvious in the
2740 * original loop
2741 */
2742
2743 #define DELAYED_WORK_LIMIT 32
2744
2745 #define DW_vm_page_unwire 0x01
2746 #define DW_vm_page_wire 0x02
2747 #define DW_vm_page_free 0x04
2748 #define DW_vm_page_activate 0x08
2749 #define DW_vm_page_deactivate_internal 0x10
2750 #define DW_vm_page_speculate 0x20
2751 #define DW_vm_page_lru 0x40
2752 #define DW_vm_pageout_throttle_up 0x80
2753 #define DW_PAGE_WAKEUP 0x100
2754 #define DW_clear_busy 0x200
2755 #define DW_clear_reference 0x400
2756 #define DW_set_reference 0x800
2757
2758 struct dw {
2759 vm_page_t dw_m;
2760 int dw_mask;
2761 };
2762
2763
2764 static void dw_do_work(vm_object_t object, struct dw *dwp, int dw_count);
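/*
 * A sketch of the batching pattern the UPL routines below follow (see
 * vm_object_upl_request); the object lock is held, the vm_page_queue_lock
 * is not:
 *
 *	struct dw	dw_array[DELAYED_WORK_LIMIT];
 *	struct dw	*dwp = &dw_array[0];
 *	int		dw_count = 0;
 *
 *	for each page {
 *		... do the lock-free work, then record the deferred ops ...
 *		dwp->dw_m = dst_page;
 *		dwp->dw_mask |= DW_vm_page_wire | DW_clear_busy | DW_PAGE_WAKEUP;
 *		dwp++;
 *		dw_count++;
 *
 *		if (dw_count >= DELAYED_WORK_LIMIT) {
 *			dw_do_work(object, &dw_array[0], dw_count);	// takes vm_page_queue_lock
 *			dwp = &dw_array[0];
 *			dw_count = 0;
 *		}
 *	}
 *	if (dw_count)
 *		dw_do_work(object, &dw_array[0], dw_count);
 */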
2765
2766
2767
2768 static upl_t
2769 upl_create(int type, int flags, upl_size_t size)
2770 {
2771 upl_t upl;
2772 int page_field_size = 0;
2773 int upl_flags = 0;
2774 int upl_size = sizeof(struct upl);
2775
2776 size = round_page_32(size);
2777
2778 if (type & UPL_CREATE_LITE) {
2779 page_field_size = (atop(size) + 7) >> 3;
2780 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2781
2782 upl_flags |= UPL_LITE;
2783 }
2784 if (type & UPL_CREATE_INTERNAL) {
2785 upl_size += (int) sizeof(struct upl_page_info) * atop(size);
2786
2787 upl_flags |= UPL_INTERNAL;
2788 }
2789 upl = (upl_t)kalloc(upl_size + page_field_size);
2790
2791 if (page_field_size)
2792 bzero((char *)upl + upl_size, page_field_size);
2793
2794 upl->flags = upl_flags | flags;
2795 upl->src_object = NULL;
2796 upl->kaddr = (vm_offset_t)0;
2797 upl->size = 0;
2798 upl->map_object = NULL;
2799 upl->ref_count = 1;
2800 upl->highest_page = 0;
2801 upl_lock_init(upl);
2802 upl->vector_upl = NULL;
2803 #if UPL_DEBUG
2804 upl->ubc_alias1 = 0;
2805 upl->ubc_alias2 = 0;
2806
2807 upl->upl_creator = current_thread();
2808 upl->upl_state = 0;
2809 upl->upl_commit_index = 0;
2810 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
2811
2812 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
2813 #endif /* UPL_DEBUG */
2814
2815 return(upl);
2816 }
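/*
 * Roughly, the single kalloc'd allocation above is laid out as:
 *
 *	+------------+----------------------------+--------------------+
 *	| struct upl | upl_page_info[atop(size)]  | lite-list bitmap   |
 *	|            | (UPL_CREATE_INTERNAL only) | (UPL_CREATE_LITE   |
 *	|            |                            |  only, 1 bit/page) |
 *	+------------+----------------------------+--------------------+
 *
 * vm_object_upl_request() below recovers the user_page_list and lite_list
 * pointers from these fixed offsets.
 */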
2817
2818 static void
2819 upl_destroy(upl_t upl)
2820 {
2821 int page_field_size; /* bit field in word size buf */
2822 int size;
2823
2824 #if UPL_DEBUG
2825 {
2826 vm_object_t object;
2827
2828 if (upl->flags & UPL_SHADOWED) {
2829 object = upl->map_object->shadow;
2830 } else {
2831 object = upl->map_object;
2832 }
2833 vm_object_lock(object);
2834 queue_remove(&object->uplq, upl, upl_t, uplq);
2835 vm_object_unlock(object);
2836 }
2837 #endif /* UPL_DEBUG */
2838 /*
2839 * drop a reference on the map_object whether or
2840 * not a pageout object is inserted
2841 */
2842 if (upl->flags & UPL_SHADOWED)
2843 vm_object_deallocate(upl->map_object);
2844
2845 if (upl->flags & UPL_DEVICE_MEMORY)
2846 size = PAGE_SIZE;
2847 else
2848 size = upl->size;
2849 page_field_size = 0;
2850
2851 if (upl->flags & UPL_LITE) {
2852 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2853 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2854 }
2855 upl_lock_destroy(upl);
2856 upl->vector_upl = (vector_upl_t) 0xfeedbeef;
2857 if (upl->flags & UPL_INTERNAL) {
2858 kfree(upl,
2859 sizeof(struct upl) +
2860 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
2861 + page_field_size);
2862 } else {
2863 kfree(upl, sizeof(struct upl) + page_field_size);
2864 }
2865 }
2866
2867 void uc_upl_dealloc(upl_t upl);
2868 __private_extern__ void
2869 uc_upl_dealloc(upl_t upl)
2870 {
2871 if (--upl->ref_count == 0)
2872 upl_destroy(upl);
2873 }
2874
2875 void
2876 upl_deallocate(upl_t upl)
2877 {
2878 if (--upl->ref_count == 0) {
2879 if(vector_upl_is_valid(upl))
2880 vector_upl_deallocate(upl);
2881 upl_destroy(upl);
2882 }
2883 }
2884
2885 #if DEVELOPMENT || DEBUG
2886 /*
2887 * Statistics about UPL enforcement of copy-on-write obligations.
2888 */
2889 unsigned long upl_cow = 0;
2890 unsigned long upl_cow_again = 0;
2891 unsigned long upl_cow_pages = 0;
2892 unsigned long upl_cow_again_pages = 0;
2893
2894 unsigned long iopl_cow = 0;
2895 unsigned long iopl_cow_pages = 0;
2896 #endif
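/*
 * upl_cow/upl_cow_again count the copy-on-write synchronizations forced
 * while gathering pages into a UPL (see the UPL_WILL_MODIFY paths in
 * vm_object_upl_request below); the *_pages variants count the pages
 * involved, and the iopl_* counters serve the same purpose for the IOPL path.
 */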
2897
2898 /*
2899 * Routine: vm_object_upl_request
2900 * Purpose:
2901 * Cause the population of a portion of a vm_object.
2902 * Depending on the nature of the request, the pages
2903 * returned may contain valid data or be uninitialized.
2904 * A page list structure, listing the physical pages,
2905 * will be returned upon request.
2906 * This function is called by the file system or any other
2907 * supplier of backing store to a pager.
2908 * IMPORTANT NOTE: The caller must still respect the relationship
2909 * between the vm_object and its backing memory object. The
2910 * caller MUST NOT substitute changes in the backing file
2911 * without first doing a memory_object_lock_request on the
2912 * target range unless it is known that the pages are not
2913 * shared with another entity at the pager level.
2914 * Copy_in_to:
2915 * if a page list structure is present
2916 * return the mapped physical pages; where a
2917 * page is not present, return a non-initialized
2918 * one. If the no_sync bit is turned on, don't
2919 * call the pager unlock to synchronize with other
2920 * possible copies of the page. Leave pages busy
2921 * in the original object, if a page list structure
2922 * was specified. When a commit of the page list
2923 * pages is done, the dirty bit will be set for each one.
2924 * Copy_out_from:
2925 * If a page list structure is present, return
2926 * all mapped pages. Where a page does not exist,
2927 * map a zero-filled one. Leave pages busy in
2928 * the original object. If a page list structure
2929 * is not specified, this call is a no-op.
2930 *
2931 * Note: access of default pager objects has a rather interesting
2932 * twist. The caller of this routine, presumably the file system
2933 * page cache handling code, will never actually make a request
2934 * against a default pager backed object. Only the default
2935 * pager will make requests on backing store related vm_objects.
2936 * In this way the default pager can maintain the relationship
2937 * between backing store files (abstract memory objects) and
2938 * the vm_objects (cache objects) they support.
2939 *
2940 */
2941
2942 __private_extern__ kern_return_t
2943 vm_object_upl_request(
2944 vm_object_t object,
2945 vm_object_offset_t offset,
2946 upl_size_t size,
2947 upl_t *upl_ptr,
2948 upl_page_info_array_t user_page_list,
2949 unsigned int *page_list_count,
2950 int cntrl_flags)
2951 {
2952 vm_page_t dst_page = VM_PAGE_NULL;
2953 vm_object_offset_t dst_offset;
2954 upl_size_t xfer_size;
2955 boolean_t dirty;
2956 boolean_t hw_dirty;
2957 upl_t upl = NULL;
2958 unsigned int entry;
2959 #if MACH_CLUSTER_STATS
2960 boolean_t encountered_lrp = FALSE;
2961 #endif
2962 vm_page_t alias_page = NULL;
2963 int refmod_state = 0;
2964 wpl_array_t lite_list = NULL;
2965 vm_object_t last_copy_object;
2966 struct dw dw_array[DELAYED_WORK_LIMIT];
2967 struct dw *dwp;
2968 int dw_count;
2969
2970 if (cntrl_flags & ~UPL_VALID_FLAGS) {
2971 /*
2972 * For forward compatibility's sake,
2973 * reject any unknown flag.
2974 */
2975 return KERN_INVALID_VALUE;
2976 }
2977 if ( (!object->internal) && (object->paging_offset != 0) )
2978 panic("vm_object_upl_request: external object with non-zero paging offset\n");
2979 if (object->phys_contiguous)
2980 panic("vm_object_upl_request: contiguous object specified\n");
2981
2982
2983 if ((size / PAGE_SIZE) > MAX_UPL_SIZE)
2984 size = MAX_UPL_SIZE * PAGE_SIZE;
2985
2986 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
2987 *page_list_count = MAX_UPL_SIZE;
2988
2989 if (cntrl_flags & UPL_SET_INTERNAL) {
2990 if (cntrl_flags & UPL_SET_LITE) {
2991
2992 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, 0, size);
2993
2994 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
2995 lite_list = (wpl_array_t)
2996 (((uintptr_t)user_page_list) +
2997 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
2998 if (size == 0) {
2999 user_page_list = NULL;
3000 lite_list = NULL;
3001 }
3002 } else {
3003 upl = upl_create(UPL_CREATE_INTERNAL, 0, size);
3004
3005 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
3006 if (size == 0) {
3007 user_page_list = NULL;
3008 }
3009 }
3010 } else {
3011 if (cntrl_flags & UPL_SET_LITE) {
3012
3013 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE, 0, size);
3014
3015 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
3016 if (size == 0) {
3017 lite_list = NULL;
3018 }
3019 } else {
3020 upl = upl_create(UPL_CREATE_EXTERNAL, 0, size);
3021 }
3022 }
3023 *upl_ptr = upl;
3024
3025 if (user_page_list)
3026 user_page_list[0].device = FALSE;
3027
3028 if (cntrl_flags & UPL_SET_LITE) {
3029 upl->map_object = object;
3030 } else {
3031 upl->map_object = vm_object_allocate(size);
3032 /*
3033 * No need to lock the new object: nobody else knows
3034 * about it yet, so it's all ours so far.
3035 */
3036 upl->map_object->shadow = object;
3037 upl->map_object->pageout = TRUE;
3038 upl->map_object->can_persist = FALSE;
3039 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3040 upl->map_object->shadow_offset = offset;
3041 upl->map_object->wimg_bits = object->wimg_bits;
3042
3043 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3044
3045 upl->flags |= UPL_SHADOWED;
3046 }
3047 /*
3048 * ENCRYPTED SWAP:
3049 * Just mark the UPL as "encrypted" here.
3050 * We'll actually encrypt the pages later,
3051 * in upl_encrypt(), when the caller has
3052 * selected which pages need to go to swap.
3053 */
3054 if (cntrl_flags & UPL_ENCRYPT)
3055 upl->flags |= UPL_ENCRYPTED;
3056
3057 if (cntrl_flags & UPL_FOR_PAGEOUT)
3058 upl->flags |= UPL_PAGEOUT;
3059
3060 vm_object_lock(object);
3061 vm_object_activity_begin(object);
3062
3063 /*
3064 * we can lock in the paging_offset once paging_in_progress is set
3065 */
3066 upl->size = size;
3067 upl->offset = offset + object->paging_offset;
3068
3069 #if UPL_DEBUG
3070 queue_enter(&object->uplq, upl, upl_t, uplq);
3071 #endif /* UPL_DEBUG */
3072
3073 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
3074 /*
3075 * Honor copy-on-write obligations
3076 *
3077 * The caller is gathering these pages and
3078 * might modify their contents. We need to
3079 * make sure that the copy object has its own
3080 * private copies of these pages before we let
3081 * the caller modify them.
3082 */
3083 vm_object_update(object,
3084 offset,
3085 size,
3086 NULL,
3087 NULL,
3088 FALSE, /* should_return */
3089 MEMORY_OBJECT_COPY_SYNC,
3090 VM_PROT_NO_CHANGE);
3091 #if DEVELOPMENT || DEBUG
3092 upl_cow++;
3093 upl_cow_pages += size >> PAGE_SHIFT;
3094 #endif
3095 }
3096 /*
3097 * remember which copy object we synchronized with
3098 */
3099 last_copy_object = object->copy;
3100 entry = 0;
3101
3102 xfer_size = size;
3103 dst_offset = offset;
3104
3105 dwp = &dw_array[0];
3106 dw_count = 0;
3107
3108 while (xfer_size) {
3109
3110 dwp->dw_mask = 0;
3111
3112 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
3113 vm_object_unlock(object);
3114 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3115 vm_object_lock(object);
3116 }
3117 if (cntrl_flags & UPL_COPYOUT_FROM) {
3118 upl->flags |= UPL_PAGE_SYNC_DONE;
3119
3120 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
3121 dst_page->fictitious ||
3122 dst_page->absent ||
3123 dst_page->error ||
3124 (VM_PAGE_WIRED(dst_page) && !dst_page->pageout && !dst_page->list_req_pending)) {
3125
3126 if (user_page_list)
3127 user_page_list[entry].phys_addr = 0;
3128
3129 goto try_next_page;
3130 }
3131 /*
3132 * grab this up front...
3133 * a high percentage of the time we're going to
3134 * need the hardware modification state a bit later
3135 * anyway... so we can eliminate an extra call into
3136 * the pmap layer by grabbing it here and recording it
3137 */
3138 if (dst_page->pmapped)
3139 refmod_state = pmap_get_refmod(dst_page->phys_page);
3140 else
3141 refmod_state = 0;
3142
3143 if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) {
3144 /*
3145 * page is on inactive list and referenced...
3146 * reactivate it now... this gets it out of the
3147 * way of vm_pageout_scan which would have to
3148 * reactivate it upon tripping over it
3149 */
3150 dwp->dw_mask |= DW_vm_page_activate;
3151 }
3152 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
3153 /*
3154 * we're only asking for DIRTY pages to be returned
3155 */
3156 if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
3157 /*
3158 * if this is the page stolen by vm_pageout_scan to be
3159 * cleaned (as opposed to a buddy being clustered in),
3160 * or this request is not being driven by a PAGEOUT cluster,
3161 * then we only need to check for the page being dirty or
3162 * precious to decide whether to return it
3163 */
3164 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED))
3165 goto check_busy;
3166 goto dont_return;
3167 }
3168 /*
3169 * this is a request for a PAGEOUT cluster and this page
3170 * is merely along for the ride as a 'buddy'... not only
3171 * does it have to be dirty to be returned, but it also
3172 * can't have been referenced recently... note that we've
3173 * already filtered above based on whether this page is
3174 * currently on the inactive queue or it meets the page
3175 * ticket (generation count) check
3176 */
3177 if ( (cntrl_flags & UPL_CLEAN_IN_PLACE || !(refmod_state & VM_MEM_REFERENCED)) &&
3178 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
3179 goto check_busy;
3180 }
3181 dont_return:
3182 /*
3183 * if we reach here, we're not to return
3184 * the page... go on to the next one
3185 */
3186 if (user_page_list)
3187 user_page_list[entry].phys_addr = 0;
3188
3189 goto try_next_page;
3190 }
3191 check_busy:
3192 if (dst_page->busy && (!(dst_page->list_req_pending && (dst_page->pageout || dst_page->cleaning)))) {
3193 if (cntrl_flags & UPL_NOBLOCK) {
3194 if (user_page_list)
3195 user_page_list[entry].phys_addr = 0;
3196
3197 goto try_next_page;
3198 }
3199 /*
3200 * someone else is playing with the
3201 * page. We will have to wait.
3202 */
3203 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3204
3205 continue;
3206 }
3207 /*
3208 * Someone else already cleaning the page?
3209 */
3210 if ((dst_page->cleaning || dst_page->absent || VM_PAGE_WIRED(dst_page)) && !dst_page->list_req_pending) {
3211 if (user_page_list)
3212 user_page_list[entry].phys_addr = 0;
3213
3214 goto try_next_page;
3215 }
3216 /*
3217 * ENCRYPTED SWAP:
3218 * The caller is gathering this page and might
3219 * access its contents later on. Decrypt the
3220 * page before adding it to the UPL, so that
3221 * the caller never sees encrypted data.
3222 */
3223 if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) {
3224 int was_busy;
3225
3226 /*
3227 * save the current state of busy...
3228 * mark the page as busy while decrypt
3229 * is in progress since it will drop
3230 * the object lock...
3231 */
3232 was_busy = dst_page->busy;
3233 dst_page->busy = TRUE;
3234
3235 vm_page_decrypt(dst_page, 0);
3236 vm_page_decrypt_for_upl_counter++;
3237 /*
3238 * restore to original busy state
3239 */
3240 dst_page->busy = was_busy;
3241 }
3242 if (dst_page->pageout_queue == TRUE) {
3243
3244 vm_page_lockspin_queues();
3245
3246 #if CONFIG_EMBEDDED
3247 if (dst_page->laundry)
3248 #else
3249 if (dst_page->pageout_queue == TRUE)
3250 #endif
3251 {
3252 /*
3253 * we've buddied up a page for a clustered pageout
3254 * that has already been moved to the pageout
3255 * queue by pageout_scan... we need to remove
3256 * it from the queue and drop the laundry count
3257 * on that queue
3258 */
3259 vm_pageout_throttle_up(dst_page);
3260 }
3261 vm_page_unlock_queues();
3262 }
3263 #if MACH_CLUSTER_STATS
3264 /*
3265 * pageout statistics gathering. count
3266 * all the pages we will page out that
3267 * were not counted in the initial
3268 * vm_pageout_scan work
3269 */
3270 if (dst_page->list_req_pending)
3271 encountered_lrp = TRUE;
3272 if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious)) && !dst_page->list_req_pending) {
3273 if (encountered_lrp)
3274 CLUSTER_STAT(pages_at_higher_offsets++;)
3275 else
3276 CLUSTER_STAT(pages_at_lower_offsets++;)
3277 }
3278 #endif
3279 /*
3280 * Turn off busy indication on pending
3281 * pageout. Note: we can only get here
3282 * in the request pending case.
3283 */
3284 dst_page->list_req_pending = FALSE;
3285 dst_page->busy = FALSE;
3286
3287 hw_dirty = refmod_state & VM_MEM_MODIFIED;
3288 dirty = hw_dirty ? TRUE : dst_page->dirty;
3289
3290 if (dst_page->phys_page > upl->highest_page)
3291 upl->highest_page = dst_page->phys_page;
3292
3293 if (cntrl_flags & UPL_SET_LITE) {
3294 unsigned int pg_num;
3295
3296 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
3297 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
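/*
 * the lite list is a bitmap, 32 pages per 32-bit word:
 * pg_num >> 5 selects the word, pg_num & 31 the bit within it
 */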
3298 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
3299
3300 if (hw_dirty)
3301 pmap_clear_modify(dst_page->phys_page);
3302
3303 /*
3304 * Mark original page as cleaning
3305 * in place.
3306 */
3307 dst_page->cleaning = TRUE;
3308 dst_page->precious = FALSE;
3309 } else {
3310 /*
3311 * use pageclean setup; it is more
3312 * convenient even for the pageout
3313 * cases here
3314 */
3315 vm_object_lock(upl->map_object);
3316 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
3317 vm_object_unlock(upl->map_object);
3318
3319 alias_page->absent = FALSE;
3320 alias_page = NULL;
3321 }
3322 #if MACH_PAGEMAP
3323 /*
3324 * Record that this page has been
3325 * written out
3326 */
3327 vm_external_state_set(object->existence_map, dst_page->offset);
3328 #endif /*MACH_PAGEMAP*/
3329 dst_page->dirty = dirty;
3330
3331 if (!dirty)
3332 dst_page->precious = TRUE;
3333
3334 if (dst_page->pageout)
3335 dst_page->busy = TRUE;
3336
3337 if ( (cntrl_flags & UPL_ENCRYPT) ) {
3338 /*
3339 * ENCRYPTED SWAP:
3340 * We want to deny access to the target page
3341 * because its contents are about to be
3342 * encrypted and the user would be very
3343 * confused to see encrypted data instead
3344 * of their data.
3345 * We also set "encrypted_cleaning" to allow
3346 * vm_pageout_scan() to demote that page
3347 * from "adjacent/clean-in-place" to
3348 * "target/clean-and-free" if it bumps into
3349 * this page during its scanning while we're
3350 * still processing this cluster.
3351 */
3352 dst_page->busy = TRUE;
3353 dst_page->encrypted_cleaning = TRUE;
3354 }
3355 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
3356 /*
3357 * deny access to the target page
3358 * while it is being worked on
3359 */
3360 if ((!dst_page->pageout) && ( !VM_PAGE_WIRED(dst_page))) {
3361 dst_page->busy = TRUE;
3362 dst_page->pageout = TRUE;
3363
3364 dwp->dw_mask |= DW_vm_page_wire;
3365 }
3366 }
3367 } else {
3368 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
3369 /*
3370 * Honor copy-on-write obligations
3371 *
3372 * The copy object has changed since we
3373 * last synchronized for copy-on-write.
3374 * Another copy object might have been
3375 * inserted while we released the object's
3376 * lock. Since someone could have seen the
3377 * original contents of the remaining pages
3378 * through that new object, we have to
3379 * synchronize with it again for the remaining
3380 * pages only. The previous pages are "busy"
3381 * so they can not be seen through the new
3382 * mapping. The new mapping will see our
3383 * upcoming changes for those previous pages,
3384 * but that's OK since they couldn't see what
3385 * was there before. It's just a race anyway
3386 * and there's no guarantee of consistency or
3387 * atomicity. We just don't want new mappings
3388 * to see both the *before* and *after* pages.
3389 */
3390 if (object->copy != VM_OBJECT_NULL) {
3391 vm_object_update(
3392 object,
3393 dst_offset,/* current offset */
3394 xfer_size, /* remaining size */
3395 NULL,
3396 NULL,
3397 FALSE, /* should_return */
3398 MEMORY_OBJECT_COPY_SYNC,
3399 VM_PROT_NO_CHANGE);
3400
3401 #if DEVELOPMENT || DEBUG
3402 upl_cow_again++;
3403 upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
3404 #endif
3405 }
3406 /*
3407 * remember the copy object we synced with
3408 */
3409 last_copy_object = object->copy;
3410 }
3411 dst_page = vm_page_lookup(object, dst_offset);
3412
3413 if (dst_page != VM_PAGE_NULL) {
3414
3415 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
3416
3417 if ( !(dst_page->absent && dst_page->list_req_pending) ) {
3418 /*
3419 * skip over pages already present in the cache
3420 */
3421 if (user_page_list)
3422 user_page_list[entry].phys_addr = 0;
3423
3424 goto try_next_page;
3425 }
3426 }
3427 if ( !(dst_page->list_req_pending) ) {
3428
3429 if (dst_page->cleaning) {
3430 /*
3431 * someone else is writing to the page... wait...
3432 */
3433 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3434
3435 continue;
3436 }
3437 } else {
3438 if (dst_page->fictitious &&
3439 dst_page->phys_page == vm_page_fictitious_addr) {
3440 assert( !dst_page->speculative);
3441 /*
3442 * dump the fictitious page
3443 */
3444 dst_page->list_req_pending = FALSE;
3445
3446 VM_PAGE_FREE(dst_page);
3447
3448 dst_page = NULL;
3449
3450 } else if (dst_page->absent) {
3451 /*
3452 * the default_pager case
3453 */
3454 dst_page->list_req_pending = FALSE;
3455 dst_page->busy = FALSE;
3456
3457 } else if (dst_page->pageout || dst_page->cleaning) {
3458 /*
3459 * page was earmarked by vm_pageout_scan
3460 * to be cleaned and stolen... we're going
3461 * to take it back since we are not attempting
3462 * to read that page and we don't want to stall
3463 * waiting for it to be cleaned for 2 reasons...
3464 * 1 - no use paging it out and back in
3465 * 2 - if we stall, we may cause a deadlock in
3466 * the FS trying to acquire its locks
3467 * on the VNOP_PAGEOUT path presuming that
3468 * those locks are already held on the read
3469 * path before trying to create this UPL
3470 *
3471 * so undo all of the state that vm_pageout_scan
3472 * hung on this page
3473 */
3474 dst_page->busy = FALSE;
3475
3476 vm_pageout_queue_steal(dst_page, FALSE);
3477 }
3478 }
3479 }
3480 if (dst_page == VM_PAGE_NULL) {
3481 if (object->private) {
3482 /*
3483 * This is a nasty wrinkle for users
3484 * of upl who encounter device or
3485 * private memory... however, it is
3486 * unavoidable; only a fault can
3487 * resolve the actual backing
3488 * physical page by asking the
3489 * backing device.
3490 */
3491 if (user_page_list)
3492 user_page_list[entry].phys_addr = 0;
3493
3494 goto try_next_page;
3495 }
3496 /*
3497 * need to allocate a page
3498 */
3499 dst_page = vm_page_grab();
3500
3501 if (dst_page == VM_PAGE_NULL) {
3502 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
3503 /*
3504 * we don't want to stall waiting for pages to come onto the free list
3505 * while we're already holding absent pages in this UPL...
3506 * the caller will deal with the empty slots
3507 */
3508 if (user_page_list)
3509 user_page_list[entry].phys_addr = 0;
3510
3511 goto try_next_page;
3512 }
3513 /*
3514 * no pages available... wait
3515 * then try again for the same
3516 * offset...
3517 */
3518 vm_object_unlock(object);
3519 VM_PAGE_WAIT();
3520 vm_object_lock(object);
3521
3522 continue;
3523 }
3524 vm_page_insert(dst_page, object, dst_offset);
3525
3526 dst_page->absent = TRUE;
3527 dst_page->busy = FALSE;
3528
3529 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
3530 /*
3531 * if UPL_RET_ONLY_ABSENT was specified,
3532 * than we're definitely setting up a
3533 * upl for a clustered read/pagein
3534 * operation... mark the pages as clustered
3535 * so upl_commit_range can put them on the
3536 * speculative list
3537 */
3538 dst_page->clustered = TRUE;
3539 }
3540 }
3541 if (dst_page->fictitious) {
3542 panic("need corner case for fictitious page");
3543 }
3544 if (dst_page->busy) {
3545 /*
3546 * someone else is playing with the
3547 * page. We will have to wait.
3548 */
3549 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3550
3551 continue;
3552 }
3553 /*
3554 * ENCRYPTED SWAP:
3555 */
3556 if (cntrl_flags & UPL_ENCRYPT) {
3557 /*
3558 * The page is going to be encrypted when we
3559 * get it from the pager, so mark it so.
3560 */
3561 dst_page->encrypted = TRUE;
3562 } else {
3563 /*
3564 * Otherwise, the page will not contain
3565 * encrypted data.
3566 */
3567 dst_page->encrypted = FALSE;
3568 }
3569 dst_page->overwriting = TRUE;
3570
3571 if (dst_page->pmapped) {
3572 if ( !(cntrl_flags & UPL_FILE_IO))
3573 /*
3574 * eliminate all mappings from the
3575 * original object and its progeny
3576 */
3577 refmod_state = pmap_disconnect(dst_page->phys_page);
3578 else
3579 refmod_state = pmap_get_refmod(dst_page->phys_page);
3580 } else
3581 refmod_state = 0;
3582
3583 hw_dirty = refmod_state & VM_MEM_MODIFIED;
3584 dirty = hw_dirty ? TRUE : dst_page->dirty;
3585
3586 if (cntrl_flags & UPL_SET_LITE) {
3587 unsigned int pg_num;
3588
3589 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
3590 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
3591 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
3592
3593 if (hw_dirty)
3594 pmap_clear_modify(dst_page->phys_page);
3595
3596 /*
3597 * Mark original page as cleaning
3598 * in place.
3599 */
3600 dst_page->cleaning = TRUE;
3601 dst_page->precious = FALSE;
3602 } else {
3603 /*
3604 * use pageclean setup; it is more
3605 * convenient even for the pageout
3606 * cases here
3607 */
3608 vm_object_lock(upl->map_object);
3609 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
3610 vm_object_unlock(upl->map_object);
3611
3612 alias_page->absent = FALSE;
3613 alias_page = NULL;
3614 }
3615
3616 if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
3617 /*
3618 * clean in place for read implies
3619 * that a write will be done on all
3620 * the pages that are dirty before
3621 * a upl commit is done. The caller
3622 * is obligated to preserve the
3623 * contents of all pages marked dirty
3624 */
3625 upl->flags |= UPL_CLEAR_DIRTY;
3626 }
3627 dst_page->dirty = dirty;
3628
3629 if (!dirty)
3630 dst_page->precious = TRUE;
3631
3632 if ( !VM_PAGE_WIRED(dst_page)) {
3633 /*
3634 * deny access to the target page while
3635 * it is being worked on
3636 */
3637 dst_page->busy = TRUE;
3638 } else
3639 dwp->dw_mask |= DW_vm_page_wire;
3640
3641 /*
3642 * We might be about to satisfy a fault which has been
3643 * requested. So no need for the "restart" bit.
3644 */
3645 dst_page->restart = FALSE;
3646 if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
3647 /*
3648 * expect the page to be used
3649 */
3650 dwp->dw_mask |= DW_set_reference;
3651 }
3652 dst_page->precious = (cntrl_flags & UPL_PRECIOUS) ? TRUE : FALSE;
3653 }
3654 if (dst_page->busy)
3655 upl->flags |= UPL_HAS_BUSY;
3656
3657 if (dst_page->phys_page > upl->highest_page)
3658 upl->highest_page = dst_page->phys_page;
3659 if (user_page_list) {
3660 user_page_list[entry].phys_addr = dst_page->phys_page;
3661 user_page_list[entry].pageout = dst_page->pageout;
3662 user_page_list[entry].absent = dst_page->absent;
3663 user_page_list[entry].dirty = dst_page->dirty;
3664 user_page_list[entry].precious = dst_page->precious;
3665 user_page_list[entry].device = FALSE;
3666 if (dst_page->clustered == TRUE)
3667 user_page_list[entry].speculative = dst_page->speculative;
3668 else
3669 user_page_list[entry].speculative = FALSE;
3670 user_page_list[entry].cs_validated = dst_page->cs_validated;
3671 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
3672 }
3673 /*
3674 * if UPL_RET_ONLY_ABSENT is set, then
3675 * we are working with a fresh page and we've
3676 * just set the clustered flag on it to
3677 * indicate that it was drug in as part of a
3678 * speculative cluster... so leave it alone
3679 */
3680 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
3681 /*
3682 * someone is explicitly grabbing this page...
3683 * update clustered and speculative state
3684 *
3685 */
3686 VM_PAGE_CONSUME_CLUSTERED(dst_page);
3687 }
3688 try_next_page:
3689 if (dwp->dw_mask) {
3690 if (dwp->dw_mask & DW_vm_page_activate)
3691 VM_STAT_INCR(reactivations);
3692
3693 if (dst_page->busy == FALSE) {
3694 /*
3695 * dw_do_work may need to drop the object lock...
3696 * if it does, we need the pages it's looking at to
3697 * be held stable via the busy bit.
3698 */
3699 dst_page->busy = TRUE;
3700 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
3701 }
3702 dwp->dw_m = dst_page;
3703 dwp++;
3704 dw_count++;
3705
3706 if (dw_count >= DELAYED_WORK_LIMIT) {
3707 dw_do_work(object, &dw_array[0], dw_count);
3708
3709 dwp = &dw_array[0];
3710 dw_count = 0;
3711 }
3712 }
3713 entry++;
3714 dst_offset += PAGE_SIZE_64;
3715 xfer_size -= PAGE_SIZE;
3716 }
3717 if (dw_count)
3718 dw_do_work(object, &dw_array[0], dw_count);
3719
3720 if (alias_page != NULL) {
3721 VM_PAGE_FREE(alias_page);
3722 }
3723
3724 if (page_list_count != NULL) {
3725 if (upl->flags & UPL_INTERNAL)
3726 *page_list_count = 0;
3727 else if (*page_list_count > entry)
3728 *page_list_count = entry;
3729 }
3730 #if UPL_DEBUG
3731 upl->upl_state = 1;
3732 #endif
3733 vm_object_unlock(object);
3734
3735 return KERN_SUCCESS;
3736 }
3737
3738 /* JMM - Backward compatibility for now */
3739 kern_return_t
3740 vm_fault_list_request( /* forward */
3741 memory_object_control_t control,
3742 vm_object_offset_t offset,
3743 upl_size_t size,
3744 upl_t *upl_ptr,
3745 upl_page_info_t **user_page_list_ptr,
3746 unsigned int page_list_count,
3747 int cntrl_flags);
3748 kern_return_t
3749 vm_fault_list_request(
3750 memory_object_control_t control,
3751 vm_object_offset_t offset,
3752 upl_size_t size,
3753 upl_t *upl_ptr,
3754 upl_page_info_t **user_page_list_ptr,
3755 unsigned int page_list_count,
3756 int cntrl_flags)
3757 {
3758 unsigned int local_list_count;
3759 upl_page_info_t *user_page_list;
3760 kern_return_t kr;
3761
3762 if((cntrl_flags & UPL_VECTOR)==UPL_VECTOR)
3763 return KERN_INVALID_ARGUMENT;
3764
3765 if (user_page_list_ptr != NULL) {
3766 local_list_count = page_list_count;
3767 user_page_list = *user_page_list_ptr;
3768 } else {
3769 local_list_count = 0;
3770 user_page_list = NULL;
3771 }
3772 kr = memory_object_upl_request(control,
3773 offset,
3774 size,
3775 upl_ptr,
3776 user_page_list,
3777 &local_list_count,
3778 cntrl_flags);
3779
3780 if(kr != KERN_SUCCESS)
3781 return kr;
3782
3783 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
3784 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
3785 }
3786
3787 return KERN_SUCCESS;
3788 }
3789
3790
3791
3792 /*
3793 * Routine: vm_object_super_upl_request
3794 * Purpose:
3795 * Cause the population of a portion of a vm_object
3796 * in much the same way as memory_object_upl_request.
3797 * Depending on the nature of the request, the pages
3798 * returned may contain valid data or be uninitialized.
3799 * However, the region may be expanded up to the super
3800 * cluster size provided.
3801 */
3802
3803 __private_extern__ kern_return_t
3804 vm_object_super_upl_request(
3805 vm_object_t object,
3806 vm_object_offset_t offset,
3807 upl_size_t size,
3808 upl_size_t super_cluster,
3809 upl_t *upl,
3810 upl_page_info_t *user_page_list,
3811 unsigned int *page_list_count,
3812 int cntrl_flags)
3813 {
3814 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
3815 return KERN_FAILURE;
3816
3817 assert(object->paging_in_progress);
3818 offset = offset - object->paging_offset;
3819
3820 if (super_cluster > size) {
3821
3822 vm_object_offset_t base_offset;
3823 upl_size_t super_size;
3824 vm_object_size_t super_size_64;
3825
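/*
 * Round the start of the request down to a super_cluster
 * boundary (the mask arithmetic assumes super_cluster is a
 * power of two), then clip the expanded region so it does
 * not run past the end of the object.
 */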
3826 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
3827 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
3828 super_size_64 = ((base_offset + super_size) > object->size) ? (object->size - base_offset) : super_size;
3829 super_size = (upl_size_t) super_size_64;
3830 assert(super_size == super_size_64);
3831
3832 if (offset > (base_offset + super_size)) {
3833 panic("vm_object_super_upl_request: Missed target pageout"
3834 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
3835 offset, base_offset, super_size, super_cluster,
3836 size, object->paging_offset);
3837 }
3838 /*
3839 * apparently there is a case where the vm requests a
3840 * page to be written out whose offset is beyond the
3841 * object size
3842 */
3843 if ((offset + size) > (base_offset + super_size)) {
3844 super_size_64 = (offset + size) - base_offset;
3845 super_size = (upl_size_t) super_size_64;
3846 assert(super_size == super_size_64);
3847 }
3848
3849 offset = base_offset;
3850 size = super_size;
3851 }
3852 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags);
3853 }
3854
3855
3856 kern_return_t
3857 vm_map_create_upl(
3858 vm_map_t map,
3859 vm_map_address_t offset,
3860 upl_size_t *upl_size,
3861 upl_t *upl,
3862 upl_page_info_array_t page_list,
3863 unsigned int *count,
3864 int *flags)
3865 {
3866 vm_map_entry_t entry;
3867 int caller_flags;
3868 int force_data_sync;
3869 int sync_cow_data;
3870 vm_object_t local_object;
3871 vm_map_offset_t local_offset;
3872 vm_map_offset_t local_start;
3873 kern_return_t ret;
3874
3875 caller_flags = *flags;
3876
3877 if (caller_flags & ~UPL_VALID_FLAGS) {
3878 /*
3879 * For forward compatibility's sake,
3880 * reject any unknown flag.
3881 */
3882 return KERN_INVALID_VALUE;
3883 }
3884 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
3885 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
3886
3887 if (upl == NULL)
3888 return KERN_INVALID_ARGUMENT;
3889
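/*
 * Whenever the map lock has to be dropped below (to allocate
 * a backing object, resolve copy-on-write, or sync COW/shadow
 * data), we jump back here and re-lookup the entry, since the
 * map may have changed while it was unlocked.
 */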
3890 REDISCOVER_ENTRY:
3891 vm_map_lock_read(map);
3892
3893 if (vm_map_lookup_entry(map, offset, &entry)) {
3894
3895 if ((entry->vme_end - offset) < *upl_size) {
3896 *upl_size = (upl_size_t) (entry->vme_end - offset);
3897 assert(*upl_size == entry->vme_end - offset);
3898 }
3899
3900 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
3901 *flags = 0;
3902
3903 if ( !entry->is_sub_map && entry->object.vm_object != VM_OBJECT_NULL) {
3904 if (entry->object.vm_object->private)
3905 *flags = UPL_DEV_MEMORY;
3906
3907 if (entry->object.vm_object->phys_contiguous)
3908 *flags |= UPL_PHYS_CONTIG;
3909 }
3910 vm_map_unlock_read(map);
3911
3912 return KERN_SUCCESS;
3913 }
3914 if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) {
3915 if ((*upl_size/PAGE_SIZE) > MAX_UPL_SIZE)
3916 *upl_size = MAX_UPL_SIZE * PAGE_SIZE;
3917 }
3918 /*
3919 * Create an object if necessary.
3920 */
3921 if (entry->object.vm_object == VM_OBJECT_NULL) {
3922
3923 if (vm_map_lock_read_to_write(map))
3924 goto REDISCOVER_ENTRY;
3925
3926 entry->object.vm_object = vm_object_allocate((vm_size_t)(entry->vme_end - entry->vme_start));
3927 entry->offset = 0;
3928
3929 vm_map_lock_write_to_read(map);
3930 }
3931 if (!(caller_flags & UPL_COPYOUT_FROM)) {
3932 if (!(entry->protection & VM_PROT_WRITE)) {
3933 vm_map_unlock_read(map);
3934 return KERN_PROTECTION_FAILURE;
3935 }
3936 if (entry->needs_copy) {
3937 /*
3938 * Honor copy-on-write for COPY_SYMMETRIC
3939 * strategy.
3940 */
3941 vm_map_t local_map;
3942 vm_object_t object;
3943 vm_object_offset_t new_offset;
3944 vm_prot_t prot;
3945 boolean_t wired;
3946 vm_map_version_t version;
3947 vm_map_t real_map;
3948
3949 local_map = map;
3950
3951 if (vm_map_lookup_locked(&local_map,
3952 offset, VM_PROT_WRITE,
3953 OBJECT_LOCK_EXCLUSIVE,
3954 &version, &object,
3955 &new_offset, &prot, &wired,
3956 NULL,
3957 &real_map) != KERN_SUCCESS) {
3958 vm_map_unlock_read(local_map);
3959 return KERN_FAILURE;
3960 }
3961 if (real_map != map)
3962 vm_map_unlock(real_map);
3963 vm_map_unlock_read(local_map);
3964
3965 vm_object_unlock(object);
3966
3967 goto REDISCOVER_ENTRY;
3968 }
3969 }
3970 if (entry->is_sub_map) {
3971 vm_map_t submap;
3972
3973 submap = entry->object.sub_map;
3974 local_start = entry->vme_start;
3975 local_offset = entry->offset;
3976
3977 vm_map_reference(submap);
3978 vm_map_unlock_read(map);
3979
3980 ret = vm_map_create_upl(submap,
3981 local_offset + (offset - local_start),
3982 upl_size, upl, page_list, count, flags);
3983 vm_map_deallocate(submap);
3984
3985 return ret;
3986 }
3987 if (sync_cow_data) {
3988 if (entry->object.vm_object->shadow || entry->object.vm_object->copy) {
3989 local_object = entry->object.vm_object;
3990 local_start = entry->vme_start;
3991 local_offset = entry->offset;
3992
3993 vm_object_reference(local_object);
3994 vm_map_unlock_read(map);
3995
3996 if (local_object->shadow && local_object->copy) {
3997 vm_object_lock_request(
3998 local_object->shadow,
3999 (vm_object_offset_t)
4000 ((offset - local_start) +
4001 local_offset) +
4002 local_object->shadow_offset,
4003 *upl_size, FALSE,
4004 MEMORY_OBJECT_DATA_SYNC,
4005 VM_PROT_NO_CHANGE);
4006 }
4007 sync_cow_data = FALSE;
4008 vm_object_deallocate(local_object);
4009
4010 goto REDISCOVER_ENTRY;
4011 }
4012 }
4013 if (force_data_sync) {
4014 local_object = entry->object.vm_object;
4015 local_start = entry->vme_start;
4016 local_offset = entry->offset;
4017
4018 vm_object_reference(local_object);
4019 vm_map_unlock_read(map);
4020
4021 vm_object_lock_request(
4022 local_object,
4023 (vm_object_offset_t)
4024 ((offset - local_start) + local_offset),
4025 (vm_object_size_t)*upl_size, FALSE,
4026 MEMORY_OBJECT_DATA_SYNC,
4027 VM_PROT_NO_CHANGE);
4028
4029 force_data_sync = FALSE;
4030 vm_object_deallocate(local_object);
4031
4032 goto REDISCOVER_ENTRY;
4033 }
4034 if (entry->object.vm_object->private)
4035 *flags = UPL_DEV_MEMORY;
4036 else
4037 *flags = 0;
4038
4039 if (entry->object.vm_object->phys_contiguous)
4040 *flags |= UPL_PHYS_CONTIG;
4041
4042 local_object = entry->object.vm_object;
4043 local_offset = entry->offset;
4044 local_start = entry->vme_start;
4045
4046 vm_object_reference(local_object);
4047 vm_map_unlock_read(map);
4048
4049 ret = vm_object_iopl_request(local_object,
4050 (vm_object_offset_t) ((offset - local_start) + local_offset),
4051 *upl_size,
4052 upl,
4053 page_list,
4054 count,
4055 caller_flags);
4056 vm_object_deallocate(local_object);
4057
4058 return(ret);
4059 }
4060 vm_map_unlock_read(map);
4061
4062 return(KERN_FAILURE);
4063 }
4064
4065 /*
4066 * Internal routine to enter a UPL into a VM map.
4067 *
4068 * JMM - This should just be doable through the standard
4069 * vm_map_enter() API.
4070 */
4071 kern_return_t
4072 vm_map_enter_upl(
4073 vm_map_t map,
4074 upl_t upl,
4075 vm_map_offset_t *dst_addr)
4076 {
4077 vm_map_size_t size;
4078 vm_object_offset_t offset;
4079 vm_map_offset_t addr;
4080 vm_page_t m;
4081 kern_return_t kr;
4082 int isVectorUPL = 0, curr_upl=0;
4083 upl_t vector_upl = NULL;
4084 vm_offset_t vector_upl_dst_addr = 0;
4085 vm_map_t vector_upl_submap = NULL;
4086 upl_offset_t subupl_offset = 0;
4087 upl_size_t subupl_size = 0;
4088
4089 if (upl == UPL_NULL)
4090 return KERN_INVALID_ARGUMENT;
4091
4092 if((isVectorUPL = vector_upl_is_valid(upl))) {
4093 int mapped=0,valid_upls=0;
4094 vector_upl = upl;
4095
4096 upl_lock(vector_upl);
4097 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
4098 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
4099 if(upl == NULL)
4100 continue;
4101 valid_upls++;
4102 if (UPL_PAGE_LIST_MAPPED & upl->flags)
4103 mapped++;
4104 }
4105
4106 if(mapped) {
4107 if(mapped != valid_upls)
4108 panic("Only %d of the %d sub-upls within the Vector UPL are already mapped\n", mapped, valid_upls);
4109 else {
4110 upl_unlock(vector_upl);
4111 return KERN_FAILURE;
4112 }
4113 }
4114
4115 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE, VM_FLAGS_ANYWHERE, &vector_upl_submap);
4116 if( kr != KERN_SUCCESS )
4117 panic("Vector UPL submap allocation failed\n");
4118 map = vector_upl_submap;
4119 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
4120 curr_upl=0;
4121 }
4122 else
4123 upl_lock(upl);
4124
4125 process_upl_to_enter:
4126 if(isVectorUPL){
4127 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
4128 *dst_addr = vector_upl_dst_addr;
4129 upl_unlock(vector_upl);
4130 return KERN_SUCCESS;
4131 }
4132 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
4133 if(upl == NULL)
4134 goto process_upl_to_enter;
4135 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
4136 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
4137 } else {
4138 /*
4139 * check to see if already mapped
4140 */
4141 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
4142 upl_unlock(upl);
4143 return KERN_FAILURE;
4144 }
4145 }
4146 if ((!(upl->flags & UPL_SHADOWED)) &&
4147 ((upl->flags & UPL_HAS_BUSY) ||
4148 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
4149
4150 vm_object_t object;
4151 vm_page_t alias_page;
4152 vm_object_offset_t new_offset;
4153 unsigned int pg_num;
4154 wpl_array_t lite_list;
4155
4156 if (upl->flags & UPL_INTERNAL) {
4157 lite_list = (wpl_array_t)
4158 ((((uintptr_t)upl) + sizeof(struct upl))
4159 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4160 } else {
4161 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
4162 }
4163 object = upl->map_object;
4164 upl->map_object = vm_object_allocate(upl->size);
4165
4166 vm_object_lock(upl->map_object);
4167
4168 upl->map_object->shadow = object;
4169 upl->map_object->pageout = TRUE;
4170 upl->map_object->can_persist = FALSE;
4171 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
4172 upl->map_object->shadow_offset = upl->offset - object->paging_offset;
4173 upl->map_object->wimg_bits = object->wimg_bits;
4174 offset = upl->map_object->shadow_offset;
4175 new_offset = 0;
4176 size = upl->size;
4177
4178 upl->flags |= UPL_SHADOWED;
4179
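/*
 * For every page marked in the lite list, grab a fictitious
 * page and turn it into a private alias of the real page, so
 * the UPL can be mapped without disturbing the original
 * object's pages.
 */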
4180 while (size) {
4181 pg_num = (unsigned int) (new_offset / PAGE_SIZE);
4182 assert(pg_num == new_offset / PAGE_SIZE);
4183
4184 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4185
4186 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4187
4188 vm_object_lock(object);
4189
4190 m = vm_page_lookup(object, offset);
4191 if (m == VM_PAGE_NULL) {
4192 panic("vm_upl_map: page missing\n");
4193 }
4194
4195 /*
4196 * Convert the fictitious page to a private
4197 * shadow of the real page.
4198 */
4199 assert(alias_page->fictitious);
4200 alias_page->fictitious = FALSE;
4201 alias_page->private = TRUE;
4202 alias_page->pageout = TRUE;
4203 /*
4204 * since m is a page in the upl it must
4205 * already be wired or BUSY, so it's
4206 * safe to assign the underlying physical
4207 * page to the alias
4208 */
4209 alias_page->phys_page = m->phys_page;
4210
4211 vm_object_unlock(object);
4212
4213 vm_page_lockspin_queues();
4214 vm_page_wire(alias_page);
4215 vm_page_unlock_queues();
4216
4217 /*
4218 * ENCRYPTED SWAP:
4219 * The virtual page ("m") has to be wired in some way
4220 * here or its physical page ("m->phys_page") could
4221 * be recycled at any time.
4222 * Assuming this is enforced by the caller, we can't
4223 * get an encrypted page here. Since the encryption
4224 * key depends on the VM page's "pager" object and
4225 * the "paging_offset", we couldn't handle 2 pageable
4226 * VM pages (with different pagers and paging_offsets)
4227 * sharing the same physical page: we could end up
4228 * encrypting with one key (via one VM page) and
4229 * decrypting with another key (via the alias VM page).
4230 */
4231 ASSERT_PAGE_DECRYPTED(m);
4232
4233 vm_page_insert(alias_page, upl->map_object, new_offset);
4234
4235 assert(!alias_page->wanted);
4236 alias_page->busy = FALSE;
4237 alias_page->absent = FALSE;
4238 }
4239 size -= PAGE_SIZE;
4240 offset += PAGE_SIZE_64;
4241 new_offset += PAGE_SIZE_64;
4242 }
4243 vm_object_unlock(upl->map_object);
4244 }
4245 if (upl->flags & UPL_SHADOWED)
4246 offset = 0;
4247 else
4248 offset = upl->offset - upl->map_object->paging_offset;
4249 size = upl->size;
4250
4251 vm_object_reference(upl->map_object);
4252
4253 if(!isVectorUPL) {
4254 *dst_addr = 0;
4255 /*
4256 * NEED A UPL_MAP ALIAS
4257 */
4258 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
4259 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE,
4260 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
4261
4262 if (kr != KERN_SUCCESS) {
4263 upl_unlock(upl);
4264 return(kr);
4265 }
4266 }
4267 else {
4268 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
4269 VM_FLAGS_FIXED, upl->map_object, offset, FALSE,
4270 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
4271 if(kr)
4272 panic("vm_map_enter failed for a Vector UPL\n");
4273 }
4274 vm_object_lock(upl->map_object);
4275
4276 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
4277 m = vm_page_lookup(upl->map_object, offset);
4278
4279 if (m) {
4280 unsigned int cache_attr;
4281 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
4282
4283 m->pmapped = TRUE;
4284
4285 /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
4286 * but only in kernel space. If this was on a user map,
4287 * we'd have to set the wpmapped bit. */
4288 /* m->wpmapped = TRUE; */
4289 assert(map==kernel_map);
4290
4291 PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, cache_attr, TRUE);
4292 }
4293 offset += PAGE_SIZE_64;
4294 }
4295 vm_object_unlock(upl->map_object);
4296
4297 /*
4298 * hold a reference for the mapping
4299 */
4300 upl->ref_count++;
4301 upl->flags |= UPL_PAGE_LIST_MAPPED;
4302 upl->kaddr = (vm_offset_t) *dst_addr;
4303 assert(upl->kaddr == *dst_addr);
4304
4305 if(isVectorUPL)
4306 goto process_upl_to_enter;
4307
4308 upl_unlock(upl);
4309
4310 return KERN_SUCCESS;
4311 }
4312
4313 /*
4314 * Internal routine to remove a UPL mapping from a VM map.
4315 *
4316 * XXX - This should just be doable through a standard
4317 * vm_map_remove() operation. Otherwise, implicit clean-up
4318 * of the target map won't be able to correctly remove
4319 * these (and release the reference on the UPL). Having
4320 * to do this means we can't map these into user-space
4321 * maps yet.
4322 */
4323 kern_return_t
4324 vm_map_remove_upl(
4325 vm_map_t map,
4326 upl_t upl)
4327 {
4328 vm_address_t addr;
4329 upl_size_t size;
4330 int isVectorUPL = 0, curr_upl = 0;
4331 upl_t vector_upl = NULL;
4332
4333 if (upl == UPL_NULL)
4334 return KERN_INVALID_ARGUMENT;
4335
4336 if((isVectorUPL = vector_upl_is_valid(upl))) {
4337 int unmapped=0, valid_upls=0;
4338 vector_upl = upl;
4339 upl_lock(vector_upl);
4340 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
4341 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
4342 if(upl == NULL)
4343 continue;
4344 valid_upls++;
4345 if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
4346 unmapped++;
4347 }
4348
4349 if(unmapped) {
4350 if(unmapped != valid_upls)
4351 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
4352 else {
4353 upl_unlock(vector_upl);
4354 return KERN_FAILURE;
4355 }
4356 }
4357 curr_upl=0;
4358 }
4359 else
4360 upl_lock(upl);
4361
4362 process_upl_to_remove:
4363 if(isVectorUPL) {
4364 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
4365 vm_map_t v_upl_submap;
4366 vm_offset_t v_upl_submap_dst_addr;
4367 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
4368
4369 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS);
4370 vm_map_deallocate(v_upl_submap);
4371 upl_unlock(vector_upl);
4372 return KERN_SUCCESS;
4373 }
4374
4375 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
4376 if(upl == NULL)
4377 goto process_upl_to_remove;
4378 }
4379
4380 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
4381 addr = upl->kaddr;
4382 size = upl->size;
4383
4384 assert(upl->ref_count > 1);
4385 upl->ref_count--; /* removing mapping ref */
4386
4387 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
4388 upl->kaddr = (vm_offset_t) 0;
4389
4390 if(!isVectorUPL) {
4391 upl_unlock(upl);
4392
4393 vm_map_remove(map,
4394 vm_map_trunc_page(addr),
4395 vm_map_round_page(addr + size),
4396 VM_MAP_NO_FLAGS);
4397
4398 return KERN_SUCCESS;
4399 }
4400 else {
4401 /*
4402 * If it's a Vectored UPL, we'll be removing the entire
4403 * submap anyway, so no need to remove individual UPL
4404 * element mappings from within the submap
4405 */
4406 goto process_upl_to_remove;
4407 }
4408 }
4409 upl_unlock(upl);
4410
4411 return KERN_FAILURE;
4412 }
4413
4414 static void
4415 dw_do_work(
4416 vm_object_t object,
4417 struct dw *dwp,
4418 int dw_count)
4419 {
4420 int j;
4421 boolean_t held_as_spin = TRUE;
4422
4423 /*
4424 * pageout_scan takes the vm_page_lock_queues first
4425 * then tries for the object lock... to avoid what
4426 * is effectively a lock inversion, we'll go to the
4427 * trouble of taking them in that same order... otherwise
4428 * if this object contains the majority of the pages resident
4429 * in the UBC (or a small set of large objects actively being
4430 * worked on contain the majority of the pages), we could
4431 * cause the pageout_scan thread to 'starve' in its attempt
4432 * to find pages to move to the free queue, since it has to
4433 * successfully acquire the object lock of any candidate page
4434 * before it can steal/clean it.
4435 */
4436 if (!vm_page_trylockspin_queues()) {
4437 vm_object_unlock(object);
4438
4439 vm_page_lockspin_queues();
4440
4441 for (j = 0; ; j++) {
4442 if (!vm_object_lock_avoid(object) &&
4443 _vm_object_lock_try(object))
4444 break;
4445 vm_page_unlock_queues();
4446 mutex_pause(j);
4447 vm_page_lockspin_queues();
4448 }
4449 }
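/*
 * With both the page queues lock and the object lock held,
 * apply each queued entry's deferred operations as encoded
 * in its dw_mask.
 */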
4450 for (j = 0; j < dw_count; j++, dwp++) {
4451
4452 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4453 vm_pageout_throttle_up(dwp->dw_m);
4454
4455 if (dwp->dw_mask & DW_vm_page_wire)
4456 vm_page_wire(dwp->dw_m);
4457 else if (dwp->dw_mask & DW_vm_page_unwire) {
4458 boolean_t queueit;
4459
4460 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4461
4462 vm_page_unwire(dwp->dw_m, queueit);
4463 }
4464 if (dwp->dw_mask & DW_vm_page_free) {
4465 if (held_as_spin == TRUE) {
4466 vm_page_lockconvert_queues();
4467 held_as_spin = FALSE;
4468 }
4469 vm_page_free(dwp->dw_m);
4470 } else {
4471 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4472 vm_page_deactivate_internal(dwp->dw_m, FALSE);
4473 else if (dwp->dw_mask & DW_vm_page_activate)
4474 vm_page_activate(dwp->dw_m);
4475 else if (dwp->dw_mask & DW_vm_page_speculate)
4476 vm_page_speculate(dwp->dw_m, TRUE);
4477 else if (dwp->dw_mask & DW_vm_page_lru)
4478 vm_page_lru(dwp->dw_m);
4479
4480 if (dwp->dw_mask & DW_set_reference)
4481 dwp->dw_m->reference = TRUE;
4482 else if (dwp->dw_mask & DW_clear_reference)
4483 dwp->dw_m->reference = FALSE;
4484
4485 if (dwp->dw_mask & DW_clear_busy)
4486 dwp->dw_m->busy = FALSE;
4487
4488 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4489 PAGE_WAKEUP(dwp->dw_m);
4490 }
4491 }
4492 vm_page_unlock_queues();
4493 }
4494
4495
4496
4497 kern_return_t
4498 upl_commit_range(
4499 upl_t upl,
4500 upl_offset_t offset,
4501 upl_size_t size,
4502 int flags,
4503 upl_page_info_t *page_list,
4504 mach_msg_type_number_t count,
4505 boolean_t *empty)
4506 {
4507 upl_size_t xfer_size, subupl_size = size;
4508 vm_object_t shadow_object;
4509 vm_object_t object;
4510 vm_object_offset_t target_offset;
4511 upl_offset_t subupl_offset = offset;
4512 int entry;
4513 wpl_array_t lite_list;
4514 int occupied;
4515 int clear_refmod = 0;
4516 int pgpgout_count = 0;
4517 struct dw dw_array[DELAYED_WORK_LIMIT];
4518 struct dw *dwp;
4519 int dw_count, isVectorUPL = 0;
4520 upl_t vector_upl = NULL;
4521
4522 *empty = FALSE;
4523
4524 if (upl == UPL_NULL)
4525 return KERN_INVALID_ARGUMENT;
4526
4527 if (count == 0)
4528 page_list = NULL;
4529
4530 if((isVectorUPL = vector_upl_is_valid(upl))) {
4531 vector_upl = upl;
4532 upl_lock(vector_upl);
4533 }
4534 else
4535 upl_lock(upl);
4536
4537 process_upl_to_commit:
4538
4539 if(isVectorUPL) {
4540 size = subupl_size;
4541 offset = subupl_offset;
4542 if(size == 0) {
4543 upl_unlock(vector_upl);
4544 return KERN_SUCCESS;
4545 }
4546 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
4547 if(upl == NULL) {
4548 upl_unlock(vector_upl);
4549 return KERN_FAILURE;
4550 }
4551 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
4552 subupl_size -= size;
4553 subupl_offset += size;
4554 }
4555
4556 #if UPL_DEBUG
4557 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
4558 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
4559
4560 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
4561 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
4562
4563 upl->upl_commit_index++;
4564 }
4565 #endif
4566 if (upl->flags & UPL_DEVICE_MEMORY)
4567 xfer_size = 0;
4568 else if ((offset + size) <= upl->size)
4569 xfer_size = size;
4570 else {
4571 if(!isVectorUPL)
4572 upl_unlock(upl);
4573 else {
4574 upl_unlock(vector_upl);
4575 }
4576 return KERN_FAILURE;
4577 }
4578 if (upl->flags & UPL_CLEAR_DIRTY)
4579 flags |= UPL_COMMIT_CLEAR_DIRTY;
4580
4581 if (upl->flags & UPL_INTERNAL)
4582 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
4583 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4584 else
4585 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
4586
4587 object = upl->map_object;
4588
4589 if (upl->flags & UPL_SHADOWED) {
4590 vm_object_lock(object);
4591 shadow_object = object->shadow;
4592 } else {
4593 shadow_object = object;
4594 }
4595 entry = offset/PAGE_SIZE;
4596 target_offset = (vm_object_offset_t)offset;
4597
4598 if (upl->flags & UPL_KERNEL_OBJECT)
4599 vm_object_lock_shared(shadow_object);
4600 else
4601 vm_object_lock(shadow_object);
4602
4603 if (upl->flags & UPL_ACCESS_BLOCKED) {
4604 assert(shadow_object->blocked_access);
4605 shadow_object->blocked_access = FALSE;
4606 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
4607 }
4608
4609 if (shadow_object->code_signed) {
4610 /*
4611 * CODE SIGNING:
4612 * If the object is code-signed, do not let this UPL tell
4613 * us if the pages are valid or not. Let the pages be
4614 * validated by VM the normal way (when they get mapped or
4615 * copied).
4616 */
4617 flags &= ~UPL_COMMIT_CS_VALIDATED;
4618 }
4619 if (! page_list) {
4620 /*
4621 * No page list to get the code-signing info from !?
4622 */
4623 flags &= ~UPL_COMMIT_CS_VALIDATED;
4624 }
4625
4626 dwp = &dw_array[0];
4627 dw_count = 0;
4628
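/*
 * Walk the committed range one page at a time, clearing the
 * corresponding lite-list bits and queueing any page state
 * changes as delayed work so they can be applied in batches.
 */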
4629 while (xfer_size) {
4630 vm_page_t t, m;
4631
4632 dwp->dw_mask = 0;
4633 clear_refmod = 0;
4634
4635 m = VM_PAGE_NULL;
4636
4637 if (upl->flags & UPL_LITE) {
4638 unsigned int pg_num;
4639
4640 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
4641 assert(pg_num == target_offset/PAGE_SIZE);
4642
4643 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4644 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4645
4646 if (!(upl->flags & UPL_KERNEL_OBJECT))
4647 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
4648 }
4649 }
4650 if (upl->flags & UPL_SHADOWED) {
4651 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
4652
4653 t->pageout = FALSE;
4654
4655 VM_PAGE_FREE(t);
4656
4657 if (m == VM_PAGE_NULL)
4658 m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
4659 }
4660 }
4661 if ((upl->flags & UPL_KERNEL_OBJECT) || m == VM_PAGE_NULL)
4662 goto commit_next_page;
4663
4664 if (flags & UPL_COMMIT_CS_VALIDATED) {
4665 /*
4666 * CODE SIGNING:
4667 * Set the code signing bits according to
4668 * what the UPL says they should be.
4669 */
4670 m->cs_validated = page_list[entry].cs_validated;
4671 m->cs_tainted = page_list[entry].cs_tainted;
4672 }
4673 if (upl->flags & UPL_IO_WIRE) {
4674
4675 if (page_list)
4676 page_list[entry].phys_addr = 0;
4677
4678 if (flags & UPL_COMMIT_SET_DIRTY)
4679 m->dirty = TRUE;
4680 else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
4681 m->dirty = FALSE;
4682
4683 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
4684 m->cs_validated && !m->cs_tainted) {
4685 /*
4686 * CODE SIGNING:
4687 * This page is no longer dirty
4688 * but could have been modified,
4689 * so it will need to be
4690 * re-validated.
4691 */
4692 m->cs_validated = FALSE;
4693 #if DEVELOPMENT || DEBUG
4694 vm_cs_validated_resets++;
4695 #endif
4696 pmap_disconnect(m->phys_page);
4697 }
4698 clear_refmod |= VM_MEM_MODIFIED;
4699 }
4700 if (flags & UPL_COMMIT_INACTIVATE) {
4701 dwp->dw_mask |= DW_vm_page_deactivate_internal;
4702 clear_refmod |= VM_MEM_REFERENCED;
4703 }
4704 if (upl->flags & UPL_ACCESS_BLOCKED) {
4705 /*
4706 * We blocked access to the pages in this UPL.
4707 * Clear the "busy" bit and wake up any waiter
4708 * for this page.
4709 */
4710 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
4711 }
4712 if (m->absent) {
4713 if (flags & UPL_COMMIT_FREE_ABSENT)
4714 dwp->dw_mask |= DW_vm_page_free;
4715 else {
4716 m->absent = FALSE;
4717 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
4718 }
4719 } else
4720 dwp->dw_mask |= DW_vm_page_unwire;
4721
4722 goto commit_next_page;
4723 }
4724 /*
4725 * make sure to clear the hardware
4726 * modify or reference bits before
4727 * releasing the BUSY bit on this page
4728 * otherwise we risk losing a legitimate
4729 * change of state
4730 */
4731 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
4732 m->dirty = FALSE;
4733
4734 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
4735 m->cs_validated && !m->cs_tainted) {
4736 /*
4737 * CODE SIGNING:
4738 * This page is no longer dirty
4739 * but could have been modified,
4740 * so it will need to be
4741 * re-validated.
4742 */
4743 m->cs_validated = FALSE;
4744 #if DEVELOPMENT || DEBUG
4745 vm_cs_validated_resets++;
4746 #endif
4747 pmap_disconnect(m->phys_page);
4748 }
4749 clear_refmod |= VM_MEM_MODIFIED;
4750 }
4751 if (page_list) {
4752 upl_page_info_t *p;
4753
4754 p = &(page_list[entry]);
4755
4756 if (p->phys_addr && p->pageout && !m->pageout) {
4757 m->busy = TRUE;
4758 m->pageout = TRUE;
4759
4760 dwp->dw_mask |= DW_vm_page_wire;
4761
4762 } else if (p->phys_addr &&
4763 !p->pageout && m->pageout &&
4764 !m->dump_cleaning) {
4765 m->pageout = FALSE;
4766 m->absent = FALSE;
4767 m->overwriting = FALSE;
4768
4769 dwp->dw_mask |= (DW_vm_page_unwire | DW_clear_busy | DW_PAGE_WAKEUP);
4770 }
4771 page_list[entry].phys_addr = 0;
4772 }
4773 m->dump_cleaning = FALSE;
4774
4775 if (m->laundry)
4776 dwp->dw_mask |= DW_vm_pageout_throttle_up;
4777
4778 if (m->pageout) {
4779 m->cleaning = FALSE;
4780 m->encrypted_cleaning = FALSE;
4781 m->pageout = FALSE;
4782 #if MACH_CLUSTER_STATS
4783 if (m->wanted) vm_pageout_target_collisions++;
4784 #endif
4785 m->dirty = FALSE;
4786
4787 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
4788 m->cs_validated && !m->cs_tainted) {
4789 /*
4790 * CODE SIGNING:
4791 * This page is no longer dirty
4792 * but could have been modified,
4793 * so it will need to be
4794 * re-validated.
4795 */
4796 m->cs_validated = FALSE;
4797 #if DEVELOPMENT || DEBUG
4798 vm_cs_validated_resets++;
4799 #endif
4800 pmap_disconnect(m->phys_page);
4801 }
4802
4803 if ((flags & UPL_COMMIT_SET_DIRTY) ||
4804 (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)))
4805 m->dirty = TRUE;
4806
4807 if (m->dirty) {
4808 /*
4809 * page was re-dirtied after we started
4810 * the pageout... reactivate it since
4811 * we don't know whether the on-disk
4812 * copy matches what is now in memory
4813 */
4814 dwp->dw_mask |= (DW_vm_page_unwire | DW_clear_busy | DW_PAGE_WAKEUP);
4815
4816 if (upl->flags & UPL_PAGEOUT) {
4817 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
4818 VM_STAT_INCR(reactivations);
4819 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4820 }
4821 } else {
4822 /*
4823 * page has been successfully cleaned
4824 * go ahead and free it for other use
4825 */
4826
4827 if (m->object->internal) {
4828 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
4829 } else {
4830 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
4831 }
4832 dwp->dw_mask |= DW_vm_page_free;
4833
4834 if (upl->flags & UPL_PAGEOUT) {
4835 CLUSTER_STAT(vm_pageout_target_page_freed++;)
4836
4837 if (page_list[entry].dirty) {
4838 VM_STAT_INCR(pageouts);
4839 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
4840 pgpgout_count++;
4841 }
4842 }
4843 }
4844 goto commit_next_page;
4845 }
4846 #if MACH_CLUSTER_STATS
4847 if (m->wpmapped)
4848 m->dirty = pmap_is_modified(m->phys_page);
4849
4850 if (m->dirty) vm_pageout_cluster_dirtied++;
4851 else vm_pageout_cluster_cleaned++;
4852 if (m->wanted) vm_pageout_cluster_collisions++;
4853 #endif
4854 m->dirty = FALSE;
4855
4856 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
4857 m->cs_validated && !m->cs_tainted) {
4858 /*
4859 * CODE SIGNING:
4860 * This page is no longer dirty
4861 * but could have been modified,
4862 * so it will need to be
4863 * re-validated.
4864 */
4865 m->cs_validated = FALSE;
4866 #if DEVELOPMENT || DEBUG
4867 vm_cs_validated_resets++;
4868 #endif
4869 pmap_disconnect(m->phys_page);
4870 }
4871
4872 if ((m->busy) && (m->cleaning)) {
4873 /*
4874 * the request_page_list case
4875 */
4876 m->absent = FALSE;
4877 m->overwriting = FALSE;
4878
4879 dwp->dw_mask |= DW_clear_busy;
4880
4881 } else if (m->overwriting) {
4882 /*
4883 * alternate request page list, write to
4884 * page_list case. Occurs when the original
4885 * page was wired at the time of the list
4886 * request
4887 */
4888 assert(VM_PAGE_WIRED(m));
4889 m->overwriting = FALSE;
4890
4891 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
4892 }
4893 m->cleaning = FALSE;
4894 m->encrypted_cleaning = FALSE;
4895
4896 /*
4897 * It is a part of the semantic of COPYOUT_FROM
4898 * UPLs that a commit implies cache sync
4899 * between the vm page and the backing store
4900 * this can be used to strip the precious bit
4901 * as well as clean
4902 */
4903 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
4904 m->precious = FALSE;
4905
4906 if (flags & UPL_COMMIT_SET_DIRTY)
4907 m->dirty = TRUE;
4908
4909 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
4910 dwp->dw_mask |= DW_vm_page_deactivate_internal;
4911 clear_refmod |= VM_MEM_REFERENCED;
4912
4913 } else if (!m->active && !m->inactive && !m->speculative) {
4914
4915 if (m->clustered || (flags & UPL_COMMIT_SPECULATE))
4916 dwp->dw_mask |= DW_vm_page_speculate;
4917 else if (m->reference)
4918 dwp->dw_mask |= DW_vm_page_activate;
4919 else {
4920 dwp->dw_mask |= DW_vm_page_deactivate_internal;
4921 clear_refmod |= VM_MEM_REFERENCED;
4922 }
4923 }
4924 if (upl->flags & UPL_ACCESS_BLOCKED) {
4925 /*
4926 * We blocked access to the pages in this UPL.
4927 * Clear the "busy" bit on this page before we
4928 * wake up any waiter.
4929 */
4930 dwp->dw_mask |= DW_clear_busy;
4931 }
4932 /*
4933 * Wakeup any thread waiting for the page to be un-cleaning.
4934 */
4935 dwp->dw_mask |= DW_PAGE_WAKEUP;
4936
4937 commit_next_page:
4938 if (clear_refmod)
4939 pmap_clear_refmod(m->phys_page, clear_refmod);
4940
4941 target_offset += PAGE_SIZE_64;
4942 xfer_size -= PAGE_SIZE;
4943 entry++;
4944
4945 if (dwp->dw_mask) {
4946 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
4947 if (m->busy == FALSE) {
4948 /*
4949 * dw_do_work may need to drop the object lock;
4950 * if it does, we need the pages it's looking at to
4951 * be held stable via the busy bit.
4952 */
4953 m->busy = TRUE;
4954 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
4955 }
4956 dwp->dw_m = m;
4957 dwp++;
4958 dw_count++;
4959
4960 if (dw_count >= DELAYED_WORK_LIMIT) {
4961 dw_do_work(shadow_object, &dw_array[0], dw_count);
4962
4963 dwp = &dw_array[0];
4964 dw_count = 0;
4965 }
4966 } else {
4967 if (dwp->dw_mask & DW_clear_busy)
4968 m->busy = FALSE;
4969
4970 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4971 PAGE_WAKEUP(m);
4972 }
4973 }
4974 }
4975 if (dw_count)
4976 dw_do_work(shadow_object, &dw_array[0], dw_count);
4977
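/*
 * Determine whether any pages are still associated with this
 * UPL: for a lite UPL, scan the bitmap for remaining set bits;
 * otherwise check whether the map object still has resident
 * pages.
 */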
4978 occupied = 1;
4979
4980 if (upl->flags & UPL_DEVICE_MEMORY) {
4981 occupied = 0;
4982 } else if (upl->flags & UPL_LITE) {
4983 int pg_num;
4984 int i;
4985
4986 pg_num = upl->size/PAGE_SIZE;
4987 pg_num = (pg_num + 31) >> 5;
4988 occupied = 0;
4989
4990 for (i = 0; i < pg_num; i++) {
4991 if (lite_list[i] != 0) {
4992 occupied = 1;
4993 break;
4994 }
4995 }
4996 } else {
4997 if (queue_empty(&upl->map_object->memq))
4998 occupied = 0;
4999 }
5000 if (occupied == 0) {
5001 /*
5002 * If this UPL element belongs to a Vector UPL and is
5003 * empty, then this is the right function to deallocate
5004 * it. So go ahead and set the *empty variable. The flag
5005 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
5006 * should be considered relevant for the Vector UPL and not
5007 * the internal UPLs.
5008 */
5009 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
5010 *empty = TRUE;
5011
5012 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
5013 /*
5014 * this is not a paging object
5015 * so we need to drop the paging reference
5016 * that was taken when we created the UPL
5017 * against this object
5018 */
5019 vm_object_activity_end(shadow_object);
5020 } else {
5021 /*
5022 * we donated the paging reference to
5023 * the map object... vm_pageout_object_terminate
5024 * will drop this reference
5025 */
5026 }
5027 }
5028 vm_object_unlock(shadow_object);
5029 if (object != shadow_object)
5030 vm_object_unlock(object);
5031
5032 if(!isVectorUPL)
5033 upl_unlock(upl);
5034 else {
5035 /*
5036 * If we completed our operations on an UPL that is
5037 * part of a Vectored UPL and if empty is TRUE, then
5038 * we should go ahead and deallocate this UPL element.
5039 * Then we check if this was the last of the UPL elements
5040 * within that Vectored UPL. If so, set empty to TRUE
5041 * so that in ubc_upl_commit_range or ubc_upl_commit, we
5042 * can go ahead and deallocate the Vector UPL too.
5043 */
5044 if(*empty==TRUE) {
5045 *empty = vector_upl_set_subupl(vector_upl, upl, 0);
5046 upl_deallocate(upl);
5047 }
5048 goto process_upl_to_commit;
5049 }
5050
5051 if (pgpgout_count) {
5052 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
5053 }
5054
5055 return KERN_SUCCESS;
5056 }
5057
5058 kern_return_t
5059 upl_abort_range(
5060 upl_t upl,
5061 upl_offset_t offset,
5062 upl_size_t size,
5063 int error,
5064 boolean_t *empty)
5065 {
5066 upl_size_t xfer_size, subupl_size = size;
5067 vm_object_t shadow_object;
5068 vm_object_t object;
5069 vm_object_offset_t target_offset;
5070 upl_offset_t subupl_offset = offset;
5071 int entry;
5072 wpl_array_t lite_list;
5073 int occupied;
5074 struct dw dw_array[DELAYED_WORK_LIMIT];
5075 struct dw *dwp;
5076 int dw_count, isVectorUPL = 0;
5077 upl_t vector_upl = NULL;
5078
5079 *empty = FALSE;
5080
5081 if (upl == UPL_NULL)
5082 return KERN_INVALID_ARGUMENT;
5083
5084 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
5085 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
5086
5087 if((isVectorUPL = vector_upl_is_valid(upl))) {
5088 vector_upl = upl;
5089 upl_lock(vector_upl);
5090 }
5091 else
5092 upl_lock(upl);
5093
5094 process_upl_to_abort:
5095 if(isVectorUPL) {
5096 size = subupl_size;
5097 offset = subupl_offset;
5098 if(size == 0) {
5099 upl_unlock(vector_upl);
5100 return KERN_SUCCESS;
5101 }
5102 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
5103 if(upl == NULL) {
5104 upl_unlock(vector_upl);
5105 return KERN_FAILURE;
5106 }
5107 subupl_size -= size;
5108 subupl_offset += size;
5109 }
5110
5111 *empty = FALSE;
5112
5113 #if UPL_DEBUG
5114 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
5115 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
5116
5117 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
5118 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
5119 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1;
5120
5121 upl->upl_commit_index++;
5122 }
5123 #endif
5124 if (upl->flags & UPL_DEVICE_MEMORY)
5125 xfer_size = 0;
5126 else if ((offset + size) <= upl->size)
5127 xfer_size = size;
5128 else {
5129 if(!isVectorUPL)
5130 upl_unlock(upl);
5131 else {
5132 upl_unlock(vector_upl);
5133 }
5134
5135 return KERN_FAILURE;
5136 }
5137 if (upl->flags & UPL_INTERNAL) {
5138 lite_list = (wpl_array_t)
5139 ((((uintptr_t)upl) + sizeof(struct upl))
5140 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
5141 } else {
5142 lite_list = (wpl_array_t)
5143 (((uintptr_t)upl) + sizeof(struct upl));
5144 }
5145 object = upl->map_object;
5146
5147 if (upl->flags & UPL_SHADOWED) {
5148 vm_object_lock(object);
5149 shadow_object = object->shadow;
5150 } else
5151 shadow_object = object;
5152
5153 entry = offset/PAGE_SIZE;
5154 target_offset = (vm_object_offset_t)offset;
5155
5156 if (upl->flags & UPL_KERNEL_OBJECT)
5157 vm_object_lock_shared(shadow_object);
5158 else
5159 vm_object_lock(shadow_object);
5160
5161 if (upl->flags & UPL_ACCESS_BLOCKED) {
5162 assert(shadow_object->blocked_access);
5163 shadow_object->blocked_access = FALSE;
5164 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
5165 }
5166
5167 dwp = &dw_array[0];
5168 dw_count = 0;
5169
5170 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
5171 panic("upl_abort_range: kernel_object being DUMPED");
5172
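/*
 * Walk the aborted range one page at a time. Absent pages are
 * either freed or kept with restart/error state for the page's
 * customer; resident pages have their cleaning/pageout state
 * undone and are requeued or dumped, depending on the abort
 * flags.
 */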
5173 while (xfer_size) {
5174 vm_page_t t, m;
5175
5176 dwp->dw_mask = 0;
5177
5178 m = VM_PAGE_NULL;
5179
5180 if (upl->flags & UPL_LITE) {
5181 unsigned int pg_num;
5182
5183 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
5184 assert(pg_num == target_offset/PAGE_SIZE);
5185
5186
5187 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
5188 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
5189
5190 if ( !(upl->flags & UPL_KERNEL_OBJECT))
5191 m = vm_page_lookup(shadow_object, target_offset +
5192 (upl->offset - shadow_object->paging_offset));
5193 }
5194 }
5195 if (upl->flags & UPL_SHADOWED) {
5196 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
5197 t->pageout = FALSE;
5198
5199 VM_PAGE_FREE(t);
5200
5201 if (m == VM_PAGE_NULL)
5202 m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
5203 }
5204 }
5205 if ((upl->flags & UPL_KERNEL_OBJECT))
5206 goto abort_next_page;
5207
5208 if (m != VM_PAGE_NULL) {
5209
5210 if (m->absent) {
5211 boolean_t must_free = TRUE;
5212
5213 m->clustered = FALSE;
5214 /*
5215 * COPYOUT = FALSE case
5216 * check for error conditions which must
5217 * be passed back to the page's customer
5218 */
5219 if (error & UPL_ABORT_RESTART) {
5220 m->restart = TRUE;
5221 m->absent = FALSE;
5222 m->unusual = TRUE;
5223 must_free = FALSE;
5224 } else if (error & UPL_ABORT_UNAVAILABLE) {
5225 m->restart = FALSE;
5226 m->unusual = TRUE;
5227 must_free = FALSE;
5228 } else if (error & UPL_ABORT_ERROR) {
5229 m->restart = FALSE;
5230 m->absent = FALSE;
5231 m->error = TRUE;
5232 m->unusual = TRUE;
5233 must_free = FALSE;
5234 }
5235
5236 /*
5237 * ENCRYPTED SWAP:
5238 * If the page was already encrypted,
5239 * we don't really need to decrypt it
5240 * now. It will get decrypted later,
5241 * on demand, as soon as someone needs
5242 * to access its contents.
5243 */
5244
5245 m->cleaning = FALSE;
5246 m->encrypted_cleaning = FALSE;
5247 m->overwriting = FALSE;
5248
5249 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
5250
5251 if (must_free == TRUE)
5252 dwp->dw_mask |= DW_vm_page_free;
5253 else
5254 dwp->dw_mask |= DW_vm_page_activate;
5255 } else {
5256 /*
5257 * Handle the trusted pager throttle.
5258 */
5259 if (m->laundry)
5260 dwp->dw_mask |= DW_vm_pageout_throttle_up;
5261
5262 if (m->pageout) {
5263 assert(m->busy);
5264 assert(m->wire_count == 1);
5265 m->pageout = FALSE;
5266
5267 dwp->dw_mask |= DW_vm_page_unwire;
5268 }
5269 m->dump_cleaning = FALSE;
5270 m->cleaning = FALSE;
5271 m->encrypted_cleaning = FALSE;
5272 m->overwriting = FALSE;
5273 #if MACH_PAGEMAP
5274 vm_external_state_clr(m->object->existence_map, m->offset);
5275 #endif /* MACH_PAGEMAP */
5276 if (error & UPL_ABORT_DUMP_PAGES) {
5277 pmap_disconnect(m->phys_page);
5278
5279 dwp->dw_mask |= DW_vm_page_free;
5280 } else {
5281 if (error & UPL_ABORT_REFERENCE) {
5282 /*
5283 * we've been told to explicitly
5284 * reference this page... for
5285 * file I/O, this is done by
5286 * implementing an LRU on the inactive q
5287 */
5288 dwp->dw_mask |= DW_vm_page_lru;
5289 }
5290 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
5291 }
5292 }
5293 }
5294 abort_next_page:
5295 target_offset += PAGE_SIZE_64;
5296 xfer_size -= PAGE_SIZE;
5297 entry++;
5298
5299 if (dwp->dw_mask) {
5300 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
5301 if (m->busy == FALSE) {
5302 /*
5303 * dw_do_work may need to drop the object lock;
5304 * if it does, we need the pages it's looking at to
5305 * be held stable via the busy bit.
5306 */
5307 m->busy = TRUE;
5308 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
5309 }
5310 dwp->dw_m = m;
5311 dwp++;
5312 dw_count++;
5313
5314 if (dw_count >= DELAYED_WORK_LIMIT) {
5315 dw_do_work(shadow_object, &dw_array[0], dw_count);
5316
5317 dwp = &dw_array[0];
5318 dw_count = 0;
5319 }
5320 } else {
5321 if (dwp->dw_mask & DW_clear_busy)
5322 m->busy = FALSE;
5323
5324 if (dwp->dw_mask & DW_PAGE_WAKEUP)
5325 PAGE_WAKEUP(m);
5326 }
5327 }
5328 }
5329 if (dw_count)
5330 dw_do_work(shadow_object, &dw_array[0], dw_count);
5331
5332 occupied = 1;
5333
5334 if (upl->flags & UPL_DEVICE_MEMORY) {
5335 occupied = 0;
5336 } else if (upl->flags & UPL_LITE) {
5337 int pg_num;
5338 int i;
5339
5340 pg_num = upl->size/PAGE_SIZE;
5341 pg_num = (pg_num + 31) >> 5;
5342 occupied = 0;
5343
5344 for (i = 0; i < pg_num; i++) {
5345 if (lite_list[i] != 0) {
5346 occupied = 1;
5347 break;
5348 }
5349 }
5350 } else {
5351 if (queue_empty(&upl->map_object->memq))
5352 occupied = 0;
5353 }
5354 if (occupied == 0) {
5355 /*
5356 * If this UPL element belongs to a Vector UPL and is
5357 * empty, then this is the right function to deallocate
5358 * it. So go ahead and set the *empty variable. The flag
5359 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
5360 * should be considered relevant for the Vector UPL and
5361 * not the internal UPLs.
5362 */
5363 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
5364 *empty = TRUE;
5365
5366 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
5367 /*
5368 * this is not a paging object
5369 * so we need to drop the paging reference
5370 * that was taken when we created the UPL
5371 * against this object
5372 */
5373 vm_object_activity_end(shadow_object);
5374 } else {
5375 /*
5376 * we donated the paging reference to
5377 * the map object... vm_pageout_object_terminate
5378 * will drop this reference
5379 */
5380 }
5381 }
5382 vm_object_unlock(shadow_object);
5383 if (object != shadow_object)
5384 vm_object_unlock(object);
5385
5386 if(!isVectorUPL)
5387 upl_unlock(upl);
5388 else {
5389 /*
5390 * If we completed our operations on an UPL that is
5391 * part of a Vectored UPL and if empty is TRUE, then
5392 * we should go ahead and deallocate this UPL element.
5393 * Then we check if this was the last of the UPL elements
5394 * within that Vectored UPL. If so, set empty to TRUE
5395 * so that in ubc_upl_abort_range or ubc_upl_abort, we
5396 * can go ahead and deallocate the Vector UPL too.
5397 */
5398 if(*empty == TRUE) {
5399 *empty = vector_upl_set_subupl(vector_upl, upl,0);
5400 upl_deallocate(upl);
5401 }
5402 goto process_upl_to_abort;
5403 }
5404
5405 return KERN_SUCCESS;
5406 }
5407
5408
5409 kern_return_t
5410 upl_abort(
5411 upl_t upl,
5412 int error)
5413 {
5414 boolean_t empty;
5415
5416 return upl_abort_range(upl, 0, upl->size, error, &empty);
5417 }
5418
5419
5420 /* an option on commit should be wire */
5421 kern_return_t
5422 upl_commit(
5423 upl_t upl,
5424 upl_page_info_t *page_list,
5425 mach_msg_type_number_t count)
5426 {
5427 boolean_t empty;
5428
5429 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
5430 }
5431
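/*
 * Illustrative sketch only, not part of this file's code: one
 * possible lifecycle of a map-backed UPL as seen by a kernel
 * caller.  The control flags, the error handling, and the
 * "map", "offset" and "pl" variables below are assumptions
 * supplied by that hypothetical caller.
 *
 *	upl_t		upl;
 *	upl_size_t	upl_size = PAGE_SIZE;
 *	upl_page_info_t	pl[1];
 *	unsigned int	count = 1;
 *	int		flags = UPL_COPYOUT_FROM;
 *	vm_map_offset_t	kaddr;
 *
 *	vm_map_create_upl(map, offset, &upl_size, &upl, pl, &count, &flags);
 *	vm_map_enter_upl(kernel_map, upl, &kaddr);	wire and map the pages
 *	... operate on the pages through kaddr ...
 *	vm_map_remove_upl(kernel_map, upl);		tear the mapping down
 *	upl_commit(upl, pl, count);			or upl_abort() on error
 *	upl_deallocate(upl);
 */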
5432
5433 unsigned int vm_object_iopl_request_sleep_for_cleaning = 0;
5434
5435 kern_return_t
5436 vm_object_iopl_request(
5437 vm_object_t object,
5438 vm_object_offset_t offset,
5439 upl_size_t size,
5440 upl_t *upl_ptr,
5441 upl_page_info_array_t user_page_list,
5442 unsigned int *page_list_count,
5443 int cntrl_flags)
5444 {
5445 vm_page_t dst_page;
5446 vm_object_offset_t dst_offset;
5447 upl_size_t xfer_size;
5448 upl_t upl = NULL;
5449 unsigned int entry;
5450 wpl_array_t lite_list = NULL;
5451 int no_zero_fill = FALSE;
5452 u_int32_t psize;
5453 kern_return_t ret;
5454 vm_prot_t prot;
5455 struct vm_object_fault_info fault_info;
5456 struct dw dw_array[DELAYED_WORK_LIMIT];
5457 struct dw *dwp;
5458 int dw_count;
5459 int dw_index;
5460
5461 if (cntrl_flags & ~UPL_VALID_FLAGS) {
5462 /*
5463 * For forward compatibility's sake,
5464 * reject any unknown flag.
5465 */
5466 return KERN_INVALID_VALUE;
5467 }
5468 if (vm_lopage_needed == FALSE)
5469 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
5470
5471 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
5472 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
5473 return KERN_INVALID_VALUE;
5474
5475 if (object->phys_contiguous) {
5476 if ((offset + object->shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
5477 return KERN_INVALID_ADDRESS;
5478
5479 if (((offset + object->shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
5480 return KERN_INVALID_ADDRESS;
5481 }
5482 }
5483
5484 if (cntrl_flags & UPL_ENCRYPT) {
5485 /*
5486 * ENCRYPTED SWAP:
5487 * The paging path doesn't use this interface,
5488 * so we don't support the UPL_ENCRYPT flag
5489 * here. We won't encrypt the pages.
5490 */
5491 assert(! (cntrl_flags & UPL_ENCRYPT));
5492 }
5493 if (cntrl_flags & UPL_NOZEROFILL)
5494 no_zero_fill = TRUE;
5495
5496 if (cntrl_flags & UPL_COPYOUT_FROM)
5497 prot = VM_PROT_READ;
5498 else
5499 prot = VM_PROT_READ | VM_PROT_WRITE;
5500
5501 if (((size/PAGE_SIZE) > MAX_UPL_SIZE) && !object->phys_contiguous)
5502 size = MAX_UPL_SIZE * PAGE_SIZE;
5503
5504 if (cntrl_flags & UPL_SET_INTERNAL) {
5505 if (page_list_count != NULL)
5506 *page_list_count = MAX_UPL_SIZE;
5507 }
5508 if (((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
5509 ((page_list_count != NULL) && (*page_list_count != 0) && *page_list_count < (size/page_size)))
5510 return KERN_INVALID_ARGUMENT;
5511
5512 if ((!object->internal) && (object->paging_offset != 0))
5513 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
5514
5515
5516 if (object->phys_contiguous)
5517 psize = PAGE_SIZE;
5518 else
5519 psize = size;
5520
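/*
 * For an internal UPL the page-info array is allocated
 * immediately after the upl structure itself, with the lite
 * bitmap following the page-info array; for a non-internal
 * UPL only the lite bitmap follows the structure.
 */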
5521 if (cntrl_flags & UPL_SET_INTERNAL) {
5522 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, UPL_IO_WIRE, psize);
5523
5524 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
5525 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
5526 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
5527 if (size == 0) {
5528 user_page_list = NULL;
5529 lite_list = NULL;
5530 }
5531 } else {
5532 upl = upl_create(UPL_CREATE_LITE, UPL_IO_WIRE, psize);
5533
5534 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
5535 if (size == 0) {
5536 lite_list = NULL;
5537 }
5538 }
5539 if (user_page_list)
5540 user_page_list[0].device = FALSE;
5541 *upl_ptr = upl;
5542
5543 upl->map_object = object;
5544 upl->size = size;
5545
5546 if (object == kernel_object &&
5547 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
5548 upl->flags |= UPL_KERNEL_OBJECT;
5549 #if UPL_DEBUG
5550 vm_object_lock(object);
5551 #else
5552 vm_object_lock_shared(object);
5553 #endif
5554 } else {
5555 vm_object_lock(object);
5556 vm_object_activity_begin(object);
5557 }
5558 /*
5559 * paging in progress also protects the paging_offset
5560 */
5561 upl->offset = offset + object->paging_offset;
5562
5563 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5564 /*
5565 * The user requested that access to the pages in this UPL
5566 * be blocked until the UPL is committed or aborted.
5567 */
5568 upl->flags |= UPL_ACCESS_BLOCKED;
5569 }
5570
5571 if (object->phys_contiguous) {
5572 #if UPL_DEBUG
5573 queue_enter(&object->uplq, upl, upl_t, uplq);
5574 #endif /* UPL_DEBUG */
5575
5576 if (upl->flags & UPL_ACCESS_BLOCKED) {
5577 assert(!object->blocked_access);
5578 object->blocked_access = TRUE;
5579 }
5580
5581 vm_object_unlock(object);
5582
5583 /*
5584 * don't need any shadow mappings for this one
5585 * since it is already I/O memory
5586 */
5587 upl->flags |= UPL_DEVICE_MEMORY;
5588
5589 upl->highest_page = (ppnum_t) ((offset + object->shadow_offset + size - 1)>>PAGE_SHIFT);
5590
5591 if (user_page_list) {
5592 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->shadow_offset)>>PAGE_SHIFT);
5593 user_page_list[0].device = TRUE;
5594 }
5595 if (page_list_count != NULL) {
5596 if (upl->flags & UPL_INTERNAL)
5597 *page_list_count = 0;
5598 else
5599 *page_list_count = 1;
5600 }
5601 return KERN_SUCCESS;
5602 }
5603 if (object != kernel_object) {
5604 /*
5605 * Protect user space from future COW operations
5606 */
5607 object->true_share = TRUE;
5608
5609 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
5610 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
5611 }
5612
5613 #if UPL_DEBUG
5614 queue_enter(&object->uplq, upl, upl_t, uplq);
5615 #endif /* UPL_DEBUG */
5616
5617 if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
5618 object->copy != VM_OBJECT_NULL) {
5619 /*
5620 * Honor copy-on-write obligations
5621 *
5622 * The caller is gathering these pages and
5623 * might modify their contents. We need to
5624 * make sure that the copy object has its own
5625 * private copies of these pages before we let
5626 * the caller modify them.
5627 *
5628 * NOTE: someone else could map the original object
5629 * after we've done this copy-on-write here, and they
5630 * could then see an inconsistent picture of the memory
5631 * while it's being modified via the UPL. To prevent this,
5632 * we would have to block access to these pages until the
5633 * UPL is released. We could use the UPL_BLOCK_ACCESS
5634 * code path for that...
5635 */
5636 vm_object_update(object,
5637 offset,
5638 size,
5639 NULL,
5640 NULL,
5641 FALSE, /* should_return */
5642 MEMORY_OBJECT_COPY_SYNC,
5643 VM_PROT_NO_CHANGE);
5644 #if DEVELOPMENT || DEBUG
5645 iopl_cow++;
5646 iopl_cow_pages += size >> PAGE_SHIFT;
5647 #endif
5648 }
5649
5650
5651 entry = 0;
5652
5653 xfer_size = size;
5654 dst_offset = offset;
5655
5656 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
5657 fault_info.user_tag = 0;
5658 fault_info.lo_offset = offset;
5659 fault_info.hi_offset = offset + xfer_size;
5660 fault_info.no_cache = FALSE;
5661 fault_info.stealth = FALSE;
5662 fault_info.mark_zf_absent = TRUE;
5663
5664 dwp = &dw_array[0];
5665 dw_count = 0;
5666
5667 while (xfer_size) {
5668 vm_fault_return_t result;
5669 unsigned int pg_num;
5670
5671 dwp->dw_mask = 0;
5672
5673 dst_page = vm_page_lookup(object, dst_offset);
5674
5675 /*
5676 * ENCRYPTED SWAP:
5677 * If the page is encrypted, we need to decrypt it,
5678 * so force a soft page fault.
5679 */
5680 if (dst_page == VM_PAGE_NULL ||
5681 dst_page->busy ||
5682 dst_page->encrypted ||
5683 dst_page->error ||
5684 dst_page->restart ||
5685 dst_page->absent ||
5686 dst_page->fictitious) {
5687
5688 if (object == kernel_object)
5689 panic("vm_object_iopl_request: missing/bad page in kernel object\n");
5690
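/*
 * The resident page (if any) isn't immediately usable, so
 * fault it in, retrying until vm_fault_page() either
 * succeeds or reports a hard error.
 */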
5691 do {
5692 vm_page_t top_page;
5693 kern_return_t error_code;
5694 int interruptible;
5695
5696 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
5697 interruptible = THREAD_ABORTSAFE;
5698 else
5699 interruptible = THREAD_UNINT;
5700
5701 fault_info.interruptible = interruptible;
5702 fault_info.cluster_size = xfer_size;
5703
5704 vm_object_paging_begin(object);
5705
5706 result = vm_fault_page(object, dst_offset,
5707 prot | VM_PROT_WRITE, FALSE,
5708 &prot, &dst_page, &top_page,
5709 (int *)0,
5710 &error_code, no_zero_fill,
5711 FALSE, &fault_info);
5712
5713 switch (result) {
5714
5715 case VM_FAULT_SUCCESS:
5716
5717 if ( !dst_page->absent) {
5718 PAGE_WAKEUP_DONE(dst_page);
5719 } else {
5720 /*
5721 * we only get back an absent page if we
5722 * requested that it not be zero-filled
5723 * because we are about to fill it via I/O
5724 *
5725 * absent pages should be left BUSY
5726 * to prevent them from being faulted
5727 * into an address space before we've
5728 * had a chance to complete the I/O on
5729 * them since they may contain info that
5730 * shouldn't be seen by the faulting task
5731 */
5732 }
5733 /*
5734 * Release paging references and
5735 * top-level placeholder page, if any.
5736 */
5737 if (top_page != VM_PAGE_NULL) {
5738 vm_object_t local_object;
5739
5740 local_object = top_page->object;
5741
5742 if (top_page->object != dst_page->object) {
5743 vm_object_lock(local_object);
5744 VM_PAGE_FREE(top_page);
5745 vm_object_paging_end(local_object);
5746 vm_object_unlock(local_object);
5747 } else {
5748 VM_PAGE_FREE(top_page);
5749 vm_object_paging_end(local_object);
5750 }
5751 }
5752 vm_object_paging_end(object);
5753 break;
5754
5755 case VM_FAULT_RETRY:
5756 vm_object_lock(object);
5757 break;
5758
5759 case VM_FAULT_FICTITIOUS_SHORTAGE:
5760 vm_page_more_fictitious();
5761
5762 vm_object_lock(object);
5763 break;
5764
5765 case VM_FAULT_MEMORY_SHORTAGE:
5766 if (vm_page_wait(interruptible)) {
5767 vm_object_lock(object);
5768 break;
5769 }
5770 /* fall thru */
5771
5772 case VM_FAULT_INTERRUPTED:
5773 error_code = MACH_SEND_INTERRUPTED;
5774 case VM_FAULT_MEMORY_ERROR:
5775 memory_error:
5776 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
5777
5778 vm_object_lock(object);
5779 goto return_err;
5780
5781 case VM_FAULT_SUCCESS_NO_VM_PAGE:
5782 /* success but no page: fail */
5783 vm_object_paging_end(object);
5784 vm_object_unlock(object);
5785 goto memory_error;
5786
5787 default:
5788 panic("vm_object_iopl_request: unexpected error"
5789 " 0x%x from vm_fault_page()\n", result);
5790 }
5791 } while (result != VM_FAULT_SUCCESS);
5792
5793 }
5794
5795 if (upl->flags & UPL_KERNEL_OBJECT)
5796 goto record_phys_addr;
5797
5798 if (dst_page->cleaning) {
5799 /*
5800 * Someone else is cleaning this page in place.
5801 * In theory, we could proceed and use this page,
5802 * but they'll probably end up clearing the "busy"
5803 * bit on it in upl_commit_range() even though they
5804 * didn't set it, which would clear our "busy" bit
5805 * and open us up to race conditions.
5806 * We'd better wait for the cleaning to complete and
5807 * then try again.
5808 */
5809 vm_object_iopl_request_sleep_for_cleaning++;
5810 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5811 continue;
5812 }
5813 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
5814 dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
5815 vm_page_t low_page;
5816 int refmod;
5817
5818 /*
5819 * support devices that can't DMA above 32 bits by
5820 * substituting pages from a pool of low-address memory
5821 * for any pages we find above the 4G mark.
5822 * We can't substitute if the page is already wired, because
5823 * we don't know whether that physical address has been
5824 * handed out to some other 64-bit-capable DMA device.
5825 */
5826 if (VM_PAGE_WIRED(dst_page)) {
5827 ret = KERN_PROTECTION_FAILURE;
5828 goto return_err;
5829 }
5830 low_page = vm_page_grablo();
5831
5832 if (low_page == VM_PAGE_NULL) {
5833 ret = KERN_RESOURCE_SHORTAGE;
5834 goto return_err;
5835 }
5836 /*
5837 * from here until the vm_page_replace completes
5838 * we mustn't drop the object lock... we don't
5839 * want anyone refaulting this page in and using
5840 * it after we disconnect it... we want the fault
5841 * to find the new page being substituted.
5842 */
5843 if (dst_page->pmapped)
5844 refmod = pmap_disconnect(dst_page->phys_page);
5845 else
5846 refmod = 0;
5847
5848 if ( !dst_page->absent)
5849 vm_page_copy(dst_page, low_page);
5850
5851 low_page->reference = dst_page->reference;
5852 low_page->dirty = dst_page->dirty;
5853 low_page->absent = dst_page->absent;
5854
5855 if (refmod & VM_MEM_REFERENCED)
5856 low_page->reference = TRUE;
5857 if (refmod & VM_MEM_MODIFIED)
5858 low_page->dirty = TRUE;
5859
5860 vm_page_replace(low_page, object, dst_offset);
5861
5862 dst_page = low_page;
5863 /*
5864 * vm_page_grablo returned the page marked
5865 * BUSY... we don't need a PAGE_WAKEUP_DONE
5866 * here, because we've never dropped the object lock
5867 */
5868 if ( !dst_page->absent)
5869 dst_page->busy = FALSE;
5870 }
5871 if ( !dst_page->busy)
5872 dwp->dw_mask |= DW_vm_page_wire;
5873
5874 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5875 /*
5876 * Mark the page "busy" to block any future page fault
5877 * on this page. We'll also remove the mapping
5878 * of all these pages before leaving this routine.
5879 */
5880 assert(!dst_page->fictitious);
5881 dst_page->busy = TRUE;
5882 }
5883 /*
5884 * expect the page to be used
5885 * page queues lock must be held to set 'reference'
5886 */
5887 dwp->dw_mask |= DW_set_reference;
5888
5889 if (!(cntrl_flags & UPL_COPYOUT_FROM))
5890 dst_page->dirty = TRUE;
5891 record_phys_addr:
5892 if (dst_page->busy)
5893 upl->flags |= UPL_HAS_BUSY;
5894
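/*
 * Record this page in the "lite" bitmap: pg_num is the page's index
 * within the UPL, and lite_list packs 32 pages per 32-bit word, so
 * pg_num >> 5 selects the word and (pg_num & 31) the bit within it.
 * For example, the 70th page (pg_num == 69) sets bit 5 of word 2.
 */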
5895 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5896 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5897 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5898
5899 if (dst_page->phys_page > upl->highest_page)
5900 upl->highest_page = dst_page->phys_page;
5901
5902 if (user_page_list) {
5903 user_page_list[entry].phys_addr = dst_page->phys_page;
5904 user_page_list[entry].pageout = dst_page->pageout;
5905 user_page_list[entry].absent = dst_page->absent;
5906 user_page_list[entry].dirty = dst_page->dirty;
5907 user_page_list[entry].precious = dst_page->precious;
5908 user_page_list[entry].device = FALSE;
5909 if (dst_page->clustered == TRUE)
5910 user_page_list[entry].speculative = dst_page->speculative;
5911 else
5912 user_page_list[entry].speculative = FALSE;
5913 user_page_list[entry].cs_validated = dst_page->cs_validated;
5914 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
5915 }
5916 if (object != kernel_object) {
5917 /*
5918 * someone is explicitly grabbing this page...
5919 * update clustered and speculative state
5920 *
5921 */
5922 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5923 }
5924 entry++;
5925 dst_offset += PAGE_SIZE_64;
5926 xfer_size -= PAGE_SIZE;
5927
5928 if (dwp->dw_mask) {
5929 if (dst_page->busy == FALSE) {
5930 /*
5931 * dw_do_work may need to drop the object lock
5932 * if it does, we need the pages it's looking at to
5933 * be held stable via the busy bit.
5934 */
5935 dst_page->busy = TRUE;
5936 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
5937 }
5938 dwp->dw_m = dst_page;
5939 dwp++;
5940 dw_count++;
5941
5942 if (dw_count >= DELAYED_WORK_LIMIT) {
5943 dw_do_work(object, &dw_array[0], dw_count);
5944
5945 dwp = &dw_array[0];
5946 dw_count = 0;
5947 }
5948 }
5949 }
5950 if (dw_count)
5951 dw_do_work(object, &dw_array[0], dw_count);
5952
5953 if (page_list_count != NULL) {
5954 if (upl->flags & UPL_INTERNAL)
5955 *page_list_count = 0;
5956 else if (*page_list_count > entry)
5957 *page_list_count = entry;
5958 }
5959 vm_object_unlock(object);
5960
5961 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5962 /*
5963 * We've marked all the pages "busy" so that future
5964 * page faults will block.
5965 * Now remove the mapping for these pages, so that they
5966 * can't be accessed without causing a page fault.
5967 */
5968 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
5969 PMAP_NULL, 0, VM_PROT_NONE);
5970 assert(!object->blocked_access);
5971 object->blocked_access = TRUE;
5972 }
5973 return KERN_SUCCESS;
5974
5975 return_err:
5976 dw_index = 0;
5977
5978 for (; offset < dst_offset; offset += PAGE_SIZE) {
5979 boolean_t need_unwire;
5980
5981 dst_page = vm_page_lookup(object, offset);
5982
5983 if (dst_page == VM_PAGE_NULL)
5984 panic("vm_object_iopl_request: Wired page missing. \n");
5985
5986 /*
5987 * if we've already processed this page in an earlier
5988 * dw_do_work, we need to undo the wiring... we will
5989 * leave the dirty and reference bits on if they
5990 * were set, since we don't have a good way of knowing
5991 * what the previous state was and we won't get here
5992 * under any normal circumstances... we will always
5993 * clear BUSY and wake up any waiters via vm_page_free
5994 * or PAGE_WAKEUP_DONE
5995 */
5996 need_unwire = TRUE;
5997
5998 if (dw_count) {
5999 if (dw_array[dw_index].dw_m == dst_page) {
6000 /*
6001 * still in the deferred work list
6002 * which means we haven't yet called
6003 * vm_page_wire on this page
6004 */
6005 need_unwire = FALSE;
6006
6007 dw_index++;
6008 dw_count--;
6009 }
6010 }
6011 vm_page_lock_queues();
6012
6013 if (dst_page->absent) {
6014 vm_page_free(dst_page);
6015
6016 need_unwire = FALSE;
6017 } else {
6018 if (need_unwire == TRUE)
6019 vm_page_unwire(dst_page, TRUE);
6020
6021 PAGE_WAKEUP_DONE(dst_page);
6022 }
6023 vm_page_unlock_queues();
6024
6025 if (need_unwire == TRUE)
6026 VM_STAT_INCR(reactivations);
6027 }
6028 #if UPL_DEBUG
6029 upl->upl_state = 2;
6030 #endif
6031 if (! (upl->flags & UPL_KERNEL_OBJECT)) {
6032 vm_object_activity_end(object);
6033 }
6034 vm_object_unlock(object);
6035 upl_destroy(upl);
6036
6037 return ret;
6038 }
6039
6040 kern_return_t
6041 upl_transpose(
6042 upl_t upl1,
6043 upl_t upl2)
6044 {
6045 kern_return_t retval;
6046 boolean_t upls_locked;
6047 vm_object_t object1, object2;
6048
6049 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
6050 return KERN_INVALID_ARGUMENT;
6051 }
6052
6053 upls_locked = FALSE;
6054
6055 /*
6056 * Since we need to lock both UPLs at the same time,
6057 * avoid deadlocks by always taking locks in the same order.
6058 */
6059 if (upl1 < upl2) {
6060 upl_lock(upl1);
6061 upl_lock(upl2);
6062 } else {
6063 upl_lock(upl2);
6064 upl_lock(upl1);
6065 }
6066 upls_locked = TRUE; /* the UPLs will need to be unlocked */
6067
6068 object1 = upl1->map_object;
6069 object2 = upl2->map_object;
6070
6071 if (upl1->offset != 0 || upl2->offset != 0 ||
6072 upl1->size != upl2->size) {
6073 /*
6074 * We deal only with full objects, not subsets.
6075 * That's because we exchange the entire backing store info
6076 * for the objects: pager, resident pages, etc... We can't do
6077 * only part of it.
6078 */
6079 retval = KERN_INVALID_VALUE;
6080 goto done;
6081 }
6082
6083 /*
6084 * Transpose the VM objects' backing store.
6085 */
6086 retval = vm_object_transpose(object1, object2,
6087 (vm_object_size_t) upl1->size);
6088
6089 if (retval == KERN_SUCCESS) {
6090 /*
6091 * Make each UPL point to the correct VM object, i.e. the
6092 * object holding the pages that the UPL refers to...
6093 */
6094 #if UPL_DEBUG
6095 queue_remove(&object1->uplq, upl1, upl_t, uplq);
6096 queue_remove(&object2->uplq, upl2, upl_t, uplq);
6097 #endif
6098 upl1->map_object = object2;
6099 upl2->map_object = object1;
6100 #if UPL_DEBUG
6101 queue_enter(&object1->uplq, upl2, upl_t, uplq);
6102 queue_enter(&object2->uplq, upl1, upl_t, uplq);
6103 #endif
6104 }
6105
6106 done:
6107 /*
6108 * Cleanup.
6109 */
6110 if (upls_locked) {
6111 upl_unlock(upl1);
6112 upl_unlock(upl2);
6113 upls_locked = FALSE;
6114 }
6115
6116 return retval;
6117 }
6118
6119 /*
6120 * ENCRYPTED SWAP:
6121 *
6122 * Rationale: the user might have some encrypted data on disk (via
6123 * FileVault or any other mechanism). That data is then decrypted in
6124 * memory, which is safe as long as the machine is secure. But that
6125 * decrypted data in memory could be paged out to disk by the default
6126 * pager. The data would then be stored on disk in the clear (not encrypted)
6127 * and it could be accessed by anyone who gets physical access to the
6128 * disk (if the laptop or the disk gets stolen for example). This weakens
6129 * the security offered by FileVault.
6130 *
6131 * Solution: the default pager will optionally request that all the
6132 * pages it gathers for pageout be encrypted, via the UPL interfaces,
6133 * before it sends this UPL to disk via the vnode_pageout() path.
6134 *
6135 * Notes:
6136 *
6137 * To avoid disrupting the VM LRU algorithms, we want to keep the
6138 * clean-in-place mechanisms, which allow us to send some extra pages to
6139 * swap (clustering) without actually removing them from the user's
6140 * address space. We don't want the user to unknowingly access encrypted
6141 * data, so we have to actually remove the encrypted pages from the page
6142 * table. When the user accesses the data, the hardware will fail to
6143 * locate the virtual page in its page table and will trigger a page
6144 * fault. We can then decrypt the page and enter it in the page table
6145 * again. Whenever we allow the user to access the contents of a page,
6146 * we have to make sure it's not encrypted.
6147 *
6148 *
6149 */
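/*
 * ENCRYPTED SWAP:
 * In outline (see the routines below): when the default pager asks for its
 * pageout UPL to be encrypted, upl_encrypt() walks the UPL's pages,
 * disconnects each one from all pmaps and runs it through vm_page_encrypt(),
 * marking it "encrypted". Any later attempt to use such a page (for example
 * the vm_object_iopl_request() path above) notices the "encrypted" bit and
 * forces a soft fault, and the page-fault path decrypts the page via
 * vm_page_decrypt() before it can be mapped again.
 */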
6150 /*
6151 * ENCRYPTED SWAP:
6152 * Reserve of virtual addresses in the kernel address space.
6153 * We need to map the physical pages in the kernel, so that we
6154 * can call the encryption/decryption routines with a kernel
6155 * virtual address. We keep this pool of pre-allocated kernel
6156 * virtual addresses so that we don't have to scan the kernel's
6157 * virtual address space each time we need to encrypt or decrypt
6158 * a physical page.
6159 * It would be nice to be able to encrypt and decrypt in physical
6160 * mode but that might not always be more efficient...
6161 */
6162 decl_simple_lock_data(,vm_paging_lock)
6163 #define VM_PAGING_NUM_PAGES 64
6164 vm_map_offset_t vm_paging_base_address = 0;
6165 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
6166 int vm_paging_max_index = 0;
6167 int vm_paging_page_waiter = 0;
6168 int vm_paging_page_waiter_total = 0;
6169 unsigned long vm_paging_no_kernel_page = 0;
6170 unsigned long vm_paging_objects_mapped = 0;
6171 unsigned long vm_paging_pages_mapped = 0;
6172 unsigned long vm_paging_objects_mapped_slow = 0;
6173 unsigned long vm_paging_pages_mapped_slow = 0;
6174
6175 void
6176 vm_paging_map_init(void)
6177 {
6178 kern_return_t kr;
6179 vm_map_offset_t page_map_offset;
6180 vm_map_entry_t map_entry;
6181
6182 assert(vm_paging_base_address == 0);
6183
6184 /*
6185 * Initialize our pool of pre-allocated kernel
6186 * virtual addresses.
6187 */
6188 page_map_offset = 0;
6189 kr = vm_map_find_space(kernel_map,
6190 &page_map_offset,
6191 VM_PAGING_NUM_PAGES * PAGE_SIZE,
6192 0,
6193 0,
6194 &map_entry);
6195 if (kr != KERN_SUCCESS) {
6196 panic("vm_paging_map_init: kernel_map full\n");
6197 }
6198 map_entry->object.vm_object = kernel_object;
6199 map_entry->offset = page_map_offset;
6200 vm_object_reference(kernel_object);
6201 vm_map_unlock(kernel_map);
6202
6203 assert(vm_paging_base_address == 0);
6204 vm_paging_base_address = page_map_offset;
6205 }
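/*
 * Illustrative sketch only (not part of the original file, kept under
 * "#if 0"): how a slot index in the pre-allocated pool set up above maps
 * to a kernel virtual address and back. The helper names are hypothetical;
 * vm_paging_map_object() and vm_paging_unmap_object() below inline this
 * arithmetic directly.
 */
#if 0
static vm_map_offset_t
vm_paging_slot_to_address(int i)
{
	assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
	return vm_paging_base_address + ((vm_map_offset_t)i * PAGE_SIZE);
}

static int
vm_paging_address_to_slot(vm_map_offset_t addr)
{
	assert(addr >= vm_paging_base_address);
	return (int)((addr - vm_paging_base_address) >> PAGE_SHIFT);
}
#endif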
6206
6207 /*
6208 * ENCRYPTED SWAP:
6209 * vm_paging_map_object:
6210 * Maps part of a VM object's pages in the kernel
6211 * virtual address space, using the pre-allocated
6212 * kernel virtual addresses, if possible.
6213 * Context:
6214 * The VM object is locked. This lock will get
6215 * dropped and re-acquired though, so the caller
6216 * must make sure the VM object is kept alive
6217 * (by holding a VM map that has a reference
6218 * on it, for example, or taking an extra reference).
6219 * The page should also be kept busy to prevent
6220 * it from being reclaimed.
6221 */
6222 kern_return_t
6223 vm_paging_map_object(
6224 vm_map_offset_t *address,
6225 vm_page_t page,
6226 vm_object_t object,
6227 vm_object_offset_t offset,
6228 vm_map_size_t *size,
6229 vm_prot_t protection,
6230 boolean_t can_unlock_object)
6231 {
6232 kern_return_t kr;
6233 vm_map_offset_t page_map_offset;
6234 vm_map_size_t map_size;
6235 vm_object_offset_t object_offset;
6236 int i;
6237
6238
6239 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
6240 assert(page->busy);
6241 /*
6242 * Use one of the pre-allocated kernel virtual addresses
6243 * and just enter the VM page in the kernel address space
6244 * at that virtual address.
6245 */
6246 simple_lock(&vm_paging_lock);
6247
6248 /*
6249 * Try and find an available kernel virtual address
6250 * from our pre-allocated pool.
6251 */
6252 page_map_offset = 0;
6253 for (;;) {
6254 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
6255 if (vm_paging_page_inuse[i] == FALSE) {
6256 page_map_offset =
6257 vm_paging_base_address +
6258 (i * PAGE_SIZE);
6259 break;
6260 }
6261 }
6262 if (page_map_offset != 0) {
6263 /* found a space to map our page ! */
6264 break;
6265 }
6266
6267 if (can_unlock_object) {
6268 /*
6269 * If we can afford to unlock the VM object,
6270 * let's take the slow path now...
6271 */
6272 break;
6273 }
6274 /*
6275 * We can't afford to unlock the VM object, so
6276 * let's wait for a space to become available...
6277 */
6278 vm_paging_page_waiter_total++;
6279 vm_paging_page_waiter++;
6280 thread_sleep_fast_usimple_lock(&vm_paging_page_waiter,
6281 &vm_paging_lock,
6282 THREAD_UNINT);
6283 vm_paging_page_waiter--;
6284 /* ... and try again */
6285 }
6286
6287 if (page_map_offset != 0) {
6288 /*
6289 * We found a kernel virtual address;
6290 * map the physical page to that virtual address.
6291 */
6292 if (i > vm_paging_max_index) {
6293 vm_paging_max_index = i;
6294 }
6295 vm_paging_page_inuse[i] = TRUE;
6296 simple_unlock(&vm_paging_lock);
6297
6298 if (page->pmapped == FALSE) {
6299 pmap_sync_page_data_phys(page->phys_page);
6300 }
6301 page->pmapped = TRUE;
6302
6303 /*
6304 * Keep the VM object locked over the PMAP_ENTER
6305 * and the actual use of the page by the kernel,
6306 * or this pmap mapping might get undone by a
6307 * vm_object_pmap_protect() call...
6308 */
6309 PMAP_ENTER(kernel_pmap,
6310 page_map_offset,
6311 page,
6312 protection,
6313 ((int) page->object->wimg_bits &
6314 VM_WIMG_MASK),
6315 TRUE);
6316 vm_paging_objects_mapped++;
6317 vm_paging_pages_mapped++;
6318 *address = page_map_offset;
6319
6320 /* all done and mapped, ready to use ! */
6321 return KERN_SUCCESS;
6322 }
6323
6324 /*
6325 * We ran out of pre-allocated kernel virtual
6326 * addresses. Just map the page in the kernel
6327 * the slow and regular way.
6328 */
6329 vm_paging_no_kernel_page++;
6330 simple_unlock(&vm_paging_lock);
6331 }
6332
6333 if (! can_unlock_object) {
6334 return KERN_NOT_SUPPORTED;
6335 }
6336
6337 object_offset = vm_object_trunc_page(offset);
6338 map_size = vm_map_round_page(*size);
6339
6340 /*
6341 * Try and map the required range of the object
6342 * in the kernel_map
6343 */
6344
6345 vm_object_reference_locked(object); /* for the map entry */
6346 vm_object_unlock(object);
6347
6348 kr = vm_map_enter(kernel_map,
6349 address,
6350 map_size,
6351 0,
6352 VM_FLAGS_ANYWHERE,
6353 object,
6354 object_offset,
6355 FALSE,
6356 protection,
6357 VM_PROT_ALL,
6358 VM_INHERIT_NONE);
6359 if (kr != KERN_SUCCESS) {
6360 *address = 0;
6361 *size = 0;
6362 vm_object_deallocate(object); /* for the map entry */
6363 vm_object_lock(object);
6364 return kr;
6365 }
6366
6367 *size = map_size;
6368
6369 /*
6370 * Enter the mapped pages in the page table now.
6371 */
6372 vm_object_lock(object);
6373 /*
6374 * VM object must be kept locked from before PMAP_ENTER()
6375 * until after the kernel is done accessing the page(s).
6376 * Otherwise, the pmap mappings in the kernel could be
6377 * undone by a call to vm_object_pmap_protect().
6378 */
6379
6380 for (page_map_offset = 0;
6381 map_size != 0;
6382 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
6383 unsigned int cache_attr;
6384
6385 page = vm_page_lookup(object, offset + page_map_offset);
6386 if (page == VM_PAGE_NULL) {
6387 printf("vm_paging_map_object: no page !?");
6388 vm_object_unlock(object);
6389 kr = vm_map_remove(kernel_map, *address, *size,
6390 VM_MAP_NO_FLAGS);
6391 assert(kr == KERN_SUCCESS);
6392 *address = 0;
6393 *size = 0;
6394 vm_object_lock(object);
6395 return KERN_MEMORY_ERROR;
6396 }
6397 if (page->pmapped == FALSE) {
6398 pmap_sync_page_data_phys(page->phys_page);
6399 }
6400 page->pmapped = TRUE;
6401 cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
6402
6403 //assert(pmap_verify_free(page->phys_page));
6404 PMAP_ENTER(kernel_pmap,
6405 *address + page_map_offset,
6406 page,
6407 protection,
6408 cache_attr,
6409 TRUE);
6410 }
6411
6412 vm_paging_objects_mapped_slow++;
6413 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
6414
6415 return KERN_SUCCESS;
6416 }
6417
6418 /*
6419 * ENCRYPTED SWAP:
6420 * vm_paging_unmap_object:
6421 * Unmaps part of a VM object's pages from the kernel
6422 * virtual address space.
6423 * Context:
6424 * The VM object is locked. This lock will get
6425 * dropped and re-acquired though.
6426 */
6427 void
6428 vm_paging_unmap_object(
6429 vm_object_t object,
6430 vm_map_offset_t start,
6431 vm_map_offset_t end)
6432 {
6433 kern_return_t kr;
6434 int i;
6435
6436 if ((vm_paging_base_address == 0) ||
6437 (start < vm_paging_base_address) ||
6438 (end > (vm_paging_base_address
6439 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
6440 /*
6441 * We didn't use our pre-allocated pool of
6442 * kernel virtual addresses. Deallocate the
6443 * virtual memory.
6444 */
6445 if (object != VM_OBJECT_NULL) {
6446 vm_object_unlock(object);
6447 }
6448 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
6449 if (object != VM_OBJECT_NULL) {
6450 vm_object_lock(object);
6451 }
6452 assert(kr == KERN_SUCCESS);
6453 } else {
6454 /*
6455 * We used a kernel virtual address from our
6456 * pre-allocated pool. Put it back in the pool
6457 * for next time.
6458 */
6459 assert(end - start == PAGE_SIZE);
6460 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
6461 assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
6462
6463 /* undo the pmap mapping */
6464 pmap_remove(kernel_pmap, start, end);
6465
6466 simple_lock(&vm_paging_lock);
6467 vm_paging_page_inuse[i] = FALSE;
6468 if (vm_paging_page_waiter) {
6469 thread_wakeup(&vm_paging_page_waiter);
6470 }
6471 simple_unlock(&vm_paging_lock);
6472 }
6473 }
6474
6475 #if CRYPTO
6476 /*
6477 * Encryption data.
6478 * "iv" is the "initial vector". Ideally, we want to
6479 * have a different one for each page we encrypt, so that
6480 * crackers can't find encryption patterns too easily.
6481 */
6482 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
6483 boolean_t swap_crypt_ctx_initialized = FALSE;
6484 aes_32t swap_crypt_key[8]; /* big enough for a 256-bit key */
6485 aes_ctx swap_crypt_ctx;
6486 const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
6487
6488 #if DEBUG
6489 boolean_t swap_crypt_ctx_tested = FALSE;
6490 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
6491 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
6492 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
6493 #endif /* DEBUG */
6494
6495 /*
6496 * Initialize the encryption context: key and key size.
6497 */
6498 void swap_crypt_ctx_initialize(void); /* forward */
6499 void
6500 swap_crypt_ctx_initialize(void)
6501 {
6502 unsigned int i;
6503
6504 /*
6505 * No need for locking to protect swap_crypt_ctx_initialized
6506 * because the first use of encryption will come from the
6507 * pageout thread (we won't pagein before there's been a pageout)
6508 * and there's only one pageout thread.
6509 */
6510 if (swap_crypt_ctx_initialized == FALSE) {
6511 for (i = 0;
6512 i < (sizeof (swap_crypt_key) /
6513 sizeof (swap_crypt_key[0]));
6514 i++) {
6515 swap_crypt_key[i] = random();
6516 }
6517 aes_encrypt_key((const unsigned char *) swap_crypt_key,
6518 SWAP_CRYPT_AES_KEY_SIZE,
6519 &swap_crypt_ctx.encrypt);
6520 aes_decrypt_key((const unsigned char *) swap_crypt_key,
6521 SWAP_CRYPT_AES_KEY_SIZE,
6522 &swap_crypt_ctx.decrypt);
6523 swap_crypt_ctx_initialized = TRUE;
6524 }
6525
6526 #if DEBUG
6527 /*
6528 * Validate the encryption algorithms.
6529 */
6530 if (swap_crypt_ctx_tested == FALSE) {
6531 /* initialize */
6532 for (i = 0; i < 4096; i++) {
6533 swap_crypt_test_page_ref[i] = (char) i;
6534 }
6535 /* encrypt */
6536 aes_encrypt_cbc(swap_crypt_test_page_ref,
6537 swap_crypt_null_iv,
6538 PAGE_SIZE / AES_BLOCK_SIZE,
6539 swap_crypt_test_page_encrypt,
6540 &swap_crypt_ctx.encrypt);
6541 /* decrypt */
6542 aes_decrypt_cbc(swap_crypt_test_page_encrypt,
6543 swap_crypt_null_iv,
6544 PAGE_SIZE / AES_BLOCK_SIZE,
6545 swap_crypt_test_page_decrypt,
6546 &swap_crypt_ctx.decrypt);
6547 /* compare result with original */
6548 for (i = 0; i < 4096; i ++) {
6549 if (swap_crypt_test_page_decrypt[i] !=
6550 swap_crypt_test_page_ref[i]) {
6551 panic("encryption test failed");
6552 }
6553 }
6554
6555 /* encrypt again */
6556 aes_encrypt_cbc(swap_crypt_test_page_decrypt,
6557 swap_crypt_null_iv,
6558 PAGE_SIZE / AES_BLOCK_SIZE,
6559 swap_crypt_test_page_decrypt,
6560 &swap_crypt_ctx.encrypt);
6561 /* decrypt in place */
6562 aes_decrypt_cbc(swap_crypt_test_page_decrypt,
6563 swap_crypt_null_iv,
6564 PAGE_SIZE / AES_BLOCK_SIZE,
6565 swap_crypt_test_page_decrypt,
6566 &swap_crypt_ctx.decrypt);
6567 for (i = 0; i < 4096; i ++) {
6568 if (swap_crypt_test_page_decrypt[i] !=
6569 swap_crypt_test_page_ref[i]) {
6570 panic("in place encryption test failed");
6571 }
6572 }
6573
6574 swap_crypt_ctx_tested = TRUE;
6575 }
6576 #endif /* DEBUG */
6577 }
6578
6579 /*
6580 * ENCRYPTED SWAP:
6581 * vm_page_encrypt:
6582 * Encrypt the given page, for secure paging.
6583 * The page might already be mapped at kernel virtual
6584 * address "kernel_mapping_offset". Otherwise, we need
6585 * to map it.
6586 *
6587 * Context:
6588 * The page's object is locked, but this lock will be released
6589 * and re-acquired.
6590 * The page is busy and not accessible by users (not entered in any pmap).
6591 */
6592 void
6593 vm_page_encrypt(
6594 vm_page_t page,
6595 vm_map_offset_t kernel_mapping_offset)
6596 {
6597 kern_return_t kr;
6598 vm_map_size_t kernel_mapping_size;
6599 vm_offset_t kernel_vaddr;
6600 union {
6601 unsigned char aes_iv[AES_BLOCK_SIZE];
6602 struct {
6603 memory_object_t pager_object;
6604 vm_object_offset_t paging_offset;
6605 } vm;
6606 } encrypt_iv;
6607
6608 if (! vm_pages_encrypted) {
6609 vm_pages_encrypted = TRUE;
6610 }
6611
6612 assert(page->busy);
6613 assert(page->dirty || page->precious);
6614
6615 if (page->encrypted) {
6616 /*
6617 * Already encrypted: no need to do it again.
6618 */
6619 vm_page_encrypt_already_encrypted_counter++;
6620 return;
6621 }
6622 ASSERT_PAGE_DECRYPTED(page);
6623
6624 /*
6625 * Take a paging-in-progress reference to keep the object
6626 * alive even if we have to unlock it (in vm_paging_map_object()
6627 * for example)...
6628 */
6629 vm_object_paging_begin(page->object);
6630
6631 if (kernel_mapping_offset == 0) {
6632 /*
6633 * The page hasn't already been mapped in kernel space
6634 * by the caller. Map it now, so that we can access
6635 * its contents and encrypt them.
6636 */
6637 kernel_mapping_size = PAGE_SIZE;
6638 kr = vm_paging_map_object(&kernel_mapping_offset,
6639 page,
6640 page->object,
6641 page->offset,
6642 &kernel_mapping_size,
6643 VM_PROT_READ | VM_PROT_WRITE,
6644 FALSE);
6645 if (kr != KERN_SUCCESS) {
6646 panic("vm_page_encrypt: "
6647 "could not map page in kernel: 0x%x\n",
6648 kr);
6649 }
6650 } else {
6651 kernel_mapping_size = 0;
6652 }
6653 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
6654
6655 if (swap_crypt_ctx_initialized == FALSE) {
6656 swap_crypt_ctx_initialize();
6657 }
6658 assert(swap_crypt_ctx_initialized);
6659
6660 /*
6661 * Prepare an "initial vector" for the encryption.
6662 * We use the "pager" and the "paging_offset" for that
6663 * page to obfuscate the encrypted data a bit more and
6664 * prevent crackers from finding patterns that they could
6665 * use to break the key.
6666 */
6667 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
6668 encrypt_iv.vm.pager_object = page->object->pager;
6669 encrypt_iv.vm.paging_offset =
6670 page->object->paging_offset + page->offset;
6671
6672 /* encrypt the "initial vector" */
6673 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
6674 swap_crypt_null_iv,
6675 1,
6676 &encrypt_iv.aes_iv[0],
6677 &swap_crypt_ctx.encrypt);
6678
6679 /*
6680 * Encrypt the page.
6681 */
6682 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
6683 &encrypt_iv.aes_iv[0],
6684 PAGE_SIZE / AES_BLOCK_SIZE,
6685 (unsigned char *) kernel_vaddr,
6686 &swap_crypt_ctx.encrypt);
6687
6688 vm_page_encrypt_counter++;
6689
6690 /*
6691 * Unmap the page from the kernel's address space,
6692 * if we had to map it ourselves. Otherwise, let
6693 * the caller undo the mapping if needed.
6694 */
6695 if (kernel_mapping_size != 0) {
6696 vm_paging_unmap_object(page->object,
6697 kernel_mapping_offset,
6698 kernel_mapping_offset + kernel_mapping_size);
6699 }
6700
6701 /*
6702 * Clear the "reference" and "modified" bits.
6703 * This should clean up any impact the encryption had
6704 * on them.
6705 * The page was kept busy and disconnected from all pmaps,
6706 * so it can't have been referenced or modified from user
6707 * space.
6708 * The software bits will be reset later after the I/O
6709 * has completed (in upl_commit_range()).
6710 */
6711 pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
6712
6713 page->encrypted = TRUE;
6714
6715 vm_object_paging_end(page->object);
6716 }
6717
6718 /*
6719 * ENCRYPTED SWAP:
6720 * vm_page_decrypt:
6721 * Decrypt the given page.
6722 * The page might already be mapped at kernel virtual
6723 * address "kernel_mapping_offset". Otherwise, we need
6724 * to map it.
6725 *
6726 * Context:
6727 * The page's VM object is locked but will be unlocked and relocked.
6728 * The page is busy and not accessible by users (not entered in any pmap).
6729 */
6730 void
6731 vm_page_decrypt(
6732 vm_page_t page,
6733 vm_map_offset_t kernel_mapping_offset)
6734 {
6735 kern_return_t kr;
6736 vm_map_size_t kernel_mapping_size;
6737 vm_offset_t kernel_vaddr;
6738 union {
6739 unsigned char aes_iv[AES_BLOCK_SIZE];
6740 struct {
6741 memory_object_t pager_object;
6742 vm_object_offset_t paging_offset;
6743 } vm;
6744 } decrypt_iv;
6745
6746 assert(page->busy);
6747 assert(page->encrypted);
6748
6749 /*
6750 * Take a paging-in-progress reference to keep the object
6751 * alive even if we have to unlock it (in vm_paging_map_object()
6752 * for example)...
6753 */
6754 vm_object_paging_begin(page->object);
6755
6756 if (kernel_mapping_offset == 0) {
6757 /*
6758 * The page hasn't already been mapped in kernel space
6759 * by the caller. Map it now, so that we can access
6760 * its contents and decrypt them.
6761 */
6762 kernel_mapping_size = PAGE_SIZE;
6763 kr = vm_paging_map_object(&kernel_mapping_offset,
6764 page,
6765 page->object,
6766 page->offset,
6767 &kernel_mapping_size,
6768 VM_PROT_READ | VM_PROT_WRITE,
6769 FALSE);
6770 if (kr != KERN_SUCCESS) {
6771 panic("vm_page_decrypt: "
6772 "could not map page in kernel: 0x%x\n",
6773 kr);
6774 }
6775 } else {
6776 kernel_mapping_size = 0;
6777 }
6778 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
6779
6780 assert(swap_crypt_ctx_initialized);
6781
6782 /*
6783 * Prepare an "initial vector" for the decryption.
6784 * It has to be the same as the "initial vector" we
6785 * used to encrypt that page.
6786 */
6787 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
6788 decrypt_iv.vm.pager_object = page->object->pager;
6789 decrypt_iv.vm.paging_offset =
6790 page->object->paging_offset + page->offset;
6791
6792 /* encrypt the "initial vector" */
6793 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
6794 swap_crypt_null_iv,
6795 1,
6796 &decrypt_iv.aes_iv[0],
6797 &swap_crypt_ctx.encrypt);
6798
6799 /*
6800 * Decrypt the page.
6801 */
6802 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
6803 &decrypt_iv.aes_iv[0],
6804 PAGE_SIZE / AES_BLOCK_SIZE,
6805 (unsigned char *) kernel_vaddr,
6806 &swap_crypt_ctx.decrypt);
6807 vm_page_decrypt_counter++;
6808
6809 /*
6810 * Unmap the page from the kernel's address space,
6811 * if we had to map it ourselves. Otherwise, let
6812 * the caller undo the mapping if needed.
6813 */
6814 if (kernel_mapping_size != 0) {
6815 vm_paging_unmap_object(page->object,
6816 kernel_vaddr,
6817 kernel_vaddr + PAGE_SIZE);
6818 }
6819
6820 /*
6821 * After decryption, the page is actually clean.
6822 * It was encrypted as part of paging, which "cleans"
6823 * the "dirty" pages.
6824 * No one could access it after it was encrypted
6825 * and the decryption doesn't count.
6826 */
6827 page->dirty = FALSE;
6828 assert (page->cs_validated == FALSE);
6829 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
6830 page->encrypted = FALSE;
6831
6832 /*
6833 * We've just modified the page's contents via the data cache and part
6834 * of the new contents might still be in the cache and not yet in RAM.
6835 * Since the page is now available and might get gathered in a UPL to
6836 * be part of a DMA transfer from a driver that expects the memory to
6837 * be coherent at this point, we have to flush the data cache.
6838 */
6839 pmap_sync_page_attributes_phys(page->phys_page);
6840 /*
6841 * Since the page is not mapped yet, some code might assume that it
6842 * doesn't need to invalidate the instruction cache when writing to
6843 * that page. That code relies on "pmapped" being FALSE, so that the
6844 * caches get synchronized when the page is first mapped.
6845 */
6846 assert(pmap_verify_free(page->phys_page));
6847 page->pmapped = FALSE;
6848 page->wpmapped = FALSE;
6849
6850 vm_object_paging_end(page->object);
6851 }
6852
6853 #if DEVELOPMENT || DEBUG
6854 unsigned long upl_encrypt_upls = 0;
6855 unsigned long upl_encrypt_pages = 0;
6856 #endif
6857
6858 /*
6859 * ENCRYPTED SWAP:
6860 *
6861 * upl_encrypt:
6862 * Encrypts all the pages in the UPL, within the specified range.
6863 *
6864 */
6865 void
6866 upl_encrypt(
6867 upl_t upl,
6868 upl_offset_t crypt_offset,
6869 upl_size_t crypt_size)
6870 {
6871 upl_size_t upl_size, subupl_size=crypt_size;
6872 upl_offset_t offset_in_upl, subupl_offset=crypt_offset;
6873 vm_object_t upl_object;
6874 vm_object_offset_t upl_offset;
6875 vm_page_t page;
6876 vm_object_t shadow_object;
6877 vm_object_offset_t shadow_offset;
6878 vm_object_offset_t paging_offset;
6879 vm_object_offset_t base_offset;
6880 int isVectorUPL = 0;
6881 upl_t vector_upl = NULL;
6882
6883 if((isVectorUPL = vector_upl_is_valid(upl)))
6884 vector_upl = upl;
6885
6886 process_upl_to_encrypt:
6887 if(isVectorUPL) {
6888 crypt_size = subupl_size;
6889 crypt_offset = subupl_offset;
6890 upl = vector_upl_subupl_byoffset(vector_upl, &crypt_offset, &crypt_size);
6891 if(upl == NULL)
6892 panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n");
6893 subupl_size -= crypt_size;
6894 subupl_offset += crypt_size;
6895 }
6896
6897 #if DEVELOPMENT || DEBUG
6898 upl_encrypt_upls++;
6899 upl_encrypt_pages += crypt_size / PAGE_SIZE;
6900 #endif
6901 upl_object = upl->map_object;
6902 upl_offset = upl->offset;
6903 upl_size = upl->size;
6904
6905 vm_object_lock(upl_object);
6906
6907 /*
6908 * Find the VM object that contains the actual pages.
6909 */
6910 if (upl_object->pageout) {
6911 shadow_object = upl_object->shadow;
6912 /*
6913 * The offset in the shadow object is actually also
6914 * accounted for in upl->offset. It possibly shouldn't be
6915 * this way, but for now don't account for it twice.
6916 */
6917 shadow_offset = 0;
6918 assert(upl_object->paging_offset == 0); /* XXX ? */
6919 vm_object_lock(shadow_object);
6920 } else {
6921 shadow_object = upl_object;
6922 shadow_offset = 0;
6923 }
6924
6925 paging_offset = shadow_object->paging_offset;
6926 vm_object_paging_begin(shadow_object);
6927
6928 if (shadow_object != upl_object)
6929 vm_object_unlock(upl_object);
6930
6931
6932 base_offset = shadow_offset;
6933 base_offset += upl_offset;
6934 base_offset += crypt_offset;
6935 base_offset -= paging_offset;
6936
6937 assert(crypt_offset + crypt_size <= upl_size);
6938
6939 for (offset_in_upl = 0;
6940 offset_in_upl < crypt_size;
6941 offset_in_upl += PAGE_SIZE) {
6942 page = vm_page_lookup(shadow_object,
6943 base_offset + offset_in_upl);
6944 if (page == VM_PAGE_NULL) {
6945 panic("upl_encrypt: "
6946 "no page for (obj=%p,off=%lld+%d)!\n",
6947 shadow_object,
6948 base_offset,
6949 offset_in_upl);
6950 }
6951 /*
6952 * Disconnect the page from all pmaps, so that nobody can
6953 * access it while it's encrypted. After that point, all
6954 * accesses to this page will cause a page fault and block
6955 * while the page is busy being encrypted. After the
6956 * encryption completes, any access will cause a
6957 * page fault and the page gets decrypted at that time.
6958 */
6959 pmap_disconnect(page->phys_page);
6960 vm_page_encrypt(page, 0);
6961
6962 if (vm_object_lock_avoid(shadow_object)) {
6963 /*
6964 * Give vm_pageout_scan() a chance to convert more
6965 * pages from "clean-in-place" to "clean-and-free",
6966 * if it's interested in the same pages we selected
6967 * in this cluster.
6968 */
6969 vm_object_unlock(shadow_object);
6970 mutex_pause(2);
6971 vm_object_lock(shadow_object);
6972 }
6973 }
6974
6975 vm_object_paging_end(shadow_object);
6976 vm_object_unlock(shadow_object);
6977
6978 if(isVectorUPL && subupl_size)
6979 goto process_upl_to_encrypt;
6980 }
6981
6982 #else /* CRYPTO */
6983 void
6984 upl_encrypt(
6985 __unused upl_t upl,
6986 __unused upl_offset_t crypt_offset,
6987 __unused upl_size_t crypt_size)
6988 {
6989 }
6990
6991 void
6992 vm_page_encrypt(
6993 __unused vm_page_t page,
6994 __unused vm_map_offset_t kernel_mapping_offset)
6995 {
6996 }
6997
6998 void
6999 vm_page_decrypt(
7000 __unused vm_page_t page,
7001 __unused vm_map_offset_t kernel_mapping_offset)
7002 {
7003 }
7004
7005 #endif /* CRYPTO */
7006
7007 void
7008 vm_pageout_queue_steal(vm_page_t page, boolean_t queues_locked)
7009 {
7010 boolean_t pageout;
7011
7012 pageout = page->pageout;
7013
7014 page->list_req_pending = FALSE;
7015 page->cleaning = FALSE;
7016 page->pageout = FALSE;
7017
7018 if (!queues_locked) {
7019 vm_page_lockspin_queues();
7020 }
7021
7022 /*
7023 * need to drop the laundry count...
7024 * we may also need to remove it
7025 * from the I/O paging queue...
7026 * vm_pageout_throttle_up handles both cases
7027 *
7028 * the laundry and pageout_queue flags are cleared...
7029 */
7030 vm_pageout_throttle_up(page);
7031
7032 if (pageout == TRUE) {
7033 /*
7034 * toss the wire count we picked up
7035 * when we initially set this page up
7036 * to be cleaned...
7037 */
7038 vm_page_unwire(page, TRUE);
7039 }
7040 vm_page_steal_pageout_page++;
7041
7042 if (!queues_locked) {
7043 vm_page_unlock_queues();
7044 }
7045 }
7046
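/*
 * Allocate an empty vector UPL: a container UPL flagged UPL_VECTOR plus a
 * struct _vector_upl that will track the sub-UPLs and their I/O states as
 * they are added via vector_upl_set_subupl().
 */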
7047 upl_t
7048 vector_upl_create(vm_offset_t upl_offset)
7049 {
7050 int vector_upl_size = sizeof(struct _vector_upl);
7051 int i=0;
7052 upl_t upl;
7053 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
7054
7055 upl = upl_create(0,UPL_VECTOR,0);
7056 upl->vector_upl = vector_upl;
7057 upl->offset = upl_offset;
7058 vector_upl->size = 0;
7059 vector_upl->offset = upl_offset;
7060 vector_upl->invalid_upls=0;
7061 vector_upl->num_upls=0;
7062 vector_upl->pagelist = NULL;
7063
7064 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
7065 vector_upl->upl_iostates[i].size = 0;
7066 vector_upl->upl_iostates[i].offset = 0;
7067
7068 }
7069 return upl;
7070 }
7071
7072 void
7073 vector_upl_deallocate(upl_t upl)
7074 {
7075 if(upl) {
7076 vector_upl_t vector_upl = upl->vector_upl;
7077 if(vector_upl) {
7078 if(vector_upl->invalid_upls != vector_upl->num_upls)
7079 panic("Deallocating non-empty Vectored UPL\n");
7080 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
7081 vector_upl->invalid_upls=0;
7082 vector_upl->num_upls = 0;
7083 vector_upl->pagelist = NULL;
7084 vector_upl->size = 0;
7085 vector_upl->offset = 0;
7086 kfree(vector_upl, sizeof(struct _vector_upl));
7087 vector_upl = (vector_upl_t)0xdeadbeef;
7088 }
7089 else
7090 panic("vector_upl_deallocate was passed a non-vectored upl\n");
7091 }
7092 else
7093 panic("vector_upl_deallocate was passed a NULL upl\n");
7094 }
7095
7096 boolean_t
7097 vector_upl_is_valid(upl_t upl)
7098 {
7099 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
7100 vector_upl_t vector_upl = upl->vector_upl;
7101 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xdeadbeef || vector_upl == (vector_upl_t)0xfeedbeef)
7102 return FALSE;
7103 else
7104 return TRUE;
7105 }
7106 return FALSE;
7107 }
7108
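/*
 * Add or remove a sub-UPL. With a non-zero io_size (rounded up to at least
 * one page), "subupl" is appended and the vector UPL grows by io_size.
 * With io_size == 0, the matching element is invalidated instead; the
 * return value is TRUE once every sub-UPL has been invalidated, i.e. the
 * vector UPL itself can be deallocated.
 */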
7109 boolean_t
7110 vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
7111 {
7112 if(vector_upl_is_valid(upl)) {
7113 vector_upl_t vector_upl = upl->vector_upl;
7114
7115 if(vector_upl) {
7116 if(subupl) {
7117 if(io_size) {
7118 if(io_size < PAGE_SIZE)
7119 io_size = PAGE_SIZE;
7120 subupl->vector_upl = (void*)vector_upl;
7121 vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
7122 vector_upl->size += io_size;
7123 upl->size += io_size;
7124 }
7125 else {
7126 uint32_t i=0,invalid_upls=0;
7127 for(i = 0; i < vector_upl->num_upls; i++) {
7128 if(vector_upl->upl_elems[i] == subupl)
7129 break;
7130 }
7131 if(i == vector_upl->num_upls)
7132 panic("Trying to remove sub-upl when none exists");
7133
7134 vector_upl->upl_elems[i] = NULL;
7135 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
7136 if(invalid_upls == vector_upl->num_upls)
7137 return TRUE;
7138 else
7139 return FALSE;
7140 }
7141 }
7142 else
7143 panic("vector_upl_set_subupl was passed a NULL upl element\n");
7144 }
7145 else
7146 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
7147 }
7148 else
7149 panic("vector_upl_set_subupl was passed a NULL upl\n");
7150
7151 return FALSE;
7152 }
7153
7154 void
7155 vector_upl_set_pagelist(upl_t upl)
7156 {
7157 if(vector_upl_is_valid(upl)) {
7158 uint32_t i=0;
7159 vector_upl_t vector_upl = upl->vector_upl;
7160
7161 if(vector_upl) {
7162 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0;
7163
7164 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
7165
7166 for(i=0; i < vector_upl->num_upls; i++) {
7167 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
7168 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
7169 pagelist_size += cur_upl_pagelist_size;
7170 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
7171 upl->highest_page = vector_upl->upl_elems[i]->highest_page;
7172 }
7173 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
7174 }
7175 else
7176 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
7177 }
7178 else
7179 panic("vector_upl_set_pagelist was passed a NULL upl\n");
7180
7181 }
7182
7183 upl_t
7184 vector_upl_subupl_byindex(upl_t upl, uint32_t index)
7185 {
7186 if(vector_upl_is_valid(upl)) {
7187 vector_upl_t vector_upl = upl->vector_upl;
7188 if(vector_upl) {
7189 if(index < vector_upl->num_upls)
7190 return vector_upl->upl_elems[index];
7191 }
7192 else
7193 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
7194 }
7195 return NULL;
7196 }
7197
7198 upl_t
7199 vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
7200 {
7201 if(vector_upl_is_valid(upl)) {
7202 uint32_t i=0;
7203 vector_upl_t vector_upl = upl->vector_upl;
7204
7205 if(vector_upl) {
7206 upl_t subupl = NULL;
7207 vector_upl_iostates_t subupl_state;
7208
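/*
 * Walk the sub-UPLs in order. Each iostate records the sub-UPL's extent
 * within the vector UPL, so pick the first one whose extent covers
 * *upl_offset, clamp *upl_size to that extent, and convert *upl_offset so
 * it is relative to the returned sub-UPL.
 */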
7209 for(i=0; i < vector_upl->num_upls; i++) {
7210 subupl = vector_upl->upl_elems[i];
7211 subupl_state = vector_upl->upl_iostates[i];
7212 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
7213 /* We could have been passed an offset/size pair that belongs
7214 * to an UPL element that has already been committed/aborted.
7215 * If so, return NULL.
7216 */
7217 if(subupl == NULL)
7218 return NULL;
7219 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
7220 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
7221 if(*upl_size > subupl_state.size)
7222 *upl_size = subupl_state.size;
7223 }
7224 if(*upl_offset >= subupl_state.offset)
7225 *upl_offset -= subupl_state.offset;
7226 else if(i)
7227 panic("Vector UPL offset miscalculation\n");
7228 return subupl;
7229 }
7230 }
7231 }
7232 else
7233 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
7234 }
7235 return NULL;
7236 }
7237
7238 void
7239 vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
7240 {
7241 *v_upl_submap = NULL;
7242
7243 if(vector_upl_is_valid(upl)) {
7244 vector_upl_t vector_upl = upl->vector_upl;
7245 if(vector_upl) {
7246 *v_upl_submap = vector_upl->submap;
7247 *submap_dst_addr = vector_upl->submap_dst_addr;
7248 }
7249 else
7250 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
7251 }
7252 else
7253 panic("vector_upl_get_submap was passed a null UPL\n");
7254 }
7255
7256 void
7257 vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
7258 {
7259 if(vector_upl_is_valid(upl)) {
7260 vector_upl_t vector_upl = upl->vector_upl;
7261 if(vector_upl) {
7262 vector_upl->submap = submap;
7263 vector_upl->submap_dst_addr = submap_dst_addr;
7264 }
7265 else
7266 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
7267 }
7268 else
7269 panic("vector_upl_get_submap was passed a NULL UPL\n");
7270 }
7271
7272 void
7273 vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
7274 {
7275 if(vector_upl_is_valid(upl)) {
7276 uint32_t i = 0;
7277 vector_upl_t vector_upl = upl->vector_upl;
7278
7279 if(vector_upl) {
7280 for(i = 0; i < vector_upl->num_upls; i++) {
7281 if(vector_upl->upl_elems[i] == subupl)
7282 break;
7283 }
7284
7285 if(i == vector_upl->num_upls)
7286 panic("setting sub-upl iostate when none exists");
7287
7288 vector_upl->upl_iostates[i].offset = offset;
7289 if(size < PAGE_SIZE)
7290 size = PAGE_SIZE;
7291 vector_upl->upl_iostates[i].size = size;
7292 }
7293 else
7294 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
7295 }
7296 else
7297 panic("vector_upl_set_iostate was passed a NULL UPL\n");
7298 }
7299
7300 void
7301 vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
7302 {
7303 if(vector_upl_is_valid(upl)) {
7304 uint32_t i = 0;
7305 vector_upl_t vector_upl = upl->vector_upl;
7306
7307 if(vector_upl) {
7308 for(i = 0; i < vector_upl->num_upls; i++) {
7309 if(vector_upl->upl_elems[i] == subupl)
7310 break;
7311 }
7312
7313 if(i == vector_upl->num_upls)
7314 panic("getting sub-upl iostate when none exists");
7315
7316 *offset = vector_upl->upl_iostates[i].offset;
7317 *size = vector_upl->upl_iostates[i].size;
7318 }
7319 else
7320 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
7321 }
7322 else
7323 panic("vector_upl_get_iostate was passed a NULL UPL\n");
7324 }
7325
7326 void
7327 vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
7328 {
7329 if(vector_upl_is_valid(upl)) {
7330 vector_upl_t vector_upl = upl->vector_upl;
7331 if(vector_upl) {
7332 if(index < vector_upl->num_upls) {
7333 *offset = vector_upl->upl_iostates[index].offset;
7334 *size = vector_upl->upl_iostates[index].size;
7335 }
7336 else
7337 *offset = *size = 0;
7338 }
7339 else
7340 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
7341 }
7342 else
7343 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
7344 }
7345
7346 upl_page_info_t *
7347 upl_get_internal_vectorupl_pagelist(upl_t upl)
7348 {
7349 return ((vector_upl_t)(upl->vector_upl))->pagelist;
7350 }
7351
7352 void *
7353 upl_get_internal_vectorupl(upl_t upl)
7354 {
7355 return upl->vector_upl;
7356 }
7357
7358 vm_size_t
7359 upl_get_internal_pagelist_offset(void)
7360 {
7361 return sizeof(struct upl);
7362 }
7363
7364 void
7365 upl_clear_dirty(
7366 upl_t upl,
7367 boolean_t value)
7368 {
7369 if (value) {
7370 upl->flags |= UPL_CLEAR_DIRTY;
7371 } else {
7372 upl->flags &= ~UPL_CLEAR_DIRTY;
7373 }
7374 }
7375
7376
7377 #ifdef MACH_BSD
7378
7379 boolean_t upl_device_page(upl_page_info_t *upl)
7380 {
7381 return(UPL_DEVICE_PAGE(upl));
7382 }
7383 boolean_t upl_page_present(upl_page_info_t *upl, int index)
7384 {
7385 return(UPL_PAGE_PRESENT(upl, index));
7386 }
7387 boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
7388 {
7389 return(UPL_SPECULATIVE_PAGE(upl, index));
7390 }
7391 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
7392 {
7393 return(UPL_DIRTY_PAGE(upl, index));
7394 }
7395 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
7396 {
7397 return(UPL_VALID_PAGE(upl, index));
7398 }
7399 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
7400 {
7401 return(UPL_PHYS_PAGE(upl, index));
7402 }
7403
7404
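/*
 * Debug helper: walk the inactive, throttled, zero-fill and active page
 * queues and print, for each group, how many pages are dirty, marked for
 * pageout and precious (the "IN Q" and "AC Q" lines below).
 */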
7405 void
7406 vm_countdirtypages(void)
7407 {
7408 vm_page_t m;
7409 int dpages;
7410 int pgopages;
7411 int precpages;
7412
7413
7414 dpages=0;
7415 pgopages=0;
7416 precpages=0;
7417
7418 vm_page_lock_queues();
7419 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
7420 do {
7421 if (m ==(vm_page_t )0) break;
7422
7423 if(m->dirty) dpages++;
7424 if(m->pageout) pgopages++;
7425 if(m->precious) precpages++;
7426
7427 assert(m->object != kernel_object);
7428 m = (vm_page_t) queue_next(&m->pageq);
7429 if (m ==(vm_page_t )0) break;
7430
7431 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
7432 vm_page_unlock_queues();
7433
7434 vm_page_lock_queues();
7435 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
7436 do {
7437 if (m ==(vm_page_t )0) break;
7438
7439 dpages++;
7440 assert(m->dirty);
7441 assert(!m->pageout);
7442 assert(m->object != kernel_object);
7443 m = (vm_page_t) queue_next(&m->pageq);
7444 if (m ==(vm_page_t )0) break;
7445
7446 } while (!queue_end(&vm_page_queue_throttled,(queue_entry_t) m));
7447 vm_page_unlock_queues();
7448
7449 vm_page_lock_queues();
7450 m = (vm_page_t) queue_first(&vm_page_queue_zf);
7451 do {
7452 if (m ==(vm_page_t )0) break;
7453
7454 if(m->dirty) dpages++;
7455 if(m->pageout) pgopages++;
7456 if(m->precious) precpages++;
7457
7458 assert(m->object != kernel_object);
7459 m = (vm_page_t) queue_next(&m->pageq);
7460 if (m ==(vm_page_t )0) break;
7461
7462 } while (!queue_end(&vm_page_queue_zf,(queue_entry_t) m));
7463 vm_page_unlock_queues();
7464
7465 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
7466
7467 dpages=0;
7468 pgopages=0;
7469 precpages=0;
7470
7471 vm_page_lock_queues();
7472 m = (vm_page_t) queue_first(&vm_page_queue_active);
7473
7474 do {
7475 if(m == (vm_page_t )0) break;
7476 if(m->dirty) dpages++;
7477 if(m->pageout) pgopages++;
7478 if(m->precious) precpages++;
7479
7480 assert(m->object != kernel_object);
7481 m = (vm_page_t) queue_next(&m->pageq);
7482 if(m == (vm_page_t )0) break;
7483
7484 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
7485 vm_page_unlock_queues();
7486
7487 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
7488
7489 }
7490 #endif /* MACH_BSD */
7491
7492 ppnum_t upl_get_highest_page(
7493 upl_t upl)
7494 {
7495 return upl->highest_page;
7496 }
7497
7498 upl_size_t upl_get_size(
7499 upl_t upl)
7500 {
7501 return upl->size;
7502 }
7503
7504 #if UPL_DEBUG
7505 kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
7506 {
7507 upl->ubc_alias1 = alias1;
7508 upl->ubc_alias2 = alias2;
7509 return KERN_SUCCESS;
7510 }
7511 int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
7512 {
7513 if(al)
7514 *al = upl->ubc_alias1;
7515 if(al2)
7516 *al2 = upl->ubc_alias2;
7517 return KERN_SUCCESS;
7518 }
7519 #endif /* UPL_DEBUG */
7520
7521
7522
7523 #if MACH_KDB
7524 #include <ddb/db_output.h>
7525 #include <ddb/db_print.h>
7526 #include <vm/vm_print.h>
7527
7528 #define printf kdbprintf
7529 void db_pageout(void);
7530
7531 void
7532 db_vm(void)
7533 {
7534
7535 iprintf("VM Statistics:\n");
7536 db_indent += 2;
7537 iprintf("pages:\n");
7538 db_indent += 2;
7539 iprintf("activ %5d inact %5d free %5d",
7540 vm_page_active_count, vm_page_inactive_count,
7541 vm_page_free_count);
7542 printf(" wire %5d gobbl %5d\n",
7543 vm_page_wire_count, vm_page_gobble_count);
7544 db_indent -= 2;
7545 iprintf("target:\n");
7546 db_indent += 2;
7547 iprintf("min %5d inact %5d free %5d",
7548 vm_page_free_min, vm_page_inactive_target,
7549 vm_page_free_target);
7550 printf(" resrv %5d\n", vm_page_free_reserved);
7551 db_indent -= 2;
7552 iprintf("pause:\n");
7553 db_pageout();
7554 db_indent -= 2;
7555 }
7556
7557 #if MACH_COUNTERS
7558 extern int c_laundry_pages_freed;
7559 #endif /* MACH_COUNTERS */
7560
7561 void
7562 db_pageout(void)
7563 {
7564 iprintf("Pageout Statistics:\n");
7565 db_indent += 2;
7566 iprintf("active %5d inactv %5d\n",
7567 vm_pageout_active, vm_pageout_inactive);
7568 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
7569 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
7570 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
7571 iprintf("used %5d clean %5d dirty %5d\n",
7572 vm_pageout_inactive_used, vm_pageout_inactive_clean,
7573 vm_pageout_inactive_dirty);
7574 #if MACH_COUNTERS
7575 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
7576 #endif /* MACH_COUNTERS */
7577 #if MACH_CLUSTER_STATS
7578 iprintf("Cluster Statistics:\n");
7579 db_indent += 2;
7580 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
7581 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
7582 vm_pageout_cluster_collisions);
7583 iprintf("clusters %5d conversions %5d\n",
7584 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
7585 db_indent -= 2;
7586 iprintf("Target Statistics:\n");
7587 db_indent += 2;
7588 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
7589 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
7590 vm_pageout_target_page_freed);
7591 db_indent -= 2;
7592 #endif /* MACH_CLUSTER_STATS */
7593 db_indent -= 2;
7594 }
7595
7596 #endif /* MACH_KDB */