1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71 #include <mach_kdb.h>
72 #include <advisory_pageout.h>
73
74 #include <mach/mach_types.h>
75 #include <mach/memory_object.h>
76 #include <mach/memory_object_default.h>
77 #include <mach/memory_object_control_server.h>
78 #include <mach/mach_host_server.h>
79 #include <mach/upl.h>
80 #include <mach/vm_map.h>
81 #include <mach/vm_param.h>
82 #include <mach/vm_statistics.h>
83 #include <mach/sdt.h>
84
85 #include <kern/kern_types.h>
86 #include <kern/counters.h>
87 #include <kern/host_statistics.h>
88 #include <kern/machine.h>
89 #include <kern/misc_protos.h>
90 #include <kern/sched.h>
91 #include <kern/thread.h>
92 #include <kern/xpr.h>
93 #include <kern/kalloc.h>
94
95 #include <machine/vm_tuning.h>
96 #include <machine/commpage.h>
97
98 #if CONFIG_EMBEDDED
99 #include <sys/kern_memorystatus.h>
100 #endif
101
102 #include <vm/pmap.h>
103 #include <vm/vm_fault.h>
104 #include <vm/vm_map.h>
105 #include <vm/vm_object.h>
106 #include <vm/vm_page.h>
107 #include <vm/vm_pageout.h>
108 #include <vm/vm_protos.h> /* must be last */
109 #include <vm/memory_object.h>
110 #include <vm/vm_purgeable_internal.h>
111
112 /*
113 * ENCRYPTED SWAP:
114 */
115 #include <../bsd/crypto/aes/aes.h>
116 extern u_int32_t random(void); /* from <libkern/libkern.h> */
117
118 #if UPL_DEBUG
119 #include <libkern/OSDebug.h>
120 #endif
121
122 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */
123 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100
124 #endif
125
126 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
127 #ifdef CONFIG_EMBEDDED
128 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
129 #else
130 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
131 #endif
132 #endif
133
134 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
135 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
136 #endif
137
138 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
139 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
140 #endif
141
142 #ifndef VM_PAGE_LAUNDRY_MAX
143 #define VM_PAGE_LAUNDRY_MAX 16UL /* maximum pageouts on a given pageout queue */
144 #endif /* VM_PAGE_LAUNDRY_MAX */
145
146 #ifndef VM_PAGEOUT_BURST_WAIT
147 #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
148 #endif /* VM_PAGEOUT_BURST_WAIT */
149
150 #ifndef VM_PAGEOUT_EMPTY_WAIT
151 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
152 #endif /* VM_PAGEOUT_EMPTY_WAIT */
153
154 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
155 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
156 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
157
158 #ifndef VM_PAGEOUT_IDLE_WAIT
159 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
160 #endif /* VM_PAGEOUT_IDLE_WAIT */
161
162 #ifndef VM_PAGE_SPECULATIVE_TARGET
163 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / 20)
164 #endif /* VM_PAGE_SPECULATIVE_TARGET */
165
166 #ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
167 #define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
168 #endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */
169
170
171 /*
172 * To obtain a reasonable LRU approximation, the inactive queue
173 * needs to be large enough to give pages on it a chance to be
174 * referenced a second time. This macro defines the fraction
175 * of active+inactive pages that should be inactive.
176 * The pageout daemon uses it to update vm_page_inactive_target.
177 *
178 * If vm_page_free_count falls below vm_page_free_target and
179 * vm_page_inactive_count is below vm_page_inactive_target,
180 * then the pageout daemon starts running.
181 */
182
183 #ifndef VM_PAGE_INACTIVE_TARGET
184 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
185 #endif /* VM_PAGE_INACTIVE_TARGET */
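/*
 * Worked example (illustrative count, not taken from this file): with
 * 300000 active + inactive + speculative pages,
 *
 *	VM_PAGE_INACTIVE_TARGET(300000) = 300000 / 3 = 100000
 *
 * i.e. vm_pageout_scan() tries to keep roughly one third of the
 * "available" pages on the inactive queue.
 */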
186
187 /*
188 * Once the pageout daemon starts running, it keeps going
189 * until vm_page_free_count meets or exceeds vm_page_free_target.
190 */
191
192 #ifndef VM_PAGE_FREE_TARGET
193 #ifdef CONFIG_EMBEDDED
194 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
195 #else
196 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
197 #endif
198 #endif /* VM_PAGE_FREE_TARGET */
199
200 /*
201 * The pageout daemon always starts running once vm_page_free_count
202 * falls below vm_page_free_min.
203 */
204
205 #ifndef VM_PAGE_FREE_MIN
206 #ifdef CONFIG_EMBEDDED
207 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
208 #else
209 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
210 #endif
211 #endif /* VM_PAGE_FREE_MIN */
212
213 #define VM_PAGE_FREE_MIN_LIMIT 1500
214 #define VM_PAGE_FREE_TARGET_LIMIT 2000
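/*
 * Worked example for the non-CONFIG_EMBEDDED variants above
 * (illustrative count, not taken from this file): with 80000 pages,
 *
 *	VM_PAGE_FREE_TARGET(80000) = 15 + 80000 / 80  = 1015 pages
 *	VM_PAGE_FREE_MIN(80000)    = 10 + 80000 / 100 =  810 pages
 *
 * VM_PAGE_FREE_MIN_LIMIT and VM_PAGE_FREE_TARGET_LIMIT presumably cap
 * the computed values where they are applied; that use is outside this
 * excerpt.
 */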
215
216
217 /*
218 * When vm_page_free_count falls below vm_page_free_reserved,
219 * only vm-privileged threads can allocate pages. vm-privilege
220 * allows the pageout daemon and default pager (and any other
221 * associated threads needed for default pageout) to continue
222 * operation by dipping into the reserved pool of pages.
223 */
224
225 #ifndef VM_PAGE_FREE_RESERVED
226 #define VM_PAGE_FREE_RESERVED(n) \
227 ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
228 #endif /* VM_PAGE_FREE_RESERVED */
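/*
 * Worked example: with VM_PAGE_LAUNDRY_MAX at its default of 16,
 * VM_PAGE_FREE_RESERVED(n) evaluates to 96 + n pages held back for
 * vm-privileged threads. The value of "n" is chosen by the caller
 * (commonly a per-processor count, but that reading is an assumption;
 * the call site is outside this excerpt).
 */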
229
230 /*
231 * When we dequeue pages from the inactive list, they are
232 * reactivated (i.e., put back on the active queue) if referenced.
233 * However, it is possible to starve the free list if other
234 * processors are referencing pages faster than we can turn off
235 * the referenced bit. So we limit the number of reactivations
236 * we will make per call of vm_pageout_scan().
237 */
238 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
239 #ifndef VM_PAGE_REACTIVATE_LIMIT
240 #ifdef CONFIG_EMBEDDED
241 #define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
242 #else
243 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
244 #endif
245 #endif /* VM_PAGE_REACTIVATE_LIMIT */
246 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 100
247
248
249 /*
250 * Exported variable used to broadcast the activation of the pageout scan.
251 * The Working Set code uses this to throttle its use of pmap removes. In
252 * this way, code which runs within memory in an uncontested context does
253 * not keep encountering soft faults.
254 */
255
256 unsigned int vm_pageout_scan_event_counter = 0;
257
258 /*
259 * Forward declarations for internal routines.
260 */
261
262 static void vm_pageout_garbage_collect(int);
263 static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
264 static void vm_pageout_iothread_external(void);
265 static void vm_pageout_iothread_internal(void);
266
267 extern void vm_pageout_continue(void);
268 extern void vm_pageout_scan(void);
269
270 static thread_t vm_pageout_external_iothread = THREAD_NULL;
271 static thread_t vm_pageout_internal_iothread = THREAD_NULL;
272
273 unsigned int vm_pageout_reserved_internal = 0;
274 unsigned int vm_pageout_reserved_really = 0;
275
276 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
277 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
278 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
279 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
280 unsigned int vm_pageout_deadlock_relief = 0;
281 unsigned int vm_pageout_inactive_relief = 0;
282 unsigned int vm_pageout_burst_active_throttle = 0;
283 unsigned int vm_pageout_burst_inactive_throttle = 0;
284
285 /*
286 * Protection against zero fill flushing live working sets derived
287 * from existing backing store and files
288 */
289 unsigned int vm_accellerate_zf_pageout_trigger = 400;
290 unsigned int zf_queue_min_count = 100;
291 unsigned int vm_zf_queue_count = 0;
292
293 #if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */
294 unsigned int vm_zf_count = 0;
295 #else
296 uint64_t vm_zf_count __attribute__((aligned(8))) = 0;
297 #endif
298
299 /*
300 * These variables record the pageout daemon's actions:
301 * how many pages it looks at and what happens to those pages.
302 * No locking needed because only one thread modifies the variables.
303 */
304
305 unsigned int vm_pageout_active = 0; /* debugging */
306 unsigned int vm_pageout_inactive = 0; /* debugging */
307 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
308 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
309 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
310 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
311 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
312 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
313 unsigned int vm_pageout_inactive_used = 0; /* debugging */
314 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
315 unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
316 unsigned int vm_pageout_inactive_deactivated = 0; /* debugging */
317 unsigned int vm_pageout_inactive_zf = 0; /* debugging */
318 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
319 unsigned int vm_pageout_purged_objects = 0; /* debugging */
320 unsigned int vm_stat_discard = 0; /* debugging */
321 unsigned int vm_stat_discard_sent = 0; /* debugging */
322 unsigned int vm_stat_discard_failure = 0; /* debugging */
323 unsigned int vm_stat_discard_throttle = 0; /* debugging */
324 unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */
325 unsigned int vm_pageout_catch_ups = 0; /* debugging */
326 unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */
327
328 unsigned int vm_pageout_scan_active_throttled = 0;
329 unsigned int vm_pageout_scan_inactive_throttled = 0;
330 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
331 unsigned int vm_pageout_scan_throttle_aborted = 0; /* debugging */
332 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
333 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
334 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
335 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
336 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
337
338 unsigned int vm_page_speculative_count_drifts = 0;
339 unsigned int vm_page_speculative_count_drift_max = 0;
340
341 /*
342 * Backing store throttle when BS is exhausted
343 */
344 unsigned int vm_backing_store_low = 0;
345
346 unsigned int vm_pageout_out_of_line = 0;
347 unsigned int vm_pageout_in_place = 0;
348
349 unsigned int vm_page_steal_pageout_page = 0;
350
351 /*
352 * ENCRYPTED SWAP:
353 * counters and statistics...
354 */
355 unsigned long vm_page_decrypt_counter = 0;
356 unsigned long vm_page_decrypt_for_upl_counter = 0;
357 unsigned long vm_page_encrypt_counter = 0;
358 unsigned long vm_page_encrypt_abort_counter = 0;
359 unsigned long vm_page_encrypt_already_encrypted_counter = 0;
360 boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */
361
362 struct vm_pageout_queue vm_pageout_queue_internal;
363 struct vm_pageout_queue vm_pageout_queue_external;
364
365 unsigned int vm_page_speculative_target = 0;
366
367 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
368
369 boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
370
371 #if DEVELOPMENT || DEBUG
372 unsigned long vm_cs_validated_resets = 0;
373 #endif
374
375 /*
376 * Routine: vm_backing_store_disable
377 * Purpose:
378 * Suspend non-privileged threads wishing to extend
379 * backing store when we are low on backing store
380 * (Synchronized by caller)
381 */
382 void
383 vm_backing_store_disable(
384 boolean_t disable)
385 {
386 if(disable) {
387 vm_backing_store_low = 1;
388 } else {
389 if(vm_backing_store_low) {
390 vm_backing_store_low = 0;
391 thread_wakeup((event_t) &vm_backing_store_low);
392 }
393 }
394 }
395
396
397 #if MACH_CLUSTER_STATS
398 unsigned long vm_pageout_cluster_dirtied = 0;
399 unsigned long vm_pageout_cluster_cleaned = 0;
400 unsigned long vm_pageout_cluster_collisions = 0;
401 unsigned long vm_pageout_cluster_clusters = 0;
402 unsigned long vm_pageout_cluster_conversions = 0;
403 unsigned long vm_pageout_target_collisions = 0;
404 unsigned long vm_pageout_target_page_dirtied = 0;
405 unsigned long vm_pageout_target_page_freed = 0;
406 #define CLUSTER_STAT(clause) clause
407 #else /* MACH_CLUSTER_STATS */
408 #define CLUSTER_STAT(clause)
409 #endif /* MACH_CLUSTER_STATS */
410
411 /*
412 * Routine: vm_pageout_object_terminate
413 * Purpose:
414 * Destroy the pageout_object, and perform all of the
415 * required cleanup actions.
416 *
417 * In/Out conditions:
418 * The object must be locked, and will be returned locked.
419 */
420 void
421 vm_pageout_object_terminate(
422 vm_object_t object)
423 {
424 vm_object_t shadow_object;
425
426 /*
427 * Deal with the deallocation (last reference) of a pageout object
428 * (used for cleaning-in-place) by dropping the paging references/
429 * freeing pages in the original object.
430 */
431
432 assert(object->pageout);
433 shadow_object = object->shadow;
434 vm_object_lock(shadow_object);
435
436 while (!queue_empty(&object->memq)) {
437 vm_page_t p, m;
438 vm_object_offset_t offset;
439
440 p = (vm_page_t) queue_first(&object->memq);
441
442 assert(p->private);
443 assert(p->pageout);
444 p->pageout = FALSE;
445 assert(!p->cleaning);
446
447 offset = p->offset;
448 VM_PAGE_FREE(p);
449 p = VM_PAGE_NULL;
450
451 m = vm_page_lookup(shadow_object,
452 offset + object->shadow_offset);
453
454 if(m == VM_PAGE_NULL)
455 continue;
456 assert(m->cleaning);
457 /* used as a trigger on upl_commit etc to recognize the */
458 /* pageout daemon's subsequent desire to pageout a cleaning */
459 /* page. When the bit is on, the upl commit code will */
460 /* respect the pageout bit in the target page over the */
461 /* caller's page list indication */
462 m->dump_cleaning = FALSE;
463
464 assert((m->dirty) || (m->precious) ||
465 (m->busy && m->cleaning));
466
467 /*
468 * Handle the trusted pager throttle.
469 * Also decrement the burst throttle (if external).
470 */
471 vm_page_lock_queues();
472 if (m->laundry) {
473 vm_pageout_throttle_up(m);
474 }
475
476 /*
477 * Handle the "target" page(s). These pages are to be freed if
478 * successfully cleaned. Target pages are always busy, and are
479 * wired exactly once. The initial target pages are not mapped,
480 * (so cannot be referenced or modified) but converted target
481 * pages may have been modified between the selection as an
482 * adjacent page and conversion to a target.
483 */
484 if (m->pageout) {
485 assert(m->busy);
486 assert(m->wire_count == 1);
487 m->cleaning = FALSE;
488 m->encrypted_cleaning = FALSE;
489 m->pageout = FALSE;
490 #if MACH_CLUSTER_STATS
491 if (m->wanted) vm_pageout_target_collisions++;
492 #endif
493 /*
494 * Revoke all access to the page. Since the object is
495 * locked, and the page is busy, this prevents the page
496 * from being dirtied after the pmap_disconnect() call
497 * returns.
498 *
499 * Since the page is left "dirty" but "not modified", we
500 * can detect whether the page was redirtied during
501 * pageout by checking the modify state.
502 */
503 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
504 m->dirty = TRUE;
505 else
506 m->dirty = FALSE;
507
508 if (m->dirty) {
509 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
510 vm_page_unwire(m, TRUE); /* reactivates */
511 VM_STAT_INCR(reactivations);
512 PAGE_WAKEUP_DONE(m);
513 } else {
514 CLUSTER_STAT(vm_pageout_target_page_freed++;)
515 vm_page_free(m);/* clears busy, etc. */
516 }
517 vm_page_unlock_queues();
518 continue;
519 }
520 /*
521 * Handle the "adjacent" pages. These pages were cleaned in
522 * place, and should be left alone.
523 * If the page was referenced while being cleaned,
524 * make it active again; otherwise deactivate it.
525 */
526 if (!m->active && !m->inactive && !m->throttled && !m->private) {
527 if (m->reference)
528 vm_page_activate(m);
529 else
530 vm_page_deactivate(m);
531 }
532 if((m->busy) && (m->cleaning)) {
533
534 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
535 m->busy = FALSE;
536
537 /* We do not re-set m->dirty ! */
538 /* The page was busy so no extraneous activity */
539 /* could have occurred. COPY_INTO is a read into the */
540 /* new pages. CLEAN_IN_PLACE does actually write */
541 /* out the pages but handling outside of this code */
542 /* will take care of resetting dirty. We clear the */
543 /* modify however for the Programmed I/O case. */
544 pmap_clear_modify(m->phys_page);
545
546 m->absent = FALSE;
547 m->overwriting = FALSE;
548 } else if (m->overwriting) {
549 /* alternate request page list, write to page_list */
550 /* case. Occurs when the original page was wired */
551 /* at the time of the list request */
552 assert(VM_PAGE_WIRED(m));
553 vm_page_unwire(m, TRUE); /* reactivates */
554 m->overwriting = FALSE;
555 } else {
556 /*
557 * Set the dirty state according to whether or not the page was
558 * modified during the pageout. Note that we purposefully do
559 * NOT call pmap_clear_modify since the page is still mapped.
560 * If the page were to be dirtied between the two calls,
561 * this fact would be lost. This code is only necessary to
562 * maintain statistics, since the pmap module is always
563 * consulted if m->dirty is false.
564 */
565 #if MACH_CLUSTER_STATS
566 m->dirty = pmap_is_modified(m->phys_page);
567
568 if (m->dirty) vm_pageout_cluster_dirtied++;
569 else vm_pageout_cluster_cleaned++;
570 if (m->wanted) vm_pageout_cluster_collisions++;
571 #else
572 m->dirty = 0;
573 #endif
574 }
575 m->cleaning = FALSE;
576 m->encrypted_cleaning = FALSE;
577
578 /*
579 * Wakeup any thread waiting for the page to be un-cleaning.
580 */
581 PAGE_WAKEUP(m);
582 vm_page_unlock_queues();
583 }
584 /*
585 * Account for the paging reference taken in vm_paging_object_allocate.
586 */
587 vm_object_activity_end(shadow_object);
588 vm_object_unlock(shadow_object);
589
590 assert(object->ref_count == 0);
591 assert(object->paging_in_progress == 0);
592 assert(object->activity_in_progress == 0);
593 assert(object->resident_page_count == 0);
594 return;
595 }
596
597 /*
598 * Routine: vm_pageclean_setup
599 *
600 * Purpose: setup a page to be cleaned (made non-dirty), but not
601 * necessarily flushed from the VM page cache.
602 * This is accomplished by cleaning in place.
603 *
604 * The page must not be busy, and new_object
605 * must be locked.
606 *
607 */
608 void
609 vm_pageclean_setup(
610 vm_page_t m,
611 vm_page_t new_m,
612 vm_object_t new_object,
613 vm_object_offset_t new_offset)
614 {
615 assert(!m->busy);
616 #if 0
617 assert(!m->cleaning);
618 #endif
619
620 XPR(XPR_VM_PAGEOUT,
621 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
622 m->object, m->offset, m,
623 new_m, new_offset);
624
625 pmap_clear_modify(m->phys_page);
626
627 /*
628 * Mark original page as cleaning in place.
629 */
630 m->cleaning = TRUE;
631 m->dirty = TRUE;
632 m->precious = FALSE;
633
634 /*
635 * Convert the fictitious page to a private shadow of
636 * the real page.
637 */
638 assert(new_m->fictitious);
639 assert(new_m->phys_page == vm_page_fictitious_addr);
640 new_m->fictitious = FALSE;
641 new_m->private = TRUE;
642 new_m->pageout = TRUE;
643 new_m->phys_page = m->phys_page;
644
645 vm_page_lockspin_queues();
646 vm_page_wire(new_m);
647 vm_page_unlock_queues();
648
649 vm_page_insert(new_m, new_object, new_offset);
650 assert(!new_m->wanted);
651 new_m->busy = FALSE;
652 }
653
654 /*
655 * Routine: vm_pageout_initialize_page
656 * Purpose:
657 * Causes the specified page to be initialized in
658 * the appropriate memory object. This routine is used to push
659 * pages into a copy-object when they are modified in the
660 * permanent object.
661 *
662 * The page is moved to a temporary object and paged out.
663 *
664 * In/out conditions:
665 * The page in question must not be on any pageout queues.
666 * The object to which it belongs must be locked.
667 * The page must be busy, but not hold a paging reference.
668 *
669 * Implementation:
670 * Move this page to a completely new object.
671 */
672 void
673 vm_pageout_initialize_page(
674 vm_page_t m)
675 {
676 vm_object_t object;
677 vm_object_offset_t paging_offset;
678 vm_page_t holding_page;
679 memory_object_t pager;
680
681 XPR(XPR_VM_PAGEOUT,
682 "vm_pageout_initialize_page, page 0x%X\n",
683 m, 0, 0, 0, 0);
684 assert(m->busy);
685
686 /*
687 * Verify that we really want to clean this page
688 */
689 assert(!m->absent);
690 assert(!m->error);
691 assert(m->dirty);
692
693 /*
694 * Create a paging reference to let us play with the object.
695 */
696 object = m->object;
697 paging_offset = m->offset + object->paging_offset;
698
699 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
700 VM_PAGE_FREE(m);
701 panic("reservation without pageout?"); /* alan */
702 vm_object_unlock(object);
703
704 return;
705 }
706
707 /*
708 * If there's no pager, then we can't clean the page. This should
709 * never happen since this should be a copy object and therefore not
710 * an external object, so the pager should always be there.
711 */
712
713 pager = object->pager;
714
715 if (pager == MEMORY_OBJECT_NULL) {
716 VM_PAGE_FREE(m);
717 panic("missing pager for copy object");
718 return;
719 }
720
721 /* set the page for future call to vm_fault_list_request */
722 vm_object_paging_begin(object);
723 holding_page = NULL;
724
725 pmap_clear_modify(m->phys_page);
726 m->dirty = TRUE;
727 m->busy = TRUE;
728 m->list_req_pending = TRUE;
729 m->cleaning = TRUE;
730 m->pageout = TRUE;
731
732 vm_page_lockspin_queues();
733 vm_page_wire(m);
734 vm_page_unlock_queues();
735
736 vm_object_unlock(object);
737
738 /*
739 * Write the data to its pager.
740 * Note that the data is passed by naming the new object,
741 * not a virtual address; the pager interface has been
742 * manipulated to use the "internal memory" data type.
743 * [The object reference from its allocation is donated
744 * to the eventual recipient.]
745 */
746 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
747
748 vm_object_lock(object);
749 vm_object_paging_end(object);
750 }
751
752 #if MACH_CLUSTER_STATS
753 #define MAXCLUSTERPAGES 16
754 struct {
755 unsigned long pages_in_cluster;
756 unsigned long pages_at_higher_offsets;
757 unsigned long pages_at_lower_offsets;
758 } cluster_stats[MAXCLUSTERPAGES];
759 #endif /* MACH_CLUSTER_STATS */
760
761
762 /*
763 * vm_pageout_cluster:
764 *
765 * Given a page, queue it to the appropriate I/O thread,
766 * which will page it out and attempt to clean adjacent pages
767 * in the same operation.
768 *
769 * The page must be busy, and the object and queues locked. We will take a
770 * paging reference to prevent deallocation or collapse when we
771 * release the object lock back at the call site. The I/O thread
772 * is responsible for consuming this reference
773 *
774 * The page must not be on any pageout queue.
775 */
776
777 void
778 vm_pageout_cluster(vm_page_t m)
779 {
780 vm_object_t object = m->object;
781 struct vm_pageout_queue *q;
782
783
784 XPR(XPR_VM_PAGEOUT,
785 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
786 object, m->offset, m, 0, 0);
787
788 VM_PAGE_CHECK(m);
789
790 /*
791 * Only a certain kind of page is appreciated here.
792 */
793 assert(m->busy && (m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
794 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
795 assert(!m->throttled);
796
797 /*
798 * protect the object from collapse -
799 * locking in the object's paging_offset.
800 */
801 vm_object_paging_begin(object);
802
803 /*
804 * set the page for future call to vm_fault_list_request
805 * page should already be marked busy
806 */
807 vm_page_wire(m);
808 m->list_req_pending = TRUE;
809 m->cleaning = TRUE;
810 m->pageout = TRUE;
811
812 if (object->internal == TRUE)
813 q = &vm_pageout_queue_internal;
814 else
815 q = &vm_pageout_queue_external;
816
817 /*
818 * pgo_laundry count is tied to the laundry bit
819 */
820 m->laundry = TRUE;
821 q->pgo_laundry++;
822
823 m->pageout_queue = TRUE;
824 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
825
826 if (q->pgo_idle == TRUE) {
827 q->pgo_idle = FALSE;
828 thread_wakeup((event_t) &q->pgo_pending);
829 }
830
831 VM_PAGE_CHECK(m);
832 }
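/*
 * Minimal usage sketch for vm_pageout_cluster(), based only on the
 * contract documented above (not on how the pageout daemon itself
 * calls it); kept under #if 0 since the locking context belongs to
 * the real caller.
 */
#if 0
static void
vm_pageout_cluster_usage_sketch(vm_page_t m)
{
	vm_object_t	object = m->object;

	vm_object_lock(object);			/* object must be locked */
	vm_page_lock_queues();			/* and so must the page queues */

	/* only dirty/precious, busy, unwired pages not already queued qualify */
	assert(m->busy && (m->dirty || m->precious) && !VM_PAGE_WIRED(m));

	vm_pageout_cluster(m);			/* takes a paging reference;
						 * the I/O thread consumes it */

	vm_page_unlock_queues();
	vm_object_unlock(object);		/* safe: the paging reference
						 * prevents collapse */
}
#endif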
833
834
835 unsigned long vm_pageout_throttle_up_count = 0;
836
837 /*
838 * A page is back from laundry or we are stealing it back from
839 * the laundering state. See if there are some pages waiting to
840 * go to laundry and if we can let some of them go now.
841 *
842 * Object and page queues must be locked.
843 */
844 void
845 vm_pageout_throttle_up(
846 vm_page_t m)
847 {
848 struct vm_pageout_queue *q;
849
850 assert(m->object != VM_OBJECT_NULL);
851 assert(m->object != kernel_object);
852
853 vm_pageout_throttle_up_count++;
854
855 if (m->object->internal == TRUE)
856 q = &vm_pageout_queue_internal;
857 else
858 q = &vm_pageout_queue_external;
859
860 if (m->pageout_queue == TRUE) {
861
862 queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
863 m->pageout_queue = FALSE;
864
865 m->pageq.next = NULL;
866 m->pageq.prev = NULL;
867
868 vm_object_paging_end(m->object);
869 }
870 if (m->laundry == TRUE) {
871 m->laundry = FALSE;
872 q->pgo_laundry--;
873
874 if (q->pgo_throttled == TRUE) {
875 q->pgo_throttled = FALSE;
876 thread_wakeup((event_t) &q->pgo_laundry);
877 }
878 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
879 q->pgo_draining = FALSE;
880 thread_wakeup((event_t) (&q->pgo_laundry+1));
881 }
882 }
883 }
884
885
886 /*
887 * vm_pageout_scan does the dirty work for the pageout daemon.
888 * It returns with vm_page_queue_free_lock held and
889 * vm_page_free_wanted == 0.
890 */
891
892 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT (3 * MAX_UPL_TRANSFER)
893
894 #define FCS_IDLE 0
895 #define FCS_DELAYED 1
896 #define FCS_DEADLOCK_DETECTED 2
897
898 struct flow_control {
899 int state;
900 mach_timespec_t ts;
901 };
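/*
 * A sketch of how vm_pageout_scan() (below) drives this state machine
 * when the default-pager (internal) queue is throttled:
 *
 *	FCS_IDLE -> FCS_DELAYED
 *		arm a timer vm_pageout_deadlock_wait milliseconds out
 *	FCS_DELAYED -> FCS_DEADLOCK_DETECTED
 *		the timer expired while the queue was still throttled;
 *		set vm_pageout_deadlock_target and start moving pages
 *	FCS_DEADLOCK_DETECTED -> FCS_DELAYED
 *		once that target is consumed, re-arm the timer
 *		(via the reset_deadlock_timer label)
 *
 * The state falls back to FCS_IDLE whenever the throttle clears or the
 * scan reaches consider_inactive normally.
 */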
902
903
904 /*
905 * VM memory pressure monitoring.
906 *
907 * vm_pageout_scan() keeps track of the number of pages it considers and
908 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
909 *
910 * compute_memory_pressure() is called every second from compute_averages()
911 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
912 * of reclaimed pages in a new vm_pageout_stat[] bucket.
913 *
914 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
915 * The caller provides the number of seconds ("nsecs") worth of statistics
916 * it wants, up to 30 seconds.
917 * It computes the number of pages reclaimed in the past "nsecs" seconds and
918 * also returns the number of pages the system still needs to reclaim at this
919 * moment in time.
920 */
921 #define VM_PAGEOUT_STAT_SIZE 31
922 struct vm_pageout_stat {
923 unsigned int considered;
924 unsigned int reclaimed;
925 } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0}, };
926 unsigned int vm_pageout_stat_now = 0;
927 unsigned int vm_memory_pressure = 0;
928
929 #define VM_PAGEOUT_STAT_BEFORE(i) \
930 (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
931 #define VM_PAGEOUT_STAT_AFTER(i) \
932 (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
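/*
 * Example of the ring arithmetic above, with VM_PAGEOUT_STAT_SIZE == 31:
 *
 *	VM_PAGEOUT_STAT_AFTER(30)  == 0		(wraps forward)
 *	VM_PAGEOUT_STAT_BEFORE(0)  == 30	(wraps backward)
 *
 * compute_memory_pressure() advances vm_pageout_stat_now once per
 * second, so the buckets behind "now" hold roughly the last 30 seconds
 * of "considered"/"reclaimed" counts, which mach_vm_pressure_monitor()
 * walks backwards through below.
 */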
933
934 /*
935 * Called from compute_averages().
936 */
937 void
938 compute_memory_pressure(
939 __unused void *arg)
940 {
941 unsigned int vm_pageout_next;
942
943 vm_memory_pressure =
944 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed;
945
946 commpage_set_memory_pressure( vm_memory_pressure );
947
948 /* move "now" forward */
949 vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
950 vm_pageout_stats[vm_pageout_next].considered = 0;
951 vm_pageout_stats[vm_pageout_next].reclaimed = 0;
952 vm_pageout_stat_now = vm_pageout_next;
953 }
954
955 unsigned int
956 mach_vm_ctl_page_free_wanted(void)
957 {
958 unsigned int page_free_target, page_free_count, page_free_wanted;
959
960 page_free_target = vm_page_free_target;
961 page_free_count = vm_page_free_count;
962 if (page_free_target > page_free_count) {
963 page_free_wanted = page_free_target - page_free_count;
964 } else {
965 page_free_wanted = 0;
966 }
967
968 return page_free_wanted;
969 }
970
971 kern_return_t
972 mach_vm_pressure_monitor(
973 boolean_t wait_for_pressure,
974 unsigned int nsecs_monitored,
975 unsigned int *pages_reclaimed_p,
976 unsigned int *pages_wanted_p)
977 {
978 wait_result_t wr;
979 unsigned int vm_pageout_then, vm_pageout_now;
980 unsigned int pages_reclaimed;
981
982 /*
983 * We don't take the vm_page_queue_lock here because we don't want
984 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
985 * thread when it's trying to reclaim memory. We don't need fully
986 * accurate monitoring anyway...
987 */
988
989 if (wait_for_pressure) {
990 /* wait until there's memory pressure */
991 while (vm_page_free_count >= vm_page_free_target) {
992 wr = assert_wait((event_t) &vm_page_free_wanted,
993 THREAD_INTERRUPTIBLE);
994 if (wr == THREAD_WAITING) {
995 wr = thread_block(THREAD_CONTINUE_NULL);
996 }
997 if (wr == THREAD_INTERRUPTED) {
998 return KERN_ABORTED;
999 }
1000 if (wr == THREAD_AWAKENED) {
1001 /*
1002 * The memory pressure might have already
1003 * been relieved but let's not block again
1004 * and let's report that there was memory
1005 * pressure at some point.
1006 */
1007 break;
1008 }
1009 }
1010 }
1011
1012 /* provide the number of pages the system wants to reclaim */
1013 if (pages_wanted_p != NULL) {
1014 *pages_wanted_p = mach_vm_ctl_page_free_wanted();
1015 }
1016
1017 if (pages_reclaimed_p == NULL) {
1018 return KERN_SUCCESS;
1019 }
1020
1021 /* provide number of pages reclaimed in the last "nsecs_monitored" */
1022 do {
1023 vm_pageout_now = vm_pageout_stat_now;
1024 pages_reclaimed = 0;
1025 for (vm_pageout_then =
1026 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1027 vm_pageout_then != vm_pageout_now &&
1028 nsecs_monitored-- != 0;
1029 vm_pageout_then =
1030 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1031 pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed;
1032 }
1033 } while (vm_pageout_now != vm_pageout_stat_now);
1034 *pages_reclaimed_p = pages_reclaimed;
1035
1036 return KERN_SUCCESS;
1037 }
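/*
 * Hedged usage sketch for mach_vm_pressure_monitor() above; the
 * enclosing function and the 10-second window are illustrative only,
 * so the sketch is kept under #if 0.
 */
#if 0
static void
vm_pressure_monitor_usage_sketch(void)
{
	unsigned int	pages_reclaimed;
	unsigned int	pages_wanted;
	kern_return_t	kr;

	/*
	 * Non-blocking query: how many pages were reclaimed over the
	 * last 10 seconds, and how many does the system still want?
	 */
	kr = mach_vm_pressure_monitor(FALSE, 10,
				      &pages_reclaimed, &pages_wanted);
	if (kr == KERN_SUCCESS && pages_wanted == 0) {
		/* the free target is currently being met */
	}
}
#endif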
1038
1039 /* Page States: Used below to maintain the page state
1040 before it is removed from its queue. This saved state
1041 helps us do the right accounting in certain cases.
1042 */
1043
1044 #define PAGE_STATE_SPECULATIVE 1
1045 #define PAGE_STATE_THROTTLED 2
1046 #define PAGE_STATE_ZEROFILL 3
1047 #define PAGE_STATE_INACTIVE 4
1048
1049 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m) \
1050 MACRO_BEGIN \
1051 /* \
1052 * If a "reusable" page somehow made it back into \
1053 * the active queue, it's been re-used and is not \
1054 * quite re-usable. \
1055 * If the VM object was "all_reusable", consider it \
1056 * as "all re-used" instead of converting it to \
1057 * "partially re-used", which could be expensive. \
1058 */ \
1059 if ((m)->reusable || \
1060 (m)->object->all_reusable) { \
1061 vm_object_reuse_pages((m)->object, \
1062 (m)->offset, \
1063 (m)->offset + PAGE_SIZE_64, \
1064 FALSE); \
1065 } \
1066 MACRO_END
1067
1068 void
1069 vm_pageout_scan(void)
1070 {
1071 unsigned int loop_count = 0;
1072 unsigned int inactive_burst_count = 0;
1073 unsigned int active_burst_count = 0;
1074 unsigned int reactivated_this_call;
1075 unsigned int reactivate_limit;
1076 vm_page_t local_freeq = NULL;
1077 int local_freed = 0;
1078 int delayed_unlock;
1079 int refmod_state = 0;
1080 int vm_pageout_deadlock_target = 0;
1081 struct vm_pageout_queue *iq;
1082 struct vm_pageout_queue *eq;
1083 struct vm_speculative_age_q *sq;
1084 struct flow_control flow_control = { 0, { 0, 0 } };
1085 boolean_t inactive_throttled = FALSE;
1086 boolean_t try_failed;
1087 mach_timespec_t ts;
1088 unsigned int msecs = 0;
1089 vm_object_t object;
1090 vm_object_t last_object_tried;
1091 #if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */
1092 unsigned int zf_ratio;
1093 unsigned int zf_run_count;
1094 #else
1095 uint64_t zf_ratio;
1096 uint64_t zf_run_count;
1097 #endif
1098 uint32_t catch_up_count = 0;
1099 uint32_t inactive_reclaim_run;
1100 boolean_t forced_reclaim;
1101 int page_prev_state = 0;
1102
1103 flow_control.state = FCS_IDLE;
1104 iq = &vm_pageout_queue_internal;
1105 eq = &vm_pageout_queue_external;
1106 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1107
1108
1109 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1110
1111
1112 vm_page_lock_queues();
1113 delayed_unlock = 1; /* must be nonzero if Qs are locked, 0 if unlocked */
1114
1115 /*
1116 * Calculate the max number of referenced pages on the inactive
1117 * queue that we will reactivate.
1118 */
1119 reactivated_this_call = 0;
1120 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1121 vm_page_inactive_count);
1122 inactive_reclaim_run = 0;
1123
1124
1125 /*???*/ /*
1126 * We want to gradually dribble pages from the active queue
1127 * to the inactive queue. If we let the inactive queue get
1128 * very small, and then suddenly dump many pages into it,
1129 * those pages won't get a sufficient chance to be referenced
1130 * before we start taking them from the inactive queue.
1131 *
1132 * We must limit the rate at which we send pages to the pagers.
1133 * data_write messages consume memory, for message buffers and
1134 * for map-copy objects. If we get too far ahead of the pagers,
1135 * we can potentially run out of memory.
1136 *
1137 * We can use the laundry count to limit directly the number
1138 * of pages outstanding to the default pager. A similar
1139 * strategy for external pagers doesn't work, because
1140 * external pagers don't have to deallocate the pages sent them,
1141 * and because we might have to send pages to external pagers
1142 * even if they aren't processing writes. So we also
1143 * use a burst count to limit writes to external pagers.
1144 *
1145 * When memory is very tight, we can't rely on external pagers to
1146 * clean pages. They probably aren't running, because they
1147 * aren't vm-privileged. If we kept sending dirty pages to them,
1148 * we could exhaust the free list.
1149 */
1150
1151
1152 Restart:
1153 assert(delayed_unlock!=0);
1154
1155 /*
1156 * A page is "zero-filled" if it was not paged in from somewhere,
1157 * and it belongs to an object at least VM_ZF_OBJECT_SIZE_THRESHOLD big.
1158 * Recalculate the zero-filled page ratio. We use this to apportion
1159 * victimized pages between the normal and zero-filled inactive
1160 * queues according to their relative abundance in memory. Thus if a task
1161 * is flooding memory with zf pages, we begin to hunt them down.
1162 * It would be better to throttle greedy tasks at a higher level,
1163 * but at the moment mach vm cannot do this.
1164 */
1165 {
1166 #if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */
1167 uint32_t total = vm_page_active_count + vm_page_inactive_count;
1168 uint32_t normal = total - vm_zf_count;
1169 #else
1170 uint64_t total = vm_page_active_count + vm_page_inactive_count;
1171 uint64_t normal = total - vm_zf_count;
1172 #endif
1173
1174 /* zf_ratio is the number of zf pages we victimize per normal page */
1175
1176 if (vm_zf_count < vm_accellerate_zf_pageout_trigger)
1177 zf_ratio = 0;
1178 else if ((vm_zf_count <= normal) || (normal == 0))
1179 zf_ratio = 1;
1180 else
1181 zf_ratio = vm_zf_count / normal;
1182
1183 zf_run_count = 0;
1184 }
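/*
 * Worked example (illustrative counts, not taken from this file):
 * with vm_zf_count == 60000 and normal == 20000, zf_ratio == 3, so
 * the victim-selection loop below will take up to three zero-filled
 * pages for every normal inactive page; zf_run_count tracks how many
 * zf pages have been taken since the last normal one.
 */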
1185
1186 /*
1187 * Recalculate vm_page_inactive_target.
1188 */
1189 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1190 vm_page_inactive_count +
1191 vm_page_speculative_count);
1192 /*
1193 * don't want to wake the pageout_scan thread up every time we fall below
1194 * the targets... set a low water mark at 0.25% below the target
1195 */
1196 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);
1197
1198 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1199 vm_page_inactive_count);
1200 object = NULL;
1201 last_object_tried = NULL;
1202 try_failed = FALSE;
1203
1204 if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count))
1205 catch_up_count = vm_page_inactive_count + vm_page_speculative_count;
1206 else
1207 catch_up_count = 0;
1208
1209 for (;;) {
1210 vm_page_t m;
1211
1212 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1213
1214 if (delayed_unlock == 0) {
1215 vm_page_lock_queues();
1216 delayed_unlock = 1;
1217 }
1218
1219 /*
1220 * Don't sweep through the active queue for more than the
1221 * throttle allows; it should be kept relatively low
1222 */
1223 active_burst_count = MIN(vm_pageout_burst_active_throttle,
1224 vm_page_active_count);
1225
1226 /*
1227 * Move pages from active to inactive.
1228 */
1229 if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
1230 goto done_moving_active_pages;
1231
1232 while (!queue_empty(&vm_page_queue_active) && active_burst_count) {
1233
1234 if (active_burst_count)
1235 active_burst_count--;
1236
1237 vm_pageout_active++;
1238
1239 m = (vm_page_t) queue_first(&vm_page_queue_active);
1240
1241 assert(m->active && !m->inactive);
1242 assert(!m->laundry);
1243 assert(m->object != kernel_object);
1244 assert(m->phys_page != vm_page_guard_addr);
1245
1246 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1247
1248 /*
1249 * Try to lock object; since we've already got the
1250 * page queues lock, we can only 'try' for this one.
1251 * if the 'try' fails, we need to do a mutex_pause
1252 * to allow the owner of the object lock a chance to
1253 * run... otherwise, we're likely to trip over this
1254 * object in the same state as we work our way through
1255 * the queue... clumps of pages associated with the same
1256 * object are fairly typical on the inactive and active queues
1257 */
1258 if (m->object != object) {
1259 if (object != NULL) {
1260 vm_object_unlock(object);
1261 object = NULL;
1262 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1263 }
1264 if (!vm_object_lock_try_scan(m->object)) {
1265 /*
1266 * move page to end of active queue and continue
1267 */
1268 queue_remove(&vm_page_queue_active, m,
1269 vm_page_t, pageq);
1270 queue_enter(&vm_page_queue_active, m,
1271 vm_page_t, pageq);
1272
1273 try_failed = TRUE;
1274
1275 m = (vm_page_t) queue_first(&vm_page_queue_active);
1276 /*
1277 * this is the next object we're going to be interested in
1278 * try to make sure it's available after the mutex_yield
1279 * returns control
1280 */
1281 vm_pageout_scan_wants_object = m->object;
1282
1283 goto done_with_activepage;
1284 }
1285 object = m->object;
1286
1287 try_failed = FALSE;
1288 }
1289
1290 /*
1291 * if the page is BUSY, then we pull it
1292 * off the active queue and leave it alone.
1293 * when BUSY is cleared, it will get stuck
1294 * back on the appropriate queue
1295 */
1296 if (m->busy) {
1297 queue_remove(&vm_page_queue_active, m,
1298 vm_page_t, pageq);
1299 m->pageq.next = NULL;
1300 m->pageq.prev = NULL;
1301
1302 if (!m->fictitious)
1303 vm_page_active_count--;
1304 m->active = FALSE;
1305
1306 goto done_with_activepage;
1307 }
1308
1309 /* deal with a rogue "reusable" page */
1310 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m);
1311
1312 /*
1313 * Deactivate the page while holding the object
1314 * locked, so we know the page is still not busy.
1315 * This should prevent races between pmap_enter
1316 * and pmap_clear_reference. The page might be
1317 * absent or fictitious, but vm_page_deactivate
1318 * can handle that.
1319 */
1320 vm_page_deactivate(m);
1321
1322 done_with_activepage:
1323 if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) {
1324
1325 if (object != NULL) {
1326 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1327 vm_object_unlock(object);
1328 object = NULL;
1329 }
1330 if (local_freeq) {
1331 vm_page_unlock_queues();
1332 vm_page_free_list(local_freeq, TRUE);
1333
1334 local_freeq = NULL;
1335 local_freed = 0;
1336 vm_page_lock_queues();
1337 } else
1338 lck_mtx_yield(&vm_page_queue_lock);
1339
1340 delayed_unlock = 1;
1341
1342 /*
1343 * continue the while loop processing
1344 * the active queue... need to hold
1345 * the page queues lock
1346 */
1347 }
1348 }
1349
1350
1351
1352 /**********************************************************************
1353 * above this point we're playing with the active queue
1354 * below this point we're playing with the throttling mechanisms
1355 * and the inactive queue
1356 **********************************************************************/
1357
1358 done_moving_active_pages:
1359
1360 /*
1361 * We are done if we have met our target *and*
1362 * nobody is still waiting for a page.
1363 */
1364 if (vm_page_free_count + local_freed >= vm_page_free_target) {
1365 if (object != NULL) {
1366 vm_object_unlock(object);
1367 object = NULL;
1368 }
1369 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1370
1371 if (local_freeq) {
1372 vm_page_unlock_queues();
1373 vm_page_free_list(local_freeq, TRUE);
1374
1375 local_freeq = NULL;
1376 local_freed = 0;
1377 vm_page_lock_queues();
1378 }
1379 /*
1380 * inactive target still not met... keep going
1381 * until we get the queues balanced
1382 */
1383
1384 /*
1385 * Recalculate vm_page_inactive_target.
1386 */
1387 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1388 vm_page_inactive_count +
1389 vm_page_speculative_count);
1390
1391 #ifndef CONFIG_EMBEDDED
1392 /*
1393 * XXX: if no active pages can be reclaimed, pageout scan can be stuck trying
1394 * to balance the queues
1395 */
1396 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
1397 !queue_empty(&vm_page_queue_active))
1398 continue;
1399 #endif
1400
1401 lck_mtx_lock(&vm_page_queue_free_lock);
1402
1403 if ((vm_page_free_count >= vm_page_free_target) &&
1404 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
1405
1406 vm_page_unlock_queues();
1407
1408 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1409
1410 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1411
1412 return;
1413 }
1414 lck_mtx_unlock(&vm_page_queue_free_lock);
1415 }
1416
1417 /*
1418 * Before anything, we check if we have any ripe volatile
1419 * objects around. If so, try to purge the first object.
1420 * If the purge fails, fall through to reclaim a page instead.
1421 * If the purge succeeds, go back to the top and re-evaluate
1422 * the new memory situation.
1423 */
1424 assert (available_for_purge>=0);
1425 if (available_for_purge)
1426 {
1427 if (object != NULL) {
1428 vm_object_unlock(object);
1429 object = NULL;
1430 }
1431 if(TRUE == vm_purgeable_object_purge_one()) {
1432 continue;
1433 }
1434 }
1435
1436 if (queue_empty(&sq->age_q) && vm_page_speculative_count) {
1437 /*
1438 * try to pull pages from the aging bins
1439 * see vm_page.h for an explanation of how
1440 * this mechanism works
1441 */
1442 struct vm_speculative_age_q *aq;
1443 mach_timespec_t ts_fully_aged;
1444 boolean_t can_steal = FALSE;
1445 int num_scanned_queues;
1446
1447 aq = &vm_page_queue_speculative[speculative_steal_index];
1448
1449 num_scanned_queues = 0;
1450 while (queue_empty(&aq->age_q) &&
1451 num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
1452
1453 speculative_steal_index++;
1454
1455 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
1456 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
1457
1458 aq = &vm_page_queue_speculative[speculative_steal_index];
1459 }
1460
1461 if (num_scanned_queues ==
1462 VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
1463 /*
1464 * XXX We've scanned all the speculative
1465 * queues but still haven't found one
1466 * that is not empty, even though
1467 * vm_page_speculative_count is not 0.
1468 */
1469 /* report the anomaly... */
1470 printf("vm_pageout_scan: "
1471 "all speculative queues empty "
1472 "but count=%d. Re-adjusting.\n",
1473 vm_page_speculative_count);
1474 if (vm_page_speculative_count >
1475 vm_page_speculative_count_drift_max)
1476 vm_page_speculative_count_drift_max = vm_page_speculative_count;
1477 vm_page_speculative_count_drifts++;
1478 #if 6553678
1479 Debugger("vm_pageout_scan: no speculative pages");
1480 #endif
1481 /* readjust... */
1482 vm_page_speculative_count = 0;
1483 /* ... and continue */
1484 continue;
1485 }
1486
1487 if (vm_page_speculative_count > vm_page_speculative_target)
1488 can_steal = TRUE;
1489 else {
1490 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) / 1000;
1491 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) % 1000)
1492 * 1000 * NSEC_PER_USEC;
1493
1494 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
1495
1496 clock_sec_t sec;
1497 clock_nsec_t nsec;
1498 clock_get_system_nanotime(&sec, &nsec);
1499 ts.tv_sec = (unsigned int) sec;
1500 ts.tv_nsec = nsec;
1501
1502 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
1503 can_steal = TRUE;
1504 }
1505 if (can_steal == TRUE)
1506 vm_page_speculate_ageit(aq);
1507 }
1508
1509 /*
1510 * Sometimes we have to pause:
1511 * 1) No inactive pages - nothing to do.
1512 * 2) Flow control - default pageout queue is full
1513 * 3) Loop control - no acceptable pages found on the inactive queue
1514 * within the last vm_pageout_burst_inactive_throttle iterations
1515 */
1516 if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf) && queue_empty(&sq->age_q) &&
1517 (VM_PAGE_Q_THROTTLED(iq) || queue_empty(&vm_page_queue_throttled))) {
1518 vm_pageout_scan_empty_throttle++;
1519 msecs = vm_pageout_empty_wait;
1520 goto vm_pageout_scan_delay;
1521
1522 } else if (inactive_burst_count >=
1523 MIN(vm_pageout_burst_inactive_throttle,
1524 (vm_page_inactive_count +
1525 vm_page_speculative_count))) {
1526 vm_pageout_scan_burst_throttle++;
1527 msecs = vm_pageout_burst_wait;
1528 goto vm_pageout_scan_delay;
1529
1530 } else if (VM_PAGE_Q_THROTTLED(iq) && IP_VALID(memory_manager_default)) {
1531 clock_sec_t sec;
1532 clock_nsec_t nsec;
1533
1534 switch (flow_control.state) {
1535
1536 case FCS_IDLE:
1537 reset_deadlock_timer:
1538 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
1539 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
1540 clock_get_system_nanotime(&sec, &nsec);
1541 flow_control.ts.tv_sec = (unsigned int) sec;
1542 flow_control.ts.tv_nsec = nsec;
1543 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
1544
1545 flow_control.state = FCS_DELAYED;
1546 msecs = vm_pageout_deadlock_wait;
1547
1548 break;
1549
1550 case FCS_DELAYED:
1551 clock_get_system_nanotime(&sec, &nsec);
1552 ts.tv_sec = (unsigned int) sec;
1553 ts.tv_nsec = nsec;
1554
1555 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
1556 /*
1557 * the pageout thread for the default pager is potentially
1558 * deadlocked since the
1559 * default pager queue has been throttled for more than the
1560 * allowable time... we need to move some clean pages or dirty
1561 * pages belonging to the external pagers if they aren't throttled
1562 * vm_page_free_wanted represents the number of threads currently
1563 * blocked waiting for pages... we'll move one page for each of
1564 * these plus a fixed amount to break the logjam... once we're done
1565 * moving this number of pages, we'll re-enter the FCS_DELAYED state
1566 * with a new timeout target since we have no way of knowing
1567 * whether we've broken the deadlock except through observation
1568 * of the queue associated with the default pager... we need to
1569 * stop moving pages and allow the system to run to see what
1570 * state it settles into.
1571 */
1572 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
1573 vm_pageout_scan_deadlock_detected++;
1574 flow_control.state = FCS_DEADLOCK_DETECTED;
1575
1576 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1577 goto consider_inactive;
1578 }
1579 /*
1580 * just resniff instead of trying
1581 * to compute a new delay time... we're going to be
1582 * awakened immediately upon a laundry completion,
1583 * so we won't wait any longer than necessary
1584 */
1585 msecs = vm_pageout_idle_wait;
1586 break;
1587
1588 case FCS_DEADLOCK_DETECTED:
1589 if (vm_pageout_deadlock_target)
1590 goto consider_inactive;
1591 goto reset_deadlock_timer;
1592
1593 }
1594 vm_pageout_scan_throttle++;
1595 iq->pgo_throttled = TRUE;
1596 vm_pageout_scan_delay:
1597 if (object != NULL) {
1598 vm_object_unlock(object);
1599 object = NULL;
1600 }
1601 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1602
1603 if (local_freeq) {
1604 vm_page_unlock_queues();
1605 vm_page_free_list(local_freeq, TRUE);
1606
1607 local_freeq = NULL;
1608 local_freed = 0;
1609 vm_page_lock_queues();
1610
1611 if (flow_control.state == FCS_DELAYED &&
1612 !VM_PAGE_Q_THROTTLED(iq)) {
1613 flow_control.state = FCS_IDLE;
1614 vm_pageout_scan_throttle_aborted++;
1615 goto consider_inactive;
1616 }
1617 }
1618 #if CONFIG_EMBEDDED
1619 {
1620 int percent_avail;
1621
1622 /*
1623 * Decide if we need to send a memory status notification.
1624 */
1625 percent_avail =
1626 (vm_page_active_count + vm_page_inactive_count +
1627 vm_page_speculative_count + vm_page_free_count +
1628 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
1629 atop_64(max_mem);
1630 if (percent_avail >= (kern_memorystatus_level + 5) ||
1631 percent_avail <= (kern_memorystatus_level - 5)) {
1632 kern_memorystatus_level = percent_avail;
1633 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1634 }
1635 }
1636 #endif
1637 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
1638 counter(c_vm_pageout_scan_block++);
1639
1640 vm_page_unlock_queues();
1641
1642 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1643
1644 thread_block(THREAD_CONTINUE_NULL);
1645
1646 vm_page_lock_queues();
1647 delayed_unlock = 1;
1648
1649 iq->pgo_throttled = FALSE;
1650
1651 if (loop_count >= vm_page_inactive_count)
1652 loop_count = 0;
1653 inactive_burst_count = 0;
1654
1655 goto Restart;
1656 /*NOTREACHED*/
1657 }
1658
1659
1660 flow_control.state = FCS_IDLE;
1661 consider_inactive:
1662 loop_count++;
1663 inactive_burst_count++;
1664 vm_pageout_inactive++;
1665
1666 /* Choose a victim. */
1667
1668 while (1) {
1669 m = NULL;
1670
1671 if (IP_VALID(memory_manager_default)) {
1672 assert(vm_page_throttled_count == 0);
1673 assert(queue_empty(&vm_page_queue_throttled));
1674 }
1675
1676 /*
1677 * The most eligible pages are ones we paged in speculatively,
1678 * but which have not yet been touched.
1679 */
1680 if ( !queue_empty(&sq->age_q) ) {
1681 m = (vm_page_t) queue_first(&sq->age_q);
1682 break;
1683 }
1684 /*
1685 * Time for a zero-filled inactive page?
1686 */
1687 if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) ||
1688 queue_empty(&vm_page_queue_inactive)) {
1689 if ( !queue_empty(&vm_page_queue_zf) ) {
1690 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1691 zf_run_count++;
1692 break;
1693 }
1694 }
1695 /*
1696 * It's either a normal inactive page or nothing.
1697 */
1698 if ( !queue_empty(&vm_page_queue_inactive) ) {
1699 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1700 zf_run_count = 0;
1701 break;
1702 }
1703
1704 panic("vm_pageout: no victim");
1705 }
1706
1707 assert(!m->active && (m->inactive || m->speculative || m->throttled));
1708 assert(!m->laundry);
1709 assert(m->object != kernel_object);
1710 assert(m->phys_page != vm_page_guard_addr);
1711
1712 if (!m->speculative) {
1713 vm_pageout_stats[vm_pageout_stat_now].considered++;
1714 }
1715
1716 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1717
1718 /*
1719 * check to see if we currently are working
1720 * with the same object... if so, we've
1721 * already got the lock
1722 */
1723 if (m->object != object) {
1724 /*
1725 * the object associated with candidate page is
1726 * different from the one we were just working
1727 * with... dump the lock if we still own it
1728 */
1729 if (object != NULL) {
1730 vm_object_unlock(object);
1731 object = NULL;
1732 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1733 }
1734 /*
1735 * Try to lock object; since we've already got the
1736 * page queues lock, we can only 'try' for this one.
1737 * if the 'try' fails, we need to do a mutex_pause
1738 * to allow the owner of the object lock a chance to
1739 * run... otherwise, we're likely to trip over this
1740 * object in the same state as we work our way through
1741 * the queue... clumps of pages associated with the same
1742 * object are fairly typical on the inactive and active queues
1743 */
1744 if (!vm_object_lock_try_scan(m->object)) {
1745 vm_pageout_inactive_nolock++;
1746
1747 requeue_page:
1748 /*
1749 * Move page to end and continue.
1750 * Don't re-issue ticket
1751 */
1752 if (m->zero_fill) {
1753 if (m->speculative) {
1754 panic("vm_pageout_scan(): page %p speculative and zero-fill !?\n", m);
1755 }
1756 assert(!m->speculative);
1757 queue_remove(&vm_page_queue_zf, m,
1758 vm_page_t, pageq);
1759 queue_enter(&vm_page_queue_zf, m,
1760 vm_page_t, pageq);
1761 } else if (m->speculative) {
1762 remque(&m->pageq);
1763 m->speculative = FALSE;
1764 vm_page_speculative_count--;
1765
1766 /*
1767 * move to the head of the inactive queue
1768 * to get it out of the way... the speculative
1769 * queue is generally too small to depend
1770 * on there being enough pages from other
1771 * objects to make cycling it back on the
1772 * same queue a winning proposition
1773 */
1774 queue_enter_first(&vm_page_queue_inactive, m,
1775 vm_page_t, pageq);
1776 m->inactive = TRUE;
1777 vm_page_inactive_count++;
1778 token_new_pagecount++;
1779 } else if (m->throttled) {
1780 queue_remove(&vm_page_queue_throttled, m,
1781 vm_page_t, pageq);
1782 m->throttled = FALSE;
1783 vm_page_throttled_count--;
1784
1785 /*
1786 * not throttled any more, so can stick
1787 * it on the inactive queue.
1788 */
1789 queue_enter(&vm_page_queue_inactive, m,
1790 vm_page_t, pageq);
1791 m->inactive = TRUE;
1792 vm_page_inactive_count++;
1793 token_new_pagecount++;
1794 } else {
1795 queue_remove(&vm_page_queue_inactive, m,
1796 vm_page_t, pageq);
1797 #if MACH_ASSERT
1798 vm_page_inactive_count--; /* balance for purgeable queue asserts */
1799 #endif
1800 vm_purgeable_q_advance_all();
1801
1802 queue_enter(&vm_page_queue_inactive, m,
1803 vm_page_t, pageq);
1804 #if MACH_ASSERT
1805 vm_page_inactive_count++; /* balance for purgeable queue asserts */
1806 #endif
1807 token_new_pagecount++;
1808 }
1809 pmap_clear_reference(m->phys_page);
1810 m->reference = FALSE;
1811
1812 if ( !queue_empty(&sq->age_q) )
1813 m = (vm_page_t) queue_first(&sq->age_q);
1814 else if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) ||
1815 queue_empty(&vm_page_queue_inactive)) {
1816 if ( !queue_empty(&vm_page_queue_zf) )
1817 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1818 } else if ( !queue_empty(&vm_page_queue_inactive) ) {
1819 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1820 }
1821 /*
1822 * this is the next object we're going to be interested in
1823 * try to make sure it's available after the mutex_yield
1824 * returns control
1825 */
1826 vm_pageout_scan_wants_object = m->object;
1827
1828 /*
1829 * force us to dump any collected free pages
1830 * and to pause before moving on
1831 */
1832 try_failed = TRUE;
1833
1834 goto done_with_inactivepage;
1835 }
1836 object = m->object;
1837 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1838
1839 try_failed = FALSE;
1840 }
1841
1842 /*
1843 * Paging out pages of external objects which
1844 * are currently being created must be avoided.
1845 * The pager may claim memory, possibly leading to a
1846 * deadlock between it and the pageout thread if such
1847 * pages are chosen. The assumption is that there will
1848 * eventually be enough available pages in the inactive
1849 * pool to page out, satisfying all the memory claimed
1850 * by the thread which concurrently creates the pager.
1851 */
1852 if (!object->pager_initialized && object->pager_created) {
1853 /*
1854 * Move page to end and continue, hoping that
1855 * there will be enough other inactive pages to
1856 * page out so that the thread which currently
1857 * initializes the pager will succeed.
1858 * Don't re-grant the ticket; the page should be
1859 * pulled from the queue and paged out whenever
1860 * one of its logically adjacent fellows is
1861 * targeted.
1862 */
1863 vm_pageout_inactive_avoid++;
1864 goto requeue_page;
1865 }
1866 /*
1867 * Remove the page from its list.
1868 */
1869 if (m->speculative) {
1870 remque(&m->pageq);
1871 page_prev_state = PAGE_STATE_SPECULATIVE;
1872 m->speculative = FALSE;
1873 vm_page_speculative_count--;
1874 } else if (m->throttled) {
1875 queue_remove(&vm_page_queue_throttled, m, vm_page_t, pageq);
1876 page_prev_state = PAGE_STATE_THROTTLED;
1877 m->throttled = FALSE;
1878 vm_page_throttled_count--;
1879 } else {
1880 if (m->zero_fill) {
1881 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
1882 page_prev_state = PAGE_STATE_ZEROFILL;
1883 vm_zf_queue_count--;
1884 } else {
1885 page_prev_state = PAGE_STATE_INACTIVE;
1886 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1887 }
1888 m->inactive = FALSE;
1889 if (!m->fictitious)
1890 vm_page_inactive_count--;
1891 vm_purgeable_q_advance_all();
1892 }
1893
1894 m->pageq.next = NULL;
1895 m->pageq.prev = NULL;
1896
1897 if ( !m->fictitious && catch_up_count)
1898 catch_up_count--;
1899
1900 /*
1901 * ENCRYPTED SWAP:
1902 * if this page has already been picked up as part of a
1903 * page-out cluster, it will be busy because it is being
1904 * encrypted (see vm_object_upl_request()). But we still
1905 * want to demote it from "clean-in-place" (aka "adjacent")
1906 * to "clean-and-free" (aka "target"), so let's ignore its
1907 * "busy" bit here and proceed to check for "cleaning" a
1908 * little bit below...
1909 */
1910 if ( !m->encrypted_cleaning && (m->busy || !object->alive)) {
1911 /*
1912 * Somebody is already playing with this page.
1913 * Leave it off the pageout queues.
1914 *
1915 */
1916 vm_pageout_inactive_busy++;
1917
1918 goto done_with_inactivepage;
1919 }
1920
1921 /*
1922 * If it's absent or in error, we can reclaim the page.
1923 */
1924
1925 if (m->absent || m->error) {
1926 vm_pageout_inactive_absent++;
1927 reclaim_page:
1928 if (vm_pageout_deadlock_target) {
1929 vm_pageout_scan_inactive_throttle_success++;
1930 vm_pageout_deadlock_target--;
1931 }
1932
1933 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
1934
1935 if (object->internal) {
1936 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
1937 } else {
1938 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
1939 }
1940 vm_page_free_prepare_queues(m);
1941
1942 /*
1943 * remove page from object here since we're already
1944 * behind the object lock... defer the rest of the work
1945 * we'd normally do in vm_page_free_prepare_object
1946 * until 'vm_page_free_list' is called
1947 */
1948 if (m->tabled)
1949 vm_page_remove(m, TRUE);
1950
1951 assert(m->pageq.next == NULL &&
1952 m->pageq.prev == NULL);
1953 m->pageq.next = (queue_entry_t)local_freeq;
1954 local_freeq = m;
1955 local_freed++;
1956
1957 inactive_burst_count = 0;
1958
1959 if(page_prev_state != PAGE_STATE_SPECULATIVE) {
1960 vm_pageout_stats[vm_pageout_stat_now].reclaimed++;
1961 page_prev_state = 0;
1962 }
1963
1964 goto done_with_inactivepage;
1965 }
1966
1967 assert(!m->private);
1968 assert(!m->fictitious);
1969
1970 /*
1971 * If already cleaning this page in place, convert from
1972 * "adjacent" to "target". We can leave the page mapped,
1973 * and vm_pageout_object_terminate will determine whether
1974 * to free or reactivate.
1975 */
1976
1977 if (m->cleaning) {
1978 m->busy = TRUE;
1979 m->pageout = TRUE;
1980 m->dump_cleaning = TRUE;
1981 vm_page_wire(m);
1982
1983 CLUSTER_STAT(vm_pageout_cluster_conversions++);
1984
1985 inactive_burst_count = 0;
1986
1987 goto done_with_inactivepage;
1988 }
1989
1990 /*
1991 * If the object is empty, the page must be reclaimed even
1992 * if dirty or used.
1993 * If the page belongs to a volatile object, we stick it back
1994 * on.
1995 */
1996 if (object->copy == VM_OBJECT_NULL) {
1997 if (object->purgable == VM_PURGABLE_EMPTY) {
1998 m->busy = TRUE;
1999 if (m->pmapped == TRUE) {
2000 /* unmap the page */
2001 refmod_state = pmap_disconnect(m->phys_page);
2002 if (refmod_state & VM_MEM_MODIFIED) {
2003 m->dirty = TRUE;
2004 }
2005 }
2006 if (m->dirty || m->precious) {
2007 /* we saved the cost of cleaning this page ! */
2008 vm_page_purged_count++;
2009 }
2010 goto reclaim_page;
2011 }
2012 if (object->purgable == VM_PURGABLE_VOLATILE) {
2013 /* if it's wired, we can't put it on our queue */
2014 assert(!VM_PAGE_WIRED(m));
2015 /* just stick it back on! */
2016 goto reactivate_page;
2017 }
2018 }
2019
2020 /*
2021 * If it's being used, reactivate.
2022 * (Fictitious pages are either busy or absent.)
2023 * First, update the reference and dirty bits
2024 * to make sure the page is unreferenced.
2025 */
2026 refmod_state = -1;
2027
2028 if (m->reference == FALSE && m->pmapped == TRUE) {
2029 refmod_state = pmap_get_refmod(m->phys_page);
2030
2031 if (refmod_state & VM_MEM_REFERENCED)
2032 m->reference = TRUE;
2033 if (refmod_state & VM_MEM_MODIFIED)
2034 m->dirty = TRUE;
2035 }
2036
2037 if (m->reference || m->dirty) {
2038 /* deal with a rogue "reusable" page */
2039 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m);
2040 }
2041
2042 if (m->reference && !m->no_cache) {
2043 /*
2044 * The page we pulled off the inactive list has
2045 * been referenced. It is possible for other
2046 * processors to be touching pages faster than we
2047 * can clear the referenced bit and traverse the
2048 * inactive queue, so we limit the number of
2049 * reactivations.
2050 */
2051 if (++reactivated_this_call >= reactivate_limit) {
2052 vm_pageout_reactivation_limit_exceeded++;
2053 } else if (catch_up_count) {
2054 vm_pageout_catch_ups++;
2055 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
2056 vm_pageout_inactive_force_reclaim++;
2057 } else {
2058 uint32_t isinuse;
2059 reactivate_page:
2060 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
2061 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
2062 /*
2063 * no explicit mappings of this object exist
2064 * and it's not open via the filesystem
2065 */
2066 vm_page_deactivate(m);
2067 vm_pageout_inactive_deactivated++;
2068 } else {
2069 /*
2070 * The page was/is being used, so put back on active list.
2071 */
2072 vm_page_activate(m);
2073 VM_STAT_INCR(reactivations);
2074 }
2075 vm_pageout_inactive_used++;
2076 inactive_burst_count = 0;
2077
2078 goto done_with_inactivepage;
2079 }
2080 /*
2081 * Make sure we call pmap_get_refmod() if it
2082 * wasn't already called just above, to update
2083 * the dirty bit.
2084 */
2085 if ((refmod_state == -1) && !m->dirty && m->pmapped) {
2086 refmod_state = pmap_get_refmod(m->phys_page);
2087 if (refmod_state & VM_MEM_MODIFIED)
2088 m->dirty = TRUE;
2089 }
2090 forced_reclaim = TRUE;
2091 } else {
2092 forced_reclaim = FALSE;
2093 }
2094
2095 XPR(XPR_VM_PAGEOUT,
2096 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
2097 object, m->offset, m, 0,0);
2098
2099 /*
2100 * we've got a candidate page to steal...
2101 *
2102 * m->dirty is up to date courtesy of the
2103 * preceding check for m->reference... if
2104 * we get here, then m->reference had to be
2105 * FALSE (or possibly "reactivate_limit" was
2106 * exceeded), but in either case we called
2107 * pmap_get_refmod() and updated both
2108 * m->reference and m->dirty
2109 *
2110 * if it's dirty or precious we need to
2111 * see if the target queue is throttled
2112 * if it is, we need to skip over it by moving it back
2113 * to the end of the inactive queue
2114 */
2115
2116 inactive_throttled = FALSE;
2117
2118 if (m->dirty || m->precious) {
2119 if (object->internal) {
2120 if (VM_PAGE_Q_THROTTLED(iq))
2121 inactive_throttled = TRUE;
2122 } else if (VM_PAGE_Q_THROTTLED(eq)) {
2123 inactive_throttled = TRUE;
2124 }
2125 }
2126 if (inactive_throttled == TRUE) {
2127 throttle_inactive:
2128 if (!IP_VALID(memory_manager_default) &&
2129 object->internal && m->dirty &&
2130 (object->purgable == VM_PURGABLE_DENY ||
2131 object->purgable == VM_PURGABLE_NONVOLATILE ||
2132 object->purgable == VM_PURGABLE_VOLATILE)) {
2133 queue_enter(&vm_page_queue_throttled, m,
2134 vm_page_t, pageq);
2135 m->throttled = TRUE;
2136 vm_page_throttled_count++;
2137 } else {
2138 if (m->zero_fill) {
2139 queue_enter(&vm_page_queue_zf, m,
2140 vm_page_t, pageq);
2141 vm_zf_queue_count++;
2142 } else
2143 queue_enter(&vm_page_queue_inactive, m,
2144 vm_page_t, pageq);
2145 m->inactive = TRUE;
2146 if (!m->fictitious) {
2147 vm_page_inactive_count++;
2148 token_new_pagecount++;
2149 }
2150 }
2151 vm_pageout_scan_inactive_throttled++;
2152 goto done_with_inactivepage;
2153 }
2154
2155 /*
2156 * we've got a page that we can steal...
2157 * eliminate all mappings and make sure
2158 * we have the up-to-date modified state...
2159 * first take the page BUSY, so that no new
2160 * mappings can be made
2161 */
2162 m->busy = TRUE;
2163
2164 /*
2165 * if we need to do a pmap_disconnect then we
2166 * need to re-evaluate m->dirty since the pmap_disconnect
2167 * provides the true state atomically... the
2168 * page was still mapped up to the pmap_disconnect
2169 * and may have been dirtied at the last microsecond
2170 *
2171 * we also check for the page being referenced 'late'
2172 * if it was, we first need to do a WAKEUP_DONE on it
2173 * since we already set m->busy = TRUE, before
2174 * going off to reactivate it
2175 *
2176 * Note that if 'pmapped' is FALSE then the page is not
2177 * and has not been in any map, so there is no point calling
2178 * pmap_disconnect(). m->dirty and/or m->reference could
2179 * have been set in anticipation of likely usage of the page.
2180 */
2181 if (m->pmapped == TRUE) {
2182 refmod_state = pmap_disconnect(m->phys_page);
2183
2184 if (refmod_state & VM_MEM_MODIFIED)
2185 m->dirty = TRUE;
2186 if (refmod_state & VM_MEM_REFERENCED) {
2187
2188 /* If m->reference is already set, this page must have
2189 * already failed the reactivate_limit test, so don't
2190 * bump the counts twice.
2191 */
2192 if ( ! m->reference ) {
2193 m->reference = TRUE;
2194 if (forced_reclaim ||
2195 ++reactivated_this_call >= reactivate_limit)
2196 vm_pageout_reactivation_limit_exceeded++;
2197 else {
2198 PAGE_WAKEUP_DONE(m);
2199 goto reactivate_page;
2200 }
2201 }
2202 }
2203 }
2204 /*
2205 * reset our count of pages that have been reclaimed
2206 * since the last page was 'stolen'
2207 */
2208 inactive_reclaim_run = 0;
2209
2210 /*
2211 * If it's clean and not precious, we can free the page.
2212 */
2213 if (!m->dirty && !m->precious) {
2214 if (m->zero_fill)
2215 vm_pageout_inactive_zf++;
2216 vm_pageout_inactive_clean++;
2217
2218 goto reclaim_page;
2219 }
2220
2221 /*
2222 * The page may have been dirtied since the last check
2223 * for a throttled target queue (which may have been skipped
2224 * if the page was clean then). With the dirty page
2225 * disconnected here, we can make one final check.
2226 */
2227 {
2228 boolean_t disconnect_throttled = FALSE;
2229 if (object->internal) {
2230 if (VM_PAGE_Q_THROTTLED(iq))
2231 disconnect_throttled = TRUE;
2232 } else if (VM_PAGE_Q_THROTTLED(eq)) {
2233 disconnect_throttled = TRUE;
2234 }
2235
2236 if (disconnect_throttled == TRUE) {
2237 PAGE_WAKEUP_DONE(m);
2238 goto throttle_inactive;
2239 }
2240 }
2241
2242 vm_pageout_stats[vm_pageout_stat_now].reclaimed++;
2243
2244 vm_pageout_cluster(m);
2245
2246 if (m->zero_fill)
2247 vm_pageout_inactive_zf++;
2248 vm_pageout_inactive_dirty++;
2249
2250 inactive_burst_count = 0;
2251
2252 done_with_inactivepage:
2253 if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) {
2254
2255 if (object != NULL) {
2256 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2257 vm_object_unlock(object);
2258 object = NULL;
2259 }
2260 if (local_freeq) {
2261 vm_page_unlock_queues();
2262 vm_page_free_list(local_freeq, TRUE);
2263
2264 local_freeq = NULL;
2265 local_freed = 0;
2266 vm_page_lock_queues();
2267 } else
2268 lck_mtx_yield(&vm_page_queue_lock);
2269
2270 delayed_unlock = 1;
2271 }
2272 /*
2273 * back to top of pageout scan loop
2274 */
2275 }
2276 }
2277
2278
2279 int vm_page_free_count_init;
2280
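 /*
 * vm_page_free_reserve:
 *
 * Grow the reserved page pool by 'pages' and re-derive the paging
 * thresholds from it: free_min and free_target are computed from the
 * pages left after the reserve (and clamped to their limits), the
 * throttle limit is set to two thirds of free_target, and the page
 * creation throttle to half of free_target.
 */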
2281 void
2282 vm_page_free_reserve(
2283 int pages)
2284 {
2285 int free_after_reserve;
2286
2287 vm_page_free_reserved += pages;
2288
2289 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
2290
2291 vm_page_free_min = vm_page_free_reserved +
2292 VM_PAGE_FREE_MIN(free_after_reserve);
2293
2294 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
2295 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
2296
2297 vm_page_free_target = vm_page_free_reserved +
2298 VM_PAGE_FREE_TARGET(free_after_reserve);
2299
2300 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
2301 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
2302
2303 if (vm_page_free_target < vm_page_free_min + 5)
2304 vm_page_free_target = vm_page_free_min + 5;
2305
2306 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 3);
2307 vm_page_creation_throttle = vm_page_free_target / 2;
2308 }
2309
2310 /*
2311 * vm_pageout is the high level pageout daemon.
2312 */
2313
2314 void
2315 vm_pageout_continue(void)
2316 {
2317 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
2318 vm_pageout_scan_event_counter++;
2319 vm_pageout_scan();
2320 /* we hold vm_page_queue_free_lock now */
2321 assert(vm_page_free_wanted == 0);
2322 assert(vm_page_free_wanted_privileged == 0);
2323 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
2324 lck_mtx_unlock(&vm_page_queue_free_lock);
2325
2326 counter(c_vm_pageout_block++);
2327 thread_block((thread_continue_t)vm_pageout_continue);
2328 /*NOTREACHED*/
2329 }
2330
2331
2332 #ifdef FAKE_DEADLOCK
2333
2334 #define FAKE_COUNT 5000
2335
2336 int internal_count = 0;
2337 int fake_deadlock = 0;
2338
2339 #endif
2340
2341 static void
2342 vm_pageout_iothread_continue(struct vm_pageout_queue *q)
2343 {
2344 vm_page_t m = NULL;
2345 vm_object_t object;
2346 memory_object_t pager;
2347 thread_t self = current_thread();
2348
2349 if ((vm_pageout_internal_iothread != THREAD_NULL)
2350 && (self == vm_pageout_external_iothread )
2351 && (self->options & TH_OPT_VMPRIV))
2352 self->options &= ~TH_OPT_VMPRIV;
2353
2354 vm_page_lockspin_queues();
2355
2356 while ( !queue_empty(&q->pgo_pending) ) {
2357
2358 q->pgo_busy = TRUE;
2359 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
2360 VM_PAGE_CHECK(m);
2361 m->pageout_queue = FALSE;
2362 m->pageq.next = NULL;
2363 m->pageq.prev = NULL;
2364 vm_page_unlock_queues();
2365
2366 #ifdef FAKE_DEADLOCK
2367 if (q == &vm_pageout_queue_internal) {
2368 vm_offset_t addr;
2369 int pg_count;
2370
2371 internal_count++;
2372
2373 if ((internal_count == FAKE_COUNT)) {
2374
2375 pg_count = vm_page_free_count + vm_page_free_reserved;
2376
2377 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
2378 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
2379 }
2380 internal_count = 0;
2381 fake_deadlock++;
2382 }
2383 }
2384 #endif
2385 object = m->object;
2386
2387 vm_object_lock(object);
2388
2389 if (!object->pager_initialized) {
2390
2391 /*
2392 * If there is no memory object for the page, create
2393 * one and hand it to the default pager.
2394 */
2395
2396 if (!object->pager_initialized)
2397 vm_object_collapse(object,
2398 (vm_object_offset_t) 0,
2399 TRUE);
2400 if (!object->pager_initialized)
2401 vm_object_pager_create(object);
2402 if (!object->pager_initialized) {
2403 /*
2404 * Still no pager for the object.
2405 * Reactivate the page.
2406 *
2407 * Should only happen if there is no
2408 * default pager.
2409 */
2410 vm_page_lockspin_queues();
2411
2412 vm_pageout_queue_steal(m, TRUE);
2413 vm_pageout_dirty_no_pager++;
2414 vm_page_activate(m);
2415
2416 vm_page_unlock_queues();
2417
2418 /*
2419 * And we are done with it.
2420 */
2421 PAGE_WAKEUP_DONE(m);
2422
2423 vm_object_paging_end(object);
2424 vm_object_unlock(object);
2425
2426 vm_page_lockspin_queues();
2427 continue;
2428 }
2429 }
2430 pager = object->pager;
2431 if (pager == MEMORY_OBJECT_NULL) {
2432 /*
2433 * This pager has been destroyed by either
2434 * memory_object_destroy or vm_object_destroy, and
2435 * so there is nowhere for the page to go.
2436 */
2437 if (m->pageout) {
2438 /*
2439 * Just free the page... VM_PAGE_FREE takes
2440 * care of cleaning up all the state...
2441 * including doing the vm_pageout_throttle_up
2442 */
2443 VM_PAGE_FREE(m);
2444 } else {
2445 vm_page_lockspin_queues();
2446
2447 vm_pageout_queue_steal(m, TRUE);
2448 vm_page_activate(m);
2449
2450 vm_page_unlock_queues();
2451
2452 /*
2453 * And we are done with it.
2454 */
2455 PAGE_WAKEUP_DONE(m);
2456 }
2457 vm_object_paging_end(object);
2458 vm_object_unlock(object);
2459
2460 vm_page_lockspin_queues();
2461 continue;
2462 }
2463 VM_PAGE_CHECK(m);
2464 vm_object_unlock(object);
2465 /*
2466 * we expect the paging_in_progress reference to have
2467 * already been taken on the object before it was added
2468 * to the appropriate pageout I/O queue... this will
2469 * keep the object from being terminated and/or the
2470 * paging_offset from changing until the I/O has
2471 * completed... therefore no need to lock the object to
2472 * pull the paging_offset from it.
2473 *
2474 * Send the data to the pager.
2475 * any pageout clustering happens there
2476 */
2477 memory_object_data_return(pager,
2478 m->offset + object->paging_offset,
2479 PAGE_SIZE,
2480 NULL,
2481 NULL,
2482 FALSE,
2483 FALSE,
2484 0);
2485
2486 vm_object_lock(object);
2487 vm_object_paging_end(object);
2488 vm_object_unlock(object);
2489
2490 vm_page_lockspin_queues();
2491 }
2492 assert_wait((event_t) q, THREAD_UNINT);
2493
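 /*
 * before blocking, let any waiters know the laundry state changed:
 * if the queue had been marked throttled and is no longer over its
 * limit, wake the thread waiting on pgo_laundry; if a drain was
 * requested and the laundry count has reached zero, wake the drain
 * waiter (which sleeps on pgo_laundry + 1).
 */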
2494 if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) {
2495 q->pgo_throttled = FALSE;
2496 thread_wakeup((event_t) &q->pgo_laundry);
2497 }
2498 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
2499 q->pgo_draining = FALSE;
2500 thread_wakeup((event_t) (&q->pgo_laundry+1));
2501 }
2502 q->pgo_busy = FALSE;
2503 q->pgo_idle = TRUE;
2504 vm_page_unlock_queues();
2505
2506 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending);
2507 /*NOTREACHED*/
2508 }
2509
2510
2511 static void
2512 vm_pageout_iothread_external(void)
2513 {
2514 thread_t self = current_thread();
2515
2516 self->options |= TH_OPT_VMPRIV;
2517
2518 vm_pageout_iothread_continue(&vm_pageout_queue_external);
2519 /*NOTREACHED*/
2520 }
2521
2522
2523 static void
2524 vm_pageout_iothread_internal(void)
2525 {
2526 thread_t self = current_thread();
2527
2528 self->options |= TH_OPT_VMPRIV;
2529
2530 vm_pageout_iothread_continue(&vm_pageout_queue_internal);
2531 /*NOTREACHED*/
2532 }
2533
2534 kern_return_t
2535 vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
2536 {
2537 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
2538 return KERN_SUCCESS;
2539 } else {
2540 return KERN_FAILURE; /* Already set */
2541 }
2542 }
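 /*
 * Illustrative sketch (not part of the original source): a buffer
 * cache layer would typically register its reclaim routine once at
 * init time.  The names below are hypothetical; only the first
 * registration succeeds because of the compare-and-swap above.
 *
 *	static boolean_t my_buffer_cache_collect(int);		// hypothetical
 *
 *	void
 *	my_buffer_cache_init(void)				// hypothetical
 *	{
 *		if (vm_set_buffer_cleanup_callout(my_buffer_cache_collect) != KERN_SUCCESS) {
 *			// someone registered a callout before us; ours is ignored
 *		}
 *	}
 */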
2543
2544 static void
2545 vm_pageout_garbage_collect(int collect)
2546 {
2547 if (collect) {
2548 boolean_t buf_large_zfree = FALSE;
2549 stack_collect();
2550
2551 /*
2552 * consider_zone_gc should be last, because the other operations
2553 * might return memory to zones.
2554 */
2555 consider_machine_collect();
2556 if (consider_buffer_cache_collect != NULL) {
2557 buf_large_zfree = (*consider_buffer_cache_collect)(0);
2558 }
2559 consider_zone_gc(buf_large_zfree);
2560
2561 consider_machine_adjust();
2562 }
2563
2564 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
2565
2566 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
2567 /*NOTREACHED*/
2568 }
2569
2570
2571
2572 void
2573 vm_pageout(void)
2574 {
2575 thread_t self = current_thread();
2576 thread_t thread;
2577 kern_return_t result;
2578 spl_t s;
2579
2580 /*
2581 * Set thread privileges.
2582 */
2583 s = splsched();
2584 thread_lock(self);
2585 self->priority = BASEPRI_PREEMPT - 1;
2586 set_sched_pri(self, self->priority);
2587 thread_unlock(self);
2588
2589 if (!self->reserved_stack)
2590 self->reserved_stack = self->kernel_stack;
2591
2592 splx(s);
2593
2594 /*
2595 * Initialize some paging parameters.
2596 */
2597
2598 if (vm_pageout_idle_wait == 0)
2599 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
2600
2601 if (vm_pageout_burst_wait == 0)
2602 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
2603
2604 if (vm_pageout_empty_wait == 0)
2605 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
2606
2607 if (vm_pageout_deadlock_wait == 0)
2608 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
2609
2610 if (vm_pageout_deadlock_relief == 0)
2611 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
2612
2613 if (vm_pageout_inactive_relief == 0)
2614 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
2615
2616 if (vm_pageout_burst_active_throttle == 0)
2617 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
2618
2619 if (vm_pageout_burst_inactive_throttle == 0)
2620 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
2621
2622 /*
2623 * Set kernel task to low backing store privileged
2624 * status
2625 */
2626 task_lock(kernel_task);
2627 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
2628 task_unlock(kernel_task);
2629
2630 vm_page_free_count_init = vm_page_free_count;
2631
2632 /*
2633 * even if we've already called vm_page_free_reserve,
2634 * call it again here to ensure that the targets are
2635 * accurately calculated (it uses vm_page_free_count_init)
2636 * calling it with an arg of 0 will not change the reserve
2637 * but will re-calculate free_min and free_target
2638 */
2639 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
2640 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
2641 } else
2642 vm_page_free_reserve(0);
2643
2644
2645 queue_init(&vm_pageout_queue_external.pgo_pending);
2646 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2647 vm_pageout_queue_external.pgo_laundry = 0;
2648 vm_pageout_queue_external.pgo_idle = FALSE;
2649 vm_pageout_queue_external.pgo_busy = FALSE;
2650 vm_pageout_queue_external.pgo_throttled = FALSE;
2651 vm_pageout_queue_external.pgo_draining = FALSE;
2652
2653 queue_init(&vm_pageout_queue_internal.pgo_pending);
2654 vm_pageout_queue_internal.pgo_maxlaundry = 0;
2655 vm_pageout_queue_internal.pgo_laundry = 0;
2656 vm_pageout_queue_internal.pgo_idle = FALSE;
2657 vm_pageout_queue_internal.pgo_busy = FALSE;
2658 vm_pageout_queue_internal.pgo_throttled = FALSE;
2659 vm_pageout_queue_internal.pgo_draining = FALSE;
2660
2661
2662 /* internal pageout thread started when default pager registered first time */
2663 /* external pageout and garbage collection threads started here */
2664
2665 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
2666 BASEPRI_PREEMPT - 1,
2667 &vm_pageout_external_iothread);
2668 if (result != KERN_SUCCESS)
2669 panic("vm_pageout_iothread_external: create failed");
2670
2671 thread_deallocate(vm_pageout_external_iothread);
2672
2673 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
2674 MINPRI_KERNEL,
2675 &thread);
2676 if (result != KERN_SUCCESS)
2677 panic("vm_pageout_garbage_collect: create failed");
2678
2679 thread_deallocate(thread);
2680
2681 vm_object_reaper_init();
2682
2683
2684 vm_pageout_continue();
2685
2686 /*
2687 * Unreached code!
2688 *
2689 * The vm_pageout_continue() call above never returns, so the code below is never
2690 * executed. We take advantage of this to declare several DTrace VM related probe
2691 * points that our kernel doesn't have an analog for. These are probe points that
2692 * exist in Solaris and are in the DTrace documentation, so people may have written
2693 * scripts that use them. Declaring the probe points here means their scripts will
2694 * compile and execute which we want for portability of the scripts, but since this
2695 * section of code is never reached, the probe points will simply never fire. Yes,
2696 * this is basically a hack. The problem is the DTrace probe points were chosen with
2697 * Solaris specific VM events in mind, not portability to different VM implementations.
2698 */
2699
2700 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
2701 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
2702 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
2703 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
2704 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
2705 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
2706 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
2707 /*NOTREACHED*/
2708 }
2709
2710 kern_return_t
2711 vm_pageout_internal_start(void)
2712 {
2713 kern_return_t result;
2714
2715 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2716 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread);
2717 if (result == KERN_SUCCESS)
2718 thread_deallocate(vm_pageout_internal_iothread);
2719 return result;
2720 }
2721
2722
2723 /*
2724 * when marshalling pages into a UPL and subsequently committing
2725 * or aborting them, it is necessary to hold
2726 * the vm_page_queue_lock (a hot global lock) for certain operations
2727 * on the page... however, the majority of the work can be done
2728 * while merely holding the object lock... in fact there are certain
2729 * collections of pages that don't require any work brokered by the
2730 * vm_page_queue_lock... to mitigate the time spent behind the global
2731 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
2732 * while doing all of the work that doesn't require the vm_page_queue_lock...
2733 * then call dw_do_work to acquire the vm_page_queue_lock and do the
2734 * necessary work for each page... we will grab the busy bit on the page
2735 * if it's not already held so that dw_do_work can drop the object lock
2736 * if it can't immediately take the vm_page_queue_lock in order to compete
2737 * for the locks in the same order that vm_pageout_scan takes them.
2738 * the operation names are modeled after the names of the routines that
2739 * need to be called in order to make the changes very obvious in the
2740 * original loop
2741 */
2742
2743 #define DELAYED_WORK_LIMIT 32
2744
2745 #define DW_vm_page_unwire 0x01
2746 #define DW_vm_page_wire 0x02
2747 #define DW_vm_page_free 0x04
2748 #define DW_vm_page_activate 0x08
2749 #define DW_vm_page_deactivate_internal 0x10
2750 #define DW_vm_page_speculate 0x20
2751 #define DW_vm_page_lru 0x40
2752 #define DW_vm_pageout_throttle_up 0x80
2753 #define DW_PAGE_WAKEUP 0x100
2754 #define DW_clear_busy 0x200
2755 #define DW_clear_reference 0x400
2756 #define DW_set_reference 0x800
2757
2758 struct dw {
2759 vm_page_t dw_m;
2760 int dw_mask;
2761 };
2762
2763
2764 static void dw_do_work(vm_object_t object, struct dw *dwp, int dw_count);
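 /*
 * Typical use of the delayed-work mechanism (a sketch of the pattern
 * used later in this file, not additional functionality): work is
 * batched per page while only the object lock is held, then flushed
 * by dw_do_work under the vm_page_queue_lock once the batch fills up.
 *
 *	struct dw	dw_array[DELAYED_WORK_LIMIT];
 *	struct dw	*dwp = &dw_array[0];
 *	int		dw_count = 0;
 *
 *	for each page m of interest {
 *		dwp->dw_mask = 0;
 *		... set DW_* bits describing what m needs ...
 *		if (dwp->dw_mask) {
 *			dwp->dw_m = m;
 *			dwp++;
 *			dw_count++;
 *			if (dw_count >= DELAYED_WORK_LIMIT) {
 *				dw_do_work(object, &dw_array[0], dw_count);
 *				dwp = &dw_array[0];
 *				dw_count = 0;
 *			}
 *		}
 *	}
 *	if (dw_count)
 *		dw_do_work(object, &dw_array[0], dw_count);
 */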
2765
2766
2767
2768 static upl_t
2769 upl_create(int type, int flags, upl_size_t size)
2770 {
2771 upl_t upl;
2772 int page_field_size = 0;
2773 int upl_flags = 0;
2774 int upl_size = sizeof(struct upl);
2775
2776 size = round_page_32(size);
2777
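 /*
 * for a LITE UPL we keep one bit per page: atop(size) bits, rounded
 * up to whole bytes ((atop(size) + 7) >> 3) and then up to a 4 byte
 * boundary so the bitmap can be walked a 32-bit word at a time.
 */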
2778 if (type & UPL_CREATE_LITE) {
2779 page_field_size = (atop(size) + 7) >> 3;
2780 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2781
2782 upl_flags |= UPL_LITE;
2783 }
2784 if (type & UPL_CREATE_INTERNAL) {
2785 upl_size += (int) sizeof(struct upl_page_info) * atop(size);
2786
2787 upl_flags |= UPL_INTERNAL;
2788 }
2789 upl = (upl_t)kalloc(upl_size + page_field_size);
2790
2791 if (page_field_size)
2792 bzero((char *)upl + upl_size, page_field_size);
2793
2794 upl->flags = upl_flags | flags;
2795 upl->src_object = NULL;
2796 upl->kaddr = (vm_offset_t)0;
2797 upl->size = 0;
2798 upl->map_object = NULL;
2799 upl->ref_count = 1;
2800 upl->highest_page = 0;
2801 upl_lock_init(upl);
2802 upl->vector_upl = NULL;
2803 #if UPL_DEBUG
2804 upl->ubc_alias1 = 0;
2805 upl->ubc_alias2 = 0;
2806
2807 upl->upl_creator = current_thread();
2808 upl->upl_state = 0;
2809 upl->upl_commit_index = 0;
2810 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
2811
2812 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
2813 #endif /* UPL_DEBUG */
2814
2815 return(upl);
2816 }
2817
2818 static void
2819 upl_destroy(upl_t upl)
2820 {
2821 int page_field_size; /* bit field in word size buf */
2822 int size;
2823
2824 #if UPL_DEBUG
2825 {
2826 vm_object_t object;
2827
2828 if (upl->flags & UPL_SHADOWED) {
2829 object = upl->map_object->shadow;
2830 } else {
2831 object = upl->map_object;
2832 }
2833 vm_object_lock(object);
2834 queue_remove(&object->uplq, upl, upl_t, uplq);
2835 vm_object_unlock(object);
2836 }
2837 #endif /* UPL_DEBUG */
2838 /*
2839 * drop a reference on the map_object whether or
2840 * not a pageout object is inserted
2841 */
2842 if (upl->flags & UPL_SHADOWED)
2843 vm_object_deallocate(upl->map_object);
2844
2845 if (upl->flags & UPL_DEVICE_MEMORY)
2846 size = PAGE_SIZE;
2847 else
2848 size = upl->size;
2849 page_field_size = 0;
2850
2851 if (upl->flags & UPL_LITE) {
2852 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2853 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2854 }
2855 upl_lock_destroy(upl);
2856 upl->vector_upl = (vector_upl_t) 0xfeedbeef;
2857 if (upl->flags & UPL_INTERNAL) {
2858 kfree(upl,
2859 sizeof(struct upl) +
2860 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
2861 + page_field_size);
2862 } else {
2863 kfree(upl, sizeof(struct upl) + page_field_size);
2864 }
2865 }
2866
2867 void uc_upl_dealloc(upl_t upl);
2868 __private_extern__ void
2869 uc_upl_dealloc(upl_t upl)
2870 {
2871 if (--upl->ref_count == 0)
2872 upl_destroy(upl);
2873 }
2874
2875 void
2876 upl_deallocate(upl_t upl)
2877 {
2878 if (--upl->ref_count == 0) {
2879 if(vector_upl_is_valid(upl))
2880 vector_upl_deallocate(upl);
2881 upl_destroy(upl);
2882 }
2883 }
2884
2885 #if DEVELOPMENT || DEBUG
2886 /*
2887 * Statistics about UPL enforcement of copy-on-write obligations.
2888 */
2889 unsigned long upl_cow = 0;
2890 unsigned long upl_cow_again = 0;
2891 unsigned long upl_cow_pages = 0;
2892 unsigned long upl_cow_again_pages = 0;
2893
2894 unsigned long iopl_cow = 0;
2895 unsigned long iopl_cow_pages = 0;
2896 #endif
2897
2898 /*
2899 * Routine: vm_object_upl_request
2900 * Purpose:
2901 * Cause the population of a portion of a vm_object.
2902 * Depending on the nature of the request, the pages
2903 * returned may contain valid data or be uninitialized.
2904 * A page list structure, listing the physical pages
2905 * will be returned upon request.
2906 * This function is called by the file system or any other
2907 * supplier of backing store to a pager.
2908 * IMPORTANT NOTE: The caller must still respect the relationship
2909 * between the vm_object and its backing memory object. The
2910 * caller MUST NOT substitute changes in the backing file
2911 * without first doing a memory_object_lock_request on the
2912 * target range unless it is known that the pages are not
2913 * shared with another entity at the pager level.
2914 * Copy_in_to:
2915 * if a page list structure is present
2916 * return the mapped physical pages; where a
2917 * page is not present, return a non-initialized
2918 * one. If the no_sync bit is turned on, don't
2919 * call the pager unlock to synchronize with other
2920 * possible copies of the page. Leave pages busy
2921 * in the original object, if a page list structure
2922 * was specified. When a commit of the page list
2923 * pages is done, the dirty bit will be set for each one.
2924 * Copy_out_from:
2925 * If a page list structure is present, return
2926 * all mapped pages. Where a page does not exist
2927 * map a zero filled one. Leave pages busy in
2928 * the original object. If a page list structure
2929 * is not specified, this call is a no-op.
2930 *
2931 * Note: access of default pager objects has a rather interesting
2932 * twist. The caller of this routine, presumably the file system
2933 * page cache handling code, will never actually make a request
2934 * against a default pager backed object. Only the default
2935 * pager will make requests on backing store related vm_objects.
2936 * In this way the default pager can maintain the relationship
2937 * between backing store files (abstract memory objects) and
2938 * the vm_objects (cache objects) they support.
2939 *
2940 */
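 /*
 * Illustrative call (not from the original source; 'object', 'offset'
 * and 'n' are hypothetical): gather a range of an object for pageout
 * with an internal, lite page list, letting the routine size the
 * list for us.
 *
 *	upl_t			upl;
 *	upl_page_info_t		*pl;
 *	unsigned int		count = MAX_UPL_SIZE;
 *	kern_return_t		kr;
 *
 *	kr = vm_object_upl_request(object, offset, PAGE_SIZE * n,
 *				   &upl, NULL, &count,
 *				   UPL_SET_INTERNAL | UPL_SET_LITE |
 *				   UPL_COPYOUT_FROM);
 *	if (kr == KERN_SUCCESS)
 *		pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
 */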
2941
2942 __private_extern__ kern_return_t
2943 vm_object_upl_request(
2944 vm_object_t object,
2945 vm_object_offset_t offset,
2946 upl_size_t size,
2947 upl_t *upl_ptr,
2948 upl_page_info_array_t user_page_list,
2949 unsigned int *page_list_count,
2950 int cntrl_flags)
2951 {
2952 vm_page_t dst_page = VM_PAGE_NULL;
2953 vm_object_offset_t dst_offset;
2954 upl_size_t xfer_size;
2955 boolean_t dirty;
2956 boolean_t hw_dirty;
2957 upl_t upl = NULL;
2958 unsigned int entry;
2959 #if MACH_CLUSTER_STATS
2960 boolean_t encountered_lrp = FALSE;
2961 #endif
2962 vm_page_t alias_page = NULL;
2963 int refmod_state = 0;
2964 wpl_array_t lite_list = NULL;
2965 vm_object_t last_copy_object;
2966 struct dw dw_array[DELAYED_WORK_LIMIT];
2967 struct dw *dwp;
2968 int dw_count;
2969
2970 if (cntrl_flags & ~UPL_VALID_FLAGS) {
2971 /*
2972 * For forward compatibility's sake,
2973 * reject any unknown flag.
2974 */
2975 return KERN_INVALID_VALUE;
2976 }
2977 if ( (!object->internal) && (object->paging_offset != 0) )
2978 panic("vm_object_upl_request: external object with non-zero paging offset\n");
2979 if (object->phys_contiguous)
2980 panic("vm_object_upl_request: contiguous object specified\n");
2981
2982
2983 if ((size / PAGE_SIZE) > MAX_UPL_SIZE)
2984 size = MAX_UPL_SIZE * PAGE_SIZE;
2985
2986 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
2987 *page_list_count = MAX_UPL_SIZE;
2988
2989 if (cntrl_flags & UPL_SET_INTERNAL) {
2990 if (cntrl_flags & UPL_SET_LITE) {
2991
2992 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, 0, size);
2993
2994 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
2995 lite_list = (wpl_array_t)
2996 (((uintptr_t)user_page_list) +
2997 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
2998 if (size == 0) {
2999 user_page_list = NULL;
3000 lite_list = NULL;
3001 }
3002 } else {
3003 upl = upl_create(UPL_CREATE_INTERNAL, 0, size);
3004
3005 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
3006 if (size == 0) {
3007 user_page_list = NULL;
3008 }
3009 }
3010 } else {
3011 if (cntrl_flags & UPL_SET_LITE) {
3012
3013 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE, 0, size);
3014
3015 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
3016 if (size == 0) {
3017 lite_list = NULL;
3018 }
3019 } else {
3020 upl = upl_create(UPL_CREATE_EXTERNAL, 0, size);
3021 }
3022 }
3023 *upl_ptr = upl;
3024
3025 if (user_page_list)
3026 user_page_list[0].device = FALSE;
3027
3028 if (cntrl_flags & UPL_SET_LITE) {
3029 upl->map_object = object;
3030 } else {
3031 upl->map_object = vm_object_allocate(size);
3032 /*
3033 * No need to lock the new object: nobody else knows
3034 * about it yet, so it's all ours so far.
3035 */
3036 upl->map_object->shadow = object;
3037 upl->map_object->pageout = TRUE;
3038 upl->map_object->can_persist = FALSE;
3039 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3040 upl->map_object->shadow_offset = offset;
3041 upl->map_object->wimg_bits = object->wimg_bits;
3042
3043 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3044
3045 upl->flags |= UPL_SHADOWED;
3046 }
3047 /*
3048 * ENCRYPTED SWAP:
3049 * Just mark the UPL as "encrypted" here.
3050 * We'll actually encrypt the pages later,
3051 * in upl_encrypt(), when the caller has
3052 * selected which pages need to go to swap.
3053 */
3054 if (cntrl_flags & UPL_ENCRYPT)
3055 upl->flags |= UPL_ENCRYPTED;
3056
3057 if (cntrl_flags & UPL_FOR_PAGEOUT)
3058 upl->flags |= UPL_PAGEOUT;
3059
3060 vm_object_lock(object);
3061 vm_object_activity_begin(object);
3062
3063 /*
3064 * we can lock in the paging_offset once paging_in_progress is set
3065 */
3066 upl->size = size;
3067 upl->offset = offset + object->paging_offset;
3068
3069 #if UPL_DEBUG
3070 queue_enter(&object->uplq, upl, upl_t, uplq);
3071 #endif /* UPL_DEBUG */
3072
3073 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
3074 /*
3075 * Honor copy-on-write obligations
3076 *
3077 * The caller is gathering these pages and
3078 * might modify their contents. We need to
3079 * make sure that the copy object has its own
3080 * private copies of these pages before we let
3081 * the caller modify them.
3082 */
3083 vm_object_update(object,
3084 offset,
3085 size,
3086 NULL,
3087 NULL,
3088 FALSE, /* should_return */
3089 MEMORY_OBJECT_COPY_SYNC,
3090 VM_PROT_NO_CHANGE);
3091 #if DEVELOPMENT || DEBUG
3092 upl_cow++;
3093 upl_cow_pages += size >> PAGE_SHIFT;
3094 #endif
3095 }
3096 /*
3097 * remember which copy object we synchronized with
3098 */
3099 last_copy_object = object->copy;
3100 entry = 0;
3101
3102 xfer_size = size;
3103 dst_offset = offset;
3104
3105 dwp = &dw_array[0];
3106 dw_count = 0;
3107
3108 while (xfer_size) {
3109
3110 dwp->dw_mask = 0;
3111
3112 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
3113 vm_object_unlock(object);
3114 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3115 vm_object_lock(object);
3116 }
3117 if (cntrl_flags & UPL_COPYOUT_FROM) {
3118 upl->flags |= UPL_PAGE_SYNC_DONE;
3119
3120 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
3121 dst_page->fictitious ||
3122 dst_page->absent ||
3123 dst_page->error ||
3124 (VM_PAGE_WIRED(dst_page) && !dst_page->pageout && !dst_page->list_req_pending)) {
3125
3126 if (user_page_list)
3127 user_page_list[entry].phys_addr = 0;
3128
3129 goto try_next_page;
3130 }
3131 /*
3132 * grab this up front...
3133 * a high percentage of the time we're going to
3134 * need the hardware modification state a bit later
3135 * anyway... so we can eliminate an extra call into
3136 * the pmap layer by grabbing it here and recording it
3137 */
3138 if (dst_page->pmapped)
3139 refmod_state = pmap_get_refmod(dst_page->phys_page);
3140 else
3141 refmod_state = 0;
3142
3143 if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) {
3144 /*
3145 * page is on inactive list and referenced...
3146 * reactivate it now... this gets it out of the
3147 * way of vm_pageout_scan which would have to
3148 * reactivate it upon tripping over it
3149 */
3150 dwp->dw_mask |= DW_vm_page_activate;
3151 }
3152 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
3153 /*
3154 * we're only asking for DIRTY pages to be returned
3155 */
3156 if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
3157 /*
3158 * if we were the page stolen by vm_pageout_scan to be
3159 * cleaned (as opposed to a buddy being clustered in),
3160 * or this request is not being driven by a PAGEOUT cluster,
3161 * then we only need to check for the page being dirty or
3162 * precious to decide whether to return it
3163 */
3164 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED))
3165 goto check_busy;
3166 goto dont_return;
3167 }
3168 /*
3169 * this is a request for a PAGEOUT cluster and this page
3170 * is merely along for the ride as a 'buddy'... not only
3171 * does it have to be dirty to be returned, but it also
3172 * can't have been referenced recently... note that we've
3173 * already filtered above based on whether this page is
3174 * currently on the inactive queue or it meets the page
3175 * ticket (generation count) check
3176 */
3177 if ( (cntrl_flags & UPL_CLEAN_IN_PLACE || !(refmod_state & VM_MEM_REFERENCED)) &&
3178 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
3179 goto check_busy;
3180 }
3181 dont_return:
3182 /*
3183 * if we reach here, we're not to return
3184 * the page... go on to the next one
3185 */
3186 if (user_page_list)
3187 user_page_list[entry].phys_addr = 0;
3188
3189 goto try_next_page;
3190 }
3191 check_busy:
3192 if (dst_page->busy && (!(dst_page->list_req_pending && (dst_page->pageout || dst_page->cleaning)))) {
3193 if (cntrl_flags & UPL_NOBLOCK) {
3194 if (user_page_list)
3195 user_page_list[entry].phys_addr = 0;
3196
3197 goto try_next_page;
3198 }
3199 /*
3200 * someone else is playing with the
3201 * page. We will have to wait.
3202 */
3203 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3204
3205 continue;
3206 }
3207 /*
3208 * Someone else already cleaning the page?
3209 */
3210 if ((dst_page->cleaning || dst_page->absent || VM_PAGE_WIRED(dst_page)) && !dst_page->list_req_pending) {
3211 if (user_page_list)
3212 user_page_list[entry].phys_addr = 0;
3213
3214 goto try_next_page;
3215 }
3216 /*
3217 * ENCRYPTED SWAP:
3218 * The caller is gathering this page and might
3219 * access its contents later on. Decrypt the
3220 * page before adding it to the UPL, so that
3221 * the caller never sees encrypted data.
3222 */
3223 if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) {
3224 int was_busy;
3225
3226 /*
3227 * save the current state of busy
3228 * mark page as busy while decrypt
3229 * is in progress since it will drop
3230 * the object lock...
3231 */
3232 was_busy = dst_page->busy;
3233 dst_page->busy = TRUE;
3234
3235 vm_page_decrypt(dst_page, 0);
3236 vm_page_decrypt_for_upl_counter++;
3237 /*
3238 * restore to original busy state
3239 */
3240 dst_page->busy = was_busy;
3241 }
3242 if (dst_page->pageout_queue == TRUE) {
3243
3244 vm_page_lockspin_queues();
3245
3246 #if CONFIG_EMBEDDED
3247 if (dst_page->laundry)
3248 #else
3249 if (dst_page->pageout_queue == TRUE)
3250 #endif
3251 {
3252 /*
3253 * we've buddied up a page for a clustered pageout
3254 * that has already been moved to the pageout
3255 * queue by pageout_scan... we need to remove
3256 * it from the queue and drop the laundry count
3257 * on that queue
3258 */
3259 vm_pageout_throttle_up(dst_page);
3260 }
3261 vm_page_unlock_queues();
3262 }
3263 #if MACH_CLUSTER_STATS
3264 /*
3265 * pageout statistics gathering. count
3266 * all the pages we will page out that
3267 * were not counted in the initial
3268 * vm_pageout_scan work
3269 */
3270 if (dst_page->list_req_pending)
3271 encountered_lrp = TRUE;
3272 if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious)) && !dst_page->list_req_pending) {
3273 if (encountered_lrp)
3274 CLUSTER_STAT(pages_at_higher_offsets++;)
3275 else
3276 CLUSTER_STAT(pages_at_lower_offsets++;)
3277 }
3278 #endif
3279 /*
3280 * Turn off busy indication on pending
3281 * pageout. Note: we can only get here
3282 * in the request pending case.
3283 */
3284 dst_page->list_req_pending = FALSE;
3285 dst_page->busy = FALSE;
3286
3287 hw_dirty = refmod_state & VM_MEM_MODIFIED;
3288 dirty = hw_dirty ? TRUE : dst_page->dirty;
3289
3290 if (dst_page->phys_page > upl->highest_page)
3291 upl->highest_page = dst_page->phys_page;
3292
3293 if (cntrl_flags & UPL_SET_LITE) {
3294 unsigned int pg_num;
3295
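 /*
 * compute this page's index within the UPL and mark its bit in
 * the lite list: bit (pg_num & 31) of 32-bit word (pg_num >> 5)
 */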
3296 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
3297 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
3298 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
3299
3300 if (hw_dirty)
3301 pmap_clear_modify(dst_page->phys_page);
3302
3303 /*
3304 * Mark original page as cleaning
3305 * in place.
3306 */
3307 dst_page->cleaning = TRUE;
3308 dst_page->precious = FALSE;
3309 } else {
3310 /*
3311 * use pageclean setup, it is more
3312 * convenient even for the pageout
3313 * cases here
3314 */
3315 vm_object_lock(upl->map_object);
3316 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
3317 vm_object_unlock(upl->map_object);
3318
3319 alias_page->absent = FALSE;
3320 alias_page = NULL;
3321 }
3322 #if MACH_PAGEMAP
3323 /*
3324 * Record that this page has been
3325 * written out
3326 */
3327 vm_external_state_set(object->existence_map, dst_page->offset);
3328 #endif /*MACH_PAGEMAP*/
3329 dst_page->dirty = dirty;
3330
3331 if (!dirty)
3332 dst_page->precious = TRUE;
3333
3334 if (dst_page->pageout)
3335 dst_page->busy = TRUE;
3336
3337 if ( (cntrl_flags & UPL_ENCRYPT) ) {
3338 /*
3339 * ENCRYPTED SWAP:
3340 * We want to deny access to the target page
3341 * because its contents are about to be
3342 * encrypted and the user would be very
3343 * confused to see encrypted data instead
3344 * of their data.
3345 * We also set "encrypted_cleaning" to allow
3346 * vm_pageout_scan() to demote that page
3347 * from "adjacent/clean-in-place" to
3348 * "target/clean-and-free" if it bumps into
3349 * this page during its scanning while we're
3350 * still processing this cluster.
3351 */
3352 dst_page->busy = TRUE;
3353 dst_page->encrypted_cleaning = TRUE;
3354 }
3355 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
3356 /*
3357 * deny access to the target page
3358 * while it is being worked on
3359 */
3360 if ((!dst_page->pageout) && ( !VM_PAGE_WIRED(dst_page))) {
3361 dst_page->busy = TRUE;
3362 dst_page->pageout = TRUE;
3363
3364 dwp->dw_mask |= DW_vm_page_wire;
3365 }
3366 }
3367 } else {
3368 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
3369 /*
3370 * Honor copy-on-write obligations
3371 *
3372 * The copy object has changed since we
3373 * last synchronized for copy-on-write.
3374 * Another copy object might have been
3375 * inserted while we released the object's
3376 * lock. Since someone could have seen the
3377 * original contents of the remaining pages
3378 * through that new object, we have to
3379 * synchronize with it again for the remaining
3380 * pages only. The previous pages are "busy"
3381 * so they can not be seen through the new
3382 * mapping. The new mapping will see our
3383 * upcoming changes for those previous pages,
3384 * but that's OK since they couldn't see what
3385 * was there before. It's just a race anyway
3386 * and there's no guarantee of consistency or
3387 * atomicity. We just don't want new mappings
3388 * to see both the *before* and *after* pages.
3389 */
3390 if (object->copy != VM_OBJECT_NULL) {
3391 vm_object_update(
3392 object,
3393 dst_offset,/* current offset */
3394 xfer_size, /* remaining size */
3395 NULL,
3396 NULL,
3397 FALSE, /* should_return */
3398 MEMORY_OBJECT_COPY_SYNC,
3399 VM_PROT_NO_CHANGE);
3400
3401 #if DEVELOPMENT || DEBUG
3402 upl_cow_again++;
3403 upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
3404 #endif
3405 }
3406 /*
3407 * remember the copy object we synced with
3408 */
3409 last_copy_object = object->copy;
3410 }
3411 dst_page = vm_page_lookup(object, dst_offset);
3412
3413 if (dst_page != VM_PAGE_NULL) {
3414
3415 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
3416
3417 if ( !(dst_page->absent && dst_page->list_req_pending) ) {
3418 /*
3419 * skip over pages already present in the cache
3420 */
3421 if (user_page_list)
3422 user_page_list[entry].phys_addr = 0;
3423
3424 goto try_next_page;
3425 }
3426 }
3427 if ( !(dst_page->list_req_pending) ) {
3428
3429 if (dst_page->cleaning) {
3430 /*
3431 * someone else is writing to the page... wait...
3432 */
3433 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3434
3435 continue;
3436 }
3437 } else {
3438 if (dst_page->fictitious &&
3439 dst_page->phys_page == vm_page_fictitious_addr) {
3440 assert( !dst_page->speculative);
3441 /*
3442 * dump the fictitious page
3443 */
3444 dst_page->list_req_pending = FALSE;
3445
3446 VM_PAGE_FREE(dst_page);
3447
3448 dst_page = NULL;
3449
3450 } else if (dst_page->absent) {
3451 /*
3452 * the default_pager case
3453 */
3454 dst_page->list_req_pending = FALSE;
3455 dst_page->busy = FALSE;
3456
3457 } else if (dst_page->pageout || dst_page->cleaning) {
3458 /*
3459 * page was earmarked by vm_pageout_scan
3460 * to be cleaned and stolen... we're going
3461 * to take it back since we are not attempting
3462 * to read that page and we don't want to stall
3463 * waiting for it to be cleaned for 2 reasons...
3464 * 1 - no use paging it out and back in
3465 * 2 - if we stall, we may cause a deadlock in
3466 * the FS trying to acquire its locks
3467 * on the VNOP_PAGEOUT path presuming that
3468 * those locks are already held on the read
3469 * path before trying to create this UPL
3470 *
3471 * so undo all of the state that vm_pageout_scan
3472 * hung on this page
3473 */
3474 dst_page->busy = FALSE;
3475
3476 vm_pageout_queue_steal(dst_page, FALSE);
3477 }
3478 }
3479 }
3480 if (dst_page == VM_PAGE_NULL) {
3481 if (object->private) {
3482 /*
3483 * This is a nasty wrinkle for users
3484 * of upl who encounter device or
3485 * private memory however, it is
3486 * unavoidable, only a fault can
3487 * resolve the actual backing
3488 * physical page by asking the
3489 * backing device.
3490 */
3491 if (user_page_list)
3492 user_page_list[entry].phys_addr = 0;
3493
3494 goto try_next_page;
3495 }
3496 /*
3497 * need to allocate a page
3498 */
3499 dst_page = vm_page_grab();
3500
3501 if (dst_page == VM_PAGE_NULL) {
3502 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
3503 /*
3504 * we don't want to stall waiting for pages to come onto the free list
3505 * while we're already holding absent pages in this UPL
3506 * the caller will deal with the empty slots
3507 */
3508 if (user_page_list)
3509 user_page_list[entry].phys_addr = 0;
3510
3511 goto try_next_page;
3512 }
3513 /*
3514 * no pages available... wait
3515 * then try again for the same
3516 * offset...
3517 */
3518 vm_object_unlock(object);
3519 VM_PAGE_WAIT();
3520 vm_object_lock(object);
3521
3522 continue;
3523 }
3524 vm_page_insert(dst_page, object, dst_offset);
3525
3526 dst_page->absent = TRUE;
3527 dst_page->busy = FALSE;
3528
3529 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
3530 /*
3531 * if UPL_RET_ONLY_ABSENT was specified,
3532 * then we're definitely setting up a
3533 * UPL for a clustered read/pagein
3534 * operation... mark the pages as clustered
3535 * so upl_commit_range can put them on the
3536 * speculative list
3537 */
3538 dst_page->clustered = TRUE;
3539 }
3540 }
3541 if (dst_page->fictitious) {
3542 panic("need corner case for fictitious page");
3543 }
3544 if (dst_page->busy) {
3545 /*
3546 * someone else is playing with the
3547 * page. We will have to wait.
3548 */
3549 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3550
3551 continue;
3552 }
3553 /*
3554 * ENCRYPTED SWAP:
3555 */
3556 if (cntrl_flags & UPL_ENCRYPT) {
3557 /*
3558 * The page is going to be encrypted when we
3559 * get it from the pager, so mark it so.
3560 */
3561 dst_page->encrypted = TRUE;
3562 } else {
3563 /*
3564 * Otherwise, the page will not contain
3565 * encrypted data.
3566 */
3567 dst_page->encrypted = FALSE;
3568 }
3569 dst_page->overwriting = TRUE;
3570
3571 if (dst_page->pmapped) {
3572 if ( !(cntrl_flags & UPL_FILE_IO))
3573 /*
3574 * eliminate all mappings from the
3575 * original object and its progeny
3576 */
3577 refmod_state = pmap_disconnect(dst_page->phys_page);
3578 else
3579 refmod_state = pmap_get_refmod(dst_page->phys_page);
3580 } else
3581 refmod_state = 0;
3582
3583 hw_dirty = refmod_state & VM_MEM_MODIFIED;
3584 dirty = hw_dirty ? TRUE : dst_page->dirty;
3585
3586 if (cntrl_flags & UPL_SET_LITE) {
3587 unsigned int pg_num;
3588
3589 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
3590 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
3591 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
3592
3593 if (hw_dirty)
3594 pmap_clear_modify(dst_page->phys_page);
3595
3596 /*
3597 * Mark original page as cleaning
3598 * in place.
3599 */
3600 dst_page->cleaning = TRUE;
3601 dst_page->precious = FALSE;
3602 } else {
3603 /*
3604 * use pageclean setup, it is more
3605 * convenient even for the pageout
3606 * cases here
3607 */
3608 vm_object_lock(upl->map_object);
3609 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
3610 vm_object_unlock(upl->map_object);
3611
3612 alias_page->absent = FALSE;
3613 alias_page = NULL;
3614 }
3615
3616 if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
3617 /*
3618 * clean in place for read implies
3619 * that a write will be done on all
3620 * the pages that are dirty before
3621 * a upl commit is done. The caller
3622 * is obligated to preserve the
3623 * contents of all pages marked dirty
3624 */
3625 upl->flags |= UPL_CLEAR_DIRTY;
3626 }
3627 dst_page->dirty = dirty;
3628
3629 if (!dirty)
3630 dst_page->precious = TRUE;
3631
3632 if ( !VM_PAGE_WIRED(dst_page)) {
3633 /*
3634 * deny access to the target page while
3635 * it is being worked on
3636 */
3637 dst_page->busy = TRUE;
3638 } else
3639 dwp->dw_mask |= DW_vm_page_wire;
3640
3641 /*
3642 * We might be about to satisfy a fault which has been
3643 * requested. So no need for the "restart" bit.
3644 */
3645 dst_page->restart = FALSE;
3646 if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
3647 /*
3648 * expect the page to be used
3649 */
3650 dwp->dw_mask |= DW_set_reference;
3651 }
3652 dst_page->precious = (cntrl_flags & UPL_PRECIOUS) ? TRUE : FALSE;
3653 }
3654 if (dst_page->phys_page > upl->highest_page)
3655 upl->highest_page = dst_page->phys_page;
3656 if (user_page_list) {
3657 user_page_list[entry].phys_addr = dst_page->phys_page;
3658 user_page_list[entry].pageout = dst_page->pageout;
3659 user_page_list[entry].absent = dst_page->absent;
3660 user_page_list[entry].dirty = dst_page->dirty;
3661 user_page_list[entry].precious = dst_page->precious;
3662 user_page_list[entry].device = FALSE;
3663 if (dst_page->clustered == TRUE)
3664 user_page_list[entry].speculative = dst_page->speculative;
3665 else
3666 user_page_list[entry].speculative = FALSE;
3667 user_page_list[entry].cs_validated = dst_page->cs_validated;
3668 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
3669 }
3670 /*
3671 * if UPL_RET_ONLY_ABSENT is set, then
3672 * we are working with a fresh page and we've
3673 * just set the clustered flag on it to
3674 * indicate that it was dragged in as part of a
3675 * speculative cluster... so leave it alone
3676 */
3677 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
3678 /*
3679 * someone is explicitly grabbing this page...
3680 * update clustered and speculative state
3681 *
3682 */
3683 VM_PAGE_CONSUME_CLUSTERED(dst_page);
3684 }
3685 try_next_page:
3686 if (dwp->dw_mask) {
3687 if (dwp->dw_mask & DW_vm_page_activate)
3688 VM_STAT_INCR(reactivations);
3689
3690 if (dst_page->busy == FALSE) {
3691 /*
3692 * dw_do_work may need to drop the object lock
3693 * if it does, we need the pages it's looking at to
3694 * be held stable via the busy bit.
3695 */
3696 dst_page->busy = TRUE;
3697 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
3698 }
3699 dwp->dw_m = dst_page;
3700 dwp++;
3701 dw_count++;
3702
3703 if (dw_count >= DELAYED_WORK_LIMIT) {
3704 dw_do_work(object, &dw_array[0], dw_count);
3705
3706 dwp = &dw_array[0];
3707 dw_count = 0;
3708 }
3709 }
3710 entry++;
3711 dst_offset += PAGE_SIZE_64;
3712 xfer_size -= PAGE_SIZE;
3713 }
3714 if (dw_count)
3715 dw_do_work(object, &dw_array[0], dw_count);
3716
3717 if (alias_page != NULL) {
3718 VM_PAGE_FREE(alias_page);
3719 }
3720
3721 if (page_list_count != NULL) {
3722 if (upl->flags & UPL_INTERNAL)
3723 *page_list_count = 0;
3724 else if (*page_list_count > entry)
3725 *page_list_count = entry;
3726 }
3727 #if UPL_DEBUG
3728 upl->upl_state = 1;
3729 #endif
3730 vm_object_unlock(object);
3731
3732 return KERN_SUCCESS;
3733 }
3734
3735 /* JMM - Backward compatibility for now */
3736 kern_return_t
3737 vm_fault_list_request( /* forward */
3738 memory_object_control_t control,
3739 vm_object_offset_t offset,
3740 upl_size_t size,
3741 upl_t *upl_ptr,
3742 upl_page_info_t **user_page_list_ptr,
3743 unsigned int page_list_count,
3744 int cntrl_flags);
3745 kern_return_t
3746 vm_fault_list_request(
3747 memory_object_control_t control,
3748 vm_object_offset_t offset,
3749 upl_size_t size,
3750 upl_t *upl_ptr,
3751 upl_page_info_t **user_page_list_ptr,
3752 unsigned int page_list_count,
3753 int cntrl_flags)
3754 {
3755 unsigned int local_list_count;
3756 upl_page_info_t *user_page_list;
3757 kern_return_t kr;
3758
3759 if((cntrl_flags & UPL_VECTOR)==UPL_VECTOR)
3760 return KERN_INVALID_ARGUMENT;
3761
3762 if (user_page_list_ptr != NULL) {
3763 local_list_count = page_list_count;
3764 user_page_list = *user_page_list_ptr;
3765 } else {
3766 local_list_count = 0;
3767 user_page_list = NULL;
3768 }
3769 kr = memory_object_upl_request(control,
3770 offset,
3771 size,
3772 upl_ptr,
3773 user_page_list,
3774 &local_list_count,
3775 cntrl_flags);
3776
3777 if(kr != KERN_SUCCESS)
3778 return kr;
3779
3780 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
3781 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
3782 }
3783
3784 return KERN_SUCCESS;
3785 }
3786
3787
3788
3789 /*
3790 * Routine: vm_object_super_upl_request
3791 * Purpose:
3792 * Cause the population of a portion of a vm_object
3793 * in much the same way as memory_object_upl_request.
3794 * Depending on the nature of the request, the pages
3795 * returned may contain valid data or be uninitialized.
3796 * However, the region may be expanded up to the super
3797 * cluster size provided.
3798 */
3799
3800 __private_extern__ kern_return_t
3801 vm_object_super_upl_request(
3802 vm_object_t object,
3803 vm_object_offset_t offset,
3804 upl_size_t size,
3805 upl_size_t super_cluster,
3806 upl_t *upl,
3807 upl_page_info_t *user_page_list,
3808 unsigned int *page_list_count,
3809 int cntrl_flags)
3810 {
3811 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
3812 return KERN_FAILURE;
3813
3814 assert(object->paging_in_progress);
3815 offset = offset - object->paging_offset;
3816
3817 if (super_cluster > size) {
3818
3819 vm_object_offset_t base_offset;
3820 upl_size_t super_size;
3821 vm_object_size_t super_size_64;
3822
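/*
 * round the start of the request down to a super_cluster
 * boundary and, if the request straddles the next boundary,
 * double the cluster... then clip the expanded region so it
 * doesn't run past the end of the object
 */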
3823 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
3824 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
3825 super_size_64 = ((base_offset + super_size) > object->size) ? (object->size - base_offset) : super_size;
3826 super_size = (upl_size_t) super_size_64;
3827 assert(super_size == super_size_64);
3828
3829 if (offset > (base_offset + super_size)) {
3830 panic("vm_object_super_upl_request: Missed target pageout"
3831 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
3832 offset, base_offset, super_size, super_cluster,
3833 size, object->paging_offset);
3834 }
3835 /*
3836 * apparently there is a case where the vm requests a
3837 * page to be written out whose offset is beyond the
3838 * object size
3839 */
3840 if ((offset + size) > (base_offset + super_size)) {
3841 super_size_64 = (offset + size) - base_offset;
3842 super_size = (upl_size_t) super_size_64;
3843 assert(super_size == super_size_64);
3844 }
3845
3846 offset = base_offset;
3847 size = super_size;
3848 }
3849 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags);
3850 }
3851
3852
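/*
 * Routine:	vm_map_create_upl
 * Purpose:
 *	Create a UPL for the physical pages backing a range of
 *	the given VM map.  The map entry is looked up (recursing
 *	into submaps), copy-on-write and data-sync obligations are
 *	honored, and the request is then handed to
 *	vm_object_iopl_request against the underlying object.
 *
 *	Illustrative caller sketch only, not taken from this file;
 *	map, user_addr, len and io_upl are hypothetical placeholders:
 *
 *		kern_return_t	kr;
 *		upl_t		io_upl = NULL;
 *		upl_size_t	io_size = len;
 *		upl_page_info_t	*pl;
 *		unsigned int	pl_count = 0;
 *		int		io_flags = UPL_SET_LITE | UPL_SET_IO_WIRE |
 *					   UPL_SET_INTERNAL;
 *		boolean_t	empty;
 *
 *		kr = vm_map_create_upl(map, user_addr, &io_size, &io_upl,
 *				       NULL, &pl_count, &io_flags);
 *		if (kr == KERN_SUCCESS) {
 *			pl = UPL_GET_INTERNAL_PAGE_LIST(io_upl);
 *			... perform the I/O against the wired pages ...
 *			upl_commit_range(io_upl, 0, io_size, 0, pl,
 *					 io_size / PAGE_SIZE, &empty);
 *			upl_deallocate(io_upl);
 *		}
 */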
3853 kern_return_t
3854 vm_map_create_upl(
3855 vm_map_t map,
3856 vm_map_address_t offset,
3857 upl_size_t *upl_size,
3858 upl_t *upl,
3859 upl_page_info_array_t page_list,
3860 unsigned int *count,
3861 int *flags)
3862 {
3863 vm_map_entry_t entry;
3864 int caller_flags;
3865 int force_data_sync;
3866 int sync_cow_data;
3867 vm_object_t local_object;
3868 vm_map_offset_t local_offset;
3869 vm_map_offset_t local_start;
3870 kern_return_t ret;
3871
3872 caller_flags = *flags;
3873
3874 if (caller_flags & ~UPL_VALID_FLAGS) {
3875 /*
3876 * For forward compatibility's sake,
3877 * reject any unknown flag.
3878 */
3879 return KERN_INVALID_VALUE;
3880 }
3881 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
3882 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
3883
3884 if (upl == NULL)
3885 return KERN_INVALID_ARGUMENT;
3886
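/*
 * any path below that has to drop the map lock (to allocate an
 * object, sync data or recurse into a submap) jumps back here so
 * the entry covering 'offset' can be looked up again under the
 * read lock
 */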
3887 REDISCOVER_ENTRY:
3888 vm_map_lock_read(map);
3889
3890 if (vm_map_lookup_entry(map, offset, &entry)) {
3891
3892 if ((entry->vme_end - offset) < *upl_size) {
3893 *upl_size = (upl_size_t) (entry->vme_end - offset);
3894 assert(*upl_size == entry->vme_end - offset);
3895 }
3896
3897 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
3898 *flags = 0;
3899
3900 if ( !entry->is_sub_map && entry->object.vm_object != VM_OBJECT_NULL) {
3901 if (entry->object.vm_object->private)
3902 *flags = UPL_DEV_MEMORY;
3903
3904 if (entry->object.vm_object->phys_contiguous)
3905 *flags |= UPL_PHYS_CONTIG;
3906 }
3907 vm_map_unlock_read(map);
3908
3909 return KERN_SUCCESS;
3910 }
3911 if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) {
3912 if ((*upl_size/PAGE_SIZE) > MAX_UPL_SIZE)
3913 *upl_size = MAX_UPL_SIZE * PAGE_SIZE;
3914 }
3915 /*
3916 * Create an object if necessary.
3917 */
3918 if (entry->object.vm_object == VM_OBJECT_NULL) {
3919
3920 if (vm_map_lock_read_to_write(map))
3921 goto REDISCOVER_ENTRY;
3922
3923 entry->object.vm_object = vm_object_allocate((vm_size_t)(entry->vme_end - entry->vme_start));
3924 entry->offset = 0;
3925
3926 vm_map_lock_write_to_read(map);
3927 }
3928 if (!(caller_flags & UPL_COPYOUT_FROM)) {
3929 if (!(entry->protection & VM_PROT_WRITE)) {
3930 vm_map_unlock_read(map);
3931 return KERN_PROTECTION_FAILURE;
3932 }
3933 if (entry->needs_copy) {
3934 /*
3935 * Honor copy-on-write for COPY_SYMMETRIC
3936 * strategy.
3937 */
3938 vm_map_t local_map;
3939 vm_object_t object;
3940 vm_object_offset_t new_offset;
3941 vm_prot_t prot;
3942 boolean_t wired;
3943 vm_map_version_t version;
3944 vm_map_t real_map;
3945
3946 local_map = map;
3947
3948 if (vm_map_lookup_locked(&local_map,
3949 offset, VM_PROT_WRITE,
3950 OBJECT_LOCK_EXCLUSIVE,
3951 &version, &object,
3952 &new_offset, &prot, &wired,
3953 NULL,
3954 &real_map) != KERN_SUCCESS) {
3955 vm_map_unlock_read(local_map);
3956 return KERN_FAILURE;
3957 }
3958 if (real_map != map)
3959 vm_map_unlock(real_map);
3960 vm_map_unlock_read(local_map);
3961
3962 vm_object_unlock(object);
3963
3964 goto REDISCOVER_ENTRY;
3965 }
3966 }
3967 if (entry->is_sub_map) {
3968 vm_map_t submap;
3969
3970 submap = entry->object.sub_map;
3971 local_start = entry->vme_start;
3972 local_offset = entry->offset;
3973
3974 vm_map_reference(submap);
3975 vm_map_unlock_read(map);
3976
3977 ret = vm_map_create_upl(submap,
3978 local_offset + (offset - local_start),
3979 upl_size, upl, page_list, count, flags);
3980 vm_map_deallocate(submap);
3981
3982 return ret;
3983 }
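/*
 * if the entry's object has a shadow or a copy object, sync its
 * backing data (a DATA_SYNC against the shadow when both are
 * present) before building the UPL, then rediscover the entry...
 * this is only attempted once per request
 */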
3984 if (sync_cow_data) {
3985 if (entry->object.vm_object->shadow || entry->object.vm_object->copy) {
3986 local_object = entry->object.vm_object;
3987 local_start = entry->vme_start;
3988 local_offset = entry->offset;
3989
3990 vm_object_reference(local_object);
3991 vm_map_unlock_read(map);
3992
3993 if (local_object->shadow && local_object->copy) {
3994 vm_object_lock_request(
3995 local_object->shadow,
3996 (vm_object_offset_t)
3997 ((offset - local_start) +
3998 local_offset) +
3999 local_object->shadow_offset,
4000 *upl_size, FALSE,
4001 MEMORY_OBJECT_DATA_SYNC,
4002 VM_PROT_NO_CHANGE);
4003 }
4004 sync_cow_data = FALSE;
4005 vm_object_deallocate(local_object);
4006
4007 goto REDISCOVER_ENTRY;
4008 }
4009 }
4010 if (force_data_sync) {
4011 local_object = entry->object.vm_object;
4012 local_start = entry->vme_start;
4013 local_offset = entry->offset;
4014
4015 vm_object_reference(local_object);
4016 vm_map_unlock_read(map);
4017
4018 vm_object_lock_request(
4019 local_object,
4020 (vm_object_offset_t)
4021 ((offset - local_start) + local_offset),
4022 (vm_object_size_t)*upl_size, FALSE,
4023 MEMORY_OBJECT_DATA_SYNC,
4024 VM_PROT_NO_CHANGE);
4025
4026 force_data_sync = FALSE;
4027 vm_object_deallocate(local_object);
4028
4029 goto REDISCOVER_ENTRY;
4030 }
4031 if (entry->object.vm_object->private)
4032 *flags = UPL_DEV_MEMORY;
4033 else
4034 *flags = 0;
4035
4036 if (entry->object.vm_object->phys_contiguous)
4037 *flags |= UPL_PHYS_CONTIG;
4038
4039 local_object = entry->object.vm_object;
4040 local_offset = entry->offset;
4041 local_start = entry->vme_start;
4042
4043 vm_object_reference(local_object);
4044 vm_map_unlock_read(map);
4045
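/*
 * we've resolved the map entry down to its backing object and
 * offset... create and wire the UPL directly against that object
 * via the IOPL path
 */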
4046 ret = vm_object_iopl_request(local_object,
4047 (vm_object_offset_t) ((offset - local_start) + local_offset),
4048 *upl_size,
4049 upl,
4050 page_list,
4051 count,
4052 caller_flags);
4053 vm_object_deallocate(local_object);
4054
4055 return(ret);
4056 }
4057 vm_map_unlock_read(map);
4058
4059 return(KERN_FAILURE);
4060 }
4061
4062 /*
4063 * Internal routine to enter a UPL into a VM map.
4064 *
4065 * JMM - This should just be doable through the standard
4066 * vm_map_enter() API.
4067 */
4068 kern_return_t
4069 vm_map_enter_upl(
4070 vm_map_t map,
4071 upl_t upl,
4072 vm_map_offset_t *dst_addr)
4073 {
4074 vm_map_size_t size;
4075 vm_object_offset_t offset;
4076 vm_map_offset_t addr;
4077 vm_page_t m;
4078 kern_return_t kr;
4079 int isVectorUPL = 0, curr_upl=0;
4080 upl_t vector_upl = NULL;
4081 vm_offset_t vector_upl_dst_addr = 0;
4082 vm_map_t vector_upl_submap = NULL;
4083 upl_offset_t subupl_offset = 0;
4084 upl_size_t subupl_size = 0;
4085
4086 if (upl == UPL_NULL)
4087 return KERN_INVALID_ARGUMENT;
4088
4089 if((isVectorUPL = vector_upl_is_valid(upl))) {
4090 int mapped=0,valid_upls=0;
4091 vector_upl = upl;
4092
4093 upl_lock(vector_upl);
4094 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
4095 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
4096 if(upl == NULL)
4097 continue;
4098 valid_upls++;
4099 if (UPL_PAGE_LIST_MAPPED & upl->flags)
4100 mapped++;
4101 }
4102
4103 if(mapped) {
4104 if(mapped != valid_upls)
4105 panic("Only %d of the %d sub-upls within the Vector UPL are already mapped\n", mapped, valid_upls);
4106 else {
4107 upl_unlock(vector_upl);
4108 return KERN_FAILURE;
4109 }
4110 }
4111
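/*
 * reserve a single contiguous submap large enough to hold all of
 * the sub-UPL mappings so the vector UPL appears to the caller as
 * one contiguous range
 */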
4112 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE, VM_FLAGS_ANYWHERE, &vector_upl_submap);
4113 if( kr != KERN_SUCCESS )
4114 panic("Vector UPL submap allocation failed\n");
4115 map = vector_upl_submap;
4116 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
4117 curr_upl=0;
4118 }
4119 else
4120 upl_lock(upl);
4121
4122 process_upl_to_enter:
4123 if(isVectorUPL){
4124 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
4125 *dst_addr = vector_upl_dst_addr;
4126 upl_unlock(vector_upl);
4127 return KERN_SUCCESS;
4128 }
4129 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
4130 if(upl == NULL)
4131 goto process_upl_to_enter;
4132 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
4133 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
4134 }
4135
4136 /*
4137 * check to see if already mapped
4138 */
4139 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
4140 upl_unlock(upl);
4141 return KERN_FAILURE;
4142 }
4143
4144 if ((!(upl->flags & UPL_SHADOWED)) && !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) ||
4145 (upl->map_object->phys_contiguous))) {
4146 vm_object_t object;
4147 vm_page_t alias_page;
4148 vm_object_offset_t new_offset;
4149 unsigned int pg_num;
4150 wpl_array_t lite_list;
4151
4152 if (upl->flags & UPL_INTERNAL) {
4153 lite_list = (wpl_array_t)
4154 ((((uintptr_t)upl) + sizeof(struct upl))
4155 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4156 } else {
4157 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
4158 }
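/*
 * for a UPL that isn't device memory, I/O wired or already
 * shadowed, interpose a shadow object populated with private
 * fictitious 'alias' pages that reference the same physical
 * pages... the mapping is then made against this shadow so the
 * original pages aren't exposed directly
 */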
4159 object = upl->map_object;
4160 upl->map_object = vm_object_allocate(upl->size);
4161
4162 vm_object_lock(upl->map_object);
4163
4164 upl->map_object->shadow = object;
4165 upl->map_object->pageout = TRUE;
4166 upl->map_object->can_persist = FALSE;
4167 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
4168 upl->map_object->shadow_offset = upl->offset - object->paging_offset;
4169 upl->map_object->wimg_bits = object->wimg_bits;
4170 offset = upl->map_object->shadow_offset;
4171 new_offset = 0;
4172 size = upl->size;
4173
4174 upl->flags |= UPL_SHADOWED;
4175
4176 while (size) {
4177 pg_num = (unsigned int) (new_offset / PAGE_SIZE);
4178 assert(pg_num == new_offset / PAGE_SIZE);
4179
4180 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4181
4182 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4183
4184 vm_object_lock(object);
4185
4186 m = vm_page_lookup(object, offset);
4187 if (m == VM_PAGE_NULL) {
4188 panic("vm_upl_map: page missing\n");
4189 }
4190
4191 /*
4192 * Convert the fictitious page to a private
4193 * shadow of the real page.
4194 */
4195 assert(alias_page->fictitious);
4196 alias_page->fictitious = FALSE;
4197 alias_page->private = TRUE;
4198 alias_page->pageout = TRUE;
4199 /*
4200 * since m is a page in the upl it must
4201 * already be wired or BUSY, so it's
4202 * safe to assign the underlying physical
4203 * page to the alias
4204 */
4205 alias_page->phys_page = m->phys_page;
4206
4207 vm_object_unlock(object);
4208
4209 vm_page_lockspin_queues();
4210 vm_page_wire(alias_page);
4211 vm_page_unlock_queues();
4212
4213 /*
4214 * ENCRYPTED SWAP:
4215 * The virtual page ("m") has to be wired in some way
4216 * here or its physical page ("m->phys_page") could
4217 * be recycled at any time.
4218 * Assuming this is enforced by the caller, we can't
4219 * get an encrypted page here. Since the encryption
4220 * key depends on the VM page's "pager" object and
4221 * the "paging_offset", we couldn't handle 2 pageable
4222 * VM pages (with different pagers and paging_offsets)
4223 * sharing the same physical page: we could end up
4224 * encrypting with one key (via one VM page) and
4225 * decrypting with another key (via the alias VM page).
4226 */
4227 ASSERT_PAGE_DECRYPTED(m);
4228
4229 vm_page_insert(alias_page, upl->map_object, new_offset);
4230
4231 assert(!alias_page->wanted);
4232 alias_page->busy = FALSE;
4233 alias_page->absent = FALSE;
4234 }
4235 size -= PAGE_SIZE;
4236 offset += PAGE_SIZE_64;
4237 new_offset += PAGE_SIZE_64;
4238 }
4239 vm_object_unlock(upl->map_object);
4240 }
4241 if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous)
4242 offset = upl->offset - upl->map_object->paging_offset;
4243 else
4244 offset = 0;
4245 size = upl->size;
4246
4247 vm_object_reference(upl->map_object);
4248
4249 if(!isVectorUPL) {
4250 *dst_addr = 0;
4251 /*
4252 * NEED A UPL_MAP ALIAS
4253 */
4254 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
4255 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE,
4256 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
4257 }
4258 else {
4259 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
4260 VM_FLAGS_FIXED, upl->map_object, offset, FALSE,
4261 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
4262 if(kr)
4263 panic("vm_map_enter failed for a Vector UPL\n");
4264 }
4265
4266 if (kr != KERN_SUCCESS) {
4267 upl_unlock(upl);
4268 return(kr);
4269 }
4270 vm_object_lock(upl->map_object);
4271
4272 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
4273 m = vm_page_lookup(upl->map_object, offset);
4274
4275 if (m) {
4276 unsigned int cache_attr;
4277 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
4278
4279 m->pmapped = TRUE;
4280
4281 /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
4282 * but only in kernel space. If this was on a user map,
4283 * we'd have to set the wpmapped bit. */
4284 /* m->wpmapped = TRUE; */
4285 assert(map==kernel_map);
4286
4287 PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, cache_attr, TRUE);
4288 }
4289 offset += PAGE_SIZE_64;
4290 }
4291 vm_object_unlock(upl->map_object);
4292
4293 /*
4294 * hold a reference for the mapping
4295 */
4296 upl->ref_count++;
4297 upl->flags |= UPL_PAGE_LIST_MAPPED;
4298 upl->kaddr = (vm_offset_t) *dst_addr;
4299 assert(upl->kaddr == *dst_addr);
4300
4301 if(!isVectorUPL)
4302 upl_unlock(upl);
4303 else
4304 goto process_upl_to_enter;
4305
4306 return KERN_SUCCESS;
4307 }
4308
4309 /*
4310 * Internal routine to remove a UPL mapping from a VM map.
4311 *
4312 * XXX - This should just be doable through a standard
4313 * vm_map_remove() operation. Otherwise, implicit clean-up
4314 * of the target map won't be able to correctly remove
4315 * these (and release the reference on the UPL). Having
4316 * to do this means we can't map these into user-space
4317 * maps yet.
4318 */
4319 kern_return_t
4320 vm_map_remove_upl(
4321 vm_map_t map,
4322 upl_t upl)
4323 {
4324 vm_address_t addr;
4325 upl_size_t size;
4326 int isVectorUPL = 0, curr_upl = 0;
4327 upl_t vector_upl = NULL;
4328
4329 if (upl == UPL_NULL)
4330 return KERN_INVALID_ARGUMENT;
4331
4332 if((isVectorUPL = vector_upl_is_valid(upl))) {
4333 int unmapped=0, valid_upls=0;
4334 vector_upl = upl;
4335 upl_lock(vector_upl);
4336 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
4337 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
4338 if(upl == NULL)
4339 continue;
4340 valid_upls++;
4341 if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
4342 unmapped++;
4343 }
4344
4345 if(unmapped) {
4346 if(unmapped != valid_upls)
4347 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
4348 else {
4349 upl_unlock(vector_upl);
4350 return KERN_FAILURE;
4351 }
4352 }
4353 curr_upl=0;
4354 }
4355 else
4356 upl_lock(upl);
4357
4358 process_upl_to_remove:
4359 if(isVectorUPL) {
4360 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
4361 vm_map_t v_upl_submap;
4362 vm_offset_t v_upl_submap_dst_addr;
4363 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
4364
4365 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS);
4366 vm_map_deallocate(v_upl_submap);
4367 upl_unlock(vector_upl);
4368 return KERN_SUCCESS;
4369 }
4370
4371 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
4372 if(upl == NULL)
4373 goto process_upl_to_remove;
4374 }
4375
4376 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
4377 addr = upl->kaddr;
4378 size = upl->size;
4379
4380 assert(upl->ref_count > 1);
4381 upl->ref_count--; /* removing mapping ref */
4382
4383 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
4384 upl->kaddr = (vm_offset_t) 0;
4385
4386 if(!isVectorUPL) {
4387 upl_unlock(upl);
4388
4389 vm_map_remove(map,
4390 vm_map_trunc_page(addr),
4391 vm_map_round_page(addr + size),
4392 VM_MAP_NO_FLAGS);
4393
4394 return KERN_SUCCESS;
4395 }
4396 else {
4397 /*
4398 * If it's a Vectored UPL, we'll be removing the entire
4399 * submap anyway, so no need to remove individual UPL
4400 * element mappings from within the submap
4401 */
4402 goto process_upl_to_remove;
4403 }
4404 }
4405 upl_unlock(upl);
4406
4407 return KERN_FAILURE;
4408 }
4409
4410 static void
4411 dw_do_work(
4412 vm_object_t object,
4413 struct dw *dwp,
4414 int dw_count)
4415 {
4416 int j;
4417 boolean_t held_as_spin = TRUE;
4418
4419 /*
4420 * pageout_scan takes the vm_page_lock_queues first
4421 * then tries for the object lock... to avoid what
4422 * is effectively a lock inversion, we'll go to the
4423 * trouble of taking them in that same order... otherwise
4424 * if this object contains the majority of the pages resident
4425 * in the UBC (or a small set of large objects actively being
4426 * worked on contain the majority of the pages), we could
4427 * cause the pageout_scan thread to 'starve' in its attempt
4428 * to find pages to move to the free queue, since it has to
4429 * successfully acquire the object lock of any candidate page
4430 * before it can steal/clean it.
4431 */
4432 if (!vm_page_trylockspin_queues()) {
4433 vm_object_unlock(object);
4434
4435 vm_page_lockspin_queues();
4436
4437 for (j = 0; ; j++) {
4438 if (!vm_object_lock_avoid(object) &&
4439 _vm_object_lock_try(object))
4440 break;
4441 vm_page_unlock_queues();
4442 mutex_pause(j);
4443 vm_page_lockspin_queues();
4444 }
4445 }
4446 for (j = 0; j < dw_count; j++, dwp++) {
4447
4448 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4449 vm_pageout_throttle_up(dwp->dw_m);
4450
4451 if (dwp->dw_mask & DW_vm_page_wire)
4452 vm_page_wire(dwp->dw_m);
4453 else if (dwp->dw_mask & DW_vm_page_unwire) {
4454 boolean_t queueit;
4455
4456 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4457
4458 vm_page_unwire(dwp->dw_m, queueit);
4459 }
4460 if (dwp->dw_mask & DW_vm_page_free) {
4461 if (held_as_spin == TRUE) {
4462 vm_page_lockconvert_queues();
4463 held_as_spin = FALSE;
4464 }
4465 vm_page_free(dwp->dw_m);
4466 } else {
4467 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4468 vm_page_deactivate_internal(dwp->dw_m, FALSE);
4469 else if (dwp->dw_mask & DW_vm_page_activate)
4470 vm_page_activate(dwp->dw_m);
4471 else if (dwp->dw_mask & DW_vm_page_speculate)
4472 vm_page_speculate(dwp->dw_m, TRUE);
4473 else if (dwp->dw_mask & DW_vm_page_lru)
4474 vm_page_lru(dwp->dw_m);
4475
4476 if (dwp->dw_mask & DW_set_reference)
4477 dwp->dw_m->reference = TRUE;
4478 else if (dwp->dw_mask & DW_clear_reference)
4479 dwp->dw_m->reference = FALSE;
4480
4481 if (dwp->dw_mask & DW_clear_busy)
4482 dwp->dw_m->busy = FALSE;
4483
4484 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4485 PAGE_WAKEUP(dwp->dw_m);
4486 }
4487 }
4488 vm_page_unlock_queues();
4489 }
4490
4491
4492
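/*
 * Routine:	upl_commit_range
 * Purpose:
 *	Commit a sub-range of a UPL back to its object: per-page
 *	state (dirty, precious, wired, busy) is updated, pages that
 *	were successfully paged out are freed or reactivated, and
 *	any threads waiting on the pages are woken.  For a vector
 *	UPL, each sub-UPL covering the range is processed in turn.
 */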
4493 kern_return_t
4494 upl_commit_range(
4495 upl_t upl,
4496 upl_offset_t offset,
4497 upl_size_t size,
4498 int flags,
4499 upl_page_info_t *page_list,
4500 mach_msg_type_number_t count,
4501 boolean_t *empty)
4502 {
4503 upl_size_t xfer_size, subupl_size = size;
4504 vm_object_t shadow_object;
4505 vm_object_t object;
4506 vm_object_offset_t target_offset;
4507 upl_offset_t subupl_offset = offset;
4508 int entry;
4509 wpl_array_t lite_list;
4510 int occupied;
4511 int clear_refmod = 0;
4512 int pgpgout_count = 0;
4513 struct dw dw_array[DELAYED_WORK_LIMIT];
4514 struct dw *dwp;
4515 int dw_count, isVectorUPL = 0;
4516 upl_t vector_upl = NULL;
4517
4518 *empty = FALSE;
4519
4520 if (upl == UPL_NULL)
4521 return KERN_INVALID_ARGUMENT;
4522
4523 if (count == 0)
4524 page_list = NULL;
4525
4526 if((isVectorUPL = vector_upl_is_valid(upl))) {
4527 vector_upl = upl;
4528 upl_lock(vector_upl);
4529 }
4530 else
4531 upl_lock(upl);
4532
4533 process_upl_to_commit:
4534
4535 if(isVectorUPL) {
4536 size = subupl_size;
4537 offset = subupl_offset;
4538 if(size == 0) {
4539 upl_unlock(vector_upl);
4540 return KERN_SUCCESS;
4541 }
4542 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
4543 if(upl == NULL) {
4544 upl_unlock(vector_upl);
4545 return KERN_FAILURE;
4546 }
4547 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
4548 subupl_size -= size;
4549 subupl_offset += size;
4550 }
4551
4552 #if UPL_DEBUG
4553 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
4554 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
4555
4556 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
4557 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
4558
4559 upl->upl_commit_index++;
4560 }
4561 #endif
4562 if (upl->flags & UPL_DEVICE_MEMORY)
4563 xfer_size = 0;
4564 else if ((offset + size) <= upl->size)
4565 xfer_size = size;
4566 else {
4567 if(!isVectorUPL)
4568 upl_unlock(upl);
4569 else {
4570 upl_unlock(vector_upl);
4571 }
4572 return KERN_FAILURE;
4573 }
4574 if (upl->flags & UPL_CLEAR_DIRTY)
4575 flags |= UPL_COMMIT_CLEAR_DIRTY;
4576
4577 if (upl->flags & UPL_INTERNAL)
4578 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
4579 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4580 else
4581 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
4582
4583 object = upl->map_object;
4584
4585 if (upl->flags & UPL_SHADOWED) {
4586 vm_object_lock(object);
4587 shadow_object = object->shadow;
4588 } else {
4589 shadow_object = object;
4590 }
4591 entry = offset/PAGE_SIZE;
4592 target_offset = (vm_object_offset_t)offset;
4593
4594 if (upl->flags & UPL_KERNEL_OBJECT)
4595 vm_object_lock_shared(shadow_object);
4596 else
4597 vm_object_lock(shadow_object);
4598
4599 if (upl->flags & UPL_ACCESS_BLOCKED) {
4600 assert(shadow_object->blocked_access);
4601 shadow_object->blocked_access = FALSE;
4602 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
4603 }
4604
4605 if (shadow_object->code_signed) {
4606 /*
4607 * CODE SIGNING:
4608 * If the object is code-signed, do not let this UPL tell
4609 * us if the pages are valid or not. Let the pages be
4610 * validated by VM the normal way (when they get mapped or
4611 * copied).
4612 */
4613 flags &= ~UPL_COMMIT_CS_VALIDATED;
4614 }
4615 if (! page_list) {
4616 /*
4617 * No page list to get the code-signing info from !?
4618 */
4619 flags &= ~UPL_COMMIT_CS_VALIDATED;
4620 }
4621
4622 dwp = &dw_array[0];
4623 dw_count = 0;
4624
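/*
 * walk the committed range a page at a time... state changes
 * that need the page queues lock are batched into dw_array and
 * flushed via dw_do_work whenever DELAYED_WORK_LIMIT entries
 * have accumulated
 */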
4625 while (xfer_size) {
4626 vm_page_t t, m;
4627
4628 dwp->dw_mask = 0;
4629 clear_refmod = 0;
4630
4631 m = VM_PAGE_NULL;
4632
4633 if (upl->flags & UPL_LITE) {
4634 unsigned int pg_num;
4635
4636 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
4637 assert(pg_num == target_offset/PAGE_SIZE);
4638
4639 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4640 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4641
4642 if (!(upl->flags & UPL_KERNEL_OBJECT))
4643 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
4644 }
4645 }
4646 if (upl->flags & UPL_SHADOWED) {
4647 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
4648
4649 t->pageout = FALSE;
4650
4651 VM_PAGE_FREE(t);
4652
4653 if (m == VM_PAGE_NULL)
4654 m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
4655 }
4656 }
4657 if ((upl->flags & UPL_KERNEL_OBJECT) || m == VM_PAGE_NULL)
4658 goto commit_next_page;
4659
4660 if (flags & UPL_COMMIT_CS_VALIDATED) {
4661 /*
4662 * CODE SIGNING:
4663 * Set the code signing bits according to
4664 * what the UPL says they should be.
4665 */
4666 m->cs_validated = page_list[entry].cs_validated;
4667 m->cs_tainted = page_list[entry].cs_tainted;
4668 }
4669 if (upl->flags & UPL_IO_WIRE) {
4670
4671 dwp->dw_mask |= DW_vm_page_unwire;
4672
4673 if (page_list)
4674 page_list[entry].phys_addr = 0;
4675
4676 if (flags & UPL_COMMIT_SET_DIRTY)
4677 m->dirty = TRUE;
4678 else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
4679 m->dirty = FALSE;
4680
4681 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
4682 m->cs_validated && !m->cs_tainted) {
4683 /*
4684 * CODE SIGNING:
4685 * This page is no longer dirty
4686 * but could have been modified,
4687 * so it will need to be
4688 * re-validated.
4689 */
4690 m->cs_validated = FALSE;
4691 #if DEVELOPMENT || DEBUG
4692 vm_cs_validated_resets++;
4693 #endif
4694 pmap_disconnect(m->phys_page);
4695 }
4696 clear_refmod |= VM_MEM_MODIFIED;
4697 }
4698 if (flags & UPL_COMMIT_INACTIVATE) {
4699 dwp->dw_mask |= DW_vm_page_deactivate_internal;
4700 clear_refmod |= VM_MEM_REFERENCED;
4701 }
4702 if (upl->flags & UPL_ACCESS_BLOCKED) {
4703 /*
4704 * We blocked access to the pages in this UPL.
4705 * Clear the "busy" bit and wake up any waiter
4706 * for this page.
4707 */
4708 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
4709 }
4710 if (m->absent) {
4711 if (flags & UPL_COMMIT_FREE_ABSENT)
4712 dwp->dw_mask |= DW_vm_page_free;
4713 else
4714 m->absent = FALSE;
4715 }
4716 goto commit_next_page;
4717 }
4718 /*
4719 * make sure to clear the hardware
4720 * modify or reference bits before
4721 * releasing the BUSY bit on this page
4722 * otherwise we risk losing a legitimate
4723 * change of state
4724 */
4725 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
4726 m->dirty = FALSE;
4727
4728 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
4729 m->cs_validated && !m->cs_tainted) {
4730 /*
4731 * CODE SIGNING:
4732 * This page is no longer dirty
4733 * but could have been modified,
4734 * so it will need to be
4735 * re-validated.
4736 */
4737 m->cs_validated = FALSE;
4738 #if DEVELOPMENT || DEBUG
4739 vm_cs_validated_resets++;
4740 #endif
4741 pmap_disconnect(m->phys_page);
4742 }
4743 clear_refmod |= VM_MEM_MODIFIED;
4744 }
4745 if (page_list) {
4746 upl_page_info_t *p;
4747
4748 p = &(page_list[entry]);
4749
4750 if (p->phys_addr && p->pageout && !m->pageout) {
4751 m->busy = TRUE;
4752 m->pageout = TRUE;
4753
4754 dwp->dw_mask |= DW_vm_page_wire;
4755
4756 } else if (p->phys_addr &&
4757 !p->pageout && m->pageout &&
4758 !m->dump_cleaning) {
4759 m->pageout = FALSE;
4760 m->absent = FALSE;
4761 m->overwriting = FALSE;
4762
4763 dwp->dw_mask |= (DW_vm_page_unwire | DW_clear_busy | DW_PAGE_WAKEUP);
4764 }
4765 page_list[entry].phys_addr = 0;
4766 }
4767 m->dump_cleaning = FALSE;
4768
4769 if (m->laundry)
4770 dwp->dw_mask |= DW_vm_pageout_throttle_up;
4771
4772 if (m->pageout) {
4773 m->cleaning = FALSE;
4774 m->encrypted_cleaning = FALSE;
4775 m->pageout = FALSE;
4776 #if MACH_CLUSTER_STATS
4777 if (m->wanted) vm_pageout_target_collisions++;
4778 #endif
4779 m->dirty = FALSE;
4780
4781 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
4782 m->cs_validated && !m->cs_tainted) {
4783 /*
4784 * CODE SIGNING:
4785 * This page is no longer dirty
4786 * but could have been modified,
4787 * so it will need to be
4788 * re-validated.
4789 */
4790 m->cs_validated = FALSE;
4791 #if DEVELOPMENT || DEBUG
4792 vm_cs_validated_resets++;
4793 #endif
4794 pmap_disconnect(m->phys_page);
4795 }
4796
4797 if ((flags & UPL_COMMIT_SET_DIRTY) ||
4798 (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)))
4799 m->dirty = TRUE;
4800
4801 if (m->dirty) {
4802 /*
4803 * page was re-dirtied after we started
4804 * the pageout... reactivate it since
4805 * we don't know whether the on-disk
4806 * copy matches what is now in memory
4807 */
4808 dwp->dw_mask |= (DW_vm_page_unwire | DW_clear_busy | DW_PAGE_WAKEUP);
4809
4810 if (upl->flags & UPL_PAGEOUT) {
4811 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
4812 VM_STAT_INCR(reactivations);
4813 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4814 }
4815 } else {
4816 /*
4817 * page has been successfully cleaned
4818 * go ahead and free it for other use
4819 */
4820
4821 if (m->object->internal) {
4822 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
4823 } else {
4824 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
4825 }
4826 dwp->dw_mask |= DW_vm_page_free;
4827
4828 if (upl->flags & UPL_PAGEOUT) {
4829 CLUSTER_STAT(vm_pageout_target_page_freed++;)
4830
4831 if (page_list[entry].dirty) {
4832 VM_STAT_INCR(pageouts);
4833 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
4834 pgpgout_count++;
4835 }
4836 }
4837 }
4838 goto commit_next_page;
4839 }
4840 #if MACH_CLUSTER_STATS
4841 if (m->wpmapped)
4842 m->dirty = pmap_is_modified(m->phys_page);
4843
4844 if (m->dirty) vm_pageout_cluster_dirtied++;
4845 else vm_pageout_cluster_cleaned++;
4846 if (m->wanted) vm_pageout_cluster_collisions++;
4847 #endif
4848 m->dirty = FALSE;
4849
4850 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
4851 m->cs_validated && !m->cs_tainted) {
4852 /*
4853 * CODE SIGNING:
4854 * This page is no longer dirty
4855 * but could have been modified,
4856 * so it will need to be
4857 * re-validated.
4858 */
4859 m->cs_validated = FALSE;
4860 #if DEVELOPMENT || DEBUG
4861 vm_cs_validated_resets++;
4862 #endif
4863 pmap_disconnect(m->phys_page);
4864 }
4865
4866 if ((m->busy) && (m->cleaning)) {
4867 /*
4868 * the request_page_list case
4869 */
4870 m->absent = FALSE;
4871 m->overwriting = FALSE;
4872
4873 dwp->dw_mask |= DW_clear_busy;
4874
4875 } else if (m->overwriting) {
4876 /*
4877 * alternate request page list, write to
4878 * page_list case. Occurs when the original
4879 * page was wired at the time of the list
4880 * request
4881 */
4882 assert(VM_PAGE_WIRED(m));
4883 m->overwriting = FALSE;
4884
4885 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
4886 }
4887 m->cleaning = FALSE;
4888 m->encrypted_cleaning = FALSE;
4889
4890 /*
4891 * It is part of the semantics of COPYOUT_FROM
4892 * UPLs that a commit implies cache sync
4893 * between the vm page and the backing store;
4894 * this can be used to strip the precious bit
4895 * as well as clean
4896 */
4897 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
4898 m->precious = FALSE;
4899
4900 if (flags & UPL_COMMIT_SET_DIRTY)
4901 m->dirty = TRUE;
4902
4903 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
4904 dwp->dw_mask |= DW_vm_page_deactivate_internal;
4905 clear_refmod |= VM_MEM_REFERENCED;
4906
4907 } else if (!m->active && !m->inactive && !m->speculative) {
4908
4909 if (m->clustered || (flags & UPL_COMMIT_SPECULATE))
4910 dwp->dw_mask |= DW_vm_page_speculate;
4911 else if (m->reference)
4912 dwp->dw_mask |= DW_vm_page_activate;
4913 else {
4914 dwp->dw_mask |= DW_vm_page_deactivate_internal;
4915 clear_refmod |= VM_MEM_REFERENCED;
4916 }
4917 }
4918 if (upl->flags & UPL_ACCESS_BLOCKED) {
4919 /*
4920 * We blocked access to the pages in this UPL.
4921 * Clear the "busy" bit on this page before we
4922 * wake up any waiter.
4923 */
4924 dwp->dw_mask |= DW_clear_busy;
4925 }
4926 /*
4927 * Wakeup any thread waiting for the page to be un-cleaning.
4928 */
4929 dwp->dw_mask |= DW_PAGE_WAKEUP;
4930
4931 commit_next_page:
4932 if (clear_refmod)
4933 pmap_clear_refmod(m->phys_page, clear_refmod);
4934
4935 target_offset += PAGE_SIZE_64;
4936 xfer_size -= PAGE_SIZE;
4937 entry++;
4938
4939 if (dwp->dw_mask) {
4940 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
4941 if (m->busy == FALSE) {
4942 /*
4943 * dw_do_work may need to drop the object lock
4944 * if it does, we need the pages it's looking at to
4945 * be held stable via the busy bit.
4946 */
4947 m->busy = TRUE;
4948 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
4949 }
4950 dwp->dw_m = m;
4951 dwp++;
4952 dw_count++;
4953
4954 if (dw_count >= DELAYED_WORK_LIMIT) {
4955 dw_do_work(shadow_object, &dw_array[0], dw_count);
4956
4957 dwp = &dw_array[0];
4958 dw_count = 0;
4959 }
4960 } else {
4961 if (dwp->dw_mask & DW_clear_busy)
4962 m->busy = FALSE;
4963
4964 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4965 PAGE_WAKEUP(m);
4966 }
4967 }
4968 }
4969 if (dw_count)
4970 dw_do_work(shadow_object, &dw_array[0], dw_count);
4971
4972 occupied = 1;
4973
4974 if (upl->flags & UPL_DEVICE_MEMORY) {
4975 occupied = 0;
4976 } else if (upl->flags & UPL_LITE) {
4977 int pg_num;
4978 int i;
4979
4980 pg_num = upl->size/PAGE_SIZE;
4981 pg_num = (pg_num + 31) >> 5;
4982 occupied = 0;
4983
4984 for (i = 0; i < pg_num; i++) {
4985 if (lite_list[i] != 0) {
4986 occupied = 1;
4987 break;
4988 }
4989 }
4990 } else {
4991 if (queue_empty(&upl->map_object->memq))
4992 occupied = 0;
4993 }
4994 if (occupied == 0) {
4995 /*
4996 * If this UPL element belongs to a Vector UPL and is
4997 * empty, then this is the right function to deallocate
4998 * it. So go ahead and set the *empty variable. The flag
4999 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view,
5000 * should be considered relevant for the Vector UPL and not
5001 * the internal UPLs.
5002 */
5003 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
5004 *empty = TRUE;
5005
5006 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
5007 /*
5008 * this is not a paging object
5009 * so we need to drop the paging reference
5010 * that was taken when we created the UPL
5011 * against this object
5012 */
5013 vm_object_activity_end(shadow_object);
5014 } else {
5015 /*
5016 * we donated the paging reference to
5017 * the map object... vm_pageout_object_terminate
5018 * will drop this reference
5019 */
5020 }
5021 }
5022 vm_object_unlock(shadow_object);
5023 if (object != shadow_object)
5024 vm_object_unlock(object);
5025
5026 if(!isVectorUPL)
5027 upl_unlock(upl);
5028 else {
5029 /*
5030 * If we completed our operations on an UPL that is
5031 * part of a Vectored UPL and if empty is TRUE, then
5032 * we should go ahead and deallocate this UPL element.
5033 * Then we check if this was the last of the UPL elements
5034 * within that Vectored UPL. If so, set empty to TRUE
5035 * so that in ubc_upl_commit_range or ubc_upl_commit, we
5036 * can go ahead and deallocate the Vector UPL too.
5037 */
5038 if(*empty==TRUE) {
5039 *empty = vector_upl_set_subupl(vector_upl, upl, 0);
5040 upl_deallocate(upl);
5041 }
5042 goto process_upl_to_commit;
5043 }
5044
5045 if (pgpgout_count) {
5046 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
5047 }
5048
5049 return KERN_SUCCESS;
5050 }
5051
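/*
 * Routine:	upl_abort_range
 * Purpose:
 *	Abort a sub-range of a UPL.  Absent pages are freed or
 *	marked restartable, unavailable or in error depending on
 *	the error bits, pages that had been targeted for pageout
 *	are unwired, and any waiters are woken.  For a vector UPL,
 *	each sub-UPL covering the range is processed in turn.
 */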
5052 kern_return_t
5053 upl_abort_range(
5054 upl_t upl,
5055 upl_offset_t offset,
5056 upl_size_t size,
5057 int error,
5058 boolean_t *empty)
5059 {
5060 upl_size_t xfer_size, subupl_size = size;
5061 vm_object_t shadow_object;
5062 vm_object_t object;
5063 vm_object_offset_t target_offset;
5064 upl_offset_t subupl_offset = offset;
5065 int entry;
5066 wpl_array_t lite_list;
5067 int occupied;
5068 struct dw dw_array[DELAYED_WORK_LIMIT];
5069 struct dw *dwp;
5070 int dw_count, isVectorUPL = 0;
5071 upl_t vector_upl = NULL;
5072
5073 *empty = FALSE;
5074
5075 if (upl == UPL_NULL)
5076 return KERN_INVALID_ARGUMENT;
5077
5078 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
5079 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
5080
5081 if((isVectorUPL = vector_upl_is_valid(upl))) {
5082 vector_upl = upl;
5083 upl_lock(vector_upl);
5084 }
5085 else
5086 upl_lock(upl);
5087
5088 process_upl_to_abort:
5089 if(isVectorUPL) {
5090 size = subupl_size;
5091 offset = subupl_offset;
5092 if(size == 0) {
5093 upl_unlock(vector_upl);
5094 return KERN_SUCCESS;
5095 }
5096 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
5097 if(upl == NULL) {
5098 upl_unlock(vector_upl);
5099 return KERN_FAILURE;
5100 }
5101 subupl_size -= size;
5102 subupl_offset += size;
5103 }
5104
5105 *empty = FALSE;
5106
5107 #if UPL_DEBUG
5108 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
5109 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
5110
5111 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
5112 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
5113 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1;
5114
5115 upl->upl_commit_index++;
5116 }
5117 #endif
5118 if (upl->flags & UPL_DEVICE_MEMORY)
5119 xfer_size = 0;
5120 else if ((offset + size) <= upl->size)
5121 xfer_size = size;
5122 else {
5123 if(!isVectorUPL)
5124 upl_unlock(upl);
5125 else {
5126 upl_unlock(vector_upl);
5127 }
5128
5129 return KERN_FAILURE;
5130 }
5131 if (upl->flags & UPL_INTERNAL) {
5132 lite_list = (wpl_array_t)
5133 ((((uintptr_t)upl) + sizeof(struct upl))
5134 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
5135 } else {
5136 lite_list = (wpl_array_t)
5137 (((uintptr_t)upl) + sizeof(struct upl));
5138 }
5139 object = upl->map_object;
5140
5141 if (upl->flags & UPL_SHADOWED) {
5142 vm_object_lock(object);
5143 shadow_object = object->shadow;
5144 } else
5145 shadow_object = object;
5146
5147 entry = offset/PAGE_SIZE;
5148 target_offset = (vm_object_offset_t)offset;
5149
5150 if (upl->flags & UPL_KERNEL_OBJECT)
5151 vm_object_lock_shared(shadow_object);
5152 else
5153 vm_object_lock(shadow_object);
5154
5155 if (upl->flags & UPL_ACCESS_BLOCKED) {
5156 assert(shadow_object->blocked_access);
5157 shadow_object->blocked_access = FALSE;
5158 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
5159 }
5160
5161 dwp = &dw_array[0];
5162 dw_count = 0;
5163
5164 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
5165 panic("upl_abort_range: kernel_object being DUMPED");
5166
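/*
 * walk the aborted range a page at a time, batching queue
 * manipulations into dw_array just as upl_commit_range does
 */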
5167 while (xfer_size) {
5168 vm_page_t t, m;
5169
5170 dwp->dw_mask = 0;
5171
5172 m = VM_PAGE_NULL;
5173
5174 if (upl->flags & UPL_LITE) {
5175 unsigned int pg_num;
5176
5177 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
5178 assert(pg_num == target_offset/PAGE_SIZE);
5179
5180
5181 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
5182 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
5183
5184 if ( !(upl->flags & UPL_KERNEL_OBJECT))
5185 m = vm_page_lookup(shadow_object, target_offset +
5186 (upl->offset - shadow_object->paging_offset));
5187 }
5188 }
5189 if (upl->flags & UPL_SHADOWED) {
5190 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
5191 t->pageout = FALSE;
5192
5193 VM_PAGE_FREE(t);
5194
5195 if (m == VM_PAGE_NULL)
5196 m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
5197 }
5198 }
5199 if ((upl->flags & UPL_KERNEL_OBJECT))
5200 goto abort_next_page;
5201
5202 if (m != VM_PAGE_NULL) {
5203
5204 if (m->absent) {
5205 boolean_t must_free = TRUE;
5206
5207 m->clustered = FALSE;
5208 /*
5209 * COPYOUT = FALSE case
5210 * check for error conditions which must
5211 * be passed back to the page's customer
5212 */
5213 if (error & UPL_ABORT_RESTART) {
5214 m->restart = TRUE;
5215 m->absent = FALSE;
5216 m->unusual = TRUE;
5217 must_free = FALSE;
5218 } else if (error & UPL_ABORT_UNAVAILABLE) {
5219 m->restart = FALSE;
5220 m->unusual = TRUE;
5221 must_free = FALSE;
5222 } else if (error & UPL_ABORT_ERROR) {
5223 m->restart = FALSE;
5224 m->absent = FALSE;
5225 m->error = TRUE;
5226 m->unusual = TRUE;
5227 must_free = FALSE;
5228 }
5229
5230 /*
5231 * ENCRYPTED SWAP:
5232 * If the page was already encrypted,
5233 * we don't really need to decrypt it
5234 * now. It will get decrypted later,
5235 * on demand, as soon as someone needs
5236 * to access its contents.
5237 */
5238
5239 m->cleaning = FALSE;
5240 m->encrypted_cleaning = FALSE;
5241 m->overwriting = FALSE;
5242
5243 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
5244
5245 if (must_free == TRUE)
5246 dwp->dw_mask |= DW_vm_page_free;
5247 else
5248 dwp->dw_mask |= DW_vm_page_activate;
5249 } else {
5250 /*
5251 * Handle the trusted pager throttle.
5252 */
5253 if (m->laundry)
5254 dwp->dw_mask |= DW_vm_pageout_throttle_up;
5255
5256 if (m->pageout) {
5257 assert(m->busy);
5258 assert(m->wire_count == 1);
5259 m->pageout = FALSE;
5260
5261 dwp->dw_mask |= DW_vm_page_unwire;
5262 }
5263 m->dump_cleaning = FALSE;
5264 m->cleaning = FALSE;
5265 m->encrypted_cleaning = FALSE;
5266 m->overwriting = FALSE;
5267 #if MACH_PAGEMAP
5268 vm_external_state_clr(m->object->existence_map, m->offset);
5269 #endif /* MACH_PAGEMAP */
5270 if (error & UPL_ABORT_DUMP_PAGES) {
5271 pmap_disconnect(m->phys_page);
5272
5273 dwp->dw_mask |= DW_vm_page_free;
5274 } else {
5275 if (error & UPL_ABORT_REFERENCE) {
5276 /*
5277 * we've been told to explicitly
5278 * reference this page... for
5279 * file I/O, this is done by
5280 * implementing an LRU on the inactive q
5281 */
5282 dwp->dw_mask |= DW_vm_page_lru;
5283 }
5284 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
5285 }
5286 }
5287 }
5288 abort_next_page:
5289 target_offset += PAGE_SIZE_64;
5290 xfer_size -= PAGE_SIZE;
5291 entry++;
5292
5293 if (dwp->dw_mask) {
5294 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
5295 if (m->busy == FALSE) {
5296 /*
5297 * dw_do_work may need to drop the object lock
5298 * if it does, we need the pages it's looking at to
5299 * be held stable via the busy bit.
5300 */
5301 m->busy = TRUE;
5302 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
5303 }
5304 dwp->dw_m = m;
5305 dwp++;
5306 dw_count++;
5307
5308 if (dw_count >= DELAYED_WORK_LIMIT) {
5309 dw_do_work(shadow_object, &dw_array[0], dw_count);
5310
5311 dwp = &dw_array[0];
5312 dw_count = 0;
5313 }
5314 } else {
5315 if (dwp->dw_mask & DW_clear_busy)
5316 m->busy = FALSE;
5317
5318 if (dwp->dw_mask & DW_PAGE_WAKEUP)
5319 PAGE_WAKEUP(m);
5320 }
5321 }
5322 }
5323 if (dw_count)
5324 dw_do_work(shadow_object, &dw_array[0], dw_count);
5325
5326 occupied = 1;
5327
5328 if (upl->flags & UPL_DEVICE_MEMORY) {
5329 occupied = 0;
5330 } else if (upl->flags & UPL_LITE) {
5331 int pg_num;
5332 int i;
5333
5334 pg_num = upl->size/PAGE_SIZE;
5335 pg_num = (pg_num + 31) >> 5;
5336 occupied = 0;
5337
5338 for (i = 0; i < pg_num; i++) {
5339 if (lite_list[i] != 0) {
5340 occupied = 1;
5341 break;
5342 }
5343 }
5344 } else {
5345 if (queue_empty(&upl->map_object->memq))
5346 occupied = 0;
5347 }
5348 if (occupied == 0) {
5349 /*
5350 * If this UPL element belongs to a Vector UPL and is
5351 * empty, then this is the right function to deallocate
5352 * it. So go ahead and set the *empty variable. The flag
5353 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view,
5354 * should be considered relevant for the Vector UPL and
5355 * not the internal UPLs.
5356 */
5357 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
5358 *empty = TRUE;
5359
5360 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
5361 /*
5362 * this is not a paging object
5363 * so we need to drop the paging reference
5364 * that was taken when we created the UPL
5365 * against this object
5366 */
5367 vm_object_activity_end(shadow_object);
5368 } else {
5369 /*
5370 * we donated the paging reference to
5371 * the map object... vm_pageout_object_terminate
5372 * will drop this reference
5373 */
5374 }
5375 }
5376 vm_object_unlock(shadow_object);
5377 if (object != shadow_object)
5378 vm_object_unlock(object);
5379
5380 if(!isVectorUPL)
5381 upl_unlock(upl);
5382 else {
5383 /*
5384 * If we completed our operations on an UPL that is
5385 * part of a Vectored UPL and if empty is TRUE, then
5386 * we should go ahead and deallocate this UPL element.
5387 * Then we check if this was the last of the UPL elements
5388 * within that Vectored UPL. If so, set empty to TRUE
5389 * so that in ubc_upl_abort_range or ubc_upl_abort, we
5390 * can go ahead and deallocate the Vector UPL too.
5391 */
5392 if(*empty == TRUE) {
5393 *empty = vector_upl_set_subupl(vector_upl, upl,0);
5394 upl_deallocate(upl);
5395 }
5396 goto process_upl_to_abort;
5397 }
5398
5399 return KERN_SUCCESS;
5400 }
5401
5402
5403 kern_return_t
5404 upl_abort(
5405 upl_t upl,
5406 int error)
5407 {
5408 boolean_t empty;
5409
5410 return upl_abort_range(upl, 0, upl->size, error, &empty);
5411 }
5412
5413
5414 /* an option on commit should be wire */
5415 kern_return_t
5416 upl_commit(
5417 upl_t upl,
5418 upl_page_info_t *page_list,
5419 mach_msg_type_number_t count)
5420 {
5421 boolean_t empty;
5422
5423 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
5424 }
5425
5426
5427 unsigned int vm_object_iopl_request_sleep_for_cleaning = 0;
5428
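/*
 * Routine:	vm_object_iopl_request
 * Purpose:
 *	Create an I/O UPL directly against a VM object: the pages
 *	covering [offset, offset+size) are faulted in if necessary,
 *	wired, and recorded in the UPL's lite list (and optional
 *	page list) so a driver can perform I/O against them until
 *	the UPL is committed or aborted.
 */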
5429 kern_return_t
5430 vm_object_iopl_request(
5431 vm_object_t object,
5432 vm_object_offset_t offset,
5433 upl_size_t size,
5434 upl_t *upl_ptr,
5435 upl_page_info_array_t user_page_list,
5436 unsigned int *page_list_count,
5437 int cntrl_flags)
5438 {
5439 vm_page_t dst_page;
5440 vm_object_offset_t dst_offset;
5441 upl_size_t xfer_size;
5442 upl_t upl = NULL;
5443 unsigned int entry;
5444 wpl_array_t lite_list = NULL;
5445 int no_zero_fill = FALSE;
5446 u_int32_t psize;
5447 kern_return_t ret;
5448 vm_prot_t prot;
5449 struct vm_object_fault_info fault_info;
5450 struct dw dw_array[DELAYED_WORK_LIMIT];
5451 struct dw *dwp;
5452 int dw_count;
5453 int dw_index;
5454
5455 if (cntrl_flags & ~UPL_VALID_FLAGS) {
5456 /*
5457 * For forward compatibility's sake,
5458 * reject any unknown flag.
5459 */
5460 return KERN_INVALID_VALUE;
5461 }
5462 if (vm_lopage_needed == FALSE)
5463 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
5464
5465 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
5466 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
5467 return KERN_INVALID_VALUE;
5468
5469 if (object->phys_contiguous) {
5470 if ((offset + object->shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
5471 return KERN_INVALID_ADDRESS;
5472
5473 if (((offset + object->shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
5474 return KERN_INVALID_ADDRESS;
5475 }
5476 }
5477
5478 if (cntrl_flags & UPL_ENCRYPT) {
5479 /*
5480 * ENCRYPTED SWAP:
5481 * The paging path doesn't use this interface,
5482 * so we don't support the UPL_ENCRYPT flag
5483 * here. We won't encrypt the pages.
5484 */
5485 assert(! (cntrl_flags & UPL_ENCRYPT));
5486 }
5487 if (cntrl_flags & UPL_NOZEROFILL)
5488 no_zero_fill = TRUE;
5489
5490 if (cntrl_flags & UPL_COPYOUT_FROM)
5491 prot = VM_PROT_READ;
5492 else
5493 prot = VM_PROT_READ | VM_PROT_WRITE;
5494
5495 if (((size/PAGE_SIZE) > MAX_UPL_SIZE) && !object->phys_contiguous)
5496 size = MAX_UPL_SIZE * PAGE_SIZE;
5497
5498 if (cntrl_flags & UPL_SET_INTERNAL) {
5499 if (page_list_count != NULL)
5500 *page_list_count = MAX_UPL_SIZE;
5501 }
5502 if (((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
5503 ((page_list_count != NULL) && (*page_list_count != 0) && *page_list_count < (size/page_size)))
5504 return KERN_INVALID_ARGUMENT;
5505
5506 if ((!object->internal) && (object->paging_offset != 0))
5507 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
5508
5509
5510 if (object->phys_contiguous)
5511 psize = PAGE_SIZE;
5512 else
5513 psize = size;
5514
5515 if (cntrl_flags & UPL_SET_INTERNAL) {
5516 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, UPL_IO_WIRE, psize);
5517
5518 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
5519 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
5520 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
5521 if (size == 0) {
5522 user_page_list = NULL;
5523 lite_list = NULL;
5524 }
5525 } else {
5526 upl = upl_create(UPL_CREATE_LITE, UPL_IO_WIRE, psize);
5527
5528 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
5529 if (size == 0) {
5530 lite_list = NULL;
5531 }
5532 }
5533 if (user_page_list)
5534 user_page_list[0].device = FALSE;
5535 *upl_ptr = upl;
5536
5537 upl->map_object = object;
5538 upl->size = size;
5539
5540 if (object == kernel_object &&
5541 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
5542 upl->flags |= UPL_KERNEL_OBJECT;
5543 #if UPL_DEBUG
5544 vm_object_lock(object);
5545 #else
5546 vm_object_lock_shared(object);
5547 #endif
5548 } else {
5549 vm_object_lock(object);
5550 vm_object_activity_begin(object);
5551 }
5552 /*
5553 * paging in progress also protects the paging_offset
5554 */
5555 upl->offset = offset + object->paging_offset;
5556
5557 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5558 /*
5559 * The user requested that access to the pages in this UPL
5560 * be blocked until the UPL is committed or aborted.
5561 */
5562 upl->flags |= UPL_ACCESS_BLOCKED;
5563 }
5564
5565 if (object->phys_contiguous) {
5566 #if UPL_DEBUG
5567 queue_enter(&object->uplq, upl, upl_t, uplq);
5568 #endif /* UPL_DEBUG */
5569
5570 if (upl->flags & UPL_ACCESS_BLOCKED) {
5571 assert(!object->blocked_access);
5572 object->blocked_access = TRUE;
5573 }
5574
5575 vm_object_unlock(object);
5576
5577 /*
5578 * don't need any shadow mappings for this one
5579 * since it is already I/O memory
5580 */
5581 upl->flags |= UPL_DEVICE_MEMORY;
5582
5583 upl->highest_page = (ppnum_t) ((offset + object->shadow_offset + size - 1)>>PAGE_SHIFT);
5584
5585 if (user_page_list) {
5586 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->shadow_offset)>>PAGE_SHIFT);
5587 user_page_list[0].device = TRUE;
5588 }
5589 if (page_list_count != NULL) {
5590 if (upl->flags & UPL_INTERNAL)
5591 *page_list_count = 0;
5592 else
5593 *page_list_count = 1;
5594 }
5595 return KERN_SUCCESS;
5596 }
5597 if (object != kernel_object) {
5598 /*
5599 * Protect user space from future COW operations
5600 */
5601 object->true_share = TRUE;
5602
5603 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
5604 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
5605 }
5606
5607 #if UPL_DEBUG
5608 queue_enter(&object->uplq, upl, upl_t, uplq);
5609 #endif /* UPL_DEBUG */
5610
5611 if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
5612 object->copy != VM_OBJECT_NULL) {
5613 /*
5614 * Honor copy-on-write obligations
5615 *
5616 * The caller is gathering these pages and
5617 * might modify their contents. We need to
5618 * make sure that the copy object has its own
5619 * private copies of these pages before we let
5620 * the caller modify them.
5621 *
5622 * NOTE: someone else could map the original object
5623 * after we've done this copy-on-write here, and they
5624 * could then see an inconsistent picture of the memory
5625 * while it's being modified via the UPL. To prevent this,
5626 * we would have to block access to these pages until the
5627 * UPL is released. We could use the UPL_BLOCK_ACCESS
5628 * code path for that...
5629 */
5630 vm_object_update(object,
5631 offset,
5632 size,
5633 NULL,
5634 NULL,
5635 FALSE, /* should_return */
5636 MEMORY_OBJECT_COPY_SYNC,
5637 VM_PROT_NO_CHANGE);
5638 #if DEVELOPMENT || DEBUG
5639 iopl_cow++;
5640 iopl_cow_pages += size >> PAGE_SHIFT;
5641 #endif
5642 }
5643
5644
5645 entry = 0;
5646
5647 xfer_size = size;
5648 dst_offset = offset;
5649
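/*
 * set up the fault descriptor used if any of the pages have to
 * be brought in via vm_fault_page... treat the access pattern as
 * sequential and bound it to the range being wired
 */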
5650 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
5651 fault_info.user_tag = 0;
5652 fault_info.lo_offset = offset;
5653 fault_info.hi_offset = offset + xfer_size;
5654 fault_info.no_cache = FALSE;
5655 fault_info.stealth = FALSE;
5656 fault_info.mark_zf_absent = TRUE;
5657
5658 dwp = &dw_array[0];
5659 dw_count = 0;
5660
5661 while (xfer_size) {
5662 vm_fault_return_t result;
5663 unsigned int pg_num;
5664
5665 dwp->dw_mask = 0;
5666
5667 dst_page = vm_page_lookup(object, dst_offset);
5668
5669 /*
5670 * ENCRYPTED SWAP:
5671 * If the page is encrypted, we need to decrypt it,
5672 * so force a soft page fault.
5673 */
5674 if (dst_page == VM_PAGE_NULL ||
5675 dst_page->busy ||
5676 dst_page->encrypted ||
5677 dst_page->error ||
5678 dst_page->restart ||
5679 dst_page->absent ||
5680 dst_page->fictitious) {
5681
5682 if (object == kernel_object)
5683 panic("vm_object_iopl_request: missing/bad page in kernel object\n");
5684
5685 do {
5686 vm_page_t top_page;
5687 kern_return_t error_code;
5688 int interruptible;
5689
5690 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
5691 interruptible = THREAD_ABORTSAFE;
5692 else
5693 interruptible = THREAD_UNINT;
5694
5695 fault_info.interruptible = interruptible;
5696 fault_info.cluster_size = xfer_size;
5697
5698 vm_object_paging_begin(object);
5699
5700 result = vm_fault_page(object, dst_offset,
5701 prot | VM_PROT_WRITE, FALSE,
5702 &prot, &dst_page, &top_page,
5703 (int *)0,
5704 &error_code, no_zero_fill,
5705 FALSE, &fault_info);
5706
5707 switch (result) {
5708
5709 case VM_FAULT_SUCCESS:
5710
5711 PAGE_WAKEUP_DONE(dst_page);
5712 /*
5713 * Release paging references and
5714 * top-level placeholder page, if any.
5715 */
5716 if (top_page != VM_PAGE_NULL) {
5717 vm_object_t local_object;
5718
5719 local_object = top_page->object;
5720
5721 if (top_page->object != dst_page->object) {
5722 vm_object_lock(local_object);
5723 VM_PAGE_FREE(top_page);
5724 vm_object_paging_end(local_object);
5725 vm_object_unlock(local_object);
5726 } else {
5727 VM_PAGE_FREE(top_page);
5728 vm_object_paging_end(local_object);
5729 }
5730 }
5731 vm_object_paging_end(object);
5732 break;
5733
5734 case VM_FAULT_RETRY:
5735 vm_object_lock(object);
5736 break;
5737
5738 case VM_FAULT_FICTITIOUS_SHORTAGE:
5739 vm_page_more_fictitious();
5740
5741 vm_object_lock(object);
5742 break;
5743
5744 case VM_FAULT_MEMORY_SHORTAGE:
5745 if (vm_page_wait(interruptible)) {
5746 vm_object_lock(object);
5747 break;
5748 }
5749 /* fall thru */
5750
5751 case VM_FAULT_INTERRUPTED:
5752 error_code = MACH_SEND_INTERRUPTED;
5753 case VM_FAULT_MEMORY_ERROR:
5754 memory_error:
5755 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
5756
5757 vm_object_lock(object);
5758 goto return_err;
5759
5760 case VM_FAULT_SUCCESS_NO_VM_PAGE:
5761 /* success but no page: fail */
5762 vm_object_paging_end(object);
5763 vm_object_unlock(object);
5764 goto memory_error;
5765
5766 default:
5767 panic("vm_object_iopl_request: unexpected error"
5768 " 0x%x from vm_fault_page()\n", result);
5769 }
5770 } while (result != VM_FAULT_SUCCESS);
5771
5772 }
5773
5774 if (upl->flags & UPL_KERNEL_OBJECT)
5775 goto record_phys_addr;
5776
5777 if (dst_page->cleaning) {
5778 /*
5779 * Someone else is cleaning this page in place.
5780 * In theory, we should be able to proceed and use this
5781 * page, but they'll probably end up clearing the "busy"
5782 * bit on it in upl_commit_range() even though they didn't
5783 * set it, which would clear our "busy" bit and open
5784 * us to race conditions.
5785 * We'd better wait for the cleaning to complete and
5786 * then try again.
5787 */
5788 vm_object_iopl_request_sleep_for_cleaning++;
5789 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5790 continue;
5791 }
5792 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
5793 dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
5794 vm_page_t low_page;
5795 int refmod;
5796
5797 /*
5798 * support devices that can't DMA above 32 bits
5799 * by substituting pages from a pool of low address
5800 * memory for any pages we find above the 4G mark.
5801 * We can't substitute if the page is already wired because
5802 * we don't know whether that physical address has been
5803 * handed out to some other 64 bit capable DMA device to use
5804 */
5805 if (VM_PAGE_WIRED(dst_page)) {
5806 ret = KERN_PROTECTION_FAILURE;
5807 goto return_err;
5808 }
5809 low_page = vm_page_grablo();
5810
5811 if (low_page == VM_PAGE_NULL) {
5812 ret = KERN_RESOURCE_SHORTAGE;
5813 goto return_err;
5814 }
5815 /*
5816 * from here until the vm_page_replace completes
5817 * we mustn't drop the object lock... we don't
5818 * want anyone refaulting this page in and using
5819 * it after we disconnect it... we want the fault
5820 * to find the new page being substituted.
5821 */
5822 if (dst_page->pmapped)
5823 refmod = pmap_disconnect(dst_page->phys_page);
5824 else
5825 refmod = 0;
5826 vm_page_copy(dst_page, low_page);
5827
5828 low_page->reference = dst_page->reference;
5829 low_page->dirty = dst_page->dirty;
5830
5831 if (refmod & VM_MEM_REFERENCED)
5832 low_page->reference = TRUE;
5833 if (refmod & VM_MEM_MODIFIED)
5834 low_page->dirty = TRUE;
5835
5836 vm_page_replace(low_page, object, dst_offset);
5837
5838 dst_page = low_page;
5839 /*
5840 * vm_page_grablo returned the page marked
5841 * BUSY... we don't need a PAGE_WAKEUP_DONE
5842 * here, because we've never dropped the object lock
5843 */
5844 dst_page->busy = FALSE;
5845 }
5846 dwp->dw_mask |= DW_vm_page_wire;
5847
5848 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5849 /*
5850 * Mark the page "busy" to block any future page fault
5851 * on this page. We'll also remove the mapping
5852 * of all these pages before leaving this routine.
5853 */
5854 assert(!dst_page->fictitious);
5855 dst_page->busy = TRUE;
5856 }
5857 /*
5858 * expect the page to be used
5859 * page queues lock must be held to set 'reference'
5860 */
5861 dwp->dw_mask |= DW_set_reference;
5862
5863 if (!(cntrl_flags & UPL_COPYOUT_FROM))
5864 dst_page->dirty = TRUE;
5865 record_phys_addr:
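/*
 * Record this page in the "lite" bitmap: pg_num >> 5 picks the
 * 32-bit word and pg_num & 31 picks the bit within it, so each
 * bit marks one page as being part of this UPL.
 */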
5866 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5867 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5868 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5869
5870 if (dst_page->phys_page > upl->highest_page)
5871 upl->highest_page = dst_page->phys_page;
5872
5873 if (user_page_list) {
5874 user_page_list[entry].phys_addr = dst_page->phys_page;
5875 user_page_list[entry].pageout = dst_page->pageout;
5876 user_page_list[entry].absent = dst_page->absent;
5877 user_page_list[entry].dirty = dst_page->dirty;
5878 user_page_list[entry].precious = dst_page->precious;
5879 user_page_list[entry].device = FALSE;
5880 if (dst_page->clustered == TRUE)
5881 user_page_list[entry].speculative = dst_page->speculative;
5882 else
5883 user_page_list[entry].speculative = FALSE;
5884 user_page_list[entry].cs_validated = dst_page->cs_validated;
5885 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
5886 }
5887 if (object != kernel_object) {
5888 /*
5889 * someone is explicitly grabbing this page...
5890 * update clustered and speculative state
5891 *
5892 */
5893 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5894 }
5895 entry++;
5896 dst_offset += PAGE_SIZE_64;
5897 xfer_size -= PAGE_SIZE;
5898
5899 if (dwp->dw_mask) {
5900 if (dst_page->busy == FALSE) {
5901 /*
5902 * dw_do_work may need to drop the object lock
5903 * if it does, we need the pages it's looking at to
5904 * be held stable via the busy bit.
5905 */
5906 dst_page->busy = TRUE;
5907 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
5908 }
5909 dwp->dw_m = dst_page;
5910 dwp++;
5911 dw_count++;
5912
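/*
 * Batch the deferred page work: once the array fills up to
 * DELAYED_WORK_LIMIT it is flushed via dw_do_work(), and any
 * remainder is flushed just after this loop ends.
 */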
5913 if (dw_count >= DELAYED_WORK_LIMIT) {
5914 dw_do_work(object, &dw_array[0], dw_count);
5915
5916 dwp = &dw_array[0];
5917 dw_count = 0;
5918 }
5919 }
5920 }
5921 if (dw_count)
5922 dw_do_work(object, &dw_array[0], dw_count);
5923
5924 if (page_list_count != NULL) {
5925 if (upl->flags & UPL_INTERNAL)
5926 *page_list_count = 0;
5927 else if (*page_list_count > entry)
5928 *page_list_count = entry;
5929 }
5930 vm_object_unlock(object);
5931
5932 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5933 /*
5934 * We've marked all the pages "busy" so that future
5935 * page faults will block.
5936 * Now remove the mapping for these pages, so that they
5937 * can't be accessed without causing a page fault.
5938 */
5939 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
5940 PMAP_NULL, 0, VM_PROT_NONE);
5941 assert(!object->blocked_access);
5942 object->blocked_access = TRUE;
5943 }
5944 return KERN_SUCCESS;
5945
5946 return_err:
5947 dw_index = 0;
5948
5949 for (; offset < dst_offset; offset += PAGE_SIZE) {
5950 boolean_t need_unwire;
5951
5952 dst_page = vm_page_lookup(object, offset);
5953
5954 if (dst_page == VM_PAGE_NULL)
5955 panic("vm_object_iopl_request: Wired pages missing.\n");
5956
5957 /*
5958 * if we've already processed this page in an earlier
5959 * dw_do_work, we need to undo the wiring... we will
5960 * leave the dirty and reference bits on if they
5961 * were set, since we don't have a good way of knowing
5962 * what the previous state was and we won't get here
5963 * under any normal circumstances... we will always
5964 * clear BUSY and wakeup any waiters via vm_page_free
5965 * or PAGE_WAKEUP_DONE
5966 */
5967 need_unwire = TRUE;
5968
5969 if (dw_count) {
5970 if (dw_array[dw_index].dw_m == dst_page) {
5971 /*
5972 * still in the deferred work list
5973 * which means we haven't yet called
5974 * vm_page_wire on this page
5975 */
5976 need_unwire = FALSE;
5977 }
5978 dw_index++;
5979 dw_count--;
5980 }
5981 vm_page_lock_queues();
5982
5983 if (need_unwire == TRUE) {
5984 boolean_t queueit;
5985
5986 queueit = (dst_page->absent) ? FALSE : TRUE;
5987
5988 vm_page_unwire(dst_page, queueit);
5989 }
5990 if (dst_page->absent)
5991 vm_page_free(dst_page);
5992 else
5993 PAGE_WAKEUP_DONE(dst_page);
5994
5995 vm_page_unlock_queues();
5996
5997 if (need_unwire == TRUE)
5998 VM_STAT_INCR(reactivations);
5999 }
6000 #if UPL_DEBUG
6001 upl->upl_state = 2;
6002 #endif
6003 if (! (upl->flags & UPL_KERNEL_OBJECT)) {
6004 vm_object_activity_end(object);
6005 }
6006 vm_object_unlock(object);
6007 upl_destroy(upl);
6008
6009 return ret;
6010 }
6011
6012 kern_return_t
6013 upl_transpose(
6014 upl_t upl1,
6015 upl_t upl2)
6016 {
6017 kern_return_t retval;
6018 boolean_t upls_locked;
6019 vm_object_t object1, object2;
6020
6021 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
6022 return KERN_INVALID_ARGUMENT;
6023 }
6024
6025 upls_locked = FALSE;
6026
6027 /*
6028 * Since we need to lock both UPLs at the same time,
6029 * avoid deadlocks by always taking locks in the same order.
6030 */
6031 if (upl1 < upl2) {
6032 upl_lock(upl1);
6033 upl_lock(upl2);
6034 } else {
6035 upl_lock(upl2);
6036 upl_lock(upl1);
6037 }
6038 upls_locked = TRUE; /* the UPLs will need to be unlocked */
6039
6040 object1 = upl1->map_object;
6041 object2 = upl2->map_object;
6042
6043 if (upl1->offset != 0 || upl2->offset != 0 ||
6044 upl1->size != upl2->size) {
6045 /*
6046 * We deal only with full objects, not subsets.
6047 * That's because we exchange the entire backing store info
6048 * for the objects: pager, resident pages, etc... We can't do
6049 * only part of it.
6050 */
6051 retval = KERN_INVALID_VALUE;
6052 goto done;
6053 }
6054
6055 /*
6056 * Transpose the VM objects' backing store.
6057 */
6058 retval = vm_object_transpose(object1, object2,
6059 (vm_object_size_t) upl1->size);
6060
6061 if (retval == KERN_SUCCESS) {
6062 /*
6063 * Make each UPL point to the correct VM object, i.e. the
6064 * object holding the pages that the UPL refers to...
6065 */
6066 #if UPL_DEBUG
6067 queue_remove(&object1->uplq, upl1, upl_t, uplq);
6068 queue_remove(&object2->uplq, upl2, upl_t, uplq);
6069 #endif
6070 upl1->map_object = object2;
6071 upl2->map_object = object1;
6072 #if UPL_DEBUG
6073 queue_enter(&object1->uplq, upl2, upl_t, uplq);
6074 queue_enter(&object2->uplq, upl1, upl_t, uplq);
6075 #endif
6076 }
6077
6078 done:
6079 /*
6080 * Cleanup.
6081 */
6082 if (upls_locked) {
6083 upl_unlock(upl1);
6084 upl_unlock(upl2);
6085 upls_locked = FALSE;
6086 }
6087
6088 return retval;
6089 }
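/*
 * A minimal user-space sketch of the address-ordered locking idiom used
 * in upl_transpose() above: when two locks must be held at once, always
 * take them in a fixed (here, address) order so two threads can never
 * lock in opposite orders and deadlock. Assumes pthreads rather than
 * the kernel's upl locks; not compiled into this file.
 */
#if 0
#include <pthread.h>

static void
lock_pair_in_address_order(pthread_mutex_t *a, pthread_mutex_t *b)
{
	/* lower address first */
	if (a < b) {
		pthread_mutex_lock(a);
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

static void
unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	pthread_mutex_unlock(b);
}
#endif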
6090
6091 /*
6092 * ENCRYPTED SWAP:
6093 *
6094 * Rationale: the user might have some encrypted data on disk (via
6095 * FileVault or any other mechanism). That data is then decrypted in
6096 * memory, which is safe as long as the machine is secure. But that
6097 * decrypted data in memory could be paged out to disk by the default
6098 * pager. The data would then be stored on disk in clear (not encrypted)
6099 * and it could be accessed by anyone who gets physical access to the
6100 * disk (if the laptop or the disk gets stolen for example). This weakens
6101 * the security offered by FileVault.
6102 *
6103 * Solution: the default pager will optionally request that all the
6104 * pages it gathers for pageout be encrypted, via the UPL interfaces,
6105 * before it sends this UPL to disk via the vnode_pageout() path.
6106 *
6107 * Notes:
6108 *
6109 * To avoid disrupting the VM LRU algorithms, we want to keep the
6110 * clean-in-place mechanisms, which allow us to send some extra pages to
6111 * swap (clustering) without actually removing them from the user's
6112 * address space. We don't want the user to unknowingly access encrypted
6113 * data, so we have to actually remove the encrypted pages from the page
6114 * table. When the user accesses the data, the hardware will fail to
6115 * locate the virtual page in its page table and will trigger a page
6116 * fault. We can then decrypt the page and enter it in the page table
6117 * again. Whenever we allow the user to access the contents of a page,
6118 * we have to make sure it's not encrypted.
6119 *
6120 *
6121 */
6122 /*
6123 * ENCRYPTED SWAP:
6124 * Reserve of virtual addresses in the kernel address space.
6125 * We need to map the physical pages in the kernel, so that we
6126 * can call the encryption/decryption routines with a kernel
6127 * virtual address. We keep this pool of pre-allocated kernel
6128 * virtual addresses so that we don't have to scan the kernel's
6129 * virtual address space each time we need to encrypt or decrypt
6130 * a physical page.
6131 * It would be nice to be able to encrypt and decrypt in physical
6132 * mode but that might not always be more efficient...
6133 */
6134 decl_simple_lock_data(,vm_paging_lock)
6135 #define VM_PAGING_NUM_PAGES 64
6136 vm_map_offset_t vm_paging_base_address = 0;
6137 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
6138 int vm_paging_max_index = 0;
6139 int vm_paging_page_waiter = 0;
6140 int vm_paging_page_waiter_total = 0;
6141 unsigned long vm_paging_no_kernel_page = 0;
6142 unsigned long vm_paging_objects_mapped = 0;
6143 unsigned long vm_paging_pages_mapped = 0;
6144 unsigned long vm_paging_objects_mapped_slow = 0;
6145 unsigned long vm_paging_pages_mapped_slow = 0;
6146
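/*
 * A simplified user-space sketch of the slot pool above: slot i maps to
 * the virtual address base + i * PAGE_SIZE, and a boolean array records
 * which slots are in use. Locking and the waiter/wakeup handshake are
 * omitted, and the demo_* names are illustrative only, not part of the
 * kernel; not compiled into this file.
 */
#if 0
#include <stdbool.h>

#define DEMO_NUM_PAGES	64
#define DEMO_PAGE_SIZE	4096UL

static bool demo_page_inuse[DEMO_NUM_PAGES];

/* Grab a free slot and return its mapping address, or 0 if none is free. */
static unsigned long
demo_slot_alloc(unsigned long base)
{
	int i;

	for (i = 0; i < DEMO_NUM_PAGES; i++) {
		if (!demo_page_inuse[i]) {
			demo_page_inuse[i] = true;
			return base + (unsigned long)i * DEMO_PAGE_SIZE;
		}
	}
	return 0;
}

/* Return a slot to the pool, recovering its index from the address. */
static void
demo_slot_free(unsigned long base, unsigned long addr)
{
	demo_page_inuse[(addr - base) / DEMO_PAGE_SIZE] = false;
}
#endif
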
6147 void
6148 vm_paging_map_init(void)
6149 {
6150 kern_return_t kr;
6151 vm_map_offset_t page_map_offset;
6152 vm_map_entry_t map_entry;
6153
6154 assert(vm_paging_base_address == 0);
6155
6156 /*
6157 * Initialize our pool of pre-allocated kernel
6158 * virtual addresses.
6159 */
6160 page_map_offset = 0;
6161 kr = vm_map_find_space(kernel_map,
6162 &page_map_offset,
6163 VM_PAGING_NUM_PAGES * PAGE_SIZE,
6164 0,
6165 0,
6166 &map_entry);
6167 if (kr != KERN_SUCCESS) {
6168 panic("vm_paging_map_init: kernel_map full\n");
6169 }
6170 map_entry->object.vm_object = kernel_object;
6171 map_entry->offset = page_map_offset;
6172 vm_object_reference(kernel_object);
6173 vm_map_unlock(kernel_map);
6174
6175 assert(vm_paging_base_address == 0);
6176 vm_paging_base_address = page_map_offset;
6177 }
6178
6179 /*
6180 * ENCRYPTED SWAP:
6181 * vm_paging_map_object:
6182 * Maps part of a VM object's pages in the kernel
6183 * virtual address space, using the pre-allocated
6184 * kernel virtual addresses, if possible.
6185 * Context:
6186 * The VM object is locked. This lock will get
6187 * dropped and re-acquired though, so the caller
6188 * must make sure the VM object is kept alive
6189 * (by holding a VM map that has a reference
6190 * on it, for example, or taking an extra reference).
6191 * The page should also be kept busy to prevent
6192 * it from being reclaimed.
6193 */
6194 kern_return_t
6195 vm_paging_map_object(
6196 vm_map_offset_t *address,
6197 vm_page_t page,
6198 vm_object_t object,
6199 vm_object_offset_t offset,
6200 vm_map_size_t *size,
6201 vm_prot_t protection,
6202 boolean_t can_unlock_object)
6203 {
6204 kern_return_t kr;
6205 vm_map_offset_t page_map_offset;
6206 vm_map_size_t map_size;
6207 vm_object_offset_t object_offset;
6208 int i;
6209
6210
6211 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
6212 assert(page->busy);
6213 /*
6214 * Use one of the pre-allocated kernel virtual addresses
6215 * and just enter the VM page in the kernel address space
6216 * at that virtual address.
6217 */
6218 simple_lock(&vm_paging_lock);
6219
6220 /*
6221 * Try and find an available kernel virtual address
6222 * from our pre-allocated pool.
6223 */
6224 page_map_offset = 0;
6225 for (;;) {
6226 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
6227 if (vm_paging_page_inuse[i] == FALSE) {
6228 page_map_offset =
6229 vm_paging_base_address +
6230 (i * PAGE_SIZE);
6231 break;
6232 }
6233 }
6234 if (page_map_offset != 0) {
6235 /* found a space to map our page ! */
6236 break;
6237 }
6238
6239 if (can_unlock_object) {
6240 /*
6241 * If we can afford to unlock the VM object,
6242 * let's take the slow path now...
6243 */
6244 break;
6245 }
6246 /*
6247 * We can't afford to unlock the VM object, so
6248 * let's wait for a space to become available...
6249 */
6250 vm_paging_page_waiter_total++;
6251 vm_paging_page_waiter++;
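/*
 * The address of vm_paging_page_waiter doubles as the wait event:
 * vm_paging_unmap_object() issues the matching thread_wakeup()
 * when it returns a slot to the pool.
 */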
6252 thread_sleep_fast_usimple_lock(&vm_paging_page_waiter,
6253 &vm_paging_lock,
6254 THREAD_UNINT);
6255 vm_paging_page_waiter--;
6256 /* ... and try again */
6257 }
6258
6259 if (page_map_offset != 0) {
6260 /*
6261 * We found a kernel virtual address;
6262 * map the physical page to that virtual address.
6263 */
6264 if (i > vm_paging_max_index) {
6265 vm_paging_max_index = i;
6266 }
6267 vm_paging_page_inuse[i] = TRUE;
6268 simple_unlock(&vm_paging_lock);
6269
6270 if (page->pmapped == FALSE) {
6271 pmap_sync_page_data_phys(page->phys_page);
6272 }
6273 page->pmapped = TRUE;
6274
6275 /*
6276 * Keep the VM object locked over the PMAP_ENTER
6277 * and the actual use of the page by the kernel,
6278 * or this pmap mapping might get undone by a
6279 * vm_object_pmap_protect() call...
6280 */
6281 PMAP_ENTER(kernel_pmap,
6282 page_map_offset,
6283 page,
6284 protection,
6285 ((int) page->object->wimg_bits &
6286 VM_WIMG_MASK),
6287 TRUE);
6288 vm_paging_objects_mapped++;
6289 vm_paging_pages_mapped++;
6290 *address = page_map_offset;
6291
6292 /* all done and mapped, ready to use ! */
6293 return KERN_SUCCESS;
6294 }
6295
6296 /*
6297 * We ran out of pre-allocated kernel virtual
6298 * addresses. Just map the page in the kernel
6299 * the slow and regular way.
6300 */
6301 vm_paging_no_kernel_page++;
6302 simple_unlock(&vm_paging_lock);
6303 }
6304
6305 if (! can_unlock_object) {
6306 return KERN_NOT_SUPPORTED;
6307 }
6308
6309 object_offset = vm_object_trunc_page(offset);
6310 map_size = vm_map_round_page(*size);
6311
6312 /*
6313 * Try and map the required range of the object
6314 * in the kernel_map
6315 */
6316
6317 vm_object_reference_locked(object); /* for the map entry */
6318 vm_object_unlock(object);
6319
6320 kr = vm_map_enter(kernel_map,
6321 address,
6322 map_size,
6323 0,
6324 VM_FLAGS_ANYWHERE,
6325 object,
6326 object_offset,
6327 FALSE,
6328 protection,
6329 VM_PROT_ALL,
6330 VM_INHERIT_NONE);
6331 if (kr != KERN_SUCCESS) {
6332 *address = 0;
6333 *size = 0;
6334 vm_object_deallocate(object); /* for the map entry */
6335 vm_object_lock(object);
6336 return kr;
6337 }
6338
6339 *size = map_size;
6340
6341 /*
6342 * Enter the mapped pages in the page table now.
6343 */
6344 vm_object_lock(object);
6345 /*
6346 * VM object must be kept locked from before PMAP_ENTER()
6347 * until after the kernel is done accessing the page(s).
6348 * Otherwise, the pmap mappings in the kernel could be
6349 * undone by a call to vm_object_pmap_protect().
6350 */
6351
6352 for (page_map_offset = 0;
6353 map_size != 0;
6354 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
6355 unsigned int cache_attr;
6356
6357 page = vm_page_lookup(object, offset + page_map_offset);
6358 if (page == VM_PAGE_NULL) {
6359 printf("vm_paging_map_object: no page !?\n");
6360 vm_object_unlock(object);
6361 kr = vm_map_remove(kernel_map, *address, *size,
6362 VM_MAP_NO_FLAGS);
6363 assert(kr == KERN_SUCCESS);
6364 *address = 0;
6365 *size = 0;
6366 vm_object_lock(object);
6367 return KERN_MEMORY_ERROR;
6368 }
6369 if (page->pmapped == FALSE) {
6370 pmap_sync_page_data_phys(page->phys_page);
6371 }
6372 page->pmapped = TRUE;
6373 cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
6374
6375 //assert(pmap_verify_free(page->phys_page));
6376 PMAP_ENTER(kernel_pmap,
6377 *address + page_map_offset,
6378 page,
6379 protection,
6380 cache_attr,
6381 TRUE);
6382 }
6383
6384 vm_paging_objects_mapped_slow++;
6385 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
6386
6387 return KERN_SUCCESS;
6388 }
6389
6390 /*
6391 * ENCRYPTED SWAP:
6392 * vm_paging_unmap_object:
6393 * Unmaps part of a VM object's pages from the kernel
6394 * virtual address space.
6395 * Context:
6396 * The VM object is locked. This lock will get
6397 * dropped and re-acquired though.
6398 */
6399 void
6400 vm_paging_unmap_object(
6401 vm_object_t object,
6402 vm_map_offset_t start,
6403 vm_map_offset_t end)
6404 {
6405 kern_return_t kr;
6406 int i;
6407
6408 if ((vm_paging_base_address == 0) ||
6409 (start < vm_paging_base_address) ||
6410 (end > (vm_paging_base_address
6411 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
6412 /*
6413 * We didn't use our pre-allocated pool of
6414 * kernel virtual addresses. Deallocate the
6415 * virtual memory.
6416 */
6417 if (object != VM_OBJECT_NULL) {
6418 vm_object_unlock(object);
6419 }
6420 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
6421 if (object != VM_OBJECT_NULL) {
6422 vm_object_lock(object);
6423 }
6424 assert(kr == KERN_SUCCESS);
6425 } else {
6426 /*
6427 * We used a kernel virtual address from our
6428 * pre-allocated pool. Put it back in the pool
6429 * for next time.
6430 */
6431 assert(end - start == PAGE_SIZE);
6432 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
6433 assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
6434
6435 /* undo the pmap mapping */
6436 pmap_remove(kernel_pmap, start, end);
6437
6438 simple_lock(&vm_paging_lock);
6439 vm_paging_page_inuse[i] = FALSE;
6440 if (vm_paging_page_waiter) {
6441 thread_wakeup(&vm_paging_page_waiter);
6442 }
6443 simple_unlock(&vm_paging_lock);
6444 }
6445 }
6446
6447 #if CRYPTO
6448 /*
6449 * Encryption data.
6450 * "iv" is the "initial vector". Ideally, we want to
6451 * have a different one for each page we encrypt, so that
6452 * crackers can't find encryption patterns too easily.
6453 */
6454 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
6455 boolean_t swap_crypt_ctx_initialized = FALSE;
6456 aes_32t swap_crypt_key[8]; /* big enough for a 256-bit key */
6457 aes_ctx swap_crypt_ctx;
6458 const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
6459
6460 #if DEBUG
6461 boolean_t swap_crypt_ctx_tested = FALSE;
6462 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
6463 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
6464 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
6465 #endif /* DEBUG */
6466
6467 /*
6468 * Initialize the encryption context: key and key size.
6469 */
6470 void swap_crypt_ctx_initialize(void); /* forward */
6471 void
6472 swap_crypt_ctx_initialize(void)
6473 {
6474 unsigned int i;
6475
6476 /*
6477 * No need for locking to protect swap_crypt_ctx_initialized
6478 * because the first use of encryption will come from the
6479 * pageout thread (we won't pagein before there's been a pageout)
6480 * and there's only one pageout thread.
6481 */
6482 if (swap_crypt_ctx_initialized == FALSE) {
6483 for (i = 0;
6484 i < (sizeof (swap_crypt_key) /
6485 sizeof (swap_crypt_key[0]));
6486 i++) {
6487 swap_crypt_key[i] = random();
6488 }
6489 aes_encrypt_key((const unsigned char *) swap_crypt_key,
6490 SWAP_CRYPT_AES_KEY_SIZE,
6491 &swap_crypt_ctx.encrypt);
6492 aes_decrypt_key((const unsigned char *) swap_crypt_key,
6493 SWAP_CRYPT_AES_KEY_SIZE,
6494 &swap_crypt_ctx.decrypt);
6495 swap_crypt_ctx_initialized = TRUE;
6496 }
6497
6498 #if DEBUG
6499 /*
6500 * Validate the encryption algorithms.
6501 */
6502 if (swap_crypt_ctx_tested == FALSE) {
6503 /* initialize */
6504 for (i = 0; i < 4096; i++) {
6505 swap_crypt_test_page_ref[i] = (char) i;
6506 }
6507 /* encrypt */
6508 aes_encrypt_cbc(swap_crypt_test_page_ref,
6509 swap_crypt_null_iv,
6510 PAGE_SIZE / AES_BLOCK_SIZE,
6511 swap_crypt_test_page_encrypt,
6512 &swap_crypt_ctx.encrypt);
6513 /* decrypt */
6514 aes_decrypt_cbc(swap_crypt_test_page_encrypt,
6515 swap_crypt_null_iv,
6516 PAGE_SIZE / AES_BLOCK_SIZE,
6517 swap_crypt_test_page_decrypt,
6518 &swap_crypt_ctx.decrypt);
6519 /* compare result with original */
6520 for (i = 0; i < 4096; i ++) {
6521 if (swap_crypt_test_page_decrypt[i] !=
6522 swap_crypt_test_page_ref[i]) {
6523 panic("encryption test failed");
6524 }
6525 }
6526
6527 /* encrypt again */
6528 aes_encrypt_cbc(swap_crypt_test_page_decrypt,
6529 swap_crypt_null_iv,
6530 PAGE_SIZE / AES_BLOCK_SIZE,
6531 swap_crypt_test_page_decrypt,
6532 &swap_crypt_ctx.encrypt);
6533 /* decrypt in place */
6534 aes_decrypt_cbc(swap_crypt_test_page_decrypt,
6535 swap_crypt_null_iv,
6536 PAGE_SIZE / AES_BLOCK_SIZE,
6537 swap_crypt_test_page_decrypt,
6538 &swap_crypt_ctx.decrypt);
6539 for (i = 0; i < 4096; i ++) {
6540 if (swap_crypt_test_page_decrypt[i] !=
6541 swap_crypt_test_page_ref[i]) {
6542 panic("in place encryption test failed");
6543 }
6544 }
6545
6546 swap_crypt_ctx_tested = TRUE;
6547 }
6548 #endif /* DEBUG */
6549 }
6550
6551 /*
6552 * ENCRYPTED SWAP:
6553 * vm_page_encrypt:
6554 * Encrypt the given page, for secure paging.
6555 * The page might already be mapped at kernel virtual
6556 * address "kernel_mapping_offset". Otherwise, we need
6557 * to map it.
6558 *
6559 * Context:
6560 * The page's object is locked, but this lock will be released
6561 * and re-acquired.
6562 * The page is busy and not accessible by users (not entered in any pmap).
6563 */
6564 void
6565 vm_page_encrypt(
6566 vm_page_t page,
6567 vm_map_offset_t kernel_mapping_offset)
6568 {
6569 kern_return_t kr;
6570 vm_map_size_t kernel_mapping_size;
6571 vm_offset_t kernel_vaddr;
6572 union {
6573 unsigned char aes_iv[AES_BLOCK_SIZE];
6574 struct {
6575 memory_object_t pager_object;
6576 vm_object_offset_t paging_offset;
6577 } vm;
6578 } encrypt_iv;
6579
6580 if (! vm_pages_encrypted) {
6581 vm_pages_encrypted = TRUE;
6582 }
6583
6584 assert(page->busy);
6585 assert(page->dirty || page->precious);
6586
6587 if (page->encrypted) {
6588 /*
6589 * Already encrypted: no need to do it again.
6590 */
6591 vm_page_encrypt_already_encrypted_counter++;
6592 return;
6593 }
6594 ASSERT_PAGE_DECRYPTED(page);
6595
6596 /*
6597 * Take a paging-in-progress reference to keep the object
6598 * alive even if we have to unlock it (in vm_paging_map_object()
6599 * for example)...
6600 */
6601 vm_object_paging_begin(page->object);
6602
6603 if (kernel_mapping_offset == 0) {
6604 /*
6605 * The page hasn't already been mapped in kernel space
6606 * by the caller. Map it now, so that we can access
6607 * its contents and encrypt them.
6608 */
6609 kernel_mapping_size = PAGE_SIZE;
6610 kr = vm_paging_map_object(&kernel_mapping_offset,
6611 page,
6612 page->object,
6613 page->offset,
6614 &kernel_mapping_size,
6615 VM_PROT_READ | VM_PROT_WRITE,
6616 FALSE);
6617 if (kr != KERN_SUCCESS) {
6618 panic("vm_page_encrypt: "
6619 "could not map page in kernel: 0x%x\n",
6620 kr);
6621 }
6622 } else {
6623 kernel_mapping_size = 0;
6624 }
6625 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
6626
6627 if (swap_crypt_ctx_initialized == FALSE) {
6628 swap_crypt_ctx_initialize();
6629 }
6630 assert(swap_crypt_ctx_initialized);
6631
6632 /*
6633 * Prepare an "initial vector" for the encryption.
6634 * We use the "pager" and the "paging_offset" for that
6635 * page to obfuscate the encrypted data a bit more and
6636 * prevent crackers from finding patterns that they could
6637 * use to break the key.
6638 */
6639 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
6640 encrypt_iv.vm.pager_object = page->object->pager;
6641 encrypt_iv.vm.paging_offset =
6642 page->object->paging_offset + page->offset;
6643
6644 /* encrypt the "initial vector" */
6645 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
6646 swap_crypt_null_iv,
6647 1,
6648 &encrypt_iv.aes_iv[0],
6649 &swap_crypt_ctx.encrypt);
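/*
 * The (pager, paging_offset) tuple was run through AES above so that
 * the effective IV depends on the swap key, making identical plaintext
 * pages at different paging offsets encrypt to different ciphertext.
 * Since the pager and paging_offset are stable, vm_page_decrypt() can
 * re-derive exactly the same IV later.
 */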
6650
6651 /*
6652 * Encrypt the page.
6653 */
6654 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
6655 &encrypt_iv.aes_iv[0],
6656 PAGE_SIZE / AES_BLOCK_SIZE,
6657 (unsigned char *) kernel_vaddr,
6658 &swap_crypt_ctx.encrypt);
6659
6660 vm_page_encrypt_counter++;
6661
6662 /*
6663 * Unmap the page from the kernel's address space,
6664 * if we had to map it ourselves. Otherwise, let
6665 * the caller undo the mapping if needed.
6666 */
6667 if (kernel_mapping_size != 0) {
6668 vm_paging_unmap_object(page->object,
6669 kernel_mapping_offset,
6670 kernel_mapping_offset + kernel_mapping_size);
6671 }
6672
6673 /*
6674 * Clear the "reference" and "modified" bits.
6675 * This should clean up any impact the encryption had
6676 * on them.
6677 * The page was kept busy and disconnected from all pmaps,
6678 * so it can't have been referenced or modified from user
6679 * space.
6680 * The software bits will be reset later after the I/O
6681 * has completed (in upl_commit_range()).
6682 */
6683 pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
6684
6685 page->encrypted = TRUE;
6686
6687 vm_object_paging_end(page->object);
6688 }
6689
6690 /*
6691 * ENCRYPTED SWAP:
6692 * vm_page_decrypt:
6693 * Decrypt the given page.
6694 * The page might already be mapped at kernel virtual
6695 * address "kernel_mapping_offset". Otherwise, we need
6696 * to map it.
6697 *
6698 * Context:
6699 * The page's VM object is locked but will be unlocked and relocked.
6700 * The page is busy and not accessible by users (not entered in any pmap).
6701 */
6702 void
6703 vm_page_decrypt(
6704 vm_page_t page,
6705 vm_map_offset_t kernel_mapping_offset)
6706 {
6707 kern_return_t kr;
6708 vm_map_size_t kernel_mapping_size;
6709 vm_offset_t kernel_vaddr;
6710 union {
6711 unsigned char aes_iv[AES_BLOCK_SIZE];
6712 struct {
6713 memory_object_t pager_object;
6714 vm_object_offset_t paging_offset;
6715 } vm;
6716 } decrypt_iv;
6717
6718 assert(page->busy);
6719 assert(page->encrypted);
6720
6721 /*
6722 * Take a paging-in-progress reference to keep the object
6723 * alive even if we have to unlock it (in vm_paging_map_object()
6724 * for example)...
6725 */
6726 vm_object_paging_begin(page->object);
6727
6728 if (kernel_mapping_offset == 0) {
6729 /*
6730 * The page hasn't already been mapped in kernel space
6731 * by the caller. Map it now, so that we can access
6732 * its contents and decrypt them.
6733 */
6734 kernel_mapping_size = PAGE_SIZE;
6735 kr = vm_paging_map_object(&kernel_mapping_offset,
6736 page,
6737 page->object,
6738 page->offset,
6739 &kernel_mapping_size,
6740 VM_PROT_READ | VM_PROT_WRITE,
6741 FALSE);
6742 if (kr != KERN_SUCCESS) {
6743 panic("vm_page_decrypt: "
6744 "could not map page in kernel: 0x%x\n",
6745 kr);
6746 }
6747 } else {
6748 kernel_mapping_size = 0;
6749 }
6750 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
6751
6752 assert(swap_crypt_ctx_initialized);
6753
6754 /*
6755 * Prepare an "initial vector" for the decryption.
6756 * It has to be the same as the "initial vector" we
6757 * used to encrypt that page.
6758 */
6759 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
6760 decrypt_iv.vm.pager_object = page->object->pager;
6761 decrypt_iv.vm.paging_offset =
6762 page->object->paging_offset + page->offset;
6763
6764 /* encrypt the "initial vector" */
6765 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
6766 swap_crypt_null_iv,
6767 1,
6768 &decrypt_iv.aes_iv[0],
6769 &swap_crypt_ctx.encrypt);
6770
6771 /*
6772 * Decrypt the page.
6773 */
6774 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
6775 &decrypt_iv.aes_iv[0],
6776 PAGE_SIZE / AES_BLOCK_SIZE,
6777 (unsigned char *) kernel_vaddr,
6778 &swap_crypt_ctx.decrypt);
6779 vm_page_decrypt_counter++;
6780
6781 /*
6782 * Unmap the page from the kernel's address space,
6783 * if we had to map it ourselves. Otherwise, let
6784 * the caller undo the mapping if needed.
6785 */
6786 if (kernel_mapping_size != 0) {
6787 vm_paging_unmap_object(page->object,
6788 kernel_vaddr,
6789 kernel_vaddr + PAGE_SIZE);
6790 }
6791
6792 /*
6793 * After decryption, the page is actually clean.
6794 * It was encrypted as part of paging, which "cleans"
6795 * the "dirty" pages.
6796 * No one could have accessed it after it was encrypted,
6797 * and the decryption itself doesn't count as dirtying it.
6798 */
6799 page->dirty = FALSE;
6800 assert (page->cs_validated == FALSE);
6801 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
6802 page->encrypted = FALSE;
6803
6804 /*
6805 * We've just modified the page's contents via the data cache and part
6806 * of the new contents might still be in the cache and not yet in RAM.
6807 * Since the page is now available and might get gathered in a UPL to
6808 * be part of a DMA transfer from a driver that expects the memory to
6809 * be coherent at this point, we have to flush the data cache.
6810 */
6811 pmap_sync_page_attributes_phys(page->phys_page);
6812 /*
6813 * Since the page is not mapped yet, some code might assume that it
6814 * doesn't need to invalidate the instruction cache when writing to
6815 * that page. That code relies on "pmapped" being FALSE, so that the
6816 * caches get synchronized when the page is first mapped.
6817 */
6818 assert(pmap_verify_free(page->phys_page));
6819 page->pmapped = FALSE;
6820 page->wpmapped = FALSE;
6821
6822 vm_object_paging_end(page->object);
6823 }
6824
6825 #if DEVELOPMENT || DEBUG
6826 unsigned long upl_encrypt_upls = 0;
6827 unsigned long upl_encrypt_pages = 0;
6828 #endif
6829
6830 /*
6831 * ENCRYPTED SWAP:
6832 *
6833 * upl_encrypt:
6834 * Encrypts all the pages in the UPL, within the specified range.
6835 *
6836 */
6837 void
6838 upl_encrypt(
6839 upl_t upl,
6840 upl_offset_t crypt_offset,
6841 upl_size_t crypt_size)
6842 {
6843 upl_size_t upl_size, subupl_size=crypt_size;
6844 upl_offset_t offset_in_upl, subupl_offset=crypt_offset;
6845 vm_object_t upl_object;
6846 vm_object_offset_t upl_offset;
6847 vm_page_t page;
6848 vm_object_t shadow_object;
6849 vm_object_offset_t shadow_offset;
6850 vm_object_offset_t paging_offset;
6851 vm_object_offset_t base_offset;
6852 int isVectorUPL = 0;
6853 upl_t vector_upl = NULL;
6854
6855 if((isVectorUPL = vector_upl_is_valid(upl)))
6856 vector_upl = upl;
6857
6858 process_upl_to_encrypt:
6859 if(isVectorUPL) {
6860 crypt_size = subupl_size;
6861 crypt_offset = subupl_offset;
6862 upl = vector_upl_subupl_byoffset(vector_upl, &crypt_offset, &crypt_size);
6863 if(upl == NULL)
6864 panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n");
6865 subupl_size -= crypt_size;
6866 subupl_offset += crypt_size;
6867 }
6868
6869 #if DEVELOPMENT || DEBUG
6870 upl_encrypt_upls++;
6871 upl_encrypt_pages += crypt_size / PAGE_SIZE;
6872 #endif
6873 upl_object = upl->map_object;
6874 upl_offset = upl->offset;
6875 upl_size = upl->size;
6876
6877 vm_object_lock(upl_object);
6878
6879 /*
6880 * Find the VM object that contains the actual pages.
6881 */
6882 if (upl_object->pageout) {
6883 shadow_object = upl_object->shadow;
6884 /*
6885 * The offset in the shadow object is actually also
6886 * accounted for in upl->offset. It possibly shouldn't be
6887 * this way, but for now don't account for it twice.
6888 */
6889 shadow_offset = 0;
6890 assert(upl_object->paging_offset == 0); /* XXX ? */
6891 vm_object_lock(shadow_object);
6892 } else {
6893 shadow_object = upl_object;
6894 shadow_offset = 0;
6895 }
6896
6897 paging_offset = shadow_object->paging_offset;
6898 vm_object_paging_begin(shadow_object);
6899
6900 if (shadow_object != upl_object)
6901 vm_object_unlock(upl_object);
6902
6903
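/*
 * Convert the caller's UPL-relative crypt_offset into an offset in the
 * shadow object: upl->offset already includes the pager's
 * paging_offset, so that paging_offset is subtracted back out below.
 */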
6904 base_offset = shadow_offset;
6905 base_offset += upl_offset;
6906 base_offset += crypt_offset;
6907 base_offset -= paging_offset;
6908
6909 assert(crypt_offset + crypt_size <= upl_size);
6910
6911 for (offset_in_upl = 0;
6912 offset_in_upl < crypt_size;
6913 offset_in_upl += PAGE_SIZE) {
6914 page = vm_page_lookup(shadow_object,
6915 base_offset + offset_in_upl);
6916 if (page == VM_PAGE_NULL) {
6917 panic("upl_encrypt: "
6918 "no page for (obj=%p,off=%lld+%d)!\n",
6919 shadow_object,
6920 base_offset,
6921 offset_in_upl);
6922 }
6923 /*
6924 * Disconnect the page from all pmaps, so that nobody can
6925 * access it while it's encrypted. After that point, all
6926 * accesses to this page will cause a page fault and block
6927 * while the page is busy being encrypted. After the
6928 * encryption completes, any access will cause a
6929 * page fault and the page gets decrypted at that time.
6930 */
6931 pmap_disconnect(page->phys_page);
6932 vm_page_encrypt(page, 0);
6933
6934 if (vm_object_lock_avoid(shadow_object)) {
6935 /*
6936 * Give vm_pageout_scan() a chance to convert more
6937 * pages from "clean-in-place" to "clean-and-free",
6938 * if it's interested in the same pages we selected
6939 * in this cluster.
6940 */
6941 vm_object_unlock(shadow_object);
6942 mutex_pause(2);
6943 vm_object_lock(shadow_object);
6944 }
6945 }
6946
6947 vm_object_paging_end(shadow_object);
6948 vm_object_unlock(shadow_object);
6949
6950 if(isVectorUPL && subupl_size)
6951 goto process_upl_to_encrypt;
6952 }
6953
6954 #else /* CRYPTO */
6955 void
6956 upl_encrypt(
6957 __unused upl_t upl,
6958 __unused upl_offset_t crypt_offset,
6959 __unused upl_size_t crypt_size)
6960 {
6961 }
6962
6963 void
6964 vm_page_encrypt(
6965 __unused vm_page_t page,
6966 __unused vm_map_offset_t kernel_mapping_offset)
6967 {
6968 }
6969
6970 void
6971 vm_page_decrypt(
6972 __unused vm_page_t page,
6973 __unused vm_map_offset_t kernel_mapping_offset)
6974 {
6975 }
6976
6977 #endif /* CRYPTO */
6978
6979 void
6980 vm_pageout_queue_steal(vm_page_t page, boolean_t queues_locked)
6981 {
6982 boolean_t pageout;
6983
6984 pageout = page->pageout;
6985
6986 page->list_req_pending = FALSE;
6987 page->cleaning = FALSE;
6988 page->pageout = FALSE;
6989
6990 if (!queues_locked) {
6991 vm_page_lockspin_queues();
6992 }
6993
6994 /*
6995 * need to drop the laundry count...
6996 * we may also need to remove it
6997 * from the I/O paging queue...
6998 * vm_pageout_throttle_up handles both cases
6999 *
7000 * the laundry and pageout_queue flags are cleared...
7001 */
7002 vm_pageout_throttle_up(page);
7003
7004 if (pageout == TRUE) {
7005 /*
7006 * toss the wire count we picked up
7007 * when we initially set this page up
7008 * to be cleaned...
7009 */
7010 vm_page_unwire(page, TRUE);
7011 }
7012 vm_page_steal_pageout_page++;
7013
7014 if (!queues_locked) {
7015 vm_page_unlock_queues();
7016 }
7017 }
7018
7019 upl_t
7020 vector_upl_create(vm_offset_t upl_offset)
7021 {
7022 int vector_upl_size = sizeof(struct _vector_upl);
7023 int i=0;
7024 upl_t upl;
7025 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
7026
7027 upl = upl_create(0,UPL_VECTOR,0);
7028 upl->vector_upl = vector_upl;
7029 upl->offset = upl_offset;
7030 vector_upl->size = 0;
7031 vector_upl->offset = upl_offset;
7032 vector_upl->invalid_upls=0;
7033 vector_upl->num_upls=0;
7034 vector_upl->pagelist = NULL;
7035
7036 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
7037 vector_upl->upl_iostates[i].size = 0;
7038 vector_upl->upl_iostates[i].offset = 0;
7039
7040 }
7041 return upl;
7042 }
7043
7044 void
7045 vector_upl_deallocate(upl_t upl)
7046 {
7047 if(upl) {
7048 vector_upl_t vector_upl = upl->vector_upl;
7049 if(vector_upl) {
7050 if(vector_upl->invalid_upls != vector_upl->num_upls)
7051 panic("Deallocating non-empty Vectored UPL\n");
7052 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
7053 vector_upl->invalid_upls=0;
7054 vector_upl->num_upls = 0;
7055 vector_upl->pagelist = NULL;
7056 vector_upl->size = 0;
7057 vector_upl->offset = 0;
7058 kfree(vector_upl, sizeof(struct _vector_upl));
7059 vector_upl = (vector_upl_t)0xdeadbeef;
7060 }
7061 else
7062 panic("vector_upl_deallocate was passed a non-vectored upl\n");
7063 }
7064 else
7065 panic("vector_upl_deallocate was passed a NULL upl\n");
7066 }
7067
7068 boolean_t
7069 vector_upl_is_valid(upl_t upl)
7070 {
7071 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
7072 vector_upl_t vector_upl = upl->vector_upl;
7073 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xdeadbeef || vector_upl == (vector_upl_t)0xfeedbeef)
7074 return FALSE;
7075 else
7076 return TRUE;
7077 }
7078 return FALSE;
7079 }
7080
7081 boolean_t
7082 vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
7083 {
7084 if(vector_upl_is_valid(upl)) {
7085 vector_upl_t vector_upl = upl->vector_upl;
7086
7087 if(vector_upl) {
7088 if(subupl) {
7089 if(io_size) {
7090 if(io_size < PAGE_SIZE)
7091 io_size = PAGE_SIZE;
7092 subupl->vector_upl = (void*)vector_upl;
7093 vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
7094 vector_upl->size += io_size;
7095 upl->size += io_size;
7096 }
7097 else {
7098 uint32_t i=0,invalid_upls=0;
7099 for(i = 0; i < vector_upl->num_upls; i++) {
7100 if(vector_upl->upl_elems[i] == subupl)
7101 break;
7102 }
7103 if(i == vector_upl->num_upls)
7104 panic("Trying to remove sub-upl when none exists");
7105
7106 vector_upl->upl_elems[i] = NULL;
7107 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
7108 if(invalid_upls == vector_upl->num_upls)
7109 return TRUE;
7110 else
7111 return FALSE;
7112 }
7113 }
7114 else
7115 panic("vector_upl_set_subupl was passed a NULL upl element\n");
7116 }
7117 else
7118 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
7119 }
7120 else
7121 panic("vector_upl_set_subupl was passed a NULL upl\n");
7122
7123 return FALSE;
7124 }
7125
7126 void
7127 vector_upl_set_pagelist(upl_t upl)
7128 {
7129 if(vector_upl_is_valid(upl)) {
7130 uint32_t i=0;
7131 vector_upl_t vector_upl = upl->vector_upl;
7132
7133 if(vector_upl) {
7134 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0;
7135
7136 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
7137
7138 for(i=0; i < vector_upl->num_upls; i++) {
7139 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
7140 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
7141 pagelist_size += cur_upl_pagelist_size;
7142 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
7143 upl->highest_page = vector_upl->upl_elems[i]->highest_page;
7144 }
7145 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
7146 }
7147 else
7148 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
7149 }
7150 else
7151 panic("vector_upl_set_pagelist was passed a NULL upl\n");
7152
7153 }
7154
7155 upl_t
7156 vector_upl_subupl_byindex(upl_t upl, uint32_t index)
7157 {
7158 if(vector_upl_is_valid(upl)) {
7159 vector_upl_t vector_upl = upl->vector_upl;
7160 if(vector_upl) {
7161 if(index < vector_upl->num_upls)
7162 return vector_upl->upl_elems[index];
7163 }
7164 else
7165 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
7166 }
7167 return NULL;
7168 }
7169
7170 upl_t
7171 vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
7172 {
7173 if(vector_upl_is_valid(upl)) {
7174 uint32_t i=0;
7175 vector_upl_t vector_upl = upl->vector_upl;
7176
7177 if(vector_upl) {
7178 upl_t subupl = NULL;
7179 vector_upl_iostates_t subupl_state;
7180
7181 for(i=0; i < vector_upl->num_upls; i++) {
7182 subupl = vector_upl->upl_elems[i];
7183 subupl_state = vector_upl->upl_iostates[i];
7184 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
7185 /* We could have been passed an offset/size pair that belongs
7186 * to a UPL element that has already been committed/aborted.
7187 * If so, return NULL.
7188 */
7189 if(subupl == NULL)
7190 return NULL;
7191 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
7192 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
7193 if(*upl_size > subupl_state.size)
7194 *upl_size = subupl_state.size;
7195 }
7196 if(*upl_offset >= subupl_state.offset)
7197 *upl_offset -= subupl_state.offset;
7198 else if(i)
7199 panic("Vector UPL offset miscalculation\n");
7200 return subupl;
7201 }
7202 }
7203 }
7204 else
7205 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
7206 }
7207 return NULL;
7208 }
7209
7210 void
7211 vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
7212 {
7213 *v_upl_submap = NULL;
7214
7215 if(vector_upl_is_valid(upl)) {
7216 vector_upl_t vector_upl = upl->vector_upl;
7217 if(vector_upl) {
7218 *v_upl_submap = vector_upl->submap;
7219 *submap_dst_addr = vector_upl->submap_dst_addr;
7220 }
7221 else
7222 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
7223 }
7224 else
7225 panic("vector_upl_get_submap was passed a null UPL\n");
7226 }
7227
7228 void
7229 vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
7230 {
7231 if(vector_upl_is_valid(upl)) {
7232 vector_upl_t vector_upl = upl->vector_upl;
7233 if(vector_upl) {
7234 vector_upl->submap = submap;
7235 vector_upl->submap_dst_addr = submap_dst_addr;
7236 }
7237 else
7238 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
7239 }
7240 else
7241 panic("vector_upl_get_submap was passed a NULL UPL\n");
7242 }
7243
7244 void
7245 vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
7246 {
7247 if(vector_upl_is_valid(upl)) {
7248 uint32_t i = 0;
7249 vector_upl_t vector_upl = upl->vector_upl;
7250
7251 if(vector_upl) {
7252 for(i = 0; i < vector_upl->num_upls; i++) {
7253 if(vector_upl->upl_elems[i] == subupl)
7254 break;
7255 }
7256
7257 if(i == vector_upl->num_upls)
7258 panic("setting sub-upl iostate when none exists");
7259
7260 vector_upl->upl_iostates[i].offset = offset;
7261 if(size < PAGE_SIZE)
7262 size = PAGE_SIZE;
7263 vector_upl->upl_iostates[i].size = size;
7264 }
7265 else
7266 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
7267 }
7268 else
7269 panic("vector_upl_set_iostate was passed a NULL UPL\n");
7270 }
7271
7272 void
7273 vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
7274 {
7275 if(vector_upl_is_valid(upl)) {
7276 uint32_t i = 0;
7277 vector_upl_t vector_upl = upl->vector_upl;
7278
7279 if(vector_upl) {
7280 for(i = 0; i < vector_upl->num_upls; i++) {
7281 if(vector_upl->upl_elems[i] == subupl)
7282 break;
7283 }
7284
7285 if(i == vector_upl->num_upls)
7286 panic("getting sub-upl iostate when none exists");
7287
7288 *offset = vector_upl->upl_iostates[i].offset;
7289 *size = vector_upl->upl_iostates[i].size;
7290 }
7291 else
7292 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
7293 }
7294 else
7295 panic("vector_upl_get_iostate was passed a NULL UPL\n");
7296 }
7297
7298 void
7299 vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
7300 {
7301 if(vector_upl_is_valid(upl)) {
7302 vector_upl_t vector_upl = upl->vector_upl;
7303 if(vector_upl) {
7304 if(index < vector_upl->num_upls) {
7305 *offset = vector_upl->upl_iostates[index].offset;
7306 *size = vector_upl->upl_iostates[index].size;
7307 }
7308 else
7309 *offset = *size = 0;
7310 }
7311 else
7312 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
7313 }
7314 else
7315 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
7316 }
7317
7318 upl_page_info_t *
7319 upl_get_internal_vectorupl_pagelist(upl_t upl)
7320 {
7321 return ((vector_upl_t)(upl->vector_upl))->pagelist;
7322 }
7323
7324 void *
7325 upl_get_internal_vectorupl(upl_t upl)
7326 {
7327 return upl->vector_upl;
7328 }
7329
7330 vm_size_t
7331 upl_get_internal_pagelist_offset(void)
7332 {
7333 return sizeof(struct upl);
7334 }
7335
7336 void
7337 upl_clear_dirty(
7338 upl_t upl,
7339 boolean_t value)
7340 {
7341 if (value) {
7342 upl->flags |= UPL_CLEAR_DIRTY;
7343 } else {
7344 upl->flags &= ~UPL_CLEAR_DIRTY;
7345 }
7346 }
7347
7348
7349 #ifdef MACH_BSD
7350
7351 boolean_t upl_device_page(upl_page_info_t *upl)
7352 {
7353 return(UPL_DEVICE_PAGE(upl));
7354 }
7355 boolean_t upl_page_present(upl_page_info_t *upl, int index)
7356 {
7357 return(UPL_PAGE_PRESENT(upl, index));
7358 }
7359 boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
7360 {
7361 return(UPL_SPECULATIVE_PAGE(upl, index));
7362 }
7363 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
7364 {
7365 return(UPL_DIRTY_PAGE(upl, index));
7366 }
7367 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
7368 {
7369 return(UPL_VALID_PAGE(upl, index));
7370 }
7371 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
7372 {
7373 return(UPL_PHYS_PAGE(upl, index));
7374 }
7375
7376
7377 void
7378 vm_countdirtypages(void)
7379 {
7380 vm_page_t m;
7381 int dpages;
7382 int pgopages;
7383 int precpages;
7384
7385
7386 dpages=0;
7387 pgopages=0;
7388 precpages=0;
7389
7390 vm_page_lock_queues();
7391 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
7392 do {
7393 if (m ==(vm_page_t )0) break;
7394
7395 if(m->dirty) dpages++;
7396 if(m->pageout) pgopages++;
7397 if(m->precious) precpages++;
7398
7399 assert(m->object != kernel_object);
7400 m = (vm_page_t) queue_next(&m->pageq);
7401 if (m ==(vm_page_t )0) break;
7402
7403 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
7404 vm_page_unlock_queues();
7405
7406 vm_page_lock_queues();
7407 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
7408 do {
7409 if (m ==(vm_page_t )0) break;
7410
7411 dpages++;
7412 assert(m->dirty);
7413 assert(!m->pageout);
7414 assert(m->object != kernel_object);
7415 m = (vm_page_t) queue_next(&m->pageq);
7416 if (m ==(vm_page_t )0) break;
7417
7418 } while (!queue_end(&vm_page_queue_throttled,(queue_entry_t) m));
7419 vm_page_unlock_queues();
7420
7421 vm_page_lock_queues();
7422 m = (vm_page_t) queue_first(&vm_page_queue_zf);
7423 do {
7424 if (m ==(vm_page_t )0) break;
7425
7426 if(m->dirty) dpages++;
7427 if(m->pageout) pgopages++;
7428 if(m->precious) precpages++;
7429
7430 assert(m->object != kernel_object);
7431 m = (vm_page_t) queue_next(&m->pageq);
7432 if (m ==(vm_page_t )0) break;
7433
7434 } while (!queue_end(&vm_page_queue_zf,(queue_entry_t) m));
7435 vm_page_unlock_queues();
7436
7437 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
7438
7439 dpages=0;
7440 pgopages=0;
7441 precpages=0;
7442
7443 vm_page_lock_queues();
7444 m = (vm_page_t) queue_first(&vm_page_queue_active);
7445
7446 do {
7447 if(m == (vm_page_t )0) break;
7448 if(m->dirty) dpages++;
7449 if(m->pageout) pgopages++;
7450 if(m->precious) precpages++;
7451
7452 assert(m->object != kernel_object);
7453 m = (vm_page_t) queue_next(&m->pageq);
7454 if(m == (vm_page_t )0) break;
7455
7456 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
7457 vm_page_unlock_queues();
7458
7459 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
7460
7461 }
7462 #endif /* MACH_BSD */
7463
7464 ppnum_t upl_get_highest_page(
7465 upl_t upl)
7466 {
7467 return upl->highest_page;
7468 }
7469
7470 upl_size_t upl_get_size(
7471 upl_t upl)
7472 {
7473 return upl->size;
7474 }
7475
7476 #if UPL_DEBUG
7477 kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
7478 {
7479 upl->ubc_alias1 = alias1;
7480 upl->ubc_alias2 = alias2;
7481 return KERN_SUCCESS;
7482 }
7483 int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
7484 {
7485 if(al)
7486 *al = upl->ubc_alias1;
7487 if(al2)
7488 *al2 = upl->ubc_alias2;
7489 return KERN_SUCCESS;
7490 }
7491 #endif /* UPL_DEBUG */
7492
7493
7494
7495 #if MACH_KDB
7496 #include <ddb/db_output.h>
7497 #include <ddb/db_print.h>
7498 #include <vm/vm_print.h>
7499
7500 #define printf kdbprintf
7501 void db_pageout(void);
7502
7503 void
7504 db_vm(void)
7505 {
7506
7507 iprintf("VM Statistics:\n");
7508 db_indent += 2;
7509 iprintf("pages:\n");
7510 db_indent += 2;
7511 iprintf("activ %5d inact %5d free %5d",
7512 vm_page_active_count, vm_page_inactive_count,
7513 vm_page_free_count);
7514 printf(" wire %5d gobbl %5d\n",
7515 vm_page_wire_count, vm_page_gobble_count);
7516 db_indent -= 2;
7517 iprintf("target:\n");
7518 db_indent += 2;
7519 iprintf("min %5d inact %5d free %5d",
7520 vm_page_free_min, vm_page_inactive_target,
7521 vm_page_free_target);
7522 printf(" resrv %5d\n", vm_page_free_reserved);
7523 db_indent -= 2;
7524 iprintf("pause:\n");
7525 db_pageout();
7526 db_indent -= 2;
7527 }
7528
7529 #if MACH_COUNTERS
7530 extern int c_laundry_pages_freed;
7531 #endif /* MACH_COUNTERS */
7532
7533 void
7534 db_pageout(void)
7535 {
7536 iprintf("Pageout Statistics:\n");
7537 db_indent += 2;
7538 iprintf("active %5d inactv %5d\n",
7539 vm_pageout_active, vm_pageout_inactive);
7540 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
7541 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
7542 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
7543 iprintf("used %5d clean %5d dirty %5d\n",
7544 vm_pageout_inactive_used, vm_pageout_inactive_clean,
7545 vm_pageout_inactive_dirty);
7546 #if MACH_COUNTERS
7547 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
7548 #endif /* MACH_COUNTERS */
7549 #if MACH_CLUSTER_STATS
7550 iprintf("Cluster Statistics:\n");
7551 db_indent += 2;
7552 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
7553 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
7554 vm_pageout_cluster_collisions);
7555 iprintf("clusters %5d conversions %5d\n",
7556 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
7557 db_indent -= 2;
7558 iprintf("Target Statistics:\n");
7559 db_indent += 2;
7560 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
7561 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
7562 vm_pageout_target_page_freed);
7563 db_indent -= 2;
7564 #endif /* MACH_CLUSTER_STATS */
7565 db_indent -= 2;
7566 }
7567
7568 #endif /* MACH_KDB */