/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/vm_pageout.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	The proverbial page-out daemon.
 */
#include <mach_pagemap.h>
#include <mach_cluster_stats.h>
#include <advisory_pageout.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/mach_host_server.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_statistics.h>

#include <kern/kern_types.h>
#include <kern/counters.h>
#include <kern/host_statistics.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/kalloc.h>

#include <machine/vm_tuning.h>

#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>	/* must be last */

#include <../bsd/crypto/aes/aes.h>
extern ipc_port_t	memory_manager_default;
#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE
#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 10000	/* maximum iterations of the active queue to move pages to inactive */
#endif	/* VM_PAGEOUT_BURST_ACTIVE_THROTTLE */

#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096	/* maximum iterations of the inactive queue w/o stealing/cleaning a page */
#endif	/* VM_PAGEOUT_BURST_INACTIVE_THROTTLE */

#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
#define VM_PAGEOUT_DEADLOCK_RELIEF 100	/* number of pages to move to break deadlock */
#endif	/* VM_PAGEOUT_DEADLOCK_RELIEF */

#ifndef VM_PAGEOUT_INACTIVE_RELIEF
#define VM_PAGEOUT_INACTIVE_RELIEF 50	/* minimum number of pages to move to the inactive q */
#endif	/* VM_PAGEOUT_INACTIVE_RELIEF */

#ifndef	VM_PAGE_LAUNDRY_MAX
#define	VM_PAGE_LAUNDRY_MAX	16UL	/* maximum pageouts on a given pageout queue */
#endif	/* VM_PAGE_LAUNDRY_MAX */

#ifndef	VM_PAGEOUT_BURST_WAIT
#define	VM_PAGEOUT_BURST_WAIT	30	/* milliseconds per page */
#endif	/* VM_PAGEOUT_BURST_WAIT */

#ifndef	VM_PAGEOUT_EMPTY_WAIT
#define	VM_PAGEOUT_EMPTY_WAIT	200	/* milliseconds */
#endif	/* VM_PAGEOUT_EMPTY_WAIT */

#ifndef	VM_PAGEOUT_DEADLOCK_WAIT
#define	VM_PAGEOUT_DEADLOCK_WAIT	300	/* milliseconds */
#endif	/* VM_PAGEOUT_DEADLOCK_WAIT */

#ifndef	VM_PAGEOUT_IDLE_WAIT
#define	VM_PAGEOUT_IDLE_WAIT	10	/* milliseconds */
#endif	/* VM_PAGEOUT_IDLE_WAIT */
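
/*
 * Illustrative sketch (not part of the original source): vm_pageout_scan()
 * consumes these intervals when it has to pause, converting the chosen
 * millisecond value into an interruptible timed wait, roughly:
 *
 *	msecs = vm_pageout_empty_wait;		// or burst/deadlock/idle wait
 *	assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE,
 *			    msecs, 1000*NSEC_PER_USEC);
 *	thread_block(THREAD_CONTINUE_NULL);
 */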
/*
 *	To obtain a reasonable LRU approximation, the inactive queue
 *	needs to be large enough to give pages on it a chance to be
 *	referenced a second time.  This macro defines the fraction
 *	of active+inactive pages that should be inactive.
 *	The pageout daemon uses it to update vm_page_inactive_target.
 *
 *	If vm_page_free_count falls below vm_page_free_target and
 *	vm_page_inactive_count is below vm_page_inactive_target,
 *	then the pageout daemon starts running.
 */

#ifndef	VM_PAGE_INACTIVE_TARGET
#define	VM_PAGE_INACTIVE_TARGET(avail)	((avail) * 1 / 3)
#endif	/* VM_PAGE_INACTIVE_TARGET */

/*
 *	Once the pageout daemon starts running, it keeps going
 *	until vm_page_free_count meets or exceeds vm_page_free_target.
 */

#ifndef	VM_PAGE_FREE_TARGET
#define	VM_PAGE_FREE_TARGET(free)	(15 + (free) / 80)
#endif	/* VM_PAGE_FREE_TARGET */

/*
 *	The pageout daemon always starts running once vm_page_free_count
 *	falls below vm_page_free_min.
 */

#ifndef	VM_PAGE_FREE_MIN
#define	VM_PAGE_FREE_MIN(free)	(10 + (free) / 100)
#endif	/* VM_PAGE_FREE_MIN */

/*
 *	When vm_page_free_count falls below vm_page_free_reserved,
 *	only vm-privileged threads can allocate pages.  vm-privilege
 *	allows the pageout daemon and default pager (and any other
 *	associated threads needed for default pageout) to continue
 *	operation by dipping into the reserved pool of pages.
 */

#ifndef	VM_PAGE_FREE_RESERVED
#define	VM_PAGE_FREE_RESERVED(n)	\
	((6 * VM_PAGE_LAUNDRY_MAX) + (n))
#endif	/* VM_PAGE_FREE_RESERVED */
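
/*
 * Worked example (illustrative, assuming a 2-CPU machine): with
 * VM_PAGE_LAUNDRY_MAX = 16, VM_PAGE_FREE_RESERVED(2) = 6*16 + 2 = 98
 * pages are held back for vm-privileged threads.  vm_page_free_reserve()
 * later in this file then derives the scan targets from the remaining pool:
 *
 *	free_after_reserve  = vm_page_free_count_init - vm_page_free_reserved;
 *	vm_page_free_min    = vm_page_free_reserved + VM_PAGE_FREE_MIN(free_after_reserve);
 *	vm_page_free_target = vm_page_free_reserved + VM_PAGE_FREE_TARGET(free_after_reserve);
 */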
/*
 * must hold the page queues lock to
 * manipulate this structure
 */
struct vm_pageout_queue {
        queue_head_t	pgo_pending;	/* laundry pages to be processed by pager's iothread */
        unsigned int	pgo_laundry;	/* current count of laundry pages on queue or in flight */
        unsigned int	pgo_maxlaundry;

        unsigned int	pgo_idle:1,	/* iothread is blocked waiting for work to do */
			pgo_busy:1,	/* iothread is currently processing request from pgo_pending */
			pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */
			:0;
};

#define VM_PAGE_Q_THROTTLED(q)		\
        ((q)->pgo_laundry >= (q)->pgo_maxlaundry)
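
/*
 * Illustrative use (sketch, not original source): both vm_pageout_scan()
 * and the pageout iothreads gate their work on this predicate, e.g.:
 *
 *	if (VM_PAGE_Q_THROTTLED(iq)) {
 *		iq->pgo_throttled = TRUE;	// ask for a wakeup when laundry drains
 *		...timed wait on &iq->pgo_laundry...
 *	}
 */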
/*
 * Exported variable used to broadcast the activation of the pageout scan.
 * Working Set uses this to throttle its use of pmap removes.  In this
 * way, code which runs within memory in an uncontested context does
 * not keep encountering soft faults.
 */
unsigned int	vm_pageout_scan_event_counter = 0;
/*
 * Forward declarations for internal routines.
 */

static void vm_pageout_garbage_collect(int);
static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
static void vm_pageout_iothread_external(void);
static void vm_pageout_iothread_internal(void);
static void vm_pageout_queue_steal(vm_page_t);

extern void vm_pageout_continue(void);
extern void vm_pageout_scan(void);
unsigned int vm_pageout_reserved_internal = 0;
unsigned int vm_pageout_reserved_really = 0;

unsigned int vm_pageout_idle_wait = 0;		/* milliseconds */
unsigned int vm_pageout_empty_wait = 0;		/* milliseconds */
unsigned int vm_pageout_burst_wait = 0;		/* milliseconds */
unsigned int vm_pageout_deadlock_wait = 0;	/* milliseconds */
unsigned int vm_pageout_deadlock_relief = 0;
unsigned int vm_pageout_inactive_relief = 0;
unsigned int vm_pageout_burst_active_throttle = 0;
unsigned int vm_pageout_burst_inactive_throttle = 0;

/*
 *	Protection against zero fill flushing live working sets derived
 *	from existing backing store and files
 */
unsigned int vm_accellerate_zf_pageout_trigger = 400;
unsigned int vm_zf_iterator;
unsigned int vm_zf_iterator_count = 40;
unsigned int last_page_zf;
unsigned int vm_zf_count = 0;
/*
 *	These variables record the pageout daemon's actions:
 *	how many pages it looks at and what happens to those pages.
 *	No locking needed because only one thread modifies the variables.
 */

unsigned int vm_pageout_active = 0;		/* debugging */
unsigned int vm_pageout_inactive = 0;		/* debugging */
unsigned int vm_pageout_inactive_throttled = 0;	/* debugging */
unsigned int vm_pageout_inactive_forced = 0;	/* debugging */
unsigned int vm_pageout_inactive_nolock = 0;	/* debugging */
unsigned int vm_pageout_inactive_avoid = 0;	/* debugging */
unsigned int vm_pageout_inactive_busy = 0;	/* debugging */
unsigned int vm_pageout_inactive_absent = 0;	/* debugging */
unsigned int vm_pageout_inactive_used = 0;	/* debugging */
unsigned int vm_pageout_inactive_clean = 0;	/* debugging */
unsigned int vm_pageout_inactive_dirty = 0;	/* debugging */
unsigned int vm_pageout_dirty_no_pager = 0;	/* debugging */
unsigned int vm_pageout_purged_objects = 0;	/* debugging */
unsigned int vm_stat_discard = 0;		/* debugging */
unsigned int vm_stat_discard_sent = 0;		/* debugging */
unsigned int vm_stat_discard_failure = 0;	/* debugging */
unsigned int vm_stat_discard_throttle = 0;	/* debugging */

unsigned int vm_pageout_scan_active_throttled = 0;
unsigned int vm_pageout_scan_inactive_throttled = 0;
unsigned int vm_pageout_scan_throttle = 0;			/* debugging */
unsigned int vm_pageout_scan_burst_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_empty_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_deadlock_detected = 0;		/* debugging */
unsigned int vm_pageout_scan_active_throttle_success = 0;	/* debugging */
unsigned int vm_pageout_scan_inactive_throttle_success = 0;	/* debugging */

/*
 *	Backing store throttle when BS is exhausted
 */
unsigned int	vm_backing_store_low = 0;

unsigned int vm_pageout_out_of_line = 0;
unsigned int vm_pageout_in_place = 0;
/*
 * counters and statistics...
 */
unsigned long vm_page_decrypt_counter = 0;
unsigned long vm_page_decrypt_for_upl_counter = 0;
unsigned long vm_page_encrypt_counter = 0;
unsigned long vm_page_encrypt_abort_counter = 0;
unsigned long vm_page_encrypt_already_encrypted_counter = 0;
boolean_t vm_pages_encrypted = FALSE;	/* are there encrypted pages ? */

struct	vm_pageout_queue vm_pageout_queue_internal;
struct	vm_pageout_queue vm_pageout_queue_external;
/*
 *	Routine:	vm_backing_store_disable
 *		Suspend non-privileged threads wishing to extend
 *		backing store when we are low on backing store
 *		(Synchronized by caller)
 */
void
vm_backing_store_disable(
	boolean_t	disable)
{
	if (disable) {
		vm_backing_store_low = 1;
	} else {
		if (vm_backing_store_low) {
			vm_backing_store_low = 0;
			thread_wakeup((event_t) &vm_backing_store_low);
		}
	}
}
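
/*
 * Usage sketch (illustrative, not original source): a thread without
 * VM_BACKING_STORE_PRIV that wants to extend backing store while
 * vm_backing_store_low is set would block on the flag and be released
 * by vm_backing_store_disable(FALSE) above, roughly:
 *
 *	while (vm_backing_store_low) {
 *		assert_wait((event_t) &vm_backing_store_low, THREAD_UNINT);
 *		thread_block(THREAD_CONTINUE_NULL);
 *	}
 */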
/*
 *	Routine:	vm_pageout_object_allocate
 *		Allocate an object for use as out-of-line memory in a
 *		data_return/data_initialize message.
 *		The page must be in an unlocked object.
 *
 *		If the page belongs to a trusted pager, cleaning in place
 *		will be used, which utilizes a special "pageout object"
 *		containing private alias pages for the real page frames.
 *		Untrusted pagers use normal out-of-line memory.
 */
vm_object_t
vm_pageout_object_allocate(
	vm_page_t		m,
	vm_size_t		size,
	vm_object_offset_t	offset)
{
	vm_object_t	object = m->object;
	vm_object_t	new_object;

	assert(object->pager_ready);

	new_object = vm_object_allocate(size);

	if (object->pager_trusted) {
		assert (offset < object->size);

		vm_object_lock(new_object);
		new_object->pageout = TRUE;
		new_object->shadow = object;
		new_object->can_persist = FALSE;
		new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
		new_object->shadow_offset = offset;
		vm_object_unlock(new_object);

		/*
		 * Take a paging reference on the object. This will be dropped
		 * in vm_pageout_object_terminate()
		 */
		vm_object_lock(object);
		vm_object_paging_begin(object);
		vm_page_lock_queues();
		vm_page_unlock_queues();
		vm_object_unlock(object);

		vm_pageout_in_place++;
	} else {
		vm_pageout_out_of_line++;
	}
	return (new_object);
}
#if	MACH_CLUSTER_STATS
unsigned long vm_pageout_cluster_dirtied = 0;
unsigned long vm_pageout_cluster_cleaned = 0;
unsigned long vm_pageout_cluster_collisions = 0;
unsigned long vm_pageout_cluster_clusters = 0;
unsigned long vm_pageout_cluster_conversions = 0;
unsigned long vm_pageout_target_collisions = 0;
unsigned long vm_pageout_target_page_dirtied = 0;
unsigned long vm_pageout_target_page_freed = 0;
#define CLUSTER_STAT(clause)	clause
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */
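
/*
 * Illustrative note (not original source): statements wrapped in
 * CLUSTER_STAT() are compiled in only when MACH_CLUSTER_STATS is
 * configured; otherwise they expand to nothing.  For example:
 *
 *	CLUSTER_STAT(vm_pageout_cluster_conversions++);
 */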
404 * Routine: vm_pageout_object_terminate
406 * Destroy the pageout_object allocated by
407 * vm_pageout_object_allocate(), and perform all of the
408 * required cleanup actions.
411 * The object must be locked, and will be returned locked.
414 vm_pageout_object_terminate(
417 vm_object_t shadow_object
;
418 boolean_t shadow_internal
;
421 * Deal with the deallocation (last reference) of a pageout object
422 * (used for cleaning-in-place) by dropping the paging references/
423 * freeing pages in the original object.
426 assert(object
->pageout
);
427 shadow_object
= object
->shadow
;
428 vm_object_lock(shadow_object
);
429 shadow_internal
= shadow_object
->internal
;
431 while (!queue_empty(&object
->memq
)) {
433 vm_object_offset_t offset
;
435 p
= (vm_page_t
) queue_first(&object
->memq
);
440 assert(!p
->cleaning
);
446 m
= vm_page_lookup(shadow_object
,
447 offset
+ object
->shadow_offset
);
449 if(m
== VM_PAGE_NULL
)
452 /* used as a trigger on upl_commit etc to recognize the */
453 /* pageout daemon's subseqent desire to pageout a cleaning */
454 /* page. When the bit is on the upl commit code will */
455 /* respect the pageout bit in the target page over the */
456 /* caller's page list indication */
457 m
->dump_cleaning
= FALSE
;
460 * Account for the paging reference taken when
461 * m->cleaning was set on this page.
463 vm_object_paging_end(shadow_object
);
464 assert((m
->dirty
) || (m
->precious
) ||
465 (m
->busy
&& m
->cleaning
));
468 * Handle the trusted pager throttle.
469 * Also decrement the burst throttle (if external).
471 vm_page_lock_queues();
473 vm_pageout_throttle_up(m
);
477 * Handle the "target" page(s). These pages are to be freed if
478 * successfully cleaned. Target pages are always busy, and are
479 * wired exactly once. The initial target pages are not mapped,
480 * (so cannot be referenced or modified) but converted target
481 * pages may have been modified between the selection as an
482 * adjacent page and conversion to a target.
486 assert(m
->wire_count
== 1);
489 #if MACH_CLUSTER_STATS
490 if (m
->wanted
) vm_pageout_target_collisions
++;
493 * Revoke all access to the page. Since the object is
494 * locked, and the page is busy, this prevents the page
495 * from being dirtied after the pmap_disconnect() call
498 * Since the page is left "dirty" but "not modifed", we
499 * can detect whether the page was redirtied during
500 * pageout by checking the modify state.
502 if (pmap_disconnect(m
->phys_page
) & VM_MEM_MODIFIED
)
508 CLUSTER_STAT(vm_pageout_target_page_dirtied
++;)
509 vm_page_unwire(m
);/* reactivates */
510 VM_STAT(reactivations
++);
513 CLUSTER_STAT(vm_pageout_target_page_freed
++;)
514 vm_page_free(m
);/* clears busy, etc. */
516 vm_page_unlock_queues();
520 * Handle the "adjacent" pages. These pages were cleaned in
521 * place, and should be left alone.
522 * If prep_pin_count is nonzero, then someone is using the
523 * page, so make it active.
525 if (!m
->active
&& !m
->inactive
&& !m
->private) {
529 vm_page_deactivate(m
);
531 if((m
->busy
) && (m
->cleaning
)) {
533 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
536 /* We do not re-set m->dirty ! */
537 /* The page was busy so no extraneous activity */
538 /* could have occurred. COPY_INTO is a read into the */
539 /* new pages. CLEAN_IN_PLACE does actually write */
540 /* out the pages but handling outside of this code */
541 /* will take care of resetting dirty. We clear the */
542 /* modify however for the Programmed I/O case. */
543 pmap_clear_modify(m
->phys_page
);
546 if(shadow_object
->absent_count
== 1)
547 vm_object_absent_release(shadow_object
);
549 shadow_object
->absent_count
--;
551 m
->overwriting
= FALSE
;
552 } else if (m
->overwriting
) {
553 /* alternate request page list, write to page_list */
554 /* case. Occurs when the original page was wired */
555 /* at the time of the list request */
556 assert(m
->wire_count
!= 0);
557 vm_page_unwire(m
);/* reactivates */
558 m
->overwriting
= FALSE
;
561 * Set the dirty state according to whether or not the page was
562 * modified during the pageout. Note that we purposefully do
563 * NOT call pmap_clear_modify since the page is still mapped.
564 * If the page were to be dirtied between the 2 calls, this
565 * this fact would be lost. This code is only necessary to
566 * maintain statistics, since the pmap module is always
567 * consulted if m->dirty is false.
569 #if MACH_CLUSTER_STATS
570 m
->dirty
= pmap_is_modified(m
->phys_page
);
572 if (m
->dirty
) vm_pageout_cluster_dirtied
++;
573 else vm_pageout_cluster_cleaned
++;
574 if (m
->wanted
) vm_pageout_cluster_collisions
++;
582 * Wakeup any thread waiting for the page to be un-cleaning.
585 vm_page_unlock_queues();
588 * Account for the paging reference taken in vm_paging_object_allocate.
590 vm_object_paging_end(shadow_object
);
591 vm_object_unlock(shadow_object
);
593 assert(object
->ref_count
== 0);
594 assert(object
->paging_in_progress
== 0);
595 assert(object
->resident_page_count
== 0);
600 * Routine: vm_pageout_setup
602 * Set up a page for pageout (clean & flush).
604 * Move the page to a new object, as part of which it will be
605 * sent to its memory manager in a memory_object_data_write or
606 * memory_object_initialize message.
608 * The "new_object" and "new_offset" arguments
609 * indicate where the page should be moved.
612 * The page in question must not be on any pageout queues,
613 * and must be busy. The object to which it belongs
614 * must be unlocked, and the caller must hold a paging
615 * reference to it. The new_object must not be locked.
617 * This routine returns a pointer to a place-holder page,
618 * inserted at the same offset, to block out-of-order
619 * requests for the page. The place-holder page must
620 * be freed after the data_write or initialize message
623 * The original page is put on a paging queue and marked
628 register vm_page_t m
,
629 register vm_object_t new_object
,
630 vm_object_offset_t new_offset
)
632 register vm_object_t old_object
= m
->object
;
633 vm_object_offset_t paging_offset
;
634 vm_object_offset_t offset
;
635 register vm_page_t holding_page
;
636 register vm_page_t new_m
;
637 boolean_t need_to_wire
= FALSE
;
641 "vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n",
642 (integer_t
)m
->object
, (integer_t
)m
->offset
,
643 (integer_t
)m
, (integer_t
)new_object
,
644 (integer_t
)new_offset
);
645 assert(m
&& m
->busy
&& !m
->absent
&& !m
->fictitious
&& !m
->error
&&
648 assert(m
->dirty
|| m
->precious
);
651 * Create a place-holder page where the old one was, to prevent
652 * attempted pageins of this page while we're unlocked.
654 VM_PAGE_GRAB_FICTITIOUS(holding_page
);
656 vm_object_lock(old_object
);
659 paging_offset
= offset
+ old_object
->paging_offset
;
661 if (old_object
->pager_trusted
) {
663 * This pager is trusted, so we can clean this page
664 * in place. Leave it in the old object, and mark it
665 * cleaning & pageout.
667 new_m
= holding_page
;
668 holding_page
= VM_PAGE_NULL
;
671 * Set up new page to be private shadow of real page.
673 new_m
->phys_page
= m
->phys_page
;
674 new_m
->fictitious
= FALSE
;
675 new_m
->pageout
= TRUE
;
678 * Mark real page as cleaning (indicating that we hold a
679 * paging reference to be released via m_o_d_r_c) and
680 * pageout (indicating that the page should be freed
681 * when the pageout completes).
683 pmap_clear_modify(m
->phys_page
);
684 vm_page_lock_queues();
685 new_m
->private = TRUE
;
691 assert(m
->wire_count
== 1);
692 vm_page_unlock_queues();
696 m
->page_lock
= VM_PROT_NONE
;
698 m
->unlock_request
= VM_PROT_NONE
;
701 * Cannot clean in place, so rip the old page out of the
702 * object, and stick the holding page in. Set new_m to the
703 * page in the new object.
705 vm_page_lock_queues();
706 VM_PAGE_QUEUES_REMOVE(m
);
709 vm_page_insert(holding_page
, old_object
, offset
);
710 vm_page_unlock_queues();
715 new_m
->page_lock
= VM_PROT_NONE
;
716 new_m
->unlock_request
= VM_PROT_NONE
;
718 if (old_object
->internal
)
722 * Record that this page has been written out
725 vm_external_state_set(old_object
->existence_map
, offset
);
726 #endif /* MACH_PAGEMAP */
728 vm_object_unlock(old_object
);
730 vm_object_lock(new_object
);
733 * Put the page into the new object. If it is a not wired
734 * (if it's the real page) it will be activated.
737 vm_page_lock_queues();
738 vm_page_insert(new_m
, new_object
, new_offset
);
742 vm_page_activate(new_m
);
743 PAGE_WAKEUP_DONE(new_m
);
744 vm_page_unlock_queues();
746 vm_object_unlock(new_object
);
749 * Return the placeholder page to simplify cleanup.
751 return (holding_page
);
755 * Routine: vm_pageclean_setup
757 * Purpose: setup a page to be cleaned (made non-dirty), but not
758 * necessarily flushed from the VM page cache.
759 * This is accomplished by cleaning in place.
761 * The page must not be busy, and the object and page
762 * queues must be locked.
769 vm_object_t new_object
,
770 vm_object_offset_t new_offset
)
772 vm_object_t old_object
= m
->object
;
774 assert(!m
->cleaning
);
777 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
778 (integer_t
)old_object
, m
->offset
, (integer_t
)m
,
779 (integer_t
)new_m
, new_offset
);
781 pmap_clear_modify(m
->phys_page
);
782 vm_object_paging_begin(old_object
);
785 * Record that this page has been written out
788 vm_external_state_set(old_object
->existence_map
, m
->offset
);
789 #endif /*MACH_PAGEMAP*/
792 * Mark original page as cleaning in place.
799 * Convert the fictitious page to a private shadow of
802 assert(new_m
->fictitious
);
803 new_m
->fictitious
= FALSE
;
804 new_m
->private = TRUE
;
805 new_m
->pageout
= TRUE
;
806 new_m
->phys_page
= m
->phys_page
;
809 vm_page_insert(new_m
, new_object
, new_offset
);
810 assert(!new_m
->wanted
);
818 vm_object_t new_object
,
819 vm_object_offset_t new_offset
)
822 "vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n",
823 m
, new_m
, new_object
, new_offset
, 0);
825 assert((!m
->busy
) && (!m
->cleaning
));
827 assert(!new_m
->private && !new_m
->fictitious
);
829 pmap_clear_modify(m
->phys_page
);
832 vm_object_paging_begin(m
->object
);
833 vm_page_unlock_queues();
834 vm_object_unlock(m
->object
);
837 * Copy the original page to the new page.
839 vm_page_copy(m
, new_m
);
842 * Mark the old page as clean. A request to pmap_is_modified
843 * will get the right answer.
845 vm_object_lock(m
->object
);
848 vm_object_paging_end(m
->object
);
850 vm_page_lock_queues();
851 if (!m
->active
&& !m
->inactive
)
855 vm_page_insert(new_m
, new_object
, new_offset
);
856 vm_page_activate(new_m
);
857 new_m
->busy
= FALSE
; /* No other thread can be waiting */
862 * Routine: vm_pageout_initialize_page
864 * Causes the specified page to be initialized in
865 * the appropriate memory object. This routine is used to push
866 * pages into a copy-object when they are modified in the
869 * The page is moved to a temporary object and paged out.
872 * The page in question must not be on any pageout queues.
873 * The object to which it belongs must be locked.
874 * The page must be busy, but not hold a paging reference.
877 * Move this page to a completely new object.
880 vm_pageout_initialize_page(
884 vm_object_offset_t paging_offset
;
885 vm_page_t holding_page
;
889 "vm_pageout_initialize_page, page 0x%X\n",
890 (integer_t
)m
, 0, 0, 0, 0);
894 * Verify that we really want to clean this page
901 * Create a paging reference to let us play with the object.
904 paging_offset
= m
->offset
+ object
->paging_offset
;
905 vm_object_paging_begin(object
);
906 if (m
->absent
|| m
->error
|| m
->restart
||
907 (!m
->dirty
&& !m
->precious
)) {
909 panic("reservation without pageout?"); /* alan */
910 vm_object_unlock(object
);
914 /* set the page for future call to vm_fault_list_request */
916 vm_page_lock_queues();
917 pmap_clear_modify(m
->phys_page
);
920 m
->list_req_pending
= TRUE
;
924 vm_page_unlock_queues();
925 vm_object_unlock(object
);
928 * Write the data to its pager.
929 * Note that the data is passed by naming the new object,
930 * not a virtual address; the pager interface has been
931 * manipulated to use the "internal memory" data type.
932 * [The object reference from its allocation is donated
933 * to the eventual recipient.]
935 memory_object_data_initialize(object
->pager
,
939 vm_object_lock(object
);
942 #if MACH_CLUSTER_STATS
943 #define MAXCLUSTERPAGES 16
945 unsigned long pages_in_cluster
;
946 unsigned long pages_at_higher_offsets
;
947 unsigned long pages_at_lower_offsets
;
948 } cluster_stats
[MAXCLUSTERPAGES
];
949 #endif /* MACH_CLUSTER_STATS */
951 boolean_t allow_clustered_pageouts
= FALSE
;
/*
 * vm_pageout_cluster:
 *
 * Given a page, queue it to the appropriate I/O thread,
 * which will page it out and attempt to clean adjacent pages
 * in the same operation.
 *
 * The page must be busy, and the object and queues locked. We will take a
 * paging reference to prevent deallocation or collapse when we
 * release the object lock back at the call site.  The I/O thread
 * is responsible for consuming this reference.
 *
 * The page must not be on any pageout queue.
 */
void
vm_pageout_cluster(vm_page_t m)
{
	vm_object_t	object = m->object;
	struct		vm_pageout_queue *q;

	XPR(XPR_VM_PAGEOUT,
	    "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
	    (integer_t)object, m->offset, (integer_t)m, 0, 0);

	/*
	 * Only a certain kind of page is appreciated here.
	 */
	assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
	assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);

	/*
	 * protect the object from collapse -
	 * locking in the object's paging_offset.
	 */
	vm_object_paging_begin(object);

	/*
	 * set the page for future call to vm_fault_list_request
	 * page should already be marked busy
	 */
	m->list_req_pending = TRUE;

	if (object->internal == TRUE)
		q = &vm_pageout_queue_internal;
	else
		q = &vm_pageout_queue_external;
	q->pgo_laundry++;

	m->pageout_queue = TRUE;
	queue_enter(&q->pgo_pending, m, vm_page_t, pageq);

	if (q->pgo_idle == TRUE) {
		q->pgo_idle = FALSE;
		thread_wakeup((event_t) &q->pgo_pending);
	}
}
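
/*
 * Caller sketch (illustrative, not original source): vm_pageout_scan()
 * hands over a stolen page with the page queues and object locks held
 * and the page marked busy, roughly:
 *
 *	m->busy = TRUE;
 *	if (!m->dirty && !m->precious) {
 *		vm_pageout_inactive_clean++;
 *		vm_page_free(m);		// clean page: reclaim directly
 *	} else {
 *		vm_pageout_inactive_dirty++;
 *		vm_pageout_cluster(m);		// dirty page: queue to the pager's iothread
 *	}
 */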
unsigned long vm_pageout_throttle_up_count = 0;

/*
 * A page is back from laundry.  See if there are some pages waiting to
 * go to laundry and if we can let some of them go now.
 *
 * Object and page queues must be locked.
 */
void
vm_pageout_throttle_up(
	vm_page_t	m)
{
	struct vm_pageout_queue *q;

	vm_pageout_throttle_up_count++;

	assert(m->object != VM_OBJECT_NULL);
	assert(m->object != kernel_object);

	if (m->object->internal == TRUE)
		q = &vm_pageout_queue_internal;
	else
		q = &vm_pageout_queue_external;

	m->laundry = FALSE;
	q->pgo_laundry--;

	if (q->pgo_throttled == TRUE) {
		q->pgo_throttled = FALSE;
		thread_wakeup((event_t) &q->pgo_laundry);
	}
}
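
/*
 * Accounting sketch (illustrative, not original source): pgo_laundry is
 * the in-flight count for a pageout queue.  vm_pageout_cluster() bumps
 * it when a dirty page is queued to pgo_pending; the pager's iothread
 * calls vm_pageout_throttle_up() once the page-out completes, so a
 * throttled vm_pageout_scan() sleeping on &q->pgo_laundry is woken as
 * soon as the count drops back below pgo_maxlaundry, e.g.:
 *
 *	if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q))
 *		thread_wakeup((event_t) &q->pgo_laundry);
 */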
/*
 *	vm_pageout_scan does the dirty work for the pageout daemon.
 *	It returns with vm_page_queue_free_lock held and
 *	vm_page_free_wanted == 0.
 */

#define DELAYED_UNLOCK_LIMIT  (3 * MAX_UPL_TRANSFER)

#define	FCS_IDLE		0
#define	FCS_DELAYED		1
#define	FCS_DEADLOCK_DETECTED	2

struct flow_control {
	int		state;
	mach_timespec_t	ts;
};
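
/*
 * State-machine sketch (illustrative, not original source) for the
 * flow_control used by vm_pageout_scan() when the default-pager queue
 * is throttled:
 *
 *	FCS_IDLE --(queue throttled)--> FCS_DELAYED
 *		(arm ts = now + vm_pageout_deadlock_wait)
 *	FCS_DELAYED --(ts expires while still throttled)--> FCS_DEADLOCK_DETECTED
 *		(move vm_pageout_deadlock_relief + vm_page_free_wanted pages)
 *	FCS_DEADLOCK_DETECTED --(relief target reached)--> re-arm the timer
 *	any state --(laundry drains / progress made)--> FCS_IDLE
 */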
1070 vm_pageout_scan(void)
1072 unsigned int loop_count
= 0;
1073 unsigned int inactive_burst_count
= 0;
1074 unsigned int active_burst_count
= 0;
1075 vm_page_t local_freeq
= 0;
1076 int local_freed
= 0;
1077 int delayed_unlock
= 0;
1078 int need_internal_inactive
= 0;
1079 int refmod_state
= 0;
1080 int vm_pageout_deadlock_target
= 0;
1081 struct vm_pageout_queue
*iq
;
1082 struct vm_pageout_queue
*eq
;
1083 struct flow_control flow_control
;
1084 boolean_t active_throttled
= FALSE
;
1085 boolean_t inactive_throttled
= FALSE
;
1087 unsigned int msecs
= 0;
1091 flow_control
.state
= FCS_IDLE
;
1092 iq
= &vm_pageout_queue_internal
;
1093 eq
= &vm_pageout_queue_external
;
1095 XPR(XPR_VM_PAGEOUT
, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1098 * We want to gradually dribble pages from the active queue
1099 * to the inactive queue. If we let the inactive queue get
1100 * very small, and then suddenly dump many pages into it,
1101 * those pages won't get a sufficient chance to be referenced
1102 * before we start taking them from the inactive queue.
1104 * We must limit the rate at which we send pages to the pagers.
1105 * data_write messages consume memory, for message buffers and
1106 * for map-copy objects. If we get too far ahead of the pagers,
1107 * we can potentially run out of memory.
1109 * We can use the laundry count to limit directly the number
1110 * of pages outstanding to the default pager. A similar
1111 * strategy for external pagers doesn't work, because
1112 * external pagers don't have to deallocate the pages sent them,
1113 * and because we might have to send pages to external pagers
1114 * even if they aren't processing writes. So we also
1115 * use a burst count to limit writes to external pagers.
1117 * When memory is very tight, we can't rely on external pagers to
1118 * clean pages. They probably aren't running, because they
1119 * aren't vm-privileged. If we kept sending dirty pages to them,
1120 * we could exhaust the free list.
1122 vm_page_lock_queues();
1128 * Recalculate vm_page_inactivate_target.
1130 vm_page_inactive_target
= VM_PAGE_INACTIVE_TARGET(vm_page_active_count
+
1131 vm_page_inactive_count
);
1137 if (delayed_unlock
== 0)
1138 vm_page_lock_queues();
1140 active_burst_count
= vm_page_active_count
;
1142 if (active_burst_count
> vm_pageout_burst_active_throttle
)
1143 active_burst_count
= vm_pageout_burst_active_throttle
;
1146 * Move pages from active to inactive.
1148 while ((need_internal_inactive
||
1149 vm_page_inactive_count
< vm_page_inactive_target
) &&
1150 !queue_empty(&vm_page_queue_active
) &&
1151 ((active_burst_count
--) > 0)) {
1153 vm_pageout_active
++;
1155 m
= (vm_page_t
) queue_first(&vm_page_queue_active
);
1157 assert(m
->active
&& !m
->inactive
);
1158 assert(!m
->laundry
);
1159 assert(m
->object
!= kernel_object
);
1162 * Try to lock object; since we've already got the
1163 * page queues lock, we can only 'try' for this one.
1164 * if the 'try' fails, we need to do a mutex_pause
1165 * to allow the owner of the object lock a chance to
1166 * run... otherwise, we're likely to trip over this
1167 * object in the same state as we work our way through
1168 * the queue... clumps of pages associated with the same
1169 * object are fairly typical on the inactive and active queues
1171 if (m
->object
!= object
) {
1172 if (object
!= NULL
) {
1173 vm_object_unlock(object
);
1176 if (!vm_object_lock_try(m
->object
)) {
1178 * move page to end of active queue and continue
1180 queue_remove(&vm_page_queue_active
, m
,
1182 queue_enter(&vm_page_queue_active
, m
,
1185 goto done_with_activepage
;
1190 * if the page is BUSY, then we pull it
1191 * off the active queue and leave it alone.
1192 * when BUSY is cleared, it will get stuck
1193 * back on the appropriate queue
1196 queue_remove(&vm_page_queue_active
, m
,
1198 m
->pageq
.next
= NULL
;
1199 m
->pageq
.prev
= NULL
;
1202 vm_page_active_count
--;
1205 goto done_with_activepage
;
1207 if (need_internal_inactive
) {
1209 * If we're unable to make forward progress
1210 * with the current set of pages on the
1211 * inactive queue due to busy objects or
1212 * throttled pageout queues, then
1213 * move a page that is already clean
1214 * or belongs to a pageout queue that
1215 * isn't currently throttled
1217 active_throttled
= FALSE
;
1219 if (object
->internal
) {
1220 if ((VM_PAGE_Q_THROTTLED(iq
) || !IP_VALID(memory_manager_default
)))
1221 active_throttled
= TRUE
;
1222 } else if (VM_PAGE_Q_THROTTLED(eq
)) {
1223 active_throttled
= TRUE
;
1225 if (active_throttled
== TRUE
) {
1227 refmod_state
= pmap_get_refmod(m
->phys_page
);
1229 if (refmod_state
& VM_MEM_REFERENCED
)
1230 m
->reference
= TRUE
;
1231 if (refmod_state
& VM_MEM_MODIFIED
)
1234 if (m
->dirty
|| m
->precious
) {
1236 * page is dirty and targets a THROTTLED queue
1237 * so all we can do is move it back to the
1238 * end of the active queue to get it out
1241 queue_remove(&vm_page_queue_active
, m
,
1243 queue_enter(&vm_page_queue_active
, m
,
1246 vm_pageout_scan_active_throttled
++;
1248 goto done_with_activepage
;
1251 vm_pageout_scan_active_throttle_success
++;
1252 need_internal_inactive
--;
1255 * Deactivate the page while holding the object
1256 * locked, so we know the page is still not busy.
1257 * This should prevent races between pmap_enter
1258 * and pmap_clear_reference. The page might be
1259 * absent or fictitious, but vm_page_deactivate
1262 vm_page_deactivate(m
);
1263 done_with_activepage
:
1264 if (delayed_unlock
++ > DELAYED_UNLOCK_LIMIT
) {
1266 if (object
!= NULL
) {
1267 vm_object_unlock(object
);
1271 vm_page_free_list(local_freeq
);
1277 vm_page_unlock_queues();
1280 vm_page_lock_queues();
1282 * continue the while loop processing
1283 * the active queue... need to hold
1284 * the page queues lock
1292 /**********************************************************************
1293 * above this point we're playing with the active queue
1294 * below this point we're playing with the throttling mechanisms
1295 * and the inactive queue
1296 **********************************************************************/
1301 * We are done if we have met our target *and*
1302 * nobody is still waiting for a page.
1304 if (vm_page_free_count
+ local_freed
>= vm_page_free_target
) {
1305 if (object
!= NULL
) {
1306 vm_object_unlock(object
);
1310 vm_page_free_list(local_freeq
);
1315 mutex_lock(&vm_page_queue_free_lock
);
1317 if ((vm_page_free_count
>= vm_page_free_target
) &&
1318 (vm_page_free_wanted
== 0)) {
1320 vm_page_unlock_queues();
1322 thread_wakeup((event_t
) &vm_pageout_garbage_collect
);
1325 mutex_unlock(&vm_page_queue_free_lock
);
1330 * Sometimes we have to pause:
1331 * 1) No inactive pages - nothing to do.
1332 * 2) Flow control - default pageout queue is full
1333 * 3) Loop control - no acceptable pages found on the inactive queue
1334 * within the last vm_pageout_burst_inactive_throttle iterations
1336 if ((queue_empty(&vm_page_queue_inactive
) && queue_empty(&vm_page_queue_zf
))) {
1337 vm_pageout_scan_empty_throttle
++;
1338 msecs
= vm_pageout_empty_wait
;
1339 goto vm_pageout_scan_delay
;
1341 } else if (inactive_burst_count
>= vm_pageout_burst_inactive_throttle
) {
1342 vm_pageout_scan_burst_throttle
++;
1343 msecs
= vm_pageout_burst_wait
;
1344 goto vm_pageout_scan_delay
;
1346 } else if (VM_PAGE_Q_THROTTLED(iq
)) {
1348 switch (flow_control
.state
) {
1351 reset_deadlock_timer
:
1352 ts
.tv_sec
= vm_pageout_deadlock_wait
/ 1000;
1353 ts
.tv_nsec
= (vm_pageout_deadlock_wait
% 1000) * 1000 * NSEC_PER_USEC
;
1354 clock_get_system_nanotime(
1355 &flow_control
.ts
.tv_sec
,
1356 (uint32_t *) &flow_control
.ts
.tv_nsec
);
1357 ADD_MACH_TIMESPEC(&flow_control
.ts
, &ts
);
1359 flow_control
.state
= FCS_DELAYED
;
1360 msecs
= vm_pageout_deadlock_wait
;
1365 clock_get_system_nanotime(
1367 (uint32_t *) &ts
.tv_nsec
);
1369 if (CMP_MACH_TIMESPEC(&ts
, &flow_control
.ts
) >= 0) {
1371 * the pageout thread for the default pager is potentially
1372 * deadlocked since the
1373 * default pager queue has been throttled for more than the
1374 * allowable time... we need to move some clean pages or dirty
1375 * pages belonging to the external pagers if they aren't throttled
1376 * vm_page_free_wanted represents the number of threads currently
1377 * blocked waiting for pages... we'll move one page for each of
1378 * these plus a fixed amount to break the logjam... once we're done
1379 * moving this number of pages, we'll re-enter the FSC_DELAYED state
1380 * with a new timeout target since we have no way of knowing
1381 * whether we've broken the deadlock except through observation
1382 * of the queue associated with the default pager... we need to
1383 * stop moving pagings and allow the system to run to see what
1384 * state it settles into.
1386 vm_pageout_deadlock_target
= vm_pageout_deadlock_relief
+ vm_page_free_wanted
;
1387 vm_pageout_scan_deadlock_detected
++;
1388 flow_control
.state
= FCS_DEADLOCK_DETECTED
;
1390 thread_wakeup((event_t
) &vm_pageout_garbage_collect
);
1391 goto consider_inactive
;
1394 * just resniff instead of trying
1395 * to compute a new delay time... we're going to be
1396 * awakened immediately upon a laundry completion,
1397 * so we won't wait any longer than necessary
1399 msecs
= vm_pageout_idle_wait
;
1402 case FCS_DEADLOCK_DETECTED
:
1403 if (vm_pageout_deadlock_target
)
1404 goto consider_inactive
;
1405 goto reset_deadlock_timer
;
1408 vm_pageout_scan_throttle
++;
1409 iq
->pgo_throttled
= TRUE
;
1410 vm_pageout_scan_delay
:
1411 if (object
!= NULL
) {
1412 vm_object_unlock(object
);
1416 vm_page_free_list(local_freeq
);
1421 assert_wait_timeout((event_t
) &iq
->pgo_laundry
, THREAD_INTERRUPTIBLE
, msecs
, 1000*NSEC_PER_USEC
);
1423 counter(c_vm_pageout_scan_block
++);
1425 vm_page_unlock_queues();
1427 thread_block(THREAD_CONTINUE_NULL
);
1429 vm_page_lock_queues();
1432 iq
->pgo_throttled
= FALSE
;
1434 if (loop_count
>= vm_page_inactive_count
) {
1435 if (VM_PAGE_Q_THROTTLED(eq
) || VM_PAGE_Q_THROTTLED(iq
)) {
1437 * Make sure we move enough "appropriate"
1438 * pages to the inactive queue before trying
1441 need_internal_inactive
= vm_pageout_inactive_relief
;
1445 inactive_burst_count
= 0;
1452 flow_control
.state
= FCS_IDLE
;
1455 inactive_burst_count
++;
1456 vm_pageout_inactive
++;
1458 if (!queue_empty(&vm_page_queue_inactive
)) {
1459 m
= (vm_page_t
) queue_first(&vm_page_queue_inactive
);
1461 if (m
->clustered
&& (m
->no_isync
== TRUE
)) {
1465 if (vm_zf_count
< vm_accellerate_zf_pageout_trigger
) {
1469 if((vm_zf_iterator
+=1) >= vm_zf_iterator_count
) {
1473 if (queue_empty(&vm_page_queue_zf
) ||
1474 (((last_page_zf
) || (vm_zf_iterator
== 0)) &&
1475 !queue_empty(&vm_page_queue_inactive
))) {
1476 m
= (vm_page_t
) queue_first(&vm_page_queue_inactive
);
1479 m
= (vm_page_t
) queue_first(&vm_page_queue_zf
);
1483 assert(!m
->active
&& m
->inactive
);
1484 assert(!m
->laundry
);
1485 assert(m
->object
!= kernel_object
);
1488 * Try to lock object; since we've alread got the
1489 * page queues lock, we can only 'try' for this one.
1490 * if the 'try' fails, we need to do a mutex_pause
1491 * to allow the owner of the object lock a chance to
1492 * run... otherwise, we're likely to trip over this
1493 * object in the same state as we work our way through
1494 * the queue... clumps of pages associated with the same
1495 * object are fairly typical on the inactive and active queues
1497 if (m
->object
!= object
) {
1498 if (object
!= NULL
) {
1499 vm_object_unlock(object
);
1502 if (!vm_object_lock_try(m
->object
)) {
1504 * Move page to end and continue.
1505 * Don't re-issue ticket
1508 queue_remove(&vm_page_queue_zf
, m
,
1510 queue_enter(&vm_page_queue_zf
, m
,
1513 queue_remove(&vm_page_queue_inactive
, m
,
1515 queue_enter(&vm_page_queue_inactive
, m
,
1518 vm_pageout_inactive_nolock
++;
1521 * force us to dump any collected free pages
1522 * and to pause before moving on
1524 delayed_unlock
= DELAYED_UNLOCK_LIMIT
+ 1;
1526 goto done_with_inactivepage
;
1531 * If the page belongs to a purgable object with no pending copies
1532 * against it, then we reap all of the pages in the object
1533 * and note that the object has been "emptied". It'll be up to the
1534 * application the discover this and recreate its contents if desired.
1536 if ((object
->purgable
== VM_OBJECT_PURGABLE_VOLATILE
||
1537 object
->purgable
== VM_OBJECT_PURGABLE_EMPTY
) &&
1538 object
->copy
== VM_OBJECT_NULL
) {
1540 (void) vm_object_purge(object
);
1541 vm_pageout_purged_objects
++;
1543 * we've just taken all of the pages from this object,
1544 * so drop the lock now since we're not going to find
1545 * any more pages belonging to it anytime soon
1547 vm_object_unlock(object
);
1550 inactive_burst_count
= 0;
1552 goto done_with_inactivepage
;
1556 * Paging out pages of external objects which
1557 * are currently being created must be avoided.
1558 * The pager may claim for memory, thus leading to a
1559 * possible dead lock between it and the pageout thread,
1560 * if such pages are finally chosen. The remaining assumption
1561 * is that there will finally be enough available pages in the
1562 * inactive pool to page out in order to satisfy all memory
1563 * claimed by the thread which concurrently creates the pager.
1565 if (!object
->pager_initialized
&& object
->pager_created
) {
1567 * Move page to end and continue, hoping that
1568 * there will be enough other inactive pages to
1569 * page out so that the thread which currently
1570 * initializes the pager will succeed.
1571 * Don't re-grant the ticket, the page should
1572 * pulled from the queue and paged out whenever
1573 * one of its logically adjacent fellows is
1577 queue_remove(&vm_page_queue_zf
, m
,
1579 queue_enter(&vm_page_queue_zf
, m
,
1582 vm_zf_iterator
= vm_zf_iterator_count
- 1;
1584 queue_remove(&vm_page_queue_inactive
, m
,
1586 queue_enter(&vm_page_queue_inactive
, m
,
1591 vm_pageout_inactive_avoid
++;
1593 goto done_with_inactivepage
;
1596 * Remove the page from the inactive list.
1599 queue_remove(&vm_page_queue_zf
, m
, vm_page_t
, pageq
);
1601 queue_remove(&vm_page_queue_inactive
, m
, vm_page_t
, pageq
);
1603 m
->pageq
.next
= NULL
;
1604 m
->pageq
.prev
= NULL
;
1605 m
->inactive
= FALSE
;
1607 vm_page_inactive_count
--;
1609 if (m
->busy
|| !object
->alive
) {
1611 * Somebody is already playing with this page.
1612 * Leave it off the pageout queues.
1614 vm_pageout_inactive_busy
++;
1616 goto done_with_inactivepage
;
1620 * If it's absent or in error, we can reclaim the page.
1623 if (m
->absent
|| m
->error
) {
1624 vm_pageout_inactive_absent
++;
1626 if (vm_pageout_deadlock_target
) {
1627 vm_pageout_scan_inactive_throttle_success
++;
1628 vm_pageout_deadlock_target
--;
1631 vm_page_remove(m
); /* clears tabled, object, offset */
1633 vm_object_absent_release(object
);
1635 assert(m
->pageq
.next
== NULL
&&
1636 m
->pageq
.prev
== NULL
);
1637 m
->pageq
.next
= (queue_entry_t
)local_freeq
;
1641 inactive_burst_count
= 0;
1643 goto done_with_inactivepage
;
1646 assert(!m
->private);
1647 assert(!m
->fictitious
);
1650 * If already cleaning this page in place, convert from
1651 * "adjacent" to "target". We can leave the page mapped,
1652 * and vm_pageout_object_terminate will determine whether
1653 * to free or reactivate.
1659 m
->dump_cleaning
= TRUE
;
1662 CLUSTER_STAT(vm_pageout_cluster_conversions
++);
1664 inactive_burst_count
= 0;
1666 goto done_with_inactivepage
;
1670 * If it's being used, reactivate.
1671 * (Fictitious pages are either busy or absent.)
1673 if ( (!m
->reference
) ) {
1674 refmod_state
= pmap_get_refmod(m
->phys_page
);
1676 if (refmod_state
& VM_MEM_REFERENCED
)
1677 m
->reference
= TRUE
;
1678 if (refmod_state
& VM_MEM_MODIFIED
)
1683 vm_page_activate(m
);
1684 VM_STAT(reactivations
++);
1686 vm_pageout_inactive_used
++;
1688 inactive_burst_count
= 0;
1690 goto done_with_inactivepage
;
1694 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
1695 (integer_t
)object
, (integer_t
)m
->offset
, (integer_t
)m
, 0,0);
1698 * we've got a candidate page to steal...
1700 * m->dirty is up to date courtesy of the
1701 * preceding check for m->reference... if
1702 * we get here, then m->reference had to be
1703 * FALSE which means we did a pmap_get_refmod
1704 * and updated both m->reference and m->dirty
1706 * if it's dirty or precious we need to
1707 * see if the target queue is throtttled
1708 * it if is, we need to skip over it by moving it back
1709 * to the end of the inactive queue
1711 inactive_throttled
= FALSE
;
1713 if (m
->dirty
|| m
->precious
) {
1714 if (object
->internal
) {
1715 if ((VM_PAGE_Q_THROTTLED(iq
) || !IP_VALID(memory_manager_default
)))
1716 inactive_throttled
= TRUE
;
1717 } else if (VM_PAGE_Q_THROTTLED(eq
)) {
1718 inactive_throttled
= TRUE
;
1721 if (inactive_throttled
== TRUE
) {
1723 queue_enter(&vm_page_queue_zf
, m
,
1726 queue_enter(&vm_page_queue_inactive
, m
,
1730 vm_page_inactive_count
++;
1733 vm_pageout_scan_inactive_throttled
++;
1735 goto done_with_inactivepage
;
1738 * we've got a page that we can steal...
1739 * eliminate all mappings and make sure
1740 * we have the up-to-date modified state
1741 * first take the page BUSY, so that no new
1742 * mappings can be made
1747 * if we need to do a pmap_disconnect then we
1748 * need to re-evaluate m->dirty since the pmap_disconnect
1749 * provides the true state atomically... the
1750 * page was still mapped up to the pmap_disconnect
1751 * and may have been dirtied at the last microsecond
1753 * we also check for the page being referenced 'late'
1754 * if it was, we first need to do a WAKEUP_DONE on it
1755 * since we already set m->busy = TRUE, before
1756 * going off to reactivate it
1758 * if we don't need the pmap_disconnect, then
1759 * m->dirty is up to date courtesy of the
1760 * earlier check for m->reference... if
1761 * we get here, then m->reference had to be
1762 * FALSE which means we did a pmap_get_refmod
1763 * and updated both m->reference and m->dirty...
1765 if (m
->no_isync
== FALSE
) {
1766 refmod_state
= pmap_disconnect(m
->phys_page
);
1768 if (refmod_state
& VM_MEM_MODIFIED
)
1770 if (refmod_state
& VM_MEM_REFERENCED
) {
1771 m
->reference
= TRUE
;
1773 PAGE_WAKEUP_DONE(m
);
1774 goto was_referenced
;
1778 * If it's clean and not precious, we can free the page.
1780 if (!m
->dirty
&& !m
->precious
) {
1781 vm_pageout_inactive_clean
++;
1784 vm_pageout_cluster(m
);
1786 vm_pageout_inactive_dirty
++;
1788 inactive_burst_count
= 0;
1790 done_with_inactivepage
:
1791 if (delayed_unlock
++ > DELAYED_UNLOCK_LIMIT
) {
1793 if (object
!= NULL
) {
1794 vm_object_unlock(object
);
1798 vm_page_free_list(local_freeq
);
1804 vm_page_unlock_queues();
1808 * back to top of pageout scan loop
int	vm_page_free_count_init;

void
vm_page_free_reserve(
	int pages)
{
	int		free_after_reserve;

	vm_page_free_reserved += pages;

	free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;

	vm_page_free_min = vm_page_free_reserved +
		VM_PAGE_FREE_MIN(free_after_reserve);

	vm_page_free_target = vm_page_free_reserved +
		VM_PAGE_FREE_TARGET(free_after_reserve);

	if (vm_page_free_target < vm_page_free_min + 5)
		vm_page_free_target = vm_page_free_min + 5;
}
/*
 *	vm_pageout is the high level pageout daemon.
 */

void
vm_pageout_continue(void)
{
	vm_pageout_scan_event_counter++;
	vm_pageout_scan();
	/* we hold vm_page_queue_free_lock now */
	assert(vm_page_free_wanted == 0);
	assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
	mutex_unlock(&vm_page_queue_free_lock);

	counter(c_vm_pageout_block++);
	thread_block((thread_continue_t)vm_pageout_continue);
}
/*
 * must be called with the
 * queues and object locks held
 */
static void
vm_pageout_queue_steal(vm_page_t m)
{
	struct vm_pageout_queue *q;

	if (m->object->internal == TRUE)
		q = &vm_pageout_queue_internal;
	else
		q = &vm_pageout_queue_external;

	m->pageout_queue = FALSE;
	queue_remove(&q->pgo_pending, m, vm_page_t, pageq);

	m->pageq.next = NULL;
	m->pageq.prev = NULL;

	vm_object_paging_end(m->object);
}
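
/*
 * Usage sketch (illustrative, not original source): when a page that is
 * already sitting on one of the pgo_pending queues has to be taken back
 * (for example because it is being freed or its object is being torn
 * down), the caller takes the page queues and object locks and then:
 *
 *	if (m->pageout_queue)
 *		vm_pageout_queue_steal(m);
 */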
1883 #ifdef FAKE_DEADLOCK
1885 #define FAKE_COUNT 5000
1887 int internal_count
= 0;
1888 int fake_deadlock
= 0;
1893 vm_pageout_iothread_continue(struct vm_pageout_queue
*q
)
1897 boolean_t need_wakeup
;
1899 vm_page_lock_queues();
1901 while ( !queue_empty(&q
->pgo_pending
) ) {
1904 queue_remove_first(&q
->pgo_pending
, m
, vm_page_t
, pageq
);
1905 m
->pageout_queue
= FALSE
;
1906 vm_page_unlock_queues();
1908 m
->pageq
.next
= NULL
;
1909 m
->pageq
.prev
= NULL
;
1910 #ifdef FAKE_DEADLOCK
1911 if (q
== &vm_pageout_queue_internal
) {
1917 if ((internal_count
== FAKE_COUNT
)) {
1919 pg_count
= vm_page_free_count
+ vm_page_free_reserved
;
1921 if (kmem_alloc(kernel_map
, &addr
, PAGE_SIZE
* pg_count
) == KERN_SUCCESS
) {
1922 kmem_free(kernel_map
, addr
, PAGE_SIZE
* pg_count
);
1931 if (!object
->pager_initialized
) {
1932 vm_object_lock(object
);
1935 * If there is no memory object for the page, create
1936 * one and hand it to the default pager.
1939 if (!object
->pager_initialized
)
1940 vm_object_collapse(object
,
1941 (vm_object_offset_t
) 0,
1943 if (!object
->pager_initialized
)
1944 vm_object_pager_create(object
);
1945 if (!object
->pager_initialized
) {
1947 * Still no pager for the object.
1948 * Reactivate the page.
1950 * Should only happen if there is no
1953 m
->list_req_pending
= FALSE
;
1954 m
->cleaning
= FALSE
;
1958 vm_pageout_throttle_up(m
);
1960 vm_page_lock_queues();
1961 vm_pageout_dirty_no_pager
++;
1962 vm_page_activate(m
);
1963 vm_page_unlock_queues();
1966 * And we are done with it.
1968 PAGE_WAKEUP_DONE(m
);
1970 vm_object_paging_end(object
);
1971 vm_object_unlock(object
);
1973 vm_page_lock_queues();
1975 } else if (object
->pager
== MEMORY_OBJECT_NULL
) {
1977 * This pager has been destroyed by either
1978 * memory_object_destroy or vm_object_destroy, and
1979 * so there is nowhere for the page to go.
1980 * Just free the page... VM_PAGE_FREE takes
1981 * care of cleaning up all the state...
1982 * including doing the vm_pageout_throttle_up
1986 vm_object_paging_end(object
);
1987 vm_object_unlock(object
);
1989 vm_page_lock_queues();
1992 vm_object_unlock(object
);
1995 * we expect the paging_in_progress reference to have
1996 * already been taken on the object before it was added
1997 * to the appropriate pageout I/O queue... this will
1998 * keep the object from being terminated and/or the
1999 * paging_offset from changing until the I/O has
2000 * completed... therefore no need to lock the object to
2001 * pull the paging_offset from it.
2003 * Send the data to the pager.
2004 * any pageout clustering happens there
2006 memory_object_data_return(object
->pager
,
2007 m
->offset
+ object
->paging_offset
,
2015 vm_object_lock(object
);
2016 vm_object_paging_end(object
);
2017 vm_object_unlock(object
);
2019 vm_page_lock_queues();
2021 assert_wait((event_t
) q
, THREAD_UNINT
);
2024 if (q
->pgo_throttled
== TRUE
&& !VM_PAGE_Q_THROTTLED(q
)) {
2025 q
->pgo_throttled
= FALSE
;
2028 need_wakeup
= FALSE
;
2030 q
->pgo_busy
= FALSE
;
2032 vm_page_unlock_queues();
2034 if (need_wakeup
== TRUE
)
2035 thread_wakeup((event_t
) &q
->pgo_laundry
);
2037 thread_block_parameter((thread_continue_t
)vm_pageout_iothread_continue
, (void *) &q
->pgo_pending
);
static void
vm_pageout_iothread_external(void)
{
	vm_pageout_iothread_continue(&vm_pageout_queue_external);
}

static void
vm_pageout_iothread_internal(void)
{
	thread_t	self = current_thread();

	self->options |= TH_OPT_VMPRIV;

	vm_pageout_iothread_continue(&vm_pageout_queue_internal);
}
static void
vm_pageout_garbage_collect(int collect)
{
	if (collect) {
		/*
		 * consider_zone_gc should be last, because the other operations
		 * might return memory to zones.
		 */
		consider_machine_collect();
		consider_zone_gc();

		consider_machine_adjust();
	}
	assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);

	thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
}
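
/*
 * Pattern note (illustrative, not original source): the garbage-collect
 * thread parks on its own function as a continuation and is restarted by
 * the wakeups issued elsewhere in this file, e.g. from vm_pageout_scan():
 *
 *	thread_wakeup((event_t) &vm_pageout_garbage_collect);
 *
 * Each wakeup therefore runs one full collect pass (collect != 0) and
 * then blocks again.
 */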
2089 thread_t self
= current_thread();
2091 kern_return_t result
;
2095 * Set thread privileges.
2099 self
->priority
= BASEPRI_PREEMPT
- 1;
2100 set_sched_pri(self
, self
->priority
);
2101 thread_unlock(self
);
2105 * Initialize some paging parameters.
2108 if (vm_pageout_idle_wait
== 0)
2109 vm_pageout_idle_wait
= VM_PAGEOUT_IDLE_WAIT
;
2111 if (vm_pageout_burst_wait
== 0)
2112 vm_pageout_burst_wait
= VM_PAGEOUT_BURST_WAIT
;
2114 if (vm_pageout_empty_wait
== 0)
2115 vm_pageout_empty_wait
= VM_PAGEOUT_EMPTY_WAIT
;
2117 if (vm_pageout_deadlock_wait
== 0)
2118 vm_pageout_deadlock_wait
= VM_PAGEOUT_DEADLOCK_WAIT
;
2120 if (vm_pageout_deadlock_relief
== 0)
2121 vm_pageout_deadlock_relief
= VM_PAGEOUT_DEADLOCK_RELIEF
;
2123 if (vm_pageout_inactive_relief
== 0)
2124 vm_pageout_inactive_relief
= VM_PAGEOUT_INACTIVE_RELIEF
;
2126 if (vm_pageout_burst_active_throttle
== 0)
2127 vm_pageout_burst_active_throttle
= VM_PAGEOUT_BURST_ACTIVE_THROTTLE
;
2129 if (vm_pageout_burst_inactive_throttle
== 0)
2130 vm_pageout_burst_inactive_throttle
= VM_PAGEOUT_BURST_INACTIVE_THROTTLE
;
2133 * Set kernel task to low backing store privileged
2136 task_lock(kernel_task
);
2137 kernel_task
->priv_flags
|= VM_BACKING_STORE_PRIV
;
2138 task_unlock(kernel_task
);
2140 vm_page_free_count_init
= vm_page_free_count
;
2143 * even if we've already called vm_page_free_reserve
2144 * call it again here to insure that the targets are
2145 * accurately calculated (it uses vm_page_free_count_init)
2146 * calling it with an arg of 0 will not change the reserve
2147 * but will re-calculate free_min and free_target
2149 if (vm_page_free_reserved
< VM_PAGE_FREE_RESERVED(processor_count
)) {
2150 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count
)) - vm_page_free_reserved
);
2152 vm_page_free_reserve(0);
2155 queue_init(&vm_pageout_queue_external
.pgo_pending
);
2156 vm_pageout_queue_external
.pgo_maxlaundry
= VM_PAGE_LAUNDRY_MAX
;
2157 vm_pageout_queue_external
.pgo_laundry
= 0;
2158 vm_pageout_queue_external
.pgo_idle
= FALSE
;
2159 vm_pageout_queue_external
.pgo_busy
= FALSE
;
2160 vm_pageout_queue_external
.pgo_throttled
= FALSE
;
2162 queue_init(&vm_pageout_queue_internal
.pgo_pending
);
2163 vm_pageout_queue_internal
.pgo_maxlaundry
= VM_PAGE_LAUNDRY_MAX
;
2164 vm_pageout_queue_internal
.pgo_laundry
= 0;
2165 vm_pageout_queue_internal
.pgo_idle
= FALSE
;
2166 vm_pageout_queue_internal
.pgo_busy
= FALSE
;
2167 vm_pageout_queue_internal
.pgo_throttled
= FALSE
;
2170 result
= kernel_thread_start_priority((thread_continue_t
)vm_pageout_iothread_internal
, NULL
, BASEPRI_PREEMPT
- 1, &thread
);
2171 if (result
!= KERN_SUCCESS
)
2172 panic("vm_pageout_iothread_internal: create failed");
2174 thread_deallocate(thread
);
2177 result
= kernel_thread_start_priority((thread_continue_t
)vm_pageout_iothread_external
, NULL
, BASEPRI_PREEMPT
- 1, &thread
);
2178 if (result
!= KERN_SUCCESS
)
2179 panic("vm_pageout_iothread_external: create failed");
2181 thread_deallocate(thread
);
2184 result
= kernel_thread_start_priority((thread_continue_t
)vm_pageout_garbage_collect
, NULL
, BASEPRI_PREEMPT
- 2, &thread
);
2185 if (result
!= KERN_SUCCESS
)
2186 panic("vm_pageout_garbage_collect: create failed");
2188 thread_deallocate(thread
);
2190 vm_object_reaper_init();
2192 vm_pageout_continue();
2203 int page_field_size
; /* bit field in word size buf */
2205 page_field_size
= 0;
2206 if (flags
& UPL_CREATE_LITE
) {
2207 page_field_size
= ((size
/PAGE_SIZE
) + 7) >> 3;
2208 page_field_size
= (page_field_size
+ 3) & 0xFFFFFFFC;
2210 if(flags
& UPL_CREATE_INTERNAL
) {
2211 upl
= (upl_t
)kalloc(sizeof(struct upl
)
2212 + (sizeof(struct upl_page_info
)*(size
/PAGE_SIZE
))
2215 upl
= (upl_t
)kalloc(sizeof(struct upl
) + page_field_size
);
2218 upl
->src_object
= NULL
;
2219 upl
->kaddr
= (vm_offset_t
)0;
2221 upl
->map_object
= NULL
;
2223 upl
->highest_page
= 0;
2226 upl
->ubc_alias1
= 0;
2227 upl
->ubc_alias2
= 0;
2228 #endif /* UPL_DEBUG */
	int	page_field_size;	/* bit field in word size buf */

#ifdef UPL_DEBUG
	{
		upl_t		upl_ele;
		vm_object_t	object;

		if (upl->map_object->pageout) {
			object = upl->map_object->shadow;
		} else {
			object = upl->map_object;
		}
		vm_object_lock(object);
		queue_iterate(&object->uplq, upl_ele, upl_t, uplq) {
			if (upl_ele == upl) {
				queue_remove(&object->uplq,
					     upl_ele, upl_t, uplq);
				break;
			}
		}
		vm_object_unlock(object);
	}
#endif /* UPL_DEBUG */
	/* drop a reference on the map_object whether or */
	/* not a pageout object is inserted */
	if (upl->map_object->pageout)
		vm_object_deallocate(upl->map_object);

	page_field_size = 0;
	if (upl->flags & UPL_LITE) {
		page_field_size = ((upl->size/PAGE_SIZE) + 7) >> 3;
		page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
	}
	if (upl->flags & UPL_INTERNAL) {
		kfree(upl,
		      sizeof(struct upl) +
		      (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE))
		      + page_field_size);
	} else {
		kfree(upl, sizeof(struct upl) + page_field_size);
	}
void uc_upl_dealloc(upl_t upl);

__private_extern__ void
uc_upl_dealloc(
	upl_t	upl)
{
	upl->ref_count -= 1;
	if (upl->ref_count == 0) {
		upl_destroy(upl);
	}
}

	upl->ref_count -= 1;
	if (upl->ref_count == 0) {
		upl_destroy(upl);
	}
/*
 * Statistics about UPL enforcement of copy-on-write obligations.
 */
unsigned long upl_cow = 0;
unsigned long upl_cow_again = 0;
unsigned long upl_cow_contiguous = 0;
unsigned long upl_cow_pages = 0;
unsigned long upl_cow_again_pages = 0;
unsigned long upl_cow_contiguous_pages = 0;
/*
 *	Routine:	vm_object_upl_request
 *		Cause the population of a portion of a vm_object.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		A page list structure, listing the physical pages
 *		will be returned upon request.
 *		This function is called by the file system or any other
 *		supplier of backing store to a pager.
 *		IMPORTANT NOTE: The caller must still respect the relationship
 *		between the vm_object and its backing memory object.  The
 *		caller MUST NOT substitute changes in the backing file
 *		without first doing a memory_object_lock_request on the
 *		target range unless it is known that the pages are not
 *		shared with another entity at the pager level.
 *
 *		if a page list structure is present
 *		return the mapped physical pages, where a
 *		page is not present, return a non-initialized
 *		one.  If the no_sync bit is turned on, don't
 *		call the pager unlock to synchronize with other
 *		possible copies of the page.  Leave pages busy
 *		in the original object, if a page list structure
 *		was specified.  When a commit of the page list
 *		pages is done, the dirty bit will be set for each one.
 *
 *		If a page list structure is present, return
 *		all mapped pages.  Where a page does not exist
 *		map a zero filled one.  Leave pages busy in
 *		the original object.  If a page list structure
 *		is not specified, this call is a no-op.
 *
 *	Note:	access of default pager objects has a rather interesting
 *		twist.  The caller of this routine, presumably the file system
 *		page cache handling code, will never actually make a request
 *		against a default pager backed object.  Only the default
 *		pager will make requests on backing store related vm_objects.
 *		In this way the default pager can maintain the relationship
 *		between backing store files (abstract memory objects) and
 *		the vm_objects (cache objects) they support.
 */
__private_extern__ kern_return_t
vm_object_upl_request(
	vm_object_offset_t	offset,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	/* ... */)
{
	vm_page_t		dst_page = VM_PAGE_NULL;
	vm_object_offset_t	dst_offset = offset;
	upl_size_t		xfer_size = size;
	boolean_t		do_m_lock = FALSE;
#if MACH_CLUSTER_STATS
	boolean_t		encountered_lrp = FALSE;
#endif
	vm_page_t		alias_page = NULL;
	wpl_array_t		lite_list = NULL;
	vm_object_t		last_copy_object;
	if (cntrl_flags & ~UPL_VALID_FLAGS) {
		/*
		 * For forward compatibility's sake,
		 * reject any unknown flag.
		 */
		return KERN_INVALID_VALUE;
	}

	page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK)
					>> UPL_PAGE_TICKET_SHIFT;

	if (((size/PAGE_SIZE) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
		size = MAX_UPL_TRANSFER * PAGE_SIZE;
	}

	if (cntrl_flags & UPL_SET_INTERNAL)
		if (page_list_count != NULL)
			*page_list_count = MAX_UPL_TRANSFER;

	if ((!object->internal) && (object->paging_offset != 0))
		panic("vm_object_upl_request: external object with non-zero paging offset\n");

	if ((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
		return KERN_SUCCESS;
	}

	vm_object_lock(object);
	vm_object_paging_begin(object);
	vm_object_unlock(object);
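	/*
	 * The UPL_VALID_FLAGS test at the top of this routine is the usual
	 * forward-compatibility pattern: a mask of every currently defined
	 * bit, with any caller-supplied bit outside that mask rejected
	 * early.  Minimal sketch with made-up flag values:
	 */
#if 0	/* illustrative only */
#define F_A		0x1
#define F_B		0x2
#define F_VALID		(F_A | F_B)

static int
check_flags(int flags)
{
	if (flags & ~F_VALID)
		return -1;	/* an unknown bit is set: reject */
	return 0;
}
#endif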
	if (cntrl_flags & UPL_SET_INTERNAL) {
		if (cntrl_flags & UPL_SET_LITE) {
			uintptr_t page_field_size;

			upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
					 size);
			user_page_list = (upl_page_info_t *)
				(((uintptr_t)upl) + sizeof(struct upl));
			lite_list = (wpl_array_t)
				(((uintptr_t)user_page_list) +
				 ((size/PAGE_SIZE) *
				  sizeof(upl_page_info_t)));
			page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
			page_field_size =
				(page_field_size + 3) & 0xFFFFFFFC;
			bzero((char *)lite_list, page_field_size);
			upl->flags =
				UPL_LITE | UPL_INTERNAL;
		} else {
			upl = upl_create(UPL_CREATE_INTERNAL, size);
			user_page_list = (upl_page_info_t *)
				(((uintptr_t)upl) + sizeof(struct upl));
			upl->flags = UPL_INTERNAL;
		}
	} else {
		if (cntrl_flags & UPL_SET_LITE) {
			uintptr_t page_field_size;

			upl = upl_create(UPL_CREATE_LITE, size);
			lite_list = (wpl_array_t)
				(((uintptr_t)upl) + sizeof(struct upl));
			page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
			page_field_size =
				(page_field_size + 3) & 0xFFFFFFFC;
			bzero((char *)lite_list, page_field_size);
			upl->flags = UPL_LITE;
		} else {
			upl = upl_create(UPL_CREATE_EXTERNAL, size);
		}
	}
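	/*
	 * Sketch (illustrative, not from the original source) of how the
	 * embedded pointers above are derived for an INTERNAL + LITE UPL:
	 * the upl_page_info array sits immediately after the upl header
	 * and the lite bitmap immediately after that, all in one
	 * allocation.  A page's bit lives in word pg_num >> 5 at bit
	 * position pg_num & 31.  The struct and helper names below are
	 * hypothetical; uintptr_t comes from <stdint.h> in userland.
	 */
#if 0
struct toy_upl {
	int		flags;
	unsigned int	size;		/* bytes covered by the UPL */
	/* page-info array and lite bitmap follow in the same allocation */
};

static void
toy_upl_layout(struct toy_upl *u, unsigned int page_info_size,
	       unsigned int page_size, void **page_list, unsigned int **lite_list)
{
	unsigned int npages = u->size / page_size;

	*page_list = (void *)(((uintptr_t)u) + sizeof(struct toy_upl));
	*lite_list = (unsigned int *)(((uintptr_t)*page_list) + npages * page_info_size);
}

static void
toy_lite_set(unsigned int *lite_list, unsigned int pg_num)
{
	lite_list[pg_num >> 5] |= 1 << (pg_num & 31);	/* same indexing as the kernel's lite_list */
}
#endif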
	if (object->phys_contiguous) {
		if ((cntrl_flags & UPL_WILL_MODIFY) &&
		    object->copy != VM_OBJECT_NULL) {
			/* Honor copy-on-write obligations */

			/*
			 * We could still have a race...
			 * A is here building the UPL for a write().
			 * A pushes the pages to the current copy
			 *   object.
			 * A returns the UPL to the caller.
			 * B comes along and establishes another
			 *   private mapping on this object, inserting
			 *   a new copy object between the original
			 *   object and the old copy object.
			 * B reads a page and gets the original contents
			 *   from the original object.
			 * A modifies the page in the original object.
			 * B reads the page again and sees A's changes,
			 *
			 * The problem is that the pages are not
			 * marked "busy" in the original object, so
			 * nothing prevents B from reading it before
			 * A's changes are completed.
			 *
			 * The "paging_in_progress" might protect us
			 * from the insertion of a new copy object
			 * though... To be verified.
			 */
			vm_object_lock_request(object,
					       /* ... */
					       MEMORY_OBJECT_COPY_SYNC,
					       /* ... */);
			upl_cow_contiguous++;
			upl_cow_contiguous_pages += size >> PAGE_SHIFT;
		}

		upl->map_object = object;
		/* don't need any shadow mappings for this one */
		/* since it is already I/O memory */
		upl->flags |= UPL_DEVICE_MEMORY;

		/* paging_in_progress protects paging_offset */
		upl->offset = offset + object->paging_offset;

		if (user_page_list) {
			user_page_list[0].phys_addr =
				(offset + object->shadow_offset)>>PAGE_SHIFT;
			user_page_list[0].device = TRUE;
		}
		upl->highest_page = (offset + object->shadow_offset + size - 1)>>PAGE_SHIFT;

		if (page_list_count != NULL) {
			if (upl->flags & UPL_INTERNAL) {
				*page_list_count = 0;
			} else {
				*page_list_count = 1;
			}
		}
		return KERN_SUCCESS;
	}
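	/*
	 * Sketch (illustrative only) of the page-frame arithmetic used in
	 * the device-memory case above: the first frame covered is
	 * (offset + shadow_offset) >> PAGE_SHIFT and the last frame
	 * covered by "size" bytes is
	 * (offset + shadow_offset + size - 1) >> PAGE_SHIFT.
	 * The 4 KB page size assumed below is for illustration only.
	 */
#if 0
#define TOY_PAGE_SHIFT	12

static void
frame_range(unsigned long long base, unsigned long long size,
	    unsigned long long *first, unsigned long long *last)
{
	*first = base >> TOY_PAGE_SHIFT;
	*last  = (base + size - 1) >> TOY_PAGE_SHIFT;
	/* e.g. base = 0x10000, size = 0x3000 -> frames 0x10 .. 0x12 */
}
#endif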
	if (user_page_list)
		user_page_list[0].device = FALSE;

	if (cntrl_flags & UPL_SET_LITE) {
		upl->map_object = object;
	} else {
		upl->map_object = vm_object_allocate(size);
		/*
		 * No need to lock the new object: nobody else knows
		 * about it yet, so it's all ours so far.
		 */
		upl->map_object->shadow = object;
		upl->map_object->pageout = TRUE;
		upl->map_object->can_persist = FALSE;
		upl->map_object->copy_strategy =
			MEMORY_OBJECT_COPY_NONE;
		upl->map_object->shadow_offset = offset;
		upl->map_object->wimg_bits = object->wimg_bits;
	}
	if (!(cntrl_flags & UPL_SET_LITE)) {
		VM_PAGE_GRAB_FICTITIOUS(alias_page);
	}

	/*
	 * Just mark the UPL as "encrypted" here.
	 * We'll actually encrypt the pages later,
	 * in upl_encrypt(), when the caller has
	 * selected which pages need to go to swap.
	 */
	if (cntrl_flags & UPL_ENCRYPT) {
		upl->flags |= UPL_ENCRYPTED;
	}
	if (cntrl_flags & UPL_FOR_PAGEOUT) {
		upl->flags |= UPL_PAGEOUT;
	}
	vm_object_lock(object);

	/* we can lock in the paging_offset once paging_in_progress is set */
	upl->offset = offset + object->paging_offset;

#ifdef UPL_DEBUG
	queue_enter(&object->uplq, upl, upl_t, uplq);
#endif /* UPL_DEBUG */
	if ((cntrl_flags & UPL_WILL_MODIFY) &&
	    object->copy != VM_OBJECT_NULL) {
		/* Honor copy-on-write obligations */

		/*
		 * The caller is gathering these pages and
		 * might modify their contents.  We need to
		 * make sure that the copy object has its own
		 * private copies of these pages before we let
		 * the caller modify them.
		 */
		vm_object_update(object,
				 /* ... */
				 FALSE,		/* should_return */
				 MEMORY_OBJECT_COPY_SYNC,
				 /* ... */);
		upl_cow_pages += size >> PAGE_SHIFT;
	}
	/* remember which copy object we synchronized with */
	last_copy_object = object->copy;
2600 if(cntrl_flags
& UPL_COPYOUT_FROM
) {
2601 upl
->flags
|= UPL_PAGE_SYNC_DONE
;
2604 if((alias_page
== NULL
) &&
2605 !(cntrl_flags
& UPL_SET_LITE
)) {
2606 vm_object_unlock(object
);
2607 VM_PAGE_GRAB_FICTITIOUS(alias_page
);
2608 vm_object_lock(object
);
2610 if ( ((dst_page
= vm_page_lookup(object
, dst_offset
)) == VM_PAGE_NULL
) ||
2611 dst_page
->fictitious
||
2614 (dst_page
->wire_count
&& !dst_page
->pageout
) ||
2616 ((!dst_page
->inactive
) && (cntrl_flags
& UPL_FOR_PAGEOUT
) &&
2617 (dst_page
->page_ticket
!= page_ticket
) &&
2618 ((dst_page
->page_ticket
+1) != page_ticket
)) ) {
2621 user_page_list
[entry
].phys_addr
= 0;
2624 * grab this up front...
2625 * a high percentange of the time we're going to
2626 * need the hardware modification state a bit later
2627 * anyway... so we can eliminate an extra call into
2628 * the pmap layer by grabbing it here and recording it
2630 refmod_state
= pmap_get_refmod(dst_page
->phys_page
);
2632 if (cntrl_flags
& UPL_RET_ONLY_DIRTY
) {
2634 * we're only asking for DIRTY pages to be returned
2637 if (dst_page
->list_req_pending
|| !(cntrl_flags
& UPL_FOR_PAGEOUT
)) {
2639 * if we were the page stolen by vm_pageout_scan to be
2640 * cleaned (as opposed to a buddy being clustered in
2641 * or this request is not being driven by a PAGEOUT cluster
2642 * then we only need to check for the page being diry or
2643 * precious to decide whether to return it
2645 if (dst_page
->dirty
|| dst_page
->precious
||
2646 (refmod_state
& VM_MEM_MODIFIED
)) {
2651 * this is a request for a PAGEOUT cluster and this page
2652 * is merely along for the ride as a 'buddy'... not only
2653 * does it have to be dirty to be returned, but it also
2654 * can't have been referenced recently... note that we've
2655 * already filtered above based on whether this page is
2656 * currently on the inactive queue or it meets the page
2657 * ticket (generation count) check
2659 if ( !(refmod_state
& VM_MEM_REFERENCED
) &&
2660 ((refmod_state
& VM_MEM_MODIFIED
) ||
2661 dst_page
->dirty
|| dst_page
->precious
) ) {
2665 * if we reach here, we're not to return
2666 * the page... go on to the next one
2669 user_page_list
[entry
].phys_addr
= 0;
2671 dst_offset
+= PAGE_SIZE_64
;
2672 xfer_size
-= PAGE_SIZE
;
2676 if(dst_page
->busy
&&
2677 (!(dst_page
->list_req_pending
&&
2678 dst_page
->pageout
))) {
2679 if(cntrl_flags
& UPL_NOBLOCK
) {
2680 if(user_page_list
) {
2681 user_page_list
[entry
].phys_addr
= 0;
2684 dst_offset
+= PAGE_SIZE_64
;
2685 xfer_size
-= PAGE_SIZE
;
2689 * someone else is playing with the
2690 * page. We will have to wait.
2692 PAGE_SLEEP(object
, dst_page
, THREAD_UNINT
);
2695 /* Someone else already cleaning the page? */
2696 if((dst_page
->cleaning
|| dst_page
->absent
||
2697 dst_page
->wire_count
!= 0) &&
2698 !dst_page
->list_req_pending
) {
2699 if(user_page_list
) {
2700 user_page_list
[entry
].phys_addr
= 0;
2703 dst_offset
+= PAGE_SIZE_64
;
2704 xfer_size
-= PAGE_SIZE
;
2707 /* eliminate all mappings from the */
2708 /* original object and its prodigy */
2710 vm_page_lock_queues();
2712 if (dst_page
->pageout_queue
== TRUE
)
2714 * we've buddied up a page for a clustered pageout
2715 * that has already been moved to the pageout
2716 * queue by pageout_scan... we need to remove
2717 * it from the queue and drop the laundry count
2720 vm_pageout_queue_steal(dst_page
);
2721 #if MACH_CLUSTER_STATS
2722 /* pageout statistics gathering. count */
2723 /* all the pages we will page out that */
2724 /* were not counted in the initial */
2725 /* vm_pageout_scan work */
2726 if(dst_page
->list_req_pending
)
2727 encountered_lrp
= TRUE
;
2728 if((dst_page
->dirty
||
2729 (dst_page
->object
->internal
&&
2730 dst_page
->precious
)) &&
2731 (dst_page
->list_req_pending
2733 if(encountered_lrp
) {
2735 (pages_at_higher_offsets
++;)
2738 (pages_at_lower_offsets
++;)
2742 /* Turn off busy indication on pending */
2743 /* pageout. Note: we can only get here */
2744 /* in the request pending case. */
2745 dst_page
->list_req_pending
= FALSE
;
2746 dst_page
->busy
= FALSE
;
2747 dst_page
->cleaning
= FALSE
;
2749 hw_dirty
= refmod_state
& VM_MEM_MODIFIED
;
2750 dirty
= hw_dirty
? TRUE
: dst_page
->dirty
;
2752 if(cntrl_flags
& UPL_SET_LITE
) {
2754 pg_num
= (dst_offset
-offset
)/PAGE_SIZE
;
2755 lite_list
[pg_num
>>5] |=
2758 pmap_clear_modify(dst_page
->phys_page
);
2760 * Record that this page has been
2764 vm_external_state_set(
2765 object
->existence_map
,
2767 #endif /*MACH_PAGEMAP*/
2770 * Mark original page as cleaning
2773 dst_page
->cleaning
= TRUE
;
2774 dst_page
->dirty
= TRUE
;
2775 dst_page
->precious
= FALSE
;
2777 /* use pageclean setup, it is more */
2778 /* convenient even for the pageout */
2781 vm_object_lock(upl
->map_object
);
2782 vm_pageclean_setup(dst_page
,
2783 alias_page
, upl
->map_object
,
2785 vm_object_unlock(upl
->map_object
);
2787 alias_page
->absent
= FALSE
;
2792 dst_page
->dirty
= FALSE
;
2793 dst_page
->precious
= TRUE
;
2796 if(dst_page
->pageout
)
2797 dst_page
->busy
= TRUE
;
2799 if ( (cntrl_flags
& UPL_ENCRYPT
) ) {
2802 * We want to deny access to the target page
2803 * because its contents are about to be
2804 * encrypted and the user would be very
2805 * confused to see encrypted data instead
2808 dst_page
->busy
= TRUE
;
2810 if ( !(cntrl_flags
& UPL_CLEAN_IN_PLACE
) ) {
2812 * deny access to the target page
2813 * while it is being worked on
2815 if ((!dst_page
->pageout
) &&
2816 (dst_page
->wire_count
== 0)) {
2817 dst_page
->busy
= TRUE
;
2818 dst_page
->pageout
= TRUE
;
2819 vm_page_wire(dst_page
);
2823 if (dst_page
->phys_page
> upl
->highest_page
)
2824 upl
->highest_page
= dst_page
->phys_page
;
2826 if(user_page_list
) {
2827 user_page_list
[entry
].phys_addr
2828 = dst_page
->phys_page
;
2829 user_page_list
[entry
].dirty
=
2831 user_page_list
[entry
].pageout
=
2833 user_page_list
[entry
].absent
=
2835 user_page_list
[entry
].precious
=
2838 vm_page_unlock_queues();
2842 * The caller is gathering this page and might
2843 * access its contents later on. Decrypt the
2844 * page before adding it to the UPL, so that
2845 * the caller never sees encrypted data.
2847 if (! (cntrl_flags
& UPL_ENCRYPT
) &&
2848 dst_page
->encrypted
) {
2849 assert(dst_page
->busy
);
2851 vm_page_decrypt(dst_page
, 0);
2852 vm_page_decrypt_for_upl_counter
++;
2855 * Retry this page, since anything
2856 * could have changed while we were
2863 dst_offset
+= PAGE_SIZE_64
;
2864 xfer_size
-= PAGE_SIZE
;
2868 if((alias_page
== NULL
) &&
2869 !(cntrl_flags
& UPL_SET_LITE
)) {
2870 vm_object_unlock(object
);
2871 VM_PAGE_GRAB_FICTITIOUS(alias_page
);
2872 vm_object_lock(object
);
2875 if ((cntrl_flags
& UPL_WILL_MODIFY
) &&
2876 object
->copy
!= last_copy_object
) {
2877 /* Honor copy-on-write obligations */
2880 * The copy object has changed since we
2881 * last synchronized for copy-on-write.
2882 * Another copy object might have been
2883 * inserted while we released the object's
2884 * lock. Since someone could have seen the
2885 * original contents of the remaining pages
2886 * through that new object, we have to
2887 * synchronize with it again for the remaining
2888 * pages only. The previous pages are "busy"
2889 * so they can not be seen through the new
2890 * mapping. The new mapping will see our
2891 * upcoming changes for those previous pages,
2892 * but that's OK since they couldn't see what
2893 * was there before. It's just a race anyway
2894 * and there's no guarantee of consistency or
2895 * atomicity. We just don't want new mappings
2896 * to see both the *before* and *after* pages.
2898 if (object
->copy
!= VM_OBJECT_NULL
) {
2901 dst_offset
,/* current offset */
2902 xfer_size
, /* remaining size */
2905 FALSE
, /* should_return */
2906 MEMORY_OBJECT_COPY_SYNC
,
2909 upl_cow_again_pages
+=
2910 xfer_size
>> PAGE_SHIFT
;
2912 /* remember the copy object we synced with */
2913 last_copy_object
= object
->copy
;
2916 dst_page
= vm_page_lookup(object
, dst_offset
);
2918 if(dst_page
!= VM_PAGE_NULL
) {
2919 if((cntrl_flags
& UPL_RET_ONLY_ABSENT
) &&
2920 !((dst_page
->list_req_pending
)
2921 && (dst_page
->absent
))) {
2922 /* we are doing extended range */
2923 /* requests. we want to grab */
2924 /* pages around some which are */
2925 /* already present. */
2926 if(user_page_list
) {
2927 user_page_list
[entry
].phys_addr
= 0;
2930 dst_offset
+= PAGE_SIZE_64
;
2931 xfer_size
-= PAGE_SIZE
;
2934 if((dst_page
->cleaning
) &&
2935 !(dst_page
->list_req_pending
)) {
2936 /*someone else is writing to the */
2937 /* page. We will have to wait. */
2938 PAGE_SLEEP(object
,dst_page
,THREAD_UNINT
);
2941 if ((dst_page
->fictitious
&&
2942 dst_page
->list_req_pending
)) {
2943 /* dump the fictitious page */
2944 dst_page
->list_req_pending
= FALSE
;
2945 dst_page
->clustered
= FALSE
;
2947 vm_page_lock_queues();
2948 vm_page_free(dst_page
);
2949 vm_page_unlock_queues();
2952 } else if ((dst_page
->absent
&&
2953 dst_page
->list_req_pending
)) {
2954 /* the default_pager case */
2955 dst_page
->list_req_pending
= FALSE
;
2956 dst_page
->busy
= FALSE
;
2959 if(dst_page
== VM_PAGE_NULL
) {
2960 if(object
->private) {
				/*
				 * This is a nasty wrinkle for users
				 * of upl who encounter device or
				 * private memory; however, it is
				 * unavoidable, only a fault can
				 * resolve the actual backing
				 * physical page by asking the
				 */
2970 if(user_page_list
) {
2971 user_page_list
[entry
].phys_addr
= 0;
2974 dst_offset
+= PAGE_SIZE_64
;
2975 xfer_size
-= PAGE_SIZE
;
2978 /* need to allocate a page */
2979 dst_page
= vm_page_alloc(object
, dst_offset
);
2980 if (dst_page
== VM_PAGE_NULL
) {
2981 vm_object_unlock(object
);
2983 vm_object_lock(object
);
2986 dst_page
->busy
= FALSE
;
2988 if(cntrl_flags
& UPL_NO_SYNC
) {
2989 dst_page
->page_lock
= 0;
2990 dst_page
->unlock_request
= 0;
2993 if(cntrl_flags
& UPL_RET_ONLY_ABSENT
) {
2995 * if UPL_RET_ONLY_ABSENT was specified,
2996 * than we're definitely setting up a
2997 * upl for a clustered read/pagein
2998 * operation... mark the pages as clustered
2999 * so vm_fault can correctly attribute them
3000 * to the 'pagein' bucket the first time
3001 * a fault happens on them
3003 dst_page
->clustered
= TRUE
;
3005 dst_page
->absent
= TRUE
;
3006 object
->absent_count
++;
3009 if(cntrl_flags
& UPL_NO_SYNC
) {
3010 dst_page
->page_lock
= 0;
3011 dst_page
->unlock_request
= 0;
3018 if (cntrl_flags
& UPL_ENCRYPT
) {
3020 * The page is going to be encrypted when we
3021 * get it from the pager, so mark it so.
3023 dst_page
->encrypted
= TRUE
;
3026 * Otherwise, the page will not contain
3029 dst_page
->encrypted
= FALSE
;
3032 dst_page
->overwriting
= TRUE
;
3033 if(dst_page
->fictitious
) {
3034 panic("need corner case for fictitious page");
3036 if(dst_page
->page_lock
) {
3041 /* eliminate all mappings from the */
3042 /* original object and its prodigy */
3044 if(dst_page
->busy
) {
3045 /*someone else is playing with the */
3046 /* page. We will have to wait. */
3047 PAGE_SLEEP(object
, dst_page
, THREAD_UNINT
);
3050 vm_page_lock_queues();
3052 if( !(cntrl_flags
& UPL_FILE_IO
))
3053 hw_dirty
= pmap_disconnect(dst_page
->phys_page
) & VM_MEM_MODIFIED
;
3055 hw_dirty
= pmap_get_refmod(dst_page
->phys_page
) & VM_MEM_MODIFIED
;
3056 dirty
= hw_dirty
? TRUE
: dst_page
->dirty
;
3058 if(cntrl_flags
& UPL_SET_LITE
) {
3060 pg_num
= (dst_offset
-offset
)/PAGE_SIZE
;
3061 lite_list
[pg_num
>>5] |=
3064 pmap_clear_modify(dst_page
->phys_page
);
3066 * Record that this page has been
3070 vm_external_state_set(
3071 object
->existence_map
,
3073 #endif /*MACH_PAGEMAP*/
3076 * Mark original page as cleaning
3079 dst_page
->cleaning
= TRUE
;
3080 dst_page
->dirty
= TRUE
;
3081 dst_page
->precious
= FALSE
;
3083 /* use pageclean setup, it is more */
3084 /* convenient even for the pageout */
3086 vm_object_lock(upl
->map_object
);
3087 vm_pageclean_setup(dst_page
,
3088 alias_page
, upl
->map_object
,
3090 vm_object_unlock(upl
->map_object
);
3092 alias_page
->absent
= FALSE
;
3096 if(cntrl_flags
& UPL_CLEAN_IN_PLACE
) {
3097 /* clean in place for read implies */
3098 /* that a write will be done on all */
3099 /* the pages that are dirty before */
3100 /* a upl commit is done. The caller */
3101 /* is obligated to preserve the */
3102 /* contents of all pages marked */
3104 upl
->flags
|= UPL_CLEAR_DIRTY
;
3108 dst_page
->dirty
= FALSE
;
3109 dst_page
->precious
= TRUE
;
3112 if (dst_page
->wire_count
== 0) {
3113 /* deny access to the target page while */
3114 /* it is being worked on */
3115 dst_page
->busy
= TRUE
;
3117 vm_page_wire(dst_page
);
3119 if(cntrl_flags
& UPL_RET_ONLY_ABSENT
) {
3121 * expect the page not to be used
3122 * since it's coming in as part
3123 * of a cluster and could be
3124 * speculative... pages that
3125 * are 'consumed' will get a
3126 * hardware reference
3128 dst_page
->reference
= FALSE
;
3131 * expect the page to be used
3133 dst_page
->reference
= TRUE
;
3135 dst_page
->precious
=
3136 (cntrl_flags
& UPL_PRECIOUS
)
3139 if (dst_page
->phys_page
> upl
->highest_page
)
3140 upl
->highest_page
= dst_page
->phys_page
;
3142 if(user_page_list
) {
3143 user_page_list
[entry
].phys_addr
3144 = dst_page
->phys_page
;
3145 user_page_list
[entry
].dirty
=
3147 user_page_list
[entry
].pageout
=
3149 user_page_list
[entry
].absent
=
3151 user_page_list
[entry
].precious
=
3154 vm_page_unlock_queues();
3157 dst_offset
+= PAGE_SIZE_64
;
3158 xfer_size
-= PAGE_SIZE
;
3162 if (upl
->flags
& UPL_INTERNAL
) {
3163 if(page_list_count
!= NULL
)
3164 *page_list_count
= 0;
3165 } else if (*page_list_count
> entry
) {
3166 if(page_list_count
!= NULL
)
3167 *page_list_count
= entry
;
3170 if(alias_page
!= NULL
) {
3171 vm_page_lock_queues();
3172 vm_page_free(alias_page
);
3173 vm_page_unlock_queues();
3177 vm_prot_t access_required
;
3178 /* call back all associated pages from other users of the pager */
3179 /* all future updates will be on data which is based on the */
3180 /* changes we are going to make here. Note: it is assumed that */
3181 /* we already hold copies of the data so we will not be seeing */
3182 /* an avalanche of incoming data from the pager */
3183 access_required
= (cntrl_flags
& UPL_COPYOUT_FROM
)
3184 ? VM_PROT_READ
: VM_PROT_WRITE
;
3188 if(!object
->pager_ready
) {
3189 wait_result_t wait_result
;
3191 wait_result
= vm_object_sleep(object
,
3192 VM_OBJECT_EVENT_PAGER_READY
,
3194 if (wait_result
!= THREAD_AWAKENED
) {
3195 vm_object_unlock(object
);
3196 return KERN_FAILURE
;
3201 vm_object_unlock(object
);
3202 rc
= memory_object_data_unlock(
3204 dst_offset
+ object
->paging_offset
,
3207 if (rc
!= KERN_SUCCESS
&& rc
!= MACH_SEND_INTERRUPTED
)
3208 return KERN_FAILURE
;
3209 vm_object_lock(object
);
3211 if (rc
== KERN_SUCCESS
)
3215 /* lets wait on the last page requested */
3216 /* NOTE: we will have to update lock completed routine to signal */
3217 if(dst_page
!= VM_PAGE_NULL
&&
3218 (access_required
& dst_page
->page_lock
) != access_required
) {
3219 PAGE_ASSERT_WAIT(dst_page
, THREAD_UNINT
);
3220 vm_object_unlock(object
);
3221 thread_block(THREAD_CONTINUE_NULL
);
3222 return KERN_SUCCESS
;
3226 vm_object_unlock(object
);
3227 return KERN_SUCCESS
;
3230 /* JMM - Backward compatability for now */
3232 vm_fault_list_request( /* forward */
3233 memory_object_control_t control
,
3234 vm_object_offset_t offset
,
3237 upl_page_info_t
**user_page_list_ptr
,
3238 int page_list_count
,
3241 vm_fault_list_request(
3242 memory_object_control_t control
,
3243 vm_object_offset_t offset
,
3246 upl_page_info_t
**user_page_list_ptr
,
3247 int page_list_count
,
3250 unsigned int local_list_count
;
3251 upl_page_info_t
*user_page_list
;
3254 if (user_page_list_ptr
!= NULL
) {
3255 local_list_count
= page_list_count
;
3256 user_page_list
= *user_page_list_ptr
;
3258 local_list_count
= 0;
3259 user_page_list
= NULL
;
3261 kr
= memory_object_upl_request(control
,
3269 if(kr
!= KERN_SUCCESS
)
3272 if ((user_page_list_ptr
!= NULL
) && (cntrl_flags
& UPL_INTERNAL
)) {
3273 *user_page_list_ptr
= UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr
);
3276 return KERN_SUCCESS
;
/*
 *	Routine:	vm_object_super_upl_request
 *		Cause the population of a portion of a vm_object
 *		in much the same way as memory_object_upl_request.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		However, the region may be expanded up to the super
 *		cluster size provided.
 */
__private_extern__ kern_return_t
vm_object_super_upl_request(
3295 vm_object_offset_t offset
,
3297 upl_size_t super_cluster
,
3299 upl_page_info_t
*user_page_list
,
3300 unsigned int *page_list_count
,
3303 vm_page_t target_page
;
3307 if(object
->paging_offset
> offset
)
3308 return KERN_FAILURE
;
3310 assert(object
->paging_in_progress
);
3311 offset
= offset
- object
->paging_offset
;
3313 if(cntrl_flags
& UPL_FOR_PAGEOUT
) {
3315 vm_object_lock(object
);
3317 if((target_page
= vm_page_lookup(object
, offset
))
3319 ticket
= target_page
->page_ticket
;
3320 cntrl_flags
= cntrl_flags
& ~(int)UPL_PAGE_TICKET_MASK
;
3321 cntrl_flags
= cntrl_flags
|
3322 ((ticket
<< UPL_PAGE_TICKET_SHIFT
)
3323 & UPL_PAGE_TICKET_MASK
);
3325 vm_object_unlock(object
);
	if (super_cluster > size) {

		vm_object_offset_t	base_offset;
		upl_size_t		super_size;

		base_offset = (offset &
			       ~((vm_object_offset_t) super_cluster - 1));
		super_size = (offset + size) > (base_offset + super_cluster) ?
				super_cluster<<1 : super_cluster;
		super_size = ((base_offset + super_size) > object->size) ?
				(object->size - base_offset) : super_size;
		if (offset > (base_offset + super_size))
			panic("vm_object_super_upl_request: Missed target pageout"
			      " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
			      offset, base_offset, super_size, super_cluster,
			      size, object->paging_offset);
		/*
		 * apparently there is a case where the vm requests a
		 * page to be written out whose offset is beyond the
		 * object size
		 */
		if ((offset + size) > (base_offset + super_size))
			super_size = (offset + size) - base_offset;

		offset = base_offset;
		size = super_size;
	}
	return vm_object_upl_request(object, offset, size,
				     upl, user_page_list, page_list_count,
				     cntrl_flags);
}
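/*
 * Sketch (illustrative, not from the original source) of the cluster
 * expansion above: round the request down to a super_cluster boundary,
 * double the window if the request spills past one cluster, clamp to
 * the end of the object, and finally grow again if the request still
 * runs past the window.  super_cluster is assumed to be a power of
 * two, as the mask computation requires.  Names are hypothetical.
 */
#if 0
static void
expand_to_cluster(unsigned long long *offset, unsigned int *size,
		  unsigned int super_cluster, unsigned long long object_size)
{
	unsigned long long base  = *offset & ~((unsigned long long)super_cluster - 1);
	unsigned long long super = (*offset + *size) > (base + super_cluster) ?
					(unsigned long long)super_cluster << 1 : super_cluster;

	if (base + super > object_size)
		super = object_size - base;
	if (*offset + *size > base + super)
		super = (*offset + *size) - base;	/* request runs past the object */

	*offset = base;
	*size   = (unsigned int)super;
	/* e.g. offset 0x2a000, size 0x1000, cluster 0x10000 -> offset 0x20000, size 0x10000 */
}
#endif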
3364 vm_map_address_t offset
,
3365 upl_size_t
*upl_size
,
3367 upl_page_info_array_t page_list
,
3368 unsigned int *count
,
3371 vm_map_entry_t entry
;
3373 int force_data_sync
;
3375 vm_object_t local_object
;
3376 vm_map_offset_t local_offset
;
3377 vm_map_offset_t local_start
;
3380 caller_flags
= *flags
;
3382 if (caller_flags
& ~UPL_VALID_FLAGS
) {
3384 * For forward compatibility's sake,
3385 * reject any unknown flag.
3387 return KERN_INVALID_VALUE
;
3390 force_data_sync
= (caller_flags
& UPL_FORCE_DATA_SYNC
);
3391 sync_cow_data
= !(caller_flags
& UPL_COPYOUT_FROM
);
3394 return KERN_INVALID_ARGUMENT
;
3399 if (vm_map_lookup_entry(map
, offset
, &entry
)) {
3400 if (entry
->object
.vm_object
== VM_OBJECT_NULL
||
3401 !entry
->object
.vm_object
->phys_contiguous
) {
3402 if((*upl_size
/page_size
) > MAX_UPL_TRANSFER
) {
3403 *upl_size
= MAX_UPL_TRANSFER
* page_size
;
3406 if((entry
->vme_end
- offset
) < *upl_size
) {
3407 *upl_size
= entry
->vme_end
- offset
;
3409 if (caller_flags
& UPL_QUERY_OBJECT_TYPE
) {
3410 if (entry
->object
.vm_object
== VM_OBJECT_NULL
) {
3412 } else if (entry
->object
.vm_object
->private) {
3413 *flags
= UPL_DEV_MEMORY
;
3414 if (entry
->object
.vm_object
->phys_contiguous
) {
3415 *flags
|= UPL_PHYS_CONTIG
;
3421 return KERN_SUCCESS
;
3424 * Create an object if necessary.
3426 if (entry
->object
.vm_object
== VM_OBJECT_NULL
) {
3427 entry
->object
.vm_object
= vm_object_allocate(
3428 (vm_size_t
)(entry
->vme_end
- entry
->vme_start
));
3431 if (!(caller_flags
& UPL_COPYOUT_FROM
)) {
3432 if (!(entry
->protection
& VM_PROT_WRITE
)) {
3434 return KERN_PROTECTION_FAILURE
;
3436 if (entry
->needs_copy
) {
3439 vm_map_offset_t offset_hi
;
3440 vm_map_offset_t offset_lo
;
3441 vm_object_offset_t new_offset
;
3444 vm_behavior_t behavior
;
3445 vm_map_version_t version
;
3449 vm_map_lock_write_to_read(map
);
3450 if(vm_map_lookup_locked(&local_map
,
3451 offset
, VM_PROT_WRITE
,
3453 &new_offset
, &prot
, &wired
,
3454 &behavior
, &offset_lo
,
3455 &offset_hi
, &real_map
)) {
3456 vm_map_unlock(local_map
);
3457 return KERN_FAILURE
;
3459 if (real_map
!= map
) {
3460 vm_map_unlock(real_map
);
3462 vm_object_unlock(object
);
3463 vm_map_unlock(local_map
);
3465 goto REDISCOVER_ENTRY
;
3468 if (entry
->is_sub_map
) {
3471 submap
= entry
->object
.sub_map
;
3472 local_start
= entry
->vme_start
;
3473 local_offset
= entry
->offset
;
3474 vm_map_reference(submap
);
3477 ret
= (vm_map_create_upl(submap
,
3478 local_offset
+ (offset
- local_start
),
3479 upl_size
, upl
, page_list
, count
,
3482 vm_map_deallocate(submap
);
3486 if (sync_cow_data
) {
3487 if (entry
->object
.vm_object
->shadow
3488 || entry
->object
.vm_object
->copy
) {
3490 local_object
= entry
->object
.vm_object
;
3491 local_start
= entry
->vme_start
;
3492 local_offset
= entry
->offset
;
3493 vm_object_reference(local_object
);
3496 if (entry
->object
.vm_object
->shadow
&&
3497 entry
->object
.vm_object
->copy
) {
3498 vm_object_lock_request(
3499 local_object
->shadow
,
3500 (vm_object_offset_t
)
3501 ((offset
- local_start
) +
3503 local_object
->shadow_offset
,
3505 MEMORY_OBJECT_DATA_SYNC
,
3508 sync_cow_data
= FALSE
;
3509 vm_object_deallocate(local_object
);
3510 goto REDISCOVER_ENTRY
;
3514 if (force_data_sync
) {
3516 local_object
= entry
->object
.vm_object
;
3517 local_start
= entry
->vme_start
;
3518 local_offset
= entry
->offset
;
3519 vm_object_reference(local_object
);
3522 vm_object_lock_request(
3524 (vm_object_offset_t
)
3525 ((offset
- local_start
) + local_offset
),
3526 (vm_object_size_t
)*upl_size
, FALSE
,
3527 MEMORY_OBJECT_DATA_SYNC
,
3529 force_data_sync
= FALSE
;
3530 vm_object_deallocate(local_object
);
3531 goto REDISCOVER_ENTRY
;
3534 if(!(entry
->object
.vm_object
->private)) {
3535 if(*upl_size
> (MAX_UPL_TRANSFER
*PAGE_SIZE
))
3536 *upl_size
= (MAX_UPL_TRANSFER
*PAGE_SIZE
);
3537 if(entry
->object
.vm_object
->phys_contiguous
) {
3538 *flags
= UPL_PHYS_CONTIG
;
3543 *flags
= UPL_DEV_MEMORY
| UPL_PHYS_CONTIG
;
3545 local_object
= entry
->object
.vm_object
;
3546 local_offset
= entry
->offset
;
3547 local_start
= entry
->vme_start
;
3548 vm_object_reference(local_object
);
3550 if(caller_flags
& UPL_SET_IO_WIRE
) {
3551 ret
= (vm_object_iopl_request(local_object
,
3552 (vm_object_offset_t
)
3553 ((offset
- local_start
)
3561 ret
= (vm_object_upl_request(local_object
,
3562 (vm_object_offset_t
)
3563 ((offset
- local_start
)
3571 vm_object_deallocate(local_object
);
3576 return(KERN_FAILURE
);
/*
 * Internal routine to enter a UPL into a VM map.
 *
 * JMM - This should just be doable through the standard
 *	vm_map_enter() API.
 */
3590 vm_map_offset_t
*dst_addr
)
3593 vm_object_offset_t offset
;
3594 vm_map_offset_t addr
;
3598 if (upl
== UPL_NULL
)
3599 return KERN_INVALID_ARGUMENT
;
3603 /* check to see if already mapped */
3604 if(UPL_PAGE_LIST_MAPPED
& upl
->flags
) {
3606 return KERN_FAILURE
;
3609 if((!(upl
->map_object
->pageout
)) &&
3610 !((upl
->flags
& (UPL_DEVICE_MEMORY
| UPL_IO_WIRE
)) ||
3611 (upl
->map_object
->phys_contiguous
))) {
3613 vm_page_t alias_page
;
3614 vm_object_offset_t new_offset
;
3616 wpl_array_t lite_list
;
3618 if(upl
->flags
& UPL_INTERNAL
) {
3619 lite_list
= (wpl_array_t
)
3620 ((((uintptr_t)upl
) + sizeof(struct upl
))
3621 + ((upl
->size
/PAGE_SIZE
)
3622 * sizeof(upl_page_info_t
)));
3624 lite_list
= (wpl_array_t
)
3625 (((uintptr_t)upl
) + sizeof(struct upl
));
3627 object
= upl
->map_object
;
3628 upl
->map_object
= vm_object_allocate(upl
->size
);
3629 vm_object_lock(upl
->map_object
);
3630 upl
->map_object
->shadow
= object
;
3631 upl
->map_object
->pageout
= TRUE
;
3632 upl
->map_object
->can_persist
= FALSE
;
3633 upl
->map_object
->copy_strategy
=
3634 MEMORY_OBJECT_COPY_NONE
;
3635 upl
->map_object
->shadow_offset
=
3636 upl
->offset
- object
->paging_offset
;
3637 upl
->map_object
->wimg_bits
= object
->wimg_bits
;
3638 offset
= upl
->map_object
->shadow_offset
;
3642 vm_object_lock(object
);
3645 pg_num
= (new_offset
)/PAGE_SIZE
;
3646 if(lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
3647 vm_object_unlock(object
);
3648 VM_PAGE_GRAB_FICTITIOUS(alias_page
);
3649 vm_object_lock(object
);
3650 m
= vm_page_lookup(object
, offset
);
3651 if (m
== VM_PAGE_NULL
) {
3652 panic("vm_upl_map: page missing\n");
3655 vm_object_paging_begin(object
);
3658 * Convert the fictitious page to a private
3659 * shadow of the real page.
3661 assert(alias_page
->fictitious
);
3662 alias_page
->fictitious
= FALSE
;
3663 alias_page
->private = TRUE
;
3664 alias_page
->pageout
= TRUE
;
3665 alias_page
->phys_page
= m
->phys_page
;
3667 vm_page_lock_queues();
3668 vm_page_wire(alias_page
);
3669 vm_page_unlock_queues();
			/*
			 * The virtual page ("m") has to be wired in some way
			 * here or its physical page ("m->phys_page") could
			 * be recycled at any time.
			 * Assuming this is enforced by the caller, we can't
			 * get an encrypted page here.  Since the encryption
			 * key depends on the VM page's "pager" object and
			 * the "paging_offset", we couldn't handle 2 pageable
			 * VM pages (with different pagers and paging_offsets)
			 * sharing the same physical page:  we could end up
			 * encrypting with one key (via one VM page) and
			 * decrypting with another key (via the alias VM page).
			 */
			ASSERT_PAGE_DECRYPTED(m);
3687 vm_page_insert(alias_page
,
3688 upl
->map_object
, new_offset
);
3689 assert(!alias_page
->wanted
);
3690 alias_page
->busy
= FALSE
;
3691 alias_page
->absent
= FALSE
;
3695 offset
+= PAGE_SIZE_64
;
3696 new_offset
+= PAGE_SIZE_64
;
3698 vm_object_unlock(object
);
3699 vm_object_unlock(upl
->map_object
);
3701 if ((upl
->flags
& (UPL_DEVICE_MEMORY
| UPL_IO_WIRE
)) || upl
->map_object
->phys_contiguous
)
3702 offset
= upl
->offset
- upl
->map_object
->paging_offset
;
3708 vm_object_lock(upl
->map_object
);
3709 upl
->map_object
->ref_count
++;
3710 vm_object_res_reference(upl
->map_object
);
3711 vm_object_unlock(upl
->map_object
);
3716 /* NEED A UPL_MAP ALIAS */
3717 kr
= vm_map_enter(map
, dst_addr
, (vm_map_size_t
)size
, (vm_map_offset_t
) 0,
3718 VM_FLAGS_ANYWHERE
, upl
->map_object
, offset
, FALSE
,
3719 VM_PROT_DEFAULT
, VM_PROT_ALL
, VM_INHERIT_DEFAULT
);
3721 if (kr
!= KERN_SUCCESS
) {
3726 vm_object_lock(upl
->map_object
);
3728 for(addr
=*dst_addr
; size
> 0; size
-=PAGE_SIZE
,addr
+=PAGE_SIZE
) {
3729 m
= vm_page_lookup(upl
->map_object
, offset
);
3731 unsigned int cache_attr
;
3732 cache_attr
= ((unsigned int)m
->object
->wimg_bits
) & VM_WIMG_MASK
;
3734 PMAP_ENTER(map
->pmap
, addr
,
3738 offset
+=PAGE_SIZE_64
;
3740 vm_object_unlock(upl
->map_object
);
3742 upl
->ref_count
++; /* hold a reference for the mapping */
3743 upl
->flags
|= UPL_PAGE_LIST_MAPPED
;
3744 upl
->kaddr
= *dst_addr
;
3746 return KERN_SUCCESS
;
/*
 * Internal routine to remove a UPL mapping from a VM map.
 *
 * XXX - This should just be doable through a standard
 *	vm_map_remove() operation.  Otherwise, implicit clean-up
 *	of the target map won't be able to correctly remove
 *	these (and release the reference on the UPL).  Having
 *	to do this means we can't map these into user-space
 */
3767 if (upl
== UPL_NULL
)
3768 return KERN_INVALID_ARGUMENT
;
3771 if(upl
->flags
& UPL_PAGE_LIST_MAPPED
) {
3774 assert(upl
->ref_count
> 1);
3775 upl
->ref_count
--; /* removing mapping ref */
3776 upl
->flags
&= ~UPL_PAGE_LIST_MAPPED
;
3777 upl
->kaddr
= (vm_offset_t
) 0;
3781 vm_map_trunc_page(addr
),
3782 vm_map_round_page(addr
+ size
),
3784 return KERN_SUCCESS
;
3787 return KERN_FAILURE
;
3793 upl_offset_t offset
,
3796 upl_page_info_t
*page_list
,
3797 mach_msg_type_number_t count
,
3800 upl_size_t xfer_size
= size
;
3801 vm_object_t shadow_object
;
3802 vm_object_t object
= upl
->map_object
;
3803 vm_object_offset_t target_offset
;
3805 wpl_array_t lite_list
;
3807 int delayed_unlock
= 0;
3808 int clear_refmod
= 0;
3809 boolean_t shadow_internal
;
3813 if (upl
== UPL_NULL
)
3814 return KERN_INVALID_ARGUMENT
;
3820 if (object
->pageout
) {
3821 shadow_object
= object
->shadow
;
3823 shadow_object
= object
;
3828 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
3830 * We used this UPL to block access to the pages by marking
3831 * them "busy". Now we need to clear the "busy" bit to allow
3832 * access to these pages again.
3834 flags
|= UPL_COMMIT_ALLOW_ACCESS
;
3837 if (upl
->flags
& UPL_CLEAR_DIRTY
)
3838 flags
|= UPL_COMMIT_CLEAR_DIRTY
;
3840 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
3842 } else if ((offset
+ size
) > upl
->size
) {
3844 return KERN_FAILURE
;
3847 if (upl
->flags
& UPL_INTERNAL
) {
3848 lite_list
= (wpl_array_t
)
3849 ((((uintptr_t)upl
) + sizeof(struct upl
))
3850 + ((upl
->size
/PAGE_SIZE
) * sizeof(upl_page_info_t
)));
3852 lite_list
= (wpl_array_t
)
3853 (((uintptr_t)upl
) + sizeof(struct upl
));
3855 if (object
!= shadow_object
)
3856 vm_object_lock(object
);
3857 vm_object_lock(shadow_object
);
3859 shadow_internal
= shadow_object
->internal
;
3861 entry
= offset
/PAGE_SIZE
;
3862 target_offset
= (vm_object_offset_t
)offset
;
3870 if (upl
->flags
& UPL_LITE
) {
3873 pg_num
= target_offset
/PAGE_SIZE
;
3875 if (lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
3876 lite_list
[pg_num
>>5] &= ~(1 << (pg_num
& 31));
3877 m
= vm_page_lookup(shadow_object
,
3878 target_offset
+ (upl
->offset
-
3879 shadow_object
->paging_offset
));
3882 if (object
->pageout
) {
3883 if ((t
= vm_page_lookup(object
, target_offset
)) != NULL
) {
3886 if (delayed_unlock
) {
3888 vm_page_unlock_queues();
3896 object
->shadow_offset
);
3898 if (m
!= VM_PAGE_NULL
)
3899 vm_object_paging_end(m
->object
);
3902 if (m
!= VM_PAGE_NULL
) {
3906 if (upl
->flags
& UPL_IO_WIRE
) {
3908 if (delayed_unlock
== 0)
3909 vm_page_lock_queues();
3913 if (delayed_unlock
++ > DELAYED_UNLOCK_LIMIT
) {
3915 vm_page_unlock_queues();
3918 page_list
[entry
].phys_addr
= 0;
3920 if (flags
& UPL_COMMIT_SET_DIRTY
) {
3922 } else if (flags
& UPL_COMMIT_CLEAR_DIRTY
) {
3924 clear_refmod
|= VM_MEM_MODIFIED
;
3926 if (flags
& UPL_COMMIT_INACTIVATE
) {
3927 m
->reference
= FALSE
;
3928 clear_refmod
|= VM_MEM_REFERENCED
;
3929 vm_page_deactivate(m
);
3932 pmap_clear_refmod(m
->phys_page
, clear_refmod
);
3934 if (flags
& UPL_COMMIT_ALLOW_ACCESS
) {
3936 * We blocked access to the pages in this UPL.
3937 * Clear the "busy" bit and wake up any waiter
3940 PAGE_WAKEUP_DONE(m
);
3943 target_offset
+= PAGE_SIZE_64
;
3944 xfer_size
-= PAGE_SIZE
;
3948 if (delayed_unlock
== 0)
3949 vm_page_lock_queues();
3951 * make sure to clear the hardware
3952 * modify or reference bits before
3953 * releasing the BUSY bit on this page
3954 * otherwise we risk losing a legitimate
3957 if (flags
& UPL_COMMIT_CLEAR_DIRTY
) {
3959 clear_refmod
|= VM_MEM_MODIFIED
;
3961 if (flags
& UPL_COMMIT_INACTIVATE
)
3962 clear_refmod
|= VM_MEM_REFERENCED
;
3965 pmap_clear_refmod(m
->phys_page
, clear_refmod
);
3968 p
= &(page_list
[entry
]);
3969 if(p
->phys_addr
&& p
->pageout
&& !m
->pageout
) {
3973 } else if (page_list
[entry
].phys_addr
&&
3974 !p
->pageout
&& m
->pageout
&&
3975 !m
->dump_cleaning
) {
3978 m
->overwriting
= FALSE
;
3980 PAGE_WAKEUP_DONE(m
);
3982 page_list
[entry
].phys_addr
= 0;
3984 m
->dump_cleaning
= FALSE
;
3986 vm_pageout_throttle_up(m
);
3989 m
->cleaning
= FALSE
;
3991 #if MACH_CLUSTER_STATS
3992 if (m
->wanted
) vm_pageout_target_collisions
++;
3994 if (pmap_disconnect(m
->phys_page
) & VM_MEM_MODIFIED
)
4000 vm_page_unwire(m
);/* reactivates */
4002 if (upl
->flags
& UPL_PAGEOUT
) {
4003 CLUSTER_STAT(vm_pageout_target_page_dirtied
++;)
4004 VM_STAT(reactivations
++);
4006 PAGE_WAKEUP_DONE(m
);
4008 vm_page_free(m
);/* clears busy, etc. */
4010 if (upl
->flags
& UPL_PAGEOUT
) {
4011 CLUSTER_STAT(vm_pageout_target_page_freed
++;)
4013 if (page_list
[entry
].dirty
)
4014 VM_STAT(pageouts
++);
4017 if (delayed_unlock
++ > DELAYED_UNLOCK_LIMIT
) {
4019 vm_page_unlock_queues();
4021 target_offset
+= PAGE_SIZE_64
;
4022 xfer_size
-= PAGE_SIZE
;
4026 #if MACH_CLUSTER_STATS
4027 m
->dirty
= pmap_is_modified(m
->phys_page
);
4029 if (m
->dirty
) vm_pageout_cluster_dirtied
++;
4030 else vm_pageout_cluster_cleaned
++;
4031 if (m
->wanted
) vm_pageout_cluster_collisions
++;
4036 if((m
->busy
) && (m
->cleaning
)) {
4037 /* the request_page_list case */
4040 if(shadow_object
->absent_count
== 1)
4041 vm_object_absent_release(shadow_object
);
4043 shadow_object
->absent_count
--;
4045 m
->overwriting
= FALSE
;
4048 } else if (m
->overwriting
) {
4049 /* alternate request page list, write to
4050 * page_list case. Occurs when the original
4051 * page was wired at the time of the list
4053 assert(m
->wire_count
!= 0);
4054 vm_page_unwire(m
);/* reactivates */
4055 m
->overwriting
= FALSE
;
4057 m
->cleaning
= FALSE
;
4059 /* It is a part of the semantic of COPYOUT_FROM */
4060 /* UPLs that a commit implies cache sync */
4061 /* between the vm page and the backing store */
4062 /* this can be used to strip the precious bit */
4063 /* as well as clean */
4064 if (upl
->flags
& UPL_PAGE_SYNC_DONE
)
4065 m
->precious
= FALSE
;
4067 if (flags
& UPL_COMMIT_SET_DIRTY
)
4070 if (flags
& UPL_COMMIT_INACTIVATE
) {
4071 m
->reference
= FALSE
;
4072 vm_page_deactivate(m
);
4073 } else if (!m
->active
&& !m
->inactive
) {
4075 vm_page_activate(m
);
4077 vm_page_deactivate(m
);
			if (flags & UPL_COMMIT_ALLOW_ACCESS) {
				/*
				 * We blocked access to the pages in this UPL.
				 * Clear the "busy" bit on this page before we
				 * wake up any waiter.
				 */
			/*
			 * Wakeup any thread waiting for the page to be un-cleaning.
			 */
4094 if (delayed_unlock
++ > DELAYED_UNLOCK_LIMIT
) {
4096 vm_page_unlock_queues();
4099 target_offset
+= PAGE_SIZE_64
;
4100 xfer_size
-= PAGE_SIZE
;
4104 vm_page_unlock_queues();
4108 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
4110 } else if (upl
->flags
& UPL_LITE
) {
4113 pg_num
= upl
->size
/PAGE_SIZE
;
4114 pg_num
= (pg_num
+ 31) >> 5;
4116 for(i
= 0; i
<pg_num
; i
++) {
4117 if(lite_list
[i
] != 0) {
4123 if(queue_empty(&upl
->map_object
->memq
)) {
4129 if(upl
->flags
& UPL_COMMIT_NOTIFY_EMPTY
) {
4132 if(object
== shadow_object
)
4133 vm_object_paging_end(shadow_object
);
4135 vm_object_unlock(shadow_object
);
4136 if (object
!= shadow_object
)
4137 vm_object_unlock(object
);
4140 return KERN_SUCCESS
;
4146 upl_offset_t offset
,
4151 upl_size_t xfer_size
= size
;
4152 vm_object_t shadow_object
;
4153 vm_object_t object
= upl
->map_object
;
4154 vm_object_offset_t target_offset
;
4156 wpl_array_t lite_list
;
4158 boolean_t shadow_internal
;
4162 if (upl
== UPL_NULL
)
4163 return KERN_INVALID_ARGUMENT
;
4165 if (upl
->flags
& UPL_IO_WIRE
) {
4166 return upl_commit_range(upl
,
4171 if(object
->pageout
) {
4172 shadow_object
= object
->shadow
;
4174 shadow_object
= object
;
4178 if(upl
->flags
& UPL_DEVICE_MEMORY
) {
4180 } else if ((offset
+ size
) > upl
->size
) {
4182 return KERN_FAILURE
;
4184 if (object
!= shadow_object
)
4185 vm_object_lock(object
);
4186 vm_object_lock(shadow_object
);
4188 shadow_internal
= shadow_object
->internal
;
4190 if(upl
->flags
& UPL_INTERNAL
) {
4191 lite_list
= (wpl_array_t
)
4192 ((((uintptr_t)upl
) + sizeof(struct upl
))
4193 + ((upl
->size
/PAGE_SIZE
) * sizeof(upl_page_info_t
)));
4195 lite_list
= (wpl_array_t
)
4196 (((uintptr_t)upl
) + sizeof(struct upl
));
4199 entry
= offset
/PAGE_SIZE
;
4200 target_offset
= (vm_object_offset_t
)offset
;
4205 if(upl
->flags
& UPL_LITE
) {
4207 pg_num
= target_offset
/PAGE_SIZE
;
4208 if(lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
4209 lite_list
[pg_num
>>5] &= ~(1 << (pg_num
& 31));
4210 m
= vm_page_lookup(shadow_object
,
4211 target_offset
+ (upl
->offset
-
4212 shadow_object
->paging_offset
));
4215 if(object
->pageout
) {
4216 if ((t
= vm_page_lookup(object
, target_offset
))
4224 object
->shadow_offset
);
4226 if(m
!= VM_PAGE_NULL
)
4227 vm_object_paging_end(m
->object
);
4230 if(m
!= VM_PAGE_NULL
) {
4231 vm_page_lock_queues();
4233 boolean_t must_free
= TRUE
;
4235 /* COPYOUT = FALSE case */
4236 /* check for error conditions which must */
4237 /* be passed back to the pages customer */
4238 if(error
& UPL_ABORT_RESTART
) {
4241 vm_object_absent_release(m
->object
);
4242 m
->page_error
= KERN_MEMORY_ERROR
;
4245 } else if(error
& UPL_ABORT_UNAVAILABLE
) {
4249 } else if(error
& UPL_ABORT_ERROR
) {
4252 vm_object_absent_release(m
->object
);
4253 m
->page_error
= KERN_MEMORY_ERROR
;
4260 * If the page was already encrypted,
4261 * we don't really need to decrypt it
4262 * now. It will get decrypted later,
4263 * on demand, as soon as someone needs
4264 * to access its contents.
4267 m
->cleaning
= FALSE
;
4268 m
->overwriting
= FALSE
;
4269 PAGE_WAKEUP_DONE(m
);
4271 if (must_free
== TRUE
) {
4274 vm_page_activate(m
);
4276 vm_page_unlock_queues();
4278 target_offset
+= PAGE_SIZE_64
;
4279 xfer_size
-= PAGE_SIZE
;
4284 * Handle the trusted pager throttle.
4287 vm_pageout_throttle_up(m
);
4291 assert(m
->wire_count
== 1);
4295 m
->dump_cleaning
= FALSE
;
4296 m
->cleaning
= FALSE
;
4297 m
->overwriting
= FALSE
;
4299 vm_external_state_clr(
4300 m
->object
->existence_map
, m
->offset
);
4301 #endif /* MACH_PAGEMAP */
4302 if(error
& UPL_ABORT_DUMP_PAGES
) {
4304 pmap_disconnect(m
->phys_page
);
4306 PAGE_WAKEUP_DONE(m
);
4308 vm_page_unlock_queues();
4310 target_offset
+= PAGE_SIZE_64
;
4311 xfer_size
-= PAGE_SIZE
;
4315 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
4317 } else if (upl
->flags
& UPL_LITE
) {
4320 pg_num
= upl
->size
/PAGE_SIZE
;
4321 pg_num
= (pg_num
+ 31) >> 5;
4323 for(i
= 0; i
<pg_num
; i
++) {
4324 if(lite_list
[i
] != 0) {
4330 if(queue_empty(&upl
->map_object
->memq
)) {
4336 if(upl
->flags
& UPL_COMMIT_NOTIFY_EMPTY
) {
4339 if(object
== shadow_object
)
4340 vm_object_paging_end(shadow_object
);
4342 vm_object_unlock(shadow_object
);
4343 if (object
!= shadow_object
)
4344 vm_object_unlock(object
);
4348 return KERN_SUCCESS
;
4356 vm_object_t object
= NULL
;
4357 vm_object_t shadow_object
= NULL
;
4358 vm_object_offset_t offset
;
4359 vm_object_offset_t shadow_offset
;
4360 vm_object_offset_t target_offset
;
4362 wpl_array_t lite_list
;
4365 boolean_t shadow_internal
;
4367 if (upl
== UPL_NULL
)
4368 return KERN_INVALID_ARGUMENT
;
4370 if (upl
->flags
& UPL_IO_WIRE
) {
4372 return upl_commit_range(upl
,
4378 if(upl
->flags
& UPL_DEVICE_MEMORY
) {
4380 return KERN_SUCCESS
;
4383 object
= upl
->map_object
;
4385 if (object
== NULL
) {
4386 panic("upl_abort: upl object is not backed by an object");
4388 return KERN_INVALID_ARGUMENT
;
4391 if(object
->pageout
) {
4392 shadow_object
= object
->shadow
;
4393 shadow_offset
= object
->shadow_offset
;
4395 shadow_object
= object
;
4396 shadow_offset
= upl
->offset
- object
->paging_offset
;
4399 if(upl
->flags
& UPL_INTERNAL
) {
4400 lite_list
= (wpl_array_t
)
4401 ((((uintptr_t)upl
) + sizeof(struct upl
))
4402 + ((upl
->size
/PAGE_SIZE
) * sizeof(upl_page_info_t
)));
4404 lite_list
= (wpl_array_t
)
4405 (((uintptr_t)upl
) + sizeof(struct upl
));
4409 if (object
!= shadow_object
)
4410 vm_object_lock(object
);
4411 vm_object_lock(shadow_object
);
4413 shadow_internal
= shadow_object
->internal
;
4415 for(i
= 0; i
<(upl
->size
); i
+=PAGE_SIZE
, offset
+= PAGE_SIZE_64
) {
4417 target_offset
= offset
+ shadow_offset
;
4418 if(upl
->flags
& UPL_LITE
) {
4420 pg_num
= offset
/PAGE_SIZE
;
4421 if(lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
4422 lite_list
[pg_num
>>5] &= ~(1 << (pg_num
& 31));
4424 shadow_object
, target_offset
);
4427 if(object
->pageout
) {
4428 if ((t
= vm_page_lookup(object
, offset
)) != NULL
) {
4433 shadow_object
, target_offset
);
4435 if(m
!= VM_PAGE_NULL
)
4436 vm_object_paging_end(m
->object
);
4439 if(m
!= VM_PAGE_NULL
) {
4440 vm_page_lock_queues();
4442 boolean_t must_free
= TRUE
;
4444 /* COPYOUT = FALSE case */
4445 /* check for error conditions which must */
4446 /* be passed back to the pages customer */
4447 if(error
& UPL_ABORT_RESTART
) {
4450 vm_object_absent_release(m
->object
);
4451 m
->page_error
= KERN_MEMORY_ERROR
;
4454 } else if(error
& UPL_ABORT_UNAVAILABLE
) {
4458 } else if(error
& UPL_ABORT_ERROR
) {
4461 vm_object_absent_release(m
->object
);
4462 m
->page_error
= KERN_MEMORY_ERROR
;
4469 * If the page was already encrypted,
4470 * we don't really need to decrypt it
4471 * now. It will get decrypted later,
4472 * on demand, as soon as someone needs
4473 * to access its contents.
4476 m
->cleaning
= FALSE
;
4477 m
->overwriting
= FALSE
;
4478 PAGE_WAKEUP_DONE(m
);
4480 if (must_free
== TRUE
) {
4483 vm_page_activate(m
);
4485 vm_page_unlock_queues();
4489 * Handle the trusted pager throttle.
4492 vm_pageout_throttle_up(m
);
4496 assert(m
->wire_count
== 1);
4500 m
->dump_cleaning
= FALSE
;
4501 m
->cleaning
= FALSE
;
4502 m
->overwriting
= FALSE
;
4504 vm_external_state_clr(
4505 m
->object
->existence_map
, m
->offset
);
4506 #endif /* MACH_PAGEMAP */
4507 if(error
& UPL_ABORT_DUMP_PAGES
) {
4509 pmap_disconnect(m
->phys_page
);
4511 PAGE_WAKEUP_DONE(m
);
4513 vm_page_unlock_queues();
4517 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
4519 } else if (upl
->flags
& UPL_LITE
) {
4522 pg_num
= upl
->size
/PAGE_SIZE
;
4523 pg_num
= (pg_num
+ 31) >> 5;
4525 for(j
= 0; j
<pg_num
; j
++) {
4526 if(lite_list
[j
] != 0) {
4532 if(queue_empty(&upl
->map_object
->memq
)) {
4538 if(object
== shadow_object
)
4539 vm_object_paging_end(shadow_object
);
4541 vm_object_unlock(shadow_object
);
4542 if (object
!= shadow_object
)
4543 vm_object_unlock(object
);
4546 return KERN_SUCCESS
;
4549 /* an option on commit should be wire */
4553 upl_page_info_t
*page_list
,
4554 mach_msg_type_number_t count
)
4556 if (upl
== UPL_NULL
)
4557 return KERN_INVALID_ARGUMENT
;
4559 if(upl
->flags
& (UPL_LITE
| UPL_IO_WIRE
)) {
4561 return upl_commit_range(upl
, 0, upl
->size
, 0,
4562 page_list
, count
, &empty
);
4569 if (upl
->flags
& UPL_DEVICE_MEMORY
)
4572 if (upl
->flags
& UPL_ENCRYPTED
) {
4575 * This UPL was encrypted, but we don't need
4576 * to decrypt here. We'll decrypt each page
4577 * later, on demand, as soon as someone needs
4578 * to access the page's contents.
4582 if ((upl
->flags
& UPL_CLEAR_DIRTY
) ||
4583 (upl
->flags
& UPL_PAGE_SYNC_DONE
) || page_list
) {
4584 vm_object_t shadow_object
= upl
->map_object
->shadow
;
4585 vm_object_t object
= upl
->map_object
;
4586 vm_object_offset_t target_offset
;
4587 upl_size_t xfer_end
;
4593 if (object
!= shadow_object
)
4594 vm_object_lock(object
);
4595 vm_object_lock(shadow_object
);
4598 target_offset
= object
->shadow_offset
;
4599 xfer_end
= upl
->size
+ object
->shadow_offset
;
4601 while(target_offset
< xfer_end
) {
4603 if ((t
= vm_page_lookup(object
,
4604 target_offset
- object
->shadow_offset
))
4606 target_offset
+= PAGE_SIZE_64
;
4611 m
= vm_page_lookup(shadow_object
, target_offset
);
4612 if(m
!= VM_PAGE_NULL
) {
4615 * If this page was encrypted, we
4616 * don't need to decrypt it here.
4617 * We'll decrypt it later, on demand,
4618 * as soon as someone needs to access
4622 if (upl
->flags
& UPL_CLEAR_DIRTY
) {
4623 pmap_clear_modify(m
->phys_page
);
4626 /* It is a part of the semantic of */
4627 /* COPYOUT_FROM UPLs that a commit */
4628 /* implies cache sync between the */
4629 /* vm page and the backing store */
4630 /* this can be used to strip the */
4631 /* precious bit as well as clean */
4632 if (upl
->flags
& UPL_PAGE_SYNC_DONE
)
4633 m
->precious
= FALSE
;
4636 p
= &(page_list
[entry
]);
4637 if(page_list
[entry
].phys_addr
&&
4638 p
->pageout
&& !m
->pageout
) {
4639 vm_page_lock_queues();
4643 vm_page_unlock_queues();
4644 } else if (page_list
[entry
].phys_addr
&&
4645 !p
->pageout
&& m
->pageout
&&
4646 !m
->dump_cleaning
) {
4647 vm_page_lock_queues();
4650 m
->overwriting
= FALSE
;
4652 PAGE_WAKEUP_DONE(m
);
4653 vm_page_unlock_queues();
4655 page_list
[entry
].phys_addr
= 0;
4658 target_offset
+= PAGE_SIZE_64
;
4661 vm_object_unlock(shadow_object
);
4662 if (object
!= shadow_object
)
4663 vm_object_unlock(object
);
4666 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
4667 vm_object_lock(upl
->map_object
->shadow
);
4668 if(upl
->map_object
== upl
->map_object
->shadow
)
4669 vm_object_paging_end(upl
->map_object
->shadow
);
4670 vm_object_unlock(upl
->map_object
->shadow
);
4673 return KERN_SUCCESS
;
4679 vm_object_iopl_request(
4681 vm_object_offset_t offset
,
4684 upl_page_info_array_t user_page_list
,
4685 unsigned int *page_list_count
,
4689 vm_object_offset_t dst_offset
= offset
;
4690 upl_size_t xfer_size
= size
;
4693 wpl_array_t lite_list
= NULL
;
4694 int page_field_size
;
4695 int delayed_unlock
= 0;
4696 int no_zero_fill
= FALSE
;
4697 vm_page_t alias_page
= NULL
;
4702 if (cntrl_flags
& ~UPL_VALID_FLAGS
) {
4704 * For forward compatibility's sake,
4705 * reject any unknown flag.
4707 return KERN_INVALID_VALUE
;
4709 if (vm_lopage_poolsize
== 0)
4710 cntrl_flags
&= ~UPL_NEED_32BIT_ADDR
;
4712 if (cntrl_flags
& UPL_NEED_32BIT_ADDR
) {
4713 if ( (cntrl_flags
& (UPL_SET_IO_WIRE
| UPL_SET_LITE
)) != (UPL_SET_IO_WIRE
| UPL_SET_LITE
))
4714 return KERN_INVALID_VALUE
;
4716 if (object
->phys_contiguous
) {
4717 if ((offset
+ object
->shadow_offset
) >= (vm_object_offset_t
)max_valid_dma_address
)
4718 return KERN_INVALID_ADDRESS
;
4720 if (((offset
+ object
->shadow_offset
) + size
) >= (vm_object_offset_t
)max_valid_dma_address
)
4721 return KERN_INVALID_ADDRESS
;
4725 if (cntrl_flags
& UPL_ENCRYPT
) {
4728 * The paging path doesn't use this interface,
4729 * so we don't support the UPL_ENCRYPT flag
4730 * here. We won't encrypt the pages.
4732 assert(! (cntrl_flags
& UPL_ENCRYPT
));
4735 if (cntrl_flags
& UPL_NOZEROFILL
)
4736 no_zero_fill
= TRUE
;
4738 if (cntrl_flags
& UPL_COPYOUT_FROM
)
4739 prot
= VM_PROT_READ
;
4741 prot
= VM_PROT_READ
| VM_PROT_WRITE
;
4743 if(((size
/page_size
) > MAX_UPL_TRANSFER
) && !object
->phys_contiguous
) {
4744 size
= MAX_UPL_TRANSFER
* page_size
;
4747 if(cntrl_flags
& UPL_SET_INTERNAL
)
4748 if(page_list_count
!= NULL
)
4749 *page_list_count
= MAX_UPL_TRANSFER
;
4750 if(((cntrl_flags
& UPL_SET_INTERNAL
) && !(object
->phys_contiguous
)) &&
4751 ((page_list_count
!= NULL
) && (*page_list_count
!= 0)
4752 && *page_list_count
< (size
/page_size
)))
4753 return KERN_INVALID_ARGUMENT
;
4755 if((!object
->internal
) && (object
->paging_offset
!= 0))
4756 panic("vm_object_upl_request: external object with non-zero paging offset\n");
4758 if(object
->phys_contiguous
) {
4759 /* No paging operations are possible against this memory */
4760 /* and so no need for map object, ever */
4761 cntrl_flags
|= UPL_SET_LITE
;

    if (cntrl_flags & UPL_SET_INTERNAL) {
        if (cntrl_flags & UPL_SET_LITE) {
            upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, size);
            user_page_list = (upl_page_info_t *)
                (((uintptr_t)upl) + sizeof(struct upl));
            lite_list = (wpl_array_t)
                (((uintptr_t)user_page_list) +
                 ((size / PAGE_SIZE) * sizeof(upl_page_info_t)));
            page_field_size = ((size / PAGE_SIZE) + 7) >> 3;
            page_field_size =
                (page_field_size + 3) & 0xFFFFFFFC;
            bzero((char *)lite_list, page_field_size);
            upl->flags =
                UPL_LITE | UPL_INTERNAL | UPL_IO_WIRE;
        } else {
            upl = upl_create(UPL_CREATE_INTERNAL, size);
            user_page_list = (upl_page_info_t *)
                (((uintptr_t)upl) + sizeof(struct upl));
            upl->flags = UPL_INTERNAL | UPL_IO_WIRE;
        }
    } else {
        if (cntrl_flags & UPL_SET_LITE) {
            upl = upl_create(UPL_CREATE_LITE, size);
            lite_list = (wpl_array_t)
                (((uintptr_t)upl) + sizeof(struct upl));
            page_field_size = ((size / PAGE_SIZE) + 7) >> 3;
            page_field_size =
                (page_field_size + 3) & 0xFFFFFFFC;
            bzero((char *)lite_list, page_field_size);
            upl->flags = UPL_LITE | UPL_IO_WIRE;
        } else {
            upl = upl_create(UPL_CREATE_EXTERNAL, size);
            upl->flags = UPL_IO_WIRE;
        }
    }

    if (object->phys_contiguous) {
        upl->map_object = object;
        /* don't need any shadow mappings for this one */
        /* since it is already I/O memory */
        upl->flags |= UPL_DEVICE_MEMORY;

        vm_object_lock(object);
        vm_object_paging_begin(object);
        vm_object_unlock(object);

        /* paging in progress also protects the paging_offset */
        upl->offset = offset + object->paging_offset;
        upl->size = size;
        *upl_ptr = upl;

        if (user_page_list) {
            user_page_list[0].phys_addr =
                (offset + object->shadow_offset) >> PAGE_SHIFT;
            user_page_list[0].device = TRUE;
        }
        upl->highest_page = (offset + object->shadow_offset + size - 1) >> PAGE_SHIFT;

        if (page_list_count != NULL) {
            if (upl->flags & UPL_INTERNAL) {
                *page_list_count = 0;
            } else {
                *page_list_count = 1;
            }
        }
        return KERN_SUCCESS;
    }
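
    /*
     * Not device memory: from here on we operate on a pageable object,
     * faulting in and wiring one page at a time in the loop below.
     */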
    if (user_page_list)
        user_page_list[0].device = FALSE;

    if (cntrl_flags & UPL_SET_LITE) {
        upl->map_object = object;
    } else {
        upl->map_object = vm_object_allocate(size);
        vm_object_lock(upl->map_object);
        upl->map_object->shadow = object;
        upl->map_object->pageout = TRUE;
        upl->map_object->can_persist = FALSE;
        upl->map_object->copy_strategy =
            MEMORY_OBJECT_COPY_NONE;
        upl->map_object->shadow_offset = offset;
        upl->map_object->wimg_bits = object->wimg_bits;
        vm_object_unlock(upl->map_object);
    }
    vm_object_lock(object);
    vm_object_paging_begin(object);

    if (!object->phys_contiguous) {
        /* Protect user space from future COW operations */
        object->true_share = TRUE;
        if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
            object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
    }

    /* we can lock the upl offset now that paging_in_progress is set */
    upl->size = size;
    upl->offset = offset + object->paging_offset;
    *upl_ptr = upl;
#ifdef UPL_DEBUG
    queue_enter(&object->uplq, upl, upl_t, uplq);
#endif /* UPL_DEBUG */

    if (cntrl_flags & UPL_BLOCK_ACCESS) {
        /*
         * The user requested that access to the pages in this UPL
         * be blocked until the UPL is committed or aborted.
         */
        upl->flags |= UPL_ACCESS_BLOCKED;
    }
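
    /*
     * With UPL_ACCESS_BLOCKED set, each page gathered below is also marked
     * "busy" and the whole range is pmap-protected before we return, so any
     * user access will fault and wait until the UPL is committed or aborted.
     */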

    entry = 0;

    while (xfer_size) {
        if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
            if (delayed_unlock) {
                delayed_unlock = 0;
                vm_page_unlock_queues();
            }
            vm_object_unlock(object);
            VM_PAGE_GRAB_FICTITIOUS(alias_page);
            vm_object_lock(object);
        }
        dst_page = vm_page_lookup(object, dst_offset);

        /*
         * If the page is encrypted, we need to decrypt it,
         * so force a soft page fault.
         */
        if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
            (dst_page->encrypted) ||
            (dst_page->unusual && (dst_page->error ||
                                   dst_page->restart ||
                                   dst_page->absent ||
                                   dst_page->fictitious ||
                                   (prot & dst_page->page_lock)))) {
            vm_fault_return_t result;

            do {
                vm_page_t       top_page;
                kern_return_t   error_code;
                int             interruptible;

                vm_object_offset_t lo_offset = offset;
                vm_object_offset_t hi_offset = offset + size;

                if (delayed_unlock) {
                    delayed_unlock = 0;
                    vm_page_unlock_queues();
                }

                if (cntrl_flags & UPL_SET_INTERRUPTIBLE) {
                    interruptible = THREAD_ABORTSAFE;
                } else {
                    interruptible = THREAD_UNINT;
                }

                result = vm_fault_page(object, dst_offset,
                                       prot | VM_PROT_WRITE, FALSE,
                                       interruptible,
                                       lo_offset, hi_offset,
                                       VM_BEHAVIOR_SEQUENTIAL,
                                       &prot, &dst_page, &top_page,
                                       (int *)0,
                                       &error_code, no_zero_fill, FALSE, NULL, 0);

                switch (result) {

                case VM_FAULT_SUCCESS:

                    PAGE_WAKEUP_DONE(dst_page);

                    /*
                     * Release paging references and
                     * top-level placeholder page, if any.
                     */
                    if (top_page != VM_PAGE_NULL) {
                        vm_object_t local_object;

                        local_object = top_page->object;
                        if (top_page->object != dst_page->object) {
                            vm_object_lock(local_object);
                            VM_PAGE_FREE(top_page);
                            vm_object_paging_end(local_object);
                            vm_object_unlock(local_object);
                        } else {
                            VM_PAGE_FREE(top_page);
                            vm_object_paging_end(local_object);
                        }
                    }
                    break;

                case VM_FAULT_RETRY:
                    vm_object_lock(object);
                    vm_object_paging_begin(object);
                    break;

                case VM_FAULT_FICTITIOUS_SHORTAGE:
                    vm_page_more_fictitious();
                    vm_object_lock(object);
                    vm_object_paging_begin(object);
                    break;

                case VM_FAULT_MEMORY_SHORTAGE:
                    if (vm_page_wait(interruptible)) {
                        vm_object_lock(object);
                        vm_object_paging_begin(object);
                        break;
                    }
                    /* fall thru */

                case VM_FAULT_INTERRUPTED:
                    error_code = MACH_SEND_INTERRUPTED;
                case VM_FAULT_MEMORY_ERROR:
                    ret = (error_code ? error_code :
                           KERN_MEMORY_ERROR);
                    vm_object_lock(object);
                    goto return_err;
                }
            } while ((result != VM_FAULT_SUCCESS)
                     || (result == VM_FAULT_INTERRUPTED));
        }

        if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
             dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
            vm_page_t   low_page;
            int         refmod;

            /*
             * support devices that can't DMA above 32 bits
             * by substituting pages from a pool of low address
             * memory for any pages we find above the 4G mark
             * can't substitute if the page is already wired because
             * we don't know whether that physical address has been
             * handed out to some other 64 bit capable DMA device to use
             */
            if (dst_page->wire_count) {
                ret = KERN_PROTECTION_FAILURE;
                goto return_err;
            }
            if (delayed_unlock) {
                delayed_unlock = 0;
                vm_page_unlock_queues();
            }
            low_page = vm_page_grablo();

            if (low_page == VM_PAGE_NULL) {
                ret = KERN_RESOURCE_SHORTAGE;
                goto return_err;
            }
            /*
             * from here until the vm_page_replace completes
             * we mustn't drop the object lock... we don't
             * want anyone refaulting this page in and using
             * it after we disconnect it... we want the fault
             * to find the new page being substituted.
             */
            refmod = pmap_disconnect(dst_page->phys_page);

            vm_page_copy(dst_page, low_page);

            low_page->reference = dst_page->reference;
            low_page->dirty     = dst_page->dirty;

            if (refmod & VM_MEM_REFERENCED)
                low_page->reference = TRUE;
            if (refmod & VM_MEM_MODIFIED)
                low_page->dirty = TRUE;

            vm_page_lock_queues();
            vm_page_replace(low_page, object, dst_offset);
            /*
             * keep the queue lock since we're going to
             * need it immediately
             */
            delayed_unlock = 1;

            dst_page = low_page;
            /*
             * vm_page_grablo returned the page marked
             * BUSY... we don't need a PAGE_WAKEUP_DONE
             * here, because we've never dropped the object lock
             */
            dst_page->busy = FALSE;
        }
        if (delayed_unlock == 0)
            vm_page_lock_queues();
        vm_page_wire(dst_page);

        if (cntrl_flags & UPL_BLOCK_ACCESS) {
            /*
             * Mark the page "busy" to block any future page fault
             * on this page.  We'll also remove the mapping
             * of all these pages before leaving this routine.
             */
            assert(!dst_page->fictitious);
            dst_page->busy = TRUE;
        }

        if (cntrl_flags & UPL_SET_LITE) {
            int pg_num;

            pg_num = (dst_offset - offset) / PAGE_SIZE;
            lite_list[pg_num >> 5] |= 1 << (pg_num & 31);
        } else {
            /*
             * Convert the fictitious page to a
             * private shadow of the real page.
             */
            assert(alias_page->fictitious);
            alias_page->fictitious = FALSE;
            alias_page->private = TRUE;
            alias_page->pageout = TRUE;
            alias_page->phys_page = dst_page->phys_page;
            vm_page_wire(alias_page);

            vm_page_insert(alias_page,
                           upl->map_object, size - xfer_size);
            assert(!alias_page->wanted);
            alias_page->busy = FALSE;
            alias_page->absent = FALSE;
            alias_page = NULL;
        }

        /* expect the page to be used */
        dst_page->reference = TRUE;

        if (!(cntrl_flags & UPL_COPYOUT_FROM))
            dst_page->dirty = TRUE;

        if (dst_page->phys_page > upl->highest_page)
            upl->highest_page = dst_page->phys_page;

        if (user_page_list) {
            user_page_list[entry].phys_addr =
                dst_page->phys_page;
            user_page_list[entry].dirty =
                dst_page->dirty;
            user_page_list[entry].pageout =
                dst_page->pageout;
            user_page_list[entry].absent =
                dst_page->absent;
            user_page_list[entry].precious =
                dst_page->precious;
        }

        if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
            delayed_unlock = 1;
            vm_page_unlock_queues();
        }
        entry++;
        dst_offset += PAGE_SIZE_64;
        xfer_size -= PAGE_SIZE;
    }
    if (delayed_unlock)
        vm_page_unlock_queues();

    if (upl->flags & UPL_INTERNAL) {
        if (page_list_count != NULL)
            *page_list_count = 0;
    } else if (*page_list_count > entry) {
        if (page_list_count != NULL)
            *page_list_count = entry;
    }

    if (alias_page != NULL) {
        vm_page_lock_queues();
        vm_page_free(alias_page);
        vm_page_unlock_queues();
    }

    vm_object_unlock(object);

    if (cntrl_flags & UPL_BLOCK_ACCESS) {
        /*
         * We've marked all the pages "busy" so that future
         * page faults will block.
         * Now remove the mapping for these pages, so that they
         * can't be accessed without causing a page fault.
         */
        vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
                               PMAP_NULL, 0, VM_PROT_NONE);
    }
    return KERN_SUCCESS;
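
    /*
     * Error path: undo the wiring done so far, drop the paging
     * reference on the object and hand the error back to the caller.
     */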
return_err:
    if (delayed_unlock)
        vm_page_unlock_queues();

    for (; offset < dst_offset; offset += PAGE_SIZE) {
        dst_page = vm_page_lookup(object, offset);

        if (dst_page == VM_PAGE_NULL)
            panic("vm_object_iopl_request: Wired pages missing. \n");
        vm_page_lock_queues();
        vm_page_unwire(dst_page);
        vm_page_unlock_queues();
        VM_STAT(reactivations++);
    }
    vm_object_paging_end(object);
    vm_object_unlock(object);

    return ret;
}

kern_return_t
upl_transpose(
    upl_t       upl1,
    upl_t       upl2)
{
    kern_return_t   retval;
    boolean_t       upls_locked;
    vm_object_t     object1, object2;

    if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) {
        return KERN_INVALID_ARGUMENT;
    }

    upls_locked = FALSE;

    /*
     * Since we need to lock both UPLs at the same time,
     * avoid deadlocks by always taking locks in the same order.
     */
    if (upl1 < upl2) {
        upl_lock(upl1);
        upl_lock(upl2);
    } else {
        upl_lock(upl2);
        upl_lock(upl1);
    }
    upls_locked = TRUE;     /* the UPLs will need to be unlocked */

    object1 = upl1->map_object;
    object2 = upl2->map_object;

    if (upl1->offset != 0 || upl2->offset != 0 ||
        upl1->size != upl2->size) {
        /*
         * We deal only with full objects, not subsets.
         * That's because we exchange the entire backing store info
         * for the objects: pager, resident pages, etc...  We can't do
         * only a subset of that.
         */
        retval = KERN_INVALID_VALUE;
        goto done;
    }

    /*
     * Transpose the VM objects' backing store.
     */
    retval = vm_object_transpose(object1, object2,
                                 (vm_object_size_t) upl1->size);

    if (retval == KERN_SUCCESS) {
        /*
         * Make each UPL point to the correct VM object, i.e. the
         * object holding the pages that the UPL refers to...
         */
        upl1->map_object = object2;
        upl2->map_object = object1;
    }

done:
    if (upls_locked) {
        upl_unlock(upl1);
        upl_unlock(upl2);
        upls_locked = FALSE;
    }

    return retval;
}

/*
 * Rationale:  the user might have some encrypted data on disk (via
 * FileVault or any other mechanism).  That data is then decrypted in
 * memory, which is safe as long as the machine is secure.  But that
 * decrypted data in memory could be paged out to disk by the default
 * pager.  The data would then be stored on disk in clear (not encrypted)
 * and it could be accessed by anyone who gets physical access to the
 * disk (if the laptop or the disk gets stolen for example).  This weakens
 * the security offered by FileVault.
 *
 * Solution:  the default pager will optionally request that all the
 * pages it gathers for pageout be encrypted, via the UPL interfaces,
 * before it sends this UPL to disk via the vnode_pageout() path.
 *
 * To avoid disrupting the VM LRU algorithms, we want to keep the
 * clean-in-place mechanisms, which allow us to send some extra pages to
 * swap (clustering) without actually removing them from the user's
 * address space.  We don't want the user to unknowingly access encrypted
 * data, so we have to actually remove the encrypted pages from the page
 * table.  When the user accesses the data, the hardware will fail to
 * locate the virtual page in its page table and will trigger a page
 * fault.  We can then decrypt the page and enter it in the page table
 * again.  Whenever we allow the user to access the contents of a page,
 * we have to make sure it's not encrypted.
 */
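
/*
 * In this file that translates, roughly, into: the pageout path calls
 * upl_encrypt()/vm_page_encrypt() on the pages of a UPL before they are
 * written to the backing store, and vm_page_decrypt() is used before an
 * encrypted page is handed back to anyone on the way back in.
 */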

/*
 * Reserve of virtual addresses in the kernel address space.
 * We need to map the physical pages in the kernel, so that we
 * can call the encryption/decryption routines with a kernel
 * virtual address.  We keep this pool of pre-allocated kernel
 * virtual addresses so that we don't have to scan the kernel's
 * virtual address space each time we need to encrypt or decrypt
 * a physical page.
 * It would be nice to be able to encrypt and decrypt in physical
 * mode but that might not always be more efficient...
 */
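
/*
 * The pool below is a single VM_PAGING_NUM_PAGES * PAGE_SIZE reservation
 * in kernel_map; slot "i" corresponds to the virtual address
 * vm_paging_base_address + i * PAGE_SIZE, and vm_paging_page_inuse[]
 * (protected by vm_paging_lock) tracks which slots are taken.
 */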
decl_simple_lock_data(,vm_paging_lock)
#define VM_PAGING_NUM_PAGES     64
vm_map_offset_t vm_paging_base_address = 0;
boolean_t       vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
int             vm_paging_max_index = 0;
unsigned long   vm_paging_no_kernel_page = 0;
unsigned long   vm_paging_objects_mapped = 0;
unsigned long   vm_paging_pages_mapped = 0;
unsigned long   vm_paging_objects_mapped_slow = 0;
unsigned long   vm_paging_pages_mapped_slow = 0;

/*
 * vm_paging_map_object:
 *      Maps part of a VM object's pages in the kernel
 *      virtual address space, using the pre-allocated
 *      kernel virtual addresses, if possible.
 * Context:
 *      The VM object is locked.  This lock will get
 *      dropped and re-acquired though.
 */
kern_return_t
vm_paging_map_object(
    vm_map_offset_t         *address,
    vm_page_t               page,
    vm_object_t             object,
    vm_object_offset_t      offset,
    vm_map_size_t           *size)
{
    vm_map_offset_t         page_map_offset;
    vm_map_size_t           map_size;
    vm_object_offset_t      object_offset;
    kern_return_t           kr;
    vm_map_entry_t          map_entry;
    int                     i;

    if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
        /*
         * Use one of the pre-allocated kernel virtual addresses
         * and just enter the VM page in the kernel address space
         * at that virtual address.
         */
        vm_object_unlock(object);
        simple_lock(&vm_paging_lock);

        if (vm_paging_base_address == 0) {
            /*
             * Initialize our pool of pre-allocated kernel
             * virtual addresses.
             */
            simple_unlock(&vm_paging_lock);
            page_map_offset = 0;
            kr = vm_map_find_space(kernel_map,
                                   &page_map_offset,
                                   VM_PAGING_NUM_PAGES * PAGE_SIZE,
                                   0,
                                   &map_entry);
            if (kr != KERN_SUCCESS) {
                panic("vm_paging_map_object: "
                      "kernel_map full\n");
            }
            map_entry->object.vm_object = kernel_object;
            map_entry->offset =
                page_map_offset - VM_MIN_KERNEL_ADDRESS;
            vm_object_reference(kernel_object);
            vm_map_unlock(kernel_map);

            simple_lock(&vm_paging_lock);
            if (vm_paging_base_address != 0) {
                /* someone raced us and won: undo */
                simple_unlock(&vm_paging_lock);
                kr = vm_map_remove(kernel_map,
                                   page_map_offset,
                                   page_map_offset +
                                   (VM_PAGING_NUM_PAGES
                                    * PAGE_SIZE),
                                   VM_MAP_NO_FLAGS);
                assert(kr == KERN_SUCCESS);
                simple_lock(&vm_paging_lock);
            } else {
                vm_paging_base_address = page_map_offset;
            }
        }

        /*
         * Try and find an available kernel virtual address
         * from our pre-allocated pool.
         */
        page_map_offset = 0;
        for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
            if (vm_paging_page_inuse[i] == FALSE) {
                page_map_offset = vm_paging_base_address +
                    (i * PAGE_SIZE);
                break;
            }
        }

        if (page_map_offset != 0) {
            /*
             * We found a kernel virtual address;
             * map the physical page to that virtual address.
             */
            if (i > vm_paging_max_index) {
                vm_paging_max_index = i;
            }
            vm_paging_page_inuse[i] = TRUE;
            simple_unlock(&vm_paging_lock);
            if (page->no_isync == TRUE) {
                pmap_sync_page_data_phys(page->phys_page);
            }
            assert(pmap_verify_free(page->phys_page));
            PMAP_ENTER(kernel_pmap,
                       page_map_offset,
                       page,
                       VM_PROT_DEFAULT,
                       ((int) page->object->wimg_bits &
                        VM_WIMG_MASK),
                       TRUE);
            vm_paging_objects_mapped++;
            vm_paging_pages_mapped++;
            *address = page_map_offset;
            vm_object_lock(object);

            /* all done and mapped, ready to use ! */
            return KERN_SUCCESS;
        }

        /*
         * We ran out of pre-allocated kernel virtual
         * addresses.  Just map the page in the kernel
         * the slow and regular way.
         */
        vm_paging_no_kernel_page++;
        simple_unlock(&vm_paging_lock);
        vm_object_lock(object);
    }

    object_offset = vm_object_trunc_page(offset);
    map_size = vm_map_round_page(*size);

    /*
     * Try and map the required range of the object
     * in the kernel_map.
     */

    /* don't go beyond the object's end... */
    if (object_offset >= object->size) {
        map_size = 0;
    } else if (map_size > object->size - offset) {
        map_size = object->size - offset;
    }

    vm_object_reference_locked(object);     /* for the map entry */
    vm_object_unlock(object);

    kr = vm_map_enter(kernel_map,
                      address,
                      map_size,
                      0,
                      VM_FLAGS_ANYWHERE,
                      object,
                      object_offset,
                      FALSE,
                      VM_PROT_DEFAULT,
                      VM_PROT_ALL,
                      VM_INHERIT_NONE);
    if (kr != KERN_SUCCESS) {
        *address = 0;
        *size = 0;
        vm_object_deallocate(object);        /* for the map entry */
        return kr;
    }

    /*
     * Enter the mapped pages in the page table now.
     */
    vm_object_lock(object);
    for (page_map_offset = 0;
         map_size != 0;
         map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
        unsigned int    cache_attr;

        page = vm_page_lookup(object, offset + page_map_offset);
        if (page == VM_PAGE_NULL) {
            panic("vm_paging_map_object: no page !?");
        }
        if (page->no_isync == TRUE) {
            pmap_sync_page_data_phys(page->phys_page);
        }
        cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;

        assert(pmap_verify_free(page->phys_page));
        PMAP_ENTER(kernel_pmap,
                   *address + page_map_offset,
                   page,
                   VM_PROT_DEFAULT,
                   cache_attr,
                   TRUE);
    }

    vm_paging_objects_mapped_slow++;
    vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64;

    return KERN_SUCCESS;
}

/*
 * vm_paging_unmap_object:
 *      Unmaps part of a VM object's pages from the kernel
 *      virtual address space.
 * Context:
 *      The VM object is locked.  This lock will get
 *      dropped and re-acquired though.
 */
void
vm_paging_unmap_object(
    vm_object_t         object,
    vm_map_offset_t     start,
    vm_map_offset_t     end)
{
    kern_return_t   kr;
    int             i;

    if ((vm_paging_base_address == 0) ||
        (start < vm_paging_base_address) ||
        (end > (vm_paging_base_address
                + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
        /*
         * We didn't use our pre-allocated pool of
         * kernel virtual address.  Deallocate the
         * virtual memory.
         */
        if (object != VM_OBJECT_NULL) {
            vm_object_unlock(object);
        }
        kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
        if (object != VM_OBJECT_NULL) {
            vm_object_lock(object);
        }
        assert(kr == KERN_SUCCESS);
    } else {
        /*
         * We used a kernel virtual address from our
         * pre-allocated pool.  Put it back in the pool
         * for next time.
         */
        assert(end - start == PAGE_SIZE);
        i = (start - vm_paging_base_address) >> PAGE_SHIFT;

        /* undo the pmap mapping */
        pmap_remove(kernel_pmap, start, end);

        simple_lock(&vm_paging_lock);
        vm_paging_page_inuse[i] = FALSE;
        simple_unlock(&vm_paging_lock);
    }
}
5566 * "iv" is the "initial vector". Ideally, we want to
5567 * have a different one for each page we encrypt, so that
5568 * crackers can't find encryption patterns too easily.
5570 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
5571 boolean_t swap_crypt_ctx_initialized
= FALSE
;
5572 aes_32t swap_crypt_key
[8]; /* big enough for a 256 key */
5573 aes_ctx swap_crypt_ctx
;
5574 const unsigned char swap_crypt_null_iv
[AES_BLOCK_SIZE
] = {0xa, };
5577 boolean_t swap_crypt_ctx_tested
= FALSE
;
5578 unsigned char swap_crypt_test_page_ref
[4096] __attribute__((aligned(4096)));
5579 unsigned char swap_crypt_test_page_encrypt
[4096] __attribute__((aligned(4096)));
5580 unsigned char swap_crypt_test_page_decrypt
[4096] __attribute__((aligned(4096)));
5583 extern u_long
random(void);
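
/*
 * The AES key is generated from random() on first use (see
 * swap_crypt_ctx_initialize() below) and lives only in this kernel
 * global, so a fresh key is created on each boot.
 */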

/*
 * Initialize the encryption context: key and key size.
 */
void swap_crypt_ctx_initialize(void); /* forward */
void
swap_crypt_ctx_initialize(void)
{
    unsigned int    i;

    /*
     * No need for locking to protect swap_crypt_ctx_initialized
     * because the first use of encryption will come from the
     * pageout thread (we won't pagein before there's been a pageout)
     * and there's only one pageout thread.
     */
    if (swap_crypt_ctx_initialized == FALSE) {
        for (i = 0;
             i < (sizeof (swap_crypt_key) /
                  sizeof (swap_crypt_key[0]));
             i++) {
            swap_crypt_key[i] = random();
        }
        aes_encrypt_key((const unsigned char *) swap_crypt_key,
                        SWAP_CRYPT_AES_KEY_SIZE,
                        &swap_crypt_ctx.encrypt);
        aes_decrypt_key((const unsigned char *) swap_crypt_key,
                        SWAP_CRYPT_AES_KEY_SIZE,
                        &swap_crypt_ctx.decrypt);
        swap_crypt_ctx_initialized = TRUE;
    }

    /*
     * Validate the encryption algorithms.
     */
    if (swap_crypt_ctx_tested == FALSE) {
        /* initialize */
        for (i = 0; i < 4096; i++) {
            swap_crypt_test_page_ref[i] = (char) i;
        }
        /* encrypt */
        aes_encrypt_cbc(swap_crypt_test_page_ref,
                        swap_crypt_null_iv,
                        PAGE_SIZE / AES_BLOCK_SIZE,
                        swap_crypt_test_page_encrypt,
                        &swap_crypt_ctx.encrypt);
        /* decrypt */
        aes_decrypt_cbc(swap_crypt_test_page_encrypt,
                        swap_crypt_null_iv,
                        PAGE_SIZE / AES_BLOCK_SIZE,
                        swap_crypt_test_page_decrypt,
                        &swap_crypt_ctx.decrypt);
        /* compare result with original */
        for (i = 0; i < 4096; i++) {
            if (swap_crypt_test_page_decrypt[i] !=
                swap_crypt_test_page_ref[i]) {
                panic("encryption test failed");
            }
        }

        /* encrypt again */
        aes_encrypt_cbc(swap_crypt_test_page_decrypt,
                        swap_crypt_null_iv,
                        PAGE_SIZE / AES_BLOCK_SIZE,
                        swap_crypt_test_page_decrypt,
                        &swap_crypt_ctx.encrypt);
        /* decrypt in place */
        aes_decrypt_cbc(swap_crypt_test_page_decrypt,
                        swap_crypt_null_iv,
                        PAGE_SIZE / AES_BLOCK_SIZE,
                        swap_crypt_test_page_decrypt,
                        &swap_crypt_ctx.decrypt);
        for (i = 0; i < 4096; i++) {
            if (swap_crypt_test_page_decrypt[i] !=
                swap_crypt_test_page_ref[i]) {
                panic("in place encryption test failed");
            }
        }

        swap_crypt_ctx_tested = TRUE;
    }
}
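
/*
 * The self-test above round-trips a known 4K pattern through the
 * encrypt/decrypt contexts, both into separate buffers and in place,
 * and panics on any mismatch before the contexts are used on real pages.
 */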

/*
 * vm_page_encrypt:
 *      Encrypt the given page, for secure paging.
 *      The page might already be mapped at kernel virtual
 *      address "kernel_mapping_offset".  Otherwise, we need
 *      to map it.
 * Context:
 *      The page's object is locked, but this lock will be released
 *      and re-acquired.
 *      The page is busy and not accessible by users (not entered in any pmap).
 */
void
vm_page_encrypt(
    vm_page_t           page,
    vm_map_offset_t     kernel_mapping_offset)
{
    int             clear_refmod = 0;
    kern_return_t   kr;
    boolean_t       page_was_referenced;
    boolean_t       page_was_modified;
    vm_map_size_t   kernel_mapping_size;
    vm_offset_t     kernel_vaddr;
    union {
        unsigned char   aes_iv[AES_BLOCK_SIZE];
        struct {
            memory_object_t         pager_object;
            vm_object_offset_t      paging_offset;
        } vm;
    } encrypt_iv;

    if (! vm_pages_encrypted) {
        vm_pages_encrypted = TRUE;
    }

    assert(page->dirty || page->precious);

    if (page->encrypted) {
        /*
         * Already encrypted: no need to do it again.
         */
        vm_page_encrypt_already_encrypted_counter++;
        return;
    }
    ASSERT_PAGE_DECRYPTED(page);

    /*
     * Gather the "reference" and "modified" status of the page.
     * We'll restore these values after the encryption, so that
     * the encryption is transparent to the rest of the system
     * and doesn't impact the VM's LRU logic.
     */
    page_was_referenced =
        (page->reference || pmap_is_referenced(page->phys_page));
    page_was_modified =
        (page->dirty || pmap_is_modified(page->phys_page));

    if (kernel_mapping_offset == 0) {
        /*
         * The page hasn't already been mapped in kernel space
         * by the caller.  Map it now, so that we can access
         * its contents and encrypt them.
         */
        kernel_mapping_size = PAGE_SIZE;
        kr = vm_paging_map_object(&kernel_mapping_offset,
                                  page,
                                  page->object,
                                  page->offset,
                                  &kernel_mapping_size);
        if (kr != KERN_SUCCESS) {
            panic("vm_page_encrypt: "
                  "could not map page in kernel: 0x%x\n",
                  kr);
        }
    } else {
        kernel_mapping_size = 0;
    }
    kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);

    if (swap_crypt_ctx_initialized == FALSE) {
        swap_crypt_ctx_initialize();
    }
    assert(swap_crypt_ctx_initialized);

    /*
     * Prepare an "initial vector" for the encryption.
     * We use the "pager" and the "paging_offset" for that
     * page to obfuscate the encrypted data a bit more and
     * prevent crackers from finding patterns that they could
     * use to break the key.
     */
    bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
    encrypt_iv.vm.pager_object = page->object->pager;
    encrypt_iv.vm.paging_offset =
        page->object->paging_offset + page->offset;

    vm_object_unlock(page->object);

    /* encrypt the "initial vector" */
    aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
                    swap_crypt_null_iv,
                    1,
                    &encrypt_iv.aes_iv[0],
                    &swap_crypt_ctx.encrypt);

    /*
     * Encrypt the page.
     */
    aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
                    &encrypt_iv.aes_iv[0],
                    PAGE_SIZE / AES_BLOCK_SIZE,
                    (unsigned char *) kernel_vaddr,
                    &swap_crypt_ctx.encrypt);

    vm_page_encrypt_counter++;

    vm_object_lock(page->object);

    /*
     * Unmap the page from the kernel's address space,
     * if we had to map it ourselves.  Otherwise, let
     * the caller undo the mapping if needed.
     */
    if (kernel_mapping_size != 0) {
        vm_paging_unmap_object(page->object,
                               kernel_mapping_offset,
                               kernel_mapping_offset + kernel_mapping_size);
    }

    /*
     * Restore the "reference" and "modified" bits.
     * This should clean up any impact the encryption had
     * on them.
     */
    if (! page_was_referenced) {
        clear_refmod |= VM_MEM_REFERENCED;
        page->reference = FALSE;
    }
    if (! page_was_modified) {
        clear_refmod |= VM_MEM_MODIFIED;
        page->dirty = FALSE;
    }
    pmap_clear_refmod(page->phys_page, clear_refmod);

    page->encrypted = TRUE;
}
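
/*
 * Note: vm_page_decrypt() below must rebuild exactly the same "initial
 * vector" (pager + paging_offset) to undo this encryption, which is why
 * both routines derive it the same way from the page's object.
 */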

/*
 * vm_page_decrypt:
 *      Decrypt the given page.
 *      The page might already be mapped at kernel virtual
 *      address "kernel_mapping_offset".  Otherwise, we need
 *      to map it.
 * Context:
 *      The page's VM object is locked but will be unlocked and relocked.
 *      The page is busy and not accessible by users (not entered in any pmap).
 */
void
vm_page_decrypt(
    vm_page_t           page,
    vm_map_offset_t     kernel_mapping_offset)
{
    int             clear_refmod = 0;
    kern_return_t   kr;
    vm_map_size_t   kernel_mapping_size;
    vm_offset_t     kernel_vaddr;
    boolean_t       page_was_referenced;
    union {
        unsigned char   aes_iv[AES_BLOCK_SIZE];
        struct {
            memory_object_t         pager_object;
            vm_object_offset_t      paging_offset;
        } vm;
    } decrypt_iv;

    assert(page->encrypted);

    /*
     * Gather the "reference" status of the page.
     * We'll restore its value after the decryption, so that
     * the decryption is transparent to the rest of the system
     * and doesn't impact the VM's LRU logic.
     */
    page_was_referenced =
        (page->reference || pmap_is_referenced(page->phys_page));

    if (kernel_mapping_offset == 0) {
        /*
         * The page hasn't already been mapped in kernel space
         * by the caller.  Map it now, so that we can access
         * its contents and decrypt them.
         */
        kernel_mapping_size = PAGE_SIZE;
        kr = vm_paging_map_object(&kernel_mapping_offset,
                                  page,
                                  page->object,
                                  page->offset,
                                  &kernel_mapping_size);
        if (kr != KERN_SUCCESS) {
            panic("vm_page_decrypt: "
                  "could not map page in kernel: 0x%x\n",
                  kr);
        }
    } else {
        kernel_mapping_size = 0;
    }
    kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);

    assert(swap_crypt_ctx_initialized);

    /*
     * Prepare an "initial vector" for the decryption.
     * It has to be the same as the "initial vector" we
     * used to encrypt that page.
     */
    bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
    decrypt_iv.vm.pager_object = page->object->pager;
    decrypt_iv.vm.paging_offset =
        page->object->paging_offset + page->offset;

    vm_object_unlock(page->object);

    /* encrypt the "initial vector" */
    aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
                    swap_crypt_null_iv,
                    1,
                    &decrypt_iv.aes_iv[0],
                    &swap_crypt_ctx.encrypt);

    /*
     * Decrypt the page.
     */
    aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
                    &decrypt_iv.aes_iv[0],
                    PAGE_SIZE / AES_BLOCK_SIZE,
                    (unsigned char *) kernel_vaddr,
                    &swap_crypt_ctx.decrypt);
    vm_page_decrypt_counter++;

    vm_object_lock(page->object);

    /*
     * Unmap the page from the kernel's address space,
     * if we had to map it ourselves.  Otherwise, let
     * the caller undo the mapping if needed.
     */
    if (kernel_mapping_size != 0) {
        vm_paging_unmap_object(page->object,
                               kernel_vaddr,
                               kernel_vaddr + PAGE_SIZE);
    }

    /*
     * After decryption, the page is actually clean.
     * It was encrypted as part of paging, which "cleans"
     * the "dirty" pages.
     * No one could access it after it was encrypted
     * and the decryption doesn't count.
     */
    page->dirty = FALSE;
    clear_refmod = VM_MEM_MODIFIED;

    /* restore the "reference" bit */
    if (! page_was_referenced) {
        page->reference = FALSE;
        clear_refmod |= VM_MEM_REFERENCED;
    }
    pmap_clear_refmod(page->phys_page, clear_refmod);

    page->encrypted = FALSE;

    /*
     * We've just modified the page's contents via the data cache and part
     * of the new contents might still be in the cache and not yet in RAM.
     * Since the page is now available and might get gathered in a UPL to
     * be part of a DMA transfer from a driver that expects the memory to
     * be coherent at this point, we have to flush the data cache.
     */
    pmap_sync_page_attributes_phys(page->phys_page);
    /*
     * Since the page is not mapped yet, some code might assume that it
     * doesn't need to invalidate the instruction cache when writing to
     * that page.  That code relies on "no_isync" being set, so that the
     * caches get synchronized when the page is first mapped.  So we need
     * to set "no_isync" here too, despite the fact that we just
     * synchronized the caches above...
     */
    page->no_isync = TRUE;
}

unsigned long upl_encrypt_upls = 0;
unsigned long upl_encrypt_pages = 0;

/*
 * upl_encrypt:
 *      Encrypts all the pages in the UPL, within the specified range.
 */
void
upl_encrypt(
    upl_t           upl,
    upl_offset_t    crypt_offset,
    upl_size_t      crypt_size)
{
    upl_size_t              upl_size;
    upl_offset_t            upl_offset;
    vm_object_t             upl_object;
    vm_page_t               page;
    vm_object_t             shadow_object;
    vm_object_offset_t      shadow_offset;
    vm_object_offset_t      paging_offset;
    vm_object_offset_t      base_offset;

    upl_encrypt_upls++;
    upl_encrypt_pages += crypt_size / PAGE_SIZE;

    upl_object = upl->map_object;
    upl_offset = upl->offset;
    upl_size = upl->size;

    vm_object_lock(upl_object);

    /*
     * Find the VM object that contains the actual pages.
     */
    if (upl_object->pageout) {
        shadow_object = upl_object->shadow;
        /*
         * The offset in the shadow object is actually also
         * accounted for in upl->offset.  It possibly shouldn't be
         * this way, but for now don't account for it twice.
         */
        shadow_offset = 0;
        assert(upl_object->paging_offset == 0); /* XXX ? */
        vm_object_lock(shadow_object);
    } else {
        shadow_object = upl_object;
        shadow_offset = 0;
    }

    paging_offset = shadow_object->paging_offset;
    vm_object_paging_begin(shadow_object);

    if (shadow_object != upl_object) {
        vm_object_unlock(shadow_object);
    }
    vm_object_unlock(upl_object);

    base_offset = shadow_offset;
    base_offset += upl_offset;
    base_offset += crypt_offset;
    base_offset -= paging_offset;
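    /*
     * base_offset translates crypt_offset (an offset within the UPL)
     * into an offset within the shadow object: upl->offset includes the
     * object's paging_offset, so it is subtracted back out here.
     */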
    /*
     * Unmap the pages, so that nobody can continue accessing them while
     * they're encrypted.  After that point, all accesses to these pages
     * will cause a page fault and block while the page is being encrypted
     * (busy).  After the encryption completes, any access will cause a
     * page fault and the page gets decrypted at that time.
     */
    assert(crypt_offset + crypt_size <= upl_size);
    vm_object_pmap_protect(shadow_object,
                           base_offset,
                           (vm_object_size_t)crypt_size,
                           PMAP_NULL,
                           0,
                           VM_PROT_NONE);

    /* XXX FBDP could the object have changed significantly here ? */
    vm_object_lock(shadow_object);

    for (upl_offset = 0;
         upl_offset < crypt_size;
         upl_offset += PAGE_SIZE) {
        page = vm_page_lookup(shadow_object,
                              base_offset + upl_offset);
        if (page == VM_PAGE_NULL) {
            panic("upl_encrypt: "
                  "no page for (obj=%p,off=%lld+%d)!\n",
                  shadow_object,
                  base_offset,
                  upl_offset);
        }
        vm_page_encrypt(page, 0);
    }

    vm_object_paging_end(shadow_object);
    vm_object_unlock(shadow_object);
}

vm_size_t
upl_get_internal_pagelist_offset(void)
{
    return sizeof(struct upl);
}

void
upl_clear_dirty(
    upl_t           upl,
    boolean_t       value)
{
    if (value) {
        upl->flags |= UPL_CLEAR_DIRTY;
    } else {
        upl->flags &= ~UPL_CLEAR_DIRTY;
    }
}

boolean_t
upl_page_present(upl_page_info_t *upl, int index)
{
    return(UPL_PAGE_PRESENT(upl, index));
}
boolean_t
upl_dirty_page(upl_page_info_t *upl, int index)
{
    return(UPL_DIRTY_PAGE(upl, index));
}
boolean_t
upl_valid_page(upl_page_info_t *upl, int index)
{
    return(UPL_VALID_PAGE(upl, index));
}
ppnum_t
upl_phys_page(upl_page_info_t *upl, int index)
{
    return(UPL_PHYS_PAGE(upl, index));
}

#ifdef MACH_BSD
void
vm_countdirtypages(void)
{
    vm_page_t m;
    int dpages;
    int pgopages;
    int precpages;

    dpages = 0;
    pgopages = 0;
    precpages = 0;

    vm_page_lock_queues();
    m = (vm_page_t) queue_first(&vm_page_queue_inactive);
    do {
        if (m == (vm_page_t)0) break;

        if (m->dirty) dpages++;
        if (m->pageout) pgopages++;
        if (m->precious) precpages++;

        assert(m->object != kernel_object);
        m = (vm_page_t) queue_next(&m->pageq);
        if (m == (vm_page_t)0) break;

    } while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m));
    vm_page_unlock_queues();

    vm_page_lock_queues();
    m = (vm_page_t) queue_first(&vm_page_queue_zf);
    do {
        if (m == (vm_page_t)0) break;

        if (m->dirty) dpages++;
        if (m->pageout) pgopages++;
        if (m->precious) precpages++;

        assert(m->object != kernel_object);
        m = (vm_page_t) queue_next(&m->pageq);
        if (m == (vm_page_t)0) break;

    } while (!queue_end(&vm_page_queue_zf, (queue_entry_t) m));
    vm_page_unlock_queues();

    printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);

    dpages = 0;
    pgopages = 0;
    precpages = 0;

    vm_page_lock_queues();
    m = (vm_page_t) queue_first(&vm_page_queue_active);

    do {
        if (m == (vm_page_t)0) break;
        if (m->dirty) dpages++;
        if (m->pageout) pgopages++;
        if (m->precious) precpages++;

        assert(m->object != kernel_object);
        m = (vm_page_t) queue_next(&m->pageq);
        if (m == (vm_page_t)0) break;

    } while (!queue_end(&vm_page_queue_active, (queue_entry_t) m));
    vm_page_unlock_queues();

    printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
}
#endif /* MACH_BSD */

ppnum_t
upl_get_highest_page(
    upl_t       upl)
{
    return upl->highest_page;
}

#ifdef UPL_DEBUG
kern_return_t
upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
{
    upl->ubc_alias1 = alias1;
    upl->ubc_alias2 = alias2;
    return KERN_SUCCESS;
}
int
upl_ubc_alias_get(upl_t upl, unsigned int *al, unsigned int *al2)
{
    if (al)
        *al = upl->ubc_alias1;
    if (al2)
        *al2 = upl->ubc_alias2;
    return KERN_SUCCESS;
}
#endif /* UPL_DEBUG */

#if MACH_KDB
#include <ddb/db_output.h>
#include <ddb/db_print.h>
#include <vm/vm_print.h>

#define printf  kdbprintf
void db_pageout(void);

void
db_vm(void)
{
    iprintf("VM Statistics:\n");
    iprintf("pages:\n");
    iprintf("activ %5d inact %5d free  %5d",
            vm_page_active_count, vm_page_inactive_count,
            vm_page_free_count);
    printf(" wire  %5d gobbl %5d\n",
           vm_page_wire_count, vm_page_gobble_count);
    iprintf("target:\n");
    iprintf("min   %5d inact %5d free  %5d",
            vm_page_free_min, vm_page_inactive_target,
            vm_page_free_target);
    printf(" resrv %5d\n", vm_page_free_reserved);
    iprintf("pause:\n");
    db_pageout();
}

#if MACH_COUNTERS
extern int c_laundry_pages_freed;
#endif /* MACH_COUNTERS */

void
db_pageout(void)
{
    iprintf("Pageout Statistics:\n");
    iprintf("active %5d  inactv %5d\n",
            vm_pageout_active, vm_pageout_inactive);
    iprintf("nolock %5d  avoid  %5d  busy   %5d  absent %5d\n",
            vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
            vm_pageout_inactive_busy, vm_pageout_inactive_absent);
    iprintf("used   %5d  clean  %5d  dirty  %5d\n",
            vm_pageout_inactive_used, vm_pageout_inactive_clean,
            vm_pageout_inactive_dirty);
#if MACH_COUNTERS
    iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
#endif /* MACH_COUNTERS */
#if MACH_CLUSTER_STATS
    iprintf("Cluster Statistics:\n");
    iprintf("dirtied   %5d   cleaned  %5d   collisions  %5d\n",
            vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
            vm_pageout_cluster_collisions);
    iprintf("clusters  %5d   conversions  %5d\n",
            vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
    iprintf("Target Statistics:\n");
    iprintf("collisions   %5d   page_dirtied  %5d   page_freed  %5d\n",
            vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
            vm_pageout_target_page_freed);
#endif /* MACH_CLUSTER_STATS */
}
#endif /* MACH_KDB */