/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License.  The rights granted to you under the
 * License may not be used to create, or enable the creation or
 * redistribution of, unlawful or unlicensed copies of an Apple operating
 * system, or to circumvent, violate, or enable the circumvention or
 * violation of, any terms of an Apple operating system software license
 * agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *  File:   vm/vm_pageout.c
 *  Author: Avadis Tevanian, Jr., Michael Wayne Young
 *
 *  The proverbial page-out daemon.
 */
#include <mach_pagemap.h>
#include <mach_cluster_stats.h>
#include <advisory_pageout.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/mach_host_server.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_statistics.h>

#include <kern/kern_types.h>
#include <kern/counters.h>
#include <kern/host_statistics.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/kalloc.h>

#include <machine/vm_tuning.h>

#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h> /* must be last */

#include <ppc/mappings.h>
#include <../bsd/crypto/aes/aes.h>

extern ipc_port_t memory_manager_default;
#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE
#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE  10000  /* maximum iterations of the active queue to move pages to inactive */
#endif

#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096  /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
#endif

#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
#define VM_PAGEOUT_DEADLOCK_RELIEF 100   /* number of pages to move to break deadlock */
#endif

#ifndef VM_PAGEOUT_INACTIVE_RELIEF
#define VM_PAGEOUT_INACTIVE_RELIEF 50    /* minimum number of pages to move to the inactive q */
#endif

#ifndef VM_PAGE_LAUNDRY_MAX
#define VM_PAGE_LAUNDRY_MAX      16UL    /* maximum pageouts on a given pageout queue */
#endif  /* VM_PAGE_LAUNDRY_MAX */

#ifndef VM_PAGEOUT_BURST_WAIT
#define VM_PAGEOUT_BURST_WAIT    30      /* milliseconds per page */
#endif  /* VM_PAGEOUT_BURST_WAIT */

#ifndef VM_PAGEOUT_EMPTY_WAIT
#define VM_PAGEOUT_EMPTY_WAIT    200     /* milliseconds */
#endif  /* VM_PAGEOUT_EMPTY_WAIT */

#ifndef VM_PAGEOUT_DEADLOCK_WAIT
#define VM_PAGEOUT_DEADLOCK_WAIT 300     /* milliseconds */
#endif  /* VM_PAGEOUT_DEADLOCK_WAIT */

#ifndef VM_PAGEOUT_IDLE_WAIT
#define VM_PAGEOUT_IDLE_WAIT     10      /* milliseconds */
#endif  /* VM_PAGEOUT_IDLE_WAIT */
/*
 *  To obtain a reasonable LRU approximation, the inactive queue
 *  needs to be large enough to give pages on it a chance to be
 *  referenced a second time.  This macro defines the fraction
 *  of active+inactive pages that should be inactive.
 *  The pageout daemon uses it to update vm_page_inactive_target.
 *
 *  If vm_page_free_count falls below vm_page_free_target and
 *  vm_page_inactive_count is below vm_page_inactive_target,
 *  then the pageout daemon starts running.
 */

#ifndef VM_PAGE_INACTIVE_TARGET
#define VM_PAGE_INACTIVE_TARGET(avail)  ((avail) * 1 / 3)
#endif  /* VM_PAGE_INACTIVE_TARGET */

/*
 *  Once the pageout daemon starts running, it keeps going
 *  until vm_page_free_count meets or exceeds vm_page_free_target.
 */

#ifndef VM_PAGE_FREE_TARGET
#define VM_PAGE_FREE_TARGET(free)   (15 + (free) / 80)
#endif  /* VM_PAGE_FREE_TARGET */

/*
 *  The pageout daemon always starts running once vm_page_free_count
 *  falls below vm_page_free_min.
 */

#ifndef VM_PAGE_FREE_MIN
#define VM_PAGE_FREE_MIN(free)      (10 + (free) / 100)
#endif  /* VM_PAGE_FREE_MIN */

/*
 *  When vm_page_free_count falls below vm_page_free_reserved,
 *  only vm-privileged threads can allocate pages.  vm-privilege
 *  allows the pageout daemon and default pager (and any other
 *  associated threads needed for default pageout) to continue
 *  operation by dipping into the reserved pool of pages.
 */

#ifndef VM_PAGE_FREE_RESERVED
#define VM_PAGE_FREE_RESERVED(n)    \
    ((6 * VM_PAGE_LAUNDRY_MAX) + (n))
#endif  /* VM_PAGE_FREE_RESERVED */
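/*
 * Illustrative arithmetic (not from the original source): with 120000
 * active+inactive pages, VM_PAGE_INACTIVE_TARGET yields 120000 / 3 = 40000
 * inactive pages.  With 8000 pages left after the reserve,
 * VM_PAGE_FREE_TARGET gives 15 + 8000/80 = 115 pages and VM_PAGE_FREE_MIN
 * gives 10 + 8000/100 = 90 pages.  With the default VM_PAGE_LAUNDRY_MAX of
 * 16 on a 2-processor machine, VM_PAGE_FREE_RESERVED(2) is (6 * 16) + 2 = 98.
 */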
/*
 * must hold the page queues lock to
 * manipulate this structure
 */
struct vm_pageout_queue {
    queue_head_t  pgo_pending;    /* laundry pages to be processed by pager's iothread */
    unsigned int  pgo_laundry;    /* current count of laundry pages on queue or in flight */
    unsigned int  pgo_maxlaundry;

    unsigned int  pgo_idle:1,     /* iothread is blocked waiting for work to do */
                  pgo_busy:1,     /* iothread is currently processing request from pgo_pending */
                  pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */
                  :0;
};

#define VM_PAGE_Q_THROTTLED(q)      \
    ((q)->pgo_laundry >= (q)->pgo_maxlaundry)
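/*
 * Example (illustrative): with pgo_maxlaundry left at VM_PAGE_LAUNDRY_MAX
 * (16), VM_PAGE_Q_THROTTLED(q) becomes true once 16 laundry pages are on
 * the queue or in flight, and vm_pageout_scan stops feeding that queue
 * until completed pageouts bring pgo_laundry back down (see
 * vm_pageout_throttle_up below).
 */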
/*
 *  Exported variable used to broadcast the activation of the pageout scan
 *  Working Set uses this to throttle its use of pmap removes.  In this
 *  way, code which runs within memory in an uncontested context does
 *  not keep encountering soft faults.
 */
unsigned int vm_pageout_scan_event_counter = 0;
/*
 * Forward declarations for internal routines.
 */

static void vm_pageout_garbage_collect(int);
static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
static void vm_pageout_iothread_external(void);
static void vm_pageout_iothread_internal(void);
static void vm_pageout_queue_steal(vm_page_t);

extern void vm_pageout_continue(void);
extern void vm_pageout_scan(void);
unsigned int vm_pageout_reserved_internal = 0;
unsigned int vm_pageout_reserved_really = 0;

unsigned int vm_pageout_idle_wait = 0;          /* milliseconds */
unsigned int vm_pageout_empty_wait = 0;         /* milliseconds */
unsigned int vm_pageout_burst_wait = 0;         /* milliseconds */
unsigned int vm_pageout_deadlock_wait = 0;      /* milliseconds */
unsigned int vm_pageout_deadlock_relief = 0;
unsigned int vm_pageout_inactive_relief = 0;
unsigned int vm_pageout_burst_active_throttle = 0;
unsigned int vm_pageout_burst_inactive_throttle = 0;

/*
 *  Protection against zero fill flushing live working sets derived
 *  from existing backing store and files
 */
unsigned int vm_accellerate_zf_pageout_trigger = 400;
unsigned int vm_zf_iterator;
unsigned int vm_zf_iterator_count = 40;
unsigned int last_page_zf;
unsigned int vm_zf_count = 0;
/*
 *  These variables record the pageout daemon's actions:
 *  how many pages it looks at and what happens to those pages.
 *  No locking needed because only one thread modifies the variables.
 */

unsigned int vm_pageout_active = 0;                 /* debugging */
unsigned int vm_pageout_inactive = 0;               /* debugging */
unsigned int vm_pageout_inactive_throttled = 0;     /* debugging */
unsigned int vm_pageout_inactive_forced = 0;        /* debugging */
unsigned int vm_pageout_inactive_nolock = 0;        /* debugging */
unsigned int vm_pageout_inactive_avoid = 0;         /* debugging */
unsigned int vm_pageout_inactive_busy = 0;          /* debugging */
unsigned int vm_pageout_inactive_absent = 0;        /* debugging */
unsigned int vm_pageout_inactive_used = 0;          /* debugging */
unsigned int vm_pageout_inactive_clean = 0;         /* debugging */
unsigned int vm_pageout_inactive_dirty = 0;         /* debugging */
unsigned int vm_pageout_dirty_no_pager = 0;         /* debugging */
unsigned int vm_pageout_purged_objects = 0;         /* debugging */
unsigned int vm_stat_discard = 0;                   /* debugging */
unsigned int vm_stat_discard_sent = 0;              /* debugging */
unsigned int vm_stat_discard_failure = 0;           /* debugging */
unsigned int vm_stat_discard_throttle = 0;          /* debugging */

unsigned int vm_pageout_scan_active_throttled = 0;
unsigned int vm_pageout_scan_inactive_throttled = 0;
unsigned int vm_pageout_scan_throttle = 0;                  /* debugging */
unsigned int vm_pageout_scan_burst_throttle = 0;            /* debugging */
unsigned int vm_pageout_scan_empty_throttle = 0;            /* debugging */
unsigned int vm_pageout_scan_deadlock_detected = 0;         /* debugging */
unsigned int vm_pageout_scan_active_throttle_success = 0;   /* debugging */
unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */

/*
 *  Backing store throttle when BS is exhausted
 */
unsigned int vm_backing_store_low = 0;

unsigned int vm_pageout_out_of_line = 0;
unsigned int vm_pageout_in_place = 0;

/*
 * counters and statistics...
 */
unsigned long vm_page_decrypt_counter = 0;
unsigned long vm_page_decrypt_for_upl_counter = 0;
unsigned long vm_page_encrypt_counter = 0;
unsigned long vm_page_encrypt_abort_counter = 0;
unsigned long vm_page_encrypt_already_encrypted_counter = 0;
boolean_t vm_pages_encrypted = FALSE;  /* are there encrypted pages ? */

struct vm_pageout_queue vm_pageout_queue_internal;
struct vm_pageout_queue vm_pageout_queue_external;
/*
 *  Routine:    vm_backing_store_disable
 *      Suspend non-privileged threads wishing to extend
 *      backing store when we are low on backing store
 *      (Synchronized by caller)
 */
void
vm_backing_store_disable(
    boolean_t   disable)
{
    if (disable) {
        vm_backing_store_low = 1;
    } else {
        if (vm_backing_store_low) {
            vm_backing_store_low = 0;
            thread_wakeup((event_t) &vm_backing_store_low);
        }
    }
}
/*
 *  Routine:    vm_pageout_object_allocate
 *      Allocate an object for use as out-of-line memory in a
 *      data_return/data_initialize message.
 *      The page must be in an unlocked object.
 *
 *      If the page belongs to a trusted pager, cleaning in place
 *      will be used, which utilizes a special "pageout object"
 *      containing private alias pages for the real page frames.
 *      Untrusted pagers use normal out-of-line memory.
 */
vm_object_t
vm_pageout_object_allocate(
    vm_page_t           m,
    vm_size_t           size,
    vm_object_offset_t  offset)
{
    vm_object_t object = m->object;
    vm_object_t new_object;

    assert(object->pager_ready);

    new_object = vm_object_allocate(size);

    if (object->pager_trusted) {
        assert(offset < object->size);

        vm_object_lock(new_object);
        new_object->pageout = TRUE;
        new_object->shadow = object;
        new_object->can_persist = FALSE;
        new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
        new_object->shadow_offset = offset;
        vm_object_unlock(new_object);

        /*
         * Take a paging reference on the object. This will be dropped
         * in vm_pageout_object_terminate()
         */
        vm_object_lock(object);
        vm_object_paging_begin(object);
        vm_page_lock_queues();
        vm_page_unlock_queues();
        vm_object_unlock(object);

        vm_pageout_in_place++;
    } else
        vm_pageout_out_of_line++;

    return (new_object);
}
#if MACH_CLUSTER_STATS
unsigned long vm_pageout_cluster_dirtied = 0;
unsigned long vm_pageout_cluster_cleaned = 0;
unsigned long vm_pageout_cluster_collisions = 0;
unsigned long vm_pageout_cluster_clusters = 0;
unsigned long vm_pageout_cluster_conversions = 0;
unsigned long vm_pageout_target_collisions = 0;
unsigned long vm_pageout_target_page_dirtied = 0;
unsigned long vm_pageout_target_page_freed = 0;
#define CLUSTER_STAT(clause)    clause
#else   /* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif  /* MACH_CLUSTER_STATS */
/*
 *  Routine:    vm_pageout_object_terminate
 *      Destroy the pageout_object allocated by
 *      vm_pageout_object_allocate(), and perform all of the
 *      required cleanup actions.
 *
 *      The object must be locked, and will be returned locked.
 */
void
vm_pageout_object_terminate(
    vm_object_t object)
{
    vm_object_t shadow_object;
    boolean_t   shadow_internal;

    /*
     * Deal with the deallocation (last reference) of a pageout object
     * (used for cleaning-in-place) by dropping the paging references/
     * freeing pages in the original object.
     */

    assert(object->pageout);
    shadow_object = object->shadow;
    vm_object_lock(shadow_object);
    shadow_internal = shadow_object->internal;

    while (!queue_empty(&object->memq)) {
        vm_page_t           p, m;
        vm_object_offset_t  offset;

        p = (vm_page_t) queue_first(&object->memq);

        assert(!p->cleaning);

        offset = p->offset;
        VM_PAGE_FREE(p);

        m = vm_page_lookup(shadow_object,
                           offset + object->shadow_offset);
        if (m == VM_PAGE_NULL)
            continue;

        /* used as a trigger on upl_commit etc to recognize the */
        /* pageout daemon's subsequent desire to pageout a cleaning */
        /* page.  When the bit is on the upl commit code will */
        /* respect the pageout bit in the target page over the */
        /* caller's page list indication */
        m->dump_cleaning = FALSE;

        /*
         * Account for the paging reference taken when
         * m->cleaning was set on this page.
         */
        vm_object_paging_end(shadow_object);
        assert((m->dirty) || (m->precious) ||
               (m->busy && m->cleaning));

        /*
         * Handle the trusted pager throttle.
         * Also decrement the burst throttle (if external).
         */
        vm_page_lock_queues();
        vm_pageout_throttle_up(m);

        /*
         * Handle the "target" page(s). These pages are to be freed if
         * successfully cleaned. Target pages are always busy, and are
         * wired exactly once. The initial target pages are not mapped,
         * (so cannot be referenced or modified) but converted target
         * pages may have been modified between the selection as an
         * adjacent page and conversion to a target.
         */
        if (m->pageout) {
            assert(m->wire_count == 1);
#if MACH_CLUSTER_STATS
            if (m->wanted) vm_pageout_target_collisions++;
#endif
            /*
             * Revoke all access to the page. Since the object is
             * locked, and the page is busy, this prevents the page
             * from being dirtied after the pmap_disconnect() call
             * returns.
             *
             * Since the page is left "dirty" but "not modified", we
             * can detect whether the page was redirtied during
             * pageout by checking the modify state.
             */
            if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
                m->dirty = TRUE;
            else
                m->dirty = FALSE;

            if (m->dirty) {
                CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
                vm_page_unwire(m);   /* reactivates */
                VM_STAT(reactivations++);
            } else {
                CLUSTER_STAT(vm_pageout_target_page_freed++;)
                vm_page_free(m);     /* clears busy, etc. */
            }
            vm_page_unlock_queues();
            continue;
        }
        /*
         * Handle the "adjacent" pages. These pages were cleaned in
         * place, and should be left alone.
         * If prep_pin_count is nonzero, then someone is using the
         * page, so make it active.
         */
        if (!m->active && !m->inactive && !m->private) {
            vm_page_deactivate(m);
        }
        if ((m->busy) && (m->cleaning)) {

            /* the request_page_list case, (COPY_OUT_FROM FALSE) */

            /* We do not re-set m->dirty ! */
            /* The page was busy so no extraneous activity */
            /* could have occurred. COPY_INTO is a read into the */
            /* new pages. CLEAN_IN_PLACE does actually write */
            /* out the pages but handling outside of this code */
            /* will take care of resetting dirty. We clear the */
            /* modify however for the Programmed I/O case. */
            pmap_clear_modify(m->phys_page);

            if (shadow_object->absent_count == 1)
                vm_object_absent_release(shadow_object);
            else
                shadow_object->absent_count--;

            m->overwriting = FALSE;
        } else if (m->overwriting) {
            /* alternate request page list, write to page_list */
            /* case.  Occurs when the original page was wired */
            /* at the time of the list request */
            assert(m->wire_count != 0);
            vm_page_unwire(m);   /* reactivates */
            m->overwriting = FALSE;
        } else {
            /*
             * Set the dirty state according to whether or not the page was
             * modified during the pageout. Note that we purposefully do
             * NOT call pmap_clear_modify since the page is still mapped.
             * If the page were to be dirtied between the 2 calls, this
             * fact would be lost. This code is only necessary to
             * maintain statistics, since the pmap module is always
             * consulted if m->dirty is false.
             */
#if MACH_CLUSTER_STATS
            m->dirty = pmap_is_modified(m->phys_page);

            if (m->dirty)   vm_pageout_cluster_dirtied++;
            else            vm_pageout_cluster_cleaned++;
            if (m->wanted)  vm_pageout_cluster_collisions++;
#endif
        }

        /*
         * Wakeup any thread waiting for the page to be un-cleaning.
         */
        vm_page_unlock_queues();
    }
    /*
     * Account for the paging reference taken in vm_pageout_object_allocate.
     */
    vm_object_paging_end(shadow_object);
    vm_object_unlock(shadow_object);

    assert(object->ref_count == 0);
    assert(object->paging_in_progress == 0);
    assert(object->resident_page_count == 0);
}
/*
 *  Routine:    vm_pageout_setup
 *      Set up a page for pageout (clean & flush).
 *
 *      Move the page to a new object, as part of which it will be
 *      sent to its memory manager in a memory_object_data_write or
 *      memory_object_initialize message.
 *
 *      The "new_object" and "new_offset" arguments
 *      indicate where the page should be moved.
 *
 *      The page in question must not be on any pageout queues,
 *      and must be busy.  The object to which it belongs
 *      must be unlocked, and the caller must hold a paging
 *      reference to it.  The new_object must not be locked.
 *
 *      This routine returns a pointer to a place-holder page,
 *      inserted at the same offset, to block out-of-order
 *      requests for the page.  The place-holder page must
 *      be freed after the data_write or initialize message
 *      has been sent.
 *
 *      The original page is put on a paging queue and marked
 *      busy on exit.
 */
vm_page_t
vm_pageout_setup(
    register vm_page_t      m,
    register vm_object_t    new_object,
    vm_object_offset_t      new_offset)
{
    register vm_object_t    old_object = m->object;
    vm_object_offset_t      paging_offset;
    vm_object_offset_t      offset;
    register vm_page_t      holding_page;
    register vm_page_t      new_m;
    boolean_t               need_to_wire = FALSE;

    XPR(XPR_VM_PAGEOUT,
        "vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n",
        (integer_t)m->object, (integer_t)m->offset,
        (integer_t)m, (integer_t)new_object,
        (integer_t)new_offset);

    assert(m && m->busy && !m->absent && !m->fictitious && !m->error &&
           !m->restart);

    assert(m->dirty || m->precious);

    /*
     *  Create a place-holder page where the old one was, to prevent
     *  attempted pageins of this page while we're unlocked.
     */
    VM_PAGE_GRAB_FICTITIOUS(holding_page);

    vm_object_lock(old_object);

    offset = m->offset;
    paging_offset = offset + old_object->paging_offset;

    if (old_object->pager_trusted) {
        /*
         *  This pager is trusted, so we can clean this page
         *  in place. Leave it in the old object, and mark it
         *  cleaning & pageout.
         */
        new_m = holding_page;
        holding_page = VM_PAGE_NULL;

        /*
         *  Set up new page to be private shadow of real page.
         */
        new_m->phys_page = m->phys_page;
        new_m->fictitious = FALSE;
        new_m->pageout = TRUE;

        /*
         *  Mark real page as cleaning (indicating that we hold a
         *  paging reference to be released via m_o_d_r_c) and
         *  pageout (indicating that the page should be freed
         *  when the pageout completes).
         */
        pmap_clear_modify(m->phys_page);
        vm_page_lock_queues();
        new_m->private = TRUE;

        assert(m->wire_count == 1);
        vm_page_unlock_queues();

        m->page_lock = VM_PROT_NONE;
        m->unlock_request = VM_PROT_NONE;
    } else {
        /*
         *  Cannot clean in place, so rip the old page out of the
         *  object, and stick the holding page in.  Set new_m to the
         *  page in the new object.
         */
        vm_page_lock_queues();
        VM_PAGE_QUEUES_REMOVE(m);
        vm_page_insert(holding_page, old_object, offset);
        vm_page_unlock_queues();

        new_m = m;
        new_m->page_lock = VM_PROT_NONE;
        new_m->unlock_request = VM_PROT_NONE;

        if (old_object->internal)
            need_to_wire = TRUE;
    }
    /*
     *  Record that this page has been written out
     */
#if MACH_PAGEMAP
    vm_external_state_set(old_object->existence_map, offset);
#endif  /* MACH_PAGEMAP */

    vm_object_unlock(old_object);

    vm_object_lock(new_object);

    /*
     *  Put the page into the new object. If it is not wired
     *  (if it's the real page) it will be activated.
     */
    vm_page_lock_queues();
    vm_page_insert(new_m, new_object, new_offset);
    if (need_to_wire)
        vm_page_wire(new_m);
    else
        vm_page_activate(new_m);
    PAGE_WAKEUP_DONE(new_m);
    vm_page_unlock_queues();

    vm_object_unlock(new_object);

    /*
     *  Return the placeholder page to simplify cleanup.
     */
    return (holding_page);
}
/*
 *  Routine:    vm_pageclean_setup
 *
 *  Purpose:    setup a page to be cleaned (made non-dirty), but not
 *              necessarily flushed from the VM page cache.
 *              This is accomplished by cleaning in place.
 *
 *              The page must not be busy, and the object and page
 *              queues must be locked.
 */
void
vm_pageclean_setup(
    vm_page_t           m,
    vm_page_t           new_m,
    vm_object_t         new_object,
    vm_object_offset_t  new_offset)
{
    vm_object_t old_object = m->object;

    assert(!m->cleaning);

    XPR(XPR_VM_PAGEOUT,
        "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
        (integer_t)old_object, m->offset, (integer_t)m,
        (integer_t)new_m, new_offset);

    pmap_clear_modify(m->phys_page);
    vm_object_paging_begin(old_object);

    /*
     *  Record that this page has been written out
     */
#if MACH_PAGEMAP
    vm_external_state_set(old_object->existence_map, m->offset);
#endif  /* MACH_PAGEMAP */

    /*
     *  Mark original page as cleaning in place.
     */

    /*
     *  Convert the fictitious page to a private shadow of
     *  the real page.
     */
    assert(new_m->fictitious);
    new_m->fictitious = FALSE;
    new_m->private = TRUE;
    new_m->pageout = TRUE;
    new_m->phys_page = m->phys_page;

    vm_page_insert(new_m, new_object, new_offset);
    assert(!new_m->wanted);
}
void
vm_pageclean_copy(
    vm_page_t           m,
    vm_page_t           new_m,
    vm_object_t         new_object,
    vm_object_offset_t  new_offset)
{
    XPR(XPR_VM_PAGEOUT,
        "vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n",
        m, new_m, new_object, new_offset, 0);

    assert((!m->busy) && (!m->cleaning));

    assert(!new_m->private && !new_m->fictitious);

    pmap_clear_modify(m->phys_page);

    vm_object_paging_begin(m->object);
    vm_page_unlock_queues();
    vm_object_unlock(m->object);

    /*
     *  Copy the original page to the new page.
     */
    vm_page_copy(m, new_m);

    /*
     *  Mark the old page as clean. A request to pmap_is_modified
     *  will get the right answer.
     */
    vm_object_lock(m->object);

    vm_object_paging_end(m->object);

    vm_page_lock_queues();
    if (!m->active && !m->inactive)
        vm_page_activate(m);

    vm_page_insert(new_m, new_object, new_offset);
    vm_page_activate(new_m);
    new_m->busy = FALSE;    /* No other thread can be waiting */
}
/*
 *  Routine:    vm_pageout_initialize_page
 *      Causes the specified page to be initialized in
 *      the appropriate memory object. This routine is used to push
 *      pages into a copy-object when they are modified in the
 *      original object.
 *
 *      The page is moved to a temporary object and paged out.
 *
 *      The page in question must not be on any pageout queues.
 *      The object to which it belongs must be locked.
 *      The page must be busy, but not hold a paging reference.
 *
 *      Move this page to a completely new object.
 */
void
vm_pageout_initialize_page(
    vm_page_t   m)
{
    vm_object_t         object = m->object;
    vm_object_offset_t  paging_offset;
    vm_page_t           holding_page;

    XPR(XPR_VM_PAGEOUT,
        "vm_pageout_initialize_page, page 0x%X\n",
        (integer_t)m, 0, 0, 0, 0);

    /*
     * Verify that we really want to clean this page
     */

    /*
     *  Create a paging reference to let us play with the object.
     */
    paging_offset = m->offset + object->paging_offset;
    vm_object_paging_begin(object);
    if (m->absent || m->error || m->restart ||
        (!m->dirty && !m->precious)) {
        panic("reservation without pageout?"); /* alan */
        vm_object_unlock(object);
        return;
    }

    /* set the page for future call to vm_fault_list_request */
    vm_page_lock_queues();
    pmap_clear_modify(m->phys_page);
    m->list_req_pending = TRUE;
    vm_page_unlock_queues();
    vm_object_unlock(object);

    /*
     *  Write the data to its pager.
     *  Note that the data is passed by naming the new object,
     *  not a virtual address; the pager interface has been
     *  manipulated to use the "internal memory" data type.
     *  [The object reference from its allocation is donated
     *  to the eventual recipient.]
     */
    memory_object_data_initialize(object->pager,
                                  paging_offset,
                                  PAGE_SIZE);

    vm_object_lock(object);
}
#if MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES 16
struct {
    unsigned long pages_in_cluster;
    unsigned long pages_at_higher_offsets;
    unsigned long pages_at_lower_offsets;
} cluster_stats[MAXCLUSTERPAGES];
#endif  /* MACH_CLUSTER_STATS */

boolean_t allow_clustered_pageouts = FALSE;
/*
 * vm_pageout_cluster:
 *
 * Given a page, queue it to the appropriate I/O thread,
 * which will page it out and attempt to clean adjacent pages
 * in the same operation.
 *
 * The page must be busy, and the object and queues locked. We will take a
 * paging reference to prevent deallocation or collapse when we
 * release the object lock back at the call site.  The I/O thread
 * is responsible for consuming this reference.
 *
 * The page must not be on any pageout queue.
 */
void
vm_pageout_cluster(vm_page_t m)
{
    vm_object_t object = m->object;
    struct vm_pageout_queue *q;

    XPR(XPR_VM_PAGEOUT,
        "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
        (integer_t)object, m->offset, (integer_t)m, 0, 0);

    /*
     * Only a certain kind of page is appreciated here.
     */
    assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
    assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);

    /*
     * protect the object from collapse -
     * locking in the object's paging_offset.
     */
    vm_object_paging_begin(object);

    /*
     * set the page for future call to vm_fault_list_request
     * page should already be marked busy
     */
    m->list_req_pending = TRUE;

    if (object->internal == TRUE)
        q = &vm_pageout_queue_internal;
    else
        q = &vm_pageout_queue_external;

    q->pgo_laundry++;

    m->pageout_queue = TRUE;
    queue_enter(&q->pgo_pending, m, vm_page_t, pageq);

    if (q->pgo_idle == TRUE) {
        q->pgo_idle = FALSE;
        thread_wakeup((event_t) &q->pgo_pending);
    }
}
unsigned long vm_pageout_throttle_up_count = 0;

/*
 * A page is back from laundry.  See if there are some pages waiting to
 * go to laundry and if we can let some of them go now.
 *
 * Object and page queues must be locked.
 */
void
vm_pageout_throttle_up(
    vm_page_t   m)
{
    struct vm_pageout_queue *q;

    vm_pageout_throttle_up_count++;

    assert(m->object != VM_OBJECT_NULL);
    assert(m->object != kernel_object);

    if (m->object->internal == TRUE)
        q = &vm_pageout_queue_internal;
    else
        q = &vm_pageout_queue_external;

    m->laundry = FALSE;
    q->pgo_laundry--;

    if (q->pgo_throttled == TRUE) {
        q->pgo_throttled = FALSE;
        thread_wakeup((event_t) &q->pgo_laundry);
    }
}
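/*
 * Laundry accounting in brief (summary of the code above and below):
 * vm_pageout_cluster queues a dirty page and raises pgo_laundry; once
 * VM_PAGE_Q_THROTTLED(q) is true, vm_pageout_scan sets pgo_throttled and
 * sleeps on pgo_laundry; when the pager's iothread finishes a page,
 * vm_pageout_throttle_up drops the count and issues the wakeup.
 */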
/*
 *  vm_pageout_scan does the dirty work for the pageout daemon.
 *  It returns with vm_page_queue_free_lock held and
 *  vm_page_free_wanted == 0.
 */

#define DELAYED_UNLOCK_LIMIT  (3 * MAX_UPL_TRANSFER)

#define FCS_IDLE                0
#define FCS_DELAYED             1
#define FCS_DEADLOCK_DETECTED   2

struct flow_control {
    int             state;
    mach_timespec_t ts;
};
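/*
 * Flow-control sketch (a summary; vm_pageout_scan below is authoritative):
 *
 *   FCS_IDLE              -> the default pager queue becomes throttled; arm
 *                            a vm_pageout_deadlock_wait millisecond timer
 *                            and move to FCS_DELAYED.
 *   FCS_DELAYED           -> if the queue is still throttled when the timer
 *                            expires, assume a potential deadlock: set
 *                            vm_pageout_deadlock_target to
 *                            vm_pageout_deadlock_relief + vm_page_free_wanted
 *                            and move to FCS_DEADLOCK_DETECTED.
 *   FCS_DEADLOCK_DETECTED -> steal pages until the target is consumed, then
 *                            re-arm the timer and return to FCS_DELAYED.
 */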
void
vm_pageout_scan(void)
{
    unsigned int    loop_count = 0;
    unsigned int    inactive_burst_count = 0;
    unsigned int    active_burst_count = 0;
    vm_page_t       local_freeq = 0;
    int             local_freed = 0;
    int             delayed_unlock = 0;
    int             need_internal_inactive = 0;
    int             refmod_state = 0;
    int             vm_pageout_deadlock_target = 0;
    struct vm_pageout_queue *iq;
    struct vm_pageout_queue *eq;
    struct flow_control     flow_control;
    boolean_t       active_throttled = FALSE;
    boolean_t       inactive_throttled = FALSE;
    mach_timespec_t ts;
    unsigned int    msecs = 0;
    vm_object_t     object = NULL;
    vm_page_t       m;

    flow_control.state = FCS_IDLE;
    iq = &vm_pageout_queue_internal;
    eq = &vm_pageout_queue_external;

    XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);

    /*
     *  We want to gradually dribble pages from the active queue
     *  to the inactive queue.  If we let the inactive queue get
     *  very small, and then suddenly dump many pages into it,
     *  those pages won't get a sufficient chance to be referenced
     *  before we start taking them from the inactive queue.
     *
     *  We must limit the rate at which we send pages to the pagers.
     *  data_write messages consume memory, for message buffers and
     *  for map-copy objects.  If we get too far ahead of the pagers,
     *  we can potentially run out of memory.
     *
     *  We can use the laundry count to limit directly the number
     *  of pages outstanding to the default pager.  A similar
     *  strategy for external pagers doesn't work, because
     *  external pagers don't have to deallocate the pages sent them,
     *  and because we might have to send pages to external pagers
     *  even if they aren't processing writes.  So we also
     *  use a burst count to limit writes to external pagers.
     *
     *  When memory is very tight, we can't rely on external pagers to
     *  clean pages.  They probably aren't running, because they
     *  aren't vm-privileged.  If we kept sending dirty pages to them,
     *  we could exhaust the free list.
     */
    vm_page_lock_queues();

    /*
     *  Recalculate vm_page_inactive_target.
     */
    vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
                                                      vm_page_inactive_count);

    for (;;) {

        if (delayed_unlock == 0)
            vm_page_lock_queues();

        active_burst_count = vm_page_active_count;

        if (active_burst_count > vm_pageout_burst_active_throttle)
            active_burst_count = vm_pageout_burst_active_throttle;

        /*
         *  Move pages from active to inactive.
         */
        while ((need_internal_inactive ||
                vm_page_inactive_count < vm_page_inactive_target) &&
               !queue_empty(&vm_page_queue_active) &&
               ((active_burst_count--) > 0)) {

            vm_pageout_active++;

            m = (vm_page_t) queue_first(&vm_page_queue_active);

            assert(m->active && !m->inactive);
            assert(!m->laundry);
            assert(m->object != kernel_object);

            /*
             * Try to lock object; since we've already got the
             * page queues lock, we can only 'try' for this one.
             * if the 'try' fails, we need to do a mutex_pause
             * to allow the owner of the object lock a chance to
             * run... otherwise, we're likely to trip over this
             * object in the same state as we work our way through
             * the queue... clumps of pages associated with the same
             * object are fairly typical on the inactive and active queues
             */
            if (m->object != object) {
                if (object != NULL) {
                    vm_object_unlock(object);
                    object = NULL;
                }
                if (!vm_object_lock_try(m->object)) {
                    /*
                     * move page to end of active queue and continue
                     */
                    queue_remove(&vm_page_queue_active, m,
                                 vm_page_t, pageq);
                    queue_enter(&vm_page_queue_active, m,
                                vm_page_t, pageq);

                    goto done_with_activepage;
                }
                object = m->object;
            }
            /*
             * if the page is BUSY, then we pull it
             * off the active queue and leave it alone.
             * when BUSY is cleared, it will get stuck
             * back on the appropriate queue
             */
            if (m->busy) {
                queue_remove(&vm_page_queue_active, m,
                             vm_page_t, pageq);
                m->pageq.next = NULL;
                m->pageq.prev = NULL;

                vm_page_active_count--;

                goto done_with_activepage;
            }
            if (need_internal_inactive) {
                /*
                 * If we're unable to make forward progress
                 * with the current set of pages on the
                 * inactive queue due to busy objects or
                 * throttled pageout queues, then
                 * move a page that is already clean
                 * or belongs to a pageout queue that
                 * isn't currently throttled
                 */
                active_throttled = FALSE;

                if (object->internal) {
                    if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
                        active_throttled = TRUE;
                } else if (VM_PAGE_Q_THROTTLED(eq)) {
                    active_throttled = TRUE;
                }
                if (active_throttled == TRUE) {
                    refmod_state = pmap_get_refmod(m->phys_page);

                    if (refmod_state & VM_MEM_REFERENCED)
                        m->reference = TRUE;
                    if (refmod_state & VM_MEM_MODIFIED)
                        m->dirty = TRUE;

                    if (m->dirty || m->precious) {
                        /*
                         * page is dirty and targets a THROTTLED queue
                         * so all we can do is move it back to the
                         * end of the active queue to get it out
                         * of the way
                         */
                        queue_remove(&vm_page_queue_active, m,
                                     vm_page_t, pageq);
                        queue_enter(&vm_page_queue_active, m,
                                    vm_page_t, pageq);

                        vm_pageout_scan_active_throttled++;

                        goto done_with_activepage;
                    }
                }
                vm_pageout_scan_active_throttle_success++;
                need_internal_inactive--;
            }
            /*
             * Deactivate the page while holding the object
             * locked, so we know the page is still not busy.
             * This should prevent races between pmap_enter
             * and pmap_clear_reference.  The page might be
             * absent or fictitious, but vm_page_deactivate
             * can handle that.
             */
            vm_page_deactivate(m);
done_with_activepage:
            if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {

                if (object != NULL) {
                    vm_object_unlock(object);
                    object = NULL;
                }
                if (local_freeq) {
                    vm_page_free_list(local_freeq);

                    local_freeq = 0;
                    local_freed = 0;
                }
                vm_page_unlock_queues();

                vm_page_lock_queues();
                /*
                 * continue the while loop processing
                 * the active queue... need to hold
                 * the page queues lock
                 */
            }
        }

        /**********************************************************************
         * above this point we're playing with the active queue
         * below this point we're playing with the throttling mechanisms
         * and the inactive queue
         **********************************************************************/

        /*
         *  We are done if we have met our target *and*
         *  nobody is still waiting for a page.
         */
        if (vm_page_free_count + local_freed >= vm_page_free_target) {
            if (object != NULL) {
                vm_object_unlock(object);
                object = NULL;
            }
            if (local_freeq) {
                vm_page_free_list(local_freeq);

                local_freeq = 0;
                local_freed = 0;
            }
            mutex_lock(&vm_page_queue_free_lock);

            if ((vm_page_free_count >= vm_page_free_target) &&
                (vm_page_free_wanted == 0)) {

                vm_page_unlock_queues();

                thread_wakeup((event_t) &vm_pageout_garbage_collect);
                return;
            }
            mutex_unlock(&vm_page_queue_free_lock);
        }

        /*
         * Sometimes we have to pause:
         *  1) No inactive pages - nothing to do.
         *  2) Flow control - default pageout queue is full
         *  3) Loop control - no acceptable pages found on the inactive queue
         *     within the last vm_pageout_burst_inactive_throttle iterations
         */
        if ((queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf))) {
            vm_pageout_scan_empty_throttle++;
            msecs = vm_pageout_empty_wait;
            goto vm_pageout_scan_delay;

        } else if (inactive_burst_count >= vm_pageout_burst_inactive_throttle) {
            vm_pageout_scan_burst_throttle++;
            msecs = vm_pageout_burst_wait;
            goto vm_pageout_scan_delay;

        } else if (VM_PAGE_Q_THROTTLED(iq)) {

            switch (flow_control.state) {

            case FCS_IDLE:
reset_deadlock_timer:
                ts.tv_sec = vm_pageout_deadlock_wait / 1000;
                ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
                clock_get_system_nanotime(
                    &flow_control.ts.tv_sec,
                    (uint32_t *) &flow_control.ts.tv_nsec);
                ADD_MACH_TIMESPEC(&flow_control.ts, &ts);

                flow_control.state = FCS_DELAYED;
                msecs = vm_pageout_deadlock_wait;

                break;

            case FCS_DELAYED:
                clock_get_system_nanotime(
                    &ts.tv_sec,
                    (uint32_t *) &ts.tv_nsec);

                if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
                    /*
                     * the pageout thread for the default pager is potentially
                     * deadlocked since the
                     * default pager queue has been throttled for more than the
                     * allowable time... we need to move some clean pages or dirty
                     * pages belonging to the external pagers if they aren't throttled
                     * vm_page_free_wanted represents the number of threads currently
                     * blocked waiting for pages... we'll move one page for each of
                     * these plus a fixed amount to break the logjam... once we're done
                     * moving this number of pages, we'll re-enter the FCS_DELAYED state
                     * with a new timeout target since we have no way of knowing
                     * whether we've broken the deadlock except through observation
                     * of the queue associated with the default pager... we need to
                     * stop moving pages and allow the system to run to see what
                     * state it settles into.
                     */
                    vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted;
                    vm_pageout_scan_deadlock_detected++;
                    flow_control.state = FCS_DEADLOCK_DETECTED;

                    thread_wakeup((event_t) &vm_pageout_garbage_collect);
                    goto consider_inactive;
                }
                /*
                 * just resniff instead of trying
                 * to compute a new delay time... we're going to be
                 * awakened immediately upon a laundry completion,
                 * so we won't wait any longer than necessary
                 */
                msecs = vm_pageout_idle_wait;
                break;

            case FCS_DEADLOCK_DETECTED:
                if (vm_pageout_deadlock_target)
                    goto consider_inactive;
                goto reset_deadlock_timer;

            }
            vm_pageout_scan_throttle++;
            iq->pgo_throttled = TRUE;
vm_pageout_scan_delay:
            if (object != NULL) {
                vm_object_unlock(object);
                object = NULL;
            }
            if (local_freeq) {
                vm_page_free_list(local_freeq);

                local_freeq = 0;
                local_freed = 0;
            }
            assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);

            counter(c_vm_pageout_scan_block++);

            vm_page_unlock_queues();

            thread_block(THREAD_CONTINUE_NULL);

            vm_page_lock_queues();

            iq->pgo_throttled = FALSE;

            if (loop_count >= vm_page_inactive_count) {
                if (VM_PAGE_Q_THROTTLED(eq) || VM_PAGE_Q_THROTTLED(iq)) {
                    /*
                     * Make sure we move enough "appropriate"
                     * pages to the inactive queue before trying
                     * again.
                     */
                    need_internal_inactive = vm_pageout_inactive_relief;
                }
            }
            inactive_burst_count = 0;

            continue;
        }

        flow_control.state = FCS_IDLE;
consider_inactive:
        inactive_burst_count++;
        vm_pageout_inactive++;

        if (!queue_empty(&vm_page_queue_inactive)) {
            m = (vm_page_t) queue_first(&vm_page_queue_inactive);

            if (m->clustered && (m->no_isync == TRUE))
                goto use_this_page;
        }
        if (vm_zf_count < vm_accellerate_zf_pageout_trigger) {
            vm_zf_iterator = 0;
        } else {
            if ((vm_zf_iterator += 1) >= vm_zf_iterator_count) {
                vm_zf_iterator = 0;
            }
        }
        if (queue_empty(&vm_page_queue_zf) ||
            (((last_page_zf) || (vm_zf_iterator == 0)) &&
             !queue_empty(&vm_page_queue_inactive))) {
            m = (vm_page_t) queue_first(&vm_page_queue_inactive);
            last_page_zf = 0;
        } else {
            m = (vm_page_t) queue_first(&vm_page_queue_zf);
            last_page_zf = 1;
        }
use_this_page:
        assert(!m->active && m->inactive);
        assert(!m->laundry);
        assert(m->object != kernel_object);

        /*
         * Try to lock object; since we've already got the
         * page queues lock, we can only 'try' for this one.
         * if the 'try' fails, we need to do a mutex_pause
         * to allow the owner of the object lock a chance to
         * run... otherwise, we're likely to trip over this
         * object in the same state as we work our way through
         * the queue... clumps of pages associated with the same
         * object are fairly typical on the inactive and active queues
         */
        if (m->object != object) {
            if (object != NULL) {
                vm_object_unlock(object);
                object = NULL;
            }
            if (!vm_object_lock_try(m->object)) {
                /*
                 * Move page to end and continue.
                 * Don't re-issue ticket
                 */
                if (m->zero_fill) {
                    queue_remove(&vm_page_queue_zf, m,
                                 vm_page_t, pageq);
                    queue_enter(&vm_page_queue_zf, m,
                                vm_page_t, pageq);
                } else {
                    queue_remove(&vm_page_queue_inactive, m,
                                 vm_page_t, pageq);
                    queue_enter(&vm_page_queue_inactive, m,
                                vm_page_t, pageq);
                }
                vm_pageout_inactive_nolock++;

                /*
                 * force us to dump any collected free pages
                 * and to pause before moving on
                 */
                delayed_unlock = DELAYED_UNLOCK_LIMIT + 1;

                goto done_with_inactivepage;
            }
            object = m->object;
        }
        /*
         * If the page belongs to a purgable object with no pending copies
         * against it, then we reap all of the pages in the object
         * and note that the object has been "emptied".  It'll be up to the
         * application to discover this and recreate its contents if desired.
         */
        if ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE ||
             object->purgable == VM_OBJECT_PURGABLE_EMPTY) &&
            object->copy == VM_OBJECT_NULL) {

            (void) vm_object_purge(object);
            vm_pageout_purged_objects++;
            /*
             * we've just taken all of the pages from this object,
             * so drop the lock now since we're not going to find
             * any more pages belonging to it anytime soon
             */
            vm_object_unlock(object);
            object = NULL;

            inactive_burst_count = 0;

            goto done_with_inactivepage;
        }
        /*
         *  Paging out pages of external objects which
         *  are currently being created must be avoided.
         *  The pager may claim for memory, thus leading to a
         *  possible dead lock between it and the pageout thread,
         *  if such pages are finally chosen. The remaining assumption
         *  is that there will finally be enough available pages in the
         *  inactive pool to page out in order to satisfy all memory
         *  claimed by the thread which concurrently creates the pager.
         */
        if (!object->pager_initialized && object->pager_created) {
            /*
             *  Move page to end and continue, hoping that
             *  there will be enough other inactive pages to
             *  page out so that the thread which currently
             *  initializes the pager will succeed.
             *  Don't re-grant the ticket, the page should be
             *  pulled from the queue and paged out whenever
             *  one of its logically adjacent fellows is
             *  targeted.
             */
            if (m->zero_fill) {
                queue_remove(&vm_page_queue_zf, m,
                             vm_page_t, pageq);
                queue_enter(&vm_page_queue_zf, m,
                            vm_page_t, pageq);

                vm_zf_iterator = vm_zf_iterator_count - 1;
            } else {
                queue_remove(&vm_page_queue_inactive, m,
                             vm_page_t, pageq);
                queue_enter(&vm_page_queue_inactive, m,
                            vm_page_t, pageq);
            }
            vm_pageout_inactive_avoid++;

            goto done_with_inactivepage;
        }
        /*
         *  Remove the page from the inactive list.
         */
        if (m->zero_fill) {
            queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
        } else {
            queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
        }
        m->pageq.next = NULL;
        m->pageq.prev = NULL;
        m->inactive = FALSE;

        vm_page_inactive_count--;

        if (m->busy || !object->alive) {
            /*
             *  Somebody is already playing with this page.
             *  Leave it off the pageout queues.
             */
            vm_pageout_inactive_busy++;

            goto done_with_inactivepage;
        }

        /*
         *  If it's absent or in error, we can reclaim the page.
         */
        if (m->absent || m->error) {
            vm_pageout_inactive_absent++;
reclaim_page:
            if (vm_pageout_deadlock_target) {
                vm_pageout_scan_inactive_throttle_success++;
                vm_pageout_deadlock_target--;
            }
            vm_page_remove(m);    /* clears tabled, object, offset */

            vm_object_absent_release(object);

            assert(m->pageq.next == NULL &&
                   m->pageq.prev == NULL);
            m->pageq.next = (queue_entry_t)local_freeq;
            local_freeq = m;
            local_freed++;

            inactive_burst_count = 0;

            goto done_with_inactivepage;
        }

        assert(!m->private);
        assert(!m->fictitious);

        /*
         *  If already cleaning this page in place, convert from
         *  "adjacent" to "target". We can leave the page mapped,
         *  and vm_pageout_object_terminate will determine whether
         *  to free or reactivate.
         */
        if (m->cleaning) {
            m->dump_cleaning = TRUE;

            CLUSTER_STAT(vm_pageout_cluster_conversions++);

            inactive_burst_count = 0;

            goto done_with_inactivepage;
        }

        /*
         *  If it's being used, reactivate.
         *  (Fictitious pages are either busy or absent.)
         */
        if ( (!m->reference) ) {
            refmod_state = pmap_get_refmod(m->phys_page);

            if (refmod_state & VM_MEM_REFERENCED)
                m->reference = TRUE;
            if (refmod_state & VM_MEM_MODIFIED)
                m->dirty = TRUE;
        }
        if (m->reference) {
was_referenced:
            vm_page_activate(m);
            VM_STAT(reactivations++);

            vm_pageout_inactive_used++;

            inactive_burst_count = 0;

            goto done_with_inactivepage;
        }

        XPR(XPR_VM_PAGEOUT,
            "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
            (integer_t)object, (integer_t)m->offset, (integer_t)m, 0, 0);

        /*
         * we've got a candidate page to steal...
         *
         * m->dirty is up to date courtesy of the
         * preceding check for m->reference... if
         * we get here, then m->reference had to be
         * FALSE which means we did a pmap_get_refmod
         * and updated both m->reference and m->dirty
         *
         * if it's dirty or precious we need to
         * see if the target queue is throttled
         * if it is, we need to skip over it by moving it back
         * to the end of the inactive queue
         */
        inactive_throttled = FALSE;

        if (m->dirty || m->precious) {
            if (object->internal) {
                if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
                    inactive_throttled = TRUE;
            } else if (VM_PAGE_Q_THROTTLED(eq)) {
                inactive_throttled = TRUE;
            }
        }
        if (inactive_throttled == TRUE) {
            if (m->zero_fill) {
                queue_enter(&vm_page_queue_zf, m,
                            vm_page_t, pageq);
            } else {
                queue_enter(&vm_page_queue_inactive, m,
                            vm_page_t, pageq);
            }
            m->inactive = TRUE;
            vm_page_inactive_count++;

            vm_pageout_scan_inactive_throttled++;

            goto done_with_inactivepage;
        }
        /*
         * we've got a page that we can steal...
         * eliminate all mappings and make sure
         * we have the up-to-date modified state
         * first take the page BUSY, so that no new
         * mappings can be made
         */
        m->busy = TRUE;

        /*
         * if we need to do a pmap_disconnect then we
         * need to re-evaluate m->dirty since the pmap_disconnect
         * provides the true state atomically... the
         * page was still mapped up to the pmap_disconnect
         * and may have been dirtied at the last microsecond
         *
         * we also check for the page being referenced 'late'
         * if it was, we first need to do a WAKEUP_DONE on it
         * since we already set m->busy = TRUE, before
         * going off to reactivate it
         *
         * if we don't need the pmap_disconnect, then
         * m->dirty is up to date courtesy of the
         * earlier check for m->reference... if
         * we get here, then m->reference had to be
         * FALSE which means we did a pmap_get_refmod
         * and updated both m->reference and m->dirty...
         */
        if (m->no_isync == FALSE) {
            refmod_state = pmap_disconnect(m->phys_page);

            if (refmod_state & VM_MEM_MODIFIED)
                m->dirty = TRUE;
            if (refmod_state & VM_MEM_REFERENCED) {
                m->reference = TRUE;

                PAGE_WAKEUP_DONE(m);
                goto was_referenced;
            }
        }
        /*
         *  If it's clean and not precious, we can free the page.
         */
        if (!m->dirty && !m->precious) {
            vm_pageout_inactive_clean++;
            goto reclaim_page;
        }
        vm_pageout_cluster(m);

        vm_pageout_inactive_dirty++;

        inactive_burst_count = 0;

done_with_inactivepage:
        if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {

            if (object != NULL) {
                vm_object_unlock(object);
                object = NULL;
            }
            if (local_freeq) {
                vm_page_free_list(local_freeq);

                local_freeq = 0;
                local_freed = 0;
            }
            delayed_unlock = 0;
            vm_page_unlock_queues();
        }
        /*
         * back to top of pageout scan loop
         */
    }
}
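/*
 * Summary note (not original source text): each pass of the loop above
 * first trims the active queue toward vm_page_inactive_target, then steals
 * pages from the inactive and zero-fill queues until vm_page_free_count
 * reaches vm_page_free_target, pausing on the waits and throttles that
 * vm_pageout() configures whenever the pageout queues back up or no
 * acceptable pages are found.
 */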
int vm_page_free_count_init;

void
vm_page_free_reserve(
    int pages)
{
    int free_after_reserve;

    vm_page_free_reserved += pages;

    free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;

    vm_page_free_min = vm_page_free_reserved +
        VM_PAGE_FREE_MIN(free_after_reserve);

    vm_page_free_target = vm_page_free_reserved +
        VM_PAGE_FREE_TARGET(free_after_reserve);

    if (vm_page_free_target < vm_page_free_min + 5)
        vm_page_free_target = vm_page_free_min + 5;
}
/*
 *  vm_pageout is the high level pageout daemon.
 */

void
vm_pageout_continue(void)
{
    vm_pageout_scan_event_counter++;
    vm_pageout_scan();
    /* we hold vm_page_queue_free_lock now */
    assert(vm_page_free_wanted == 0);
    assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
    mutex_unlock(&vm_page_queue_free_lock);

    counter(c_vm_pageout_block++);
    thread_block((thread_continue_t)vm_pageout_continue);
    /*NOTREACHED*/
}
/*
 * must be called with the
 * queues and object locks held
 */
static void
vm_pageout_queue_steal(vm_page_t m)
{
    struct vm_pageout_queue *q;

    if (m->object->internal == TRUE)
        q = &vm_pageout_queue_internal;
    else
        q = &vm_pageout_queue_external;

    m->pageout_queue = FALSE;
    queue_remove(&q->pgo_pending, m, vm_page_t, pageq);

    m->pageq.next = NULL;
    m->pageq.prev = NULL;

    vm_object_paging_end(m->object);
}
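/*
 * Note: this undoes the queueing performed by vm_pageout_cluster for a
 * page that is being reclaimed before the iothread has processed it -- the
 * pageout_queue flag is cleared, the page is unlinked from pgo_pending,
 * and the paging reference taken in vm_pageout_cluster is released.
 */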
#ifdef FAKE_DEADLOCK

#define FAKE_COUNT  5000

int internal_count = 0;
int fake_deadlock = 0;

#endif
static void
vm_pageout_iothread_continue(struct vm_pageout_queue *q)
{
    vm_page_t   m;
    vm_object_t object;
    boolean_t   need_wakeup;

    vm_page_lock_queues();

    while ( !queue_empty(&q->pgo_pending) ) {

        q->pgo_busy = TRUE;
        queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
        m->pageout_queue = FALSE;
        vm_page_unlock_queues();

        m->pageq.next = NULL;
        m->pageq.prev = NULL;
#ifdef FAKE_DEADLOCK
        if (q == &vm_pageout_queue_internal) {
            vm_offset_t addr;
            int         pg_count;

            internal_count++;

            if ((internal_count == FAKE_COUNT)) {

                pg_count = vm_page_free_count + vm_page_free_reserved;

                if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
                    kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
                }
                internal_count = 0;
            }
        }
#endif
        object = m->object;

        if (!object->pager_initialized) {
            vm_object_lock(object);

            /*
             *  If there is no memory object for the page, create
             *  one and hand it to the default pager.
             */

            if (!object->pager_initialized)
                vm_object_collapse(object,
                                   (vm_object_offset_t) 0,
                                   TRUE);
            if (!object->pager_initialized)
                vm_object_pager_create(object);
            if (!object->pager_initialized) {
                /*
                 *  Still no pager for the object.
                 *  Reactivate the page.
                 *
                 *  Should only happen if there is no
                 *  default pager.
                 */
                m->list_req_pending = FALSE;
                m->cleaning = FALSE;

                vm_pageout_throttle_up(m);

                vm_page_lock_queues();
                vm_pageout_dirty_no_pager++;
                vm_page_activate(m);
                vm_page_unlock_queues();

                /*
                 *  And we are done with it.
                 */
                PAGE_WAKEUP_DONE(m);

                vm_object_paging_end(object);
                vm_object_unlock(object);

                vm_page_lock_queues();
                continue;
            } else if (object->pager == MEMORY_OBJECT_NULL) {
                /*
                 * This pager has been destroyed by either
                 * memory_object_destroy or vm_object_destroy, and
                 * so there is nowhere for the page to go.
                 * Just free the page... VM_PAGE_FREE takes
                 * care of cleaning up all the state...
                 * including doing the vm_pageout_throttle_up
                 */
                VM_PAGE_FREE(m);

                vm_object_paging_end(object);
                vm_object_unlock(object);

                vm_page_lock_queues();
                continue;
            }
            vm_object_unlock(object);
        }
        /*
         * we expect the paging_in_progress reference to have
         * already been taken on the object before it was added
         * to the appropriate pageout I/O queue... this will
         * keep the object from being terminated and/or the
         * paging_offset from changing until the I/O has
         * completed... therefore no need to lock the object to
         * pull the paging_offset from it.
         *
         * Send the data to the pager.
         * any pageout clustering happens there
         */
        memory_object_data_return(object->pager,
                                  m->offset + object->paging_offset,
                                  PAGE_SIZE,
                                  NULL,
                                  NULL,
                                  FALSE,
                                  FALSE,
                                  0);

        vm_object_lock(object);
        vm_object_paging_end(object);
        vm_object_unlock(object);

        vm_page_lock_queues();
    }
    assert_wait((event_t) q, THREAD_UNINT);

    if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) {
        q->pgo_throttled = FALSE;
        need_wakeup = TRUE;
    } else
        need_wakeup = FALSE;

    q->pgo_busy = FALSE;
    q->pgo_idle = TRUE;
    vm_page_unlock_queues();

    if (need_wakeup == TRUE)
        thread_wakeup((event_t) &q->pgo_laundry);

    thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending);
    /*NOTREACHED*/
}
static void
vm_pageout_iothread_external(void)
{
    vm_pageout_iothread_continue(&vm_pageout_queue_external);
    /*NOTREACHED*/
}


static void
vm_pageout_iothread_internal(void)
{
    thread_t self = current_thread();

    self->options |= TH_OPT_VMPRIV;

    vm_pageout_iothread_continue(&vm_pageout_queue_internal);
    /*NOTREACHED*/
}
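/*
 * Note: the internal iothread services the default pager, so it marks
 * itself vm-privileged (TH_OPT_VMPRIV); as described for
 * VM_PAGE_FREE_RESERVED above, that lets it keep allocating from the
 * reserved pool when free memory is exhausted, something the external
 * iothread does not require.
 */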
static void
vm_pageout_garbage_collect(int collect)
{
    if (collect) {
        /*
         * consider_zone_gc should be last, because the other operations
         * might return memory to zones.
         */
        consider_machine_collect();
        consider_zone_gc();

        consider_machine_adjust();
    }

    assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);

    thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
    /*NOTREACHED*/
}
void
vm_pageout(void)
{
    thread_t        self = current_thread();
    thread_t        thread;
    kern_return_t   result;

    /*
     * Set thread privileges.
     */
    thread_lock(self);
    self->priority = BASEPRI_PREEMPT - 1;
    set_sched_pri(self, self->priority);
    thread_unlock(self);

    /*
     *  Initialize some paging parameters.
     */

    if (vm_pageout_idle_wait == 0)
        vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;

    if (vm_pageout_burst_wait == 0)
        vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;

    if (vm_pageout_empty_wait == 0)
        vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;

    if (vm_pageout_deadlock_wait == 0)
        vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;

    if (vm_pageout_deadlock_relief == 0)
        vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;

    if (vm_pageout_inactive_relief == 0)
        vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;

    if (vm_pageout_burst_active_throttle == 0)
        vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;

    if (vm_pageout_burst_inactive_throttle == 0)
        vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;

    /*
     * Set kernel task to low backing store privileged
     */
    task_lock(kernel_task);
    kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
    task_unlock(kernel_task);

    vm_page_free_count_init = vm_page_free_count;

    /*
     * even if we've already called vm_page_free_reserve
     * call it again here to insure that the targets are
     * accurately calculated (it uses vm_page_free_count_init)
     * calling it with an arg of 0 will not change the reserve
     * but will re-calculate free_min and free_target
     */
    if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
        vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
    } else
        vm_page_free_reserve(0);


    queue_init(&vm_pageout_queue_external.pgo_pending);
    vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
    vm_pageout_queue_external.pgo_laundry = 0;
    vm_pageout_queue_external.pgo_idle = FALSE;
    vm_pageout_queue_external.pgo_busy = FALSE;
    vm_pageout_queue_external.pgo_throttled = FALSE;

    queue_init(&vm_pageout_queue_internal.pgo_pending);
    vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
    vm_pageout_queue_internal.pgo_laundry = 0;
    vm_pageout_queue_internal.pgo_idle = FALSE;
    vm_pageout_queue_internal.pgo_busy = FALSE;
    vm_pageout_queue_internal.pgo_throttled = FALSE;


    result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &thread);
    if (result != KERN_SUCCESS)
        panic("vm_pageout_iothread_internal: create failed");

    thread_deallocate(thread);


    result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, BASEPRI_PREEMPT - 1, &thread);
    if (result != KERN_SUCCESS)
        panic("vm_pageout_iothread_external: create failed");

    thread_deallocate(thread);


    result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, BASEPRI_PREEMPT - 2, &thread);
    if (result != KERN_SUCCESS)
        panic("vm_pageout_garbage_collect: create failed");

    thread_deallocate(thread);


    vm_pageout_continue();
    /*NOTREACHED*/
}
static upl_t
upl_create(
    int         flags,
    upl_size_t  size)
{
    upl_t   upl;
    int     page_field_size;  /* bit field in word size buf */

    page_field_size = 0;

    if (flags & UPL_CREATE_LITE) {
        page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
        page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
    }
    if (flags & UPL_CREATE_INTERNAL) {
        upl = (upl_t)kalloc(sizeof(struct upl)
                            + (sizeof(struct upl_page_info)*(size/PAGE_SIZE))
                            + page_field_size);
    } else {
        upl = (upl_t)kalloc(sizeof(struct upl) + page_field_size);
    }
    upl->src_object = NULL;
    upl->kaddr = (vm_offset_t)0;
    upl->map_object = NULL;
    upl->highest_page = 0;
#ifdef UPL_DEBUG
    upl->ubc_alias1 = 0;
    upl->ubc_alias2 = 0;
#endif /* UPL_DEBUG */
    return (upl);
}
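/*
 * Worked example (illustrative, assuming 4 KB pages): a 1 MB UPL created
 * with UPL_CREATE_LITE covers 256 pages, so page_field_size becomes
 * ((256 + 7) >> 3) = 32 bytes of bitmap, already a multiple of 4 after the
 * (x + 3) & 0xFFFFFFFC rounding; an internal UPL additionally allocates
 * 256 * sizeof(struct upl_page_info) for its page list.
 */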
static void
upl_destroy(
    upl_t   upl)
{
    int page_field_size;  /* bit field in word size buf */

#ifdef UPL_DEBUG
    {
        upl_t       upl_ele;
        vm_object_t object;

        if (upl->map_object->pageout) {
            object = upl->map_object->shadow;
        } else {
            object = upl->map_object;
        }
        vm_object_lock(object);
        queue_iterate(&object->uplq, upl_ele, upl_t, uplq) {
            if (upl_ele == upl) {
                queue_remove(&object->uplq,
                             upl_ele, upl_t, uplq);
                break;
            }
        }
        vm_object_unlock(object);
    }
#endif /* UPL_DEBUG */
    /* drop a reference on the map_object whether or */
    /* not a pageout object is inserted */
    if (upl->map_object->pageout)
        vm_object_deallocate(upl->map_object);

    page_field_size = 0;
    if (upl->flags & UPL_LITE) {
        page_field_size = ((upl->size/PAGE_SIZE) + 7) >> 3;
        page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
    }
    if (upl->flags & UPL_INTERNAL) {
        kfree(upl,
              sizeof(struct upl) +
              (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE))
              + page_field_size);
    } else {
        kfree(upl, sizeof(struct upl) + page_field_size);
    }
}
void uc_upl_dealloc(upl_t upl);
__private_extern__ void

	upl->ref_count -= 1;
	if (upl->ref_count == 0) {
		upl_destroy(upl);
	}

	upl->ref_count -= 1;
	if (upl->ref_count == 0) {
		upl_destroy(upl);
	}
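
/*
 * Hedged sketch (not part of the original source): the reference
 * accounting implied by the deallocation code above.  The helper and
 * its assertions are illustrative only; ref_count is the real field
 * manipulated above and by the UPL mapping code later in this file.
 */
#if 0	/* example only -- never compiled */
static void
example_upl_ref_accounting(upl_t upl)
{
	/* a freshly created UPL is assumed to hold one creation reference */
	assert(upl->ref_count >= 1);

	upl->ref_count++;	/* e.g. a mapping takes an extra reference   */
	upl->ref_count--;	/* ... and drops it again when it is removed */

	/* the deallocation code above destroys the UPL at ref_count == 0 */
}
#endif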
/*
 * Statistics about UPL enforcement of copy-on-write obligations.
 */
unsigned long upl_cow = 0;
unsigned long upl_cow_again = 0;
unsigned long upl_cow_contiguous = 0;
unsigned long upl_cow_pages = 0;
unsigned long upl_cow_again_pages = 0;
unsigned long upl_cow_contiguous_pages = 0;
/*
 *	Routine:	vm_object_upl_request
 *
 *		Cause the population of a portion of a vm_object.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		A page list structure, listing the physical pages,
 *		will be returned upon request.
 *		This function is called by the file system or any other
 *		supplier of backing store to a pager.
 *		IMPORTANT NOTE: The caller must still respect the relationship
 *		between the vm_object and its backing memory object.  The
 *		caller MUST NOT substitute changes in the backing file
 *		without first doing a memory_object_lock_request on the
 *		target range unless it is known that the pages are not
 *		shared with another entity at the pager level.
 *
 *		If a page list structure is present,
 *		return the mapped physical pages; where a
 *		page is not present, return a non-initialized
 *		one.  If the no_sync bit is turned on, don't
 *		call the pager unlock to synchronize with other
 *		possible copies of the page.  Leave pages busy
 *		in the original object, if a page list structure
 *		was specified.  When a commit of the page list
 *		pages is done, the dirty bit will be set for each one.
 *
 *		If a page list structure is present, return
 *		all mapped pages.  Where a page does not exist,
 *		map a zero filled one.  Leave pages busy in
 *		the original object.  If a page list structure
 *		is not specified, this call is a no-op.
 *
 *		Note:  access of default pager objects has a rather interesting
 *		twist.  The caller of this routine, presumably the file system
 *		page cache handling code, will never actually make a request
 *		against a default pager backed object.  Only the default
 *		pager will make requests on backing store related vm_objects.
 *		In this way the default pager can maintain the relationship
 *		between backing store files (abstract memory objects) and
 *		the vm_objects (cache objects) they support.
 */
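
/*
 * Hedged usage sketch (not part of the original source): the commit /
 * abort lifecycle a backing-store supplier typically drives once it has
 * obtained a UPL from a request like the one below.  The wrapper name,
 * its arguments and the error handling are hypothetical; upl_commit_range(),
 * upl_abort() and the UPL_ABORT_ERROR flag appear later in this file, and
 * upl_deallocate() is assumed to be the reference-dropping routine above.
 */
#if 0	/* example only -- never compiled */
static void
example_upl_io_done(upl_t upl, upl_size_t io_size, boolean_t io_failed)
{
	boolean_t	empty;

	if (io_failed) {
		/* hand the pages back untouched and wake any waiters */
		upl_abort(upl, UPL_ABORT_ERROR);
	} else {
		/* commit only the range that was actually transferred */
		upl_commit_range(upl, 0, io_size, 0, NULL, 0, &empty);
	}
	/* drop the creation reference; the UPL is destroyed at zero */
	upl_deallocate(upl);
}
#endif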
__private_extern__ kern_return_t
vm_object_upl_request(
	vm_object_offset_t	offset,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,

	vm_page_t		dst_page = VM_PAGE_NULL;
	vm_object_offset_t	dst_offset = offset;
	upl_size_t		xfer_size = size;
	boolean_t		do_m_lock = FALSE;
#if MACH_CLUSTER_STATS
	boolean_t		encountered_lrp = FALSE;
#endif
	vm_page_t		alias_page = NULL;
	wpl_array_t		lite_list = NULL;
	vm_object_t		last_copy_object;

	if (cntrl_flags & ~UPL_VALID_FLAGS) {
		/*
		 * For forward compatibility's sake,
		 * reject any unknown flag.
		 */
		return KERN_INVALID_VALUE;
	}

	page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK)
				>> UPL_PAGE_TICKET_SHIFT;

	if (((size/PAGE_SIZE) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
		size = MAX_UPL_TRANSFER * PAGE_SIZE;
	}

	if (cntrl_flags & UPL_SET_INTERNAL)
		if (page_list_count != NULL)
			*page_list_count = MAX_UPL_TRANSFER;

	if ((!object->internal) && (object->paging_offset != 0))
		panic("vm_object_upl_request: external object with non-zero paging offset\n");

	if ((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
		return KERN_SUCCESS;
	}

	vm_object_lock(object);
	vm_object_paging_begin(object);
	vm_object_unlock(object);
2417 if(cntrl_flags
& UPL_SET_INTERNAL
) {
2418 if(cntrl_flags
& UPL_SET_LITE
) {
2419 uintptr_t page_field_size
;
2421 UPL_CREATE_INTERNAL
| UPL_CREATE_LITE
,
2423 user_page_list
= (upl_page_info_t
*)
2424 (((uintptr_t)upl
) + sizeof(struct upl
));
2425 lite_list
= (wpl_array_t
)
2426 (((uintptr_t)user_page_list
) +
2428 sizeof(upl_page_info_t
)));
2429 page_field_size
= ((size
/PAGE_SIZE
) + 7) >> 3;
2431 (page_field_size
+ 3) & 0xFFFFFFFC;
2432 bzero((char *)lite_list
, page_field_size
);
2434 UPL_LITE
| UPL_INTERNAL
;
2436 upl
= upl_create(UPL_CREATE_INTERNAL
, size
);
2437 user_page_list
= (upl_page_info_t
*)
2438 (((uintptr_t)upl
) + sizeof(struct upl
));
2439 upl
->flags
= UPL_INTERNAL
;
2442 if(cntrl_flags
& UPL_SET_LITE
) {
2443 uintptr_t page_field_size
;
2444 upl
= upl_create(UPL_CREATE_LITE
, size
);
2445 lite_list
= (wpl_array_t
)
2446 (((uintptr_t)upl
) + sizeof(struct upl
));
2447 page_field_size
= ((size
/PAGE_SIZE
) + 7) >> 3;
2449 (page_field_size
+ 3) & 0xFFFFFFFC;
2450 bzero((char *)lite_list
, page_field_size
);
2451 upl
->flags
= UPL_LITE
;
2453 upl
= upl_create(UPL_CREATE_EXTERNAL
, size
);
2458 if (object
->phys_contiguous
) {
2459 if ((cntrl_flags
& UPL_WILL_MODIFY
) &&
2460 object
->copy
!= VM_OBJECT_NULL
) {
2461 /* Honor copy-on-write obligations */
			 * We could still have a race...
			 * A is here building the UPL for a write().
			 * A pushes the pages to the current copy
			 * A returns the UPL to the caller.
			 * B comes along and establishes another
			 * private mapping on this object, inserting
			 * a new copy object between the original
			 * object and the old copy object.
			 * B reads a page and gets the original contents
			 * from the original object.
			 * A modifies the page in the original object.
			 * B reads the page again and sees A's changes,
			 * The problem is that the pages are not
			 * marked "busy" in the original object, so
			 * nothing prevents B from reading it before
			 * A's changes are completed.
			 * The "paging_in_progress" might protect us
			 * from the insertion of a new copy object
			 * though...  To be verified.
2489 vm_object_lock_request(object
,
2493 MEMORY_OBJECT_COPY_SYNC
,
2495 upl_cow_contiguous
++;
2496 upl_cow_contiguous_pages
+= size
>> PAGE_SHIFT
;
2499 upl
->map_object
= object
;
2500 /* don't need any shadow mappings for this one */
2501 /* since it is already I/O memory */
2502 upl
->flags
|= UPL_DEVICE_MEMORY
;
2505 /* paging_in_progress protects paging_offset */
2506 upl
->offset
= offset
+ object
->paging_offset
;
2509 if(user_page_list
) {
2510 user_page_list
[0].phys_addr
=
2511 (offset
+ object
->shadow_offset
)>>PAGE_SHIFT
;
2512 user_page_list
[0].device
= TRUE
;
2514 upl
->highest_page
= (offset
+ object
->shadow_offset
+ size
- 1)>>PAGE_SHIFT
;
2516 if(page_list_count
!= NULL
) {
2517 if (upl
->flags
& UPL_INTERNAL
) {
2518 *page_list_count
= 0;
2520 *page_list_count
= 1;
2524 return KERN_SUCCESS
;
2528 user_page_list
[0].device
= FALSE
;
2530 if(cntrl_flags
& UPL_SET_LITE
) {
2531 upl
->map_object
= object
;
2533 upl
->map_object
= vm_object_allocate(size
);
		 * No need to lock the new object: nobody else knows
		 * about it yet, so it's all ours so far.
2538 upl
->map_object
->shadow
= object
;
2539 upl
->map_object
->pageout
= TRUE
;
2540 upl
->map_object
->can_persist
= FALSE
;
2541 upl
->map_object
->copy_strategy
=
2542 MEMORY_OBJECT_COPY_NONE
;
2543 upl
->map_object
->shadow_offset
= offset
;
2544 upl
->map_object
->wimg_bits
= object
->wimg_bits
;
2548 if (!(cntrl_flags
& UPL_SET_LITE
)) {
2549 VM_PAGE_GRAB_FICTITIOUS(alias_page
);
2554 * Just mark the UPL as "encrypted" here.
2555 * We'll actually encrypt the pages later,
2556 * in upl_encrypt(), when the caller has
2557 * selected which pages need to go to swap.
2559 if (cntrl_flags
& UPL_ENCRYPT
) {
2560 upl
->flags
|= UPL_ENCRYPTED
;
2562 if (cntrl_flags
& UPL_FOR_PAGEOUT
) {
2563 upl
->flags
|= UPL_PAGEOUT
;
2565 vm_object_lock(object
);
2567 /* we can lock in the paging_offset once paging_in_progress is set */
2570 upl
->offset
= offset
+ object
->paging_offset
;
2573 queue_enter(&object
->uplq
, upl
, upl_t
, uplq
);
2574 #endif /* UPL_DEBUG */
2577 if ((cntrl_flags
& UPL_WILL_MODIFY
) &&
2578 object
->copy
!= VM_OBJECT_NULL
) {
2579 /* Honor copy-on-write obligations */
2582 * The caller is gathering these pages and
2583 * might modify their contents. We need to
2584 * make sure that the copy object has its own
2585 * private copies of these pages before we let
2586 * the caller modify them.
2588 vm_object_update(object
,
2593 FALSE
, /* should_return */
2594 MEMORY_OBJECT_COPY_SYNC
,
2597 upl_cow_pages
+= size
>> PAGE_SHIFT
;
2600 /* remember which copy object we synchronized with */
2601 last_copy_object
= object
->copy
;
2604 if(cntrl_flags
& UPL_COPYOUT_FROM
) {
2605 upl
->flags
|= UPL_PAGE_SYNC_DONE
;
2608 if((alias_page
== NULL
) &&
2609 !(cntrl_flags
& UPL_SET_LITE
)) {
2610 vm_object_unlock(object
);
2611 VM_PAGE_GRAB_FICTITIOUS(alias_page
);
2612 vm_object_lock(object
);
2614 if ( ((dst_page
= vm_page_lookup(object
, dst_offset
)) == VM_PAGE_NULL
) ||
2615 dst_page
->fictitious
||
2618 (dst_page
->wire_count
&& !dst_page
->pageout
) ||
2620 ((!dst_page
->inactive
) && (cntrl_flags
& UPL_FOR_PAGEOUT
) &&
2621 (dst_page
->page_ticket
!= page_ticket
) &&
2622 ((dst_page
->page_ticket
+1) != page_ticket
)) ) {
2625 user_page_list
[entry
].phys_addr
= 0;
			 * grab this up front...
			 * a high percentage of the time we're going to
			 * need the hardware modification state a bit later
			 * anyway... so we can eliminate an extra call into
			 * the pmap layer by grabbing it here and recording it
2634 refmod_state
= pmap_get_refmod(dst_page
->phys_page
);
2636 if (cntrl_flags
& UPL_RET_ONLY_DIRTY
) {
2638 * we're only asking for DIRTY pages to be returned
2641 if (dst_page
->list_req_pending
|| !(cntrl_flags
& UPL_FOR_PAGEOUT
)) {
				 * if this is the page stolen by vm_pageout_scan to be
				 * cleaned (as opposed to a buddy being clustered in),
				 * or this request is not being driven by a PAGEOUT cluster,
				 * then we only need to check for the page being dirty or
				 * precious to decide whether to return it
2649 if (dst_page
->dirty
|| dst_page
->precious
||
2650 (refmod_state
& VM_MEM_MODIFIED
)) {
2655 * this is a request for a PAGEOUT cluster and this page
2656 * is merely along for the ride as a 'buddy'... not only
2657 * does it have to be dirty to be returned, but it also
2658 * can't have been referenced recently... note that we've
2659 * already filtered above based on whether this page is
2660 * currently on the inactive queue or it meets the page
2661 * ticket (generation count) check
2663 if ( !(refmod_state
& VM_MEM_REFERENCED
) &&
2664 ((refmod_state
& VM_MEM_MODIFIED
) ||
2665 dst_page
->dirty
|| dst_page
->precious
) ) {
2669 * if we reach here, we're not to return
2670 * the page... go on to the next one
2673 user_page_list
[entry
].phys_addr
= 0;
2675 dst_offset
+= PAGE_SIZE_64
;
2676 xfer_size
-= PAGE_SIZE
;
2680 if(dst_page
->busy
&&
2681 (!(dst_page
->list_req_pending
&&
2682 dst_page
->pageout
))) {
2683 if(cntrl_flags
& UPL_NOBLOCK
) {
2684 if(user_page_list
) {
2685 user_page_list
[entry
].phys_addr
= 0;
2688 dst_offset
+= PAGE_SIZE_64
;
2689 xfer_size
-= PAGE_SIZE
;
2693 * someone else is playing with the
2694 * page. We will have to wait.
2696 PAGE_SLEEP(object
, dst_page
, THREAD_UNINT
);
2699 /* Someone else already cleaning the page? */
2700 if((dst_page
->cleaning
|| dst_page
->absent
||
2701 dst_page
->wire_count
!= 0) &&
2702 !dst_page
->list_req_pending
) {
2703 if(user_page_list
) {
2704 user_page_list
[entry
].phys_addr
= 0;
2707 dst_offset
+= PAGE_SIZE_64
;
2708 xfer_size
-= PAGE_SIZE
;
			/* eliminate all mappings from the */
			/* original object and its progeny */
2714 vm_page_lock_queues();
2716 if (dst_page
->pageout_queue
== TRUE
)
2718 * we've buddied up a page for a clustered pageout
2719 * that has already been moved to the pageout
2720 * queue by pageout_scan... we need to remove
2721 * it from the queue and drop the laundry count
2724 vm_pageout_queue_steal(dst_page
);
2725 #if MACH_CLUSTER_STATS
2726 /* pageout statistics gathering. count */
2727 /* all the pages we will page out that */
2728 /* were not counted in the initial */
2729 /* vm_pageout_scan work */
2730 if(dst_page
->list_req_pending
)
2731 encountered_lrp
= TRUE
;
2732 if((dst_page
->dirty
||
2733 (dst_page
->object
->internal
&&
2734 dst_page
->precious
)) &&
2735 (dst_page
->list_req_pending
2737 if(encountered_lrp
) {
2739 (pages_at_higher_offsets
++;)
2742 (pages_at_lower_offsets
++;)
2746 /* Turn off busy indication on pending */
2747 /* pageout. Note: we can only get here */
2748 /* in the request pending case. */
2749 dst_page
->list_req_pending
= FALSE
;
2750 dst_page
->busy
= FALSE
;
2751 dst_page
->cleaning
= FALSE
;
2753 hw_dirty
= refmod_state
& VM_MEM_MODIFIED
;
2754 dirty
= hw_dirty
? TRUE
: dst_page
->dirty
;
2756 if(cntrl_flags
& UPL_SET_LITE
) {
2758 pg_num
= (dst_offset
-offset
)/PAGE_SIZE
;
2759 lite_list
[pg_num
>>5] |=
2762 pmap_clear_modify(dst_page
->phys_page
);
2764 * Record that this page has been
2768 vm_external_state_set(
2769 object
->existence_map
,
2771 #endif /*MACH_PAGEMAP*/
2774 * Mark original page as cleaning
2777 dst_page
->cleaning
= TRUE
;
2778 dst_page
->dirty
= TRUE
;
2779 dst_page
->precious
= FALSE
;
2781 /* use pageclean setup, it is more */
2782 /* convenient even for the pageout */
2785 vm_object_lock(upl
->map_object
);
2786 vm_pageclean_setup(dst_page
,
2787 alias_page
, upl
->map_object
,
2789 vm_object_unlock(upl
->map_object
);
2791 alias_page
->absent
= FALSE
;
2796 dst_page
->dirty
= FALSE
;
2797 dst_page
->precious
= TRUE
;
2800 if(dst_page
->pageout
)
2801 dst_page
->busy
= TRUE
;
2803 if ( (cntrl_flags
& UPL_ENCRYPT
) ) {
2806 * We want to deny access to the target page
2807 * because its contents are about to be
2808 * encrypted and the user would be very
2809 * confused to see encrypted data instead
2812 dst_page
->busy
= TRUE
;
2814 if ( !(cntrl_flags
& UPL_CLEAN_IN_PLACE
) ) {
2816 * deny access to the target page
2817 * while it is being worked on
2819 if ((!dst_page
->pageout
) &&
2820 (dst_page
->wire_count
== 0)) {
2821 dst_page
->busy
= TRUE
;
2822 dst_page
->pageout
= TRUE
;
2823 vm_page_wire(dst_page
);
2827 if (dst_page
->phys_page
> upl
->highest_page
)
2828 upl
->highest_page
= dst_page
->phys_page
;
2830 if(user_page_list
) {
2831 user_page_list
[entry
].phys_addr
2832 = dst_page
->phys_page
;
2833 user_page_list
[entry
].dirty
=
2835 user_page_list
[entry
].pageout
=
2837 user_page_list
[entry
].absent
=
2839 user_page_list
[entry
].precious
=
2842 vm_page_unlock_queues();
2846 * The caller is gathering this page and might
2847 * access its contents later on. Decrypt the
2848 * page before adding it to the UPL, so that
2849 * the caller never sees encrypted data.
2851 if (! (cntrl_flags
& UPL_ENCRYPT
) &&
2852 dst_page
->encrypted
) {
2853 assert(dst_page
->busy
);
2855 vm_page_decrypt(dst_page
, 0);
2856 vm_page_decrypt_for_upl_counter
++;
2859 * Retry this page, since anything
2860 * could have changed while we were
2867 dst_offset
+= PAGE_SIZE_64
;
2868 xfer_size
-= PAGE_SIZE
;
2872 if((alias_page
== NULL
) &&
2873 !(cntrl_flags
& UPL_SET_LITE
)) {
2874 vm_object_unlock(object
);
2875 VM_PAGE_GRAB_FICTITIOUS(alias_page
);
2876 vm_object_lock(object
);
2879 if ((cntrl_flags
& UPL_WILL_MODIFY
) &&
2880 object
->copy
!= last_copy_object
) {
2881 /* Honor copy-on-write obligations */
2884 * The copy object has changed since we
2885 * last synchronized for copy-on-write.
2886 * Another copy object might have been
2887 * inserted while we released the object's
2888 * lock. Since someone could have seen the
2889 * original contents of the remaining pages
2890 * through that new object, we have to
2891 * synchronize with it again for the remaining
2892 * pages only. The previous pages are "busy"
2893 * so they can not be seen through the new
2894 * mapping. The new mapping will see our
2895 * upcoming changes for those previous pages,
2896 * but that's OK since they couldn't see what
2897 * was there before. It's just a race anyway
2898 * and there's no guarantee of consistency or
2899 * atomicity. We just don't want new mappings
2900 * to see both the *before* and *after* pages.
2902 if (object
->copy
!= VM_OBJECT_NULL
) {
2905 dst_offset
,/* current offset */
2906 xfer_size
, /* remaining size */
2909 FALSE
, /* should_return */
2910 MEMORY_OBJECT_COPY_SYNC
,
2913 upl_cow_again_pages
+=
2914 xfer_size
>> PAGE_SHIFT
;
2916 /* remember the copy object we synced with */
2917 last_copy_object
= object
->copy
;
2920 dst_page
= vm_page_lookup(object
, dst_offset
);
2922 if(dst_page
!= VM_PAGE_NULL
) {
2923 if((cntrl_flags
& UPL_RET_ONLY_ABSENT
) &&
2924 !((dst_page
->list_req_pending
)
2925 && (dst_page
->absent
))) {
2926 /* we are doing extended range */
2927 /* requests. we want to grab */
2928 /* pages around some which are */
2929 /* already present. */
2930 if(user_page_list
) {
2931 user_page_list
[entry
].phys_addr
= 0;
2934 dst_offset
+= PAGE_SIZE_64
;
2935 xfer_size
-= PAGE_SIZE
;
2938 if((dst_page
->cleaning
) &&
2939 !(dst_page
->list_req_pending
)) {
2940 /*someone else is writing to the */
2941 /* page. We will have to wait. */
2942 PAGE_SLEEP(object
,dst_page
,THREAD_UNINT
);
2945 if ((dst_page
->fictitious
&&
2946 dst_page
->list_req_pending
)) {
2947 /* dump the fictitious page */
2948 dst_page
->list_req_pending
= FALSE
;
2949 dst_page
->clustered
= FALSE
;
2951 vm_page_lock_queues();
2952 vm_page_free(dst_page
);
2953 vm_page_unlock_queues();
2956 } else if ((dst_page
->absent
&&
2957 dst_page
->list_req_pending
)) {
2958 /* the default_pager case */
2959 dst_page
->list_req_pending
= FALSE
;
2960 dst_page
->busy
= FALSE
;
2963 if(dst_page
== VM_PAGE_NULL
) {
2964 if(object
->private) {
				 * This is a nasty wrinkle for users
				 * of upl who encounter device or
				 * private memory, however it is
				 * unavoidable; only a fault can
				 * resolve the actual backing
				 * physical page by asking the
2974 if(user_page_list
) {
2975 user_page_list
[entry
].phys_addr
= 0;
2978 dst_offset
+= PAGE_SIZE_64
;
2979 xfer_size
-= PAGE_SIZE
;
2982 /* need to allocate a page */
2983 dst_page
= vm_page_alloc(object
, dst_offset
);
2984 if (dst_page
== VM_PAGE_NULL
) {
2985 vm_object_unlock(object
);
2987 vm_object_lock(object
);
2990 dst_page
->busy
= FALSE
;
2992 if(cntrl_flags
& UPL_NO_SYNC
) {
2993 dst_page
->page_lock
= 0;
2994 dst_page
->unlock_request
= 0;
2997 if(cntrl_flags
& UPL_RET_ONLY_ABSENT
) {
				 * if UPL_RET_ONLY_ABSENT was specified,
				 * then we're definitely setting up a
				 * UPL for a clustered read/pagein
				 * operation... mark the pages as clustered
				 * so vm_fault can correctly attribute them
				 * to the 'pagein' bucket the first time
				 * a fault happens on them
3007 dst_page
->clustered
= TRUE
;
3009 dst_page
->absent
= TRUE
;
3010 object
->absent_count
++;
3013 if(cntrl_flags
& UPL_NO_SYNC
) {
3014 dst_page
->page_lock
= 0;
3015 dst_page
->unlock_request
= 0;
3022 if (cntrl_flags
& UPL_ENCRYPT
) {
3024 * The page is going to be encrypted when we
3025 * get it from the pager, so mark it so.
3027 dst_page
->encrypted
= TRUE
;
3030 * Otherwise, the page will not contain
3033 dst_page
->encrypted
= FALSE
;
3036 dst_page
->overwriting
= TRUE
;
3037 if(dst_page
->fictitious
) {
3038 panic("need corner case for fictitious page");
3040 if(dst_page
->page_lock
) {
			/* eliminate all mappings from the */
			/* original object and its progeny */
3048 if(dst_page
->busy
) {
3049 /*someone else is playing with the */
3050 /* page. We will have to wait. */
3051 PAGE_SLEEP(object
, dst_page
, THREAD_UNINT
);
3054 vm_page_lock_queues();
3056 if( !(cntrl_flags
& UPL_FILE_IO
))
3057 hw_dirty
= pmap_disconnect(dst_page
->phys_page
) & VM_MEM_MODIFIED
;
3059 hw_dirty
= pmap_get_refmod(dst_page
->phys_page
) & VM_MEM_MODIFIED
;
3060 dirty
= hw_dirty
? TRUE
: dst_page
->dirty
;
3062 if(cntrl_flags
& UPL_SET_LITE
) {
3064 pg_num
= (dst_offset
-offset
)/PAGE_SIZE
;
3065 lite_list
[pg_num
>>5] |=
3068 pmap_clear_modify(dst_page
->phys_page
);
3070 * Record that this page has been
3074 vm_external_state_set(
3075 object
->existence_map
,
3077 #endif /*MACH_PAGEMAP*/
3080 * Mark original page as cleaning
3083 dst_page
->cleaning
= TRUE
;
3084 dst_page
->dirty
= TRUE
;
3085 dst_page
->precious
= FALSE
;
3087 /* use pageclean setup, it is more */
3088 /* convenient even for the pageout */
3090 vm_object_lock(upl
->map_object
);
3091 vm_pageclean_setup(dst_page
,
3092 alias_page
, upl
->map_object
,
3094 vm_object_unlock(upl
->map_object
);
3096 alias_page
->absent
= FALSE
;
3100 if(cntrl_flags
& UPL_CLEAN_IN_PLACE
) {
3101 /* clean in place for read implies */
3102 /* that a write will be done on all */
3103 /* the pages that are dirty before */
3104 /* a upl commit is done. The caller */
3105 /* is obligated to preserve the */
3106 /* contents of all pages marked */
3108 upl
->flags
|= UPL_CLEAR_DIRTY
;
3112 dst_page
->dirty
= FALSE
;
3113 dst_page
->precious
= TRUE
;
3116 if (dst_page
->wire_count
== 0) {
3117 /* deny access to the target page while */
3118 /* it is being worked on */
3119 dst_page
->busy
= TRUE
;
3121 vm_page_wire(dst_page
);
3123 if(cntrl_flags
& UPL_RET_ONLY_ABSENT
) {
3125 * expect the page not to be used
3126 * since it's coming in as part
3127 * of a cluster and could be
3128 * speculative... pages that
3129 * are 'consumed' will get a
3130 * hardware reference
3132 dst_page
->reference
= FALSE
;
3135 * expect the page to be used
3137 dst_page
->reference
= TRUE
;
3139 dst_page
->precious
=
3140 (cntrl_flags
& UPL_PRECIOUS
)
3143 if (dst_page
->phys_page
> upl
->highest_page
)
3144 upl
->highest_page
= dst_page
->phys_page
;
3146 if(user_page_list
) {
3147 user_page_list
[entry
].phys_addr
3148 = dst_page
->phys_page
;
3149 user_page_list
[entry
].dirty
=
3151 user_page_list
[entry
].pageout
=
3153 user_page_list
[entry
].absent
=
3155 user_page_list
[entry
].precious
=
3158 vm_page_unlock_queues();
3161 dst_offset
+= PAGE_SIZE_64
;
3162 xfer_size
-= PAGE_SIZE
;
3166 if (upl
->flags
& UPL_INTERNAL
) {
3167 if(page_list_count
!= NULL
)
3168 *page_list_count
= 0;
3169 } else if (*page_list_count
> entry
) {
3170 if(page_list_count
!= NULL
)
3171 *page_list_count
= entry
;
3174 if(alias_page
!= NULL
) {
3175 vm_page_lock_queues();
3176 vm_page_free(alias_page
);
3177 vm_page_unlock_queues();
3181 vm_prot_t access_required
;
3182 /* call back all associated pages from other users of the pager */
3183 /* all future updates will be on data which is based on the */
3184 /* changes we are going to make here. Note: it is assumed that */
3185 /* we already hold copies of the data so we will not be seeing */
3186 /* an avalanche of incoming data from the pager */
3187 access_required
= (cntrl_flags
& UPL_COPYOUT_FROM
)
3188 ? VM_PROT_READ
: VM_PROT_WRITE
;
3192 if(!object
->pager_ready
) {
3193 wait_result_t wait_result
;
3195 wait_result
= vm_object_sleep(object
,
3196 VM_OBJECT_EVENT_PAGER_READY
,
3198 if (wait_result
!= THREAD_AWAKENED
) {
3199 vm_object_unlock(object
);
3200 return KERN_FAILURE
;
3205 vm_object_unlock(object
);
3206 rc
= memory_object_data_unlock(
3208 dst_offset
+ object
->paging_offset
,
3211 if (rc
!= KERN_SUCCESS
&& rc
!= MACH_SEND_INTERRUPTED
)
3212 return KERN_FAILURE
;
3213 vm_object_lock(object
);
3215 if (rc
== KERN_SUCCESS
)
3219 /* lets wait on the last page requested */
3220 /* NOTE: we will have to update lock completed routine to signal */
3221 if(dst_page
!= VM_PAGE_NULL
&&
3222 (access_required
& dst_page
->page_lock
) != access_required
) {
3223 PAGE_ASSERT_WAIT(dst_page
, THREAD_UNINT
);
3224 vm_object_unlock(object
);
3225 thread_block(THREAD_CONTINUE_NULL
);
3226 return KERN_SUCCESS
;
3230 vm_object_unlock(object
);
3231 return KERN_SUCCESS
;
/* JMM - Backward compatibility for now */
3236 vm_fault_list_request( /* forward */
3237 memory_object_control_t control
,
3238 vm_object_offset_t offset
,
3241 upl_page_info_t
**user_page_list_ptr
,
3242 int page_list_count
,
3245 vm_fault_list_request(
3246 memory_object_control_t control
,
3247 vm_object_offset_t offset
,
3250 upl_page_info_t
**user_page_list_ptr
,
3251 int page_list_count
,
3254 unsigned int local_list_count
;
3255 upl_page_info_t
*user_page_list
;
3258 if (user_page_list_ptr
!= NULL
) {
3259 local_list_count
= page_list_count
;
3260 user_page_list
= *user_page_list_ptr
;
3262 local_list_count
= 0;
3263 user_page_list
= NULL
;
3265 kr
= memory_object_upl_request(control
,
3273 if(kr
!= KERN_SUCCESS
)
3276 if ((user_page_list_ptr
!= NULL
) && (cntrl_flags
& UPL_INTERNAL
)) {
3277 *user_page_list_ptr
= UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr
);
3280 return KERN_SUCCESS
;
/*
 *	Routine:	vm_object_super_upl_request
 *
 *		Cause the population of a portion of a vm_object
 *		in much the same way as memory_object_upl_request.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		However, the region may be expanded up to the super
 *		cluster size provided.
 */
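
/*
 * Hedged sketch (not part of the original source): the rounding arithmetic
 * the routine below applies when a super cluster is requested.  The helper
 * name and out-parameters are hypothetical; the computation mirrors the
 * base_offset / super_size logic in vm_object_super_upl_request.
 */
#if 0	/* example only -- never compiled */
static void
example_super_cluster_expand(
	vm_object_offset_t	offset,
	upl_size_t		size,
	upl_size_t		super_cluster,
	vm_object_offset_t	object_size,
	vm_object_offset_t	*base_offset,	/* out: rounded-down start    */
	upl_size_t		*super_size)	/* out: expanded request size */
{
	/* round the start down to a super-cluster boundary */
	*base_offset = offset & ~((vm_object_offset_t)super_cluster - 1);

	/* cover two clusters when the request straddles a boundary */
	*super_size = ((offset + size) > (*base_offset + super_cluster)) ?
			super_cluster << 1 : super_cluster;

	/* never run past the end of the object */
	if ((*base_offset + *super_size) > object_size)
		*super_size = object_size - *base_offset;
}
#endif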
3296 __private_extern__ kern_return_t
3297 vm_object_super_upl_request(
3299 vm_object_offset_t offset
,
3301 upl_size_t super_cluster
,
3303 upl_page_info_t
*user_page_list
,
3304 unsigned int *page_list_count
,
3307 vm_page_t target_page
;
3311 if(object
->paging_offset
> offset
)
3312 return KERN_FAILURE
;
3314 assert(object
->paging_in_progress
);
3315 offset
= offset
- object
->paging_offset
;
3317 if(cntrl_flags
& UPL_FOR_PAGEOUT
) {
3319 vm_object_lock(object
);
3321 if((target_page
= vm_page_lookup(object
, offset
))
3323 ticket
= target_page
->page_ticket
;
3324 cntrl_flags
= cntrl_flags
& ~(int)UPL_PAGE_TICKET_MASK
;
3325 cntrl_flags
= cntrl_flags
|
3326 ((ticket
<< UPL_PAGE_TICKET_SHIFT
)
3327 & UPL_PAGE_TICKET_MASK
);
3329 vm_object_unlock(object
);
3332 if (super_cluster
> size
) {
3334 vm_object_offset_t base_offset
;
3335 upl_size_t super_size
;
3337 base_offset
= (offset
&
3338 ~((vm_object_offset_t
) super_cluster
- 1));
3339 super_size
= (offset
+size
) > (base_offset
+ super_cluster
) ?
3340 super_cluster
<<1 : super_cluster
;
3341 super_size
= ((base_offset
+ super_size
) > object
->size
) ?
3342 (object
->size
- base_offset
) : super_size
;
3343 if(offset
> (base_offset
+ super_size
))
3344 panic("vm_object_super_upl_request: Missed target pageout"
3345 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
3346 offset
, base_offset
, super_size
, super_cluster
,
3347 size
, object
->paging_offset
);
	 * apparently there is a case where the vm requests a
	 * page to be written out whose offset is beyond the
3353 if((offset
+ size
) > (base_offset
+ super_size
))
3354 super_size
= (offset
+ size
) - base_offset
;
3356 offset
= base_offset
;
3359 return vm_object_upl_request(object
, offset
, size
,
3360 upl
, user_page_list
, page_list_count
,
3368 vm_map_address_t offset
,
3369 upl_size_t
*upl_size
,
3371 upl_page_info_array_t page_list
,
3372 unsigned int *count
,
3375 vm_map_entry_t entry
;
3377 int force_data_sync
;
3379 vm_object_t local_object
;
3380 vm_map_offset_t local_offset
;
3381 vm_map_offset_t local_start
;
3384 caller_flags
= *flags
;
3386 if (caller_flags
& ~UPL_VALID_FLAGS
) {
3388 * For forward compatibility's sake,
3389 * reject any unknown flag.
3391 return KERN_INVALID_VALUE
;
3394 force_data_sync
= (caller_flags
& UPL_FORCE_DATA_SYNC
);
3395 sync_cow_data
= !(caller_flags
& UPL_COPYOUT_FROM
);
3398 return KERN_INVALID_ARGUMENT
;
3403 if (vm_map_lookup_entry(map
, offset
, &entry
)) {
3404 if (entry
->object
.vm_object
== VM_OBJECT_NULL
||
3405 !entry
->object
.vm_object
->phys_contiguous
) {
3406 if((*upl_size
/page_size
) > MAX_UPL_TRANSFER
) {
3407 *upl_size
= MAX_UPL_TRANSFER
* page_size
;
3410 if((entry
->vme_end
- offset
) < *upl_size
) {
3411 *upl_size
= entry
->vme_end
- offset
;
3413 if (caller_flags
& UPL_QUERY_OBJECT_TYPE
) {
3414 if (entry
->object
.vm_object
== VM_OBJECT_NULL
) {
3416 } else if (entry
->object
.vm_object
->private) {
3417 *flags
= UPL_DEV_MEMORY
;
3418 if (entry
->object
.vm_object
->phys_contiguous
) {
3419 *flags
|= UPL_PHYS_CONTIG
;
3425 return KERN_SUCCESS
;
3428 * Create an object if necessary.
3430 if (entry
->object
.vm_object
== VM_OBJECT_NULL
) {
3431 entry
->object
.vm_object
= vm_object_allocate(
3432 (vm_size_t
)(entry
->vme_end
- entry
->vme_start
));
3435 if (!(caller_flags
& UPL_COPYOUT_FROM
)) {
3436 if (!(entry
->protection
& VM_PROT_WRITE
)) {
3438 return KERN_PROTECTION_FAILURE
;
3440 if (entry
->needs_copy
) {
3443 vm_map_offset_t offset_hi
;
3444 vm_map_offset_t offset_lo
;
3445 vm_object_offset_t new_offset
;
3448 vm_behavior_t behavior
;
3449 vm_map_version_t version
;
3453 vm_map_lock_write_to_read(map
);
3454 if(vm_map_lookup_locked(&local_map
,
3455 offset
, VM_PROT_WRITE
,
3457 &new_offset
, &prot
, &wired
,
3458 &behavior
, &offset_lo
,
3459 &offset_hi
, &real_map
)) {
3460 vm_map_unlock(local_map
);
3461 return KERN_FAILURE
;
3463 if (real_map
!= map
) {
3464 vm_map_unlock(real_map
);
3466 vm_object_unlock(object
);
3467 vm_map_unlock(local_map
);
3469 goto REDISCOVER_ENTRY
;
3472 if (entry
->is_sub_map
) {
3475 submap
= entry
->object
.sub_map
;
3476 local_start
= entry
->vme_start
;
3477 local_offset
= entry
->offset
;
3478 vm_map_reference(submap
);
3481 ret
= (vm_map_create_upl(submap
,
3482 local_offset
+ (offset
- local_start
),
3483 upl_size
, upl
, page_list
, count
,
3486 vm_map_deallocate(submap
);
3490 if (sync_cow_data
) {
3491 if (entry
->object
.vm_object
->shadow
3492 || entry
->object
.vm_object
->copy
) {
3494 local_object
= entry
->object
.vm_object
;
3495 local_start
= entry
->vme_start
;
3496 local_offset
= entry
->offset
;
3497 vm_object_reference(local_object
);
3500 if (entry
->object
.vm_object
->shadow
&&
3501 entry
->object
.vm_object
->copy
) {
3502 vm_object_lock_request(
3503 local_object
->shadow
,
3504 (vm_object_offset_t
)
3505 ((offset
- local_start
) +
3507 local_object
->shadow_offset
,
3509 MEMORY_OBJECT_DATA_SYNC
,
3512 sync_cow_data
= FALSE
;
3513 vm_object_deallocate(local_object
);
3514 goto REDISCOVER_ENTRY
;
3518 if (force_data_sync
) {
3520 local_object
= entry
->object
.vm_object
;
3521 local_start
= entry
->vme_start
;
3522 local_offset
= entry
->offset
;
3523 vm_object_reference(local_object
);
3526 vm_object_lock_request(
3528 (vm_object_offset_t
)
3529 ((offset
- local_start
) + local_offset
),
3530 (vm_object_size_t
)*upl_size
, FALSE
,
3531 MEMORY_OBJECT_DATA_SYNC
,
3533 force_data_sync
= FALSE
;
3534 vm_object_deallocate(local_object
);
3535 goto REDISCOVER_ENTRY
;
3538 if(!(entry
->object
.vm_object
->private)) {
3539 if(*upl_size
> (MAX_UPL_TRANSFER
*PAGE_SIZE
))
3540 *upl_size
= (MAX_UPL_TRANSFER
*PAGE_SIZE
);
3541 if(entry
->object
.vm_object
->phys_contiguous
) {
3542 *flags
= UPL_PHYS_CONTIG
;
3547 *flags
= UPL_DEV_MEMORY
| UPL_PHYS_CONTIG
;
3549 local_object
= entry
->object
.vm_object
;
3550 local_offset
= entry
->offset
;
3551 local_start
= entry
->vme_start
;
3552 vm_object_reference(local_object
);
3554 if(caller_flags
& UPL_SET_IO_WIRE
) {
3555 ret
= (vm_object_iopl_request(local_object
,
3556 (vm_object_offset_t
)
3557 ((offset
- local_start
)
3565 ret
= (vm_object_upl_request(local_object
,
3566 (vm_object_offset_t
)
3567 ((offset
- local_start
)
3575 vm_object_deallocate(local_object
);
3580 return(KERN_FAILURE
);
3585 * Internal routine to enter a UPL into a VM map.
3587 * JMM - This should just be doable through the standard
3588 * vm_map_enter() API.
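
/*
 * Hedged usage sketch (not part of the original source): how a kernel
 * caller would pair the UPL map/unmap routines in this file.  vm_upl_map()
 * is the routine named in the panic string below; vm_upl_unmap() is assumed
 * to be the matching removal routine described by the later comment block.
 */
#if 0	/* example only -- never compiled */
static kern_return_t
example_upl_window(vm_map_t map, upl_t upl)
{
	vm_map_offset_t	addr;
	kern_return_t	kr;

	kr = vm_upl_map(map, upl, &addr);	/* takes a mapping ref on the UPL */
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... access the UPL's pages through 'addr' here ... */

	return vm_upl_unmap(map, upl);		/* drops the mapping ref */
}
#endif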
3594 vm_map_offset_t
*dst_addr
)
3597 vm_object_offset_t offset
;
3598 vm_map_offset_t addr
;
3602 if (upl
== UPL_NULL
)
3603 return KERN_INVALID_ARGUMENT
;
3607 /* check to see if already mapped */
3608 if(UPL_PAGE_LIST_MAPPED
& upl
->flags
) {
3610 return KERN_FAILURE
;
3613 if((!(upl
->map_object
->pageout
)) &&
3614 !((upl
->flags
& (UPL_DEVICE_MEMORY
| UPL_IO_WIRE
)) ||
3615 (upl
->map_object
->phys_contiguous
))) {
3617 vm_page_t alias_page
;
3618 vm_object_offset_t new_offset
;
3620 wpl_array_t lite_list
;
3622 if(upl
->flags
& UPL_INTERNAL
) {
3623 lite_list
= (wpl_array_t
)
3624 ((((uintptr_t)upl
) + sizeof(struct upl
))
3625 + ((upl
->size
/PAGE_SIZE
)
3626 * sizeof(upl_page_info_t
)));
3628 lite_list
= (wpl_array_t
)
3629 (((uintptr_t)upl
) + sizeof(struct upl
));
3631 object
= upl
->map_object
;
3632 upl
->map_object
= vm_object_allocate(upl
->size
);
3633 vm_object_lock(upl
->map_object
);
3634 upl
->map_object
->shadow
= object
;
3635 upl
->map_object
->pageout
= TRUE
;
3636 upl
->map_object
->can_persist
= FALSE
;
3637 upl
->map_object
->copy_strategy
=
3638 MEMORY_OBJECT_COPY_NONE
;
3639 upl
->map_object
->shadow_offset
=
3640 upl
->offset
- object
->paging_offset
;
3641 upl
->map_object
->wimg_bits
= object
->wimg_bits
;
3642 offset
= upl
->map_object
->shadow_offset
;
3646 vm_object_lock(object
);
3649 pg_num
= (new_offset
)/PAGE_SIZE
;
3650 if(lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
3651 vm_object_unlock(object
);
3652 VM_PAGE_GRAB_FICTITIOUS(alias_page
);
3653 vm_object_lock(object
);
3654 m
= vm_page_lookup(object
, offset
);
3655 if (m
== VM_PAGE_NULL
) {
3656 panic("vm_upl_map: page missing\n");
3659 vm_object_paging_begin(object
);
3662 * Convert the fictitious page to a private
3663 * shadow of the real page.
3665 assert(alias_page
->fictitious
);
3666 alias_page
->fictitious
= FALSE
;
3667 alias_page
->private = TRUE
;
3668 alias_page
->pageout
= TRUE
;
3669 alias_page
->phys_page
= m
->phys_page
;
3671 vm_page_lock_queues();
3672 vm_page_wire(alias_page
);
3673 vm_page_unlock_queues();
3677 * The virtual page ("m") has to be wired in some way
3678 * here or its physical page ("m->phys_page") could
3679 * be recycled at any time.
3680 * Assuming this is enforced by the caller, we can't
3681 * get an encrypted page here. Since the encryption
3682 * key depends on the VM page's "pager" object and
3683 * the "paging_offset", we couldn't handle 2 pageable
3684 * VM pages (with different pagers and paging_offsets)
3685 * sharing the same physical page: we could end up
3686 * encrypting with one key (via one VM page) and
3687 * decrypting with another key (via the alias VM page).
3689 ASSERT_PAGE_DECRYPTED(m
);
3691 vm_page_insert(alias_page
,
3692 upl
->map_object
, new_offset
);
3693 assert(!alias_page
->wanted
);
3694 alias_page
->busy
= FALSE
;
3695 alias_page
->absent
= FALSE
;
3699 offset
+= PAGE_SIZE_64
;
3700 new_offset
+= PAGE_SIZE_64
;
3702 vm_object_unlock(object
);
3703 vm_object_unlock(upl
->map_object
);
3705 if ((upl
->flags
& (UPL_DEVICE_MEMORY
| UPL_IO_WIRE
)) || upl
->map_object
->phys_contiguous
)
3706 offset
= upl
->offset
- upl
->map_object
->paging_offset
;
3712 vm_object_lock(upl
->map_object
);
3713 upl
->map_object
->ref_count
++;
3714 vm_object_res_reference(upl
->map_object
);
3715 vm_object_unlock(upl
->map_object
);
3720 /* NEED A UPL_MAP ALIAS */
3721 kr
= vm_map_enter(map
, dst_addr
, (vm_map_size_t
)size
, (vm_map_offset_t
) 0,
3722 VM_FLAGS_ANYWHERE
, upl
->map_object
, offset
, FALSE
,
3723 VM_PROT_DEFAULT
, VM_PROT_ALL
, VM_INHERIT_DEFAULT
);
3725 if (kr
!= KERN_SUCCESS
) {
3730 vm_object_lock(upl
->map_object
);
3732 for(addr
=*dst_addr
; size
> 0; size
-=PAGE_SIZE
,addr
+=PAGE_SIZE
) {
3733 m
= vm_page_lookup(upl
->map_object
, offset
);
3735 unsigned int cache_attr
;
3736 cache_attr
= ((unsigned int)m
->object
->wimg_bits
) & VM_WIMG_MASK
;
3738 PMAP_ENTER(map
->pmap
, addr
,
3742 offset
+=PAGE_SIZE_64
;
3744 vm_object_unlock(upl
->map_object
);
3746 upl
->ref_count
++; /* hold a reference for the mapping */
3747 upl
->flags
|= UPL_PAGE_LIST_MAPPED
;
3748 upl
->kaddr
= *dst_addr
;
3750 return KERN_SUCCESS
;
3754 * Internal routine to remove a UPL mapping from a VM map.
3756 * XXX - This should just be doable through a standard
3757 * vm_map_remove() operation. Otherwise, implicit clean-up
3758 * of the target map won't be able to correctly remove
3759 * these (and release the reference on the UPL). Having
3760 * to do this means we can't map these into user-space
3771 if (upl
== UPL_NULL
)
3772 return KERN_INVALID_ARGUMENT
;
3775 if(upl
->flags
& UPL_PAGE_LIST_MAPPED
) {
3778 assert(upl
->ref_count
> 1);
3779 upl
->ref_count
--; /* removing mapping ref */
3780 upl
->flags
&= ~UPL_PAGE_LIST_MAPPED
;
3781 upl
->kaddr
= (vm_offset_t
) 0;
3785 vm_map_trunc_page(addr
),
3786 vm_map_round_page(addr
+ size
),
3788 return KERN_SUCCESS
;
3791 return KERN_FAILURE
;
3797 upl_offset_t offset
,
3800 upl_page_info_t
*page_list
,
3801 mach_msg_type_number_t count
,
3804 upl_size_t xfer_size
= size
;
3805 vm_object_t shadow_object
;
3806 vm_object_t object
= upl
->map_object
;
3807 vm_object_offset_t target_offset
;
3809 wpl_array_t lite_list
;
3811 int delayed_unlock
= 0;
3812 int clear_refmod
= 0;
3813 boolean_t shadow_internal
;
3817 if (upl
== UPL_NULL
)
3818 return KERN_INVALID_ARGUMENT
;
3824 if (object
->pageout
) {
3825 shadow_object
= object
->shadow
;
3827 shadow_object
= object
;
3832 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
3834 * We used this UPL to block access to the pages by marking
3835 * them "busy". Now we need to clear the "busy" bit to allow
3836 * access to these pages again.
3838 flags
|= UPL_COMMIT_ALLOW_ACCESS
;
3841 if (upl
->flags
& UPL_CLEAR_DIRTY
)
3842 flags
|= UPL_COMMIT_CLEAR_DIRTY
;
3844 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
3846 } else if ((offset
+ size
) > upl
->size
) {
3848 return KERN_FAILURE
;
3851 if (upl
->flags
& UPL_INTERNAL
) {
3852 lite_list
= (wpl_array_t
)
3853 ((((uintptr_t)upl
) + sizeof(struct upl
))
3854 + ((upl
->size
/PAGE_SIZE
) * sizeof(upl_page_info_t
)));
3856 lite_list
= (wpl_array_t
)
3857 (((uintptr_t)upl
) + sizeof(struct upl
));
3859 if (object
!= shadow_object
)
3860 vm_object_lock(object
);
3861 vm_object_lock(shadow_object
);
3863 shadow_internal
= shadow_object
->internal
;
3865 entry
= offset
/PAGE_SIZE
;
3866 target_offset
= (vm_object_offset_t
)offset
;
3874 if (upl
->flags
& UPL_LITE
) {
3877 pg_num
= target_offset
/PAGE_SIZE
;
3879 if (lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
3880 lite_list
[pg_num
>>5] &= ~(1 << (pg_num
& 31));
3881 m
= vm_page_lookup(shadow_object
,
3882 target_offset
+ (upl
->offset
-
3883 shadow_object
->paging_offset
));
3886 if (object
->pageout
) {
3887 if ((t
= vm_page_lookup(object
, target_offset
)) != NULL
) {
3890 if (delayed_unlock
) {
3892 vm_page_unlock_queues();
3900 object
->shadow_offset
);
3902 if (m
!= VM_PAGE_NULL
)
3903 vm_object_paging_end(m
->object
);
3906 if (m
!= VM_PAGE_NULL
) {
3910 if (upl
->flags
& UPL_IO_WIRE
) {
3912 if (delayed_unlock
== 0)
3913 vm_page_lock_queues();
3917 if (delayed_unlock
++ > DELAYED_UNLOCK_LIMIT
) {
3919 vm_page_unlock_queues();
3922 page_list
[entry
].phys_addr
= 0;
3924 if (flags
& UPL_COMMIT_SET_DIRTY
) {
3926 } else if (flags
& UPL_COMMIT_CLEAR_DIRTY
) {
3928 clear_refmod
|= VM_MEM_MODIFIED
;
3930 if (flags
& UPL_COMMIT_INACTIVATE
) {
3931 m
->reference
= FALSE
;
3932 clear_refmod
|= VM_MEM_REFERENCED
;
3933 vm_page_deactivate(m
);
3936 pmap_clear_refmod(m
->phys_page
, clear_refmod
);
3938 if (flags
& UPL_COMMIT_ALLOW_ACCESS
) {
3940 * We blocked access to the pages in this UPL.
3941 * Clear the "busy" bit and wake up any waiter
3944 PAGE_WAKEUP_DONE(m
);
3947 target_offset
+= PAGE_SIZE_64
;
3948 xfer_size
-= PAGE_SIZE
;
3952 if (delayed_unlock
== 0)
3953 vm_page_lock_queues();
3955 * make sure to clear the hardware
3956 * modify or reference bits before
3957 * releasing the BUSY bit on this page
3958 * otherwise we risk losing a legitimate
3961 if (flags
& UPL_COMMIT_CLEAR_DIRTY
) {
3963 clear_refmod
|= VM_MEM_MODIFIED
;
3965 if (flags
& UPL_COMMIT_INACTIVATE
)
3966 clear_refmod
|= VM_MEM_REFERENCED
;
3969 pmap_clear_refmod(m
->phys_page
, clear_refmod
);
3972 p
= &(page_list
[entry
]);
3973 if(p
->phys_addr
&& p
->pageout
&& !m
->pageout
) {
3977 } else if (page_list
[entry
].phys_addr
&&
3978 !p
->pageout
&& m
->pageout
&&
3979 !m
->dump_cleaning
) {
3982 m
->overwriting
= FALSE
;
3984 PAGE_WAKEUP_DONE(m
);
3986 page_list
[entry
].phys_addr
= 0;
3988 m
->dump_cleaning
= FALSE
;
3990 vm_pageout_throttle_up(m
);
3993 m
->cleaning
= FALSE
;
3995 #if MACH_CLUSTER_STATS
3996 if (m
->wanted
) vm_pageout_target_collisions
++;
3998 if (pmap_disconnect(m
->phys_page
) & VM_MEM_MODIFIED
)
4004 vm_page_unwire(m
);/* reactivates */
4006 if (upl
->flags
& UPL_PAGEOUT
) {
4007 CLUSTER_STAT(vm_pageout_target_page_dirtied
++;)
4008 VM_STAT(reactivations
++);
4010 PAGE_WAKEUP_DONE(m
);
4012 vm_page_free(m
);/* clears busy, etc. */
4014 if (upl
->flags
& UPL_PAGEOUT
) {
4015 CLUSTER_STAT(vm_pageout_target_page_freed
++;)
4017 if (page_list
[entry
].dirty
)
4018 VM_STAT(pageouts
++);
4021 if (delayed_unlock
++ > DELAYED_UNLOCK_LIMIT
) {
4023 vm_page_unlock_queues();
4025 target_offset
+= PAGE_SIZE_64
;
4026 xfer_size
-= PAGE_SIZE
;
4030 #if MACH_CLUSTER_STATS
4031 m
->dirty
= pmap_is_modified(m
->phys_page
);
4033 if (m
->dirty
) vm_pageout_cluster_dirtied
++;
4034 else vm_pageout_cluster_cleaned
++;
4035 if (m
->wanted
) vm_pageout_cluster_collisions
++;
4040 if((m
->busy
) && (m
->cleaning
)) {
4041 /* the request_page_list case */
4044 if(shadow_object
->absent_count
== 1)
4045 vm_object_absent_release(shadow_object
);
4047 shadow_object
->absent_count
--;
4049 m
->overwriting
= FALSE
;
4052 } else if (m
->overwriting
) {
4053 /* alternate request page list, write to
4054 * page_list case. Occurs when the original
4055 * page was wired at the time of the list
4057 assert(m
->wire_count
!= 0);
4058 vm_page_unwire(m
);/* reactivates */
4059 m
->overwriting
= FALSE
;
4061 m
->cleaning
= FALSE
;
4063 /* It is a part of the semantic of COPYOUT_FROM */
4064 /* UPLs that a commit implies cache sync */
4065 /* between the vm page and the backing store */
4066 /* this can be used to strip the precious bit */
4067 /* as well as clean */
4068 if (upl
->flags
& UPL_PAGE_SYNC_DONE
)
4069 m
->precious
= FALSE
;
4071 if (flags
& UPL_COMMIT_SET_DIRTY
)
4074 if (flags
& UPL_COMMIT_INACTIVATE
) {
4075 m
->reference
= FALSE
;
4076 vm_page_deactivate(m
);
4077 } else if (!m
->active
&& !m
->inactive
) {
4079 vm_page_activate(m
);
4081 vm_page_deactivate(m
);
4084 if (flags
& UPL_COMMIT_ALLOW_ACCESS
) {
				 * We blocked access to the pages in this UPL.
				 * Clear the "busy" bit on this page before we
				 * wake up any waiter.
4094 * Wakeup any thread waiting for the page to be un-cleaning.
4098 if (delayed_unlock
++ > DELAYED_UNLOCK_LIMIT
) {
4100 vm_page_unlock_queues();
4103 target_offset
+= PAGE_SIZE_64
;
4104 xfer_size
-= PAGE_SIZE
;
4108 vm_page_unlock_queues();
4112 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
4114 } else if (upl
->flags
& UPL_LITE
) {
4117 pg_num
= upl
->size
/PAGE_SIZE
;
4118 pg_num
= (pg_num
+ 31) >> 5;
4120 for(i
= 0; i
<pg_num
; i
++) {
4121 if(lite_list
[i
] != 0) {
4127 if(queue_empty(&upl
->map_object
->memq
)) {
4133 if(upl
->flags
& UPL_COMMIT_NOTIFY_EMPTY
) {
4136 if(object
== shadow_object
)
4137 vm_object_paging_end(shadow_object
);
4139 vm_object_unlock(shadow_object
);
4140 if (object
!= shadow_object
)
4141 vm_object_unlock(object
);
4144 return KERN_SUCCESS
;
4150 upl_offset_t offset
,
4155 upl_size_t xfer_size
= size
;
4156 vm_object_t shadow_object
;
4157 vm_object_t object
= upl
->map_object
;
4158 vm_object_offset_t target_offset
;
4160 wpl_array_t lite_list
;
4162 boolean_t shadow_internal
;
4166 if (upl
== UPL_NULL
)
4167 return KERN_INVALID_ARGUMENT
;
4169 if (upl
->flags
& UPL_IO_WIRE
) {
4170 return upl_commit_range(upl
,
4175 if(object
->pageout
) {
4176 shadow_object
= object
->shadow
;
4178 shadow_object
= object
;
4182 if(upl
->flags
& UPL_DEVICE_MEMORY
) {
4184 } else if ((offset
+ size
) > upl
->size
) {
4186 return KERN_FAILURE
;
4188 if (object
!= shadow_object
)
4189 vm_object_lock(object
);
4190 vm_object_lock(shadow_object
);
4192 shadow_internal
= shadow_object
->internal
;
4194 if(upl
->flags
& UPL_INTERNAL
) {
4195 lite_list
= (wpl_array_t
)
4196 ((((uintptr_t)upl
) + sizeof(struct upl
))
4197 + ((upl
->size
/PAGE_SIZE
) * sizeof(upl_page_info_t
)));
4199 lite_list
= (wpl_array_t
)
4200 (((uintptr_t)upl
) + sizeof(struct upl
));
4203 entry
= offset
/PAGE_SIZE
;
4204 target_offset
= (vm_object_offset_t
)offset
;
4209 if(upl
->flags
& UPL_LITE
) {
4211 pg_num
= target_offset
/PAGE_SIZE
;
4212 if(lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
4213 lite_list
[pg_num
>>5] &= ~(1 << (pg_num
& 31));
4214 m
= vm_page_lookup(shadow_object
,
4215 target_offset
+ (upl
->offset
-
4216 shadow_object
->paging_offset
));
4219 if(object
->pageout
) {
4220 if ((t
= vm_page_lookup(object
, target_offset
))
4228 object
->shadow_offset
);
4230 if(m
!= VM_PAGE_NULL
)
4231 vm_object_paging_end(m
->object
);
4234 if(m
!= VM_PAGE_NULL
) {
4235 vm_page_lock_queues();
4237 boolean_t must_free
= TRUE
;
4239 /* COPYOUT = FALSE case */
4240 /* check for error conditions which must */
4241 /* be passed back to the pages customer */
4242 if(error
& UPL_ABORT_RESTART
) {
4245 vm_object_absent_release(m
->object
);
4246 m
->page_error
= KERN_MEMORY_ERROR
;
4249 } else if(error
& UPL_ABORT_UNAVAILABLE
) {
4253 } else if(error
& UPL_ABORT_ERROR
) {
4256 vm_object_absent_release(m
->object
);
4257 m
->page_error
= KERN_MEMORY_ERROR
;
4264 * If the page was already encrypted,
4265 * we don't really need to decrypt it
4266 * now. It will get decrypted later,
4267 * on demand, as soon as someone needs
4268 * to access its contents.
4271 m
->cleaning
= FALSE
;
4272 m
->overwriting
= FALSE
;
4273 PAGE_WAKEUP_DONE(m
);
4275 if (must_free
== TRUE
) {
4278 vm_page_activate(m
);
4280 vm_page_unlock_queues();
4282 target_offset
+= PAGE_SIZE_64
;
4283 xfer_size
-= PAGE_SIZE
;
4288 * Handle the trusted pager throttle.
4291 vm_pageout_throttle_up(m
);
4295 assert(m
->wire_count
== 1);
4299 m
->dump_cleaning
= FALSE
;
4300 m
->cleaning
= FALSE
;
4301 m
->overwriting
= FALSE
;
4303 vm_external_state_clr(
4304 m
->object
->existence_map
, m
->offset
);
4305 #endif /* MACH_PAGEMAP */
4306 if(error
& UPL_ABORT_DUMP_PAGES
) {
4308 pmap_disconnect(m
->phys_page
);
4310 PAGE_WAKEUP_DONE(m
);
4312 vm_page_unlock_queues();
4314 target_offset
+= PAGE_SIZE_64
;
4315 xfer_size
-= PAGE_SIZE
;
4319 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
4321 } else if (upl
->flags
& UPL_LITE
) {
4324 pg_num
= upl
->size
/PAGE_SIZE
;
4325 pg_num
= (pg_num
+ 31) >> 5;
4327 for(i
= 0; i
<pg_num
; i
++) {
4328 if(lite_list
[i
] != 0) {
4334 if(queue_empty(&upl
->map_object
->memq
)) {
4340 if(upl
->flags
& UPL_COMMIT_NOTIFY_EMPTY
) {
4343 if(object
== shadow_object
)
4344 vm_object_paging_end(shadow_object
);
4346 vm_object_unlock(shadow_object
);
4347 if (object
!= shadow_object
)
4348 vm_object_unlock(object
);
4352 return KERN_SUCCESS
;
4360 vm_object_t object
= NULL
;
4361 vm_object_t shadow_object
= NULL
;
4362 vm_object_offset_t offset
;
4363 vm_object_offset_t shadow_offset
;
4364 vm_object_offset_t target_offset
;
4366 wpl_array_t lite_list
;
4369 boolean_t shadow_internal
;
4371 if (upl
== UPL_NULL
)
4372 return KERN_INVALID_ARGUMENT
;
4374 if (upl
->flags
& UPL_IO_WIRE
) {
4376 return upl_commit_range(upl
,
4382 if(upl
->flags
& UPL_DEVICE_MEMORY
) {
4384 return KERN_SUCCESS
;
4387 object
= upl
->map_object
;
4389 if (object
== NULL
) {
4390 panic("upl_abort: upl object is not backed by an object");
4392 return KERN_INVALID_ARGUMENT
;
4395 if(object
->pageout
) {
4396 shadow_object
= object
->shadow
;
4397 shadow_offset
= object
->shadow_offset
;
4399 shadow_object
= object
;
4400 shadow_offset
= upl
->offset
- object
->paging_offset
;
4403 if(upl
->flags
& UPL_INTERNAL
) {
4404 lite_list
= (wpl_array_t
)
4405 ((((uintptr_t)upl
) + sizeof(struct upl
))
4406 + ((upl
->size
/PAGE_SIZE
) * sizeof(upl_page_info_t
)));
4408 lite_list
= (wpl_array_t
)
4409 (((uintptr_t)upl
) + sizeof(struct upl
));
4413 if (object
!= shadow_object
)
4414 vm_object_lock(object
);
4415 vm_object_lock(shadow_object
);
4417 shadow_internal
= shadow_object
->internal
;
4419 for(i
= 0; i
<(upl
->size
); i
+=PAGE_SIZE
, offset
+= PAGE_SIZE_64
) {
4421 target_offset
= offset
+ shadow_offset
;
4422 if(upl
->flags
& UPL_LITE
) {
4424 pg_num
= offset
/PAGE_SIZE
;
4425 if(lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
4426 lite_list
[pg_num
>>5] &= ~(1 << (pg_num
& 31));
4428 shadow_object
, target_offset
);
4431 if(object
->pageout
) {
4432 if ((t
= vm_page_lookup(object
, offset
)) != NULL
) {
4437 shadow_object
, target_offset
);
4439 if(m
!= VM_PAGE_NULL
)
4440 vm_object_paging_end(m
->object
);
4443 if(m
!= VM_PAGE_NULL
) {
4444 vm_page_lock_queues();
4446 boolean_t must_free
= TRUE
;
4448 /* COPYOUT = FALSE case */
4449 /* check for error conditions which must */
4450 /* be passed back to the pages customer */
4451 if(error
& UPL_ABORT_RESTART
) {
4454 vm_object_absent_release(m
->object
);
4455 m
->page_error
= KERN_MEMORY_ERROR
;
4458 } else if(error
& UPL_ABORT_UNAVAILABLE
) {
4462 } else if(error
& UPL_ABORT_ERROR
) {
4465 vm_object_absent_release(m
->object
);
4466 m
->page_error
= KERN_MEMORY_ERROR
;
4473 * If the page was already encrypted,
4474 * we don't really need to decrypt it
4475 * now. It will get decrypted later,
4476 * on demand, as soon as someone needs
4477 * to access its contents.
4480 m
->cleaning
= FALSE
;
4481 m
->overwriting
= FALSE
;
4482 PAGE_WAKEUP_DONE(m
);
4484 if (must_free
== TRUE
) {
4487 vm_page_activate(m
);
4489 vm_page_unlock_queues();
4493 * Handle the trusted pager throttle.
4496 vm_pageout_throttle_up(m
);
4500 assert(m
->wire_count
== 1);
4504 m
->dump_cleaning
= FALSE
;
4505 m
->cleaning
= FALSE
;
4506 m
->overwriting
= FALSE
;
4508 vm_external_state_clr(
4509 m
->object
->existence_map
, m
->offset
);
4510 #endif /* MACH_PAGEMAP */
4511 if(error
& UPL_ABORT_DUMP_PAGES
) {
4513 pmap_disconnect(m
->phys_page
);
4515 PAGE_WAKEUP_DONE(m
);
4517 vm_page_unlock_queues();
4521 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
4523 } else if (upl
->flags
& UPL_LITE
) {
4526 pg_num
= upl
->size
/PAGE_SIZE
;
4527 pg_num
= (pg_num
+ 31) >> 5;
4529 for(j
= 0; j
<pg_num
; j
++) {
4530 if(lite_list
[j
] != 0) {
4536 if(queue_empty(&upl
->map_object
->memq
)) {
4542 if(object
== shadow_object
)
4543 vm_object_paging_end(shadow_object
);
4545 vm_object_unlock(shadow_object
);
4546 if (object
!= shadow_object
)
4547 vm_object_unlock(object
);
4550 return KERN_SUCCESS
;
4553 /* an option on commit should be wire */
4557 upl_page_info_t
*page_list
,
4558 mach_msg_type_number_t count
)
4560 if (upl
== UPL_NULL
)
4561 return KERN_INVALID_ARGUMENT
;
4563 if(upl
->flags
& (UPL_LITE
| UPL_IO_WIRE
)) {
4565 return upl_commit_range(upl
, 0, upl
->size
, 0,
4566 page_list
, count
, &empty
);
4573 if (upl
->flags
& UPL_DEVICE_MEMORY
)
4576 if (upl
->flags
& UPL_ENCRYPTED
) {
4579 * This UPL was encrypted, but we don't need
4580 * to decrypt here. We'll decrypt each page
4581 * later, on demand, as soon as someone needs
4582 * to access the page's contents.
4586 if ((upl
->flags
& UPL_CLEAR_DIRTY
) ||
4587 (upl
->flags
& UPL_PAGE_SYNC_DONE
) || page_list
) {
4588 vm_object_t shadow_object
= upl
->map_object
->shadow
;
4589 vm_object_t object
= upl
->map_object
;
4590 vm_object_offset_t target_offset
;
4591 upl_size_t xfer_end
;
4597 if (object
!= shadow_object
)
4598 vm_object_lock(object
);
4599 vm_object_lock(shadow_object
);
4602 target_offset
= object
->shadow_offset
;
4603 xfer_end
= upl
->size
+ object
->shadow_offset
;
4605 while(target_offset
< xfer_end
) {
4607 if ((t
= vm_page_lookup(object
,
4608 target_offset
- object
->shadow_offset
))
4610 target_offset
+= PAGE_SIZE_64
;
4615 m
= vm_page_lookup(shadow_object
, target_offset
);
4616 if(m
!= VM_PAGE_NULL
) {
4619 * If this page was encrypted, we
4620 * don't need to decrypt it here.
4621 * We'll decrypt it later, on demand,
4622 * as soon as someone needs to access
4626 if (upl
->flags
& UPL_CLEAR_DIRTY
) {
4627 pmap_clear_modify(m
->phys_page
);
4630 /* It is a part of the semantic of */
4631 /* COPYOUT_FROM UPLs that a commit */
4632 /* implies cache sync between the */
4633 /* vm page and the backing store */
4634 /* this can be used to strip the */
4635 /* precious bit as well as clean */
4636 if (upl
->flags
& UPL_PAGE_SYNC_DONE
)
4637 m
->precious
= FALSE
;
4640 p
= &(page_list
[entry
]);
4641 if(page_list
[entry
].phys_addr
&&
4642 p
->pageout
&& !m
->pageout
) {
4643 vm_page_lock_queues();
4647 vm_page_unlock_queues();
4648 } else if (page_list
[entry
].phys_addr
&&
4649 !p
->pageout
&& m
->pageout
&&
4650 !m
->dump_cleaning
) {
4651 vm_page_lock_queues();
4654 m
->overwriting
= FALSE
;
4656 PAGE_WAKEUP_DONE(m
);
4657 vm_page_unlock_queues();
4659 page_list
[entry
].phys_addr
= 0;
4662 target_offset
+= PAGE_SIZE_64
;
4665 vm_object_unlock(shadow_object
);
4666 if (object
!= shadow_object
)
4667 vm_object_unlock(object
);
4670 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
4671 vm_object_lock(upl
->map_object
->shadow
);
4672 if(upl
->map_object
== upl
->map_object
->shadow
)
4673 vm_object_paging_end(upl
->map_object
->shadow
);
4674 vm_object_unlock(upl
->map_object
->shadow
);
4677 return KERN_SUCCESS
;
4683 vm_object_iopl_request(
4685 vm_object_offset_t offset
,
4688 upl_page_info_array_t user_page_list
,
4689 unsigned int *page_list_count
,
4693 vm_object_offset_t dst_offset
= offset
;
4694 upl_size_t xfer_size
= size
;
4697 wpl_array_t lite_list
= NULL
;
4698 int page_field_size
;
4699 int delayed_unlock
= 0;
4700 int no_zero_fill
= FALSE
;
4701 vm_page_t alias_page
= NULL
;
4706 if (cntrl_flags
& ~UPL_VALID_FLAGS
) {
4708 * For forward compatibility's sake,
4709 * reject any unknown flag.
4711 return KERN_INVALID_VALUE
;
4713 if (vm_lopage_poolsize
== 0)
4714 cntrl_flags
&= ~UPL_NEED_32BIT_ADDR
;
4716 if (cntrl_flags
& UPL_NEED_32BIT_ADDR
) {
4717 if ( (cntrl_flags
& (UPL_SET_IO_WIRE
| UPL_SET_LITE
)) != (UPL_SET_IO_WIRE
| UPL_SET_LITE
))
4718 return KERN_INVALID_VALUE
;
4720 if (object
->phys_contiguous
) {
4721 if ((offset
+ object
->shadow_offset
) >= (vm_object_offset_t
)max_valid_dma_address
)
4722 return KERN_INVALID_ADDRESS
;
4724 if (((offset
+ object
->shadow_offset
) + size
) >= (vm_object_offset_t
)max_valid_dma_address
)
4725 return KERN_INVALID_ADDRESS
;
4729 if (cntrl_flags
& UPL_ENCRYPT
) {
4732 * The paging path doesn't use this interface,
4733 * so we don't support the UPL_ENCRYPT flag
4734 * here. We won't encrypt the pages.
4736 assert(! (cntrl_flags
& UPL_ENCRYPT
));
4739 if (cntrl_flags
& UPL_NOZEROFILL
)
4740 no_zero_fill
= TRUE
;
4742 if (cntrl_flags
& UPL_COPYOUT_FROM
)
4743 prot
= VM_PROT_READ
;
4745 prot
= VM_PROT_READ
| VM_PROT_WRITE
;
4747 if(((size
/page_size
) > MAX_UPL_TRANSFER
) && !object
->phys_contiguous
) {
4748 size
= MAX_UPL_TRANSFER
* page_size
;
4751 if(cntrl_flags
& UPL_SET_INTERNAL
)
4752 if(page_list_count
!= NULL
)
4753 *page_list_count
= MAX_UPL_TRANSFER
;
4754 if(((cntrl_flags
& UPL_SET_INTERNAL
) && !(object
->phys_contiguous
)) &&
4755 ((page_list_count
!= NULL
) && (*page_list_count
!= 0)
4756 && *page_list_count
< (size
/page_size
)))
4757 return KERN_INVALID_ARGUMENT
;
    if((!object->internal) && (object->paging_offset != 0))
        panic("vm_object_upl_request: external object with non-zero paging offset\n");

    if(object->phys_contiguous) {
        /* No paging operations are possible against this memory */
        /* and so no need for map object, ever */
        cntrl_flags |= UPL_SET_LITE;

    if(cntrl_flags & UPL_SET_INTERNAL) {
        if(cntrl_flags & UPL_SET_LITE) {
                             UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
            user_page_list = (upl_page_info_t *)
                (((uintptr_t)upl) + sizeof(struct upl));
            lite_list = (wpl_array_t)
                (((uintptr_t)user_page_list) +
                  sizeof(upl_page_info_t)));
            page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
                              (page_field_size + 3) & 0xFFFFFFFC;
            bzero((char *)lite_list, page_field_size);
                         UPL_LITE | UPL_INTERNAL | UPL_IO_WIRE;
            upl = upl_create(UPL_CREATE_INTERNAL, size);
            user_page_list = (upl_page_info_t *)
                              + sizeof(struct upl));
            upl->flags = UPL_INTERNAL | UPL_IO_WIRE;
        if(cntrl_flags & UPL_SET_LITE) {
            upl = upl_create(UPL_CREATE_LITE, size);
            lite_list = (wpl_array_t)
                (((uintptr_t)upl) + sizeof(struct upl));
            page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
                              (page_field_size + 3) & 0xFFFFFFFC;
            bzero((char *)lite_list, page_field_size);
            upl->flags = UPL_LITE | UPL_IO_WIRE;
            upl = upl_create(UPL_CREATE_EXTERNAL, size);
            upl->flags = UPL_IO_WIRE;
    if(object->phys_contiguous) {
        upl->map_object = object;
        /* don't need any shadow mappings for this one */
        /* since it is already I/O memory */
        upl->flags |= UPL_DEVICE_MEMORY;

        vm_object_lock(object);
        vm_object_paging_begin(object);
        vm_object_unlock(object);

        /* paging in progress also protects the paging_offset */
        upl->offset = offset + object->paging_offset;

        if(user_page_list) {
            user_page_list[0].phys_addr =
                (offset + object->shadow_offset)>>PAGE_SHIFT;
            user_page_list[0].device = TRUE;

        upl->highest_page = (offset + object->shadow_offset + size - 1)>>PAGE_SHIFT;

        if(page_list_count != NULL) {
            if (upl->flags & UPL_INTERNAL) {
                *page_list_count = 0;
                *page_list_count = 1;

        return KERN_SUCCESS;
    user_page_list[0].device = FALSE;

    if(cntrl_flags & UPL_SET_LITE) {
        upl->map_object = object;
        upl->map_object = vm_object_allocate(size);
        vm_object_lock(upl->map_object);
        upl->map_object->shadow = object;
        upl->map_object->pageout = TRUE;
        upl->map_object->can_persist = FALSE;
        upl->map_object->copy_strategy =
            MEMORY_OBJECT_COPY_NONE;
        upl->map_object->shadow_offset = offset;
        upl->map_object->wimg_bits = object->wimg_bits;
        vm_object_unlock(upl->map_object);

    vm_object_lock(object);
    vm_object_paging_begin(object);

    if (!object->phys_contiguous) {
        /* Protect user space from future COW operations */
        object->true_share = TRUE;
        if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
            object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;

    /* we can lock the upl offset now that paging_in_progress is set */
    upl->offset = offset + object->paging_offset;

    queue_enter(&object->uplq, upl, upl_t, uplq);
#endif /* UPL_DEBUG */
    if (cntrl_flags & UPL_BLOCK_ACCESS) {
        /*
         * The user requested that access to the pages in this UPL
         * be blocked until the UPL is committed or aborted.
         */
        upl->flags |= UPL_ACCESS_BLOCKED;
    if((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
        if (delayed_unlock) {
            vm_page_unlock_queues();
        vm_object_unlock(object);
        VM_PAGE_GRAB_FICTITIOUS(alias_page);
        vm_object_lock(object);

    dst_page = vm_page_lookup(object, dst_offset);

    /*
     * If the page is encrypted, we need to decrypt it,
     * so force a soft page fault.
     */
    if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
        (dst_page->encrypted) ||
        (dst_page->unusual && (dst_page->error ||
         dst_page->restart ||
         dst_page->fictitious ||
         (prot & dst_page->page_lock)))) {
        vm_fault_return_t   result;
        kern_return_t       error_code;
        vm_object_offset_t  lo_offset = offset;
        vm_object_offset_t  hi_offset = offset + size;

        if (delayed_unlock) {
            vm_page_unlock_queues();

        if(cntrl_flags & UPL_SET_INTERRUPTIBLE) {
            interruptible = THREAD_ABORTSAFE;
            interruptible = THREAD_UNINT;
        result = vm_fault_page(object, dst_offset,
                               prot | VM_PROT_WRITE, FALSE,
                               lo_offset, hi_offset,
                               VM_BEHAVIOR_SEQUENTIAL,
                               &prot, &dst_page, &top_page,
                               &error_code, no_zero_fill, FALSE, NULL, 0);

        case VM_FAULT_SUCCESS:
            PAGE_WAKEUP_DONE(dst_page);
            /*
             * Release paging references and
             * top-level placeholder page, if any.
             */
            if(top_page != VM_PAGE_NULL) {
                vm_object_t local_object;
                   != dst_page->object) {
                    VM_PAGE_FREE(top_page);
                    vm_object_paging_end(
                    VM_PAGE_FREE(top_page);
                    vm_object_paging_end(

        case VM_FAULT_RETRY:
            vm_object_lock(object);
            vm_object_paging_begin(object);

        case VM_FAULT_FICTITIOUS_SHORTAGE:
            vm_page_more_fictitious();
            vm_object_lock(object);
            vm_object_paging_begin(object);

        case VM_FAULT_MEMORY_SHORTAGE:
            if (vm_page_wait(interruptible)) {
                vm_object_lock(object);
                vm_object_paging_begin(object);

        case VM_FAULT_INTERRUPTED:
            error_code = MACH_SEND_INTERRUPTED;
        case VM_FAULT_MEMORY_ERROR:
            ret = (error_code ? error_code :
            vm_object_lock(object);

        } while ((result != VM_FAULT_SUCCESS)
                 || (result == VM_FAULT_INTERRUPTED));
    if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
          dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
        /*
         * support devices that can't DMA above 32 bits
         * by substituting pages from a pool of low address
         * memory for any pages we find above the 4G mark
         * can't substitute if the page is already wired because
         * we don't know whether that physical address has been
         * handed out to some other 64 bit capable DMA device to use
         */
        if (dst_page->wire_count) {
            ret = KERN_PROTECTION_FAILURE;

        if (delayed_unlock) {
            vm_page_unlock_queues();

        low_page = vm_page_grablo();

        if (low_page == VM_PAGE_NULL) {
            ret = KERN_RESOURCE_SHORTAGE;

        /*
         * from here until the vm_page_replace completes
         * we mustn't drop the object lock... we don't
         * want anyone refaulting this page in and using
         * it after we disconnect it... we want the fault
         * to find the new page being substituted.
         */
        refmod = pmap_disconnect(dst_page->phys_page);

        vm_page_copy(dst_page, low_page);

        low_page->reference = dst_page->reference;
        low_page->dirty = dst_page->dirty;

        if (refmod & VM_MEM_REFERENCED)
            low_page->reference = TRUE;
        if (refmod & VM_MEM_MODIFIED)
            low_page->dirty = TRUE;

        vm_page_lock_queues();
        vm_page_replace(low_page, object, dst_offset);
        /*
         * keep the queue lock since we're going to
         * need it immediately
         */
        dst_page = low_page;

        /*
         * vm_page_grablo returned the page marked
         * BUSY... we don't need a PAGE_WAKEUP_DONE
         * here, because we've never dropped the object lock
         */
        dst_page->busy = FALSE;
    if (delayed_unlock == 0)
        vm_page_lock_queues();
    vm_page_wire(dst_page);

    if (cntrl_flags & UPL_BLOCK_ACCESS) {
        /*
         * Mark the page "busy" to block any future page fault
         * on this page. We'll also remove the mapping
         * of all these pages before leaving this routine.
         */
        assert(!dst_page->fictitious);
        dst_page->busy = TRUE;

    if (cntrl_flags & UPL_SET_LITE) {
        pg_num = (dst_offset-offset)/PAGE_SIZE;
        lite_list[pg_num>>5] |= 1 << (pg_num & 31);
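/*
 * Editor's note: illustrative sketch only, guarded out of the build.  The
 * lite list used above is a bitmap with one bit per page of the request:
 * its byte size is (pages + 7) / 8 rounded up to a multiple of 4 so it can
 * be addressed as 32-bit words, and page N maps to bit (N & 31) of word
 * (N >> 5).  The example_* names below are hypothetical.
 */
#if 0   /* illustration only */
static unsigned int
example_lite_list_bytes(unsigned int pages)
{
    unsigned int bytes = (pages + 7) >> 3;      /* one bit per page */
    return (bytes + 3) & ~3U;                   /* round up to whole 32-bit words */
}

static void
example_lite_list_set(unsigned int *lite_list, unsigned int pg_num)
{
    lite_list[pg_num >> 5] |= 1U << (pg_num & 31);      /* mark page present */
}

static int
example_lite_list_test(const unsigned int *lite_list, unsigned int pg_num)
{
    return (lite_list[pg_num >> 5] >> (pg_num & 31)) & 1;
}
#endif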
        /*
         * Convert the fictitious page to a
         * private shadow of the real page.
         */
        assert(alias_page->fictitious);
        alias_page->fictitious = FALSE;
        alias_page->private = TRUE;
        alias_page->pageout = TRUE;
        alias_page->phys_page = dst_page->phys_page;
        vm_page_wire(alias_page);

        vm_page_insert(alias_page,
                       upl->map_object, size - xfer_size);
        assert(!alias_page->wanted);
        alias_page->busy = FALSE;
        alias_page->absent = FALSE;

    /* expect the page to be used */
    dst_page->reference = TRUE;

    if (!(cntrl_flags & UPL_COPYOUT_FROM))
        dst_page->dirty = TRUE;

    if (dst_page->phys_page > upl->highest_page)
        upl->highest_page = dst_page->phys_page;

    if (user_page_list) {
        user_page_list[entry].phys_addr
            = dst_page->phys_page;
        user_page_list[entry].dirty =
        user_page_list[entry].pageout =
        user_page_list[entry].absent =
        user_page_list[entry].precious =

    if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
        vm_page_unlock_queues();

    dst_offset += PAGE_SIZE_64;
    xfer_size -= PAGE_SIZE;

    vm_page_unlock_queues();
    if (upl->flags & UPL_INTERNAL) {
        if(page_list_count != NULL)
            *page_list_count = 0;
    } else if (*page_list_count > entry) {
        if(page_list_count != NULL)
            *page_list_count = entry;

    if (alias_page != NULL) {
        vm_page_lock_queues();
        vm_page_free(alias_page);
        vm_page_unlock_queues();

    vm_object_unlock(object);

    if (cntrl_flags & UPL_BLOCK_ACCESS) {
        /*
         * We've marked all the pages "busy" so that future
         * page faults will block.
         * Now remove the mapping for these pages, so that they
         * can't be accessed without causing a page fault.
         */
        vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
                               PMAP_NULL, 0, VM_PROT_NONE);

    return KERN_SUCCESS;
    vm_page_unlock_queues();

    for (; offset < dst_offset; offset += PAGE_SIZE) {
        dst_page = vm_page_lookup(object, offset);

        if (dst_page == VM_PAGE_NULL)
            panic("vm_object_iopl_request: Wired pages missing. \n");
        vm_page_lock_queues();
        vm_page_unwire(dst_page);
        vm_page_unlock_queues();
        VM_STAT(reactivations++);

    vm_object_paging_end(object);
    vm_object_unlock(object);
    kern_return_t   retval;
    boolean_t       upls_locked;
    vm_object_t     object1, object2;

    if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) {
        return KERN_INVALID_ARGUMENT;

    upls_locked = FALSE;

    /*
     * Since we need to lock both UPLs at the same time,
     * avoid deadlocks by always taking locks in the same order.
     */
    upls_locked = TRUE;     /* the UPLs will need to be unlocked */

    object1 = upl1->map_object;
    object2 = upl2->map_object;

    if (upl1->offset != 0 || upl2->offset != 0 ||
        upl1->size != upl2->size) {
        /*
         * We deal only with full objects, not subsets.
         * That's because we exchange the entire backing store info
         * for the objects: pager, resident pages, etc... We can't do
         */
        retval = KERN_INVALID_VALUE;
    /*
     * Transpose the VM objects' backing store.
     */
    retval = vm_object_transpose(object1, object2,
                                 (vm_object_size_t) upl1->size);

    if (retval == KERN_SUCCESS) {
        /*
         * Make each UPL point to the correct VM object, i.e. the
         * object holding the pages that the UPL refers to...
         */
        upl1->map_object = object2;
        upl2->map_object = object1;

    upls_locked = FALSE;
/*
 * Rationale: the user might have some encrypted data on disk (via
 * FileVault or any other mechanism). That data is then decrypted in
 * memory, which is safe as long as the machine is secure. But that
 * decrypted data in memory could be paged out to disk by the default
 * pager. The data would then be stored on disk in clear (not encrypted)
 * and it could be accessed by anyone who gets physical access to the
 * disk (if the laptop or the disk gets stolen for example). This weakens
 * the security offered by FileVault.
 *
 * Solution: the default pager will optionally request that all the
 * pages it gathers for pageout be encrypted, via the UPL interfaces,
 * before it sends this UPL to disk via the vnode_pageout() path.
 *
 * To avoid disrupting the VM LRU algorithms, we want to keep the
 * clean-in-place mechanisms, which allow us to send some extra pages to
 * swap (clustering) without actually removing them from the user's
 * address space. We don't want the user to unknowingly access encrypted
 * data, so we have to actually remove the encrypted pages from the page
 * table. When the user accesses the data, the hardware will fail to
 * locate the virtual page in its page table and will trigger a page
 * fault. We can then decrypt the page and enter it in the page table
 * again. Whenever we allow the user to access the contents of a page,
 * we have to make sure it's not encrypted.
 */
/*
 * Reserve of virtual addresses in the kernel address space.
 * We need to map the physical pages in the kernel, so that we
 * can call the encryption/decryption routines with a kernel
 * virtual address. We keep this pool of pre-allocated kernel
 * virtual addresses so that we don't have to scan the kernel's
 * virtual address space each time we need to encrypt or decrypt
 *
 * It would be nice to be able to encrypt and decrypt in physical
 * mode but that might not always be more efficient...
 */
decl_simple_lock_data(,vm_paging_lock)
#define VM_PAGING_NUM_PAGES 64
vm_map_offset_t vm_paging_base_address = 0;
boolean_t       vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
int             vm_paging_max_index = 0;
unsigned long   vm_paging_no_kernel_page = 0;
unsigned long   vm_paging_objects_mapped = 0;
unsigned long   vm_paging_pages_mapped = 0;
unsigned long   vm_paging_objects_mapped_slow = 0;
unsigned long   vm_paging_pages_mapped_slow = 0;
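/*
 * Editor's note: illustrative sketch only, guarded out of the build.  The
 * globals above form a tiny fixed-size pool: slot i of the "inuse" array
 * corresponds to the kernel virtual page at base + i * PAGE_SIZE, so
 * allocation is a linear scan for a free slot and release recovers the slot
 * index from the address.  The example_* names are hypothetical and this
 * sketch is not thread safe (the real code holds vm_paging_lock).
 */
#if 0   /* illustration only */
#define EXAMPLE_POOL_PAGES 64
#define EXAMPLE_PAGE_SIZE  4096UL

static unsigned long example_pool_base;                 /* set at init time */
static int           example_pool_inuse[EXAMPLE_POOL_PAGES];

static unsigned long
example_pool_alloc(void)
{
    int i;

    for (i = 0; i < EXAMPLE_POOL_PAGES; i++) {
        if (!example_pool_inuse[i]) {
            example_pool_inuse[i] = 1;
            return example_pool_base + (unsigned long)i * EXAMPLE_PAGE_SIZE;
        }
    }
    return 0;   /* pool exhausted: caller falls back to the slow path */
}

static void
example_pool_free(unsigned long addr)
{
    int i = (int)((addr - example_pool_base) / EXAMPLE_PAGE_SIZE);

    example_pool_inuse[i] = 0;
}
#endif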
/*
 * vm_paging_map_object:
 *      Maps part of a VM object's pages in the kernel
 *      virtual address space, using the pre-allocated
 *      kernel virtual addresses, if possible.
 *
 *      The VM object is locked. This lock will get
 *      dropped and re-acquired though.
 */
vm_paging_map_object(
    vm_map_offset_t     *address,
    vm_object_offset_t  offset,
    vm_map_size_t       *size)

    vm_map_offset_t     page_map_offset;
    vm_map_size_t       map_size;
    vm_object_offset_t  object_offset;
    vm_map_entry_t      map_entry;
#endif /* __ppc__ */
    if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
        /*
         * Optimization for the PowerPC.
         * Use one of the pre-allocated kernel virtual addresses
         * and just enter the VM page in the kernel address space
         * at that virtual address.
         */
        vm_object_unlock(object);
        simple_lock(&vm_paging_lock);

        if (vm_paging_base_address == 0) {
            /*
             * Initialize our pool of pre-allocated kernel
             * virtual addresses.
             */
            simple_unlock(&vm_paging_lock);
            page_map_offset = 0;
            kr = vm_map_find_space(kernel_map,
                                   VM_PAGING_NUM_PAGES * PAGE_SIZE,
            if (kr != KERN_SUCCESS) {
                panic("vm_paging_map_object: "
                      "kernel_map full\n");
            map_entry->object.vm_object = kernel_object;
                               page_map_offset - VM_MIN_KERNEL_ADDRESS;
            vm_object_reference(kernel_object);
            vm_map_unlock(kernel_map);

            simple_lock(&vm_paging_lock);
            if (vm_paging_base_address != 0) {
                /* someone raced us and won: undo */
                simple_unlock(&vm_paging_lock);
                kr = vm_map_remove(kernel_map,
                                   (VM_PAGING_NUM_PAGES
                assert(kr == KERN_SUCCESS);
                simple_lock(&vm_paging_lock);
            vm_paging_base_address = page_map_offset;

        /*
         * Try and find an available kernel virtual address
         * from our pre-allocated pool.
         */
        page_map_offset = 0;
        for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
            if (vm_paging_page_inuse[i] == FALSE) {
                page_map_offset = vm_paging_base_address +

        if (page_map_offset != 0) {
            /*
             * We found a kernel virtual address;
             * map the physical page to that virtual address.
             */
            if (i > vm_paging_max_index) {
                vm_paging_max_index = i;
            vm_paging_page_inuse[i] = TRUE;
            simple_unlock(&vm_paging_lock);
            pmap_map_block(kernel_pmap,
                           1,   /* Size is number of 4k pages */
                           ((int) page->object->wimg_bits &
            vm_paging_objects_mapped++;
            vm_paging_pages_mapped++;
            *address = page_map_offset;
            vm_object_lock(object);

            /* all done and mapped, ready to use ! */
            return KERN_SUCCESS;

        /*
         * We ran out of pre-allocated kernel virtual
         * addresses. Just map the page in the kernel
         * the slow and regular way.
         */
        vm_paging_no_kernel_page++;
        simple_unlock(&vm_paging_lock);
        vm_object_lock(object);
#endif /* __ppc__ */
    object_offset = vm_object_trunc_page(offset);
    map_size = vm_map_round_page(*size);

    /*
     * Try and map the required range of the object
     */

    /* don't go beyond the object's end... */
    if (object_offset >= object->size) {
    } else if (map_size > object->size - offset) {
        map_size = object->size - offset;

    vm_object_reference_locked(object);     /* for the map entry */
    vm_object_unlock(object);

    kr = vm_map_enter(kernel_map,
    if (kr != KERN_SUCCESS) {
        vm_object_deallocate(object);       /* for the map entry */

    /*
     * Enter the mapped pages in the page table now.
     */
    vm_object_lock(object);
    for (page_map_offset = 0;
         map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
        unsigned int cache_attr;

        page = vm_page_lookup(object, offset + page_map_offset);
        if (page == VM_PAGE_NULL) {
            panic("vm_paging_map_object: no page !?");
        if (page->no_isync == TRUE) {
            pmap_sync_page_data_phys(page->phys_page);
        cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;

        PMAP_ENTER(kernel_pmap,
                   *address + page_map_offset,

    vm_paging_objects_mapped_slow++;
    vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64;

    return KERN_SUCCESS;
/*
 * vm_paging_unmap_object:
 *      Unmaps part of a VM object's pages from the kernel
 *      virtual address space.
 *
 *      The VM object is locked. This lock will get
 *      dropped and re-acquired though.
 */
vm_paging_unmap_object(
    vm_map_offset_t start,
    vm_map_offset_t end)
#endif /* __ppc__ */
    if ((vm_paging_base_address == 0) ||
        ((start < vm_paging_base_address) ||
         (end > (vm_paging_base_address
                 + (VM_PAGING_NUM_PAGES * PAGE_SIZE))))) {
        /*
         * We didn't use our pre-allocated pool of
         * kernel virtual address. Deallocate the
         */
        if (object != VM_OBJECT_NULL) {
            vm_object_unlock(object);
        kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
        if (object != VM_OBJECT_NULL) {
            vm_object_lock(object);
        assert(kr == KERN_SUCCESS);

        /*
         * We used a kernel virtual address from our
         * pre-allocated pool. Put it back in the pool
         */
        assert(end - start == PAGE_SIZE);
        i = (start - vm_paging_base_address) >> PAGE_SHIFT;

        /* undo the pmap mapping */
        mapping_remove(kernel_pmap, start);

        simple_lock(&vm_paging_lock);
        vm_paging_page_inuse[i] = FALSE;
        simple_unlock(&vm_paging_lock);
#endif /* __ppc__ */
5575 * "iv" is the "initial vector". Ideally, we want to
5576 * have a different one for each page we encrypt, so that
5577 * crackers can't find encryption patterns too easily.
5579 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
5580 boolean_t swap_crypt_ctx_initialized
= FALSE
;
5581 aes_32t swap_crypt_key
[8]; /* big enough for a 256 key */
5582 aes_ctx swap_crypt_ctx
;
5583 const unsigned char swap_crypt_null_iv
[AES_BLOCK_SIZE
] = {0xa, };
5586 boolean_t swap_crypt_ctx_tested
= FALSE
;
5587 unsigned char swap_crypt_test_page_ref
[4096] __attribute__((aligned(4096)));
5588 unsigned char swap_crypt_test_page_encrypt
[4096] __attribute__((aligned(4096)));
5589 unsigned char swap_crypt_test_page_decrypt
[4096] __attribute__((aligned(4096)));
5592 extern u_long
random(void);
/*
 * Initialize the encryption context: key and key size.
 */
void swap_crypt_ctx_initialize(void); /* forward */
swap_crypt_ctx_initialize(void)
    /*
     * No need for locking to protect swap_crypt_ctx_initialized
     * because the first use of encryption will come from the
     * pageout thread (we won't pagein before there's been a pageout)
     * and there's only one pageout thread.
     */
    if (swap_crypt_ctx_initialized == FALSE) {
             i < (sizeof (swap_crypt_key) /
                  sizeof (swap_crypt_key[0]));
            swap_crypt_key[i] = random();

        aes_encrypt_key((const unsigned char *) swap_crypt_key,
                        SWAP_CRYPT_AES_KEY_SIZE,
                        &swap_crypt_ctx.encrypt);
        aes_decrypt_key((const unsigned char *) swap_crypt_key,
                        SWAP_CRYPT_AES_KEY_SIZE,
                        &swap_crypt_ctx.decrypt);
        swap_crypt_ctx_initialized = TRUE;
    /*
     * Validate the encryption algorithms.
     */
    if (swap_crypt_ctx_tested == FALSE) {
        for (i = 0; i < 4096; i++) {
            swap_crypt_test_page_ref[i] = (char) i;

        aes_encrypt_cbc(swap_crypt_test_page_ref,
                        PAGE_SIZE / AES_BLOCK_SIZE,
                        swap_crypt_test_page_encrypt,
                        &swap_crypt_ctx.encrypt);

        aes_decrypt_cbc(swap_crypt_test_page_encrypt,
                        PAGE_SIZE / AES_BLOCK_SIZE,
                        swap_crypt_test_page_decrypt,
                        &swap_crypt_ctx.decrypt);
        /* compare result with original */
        for (i = 0; i < 4096; i++) {
            if (swap_crypt_test_page_decrypt[i] !=
                swap_crypt_test_page_ref[i]) {
                panic("encryption test failed");

        aes_encrypt_cbc(swap_crypt_test_page_decrypt,
                        PAGE_SIZE / AES_BLOCK_SIZE,
                        swap_crypt_test_page_decrypt,
                        &swap_crypt_ctx.encrypt);
        /* decrypt in place */
        aes_decrypt_cbc(swap_crypt_test_page_decrypt,
                        PAGE_SIZE / AES_BLOCK_SIZE,
                        swap_crypt_test_page_decrypt,
                        &swap_crypt_ctx.decrypt);
        for (i = 0; i < 4096; i++) {
            if (swap_crypt_test_page_decrypt[i] !=
                swap_crypt_test_page_ref[i]) {
                panic("in place encryption test failed");

        swap_crypt_ctx_tested = TRUE;
/*
 * Encrypt the given page, for secure paging.
 * The page might already be mapped at kernel virtual
 * address "kernel_mapping_offset". Otherwise, we need
 *
 * The page's object is locked, but this lock will be released
 * The page is busy and not accessible by users (not entered in any pmap).
 */
    vm_map_offset_t kernel_mapping_offset)
    int             clear_refmod = 0;
    boolean_t       page_was_referenced;
    boolean_t       page_was_modified;
    vm_map_size_t   kernel_mapping_size;
    vm_offset_t     kernel_vaddr;
        unsigned char       aes_iv[AES_BLOCK_SIZE];
            memory_object_t     pager_object;
            vm_object_offset_t  paging_offset;

    if (! vm_pages_encrypted) {
        vm_pages_encrypted = TRUE;

    assert(page->dirty || page->precious);

    if (page->encrypted) {
        /*
         * Already encrypted: no need to do it again.
         */
        vm_page_encrypt_already_encrypted_counter++;
    ASSERT_PAGE_DECRYPTED(page);

    /*
     * Gather the "reference" and "modified" status of the page.
     * We'll restore these values after the encryption, so that
     * the encryption is transparent to the rest of the system
     * and doesn't impact the VM's LRU logic.
     */
    page_was_referenced =
        (page->reference || pmap_is_referenced(page->phys_page));
        (page->dirty || pmap_is_modified(page->phys_page));
    if (kernel_mapping_offset == 0) {
        /*
         * The page hasn't already been mapped in kernel space
         * by the caller. Map it now, so that we can access
         * its contents and encrypt them.
         */
        kernel_mapping_size = PAGE_SIZE;
        kr = vm_paging_map_object(&kernel_mapping_offset,
                                  &kernel_mapping_size);
        if (kr != KERN_SUCCESS) {
            panic("vm_page_encrypt: "
                  "could not map page in kernel: 0x%x\n",
        kernel_mapping_size = 0;

    kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);

    if (swap_crypt_ctx_initialized == FALSE) {
        swap_crypt_ctx_initialize();
    assert(swap_crypt_ctx_initialized);
5765 * Prepare an "initial vector" for the encryption.
5766 * We use the "pager" and the "paging_offset" for that
5767 * page to obfuscate the encrypted data a bit more and
5768 * prevent crackers from finding patterns that they could
5769 * use to break the key.
5771 bzero(&encrypt_iv
.aes_iv
[0], sizeof (encrypt_iv
.aes_iv
));
5772 encrypt_iv
.vm
.pager_object
= page
->object
->pager
;
5773 encrypt_iv
.vm
.paging_offset
=
5774 page
->object
->paging_offset
+ page
->offset
;
5776 vm_object_unlock(page
->object
);
5778 /* encrypt the "initial vector" */
5779 aes_encrypt_cbc((const unsigned char *) &encrypt_iv
.aes_iv
[0],
5782 &encrypt_iv
.aes_iv
[0],
5783 &swap_crypt_ctx
.encrypt
);
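/*
 * Editor's note: illustrative sketch only, guarded out of the build.  The
 * IV preparation above packs two per-page identifiers (the pager and the
 * paging offset) into one cipher block and then encrypts that block with a
 * constant null IV, so each page gets a distinct, non-obvious IV.  The
 * sketch below shows only the packing step; the example_* names are
 * hypothetical and the final encryption of the block is omitted.
 */
#if 0   /* illustration only */
#define EXAMPLE_BLOCK_SIZE 16

static void
example_pack_iv(unsigned char iv[EXAMPLE_BLOCK_SIZE],
                unsigned long long pager_id,
                unsigned long long paging_offset)
{
    int i;

    for (i = 0; i < EXAMPLE_BLOCK_SIZE; i++)
        iv[i] = 0;                                  /* start from a zeroed block */

    for (i = 0; i < 8; i++) {
        iv[i]     = (unsigned char)(pager_id >> (8 * i));       /* bytes 0..7  */
        iv[8 + i] = (unsigned char)(paging_offset >> (8 * i));  /* bytes 8..15 */
    }
}
#endif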
    aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
                    &encrypt_iv.aes_iv[0],
                    PAGE_SIZE / AES_BLOCK_SIZE,
                    (unsigned char *) kernel_vaddr,
                    &swap_crypt_ctx.encrypt);
    vm_page_encrypt_counter++;

    vm_object_lock(page->object);

    /*
     * Unmap the page from the kernel's address space,
     * if we had to map it ourselves. Otherwise, let
     * the caller undo the mapping if needed.
     */
    if (kernel_mapping_size != 0) {
        vm_paging_unmap_object(page->object,
                               kernel_mapping_offset,
                               kernel_mapping_offset + kernel_mapping_size);

    /*
     * Restore the "reference" and "modified" bits.
     * This should clean up any impact the encryption had
     */
    if (! page_was_referenced) {
        clear_refmod |= VM_MEM_REFERENCED;
        page->reference = FALSE;
    if (! page_was_modified) {
        clear_refmod |= VM_MEM_MODIFIED;
        page->dirty = FALSE;

    pmap_clear_refmod(page->phys_page, clear_refmod);

    page->encrypted = TRUE;
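/*
 * Editor's note: illustrative sketch only, guarded out of the build.  It
 * shows the bookkeeping pattern used by vm_page_encrypt()/vm_page_decrypt():
 * the referenced/modified state is sampled before the page is touched, and
 * afterwards only the bits that were previously clear are cleared again, so
 * the encryption pass itself leaves no trace in the LRU information.  The
 * example_* names and bit values are hypothetical.
 */
#if 0   /* illustration only */
#define EXAMPLE_REFERENCED 0x1
#define EXAMPLE_MODIFIED   0x2

static unsigned int
example_refmod_clear_mask(unsigned int refmod_before)
{
    unsigned int clear = 0;

    if (!(refmod_before & EXAMPLE_REFERENCED))
        clear |= EXAMPLE_REFERENCED;    /* page was not referenced before: clear it again */
    if (!(refmod_before & EXAMPLE_MODIFIED))
        clear |= EXAMPLE_MODIFIED;      /* page was not modified before: clear it again */
    return clear;
}
#endif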
/*
 * Decrypt the given page.
 * The page might already be mapped at kernel virtual
 * address "kernel_mapping_offset". Otherwise, we need
 *
 * The page's VM object is locked but will be unlocked and relocked.
 * The page is busy and not accessible by users (not entered in any pmap).
 */
    vm_map_offset_t kernel_mapping_offset)
    int             clear_refmod = 0;
    vm_map_size_t   kernel_mapping_size;
    vm_offset_t     kernel_vaddr;
    boolean_t       page_was_referenced;
        unsigned char       aes_iv[AES_BLOCK_SIZE];
            memory_object_t     pager_object;
            vm_object_offset_t  paging_offset;

    assert(page->encrypted);

    /*
     * Gather the "reference" status of the page.
     * We'll restore its value after the decryption, so that
     * the decryption is transparent to the rest of the system
     * and doesn't impact the VM's LRU logic.
     */
    page_was_referenced =
        (page->reference || pmap_is_referenced(page->phys_page));

    if (kernel_mapping_offset == 0) {
        /*
         * The page hasn't already been mapped in kernel space
         * by the caller. Map it now, so that we can access
         * its contents and decrypt them.
         */
        kernel_mapping_size = PAGE_SIZE;
        kr = vm_paging_map_object(&kernel_mapping_offset,
                                  &kernel_mapping_size);
        if (kr != KERN_SUCCESS) {
            panic("vm_page_decrypt: "
                  "could not map page in kernel: 0x%x\n", kr);
        kernel_mapping_size = 0;

    kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);

    assert(swap_crypt_ctx_initialized);
5894 * Prepare an "initial vector" for the decryption.
5895 * It has to be the same as the "initial vector" we
5896 * used to encrypt that page.
5898 bzero(&decrypt_iv
.aes_iv
[0], sizeof (decrypt_iv
.aes_iv
));
5899 decrypt_iv
.vm
.pager_object
= page
->object
->pager
;
5900 decrypt_iv
.vm
.paging_offset
=
5901 page
->object
->paging_offset
+ page
->offset
;
5903 vm_object_unlock(page
->object
);
5905 /* encrypt the "initial vector" */
5906 aes_encrypt_cbc((const unsigned char *) &decrypt_iv
.aes_iv
[0],
5909 &decrypt_iv
.aes_iv
[0],
5910 &swap_crypt_ctx
.encrypt
);
5915 aes_decrypt_cbc((const unsigned char *) kernel_vaddr
,
5916 &decrypt_iv
.aes_iv
[0],
5917 PAGE_SIZE
/ AES_BLOCK_SIZE
,
5918 (unsigned char *) kernel_vaddr
,
5919 &swap_crypt_ctx
.decrypt
);
5920 vm_page_decrypt_counter
++;
5922 vm_object_lock(page
->object
);
5925 * Unmap the page from the kernel's address space,
5926 * if we had to map it ourselves. Otherwise, let
5927 * the caller undo the mapping if needed.
5929 if (kernel_mapping_size
!= 0) {
5930 vm_paging_unmap_object(page
->object
,
5932 kernel_vaddr
+ PAGE_SIZE
);
    /*
     * After decryption, the page is actually clean.
     * It was encrypted as part of paging, which "cleans"
     * the "dirty" pages.
     * No one could access it after it was encrypted
     * and the decryption doesn't count.
     */
    page->dirty = FALSE;
    clear_refmod = VM_MEM_MODIFIED;

    /* restore the "reference" bit */
    if (! page_was_referenced) {
        page->reference = FALSE;
        clear_refmod |= VM_MEM_REFERENCED;
    pmap_clear_refmod(page->phys_page, clear_refmod);

    page->encrypted = FALSE;

    /*
     * We've just modified the page's contents via the data cache and part
     * of the new contents might still be in the cache and not yet in RAM.
     * Since the page is now available and might get gathered in a UPL to
     * be part of a DMA transfer from a driver that expects the memory to
     * be coherent at this point, we have to flush the data cache.
     */
    pmap_sync_page_attributes_phys(page->phys_page);
    /*
     * Since the page is not mapped yet, some code might assume that it
     * doesn't need to invalidate the instruction cache when writing to
     * that page. That code relies on "no_isync" being set, so that the
     * caches get synchronized when the page is first mapped. So we need
     * to set "no_isync" here too, despite the fact that we just
     * synchronized the caches above...
     */
    page->no_isync = TRUE;
unsigned long upl_encrypt_upls = 0;
unsigned long upl_encrypt_pages = 0;

/*
 * Encrypts all the pages in the UPL, within the specified range.
 */
    upl_offset_t        crypt_offset,
    upl_size_t          crypt_size)
    upl_size_t          upl_size;
    upl_offset_t        upl_offset;
    vm_object_t         upl_object;
    vm_object_t         shadow_object;
    vm_object_offset_t  shadow_offset;
    vm_object_offset_t  paging_offset;
    vm_object_offset_t  base_offset;

    upl_encrypt_pages += crypt_size / PAGE_SIZE;

    upl_object = upl->map_object;
    upl_offset = upl->offset;
    upl_size = upl->size;

    vm_object_lock(upl_object);

    /*
     * Find the VM object that contains the actual pages.
     */
    if (upl_object->pageout) {
        shadow_object = upl_object->shadow;
        /*
         * The offset in the shadow object is actually also
         * accounted for in upl->offset. It possibly shouldn't be
         * this way, but for now don't account for it twice.
         */
        assert(upl_object->paging_offset == 0); /* XXX ? */
        vm_object_lock(shadow_object);
        shadow_object = upl_object;

    paging_offset = shadow_object->paging_offset;
    vm_object_paging_begin(shadow_object);

    if (shadow_object != upl_object) {
        vm_object_unlock(shadow_object);
    vm_object_unlock(upl_object);

    base_offset = shadow_offset;
    base_offset += upl_offset;
    base_offset += crypt_offset;
    base_offset -= paging_offset;
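/*
 * Editor's note: illustrative sketch only, guarded out of the build.  The
 * four adjustments above reduce a UPL-relative offset to an offset inside
 * the shadow object: start from the shadow offset, add the UPL's own offset
 * and the caller's offset within the UPL, then subtract the paging offset
 * that upl->offset already includes.  The example_* name is hypothetical.
 */
#if 0   /* illustration only */
static unsigned long long
example_upl_to_object_offset(unsigned long long shadow_offset,
                             unsigned long long upl_offset,
                             unsigned long long crypt_offset,
                             unsigned long long paging_offset)
{
    /* assumes the accumulated sum is at least paging_offset */
    return shadow_offset + upl_offset + crypt_offset - paging_offset;
}
#endif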
    /*
     * Unmap the pages, so that nobody can continue accessing them while
     * they're encrypted. After that point, all accesses to these pages
     * will cause a page fault and block while the page is being encrypted
     * (busy). After the encryption completes, any access will cause a
     * page fault and the page gets decrypted at that time.
     */
    assert(crypt_offset + crypt_size <= upl_size);
    vm_object_pmap_protect(shadow_object,
                           (vm_object_size_t)crypt_size,

    /* XXX FBDP could the object have changed significantly here ? */
    vm_object_lock(shadow_object);

    for (upl_offset = 0;
         upl_offset < crypt_size;
         upl_offset += PAGE_SIZE) {
        page = vm_page_lookup(shadow_object,
                              base_offset + upl_offset);
        if (page == VM_PAGE_NULL) {
            panic("upl_encrypt: "
                  "no page for (obj=%p,off=%lld+%d)!\n",
        vm_page_encrypt(page, 0);

    vm_object_paging_end(shadow_object);
    vm_object_unlock(shadow_object);

upl_get_internal_pagelist_offset(void)
    return sizeof(struct upl);
        upl->flags |= UPL_CLEAR_DIRTY;
        upl->flags &= ~UPL_CLEAR_DIRTY;

boolean_t  upl_page_present(upl_page_info_t *upl, int index)
    return(UPL_PAGE_PRESENT(upl, index));
boolean_t  upl_dirty_page(upl_page_info_t *upl, int index)
    return(UPL_DIRTY_PAGE(upl, index));
boolean_t  upl_valid_page(upl_page_info_t *upl, int index)
    return(UPL_VALID_PAGE(upl, index));
ppnum_t  upl_phys_page(upl_page_info_t *upl, int index)
    return(UPL_PHYS_PAGE(upl, index));
vm_countdirtypages(void)
    vm_page_lock_queues();
    m = (vm_page_t) queue_first(&vm_page_queue_inactive);
        if (m ==(vm_page_t)0) break;

        if(m->dirty) dpages++;
        if(m->pageout) pgopages++;
        if(m->precious) precpages++;

        assert(m->object != kernel_object);
        m = (vm_page_t) queue_next(&m->pageq);
        if (m ==(vm_page_t)0) break;

    } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
    vm_page_unlock_queues();

    vm_page_lock_queues();
    m = (vm_page_t) queue_first(&vm_page_queue_zf);
        if (m ==(vm_page_t)0) break;

        if(m->dirty) dpages++;
        if(m->pageout) pgopages++;
        if(m->precious) precpages++;

        assert(m->object != kernel_object);
        m = (vm_page_t) queue_next(&m->pageq);
        if (m ==(vm_page_t)0) break;

    } while (!queue_end(&vm_page_queue_zf,(queue_entry_t) m));
    vm_page_unlock_queues();

    printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);

    vm_page_lock_queues();
    m = (vm_page_t) queue_first(&vm_page_queue_active);

        if(m == (vm_page_t)0) break;
        if(m->dirty) dpages++;
        if(m->pageout) pgopages++;
        if(m->precious) precpages++;

        assert(m->object != kernel_object);
        m = (vm_page_t) queue_next(&m->pageq);
        if(m == (vm_page_t)0) break;

    } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
    vm_page_unlock_queues();

    printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);

#endif /* MACH_BSD */
ppnum_t  upl_get_highest_page(
    return upl->highest_page;

kern_return_t  upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
    upl->ubc_alias1 = alias1;
    upl->ubc_alias2 = alias2;
    return KERN_SUCCESS;
int  upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
    *al = upl->ubc_alias1;
    *al2 = upl->ubc_alias2;
    return KERN_SUCCESS;
#endif /* UPL_DEBUG */
#include <ddb/db_output.h>
#include <ddb/db_print.h>
#include <vm/vm_print.h>

#define printf kdbprintf
void db_pageout(void);

    iprintf("VM Statistics:\n");
    iprintf("pages:\n");
    iprintf("activ %5d inact %5d free %5d",
            vm_page_active_count, vm_page_inactive_count,
            vm_page_free_count);
    printf(" wire %5d gobbl %5d\n",
           vm_page_wire_count, vm_page_gobble_count);
    iprintf("target:\n");
    iprintf("min %5d inact %5d free %5d",
            vm_page_free_min, vm_page_inactive_target,
            vm_page_free_target);
    printf(" resrv %5d\n", vm_page_free_reserved);
    iprintf("pause:\n");

    extern int c_laundry_pages_freed;
#endif /* MACH_COUNTERS */

    iprintf("Pageout Statistics:\n");
    iprintf("active %5d inactv %5d\n",
            vm_pageout_active, vm_pageout_inactive);
    iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
            vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
            vm_pageout_inactive_busy, vm_pageout_inactive_absent);
    iprintf("used %5d clean %5d dirty %5d\n",
            vm_pageout_inactive_used, vm_pageout_inactive_clean,
            vm_pageout_inactive_dirty);
    iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
#endif /* MACH_COUNTERS */
#if MACH_CLUSTER_STATS
    iprintf("Cluster Statistics:\n");
    iprintf("dirtied %5d cleaned %5d collisions %5d\n",
            vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
            vm_pageout_cluster_collisions);
    iprintf("clusters %5d conversions %5d\n",
            vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
    iprintf("Target Statistics:\n");
    iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
            vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
            vm_pageout_target_page_freed);
#endif /* MACH_CLUSTER_STATS */
#endif /* MACH_KDB */