/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/vm_pageout.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	The proverbial page-out daemon.
 */
#include <mach_pagemap.h>
#include <mach_cluster_stats.h>
#include <advisory_pageout.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/mach_host_server.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_statistics.h>

#include <kern/kern_types.h>
#include <kern/counters.h>
#include <kern/host_statistics.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/kalloc.h>

#include <machine/vm_tuning.h>

#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h> /* must be last */

#include <ppc/mappings.h>
#include <../bsd/crypto/aes/aes.h>
extern ipc_port_t memory_manager_default;
#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE
#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE  10000	/* maximum iterations of the active queue to move pages to inactive */
#endif	/* VM_PAGEOUT_BURST_ACTIVE_THROTTLE */

#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096	/* maximum iterations of the inactive queue w/o stealing/cleaning a page */
#endif	/* VM_PAGEOUT_BURST_INACTIVE_THROTTLE */

#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
#define VM_PAGEOUT_DEADLOCK_RELIEF 100	/* number of pages to move to break deadlock */
#endif	/* VM_PAGEOUT_DEADLOCK_RELIEF */

#ifndef VM_PAGEOUT_INACTIVE_RELIEF
#define VM_PAGEOUT_INACTIVE_RELIEF 50	/* minimum number of pages to move to the inactive q */
#endif	/* VM_PAGEOUT_INACTIVE_RELIEF */

#ifndef VM_PAGE_LAUNDRY_MAX
#define VM_PAGE_LAUNDRY_MAX	16UL	/* maximum pageouts on a given pageout queue */
#endif	/* VM_PAGE_LAUNDRY_MAX */

#ifndef VM_PAGEOUT_BURST_WAIT
#define VM_PAGEOUT_BURST_WAIT	30	/* milliseconds per page */
#endif	/* VM_PAGEOUT_BURST_WAIT */

#ifndef VM_PAGEOUT_EMPTY_WAIT
#define VM_PAGEOUT_EMPTY_WAIT	200	/* milliseconds */
#endif	/* VM_PAGEOUT_EMPTY_WAIT */

#ifndef VM_PAGEOUT_DEADLOCK_WAIT
#define VM_PAGEOUT_DEADLOCK_WAIT	300	/* milliseconds */
#endif	/* VM_PAGEOUT_DEADLOCK_WAIT */

#ifndef VM_PAGEOUT_IDLE_WAIT
#define VM_PAGEOUT_IDLE_WAIT	10	/* milliseconds */
#endif	/* VM_PAGEOUT_IDLE_WAIT */
/*
 *	To obtain a reasonable LRU approximation, the inactive queue
 *	needs to be large enough to give pages on it a chance to be
 *	referenced a second time.  This macro defines the fraction
 *	of active+inactive pages that should be inactive.
 *	The pageout daemon uses it to update vm_page_inactive_target.
 *
 *	If vm_page_free_count falls below vm_page_free_target and
 *	vm_page_inactive_count is below vm_page_inactive_target,
 *	then the pageout daemon starts running.
 */

#ifndef	VM_PAGE_INACTIVE_TARGET
#define	VM_PAGE_INACTIVE_TARGET(avail)	((avail) * 1 / 3)
#endif	/* VM_PAGE_INACTIVE_TARGET */
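
/*
 * Illustrative arithmetic only (not part of the original source): with
 * 30000 active+inactive pages, VM_PAGE_INACTIVE_TARGET(30000) evaluates
 * to 10000, i.e. roughly one third of those pages should sit on the
 * inactive queue so they get a chance at a second reference.
 */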
/*
 *	Once the pageout daemon starts running, it keeps going
 *	until vm_page_free_count meets or exceeds vm_page_free_target.
 */

#ifndef	VM_PAGE_FREE_TARGET
#define	VM_PAGE_FREE_TARGET(free)	(15 + (free) / 80)
#endif	/* VM_PAGE_FREE_TARGET */

/*
 *	The pageout daemon always starts running once vm_page_free_count
 *	falls below vm_page_free_min.
 */

#ifndef	VM_PAGE_FREE_MIN
#define	VM_PAGE_FREE_MIN(free)	(10 + (free) / 100)
#endif	/* VM_PAGE_FREE_MIN */

/*
 *	When vm_page_free_count falls below vm_page_free_reserved,
 *	only vm-privileged threads can allocate pages.  vm-privilege
 *	allows the pageout daemon and default pager (and any other
 *	associated threads needed for default pageout) to continue
 *	operation by dipping into the reserved pool of pages.
 */

#ifndef	VM_PAGE_FREE_RESERVED
#define	VM_PAGE_FREE_RESERVED(n)	\
	((6 * VM_PAGE_LAUNDRY_MAX) + (n))
#endif	/* VM_PAGE_FREE_RESERVED */
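
/*
 * Worked example (illustrative only, not part of the original source):
 * with the default VM_PAGE_LAUNDRY_MAX of 16UL and a hypothetical
 * 2-processor machine, VM_PAGE_FREE_RESERVED(2) = (6 * 16) + 2 = 98 pages.
 * If vm_page_free_count_init were, say, 100000 pages, then
 * free_after_reserve = 100000 - 98 = 99902, and vm_page_free_reserve()
 * (later in this file) would set
 *	vm_page_free_min    = 98 + (10 + 99902 / 100) = 1107 pages
 *	vm_page_free_target = 98 + (15 + 99902 / 80)  = 1361 pages
 * i.e. the daemon wakes below roughly 1107 free pages and keeps running
 * until roughly 1361 pages are free again.
 */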
/*
 * must hold the page queues lock to
 * manipulate this structure
 */
struct vm_pageout_queue {
	queue_head_t	pgo_pending;	/* laundry pages to be processed by pager's iothread */
	unsigned int	pgo_laundry;	/* current count of laundry pages on queue or in flight */
	unsigned int	pgo_maxlaundry;

	unsigned int	pgo_idle:1,	/* iothread is blocked waiting for work to do */
			pgo_busy:1,	/* iothread is currently processing request from pgo_pending */
			pgo_throttled:1;/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */
};

#define VM_PAGE_Q_THROTTLED(q)		\
	((q)->pgo_laundry >= (q)->pgo_maxlaundry)
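
/*
 * Minimal usage sketch (illustrative, not part of the original source),
 * mirroring the checks made in vm_pageout_scan below: with the page
 * queues lock held, a dirty page bound for the default pager is skipped
 * while the internal queue is at its laundry limit, e.g.
 *
 *	if (VM_PAGE_Q_THROTTLED(&vm_pageout_queue_internal))
 *		... defer this page until vm_pageout_throttle_up()
 *		... lets pgo_laundry drop below pgo_maxlaundry
 */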
/*
 * Exported variable used to broadcast the activation of the pageout scan
 * Working Set uses this to throttle its use of pmap removes.  In this
 * way, code which runs within memory in an uncontested context does
 * not keep encountering soft faults.
 */
unsigned int	vm_pageout_scan_event_counter = 0;

/*
 * Forward declarations for internal routines.
 */

static void vm_pageout_garbage_collect(int);
static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
static void vm_pageout_iothread_external(void);
static void vm_pageout_iothread_internal(void);
static void vm_pageout_queue_steal(vm_page_t);

extern void vm_pageout_continue(void);
extern void vm_pageout_scan(void);
unsigned int vm_pageout_reserved_internal = 0;
unsigned int vm_pageout_reserved_really = 0;

unsigned int vm_pageout_idle_wait = 0;		/* milliseconds */
unsigned int vm_pageout_empty_wait = 0;		/* milliseconds */
unsigned int vm_pageout_burst_wait = 0;		/* milliseconds */
unsigned int vm_pageout_deadlock_wait = 0;	/* milliseconds */
unsigned int vm_pageout_deadlock_relief = 0;
unsigned int vm_pageout_inactive_relief = 0;
unsigned int vm_pageout_burst_active_throttle = 0;
unsigned int vm_pageout_burst_inactive_throttle = 0;

/*
 *	Protection against zero fill flushing live working sets derived
 *	from existing backing store and files
 */
unsigned int vm_accellerate_zf_pageout_trigger = 400;
unsigned int vm_zf_iterator;
unsigned int vm_zf_iterator_count = 40;
unsigned int last_page_zf;
unsigned int vm_zf_count = 0;
/*
 *	These variables record the pageout daemon's actions:
 *	how many pages it looks at and what happens to those pages.
 *	No locking needed because only one thread modifies the variables.
 */

unsigned int vm_pageout_active = 0;		/* debugging */
unsigned int vm_pageout_inactive = 0;		/* debugging */
unsigned int vm_pageout_inactive_throttled = 0;	/* debugging */
unsigned int vm_pageout_inactive_forced = 0;	/* debugging */
unsigned int vm_pageout_inactive_nolock = 0;	/* debugging */
unsigned int vm_pageout_inactive_avoid = 0;	/* debugging */
unsigned int vm_pageout_inactive_busy = 0;	/* debugging */
unsigned int vm_pageout_inactive_absent = 0;	/* debugging */
unsigned int vm_pageout_inactive_used = 0;	/* debugging */
unsigned int vm_pageout_inactive_clean = 0;	/* debugging */
unsigned int vm_pageout_inactive_dirty = 0;	/* debugging */
unsigned int vm_pageout_dirty_no_pager = 0;	/* debugging */
unsigned int vm_pageout_purged_objects = 0;	/* debugging */
unsigned int vm_stat_discard = 0;		/* debugging */
unsigned int vm_stat_discard_sent = 0;		/* debugging */
unsigned int vm_stat_discard_failure = 0;	/* debugging */
unsigned int vm_stat_discard_throttle = 0;	/* debugging */

unsigned int vm_pageout_scan_active_throttled = 0;
unsigned int vm_pageout_scan_inactive_throttled = 0;
unsigned int vm_pageout_scan_throttle = 0;			/* debugging */
unsigned int vm_pageout_scan_burst_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_empty_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_deadlock_detected = 0;		/* debugging */
unsigned int vm_pageout_scan_active_throttle_success = 0;	/* debugging */
unsigned int vm_pageout_scan_inactive_throttle_success = 0;	/* debugging */
/*
 * Backing store throttle when BS is exhausted
 */
unsigned int	vm_backing_store_low = 0;

unsigned int vm_pageout_out_of_line = 0;
unsigned int vm_pageout_in_place = 0;

/*
 * counters and statistics...
 */
unsigned long vm_page_decrypt_counter = 0;
unsigned long vm_page_decrypt_for_upl_counter = 0;
unsigned long vm_page_encrypt_counter = 0;
unsigned long vm_page_encrypt_abort_counter = 0;
unsigned long vm_page_encrypt_already_encrypted_counter = 0;
boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */

struct	vm_pageout_queue vm_pageout_queue_internal;
struct	vm_pageout_queue vm_pageout_queue_external;
/*
 *	Routine:	vm_backing_store_disable
 *		Suspend non-privileged threads wishing to extend
 *		backing store when we are low on backing store
 *		(Synchronized by caller)
 */
vm_backing_store_disable(
	vm_backing_store_low = 1;

	if (vm_backing_store_low) {
		vm_backing_store_low = 0;
		thread_wakeup((event_t) &vm_backing_store_low);
	}
/*
 *	Routine:	vm_pageout_object_allocate
 *		Allocate an object for use as out-of-line memory in a
 *		data_return/data_initialize message.
 *		The page must be in an unlocked object.
 *
 *		If the page belongs to a trusted pager, cleaning in place
 *		will be used, which utilizes a special "pageout object"
 *		containing private alias pages for the real page frames.
 *		Untrusted pagers use normal out-of-line memory.
 */
vm_pageout_object_allocate(
	vm_object_offset_t	offset)
	vm_object_t	object = m->object;
	vm_object_t	new_object;

	assert(object->pager_ready);

	new_object = vm_object_allocate(size);

	if (object->pager_trusted) {
		assert(offset < object->size);

		vm_object_lock(new_object);
		new_object->pageout = TRUE;
		new_object->shadow = object;
		new_object->can_persist = FALSE;
		new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
		new_object->shadow_offset = offset;
		vm_object_unlock(new_object);

		/*
		 * Take a paging reference on the object. This will be dropped
		 * in vm_pageout_object_terminate()
		 */
		vm_object_lock(object);
		vm_object_paging_begin(object);
		vm_page_lock_queues();
		vm_page_unlock_queues();
		vm_object_unlock(object);

		vm_pageout_in_place++;

	vm_pageout_out_of_line++;
#if	MACH_CLUSTER_STATS
unsigned long vm_pageout_cluster_dirtied = 0;
unsigned long vm_pageout_cluster_cleaned = 0;
unsigned long vm_pageout_cluster_collisions = 0;
unsigned long vm_pageout_cluster_clusters = 0;
unsigned long vm_pageout_cluster_conversions = 0;
unsigned long vm_pageout_target_collisions = 0;
unsigned long vm_pageout_target_page_dirtied = 0;
unsigned long vm_pageout_target_page_freed = 0;
#define CLUSTER_STAT(clause)	clause
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */
/*
 *	Routine:	vm_pageout_object_terminate
 *		Destroy the pageout_object allocated by
 *		vm_pageout_object_allocate(), and perform all of the
 *		required cleanup actions.
 *
 *		The object must be locked, and will be returned locked.
 */
vm_pageout_object_terminate(
	vm_object_t	shadow_object;
	boolean_t	shadow_internal;

	/*
	 * Deal with the deallocation (last reference) of a pageout object
	 * (used for cleaning-in-place) by dropping the paging references/
	 * freeing pages in the original object.
	 */
	assert(object->pageout);
	shadow_object = object->shadow;
	vm_object_lock(shadow_object);
	shadow_internal = shadow_object->internal;

	while (!queue_empty(&object->memq)) {
		vm_object_offset_t	offset;

		p = (vm_page_t) queue_first(&object->memq);

		assert(!p->cleaning);

		m = vm_page_lookup(shadow_object,
				offset + object->shadow_offset);

		if (m == VM_PAGE_NULL)

		/* used as a trigger on upl_commit etc to recognize the */
		/* pageout daemon's subsequent desire to pageout a cleaning */
		/* page.  When the bit is on the upl commit code will */
		/* respect the pageout bit in the target page over the */
		/* caller's page list indication */
		m->dump_cleaning = FALSE;

		/*
		 * Account for the paging reference taken when
		 * m->cleaning was set on this page.
		 */
		vm_object_paging_end(shadow_object);
		assert((m->dirty) || (m->precious) ||
				(m->busy && m->cleaning));
		/*
		 * Handle the trusted pager throttle.
		 * Also decrement the burst throttle (if external).
		 */
		vm_page_lock_queues();

		vm_pageout_throttle_up(m);

		/*
		 * Handle the "target" page(s). These pages are to be freed if
		 * successfully cleaned. Target pages are always busy, and are
		 * wired exactly once. The initial target pages are not mapped,
		 * (so cannot be referenced or modified) but converted target
		 * pages may have been modified between the selection as an
		 * adjacent page and conversion to a target.
		 */
		assert(m->wire_count == 1);
#if MACH_CLUSTER_STATS
		if (m->wanted) vm_pageout_target_collisions++;

		/*
		 * Revoke all access to the page. Since the object is
		 * locked, and the page is busy, this prevents the page
		 * from being dirtied after the pmap_disconnect() call
		 *
		 * Since the page is left "dirty" but "not modified", we
		 * can detect whether the page was redirtied during
		 * pageout by checking the modify state.
		 */
		if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)

		CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
		vm_page_unwire(m);	/* reactivates */
		VM_STAT(reactivations++);

		CLUSTER_STAT(vm_pageout_target_page_freed++;)
		vm_page_free(m);	/* clears busy, etc. */

		vm_page_unlock_queues();
		/*
		 * Handle the "adjacent" pages. These pages were cleaned in
		 * place, and should be left alone.
		 * If prep_pin_count is nonzero, then someone is using the
		 * page, so make it active.
		 */
		if (!m->active && !m->inactive && !m->private) {

			vm_page_deactivate(m);

		if ((m->busy) && (m->cleaning)) {

			/* the request_page_list case, (COPY_OUT_FROM FALSE) */

			/* We do not re-set m->dirty ! */
			/* The page was busy so no extraneous activity */
			/* could have occurred. COPY_INTO is a read into the */
			/* new pages. CLEAN_IN_PLACE does actually write */
			/* out the pages but handling outside of this code */
			/* will take care of resetting dirty. We clear the */
			/* modify however for the Programmed I/O case. */
			pmap_clear_modify(m->phys_page);

			if (shadow_object->absent_count == 1)
				vm_object_absent_release(shadow_object);
			else
				shadow_object->absent_count--;

			m->overwriting = FALSE;
		} else if (m->overwriting) {
			/* alternate request page list, write to page_list */
			/* case.  Occurs when the original page was wired */
			/* at the time of the list request */
			assert(m->wire_count != 0);
			vm_page_unwire(m);	/* reactivates */
			m->overwriting = FALSE;
		/*
		 * Set the dirty state according to whether or not the page was
		 * modified during the pageout. Note that we purposefully do
		 * NOT call pmap_clear_modify since the page is still mapped.
		 * If the page were to be dirtied between the 2 calls, this
		 * fact would be lost. This code is only necessary to
		 * maintain statistics, since the pmap module is always
		 * consulted if m->dirty is false.
		 */
#if MACH_CLUSTER_STATS
		m->dirty = pmap_is_modified(m->phys_page);

		if (m->dirty)	vm_pageout_cluster_dirtied++;
		else		vm_pageout_cluster_cleaned++;
		if (m->wanted)	vm_pageout_cluster_collisions++;

		/*
		 * Wakeup any thread waiting for the page to be un-cleaning.
		 */
		vm_page_unlock_queues();

	/*
	 * Account for the paging reference taken in vm_paging_object_allocate.
	 */
	vm_object_paging_end(shadow_object);
	vm_object_unlock(shadow_object);

	assert(object->ref_count == 0);
	assert(object->paging_in_progress == 0);
	assert(object->resident_page_count == 0);
/*
 *	Routine:	vm_pageout_setup
 *		Set up a page for pageout (clean & flush).
 *
 *		Move the page to a new object, as part of which it will be
 *		sent to its memory manager in a memory_object_data_write or
 *		memory_object_initialize message.
 *
 *		The "new_object" and "new_offset" arguments
 *		indicate where the page should be moved.
 *
 *		The page in question must not be on any pageout queues,
 *		and must be busy.  The object to which it belongs
 *		must be unlocked, and the caller must hold a paging
 *		reference to it.  The new_object must not be locked.
 *
 *		This routine returns a pointer to a place-holder page,
 *		inserted at the same offset, to block out-of-order
 *		requests for the page.  The place-holder page must
 *		be freed after the data_write or initialize message
 *
 *		The original page is put on a paging queue and marked
 */
	register vm_page_t	m,
	register vm_object_t	new_object,
	vm_object_offset_t	new_offset)
	register vm_object_t	old_object = m->object;
	vm_object_offset_t	paging_offset;
	vm_object_offset_t	offset;
	register vm_page_t	holding_page;
	register vm_page_t	new_m;
	boolean_t		need_to_wire = FALSE;

	"vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n",
	(integer_t)m->object, (integer_t)m->offset,
	(integer_t)m, (integer_t)new_object,
	(integer_t)new_offset);

	assert(m && m->busy && !m->absent && !m->fictitious && !m->error &&

	assert(m->dirty || m->precious);
	/*
	 * Create a place-holder page where the old one was, to prevent
	 * attempted pageins of this page while we're unlocked.
	 */
	VM_PAGE_GRAB_FICTITIOUS(holding_page);

	vm_object_lock(old_object);

	paging_offset = offset + old_object->paging_offset;

	if (old_object->pager_trusted) {
		/*
		 * This pager is trusted, so we can clean this page
		 * in place. Leave it in the old object, and mark it
		 * cleaning & pageout.
		 */
		new_m = holding_page;
		holding_page = VM_PAGE_NULL;

		/*
		 * Set up new page to be private shadow of real page.
		 */
		new_m->phys_page = m->phys_page;
		new_m->fictitious = FALSE;
		new_m->pageout = TRUE;

		/*
		 * Mark real page as cleaning (indicating that we hold a
		 * paging reference to be released via m_o_d_r_c) and
		 * pageout (indicating that the page should be freed
		 * when the pageout completes).
		 */
		pmap_clear_modify(m->phys_page);
		vm_page_lock_queues();
		new_m->private = TRUE;

		assert(m->wire_count == 1);
		vm_page_unlock_queues();

		m->page_lock = VM_PROT_NONE;

		m->unlock_request = VM_PROT_NONE;
		/*
		 * Cannot clean in place, so rip the old page out of the
		 * object, and stick the holding page in. Set new_m to the
		 * page in the new object.
		 */
		vm_page_lock_queues();
		VM_PAGE_QUEUES_REMOVE(m);

		vm_page_insert(holding_page, old_object, offset);
		vm_page_unlock_queues();

		new_m->page_lock = VM_PROT_NONE;
		new_m->unlock_request = VM_PROT_NONE;

	if (old_object->internal)
		/*
		 * Record that this page has been written out
		 */
#if MACH_PAGEMAP
		vm_external_state_set(old_object->existence_map, offset);
#endif	/* MACH_PAGEMAP */

	vm_object_unlock(old_object);

	vm_object_lock(new_object);

	/*
	 * Put the page into the new object. If it is not wired
	 * (if it's the real page) it will be activated.
	 */
	vm_page_lock_queues();
	vm_page_insert(new_m, new_object, new_offset);

	vm_page_activate(new_m);
	PAGE_WAKEUP_DONE(new_m);
	vm_page_unlock_queues();

	vm_object_unlock(new_object);

	/*
	 * Return the placeholder page to simplify cleanup.
	 */
	return (holding_page);
/*
 *	Routine:	vm_pageclean_setup
 *
 *	Purpose:	setup a page to be cleaned (made non-dirty), but not
 *			necessarily flushed from the VM page cache.
 *			This is accomplished by cleaning in place.
 *
 *			The page must not be busy, and the object and page
 *			queues must be locked.
 */
	vm_object_t		new_object,
	vm_object_offset_t	new_offset)
	vm_object_t old_object = m->object;

	assert(!m->cleaning);

	"vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
	(integer_t)old_object, m->offset, (integer_t)m,
	(integer_t)new_m, new_offset);
	pmap_clear_modify(m->phys_page);
	vm_object_paging_begin(old_object);

	/*
	 * Record that this page has been written out
	 */
#if MACH_PAGEMAP
	vm_external_state_set(old_object->existence_map, m->offset);
#endif	/* MACH_PAGEMAP */

	/*
	 * Mark original page as cleaning in place.
	 */

	/*
	 * Convert the fictitious page to a private shadow of
	 * the real page.
	 */
	assert(new_m->fictitious);
	new_m->fictitious = FALSE;
	new_m->private = TRUE;
	new_m->pageout = TRUE;
	new_m->phys_page = m->phys_page;

	vm_page_insert(new_m, new_object, new_offset);
	assert(!new_m->wanted);
	vm_object_t		new_object,
	vm_object_offset_t	new_offset)
	"vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n",
	m, new_m, new_object, new_offset, 0);

	assert((!m->busy) && (!m->cleaning));

	assert(!new_m->private && !new_m->fictitious);

	pmap_clear_modify(m->phys_page);

	vm_object_paging_begin(m->object);
	vm_page_unlock_queues();
	vm_object_unlock(m->object);

	/*
	 * Copy the original page to the new page.
	 */
	vm_page_copy(m, new_m);

	/*
	 * Mark the old page as clean. A request to pmap_is_modified
	 * will get the right answer.
	 */
	vm_object_lock(m->object);

	vm_object_paging_end(m->object);

	vm_page_lock_queues();
	if (!m->active && !m->inactive)

	vm_page_insert(new_m, new_object, new_offset);
	vm_page_activate(new_m);
	new_m->busy = FALSE;	/* No other thread can be waiting */
/*
 *	Routine:	vm_pageout_initialize_page
 *		Causes the specified page to be initialized in
 *		the appropriate memory object. This routine is used to push
 *		pages into a copy-object when they are modified in the
 *
 *		The page is moved to a temporary object and paged out.
 *
 *		The page in question must not be on any pageout queues.
 *		The object to which it belongs must be locked.
 *		The page must be busy, but not hold a paging reference.
 *
 *		Move this page to a completely new object.
 */
vm_pageout_initialize_page(
	vm_object_offset_t	paging_offset;
	vm_page_t		holding_page;

	"vm_pageout_initialize_page, page 0x%X\n",
	(integer_t)m, 0, 0, 0, 0);

	/*
	 * Verify that we really want to clean this page
	 */

	/*
	 * Create a paging reference to let us play with the object.
	 */
	paging_offset = m->offset + object->paging_offset;
	vm_object_paging_begin(object);
	if (m->absent || m->error || m->restart ||
	    (!m->dirty && !m->precious)) {

		panic("reservation without pageout?"); /* alan */
		vm_object_unlock(object);

	/* set the page for future call to vm_fault_list_request */

	vm_page_lock_queues();
	pmap_clear_modify(m->phys_page);

	m->list_req_pending = TRUE;

	vm_page_unlock_queues();
	vm_object_unlock(object);

	/*
	 * Write the data to its pager.
	 * Note that the data is passed by naming the new object,
	 * not a virtual address; the pager interface has been
	 * manipulated to use the "internal memory" data type.
	 * [The object reference from its allocation is donated
	 * to the eventual recipient.]
	 */
	memory_object_data_initialize(object->pager,

	vm_object_lock(object);
#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES	16
struct {
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats[MAXCLUSTERPAGES];
#endif	/* MACH_CLUSTER_STATS */

boolean_t allow_clustered_pageouts = FALSE;
/*
 * vm_pageout_cluster:
 *
 * Given a page, queue it to the appropriate I/O thread,
 * which will page it out and attempt to clean adjacent pages
 * in the same operation.
 *
 * The page must be busy, and the object and queues locked. We will take a
 * paging reference to prevent deallocation or collapse when we
 * release the object lock back at the call site.  The I/O thread
 * is responsible for consuming this reference
 *
 * The page must not be on any pageout queue.
 */
vm_pageout_cluster(vm_page_t m)
	vm_object_t object = m->object;
	struct vm_pageout_queue *q;

	"vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
	(integer_t)object, m->offset, (integer_t)m, 0, 0);

	/*
	 * Only a certain kind of page is appreciated here.
	 */
	assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
	assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);

	/*
	 * protect the object from collapse -
	 * locking in the object's paging_offset.
	 */
	vm_object_paging_begin(object);

	/*
	 * set the page for future call to vm_fault_list_request
	 * page should already be marked busy
	 */
	m->list_req_pending = TRUE;

	if (object->internal == TRUE)
		q = &vm_pageout_queue_internal;
	else
		q = &vm_pageout_queue_external;

	m->pageout_queue = TRUE;
	queue_enter(&q->pgo_pending, m, vm_page_t, pageq);

	if (q->pgo_idle == TRUE) {
		q->pgo_idle = FALSE;
		thread_wakeup((event_t) &q->pgo_pending);
	}


unsigned long vm_pageout_throttle_up_count = 0;
/*
 * A page is back from laundry.  See if there are some pages waiting to
 * go to laundry and if we can let some of them go now.
 *
 * Object and page queues must be locked.
 */
vm_pageout_throttle_up(
	struct vm_pageout_queue *q;

	vm_pageout_throttle_up_count++;

	assert(m->object != VM_OBJECT_NULL);
	assert(m->object != kernel_object);

	if (m->object->internal == TRUE)
		q = &vm_pageout_queue_internal;
	else
		q = &vm_pageout_queue_external;

	if (q->pgo_throttled == TRUE) {
		q->pgo_throttled = FALSE;
		thread_wakeup((event_t) &q->pgo_laundry);
	}
/*
 *	vm_pageout_scan does the dirty work for the pageout daemon.
 *	It returns with vm_page_queue_free_lock held and
 *	vm_page_free_wanted == 0.
 */

#define	DELAYED_UNLOCK_LIMIT	(3 * MAX_UPL_TRANSFER)

#define	FCS_IDLE		0
#define	FCS_DELAYED		1
#define	FCS_DEADLOCK_DETECTED	2

struct flow_control {
	int		state;
	mach_timespec_t	ts;
};
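
/*
 * Added commentary (not part of the original source): the flow_control
 * state machine below starts in FCS_IDLE, moves to FCS_DELAYED when the
 * default pager's queue stays throttled and arms a timer of
 * vm_pageout_deadlock_wait milliseconds (reset_deadlock_timer), and
 * enters FCS_DEADLOCK_DETECTED once that timer expires.  In that state
 * vm_pageout_scan force-moves up to vm_pageout_deadlock_target pages;
 * when the count is exhausted it re-arms the timer, and the state falls
 * back to FCS_IDLE once the queue is no longer throttled.
 */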
vm_pageout_scan(void)
	unsigned int loop_count = 0;
	unsigned int inactive_burst_count = 0;
	unsigned int active_burst_count = 0;
	vm_page_t   local_freeq = 0;
	int         local_freed = 0;
	int         delayed_unlock = 0;
	int         need_internal_inactive = 0;
	int         refmod_state = 0;
	int         vm_pageout_deadlock_target = 0;
	struct vm_pageout_queue *iq;
	struct vm_pageout_queue *eq;
	struct flow_control	flow_control;
	boolean_t active_throttled = FALSE;
	boolean_t inactive_throttled = FALSE;

	unsigned int msecs = 0;

	flow_control.state = FCS_IDLE;
	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;

	XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
	/*
	 *	We want to gradually dribble pages from the active queue
	 *	to the inactive queue.  If we let the inactive queue get
	 *	very small, and then suddenly dump many pages into it,
	 *	those pages won't get a sufficient chance to be referenced
	 *	before we start taking them from the inactive queue.
	 *
	 *	We must limit the rate at which we send pages to the pagers.
	 *	data_write messages consume memory, for message buffers and
	 *	for map-copy objects.  If we get too far ahead of the pagers,
	 *	we can potentially run out of memory.
	 *
	 *	We can use the laundry count to limit directly the number
	 *	of pages outstanding to the default pager.  A similar
	 *	strategy for external pagers doesn't work, because
	 *	external pagers don't have to deallocate the pages sent them,
	 *	and because we might have to send pages to external pagers
	 *	even if they aren't processing writes.  So we also
	 *	use a burst count to limit writes to external pagers.
	 *
	 *	When memory is very tight, we can't rely on external pagers to
	 *	clean pages.  They probably aren't running, because they
	 *	aren't vm-privileged.  If we kept sending dirty pages to them,
	 *	we could exhaust the free list.
	 */
	vm_page_lock_queues();

	/*
	 *	Recalculate vm_page_inactivate_target.
	 */
	vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
							  vm_page_inactive_count);
		if (delayed_unlock == 0)
			vm_page_lock_queues();

		active_burst_count = vm_page_active_count;

		if (active_burst_count > vm_pageout_burst_active_throttle)
			active_burst_count = vm_pageout_burst_active_throttle;
		/*
		 * Move pages from active to inactive.
		 */
		while ((need_internal_inactive ||
			vm_page_inactive_count < vm_page_inactive_target) &&
		       !queue_empty(&vm_page_queue_active) &&
		       ((active_burst_count--) > 0)) {

			vm_pageout_active++;

			m = (vm_page_t) queue_first(&vm_page_queue_active);

			assert(m->active && !m->inactive);
			assert(!m->laundry);
			assert(m->object != kernel_object);

			/*
			 * Try to lock object; since we've already got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run... otherwise, we're likely to trip over this
			 * object in the same state as we work our way through
			 * the queue... clumps of pages associated with the same
			 * object are fairly typical on the inactive and active queues
			 */
			if (m->object != object) {
				if (object != NULL) {
					vm_object_unlock(object);

				if (!vm_object_lock_try(m->object)) {
					/*
					 * move page to end of active queue and continue
					 */
					queue_remove(&vm_page_queue_active, m,

					queue_enter(&vm_page_queue_active, m,

					goto done_with_activepage;

			/*
			 * if the page is BUSY, then we pull it
			 * off the active queue and leave it alone.
			 * when BUSY is cleared, it will get stuck
			 * back on the appropriate queue
			 */
				queue_remove(&vm_page_queue_active, m,

				m->pageq.next = NULL;
				m->pageq.prev = NULL;

				vm_page_active_count--;

				goto done_with_activepage;
			if (need_internal_inactive) {
				/*
				 * If we're unable to make forward progress
				 * with the current set of pages on the
				 * inactive queue due to busy objects or
				 * throttled pageout queues, then
				 * move a page that is already clean
				 * or belongs to a pageout queue that
				 * isn't currently throttled
				 */
				active_throttled = FALSE;

				if (object->internal) {
					if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
						active_throttled = TRUE;
				} else if (VM_PAGE_Q_THROTTLED(eq)) {
					active_throttled = TRUE;

				if (active_throttled == TRUE) {

					refmod_state = pmap_get_refmod(m->phys_page);

					if (refmod_state & VM_MEM_REFERENCED)
						m->reference = TRUE;
					if (refmod_state & VM_MEM_MODIFIED)

					if (m->dirty || m->precious) {
						/*
						 * page is dirty and targets a THROTTLED queue
						 * so all we can do is move it back to the
						 * end of the active queue to get it out
						 */
						queue_remove(&vm_page_queue_active, m,

						queue_enter(&vm_page_queue_active, m,

						vm_pageout_scan_active_throttled++;

						goto done_with_activepage;

				vm_pageout_scan_active_throttle_success++;
				need_internal_inactive--;

			/*
			 * Deactivate the page while holding the object
			 * locked, so we know the page is still not busy.
			 * This should prevent races between pmap_enter
			 * and pmap_clear_reference.  The page might be
			 * absent or fictitious, but vm_page_deactivate
			 */
			vm_page_deactivate(m);
done_with_activepage:
			if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {

				if (object != NULL) {
					vm_object_unlock(object);

				vm_page_free_list(local_freeq);

				vm_page_unlock_queues();

				vm_page_lock_queues();
				/*
				 * continue the while loop processing
				 * the active queue... need to hold
				 * the page queues lock
				 */

		/**********************************************************************
		 * above this point we're playing with the active queue
		 * below this point we're playing with the throttling mechanisms
		 * and the inactive queue
		 **********************************************************************/
		/*
		 * We are done if we have met our target *and*
		 * nobody is still waiting for a page.
		 */
		if (vm_page_free_count + local_freed >= vm_page_free_target) {
			if (object != NULL) {
				vm_object_unlock(object);

			vm_page_free_list(local_freeq);

			mutex_lock(&vm_page_queue_free_lock);

			if ((vm_page_free_count >= vm_page_free_target) &&
			    (vm_page_free_wanted == 0)) {

				vm_page_unlock_queues();

				thread_wakeup((event_t) &vm_pageout_garbage_collect);

			mutex_unlock(&vm_page_queue_free_lock);
		/*
		 * Sometimes we have to pause:
		 *	1) No inactive pages - nothing to do.
		 *	2) Flow control - default pageout queue is full
		 *	3) Loop control - no acceptable pages found on the inactive queue
		 *	   within the last vm_pageout_burst_inactive_throttle iterations
		 */
		if ((queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf))) {
			vm_pageout_scan_empty_throttle++;
			msecs = vm_pageout_empty_wait;
			goto vm_pageout_scan_delay;

		} else if (inactive_burst_count >= vm_pageout_burst_inactive_throttle) {
			vm_pageout_scan_burst_throttle++;
			msecs = vm_pageout_burst_wait;
			goto vm_pageout_scan_delay;
		} else if (VM_PAGE_Q_THROTTLED(iq)) {

			switch (flow_control.state) {

reset_deadlock_timer:
				ts.tv_sec = vm_pageout_deadlock_wait / 1000;
				ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
				clock_get_system_nanotime(
					&flow_control.ts.tv_sec,
					(uint32_t *) &flow_control.ts.tv_nsec);
				ADD_MACH_TIMESPEC(&flow_control.ts, &ts);

				flow_control.state = FCS_DELAYED;
				msecs = vm_pageout_deadlock_wait;
				clock_get_system_nanotime(
					(uint32_t *) &ts.tv_nsec);

				if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
					/*
					 * the pageout thread for the default pager is potentially
					 * deadlocked since the
					 * default pager queue has been throttled for more than the
					 * allowable time... we need to move some clean pages or dirty
					 * pages belonging to the external pagers if they aren't throttled
					 * vm_page_free_wanted represents the number of threads currently
					 * blocked waiting for pages... we'll move one page for each of
					 * these plus a fixed amount to break the logjam... once we're done
					 * moving this number of pages, we'll re-enter the FCS_DELAYED state
					 * with a new timeout target since we have no way of knowing
					 * whether we've broken the deadlock except through observation
					 * of the queue associated with the default pager... we need to
					 * stop moving pages and allow the system to run to see what
					 * state it settles into.
					 */
					vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted;
					vm_pageout_scan_deadlock_detected++;
					flow_control.state = FCS_DEADLOCK_DETECTED;

					thread_wakeup((event_t) &vm_pageout_garbage_collect);
					goto consider_inactive;

				/*
				 * just resniff instead of trying
				 * to compute a new delay time... we're going to be
				 * awakened immediately upon a laundry completion,
				 * so we won't wait any longer than necessary
				 */
				msecs = vm_pageout_idle_wait;

			case FCS_DEADLOCK_DETECTED:
				if (vm_pageout_deadlock_target)
					goto consider_inactive;
				goto reset_deadlock_timer;

			vm_pageout_scan_throttle++;
			iq->pgo_throttled = TRUE;
vm_pageout_scan_delay:
			if (object != NULL) {
				vm_object_unlock(object);

			vm_page_free_list(local_freeq);

			assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);

			counter(c_vm_pageout_scan_block++);

			vm_page_unlock_queues();

			thread_block(THREAD_CONTINUE_NULL);

			vm_page_lock_queues();

			iq->pgo_throttled = FALSE;

			if (loop_count >= vm_page_inactive_count) {
				if (VM_PAGE_Q_THROTTLED(eq) || VM_PAGE_Q_THROTTLED(iq)) {
					/*
					 * Make sure we move enough "appropriate"
					 * pages to the inactive queue before trying
					 */
					need_internal_inactive = vm_pageout_inactive_relief;

			inactive_burst_count = 0;
		flow_control.state = FCS_IDLE;

		inactive_burst_count++;
		vm_pageout_inactive++;

		if (!queue_empty(&vm_page_queue_inactive)) {
			m = (vm_page_t) queue_first(&vm_page_queue_inactive);

			if (m->clustered && (m->no_isync == TRUE)) {

		if (vm_zf_count < vm_accellerate_zf_pageout_trigger) {

			if ((vm_zf_iterator += 1) >= vm_zf_iterator_count) {

			if (queue_empty(&vm_page_queue_zf) ||
			    (((last_page_zf) || (vm_zf_iterator == 0)) &&
			     !queue_empty(&vm_page_queue_inactive))) {
				m = (vm_page_t) queue_first(&vm_page_queue_inactive);

				m = (vm_page_t) queue_first(&vm_page_queue_zf);
		assert(!m->active && m->inactive);
		assert(!m->laundry);
		assert(m->object != kernel_object);

		/*
		 * Try to lock object; since we've already got the
		 * page queues lock, we can only 'try' for this one.
		 * if the 'try' fails, we need to do a mutex_pause
		 * to allow the owner of the object lock a chance to
		 * run... otherwise, we're likely to trip over this
		 * object in the same state as we work our way through
		 * the queue... clumps of pages associated with the same
		 * object are fairly typical on the inactive and active queues
		 */
		if (m->object != object) {
			if (object != NULL) {
				vm_object_unlock(object);

			if (!vm_object_lock_try(m->object)) {
				/*
				 * Move page to end and continue.
				 * Don't re-issue ticket
				 */
				queue_remove(&vm_page_queue_zf, m,

				queue_enter(&vm_page_queue_zf, m,

				queue_remove(&vm_page_queue_inactive, m,

				queue_enter(&vm_page_queue_inactive, m,

				vm_pageout_inactive_nolock++;

				/*
				 * force us to dump any collected free pages
				 * and to pause before moving on
				 */
				delayed_unlock = DELAYED_UNLOCK_LIMIT + 1;

				goto done_with_inactivepage;
		/*
		 * If the page belongs to a purgable object with no pending copies
		 * against it, then we reap all of the pages in the object
		 * and note that the object has been "emptied".  It'll be up to the
		 * application to discover this and recreate its contents if desired.
		 */
		if ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE ||
		     object->purgable == VM_OBJECT_PURGABLE_EMPTY) &&
		    object->copy == VM_OBJECT_NULL) {

			(void) vm_object_purge(object);
			vm_pageout_purged_objects++;
			/*
			 * we've just taken all of the pages from this object,
			 * so drop the lock now since we're not going to find
			 * any more pages belonging to it anytime soon
			 */
			vm_object_unlock(object);

			inactive_burst_count = 0;

			goto done_with_inactivepage;
		/*
		 * Paging out pages of external objects which
		 * are currently being created must be avoided.
		 * The pager may claim memory, thus leading to a
		 * possible deadlock between it and the pageout thread,
		 * if such pages are finally chosen. The remaining assumption
		 * is that there will finally be enough available pages in the
		 * inactive pool to page out in order to satisfy all memory
		 * claimed by the thread which concurrently creates the pager.
		 */
		if (!object->pager_initialized && object->pager_created) {
			/*
			 * Move page to end and continue, hoping that
			 * there will be enough other inactive pages to
			 * page out so that the thread which currently
			 * initializes the pager will succeed.
			 * Don't re-grant the ticket, the page should be
			 * pulled from the queue and paged out whenever
			 * one of its logically adjacent fellows is
			 * targeted.
			 */
			queue_remove(&vm_page_queue_zf, m,

			queue_enter(&vm_page_queue_zf, m,

			vm_zf_iterator = vm_zf_iterator_count - 1;

			queue_remove(&vm_page_queue_inactive, m,

			queue_enter(&vm_page_queue_inactive, m,

			vm_pageout_inactive_avoid++;

			goto done_with_inactivepage;
		/*
		 * Remove the page from the inactive list.
		 */
			queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);

			queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);

		m->pageq.next = NULL;
		m->pageq.prev = NULL;
		m->inactive = FALSE;

		vm_page_inactive_count--;

		if (m->busy || !object->alive) {
			/*
			 * Somebody is already playing with this page.
			 * Leave it off the pageout queues.
			 */
			vm_pageout_inactive_busy++;

			goto done_with_inactivepage;
		/*
		 * If it's absent or in error, we can reclaim the page.
		 */
		if (m->absent || m->error) {
			vm_pageout_inactive_absent++;

			if (vm_pageout_deadlock_target) {
				vm_pageout_scan_inactive_throttle_success++;
				vm_pageout_deadlock_target--;

			vm_page_remove(m);	/* clears tabled, object, offset */

			vm_object_absent_release(object);

			assert(m->pageq.next == NULL &&
			       m->pageq.prev == NULL);
			m->pageq.next = (queue_entry_t)local_freeq;

			inactive_burst_count = 0;

			goto done_with_inactivepage;

		assert(!m->private);
		assert(!m->fictitious);
		/*
		 * If already cleaning this page in place, convert from
		 * "adjacent" to "target". We can leave the page mapped,
		 * and vm_pageout_object_terminate will determine whether
		 * to free or reactivate.
		 */
			m->dump_cleaning = TRUE;

			CLUSTER_STAT(vm_pageout_cluster_conversions++);

			inactive_burst_count = 0;

			goto done_with_inactivepage;
		/*
		 * If it's being used, reactivate.
		 * (Fictitious pages are either busy or absent.)
		 */
		if ( (!m->reference) ) {
			refmod_state = pmap_get_refmod(m->phys_page);

			if (refmod_state & VM_MEM_REFERENCED)
				m->reference = TRUE;
			if (refmod_state & VM_MEM_MODIFIED)

			vm_page_activate(m);
			VM_STAT(reactivations++);

			vm_pageout_inactive_used++;

			inactive_burst_count = 0;

			goto done_with_inactivepage;

		"vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
		(integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
		/*
		 * we've got a candidate page to steal...
		 *
		 * m->dirty is up to date courtesy of the
		 * preceding check for m->reference... if
		 * we get here, then m->reference had to be
		 * FALSE which means we did a pmap_get_refmod
		 * and updated both m->reference and m->dirty
		 *
		 * if it's dirty or precious we need to
		 * see if the target queue is throttled
		 * if it is, we need to skip over it by moving it back
		 * to the end of the inactive queue
		 */
		inactive_throttled = FALSE;

		if (m->dirty || m->precious) {
			if (object->internal) {
				if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
					inactive_throttled = TRUE;
			} else if (VM_PAGE_Q_THROTTLED(eq)) {
				inactive_throttled = TRUE;

		if (inactive_throttled == TRUE) {
				queue_enter(&vm_page_queue_zf, m,

				queue_enter(&vm_page_queue_inactive, m,

			vm_page_inactive_count++;

			vm_pageout_scan_inactive_throttled++;

			goto done_with_inactivepage;
		/*
		 * we've got a page that we can steal...
		 * eliminate all mappings and make sure
		 * we have the up-to-date modified state
		 * first take the page BUSY, so that no new
		 * mappings can be made
		 */

		/*
		 * if we need to do a pmap_disconnect then we
		 * need to re-evaluate m->dirty since the pmap_disconnect
		 * provides the true state atomically... the
		 * page was still mapped up to the pmap_disconnect
		 * and may have been dirtied at the last microsecond
		 *
		 * we also check for the page being referenced 'late'
		 * if it was, we first need to do a WAKEUP_DONE on it
		 * since we already set m->busy = TRUE, before
		 * going off to reactivate it
		 *
		 * if we don't need the pmap_disconnect, then
		 * m->dirty is up to date courtesy of the
		 * earlier check for m->reference... if
		 * we get here, then m->reference had to be
		 * FALSE which means we did a pmap_get_refmod
		 * and updated both m->reference and m->dirty...
		 */
		if (m->no_isync == FALSE) {
			refmod_state = pmap_disconnect(m->phys_page);

			if (refmod_state & VM_MEM_MODIFIED)

			if (refmod_state & VM_MEM_REFERENCED) {
				m->reference = TRUE;

				PAGE_WAKEUP_DONE(m);
				goto was_referenced;

		/*
		 * If it's clean and not precious, we can free the page.
		 */
		if (!m->dirty && !m->precious) {
			vm_pageout_inactive_clean++;

		vm_pageout_cluster(m);

		vm_pageout_inactive_dirty++;

		inactive_burst_count = 0;

done_with_inactivepage:
		if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {

			if (object != NULL) {
				vm_object_unlock(object);

			vm_page_free_list(local_freeq);

			vm_page_unlock_queues();
		/*
		 * back to top of pageout scan loop
		 */
int vm_page_free_count_init;

vm_page_free_reserve(
	int	free_after_reserve;

	vm_page_free_reserved += pages;

	free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;

	vm_page_free_min = vm_page_free_reserved +
		VM_PAGE_FREE_MIN(free_after_reserve);

	vm_page_free_target = vm_page_free_reserved +
		VM_PAGE_FREE_TARGET(free_after_reserve);

	if (vm_page_free_target < vm_page_free_min + 5)
		vm_page_free_target = vm_page_free_min + 5;
/*
 *	vm_pageout is the high level pageout daemon.
 */

vm_pageout_continue(void)
	vm_pageout_scan_event_counter++;

	/* we hold vm_page_queue_free_lock now */
	assert(vm_page_free_wanted == 0);
	assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
	mutex_unlock(&vm_page_queue_free_lock);

	counter(c_vm_pageout_block++);
	thread_block((thread_continue_t)vm_pageout_continue);
/*
 * must be called with the
 * queues and object locks held
 */
vm_pageout_queue_steal(vm_page_t m)
	struct vm_pageout_queue *q;

	if (m->object->internal == TRUE)
		q = &vm_pageout_queue_internal;
	else
		q = &vm_pageout_queue_external;

	m->pageout_queue = FALSE;
	queue_remove(&q->pgo_pending, m, vm_page_t, pageq);

	m->pageq.next = NULL;
	m->pageq.prev = NULL;

	vm_object_paging_end(m->object);
#ifdef FAKE_DEADLOCK

#define FAKE_COUNT	5000

int internal_count = 0;
int fake_deadlock = 0;
vm_pageout_iothread_continue(struct vm_pageout_queue *q)
	boolean_t	need_wakeup;

	vm_page_lock_queues();

	while ( !queue_empty(&q->pgo_pending) ) {

		queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
		m->pageout_queue = FALSE;
		vm_page_unlock_queues();

		m->pageq.next = NULL;
		m->pageq.prev = NULL;
#ifdef FAKE_DEADLOCK
		if (q == &vm_pageout_queue_internal) {

			if ((internal_count == FAKE_COUNT)) {

				pg_count = vm_page_free_count + vm_page_free_reserved;

				if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
					kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
		if (!object->pager_initialized) {
			vm_object_lock(object);

			/*
			 * If there is no memory object for the page, create
			 * one and hand it to the default pager.
			 */
			if (!object->pager_initialized)
				vm_object_collapse(object,
						   (vm_object_offset_t) 0,

			if (!object->pager_initialized)
				vm_object_pager_create(object);
			if (!object->pager_initialized) {
				/*
				 * Still no pager for the object.
				 * Reactivate the page.
				 *
				 * Should only happen if there is no
				 * default pager.
				 */
				m->list_req_pending = FALSE;
				m->cleaning = FALSE;

				vm_pageout_throttle_up(m);

				vm_page_lock_queues();
				vm_pageout_dirty_no_pager++;
				vm_page_activate(m);
				vm_page_unlock_queues();

				/*
				 * And we are done with it.
				 */
				PAGE_WAKEUP_DONE(m);

				vm_object_paging_end(object);
				vm_object_unlock(object);

				vm_page_lock_queues();
		} else if (object->pager == MEMORY_OBJECT_NULL) {
			/*
			 * This pager has been destroyed by either
			 * memory_object_destroy or vm_object_destroy, and
			 * so there is nowhere for the page to go.
			 * Just free the page... VM_PAGE_FREE takes
			 * care of cleaning up all the state...
			 * including doing the vm_pageout_throttle_up
			 */
			vm_object_paging_end(object);
			vm_object_unlock(object);

			vm_page_lock_queues();

			vm_object_unlock(object);

		/*
		 * we expect the paging_in_progress reference to have
		 * already been taken on the object before it was added
		 * to the appropriate pageout I/O queue... this will
		 * keep the object from being terminated and/or the
		 * paging_offset from changing until the I/O has
		 * completed... therefore no need to lock the object to
		 * pull the paging_offset from it.
		 *
		 * Send the data to the pager.
		 * any pageout clustering happens there
		 */
		memory_object_data_return(object->pager,
					  m->offset + object->paging_offset,

		vm_object_lock(object);
		vm_object_paging_end(object);
		vm_object_unlock(object);

		vm_page_lock_queues();
	assert_wait((event_t) q, THREAD_UNINT);

	if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) {
		q->pgo_throttled = FALSE;

		need_wakeup = FALSE;

	q->pgo_busy = FALSE;

	vm_page_unlock_queues();

	if (need_wakeup == TRUE)
		thread_wakeup((event_t) &q->pgo_laundry);

	thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending);
vm_pageout_iothread_external(void)
	vm_pageout_iothread_continue(&vm_pageout_queue_external);

vm_pageout_iothread_internal(void)
	thread_t	self = current_thread();

	self->options |= TH_OPT_VMPRIV;

	vm_pageout_iothread_continue(&vm_pageout_queue_internal);
vm_pageout_garbage_collect(int collect)
	/*
	 * consider_zone_gc should be last, because the other operations
	 * might return memory to zones.
	 */
	consider_machine_collect();

	consider_machine_adjust();

	assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);

	thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
	thread_t	self = current_thread();
	kern_return_t	result;

	/*
	 * Set thread privileges.
	 */
	self->priority = BASEPRI_PREEMPT - 1;
	set_sched_pri(self, self->priority);
	thread_unlock(self);

	/*
	 * Initialize some paging parameters.
	 */

	if (vm_pageout_idle_wait == 0)
		vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;

	if (vm_pageout_burst_wait == 0)
		vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;

	if (vm_pageout_empty_wait == 0)
		vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;

	if (vm_pageout_deadlock_wait == 0)
		vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;

	if (vm_pageout_deadlock_relief == 0)
		vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;

	if (vm_pageout_inactive_relief == 0)
		vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;

	if (vm_pageout_burst_active_throttle == 0)
		vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;

	if (vm_pageout_burst_inactive_throttle == 0)
		vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;

	/*
	 * Set kernel task to low backing store privileged
	 */
	task_lock(kernel_task);
	kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
	task_unlock(kernel_task);

	vm_page_free_count_init = vm_page_free_count;

	/*
	 * even if we've already called vm_page_free_reserve
	 * call it again here to ensure that the targets are
	 * accurately calculated (it uses vm_page_free_count_init)
	 * calling it with an arg of 0 will not change the reserve
	 * but will re-calculate free_min and free_target
	 */
	if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
		vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
	} else
		vm_page_free_reserve(0);
	queue_init(&vm_pageout_queue_external.pgo_pending);
	vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
	vm_pageout_queue_external.pgo_laundry = 0;
	vm_pageout_queue_external.pgo_idle = FALSE;
	vm_pageout_queue_external.pgo_busy = FALSE;
	vm_pageout_queue_external.pgo_throttled = FALSE;

	queue_init(&vm_pageout_queue_internal.pgo_pending);
	vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
	vm_pageout_queue_internal.pgo_laundry = 0;
	vm_pageout_queue_internal.pgo_idle = FALSE;
	vm_pageout_queue_internal.pgo_busy = FALSE;
	vm_pageout_queue_internal.pgo_throttled = FALSE;
	result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &thread);
	if (result != KERN_SUCCESS)
		panic("vm_pageout_iothread_internal: create failed");

	thread_deallocate(thread);

	result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, BASEPRI_PREEMPT - 1, &thread);
	if (result != KERN_SUCCESS)
		panic("vm_pageout_iothread_external: create failed");

	thread_deallocate(thread);

	result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, BASEPRI_PREEMPT - 2, &thread);
	if (result != KERN_SUCCESS)
		panic("vm_pageout_garbage_collect: create failed");

	thread_deallocate(thread);

	vm_pageout_continue();
	int	page_field_size;  /* bit field in word size buf */

	page_field_size = 0;
	if (flags & UPL_CREATE_LITE) {
		page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
		page_field_size = (page_field_size + 3) & 0xFFFFFFFC;

	if (flags & UPL_CREATE_INTERNAL) {
		upl = (upl_t)kalloc(sizeof(struct upl)
				    + (sizeof(struct upl_page_info)*(size/PAGE_SIZE))

		upl = (upl_t)kalloc(sizeof(struct upl) + page_field_size);

	upl->src_object = NULL;
	upl->kaddr = (vm_offset_t)0;

	upl->map_object = NULL;

	upl->highest_page = 0;

	upl->ubc_alias1 = 0;
	upl->ubc_alias2 = 0;
#endif /* UPL_DEBUG */
	int	page_field_size;  /* bit field in word size buf */

	if (upl->map_object->pageout) {
		object = upl->map_object->shadow;

		object = upl->map_object;

	vm_object_lock(object);
	queue_iterate(&object->uplq, upl_ele, upl_t, uplq) {
		if (upl_ele == upl) {
			queue_remove(&object->uplq,
				     upl_ele, upl_t, uplq);

	vm_object_unlock(object);
#endif /* UPL_DEBUG */
	/* drop a reference on the map_object whether or */
	/* not a pageout object is inserted */
	if (upl->map_object->pageout)
		vm_object_deallocate(upl->map_object);

	page_field_size = 0;
	if (upl->flags & UPL_LITE) {
		page_field_size = ((upl->size/PAGE_SIZE) + 7) >> 3;
		page_field_size = (page_field_size + 3) & 0xFFFFFFFC;

	if (upl->flags & UPL_INTERNAL) {
		sizeof(struct upl) +
		       (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE))

		kfree(upl, sizeof(struct upl) + page_field_size);
void	uc_upl_dealloc(upl_t upl);
__private_extern__ void
uc_upl_dealloc(upl_t upl)
{
	upl->ref_count -= 1;
	if(upl->ref_count == 0) {
		upl_destroy(upl);
	}
}

	upl->ref_count -= 1;
	if(upl->ref_count == 0) {
		upl_destroy(upl);
	}
/*
 * Statistics about UPL enforcement of copy-on-write obligations.
 */
unsigned long upl_cow = 0;
unsigned long upl_cow_again = 0;
unsigned long upl_cow_contiguous = 0;
unsigned long upl_cow_pages = 0;
unsigned long upl_cow_again_pages = 0;
unsigned long upl_cow_contiguous_pages = 0;
/*
 *	Routine:	vm_object_upl_request
 *		Cause the population of a portion of a vm_object.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		A page list structure, listing the physical pages
 *		will be returned upon request.
 *		This function is called by the file system or any other
 *		supplier of backing store to a pager.
 *		IMPORTANT NOTE: The caller must still respect the relationship
 *		between the vm_object and its backing memory object.  The
 *		caller MUST NOT substitute changes in the backing file
 *		without first doing a memory_object_lock_request on the
 *		target range unless it is known that the pages are not
 *		shared with another entity at the pager level.
 *
 *		if a page list structure is present
 *		return the mapped physical pages, where a
 *		page is not present, return a non-initialized
 *		one.  If the no_sync bit is turned on, don't
 *		call the pager unlock to synchronize with other
 *		possible copies of the page.  Leave pages busy
 *		in the original object, if a page list structure
 *		was specified.  When a commit of the page list
 *		pages is done, the dirty bit will be set for each one.
 *
 *		If a page list structure is present, return
 *		all mapped pages.  Where a page does not exist
 *		map a zero filled one.  Leave pages busy in
 *		the original object.  If a page list structure
 *		is not specified, this call is a no-op.
 *
 *		Note:  access of default pager objects has a rather interesting
 *		twist.  The caller of this routine, presumably the file system
 *		page cache handling code, will never actually make a request
 *		against a default pager backed object.  Only the default
 *		pager will make requests on backing store related vm_objects.
 *		In this way the default pager can maintain the relationship
 *		between backing store files (abstract memory objects) and
 *		the vm_objects (cache objects) they support.
 */
__private_extern__ kern_return_t
vm_object_upl_request(
	vm_object_t		object,
	vm_object_offset_t	offset,
	upl_size_t		size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_page_t		dst_page = VM_PAGE_NULL;
	vm_object_offset_t	dst_offset = offset;
	upl_size_t		xfer_size = size;
	boolean_t		do_m_lock = FALSE;
#if MACH_CLUSTER_STATS
	boolean_t		encountered_lrp = FALSE;
#endif
	vm_page_t		alias_page = NULL;
	wpl_array_t		lite_list = NULL;
	vm_object_t		last_copy_object;

	if (cntrl_flags & ~UPL_VALID_FLAGS) {
		/*
		 * For forward compatibility's sake,
		 * reject any unknown flag.
		 */
		return KERN_INVALID_VALUE;
	}
	page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK)
			>> UPL_PAGE_TICKET_SHIFT;

	if(((size/PAGE_SIZE) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
		size = MAX_UPL_TRANSFER * PAGE_SIZE;
	}

	if(cntrl_flags & UPL_SET_INTERNAL)
		if(page_list_count != NULL)
			*page_list_count = MAX_UPL_TRANSFER;

	if((!object->internal) && (object->paging_offset != 0))
		panic("vm_object_upl_request: external object with non-zero paging offset\n");

	if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
		return KERN_SUCCESS;
	}

	vm_object_lock(object);
	vm_object_paging_begin(object);
	vm_object_unlock(object);
	if(cntrl_flags & UPL_SET_INTERNAL) {
		if(cntrl_flags & UPL_SET_LITE) {
			uintptr_t page_field_size;
			upl = upl_create(
				UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
				size);
			user_page_list = (upl_page_info_t *)
				(((uintptr_t)upl) + sizeof(struct upl));
			lite_list = (wpl_array_t)
				(((uintptr_t)user_page_list) +
				((size/PAGE_SIZE) *
					sizeof(upl_page_info_t)));
			page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
			page_field_size =
				(page_field_size + 3) & 0xFFFFFFFC;
			bzero((char *)lite_list, page_field_size);
			upl->flags =
				UPL_LITE | UPL_INTERNAL;
		} else {
			upl = upl_create(UPL_CREATE_INTERNAL, size);
			user_page_list = (upl_page_info_t *)
				(((uintptr_t)upl) + sizeof(struct upl));
			upl->flags = UPL_INTERNAL;
		}
	} else {
		if(cntrl_flags & UPL_SET_LITE) {
			uintptr_t page_field_size;
			upl = upl_create(UPL_CREATE_LITE, size);
			lite_list = (wpl_array_t)
				(((uintptr_t)upl) + sizeof(struct upl));
			page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
			page_field_size =
				(page_field_size + 3) & 0xFFFFFFFC;
			bzero((char *)lite_list, page_field_size);
			upl->flags = UPL_LITE;
		} else {
			upl = upl_create(UPL_CREATE_EXTERNAL, size);
		}
	}
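	/*
	 * Memory layout assumed by the pointer arithmetic above for an
	 * INTERNAL + LITE UPL (all regions come from one upl_create()
	 * allocation):
	 *
	 *	[ struct upl ][ upl_page_info_t x (size/PAGE_SIZE) ][ lite bitmap ]
	 *	^ upl         ^ user_page_list                      ^ lite_list
	 */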
	if (object->phys_contiguous) {
		if ((cntrl_flags & UPL_WILL_MODIFY) &&
		    object->copy != VM_OBJECT_NULL) {
			/* Honor copy-on-write obligations */

			/*
			 * We could still have a race...
			 * A is here building the UPL for a write().
			 * A pushes the pages to the current copy
			 * A returns the UPL to the caller.
			 * B comes along and establishes another
			 * private mapping on this object, inserting
			 * a new copy object between the original
			 * object and the old copy object.
			 * B reads a page and gets the original contents
			 * from the original object.
			 * A modifies the page in the original object.
			 * B reads the page again and sees A's changes,
			 *
			 * The problem is that the pages are not
			 * marked "busy" in the original object, so
			 * nothing prevents B from reading it before
			 * A's changes are completed.
			 *
			 * The "paging_in_progress" might protect us
			 * from the insertion of a new copy object
			 * though...  To be verified.
			 */
			vm_object_lock_request(object,
				MEMORY_OBJECT_COPY_SYNC,

			upl_cow_contiguous++;
			upl_cow_contiguous_pages += size >> PAGE_SHIFT;
		}

		upl->map_object = object;
		/* don't need any shadow mappings for this one */
		/* since it is already I/O memory */
		upl->flags |= UPL_DEVICE_MEMORY;

		/* paging_in_progress protects paging_offset */
		upl->offset = offset + object->paging_offset;

		if(user_page_list) {
			user_page_list[0].phys_addr =
				(offset + object->shadow_offset)>>PAGE_SHIFT;
			user_page_list[0].device = TRUE;
		}
		upl->highest_page = (offset + object->shadow_offset + size - 1)>>PAGE_SHIFT;

		if(page_list_count != NULL) {
			if (upl->flags & UPL_INTERNAL) {
				*page_list_count = 0;
			} else {
				*page_list_count = 1;
			}
		}
		return KERN_SUCCESS;
	}
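	/*
	 * For physically contiguous (device) memory the UPL is marked
	 * UPL_DEVICE_MEMORY and the routine returns immediately: there is
	 * no shadow object and no per-page work, so at most a single page
	 * list entry is reported (phys_addr of the start of the range,
	 * device = TRUE).
	 */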
	user_page_list[0].device = FALSE;

	if(cntrl_flags & UPL_SET_LITE) {
		upl->map_object = object;
	} else {
		upl->map_object = vm_object_allocate(size);
		/*
		 * No need to lock the new object: nobody else knows
		 * about it yet, so it's all ours so far.
		 */
		upl->map_object->shadow = object;
		upl->map_object->pageout = TRUE;
		upl->map_object->can_persist = FALSE;
		upl->map_object->copy_strategy =
			MEMORY_OBJECT_COPY_NONE;
		upl->map_object->shadow_offset = offset;
		upl->map_object->wimg_bits = object->wimg_bits;
	}
	if (!(cntrl_flags & UPL_SET_LITE)) {
		VM_PAGE_GRAB_FICTITIOUS(alias_page);
	}
	/*
	 * Just mark the UPL as "encrypted" here.
	 * We'll actually encrypt the pages later,
	 * in upl_encrypt(), when the caller has
	 * selected which pages need to go to swap.
	 */
	if (cntrl_flags & UPL_ENCRYPT) {
		upl->flags |= UPL_ENCRYPTED;
	}
	if (cntrl_flags & UPL_FOR_PAGEOUT) {
		upl->flags |= UPL_PAGEOUT;
	}
	vm_object_lock(object);

	/* we can lock in the paging_offset once paging_in_progress is set */
	upl->offset = offset + object->paging_offset;
#ifdef UPL_DEBUG
	queue_enter(&object->uplq, upl, upl_t, uplq);
#endif /* UPL_DEBUG */
	if ((cntrl_flags & UPL_WILL_MODIFY) &&
	    object->copy != VM_OBJECT_NULL) {
		/* Honor copy-on-write obligations */

		/*
		 * The caller is gathering these pages and
		 * might modify their contents.  We need to
		 * make sure that the copy object has its own
		 * private copies of these pages before we let
		 * the caller modify them.
		 */
		vm_object_update(object,
			FALSE,		/* should_return */
			MEMORY_OBJECT_COPY_SYNC,

		upl_cow_pages += size >> PAGE_SHIFT;
	}
	/* remember which copy object we synchronized with */
	last_copy_object = object->copy;

	if(cntrl_flags & UPL_COPYOUT_FROM) {
		upl->flags |= UPL_PAGE_SYNC_DONE;
		if((alias_page == NULL) &&
		   !(cntrl_flags & UPL_SET_LITE)) {
			vm_object_unlock(object);
			VM_PAGE_GRAB_FICTITIOUS(alias_page);
			vm_object_lock(object);
		}
		if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
		     dst_page->fictitious ||
		     (dst_page->wire_count && !dst_page->pageout) ||
		     ((!dst_page->inactive) && (cntrl_flags & UPL_FOR_PAGEOUT) &&
		      (dst_page->page_ticket != page_ticket) &&
		      ((dst_page->page_ticket+1) != page_ticket)) ) {

			user_page_list[entry].phys_addr = 0;
		}
		/*
		 * grab this up front...
		 * a high percentage of the time we're going to
		 * need the hardware modification state a bit later
		 * anyway... so we can eliminate an extra call into
		 * the pmap layer by grabbing it here and recording it
		 */
		refmod_state = pmap_get_refmod(dst_page->phys_page);

		if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
			/*
			 * we're only asking for DIRTY pages to be returned
			 */
			if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
				/*
				 * if we were the page stolen by vm_pageout_scan to be
				 * cleaned (as opposed to a buddy being clustered in)
				 * or this request is not being driven by a PAGEOUT cluster
				 * then we only need to check for the page being dirty or
				 * precious to decide whether to return it
				 */
				if (dst_page->dirty || dst_page->precious ||
				    (refmod_state & VM_MEM_MODIFIED)) {
				}
			}
			/*
			 * this is a request for a PAGEOUT cluster and this page
			 * is merely along for the ride as a 'buddy'... not only
			 * does it have to be dirty to be returned, but it also
			 * can't have been referenced recently... note that we've
			 * already filtered above based on whether this page is
			 * currently on the inactive queue or it meets the page
			 * ticket (generation count) check
			 */
			if ( !(refmod_state & VM_MEM_REFERENCED) &&
			     ((refmod_state & VM_MEM_MODIFIED) ||
			      dst_page->dirty || dst_page->precious) ) {
			}
			/*
			 * if we reach here, we're not to return
			 * the page... go on to the next one
			 */
			user_page_list[entry].phys_addr = 0;

			dst_offset += PAGE_SIZE_64;
			xfer_size -= PAGE_SIZE;
		}
		if(dst_page->busy &&
		   (!(dst_page->list_req_pending &&
		      dst_page->pageout))) {
			if(cntrl_flags & UPL_NOBLOCK) {
				if(user_page_list) {
					user_page_list[entry].phys_addr = 0;
				}
				dst_offset += PAGE_SIZE_64;
				xfer_size -= PAGE_SIZE;
			}
			/*
			 * someone else is playing with the
			 * page.  We will have to wait.
			 */
			PAGE_SLEEP(object, dst_page, THREAD_UNINT);
		}
		/* Someone else already cleaning the page? */
		if((dst_page->cleaning || dst_page->absent ||
		    dst_page->wire_count != 0) &&
		    !dst_page->list_req_pending) {
			if(user_page_list) {
				user_page_list[entry].phys_addr = 0;
			}
			dst_offset += PAGE_SIZE_64;
			xfer_size -= PAGE_SIZE;
		}
		/* eliminate all mappings from the */
		/* original object and its progeny */

		vm_page_lock_queues();

		if (dst_page->pageout_queue == TRUE)
			/*
			 * we've buddied up a page for a clustered pageout
			 * that has already been moved to the pageout
			 * queue by pageout_scan... we need to remove
			 * it from the queue and drop the laundry count
			 */
			vm_pageout_queue_steal(dst_page);
#if MACH_CLUSTER_STATS
		/* pageout statistics gathering.  count */
		/* all the pages we will page out that  */
		/* were not counted in the initial      */
		/* vm_pageout_scan work                 */
		if(dst_page->list_req_pending)
			encountered_lrp = TRUE;
		if((dst_page->dirty ||
		    (dst_page->object->internal &&
		     dst_page->precious)) &&
		    (dst_page->list_req_pending
			if(encountered_lrp) {
				(pages_at_higher_offsets++;)
			} else {
				(pages_at_lower_offsets++;)
			}
#endif
		/* Turn off busy indication on pending */
		/* pageout.  Note: we can only get here */
		/* in the request pending case.  */
		dst_page->list_req_pending = FALSE;
		dst_page->busy = FALSE;
		dst_page->cleaning = FALSE;
		hw_dirty = refmod_state & VM_MEM_MODIFIED;
		dirty = hw_dirty ? TRUE : dst_page->dirty;

		if(cntrl_flags & UPL_SET_LITE) {
			pg_num = (dst_offset-offset)/PAGE_SIZE;
			lite_list[pg_num>>5] |=
				1 << (pg_num & 31);
			pmap_clear_modify(dst_page->phys_page);
			/*
			 * Record that this page has been
			 */
#if MACH_PAGEMAP
			vm_external_state_set(
				object->existence_map,
#endif /*MACH_PAGEMAP*/
		}
		/*
		 * Mark original page as cleaning
		 */
		dst_page->cleaning = TRUE;
		dst_page->dirty = TRUE;
		dst_page->precious = FALSE;

		/* use pageclean setup, it is more */
		/* convenient even for the pageout */
		vm_object_lock(upl->map_object);
		vm_pageclean_setup(dst_page,
			alias_page, upl->map_object,
		vm_object_unlock(upl->map_object);

		alias_page->absent = FALSE;

		dst_page->dirty = FALSE;
		dst_page->precious = TRUE;

		if(dst_page->pageout)
			dst_page->busy = TRUE;

		if ( (cntrl_flags & UPL_ENCRYPT) ) {
			/*
			 * We want to deny access to the target page
			 * because its contents are about to be
			 * encrypted and the user would be very
			 * confused to see encrypted data instead
			 */
			dst_page->busy = TRUE;
		}
		if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
			/*
			 * deny access to the target page
			 * while it is being worked on
			 */
			if ((!dst_page->pageout) &&
			    (dst_page->wire_count == 0)) {
				dst_page->busy = TRUE;
				dst_page->pageout = TRUE;
				vm_page_wire(dst_page);
			}
		}
		if (dst_page->phys_page > upl->highest_page)
			upl->highest_page = dst_page->phys_page;

		if(user_page_list) {
			user_page_list[entry].phys_addr
				= dst_page->phys_page;
			user_page_list[entry].dirty =
			user_page_list[entry].pageout =
			user_page_list[entry].absent =
			user_page_list[entry].precious =
		}
		vm_page_unlock_queues();
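		/*
		 * Summary of the gather (UPL_COPYOUT_FROM) path above: each
		 * page that survives the filters is marked "cleaning" (and
		 * "busy"/wired when it is being set up for pageout), its
		 * lite-list bit is set or it is attached to the UPL's shadow
		 * object via vm_pageclean_setup(), and its physical page
		 * number is recorded in user_page_list[entry] for the caller.
		 */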
		/*
		 * The caller is gathering this page and might
		 * access its contents later on.  Decrypt the
		 * page before adding it to the UPL, so that
		 * the caller never sees encrypted data.
		 */
		if (! (cntrl_flags & UPL_ENCRYPT) &&
		    dst_page->encrypted) {
			assert(dst_page->busy);

			vm_page_decrypt(dst_page, 0);
			vm_page_decrypt_for_upl_counter++;
			/*
			 * Retry this page, since anything
			 * could have changed while we were
			 */
		}
		dst_offset += PAGE_SIZE_64;
		xfer_size -= PAGE_SIZE;
		if((alias_page == NULL) &&
		   !(cntrl_flags & UPL_SET_LITE)) {
			vm_object_unlock(object);
			VM_PAGE_GRAB_FICTITIOUS(alias_page);
			vm_object_lock(object);
		}
		if ((cntrl_flags & UPL_WILL_MODIFY) &&
		    object->copy != last_copy_object) {
			/* Honor copy-on-write obligations */

			/*
			 * The copy object has changed since we
			 * last synchronized for copy-on-write.
			 * Another copy object might have been
			 * inserted while we released the object's
			 * lock.  Since someone could have seen the
			 * original contents of the remaining pages
			 * through that new object, we have to
			 * synchronize with it again for the remaining
			 * pages only.  The previous pages are "busy"
			 * so they can not be seen through the new
			 * mapping.  The new mapping will see our
			 * upcoming changes for those previous pages,
			 * but that's OK since they couldn't see what
			 * was there before.  It's just a race anyway
			 * and there's no guarantee of consistency or
			 * atomicity.  We just don't want new mappings
			 * to see both the *before* and *after* pages.
			 */
			if (object->copy != VM_OBJECT_NULL) {
				vm_object_update(object,
					dst_offset,	/* current offset */
					xfer_size,	/* remaining size */
					FALSE,		/* should_return */
					MEMORY_OBJECT_COPY_SYNC,

				upl_cow_again_pages +=
					xfer_size >> PAGE_SHIFT;
			}
			/* remember the copy object we synced with */
			last_copy_object = object->copy;
		}
		dst_page = vm_page_lookup(object, dst_offset);

		if(dst_page != VM_PAGE_NULL) {
			if((cntrl_flags & UPL_RET_ONLY_ABSENT) &&
			   !((dst_page->list_req_pending)
			     && (dst_page->absent))) {
				/* we are doing extended range */
				/* requests.  we want to grab  */
				/* pages around some which are */
				/* already present.  */
				if(user_page_list) {
					user_page_list[entry].phys_addr = 0;
				}
				dst_offset += PAGE_SIZE_64;
				xfer_size -= PAGE_SIZE;
			}
			if((dst_page->cleaning) &&
			   !(dst_page->list_req_pending)) {
				/* someone else is writing to the */
				/* page.  We will have to wait.   */
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
			}
			if ((dst_page->fictitious &&
			     dst_page->list_req_pending)) {
				/* dump the fictitious page */
				dst_page->list_req_pending = FALSE;
				dst_page->clustered = FALSE;

				vm_page_lock_queues();
				vm_page_free(dst_page);
				vm_page_unlock_queues();
			} else if ((dst_page->absent &&
				    dst_page->list_req_pending)) {
				/* the default_pager case */
				dst_page->list_req_pending = FALSE;
				dst_page->busy = FALSE;
			}
		}
		if(dst_page == VM_PAGE_NULL) {
			if(object->private) {
				/*
				 * This is a nasty wrinkle for users
				 * of upl who encounter device or
				 * private memory however, it is
				 * unavoidable, only a fault can
				 * resolve the actual backing
				 * physical page by asking the
				 */
				if(user_page_list) {
					user_page_list[entry].phys_addr = 0;
				}
				dst_offset += PAGE_SIZE_64;
				xfer_size -= PAGE_SIZE;
			}
			/* need to allocate a page */
			dst_page = vm_page_alloc(object, dst_offset);
			if (dst_page == VM_PAGE_NULL) {
				vm_object_unlock(object);
				vm_object_lock(object);
			}
			dst_page->busy = FALSE;

			if(cntrl_flags & UPL_NO_SYNC) {
				dst_page->page_lock = 0;
				dst_page->unlock_request = 0;
			}
			if(cntrl_flags & UPL_RET_ONLY_ABSENT) {
				/*
				 * if UPL_RET_ONLY_ABSENT was specified,
				 * then we're definitely setting up a
				 * upl for a clustered read/pagein
				 * operation... mark the pages as clustered
				 * so vm_fault can correctly attribute them
				 * to the 'pagein' bucket the first time
				 * a fault happens on them
				 */
				dst_page->clustered = TRUE;
			}
			dst_page->absent = TRUE;
			object->absent_count++;
		}
		if(cntrl_flags & UPL_NO_SYNC) {
			dst_page->page_lock = 0;
			dst_page->unlock_request = 0;
		}
		if (cntrl_flags & UPL_ENCRYPT) {
			/*
			 * The page is going to be encrypted when we
			 * get it from the pager, so mark it so.
			 */
			dst_page->encrypted = TRUE;
		} else {
			/*
			 * Otherwise, the page will not contain
			 */
			dst_page->encrypted = FALSE;
		}
		dst_page->overwriting = TRUE;

		if(dst_page->fictitious) {
			panic("need corner case for fictitious page");
		}
		if(dst_page->page_lock) {
		}
		/* eliminate all mappings from the */
		/* original object and its progeny */

		if(dst_page->busy) {
			/* someone else is playing with the */
			/* page.  We will have to wait.      */
			PAGE_SLEEP(object, dst_page, THREAD_UNINT);
		}
		vm_page_lock_queues();

		if( !(cntrl_flags & UPL_FILE_IO))
			hw_dirty = pmap_disconnect(dst_page->phys_page) & VM_MEM_MODIFIED;
		else
			hw_dirty = pmap_get_refmod(dst_page->phys_page) & VM_MEM_MODIFIED;
		dirty = hw_dirty ? TRUE : dst_page->dirty;

		if(cntrl_flags & UPL_SET_LITE) {
			pg_num = (dst_offset-offset)/PAGE_SIZE;
			lite_list[pg_num>>5] |=
				1 << (pg_num & 31);
			pmap_clear_modify(dst_page->phys_page);
			/*
			 * Record that this page has been
			 */
#if MACH_PAGEMAP
			vm_external_state_set(
				object->existence_map,
#endif /*MACH_PAGEMAP*/
		}
		/*
		 * Mark original page as cleaning
		 */
		dst_page->cleaning = TRUE;
		dst_page->dirty = TRUE;
		dst_page->precious = FALSE;

		/* use pageclean setup, it is more */
		/* convenient even for the pageout */
		vm_object_lock(upl->map_object);
		vm_pageclean_setup(dst_page,
			alias_page, upl->map_object,
		vm_object_unlock(upl->map_object);

		alias_page->absent = FALSE;

		if(cntrl_flags & UPL_CLEAN_IN_PLACE) {
			/* clean in place for read implies   */
			/* that a write will be done on all  */
			/* the pages that are dirty before   */
			/* a upl commit is done.  The caller */
			/* is obligated to preserve the      */
			/* contents of all pages marked      */
			upl->flags |= UPL_CLEAR_DIRTY;
		}

		dst_page->dirty = FALSE;
		dst_page->precious = TRUE;

		if (dst_page->wire_count == 0) {
			/* deny access to the target page while */
			/* it is being worked on */
			dst_page->busy = TRUE;

			vm_page_wire(dst_page);
		}
		if(cntrl_flags & UPL_RET_ONLY_ABSENT) {
			/*
			 * expect the page not to be used
			 * since it's coming in as part
			 * of a cluster and could be
			 * speculative... pages that
			 * are 'consumed' will get a
			 * hardware reference
			 */
			dst_page->reference = FALSE;
		} else {
			/*
			 * expect the page to be used
			 */
			dst_page->reference = TRUE;
		}
		dst_page->precious =
			(cntrl_flags & UPL_PRECIOUS)

		if (dst_page->phys_page > upl->highest_page)
			upl->highest_page = dst_page->phys_page;

		if(user_page_list) {
			user_page_list[entry].phys_addr
				= dst_page->phys_page;
			user_page_list[entry].dirty =
			user_page_list[entry].pageout =
			user_page_list[entry].absent =
			user_page_list[entry].precious =
		}
		vm_page_unlock_queues();
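		/*
		 * Lite-list indexing used above and in the commit/abort paths
		 * below: pg_num is the page's index within the UPL; bit
		 * (pg_num & 31) of 32-bit word (pg_num >> 5) marks the page
		 * as covered by this UPL:
		 *
		 *	lite_list[pg_num >> 5] |=  (1 << (pg_num & 31));   set
		 *	lite_list[pg_num >> 5] &= ~(1 << (pg_num & 31));   clear on commit
		 */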
		dst_offset += PAGE_SIZE_64;
		xfer_size -= PAGE_SIZE;
	}

	if (upl->flags & UPL_INTERNAL) {
		if(page_list_count != NULL)
			*page_list_count = 0;
	} else if (*page_list_count > entry) {
		if(page_list_count != NULL)
			*page_list_count = entry;
	}

	if(alias_page != NULL) {
		vm_page_lock_queues();
		vm_page_free(alias_page);
		vm_page_unlock_queues();
	}
		vm_prot_t	access_required;
		/* call back all associated pages from other users of the pager */
		/* all future updates will be on data which is based on the */
		/* changes we are going to make here. Note: it is assumed that */
		/* we already hold copies of the data so we will not be seeing */
		/* an avalanche of incoming data from the pager */

		access_required = (cntrl_flags & UPL_COPYOUT_FROM)
			? VM_PROT_READ : VM_PROT_WRITE;

		if(!object->pager_ready) {
			wait_result_t	wait_result;

			wait_result = vm_object_sleep(object,
				VM_OBJECT_EVENT_PAGER_READY,

			if (wait_result != THREAD_AWAKENED) {
				vm_object_unlock(object);
				return KERN_FAILURE;
			}
		}
		vm_object_unlock(object);

		rc = memory_object_data_unlock(
			dst_offset + object->paging_offset,

		if (rc != KERN_SUCCESS && rc != MACH_SEND_INTERRUPTED)
			return KERN_FAILURE;
		vm_object_lock(object);

		if (rc == KERN_SUCCESS)

		/* lets wait on the last page requested */
		/* NOTE: we will have to update lock completed routine to signal */
		if(dst_page != VM_PAGE_NULL &&
		   (access_required & dst_page->page_lock) != access_required) {
			PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
			vm_object_unlock(object);
			thread_block(THREAD_CONTINUE_NULL);
			return KERN_SUCCESS;
		}

	vm_object_unlock(object);
	return KERN_SUCCESS;
}
/* JMM - Backward compatibility for now */
kern_return_t
vm_fault_list_request(	/* forward */
	memory_object_control_t		control,
	vm_object_offset_t		offset,
	upl_page_info_t			**user_page_list_ptr,
	int				page_list_count,

kern_return_t
vm_fault_list_request(
	memory_object_control_t		control,
	vm_object_offset_t		offset,
	upl_page_info_t			**user_page_list_ptr,
	int				page_list_count,

	unsigned int		local_list_count;
	upl_page_info_t		*user_page_list;

	if (user_page_list_ptr != NULL) {
		local_list_count = page_list_count;
		user_page_list = *user_page_list_ptr;
	} else {
		local_list_count = 0;
		user_page_list = NULL;
	}
	kr = memory_object_upl_request(control,

	if(kr != KERN_SUCCESS)
		return kr;

	if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
		*user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
	}
	return KERN_SUCCESS;
}
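/*
 * vm_fault_list_request() is retained only as a backward-compatibility
 * wrapper: it forwards the request to memory_object_upl_request() and,
 * for internal UPLs, hands the caller the embedded page list via
 * UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr).
 */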
/*
 *	Routine:	vm_object_super_upl_request
 *		Cause the population of a portion of a vm_object
 *		in much the same way as memory_object_upl_request.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		However, the region may be expanded up to the super
 *		cluster size provided.
 */
__private_extern__ kern_return_t
vm_object_super_upl_request(
	vm_object_offset_t	offset,
	upl_size_t		super_cluster,
	upl_page_info_t		*user_page_list,
	unsigned int		*page_list_count,

	vm_page_t	target_page;

	if(object->paging_offset > offset)
		return KERN_FAILURE;

	assert(object->paging_in_progress);
	offset = offset - object->paging_offset;

	if(cntrl_flags & UPL_FOR_PAGEOUT) {

		vm_object_lock(object);

		if((target_page = vm_page_lookup(object, offset))
				!= VM_PAGE_NULL) {
			ticket = target_page->page_ticket;
			cntrl_flags = cntrl_flags & ~(int)UPL_PAGE_TICKET_MASK;
			cntrl_flags = cntrl_flags |
				((ticket << UPL_PAGE_TICKET_SHIFT)
					& UPL_PAGE_TICKET_MASK);
		}
		vm_object_unlock(object);
	}
	if (super_cluster > size) {

		vm_object_offset_t	base_offset;
		upl_size_t		super_size;

		base_offset = (offset &
			~((vm_object_offset_t) super_cluster - 1));
		super_size = (offset+size) > (base_offset + super_cluster) ?
			super_cluster<<1 : super_cluster;
		super_size = ((base_offset + super_size) > object->size) ?
			(object->size - base_offset) : super_size;

		if(offset > (base_offset + super_size))
			panic("vm_object_super_upl_request: Missed target pageout"
			      " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
			      offset, base_offset, super_size, super_cluster,
			      size, object->paging_offset);
		/*
		 * apparently there is a case where the vm requests a
		 * page to be written out whose offset is beyond the
		 */
		if((offset + size) > (base_offset + super_size))
			super_size = (offset + size) - base_offset;

		offset = base_offset;
	}
	return vm_object_upl_request(object, offset, size,
			upl, user_page_list, page_list_count,
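/*
 * Cluster expansion arithmetic used above: base_offset is the request
 * offset rounded down to a super_cluster boundary; if the original
 * request crosses the next boundary the cluster size is doubled, the
 * result is clipped so it never extends past object->size, and it is
 * widened again if the original request would otherwise fall outside it.
 */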
kern_return_t
vm_map_create_upl(
	vm_map_t		map,
	vm_map_address_t	offset,
	upl_size_t		*upl_size,
	upl_page_info_array_t	page_list,
	unsigned int		*count,

	vm_map_entry_t		entry;
	int			force_data_sync;
	vm_object_t		local_object;
	vm_map_offset_t		local_offset;
	vm_map_offset_t		local_start;

	caller_flags = *flags;

	if (caller_flags & ~UPL_VALID_FLAGS) {
		/*
		 * For forward compatibility's sake,
		 * reject any unknown flag.
		 */
		return KERN_INVALID_VALUE;
	}

	force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
	sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);

		return KERN_INVALID_ARGUMENT;
REDISCOVER_ENTRY:
	if (vm_map_lookup_entry(map, offset, &entry)) {
		if (entry->object.vm_object == VM_OBJECT_NULL ||
		    !entry->object.vm_object->phys_contiguous) {
			if((*upl_size/page_size) > MAX_UPL_TRANSFER) {
				*upl_size = MAX_UPL_TRANSFER * page_size;
			}
		}
		if((entry->vme_end - offset) < *upl_size) {
			*upl_size = entry->vme_end - offset;
		}
		if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
			if (entry->object.vm_object == VM_OBJECT_NULL) {
			} else if (entry->object.vm_object->private) {
				*flags = UPL_DEV_MEMORY;
				if (entry->object.vm_object->phys_contiguous) {
					*flags |= UPL_PHYS_CONTIG;
				}
			}
			return KERN_SUCCESS;
		}
		/*
		 * Create an object if necessary.
		 */
		if (entry->object.vm_object == VM_OBJECT_NULL) {
			entry->object.vm_object = vm_object_allocate(
				(vm_size_t)(entry->vme_end - entry->vme_start));
		}
		if (!(caller_flags & UPL_COPYOUT_FROM)) {
			if (!(entry->protection & VM_PROT_WRITE)) {
				return KERN_PROTECTION_FAILURE;
			}
			if (entry->needs_copy) {
				vm_map_offset_t		offset_hi;
				vm_map_offset_t		offset_lo;
				vm_object_offset_t	new_offset;
				vm_behavior_t		behavior;
				vm_map_version_t	version;

				vm_map_lock_write_to_read(map);

				if(vm_map_lookup_locked(&local_map,
					offset, VM_PROT_WRITE,
					&new_offset, &prot, &wired,
					&behavior, &offset_lo,
					&offset_hi, &real_map)) {
					vm_map_unlock(local_map);
					return KERN_FAILURE;
				}
				if (real_map != map) {
					vm_map_unlock(real_map);
				}
				vm_object_unlock(object);
				vm_map_unlock(local_map);

				goto REDISCOVER_ENTRY;
			}
		}
->is_sub_map
) {
3467 submap
= entry
->object
.sub_map
;
3468 local_start
= entry
->vme_start
;
3469 local_offset
= entry
->offset
;
3470 vm_map_reference(submap
);
3473 ret
= (vm_map_create_upl(submap
,
3474 local_offset
+ (offset
- local_start
),
3475 upl_size
, upl
, page_list
, count
,
3478 vm_map_deallocate(submap
);
3482 if (sync_cow_data
) {
3483 if (entry
->object
.vm_object
->shadow
3484 || entry
->object
.vm_object
->copy
) {
3486 local_object
= entry
->object
.vm_object
;
3487 local_start
= entry
->vme_start
;
3488 local_offset
= entry
->offset
;
3489 vm_object_reference(local_object
);
3492 if (entry
->object
.vm_object
->shadow
&&
3493 entry
->object
.vm_object
->copy
) {
3494 vm_object_lock_request(
3495 local_object
->shadow
,
3496 (vm_object_offset_t
)
3497 ((offset
- local_start
) +
3499 local_object
->shadow_offset
,
3501 MEMORY_OBJECT_DATA_SYNC
,
3504 sync_cow_data
= FALSE
;
3505 vm_object_deallocate(local_object
);
3506 goto REDISCOVER_ENTRY
;
		if (force_data_sync) {

			local_object = entry->object.vm_object;
			local_start = entry->vme_start;
			local_offset = entry->offset;

			vm_object_reference(local_object);

			vm_object_lock_request(
				(vm_object_offset_t)
				((offset - local_start) + local_offset),
				(vm_object_size_t)*upl_size, FALSE,
				MEMORY_OBJECT_DATA_SYNC,

			force_data_sync = FALSE;
			vm_object_deallocate(local_object);
			goto REDISCOVER_ENTRY;
		}

		if(!(entry->object.vm_object->private)) {
			if(*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
				*upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
			if(entry->object.vm_object->phys_contiguous) {
				*flags = UPL_PHYS_CONTIG;
			}
		} else {
			*flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
		}
		local_object = entry->object.vm_object;
		local_offset = entry->offset;
		local_start = entry->vme_start;

		vm_object_reference(local_object);

		if(caller_flags & UPL_SET_IO_WIRE) {
			ret = (vm_object_iopl_request(local_object,
				(vm_object_offset_t)
				((offset - local_start)
		} else {
			ret = (vm_object_upl_request(local_object,
				(vm_object_offset_t)
				((offset - local_start)
		}
		vm_object_deallocate(local_object);
	}
	return(KERN_FAILURE);
}
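/*
 * vm_map_create_upl() decision summary: submap entries recurse via
 * vm_map_create_upl() on the submap; objects with a shadow or copy are
 * first synchronized (MEMORY_OBJECT_DATA_SYNC) when sync_cow_data or
 * force_data_sync applies and the lookup is then retried; finally the
 * request goes to vm_object_iopl_request() when UPL_SET_IO_WIRE is
 * requested, or to vm_object_upl_request() otherwise.
 */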
/*
 * Internal routine to enter a UPL into a VM map.
 *
 * JMM - This should just be doable through the standard
 * vm_map_enter() API.
 */
	vm_map_offset_t		*dst_addr)
{
	vm_object_offset_t	offset;
	vm_map_offset_t		addr;

	if (upl == UPL_NULL)
		return KERN_INVALID_ARGUMENT;

	/* check to see if already mapped */
	if(UPL_PAGE_LIST_MAPPED & upl->flags) {
		return KERN_FAILURE;
	}

	if((!(upl->map_object->pageout)) &&
	   !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) ||
	     (upl->map_object->phys_contiguous))) {
		vm_page_t		alias_page;
		vm_object_offset_t	new_offset;
		wpl_array_t		lite_list;

		if(upl->flags & UPL_INTERNAL) {
			lite_list = (wpl_array_t)
				((((uintptr_t)upl) + sizeof(struct upl))
				 + ((upl->size/PAGE_SIZE)
				    * sizeof(upl_page_info_t)));
		} else {
			lite_list = (wpl_array_t)
				(((uintptr_t)upl) + sizeof(struct upl));
		}
		object = upl->map_object;
		upl->map_object = vm_object_allocate(upl->size);

		vm_object_lock(upl->map_object);

		upl->map_object->shadow = object;
		upl->map_object->pageout = TRUE;
		upl->map_object->can_persist = FALSE;
		upl->map_object->copy_strategy =
			MEMORY_OBJECT_COPY_NONE;
		upl->map_object->shadow_offset =
			upl->offset - object->paging_offset;
		upl->map_object->wimg_bits = object->wimg_bits;
		offset = upl->map_object->shadow_offset;

		vm_object_lock(object);

			pg_num = (new_offset)/PAGE_SIZE;

			if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {

				vm_object_unlock(object);
				VM_PAGE_GRAB_FICTITIOUS(alias_page);
				vm_object_lock(object);

				m = vm_page_lookup(object, offset);
				if (m == VM_PAGE_NULL) {
					panic("vm_upl_map: page missing\n");
				}
				vm_object_paging_begin(object);

				/*
				 * Convert the fictitious page to a private
				 * shadow of the real page.
				 */
				assert(alias_page->fictitious);
				alias_page->fictitious = FALSE;
				alias_page->private = TRUE;
				alias_page->pageout = TRUE;
				alias_page->phys_page = m->phys_page;

				vm_page_lock_queues();
				vm_page_wire(alias_page);
				vm_page_unlock_queues();

				/*
				 * The virtual page ("m") has to be wired in some way
				 * here or its physical page ("m->phys_page") could
				 * be recycled at any time.
				 * Assuming this is enforced by the caller, we can't
				 * get an encrypted page here.  Since the encryption
				 * key depends on the VM page's "pager" object and
				 * the "paging_offset", we couldn't handle 2 pageable
				 * VM pages (with different pagers and paging_offsets)
				 * sharing the same physical page:  we could end up
				 * encrypting with one key (via one VM page) and
				 * decrypting with another key (via the alias VM page).
				 */
				ASSERT_PAGE_DECRYPTED(m);

				vm_page_insert(alias_page,
					upl->map_object, new_offset);

				assert(!alias_page->wanted);
				alias_page->busy = FALSE;
				alias_page->absent = FALSE;
			}
			offset += PAGE_SIZE_64;
			new_offset += PAGE_SIZE_64;

		vm_object_unlock(object);
		vm_object_unlock(upl->map_object);
	if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous)
		offset = upl->offset - upl->map_object->paging_offset;

	vm_object_lock(upl->map_object);
	upl->map_object->ref_count++;
	vm_object_res_reference(upl->map_object);
	vm_object_unlock(upl->map_object);

	/* NEED A UPL_MAP ALIAS */
	kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
			  VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kr != KERN_SUCCESS) {

	vm_object_lock(upl->map_object);

	for(addr=*dst_addr; size > 0; size-=PAGE_SIZE,addr+=PAGE_SIZE) {
		m = vm_page_lookup(upl->map_object, offset);

			unsigned int	cache_attr;
			cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

			PMAP_ENTER(map->pmap, addr,

		offset+=PAGE_SIZE_64;
	}
	vm_object_unlock(upl->map_object);

	upl->ref_count++;	/* hold a reference for the mapping */
	upl->flags |= UPL_PAGE_LIST_MAPPED;
	upl->kaddr = *dst_addr;

	return KERN_SUCCESS;
}
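/*
 * Once mapped, the UPL holds an extra reference (upl->ref_count++) and
 * records the mapping address in upl->kaddr; pages are entered into the
 * target pmap with PMAP_ENTER using cache attributes taken from the
 * owning object's wimg_bits.  The removal path below undoes exactly
 * this state.
 */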
/*
 * Internal routine to remove a UPL mapping from a VM map.
 *
 * XXX - This should just be doable through a standard
 * vm_map_remove() operation.  Otherwise, implicit clean-up
 * of the target map won't be able to correctly remove
 * these (and release the reference on the UPL).  Having
 * to do this means we can't map these into user-space
 */
	if (upl == UPL_NULL)
		return KERN_INVALID_ARGUMENT;

	if(upl->flags & UPL_PAGE_LIST_MAPPED) {

		assert(upl->ref_count > 1);
		upl->ref_count--;		/* removing mapping ref */
		upl->flags &= ~UPL_PAGE_LIST_MAPPED;
		upl->kaddr = (vm_offset_t) 0;

			vm_map_trunc_page(addr),
			vm_map_round_page(addr + size),

		return KERN_SUCCESS;
	}
	return KERN_FAILURE;
kern_return_t
upl_commit_range(
	upl_t			upl,
	upl_offset_t		offset,
	upl_page_info_t		*page_list,
	mach_msg_type_number_t	count,

	upl_size_t		xfer_size = size;
	vm_object_t		shadow_object;
	vm_object_t		object = upl->map_object;
	vm_object_offset_t	target_offset;
	wpl_array_t		lite_list;
	int			delayed_unlock = 0;
	int			clear_refmod = 0;
	boolean_t		shadow_internal;

	if (upl == UPL_NULL)
		return KERN_INVALID_ARGUMENT;

	if (object->pageout) {
		shadow_object = object->shadow;
	} else {
		shadow_object = object;
	}

	if (upl->flags & UPL_ACCESS_BLOCKED) {
		/*
		 * We used this UPL to block access to the pages by marking
		 * them "busy".  Now we need to clear the "busy" bit to allow
		 * access to these pages again.
		 */
		flags |= UPL_COMMIT_ALLOW_ACCESS;
	}
	if (upl->flags & UPL_CLEAR_DIRTY)
		flags |= UPL_COMMIT_CLEAR_DIRTY;

	if (upl->flags & UPL_DEVICE_MEMORY) {
	} else if ((offset + size) > upl->size) {
		return KERN_FAILURE;
	}

	if (upl->flags & UPL_INTERNAL) {
		lite_list = (wpl_array_t)
			((((uintptr_t)upl) + sizeof(struct upl))
			 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
	} else {
		lite_list = (wpl_array_t)
			(((uintptr_t)upl) + sizeof(struct upl));
	}
	if (object != shadow_object)
		vm_object_lock(object);
	vm_object_lock(shadow_object);

	shadow_internal = shadow_object->internal;

	entry = offset/PAGE_SIZE;
	target_offset = (vm_object_offset_t)offset;
->flags
& UPL_LITE
) {
3869 pg_num
= target_offset
/PAGE_SIZE
;
3871 if (lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
3872 lite_list
[pg_num
>>5] &= ~(1 << (pg_num
& 31));
3873 m
= vm_page_lookup(shadow_object
,
3874 target_offset
+ (upl
->offset
-
3875 shadow_object
->paging_offset
));
3878 if (object
->pageout
) {
3879 if ((t
= vm_page_lookup(object
, target_offset
)) != NULL
) {
3882 if (delayed_unlock
) {
3884 vm_page_unlock_queues();
3892 object
->shadow_offset
);
3894 if (m
!= VM_PAGE_NULL
)
3895 vm_object_paging_end(m
->object
);
3898 if (m
!= VM_PAGE_NULL
) {
3902 if (upl
->flags
& UPL_IO_WIRE
) {
3904 if (delayed_unlock
== 0)
3905 vm_page_lock_queues();
3909 if (delayed_unlock
++ > DELAYED_UNLOCK_LIMIT
) {
3911 vm_page_unlock_queues();
3914 page_list
[entry
].phys_addr
= 0;
3916 if (flags
& UPL_COMMIT_SET_DIRTY
) {
3918 } else if (flags
& UPL_COMMIT_CLEAR_DIRTY
) {
3920 clear_refmod
|= VM_MEM_MODIFIED
;
3922 if (flags
& UPL_COMMIT_INACTIVATE
) {
3923 m
->reference
= FALSE
;
3924 clear_refmod
|= VM_MEM_REFERENCED
;
3925 vm_page_deactivate(m
);
3928 pmap_clear_refmod(m
->phys_page
, clear_refmod
);
3930 if (flags
& UPL_COMMIT_ALLOW_ACCESS
) {
3932 * We blocked access to the pages in this UPL.
3933 * Clear the "busy" bit and wake up any waiter
3936 PAGE_WAKEUP_DONE(m
);
3939 target_offset
+= PAGE_SIZE_64
;
3940 xfer_size
-= PAGE_SIZE
;
3944 if (delayed_unlock
== 0)
3945 vm_page_lock_queues();
			/*
			 * make sure to clear the hardware
			 * modify or reference bits before
			 * releasing the BUSY bit on this page
			 * otherwise we risk losing a legitimate
			 */
			if (flags & UPL_COMMIT_CLEAR_DIRTY) {
				clear_refmod |= VM_MEM_MODIFIED;
			}
			if (flags & UPL_COMMIT_INACTIVATE)
				clear_refmod |= VM_MEM_REFERENCED;

			pmap_clear_refmod(m->phys_page, clear_refmod);

			p = &(page_list[entry]);
			if(p->phys_addr && p->pageout && !m->pageout) {

			} else if (page_list[entry].phys_addr &&
				   !p->pageout && m->pageout &&
				   !m->dump_cleaning) {

				m->overwriting = FALSE;

				PAGE_WAKEUP_DONE(m);
			}
			page_list[entry].phys_addr = 0;

			m->dump_cleaning = FALSE;

			vm_pageout_throttle_up(m);

			m->cleaning = FALSE;

#if MACH_CLUSTER_STATS
			if (m->wanted) vm_pageout_target_collisions++;
#endif
			if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)

			vm_page_unwire(m);	/* reactivates */

			if (upl->flags & UPL_PAGEOUT) {
				CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
				VM_STAT(reactivations++);
			}
			PAGE_WAKEUP_DONE(m);

			vm_page_free(m);	/* clears busy, etc. */

			if (upl->flags & UPL_PAGEOUT) {
				CLUSTER_STAT(vm_pageout_target_page_freed++;)

				if (page_list[entry].dirty)
					VM_STAT(pageouts++);
			}
			if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
				vm_page_unlock_queues();
			}
			target_offset += PAGE_SIZE_64;
			xfer_size -= PAGE_SIZE;
#if MACH_CLUSTER_STATS
			m->dirty = pmap_is_modified(m->phys_page);

			if (m->dirty)   vm_pageout_cluster_dirtied++;
			else            vm_pageout_cluster_cleaned++;
			if (m->wanted)  vm_pageout_cluster_collisions++;
#endif
			if((m->busy) && (m->cleaning)) {
				/* the request_page_list case */

				if(shadow_object->absent_count == 1)
					vm_object_absent_release(shadow_object);
				else
					shadow_object->absent_count--;

				m->overwriting = FALSE;
			} else if (m->overwriting) {
				/* alternate request page list, write to
				 * page_list case.  Occurs when the original
				 * page was wired at the time of the list
				 */
				assert(m->wire_count != 0);
				vm_page_unwire(m);	/* reactivates */
				m->overwriting = FALSE;
			}
			m->cleaning = FALSE;

			/* It is a part of the semantic of COPYOUT_FROM */
			/* UPLs that a commit implies cache sync */
			/* between the vm page and the backing store */
			/* this can be used to strip the precious bit */
			/* as well as clean */
			if (upl->flags & UPL_PAGE_SYNC_DONE)
				m->precious = FALSE;

			if (flags & UPL_COMMIT_SET_DIRTY)

			if (flags & UPL_COMMIT_INACTIVATE) {
				m->reference = FALSE;
				vm_page_deactivate(m);
			} else if (!m->active && !m->inactive) {
				if (m->reference)
					vm_page_activate(m);
				else
					vm_page_deactivate(m);
			}
			if (flags & UPL_COMMIT_ALLOW_ACCESS) {
				/*
				 * We blocked access to the pages in this UPL.
				 * Clear the "busy" bit on this page before we
				 * wake up any waiter.
				 */
			}
			/*
			 * Wakeup any thread waiting for the page to be un-cleaning.
			 */
			if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
				vm_page_unlock_queues();
			}
			target_offset += PAGE_SIZE_64;
			xfer_size -= PAGE_SIZE;
		}
		vm_page_unlock_queues();

	if (upl->flags & UPL_DEVICE_MEMORY) {
	} else if (upl->flags & UPL_LITE) {
		pg_num = upl->size/PAGE_SIZE;
		pg_num = (pg_num + 31) >> 5;

		for(i = 0; i<pg_num; i++) {
			if(lite_list[i] != 0) {
			}
		}
	} else {
		if(queue_empty(&upl->map_object->memq)) {
		}
	}
	if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
	}
	if(object == shadow_object)
		vm_object_paging_end(shadow_object);

	vm_object_unlock(shadow_object);
	if (object != shadow_object)
		vm_object_unlock(object);

	return KERN_SUCCESS;
}
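/*
 * The delayed_unlock counter used above batches work under the
 * page-queues lock: the lock is taken on first use and only dropped
 * after more than DELAYED_UNLOCK_LIMIT pages have been processed,
 * rather than being acquired and released once per page.
 */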
	upl_offset_t		offset,

	upl_size_t		xfer_size = size;
	vm_object_t		shadow_object;
	vm_object_t		object = upl->map_object;
	vm_object_offset_t	target_offset;
	wpl_array_t		lite_list;
	boolean_t		shadow_internal;

	if (upl == UPL_NULL)
		return KERN_INVALID_ARGUMENT;

	if (upl->flags & UPL_IO_WIRE) {

		return upl_commit_range(upl,
	}
	if(object->pageout) {
		shadow_object = object->shadow;
	} else {
		shadow_object = object;
	}

	if(upl->flags & UPL_DEVICE_MEMORY) {
	} else if ((offset + size) > upl->size) {
		return KERN_FAILURE;
	}
	if (object != shadow_object)
		vm_object_lock(object);
	vm_object_lock(shadow_object);

	shadow_internal = shadow_object->internal;

	if(upl->flags & UPL_INTERNAL) {
		lite_list = (wpl_array_t)
			((((uintptr_t)upl) + sizeof(struct upl))
			 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
	} else {
		lite_list = (wpl_array_t)
			(((uintptr_t)upl) + sizeof(struct upl));
	}
	entry = offset/PAGE_SIZE;
	target_offset = (vm_object_offset_t)offset;
		if(upl->flags & UPL_LITE) {
			pg_num = target_offset/PAGE_SIZE;

			if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
				lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
				m = vm_page_lookup(shadow_object,
					target_offset + (upl->offset -
					shadow_object->paging_offset));
			}
		}
		if(object->pageout) {
			if ((t = vm_page_lookup(object, target_offset))
					!= NULL) {
					object->shadow_offset);
				if(m != VM_PAGE_NULL)
					vm_object_paging_end(m->object);
			}
		}
		if(m != VM_PAGE_NULL) {
			vm_page_lock_queues();

				boolean_t must_free = TRUE;

				/* COPYOUT = FALSE case */
				/* check for error conditions which must */
				/* be passed back to the pages customer */
				if(error & UPL_ABORT_RESTART) {
					vm_object_absent_release(m->object);
					m->page_error = KERN_MEMORY_ERROR;
				} else if(error & UPL_ABORT_UNAVAILABLE) {
				} else if(error & UPL_ABORT_ERROR) {
					vm_object_absent_release(m->object);
					m->page_error = KERN_MEMORY_ERROR;
				}
				/*
				 * If the page was already encrypted,
				 * we don't really need to decrypt it
				 * now.  It will get decrypted later,
				 * on demand, as soon as someone needs
				 * to access its contents.
				 */
				m->cleaning = FALSE;
				m->overwriting = FALSE;
				PAGE_WAKEUP_DONE(m);

				if (must_free == TRUE) {
				} else {
					vm_page_activate(m);
				}
				vm_page_unlock_queues();

				target_offset += PAGE_SIZE_64;
				xfer_size -= PAGE_SIZE;

			/*
			 * Handle the trusted pager throttle.
			 */
			vm_pageout_throttle_up(m);

			assert(m->wire_count == 1);

			m->dump_cleaning = FALSE;
			m->cleaning = FALSE;
			m->overwriting = FALSE;
#if MACH_PAGEMAP
			vm_external_state_clr(
				m->object->existence_map, m->offset);
#endif /* MACH_PAGEMAP */
			if(error & UPL_ABORT_DUMP_PAGES) {
				pmap_disconnect(m->phys_page);
			}
			PAGE_WAKEUP_DONE(m);

			vm_page_unlock_queues();

			target_offset += PAGE_SIZE_64;
			xfer_size -= PAGE_SIZE;
		}
	if (upl->flags & UPL_DEVICE_MEMORY) {
	} else if (upl->flags & UPL_LITE) {
		pg_num = upl->size/PAGE_SIZE;
		pg_num = (pg_num + 31) >> 5;

		for(i = 0; i<pg_num; i++) {
			if(lite_list[i] != 0) {
			}
		}
	} else {
		if(queue_empty(&upl->map_object->memq)) {
		}
	}
	if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
	}
	if(object == shadow_object)
		vm_object_paging_end(shadow_object);

	vm_object_unlock(shadow_object);
	if (object != shadow_object)
		vm_object_unlock(object);

	return KERN_SUCCESS;
}
	vm_object_t		object = NULL;
	vm_object_t		shadow_object = NULL;
	vm_object_offset_t	offset;
	vm_object_offset_t	shadow_offset;
	vm_object_offset_t	target_offset;
	wpl_array_t		lite_list;
	boolean_t		shadow_internal;

	if (upl == UPL_NULL)
		return KERN_INVALID_ARGUMENT;

	if (upl->flags & UPL_IO_WIRE) {

		return upl_commit_range(upl,
	}

	if(upl->flags & UPL_DEVICE_MEMORY) {
		return KERN_SUCCESS;
	}

	object = upl->map_object;

	if (object == NULL) {
		panic("upl_abort: upl object is not backed by an object");
		return KERN_INVALID_ARGUMENT;
	}

	if(object->pageout) {
		shadow_object = object->shadow;
		shadow_offset = object->shadow_offset;
	} else {
		shadow_object = object;
		shadow_offset = upl->offset - object->paging_offset;
	}

	if(upl->flags & UPL_INTERNAL) {
		lite_list = (wpl_array_t)
			((((uintptr_t)upl) + sizeof(struct upl))
			 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
	} else {
		lite_list = (wpl_array_t)
			(((uintptr_t)upl) + sizeof(struct upl));
	}
	if (object != shadow_object)
		vm_object_lock(object);
	vm_object_lock(shadow_object);

	shadow_internal = shadow_object->internal;
	for(i = 0; i<(upl->size); i+=PAGE_SIZE, offset += PAGE_SIZE_64) {

		target_offset = offset + shadow_offset;

		if(upl->flags & UPL_LITE) {
			pg_num = offset/PAGE_SIZE;

			if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
				lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
				m = vm_page_lookup(
					shadow_object, target_offset);
			}
		}
		if(object->pageout) {
			if ((t = vm_page_lookup(object, offset)) != NULL) {
				m = vm_page_lookup(
					shadow_object, target_offset);
				if(m != VM_PAGE_NULL)
					vm_object_paging_end(m->object);
			}
		}
		if(m != VM_PAGE_NULL) {
			vm_page_lock_queues();

				boolean_t must_free = TRUE;

				/* COPYOUT = FALSE case */
				/* check for error conditions which must */
				/* be passed back to the pages customer */
				if(error & UPL_ABORT_RESTART) {
					vm_object_absent_release(m->object);
					m->page_error = KERN_MEMORY_ERROR;
				} else if(error & UPL_ABORT_UNAVAILABLE) {
				} else if(error & UPL_ABORT_ERROR) {
					vm_object_absent_release(m->object);
					m->page_error = KERN_MEMORY_ERROR;
				}
				/*
				 * If the page was already encrypted,
				 * we don't really need to decrypt it
				 * now.  It will get decrypted later,
				 * on demand, as soon as someone needs
				 * to access its contents.
				 */
				m->cleaning = FALSE;
				m->overwriting = FALSE;
				PAGE_WAKEUP_DONE(m);

				if (must_free == TRUE) {
				} else {
					vm_page_activate(m);
				}
				vm_page_unlock_queues();

			/*
			 * Handle the trusted pager throttle.
			 */
			vm_pageout_throttle_up(m);

			assert(m->wire_count == 1);

			m->dump_cleaning = FALSE;
			m->cleaning = FALSE;
			m->overwriting = FALSE;
#if MACH_PAGEMAP
			vm_external_state_clr(
				m->object->existence_map, m->offset);
#endif /* MACH_PAGEMAP */
			if(error & UPL_ABORT_DUMP_PAGES) {
				pmap_disconnect(m->phys_page);
			}
			PAGE_WAKEUP_DONE(m);

			vm_page_unlock_queues();
		}
	}
	if (upl->flags & UPL_DEVICE_MEMORY) {
	} else if (upl->flags & UPL_LITE) {
		pg_num = upl->size/PAGE_SIZE;
		pg_num = (pg_num + 31) >> 5;

		for(j = 0; j<pg_num; j++) {
			if(lite_list[j] != 0) {
			}
		}
	} else {
		if(queue_empty(&upl->map_object->memq)) {
		}
	}
	if(object == shadow_object)
		vm_object_paging_end(shadow_object);

	vm_object_unlock(shadow_object);
	if (object != shadow_object)
		vm_object_unlock(object);

	return KERN_SUCCESS;
}
/* an option on commit should be wire */
kern_return_t
upl_commit(
	upl_t			upl,
	upl_page_info_t		*page_list,
	mach_msg_type_number_t	count)
{
	if (upl == UPL_NULL)
		return KERN_INVALID_ARGUMENT;

	if(upl->flags & (UPL_LITE | UPL_IO_WIRE)) {

		return upl_commit_range(upl, 0, upl->size, 0,
				page_list, count, &empty);
	}

	if (upl->flags & UPL_DEVICE_MEMORY)

	if (upl->flags & UPL_ENCRYPTED) {
		/*
		 * This UPL was encrypted, but we don't need
		 * to decrypt here.  We'll decrypt each page
		 * later, on demand, as soon as someone needs
		 * to access the page's contents.
		 */
	}
	if ((upl->flags & UPL_CLEAR_DIRTY) ||
	    (upl->flags & UPL_PAGE_SYNC_DONE) || page_list) {
		vm_object_t		shadow_object = upl->map_object->shadow;
		vm_object_t		object = upl->map_object;
		vm_object_offset_t	target_offset;
		upl_size_t		xfer_end;
		if (object != shadow_object)
			vm_object_lock(object);
		vm_object_lock(shadow_object);

		target_offset = object->shadow_offset;
		xfer_end = upl->size + object->shadow_offset;

		while(target_offset < xfer_end) {

			if ((t = vm_page_lookup(object,
					target_offset - object->shadow_offset))

				target_offset += PAGE_SIZE_64;

			m = vm_page_lookup(shadow_object, target_offset);
			if(m != VM_PAGE_NULL) {
				/*
				 * If this page was encrypted, we
				 * don't need to decrypt it here.
				 * We'll decrypt it later, on demand,
				 * as soon as someone needs to access
				 */
				if (upl->flags & UPL_CLEAR_DIRTY) {
					pmap_clear_modify(m->phys_page);
				}
				/* It is a part of the semantic of */
				/* COPYOUT_FROM UPLs that a commit */
				/* implies cache sync between the  */
				/* vm page and the backing store   */
				/* this can be used to strip the   */
				/* precious bit as well as clean   */
				if (upl->flags & UPL_PAGE_SYNC_DONE)
					m->precious = FALSE;

				p = &(page_list[entry]);
				if(page_list[entry].phys_addr &&
				   p->pageout && !m->pageout) {
					vm_page_lock_queues();
					vm_page_unlock_queues();
				} else if (page_list[entry].phys_addr &&
					   !p->pageout && m->pageout &&
					   !m->dump_cleaning) {
					vm_page_lock_queues();
					m->overwriting = FALSE;
					PAGE_WAKEUP_DONE(m);
					vm_page_unlock_queues();
				}
				page_list[entry].phys_addr = 0;
			}
			target_offset += PAGE_SIZE_64;
		}
		vm_object_unlock(shadow_object);
		if (object != shadow_object)
			vm_object_unlock(object);
	}
	if (upl->flags & UPL_DEVICE_MEMORY) {
		vm_object_lock(upl->map_object->shadow);
		if(upl->map_object == upl->map_object->shadow)
			vm_object_paging_end(upl->map_object->shadow);
		vm_object_unlock(upl->map_object->shadow);
	}
	return KERN_SUCCESS;
}
kern_return_t
vm_object_iopl_request(
	vm_object_offset_t	offset,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,

	vm_object_offset_t	dst_offset = offset;
	upl_size_t		xfer_size = size;
	wpl_array_t		lite_list = NULL;
	int			page_field_size;
	int			delayed_unlock = 0;
	int			no_zero_fill = FALSE;
	vm_page_t		alias_page = NULL;

	if (cntrl_flags & ~UPL_VALID_FLAGS) {
		/*
		 * For forward compatibility's sake,
		 * reject any unknown flag.
		 */
		return KERN_INVALID_VALUE;
	}
	if (vm_lopage_poolsize == 0)
		cntrl_flags &= ~UPL_NEED_32BIT_ADDR;

	if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
		if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
			return KERN_INVALID_VALUE;

		if (object->phys_contiguous) {
			if ((offset + object->shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
				return KERN_INVALID_ADDRESS;

			if (((offset + object->shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
				return KERN_INVALID_ADDRESS;
		}
	}
	if (cntrl_flags & UPL_ENCRYPT) {
		/*
		 * The paging path doesn't use this interface,
		 * so we don't support the UPL_ENCRYPT flag
		 * here.  We won't encrypt the pages.
		 */
		assert(! (cntrl_flags & UPL_ENCRYPT));
	}

	if (cntrl_flags & UPL_NOZEROFILL)
		no_zero_fill = TRUE;

	if (cntrl_flags & UPL_COPYOUT_FROM)
		prot = VM_PROT_READ;
	else
		prot = VM_PROT_READ | VM_PROT_WRITE;

	if(((size/page_size) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
		size = MAX_UPL_TRANSFER * page_size;
	}

	if(cntrl_flags & UPL_SET_INTERNAL)
		if(page_list_count != NULL)
			*page_list_count = MAX_UPL_TRANSFER;
	if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
	   ((page_list_count != NULL) && (*page_list_count != 0)
	    && *page_list_count < (size/page_size)))
		return KERN_INVALID_ARGUMENT;

	if((!object->internal) && (object->paging_offset != 0))
		panic("vm_object_upl_request: external object with non-zero paging offset\n");

	if(object->phys_contiguous) {
		/* No paging operations are possible against this memory */
		/* and so no need for map object, ever */
		cntrl_flags |= UPL_SET_LITE;
	}
	if(cntrl_flags & UPL_SET_INTERNAL) {
		if(cntrl_flags & UPL_SET_LITE) {
			upl = upl_create(
				UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
				size);
			user_page_list = (upl_page_info_t *)
				(((uintptr_t)upl) + sizeof(struct upl));
			lite_list = (wpl_array_t)
				(((uintptr_t)user_page_list) +
				((size/PAGE_SIZE) *
					sizeof(upl_page_info_t)));
			page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
			page_field_size =
				(page_field_size + 3) & 0xFFFFFFFC;
			bzero((char *)lite_list, page_field_size);
			upl->flags =
				UPL_LITE | UPL_INTERNAL | UPL_IO_WIRE;
		} else {
			upl = upl_create(UPL_CREATE_INTERNAL, size);
			user_page_list = (upl_page_info_t *)
				(((uintptr_t)upl)
					+ sizeof(struct upl));
			upl->flags = UPL_INTERNAL | UPL_IO_WIRE;
		}
	} else {
		if(cntrl_flags & UPL_SET_LITE) {
			upl = upl_create(UPL_CREATE_LITE, size);
			lite_list = (wpl_array_t)
				(((uintptr_t)upl) + sizeof(struct upl));
			page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
			page_field_size =
				(page_field_size + 3) & 0xFFFFFFFC;
			bzero((char *)lite_list, page_field_size);
			upl->flags = UPL_LITE | UPL_IO_WIRE;
		} else {
			upl = upl_create(UPL_CREATE_EXTERNAL, size);
			upl->flags = UPL_IO_WIRE;
		}
	}

	if(object->phys_contiguous) {
		upl->map_object = object;
		/* don't need any shadow mappings for this one */
		/* since it is already I/O memory */
		upl->flags |= UPL_DEVICE_MEMORY;

		vm_object_lock(object);
		vm_object_paging_begin(object);
		vm_object_unlock(object);

		/* paging in progress also protects the paging_offset */
		upl->offset = offset + object->paging_offset;

		if(user_page_list) {
			user_page_list[0].phys_addr =
				(offset + object->shadow_offset)>>PAGE_SHIFT;
			user_page_list[0].device = TRUE;
		}
		upl->highest_page = (offset + object->shadow_offset + size - 1)>>PAGE_SHIFT;

		if(page_list_count != NULL) {
			if (upl->flags & UPL_INTERNAL) {
				*page_list_count = 0;
			} else {
				*page_list_count = 1;
			}
		}
		return KERN_SUCCESS;
	}
	if (user_page_list)
		user_page_list[0].device = FALSE;

	if (cntrl_flags & UPL_SET_LITE) {
		upl->map_object = object;
	} else {
		upl->map_object = vm_object_allocate(size);
		vm_object_lock(upl->map_object);
		upl->map_object->shadow = object;
		upl->map_object->pageout = TRUE;
		upl->map_object->can_persist = FALSE;
		upl->map_object->copy_strategy =
			MEMORY_OBJECT_COPY_NONE;
		upl->map_object->shadow_offset = offset;
		upl->map_object->wimg_bits = object->wimg_bits;
		vm_object_unlock(upl->map_object);
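		/*
		 * For a non-LITE UPL the pages are tracked through this
		 * freshly allocated "map object", which shadows the caller's
		 * object at "offset"; the fictitious alias pages inserted
		 * into it further down stand in for each wired real page.
		 */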
	}

	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (!object->phys_contiguous) {
		/* Protect user space from future COW operations */
		object->true_share = TRUE;
		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	}

	/* we can lock the upl offset now that paging_in_progress is set */
	upl->offset = offset + object->paging_offset;

#ifdef UPL_DEBUG
	queue_enter(&object->uplq, upl, upl_t, uplq);
#endif /* UPL_DEBUG */

	if (cntrl_flags & UPL_BLOCK_ACCESS) {
		/*
		 * The user requested that access to the pages in this UPL
		 * be blocked until the UPL is committed or aborted.
		 */
		upl->flags |= UPL_ACCESS_BLOCKED;
	}

	entry = 0;
	while (xfer_size) {
		if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
			if (delayed_unlock) {
				delayed_unlock = 0;
				vm_page_unlock_queues();
			}
			vm_object_unlock(object);
			VM_PAGE_GRAB_FICTITIOUS(alias_page);
			vm_object_lock(object);
		}
		dst_page = vm_page_lookup(object, dst_offset);

		/*
		 * ENCRYPTED SWAP:
		 * If the page is encrypted, we need to decrypt it,
		 * so force a soft page fault.
		 */
		if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
		    (dst_page->encrypted) ||
		    (dst_page->unusual && (dst_page->error ||
					   dst_page->restart ||
					   dst_page->absent ||
					   dst_page->fictitious ||
					   (prot & dst_page->page_lock)))) {
			vm_fault_return_t	result;

			do {
				vm_page_t	top_page;
				kern_return_t	error_code;
				int		interruptible;
				vm_object_offset_t	lo_offset = offset;
				vm_object_offset_t	hi_offset = offset + size;

				if (delayed_unlock) {
					delayed_unlock = 0;
					vm_page_unlock_queues();
				}

				if (cntrl_flags & UPL_SET_INTERRUPTIBLE) {
					interruptible = THREAD_ABORTSAFE;
				} else {
					interruptible = THREAD_UNINT;
				}

				result = vm_fault_page(object, dst_offset,
						       prot | VM_PROT_WRITE, FALSE,
						       interruptible,
						       lo_offset, hi_offset,
						       VM_BEHAVIOR_SEQUENTIAL,
						       &prot, &dst_page, &top_page,
						       (int *)0,
						       &error_code, no_zero_fill, FALSE, NULL, 0);

				switch (result) {

				case VM_FAULT_SUCCESS:

					PAGE_WAKEUP_DONE(dst_page);

					/*
					 *	Release paging references and
					 *	top-level placeholder page, if any.
					 */
					if (top_page != VM_PAGE_NULL) {
						vm_object_t local_object;

						local_object = top_page->object;
						if (top_page->object
						    != dst_page->object) {
							vm_object_lock(local_object);
							VM_PAGE_FREE(top_page);
							vm_object_paging_end(
								local_object);
							vm_object_unlock(local_object);
						} else {
							VM_PAGE_FREE(top_page);
							vm_object_paging_end(
								local_object);
						}
					}
					break;

				case VM_FAULT_RETRY:
					vm_object_lock(object);
					vm_object_paging_begin(object);
					break;

				case VM_FAULT_FICTITIOUS_SHORTAGE:
					vm_page_more_fictitious();
					vm_object_lock(object);
					vm_object_paging_begin(object);
					break;

				case VM_FAULT_MEMORY_SHORTAGE:
					if (vm_page_wait(interruptible)) {
						vm_object_lock(object);
						vm_object_paging_begin(object);
						break;
					}
					/* fall thru */

				case VM_FAULT_INTERRUPTED:
					error_code = MACH_SEND_INTERRUPTED;
				case VM_FAULT_MEMORY_ERROR:
					ret = (error_code ? error_code :
					       KERN_MEMORY_ERROR);
					vm_object_lock(object);
					goto return_err;
				}
			} while ((result != VM_FAULT_SUCCESS)
				 || (result == VM_FAULT_INTERRUPTED));
		}
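		/*
		 * The loop above retries the soft fault until vm_fault_page()
		 * hands back a usable page: the retry/shortage cases re-take
		 * the object lock and paging reference before looping, while
		 * hard errors leave through return_err with "ret" set.
		 */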
		if ((cntrl_flags & UPL_NEED_32BIT_ADDR) &&
		    dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT)) {
			vm_page_t	low_page;
			int 		refmod;

			/*
			 * support devices that can't DMA above 32 bits
			 * by substituting pages from a pool of low address
			 * memory for any pages we find above the 4G mark
			 * can't substitute if the page is already wired because
			 * we don't know whether that physical address has been
			 * handed out to some other 64 bit capable DMA device to use
			 */
			if (dst_page->wire_count) {
				ret = KERN_PROTECTION_FAILURE;
				goto return_err;
			}
			if (delayed_unlock) {
				delayed_unlock = 0;
				vm_page_unlock_queues();
			}
			low_page = vm_page_grablo();

			if (low_page == VM_PAGE_NULL) {
				ret = KERN_RESOURCE_SHORTAGE;
				goto return_err;
			}
			/*
			 * from here until the vm_page_replace completes
			 * we mustn't drop the object lock... we don't
			 * want anyone refaulting this page in and using
			 * it after we disconnect it... we want the fault
			 * to find the new page being substituted.
			 */
			refmod = pmap_disconnect(dst_page->phys_page);

			vm_page_copy(dst_page, low_page);

			low_page->reference = dst_page->reference;
			low_page->dirty = dst_page->dirty;

			if (refmod & VM_MEM_REFERENCED)
				low_page->reference = TRUE;
			if (refmod & VM_MEM_MODIFIED)
				low_page->dirty = TRUE;

			vm_page_lock_queues();
			vm_page_replace(low_page, object, dst_offset);
			/*
			 * keep the queue lock since we're going to
			 * need it immediately
			 */
			delayed_unlock = 1;

			dst_page = low_page;
			/*
			 * vm_page_grablo returned the page marked
			 * BUSY... we don't need a PAGE_WAKEUP_DONE
			 * here, because we've never dropped the object lock
			 */
			dst_page->busy = FALSE;
		}
		if (delayed_unlock == 0)
			vm_page_lock_queues();
		vm_page_wire(dst_page);

		if (cntrl_flags & UPL_BLOCK_ACCESS) {
			/*
			 * Mark the page "busy" to block any future page fault
			 * on this page.  We'll also remove the mapping
			 * of all these pages before leaving this routine.
			 */
			assert(!dst_page->fictitious);
			dst_page->busy = TRUE;
		}

		if (cntrl_flags & UPL_SET_LITE) {
			int	pg_num;

			pg_num = (dst_offset-offset)/PAGE_SIZE;
			lite_list[pg_num>>5] |= 1 << (pg_num & 31);
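			/*
			 * lite_list is a bitmap with one bit per page of the
			 * UPL: pg_num >> 5 picks the 32-bit word and
			 * pg_num & 31 the bit within it (page 70, for
			 * instance, lands in word 2, bit 6).
			 */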
		} else {
			/*
			 * Convert the fictitious page to a
			 * private shadow of the real page.
			 */
			assert(alias_page->fictitious);
			alias_page->fictitious = FALSE;
			alias_page->private = TRUE;
			alias_page->pageout = TRUE;
			alias_page->phys_page = dst_page->phys_page;
			vm_page_wire(alias_page);

			vm_page_insert(alias_page,
				       upl->map_object, size - xfer_size);
			assert(!alias_page->wanted);
			alias_page->busy = FALSE;
			alias_page->absent = FALSE;
			alias_page = NULL;
		}

		/* expect the page to be used */
		dst_page->reference = TRUE;

		if (!(cntrl_flags & UPL_COPYOUT_FROM))
			dst_page->dirty = TRUE;

		if (dst_page->phys_page > upl->highest_page)
			upl->highest_page = dst_page->phys_page;

		if (user_page_list) {
			user_page_list[entry].phys_addr
				= dst_page->phys_page;
			user_page_list[entry].dirty =
				dst_page->dirty;
			user_page_list[entry].pageout =
				dst_page->pageout;
			user_page_list[entry].absent =
				dst_page->absent;
			user_page_list[entry].precious =
				dst_page->precious;
		}

		if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
			delayed_unlock = 0;
			vm_page_unlock_queues();
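			/*
			 * delayed_unlock batches work under the page-queues
			 * lock: instead of locking and unlocking around every
			 * single page, the lock is held across iterations and
			 * only dropped once DELAYED_UNLOCK_LIMIT pages have
			 * been processed, then re-taken on the next pass.
			 */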
		}
		entry++;
		dst_offset += PAGE_SIZE_64;
		xfer_size -= PAGE_SIZE;
	}
	if (delayed_unlock)
		vm_page_unlock_queues();

	if (upl->flags & UPL_INTERNAL) {
		if (page_list_count != NULL)
			*page_list_count = 0;
	} else if (*page_list_count > entry) {
		if (page_list_count != NULL)
			*page_list_count = entry;
	}

	if (alias_page != NULL) {
		vm_page_lock_queues();
		vm_page_free(alias_page);
		vm_page_unlock_queues();
	}

	vm_object_unlock(object);

	if (cntrl_flags & UPL_BLOCK_ACCESS) {
		/*
		 * We've marked all the pages "busy" so that future
		 * page faults will block.
		 * Now remove the mapping for these pages, so that they
		 * can't be accessed without causing a page fault.
		 */
		vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
				       PMAP_NULL, 0, VM_PROT_NONE);
	}
	return KERN_SUCCESS;

return_err:
	if (delayed_unlock)
		vm_page_unlock_queues();

	for (; offset < dst_offset; offset += PAGE_SIZE) {
		dst_page = vm_page_lookup(object, offset);

		if (dst_page == VM_PAGE_NULL)
			panic("vm_object_iopl_request: Wired pages missing. \n");
		vm_page_lock_queues();
		vm_page_unwire(dst_page);
		vm_page_unlock_queues();
		VM_STAT(reactivations++);
	}
	vm_object_paging_end(object);
	vm_object_unlock(object);

	return ret;
}

kern_return_t
upl_transpose(
	upl_t		upl1,
	upl_t		upl2)
{
	kern_return_t		retval;
	boolean_t		upls_locked;
	vm_object_t		object1, object2;

	if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) {
		return KERN_INVALID_ARGUMENT;
	}

	upls_locked = FALSE;

	/*
	 * Since we need to lock both UPLs at the same time,
	 * avoid deadlocks by always taking locks in the same order.
	 */
	if (upl1 < upl2) {
		upl_lock(upl1);
		upl_lock(upl2);
	} else {
		upl_lock(upl2);
		upl_lock(upl1);
	}
	upls_locked = TRUE;	/* the UPLs will need to be unlocked */

	object1 = upl1->map_object;
	object2 = upl2->map_object;

	if (upl1->offset != 0 || upl2->offset != 0 ||
	    upl1->size != upl2->size) {
		/*
		 * We deal only with full objects, not subsets.
		 * That's because we exchange the entire backing store info
		 * for the objects: pager, resident pages, etc...  We can't do
		 * anything less.
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}

	/*
	 * Transpose the VM objects' backing store.
	 */
	retval = vm_object_transpose(object1, object2,
				     (vm_object_size_t) upl1->size);

	if (retval == KERN_SUCCESS) {
		/*
		 * Make each UPL point to the correct VM object, i.e. the
		 * object holding the pages that the UPL refers to...
		 */
		upl1->map_object = object2;
		upl2->map_object = object1;
	}

done:
	upl_unlock(upl1);
	upl_unlock(upl2);
	upls_locked = FALSE;

	return retval;
}

/*
 * ENCRYPTED SWAP:
 *
 * Rationale:  the user might have some encrypted data on disk (via
 * FileVault or any other mechanism).  That data is then decrypted in
 * memory, which is safe as long as the machine is secure.  But that
 * decrypted data in memory could be paged out to disk by the default
 * pager.  The data would then be stored on disk in clear (not encrypted)
 * and it could be accessed by anyone who gets physical access to the
 * disk (if the laptop or the disk gets stolen for example).  This weakens
 * the security offered by FileVault.
 *
 * Solution:  the default pager will optionally request that all the
 * pages it gathers for pageout be encrypted, via the UPL interfaces,
 * before it sends this UPL to disk via the vnode_pageout() path.
 *
 * To avoid disrupting the VM LRU algorithms, we want to keep the
 * clean-in-place mechanisms, which allow us to send some extra pages to
 * swap (clustering) without actually removing them from the user's
 * address space.  We don't want the user to unknowingly access encrypted
 * data, so we have to actually remove the encrypted pages from the page
 * table.  When the user accesses the data, the hardware will fail to
 * locate the virtual page in its page table and will trigger a page
 * fault.  We can then decrypt the page and enter it in the page table
 * again.  Whenever we allow the user to access the contents of a page,
 * we have to make sure it's not encrypted.
 */
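
/*
 * Rough sketch of the resulting flow (paraphrasing the rationale above;
 * the exact call sites live in the default pager and the vnode pager):
 *	the default pager gathers dirty pages into a UPL with UPL_ENCRYPT
 *	-> upl_encrypt() walks that UPL and calls vm_page_encrypt() on
 *	   each page
 *	-> the now-encrypted UPL is written to the swap file via
 *	   vnode_pageout().
 * On the way back in, the fault path notices "page->encrypted" and calls
 * vm_page_decrypt() before the page can be mapped for the user again.
 */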

/*
 * ENCRYPTED SWAP:
 * Reserve of virtual addresses in the kernel address space.
 * We need to map the physical pages in the kernel, so that we
 * can call the encryption/decryption routines with a kernel
 * virtual address.  We keep this pool of pre-allocated kernel
 * virtual addresses so that we don't have to scan the kernel's
 * virtual address space each time we need to encrypt or decrypt
 * a physical page.
 * It would be nice to be able to encrypt and decrypt in physical
 * mode but that might not always be more efficient...
 */
decl_simple_lock_data(,vm_paging_lock)
#define VM_PAGING_NUM_PAGES	64
vm_map_offset_t	vm_paging_base_address = 0;
boolean_t	vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
int		vm_paging_max_index = 0;
unsigned long	vm_paging_no_kernel_page = 0;
unsigned long	vm_paging_objects_mapped = 0;
unsigned long	vm_paging_pages_mapped = 0;
unsigned long	vm_paging_objects_mapped_slow = 0;
unsigned long	vm_paging_pages_mapped_slow = 0;
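
/*
 * With VM_PAGING_NUM_PAGES == 64 and 4K pages, the pre-allocated window
 * managed above spans 256KB of kernel virtual space.  The "_mapped" and
 * "_mapped_slow" counters record how often the fast (pool) and slow
 * (vm_map_enter) paths of vm_paging_map_object() are taken.
 */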

/*
 * ENCRYPTED SWAP:
 * vm_paging_map_object:
 *	Maps part of a VM object's pages in the kernel
 * 	virtual address space, using the pre-allocated
 *	kernel virtual addresses, if possible.
 *
 * Context:
 * 	The VM object is locked.  This lock will get
 * 	dropped and re-acquired though.
 */
kern_return_t
vm_paging_map_object(
	vm_map_offset_t		*address,
	vm_page_t		page,
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_map_size_t		*size)
{
	kern_return_t		kr;
	vm_map_offset_t		page_map_offset;
	vm_map_size_t		map_size;
	vm_object_offset_t	object_offset;
#ifdef __ppc__
	int			i;
	vm_map_entry_t		map_entry;
#endif /* __ppc__ */

#ifdef __ppc__
	if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
		/*
		 * Optimization for the PowerPC.
		 * Use one of the pre-allocated kernel virtual addresses
		 * and just enter the VM page in the kernel address space
		 * at that virtual address.
		 */
		vm_object_unlock(object);
		simple_lock(&vm_paging_lock);

		if (vm_paging_base_address == 0) {
			/*
			 * Initialize our pool of pre-allocated kernel
			 * virtual addresses.
			 */
			simple_unlock(&vm_paging_lock);
			page_map_offset = 0;
			kr = vm_map_find_space(kernel_map,
					       &page_map_offset,
					       VM_PAGING_NUM_PAGES * PAGE_SIZE,
					       0,
					       0,
					       &map_entry);
			if (kr != KERN_SUCCESS) {
				panic("vm_paging_map_object: "
				      "kernel_map full\n");
			}
			map_entry->object.vm_object = kernel_object;
			map_entry->offset =
				page_map_offset - VM_MIN_KERNEL_ADDRESS;
			vm_object_reference(kernel_object);
			vm_map_unlock(kernel_map);

			simple_lock(&vm_paging_lock);
			if (vm_paging_base_address != 0) {
				/* someone raced us and won: undo */
				simple_unlock(&vm_paging_lock);
				kr = vm_map_remove(kernel_map,
						   page_map_offset,
						   page_map_offset +
						   (VM_PAGING_NUM_PAGES
						    * PAGE_SIZE),
						   VM_MAP_NO_FLAGS);
				assert(kr == KERN_SUCCESS);
				simple_lock(&vm_paging_lock);
			} else {
				vm_paging_base_address = page_map_offset;
			}
		}

		/*
		 * Try and find an available kernel virtual address
		 * from our pre-allocated pool.
		 */
		page_map_offset = 0;
		for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
			if (vm_paging_page_inuse[i] == FALSE) {
				page_map_offset = vm_paging_base_address +
					(i * PAGE_SIZE);
				break;
			}
		}

		if (page_map_offset != 0) {
			/*
			 * We found a kernel virtual address;
			 * map the physical page to that virtual address.
			 */
			if (i > vm_paging_max_index) {
				vm_paging_max_index = i;
			}
			vm_paging_page_inuse[i] = TRUE;
			simple_unlock(&vm_paging_lock);
			pmap_map_block(kernel_pmap,
				       page_map_offset,
				       page->phys_page,
				       1,	/* Size is number of 4k pages */
				       VM_PROT_DEFAULT,
				       ((int) page->object->wimg_bits &
					VM_WIMG_MASK),
				       0);
			vm_paging_objects_mapped++;
			vm_paging_pages_mapped++;
			*address = page_map_offset;
			vm_object_lock(object);

			/* all done and mapped, ready to use ! */
			return KERN_SUCCESS;
		}

		/*
		 * We ran out of pre-allocated kernel virtual
		 * addresses.  Just map the page in the kernel
		 * the slow and regular way.
		 */
		vm_paging_no_kernel_page++;
		simple_unlock(&vm_paging_lock);
		vm_object_lock(object);
	}
#endif /* __ppc__ */

	object_offset = vm_object_trunc_page(offset);
	map_size = vm_map_round_page(*size);

	/*
	 * Try and map the required range of the object
	 * in the kernel_map.
	 */
	/* don't go beyond the object's end... */
	if (object_offset >= object->size) {
		map_size = 0;
	} else if (map_size > object->size - offset) {
		map_size = object->size - offset;
	}

	vm_object_reference_locked(object);	/* for the map entry */
	vm_object_unlock(object);

	kr = vm_map_enter(kernel_map,
			  address,
			  map_size,
			  0,
			  VM_FLAGS_ANYWHERE,
			  object,
			  object_offset,
			  FALSE,
			  VM_PROT_DEFAULT,
			  VM_PROT_ALL,
			  VM_INHERIT_NONE);
	if (kr != KERN_SUCCESS) {
		*address = 0;
		*size = 0;
		vm_object_deallocate(object);	/* for the map entry */
		return kr;
	}

	*size = map_size;

	/*
	 * Enter the mapped pages in the page table now.
	 */
	vm_object_lock(object);
	for (page_map_offset = 0;
	     map_size != 0;
	     map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
		unsigned int	cache_attr;

		page = vm_page_lookup(object, offset + page_map_offset);
		if (page == VM_PAGE_NULL) {
			panic("vm_paging_map_object: no page !?");
		}
		if (page->no_isync == TRUE) {
			pmap_sync_page_data_phys(page->phys_page);
		}
		cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;

		PMAP_ENTER(kernel_pmap,
			   *address + page_map_offset,
			   page,
			   VM_PROT_DEFAULT,
			   cache_attr,
			   FALSE);
	}

	vm_paging_objects_mapped_slow++;
	vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64;

	return KERN_SUCCESS;
}
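
/*
 * Summary of the two paths above: on PowerPC, a single resident page can be
 * mapped through the pre-allocated pool with pmap_map_block(); in every other
 * case the object range is entered into kernel_map with vm_map_enter() and
 * each resident page is entered with PMAP_ENTER().  vm_paging_unmap_object()
 * below has to be able to undo either kind of mapping.
 */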

/*
 * ENCRYPTED SWAP:
 * vm_paging_unmap_object:
 *	Unmaps part of a VM object's pages from the kernel
 * 	virtual address space.
 *
 * Context:
 * 	The VM object is locked.  This lock will get
 * 	dropped and re-acquired though.
 */
void
vm_paging_unmap_object(
	vm_object_t	object,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	kern_return_t	kr;
#ifdef __ppc__
	int		i;
#endif /* __ppc__ */

	if ((vm_paging_base_address == 0) ||
	    ((start < vm_paging_base_address) ||
	     (end > (vm_paging_base_address
		     + (VM_PAGING_NUM_PAGES * PAGE_SIZE))))) {
		/*
		 * We didn't use our pre-allocated pool of
		 * kernel virtual address.  Deallocate the
		 * virtual memory.
		 */
		if (object != VM_OBJECT_NULL) {
			vm_object_unlock(object);
		}
		kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
		if (object != VM_OBJECT_NULL) {
			vm_object_lock(object);
		}
		assert(kr == KERN_SUCCESS);
	} else {
		/*
		 * We used a kernel virtual address from our
		 * pre-allocated pool.  Put it back in the pool
		 * for next time.
		 */
#ifdef __ppc__
		assert(end - start == PAGE_SIZE);
		i = (start - vm_paging_base_address) >> PAGE_SHIFT;

		/* undo the pmap mapping */
		mapping_remove(kernel_pmap, start);

		simple_lock(&vm_paging_lock);
		vm_paging_page_inuse[i] = FALSE;
		simple_unlock(&vm_paging_lock);
#endif /* __ppc__ */
	}
}
5567 * "iv" is the "initial vector". Ideally, we want to
5568 * have a different one for each page we encrypt, so that
5569 * crackers can't find encryption patterns too easily.
5571 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
5572 boolean_t swap_crypt_ctx_initialized
= FALSE
;
5573 aes_32t swap_crypt_key
[8]; /* big enough for a 256 key */
5574 aes_ctx swap_crypt_ctx
;
5575 const unsigned char swap_crypt_null_iv
[AES_BLOCK_SIZE
] = {0xa, };
5578 boolean_t swap_crypt_ctx_tested
= FALSE
;
5579 unsigned char swap_crypt_test_page_ref
[4096] __attribute__((aligned(4096)));
5580 unsigned char swap_crypt_test_page_encrypt
[4096] __attribute__((aligned(4096)));
5581 unsigned char swap_crypt_test_page_decrypt
[4096] __attribute__((aligned(4096)));

extern u_long random(void);

/*
 * Initialize the encryption context: key and key size.
 */
void swap_crypt_ctx_initialize(void); /* forward */
void
swap_crypt_ctx_initialize(void)
{
	unsigned int	i;

	/*
	 * No need for locking to protect swap_crypt_ctx_initialized
	 * because the first use of encryption will come from the
	 * pageout thread (we won't pagein before there's been a pageout)
	 * and there's only one pageout thread.
	 */
	if (swap_crypt_ctx_initialized == FALSE) {
		for (i = 0;
		     i < (sizeof (swap_crypt_key) /
			  sizeof (swap_crypt_key[0]));
		     i++) {
			swap_crypt_key[i] = random();
		}
		aes_encrypt_key((const unsigned char *) swap_crypt_key,
				SWAP_CRYPT_AES_KEY_SIZE,
				&swap_crypt_ctx.encrypt);
		aes_decrypt_key((const unsigned char *) swap_crypt_key,
				SWAP_CRYPT_AES_KEY_SIZE,
				&swap_crypt_ctx.decrypt);
		swap_crypt_ctx_initialized = TRUE;
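		/*
		 * With SWAP_CRYPT_AES_KEY_SIZE set to 128, only the first
		 * 4 of the 8 32-bit words of swap_crypt_key are actually
		 * used as key material; the array is sized for a 256-bit key.
		 */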
	}

#if DEBUG
	/*
	 * Validate the encryption algorithms.
	 */
	if (swap_crypt_ctx_tested == FALSE) {
		/* initialize */
		for (i = 0; i < 4096; i++) {
			swap_crypt_test_page_ref[i] = (char) i;
		}
		/* encrypt */
		aes_encrypt_cbc(swap_crypt_test_page_ref,
				swap_crypt_null_iv,
				PAGE_SIZE / AES_BLOCK_SIZE,
				swap_crypt_test_page_encrypt,
				&swap_crypt_ctx.encrypt);
		/* decrypt */
		aes_decrypt_cbc(swap_crypt_test_page_encrypt,
				swap_crypt_null_iv,
				PAGE_SIZE / AES_BLOCK_SIZE,
				swap_crypt_test_page_decrypt,
				&swap_crypt_ctx.decrypt);
		/* compare result with original */
		for (i = 0; i < 4096; i++) {
			if (swap_crypt_test_page_decrypt[i] !=
			    swap_crypt_test_page_ref[i]) {
				panic("encryption test failed");
			}
		}

		/* encrypt in place */
		aes_encrypt_cbc(swap_crypt_test_page_decrypt,
				swap_crypt_null_iv,
				PAGE_SIZE / AES_BLOCK_SIZE,
				swap_crypt_test_page_decrypt,
				&swap_crypt_ctx.encrypt);
		/* decrypt in place */
		aes_decrypt_cbc(swap_crypt_test_page_decrypt,
				swap_crypt_null_iv,
				PAGE_SIZE / AES_BLOCK_SIZE,
				swap_crypt_test_page_decrypt,
				&swap_crypt_ctx.decrypt);
		for (i = 0; i < 4096; i++) {
			if (swap_crypt_test_page_decrypt[i] !=
			    swap_crypt_test_page_ref[i]) {
				panic("in place encryption test failed");
			}
		}

		swap_crypt_ctx_tested = TRUE;
	}
#endif /* DEBUG */
}

/*
 * ENCRYPTED SWAP:
 * vm_page_encrypt:
 * 	Encrypt the given page, for secure paging.
 * 	The page might already be mapped at kernel virtual
 * 	address "kernel_mapping_offset".  Otherwise, we need
 * 	to map it.
 *
 * Context:
 * 	The page's object is locked, but this lock will be released
 * 	and re-acquired.
 * 	The page is busy and not accessible by users (not entered in any pmap).
 */
void
vm_page_encrypt(
	vm_page_t	page,
	vm_map_offset_t	kernel_mapping_offset)
{
	int			clear_refmod = 0;
	kern_return_t		kr;
	boolean_t		page_was_referenced;
	boolean_t		page_was_modified;
	vm_map_size_t		kernel_mapping_size;
	vm_offset_t		kernel_vaddr;
	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		struct {
			memory_object_t		pager_object;
			vm_object_offset_t	paging_offset;
		} vm;
	} encrypt_iv;

	if (! vm_pages_encrypted) {
		vm_pages_encrypted = TRUE;
	}

	assert(page->busy);
	assert(page->dirty || page->precious);

	if (page->encrypted) {
		/*
		 * Already encrypted: no need to do it again.
		 */
		vm_page_encrypt_already_encrypted_counter++;
		return;
	}
	ASSERT_PAGE_DECRYPTED(page);

	/*
	 * Gather the "reference" and "modified" status of the page.
	 * We'll restore these values after the encryption, so that
	 * the encryption is transparent to the rest of the system
	 * and doesn't impact the VM's LRU logic.
	 */
	page_was_referenced =
		(page->reference || pmap_is_referenced(page->phys_page));
	page_was_modified =
		(page->dirty || pmap_is_modified(page->phys_page));

	if (kernel_mapping_offset == 0) {
		/*
		 * The page hasn't already been mapped in kernel space
		 * by the caller.  Map it now, so that we can access
		 * its contents and encrypt them.
		 */
		kernel_mapping_size = PAGE_SIZE;
		kr = vm_paging_map_object(&kernel_mapping_offset,
					  page,
					  page->object,
					  page->offset,
					  &kernel_mapping_size);
		if (kr != KERN_SUCCESS) {
			panic("vm_page_encrypt: "
			      "could not map page in kernel: 0x%x\n",
			      kr);
		}
	} else {
		kernel_mapping_size = 0;
	}
	kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);

	if (swap_crypt_ctx_initialized == FALSE) {
		swap_crypt_ctx_initialize();
	}
	assert(swap_crypt_ctx_initialized);

	/*
	 * Prepare an "initial vector" for the encryption.
	 * We use the "pager" and the "paging_offset" for that
	 * page to obfuscate the encrypted data a bit more and
	 * prevent crackers from finding patterns that they could
	 * use to break the key.
	 */
	bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
	encrypt_iv.vm.pager_object = page->object->pager;
	encrypt_iv.vm.paging_offset =
		page->object->paging_offset + page->offset;

	vm_object_unlock(page->object);

	/* encrypt the "initial vector" */
	aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&encrypt_iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);
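	/*
	 * At this point encrypt_iv holds a per-page IV: the block starts out
	 * zeroed, is seeded with the page's pager pointer and paging offset,
	 * and is then run through one AES-CBC block above so that the raw
	 * pointer/offset values never appear directly in the IV.
	 * vm_page_decrypt() rebuilds the very same IV from the same
	 * (pager, paging_offset) pair, so that pair has to still be valid
	 * when the page comes back in.
	 */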

	/*
	 * Encrypt the page.
	 */
	aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
			&encrypt_iv.aes_iv[0],
			PAGE_SIZE / AES_BLOCK_SIZE,
			(unsigned char *) kernel_vaddr,
			&swap_crypt_ctx.encrypt);

	vm_page_encrypt_counter++;

	vm_object_lock(page->object);

	/*
	 * Unmap the page from the kernel's address space,
	 * if we had to map it ourselves.  Otherwise, let
	 * the caller undo the mapping if needed.
	 */
	if (kernel_mapping_size != 0) {
		vm_paging_unmap_object(page->object,
				       kernel_mapping_offset,
				       kernel_mapping_offset + kernel_mapping_size);
	}

	/*
	 * Restore the "reference" and "modified" bits.
	 * This should clean up any impact the encryption had
	 * on them.
	 */
	if (! page_was_referenced) {
		clear_refmod |= VM_MEM_REFERENCED;
		page->reference = FALSE;
	}
	if (! page_was_modified) {
		clear_refmod |= VM_MEM_MODIFIED;
		page->dirty = FALSE;
	}
	if (clear_refmod)
		pmap_clear_refmod(page->phys_page, clear_refmod);

	page->encrypted = TRUE;
}

/*
 * ENCRYPTED SWAP:
 * vm_page_decrypt:
 * 	Decrypt the given page.
 * 	The page might already be mapped at kernel virtual
 * 	address "kernel_mapping_offset".  Otherwise, we need
 * 	to map it.
 *
 * Context:
 * 	The page's VM object is locked but will be unlocked and relocked.
 * 	The page is busy and not accessible by users (not entered in any pmap).
 */
void
vm_page_decrypt(
	vm_page_t	page,
	vm_map_offset_t	kernel_mapping_offset)
{
	int			clear_refmod = 0;
	kern_return_t		kr;
	vm_map_size_t		kernel_mapping_size;
	vm_offset_t		kernel_vaddr;
	boolean_t		page_was_referenced;
	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		struct {
			memory_object_t		pager_object;
			vm_object_offset_t	paging_offset;
		} vm;
	} decrypt_iv;

	assert(page->busy);
	assert(page->encrypted);

	/*
	 * Gather the "reference" status of the page.
	 * We'll restore its value after the decryption, so that
	 * the decryption is transparent to the rest of the system
	 * and doesn't impact the VM's LRU logic.
	 */
	page_was_referenced =
		(page->reference || pmap_is_referenced(page->phys_page));

	if (kernel_mapping_offset == 0) {
		/*
		 * The page hasn't already been mapped in kernel space
		 * by the caller.  Map it now, so that we can access
		 * its contents and decrypt them.
		 */
		kernel_mapping_size = PAGE_SIZE;
		kr = vm_paging_map_object(&kernel_mapping_offset,
					  page,
					  page->object,
					  page->offset,
					  &kernel_mapping_size);
		if (kr != KERN_SUCCESS) {
			panic("vm_page_decrypt: "
			      "could not map page in kernel: 0x%x\n", kr);
		}
	} else {
		kernel_mapping_size = 0;
	}
	kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);

	assert(swap_crypt_ctx_initialized);

	/*
	 * Prepare an "initial vector" for the decryption.
	 * It has to be the same as the "initial vector" we
	 * used to encrypt that page.
	 */
	bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
	decrypt_iv.vm.pager_object = page->object->pager;
	decrypt_iv.vm.paging_offset =
		page->object->paging_offset + page->offset;

	vm_object_unlock(page->object);

	/* encrypt the "initial vector" */
	aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&decrypt_iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);

	/*
	 * Decrypt the page.
	 */
	aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
			&decrypt_iv.aes_iv[0],
			PAGE_SIZE / AES_BLOCK_SIZE,
			(unsigned char *) kernel_vaddr,
			&swap_crypt_ctx.decrypt);
	vm_page_decrypt_counter++;

	vm_object_lock(page->object);

	/*
	 * Unmap the page from the kernel's address space,
	 * if we had to map it ourselves.  Otherwise, let
	 * the caller undo the mapping if needed.
	 */
	if (kernel_mapping_size != 0) {
		vm_paging_unmap_object(page->object,
				       kernel_vaddr,
				       kernel_vaddr + PAGE_SIZE);
	}

	/*
	 * After decryption, the page is actually clean.
	 * It was encrypted as part of paging, which "cleans"
	 * the "dirty" pages.
	 * No one could access it after it was encrypted
	 * and the decryption doesn't count.
	 */
	page->dirty = FALSE;
	clear_refmod = VM_MEM_MODIFIED;

	/* restore the "reference" bit */
	if (! page_was_referenced) {
		page->reference = FALSE;
		clear_refmod |= VM_MEM_REFERENCED;
	}
	pmap_clear_refmod(page->phys_page, clear_refmod);

	page->encrypted = FALSE;

	/*
	 * We've just modified the page's contents via the data cache and part
	 * of the new contents might still be in the cache and not yet in RAM.
	 * Since the page is now available and might get gathered in a UPL to
	 * be part of a DMA transfer from a driver that expects the memory to
	 * be coherent at this point, we have to flush the data cache.
	 */
	pmap_sync_page_attributes_phys(page->phys_page);
	/*
	 * Since the page is not mapped yet, some code might assume that it
	 * doesn't need to invalidate the instruction cache when writing to
	 * that page.  That code relies on "no_isync" being set, so that the
	 * caches get synchronized when the page is first mapped.  So we need
	 * to set "no_isync" here too, despite the fact that we just
	 * synchronized the caches above...
	 */
	page->no_isync = TRUE;
}

unsigned long upl_encrypt_upls = 0;
unsigned long upl_encrypt_pages = 0;

/*
 * ENCRYPTED SWAP:
 * upl_encrypt:
 * 	Encrypts all the pages in the UPL, within the specified range.
 */
void
upl_encrypt(
	upl_t			upl,
	upl_offset_t		crypt_offset,
	upl_size_t		crypt_size)
{
	upl_size_t		upl_size;
	upl_offset_t		upl_offset;
	vm_object_t		upl_object;
	vm_page_t		page;
	vm_object_t		shadow_object;
	vm_object_offset_t	shadow_offset;
	vm_object_offset_t	paging_offset;
	vm_object_offset_t	base_offset;

	upl_encrypt_upls++;
	upl_encrypt_pages += crypt_size / PAGE_SIZE;

	upl_lock(upl);

	upl_object = upl->map_object;
	upl_offset = upl->offset;
	upl_size = upl->size;

	upl_unlock(upl);

	vm_object_lock(upl_object);

	/*
	 * Find the VM object that contains the actual pages.
	 */
	if (upl_object->pageout) {
		shadow_object = upl_object->shadow;
		/*
		 * The offset in the shadow object is actually also
		 * accounted for in upl->offset.  It possibly shouldn't be
		 * this way, but for now don't account for it twice.
		 */
		shadow_offset = 0;
		assert(upl_object->paging_offset == 0);	/* XXX ? */
		vm_object_lock(shadow_object);
	} else {
		shadow_object = upl_object;
		shadow_offset = 0;
	}

	paging_offset = shadow_object->paging_offset;
	vm_object_paging_begin(shadow_object);

	if (shadow_object != upl_object) {
		vm_object_unlock(shadow_object);
	}
	vm_object_unlock(upl_object);

	base_offset = shadow_offset;
	base_offset += upl_offset;
	base_offset += crypt_offset;
	base_offset -= paging_offset;
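	/*
	 * base_offset turns a UPL-relative offset into an offset within
	 * shadow_object: start from the shadow offset, add the UPL's own
	 * offset and the caller's crypt_offset, then subtract paging_offset
	 * since upl->offset already accounts for it (see the note above
	 * about not counting it twice).
	 */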
	/*
	 * Unmap the pages, so that nobody can continue accessing them while
	 * they're encrypted.  After that point, all accesses to these pages
	 * will cause a page fault and block while the page is being encrypted
	 * (busy).  After the encryption completes, any access will cause a
	 * page fault and the page gets decrypted at that time.
	 */
	assert(crypt_offset + crypt_size <= upl_size);
	vm_object_pmap_protect(shadow_object,
			       base_offset,
			       (vm_object_size_t)crypt_size,
			       PMAP_NULL,
			       0,
			       VM_PROT_NONE);

	/* XXX FBDP could the object have changed significantly here ? */
	vm_object_lock(shadow_object);

	for (upl_offset = 0;
	     upl_offset < crypt_size;
	     upl_offset += PAGE_SIZE) {
		page = vm_page_lookup(shadow_object,
				      base_offset + upl_offset);
		if (page == VM_PAGE_NULL) {
			panic("upl_encrypt: "
			      "no page for (obj=%p,off=%lld+%d)!\n",
			      shadow_object,
			      base_offset,
			      upl_offset);
		}
		vm_page_encrypt(page, 0);
	}

	vm_object_paging_end(shadow_object);
	vm_object_unlock(shadow_object);
}

vm_size_t
upl_get_internal_pagelist_offset(void)
{
	return sizeof(struct upl);
}

void
upl_clear_dirty(
	upl_t		upl,
	boolean_t 	value)
{
	if (value) {
		upl->flags |= UPL_CLEAR_DIRTY;
	} else {
		upl->flags &= ~UPL_CLEAR_DIRTY;
	}
}

#ifdef MACH_BSD

boolean_t  upl_page_present(upl_page_info_t *upl, int index)
{
	return(UPL_PAGE_PRESENT(upl, index));
}
boolean_t  upl_dirty_page(upl_page_info_t *upl, int index)
{
	return(UPL_DIRTY_PAGE(upl, index));
}
boolean_t  upl_valid_page(upl_page_info_t *upl, int index)
{
	return(UPL_VALID_PAGE(upl, index));
}
ppnum_t  upl_phys_page(upl_page_info_t *upl, int index)
{
	return(UPL_PHYS_PAGE(upl, index));
}

void
vm_countdirtypages(void)
{
	vm_page_t m;
	int dpages;
	int pgopages;
	int precpages;

	dpages = 0;
	pgopages = 0;
	precpages = 0;

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	do {
		if (m == (vm_page_t)0) break;

		if (m->dirty) dpages++;
		if (m->pageout) pgopages++;
		if (m->precious) precpages++;

		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m));
	vm_page_unlock_queues();

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_zf);
	do {
		if (m == (vm_page_t)0) break;

		if (m->dirty) dpages++;
		if (m->pageout) pgopages++;
		if (m->precious) precpages++;

		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_zf, (queue_entry_t) m));
	vm_page_unlock_queues();

	printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
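	/*
	 * The three columns are dirty : pageout : precious counts; the
	 * "IN Q" totals above cover both the inactive and zero-fill queues,
	 * the "AC Q" totals below cover the active queue.
	 */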

	dpages = 0;
	pgopages = 0;
	precpages = 0;

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_active);

	do {
		if (m == (vm_page_t)0) break;
		if (m->dirty) dpages++;
		if (m->pageout) pgopages++;
		if (m->precious) precpages++;

		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_active, (queue_entry_t) m));
	vm_page_unlock_queues();

	printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);

}
#endif /* MACH_BSD */

ppnum_t  upl_get_highest_page(
			      upl_t		upl)
{
	return upl->highest_page;
}

#ifdef UPL_DEBUG
kern_return_t  upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
{
	upl->ubc_alias1 = alias1;
	upl->ubc_alias2 = alias2;
	return KERN_SUCCESS;
}
int  upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
{
	if (al)
		*al = upl->ubc_alias1;
	if (al2)
		*al2 = upl->ubc_alias2;
	return KERN_SUCCESS;
}
#endif /* UPL_DEBUG */

#if	MACH_KDB
#include <ddb/db_output.h>
#include <ddb/db_print.h>
#include <vm/vm_print.h>

#define	printf	kdbprintf
void		db_pageout(void);

void
db_vm(void)
{
	iprintf("VM Statistics:\n");
	iprintf("pages:\n");
	iprintf("activ %5d inact %5d free  %5d",
		vm_page_active_count, vm_page_inactive_count,
		vm_page_free_count);
	printf(" wire  %5d gobbl %5d\n",
	       vm_page_wire_count, vm_page_gobble_count);
	iprintf("target:\n");
	iprintf("min   %5d inact %5d free  %5d",
		vm_page_free_min, vm_page_inactive_target,
		vm_page_free_target);
	printf(" resrv %5d\n", vm_page_free_reserved);
	iprintf("pause:\n");
	/* ... */
}

#if	MACH_COUNTERS
extern int c_laundry_pages_freed;
#endif	/* MACH_COUNTERS */

void
db_pageout(void)
{
	iprintf("Pageout Statistics:\n");
	iprintf("active %5d  inactv %5d\n",
		vm_pageout_active, vm_pageout_inactive);
	iprintf("nolock %5d  avoid  %5d  busy   %5d  absent %5d\n",
		vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
		vm_pageout_inactive_busy, vm_pageout_inactive_absent);
	iprintf("used   %5d  clean  %5d  dirty  %5d\n",
		vm_pageout_inactive_used, vm_pageout_inactive_clean,
		vm_pageout_inactive_dirty);
#if	MACH_COUNTERS
	iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
#endif	/* MACH_COUNTERS */
#if	MACH_CLUSTER_STATS
	iprintf("Cluster Statistics:\n");
	iprintf("dirtied   %5d   cleaned  %5d   collisions  %5d\n",
		vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
		vm_pageout_cluster_collisions);
	iprintf("clusters  %5d   conversions  %5d\n",
		vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
	iprintf("Target Statistics:\n");
	iprintf("collisions   %5d   page_dirtied  %5d   page_freed  %5d\n",
		vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
		vm_pageout_target_page_freed);
#endif	/* MACH_CLUSTER_STATS */
}
#endif	/* MACH_KDB */