/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License.  The rights granted to you under the
 * License may not be used to create, or enable the creation or
 * redistribution of, unlawful or unlicensed copies of an Apple operating
 * system, or to circumvent, violate, or enable the circumvention or
 * violation of, any terms of an Apple operating system software license
 * agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm_fault.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Page fault handling module.
 */
#include <mach_cluster_stats.h>
#include <mach_pagemap.h>

#include <mach/mach_types.h>
#include <mach/kern_return.h>
#include <mach/message.h>	/* for error codes */
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/memory_object.h>
				/* For memory_object_data_{request,unlock} */

#include <kern/kern_types.h>
#include <kern/host_statistics.h>
#include <kern/counters.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/host.h>
#include <kern/mach_param.h>
#include <kern/macro_help.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>

#include <ppc/proc_reg.h>

#include <vm/vm_fault.h>
#include <vm/task_working_set.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>

#include <sys/kdebug.h>
#define VM_FAULT_CLASSIFY	0
#define VM_FAULT_STATIC_CONFIG	1

#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */

unsigned int	vm_object_absent_max = 50;

int		vm_fault_debug = 0;

#if	!VM_FAULT_STATIC_CONFIG
boolean_t	vm_fault_dirty_handling = FALSE;
boolean_t	vm_fault_interruptible = FALSE;
boolean_t	software_reference_bits = TRUE;
#endif

#if	MACH_KDB
extern struct db_watchpoint *db_watchpoint_list;
#endif	/* MACH_KDB */
/* Forward declarations of internal routines. */
extern kern_return_t vm_fault_wire_fast(
			vm_map_t	map,
			vm_map_offset_t	va,
			vm_map_entry_t	entry,
			pmap_t		pmap,
			vm_map_offset_t	pmap_addr);

extern void vm_fault_continue(void);

extern void vm_fault_copy_cleanup(
			vm_page_t	page,
			vm_page_t	top_page);

extern void vm_fault_copy_dst_cleanup(
			vm_page_t	page);

#if	VM_FAULT_CLASSIFY
extern void vm_fault_classify(vm_object_t	object,
			vm_object_offset_t	offset,
			vm_prot_t		fault_type);

extern void vm_fault_classify_init(void);
#endif
/*
 *	Routine:	vm_fault_init
 *	Purpose:
 *		Initialize our private data structures.
 */
/*
 *	Routine:	vm_fault_cleanup
 *	Purpose:
 *		Clean up the result of vm_fault_page.
 *	Results:
 *		The paging reference for "object" is released.
 *		"object" is unlocked.
 *		If "top_page" is not null, "top_page" is
 *		freed and the paging reference for the object
 *		containing it is released.
 *	In/out conditions:
 *		"object" must be locked.
 */
void
vm_fault_cleanup(
	register vm_object_t	object,
	register vm_page_t	top_page)
{
	vm_object_paging_end(object);
	vm_object_unlock(object);

	if (top_page != VM_PAGE_NULL) {
		object = top_page->object;
		vm_object_lock(object);
		VM_PAGE_FREE(top_page);
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}
}
#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES	16
struct {
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats_in[MAXCLUSTERPAGES];
#define CLUSTER_STAT(clause)	clause
#define CLUSTER_STAT_HIGHER(x)	\
	((cluster_stats_in[(x)].pages_at_higher_offsets)++)
#define CLUSTER_STAT_LOWER(x)	\
	((cluster_stats_in[(x)].pages_at_lower_offsets)++)
#define CLUSTER_STAT_CLUSTER(x)	\
	((cluster_stats_in[(x)].pages_in_cluster)++)
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */
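
/*
 * Usage note: when MACH_CLUSTER_STATS is not configured, CLUSTER_STAT()
 * expands to nothing, so statistics-only declarations and increments
 * compile away entirely.  vm_fault_page() below relies on this, e.g.:
 *
 *	CLUSTER_STAT(int pages_at_higher_offsets;)
 */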
/* XXX - temporary */
boolean_t vm_allow_clustered_pagein = FALSE;
int vm_pagein_cluster_used = 0;

#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)


boolean_t	vm_page_deactivate_behind = TRUE;
/*
 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
 */
int vm_default_ahead = 0;
int vm_default_behind = MAX_UPL_TRANSFER;
/*
 *	vm_page_deactivate_behind
 *
 *	Determine if sequential access is in progress
 *	in accordance with the behavior specified.  If
 *	so, compute a potential page to deactivate and
 *	deactivate it.
 *
 *	The object must be locked.
 */
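
/*
 * Informal sketch of the policy implemented below: a fault at "offset"
 * looks sequential when the previous allocation (object->last_alloc) was
 * exactly one page before it (or one page after it, for reverse-sequential
 * behaviors).  Each sequential hit grows object->sequential by a page, and
 * once the run is long enough the page trailing the run (vm_default_behind
 * pages back for VM_BEHAVIOR_DEFAULT) becomes the deactivation candidate;
 * any non-sequential access resets object->sequential to a single page.
 */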
static boolean_t
vm_fault_deactivate_behind(
			vm_object_t object,
			vm_object_offset_t offset,
			vm_behavior_t behavior)
{
	vm_page_t m;

#if TRACEFAULTPAGE
	dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind);	/* (TEST/DEBUG) */
#endif

	if (object == kernel_object) {
		/*
		 * Do not deactivate pages from the kernel object: they
		 * are not intended to become pageable.
		 */
		return FALSE;
	}

	switch (behavior) {
	case VM_BEHAVIOR_RANDOM:
		object->sequential = PAGE_SIZE_64;
		m = VM_PAGE_NULL;
		break;
	case VM_BEHAVIOR_SEQUENTIAL:
		if (offset &&
			object->last_alloc == offset - PAGE_SIZE_64) {
			object->sequential += PAGE_SIZE_64;
			m = vm_page_lookup(object, offset - PAGE_SIZE_64);
		} else {
			object->sequential = PAGE_SIZE_64; /* reset */
			m = VM_PAGE_NULL;
		}
		break;
	case VM_BEHAVIOR_RSEQNTL:
		if (object->last_alloc &&
			object->last_alloc == offset + PAGE_SIZE_64) {
			object->sequential += PAGE_SIZE_64;
			m = vm_page_lookup(object, offset + PAGE_SIZE_64);
		} else {
			object->sequential = PAGE_SIZE_64; /* reset */
			m = VM_PAGE_NULL;
		}
		break;
	case VM_BEHAVIOR_DEFAULT:
	default:
		if (offset &&
			object->last_alloc == offset - PAGE_SIZE_64) {
			vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;

			object->sequential += PAGE_SIZE_64;
			m = (offset >= behind &&
				object->sequential >= behind) ?
				vm_page_lookup(object, offset - behind) :
				VM_PAGE_NULL;
		} else if (object->last_alloc &&
			object->last_alloc == offset + PAGE_SIZE_64) {
			vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;

			object->sequential += PAGE_SIZE_64;
			m = (offset < -behind &&
				object->sequential >= behind) ?
				vm_page_lookup(object, offset + behind) :
				VM_PAGE_NULL;
		} else {
			object->sequential = PAGE_SIZE_64;
			m = VM_PAGE_NULL;
		}
		break;
	}
	object->last_alloc = offset;

	if (m) {
		if (!m->busy) {
			vm_page_lock_queues();
			vm_page_deactivate(m);
			vm_page_unlock_queues();
#if TRACEFAULTPAGE
			dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
#endif
		}
		return TRUE;
	}
	return FALSE;
}
/*
 *	Routine:	vm_fault_page
 *	Purpose:
 *		Find the resident page for the virtual memory
 *		specified by the given virtual memory object
 *		and offset.
 *	Additional arguments:
 *		The required permissions for the page is given
 *		in "fault_type".  Desired permissions are included
 *		in "protection".  The minimum and maximum valid offsets
 *		within the object for the relevant map entry are
 *		passed in "lo_offset" and "hi_offset" respectively and
 *		the expected page reference pattern is passed in "behavior".
 *		These three parameters are used to determine pagein cluster
 *		limits.
 *
 *		If the desired page is known to be resident (for
 *		example, because it was previously wired down), asserting
 *		the "unwiring" parameter will speed the search.
 *
 *		If the operation can be interrupted (by thread_abort
 *		or thread_terminate), then the "interruptible"
 *		parameter should be asserted.
 *
 *	Results:
 *		The page containing the proper data is returned
 *		in "result_page".
 *
 *	In/out conditions:
 *		The source object must be locked and referenced,
 *		and must donate one paging reference.  The reference
 *		is not affected.  The paging reference and lock are
 *		consumed.
 *
 *		If the call succeeds, the object in which "result_page"
 *		resides is left locked and holding a paging reference.
 *		If this is not the original object, a busy page in the
 *		original object is returned in "top_page", to prevent other
 *		callers from pursuing this same data, along with a paging
 *		reference for the original object.  The "top_page" should
 *		be destroyed when this guarantee is no longer required.
 *		The "result_page" is also left busy.  It is not removed
 *		from the pageout queues.
 */
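
/*
 * Illustrative caller sketch (informal, argument list abbreviated):
 *
 *	vm_object_paging_begin(object);
 *	kr = vm_fault_page(object, offset, fault_type, ...,
 *			   &prot, &result_page, &top_page, ...);
 *	if (kr == VM_FAULT_SUCCESS) {
 *		... use result_page ...
 *		PAGE_WAKEUP_DONE(result_page);
 *		vm_fault_cleanup(result_page->object, top_page);
 *	}
 *
 * The cleanup obligation mirrors vm_fault_cleanup() above: on success the
 * object holding "result_page" comes back locked with a paging reference,
 * and "top_page" (if any) must eventually be released through
 * vm_fault_cleanup().
 */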
vm_fault_return_t
vm_fault_page(
	vm_object_t	first_object,	/* Object to begin search */
	vm_object_offset_t first_offset,	/* Offset into object */
	vm_prot_t	fault_type,	/* What access is requested */
	boolean_t	must_be_resident,/* Must page be resident? */
	int		interruptible,	/* how may fault be interrupted? */
	vm_map_offset_t	lo_offset,	/* Map entry start */
	vm_map_offset_t	hi_offset,	/* Map entry end */
	vm_behavior_t	behavior,	/* Page reference behavior */
	/* Modifies in place: */
	vm_prot_t	*protection,	/* Protection for mapping */
	vm_page_t	*result_page,	/* Page found, if successful */
	vm_page_t	*top_page,	/* Page in top object, if
					 * not result_page.  */
	int		*type_of_fault,	/* if non-null, fill in with type of fault
					 * COW, zero-fill, etc... returned in trace point */
	/* More arguments: */
	kern_return_t	*error_code,	/* code if page is in error */
	boolean_t	no_zero_fill,	/* don't zero fill absent pages */
	boolean_t	data_supply,	/* treat as data_supply if
					 * it is a write fault and a full
					 * page is provided */
	vm_map_t	map,
	__unused vm_map_offset_t vaddr)
{
	vm_page_t		m;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_page_t		first_m;
	vm_object_t		next_object;
	vm_object_t		copy_object;
	boolean_t		look_for_page;
	vm_prot_t		access_required = fault_type;
	vm_prot_t		wants_copy_flag;
	vm_object_size_t	length;
	vm_object_offset_t	cluster_start, cluster_end;
	CLUSTER_STAT(int pages_at_higher_offsets;)
	CLUSTER_STAT(int pages_at_lower_offsets;)
	kern_return_t		wait_result;
	boolean_t		interruptible_state;
	boolean_t		bumped_pagein = FALSE;
#if	MACH_PAGEMAP
/*
 * MACH page map - an optional optimization where a bit map is maintained
 * by the VM subsystem for internal objects to indicate which pages of
 * the object currently reside on backing store.  This existence map
 * duplicates information maintained by the vnode pager.  It is
 * created at the time of the first pageout against the object, i.e.
 * at the same time pager for the object is created.  The optimization
 * is designed to eliminate pager interaction overhead, if it is
 * 'known' that the page does not exist on backing store.
 *
 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
 * either marked as paged out in the existence map for the object or no
 * existence map exists for the object.  LOOK_FOR() is one of the
 * criteria in the decision to invoke the pager.   It is also used as one
 * of the criteria to terminate the scan for adjacent pages in a clustered
 * pagein operation.  Note that LOOK_FOR() always evaluates to TRUE for
 * permanent objects.  Note also that if the pager for an internal object
 * has not been created, the pager is not invoked regardless of the value
 * of LOOK_FOR() and that clustered pagein scans are only done on an object
 * for which a pager has been created.
 *
 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 * is marked as paged out in the existence map for the object.
 * PAGED_OUT() is used to determine if a page has already been pushed
 * into a copy object in order to avoid a redundant page out operation.
 */
#define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			!= VM_EXTERNAL_STATE_ABSENT)
#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			== VM_EXTERNAL_STATE_EXISTS)
#else /* MACH_PAGEMAP */
/*
 * If the MACH page map optimization is not enabled,
 * LOOK_FOR() always evaluates to TRUE.  The pager will always be
 * invoked to resolve missing pages in an object, assuming the pager
 * has been created for the object.  In a clustered page operation, the
 * absence of a page on backing store cannot be used to terminate
 * a scan for adjacent pages since that information is available only in
 * the pager.  Hence pages that may not be paged out are potentially
 * included in a clustered request.  The vnode pager is coded to deal
 * with any combination of absent/present pages in a clustered
 * pagein request.  PAGED_OUT() always evaluates to FALSE, i.e. the pager
 * will always be invoked to push a dirty page into a copy object assuming
 * a pager has been created.  If the page has already been pushed, the
 * pager will ignore the new request.
 */
#define LOOK_FOR(o, f) TRUE
#define PAGED_OUT(o, f) FALSE
#endif /* MACH_PAGEMAP */
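
/*
 * Usage note: the pager is consulted based on an expression of the form
 *
 *	look_for_page = (object->pager_created) && LOOK_FOR(object, offset) ...
 *
 * and PAGED_OUT() is checked before pushing a page into a copy object, so
 * the two macros only refine behavior when MACH_PAGEMAP existence maps are
 * available; without them the pager is always asked.
 */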
#define PREPARE_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	vm_page_lock_queues();				\
	MACRO_END

#define DO_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	PAGE_WAKEUP_DONE(m);				\
	if (!m->active && !m->inactive)			\
		vm_page_activate(m);			\
	vm_page_unlock_queues();			\
	MACRO_END

#define RELEASE_PAGE(m)					\
	MACRO_BEGIN					\
	PREPARE_RELEASE_PAGE(m);			\
	DO_RELEASE_PAGE(m);				\
	MACRO_END
#if TRACEFAULTPAGE
	dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset);	/* (TEST/DEBUG) */
#endif

#if	!VM_FAULT_STATIC_CONFIG
	if (vm_fault_dirty_handling
#if	MACH_KDB
		/*
		 *	If there are watchpoints set, then
		 *	we don't want to give away write permission
		 *	on a read fault.  Make the task write fault,
		 *	so that the watchpoint code notices the access.
		 */
	    || db_watchpoint_list
#endif	/* MACH_KDB */
	    ) {
		/*
		 *	If we aren't asking for write permission,
		 *	then don't give it away.  We're using write
		 *	faults to set the dirty bit.
		 */
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
	}

	if (!vm_fault_interruptible)
		interruptible = THREAD_UNINT;
#else	/* STATIC_CONFIG */
#if	MACH_KDB
	/*
	 *	If there are watchpoints set, then
	 *	we don't want to give away write permission
	 *	on a read fault.  Make the task write fault,
	 *	so that the watchpoint code notices the access.
	 */
	if (db_watchpoint_list) {
		/*
		 *	If we aren't asking for write permission,
		 *	then don't give it away.  We're using write
		 *	faults to set the dirty bit.
		 */
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
	}
#endif	/* MACH_KDB */
#endif	/* STATIC_CONFIG */

	interruptible_state = thread_interrupt_level(interruptible);
	/*
	 *	INVARIANTS (through entire routine):
	 *
	 *	1)	At all times, we must either have the object
	 *		lock or a busy page in some object to prevent
	 *		some other thread from trying to bring in
	 *		the same page.
	 *
	 *		Note that we cannot hold any locks during the
	 *		pager access or when waiting for memory, so
	 *		we use a busy page then.
	 *
	 *		Note also that we aren't as concerned about more than
	 *		one thread attempting to memory_object_data_unlock
	 *		the same page at once, so we don't hold the page
	 *		as busy then, but do record the highest unlock
	 *		value so far.  [Unlock requests may also be delivered
	 *		out of order.]
	 *
	 *	2)	To prevent another thread from racing us down the
	 *		shadow chain and entering a new page in the top
	 *		object before we do, we must keep a busy page in
	 *		the top object while following the shadow chain.
	 *
	 *	3)	We must increment paging_in_progress on any object
	 *		for which we have a busy page
	 *
	 *	4)	We leave busy pages on the pageout queues.
	 *		If the pageout daemon comes across a busy page,
	 *		it will remove the page from the pageout queues.
	 */
	/*
	 *	Search for the page at object/offset.
	 */

	object = first_object;
	offset = first_offset;
	first_m = VM_PAGE_NULL;
	access_required = fault_type;

	XPR(XPR_VM_FAULT,
		"vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
		(integer_t)object, offset, fault_type, *protection, 0);
	/*
	 *	See whether this page is resident
	 */

	while (TRUE) {
#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
		if (!object->alive) {
			vm_fault_cleanup(object, first_m);
			thread_interrupt_level(interruptible_state);
			return(VM_FAULT_MEMORY_ERROR);
		}
		m = vm_page_lookup(object, offset);
#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object);	/* (TEST/DEBUG) */
#endif
		if (m != VM_PAGE_NULL) {
			/*
			 *	If the page was pre-paged as part of a
			 *	cluster, record the fact.
			 *	If we were passed a valid pointer for
			 *	"type_of_fault", then we came from
			 *	vm_fault... we'll let it deal with
			 *	this condition, since it
			 *	needs to see m->clustered to correctly
			 *	account the pageins.
			 */
			if (type_of_fault == NULL && m->clustered) {
				vm_pagein_cluster_used++;
				m->clustered = FALSE;
			}
			/*
			 *	If the page is being brought in,
			 *	wait for it and then retry.
			 *
			 *	A possible optimization: if the page
			 *	is known to be resident, we can ignore
			 *	pages that are absent (regardless of
			 *	whether they're busy).
			 */

			if (m->busy) {
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
				wait_result = PAGE_SLEEP(object, m, interruptible);
				XPR(XPR_VM_FAULT,
				    "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
					(integer_t)object, offset,
					(integer_t)m, 0, 0);
				counter(c_vm_fault_page_block_busy_kernel++);

				if (wait_result != THREAD_AWAKENED) {
					vm_fault_cleanup(object, first_m);
					thread_interrupt_level(interruptible_state);
					if (wait_result == THREAD_RESTART)
						return(VM_FAULT_RETRY);
					else
						return(VM_FAULT_INTERRUPTED);
				}
				continue;
			}
			/*
			 * ENCRYPTED SWAP:
			 * the user needs access to a page that we
			 * encrypted before paging it out.
			 * Decrypt the page now.
			 * Keep it busy to prevent anyone from
			 * accessing it during the decryption.
			 */
			if (m->encrypted) {
				m->busy = TRUE;
				vm_page_decrypt(m, 0);
				assert(object == m->object);
				assert(m->busy);
				PAGE_WAKEUP_DONE(m);

				/*
				 * Retry from the top, in case
				 * something changed while we were
				 * decrypting.
				 */
				continue;
			}
			ASSERT_PAGE_DECRYPTED(m);
			/*
			 *	If the page is in error, give up now.
			 */

			if (m->error) {
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code);	/* (TEST/DEBUG) */
#endif
				if (error_code)
					*error_code = m->page_error;
				VM_PAGE_FREE(m);
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_MEMORY_ERROR);
			}
			/*
			 *	If the pager wants us to restart
			 *	at the top of the chain,
			 *	typically because it has moved the
			 *	page to another pager, then do so.
			 */

			if (m->restart) {
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
				VM_PAGE_FREE(m);
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_RETRY);
			}
			/*
			 *	If the page isn't busy, but is absent,
			 *	then it was deemed "unavailable".
			 */

			if (m->absent) {
				/*
				 * Remove the non-existent page (unless it's
				 * in the top object) and move on down to the
				 * next object (if there is one).
				 */
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow);	/* (TEST/DEBUG) */
#endif

				next_object = object->shadow;
				if (next_object == VM_OBJECT_NULL) {
					vm_page_t real_m;

					assert(!must_be_resident);

					if (object->shadow_severed) {
						vm_fault_cleanup(
							object, first_m);
						thread_interrupt_level(interruptible_state);
						return VM_FAULT_MEMORY_ERROR;
					}
					/*
					 * Absent page at bottom of shadow
					 * chain; zero fill the page we left
					 * busy in the first object, and flush
					 * the absent page.  But first we
					 * need to allocate a real page.
					 */
					if (VM_PAGE_THROTTLED() ||
					    (real_m = vm_page_grab())
							== VM_PAGE_NULL) {
						vm_fault_cleanup(
							object, first_m);
						thread_interrupt_level(
							interruptible_state);
						return(
						   VM_FAULT_MEMORY_SHORTAGE);
					}

					/*
					 * are we protecting the system from
					 * backing store exhaustion.  If so
					 * sleep unless we are privileged.
					 */

					if(vm_backing_store_low) {
						if(!(current_task()->priv_flags
								& VM_BACKING_STORE_PRIV)) {
							assert_wait((event_t)
								&vm_backing_store_low,
								THREAD_UNINT);
							vm_fault_cleanup(object,
								first_m);
							thread_block(THREAD_CONTINUE_NULL);
							thread_interrupt_level(
								interruptible_state);
							return(VM_FAULT_RETRY);
						}
					}
783 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
784 (integer_t
)object
, offset
,
786 (integer_t
)first_object
, 0);
787 if (object
!= first_object
) {
789 vm_object_paging_end(object
);
790 vm_object_unlock(object
);
791 object
= first_object
;
792 offset
= first_offset
;
794 first_m
= VM_PAGE_NULL
;
795 vm_object_lock(object
);
					VM_PAGE_FREE(m);
					assert(real_m->busy);
					vm_page_insert(real_m, object, offset);
					m = real_m;

					/*
					 *  Drop the lock while zero filling
					 *  page.  Then break because this
					 *  is the page we wanted.  Checking
					 *  the page lock is a waste of time;
					 *  this page was either absent or
					 *  newly allocated -- in both cases
					 *  it can't be page locked by a pager.
					 */
					vm_object_unlock(object);
					vm_page_zero_fill(m);
					vm_object_lock(object);

					if (type_of_fault)
						*type_of_fault = DBG_ZERO_FILL_FAULT;
					VM_STAT(zero_fill_count++);

					if (bumped_pagein == TRUE) {
						VM_STAT(pageins--);
						current_task()->pageins--;
					}
					vm_page_lock_queues();
					VM_PAGE_QUEUES_REMOVE(m);
					m->page_ticket = vm_page_ticket;
					assert(m->object != kernel_object);
					assert(m->pageq.next == NULL &&
						m->pageq.prev == NULL);
					if(m->object->size > 0x200000) {
						m->zero_fill = TRUE;
						/* depends on the queues lock */
						vm_zf_count += 1;
						queue_enter(&vm_page_queue_zf,
							m, vm_page_t, pageq);
					} else {
						queue_enter(
							&vm_page_queue_inactive,
							m, vm_page_t, pageq);
					}
					vm_page_ticket_roll++;
					if(vm_page_ticket_roll ==
						VM_PAGE_TICKETS_IN_ROLL) {
						vm_page_ticket_roll = 0;
						if(vm_page_ticket ==
						     VM_PAGE_TICKET_ROLL_IDS)
							vm_page_ticket = 0;
						else
							vm_page_ticket++;
					}
					m->inactive = TRUE;
					vm_page_inactive_count++;
					vm_page_unlock_queues();
					break;
				} else {
					if (must_be_resident) {
						vm_object_paging_end(object);
					} else if (object != first_object) {
						vm_object_paging_end(object);
						VM_PAGE_FREE(m);
					} else {
						first_m = m;
						m->absent = FALSE;
						m->unusual = FALSE;
						vm_object_absent_release(object);
						m->busy = TRUE;

						vm_page_lock_queues();
						VM_PAGE_QUEUES_REMOVE(m);
						vm_page_unlock_queues();
					}
					XPR(XPR_VM_FAULT,
					    "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
						(integer_t)object, offset,
						(integer_t)next_object,
						offset+object->shadow_offset,0);
					offset += object->shadow_offset;
					hi_offset += object->shadow_offset;
					lo_offset += object->shadow_offset;
					access_required = VM_PROT_READ;
					vm_object_lock(next_object);
					vm_object_unlock(object);
					object = next_object;
					vm_object_paging_begin(object);
					continue;
				}
			}
			if (m->cleaning
				&& ((object != first_object) ||
					(object->copy != VM_OBJECT_NULL))
				&& (fault_type & VM_PROT_WRITE)) {
				/*
				 * This is a copy-on-write fault that will
				 * cause us to revoke access to this page, but
				 * this page is in the process of being cleaned
				 * in a clustered pageout. We must wait until
				 * the cleaning operation completes before
				 * revoking access to the original page,
				 * otherwise we might attempt to remove a
				 * busy page.
				 */
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset);	/* (TEST/DEBUG) */
#endif
				XPR(XPR_VM_FAULT,
				    "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
					(integer_t)object, offset,
					(integer_t)m, 0, 0);
				/* take an extra ref so that object won't die */
				assert(object->ref_count > 0);
				object->ref_count++;
				vm_object_res_reference(object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(object);
				assert(object->ref_count > 0);
				m = vm_page_lookup(object, offset);
				if (m != VM_PAGE_NULL && m->cleaning) {
					PAGE_ASSERT_WAIT(m, interruptible);
					vm_object_unlock(object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);
					goto backoff;
				} else {
					vm_object_unlock(object);
					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
				}
			}
			/*
			 *	If the desired access to this page has
			 *	been locked out, request that it be unlocked.
			 */

			if (access_required & m->page_lock) {
				if ((access_required & m->unlock_request) != access_required) {
					vm_prot_t	new_unlock_request;
					kern_return_t	rc;

#if TRACEFAULTPAGE
					dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready);	/* (TEST/DEBUG) */
#endif
					if (!object->pager_ready) {
						XPR(XPR_VM_FAULT,
						    "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
							access_required,
							(integer_t)object, offset,
							(integer_t)m, 0);
						/* take an extra ref */
						assert(object->ref_count > 0);
						object->ref_count++;
						vm_object_res_reference(object);
						vm_fault_cleanup(object,
								 first_m);
						counter(c_vm_fault_page_block_backoff_kernel++);
						vm_object_lock(object);
						assert(object->ref_count > 0);
						if (!object->pager_ready) {
							wait_result = vm_object_assert_wait(
								object,
								VM_OBJECT_EVENT_PAGER_READY,
								interruptible);
							vm_object_unlock(object);
							if (wait_result == THREAD_WAITING)
								wait_result = thread_block(THREAD_CONTINUE_NULL);
							vm_object_deallocate(object);
							goto backoff;
						} else {
							vm_object_unlock(object);
							vm_object_deallocate(object);
							thread_interrupt_level(interruptible_state);
							return VM_FAULT_RETRY;
						}
					}

					new_unlock_request = m->unlock_request =
						(access_required | m->unlock_request);
					vm_object_unlock(object);
					XPR(XPR_VM_FAULT,
					    "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
						(integer_t)object, offset,
						(integer_t)m, new_unlock_request, 0);
					if ((rc = memory_object_data_unlock(
						object->pager,
						offset + object->paging_offset,
						PAGE_SIZE,
						new_unlock_request))
					     != KERN_SUCCESS) {
						if (vm_fault_debug)
							printf("vm_fault: memory_object_data_unlock failed\n");
						vm_object_lock(object);
						vm_fault_cleanup(object, first_m);
						thread_interrupt_level(interruptible_state);
						return((rc == MACH_SEND_INTERRUPTED) ?
							VM_FAULT_INTERRUPTED :
							VM_FAULT_MEMORY_ERROR);
					}
					vm_object_lock(object);
					continue;
				}
1010 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
1011 access_required
, (integer_t
)object
,
1012 offset
, (integer_t
)m
, 0);
1013 /* take an extra ref so object won't die */
1014 assert(object
->ref_count
> 0);
1015 object
->ref_count
++;
1016 vm_object_res_reference(object
);
1017 vm_fault_cleanup(object
, first_m
);
1018 counter(c_vm_fault_page_block_backoff_kernel
++);
1019 vm_object_lock(object
);
1020 assert(object
->ref_count
> 0);
1021 m
= vm_page_lookup(object
, offset
);
1022 if (m
!= VM_PAGE_NULL
&&
1023 (access_required
& m
->page_lock
) &&
1024 !((access_required
& m
->unlock_request
) != access_required
)) {
1025 PAGE_ASSERT_WAIT(m
, interruptible
);
1026 vm_object_unlock(object
);
1027 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1028 vm_object_deallocate(object
);
1031 vm_object_unlock(object
);
1032 vm_object_deallocate(object
);
1033 thread_interrupt_level(interruptible_state
);
1034 return VM_FAULT_RETRY
;
			/*
			 *	We mark the page busy and leave it on
			 *	the pageout queues.  If the pageout
			 *	daemon comes across it, then it will
			 *	remove the page.
			 */

#if TRACEFAULTPAGE
			dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif

#if	!VM_FAULT_STATIC_CONFIG
			if (!software_reference_bits) {
				vm_page_lock_queues();
				if (m->inactive)
					vm_stat.reactivations++;

				VM_PAGE_QUEUES_REMOVE(m);
				vm_page_unlock_queues();
			}
#endif
			XPR(XPR_VM_FAULT,
			    "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
				(integer_t)object, offset, (integer_t)m, 0, 0);
			assert(!m->busy);
			m->busy = TRUE;
			assert(!m->absent);
			break;
		}
		look_for_page =
			(object->pager_created) &&
			  LOOK_FOR(object, offset) &&
			  (!data_supply);

#if TRACEFAULTPAGE
		dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object);	/* (TEST/DEBUG) */
#endif
		if ((look_for_page || (object == first_object))
				&& !must_be_resident
				&& !(object->phys_contiguous)) {
			/*
			 *	Allocate a new page for this object/offset
			 *	pair.
			 */

			m = vm_page_grab_fictitious();
#if TRACEFAULTPAGE
			dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object);	/* (TEST/DEBUG) */
#endif
			if (m == VM_PAGE_NULL) {
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_FICTITIOUS_SHORTAGE);
			}
			vm_page_insert(m, object, offset);
		}

		if ((look_for_page && !must_be_resident)) {
			kern_return_t	rc;
			/*
			 *	If the memory manager is not ready, we
			 *	cannot make requests.
			 */
			if (!object->pager_ready) {
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
				if(m != VM_PAGE_NULL)
					VM_PAGE_FREE(m);
				XPR(XPR_VM_FAULT,
				    "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
					(integer_t)object, offset, 0, 0, 0);
				/* take an extra ref so object won't die */
				assert(object->ref_count > 0);
				object->ref_count++;
				vm_object_res_reference(object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(object);
				assert(object->ref_count > 0);
				if (!object->pager_ready) {
					wait_result = vm_object_assert_wait(object,
							VM_OBJECT_EVENT_PAGER_READY,
							interruptible);
					vm_object_unlock(object);
					if (wait_result == THREAD_WAITING)
						wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);
					goto backoff;
				} else {
					vm_object_unlock(object);
					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
				}
			}
			if(object->phys_contiguous) {
				if(m != VM_PAGE_NULL) {
					VM_PAGE_FREE(m);
					m = VM_PAGE_NULL;
				}
			}
			if (object->internal) {
				/*
				 *	Requests to the default pager
				 *	must reserve a real page in advance,
				 *	because the pager's data-provided
				 *	won't block for pages.  IMPORTANT:
				 *	this acts as a throttling mechanism
				 *	for data_requests to the default
				 *	pager.
				 */

#if TRACEFAULTPAGE
				dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
				if (m->fictitious && !vm_page_convert(m)) {
					VM_PAGE_FREE(m);
					vm_fault_cleanup(object, first_m);
					thread_interrupt_level(interruptible_state);
					return(VM_FAULT_MEMORY_SHORTAGE);
				}
			} else if (object->absent_count >
						vm_object_absent_max) {
				/*
				 *	If there are too many outstanding page
				 *	requests pending on this object, we
				 *	wait for them to be resolved now.
				 */

#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
				if(m != VM_PAGE_NULL)
					VM_PAGE_FREE(m);
				/* take an extra ref so object won't die */
				assert(object->ref_count > 0);
				object->ref_count++;
				vm_object_res_reference(object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(object);
				assert(object->ref_count > 0);
				if (object->absent_count > vm_object_absent_max) {
					vm_object_absent_assert_wait(object,
								     interruptible);
					vm_object_unlock(object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);
					goto backoff;
				} else {
					vm_object_unlock(object);
					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
				}
			}
			/*
			 *	Indicate that the page is waiting for data
			 *	from the memory manager.
			 */

			if(m != VM_PAGE_NULL) {

				m->list_req_pending = TRUE;
				m->absent = TRUE;
				m->unusual = TRUE;
				object->absent_count++;

			}

			cluster_start = offset;
			length = PAGE_SIZE;

			/*
			 * lengthen the cluster by the pages in the working set
			 */
			if((map != NULL) &&
				(current_task()->dynamic_working_set != 0)) {
				cluster_end = cluster_start + length;
				/* tws values for start and end are just
				 * suggestions.  Therefore, as long as
				 * build_cluster does not use pointers or
				 * take action based on values that
				 * could be affected by re-entrance we
				 * do not need to take the map lock.
				 */
				cluster_end = offset + PAGE_SIZE_64;
				tws_build_cluster(
						current_task()->dynamic_working_set,
						object, &cluster_start,
						&cluster_end, 0x40000);
				length = cluster_end - cluster_start;
			}
#if TRACEFAULTPAGE
			dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
			/*
			 *	We have a busy page, so we can
			 *	release the object lock.
			 */
			vm_object_unlock(object);

			/*
			 *	Call the memory manager to retrieve the data.
			 */
			if (type_of_fault)
				*type_of_fault = ((int)length << 8) | DBG_PAGEIN_FAULT;
			VM_STAT(pageins++);
			current_task()->pageins++;
			bumped_pagein = TRUE;
			/*
			 *	If this object uses a copy_call strategy,
			 *	and we are interested in a copy of this object
			 *	(having gotten here only by following a
			 *	shadow chain), then tell the memory manager
			 *	via a flag added to the desired_access
			 *	parameter, so that it can detect a race
			 *	between our walking down the shadow chain
			 *	and its pushing pages up into a copy of
			 *	the object that it manages.
			 */

			if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
			    object != first_object) {
				wants_copy_flag = VM_PROT_WANTS_COPY;
			} else {
				wants_copy_flag = VM_PROT_NONE;
			}

			XPR(XPR_VM_FAULT,
			    "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
				(integer_t)object, offset, (integer_t)m,
				access_required | wants_copy_flag, 0);
			rc = memory_object_data_request(object->pager,
					cluster_start + object->paging_offset,
					length,
					access_required | wants_copy_flag);

#if TRACEFAULTPAGE
			dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc);	/* (TEST/DEBUG) */
#endif
			if (rc != KERN_SUCCESS) {
				if (rc != MACH_SEND_INTERRUPTED
				    && vm_fault_debug)
					printf("%s(0x%x, 0x%xll, 0x%xll, 0x%x) failed, rc=%d\n",
						"memory_object_data_request",
						object->pager,
						cluster_start + object->paging_offset,
						length, access_required, rc);
				/*
				 *	Don't want to leave a busy page around,
				 *	but the data request may have blocked,
				 *	so check if it's still there and busy.
				 */
				if(!object->phys_contiguous) {
					vm_object_lock(object);
					for (; length; length -= PAGE_SIZE,
					    cluster_start += PAGE_SIZE_64) {
						vm_page_t p;

						if ((p = vm_page_lookup(object,
								cluster_start))
						    && p->absent && p->busy
						    && p != first_m) {
							VM_PAGE_FREE(p);
						}
					}
				}
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return((rc == MACH_SEND_INTERRUPTED) ?
					VM_FAULT_INTERRUPTED :
					VM_FAULT_MEMORY_ERROR);
			}

			vm_object_lock(object);
			if ((interruptible != THREAD_UNINT) &&
			    (current_thread()->state & TH_ABORT)) {
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_INTERRUPTED);
			}
			if (m == VM_PAGE_NULL &&
			    object->phys_contiguous) {
				/*
				 * No page here means that the object we
				 * initially looked up was "physically
				 * contiguous" (i.e. device memory).  However,
				 * with Virtual VRAM, the object might not
				 * be backed by that device memory anymore,
				 * so we're done here only if the object is
				 * still "phys_contiguous".
				 * Otherwise, if the object is no longer
				 * "phys_contiguous", we need to retry the
				 * page fault against the object's new backing
				 * store (different memory object).
				 */
				break;
			}

			/*
			 * Retry with same object/offset, since new data may
			 * be in a different page (i.e., m is meaningless at
			 * this point).
			 */
			continue;
		}
		/*
		 * The only case in which we get here is if
		 * object has no pager (or unwiring).  If the pager doesn't
		 * have the page this is handled in the m->absent case above
		 * (and if you change things here you should look above).
		 */
#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
#endif
		if (object == first_object)
			first_m = m;
		else
			assert(m == VM_PAGE_NULL);

		XPR(XPR_VM_FAULT,
		    "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
			(integer_t)object, offset, (integer_t)m,
			(integer_t)object->shadow, 0);
		/*
		 *	Move on to the next object.  Lock the next
		 *	object before unlocking the current one.
		 */
		next_object = object->shadow;
		if (next_object == VM_OBJECT_NULL) {
			assert(!must_be_resident);

			/*
			 *	If there's no object left, fill the page
			 *	in the top object with zeros.  But first we
			 *	need to allocate a real page.
			 */

			if (object != first_object) {
				vm_object_paging_end(object);
				vm_object_unlock(object);

				object = first_object;
				offset = first_offset;
				vm_object_lock(object);
			}

			m = first_m;
			assert(m->object == object);
			first_m = VM_PAGE_NULL;

			if(m == VM_PAGE_NULL) {
				m = vm_page_grab();
				if (m == VM_PAGE_NULL) {
					vm_fault_cleanup(
						object, VM_PAGE_NULL);
					thread_interrupt_level(
						interruptible_state);
					return(VM_FAULT_MEMORY_SHORTAGE);
				}
				vm_page_insert(
					m, object, offset);
			}
			if (object->shadow_severed) {
				VM_PAGE_FREE(m);
				vm_fault_cleanup(object, VM_PAGE_NULL);
				thread_interrupt_level(interruptible_state);
				return VM_FAULT_MEMORY_ERROR;
			}

			/*
			 * are we protecting the system from
			 * backing store exhaustion.  If so
			 * sleep unless we are privileged.
			 */

			if(vm_backing_store_low) {
				if(!(current_task()->priv_flags
						& VM_BACKING_STORE_PRIV)) {
					assert_wait((event_t)
						&vm_backing_store_low,
						THREAD_UNINT);
					VM_PAGE_FREE(m);
					vm_fault_cleanup(object, VM_PAGE_NULL);
					thread_block(THREAD_CONTINUE_NULL);
					thread_interrupt_level(
						interruptible_state);
					return(VM_FAULT_RETRY);
				}
			}
			if (VM_PAGE_THROTTLED() ||
			    (m->fictitious && !vm_page_convert(m))) {
				VM_PAGE_FREE(m);
				vm_fault_cleanup(object, VM_PAGE_NULL);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_MEMORY_SHORTAGE);
			}
			m->no_isync = FALSE;

			if (!no_zero_fill) {
				vm_object_unlock(object);
				vm_page_zero_fill(m);
				vm_object_lock(object);
			}
			if (type_of_fault)
				*type_of_fault = DBG_ZERO_FILL_FAULT;
			VM_STAT(zero_fill_count++);

			if (bumped_pagein == TRUE) {
				VM_STAT(pageins--);
				current_task()->pageins--;
			}
			vm_page_lock_queues();
			VM_PAGE_QUEUES_REMOVE(m);
			assert(!m->laundry);
			assert(m->object != kernel_object);
			assert(m->pageq.next == NULL &&
				m->pageq.prev == NULL);
			if(m->object->size > 0x200000) {
				m->zero_fill = TRUE;
				/* depends on the queues lock */
				vm_zf_count += 1;
				queue_enter(&vm_page_queue_zf,
					m, vm_page_t, pageq);
			} else {
				queue_enter(
					&vm_page_queue_inactive,
					m, vm_page_t, pageq);
			}
			m->page_ticket = vm_page_ticket;
			vm_page_ticket_roll++;
			if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
				vm_page_ticket_roll = 0;
				if(vm_page_ticket ==
				     VM_PAGE_TICKET_ROLL_IDS)
					vm_page_ticket = 0;
				else
					vm_page_ticket++;
			}
			m->inactive = TRUE;
			vm_page_inactive_count++;
			vm_page_unlock_queues();
			pmap_clear_modify(m->phys_page);
			break;
		} else {
			if ((object != first_object) || must_be_resident)
				vm_object_paging_end(object);
			offset += object->shadow_offset;
			hi_offset += object->shadow_offset;
			lo_offset += object->shadow_offset;
			access_required = VM_PROT_READ;
			vm_object_lock(next_object);
			vm_object_unlock(object);
			object = next_object;
			vm_object_paging_begin(object);
		}
	}
	/*
	 *	PAGE HAS BEEN FOUND.
	 *
	 *	This page (m) is:
	 *		busy, so that we can play with it;
	 *		not absent, so that nobody else will fill it;
	 *		possibly eligible for pageout;
	 *
	 *	The top-level page (first_m) is:
	 *		VM_PAGE_NULL if the page was found in the
	 *		 top-level object;
	 *		busy, not absent, and ineligible for pageout.
	 *
	 *	The current object (object) is locked.  A paging
	 *	reference is held for the current and top-level
	 *	objects.
	 */

#if TRACEFAULTPAGE
	dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
#endif
#if	EXTRA_ASSERTIONS
	if(m != VM_PAGE_NULL) {
		assert(m->busy && !m->absent);
		assert((first_m == VM_PAGE_NULL) ||
			(first_m->busy && !first_m->absent &&
			 !first_m->active && !first_m->inactive));
	}
#endif	/* EXTRA_ASSERTIONS */
	/*
	 * ENCRYPTED SWAP:
	 * If we found a page, we must have decrypted it before we
	 * get here.
	 */
	if (m != VM_PAGE_NULL) {
		ASSERT_PAGE_DECRYPTED(m);
	}

	XPR(XPR_VM_FAULT,
	    "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
		(integer_t)object, offset, (integer_t)m,
		(integer_t)first_object, (integer_t)first_m);
	/*
	 *	If the page is being written, but isn't
	 *	already owned by the top-level object,
	 *	we have to copy it into a new page owned
	 *	by the top-level object.
	 */

	if ((object != first_object) && (m != VM_PAGE_NULL)) {
		/*
		 *	We only really need to copy if we
		 *	want to write it.
		 */

		vm_page_t copy_m;

#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type);	/* (TEST/DEBUG) */
#endif
		if (fault_type & VM_PROT_WRITE) {

			assert(!must_be_resident);

			/*
			 * are we protecting the system from
			 * backing store exhaustion.  If so
			 * sleep unless we are privileged.
			 */

			if(vm_backing_store_low) {
				if(!(current_task()->priv_flags
						& VM_BACKING_STORE_PRIV)) {
					assert_wait((event_t)
						&vm_backing_store_low,
						THREAD_UNINT);
					RELEASE_PAGE(m);
					vm_fault_cleanup(object, first_m);
					thread_block(THREAD_CONTINUE_NULL);
					thread_interrupt_level(
						interruptible_state);
					return(VM_FAULT_RETRY);
				}
			}
			/*
			 *	If we try to collapse first_object at this
			 *	point, we may deadlock when we try to get
			 *	the lock on an intermediate object (since we
			 *	have the bottom object locked).  We can't
			 *	unlock the bottom object, because the page
			 *	we found may move (by collapse) if we do.
			 *
			 *	Instead, we first copy the page.  Then, when
			 *	we have no more use for the bottom object,
			 *	we unlock it and try to collapse.
			 *
			 *	Note that we copy the page even if we didn't
			 *	need to... that's the breaks.
			 */

			/*
			 *	Allocate a page for the copy
			 */
			copy_m = vm_page_grab();
			if (copy_m == VM_PAGE_NULL) {
				RELEASE_PAGE(m);
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_MEMORY_SHORTAGE);
			}

			XPR(XPR_VM_FAULT,
			    "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
				(integer_t)object, offset,
				(integer_t)m, (integer_t)copy_m, 0);
			vm_page_copy(m, copy_m);

			/*
			 *	If another map is truly sharing this
			 *	page with us, we have to flush all
			 *	uses of the original page, since we
			 *	can't distinguish those which want the
			 *	original from those which need the
			 *	new copy.
			 *
			 *	XXXO If we know that only one map has
			 *	access to this page, then we could
			 *	avoid the pmap_disconnect() call.
			 */

			vm_page_lock_queues();
			assert(!m->cleaning);
			pmap_disconnect(m->phys_page);
			vm_page_deactivate(m);
			copy_m->dirty = TRUE;
			/*
			 * Setting reference here prevents this fault from
			 * being counted as a (per-thread) reactivate as well
			 * as a copy-on-write.
			 */
			first_m->reference = TRUE;
			vm_page_unlock_queues();

			/*
			 *	We no longer need the old page or object.
			 */

			PAGE_WAKEUP_DONE(m);
			vm_object_paging_end(object);
			vm_object_unlock(object);

			if (type_of_fault)
				*type_of_fault = DBG_COW_FAULT;
			VM_STAT(cow_faults++);
			current_task()->cow_faults++;
			object = first_object;
			offset = first_offset;

			vm_object_lock(object);
			VM_PAGE_FREE(first_m);
			first_m = VM_PAGE_NULL;
			assert(copy_m->busy);
			vm_page_insert(copy_m, object, offset);
			m = copy_m;

			/*
			 *	Now that we've gotten the copy out of the
			 *	way, let's try to collapse the top object.
			 *	But we have to play ugly games with
			 *	paging_in_progress to do that...
			 */

			vm_object_paging_end(object);
			vm_object_collapse(object, offset);
			vm_object_paging_begin(object);

		}
		else {
			*protection &= (~VM_PROT_WRITE);
		}
	}
	/*
	 *	Now check whether the page needs to be pushed into the
	 *	copy object.  The use of asymmetric copy on write for
	 *	shared temporary objects means that we may do two copies to
	 *	satisfy the fault; one above to get the page from a
	 *	shadowed object, and one here to push it into the copy.
	 */

	while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
		   (m != VM_PAGE_NULL)) {
		vm_object_offset_t	copy_offset;
		vm_page_t		copy_m;

#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type);	/* (TEST/DEBUG) */
#endif
		/*
		 *	If the page is being written, but hasn't been
		 *	copied to the copy-object, we have to copy it there.
		 */

		if ((fault_type & VM_PROT_WRITE) == 0) {
			*protection &= ~VM_PROT_WRITE;
			break;
		}

		/*
		 *	If the page was guaranteed to be resident,
		 *	we must have already performed the copy.
		 */

		if (must_be_resident)
			break;

		/*
		 *	Try to get the lock on the copy_object.
		 */
		if (!vm_object_lock_try(copy_object)) {
			vm_object_unlock(object);

			mutex_pause();	/* wait a bit */

			vm_object_lock(object);
			continue;
		}

		/*
		 *	Make another reference to the copy-object,
		 *	to keep it from disappearing during the
		 *	copy.
		 */
		assert(copy_object->ref_count > 0);
		copy_object->ref_count++;
		VM_OBJ_RES_INCR(copy_object);
		/*
		 *	Does the page exist in the copy?
		 */
		copy_offset = first_offset - copy_object->shadow_offset;
		if (copy_object->size <= copy_offset)
			/*
			 * Copy object doesn't cover this page -- do nothing.
			 */
			;
		else if ((copy_m =
			vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
			/* Page currently exists in the copy object */
			if (copy_m->busy) {
				/*
				 *	If the page is being brought
				 *	in, wait for it and then retry.
				 */
				RELEASE_PAGE(m);
				/* take an extra ref so object won't die */
				assert(copy_object->ref_count > 0);
				copy_object->ref_count++;
				vm_object_res_reference(copy_object);
				vm_object_unlock(copy_object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(copy_object);
				assert(copy_object->ref_count > 0);
				VM_OBJ_RES_DECR(copy_object);
				copy_object->ref_count--;
				assert(copy_object->ref_count > 0);
				copy_m = vm_page_lookup(copy_object, copy_offset);
				/*
				 * ENCRYPTED SWAP:
				 * it's OK if the "copy_m" page is encrypted,
				 * because we're not moving it nor handling its
				 * contents.
				 */
				if (copy_m != VM_PAGE_NULL && copy_m->busy) {
					PAGE_ASSERT_WAIT(copy_m, interruptible);
					vm_object_unlock(copy_object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(copy_object);
					goto backoff;
				} else {
					vm_object_unlock(copy_object);
					vm_object_deallocate(copy_object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
				}
			}
		}
		else if (!PAGED_OUT(copy_object, copy_offset)) {
			/*
			 *	If PAGED_OUT is TRUE, then the page used to exist
			 *	in the copy-object, and has already been paged out.
			 *	We don't need to repeat this. If PAGED_OUT is
			 *	FALSE, then either we don't know (!pager_created,
			 *	for example) or it hasn't been paged out.
			 *	(VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
			 *	We must copy the page to the copy object.
			 */

			/*
			 * are we protecting the system from
			 * backing store exhaustion.  If so
			 * sleep unless we are privileged.
			 */

			if(vm_backing_store_low) {
				if(!(current_task()->priv_flags
						& VM_BACKING_STORE_PRIV)) {
					assert_wait((event_t)
						&vm_backing_store_low,
						THREAD_UNINT);
					RELEASE_PAGE(m);
					VM_OBJ_RES_DECR(copy_object);
					copy_object->ref_count--;
					assert(copy_object->ref_count > 0);
					vm_object_unlock(copy_object);
					vm_fault_cleanup(object, first_m);
					thread_block(THREAD_CONTINUE_NULL);
					thread_interrupt_level(
						interruptible_state);
					return(VM_FAULT_RETRY);
				}
			}

			/*
			 *	Allocate a page for the copy
			 */
			copy_m = vm_page_alloc(copy_object, copy_offset);
			if (copy_m == VM_PAGE_NULL) {
				RELEASE_PAGE(m);
				VM_OBJ_RES_DECR(copy_object);
				copy_object->ref_count--;
				assert(copy_object->ref_count > 0);
				vm_object_unlock(copy_object);
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_MEMORY_SHORTAGE);
			}
			/*
			 *	Must copy page into copy-object.
			 */

			vm_page_copy(m, copy_m);

			/*
			 *	If the old page was in use by any users
			 *	of the copy-object, it must be removed
			 *	from all pmaps.  (We can't know which
			 *	pmaps use it.)
			 */

			vm_page_lock_queues();
			assert(!m->cleaning);
			pmap_disconnect(m->phys_page);
			copy_m->dirty = TRUE;
			vm_page_unlock_queues();

			/*
			 *	If there's a pager, then immediately
			 *	page out this page, using the "initialize"
			 *	option.  Else, we use the copy.
			 */

			if
#if	MACH_PAGEMAP
			  ((!copy_object->pager_created) ||
				vm_external_state_get(
					copy_object->existence_map, copy_offset)
				== VM_EXTERNAL_STATE_ABSENT)
#else
			  (!copy_object->pager_created)
#endif
				{
				vm_page_lock_queues();
				vm_page_activate(copy_m);
				vm_page_unlock_queues();
				PAGE_WAKEUP_DONE(copy_m);
			}
			else {
				assert(copy_m->busy == TRUE);
				/*
				 *	The page is already ready for pageout:
				 *	not on pageout queues and busy.
				 *	Unlock everything except the
				 *	copy_object itself.
				 */

				vm_object_unlock(object);

				/*
				 *	Write the page to the copy-object,
				 *	flushing it from the kernel.
				 */

				vm_pageout_initialize_page(copy_m);

				/*
				 *	Since the pageout may have
				 *	temporarily dropped the
				 *	copy_object's lock, we
				 *	check whether we'll have
				 *	to deallocate the hard way.
				 */

				if ((copy_object->shadow != object) ||
				    (copy_object->ref_count == 1)) {
					vm_object_unlock(copy_object);
					vm_object_deallocate(copy_object);
					vm_object_lock(object);
					continue;
				}

				/*
				 *	Pick back up the old object's
				 *	lock.  [It is safe to do so,
				 *	since it must be deeper in the
				 *	shadow chain.]
				 */

				vm_object_lock(object);
			}

			/*
			 *	Because we're pushing a page upward
			 *	in the object tree, we must restart
			 *	any faults that are waiting here.
			 *	[Note that this is an expansion of
			 *	PAGE_WAKEUP that uses the THREAD_RESTART
			 *	wait result].  Can't turn off the page's
			 *	busy bit because we're not done with it.
			 */

			if (m->wanted) {
				m->wanted = FALSE;
				thread_wakeup_with_result((event_t) m,
					THREAD_RESTART);
			}
		}

		/*
		 *	The reference count on copy_object must be
		 *	at least 2: one for our extra reference,
		 *	and at least one from the outside world
		 *	(we checked that when we last locked
		 *	copy_object).
		 */
		copy_object->ref_count--;
		assert(copy_object->ref_count > 0);
		VM_OBJ_RES_DECR(copy_object);
		vm_object_unlock(copy_object);

		break;
	}
	*result_page = m;
	*top_page = first_m;

	XPR(XPR_VM_FAULT,
		"vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
		(integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
	/*
	 *	If the page can be written, assume that it will be.
	 *	[Earlier, we restrict the permission to allow write
	 *	access only if the fault so required, so we don't
	 *	mark read-only data as dirty.]
	 */

	if(m != VM_PAGE_NULL) {
#if	!VM_FAULT_STATIC_CONFIG
		if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
			m->dirty = TRUE;
#endif
		if (vm_page_deactivate_behind)
			vm_fault_deactivate_behind(object, offset, behavior);
	} else {
		vm_object_unlock(object);
	}
	thread_interrupt_level(interruptible_state);

#if TRACEFAULTPAGE
	dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0);	/* (TEST/DEBUG) */
#endif
	return(VM_FAULT_SUCCESS);
	vm_fault_cleanup(object, first_m);

	counter(c_vm_fault_page_block_backoff_kernel++);
	thread_block(THREAD_CONTINUE_NULL);

    backoff:
	thread_interrupt_level(interruptible_state);
	if (wait_result == THREAD_INTERRUPTED)
		return VM_FAULT_INTERRUPTED;
	return VM_FAULT_RETRY;
}
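
/*
 * Summary of the vm_fault_return_t values produced above (informal):
 * VM_FAULT_SUCCESS returns with the page busy and its object locked;
 * VM_FAULT_RETRY and VM_FAULT_INTERRUPTED ask the caller to restart or
 * abort the fault; VM_FAULT_MEMORY_SHORTAGE and VM_FAULT_FICTITIOUS_SHORTAGE
 * indicate transient page shortages; VM_FAULT_MEMORY_ERROR reports a bad
 * page or a severed shadow chain (with the specific code placed in
 * *error_code when the caller supplied one).
 */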
/*
 *	Routine:	vm_fault_tws_insert
 *	Purpose:
 *		Add fault information to the task working set.
 *	Implementation:
 *		We always insert the base object/offset pair
 *		rather than the actual object/offset.
 *	Assumptions:
 *		Map and real_map locked.
 *		Object locked and referenced.
 *	Returns:
 *		TRUE if startup file should be written.
 *		With object locked and still referenced.
 *		But we may drop the object lock temporarily.
 */
static boolean_t
vm_fault_tws_insert(
	vm_map_t map,
	vm_map_t real_map,
	vm_map_offset_t vaddr,
	vm_object_t object,
	vm_object_offset_t offset)
{
	tws_hash_line_t	line;
	task_t		task;
	kern_return_t	kr;
	boolean_t	result = FALSE;

	/* Avoid possible map lock deadlock issues */
	if(map == kernel_map || map == kalloc_map ||
	   real_map == kernel_map || real_map == kalloc_map)
		return result;

	task = current_task();
	if (task->dynamic_working_set != 0) {
		vm_object_t	base_object;
		vm_object_t	base_shadow;
		vm_object_offset_t base_offset;
		base_object = object;
		base_offset = offset;
		while ((base_shadow = base_object->shadow)) {
			vm_object_lock(base_shadow);
			vm_object_unlock(base_object);
			base_offset +=
				base_object->shadow_offset;
			base_object = base_shadow;
		}
		kr = tws_lookup(
			task->dynamic_working_set,
			base_offset, base_object,
			&line);
		if (kr == KERN_OPERATION_TIMED_OUT){
			result = TRUE;
			if (base_object != object) {
				vm_object_unlock(base_object);
				vm_object_lock(object);
			}
		} else if (kr != KERN_SUCCESS) {
			if(base_object != object)
				vm_object_reference_locked(base_object);
			kr = tws_insert(
				task->dynamic_working_set,
				base_offset, base_object,
				vaddr, map);
			if(base_object != object) {
				vm_object_unlock(base_object);
				vm_object_deallocate(base_object);
			}
			if(kr == KERN_NO_SPACE) {
				if (base_object == object)
					vm_object_unlock(object);
				tws_expand_working_set(
					task->dynamic_working_set,
					TWS_HASH_LINE_COUNT,
					FALSE);
				if (base_object == object)
					vm_object_lock(object);
			} else if(kr == KERN_OPERATION_TIMED_OUT) {
				result = TRUE;
			}
			if(base_object != object)
				vm_object_lock(object);
		} else if (base_object != object) {
			vm_object_unlock(base_object);
			vm_object_lock(object);
		}
	}
	return result;
}
/*
 *	Routine:	vm_fault
 *	Purpose:
 *		Handle page faults, including pseudo-faults
 *		used to change the wiring status of pages.
 *	Returns:
 *		Explicit continuations have been removed.
 *	Implementation:
 *		vm_fault and vm_fault_page save mucho state
 *		in the moral equivalent of a closure.  The state
 *		structure is allocated when first entering vm_fault
 *		and deallocated when leaving vm_fault.
 */
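
/*
 * Rough shape of the implementation that follows (informal summary): the
 * faulting address is translated to an object/offset with
 * vm_map_lookup_locked(); a fast path then tries to resolve the fault while
 * still holding the map and object locks (resident page, zero fill or
 * copy-on-write handled inline), and anything that needs the pager or has
 * to block falls back to the slower vm_fault_page() path above.
 */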
extern int _map_enter_debug;

kern_return_t
vm_fault(
	vm_map_t	map,
	vm_map_offset_t	vaddr,
	vm_prot_t	fault_type,
	boolean_t	change_wiring,
	int		interruptible,
	pmap_t		caller_pmap,
	vm_map_offset_t	caller_pmap_addr)
{
	vm_map_version_t	version;	/* Map version for verification */
	boolean_t		wired;		/* Should mapping be wired down? */
	vm_object_t		object;		/* Top-level object */
	vm_object_offset_t	offset;		/* Top-level offset */
	vm_prot_t		prot;		/* Protection for mapping */
	vm_behavior_t		behavior;	/* Expected paging behavior */
	vm_map_offset_t		lo_offset, hi_offset;
	vm_object_t		old_copy_object; /* Saved copy object */
	vm_page_t		result_page;	/* Result of vm_fault_page */
	vm_page_t		top_page;	/* Placeholder page */
	kern_return_t		kr;

	vm_page_t		m;	/* Fast access to result_page */
	kern_return_t		error_code = 0;	/* page error reasons */
	vm_object_t		cur_object;
	vm_object_offset_t	cur_offset;
	vm_page_t		cur_m;
	vm_object_t		new_object;
	int			type_of_fault;
	vm_map_t		real_map = map;
	vm_map_t		original_map = map;
	pmap_t			pmap;
	boolean_t		interruptible_state;
	unsigned int		cache_attr;
	int			write_startup_file = 0;
	boolean_t		need_activation;
	vm_prot_t		full_fault_type;

	if (get_preemption_level() != 0)
		return (KERN_FAILURE);

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
			      vaddr,
			      0,
			      0,
			      0,
			      0);
	/* at present we do not fully check for execute permission */
	/* we generally treat it as read except in certain device  */
	/* memory settings */
	full_fault_type = fault_type;
	if(fault_type & VM_PROT_EXECUTE) {
		fault_type &= ~VM_PROT_EXECUTE;
		fault_type |= VM_PROT_READ;
	}

	interruptible_state = thread_interrupt_level(interruptible);

	/*
	 * assume we will hit a page in the cache
	 * otherwise, explicitly override with
	 * the real fault type once we determine it
	 */
	type_of_fault = DBG_CACHE_HIT_FAULT;

	VM_STAT(faults++);
	current_task()->faults++;

    RetryFault: ;
	/*
	 *	Find the backing store object and offset into
	 *	it to begin the search.
	 */
	map = original_map;
	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
				&object, &offset,
				&prot, &wired,
				&behavior, &lo_offset, &hi_offset, &real_map);

//if (_map_enter_debug)printf("vm_map_lookup_locked(map=0x%x, addr=0x%llx, prot=%d wired=%d) = %d\n", map, vaddr, prot, wired, kr);

	pmap = real_map->pmap;

	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		goto done;
	}

	/*
	 *	If the page is wired, we must fault for the current protection
	 *	value, to avoid further faults.
	 */

	if (wired)
		fault_type = prot | VM_PROT_WRITE;
2225 #if VM_FAULT_CLASSIFY
2227 * Temporary data gathering code
2229 vm_fault_classify(object
, offset
, fault_type
);
    /*
     *  Fast fault code.  The basic idea is to do as much as
     *  possible while holding the map lock and object locks.
     *  Busy pages are not used until the object lock has to
     *  be dropped to do something (copy, zero fill, pmap enter).
     *  Similarly, paging references aren't acquired until that
     *  point, and object references aren't used.
     *
     *  If we can figure out what to do
     *  (zero fill, copy on write, pmap enter) while holding
     *  the locks, then it gets done.  Otherwise, we give up,
     *  and use the original fault path (which doesn't hold
     *  the map lock, and relies on busy pages).
     *  The give up cases include:
     *      - Have to talk to pager.
     *      - Page is busy, absent or in error.
     *      - Pager has locked out desired access.
     *      - Fault needs to be restarted.
     *      - Have to push page into copy object.
     *
     *  The code is an infinite loop that moves one level down
     *  the shadow chain each time.  cur_object and cur_offset
     *  refer to the current object being examined. object and offset
     *  are the original object from the map.  The loop is at the
     *  top level if and only if object and cur_object are the same.
     *
     *  Invariants:  Map lock is held throughout.  Lock is held on
     *      original object and cur_object (if different) when
     *      continuing or exiting loop.
     */
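    /*
     * A rough, non-authoritative sketch of the structure that follows
     * (illustrative pseudocode only; the real control flow is below):
     *
     *  cur_object = object; cur_offset = offset;
     *  while (TRUE) {
     *      m = vm_page_lookup(cur_object, cur_offset);
     *      if (m != VM_PAGE_NULL)
     *          map it, copy-on-write it, or give up;
     *      else if (cur_object has a pager)
     *          give up;
     *      else if (cur_object has no shadow)
     *          zero fill in the top object;
     *      else
     *          descend: cur_object = cur_object->shadow;
     *  }
     */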
    /*
     *  If this page is to be inserted in a copy delay object
     *  for writing, and if the object has a copy, then the
     *  copy delay strategy is implemented in the slow fault page.
     */
    if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
        object->copy == VM_OBJECT_NULL ||
        (fault_type & VM_PROT_WRITE) == 0) {
        cur_object = object;
        cur_offset = offset;

        while (TRUE) {
            m = vm_page_lookup(cur_object, cur_offset);
            if (m != VM_PAGE_NULL) {
                if (m->busy) {
                    wait_result_t   result;

                    if (object != cur_object)
                        vm_object_unlock(object);

                    vm_map_unlock_read(map);
                    if (real_map != map)
                        vm_map_unlock(real_map);

#if !VM_FAULT_STATIC_CONFIG
                    if (!vm_fault_interruptible)
                        interruptible = THREAD_UNINT;
#endif
                    result = PAGE_ASSERT_WAIT(m, interruptible);

                    vm_object_unlock(cur_object);

                    if (result == THREAD_WAITING) {
                        result = thread_block(THREAD_CONTINUE_NULL);

                        counter(c_vm_fault_page_block_busy_kernel++);
                    }
                    if (result == THREAD_AWAKENED || result == THREAD_RESTART)
                        goto RetryFault;
                    else
                        goto done;
                }

                if (m->unusual && (m->error || m->restart || m->private
                    || m->absent || (fault_type & m->page_lock))) {
                    /*
                     *  Unusual case. Give up.
                     */
                    break;
                }

                if (m->encrypted) {
                    /*
                     * ENCRYPTED SWAP:
                     * We've soft-faulted (because it's not in the page
                     * table) on an encrypted page.
                     * Keep the page "busy" so that no one messes with
                     * it during the decryption.
                     * Release the extra locks we're holding, keep only
                     * the page's VM object lock.
                     */
                    m->busy = TRUE;
                    if (object != cur_object) {
                        vm_object_unlock(object);
                    }
                    vm_map_unlock_read(map);
                    if (real_map != map)
                        vm_map_unlock(real_map);

                    vm_page_decrypt(m, 0);

                    assert(m->busy);
                    PAGE_WAKEUP_DONE(m);
                    vm_object_unlock(m->object);

                    /*
                     * Retry from the top, in case anything
                     * changed while we were decrypting...
                     */
                    goto RetryFault;
                }
                ASSERT_PAGE_DECRYPTED(m);
                /*
                 *  Two cases of map in faults:
                 *      - At top level w/o copy object.
                 *      - Read fault anywhere.
                 *          --> must disallow write.
                 */

                if (object == cur_object &&
                    object->copy == VM_OBJECT_NULL)
                    goto FastMapInFault;

                if ((fault_type & VM_PROT_WRITE) == 0) {
                    boolean_t   sequential;

                    prot &= ~VM_PROT_WRITE;

                    /*
                     *  Set up to map the page ...
                     *  mark the page busy, drop
                     *  locks and take a paging reference
                     *  on the object with the page.
                     */

                    if (object != cur_object) {
                        vm_object_unlock(object);
                        object = cur_object;
                    }
FastMapInFault:
                    m->busy = TRUE;

                    vm_object_paging_begin(object);

FastPmapEnter:
                    /*
                     *  Check a couple of global reasons to
                     *  be conservative about write access.
                     *  Then do the pmap_enter.
                     */
#if !VM_FAULT_STATIC_CONFIG
                    if (vm_fault_dirty_handling
#if MACH_KDB
                        || db_watchpoint_list
#endif
                        && (fault_type & VM_PROT_WRITE) == 0)
                        prot &= ~VM_PROT_WRITE;
#else   /* STATIC_CONFIG */
#if MACH_KDB
                    if (db_watchpoint_list
                        && (fault_type & VM_PROT_WRITE) == 0)
                        prot &= ~VM_PROT_WRITE;
#endif  /* MACH_KDB */
#endif  /* STATIC_CONFIG */
                    cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

                    need_activation = FALSE;

                    if (m->no_isync == TRUE) {
                        m->no_isync = FALSE;
                        pmap_sync_page_data_phys(m->phys_page);

                        if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
                            /*
                             * found it in the cache, but this
                             * is the first fault-in of the page (no_isync == TRUE)
                             * so it must have come in as part of
                             * a cluster... account 1 pagein against it
                             */
                            current_task()->pageins++;
                            type_of_fault = DBG_PAGEIN_FAULT;
                        }
                        if (m->clustered)
                            need_activation = TRUE;

                    } else if (cache_attr != VM_WIMG_DEFAULT) {
                        pmap_sync_page_attributes_phys(m->phys_page);
                    }

                    if (caller_pmap) {
                        PMAP_ENTER(caller_pmap,
                                   caller_pmap_addr, m,
                                   prot, cache_attr, wired);
                    } else {
                        PMAP_ENTER(pmap, vaddr, m,
                                   prot, cache_attr, wired);
                    }
                    /*
                     *  Hold queues lock to manipulate
                     *  the page queues.  Change wiring
                     *  case is obvious.  In soft ref bits
                     *  case activate page only if it fell
                     *  off paging queues, otherwise just
                     *  activate it if it's inactive.
                     *
                     *  NOTE: original vm_fault code will
                     *  move active page to back of active
                     *  queue.  This code doesn't.
                     */
                    vm_page_lock_queues();

                    if (m->clustered) {
                        vm_pagein_cluster_used++;
                        m->clustered = FALSE;
                    }
                    m->reference = TRUE;

                    if (change_wiring) {
                        if (wired)
                            vm_page_wire(m);
                        else
                            vm_page_unwire(m);
                    }
#if VM_FAULT_STATIC_CONFIG
                    else {
                        if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
                            vm_page_activate(m);
                    }
#else
                    else if (software_reference_bits) {
                        if (!m->active && !m->inactive)
                            vm_page_activate(m);
                    }
                    else if (!m->active) {
                        vm_page_activate(m);
                    }
#endif
                    vm_page_unlock_queues();

                    /*
                     *  That's it, clean up and return.
                     */
                    PAGE_WAKEUP_DONE(m);
                    sequential = (sequential && vm_page_deactivate_behind) ?
                        vm_fault_deactivate_behind(object, cur_offset, behavior) :
                        FALSE;

                    /*
                     * Add non-sequential pages to the working set.
                     * The sequential pages will be brought in through
                     * normal clustering behavior.
                     */
                    if (!sequential && !object->private) {
                        write_startup_file =
                            vm_fault_tws_insert(map, real_map, vaddr,
                                                object, cur_offset);
                    }

                    vm_object_paging_end(object);
                    vm_object_unlock(object);

                    vm_map_unlock_read(map);
                    if (real_map != map)
                        vm_map_unlock(real_map);

                    if (write_startup_file)
                        tws_send_startup_info(current_task());

                    thread_interrupt_level(interruptible_state);

                    KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
                                          vaddr,
                                          type_of_fault & 0xff,
                                          KERN_SUCCESS,
                                          0,
                                          0);

                    return KERN_SUCCESS;
                }
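                /*
                 * The code above handles the common case: a resident page
                 * that can simply be entered in the pmap.  The remaining
                 * arms of this loop handle copy-on-write pushes and
                 * zero-fill; anything else falls out of the loop to the
                 * slower vm_fault_page() path below.
                 */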
                /*
                 *  Copy on write fault.  If objects match, then
                 *  object->copy must not be NULL (else control
                 *  would be in previous code block), and we
                 *  have a potential push into the copy object
                 *  with which we won't cope here.
                 */

                if (cur_object == object)
                    break;

                /*
                 *  This is now a shadow based copy on write
                 *  fault -- it requires a copy up the shadow
                 *  chain.
                 *
                 *  Allocate a page in the original top level
                 *  object. Give up if allocate fails.  Also
                 *  need to remember current page, as it's the
                 *  source of the copy.
                 */
                cur_m = m;
                m = vm_page_alloc(object, offset);
                if (m == VM_PAGE_NULL) {
                    break;
                }

                /*
                 *  Now do the copy.  Mark the source busy
                 *  and take out paging references on both
                 *  objects.
                 *
                 *  NOTE: This code holds the map lock across
                 *  the page copy.
                 */

                cur_m->busy = TRUE;
                vm_page_copy(cur_m, m);
                vm_page_insert(m, object, offset);

                vm_object_paging_begin(cur_object);
                vm_object_paging_begin(object);

                type_of_fault = DBG_COW_FAULT;
                VM_STAT(cow_faults++);
                current_task()->cow_faults++;

                /*
                 *  Now cope with the source page and object
                 *  If the top object has a ref count of 1
                 *  then no other map can access it, and hence
                 *  it's not necessary to do the pmap_disconnect.
                 */

                vm_page_lock_queues();
                vm_page_deactivate(cur_m);
                m->dirty = TRUE;
                pmap_disconnect(cur_m->phys_page);
                vm_page_unlock_queues();

                PAGE_WAKEUP_DONE(cur_m);
                vm_object_paging_end(cur_object);
                vm_object_unlock(cur_object);

                /*
                 *  Slight hack to call vm_object collapse
                 *  and then reuse common map in code.
                 *  note that the object lock was taken above.
                 */

                vm_object_paging_end(object);
                vm_object_collapse(object, offset);
                vm_object_paging_begin(object);

                goto FastPmapEnter;
            }
            else {
                /*
                 *  No page at cur_object, cur_offset
                 */

                if (cur_object->pager_created) {
                    /*
                     *  Have to talk to the pager.  Give up.
                     */
                    break;
                }

                if (cur_object->shadow == VM_OBJECT_NULL) {

                    if (cur_object->shadow_severed) {
                        vm_object_paging_end(object);
                        vm_object_unlock(object);
                        vm_map_unlock_read(map);
                        if (real_map != map)
                            vm_map_unlock(real_map);

                        if (write_startup_file)
                            tws_send_startup_info(current_task());

                        thread_interrupt_level(interruptible_state);

                        return KERN_MEMORY_ERROR;
                    }

                    /*
                     *  Zero fill fault.  Page gets
                     *  filled in top object. Insert
                     *  page, then drop any lower lock.
                     *  Give up if no page.
                     */
                    if (VM_PAGE_THROTTLED()) {
                        break;
                    }

                    /*
                     * are we protecting the system from
                     * backing store exhaustion.  If so
                     * sleep unless we are privileged.
                     */
                    if (vm_backing_store_low) {
                        if (!(current_task()->priv_flags
                              & VM_BACKING_STORE_PRIV))
                            break;
                    }
                    m = vm_page_alloc(object, offset);
                    if (m == VM_PAGE_NULL) {
                        break;
                    }
                    /*
                     * This is a zero-fill or initial fill
                     * page fault.  As such, we consider it
                     * undefined with respect to instruction
                     * execution.  i.e. it is the responsibility
                     * of higher layers to call for an instruction
                     * sync after changing the contents and before
                     * sending a program into this area.  We
                     * choose this approach for performance
                     */
                    m->no_isync = FALSE;

                    if (cur_object != object)
                        vm_object_unlock(cur_object);

                    vm_object_paging_begin(object);
                    vm_object_unlock(object);

                    /*
                     *  Now zero fill page and map it.
                     *  the page is probably going to
                     *  be written soon, so don't bother
                     *  to clear the modified bit
                     *
                     *  NOTE: This code holds the map
                     *  lock across the zero fill.
                     */

                    if (!map->no_zero_fill) {
                        vm_page_zero_fill(m);
                        type_of_fault = DBG_ZERO_FILL_FAULT;
                        VM_STAT(zero_fill_count++);
                    }
                    vm_page_lock_queues();
                    VM_PAGE_QUEUES_REMOVE(m);

                    m->page_ticket = vm_page_ticket;
                    assert(!m->laundry);
                    assert(m->object != kernel_object);
                    assert(m->pageq.next == NULL &&
                           m->pageq.prev == NULL);
                    if (m->object->size > 0x200000) {
                        m->zero_fill = TRUE;
                        /* depends on the queues lock */
                        queue_enter(&vm_page_queue_zf,
                                    m, vm_page_t, pageq);
                    } else {
                        queue_enter(
                            &vm_page_queue_inactive,
                            m, vm_page_t, pageq);
                    }
                    vm_page_ticket_roll++;
                    if (vm_page_ticket_roll ==
                        VM_PAGE_TICKETS_IN_ROLL) {
                        vm_page_ticket_roll = 0;
                        if (vm_page_ticket ==
                            VM_PAGE_TICKET_ROLL_IDS)
                            vm_page_ticket = 0;
                        else
                            vm_page_ticket++;
                    }

                    vm_page_inactive_count++;
                    vm_page_unlock_queues();
                    vm_object_lock(object);

                    goto FastPmapEnter;
                }

                /*
                 *  On to the next level
                 */

                cur_offset += cur_object->shadow_offset;
                new_object = cur_object->shadow;
                vm_object_lock(new_object);
                if (cur_object != object)
                    vm_object_unlock(cur_object);
                cur_object = new_object;

                continue;
            }
        }
    }
    /*
     *  Cleanup from fast fault failure.  Drop any object
     *  lock other than original and drop map lock.
     */

    if (object != cur_object)
        vm_object_unlock(cur_object);

    vm_map_unlock_read(map);

    if (real_map != map)
        vm_map_unlock(real_map);

    /*
     *  Make a reference to this object to
     *  prevent its disposal while we are messing with
     *  it.  Once we have the reference, the map is free
     *  to be diddled.  Since objects reference their
     *  shadows (and copies), they will stay around as well.
     */

    assert(object->ref_count > 0);
    object->ref_count++;
    vm_object_res_reference(object);
    vm_object_paging_begin(object);

    XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
    if (!object->private) {
        write_startup_file =
            vm_fault_tws_insert(map, real_map, vaddr, object, offset);
    }

    kr = vm_fault_page(object, offset, fault_type,
                       (change_wiring && !wired),
                       interruptible,
                       lo_offset, hi_offset, behavior,
                       &prot, &result_page, &top_page,
                       (int *)&type_of_fault,
                       &error_code, map->no_zero_fill, FALSE, map, vaddr);

    /*
     *  If we didn't succeed, lose the object reference immediately.
     */

    if (kr != VM_FAULT_SUCCESS)
        vm_object_deallocate(object);

    /*
     *  See why we failed, and take corrective action.
     */

    switch (kr) {
    case VM_FAULT_SUCCESS:
        break;
    case VM_FAULT_MEMORY_SHORTAGE:
        if (vm_page_wait((change_wiring) ?
                         THREAD_UNINT :
                         interruptible))
            goto RetryFault;
        /* fall thru */
    case VM_FAULT_INTERRUPTED:
        kr = KERN_ABORTED;
        goto done;
    case VM_FAULT_RETRY:
        goto RetryFault;
    case VM_FAULT_FICTITIOUS_SHORTAGE:
        vm_page_more_fictitious();
        goto RetryFault;
    case VM_FAULT_MEMORY_ERROR:
        if (error_code)
            kr = error_code;
        else
            kr = KERN_MEMORY_ERROR;
        goto done;
    }

    m = result_page;

    if (m != VM_PAGE_NULL) {
        assert((change_wiring && !wired) ?
               (top_page == VM_PAGE_NULL) :
               ((top_page == VM_PAGE_NULL) == (m->object == object)));
    }
    /*
     *  How to clean up the result of vm_fault_page.  This
     *  happens whether the mapping is entered or not.
     */

#define UNLOCK_AND_DEALLOCATE                           \
    MACRO_BEGIN                                         \
    vm_fault_cleanup(m->object, top_page);              \
    vm_object_deallocate(object);                       \
    MACRO_END

    /*
     *  What to do with the resulting page from vm_fault_page
     *  if it doesn't get entered into the physical map:
     */

#define RELEASE_PAGE(m)                                 \
    MACRO_BEGIN                                         \
    PAGE_WAKEUP_DONE(m);                                \
    vm_page_lock_queues();                              \
    if (!m->active && !m->inactive)                     \
        vm_page_activate(m);                            \
    vm_page_unlock_queues();                            \
    MACRO_END
    /*
     *  We must verify that the maps have not changed
     *  since our last lookup.
     */

    if (m != VM_PAGE_NULL) {
        old_copy_object = m->object->copy;
        vm_object_unlock(m->object);
    } else {
        old_copy_object = VM_OBJECT_NULL;
    }
    if ((map != original_map) || !vm_map_verify(map, &version)) {
        vm_object_t         retry_object;
        vm_object_offset_t  retry_offset;
        vm_prot_t           retry_prot;

        /*
         *  To avoid trying to write_lock the map while another
         *  thread has it read_locked (in vm_map_pageable), we
         *  do not try for write permission.  If the page is
         *  still writable, we will get write permission.  If it
         *  is not, or has been marked needs_copy, we enter the
         *  mapping without write permission, and will merely
         *  take another fault.
         */
        vm_map_lock_read(map);
        kr = vm_map_lookup_locked(&map, vaddr,
                                  fault_type & ~VM_PROT_WRITE, &version,
                                  &retry_object, &retry_offset, &retry_prot,
                                  &wired, &behavior, &lo_offset, &hi_offset,
                                  &real_map);
        pmap = real_map->pmap;

        if (kr != KERN_SUCCESS) {
            vm_map_unlock_read(map);
            if (m != VM_PAGE_NULL) {
                vm_object_lock(m->object);
                RELEASE_PAGE(m);
                UNLOCK_AND_DEALLOCATE;
            } else {
                vm_object_deallocate(object);
            }
            goto done;
        }
        vm_object_unlock(retry_object);
        if (m != VM_PAGE_NULL) {
            vm_object_lock(m->object);
        } else {
            vm_object_lock(object);
        }

        if ((retry_object != object) ||
            (retry_offset != offset)) {
            vm_map_unlock_read(map);
            if (real_map != map)
                vm_map_unlock(real_map);
            if (m != VM_PAGE_NULL) {
                RELEASE_PAGE(m);
                UNLOCK_AND_DEALLOCATE;
            } else {
                vm_object_deallocate(object);
            }
            goto RetryFault;
        }

        /*
         *  Check whether the protection has changed or the object
         *  has been copied while we left the map unlocked.
         */
        prot &= retry_prot;
        if (m != VM_PAGE_NULL) {
            vm_object_unlock(m->object);
        } else {
            vm_object_unlock(object);
        }
    }
    if (m != VM_PAGE_NULL) {
        vm_object_lock(m->object);
    } else {
        vm_object_lock(object);
    }

    /*
     *  If the copy object changed while the top-level object
     *  was unlocked, then we must take away write permission.
     */

    if (m != VM_PAGE_NULL) {
        if (m->object->copy != old_copy_object)
            prot &= ~VM_PROT_WRITE;
    }

    /*
     *  If we want to wire down this page, but no longer have
     *  adequate permissions, we must start all over.
     */

    if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
        vm_map_verify_done(map, &version);
        if (real_map != map)
            vm_map_unlock(real_map);
        if (m != VM_PAGE_NULL) {
            RELEASE_PAGE(m);
            UNLOCK_AND_DEALLOCATE;
        } else {
            vm_object_deallocate(object);
        }
        goto RetryFault;
    }
    /*
     *  Put this page into the physical map.
     *  We had to do the unlock above because pmap_enter
     *  may cause other faults.  The page may be on
     *  the pageout queues.  If the pageout daemon comes
     *  across the page, it will remove it from the queues.
     */
    need_activation = FALSE;

    if (m != VM_PAGE_NULL) {
        if (m->no_isync == TRUE) {
            pmap_sync_page_data_phys(m->phys_page);

            if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
                /*
                 * found it in the cache, but this
                 * is the first fault-in of the page (no_isync == TRUE)
                 * so it must have come in as part of
                 * a cluster... account 1 pagein against it
                 */
                current_task()->pageins++;

                type_of_fault = DBG_PAGEIN_FAULT;
            }
            if (m->clustered)
                need_activation = TRUE;

            m->no_isync = FALSE;
        }
        cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

        if (caller_pmap) {
            PMAP_ENTER(caller_pmap,
                       caller_pmap_addr, m,
                       prot, cache_attr, wired);
        } else {
            PMAP_ENTER(pmap, vaddr, m,
                       prot, cache_attr, wired);
        }
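        /*
         * Note on the two PMAP_ENTER calls above: when the caller handed us
         * an explicit pmap (caller_pmap), the translation is entered there
         * at caller_pmap_addr; otherwise it goes into the pmap of the map
         * the lookup resolved to, at the faulting address vaddr.
         */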
        /*
         *  Add working set information for private objects here.
         */
        if (m->object->private) {
            write_startup_file =
                vm_fault_tws_insert(map, real_map, vaddr,
                                    m->object, m->offset);
        }
    } else {

        vm_map_entry_t      entry;
        vm_map_offset_t     laddr;
        vm_map_offset_t     ldelta, hdelta;

        /*
         * do a pmap block mapping from the physical address
         * in the object
         */

        /* While we do not worry about execution protection in   */
        /* general, certain pages may have instruction execution */
        /* disallowed.  We will check here, and if not allowed   */
        /* to execute, we return with a protection failure.      */

        if ((full_fault_type & VM_PROT_EXECUTE) &&
            (!pmap_eligible_for_execute((ppnum_t)
                                        (object->shadow_offset >> 12)))) {

            vm_map_verify_done(map, &version);
            if (real_map != map)
                vm_map_unlock(real_map);
            vm_fault_cleanup(object, top_page);
            vm_object_deallocate(object);
            kr = KERN_PROTECTION_FAILURE;
            goto done;
        }

        if (real_map != map) {
            vm_map_unlock(real_map);
        }
        if (original_map != map) {
            vm_map_unlock_read(map);
            vm_map_lock_read(original_map);
            map = original_map;
        }
        real_map = map;

        laddr = vaddr;
        hdelta = 0xFFFFF000;
        ldelta = 0xFFFFF000;
        while (vm_map_lookup_entry(map, laddr, &entry)) {
            if (ldelta > (laddr - entry->vme_start))
                ldelta = laddr - entry->vme_start;
            if (hdelta > (entry->vme_end - laddr))
                hdelta = entry->vme_end - laddr;
            if (entry->is_sub_map) {

                laddr = (laddr - entry->vme_start)
                            + entry->offset;
                vm_map_lock_read(entry->object.sub_map);
                if (map != real_map)
                    vm_map_unlock_read(map);
                if (entry->use_pmap) {
                    vm_map_unlock_read(real_map);
                    real_map = entry->object.sub_map;
                }
                map = entry->object.sub_map;

            } else {
                break;
            }
        }
        if (vm_map_lookup_entry(map, laddr, &entry) &&
            (entry->object.vm_object != NULL) &&
            (entry->object.vm_object == object)) {

            if (caller_pmap) {
                /* Set up a block mapped area */
                pmap_map_block(caller_pmap,
                    (addr64_t)(caller_pmap_addr - ldelta),
                    (((vm_map_offset_t)
                        (entry->object.vm_object->shadow_offset))
                        + entry->offset +
                        (laddr - entry->vme_start)
                        - ldelta) >> 12,
                    ((ldelta + hdelta) >> 12), prot,
                    (VM_WIMG_MASK & (int)object->wimg_bits), 0);
            } else {
                /* Set up a block mapped area */
                pmap_map_block(real_map->pmap,
                    (addr64_t)(vaddr - ldelta),
                    (((vm_map_offset_t)
                        (entry->object.vm_object->shadow_offset))
                        + entry->offset +
                        (laddr - entry->vme_start) - ldelta) >> 12,
                    ((ldelta + hdelta) >> 12), prot,
                    (VM_WIMG_MASK & (int)object->wimg_bits), 0);
            }
        } else {
            if (caller_pmap) {
                pmap_enter(caller_pmap, caller_pmap_addr,
                           object->shadow_offset >> 12, prot, 0, TRUE);
            } else {
                pmap_enter(pmap, vaddr,
                           object->shadow_offset >> 12, prot, 0, TRUE);
            }
        }
    }
    /*
     *  If the page is not wired down and isn't already
     *  on a pageout queue, then put it where the
     *  pageout daemon can find it.
     */
    if (m != VM_PAGE_NULL) {
        vm_page_lock_queues();

        if (m->clustered) {
            vm_pagein_cluster_used++;
            m->clustered = FALSE;
        }
        m->reference = TRUE;

        if (change_wiring) {
            if (wired)
                vm_page_wire(m);
            else
                vm_page_unwire(m);
        }
#if VM_FAULT_STATIC_CONFIG
        else {
            if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
                vm_page_activate(m);
        }
#else
        else if (software_reference_bits) {
            if (!m->active && !m->inactive)
                vm_page_activate(m);
            m->reference = TRUE;
        } else {
            vm_page_activate(m);
        }
#endif
        vm_page_unlock_queues();
    }

    /*
     *  Unlock everything, and return
     */

    vm_map_verify_done(map, &version);
    if (real_map != map)
        vm_map_unlock(real_map);
    if (m != VM_PAGE_NULL) {
        PAGE_WAKEUP_DONE(m);
        UNLOCK_AND_DEALLOCATE;
    } else {
        vm_fault_cleanup(object, top_page);
        vm_object_deallocate(object);
    }
    kr = KERN_SUCCESS;

#undef  UNLOCK_AND_DEALLOCATE
#undef  RELEASE_PAGE

done:
    if (write_startup_file)
        tws_send_startup_info(current_task());

    thread_interrupt_level(interruptible_state);

    KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
                          vaddr,
                          type_of_fault & 0xff,
                          kr,
                          0,
                          0);

    return (kr);
}
/*
 *  vm_fault_wire:
 *
 *  Wire down a range of virtual addresses in a map.
 */
kern_return_t
vm_fault_wire(
    vm_map_t        map,
    vm_map_entry_t  entry,
    pmap_t          pmap,
    vm_map_offset_t pmap_addr)
{
    register vm_map_offset_t    va;
    register vm_map_offset_t    end_addr = entry->vme_end;
    register kern_return_t      rc;

    assert(entry->in_transition);

    if ((entry->object.vm_object != NULL) &&
        !entry->is_sub_map &&
        entry->object.vm_object->phys_contiguous) {
        return KERN_SUCCESS;
    }

    /*
     *  Inform the physical mapping system that the
     *  range of addresses may not fault, so that
     *  page tables and such can be locked down as well.
     */

    pmap_pageable(pmap, pmap_addr,
                  pmap_addr + (end_addr - entry->vme_start), FALSE);

    /*
     *  We simulate a fault to get the page and enter it
     *  in the physical map.
     */

    for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
        if ((rc = vm_fault_wire_fast(
                map, va, entry, pmap,
                pmap_addr + (va - entry->vme_start)
                )) != KERN_SUCCESS) {
            rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
                          (pmap == kernel_pmap) ?
                          THREAD_UNINT : THREAD_ABORTSAFE,
                          pmap, pmap_addr + (va - entry->vme_start));
        }

        if (rc != KERN_SUCCESS) {
            struct vm_map_entry tmp_entry = *entry;

            /* unwire wired pages */
            tmp_entry.vme_end = va;
            vm_fault_unwire(map,
                            &tmp_entry, FALSE, pmap, pmap_addr);

            return rc;
        }
    }
    return KERN_SUCCESS;
}
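/*
 * Note on the failure path above: if wiring stops partway through the
 * range, only the prefix [vme_start, va) has been wired, so a copy of the
 * entry with vme_end clipped to va is handed to vm_fault_unwire() to undo
 * exactly that prefix before the error is returned to the caller.
 */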
/*
 *  vm_fault_unwire:
 *
 *  Unwire a range of virtual addresses in a map.
 */
void
vm_fault_unwire(
    vm_map_t        map,
    vm_map_entry_t  entry,
    boolean_t       deallocate,
    pmap_t          pmap,
    vm_map_offset_t pmap_addr)
{
    register vm_map_offset_t    va;
    register vm_map_offset_t    end_addr = entry->vme_end;
    vm_object_t                 object;

    object = (entry->is_sub_map)
             ? VM_OBJECT_NULL : entry->object.vm_object;

    /*
     *  Since the pages are wired down, we must be able to
     *  get their mappings from the physical map system.
     */

    for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
        pmap_change_wiring(pmap,
                           pmap_addr + (va - entry->vme_start), FALSE);

        if (object == VM_OBJECT_NULL) {
            (void) vm_fault(map, va, VM_PROT_NONE,
                            TRUE, THREAD_UNINT, pmap, pmap_addr);
        } else if (object->phys_contiguous) {
            continue;
        } else {
            vm_prot_t           prot;
            vm_page_t           result_page;
            vm_page_t           top_page;
            vm_object_t         result_object;
            vm_fault_return_t   result;
            do {
                prot = VM_PROT_NONE;

                vm_object_lock(object);
                vm_object_paging_begin(object);
                XPR(XPR_VM_FAULT,
                    "vm_fault_unwire -> vm_fault_page\n",
                    0,0,0,0,0);
                result = vm_fault_page(object,
                                       entry->offset +
                                         (va - entry->vme_start),
                                       VM_PROT_NONE, TRUE,
                                       THREAD_UNINT,
                                       entry->offset,
                                       entry->offset +
                                         (entry->vme_end
                                           - entry->vme_start),
                                       VM_BEHAVIOR_SEQUENTIAL,
                                       &prot,
                                       &result_page,
                                       &top_page,
                                       (int *)0,
                                       0, map->no_zero_fill,
                                       FALSE, NULL, 0);
            } while (result == VM_FAULT_RETRY);

            if (result != VM_FAULT_SUCCESS)
                panic("vm_fault_unwire: failure");

            result_object = result_page->object;
            if (deallocate) {
                assert(!result_page->fictitious);
                pmap_disconnect(result_page->phys_page);
                VM_PAGE_FREE(result_page);
            } else {
                vm_page_lock_queues();
                vm_page_unwire(result_page);
                vm_page_unlock_queues();
                PAGE_WAKEUP_DONE(result_page);
            }
            vm_fault_cleanup(result_object, top_page);
        }
    }
    /*
     *  Inform the physical mapping system that the range
     *  of addresses may fault, so that page tables and
     *  such may be unwired themselves.
     */

    pmap_pageable(pmap, pmap_addr,
                  pmap_addr + (end_addr - entry->vme_start), TRUE);
}
/*
 *  vm_fault_wire_fast:
 *
 *  Handle common case of a wire down page fault at the given address.
 *  If successful, the page is inserted into the associated physical map.
 *  The map entry is passed in to avoid the overhead of a map lookup.
 *
 *  NOTE: the given address should be truncated to the
 *  proper page address.
 *
 *  KERN_SUCCESS is returned if the page fault is handled; otherwise,
 *  a standard error specifying why the fault is fatal is returned.
 *
 *  The map in question must be referenced, and remains so.
 *  Caller has a read lock on the map.
 *
 *  This is a stripped version of vm_fault() for wiring pages.  Anything
 *  other than the common case will return KERN_FAILURE, and the caller
 *  is expected to call vm_fault().
 */
kern_return_t
vm_fault_wire_fast(
    __unused vm_map_t   map,
    vm_map_offset_t     va,
    vm_map_entry_t      entry,
    pmap_t              pmap,
    vm_map_offset_t     pmap_addr)
{
    vm_object_t         object;
    vm_object_offset_t  offset;
    register vm_page_t  m;
    vm_prot_t           prot;
    thread_t            thread = current_thread();
    unsigned int        cache_attr;

    if (thread != THREAD_NULL && thread->task != TASK_NULL)
        thread->task->faults++;

#define RELEASE_PAGE(m) {                               \
    PAGE_WAKEUP_DONE(m);                                \
    vm_page_lock_queues();                              \
    vm_page_unwire(m);                                  \
    vm_page_unlock_queues();                            \
}

#undef  UNLOCK_THINGS
#define UNLOCK_THINGS   {                               \
    vm_object_paging_end(object);                       \
    vm_object_unlock(object);                           \
}

#undef  UNLOCK_AND_DEALLOCATE
#define UNLOCK_AND_DEALLOCATE   {                       \
    UNLOCK_THINGS;                                      \
    vm_object_deallocate(object);                       \
}
/*
 *  Give up and have caller do things the hard way.
 */

#define GIVE_UP {                                       \
    UNLOCK_AND_DEALLOCATE;                              \
    return(KERN_FAILURE);                               \
}
    /*
     *  If this entry is not directly to a vm_object, bail out.
     */
    if (entry->is_sub_map)
        return(KERN_FAILURE);

    /*
     *  Find the backing store object and offset into it.
     */

    object = entry->object.vm_object;
    offset = (va - entry->vme_start) + entry->offset;
    prot = entry->protection;

    /*
     *  Make a reference to this object to prevent its
     *  disposal while we are messing with it.
     */

    vm_object_lock(object);
    assert(object->ref_count > 0);
    object->ref_count++;
    vm_object_res_reference(object);
    vm_object_paging_begin(object);

    /*
     *  INVARIANTS (through entire routine):
     *
     *  1)  At all times, we must either have the object
     *      lock or a busy page in some object to prevent
     *      some other thread from trying to bring in
     *      the same page.
     *
     *  2)  Once we have a busy page, we must remove it from
     *      the pageout queues, so that the pageout daemon
     *      will not grab it away.
     *
     */

    /*
     *  Look for page in top-level object.  If it's not there or
     *  there's something going on, give up.
     *  ENCRYPTED SWAP: use the slow fault path, since we'll need to
     *  decrypt the page before wiring it down.
     */
    m = vm_page_lookup(object, offset);
    if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) ||
        (m->unusual && (m->error || m->restart || m->absent ||
                        prot & m->page_lock))) {
        GIVE_UP;
    }
    ASSERT_PAGE_DECRYPTED(m);

    /*
     *  Wire the page down now.  All bail outs beyond this
     *  point must unwire the page.
     */

    vm_page_lock_queues();
    vm_page_wire(m);
    vm_page_unlock_queues();
    /*
     *  Mark page busy for other threads.
     */
    assert(!m->busy);
    m->busy = TRUE;
    assert(!m->absent);

    /*
     *  Give up if the page is being written and there's a copy object
     */
    if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
        RELEASE_PAGE(m);
        GIVE_UP;
    }

    /*
     *  Put this page into the physical map.
     *  We have to unlock the object because pmap_enter
     *  may cause other faults.
     */
    if (m->no_isync == TRUE) {
        pmap_sync_page_data_phys(m->phys_page);

        m->no_isync = FALSE;
    }

    cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

    PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);

    /*
     *  Unlock everything, and return
     */

    PAGE_WAKEUP_DONE(m);
    UNLOCK_AND_DEALLOCATE;

    return(KERN_SUCCESS);
}
/*
 *  Routine:    vm_fault_copy_cleanup
 *  Purpose:
 *      Release a page used by vm_fault_copy.
 */

static void
vm_fault_copy_cleanup(
    vm_page_t   page,
    vm_page_t   top_page)
{
    vm_object_t object = page->object;

    vm_object_lock(object);
    PAGE_WAKEUP_DONE(page);
    vm_page_lock_queues();
    if (!page->active && !page->inactive)
        vm_page_activate(page);
    vm_page_unlock_queues();
    vm_fault_cleanup(object, top_page);
}
static void
vm_fault_copy_dst_cleanup(
    vm_page_t   page)
{
    vm_object_t object;

    if (page != VM_PAGE_NULL) {
        object = page->object;
        vm_object_lock(object);
        vm_page_lock_queues();
        vm_page_unwire(page);
        vm_page_unlock_queues();
        vm_object_paging_end(object);
        vm_object_unlock(object);
    }
}
/*
 *  Routine:    vm_fault_copy
 *
 *  Purpose:
 *      Copy pages from one virtual memory object to another --
 *      neither the source nor destination pages need be resident.
 *
 *      Before actually copying a page, the version associated with
 *      the destination address map will be verified.
 *
 *  In/out conditions:
 *      The caller must hold a reference, but not a lock, to
 *      each of the source and destination objects and to the
 *      destination map.
 *
 *  Results:
 *      Returns KERN_SUCCESS if no errors were encountered in
 *      reading or writing the data.  Returns KERN_INTERRUPTED if
 *      the operation was interrupted (only possible if the
 *      "interruptible" argument is asserted).  Other return values
 *      indicate a permanent error in copying the data.
 *
 *      The actual amount of data copied will be returned in the
 *      "copy_size" argument.  In the event that the destination map
 *      verification failed, this amount may be less than the amount
 *      requested.
 */
kern_return_t
vm_fault_copy(
    vm_object_t         src_object,
    vm_object_offset_t  src_offset,
    vm_map_size_t       *copy_size,     /* INOUT */
    vm_object_t         dst_object,
    vm_object_offset_t  dst_offset,
    vm_map_t            dst_map,
    vm_map_version_t    *dst_version,
    int                 interruptible)
{
    vm_page_t           result_page;

    vm_page_t           src_page;
    vm_page_t           src_top_page;
    vm_prot_t           src_prot;

    vm_page_t           dst_page;
    vm_page_t           dst_top_page;
    vm_prot_t           dst_prot;

    vm_map_size_t       amount_left;
    vm_object_t         old_copy_object;
    kern_return_t       error = 0;

    vm_map_size_t       part_size;

    /*
     * In order not to confuse the clustered pageins, align
     * the different offsets on a page boundary.
     */
    vm_object_offset_t  src_lo_offset = vm_object_trunc_page(src_offset);
    vm_object_offset_t  dst_lo_offset = vm_object_trunc_page(dst_offset);
    vm_object_offset_t  src_hi_offset = vm_object_round_page(src_offset + *copy_size);
    vm_object_offset_t  dst_hi_offset = vm_object_round_page(dst_offset + *copy_size);

#define RETURN(x)                                       \
    MACRO_BEGIN                                         \
    *copy_size -= amount_left;                          \
    MACRO_RETURN(x);                                    \
    MACRO_END

    amount_left = *copy_size;
    do { /* while (amount_left > 0) */
        /*
         * There may be a deadlock if both source and destination
         * pages are the same. To avoid this deadlock, the copy must
         * start by getting the destination page in order to apply
         * COW semantics if any.
         */

RetryDestinationFault: ;

        dst_prot = VM_PROT_WRITE|VM_PROT_READ;

        vm_object_lock(dst_object);
        vm_object_paging_begin(dst_object);

        XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
        switch (vm_fault_page(dst_object,
                              vm_object_trunc_page(dst_offset),
                              VM_PROT_WRITE|VM_PROT_READ,
                              FALSE,
                              interruptible,
                              dst_lo_offset,
                              dst_hi_offset,
                              VM_BEHAVIOR_SEQUENTIAL,
                              &dst_prot,
                              &dst_page,
                              &dst_top_page,
                              (int *)0,
                              &error,
                              dst_map->no_zero_fill,
                              FALSE, NULL, 0)) {
        case VM_FAULT_SUCCESS:
            break;
        case VM_FAULT_RETRY:
            goto RetryDestinationFault;
        case VM_FAULT_MEMORY_SHORTAGE:
            if (vm_page_wait(interruptible))
                goto RetryDestinationFault;
            /* fall thru */
        case VM_FAULT_INTERRUPTED:
            RETURN(MACH_SEND_INTERRUPTED);
        case VM_FAULT_FICTITIOUS_SHORTAGE:
            vm_page_more_fictitious();
            goto RetryDestinationFault;
        case VM_FAULT_MEMORY_ERROR:
            if (error)
                return (error);
            else
                return (KERN_MEMORY_ERROR);
        }
        assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);

        old_copy_object = dst_page->object->copy;

        /*
         * There exists the possibility that the source and
         * destination page are the same.  But we can't
         * easily determine that now.  If they are the
         * same, the call to vm_fault_page() for the
         * destination page will deadlock.  To prevent this we
         * wire the page so we can drop busy without having
         * the page daemon steal the page.  We clean up the
         * top page but keep the paging reference on the object
         * holding the dest page so it doesn't go away.
         */

        vm_page_lock_queues();
        vm_page_wire(dst_page);
        vm_page_unlock_queues();
        PAGE_WAKEUP_DONE(dst_page);
        vm_object_unlock(dst_page->object);

        if (dst_top_page != VM_PAGE_NULL) {
            vm_object_lock(dst_object);
            VM_PAGE_FREE(dst_top_page);
            vm_object_paging_end(dst_object);
            vm_object_unlock(dst_object);
        }

RetrySourceFault: ;
        if (src_object == VM_OBJECT_NULL) {
            /*
             *  No source object.  We will just
             *  zero-fill the page in dst_object.
             */
            src_page = VM_PAGE_NULL;
            result_page = VM_PAGE_NULL;
        } else {
            vm_object_lock(src_object);
            src_page = vm_page_lookup(src_object,
                                      vm_object_trunc_page(src_offset));
            if (src_page == dst_page) {
                src_prot = dst_prot;
                result_page = VM_PAGE_NULL;
            } else {
                src_prot = VM_PROT_READ;
                vm_object_paging_begin(src_object);

                XPR(XPR_VM_FAULT,
                    "vm_fault_copy(2) -> vm_fault_page\n",
                    0,0,0,0,0);
                switch (vm_fault_page(src_object,
                                      vm_object_trunc_page(src_offset),
                                      VM_PROT_READ,
                                      FALSE,
                                      interruptible,
                                      src_lo_offset,
                                      src_hi_offset,
                                      VM_BEHAVIOR_SEQUENTIAL,
                                      &src_prot,
                                      &result_page,
                                      &src_top_page,
                                      (int *)0,
                                      &error,
                                      FALSE,
                                      FALSE, NULL, 0)) {
                case VM_FAULT_SUCCESS:
                    break;
                case VM_FAULT_RETRY:
                    goto RetrySourceFault;
                case VM_FAULT_MEMORY_SHORTAGE:
                    if (vm_page_wait(interruptible))
                        goto RetrySourceFault;
                    /* fall thru */
                case VM_FAULT_INTERRUPTED:
                    vm_fault_copy_dst_cleanup(dst_page);
                    RETURN(MACH_SEND_INTERRUPTED);
                case VM_FAULT_FICTITIOUS_SHORTAGE:
                    vm_page_more_fictitious();
                    goto RetrySourceFault;
                case VM_FAULT_MEMORY_ERROR:
                    vm_fault_copy_dst_cleanup(dst_page);
                    if (error)
                        return (error);
                    else
                        return (KERN_MEMORY_ERROR);
                }

                assert((src_top_page == VM_PAGE_NULL) ==
                       (result_page->object == src_object));
            }
            assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
            vm_object_unlock(result_page->object);
        }
        if (!vm_map_verify(dst_map, dst_version)) {
            if (result_page != VM_PAGE_NULL && src_page != dst_page)
                vm_fault_copy_cleanup(result_page, src_top_page);
            vm_fault_copy_dst_cleanup(dst_page);
            break;
        }

        vm_object_lock(dst_page->object);

        if (dst_page->object->copy != old_copy_object) {
            vm_object_unlock(dst_page->object);
            vm_map_verify_done(dst_map, dst_version);
            if (result_page != VM_PAGE_NULL && src_page != dst_page)
                vm_fault_copy_cleanup(result_page, src_top_page);
            vm_fault_copy_dst_cleanup(dst_page);
            break;
        }
        vm_object_unlock(dst_page->object);

        /*
         *  Copy the page, and note that it is dirty
         *  immediately.
         */
        if (!page_aligned(src_offset) ||
            !page_aligned(dst_offset) ||
            !page_aligned(amount_left)) {

            vm_object_offset_t  src_po,
                                dst_po;

            src_po = src_offset - vm_object_trunc_page(src_offset);
            dst_po = dst_offset - vm_object_trunc_page(dst_offset);

            if (dst_po > src_po) {
                part_size = PAGE_SIZE - dst_po;
            } else {
                part_size = PAGE_SIZE - src_po;
            }
            if (part_size > (amount_left)) {
                part_size = amount_left;
            }
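            /*
             * For example (illustrative numbers only), with 4K pages,
             * src_offset = 0x1200 and dst_offset = 0x3300 give
             * src_po = 0x200 and dst_po = 0x300; since dst_po > src_po,
             * part_size = 0x1000 - 0x300 = 0xD00 bytes are handled this
             * iteration (clipped to amount_left if that is smaller).
             */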
            if (result_page == VM_PAGE_NULL) {
                vm_page_part_zero_fill(dst_page,
                                       dst_po, part_size);
            } else {
                vm_page_part_copy(result_page, src_po,
                                  dst_page, dst_po, part_size);
                if (!dst_page->dirty) {
                    vm_object_lock(dst_object);
                    dst_page->dirty = TRUE;
                    vm_object_unlock(dst_page->object);
                }
            }
        } else {
            part_size = PAGE_SIZE;

            if (result_page == VM_PAGE_NULL)
                vm_page_zero_fill(dst_page);
            else {
                vm_page_copy(result_page, dst_page);
                if (!dst_page->dirty) {
                    vm_object_lock(dst_object);
                    dst_page->dirty = TRUE;
                    vm_object_unlock(dst_page->object);
                }
            }
        }
        /*
         *  Unlock everything, and return
         */

        vm_map_verify_done(dst_map, dst_version);

        if (result_page != VM_PAGE_NULL && src_page != dst_page)
            vm_fault_copy_cleanup(result_page, src_top_page);
        vm_fault_copy_dst_cleanup(dst_page);

        amount_left -= part_size;
        src_offset += part_size;
        dst_offset += part_size;
    } while (amount_left > 0);

    RETURN(KERN_SUCCESS);
#undef  RETURN

    /*NOTREACHED*/
}
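/*
 * Illustrative call pattern only (hypothetical caller, not taken from this
 * file): after looking up the destination map version, a caller would ask
 * for "len" bytes and read back how much was actually copied:
 *
 *      vm_map_size_t   size = len;
 *      kr = vm_fault_copy(src_object, src_offset, &size,
 *                         dst_object, dst_offset,
 *                         dst_map, &dst_version, THREAD_UNINT);
 *
 * On return, size holds the amount copied (see the header comment above).
 */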
/*
 *  Routine:    vm_fault_page_overwrite
 *
 *  Description:
 *      A form of vm_fault_page that assumes that the
 *      resulting page will be overwritten in its entirety,
 *      making it unnecessary to obtain the correct *contents*
 *      of the page.
 *
 *  Implementation:
 *      XXX Untested.  Also unused.  Eventually, this technology
 *      could be used in vm_fault_copy() to advantage.
 */
vm_fault_return_t
vm_fault_page_overwrite(
    vm_object_t         dst_object,
    vm_object_offset_t  dst_offset,
    vm_page_t           *result_page)   /* OUT */
{
    vm_page_t       dst_page;
    kern_return_t   wait_result;

#define interruptible   THREAD_UNINT    /* XXX */
    while (TRUE) {
        /*
         *  Look for a page at this offset
         */
        while ((dst_page = vm_page_lookup(dst_object, dst_offset))
               == VM_PAGE_NULL) {
            /*
             *  No page, no problem... just allocate one.
             */
            dst_page = vm_page_alloc(dst_object, dst_offset);
            if (dst_page == VM_PAGE_NULL) {
                vm_object_unlock(dst_object);
                VM_PAGE_WAIT();
                vm_object_lock(dst_object);
                continue;
            }

            /*
             *  Pretend that the memory manager
             *  write-protected the page.
             *
             *  Note that we will be asking for write
             *  permission without asking for the data
             *  first.
             */
            dst_page->overwriting = TRUE;
            dst_page->page_lock = VM_PROT_WRITE;
            dst_page->absent = TRUE;
            dst_page->unusual = TRUE;
            dst_object->absent_count++;

            break;

            /*
             *  When we bail out, we might have to throw
             *  away the page created here.
             */

#define DISCARD_PAGE                                            \
    MACRO_BEGIN                                                 \
    vm_object_lock(dst_object);                                 \
    dst_page = vm_page_lookup(dst_object, dst_offset);          \
    if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting)    \
        VM_PAGE_FREE(dst_page);                                 \
    vm_object_unlock(dst_object);                               \
    MACRO_END
        }
        /*
         *  If the page is write-protected...
         */

        if (dst_page->page_lock & VM_PROT_WRITE) {
            /*
             *  ... and an unlock request hasn't been sent
             */
            if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
                vm_prot_t       u;
                kern_return_t   rc;

                /*
                 *  ... then send one now.
                 */
                if (!dst_object->pager_ready) {
                    wait_result = vm_object_assert_wait(dst_object,
                                                        VM_OBJECT_EVENT_PAGER_READY,
                                                        interruptible);
                    vm_object_unlock(dst_object);
                    if (wait_result == THREAD_WAITING)
                        wait_result = thread_block(THREAD_CONTINUE_NULL);
                    if (wait_result != THREAD_AWAKENED) {
                        DISCARD_PAGE;
                        return(VM_FAULT_INTERRUPTED);
                    }
                    continue;
                }

                u = dst_page->unlock_request |= VM_PROT_WRITE;
                vm_object_unlock(dst_object);

                if ((rc = memory_object_data_unlock(
                        dst_object->pager,
                        dst_offset + dst_object->paging_offset,
                        PAGE_SIZE,
                        u)) != KERN_SUCCESS) {
                    printf("vm_object_overwrite: memory_object_data_unlock failed\n");
                    DISCARD_PAGE;
                    return((rc == MACH_SEND_INTERRUPTED) ?
                           VM_FAULT_INTERRUPTED :
                           VM_FAULT_MEMORY_ERROR);
                }
                vm_object_lock(dst_object);
                continue;
            }
            /* ... fall through to wait below */
        } else {
            /*
             *  If the page isn't being used for other
             *  purposes, then we're done.
             */
            if ( ! (dst_page->busy || dst_page->absent ||
                    dst_page->error || dst_page->restart) )
                break;
        }

        wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
        vm_object_unlock(dst_object);
        if (wait_result == THREAD_WAITING)
            wait_result = thread_block(THREAD_CONTINUE_NULL);
        if (wait_result != THREAD_AWAKENED) {
            DISCARD_PAGE;
            return(VM_FAULT_INTERRUPTED);
        }
        vm_object_lock(dst_object);
    }

    *result_page = dst_page;
    return(VM_FAULT_SUCCESS);

#undef  interruptible
#undef  DISCARD_PAGE
}
#if VM_FAULT_CLASSIFY
/*
 *  Temporary statistics gathering support.
 */

/*
 *  Statistics arrays:
 */
#define VM_FAULT_TYPES_MAX  5
#define VM_FAULT_LEVEL_MAX  8

int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];

#define VM_FAULT_TYPE_ZERO_FILL 0
#define VM_FAULT_TYPE_MAP_IN    1
#define VM_FAULT_TYPE_PAGER     2
#define VM_FAULT_TYPE_COPY      3
#define VM_FAULT_TYPE_OTHER     4
void
vm_fault_classify(vm_object_t           object,
                  vm_object_offset_t    offset,
                  vm_prot_t             fault_type)
{
    int         type, level = 0;
    vm_page_t   m;

    while (TRUE) {
        m = vm_page_lookup(object, offset);
        if (m != VM_PAGE_NULL) {
            if (m->busy || m->error || m->restart || m->absent ||
                fault_type & m->page_lock) {
                type = VM_FAULT_TYPE_OTHER;
                break;
            }
            if (((fault_type & VM_PROT_WRITE) == 0) ||
                ((level == 0) && object->copy == VM_OBJECT_NULL)) {
                type = VM_FAULT_TYPE_MAP_IN;
                break;
            }
            type = VM_FAULT_TYPE_COPY;
            break;
        }
        else {
            if (object->pager_created) {
                type = VM_FAULT_TYPE_PAGER;
                break;
            }
            if (object->shadow == VM_OBJECT_NULL) {
                type = VM_FAULT_TYPE_ZERO_FILL;
                break;
            }

            offset += object->shadow_offset;
            object = object->shadow;
            level++;
            continue;
        }
    }

    if (level > VM_FAULT_LEVEL_MAX)
        level = VM_FAULT_LEVEL_MAX;

    vm_fault_stats[type][level] += 1;

    return;
}
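/*
 * Reading the table: vm_fault_stats[type][level] counts faults by outcome
 * (zero fill, map in, pager, copy, other) and by how many shadow-chain
 * levels were walked before the outcome was decided, with the depth
 * clamped to VM_FAULT_LEVEL_MAX.
 */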
/* cleanup routine to call from debugger */
void
vm_fault_classify_init(void)
{
    int type, level;

    for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
        for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
            vm_fault_stats[type][level] = 0;
        }
    }

    return;
}
#endif  /* VM_FAULT_CLASSIFY */