/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Page fault handling module.
 */
#include <mach_cluster_stats.h>
#include <mach_pagemap.h>

#include <mach/mach_types.h>
#include <mach/kern_return.h>
#include <mach/message.h>	/* for error codes */
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/memory_object.h>
				/* For memory_object_data_{request,unlock} */

#include <kern/kern_types.h>
#include <kern/host_statistics.h>
#include <kern/counters.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/host.h>
#include <kern/mach_param.h>
#include <kern/macro_help.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>

#include <ppc/proc_reg.h>

#include <vm/vm_fault.h>
#include <vm/task_working_set.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>

#include <sys/kdebug.h>
#define VM_FAULT_CLASSIFY	0
#define VM_FAULT_STATIC_CONFIG	1

#define TRACEFAULTPAGE 0	/* (TEST/DEBUG) */

unsigned int	vm_object_absent_max = 50;

int		vm_fault_debug = 0;

#if	!VM_FAULT_STATIC_CONFIG
boolean_t	vm_fault_dirty_handling = FALSE;
boolean_t	vm_fault_interruptible = FALSE;
boolean_t	software_reference_bits = TRUE;
#endif

#if	MACH_KDB
extern struct db_watchpoint *db_watchpoint_list;
#endif	/* MACH_KDB */
/* Forward declarations of internal routines. */
extern kern_return_t vm_fault_wire_fast(
				vm_map_entry_t	entry,
				vm_map_offset_t	pmap_addr);

extern void vm_fault_continue(void);

extern void vm_fault_copy_cleanup(
				vm_page_t	page,
				vm_page_t	top_page);

extern void vm_fault_copy_dst_cleanup(
				vm_page_t	page);

#if	VM_FAULT_CLASSIFY
extern void vm_fault_classify(vm_object_t	object,
			vm_object_offset_t	offset,
			vm_prot_t		fault_type);

extern void vm_fault_classify_init(void);
#endif	/* VM_FAULT_CLASSIFY */
/*
 *	Routine:	vm_fault_init
 *	Purpose:
 *		Initialize our private data structures.
 */
void
vm_fault_init(void)
{
}

/*
 *	Routine:	vm_fault_cleanup
 *	Purpose:
 *		Clean up the result of vm_fault_page.
 *	Results:
 *		The paging reference for "object" is released.
 *		"object" is unlocked.
 *		If "top_page" is not null, "top_page" is
 *		freed and the paging reference for the object
 *		containing it is released.
 *
 *	In/out conditions:
 *		"object" must be locked.
 */
void
vm_fault_cleanup(
	register vm_object_t	object,
	register vm_page_t	top_page)
{
	vm_object_paging_end(object);
	vm_object_unlock(object);

	if (top_page != VM_PAGE_NULL) {
		object = top_page->object;
		vm_object_lock(object);
		VM_PAGE_FREE(top_page);
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}
}
#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES	16
struct {
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats_in[MAXCLUSTERPAGES];
#define CLUSTER_STAT(clause)	clause
#define CLUSTER_STAT_HIGHER(x)	\
	((cluster_stats_in[(x)].pages_at_higher_offsets)++)
#define CLUSTER_STAT_LOWER(x)	\
	((cluster_stats_in[(x)].pages_at_lower_offsets)++)
#define CLUSTER_STAT_CLUSTER(x)	\
	((cluster_stats_in[(x)].pages_in_cluster)++)
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */
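
/*
 * Illustrative note (added for clarity, not part of the original comments):
 * CLUSTER_STAT() lets cluster-accounting statements compile away entirely
 * when MACH_CLUSTER_STATS is not configured.  A minimal sketch of the
 * intended usage pattern, with a hypothetical local counter:
 *
 *	CLUSTER_STAT(int pages_scanned = 0;)
 *	...
 *	CLUSTER_STAT(pages_scanned++;)
 *	CLUSTER_STAT_HIGHER(pages_scanned);
 *
 * With stats disabled each of these expands to nothing; with stats enabled
 * the first two expand to the clause itself and the last one increments
 * cluster_stats_in[pages_scanned].pages_at_higher_offsets.
 */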
/* XXX - temporary */
boolean_t vm_allow_clustered_pagein = FALSE;
int vm_pagein_cluster_used = 0;

#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)


boolean_t	vm_page_deactivate_behind = TRUE;
/*
 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
 */
int vm_default_ahead = 0;
int vm_default_behind = MAX_UPL_TRANSFER;
/*
 *	vm_page_deactivate_behind
 *
 *	Determine if sequential access is in progress
 *	in accordance with the behavior specified.  If
 *	so, compute a potential page to deactivate and
 *	deactivate it.
 *
 *	The object must be locked.
 */
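/*
 * Worked sketch of the sequential-detection bookkeeping below (added for
 * clarity, not part of the original comment): for VM_BEHAVIOR_DEFAULT,
 * each fault whose offset immediately follows object->last_alloc grows
 * object->sequential by PAGE_SIZE_64.  Once at least vm_default_behind
 * pages have been touched in order, the page vm_default_behind pages
 * behind the current offset is looked up and deactivated, roughly:
 *
 *	behind = vm_default_behind * PAGE_SIZE_64;
 *	if (offset >= behind && object->sequential >= behind)
 *		m = vm_page_lookup(object, offset - behind);
 *
 * Any non-sequential fault resets object->sequential to PAGE_SIZE_64.
 */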
boolean_t
vm_fault_deactivate_behind(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_behavior_t		behavior)
{
	vm_page_t	m;

#if TRACEFAULTPAGE
	dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind);	/* (TEST/DEBUG) */
#endif

	if (object == kernel_object) {
		/*
		 * Do not deactivate pages from the kernel object: they
		 * are not intended to become pageable.
		 */
		return FALSE;
	}

	switch (behavior) {
	case VM_BEHAVIOR_RANDOM:
		object->sequential = PAGE_SIZE_64;
		m = VM_PAGE_NULL;
		break;
	case VM_BEHAVIOR_SEQUENTIAL:
		if (offset &&
		    object->last_alloc == offset - PAGE_SIZE_64) {
			object->sequential += PAGE_SIZE_64;
			m = vm_page_lookup(object, offset - PAGE_SIZE_64);
		} else {
			object->sequential = PAGE_SIZE_64; /* reset */
			m = VM_PAGE_NULL;
		}
		break;
	case VM_BEHAVIOR_RSEQNTL:
		if (object->last_alloc &&
		    object->last_alloc == offset + PAGE_SIZE_64) {
			object->sequential += PAGE_SIZE_64;
			m = vm_page_lookup(object, offset + PAGE_SIZE_64);
		} else {
			object->sequential = PAGE_SIZE_64; /* reset */
			m = VM_PAGE_NULL;
		}
		break;
	case VM_BEHAVIOR_DEFAULT:
	default:
		if (offset &&
		    object->last_alloc == offset - PAGE_SIZE_64) {
			vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;

			object->sequential += PAGE_SIZE_64;
			m = (offset >= behind &&
			     object->sequential >= behind) ?
				vm_page_lookup(object, offset - behind) :
				VM_PAGE_NULL;
		} else if (object->last_alloc &&
			   object->last_alloc == offset + PAGE_SIZE_64) {
			vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;

			object->sequential += PAGE_SIZE_64;
			m = (offset < -behind &&
			     object->sequential >= behind) ?
				vm_page_lookup(object, offset + behind) :
				VM_PAGE_NULL;
		} else {
			object->sequential = PAGE_SIZE_64;
			m = VM_PAGE_NULL;
		}
		break;
	}
	object->last_alloc = offset;

	if (m) {
		if (!m->busy) {
			vm_page_lock_queues();
			vm_page_deactivate(m);
			vm_page_unlock_queues();
#if TRACEFAULTPAGE
			dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
#endif
		}
		return TRUE;
	}
	return FALSE;
}
/*
 *	Routine:	vm_fault_page
 *	Purpose:
 *		Find the resident page for the virtual memory
 *		specified by the given virtual memory object
 *		and offset.
 *	Additional arguments:
 *		The required permissions for the page are given
 *		in "fault_type".  Desired permissions are included
 *		in "protection".  The minimum and maximum valid offsets
 *		within the object for the relevant map entry are
 *		passed in "lo_offset" and "hi_offset" respectively and
 *		the expected page reference pattern is passed in "behavior".
 *		These three parameters are used to determine pagein cluster
 *		limits.
 *
 *		If the desired page is known to be resident (for
 *		example, because it was previously wired down), asserting
 *		the "unwiring" parameter will speed the search.
 *
 *		If the operation can be interrupted (by thread_abort
 *		or thread_terminate), then the "interruptible"
 *		parameter should be asserted.
 *
 *	Results:
 *		The page containing the proper data is returned
 *		in "result_page".
 *
 *	In/out conditions:
 *		The source object must be locked and referenced,
 *		and must donate one paging reference.  The reference
 *		is not affected.  The paging reference and lock are
 *		consumed.
 *
 *		If the call succeeds, the object in which "result_page"
 *		resides is left locked and holding a paging reference.
 *		If this is not the original object, a busy page in the
 *		original object is returned in "top_page", to prevent other
 *		callers from pursuing this same data, along with a paging
 *		reference for the original object.  The "top_page" should
 *		be destroyed when this guarantee is no longer required.
 *		The "result_page" is also left busy.  It is not removed
 *		from the pageout queues.
 */
vm_fault_return_t
vm_fault_page(
	/* Arguments: */
	vm_object_t	first_object,	/* Object to begin search */
	vm_object_offset_t first_offset, /* Offset into object */
	vm_prot_t	fault_type,	/* What access is requested */
	boolean_t	must_be_resident,/* Must page be resident? */
	int		interruptible,	/* how may fault be interrupted? */
	vm_map_offset_t	lo_offset,	/* Map entry start */
	vm_map_offset_t	hi_offset,	/* Map entry end */
	vm_behavior_t	behavior,	/* Page reference behavior */
	/* Modifies in place: */
	vm_prot_t	*protection,	/* Protection for mapping */
	/* Returns: */
	vm_page_t	*result_page,	/* Page found, if successful */
	vm_page_t	*top_page,	/* Page in top object, if
					 * not result_page. */
	int		*type_of_fault,	/* if non-null, fill in with type of fault
					 * COW, zero-fill, etc... returned in trace point */
	/* More arguments: */
	kern_return_t	*error_code,	/* code if page is in error */
	boolean_t	no_zero_fill,	/* don't zero fill absent pages */
	boolean_t	data_supply,	/* treat as data_supply if
					 * it is a write fault and a full
					 * page is provided */
	__unused vm_map_offset_t vaddr)
{
	vm_page_t		m;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_page_t		first_m;
	vm_object_t		next_object;
	vm_object_t		copy_object;
	boolean_t		look_for_page;
	vm_prot_t		access_required = fault_type;
	vm_prot_t		wants_copy_flag;
	vm_object_size_t	length;
	vm_object_offset_t	cluster_start, cluster_end;
	CLUSTER_STAT(int pages_at_higher_offsets;)
	CLUSTER_STAT(int pages_at_lower_offsets;)
	kern_return_t		wait_result;
	boolean_t		interruptible_state;
	boolean_t		bumped_pagein = FALSE;
#if	MACH_PAGEMAP
/*
 * MACH page map - an optional optimization where a bit map is maintained
 * by the VM subsystem for internal objects to indicate which pages of
 * the object currently reside on backing store.  This existence map
 * duplicates information maintained by the vnode pager.  It is
 * created at the time of the first pageout against the object, i.e.
 * at the same time the pager for the object is created.  The optimization
 * is designed to eliminate pager interaction overhead, if it is
 * 'known' that the page does not exist on backing store.
 *
 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
 * either marked as paged out in the existence map for the object or no
 * existence map exists for the object.  LOOK_FOR() is one of the
 * criteria in the decision to invoke the pager.  It is also used as one
 * of the criteria to terminate the scan for adjacent pages in a clustered
 * pagein operation.  Note that LOOK_FOR() always evaluates to TRUE for
 * permanent objects.  Note also that if the pager for an internal object
 * has not been created, the pager is not invoked regardless of the value
 * of LOOK_FOR() and that clustered pagein scans are only done on an object
 * for which a pager has been created.
 *
 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 * is marked as paged out in the existence map for the object.  PAGED_OUT()
 * is used to determine if a page has already been pushed
 * into a copy object in order to avoid a redundant page out operation.
 */
#define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			!= VM_EXTERNAL_STATE_ABSENT)
#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			== VM_EXTERNAL_STATE_EXISTS)
#else	/* MACH_PAGEMAP */
/*
 * If the MACH page map optimization is not enabled,
 * LOOK_FOR() always evaluates to TRUE.  The pager will always be
 * invoked to resolve missing pages in an object, assuming the pager
 * has been created for the object.  In a clustered page operation, the
 * absence of a page on backing store cannot be used to terminate
 * a scan for adjacent pages since that information is available only in
 * the pager.  Hence pages that may not be paged out are potentially
 * included in a clustered request.  The vnode pager is coded to deal
 * with any combination of absent/present pages in a clustered
 * pagein request.  PAGED_OUT() always evaluates to FALSE, i.e. the pager
 * will always be invoked to push a dirty page into a copy object assuming
 * a pager has been created.  If the page has already been pushed, the
 * pager will ignore the new request.
 */
#define LOOK_FOR(o, f) TRUE
#define PAGED_OUT(o, f) FALSE
#endif	/* MACH_PAGEMAP */
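
/*
 * Illustrative note (added, not from the original comments): LOOK_FOR()
 * feeds the "should we call the pager" decision further below, roughly of
 * the form
 *
 *	look_for_page = (object->pager_created) && LOOK_FOR(object, offset);
 *
 * while PAGED_OUT() is consulted before pushing a page into a copy object,
 * so that a page that has already been paged out of the copy object is not
 * pushed a second time.  With MACH_PAGEMAP disabled, both checks fall back
 * to the conservative constants above.
 */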
#define PREPARE_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	vm_page_lock_queues();				\
	MACRO_END

#define DO_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	PAGE_WAKEUP_DONE(m);				\
	if (!m->active && !m->inactive)			\
		vm_page_activate(m);			\
	vm_page_unlock_queues();			\
	MACRO_END

#define RELEASE_PAGE(m)					\
	MACRO_BEGIN					\
	PREPARE_RELEASE_PAGE(m);			\
	DO_RELEASE_PAGE(m);				\
	MACRO_END
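
/*
 * Note (added for clarity): RELEASE_PAGE() is the common "give the page
 * back" path used on the error/retry exits below.  Its expansion, per the
 * macros above, is
 *
 *	vm_page_lock_queues();
 *	PAGE_WAKEUP_DONE(m);
 *	if (!m->active && !m->inactive)
 *		vm_page_activate(m);
 *	vm_page_unlock_queues();
 *
 * i.e. wake any threads waiting on the busy page and, if it sits on
 * neither the active nor the inactive queue, reactivate it.
 */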
#if TRACEFAULTPAGE
	dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset);	/* (TEST/DEBUG) */
#endif
#if	!VM_FAULT_STATIC_CONFIG
	if (vm_fault_dirty_handling
#if	MACH_KDB
		/*
		 *	If there are watchpoints set, then
		 *	we don't want to give away write permission
		 *	on a read fault.  Make the task write fault,
		 *	so that the watchpoint code notices the access.
		 */
	    || db_watchpoint_list
#endif	/* MACH_KDB */
	    ) {
		/*
		 *	If we aren't asking for write permission,
		 *	then don't give it away.  We're using write
		 *	faults to set the dirty bit.
		 */
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
	}

	if (!vm_fault_interruptible)
		interruptible = THREAD_UNINT;
#else	/* STATIC_CONFIG */
#if	MACH_KDB
	/*
	 *	If there are watchpoints set, then
	 *	we don't want to give away write permission
	 *	on a read fault.  Make the task write fault,
	 *	so that the watchpoint code notices the access.
	 */
	if (db_watchpoint_list) {
		/*
		 *	If we aren't asking for write permission,
		 *	then don't give it away.  We're using write
		 *	faults to set the dirty bit.
		 */
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
	}
#endif	/* MACH_KDB */
#endif	/* STATIC_CONFIG */

	interruptible_state = thread_interrupt_level(interruptible);
	/*
	 *	INVARIANTS (through entire routine):
	 *
	 *	1)	At all times, we must either have the object
	 *		lock or a busy page in some object to prevent
	 *		some other thread from trying to bring in
	 *		the same page.
	 *
	 *		Note that we cannot hold any locks during the
	 *		pager access or when waiting for memory, so
	 *		we use a busy page then.
	 *
	 *		Note also that we aren't as concerned about more than
	 *		one thread attempting to memory_object_data_unlock
	 *		the same page at once, so we don't hold the page
	 *		as busy then, but do record the highest unlock
	 *		value so far.  [Unlock requests may also be delivered
	 *		out of order.]
	 *
	 *	2)	To prevent another thread from racing us down the
	 *		shadow chain and entering a new page in the top
	 *		object before we do, we must keep a busy page in
	 *		the top object while following the shadow chain.
	 *
	 *	3)	We must increment paging_in_progress on any object
	 *		for which we have a busy page
	 *
	 *	4)	We leave busy pages on the pageout queues.
	 *		If the pageout daemon comes across a busy page,
	 *		it will remove the page from the pageout queues.
	 */
	/*
	 *	Search for the page at object/offset.
	 */

	object = first_object;
	offset = first_offset;
	first_m = VM_PAGE_NULL;
	access_required = fault_type;

	XPR(XPR_VM_FAULT,
		"vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
		(integer_t)object, offset, fault_type, *protection, 0);
587 * See whether this page is resident
592 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
594 if (!object
->alive
) {
595 vm_fault_cleanup(object
, first_m
);
596 thread_interrupt_level(interruptible_state
);
597 return(VM_FAULT_MEMORY_ERROR
);
599 m
= vm_page_lookup(object
, offset
);
601 dbgTrace(0xBEEF0004, (unsigned int) m
, (unsigned int) object
); /* (TEST/DEBUG) */
603 if (m
!= VM_PAGE_NULL
) {
			/*
			 * If the page was pre-paged as part of a
			 * cluster, record the fact.
			 * If we were passed a valid pointer for
			 * "type_of_fault", then we came from
			 * vm_fault... we'll let it deal with
			 * this condition, since it
			 * needs to see m->clustered to correctly
			 * account the pageins.
			 */
614 if (type_of_fault
== NULL
&& m
->clustered
) {
615 vm_pagein_cluster_used
++;
616 m
->clustered
= FALSE
;
620 * If the page is being brought in,
621 * wait for it and then retry.
623 * A possible optimization: if the page
624 * is known to be resident, we can ignore
625 * pages that are absent (regardless of
626 * whether they're busy).
631 dbgTrace(0xBEEF0005, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
633 wait_result
= PAGE_SLEEP(object
, m
, interruptible
);
635 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
636 (integer_t
)object
, offset
,
638 counter(c_vm_fault_page_block_busy_kernel
++);
640 if (wait_result
!= THREAD_AWAKENED
) {
641 vm_fault_cleanup(object
, first_m
);
642 thread_interrupt_level(interruptible_state
);
643 if (wait_result
== THREAD_RESTART
)
645 return(VM_FAULT_RETRY
);
649 return(VM_FAULT_INTERRUPTED
);
658 * the user needs access to a page that we
659 * encrypted before paging it out.
660 * Decrypt the page now.
661 * Keep it busy to prevent anyone from
662 * accessing it during the decryption.
665 vm_page_decrypt(m
, 0);
666 assert(object
== m
->object
);
671 * Retry from the top, in case
672 * something changed while we were
677 ASSERT_PAGE_DECRYPTED(m
);
680 * If the page is in error, give up now.
685 dbgTrace(0xBEEF0006, (unsigned int) m
, (unsigned int) error_code
); /* (TEST/DEBUG) */
688 *error_code
= m
->page_error
;
690 vm_fault_cleanup(object
, first_m
);
691 thread_interrupt_level(interruptible_state
);
692 return(VM_FAULT_MEMORY_ERROR
);
696 * If the pager wants us to restart
697 * at the top of the chain,
698 * typically because it has moved the
699 * page to another pager, then do so.
704 dbgTrace(0xBEEF0007, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
707 vm_fault_cleanup(object
, first_m
);
708 thread_interrupt_level(interruptible_state
);
709 return(VM_FAULT_RETRY
);
713 * If the page isn't busy, but is absent,
714 * then it was deemed "unavailable".
719 * Remove the non-existent page (unless it's
720 * in the top object) and move on down to the
721 * next object (if there is one).
724 dbgTrace(0xBEEF0008, (unsigned int) m
, (unsigned int) object
->shadow
); /* (TEST/DEBUG) */
727 next_object
= object
->shadow
;
728 if (next_object
== VM_OBJECT_NULL
) {
731 assert(!must_be_resident
);
733 if (object
->shadow_severed
) {
736 thread_interrupt_level(interruptible_state
);
737 return VM_FAULT_MEMORY_ERROR
;
741 * Absent page at bottom of shadow
742 * chain; zero fill the page we left
743 * busy in the first object, and flush
744 * the absent page. But first we
745 * need to allocate a real page.
747 if (VM_PAGE_THROTTLED() ||
748 (real_m
= vm_page_grab())
752 thread_interrupt_level(
753 interruptible_state
);
755 VM_FAULT_MEMORY_SHORTAGE
);
759 * are we protecting the system from
760 * backing store exhaustion. If so
761 * sleep unless we are privileged.
764 if(vm_backing_store_low
) {
765 if(!(current_task()->priv_flags
766 & VM_BACKING_STORE_PRIV
)) {
767 assert_wait((event_t
)
768 &vm_backing_store_low
,
770 vm_fault_cleanup(object
,
772 thread_block(THREAD_CONTINUE_NULL
);
773 thread_interrupt_level(
774 interruptible_state
);
775 return(VM_FAULT_RETRY
);
781 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
782 (integer_t
)object
, offset
,
784 (integer_t
)first_object
, 0);
785 if (object
!= first_object
) {
787 vm_object_paging_end(object
);
788 vm_object_unlock(object
);
789 object
= first_object
;
790 offset
= first_offset
;
792 first_m
= VM_PAGE_NULL
;
793 vm_object_lock(object
);
797 assert(real_m
->busy
);
798 vm_page_insert(real_m
, object
, offset
);
802 * Drop the lock while zero filling
803 * page. Then break because this
804 * is the page we wanted. Checking
805 * the page lock is a waste of time;
806 * this page was either absent or
807 * newly allocated -- in both cases
808 * it can't be page locked by a pager.
813 vm_object_unlock(object
);
814 vm_page_zero_fill(m
);
815 vm_object_lock(object
);
818 *type_of_fault
= DBG_ZERO_FILL_FAULT
;
819 VM_STAT(zero_fill_count
++);
821 if (bumped_pagein
== TRUE
) {
823 current_task()->pageins
--;
825 vm_page_lock_queues();
826 VM_PAGE_QUEUES_REMOVE(m
);
827 m
->page_ticket
= vm_page_ticket
;
829 assert(m
->object
!= kernel_object
);
830 assert(m
->pageq
.next
== NULL
&&
831 m
->pageq
.prev
== NULL
);
832 if(m
->object
->size
> 0x200000) {
834 /* depends on the queues lock */
836 queue_enter(&vm_page_queue_zf
,
837 m
, vm_page_t
, pageq
);
840 &vm_page_queue_inactive
,
841 m
, vm_page_t
, pageq
);
843 vm_page_ticket_roll
++;
844 if(vm_page_ticket_roll
==
845 VM_PAGE_TICKETS_IN_ROLL
) {
846 vm_page_ticket_roll
= 0;
848 VM_PAGE_TICKET_ROLL_IDS
)
854 vm_page_inactive_count
++;
855 vm_page_unlock_queues();
858 if (must_be_resident
) {
859 vm_object_paging_end(object
);
860 } else if (object
!= first_object
) {
861 vm_object_paging_end(object
);
867 vm_object_absent_release(object
);
870 vm_page_lock_queues();
871 VM_PAGE_QUEUES_REMOVE(m
);
872 vm_page_unlock_queues();
875 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
876 (integer_t
)object
, offset
,
877 (integer_t
)next_object
,
878 offset
+object
->shadow_offset
,0);
879 offset
+= object
->shadow_offset
;
880 hi_offset
+= object
->shadow_offset
;
881 lo_offset
+= object
->shadow_offset
;
882 access_required
= VM_PROT_READ
;
883 vm_object_lock(next_object
);
884 vm_object_unlock(object
);
885 object
= next_object
;
886 vm_object_paging_begin(object
);
892 && ((object
!= first_object
) ||
893 (object
->copy
!= VM_OBJECT_NULL
))
894 && (fault_type
& VM_PROT_WRITE
)) {
896 * This is a copy-on-write fault that will
897 * cause us to revoke access to this page, but
898 * this page is in the process of being cleaned
899 * in a clustered pageout. We must wait until
900 * the cleaning operation completes before
901 * revoking access to the original page,
902 * otherwise we might attempt to remove a
906 dbgTrace(0xBEEF0009, (unsigned int) m
, (unsigned int) offset
); /* (TEST/DEBUG) */
909 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
910 (integer_t
)object
, offset
,
912 /* take an extra ref so that object won't die */
913 assert(object
->ref_count
> 0);
915 vm_object_res_reference(object
);
916 vm_fault_cleanup(object
, first_m
);
917 counter(c_vm_fault_page_block_backoff_kernel
++);
918 vm_object_lock(object
);
919 assert(object
->ref_count
> 0);
920 m
= vm_page_lookup(object
, offset
);
921 if (m
!= VM_PAGE_NULL
&& m
->cleaning
) {
922 PAGE_ASSERT_WAIT(m
, interruptible
);
923 vm_object_unlock(object
);
924 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
925 vm_object_deallocate(object
);
928 vm_object_unlock(object
);
929 vm_object_deallocate(object
);
930 thread_interrupt_level(interruptible_state
);
931 return VM_FAULT_RETRY
;
936 * If the desired access to this page has
937 * been locked out, request that it be unlocked.
940 if (access_required
& m
->page_lock
) {
941 if ((access_required
& m
->unlock_request
) != access_required
) {
942 vm_prot_t new_unlock_request
;
946 dbgTrace(0xBEEF000A, (unsigned int) m
, (unsigned int) object
->pager_ready
); /* (TEST/DEBUG) */
948 if (!object
->pager_ready
) {
950 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
952 (integer_t
)object
, offset
,
954 /* take an extra ref */
955 assert(object
->ref_count
> 0);
957 vm_object_res_reference(object
);
958 vm_fault_cleanup(object
,
960 counter(c_vm_fault_page_block_backoff_kernel
++);
961 vm_object_lock(object
);
962 assert(object
->ref_count
> 0);
963 if (!object
->pager_ready
) {
964 wait_result
= vm_object_assert_wait(
966 VM_OBJECT_EVENT_PAGER_READY
,
968 vm_object_unlock(object
);
969 if (wait_result
== THREAD_WAITING
)
970 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
971 vm_object_deallocate(object
);
974 vm_object_unlock(object
);
975 vm_object_deallocate(object
);
976 thread_interrupt_level(interruptible_state
);
977 return VM_FAULT_RETRY
;
981 new_unlock_request
= m
->unlock_request
=
982 (access_required
| m
->unlock_request
);
983 vm_object_unlock(object
);
985 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
986 (integer_t
)object
, offset
,
987 (integer_t
)m
, new_unlock_request
, 0);
988 if ((rc
= memory_object_data_unlock(
990 offset
+ object
->paging_offset
,
995 printf("vm_fault: memory_object_data_unlock failed\n");
996 vm_object_lock(object
);
997 vm_fault_cleanup(object
, first_m
);
998 thread_interrupt_level(interruptible_state
);
999 return((rc
== MACH_SEND_INTERRUPTED
) ?
1000 VM_FAULT_INTERRUPTED
:
1001 VM_FAULT_MEMORY_ERROR
);
1003 vm_object_lock(object
);
1008 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
1009 access_required
, (integer_t
)object
,
1010 offset
, (integer_t
)m
, 0);
1011 /* take an extra ref so object won't die */
1012 assert(object
->ref_count
> 0);
1013 object
->ref_count
++;
1014 vm_object_res_reference(object
);
1015 vm_fault_cleanup(object
, first_m
);
1016 counter(c_vm_fault_page_block_backoff_kernel
++);
1017 vm_object_lock(object
);
1018 assert(object
->ref_count
> 0);
1019 m
= vm_page_lookup(object
, offset
);
1020 if (m
!= VM_PAGE_NULL
&&
1021 (access_required
& m
->page_lock
) &&
1022 !((access_required
& m
->unlock_request
) != access_required
)) {
1023 PAGE_ASSERT_WAIT(m
, interruptible
);
1024 vm_object_unlock(object
);
1025 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1026 vm_object_deallocate(object
);
1029 vm_object_unlock(object
);
1030 vm_object_deallocate(object
);
1031 thread_interrupt_level(interruptible_state
);
1032 return VM_FAULT_RETRY
;
			/*
			 * We mark the page busy and leave it on
			 * the pageout queues.  If the pageout
			 * daemon comes across it, then it will
			 * remove the page from the pageout queues.
			 */
1043 dbgTrace(0xBEEF000B, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
1046 #if !VM_FAULT_STATIC_CONFIG
1047 if (!software_reference_bits
) {
1048 vm_page_lock_queues();
1050 vm_stat
.reactivations
++;
1052 VM_PAGE_QUEUES_REMOVE(m
);
1053 vm_page_unlock_queues();
1057 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
1058 (integer_t
)object
, offset
, (integer_t
)m
, 0, 0);
1066 (object
->pager_created
) &&
1067 LOOK_FOR(object
, offset
) &&
1071 dbgTrace(0xBEEF000C, (unsigned int) look_for_page
, (unsigned int) object
); /* (TEST/DEBUG) */
1073 if ((look_for_page
|| (object
== first_object
))
1074 && !must_be_resident
1075 && !(object
->phys_contiguous
)) {
1077 * Allocate a new page for this object/offset
1081 m
= vm_page_grab_fictitious();
1083 dbgTrace(0xBEEF000D, (unsigned int) m
, (unsigned int) object
); /* (TEST/DEBUG) */
1085 if (m
== VM_PAGE_NULL
) {
1086 vm_fault_cleanup(object
, first_m
);
1087 thread_interrupt_level(interruptible_state
);
1088 return(VM_FAULT_FICTITIOUS_SHORTAGE
);
1090 vm_page_insert(m
, object
, offset
);
1093 if ((look_for_page
&& !must_be_resident
)) {
1097 * If the memory manager is not ready, we
1098 * cannot make requests.
1100 if (!object
->pager_ready
) {
1102 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
1104 if(m
!= VM_PAGE_NULL
)
1107 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
1108 (integer_t
)object
, offset
, 0, 0, 0);
1109 /* take an extra ref so object won't die */
1110 assert(object
->ref_count
> 0);
1111 object
->ref_count
++;
1112 vm_object_res_reference(object
);
1113 vm_fault_cleanup(object
, first_m
);
1114 counter(c_vm_fault_page_block_backoff_kernel
++);
1115 vm_object_lock(object
);
1116 assert(object
->ref_count
> 0);
1117 if (!object
->pager_ready
) {
1118 wait_result
= vm_object_assert_wait(object
,
1119 VM_OBJECT_EVENT_PAGER_READY
,
1121 vm_object_unlock(object
);
1122 if (wait_result
== THREAD_WAITING
)
1123 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1124 vm_object_deallocate(object
);
1127 vm_object_unlock(object
);
1128 vm_object_deallocate(object
);
1129 thread_interrupt_level(interruptible_state
);
1130 return VM_FAULT_RETRY
;
1134 if(object
->phys_contiguous
) {
1135 if(m
!= VM_PAGE_NULL
) {
1141 if (object
->internal
) {
1143 * Requests to the default pager
1144 * must reserve a real page in advance,
1145 * because the pager's data-provided
1146 * won't block for pages. IMPORTANT:
1147 * this acts as a throttling mechanism
1148 * for data_requests to the default
1153 dbgTrace(0xBEEF000F, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
1155 if (m
->fictitious
&& !vm_page_convert(m
)) {
1157 vm_fault_cleanup(object
, first_m
);
1158 thread_interrupt_level(interruptible_state
);
1159 return(VM_FAULT_MEMORY_SHORTAGE
);
1161 } else if (object
->absent_count
>
1162 vm_object_absent_max
) {
1164 * If there are too many outstanding page
1165 * requests pending on this object, we
1166 * wait for them to be resolved now.
1170 dbgTrace(0xBEEF0010, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
1172 if(m
!= VM_PAGE_NULL
)
1174 /* take an extra ref so object won't die */
1175 assert(object
->ref_count
> 0);
1176 object
->ref_count
++;
1177 vm_object_res_reference(object
);
1178 vm_fault_cleanup(object
, first_m
);
1179 counter(c_vm_fault_page_block_backoff_kernel
++);
1180 vm_object_lock(object
);
1181 assert(object
->ref_count
> 0);
1182 if (object
->absent_count
> vm_object_absent_max
) {
1183 vm_object_absent_assert_wait(object
,
1185 vm_object_unlock(object
);
1186 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1187 vm_object_deallocate(object
);
1190 vm_object_unlock(object
);
1191 vm_object_deallocate(object
);
1192 thread_interrupt_level(interruptible_state
);
1193 return VM_FAULT_RETRY
;
1198 * Indicate that the page is waiting for data
1199 * from the memory manager.
1202 if(m
!= VM_PAGE_NULL
) {
1204 m
->list_req_pending
= TRUE
;
1207 object
->absent_count
++;
1212 cluster_start
= offset
;
1216 * lengthen the cluster by the pages in the working set
1219 (current_task()->dynamic_working_set
!= 0)) {
1220 cluster_end
= cluster_start
+ length
;
1221 /* tws values for start and end are just a
1222 * suggestions. Therefore, as long as
1223 * build_cluster does not use pointers or
1224 * take action based on values that
1225 * could be affected by re-entrance we
1226 * do not need to take the map lock.
1228 cluster_end
= offset
+ PAGE_SIZE_64
;
1230 current_task()->dynamic_working_set
,
1231 object
, &cluster_start
,
1232 &cluster_end
, 0x40000);
1233 length
= cluster_end
- cluster_start
;
1236 dbgTrace(0xBEEF0012, (unsigned int) object
, (unsigned int) 0); /* (TEST/DEBUG) */
1239 * We have a busy page, so we can
1240 * release the object lock.
1242 vm_object_unlock(object
);
1245 * Call the memory manager to retrieve the data.
1249 *type_of_fault
= ((int)length
<< 8) | DBG_PAGEIN_FAULT
;
1251 current_task()->pageins
++;
1252 bumped_pagein
= TRUE
;
1255 * If this object uses a copy_call strategy,
1256 * and we are interested in a copy of this object
1257 * (having gotten here only by following a
1258 * shadow chain), then tell the memory manager
1259 * via a flag added to the desired_access
1260 * parameter, so that it can detect a race
1261 * between our walking down the shadow chain
1262 * and its pushing pages up into a copy of
1263 * the object that it manages.
1266 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_CALL
&&
1267 object
!= first_object
) {
1268 wants_copy_flag
= VM_PROT_WANTS_COPY
;
1270 wants_copy_flag
= VM_PROT_NONE
;
1274 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1275 (integer_t
)object
, offset
, (integer_t
)m
,
1276 access_required
| wants_copy_flag
, 0);
1278 rc
= memory_object_data_request(object
->pager
,
1279 cluster_start
+ object
->paging_offset
,
1281 access_required
| wants_copy_flag
);
1285 dbgTrace(0xBEEF0013, (unsigned int) object
, (unsigned int) rc
); /* (TEST/DEBUG) */
1287 if (rc
!= KERN_SUCCESS
) {
1288 if (rc
!= MACH_SEND_INTERRUPTED
1290 printf("%s(0x%x, 0x%xll, 0x%xll, 0x%x) failed, rc=%d\n",
1291 "memory_object_data_request",
1293 cluster_start
+ object
->paging_offset
,
1294 length
, access_required
, rc
);
1296 * Don't want to leave a busy page around,
1297 * but the data request may have blocked,
1298 * so check if it's still there and busy.
1300 if(!object
->phys_contiguous
) {
1301 vm_object_lock(object
);
1302 for (; length
; length
-= PAGE_SIZE
,
1303 cluster_start
+= PAGE_SIZE_64
) {
1305 if ((p
= vm_page_lookup(object
,
1307 && p
->absent
&& p
->busy
1313 vm_fault_cleanup(object
, first_m
);
1314 thread_interrupt_level(interruptible_state
);
1315 return((rc
== MACH_SEND_INTERRUPTED
) ?
1316 VM_FAULT_INTERRUPTED
:
1317 VM_FAULT_MEMORY_ERROR
);
1320 vm_object_lock(object
);
1321 if ((interruptible
!= THREAD_UNINT
) &&
1322 (current_thread()->state
& TH_ABORT
)) {
1323 vm_fault_cleanup(object
, first_m
);
1324 thread_interrupt_level(interruptible_state
);
1325 return(VM_FAULT_INTERRUPTED
);
1327 if (m
== VM_PAGE_NULL
&&
1328 object
->phys_contiguous
) {
1330 * No page here means that the object we
1331 * initially looked up was "physically
1332 * contiguous" (i.e. device memory). However,
1333 * with Virtual VRAM, the object might not
1334 * be backed by that device memory anymore,
1335 * so we're done here only if the object is
1336 * still "phys_contiguous".
1337 * Otherwise, if the object is no longer
1338 * "phys_contiguous", we need to retry the
1339 * page fault against the object's new backing
1340 * store (different memory object).
1346 * Retry with same object/offset, since new data may
1347 * be in a different page (i.e., m is meaningless at
1354 * The only case in which we get here is if
1355 * object has no pager (or unwiring). If the pager doesn't
1356 * have the page this is handled in the m->absent case above
1357 * (and if you change things here you should look above).
1360 dbgTrace(0xBEEF0014, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1362 if (object
== first_object
)
1365 assert(m
== VM_PAGE_NULL
);
1368 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1369 (integer_t
)object
, offset
, (integer_t
)m
,
1370 (integer_t
)object
->shadow
, 0);
1372 * Move on to the next object. Lock the next
1373 * object before unlocking the current one.
1375 next_object
= object
->shadow
;
1376 if (next_object
== VM_OBJECT_NULL
) {
1377 assert(!must_be_resident
);
1379 * If there's no object left, fill the page
1380 * in the top object with zeros. But first we
1381 * need to allocate a real page.
1384 if (object
!= first_object
) {
1385 vm_object_paging_end(object
);
1386 vm_object_unlock(object
);
1388 object
= first_object
;
1389 offset
= first_offset
;
1390 vm_object_lock(object
);
1394 assert(m
->object
== object
);
1395 first_m
= VM_PAGE_NULL
;
1397 if(m
== VM_PAGE_NULL
) {
1399 if (m
== VM_PAGE_NULL
) {
1401 object
, VM_PAGE_NULL
);
1402 thread_interrupt_level(
1403 interruptible_state
);
1404 return(VM_FAULT_MEMORY_SHORTAGE
);
1410 if (object
->shadow_severed
) {
1412 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1413 thread_interrupt_level(interruptible_state
);
1414 return VM_FAULT_MEMORY_ERROR
;
1418 * are we protecting the system from
1419 * backing store exhaustion. If so
1420 * sleep unless we are privileged.
1423 if(vm_backing_store_low
) {
1424 if(!(current_task()->priv_flags
1425 & VM_BACKING_STORE_PRIV
)) {
1426 assert_wait((event_t
)
1427 &vm_backing_store_low
,
1430 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1431 thread_block(THREAD_CONTINUE_NULL
);
1432 thread_interrupt_level(
1433 interruptible_state
);
1434 return(VM_FAULT_RETRY
);
1438 if (VM_PAGE_THROTTLED() ||
1439 (m
->fictitious
&& !vm_page_convert(m
))) {
1441 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1442 thread_interrupt_level(interruptible_state
);
1443 return(VM_FAULT_MEMORY_SHORTAGE
);
1445 m
->no_isync
= FALSE
;
1447 if (!no_zero_fill
) {
1448 vm_object_unlock(object
);
1449 vm_page_zero_fill(m
);
1450 vm_object_lock(object
);
1453 *type_of_fault
= DBG_ZERO_FILL_FAULT
;
1454 VM_STAT(zero_fill_count
++);
1456 if (bumped_pagein
== TRUE
) {
1458 current_task()->pageins
--;
1460 vm_page_lock_queues();
1461 VM_PAGE_QUEUES_REMOVE(m
);
1462 assert(!m
->laundry
);
1463 assert(m
->object
!= kernel_object
);
1464 assert(m
->pageq
.next
== NULL
&&
1465 m
->pageq
.prev
== NULL
);
1466 if(m
->object
->size
> 0x200000) {
1467 m
->zero_fill
= TRUE
;
1468 /* depends on the queues lock */
1470 queue_enter(&vm_page_queue_zf
,
1471 m
, vm_page_t
, pageq
);
1474 &vm_page_queue_inactive
,
1475 m
, vm_page_t
, pageq
);
1477 m
->page_ticket
= vm_page_ticket
;
1478 vm_page_ticket_roll
++;
1479 if(vm_page_ticket_roll
== VM_PAGE_TICKETS_IN_ROLL
) {
1480 vm_page_ticket_roll
= 0;
1481 if(vm_page_ticket
==
1482 VM_PAGE_TICKET_ROLL_IDS
)
1488 vm_page_inactive_count
++;
1489 vm_page_unlock_queues();
1491 pmap_clear_modify(m
->phys_page
);
1496 if ((object
!= first_object
) || must_be_resident
)
1497 vm_object_paging_end(object
);
1498 offset
+= object
->shadow_offset
;
1499 hi_offset
+= object
->shadow_offset
;
1500 lo_offset
+= object
->shadow_offset
;
1501 access_required
= VM_PROT_READ
;
1502 vm_object_lock(next_object
);
1503 vm_object_unlock(object
);
1504 object
= next_object
;
1505 vm_object_paging_begin(object
);
	/*
	 *	PAGE HAS BEEN FOUND.
	 *
	 *	This page (m) is:
	 *		busy, so that we can play with it;
	 *		not absent, so that nobody else will fill it;
	 *		possibly eligible for pageout;
	 *
	 *	The top-level page (first_m) is:
	 *		VM_PAGE_NULL if the page was found in the
	 *		 top-level object;
	 *		busy, not absent, and ineligible for pageout.
	 *
	 *	The current object (object) is locked.  A paging
	 *	reference is held for the current and top-level
	 *	objects.
	 */

#if	TRACEFAULTPAGE
	dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
#endif
1530 #if EXTRA_ASSERTIONS
1531 if(m
!= VM_PAGE_NULL
) {
1532 assert(m
->busy
&& !m
->absent
);
1533 assert((first_m
== VM_PAGE_NULL
) ||
1534 (first_m
->busy
&& !first_m
->absent
&&
1535 !first_m
->active
&& !first_m
->inactive
));
1537 #endif /* EXTRA_ASSERTIONS */
1541 * If we found a page, we must have decrypted it before we
1544 if (m
!= VM_PAGE_NULL
) {
1545 ASSERT_PAGE_DECRYPTED(m
);
1549 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1550 (integer_t
)object
, offset
, (integer_t
)m
,
1551 (integer_t
)first_object
, (integer_t
)first_m
);
1553 * If the page is being written, but isn't
1554 * already owned by the top-level object,
1555 * we have to copy it into a new page owned
1556 * by the top-level object.
1559 if ((object
!= first_object
) && (m
!= VM_PAGE_NULL
)) {
1561 * We only really need to copy if we
1566 dbgTrace(0xBEEF0016, (unsigned int) object
, (unsigned int) fault_type
); /* (TEST/DEBUG) */
1568 if (fault_type
& VM_PROT_WRITE
) {
1571 assert(!must_be_resident
);
1574 * are we protecting the system from
1575 * backing store exhaustion. If so
1576 * sleep unless we are privileged.
1579 if(vm_backing_store_low
) {
1580 if(!(current_task()->priv_flags
1581 & VM_BACKING_STORE_PRIV
)) {
1582 assert_wait((event_t
)
1583 &vm_backing_store_low
,
1586 vm_fault_cleanup(object
, first_m
);
1587 thread_block(THREAD_CONTINUE_NULL
);
1588 thread_interrupt_level(
1589 interruptible_state
);
1590 return(VM_FAULT_RETRY
);
1595 * If we try to collapse first_object at this
1596 * point, we may deadlock when we try to get
1597 * the lock on an intermediate object (since we
1598 * have the bottom object locked). We can't
1599 * unlock the bottom object, because the page
1600 * we found may move (by collapse) if we do.
1602 * Instead, we first copy the page. Then, when
1603 * we have no more use for the bottom object,
1604 * we unlock it and try to collapse.
1606 * Note that we copy the page even if we didn't
1607 * need to... that's the breaks.
1611 * Allocate a page for the copy
1613 copy_m
= vm_page_grab();
1614 if (copy_m
== VM_PAGE_NULL
) {
1616 vm_fault_cleanup(object
, first_m
);
1617 thread_interrupt_level(interruptible_state
);
1618 return(VM_FAULT_MEMORY_SHORTAGE
);
1623 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1624 (integer_t
)object
, offset
,
1625 (integer_t
)m
, (integer_t
)copy_m
, 0);
1626 vm_page_copy(m
, copy_m
);
1629 * If another map is truly sharing this
1630 * page with us, we have to flush all
1631 * uses of the original page, since we
1632 * can't distinguish those which want the
1633 * original from those which need the
1636 * XXXO If we know that only one map has
1637 * access to this page, then we could
1638 * avoid the pmap_disconnect() call.
1641 vm_page_lock_queues();
1642 assert(!m
->cleaning
);
1643 pmap_disconnect(m
->phys_page
);
1644 vm_page_deactivate(m
);
1645 copy_m
->dirty
= TRUE
;
1647 * Setting reference here prevents this fault from
1648 * being counted as a (per-thread) reactivate as well
1649 * as a copy-on-write.
1651 first_m
->reference
= TRUE
;
1652 vm_page_unlock_queues();
1655 * We no longer need the old page or object.
1658 PAGE_WAKEUP_DONE(m
);
1659 vm_object_paging_end(object
);
1660 vm_object_unlock(object
);
1663 *type_of_fault
= DBG_COW_FAULT
;
1664 VM_STAT(cow_faults
++);
1665 current_task()->cow_faults
++;
1666 object
= first_object
;
1667 offset
= first_offset
;
1669 vm_object_lock(object
);
1670 VM_PAGE_FREE(first_m
);
1671 first_m
= VM_PAGE_NULL
;
1672 assert(copy_m
->busy
);
1673 vm_page_insert(copy_m
, object
, offset
);
1677 * Now that we've gotten the copy out of the
1678 * way, let's try to collapse the top object.
1679 * But we have to play ugly games with
1680 * paging_in_progress to do that...
1683 vm_object_paging_end(object
);
1684 vm_object_collapse(object
, offset
, TRUE
);
1685 vm_object_paging_begin(object
);
1689 *protection
&= (~VM_PROT_WRITE
);
1694 * Now check whether the page needs to be pushed into the
1695 * copy object. The use of asymmetric copy on write for
1696 * shared temporary objects means that we may do two copies to
1697 * satisfy the fault; one above to get the page from a
1698 * shadowed object, and one here to push it into the copy.
1701 while ((copy_object
= first_object
->copy
) != VM_OBJECT_NULL
&&
1702 (m
!= VM_PAGE_NULL
)) {
1703 vm_object_offset_t copy_offset
;
1707 dbgTrace(0xBEEF0017, (unsigned int) copy_object
, (unsigned int) fault_type
); /* (TEST/DEBUG) */
1710 * If the page is being written, but hasn't been
1711 * copied to the copy-object, we have to copy it there.
1714 if ((fault_type
& VM_PROT_WRITE
) == 0) {
1715 *protection
&= ~VM_PROT_WRITE
;
1720 * If the page was guaranteed to be resident,
1721 * we must have already performed the copy.
1724 if (must_be_resident
)
1728 * Try to get the lock on the copy_object.
1730 if (!vm_object_lock_try(copy_object
)) {
1731 vm_object_unlock(object
);
1733 mutex_pause(); /* wait a bit */
1735 vm_object_lock(object
);
1740 * Make another reference to the copy-object,
1741 * to keep it from disappearing during the
1744 assert(copy_object
->ref_count
> 0);
1745 copy_object
->ref_count
++;
1746 VM_OBJ_RES_INCR(copy_object
);
1749 * Does the page exist in the copy?
1751 copy_offset
= first_offset
- copy_object
->shadow_offset
;
1752 if (copy_object
->size
<= copy_offset
)
1754 * Copy object doesn't cover this page -- do nothing.
1758 vm_page_lookup(copy_object
, copy_offset
)) != VM_PAGE_NULL
) {
1759 /* Page currently exists in the copy object */
1762 * If the page is being brought
1763 * in, wait for it and then retry.
1766 /* take an extra ref so object won't die */
1767 assert(copy_object
->ref_count
> 0);
1768 copy_object
->ref_count
++;
1769 vm_object_res_reference(copy_object
);
1770 vm_object_unlock(copy_object
);
1771 vm_fault_cleanup(object
, first_m
);
1772 counter(c_vm_fault_page_block_backoff_kernel
++);
1773 vm_object_lock(copy_object
);
1774 assert(copy_object
->ref_count
> 0);
1775 VM_OBJ_RES_DECR(copy_object
);
1776 copy_object
->ref_count
--;
1777 assert(copy_object
->ref_count
> 0);
1778 copy_m
= vm_page_lookup(copy_object
, copy_offset
);
1781 * it's OK if the "copy_m" page is encrypted,
1782 * because we're not moving it nor handling its
1785 if (copy_m
!= VM_PAGE_NULL
&& copy_m
->busy
) {
1786 PAGE_ASSERT_WAIT(copy_m
, interruptible
);
1787 vm_object_unlock(copy_object
);
1788 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1789 vm_object_deallocate(copy_object
);
1792 vm_object_unlock(copy_object
);
1793 vm_object_deallocate(copy_object
);
1794 thread_interrupt_level(interruptible_state
);
1795 return VM_FAULT_RETRY
;
1799 else if (!PAGED_OUT(copy_object
, copy_offset
)) {
1801 * If PAGED_OUT is TRUE, then the page used to exist
1802 * in the copy-object, and has already been paged out.
1803 * We don't need to repeat this. If PAGED_OUT is
1804 * FALSE, then either we don't know (!pager_created,
1805 * for example) or it hasn't been paged out.
1806 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1807 * We must copy the page to the copy object.
1811 * are we protecting the system from
1812 * backing store exhaustion. If so
1813 * sleep unless we are privileged.
1816 if(vm_backing_store_low
) {
1817 if(!(current_task()->priv_flags
1818 & VM_BACKING_STORE_PRIV
)) {
1819 assert_wait((event_t
)
1820 &vm_backing_store_low
,
1823 VM_OBJ_RES_DECR(copy_object
);
1824 copy_object
->ref_count
--;
1825 assert(copy_object
->ref_count
> 0);
1826 vm_object_unlock(copy_object
);
1827 vm_fault_cleanup(object
, first_m
);
1828 thread_block(THREAD_CONTINUE_NULL
);
1829 thread_interrupt_level(
1830 interruptible_state
);
1831 return(VM_FAULT_RETRY
);
1836 * Allocate a page for the copy
1838 copy_m
= vm_page_alloc(copy_object
, copy_offset
);
1839 if (copy_m
== VM_PAGE_NULL
) {
1841 VM_OBJ_RES_DECR(copy_object
);
1842 copy_object
->ref_count
--;
1843 assert(copy_object
->ref_count
> 0);
1844 vm_object_unlock(copy_object
);
1845 vm_fault_cleanup(object
, first_m
);
1846 thread_interrupt_level(interruptible_state
);
1847 return(VM_FAULT_MEMORY_SHORTAGE
);
1851 * Must copy page into copy-object.
1854 vm_page_copy(m
, copy_m
);
1857 * If the old page was in use by any users
1858 * of the copy-object, it must be removed
1859 * from all pmaps. (We can't know which
1863 vm_page_lock_queues();
1864 assert(!m
->cleaning
);
1865 pmap_disconnect(m
->phys_page
);
1866 copy_m
->dirty
= TRUE
;
1867 vm_page_unlock_queues();
1870 * If there's a pager, then immediately
1871 * page out this page, using the "initialize"
1872 * option. Else, we use the copy.
1877 ((!copy_object
->pager_created
) ||
1878 vm_external_state_get(
1879 copy_object
->existence_map
, copy_offset
)
1880 == VM_EXTERNAL_STATE_ABSENT
)
1882 (!copy_object
->pager_created
)
1885 vm_page_lock_queues();
1886 vm_page_activate(copy_m
);
1887 vm_page_unlock_queues();
1888 PAGE_WAKEUP_DONE(copy_m
);
1891 assert(copy_m
->busy
== TRUE
);
1894 * The page is already ready for pageout:
1895 * not on pageout queues and busy.
1896 * Unlock everything except the
1897 * copy_object itself.
1900 vm_object_unlock(object
);
1903 * Write the page to the copy-object,
1904 * flushing it from the kernel.
1907 vm_pageout_initialize_page(copy_m
);
1910 * Since the pageout may have
1911 * temporarily dropped the
1912 * copy_object's lock, we
1913 * check whether we'll have
1914 * to deallocate the hard way.
1917 if ((copy_object
->shadow
!= object
) ||
1918 (copy_object
->ref_count
== 1)) {
1919 vm_object_unlock(copy_object
);
1920 vm_object_deallocate(copy_object
);
1921 vm_object_lock(object
);
1926 * Pick back up the old object's
1927 * lock. [It is safe to do so,
1928 * since it must be deeper in the
1932 vm_object_lock(object
);
1936 * Because we're pushing a page upward
1937 * in the object tree, we must restart
1938 * any faults that are waiting here.
1939 * [Note that this is an expansion of
1940 * PAGE_WAKEUP that uses the THREAD_RESTART
1941 * wait result]. Can't turn off the page's
1942 * busy bit because we're not done with it.
1947 thread_wakeup_with_result((event_t
) m
,
1953 * The reference count on copy_object must be
1954 * at least 2: one for our extra reference,
1955 * and at least one from the outside world
1956 * (we checked that when we last locked
1959 copy_object
->ref_count
--;
1960 assert(copy_object
->ref_count
> 0);
1961 VM_OBJ_RES_DECR(copy_object
);
1962 vm_object_unlock(copy_object
);
1968 *top_page
= first_m
;
1971 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1972 (integer_t
)object
, offset
, (integer_t
)m
, (integer_t
)first_m
, 0);
1974 * If the page can be written, assume that it will be.
1975 * [Earlier, we restrict the permission to allow write
1976 * access only if the fault so required, so we don't
1977 * mark read-only data as dirty.]
1981 if(m
!= VM_PAGE_NULL
) {
1982 #if !VM_FAULT_STATIC_CONFIG
1983 if (vm_fault_dirty_handling
&& (*protection
& VM_PROT_WRITE
))
1986 if (vm_page_deactivate_behind
)
1987 vm_fault_deactivate_behind(object
, offset
, behavior
);
1989 vm_object_unlock(object
);
1991 thread_interrupt_level(interruptible_state
);
1994 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS
, 0); /* (TEST/DEBUG) */
1996 return(VM_FAULT_SUCCESS
);
2000 vm_fault_cleanup(object
, first_m
);
2002 counter(c_vm_fault_page_block_backoff_kernel
++);
2003 thread_block(THREAD_CONTINUE_NULL
);
2007 thread_interrupt_level(interruptible_state
);
2008 if (wait_result
== THREAD_INTERRUPTED
)
2009 return VM_FAULT_INTERRUPTED
;
2010 return VM_FAULT_RETRY
;
/*
 *	Routine:	vm_fault_tws_insert
 *	Purpose:
 *		Add fault information to the task working set.
 *	Implementation:
 *		We always insert the base object/offset pair
 *		rather than the actual object/offset.
 *	Assumptions:
 *		Map and real_map locked.
 *		Object locked and referenced.
 *	Returns:
 *		TRUE if startup file should be written.
 *		With object locked and still referenced.
 *		But we may drop the object lock temporarily.
 */
2031 vm_fault_tws_insert(
2034 vm_map_offset_t vaddr
,
2036 vm_object_offset_t offset
)
2038 tws_hash_line_t line
;
2041 boolean_t result
= FALSE
;
2043 /* Avoid possible map lock deadlock issues */
2044 if (map
== kernel_map
|| map
== kalloc_map
||
2045 real_map
== kernel_map
|| real_map
== kalloc_map
)
2048 task
= current_task();
2049 if (task
->dynamic_working_set
!= 0) {
2050 vm_object_t base_object
;
2051 vm_object_t base_shadow
;
2052 vm_object_offset_t base_offset
;
2053 base_object
= object
;
2054 base_offset
= offset
;
2055 while ((base_shadow
= base_object
->shadow
)) {
2056 vm_object_lock(base_shadow
);
2057 vm_object_unlock(base_object
);
2059 base_object
->shadow_offset
;
2060 base_object
= base_shadow
;
2063 task
->dynamic_working_set
,
2064 base_offset
, base_object
,
2066 if (kr
== KERN_OPERATION_TIMED_OUT
){
2068 if (base_object
!= object
) {
2069 vm_object_unlock(base_object
);
2070 vm_object_lock(object
);
2072 } else if (kr
!= KERN_SUCCESS
) {
2073 if(base_object
!= object
)
2074 vm_object_reference_locked(base_object
);
2076 task
->dynamic_working_set
,
2077 base_offset
, base_object
,
2079 if(base_object
!= object
) {
2080 vm_object_unlock(base_object
);
2081 vm_object_deallocate(base_object
);
2083 if(kr
== KERN_NO_SPACE
) {
2084 if (base_object
== object
)
2085 vm_object_unlock(object
);
2086 tws_expand_working_set(
2087 task
->dynamic_working_set
,
2088 TWS_HASH_LINE_COUNT
,
2090 if (base_object
== object
)
2091 vm_object_lock(object
);
2092 } else if(kr
== KERN_OPERATION_TIMED_OUT
) {
2095 if(base_object
!= object
)
2096 vm_object_lock(object
);
2097 } else if (base_object
!= object
) {
2098 vm_object_unlock(base_object
);
2099 vm_object_lock(object
);
/*
 *	Routine:	vm_fault
 *	Purpose:
 *		Handle page faults, including pseudo-faults
 *		used to change the wiring status of pages.
 *	Returns:
 *		Explicit continuations have been removed.
 *	Implementation:
 *		vm_fault and vm_fault_page save mucho state
 *		in the moral equivalent of a closure.  The state
 *		structure is allocated when first entering vm_fault
 *		and deallocated when leaving vm_fault.
 */

extern int _map_enter_debug;
2124 vm_map_offset_t vaddr
,
2125 vm_prot_t fault_type
,
2126 boolean_t change_wiring
,
2129 vm_map_offset_t caller_pmap_addr
)
	vm_map_version_t	version;	/* Map version for verification */
2132 boolean_t wired
; /* Should mapping be wired down? */
2133 vm_object_t object
; /* Top-level object */
2134 vm_object_offset_t offset
; /* Top-level offset */
2135 vm_prot_t prot
; /* Protection for mapping */
2136 vm_behavior_t behavior
; /* Expected paging behavior */
2137 vm_map_offset_t lo_offset
, hi_offset
;
2138 vm_object_t old_copy_object
; /* Saved copy object */
2139 vm_page_t result_page
; /* Result of vm_fault_page */
2140 vm_page_t top_page
; /* Placeholder page */
2144 vm_page_t m
; /* Fast access to result_page */
2145 kern_return_t error_code
= 0; /* page error reasons */
2147 vm_object_t cur_object
;
2149 vm_object_offset_t cur_offset
;
2151 vm_object_t new_object
;
2153 vm_map_t real_map
= map
;
2154 vm_map_t original_map
= map
;
2156 boolean_t interruptible_state
;
2157 unsigned int cache_attr
;
2158 int write_startup_file
= 0;
2159 boolean_t need_activation
;
2160 vm_prot_t original_fault_type
;
2163 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, 0)) | DBG_FUNC_START
,
2170 if (get_preemption_level() != 0) {
2171 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, 0)) | DBG_FUNC_END
,
2178 return (KERN_FAILURE
);
2181 interruptible_state
= thread_interrupt_level(interruptible
);
2184 * assume we will hit a page in the cache
2185 * otherwise, explicitly override with
2186 * the real fault type once we determine it
2188 type_of_fault
= DBG_CACHE_HIT_FAULT
;
2191 current_task()->faults
++;
2193 original_fault_type
= fault_type
;
2198 * Find the backing store object and offset into
2199 * it to begin the search.
2201 fault_type
= original_fault_type
;
2203 vm_map_lock_read(map
);
2204 kr
= vm_map_lookup_locked(&map
, vaddr
, fault_type
, &version
,
2207 &behavior
, &lo_offset
, &hi_offset
, &real_map
);
2209 //if (_map_enter_debug)printf("vm_map_lookup_locked(map=0x%x, addr=0x%llx, prot=%d wired=%d) = %d\n", map, vaddr, prot, wired, kr);
2211 pmap
= real_map
->pmap
;
2213 if (kr
!= KERN_SUCCESS
) {
2214 vm_map_unlock_read(map
);
2219 * If the page is wired, we must fault for the current protection
2220 * value, to avoid further faults.
2224 fault_type
= prot
| VM_PROT_WRITE
;
2226 #if VM_FAULT_CLASSIFY
2228 * Temporary data gathering code
2230 vm_fault_classify(object
, offset
, fault_type
);
	/*
	 *	Fast fault code.  The basic idea is to do as much as
	 *	possible while holding the map lock and object locks.
	 *	Busy pages are not used until the object lock has to
	 *	be dropped to do something (copy, zero fill, pmap enter).
	 *	Similarly, paging references aren't acquired until that
	 *	point, and object references aren't used.
	 *
	 *	If we can figure out what to do
	 *	(zero fill, copy on write, pmap enter) while holding
	 *	the locks, then it gets done.  Otherwise, we give up,
	 *	and use the original fault path (which doesn't hold
	 *	the map lock, and relies on busy pages).
	 *	The give up cases include:
	 *	    - Have to talk to pager.
	 *	    - Page is busy, absent or in error.
	 *	    - Pager has locked out desired access.
	 *	    - Fault needs to be restarted.
	 *	    - Have to push page into copy object.
	 *
	 *	The code is an infinite loop that moves one level down
	 *	the shadow chain each time.  cur_object and cur_offset
	 *	refer to the current object being examined. object and offset
	 *	are the original object from the map.  The loop is at the
	 *	top level if and only if object and cur_object are the same.
	 *
	 *	Invariants:  Map lock is held throughout.  Lock is held on
	 *	    original object and cur_object (if different) when
	 *	    continuing or exiting loop.
	 */

	/*
	 *	If this page is to be inserted in a copy delay object
	 *	for writing, and if the object has a copy, then the
	 *	copy delay strategy is implemented in the slow fault page.
	 */
2270 if (object
->copy_strategy
!= MEMORY_OBJECT_COPY_DELAY
||
2271 object
->copy
== VM_OBJECT_NULL
||
2272 (fault_type
& VM_PROT_WRITE
) == 0) {
2273 cur_object
= object
;
2274 cur_offset
= offset
;
2277 m
= vm_page_lookup(cur_object
, cur_offset
);
2278 if (m
!= VM_PAGE_NULL
) {
2280 wait_result_t result
;
2282 if (object
!= cur_object
)
2283 vm_object_unlock(object
);
2285 vm_map_unlock_read(map
);
2286 if (real_map
!= map
)
2287 vm_map_unlock(real_map
);
2289 #if !VM_FAULT_STATIC_CONFIG
2290 if (!vm_fault_interruptible
)
2291 interruptible
= THREAD_UNINT
;
2293 result
= PAGE_ASSERT_WAIT(m
, interruptible
);
2295 vm_object_unlock(cur_object
);
2297 if (result
== THREAD_WAITING
) {
2298 result
= thread_block(THREAD_CONTINUE_NULL
);
2300 counter(c_vm_fault_page_block_busy_kernel
++);
2302 if (result
== THREAD_AWAKENED
|| result
== THREAD_RESTART
)
2308 if (m
->unusual
&& (m
->error
|| m
->restart
|| m
->private
2309 || m
->absent
|| (fault_type
& m
->page_lock
))) {
2312 * Unusual case. Give up.
2320 * We've soft-faulted (because it's not in the page
2321 * table) on an encrypted page.
2322 * Keep the page "busy" so that noone messes with
2323 * it during the decryption.
2324 * Release the extra locks we're holding, keep only
2325 * the page's VM object lock.
2328 if (object
!= cur_object
) {
2329 vm_object_unlock(object
);
2331 vm_map_unlock_read(map
);
2332 if (real_map
!= map
)
2333 vm_map_unlock(real_map
);
2335 vm_page_decrypt(m
, 0);
2338 PAGE_WAKEUP_DONE(m
);
2339 vm_object_unlock(m
->object
);
2342 * Retry from the top, in case anything
2343 * changed while we were decrypting...
2347 ASSERT_PAGE_DECRYPTED(m
);
2350 * Two cases of map in faults:
2351 * - At top level w/o copy object.
2352 * - Read fault anywhere.
2353 * --> must disallow write.
2356 if (object
== cur_object
&&
2357 object
->copy
== VM_OBJECT_NULL
)
2358 goto FastMapInFault
;
2360 if ((fault_type
& VM_PROT_WRITE
) == 0) {
2361 boolean_t sequential
;
2363 prot
&= ~VM_PROT_WRITE
;
2366 * Set up to map the page ...
2367 * mark the page busy, drop
2368 * locks and take a paging reference
2369 * on the object with the page.
2372 if (object
!= cur_object
) {
2373 vm_object_unlock(object
);
2374 object
= cur_object
;
2381 * Check a couple of global reasons to
2382 * be conservative about write access.
2383 * Then do the pmap_enter.
2385 #if !VM_FAULT_STATIC_CONFIG
2386 if (vm_fault_dirty_handling
2388 || db_watchpoint_list
2390 && (fault_type
& VM_PROT_WRITE
) == 0)
2391 prot
&= ~VM_PROT_WRITE
;
2392 #else /* STATIC_CONFIG */
2394 if (db_watchpoint_list
2395 && (fault_type
& VM_PROT_WRITE
) == 0)
2396 prot
&= ~VM_PROT_WRITE
;
2397 #endif /* MACH_KDB */
2398 #endif /* STATIC_CONFIG */
2399 cache_attr
= ((unsigned int)m
->object
->wimg_bits
) & VM_WIMG_MASK
;
2402 need_activation
= FALSE
;
2404 if (m
->no_isync
== TRUE
) {
2405 m
->no_isync
= FALSE
;
2406 pmap_sync_page_data_phys(m
->phys_page
);
2408 if ((type_of_fault
== DBG_CACHE_HIT_FAULT
) && m
->clustered
) {
2410 * found it in the cache, but this
2411 * is the first fault-in of the page (no_isync == TRUE)
2412 * so it must have come in as part of
2413 * a cluster... account 1 pagein against it
2416 current_task()->pageins
++;
2417 type_of_fault
= DBG_PAGEIN_FAULT
;
2421 need_activation
= TRUE
;
2423 } else if (cache_attr
!= VM_WIMG_DEFAULT
) {
2424 pmap_sync_page_attributes_phys(m
->phys_page
);
2428 PMAP_ENTER(caller_pmap
,
2429 caller_pmap_addr
, m
,
2430 prot
, cache_attr
, wired
);
2432 PMAP_ENTER(pmap
, vaddr
, m
,
2433 prot
, cache_attr
, wired
);
2437 * Hold queues lock to manipulate
2438 * the page queues. Change wiring
2439 * case is obvious. In soft ref bits
2440 * case activate page only if it fell
2441 * off paging queues, otherwise just
2442 * activate it if it's inactive.
2444 * NOTE: original vm_fault code will
2445 * move active page to back of active
2446 * queue. This code doesn't.
2449 vm_pagein_cluster_used
++;
2450 m
->clustered
= FALSE
;
2452 if (change_wiring
) {
2453 vm_page_lock_queues();
2460 vm_page_unlock_queues();
2463 if ((!m
->active
&& !m
->inactive
) || ((need_activation
== TRUE
) && !m
->active
)) {
2464 vm_page_lock_queues();
2465 vm_page_activate(m
);
2466 vm_page_unlock_queues();
2471 * That's it, clean up and return.
2473 PAGE_WAKEUP_DONE(m
);
2475 sequential
= (sequential
&& vm_page_deactivate_behind
) ?
2476 vm_fault_deactivate_behind(object
, cur_offset
, behavior
) :
2480 * Add non-sequential pages to the working set.
2481 * The sequential pages will be brought in through
2482 * normal clustering behavior.
2484 if (!sequential
&& !object
->private) {
2485 vm_object_paging_begin(object
);
2487 write_startup_file
=
2488 vm_fault_tws_insert(map
, real_map
, vaddr
,
2489 object
, cur_offset
);
2491 vm_object_paging_end(object
);
2493 vm_object_unlock(object
);
2495 vm_map_unlock_read(map
);
2497 vm_map_unlock(real_map
);
2499 if(write_startup_file
)
2500 tws_send_startup_info(current_task());
2502 thread_interrupt_level(interruptible_state
);
2505 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, 0)) | DBG_FUNC_END
,
2507 type_of_fault
& 0xff,
2512 return KERN_SUCCESS
;
2516 * Copy on write fault. If objects match, then
2517 * object->copy must not be NULL (else control
2518 * would be in previous code block), and we
2519 * have a potential push into the copy object
2520 * with which we won't cope here.
2523 if (cur_object
== object
)
2526 * This is now a shadow based copy on write
2527 * fault -- it requires a copy up the shadow
2530 * Allocate a page in the original top level
2531 * object. Give up if allocate fails. Also
2532 * need to remember current page, as it's the
2533 * source of the copy.
2537 if (m
== VM_PAGE_NULL
) {
2541 * Now do the copy. Mark the source busy
2542 * and take out paging references on both
2545 * NOTE: This code holds the map lock across
2550 vm_page_copy(cur_m
, m
);
2551 vm_page_insert(m
, object
, offset
);
2553 vm_object_paging_begin(cur_object
);
2554 vm_object_paging_begin(object
);
2556 type_of_fault
= DBG_COW_FAULT
;
2557 VM_STAT(cow_faults
++);
2558 current_task()->cow_faults
++;
2561 * Now cope with the source page and object
2562 * If the top object has a ref count of 1
2563 * then no other map can access it, and hence
2564 * it's not necessary to do the pmap_disconnect.
2567 vm_page_lock_queues();
2568 vm_page_deactivate(cur_m
);
2570 pmap_disconnect(cur_m
->phys_page
);
2571 vm_page_unlock_queues();
2573 PAGE_WAKEUP_DONE(cur_m
);
2574 vm_object_paging_end(cur_object
);
2575 vm_object_unlock(cur_object
);
2578 * Slight hack to call vm_object collapse
2579 * and then reuse common map in code.
2580 * note that the object lock was taken above.
2583 vm_object_paging_end(object
);
2584 vm_object_collapse(object
, offset
, TRUE
);
2591 * No page at cur_object, cur_offset
2594 if (cur_object
->pager_created
) {
2597 * Have to talk to the pager. Give up.
2603 if (cur_object
->shadow
== VM_OBJECT_NULL
) {
2605 if (cur_object
->shadow_severed
) {
2606 vm_object_paging_end(object
);
2607 vm_object_unlock(object
);
2608 vm_map_unlock_read(map
);
2610 vm_map_unlock(real_map
);
2612 if(write_startup_file
)
2613 tws_send_startup_info(
2616 thread_interrupt_level(interruptible_state
);
2618 return KERN_MEMORY_ERROR
;
2622 * Zero fill fault. Page gets
2623 * filled in top object. Insert
2624 * page, then drop any lower lock.
2625 * Give up if no page.
2627 if (VM_PAGE_THROTTLED()) {
2632 * are we protecting the system from
2633 * backing store exhaustion. If so
2634 * sleep unless we are privileged.
2636 if(vm_backing_store_low
) {
2637 if(!(current_task()->priv_flags
2638 & VM_BACKING_STORE_PRIV
))
2641 m
= vm_page_alloc(object
, offset
);
2642 if (m
== VM_PAGE_NULL
) {
2646 * This is a zero-fill or initial fill
2647 * page fault. As such, we consider it
2648 * undefined with respect to instruction
2649 * execution. i.e. it is the responsibility
2650 * of higher layers to call for an instruction
2651 * sync after changing the contents and before
2652 * sending a program into this area. We
2653 * choose this approach for performance
2656 m
->no_isync
= FALSE
;
2658 if (cur_object
!= object
)
2659 vm_object_unlock(cur_object
);
2662 * Now zero fill page and map it.
2663 * the page is probably going to
2664 * be written soon, so don't bother
2665 * to clear the modified bit
2667 * NOTE: This code holds the map
2668 * lock across the zero fill.
2671 if (!map
->no_zero_fill
) {
2672 vm_page_zero_fill(m
);
2673 type_of_fault
= DBG_ZERO_FILL_FAULT
;
2674 VM_STAT(zero_fill_count
++);
2676 vm_page_lock_queues();
2677 VM_PAGE_QUEUES_REMOVE(m
);
2679 m
->page_ticket
= vm_page_ticket
;
2680 assert(!m
->laundry
);
2681 assert(m
->object
!= kernel_object
);
2682 assert(m
->pageq
.next
== NULL
&&
2683 m
->pageq
.prev
== NULL
);
2684 if(m
->object
->size
> 0x200000) {
2685 m
->zero_fill
= TRUE
;
2686 /* depends on the queues lock */
2688 queue_enter(&vm_page_queue_zf
,
2689 m
, vm_page_t
, pageq
);
2692 &vm_page_queue_inactive
,
2693 m
, vm_page_t
, pageq
);
2695 vm_page_ticket_roll
++;
2696 if(vm_page_ticket_roll
==
2697 VM_PAGE_TICKETS_IN_ROLL
) {
2698 vm_page_ticket_roll
= 0;
2699 if(vm_page_ticket
==
2700 VM_PAGE_TICKET_ROLL_IDS
)
2707 vm_page_inactive_count
++;
2708 vm_page_unlock_queues();
2714 * On to the next level
2717 cur_offset
+= cur_object
->shadow_offset
;
2718 new_object
= cur_object
->shadow
;
2719 vm_object_lock(new_object
);
2720 if (cur_object
!= object
)
2721 vm_object_unlock(cur_object
);
2722 cur_object
= new_object
;
2729 * Cleanup from fast fault failure. Drop any object
2730 * lock other than original and drop map lock.
2733 if (object
!= cur_object
)
2734 vm_object_unlock(cur_object
);
2736 vm_map_unlock_read(map
);
2739 vm_map_unlock(real_map
);
2742 * Make a reference to this object to
2743 * prevent its disposal while we are messing with
2744 * it. Once we have the reference, the map is free
2745 * to be diddled. Since objects reference their
2746 * shadows (and copies), they will stay around as well.
2749 assert(object
->ref_count
> 0);
2750 object
->ref_count
++;
2751 vm_object_res_reference(object
);
2752 vm_object_paging_begin(object
);
2754 XPR(XPR_VM_FAULT
,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2756 if (!object
->private) {
2757 write_startup_file
=
2758 vm_fault_tws_insert(map
, real_map
, vaddr
, object
, offset
);
2761 kr
= vm_fault_page(object
, offset
, fault_type
,
2762 (change_wiring
&& !wired
),
2764 lo_offset
, hi_offset
, behavior
,
2765 &prot
, &result_page
, &top_page
,
2767 &error_code
, map
->no_zero_fill
, FALSE
, map
, vaddr
);
2770 * If we didn't succeed, lose the object reference immediately.
2773 if (kr
!= VM_FAULT_SUCCESS
)
2774 vm_object_deallocate(object
);
2777 * See why we failed, and take corrective action.
2781 case VM_FAULT_SUCCESS
:
2783 case VM_FAULT_MEMORY_SHORTAGE
:
2784 if (vm_page_wait((change_wiring
) ?
2789 case VM_FAULT_INTERRUPTED
:
2792 case VM_FAULT_RETRY
:
2794 case VM_FAULT_FICTITIOUS_SHORTAGE
:
2795 vm_page_more_fictitious();
2797 case VM_FAULT_MEMORY_ERROR
:
2801 kr
= KERN_MEMORY_ERROR
;
2807 if(m
!= VM_PAGE_NULL
) {
2808 assert((change_wiring
&& !wired
) ?
2809 (top_page
== VM_PAGE_NULL
) :
2810 ((top_page
== VM_PAGE_NULL
) == (m
->object
== object
)));
2814 * How to clean up the result of vm_fault_page. This
2815 * happens whether the mapping is entered or not.
2818 #define UNLOCK_AND_DEALLOCATE \
2820 vm_fault_cleanup(m->object, top_page); \
2821 vm_object_deallocate(object); \
2825 * What to do with the resulting page from vm_fault_page
2826 * if it doesn't get entered into the physical map:
2829 #define RELEASE_PAGE(m) \
2831 PAGE_WAKEUP_DONE(m); \
2832 vm_page_lock_queues(); \
2833 if (!m->active && !m->inactive) \
2834 vm_page_activate(m); \
2835 vm_page_unlock_queues(); \
2839 * We must verify that the maps have not changed
2840 * since our last lookup.
2843 if(m
!= VM_PAGE_NULL
) {
2844 old_copy_object
= m
->object
->copy
;
2845 vm_object_unlock(m
->object
);
2847 old_copy_object
= VM_OBJECT_NULL
;
2849 if ((map
!= original_map
) || !vm_map_verify(map
, &version
)) {
2850 vm_object_t retry_object
;
2851 vm_object_offset_t retry_offset
;
2852 vm_prot_t retry_prot
;
2855 * To avoid trying to write_lock the map while another
2856 * thread has it read_locked (in vm_map_pageable), we
2857 * do not try for write permission. If the page is
2858 * still writable, we will get write permission. If it
2859 * is not, or has been marked needs_copy, we enter the
2860 * mapping without write permission, and will merely
2861 * take another fault.
2864 vm_map_lock_read(map
);
2865 kr
= vm_map_lookup_locked(&map
, vaddr
,
2866 fault_type
& ~VM_PROT_WRITE
, &version
,
2867 &retry_object
, &retry_offset
, &retry_prot
,
2868 &wired
, &behavior
, &lo_offset
, &hi_offset
,
2870 pmap
= real_map
->pmap
;
2872 if (kr
!= KERN_SUCCESS
) {
2873 vm_map_unlock_read(map
);
2874 if(m
!= VM_PAGE_NULL
) {
2875 vm_object_lock(m
->object
);
2877 UNLOCK_AND_DEALLOCATE
;
2879 vm_object_deallocate(object
);
2884 vm_object_unlock(retry_object
);
2885 if(m
!= VM_PAGE_NULL
) {
2886 vm_object_lock(m
->object
);
2888 vm_object_lock(object
);
2891 if ((retry_object
!= object
) ||
2892 (retry_offset
!= offset
)) {
2893 vm_map_unlock_read(map
);
2895 vm_map_unlock(real_map
);
2896 if(m
!= VM_PAGE_NULL
) {
2898 UNLOCK_AND_DEALLOCATE
;
2900 vm_object_deallocate(object
);
2906 * Check whether the protection has changed or the object
2907 * has been copied while we left the map unlocked.
2910 if(m
!= VM_PAGE_NULL
) {
2911 vm_object_unlock(m
->object
);
2913 vm_object_unlock(object
);
2916 if(m
!= VM_PAGE_NULL
) {
2917 vm_object_lock(m
->object
);
2919 vm_object_lock(object
);
2923 * If the copy object changed while the top-level object
2924 * was unlocked, then we must take away write permission.
2927 if(m
!= VM_PAGE_NULL
) {
2928 if (m
->object
->copy
!= old_copy_object
)
2929 prot
&= ~VM_PROT_WRITE
;
2933 * If we want to wire down this page, but no longer have
2934 * adequate permissions, we must start all over.
2937 if (wired
&& (fault_type
!= (prot
|VM_PROT_WRITE
))) {
2938 vm_map_verify_done(map
, &version
);
2940 vm_map_unlock(real_map
);
2941 if(m
!= VM_PAGE_NULL
) {
2943 UNLOCK_AND_DEALLOCATE
;
2945 vm_object_deallocate(object
);
2951 * Put this page into the physical map.
2952 * We had to do the unlock above because pmap_enter
2953 * may cause other faults. The page may be on
2954 * the pageout queues. If the pageout daemon comes
2955 * across the page, it will remove it from the queues.
2957 need_activation
= FALSE
;
2959 if (m
!= VM_PAGE_NULL
) {
2960 if (m
->no_isync
== TRUE
) {
2961 pmap_sync_page_data_phys(m
->phys_page
);
2963 if ((type_of_fault
== DBG_CACHE_HIT_FAULT
) && m
->clustered
) {
2965 * found it in the cache, but this
2966 * is the first fault-in of the page (no_isync == TRUE)
2967 * so it must have come in as part of
2968 * a cluster... account 1 pagein against it
2971 current_task()->pageins
++;
2973 type_of_fault
= DBG_PAGEIN_FAULT
;
2976 need_activation
= TRUE
;
2978 m
->no_isync
= FALSE
;
2980 cache_attr
= ((unsigned int)m
->object
->wimg_bits
) & VM_WIMG_MASK
;
2983 PMAP_ENTER(caller_pmap
,
2984 caller_pmap_addr
, m
,
2985 prot
, cache_attr
, wired
);
2987 PMAP_ENTER(pmap
, vaddr
, m
,
2988 prot
, cache_attr
, wired
);
2992 * Add working set information for private objects here.
2994 if (m
->object
->private) {
2995 write_startup_file
=
2996 vm_fault_tws_insert(map
, real_map
, vaddr
,
2997 m
->object
, m
->offset
);
3001 vm_map_entry_t entry
;
3002 vm_map_offset_t laddr
;
3003 vm_map_offset_t ldelta
, hdelta
;
3006 * do a pmap block mapping from the physical address
3011 /* While we do not worry about execution protection in */
3012 /* general, certian pages may have instruction execution */
3013 /* disallowed. We will check here, and if not allowed */
3014 /* to execute, we return with a protection failure. */
3016 if((fault_type
& VM_PROT_EXECUTE
) &&
3017 (!pmap_eligible_for_execute((ppnum_t
)
3018 (object
->shadow_offset
>> 12)))) {
3020 vm_map_verify_done(map
, &version
);
3022 vm_map_unlock(real_map
);
3023 vm_fault_cleanup(object
, top_page
);
3024 vm_object_deallocate(object
);
3025 kr
= KERN_PROTECTION_FAILURE
;
3030 if(real_map
!= map
) {
3031 vm_map_unlock(real_map
);
3033 if (original_map
!= map
) {
3034 vm_map_unlock_read(map
);
3035 vm_map_lock_read(original_map
);
3041 hdelta
= 0xFFFFF000;
3042 ldelta
= 0xFFFFF000;
3045 while(vm_map_lookup_entry(map
, laddr
, &entry
)) {
3046 if(ldelta
> (laddr
- entry
->vme_start
))
3047 ldelta
= laddr
- entry
->vme_start
;
3048 if(hdelta
> (entry
->vme_end
- laddr
))
3049 hdelta
= entry
->vme_end
- laddr
;
3050 if(entry
->is_sub_map
) {
3052 laddr
= (laddr
- entry
->vme_start
)
3054 vm_map_lock_read(entry
->object
.sub_map
);
3056 vm_map_unlock_read(map
);
3057 if(entry
->use_pmap
) {
3058 vm_map_unlock_read(real_map
);
3059 real_map
= entry
->object
.sub_map
;
3061 map
= entry
->object
.sub_map
;
3068 if(vm_map_lookup_entry(map
, laddr
, &entry
) &&
3069 (entry
->object
.vm_object
!= NULL
) &&
3070 (entry
->object
.vm_object
== object
)) {
3072 vm_map_offset_t phys_offset
;
3074 phys_offset
= (entry
->object
.vm_object
->shadow_offset
3077 - entry
->vme_start
);
3078 phys_offset
-= ldelta
;
3080 /* Set up a block mapped area */
3083 (addr64_t
)(caller_pmap_addr
- ldelta
),
3085 (ldelta
+ hdelta
) >> 12,
3087 (VM_WIMG_MASK
& (int)object
->wimg_bits
),
3090 /* Set up a block mapped area */
3093 (addr64_t
)(vaddr
- ldelta
),
3095 (ldelta
+ hdelta
) >> 12,
3097 (VM_WIMG_MASK
& (int)object
->wimg_bits
),
3105 * If the page is not wired down and isn't already
3106 * on a pageout queue, then put it where the
3107 * pageout daemon can find it.
3109 if(m
!= VM_PAGE_NULL
) {
3110 vm_page_lock_queues();
3113 vm_pagein_cluster_used
++;
3114 m
->clustered
= FALSE
;
3116 m
->reference
= TRUE
;
3118 if (change_wiring
) {
3124 #if VM_FAULT_STATIC_CONFIG
3126 if ((!m
->active
&& !m
->inactive
) || ((need_activation
== TRUE
) && !m
->active
))
3127 vm_page_activate(m
);
3130 else if (software_reference_bits
) {
3131 if (!m
->active
&& !m
->inactive
)
3132 vm_page_activate(m
);
3133 m
->reference
= TRUE
;
3135 vm_page_activate(m
);
3138 vm_page_unlock_queues();
3142 * Unlock everything, and return
3145 vm_map_verify_done(map
, &version
);
3147 vm_map_unlock(real_map
);
3148 if(m
!= VM_PAGE_NULL
) {
3149 PAGE_WAKEUP_DONE(m
);
3150 UNLOCK_AND_DEALLOCATE
;
3152 vm_fault_cleanup(object
, top_page
);
3153 vm_object_deallocate(object
);
3157 #undef UNLOCK_AND_DEALLOCATE
3161 if(write_startup_file
)
3162 tws_send_startup_info(current_task());
3164 thread_interrupt_level(interruptible_state
);
3166 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, 0)) | DBG_FUNC_END
,
3168 type_of_fault
& 0xff,
3179 * Wire down a range of virtual addresses in a map.
3184 vm_map_entry_t entry
,
3186 vm_map_offset_t pmap_addr
)
3189 register vm_map_offset_t va
;
3190 register vm_map_offset_t end_addr
= entry
->vme_end
;
3191 register kern_return_t rc
;
3193 assert(entry
->in_transition
);
3195 if ((entry
->object
.vm_object
!= NULL
) &&
3196 !entry
->is_sub_map
&&
3197 entry
->object
.vm_object
->phys_contiguous
) {
3198 return KERN_SUCCESS
;
3202 * Inform the physical mapping system that the
3203 * range of addresses may not fault, so that
3204 * page tables and such can be locked down as well.
3207 pmap_pageable(pmap
, pmap_addr
,
3208 pmap_addr
+ (end_addr
- entry
->vme_start
), FALSE
);
3211 * We simulate a fault to get the page and enter it
3212 * in the physical map.
3215 for (va
= entry
->vme_start
; va
< end_addr
; va
+= PAGE_SIZE
) {
3216 if ((rc
= vm_fault_wire_fast(
3217 map
, va
, entry
, pmap
,
3218 pmap_addr
+ (va
- entry
->vme_start
)
3219 )) != KERN_SUCCESS
) {
3220 rc
= vm_fault(map
, va
, VM_PROT_NONE
, TRUE
,
3221 (pmap
== kernel_pmap
) ?
3222 THREAD_UNINT
: THREAD_ABORTSAFE
,
3223 pmap
, pmap_addr
+ (va
- entry
->vme_start
));
3226 if (rc
!= KERN_SUCCESS
) {
3227 struct vm_map_entry tmp_entry
= *entry
;
3229 /* unwire wired pages */
3230 tmp_entry
.vme_end
= va
;
3231 vm_fault_unwire(map
,
3232 &tmp_entry
, FALSE
, pmap
, pmap_addr
);
3237 return KERN_SUCCESS
;
3243 * Unwire a range of virtual addresses in a map.
3248 vm_map_entry_t entry
,
3249 boolean_t deallocate
,
3251 vm_map_offset_t pmap_addr
)
3253 register vm_map_offset_t va
;
3254 register vm_map_offset_t end_addr
= entry
->vme_end
;
3257 object
= (entry
->is_sub_map
)
3258 ? VM_OBJECT_NULL
: entry
->object
.vm_object
;
3261 * Since the pages are wired down, we must be able to
3262 * get their mappings from the physical map system.
3265 for (va
= entry
->vme_start
; va
< end_addr
; va
+= PAGE_SIZE
) {
3266 pmap_change_wiring(pmap
,
3267 pmap_addr
+ (va
- entry
->vme_start
), FALSE
);
3269 if (object
== VM_OBJECT_NULL
) {
3270 (void) vm_fault(map
, va
, VM_PROT_NONE
,
3271 TRUE
, THREAD_UNINT
, pmap
, pmap_addr
);
3272 } else if (object
->phys_contiguous
) {
3276 vm_page_t result_page
;
3278 vm_object_t result_object
;
3279 vm_fault_return_t result
;
3282 prot
= VM_PROT_NONE
;
3284 vm_object_lock(object
);
3285 vm_object_paging_begin(object
);
3287 "vm_fault_unwire -> vm_fault_page\n",
3289 result
= vm_fault_page(object
,
3291 (va
- entry
->vme_start
),
3297 - entry
->vme_start
),
3303 0, map
->no_zero_fill
,
3305 } while (result
== VM_FAULT_RETRY
);
3307 if (result
!= VM_FAULT_SUCCESS
)
3308 panic("vm_fault_unwire: failure");
3310 result_object
= result_page
->object
;
3312 assert(!result_page
->fictitious
);
3313 pmap_disconnect(result_page
->phys_page
);
3314 VM_PAGE_FREE(result_page
);
3316 vm_page_lock_queues();
3317 vm_page_unwire(result_page
);
3318 vm_page_unlock_queues();
3319 PAGE_WAKEUP_DONE(result_page
);
3322 vm_fault_cleanup(result_object
, top_page
);
3327 * Inform the physical mapping system that the range
3328 * of addresses may fault, so that page tables and
3329 * such may be unwired themselves.
3332 pmap_pageable(pmap
, pmap_addr
,
3333 pmap_addr
+ (end_addr
- entry
->vme_start
), TRUE
);
3338 * vm_fault_wire_fast:
3340 * Handle common case of a wire down page fault at the given address.
3341 * If successful, the page is inserted into the associated physical map.
3342 * The map entry is passed in to avoid the overhead of a map lookup.
3344 * NOTE: the given address should be truncated to the
3345 * proper page address.
3347 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
3348 * a standard error specifying why the fault is fatal is returned.
3350 * The map in question must be referenced, and remains so.
3351 * Caller has a read lock on the map.
3353 * This is a stripped version of vm_fault() for wiring pages. Anything
3354 * other than the common case will return KERN_FAILURE, and the caller
3355 * is expected to call vm_fault().
3359 __unused vm_map_t map
,
3361 vm_map_entry_t entry
,
3363 vm_map_offset_t pmap_addr
)
3366 vm_object_offset_t offset
;
3367 register vm_page_t m
;
3369 thread_t thread
= current_thread();
3370 unsigned int cache_attr
;
3374 if (thread
!= THREAD_NULL
&& thread
->task
!= TASK_NULL
)
3375 thread
->task
->faults
++;
3382 #define RELEASE_PAGE(m) { \
3383 PAGE_WAKEUP_DONE(m); \
3384 vm_page_lock_queues(); \
3385 vm_page_unwire(m); \
3386 vm_page_unlock_queues(); \
3390 #undef UNLOCK_THINGS
3391 #define UNLOCK_THINGS { \
3392 vm_object_paging_end(object); \
3393 vm_object_unlock(object); \
3396 #undef UNLOCK_AND_DEALLOCATE
3397 #define UNLOCK_AND_DEALLOCATE { \
3399 vm_object_deallocate(object); \
3402 * Give up and have caller do things the hard way.
3406 UNLOCK_AND_DEALLOCATE; \
3407 return(KERN_FAILURE); \
3412 * If this entry is not directly to a vm_object, bail out.
3414 if (entry
->is_sub_map
)
3415 return(KERN_FAILURE
);
3418 * Find the backing store object and offset into it.
3421 object
= entry
->object
.vm_object
;
3422 offset
= (va
- entry
->vme_start
) + entry
->offset
;
3423 prot
= entry
->protection
;
3426 * Make a reference to this object to prevent its
3427 * disposal while we are messing with it.
3430 vm_object_lock(object
);
3431 assert(object
->ref_count
> 0);
3432 object
->ref_count
++;
3433 vm_object_res_reference(object
);
3434 vm_object_paging_begin(object
);
3437 * INVARIANTS (through entire routine):
3439 * 1) At all times, we must either have the object
3440 * lock or a busy page in some object to prevent
3441 * some other thread from trying to bring in
3444 * 2) Once we have a busy page, we must remove it from
3445 * the pageout queues, so that the pageout daemon
3446 * will not grab it away.
3451 * Look for page in top-level object. If it's not there or
3452 * there's something going on, give up.
3453 * ENCRYPTED SWAP: use the slow fault path, since we'll need to
3454 * decrypt the page before wiring it down.
3456 m
= vm_page_lookup(object
, offset
);
3457 if ((m
== VM_PAGE_NULL
) || (m
->busy
) || (m
->encrypted
) ||
3458 (m
->unusual
&& ( m
->error
|| m
->restart
|| m
->absent
||
3459 prot
& m
->page_lock
))) {
3463 ASSERT_PAGE_DECRYPTED(m
);
3466 * Wire the page down now. All bail outs beyond this
3467 * point must unwire the page.
3470 vm_page_lock_queues();
3472 vm_page_unlock_queues();
3475 * Mark page busy for other threads.
3482 * Give up if the page is being written and there's a copy object
3484 if ((object
->copy
!= VM_OBJECT_NULL
) && (prot
& VM_PROT_WRITE
)) {
3490 * Put this page into the physical map.
3491 * We have to unlock the object because pmap_enter
3492 * may cause other faults.
3494 if (m
->no_isync
== TRUE
) {
3495 pmap_sync_page_data_phys(m
->phys_page
);
3497 m
->no_isync
= FALSE
;
3500 cache_attr
= ((unsigned int)m
->object
->wimg_bits
) & VM_WIMG_MASK
;
3502 PMAP_ENTER(pmap
, pmap_addr
, m
, prot
, cache_attr
, TRUE
);
3505 * Unlock everything, and return
3508 PAGE_WAKEUP_DONE(m
);
3509 UNLOCK_AND_DEALLOCATE
;
3511 return(KERN_SUCCESS
);
3516 * Routine: vm_fault_copy_cleanup
3518 * Release a page used by vm_fault_copy.
3522 vm_fault_copy_cleanup(
3526 vm_object_t object
= page
->object
;
3528 vm_object_lock(object
);
3529 PAGE_WAKEUP_DONE(page
);
3530 vm_page_lock_queues();
3531 if (!page
->active
&& !page
->inactive
)
3532 vm_page_activate(page
);
3533 vm_page_unlock_queues();
3534 vm_fault_cleanup(object
, top_page
);
3538 vm_fault_copy_dst_cleanup(
3543 if (page
!= VM_PAGE_NULL
) {
3544 object
= page
->object
;
3545 vm_object_lock(object
);
3546 vm_page_lock_queues();
3547 vm_page_unwire(page
);
3548 vm_page_unlock_queues();
3549 vm_object_paging_end(object
);
3550 vm_object_unlock(object
);
3555 * Routine: vm_fault_copy
3558 * Copy pages from one virtual memory object to another --
3559 * neither the source nor destination pages need be resident.
3561 * Before actually copying a page, the version associated with
3562 * the destination address map wil be verified.
3564 * In/out conditions:
3565 * The caller must hold a reference, but not a lock, to
3566 * each of the source and destination objects and to the
3570 * Returns KERN_SUCCESS if no errors were encountered in
3571 * reading or writing the data. Returns KERN_INTERRUPTED if
3572 * the operation was interrupted (only possible if the
3573 * "interruptible" argument is asserted). Other return values
3574 * indicate a permanent error in copying the data.
3576 * The actual amount of data copied will be returned in the
3577 * "copy_size" argument. In the event that the destination map
3578 * verification failed, this amount may be less than the amount
3583 vm_object_t src_object
,
3584 vm_object_offset_t src_offset
,
3585 vm_map_size_t
*copy_size
, /* INOUT */
3586 vm_object_t dst_object
,
3587 vm_object_offset_t dst_offset
,
3589 vm_map_version_t
*dst_version
,
3592 vm_page_t result_page
;
3595 vm_page_t src_top_page
;
3599 vm_page_t dst_top_page
;
3602 vm_map_size_t amount_left
;
3603 vm_object_t old_copy_object
;
3604 kern_return_t error
= 0;
3606 vm_map_size_t part_size
;
3609 * In order not to confuse the clustered pageins, align
3610 * the different offsets on a page boundary.
3612 vm_object_offset_t src_lo_offset
= vm_object_trunc_page(src_offset
);
3613 vm_object_offset_t dst_lo_offset
= vm_object_trunc_page(dst_offset
);
3614 vm_object_offset_t src_hi_offset
= vm_object_round_page(src_offset
+ *copy_size
);
3615 vm_object_offset_t dst_hi_offset
= vm_object_round_page(dst_offset
+ *copy_size
);
3619 *copy_size -= amount_left; \
3623 amount_left
= *copy_size
;
3624 do { /* while (amount_left > 0) */
3626 * There may be a deadlock if both source and destination
3627 * pages are the same. To avoid this deadlock, the copy must
3628 * start by getting the destination page in order to apply
3629 * COW semantics if any.
3632 RetryDestinationFault
: ;
3634 dst_prot
= VM_PROT_WRITE
|VM_PROT_READ
;
3636 vm_object_lock(dst_object
);
3637 vm_object_paging_begin(dst_object
);
3639 XPR(XPR_VM_FAULT
,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3640 switch (vm_fault_page(dst_object
,
3641 vm_object_trunc_page(dst_offset
),
3642 VM_PROT_WRITE
|VM_PROT_READ
,
3647 VM_BEHAVIOR_SEQUENTIAL
,
3653 dst_map
->no_zero_fill
,
3655 case VM_FAULT_SUCCESS
:
3657 case VM_FAULT_RETRY
:
3658 goto RetryDestinationFault
;
3659 case VM_FAULT_MEMORY_SHORTAGE
:
3660 if (vm_page_wait(interruptible
))
3661 goto RetryDestinationFault
;
3663 case VM_FAULT_INTERRUPTED
:
3664 RETURN(MACH_SEND_INTERRUPTED
);
3665 case VM_FAULT_FICTITIOUS_SHORTAGE
:
3666 vm_page_more_fictitious();
3667 goto RetryDestinationFault
;
3668 case VM_FAULT_MEMORY_ERROR
:
3672 return(KERN_MEMORY_ERROR
);
3674 assert ((dst_prot
& VM_PROT_WRITE
) != VM_PROT_NONE
);
3676 old_copy_object
= dst_page
->object
->copy
;
3679 * There exists the possiblity that the source and
3680 * destination page are the same. But we can't
3681 * easily determine that now. If they are the
3682 * same, the call to vm_fault_page() for the
3683 * destination page will deadlock. To prevent this we
3684 * wire the page so we can drop busy without having
3685 * the page daemon steal the page. We clean up the
3686 * top page but keep the paging reference on the object
3687 * holding the dest page so it doesn't go away.
3690 vm_page_lock_queues();
3691 vm_page_wire(dst_page
);
3692 vm_page_unlock_queues();
3693 PAGE_WAKEUP_DONE(dst_page
);
3694 vm_object_unlock(dst_page
->object
);
3696 if (dst_top_page
!= VM_PAGE_NULL
) {
3697 vm_object_lock(dst_object
);
3698 VM_PAGE_FREE(dst_top_page
);
3699 vm_object_paging_end(dst_object
);
3700 vm_object_unlock(dst_object
);
3705 if (src_object
== VM_OBJECT_NULL
) {
3707 * No source object. We will just
3708 * zero-fill the page in dst_object.
3710 src_page
= VM_PAGE_NULL
;
3711 result_page
= VM_PAGE_NULL
;
3713 vm_object_lock(src_object
);
3714 src_page
= vm_page_lookup(src_object
,
3715 vm_object_trunc_page(src_offset
));
3716 if (src_page
== dst_page
) {
3717 src_prot
= dst_prot
;
3718 result_page
= VM_PAGE_NULL
;
3720 src_prot
= VM_PROT_READ
;
3721 vm_object_paging_begin(src_object
);
3724 "vm_fault_copy(2) -> vm_fault_page\n",
3726 switch (vm_fault_page(src_object
,
3727 vm_object_trunc_page(src_offset
),
3733 VM_BEHAVIOR_SEQUENTIAL
,
3742 case VM_FAULT_SUCCESS
:
3744 case VM_FAULT_RETRY
:
3745 goto RetrySourceFault
;
3746 case VM_FAULT_MEMORY_SHORTAGE
:
3747 if (vm_page_wait(interruptible
))
3748 goto RetrySourceFault
;
3750 case VM_FAULT_INTERRUPTED
:
3751 vm_fault_copy_dst_cleanup(dst_page
);
3752 RETURN(MACH_SEND_INTERRUPTED
);
3753 case VM_FAULT_FICTITIOUS_SHORTAGE
:
3754 vm_page_more_fictitious();
3755 goto RetrySourceFault
;
3756 case VM_FAULT_MEMORY_ERROR
:
3757 vm_fault_copy_dst_cleanup(dst_page
);
3761 return(KERN_MEMORY_ERROR
);
3765 assert((src_top_page
== VM_PAGE_NULL
) ==
3766 (result_page
->object
== src_object
));
3768 assert ((src_prot
& VM_PROT_READ
) != VM_PROT_NONE
);
3769 vm_object_unlock(result_page
->object
);
3772 if (!vm_map_verify(dst_map
, dst_version
)) {
3773 if (result_page
!= VM_PAGE_NULL
&& src_page
!= dst_page
)
3774 vm_fault_copy_cleanup(result_page
, src_top_page
);
3775 vm_fault_copy_dst_cleanup(dst_page
);
3779 vm_object_lock(dst_page
->object
);
3781 if (dst_page
->object
->copy
!= old_copy_object
) {
3782 vm_object_unlock(dst_page
->object
);
3783 vm_map_verify_done(dst_map
, dst_version
);
3784 if (result_page
!= VM_PAGE_NULL
&& src_page
!= dst_page
)
3785 vm_fault_copy_cleanup(result_page
, src_top_page
);
3786 vm_fault_copy_dst_cleanup(dst_page
);
3789 vm_object_unlock(dst_page
->object
);
3792 * Copy the page, and note that it is dirty
3796 if (!page_aligned(src_offset
) ||
3797 !page_aligned(dst_offset
) ||
3798 !page_aligned(amount_left
)) {
3800 vm_object_offset_t src_po
,
3803 src_po
= src_offset
- vm_object_trunc_page(src_offset
);
3804 dst_po
= dst_offset
- vm_object_trunc_page(dst_offset
);
3806 if (dst_po
> src_po
) {
3807 part_size
= PAGE_SIZE
- dst_po
;
3809 part_size
= PAGE_SIZE
- src_po
;
3811 if (part_size
> (amount_left
)){
3812 part_size
= amount_left
;
3815 if (result_page
== VM_PAGE_NULL
) {
3816 vm_page_part_zero_fill(dst_page
,
3819 vm_page_part_copy(result_page
, src_po
,
3820 dst_page
, dst_po
, part_size
);
3821 if(!dst_page
->dirty
){
3822 vm_object_lock(dst_object
);
3823 dst_page
->dirty
= TRUE
;
3824 vm_object_unlock(dst_page
->object
);
3829 part_size
= PAGE_SIZE
;
3831 if (result_page
== VM_PAGE_NULL
)
3832 vm_page_zero_fill(dst_page
);
3834 vm_page_copy(result_page
, dst_page
);
3835 if(!dst_page
->dirty
){
3836 vm_object_lock(dst_object
);
3837 dst_page
->dirty
= TRUE
;
3838 vm_object_unlock(dst_page
->object
);
3845 * Unlock everything, and return
3848 vm_map_verify_done(dst_map
, dst_version
);
3850 if (result_page
!= VM_PAGE_NULL
&& src_page
!= dst_page
)
3851 vm_fault_copy_cleanup(result_page
, src_top_page
);
3852 vm_fault_copy_dst_cleanup(dst_page
);
3854 amount_left
-= part_size
;
3855 src_offset
+= part_size
;
3856 dst_offset
+= part_size
;
3857 } while (amount_left
> 0);
3859 RETURN(KERN_SUCCESS
);
3868 * Routine: vm_fault_page_overwrite
3871 * A form of vm_fault_page that assumes that the
3872 * resulting page will be overwritten in its entirety,
3873 * making it unnecessary to obtain the correct *contents*
3877 * XXX Untested. Also unused. Eventually, this technology
3878 * could be used in vm_fault_copy() to advantage.
3881 vm_fault_page_overwrite(
3883 vm_object_t dst_object
,
3884 vm_object_offset_t dst_offset
,
3885 vm_page_t
*result_page
) /* OUT */
3889 kern_return_t wait_result
;
3891 #define interruptible THREAD_UNINT /* XXX */
3895 * Look for a page at this offset
3898 while ((dst_page
= vm_page_lookup(dst_object
, dst_offset
))
3901 * No page, no problem... just allocate one.
3904 dst_page
= vm_page_alloc(dst_object
, dst_offset
);
3905 if (dst_page
== VM_PAGE_NULL
) {
3906 vm_object_unlock(dst_object
);
3908 vm_object_lock(dst_object
);
3913 * Pretend that the memory manager
3914 * write-protected the page.
3916 * Note that we will be asking for write
3917 * permission without asking for the data
3921 dst_page
->overwriting
= TRUE
;
3922 dst_page
->page_lock
= VM_PROT_WRITE
;
3923 dst_page
->absent
= TRUE
;
3924 dst_page
->unusual
= TRUE
;
3925 dst_object
->absent_count
++;
3930 * When we bail out, we might have to throw
3931 * away the page created here.
3934 #define DISCARD_PAGE \
3936 vm_object_lock(dst_object); \
3937 dst_page = vm_page_lookup(dst_object, dst_offset); \
3938 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3939 VM_PAGE_FREE(dst_page); \
3940 vm_object_unlock(dst_object); \
3945 * If the page is write-protected...
3948 if (dst_page
->page_lock
& VM_PROT_WRITE
) {
3950 * ... and an unlock request hasn't been sent
3953 if ( ! (dst_page
->unlock_request
& VM_PROT_WRITE
)) {
3958 * ... then send one now.
3961 if (!dst_object
->pager_ready
) {
3962 wait_result
= vm_object_assert_wait(dst_object
,
3963 VM_OBJECT_EVENT_PAGER_READY
,
3965 vm_object_unlock(dst_object
);
3966 if (wait_result
== THREAD_WAITING
)
3967 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
3968 if (wait_result
!= THREAD_AWAKENED
) {
3970 return(VM_FAULT_INTERRUPTED
);
3975 u
= dst_page
->unlock_request
|= VM_PROT_WRITE
;
3976 vm_object_unlock(dst_object
);
3978 if ((rc
= memory_object_data_unlock(
3980 dst_offset
+ dst_object
->paging_offset
,
3982 u
)) != KERN_SUCCESS
) {
3984 printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3986 return((rc
== MACH_SEND_INTERRUPTED
) ?
3987 VM_FAULT_INTERRUPTED
:
3988 VM_FAULT_MEMORY_ERROR
);
3990 vm_object_lock(dst_object
);
3994 /* ... fall through to wait below */
3997 * If the page isn't being used for other
3998 * purposes, then we're done.
4000 if ( ! (dst_page
->busy
|| dst_page
->absent
||
4001 dst_page
->error
|| dst_page
->restart
) )
4005 wait_result
= PAGE_ASSERT_WAIT(dst_page
, interruptible
);
4006 vm_object_unlock(dst_object
);
4007 if (wait_result
== THREAD_WAITING
)
4008 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
4009 if (wait_result
!= THREAD_AWAKENED
) {
4011 return(VM_FAULT_INTERRUPTED
);
4015 *result_page
= dst_page
;
4016 return(VM_FAULT_SUCCESS
);
4018 #undef interruptible
4024 #if VM_FAULT_CLASSIFY
4026 * Temporary statistics gathering support.
4030 * Statistics arrays:
4032 #define VM_FAULT_TYPES_MAX 5
4033 #define VM_FAULT_LEVEL_MAX 8
4035 int vm_fault_stats
[VM_FAULT_TYPES_MAX
][VM_FAULT_LEVEL_MAX
];
4037 #define VM_FAULT_TYPE_ZERO_FILL 0
4038 #define VM_FAULT_TYPE_MAP_IN 1
4039 #define VM_FAULT_TYPE_PAGER 2
4040 #define VM_FAULT_TYPE_COPY 3
4041 #define VM_FAULT_TYPE_OTHER 4
4045 vm_fault_classify(vm_object_t object
,
4046 vm_object_offset_t offset
,
4047 vm_prot_t fault_type
)
4049 int type
, level
= 0;
4053 m
= vm_page_lookup(object
, offset
);
4054 if (m
!= VM_PAGE_NULL
) {
4055 if (m
->busy
|| m
->error
|| m
->restart
|| m
->absent
||
4056 fault_type
& m
->page_lock
) {
4057 type
= VM_FAULT_TYPE_OTHER
;
4060 if (((fault_type
& VM_PROT_WRITE
) == 0) ||
4061 ((level
== 0) && object
->copy
== VM_OBJECT_NULL
)) {
4062 type
= VM_FAULT_TYPE_MAP_IN
;
4065 type
= VM_FAULT_TYPE_COPY
;
4069 if (object
->pager_created
) {
4070 type
= VM_FAULT_TYPE_PAGER
;
4073 if (object
->shadow
== VM_OBJECT_NULL
) {
4074 type
= VM_FAULT_TYPE_ZERO_FILL
;
4078 offset
+= object
->shadow_offset
;
4079 object
= object
->shadow
;
4085 if (level
> VM_FAULT_LEVEL_MAX
)
4086 level
= VM_FAULT_LEVEL_MAX
;
4088 vm_fault_stats
[type
][level
] += 1;
4093 /* cleanup routine to call from debugger */
4096 vm_fault_classify_init(void)
4100 for (type
= 0; type
< VM_FAULT_TYPES_MAX
; type
++) {
4101 for (level
= 0; level
< VM_FAULT_LEVEL_MAX
; level
++) {
4102 vm_fault_stats
[type
][level
] = 0;
4108 #endif /* VM_FAULT_CLASSIFY */