 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 *
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 *
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Page fault handling module.
#include <mach_cluster_stats.h>
#include <mach_pagemap.h>

#include <mach/mach_types.h>
#include <mach/kern_return.h>
#include <mach/message.h>	/* for error codes */
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/memory_object.h>
				/* For memory_object_data_{request,unlock} */

#include <kern/kern_types.h>
#include <kern/host_statistics.h>
#include <kern/counters.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/host.h>
#include <kern/mach_param.h>
#include <kern/macro_help.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>

#include <ppc/proc_reg.h>

#include <vm/vm_fault.h>
#include <vm/task_working_set.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>

#include <sys/kdebug.h>

#define VM_FAULT_CLASSIFY	0
#define VM_FAULT_STATIC_CONFIG	1

#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
unsigned int	vm_object_absent_max = 50;

int		vm_fault_debug = 0;

#if	!VM_FAULT_STATIC_CONFIG
boolean_t	vm_fault_dirty_handling = FALSE;
boolean_t	vm_fault_interruptible = FALSE;
boolean_t	software_reference_bits = TRUE;

extern struct db_watchpoint *db_watchpoint_list;
#endif	/* MACH_KDB */
/* Forward declarations of internal routines. */
extern kern_return_t vm_fault_wire_fast(
		vm_map_entry_t	entry,
		vm_map_offset_t	pmap_addr);

extern void vm_fault_continue(void);

extern void vm_fault_copy_cleanup(
extern void vm_fault_copy_dst_cleanup(

#if	VM_FAULT_CLASSIFY
extern void vm_fault_classify(vm_object_t	object,
			  vm_object_offset_t	offset,
			  vm_prot_t		fault_type);

extern void vm_fault_classify_init(void);
 *	Routine:	vm_fault_init
 *		Initialize our private data structures.
 *
 *	Routine:	vm_fault_cleanup
 *		Clean up the result of vm_fault_page.
 *		The paging reference for "object" is released.
 *		"object" is unlocked.
 *		If "top_page" is not null, "top_page" is
 *		freed and the paging reference for the object
 *		containing it is released.
 *		"object" must be locked.
	register vm_object_t	object,
	register vm_page_t	top_page)
	vm_object_paging_end(object);
	vm_object_unlock(object);

	if (top_page != VM_PAGE_NULL) {
		object = top_page->object;
		vm_object_lock(object);
		VM_PAGE_FREE(top_page);
		vm_object_paging_end(object);
		vm_object_unlock(object);
#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES 16
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats_in[MAXCLUSTERPAGES];
#define CLUSTER_STAT(clause)	clause
#define CLUSTER_STAT_HIGHER(x)	\
	((cluster_stats_in[(x)].pages_at_higher_offsets)++)
#define CLUSTER_STAT_LOWER(x)	\
	((cluster_stats_in[(x)].pages_at_lower_offsets)++)
#define CLUSTER_STAT_CLUSTER(x)	\
	((cluster_stats_in[(x)].pages_in_cluster)++)
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */
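/*
 * Illustrative sketch, not part of the original source: CLUSTER_STAT()
 * wraps statistics-gathering statements so that they compile to nothing
 * when MACH_CLUSTER_STATS is 0.  The statements below are hypothetical
 * and kept under "#if 0" so they are never compiled.
 */
#if 0
	/* Costs nothing when cluster statistics are disabled. */
	CLUSTER_STAT(pages_at_higher_offsets++;)
	/* Bump the pages_in_cluster counter for slot 0 (stats builds only). */
	CLUSTER_STAT_CLUSTER(0);
#endif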
/* XXX - temporary */
boolean_t vm_allow_clustered_pagein = FALSE;
int vm_pagein_cluster_used = 0;

#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)

boolean_t	vm_page_deactivate_behind = TRUE;
 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
int vm_default_ahead = 0;
int vm_default_behind = MAX_UPL_TRANSFER;
 *	vm_page_deactivate_behind
 *
 *	Determine if sequential access is in progress
 *	in accordance with the behavior specified.  If
 *	so, compute a potential page to deactivate and
 *
 *	The object must be locked.
vm_fault_deactivate_behind(
	vm_object_offset_t	offset,
	vm_behavior_t		behavior)
	dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind);	/* (TEST/DEBUG) */

	if (object == kernel_object) {
		 * Do not deactivate pages from the kernel object: they
		 * are not intended to become pageable.
	case VM_BEHAVIOR_RANDOM:
		object->sequential = PAGE_SIZE_64;
	case VM_BEHAVIOR_SEQUENTIAL:
		    object->last_alloc == offset - PAGE_SIZE_64) {
			object->sequential += PAGE_SIZE_64;
			m = vm_page_lookup(object, offset - PAGE_SIZE_64);
			object->sequential = PAGE_SIZE_64;	/* reset */
	case VM_BEHAVIOR_RSEQNTL:
		if (object->last_alloc &&
		    object->last_alloc == offset + PAGE_SIZE_64) {
			object->sequential += PAGE_SIZE_64;
			m = vm_page_lookup(object, offset + PAGE_SIZE_64);
			object->sequential = PAGE_SIZE_64;	/* reset */
	case VM_BEHAVIOR_DEFAULT:
		    object->last_alloc == offset - PAGE_SIZE_64) {
			vm_object_offset_t	behind = vm_default_behind * PAGE_SIZE_64;

			object->sequential += PAGE_SIZE_64;
			m = (offset >= behind &&
			     object->sequential >= behind) ?
			    vm_page_lookup(object, offset - behind) :
		} else if (object->last_alloc &&
			   object->last_alloc == offset + PAGE_SIZE_64) {
			vm_object_offset_t	behind = vm_default_behind * PAGE_SIZE_64;

			object->sequential += PAGE_SIZE_64;
			m = (offset < -behind &&
			     object->sequential >= behind) ?
			    vm_page_lookup(object, offset + behind) :
			object->sequential = PAGE_SIZE_64;
	object->last_alloc = offset;
		vm_page_lock_queues();
		vm_page_deactivate(m);
		vm_page_unlock_queues();
		dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
 *	Routine:	vm_fault_page
 *		Find the resident page for the virtual memory
 *		specified by the given virtual memory object
 *	Additional arguments:
 *		The required permissions for the page are given
 *		in "fault_type".  Desired permissions are included
 *		in "protection".  The minimum and maximum valid offsets
 *		within the object for the relevant map entry are
 *		passed in "lo_offset" and "hi_offset" respectively and
 *		the expected page reference pattern is passed in "behavior".
 *		These three parameters are used to determine pagein cluster
 *
 *		If the desired page is known to be resident (for
 *		example, because it was previously wired down), asserting
 *		the "unwiring" parameter will speed the search.
 *
 *		If the operation can be interrupted (by thread_abort
 *		or thread_terminate), then the "interruptible"
 *		parameter should be asserted.
 *
 *		The page containing the proper data is returned
 *
 *		The source object must be locked and referenced,
 *		and must donate one paging reference.  The reference
 *		is not affected.  The paging reference and lock are
 *
 *		If the call succeeds, the object in which "result_page"
 *		resides is left locked and holding a paging reference.
 *		If this is not the original object, a busy page in the
 *		original object is returned in "top_page", to prevent other
 *		callers from pursuing this same data, along with a paging
 *		reference for the original object.  The "top_page" should
 *		be destroyed when this guarantee is no longer required.
 *		The "result_page" is also left busy.  It is not removed
 *		from the pageout queues.
	vm_object_t	first_object,	/* Object to begin search */
	vm_object_offset_t first_offset,	/* Offset into object */
	vm_prot_t	fault_type,	/* What access is requested */
	boolean_t	must_be_resident,/* Must page be resident? */
	int		interruptible,	/* how may fault be interrupted? */
	vm_map_offset_t	lo_offset,	/* Map entry start */
	vm_map_offset_t	hi_offset,	/* Map entry end */
	vm_behavior_t	behavior,	/* Page reference behavior */
	/* Modifies in place: */
	vm_prot_t	*protection,	/* Protection for mapping */
	vm_page_t	*result_page,	/* Page found, if successful */
	vm_page_t	*top_page,	/* Page in top object, if
					 * not result_page. */
	int		*type_of_fault,	/* if non-null, fill in with type of fault
					 * COW, zero-fill, etc... returned in trace point */
	/* More arguments: */
	kern_return_t	*error_code,	/* code if page is in error */
	boolean_t	no_zero_fill,	/* don't zero fill absent pages */
	boolean_t	data_supply,	/* treat as data_supply if
					 * it is a write fault and a full
					 * page is provided */
	__unused vm_map_offset_t vaddr)
	vm_object_offset_t	offset;
	vm_object_t		next_object;
	vm_object_t		copy_object;
	boolean_t		look_for_page;
	vm_prot_t		access_required = fault_type;
	vm_prot_t		wants_copy_flag;
	vm_object_size_t	length;
	vm_object_offset_t	cluster_start, cluster_end;
	CLUSTER_STAT(int pages_at_higher_offsets;)
	CLUSTER_STAT(int pages_at_lower_offsets;)
	kern_return_t		wait_result;
	boolean_t		interruptible_state;
	boolean_t		bumped_pagein = FALSE;
 * MACH page map - an optional optimization where a bit map is maintained
 * by the VM subsystem for internal objects to indicate which pages of
 * the object currently reside on backing store.  This existence map
 * duplicates information maintained by the vnode pager.  It is
 * created at the time of the first pageout against the object, i.e.
 * at the same time the pager for the object is created.  The optimization
 * is designed to eliminate pager interaction overhead, if it is
 * 'known' that the page does not exist on backing store.
 *
 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
 * either marked as paged out in the existence map for the object or no
 * existence map exists for the object.  LOOK_FOR() is one of the
 * criteria in the decision to invoke the pager.  It is also used as one
 * of the criteria to terminate the scan for adjacent pages in a clustered
 * pagein operation.  Note that LOOK_FOR() always evaluates to TRUE for
 * permanent objects.  Note also that if the pager for an internal object
 * has not been created, the pager is not invoked regardless of the value
 * of LOOK_FOR() and that clustered pagein scans are only done on an object
 * for which a pager has been created.
 *
 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 * is marked as paged out in the existence map for the object.
 * PAGED_OUT() is used to determine if a page has already been pushed
 * into a copy object in order to avoid a redundant page out operation.
#define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			!= VM_EXTERNAL_STATE_ABSENT)
#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			== VM_EXTERNAL_STATE_EXISTS)
#else /* MACH_PAGEMAP */
 * If the MACH page map optimization is not enabled,
 * LOOK_FOR() always evaluates to TRUE.  The pager will always be
 * invoked to resolve missing pages in an object, assuming the pager
 * has been created for the object.  In a clustered page operation, the
 * absence of a page on backing store cannot be used to terminate
 * a scan for adjacent pages since that information is available only in
 * the pager.  Hence pages that may not be paged out are potentially
 * included in a clustered request.  The vnode pager is coded to deal
 * with any combination of absent/present pages in a clustered
 * pagein request.  PAGED_OUT() always evaluates to FALSE, i.e. the pager
 * will always be invoked to push a dirty page into a copy object assuming
 * a pager has been created.  If the page has already been pushed, the
 * pager will ignore the new request.
#define LOOK_FOR(o, f) TRUE
#define PAGED_OUT(o, f) FALSE
#endif /* MACH_PAGEMAP */
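/*
 * Illustrative sketch, not part of the original source: one common way the
 * macros above are consumed.  The names look_for_page, copy_object,
 * copy_offset and data_supply mirror locals used later in this file, but
 * the surrounding conditions are simplified here, and the fragment is kept
 * under "#if 0" so it is never compiled.
 */
#if 0
	/* Only ask the pager if it exists and the page may be on backing store. */
	look_for_page = (object->pager_created &&
			 LOOK_FOR(object, offset) &&
			 !data_supply);

	/* Skip a redundant pageout if the page was already pushed to the copy. */
	if (!PAGED_OUT(copy_object, copy_offset)) {
		/* ... push the page into the copy object ... */
	}
#endif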
#define PREPARE_RELEASE_PAGE(m)				\
	vm_page_lock_queues();				\

#define DO_RELEASE_PAGE(m)				\
	PAGE_WAKEUP_DONE(m);				\
	if (!m->active && !m->inactive)			\
		vm_page_activate(m);			\
	vm_page_unlock_queues();			\

#define RELEASE_PAGE(m)					\
	PREPARE_RELEASE_PAGE(m);			\
	DO_RELEASE_PAGE(m);				\
	dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset);	/* (TEST/DEBUG) */

#if	!VM_FAULT_STATIC_CONFIG
	if (vm_fault_dirty_handling
	 * If there are watchpoints set, then
	 * we don't want to give away write permission
	 * on a read fault.  Make the task write fault,
	 * so that the watchpoint code notices the access.
	    || db_watchpoint_list
#endif	/* MACH_KDB */
		 * If we aren't asking for write permission,
		 * then don't give it away.  We're using write
		 * faults to set the dirty bit.
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;

	if (!vm_fault_interruptible)
		interruptible = THREAD_UNINT;
#else	/* STATIC_CONFIG */
	 * If there are watchpoints set, then
	 * we don't want to give away write permission
	 * on a read fault.  Make the task write fault,
	 * so that the watchpoint code notices the access.
	if (db_watchpoint_list) {
		 * If we aren't asking for write permission,
		 * then don't give it away.  We're using write
		 * faults to set the dirty bit.
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
#endif	/* MACH_KDB */
#endif	/* STATIC_CONFIG */
	interruptible_state = thread_interrupt_level(interruptible);

 * INVARIANTS (through entire routine):
 *
 *	1)	At all times, we must either have the object
 *		lock or a busy page in some object to prevent
 *		some other thread from trying to bring in
 *
 *		Note that we cannot hold any locks during the
 *		pager access or when waiting for memory, so
 *		we use a busy page then.
 *
 *		Note also that we aren't as concerned about more than
 *		one thread attempting to memory_object_data_unlock
 *		the same page at once, so we don't hold the page
 *		as busy then, but do record the highest unlock
 *		value so far.  [Unlock requests may also be delivered
 *
 *	2)	To prevent another thread from racing us down the
 *		shadow chain and entering a new page in the top
 *		object before we do, we must keep a busy page in
 *		the top object while following the shadow chain.
 *
 *	3)	We must increment paging_in_progress on any object
 *		for which we have a busy page
 *
 *	4)	We leave busy pages on the pageout queues.
 *		If the pageout daemon comes across a busy page,
 *		it will remove the page from the pageout queues.
 *
 *	Search for the page at object/offset.
	object = first_object;
	offset = first_offset;
	first_m = VM_PAGE_NULL;
	access_required = fault_type;

		"vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
		(integer_t)object, offset, fault_type, *protection, 0);
	 *	See whether this page is resident
		dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0);	/* (TEST/DEBUG) */
		if (!object->alive) {
			vm_fault_cleanup(object, first_m);
			thread_interrupt_level(interruptible_state);
			return(VM_FAULT_MEMORY_ERROR);
		m = vm_page_lookup(object, offset);
		dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object);	/* (TEST/DEBUG) */
		if (m != VM_PAGE_NULL) {
			 * If the page was pre-paged as part of a
			 * cluster, record the fact.
			 * If we were passed a valid pointer for
			 * "type_of_fault", then we came from
			 * vm_fault... we'll let it deal with
			 * this condition, since it
			 * needs to see m->clustered to correctly
			 * account the pageins.
			if (type_of_fault == NULL && m->clustered) {
				vm_pagein_cluster_used++;
				m->clustered = FALSE;
			 * If the page is being brought in,
			 * wait for it and then retry.
			 *
			 * A possible optimization: if the page
			 * is known to be resident, we can ignore
			 * pages that are absent (regardless of
			 * whether they're busy).
				dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
				wait_result = PAGE_SLEEP(object, m, interruptible);
				    "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
				    (integer_t)object, offset,
				counter(c_vm_fault_page_block_busy_kernel++);

				if (wait_result != THREAD_AWAKENED) {
					vm_fault_cleanup(object, first_m);
					thread_interrupt_level(interruptible_state);
					if (wait_result == THREAD_RESTART)
						return(VM_FAULT_RETRY);
						return(VM_FAULT_INTERRUPTED);
			 * the user needs access to a page that we
			 * encrypted before paging it out.
			 * Decrypt the page now.
			 * Keep it busy to prevent anyone from
			 * accessing it during the decryption.
				vm_page_decrypt(m, 0);
				assert(object == m->object);
				 * Retry from the top, in case
				 * something changed while we were
			ASSERT_PAGE_DECRYPTED(m);
			 * If the page is in error, give up now.
				dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code);	/* (TEST/DEBUG) */
					*error_code = m->page_error;
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_MEMORY_ERROR);
			 * If the pager wants us to restart
			 * at the top of the chain,
			 * typically because it has moved the
			 * page to another pager, then do so.
				dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_RETRY);
			 * If the page isn't busy, but is absent,
			 * then it was deemed "unavailable".
				 * Remove the non-existent page (unless it's
				 * in the top object) and move on down to the
				 * next object (if there is one).
				dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow);	/* (TEST/DEBUG) */
				next_object = object->shadow;
				if (next_object == VM_OBJECT_NULL) {
					assert(!must_be_resident);

					if (object->shadow_severed) {
						thread_interrupt_level(interruptible_state);
						return VM_FAULT_MEMORY_ERROR;
					 * Absent page at bottom of shadow
					 * chain; zero fill the page we left
					 * busy in the first object, and flush
					 * the absent page.  But first we
					 * need to allocate a real page.
					if (VM_PAGE_THROTTLED() ||
					    (real_m = vm_page_grab())
						thread_interrupt_level(
							interruptible_state);
							VM_FAULT_MEMORY_SHORTAGE);
					 * are we protecting the system from
					 * backing store exhaustion.  If so
					 * sleep unless we are privileged.
					if(vm_backing_store_low) {
						if(!(current_task()->priv_flags
								& VM_BACKING_STORE_PRIV)) {
							assert_wait((event_t)
								&vm_backing_store_low,
							vm_fault_cleanup(object,
							thread_block(THREAD_CONTINUE_NULL);
							thread_interrupt_level(
								interruptible_state);
							return(VM_FAULT_RETRY);
					"vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
						(integer_t)object, offset,
						(integer_t)first_object, 0);
					if (object != first_object) {
						vm_object_paging_end(object);
						vm_object_unlock(object);
						object = first_object;
						offset = first_offset;
						first_m = VM_PAGE_NULL;
						vm_object_lock(object);
					assert(real_m->busy);
					vm_page_insert(real_m, object, offset);
					 * Drop the lock while zero filling
					 * page.  Then break because this
					 * is the page we wanted.  Checking
					 * the page lock is a waste of time;
					 * this page was either absent or
					 * newly allocated -- in both cases
					 * it can't be page locked by a pager.
					vm_object_unlock(object);
					vm_page_zero_fill(m);
					vm_object_lock(object);
						*type_of_fault = DBG_ZERO_FILL_FAULT;
					VM_STAT(zero_fill_count++);

					if (bumped_pagein == TRUE) {
						current_task()->pageins--;
					vm_page_lock_queues();
					VM_PAGE_QUEUES_REMOVE(m);
					m->page_ticket = vm_page_ticket;
					assert(m->object != kernel_object);
					assert(m->pageq.next == NULL &&
						m->pageq.prev == NULL);
					if(m->object->size > 0x200000) {
						/* depends on the queues lock */
						queue_enter(&vm_page_queue_zf,
							m, vm_page_t, pageq);
							&vm_page_queue_inactive,
							m, vm_page_t, pageq);
						vm_page_ticket_roll++;
						if(vm_page_ticket_roll ==
							VM_PAGE_TICKETS_IN_ROLL) {
							vm_page_ticket_roll = 0;
								VM_PAGE_TICKET_ROLL_IDS)
					vm_page_inactive_count++;
					vm_page_unlock_queues();
				if (must_be_resident) {
					vm_object_paging_end(object);
				} else if (object != first_object) {
					vm_object_paging_end(object);
					vm_object_absent_release(object);
					vm_page_lock_queues();
					VM_PAGE_QUEUES_REMOVE(m);
					vm_page_unlock_queues();
				"vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
					(integer_t)object, offset,
					(integer_t)next_object,
					offset+object->shadow_offset,0);
				offset += object->shadow_offset;
				hi_offset += object->shadow_offset;
				lo_offset += object->shadow_offset;
				access_required = VM_PROT_READ;
				vm_object_lock(next_object);
				vm_object_unlock(object);
				object = next_object;
				vm_object_paging_begin(object);
			    && ((object != first_object) ||
				(object->copy != VM_OBJECT_NULL))
			    && (fault_type & VM_PROT_WRITE)) {
				 * This is a copy-on-write fault that will
				 * cause us to revoke access to this page, but
				 * this page is in the process of being cleaned
				 * in a clustered pageout.  We must wait until
				 * the cleaning operation completes before
				 * revoking access to the original page,
				 * otherwise we might attempt to remove a
				dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset);	/* (TEST/DEBUG) */
				    "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
				    (integer_t)object, offset,
				/* take an extra ref so that object won't die */
				assert(object->ref_count > 0);
				vm_object_res_reference(object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(object);
				assert(object->ref_count > 0);
				m = vm_page_lookup(object, offset);
				if (m != VM_PAGE_NULL && m->cleaning) {
					PAGE_ASSERT_WAIT(m, interruptible);
					vm_object_unlock(object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);
					vm_object_unlock(object);
					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
			 * If the desired access to this page has
			 * been locked out, request that it be unlocked.
			if (access_required & m->page_lock) {
				if ((access_required & m->unlock_request) != access_required) {
					vm_prot_t	new_unlock_request;
					dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready);	/* (TEST/DEBUG) */
					if (!object->pager_ready) {
						"vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
							(integer_t)object, offset,
						/* take an extra ref */
						assert(object->ref_count > 0);
						vm_object_res_reference(object);
						vm_fault_cleanup(object,
						counter(c_vm_fault_page_block_backoff_kernel++);
						vm_object_lock(object);
						assert(object->ref_count > 0);
						if (!object->pager_ready) {
							wait_result = vm_object_assert_wait(
								VM_OBJECT_EVENT_PAGER_READY,
							vm_object_unlock(object);
							if (wait_result == THREAD_WAITING)
								wait_result = thread_block(THREAD_CONTINUE_NULL);
							vm_object_deallocate(object);
							vm_object_unlock(object);
							vm_object_deallocate(object);
							thread_interrupt_level(interruptible_state);
							return VM_FAULT_RETRY;
					new_unlock_request = m->unlock_request =
						(access_required | m->unlock_request);
					vm_object_unlock(object);
					    "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
					    (integer_t)object, offset,
					    (integer_t)m, new_unlock_request, 0);
					if ((rc = memory_object_data_unlock(
						offset + object->paging_offset,
						printf("vm_fault: memory_object_data_unlock failed\n");
						vm_object_lock(object);
						vm_fault_cleanup(object, first_m);
						thread_interrupt_level(interruptible_state);
						return((rc == MACH_SEND_INTERRUPTED) ?
							VM_FAULT_INTERRUPTED :
							VM_FAULT_MEMORY_ERROR);
					vm_object_lock(object);
					"vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
					access_required, (integer_t)object,
					offset, (integer_t)m, 0);
				/* take an extra ref so object won't die */
				assert(object->ref_count > 0);
				object->ref_count++;
				vm_object_res_reference(object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(object);
				assert(object->ref_count > 0);
				m = vm_page_lookup(object, offset);
				if (m != VM_PAGE_NULL &&
				    (access_required & m->page_lock) &&
				    !((access_required & m->unlock_request) != access_required)) {
					PAGE_ASSERT_WAIT(m, interruptible);
					vm_object_unlock(object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);
					vm_object_unlock(object);
					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
			 * We mark the page busy and leave it on
			 * the pageout queues.  If the pageout
			 * daemon comes across it, then it will
				dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
#if	!VM_FAULT_STATIC_CONFIG
			if (!software_reference_bits) {
				vm_page_lock_queues();
					vm_stat.reactivations++;
				VM_PAGE_QUEUES_REMOVE(m);
				vm_page_unlock_queues();
			"vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
			(integer_t)object, offset, (integer_t)m, 0, 0);
			(object->pager_created) &&
			LOOK_FOR(object, offset) &&
		dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object);	/* (TEST/DEBUG) */
		if ((look_for_page || (object == first_object))
		    && !must_be_resident
		    && !(object->phys_contiguous)) {
			 * Allocate a new page for this object/offset
			m = vm_page_grab_fictitious();
			dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object);	/* (TEST/DEBUG) */
			if (m == VM_PAGE_NULL) {
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_FICTITIOUS_SHORTAGE);
			vm_page_insert(m, object, offset);
		if ((look_for_page && !must_be_resident)) {
			 * If the memory manager is not ready, we
			 * cannot make requests.
			if (!object->pager_ready) {
				dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0);	/* (TEST/DEBUG) */
				if(m != VM_PAGE_NULL)
					"vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
					(integer_t)object, offset, 0, 0, 0);
				/* take an extra ref so object won't die */
				assert(object->ref_count > 0);
				object->ref_count++;
				vm_object_res_reference(object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(object);
				assert(object->ref_count > 0);
				if (!object->pager_ready) {
					wait_result = vm_object_assert_wait(object,
						VM_OBJECT_EVENT_PAGER_READY,
					vm_object_unlock(object);
					if (wait_result == THREAD_WAITING)
						wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);
					vm_object_unlock(object);
					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
			if(object->phys_contiguous) {
				if(m != VM_PAGE_NULL) {
			if (object->internal) {
				 * Requests to the default pager
				 * must reserve a real page in advance,
				 * because the pager's data-provided
				 * won't block for pages.  IMPORTANT:
				 * this acts as a throttling mechanism
				 * for data_requests to the default
				dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
				if (m->fictitious && !vm_page_convert(m)) {
					vm_fault_cleanup(object, first_m);
					thread_interrupt_level(interruptible_state);
					return(VM_FAULT_MEMORY_SHORTAGE);
			} else if (object->absent_count >
						vm_object_absent_max) {
				 * If there are too many outstanding page
				 * requests pending on this object, we
				 * wait for them to be resolved now.
				dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
				if(m != VM_PAGE_NULL)
				/* take an extra ref so object won't die */
				assert(object->ref_count > 0);
				object->ref_count++;
				vm_object_res_reference(object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(object);
				assert(object->ref_count > 0);
				if (object->absent_count > vm_object_absent_max) {
					vm_object_absent_assert_wait(object,
					vm_object_unlock(object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);
					vm_object_unlock(object);
					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
				 * Indicate that the page is waiting for data
				 * from the memory manager.
				if(m != VM_PAGE_NULL) {
					m->list_req_pending = TRUE;
					object->absent_count++;
			cluster_start = offset;
			 * lengthen the cluster by the pages in the working set
			    (current_task()->dynamic_working_set != 0)) {
				cluster_end = cluster_start + length;
				/* tws values for start and end are just
				 * suggestions.  Therefore, as long as
				 * build_cluster does not use pointers or
				 * take action based on values that
				 * could be affected by re-entrance we
				 * do not need to take the map lock.
				 */
				cluster_end = offset + PAGE_SIZE_64;
					current_task()->dynamic_working_set,
					object, &cluster_start,
					&cluster_end, 0x40000);
				length = cluster_end - cluster_start;
			dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0);	/* (TEST/DEBUG) */
			 * We have a busy page, so we can
			 * release the object lock.
			vm_object_unlock(object);
			 * Call the memory manager to retrieve the data.
				*type_of_fault = ((int)length << 8) | DBG_PAGEIN_FAULT;
			current_task()->pageins++;
			bumped_pagein = TRUE;
			 * If this object uses a copy_call strategy,
			 * and we are interested in a copy of this object
			 * (having gotten here only by following a
			 * shadow chain), then tell the memory manager
			 * via a flag added to the desired_access
			 * parameter, so that it can detect a race
			 * between our walking down the shadow chain
			 * and its pushing pages up into a copy of
			 * the object that it manages.
			if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
			    object != first_object) {
				wants_copy_flag = VM_PROT_WANTS_COPY;
				wants_copy_flag = VM_PROT_NONE;
			"vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
				(integer_t)object, offset, (integer_t)m,
				access_required | wants_copy_flag, 0);
			rc = memory_object_data_request(object->pager,
				cluster_start + object->paging_offset,
				access_required | wants_copy_flag);
			dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc);	/* (TEST/DEBUG) */
			if (rc != KERN_SUCCESS) {
				if (rc != MACH_SEND_INTERRUPTED
					printf("%s(0x%x, 0x%xll, 0x%xll, 0x%x) failed, rc=%d\n",
						"memory_object_data_request",
						cluster_start + object->paging_offset,
						length, access_required, rc);
				 * Don't want to leave a busy page around,
				 * but the data request may have blocked,
				 * so check if it's still there and busy.
				if(!object->phys_contiguous) {
					vm_object_lock(object);
					for (; length; length -= PAGE_SIZE,
					    cluster_start += PAGE_SIZE_64) {
						if ((p = vm_page_lookup(object,
						    && p->absent && p->busy
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return((rc == MACH_SEND_INTERRUPTED) ?
					VM_FAULT_INTERRUPTED :
					VM_FAULT_MEMORY_ERROR);
			vm_object_lock(object);
			if ((interruptible != THREAD_UNINT) &&
			    (current_thread()->state & TH_ABORT)) {
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_INTERRUPTED);
			if (m == VM_PAGE_NULL &&
			    object->phys_contiguous) {
				 * No page here means that the object we
				 * initially looked up was "physically
				 * contiguous" (i.e. device memory).  However,
				 * with Virtual VRAM, the object might not
				 * be backed by that device memory anymore,
				 * so we're done here only if the object is
				 * still "phys_contiguous".
				 * Otherwise, if the object is no longer
				 * "phys_contiguous", we need to retry the
				 * page fault against the object's new backing
				 * store (different memory object).
			 * Retry with same object/offset, since new data may
			 * be in a different page (i.e., m is meaningless at
		 * The only case in which we get here is if
		 * object has no pager (or unwiring).  If the pager doesn't
		 * have the page this is handled in the m->absent case above
		 * (and if you change things here you should look above).
			dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
		if (object == first_object)
			assert(m == VM_PAGE_NULL);
		    "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
			(integer_t)object, offset, (integer_t)m,
			(integer_t)object->shadow, 0);
		 * Move on to the next object.  Lock the next
		 * object before unlocking the current one.
		next_object = object->shadow;
		if (next_object == VM_OBJECT_NULL) {
			assert(!must_be_resident);
			 * If there's no object left, fill the page
			 * in the top object with zeros.  But first we
			 * need to allocate a real page.
			if (object != first_object) {
				vm_object_paging_end(object);
				vm_object_unlock(object);
				object = first_object;
				offset = first_offset;
				vm_object_lock(object);
			assert(m->object == object);
			first_m = VM_PAGE_NULL;
			if(m == VM_PAGE_NULL) {
				if (m == VM_PAGE_NULL) {
						object, VM_PAGE_NULL);
					thread_interrupt_level(
						interruptible_state);
					return(VM_FAULT_MEMORY_SHORTAGE);
			if (object->shadow_severed) {
				vm_fault_cleanup(object, VM_PAGE_NULL);
				thread_interrupt_level(interruptible_state);
				return VM_FAULT_MEMORY_ERROR;
1414 * backing store exhaustion. If so
1415 * sleep unless we are privileged.
1418 if(vm_backing_store_low
) {
1419 if(!(current_task()->priv_flags
1420 & VM_BACKING_STORE_PRIV
)) {
1421 assert_wait((event_t
)
1422 &vm_backing_store_low
,
1425 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1426 thread_block(THREAD_CONTINUE_NULL
);
1427 thread_interrupt_level(
1428 interruptible_state
);
1429 return(VM_FAULT_RETRY
);
1433 if (VM_PAGE_THROTTLED() ||
1434 (m
->fictitious
&& !vm_page_convert(m
))) {
1436 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1437 thread_interrupt_level(interruptible_state
);
1438 return(VM_FAULT_MEMORY_SHORTAGE
);
1440 m
->no_isync
= FALSE
;
1442 if (!no_zero_fill
) {
1443 vm_object_unlock(object
);
1444 vm_page_zero_fill(m
);
1445 vm_object_lock(object
);
1448 *type_of_fault
= DBG_ZERO_FILL_FAULT
;
1449 VM_STAT(zero_fill_count
++);
1451 if (bumped_pagein
== TRUE
) {
1453 current_task()->pageins
--;
1455 vm_page_lock_queues();
1456 VM_PAGE_QUEUES_REMOVE(m
);
1457 assert(!m
->laundry
);
1458 assert(m
->object
!= kernel_object
);
1459 assert(m
->pageq
.next
== NULL
&&
1460 m
->pageq
.prev
== NULL
);
1461 if(m
->object
->size
> 0x200000) {
1462 m
->zero_fill
= TRUE
;
1463 /* depends on the queues lock */
1465 queue_enter(&vm_page_queue_zf
,
1466 m
, vm_page_t
, pageq
);
1469 &vm_page_queue_inactive
,
1470 m
, vm_page_t
, pageq
);
1472 m
->page_ticket
= vm_page_ticket
;
1473 vm_page_ticket_roll
++;
1474 if(vm_page_ticket_roll
== VM_PAGE_TICKETS_IN_ROLL
) {
1475 vm_page_ticket_roll
= 0;
1476 if(vm_page_ticket
==
1477 VM_PAGE_TICKET_ROLL_IDS
)
1483 vm_page_inactive_count
++;
1484 vm_page_unlock_queues();
1486 pmap_clear_modify(m
->phys_page
);
1491 if ((object
!= first_object
) || must_be_resident
)
1492 vm_object_paging_end(object
);
1493 offset
+= object
->shadow_offset
;
1494 hi_offset
+= object
->shadow_offset
;
1495 lo_offset
+= object
->shadow_offset
;
1496 access_required
= VM_PROT_READ
;
1497 vm_object_lock(next_object
);
1498 vm_object_unlock(object
);
1499 object
= next_object
;
1500 vm_object_paging_begin(object
);
	 *	PAGE HAS BEEN FOUND.
	 *		busy, so that we can play with it;
	 *		not absent, so that nobody else will fill it;
	 *		possibly eligible for pageout;
	 *
	 *	The top-level page (first_m) is:
	 *		VM_PAGE_NULL if the page was found in the
	 *		busy, not absent, and ineligible for pageout.
	 *
	 *	The current object (object) is locked.  A paging
	 *	reference is held for the current and top-level
	dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
#if	EXTRA_ASSERTIONS
	if(m != VM_PAGE_NULL) {
		assert(m->busy && !m->absent);
		assert((first_m == VM_PAGE_NULL) ||
			(first_m->busy && !first_m->absent &&
			 !first_m->active && !first_m->inactive));
#endif	/* EXTRA_ASSERTIONS */
	 * If we found a page, we must have decrypted it before we
	if (m != VM_PAGE_NULL) {
		ASSERT_PAGE_DECRYPTED(m);
	    "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
		(integer_t)object, offset, (integer_t)m,
		(integer_t)first_object, (integer_t)first_m);
	 * If the page is being written, but isn't
	 * already owned by the top-level object,
	 * we have to copy it into a new page owned
	 * by the top-level object.
	if ((object != first_object) && (m != VM_PAGE_NULL)) {
		 * We only really need to copy if we
		dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type);	/* (TEST/DEBUG) */
		if (fault_type & VM_PROT_WRITE) {
			assert(!must_be_resident);
			 * are we protecting the system from
			 * backing store exhaustion.  If so
			 * sleep unless we are privileged.
			if(vm_backing_store_low) {
				if(!(current_task()->priv_flags
						& VM_BACKING_STORE_PRIV)) {
					assert_wait((event_t)
						&vm_backing_store_low,
					vm_fault_cleanup(object, first_m);
					thread_block(THREAD_CONTINUE_NULL);
					thread_interrupt_level(
						interruptible_state);
					return(VM_FAULT_RETRY);
			 * If we try to collapse first_object at this
			 * point, we may deadlock when we try to get
			 * the lock on an intermediate object (since we
			 * have the bottom object locked).  We can't
			 * unlock the bottom object, because the page
			 * we found may move (by collapse) if we do.
			 *
			 * Instead, we first copy the page.  Then, when
			 * we have no more use for the bottom object,
			 * we unlock it and try to collapse.
			 *
			 * Note that we copy the page even if we didn't
			 * need to... that's the breaks.
			 * Allocate a page for the copy
			copy_m = vm_page_grab();
			if (copy_m == VM_PAGE_NULL) {
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_MEMORY_SHORTAGE);
			    "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
				(integer_t)object, offset,
				(integer_t)m, (integer_t)copy_m, 0);
			vm_page_copy(m, copy_m);
			 * If another map is truly sharing this
			 * page with us, we have to flush all
			 * uses of the original page, since we
			 * can't distinguish those which want the
			 * original from those which need the
			 *
			 * XXXO If we know that only one map has
			 * access to this page, then we could
			 * avoid the pmap_disconnect() call.
			vm_page_lock_queues();
			assert(!m->cleaning);
			pmap_disconnect(m->phys_page);
			vm_page_deactivate(m);
			copy_m->dirty = TRUE;
			 * Setting reference here prevents this fault from
			 * being counted as a (per-thread) reactivate as well
			 * as a copy-on-write.
			first_m->reference = TRUE;
			vm_page_unlock_queues();
			 * We no longer need the old page or object.
			PAGE_WAKEUP_DONE(m);
			vm_object_paging_end(object);
			vm_object_unlock(object);
				*type_of_fault = DBG_COW_FAULT;
			VM_STAT(cow_faults++);
			current_task()->cow_faults++;
			object = first_object;
			offset = first_offset;
			vm_object_lock(object);
			VM_PAGE_FREE(first_m);
			first_m = VM_PAGE_NULL;
			assert(copy_m->busy);
			vm_page_insert(copy_m, object, offset);
			 * Now that we've gotten the copy out of the
			 * way, let's try to collapse the top object.
			 * But we have to play ugly games with
			 * paging_in_progress to do that...
			vm_object_paging_end(object);
			vm_object_collapse(object, offset);
			vm_object_paging_begin(object);
		*protection &= (~VM_PROT_WRITE);
	 * Now check whether the page needs to be pushed into the
	 * copy object.  The use of asymmetric copy on write for
	 * shared temporary objects means that we may do two copies to
	 * satisfy the fault; one above to get the page from a
	 * shadowed object, and one here to push it into the copy.
	while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
	       (m != VM_PAGE_NULL)) {
		vm_object_offset_t	copy_offset;
		dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type);	/* (TEST/DEBUG) */
		 * If the page is being written, but hasn't been
		 * copied to the copy-object, we have to copy it there.
		if ((fault_type & VM_PROT_WRITE) == 0) {
			*protection &= ~VM_PROT_WRITE;
		 * If the page was guaranteed to be resident,
		 * we must have already performed the copy.
		if (must_be_resident)
		 * Try to get the lock on the copy_object.
		if (!vm_object_lock_try(copy_object)) {
			vm_object_unlock(object);
			mutex_pause();	/* wait a bit */
			vm_object_lock(object);
		 * Make another reference to the copy-object,
		 * to keep it from disappearing during the
		assert(copy_object->ref_count > 0);
		copy_object->ref_count++;
		VM_OBJ_RES_INCR(copy_object);
		 * Does the page exist in the copy?
		copy_offset = first_offset - copy_object->shadow_offset;
		if (copy_object->size <= copy_offset)
			 * Copy object doesn't cover this page -- do nothing.
		    vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
			/* Page currently exists in the copy object */
				 * If the page is being brought
				 * in, wait for it and then retry.
				/* take an extra ref so object won't die */
				assert(copy_object->ref_count > 0);
				copy_object->ref_count++;
				vm_object_res_reference(copy_object);
				vm_object_unlock(copy_object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(copy_object);
				assert(copy_object->ref_count > 0);
				VM_OBJ_RES_DECR(copy_object);
				copy_object->ref_count--;
				assert(copy_object->ref_count > 0);
				copy_m = vm_page_lookup(copy_object, copy_offset);
				 * it's OK if the "copy_m" page is encrypted,
				 * because we're not moving it nor handling its
				if (copy_m != VM_PAGE_NULL && copy_m->busy) {
					PAGE_ASSERT_WAIT(copy_m, interruptible);
					vm_object_unlock(copy_object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(copy_object);
					vm_object_unlock(copy_object);
					vm_object_deallocate(copy_object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
		else if (!PAGED_OUT(copy_object, copy_offset)) {
			 * If PAGED_OUT is TRUE, then the page used to exist
			 * in the copy-object, and has already been paged out.
			 * We don't need to repeat this.  If PAGED_OUT is
			 * FALSE, then either we don't know (!pager_created,
			 * for example) or it hasn't been paged out.
			 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
			 * We must copy the page to the copy object.
			 * are we protecting the system from
			 * backing store exhaustion.  If so
			 * sleep unless we are privileged.
			if(vm_backing_store_low) {
				if(!(current_task()->priv_flags
						& VM_BACKING_STORE_PRIV)) {
					assert_wait((event_t)
						&vm_backing_store_low,
					VM_OBJ_RES_DECR(copy_object);
					copy_object->ref_count--;
					assert(copy_object->ref_count > 0);
					vm_object_unlock(copy_object);
					vm_fault_cleanup(object, first_m);
					thread_block(THREAD_CONTINUE_NULL);
					thread_interrupt_level(
						interruptible_state);
					return(VM_FAULT_RETRY);
			 * Allocate a page for the copy
			copy_m = vm_page_alloc(copy_object, copy_offset);
			if (copy_m == VM_PAGE_NULL) {
				VM_OBJ_RES_DECR(copy_object);
				copy_object->ref_count--;
				assert(copy_object->ref_count > 0);
				vm_object_unlock(copy_object);
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_MEMORY_SHORTAGE);
			 * Must copy page into copy-object.
			vm_page_copy(m, copy_m);
			 * If the old page was in use by any users
			 * of the copy-object, it must be removed
			 * from all pmaps.  (We can't know which
			vm_page_lock_queues();
			assert(!m->cleaning);
			pmap_disconnect(m->phys_page);
			copy_m->dirty = TRUE;
			vm_page_unlock_queues();
			 * If there's a pager, then immediately
			 * page out this page, using the "initialize"
			 * option.  Else, we use the copy.
			    ((!copy_object->pager_created) ||
				vm_external_state_get(
					copy_object->existence_map, copy_offset)
					== VM_EXTERNAL_STATE_ABSENT)
			    (!copy_object->pager_created)
				vm_page_lock_queues();
				vm_page_activate(copy_m);
				vm_page_unlock_queues();
				PAGE_WAKEUP_DONE(copy_m);
				assert(copy_m->busy == TRUE);
				 * The page is already ready for pageout:
				 * not on pageout queues and busy.
				 * Unlock everything except the
				 * copy_object itself.
				vm_object_unlock(object);
				 * Write the page to the copy-object,
				 * flushing it from the kernel.
				vm_pageout_initialize_page(copy_m);
				 * Since the pageout may have
				 * temporarily dropped the
				 * copy_object's lock, we
				 * check whether we'll have
				 * to deallocate the hard way.
				if ((copy_object->shadow != object) ||
				    (copy_object->ref_count == 1)) {
					vm_object_unlock(copy_object);
					vm_object_deallocate(copy_object);
					vm_object_lock(object);
				 * Pick back up the old object's
				 * lock.  [It is safe to do so,
				 * since it must be deeper in the
				vm_object_lock(object);
			 * Because we're pushing a page upward
			 * in the object tree, we must restart
			 * any faults that are waiting here.
			 * [Note that this is an expansion of
			 * PAGE_WAKEUP that uses the THREAD_RESTART
			 * wait result].  Can't turn off the page's
			 * busy bit because we're not done with it.
			thread_wakeup_with_result((event_t) m,
		 * The reference count on copy_object must be
		 * at least 2: one for our extra reference,
		 * and at least one from the outside world
		 * (we checked that when we last locked
		copy_object->ref_count--;
		assert(copy_object->ref_count > 0);
		VM_OBJ_RES_DECR(copy_object);
		vm_object_unlock(copy_object);
	*top_page = first_m;
	    "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
		(integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
	 * If the page can be written, assume that it will be.
	 * [Earlier, we restrict the permission to allow write
	 * access only if the fault so required, so we don't
	 * mark read-only data as dirty.]
	if(m != VM_PAGE_NULL) {
#if	!VM_FAULT_STATIC_CONFIG
		if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
		if (vm_page_deactivate_behind)
			vm_fault_deactivate_behind(object, offset, behavior);
		vm_object_unlock(object);
	thread_interrupt_level(interruptible_state);
	dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0);	/* (TEST/DEBUG) */
	return(VM_FAULT_SUCCESS);

	vm_fault_cleanup(object, first_m);
	counter(c_vm_fault_page_block_backoff_kernel++);
	thread_block(THREAD_CONTINUE_NULL);
	thread_interrupt_level(interruptible_state);
	if (wait_result == THREAD_INTERRUPTED)
		return VM_FAULT_INTERRUPTED;
	return VM_FAULT_RETRY;
 * Routine:	vm_fault_tws_insert
 *	Add fault information to the task working set.
 *	We always insert the base object/offset pair
 *	rather than the actual object/offset.
 *	Map and real_map locked.
 *	Object locked and referenced.
 *	TRUE if startup file should be written.
 *	With object locked and still referenced.
 *	But we may drop the object lock temporarily.
vm_fault_tws_insert(
	vm_map_offset_t vaddr,
	vm_object_offset_t offset)
	tws_hash_line_t	line;
	boolean_t	result = FALSE;

	/* Avoid possible map lock deadlock issues */
	if (map == kernel_map || map == kalloc_map ||
	    real_map == kernel_map || real_map == kalloc_map)

	task = current_task();
	if (task->dynamic_working_set != 0) {
		vm_object_t	base_object;
		vm_object_t	base_shadow;
		vm_object_offset_t base_offset;
		base_object = object;
		base_offset = offset;
		while ((base_shadow = base_object->shadow)) {
			vm_object_lock(base_shadow);
			vm_object_unlock(base_object);
				base_object->shadow_offset;
			base_object = base_shadow;
			task->dynamic_working_set,
			base_offset, base_object,
		if (kr == KERN_OPERATION_TIMED_OUT){
			if (base_object != object) {
				vm_object_unlock(base_object);
				vm_object_lock(object);
		} else if (kr != KERN_SUCCESS) {
			if(base_object != object)
				vm_object_reference_locked(base_object);
				task->dynamic_working_set,
				base_offset, base_object,
			if(base_object != object) {
				vm_object_unlock(base_object);
				vm_object_deallocate(base_object);
			if(kr == KERN_NO_SPACE) {
				if (base_object == object)
					vm_object_unlock(object);
				tws_expand_working_set(
					task->dynamic_working_set,
					TWS_HASH_LINE_COUNT,
				if (base_object == object)
					vm_object_lock(object);
			} else if(kr == KERN_OPERATION_TIMED_OUT) {
				if(base_object != object)
					vm_object_lock(object);
			} else if (base_object != object) {
				vm_object_unlock(base_object);
				vm_object_lock(object);
 *	Handle page faults, including pseudo-faults
 *	used to change the wiring status of pages.
 *	Explicit continuations have been removed.
 *	vm_fault and vm_fault_page save mucho state
 *	in the moral equivalent of a closure.  The state
 *	structure is allocated when first entering vm_fault
 *	and deallocated when leaving vm_fault.
extern int _map_enter_debug;

	vm_map_offset_t	vaddr,
	vm_prot_t	fault_type,
	boolean_t	change_wiring,
	vm_map_offset_t	caller_pmap_addr)
	vm_map_version_t	version;	/* Map version for verification */
	boolean_t		wired;		/* Should mapping be wired down? */
	vm_object_t		object;		/* Top-level object */
	vm_object_offset_t	offset;		/* Top-level offset */
	vm_prot_t		prot;		/* Protection for mapping */
	vm_behavior_t		behavior;	/* Expected paging behavior */
	vm_map_offset_t		lo_offset, hi_offset;
	vm_object_t		old_copy_object; /* Saved copy object */
	vm_page_t		result_page;	/* Result of vm_fault_page */
	vm_page_t		top_page;	/* Placeholder page */
	vm_page_t		m;		/* Fast access to result_page */
	kern_return_t		error_code = 0;	/* page error reasons */
	vm_object_t		cur_object;
	vm_object_offset_t	cur_offset;
	vm_object_t		new_object;
	vm_map_t		real_map = map;
	vm_map_t		original_map = map;
	boolean_t		interruptible_state;
	unsigned int		cache_attr;
	int			write_startup_file = 0;
	boolean_t		need_activation;
	vm_prot_t		full_fault_type;

	if (get_preemption_level() != 0)
		return (KERN_FAILURE);

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
	/* at present we do not fully check for execute permission */
	/* we generally treat it as read except in certain device */
	/* memory settings */
	full_fault_type = fault_type;
	if(fault_type & VM_PROT_EXECUTE) {
		fault_type &= ~VM_PROT_EXECUTE;
		fault_type |= VM_PROT_READ;

	interruptible_state = thread_interrupt_level(interruptible);

	 * assume we will hit a page in the cache
	 * otherwise, explicitly override with
	 * the real fault type once we determine it
	type_of_fault = DBG_CACHE_HIT_FAULT;

	current_task()->faults++;

	 * Find the backing store object and offset into
	 * it to begin the search.
	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
			&behavior, &lo_offset, &hi_offset, &real_map);

	//if (_map_enter_debug)printf("vm_map_lookup_locked(map=0x%x, addr=0x%llx, prot=%d wired=%d) = %d\n", map, vaddr, prot, wired, kr);

	pmap = real_map->pmap;

	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);

	 * If the page is wired, we must fault for the current protection
	 * value, to avoid further faults.
		fault_type = prot | VM_PROT_WRITE;

#if	VM_FAULT_CLASSIFY
	 * Temporary data gathering code
	vm_fault_classify(object, offset, fault_type);
	/*
	 *	Fast fault code.  The basic idea is to do as much as
	 *	possible while holding the map lock and object locks.
	 *	Busy pages are not used until the object lock has to
	 *	be dropped to do something (copy, zero fill, pmap enter).
	 *	Similarly, paging references aren't acquired until that
	 *	point, and object references aren't used.
	 *
	 *	If we can figure out what to do
	 *	(zero fill, copy on write, pmap enter) while holding
	 *	the locks, then it gets done.  Otherwise, we give up,
	 *	and use the original fault path (which doesn't hold
	 *	the map lock, and relies on busy pages).
	 *	The give up cases include:
	 *	- Have to talk to pager.
	 *	- Page is busy, absent or in error.
	 *	- Pager has locked out desired access.
	 *	- Fault needs to be restarted.
	 *	- Have to push page into copy object.
	 *
	 *	The code is an infinite loop that moves one level down
	 *	the shadow chain each time.  cur_object and cur_offset
	 *	refer to the current object being examined. object and offset
	 *	are the original object from the map.  The loop is at the
	 *	top level if and only if object and cur_object are the same.
	 *
	 *	Invariants:  Map lock is held throughout.  Lock is held on
	 *	original object and cur_object (if different) when
	 *	continuing or exiting loop.
	 */
	/*
	 *	If this page is to be inserted in a copy delay object
	 *	for writing, and if the object has a copy, then the
	 *	copy delay strategy is implemented in the slow fault page.
	 */
	if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
	    object->copy == VM_OBJECT_NULL ||
	    (fault_type & VM_PROT_WRITE) == 0) {
		cur_object = object;
		cur_offset = offset;

			m = vm_page_lookup(cur_object, cur_offset);
			if (m != VM_PAGE_NULL) {
				wait_result_t	result;

				if (object != cur_object)
					vm_object_unlock(object);

				vm_map_unlock_read(map);
				if (real_map != map)
					vm_map_unlock(real_map);

#if	!VM_FAULT_STATIC_CONFIG
				if (!vm_fault_interruptible)
					interruptible = THREAD_UNINT;
#endif
				result = PAGE_ASSERT_WAIT(m, interruptible);

				vm_object_unlock(cur_object);

				if (result == THREAD_WAITING) {
					result = thread_block(THREAD_CONTINUE_NULL);

					counter(c_vm_fault_page_block_busy_kernel++);
				}
				if (result == THREAD_AWAKENED || result == THREAD_RESTART)
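				/*
				 * A busy page means another thread is already
				 * manipulating it, so the fast path cannot use
				 * it.  The code above drops every lock it holds,
				 * asserts a wait on the page and blocks; once
				 * awakened (or restarted) the whole fault is
				 * retried from the top rather than trusting any
				 * state that may have changed while we slept.
				 */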
			if (m->unusual && (m->error || m->restart || m->private
			    || m->absent || (fault_type & m->page_lock))) {
				/*
				 *	Unusual case. Give up.
				 */

			/*
			 * We've soft-faulted (because it's not in the page
			 * table) on an encrypted page.
			 * Keep the page "busy" so that no one messes with
			 * it during the decryption.
			 * Release the extra locks we're holding, keep only
			 * the page's VM object lock.
			 */
			if (object != cur_object) {
				vm_object_unlock(object);
			}
			vm_map_unlock_read(map);
			if (real_map != map)
				vm_map_unlock(real_map);

			vm_page_decrypt(m, 0);

			PAGE_WAKEUP_DONE(m);
			vm_object_unlock(m->object);

			/*
			 * Retry from the top, in case anything
			 * changed while we were decrypting...
			 */

			ASSERT_PAGE_DECRYPTED(m);
			/*
			 *	Two cases of map in faults:
			 *	    - At top level w/o copy object.
			 *	    - Read fault anywhere.
			 *		--> must disallow write.
			 */
			if (object == cur_object &&
			    object->copy == VM_OBJECT_NULL)
				goto FastMapInFault;

			if ((fault_type & VM_PROT_WRITE) == 0) {
				boolean_t	sequential;

				prot &= ~VM_PROT_WRITE;
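				/*
				 * Read faults are deliberately mapped without
				 * write permission here even if the region is
				 * writable: a later store will then take its own
				 * write fault, and only that fault has to pay for
				 * the copy-on-write push handled further down.
				 */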
				/*
				 *	Set up to map the page ...
				 *	mark the page busy, drop
				 *	locks and take a paging reference
				 *	on the object with the page.
				 */
				if (object != cur_object) {
					vm_object_unlock(object);
					object = cur_object;
				}

				vm_object_paging_begin(object);
				/*
				 *	Check a couple of global reasons to
				 *	be conservative about write access.
				 *	Then do the pmap_enter.
				 */
#if	!VM_FAULT_STATIC_CONFIG
				if (vm_fault_dirty_handling
				    || db_watchpoint_list
				    && (fault_type & VM_PROT_WRITE) == 0)
					prot &= ~VM_PROT_WRITE;
#else	/* STATIC_CONFIG */
#if	MACH_KDB
				if (db_watchpoint_list
				    && (fault_type & VM_PROT_WRITE) == 0)
					prot &= ~VM_PROT_WRITE;
#endif	/* MACH_KDB */
#endif	/* STATIC_CONFIG */
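				/*
				 * When kernel-debugger watchpoints are active
				 * (db_watchpoint_list is non-empty), write access
				 * is taken away here as well, presumably so that
				 * a store into a watched page still traps instead
				 * of being satisfied by a writable mapping.
				 */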
				cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

				need_activation = FALSE;

				if (m->no_isync == TRUE) {
					m->no_isync = FALSE;
					pmap_sync_page_data_phys(m->phys_page);

					if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
						/*
						 * found it in the cache, but this
						 * is the first fault-in of the page (no_isync == TRUE)
						 * so it must have come in as part of
						 * a cluster... account 1 pagein against it
						 */
						current_task()->pageins++;
						type_of_fault = DBG_PAGEIN_FAULT;
					}
					need_activation = TRUE;

				} else if (cache_attr != VM_WIMG_DEFAULT) {
					pmap_sync_page_attributes_phys(m->phys_page);
				}
				PMAP_ENTER(caller_pmap,
					   caller_pmap_addr, m,
					   prot, cache_attr, wired);

				PMAP_ENTER(pmap, vaddr, m,
					   prot, cache_attr, wired);
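				/*
				 * Two variants of the physical-map entry appear
				 * above: the first PMAP_ENTER targets a
				 * caller-supplied pmap at caller_pmap_addr, the
				 * second the pmap belonging to the map that took
				 * the fault.  In the original flow only one of
				 * them runs, apparently selected on whether a
				 * caller_pmap was handed in.
				 */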
				/*
				 *	Hold queues lock to manipulate
				 *	the page queues.  Change wiring
				 *	case is obvious.  In soft ref bits
				 *	case activate page only if it fell
				 *	off paging queues, otherwise just
				 *	activate it if it's inactive.
				 *
				 *	NOTE: original vm_fault code will
				 *	move active page to back of active
				 *	queue.  This code doesn't.
				 */
				vm_page_lock_queues();

				vm_pagein_cluster_used++;
				m->clustered = FALSE;

				m->reference = TRUE;

				if (change_wiring) {
				}
#if	VM_FAULT_STATIC_CONFIG
				if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
					vm_page_activate(m);
#else
				else if (software_reference_bits) {
					if (!m->active && !m->inactive)
						vm_page_activate(m);
				}
				else if (!m->active) {
					vm_page_activate(m);
				}
#endif
				vm_page_unlock_queues();
				/*
				 *	That's it, clean up and return.
				 */
				PAGE_WAKEUP_DONE(m);

				sequential = (sequential && vm_page_deactivate_behind) ?
					vm_fault_deactivate_behind(object, cur_offset, behavior) :

				/*
				 * Add non-sequential pages to the working set.
				 * The sequential pages will be brought in through
				 * normal clustering behavior.
				 */
				if (!sequential && !object->private) {
					write_startup_file =
						vm_fault_tws_insert(map, real_map, vaddr,
								    object, cur_offset);
				}

				vm_object_paging_end(object);
				vm_object_unlock(object);

				vm_map_unlock_read(map);
				vm_map_unlock(real_map);

				if (write_startup_file)
					tws_send_startup_info(current_task());

				thread_interrupt_level(interruptible_state);

				KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
						      type_of_fault & 0xff,

				return KERN_SUCCESS;
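				/*
				 * End of the mapped-in fast path: the fault was
				 * charged to the task at the top of the routine,
				 * the matching DBG_FUNC_END trace event is emitted
				 * (the low byte of type_of_fault apparently carries
				 * the fault kind for the trace tools), and the
				 * caller sees KERN_SUCCESS without vm_fault_page()
				 * ever running.
				 */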
			/*
			 *	Copy on write fault.  If objects match, then
			 *	object->copy must not be NULL (else control
			 *	would be in previous code block), and we
			 *	have a potential push into the copy object
			 *	with which we won't cope here.
			 */
			if (cur_object == object)
				break;

			/*
			 *	This is now a shadow based copy on write
			 *	fault -- it requires a copy up the shadow
			 *
			 *	Allocate a page in the original top level
			 *	object. Give up if allocate fails.  Also
			 *	need to remember current page, as it's the
			 *	source of the copy.
			 */
			if (m == VM_PAGE_NULL) {
				break;
			}
			/*
			 *	Now do the copy.  Mark the source busy
			 *	and take out paging references on both
			 *
			 *	NOTE: This code holds the map lock across
			 *	the copy.
			 */
			vm_page_copy(cur_m, m);
			vm_page_insert(m, object, offset);

			vm_object_paging_begin(cur_object);
			vm_object_paging_begin(object);

			type_of_fault = DBG_COW_FAULT;
			VM_STAT(cow_faults++);
			current_task()->cow_faults++;

			/*
			 *	Now cope with the source page and object
			 *	If the top object has a ref count of 1
			 *	then no other map can access it, and hence
			 *	it's not necessary to do the pmap_disconnect.
			 */
			vm_page_lock_queues();
			vm_page_deactivate(cur_m);
			pmap_disconnect(cur_m->phys_page);
			vm_page_unlock_queues();

			PAGE_WAKEUP_DONE(cur_m);
			vm_object_paging_end(cur_object);
			vm_object_unlock(cur_object);

			/*
			 *	Slight hack to call vm_object collapse
			 *	and then reuse common map in code.
			 *	note that the object lock was taken above.
			 */
			vm_object_paging_end(object);
			vm_object_collapse(object, offset);
			vm_object_paging_begin(object);
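			/*
			 * Shadow-chain copy-on-write handled entirely in the
			 * fast path: the resident source page (cur_m) found in
			 * a backing shadow object is copied into a fresh page
			 * in the top-level object, the source is deactivated
			 * and disconnected from other pmaps where necessary,
			 * and the new page appears to be mapped through the
			 * same pmap-enter and queue handling used by the plain
			 * mapped-in case above.
			 */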
			/*
			 *	No page at cur_object, cur_offset
			 */
			if (cur_object->pager_created) {
				/*
				 *	Have to talk to the pager.  Give up.
				 */
				break;
			}

			if (cur_object->shadow == VM_OBJECT_NULL) {

				if (cur_object->shadow_severed) {
					vm_object_paging_end(object);
					vm_object_unlock(object);
					vm_map_unlock_read(map);
					vm_map_unlock(real_map);

					if (write_startup_file)
						tws_send_startup_info(
							current_task());

					thread_interrupt_level(interruptible_state);

					return KERN_MEMORY_ERROR;
				}
				/*
				 *	Zero fill fault.  Page gets
				 *	filled in top object. Insert
				 *	page, then drop any lower lock.
				 *	Give up if no page.
				 */
				if (VM_PAGE_THROTTLED()) {
					break;
				}

				/*
				 * Are we protecting the system from
				 * backing store exhaustion?  If so,
				 * sleep unless we are privileged.
				 */
				if (vm_backing_store_low) {
					if (!(current_task()->priv_flags
							& VM_BACKING_STORE_PRIV))
						break;
				}
				m = vm_page_alloc(object, offset);
				if (m == VM_PAGE_NULL) {
					break;
				}
				/*
				 * This is a zero-fill or initial fill
				 * page fault.  As such, we consider it
				 * undefined with respect to instruction
				 * execution.  i.e. it is the responsibility
				 * of higher layers to call for an instruction
				 * sync after changing the contents and before
				 * sending a program into this area.  We
				 * choose this approach for performance
				 */
				m->no_isync = FALSE;

				if (cur_object != object)
					vm_object_unlock(cur_object);

				vm_object_paging_begin(object);
				vm_object_unlock(object);
				/*
				 *	Now zero fill page and map it.
				 *	the page is probably going to
				 *	be written soon, so don't bother
				 *	to clear the modified bit
				 *
				 *	NOTE: This code holds the map
				 *	lock across the zero fill.
				 */
				if (!map->no_zero_fill) {
					vm_page_zero_fill(m);
					type_of_fault = DBG_ZERO_FILL_FAULT;
					VM_STAT(zero_fill_count++);
				}
				vm_page_lock_queues();
				VM_PAGE_QUEUES_REMOVE(m);

				m->page_ticket = vm_page_ticket;
				assert(!m->laundry);
				assert(m->object != kernel_object);
				assert(m->pageq.next == NULL &&
				       m->pageq.prev == NULL);
				if (m->object->size > 0x200000) {
					m->zero_fill = TRUE;
					/* depends on the queues lock */
					queue_enter(&vm_page_queue_zf,
						    m, vm_page_t, pageq);
				} else {
					queue_enter(
						&vm_page_queue_inactive,
						m, vm_page_t, pageq);
				}
				vm_page_ticket_roll++;
				if (vm_page_ticket_roll ==
						VM_PAGE_TICKETS_IN_ROLL) {
					vm_page_ticket_roll = 0;
					if (vm_page_ticket ==
							VM_PAGE_TICKET_ROLL_IDS)

				vm_page_inactive_count++;
				vm_page_unlock_queues();
				vm_object_lock(object);
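				/*
				 * Zero-fill bookkeeping: the fresh page is
				 * parked on the inactive side of the paging
				 * queues (a dedicated zero-fill queue is used
				 * for pages of objects larger than 0x200000
				 * bytes), and the ticket counters above appear
				 * to spread such pages across ticket groups so
				 * they age in batches later on.
				 */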
			/*
			 *	On to the next level
			 */
			cur_offset += cur_object->shadow_offset;
			new_object = cur_object->shadow;
			vm_object_lock(new_object);
			if (cur_object != object)
				vm_object_unlock(cur_object);
			cur_object = new_object;
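			/*
			 * Descend one level down the shadow chain.  The offset
			 * is rebased by shadow_offset before the next lookup in
			 * the backing object: for example, with
			 * cur_object->shadow_offset == 0x1000, a fault at
			 * cur_offset 0x2000 is looked up at offset 0x3000 in
			 * cur_object->shadow on the following pass.
			 */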
	/*
	 *	Cleanup from fast fault failure.  Drop any object
	 *	lock other than original and drop map lock.
	 */
	if (object != cur_object)
		vm_object_unlock(cur_object);

	vm_map_unlock_read(map);

	vm_map_unlock(real_map);
	/*
	 *	Make a reference to this object to
	 *	prevent its disposal while we are messing with
	 *	it.  Once we have the reference, the map is free
	 *	to be diddled.  Since objects reference their
	 *	shadows (and copies), they will stay around as well.
	 */
	assert(object->ref_count > 0);
	object->ref_count++;
	vm_object_res_reference(object);
	vm_object_paging_begin(object);

	XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);

	if (!object->private) {
		write_startup_file =
			vm_fault_tws_insert(map, real_map, vaddr, object, offset);
	}

	kr = vm_fault_page(object, offset, fault_type,
			   (change_wiring && !wired),
			   lo_offset, hi_offset, behavior,
			   &prot, &result_page, &top_page,
			   &error_code, map->no_zero_fill, FALSE, map, vaddr);
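	/*
	 * Slow path: vm_fault_page() does the real work from here on,
	 * walking the shadow chain with busy pages, talking to the pager
	 * when one exists, and handing back result_page plus a top_page
	 * placeholder that the cleanup macros defined below know how to
	 * release.
	 */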
	/*
	 *	If we didn't succeed, lose the object reference immediately.
	 */
	if (kr != VM_FAULT_SUCCESS)
		vm_object_deallocate(object);

	/*
	 *	See why we failed, and take corrective action.
	 */
	switch (kr) {
		case VM_FAULT_SUCCESS:
		case VM_FAULT_MEMORY_SHORTAGE:
			if (vm_page_wait((change_wiring) ?
		case VM_FAULT_INTERRUPTED:
		case VM_FAULT_RETRY:
		case VM_FAULT_FICTITIOUS_SHORTAGE:
			vm_page_more_fictitious();
		case VM_FAULT_MEMORY_ERROR:
			kr = KERN_MEMORY_ERROR;
	}

	if (m != VM_PAGE_NULL) {
		assert((change_wiring && !wired) ?
		       (top_page == VM_PAGE_NULL) :
		       ((top_page == VM_PAGE_NULL) == (m->object == object)));
	}
	/*
	 *	How to clean up the result of vm_fault_page.  This
	 *	happens whether the mapping is entered or not.
	 */

#define UNLOCK_AND_DEALLOCATE				\
	vm_fault_cleanup(m->object, top_page);		\
	vm_object_deallocate(object);

	/*
	 *	What to do with the resulting page from vm_fault_page
	 *	if it doesn't get entered into the physical map:
	 */

#define RELEASE_PAGE(m)					\
	PAGE_WAKEUP_DONE(m);				\
	vm_page_lock_queues();				\
	if (!m->active && !m->inactive)			\
		vm_page_activate(m);			\
	vm_page_unlock_queues();
	/*
	 *	We must verify that the maps have not changed
	 *	since our last lookup.
	 */
	if (m != VM_PAGE_NULL) {
		old_copy_object = m->object->copy;
		vm_object_unlock(m->object);
	} else {
		old_copy_object = VM_OBJECT_NULL;
	}
	if ((map != original_map) || !vm_map_verify(map, &version)) {
		vm_object_t		retry_object;
		vm_object_offset_t	retry_offset;
		vm_prot_t		retry_prot;

		/*
		 *	To avoid trying to write_lock the map while another
		 *	thread has it read_locked (in vm_map_pageable), we
		 *	do not try for write permission.  If the page is
		 *	still writable, we will get write permission.  If it
		 *	is not, or has been marked needs_copy, we enter the
		 *	mapping without write permission, and will merely
		 *	take another fault.
		 */
		vm_map_lock_read(map);
		kr = vm_map_lookup_locked(&map, vaddr,
					  fault_type & ~VM_PROT_WRITE, &version,
					  &retry_object, &retry_offset, &retry_prot,
					  &wired, &behavior, &lo_offset, &hi_offset,
					  &real_map);
		pmap = real_map->pmap;
;
2877 if (kr
!= KERN_SUCCESS
) {
2878 vm_map_unlock_read(map
);
2879 if(m
!= VM_PAGE_NULL
) {
2880 vm_object_lock(m
->object
);
2882 UNLOCK_AND_DEALLOCATE
;
2884 vm_object_deallocate(object
);
2889 vm_object_unlock(retry_object
);
2890 if(m
!= VM_PAGE_NULL
) {
2891 vm_object_lock(m
->object
);
2893 vm_object_lock(object
);
2896 if ((retry_object
!= object
) ||
2897 (retry_offset
!= offset
)) {
2898 vm_map_unlock_read(map
);
2900 vm_map_unlock(real_map
);
2901 if(m
!= VM_PAGE_NULL
) {
2903 UNLOCK_AND_DEALLOCATE
;
2905 vm_object_deallocate(object
);
		/*
		 *	Check whether the protection has changed or the object
		 *	has been copied while we left the map unlocked.
		 */
		if (m != VM_PAGE_NULL) {
			vm_object_unlock(m->object);
		} else {
			vm_object_unlock(object);
		}
	}
	if (m != VM_PAGE_NULL) {
		vm_object_lock(m->object);
	} else {
		vm_object_lock(object);
	}

	/*
	 *	If the copy object changed while the top-level object
	 *	was unlocked, then we must take away write permission.
	 */
	if (m != VM_PAGE_NULL) {
		if (m->object->copy != old_copy_object)
			prot &= ~VM_PROT_WRITE;
	}

	/*
	 *	If we want to wire down this page, but no longer have
	 *	adequate permissions, we must start all over.
	 */
	if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
		vm_map_verify_done(map, &version);
		vm_map_unlock(real_map);
		if (m != VM_PAGE_NULL) {
			UNLOCK_AND_DEALLOCATE;
		} else {
			vm_object_deallocate(object);
		}
	}
	/*
	 *	Put this page into the physical map.
	 *	We had to do the unlock above because pmap_enter
	 *	may cause other faults.  The page may be on
	 *	the pageout queues.  If the pageout daemon comes
	 *	across the page, it will remove it from the queues.
	 */
	need_activation = FALSE;

	if (m != VM_PAGE_NULL) {
		if (m->no_isync == TRUE) {
			pmap_sync_page_data_phys(m->phys_page);

			if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
				/*
				 * found it in the cache, but this
				 * is the first fault-in of the page (no_isync == TRUE)
				 * so it must have come in as part of
				 * a cluster... account 1 pagein against it
				 */
				current_task()->pageins++;

				type_of_fault = DBG_PAGEIN_FAULT;
			}
			need_activation = TRUE;

			m->no_isync = FALSE;
		}
		cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

		PMAP_ENTER(caller_pmap,
			   caller_pmap_addr, m,
			   prot, cache_attr, wired);

		PMAP_ENTER(pmap, vaddr, m,
			   prot, cache_attr, wired);
		/*
		 *	Add working set information for private objects here.
		 */
		if (m->object->private) {
			write_startup_file =
				vm_fault_tws_insert(map, real_map, vaddr,
						    m->object, m->offset);
		}

		vm_map_entry_t		entry;
		vm_map_offset_t		laddr;
		vm_map_offset_t		ldelta, hdelta;
		/*
		 * do a pmap block mapping from the physical address
		 */

		/* While we do not worry about execution protection in */
		/* general, certain pages may have instruction execution */
		/* disallowed.  We will check here, and if not allowed */
		/* to execute, we return with a protection failure.    */

		if ((full_fault_type & VM_PROT_EXECUTE) &&
		    (!pmap_eligible_for_execute((ppnum_t)
				(object->shadow_offset >> 12)))) {

			vm_map_verify_done(map, &version);
			vm_map_unlock(real_map);
			vm_fault_cleanup(object, top_page);
			vm_object_deallocate(object);
			kr = KERN_PROTECTION_FAILURE;
		}
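		/*
		 * This branch appears to cover physically mapped,
		 * device-style ranges that never have a vm_page.  Execute
		 * faults were folded into read faults at the top of
		 * vm_fault(), so the saved full_fault_type is consulted here
		 * and the fault fails with KERN_PROTECTION_FAILURE when the
		 * underlying physical page is not eligible for instruction
		 * execution.
		 */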
		if (real_map != map) {
			vm_map_unlock(real_map);
		}
		if (original_map != map) {
			vm_map_unlock_read(map);
			vm_map_lock_read(original_map);
		}

		hdelta = 0xFFFFF000;
		ldelta = 0xFFFFF000;
		while (vm_map_lookup_entry(map, laddr, &entry)) {
			if (ldelta > (laddr - entry->vme_start))
				ldelta = laddr - entry->vme_start;
			if (hdelta > (entry->vme_end - laddr))
				hdelta = entry->vme_end - laddr;
			if (entry->is_sub_map) {

				laddr = (laddr - entry->vme_start)
				vm_map_lock_read(entry->object.sub_map);
				vm_map_unlock_read(map);
				if (entry->use_pmap) {
					vm_map_unlock_read(real_map);
					real_map = entry->object.sub_map;
				}
				map = entry->object.sub_map;
			}
		}
		if (vm_map_lookup_entry(map, laddr, &entry) &&
		    (entry->object.vm_object != NULL) &&
		    (entry->object.vm_object == object)) {

			/* Set up a block mapped area */
			pmap_map_block(caller_pmap,
				(addr64_t)(caller_pmap_addr - ldelta),
				(entry->object.vm_object->shadow_offset))
				(laddr - entry->vme_start)
				((ldelta + hdelta) >> 12), prot,
				(VM_WIMG_MASK & (int)object->wimg_bits), 0);

			/* Set up a block mapped area */
			pmap_map_block(real_map->pmap,
				(addr64_t)(vaddr - ldelta),
				(entry->object.vm_object->shadow_offset))
				(laddr - entry->vme_start) - ldelta) >> 12,
				((ldelta + hdelta) >> 12), prot,
				(VM_WIMG_MASK & (int)object->wimg_bits), 0);

			pmap_enter(caller_pmap, caller_pmap_addr,
				object->shadow_offset >> 12, prot, 0, TRUE);

			pmap_enter(pmap, vaddr,
				object->shadow_offset >> 12, prot, 0, TRUE);
	/*
	 *	If the page is not wired down and isn't already
	 *	on a pageout queue, then put it where the
	 *	pageout daemon can find it.
	 */
	if (m != VM_PAGE_NULL) {
		vm_page_lock_queues();

		vm_pagein_cluster_used++;
		m->clustered = FALSE;

		m->reference = TRUE;

		if (change_wiring) {
		}
#if	VM_FAULT_STATIC_CONFIG
		if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
			vm_page_activate(m);
#else
		else if (software_reference_bits) {
			if (!m->active && !m->inactive)
				vm_page_activate(m);
			m->reference = TRUE;

			vm_page_activate(m);
		}
#endif
		vm_page_unlock_queues();
	}
	/*
	 *	Unlock everything, and return
	 */
	vm_map_verify_done(map, &version);
	vm_map_unlock(real_map);
	if (m != VM_PAGE_NULL) {
		PAGE_WAKEUP_DONE(m);
		UNLOCK_AND_DEALLOCATE;
	} else {
		vm_fault_cleanup(object, top_page);
		vm_object_deallocate(object);
	}

#undef	UNLOCK_AND_DEALLOCATE

	if (write_startup_file)
		tws_send_startup_info(current_task());

	thread_interrupt_level(interruptible_state);

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
			      type_of_fault & 0xff,
/*
 *	Wire down a range of virtual addresses in a map.
 */
	vm_map_entry_t	entry,
	vm_map_offset_t	pmap_addr)
	register vm_map_offset_t	va;
	register vm_map_offset_t	end_addr = entry->vme_end;
	register kern_return_t		rc;

	assert(entry->in_transition);

	if ((entry->object.vm_object != NULL) &&
	    !entry->is_sub_map &&
	    entry->object.vm_object->phys_contiguous) {
		return KERN_SUCCESS;
	}

	/*
	 *	Inform the physical mapping system that the
	 *	range of addresses may not fault, so that
	 *	page tables and such can be locked down as well.
	 */
	pmap_pageable(pmap, pmap_addr,
		      pmap_addr + (end_addr - entry->vme_start), FALSE);

	/*
	 *	We simulate a fault to get the page and enter it
	 *	in the physical map.
	 */
	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
		if ((rc = vm_fault_wire_fast(
			map, va, entry, pmap,
			pmap_addr + (va - entry->vme_start)
			)) != KERN_SUCCESS) {
			rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
				      (pmap == kernel_pmap) ?
				      THREAD_UNINT : THREAD_ABORTSAFE,
				      pmap, pmap_addr + (va - entry->vme_start));
		}

		if (rc != KERN_SUCCESS) {
			struct vm_map_entry	tmp_entry = *entry;

			/* unwire wired pages */
			tmp_entry.vme_end = va;
			vm_fault_unwire(map,
					&tmp_entry, FALSE, pmap, pmap_addr);

	return KERN_SUCCESS;
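/*
 * vm_fault_wire() above tries the cheap vm_fault_wire_fast() first and
 * only falls back to a full vm_fault() (VM_PROT_NONE, uninterruptible for
 * kernel pmaps) when the fast attempt fails; if even that fails part way
 * through the range, the already wired portion is rolled back through
 * vm_fault_unwire() on a temporary copy of the map entry.
 */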
/*
 *	Unwire a range of virtual addresses in a map.
 */
	vm_map_entry_t	entry,
	boolean_t	deallocate,
	vm_map_offset_t	pmap_addr)
	register vm_map_offset_t	va;
	register vm_map_offset_t	end_addr = entry->vme_end;

	object = (entry->is_sub_map)
			? VM_OBJECT_NULL : entry->object.vm_object;

	/*
	 *	Since the pages are wired down, we must be able to
	 *	get their mappings from the physical map system.
	 */
	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
		pmap_change_wiring(pmap,
			pmap_addr + (va - entry->vme_start), FALSE);

		if (object == VM_OBJECT_NULL) {
			(void) vm_fault(map, va, VM_PROT_NONE,
					TRUE, THREAD_UNINT, pmap, pmap_addr);
		} else if (object->phys_contiguous) {
			vm_page_t		result_page;
			vm_object_t		result_object;
			vm_fault_return_t	result;

			do {
				prot = VM_PROT_NONE;

				vm_object_lock(object);
				vm_object_paging_begin(object);
				    "vm_fault_unwire -> vm_fault_page\n",
				result = vm_fault_page(object,
						(va - entry->vme_start),
						- entry->vme_start),
						0, map->no_zero_fill,
			} while (result == VM_FAULT_RETRY);

			if (result != VM_FAULT_SUCCESS)
				panic("vm_fault_unwire: failure");

			result_object = result_page->object;

				assert(!result_page->fictitious);
				pmap_disconnect(result_page->phys_page);
				VM_PAGE_FREE(result_page);

				vm_page_lock_queues();
				vm_page_unwire(result_page);
				vm_page_unlock_queues();
				PAGE_WAKEUP_DONE(result_page);

			vm_fault_cleanup(result_object, top_page);

	/*
	 *	Inform the physical mapping system that the range
	 *	of addresses may fault, so that page tables and
	 *	such may be unwired themselves.
	 */
	pmap_pageable(pmap, pmap_addr,
		      pmap_addr + (end_addr - entry->vme_start), TRUE);
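/*
 * The deallocate argument appears to select between the two blocks above:
 * when set, the wired page is disconnected from the pmap and freed
 * outright; otherwise it is merely unwired and woken so the pageout daemon
 * can reclaim it normally.  In either case pmap_pageable() finally tells
 * the physical map layer that the range may fault again.
 */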
/*
 *	vm_fault_wire_fast:
 *
 *	Handle common case of a wire down page fault at the given address.
 *	If successful, the page is inserted into the associated physical map.
 *	The map entry is passed in to avoid the overhead of a map lookup.
 *
 *	NOTE: the given address should be truncated to the
 *	proper page address.
 *
 *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
 *	a standard error specifying why the fault is fatal is returned.
 *
 *	The map in question must be referenced, and remains so.
 *	Caller has a read lock on the map.
 *
 *	This is a stripped version of vm_fault() for wiring pages.  Anything
 *	other than the common case will return KERN_FAILURE, and the caller
 *	is expected to call vm_fault().
 */
	__unused vm_map_t	map,
	vm_map_entry_t		entry,
	vm_map_offset_t		pmap_addr)
	vm_object_offset_t	offset;
	register vm_page_t	m;
	thread_t		thread = current_thread();
	unsigned int		cache_attr;

	if (thread != THREAD_NULL && thread->task != TASK_NULL)
		thread->task->faults++;
#define RELEASE_PAGE(m)	{				\
	PAGE_WAKEUP_DONE(m);				\
	vm_page_lock_queues();				\
	vm_page_unwire(m);				\
	vm_page_unlock_queues();			\
}

#undef	UNLOCK_THINGS
#define UNLOCK_THINGS	{				\
	vm_object_paging_end(object);			\
	vm_object_unlock(object);			\
}

#undef	UNLOCK_AND_DEALLOCATE
#define UNLOCK_AND_DEALLOCATE	{			\
	vm_object_deallocate(object);			\
}

	/*
	 *	Give up and have caller do things the hard way.
	 */
	UNLOCK_AND_DEALLOCATE;				\
	return(KERN_FAILURE);				\

	/*
	 *	If this entry is not directly to a vm_object, bail out.
	 */
	if (entry->is_sub_map)
		return(KERN_FAILURE);
	/*
	 *	Find the backing store object and offset into it.
	 */
	object = entry->object.vm_object;
	offset = (va - entry->vme_start) + entry->offset;
	prot = entry->protection;

	/*
	 *	Make a reference to this object to prevent its
	 *	disposal while we are messing with it.
	 */
	vm_object_lock(object);
	assert(object->ref_count > 0);
	object->ref_count++;
	vm_object_res_reference(object);
	vm_object_paging_begin(object);
	/*
	 *	INVARIANTS (through entire routine):
	 *
	 *	1)	At all times, we must either have the object
	 *		lock or a busy page in some object to prevent
	 *		some other thread from trying to bring in
	 *
	 *	2)	Once we have a busy page, we must remove it from
	 *		the pageout queues, so that the pageout daemon
	 *		will not grab it away.
	 */

	/*
	 *	Look for page in top-level object.  If it's not there or
	 *	there's something going on, give up.
	 *	ENCRYPTED SWAP: use the slow fault path, since we'll need to
	 *	decrypt the page before wiring it down.
	 */
	m = vm_page_lookup(object, offset);
	if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) ||
	    (m->unusual && (m->error || m->restart || m->absent ||
			    prot & m->page_lock))) {

		GIVE_UP;
	}
	ASSERT_PAGE_DECRYPTED(m);
	/*
	 *	Wire the page down now.  All bail outs beyond this
	 *	point must unwire the page.
	 */
	vm_page_lock_queues();
	vm_page_unlock_queues();

	/*
	 *	Mark page busy for other threads.
	 */

	/*
	 *	Give up if the page is being written and there's a copy object
	 */
	if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
		RELEASE_PAGE(m);
		GIVE_UP;
	}

	/*
	 *	Put this page into the physical map.
	 *	We have to unlock the object because pmap_enter
	 *	may cause other faults.
	 */
	if (m->no_isync == TRUE) {
		pmap_sync_page_data_phys(m->phys_page);

		m->no_isync = FALSE;
	}

	cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

	PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);

	/*
	 *	Unlock everything, and return
	 */
	PAGE_WAKEUP_DONE(m);
	UNLOCK_AND_DEALLOCATE;

	return(KERN_SUCCESS);
/*
 *	Routine:	vm_fault_copy_cleanup
 *
 *		Release a page used by vm_fault_copy.
 */
vm_fault_copy_cleanup(
	vm_object_t	object = page->object;

	vm_object_lock(object);
	PAGE_WAKEUP_DONE(page);
	vm_page_lock_queues();
	if (!page->active && !page->inactive)
		vm_page_activate(page);
	vm_page_unlock_queues();
	vm_fault_cleanup(object, top_page);

vm_fault_copy_dst_cleanup(
	if (page != VM_PAGE_NULL) {
		object = page->object;
		vm_object_lock(object);
		vm_page_lock_queues();
		vm_page_unwire(page);
		vm_page_unlock_queues();
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}
/*
 *	Routine:	vm_fault_copy
 *
 *		Copy pages from one virtual memory object to another --
 *		neither the source nor destination pages need be resident.
 *
 *		Before actually copying a page, the version associated with
 *		the destination address map will be verified.
 *
 *	In/out conditions:
 *		The caller must hold a reference, but not a lock, to
 *		each of the source and destination objects and to the
 *
 *		Returns KERN_SUCCESS if no errors were encountered in
 *		reading or writing the data.  Returns KERN_INTERRUPTED if
 *		the operation was interrupted (only possible if the
 *		"interruptible" argument is asserted).  Other return values
 *		indicate a permanent error in copying the data.
 *
 *		The actual amount of data copied will be returned in the
 *		"copy_size" argument.  In the event that the destination map
 *		verification failed, this amount may be less than the amount
 */
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_map_size_t		*copy_size,		/* INOUT */
	vm_object_t		dst_object,
	vm_object_offset_t	dst_offset,
	vm_map_version_t	*dst_version,

	vm_page_t		result_page;

	vm_page_t		src_top_page;

	vm_page_t		dst_top_page;

	vm_map_size_t		amount_left;
	vm_object_t		old_copy_object;
	kern_return_t		error = 0;

	vm_map_size_t		part_size;

	/*
	 * In order not to confuse the clustered pageins, align
	 * the different offsets on a page boundary.
	 */
	vm_object_offset_t	src_lo_offset = vm_object_trunc_page(src_offset);
	vm_object_offset_t	dst_lo_offset = vm_object_trunc_page(dst_offset);
	vm_object_offset_t	src_hi_offset = vm_object_round_page(src_offset + *copy_size);
	vm_object_offset_t	dst_hi_offset = vm_object_round_page(dst_offset + *copy_size);

	*copy_size -= amount_left;			\

	amount_left = *copy_size;
	do { /* while (amount_left > 0) */
		/*
		 * There may be a deadlock if both source and destination
		 * pages are the same. To avoid this deadlock, the copy must
		 * start by getting the destination page in order to apply
		 * COW semantics if any.
		 */

	RetryDestinationFault: ;

		dst_prot = VM_PROT_WRITE|VM_PROT_READ;

		vm_object_lock(dst_object);
		vm_object_paging_begin(dst_object);

		XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
		switch (vm_fault_page(dst_object,
				      vm_object_trunc_page(dst_offset),
				      VM_PROT_WRITE|VM_PROT_READ,
				      VM_BEHAVIOR_SEQUENTIAL,
				      dst_map->no_zero_fill,
		case VM_FAULT_SUCCESS:
		case VM_FAULT_RETRY:
			goto RetryDestinationFault;
		case VM_FAULT_MEMORY_SHORTAGE:
			if (vm_page_wait(interruptible))
				goto RetryDestinationFault;
		case VM_FAULT_INTERRUPTED:
			RETURN(MACH_SEND_INTERRUPTED);
		case VM_FAULT_FICTITIOUS_SHORTAGE:
			vm_page_more_fictitious();
			goto RetryDestinationFault;
		case VM_FAULT_MEMORY_ERROR:
			return(KERN_MEMORY_ERROR);
		}

		assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);

		old_copy_object = dst_page->object->copy;
		/*
		 * There exists the possibility that the source and
		 * destination page are the same.  But we can't
		 * easily determine that now.  If they are the
		 * same, the call to vm_fault_page() for the
		 * destination page will deadlock.  To prevent this we
		 * wire the page so we can drop busy without having
		 * the page daemon steal the page.  We clean up the
		 * top page but keep the paging reference on the object
		 * holding the dest page so it doesn't go away.
		 */
		vm_page_lock_queues();
		vm_page_wire(dst_page);
		vm_page_unlock_queues();
		PAGE_WAKEUP_DONE(dst_page);
		vm_object_unlock(dst_page->object);

		if (dst_top_page != VM_PAGE_NULL) {
			vm_object_lock(dst_object);
			VM_PAGE_FREE(dst_top_page);
			vm_object_paging_end(dst_object);
			vm_object_unlock(dst_object);
		}
		if (src_object == VM_OBJECT_NULL) {
			/*
			 *	No source object.  We will just
			 *	zero-fill the page in dst_object.
			 */
			src_page = VM_PAGE_NULL;
			result_page = VM_PAGE_NULL;
		} else {
			vm_object_lock(src_object);
			src_page = vm_page_lookup(src_object,
					vm_object_trunc_page(src_offset));
			if (src_page == dst_page) {
				src_prot = dst_prot;
				result_page = VM_PAGE_NULL;
			} else {
				src_prot = VM_PROT_READ;
				vm_object_paging_begin(src_object);

				    "vm_fault_copy(2) -> vm_fault_page\n",
				switch (vm_fault_page(src_object,
						      vm_object_trunc_page(src_offset),
						      VM_BEHAVIOR_SEQUENTIAL,

				case VM_FAULT_SUCCESS:
				case VM_FAULT_RETRY:
					goto RetrySourceFault;
				case VM_FAULT_MEMORY_SHORTAGE:
					if (vm_page_wait(interruptible))
						goto RetrySourceFault;
				case VM_FAULT_INTERRUPTED:
					vm_fault_copy_dst_cleanup(dst_page);
					RETURN(MACH_SEND_INTERRUPTED);
				case VM_FAULT_FICTITIOUS_SHORTAGE:
					vm_page_more_fictitious();
					goto RetrySourceFault;
				case VM_FAULT_MEMORY_ERROR:
					vm_fault_copy_dst_cleanup(dst_page);
					return(KERN_MEMORY_ERROR);
				}

				assert((src_top_page == VM_PAGE_NULL) ==
				       (result_page->object == src_object));
			}
			assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
			vm_object_unlock(result_page->object);
		}
		if (!vm_map_verify(dst_map, dst_version)) {
			if (result_page != VM_PAGE_NULL && src_page != dst_page)
				vm_fault_copy_cleanup(result_page, src_top_page);
			vm_fault_copy_dst_cleanup(dst_page);
		}

		vm_object_lock(dst_page->object);

		if (dst_page->object->copy != old_copy_object) {
			vm_object_unlock(dst_page->object);
			vm_map_verify_done(dst_map, dst_version);
			if (result_page != VM_PAGE_NULL && src_page != dst_page)
				vm_fault_copy_cleanup(result_page, src_top_page);
			vm_fault_copy_dst_cleanup(dst_page);
		}
		vm_object_unlock(dst_page->object);
		/*
		 *	Copy the page, and note that it is dirty
		 */
		if (!page_aligned(src_offset) ||
		    !page_aligned(dst_offset) ||
		    !page_aligned(amount_left)) {

			vm_object_offset_t	src_po,
						dst_po;

			src_po = src_offset - vm_object_trunc_page(src_offset);
			dst_po = dst_offset - vm_object_trunc_page(dst_offset);

			if (dst_po > src_po) {
				part_size = PAGE_SIZE - dst_po;
			} else {
				part_size = PAGE_SIZE - src_po;
			}
			if (part_size > (amount_left)) {
				part_size = amount_left;
			}

			if (result_page == VM_PAGE_NULL) {
				vm_page_part_zero_fill(dst_page,
			} else {
				vm_page_part_copy(result_page, src_po,
						  dst_page, dst_po, part_size);
				if (!dst_page->dirty) {
					vm_object_lock(dst_object);
					dst_page->dirty = TRUE;
					vm_object_unlock(dst_page->object);
				}
			}
		} else {
			part_size = PAGE_SIZE;

			if (result_page == VM_PAGE_NULL)
				vm_page_zero_fill(dst_page);
			else {
				vm_page_copy(result_page, dst_page);
				if (!dst_page->dirty) {
					vm_object_lock(dst_object);
					dst_page->dirty = TRUE;
					vm_object_unlock(dst_page->object);
				}
			}
		}
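		/*
		 * Partial-page arithmetic above, assuming a 4 KB PAGE_SIZE for
		 * the sake of the numbers: with src_po = 0x200 and dst_po =
		 * 0x600, dst_po is the larger, so part_size starts as
		 * PAGE_SIZE - dst_po = 0xA00 bytes and is then clipped to
		 * amount_left if less remains; fully aligned copies simply
		 * move a whole PAGE_SIZE per iteration of the loop.
		 */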
		/*
		 *	Unlock everything, and return
		 */
		vm_map_verify_done(dst_map, dst_version);

		if (result_page != VM_PAGE_NULL && src_page != dst_page)
			vm_fault_copy_cleanup(result_page, src_top_page);
		vm_fault_copy_dst_cleanup(dst_page);

		amount_left -= part_size;
		src_offset += part_size;
		dst_offset += part_size;
	} while (amount_left > 0);

	RETURN(KERN_SUCCESS);
/*
 *	Routine:	vm_fault_page_overwrite
 *
 *		A form of vm_fault_page that assumes that the
 *		resulting page will be overwritten in its entirety,
 *		making it unnecessary to obtain the correct *contents*
 *
 *		XXX Untested.  Also unused.  Eventually, this technology
 *		could be used in vm_fault_copy() to advantage.
 */
vm_fault_page_overwrite(
	vm_object_t		dst_object,
	vm_object_offset_t	dst_offset,
	vm_page_t		*result_page)	/* OUT */
	kern_return_t	wait_result;

#define	interruptible	THREAD_UNINT	/* XXX */
	/*
	 *	Look for a page at this offset
	 */
	while ((dst_page = vm_page_lookup(dst_object, dst_offset))

		/*
		 *	No page, no problem... just allocate one.
		 */
		dst_page = vm_page_alloc(dst_object, dst_offset);
		if (dst_page == VM_PAGE_NULL) {
			vm_object_unlock(dst_object);
			vm_object_lock(dst_object);
		}

		/*
		 *	Pretend that the memory manager
		 *	write-protected the page.
		 *
		 *	Note that we will be asking for write
		 *	permission without asking for the data
		 */
		dst_page->overwriting = TRUE;
		dst_page->page_lock = VM_PROT_WRITE;
		dst_page->absent = TRUE;
		dst_page->unusual = TRUE;
		dst_object->absent_count++;

		/*
		 *	When we bail out, we might have to throw
		 *	away the page created here.
		 */
#define	DISCARD_PAGE						\
	vm_object_lock(dst_object);				\
	dst_page = vm_page_lookup(dst_object, dst_offset);	\
	if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
		VM_PAGE_FREE(dst_page);				\
	vm_object_unlock(dst_object);
		/*
		 *	If the page is write-protected...
		 */
		if (dst_page->page_lock & VM_PROT_WRITE) {
			/*
			 *	... and an unlock request hasn't been sent
			 */
			if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {

				/*
				 *	... then send one now.
				 */
				if (!dst_object->pager_ready) {
					wait_result = vm_object_assert_wait(dst_object,
							VM_OBJECT_EVENT_PAGER_READY,
					vm_object_unlock(dst_object);
					if (wait_result == THREAD_WAITING)
						wait_result = thread_block(THREAD_CONTINUE_NULL);
					if (wait_result != THREAD_AWAKENED) {
						return(VM_FAULT_INTERRUPTED);
					}
				}

				u = dst_page->unlock_request |= VM_PROT_WRITE;
				vm_object_unlock(dst_object);

				if ((rc = memory_object_data_unlock(
						dst_offset + dst_object->paging_offset,
						u)) != KERN_SUCCESS) {
					printf("vm_object_overwrite: memory_object_data_unlock failed\n");
					return((rc == MACH_SEND_INTERRUPTED) ?
						VM_FAULT_INTERRUPTED :
						VM_FAULT_MEMORY_ERROR);
				}
				vm_object_lock(dst_object);
			}

			/* ... fall through to wait below */

			/*
			 *	If the page isn't being used for other
			 *	purposes, then we're done.
			 */
			if ( ! (dst_page->busy || dst_page->absent ||
				dst_page->error || dst_page->restart) )

		wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
		vm_object_unlock(dst_object);
		if (wait_result == THREAD_WAITING)
			wait_result = thread_block(THREAD_CONTINUE_NULL);
		if (wait_result != THREAD_AWAKENED) {
			return(VM_FAULT_INTERRUPTED);
		}

	*result_page = dst_page;
	return(VM_FAULT_SUCCESS);

#undef	interruptible
#if	VM_FAULT_CLASSIFY
/*
 *	Temporary statistics gathering support.
 */

/*
 *	Statistics arrays:
 */
#define VM_FAULT_TYPES_MAX	5
#define	VM_FAULT_LEVEL_MAX	8

int	vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];

#define	VM_FAULT_TYPE_ZERO_FILL	0
#define	VM_FAULT_TYPE_MAP_IN	1
#define	VM_FAULT_TYPE_PAGER	2
#define	VM_FAULT_TYPE_COPY	3
#define	VM_FAULT_TYPE_OTHER	4

vm_fault_classify(vm_object_t		object,
		  vm_object_offset_t	offset,
		  vm_prot_t		fault_type)
	int	type, level = 0;

	m = vm_page_lookup(object, offset);
	if (m != VM_PAGE_NULL) {
		if (m->busy || m->error || m->restart || m->absent ||
		    fault_type & m->page_lock) {
			type = VM_FAULT_TYPE_OTHER;
		}
		if (((fault_type & VM_PROT_WRITE) == 0) ||
		    ((level == 0) && object->copy == VM_OBJECT_NULL)) {
			type = VM_FAULT_TYPE_MAP_IN;
		}
		type = VM_FAULT_TYPE_COPY;
	}
	if (object->pager_created) {
		type = VM_FAULT_TYPE_PAGER;
	}
	if (object->shadow == VM_OBJECT_NULL) {
		type = VM_FAULT_TYPE_ZERO_FILL;
	}

	offset += object->shadow_offset;
	object = object->shadow;

	if (level > VM_FAULT_LEVEL_MAX)
		level = VM_FAULT_LEVEL_MAX;

	vm_fault_stats[type][level] += 1;

/* cleanup routine to call from debugger */
vm_fault_classify_init(void)
	for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
		for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
			vm_fault_stats[type][level] = 0;
		}
	}
#endif	/* VM_FAULT_CLASSIFY */
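/*
 * The VM_FAULT_CLASSIFY block is instrumentation only: vm_fault_classify()
 * appears to bucket each fault by how it would be resolved (zero fill, map
 * in, pager, copy, other) and by its depth in the shadow chain, accumulating
 * counts in vm_fault_stats[type][level]; vm_fault_classify_init() can be
 * invoked from the debugger to clear the counters.
 */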