2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
40 * Carnegie Mellon requests users of this software to return to
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
56 * Page fault handling module.
59 #include <mach_cluster_stats.h>
60 #include <mach_pagemap.h>
63 #include <mach/mach_types.h>
64 #include <mach/kern_return.h>
65 #include <mach/message.h> /* for error codes */
66 #include <mach/vm_param.h>
67 #include <mach/vm_behavior.h>
68 #include <mach/memory_object.h>
69 /* For memory_object_data_{request,unlock} */
71 #include <kern/kern_types.h>
72 #include <kern/host_statistics.h>
73 #include <kern/counters.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/sched_prim.h>
77 #include <kern/host.h>
79 #include <kern/mach_param.h>
80 #include <kern/macro_help.h>
81 #include <kern/zalloc.h>
82 #include <kern/misc_protos.h>
84 #include <ppc/proc_reg.h>
86 #include <vm/vm_fault.h>
87 #include <vm/task_working_set.h>
88 #include <vm/vm_map.h>
89 #include <vm/vm_object.h>
90 #include <vm/vm_page.h>
91 #include <vm/vm_kern.h>
93 #include <vm/vm_pageout.h>
94 #include <vm/vm_protos.h>
96 #include <sys/kdebug.h>
98 #define VM_FAULT_CLASSIFY 0
99 #define VM_FAULT_STATIC_CONFIG 1
101 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
103 unsigned int vm_object_absent_max
= 50;
105 int vm_fault_debug
= 0;
107 #if !VM_FAULT_STATIC_CONFIG
108 boolean_t vm_fault_dirty_handling
= FALSE
;
109 boolean_t vm_fault_interruptible
= FALSE
;
110 boolean_t software_reference_bits
= TRUE
;
114 extern struct db_watchpoint
*db_watchpoint_list
;
115 #endif /* MACH_KDB */
118 /* Forward declarations of internal routines. */
119 extern kern_return_t
vm_fault_wire_fast(
122 vm_map_entry_t entry
,
124 vm_map_offset_t pmap_addr
);
126 extern void vm_fault_continue(void);
128 extern void vm_fault_copy_cleanup(
132 extern void vm_fault_copy_dst_cleanup(
135 #if VM_FAULT_CLASSIFY
136 extern void vm_fault_classify(vm_object_t object
,
137 vm_object_offset_t offset
,
138 vm_prot_t fault_type
);
140 extern void vm_fault_classify_init(void);
144 * Routine: vm_fault_init
146 * Initialize our private data structures.
154 * Routine: vm_fault_cleanup
156 * Clean up the result of vm_fault_page.
158 * The paging reference for "object" is released.
159 * "object" is unlocked.
160 * If "top_page" is not null, "top_page" is
161 * freed and the paging reference for the object
162 * containing it is released.
165 * "object" must be locked.
169 register vm_object_t object
,
170 register vm_page_t top_page
)
172 vm_object_paging_end(object
);
173 vm_object_unlock(object
);
175 if (top_page
!= VM_PAGE_NULL
) {
176 object
= top_page
->object
;
177 vm_object_lock(object
);
178 VM_PAGE_FREE(top_page
);
179 vm_object_paging_end(object
);
180 vm_object_unlock(object
);
184 #if MACH_CLUSTER_STATS
185 #define MAXCLUSTERPAGES 16
187 unsigned long pages_in_cluster
;
188 unsigned long pages_at_higher_offsets
;
189 unsigned long pages_at_lower_offsets
;
190 } cluster_stats_in
[MAXCLUSTERPAGES
];
191 #define CLUSTER_STAT(clause) clause
192 #define CLUSTER_STAT_HIGHER(x) \
193 ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
194 #define CLUSTER_STAT_LOWER(x) \
195 ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
196 #define CLUSTER_STAT_CLUSTER(x) \
197 ((cluster_stats_in[(x)].pages_in_cluster)++)
198 #else /* MACH_CLUSTER_STATS */
199 #define CLUSTER_STAT(clause)
200 #endif /* MACH_CLUSTER_STATS */
202 /* XXX - temporary */
203 boolean_t vm_allow_clustered_pagein
= FALSE
;
204 int vm_pagein_cluster_used
= 0;
206 #define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
209 boolean_t vm_page_deactivate_behind
= TRUE
;
211 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
213 int vm_default_ahead
= 0;
214 int vm_default_behind
= MAX_UPL_TRANSFER
;
217 * vm_page_deactivate_behind
219 * Determine if sequential access is in progress
220 * in accordance with the behavior specified. If
221 * so, compute a potential page to deactive and
224 * The object must be locked.
228 vm_fault_deactivate_behind(
230 vm_object_offset_t offset
,
231 vm_behavior_t behavior
)
236 dbgTrace(0xBEEF0018, (unsigned int) object
, (unsigned int) vm_fault_deactivate_behind
); /* (TEST/DEBUG) */
239 if (object
== kernel_object
) {
241 * Do not deactivate pages from the kernel object: they
242 * are not intended to become pageable.
248 case VM_BEHAVIOR_RANDOM
:
249 object
->sequential
= PAGE_SIZE_64
;
252 case VM_BEHAVIOR_SEQUENTIAL
:
254 object
->last_alloc
== offset
- PAGE_SIZE_64
) {
255 object
->sequential
+= PAGE_SIZE_64
;
256 m
= vm_page_lookup(object
, offset
- PAGE_SIZE_64
);
258 object
->sequential
= PAGE_SIZE_64
; /* reset */
262 case VM_BEHAVIOR_RSEQNTL
:
263 if (object
->last_alloc
&&
264 object
->last_alloc
== offset
+ PAGE_SIZE_64
) {
265 object
->sequential
+= PAGE_SIZE_64
;
266 m
= vm_page_lookup(object
, offset
+ PAGE_SIZE_64
);
268 object
->sequential
= PAGE_SIZE_64
; /* reset */
272 case VM_BEHAVIOR_DEFAULT
:
275 object
->last_alloc
== offset
- PAGE_SIZE_64
) {
276 vm_object_offset_t behind
= vm_default_behind
* PAGE_SIZE_64
;
278 object
->sequential
+= PAGE_SIZE_64
;
279 m
= (offset
>= behind
&&
280 object
->sequential
>= behind
) ?
281 vm_page_lookup(object
, offset
- behind
) :
283 } else if (object
->last_alloc
&&
284 object
->last_alloc
== offset
+ PAGE_SIZE_64
) {
285 vm_object_offset_t behind
= vm_default_behind
* PAGE_SIZE_64
;
287 object
->sequential
+= PAGE_SIZE_64
;
288 m
= (offset
< -behind
&&
289 object
->sequential
>= behind
) ?
290 vm_page_lookup(object
, offset
+ behind
) :
293 object
->sequential
= PAGE_SIZE_64
;
299 object
->last_alloc
= offset
;
303 vm_page_lock_queues();
304 vm_page_deactivate(m
);
305 vm_page_unlock_queues();
307 dbgTrace(0xBEEF0019, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
317 * Routine: vm_fault_page
319 * Find the resident page for the virtual memory
320 * specified by the given virtual memory object
322 * Additional arguments:
323 * The required permissions for the page is given
324 * in "fault_type". Desired permissions are included
325 * in "protection". The minimum and maximum valid offsets
326 * within the object for the relevant map entry are
327 * passed in "lo_offset" and "hi_offset" respectively and
328 * the expected page reference pattern is passed in "behavior".
329 * These three parameters are used to determine pagein cluster
332 * If the desired page is known to be resident (for
333 * example, because it was previously wired down), asserting
334 * the "unwiring" parameter will speed the search.
336 * If the operation can be interrupted (by thread_abort
337 * or thread_terminate), then the "interruptible"
338 * parameter should be asserted.
341 * The page containing the proper data is returned
345 * The source object must be locked and referenced,
346 * and must donate one paging reference. The reference
347 * is not affected. The paging reference and lock are
350 * If the call succeeds, the object in which "result_page"
351 * resides is left locked and holding a paging reference.
352 * If this is not the original object, a busy page in the
353 * original object is returned in "top_page", to prevent other
354 * callers from pursuing this same data, along with a paging
355 * reference for the original object. The "top_page" should
356 * be destroyed when this guarantee is no longer required.
357 * The "result_page" is also left busy. It is not removed
358 * from the pageout queues.
364 vm_object_t first_object
, /* Object to begin search */
365 vm_object_offset_t first_offset
, /* Offset into object */
366 vm_prot_t fault_type
, /* What access is requested */
367 boolean_t must_be_resident
,/* Must page be resident? */
368 int interruptible
, /* how may fault be interrupted? */
369 vm_map_offset_t lo_offset
, /* Map entry start */
370 vm_map_offset_t hi_offset
, /* Map entry end */
371 vm_behavior_t behavior
, /* Page reference behavior */
372 /* Modifies in place: */
373 vm_prot_t
*protection
, /* Protection for mapping */
375 vm_page_t
*result_page
, /* Page found, if successful */
376 vm_page_t
*top_page
, /* Page in top object, if
377 * not result_page. */
378 int *type_of_fault
, /* if non-null, fill in with type of fault
379 * COW, zero-fill, etc... returned in trace point */
380 /* More arguments: */
381 kern_return_t
*error_code
, /* code if page is in error */
382 boolean_t no_zero_fill
, /* don't zero fill absent pages */
383 boolean_t data_supply
, /* treat as data_supply if
384 * it is a write fault and a full
385 * page is provided */
387 __unused vm_map_offset_t vaddr
)
394 vm_object_offset_t offset
;
396 vm_object_t next_object
;
397 vm_object_t copy_object
;
398 boolean_t look_for_page
;
399 vm_prot_t access_required
= fault_type
;
400 vm_prot_t wants_copy_flag
;
401 vm_object_size_t length
;
402 vm_object_offset_t cluster_start
, cluster_end
;
403 CLUSTER_STAT(int pages_at_higher_offsets
;)
404 CLUSTER_STAT(int pages_at_lower_offsets
;)
405 kern_return_t wait_result
;
406 boolean_t interruptible_state
;
407 boolean_t bumped_pagein
= FALSE
;
412 * MACH page map - an optional optimization where a bit map is maintained
413 * by the VM subsystem for internal objects to indicate which pages of
414 * the object currently reside on backing store. This existence map
415 * duplicates information maintained by the vnode pager. It is
416 * created at the time of the first pageout against the object, i.e.
417 * at the same time pager for the object is created. The optimization
418 * is designed to eliminate pager interaction overhead, if it is
419 * 'known' that the page does not exist on backing store.
421 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
422 * either marked as paged out in the existence map for the object or no
423 * existence map exists for the object. LOOK_FOR() is one of the
424 * criteria in the decision to invoke the pager. It is also used as one
425 * of the criteria to terminate the scan for adjacent pages in a clustered
426 * pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
427 * permanent objects. Note also that if the pager for an internal object
428 * has not been created, the pager is not invoked regardless of the value
429 * of LOOK_FOR() and that clustered pagein scans are only done on an object
430 * for which a pager has been created.
432 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
433 * is marked as paged out in the existence map for the object. PAGED_OUT()
434 * PAGED_OUT() is used to determine if a page has already been pushed
435 * into a copy object in order to avoid a redundant page out operation.
437 #define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
438 != VM_EXTERNAL_STATE_ABSENT)
439 #define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
440 == VM_EXTERNAL_STATE_EXISTS)
441 #else /* MACH_PAGEMAP */
443 * If the MACH page map optimization is not enabled,
444 * LOOK_FOR() always evaluates to TRUE. The pager will always be
445 * invoked to resolve missing pages in an object, assuming the pager
446 * has been created for the object. In a clustered page operation, the
447 * absence of a page on backing backing store cannot be used to terminate
448 * a scan for adjacent pages since that information is available only in
449 * the pager. Hence pages that may not be paged out are potentially
450 * included in a clustered request. The vnode pager is coded to deal
451 * with any combination of absent/present pages in a clustered
452 * pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
453 * will always be invoked to push a dirty page into a copy object assuming
454 * a pager has been created. If the page has already been pushed, the
455 * pager will ingore the new request.
457 #define LOOK_FOR(o, f) TRUE
458 #define PAGED_OUT(o, f) FALSE
459 #endif /* MACH_PAGEMAP */
464 #define PREPARE_RELEASE_PAGE(m) \
466 vm_page_lock_queues(); \
469 #define DO_RELEASE_PAGE(m) \
471 PAGE_WAKEUP_DONE(m); \
472 if (!m->active && !m->inactive) \
473 vm_page_activate(m); \
474 vm_page_unlock_queues(); \
477 #define RELEASE_PAGE(m) \
479 PREPARE_RELEASE_PAGE(m); \
480 DO_RELEASE_PAGE(m); \
484 dbgTrace(0xBEEF0002, (unsigned int) first_object
, (unsigned int) first_offset
); /* (TEST/DEBUG) */
489 #if !VM_FAULT_STATIC_CONFIG
490 if (vm_fault_dirty_handling
493 * If there are watchpoints set, then
494 * we don't want to give away write permission
495 * on a read fault. Make the task write fault,
496 * so that the watchpoint code notices the access.
498 || db_watchpoint_list
499 #endif /* MACH_KDB */
502 * If we aren't asking for write permission,
503 * then don't give it away. We're using write
504 * faults to set the dirty bit.
506 if (!(fault_type
& VM_PROT_WRITE
))
507 *protection
&= ~VM_PROT_WRITE
;
510 if (!vm_fault_interruptible
)
511 interruptible
= THREAD_UNINT
;
512 #else /* STATIC_CONFIG */
515 * If there are watchpoints set, then
516 * we don't want to give away write permission
517 * on a read fault. Make the task write fault,
518 * so that the watchpoint code notices the access.
520 if (db_watchpoint_list
) {
522 * If we aren't asking for write permission,
523 * then don't give it away. We're using write
524 * faults to set the dirty bit.
526 if (!(fault_type
& VM_PROT_WRITE
))
527 *protection
&= ~VM_PROT_WRITE
;
530 #endif /* MACH_KDB */
531 #endif /* STATIC_CONFIG */
533 interruptible_state
= thread_interrupt_level(interruptible
);
536 * INVARIANTS (through entire routine):
538 * 1) At all times, we must either have the object
539 * lock or a busy page in some object to prevent
540 * some other thread from trying to bring in
543 * Note that we cannot hold any locks during the
544 * pager access or when waiting for memory, so
545 * we use a busy page then.
547 * Note also that we aren't as concerned about more than
548 * one thread attempting to memory_object_data_unlock
549 * the same page at once, so we don't hold the page
550 * as busy then, but do record the highest unlock
551 * value so far. [Unlock requests may also be delivered
554 * 2) To prevent another thread from racing us down the
555 * shadow chain and entering a new page in the top
556 * object before we do, we must keep a busy page in
557 * the top object while following the shadow chain.
559 * 3) We must increment paging_in_progress on any object
560 * for which we have a busy page
562 * 4) We leave busy pages on the pageout queues.
563 * If the pageout daemon comes across a busy page,
564 * it will remove the page from the pageout queues.
568 * Search for the page at object/offset.
571 object
= first_object
;
572 offset
= first_offset
;
573 first_m
= VM_PAGE_NULL
;
574 access_required
= fault_type
;
577 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
578 (integer_t
)object
, offset
, fault_type
, *protection
, 0);
581 * See whether this page is resident
586 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
588 if (!object
->alive
) {
589 vm_fault_cleanup(object
, first_m
);
590 thread_interrupt_level(interruptible_state
);
591 return(VM_FAULT_MEMORY_ERROR
);
593 m
= vm_page_lookup(object
, offset
);
595 dbgTrace(0xBEEF0004, (unsigned int) m
, (unsigned int) object
); /* (TEST/DEBUG) */
597 if (m
!= VM_PAGE_NULL
) {
599 * If the page was pre-paged as part of a
600 * cluster, record the fact.
601 * If we were passed a valid pointer for
602 * "type_of_fault", than we came from
603 * vm_fault... we'll let it deal with
604 * this condition, since it
605 * needs to see m->clustered to correctly
606 * account the pageins.
608 if (type_of_fault
== NULL
&& m
->clustered
) {
609 vm_pagein_cluster_used
++;
610 m
->clustered
= FALSE
;
614 * If the page is being brought in,
615 * wait for it and then retry.
617 * A possible optimization: if the page
618 * is known to be resident, we can ignore
619 * pages that are absent (regardless of
620 * whether they're busy).
625 dbgTrace(0xBEEF0005, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
627 wait_result
= PAGE_SLEEP(object
, m
, interruptible
);
629 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
630 (integer_t
)object
, offset
,
632 counter(c_vm_fault_page_block_busy_kernel
++);
634 if (wait_result
!= THREAD_AWAKENED
) {
635 vm_fault_cleanup(object
, first_m
);
636 thread_interrupt_level(interruptible_state
);
637 if (wait_result
== THREAD_RESTART
)
639 return(VM_FAULT_RETRY
);
643 return(VM_FAULT_INTERRUPTED
);
652 * the user needs access to a page that we
653 * encrypted before paging it out.
654 * Decrypt the page now.
655 * Keep it busy to prevent anyone from
656 * accessing it during the decryption.
659 vm_page_decrypt(m
, 0);
660 assert(object
== m
->object
);
665 * Retry from the top, in case
666 * something changed while we were
671 ASSERT_PAGE_DECRYPTED(m
);
674 * If the page is in error, give up now.
679 dbgTrace(0xBEEF0006, (unsigned int) m
, (unsigned int) error_code
); /* (TEST/DEBUG) */
682 *error_code
= m
->page_error
;
684 vm_fault_cleanup(object
, first_m
);
685 thread_interrupt_level(interruptible_state
);
686 return(VM_FAULT_MEMORY_ERROR
);
690 * If the pager wants us to restart
691 * at the top of the chain,
692 * typically because it has moved the
693 * page to another pager, then do so.
698 dbgTrace(0xBEEF0007, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
701 vm_fault_cleanup(object
, first_m
);
702 thread_interrupt_level(interruptible_state
);
703 return(VM_FAULT_RETRY
);
707 * If the page isn't busy, but is absent,
708 * then it was deemed "unavailable".
713 * Remove the non-existent page (unless it's
714 * in the top object) and move on down to the
715 * next object (if there is one).
718 dbgTrace(0xBEEF0008, (unsigned int) m
, (unsigned int) object
->shadow
); /* (TEST/DEBUG) */
721 next_object
= object
->shadow
;
722 if (next_object
== VM_OBJECT_NULL
) {
725 assert(!must_be_resident
);
727 if (object
->shadow_severed
) {
730 thread_interrupt_level(interruptible_state
);
731 return VM_FAULT_MEMORY_ERROR
;
735 * Absent page at bottom of shadow
736 * chain; zero fill the page we left
737 * busy in the first object, and flush
738 * the absent page. But first we
739 * need to allocate a real page.
741 if (VM_PAGE_THROTTLED() ||
742 (real_m
= vm_page_grab())
746 thread_interrupt_level(
747 interruptible_state
);
749 VM_FAULT_MEMORY_SHORTAGE
);
753 * are we protecting the system from
754 * backing store exhaustion. If so
755 * sleep unless we are privileged.
758 if(vm_backing_store_low
) {
759 if(!(current_task()->priv_flags
760 & VM_BACKING_STORE_PRIV
)) {
761 assert_wait((event_t
)
762 &vm_backing_store_low
,
764 vm_fault_cleanup(object
,
766 thread_block(THREAD_CONTINUE_NULL
);
767 thread_interrupt_level(
768 interruptible_state
);
769 return(VM_FAULT_RETRY
);
775 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
776 (integer_t
)object
, offset
,
778 (integer_t
)first_object
, 0);
779 if (object
!= first_object
) {
781 vm_object_paging_end(object
);
782 vm_object_unlock(object
);
783 object
= first_object
;
784 offset
= first_offset
;
786 first_m
= VM_PAGE_NULL
;
787 vm_object_lock(object
);
791 assert(real_m
->busy
);
792 vm_page_insert(real_m
, object
, offset
);
796 * Drop the lock while zero filling
797 * page. Then break because this
798 * is the page we wanted. Checking
799 * the page lock is a waste of time;
800 * this page was either absent or
801 * newly allocated -- in both cases
802 * it can't be page locked by a pager.
807 vm_object_unlock(object
);
808 vm_page_zero_fill(m
);
809 vm_object_lock(object
);
812 *type_of_fault
= DBG_ZERO_FILL_FAULT
;
813 VM_STAT(zero_fill_count
++);
815 if (bumped_pagein
== TRUE
) {
817 current_task()->pageins
--;
819 vm_page_lock_queues();
820 VM_PAGE_QUEUES_REMOVE(m
);
821 m
->page_ticket
= vm_page_ticket
;
823 assert(m
->object
!= kernel_object
);
824 assert(m
->pageq
.next
== NULL
&&
825 m
->pageq
.prev
== NULL
);
826 if(m
->object
->size
> 0x200000) {
828 /* depends on the queues lock */
830 queue_enter(&vm_page_queue_zf
,
831 m
, vm_page_t
, pageq
);
834 &vm_page_queue_inactive
,
835 m
, vm_page_t
, pageq
);
837 vm_page_ticket_roll
++;
838 if(vm_page_ticket_roll
==
839 VM_PAGE_TICKETS_IN_ROLL
) {
840 vm_page_ticket_roll
= 0;
842 VM_PAGE_TICKET_ROLL_IDS
)
848 vm_page_inactive_count
++;
849 vm_page_unlock_queues();
852 if (must_be_resident
) {
853 vm_object_paging_end(object
);
854 } else if (object
!= first_object
) {
855 vm_object_paging_end(object
);
861 vm_object_absent_release(object
);
864 vm_page_lock_queues();
865 VM_PAGE_QUEUES_REMOVE(m
);
866 vm_page_unlock_queues();
869 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
870 (integer_t
)object
, offset
,
871 (integer_t
)next_object
,
872 offset
+object
->shadow_offset
,0);
873 offset
+= object
->shadow_offset
;
874 hi_offset
+= object
->shadow_offset
;
875 lo_offset
+= object
->shadow_offset
;
876 access_required
= VM_PROT_READ
;
877 vm_object_lock(next_object
);
878 vm_object_unlock(object
);
879 object
= next_object
;
880 vm_object_paging_begin(object
);
886 && ((object
!= first_object
) ||
887 (object
->copy
!= VM_OBJECT_NULL
))
888 && (fault_type
& VM_PROT_WRITE
)) {
890 * This is a copy-on-write fault that will
891 * cause us to revoke access to this page, but
892 * this page is in the process of being cleaned
893 * in a clustered pageout. We must wait until
894 * the cleaning operation completes before
895 * revoking access to the original page,
896 * otherwise we might attempt to remove a
900 dbgTrace(0xBEEF0009, (unsigned int) m
, (unsigned int) offset
); /* (TEST/DEBUG) */
903 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
904 (integer_t
)object
, offset
,
906 /* take an extra ref so that object won't die */
907 assert(object
->ref_count
> 0);
909 vm_object_res_reference(object
);
910 vm_fault_cleanup(object
, first_m
);
911 counter(c_vm_fault_page_block_backoff_kernel
++);
912 vm_object_lock(object
);
913 assert(object
->ref_count
> 0);
914 m
= vm_page_lookup(object
, offset
);
915 if (m
!= VM_PAGE_NULL
&& m
->cleaning
) {
916 PAGE_ASSERT_WAIT(m
, interruptible
);
917 vm_object_unlock(object
);
918 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
919 vm_object_deallocate(object
);
922 vm_object_unlock(object
);
923 vm_object_deallocate(object
);
924 thread_interrupt_level(interruptible_state
);
925 return VM_FAULT_RETRY
;
930 * If the desired access to this page has
931 * been locked out, request that it be unlocked.
934 if (access_required
& m
->page_lock
) {
935 if ((access_required
& m
->unlock_request
) != access_required
) {
936 vm_prot_t new_unlock_request
;
940 dbgTrace(0xBEEF000A, (unsigned int) m
, (unsigned int) object
->pager_ready
); /* (TEST/DEBUG) */
942 if (!object
->pager_ready
) {
944 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
946 (integer_t
)object
, offset
,
948 /* take an extra ref */
949 assert(object
->ref_count
> 0);
951 vm_object_res_reference(object
);
952 vm_fault_cleanup(object
,
954 counter(c_vm_fault_page_block_backoff_kernel
++);
955 vm_object_lock(object
);
956 assert(object
->ref_count
> 0);
957 if (!object
->pager_ready
) {
958 wait_result
= vm_object_assert_wait(
960 VM_OBJECT_EVENT_PAGER_READY
,
962 vm_object_unlock(object
);
963 if (wait_result
== THREAD_WAITING
)
964 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
965 vm_object_deallocate(object
);
968 vm_object_unlock(object
);
969 vm_object_deallocate(object
);
970 thread_interrupt_level(interruptible_state
);
971 return VM_FAULT_RETRY
;
975 new_unlock_request
= m
->unlock_request
=
976 (access_required
| m
->unlock_request
);
977 vm_object_unlock(object
);
979 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
980 (integer_t
)object
, offset
,
981 (integer_t
)m
, new_unlock_request
, 0);
982 if ((rc
= memory_object_data_unlock(
984 offset
+ object
->paging_offset
,
989 printf("vm_fault: memory_object_data_unlock failed\n");
990 vm_object_lock(object
);
991 vm_fault_cleanup(object
, first_m
);
992 thread_interrupt_level(interruptible_state
);
993 return((rc
== MACH_SEND_INTERRUPTED
) ?
994 VM_FAULT_INTERRUPTED
:
995 VM_FAULT_MEMORY_ERROR
);
997 vm_object_lock(object
);
1002 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
1003 access_required
, (integer_t
)object
,
1004 offset
, (integer_t
)m
, 0);
1005 /* take an extra ref so object won't die */
1006 assert(object
->ref_count
> 0);
1007 object
->ref_count
++;
1008 vm_object_res_reference(object
);
1009 vm_fault_cleanup(object
, first_m
);
1010 counter(c_vm_fault_page_block_backoff_kernel
++);
1011 vm_object_lock(object
);
1012 assert(object
->ref_count
> 0);
1013 m
= vm_page_lookup(object
, offset
);
1014 if (m
!= VM_PAGE_NULL
&&
1015 (access_required
& m
->page_lock
) &&
1016 !((access_required
& m
->unlock_request
) != access_required
)) {
1017 PAGE_ASSERT_WAIT(m
, interruptible
);
1018 vm_object_unlock(object
);
1019 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1020 vm_object_deallocate(object
);
1023 vm_object_unlock(object
);
1024 vm_object_deallocate(object
);
1025 thread_interrupt_level(interruptible_state
);
1026 return VM_FAULT_RETRY
;
1030 * We mark the page busy and leave it on
1031 * the pageout queues. If the pageout
1032 * deamon comes across it, then it will
1037 dbgTrace(0xBEEF000B, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
1040 #if !VM_FAULT_STATIC_CONFIG
1041 if (!software_reference_bits
) {
1042 vm_page_lock_queues();
1044 vm_stat
.reactivations
++;
1046 VM_PAGE_QUEUES_REMOVE(m
);
1047 vm_page_unlock_queues();
1051 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
1052 (integer_t
)object
, offset
, (integer_t
)m
, 0, 0);
1060 (object
->pager_created
) &&
1061 LOOK_FOR(object
, offset
) &&
1065 dbgTrace(0xBEEF000C, (unsigned int) look_for_page
, (unsigned int) object
); /* (TEST/DEBUG) */
1067 if ((look_for_page
|| (object
== first_object
))
1068 && !must_be_resident
1069 && !(object
->phys_contiguous
)) {
1071 * Allocate a new page for this object/offset
1075 m
= vm_page_grab_fictitious();
1077 dbgTrace(0xBEEF000D, (unsigned int) m
, (unsigned int) object
); /* (TEST/DEBUG) */
1079 if (m
== VM_PAGE_NULL
) {
1080 vm_fault_cleanup(object
, first_m
);
1081 thread_interrupt_level(interruptible_state
);
1082 return(VM_FAULT_FICTITIOUS_SHORTAGE
);
1084 vm_page_insert(m
, object
, offset
);
1087 if ((look_for_page
&& !must_be_resident
)) {
1091 * If the memory manager is not ready, we
1092 * cannot make requests.
1094 if (!object
->pager_ready
) {
1096 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
1098 if(m
!= VM_PAGE_NULL
)
1101 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
1102 (integer_t
)object
, offset
, 0, 0, 0);
1103 /* take an extra ref so object won't die */
1104 assert(object
->ref_count
> 0);
1105 object
->ref_count
++;
1106 vm_object_res_reference(object
);
1107 vm_fault_cleanup(object
, first_m
);
1108 counter(c_vm_fault_page_block_backoff_kernel
++);
1109 vm_object_lock(object
);
1110 assert(object
->ref_count
> 0);
1111 if (!object
->pager_ready
) {
1112 wait_result
= vm_object_assert_wait(object
,
1113 VM_OBJECT_EVENT_PAGER_READY
,
1115 vm_object_unlock(object
);
1116 if (wait_result
== THREAD_WAITING
)
1117 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1118 vm_object_deallocate(object
);
1121 vm_object_unlock(object
);
1122 vm_object_deallocate(object
);
1123 thread_interrupt_level(interruptible_state
);
1124 return VM_FAULT_RETRY
;
1128 if(object
->phys_contiguous
) {
1129 if(m
!= VM_PAGE_NULL
) {
1135 if (object
->internal
) {
1137 * Requests to the default pager
1138 * must reserve a real page in advance,
1139 * because the pager's data-provided
1140 * won't block for pages. IMPORTANT:
1141 * this acts as a throttling mechanism
1142 * for data_requests to the default
1147 dbgTrace(0xBEEF000F, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
1149 if (m
->fictitious
&& !vm_page_convert(m
)) {
1151 vm_fault_cleanup(object
, first_m
);
1152 thread_interrupt_level(interruptible_state
);
1153 return(VM_FAULT_MEMORY_SHORTAGE
);
1155 } else if (object
->absent_count
>
1156 vm_object_absent_max
) {
1158 * If there are too many outstanding page
1159 * requests pending on this object, we
1160 * wait for them to be resolved now.
1164 dbgTrace(0xBEEF0010, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
1166 if(m
!= VM_PAGE_NULL
)
1168 /* take an extra ref so object won't die */
1169 assert(object
->ref_count
> 0);
1170 object
->ref_count
++;
1171 vm_object_res_reference(object
);
1172 vm_fault_cleanup(object
, first_m
);
1173 counter(c_vm_fault_page_block_backoff_kernel
++);
1174 vm_object_lock(object
);
1175 assert(object
->ref_count
> 0);
1176 if (object
->absent_count
> vm_object_absent_max
) {
1177 vm_object_absent_assert_wait(object
,
1179 vm_object_unlock(object
);
1180 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1181 vm_object_deallocate(object
);
1184 vm_object_unlock(object
);
1185 vm_object_deallocate(object
);
1186 thread_interrupt_level(interruptible_state
);
1187 return VM_FAULT_RETRY
;
1192 * Indicate that the page is waiting for data
1193 * from the memory manager.
1196 if(m
!= VM_PAGE_NULL
) {
1198 m
->list_req_pending
= TRUE
;
1201 object
->absent_count
++;
1206 cluster_start
= offset
;
1210 * lengthen the cluster by the pages in the working set
1213 (current_task()->dynamic_working_set
!= 0)) {
1214 cluster_end
= cluster_start
+ length
;
1215 /* tws values for start and end are just a
1216 * suggestions. Therefore, as long as
1217 * build_cluster does not use pointers or
1218 * take action based on values that
1219 * could be affected by re-entrance we
1220 * do not need to take the map lock.
1222 cluster_end
= offset
+ PAGE_SIZE_64
;
1224 current_task()->dynamic_working_set
,
1225 object
, &cluster_start
,
1226 &cluster_end
, 0x40000);
1227 length
= cluster_end
- cluster_start
;
1230 dbgTrace(0xBEEF0012, (unsigned int) object
, (unsigned int) 0); /* (TEST/DEBUG) */
1233 * We have a busy page, so we can
1234 * release the object lock.
1236 vm_object_unlock(object
);
1239 * Call the memory manager to retrieve the data.
1243 *type_of_fault
= ((int)length
<< 8) | DBG_PAGEIN_FAULT
;
1245 current_task()->pageins
++;
1246 bumped_pagein
= TRUE
;
1249 * If this object uses a copy_call strategy,
1250 * and we are interested in a copy of this object
1251 * (having gotten here only by following a
1252 * shadow chain), then tell the memory manager
1253 * via a flag added to the desired_access
1254 * parameter, so that it can detect a race
1255 * between our walking down the shadow chain
1256 * and its pushing pages up into a copy of
1257 * the object that it manages.
1260 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_CALL
&&
1261 object
!= first_object
) {
1262 wants_copy_flag
= VM_PROT_WANTS_COPY
;
1264 wants_copy_flag
= VM_PROT_NONE
;
1268 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1269 (integer_t
)object
, offset
, (integer_t
)m
,
1270 access_required
| wants_copy_flag
, 0);
1272 rc
= memory_object_data_request(object
->pager
,
1273 cluster_start
+ object
->paging_offset
,
1275 access_required
| wants_copy_flag
);
1279 dbgTrace(0xBEEF0013, (unsigned int) object
, (unsigned int) rc
); /* (TEST/DEBUG) */
1281 if (rc
!= KERN_SUCCESS
) {
1282 if (rc
!= MACH_SEND_INTERRUPTED
1284 printf("%s(0x%x, 0x%xll, 0x%xll, 0x%x) failed, rc=%d\n",
1285 "memory_object_data_request",
1287 cluster_start
+ object
->paging_offset
,
1288 length
, access_required
, rc
);
1290 * Don't want to leave a busy page around,
1291 * but the data request may have blocked,
1292 * so check if it's still there and busy.
1294 if(!object
->phys_contiguous
) {
1295 vm_object_lock(object
);
1296 for (; length
; length
-= PAGE_SIZE
,
1297 cluster_start
+= PAGE_SIZE_64
) {
1299 if ((p
= vm_page_lookup(object
,
1301 && p
->absent
&& p
->busy
1307 vm_fault_cleanup(object
, first_m
);
1308 thread_interrupt_level(interruptible_state
);
1309 return((rc
== MACH_SEND_INTERRUPTED
) ?
1310 VM_FAULT_INTERRUPTED
:
1311 VM_FAULT_MEMORY_ERROR
);
1314 vm_object_lock(object
);
1315 if ((interruptible
!= THREAD_UNINT
) &&
1316 (current_thread()->state
& TH_ABORT
)) {
1317 vm_fault_cleanup(object
, first_m
);
1318 thread_interrupt_level(interruptible_state
);
1319 return(VM_FAULT_INTERRUPTED
);
1321 if (m
== VM_PAGE_NULL
&&
1322 object
->phys_contiguous
) {
1324 * No page here means that the object we
1325 * initially looked up was "physically
1326 * contiguous" (i.e. device memory). However,
1327 * with Virtual VRAM, the object might not
1328 * be backed by that device memory anymore,
1329 * so we're done here only if the object is
1330 * still "phys_contiguous".
1331 * Otherwise, if the object is no longer
1332 * "phys_contiguous", we need to retry the
1333 * page fault against the object's new backing
1334 * store (different memory object).
1340 * Retry with same object/offset, since new data may
1341 * be in a different page (i.e., m is meaningless at
1348 * The only case in which we get here is if
1349 * object has no pager (or unwiring). If the pager doesn't
1350 * have the page this is handled in the m->absent case above
1351 * (and if you change things here you should look above).
1354 dbgTrace(0xBEEF0014, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1356 if (object
== first_object
)
1359 assert(m
== VM_PAGE_NULL
);
1362 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1363 (integer_t
)object
, offset
, (integer_t
)m
,
1364 (integer_t
)object
->shadow
, 0);
1366 * Move on to the next object. Lock the next
1367 * object before unlocking the current one.
1369 next_object
= object
->shadow
;
1370 if (next_object
== VM_OBJECT_NULL
) {
1371 assert(!must_be_resident
);
1373 * If there's no object left, fill the page
1374 * in the top object with zeros. But first we
1375 * need to allocate a real page.
1378 if (object
!= first_object
) {
1379 vm_object_paging_end(object
);
1380 vm_object_unlock(object
);
1382 object
= first_object
;
1383 offset
= first_offset
;
1384 vm_object_lock(object
);
1388 assert(m
->object
== object
);
1389 first_m
= VM_PAGE_NULL
;
1391 if(m
== VM_PAGE_NULL
) {
1393 if (m
== VM_PAGE_NULL
) {
1395 object
, VM_PAGE_NULL
);
1396 thread_interrupt_level(
1397 interruptible_state
);
1398 return(VM_FAULT_MEMORY_SHORTAGE
);
1404 if (object
->shadow_severed
) {
1406 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1407 thread_interrupt_level(interruptible_state
);
1408 return VM_FAULT_MEMORY_ERROR
;
1412 * are we protecting the system from
1413 * backing store exhaustion. If so
1414 * sleep unless we are privileged.
1417 if(vm_backing_store_low
) {
1418 if(!(current_task()->priv_flags
1419 & VM_BACKING_STORE_PRIV
)) {
1420 assert_wait((event_t
)
1421 &vm_backing_store_low
,
1424 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1425 thread_block(THREAD_CONTINUE_NULL
);
1426 thread_interrupt_level(
1427 interruptible_state
);
1428 return(VM_FAULT_RETRY
);
1432 if (VM_PAGE_THROTTLED() ||
1433 (m
->fictitious
&& !vm_page_convert(m
))) {
1435 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1436 thread_interrupt_level(interruptible_state
);
1437 return(VM_FAULT_MEMORY_SHORTAGE
);
1439 m
->no_isync
= FALSE
;
1441 if (!no_zero_fill
) {
1442 vm_object_unlock(object
);
1443 vm_page_zero_fill(m
);
1444 vm_object_lock(object
);
1447 *type_of_fault
= DBG_ZERO_FILL_FAULT
;
1448 VM_STAT(zero_fill_count
++);
1450 if (bumped_pagein
== TRUE
) {
1452 current_task()->pageins
--;
1454 vm_page_lock_queues();
1455 VM_PAGE_QUEUES_REMOVE(m
);
1456 assert(!m
->laundry
);
1457 assert(m
->object
!= kernel_object
);
1458 assert(m
->pageq
.next
== NULL
&&
1459 m
->pageq
.prev
== NULL
);
1460 if(m
->object
->size
> 0x200000) {
1461 m
->zero_fill
= TRUE
;
1462 /* depends on the queues lock */
1464 queue_enter(&vm_page_queue_zf
,
1465 m
, vm_page_t
, pageq
);
1468 &vm_page_queue_inactive
,
1469 m
, vm_page_t
, pageq
);
1471 m
->page_ticket
= vm_page_ticket
;
1472 vm_page_ticket_roll
++;
1473 if(vm_page_ticket_roll
== VM_PAGE_TICKETS_IN_ROLL
) {
1474 vm_page_ticket_roll
= 0;
1475 if(vm_page_ticket
==
1476 VM_PAGE_TICKET_ROLL_IDS
)
1482 vm_page_inactive_count
++;
1483 vm_page_unlock_queues();
1485 pmap_clear_modify(m
->phys_page
);
1490 if ((object
!= first_object
) || must_be_resident
)
1491 vm_object_paging_end(object
);
1492 offset
+= object
->shadow_offset
;
1493 hi_offset
+= object
->shadow_offset
;
1494 lo_offset
+= object
->shadow_offset
;
1495 access_required
= VM_PROT_READ
;
1496 vm_object_lock(next_object
);
1497 vm_object_unlock(object
);
1498 object
= next_object
;
1499 vm_object_paging_begin(object
);
1504 * PAGE HAS BEEN FOUND.
1507 * busy, so that we can play with it;
1508 * not absent, so that nobody else will fill it;
1509 * possibly eligible for pageout;
1511 * The top-level page (first_m) is:
1512 * VM_PAGE_NULL if the page was found in the
1514 * busy, not absent, and ineligible for pageout.
1516 * The current object (object) is locked. A paging
1517 * reference is held for the current and top-level
1522 dbgTrace(0xBEEF0015, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1524 #if EXTRA_ASSERTIONS
1525 if(m
!= VM_PAGE_NULL
) {
1526 assert(m
->busy
&& !m
->absent
);
1527 assert((first_m
== VM_PAGE_NULL
) ||
1528 (first_m
->busy
&& !first_m
->absent
&&
1529 !first_m
->active
&& !first_m
->inactive
));
1531 #endif /* EXTRA_ASSERTIONS */
1535 * If we found a page, we must have decrypted it before we
1538 if (m
!= VM_PAGE_NULL
) {
1539 ASSERT_PAGE_DECRYPTED(m
);
1543 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1544 (integer_t
)object
, offset
, (integer_t
)m
,
1545 (integer_t
)first_object
, (integer_t
)first_m
);
1547 * If the page is being written, but isn't
1548 * already owned by the top-level object,
1549 * we have to copy it into a new page owned
1550 * by the top-level object.
1553 if ((object
!= first_object
) && (m
!= VM_PAGE_NULL
)) {
1555 * We only really need to copy if we
1560 dbgTrace(0xBEEF0016, (unsigned int) object
, (unsigned int) fault_type
); /* (TEST/DEBUG) */
1562 if (fault_type
& VM_PROT_WRITE
) {
1565 assert(!must_be_resident
);
1568 * are we protecting the system from
1569 * backing store exhaustion. If so
1570 * sleep unless we are privileged.
1573 if(vm_backing_store_low
) {
1574 if(!(current_task()->priv_flags
1575 & VM_BACKING_STORE_PRIV
)) {
1576 assert_wait((event_t
)
1577 &vm_backing_store_low
,
1580 vm_fault_cleanup(object
, first_m
);
1581 thread_block(THREAD_CONTINUE_NULL
);
1582 thread_interrupt_level(
1583 interruptible_state
);
1584 return(VM_FAULT_RETRY
);
1589 * If we try to collapse first_object at this
1590 * point, we may deadlock when we try to get
1591 * the lock on an intermediate object (since we
1592 * have the bottom object locked). We can't
1593 * unlock the bottom object, because the page
1594 * we found may move (by collapse) if we do.
1596 * Instead, we first copy the page. Then, when
1597 * we have no more use for the bottom object,
1598 * we unlock it and try to collapse.
1600 * Note that we copy the page even if we didn't
1601 * need to... that's the breaks.
1605 * Allocate a page for the copy
1607 copy_m
= vm_page_grab();
1608 if (copy_m
== VM_PAGE_NULL
) {
1610 vm_fault_cleanup(object
, first_m
);
1611 thread_interrupt_level(interruptible_state
);
1612 return(VM_FAULT_MEMORY_SHORTAGE
);
1617 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1618 (integer_t
)object
, offset
,
1619 (integer_t
)m
, (integer_t
)copy_m
, 0);
1620 vm_page_copy(m
, copy_m
);
1623 * If another map is truly sharing this
1624 * page with us, we have to flush all
1625 * uses of the original page, since we
1626 * can't distinguish those which want the
1627 * original from those which need the
1630 * XXXO If we know that only one map has
1631 * access to this page, then we could
1632 * avoid the pmap_disconnect() call.
1635 vm_page_lock_queues();
1636 assert(!m
->cleaning
);
1637 pmap_disconnect(m
->phys_page
);
1638 vm_page_deactivate(m
);
1639 copy_m
->dirty
= TRUE
;
1641 * Setting reference here prevents this fault from
1642 * being counted as a (per-thread) reactivate as well
1643 * as a copy-on-write.
1645 first_m
->reference
= TRUE
;
1646 vm_page_unlock_queues();
1649 * We no longer need the old page or object.
1652 PAGE_WAKEUP_DONE(m
);
1653 vm_object_paging_end(object
);
1654 vm_object_unlock(object
);
1657 *type_of_fault
= DBG_COW_FAULT
;
1658 VM_STAT(cow_faults
++);
1659 current_task()->cow_faults
++;
1660 object
= first_object
;
1661 offset
= first_offset
;
1663 vm_object_lock(object
);
1664 VM_PAGE_FREE(first_m
);
1665 first_m
= VM_PAGE_NULL
;
1666 assert(copy_m
->busy
);
1667 vm_page_insert(copy_m
, object
, offset
);
1671 * Now that we've gotten the copy out of the
1672 * way, let's try to collapse the top object.
1673 * But we have to play ugly games with
1674 * paging_in_progress to do that...
1677 vm_object_paging_end(object
);
1678 vm_object_collapse(object
, offset
);
1679 vm_object_paging_begin(object
);
1683 *protection
&= (~VM_PROT_WRITE
);
1688 * Now check whether the page needs to be pushed into the
1689 * copy object. The use of asymmetric copy on write for
1690 * shared temporary objects means that we may do two copies to
1691 * satisfy the fault; one above to get the page from a
1692 * shadowed object, and one here to push it into the copy.
1695 while ((copy_object
= first_object
->copy
) != VM_OBJECT_NULL
&&
1696 (m
!= VM_PAGE_NULL
)) {
1697 vm_object_offset_t copy_offset
;
1701 dbgTrace(0xBEEF0017, (unsigned int) copy_object
, (unsigned int) fault_type
); /* (TEST/DEBUG) */
1704 * If the page is being written, but hasn't been
1705 * copied to the copy-object, we have to copy it there.
1708 if ((fault_type
& VM_PROT_WRITE
) == 0) {
1709 *protection
&= ~VM_PROT_WRITE
;
1714 * If the page was guaranteed to be resident,
1715 * we must have already performed the copy.
1718 if (must_be_resident
)
1722 * Try to get the lock on the copy_object.
1724 if (!vm_object_lock_try(copy_object
)) {
1725 vm_object_unlock(object
);
1727 mutex_pause(); /* wait a bit */
1729 vm_object_lock(object
);
1734 * Make another reference to the copy-object,
1735 * to keep it from disappearing during the
1738 assert(copy_object
->ref_count
> 0);
1739 copy_object
->ref_count
++;
1740 VM_OBJ_RES_INCR(copy_object
);
1743 * Does the page exist in the copy?
1745 copy_offset
= first_offset
- copy_object
->shadow_offset
;
1746 if (copy_object
->size
<= copy_offset
)
1748 * Copy object doesn't cover this page -- do nothing.
1752 vm_page_lookup(copy_object
, copy_offset
)) != VM_PAGE_NULL
) {
1753 /* Page currently exists in the copy object */
1756 * If the page is being brought
1757 * in, wait for it and then retry.
1760 /* take an extra ref so object won't die */
1761 assert(copy_object
->ref_count
> 0);
1762 copy_object
->ref_count
++;
1763 vm_object_res_reference(copy_object
);
1764 vm_object_unlock(copy_object
);
1765 vm_fault_cleanup(object
, first_m
);
1766 counter(c_vm_fault_page_block_backoff_kernel
++);
1767 vm_object_lock(copy_object
);
1768 assert(copy_object
->ref_count
> 0);
1769 VM_OBJ_RES_DECR(copy_object
);
1770 copy_object
->ref_count
--;
1771 assert(copy_object
->ref_count
> 0);
1772 copy_m
= vm_page_lookup(copy_object
, copy_offset
);
1775 * it's OK if the "copy_m" page is encrypted,
1776 * because we're not moving it nor handling its
1779 if (copy_m
!= VM_PAGE_NULL
&& copy_m
->busy
) {
1780 PAGE_ASSERT_WAIT(copy_m
, interruptible
);
1781 vm_object_unlock(copy_object
);
1782 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1783 vm_object_deallocate(copy_object
);
1786 vm_object_unlock(copy_object
);
1787 vm_object_deallocate(copy_object
);
1788 thread_interrupt_level(interruptible_state
);
1789 return VM_FAULT_RETRY
;
1793 else if (!PAGED_OUT(copy_object
, copy_offset
)) {
1795 * If PAGED_OUT is TRUE, then the page used to exist
1796 * in the copy-object, and has already been paged out.
1797 * We don't need to repeat this. If PAGED_OUT is
1798 * FALSE, then either we don't know (!pager_created,
1799 * for example) or it hasn't been paged out.
1800 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1801 * We must copy the page to the copy object.
1805 * are we protecting the system from
1806 * backing store exhaustion. If so
1807 * sleep unless we are privileged.
1810 if(vm_backing_store_low
) {
1811 if(!(current_task()->priv_flags
1812 & VM_BACKING_STORE_PRIV
)) {
1813 assert_wait((event_t
)
1814 &vm_backing_store_low
,
1817 VM_OBJ_RES_DECR(copy_object
);
1818 copy_object
->ref_count
--;
1819 assert(copy_object
->ref_count
> 0);
1820 vm_object_unlock(copy_object
);
1821 vm_fault_cleanup(object
, first_m
);
1822 thread_block(THREAD_CONTINUE_NULL
);
1823 thread_interrupt_level(
1824 interruptible_state
);
1825 return(VM_FAULT_RETRY
);
1830 * Allocate a page for the copy
1832 copy_m
= vm_page_alloc(copy_object
, copy_offset
);
1833 if (copy_m
== VM_PAGE_NULL
) {
1835 VM_OBJ_RES_DECR(copy_object
);
1836 copy_object
->ref_count
--;
1837 assert(copy_object
->ref_count
> 0);
1838 vm_object_unlock(copy_object
);
1839 vm_fault_cleanup(object
, first_m
);
1840 thread_interrupt_level(interruptible_state
);
1841 return(VM_FAULT_MEMORY_SHORTAGE
);
1845 * Must copy page into copy-object.
1848 vm_page_copy(m
, copy_m
);
1851 * If the old page was in use by any users
1852 * of the copy-object, it must be removed
1853 * from all pmaps. (We can't know which
1857 vm_page_lock_queues();
1858 assert(!m
->cleaning
);
1859 pmap_disconnect(m
->phys_page
);
1860 copy_m
->dirty
= TRUE
;
1861 vm_page_unlock_queues();
1864 * If there's a pager, then immediately
1865 * page out this page, using the "initialize"
1866 * option. Else, we use the copy.
1871 ((!copy_object
->pager_created
) ||
1872 vm_external_state_get(
1873 copy_object
->existence_map
, copy_offset
)
1874 == VM_EXTERNAL_STATE_ABSENT
)
1876 (!copy_object
->pager_created
)
1879 vm_page_lock_queues();
1880 vm_page_activate(copy_m
);
1881 vm_page_unlock_queues();
1882 PAGE_WAKEUP_DONE(copy_m
);
1885 assert(copy_m
->busy
== TRUE
);
1888 * The page is already ready for pageout:
1889 * not on pageout queues and busy.
1890 * Unlock everything except the
1891 * copy_object itself.
1894 vm_object_unlock(object
);
1897 * Write the page to the copy-object,
1898 * flushing it from the kernel.
1901 vm_pageout_initialize_page(copy_m
);
1904 * Since the pageout may have
1905 * temporarily dropped the
1906 * copy_object's lock, we
1907 * check whether we'll have
1908 * to deallocate the hard way.
1911 if ((copy_object
->shadow
!= object
) ||
1912 (copy_object
->ref_count
== 1)) {
1913 vm_object_unlock(copy_object
);
1914 vm_object_deallocate(copy_object
);
1915 vm_object_lock(object
);
1920 * Pick back up the old object's
1921 * lock. [It is safe to do so,
1922 * since it must be deeper in the
1926 vm_object_lock(object
);
1930 * Because we're pushing a page upward
1931 * in the object tree, we must restart
1932 * any faults that are waiting here.
1933 * [Note that this is an expansion of
1934 * PAGE_WAKEUP that uses the THREAD_RESTART
1935 * wait result]. Can't turn off the page's
1936 * busy bit because we're not done with it.
1941 thread_wakeup_with_result((event_t
) m
,
1947 * The reference count on copy_object must be
1948 * at least 2: one for our extra reference,
1949 * and at least one from the outside world
1950 * (we checked that when we last locked
1953 copy_object
->ref_count
--;
1954 assert(copy_object
->ref_count
> 0);
1955 VM_OBJ_RES_DECR(copy_object
);
1956 vm_object_unlock(copy_object
);
1962 *top_page
= first_m
;
1965 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1966 (integer_t
)object
, offset
, (integer_t
)m
, (integer_t
)first_m
, 0);
1968 * If the page can be written, assume that it will be.
1969 * [Earlier, we restrict the permission to allow write
1970 * access only if the fault so required, so we don't
1971 * mark read-only data as dirty.]
1975 if(m
!= VM_PAGE_NULL
) {
1976 #if !VM_FAULT_STATIC_CONFIG
1977 if (vm_fault_dirty_handling
&& (*protection
& VM_PROT_WRITE
))
1980 if (vm_page_deactivate_behind
)
1981 vm_fault_deactivate_behind(object
, offset
, behavior
);
1983 vm_object_unlock(object
);
1985 thread_interrupt_level(interruptible_state
);
1988 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS
, 0); /* (TEST/DEBUG) */
1990 return(VM_FAULT_SUCCESS
);
1994 vm_fault_cleanup(object
, first_m
);
1996 counter(c_vm_fault_page_block_backoff_kernel
++);
1997 thread_block(THREAD_CONTINUE_NULL
);
2001 thread_interrupt_level(interruptible_state
);
2002 if (wait_result
== THREAD_INTERRUPTED
)
2003 return VM_FAULT_INTERRUPTED
;
2004 return VM_FAULT_RETRY
;
2010 * Routine: vm_fault_tws_insert
2012 * Add fault information to the task working set.
2014 * We always insert the base object/offset pair
2015 * rather the actual object/offset.
2017 * Map and real_map locked.
2018 * Object locked and referenced.
2020 * TRUE if startup file should be written.
2021 * With object locked and still referenced.
2022 * But we may drop the object lock temporarily.
2025 vm_fault_tws_insert(
2028 vm_map_offset_t vaddr
,
2030 vm_object_offset_t offset
)
2032 tws_hash_line_t line
;
2035 boolean_t result
= FALSE
;
2037 /* Avoid possible map lock deadlock issues */
2038 if (map
== kernel_map
|| map
== kalloc_map
||
2039 real_map
== kernel_map
|| real_map
== kalloc_map
)
2042 task
= current_task();
2043 if (task
->dynamic_working_set
!= 0) {
2044 vm_object_t base_object
;
2045 vm_object_t base_shadow
;
2046 vm_object_offset_t base_offset
;
2047 base_object
= object
;
2048 base_offset
= offset
;
2049 while ((base_shadow
= base_object
->shadow
)) {
2050 vm_object_lock(base_shadow
);
2051 vm_object_unlock(base_object
);
2053 base_object
->shadow_offset
;
2054 base_object
= base_shadow
;
2057 task
->dynamic_working_set
,
2058 base_offset
, base_object
,
2060 if (kr
== KERN_OPERATION_TIMED_OUT
){
2062 if (base_object
!= object
) {
2063 vm_object_unlock(base_object
);
2064 vm_object_lock(object
);
2066 } else if (kr
!= KERN_SUCCESS
) {
2067 if(base_object
!= object
)
2068 vm_object_reference_locked(base_object
);
2070 task
->dynamic_working_set
,
2071 base_offset
, base_object
,
2073 if(base_object
!= object
) {
2074 vm_object_unlock(base_object
);
2075 vm_object_deallocate(base_object
);
2077 if(kr
== KERN_NO_SPACE
) {
2078 if (base_object
== object
)
2079 vm_object_unlock(object
);
2080 tws_expand_working_set(
2081 task
->dynamic_working_set
,
2082 TWS_HASH_LINE_COUNT
,
2084 if (base_object
== object
)
2085 vm_object_lock(object
);
2086 } else if(kr
== KERN_OPERATION_TIMED_OUT
) {
2089 if(base_object
!= object
)
2090 vm_object_lock(object
);
2091 } else if (base_object
!= object
) {
2092 vm_object_unlock(base_object
);
2093 vm_object_lock(object
);
2102 * Handle page faults, including pseudo-faults
2103 * used to change the wiring status of pages.
2105 * Explicit continuations have been removed.
2107 * vm_fault and vm_fault_page save mucho state
2108 * in the moral equivalent of a closure. The state
2109 * structure is allocated when first entering vm_fault
2110 * and deallocated when leaving vm_fault.
2113 extern int _map_enter_debug
;
2118 vm_map_offset_t vaddr
,
2119 vm_prot_t fault_type
,
2120 boolean_t change_wiring
,
2123 vm_map_offset_t caller_pmap_addr
)
2125 vm_map_version_t version
; /* Map version for verificiation */
2126 boolean_t wired
; /* Should mapping be wired down? */
2127 vm_object_t object
; /* Top-level object */
2128 vm_object_offset_t offset
; /* Top-level offset */
2129 vm_prot_t prot
; /* Protection for mapping */
2130 vm_behavior_t behavior
; /* Expected paging behavior */
2131 vm_map_offset_t lo_offset
, hi_offset
;
2132 vm_object_t old_copy_object
; /* Saved copy object */
2133 vm_page_t result_page
; /* Result of vm_fault_page */
2134 vm_page_t top_page
; /* Placeholder page */
2138 vm_page_t m
; /* Fast access to result_page */
2139 kern_return_t error_code
= 0; /* page error reasons */
2141 vm_object_t cur_object
;
2143 vm_object_offset_t cur_offset
;
2145 vm_object_t new_object
;
2147 vm_map_t real_map
= map
;
2148 vm_map_t original_map
= map
;
2150 boolean_t interruptible_state
;
2151 unsigned int cache_attr
;
2152 int write_startup_file
= 0;
2153 boolean_t need_activation
;
2154 vm_prot_t full_fault_type
;
2156 if (get_preemption_level() != 0)
2157 return (KERN_FAILURE
);
2159 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, 0)) | DBG_FUNC_START
,
2166 /* at present we do not fully check for execute permission */
2167 /* we generally treat it is read except in certain device */
2168 /* memory settings */
2169 full_fault_type
= fault_type
;
2170 if(fault_type
& VM_PROT_EXECUTE
) {
2171 fault_type
&= ~VM_PROT_EXECUTE
;
2172 fault_type
|= VM_PROT_READ
;
2175 interruptible_state
= thread_interrupt_level(interruptible
);
2178 * assume we will hit a page in the cache
2179 * otherwise, explicitly override with
2180 * the real fault type once we determine it
2182 type_of_fault
= DBG_CACHE_HIT_FAULT
;
2185 current_task()->faults
++;
2190 * Find the backing store object and offset into
2191 * it to begin the search.
2194 vm_map_lock_read(map
);
2195 kr
= vm_map_lookup_locked(&map
, vaddr
, fault_type
, &version
,
2198 &behavior
, &lo_offset
, &hi_offset
, &real_map
);
2200 //if (_map_enter_debug)printf("vm_map_lookup_locked(map=0x%x, addr=0x%llx, prot=%d wired=%d) = %d\n", map, vaddr, prot, wired, kr);
2202 pmap
= real_map
->pmap
;
2204 if (kr
!= KERN_SUCCESS
) {
2205 vm_map_unlock_read(map
);
2210 * If the page is wired, we must fault for the current protection
2211 * value, to avoid further faults.
2215 fault_type
= prot
| VM_PROT_WRITE
;
2217 #if VM_FAULT_CLASSIFY
2219 * Temporary data gathering code
2221 vm_fault_classify(object
, offset
, fault_type
);
	/*
	 *	Fast fault code.  The basic idea is to do as much as
	 *	possible while holding the map lock and object locks.
	 *	Busy pages are not used until the object lock has to
	 *	be dropped to do something (copy, zero fill, pmap enter).
	 *	Similarly, paging references aren't acquired until that
	 *	point, and object references aren't used.
	 *
	 *	If we can figure out what to do
	 *	(zero fill, copy on write, pmap enter) while holding
	 *	the locks, then it gets done.  Otherwise, we give up,
	 *	and use the original fault path (which doesn't hold
	 *	the map lock, and relies on busy pages).
	 *	The give up cases include:
	 *	- Have to talk to pager.
	 *	- Page is busy, absent or in error.
	 *	- Pager has locked out desired access.
	 *	- Fault needs to be restarted.
	 *	- Have to push page into copy object.
	 *
	 *	The code is an infinite loop that moves one level down
	 *	the shadow chain each time.  cur_object and cur_offset
	 *	refer to the current object being examined. object and offset
	 *	are the original object from the map.  The loop is at the
	 *	top level if and only if object and cur_object are the same.
	 *
	 *	Invariants:  Map lock is held throughout.  Lock is held on
	 *		original object and cur_object (if different) when
	 *		continuing or exiting loop.
	 */
	/*
	 *	If this page is to be inserted in a copy delay object
	 *	for writing, and if the object has a copy, then the
	 *	copy delay strategy is implemented in the slow fault page.
	 */
	if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
	    object->copy == VM_OBJECT_NULL ||
	    (fault_type & VM_PROT_WRITE) == 0) {
	cur_object = object;
	cur_offset = offset;

	while (TRUE) {
		m = vm_page_lookup(cur_object, cur_offset);
		if (m != VM_PAGE_NULL) {
			if (m->busy) {
				wait_result_t	result;

				if (object != cur_object)
					vm_object_unlock(object);

				vm_map_unlock_read(map);
				if (real_map != map)
					vm_map_unlock(real_map);

#if	!VM_FAULT_STATIC_CONFIG
				if (!vm_fault_interruptible)
					interruptible = THREAD_UNINT;
#endif
				result = PAGE_ASSERT_WAIT(m, interruptible);

				vm_object_unlock(cur_object);

				if (result == THREAD_WAITING) {
					result = thread_block(THREAD_CONTINUE_NULL);

					counter(c_vm_fault_page_block_busy_kernel++);
				}
				if (result == THREAD_AWAKENED || result == THREAD_RESTART)
					goto RetryFault;

				kr = KERN_ABORTED;
				goto done;
			}
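			/*
			 * The busy case above drops the map lock and both
			 * object locks before blocking; sleeping while still
			 * holding them could deadlock against the thread that
			 * will eventually clear the busy bit, so a wakeup
			 * simply restarts the whole fault.
			 */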
			if (m->unusual && (m->error || m->restart || m->private
			    || m->absent || (fault_type & m->page_lock))) {
				/*
				 *	Unusual case. Give up.
				 */
				break;
			}

			if (m->encrypted) {
				/*
				 * We've soft-faulted (because it's not in the page
				 * table) on an encrypted page.
				 * Keep the page "busy" so that no one messes with
				 * it during the decryption.
				 * Release the extra locks we're holding, keep only
				 * the page's VM object lock.
				 */
				m->busy = TRUE;
				if (object != cur_object) {
					vm_object_unlock(object);
				}
				vm_map_unlock_read(map);
				if (real_map != map)
					vm_map_unlock(real_map);

				vm_page_decrypt(m, 0);

				assert(m->busy);
				PAGE_WAKEUP_DONE(m);
				vm_object_unlock(m->object);

				/*
				 * Retry from the top, in case anything
				 * changed while we were decrypting...
				 */
				goto RetryFault;
			}
			ASSERT_PAGE_DECRYPTED(m);
			/*
			 *	Two cases of map in faults:
			 *	    - At top level w/o copy object.
			 *	    - Read fault anywhere.
			 *		--> must disallow write.
			 */

			if (object == cur_object &&
			    object->copy == VM_OBJECT_NULL)
				goto FastMapInFault;

			if ((fault_type & VM_PROT_WRITE) == 0) {
				boolean_t sequential;

				prot &= ~VM_PROT_WRITE;

				/*
				 *	Set up to map the page ...
				 *	mark the page busy, drop
				 *	locks and take a paging reference
				 *	on the object with the page.
				 */
				if (object != cur_object) {
					vm_object_unlock(object);
					object = cur_object;
				}
FastMapInFault:
				m->busy = TRUE;

				vm_object_paging_begin(object);

FastPmapEnter:
				/*
				 *	Check a couple of global reasons to
				 *	be conservative about write access.
				 *	Then do the pmap_enter.
				 */
#if	!VM_FAULT_STATIC_CONFIG
				if (vm_fault_dirty_handling
#if	MACH_KDB
				    || db_watchpoint_list
#endif
				    && (fault_type & VM_PROT_WRITE) == 0)
					prot &= ~VM_PROT_WRITE;
#else	/* STATIC_CONFIG */
#if	MACH_KDB
				if (db_watchpoint_list
				    && (fault_type & VM_PROT_WRITE) == 0)
					prot &= ~VM_PROT_WRITE;
#endif	/* MACH_KDB */
#endif	/* STATIC_CONFIG */
				cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
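				/*
				 * cache_attr carries the object's WIMG bits
				 * (write-through, cache-inhibited, coherent,
				 * guarded) so the PMAP_ENTER below can set up
				 * the mapping with the caching attributes the
				 * backing memory requires.
				 */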
				need_activation = FALSE;

				if (m->no_isync == TRUE) {
					m->no_isync = FALSE;
					pmap_sync_page_data_phys(m->phys_page);

					if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
						/*
						 * found it in the cache, but this
						 * is the first fault-in of the page (no_isync == TRUE)
						 * so it must have come in as part of
						 * a cluster... account 1 pagein against it
						 */
						current_task()->pageins++;

						type_of_fault = DBG_PAGEIN_FAULT;
					}
					need_activation = TRUE;

				} else if (cache_attr != VM_WIMG_DEFAULT) {
					pmap_sync_page_attributes_phys(m->phys_page);
				}

				if (caller_pmap) {
					PMAP_ENTER(caller_pmap,
						   caller_pmap_addr, m,
						   prot, cache_attr, wired);
				} else {
					PMAP_ENTER(pmap, vaddr, m,
						   prot, cache_attr, wired);
				}
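				/*
				 * PMAP_ENTER is what actually installs the
				 * translation: it goes into the caller's pmap
				 * when one was passed in (as on the wiring
				 * path in vm_fault_wire), otherwise into the
				 * pmap of the map the lookup resolved to.
				 */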
				/*
				 *	Hold queues lock to manipulate
				 *	the page queues.  Change wiring
				 *	case is obvious.  In soft ref bits
				 *	case activate page only if it fell
				 *	off paging queues, otherwise just
				 *	activate it if it's inactive.
				 *
				 *	NOTE: original vm_fault code will
				 *	move active page to back of active
				 *	queue.  This code doesn't.
				 */
				vm_page_lock_queues();

				if (m->clustered) {
					vm_pagein_cluster_used++;
					m->clustered = FALSE;
				}
				m->reference = TRUE;

				if (change_wiring) {
					if (wired)
						vm_page_wire(m);
					else
						vm_page_unwire(m);
				}
#if VM_FAULT_STATIC_CONFIG
				else {
					if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
						vm_page_activate(m);
				}
#else
				else if (software_reference_bits) {
					if (!m->active && !m->inactive)
						vm_page_activate(m);
				}
				else if (!m->active) {
					vm_page_activate(m);
				}
#endif
				vm_page_unlock_queues();

				/*
				 *	That's it, clean up and return.
				 */
				PAGE_WAKEUP_DONE(m);
				sequential = (sequential && vm_page_deactivate_behind) ?
					vm_fault_deactivate_behind(object, cur_offset, behavior) :
					FALSE;

				/*
				 *	Add non-sequential pages to the working set.
				 *	The sequential pages will be brought in through
				 *	normal clustering behavior.
				 */
				if (!sequential && !object->private) {
					write_startup_file =
						vm_fault_tws_insert(map, real_map, vaddr,
								    object, cur_offset);
				}

				vm_object_paging_end(object);
				vm_object_unlock(object);

				vm_map_unlock_read(map);
				if (real_map != map)
					vm_map_unlock(real_map);

				if (write_startup_file)
					tws_send_startup_info(current_task());

				thread_interrupt_level(interruptible_state);

				KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
						      vaddr,
						      type_of_fault & 0xff,
						      KERN_SUCCESS, 0, 0);

				return KERN_SUCCESS;
			}
			/*
			 *	Copy on write fault.  If objects match, then
			 *	object->copy must not be NULL (else control
			 *	would be in previous code block), and we
			 *	have a potential push into the copy object
			 *	with which we won't cope here.
			 */

			if (cur_object == object)
				break;

			/*
			 *	This is now a shadow based copy on write
			 *	fault -- it requires a copy up the shadow
			 *	chain.
			 *
			 *	Allocate a page in the original top level
			 *	object. Give up if allocate fails.  Also
			 *	need to remember current page, as it's the
			 *	source of the copy.
			 */
			cur_m = m;
			m = vm_page_alloc(object, offset);
			if (m == VM_PAGE_NULL) {
				break;
			}

			/*
			 *	Now do the copy.  Mark the source busy
			 *	and take out paging references on both
			 *	objects.
			 *
			 *	NOTE: This code holds the map lock across
			 *	the page copy.
			 */
			cur_m->busy = TRUE;
			vm_page_copy(cur_m, m);
			vm_page_insert(m, object, offset);

			vm_object_paging_begin(cur_object);
			vm_object_paging_begin(object);

			type_of_fault = DBG_COW_FAULT;
			VM_STAT(cow_faults++);
			current_task()->cow_faults++;

			/*
			 *	Now cope with the source page and object
			 *	If the top object has a ref count of 1
			 *	then no other map can access it, and hence
			 *	it's not necessary to do the pmap_disconnect.
			 */
			vm_page_lock_queues();
			vm_page_deactivate(cur_m);
			pmap_disconnect(cur_m->phys_page);
			vm_page_unlock_queues();

			PAGE_WAKEUP_DONE(cur_m);
			vm_object_paging_end(cur_object);
			vm_object_unlock(cur_object);

			/*
			 *	Slight hack to call vm_object collapse
			 *	and then reuse common map in code.
			 *	note that the object lock was taken above.
			 */
			vm_object_paging_end(object);
			vm_object_collapse(object, offset);
			vm_object_paging_begin(object);

			goto FastPmapEnter;
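			/*
			 * After this copy-up the faulting address is backed by
			 * the new, writable page in the top-level object; the
			 * original page stays in the shadow object for other
			 * mappings, and the collapse call gives the VM a
			 * chance to fold a now-redundant shadow chain.
			 */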
		} else {
			/*
			 *	No page at cur_object, cur_offset
			 */

			if (cur_object->pager_created) {
				/*
				 *	Have to talk to the pager.  Give up.
				 */
				break;
			}

			if (cur_object->shadow == VM_OBJECT_NULL) {

				if (cur_object->shadow_severed) {
					vm_object_paging_end(object);
					vm_object_unlock(object);
					vm_map_unlock_read(map);
					if (real_map != map)
						vm_map_unlock(real_map);

					if (write_startup_file)
						tws_send_startup_info(
								current_task());

					thread_interrupt_level(interruptible_state);

					return KERN_MEMORY_ERROR;
				}
				/*
				 *	Zero fill fault.  Page gets
				 *	filled in top object. Insert
				 *	page, then drop any lower lock.
				 *	Give up if no page.
				 */
				if (VM_PAGE_THROTTLED()) {
					break;
				}

				/*
				 * are we protecting the system from
				 * backing store exhaustion.  If so
				 * sleep unless we are privileged.
				 */
				if (vm_backing_store_low) {
					if (!(current_task()->priv_flags
						& VM_BACKING_STORE_PRIV))
						break;
				}
				m = vm_page_alloc(object, offset);
				if (m == VM_PAGE_NULL) {
					break;
				}
				/*
				 * This is a zero-fill or initial fill
				 * page fault.  As such, we consider it
				 * undefined with respect to instruction
				 * execution.  i.e. it is the responsibility
				 * of higher layers to call for an instruction
				 * sync after changing the contents and before
				 * sending a program into this area.  We
				 * choose this approach for performance.
				 */
				m->no_isync = FALSE;

				if (cur_object != object)
					vm_object_unlock(cur_object);

				vm_object_paging_begin(object);
				vm_object_unlock(object);

				/*
				 *	Now zero fill page and map it.
				 *	the page is probably going to
				 *	be written soon, so don't bother
				 *	to clear the modified bit
				 *
				 *	NOTE: This code holds the map
				 *	lock across the zero fill.
				 */
				if (!map->no_zero_fill) {
					vm_page_zero_fill(m);
					type_of_fault = DBG_ZERO_FILL_FAULT;
					VM_STAT(zero_fill_count++);
				}
				vm_page_lock_queues();
				VM_PAGE_QUEUES_REMOVE(m);

				m->page_ticket = vm_page_ticket;
				assert(!m->laundry);
				assert(m->object != kernel_object);
				assert(m->pageq.next == NULL &&
				       m->pageq.prev == NULL);
				if (m->object->size > 0x200000) {
					m->zero_fill = TRUE;
					/* depends on the queues lock */
					vm_zf_count += 1;
					queue_enter(&vm_page_queue_zf,
						    m, vm_page_t, pageq);
				} else {
					queue_enter(
						&vm_page_queue_inactive,
						m, vm_page_t, pageq);
				}
				vm_page_ticket_roll++;
				if (vm_page_ticket_roll ==
						VM_PAGE_TICKETS_IN_ROLL) {
					vm_page_ticket_roll = 0;
					if (vm_page_ticket ==
							VM_PAGE_TICKET_ROLL_IDS)
						vm_page_ticket = 0;
					else
						vm_page_ticket++;
				}
				m->inactive = TRUE;
				vm_page_inactive_count++;
				vm_page_unlock_queues();
				vm_object_lock(object);

				goto FastPmapEnter;
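				/*
				 * Freshly zero-filled pages go straight onto a
				 * paging queue: pages of large (>2MB) objects
				 * are tagged zero_fill and placed on the
				 * dedicated zero-fill queue, everything else
				 * goes on the inactive queue, with page_ticket
				 * recording which ticket generation the page
				 * belongs to.
				 */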
			}

			/*
			 *	On to the next level
			 */

			cur_offset += cur_object->shadow_offset;
			new_object = cur_object->shadow;
			vm_object_lock(new_object);
			if (cur_object != object)
				vm_object_unlock(cur_object);
			cur_object = new_object;
		}
	}
	/*
	 *	Cleanup from fast fault failure.  Drop any object
	 *	lock other than original and drop map lock.
	 */
	if (object != cur_object)
		vm_object_unlock(cur_object);
	}
	vm_map_unlock_read(map);

	if (real_map != map)
		vm_map_unlock(real_map);

	/*
	 *	Make a reference to this object to
	 *	prevent its disposal while we are messing with
	 *	it.  Once we have the reference, the map is free
	 *	to be diddled.  Since objects reference their
	 *	shadows (and copies), they will stay around as well.
	 */
	assert(object->ref_count > 0);
	object->ref_count++;
	vm_object_res_reference(object);
	vm_object_paging_begin(object);

	XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);

	if (!object->private) {
		write_startup_file =
			vm_fault_tws_insert(map, real_map, vaddr, object, offset);
	}
	kr = vm_fault_page(object, offset, fault_type,
			   (change_wiring && !wired),
			   interruptible,
			   lo_offset, hi_offset, behavior,
			   &prot, &result_page, &top_page,
			   &type_of_fault,
			   &error_code, map->no_zero_fill, FALSE, map, vaddr);

	/*
	 *	If we didn't succeed, lose the object reference immediately.
	 */
	if (kr != VM_FAULT_SUCCESS)
		vm_object_deallocate(object);

	/*
	 *	See why we failed, and take corrective action.
	 */
	switch (kr) {
		case VM_FAULT_SUCCESS:
			break;
		case VM_FAULT_MEMORY_SHORTAGE:
			if (vm_page_wait((change_wiring) ?
					 THREAD_UNINT :
					 interruptible))
				goto RetryFault;
			/* fall thru */
		case VM_FAULT_INTERRUPTED:
			kr = KERN_ABORTED;
			goto done;
		case VM_FAULT_RETRY:
			goto RetryFault;
		case VM_FAULT_FICTITIOUS_SHORTAGE:
			vm_page_more_fictitious();
			goto RetryFault;
		case VM_FAULT_MEMORY_ERROR:
			if (error_code)
				kr = error_code;
			else
				kr = KERN_MEMORY_ERROR;
			goto done;
	}

	m = result_page;

	if (m != VM_PAGE_NULL) {
		assert((change_wiring && !wired) ?
		       (top_page == VM_PAGE_NULL) :
		       ((top_page == VM_PAGE_NULL) == (m->object == object)));
	}
	/*
	 *	How to clean up the result of vm_fault_page.  This
	 *	happens whether the mapping is entered or not.
	 */

#define UNLOCK_AND_DEALLOCATE				\
	MACRO_BEGIN					\
	vm_fault_cleanup(m->object, top_page);		\
	vm_object_deallocate(object);			\
	MACRO_END

	/*
	 *	What to do with the resulting page from vm_fault_page
	 *	if it doesn't get entered into the physical map:
	 */

#define RELEASE_PAGE(m)					\
	MACRO_BEGIN					\
	PAGE_WAKEUP_DONE(m);				\
	vm_page_lock_queues();				\
	if (!m->active && !m->inactive)			\
		vm_page_activate(m);			\
	vm_page_unlock_queues();			\
	MACRO_END
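	/*
	 * RELEASE_PAGE is used on the paths that bail out before the page is
	 * mapped: it just wakes any waiters and puts the page back on a
	 * paging queue.  UNLOCK_AND_DEALLOCATE finishes the paging activity
	 * via vm_fault_cleanup() and releases the object reference taken
	 * above.
	 */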
	/*
	 *	We must verify that the maps have not changed
	 *	since our last lookup.
	 */
	if (m != VM_PAGE_NULL) {
		old_copy_object = m->object->copy;
		vm_object_unlock(m->object);
	} else {
		old_copy_object = VM_OBJECT_NULL;
	}
	if ((map != original_map) || !vm_map_verify(map, &version)) {
		vm_object_t		retry_object;
		vm_object_offset_t	retry_offset;
		vm_prot_t		retry_prot;

		/*
		 *	To avoid trying to write_lock the map while another
		 *	thread has it read_locked (in vm_map_pageable), we
		 *	do not try for write permission.  If the page is
		 *	still writable, we will get write permission.  If it
		 *	is not, or has been marked needs_copy, we enter the
		 *	mapping without write permission, and will merely
		 *	take another fault.
		 */
		map = original_map;
		vm_map_lock_read(map);
		kr = vm_map_lookup_locked(&map, vaddr,
					  fault_type & ~VM_PROT_WRITE, &version,
					  &retry_object, &retry_offset, &retry_prot,
					  &wired, &behavior, &lo_offset, &hi_offset,
					  &real_map);
		pmap = real_map->pmap;

		if (kr != KERN_SUCCESS) {
			vm_map_unlock_read(map);
			if (m != VM_PAGE_NULL) {
				vm_object_lock(m->object);
				RELEASE_PAGE(m);
				UNLOCK_AND_DEALLOCATE;
			} else {
				vm_object_deallocate(object);
			}
			goto done;
		}

		vm_object_unlock(retry_object);
		if (m != VM_PAGE_NULL) {
			vm_object_lock(m->object);
		} else {
			vm_object_lock(object);
		}

		if ((retry_object != object) ||
		    (retry_offset != offset)) {
			vm_map_unlock_read(map);
			if (real_map != map)
				vm_map_unlock(real_map);
			if (m != VM_PAGE_NULL) {
				RELEASE_PAGE(m);
				UNLOCK_AND_DEALLOCATE;
			} else {
				vm_object_deallocate(object);
			}
			goto RetryFault;
		}
		/*
		 *	Check whether the protection has changed or the object
		 *	has been copied while we left the map unlocked.
		 */
		prot &= retry_prot;

		if (m != VM_PAGE_NULL) {
			vm_object_unlock(m->object);
		} else {
			vm_object_unlock(object);
		}
	}
	if (m != VM_PAGE_NULL) {
		vm_object_lock(m->object);
	} else {
		vm_object_lock(object);
	}

	/*
	 *	If the copy object changed while the top-level object
	 *	was unlocked, then we must take away write permission.
	 */
	if (m != VM_PAGE_NULL) {
		if (m->object->copy != old_copy_object)
			prot &= ~VM_PROT_WRITE;
	}

	/*
	 *	If we want to wire down this page, but no longer have
	 *	adequate permissions, we must start all over.
	 */
	if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
		vm_map_verify_done(map, &version);
		if (real_map != map)
			vm_map_unlock(real_map);
		if (m != VM_PAGE_NULL) {
			RELEASE_PAGE(m);
			UNLOCK_AND_DEALLOCATE;
		} else {
			vm_object_deallocate(object);
		}
		goto RetryFault;
	}
	/*
	 *	Put this page into the physical map.
	 *	We had to do the unlock above because pmap_enter
	 *	may cause other faults.  The page may be on
	 *	the pageout queues.  If the pageout daemon comes
	 *	across the page, it will remove it from the queues.
	 */
	need_activation = FALSE;

	if (m != VM_PAGE_NULL) {
		if (m->no_isync == TRUE) {
			pmap_sync_page_data_phys(m->phys_page);

			if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
				/*
				 * found it in the cache, but this
				 * is the first fault-in of the page (no_isync == TRUE)
				 * so it must have come in as part of
				 * a cluster... account 1 pagein against it
				 */
				current_task()->pageins++;

				type_of_fault = DBG_PAGEIN_FAULT;
			}
			need_activation = TRUE;

			m->no_isync = FALSE;
		}
		cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

		if (caller_pmap) {
			PMAP_ENTER(caller_pmap,
				   caller_pmap_addr, m,
				   prot, cache_attr, wired);
		} else {
			PMAP_ENTER(pmap, vaddr, m,
				   prot, cache_attr, wired);
		}

		/*
		 *	Add working set information for private objects here.
		 */
		if (m->object->private) {
			write_startup_file =
				vm_fault_tws_insert(map, real_map, vaddr,
						    m->object, m->offset);
		}
	} else {

		vm_map_entry_t		entry;
		vm_map_offset_t		laddr;
		vm_map_offset_t		ldelta, hdelta;

		/*
		 * do a pmap block mapping from the physical address
		 * in the device
		 */

		/* While we do not worry about execution protection in   */
		/* general, certain pages may have instruction execution */
		/* disallowed.  We will check here, and if not allowed   */
		/* to execute, we return with a protection failure.      */
		if ((full_fault_type & VM_PROT_EXECUTE) &&
		    (!pmap_eligible_for_execute((ppnum_t)
				(object->shadow_offset >> 12)))) {

			vm_map_verify_done(map, &version);
			if (real_map != map)
				vm_map_unlock(real_map);
			vm_fault_cleanup(object, top_page);
			vm_object_deallocate(object);
			kr = KERN_PROTECTION_FAILURE;
			goto done;
		}

		if (real_map != map) {
			vm_map_unlock(real_map);
		}
		if (original_map != map) {
			vm_map_unlock_read(map);
			vm_map_lock_read(original_map);
			map = original_map;
		}
		real_map = map;

		laddr = vaddr;
		hdelta = 0xFFFFF000;
		ldelta = 0xFFFFF000;

		while (vm_map_lookup_entry(map, laddr, &entry)) {
			if (ldelta > (laddr - entry->vme_start))
				ldelta = laddr - entry->vme_start;
			if (hdelta > (entry->vme_end - laddr))
				hdelta = entry->vme_end - laddr;
			if (entry->is_sub_map) {

				laddr = (laddr - entry->vme_start)
							+ entry->offset;
				vm_map_lock_read(entry->object.sub_map);
				if (map != real_map)
					vm_map_unlock_read(map);
				if (entry->use_pmap) {
					vm_map_unlock_read(real_map);
					real_map = entry->object.sub_map;
				}
				map = entry->object.sub_map;

			} else {
				break;
			}
		}

		if (vm_map_lookup_entry(map, laddr, &entry) &&
		    (entry->object.vm_object != NULL) &&
		    (entry->object.vm_object == object)) {

			if (caller_pmap) {
				/* Set up a block mapped area */
				pmap_map_block(caller_pmap,
					(addr64_t)(caller_pmap_addr - ldelta),
					(((vm_map_offset_t)
					    (entry->object.vm_object->shadow_offset))
					  + entry->offset +
					  (laddr - entry->vme_start)
					  - ldelta) >> 12,
					ldelta + hdelta, prot,
					(VM_WIMG_MASK & (int)object->wimg_bits), 0);
			} else {
				/* Set up a block mapped area */
				pmap_map_block(real_map->pmap,
					(addr64_t)(vaddr - ldelta),
					(((vm_map_offset_t)
					    (entry->object.vm_object->shadow_offset))
					  + entry->offset +
					  (laddr - entry->vme_start) - ldelta) >> 12,
					ldelta + hdelta, prot,
					(VM_WIMG_MASK & (int)object->wimg_bits), 0);
			}
		} else {

			if (caller_pmap) {
				pmap_enter(caller_pmap, caller_pmap_addr,
					object->shadow_offset >> 12, prot, 0, TRUE);
			} else {
				pmap_enter(pmap, vaddr,
					object->shadow_offset >> 12, prot, 0, TRUE);
			}
		}
	}
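	/*
	 * ldelta and hdelta measure how far the faulting address sits from
	 * the start and end of its map entry, so the block mapping set up
	 * above never spills past the entry that covers the device memory.
	 */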
	/*
	 *	If the page is not wired down and isn't already
	 *	on a pageout queue, then put it where the
	 *	pageout daemon can find it.
	 */
	if (m != VM_PAGE_NULL) {
		vm_page_lock_queues();

		if (m->clustered) {
			vm_pagein_cluster_used++;
			m->clustered = FALSE;
		}
		m->reference = TRUE;

		if (change_wiring) {
			if (wired)
				vm_page_wire(m);
			else
				vm_page_unwire(m);
		}
#if VM_FAULT_STATIC_CONFIG
		else {
			if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
				vm_page_activate(m);
		}
#else
		else if (software_reference_bits) {
			if (!m->active && !m->inactive)
				vm_page_activate(m);
			m->reference = TRUE;
		} else {
			vm_page_activate(m);
		}
#endif
		vm_page_unlock_queues();
	}

	/*
	 *	Unlock everything, and return
	 */
	vm_map_verify_done(map, &version);
	if (real_map != map)
		vm_map_unlock(real_map);
	if (m != VM_PAGE_NULL) {
		PAGE_WAKEUP_DONE(m);
		UNLOCK_AND_DEALLOCATE;
	} else {
		vm_fault_cleanup(object, top_page);
		vm_object_deallocate(object);
	}
	kr = KERN_SUCCESS;

#undef	UNLOCK_AND_DEALLOCATE
#undef	RELEASE_PAGE

    done:
	if (write_startup_file)
		tws_send_startup_info(current_task());

	thread_interrupt_level(interruptible_state);

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
			      vaddr,
			      type_of_fault & 0xff,
			      kr, 0, 0);

	return (kr);
}
/*
 *	vm_fault_wire:
 *
 *	Wire down a range of virtual addresses in a map.
 */
kern_return_t
vm_fault_wire(
	vm_map_t	map,
	vm_map_entry_t	entry,
	pmap_t		pmap,
	vm_map_offset_t	pmap_addr)
{
	register vm_map_offset_t	va;
	register vm_map_offset_t	end_addr = entry->vme_end;
	register kern_return_t		rc;

	assert(entry->in_transition);

	if ((entry->object.vm_object != NULL) &&
	    !entry->is_sub_map &&
	    entry->object.vm_object->phys_contiguous) {
		return KERN_SUCCESS;
	}

	/*
	 *	Inform the physical mapping system that the
	 *	range of addresses may not fault, so that
	 *	page tables and such can be locked down as well.
	 */
	pmap_pageable(pmap, pmap_addr,
		      pmap_addr + (end_addr - entry->vme_start), FALSE);

	/*
	 *	We simulate a fault to get the page and enter it
	 *	in the physical map.
	 */
	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
		if ((rc = vm_fault_wire_fast(
			map, va, entry, pmap,
			pmap_addr + (va - entry->vme_start)
			)) != KERN_SUCCESS) {
			rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
				      (pmap == kernel_pmap) ?
						THREAD_UNINT : THREAD_ABORTSAFE,
				      pmap, pmap_addr + (va - entry->vme_start));
		}

		if (rc != KERN_SUCCESS) {
			struct vm_map_entry	tmp_entry = *entry;

			/* unwire wired pages */
			tmp_entry.vme_end = va;
			vm_fault_unwire(map,
					&tmp_entry, FALSE, pmap, pmap_addr);

			return rc;
		}
	}
	return KERN_SUCCESS;
}
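/*
 * Note that on a partial failure the loop above unwinds what it already
 * wired: tmp_entry is a copy of the entry clipped at the failing address,
 * so vm_fault_unwire() only touches the pages that were actually wired.
 */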
/*
 *	vm_fault_unwire:
 *
 *	Unwire a range of virtual addresses in a map.
 */
void
vm_fault_unwire(
	vm_map_t	map,
	vm_map_entry_t	entry,
	boolean_t	deallocate,
	pmap_t		pmap,
	vm_map_offset_t	pmap_addr)
{
	register vm_map_offset_t	va;
	register vm_map_offset_t	end_addr = entry->vme_end;
	vm_object_t			object;

	object = (entry->is_sub_map)
			? VM_OBJECT_NULL : entry->object.vm_object;

	/*
	 *	Since the pages are wired down, we must be able to
	 *	get their mappings from the physical map system.
	 */
	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
		pmap_change_wiring(pmap,
			pmap_addr + (va - entry->vme_start), FALSE);

		if (object == VM_OBJECT_NULL) {
			(void) vm_fault(map, va, VM_PROT_NONE,
					TRUE, THREAD_UNINT, pmap, pmap_addr);
		} else if (object->phys_contiguous) {
			continue;
		} else {
			vm_page_t		result_page;
			vm_page_t		top_page;
			vm_object_t		result_object;
			vm_fault_return_t	result;
			vm_prot_t		prot;

			do {
				prot = VM_PROT_NONE;

				vm_object_lock(object);
				vm_object_paging_begin(object);
				XPR(XPR_VM_FAULT,
					"vm_fault_unwire -> vm_fault_page\n",
					0,0,0,0,0);
				result = vm_fault_page(object,
						entry->offset +
						  (va - entry->vme_start),
						VM_PROT_NONE, TRUE,
						THREAD_UNINT,
						entry->offset,
						entry->offset +
						  (entry->vme_end
							- entry->vme_start),
						VM_BEHAVIOR_SEQUENTIAL,
						&prot,
						&result_page,
						&top_page,
						(int *)0,
						0, map->no_zero_fill,
						FALSE, NULL, 0);
			} while (result == VM_FAULT_RETRY);

			if (result != VM_FAULT_SUCCESS)
				panic("vm_fault_unwire: failure");

			result_object = result_page->object;
			if (deallocate) {
				assert(!result_page->fictitious);
				pmap_disconnect(result_page->phys_page);
				VM_PAGE_FREE(result_page);
			} else {
				vm_page_lock_queues();
				vm_page_unwire(result_page);
				vm_page_unlock_queues();
				PAGE_WAKEUP_DONE(result_page);
			}
			vm_fault_cleanup(result_object, top_page);
		}
	}

	/*
	 *	Inform the physical mapping system that the range
	 *	of addresses may fault, so that page tables and
	 *	such may be unwired themselves.
	 */
	pmap_pageable(pmap, pmap_addr,
		      pmap_addr + (end_addr - entry->vme_start), TRUE);
}
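/*
 * The "deallocate" flag selects what happens to each formerly wired page:
 * TRUE disconnects and frees the page outright, FALSE merely unwires it so
 * the pageout daemon can reclaim it later.
 */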
/*
 *	vm_fault_wire_fast:
 *
 *	Handle common case of a wire down page fault at the given address.
 *	If successful, the page is inserted into the associated physical map.
 *	The map entry is passed in to avoid the overhead of a map lookup.
 *
 *	NOTE: the given address should be truncated to the
 *	proper page address.
 *
 *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
 *	a standard error specifying why the fault is fatal is returned.
 *
 *	The map in question must be referenced, and remains so.
 *	Caller has a read lock on the map.
 *
 *	This is a stripped version of vm_fault() for wiring pages.  Anything
 *	other than the common case will return KERN_FAILURE, and the caller
 *	is expected to call vm_fault().
 */
kern_return_t
vm_fault_wire_fast(
	__unused vm_map_t	map,
	vm_map_offset_t		va,
	vm_map_entry_t		entry,
	pmap_t			pmap,
	vm_map_offset_t		pmap_addr)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	register vm_page_t	m;
	vm_prot_t		prot;
	thread_t		thread = current_thread();
	unsigned int		cache_attr;

	if (thread != THREAD_NULL && thread->task != TASK_NULL)
		thread->task->faults++;

/*
 *	Recovery actions
 */

#undef	RELEASE_PAGE
#define RELEASE_PAGE(m)	{				\
	PAGE_WAKEUP_DONE(m);				\
	vm_page_lock_queues();				\
	vm_page_unwire(m);				\
	vm_page_unlock_queues();			\
}

#undef	UNLOCK_THINGS
#define UNLOCK_THINGS	{				\
	object->paging_in_progress--;			\
	vm_object_unlock(object);			\
}

#undef	UNLOCK_AND_DEALLOCATE
#define UNLOCK_AND_DEALLOCATE	{			\
	UNLOCK_THINGS;					\
	vm_object_deallocate(object);			\
}
/*
 *	Give up and have caller do things the hard way.
 */

#define GIVE_UP {					\
	UNLOCK_AND_DEALLOCATE;				\
	return(KERN_FAILURE);				\
}


	/*
	 *	If this entry is not directly to a vm_object, bail out.
	 */
	if (entry->is_sub_map)
		return(KERN_FAILURE);

	/*
	 *	Find the backing store object and offset into it.
	 */
	object = entry->object.vm_object;
	offset = (va - entry->vme_start) + entry->offset;
	prot = entry->protection;

	/*
	 *	Make a reference to this object to prevent its
	 *	disposal while we are messing with it.
	 */
	vm_object_lock(object);
	assert(object->ref_count > 0);
	object->ref_count++;
	vm_object_res_reference(object);
	object->paging_in_progress++;

	/*
	 *	INVARIANTS (through entire routine):
	 *
	 *	1)	At all times, we must either have the object
	 *		lock or a busy page in some object to prevent
	 *		some other thread from trying to bring in
	 *		the same page.
	 *
	 *	2)	Once we have a busy page, we must remove it from
	 *		the pageout queues, so that the pageout daemon
	 *		will not grab it away.
	 */

	/*
	 *	Look for page in top-level object.  If it's not there or
	 *	there's something going on, give up.
	 *	ENCRYPTED SWAP: use the slow fault path, since we'll need to
	 *	decrypt the page before wiring it down.
	 */
	m = vm_page_lookup(object, offset);
	if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) ||
	    (m->unusual && ( m->error || m->restart || m->absent ||
			     prot & m->page_lock))) {

		GIVE_UP;
	}
	ASSERT_PAGE_DECRYPTED(m);

	/*
	 *	Wire the page down now.  All bail outs beyond this
	 *	point must unwire the page.
	 */
	vm_page_lock_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	/*
	 *	Mark page busy for other threads.
	 */
	assert(!m->busy);
	m->busy = TRUE;
	assert(!m->absent);

	/*
	 *	Give up if the page is being written and there's a copy object
	 */
	if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
		RELEASE_PAGE(m);
		GIVE_UP;
	}

	/*
	 *	Put this page into the physical map.
	 *	We have to unlock the object because pmap_enter
	 *	may cause other faults.
	 */
	if (m->no_isync == TRUE) {
		pmap_sync_page_data_phys(m->phys_page);

		m->no_isync = FALSE;
	}

	cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

	PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);

	/*
	 *	Unlock everything, and return
	 */
	PAGE_WAKEUP_DONE(m);
	UNLOCK_AND_DEALLOCATE;

	return(KERN_SUCCESS);
}
/*
 *	Routine:	vm_fault_copy_cleanup
 *	Purpose:
 *		Release a page used by vm_fault_copy.
 */
static void
vm_fault_copy_cleanup(
	vm_page_t	page,
	vm_page_t	top_page)
{
	vm_object_t	object = page->object;

	vm_object_lock(object);
	PAGE_WAKEUP_DONE(page);
	vm_page_lock_queues();
	if (!page->active && !page->inactive)
		vm_page_activate(page);
	vm_page_unlock_queues();
	vm_fault_cleanup(object, top_page);
}

static void
vm_fault_copy_dst_cleanup(
	vm_page_t	page)
{
	vm_object_t	object;

	if (page != VM_PAGE_NULL) {
		object = page->object;
		vm_object_lock(object);
		vm_page_lock_queues();
		vm_page_unwire(page);
		vm_page_unlock_queues();
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}
}
/*
 *	Routine:	vm_fault_copy
 *
 *	Purpose:
 *		Copy pages from one virtual memory object to another --
 *		neither the source nor destination pages need be resident.
 *
 *		Before actually copying a page, the version associated with
 *		the destination address map will be verified.
 *
 *	In/out conditions:
 *		The caller must hold a reference, but not a lock, to
 *		each of the source and destination objects and to the
 *		destination map.
 *
 *	Results:
 *		Returns KERN_SUCCESS if no errors were encountered in
 *		reading or writing the data.  Returns KERN_INTERRUPTED if
 *		the operation was interrupted (only possible if the
 *		"interruptible" argument is asserted).  Other return values
 *		indicate a permanent error in copying the data.
 *
 *		The actual amount of data copied will be returned in the
 *		"copy_size" argument.  In the event that the destination map
 *		verification failed, this amount may be less than the amount
 *		requested.
 */
kern_return_t
vm_fault_copy(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_map_size_t		*copy_size,		/* INOUT */
	vm_object_t		dst_object,
	vm_object_offset_t	dst_offset,
	vm_map_t		dst_map,
	vm_map_version_t	*dst_version,
	int			interruptible)
{
	vm_page_t		result_page;

	vm_page_t		src_page;
	vm_page_t		src_top_page;
	vm_prot_t		src_prot;

	vm_page_t		dst_page;
	vm_page_t		dst_top_page;
	vm_prot_t		dst_prot;

	vm_map_size_t		amount_left;
	vm_object_t		old_copy_object;
	kern_return_t		error = 0;

	vm_map_size_t		part_size;

	/*
	 * In order not to confuse the clustered pageins, align
	 * the different offsets on a page boundary.
	 */
	vm_object_offset_t	src_lo_offset = vm_object_trunc_page(src_offset);
	vm_object_offset_t	dst_lo_offset = vm_object_trunc_page(dst_offset);
	vm_object_offset_t	src_hi_offset = vm_object_round_page(src_offset + *copy_size);
	vm_object_offset_t	dst_hi_offset = vm_object_round_page(dst_offset + *copy_size);

#define	RETURN(x)					\
	MACRO_BEGIN					\
	*copy_size -= amount_left;			\
	MACRO_RETURN(x);				\
	MACRO_END

	amount_left = *copy_size;
	do { /* while (amount_left > 0) */
		/*
		 * There may be a deadlock if both source and destination
		 * pages are the same. To avoid this deadlock, the copy must
		 * start by getting the destination page in order to apply
		 * COW semantics if any.
		 */

	RetryDestinationFault: ;

		dst_prot = VM_PROT_WRITE|VM_PROT_READ;

		vm_object_lock(dst_object);
		vm_object_paging_begin(dst_object);

		XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
		switch (vm_fault_page(dst_object,
				      vm_object_trunc_page(dst_offset),
				      VM_PROT_WRITE|VM_PROT_READ,
				      FALSE,
				      interruptible,
				      dst_lo_offset,
				      dst_hi_offset,
				      VM_BEHAVIOR_SEQUENTIAL,
				      &dst_prot,
				      &dst_page,
				      &dst_top_page,
				      (int *)0,
				      &error,
				      dst_map->no_zero_fill,
				      FALSE, NULL, 0)) {
		case VM_FAULT_SUCCESS:
			break;
		case VM_FAULT_RETRY:
			goto RetryDestinationFault;
		case VM_FAULT_MEMORY_SHORTAGE:
			if (vm_page_wait(interruptible))
				goto RetryDestinationFault;
			/* fall thru */
		case VM_FAULT_INTERRUPTED:
			RETURN(MACH_SEND_INTERRUPTED);
		case VM_FAULT_FICTITIOUS_SHORTAGE:
			vm_page_more_fictitious();
			goto RetryDestinationFault;
		case VM_FAULT_MEMORY_ERROR:
			if (error)
				return (error);
			else
				return(KERN_MEMORY_ERROR);
		}
		assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);

		old_copy_object = dst_page->object->copy;

		/*
		 * There exists the possibility that the source and
		 * destination page are the same.  But we can't
		 * easily determine that now.  If they are the
		 * same, the call to vm_fault_page() for the
		 * destination page will deadlock.  To prevent this we
		 * wire the page so we can drop busy without having
		 * the page daemon steal the page.  We clean up the
		 * top page but keep the paging reference on the object
		 * holding the dest page so it doesn't go away.
		 */
		vm_page_lock_queues();
		vm_page_wire(dst_page);
		vm_page_unlock_queues();
		PAGE_WAKEUP_DONE(dst_page);
		vm_object_unlock(dst_page->object);

		if (dst_top_page != VM_PAGE_NULL) {
			vm_object_lock(dst_object);
			VM_PAGE_FREE(dst_top_page);
			vm_object_paging_end(dst_object);
			vm_object_unlock(dst_object);
		}

	RetrySourceFault: ;
		if (src_object == VM_OBJECT_NULL) {
			/*
			 *	No source object.  We will just
			 *	zero-fill the page in dst_object.
			 */
			src_page = VM_PAGE_NULL;
			result_page = VM_PAGE_NULL;
		} else {
			vm_object_lock(src_object);
			src_page = vm_page_lookup(src_object,
					vm_object_trunc_page(src_offset));
			if (src_page == dst_page) {
				src_prot = dst_prot;
				result_page = VM_PAGE_NULL;
			} else {
				src_prot = VM_PROT_READ;
				vm_object_paging_begin(src_object);

				XPR(XPR_VM_FAULT,
					"vm_fault_copy(2) -> vm_fault_page\n",
					0,0,0,0,0);
				switch (vm_fault_page(src_object,
						      vm_object_trunc_page(src_offset),
						      VM_PROT_READ,
						      FALSE,
						      interruptible,
						      src_lo_offset,
						      src_hi_offset,
						      VM_BEHAVIOR_SEQUENTIAL,
						      &src_prot,
						      &result_page,
						      &src_top_page,
						      (int *)0,
						      &error,
						      FALSE,
						      FALSE, NULL, 0)) {

				case VM_FAULT_SUCCESS:
					break;
				case VM_FAULT_RETRY:
					goto RetrySourceFault;
				case VM_FAULT_MEMORY_SHORTAGE:
					if (vm_page_wait(interruptible))
						goto RetrySourceFault;
					/* fall thru */
				case VM_FAULT_INTERRUPTED:
					vm_fault_copy_dst_cleanup(dst_page);
					RETURN(MACH_SEND_INTERRUPTED);
				case VM_FAULT_FICTITIOUS_SHORTAGE:
					vm_page_more_fictitious();
					goto RetrySourceFault;
				case VM_FAULT_MEMORY_ERROR:
					vm_fault_copy_dst_cleanup(dst_page);
					if (error)
						return (error);
					else
						return(KERN_MEMORY_ERROR);
				}

				assert((src_top_page == VM_PAGE_NULL) ==
				       (result_page->object == src_object));

				assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
				vm_object_unlock(result_page->object);
			}
		}
		if (!vm_map_verify(dst_map, dst_version)) {
			if (result_page != VM_PAGE_NULL && src_page != dst_page)
				vm_fault_copy_cleanup(result_page, src_top_page);
			vm_fault_copy_dst_cleanup(dst_page);
			break;
		}

		vm_object_lock(dst_page->object);

		if (dst_page->object->copy != old_copy_object) {
			vm_object_unlock(dst_page->object);
			vm_map_verify_done(dst_map, dst_version);
			if (result_page != VM_PAGE_NULL && src_page != dst_page)
				vm_fault_copy_cleanup(result_page, src_top_page);
			vm_fault_copy_dst_cleanup(dst_page);
			break;
		}
		vm_object_unlock(dst_page->object);
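		/*
		 * Both "break"s above abandon the copy loop early; the RETURN
		 * macro then reports how much was actually copied by
		 * subtracting the remaining amount_left from *copy_size,
		 * which is how a failed map verification shows up as a short
		 * count to the caller.
		 */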
		/*
		 *	Copy the page, and note that it is dirty
		 *	immediately.
		 */
		if (!page_aligned(src_offset) ||
		    !page_aligned(dst_offset) ||
		    !page_aligned(amount_left)) {

			vm_object_offset_t	src_po,
						dst_po;

			src_po = src_offset - vm_object_trunc_page(src_offset);
			dst_po = dst_offset - vm_object_trunc_page(dst_offset);

			if (dst_po > src_po) {
				part_size = PAGE_SIZE - dst_po;
			} else {
				part_size = PAGE_SIZE - src_po;
			}
			if (part_size > (amount_left)) {
				part_size = amount_left;
			}

			if (result_page == VM_PAGE_NULL) {
				vm_page_part_zero_fill(dst_page,
						       dst_po, part_size);
			} else {
				vm_page_part_copy(result_page, src_po,
						  dst_page, dst_po, part_size);
				if (!dst_page->dirty) {
					vm_object_lock(dst_object);
					dst_page->dirty = TRUE;
					vm_object_unlock(dst_page->object);
				}
			}
		} else {
			part_size = PAGE_SIZE;

			if (result_page == VM_PAGE_NULL)
				vm_page_zero_fill(dst_page);
			else {
				vm_page_copy(result_page, dst_page);
				if (!dst_page->dirty) {
					vm_object_lock(dst_object);
					dst_page->dirty = TRUE;
					vm_object_unlock(dst_page->object);
				}
			}
		}
		/*
		 *	Unlock everything, and return
		 */
		vm_map_verify_done(dst_map, dst_version);

		if (result_page != VM_PAGE_NULL && src_page != dst_page)
			vm_fault_copy_cleanup(result_page, src_top_page);
		vm_fault_copy_dst_cleanup(dst_page);

		amount_left -= part_size;
		src_offset += part_size;
		dst_offset += part_size;
	} while (amount_left > 0);

	RETURN(KERN_SUCCESS);
#undef	RETURN

	/*NOTREACHED*/
}
/*
 *	Routine:	vm_fault_page_overwrite
 *
 *	Description:
 *		A form of vm_fault_page that assumes that the
 *		resulting page will be overwritten in its entirety,
 *		making it unnecessary to obtain the correct *contents*
 *		of the page.
 *
 *	Implementation:
 *		XXX Untested.  Also unused.  Eventually, this technology
 *		could be used in vm_fault_copy() to advantage.
 */
vm_fault_return_t
vm_fault_page_overwrite(
	register
	vm_object_t		dst_object,
	vm_object_offset_t	dst_offset,
	vm_page_t		*result_page)	/* OUT */
{
	register
	vm_page_t		dst_page;
	kern_return_t		wait_result;

#define	interruptible	THREAD_UNINT	/* XXX */

	while (TRUE) {
		/*
		 *	Look for a page at this offset
		 */
		while ((dst_page = vm_page_lookup(dst_object, dst_offset))
				== VM_PAGE_NULL) {
			/*
			 *	No page, no problem... just allocate one.
			 */
			dst_page = vm_page_alloc(dst_object, dst_offset);
			if (dst_page == VM_PAGE_NULL) {
				vm_object_unlock(dst_object);
				VM_PAGE_WAIT();
				vm_object_lock(dst_object);
				continue;
			}

			/*
			 *	Pretend that the memory manager
			 *	write-protected the page.
			 *
			 *	Note that we will be asking for write
			 *	permission without asking for the data
			 *	first.
			 */
			dst_page->overwriting = TRUE;
			dst_page->page_lock = VM_PROT_WRITE;
			dst_page->absent = TRUE;
			dst_page->unusual = TRUE;
			dst_object->absent_count++;

			break;
		}

		/*
		 *	When we bail out, we might have to throw
		 *	away the page created here.
		 */
#define	DISCARD_PAGE						\
	MACRO_BEGIN						\
	vm_object_lock(dst_object);				\
	dst_page = vm_page_lookup(dst_object, dst_offset);	\
	if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
		VM_PAGE_FREE(dst_page);				\
	vm_object_unlock(dst_object);				\
	MACRO_END

		/*
		 *	If the page is write-protected...
		 */
		if (dst_page->page_lock & VM_PROT_WRITE) {
			/*
			 *	... and an unlock request hasn't been sent
			 */
			if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
				vm_prot_t	u;
				kern_return_t	rc;

				/*
				 *	... then send one now.
				 */
				if (!dst_object->pager_ready) {
					wait_result = vm_object_assert_wait(dst_object,
								VM_OBJECT_EVENT_PAGER_READY,
								interruptible);
					vm_object_unlock(dst_object);
					if (wait_result == THREAD_WAITING)
						wait_result = thread_block(THREAD_CONTINUE_NULL);
					if (wait_result != THREAD_AWAKENED) {
						DISCARD_PAGE;
						return(VM_FAULT_INTERRUPTED);
					}
					continue;
				}

				u = dst_page->unlock_request |= VM_PROT_WRITE;
				vm_object_unlock(dst_object);

				if ((rc = memory_object_data_unlock(
						dst_object->pager,
						dst_offset + dst_object->paging_offset,
						PAGE_SIZE,
						u)) != KERN_SUCCESS) {
					printf("vm_object_overwrite: memory_object_data_unlock failed\n");
					DISCARD_PAGE;
					return((rc == MACH_SEND_INTERRUPTED) ?
						VM_FAULT_INTERRUPTED :
						VM_FAULT_MEMORY_ERROR);
				}
				vm_object_lock(dst_object);
				continue;
			}
			/* ... fall through to wait below */
		} else {
			/*
			 *	If the page isn't being used for other
			 *	purposes, then we're done.
			 */
			if ( ! (dst_page->busy || dst_page->absent ||
				dst_page->error || dst_page->restart) )
				break;
		}

		wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
		vm_object_unlock(dst_object);
		if (wait_result == THREAD_WAITING)
			wait_result = thread_block(THREAD_CONTINUE_NULL);
		if (wait_result != THREAD_AWAKENED) {
			DISCARD_PAGE;
			return(VM_FAULT_INTERRUPTED);
		}
	}

	*result_page = dst_page;
	return(VM_FAULT_SUCCESS);

#undef	interruptible
#undef	DISCARD_PAGE
}
#if	VM_FAULT_CLASSIFY
/*
 *	Temporary statistics gathering support.
 */

/*
 *	Statistics arrays:
 */
#define VM_FAULT_TYPES_MAX	5
#define	VM_FAULT_LEVEL_MAX	8

int	vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];

#define	VM_FAULT_TYPE_ZERO_FILL	0
#define	VM_FAULT_TYPE_MAP_IN	1
#define	VM_FAULT_TYPE_PAGER	2
#define	VM_FAULT_TYPE_COPY	3
#define	VM_FAULT_TYPE_OTHER	4


void
vm_fault_classify(vm_object_t		object,
		  vm_object_offset_t	offset,
		  vm_prot_t		fault_type)
{
	int		type, level = 0;
	vm_page_t	m;

	while (TRUE) {
		m = vm_page_lookup(object, offset);
		if (m != VM_PAGE_NULL) {
			if (m->busy || m->error || m->restart || m->absent ||
			    fault_type & m->page_lock) {
				type = VM_FAULT_TYPE_OTHER;
				break;
			}
			if (((fault_type & VM_PROT_WRITE) == 0) ||
			    ((level == 0) && object->copy == VM_OBJECT_NULL)) {
				type = VM_FAULT_TYPE_MAP_IN;
				break;
			}
			type = VM_FAULT_TYPE_COPY;
			break;
		}
		else {
			if (object->pager_created) {
				type = VM_FAULT_TYPE_PAGER;
				break;
			}
			if (object->shadow == VM_OBJECT_NULL) {
				type = VM_FAULT_TYPE_ZERO_FILL;
				break;
			}

			offset += object->shadow_offset;
			object = object->shadow;
			level++;
			continue;
		}
	}

	if (level > VM_FAULT_LEVEL_MAX)
		level = VM_FAULT_LEVEL_MAX;

	vm_fault_stats[type][level] += 1;

	return;
}

/* cleanup routine to call from debugger */
void
vm_fault_classify_init(void)
{
	int	type, level;

	for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
		for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
			vm_fault_stats[type][level] = 0;
		}
	}

	return;
}
#endif	/* VM_FAULT_CLASSIFY */