/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Page fault handling module.
 */
/* remove after component interface available */
extern int	vnode_pager_workaround;
#include <mach_cluster_stats.h>
#include <mach_pagemap.h>

#include <vm/vm_fault.h>
#include <mach/kern_return.h>
#include <mach/message.h>	/* for error codes */
#include <kern/host_statistics.h>
#include <kern/counters.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/host.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/memory_object.h>
				/* For memory_object_data_{request,unlock} */
#include <kern/mach_param.h>
#include <kern/macro_help.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>

#include <sys/kdebug.h>
#define VM_FAULT_CLASSIFY	0
#define VM_FAULT_STATIC_CONFIG	1

#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
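/*
 * VM_FAULT_CLASSIFY enables the temporary fault-classification code
 * (vm_fault_classify() below); VM_FAULT_STATIC_CONFIG, when set, compiles
 * out the run-time tunables guarded by "#if !VM_FAULT_STATIC_CONFIG"
 * (vm_fault_dirty_handling, vm_fault_interruptible, software_reference_bits)
 * and takes the fixed code paths instead.
 */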
int		vm_object_absent_max = 50;

int		vm_fault_debug = 0;
boolean_t	vm_page_deactivate_behind = TRUE;

vm_machine_attribute_val_t mv_cache_sync = MATTR_VAL_CACHE_SYNC;
#if	!VM_FAULT_STATIC_CONFIG
boolean_t	vm_fault_dirty_handling = FALSE;
boolean_t	vm_fault_interruptible = FALSE;
boolean_t	software_reference_bits = TRUE;
#endif

#if	MACH_KDB
extern struct db_watchpoint *db_watchpoint_list;
#endif	/* MACH_KDB */
/* Forward declarations of internal routines. */
extern kern_return_t vm_fault_wire_fast(
		vm_map_entry_t	entry,

extern void vm_fault_continue(void);

extern void vm_fault_copy_cleanup(

extern void vm_fault_copy_dst_cleanup(

#if	VM_FAULT_CLASSIFY
extern void vm_fault_classify(vm_object_t	object,
			  vm_object_offset_t	offset,
			  vm_prot_t		fault_type);

extern void vm_fault_classify_init(void);
#endif	/* VM_FAULT_CLASSIFY */
/*
 *	Routine:	vm_fault_init
 *		Initialize our private data structures.
 */

/*
 *	Routine:	vm_fault_cleanup
 *		Clean up the result of vm_fault_page.
 *
 *		The paging reference for "object" is released.
 *		"object" is unlocked.
 *		If "top_page" is not null, "top_page" is
 *		freed and the paging reference for the object
 *		containing it is released.
 *
 *		"object" must be locked.
 */
	register vm_object_t	object,
	register vm_page_t	top_page)
{
	vm_object_paging_end(object);
	vm_object_unlock(object);

	if (top_page != VM_PAGE_NULL) {
		object = top_page->object;
		vm_object_lock(object);
		VM_PAGE_FREE(top_page);
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}
}
#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES	16
struct {
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats_in[MAXCLUSTERPAGES];
#define CLUSTER_STAT(clause)	clause
#define CLUSTER_STAT_HIGHER(x)	\
	((cluster_stats_in[(x)].pages_at_higher_offsets)++)
#define CLUSTER_STAT_LOWER(x)	\
	((cluster_stats_in[(x)].pages_at_lower_offsets)++)
#define CLUSTER_STAT_CLUSTER(x)	\
	((cluster_stats_in[(x)].pages_in_cluster)++)
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */
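/*
 * CLUSTER_STAT(clause) expands to its argument only when MACH_CLUSTER_STATS
 * is configured; otherwise it expands to nothing, so the per-fault cluster
 * statistics expressions wrapped in it below compile away entirely.
 */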
/* XXX - temporary */
boolean_t vm_allow_clustered_pagein = FALSE;
int vm_pagein_cluster_used = 0;

/*
 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
 */
int vm_default_ahead = 1;	/* Number of pages to prepage ahead */
int vm_default_behind = 0;	/* Number of pages to prepage behind */

#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
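/*
 * ALIGNED(x) is true when the 64-bit offset x is page aligned, i.e. when
 * its low-order page-offset bits are all zero; for example, with 4K pages
 * ALIGNED(0x3000) holds while ALIGNED(0x3010) does not.
 */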
/*
 *	Routine:	vm_fault_page
 *
 *		Find the resident page for the virtual memory
 *		specified by the given virtual memory object.
 *
 *	Additional arguments:
 *		The required permissions for the page are given
 *		in "fault_type".  Desired permissions are included
 *		in "protection".  The minimum and maximum valid offsets
 *		within the object for the relevant map entry are
 *		passed in "lo_offset" and "hi_offset" respectively and
 *		the expected page reference pattern is passed in "behavior".
 *		These three parameters are used to determine pagein cluster
 *		limits.
 *
 *		If the desired page is known to be resident (for
 *		example, because it was previously wired down), asserting
 *		the "unwiring" parameter will speed the search.
 *
 *		If the operation can be interrupted (by thread_abort
 *		or thread_terminate), then the "interruptible"
 *		parameter should be asserted.
 *
 *		The page containing the proper data is returned
 *		in "result_page".
 *
 *		The source object must be locked and referenced,
 *		and must donate one paging reference.  The reference
 *		is not affected.  The paging reference and lock are
 *		consumed.
 *
 *		If the call succeeds, the object in which "result_page"
 *		resides is left locked and holding a paging reference.
 *		If this is not the original object, a busy page in the
 *		original object is returned in "top_page", to prevent other
 *		callers from pursuing this same data, along with a paging
 *		reference for the original object.  The "top_page" should
 *		be destroyed when this guarantee is no longer required.
 *		The "result_page" is also left busy.  It is not removed
 *		from the pageout queues.
 */
	vm_object_t	first_object,	/* Object to begin search */
	vm_object_offset_t first_offset,	/* Offset into object */
	vm_prot_t	fault_type,	/* What access is requested */
	boolean_t	must_be_resident,/* Must page be resident? */
	int		interruptible,	/* how may fault be interrupted? */
	vm_object_offset_t lo_offset,	/* Map entry start */
	vm_object_offset_t hi_offset,	/* Map entry end */
	vm_behavior_t	behavior,	/* Page reference behavior */
	/* Modifies in place: */
	vm_prot_t	*protection,	/* Protection for mapping */
	vm_page_t	*result_page,	/* Page found, if successful */
	vm_page_t	*top_page,	/* Page in top object, if
					 * not result_page.  */
	int		*type_of_fault,	/* if non-null, fill in with type of fault
					 * COW, zero-fill, etc... returned in trace point */
	/* More arguments: */
	kern_return_t	*error_code,	/* code if page is in error */
	boolean_t	no_zero_fill,	/* don't zero fill absent pages */
	boolean_t	data_supply)	/* treat as data_supply if
					 * it is a write fault and a full
					 * page is provided */
	vm_object_offset_t	offset;
	vm_object_t		next_object;
	vm_object_t		copy_object;
	boolean_t		look_for_page;
	vm_prot_t		access_required = fault_type;
	vm_prot_t		wants_copy_flag;
	vm_size_t		cluster_size, length;
	vm_object_offset_t	cluster_offset;
	vm_object_offset_t	cluster_start, cluster_end, paging_offset;
	vm_object_offset_t	align_offset;
	CLUSTER_STAT(int	pages_at_higher_offsets;)
	CLUSTER_STAT(int	pages_at_lower_offsets;)
	kern_return_t		wait_result;
	boolean_t		interruptible_state;

	kern_return_t	vnode_pager_data_request(ipc_port_t,
			ipc_port_t, vm_object_offset_t, vm_size_t, vm_prot_t);
#if	MACH_PAGEMAP
/*
 * MACH page map - an optional optimization where a bit map is maintained
 * by the VM subsystem for internal objects to indicate which pages of
 * the object currently reside on backing store.  This existence map
 * duplicates information maintained by the vnode pager.  It is
 * created at the time of the first pageout against the object, i.e.
 * at the same time the pager for the object is created.  The optimization
 * is designed to eliminate pager interaction overhead, if it is
 * 'known' that the page does not exist on backing store.
 *
 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
 * either marked as paged out in the existence map for the object or no
 * existence map exists for the object.  LOOK_FOR() is one of the
 * criteria in the decision to invoke the pager.  It is also used as one
 * of the criteria to terminate the scan for adjacent pages in a clustered
 * pagein operation.  Note that LOOK_FOR() always evaluates to TRUE for
 * permanent objects.  Note also that if the pager for an internal object
 * has not been created, the pager is not invoked regardless of the value
 * of LOOK_FOR() and that clustered pagein scans are only done on an object
 * for which a pager has been created.
 *
 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 * is marked as paged out in the existence map for the object.
 * PAGED_OUT() is used to determine if a page has already been pushed
 * into a copy object in order to avoid a redundant page out operation.
 */
#define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			!= VM_EXTERNAL_STATE_ABSENT)
#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			== VM_EXTERNAL_STATE_EXISTS)
#else /* MACH_PAGEMAP */
/*
 * If the MACH page map optimization is not enabled,
 * LOOK_FOR() always evaluates to TRUE.  The pager will always be
 * invoked to resolve missing pages in an object, assuming the pager
 * has been created for the object.  In a clustered page operation, the
 * absence of a page on backing store cannot be used to terminate
 * a scan for adjacent pages since that information is available only in
 * the pager.  Hence pages that may not be paged out are potentially
 * included in a clustered request.  The vnode pager is coded to deal
 * with any combination of absent/present pages in a clustered
 * pagein request.  PAGED_OUT() always evaluates to FALSE, i.e. the pager
 * will always be invoked to push a dirty page into a copy object assuming
 * a pager has been created.  If the page has already been pushed, the
 * pager will ignore the new request.
 */
#define LOOK_FOR(o, f) TRUE
#define PAGED_OUT(o, f) FALSE
#endif /* MACH_PAGEMAP */
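/*
 * Both macros are consulted further down in vm_fault_page(): LOOK_FOR()
 * (together with object->pager_created and related checks) feeds the
 * look_for_page decision that determines whether the pager is asked for
 * the missing page at all, and PAGED_OUT() is checked against the copy
 * object so that a page already pushed to backing store is not pushed
 * again.
 */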
#define PREPARE_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	vm_page_lock_queues();				\
	MACRO_END

#define DO_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	PAGE_WAKEUP_DONE(m);				\
	if (!m->active && !m->inactive)			\
		vm_page_activate(m);			\
	vm_page_unlock_queues();			\
	MACRO_END

#define RELEASE_PAGE(m)					\
	MACRO_BEGIN					\
	PREPARE_RELEASE_PAGE(m);			\
	DO_RELEASE_PAGE(m);				\
	MACRO_END
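/*
 * PREPARE_RELEASE_PAGE() takes the page queues lock; DO_RELEASE_PAGE()
 * wakes up the page's waiters, puts the page back on the active queue if
 * it has fallen off both paging queues, and drops the queues lock again.
 * RELEASE_PAGE() simply performs the two halves back to back.
 */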
382 dbgTrace(0xBEEF0002, (unsigned int) first_object
, (unsigned int) first_offset
); /* (TEST/DEBUG) */
387 #if !VM_FAULT_STATIC_CONFIG
388 if (vm_fault_dirty_handling
391 * If there are watchpoints set, then
392 * we don't want to give away write permission
393 * on a read fault. Make the task write fault,
394 * so that the watchpoint code notices the access.
396 || db_watchpoint_list
397 #endif /* MACH_KDB */
400 * If we aren't asking for write permission,
401 * then don't give it away. We're using write
402 * faults to set the dirty bit.
404 if (!(fault_type
& VM_PROT_WRITE
))
405 *protection
&= ~VM_PROT_WRITE
;
408 if (!vm_fault_interruptible
)
409 interruptible
= THREAD_UNINT
;
410 #else /* STATIC_CONFIG */
413 * If there are watchpoints set, then
414 * we don't want to give away write permission
415 * on a read fault. Make the task write fault,
416 * so that the watchpoint code notices the access.
418 if (db_watchpoint_list
) {
420 * If we aren't asking for write permission,
421 * then don't give it away. We're using write
422 * faults to set the dirty bit.
424 if (!(fault_type
& VM_PROT_WRITE
))
425 *protection
&= ~VM_PROT_WRITE
;
428 #endif /* MACH_KDB */
429 #endif /* STATIC_CONFIG */
431 cur_thread
= current_thread();
433 interruptible_state
= cur_thread
->interruptible
;
434 if (interruptible
== THREAD_UNINT
)
435 cur_thread
->interruptible
= FALSE
;
438 * INVARIANTS (through entire routine):
440 * 1) At all times, we must either have the object
441 * lock or a busy page in some object to prevent
442 * some other thread from trying to bring in
445 * Note that we cannot hold any locks during the
446 * pager access or when waiting for memory, so
447 * we use a busy page then.
449 * Note also that we aren't as concerned about more than
450 * one thread attempting to memory_object_data_unlock
451 * the same page at once, so we don't hold the page
452 * as busy then, but do record the highest unlock
453 * value so far. [Unlock requests may also be delivered
456 * 2) To prevent another thread from racing us down the
457 * shadow chain and entering a new page in the top
458 * object before we do, we must keep a busy page in
459 * the top object while following the shadow chain.
461 * 3) We must increment paging_in_progress on any object
462 * for which we have a busy page
464 * 4) We leave busy pages on the pageout queues.
465 * If the pageout daemon comes across a busy page,
466 * it will remove the page from the pageout queues.
470 * Search for the page at object/offset.
473 object
= first_object
;
474 offset
= first_offset
;
475 first_m
= VM_PAGE_NULL
;
476 access_required
= fault_type
;
479 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
480 (integer_t
)object
, offset
, fault_type
, *protection
, 0);
483 * See whether this page is resident
488 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
490 if (!object
->alive
) {
491 vm_fault_cleanup(object
, first_m
);
492 cur_thread
->interruptible
= interruptible_state
;
493 return(VM_FAULT_MEMORY_ERROR
);
495 m
= vm_page_lookup(object
, offset
);
497 dbgTrace(0xBEEF0004, (unsigned int) m
, (unsigned int) object
); /* (TEST/DEBUG) */
499 if (m
!= VM_PAGE_NULL
) {
501 * If the page was pre-paged as part of a
502 * cluster, record the fact.
505 vm_pagein_cluster_used
++;
506 m
->clustered
= FALSE
;
510 * If the page is being brought in,
511 * wait for it and then retry.
513 * A possible optimization: if the page
514 * is known to be resident, we can ignore
515 * pages that are absent (regardless of
516 * whether they're busy).
521 dbgTrace(0xBEEF0005, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
523 PAGE_ASSERT_WAIT(m
, interruptible
);
524 vm_object_unlock(object
);
526 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
527 (integer_t
)object
, offset
,
529 counter(c_vm_fault_page_block_busy_kernel
++);
530 wait_result
= thread_block((void (*)(void))0);
532 vm_object_lock(object
);
533 if (wait_result
!= THREAD_AWAKENED
) {
534 vm_fault_cleanup(object
, first_m
);
535 cur_thread
->interruptible
= interruptible_state
;
536 if (wait_result
== THREAD_RESTART
)
538 return(VM_FAULT_RETRY
);
542 return(VM_FAULT_INTERRUPTED
);
549 * If the page is in error, give up now.
554 dbgTrace(0xBEEF0006, (unsigned int) m
, (unsigned int) error_code
); /* (TEST/DEBUG) */
557 *error_code
= m
->page_error
;
559 vm_fault_cleanup(object
, first_m
);
560 cur_thread
->interruptible
= interruptible_state
;
561 return(VM_FAULT_MEMORY_ERROR
);
565 * If the pager wants us to restart
566 * at the top of the chain,
567 * typically because it has moved the
568 * page to another pager, then do so.
573 dbgTrace(0xBEEF0007, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
576 vm_fault_cleanup(object
, first_m
);
577 cur_thread
->interruptible
= interruptible_state
;
578 return(VM_FAULT_RETRY
);
582 * If the page isn't busy, but is absent,
583 * then it was deemed "unavailable".
588 * Remove the non-existent page (unless it's
589 * in the top object) and move on down to the
590 * next object (if there is one).
593 dbgTrace(0xBEEF0008, (unsigned int) m
, (unsigned int) object
->shadow
); /* (TEST/DEBUG) */
596 next_object
= object
->shadow
;
597 if (next_object
== VM_OBJECT_NULL
) {
600 assert(!must_be_resident
);
602 if (object
->shadow_severed
) {
605 cur_thread
->interruptible
= interruptible_state
;
606 return VM_FAULT_MEMORY_ERROR
;
610 * Absent page at bottom of shadow
611 * chain; zero fill the page we left
612 * busy in the first object, and flush
613 * the absent page. But first we
614 * need to allocate a real page.
616 if (VM_PAGE_THROTTLED() ||
617 (real_m
= vm_page_grab()) == VM_PAGE_NULL
) {
618 vm_fault_cleanup(object
, first_m
);
619 cur_thread
->interruptible
= interruptible_state
;
620 return(VM_FAULT_MEMORY_SHORTAGE
);
624 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
625 (integer_t
)object
, offset
,
627 (integer_t
)first_object
, 0);
628 if (object
!= first_object
) {
630 vm_object_paging_end(object
);
631 vm_object_unlock(object
);
632 object
= first_object
;
633 offset
= first_offset
;
635 first_m
= VM_PAGE_NULL
;
636 vm_object_lock(object
);
640 assert(real_m
->busy
);
641 vm_page_insert(real_m
, object
, offset
);
645 * Drop the lock while zero filling
646 * page. Then break because this
647 * is the page we wanted. Checking
648 * the page lock is a waste of time;
649 * this page was either absent or
650 * newly allocated -- in both cases
651 * it can't be page locked by a pager.
654 vm_object_unlock(object
);
655 vm_page_zero_fill(m
);
657 *type_of_fault
= DBG_ZERO_FILL_FAULT
;
658 VM_STAT(zero_fill_count
++);
659 vm_object_lock(object
);
661 pmap_clear_modify(m
->phys_addr
);
662 vm_page_lock_queues();
663 VM_PAGE_QUEUES_REMOVE(m
);
664 queue_enter(&vm_page_queue_inactive
,
665 m
, vm_page_t
, pageq
);
667 vm_page_inactive_count
++;
668 vm_page_unlock_queues();
671 if (must_be_resident
) {
672 vm_object_paging_end(object
);
673 } else if (object
!= first_object
) {
674 vm_object_paging_end(object
);
680 vm_object_absent_release(object
);
683 vm_page_lock_queues();
684 VM_PAGE_QUEUES_REMOVE(m
);
685 vm_page_unlock_queues();
688 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
689 (integer_t
)object
, offset
,
690 (integer_t
)next_object
,
691 offset
+object
->shadow_offset
,0);
692 offset
+= object
->shadow_offset
;
693 hi_offset
+= object
->shadow_offset
;
694 lo_offset
+= object
->shadow_offset
;
695 access_required
= VM_PROT_READ
;
696 vm_object_lock(next_object
);
697 vm_object_unlock(object
);
698 object
= next_object
;
699 vm_object_paging_begin(object
);
705 && ((object
!= first_object
) ||
706 (object
->copy
!= VM_OBJECT_NULL
))
707 && (fault_type
& VM_PROT_WRITE
)) {
709 * This is a copy-on-write fault that will
710 * cause us to revoke access to this page, but
711 * this page is in the process of being cleaned
712 * in a clustered pageout. We must wait until
713 * the cleaning operation completes before
714 * revoking access to the original page,
715 * otherwise we might attempt to remove a
719 dbgTrace(0xBEEF0009, (unsigned int) m
, (unsigned int) offset
); /* (TEST/DEBUG) */
722 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
723 (integer_t
)object
, offset
,
725 /* take an extra ref so that object won't die */
726 assert(object
->ref_count
> 0);
728 vm_object_res_reference(object
);
729 vm_fault_cleanup(object
, first_m
);
730 counter(c_vm_fault_page_block_backoff_kernel
++);
731 vm_object_lock(object
);
732 assert(object
->ref_count
> 0);
733 m
= vm_page_lookup(object
, offset
);
734 if (m
!= VM_PAGE_NULL
&& m
->cleaning
) {
735 PAGE_ASSERT_WAIT(m
, interruptible
);
736 vm_object_unlock(object
);
737 wait_result
= thread_block((void (*)(void)) 0);
738 vm_object_deallocate(object
);
741 vm_object_unlock(object
);
742 vm_object_deallocate(object
);
743 cur_thread
->interruptible
= interruptible_state
;
744 return VM_FAULT_RETRY
;
749 * If the desired access to this page has
750 * been locked out, request that it be unlocked.
753 if (access_required
& m
->page_lock
) {
754 if ((access_required
& m
->unlock_request
) != access_required
) {
755 vm_prot_t new_unlock_request
;
759 dbgTrace(0xBEEF000A, (unsigned int) m
, (unsigned int) object
->pager_ready
); /* (TEST/DEBUG) */
761 if (!object
->pager_ready
) {
763 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
765 (integer_t
)object
, offset
,
767 /* take an extra ref */
768 assert(object
->ref_count
> 0);
770 vm_object_res_reference(object
);
771 vm_fault_cleanup(object
,
773 counter(c_vm_fault_page_block_backoff_kernel
++);
774 vm_object_lock(object
);
775 assert(object
->ref_count
> 0);
776 if (!object
->pager_ready
) {
777 vm_object_assert_wait(
779 VM_OBJECT_EVENT_PAGER_READY
,
781 vm_object_unlock(object
);
782 wait_result
= thread_block((void (*)(void))0);
783 vm_object_deallocate(object
);
786 vm_object_unlock(object
);
787 vm_object_deallocate(object
);
788 cur_thread
->interruptible
= interruptible_state
;
789 return VM_FAULT_RETRY
;
793 new_unlock_request
= m
->unlock_request
=
794 (access_required
| m
->unlock_request
);
795 vm_object_unlock(object
);
797 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
798 (integer_t
)object
, offset
,
799 (integer_t
)m
, new_unlock_request
, 0);
800 if ((rc
= memory_object_data_unlock(
802 object
->pager_request
,
803 offset
+ object
->paging_offset
,
808 printf("vm_fault: memory_object_data_unlock failed\n");
809 vm_object_lock(object
);
810 vm_fault_cleanup(object
, first_m
);
811 cur_thread
->interruptible
= interruptible_state
;
812 return((rc
== MACH_SEND_INTERRUPTED
) ?
813 VM_FAULT_INTERRUPTED
:
814 VM_FAULT_MEMORY_ERROR
);
816 vm_object_lock(object
);
821 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
822 access_required
, (integer_t
)object
,
823 offset
, (integer_t
)m
, 0);
824 /* take an extra ref so object won't die */
825 assert(object
->ref_count
> 0);
827 vm_object_res_reference(object
);
828 vm_fault_cleanup(object
, first_m
);
829 counter(c_vm_fault_page_block_backoff_kernel
++);
830 vm_object_lock(object
);
831 assert(object
->ref_count
> 0);
832 m
= vm_page_lookup(object
, offset
);
833 if (m
!= VM_PAGE_NULL
&&
834 (access_required
& m
->page_lock
) &&
835 !((access_required
& m
->unlock_request
) != access_required
)) {
836 PAGE_ASSERT_WAIT(m
, interruptible
);
837 vm_object_unlock(object
);
838 wait_result
= thread_block((void (*)(void)) 0);
839 vm_object_deallocate(object
);
842 vm_object_unlock(object
);
843 vm_object_deallocate(object
);
844 cur_thread
->interruptible
= interruptible_state
;
845 return VM_FAULT_RETRY
;
 *	We mark the page busy and leave it on
 *	the pageout queues.  If the pageout
 *	daemon comes across it, then it will
856 dbgTrace(0xBEEF000B, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
859 #if !VM_FAULT_STATIC_CONFIG
860 if (!software_reference_bits
) {
861 vm_page_lock_queues();
863 vm_stat
.reactivations
++;
865 VM_PAGE_QUEUES_REMOVE(m
);
866 vm_page_unlock_queues();
870 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
871 (integer_t
)object
, offset
, (integer_t
)m
, 0, 0);
879 (object
->pager_created
) &&
880 LOOK_FOR(object
, offset
) &&
884 dbgTrace(0xBEEF000C, (unsigned int) look_for_page
, (unsigned int) object
); /* (TEST/DEBUG) */
886 if ((look_for_page
|| (object
== first_object
))
887 && !must_be_resident
) {
889 * Allocate a new page for this object/offset
893 m
= vm_page_grab_fictitious();
895 dbgTrace(0xBEEF000D, (unsigned int) m
, (unsigned int) object
); /* (TEST/DEBUG) */
897 if (m
== VM_PAGE_NULL
) {
898 vm_fault_cleanup(object
, first_m
);
899 cur_thread
->interruptible
= interruptible_state
;
900 return(VM_FAULT_FICTITIOUS_SHORTAGE
);
902 vm_page_insert(m
, object
, offset
);
905 if (look_for_page
&& !must_be_resident
) {
909 * If the memory manager is not ready, we
910 * cannot make requests.
912 if (!object
->pager_ready
) {
914 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
918 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
919 (integer_t
)object
, offset
, 0, 0, 0);
920 /* take an extra ref so object won't die */
921 assert(object
->ref_count
> 0);
923 vm_object_res_reference(object
);
924 vm_fault_cleanup(object
, first_m
);
925 counter(c_vm_fault_page_block_backoff_kernel
++);
926 vm_object_lock(object
);
927 assert(object
->ref_count
> 0);
928 if (!object
->pager_ready
) {
929 vm_object_assert_wait(object
,
930 VM_OBJECT_EVENT_PAGER_READY
,
932 vm_object_unlock(object
);
933 wait_result
= thread_block((void (*)(void))0);
934 vm_object_deallocate(object
);
937 vm_object_unlock(object
);
938 vm_object_deallocate(object
);
939 cur_thread
->interruptible
= interruptible_state
;
940 return VM_FAULT_RETRY
;
944 if (object
->internal
) {
946 * Requests to the default pager
947 * must reserve a real page in advance,
948 * because the pager's data-provided
949 * won't block for pages. IMPORTANT:
950 * this acts as a throttling mechanism
951 * for data_requests to the default
956 dbgTrace(0xBEEF000F, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
958 if (m
->fictitious
&& !vm_page_convert(m
)) {
960 vm_fault_cleanup(object
, first_m
);
961 cur_thread
->interruptible
= interruptible_state
;
962 return(VM_FAULT_MEMORY_SHORTAGE
);
964 } else if (object
->absent_count
>
965 vm_object_absent_max
) {
967 * If there are too many outstanding page
968 * requests pending on this object, we
969 * wait for them to be resolved now.
973 dbgTrace(0xBEEF0010, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
976 /* take an extra ref so object won't die */
977 assert(object
->ref_count
> 0);
979 vm_object_res_reference(object
);
980 vm_fault_cleanup(object
, first_m
);
981 counter(c_vm_fault_page_block_backoff_kernel
++);
982 vm_object_lock(object
);
983 assert(object
->ref_count
> 0);
984 if (object
->absent_count
> vm_object_absent_max
) {
985 vm_object_absent_assert_wait(object
,
987 vm_object_unlock(object
);
988 wait_result
= thread_block((void (*)(void))0);
989 vm_object_deallocate(object
);
992 vm_object_unlock(object
);
993 vm_object_deallocate(object
);
994 cur_thread
->interruptible
= interruptible_state
;
995 return VM_FAULT_RETRY
;
1000 * Indicate that the page is waiting for data
1001 * from the memory manager.
1004 m
->list_req_pending
= TRUE
;
1007 object
->absent_count
++;
1009 cluster_start
= offset
;
1011 cluster_size
= object
->cluster_size
;
1014 * Skip clustered pagein if it is globally disabled
1015 * or random page reference behavior is expected
1016 * for the address range containing the faulting
1017 * address or the object paging block size is
1018 * equal to the page size.
1020 if (!vm_allow_clustered_pagein
||
1021 behavior
== VM_BEHAVIOR_RANDOM
||
1022 cluster_size
== PAGE_SIZE
) {
1023 cluster_start
= trunc_page_64(cluster_start
);
1027 assert(offset
>= lo_offset
);
1028 assert(offset
< hi_offset
);
1029 assert(ALIGNED(object
->paging_offset
));
1030 assert(cluster_size
>= PAGE_SIZE
);
1033 dbgTrace(0xBEEF0011, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
 *	Decide whether to scan ahead or behind for
 *	additional pages contiguous to the faulted
 *	page in the same paging block.  The decision
 *	is based on system wide globals and the
 *	expected page reference behavior of the
 *	address range containing the faulting address.
 *	First calculate some constants.
1044 paging_offset
= offset
+ object
->paging_offset
;
1045 cluster_offset
= paging_offset
& (cluster_size
- 1);
1046 align_offset
= paging_offset
&(PAGE_SIZE_64
-1);
1047 if (align_offset
!= 0) {
1048 cluster_offset
= trunc_page_64(cluster_offset
);
#define SPANS_CLUSTER(x) ((((x) - align_offset) & (vm_object_offset_t)(cluster_size - 1)) == 0)
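/*
 * SPANS_CLUSTER(x) is true when offset x, adjusted by align_offset, sits
 * exactly on a cluster boundary.  For example, with align_offset == 0 and
 * a cluster_size of 8 * PAGE_SIZE, it holds for offsets 0, 8*PAGE_SIZE,
 * 16*PAGE_SIZE, and so on; the backward and forward scans below use it to
 * keep a pagein cluster from crossing such a boundary.
 */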
1054 * Backward scan only if reverse sequential
1055 * behavior has been specified
1057 CLUSTER_STAT(pages_at_lower_offsets
= 0;)
1058 if (((vm_default_behind
!= 0 &&
1059 behavior
== VM_BEHAVIOR_DEFAULT
) ||
1060 behavior
== VM_BEHAVIOR_RSEQNTL
) && offset
) {
1061 vm_object_offset_t cluster_bot
;
1064 * Calculate lower search boundary.
1065 * Exclude pages that span a cluster boundary.
1066 * Clip to start of map entry.
1067 * For default page reference behavior, scan
1068 * default pages behind.
1070 cluster_bot
= (offset
> cluster_offset
) ?
1071 offset
- cluster_offset
: offset
;
1072 if (align_offset
!= 0) {
1073 if ((cluster_bot
< offset
) &&
1074 SPANS_CLUSTER(cluster_bot
)) {
1075 cluster_bot
+= PAGE_SIZE_64
;
1078 if (behavior
== VM_BEHAVIOR_DEFAULT
) {
1080 bot
= (vm_object_offset_t
)
1081 (vm_default_behind
* PAGE_SIZE
);
1083 if (cluster_bot
< (offset
- bot
))
1084 cluster_bot
= offset
- bot
;
1086 if (lo_offset
> cluster_bot
)
1087 cluster_bot
= lo_offset
;
1089 for ( cluster_start
= offset
- PAGE_SIZE_64
;
1090 (cluster_start
>= cluster_bot
) &&
1092 (align_offset
- PAGE_SIZE_64
));
1093 cluster_start
-= PAGE_SIZE_64
) {
1094 assert(cluster_size
> PAGE_SIZE_64
);
1095 retry_cluster_backw
:
1096 if (!LOOK_FOR(object
, cluster_start
) ||
1097 vm_page_lookup(object
, cluster_start
)
1101 if (object
->internal
) {
1103 * need to acquire a real page in
1104 * advance because this acts as
1105 * a throttling mechanism for
1106 * data_requests to the default
1107 * pager. If this fails, give up
1108 * trying to find any more pages
1109 * in the cluster and send off the
1110 * request for what we already have.
1112 if ((m
= vm_page_grab())
1114 cluster_start
+= PAGE_SIZE_64
;
1115 cluster_end
= offset
+ PAGE_SIZE_64
;
1118 } else if ((m
= vm_page_grab_fictitious())
1120 vm_object_unlock(object
);
1121 vm_page_more_fictitious();
1122 vm_object_lock(object
);
1123 goto retry_cluster_backw
;
1127 m
->clustered
= TRUE
;
1128 m
->list_req_pending
= TRUE
;
1130 vm_page_insert(m
, object
, cluster_start
);
1131 CLUSTER_STAT(pages_at_lower_offsets
++;)
1132 object
->absent_count
++;
1134 cluster_start
+= PAGE_SIZE_64
;
1135 assert(cluster_start
>= cluster_bot
);
1137 assert(cluster_start
<= offset
);
1140 * Forward scan if default or sequential behavior
1143 CLUSTER_STAT(pages_at_higher_offsets
= 0;)
1144 if ((behavior
== VM_BEHAVIOR_DEFAULT
&&
1145 vm_default_ahead
!= 0) ||
1146 behavior
== VM_BEHAVIOR_SEQUENTIAL
) {
1147 vm_object_offset_t cluster_top
;
1150 * Calculate upper search boundary.
1151 * Exclude pages that span a cluster boundary.
1152 * Clip to end of map entry.
1153 * For default page reference behavior, scan
1154 * default pages ahead.
1156 cluster_top
= (offset
+ cluster_size
) -
1158 if (align_offset
!= 0) {
1159 if ((cluster_top
> (offset
+ PAGE_SIZE_64
)) &&
1160 SPANS_CLUSTER(cluster_top
)) {
1161 cluster_top
-= PAGE_SIZE_64
;
1164 if (behavior
== VM_BEHAVIOR_DEFAULT
) {
1165 vm_object_offset_t top
= (vm_object_offset_t
)
1166 ((vm_default_ahead
*PAGE_SIZE
)+PAGE_SIZE
);
1168 if (cluster_top
> (offset
+ top
))
1169 cluster_top
= offset
+ top
;
1171 if (cluster_top
> hi_offset
)
1172 cluster_top
= hi_offset
;
1174 for (cluster_end
= offset
+ PAGE_SIZE_64
;
1175 cluster_end
< cluster_top
;
1176 cluster_end
+= PAGE_SIZE_64
) {
1177 assert(cluster_size
> PAGE_SIZE
);
1179 if (!LOOK_FOR(object
, cluster_end
) ||
1180 vm_page_lookup(object
, cluster_end
)
1184 if (object
->internal
) {
1186 * need to acquire a real page in
1187 * advance because this acts as
1188 * a throttling mechanism for
1189 * data_requests to the default
1190 * pager. If this fails, give up
1191 * trying to find any more pages
1192 * in the cluster and send off the
1193 * request for what we already have.
1195 if ((m
= vm_page_grab())
1199 } else if ((m
= vm_page_grab_fictitious())
1201 vm_object_unlock(object
);
1202 vm_page_more_fictitious();
1203 vm_object_lock(object
);
1204 goto retry_cluster_forw
;
1208 m
->clustered
= TRUE
;
1209 m
->list_req_pending
= TRUE
;
1211 vm_page_insert(m
, object
, cluster_end
);
1212 CLUSTER_STAT(pages_at_higher_offsets
++;)
1213 object
->absent_count
++;
1215 assert(cluster_end
<= cluster_top
);
1218 cluster_end
= offset
+ PAGE_SIZE_64
;
1221 assert(cluster_end
>= offset
+ PAGE_SIZE_64
);
1222 length
= cluster_end
- cluster_start
;
1224 #if MACH_CLUSTER_STATS
1225 CLUSTER_STAT_HIGHER(pages_at_higher_offsets
);
1226 CLUSTER_STAT_LOWER(pages_at_lower_offsets
);
1227 CLUSTER_STAT_CLUSTER(length
/PAGE_SIZE
);
1228 #endif /* MACH_CLUSTER_STATS */
1232 dbgTrace(0xBEEF0012, (unsigned int) object
, (unsigned int) 0); /* (TEST/DEBUG) */
1235 * We have a busy page, so we can
1236 * release the object lock.
1238 vm_object_unlock(object
);
1241 * Call the memory manager to retrieve the data.
1245 *type_of_fault
= DBG_PAGEIN_FAULT
;
1247 current_task()->pageins
++;
1250 * If this object uses a copy_call strategy,
1251 * and we are interested in a copy of this object
1252 * (having gotten here only by following a
1253 * shadow chain), then tell the memory manager
1254 * via a flag added to the desired_access
1255 * parameter, so that it can detect a race
1256 * between our walking down the shadow chain
1257 * and its pushing pages up into a copy of
1258 * the object that it manages.
1261 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_CALL
&&
1262 object
!= first_object
) {
1263 wants_copy_flag
= VM_PROT_WANTS_COPY
;
1265 wants_copy_flag
= VM_PROT_NONE
;
1269 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1270 (integer_t
)object
, offset
, (integer_t
)m
,
1271 access_required
| wants_copy_flag
, 0);
1274 if (((rpc_subsystem_t
)pager_mux_hash_lookup(object
->pager
)) ==
1275 ((rpc_subsystem_t
) &vnode_pager_workaround
)) {
1276 rc
= vnode_pager_data_request(object
->pager
,
1277 object
->pager_request
,
1278 cluster_start
+ object
->paging_offset
,
1280 access_required
| wants_copy_flag
);
1282 rc
= memory_object_data_request(object
->pager
,
1283 object
->pager_request
,
1284 cluster_start
+ object
->paging_offset
,
1286 access_required
| wants_copy_flag
);
1289 rc
= memory_object_data_request(object
->pager
,
1290 object
->pager_request
,
1291 cluster_start
+ object
->paging_offset
,
1293 access_required
| wants_copy_flag
);
1298 dbgTrace(0xBEEF0013, (unsigned int) object
, (unsigned int) rc
); /* (TEST/DEBUG) */
1300 if (rc
!= KERN_SUCCESS
) {
1301 if (rc
!= MACH_SEND_INTERRUPTED
1303 printf("%s(0x%x, 0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d, object=0x%x\n",
1304 "memory_object_data_request",
1306 object
->pager_request
,
1307 cluster_start
+ object
->paging_offset
,
1308 length
, access_required
,
1311 * Don't want to leave a busy page around,
1312 * but the data request may have blocked,
1313 * so check if it's still there and busy.
1315 vm_object_lock(object
);
1317 length
-= PAGE_SIZE
,
1318 cluster_start
+= PAGE_SIZE_64
) {
1320 if ((p
= vm_page_lookup(object
,
1322 && p
->absent
&& p
->busy
1327 vm_fault_cleanup(object
, first_m
);
1328 cur_thread
->interruptible
= interruptible_state
;
1329 return((rc
== MACH_SEND_INTERRUPTED
) ?
1330 VM_FAULT_INTERRUPTED
:
1331 VM_FAULT_MEMORY_ERROR
);
1335 * Retry with same object/offset, since new data may
1336 * be in a different page (i.e., m is meaningless at
1339 vm_object_lock(object
);
1340 if ((interruptible
!= THREAD_UNINT
) &&
1341 (current_thread()->state
& TH_ABORT
)) {
1342 vm_fault_cleanup(object
, first_m
);
1343 cur_thread
->interruptible
= interruptible_state
;
1344 return(VM_FAULT_INTERRUPTED
);
1350 * The only case in which we get here is if
1351 * object has no pager (or unwiring). If the pager doesn't
1352 * have the page this is handled in the m->absent case above
1353 * (and if you change things here you should look above).
1356 dbgTrace(0xBEEF0014, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1358 if (object
== first_object
)
1361 assert(m
== VM_PAGE_NULL
);
1364 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1365 (integer_t
)object
, offset
, (integer_t
)m
,
1366 (integer_t
)object
->shadow
, 0);
1368 * Move on to the next object. Lock the next
1369 * object before unlocking the current one.
1371 next_object
= object
->shadow
;
1372 if (next_object
== VM_OBJECT_NULL
) {
1373 assert(!must_be_resident
);
1375 * If there's no object left, fill the page
1376 * in the top object with zeros. But first we
1377 * need to allocate a real page.
1380 if (object
!= first_object
) {
1381 vm_object_paging_end(object
);
1382 vm_object_unlock(object
);
1384 object
= first_object
;
1385 offset
= first_offset
;
1386 vm_object_lock(object
);
1390 assert(m
->object
== object
);
1391 first_m
= VM_PAGE_NULL
;
1393 if (object
->shadow_severed
) {
1395 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1396 cur_thread
->interruptible
= interruptible_state
;
1397 return VM_FAULT_MEMORY_ERROR
;
1400 if (VM_PAGE_THROTTLED() ||
1401 (m
->fictitious
&& !vm_page_convert(m
))) {
1403 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1404 cur_thread
->interruptible
= interruptible_state
;
1405 return(VM_FAULT_MEMORY_SHORTAGE
);
1408 if (!no_zero_fill
) {
1409 vm_object_unlock(object
);
1410 vm_page_zero_fill(m
);
1412 *type_of_fault
= DBG_ZERO_FILL_FAULT
;
1413 VM_STAT(zero_fill_count
++);
1414 vm_object_lock(object
);
1416 vm_page_lock_queues();
1417 VM_PAGE_QUEUES_REMOVE(m
);
1418 queue_enter(&vm_page_queue_inactive
,
1419 m
, vm_page_t
, pageq
);
1421 vm_page_inactive_count
++;
1422 vm_page_unlock_queues();
1423 pmap_clear_modify(m
->phys_addr
);
1427 if ((object
!= first_object
) || must_be_resident
)
1428 vm_object_paging_end(object
);
1429 offset
+= object
->shadow_offset
;
1430 hi_offset
+= object
->shadow_offset
;
1431 lo_offset
+= object
->shadow_offset
;
1432 access_required
= VM_PROT_READ
;
1433 vm_object_lock(next_object
);
1434 vm_object_unlock(object
);
1435 object
= next_object
;
1436 vm_object_paging_begin(object
);
1441 * PAGE HAS BEEN FOUND.
1444 * busy, so that we can play with it;
1445 * not absent, so that nobody else will fill it;
1446 * possibly eligible for pageout;
1448 * The top-level page (first_m) is:
1449 * VM_PAGE_NULL if the page was found in the
1451 * busy, not absent, and ineligible for pageout.
1453 * The current object (object) is locked. A paging
1454 * reference is held for the current and top-level
1459 dbgTrace(0xBEEF0015, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1461 #if EXTRA_ASSERTIONS
1462 assert(m
->busy
&& !m
->absent
);
1463 assert((first_m
== VM_PAGE_NULL
) ||
1464 (first_m
->busy
&& !first_m
->absent
&&
1465 !first_m
->active
&& !first_m
->inactive
));
1466 #endif /* EXTRA_ASSERTIONS */
1469 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1470 (integer_t
)object
, offset
, (integer_t
)m
,
1471 (integer_t
)first_object
, (integer_t
)first_m
);
1473 * If the page is being written, but isn't
1474 * already owned by the top-level object,
1475 * we have to copy it into a new page owned
1476 * by the top-level object.
1479 if (object
!= first_object
) {
1481 * We only really need to copy if we
1486 dbgTrace(0xBEEF0016, (unsigned int) object
, (unsigned int) fault_type
); /* (TEST/DEBUG) */
1488 if (fault_type
& VM_PROT_WRITE
) {
1491 assert(!must_be_resident
);
1494 * If we try to collapse first_object at this
1495 * point, we may deadlock when we try to get
1496 * the lock on an intermediate object (since we
1497 * have the bottom object locked). We can't
1498 * unlock the bottom object, because the page
1499 * we found may move (by collapse) if we do.
1501 * Instead, we first copy the page. Then, when
1502 * we have no more use for the bottom object,
1503 * we unlock it and try to collapse.
1505 * Note that we copy the page even if we didn't
1506 * need to... that's the breaks.
1510 * Allocate a page for the copy
1512 copy_m
= vm_page_grab();
1513 if (copy_m
== VM_PAGE_NULL
) {
1515 vm_fault_cleanup(object
, first_m
);
1516 cur_thread
->interruptible
= interruptible_state
;
1517 return(VM_FAULT_MEMORY_SHORTAGE
);
1522 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1523 (integer_t
)object
, offset
,
1524 (integer_t
)m
, (integer_t
)copy_m
, 0);
1525 vm_page_copy(m
, copy_m
);
1528 * If another map is truly sharing this
1529 * page with us, we have to flush all
1530 * uses of the original page, since we
1531 * can't distinguish those which want the
1532 * original from those which need the
1535 * XXXO If we know that only one map has
1536 * access to this page, then we could
1537 * avoid the pmap_page_protect() call.
1540 vm_page_lock_queues();
1541 assert(!m
->cleaning
);
1542 pmap_page_protect(m
->phys_addr
, VM_PROT_NONE
);
1543 vm_page_deactivate(m
);
1544 copy_m
->dirty
= TRUE
;
1546 * Setting reference here prevents this fault from
1547 * being counted as a (per-thread) reactivate as well
1548 * as a copy-on-write.
1550 first_m
->reference
= TRUE
;
1551 vm_page_unlock_queues();
1554 * We no longer need the old page or object.
1557 PAGE_WAKEUP_DONE(m
);
1558 vm_object_paging_end(object
);
1559 vm_object_unlock(object
);
1562 *type_of_fault
= DBG_COW_FAULT
;
1563 VM_STAT(cow_faults
++);
1564 current_task()->cow_faults
++;
1565 object
= first_object
;
1566 offset
= first_offset
;
1568 vm_object_lock(object
);
1569 VM_PAGE_FREE(first_m
);
1570 first_m
= VM_PAGE_NULL
;
1571 assert(copy_m
->busy
);
1572 vm_page_insert(copy_m
, object
, offset
);
1576 * Now that we've gotten the copy out of the
1577 * way, let's try to collapse the top object.
1578 * But we have to play ugly games with
1579 * paging_in_progress to do that...
1582 vm_object_paging_end(object
);
1583 vm_object_collapse(object
);
1584 vm_object_paging_begin(object
);
1588 *protection
&= (~VM_PROT_WRITE
);
1593 * Now check whether the page needs to be pushed into the
1594 * copy object. The use of asymmetric copy on write for
1595 * shared temporary objects means that we may do two copies to
1596 * satisfy the fault; one above to get the page from a
1597 * shadowed object, and one here to push it into the copy.
1600 while (first_object
->copy_strategy
== MEMORY_OBJECT_COPY_DELAY
&&
1601 (copy_object
= first_object
->copy
) != VM_OBJECT_NULL
) {
1602 vm_object_offset_t copy_offset
;
1606 dbgTrace(0xBEEF0017, (unsigned int) copy_object
, (unsigned int) fault_type
); /* (TEST/DEBUG) */
1609 * If the page is being written, but hasn't been
1610 * copied to the copy-object, we have to copy it there.
1613 if ((fault_type
& VM_PROT_WRITE
) == 0) {
1614 *protection
&= ~VM_PROT_WRITE
;
1619 * If the page was guaranteed to be resident,
1620 * we must have already performed the copy.
1623 if (must_be_resident
)
1627 * Try to get the lock on the copy_object.
1629 if (!vm_object_lock_try(copy_object
)) {
1630 vm_object_unlock(object
);
1632 mutex_pause(); /* wait a bit */
1634 vm_object_lock(object
);
1639 * Make another reference to the copy-object,
1640 * to keep it from disappearing during the
1643 assert(copy_object
->ref_count
> 0);
1644 copy_object
->ref_count
++;
1645 VM_OBJ_RES_INCR(copy_object
);
1648 * Does the page exist in the copy?
1650 copy_offset
= first_offset
- copy_object
->shadow_offset
;
1651 if (copy_object
->size
<= copy_offset
)
1653 * Copy object doesn't cover this page -- do nothing.
1657 vm_page_lookup(copy_object
, copy_offset
)) != VM_PAGE_NULL
) {
1658 /* Page currently exists in the copy object */
1661 * If the page is being brought
1662 * in, wait for it and then retry.
1665 /* take an extra ref so object won't die */
1666 assert(copy_object
->ref_count
> 0);
1667 copy_object
->ref_count
++;
1668 vm_object_res_reference(copy_object
);
1669 vm_object_unlock(copy_object
);
1670 vm_fault_cleanup(object
, first_m
);
1671 counter(c_vm_fault_page_block_backoff_kernel
++);
1672 vm_object_lock(copy_object
);
1673 assert(copy_object
->ref_count
> 0);
1674 VM_OBJ_RES_DECR(copy_object
);
1675 copy_object
->ref_count
--;
1676 assert(copy_object
->ref_count
> 0);
1677 copy_m
= vm_page_lookup(copy_object
, copy_offset
);
1678 if (copy_m
!= VM_PAGE_NULL
&& copy_m
->busy
) {
1679 PAGE_ASSERT_WAIT(copy_m
, interruptible
);
1680 vm_object_unlock(copy_object
);
1681 wait_result
= thread_block((void (*)(void))0);
1682 vm_object_deallocate(copy_object
);
1685 vm_object_unlock(copy_object
);
1686 vm_object_deallocate(copy_object
);
1687 cur_thread
->interruptible
= interruptible_state
;
1688 return VM_FAULT_RETRY
;
1692 else if (!PAGED_OUT(copy_object
, copy_offset
)) {
1694 * If PAGED_OUT is TRUE, then the page used to exist
1695 * in the copy-object, and has already been paged out.
1696 * We don't need to repeat this. If PAGED_OUT is
1697 * FALSE, then either we don't know (!pager_created,
1698 * for example) or it hasn't been paged out.
1699 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1700 * We must copy the page to the copy object.
1704 * Allocate a page for the copy
1706 copy_m
= vm_page_alloc(copy_object
, copy_offset
);
1707 if (copy_m
== VM_PAGE_NULL
) {
1709 VM_OBJ_RES_DECR(copy_object
);
1710 copy_object
->ref_count
--;
1711 assert(copy_object
->ref_count
> 0);
1712 vm_object_unlock(copy_object
);
1713 vm_fault_cleanup(object
, first_m
);
1714 cur_thread
->interruptible
= interruptible_state
;
1715 return(VM_FAULT_MEMORY_SHORTAGE
);
1719 * Must copy page into copy-object.
1722 vm_page_copy(m
, copy_m
);
1725 * If the old page was in use by any users
1726 * of the copy-object, it must be removed
1727 * from all pmaps. (We can't know which
1731 vm_page_lock_queues();
1732 assert(!m
->cleaning
);
1733 pmap_page_protect(m
->phys_addr
, VM_PROT_NONE
);
1734 copy_m
->dirty
= TRUE
;
1735 vm_page_unlock_queues();
1738 * If there's a pager, then immediately
1739 * page out this page, using the "initialize"
1740 * option. Else, we use the copy.
1745 ((!copy_object
->pager_created
) ||
1746 vm_external_state_get(
1747 copy_object
->existence_map
, copy_offset
)
1748 == VM_EXTERNAL_STATE_ABSENT
)
1750 (!copy_object
->pager_created
)
1753 vm_page_lock_queues();
1754 vm_page_activate(copy_m
);
1755 vm_page_unlock_queues();
1756 PAGE_WAKEUP_DONE(copy_m
);
1759 assert(copy_m
->busy
== TRUE
);
1762 * The page is already ready for pageout:
1763 * not on pageout queues and busy.
1764 * Unlock everything except the
1765 * copy_object itself.
1768 vm_object_unlock(object
);
1771 * Write the page to the copy-object,
1772 * flushing it from the kernel.
1775 vm_pageout_initialize_page(copy_m
);
1778 * Since the pageout may have
1779 * temporarily dropped the
1780 * copy_object's lock, we
1781 * check whether we'll have
1782 * to deallocate the hard way.
1785 if ((copy_object
->shadow
!= object
) ||
1786 (copy_object
->ref_count
== 1)) {
1787 vm_object_unlock(copy_object
);
1788 vm_object_deallocate(copy_object
);
1789 vm_object_lock(object
);
1794 * Pick back up the old object's
1795 * lock. [It is safe to do so,
1796 * since it must be deeper in the
1800 vm_object_lock(object
);
1804 * Because we're pushing a page upward
1805 * in the object tree, we must restart
1806 * any faults that are waiting here.
1807 * [Note that this is an expansion of
1808 * PAGE_WAKEUP that uses the THREAD_RESTART
1809 * wait result]. Can't turn off the page's
1810 * busy bit because we're not done with it.
1815 thread_wakeup_with_result((event_t
) m
,
1821 * The reference count on copy_object must be
1822 * at least 2: one for our extra reference,
1823 * and at least one from the outside world
1824 * (we checked that when we last locked
1827 copy_object
->ref_count
--;
1828 assert(copy_object
->ref_count
> 0);
1829 VM_OBJ_RES_DECR(copy_object
);
1830 vm_object_unlock(copy_object
);
1836 *top_page
= first_m
;
1839 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1840 (integer_t
)object
, offset
, (integer_t
)m
, (integer_t
)first_m
, 0);
1842 * If the page can be written, assume that it will be.
1843 * [Earlier, we restrict the permission to allow write
1844 * access only if the fault so required, so we don't
1845 * mark read-only data as dirty.]
1848 #if !VM_FAULT_STATIC_CONFIG
1849 if (vm_fault_dirty_handling
&& (*protection
& VM_PROT_WRITE
))
1853 dbgTrace(0xBEEF0018, (unsigned int) object
, (unsigned int) vm_page_deactivate_behind
); /* (TEST/DEBUG) */
1855 if (vm_page_deactivate_behind
) {
1856 if (offset
&& /* don't underflow */
1857 (object
->last_alloc
== (offset
- PAGE_SIZE_64
))) {
1858 m
= vm_page_lookup(object
, object
->last_alloc
);
1859 if ((m
!= VM_PAGE_NULL
) && !m
->busy
) {
1860 vm_page_lock_queues();
1861 vm_page_deactivate(m
);
1862 vm_page_unlock_queues();
1865 dbgTrace(0xBEEF0019, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1868 object
->last_alloc
= offset
;
1871 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS
, 0); /* (TEST/DEBUG) */
1873 cur_thread
->interruptible
= interruptible_state
;
1874 return(VM_FAULT_SUCCESS
);
1878 vm_fault_cleanup(object
, first_m
);
1880 counter(c_vm_fault_page_block_backoff_kernel
++);
1881 thread_block((void (*)(void))0);
1885 cur_thread
->interruptible
= interruptible_state
;
1886 if (wait_result
== THREAD_INTERRUPTED
)
1887 return VM_FAULT_INTERRUPTED
;
1888 return VM_FAULT_RETRY
;
1896 * Handle page faults, including pseudo-faults
1897 * used to change the wiring status of pages.
1899 * Explicit continuations have been removed.
1901 * vm_fault and vm_fault_page save mucho state
1902 * in the moral equivalent of a closure. The state
1903 * structure is allocated when first entering vm_fault
1904 * and deallocated when leaving vm_fault.
1911 vm_prot_t fault_type
,
1912 boolean_t change_wiring
,
	vm_map_version_t	version;	/* Map version for verification */
1916 boolean_t wired
; /* Should mapping be wired down? */
1917 vm_object_t object
; /* Top-level object */
1918 vm_object_offset_t offset
; /* Top-level offset */
1919 vm_prot_t prot
; /* Protection for mapping */
1920 vm_behavior_t behavior
; /* Expected paging behavior */
1921 vm_object_offset_t lo_offset
, hi_offset
;
1922 vm_object_t old_copy_object
; /* Saved copy object */
1923 vm_page_t result_page
; /* Result of vm_fault_page */
1924 vm_page_t top_page
; /* Placeholder page */
1928 vm_page_t m
; /* Fast access to result_page */
1929 kern_return_t error_code
; /* page error reasons */
1931 vm_object_t cur_object
;
1933 vm_object_offset_t cur_offset
;
1935 vm_object_t new_object
;
1937 vm_map_t pmap_map
= map
;
1938 vm_map_t original_map
= map
;
1940 boolean_t funnel_set
= FALSE
;
1942 thread_t cur_thread
;
1943 boolean_t interruptible_state
;
1946 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, 0)) | DBG_FUNC_START
,
1953 cur_thread
= current_thread();
1955 interruptible_state
= cur_thread
->interruptible
;
1956 if (interruptible
== THREAD_UNINT
)
1957 cur_thread
->interruptible
= FALSE
;
1960 * assume we will hit a page in the cache
1961 * otherwise, explicitly override with
1962 * the real fault type once we determine it
1964 type_of_fault
= DBG_CACHE_HIT_FAULT
;
1967 current_task()->faults
++;
1970 * drop funnel if it is already held. Then restore while returning
1972 if ((cur_thread
->funnel_state
& TH_FN_OWNED
) == TH_FN_OWNED
) {
1974 curflock
= cur_thread
->funnel_lock
;
1975 thread_funnel_set( curflock
, FALSE
);
1981 * Find the backing store object and offset into
1982 * it to begin the search.
1985 vm_map_lock_read(map
);
1986 kr
= vm_map_lookup_locked(&map
, vaddr
, fault_type
, &version
,
1989 &behavior
, &lo_offset
, &hi_offset
, &pmap_map
);
1991 pmap
= pmap_map
->pmap
;
1993 if (kr
!= KERN_SUCCESS
) {
1994 vm_map_unlock_read(map
);
1999 * If the page is wired, we must fault for the current protection
2000 * value, to avoid further faults.
2004 fault_type
= prot
| VM_PROT_WRITE
;
2006 #if VM_FAULT_CLASSIFY
2008 * Temporary data gathering code
2010 vm_fault_classify(object
, offset
, fault_type
);
2013 * Fast fault code. The basic idea is to do as much as
2014 * possible while holding the map lock and object locks.
2015 * Busy pages are not used until the object lock has to
2016 * be dropped to do something (copy, zero fill, pmap enter).
2017 * Similarly, paging references aren't acquired until that
2018 * point, and object references aren't used.
2020 * If we can figure out what to do
2021 * (zero fill, copy on write, pmap enter) while holding
2022 * the locks, then it gets done. Otherwise, we give up,
2023 * and use the original fault path (which doesn't hold
2024 * the map lock, and relies on busy pages).
2025 * The give up cases include:
2026 * - Have to talk to pager.
2027 * - Page is busy, absent or in error.
2028 * - Pager has locked out desired access.
2029 * - Fault needs to be restarted.
2030 * - Have to push page into copy object.
2032 * The code is an infinite loop that moves one level down
2033 * the shadow chain each time. cur_object and cur_offset
2034 * refer to the current object being examined. object and offset
2035 * are the original object from the map. The loop is at the
2036 * top level if and only if object and cur_object are the same.
2038 * Invariants: Map lock is held throughout. Lock is held on
2039 * original object and cur_object (if different) when
2040 * continuing or exiting loop.
2046 * If this page is to be inserted in a copy delay object
2047 * for writing, and if the object has a copy, then the
2048 * copy delay strategy is implemented in the slow fault page.
2050 if (object
->copy_strategy
!= MEMORY_OBJECT_COPY_DELAY
||
2051 object
->copy
== VM_OBJECT_NULL
||
2052 (fault_type
& VM_PROT_WRITE
) == 0) {
2053 cur_object
= object
;
2054 cur_offset
= offset
;
2057 m
= vm_page_lookup(cur_object
, cur_offset
);
2058 if (m
!= VM_PAGE_NULL
) {
2062 if (m
->unusual
&& (m
->error
|| m
->restart
||
2063 m
->absent
|| (fault_type
& m
->page_lock
))) {
2066 * Unusual case. Give up.
2072 * Two cases of map in faults:
2073 * - At top level w/o copy object.
2074 * - Read fault anywhere.
2075 * --> must disallow write.
2078 if (object
== cur_object
&&
2079 object
->copy
== VM_OBJECT_NULL
)
2080 goto FastMapInFault
;
2082 if ((fault_type
& VM_PROT_WRITE
) == 0) {
2084 prot
&= ~VM_PROT_WRITE
;
2087 * Set up to map the page ...
2088 * mark the page busy, drop
2089 * locks and take a paging reference
2090 * on the object with the page.
2093 if (object
!= cur_object
) {
2094 vm_object_unlock(object
);
2095 object
= cur_object
;
2100 vm_object_paging_begin(object
);
2101 vm_object_unlock(object
);
2105 * Check a couple of global reasons to
2106 * be conservative about write access.
2107 * Then do the pmap_enter.
2109 #if !VM_FAULT_STATIC_CONFIG
2110 if (vm_fault_dirty_handling
2112 || db_watchpoint_list
2114 && (fault_type
& VM_PROT_WRITE
) == 0)
2115 prot
&= ~VM_PROT_WRITE
;
2116 #else /* STATIC_CONFIG */
2118 if (db_watchpoint_list
2119 && (fault_type
& VM_PROT_WRITE
) == 0)
2120 prot
&= ~VM_PROT_WRITE
;
2121 #endif /* MACH_KDB */
2122 #endif /* STATIC_CONFIG */
2123 PMAP_ENTER(pmap
, vaddr
, m
, prot
, wired
);
2124 pmap_attribute(pmap
,
2131 vm_pagein_cluster_used
++;
2132 m
->clustered
= FALSE
;
					/*
					 *	Grab the object lock to manipulate
					 *	the page queues.  Change wiring
					 *	case is obvious.  In soft ref bits
					 *	case activate page only if it fell
					 *	off paging queues, otherwise just
					 *	activate it if it's inactive.
					 *
					 *	NOTE: original vm_fault code will
					 *	move active page to back of active
					 *	queue.  This code doesn't.
					 */
					vm_object_lock(object);
					vm_page_lock_queues();

					m->reference = TRUE;

					if (change_wiring) {
						if (wired)
							vm_page_wire(m);
						else
							vm_page_unwire(m);
					}
#if	VM_FAULT_STATIC_CONFIG
					else {
						if (!m->active && !m->inactive)
							vm_page_activate(m);
					}
#else
					else if (software_reference_bits) {
						if (!m->active && !m->inactive)
							vm_page_activate(m);
					}
					else if (!m->active) {
						vm_page_activate(m);
					}
#endif
					vm_page_unlock_queues();

					/*
					 *	That's it, clean up and return.
					 */
					PAGE_WAKEUP_DONE(m);
					vm_object_paging_end(object);
					vm_object_unlock(object);
					vm_map_unlock_read(map);
					vm_map_unlock(pmap_map);

					thread_funnel_set( curflock, TRUE);
					cur_thread->interruptible = interruptible_state;

					KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
							      vaddr,
							      type_of_fault,
							      KERN_SUCCESS,
							      0,
							      0);
					return KERN_SUCCESS;
				}
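				/*
				 *	Explanatory note: the map-in path above never talks to
				 *	a pager and never pushes a page into a copy object,
				 *	which is why it is safe to keep the map read-locked for
				 *	its whole duration (see the give-up cases and
				 *	invariants above).
				 */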
				/*
				 *	Copy on write fault.  If objects match, then
				 *	object->copy must not be NULL (else control
				 *	would be in previous code block), and we
				 *	have a potential push into the copy object
				 *	with which we won't cope here.
				 */

				if (cur_object == object)
					break;

				/*
				 *	This is now a shadow based copy on write
				 *	fault -- it requires a copy up the shadow
				 *	chain.
				 *
				 *	Allocate a page in the original top level
				 *	object. Give up if allocate fails.  Also
				 *	need to remember current page, as it's the
				 *	source of the copy.
				 */
				cur_m = m;
				m = vm_page_alloc(object, offset);
				if (m == VM_PAGE_NULL) {
					break;
				}

				/*
				 *	Now do the copy.  Mark the source busy
				 *	and take out paging references on both
				 *	objects.
				 *
				 *	NOTE: This code holds the map lock across
				 *	the copy.
				 */
				cur_m->busy = TRUE;
				vm_page_copy(cur_m, m);
				vm_page_insert(m, object, offset);

				vm_object_paging_begin(cur_object);
				vm_object_paging_begin(object);

				type_of_fault = DBG_COW_FAULT;
				VM_STAT(cow_faults++);
				current_task()->cow_faults++;

				/*
				 *	Now cope with the source page and object
				 *	If the top object has a ref count of 1
				 *	then no other map can access it, and hence
				 *	it's not necessary to do the pmap_page_protect.
				 */

				vm_page_lock_queues();
				vm_page_deactivate(cur_m);
				pmap_page_protect(cur_m->phys_addr,
						  VM_PROT_NONE);
				vm_page_unlock_queues();

				PAGE_WAKEUP_DONE(cur_m);
				vm_object_paging_end(cur_object);
				vm_object_unlock(cur_object);

				/*
				 *	Slight hack to call vm_object collapse
				 *	and then reuse common map in code.
				 *	note that the object lock was taken above.
				 */

				vm_object_paging_end(object);
				vm_object_collapse(object);
				vm_object_paging_begin(object);
				vm_object_unlock(object);
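				/*
				 *	Explanatory note: the newly allocated page in the
				 *	top-level object now holds a private copy of the data,
				 *	so the write can be mapped in through the common
				 *	FastPmapEnter path.
				 */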
				goto FastPmapEnter;
			}
			else {

				/*
				 *	No page at cur_object, cur_offset
				 */

				if (cur_object->pager_created) {

					/*
					 *	Have to talk to the pager.  Give up.
					 */
					break;
				}

				if (cur_object->shadow == VM_OBJECT_NULL) {

					if (cur_object->shadow_severed) {
						vm_object_paging_end(object);
						vm_object_unlock(object);
						vm_map_unlock_read(map);
						vm_map_unlock(pmap_map);

						thread_funnel_set( curflock, TRUE);
						cur_thread->interruptible = interruptible_state;

						return VM_FAULT_MEMORY_ERROR;
					}
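					/*
					 *	Explanatory note: reaching this point means no level
					 *	of the shadow chain has the page, the terminal object
					 *	has no pager to ask, and the chain is intact, so the
					 *	fault can be satisfied with a zero-filled page.
					 */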
					/*
					 *	Zero fill fault.  Page gets
					 *	filled in top object. Insert
					 *	page, then drop any lower lock.
					 *	Give up if no page.
					 */
					if ((vm_page_free_target -
					    ((vm_page_free_target-vm_page_free_min)>>2))
						> vm_page_free_count) {
						break;
					}
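					/*
					 *	Explanatory note (illustrative numbers only): with,
					 *	say, vm_page_free_target = 4000 pages and
					 *	vm_page_free_min = 1000 pages, the check above sends
					 *	zero-fill faults to the slow path once the free list
					 *	drops below 4000 - (3000 >> 2) = 3250 pages.
					 */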
					m = vm_page_alloc(object, offset);
					if (m == VM_PAGE_NULL) {
						break;
					}

					if (cur_object != object)
						vm_object_unlock(cur_object);

					vm_object_paging_begin(object);
					vm_object_unlock(object);

					/*
					 *	Now zero fill page and map it.
					 *	the page is probably going to
					 *	be written soon, so don't bother
					 *	to clear the modified bit
					 *
					 *	NOTE: This code holds the map
					 *	lock across the zero fill.
					 */

					if (!map->no_zero_fill) {
						vm_page_zero_fill(m);
						type_of_fault = DBG_ZERO_FILL_FAULT;
						VM_STAT(zero_fill_count++);
					}
					vm_page_lock_queues();
					VM_PAGE_QUEUES_REMOVE(m);
					queue_enter(&vm_page_queue_inactive,
						    m, vm_page_t, pageq);
					m->inactive = TRUE;
					vm_page_inactive_count++;
					vm_page_unlock_queues();

					goto FastPmapEnter;
				}
				/*
				 *	On to the next level
				 */

				cur_offset += cur_object->shadow_offset;
				new_object = cur_object->shadow;
				vm_object_lock(new_object);
				if (cur_object != object)
					vm_object_unlock(cur_object);
				cur_object = new_object;
			}
		}
	}
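	/*
	 *	Explanatory note: every "give up" break in the fast path lands
	 *	here with the map still read-locked and the original object
	 *	(and cur_object, if different) still locked, as stated in the
	 *	invariants above; the cleanup below depends on that.
	 */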
	/*
	 *	Cleanup from fast fault failure.  Drop any object
	 *	lock other than original and drop map lock.
	 */

	if (object != cur_object)
		vm_object_unlock(cur_object);

	vm_map_unlock_read(map);
	vm_map_unlock(pmap_map);

	/*
	 *	Make a reference to this object to
	 *	prevent its disposal while we are messing with
	 *	it.  Once we have the reference, the map is free
	 *	to be diddled.  Since objects reference their
	 *	shadows (and copies), they will stay around as well.
	 */

	assert(object->ref_count > 0);
	object->ref_count++;
	vm_object_res_reference(object);
	vm_object_paging_begin(object);

	XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
	kr = vm_fault_page(object, offset, fault_type,
			   (change_wiring && !wired),
			   interruptible,
			   lo_offset, hi_offset, behavior,
			   &prot, &result_page, &top_page,
			   (int *)&type_of_fault,
			   &error_code, map->no_zero_fill, FALSE);
	/*
	 *	If we didn't succeed, lose the object reference immediately.
	 */

	if (kr != VM_FAULT_SUCCESS)
		vm_object_deallocate(object);

	/*
	 *	See why we failed, and take corrective action.
	 */

	switch (kr) {
	case VM_FAULT_SUCCESS:
		break;
	case VM_FAULT_MEMORY_SHORTAGE:
		if (vm_page_wait((change_wiring) ?
				 THREAD_UNINT : THREAD_ABORTSAFE))
			goto RetryFault;
		/* fall thru */
	case VM_FAULT_INTERRUPTED:
		kr = KERN_ABORTED;
		goto done;
	case VM_FAULT_RETRY:
		goto RetryFault;
	case VM_FAULT_FICTITIOUS_SHORTAGE:
		vm_page_more_fictitious();
		goto RetryFault;
	case VM_FAULT_MEMORY_ERROR:
		if (error_code)
			kr = error_code;
		else
			kr = KERN_MEMORY_ERROR;
		goto done;
	}

	m = result_page;

	assert((change_wiring && !wired) ?
	       (top_page == VM_PAGE_NULL) :
	       ((top_page == VM_PAGE_NULL) == (m->object == object)));
	/*
	 *	How to clean up the result of vm_fault_page.  This
	 *	happens whether the mapping is entered or not.
	 */

#define UNLOCK_AND_DEALLOCATE				\
	MACRO_BEGIN					\
	vm_fault_cleanup(m->object, top_page);		\
	vm_object_deallocate(object);			\
	MACRO_END

	/*
	 *	What to do with the resulting page from vm_fault_page
	 *	if it doesn't get entered into the physical map:
	 */

#define RELEASE_PAGE(m)					\
	MACRO_BEGIN					\
	PAGE_WAKEUP_DONE(m);				\
	vm_page_lock_queues();				\
	if (!m->active && !m->inactive)			\
		vm_page_activate(m);			\
	vm_page_unlock_queues();			\
	MACRO_END
	/*
	 *	We must verify that the maps have not changed
	 *	since our last lookup.
	 */

	old_copy_object = m->object->copy;

	vm_object_unlock(m->object);
	if ((map != original_map) || !vm_map_verify(map, &version)) {
		vm_object_t		retry_object;
		vm_object_offset_t	retry_offset;
		vm_prot_t		retry_prot;

		/*
		 *	To avoid trying to write_lock the map while another
		 *	thread has it read_locked (in vm_map_pageable), we
		 *	do not try for write permission.  If the page is
		 *	still writable, we will get write permission.  If it
		 *	is not, or has been marked needs_copy, we enter the
		 *	mapping without write permission, and will merely
		 *	take another fault.
		 */
		vm_map_lock_read(map);
		kr = vm_map_lookup_locked(&map, vaddr,
					  fault_type & ~VM_PROT_WRITE, &version,
					  &retry_object, &retry_offset, &retry_prot,
					  &wired, &behavior, &lo_offset, &hi_offset,
					  &pmap_map);
		pmap = pmap_map->pmap;

		if (kr != KERN_SUCCESS) {
			vm_map_unlock_read(map);
			vm_object_lock(m->object);
			RELEASE_PAGE(m);
			UNLOCK_AND_DEALLOCATE;
			goto done;
		}

		vm_object_unlock(retry_object);
		vm_object_lock(m->object);

		if ((retry_object != object) ||
		    (retry_offset != offset)) {
			vm_map_unlock_read(map);
			vm_map_unlock(pmap_map);
			RELEASE_PAGE(m);
			UNLOCK_AND_DEALLOCATE;
			goto RetryFault;
		}
		/*
		 *	Check whether the protection has changed or the object
		 *	has been copied while we left the map unlocked.
		 */
		prot &= retry_prot;
		vm_object_unlock(m->object);
	}
	vm_object_lock(m->object);

	/*
	 *	If the copy object changed while the top-level object
	 *	was unlocked, then we must take away write permission.
	 */

	if (m->object->copy != old_copy_object)
		prot &= ~VM_PROT_WRITE;

	/*
	 *	If we want to wire down this page, but no longer have
	 *	adequate permissions, we must start all over.
	 */

	if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
		vm_map_verify_done(map, &version);
		vm_map_unlock(pmap_map);
		RELEASE_PAGE(m);
		UNLOCK_AND_DEALLOCATE;
		goto RetryFault;
	}

	/*
	 *	It's critically important that a wired-down page be faulted
	 *	only once in each map for which it is wired.
	 */
	vm_object_unlock(m->object);

	/*
	 *	Put this page into the physical map.
	 *	We had to do the unlock above because pmap_enter
	 *	may cause other faults.  The page may be on
	 *	the pageout queues.  If the pageout daemon comes
	 *	across the page, it will remove it from the queues.
	 */

	PMAP_ENTER(pmap, vaddr, m, prot, wired);
	/* Sync I & D caches for new mapping*/
	pmap_attribute(pmap,
		       vaddr,
		       PAGE_SIZE,
		       MATTR_CACHE,
		       &mv_cache_sync);

	/*
	 *	If the page is not wired down and isn't already
	 *	on a pageout queue, then put it where the
	 *	pageout daemon can find it.
	 */
	vm_object_lock(m->object);
	vm_page_lock_queues();
	if (change_wiring) {
		if (wired)
			vm_page_wire(m);
		else
			vm_page_unwire(m);
	}
#if	VM_FAULT_STATIC_CONFIG
	else {
		if (!m->active && !m->inactive)
			vm_page_activate(m);
		m->reference = TRUE;
	}
#else
	else if (software_reference_bits) {
		if (!m->active && !m->inactive)
			vm_page_activate(m);
		m->reference = TRUE;
	} else
		vm_page_activate(m);
#endif
	vm_page_unlock_queues();

	/*
	 *	Unlock everything, and return
	 */

	vm_map_verify_done(map, &version);
	vm_map_unlock(pmap_map);
	PAGE_WAKEUP_DONE(m);
	kr = KERN_SUCCESS;
	UNLOCK_AND_DEALLOCATE;
#undef	UNLOCK_AND_DEALLOCATE

done:
	thread_funnel_set( curflock, TRUE);
	cur_thread->interruptible = interruptible_state;

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
			      vaddr,
			      type_of_fault,
			      kr,
			      0,
			      0);
	return(kr);
}
/*
 *	vm_fault_wire:
 *
 *	Wire down a range of virtual addresses in a map.
 */
kern_return_t
vm_fault_wire(
	vm_map_t	map,
	vm_map_entry_t	entry,
	pmap_t		pmap)
{
	register vm_offset_t	va;
	register vm_offset_t	end_addr = entry->vme_end;
	register kern_return_t	rc;

	assert(entry->in_transition);

	/*
	 *	Inform the physical mapping system that the
	 *	range of addresses may not fault, so that
	 *	page tables and such can be locked down as well.
	 */

	pmap_pageable(pmap, entry->vme_start, end_addr, FALSE);

	/*
	 *	We simulate a fault to get the page and enter it
	 *	in the physical map.
	 */

	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
		if ((rc = vm_fault_wire_fast(
			map, va, entry, pmap)) != KERN_SUCCESS) {
			rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
				(pmap == kernel_pmap) ? THREAD_UNINT : THREAD_ABORTSAFE);
		}

		if (rc != KERN_SUCCESS) {
			struct vm_map_entry	tmp_entry = *entry;

			/* unwire wired pages */
			tmp_entry.vme_end = va;
			vm_fault_unwire(map, &tmp_entry, FALSE, pmap);

			return(rc);
		}
	}
	return KERN_SUCCESS;
}
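/*
 *	Explanatory note: when vm_fault_wire_fast() cannot handle a page, the
 *	retry above goes through the full vm_fault() with VM_PROT_NONE and the
 *	boolean wiring argument set to TRUE, i.e. a pure wire-down request;
 *	wiring into the kernel pmap is made uninterruptible.
 */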
/*
 *	vm_fault_unwire:
 *
 *	Unwire a range of virtual addresses in a map.
 */
void
vm_fault_unwire(
	vm_map_t	map,
	vm_map_entry_t	entry,
	boolean_t	deallocate,
	pmap_t		pmap)
{
	register vm_offset_t	va;
	register vm_offset_t	end_addr = entry->vme_end;
	vm_object_t		object;
	vm_prot_t		prot;

	object = (entry->is_sub_map)
			? VM_OBJECT_NULL : entry->object.vm_object;

	/*
	 *	Since the pages are wired down, we must be able to
	 *	get their mappings from the physical map system.
	 */

	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
		pmap_change_wiring(pmap, va, FALSE);

		if (object == VM_OBJECT_NULL) {
			(void) vm_fault(map, va, VM_PROT_NONE, TRUE, THREAD_UNINT);
		} else {
			vm_page_t	result_page;
			vm_page_t	top_page;
			vm_object_t	result_object;
			vm_fault_return_t result;

			do {
				prot = VM_PROT_NONE;

				vm_object_lock(object);
				vm_object_paging_begin(object);
				XPR(XPR_VM_FAULT,
					"vm_fault_unwire -> vm_fault_page\n",
					0,0,0,0,0);
				result = vm_fault_page(object,
						entry->offset +
						  (va - entry->vme_start),
						VM_PROT_NONE, TRUE,
						THREAD_UNINT,
						entry->offset,
						entry->offset +
						  (entry->vme_end
						   - entry->vme_start),
						VM_BEHAVIOR_SEQUENTIAL,
						&prot,
						&result_page,
						&top_page,
						(int *)0,
						0, map->no_zero_fill,
						FALSE);
			} while (result == VM_FAULT_RETRY);

			if (result != VM_FAULT_SUCCESS)
				panic("vm_fault_unwire: failure");

			result_object = result_page->object;
			if (deallocate) {
				assert(!result_page->fictitious);
				pmap_page_protect(result_page->phys_addr,
						  VM_PROT_NONE);
				VM_PAGE_FREE(result_page);
			} else {
				vm_page_lock_queues();
				vm_page_unwire(result_page);
				vm_page_unlock_queues();
				PAGE_WAKEUP_DONE(result_page);
			}

			vm_fault_cleanup(result_object, top_page);
		}
	}

	/*
	 *	Inform the physical mapping system that the range
	 *	of addresses may fault, so that page tables and
	 *	such may be unwired themselves.
	 */

	pmap_pageable(pmap, entry->vme_start, end_addr, TRUE);
}
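/*
 *	Explanatory note: the deallocate flag above selects between merely
 *	unwiring each resident page (deallocate == FALSE) and removing all of
 *	its mappings and freeing it outright (deallocate == TRUE).
 */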
/*
 *	vm_fault_wire_fast:
 *
 *	Handle common case of a wire down page fault at the given address.
 *	If successful, the page is inserted into the associated physical map.
 *	The map entry is passed in to avoid the overhead of a map lookup.
 *
 *	NOTE: the given address should be truncated to the
 *	proper page address.
 *
 *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
 *	a standard error specifying why the fault is fatal is returned.
 *
 *	The map in question must be referenced, and remains so.
 *	Caller has a read lock on the map.
 *
 *	This is a stripped version of vm_fault() for wiring pages.  Anything
 *	other than the common case will return KERN_FAILURE, and the caller
 *	is expected to call vm_fault().
 */
kern_return_t
vm_fault_wire_fast(
	vm_map_t	map,
	vm_offset_t	va,
	vm_map_entry_t	entry,
	pmap_t		pmap)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	register vm_page_t	m;
	vm_prot_t		prot;
	thread_act_t		thr_act;

	if((thr_act=current_act()) && (thr_act->task != TASK_NULL))
		thr_act->task->faults++;

#undef	RELEASE_PAGE
#define RELEASE_PAGE(m)	{				\
	PAGE_WAKEUP_DONE(m);				\
	vm_page_lock_queues();				\
	vm_page_unwire(m);				\
	vm_page_unlock_queues();			\
}

#undef	UNLOCK_THINGS
#define UNLOCK_THINGS	{				\
	object->paging_in_progress--;			\
	vm_object_unlock(object);			\
}

#undef	UNLOCK_AND_DEALLOCATE
#define UNLOCK_AND_DEALLOCATE	{			\
	UNLOCK_THINGS;					\
	vm_object_deallocate(object);			\
}
	/*
	 *	Give up and have caller do things the hard way.
	 */

#define GIVE_UP {					\
	UNLOCK_AND_DEALLOCATE;				\
	return(KERN_FAILURE);				\
}

	/*
	 *	If this entry is not directly to a vm_object, bail out.
	 */
	if (entry->is_sub_map)
		return(KERN_FAILURE);

	/*
	 *	Find the backing store object and offset into it.
	 */

	object = entry->object.vm_object;
	offset = (va - entry->vme_start) + entry->offset;
	prot = entry->protection;

	/*
	 *	Make a reference to this object to prevent its
	 *	disposal while we are messing with it.
	 */

	vm_object_lock(object);
	assert(object->ref_count > 0);
	object->ref_count++;
	vm_object_res_reference(object);
	object->paging_in_progress++;

	/*
	 *	INVARIANTS (through entire routine):
	 *
	 *	1)	At all times, we must either have the object
	 *		lock or a busy page in some object to prevent
	 *		some other thread from trying to bring in
	 *		the same page.
	 *
	 *	2)	Once we have a busy page, we must remove it from
	 *		the pageout queues, so that the pageout daemon
	 *		will not grab it away.
	 */

	/*
	 *	Look for page in top-level object.  If it's not there or
	 *	there's something going on, give up.
	 */
	m = vm_page_lookup(object, offset);
	if ((m == VM_PAGE_NULL) || (m->busy) ||
	    (m->unusual && ( m->error || m->restart || m->absent ||
			    prot & m->page_lock))) {
		GIVE_UP;
	}

	/*
	 *	Wire the page down now.  All bail outs beyond this
	 *	point must unwire the page.
	 */

	vm_page_lock_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	/*
	 *	Mark page busy for other threads.
	 */
	m->busy = TRUE;

	/*
	 *	Give up if the page is being written and there's a copy object
	 */
	if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
		RELEASE_PAGE(m);
		GIVE_UP;
	}

	/*
	 *	Put this page into the physical map.
	 *	We have to unlock the object because pmap_enter
	 *	may cause other faults.
	 */
	vm_object_unlock(object);

	PMAP_ENTER(pmap, va, m, prot, TRUE);
	/* Sync I & D caches for new mapping */
	pmap_attribute(pmap,
		       va,
		       PAGE_SIZE,
		       MATTR_CACHE,
		       &mv_cache_sync);

	/*
	 *	Must relock object so that paging_in_progress can be cleared.
	 */
	vm_object_lock(object);

	/*
	 *	Unlock everything, and return
	 */

	PAGE_WAKEUP_DONE(m);
	UNLOCK_AND_DEALLOCATE;

	return(KERN_SUCCESS);
}
/*
 *	Routine:	vm_fault_copy_cleanup
 *	Purpose:
 *		Release a page used by vm_fault_copy.
 */
void
vm_fault_copy_cleanup(
	vm_page_t	page,
	vm_page_t	top_page)
{
	vm_object_t	object = page->object;

	vm_object_lock(object);
	PAGE_WAKEUP_DONE(page);
	vm_page_lock_queues();
	if (!page->active && !page->inactive)
		vm_page_activate(page);
	vm_page_unlock_queues();
	vm_fault_cleanup(object, top_page);
}

void
vm_fault_copy_dst_cleanup(
	vm_page_t	page)
{
	vm_object_t	object;

	if (page != VM_PAGE_NULL) {
		object = page->object;
		vm_object_lock(object);
		vm_page_lock_queues();
		vm_page_unwire(page);
		vm_page_unlock_queues();
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}
}
/*
 *	Routine:	vm_fault_copy
 *
 *	Purpose:
 *		Copy pages from one virtual memory object to another --
 *		neither the source nor destination pages need be resident.
 *
 *		Before actually copying a page, the version associated with
 *		the destination address map will be verified.
 *
 *	In/out conditions:
 *		The caller must hold a reference, but not a lock, to
 *		each of the source and destination objects and to the
 *		destination map.
 *
 *	Results:
 *		Returns KERN_SUCCESS if no errors were encountered in
 *		reading or writing the data.  Returns KERN_INTERRUPTED if
 *		the operation was interrupted (only possible if the
 *		"interruptible" argument is asserted).  Other return values
 *		indicate a permanent error in copying the data.
 *
 *		The actual amount of data copied will be returned in the
 *		"copy_size" argument.  In the event that the destination map
 *		verification failed, this amount may be less than the amount
 *		requested.
 */
kern_return_t
vm_fault_copy(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_size_t		*src_size,	/* INOUT */
	vm_object_t		dst_object,
	vm_object_offset_t	dst_offset,
	vm_map_t		dst_map,
	vm_map_version_t	*dst_version,
	int			interruptible)
{
	vm_page_t		result_page;

	vm_page_t		src_page;
	vm_page_t		src_top_page;
	vm_prot_t		src_prot;

	vm_page_t		dst_page;
	vm_page_t		dst_top_page;
	vm_prot_t		dst_prot;

	vm_size_t		amount_left;
	vm_object_t		old_copy_object;
	kern_return_t		error = 0;

	vm_size_t		part_size;

	/*
	 * In order not to confuse the clustered pageins, align
	 * the different offsets on a page boundary.
	 */
	vm_object_offset_t	src_lo_offset = trunc_page_64(src_offset);
	vm_object_offset_t	dst_lo_offset = trunc_page_64(dst_offset);
	vm_object_offset_t	src_hi_offset = round_page_64(src_offset + *src_size);
	vm_object_offset_t	dst_hi_offset = round_page_64(dst_offset + *src_size);

#define	RETURN(x)					\
	MACRO_BEGIN					\
	*src_size -= amount_left;			\
	return(x);					\
	MACRO_END

	amount_left = *src_size;
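	/*
	 *	Explanatory note: amount_left counts down as whole or partial
	 *	pages are copied; the RETURN() macro above folds the remainder
	 *	back into *src_size so the caller learns how much was actually
	 *	copied.
	 */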
	do { /* while (amount_left > 0) */
		/*
		 * There may be a deadlock if both source and destination
		 * pages are the same. To avoid this deadlock, the copy must
		 * start by getting the destination page in order to apply
		 * COW semantics if any.
		 */

	RetryDestinationFault: ;

		dst_prot = VM_PROT_WRITE|VM_PROT_READ;

		vm_object_lock(dst_object);
		vm_object_paging_begin(dst_object);

		XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
		switch (vm_fault_page(dst_object,
				      trunc_page_64(dst_offset),
				      VM_PROT_WRITE|VM_PROT_READ,
				      FALSE,
				      interruptible,
				      dst_lo_offset,
				      dst_hi_offset,
				      VM_BEHAVIOR_SEQUENTIAL,
				      &dst_prot,
				      &dst_page,
				      &dst_top_page,
				      (int *)0,
				      &error,
				      dst_map->no_zero_fill,
				      FALSE)) {
		case VM_FAULT_SUCCESS:
			break;
		case VM_FAULT_RETRY:
			goto RetryDestinationFault;
		case VM_FAULT_MEMORY_SHORTAGE:
			if (vm_page_wait(interruptible))
				goto RetryDestinationFault;
			/* fall thru */
		case VM_FAULT_INTERRUPTED:
			RETURN(MACH_SEND_INTERRUPTED);
		case VM_FAULT_FICTITIOUS_SHORTAGE:
			vm_page_more_fictitious();
			goto RetryDestinationFault;
		case VM_FAULT_MEMORY_ERROR:
			if (error)
				return (error);
			else
				return(KERN_MEMORY_ERROR);
		}
		assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);

		old_copy_object = dst_page->object->copy;

		/*
		 * There exists the possibility that the source and
		 * destination page are the same.  But we can't
		 * easily determine that now.  If they are the
		 * same, the call to vm_fault_page() for the
		 * destination page will deadlock.  To prevent this we
		 * wire the page so we can drop busy without having
		 * the page daemon steal the page.  We clean up the
		 * top page but keep the paging reference on the object
		 * holding the dest page so it doesn't go away.
		 */

		vm_page_lock_queues();
		vm_page_wire(dst_page);
		vm_page_unlock_queues();
		PAGE_WAKEUP_DONE(dst_page);
		vm_object_unlock(dst_page->object);

		if (dst_top_page != VM_PAGE_NULL) {
			vm_object_lock(dst_object);
			VM_PAGE_FREE(dst_top_page);
			vm_object_paging_end(dst_object);
			vm_object_unlock(dst_object);
		}
		if (src_object == VM_OBJECT_NULL) {
			/*
			 *	No source object.  We will just
			 *	zero-fill the page in dst_object.
			 */
			src_page = VM_PAGE_NULL;
		} else {
			vm_object_lock(src_object);
			src_page = vm_page_lookup(src_object,
						  trunc_page_64(src_offset));
			if (src_page == dst_page)
				src_prot = dst_prot;
			else {
				src_prot = VM_PROT_READ;
				vm_object_paging_begin(src_object);

				XPR(XPR_VM_FAULT,
					"vm_fault_copy(2) -> vm_fault_page\n",
					0,0,0,0,0);
			RetrySourceFault: ;

				switch (vm_fault_page(src_object,
						      trunc_page_64(src_offset),
						      VM_PROT_READ,
						      FALSE,
						      interruptible,
						      src_lo_offset,
						      src_hi_offset,
						      VM_BEHAVIOR_SEQUENTIAL,
						      &src_prot,
						      &result_page,
						      &src_top_page,
						      (int *)0,
						      &error,
						      FALSE,
						      FALSE)) {
				case VM_FAULT_SUCCESS:
					break;
				case VM_FAULT_RETRY:
					goto RetrySourceFault;
				case VM_FAULT_MEMORY_SHORTAGE:
					if (vm_page_wait(interruptible))
						goto RetrySourceFault;
					/* fall thru */
				case VM_FAULT_INTERRUPTED:
					vm_fault_copy_dst_cleanup(dst_page);
					RETURN(MACH_SEND_INTERRUPTED);
				case VM_FAULT_FICTITIOUS_SHORTAGE:
					vm_page_more_fictitious();
					goto RetrySourceFault;
				case VM_FAULT_MEMORY_ERROR:
					vm_fault_copy_dst_cleanup(dst_page);
					if (error)
						return (error);
					else
						return(KERN_MEMORY_ERROR);
				}

				src_page = result_page;

				assert((src_top_page == VM_PAGE_NULL) ==
				       (src_page->object == src_object));
			}
			assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
			vm_object_unlock(src_page->object);
		}
		if (!vm_map_verify(dst_map, dst_version)) {
			if (src_page != VM_PAGE_NULL && src_page != dst_page)
				vm_fault_copy_cleanup(src_page, src_top_page);
			vm_fault_copy_dst_cleanup(dst_page);
			break;
		}

		vm_object_lock(dst_page->object);

		if (dst_page->object->copy != old_copy_object) {
			vm_object_unlock(dst_page->object);
			vm_map_verify_done(dst_map, dst_version);
			if (src_page != VM_PAGE_NULL && src_page != dst_page)
				vm_fault_copy_cleanup(src_page, src_top_page);
			vm_fault_copy_dst_cleanup(dst_page);
			break;
		}
		vm_object_unlock(dst_page->object);
		/*
		 *	Copy the page, and note that it is dirty
		 *	immediately.
		 */

		if (!page_aligned(src_offset) ||
		    !page_aligned(dst_offset) ||
		    !page_aligned(amount_left)) {

			vm_object_offset_t	src_po,
						dst_po;

			src_po = src_offset - trunc_page_64(src_offset);
			dst_po = dst_offset - trunc_page_64(dst_offset);

			if (dst_po > src_po) {
				part_size = PAGE_SIZE - dst_po;
			} else {
				part_size = PAGE_SIZE - src_po;
			}
			if (part_size > (amount_left)){
				part_size = amount_left;
			}
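			/*
			 *	Explanatory note (illustrative offsets only): with 4K
			 *	pages, src_offset = 0x1200 and dst_offset = 0x2600 give
			 *	src_po = 0x200 and dst_po = 0x600, so part_size is
			 *	PAGE_SIZE - 0x600 = 0xA00 bytes, further clamped to
			 *	amount_left if less remains to be copied.
			 */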
			if (src_page == VM_PAGE_NULL) {
				vm_page_part_zero_fill(dst_page,
						       dst_po, part_size);
			} else {
				vm_page_part_copy(src_page, src_po,
						  dst_page, dst_po, part_size);
				if(!dst_page->dirty){
					vm_object_lock(dst_object);
					dst_page->dirty = TRUE;
					vm_object_unlock(dst_page->object);
				}
			}
		} else {
			part_size = PAGE_SIZE;

			if (src_page == VM_PAGE_NULL)
				vm_page_zero_fill(dst_page);
			else {
				vm_page_copy(src_page, dst_page);
				if(!dst_page->dirty){
					vm_object_lock(dst_object);
					dst_page->dirty = TRUE;
					vm_object_unlock(dst_page->object);
				}
			}
		}

		/*
		 *	Unlock everything, and return
		 */

		vm_map_verify_done(dst_map, dst_version);

		if (src_page != VM_PAGE_NULL && src_page != dst_page)
			vm_fault_copy_cleanup(src_page, src_top_page);
		vm_fault_copy_dst_cleanup(dst_page);

		amount_left -= part_size;
		src_offset += part_size;
		dst_offset += part_size;
	} while (amount_left > 0);

	RETURN(KERN_SUCCESS);
#undef	RETURN
}
/*
 *	Routine:	vm_fault_page_overwrite
 *
 *		A form of vm_fault_page that assumes that the
 *		resulting page will be overwritten in its entirety,
 *		making it unnecessary to obtain the correct *contents*
 *		of the page.
 *
 *		XXX Untested.  Also unused.  Eventually, this technology
 *		could be used in vm_fault_copy() to advantage.
 */
vm_fault_return_t
vm_fault_page_overwrite(
	vm_object_t		dst_object,
	vm_object_offset_t	dst_offset,
	vm_page_t		*result_page)	/* OUT */
{
	vm_page_t	dst_page;
	kern_return_t	wait_result;
#define	interruptible	THREAD_UNINT	/* XXX */

	while (TRUE) {
		/*
		 *	Look for a page at this offset
		 */

		while ((dst_page = vm_page_lookup(dst_object, dst_offset))
				== VM_PAGE_NULL) {
			/*
			 *	No page, no problem... just allocate one.
			 */

			dst_page = vm_page_alloc(dst_object, dst_offset);
			if (dst_page == VM_PAGE_NULL) {
				vm_object_unlock(dst_object);
				VM_PAGE_WAIT();
				vm_object_lock(dst_object);
				continue;
			}

			/*
			 *	Pretend that the memory manager
			 *	write-protected the page.
			 *
			 *	Note that we will be asking for write
			 *	permission without asking for the data
			 *	first.
			 */

			dst_page->overwriting = TRUE;
			dst_page->page_lock = VM_PROT_WRITE;
			dst_page->absent = TRUE;
			dst_page->unusual = TRUE;
			dst_object->absent_count++;

			break;
		}

		/*
		 *	When we bail out, we might have to throw
		 *	away the page created here.
		 */
#define	DISCARD_PAGE						\
	MACRO_BEGIN						\
	vm_object_lock(dst_object);				\
	dst_page = vm_page_lookup(dst_object, dst_offset);	\
	if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
		VM_PAGE_FREE(dst_page);				\
	vm_object_unlock(dst_object);				\
	MACRO_END
		/*
		 *	If the page is write-protected...
		 */

		if (dst_page->page_lock & VM_PROT_WRITE) {
			/*
			 *	... and an unlock request hasn't been sent
			 */

			if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
				vm_prot_t	u;
				kern_return_t	rc;

				/*
				 *	... then send one now.
				 */

				if (!dst_object->pager_ready) {
					vm_object_assert_wait(dst_object,
						VM_OBJECT_EVENT_PAGER_READY,
						interruptible);
					vm_object_unlock(dst_object);
					wait_result = thread_block((void (*)(void))0);
					if (wait_result != THREAD_AWAKENED) {
						DISCARD_PAGE;
						return(VM_FAULT_INTERRUPTED);
					}
					vm_object_lock(dst_object);
					continue;
				}

				u = dst_page->unlock_request |= VM_PROT_WRITE;
				vm_object_unlock(dst_object);

				if ((rc = memory_object_data_unlock(
						dst_object->pager,
						dst_object->pager_request,
						dst_offset + dst_object->paging_offset,
						PAGE_SIZE,
						u)) != KERN_SUCCESS) {
					if (vm_fault_debug)
						printf("vm_object_overwrite: memory_object_data_unlock failed\n");
					DISCARD_PAGE;
					return((rc == MACH_SEND_INTERRUPTED) ?
						VM_FAULT_INTERRUPTED :
						VM_FAULT_MEMORY_ERROR);
				}

				vm_object_lock(dst_object);
			}

			/* ... fall through to wait below */
		} else {
			/*
			 *	If the page isn't being used for other
			 *	purposes, then we're done.
			 */
			if ( ! (dst_page->busy || dst_page->absent ||
				dst_page->error || dst_page->restart) )
				break;
		}

		PAGE_ASSERT_WAIT(dst_page, interruptible);
		vm_object_unlock(dst_object);
		wait_result = thread_block((void (*)(void))0);
		if (wait_result != THREAD_AWAKENED) {
			DISCARD_PAGE;
			return(VM_FAULT_INTERRUPTED);
		}
		vm_object_lock(dst_object);
	}

	*result_page = dst_page;
	return(VM_FAULT_SUCCESS);

#undef	interruptible
#undef	DISCARD_PAGE
}
#if	VM_FAULT_CLASSIFY
/*
 *	Temporary statistics gathering support.
 */

/*
 *	Statistics arrays:
 */
#define VM_FAULT_TYPES_MAX	5
#define	VM_FAULT_LEVEL_MAX	8

int	vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];

#define	VM_FAULT_TYPE_ZERO_FILL	0
#define	VM_FAULT_TYPE_MAP_IN	1
#define	VM_FAULT_TYPE_PAGER	2
#define	VM_FAULT_TYPE_COPY	3
#define	VM_FAULT_TYPE_OTHER	4

void
vm_fault_classify(vm_object_t		object,
		  vm_object_offset_t	offset,
		  vm_prot_t		fault_type)
{
	int		type, level = 0;
	vm_page_t	m;

	while (TRUE) {
		m = vm_page_lookup(object, offset);
		if (m != VM_PAGE_NULL) {
			if (m->busy || m->error || m->restart || m->absent ||
			    fault_type & m->page_lock) {
				type = VM_FAULT_TYPE_OTHER;
				break;
			}
			if (((fault_type & VM_PROT_WRITE) == 0) ||
			    ((level == 0) && object->copy == VM_OBJECT_NULL)) {
				type = VM_FAULT_TYPE_MAP_IN;
				break;
			}
			type = VM_FAULT_TYPE_COPY;
			break;
		}
		else {
			if (object->pager_created) {
				type = VM_FAULT_TYPE_PAGER;
				break;
			}
			if (object->shadow == VM_OBJECT_NULL) {
				type = VM_FAULT_TYPE_ZERO_FILL;
				break;
			}

			offset += object->shadow_offset;
			object = object->shadow;
			level++;
			continue;
		}
	}

	if (level >= VM_FAULT_LEVEL_MAX)
		level = VM_FAULT_LEVEL_MAX - 1;

	vm_fault_stats[type][level] += 1;

	return;
}

/* cleanup routine to call from debugger */
void
vm_fault_classify_init(void)
{
	int	type, level;

	for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
		for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
			vm_fault_stats[type][level] = 0;
		}
	}

	return;
}
#endif	/* VM_FAULT_CLASSIFY */