/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Page fault handling module.
 */
/* remove after component interface available */
extern int	vnode_pager_workaround;
extern int	device_pager_workaround;

#include <mach_cluster_stats.h>
#include <mach_pagemap.h>
#include <mach_kdb.h>

#include <vm/vm_fault.h>
#include <mach/kern_return.h>
#include <mach/message.h>	/* for error codes */
#include <kern/host_statistics.h>
#include <kern/counters.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/host.h>
#include <kern/xpr.h>
#include <ppc/proc_reg.h>
#include <ppc/pmap_internals.h>
#include <vm/task_working_set.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/memory_object.h>
				/* For memory_object_data_{request,unlock} */
#include <kern/mach_param.h>
#include <kern/macro_help.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>

#include <sys/kdebug.h>
#define VM_FAULT_CLASSIFY	0
#define VM_FAULT_STATIC_CONFIG	1

#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */

int		vm_object_absent_max = 50;

int		vm_fault_debug = 0;
boolean_t	vm_page_deactivate_behind = TRUE;

vm_machine_attribute_val_t mv_cache_sync = MATTR_VAL_CACHE_SYNC;

#if	!VM_FAULT_STATIC_CONFIG
boolean_t	vm_fault_dirty_handling = FALSE;
boolean_t	vm_fault_interruptible = FALSE;
boolean_t	software_reference_bits = TRUE;
#endif

#if	MACH_KDB
extern struct db_watchpoint *db_watchpoint_list;
#endif	/* MACH_KDB */
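
/*
 * Note: vm_object_absent_max bounds the number of outstanding "absent"
 * (pagein-pending) pages allowed per object; vm_fault_page() below makes
 * faulting threads wait once an object reaches that limit.
 */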
/* Forward declarations of internal routines. */
extern kern_return_t vm_fault_wire_fast(
				vm_map_t	map,
				vm_offset_t	va,
				vm_map_entry_t	entry,
				pmap_t		pmap);

extern void vm_fault_continue(void);

extern void vm_fault_copy_cleanup(
				vm_page_t	page,
				vm_page_t	top_page);

extern void vm_fault_copy_dst_cleanup(
				vm_page_t	page);

#if	VM_FAULT_CLASSIFY
extern void vm_fault_classify(vm_object_t	object,
			  vm_object_offset_t	offset,
			  vm_prot_t		fault_type);

extern void vm_fault_classify_init(void);
#endif
/*
 *	Routine:	vm_fault_init
 *	Purpose:
 *		Initialize our private data structures.
 */
void
vm_fault_init(void)
{
}
/*
 *	Routine:	vm_fault_cleanup
 *	Purpose:
 *		Clean up the result of vm_fault_page.
 *	Results:
 *		The paging reference for "object" is released.
 *		"object" is unlocked.
 *		If "top_page" is not null, "top_page" is
 *		freed and the paging reference for the object
 *		containing it is released.
 *
 *	In/out conditions:
 *		"object" must be locked.
 */
void
vm_fault_cleanup(
	register vm_object_t	object,
	register vm_page_t	top_page)
{
	vm_object_paging_end(object);
	vm_object_unlock(object);

	if (top_page != VM_PAGE_NULL) {
		object = top_page->object;
		vm_object_lock(object);
		VM_PAGE_FREE(top_page);
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}
}
#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES	16
struct {
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats_in[MAXCLUSTERPAGES];
#define CLUSTER_STAT(clause)	clause
#define CLUSTER_STAT_HIGHER(x)	\
	((cluster_stats_in[(x)].pages_at_higher_offsets)++)
#define CLUSTER_STAT_LOWER(x)	\
	((cluster_stats_in[(x)].pages_at_lower_offsets)++)
#define CLUSTER_STAT_CLUSTER(x)	\
	((cluster_stats_in[(x)].pages_in_cluster)++)
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */
/* XXX - temporary */
boolean_t vm_allow_clustered_pagein = FALSE;
int vm_pagein_cluster_used = 0;

/*
 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
 */
int vm_default_ahead = 1;	/* Number of pages to prepage ahead */
int vm_default_behind = 0;	/* Number of pages to prepage behind */

#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
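
/*
 * Example of the defaults above: with vm_default_ahead == 1 and
 * vm_default_behind == 0, a VM_BEHAVIOR_DEFAULT fault may prepage at
 * most one page ahead of the faulting offset and none behind it, and
 * only when vm_allow_clustered_pagein is TRUE (it is FALSE by default,
 * so clustered pagein is currently disabled).
 */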
/*
 *	Routine:	vm_fault_page
 *	Purpose:
 *		Find the resident page for the virtual memory
 *		specified by the given virtual memory object
 *		and offset.
 *	Additional arguments:
 *		The required permissions for the page are given
 *		in "fault_type".  Desired permissions are included
 *		in "protection".  The minimum and maximum valid offsets
 *		within the object for the relevant map entry are
 *		passed in "lo_offset" and "hi_offset" respectively and
 *		the expected page reference pattern is passed in "behavior".
 *		These three parameters are used to determine pagein cluster
 *		limits.
 *
 *		If the desired page is known to be resident (for
 *		example, because it was previously wired down), asserting
 *		the "unwiring" parameter will speed the search.
 *
 *		If the operation can be interrupted (by thread_abort
 *		or thread_terminate), then the "interruptible"
 *		parameter should be asserted.
 *
 *	Results:
 *		The page containing the proper data is returned
 *		in "result_page".
 *
 *	In/out conditions:
 *		The source object must be locked and referenced,
 *		and must donate one paging reference.  The reference
 *		is not affected.  The paging reference and lock are
 *		consumed.
 *
 *		If the call succeeds, the object in which "result_page"
 *		resides is left locked and holding a paging reference.
 *		If this is not the original object, a busy page in the
 *		original object is returned in "top_page", to prevent other
 *		callers from pursuing this same data, along with a paging
 *		reference for the original object.  The "top_page" should
 *		be destroyed when this guarantee is no longer required.
 *		The "result_page" is also left busy.  It is not removed
 *		from the pageout queues.
 */

vm_fault_return_t
vm_fault_page(
	/* Arguments: */
	vm_object_t	first_object,	/* Object to begin search */
	vm_object_offset_t first_offset,	/* Offset into object */
	vm_prot_t	fault_type,	/* What access is requested */
	boolean_t	must_be_resident,/* Must page be resident? */
	int		interruptible,	/* how may fault be interrupted? */
	vm_object_offset_t lo_offset,	/* Map entry start */
	vm_object_offset_t hi_offset,	/* Map entry end */
	vm_behavior_t	behavior,	/* Page reference behavior */
	/* Modifies in place: */
	vm_prot_t	*protection,	/* Protection for mapping */
	/* Returns: */
	vm_page_t	*result_page,	/* Page found, if successful */
	vm_page_t	*top_page,	/* Page in top object, if
					 * not result_page.  */
	int		*type_of_fault,	/* if non-null, fill in with type of fault
					 * COW, zero-fill, etc... returned in trace point */
	/* More arguments: */
	kern_return_t	*error_code,	/* code if page is in error */
	boolean_t	no_zero_fill,	/* don't zero fill absent pages */
	boolean_t	data_supply)	/* treat as data_supply if
					 * it is a write fault and a full
					 * page is provided */
{
	register
	vm_page_t		m;
	register
	vm_object_t		object;
	register
	vm_object_offset_t	offset;
	vm_page_t		first_m;
	vm_object_t		next_object;
	vm_object_t		copy_object;
	boolean_t		look_for_page;
	vm_prot_t		access_required = fault_type;
	vm_prot_t		wants_copy_flag;
	vm_size_t		cluster_size, length;
	vm_object_offset_t	cluster_offset;
	vm_object_offset_t	cluster_start, cluster_end, paging_offset;
	vm_object_offset_t	align_offset;
	CLUSTER_STAT(int	pages_at_higher_offsets;)
	CLUSTER_STAT(int	pages_at_lower_offsets;)
	kern_return_t		wait_result;
	thread_t		cur_thread;
	boolean_t		interruptible_state;
	boolean_t		bumped_pagein = FALSE;
/*
 * MACH page map - an optional optimization where a bit map is maintained
 * by the VM subsystem for internal objects to indicate which pages of
 * the object currently reside on backing store.  This existence map
 * duplicates information maintained by the vnode pager.  It is
 * created at the time of the first pageout against the object, i.e.
 * at the same time pager for the object is created.  The optimization
 * is designed to eliminate pager interaction overhead, if it is
 * 'known' that the page does not exist on backing store.
 *
 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
 * either marked as paged out in the existence map for the object or no
 * existence map exists for the object.  LOOK_FOR() is one of the
 * criteria in the decision to invoke the pager.  It is also used as one
 * of the criteria to terminate the scan for adjacent pages in a clustered
 * pagein operation.  Note that LOOK_FOR() always evaluates to TRUE for
 * permanent objects.  Note also that if the pager for an internal object
 * has not been created, the pager is not invoked regardless of the value
 * of LOOK_FOR() and that clustered pagein scans are only done on an object
 * for which a pager has been created.
 *
 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 * is marked as paged out in the existence map for the object.
 * PAGED_OUT() is used to determine if a page has already been pushed
 * into a copy object in order to avoid a redundant page out operation.
 */
#if	MACH_PAGEMAP
#define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			!= VM_EXTERNAL_STATE_ABSENT)
#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			== VM_EXTERNAL_STATE_EXISTS)
#else	/* MACH_PAGEMAP */
/*
 * If the MACH page map optimization is not enabled,
 * LOOK_FOR() always evaluates to TRUE.  The pager will always be
 * invoked to resolve missing pages in an object, assuming the pager
 * has been created for the object.  In a clustered page operation, the
 * absence of a page on backing store cannot be used to terminate
 * a scan for adjacent pages since that information is available only in
 * the pager.  Hence pages that may not be paged out are potentially
 * included in a clustered request.  The vnode pager is coded to deal
 * with any combination of absent/present pages in a clustered
 * pagein request.  PAGED_OUT() always evaluates to FALSE, i.e. the pager
 * will always be invoked to push a dirty page into a copy object assuming
 * a pager has been created.  If the page has already been pushed, the
 * pager will ignore the new request.
 */
#define LOOK_FOR(o, f) TRUE
#define PAGED_OUT(o, f) FALSE
#endif	/* MACH_PAGEMAP */
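
/*
 * In vm_fault_page() below, LOOK_FOR() is consulted (together with
 * object->pager_created) before asking the pager for a missing page,
 * and PAGED_OUT() is consulted before pushing a page into a copy object.
 */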
#define PREPARE_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	vm_page_lock_queues();				\
	MACRO_END

#define DO_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	PAGE_WAKEUP_DONE(m);				\
	if (!m->active && !m->inactive)			\
		vm_page_activate(m);			\
	vm_page_unlock_queues();			\
	MACRO_END

#define RELEASE_PAGE(m)					\
	MACRO_BEGIN					\
	PREPARE_RELEASE_PAGE(m);			\
	DO_RELEASE_PAGE(m);				\
	MACRO_END
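
/*
 * RELEASE_PAGE() is used on error/retry paths: under the page queues
 * lock it wakes up any threads waiting on the busy page and reactivates
 * the page if it is on neither the active nor the inactive queue.
 */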
#if	TRACEFAULTPAGE
	dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset);	/* (TEST/DEBUG) */
#endif

#if	!VM_FAULT_STATIC_CONFIG
	if (vm_fault_dirty_handling
#if	MACH_KDB
		/*
		 *	If there are watchpoints set, then
		 *	we don't want to give away write permission
		 *	on a read fault.  Make the task write fault,
		 *	so that the watchpoint code notices the access.
		 */
	    || db_watchpoint_list
#endif	/* MACH_KDB */
	    ) {
		/*
		 *	If we aren't asking for write permission,
		 *	then don't give it away.  We're using write
		 *	faults to set the dirty bit.
		 */
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
	}

	if (!vm_fault_interruptible)
		interruptible = THREAD_UNINT;
#else	/* STATIC_CONFIG */
#if	MACH_KDB
	/*
	 *	If there are watchpoints set, then
	 *	we don't want to give away write permission
	 *	on a read fault.  Make the task write fault,
	 *	so that the watchpoint code notices the access.
	 */
	if (db_watchpoint_list) {
		/*
		 *	If we aren't asking for write permission,
		 *	then don't give it away.  We're using write
		 *	faults to set the dirty bit.
		 */
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
	}
#endif	/* MACH_KDB */
#endif	/* STATIC_CONFIG */

	cur_thread = current_thread();

	interruptible_state = cur_thread->interruptible;
	if (interruptible == THREAD_UNINT)
		cur_thread->interruptible = FALSE;

	/*
	 *	INVARIANTS (through entire routine):
	 *
	 *	1)	At all times, we must either have the object
	 *		lock or a busy page in some object to prevent
	 *		some other thread from trying to bring in
	 *		the same page.
	 *
	 *		Note that we cannot hold any locks during the
	 *		pager access or when waiting for memory, so
	 *		we use a busy page then.
	 *
	 *		Note also that we aren't as concerned about more than
	 *		one thread attempting to memory_object_data_unlock
	 *		the same page at once, so we don't hold the page
	 *		as busy then, but do record the highest unlock
	 *		value so far.  [Unlock requests may also be delivered
	 *		out of order.]
	 *
	 *	2)	To prevent another thread from racing us down the
	 *		shadow chain and entering a new page in the top
	 *		object before we do, we must keep a busy page in
	 *		the top object while following the shadow chain.
	 *
	 *	3)	We must increment paging_in_progress on any object
	 *		for which we have a busy page
	 *
	 *	4)	We leave busy pages on the pageout queues.
	 *		If the pageout daemon comes across a busy page,
	 *		it will remove the page from the pageout queues.
	 */

	/*
	 *	Search for the page at object/offset.
	 */

	object = first_object;
	offset = first_offset;
	first_m = VM_PAGE_NULL;
	access_required = fault_type;

	XPR(XPR_VM_FAULT,
		"vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
		(integer_t)object, offset, fault_type, *protection, 0);

	/*
	 *	See whether this page is resident
	 */
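	/*
	 * The loop below walks down the shadow chain, one object per
	 * iteration, looking up (or creating) the page at object/offset.
	 * The busy-page and paging_in_progress rules described in the
	 * INVARIANTS comment above are what keep the walk safe across
	 * the points where locks are dropped.
	 */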
	while (TRUE) {
#if	TRACEFAULTPAGE
		dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
		if (!object->alive) {
			vm_fault_cleanup(object, first_m);
			cur_thread->interruptible = interruptible_state;
			return(VM_FAULT_MEMORY_ERROR);
		}
		m = vm_page_lookup(object, offset);
#if	TRACEFAULTPAGE
		dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object);	/* (TEST/DEBUG) */
#endif
		if (m != VM_PAGE_NULL) {
			/*
			 *	If the page was pre-paged as part of a
			 *	cluster, record the fact.
			 */
			if (m->clustered) {
				vm_pagein_cluster_used++;
				m->clustered = FALSE;
			}

			/*
			 *	If the page is being brought in,
			 *	wait for it and then retry.
			 *
			 *	A possible optimization: if the page
			 *	is known to be resident, we can ignore
			 *	pages that are absent (regardless of
			 *	whether they're busy).
			 */
			if (m->busy) {
#if	TRACEFAULTPAGE
				dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
				PAGE_ASSERT_WAIT(m, interruptible);
				vm_object_unlock(object);
				XPR(XPR_VM_FAULT,
				    "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
					(integer_t)object, offset,
					(integer_t)m, 0, 0);
				counter(c_vm_fault_page_block_busy_kernel++);
				wait_result = thread_block((void (*)(void))0);

				vm_object_lock(object);
				if (wait_result != THREAD_AWAKENED) {
					vm_fault_cleanup(object, first_m);
					cur_thread->interruptible = interruptible_state;
					if (wait_result == THREAD_RESTART)
						return(VM_FAULT_RETRY);
					else
						return(VM_FAULT_INTERRUPTED);
				}
				continue;
			}
552 * If the page is in error, give up now.
557 dbgTrace(0xBEEF0006, (unsigned int) m
, (unsigned int) error_code
); /* (TEST/DEBUG) */
560 *error_code
= m
->page_error
;
562 vm_fault_cleanup(object
, first_m
);
563 cur_thread
->interruptible
= interruptible_state
;
564 return(VM_FAULT_MEMORY_ERROR
);
568 * If the pager wants us to restart
569 * at the top of the chain,
570 * typically because it has moved the
571 * page to another pager, then do so.
576 dbgTrace(0xBEEF0007, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
579 vm_fault_cleanup(object
, first_m
);
580 cur_thread
->interruptible
= interruptible_state
;
581 return(VM_FAULT_RETRY
);
585 * If the page isn't busy, but is absent,
586 * then it was deemed "unavailable".
591 * Remove the non-existent page (unless it's
592 * in the top object) and move on down to the
593 * next object (if there is one).
596 dbgTrace(0xBEEF0008, (unsigned int) m
, (unsigned int) object
->shadow
); /* (TEST/DEBUG) */
599 next_object
= object
->shadow
;
600 if (next_object
== VM_OBJECT_NULL
) {
603 assert(!must_be_resident
);
605 if (object
->shadow_severed
) {
608 cur_thread
->interruptible
= interruptible_state
;
609 return VM_FAULT_MEMORY_ERROR
;
613 * Absent page at bottom of shadow
614 * chain; zero fill the page we left
615 * busy in the first object, and flush
616 * the absent page. But first we
617 * need to allocate a real page.
619 if (VM_PAGE_THROTTLED() ||
620 (real_m
= vm_page_grab()) == VM_PAGE_NULL
) {
621 vm_fault_cleanup(object
, first_m
);
622 cur_thread
->interruptible
= interruptible_state
;
623 return(VM_FAULT_MEMORY_SHORTAGE
);
627 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
628 (integer_t
)object
, offset
,
630 (integer_t
)first_object
, 0);
631 if (object
!= first_object
) {
633 vm_object_paging_end(object
);
634 vm_object_unlock(object
);
635 object
= first_object
;
636 offset
= first_offset
;
638 first_m
= VM_PAGE_NULL
;
639 vm_object_lock(object
);
643 assert(real_m
->busy
);
644 vm_page_insert(real_m
, object
, offset
);
648 * Drop the lock while zero filling
649 * page. Then break because this
650 * is the page we wanted. Checking
651 * the page lock is a waste of time;
652 * this page was either absent or
653 * newly allocated -- in both cases
654 * it can't be page locked by a pager.
659 vm_object_unlock(object
);
660 vm_page_zero_fill(m
);
662 *type_of_fault
= DBG_ZERO_FILL_FAULT
;
663 VM_STAT(zero_fill_count
++);
665 if (bumped_pagein
== TRUE
) {
667 current_task()->pageins
--;
669 vm_object_lock(object
);
671 pmap_clear_modify(m
->phys_addr
);
672 vm_page_lock_queues();
673 VM_PAGE_QUEUES_REMOVE(m
);
674 m
->page_ticket
= vm_page_ticket
;
675 vm_page_ticket_roll
++;
676 if(vm_page_ticket_roll
==
677 VM_PAGE_TICKETS_IN_ROLL
) {
678 vm_page_ticket_roll
= 0;
680 VM_PAGE_TICKET_ROLL_IDS
)
685 queue_enter(&vm_page_queue_inactive
,
686 m
, vm_page_t
, pageq
);
688 vm_page_inactive_count
++;
689 vm_page_unlock_queues();
692 if (must_be_resident
) {
693 vm_object_paging_end(object
);
694 } else if (object
!= first_object
) {
695 vm_object_paging_end(object
);
701 vm_object_absent_release(object
);
704 vm_page_lock_queues();
705 VM_PAGE_QUEUES_REMOVE(m
);
706 vm_page_unlock_queues();
709 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
710 (integer_t
)object
, offset
,
711 (integer_t
)next_object
,
712 offset
+object
->shadow_offset
,0);
713 offset
+= object
->shadow_offset
;
714 hi_offset
+= object
->shadow_offset
;
715 lo_offset
+= object
->shadow_offset
;
716 access_required
= VM_PROT_READ
;
717 vm_object_lock(next_object
);
718 vm_object_unlock(object
);
719 object
= next_object
;
720 vm_object_paging_begin(object
);
726 && ((object
!= first_object
) ||
727 (object
->copy
!= VM_OBJECT_NULL
))
728 && (fault_type
& VM_PROT_WRITE
)) {
730 * This is a copy-on-write fault that will
731 * cause us to revoke access to this page, but
732 * this page is in the process of being cleaned
733 * in a clustered pageout. We must wait until
734 * the cleaning operation completes before
735 * revoking access to the original page,
736 * otherwise we might attempt to remove a
740 dbgTrace(0xBEEF0009, (unsigned int) m
, (unsigned int) offset
); /* (TEST/DEBUG) */
743 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
744 (integer_t
)object
, offset
,
746 /* take an extra ref so that object won't die */
747 assert(object
->ref_count
> 0);
749 vm_object_res_reference(object
);
750 vm_fault_cleanup(object
, first_m
);
751 counter(c_vm_fault_page_block_backoff_kernel
++);
752 vm_object_lock(object
);
753 assert(object
->ref_count
> 0);
754 m
= vm_page_lookup(object
, offset
);
755 if (m
!= VM_PAGE_NULL
&& m
->cleaning
) {
756 PAGE_ASSERT_WAIT(m
, interruptible
);
757 vm_object_unlock(object
);
758 wait_result
= thread_block((void (*)(void)) 0);
759 vm_object_deallocate(object
);
762 vm_object_unlock(object
);
763 vm_object_deallocate(object
);
764 cur_thread
->interruptible
= interruptible_state
;
765 return VM_FAULT_RETRY
;
770 * If the desired access to this page has
771 * been locked out, request that it be unlocked.
774 if (access_required
& m
->page_lock
) {
775 if ((access_required
& m
->unlock_request
) != access_required
) {
776 vm_prot_t new_unlock_request
;
780 dbgTrace(0xBEEF000A, (unsigned int) m
, (unsigned int) object
->pager_ready
); /* (TEST/DEBUG) */
782 if (!object
->pager_ready
) {
784 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
786 (integer_t
)object
, offset
,
788 /* take an extra ref */
789 assert(object
->ref_count
> 0);
791 vm_object_res_reference(object
);
792 vm_fault_cleanup(object
,
794 counter(c_vm_fault_page_block_backoff_kernel
++);
795 vm_object_lock(object
);
796 assert(object
->ref_count
> 0);
797 if (!object
->pager_ready
) {
798 vm_object_assert_wait(
800 VM_OBJECT_EVENT_PAGER_READY
,
802 vm_object_unlock(object
);
803 wait_result
= thread_block((void (*)(void))0);
804 vm_object_deallocate(object
);
807 vm_object_unlock(object
);
808 vm_object_deallocate(object
);
809 cur_thread
->interruptible
= interruptible_state
;
810 return VM_FAULT_RETRY
;
814 new_unlock_request
= m
->unlock_request
=
815 (access_required
| m
->unlock_request
);
816 vm_object_unlock(object
);
818 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
819 (integer_t
)object
, offset
,
820 (integer_t
)m
, new_unlock_request
, 0);
821 if ((rc
= memory_object_data_unlock(
823 offset
+ object
->paging_offset
,
828 printf("vm_fault: memory_object_data_unlock failed\n");
829 vm_object_lock(object
);
830 vm_fault_cleanup(object
, first_m
);
831 cur_thread
->interruptible
= interruptible_state
;
832 return((rc
== MACH_SEND_INTERRUPTED
) ?
833 VM_FAULT_INTERRUPTED
:
834 VM_FAULT_MEMORY_ERROR
);
836 vm_object_lock(object
);
841 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
842 access_required
, (integer_t
)object
,
843 offset
, (integer_t
)m
, 0);
844 /* take an extra ref so object won't die */
845 assert(object
->ref_count
> 0);
847 vm_object_res_reference(object
);
848 vm_fault_cleanup(object
, first_m
);
849 counter(c_vm_fault_page_block_backoff_kernel
++);
850 vm_object_lock(object
);
851 assert(object
->ref_count
> 0);
852 m
= vm_page_lookup(object
, offset
);
853 if (m
!= VM_PAGE_NULL
&&
854 (access_required
& m
->page_lock
) &&
855 !((access_required
& m
->unlock_request
) != access_required
)) {
856 PAGE_ASSERT_WAIT(m
, interruptible
);
857 vm_object_unlock(object
);
858 wait_result
= thread_block((void (*)(void)) 0);
859 vm_object_deallocate(object
);
862 vm_object_unlock(object
);
863 vm_object_deallocate(object
);
864 cur_thread
->interruptible
= interruptible_state
;
865 return VM_FAULT_RETRY
;
			/*
			 *	We mark the page busy and leave it on
			 *	the pageout queues.  If the pageout
			 *	daemon comes across it, then it will
			 *	remove the page from the pageout queues.
			 */
#if	TRACEFAULTPAGE
			dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
, (unsigned int) 0); /* (TEST/DEBUG) */
879 #if !VM_FAULT_STATIC_CONFIG
880 if (!software_reference_bits
) {
881 vm_page_lock_queues();
883 vm_stat
.reactivations
++;
885 VM_PAGE_QUEUES_REMOVE(m
);
886 vm_page_unlock_queues();
890 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
891 (integer_t
)object
, offset
, (integer_t
)m
, 0, 0);
899 (object
->pager_created
) &&
900 LOOK_FOR(object
, offset
) &&
904 dbgTrace(0xBEEF000C, (unsigned int) look_for_page
, (unsigned int) object
); /* (TEST/DEBUG) */
906 if ((look_for_page
|| (object
== first_object
))
908 && !(object
->phys_contiguous
)) {
910 * Allocate a new page for this object/offset
914 m
= vm_page_grab_fictitious();
916 dbgTrace(0xBEEF000D, (unsigned int) m
, (unsigned int) object
); /* (TEST/DEBUG) */
918 if (m
== VM_PAGE_NULL
) {
919 vm_fault_cleanup(object
, first_m
);
920 cur_thread
->interruptible
= interruptible_state
;
921 return(VM_FAULT_FICTITIOUS_SHORTAGE
);
923 vm_page_insert(m
, object
, offset
);
926 if ((look_for_page
&& !must_be_resident
)) {
930 * If the memory manager is not ready, we
931 * cannot make requests.
933 if (!object
->pager_ready
) {
935 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
937 if(m
!= VM_PAGE_NULL
)
940 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
941 (integer_t
)object
, offset
, 0, 0, 0);
942 /* take an extra ref so object won't die */
943 assert(object
->ref_count
> 0);
945 vm_object_res_reference(object
);
946 vm_fault_cleanup(object
, first_m
);
947 counter(c_vm_fault_page_block_backoff_kernel
++);
948 vm_object_lock(object
);
949 assert(object
->ref_count
> 0);
950 if (!object
->pager_ready
) {
951 vm_object_assert_wait(object
,
952 VM_OBJECT_EVENT_PAGER_READY
,
954 vm_object_unlock(object
);
955 wait_result
= thread_block((void (*)(void))0);
956 vm_object_deallocate(object
);
959 vm_object_unlock(object
);
960 vm_object_deallocate(object
);
961 cur_thread
->interruptible
= interruptible_state
;
962 return VM_FAULT_RETRY
;
966 if(object
->phys_contiguous
) {
967 if(m
!= VM_PAGE_NULL
) {
973 if (object
->internal
) {
975 * Requests to the default pager
976 * must reserve a real page in advance,
977 * because the pager's data-provided
978 * won't block for pages. IMPORTANT:
979 * this acts as a throttling mechanism
980 * for data_requests to the default
985 dbgTrace(0xBEEF000F, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
987 if (m
->fictitious
&& !vm_page_convert(m
)) {
989 vm_fault_cleanup(object
, first_m
);
990 cur_thread
->interruptible
= interruptible_state
;
991 return(VM_FAULT_MEMORY_SHORTAGE
);
993 } else if (object
->absent_count
>
994 vm_object_absent_max
) {
996 * If there are too many outstanding page
997 * requests pending on this object, we
998 * wait for them to be resolved now.
1002 dbgTrace(0xBEEF0010, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
1004 if(m
!= VM_PAGE_NULL
)
1006 /* take an extra ref so object won't die */
1007 assert(object
->ref_count
> 0);
1008 object
->ref_count
++;
1009 vm_object_res_reference(object
);
1010 vm_fault_cleanup(object
, first_m
);
1011 counter(c_vm_fault_page_block_backoff_kernel
++);
1012 vm_object_lock(object
);
1013 assert(object
->ref_count
> 0);
1014 if (object
->absent_count
> vm_object_absent_max
) {
1015 vm_object_absent_assert_wait(object
,
1017 vm_object_unlock(object
);
1018 wait_result
= thread_block((void (*)(void))0);
1019 vm_object_deallocate(object
);
1022 vm_object_unlock(object
);
1023 vm_object_deallocate(object
);
1024 cur_thread
->interruptible
= interruptible_state
;
1025 return VM_FAULT_RETRY
;
1030 * Indicate that the page is waiting for data
1031 * from the memory manager.
1034 if(m
!= VM_PAGE_NULL
) {
1036 m
->list_req_pending
= TRUE
;
1039 object
->absent_count
++;
1043 cluster_start
= offset
;
1045 cluster_size
= object
->cluster_size
;
1048 * Skip clustered pagein if it is globally disabled
1049 * or random page reference behavior is expected
1050 * for the address range containing the faulting
1051 * address or the object paging block size is
1052 * equal to the page size.
1054 if (!vm_allow_clustered_pagein
||
1055 behavior
== VM_BEHAVIOR_RANDOM
||
1056 m
== VM_PAGE_NULL
||
1057 cluster_size
== PAGE_SIZE
) {
1058 cluster_start
= trunc_page_64(cluster_start
);
1062 assert(offset
>= lo_offset
);
1063 assert(offset
< hi_offset
);
1064 assert(ALIGNED(object
->paging_offset
));
1065 assert(cluster_size
>= PAGE_SIZE
);
1068 dbgTrace(0xBEEF0011, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
1071 * Decide whether to scan ahead or behind for
1072 * additional pages contiguous to the faulted
1073 * page in the same paging block. The decision
1074 * is based on system wide globals and the
1075 * expected page reference behavior of the
1076 * address range contained the faulting address.
1077 * First calculate some constants.
1079 paging_offset
= offset
+ object
->paging_offset
;
1080 cluster_offset
= paging_offset
& (cluster_size
- 1);
1081 align_offset
= paging_offset
&(PAGE_SIZE_64
-1);
1082 if (align_offset
!= 0) {
1083 cluster_offset
= trunc_page_64(cluster_offset
);
1086 #define SPANS_CLUSTER(x) ((((x) - align_offset) & (vm_object_offset_t)(cluster_size - 1)) == 0)
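		/*
		 * SPANS_CLUSTER(x) is TRUE when (x - align_offset) lies
		 * exactly on a cluster boundary; the backward and forward
		 * scans below use it to exclude candidate pages that would
		 * cross into a neighboring paging block.
		 */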
1089 * Backward scan only if reverse sequential
1090 * behavior has been specified
1092 CLUSTER_STAT(pages_at_lower_offsets
= 0;)
1093 if (((vm_default_behind
!= 0 &&
1094 behavior
== VM_BEHAVIOR_DEFAULT
) ||
1095 behavior
== VM_BEHAVIOR_RSEQNTL
) && offset
) {
1096 vm_object_offset_t cluster_bot
;
1099 * Calculate lower search boundary.
1100 * Exclude pages that span a cluster boundary.
1101 * Clip to start of map entry.
1102 * For default page reference behavior, scan
1103 * default pages behind.
1105 cluster_bot
= (offset
> cluster_offset
) ?
1106 offset
- cluster_offset
: offset
;
1107 if (align_offset
!= 0) {
1108 if ((cluster_bot
< offset
) &&
1109 SPANS_CLUSTER(cluster_bot
)) {
1110 cluster_bot
+= PAGE_SIZE_64
;
1113 if (behavior
== VM_BEHAVIOR_DEFAULT
) {
1115 bot
= (vm_object_offset_t
)
1116 (vm_default_behind
* PAGE_SIZE
);
1118 if (cluster_bot
< (offset
- bot
))
1119 cluster_bot
= offset
- bot
;
1121 if (lo_offset
> cluster_bot
)
1122 cluster_bot
= lo_offset
;
1124 for ( cluster_start
= offset
- PAGE_SIZE_64
;
1125 (cluster_start
>= cluster_bot
) &&
1127 (align_offset
- PAGE_SIZE_64
));
1128 cluster_start
-= PAGE_SIZE_64
) {
1129 assert(cluster_size
> PAGE_SIZE_64
);
1130 retry_cluster_backw
:
1131 if (!LOOK_FOR(object
, cluster_start
) ||
1132 vm_page_lookup(object
, cluster_start
)
1136 if (object
->internal
) {
1138 * need to acquire a real page in
1139 * advance because this acts as
1140 * a throttling mechanism for
1141 * data_requests to the default
1142 * pager. If this fails, give up
1143 * trying to find any more pages
1144 * in the cluster and send off the
1145 * request for what we already have.
1147 if ((m
= vm_page_grab())
1149 cluster_start
+= PAGE_SIZE_64
;
1150 cluster_end
= offset
+ PAGE_SIZE_64
;
1153 } else if ((m
= vm_page_grab_fictitious())
1155 vm_object_unlock(object
);
1156 vm_page_more_fictitious();
1157 vm_object_lock(object
);
1158 goto retry_cluster_backw
;
1162 m
->clustered
= TRUE
;
1163 m
->list_req_pending
= TRUE
;
1165 vm_page_insert(m
, object
, cluster_start
);
1166 CLUSTER_STAT(pages_at_lower_offsets
++;)
1167 object
->absent_count
++;
1169 cluster_start
+= PAGE_SIZE_64
;
1170 assert(cluster_start
>= cluster_bot
);
1172 assert(cluster_start
<= offset
);
1175 * Forward scan if default or sequential behavior
1178 CLUSTER_STAT(pages_at_higher_offsets
= 0;)
1179 if ((behavior
== VM_BEHAVIOR_DEFAULT
&&
1180 vm_default_ahead
!= 0) ||
1181 behavior
== VM_BEHAVIOR_SEQUENTIAL
) {
1182 vm_object_offset_t cluster_top
;
1185 * Calculate upper search boundary.
1186 * Exclude pages that span a cluster boundary.
1187 * Clip to end of map entry.
1188 * For default page reference behavior, scan
1189 * default pages ahead.
1191 cluster_top
= (offset
+ cluster_size
) -
1193 if (align_offset
!= 0) {
1194 if ((cluster_top
> (offset
+ PAGE_SIZE_64
)) &&
1195 SPANS_CLUSTER(cluster_top
)) {
1196 cluster_top
-= PAGE_SIZE_64
;
1199 if (behavior
== VM_BEHAVIOR_DEFAULT
) {
1200 vm_object_offset_t top
= (vm_object_offset_t
)
1201 ((vm_default_ahead
*PAGE_SIZE
)+PAGE_SIZE
);
1203 if (cluster_top
> (offset
+ top
))
1204 cluster_top
= offset
+ top
;
1206 if (cluster_top
> hi_offset
)
1207 cluster_top
= hi_offset
;
1209 for (cluster_end
= offset
+ PAGE_SIZE_64
;
1210 cluster_end
< cluster_top
;
1211 cluster_end
+= PAGE_SIZE_64
) {
1212 assert(cluster_size
> PAGE_SIZE
);
1214 if (!LOOK_FOR(object
, cluster_end
) ||
1215 vm_page_lookup(object
, cluster_end
)
1219 if (object
->internal
) {
1221 * need to acquire a real page in
1222 * advance because this acts as
1223 * a throttling mechanism for
1224 * data_requests to the default
1225 * pager. If this fails, give up
1226 * trying to find any more pages
1227 * in the cluster and send off the
1228 * request for what we already have.
1230 if ((m
= vm_page_grab())
1234 } else if ((m
= vm_page_grab_fictitious())
1236 vm_object_unlock(object
);
1237 vm_page_more_fictitious();
1238 vm_object_lock(object
);
1239 goto retry_cluster_forw
;
1243 m
->clustered
= TRUE
;
1244 m
->list_req_pending
= TRUE
;
1246 vm_page_insert(m
, object
, cluster_end
);
1247 CLUSTER_STAT(pages_at_higher_offsets
++;)
1248 object
->absent_count
++;
1250 assert(cluster_end
<= cluster_top
);
1253 cluster_end
= offset
+ PAGE_SIZE_64
;
1256 assert(cluster_end
>= offset
+ PAGE_SIZE_64
);
1257 length
= cluster_end
- cluster_start
;
1259 #if MACH_CLUSTER_STATS
1260 CLUSTER_STAT_HIGHER(pages_at_higher_offsets
);
1261 CLUSTER_STAT_LOWER(pages_at_lower_offsets
);
1262 CLUSTER_STAT_CLUSTER(length
/PAGE_SIZE
);
1263 #endif /* MACH_CLUSTER_STATS */
1267 * lengthen the cluster by the pages in the working set
1270 (current_task()->dynamic_working_set
!= 0)) {
1271 cluster_end
= cluster_start
+ length
;
1272 /* tws values for start and end are just a
1273 * suggestions. Therefore, as long as
1274 * build_cluster does not use pointers or
1275 * take action based on values that
1276 * could be affected by re-entrance we
1277 * do not need to take the map lock.
1279 tws_build_cluster((tws_hash_t
)
1280 current_task()->dynamic_working_set
,
1281 object
, &cluster_start
,
1282 &cluster_end
, 0x16000);
1283 length
= cluster_end
- cluster_start
;
1286 dbgTrace(0xBEEF0012, (unsigned int) object
, (unsigned int) 0); /* (TEST/DEBUG) */
1289 * We have a busy page, so we can
1290 * release the object lock.
1292 vm_object_unlock(object
);
1295 * Call the memory manager to retrieve the data.
1299 *type_of_fault
= DBG_PAGEIN_FAULT
;
1301 current_task()->pageins
++;
1302 bumped_pagein
= TRUE
;
1305 * If this object uses a copy_call strategy,
1306 * and we are interested in a copy of this object
1307 * (having gotten here only by following a
1308 * shadow chain), then tell the memory manager
1309 * via a flag added to the desired_access
1310 * parameter, so that it can detect a race
1311 * between our walking down the shadow chain
1312 * and its pushing pages up into a copy of
1313 * the object that it manages.
1316 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_CALL
&&
1317 object
!= first_object
) {
1318 wants_copy_flag
= VM_PROT_WANTS_COPY
;
1320 wants_copy_flag
= VM_PROT_NONE
;
1324 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1325 (integer_t
)object
, offset
, (integer_t
)m
,
1326 access_required
| wants_copy_flag
, 0);
1328 rc
= memory_object_data_request(object
->pager
,
1329 cluster_start
+ object
->paging_offset
,
1331 access_required
| wants_copy_flag
);
1335 dbgTrace(0xBEEF0013, (unsigned int) object
, (unsigned int) rc
); /* (TEST/DEBUG) */
1337 if (rc
!= KERN_SUCCESS
) {
1338 if (rc
!= MACH_SEND_INTERRUPTED
1340 printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
1341 "memory_object_data_request",
1343 cluster_start
+ object
->paging_offset
,
1344 length
, access_required
, rc
);
1346 * Don't want to leave a busy page around,
1347 * but the data request may have blocked,
1348 * so check if it's still there and busy.
1350 if(!object
->phys_contiguous
) {
1351 vm_object_lock(object
);
1352 for (; length
; length
-= PAGE_SIZE
,
1353 cluster_start
+= PAGE_SIZE_64
) {
1355 if ((p
= vm_page_lookup(object
,
1357 && p
->absent
&& p
->busy
1363 vm_fault_cleanup(object
, first_m
);
1364 cur_thread
->interruptible
= interruptible_state
;
1365 return((rc
== MACH_SEND_INTERRUPTED
) ?
1366 VM_FAULT_INTERRUPTED
:
1367 VM_FAULT_MEMORY_ERROR
);
1370 tws_hash_line_t line
;
1373 task
= current_task();
1376 (task
->dynamic_working_set
!= 0)) {
1379 task
->dynamic_working_set
,
1381 &line
) == KERN_SUCCESS
) {
1382 tws_line_signal((tws_hash_t
)
1383 task
->dynamic_working_set
,
1391 * Retry with same object/offset, since new data may
1392 * be in a different page (i.e., m is meaningless at
1395 vm_object_lock(object
);
1396 if ((interruptible
!= THREAD_UNINT
) &&
1397 (current_thread()->state
& TH_ABORT
)) {
1398 vm_fault_cleanup(object
, first_m
);
1399 cur_thread
->interruptible
= interruptible_state
;
1400 return(VM_FAULT_INTERRUPTED
);
1402 if(m
== VM_PAGE_NULL
)
1408 * The only case in which we get here is if
1409 * object has no pager (or unwiring). If the pager doesn't
1410 * have the page this is handled in the m->absent case above
1411 * (and if you change things here you should look above).
1414 dbgTrace(0xBEEF0014, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1416 if (object
== first_object
)
1419 assert(m
== VM_PAGE_NULL
);
1422 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1423 (integer_t
)object
, offset
, (integer_t
)m
,
1424 (integer_t
)object
->shadow
, 0);
1426 * Move on to the next object. Lock the next
1427 * object before unlocking the current one.
1429 next_object
= object
->shadow
;
1430 if (next_object
== VM_OBJECT_NULL
) {
1431 assert(!must_be_resident
);
1433 * If there's no object left, fill the page
1434 * in the top object with zeros. But first we
1435 * need to allocate a real page.
1438 if (object
!= first_object
) {
1439 vm_object_paging_end(object
);
1440 vm_object_unlock(object
);
1442 object
= first_object
;
1443 offset
= first_offset
;
1444 vm_object_lock(object
);
1448 assert(m
->object
== object
);
1449 first_m
= VM_PAGE_NULL
;
1451 if (object
->shadow_severed
) {
1453 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1454 cur_thread
->interruptible
= interruptible_state
;
1455 return VM_FAULT_MEMORY_ERROR
;
1458 if (VM_PAGE_THROTTLED() ||
1459 (m
->fictitious
&& !vm_page_convert(m
))) {
1461 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1462 cur_thread
->interruptible
= interruptible_state
;
1463 return(VM_FAULT_MEMORY_SHORTAGE
);
1465 m
->no_isync
= FALSE
;
1467 if (!no_zero_fill
) {
1468 vm_object_unlock(object
);
1469 vm_page_zero_fill(m
);
1471 *type_of_fault
= DBG_ZERO_FILL_FAULT
;
1472 VM_STAT(zero_fill_count
++);
1474 if (bumped_pagein
== TRUE
) {
1476 current_task()->pageins
--;
1478 vm_object_lock(object
);
1480 vm_page_lock_queues();
1481 VM_PAGE_QUEUES_REMOVE(m
);
1482 m
->page_ticket
= vm_page_ticket
;
1483 vm_page_ticket_roll
++;
1484 if(vm_page_ticket_roll
== VM_PAGE_TICKETS_IN_ROLL
) {
1485 vm_page_ticket_roll
= 0;
1486 if(vm_page_ticket
==
1487 VM_PAGE_TICKET_ROLL_IDS
)
1492 queue_enter(&vm_page_queue_inactive
,
1493 m
, vm_page_t
, pageq
);
1495 vm_page_inactive_count
++;
1496 vm_page_unlock_queues();
1497 pmap_clear_modify(m
->phys_addr
);
1501 if ((object
!= first_object
) || must_be_resident
)
1502 vm_object_paging_end(object
);
1503 offset
+= object
->shadow_offset
;
1504 hi_offset
+= object
->shadow_offset
;
1505 lo_offset
+= object
->shadow_offset
;
1506 access_required
= VM_PROT_READ
;
1507 vm_object_lock(next_object
);
1508 vm_object_unlock(object
);
1509 object
= next_object
;
1510 vm_object_paging_begin(object
);
1515 * PAGE HAS BEEN FOUND.
1518 * busy, so that we can play with it;
1519 * not absent, so that nobody else will fill it;
1520 * possibly eligible for pageout;
1522 * The top-level page (first_m) is:
1523 * VM_PAGE_NULL if the page was found in the
1525 * busy, not absent, and ineligible for pageout.
1527 * The current object (object) is locked. A paging
1528 * reference is held for the current and top-level
1533 dbgTrace(0xBEEF0015, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1535 #if EXTRA_ASSERTIONS
1536 if(m
!= VM_PAGE_NULL
) {
1537 assert(m
->busy
&& !m
->absent
);
1538 assert((first_m
== VM_PAGE_NULL
) ||
1539 (first_m
->busy
&& !first_m
->absent
&&
1540 !first_m
->active
&& !first_m
->inactive
));
1542 #endif /* EXTRA_ASSERTIONS */
1545 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1546 (integer_t
)object
, offset
, (integer_t
)m
,
1547 (integer_t
)first_object
, (integer_t
)first_m
);
1549 * If the page is being written, but isn't
1550 * already owned by the top-level object,
1551 * we have to copy it into a new page owned
1552 * by the top-level object.
1555 if ((object
!= first_object
) && (m
!= VM_PAGE_NULL
)) {
1557 * We only really need to copy if we
1562 dbgTrace(0xBEEF0016, (unsigned int) object
, (unsigned int) fault_type
); /* (TEST/DEBUG) */
1564 if (fault_type
& VM_PROT_WRITE
) {
1567 assert(!must_be_resident
);
1570 * If we try to collapse first_object at this
1571 * point, we may deadlock when we try to get
1572 * the lock on an intermediate object (since we
1573 * have the bottom object locked). We can't
1574 * unlock the bottom object, because the page
1575 * we found may move (by collapse) if we do.
1577 * Instead, we first copy the page. Then, when
1578 * we have no more use for the bottom object,
1579 * we unlock it and try to collapse.
1581 * Note that we copy the page even if we didn't
1582 * need to... that's the breaks.
1586 * Allocate a page for the copy
1588 copy_m
= vm_page_grab();
1589 if (copy_m
== VM_PAGE_NULL
) {
1591 vm_fault_cleanup(object
, first_m
);
1592 cur_thread
->interruptible
= interruptible_state
;
1593 return(VM_FAULT_MEMORY_SHORTAGE
);
1598 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1599 (integer_t
)object
, offset
,
1600 (integer_t
)m
, (integer_t
)copy_m
, 0);
1601 vm_page_copy(m
, copy_m
);
1604 * If another map is truly sharing this
1605 * page with us, we have to flush all
1606 * uses of the original page, since we
1607 * can't distinguish those which want the
1608 * original from those which need the
1611 * XXXO If we know that only one map has
1612 * access to this page, then we could
1613 * avoid the pmap_page_protect() call.
1616 vm_page_lock_queues();
1617 assert(!m
->cleaning
);
1618 pmap_page_protect(m
->phys_addr
, VM_PROT_NONE
);
1619 vm_page_deactivate(m
);
1620 copy_m
->dirty
= TRUE
;
1622 * Setting reference here prevents this fault from
1623 * being counted as a (per-thread) reactivate as well
1624 * as a copy-on-write.
1626 first_m
->reference
= TRUE
;
1627 vm_page_unlock_queues();
1630 * We no longer need the old page or object.
1633 PAGE_WAKEUP_DONE(m
);
1634 vm_object_paging_end(object
);
1635 vm_object_unlock(object
);
1638 *type_of_fault
= DBG_COW_FAULT
;
1639 VM_STAT(cow_faults
++);
1640 current_task()->cow_faults
++;
1641 object
= first_object
;
1642 offset
= first_offset
;
1644 vm_object_lock(object
);
1645 VM_PAGE_FREE(first_m
);
1646 first_m
= VM_PAGE_NULL
;
1647 assert(copy_m
->busy
);
1648 vm_page_insert(copy_m
, object
, offset
);
1652 * Now that we've gotten the copy out of the
1653 * way, let's try to collapse the top object.
1654 * But we have to play ugly games with
1655 * paging_in_progress to do that...
1658 vm_object_paging_end(object
);
1659 vm_object_collapse(object
);
1660 vm_object_paging_begin(object
);
1664 *protection
&= (~VM_PROT_WRITE
);
1669 * Now check whether the page needs to be pushed into the
1670 * copy object. The use of asymmetric copy on write for
1671 * shared temporary objects means that we may do two copies to
1672 * satisfy the fault; one above to get the page from a
1673 * shadowed object, and one here to push it into the copy.
1676 while (first_object
->copy_strategy
== MEMORY_OBJECT_COPY_DELAY
&&
1677 (copy_object
= first_object
->copy
) != VM_OBJECT_NULL
&&
1678 (m
!= VM_PAGE_NULL
)) {
1679 vm_object_offset_t copy_offset
;
1683 dbgTrace(0xBEEF0017, (unsigned int) copy_object
, (unsigned int) fault_type
); /* (TEST/DEBUG) */
1686 * If the page is being written, but hasn't been
1687 * copied to the copy-object, we have to copy it there.
1690 if ((fault_type
& VM_PROT_WRITE
) == 0) {
1691 *protection
&= ~VM_PROT_WRITE
;
1696 * If the page was guaranteed to be resident,
1697 * we must have already performed the copy.
1700 if (must_be_resident
)
1704 * Try to get the lock on the copy_object.
1706 if (!vm_object_lock_try(copy_object
)) {
1707 vm_object_unlock(object
);
1709 mutex_pause(); /* wait a bit */
1711 vm_object_lock(object
);
1716 * Make another reference to the copy-object,
1717 * to keep it from disappearing during the
1720 assert(copy_object
->ref_count
> 0);
1721 copy_object
->ref_count
++;
1722 VM_OBJ_RES_INCR(copy_object
);
1725 * Does the page exist in the copy?
1727 copy_offset
= first_offset
- copy_object
->shadow_offset
;
1728 if (copy_object
->size
<= copy_offset
)
1730 * Copy object doesn't cover this page -- do nothing.
1734 vm_page_lookup(copy_object
, copy_offset
)) != VM_PAGE_NULL
) {
1735 /* Page currently exists in the copy object */
1738 * If the page is being brought
1739 * in, wait for it and then retry.
1742 /* take an extra ref so object won't die */
1743 assert(copy_object
->ref_count
> 0);
1744 copy_object
->ref_count
++;
1745 vm_object_res_reference(copy_object
);
1746 vm_object_unlock(copy_object
);
1747 vm_fault_cleanup(object
, first_m
);
1748 counter(c_vm_fault_page_block_backoff_kernel
++);
1749 vm_object_lock(copy_object
);
1750 assert(copy_object
->ref_count
> 0);
1751 VM_OBJ_RES_DECR(copy_object
);
1752 copy_object
->ref_count
--;
1753 assert(copy_object
->ref_count
> 0);
1754 copy_m
= vm_page_lookup(copy_object
, copy_offset
);
1755 if (copy_m
!= VM_PAGE_NULL
&& copy_m
->busy
) {
1756 PAGE_ASSERT_WAIT(copy_m
, interruptible
);
1757 vm_object_unlock(copy_object
);
1758 wait_result
= thread_block((void (*)(void))0);
1759 vm_object_deallocate(copy_object
);
1762 vm_object_unlock(copy_object
);
1763 vm_object_deallocate(copy_object
);
1764 cur_thread
->interruptible
= interruptible_state
;
1765 return VM_FAULT_RETRY
;
1769 else if (!PAGED_OUT(copy_object
, copy_offset
)) {
1771 * If PAGED_OUT is TRUE, then the page used to exist
1772 * in the copy-object, and has already been paged out.
1773 * We don't need to repeat this. If PAGED_OUT is
1774 * FALSE, then either we don't know (!pager_created,
1775 * for example) or it hasn't been paged out.
1776 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1777 * We must copy the page to the copy object.
1781 * Allocate a page for the copy
1783 copy_m
= vm_page_alloc(copy_object
, copy_offset
);
1784 if (copy_m
== VM_PAGE_NULL
) {
1786 VM_OBJ_RES_DECR(copy_object
);
1787 copy_object
->ref_count
--;
1788 assert(copy_object
->ref_count
> 0);
1789 vm_object_unlock(copy_object
);
1790 vm_fault_cleanup(object
, first_m
);
1791 cur_thread
->interruptible
= interruptible_state
;
1792 return(VM_FAULT_MEMORY_SHORTAGE
);
1796 * Must copy page into copy-object.
1799 vm_page_copy(m
, copy_m
);
1802 * If the old page was in use by any users
1803 * of the copy-object, it must be removed
1804 * from all pmaps. (We can't know which
1808 vm_page_lock_queues();
1809 assert(!m
->cleaning
);
1810 pmap_page_protect(m
->phys_addr
, VM_PROT_NONE
);
1811 copy_m
->dirty
= TRUE
;
1812 vm_page_unlock_queues();
1815 * If there's a pager, then immediately
1816 * page out this page, using the "initialize"
1817 * option. Else, we use the copy.
1822 ((!copy_object
->pager_created
) ||
1823 vm_external_state_get(
1824 copy_object
->existence_map
, copy_offset
)
1825 == VM_EXTERNAL_STATE_ABSENT
)
1827 (!copy_object
->pager_created
)
1830 vm_page_lock_queues();
1831 vm_page_activate(copy_m
);
1832 vm_page_unlock_queues();
1833 PAGE_WAKEUP_DONE(copy_m
);
1836 assert(copy_m
->busy
== TRUE
);
1839 * The page is already ready for pageout:
1840 * not on pageout queues and busy.
1841 * Unlock everything except the
1842 * copy_object itself.
1845 vm_object_unlock(object
);
1848 * Write the page to the copy-object,
1849 * flushing it from the kernel.
1852 vm_pageout_initialize_page(copy_m
);
1855 * Since the pageout may have
1856 * temporarily dropped the
1857 * copy_object's lock, we
1858 * check whether we'll have
1859 * to deallocate the hard way.
1862 if ((copy_object
->shadow
!= object
) ||
1863 (copy_object
->ref_count
== 1)) {
1864 vm_object_unlock(copy_object
);
1865 vm_object_deallocate(copy_object
);
1866 vm_object_lock(object
);
1871 * Pick back up the old object's
1872 * lock. [It is safe to do so,
1873 * since it must be deeper in the
1877 vm_object_lock(object
);
1881 * Because we're pushing a page upward
1882 * in the object tree, we must restart
1883 * any faults that are waiting here.
1884 * [Note that this is an expansion of
1885 * PAGE_WAKEUP that uses the THREAD_RESTART
1886 * wait result]. Can't turn off the page's
1887 * busy bit because we're not done with it.
1892 thread_wakeup_with_result((event_t
) m
,
1898 * The reference count on copy_object must be
1899 * at least 2: one for our extra reference,
1900 * and at least one from the outside world
1901 * (we checked that when we last locked
1904 copy_object
->ref_count
--;
1905 assert(copy_object
->ref_count
> 0);
1906 VM_OBJ_RES_DECR(copy_object
);
1907 vm_object_unlock(copy_object
);
1913 *top_page
= first_m
;
1916 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1917 (integer_t
)object
, offset
, (integer_t
)m
, (integer_t
)first_m
, 0);
1919 * If the page can be written, assume that it will be.
1920 * [Earlier, we restrict the permission to allow write
1921 * access only if the fault so required, so we don't
1922 * mark read-only data as dirty.]
1925 #if !VM_FAULT_STATIC_CONFIG
1926 if (vm_fault_dirty_handling
&& (*protection
& VM_PROT_WRITE
) &&
1927 (m
!= VM_PAGE_NULL
)) {
1932 dbgTrace(0xBEEF0018, (unsigned int) object
, (unsigned int) vm_page_deactivate_behind
); /* (TEST/DEBUG) */
1934 if (vm_page_deactivate_behind
) {
1935 if (offset
&& /* don't underflow */
1936 (object
->last_alloc
== (offset
- PAGE_SIZE_64
))) {
1937 m
= vm_page_lookup(object
, object
->last_alloc
);
1938 if ((m
!= VM_PAGE_NULL
) && !m
->busy
) {
1939 vm_page_lock_queues();
1940 vm_page_deactivate(m
);
1941 vm_page_unlock_queues();
1944 dbgTrace(0xBEEF0019, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1947 object
->last_alloc
= offset
;
1950 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS
, 0); /* (TEST/DEBUG) */
1952 cur_thread
->interruptible
= interruptible_state
;
1953 if(*result_page
== VM_PAGE_NULL
) {
1954 vm_object_unlock(object
);
1956 return(VM_FAULT_SUCCESS
);
1960 vm_fault_cleanup(object
, first_m
);
1962 counter(c_vm_fault_page_block_backoff_kernel
++);
1963 thread_block((void (*)(void))0);
1967 cur_thread
->interruptible
= interruptible_state
;
1968 if (wait_result
== THREAD_INTERRUPTED
)
1969 return VM_FAULT_INTERRUPTED
;
1970 return VM_FAULT_RETRY
;
/*
 *	Routine:	vm_fault
 *	Purpose:
 *		Handle page faults, including pseudo-faults
 *		used to change the wiring status of pages.
 *	Returns:
 *		Explicit continuations have been removed.
 *	Implementation:
 *		vm_fault and vm_fault_page save mucho state
 *		in the moral equivalent of a closure.  The state
 *		structure is allocated when first entering vm_fault
 *		and deallocated when leaving vm_fault.
 */

kern_return_t
vm_fault(
	vm_map_t	map,
	vm_offset_t	vaddr,
	vm_prot_t	fault_type,
	boolean_t	change_wiring,
	int		interruptible)
{
	vm_map_version_t	version;	/* Map version for verification */
	boolean_t		wired;		/* Should mapping be wired down? */
	vm_object_t		object;		/* Top-level object */
	vm_object_offset_t	offset;		/* Top-level offset */
	vm_prot_t		prot;		/* Protection for mapping */
	vm_behavior_t		behavior;	/* Expected paging behavior */
	vm_object_offset_t	lo_offset, hi_offset;
	vm_object_t		old_copy_object; /* Saved copy object */
	vm_page_t		result_page;	/* Result of vm_fault_page */
	vm_page_t		top_page;	/* Placeholder page */
	kern_return_t		kr;

	register
	vm_page_t		m;		/* Fast access to result_page */
	kern_return_t		error_code;	/* page error reasons */
	register
	vm_object_t		cur_object;
	register
	vm_object_offset_t	cur_offset;
	vm_page_t		cur_m;
	vm_object_t		new_object;
	int			type_of_fault;
	vm_map_t		pmap_map = map;
	vm_map_t		original_map = map;
	pmap_t			pmap;
	boolean_t		funnel_set = FALSE;
	funnel_t		*curflock;
	thread_t		cur_thread;
	boolean_t		interruptible_state;
	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
			      vaddr,
			      0,
			      0,
			      0,
			      0);

	cur_thread = current_thread();

	interruptible_state = cur_thread->interruptible;
	if (interruptible == THREAD_UNINT)
		cur_thread->interruptible = FALSE;

	/*
	 * assume we will hit a page in the cache
	 * otherwise, explicitly override with
	 * the real fault type once we determine it
	 */
	type_of_fault = DBG_CACHE_HIT_FAULT;

	current_task()->faults++;

	/*
	 * drop funnel if it is already held. Then restore while returning
	 */
	if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) {
		funnel_set = TRUE;
		curflock = cur_thread->funnel_lock;
		thread_funnel_set( curflock, FALSE);
	}
	/*
	 *	Find the backing store object and offset into
	 *	it to begin the search.
	 */
	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
				&object, &offset,
				&prot, &wired,
				&behavior, &lo_offset, &hi_offset, &pmap_map);

	pmap = pmap_map->pmap;

	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		goto done;
	}
	/*
	 *	If the page is wired, we must fault for the current protection
	 *	value, to avoid further faults.
	 */
	if (wired)
		fault_type = prot | VM_PROT_WRITE;

#if	VM_FAULT_CLASSIFY
	/*
	 *	Temporary data gathering code
	 */
	vm_fault_classify(object, offset, fault_type);
#endif	/* VM_FAULT_CLASSIFY */
	/*
	 *	Fast fault code.  The basic idea is to do as much as
	 *	possible while holding the map lock and object locks.
	 *	Busy pages are not used until the object lock has to
	 *	be dropped to do something (copy, zero fill, pmap enter).
	 *	Similarly, paging references aren't acquired until that
	 *	point, and object references aren't used.
	 *
	 *	If we can figure out what to do
	 *	(zero fill, copy on write, pmap enter) while holding
	 *	the locks, then it gets done.  Otherwise, we give up,
	 *	and use the original fault path (which doesn't hold
	 *	the map lock, and relies on busy pages).
	 *	The give up cases include:
	 *	- Have to talk to pager.
	 *	- Page is busy, absent or in error.
	 *	- Pager has locked out desired access.
	 *	- Fault needs to be restarted.
	 *	- Have to push page into copy object.
	 *
	 *	The code is an infinite loop that moves one level down
	 *	the shadow chain each time.  cur_object and cur_offset
	 *	refer to the current object being examined.  object and offset
	 *	are the original object from the map.  The loop is at the
	 *	top level if and only if object and cur_object are the same.
	 *
	 *	Invariants:  Map lock is held throughout.  Lock is held on
	 *		original object and cur_object (if different) when
	 *		continuing or exiting loop.
	 */

	/*
	 *	If this page is to be inserted in a copy delay object
	 *	for writing, and if the object has a copy, then the
	 *	copy delay strategy is implemented in the slow fault page.
	 */
	if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
	    object->copy == VM_OBJECT_NULL ||
	    (fault_type & VM_PROT_WRITE) == 0) {
		cur_object = object;
		cur_offset = offset;

		while (TRUE) {
			m = vm_page_lookup(cur_object, cur_offset);
			if (m != VM_PAGE_NULL) {
				if (m->unusual && (m->error || m->restart || m->private
				    || m->absent || (fault_type & m->page_lock))) {
					/*
					 *	Unusual case. Give up.
					 */
					break;
				}

				/*
				 *	Two cases of map in faults:
				 *	    - At top level w/o copy object.
				 *	    - Read fault anywhere.
				 *		--> must disallow write.
				 */

				if (object == cur_object &&
				    object->copy == VM_OBJECT_NULL)
					goto FastMapInFault;

				if ((fault_type & VM_PROT_WRITE) == 0) {

					prot &= ~VM_PROT_WRITE;
2169 * Set up to map the page ...
2170 * mark the page busy, drop
2171 * locks and take a paging reference
2172 * on the object with the page.
2175 if (object
!= cur_object
) {
2176 vm_object_unlock(object
);
2177 object
= cur_object
;
2182 vm_object_paging_begin(object
);
2183 vm_object_unlock(object
);
FastPmapEnter:
                /*
                 *  Check a couple of global reasons to
                 *  be conservative about write access.
                 *  Then do the pmap_enter.
                 */
#if !VM_FAULT_STATIC_CONFIG
                if (vm_fault_dirty_handling
#if MACH_KDB
                    || db_watchpoint_list
#endif
                    && (fault_type & VM_PROT_WRITE) == 0)
                    prot &= ~VM_PROT_WRITE;
#else   /* STATIC_CONFIG */
#if MACH_KDB
                if (db_watchpoint_list
                    && (fault_type & VM_PROT_WRITE) == 0)
                    prot &= ~VM_PROT_WRITE;
#endif  /* MACH_KDB */
#endif  /* STATIC_CONFIG */

                PMAP_ENTER(pmap, vaddr, m, prot, wired);
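                /*
                 * When kernel-debugger watchpoints are configured, write
                 * access appears to be withheld above for read faults so
                 * that the first real write re-faults and can be observed;
                 * the thread then simply takes another (write) fault.
                 * PMAP_ENTER is the machine-independent wrapper that
                 * enters the translation for this page into the pmap.
                 */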
                /* Sync I & D caches for the new mapping */
                if (m->no_isync == TRUE) {
                    pmap_attribute(pmap,
                               vaddr,
                               PAGE_SIZE,
                               MATTR_CACHE,
                               &mv_cache_sync);
                }

                {
                    tws_hash_line_t line;
                    task_t          task;

                    task = current_task();
                    if ((map != NULL) &&
                        (task->dynamic_working_set != 0)) {
                        if (tws_lookup((tws_hash_t)
                                task->dynamic_working_set,
                                cur_offset, object,
                                &line) != KERN_SUCCESS) {
                            if (tws_insert((tws_hash_t)
                                    task->dynamic_working_set,
                                    m->offset, m->object,
                                    vaddr, pmap_map)
                                    == KERN_NO_SPACE) {
                                tws_expand_working_set(
                                    task->dynamic_working_set,
                                    TWS_HASH_LINE_COUNT);
                            }
                        }
                    }
                }

                if (m->clustered) {
                    vm_pagein_cluster_used++;
                    m->clustered = FALSE;
                }
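                /*
                 * The block above records the page just entered in the
                 * task's dynamic working set (tws) hash, growing the hash
                 * with tws_expand_working_set() when an insert reports
                 * KERN_NO_SPACE.  The clustered counter appears to track
                 * how often pages brought in by a clustered pagein are
                 * actually touched afterwards.
                 */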
                /*
                 *  Grab the object lock to manipulate
                 *  the page queues.  Change wiring
                 *  case is obvious.  In soft ref bits
                 *  case activate page only if it fell
                 *  off paging queues, otherwise just
                 *  activate it if it's inactive.
                 *
                 *  NOTE: original vm_fault code will
                 *  move active page to back of active
                 *  queue.  This code doesn't.
                 */
                vm_object_lock(object);
                vm_page_lock_queues();

                /*
                 * we did the isync above... we're clearing
                 * the flag here to avoid holding a lock
                 * while calling pmap functions, however
                 * we need to hold the object lock before
                 * we can modify the flag
                 */
                m->no_isync = FALSE;
                m->reference = TRUE;

                if (change_wiring) {
                    if (wired)
                        vm_page_wire(m);
                    else
                        vm_page_unwire(m);
                }
#if VM_FAULT_STATIC_CONFIG
                else {
                    if (!m->active && !m->inactive)
                        vm_page_activate(m);
                }
#else
                else if (software_reference_bits) {
                    if (!m->active && !m->inactive)
                        vm_page_activate(m);
                }
                else if (!m->active) {
                    vm_page_activate(m);
                }
#endif
                vm_page_unlock_queues();

                /*
                 *  That's it, clean up and return.
                 */
                PAGE_WAKEUP_DONE(m);
                vm_object_paging_end(object);
                vm_object_unlock(object);
                vm_map_unlock_read(map);
                if (pmap_map != map)
                    vm_map_unlock(pmap_map);

                if (funnel_set) {
                    thread_funnel_set( curflock, TRUE);
                    funnel_set = FALSE;
                }

                cur_thread->interruptible = interruptible_state;

                KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
                              vaddr,
                              type_of_fault,
                              KERN_SUCCESS,
                              0,
                              0);
                return KERN_SUCCESS;
            }
            /*
             *  Copy on write fault.  If objects match, then
             *  object->copy must not be NULL (else control
             *  would be in previous code block), and we
             *  have a potential push into the copy object
             *  with which we won't cope here.
             */

            if (cur_object == object)
                break;

            /*
             *  This is now a shadow based copy on write
             *  fault -- it requires a copy up the shadow
             *  chain.
             *
             *  Allocate a page in the original top level
             *  object. Give up if allocate fails.  Also
             *  need to remember current page, as it's the
             *  source of the copy.
             */
            cur_m = m;
            m = vm_page_alloc(object, offset);
            if (m == VM_PAGE_NULL) {
                break;
            }

            /*
             *  Now do the copy.  Mark the source busy
             *  and take out paging references on both
             *  objects.
             *
             *  NOTE: This code holds the map lock across
             *  the page copy.
             */

            cur_m->busy = TRUE;
            vm_page_copy(cur_m, m);
            vm_page_insert(m, object, offset);

            vm_object_paging_begin(cur_object);
            vm_object_paging_begin(object);

            type_of_fault = DBG_COW_FAULT;
            VM_STAT(cow_faults++);
            current_task()->cow_faults++;

            /*
             *  Now cope with the source page and object.
             *  If the top object has a ref count of 1
             *  then no other map can access it, and hence
             *  it's not necessary to do the pmap_page_protect.
             */

            vm_page_lock_queues();
            vm_page_deactivate(cur_m);
            if (object->ref_count > 1)
                pmap_page_protect(cur_m->phys_addr,
                          VM_PROT_NONE);
            vm_page_unlock_queues();

            PAGE_WAKEUP_DONE(cur_m);
            vm_object_paging_end(cur_object);
            vm_object_unlock(cur_object);

            /*
             *  Slight hack to call vm_object_collapse
             *  and then reuse common map in code.
             *  note that the object lock was taken above.
             */

            vm_object_paging_end(object);
            vm_object_collapse(object);
            vm_object_paging_begin(object);
            vm_object_unlock(object);

            goto FastPmapEnter;
        }
        /*
         *  No page at cur_object, cur_offset
         */

        if (cur_object->pager_created) {

            /*
             *  Have to talk to the pager.  Give up.
             */
            break;
        }

        if (cur_object->shadow == VM_OBJECT_NULL) {

            if (cur_object->shadow_severed) {
                vm_object_paging_end(object);
                vm_object_unlock(object);
                vm_map_unlock_read(map);
                if (pmap_map != map)
                    vm_map_unlock(pmap_map);

                if (funnel_set) {
                    thread_funnel_set( curflock, TRUE);
                    funnel_set = FALSE;
                }

                cur_thread->interruptible = interruptible_state;

                return VM_FAULT_MEMORY_ERROR;
            }
            /*
             *  Zero fill fault.  Page gets
             *  filled in top object. Insert
             *  page, then drop any lower lock.
             *  Give up if no page.
             */
            if ((vm_page_free_target -
                ((vm_page_free_target - vm_page_free_min) >> 2))
                > vm_page_free_count) {
                break;
            }
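            /*
             * The check above keeps the fast path from consuming pages
             * once free memory drops below roughly
             * free_target - (free_target - free_min)/4, presumably to
             * leave the remaining headroom to the slow path and the
             * pageout daemon.  For example, with vm_page_free_target =
             * 100 and vm_page_free_min = 20, zero fill is only attempted
             * here while vm_page_free_count is at least 80.
             */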
            m = vm_page_alloc(object, offset);
            if (m == VM_PAGE_NULL) {
                break;
            }

            /*
             * This is a zero-fill or initial fill
             * page fault.  As such, we consider it
             * undefined with respect to instruction
             * execution.  i.e. it is the responsibility
             * of higher layers to call for an instruction
             * sync after changing the contents and before
             * sending a program into this area.  We
             * choose this approach for performance.
             */

            m->no_isync = FALSE;

            if (cur_object != object)
                vm_object_unlock(cur_object);

            vm_object_paging_begin(object);
            vm_object_unlock(object);

            /*
             *  Now zero fill page and map it.
             *  the page is probably going to
             *  be written soon, so don't bother
             *  to clear the modified bit
             *
             *  NOTE: This code holds the map
             *  lock across the zero fill.
             */

            if (!map->no_zero_fill) {
                vm_page_zero_fill(m);
                type_of_fault = DBG_ZERO_FILL_FAULT;
                VM_STAT(zero_fill_count++);
            }
            vm_page_lock_queues();
            VM_PAGE_QUEUES_REMOVE(m);

            m->page_ticket = vm_page_ticket;
            vm_page_ticket_roll++;
            if (vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
                vm_page_ticket_roll = 0;
                if (vm_page_ticket == VM_PAGE_TICKET_ROLL_IDS)
                    vm_page_ticket = 0;
                else
                    vm_page_ticket++;
            }

            queue_enter(&vm_page_queue_inactive,
                    m, vm_page_t, pageq);
            m->inactive = TRUE;
            vm_page_inactive_count++;
            vm_page_unlock_queues();

            goto FastPmapEnter;
        }
        /*
         *  On to the next level
         */

        cur_offset += cur_object->shadow_offset;
        new_object = cur_object->shadow;
        vm_object_lock(new_object);
        if (cur_object != object)
            vm_object_unlock(cur_object);
        cur_object = new_object;

        continue;
    }
    }
    /*
     *  Cleanup from fast fault failure.  Drop any object
     *  lock other than original and drop map lock.
     */
    if (object != cur_object)
        vm_object_unlock(cur_object);

    vm_map_unlock_read(map);
    if (pmap_map != map)
        vm_map_unlock(pmap_map);

    /*
     *  Make a reference to this object to
     *  prevent its disposal while we are messing with
     *  it.  Once we have the reference, the map is free
     *  to be diddled.  Since objects reference their
     *  shadows (and copies), they will stay around as well.
     */
    assert(object->ref_count > 0);
    object->ref_count++;
    vm_object_res_reference(object);
    vm_object_paging_begin(object);
    XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
    kr = vm_fault_page(object, offset, fault_type,
               (change_wiring && !wired),
               interruptible,
               lo_offset, hi_offset, behavior,
               &prot, &result_page, &top_page,
               &type_of_fault,
               &error_code, map->no_zero_fill, FALSE, map, vaddr);

    /*
     *  If we didn't succeed, lose the object reference immediately.
     */
    if (kr != VM_FAULT_SUCCESS)
        vm_object_deallocate(object);
    /*
     *  See why we failed, and take corrective action.
     */
    switch (kr) {
    case VM_FAULT_SUCCESS:
        break;
    case VM_FAULT_MEMORY_SHORTAGE:
        if (vm_page_wait((change_wiring) ?
                 THREAD_UNINT : interruptible))
            goto RetryFault;
        /* fall thru */
    case VM_FAULT_INTERRUPTED:
        kr = KERN_ABORTED;
        goto done;
    case VM_FAULT_RETRY:
        goto RetryFault;
    case VM_FAULT_FICTITIOUS_SHORTAGE:
        vm_page_more_fictitious();
        goto RetryFault;
    case VM_FAULT_MEMORY_ERROR:
        if (error_code)
            kr = error_code;
        else
            kr = KERN_MEMORY_ERROR;
        goto done;
    }

    m = result_page;

    if (m != VM_PAGE_NULL) {
        assert((change_wiring && !wired) ?
               (top_page == VM_PAGE_NULL) :
               ((top_page == VM_PAGE_NULL) == (m->object == object)));
    }
    /*
     *  How to clean up the result of vm_fault_page.  This
     *  happens whether the mapping is entered or not.
     */

#define UNLOCK_AND_DEALLOCATE                   \
    MACRO_BEGIN                         \
    vm_fault_cleanup(m->object, top_page);          \
    vm_object_deallocate(object);               \
    MACRO_END

    /*
     *  What to do with the resulting page from vm_fault_page
     *  if it doesn't get entered into the physical map:
     */

#define RELEASE_PAGE(m)                     \
    MACRO_BEGIN                         \
    PAGE_WAKEUP_DONE(m);                    \
    vm_page_lock_queues();                  \
    if (!m->active && !m->inactive)             \
        vm_page_activate(m);                \
    vm_page_unlock_queues();                \
    MACRO_END
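    /*
     * Both macros are used with the object of "m" (or "object" when no
     * page was returned) locked by the caller.  A typical error exit in
     * the code below looks roughly like:
     *
     *  if (m != VM_PAGE_NULL) {
     *      RELEASE_PAGE(m);        (put the page back on a queue)
     *      UNLOCK_AND_DEALLOCATE;  (drop paging and object references)
     *  } else {
     *      vm_object_deallocate(object);
     *  }
     */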
    /*
     *  We must verify that the maps have not changed
     *  since our last lookup.
     */
    if (m != VM_PAGE_NULL) {
        old_copy_object = m->object->copy;
        vm_object_unlock(m->object);
    } else {
        old_copy_object = VM_OBJECT_NULL;
    }
    if ((map != original_map) || !vm_map_verify(map, &version)) {
        vm_object_t     retry_object;
        vm_object_offset_t  retry_offset;
        vm_prot_t       retry_prot;

        /*
         *  To avoid trying to write_lock the map while another
         *  thread has it read_locked (in vm_map_pageable), we
         *  do not try for write permission.  If the page is
         *  still writable, we will get write permission.  If it
         *  is not, or has been marked needs_copy, we enter the
         *  mapping without write permission, and will merely
         *  take another fault.
         */
        map = original_map;
        vm_map_lock_read(map);
        kr = vm_map_lookup_locked(&map, vaddr,
                      fault_type & ~VM_PROT_WRITE, &version,
                      &retry_object, &retry_offset, &retry_prot,
                      &wired, &behavior, &lo_offset, &hi_offset,
                      &pmap_map);
        pmap = pmap_map->pmap;

        if (kr != KERN_SUCCESS) {
            vm_map_unlock_read(map);
            if (m != VM_PAGE_NULL) {
                vm_object_lock(m->object);
                RELEASE_PAGE(m);
                UNLOCK_AND_DEALLOCATE;
            } else {
                vm_object_deallocate(object);
            }
            goto done;
        }

        vm_object_unlock(retry_object);
        if (m != VM_PAGE_NULL) {
            vm_object_lock(m->object);
        } else {
            vm_object_lock(object);
        }

        if ((retry_object != object) ||
            (retry_offset != offset)) {
            vm_map_unlock_read(map);
            if (pmap_map != map)
                vm_map_unlock(pmap_map);
            if (m != VM_PAGE_NULL) {
                RELEASE_PAGE(m);
                UNLOCK_AND_DEALLOCATE;
            } else {
                vm_object_deallocate(object);
            }
            goto RetryFault;
        }
        /*
         *  Check whether the protection has changed or the object
         *  has been copied while we left the map unlocked.
         */
        prot &= retry_prot;

        if (m != VM_PAGE_NULL) {
            vm_object_unlock(m->object);
        } else {
            vm_object_unlock(object);
        }
    }
    if (m != VM_PAGE_NULL) {
        vm_object_lock(m->object);
    } else {
        vm_object_lock(object);
    }

    /*
     *  If the copy object changed while the top-level object
     *  was unlocked, then we must take away write permission.
     */
    if (m != VM_PAGE_NULL) {
        if (m->object->copy != old_copy_object)
            prot &= ~VM_PROT_WRITE;
    }

    /*
     *  If we want to wire down this page, but no longer have
     *  adequate permissions, we must start all over.
     */
    if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
        vm_map_verify_done(map, &version);
        if (pmap_map != map)
            vm_map_unlock(pmap_map);
        if (m != VM_PAGE_NULL) {
            RELEASE_PAGE(m);
            UNLOCK_AND_DEALLOCATE;
        } else {
            vm_object_deallocate(object);
        }
        goto RetryFault;
    }
    /*
     *  Put this page into the physical map.
     *  We had to do the unlock above because pmap_enter
     *  may cause other faults.  The page may be on
     *  the pageout queues.  If the pageout daemon comes
     *  across the page, it will remove it from the queues.
     */
    if (m != VM_PAGE_NULL) {
        if (m->no_isync == TRUE) {
            m->no_isync = FALSE;

            vm_object_unlock(m->object);

            PMAP_ENTER(pmap, vaddr, m, prot, wired);

            /*
             *  It's critically important that a wired-down page be faulted
             *  only once in each map for which it is wired.
             */
            /* Sync I & D caches for new mapping */
            pmap_attribute(pmap,
                       vaddr,
                       PAGE_SIZE,
                       MATTR_CACHE,
                       &mv_cache_sync);
        } else {
            vm_object_unlock(m->object);

            PMAP_ENTER(pmap, vaddr, m, prot, wired);
        }

        {
            tws_hash_line_t line;
            task_t      task;

            task = current_task();
            if ((map != NULL) &&
                (task->dynamic_working_set != 0)) {
                if (tws_lookup((tws_hash_t)
                        task->dynamic_working_set,
                        m->offset, m->object,
                        &line) != KERN_SUCCESS) {
                    tws_insert((tws_hash_t)
                        task->dynamic_working_set,
                        m->offset, m->object,
                        vaddr, pmap_map);
                    if (tws_insert((tws_hash_t)
                            task->dynamic_working_set,
                            m->offset, m->object,
                            vaddr, pmap_map)
                            == KERN_NO_SPACE) {
                        tws_expand_working_set(
                            task->dynamic_working_set,
                            TWS_HASH_LINE_COUNT);
                    }
                }
            }
        }
    } else {

/* if   __ppc__ not working until figure out phys copy on block maps */
#if 0
        int         memattr;
        struct phys_entry   *pp;

        /*
         * do a pmap block mapping from the physical address
         * in the object
         */
        if (pp = pmap_find_physentry(
                (vm_offset_t)object->shadow_offset)) {
            memattr = ((pp->pte1 & 0x00000078) >> 3);
        } else {
            memattr = PTE_WIMG_UNCACHED_COHERENT_GUARDED;
        }

        pmap_map_block(pmap, vaddr,
            (vm_offset_t)object->shadow_offset,
            object->size, prot,
            memattr, 0);    /* Set up a block mapped area */
#else
        for (off = 0; off < object->size; off += page_size) {
            pmap_enter(pmap, vaddr + off,
                object->shadow_offset + off, prot, TRUE);
            /* Map it in */
        }
#endif
    }
    /*
     *  If the page is not wired down and isn't already
     *  on a pageout queue, then put it where the
     *  pageout daemon can find it.
     */
    if (m != VM_PAGE_NULL) {
        vm_object_lock(m->object);
        vm_page_lock_queues();

        if (change_wiring) {
            if (wired)
                vm_page_wire(m);
            else
                vm_page_unwire(m);
        }
#if VM_FAULT_STATIC_CONFIG
        else {
            if (!m->active && !m->inactive)
                vm_page_activate(m);
            m->reference = TRUE;
        }
#else
        else if (software_reference_bits) {
            if (!m->active && !m->inactive)
                vm_page_activate(m);
            m->reference = TRUE;
        } else {
            vm_page_activate(m);
        }
#endif
        vm_page_unlock_queues();
    }
    /*
     *  Unlock everything, and return
     */
    vm_map_verify_done(map, &version);
    if (pmap_map != map)
        vm_map_unlock(pmap_map);
    if (m != VM_PAGE_NULL) {
        PAGE_WAKEUP_DONE(m);
        UNLOCK_AND_DEALLOCATE;
    } else {
        vm_fault_cleanup(object, top_page);
        vm_object_deallocate(object);
    }
    kr = KERN_SUCCESS;

#undef  UNLOCK_AND_DEALLOCATE
#undef  RELEASE_PAGE

    done:
    if (funnel_set) {
        thread_funnel_set( curflock, TRUE);
        funnel_set = FALSE;
    }
    cur_thread->interruptible = interruptible_state;

    KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
                  vaddr,
                  type_of_fault,
                  kr,
                  0,
                  0);
    return kr;
}
/*
 *  vm_fault_wire:
 *
 *  Wire down a range of virtual addresses in a map.
 */
kern_return_t
vm_fault_wire(
    vm_map_t    map,
    vm_map_entry_t  entry,
    pmap_t      pmap)
{
    register vm_offset_t    va;
    register vm_offset_t    end_addr = entry->vme_end;
    register kern_return_t  rc;

    assert(entry->in_transition);

    /*
     *  Inform the physical mapping system that the
     *  range of addresses may not fault, so that
     *  page tables and such can be locked down as well.
     */
    pmap_pageable(pmap, entry->vme_start, end_addr, FALSE);

    /*
     *  We simulate a fault to get the page and enter it
     *  in the physical map.
     */
    for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
        if ((rc = vm_fault_wire_fast(
            map, va, entry, pmap)) != KERN_SUCCESS) {
            rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
                (pmap == kernel_pmap) ? THREAD_UNINT : THREAD_ABORTSAFE);
        }

        if (rc != KERN_SUCCESS) {
            struct vm_map_entry tmp_entry = *entry;

            /* unwire wired pages */
            tmp_entry.vme_end = va;
            vm_fault_unwire(map, &tmp_entry, FALSE, pmap);

            return rc;
        }
    }
    return KERN_SUCCESS;
}
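/*
 * The loop above tries the stripped-down vm_fault_wire_fast() first and
 * only falls back to the full vm_fault() when the fast routine declines;
 * on any hard failure it unwires the portion of the range wired so far
 * (via the tmp_entry with vme_end clipped to va) before returning.
 */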
/*
 *  vm_fault_unwire:
 *
 *  Unwire a range of virtual addresses in a map.
 */
void
vm_fault_unwire(
    vm_map_t    map,
    vm_map_entry_t  entry,
    boolean_t   deallocate,
    pmap_t      pmap)
{
    register vm_offset_t    va;
    register vm_offset_t    end_addr = entry->vme_end;
    vm_object_t     object;
    vm_prot_t       prot;

    object = (entry->is_sub_map)
            ? VM_OBJECT_NULL : entry->object.vm_object;

    /*
     *  Since the pages are wired down, we must be able to
     *  get their mappings from the physical map system.
     */
    for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
        pmap_change_wiring(pmap, va, FALSE);

        if (object == VM_OBJECT_NULL) {
            (void) vm_fault(map, va, VM_PROT_NONE, TRUE, THREAD_UNINT);
        } else {
            vm_page_t       result_page;
            vm_page_t       top_page;
            vm_object_t     result_object;
            vm_fault_return_t   result;

            do {
                prot = VM_PROT_NONE;

                vm_object_lock(object);
                vm_object_paging_begin(object);
                XPR(XPR_VM_FAULT,
                    "vm_fault_unwire -> vm_fault_page\n",
                    0,0,0,0,0);
                result = vm_fault_page(object,
                        entry->offset +
                            (va - entry->vme_start),
                        VM_PROT_NONE, TRUE,
                        THREAD_UNINT,
                        entry->offset,
                        entry->offset +
                            (entry->vme_end
                            - entry->vme_start),
                        VM_BEHAVIOR_SEQUENTIAL,
                        &prot,
                        &result_page,
                        &top_page,
                        (int *)0,
                        0, map->no_zero_fill,
                        FALSE, NULL, 0);
            } while (result == VM_FAULT_RETRY);

            if (result != VM_FAULT_SUCCESS)
                panic("vm_fault_unwire: failure");

            result_object = result_page->object;
            if (deallocate) {
                assert(!result_page->fictitious);
                pmap_page_protect(result_page->phys_addr,
                        VM_PROT_NONE);
                VM_PAGE_FREE(result_page);
            } else {
                vm_page_lock_queues();
                vm_page_unwire(result_page);
                vm_page_unlock_queues();
                PAGE_WAKEUP_DONE(result_page);
            }

            vm_fault_cleanup(result_object, top_page);
        }
    }

    /*
     *  Inform the physical mapping system that the range
     *  of addresses may fault, so that page tables and
     *  such may be unwired themselves.
     */
    pmap_pageable(pmap, entry->vme_start, end_addr, TRUE);
}
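/*
 * Note that when "deallocate" is set the wired page is thrown away
 * outright (its physical mappings removed and the page freed), whereas
 * the normal path merely drops the wire count and wakes any waiters;
 * either way the range is handed back to pmap_pageable() above so the
 * pmap layer may page its own structures again.
 */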
/*
 *  vm_fault_wire_fast:
 *
 *  Handle common case of a wire down page fault at the given address.
 *  If successful, the page is inserted into the associated physical map.
 *  The map entry is passed in to avoid the overhead of a map lookup.
 *
 *  NOTE: the given address should be truncated to the
 *  proper page address.
 *
 *  KERN_SUCCESS is returned if the page fault is handled; otherwise,
 *  a standard error specifying why the fault is fatal is returned.
 *
 *  The map in question must be referenced, and remains so.
 *  Caller has a read lock on the map.
 *
 *  This is a stripped version of vm_fault() for wiring pages.  Anything
 *  other than the common case will return KERN_FAILURE, and the caller
 *  is expected to call vm_fault().
 */
kern_return_t
vm_fault_wire_fast(
    vm_map_t    map,
    vm_offset_t va,
    vm_map_entry_t  entry,
    pmap_t      pmap)
{
    vm_object_t     object;
    vm_object_offset_t  offset;
    register vm_page_t  m;
    vm_prot_t       prot;
    thread_act_t        thr_act;

    if((thr_act=current_act()) && (thr_act->task != TASK_NULL))
        thr_act->task->faults++;

    /*
     *  Recovery actions
     */

#undef  RELEASE_PAGE
#define RELEASE_PAGE(m) {                   \
    PAGE_WAKEUP_DONE(m);                    \
    vm_page_lock_queues();                  \
    vm_page_unwire(m);                      \
    vm_page_unlock_queues();                \
}

#undef  UNLOCK_THINGS
#define UNLOCK_THINGS   {                   \
    object->paging_in_progress--;               \
    vm_object_unlock(object);               \
}

#undef  UNLOCK_AND_DEALLOCATE
#define UNLOCK_AND_DEALLOCATE   {               \
    UNLOCK_THINGS;                      \
    vm_object_deallocate(object);               \
}
/*
 *  Give up and have caller do things the hard way.
 */

#define GIVE_UP {                       \
    UNLOCK_AND_DEALLOCATE;                  \
    return(KERN_FAILURE);                   \
}
    /*
     *  If this entry is not directly to a vm_object, bail out.
     */
    if (entry->is_sub_map)
        return(KERN_FAILURE);

    /*
     *  Find the backing store object and offset into it.
     */
    object = entry->object.vm_object;
    offset = (va - entry->vme_start) + entry->offset;
    prot = entry->protection;

    /*
     *  Make a reference to this object to prevent its
     *  disposal while we are messing with it.
     */
    vm_object_lock(object);
    assert(object->ref_count > 0);
    object->ref_count++;
    vm_object_res_reference(object);
    object->paging_in_progress++;

    /*
     *  INVARIANTS (through entire routine):
     *
     *  1)  At all times, we must either have the object
     *      lock or a busy page in some object to prevent
     *      some other thread from trying to bring in
     *      the same page.
     *
     *  2)  Once we have a busy page, we must remove it from
     *      the pageout queues, so that the pageout daemon
     *      will not grab it away.
     */

    /*
     *  Look for page in top-level object.  If it's not there or
     *  there's something going on, give up.
     */
    m = vm_page_lookup(object, offset);
    if ((m == VM_PAGE_NULL) || (m->busy) ||
        (m->unusual && ( m->error || m->restart || m->absent ||
            prot & m->page_lock))) {

        GIVE_UP;
    }

    /*
     *  Wire the page down now.  All bail outs beyond this
     *  point must unwire the page.
     */

    vm_page_lock_queues();
    vm_page_wire(m);
    vm_page_unlock_queues();

    /*
     *  Mark page busy for other threads.
     */
    m->busy = TRUE;

    /*
     *  Give up if the page is being written and there's a copy object
     */
    if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
        RELEASE_PAGE(m);
        GIVE_UP;
    }

    /*
     *  Put this page into the physical map.
     *  We have to unlock the object because pmap_enter
     *  may cause other faults.
     */
    if (m->no_isync == TRUE) {
        m->no_isync = FALSE;

        vm_object_unlock(object);

        PMAP_ENTER(pmap, va, m, prot, TRUE);

        /* Sync I & D caches for new mapping */
        pmap_attribute(pmap,
                   va,
                   PAGE_SIZE,
                   MATTR_CACHE,
                   &mv_cache_sync);
    } else {
        vm_object_unlock(object);

        PMAP_ENTER(pmap, va, m, prot, TRUE);
    }

    /*
     *  Must relock object so that paging_in_progress can be cleared.
     */
    vm_object_lock(object);

    /*
     *  Unlock everything, and return
     */
    PAGE_WAKEUP_DONE(m);
    UNLOCK_AND_DEALLOCATE;

    return(KERN_SUCCESS);
}
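/*
 * Illustrative only: the expected calling pattern (as used by
 * vm_fault_wire() above) is to try this routine first and fall back to
 * the general fault path when it declines, e.g.:
 *
 *  if (vm_fault_wire_fast(map, va, entry, pmap) != KERN_SUCCESS)
 *      (void) vm_fault(map, va, VM_PROT_NONE, TRUE, THREAD_UNINT);
 */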
/*
 *  Routine:    vm_fault_copy_cleanup
 *  Purpose:
 *      Release a page used by vm_fault_copy.
 */
void
vm_fault_copy_cleanup(
    vm_page_t   page,
    vm_page_t   top_page)
{
    vm_object_t object = page->object;

    vm_object_lock(object);
    PAGE_WAKEUP_DONE(page);
    vm_page_lock_queues();
    if (!page->active && !page->inactive)
        vm_page_activate(page);
    vm_page_unlock_queues();
    vm_fault_cleanup(object, top_page);
}

void
vm_fault_copy_dst_cleanup(
    vm_page_t   page)
{
    vm_object_t object;

    if (page != VM_PAGE_NULL) {
        object = page->object;
        vm_object_lock(object);
        vm_page_lock_queues();
        vm_page_unwire(page);
        vm_page_unlock_queues();
        vm_object_paging_end(object);
        vm_object_unlock(object);
    }
}
/*
 *  Routine:    vm_fault_copy
 *
 *  Purpose:
 *      Copy pages from one virtual memory object to another --
 *      neither the source nor destination pages need be resident.
 *
 *      Before actually copying a page, the version associated with
 *      the destination address map will be verified.
 *
 *  In/out conditions:
 *      The caller must hold a reference, but not a lock, to
 *      each of the source and destination objects and to the
 *      destination map.
 *
 *  Results:
 *      Returns KERN_SUCCESS if no errors were encountered in
 *      reading or writing the data.  Returns KERN_INTERRUPTED if
 *      the operation was interrupted (only possible if the
 *      "interruptible" argument is asserted).  Other return values
 *      indicate a permanent error in copying the data.
 *
 *      The actual amount of data copied will be returned in the
 *      "copy_size" argument.  In the event that the destination map
 *      verification failed, this amount may be less than the amount
 *      requested.
 */
kern_return_t
vm_fault_copy(
    vm_object_t     src_object,
    vm_object_offset_t  src_offset,
    vm_size_t       *src_size,      /* INOUT */
    vm_object_t     dst_object,
    vm_object_offset_t  dst_offset,
    vm_map_t        dst_map,
    vm_map_version_t    *dst_version,
    int         interruptible)
{
    vm_page_t       result_page;

    vm_page_t       src_page;
    vm_page_t       src_top_page;
    vm_prot_t       src_prot;

    vm_page_t       dst_page;
    vm_page_t       dst_top_page;
    vm_prot_t       dst_prot;

    vm_size_t       amount_left;
    vm_object_t     old_copy_object;
    kern_return_t       error = 0;

    vm_size_t       part_size;

    /*
     * In order not to confuse the clustered pageins, align
     * the different offsets on a page boundary.
     */
    vm_object_offset_t  src_lo_offset = trunc_page_64(src_offset);
    vm_object_offset_t  dst_lo_offset = trunc_page_64(dst_offset);
    vm_object_offset_t  src_hi_offset = round_page_64(src_offset + *src_size);
    vm_object_offset_t  dst_hi_offset = round_page_64(dst_offset + *src_size);
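    /*
     * For instance (illustrative, assuming a 4K page size): copying
     * 0x2200 bytes from src_offset 0x1100 gives src_lo_offset = 0x1000
     * and src_hi_offset = round_page_64(0x3300) = 0x4000, so any
     * clustered pagein operates on whole pages surrounding the
     * transfer, while the loop below still copies only the requested
     * byte range in page-sized (or smaller, for the unaligned head and
     * tail) chunks.
     */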
#define RETURN(x)                       \
    MACRO_BEGIN                     \
    *src_size -= amount_left;               \
    return(x);                      \
    MACRO_END

    amount_left = *src_size;
    do { /* while (amount_left > 0) */
        /*
         * There may be a deadlock if both source and destination
         * pages are the same. To avoid this deadlock, the copy must
         * start by getting the destination page in order to apply
         * COW semantics if any.
         */

    RetryDestinationFault: ;

        dst_prot = VM_PROT_WRITE|VM_PROT_READ;

        vm_object_lock(dst_object);
        vm_object_paging_begin(dst_object);

        XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
        switch (vm_fault_page(dst_object,
                      trunc_page_64(dst_offset),
                      VM_PROT_WRITE|VM_PROT_READ,
                      FALSE,
                      interruptible,
                      dst_lo_offset,
                      dst_hi_offset,
                      VM_BEHAVIOR_SEQUENTIAL,
                      &dst_prot,
                      &dst_page,
                      &dst_top_page,
                      (int *)0,
                      &error,
                      dst_map->no_zero_fill,
                      FALSE, NULL, 0)) {
        case VM_FAULT_SUCCESS:
            break;
        case VM_FAULT_RETRY:
            goto RetryDestinationFault;
        case VM_FAULT_MEMORY_SHORTAGE:
            if (vm_page_wait(interruptible))
                goto RetryDestinationFault;
            /* fall thru */
        case VM_FAULT_INTERRUPTED:
            RETURN(MACH_SEND_INTERRUPTED);
        case VM_FAULT_FICTITIOUS_SHORTAGE:
            vm_page_more_fictitious();
            goto RetryDestinationFault;
        case VM_FAULT_MEMORY_ERROR:
            if (error)
                return(error);
            else
                return(KERN_MEMORY_ERROR);
        }
        assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);

        old_copy_object = dst_page->object->copy;

        /*
         * There exists the possibility that the source and
         * destination page are the same.  But we can't
         * easily determine that now.  If they are the
         * same, the call to vm_fault_page() for the
         * destination page will deadlock.  To prevent this we
         * wire the page so we can drop busy without having
         * the page daemon steal the page.  We clean up the
         * top page but keep the paging reference on the object
         * holding the dest page so it doesn't go away.
         */

        vm_page_lock_queues();
        vm_page_wire(dst_page);
        vm_page_unlock_queues();
        PAGE_WAKEUP_DONE(dst_page);
        vm_object_unlock(dst_page->object);

        if (dst_top_page != VM_PAGE_NULL) {
            vm_object_lock(dst_object);
            VM_PAGE_FREE(dst_top_page);
            vm_object_paging_end(dst_object);
            vm_object_unlock(dst_object);
        }
    RetrySourceFault: ;

        if (src_object == VM_OBJECT_NULL) {
            /*
             *  No source object.  We will just
             *  zero-fill the page in dst_object.
             */
            src_page = VM_PAGE_NULL;
            result_page = VM_PAGE_NULL;
        } else {
            vm_object_lock(src_object);
            src_page = vm_page_lookup(src_object,
                          trunc_page_64(src_offset));
            if (src_page == dst_page) {
                src_prot = dst_prot;
                result_page = VM_PAGE_NULL;
            } else {
                src_prot = VM_PROT_READ;
                vm_object_paging_begin(src_object);

                XPR(XPR_VM_FAULT,
                    "vm_fault_copy(2) -> vm_fault_page\n",
                    0,0,0,0,0);
                switch (vm_fault_page(src_object,
                              trunc_page_64(src_offset),
                              VM_PROT_READ,
                              FALSE,
                              interruptible,
                              src_lo_offset,
                              src_hi_offset,
                              VM_BEHAVIOR_SEQUENTIAL,
                              &src_prot,
                              &result_page,
                              &src_top_page,
                              (int *)0,
                              &error,
                              FALSE,
                              FALSE, NULL, 0)) {
                case VM_FAULT_SUCCESS:
                    break;
                case VM_FAULT_RETRY:
                    goto RetrySourceFault;
                case VM_FAULT_MEMORY_SHORTAGE:
                    if (vm_page_wait(interruptible))
                        goto RetrySourceFault;
                    /* fall thru */
                case VM_FAULT_INTERRUPTED:
                    vm_fault_copy_dst_cleanup(dst_page);
                    RETURN(MACH_SEND_INTERRUPTED);
                case VM_FAULT_FICTITIOUS_SHORTAGE:
                    vm_page_more_fictitious();
                    goto RetrySourceFault;
                case VM_FAULT_MEMORY_ERROR:
                    vm_fault_copy_dst_cleanup(dst_page);
                    if (error)
                        return(error);
                    else
                        return(KERN_MEMORY_ERROR);
                }

                assert((src_top_page == VM_PAGE_NULL) ==
                       (result_page->object == src_object));
            }
            assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
            vm_object_unlock(result_page->object);
        }

        if (!vm_map_verify(dst_map, dst_version)) {
            if (result_page != VM_PAGE_NULL && src_page != dst_page)
                vm_fault_copy_cleanup(result_page, src_top_page);
            vm_fault_copy_dst_cleanup(dst_page);
            break;
        }

        vm_object_lock(dst_page->object);

        if (dst_page->object->copy != old_copy_object) {
            vm_object_unlock(dst_page->object);
            vm_map_verify_done(dst_map, dst_version);
            if (result_page != VM_PAGE_NULL && src_page != dst_page)
                vm_fault_copy_cleanup(result_page, src_top_page);
            vm_fault_copy_dst_cleanup(dst_page);
            break;
        }
        vm_object_unlock(dst_page->object);
        /*
         *  Copy the page, and note that it is dirty.
         */

        if (!page_aligned(src_offset) ||
            !page_aligned(dst_offset) ||
            !page_aligned(amount_left)) {

            vm_object_offset_t  src_po,
                        dst_po;

            src_po = src_offset - trunc_page_64(src_offset);
            dst_po = dst_offset - trunc_page_64(dst_offset);

            if (dst_po > src_po) {
                part_size = PAGE_SIZE - dst_po;
            } else {
                part_size = PAGE_SIZE - src_po;
            }
            if (part_size > (amount_left)){
                part_size = amount_left;
            }
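            /*
             * Illustrative example of the arithmetic above (4K pages):
             * with src_offset = 0x1100 and dst_offset = 0x2300, src_po
             * = 0x100 and dst_po = 0x300; dst_po is larger, so
             * part_size = PAGE_SIZE - 0x300 = 0xD00, i.e. the chunk
             * ends exactly at the destination page boundary (and is
             * clipped further if fewer than 0xD00 bytes remain).
             */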
            if (result_page == VM_PAGE_NULL) {
                vm_page_part_zero_fill(dst_page,
                            dst_po, part_size);
            } else {
                vm_page_part_copy(result_page, src_po,
                    dst_page, dst_po, part_size);
                if(!dst_page->dirty){
                    vm_object_lock(dst_object);
                    dst_page->dirty = TRUE;
                    vm_object_unlock(dst_page->object);
                }
            }
        } else {
            part_size = PAGE_SIZE;

            if (result_page == VM_PAGE_NULL)
                vm_page_zero_fill(dst_page);
            else {
                vm_page_copy(result_page, dst_page);
                if(!dst_page->dirty){
                    vm_object_lock(dst_object);
                    dst_page->dirty = TRUE;
                    vm_object_unlock(dst_page->object);
                }
            }
        }

        /*
         *  Unlock everything, and return
         */

        vm_map_verify_done(dst_map, dst_version);

        if (result_page != VM_PAGE_NULL && src_page != dst_page)
            vm_fault_copy_cleanup(result_page, src_top_page);
        vm_fault_copy_dst_cleanup(dst_page);

        amount_left -= part_size;
        src_offset += part_size;
        dst_offset += part_size;
    } while (amount_left > 0);

    RETURN(KERN_SUCCESS);
#undef  RETURN
}
/*
 *  Routine:    vm_fault_page_overwrite
 *
 *  Description:
 *      A form of vm_fault_page that assumes that the
 *      resulting page will be overwritten in its entirety,
 *      making it unnecessary to obtain the correct *contents*
 *      of the page.
 *
 *  Implementation:
 *      XXX Untested.  Also unused.  Eventually, this technology
 *      could be used in vm_fault_copy() to advantage.
 */
vm_fault_return_t
vm_fault_page_overwrite(
    register
    vm_object_t     dst_object,
    vm_object_offset_t  dst_offset,
    vm_page_t       *result_page)   /* OUT */
{
    register
    vm_page_t   dst_page;
    kern_return_t   wait_result;

#define interruptible   THREAD_UNINT    /* XXX */

    while (TRUE) {
        /*
         *  Look for a page at this offset
         */
        while ((dst_page = vm_page_lookup(dst_object, dst_offset))
                == VM_PAGE_NULL) {
            /*
             *  No page, no problem... just allocate one.
             */
            dst_page = vm_page_alloc(dst_object, dst_offset);
            if (dst_page == VM_PAGE_NULL) {
                vm_object_unlock(dst_object);
                VM_PAGE_WAIT();
                vm_object_lock(dst_object);
                continue;
            }

            /*
             *  Pretend that the memory manager
             *  write-protected the page.
             *
             *  Note that we will be asking for write
             *  permission without asking for the data
             *  first.
             */
            dst_page->overwriting = TRUE;
            dst_page->page_lock = VM_PROT_WRITE;
            dst_page->absent = TRUE;
            dst_page->unusual = TRUE;
            dst_object->absent_count++;

            break;

            /*
             *  When we bail out, we might have to throw
             *  away the page created here.
             */
#define DISCARD_PAGE                            \
    MACRO_BEGIN                         \
    vm_object_lock(dst_object);                 \
    dst_page = vm_page_lookup(dst_object, dst_offset);     \
    if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting)   \
        VM_PAGE_FREE(dst_page);                 \
    vm_object_unlock(dst_object);               \
    MACRO_END
        }

        /*
         *  If the page is write-protected...
         */

        if (dst_page->page_lock & VM_PROT_WRITE) {
            /*
             *  ... and an unlock request hasn't been sent
             */
            if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
                vm_prot_t   u;
                kern_return_t   rc;

                /*
                 *  ... then send one now.
                 */

                if (!dst_object->pager_ready) {
                    vm_object_assert_wait(dst_object,
                        VM_OBJECT_EVENT_PAGER_READY,
                        interruptible);
                    vm_object_unlock(dst_object);
                    wait_result = thread_block((void (*)(void))0);
                    if (wait_result != THREAD_AWAKENED) {
                        DISCARD_PAGE;
                        return(VM_FAULT_INTERRUPTED);
                    }
                    continue;
                }

                u = dst_page->unlock_request |= VM_PROT_WRITE;
                vm_object_unlock(dst_object);

                if ((rc = memory_object_data_unlock(
                        dst_object->pager,
                        dst_offset + dst_object->paging_offset,
                        PAGE_SIZE,
                        u)) != KERN_SUCCESS) {
                    printf("vm_object_overwrite: memory_object_data_unlock failed\n");
                    DISCARD_PAGE;
                    return((rc == MACH_SEND_INTERRUPTED) ?
                        VM_FAULT_INTERRUPTED :
                        VM_FAULT_MEMORY_ERROR);
                }
                vm_object_lock(dst_object);
                continue;
            }

            /* ... fall through to wait below */
        } else {
            /*
             *  If the page isn't being used for other
             *  purposes, then we're done.
             */
            if ( ! (dst_page->busy || dst_page->absent ||
                dst_page->error || dst_page->restart) )
                break;
        }

        PAGE_ASSERT_WAIT(dst_page, interruptible);
        vm_object_unlock(dst_object);
        wait_result = thread_block((void (*)(void))0);
        if (wait_result != THREAD_AWAKENED) {
            DISCARD_PAGE;
            return(VM_FAULT_INTERRUPTED);
        }
        vm_object_lock(dst_object);
    }

    *result_page = dst_page;
    return(VM_FAULT_SUCCESS);

#undef  interruptible
#undef  DISCARD_PAGE
}
#if VM_FAULT_CLASSIFY
/*
 *  Temporary statistics gathering support.
 */

/*
 *  Statistics arrays:
 */
#define VM_FAULT_TYPES_MAX  5
#define VM_FAULT_LEVEL_MAX  8

int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];

#define VM_FAULT_TYPE_ZERO_FILL 0
#define VM_FAULT_TYPE_MAP_IN    1
#define VM_FAULT_TYPE_PAGER     2
#define VM_FAULT_TYPE_COPY      3
#define VM_FAULT_TYPE_OTHER     4
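/*
 * With VM_FAULT_CLASSIFY enabled, vm_fault_stats[type][level] counts
 * faults by the kind of resolution (zero fill, map in, pager, copy,
 * other) and by how many shadow-chain levels were traversed; the array
 * can be inspected from the kernel debugger and reset with
 * vm_fault_classify_init() below.
 */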
void
vm_fault_classify(vm_object_t       object,
          vm_object_offset_t    offset,
          vm_prot_t     fault_type)
{
    int     type, level = 0;
    vm_page_t   m;

    while (TRUE) {
        m = vm_page_lookup(object, offset);
        if (m != VM_PAGE_NULL) {
            if (m->busy || m->error || m->restart || m->absent ||
                fault_type & m->page_lock) {
                type = VM_FAULT_TYPE_OTHER;
                break;
            }
            if (((fault_type & VM_PROT_WRITE) == 0) ||
                ((level == 0) && object->copy == VM_OBJECT_NULL)) {
                type = VM_FAULT_TYPE_MAP_IN;
                break;
            }
            type = VM_FAULT_TYPE_COPY;
            break;
        }
        else {
            if (object->pager_created) {
                type = VM_FAULT_TYPE_PAGER;
                break;
            }
            if (object->shadow == VM_OBJECT_NULL) {
                type = VM_FAULT_TYPE_ZERO_FILL;
                break;
            }

            offset += object->shadow_offset;
            object = object->shadow;
            level++;
            continue;
        }
    }

    if (level > VM_FAULT_LEVEL_MAX)
        level = VM_FAULT_LEVEL_MAX;

    vm_fault_stats[type][level] += 1;

    return;
}

/*  cleanup routine to call from debugger */
void
vm_fault_classify_init(void)
{
    int type, level;

    for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
        for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
            vm_fault_stats[type][level] = 0;
        }
    }

    return;
}
#endif  /* VM_FAULT_CLASSIFY */