/*
 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Page fault handling module.
 */
/* remove after component interface available */
extern int vnode_pager_workaround;
extern int device_pager_workaround;
#include <mach_cluster_stats.h>
#include <mach_pagemap.h>

#include <vm/vm_fault.h>
#include <mach/kern_return.h>
#include <mach/message.h>	/* for error codes */
#include <kern/host_statistics.h>
#include <kern/counters.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/host.h>
#include <ppc/proc_reg.h>
#include <vm/task_working_set.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/memory_object.h>
				/* For memory_object_data_{request,unlock} */
#include <kern/mach_param.h>
#include <kern/macro_help.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>

#include <sys/kdebug.h>
#define VM_FAULT_CLASSIFY	0
#define VM_FAULT_STATIC_CONFIG	1

#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */

int		vm_object_absent_max = 50;

int		vm_fault_debug = 0;

#if	!VM_FAULT_STATIC_CONFIG
boolean_t	vm_fault_dirty_handling = FALSE;
boolean_t	vm_fault_interruptible = FALSE;
boolean_t	software_reference_bits = TRUE;
#endif	/* !VM_FAULT_STATIC_CONFIG */

#if	MACH_KDB
extern struct db_watchpoint *db_watchpoint_list;
#endif	/* MACH_KDB */
/* Forward declarations of internal routines. */
extern kern_return_t vm_fault_wire_fast(
		vm_map_entry_t	entry,
		vm_offset_t	pmap_addr);

extern void vm_fault_continue(void);

extern void vm_fault_copy_cleanup(
		vm_page_t	page,
		vm_page_t	top_page);

extern void vm_fault_copy_dst_cleanup(
		vm_page_t	page);

#if	VM_FAULT_CLASSIFY
extern void vm_fault_classify(vm_object_t	object,
		vm_object_offset_t	offset,
		vm_prot_t		fault_type);

extern void vm_fault_classify_init(void);
#endif	/* VM_FAULT_CLASSIFY */
/*
 *	Routine:	vm_fault_init
 *		Initialize our private data structures.
 */

/*
 *	Routine:	vm_fault_cleanup
 *		Clean up the result of vm_fault_page.
 *
 *		The paging reference for "object" is released.
 *		"object" is unlocked.
 *		If "top_page" is not null, "top_page" is
 *		freed and the paging reference for the object
 *		containing it is released.
 *
 *		"object" must be locked.
 */
void
vm_fault_cleanup(
	register vm_object_t	object,
	register vm_page_t	top_page)
{
	vm_object_paging_end(object);
	vm_object_unlock(object);

	if (top_page != VM_PAGE_NULL) {
		object = top_page->object;
		vm_object_lock(object);
		VM_PAGE_FREE(top_page);
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}
}
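
/*
 * Typical use: a caller of vm_fault_page() that is done with the result
 * hands back the object it was left holding (and the "top_page"
 * placeholder, if any) through vm_fault_cleanup(object, top_page) to
 * drop the paging references and locks described above.
 */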
#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES	16
struct {
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats_in[MAXCLUSTERPAGES];
#define CLUSTER_STAT(clause)	clause
#define CLUSTER_STAT_HIGHER(x)	\
	((cluster_stats_in[(x)].pages_at_higher_offsets)++)
#define CLUSTER_STAT_LOWER(x)	\
	((cluster_stats_in[(x)].pages_at_lower_offsets)++)
#define CLUSTER_STAT_CLUSTER(x)	\
	((cluster_stats_in[(x)].pages_in_cluster)++)
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */
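
/*
 * When MACH_CLUSTER_STATS is not configured, CLUSTER_STAT(clause)
 * expands to nothing, so the per-cluster counters above and every
 * CLUSTER_STAT(...) statement in the fault path compile away entirely
 * in production builds.
 */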
/* XXX - temporary */
boolean_t vm_allow_clustered_pagein = FALSE;
int vm_pagein_cluster_used = 0;

#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)

boolean_t	vm_page_deactivate_behind = TRUE;

/*
 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
 */
int vm_default_ahead = 0;
int vm_default_behind = MAX_UPL_TRANSFER;
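
/*
 * vm_default_behind is expressed in pages; the deactivate-behind code
 * below scales it by PAGE_SIZE_64 to get a byte distance, so with the
 * default of MAX_UPL_TRANSFER a sequential scan trails a deactivation
 * window of MAX_UPL_TRANSFER pages behind the faulting offset.
 */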
/*
 * vm_page_deactivate_behind
 *
 * Determine if sequential access is in progress
 * in accordance with the behavior specified.  If
 * so, compute a potential page to deactivate and
 * deactivate it.
 *
 * The object must be locked.
 */
boolean_t
vm_fault_deactivate_behind(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_behavior_t		behavior)
{
	vm_page_t m;

#if TRACEFAULTPAGE
	dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind);	/* (TEST/DEBUG) */
#endif

	switch (behavior) {
	case VM_BEHAVIOR_RANDOM:
		object->sequential = PAGE_SIZE_64;
		m = VM_PAGE_NULL;
		break;
	case VM_BEHAVIOR_SEQUENTIAL:
		if (offset &&
			object->last_alloc == offset - PAGE_SIZE_64) {
			object->sequential += PAGE_SIZE_64;
			m = vm_page_lookup(object, offset - PAGE_SIZE_64);
		} else {
			object->sequential = PAGE_SIZE_64; /* reset */
			m = VM_PAGE_NULL;
		}
		break;
	case VM_BEHAVIOR_RSEQNTL:
		if (object->last_alloc &&
			object->last_alloc == offset + PAGE_SIZE_64) {
			object->sequential += PAGE_SIZE_64;
			m = vm_page_lookup(object, offset + PAGE_SIZE_64);
		} else {
			object->sequential = PAGE_SIZE_64; /* reset */
			m = VM_PAGE_NULL;
		}
		break;
	case VM_BEHAVIOR_DEFAULT:
	default:
		if (offset &&
			object->last_alloc == offset - PAGE_SIZE_64) {
			vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;

			object->sequential += PAGE_SIZE_64;
			m = (offset >= behind &&
				object->sequential >= behind) ?
				vm_page_lookup(object, offset - behind) :
				VM_PAGE_NULL;
		} else if (object->last_alloc &&
			object->last_alloc == offset + PAGE_SIZE_64) {
			vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;

			object->sequential += PAGE_SIZE_64;
			m = (offset < -behind &&
				object->sequential >= behind) ?
				vm_page_lookup(object, offset + behind) :
				VM_PAGE_NULL;
		} else {
			object->sequential = PAGE_SIZE_64;
			m = VM_PAGE_NULL;
		}
		break;
	}
	object->last_alloc = offset;

	if (m) {
		if (!m->busy) {
			vm_page_lock_queues();
			vm_page_deactivate(m);
			vm_page_unlock_queues();
#if TRACEFAULTPAGE
			dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
#endif
		}
		return TRUE;
	}
	return FALSE;
}
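
/*
 * Worked example (forward sequential access, VM_BEHAVIOR_DEFAULT):
 * each fault whose offset immediately follows object->last_alloc grows
 * object->sequential by one page.  Once the run and the offset both
 * reach vm_default_behind pages, the page vm_default_behind pages back
 * is looked up and, if present and not busy, deactivated so the
 * pageout daemon can reclaim it sooner.
 */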
/*
 *	Routine:	vm_fault_page
 *	Purpose:
 *		Find the resident page for the virtual memory
 *		specified by the given virtual memory object
 *		and offset.
 *	Additional arguments:
 *		The required permissions for the page are given
 *		in "fault_type".  Desired permissions are included
 *		in "protection".  The minimum and maximum valid offsets
 *		within the object for the relevant map entry are
 *		passed in "lo_offset" and "hi_offset" respectively and
 *		the expected page reference pattern is passed in "behavior".
 *		These three parameters are used to determine pagein cluster
 *		limits.
 *
 *		If the desired page is known to be resident (for
 *		example, because it was previously wired down), asserting
 *		the "unwiring" parameter will speed the search.
 *
 *		If the operation can be interrupted (by thread_abort
 *		or thread_terminate), then the "interruptible"
 *		parameter should be asserted.
 *
 *	Results:
 *		The page containing the proper data is returned
 *		in "result_page".
 *
 *	In/out conditions:
 *		The source object must be locked and referenced,
 *		and must donate one paging reference.  The reference
 *		is not affected.  The paging reference and lock are
 *		consumed.
 *
 *		If the call succeeds, the object in which "result_page"
 *		resides is left locked and holding a paging reference.
 *		If this is not the original object, a busy page in the
 *		original object is returned in "top_page", to prevent other
 *		callers from pursuing this same data, along with a paging
 *		reference for the original object.  The "top_page" should
 *		be destroyed when this guarantee is no longer required.
 *		The "result_page" is also left busy.  It is not removed
 *		from the pageout queues.
 */
vm_fault_return_t
vm_fault_page(
	vm_object_t	first_object,	/* Object to begin search */
	vm_object_offset_t first_offset,	/* Offset into object */
	vm_prot_t	fault_type,	/* What access is requested */
	boolean_t	must_be_resident,/* Must page be resident? */
	int		interruptible,	/* how may fault be interrupted? */
	vm_object_offset_t lo_offset,	/* Map entry start */
	vm_object_offset_t hi_offset,	/* Map entry end */
	vm_behavior_t	behavior,	/* Page reference behavior */
	/* Modifies in place: */
	vm_prot_t	*protection,	/* Protection for mapping */
	vm_page_t	*result_page,	/* Page found, if successful */
	vm_page_t	*top_page,	/* Page in top object, if
					 * not result_page. */
	int		*type_of_fault,	/* if non-null, fill in with type of fault
					 * COW, zero-fill, etc... returned in trace point */
	/* More arguments: */
	kern_return_t	*error_code,	/* code if page is in error */
	boolean_t	no_zero_fill,	/* don't zero fill absent pages */
	boolean_t	data_supply)	/* treat as data_supply if
					 * it is a write fault and a full
					 * page is provided */
{
	register vm_page_t	m;
	register vm_object_t	object;
	vm_object_offset_t	offset;
	vm_page_t		first_m;
	vm_object_t		next_object;
	vm_object_t		copy_object;
	boolean_t		look_for_page;
	vm_prot_t		access_required = fault_type;
	vm_prot_t		wants_copy_flag;
	vm_size_t		cluster_size, length;
	vm_object_offset_t	cluster_offset;
	vm_object_offset_t	cluster_start, cluster_end, paging_offset;
	vm_object_offset_t	align_offset;
	CLUSTER_STAT(int pages_at_higher_offsets;)
	CLUSTER_STAT(int pages_at_lower_offsets;)
	kern_return_t		wait_result;
	boolean_t		interruptible_state;
	boolean_t		bumped_pagein = FALSE;
#if	MACH_PAGEMAP
	/*
	 * MACH page map - an optional optimization where a bit map is maintained
	 * by the VM subsystem for internal objects to indicate which pages of
	 * the object currently reside on backing store.  This existence map
	 * duplicates information maintained by the vnode pager.  It is
	 * created at the time of the first pageout against the object, i.e.
	 * at the same time the pager for the object is created.  The optimization
	 * is designed to eliminate pager interaction overhead, if it is
	 * 'known' that the page does not exist on backing store.
	 *
	 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
	 * either marked as paged out in the existence map for the object or no
	 * existence map exists for the object.  LOOK_FOR() is one of the
	 * criteria in the decision to invoke the pager.  It is also used as one
	 * of the criteria to terminate the scan for adjacent pages in a clustered
	 * pagein operation.  Note that LOOK_FOR() always evaluates to TRUE for
	 * permanent objects.  Note also that if the pager for an internal object
	 * has not been created, the pager is not invoked regardless of the value
	 * of LOOK_FOR() and that clustered pagein scans are only done on an object
	 * for which a pager has been created.
	 *
	 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
	 * is marked as paged out in the existence map for the object.
	 * PAGED_OUT() is used to determine if a page has already been pushed
	 * into a copy object in order to avoid a redundant page out operation.
	 */
#define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			!= VM_EXTERNAL_STATE_ABSENT)
#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			== VM_EXTERNAL_STATE_EXISTS)
#else /* MACH_PAGEMAP */
	/*
	 * If the MACH page map optimization is not enabled,
	 * LOOK_FOR() always evaluates to TRUE.  The pager will always be
	 * invoked to resolve missing pages in an object, assuming the pager
	 * has been created for the object.  In a clustered page operation, the
	 * absence of a page on backing store cannot be used to terminate
	 * a scan for adjacent pages since that information is available only in
	 * the pager.  Hence pages that may not be paged out are potentially
	 * included in a clustered request.  The vnode pager is coded to deal
	 * with any combination of absent/present pages in a clustered
	 * pagein request.  PAGED_OUT() always evaluates to FALSE, i.e. the pager
	 * will always be invoked to push a dirty page into a copy object assuming
	 * a pager has been created.  If the page has already been pushed, the
	 * pager will ignore the new request.
	 */
#define LOOK_FOR(o, f) TRUE
#define PAGED_OUT(o, f) FALSE
#endif /* MACH_PAGEMAP */
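
/*
 * In other words, given an internal object o with an existence map:
 *
 *   vm_external_state_get(o->existence_map, f) == VM_EXTERNAL_STATE_ABSENT
 *       -> LOOK_FOR(o, f) is FALSE: skip the pager, the page cannot be
 *          on backing store.
 *   ... == VM_EXTERNAL_STATE_EXISTS
 *       -> LOOK_FOR(o, f) and PAGED_OUT(o, f) are both TRUE.
 *   ... == VM_EXTERNAL_STATE_UNKNOWN (or no existence map at all)
 *       -> LOOK_FOR(o, f) is TRUE, PAGED_OUT(o, f) is FALSE.
 */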
#define PREPARE_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	vm_page_lock_queues();				\
	MACRO_END

#define DO_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	PAGE_WAKEUP_DONE(m);				\
	if (!m->active && !m->inactive)			\
		vm_page_activate(m);			\
	vm_page_unlock_queues();			\
	MACRO_END

#define RELEASE_PAGE(m)					\
	MACRO_BEGIN					\
	PREPARE_RELEASE_PAGE(m);			\
	DO_RELEASE_PAGE(m);				\
	MACRO_END
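
/*
 * RELEASE_PAGE() is the common "done with this busy page" sequence:
 * wake up anyone sleeping on the page, put it back on the active queue
 * if it fell off the paging queues entirely, and drop the page queues
 * lock taken by PREPARE_RELEASE_PAGE().
 */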
#if TRACEFAULTPAGE
	dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset);	/* (TEST/DEBUG) */
#endif

#if	!VM_FAULT_STATIC_CONFIG
	if (vm_fault_dirty_handling
#if	MACH_KDB
		/*
		 *	If there are watchpoints set, then
		 *	we don't want to give away write permission
		 *	on a read fault.  Make the task write fault,
		 *	so that the watchpoint code notices the access.
		 */
	    || db_watchpoint_list
#endif	/* MACH_KDB */
	    ) {
		/*
		 *	If we aren't asking for write permission,
		 *	then don't give it away.  We're using write
		 *	faults to set the dirty bit.
		 */
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
	}

	if (!vm_fault_interruptible)
		interruptible = THREAD_UNINT;
#else	/* STATIC_CONFIG */
#if	MACH_KDB
		/*
		 *	If there are watchpoints set, then
		 *	we don't want to give away write permission
		 *	on a read fault.  Make the task write fault,
		 *	so that the watchpoint code notices the access.
		 */
	if (db_watchpoint_list) {
		/*
		 *	If we aren't asking for write permission,
		 *	then don't give it away.  We're using write
		 *	faults to set the dirty bit.
		 */
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
	}
#endif	/* MACH_KDB */
#endif	/* STATIC_CONFIG */

	interruptible_state = thread_interrupt_level(interruptible);
	/*
	 *	INVARIANTS (through entire routine):
	 *
	 *	1)	At all times, we must either have the object
	 *		lock or a busy page in some object to prevent
	 *		some other thread from trying to bring in
	 *		the same page.
	 *
	 *		Note that we cannot hold any locks during the
	 *		pager access or when waiting for memory, so
	 *		we use a busy page then.
	 *
	 *		Note also that we aren't as concerned about more than
	 *		one thread attempting to memory_object_data_unlock
	 *		the same page at once, so we don't hold the page
	 *		as busy then, but do record the highest unlock
	 *		value so far.  [Unlock requests may also be delivered
	 *		out of order.]
	 *
	 *	2)	To prevent another thread from racing us down the
	 *		shadow chain and entering a new page in the top
	 *		object before we do, we must keep a busy page in
	 *		the top object while following the shadow chain.
	 *
	 *	3)	We must increment paging_in_progress on any object
	 *		for which we have a busy page.
	 *
	 *	4)	We leave busy pages on the pageout queues.
	 *		If the pageout daemon comes across a busy page,
	 *		it will remove the page from the pageout queues.
	 */
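
	/*
	 * Concretely: before the pager is called below, the page being
	 * filled is marked busy and the object lock is dropped (invariant
	 * 1), and vm_object_paging_begin()/vm_object_paging_end() bracket
	 * every object traversed along the shadow chain (invariant 3).
	 */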
	/*
	 *	Search for the page at object/offset.
	 */

	object = first_object;
	offset = first_offset;
	first_m = VM_PAGE_NULL;
	access_required = fault_type;

	XPR(XPR_VM_FAULT,
		"vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
		(integer_t)object, offset, fault_type, *protection, 0);
576 * See whether this page is resident
581 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
583 if (!object
->alive
) {
584 vm_fault_cleanup(object
, first_m
);
585 thread_interrupt_level(interruptible_state
);
586 return(VM_FAULT_MEMORY_ERROR
);
588 m
= vm_page_lookup(object
, offset
);
590 dbgTrace(0xBEEF0004, (unsigned int) m
, (unsigned int) object
); /* (TEST/DEBUG) */
592 if (m
!= VM_PAGE_NULL
) {
594 * If the page was pre-paged as part of a
595 * cluster, record the fact.
598 vm_pagein_cluster_used
++;
599 m
->clustered
= FALSE
;
603 * If the page is being brought in,
604 * wait for it and then retry.
606 * A possible optimization: if the page
607 * is known to be resident, we can ignore
608 * pages that are absent (regardless of
609 * whether they're busy).
614 dbgTrace(0xBEEF0005, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
616 wait_result
= PAGE_SLEEP(object
, m
, interruptible
);
618 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
619 (integer_t
)object
, offset
,
621 counter(c_vm_fault_page_block_busy_kernel
++);
623 if (wait_result
!= THREAD_AWAKENED
) {
624 vm_fault_cleanup(object
, first_m
);
625 thread_interrupt_level(interruptible_state
);
626 if (wait_result
== THREAD_RESTART
)
628 return(VM_FAULT_RETRY
);
632 return(VM_FAULT_INTERRUPTED
);
639 * If the page is in error, give up now.
644 dbgTrace(0xBEEF0006, (unsigned int) m
, (unsigned int) error_code
); /* (TEST/DEBUG) */
647 *error_code
= m
->page_error
;
649 vm_fault_cleanup(object
, first_m
);
650 thread_interrupt_level(interruptible_state
);
651 return(VM_FAULT_MEMORY_ERROR
);
655 * If the pager wants us to restart
656 * at the top of the chain,
657 * typically because it has moved the
658 * page to another pager, then do so.
663 dbgTrace(0xBEEF0007, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
666 vm_fault_cleanup(object
, first_m
);
667 thread_interrupt_level(interruptible_state
);
668 return(VM_FAULT_RETRY
);
672 * If the page isn't busy, but is absent,
673 * then it was deemed "unavailable".
678 * Remove the non-existent page (unless it's
679 * in the top object) and move on down to the
680 * next object (if there is one).
683 dbgTrace(0xBEEF0008, (unsigned int) m
, (unsigned int) object
->shadow
); /* (TEST/DEBUG) */
686 next_object
= object
->shadow
;
687 if (next_object
== VM_OBJECT_NULL
) {
690 assert(!must_be_resident
);
692 if (object
->shadow_severed
) {
695 thread_interrupt_level(interruptible_state
);
696 return VM_FAULT_MEMORY_ERROR
;
700 * Absent page at bottom of shadow
701 * chain; zero fill the page we left
702 * busy in the first object, and flush
703 * the absent page. But first we
704 * need to allocate a real page.
706 if (VM_PAGE_THROTTLED() ||
707 (real_m
= vm_page_grab())
711 thread_interrupt_level(
712 interruptible_state
);
714 VM_FAULT_MEMORY_SHORTAGE
);
718 * are we protecting the system from
719 * backing store exhaustion. If so
720 * sleep unless we are privileged.
723 if(vm_backing_store_low
) {
724 if(!(current_task()->priv_flags
725 & VM_BACKING_STORE_PRIV
)) {
726 assert_wait((event_t
)
727 &vm_backing_store_low
,
729 vm_fault_cleanup(object
,
731 thread_block((void(*)(void)) 0);
732 thread_interrupt_level(
733 interruptible_state
);
734 return(VM_FAULT_RETRY
);
740 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
741 (integer_t
)object
, offset
,
743 (integer_t
)first_object
, 0);
744 if (object
!= first_object
) {
746 vm_object_paging_end(object
);
747 vm_object_unlock(object
);
748 object
= first_object
;
749 offset
= first_offset
;
751 first_m
= VM_PAGE_NULL
;
752 vm_object_lock(object
);
756 assert(real_m
->busy
);
757 vm_page_insert(real_m
, object
, offset
);
761 * Drop the lock while zero filling
762 * page. Then break because this
763 * is the page we wanted. Checking
764 * the page lock is a waste of time;
765 * this page was either absent or
766 * newly allocated -- in both cases
767 * it can't be page locked by a pager.
772 vm_object_unlock(object
);
773 vm_page_zero_fill(m
);
774 vm_object_lock(object
);
777 *type_of_fault
= DBG_ZERO_FILL_FAULT
;
778 VM_STAT(zero_fill_count
++);
780 if (bumped_pagein
== TRUE
) {
782 current_task()->pageins
--;
785 pmap_clear_modify(m
->phys_page
);
787 vm_page_lock_queues();
788 VM_PAGE_QUEUES_REMOVE(m
);
789 m
->page_ticket
= vm_page_ticket
;
790 if(m
->object
->size
> 0x80000) {
792 /* depends on the queues lock */
794 queue_enter(&vm_page_queue_zf
,
795 m
, vm_page_t
, pageq
);
798 &vm_page_queue_inactive
,
799 m
, vm_page_t
, pageq
);
801 vm_page_ticket_roll
++;
802 if(vm_page_ticket_roll
==
803 VM_PAGE_TICKETS_IN_ROLL
) {
804 vm_page_ticket_roll
= 0;
806 VM_PAGE_TICKET_ROLL_IDS
)
812 vm_page_inactive_count
++;
813 vm_page_unlock_queues();
816 if (must_be_resident
) {
817 vm_object_paging_end(object
);
818 } else if (object
!= first_object
) {
819 vm_object_paging_end(object
);
825 vm_object_absent_release(object
);
828 vm_page_lock_queues();
829 VM_PAGE_QUEUES_REMOVE(m
);
830 vm_page_unlock_queues();
833 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
834 (integer_t
)object
, offset
,
835 (integer_t
)next_object
,
836 offset
+object
->shadow_offset
,0);
837 offset
+= object
->shadow_offset
;
838 hi_offset
+= object
->shadow_offset
;
839 lo_offset
+= object
->shadow_offset
;
840 access_required
= VM_PROT_READ
;
841 vm_object_lock(next_object
);
842 vm_object_unlock(object
);
843 object
= next_object
;
844 vm_object_paging_begin(object
);
850 && ((object
!= first_object
) ||
851 (object
->copy
!= VM_OBJECT_NULL
))
852 && (fault_type
& VM_PROT_WRITE
)) {
854 * This is a copy-on-write fault that will
855 * cause us to revoke access to this page, but
856 * this page is in the process of being cleaned
857 * in a clustered pageout. We must wait until
858 * the cleaning operation completes before
859 * revoking access to the original page,
860 * otherwise we might attempt to remove a
864 dbgTrace(0xBEEF0009, (unsigned int) m
, (unsigned int) offset
); /* (TEST/DEBUG) */
867 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
868 (integer_t
)object
, offset
,
870 /* take an extra ref so that object won't die */
871 assert(object
->ref_count
> 0);
873 vm_object_res_reference(object
);
874 vm_fault_cleanup(object
, first_m
);
875 counter(c_vm_fault_page_block_backoff_kernel
++);
876 vm_object_lock(object
);
877 assert(object
->ref_count
> 0);
878 m
= vm_page_lookup(object
, offset
);
879 if (m
!= VM_PAGE_NULL
&& m
->cleaning
) {
880 PAGE_ASSERT_WAIT(m
, interruptible
);
881 vm_object_unlock(object
);
882 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
883 vm_object_deallocate(object
);
886 vm_object_unlock(object
);
887 vm_object_deallocate(object
);
888 thread_interrupt_level(interruptible_state
);
889 return VM_FAULT_RETRY
;
894 * If the desired access to this page has
895 * been locked out, request that it be unlocked.
898 if (access_required
& m
->page_lock
) {
899 if ((access_required
& m
->unlock_request
) != access_required
) {
900 vm_prot_t new_unlock_request
;
904 dbgTrace(0xBEEF000A, (unsigned int) m
, (unsigned int) object
->pager_ready
); /* (TEST/DEBUG) */
906 if (!object
->pager_ready
) {
908 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
910 (integer_t
)object
, offset
,
912 /* take an extra ref */
913 assert(object
->ref_count
> 0);
915 vm_object_res_reference(object
);
916 vm_fault_cleanup(object
,
918 counter(c_vm_fault_page_block_backoff_kernel
++);
919 vm_object_lock(object
);
920 assert(object
->ref_count
> 0);
921 if (!object
->pager_ready
) {
922 wait_result
= vm_object_assert_wait(
924 VM_OBJECT_EVENT_PAGER_READY
,
926 vm_object_unlock(object
);
927 if (wait_result
== THREAD_WAITING
)
928 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
929 vm_object_deallocate(object
);
932 vm_object_unlock(object
);
933 vm_object_deallocate(object
);
934 thread_interrupt_level(interruptible_state
);
935 return VM_FAULT_RETRY
;
939 new_unlock_request
= m
->unlock_request
=
940 (access_required
| m
->unlock_request
);
941 vm_object_unlock(object
);
943 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
944 (integer_t
)object
, offset
,
945 (integer_t
)m
, new_unlock_request
, 0);
946 if ((rc
= memory_object_data_unlock(
948 offset
+ object
->paging_offset
,
953 printf("vm_fault: memory_object_data_unlock failed\n");
954 vm_object_lock(object
);
955 vm_fault_cleanup(object
, first_m
);
956 thread_interrupt_level(interruptible_state
);
957 return((rc
== MACH_SEND_INTERRUPTED
) ?
958 VM_FAULT_INTERRUPTED
:
959 VM_FAULT_MEMORY_ERROR
);
961 vm_object_lock(object
);
966 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
967 access_required
, (integer_t
)object
,
968 offset
, (integer_t
)m
, 0);
969 /* take an extra ref so object won't die */
970 assert(object
->ref_count
> 0);
972 vm_object_res_reference(object
);
973 vm_fault_cleanup(object
, first_m
);
974 counter(c_vm_fault_page_block_backoff_kernel
++);
975 vm_object_lock(object
);
976 assert(object
->ref_count
> 0);
977 m
= vm_page_lookup(object
, offset
);
978 if (m
!= VM_PAGE_NULL
&&
979 (access_required
& m
->page_lock
) &&
980 !((access_required
& m
->unlock_request
) != access_required
)) {
981 PAGE_ASSERT_WAIT(m
, interruptible
);
982 vm_object_unlock(object
);
983 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
984 vm_object_deallocate(object
);
987 vm_object_unlock(object
);
988 vm_object_deallocate(object
);
989 thread_interrupt_level(interruptible_state
);
990 return VM_FAULT_RETRY
;
		/*
		 *	We mark the page busy and leave it on
		 *	the pageout queues.  If the pageout
		 *	daemon comes across it, then it will
		 *	remove the page from the pageout queues.
		 */
1001 dbgTrace(0xBEEF000B, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
1004 #if !VM_FAULT_STATIC_CONFIG
1005 if (!software_reference_bits
) {
1006 vm_page_lock_queues();
1008 vm_stat
.reactivations
++;
1010 VM_PAGE_QUEUES_REMOVE(m
);
1011 vm_page_unlock_queues();
1015 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
1016 (integer_t
)object
, offset
, (integer_t
)m
, 0, 0);
1024 (object
->pager_created
) &&
1025 LOOK_FOR(object
, offset
) &&
1029 dbgTrace(0xBEEF000C, (unsigned int) look_for_page
, (unsigned int) object
); /* (TEST/DEBUG) */
1031 if ((look_for_page
|| (object
== first_object
))
1032 && !must_be_resident
1033 && !(object
->phys_contiguous
)) {
1035 * Allocate a new page for this object/offset
1039 m
= vm_page_grab_fictitious();
1041 dbgTrace(0xBEEF000D, (unsigned int) m
, (unsigned int) object
); /* (TEST/DEBUG) */
1043 if (m
== VM_PAGE_NULL
) {
1044 vm_fault_cleanup(object
, first_m
);
1045 thread_interrupt_level(interruptible_state
);
1046 return(VM_FAULT_FICTITIOUS_SHORTAGE
);
1048 vm_page_insert(m
, object
, offset
);
1051 if ((look_for_page
&& !must_be_resident
)) {
1055 * If the memory manager is not ready, we
1056 * cannot make requests.
1058 if (!object
->pager_ready
) {
1060 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
1062 if(m
!= VM_PAGE_NULL
)
1065 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
1066 (integer_t
)object
, offset
, 0, 0, 0);
1067 /* take an extra ref so object won't die */
1068 assert(object
->ref_count
> 0);
1069 object
->ref_count
++;
1070 vm_object_res_reference(object
);
1071 vm_fault_cleanup(object
, first_m
);
1072 counter(c_vm_fault_page_block_backoff_kernel
++);
1073 vm_object_lock(object
);
1074 assert(object
->ref_count
> 0);
1075 if (!object
->pager_ready
) {
1076 wait_result
= vm_object_assert_wait(object
,
1077 VM_OBJECT_EVENT_PAGER_READY
,
1079 vm_object_unlock(object
);
1080 if (wait_result
== THREAD_WAITING
)
1081 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1082 vm_object_deallocate(object
);
1085 vm_object_unlock(object
);
1086 vm_object_deallocate(object
);
1087 thread_interrupt_level(interruptible_state
);
1088 return VM_FAULT_RETRY
;
1092 if(object
->phys_contiguous
) {
1093 if(m
!= VM_PAGE_NULL
) {
1099 if (object
->internal
) {
1101 * Requests to the default pager
1102 * must reserve a real page in advance,
1103 * because the pager's data-provided
1104 * won't block for pages. IMPORTANT:
1105 * this acts as a throttling mechanism
1106 * for data_requests to the default
1111 dbgTrace(0xBEEF000F, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
1113 if (m
->fictitious
&& !vm_page_convert(m
)) {
1115 vm_fault_cleanup(object
, first_m
);
1116 thread_interrupt_level(interruptible_state
);
1117 return(VM_FAULT_MEMORY_SHORTAGE
);
1119 } else if (object
->absent_count
>
1120 vm_object_absent_max
) {
1122 * If there are too many outstanding page
1123 * requests pending on this object, we
1124 * wait for them to be resolved now.
1128 dbgTrace(0xBEEF0010, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
1130 if(m
!= VM_PAGE_NULL
)
1132 /* take an extra ref so object won't die */
1133 assert(object
->ref_count
> 0);
1134 object
->ref_count
++;
1135 vm_object_res_reference(object
);
1136 vm_fault_cleanup(object
, first_m
);
1137 counter(c_vm_fault_page_block_backoff_kernel
++);
1138 vm_object_lock(object
);
1139 assert(object
->ref_count
> 0);
1140 if (object
->absent_count
> vm_object_absent_max
) {
1141 vm_object_absent_assert_wait(object
,
1143 vm_object_unlock(object
);
1144 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1145 vm_object_deallocate(object
);
1148 vm_object_unlock(object
);
1149 vm_object_deallocate(object
);
1150 thread_interrupt_level(interruptible_state
);
1151 return VM_FAULT_RETRY
;
1156 * Indicate that the page is waiting for data
1157 * from the memory manager.
1160 if(m
!= VM_PAGE_NULL
) {
1162 m
->list_req_pending
= TRUE
;
1165 object
->absent_count
++;
1170 cluster_start
= offset
;
1174 * lengthen the cluster by the pages in the working set
1177 (current_task()->dynamic_working_set
!= 0)) {
1178 cluster_end
= cluster_start
+ length
;
1179 /* tws values for start and end are just a
1180 * suggestions. Therefore, as long as
1181 * build_cluster does not use pointers or
1182 * take action based on values that
1183 * could be affected by re-entrance we
1184 * do not need to take the map lock.
1186 cluster_end
= offset
+ PAGE_SIZE_64
;
1187 tws_build_cluster((tws_hash_t
)
1188 current_task()->dynamic_working_set
,
1189 object
, &cluster_start
,
1190 &cluster_end
, 0x40000);
1191 length
= cluster_end
- cluster_start
;
1194 dbgTrace(0xBEEF0012, (unsigned int) object
, (unsigned int) 0); /* (TEST/DEBUG) */
1197 * We have a busy page, so we can
1198 * release the object lock.
1200 vm_object_unlock(object
);
1203 * Call the memory manager to retrieve the data.
1207 *type_of_fault
= (length
<< 8) | DBG_PAGEIN_FAULT
;
1209 current_task()->pageins
++;
1210 bumped_pagein
= TRUE
;
1213 * If this object uses a copy_call strategy,
1214 * and we are interested in a copy of this object
1215 * (having gotten here only by following a
1216 * shadow chain), then tell the memory manager
1217 * via a flag added to the desired_access
1218 * parameter, so that it can detect a race
1219 * between our walking down the shadow chain
1220 * and its pushing pages up into a copy of
1221 * the object that it manages.
1224 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_CALL
&&
1225 object
!= first_object
) {
1226 wants_copy_flag
= VM_PROT_WANTS_COPY
;
1228 wants_copy_flag
= VM_PROT_NONE
;
1232 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1233 (integer_t
)object
, offset
, (integer_t
)m
,
1234 access_required
| wants_copy_flag
, 0);
1236 rc
= memory_object_data_request(object
->pager
,
1237 cluster_start
+ object
->paging_offset
,
1239 access_required
| wants_copy_flag
);
1243 dbgTrace(0xBEEF0013, (unsigned int) object
, (unsigned int) rc
); /* (TEST/DEBUG) */
1245 if (rc
!= KERN_SUCCESS
) {
1246 if (rc
!= MACH_SEND_INTERRUPTED
1248 printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
1249 "memory_object_data_request",
1251 cluster_start
+ object
->paging_offset
,
1252 length
, access_required
, rc
);
1254 * Don't want to leave a busy page around,
1255 * but the data request may have blocked,
1256 * so check if it's still there and busy.
1258 if(!object
->phys_contiguous
) {
1259 vm_object_lock(object
);
1260 for (; length
; length
-= PAGE_SIZE
,
1261 cluster_start
+= PAGE_SIZE_64
) {
1263 if ((p
= vm_page_lookup(object
,
1265 && p
->absent
&& p
->busy
1271 vm_fault_cleanup(object
, first_m
);
1272 thread_interrupt_level(interruptible_state
);
1273 return((rc
== MACH_SEND_INTERRUPTED
) ?
1274 VM_FAULT_INTERRUPTED
:
1275 VM_FAULT_MEMORY_ERROR
);
1278 tws_hash_line_t line
;
1281 task
= current_task();
1284 (task
->dynamic_working_set
!= 0))
1285 && !(object
->private)) {
1286 vm_object_t base_object
;
1287 vm_object_offset_t base_offset
;
1288 base_object
= object
;
1289 base_offset
= offset
;
1290 while(base_object
->shadow
) {
1292 base_object
->shadow_offset
;
1294 base_object
->shadow
;
1298 task
->dynamic_working_set
,
1299 base_offset
, base_object
,
1300 &line
) == KERN_SUCCESS
) {
1301 tws_line_signal((tws_hash_t
)
1302 task
->dynamic_working_set
,
1310 * Retry with same object/offset, since new data may
1311 * be in a different page (i.e., m is meaningless at
1314 vm_object_lock(object
);
1315 if ((interruptible
!= THREAD_UNINT
) &&
1316 (current_thread()->state
& TH_ABORT
)) {
1317 vm_fault_cleanup(object
, first_m
);
1318 thread_interrupt_level(interruptible_state
);
1319 return(VM_FAULT_INTERRUPTED
);
1321 if(m
== VM_PAGE_NULL
)
1327 * The only case in which we get here is if
1328 * object has no pager (or unwiring). If the pager doesn't
1329 * have the page this is handled in the m->absent case above
1330 * (and if you change things here you should look above).
1333 dbgTrace(0xBEEF0014, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1335 if (object
== first_object
)
1338 assert(m
== VM_PAGE_NULL
);
1341 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1342 (integer_t
)object
, offset
, (integer_t
)m
,
1343 (integer_t
)object
->shadow
, 0);
1345 * Move on to the next object. Lock the next
1346 * object before unlocking the current one.
1348 next_object
= object
->shadow
;
1349 if (next_object
== VM_OBJECT_NULL
) {
1350 assert(!must_be_resident
);
1352 * If there's no object left, fill the page
1353 * in the top object with zeros. But first we
1354 * need to allocate a real page.
1357 if (object
!= first_object
) {
1358 vm_object_paging_end(object
);
1359 vm_object_unlock(object
);
1361 object
= first_object
;
1362 offset
= first_offset
;
1363 vm_object_lock(object
);
1367 assert(m
->object
== object
);
1368 first_m
= VM_PAGE_NULL
;
1370 if(m
== VM_PAGE_NULL
) {
1372 if (m
== VM_PAGE_NULL
) {
1374 object
, VM_PAGE_NULL
);
1375 thread_interrupt_level(
1376 interruptible_state
);
1377 return(VM_FAULT_MEMORY_SHORTAGE
);
1383 if (object
->shadow_severed
) {
1385 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1386 thread_interrupt_level(interruptible_state
);
1387 return VM_FAULT_MEMORY_ERROR
;
1391 * are we protecting the system from
1392 * backing store exhaustion. If so
1393 * sleep unless we are privileged.
1396 if(vm_backing_store_low
) {
1397 if(!(current_task()->priv_flags
1398 & VM_BACKING_STORE_PRIV
)) {
1399 assert_wait((event_t
)
1400 &vm_backing_store_low
,
1403 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1404 thread_block((void (*)(void)) 0);
1405 thread_interrupt_level(
1406 interruptible_state
);
1407 return(VM_FAULT_RETRY
);
1411 if (VM_PAGE_THROTTLED() ||
1412 (m
->fictitious
&& !vm_page_convert(m
))) {
1414 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1415 thread_interrupt_level(interruptible_state
);
1416 return(VM_FAULT_MEMORY_SHORTAGE
);
1418 m
->no_isync
= FALSE
;
1420 if (!no_zero_fill
) {
1421 vm_object_unlock(object
);
1422 vm_page_zero_fill(m
);
1423 vm_object_lock(object
);
1426 *type_of_fault
= DBG_ZERO_FILL_FAULT
;
1427 VM_STAT(zero_fill_count
++);
1429 if (bumped_pagein
== TRUE
) {
1431 current_task()->pageins
--;
1434 vm_page_lock_queues();
1435 VM_PAGE_QUEUES_REMOVE(m
);
1436 if(m
->object
->size
> 0x80000) {
1437 m
->zero_fill
= TRUE
;
1438 /* depends on the queues lock */
1440 queue_enter(&vm_page_queue_zf
,
1441 m
, vm_page_t
, pageq
);
1444 &vm_page_queue_inactive
,
1445 m
, vm_page_t
, pageq
);
1447 m
->page_ticket
= vm_page_ticket
;
1448 vm_page_ticket_roll
++;
1449 if(vm_page_ticket_roll
== VM_PAGE_TICKETS_IN_ROLL
) {
1450 vm_page_ticket_roll
= 0;
1451 if(vm_page_ticket
==
1452 VM_PAGE_TICKET_ROLL_IDS
)
1458 vm_page_inactive_count
++;
1459 vm_page_unlock_queues();
1461 pmap_clear_modify(m
->phys_page
);
1466 if ((object
!= first_object
) || must_be_resident
)
1467 vm_object_paging_end(object
);
1468 offset
+= object
->shadow_offset
;
1469 hi_offset
+= object
->shadow_offset
;
1470 lo_offset
+= object
->shadow_offset
;
1471 access_required
= VM_PROT_READ
;
1472 vm_object_lock(next_object
);
1473 vm_object_unlock(object
);
1474 object
= next_object
;
1475 vm_object_paging_begin(object
);
	/*
	 *	PAGE HAS BEEN FOUND.
	 *
	 *	This page (m) is:
	 *		busy, so that we can play with it;
	 *		not absent, so that nobody else will fill it;
	 *		possibly eligible for pageout;
	 *
	 *	The top-level page (first_m) is:
	 *		VM_PAGE_NULL if the page was found in the
	 *		 top-level object;
	 *		busy, not absent, and ineligible for pageout.
	 *
	 *	The current object (object) is locked.  A paging
	 *	reference is held for the current and top-level
	 *	objects.
	 */
1498 dbgTrace(0xBEEF0015, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1500 #if EXTRA_ASSERTIONS
1501 if(m
!= VM_PAGE_NULL
) {
1502 assert(m
->busy
&& !m
->absent
);
1503 assert((first_m
== VM_PAGE_NULL
) ||
1504 (first_m
->busy
&& !first_m
->absent
&&
1505 !first_m
->active
&& !first_m
->inactive
));
1507 #endif /* EXTRA_ASSERTIONS */
1510 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1511 (integer_t
)object
, offset
, (integer_t
)m
,
1512 (integer_t
)first_object
, (integer_t
)first_m
);
1514 * If the page is being written, but isn't
1515 * already owned by the top-level object,
1516 * we have to copy it into a new page owned
1517 * by the top-level object.
1520 if ((object
!= first_object
) && (m
!= VM_PAGE_NULL
)) {
1522 * We only really need to copy if we
1527 dbgTrace(0xBEEF0016, (unsigned int) object
, (unsigned int) fault_type
); /* (TEST/DEBUG) */
1529 if (fault_type
& VM_PROT_WRITE
) {
1532 assert(!must_be_resident
);
1535 * are we protecting the system from
1536 * backing store exhaustion. If so
1537 * sleep unless we are privileged.
1540 if(vm_backing_store_low
) {
1541 if(!(current_task()->priv_flags
1542 & VM_BACKING_STORE_PRIV
)) {
1543 assert_wait((event_t
)
1544 &vm_backing_store_low
,
1547 vm_fault_cleanup(object
, first_m
);
1548 thread_block((void (*)(void)) 0);
1549 thread_interrupt_level(
1550 interruptible_state
);
1551 return(VM_FAULT_RETRY
);
			/*
			 *	If we try to collapse first_object at this
			 *	point, we may deadlock when we try to get
			 *	the lock on an intermediate object (since we
			 *	have the bottom object locked).  We can't
			 *	unlock the bottom object, because the page
			 *	we found may move (by collapse) if we do.
			 *
			 *	Instead, we first copy the page.  Then, when
			 *	we have no more use for the bottom object,
			 *	we unlock it and try to collapse.
			 *
			 *	Note that we copy the page even if we didn't
			 *	need to... that's the breaks.
			 */

			/*
			 *	Allocate a page for the copy
			 */
1574 copy_m
= vm_page_grab();
1575 if (copy_m
== VM_PAGE_NULL
) {
1577 vm_fault_cleanup(object
, first_m
);
1578 thread_interrupt_level(interruptible_state
);
1579 return(VM_FAULT_MEMORY_SHORTAGE
);
1584 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1585 (integer_t
)object
, offset
,
1586 (integer_t
)m
, (integer_t
)copy_m
, 0);
1587 vm_page_copy(m
, copy_m
);
1590 * If another map is truly sharing this
1591 * page with us, we have to flush all
1592 * uses of the original page, since we
1593 * can't distinguish those which want the
1594 * original from those which need the
1597 * XXXO If we know that only one map has
1598 * access to this page, then we could
1599 * avoid the pmap_page_protect() call.
1602 vm_page_lock_queues();
1603 assert(!m
->cleaning
);
1604 pmap_page_protect(m
->phys_page
, VM_PROT_NONE
);
1605 vm_page_deactivate(m
);
1606 copy_m
->dirty
= TRUE
;
1608 * Setting reference here prevents this fault from
1609 * being counted as a (per-thread) reactivate as well
1610 * as a copy-on-write.
1612 first_m
->reference
= TRUE
;
1613 vm_page_unlock_queues();
1616 * We no longer need the old page or object.
1619 PAGE_WAKEUP_DONE(m
);
1620 vm_object_paging_end(object
);
1621 vm_object_unlock(object
);
1624 *type_of_fault
= DBG_COW_FAULT
;
1625 VM_STAT(cow_faults
++);
1626 current_task()->cow_faults
++;
1627 object
= first_object
;
1628 offset
= first_offset
;
1630 vm_object_lock(object
);
1631 VM_PAGE_FREE(first_m
);
1632 first_m
= VM_PAGE_NULL
;
1633 assert(copy_m
->busy
);
1634 vm_page_insert(copy_m
, object
, offset
);
1638 * Now that we've gotten the copy out of the
1639 * way, let's try to collapse the top object.
1640 * But we have to play ugly games with
1641 * paging_in_progress to do that...
1644 vm_object_paging_end(object
);
1645 vm_object_collapse(object
, offset
);
1646 vm_object_paging_begin(object
);
1650 *protection
&= (~VM_PROT_WRITE
);
	/*
	 *	Now check whether the page needs to be pushed into the
	 *	copy object.  The use of asymmetric copy on write for
	 *	shared temporary objects means that we may do two copies to
	 *	satisfy the fault; one above to get the page from a
	 *	shadowed object, and one here to push it into the copy.
	 */
1662 while ((copy_object
= first_object
->copy
) != VM_OBJECT_NULL
&&
1663 (m
!= VM_PAGE_NULL
)) {
1664 vm_object_offset_t copy_offset
;
1668 dbgTrace(0xBEEF0017, (unsigned int) copy_object
, (unsigned int) fault_type
); /* (TEST/DEBUG) */
1671 * If the page is being written, but hasn't been
1672 * copied to the copy-object, we have to copy it there.
1675 if ((fault_type
& VM_PROT_WRITE
) == 0) {
1676 *protection
&= ~VM_PROT_WRITE
;
1681 * If the page was guaranteed to be resident,
1682 * we must have already performed the copy.
1685 if (must_be_resident
)
1689 * Try to get the lock on the copy_object.
1691 if (!vm_object_lock_try(copy_object
)) {
1692 vm_object_unlock(object
);
1694 mutex_pause(); /* wait a bit */
1696 vm_object_lock(object
);
1701 * Make another reference to the copy-object,
1702 * to keep it from disappearing during the
1705 assert(copy_object
->ref_count
> 0);
1706 copy_object
->ref_count
++;
1707 VM_OBJ_RES_INCR(copy_object
);
1710 * Does the page exist in the copy?
1712 copy_offset
= first_offset
- copy_object
->shadow_offset
;
1713 if (copy_object
->size
<= copy_offset
)
1715 * Copy object doesn't cover this page -- do nothing.
1719 vm_page_lookup(copy_object
, copy_offset
)) != VM_PAGE_NULL
) {
1720 /* Page currently exists in the copy object */
1723 * If the page is being brought
1724 * in, wait for it and then retry.
1727 /* take an extra ref so object won't die */
1728 assert(copy_object
->ref_count
> 0);
1729 copy_object
->ref_count
++;
1730 vm_object_res_reference(copy_object
);
1731 vm_object_unlock(copy_object
);
1732 vm_fault_cleanup(object
, first_m
);
1733 counter(c_vm_fault_page_block_backoff_kernel
++);
1734 vm_object_lock(copy_object
);
1735 assert(copy_object
->ref_count
> 0);
1736 VM_OBJ_RES_DECR(copy_object
);
1737 copy_object
->ref_count
--;
1738 assert(copy_object
->ref_count
> 0);
1739 copy_m
= vm_page_lookup(copy_object
, copy_offset
);
1740 if (copy_m
!= VM_PAGE_NULL
&& copy_m
->busy
) {
1741 PAGE_ASSERT_WAIT(copy_m
, interruptible
);
1742 vm_object_unlock(copy_object
);
1743 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1744 vm_object_deallocate(copy_object
);
1747 vm_object_unlock(copy_object
);
1748 vm_object_deallocate(copy_object
);
1749 thread_interrupt_level(interruptible_state
);
1750 return VM_FAULT_RETRY
;
1754 else if (!PAGED_OUT(copy_object
, copy_offset
)) {
		/*
		 *	If PAGED_OUT is TRUE, then the page used to exist
		 *	in the copy-object, and has already been paged out.
		 *	We don't need to repeat this.  If PAGED_OUT is
		 *	FALSE, then either we don't know (!pager_created,
		 *	for example) or it hasn't been paged out.
		 *	(VM_EXTERNAL_STATE_UNKNOWN || VM_EXTERNAL_STATE_ABSENT)
		 *	We must copy the page to the copy object.
		 */
1766 * are we protecting the system from
1767 * backing store exhaustion. If so
1768 * sleep unless we are privileged.
1771 if(vm_backing_store_low
) {
1772 if(!(current_task()->priv_flags
1773 & VM_BACKING_STORE_PRIV
)) {
1774 assert_wait((event_t
)
1775 &vm_backing_store_low
,
1778 VM_OBJ_RES_DECR(copy_object
);
1779 copy_object
->ref_count
--;
1780 assert(copy_object
->ref_count
> 0);
1781 vm_object_unlock(copy_object
);
1782 vm_fault_cleanup(object
, first_m
);
1783 thread_block((void (*)(void)) 0);
1784 thread_interrupt_level(
1785 interruptible_state
);
1786 return(VM_FAULT_RETRY
);
1791 * Allocate a page for the copy
1793 copy_m
= vm_page_alloc(copy_object
, copy_offset
);
1794 if (copy_m
== VM_PAGE_NULL
) {
1796 VM_OBJ_RES_DECR(copy_object
);
1797 copy_object
->ref_count
--;
1798 assert(copy_object
->ref_count
> 0);
1799 vm_object_unlock(copy_object
);
1800 vm_fault_cleanup(object
, first_m
);
1801 thread_interrupt_level(interruptible_state
);
1802 return(VM_FAULT_MEMORY_SHORTAGE
);
1806 * Must copy page into copy-object.
1809 vm_page_copy(m
, copy_m
);
1812 * If the old page was in use by any users
1813 * of the copy-object, it must be removed
1814 * from all pmaps. (We can't know which
1818 vm_page_lock_queues();
1819 assert(!m
->cleaning
);
1820 pmap_page_protect(m
->phys_page
, VM_PROT_NONE
);
1821 copy_m
->dirty
= TRUE
;
1822 vm_page_unlock_queues();
1825 * If there's a pager, then immediately
1826 * page out this page, using the "initialize"
1827 * option. Else, we use the copy.
1832 ((!copy_object
->pager_created
) ||
1833 vm_external_state_get(
1834 copy_object
->existence_map
, copy_offset
)
1835 == VM_EXTERNAL_STATE_ABSENT
)
1837 (!copy_object
->pager_created
)
1840 vm_page_lock_queues();
1841 vm_page_activate(copy_m
);
1842 vm_page_unlock_queues();
1843 PAGE_WAKEUP_DONE(copy_m
);
1846 assert(copy_m
->busy
== TRUE
);
1849 * The page is already ready for pageout:
1850 * not on pageout queues and busy.
1851 * Unlock everything except the
1852 * copy_object itself.
1855 vm_object_unlock(object
);
1858 * Write the page to the copy-object,
1859 * flushing it from the kernel.
1862 vm_pageout_initialize_page(copy_m
);
1865 * Since the pageout may have
1866 * temporarily dropped the
1867 * copy_object's lock, we
1868 * check whether we'll have
1869 * to deallocate the hard way.
1872 if ((copy_object
->shadow
!= object
) ||
1873 (copy_object
->ref_count
== 1)) {
1874 vm_object_unlock(copy_object
);
1875 vm_object_deallocate(copy_object
);
1876 vm_object_lock(object
);
1881 * Pick back up the old object's
1882 * lock. [It is safe to do so,
1883 * since it must be deeper in the
1887 vm_object_lock(object
);
1891 * Because we're pushing a page upward
1892 * in the object tree, we must restart
1893 * any faults that are waiting here.
1894 * [Note that this is an expansion of
1895 * PAGE_WAKEUP that uses the THREAD_RESTART
1896 * wait result]. Can't turn off the page's
1897 * busy bit because we're not done with it.
1902 thread_wakeup_with_result((event_t
) m
,
1908 * The reference count on copy_object must be
1909 * at least 2: one for our extra reference,
1910 * and at least one from the outside world
1911 * (we checked that when we last locked
1914 copy_object
->ref_count
--;
1915 assert(copy_object
->ref_count
> 0);
1916 VM_OBJ_RES_DECR(copy_object
);
1917 vm_object_unlock(copy_object
);
1923 *top_page
= first_m
;
1926 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1927 (integer_t
)object
, offset
, (integer_t
)m
, (integer_t
)first_m
, 0);
1929 * If the page can be written, assume that it will be.
1930 * [Earlier, we restrict the permission to allow write
1931 * access only if the fault so required, so we don't
1932 * mark read-only data as dirty.]
1936 if(m
!= VM_PAGE_NULL
) {
1937 #if !VM_FAULT_STATIC_CONFIG
1938 if (vm_fault_dirty_handling
&& (*protection
& VM_PROT_WRITE
))
1941 if (vm_page_deactivate_behind
)
1942 vm_fault_deactivate_behind(object
, offset
, behavior
);
1944 vm_object_unlock(object
);
1946 thread_interrupt_level(interruptible_state
);
1949 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS
, 0); /* (TEST/DEBUG) */
1951 return(VM_FAULT_SUCCESS
);
1955 vm_fault_cleanup(object
, first_m
);
1957 counter(c_vm_fault_page_block_backoff_kernel
++);
1958 thread_block(THREAD_CONTINUE_NULL
);
1962 thread_interrupt_level(interruptible_state
);
1963 if (wait_result
== THREAD_INTERRUPTED
)
1964 return VM_FAULT_INTERRUPTED
;
1965 return VM_FAULT_RETRY
;
/*
 *	Routine:	vm_fault_tws_insert
 *	Purpose:
 *		Add fault information to the task working set.
 *	Implementation:
 *		We always insert the base object/offset pair
 *		rather than the actual object/offset.
 *	Assumptions:
 *		Map and pmap_map locked.
 *		Object locked and referenced.
 *	Returns:
 *		TRUE if startup file should be written.
 *		With object locked and still referenced.
 *		But we may drop the object lock temporarily.
 */
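
/*
 * The boolean result feeds the write_startup_file flag in vm_fault():
 * it indicates that the task's startup pagein profile should be written
 * out once the fault has been resolved and all locks have been dropped.
 */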
1986 vm_fault_tws_insert(
1991 vm_object_offset_t offset
)
1993 tws_hash_line_t line
;
1996 boolean_t result
= FALSE
;
1997 extern vm_map_t kalloc_map
;
1999 /* Avoid possible map lock deadlock issues */
2000 if (map
== kernel_map
|| map
== kalloc_map
||
2001 pmap_map
== kernel_map
|| pmap_map
== kalloc_map
)
2004 task
= current_task();
2005 if (task
->dynamic_working_set
!= 0) {
2006 vm_object_t base_object
;
2007 vm_object_t base_shadow
;
2008 vm_object_offset_t base_offset
;
2009 base_object
= object
;
2010 base_offset
= offset
;
2011 while(base_shadow
= base_object
->shadow
) {
2012 vm_object_lock(base_shadow
);
2013 vm_object_unlock(base_object
);
2015 base_object
->shadow_offset
;
2016 base_object
= base_shadow
;
2018 kr
= tws_lookup((tws_hash_t
)
2019 task
->dynamic_working_set
,
2020 base_offset
, base_object
,
2022 if (kr
== KERN_OPERATION_TIMED_OUT
){
2024 if (base_object
!= object
) {
2025 vm_object_unlock(base_object
);
2026 vm_object_lock(object
);
2028 } else if (kr
!= KERN_SUCCESS
) {
2029 if(base_object
!= object
)
2030 vm_object_reference_locked(base_object
);
2031 kr
= tws_insert((tws_hash_t
)
2032 task
->dynamic_working_set
,
2033 base_offset
, base_object
,
2035 if(base_object
!= object
) {
2036 vm_object_unlock(base_object
);
2037 vm_object_deallocate(base_object
);
2039 if(kr
== KERN_NO_SPACE
) {
2040 if (base_object
== object
)
2041 vm_object_unlock(object
);
2042 tws_expand_working_set(
2043 task
->dynamic_working_set
,
2044 TWS_HASH_LINE_COUNT
,
2046 if (base_object
== object
)
2047 vm_object_lock(object
);
2048 } else if(kr
== KERN_OPERATION_TIMED_OUT
) {
2051 if(base_object
!= object
)
2052 vm_object_lock(object
);
2053 } else if (base_object
!= object
) {
2054 vm_object_unlock(base_object
);
2055 vm_object_lock(object
);
/*
 *	Routine:	vm_fault
 *	Purpose:
 *		Handle page faults, including pseudo-faults
 *		used to change the wiring status of pages.
 *	Returns:
 *		Explicit continuations have been removed.
 *	Implementation:
 *		vm_fault and vm_fault_page save mucho state
 *		in the moral equivalent of a closure.  The state
 *		structure is allocated when first entering vm_fault
 *		and deallocated when leaving vm_fault.
 */
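
/*
 * Structurally, vm_fault() first tries a "fast fault" path that keeps
 * the map and object locks held and never marks a page busy; only when
 * that path must give up (pager involvement, busy/absent/error pages,
 * copy-object pushes, restarts) does it fall back to the slow path
 * built around vm_fault_page().  See the block comment ahead of the
 * fast-path loop below.
 */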
kern_return_t
vm_fault(
	vm_map_t	map,
	vm_offset_t	vaddr,
	vm_prot_t	fault_type,
	boolean_t	change_wiring,
	int		interruptible,
	pmap_t		caller_pmap,
	vm_offset_t	caller_pmap_addr)
{
	vm_map_version_t	version;	/* Map version for verification */
	boolean_t		wired;		/* Should mapping be wired down? */
	vm_object_t		object;		/* Top-level object */
	vm_object_offset_t	offset;		/* Top-level offset */
	vm_prot_t		prot;		/* Protection for mapping */
	vm_behavior_t		behavior;	/* Expected paging behavior */
	vm_object_offset_t	lo_offset, hi_offset;
	vm_object_t		old_copy_object; /* Saved copy object */
	vm_page_t		result_page;	/* Result of vm_fault_page */
	vm_page_t		top_page;	/* Placeholder page */
	kern_return_t		kr;

	vm_page_t		m;		/* Fast access to result_page */
	kern_return_t		error_code;	/* page error reasons */
	vm_object_t		cur_object;
	vm_object_offset_t	cur_offset;
	vm_object_t		new_object;
	int			type_of_fault;
	vm_map_t		pmap_map = map;
	vm_map_t		original_map = map;
	pmap_t			pmap;
	boolean_t		funnel_set = FALSE;
	funnel_t		*curflock;
	thread_t		cur_thread;
	boolean_t		interruptible_state;
	unsigned int		cache_attr;
	int			write_startup_file = 0;
	vm_prot_t		full_fault_type;

	if (get_preemption_level() != 0)
		return (KERN_FAILURE);

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
			      vaddr,
			      0,
			      0,
			      0,
			      0);
	/* at present we do not fully check for execute permission */
	/* we generally treat it as read except in certain device  */
	/* memory settings */
	full_fault_type = fault_type;
	if (fault_type & VM_PROT_EXECUTE) {
		fault_type &= ~VM_PROT_EXECUTE;
		fault_type |= VM_PROT_READ;
	}

	interruptible_state = thread_interrupt_level(interruptible);

	/*
	 * assume we will hit a page in the cache
	 * otherwise, explicitly override with
	 * the real fault type once we determine it
	 */
	type_of_fault = DBG_CACHE_HIT_FAULT;

	current_task()->faults++;

	/*
	 * drop funnel if it is already held. Then restore while returning
	 */
	cur_thread = current_thread();

	if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) {
		funnel_set = TRUE;
		curflock = cur_thread->funnel_lock;
		thread_funnel_set(curflock, FALSE);
	}
	/*
	 *	Find the backing store object and offset into
	 *	it to begin the search.
	 */

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
				&object, &offset,
				&prot, &wired,
				&behavior, &lo_offset, &hi_offset, &pmap_map);

	pmap = pmap_map->pmap;

	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		goto done;
	}

	/*
	 *	If the page is wired, we must fault for the current protection
	 *	value, to avoid further faults.
	 */

	if (wired)
		fault_type = prot | VM_PROT_WRITE;
#if	VM_FAULT_CLASSIFY
	/*
	 *	Temporary data gathering code
	 */
	vm_fault_classify(object, offset, fault_type);
#endif	/* VM_FAULT_CLASSIFY */
	/*
	 *	Fast fault code.  The basic idea is to do as much as
	 *	possible while holding the map lock and object locks.
	 *	Busy pages are not used until the object lock has to
	 *	be dropped to do something (copy, zero fill, pmap enter).
	 *	Similarly, paging references aren't acquired until that
	 *	point, and object references aren't used.
	 *
	 *	If we can figure out what to do
	 *	(zero fill, copy on write, pmap enter) while holding
	 *	the locks, then it gets done.  Otherwise, we give up,
	 *	and use the original fault path (which doesn't hold
	 *	the map lock, and relies on busy pages).
	 *	The give up cases include:
	 *	- Have to talk to pager.
	 *	- Page is busy, absent or in error.
	 *	- Pager has locked out desired access.
	 *	- Fault needs to be restarted.
	 *	- Have to push page into copy object.
	 *
	 *	The code is an infinite loop that moves one level down
	 *	the shadow chain each time.  cur_object and cur_offset
	 *	refer to the current object being examined. object and offset
	 *	are the original object from the map.  The loop is at the
	 *	top level if and only if object and cur_object are the same.
	 *
	 *	Invariants:  Map lock is held throughout.  Lock is held on
	 *		original object and cur_object (if different) when
	 *		continuing or exiting loop.
	 */
2228 * If this page is to be inserted in a copy delay object
2229 * for writing, and if the object has a copy, then the
2230 * copy delay strategy is implemented in the slow fault page.
2232 if (object
->copy_strategy
!= MEMORY_OBJECT_COPY_DELAY
||
2233 object
->copy
== VM_OBJECT_NULL
||
2234 (fault_type
& VM_PROT_WRITE
) == 0) {
2235 cur_object
= object
;
2236 cur_offset
= offset
;
2239 m
= vm_page_lookup(cur_object
, cur_offset
);
2240 if (m
!= VM_PAGE_NULL
) {
2242 wait_result_t result
;
2244 if (object
!= cur_object
)
2245 vm_object_unlock(object
);
2247 vm_map_unlock_read(map
);
2248 if (pmap_map
!= map
)
2249 vm_map_unlock(pmap_map
);
#if !VM_FAULT_STATIC_CONFIG
            if (!vm_fault_interruptible)
                interruptible = THREAD_UNINT;
#endif
            result = PAGE_ASSERT_WAIT(m, interruptible);

            vm_object_unlock(cur_object);

            if (result == THREAD_WAITING) {
                result = thread_block(THREAD_CONTINUE_NULL);

                counter(c_vm_fault_page_block_busy_kernel++);
            }
            if (result == THREAD_AWAKENED || result == THREAD_RESTART)
                /* ... */;

            if (m->unusual && (m->error || m->restart || m->private
                || m->absent || (fault_type & m->page_lock))) {
                /*
                 * Unusual case. Give up.
                 */
            }

            /*
             * Two cases of map in faults:
             *	- At top level w/o copy object.
             *	- Read fault anywhere.
             *		--> must disallow write.
             */
            if (object == cur_object &&
                object->copy == VM_OBJECT_NULL)
                goto FastMapInFault;

            if ((fault_type & VM_PROT_WRITE) == 0) {
                boolean_t	sequential;

                prot &= ~VM_PROT_WRITE;
                /*
                 * Set up to map the page ...
                 * mark the page busy, drop
                 * locks and take a paging reference
                 * on the object with the page.
                 */
                if (object != cur_object) {
                    vm_object_unlock(object);
                    object = cur_object;
                }

                vm_object_paging_begin(object);

                /*
                 * Check a couple of global reasons to
                 * be conservative about write access.
                 * Then do the pmap_enter.
                 */
#if !VM_FAULT_STATIC_CONFIG
                if (vm_fault_dirty_handling
#if MACH_KDB
                    || db_watchpoint_list
#endif
                    && (fault_type & VM_PROT_WRITE) == 0)
                    prot &= ~VM_PROT_WRITE;
#else /* STATIC_CONFIG */
#if MACH_KDB
                if (db_watchpoint_list
                    && (fault_type & VM_PROT_WRITE) == 0)
                    prot &= ~VM_PROT_WRITE;
#endif /* MACH_KDB */
#endif /* STATIC_CONFIG */
                cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

                if (m->no_isync == TRUE) {
                    m->no_isync = FALSE;
                    pmap_sync_caches_phys(m->phys_page);
                    if (type_of_fault == DBG_CACHE_HIT_FAULT) {
                        /*
                         * found it in the cache, but this
                         * is the first fault-in of the page (no_isync == TRUE)
                         * so it must have come in as part of
                         * a cluster... account 1 pagein against it
                         */
                        current_task()->pageins++;
                        type_of_fault = DBG_PAGEIN_FAULT;
                    }
                } else if (cache_attr != VM_WIMG_DEFAULT) {
                    pmap_sync_caches_phys(m->phys_page);
                }

                if (caller_pmap) {
                    PMAP_ENTER(caller_pmap,
                            caller_pmap_addr, m,
                            prot, cache_attr, wired);
                } else {
                    PMAP_ENTER(pmap, vaddr, m,
                            prot, cache_attr, wired);
                }
                /*
                 * Hold queues lock to manipulate
                 * the page queues.  Change wiring
                 * case is obvious.  In soft ref bits
                 * case activate page only if it fell
                 * off paging queues, otherwise just
                 * activate it if it's inactive.
                 *
                 * NOTE: original vm_fault code will
                 * move active page to back of active
                 * queue.  This code doesn't.
                 */
                vm_page_lock_queues();

                vm_pagein_cluster_used++;
                m->clustered = FALSE;

                m->reference = TRUE;

                if (change_wiring) {
                    /* ... */
                }
#if VM_FAULT_STATIC_CONFIG
                else {
                    if (!m->active && !m->inactive)
                        vm_page_activate(m);
                }
#else
                else if (software_reference_bits) {
                    if (!m->active && !m->inactive)
                        vm_page_activate(m);
                }
                else if (!m->active) {
                    vm_page_activate(m);
                }
#endif
                vm_page_unlock_queues();

                /*
                 * That's it, clean up and return.
                 */
                PAGE_WAKEUP_DONE(m);
                sequential = (sequential && vm_page_deactivate_behind) ?
                        vm_fault_deactivate_behind(object, cur_offset, behavior) :
                        FALSE;

                /*
                 * Add non-sequential pages to the working set.
                 * The sequential pages will be brought in through
                 * normal clustering behavior.
                 */
                if (!sequential && !object->private) {
                    write_startup_file =
                        vm_fault_tws_insert(map, pmap_map, vaddr,
                                object, cur_offset);
                }

                vm_object_paging_end(object);
                vm_object_unlock(object);

                vm_map_unlock_read(map);
                vm_map_unlock(pmap_map);

                if (write_startup_file)
                    tws_send_startup_info(current_task());

                thread_funnel_set(curflock, TRUE);

                thread_interrupt_level(interruptible_state);

                KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
                        type_of_fault & 0xff,
                        /* ... */);

                return KERN_SUCCESS;
            /*
             * Copy on write fault.  If objects match, then
             * object->copy must not be NULL (else control
             * would be in previous code block), and we
             * have a potential push into the copy object
             * with which we won't cope here.
             */
            if (cur_object == object)
                /* ... */;

            /*
             * This is now a shadow based copy on write
             * fault -- it requires a copy up the shadow
             * chain.
             *
             * Allocate a page in the original top level
             * object. Give up if allocate fails.  Also
             * need to remember current page, as it's the
             * source of the copy.
             */
            if (m == VM_PAGE_NULL) {
                /* ... */
            }

            /*
             * Now do the copy.  Mark the source busy
             * and take out paging references on both
             * objects.
             *
             * NOTE: This code holds the map lock across
             * the copy.
             */
            vm_page_copy(cur_m, m);
            vm_page_insert(m, object, offset);

            vm_object_paging_begin(cur_object);
            vm_object_paging_begin(object);

            type_of_fault = DBG_COW_FAULT;
            VM_STAT(cow_faults++);
            current_task()->cow_faults++;

            /*
             * Now cope with the source page and object
             * If the top object has a ref count of 1
             * then no other map can access it, and hence
             * it's not necessary to do the pmap_page_protect.
             */
            vm_page_lock_queues();
            vm_page_deactivate(cur_m);
            pmap_page_protect(cur_m->phys_page,
                    VM_PROT_NONE);
            vm_page_unlock_queues();

            PAGE_WAKEUP_DONE(cur_m);
            vm_object_paging_end(cur_object);
            vm_object_unlock(cur_object);

            /*
             * Slight hack to call vm_object collapse
             * and then reuse common map in code.
             * note that the object lock was taken above.
             */
            vm_object_paging_end(object);
            vm_object_collapse(object, offset);
            vm_object_paging_begin(object);
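/*
 * [Editor's note -- illustrative sketch, not part of the original file.]
 * The copy-on-write push above boils down to: allocate a fresh page at
 * the faulting offset in the top-level object, copy the contents of the
 * page found lower in the shadow chain into it, and let the original
 * page age out.  A minimal user-space analogue (hypothetical names, no
 * locking or paging references):
 */
#if 0
#include <stdlib.h>
#include <string.h>

#define TOY_PAGE_SIZE	4096

/* Returns the new private copy of a shared page, or NULL on failure. */
static void *
toy_cow_push(const void *shared_page)
{
    void	*private_page = malloc(TOY_PAGE_SIZE);

    if (private_page == NULL)
        return NULL;		/* caller would fall back to the slow path */
    memcpy(private_page, shared_page, TOY_PAGE_SIZE);
    return private_page;	/* inserted at (object, offset) above */
}
#endif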
            /*
             * No page at cur_object, cur_offset
             */
            if (cur_object->pager_created) {
                /*
                 * Have to talk to the pager.  Give up.
                 */
            }

            if (cur_object->shadow == VM_OBJECT_NULL) {

                if (cur_object->shadow_severed) {
                    vm_object_paging_end(object);
                    vm_object_unlock(object);
                    vm_map_unlock_read(map);
                    vm_map_unlock(pmap_map);

                    if (write_startup_file)
                        tws_send_startup_info(current_task());

                    thread_funnel_set(curflock, TRUE);

                    thread_interrupt_level(interruptible_state);

                    return VM_FAULT_MEMORY_ERROR;
                }
                /*
                 * Zero fill fault.  Page gets
                 * filled in top object. Insert
                 * page, then drop any lower lock.
                 * Give up if no page.
                 */
                if (VM_PAGE_THROTTLED()) {
                    /* ... */
                }

                /*
                 * are we protecting the system from
                 * backing store exhaustion.  If so
                 * sleep unless we are privileged.
                 */
                if (vm_backing_store_low) {
                    if (!(current_task()->priv_flags
                            & VM_BACKING_STORE_PRIV))
                        /* ... */;
                }

                m = vm_page_alloc(object, offset);
                if (m == VM_PAGE_NULL) {
                    /* ... */
                }

                /*
                 * This is a zero-fill or initial fill
                 * page fault.  As such, we consider it
                 * undefined with respect to instruction
                 * execution.  i.e. it is the responsibility
                 * of higher layers to call for an instruction
                 * sync after changing the contents and before
                 * sending a program into this area.  We
                 * choose this approach for performance
                 */
                m->no_isync = FALSE;

                if (cur_object != object)
                    vm_object_unlock(cur_object);

                vm_object_paging_begin(object);
                vm_object_unlock(object);

                /*
                 * Now zero fill page and map it.
                 * the page is probably going to
                 * be written soon, so don't bother
                 * to clear the modified bit
                 *
                 * NOTE: This code holds the map
                 * lock across the zero fill.
                 */
                if (!map->no_zero_fill) {
                    vm_page_zero_fill(m);
                    type_of_fault = DBG_ZERO_FILL_FAULT;
                    VM_STAT(zero_fill_count++);
                }
                vm_page_lock_queues();
                VM_PAGE_QUEUES_REMOVE(m);

                m->page_ticket = vm_page_ticket;
                if (m->object->size > 0x80000) {
                    m->zero_fill = TRUE;
                    /* depends on the queues lock */
                    queue_enter(&vm_page_queue_zf,
                            m, vm_page_t, pageq);
                } else {
                    queue_enter(
                            &vm_page_queue_inactive,
                            m, vm_page_t, pageq);
                }
                vm_page_ticket_roll++;
                if (vm_page_ticket_roll ==
                        VM_PAGE_TICKETS_IN_ROLL) {
                    vm_page_ticket_roll = 0;
                    if (vm_page_ticket ==
                            VM_PAGE_TICKET_ROLL_IDS)
                        /* ... */;
                }
                vm_page_inactive_count++;
                vm_page_unlock_queues();
                vm_object_lock(object);
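/*
 * [Editor's note -- illustrative sketch, not part of the original file.]
 * The queue placement above routes zero-filled pages of large objects
 * (larger than 0x80000 bytes) to the dedicated zero-fill queue and
 * everything else to the ordinary inactive queue.  The decision in
 * isolation (hypothetical names):
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

#define TOY_ZF_OBJECT_THRESHOLD	0x80000		/* same cutoff used above */

/* true -> zero-fill queue, false -> inactive queue */
static bool
toy_use_zero_fill_queue(uint64_t object_size)
{
    return object_size > TOY_ZF_OBJECT_THRESHOLD;
}
#endif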
            /*
             * On to the next level
             */
            cur_offset += cur_object->shadow_offset;
            new_object = cur_object->shadow;
            vm_object_lock(new_object);
            if (cur_object != object)
                vm_object_unlock(cur_object);
            cur_object = new_object;
    /*
     * Cleanup from fast fault failure.  Drop any object
     * lock other than original and drop map lock.
     */
    if (object != cur_object)
        vm_object_unlock(cur_object);

    vm_map_unlock_read(map);
    vm_map_unlock(pmap_map);

    /*
     * Make a reference to this object to
     * prevent its disposal while we are messing with
     * it.  Once we have the reference, the map is free
     * to be diddled.  Since objects reference their
     * shadows (and copies), they will stay around as well.
     */
    assert(object->ref_count > 0);
    object->ref_count++;
    vm_object_res_reference(object);
    vm_object_paging_begin(object);

    XPR(XPR_VM_FAULT, "vm_fault -> vm_fault_page\n", 0, 0, 0, 0, 0);

    if (!object->private) {
        write_startup_file =
            vm_fault_tws_insert(map, pmap_map, vaddr, object, offset);
    }

    kr = vm_fault_page(object, offset, fault_type,
            (change_wiring && !wired),
            lo_offset, hi_offset, behavior,
            &prot, &result_page, &top_page,
            &error_code, map->no_zero_fill, FALSE, map, vaddr);
    /*
     * If we didn't succeed, lose the object reference immediately.
     */
    if (kr != VM_FAULT_SUCCESS)
        vm_object_deallocate(object);

    /*
     * See why we failed, and take corrective action.
     */
    switch (kr) {
    case VM_FAULT_SUCCESS:
        break;
    case VM_FAULT_MEMORY_SHORTAGE:
        if (vm_page_wait((change_wiring) ?
                /* ... */
    case VM_FAULT_INTERRUPTED:
        /* ... */
    case VM_FAULT_RETRY:
        /* ... */
    case VM_FAULT_FICTITIOUS_SHORTAGE:
        vm_page_more_fictitious();
        /* ... */
    case VM_FAULT_MEMORY_ERROR:
        /* ... */
        kr = KERN_MEMORY_ERROR;
        /* ... */
    }

    if (m != VM_PAGE_NULL) {
        assert((change_wiring && !wired) ?
            (top_page == VM_PAGE_NULL) :
            ((top_page == VM_PAGE_NULL) == (m->object == object)));
    }
    /*
     * How to clean up the result of vm_fault_page.  This
     * happens whether the mapping is entered or not.
     */

#define UNLOCK_AND_DEALLOCATE				\
	MACRO_BEGIN					\
	vm_fault_cleanup(m->object, top_page);		\
	vm_object_deallocate(object);			\
	MACRO_END

    /*
     * What to do with the resulting page from vm_fault_page
     * if it doesn't get entered into the physical map:
     */

#define RELEASE_PAGE(m)					\
	MACRO_BEGIN					\
	PAGE_WAKEUP_DONE(m);				\
	vm_page_lock_queues();				\
	if (!m->active && !m->inactive)			\
		vm_page_activate(m);			\
	vm_page_unlock_queues();			\
	MACRO_END

    /*
     * We must verify that the maps have not changed
     * since our last lookup.
     */
    if (m != VM_PAGE_NULL) {
        old_copy_object = m->object->copy;
        vm_object_unlock(m->object);
    } else {
        old_copy_object = VM_OBJECT_NULL;
    }

    if ((map != original_map) || !vm_map_verify(map, &version)) {
        vm_object_t		retry_object;
        vm_object_offset_t	retry_offset;
        vm_prot_t		retry_prot;

        /*
         * To avoid trying to write_lock the map while another
         * thread has it read_locked (in vm_map_pageable), we
         * do not try for write permission.  If the page is
         * still writable, we will get write permission.  If it
         * is not, or has been marked needs_copy, we enter the
         * mapping without write permission, and will merely
         * take another fault.
         */
        vm_map_lock_read(map);
        kr = vm_map_lookup_locked(&map, vaddr,
                fault_type & ~VM_PROT_WRITE, &version,
                &retry_object, &retry_offset, &retry_prot,
                &wired, &behavior, &lo_offset, &hi_offset,
                &pmap_map);
        pmap = pmap_map->pmap;

        if (kr != KERN_SUCCESS) {
            vm_map_unlock_read(map);
            if (m != VM_PAGE_NULL) {
                vm_object_lock(m->object);
                UNLOCK_AND_DEALLOCATE;
            } else {
                vm_object_deallocate(object);
            }
            /* ... */
        }
        vm_object_unlock(retry_object);
        if (m != VM_PAGE_NULL) {
            vm_object_lock(m->object);
        } else {
            vm_object_lock(object);
        }

        if ((retry_object != object) ||
            (retry_offset != offset)) {
            vm_map_unlock_read(map);
            vm_map_unlock(pmap_map);
            if (m != VM_PAGE_NULL) {
                UNLOCK_AND_DEALLOCATE;
            } else {
                vm_object_deallocate(object);
            }
            /* ... */
        }

        /*
         * Check whether the protection has changed or the object
         * has been copied while we left the map unlocked.
         */
        if (m != VM_PAGE_NULL) {
            vm_object_unlock(m->object);
        } else {
            vm_object_unlock(object);
        }
    }
    if (m != VM_PAGE_NULL) {
        vm_object_lock(m->object);
    } else {
        vm_object_lock(object);
    }

    /*
     * If the copy object changed while the top-level object
     * was unlocked, then we must take away write permission.
     */
    if (m != VM_PAGE_NULL) {
        if (m->object->copy != old_copy_object)
            prot &= ~VM_PROT_WRITE;
    }

    /*
     * If we want to wire down this page, but no longer have
     * adequate permissions, we must start all over.
     */
    if (wired && (fault_type != (prot | VM_PROT_WRITE))) {
        vm_map_verify_done(map, &version);
        vm_map_unlock(pmap_map);
        if (m != VM_PAGE_NULL) {
            UNLOCK_AND_DEALLOCATE;
        } else {
            vm_object_deallocate(object);
        }
        /* ... */
    }
    /*
     * Put this page into the physical map.
     * We had to do the unlock above because pmap_enter
     * may cause other faults.  The page may be on
     * the pageout queues.  If the pageout daemon comes
     * across the page, it will remove it from the queues.
     */
    if (m != VM_PAGE_NULL) {
        if (m->no_isync == TRUE) {
            pmap_sync_caches_phys(m->phys_page);

            if (type_of_fault == DBG_CACHE_HIT_FAULT) {
                /*
                 * found it in the cache, but this
                 * is the first fault-in of the page (no_isync == TRUE)
                 * so it must have come in as part of
                 * a cluster... account 1 pagein against it
                 */
                current_task()->pageins++;

                type_of_fault = DBG_PAGEIN_FAULT;
            }
            m->no_isync = FALSE;
        }
        cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

        if (caller_pmap) {
            PMAP_ENTER(caller_pmap,
                    caller_pmap_addr, m,
                    prot, cache_attr, wired);
        } else {
            PMAP_ENTER(pmap, vaddr, m,
                    prot, cache_attr, wired);
        }

        /*
         * Add working set information for private objects here.
         */
        if (m->object->private) {
            write_startup_file =
                vm_fault_tws_insert(map, pmap_map, vaddr,
                        m->object, m->offset);
        }
    } else {
        vm_map_entry_t	entry;
        vm_offset_t	ldelta, hdelta;

        /*
         * do a pmap block mapping from the physical address
         * in the object
         */

        /* While we do not worry about execution protection in   */
        /* general, certain pages may have instruction execution */
        /* disallowed.  We will check here, and if not allowed   */
        /* to execute, we return with a protection failure.      */
        if ((full_fault_type & VM_PROT_EXECUTE) &&
            (pmap_canExecute((ppnum_t)
                (object->shadow_offset >> 12)) < 1)) {

            vm_map_verify_done(map, &version);
            vm_map_unlock(pmap_map);
            vm_fault_cleanup(object, top_page);
            vm_object_deallocate(object);
            kr = KERN_PROTECTION_FAILURE;
            /* ... */
        }

        if (pmap_map != map) {
            vm_map_unlock(pmap_map);
        }
        if (original_map != map) {
            vm_map_unlock_read(map);
            vm_map_lock_read(original_map);
            /* ... */
        }

        hdelta = 0xFFFFF000;
        ldelta = 0xFFFFF000;

        while (vm_map_lookup_entry(map, laddr, &entry)) {
            if (ldelta > (laddr - entry->vme_start))
                ldelta = laddr - entry->vme_start;
            if (hdelta > (entry->vme_end - laddr))
                hdelta = entry->vme_end - laddr;
            if (entry->is_sub_map) {

                laddr = (laddr - entry->vme_start)
                        /* ... */;
                vm_map_lock_read(entry->object.sub_map);
                vm_map_unlock_read(map);
                if (entry->use_pmap) {
                    vm_map_unlock_read(pmap_map);
                    pmap_map = entry->object.sub_map;
                }
                map = entry->object.sub_map;
            }
            /* ... */
        }

        if (vm_map_lookup_entry(map, laddr, &entry) &&
            (entry->object.vm_object != NULL) &&
            (entry->object.vm_object == object)) {

            if (caller_pmap) {
                /* Set up a block mapped area */
                pmap_map_block(caller_pmap,
                    (addr64_t)(caller_pmap_addr - ldelta),
                    /* ... */
                    (entry->object.vm_object->shadow_offset))
                    /* ... */
                    (laddr - entry->vme_start)
                    /* ... */,
                    ldelta + hdelta, prot,
                    (VM_WIMG_MASK & (int)object->wimg_bits), 0);
            } else {
                /* Set up a block mapped area */
                pmap_map_block(pmap_map->pmap,
                    (addr64_t)(vaddr - ldelta),
                    /* ... */
                    (entry->object.vm_object->shadow_offset))
                    /* ... */
                    (laddr - entry->vme_start) - ldelta) >> 12,
                    ldelta + hdelta, prot,
                    (VM_WIMG_MASK & (int)object->wimg_bits), 0);
            }
        } else {
            if (caller_pmap) {
                pmap_enter(caller_pmap, caller_pmap_addr,
                    object->shadow_offset >> 12, prot, 0, TRUE);
            } else {
                pmap_enter(pmap, vaddr,
                    object->shadow_offset >> 12, prot, 0, TRUE);
            }
        }
    }
    /*
     * If the page is not wired down and isn't already
     * on a pageout queue, then put it where the
     * pageout daemon can find it.
     */
    if (m != VM_PAGE_NULL) {
        vm_page_lock_queues();

        if (change_wiring) {
            /* ... */
        }
#if VM_FAULT_STATIC_CONFIG
        else {
            if (!m->active && !m->inactive)
                vm_page_activate(m);
            m->reference = TRUE;
        }
#else
        else if (software_reference_bits) {
            if (!m->active && !m->inactive)
                vm_page_activate(m);
            m->reference = TRUE;
        } else {
            vm_page_activate(m);
        }
#endif
        vm_page_unlock_queues();
    }

    /*
     * Unlock everything, and return
     */
    vm_map_verify_done(map, &version);
    vm_map_unlock(pmap_map);
    if (m != VM_PAGE_NULL) {
        PAGE_WAKEUP_DONE(m);
        UNLOCK_AND_DEALLOCATE;
    } else {
        vm_fault_cleanup(object, top_page);
        vm_object_deallocate(object);
    }

#undef UNLOCK_AND_DEALLOCATE

    if (write_startup_file)
        tws_send_startup_info(current_task());

    thread_funnel_set(curflock, TRUE);

    thread_interrupt_level(interruptible_state);

    KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
            type_of_fault & 0xff,
            /* ... */);

    return kr;
}
/*
 * Wire down a range of virtual addresses in a map.
 */
kern_return_t
vm_fault_wire(
    vm_map_t		map,
    vm_map_entry_t	entry,
    pmap_t		pmap,
    vm_offset_t		pmap_addr)
{
    register vm_offset_t	va;
    register vm_offset_t	end_addr = entry->vme_end;
    register kern_return_t	rc;

    assert(entry->in_transition);

    if ((entry->object.vm_object != NULL) &&
        !entry->is_sub_map &&
        entry->object.vm_object->phys_contiguous) {
        return KERN_SUCCESS;
    }

    /*
     * Inform the physical mapping system that the
     * range of addresses may not fault, so that
     * page tables and such can be locked down as well.
     */
    pmap_pageable(pmap, pmap_addr,
        pmap_addr + (end_addr - entry->vme_start), FALSE);

    /*
     * We simulate a fault to get the page and enter it
     * in the physical map.
     */
    for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
        if ((rc = vm_fault_wire_fast(
                map, va, entry, pmap,
                pmap_addr + (va - entry->vme_start)
                )) != KERN_SUCCESS) {
            rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
                    (pmap == kernel_pmap) ?
                    THREAD_UNINT : THREAD_ABORTSAFE,
                    pmap, pmap_addr + (va - entry->vme_start));
        }

        if (rc != KERN_SUCCESS) {
            struct vm_map_entry	tmp_entry = *entry;

            /* unwire wired pages */
            tmp_entry.vme_end = va;
            vm_fault_unwire(map,
                &tmp_entry, FALSE, pmap, pmap_addr);
            /* ... */
        }
    }
    return KERN_SUCCESS;
}
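/*
 * [Editor's note -- illustrative sketch, not part of the original file.]
 * Callers of the routine above bracket a wired range with a matching
 * unwire: vm_fault_wire() disables pmap pageability and faults every
 * page in, and vm_fault_unwire() undoes both.  Schematically (hedged --
 * the real callers live elsewhere, e.g. in the vm_map code, and the
 * argument order follows the definitions in this file):
 */
#if 0
	kr = vm_fault_wire(map, entry, pmap, pmap_addr);
	if (kr == KERN_SUCCESS) {
		/* ... use the wired range ... */
		vm_fault_unwire(map, entry, FALSE, pmap, pmap_addr);
	}
#endif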
/*
 * Unwire a range of virtual addresses in a map.
 */
void
vm_fault_unwire(
    vm_map_t		map,
    vm_map_entry_t	entry,
    boolean_t		deallocate,
    pmap_t		pmap,
    vm_offset_t		pmap_addr)
{
    register vm_offset_t	va;
    register vm_offset_t	end_addr = entry->vme_end;
    vm_object_t			object;

    object = (entry->is_sub_map)
            ? VM_OBJECT_NULL : entry->object.vm_object;

    /*
     * Since the pages are wired down, we must be able to
     * get their mappings from the physical map system.
     */
    for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
        pmap_change_wiring(pmap,
            pmap_addr + (va - entry->vme_start), FALSE);

        if (object == VM_OBJECT_NULL) {
            (void) vm_fault(map, va, VM_PROT_NONE,
                    TRUE, THREAD_UNINT, pmap, pmap_addr);
        } else if (object->phys_contiguous) {
            continue;
        } else {
            vm_page_t		result_page;
            vm_page_t		top_page;
            vm_object_t		result_object;
            vm_fault_return_t	result;

            do {
                prot = VM_PROT_NONE;

                vm_object_lock(object);
                vm_object_paging_begin(object);
                XPR(XPR_VM_FAULT,
                    "vm_fault_unwire -> vm_fault_page\n",
                    0, 0, 0, 0, 0);
                result = vm_fault_page(object,
                        /* ... */ (va - entry->vme_start),
                        /* ... */ - entry->vme_start),
                        /* ... */ 0, map->no_zero_fill,
                        /* ... */);
            } while (result == VM_FAULT_RETRY);

            if (result != VM_FAULT_SUCCESS)
                panic("vm_fault_unwire: failure");

            result_object = result_page->object;
            if (deallocate) {
                assert(!result_page->fictitious);
                pmap_page_protect(result_page->phys_page,
                        VM_PROT_NONE);
                VM_PAGE_FREE(result_page);
            } else {
                vm_page_lock_queues();
                vm_page_unwire(result_page);
                vm_page_unlock_queues();
                PAGE_WAKEUP_DONE(result_page);
            }
            vm_fault_cleanup(result_object, top_page);
        }
    }

    /*
     * Inform the physical mapping system that the range
     * of addresses may fault, so that page tables and
     * such may be unwired themselves.
     */
    pmap_pageable(pmap, pmap_addr,
        pmap_addr + (end_addr - entry->vme_start), TRUE);
}
/*
 * vm_fault_wire_fast:
 *
 * Handle common case of a wire down page fault at the given address.
 * If successful, the page is inserted into the associated physical map.
 * The map entry is passed in to avoid the overhead of a map lookup.
 *
 * NOTE: the given address should be truncated to the
 * proper page address.
 *
 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
 * a standard error specifying why the fault is fatal is returned.
 *
 * The map in question must be referenced, and remains so.
 * Caller has a read lock on the map.
 *
 * This is a stripped version of vm_fault() for wiring pages.  Anything
 * other than the common case will return KERN_FAILURE, and the caller
 * is expected to call vm_fault().
 */
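/*
 * [Editor's note -- illustrative sketch, not part of the original file.]
 * The contract spelled out above means callers never inspect the reason
 * for a KERN_FAILURE from this routine; they simply retry through the
 * general fault path.  Schematically (condensed from the loop in
 * vm_fault_wire() earlier in this file):
 */
#if 0
	if (vm_fault_wire_fast(map, va, entry, pmap, pmap_addr) != KERN_SUCCESS)
		rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
			      THREAD_UNINT, pmap, pmap_addr);
#endif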
kern_return_t
vm_fault_wire_fast(
    vm_map_t		map,
    vm_offset_t		va,
    vm_map_entry_t	entry,
    pmap_t		pmap,
    vm_offset_t		pmap_addr)
{
    vm_object_t		object;
    vm_object_offset_t	offset;
    register vm_page_t	m;
    vm_prot_t		prot;
    thread_act_t	thr_act;
    unsigned int	cache_attr;

    if ((thr_act = current_act()) && (thr_act->task != TASK_NULL))
        thr_act->task->faults++;

#undef RELEASE_PAGE
#define RELEASE_PAGE(m)	{			\
	PAGE_WAKEUP_DONE(m);			\
	vm_page_lock_queues();			\
	vm_page_unwire(m);			\
	vm_page_unlock_queues();		\
}

#undef UNLOCK_THINGS
#define UNLOCK_THINGS	{			\
	object->paging_in_progress--;		\
	vm_object_unlock(object);		\
}

#undef UNLOCK_AND_DEALLOCATE
#define UNLOCK_AND_DEALLOCATE	{		\
	UNLOCK_THINGS;				\
	vm_object_deallocate(object);		\
}
    /*
     * Give up and have caller do things the hard way.
     */
#define GIVE_UP {				\
	UNLOCK_AND_DEALLOCATE;			\
	return(KERN_FAILURE);			\
}

    /*
     * If this entry is not directly to a vm_object, bail out.
     */
    if (entry->is_sub_map)
        return(KERN_FAILURE);

    /*
     * Find the backing store object and offset into it.
     */
    object = entry->object.vm_object;
    offset = (va - entry->vme_start) + entry->offset;
    prot = entry->protection;

    /*
     * Make a reference to this object to prevent its
     * disposal while we are messing with it.
     */
    vm_object_lock(object);
    assert(object->ref_count > 0);
    object->ref_count++;
    vm_object_res_reference(object);
    object->paging_in_progress++;

    /*
     * INVARIANTS (through entire routine):
     *
     * 1)	At all times, we must either have the object
     *		lock or a busy page in some object to prevent
     *		some other thread from trying to bring in
     *		the same page.
     *
     * 2)	Once we have a busy page, we must remove it from
     *		the pageout queues, so that the pageout daemon
     *		will not grab it away.
     */

    /*
     * Look for page in top-level object.  If it's not there or
     * there's something going on, give up.
     */
    m = vm_page_lookup(object, offset);
    if ((m == VM_PAGE_NULL) || (m->busy) ||
        (m->unusual && (m->error || m->restart || m->absent ||
            prot & m->page_lock))) {
        GIVE_UP;
    }

    /*
     * Wire the page down now.  All bail outs beyond this
     * point must unwire the page.
     */
    vm_page_lock_queues();
    vm_page_wire(m);
    vm_page_unlock_queues();

    /*
     * Mark page busy for other threads.
     */

    /*
     * Give up if the page is being written and there's a copy object
     */
    if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
        RELEASE_PAGE(m);
        GIVE_UP;
    }

    /*
     * Put this page into the physical map.
     * We have to unlock the object because pmap_enter
     * may cause other faults.
     */
    if (m->no_isync == TRUE) {
        pmap_sync_caches_phys(m->phys_page);

        m->no_isync = FALSE;
    }

    cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

    PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);

    /*
     * Unlock everything, and return
     */
    PAGE_WAKEUP_DONE(m);
    UNLOCK_AND_DEALLOCATE;

    return(KERN_SUCCESS);
}
/*
 * Routine:	vm_fault_copy_cleanup
 *
 *	Release a page used by vm_fault_copy.
 */
void
vm_fault_copy_cleanup(
    vm_page_t	page,
    vm_page_t	top_page)
{
    vm_object_t	object = page->object;

    vm_object_lock(object);
    PAGE_WAKEUP_DONE(page);
    vm_page_lock_queues();
    if (!page->active && !page->inactive)
        vm_page_activate(page);
    vm_page_unlock_queues();
    vm_fault_cleanup(object, top_page);
}

void
vm_fault_copy_dst_cleanup(
    vm_page_t	page)
{
    vm_object_t	object;

    if (page != VM_PAGE_NULL) {
        object = page->object;
        vm_object_lock(object);
        vm_page_lock_queues();
        vm_page_unwire(page);
        vm_page_unlock_queues();
        vm_object_paging_end(object);
        vm_object_unlock(object);
    }
}
/*
 * Routine:	vm_fault_copy
 *
 *	Copy pages from one virtual memory object to another --
 *	neither the source nor destination pages need be resident.
 *
 *	Before actually copying a page, the version associated with
 *	the destination address map will be verified.
 *
 * In/out conditions:
 *	The caller must hold a reference, but not a lock, to
 *	each of the source and destination objects and to the
 *	destination map.
 *
 *	Returns KERN_SUCCESS if no errors were encountered in
 *	reading or writing the data.  Returns KERN_INTERRUPTED if
 *	the operation was interrupted (only possible if the
 *	"interruptible" argument is asserted).  Other return values
 *	indicate a permanent error in copying the data.
 *
 *	The actual amount of data copied will be returned in the
 *	"copy_size" argument.  In the event that the destination map
 *	verification failed, this amount may be less than the amount
 *	requested.
 */
kern_return_t
vm_fault_copy(
    vm_object_t		src_object,
    vm_object_offset_t	src_offset,
    vm_size_t		*src_size,	/* INOUT */
    vm_object_t		dst_object,
    vm_object_offset_t	dst_offset,
    vm_map_t		dst_map,
    vm_map_version_t	*dst_version,
    int			interruptible)
{
    vm_page_t		result_page;

    vm_page_t		src_top_page;

    vm_page_t		dst_top_page;

    vm_size_t		amount_left;
    vm_object_t		old_copy_object;
    kern_return_t	error = 0;

    vm_size_t		part_size;

    /*
     * In order not to confuse the clustered pageins, align
     * the different offsets on a page boundary.
     */
    vm_object_offset_t	src_lo_offset = trunc_page_64(src_offset);
    vm_object_offset_t	dst_lo_offset = trunc_page_64(dst_offset);
    vm_object_offset_t	src_hi_offset = round_page_64(src_offset + *src_size);
    vm_object_offset_t	dst_hi_offset = round_page_64(dst_offset + *src_size);
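/*
 * [Editor's note -- illustrative sketch, not part of the original file.]
 * Concretely, with 4K pages a request for 0x1800 bytes starting at
 * offset 0x12600 is clustered against the page-aligned window
 * [0x12000, 0x14000): trunc_page_64() rounds the low end down and
 * round_page_64() rounds the high end up.  A standalone illustration
 * (hypothetical names):
 */
#if 0
#include <stdint.h>

#define TOY_PAGE_MASK	((uint64_t)4096 - 1)

static uint64_t toy_trunc_page(uint64_t off) { return off & ~TOY_PAGE_MASK; }
static uint64_t toy_round_page(uint64_t off) { return (off + TOY_PAGE_MASK) & ~TOY_PAGE_MASK; }

/* toy_trunc_page(0x12600) == 0x12000, toy_round_page(0x12600 + 0x1800) == 0x14000 */
#endif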
#define RETURN(x)					\
	{						\
	*src_size -= amount_left;			\
	return(x);					\
	}

    amount_left = *src_size;
    do { /* while (amount_left > 0) */
        /*
         * There may be a deadlock if both source and destination
         * pages are the same. To avoid this deadlock, the copy must
         * start by getting the destination page in order to apply
         * COW semantics if any.
         */

    RetryDestinationFault: ;

        dst_prot = VM_PROT_WRITE|VM_PROT_READ;

        vm_object_lock(dst_object);
        vm_object_paging_begin(dst_object);

        XPR(XPR_VM_FAULT, "vm_fault_copy -> vm_fault_page\n", 0, 0, 0, 0, 0);
        switch (vm_fault_page(dst_object,
                    trunc_page_64(dst_offset),
                    VM_PROT_WRITE|VM_PROT_READ,
                    /* ... */
                    VM_BEHAVIOR_SEQUENTIAL,
                    /* ... */
                    dst_map->no_zero_fill,
                    /* ... */)) {
        case VM_FAULT_SUCCESS:
            break;
        case VM_FAULT_RETRY:
            goto RetryDestinationFault;
        case VM_FAULT_MEMORY_SHORTAGE:
            if (vm_page_wait(interruptible))
                goto RetryDestinationFault;
        case VM_FAULT_INTERRUPTED:
            RETURN(MACH_SEND_INTERRUPTED);
        case VM_FAULT_FICTITIOUS_SHORTAGE:
            vm_page_more_fictitious();
            goto RetryDestinationFault;
        case VM_FAULT_MEMORY_ERROR:
            /* ... */
            return(KERN_MEMORY_ERROR);
        }
        assert((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);

        old_copy_object = dst_page->object->copy;

        /*
         * There exists the possibility that the source and
         * destination page are the same.  But we can't
         * easily determine that now.  If they are the
         * same, the call to vm_fault_page() for the
         * destination page will deadlock.  To prevent this we
         * wire the page so we can drop busy without having
         * the page daemon steal the page.  We clean up the
         * top page but keep the paging reference on the object
         * holding the dest page so it doesn't go away.
         */
        vm_page_lock_queues();
        vm_page_wire(dst_page);
        vm_page_unlock_queues();
        PAGE_WAKEUP_DONE(dst_page);
        vm_object_unlock(dst_page->object);

        if (dst_top_page != VM_PAGE_NULL) {
            vm_object_lock(dst_object);
            VM_PAGE_FREE(dst_top_page);
            vm_object_paging_end(dst_object);
            vm_object_unlock(dst_object);
        }
    RetrySourceFault: ;

        if (src_object == VM_OBJECT_NULL) {
            /*
             * No source object.  We will just
             * zero-fill the page in dst_object.
             */
            src_page = VM_PAGE_NULL;
            result_page = VM_PAGE_NULL;
        } else {
            vm_object_lock(src_object);
            src_page = vm_page_lookup(src_object,
                        trunc_page_64(src_offset));
            if (src_page == dst_page) {
                src_prot = dst_prot;
                result_page = VM_PAGE_NULL;
            } else {
                src_prot = VM_PROT_READ;
                vm_object_paging_begin(src_object);

                XPR(XPR_VM_FAULT,
                    "vm_fault_copy(2) -> vm_fault_page\n",
                    0, 0, 0, 0, 0);
                switch (vm_fault_page(src_object,
                            trunc_page_64(src_offset),
                            /* ... */
                            VM_BEHAVIOR_SEQUENTIAL,
                            /* ... */)) {

                case VM_FAULT_SUCCESS:
                    break;
                case VM_FAULT_RETRY:
                    goto RetrySourceFault;
                case VM_FAULT_MEMORY_SHORTAGE:
                    if (vm_page_wait(interruptible))
                        goto RetrySourceFault;
                case VM_FAULT_INTERRUPTED:
                    vm_fault_copy_dst_cleanup(dst_page);
                    RETURN(MACH_SEND_INTERRUPTED);
                case VM_FAULT_FICTITIOUS_SHORTAGE:
                    vm_page_more_fictitious();
                    goto RetrySourceFault;
                case VM_FAULT_MEMORY_ERROR:
                    vm_fault_copy_dst_cleanup(dst_page);
                    /* ... */
                    return(KERN_MEMORY_ERROR);
                }

                assert((src_top_page == VM_PAGE_NULL) ==
                    (result_page->object == src_object));
            }
            assert((src_prot & VM_PROT_READ) != VM_PROT_NONE);
            vm_object_unlock(result_page->object);
        }

        if (!vm_map_verify(dst_map, dst_version)) {
            if (result_page != VM_PAGE_NULL && src_page != dst_page)
                vm_fault_copy_cleanup(result_page, src_top_page);
            vm_fault_copy_dst_cleanup(dst_page);
            /* ... */
        }

        vm_object_lock(dst_page->object);

        if (dst_page->object->copy != old_copy_object) {
            vm_object_unlock(dst_page->object);
            vm_map_verify_done(dst_map, dst_version);
            if (result_page != VM_PAGE_NULL && src_page != dst_page)
                vm_fault_copy_cleanup(result_page, src_top_page);
            vm_fault_copy_dst_cleanup(dst_page);
            /* ... */
        }
        vm_object_unlock(dst_page->object);

        /*
         * Copy the page, and note that it is dirty
         */
        if (!page_aligned(src_offset) ||
            !page_aligned(dst_offset) ||
            !page_aligned(amount_left)) {

            vm_object_offset_t	src_po,
                                dst_po;

            src_po = src_offset - trunc_page_64(src_offset);
            dst_po = dst_offset - trunc_page_64(dst_offset);

            if (dst_po > src_po) {
                part_size = PAGE_SIZE - dst_po;
            } else {
                part_size = PAGE_SIZE - src_po;
            }
            if (part_size > (amount_left)) {
                part_size = amount_left;
            }

            if (result_page == VM_PAGE_NULL) {
                vm_page_part_zero_fill(dst_page,
                        dst_po, part_size);
            } else {
                vm_page_part_copy(result_page, src_po,
                        dst_page, dst_po, part_size);
                if (!dst_page->dirty) {
                    vm_object_lock(dst_object);
                    dst_page->dirty = TRUE;
                    vm_object_unlock(dst_page->object);
                }
            }
        } else {
            part_size = PAGE_SIZE;

            if (result_page == VM_PAGE_NULL)
                vm_page_zero_fill(dst_page);
            else {
                vm_page_copy(result_page, dst_page);
                if (!dst_page->dirty) {
                    vm_object_lock(dst_object);
                    dst_page->dirty = TRUE;
                    vm_object_unlock(dst_page->object);
                }
            }
        }
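/*
 * [Editor's note -- illustrative sketch, not part of the original file.]
 * The unaligned branch above copies only up to the nearer page boundary
 * each pass.  For example, with src_po = 0x200 and dst_po = 0x800 the
 * destination boundary is closer, so part_size = PAGE_SIZE - 0x800 =
 * 0x800 bytes are copied this time around the loop.  The computation in
 * isolation (hypothetical names):
 */
#if 0
#include <stdint.h>

#define TOY_PAGE_SIZE	4096ULL

static uint64_t
toy_part_size(uint64_t src_po, uint64_t dst_po, uint64_t amount_left)
{
    uint64_t part = TOY_PAGE_SIZE - ((dst_po > src_po) ? dst_po : src_po);

    return (part > amount_left) ? amount_left : part;
}
#endif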
        /*
         * Unlock everything, and return
         */
        vm_map_verify_done(dst_map, dst_version);

        if (result_page != VM_PAGE_NULL && src_page != dst_page)
            vm_fault_copy_cleanup(result_page, src_top_page);
        vm_fault_copy_dst_cleanup(dst_page);

        amount_left -= part_size;
        src_offset += part_size;
        dst_offset += part_size;
    } while (amount_left > 0);

    RETURN(KERN_SUCCESS);

#undef RETURN
}
/*
 * Routine:	vm_fault_page_overwrite
 *
 *	A form of vm_fault_page that assumes that the
 *	resulting page will be overwritten in its entirety,
 *	making it unnecessary to obtain the correct *contents*
 *	of the page.
 *
 *	XXX Untested.  Also unused.  Eventually, this technology
 *	could be used in vm_fault_copy() to advantage.
 */
vm_fault_return_t
vm_fault_page_overwrite(
    vm_object_t		dst_object,
    vm_object_offset_t	dst_offset,
    vm_page_t		*result_page)	/* OUT */
{
    vm_page_t		dst_page;
    kern_return_t	wait_result;

#define interruptible	THREAD_UNINT	/* XXX */

    /*
     * Look for a page at this offset
     */
    while ((dst_page = vm_page_lookup(dst_object, dst_offset))
            == VM_PAGE_NULL) {
        /*
         * No page, no problem... just allocate one.
         */
        dst_page = vm_page_alloc(dst_object, dst_offset);
        if (dst_page == VM_PAGE_NULL) {
            vm_object_unlock(dst_object);
            /* ... */
            vm_object_lock(dst_object);
            continue;
        }

        /*
         * Pretend that the memory manager
         * write-protected the page.
         *
         * Note that we will be asking for write
         * permission without asking for the data
         * first.
         */
        dst_page->overwriting = TRUE;
        dst_page->page_lock = VM_PROT_WRITE;
        dst_page->absent = TRUE;
        dst_page->unusual = TRUE;
        dst_object->absent_count++;

        break;
    }

    /*
     * When we bail out, we might have to throw
     * away the page created here.
     */
#define DISCARD_PAGE						\
	{							\
	vm_object_lock(dst_object);				\
	dst_page = vm_page_lookup(dst_object, dst_offset);	\
	if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
		VM_PAGE_FREE(dst_page);				\
	vm_object_unlock(dst_object);				\
	}

    /*
     * If the page is write-protected...
     */
    if (dst_page->page_lock & VM_PROT_WRITE) {
        /*
         * ... and an unlock request hasn't been sent
         */
        if (!(dst_page->unlock_request & VM_PROT_WRITE)) {
            /*
             * ... then send one now.
             */
            if (!dst_object->pager_ready) {
                wait_result = vm_object_assert_wait(dst_object,
                        VM_OBJECT_EVENT_PAGER_READY,
                        /* ... */);
                vm_object_unlock(dst_object);
                if (wait_result == THREAD_WAITING)
                    wait_result = thread_block(THREAD_CONTINUE_NULL);
                if (wait_result != THREAD_AWAKENED) {
                    /* ... */
                    return(VM_FAULT_INTERRUPTED);
                }
            }

            u = dst_page->unlock_request |= VM_PROT_WRITE;
            vm_object_unlock(dst_object);

            if ((rc = memory_object_data_unlock(
                    /* ... */
                    dst_offset + dst_object->paging_offset,
                    /* ... */
                    u)) != KERN_SUCCESS) {
                printf("vm_object_overwrite: memory_object_data_unlock failed\n");
                /* ... */
                return((rc == MACH_SEND_INTERRUPTED) ?
                        VM_FAULT_INTERRUPTED :
                        VM_FAULT_MEMORY_ERROR);
            }
            vm_object_lock(dst_object);
        }

        /* ... fall through to wait below */
    } else {
        /*
         * If the page isn't being used for other
         * purposes, then we're done.
         */
        if (!(dst_page->busy || dst_page->absent ||
              dst_page->error || dst_page->restart))
            /* ... */;
    }

    wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
    vm_object_unlock(dst_object);
    if (wait_result == THREAD_WAITING)
        wait_result = thread_block(THREAD_CONTINUE_NULL);
    if (wait_result != THREAD_AWAKENED) {
        /* ... */
        return(VM_FAULT_INTERRUPTED);
    }

    *result_page = dst_page;
    return(VM_FAULT_SUCCESS);

#undef interruptible
}
#if VM_FAULT_CLASSIFY
/*
 * Temporary statistics gathering support.
 */

/*
 * Statistics arrays:
 */
#define VM_FAULT_TYPES_MAX	5
#define VM_FAULT_LEVEL_MAX	8

int	vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];

#define VM_FAULT_TYPE_ZERO_FILL	0
#define VM_FAULT_TYPE_MAP_IN	1
#define VM_FAULT_TYPE_PAGER	2
#define VM_FAULT_TYPE_COPY	3
#define VM_FAULT_TYPE_OTHER	4


void
vm_fault_classify(vm_object_t		object,
		  vm_object_offset_t	offset,
		  vm_prot_t		fault_type)
{
    int		type, level = 0;
    vm_page_t	m;

    while (TRUE) {
        m = vm_page_lookup(object, offset);
        if (m != VM_PAGE_NULL) {
            if (m->busy || m->error || m->restart || m->absent ||
                fault_type & m->page_lock) {
                type = VM_FAULT_TYPE_OTHER;
                break;
            }
            if (((fault_type & VM_PROT_WRITE) == 0) ||
                ((level == 0) && object->copy == VM_OBJECT_NULL)) {
                type = VM_FAULT_TYPE_MAP_IN;
            } else {
                type = VM_FAULT_TYPE_COPY;
            }
            break;
        } else {
            if (object->pager_created) {
                type = VM_FAULT_TYPE_PAGER;
                break;
            }
            if (object->shadow == VM_OBJECT_NULL) {
                type = VM_FAULT_TYPE_ZERO_FILL;
                break;
            }

            offset += object->shadow_offset;
            object = object->shadow;
            level++;
            continue;
        }
    }

    if (level > VM_FAULT_LEVEL_MAX)
        level = VM_FAULT_LEVEL_MAX;

    vm_fault_stats[type][level] += 1;
}

/* cleanup routine to call from debugger */
void
vm_fault_classify_init(void)
{
    int	type, level;

    for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
        for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
            vm_fault_stats[type][level] = 0;
        }
    }

    return;
}
#endif /* VM_FAULT_CLASSIFY */
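/*
 * [Editor's note -- illustrative sketch, not part of the original file.]
 * A matching dump routine for the counters above (hypothetical; the
 * original only provides vm_fault_classify_init()) would simply walk
 * the same two-dimensional array, e.g. when poking around from the
 * debugger:
 */
#if 0
void
vm_fault_classify_dump(void)
{
    int	type, level;

    for (type = 0; type < VM_FAULT_TYPES_MAX; type++)
        for (level = 0; level < VM_FAULT_LEVEL_MAX; level++)
            if (vm_fault_stats[type][level] != 0)
                printf("fault type %d level %d: %d\n",
                       type, level, vm_fault_stats[type][level]);
}
#endif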