/*
 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Page fault handling module.
 */
/* remove after component interface available */
extern int	vnode_pager_workaround;
extern int	device_pager_workaround;
#include <mach_cluster_stats.h>
#include <mach_pagemap.h>

#include <vm/vm_fault.h>
#include <mach/kern_return.h>
#include <mach/message.h>	/* for error codes */
#include <kern/host_statistics.h>
#include <kern/counters.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/host.h>
#include <ppc/proc_reg.h>
#include <vm/task_working_set.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/memory_object.h>
				/* For memory_object_data_{request,unlock} */
#include <kern/mach_param.h>
#include <kern/macro_help.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>

#include <sys/kdebug.h>
#define VM_FAULT_CLASSIFY	0
#define VM_FAULT_STATIC_CONFIG	1

#define TRACEFAULTPAGE	0	/* (TEST/DEBUG) */

int	vm_object_absent_max = 50;

int	vm_fault_debug = 0;

#if	!VM_FAULT_STATIC_CONFIG
boolean_t	vm_fault_dirty_handling = FALSE;
boolean_t	vm_fault_interruptible = FALSE;
boolean_t	software_reference_bits = TRUE;
#endif

#if	MACH_KDB
extern struct db_watchpoint *db_watchpoint_list;
#endif	/* MACH_KDB */
/* Forward declarations of internal routines. */
extern kern_return_t vm_fault_wire_fast(
		vm_map_entry_t	entry,
		vm_offset_t	pmap_addr);

extern void vm_fault_continue(void);

extern void vm_fault_copy_cleanup(
		vm_page_t	page,
		vm_page_t	top_page);

extern void vm_fault_copy_dst_cleanup(
		vm_page_t	page);

#if	VM_FAULT_CLASSIFY
extern void vm_fault_classify(vm_object_t	object,
		vm_object_offset_t	offset,
		vm_prot_t		fault_type);

extern void vm_fault_classify_init(void);
#endif	/* VM_FAULT_CLASSIFY */
/*
 *	Routine:	vm_fault_init
 *	Purpose:
 *		Initialize our private data structures.
 */

/*
 *	Routine:	vm_fault_cleanup
 *	Purpose:
 *		Clean up the result of vm_fault_page.
 *	Results:
 *		The paging reference for "object" is released.
 *		"object" is unlocked.
 *		If "top_page" is not null, "top_page" is
 *		freed and the paging reference for the object
 *		containing it is released.
 *
 *	In/out conditions:
 *		"object" must be locked.
 */
void
vm_fault_cleanup(
    register vm_object_t	object,
    register vm_page_t		top_page)
{
    vm_object_paging_end(object);
    vm_object_unlock(object);

    if (top_page != VM_PAGE_NULL) {
        object = top_page->object;
        vm_object_lock(object);
        VM_PAGE_FREE(top_page);
        vm_object_paging_end(object);
        vm_object_unlock(object);
    }
}
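/*
 * Illustrative sketch (added, not part of the original source): per the
 * contract above, a caller that obtained "result_page"/"top_page" from
 * vm_fault_page() releases both with vm_fault_cleanup() once the page has
 * been consumed.  The variable names here are hypothetical.
 */
#if 0
    /* Done with the busy result page: wake any waiters, then release the
     * paging reference and lock on its object, and free the placeholder
     * page (if any) held busy in the top-level object. */
    PAGE_WAKEUP_DONE(result_page);
    vm_fault_cleanup(result_page->object, top_page);
#endif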
#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES	16
struct {
    unsigned long pages_in_cluster;
    unsigned long pages_at_higher_offsets;
    unsigned long pages_at_lower_offsets;
} cluster_stats_in[MAXCLUSTERPAGES];
#define CLUSTER_STAT(clause)	clause
#define CLUSTER_STAT_HIGHER(x)	\
	((cluster_stats_in[(x)].pages_at_higher_offsets)++)
#define CLUSTER_STAT_LOWER(x)	\
	((cluster_stats_in[(x)].pages_at_lower_offsets)++)
#define CLUSTER_STAT_CLUSTER(x)	\
	((cluster_stats_in[(x)].pages_in_cluster)++)
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */
/* XXX - temporary */
boolean_t vm_allow_clustered_pagein = FALSE;
int vm_pagein_cluster_used = 0;

#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)

boolean_t	vm_page_deactivate_behind = TRUE;
/*
 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
 */
int vm_default_ahead = 0;
int vm_default_behind = MAX_UPL_TRANSFER;
/*
 *	vm_page_deactivate_behind
 *
 *	Determine if sequential access is in progress
 *	in accordance with the behavior specified.  If
 *	so, compute a potential page to deactivate and
 *	deactivate it.
 *
 *	The object must be locked.
 */
boolean_t
vm_fault_deactivate_behind(
    vm_object_t		object,
    vm_object_offset_t	offset,
    vm_behavior_t	behavior)
{
    vm_page_t m;
    dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind);	/* (TEST/DEBUG) */

    switch (behavior) {
    case VM_BEHAVIOR_RANDOM:
        object->sequential = PAGE_SIZE_64;
        m = VM_PAGE_NULL;
        break;
    case VM_BEHAVIOR_SEQUENTIAL:
        if (offset &&
            object->last_alloc == offset - PAGE_SIZE_64) {
            object->sequential += PAGE_SIZE_64;
            m = vm_page_lookup(object, offset - PAGE_SIZE_64);
        } else {
            object->sequential = PAGE_SIZE_64; /* reset */
            m = VM_PAGE_NULL;
        }
        break;
    case VM_BEHAVIOR_RSEQNTL:
        if (object->last_alloc &&
            object->last_alloc == offset + PAGE_SIZE_64) {
            object->sequential += PAGE_SIZE_64;
            m = vm_page_lookup(object, offset + PAGE_SIZE_64);
        } else {
            object->sequential = PAGE_SIZE_64; /* reset */
            m = VM_PAGE_NULL;
        }
        break;
    case VM_BEHAVIOR_DEFAULT:
    default:
        if (offset &&
            object->last_alloc == offset - PAGE_SIZE_64) {
            vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;

            object->sequential += PAGE_SIZE_64;
            m = (offset >= behind &&
                 object->sequential >= behind) ?
                    vm_page_lookup(object, offset - behind) :
                    VM_PAGE_NULL;
        } else if (object->last_alloc &&
                   object->last_alloc == offset + PAGE_SIZE_64) {
            vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;

            object->sequential += PAGE_SIZE_64;
            m = (offset < -behind &&
                 object->sequential >= behind) ?
                    vm_page_lookup(object, offset + behind) :
                    VM_PAGE_NULL;
        } else {
            object->sequential = PAGE_SIZE_64;
            m = VM_PAGE_NULL;
        }
        break;
    }
    object->last_alloc = offset;

    if (m) {
        if (!m->busy) {
            vm_page_lock_queues();
            vm_page_deactivate(m);
            vm_page_unlock_queues();

            dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
        }
        return TRUE;
    }
    return FALSE;
}
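/*
 * Illustrative sketch (added, not part of the original source): the window
 * arithmetic used by the VM_BEHAVIOR_DEFAULT case above.  object->sequential
 * grows by PAGE_SIZE_64 for each strictly sequential allocation, and only
 * after a full window of vm_default_behind pages does the routine reach
 * "behind" bytes back for a deactivation candidate.  The helper name below
 * is hypothetical.
 */
#if 0
static vm_object_offset_t
deactivate_target(vm_object_offset_t offset, vm_object_offset_t sequential)
{
    vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;

    if (offset >= behind && sequential >= behind)
        return offset - behind;			/* page to deactivate */
    return (vm_object_offset_t) -1;		/* no candidate yet */
}
#endif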
/*
 *	Routine:	vm_fault_page
 *	Purpose:
 *		Find the resident page for the virtual memory
 *		specified by the given virtual memory object
 *		and offset.
 *	Additional arguments:
 *		The required permissions for the page is given
 *		in "fault_type".  Desired permissions are included
 *		in "protection".  The minimum and maximum valid offsets
 *		within the object for the relevant map entry are
 *		passed in "lo_offset" and "hi_offset" respectively and
 *		the expected page reference pattern is passed in "behavior".
 *		These three parameters are used to determine pagein cluster
 *		limits.
 *
 *		If the desired page is known to be resident (for
 *		example, because it was previously wired down), asserting
 *		the "unwiring" parameter will speed the search.
 *
 *		If the operation can be interrupted (by thread_abort
 *		or thread_terminate), then the "interruptible"
 *		parameter should be asserted.
 *
 *	Results:
 *		The page containing the proper data is returned
 *		in "result_page".
 *
 *	In/out conditions:
 *		The source object must be locked and referenced,
 *		and must donate one paging reference.  The reference
 *		is not affected.  The paging reference and lock are
 *		consumed.
 *
 *		If the call succeeds, the object in which "result_page"
 *		resides is left locked and holding a paging reference.
 *		If this is not the original object, a busy page in the
 *		original object is returned in "top_page", to prevent other
 *		callers from pursuing this same data, along with a paging
 *		reference for the original object.  The "top_page" should
 *		be destroyed when this guarantee is no longer required.
 *		The "result_page" is also left busy.  It is not removed
 *		from the pageout queues.
 */
vm_fault_return_t
vm_fault_page(
    vm_object_t		first_object,	/* Object to begin search */
    vm_object_offset_t	first_offset,	/* Offset into object */
    vm_prot_t		fault_type,	/* What access is requested */
    boolean_t		must_be_resident,/* Must page be resident? */
    int			interruptible,	/* how may fault be interrupted? */
    vm_object_offset_t	lo_offset,	/* Map entry start */
    vm_object_offset_t	hi_offset,	/* Map entry end */
    vm_behavior_t	behavior,	/* Page reference behavior */
    /* Modifies in place: */
    vm_prot_t		*protection,	/* Protection for mapping */
    vm_page_t		*result_page,	/* Page found, if successful */
    vm_page_t		*top_page,	/* Page in top object, if
					 * not result_page.  */
    int			*type_of_fault,	/* if non-null, fill in with type of fault
					 * COW, zero-fill, etc... returned in trace point */
    /* More arguments: */
    kern_return_t	*error_code,	/* code if page is in error */
    boolean_t		no_zero_fill,	/* don't zero fill absent pages */
    boolean_t		data_supply,	/* treat as data_supply if
					 * it is a write fault and a full
					 * page is provided */
    vm_map_t		map,
    vm_offset_t		vaddr)
{
    register vm_page_t	m;
    register vm_object_t object;
    vm_page_t		first_m;
    vm_object_offset_t	offset;
    vm_object_t		next_object;
    vm_object_t		copy_object;
    boolean_t		look_for_page;
    vm_prot_t		access_required = fault_type;
    vm_prot_t		wants_copy_flag;
    vm_size_t		cluster_size, length;
    vm_object_offset_t	cluster_offset;
    vm_object_offset_t	cluster_start, cluster_end, paging_offset;
    vm_object_offset_t	align_offset;
    CLUSTER_STAT(int	pages_at_higher_offsets;)
    CLUSTER_STAT(int	pages_at_lower_offsets;)
    kern_return_t	wait_result;
    boolean_t		interruptible_state;
    boolean_t		bumped_pagein = FALSE;
#if	MACH_PAGEMAP
/*
 * MACH page map - an optional optimization where a bit map is maintained
 * by the VM subsystem for internal objects to indicate which pages of
 * the object currently reside on backing store.  This existence map
 * duplicates information maintained by the vnode pager.  It is
 * created at the time of the first pageout against the object, i.e.
 * at the same time the pager for the object is created.  The optimization
 * is designed to eliminate pager interaction overhead, if it is
 * 'known' that the page does not exist on backing store.
 *
 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
 * either marked as paged out in the existence map for the object or no
 * existence map exists for the object.  LOOK_FOR() is one of the
 * criteria in the decision to invoke the pager.  It is also used as one
 * of the criteria to terminate the scan for adjacent pages in a clustered
 * pagein operation.  Note that LOOK_FOR() always evaluates to TRUE for
 * permanent objects.  Note also that if the pager for an internal object
 * has not been created, the pager is not invoked regardless of the value
 * of LOOK_FOR() and that clustered pagein scans are only done on an object
 * for which a pager has been created.
 *
 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 * is marked as paged out in the existence map for the object.  PAGED_OUT()
 * is used to determine if a page has already been pushed
 * into a copy object in order to avoid a redundant page out operation.
 */
#define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			!= VM_EXTERNAL_STATE_ABSENT)
#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			== VM_EXTERNAL_STATE_EXISTS)
#else /* MACH_PAGEMAP */
/*
 * If the MACH page map optimization is not enabled,
 * LOOK_FOR() always evaluates to TRUE.  The pager will always be
 * invoked to resolve missing pages in an object, assuming the pager
 * has been created for the object.  In a clustered page operation, the
 * absence of a page on backing store cannot be used to terminate
 * a scan for adjacent pages since that information is available only in
 * the pager.  Hence pages that may not be paged out are potentially
 * included in a clustered request.  The vnode pager is coded to deal
 * with any combination of absent/present pages in a clustered
 * pagein request.  PAGED_OUT() always evaluates to FALSE, i.e. the pager
 * will always be invoked to push a dirty page into a copy object assuming
 * a pager has been created.  If the page has already been pushed, the
 * pager will ignore the new request.
 */
#define LOOK_FOR(o, f) TRUE
#define PAGED_OUT(o, f) FALSE
#endif /* MACH_PAGEMAP */
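/*
 * Illustrative sketch (added, not part of the original source): how
 * LOOK_FOR() participates in the decision to call the pager, mirroring
 * the look_for_page computation inside vm_fault_page() below.  The helper
 * name is hypothetical.
 */
#if 0
static boolean_t
should_ask_pager(vm_object_t object, vm_object_offset_t offset)
{
    /* never talk to a pager that has not been created yet */
    if (!object->pager_created)
        return FALSE;
    /* skip the pager when the existence map says the page is
     * definitely not on backing store */
    if (!LOOK_FOR(object, offset))
        return FALSE;
    return TRUE;
}
#endif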
#define PREPARE_RELEASE_PAGE(m)			\
	MACRO_BEGIN				\
	vm_page_lock_queues();			\
	MACRO_END

#define DO_RELEASE_PAGE(m)			\
	MACRO_BEGIN				\
	PAGE_WAKEUP_DONE(m);			\
	if (!m->active && !m->inactive)		\
		vm_page_activate(m);		\
	vm_page_unlock_queues();		\
	MACRO_END

#define RELEASE_PAGE(m)				\
	MACRO_BEGIN				\
	PREPARE_RELEASE_PAGE(m);		\
	DO_RELEASE_PAGE(m);			\
	MACRO_END
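/*
 * Illustrative sketch (added, not part of the original source): what
 * RELEASE_PAGE(m) amounts to once the helpers above are expanded -- wake
 * any waiters on the busy page and put it back on the active queue if it
 * fell off the paging queues.
 */
#if 0
    vm_page_lock_queues();
    PAGE_WAKEUP_DONE(m);
    if (!m->active && !m->inactive)
        vm_page_activate(m);
    vm_page_unlock_queues();
#endif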
    dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset);	/* (TEST/DEBUG) */
#if	!VM_FAULT_STATIC_CONFIG
    if (vm_fault_dirty_handling
#if	MACH_KDB
        /*
         *	If there are watchpoints set, then
         *	we don't want to give away write permission
         *	on a read fault.  Make the task write fault,
         *	so that the watchpoint code notices the access.
         */
        || db_watchpoint_list
#endif	/* MACH_KDB */
        ) {
        /*
         *	If we aren't asking for write permission,
         *	then don't give it away.  We're using write
         *	faults to set the dirty bit.
         */
        if (!(fault_type & VM_PROT_WRITE))
            *protection &= ~VM_PROT_WRITE;
    }

    if (!vm_fault_interruptible)
        interruptible = THREAD_UNINT;
#else	/* STATIC_CONFIG */
#if	MACH_KDB
    /*
     *	If there are watchpoints set, then
     *	we don't want to give away write permission
     *	on a read fault.  Make the task write fault,
     *	so that the watchpoint code notices the access.
     */
    if (db_watchpoint_list) {
        /*
         *	If we aren't asking for write permission,
         *	then don't give it away.  We're using write
         *	faults to set the dirty bit.
         */
        if (!(fault_type & VM_PROT_WRITE))
            *protection &= ~VM_PROT_WRITE;
    }
#endif	/* MACH_KDB */
#endif	/* STATIC_CONFIG */

    interruptible_state = thread_interrupt_level(interruptible);
    /*
     *	INVARIANTS (through entire routine):
     *
     *	1)	At all times, we must either have the object
     *		lock or a busy page in some object to prevent
     *		some other thread from trying to bring in
     *		the same page.
     *
     *		Note that we cannot hold any locks during the
     *		pager access or when waiting for memory, so
     *		we use a busy page then.
     *
     *		Note also that we aren't as concerned about more than
     *		one thread attempting to memory_object_data_unlock
     *		the same page at once, so we don't hold the page
     *		as busy then, but do record the highest unlock
     *		value so far.  [Unlock requests may also be delivered
     *		out of order.]
     *
     *	2)	To prevent another thread from racing us down the
     *		shadow chain and entering a new page in the top
     *		object before we do, we must keep a busy page in
     *		the top object while following the shadow chain.
     *
     *	3)	We must increment paging_in_progress on any object
     *		for which we have a busy page
     *
     *	4)	We leave busy pages on the pageout queues.
     *		If the pageout daemon comes across a busy page,
     *		it will remove the page from the pageout queues.
     */
    /*
     *	Search for the page at object/offset.
     */

    object = first_object;
    offset = first_offset;
    first_m = VM_PAGE_NULL;
    access_required = fault_type;

    XPR(XPR_VM_FAULT,
        "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
        (integer_t)object, offset, fault_type, *protection, 0);
    /*
     *	See whether this page is resident
     */

        dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0);	/* (TEST/DEBUG) */

        if (!object->alive) {
            vm_fault_cleanup(object, first_m);
            thread_interrupt_level(interruptible_state);
            return(VM_FAULT_MEMORY_ERROR);
        }
        m = vm_page_lookup(object, offset);

        dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object);	/* (TEST/DEBUG) */

        if (m != VM_PAGE_NULL) {
            /*
             *	If the page was pre-paged as part of a
             *	cluster, record the fact.
             */
            if (m->clustered) {
                vm_pagein_cluster_used++;
                m->clustered = FALSE;
            }

            /*
             *	If the page is being brought in,
             *	wait for it and then retry.
             *
             *	A possible optimization: if the page
             *	is known to be resident, we can ignore
             *	pages that are absent (regardless of
             *	whether they're busy).
             */

            if (m->busy) {
                dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */

                wait_result = PAGE_SLEEP(object, m, interruptible);
                XPR(XPR_VM_FAULT,
                    "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
                    (integer_t)object, offset,
                    (integer_t)m, 0, 0);
                counter(c_vm_fault_page_block_busy_kernel++);

                if (wait_result != THREAD_AWAKENED) {
                    vm_fault_cleanup(object, first_m);
                    thread_interrupt_level(interruptible_state);
                    if (wait_result == THREAD_RESTART)
                        return(VM_FAULT_RETRY);
                    else
                        return(VM_FAULT_INTERRUPTED);
                }
                continue;
            }
            /*
             *	If the page is in error, give up now.
             */

            if (m->error) {
                dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code);	/* (TEST/DEBUG) */

                if (error_code)
                    *error_code = m->page_error;
                VM_PAGE_FREE(m);
                vm_fault_cleanup(object, first_m);
                thread_interrupt_level(interruptible_state);
                return(VM_FAULT_MEMORY_ERROR);
            }

            /*
             *	If the pager wants us to restart
             *	at the top of the chain,
             *	typically because it has moved the
             *	page to another pager, then do so.
             */

            if (m->restart) {
                dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */

                VM_PAGE_FREE(m);
                vm_fault_cleanup(object, first_m);
                thread_interrupt_level(interruptible_state);
                return(VM_FAULT_RETRY);
            }
            /*
             *	If the page isn't busy, but is absent,
             *	then it was deemed "unavailable".
             *
             *	Remove the non-existent page (unless it's
             *	in the top object) and move on down to the
             *	next object (if there is one).
             */

            dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow);	/* (TEST/DEBUG) */

            next_object = object->shadow;
            if (next_object == VM_OBJECT_NULL) {
                assert(!must_be_resident);

                if (object->shadow_severed) {
                    vm_fault_cleanup(object, first_m);
                    thread_interrupt_level(interruptible_state);
                    return VM_FAULT_MEMORY_ERROR;
                }

                /*
                 *	Absent page at bottom of shadow
                 *	chain; zero fill the page we left
                 *	busy in the first object, and flush
                 *	the absent page.  But first we
                 *	need to allocate a real page.
                 */
                if (VM_PAGE_THROTTLED() ||
                    (real_m = vm_page_grab()) == VM_PAGE_NULL) {
                    vm_fault_cleanup(object, first_m);
                    thread_interrupt_level(interruptible_state);
                    return(VM_FAULT_MEMORY_SHORTAGE);
                }
                /*
                 * are we protecting the system from
                 * backing store exhaustion.  If so
                 * sleep unless we are privileged.
                 */
                if (vm_backing_store_low) {
                    if (!(current_task()->priv_flags
                          & VM_BACKING_STORE_PRIV)) {
                        assert_wait((event_t)
                            &vm_backing_store_low,
                            THREAD_UNINT);
                        vm_fault_cleanup(object,
                            first_m);
                        thread_block((void(*)(void)) 0);
                        thread_interrupt_level(
                            interruptible_state);
                        return(VM_FAULT_RETRY);
                    }
                }
737 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
738 (integer_t
)object
, offset
,
740 (integer_t
)first_object
, 0);
741 if (object
!= first_object
) {
743 vm_object_paging_end(object
);
744 vm_object_unlock(object
);
745 object
= first_object
;
746 offset
= first_offset
;
748 first_m
= VM_PAGE_NULL
;
749 vm_object_lock(object
);
                assert(real_m->busy);
                vm_page_insert(real_m, object, offset);
                m = real_m;

                /*
                 *  Drop the lock while zero filling
                 *  page.  Then break because this
                 *  is the page we wanted.  Checking
                 *  the page lock is a waste of time;
                 *  this page was either absent or
                 *  newly allocated -- in both cases
                 *  it can't be page locked by a pager.
                 */
                if (!no_zero_fill) {
                    vm_object_unlock(object);
                    vm_page_zero_fill(m);
                    vm_object_lock(object);
                }
                if (type_of_fault)
                    *type_of_fault = DBG_ZERO_FILL_FAULT;
                VM_STAT(zero_fill_count++);

                if (bumped_pagein == TRUE) {
                    VM_STAT(pageins--);
                    current_task()->pageins--;
                }
                pmap_clear_modify(m->phys_page);
                vm_page_lock_queues();
                VM_PAGE_QUEUES_REMOVE(m);
                m->page_ticket = vm_page_ticket;
                if (m->object->size > 0x80000) {
                    m->zero_fill = TRUE;
                    /* depends on the queues lock */
                    queue_enter(&vm_page_queue_zf,
                        m, vm_page_t, pageq);
                } else {
                    queue_enter(
                        &vm_page_queue_inactive,
                        m, vm_page_t, pageq);
                }
                vm_page_ticket_roll++;
                if (vm_page_ticket_roll ==
                    VM_PAGE_TICKETS_IN_ROLL) {
                    vm_page_ticket_roll = 0;
                    if (vm_page_ticket ==
                        VM_PAGE_TICKET_ROLL_IDS)
                        vm_page_ticket = 0;
                    else
                        vm_page_ticket++;
                }
                m->inactive = TRUE;
                vm_page_inactive_count++;
                vm_page_unlock_queues();
                break;
            if (must_be_resident) {
                vm_object_paging_end(object);
            } else if (object != first_object) {
                vm_object_paging_end(object);
                VM_PAGE_FREE(m);
            } else {
                first_m = m;
                m->absent = FALSE;
                vm_object_absent_release(object);
                m->busy = TRUE;

                vm_page_lock_queues();
                VM_PAGE_QUEUES_REMOVE(m);
                vm_page_unlock_queues();
            }
            XPR(XPR_VM_FAULT,
                "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
                (integer_t)object, offset,
                (integer_t)next_object,
                offset+object->shadow_offset, 0);
            offset += object->shadow_offset;
            hi_offset += object->shadow_offset;
            lo_offset += object->shadow_offset;
            access_required = VM_PROT_READ;
            vm_object_lock(next_object);
            vm_object_unlock(object);
            object = next_object;
            vm_object_paging_begin(object);
847 && ((object
!= first_object
) ||
848 (object
->copy
!= VM_OBJECT_NULL
))
849 && (fault_type
& VM_PROT_WRITE
)) {
851 * This is a copy-on-write fault that will
852 * cause us to revoke access to this page, but
853 * this page is in the process of being cleaned
854 * in a clustered pageout. We must wait until
855 * the cleaning operation completes before
856 * revoking access to the original page,
857 * otherwise we might attempt to remove a
861 dbgTrace(0xBEEF0009, (unsigned int) m
, (unsigned int) offset
); /* (TEST/DEBUG) */
864 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
865 (integer_t
)object
, offset
,
867 /* take an extra ref so that object won't die */
868 assert(object
->ref_count
> 0);
870 vm_object_res_reference(object
);
871 vm_fault_cleanup(object
, first_m
);
872 counter(c_vm_fault_page_block_backoff_kernel
++);
873 vm_object_lock(object
);
874 assert(object
->ref_count
> 0);
875 m
= vm_page_lookup(object
, offset
);
876 if (m
!= VM_PAGE_NULL
&& m
->cleaning
) {
877 PAGE_ASSERT_WAIT(m
, interruptible
);
878 vm_object_unlock(object
);
879 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
880 vm_object_deallocate(object
);
883 vm_object_unlock(object
);
884 vm_object_deallocate(object
);
885 thread_interrupt_level(interruptible_state
);
886 return VM_FAULT_RETRY
;
            /*
             *	If the desired access to this page has
             *	been locked out, request that it be unlocked.
             */

            if (access_required & m->page_lock) {
                if ((access_required & m->unlock_request) != access_required) {
                    vm_prot_t	new_unlock_request;
                    kern_return_t	rc;

                    dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready);	/* (TEST/DEBUG) */

                    if (!object->pager_ready) {
                        XPR(XPR_VM_FAULT,
                            "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
                            access_required,
                            (integer_t)object, offset,
                            (integer_t)m, 0);
                        /* take an extra ref */
                        assert(object->ref_count > 0);
                        object->ref_count++;
                        vm_object_res_reference(object);
                        vm_fault_cleanup(object,
                            first_m);
                        counter(c_vm_fault_page_block_backoff_kernel++);
                        vm_object_lock(object);
                        assert(object->ref_count > 0);
                        if (!object->pager_ready) {
                            wait_result = vm_object_assert_wait(
                                object,
                                VM_OBJECT_EVENT_PAGER_READY,
                                interruptible);
                            vm_object_unlock(object);
                            if (wait_result == THREAD_WAITING)
                                wait_result = thread_block(THREAD_CONTINUE_NULL);
                            vm_object_deallocate(object);
                            goto backoff;
                        } else {
                            vm_object_unlock(object);
                            vm_object_deallocate(object);
                            thread_interrupt_level(interruptible_state);
                            return VM_FAULT_RETRY;
                        }
                    }

                    new_unlock_request = m->unlock_request =
                        (access_required | m->unlock_request);
                    vm_object_unlock(object);
                    XPR(XPR_VM_FAULT,
                        "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
                        (integer_t)object, offset,
                        (integer_t)m, new_unlock_request, 0);
                    if ((rc = memory_object_data_unlock(
                            object->pager,
                            offset + object->paging_offset,
                            PAGE_SIZE,
                            new_unlock_request))
                         != KERN_SUCCESS) {
                        if (vm_fault_debug)
                            printf("vm_fault: memory_object_data_unlock failed\n");
                        vm_object_lock(object);
                        vm_fault_cleanup(object, first_m);
                        thread_interrupt_level(interruptible_state);
                        return((rc == MACH_SEND_INTERRUPTED) ?
                            VM_FAULT_INTERRUPTED :
                            VM_FAULT_MEMORY_ERROR);
                    }
                    vm_object_lock(object);
                    continue;
                }
963 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
964 access_required
, (integer_t
)object
,
965 offset
, (integer_t
)m
, 0);
966 /* take an extra ref so object won't die */
967 assert(object
->ref_count
> 0);
969 vm_object_res_reference(object
);
970 vm_fault_cleanup(object
, first_m
);
971 counter(c_vm_fault_page_block_backoff_kernel
++);
972 vm_object_lock(object
);
973 assert(object
->ref_count
> 0);
974 m
= vm_page_lookup(object
, offset
);
975 if (m
!= VM_PAGE_NULL
&&
976 (access_required
& m
->page_lock
) &&
977 !((access_required
& m
->unlock_request
) != access_required
)) {
978 PAGE_ASSERT_WAIT(m
, interruptible
);
979 vm_object_unlock(object
);
980 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
981 vm_object_deallocate(object
);
984 vm_object_unlock(object
);
985 vm_object_deallocate(object
);
986 thread_interrupt_level(interruptible_state
);
987 return VM_FAULT_RETRY
;
            /*
             *	We mark the page busy and leave it on
             *	the pageout queues.  If the pageout
             *	daemon comes across it, then it will
             *	remove the page from the pageout queues.
             */

            dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */

#if	!VM_FAULT_STATIC_CONFIG
            if (!software_reference_bits) {
                vm_page_lock_queues();
                if (m->inactive)
                    vm_stat.reactivations++;

                VM_PAGE_QUEUES_REMOVE(m);
                vm_page_unlock_queues();
            }
#endif
            assert(!m->busy);
            m->busy = TRUE;

            XPR(XPR_VM_FAULT,
                "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
                (integer_t)object, offset, (integer_t)m, 0, 0);
            break;
        }
        look_for_page =
            (object->pager_created) &&
            LOOK_FOR(object, offset) &&
            (!data_supply);

        dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object);	/* (TEST/DEBUG) */

        if ((look_for_page || (object == first_object))
             && !must_be_resident
             && !(object->phys_contiguous)) {
            /*
             *	Allocate a new page for this object/offset
             *	pair.
             */
            m = vm_page_grab_fictitious();

            dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object);	/* (TEST/DEBUG) */

            if (m == VM_PAGE_NULL) {
                vm_fault_cleanup(object, first_m);
                thread_interrupt_level(interruptible_state);
                return(VM_FAULT_FICTITIOUS_SHORTAGE);
            }
            vm_page_insert(m, object, offset);
        }

        if ((look_for_page && !must_be_resident)) {
            /*
             *	If the memory manager is not ready, we
             *	cannot make requests.
             */
            if (!object->pager_ready) {

                dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0);	/* (TEST/DEBUG) */

                if (m != VM_PAGE_NULL)
                    VM_PAGE_FREE(m);
                XPR(XPR_VM_FAULT,
                    "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
                    (integer_t)object, offset, 0, 0, 0);
                /* take an extra ref so object won't die */
                assert(object->ref_count > 0);
                object->ref_count++;
                vm_object_res_reference(object);
                vm_fault_cleanup(object, first_m);
                counter(c_vm_fault_page_block_backoff_kernel++);
                vm_object_lock(object);
                assert(object->ref_count > 0);
                if (!object->pager_ready) {
                    wait_result = vm_object_assert_wait(object,
                        VM_OBJECT_EVENT_PAGER_READY,
                        interruptible);
                    vm_object_unlock(object);
                    if (wait_result == THREAD_WAITING)
                        wait_result = thread_block(THREAD_CONTINUE_NULL);
                    vm_object_deallocate(object);
                    goto backoff;
                } else {
                    vm_object_unlock(object);
                    vm_object_deallocate(object);
                    thread_interrupt_level(interruptible_state);
                    return VM_FAULT_RETRY;
                }
            }
            if (object->phys_contiguous) {
                if (m != VM_PAGE_NULL) {
                    VM_PAGE_FREE(m);
                    m = VM_PAGE_NULL;
                }
            }
            if (object->internal) {
                /*
                 *	Requests to the default pager
                 *	must reserve a real page in advance,
                 *	because the pager's data-provided
                 *	won't block for pages.  IMPORTANT:
                 *	this acts as a throttling mechanism
                 *	for data_requests to the default
                 *	pager.
                 */

                dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */

                if (m->fictitious && !vm_page_convert(m)) {
                    VM_PAGE_FREE(m);
                    vm_fault_cleanup(object, first_m);
                    thread_interrupt_level(interruptible_state);
                    return(VM_FAULT_MEMORY_SHORTAGE);
                }
            } else if (object->absent_count >
                        vm_object_absent_max) {
                /*
                 *	If there are too many outstanding page
                 *	requests pending on this object, we
                 *	wait for them to be resolved now.
                 */

                dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */

                if (m != VM_PAGE_NULL)
                    VM_PAGE_FREE(m);
                /* take an extra ref so object won't die */
                assert(object->ref_count > 0);
                object->ref_count++;
                vm_object_res_reference(object);
                vm_fault_cleanup(object, first_m);
                counter(c_vm_fault_page_block_backoff_kernel++);
                vm_object_lock(object);
                assert(object->ref_count > 0);
                if (object->absent_count > vm_object_absent_max) {
                    vm_object_absent_assert_wait(object,
                        interruptible);
                    vm_object_unlock(object);
                    wait_result = thread_block(THREAD_CONTINUE_NULL);
                    vm_object_deallocate(object);
                    goto backoff;
                } else {
                    vm_object_unlock(object);
                    vm_object_deallocate(object);
                    thread_interrupt_level(interruptible_state);
                    return VM_FAULT_RETRY;
                }
            }

            /*
             *	Indicate that the page is waiting for data
             *	from the memory manager.
             */

            if (m != VM_PAGE_NULL) {
                m->list_req_pending = TRUE;
                m->absent = TRUE;
                m->unusual = TRUE;
                object->absent_count++;
            }
        cluster_start = offset;
        length = PAGE_SIZE;

        /*
         * lengthen the cluster by the pages in the working set
         */
        if ((map != NULL) &&
            (current_task()->dynamic_working_set != 0)) {
            cluster_end = cluster_start + length;
            /* tws values for start and end are just
             * suggestions.  Therefore, as long as
             * build_cluster does not use pointers or
             * take action based on values that
             * could be affected by re-entrance we
             * do not need to take the map lock.
             */
            cluster_end = offset + PAGE_SIZE_64;
            tws_build_cluster((tws_hash_t)
                current_task()->dynamic_working_set,
                object, &cluster_start,
                &cluster_end, 0x40000);
            length = cluster_end - cluster_start;
        }

        dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0);	/* (TEST/DEBUG) */

        /*
         *	We have a busy page, so we can
         *	release the object lock.
         */
        vm_object_unlock(object);
        /*
         *	Call the memory manager to retrieve the data.
         */

        if (type_of_fault)
            *type_of_fault = (length << 8) | DBG_PAGEIN_FAULT;
        VM_STAT(pageins++);
        current_task()->pageins++;
        bumped_pagein = TRUE;

        /*
         *	If this object uses a copy_call strategy,
         *	and we are interested in a copy of this object
         *	(having gotten here only by following a
         *	shadow chain), then tell the memory manager
         *	via a flag added to the desired_access
         *	parameter, so that it can detect a race
         *	between our walking down the shadow chain
         *	and its pushing pages up into a copy of
         *	the object that it manages.
         */

        if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
            object != first_object) {
            wants_copy_flag = VM_PROT_WANTS_COPY;
        } else {
            wants_copy_flag = VM_PROT_NONE;
        }

        XPR(XPR_VM_FAULT,
            "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
            (integer_t)object, offset, (integer_t)m,
            access_required | wants_copy_flag, 0);

        rc = memory_object_data_request(object->pager,
            cluster_start + object->paging_offset,
            length,
            access_required | wants_copy_flag);
        dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc);	/* (TEST/DEBUG) */

        if (rc != KERN_SUCCESS) {
            if (rc != MACH_SEND_INTERRUPTED
                && vm_fault_debug)
                printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
                    "memory_object_data_request",
                    object->pager,
                    cluster_start + object->paging_offset,
                    length, access_required, rc);
            /*
             *	Don't want to leave a busy page around,
             *	but the data request may have blocked,
             *	so check if it's still there and busy.
             */
            if (!object->phys_contiguous) {
                vm_object_lock(object);
                for (; length; length -= PAGE_SIZE,
                     cluster_start += PAGE_SIZE_64) {
                    vm_page_t p;
                    if ((p = vm_page_lookup(object,
                            cluster_start))
                        && p->absent && p->busy
                        && p != first_m) {
                        VM_PAGE_FREE(p);
                    }
                }
            }
            vm_fault_cleanup(object, first_m);
            thread_interrupt_level(interruptible_state);
            return((rc == MACH_SEND_INTERRUPTED) ?
                VM_FAULT_INTERRUPTED :
                VM_FAULT_MEMORY_ERROR);
        }
        {
            tws_hash_line_t	line;
            task_t		task;

            task = current_task();
            if ((map != NULL) &&
                (task->dynamic_working_set != 0)
                    && !(object->private)) {
                vm_object_t	base_object;
                vm_object_offset_t base_offset;
                base_object = object;
                base_offset = offset;
                while (base_object->shadow) {
                    base_offset +=
                        base_object->shadow_offset;
                    base_object =
                        base_object->shadow;
                }
                if (tws_lookup((tws_hash_t)
                        task->dynamic_working_set,
                        base_offset, base_object,
                        &line) == KERN_SUCCESS) {
                    tws_line_signal((tws_hash_t)
                        task->dynamic_working_set,
                        map, line, vaddr);
                }
            }
        }
        /*
         * Retry with same object/offset, since new data may
         * be in a different page (i.e., m is meaningless at
         * this point).
         */
        vm_object_lock(object);
        if ((interruptible != THREAD_UNINT) &&
            (current_thread()->state & TH_ABORT)) {
            vm_fault_cleanup(object, first_m);
            thread_interrupt_level(interruptible_state);
            return(VM_FAULT_INTERRUPTED);
        }
        if (m == VM_PAGE_NULL)
            break;
        continue;
        /*
         * The only case in which we get here is if
         * object has no pager (or unwiring).  If the pager doesn't
         * have the page this is handled in the m->absent case above
         * (and if you change things here you should look above).
         */

        dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */

        if (object == first_object)
            first_m = m;
        else
            assert(m == VM_PAGE_NULL);

        XPR(XPR_VM_FAULT,
            "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
            (integer_t)object, offset, (integer_t)m,
            (integer_t)object->shadow, 0);
        /*
         *	Move on to the next object.  Lock the next
         *	object before unlocking the current one.
         */
        next_object = object->shadow;
        if (next_object == VM_OBJECT_NULL) {
            assert(!must_be_resident);

            /*
             *	If there's no object left, fill the page
             *	in the top object with zeros.  But first we
             *	need to allocate a real page.
             */

            if (object != first_object) {
                vm_object_paging_end(object);
                vm_object_unlock(object);

                object = first_object;
                offset = first_offset;
                vm_object_lock(object);
            }

            m = first_m;
            assert(m->object == object);
            first_m = VM_PAGE_NULL;
            if (m == VM_PAGE_NULL) {
                m = vm_page_grab();
                if (m == VM_PAGE_NULL) {
                    vm_fault_cleanup(
                        object, VM_PAGE_NULL);
                    thread_interrupt_level(
                        interruptible_state);
                    return(VM_FAULT_MEMORY_SHORTAGE);
                }
                vm_page_insert(m, object, offset);
            }
            if (object->shadow_severed) {
                VM_PAGE_FREE(m);
                vm_fault_cleanup(object, VM_PAGE_NULL);
                thread_interrupt_level(interruptible_state);
                return VM_FAULT_MEMORY_ERROR;
            }

            /*
             * are we protecting the system from
             * backing store exhaustion.  If so
             * sleep unless we are privileged.
             */

            if (vm_backing_store_low) {
                if (!(current_task()->priv_flags
                      & VM_BACKING_STORE_PRIV)) {
                    assert_wait((event_t)
                        &vm_backing_store_low,
                        THREAD_UNINT);
                    VM_PAGE_FREE(m);
                    vm_fault_cleanup(object, VM_PAGE_NULL);
                    thread_block((void (*)(void)) 0);
                    thread_interrupt_level(
                        interruptible_state);
                    return(VM_FAULT_RETRY);
                }
            }
            if (VM_PAGE_THROTTLED() ||
                (m->fictitious && !vm_page_convert(m))) {
                VM_PAGE_FREE(m);
                vm_fault_cleanup(object, VM_PAGE_NULL);
                thread_interrupt_level(interruptible_state);
                return(VM_FAULT_MEMORY_SHORTAGE);
            }
            m->no_isync = FALSE;

            if (!no_zero_fill) {
                vm_object_unlock(object);
                vm_page_zero_fill(m);
                vm_object_lock(object);
            }
            if (type_of_fault)
                *type_of_fault = DBG_ZERO_FILL_FAULT;
            VM_STAT(zero_fill_count++);

            if (bumped_pagein == TRUE) {
                VM_STAT(pageins--);
                current_task()->pageins--;
            }
            vm_page_lock_queues();
            VM_PAGE_QUEUES_REMOVE(m);
            if (m->object->size > 0x80000) {
                m->zero_fill = TRUE;
                /* depends on the queues lock */
                queue_enter(&vm_page_queue_zf,
                    m, vm_page_t, pageq);
            } else {
                queue_enter(
                    &vm_page_queue_inactive,
                    m, vm_page_t, pageq);
            }
            m->page_ticket = vm_page_ticket;
            vm_page_ticket_roll++;
            if (vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
                vm_page_ticket_roll = 0;
                if (vm_page_ticket ==
                    VM_PAGE_TICKET_ROLL_IDS)
                    vm_page_ticket = 0;
                else
                    vm_page_ticket++;
            }
            m->inactive = TRUE;
            vm_page_inactive_count++;
            vm_page_unlock_queues();

            pmap_clear_modify(m->phys_page);
            break;
        }
        if ((object != first_object) || must_be_resident)
            vm_object_paging_end(object);
        offset += object->shadow_offset;
        hi_offset += object->shadow_offset;
        lo_offset += object->shadow_offset;
        access_required = VM_PROT_READ;
        vm_object_lock(next_object);
        vm_object_unlock(object);
        object = next_object;
        vm_object_paging_begin(object);
    }
    /*
     *	PAGE HAS BEEN FOUND.
     *
     *	This page (m) is:
     *		busy, so that we can play with it;
     *		not absent, so that nobody else will fill it;
     *		possibly eligible for pageout;
     *
     *	The top-level page (first_m) is:
     *		VM_PAGE_NULL if the page was found in the
     *		 top-level object;
     *		busy, not absent, and ineligible for pageout.
     *
     *	The current object (object) is locked.  A paging
     *	reference is held for the current and top-level
     *	objects.
     */

    dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */

#if	EXTRA_ASSERTIONS
    if (m != VM_PAGE_NULL) {
        assert(m->busy && !m->absent);
        assert((first_m == VM_PAGE_NULL) ||
            (first_m->busy && !first_m->absent &&
             !first_m->active && !first_m->inactive));
    }
#endif	/* EXTRA_ASSERTIONS */

    XPR(XPR_VM_FAULT,
        "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
        (integer_t)object, offset, (integer_t)m,
        (integer_t)first_object, (integer_t)first_m);
    /*
     *	If the page is being written, but isn't
     *	already owned by the top-level object,
     *	we have to copy it into a new page owned
     *	by the top-level object.
     */

    if ((object != first_object) && (m != VM_PAGE_NULL)) {
        /*
         *	We only really need to copy if we
         *	want to write it.
         */

        dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type);	/* (TEST/DEBUG) */

        if (fault_type & VM_PROT_WRITE) {
            vm_page_t copy_m;

            assert(!must_be_resident);

            /*
             * are we protecting the system from
             * backing store exhaustion.  If so
             * sleep unless we are privileged.
             */

            if (vm_backing_store_low) {
                if (!(current_task()->priv_flags
                      & VM_BACKING_STORE_PRIV)) {
                    assert_wait((event_t)
                        &vm_backing_store_low,
                        THREAD_UNINT);
                    RELEASE_PAGE(m);
                    vm_fault_cleanup(object, first_m);
                    thread_block((void (*)(void)) 0);
                    thread_interrupt_level(
                        interruptible_state);
                    return(VM_FAULT_RETRY);
                }
            }
            /*
             *	If we try to collapse first_object at this
             *	point, we may deadlock when we try to get
             *	the lock on an intermediate object (since we
             *	have the bottom object locked).  We can't
             *	unlock the bottom object, because the page
             *	we found may move (by collapse) if we do.
             *
             *	Instead, we first copy the page.  Then, when
             *	we have no more use for the bottom object,
             *	we unlock it and try to collapse.
             *
             *	Note that we copy the page even if we didn't
             *	need to... that's the breaks.
             */

            /*
             *	Allocate a page for the copy
             */
            copy_m = vm_page_grab();
            if (copy_m == VM_PAGE_NULL) {
                RELEASE_PAGE(m);
                vm_fault_cleanup(object, first_m);
                thread_interrupt_level(interruptible_state);
                return(VM_FAULT_MEMORY_SHORTAGE);
            }

            XPR(XPR_VM_FAULT,
                "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
                (integer_t)object, offset,
                (integer_t)m, (integer_t)copy_m, 0);
            vm_page_copy(m, copy_m);
            /*
             *	If another map is truly sharing this
             *	page with us, we have to flush all
             *	uses of the original page, since we
             *	can't distinguish those which want the
             *	original from those which need the
             *	new copy.
             *
             *	XXXO If we know that only one map has
             *	access to this page, then we could
             *	avoid the pmap_page_protect() call.
             */

            vm_page_lock_queues();
            assert(!m->cleaning);
            pmap_page_protect(m->phys_page, VM_PROT_NONE);
            vm_page_deactivate(m);
            copy_m->dirty = TRUE;
            /*
             * Setting reference here prevents this fault from
             * being counted as a (per-thread) reactivate as well
             * as a copy-on-write.
             */
            first_m->reference = TRUE;
            vm_page_unlock_queues();

            /*
             *	We no longer need the old page or object.
             */

            PAGE_WAKEUP_DONE(m);
            vm_object_paging_end(object);
            vm_object_unlock(object);

            if (type_of_fault)
                *type_of_fault = DBG_COW_FAULT;
            VM_STAT(cow_faults++);
            current_task()->cow_faults++;
            object = first_object;
            offset = first_offset;

            vm_object_lock(object);
            VM_PAGE_FREE(first_m);
            first_m = VM_PAGE_NULL;
            assert(copy_m->busy);
            vm_page_insert(copy_m, object, offset);
            m = copy_m;

            /*
             *	Now that we've gotten the copy out of the
             *	way, let's try to collapse the top object.
             *	But we have to play ugly games with
             *	paging_in_progress to do that...
             */

            vm_object_paging_end(object);
            vm_object_collapse(object, offset);
            vm_object_paging_begin(object);

        } else {
            *protection &= (~VM_PROT_WRITE);
        }
    }
    /*
     *	Now check whether the page needs to be pushed into the
     *	copy object.  The use of asymmetric copy on write for
     *	shared temporary objects means that we may do two copies to
     *	satisfy the fault; one above to get the page from a
     *	shadowed object, and one here to push it into the copy.
     */

    while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
           (m != VM_PAGE_NULL)) {
        vm_object_offset_t	copy_offset;
        vm_page_t		copy_m;

        dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type);	/* (TEST/DEBUG) */
        /*
         *	If the page is being written, but hasn't been
         *	copied to the copy-object, we have to copy it there.
         */

        if ((fault_type & VM_PROT_WRITE) == 0) {
            *protection &= ~VM_PROT_WRITE;
            break;
        }

        /*
         *	If the page was guaranteed to be resident,
         *	we must have already performed the copy.
         */

        if (must_be_resident)
            break;

        /*
         *	Try to get the lock on the copy_object.
         */
        if (!vm_object_lock_try(copy_object)) {
            vm_object_unlock(object);

            mutex_pause();	/* wait a bit */

            vm_object_lock(object);
            continue;
        }
        /*
         *	Make another reference to the copy-object,
         *	to keep it from disappearing during the
         *	copy.
         */
        assert(copy_object->ref_count > 0);
        copy_object->ref_count++;
        VM_OBJ_RES_INCR(copy_object);

        /*
         *	Does the page exist in the copy?
         */
        copy_offset = first_offset - copy_object->shadow_offset;
        if (copy_object->size <= copy_offset)
            /*
             * Copy object doesn't cover this page -- do nothing.
             */
            ;
        else if ((copy_m =
            vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
            /* Page currently exists in the copy object */
            /*
             *	If the page is being brought
             *	in, wait for it and then retry.
             */
            if (copy_m->busy) {
                /* take an extra ref so object won't die */
                assert(copy_object->ref_count > 0);
                copy_object->ref_count++;
                vm_object_res_reference(copy_object);
                vm_object_unlock(copy_object);
                vm_fault_cleanup(object, first_m);
                counter(c_vm_fault_page_block_backoff_kernel++);
                vm_object_lock(copy_object);
                assert(copy_object->ref_count > 0);
                VM_OBJ_RES_DECR(copy_object);
                copy_object->ref_count--;
                assert(copy_object->ref_count > 0);
                copy_m = vm_page_lookup(copy_object, copy_offset);
                if (copy_m != VM_PAGE_NULL && copy_m->busy) {
                    PAGE_ASSERT_WAIT(copy_m, interruptible);
                    vm_object_unlock(copy_object);
                    wait_result = thread_block(THREAD_CONTINUE_NULL);
                    vm_object_deallocate(copy_object);
                    goto backoff;
                } else {
                    vm_object_unlock(copy_object);
                    vm_object_deallocate(copy_object);
                    thread_interrupt_level(interruptible_state);
                    return VM_FAULT_RETRY;
                }
            }
        }
        else if (!PAGED_OUT(copy_object, copy_offset)) {
            /*
             *	If PAGED_OUT is TRUE, then the page used to exist
             *	in the copy-object, and has already been paged out.
             *	We don't need to repeat this.  If PAGED_OUT is
             *	FALSE, then either we don't know (!pager_created,
             *	for example) or it hasn't been paged out.
             *	(VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
             *	We must copy the page to the copy object.
             */

            /*
             * are we protecting the system from
             * backing store exhaustion.  If so
             * sleep unless we are privileged.
             */

            if (vm_backing_store_low) {
                if (!(current_task()->priv_flags
                      & VM_BACKING_STORE_PRIV)) {
                    assert_wait((event_t)
                        &vm_backing_store_low,
                        THREAD_UNINT);
                    RELEASE_PAGE(m);
                    VM_OBJ_RES_DECR(copy_object);
                    copy_object->ref_count--;
                    assert(copy_object->ref_count > 0);
                    vm_object_unlock(copy_object);
                    vm_fault_cleanup(object, first_m);
                    thread_block((void (*)(void)) 0);
                    thread_interrupt_level(
                        interruptible_state);
                    return(VM_FAULT_RETRY);
                }
            }
            /*
             *	Allocate a page for the copy
             */
            copy_m = vm_page_alloc(copy_object, copy_offset);
            if (copy_m == VM_PAGE_NULL) {
                RELEASE_PAGE(m);
                VM_OBJ_RES_DECR(copy_object);
                copy_object->ref_count--;
                assert(copy_object->ref_count > 0);
                vm_object_unlock(copy_object);
                vm_fault_cleanup(object, first_m);
                thread_interrupt_level(interruptible_state);
                return(VM_FAULT_MEMORY_SHORTAGE);
            }
            /*
             *	Must copy page into copy-object.
             */
            vm_page_copy(m, copy_m);

            /*
             *	If the old page was in use by any users
             *	of the copy-object, it must be removed
             *	from all pmaps.  (We can't know which
             *	pmaps use it.)
             */

            vm_page_lock_queues();
            assert(!m->cleaning);
            pmap_page_protect(m->phys_page, VM_PROT_NONE);
            copy_m->dirty = TRUE;
            vm_page_unlock_queues();
            /*
             *	If there's a pager, then immediately
             *	page out this page, using the "initialize"
             *	option.  Else, we use the copy.
             */

            if
#if	MACH_PAGEMAP
              ((!copy_object->pager_created) ||
                vm_external_state_get(
                    copy_object->existence_map, copy_offset)
                == VM_EXTERNAL_STATE_ABSENT)
#else
              (!copy_object->pager_created)
#endif
            {
                vm_page_lock_queues();
                vm_page_activate(copy_m);
                vm_page_unlock_queues();
                PAGE_WAKEUP_DONE(copy_m);
            } else {
                assert(copy_m->busy == TRUE);
                /*
                 *	The page is already ready for pageout:
                 *	not on pageout queues and busy.
                 *	Unlock everything except the
                 *	copy_object itself.
                 */

                vm_object_unlock(object);

                /*
                 *	Write the page to the copy-object,
                 *	flushing it from the kernel.
                 */

                vm_pageout_initialize_page(copy_m);

                /*
                 *	Since the pageout may have
                 *	temporarily dropped the
                 *	copy_object's lock, we
                 *	check whether we'll have
                 *	to deallocate the hard way.
                 */

                if ((copy_object->shadow != object) ||
                    (copy_object->ref_count == 1)) {
                    vm_object_unlock(copy_object);
                    vm_object_deallocate(copy_object);
                    vm_object_lock(object);
                    continue;
                }

                /*
                 *	Pick back up the old object's
                 *	lock.  [It is safe to do so,
                 *	since it must be deeper in the
                 *	shadow chain.]
                 */

                vm_object_lock(object);
            }

            /*
             *	Because we're pushing a page upward
             *	in the object tree, we must restart
             *	any faults that are waiting here.
             *	[Note that this is an expansion of
             *	PAGE_WAKEUP that uses the THREAD_RESTART
             *	wait result].  Can't turn off the page's
             *	busy bit because we're not done with it.
             */

            if (m->wanted) {
                m->wanted = FALSE;
                thread_wakeup_with_result((event_t) m,
                    THREAD_RESTART);
            }
        }
        /*
         *	The reference count on copy_object must be
         *	at least 2: one for our extra reference,
         *	and at least one from the outside world
         *	(we checked that when we last locked
         *	copy_object).
         */
        copy_object->ref_count--;
        assert(copy_object->ref_count > 0);
        VM_OBJ_RES_DECR(copy_object);
        vm_object_unlock(copy_object);

        break;
    }

    *result_page = m;
    *top_page = first_m;

    XPR(XPR_VM_FAULT,
        "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
        (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
    /*
     *	If the page can be written, assume that it will be.
     *	[Earlier, we restrict the permission to allow write
     *	access only if the fault so required, so we don't
     *	mark read-only data as dirty.]
     */

    if (m != VM_PAGE_NULL) {
#if	!VM_FAULT_STATIC_CONFIG
        if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
            m->dirty = TRUE;
#endif
        if (vm_page_deactivate_behind)
            vm_fault_deactivate_behind(object, offset, behavior);
    } else {
        vm_object_unlock(object);
    }
    thread_interrupt_level(interruptible_state);

    dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0);	/* (TEST/DEBUG) */

    return(VM_FAULT_SUCCESS);
    vm_fault_cleanup(object, first_m);

    counter(c_vm_fault_page_block_backoff_kernel++);
    thread_block(THREAD_CONTINUE_NULL);

    backoff:
    thread_interrupt_level(interruptible_state);
    if (wait_result == THREAD_INTERRUPTED)
        return VM_FAULT_INTERRUPTED;
    return VM_FAULT_RETRY;
}
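/*
 * Illustrative sketch (added, not part of the original source): a retry
 * loop a caller might wrap around vm_fault_page(), dispatching on the
 * vm_fault_return_t codes seen above.  vm_fault() below implements the
 * real version of this; the wrapper name, its argument choices, and the
 * trailing map/address arguments (which follow the parameter list as
 * reconstructed above) are assumptions.
 */
#if 0
static kern_return_t
fault_one_page(vm_object_t object, vm_object_offset_t offset,
               vm_prot_t fault_type, vm_prot_t *prot,
               vm_page_t *result_page, vm_page_t *top_page,
               kern_return_t *error)
{
    for (;;) {
        vm_fault_return_t kr;

        vm_object_lock(object);
        vm_object_paging_begin(object);	/* donate one paging reference */
        kr = vm_fault_page(object, offset, fault_type, FALSE,
                           THREAD_UNINT, offset, offset + PAGE_SIZE_64,
                           VM_BEHAVIOR_DEFAULT, prot, result_page,
                           top_page, NULL, error, FALSE, FALSE,
                           NULL, 0);
        switch (kr) {
        case VM_FAULT_SUCCESS:
            return KERN_SUCCESS;
        case VM_FAULT_MEMORY_SHORTAGE:
            VM_PAGE_WAIT();			/* wait for a free page */
            continue;
        case VM_FAULT_FICTITIOUS_SHORTAGE:
            vm_page_more_fictitious();
            continue;
        case VM_FAULT_RETRY:
            continue;			/* object state changed, start over */
        case VM_FAULT_INTERRUPTED:
            return KERN_ABORTED;
        default:
            return KERN_MEMORY_ERROR;
        }
    }
}
#endif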
/*
 *	Routine:	vm_fault_tws_insert
 *	Purpose:
 *		Add fault information to the task working set.
 *	Implementation:
 *		We always insert the base object/offset pair
 *		rather than the actual object/offset.
 *	Assumptions:
 *		Map and pmap_map locked.
 *		Object locked and referenced.
 *	Returns:
 *		TRUE if startup file should be written.
 *		With object locked and still referenced.
 *		But we may drop the object lock temporarily.
 */
boolean_t
vm_fault_tws_insert(
    vm_map_t		map,
    vm_map_t		pmap_map,
    vm_offset_t		vaddr,
    vm_object_t		object,
    vm_object_offset_t	offset)
{
    tws_hash_line_t	line;
    task_t		task;
    kern_return_t	kr;
    boolean_t		result = FALSE;
    extern vm_map_t	kalloc_map;
    /* Avoid possible map lock deadlock issues */
    if (map == kernel_map || map == kalloc_map ||
        pmap_map == kernel_map || pmap_map == kalloc_map)
        return result;

    task = current_task();
    if (task->dynamic_working_set != 0) {
        vm_object_t		base_object;
        vm_object_t		base_shadow;
        vm_object_offset_t	base_offset;
        base_object = object;
        base_offset = offset;
        while (base_shadow = base_object->shadow) {
            vm_object_lock(base_shadow);
            vm_object_unlock(base_object);
            base_offset +=
                base_object->shadow_offset;
            base_object = base_shadow;
        }
        kr = tws_lookup((tws_hash_t)
            task->dynamic_working_set,
            base_offset, base_object,
            &line);
        if (kr == KERN_OPERATION_TIMED_OUT) {
            result = TRUE;
            if (base_object != object) {
                vm_object_unlock(base_object);
                vm_object_lock(object);
            }
        } else if (kr != KERN_SUCCESS) {
            if (base_object != object)
                vm_object_reference_locked(base_object);
            kr = tws_insert((tws_hash_t)
                task->dynamic_working_set,
                base_offset, base_object,
                vaddr, pmap_map);
            if (base_object != object) {
                vm_object_unlock(base_object);
                vm_object_deallocate(base_object);
            }
            if (kr == KERN_NO_SPACE) {
                if (base_object == object)
                    vm_object_unlock(object);
                tws_expand_working_set(
                    task->dynamic_working_set,
                    TWS_HASH_LINE_COUNT,
                    FALSE);
                if (base_object == object)
                    vm_object_lock(object);
            } else if (kr == KERN_OPERATION_TIMED_OUT) {
                result = TRUE;
                if (base_object != object)
                    vm_object_lock(object);
            } else if (base_object != object) {
                vm_object_unlock(base_object);
                vm_object_lock(object);
            }
        }
    }
    return result;
}
/*
 *	Routine:	vm_fault
 *	Purpose:
 *		Handle page faults, including pseudo-faults
 *		used to change the wiring status of pages.
 *	Returns:
 *		Explicit continuations have been removed.
 *	Implementation:
 *		vm_fault and vm_fault_page save mucho state
 *		in the moral equivalent of a closure.  The state
 *		structure is allocated when first entering vm_fault
 *		and deallocated when leaving vm_fault.
 */

kern_return_t
vm_fault(
    vm_map_t	map,
    vm_offset_t	vaddr,
    vm_prot_t	fault_type,
    boolean_t	change_wiring,
    int		interruptible,
    pmap_t	caller_pmap,
    vm_offset_t	caller_pmap_addr)
{
    vm_map_version_t	version;	/* Map version for verification */
    boolean_t		wired;		/* Should mapping be wired down? */
    vm_object_t		object;		/* Top-level object */
    vm_object_offset_t	offset;		/* Top-level offset */
    vm_prot_t		prot;		/* Protection for mapping */
    vm_behavior_t	behavior;	/* Expected paging behavior */
    vm_object_offset_t	lo_offset, hi_offset;
    vm_object_t		old_copy_object; /* Saved copy object */
    vm_page_t		result_page;	/* Result of vm_fault_page */
    vm_page_t		top_page;	/* Placeholder page */
    kern_return_t	kr;

    register
    vm_page_t		m;		/* Fast access to result_page */
    kern_return_t	error_code;	/* page error reasons */
    register
    vm_object_t		cur_object;
    register
    vm_object_offset_t	cur_offset;
    vm_page_t		cur_m;
    vm_object_t		new_object;
    int			type_of_fault;
    vm_map_t		pmap_map = map;
    vm_map_t		original_map = map;
    pmap_t		pmap = NULL;
    boolean_t		funnel_set = FALSE;
    funnel_t		*curflock;
    thread_t		cur_thread;
    boolean_t		interruptible_state;
    unsigned int	cache_attr;
    int			write_startup_file = 0;
    vm_prot_t		full_fault_type;
    if (get_preemption_level() != 0)
        return (KERN_FAILURE);

    KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
                          vaddr, 0, 0, 0, 0);

    /* at present we do not fully check for execute permission */
    /* we generally treat it as read except in certain device  */
    /* memory settings */
    full_fault_type = fault_type;
    if (fault_type & VM_PROT_EXECUTE) {
        fault_type &= ~VM_PROT_EXECUTE;
        fault_type |= VM_PROT_READ;
    }

    interruptible_state = thread_interrupt_level(interruptible);

    /*
     * assume we will hit a page in the cache
     * otherwise, explicitly override with
     * the real fault type once we determine it
     */
    type_of_fault = DBG_CACHE_HIT_FAULT;

    VM_STAT(faults++);
    current_task()->faults++;
    /*
     * drop funnel if it is already held. Then restore while returning
     */
    cur_thread = current_thread();

    if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) {
        funnel_set = TRUE;
        curflock = cur_thread->funnel_lock;
        thread_funnel_set( curflock , FALSE);
    }
 RetryFault: ;

    /*
     *	Find the backing store object and offset into
     *	it to begin the search.
     */
    map = original_map;
    vm_map_lock_read(map);
    kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
                &object, &offset,
                &prot, &wired,
                &behavior, &lo_offset, &hi_offset, &pmap_map);

    pmap = pmap_map->pmap;

    if (kr != KERN_SUCCESS) {
        vm_map_unlock_read(map);
        goto done;
    }
    /*
     *	If the page is wired, we must fault for the current protection
     *	value, to avoid further faults.
     */

    if (wired)
        fault_type = prot | VM_PROT_WRITE;

#if	VM_FAULT_CLASSIFY
    /*
     *	Temporary data gathering code
     */
    vm_fault_classify(object, offset, fault_type);
#endif
    /*
     *	Fast fault code.  The basic idea is to do as much as
     *	possible while holding the map lock and object locks.
     *	Busy pages are not used until the object lock has to
     *	be dropped to do something (copy, zero fill, pmap enter).
     *	Similarly, paging references aren't acquired until that
     *	point, and object references aren't used.
     *
     *	If we can figure out what to do
     *	(zero fill, copy on write, pmap enter) while holding
     *	the locks, then it gets done.  Otherwise, we give up,
     *	and use the original fault path (which doesn't hold
     *	the map lock, and relies on busy pages).
     *	The give up cases include:
     *	 - Have to talk to pager.
     *	 - Page is busy, absent or in error.
     *	 - Pager has locked out desired access.
     *	 - Fault needs to be restarted.
     *	 - Have to push page into copy object.
     *
     *	The code is an infinite loop that moves one level down
     *	the shadow chain each time.  cur_object and cur_offset
     *	refer to the current object being examined. object and offset
     *	are the original object from the map.  The loop is at the
     *	top level if and only if object and cur_object are the same.
     *
     *	Invariants:  Map lock is held throughout.  Lock is held on
     *		original object and cur_object (if different) when
     *		continuing or exiting loop.
     */
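    /*
     * Added commentary (not in the original source): the "give up" cases
     * listed above fall through to the slow path later in this routine,
     * which drops the map lock, takes a paging reference, and calls
     * vm_fault_page() using the busy-page protocol described at the top
     * of that routine.
     */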
    /*
     * If this page is to be inserted in a copy delay object
     * for writing, and if the object has a copy, then the
     * copy delay strategy is implemented in the slow fault page.
     */
    if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
        object->copy == VM_OBJECT_NULL ||
        (fault_type & VM_PROT_WRITE) == 0) {
        cur_object = object;
        cur_offset = offset;

        while (TRUE) {
            m = vm_page_lookup(cur_object, cur_offset);
            if (m != VM_PAGE_NULL) {
                if (m->busy) {
                    wait_result_t	result;

                    if (object != cur_object)
                        vm_object_unlock(object);

                    vm_map_unlock_read(map);
                    if (pmap_map != map)
                        vm_map_unlock(pmap_map);

#if	!VM_FAULT_STATIC_CONFIG
                    if (!vm_fault_interruptible)
                        interruptible = THREAD_UNINT;
#endif
                    result = PAGE_ASSERT_WAIT(m, interruptible);

                    vm_object_unlock(cur_object);

                    if (result == THREAD_WAITING) {
                        result = thread_block(THREAD_CONTINUE_NULL);

                        counter(c_vm_fault_page_block_busy_kernel++);
                    }
                    if (result == THREAD_AWAKENED || result == THREAD_RESTART)
                        goto RetryFault;

                    kr = KERN_ABORTED;
                    goto done;
                }
                if (m->unusual && (m->error || m->restart || m->private
                    || m->absent || (fault_type & m->page_lock))) {
2271 * Unusual case. Give up.
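                /*
                 * The busy case above is the usual sleep/wakeup protocol:
                 * PAGE_ASSERT_WAIT() registers interest in the page, every
                 * lock is dropped so the thread holding the page busy can
                 * make progress, and thread_block() sleeps until the page
                 * is woken up, at which point the whole fault is retried
                 * from the top.  (Roughly; the exact wakeup conditions are
                 * whatever PAGE_ASSERT_WAIT() arranges.)
                 */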
                /*
                 *  Two cases of map in faults:
                 *      - At top level w/o copy object.
                 *      - Read fault anywhere.
                 *          --> must disallow write.
                 */

                if (object == cur_object &&
                    object->copy == VM_OBJECT_NULL)
                    goto FastMapInFault;

                if ((fault_type & VM_PROT_WRITE) == 0) {
                    boolean_t   sequential;

                    prot &= ~VM_PROT_WRITE;

                    /*
                     *  Set up to map the page ...
                     *  mark the page busy, drop
                     *  locks and take a paging reference
                     *  on the object with the page.
                     */

                    if (object != cur_object) {
                        vm_object_unlock(object);
                        object = cur_object;
                    }
FastMapInFault:
                    m->busy = TRUE;

                    vm_object_paging_begin(object);

FastPmapEnter:
                    /*
                     *  Check a couple of global reasons to
                     *  be conservative about write access.
                     *  Then do the pmap_enter.
                     */
#if !VM_FAULT_STATIC_CONFIG
                    if (vm_fault_dirty_handling
#if MACH_KDB
                        || db_watchpoint_list
#endif
                        && (fault_type & VM_PROT_WRITE) == 0)
                        prot &= ~VM_PROT_WRITE;
#else   /* STATIC_CONFIG */
#if MACH_KDB
                    if (db_watchpoint_list
                        && (fault_type & VM_PROT_WRITE) == 0)
                        prot &= ~VM_PROT_WRITE;
#endif  /* MACH_KDB */
#endif  /* STATIC_CONFIG */
                    cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

                    if (m->no_isync == TRUE) {
                        m->no_isync = FALSE;
                        pmap_sync_caches_phys(m->phys_page);
                        if (type_of_fault == DBG_CACHE_HIT_FAULT) {
                            /*
                             * found it in the cache, but this
                             * is the first fault-in of the page (no_isync == TRUE)
                             * so it must have come in as part of
                             * a cluster... account 1 pagein against it
                             */
                            current_task()->pageins++;
                            type_of_fault = DBG_PAGEIN_FAULT;
                        }
                    } else if (cache_attr != VM_WIMG_DEFAULT) {
                        pmap_sync_caches_phys(m->phys_page);
                    }

                    if (caller_pmap) {
                        PMAP_ENTER(caller_pmap,
                            caller_pmap_addr, m,
                            prot, cache_attr, wired);
                    } else {
                        PMAP_ENTER(pmap, vaddr, m,
                            prot, cache_attr, wired);
                    }
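                    /*
                     * cache_attr carries the object's WIMG (caching and
                     * coherency) bits down into PMAP_ENTER so the mapping
                     * is created with the right memory attributes, and
                     * no_isync marks a page whose contents have never been
                     * synchronized with the instruction cache; the
                     * pmap_sync_caches_phys() calls above handle that
                     * first-touch case.  (Described from what this path
                     * does; the field semantics live with the vm_page and
                     * pmap layers.)
                     */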
                    /*
                     *  Hold queues lock to manipulate
                     *  the page queues.  Change wiring
                     *  case is obvious.  In soft ref bits
                     *  case activate page only if it fell
                     *  off paging queues, otherwise just
                     *  activate it if it's inactive.
                     *
                     *  NOTE: original vm_fault code will
                     *  move active page to back of active
                     *  queue.  This code doesn't.
                     */
                    vm_page_lock_queues();

                    if (m->clustered) {
                        vm_pagein_cluster_used++;
                        m->clustered = FALSE;
                    }
                    m->reference = TRUE;

                    if (change_wiring) {
                        if (wired)
                            vm_page_wire(m);
                        else
                            vm_page_unwire(m);
                    }
#if VM_FAULT_STATIC_CONFIG
                    else {
                        if (!m->active && !m->inactive)
                            vm_page_activate(m);
                    }
#else
                    else if (software_reference_bits) {
                        if (!m->active && !m->inactive)
                            vm_page_activate(m);
                    }
                    else if (!m->active) {
                        vm_page_activate(m);
                    }
#endif
                    vm_page_unlock_queues();

                    /*
                     *  That's it, clean up and return.
                     */
                    PAGE_WAKEUP_DONE(m);

                    sequential = (sequential && vm_page_deactivate_behind) ?
                        vm_fault_deactivate_behind(object, cur_offset, behavior) :
                        FALSE;

                    /*
                     *  Add non-sequential pages to the working set.
                     *  The sequential pages will be brought in through
                     *  normal clustering behavior.
                     */
                    if (!sequential && !object->private) {
                        write_startup_file =
                            vm_fault_tws_insert(map, pmap_map, vaddr,
                                object, cur_offset);
                    }

                    vm_object_paging_end(object);
                    vm_object_unlock(object);

                    vm_map_unlock_read(map);
                    if (pmap_map != map)
                        vm_map_unlock(pmap_map);

                    if (write_startup_file)
                        tws_send_startup_info(current_task());

                    thread_funnel_set(curflock, TRUE);

                    thread_interrupt_level(interruptible_state);

                    KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
                            vaddr,
                            type_of_fault & 0xff,
                            KERN_SUCCESS,
                            type_of_fault >> 8,
                            0);

                    return KERN_SUCCESS;
                }
                /*
                 *  Copy on write fault.  If objects match, then
                 *  object->copy must not be NULL (else control
                 *  would be in previous code block), and we
                 *  have a potential push into the copy object
                 *  with which we won't cope here.
                 */

                if (cur_object == object)
                    break;

                /*
                 *  This is now a shadow based copy on write
                 *  fault -- it requires a copy up the shadow
                 *  chain.
                 *
                 *  Allocate a page in the original top level
                 *  object. Give up if allocate fails.  Also
                 *  need to remember current page, as it's the
                 *  source of the copy.
                 */
                cur_m = m;
                m = vm_page_grab();
                if (m == VM_PAGE_NULL) {
                    break;
                }

                /*
                 *  Now do the copy.  Mark the source busy
                 *  and take out paging references on both
                 *  objects.
                 *
                 *  NOTE: This code holds the map lock across
                 *  the copy.
                 */

                vm_page_copy(cur_m, m);
                vm_page_insert(m, object, offset);

                vm_object_paging_begin(cur_object);
                vm_object_paging_begin(object);

                type_of_fault = DBG_COW_FAULT;
                VM_STAT(cow_faults++);
                current_task()->cow_faults++;

                /*
                 *  Now cope with the source page and object
                 *  If the top object has a ref count of 1
                 *  then no other map can access it, and hence
                 *  it's not necessary to do the pmap_page_protect.
                 */

                vm_page_lock_queues();
                vm_page_deactivate(cur_m);
                pmap_page_protect(cur_m->phys_page,
                          VM_PROT_NONE);
                vm_page_unlock_queues();

                PAGE_WAKEUP_DONE(cur_m);
                vm_object_paging_end(cur_object);
                vm_object_unlock(cur_object);

                /*
                 *  Slight hack to call vm_object collapse
                 *  and then reuse common map in code.
                 *  note that the object lock was taken above.
                 */
                vm_object_paging_end(object);
                vm_object_collapse(object, offset);
                vm_object_paging_begin(object);

                goto FastPmapEnter;
            }
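            /*
             * The sequence above is the classic shadow-chain copy-up,
             * roughly: grab a free page, vm_page_copy() the data from the
             * page found lower in the chain (cur_m) into it, insert the
             * copy at the faulting offset of the top object, then
             * deactivate and unmap the source so later accesses resolve to
             * the private copy.  vm_object_collapse() is given a chance to
             * fold the chain back together before the mapping is entered
             * at FastPmapEnter.
             */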
            /*
             *  No page at cur_object, cur_offset
             */

            if (cur_object->pager_created) {

                /*
                 *  Have to talk to the pager.  Give up.
                 */
                break;
            }

            if (cur_object->shadow == VM_OBJECT_NULL) {

                if (cur_object->shadow_severed) {
                    vm_object_paging_end(object);
                    vm_object_unlock(object);
                    vm_map_unlock_read(map);
                    if (pmap_map != map)
                        vm_map_unlock(pmap_map);

                    if (write_startup_file)
                        tws_send_startup_info(current_task());

                    thread_funnel_set(curflock, TRUE);

                    thread_interrupt_level(interruptible_state);

                    return VM_FAULT_MEMORY_ERROR;
                }

                /*
                 *  Zero fill fault.  Page gets
                 *  filled in top object. Insert
                 *  page, then drop any lower lock.
                 *  Give up if no page.
                 */
                if (VM_PAGE_THROTTLED()) {
                    break;
                }

                /*
                 * are we protecting the system from
                 * backing store exhaustion.  If so
                 * sleep unless we are privileged.
                 */
                if (vm_backing_store_low) {
                    if (!(current_task()->priv_flags
                                & VM_BACKING_STORE_PRIV))
                        break;
                }
                m = vm_page_alloc(object, offset);
                if (m == VM_PAGE_NULL) {
                    break;
                }
                /*
                 * This is a zero-fill or initial fill
                 * page fault.  As such, we consider it
                 * undefined with respect to instruction
                 * execution.  i.e. it is the responsibility
                 * of higher layers to call for an instruction
                 * sync after changing the contents and before
                 * sending a program into this area.  We
                 * choose this approach for performance
                 */

                m->no_isync = FALSE;

                if (cur_object != object)
                    vm_object_unlock(cur_object);

                vm_object_paging_begin(object);
                vm_object_unlock(object);

                /*
                 *  Now zero fill page and map it.
                 *  the page is probably going to
                 *  be written soon, so don't bother
                 *  to clear the modified bit
                 *
                 *  NOTE: This code holds the map
                 *  lock across the zero fill.
                 */

                if (!map->no_zero_fill) {
                    vm_page_zero_fill(m);
                    type_of_fault = DBG_ZERO_FILL_FAULT;
                    VM_STAT(zero_fill_count++);
                }
                vm_page_lock_queues();
                VM_PAGE_QUEUES_REMOVE(m);

                m->page_ticket = vm_page_ticket;
                if (m->object->size > 0x80000) {
                    m->zero_fill = TRUE;
                    /* depends on the queues lock */
                    queue_enter(&vm_page_queue_zf,
                        m, vm_page_t, pageq);
                } else {
                    queue_enter(
                        &vm_page_queue_inactive,
                        m, vm_page_t, pageq);
                }
                vm_page_ticket_roll++;
                if (vm_page_ticket_roll ==
                        VM_PAGE_TICKETS_IN_ROLL) {
                    vm_page_ticket_roll = 0;
                    if (vm_page_ticket ==
                            VM_PAGE_TICKET_ROLL_IDS)
                        vm_page_ticket = 0;
                    else
                        vm_page_ticket++;
                }

                m->inactive = TRUE;
                vm_page_inactive_count++;
                vm_page_unlock_queues();
                vm_object_lock(object);

                goto FastPmapEnter;
            }
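            /*
             * In outline, the zero-fill case above: allocate a page in the
             * top-level object, zero it unless the map asks for
             * no_zero_fill, put it on the inactive (or zero-fill) queue
             * with a page ticket as used by the pageout code, and then
             * fall into the common FastPmapEnter code to enter the
             * mapping.  The 0x80000 size test is simply the threshold this
             * code uses to decide which queue a zero-filled page starts on.
             */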
            /*
             *  On to the next level
             */

            cur_offset += cur_object->shadow_offset;
            new_object = cur_object->shadow;
            vm_object_lock(new_object);
            if (cur_object != object)
                vm_object_unlock(cur_object);
            cur_object = new_object;

            continue;
        }

        /*
         *  Cleanup from fast fault failure.  Drop any object
         *  lock other than original and drop map lock.
         */

        if (object != cur_object)
            vm_object_unlock(cur_object);
    }
    vm_map_unlock_read(map);

    if (pmap_map != map)
        vm_map_unlock(pmap_map);

    /*
     *  Make a reference to this object to
     *  prevent its disposal while we are messing with
     *  it.  Once we have the reference, the map is free
     *  to be diddled.  Since objects reference their
     *  shadows (and copies), they will stay around as well.
     */

    assert(object->ref_count > 0);
    object->ref_count++;
    vm_object_res_reference(object);
    vm_object_paging_begin(object);

    XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);

    if (!object->private) {
        write_startup_file =
            vm_fault_tws_insert(map, pmap_map, vaddr, object, offset);
    }

    kr = vm_fault_page(object, offset, fault_type,
               (change_wiring && !wired),
               interruptible,
               lo_offset, hi_offset, behavior,
               &prot, &result_page, &top_page,
               &type_of_fault,
               &error_code, map->no_zero_fill, FALSE, map, vaddr);

    /*
     *  If we didn't succeed, lose the object reference immediately.
     */

    if (kr != VM_FAULT_SUCCESS)
        vm_object_deallocate(object);
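    /*
     * From here down is the original slow path: vm_fault_page() does the
     * real work (talking to the pager, zero filling, pushing into copy
     * objects) and, on success, hands back result_page busy and possibly
     * a top_page that must be cleaned up with the UNLOCK_AND_DEALLOCATE
     * machinery defined just below.  That contract is inferred here from
     * how the results are used rather than restated from vm_fault_page()
     * itself.
     */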
    /*
     *  See why we failed, and take corrective action.
     */

    switch (kr) {
        case VM_FAULT_SUCCESS:
            break;
        case VM_FAULT_MEMORY_SHORTAGE:
            if (vm_page_wait((change_wiring) ?
                     THREAD_UNINT :
                     interruptible))
                goto RetryFault;
            /* fall thru */
        case VM_FAULT_INTERRUPTED:
            kr = KERN_ABORTED;
            goto done;
        case VM_FAULT_RETRY:
            goto RetryFault;
        case VM_FAULT_FICTITIOUS_SHORTAGE:
            vm_page_more_fictitious();
            goto RetryFault;
        case VM_FAULT_MEMORY_ERROR:
            if (error_code)
                kr = error_code;
            else
                kr = KERN_MEMORY_ERROR;
            goto done;
    }

    m = result_page;

    if (m != VM_PAGE_NULL) {
        assert((change_wiring && !wired) ?
            (top_page == VM_PAGE_NULL) :
            ((top_page == VM_PAGE_NULL) == (m->object == object)));
    }

    /*
     *  How to clean up the result of vm_fault_page.  This
     *  happens whether the mapping is entered or not.
     */

#define UNLOCK_AND_DEALLOCATE                   \
    MACRO_BEGIN                                 \
    vm_fault_cleanup(m->object, top_page);      \
    vm_object_deallocate(object);               \
    MACRO_END

    /*
     *  What to do with the resulting page from vm_fault_page
     *  if it doesn't get entered into the physical map:
     */

#define RELEASE_PAGE(m)                         \
    MACRO_BEGIN                                 \
    PAGE_WAKEUP_DONE(m);                        \
    vm_page_lock_queues();                      \
    if (!m->active && !m->inactive)             \
        vm_page_activate(m);                    \
    vm_page_unlock_queues();                    \
    MACRO_END
    /*
     *  We must verify that the maps have not changed
     *  since our last lookup.
     */

    if (m != VM_PAGE_NULL) {
        old_copy_object = m->object->copy;
        vm_object_unlock(m->object);
    } else {
        old_copy_object = VM_OBJECT_NULL;
    }
    if ((map != original_map) || !vm_map_verify(map, &version)) {
        vm_object_t         retry_object;
        vm_object_offset_t  retry_offset;
        vm_prot_t           retry_prot;

        /*
         *  To avoid trying to write_lock the map while another
         *  thread has it read_locked (in vm_map_pageable), we
         *  do not try for write permission.  If the page is
         *  still writable, we will get write permission.  If it
         *  is not, or has been marked needs_copy, we enter the
         *  mapping without write permission, and will merely
         *  take another fault.
         */
        map = original_map;
        vm_map_lock_read(map);
        kr = vm_map_lookup_locked(&map, vaddr,
                   fault_type & ~VM_PROT_WRITE, &version,
                   &retry_object, &retry_offset, &retry_prot,
                   &wired, &behavior, &lo_offset, &hi_offset,
                   &pmap_map);
        pmap = pmap_map->pmap;

        if (kr != KERN_SUCCESS) {
            vm_map_unlock_read(map);
            if (m != VM_PAGE_NULL) {
                vm_object_lock(m->object);
                RELEASE_PAGE(m);
                UNLOCK_AND_DEALLOCATE;
            } else {
                vm_object_deallocate(object);
            }
            goto done;
        }

        vm_object_unlock(retry_object);
        if (m != VM_PAGE_NULL) {
            vm_object_lock(m->object);
        } else {
            vm_object_lock(object);
        }

        if ((retry_object != object) ||
            (retry_offset != offset)) {
            vm_map_unlock_read(map);
            if (pmap_map != map)
                vm_map_unlock(pmap_map);
            if (m != VM_PAGE_NULL) {
                RELEASE_PAGE(m);
                UNLOCK_AND_DEALLOCATE;
            } else {
                vm_object_deallocate(object);
            }
            goto RetryFault;
        }

        /*
         *  Check whether the protection has changed or the object
         *  has been copied while we left the map unlocked.
         */
        prot &= retry_prot;
        if (m != VM_PAGE_NULL) {
            vm_object_unlock(m->object);
        } else {
            vm_object_unlock(object);
        }
    }
    if (m != VM_PAGE_NULL) {
        vm_object_lock(m->object);
    } else {
        vm_object_lock(object);
    }

    /*
     *  If the copy object changed while the top-level object
     *  was unlocked, then we must take away write permission.
     */

    if (m != VM_PAGE_NULL) {
        if (m->object->copy != old_copy_object)
            prot &= ~VM_PROT_WRITE;
    }

    /*
     *  If we want to wire down this page, but no longer have
     *  adequate permissions, we must start all over.
     */

    if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
        vm_map_verify_done(map, &version);
        if (pmap_map != map)
            vm_map_unlock(pmap_map);
        if (m != VM_PAGE_NULL) {
            RELEASE_PAGE(m);
            UNLOCK_AND_DEALLOCATE;
        } else {
            vm_object_deallocate(object);
        }
        goto RetryFault;
    }
    /*
     *  Put this page into the physical map.
     *  We had to do the unlock above because pmap_enter
     *  may cause other faults.  The page may be on
     *  the pageout queues.  If the pageout daemon comes
     *  across the page, it will remove it from the queues.
     */
    if (m != VM_PAGE_NULL) {
        if (m->no_isync == TRUE) {
            pmap_sync_caches_phys(m->phys_page);

            if (type_of_fault == DBG_CACHE_HIT_FAULT) {
                /*
                 * found it in the cache, but this
                 * is the first fault-in of the page (no_isync == TRUE)
                 * so it must have come in as part of
                 * a cluster... account 1 pagein against it
                 */
                current_task()->pageins++;

                type_of_fault = DBG_PAGEIN_FAULT;
            }
            m->no_isync = FALSE;
        }
        cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

        if (caller_pmap) {
            PMAP_ENTER(caller_pmap,
                caller_pmap_addr, m,
                prot, cache_attr, wired);
        } else {
            PMAP_ENTER(pmap, vaddr, m,
                prot, cache_attr, wired);
        }

        /*
         *  Add working set information for private objects here.
         */
        if (m->object->private) {
            write_startup_file =
                vm_fault_tws_insert(map, pmap_map, vaddr,
                    m->object, m->offset);
        }
    } else {
        vm_map_entry_t      entry;
        vm_offset_t         laddr;
        vm_offset_t         ldelta, hdelta;

        /*
         * do a pmap block mapping from the physical address
         * in the object
         */

        /* While we do not worry about execution protection in   */
        /* general, certain pages may have instruction execution */
        /* disallowed.  We will check here, and if not allowed   */
        /* to execute, we return with a protection failure.      */

        if ((full_fault_type & VM_PROT_EXECUTE) &&
            (pmap_canExecute((ppnum_t)
                (object->shadow_offset >> 12)) < 1)) {

            vm_map_verify_done(map, &version);
            if (pmap_map != map)
                vm_map_unlock(pmap_map);
            vm_fault_cleanup(object, top_page);
            vm_object_deallocate(object);
            kr = KERN_PROTECTION_FAILURE;
            goto done;
        }

        if (pmap_map != map) {
            vm_map_unlock(pmap_map);
        }
        if (original_map != map) {
            vm_map_unlock_read(map);
            vm_map_lock_read(original_map);
            map = original_map;
        }
        pmap_map = map;

        laddr = vaddr;
        hdelta = 0xFFFFF000;
        ldelta = 0xFFFFF000;

        while (vm_map_lookup_entry(map, laddr, &entry)) {
            if (ldelta > (laddr - entry->vme_start))
                ldelta = laddr - entry->vme_start;
            if (hdelta > (entry->vme_end - laddr))
                hdelta = entry->vme_end - laddr;
            if (entry->is_sub_map) {

                laddr = (laddr - entry->vme_start)
                            + entry->offset;
                vm_map_lock_read(entry->object.sub_map);
                if (map != pmap_map)
                    vm_map_unlock_read(map);
                if (entry->use_pmap) {
                    vm_map_unlock_read(pmap_map);
                    pmap_map = entry->object.sub_map;
                }
                map = entry->object.sub_map;

            } else {
                break;
            }
        }

        if (vm_map_lookup_entry(map, laddr, &entry) &&
            (entry->object.vm_object != NULL) &&
            (entry->object.vm_object == object)) {

            if (caller_pmap) {
                /* Set up a block mapped area */
                pmap_map_block(caller_pmap,
                    (addr64_t)(caller_pmap_addr - ldelta),
                    (((vm_offset_t)
                        (entry->object.vm_object->shadow_offset))
                    + entry->offset +
                    (laddr - entry->vme_start)
                            - ldelta)>>12,
                    ldelta + hdelta, prot,
                    (VM_WIMG_MASK & (int)object->wimg_bits), 0);
            } else {
                /* Set up a block mapped area */
                pmap_map_block(pmap_map->pmap,
                    (addr64_t)(vaddr - ldelta),
                    (((vm_offset_t)
                        (entry->object.vm_object->shadow_offset))
                    + entry->offset +
                    (laddr - entry->vme_start) - ldelta)>>12,
                    ldelta + hdelta, prot,
                    (VM_WIMG_MASK & (int)object->wimg_bits), 0);
            }
        } else {
            if (caller_pmap) {
                pmap_enter(caller_pmap, caller_pmap_addr,
                    object->shadow_offset>>12, prot, 0, TRUE);
            } else {
                pmap_enter(pmap, vaddr,
                    object->shadow_offset>>12, prot, 0, TRUE);
            }
        }
    }
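    /*
     * The branch above handles objects that have no vm_page (typically
     * physically contiguous device memory): instead of entering a single
     * page it either lays down a block mapping covering roughly
     * [vaddr - ldelta, vaddr + hdelta) straight from the object's
     * shadow_offset, or falls back to a single pmap_enter() of that
     * physical page.  The >>12 shifts convert byte offsets to 4K page
     * frame numbers, which is what this code assumes the pmap layer
     * expects here.
     */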
    /*
     *  If the page is not wired down and isn't already
     *  on a pageout queue, then put it where the
     *  pageout daemon can find it.
     */
    if (m != VM_PAGE_NULL) {
        vm_page_lock_queues();

        if (change_wiring) {
            if (wired)
                vm_page_wire(m);
            else
                vm_page_unwire(m);
        }
#if VM_FAULT_STATIC_CONFIG
        else {
            if (!m->active && !m->inactive)
                vm_page_activate(m);
            m->reference = TRUE;
        }
#else
        else if (software_reference_bits) {
            if (!m->active && !m->inactive)
                vm_page_activate(m);
            m->reference = TRUE;
        } else {
            vm_page_activate(m);
        }
#endif
        vm_page_unlock_queues();
    }

    /*
     *  Unlock everything, and return
     */

    vm_map_verify_done(map, &version);
    if (pmap_map != map)
        vm_map_unlock(pmap_map);
    if (m != VM_PAGE_NULL) {
        PAGE_WAKEUP_DONE(m);
        UNLOCK_AND_DEALLOCATE;
    } else {
        vm_fault_cleanup(object, top_page);
        vm_object_deallocate(object);
    }
    kr = KERN_SUCCESS;

#undef  UNLOCK_AND_DEALLOCATE
#undef  RELEASE_PAGE

  done:
    if (write_startup_file)
        tws_send_startup_info(current_task());

    thread_funnel_set(curflock, TRUE);

    thread_interrupt_level(interruptible_state);

    KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
                  vaddr,
                  type_of_fault & 0xff,
                  kr,
                  type_of_fault >> 8,
                  0);

    return kr;
}
/*
 *  vm_fault_wire:
 *
 *  Wire down a range of virtual addresses in a map.
 */
kern_return_t
vm_fault_wire(
    vm_map_t        map,
    vm_map_entry_t  entry,
    pmap_t          pmap,
    vm_offset_t     pmap_addr)
{

    register vm_offset_t    va;
    register vm_offset_t    end_addr = entry->vme_end;
    register kern_return_t  rc;

    assert(entry->in_transition);

    if ((entry->object.vm_object != NULL) &&
            !entry->is_sub_map &&
            entry->object.vm_object->phys_contiguous) {
        return KERN_SUCCESS;
    }

    /*
     *  Inform the physical mapping system that the
     *  range of addresses may not fault, so that
     *  page tables and such can be locked down as well.
     */

    pmap_pageable(pmap, pmap_addr,
        pmap_addr + (end_addr - entry->vme_start), FALSE);

    /*
     *  We simulate a fault to get the page and enter it
     *  in the physical map.
     */

    for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
        if ((rc = vm_fault_wire_fast(
            map, va, entry, pmap,
            pmap_addr + (va - entry->vme_start)
            )) != KERN_SUCCESS) {
            rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
                (pmap == kernel_pmap) ?
                    THREAD_UNINT : THREAD_ABORTSAFE,
                pmap, pmap_addr + (va - entry->vme_start));
        }

        if (rc != KERN_SUCCESS) {
            struct vm_map_entry tmp_entry = *entry;

            /* unwire wired pages */
            tmp_entry.vme_end = va;
            vm_fault_unwire(map,
                &tmp_entry, FALSE, pmap, pmap_addr);

            return rc;
        }
    }
    return KERN_SUCCESS;
}
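/*
 * In outline (a summary of the loop above): vm_fault_wire() walks the
 * entry a page at a time, tries the cheap vm_fault_wire_fast() path first
 * and only falls back to a full vm_fault() with change_wiring set when
 * that fails; if any page in the range cannot be wired it unwires
 * whatever it already wired, via the tmp_entry trick, and returns the
 * error.
 */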
/*
 *  vm_fault_unwire:
 *
 *  Unwire a range of virtual addresses in a map.
 */
void
vm_fault_unwire(
    vm_map_t        map,
    vm_map_entry_t  entry,
    boolean_t       deallocate,
    pmap_t          pmap,
    vm_offset_t     pmap_addr)
{
    register vm_offset_t    va;
    register vm_offset_t    end_addr = entry->vme_end;
    vm_object_t             object;

    object = (entry->is_sub_map)
            ? VM_OBJECT_NULL : entry->object.vm_object;

    /*
     *  Since the pages are wired down, we must be able to
     *  get their mappings from the physical map system.
     */

    for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
        pmap_change_wiring(pmap,
            pmap_addr + (va - entry->vme_start), FALSE);

        if (object == VM_OBJECT_NULL) {
            (void) vm_fault(map, va, VM_PROT_NONE,
                    TRUE, THREAD_UNINT, pmap, pmap_addr);
        } else if (object->phys_contiguous) {
            continue;
        } else {
            vm_prot_t           prot;
            vm_page_t           result_page;
            vm_page_t           top_page;
            vm_object_t         result_object;
            vm_fault_return_t   result;

            do {
                prot = VM_PROT_NONE;

                vm_object_lock(object);
                vm_object_paging_begin(object);
                XPR(XPR_VM_FAULT,
                    "vm_fault_unwire -> vm_fault_page\n",
                    0,0,0,0,0);
                result = vm_fault_page(object,
                        entry->offset +
                          (va - entry->vme_start),
                        VM_PROT_NONE, TRUE,
                        THREAD_UNINT,
                        entry->offset,
                        entry->offset +
                            (entry->vme_end
                            - entry->vme_start),
                        VM_BEHAVIOR_SEQUENTIAL,
                        &prot,
                        &result_page,
                        &top_page,
                        (int *)0,
                        0, map->no_zero_fill,
                        FALSE, NULL, 0);
            } while (result == VM_FAULT_RETRY);

            if (result != VM_FAULT_SUCCESS)
                panic("vm_fault_unwire: failure");

            result_object = result_page->object;
            if (deallocate) {
                assert(!result_page->fictitious);
                pmap_page_protect(result_page->phys_page,
                        VM_PROT_NONE);
                VM_PAGE_FREE(result_page);
            } else {
                vm_page_lock_queues();
                vm_page_unwire(result_page);
                vm_page_unlock_queues();
                PAGE_WAKEUP_DONE(result_page);
            }

            vm_fault_cleanup(result_object, top_page);
        }
    }

    /*
     *  Inform the physical mapping system that the range
     *  of addresses may fault, so that page tables and
     *  such may be unwired themselves.
     */

    pmap_pageable(pmap, pmap_addr,
        pmap_addr + (end_addr - entry->vme_start), TRUE);

}
/*
 *  vm_fault_wire_fast:
 *
 *  Handle common case of a wire down page fault at the given address.
 *  If successful, the page is inserted into the associated physical map.
 *  The map entry is passed in to avoid the overhead of a map lookup.
 *
 *  NOTE: the given address should be truncated to the
 *  proper page address.
 *
 *  KERN_SUCCESS is returned if the page fault is handled; otherwise,
 *  a standard error specifying why the fault is fatal is returned.
 *
 *  The map in question must be referenced, and remains so.
 *  Caller has a read lock on the map.
 *
 *  This is a stripped version of vm_fault() for wiring pages.  Anything
 *  other than the common case will return KERN_FAILURE, and the caller
 *  is expected to call vm_fault().
 */
kern_return_t
vm_fault_wire_fast(
    vm_map_t        map,
    vm_offset_t     va,
    vm_map_entry_t  entry,
    pmap_t          pmap,
    vm_offset_t     pmap_addr)
{
    vm_object_t         object;
    vm_object_offset_t  offset;
    register vm_page_t  m;
    vm_prot_t           prot;
    thread_act_t        thr_act;
    unsigned int        cache_attr;

    VM_STAT(faults++);

    if ((thr_act=current_act()) && (thr_act->task != TASK_NULL))
        thr_act->task->faults++;

    /*
     *  Recovery actions
     */

#undef  RELEASE_PAGE
#define RELEASE_PAGE(m) {               \
    PAGE_WAKEUP_DONE(m);                \
    vm_page_lock_queues();              \
    vm_page_unwire(m);                  \
    vm_page_unlock_queues();            \
}

#undef  UNLOCK_THINGS
#define UNLOCK_THINGS   {               \
    object->paging_in_progress--;       \
    vm_object_unlock(object);           \
}

#undef  UNLOCK_AND_DEALLOCATE
#define UNLOCK_AND_DEALLOCATE   {       \
    UNLOCK_THINGS;                      \
    vm_object_deallocate(object);       \
}
/*
 *  Give up and have caller do things the hard way.
 */

#define GIVE_UP {                       \
    UNLOCK_AND_DEALLOCATE;              \
    return(KERN_FAILURE);               \
}


    /*
     *  If this entry is not directly to a vm_object, bail out.
     */
    if (entry->is_sub_map)
        return(KERN_FAILURE);

    /*
     *  Find the backing store object and offset into it.
     */

    object = entry->object.vm_object;
    offset = (va - entry->vme_start) + entry->offset;
    prot = entry->protection;

    /*
     *  Make a reference to this object to prevent its
     *  disposal while we are messing with it.
     */

    vm_object_lock(object);
    assert(object->ref_count > 0);
    object->ref_count++;
    vm_object_res_reference(object);
    object->paging_in_progress++;

    /*
     *  INVARIANTS (through entire routine):
     *
     *  1)  At all times, we must either have the object
     *      lock or a busy page in some object to prevent
     *      some other thread from trying to bring in
     *      the same page.
     *
     *  2)  Once we have a busy page, we must remove it from
     *      the pageout queues, so that the pageout daemon
     *      will not grab it away.
     *
     */

    /*
     *  Look for page in top-level object.  If it's not there or
     *  there's something going on, give up.
     */
    m = vm_page_lookup(object, offset);
    if ((m == VM_PAGE_NULL) || (m->busy) ||
        (m->unusual && ( m->error || m->restart || m->absent ||
                 prot & m->page_lock))) {

        GIVE_UP;
    }

    /*
     *  Wire the page down now.  All bail outs beyond this
     *  point must unwire the page.
     */

    vm_page_lock_queues();
    vm_page_wire(m);
    vm_page_unlock_queues();

    /*
     *  Mark page busy for other threads.
     */
    assert(!m->busy);
    m->busy = TRUE;

    /*
     *  Give up if the page is being written and there's a copy object
     */
    if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
        RELEASE_PAGE(m);
        GIVE_UP;
    }

    /*
     *  Put this page into the physical map.
     *  We have to unlock the object because pmap_enter
     *  may cause other faults.
     */
    if (m->no_isync == TRUE) {
        pmap_sync_caches_phys(m->phys_page);

        m->no_isync = FALSE;
    }

    cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

    PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);

    /*
     *  Unlock everything, and return
     */

    PAGE_WAKEUP_DONE(m);
    UNLOCK_AND_DEALLOCATE;

    return(KERN_SUCCESS);

}
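/*
 * vm_fault_wire_fast() deliberately bails out (GIVE_UP) whenever the page
 * isn't already resident and quiet in the top-level object, or when a
 * write would require a copy-object push; the caller (vm_fault_wire) then
 * repeats the work through the full vm_fault() path.  Roughly, the only
 * case handled here is: resident page, no pager interaction, wire it,
 * enter it, done.
 */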
/*
 *  Routine:    vm_fault_copy_cleanup
 *  Purpose:
 *      Release a page used by vm_fault_copy.
 */

void
vm_fault_copy_cleanup(
    vm_page_t   page,
    vm_page_t   top_page)
{
    vm_object_t object = page->object;

    vm_object_lock(object);
    PAGE_WAKEUP_DONE(page);
    vm_page_lock_queues();
    if (!page->active && !page->inactive)
        vm_page_activate(page);
    vm_page_unlock_queues();
    vm_fault_cleanup(object, top_page);
}

void
vm_fault_copy_dst_cleanup(
    vm_page_t   page)
{
    vm_object_t object;

    if (page != VM_PAGE_NULL) {
        object = page->object;
        vm_object_lock(object);
        vm_page_lock_queues();
        vm_page_unwire(page);
        vm_page_unlock_queues();
        vm_object_paging_end(object);
        vm_object_unlock(object);
    }
}
/*
 *  Routine:    vm_fault_copy
 *
 *  Purpose:
 *      Copy pages from one virtual memory object to another --
 *      neither the source nor destination pages need be resident.
 *
 *      Before actually copying a page, the version associated with
 *      the destination address map will be verified.
 *
 *  In/out conditions:
 *      The caller must hold a reference, but not a lock, to
 *      each of the source and destination objects and to the
 *      destination map.
 *
 *  Results:
 *      Returns KERN_SUCCESS if no errors were encountered in
 *      reading or writing the data.  Returns KERN_INTERRUPTED if
 *      the operation was interrupted (only possible if the
 *      "interruptible" argument is asserted).  Other return values
 *      indicate a permanent error in copying the data.
 *
 *      The actual amount of data copied will be returned in the
 *      "copy_size" argument.  In the event that the destination map
 *      verification failed, this amount may be less than the amount
 *      requested.
 */
kern_return_t
vm_fault_copy(
    vm_object_t         src_object,
    vm_object_offset_t  src_offset,
    vm_size_t           *src_size,      /* INOUT */
    vm_object_t         dst_object,
    vm_object_offset_t  dst_offset,
    vm_map_t            dst_map,
    vm_map_version_t    *dst_version,
    int                 interruptible)
{
    vm_page_t       result_page;

    vm_page_t       src_page;
    vm_page_t       src_top_page;
    vm_prot_t       src_prot;

    vm_page_t       dst_page;
    vm_page_t       dst_top_page;
    vm_prot_t       dst_prot;

    vm_size_t       amount_left;
    vm_object_t     old_copy_object;
    kern_return_t   error = 0;

    vm_size_t       part_size;

    /*
     * In order not to confuse the clustered pageins, align
     * the different offsets on a page boundary.
     */
    vm_object_offset_t  src_lo_offset = trunc_page_64(src_offset);
    vm_object_offset_t  dst_lo_offset = trunc_page_64(dst_offset);
    vm_object_offset_t  src_hi_offset = round_page_64(src_offset + *src_size);
    vm_object_offset_t  dst_hi_offset = round_page_64(dst_offset + *src_size);

#define RETURN(x)                       \
    MACRO_BEGIN                         \
    *src_size -= amount_left;           \
    MACRO_END                           \
    return(x)

    amount_left = *src_size;
    do { /* while (amount_left > 0) */
        /*
         * There may be a deadlock if both source and destination
         * pages are the same. To avoid this deadlock, the copy must
         * start by getting the destination page in order to apply
         * COW semantics if any.
         */

    RetryDestinationFault: ;

        dst_prot = VM_PROT_WRITE|VM_PROT_READ;

        vm_object_lock(dst_object);
        vm_object_paging_begin(dst_object);

        XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
        switch (vm_fault_page(dst_object,
                      trunc_page_64(dst_offset),
                      VM_PROT_WRITE|VM_PROT_READ,
                      FALSE,
                      interruptible,
                      dst_lo_offset,
                      dst_hi_offset,
                      VM_BEHAVIOR_SEQUENTIAL,
                      &dst_prot,
                      &dst_page,
                      &dst_top_page,
                      (int *)0,
                      &error,
                      dst_map->no_zero_fill,
                      FALSE, NULL, 0)) {
        case VM_FAULT_SUCCESS:
            break;
        case VM_FAULT_RETRY:
            goto RetryDestinationFault;
        case VM_FAULT_MEMORY_SHORTAGE:
            if (vm_page_wait(interruptible))
                goto RetryDestinationFault;
            /* fall thru */
        case VM_FAULT_INTERRUPTED:
            RETURN(MACH_SEND_INTERRUPTED);
        case VM_FAULT_FICTITIOUS_SHORTAGE:
            vm_page_more_fictitious();
            goto RetryDestinationFault;
        case VM_FAULT_MEMORY_ERROR:
            if (error)
                return (error);
            else
                return(KERN_MEMORY_ERROR);
        }
        assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);

        old_copy_object = dst_page->object->copy;

        /*
         * There exists the possibility that the source and
         * destination page are the same.  But we can't
         * easily determine that now.  If they are the
         * same, the call to vm_fault_page() for the
         * destination page will deadlock.  To prevent this we
         * wire the page so we can drop busy without having
         * the page daemon steal the page.  We clean up the
         * top page but keep the paging reference on the object
         * holding the dest page so it doesn't go away.
         */

        vm_page_lock_queues();
        vm_page_wire(dst_page);
        vm_page_unlock_queues();
        PAGE_WAKEUP_DONE(dst_page);
        vm_object_unlock(dst_page->object);

        if (dst_top_page != VM_PAGE_NULL) {
            vm_object_lock(dst_object);
            VM_PAGE_FREE(dst_top_page);
            vm_object_paging_end(dst_object);
            vm_object_unlock(dst_object);
        }

RetrySourceFault: ;
        if (src_object == VM_OBJECT_NULL) {
            /*
             *  No source object.  We will just
             *  zero-fill the page in dst_object.
             */
            src_page = VM_PAGE_NULL;
            result_page = VM_PAGE_NULL;
        } else {
            vm_object_lock(src_object);
            src_page = vm_page_lookup(src_object,
                          trunc_page_64(src_offset));
            if (src_page == dst_page) {
                src_prot = dst_prot;
                result_page = VM_PAGE_NULL;
            } else {
                src_prot = VM_PROT_READ;
                vm_object_paging_begin(src_object);

                XPR(XPR_VM_FAULT,
                    "vm_fault_copy(2) -> vm_fault_page\n",
                    0,0,0,0,0);
                switch (vm_fault_page(src_object,
                              trunc_page_64(src_offset),
                              VM_PROT_READ,
                              FALSE,
                              interruptible,
                              src_lo_offset,
                              src_hi_offset,
                              VM_BEHAVIOR_SEQUENTIAL,
                              &src_prot,
                              &result_page,
                              &src_top_page,
                              (int *)0,
                              &error,
                              FALSE,
                              FALSE, NULL, 0)) {

                case VM_FAULT_SUCCESS:
                    break;
                case VM_FAULT_RETRY:
                    goto RetrySourceFault;
                case VM_FAULT_MEMORY_SHORTAGE:
                    if (vm_page_wait(interruptible))
                        goto RetrySourceFault;
                    /* fall thru */
                case VM_FAULT_INTERRUPTED:
                    vm_fault_copy_dst_cleanup(dst_page);
                    RETURN(MACH_SEND_INTERRUPTED);
                case VM_FAULT_FICTITIOUS_SHORTAGE:
                    vm_page_more_fictitious();
                    goto RetrySourceFault;
                case VM_FAULT_MEMORY_ERROR:
                    vm_fault_copy_dst_cleanup(dst_page);
                    if (error)
                        return (error);
                    else
                        return(KERN_MEMORY_ERROR);
                }

                assert((src_top_page == VM_PAGE_NULL) ==
                       (result_page->object == src_object));
            }
            assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
            vm_object_unlock(result_page->object);
        }

        if (!vm_map_verify(dst_map, dst_version)) {
            if (result_page != VM_PAGE_NULL && src_page != dst_page)
                vm_fault_copy_cleanup(result_page, src_top_page);
            vm_fault_copy_dst_cleanup(dst_page);
            break;
        }

        vm_object_lock(dst_page->object);

        if (dst_page->object->copy != old_copy_object) {
            vm_object_unlock(dst_page->object);
            vm_map_verify_done(dst_map, dst_version);
            if (result_page != VM_PAGE_NULL && src_page != dst_page)
                vm_fault_copy_cleanup(result_page, src_top_page);
            vm_fault_copy_dst_cleanup(dst_page);
            break;
        }
        vm_object_unlock(dst_page->object);

        /*
         *  Copy the page, and note that it is dirty
         *  immediately.
         */

        if (!page_aligned(src_offset) ||
            !page_aligned(dst_offset) ||
            !page_aligned(amount_left)) {

            vm_object_offset_t  src_po,
                                dst_po;

            src_po = src_offset - trunc_page_64(src_offset);
            dst_po = dst_offset - trunc_page_64(dst_offset);

            if (dst_po > src_po) {
                part_size = PAGE_SIZE - dst_po;
            } else {
                part_size = PAGE_SIZE - src_po;
            }
            if (part_size > (amount_left)){
                part_size = amount_left;
            }

            if (result_page == VM_PAGE_NULL) {
                vm_page_part_zero_fill(dst_page,
                            dst_po, part_size);
            } else {
                vm_page_part_copy(result_page, src_po,
                    dst_page, dst_po, part_size);
                if (!dst_page->dirty) {
                    vm_object_lock(dst_object);
                    dst_page->dirty = TRUE;
                    vm_object_unlock(dst_page->object);
                }

            }
        } else {
            part_size = PAGE_SIZE;

            if (result_page == VM_PAGE_NULL)
                vm_page_zero_fill(dst_page);
            else {
                vm_page_copy(result_page, dst_page);
                if (!dst_page->dirty) {
                    vm_object_lock(dst_object);
                    dst_page->dirty = TRUE;
                    vm_object_unlock(dst_page->object);
                }
            }

        }

        /*
         *  Unlock everything, and return
         */

        vm_map_verify_done(dst_map, dst_version);

        if (result_page != VM_PAGE_NULL && src_page != dst_page)
            vm_fault_copy_cleanup(result_page, src_top_page);
        vm_fault_copy_dst_cleanup(dst_page);

        amount_left -= part_size;
        src_offset += part_size;
        dst_offset += part_size;

    } while (amount_left > 0);

    RETURN(KERN_SUCCESS);
#undef  RETURN

    /*NOTREACHED*/
}
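/*
 * In outline, each trip through the loop above: fault in (or create) the
 * destination page with write permission, wire it so it can't be stolen
 * while busy is dropped, fault in the source page read-only (unless the
 * two are the same page, the deadlock case the comments describe), verify
 * that the destination map version and the copy object haven't changed,
 * then copy or zero-fill either the whole page or the non-page-aligned
 * piece, mark the destination dirty, release both pages and advance by
 * part_size.  This summarizes the loop; the code above is authoritative.
 */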
/*
 *  Routine:    vm_fault_page_overwrite
 *
 *  Description:
 *      A form of vm_fault_page that assumes that the
 *      resulting page will be overwritten in its entirety,
 *      making it unnecessary to obtain the correct *contents*
 *      of the page.
 *
 *  Implementation:
 *      XXX Untested.  Also unused.  Eventually, this technology
 *      could be used in vm_fault_copy() to advantage.
 */
vm_fault_return_t
vm_fault_page_overwrite(
    register
    vm_object_t         dst_object,
    vm_object_offset_t  dst_offset,
    vm_page_t           *result_page)   /* OUT */
{
    register
    vm_page_t       dst_page;
    kern_return_t   wait_result;

#define interruptible   THREAD_UNINT    /* XXX */

    while (TRUE) {
        /*
         *  Look for a page at this offset
         */

        while ((dst_page = vm_page_lookup(dst_object, dst_offset))
                 == VM_PAGE_NULL) {
            /*
             *  No page, no problem... just allocate one.
             */

            dst_page = vm_page_alloc(dst_object, dst_offset);
            if (dst_page == VM_PAGE_NULL) {
                vm_object_unlock(dst_object);
                VM_PAGE_WAIT();
                vm_object_lock(dst_object);
                continue;
            }

            /*
             *  Pretend that the memory manager
             *  write-protected the page.
             *
             *  Note that we will be asking for write
             *  permission without asking for the data
             *  first.
             */

            dst_page->overwriting = TRUE;
            dst_page->page_lock = VM_PROT_WRITE;
            dst_page->absent = TRUE;
            dst_page->unusual = TRUE;
            dst_object->absent_count++;

            break;
        }

        /*
         *  When we bail out, we might have to throw
         *  away the page created here.
         */

#define DISCARD_PAGE                                            \
    MACRO_BEGIN                                                 \
    vm_object_lock(dst_object);                                 \
    dst_page = vm_page_lookup(dst_object, dst_offset);          \
    if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting)    \
        VM_PAGE_FREE(dst_page);                                 \
    vm_object_unlock(dst_object);                               \
    MACRO_END

        /*
         *  If the page is write-protected...
         */

        if (dst_page->page_lock & VM_PROT_WRITE) {
            /*
             *  ... and an unlock request hasn't been sent
             */

            if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
                vm_prot_t       u;
                kern_return_t   rc;

                /*
                 *  ... then send one now.
                 */

                if (!dst_object->pager_ready) {
                    wait_result = vm_object_assert_wait(dst_object,
                                VM_OBJECT_EVENT_PAGER_READY,
                                interruptible);
                    vm_object_unlock(dst_object);
                    if (wait_result == THREAD_WAITING)
                        wait_result = thread_block(THREAD_CONTINUE_NULL);
                    if (wait_result != THREAD_AWAKENED) {
                        DISCARD_PAGE;
                        return(VM_FAULT_INTERRUPTED);
                    }
                    continue;
                }

                u = dst_page->unlock_request |= VM_PROT_WRITE;
                vm_object_unlock(dst_object);

                if ((rc = memory_object_data_unlock(
                        dst_object->pager,
                        dst_offset + dst_object->paging_offset,
                        PAGE_SIZE,
                        u)) != KERN_SUCCESS) {
                    if (vm_fault_debug)
                        printf("vm_object_overwrite: memory_object_data_unlock failed\n");
                    DISCARD_PAGE;
                    return((rc == MACH_SEND_INTERRUPTED) ?
                        VM_FAULT_INTERRUPTED :
                        VM_FAULT_MEMORY_ERROR);
                }

                vm_object_lock(dst_object);
                continue;
            }

            /* ... fall through to wait below */
        } else {
            /*
             *  If the page isn't being used for other
             *  purposes, then we're done.
             */
            if ( ! (dst_page->busy || dst_page->absent ||
                dst_page->error || dst_page->restart) )
                break;
        }

        wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
        vm_object_unlock(dst_object);
        if (wait_result == THREAD_WAITING)
            wait_result = thread_block(THREAD_CONTINUE_NULL);
        if (wait_result != THREAD_AWAKENED) {
            DISCARD_PAGE;
            return(VM_FAULT_INTERRUPTED);
        }
    }

    *result_page = dst_page;
    return(VM_FAULT_SUCCESS);

#undef  interruptible
#undef  DISCARD_PAGE
}
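/*
 * The protocol sketched above, roughly: if the page doesn't exist it is
 * created already marked absent and write-locked, as if the pager had
 * protected it; if it is write-locked, a memory_object_data_unlock()
 * request is sent and the code waits for the pager to unlock it; once the
 * page is present, unlocked and quiet it is returned still busy to the
 * caller, who is expected to overwrite its entire contents.  As the XXX
 * note says, this path is untested and unused here.
 */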
#if VM_FAULT_CLASSIFY
/*
 *  Temporary statistics gathering support.
 */

/*
 *  Statistics arrays:
 */
#define VM_FAULT_TYPES_MAX  5
#define VM_FAULT_LEVEL_MAX  8

int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];

#define VM_FAULT_TYPE_ZERO_FILL 0
#define VM_FAULT_TYPE_MAP_IN    1
#define VM_FAULT_TYPE_PAGER     2
#define VM_FAULT_TYPE_COPY      3
#define VM_FAULT_TYPE_OTHER     4


void
vm_fault_classify(vm_object_t           object,
                  vm_object_offset_t    offset,
                  vm_prot_t             fault_type)
{
    int         type, level = 0;
    vm_page_t   m;

    while (TRUE) {
        m = vm_page_lookup(object, offset);
        if (m != VM_PAGE_NULL) {
            if (m->busy || m->error || m->restart || m->absent ||
                fault_type & m->page_lock) {
                type = VM_FAULT_TYPE_OTHER;
                break;
            }
            if (((fault_type & VM_PROT_WRITE) == 0) ||
                ((level == 0) && object->copy == VM_OBJECT_NULL)) {
                type = VM_FAULT_TYPE_MAP_IN;
                break;
            }
            type = VM_FAULT_TYPE_COPY;
            break;
        }
        else {
            if (object->pager_created) {
                type = VM_FAULT_TYPE_PAGER;
                break;
            }
            if (object->shadow == VM_OBJECT_NULL) {
                type = VM_FAULT_TYPE_ZERO_FILL;
                break;
            }

            offset += object->shadow_offset;
            object = object->shadow;
            level++;
            continue;
        }
    }

    if (level > VM_FAULT_LEVEL_MAX)
        level = VM_FAULT_LEVEL_MAX;

    vm_fault_stats[type][level] += 1;

    return;
}

/* cleanup routine to call from debugger */
void
vm_fault_classify_init(void)
{
    int type, level;

    for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
        for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
            vm_fault_stats[type][level] = 0;
        }
    }

    return;
}
#endif  /* VM_FAULT_CLASSIFY */