/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Page fault handling module.
 */
/* remove after component interface available */
extern int	vnode_pager_workaround;
extern int	device_pager_workaround;
#include <mach_cluster_stats.h>
#include <mach_pagemap.h>

#include <vm/vm_fault.h>
#include <mach/kern_return.h>
#include <mach/message.h>	/* for error codes */
#include <kern/host_statistics.h>
#include <kern/counters.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/host.h>
#include <ppc/proc_reg.h>
#include <ppc/pmap_internals.h>
#include <vm/task_working_set.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/memory_object.h>
				/* For memory_object_data_{request,unlock} */
#include <kern/mach_param.h>
#include <kern/macro_help.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>

#include <sys/kdebug.h>
#define VM_FAULT_CLASSIFY	0
#define VM_FAULT_STATIC_CONFIG	1

#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */

int		vm_object_absent_max = 50;

int		vm_fault_debug = 0;
boolean_t	vm_page_deactivate_behind = TRUE;

#if	!VM_FAULT_STATIC_CONFIG
boolean_t	vm_fault_dirty_handling = FALSE;
boolean_t	vm_fault_interruptible = FALSE;
boolean_t	software_reference_bits = TRUE;
#endif	/* !VM_FAULT_STATIC_CONFIG */

#if	MACH_KDB
extern struct db_watchpoint *db_watchpoint_list;
#endif	/* MACH_KDB */
/* Forward declarations of internal routines. */
extern kern_return_t vm_fault_wire_fast(
			vm_map_t	map,
			vm_offset_t	va,
			vm_map_entry_t	entry,
			pmap_t		pmap,
			vm_offset_t	pmap_addr);

extern void vm_fault_continue(void);

extern void vm_fault_copy_cleanup(
			vm_page_t	page,
			vm_page_t	top_page);

extern void vm_fault_copy_dst_cleanup(
			vm_page_t	page);

#if	VM_FAULT_CLASSIFY
extern void vm_fault_classify(vm_object_t	object,
			vm_object_offset_t	offset,
			vm_prot_t		fault_type);

extern void vm_fault_classify_init(void);
#endif	/* VM_FAULT_CLASSIFY */
/*
 *	Routine:	vm_fault_init
 *		Initialize our private data structures.
 */

/*
 *	Routine:	vm_fault_cleanup
 *		Clean up the result of vm_fault_page.
 *
 *		The paging reference for "object" is released.
 *		"object" is unlocked.
 *		If "top_page" is not null, "top_page" is
 *		freed and the paging reference for the object
 *		containing it is released.
 *
 *		"object" must be locked.
 */
void
vm_fault_cleanup(
	register vm_object_t	object,
	register vm_page_t	top_page)
{
	vm_object_paging_end(object);
	vm_object_unlock(object);

	if (top_page != VM_PAGE_NULL) {
		object = top_page->object;
		vm_object_lock(object);
		VM_PAGE_FREE(top_page);
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}
}
#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES	16
struct {
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats_in[MAXCLUSTERPAGES];
#define CLUSTER_STAT(clause)	clause
#define CLUSTER_STAT_HIGHER(x)	\
	((cluster_stats_in[(x)].pages_at_higher_offsets)++)
#define CLUSTER_STAT_LOWER(x)	\
	((cluster_stats_in[(x)].pages_at_lower_offsets)++)
#define CLUSTER_STAT_CLUSTER(x)	\
	((cluster_stats_in[(x)].pages_in_cluster)++)
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */
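
/*
 * Illustrative note (not part of the original source): CLUSTER_STAT()
 * compiles its argument only when MACH_CLUSTER_STATS is configured, so
 * statistics code can stay inline at zero cost otherwise, e.g.
 *
 *	CLUSTER_STAT(pages_at_higher_offsets++;)
 *	CLUSTER_STAT_HIGHER(n);		bump the histogram bucket for a
 *					cluster page n slots above the fault
 *
 * where "n" is a hypothetical index below MAXCLUSTERPAGES.
 */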
/* XXX - temporary */
boolean_t vm_allow_clustered_pagein = FALSE;
int vm_pagein_cluster_used = 0;

/*
 *	Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
 */
int vm_default_ahead = 1;	/* Number of pages to prepage ahead */
int vm_default_behind = 0;	/* Number of pages to prepage behind */

#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
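
/*
 * Illustrative note (not part of the original source): ALIGNED() tests
 * whether a 64-bit object offset sits on a page boundary; with a 4K
 * page size, ALIGNED(0x1000) is TRUE and ALIGNED(0x1234) is FALSE.
 * vm_default_ahead/vm_default_behind bound how many extra pages are
 * requested around a fault when the map entry uses VM_BEHAVIOR_DEFAULT.
 */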
/*
 *	Routine:	vm_fault_page
 *		Find the resident page for the virtual memory
 *		specified by the given virtual memory object
 *		and offset.
 *	Additional arguments:
 *		The required permissions for the page are given
 *		in "fault_type".  Desired permissions are included
 *		in "protection".  The minimum and maximum valid offsets
 *		within the object for the relevant map entry are
 *		passed in "lo_offset" and "hi_offset" respectively and
 *		the expected page reference pattern is passed in "behavior".
 *		These three parameters are used to determine pagein cluster
 *		limits.
 *
 *		If the desired page is known to be resident (for
 *		example, because it was previously wired down), asserting
 *		the "unwiring" parameter will speed the search.
 *
 *		If the operation can be interrupted (by thread_abort
 *		or thread_terminate), then the "interruptible"
 *		parameter should be asserted.
 *
 *	Results:
 *		The page containing the proper data is returned
 *		in "result_page".
 *
 *	In/out conditions:
 *		The source object must be locked and referenced,
 *		and must donate one paging reference.  The reference
 *		is not affected.  The paging reference and lock are
 *		consumed.
 *
 *		If the call succeeds, the object in which "result_page"
 *		resides is left locked and holding a paging reference.
 *		If this is not the original object, a busy page in the
 *		original object is returned in "top_page", to prevent other
 *		callers from pursuing this same data, along with a paging
 *		reference for the original object.  The "top_page" should
 *		be destroyed when this guarantee is no longer required.
 *		The "result_page" is also left busy.  It is not removed
 *		from the pageout queues.
 */
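
/*
 * Illustrative sketch (not part of the original source): callers are
 * expected to dispatch on the vm_fault_return_t result, roughly
 *
 *	switch (vm_fault_page(...)) {
 *	case VM_FAULT_SUCCESS:		break;	*result_page is busy
 *	case VM_FAULT_RETRY:		restart the lookup
 *	case VM_FAULT_MEMORY_SHORTAGE:	wait for a free page, then retry
 *	case VM_FAULT_INTERRUPTED:	abort the fault
 *	case VM_FAULT_MEMORY_ERROR:	report *error_code
 *	}
 *
 * The recovery actions shown are a sketch; vm_fault() below contains
 * the authoritative dispatch.
 */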
vm_fault_return_t
vm_fault_page(
	vm_object_t	first_object,	/* Object to begin search */
	vm_object_offset_t first_offset,	/* Offset into object */
	vm_prot_t	fault_type,	/* What access is requested */
	boolean_t	must_be_resident,/* Must page be resident? */
	int		interruptible,	/* how may fault be interrupted? */
	vm_object_offset_t lo_offset,	/* Map entry start */
	vm_object_offset_t hi_offset,	/* Map entry end */
	vm_behavior_t	behavior,	/* Page reference behavior */
	/* Modifies in place: */
	vm_prot_t	*protection,	/* Protection for mapping */
	vm_page_t	*result_page,	/* Page found, if successful */
	vm_page_t	*top_page,	/* Page in top object, if
					 * not result_page. */
	int		*type_of_fault, /* if non-null, fill in with type of fault
					 * COW, zero-fill, etc... returned in trace point */
	/* More arguments: */
	kern_return_t	*error_code,	/* code if page is in error */
	boolean_t	no_zero_fill,	/* don't zero fill absent pages */
	boolean_t	data_supply,	/* treat as data_supply if
					 * it is a write fault and a full
					 * page is provided */
	vm_object_offset_t	offset;
	vm_object_t		next_object;
	vm_object_t		copy_object;
	boolean_t		look_for_page;
	vm_prot_t		access_required = fault_type;
	vm_prot_t		wants_copy_flag;
	vm_size_t		cluster_size, length;
	vm_object_offset_t	cluster_offset;
	vm_object_offset_t	cluster_start, cluster_end, paging_offset;
	vm_object_offset_t	align_offset;
	CLUSTER_STAT(int pages_at_higher_offsets;)
	CLUSTER_STAT(int pages_at_lower_offsets;)
	kern_return_t		wait_result;
	boolean_t		interruptible_state;
	boolean_t		bumped_pagein = FALSE;
#if	MACH_PAGEMAP
/*
 * MACH page map - an optional optimization where a bit map is maintained
 * by the VM subsystem for internal objects to indicate which pages of
 * the object currently reside on backing store.  This existence map
 * duplicates information maintained by the vnode pager.  It is
 * created at the time of the first pageout against the object, i.e.
 * at the same time the pager for the object is created.  The optimization
 * is designed to eliminate pager interaction overhead, if it is
 * 'known' that the page does not exist on backing store.
 *
 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
 * either marked as paged out in the existence map for the object or no
 * existence map exists for the object.  LOOK_FOR() is one of the
 * criteria in the decision to invoke the pager.  It is also used as one
 * of the criteria to terminate the scan for adjacent pages in a clustered
 * pagein operation.  Note that LOOK_FOR() always evaluates to TRUE for
 * permanent objects.  Note also that if the pager for an internal object
 * has not been created, the pager is not invoked regardless of the value
 * of LOOK_FOR() and that clustered pagein scans are only done on an object
 * for which a pager has been created.
 *
 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 * is marked as paged out in the existence map for the object.  PAGED_OUT()
 * is used to determine if a page has already been pushed
 * into a copy object in order to avoid a redundant page out operation.
 */
#define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			!= VM_EXTERNAL_STATE_ABSENT)
#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			== VM_EXTERNAL_STATE_EXISTS)
#else /* MACH_PAGEMAP */
/*
 * If the MACH page map optimization is not enabled,
 * LOOK_FOR() always evaluates to TRUE.  The pager will always be
 * invoked to resolve missing pages in an object, assuming the pager
 * has been created for the object.  In a clustered page operation, the
 * absence of a page on backing store cannot be used to terminate
 * a scan for adjacent pages since that information is available only in
 * the pager.  Hence pages that may not be paged out are potentially
 * included in a clustered request.  The vnode pager is coded to deal
 * with any combination of absent/present pages in a clustered
 * pagein request.  PAGED_OUT() always evaluates to FALSE, i.e. the pager
 * will always be invoked to push a dirty page into a copy object assuming
 * a pager has been created.  If the page has already been pushed, the
 * pager will ignore the new request.
 */
#define LOOK_FOR(o, f) TRUE
#define PAGED_OUT(o, f) FALSE
#endif /* MACH_PAGEMAP */
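
/*
 * Illustrative sketch (not part of the original source): the fault loop
 * below combines LOOK_FOR() with other object state when deciding
 * whether to ask the pager for the page, roughly
 *
 *	look_for_page = (object->pager_created && LOOK_FOR(object, offset));
 *	if (look_for_page && !must_be_resident)
 *		... issue memory_object_data_request() ...
 *
 * while PAGED_OUT() is consulted before pushing a page into a copy
 * object, so that a push the pager has already seen can be skipped.
 */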
#define	PREPARE_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	vm_page_lock_queues();				\
	MACRO_END

#define	DO_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	PAGE_WAKEUP_DONE(m);				\
	if (!m->active && !m->inactive)			\
		vm_page_activate(m);			\
	vm_page_unlock_queues();			\
	MACRO_END

#define	RELEASE_PAGE(m)					\
	MACRO_BEGIN					\
	PREPARE_RELEASE_PAGE(m);			\
	DO_RELEASE_PAGE(m);				\
	MACRO_END
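
/*
 * Illustrative note (not part of the original source): RELEASE_PAGE(m)
 * is the error-path idiom used below to give back a page this routine
 * marked busy: it takes the page queues lock, wakes any waiters via
 * PAGE_WAKEUP_DONE(), reactivates the page if it is on neither the
 * active nor the inactive queue, and drops the queues lock again.
 */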
	dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset);	/* (TEST/DEBUG) */
#if	!VM_FAULT_STATIC_CONFIG
	if (vm_fault_dirty_handling
#if	MACH_KDB
		/*
		 *	If there are watchpoints set, then
		 *	we don't want to give away write permission
		 *	on a read fault.  Make the task write fault,
		 *	so that the watchpoint code notices the access.
		 */
	    || db_watchpoint_list
#endif	/* MACH_KDB */
		) {
		/*
		 *	If we aren't asking for write permission,
		 *	then don't give it away.  We're using write
		 *	faults to set the dirty bit.
		 */
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
	}

	if (!vm_fault_interruptible)
		interruptible = THREAD_UNINT;
#else	/* STATIC_CONFIG */
#if	MACH_KDB
		/*
		 *	If there are watchpoints set, then
		 *	we don't want to give away write permission
		 *	on a read fault.  Make the task write fault,
		 *	so that the watchpoint code notices the access.
		 */
	if (db_watchpoint_list) {
		/*
		 *	If we aren't asking for write permission,
		 *	then don't give it away.  We're using write
		 *	faults to set the dirty bit.
		 */
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
	}
#endif	/* MACH_KDB */
#endif	/* STATIC_CONFIG */
	interruptible_state = thread_interrupt_level(interruptible);
	/*
	 *	INVARIANTS (through entire routine):
	 *
	 *	1)	At all times, we must either have the object
	 *		lock or a busy page in some object to prevent
	 *		some other thread from trying to bring in
	 *		the same page.
	 *
	 *		Note that we cannot hold any locks during the
	 *		pager access or when waiting for memory, so
	 *		we use a busy page then.
	 *
	 *		Note also that we aren't as concerned about more than
	 *		one thread attempting to memory_object_data_unlock
	 *		the same page at once, so we don't hold the page
	 *		as busy then, but do record the highest unlock
	 *		value so far.  [Unlock requests may also be delivered
	 *		out of order.]
	 *
	 *	2)	To prevent another thread from racing us down the
	 *		shadow chain and entering a new page in the top
	 *		object before we do, we must keep a busy page in
	 *		the top object while following the shadow chain.
	 *
	 *	3)	We must increment paging_in_progress on any object
	 *		for which we have a busy page
	 *
	 *	4)	We leave busy pages on the pageout queues.
	 *		If the pageout daemon comes across a busy page,
	 *		it will remove the page from the pageout queues.
	 */
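
	/*
	 * Illustrative note (not part of the original source): the "back
	 * off" pattern repeated below follows invariant 1.  Before
	 * blocking, the code takes an extra reference on the object, calls
	 * vm_fault_cleanup() to drop the paging reference and lock,
	 * blocks, then deallocates the extra reference and returns
	 * VM_FAULT_RETRY so the caller restarts the lookup from the top.
	 */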
	/*
	 *	Search for the page at object/offset.
	 */

	object = first_object;
	offset = first_offset;
	first_m = VM_PAGE_NULL;
	access_required = fault_type;

	XPR(XPR_VM_FAULT,
		"vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
		(integer_t)object, offset, fault_type, *protection, 0);
482 * See whether this page is resident
487 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
489 if (!object
->alive
) {
490 vm_fault_cleanup(object
, first_m
);
491 thread_interrupt_level(interruptible_state
);
492 return(VM_FAULT_MEMORY_ERROR
);
494 m
= vm_page_lookup(object
, offset
);
496 dbgTrace(0xBEEF0004, (unsigned int) m
, (unsigned int) object
); /* (TEST/DEBUG) */
498 if (m
!= VM_PAGE_NULL
) {
500 * If the page was pre-paged as part of a
501 * cluster, record the fact.
504 vm_pagein_cluster_used
++;
505 m
->clustered
= FALSE
;
509 * If the page is being brought in,
510 * wait for it and then retry.
512 * A possible optimization: if the page
513 * is known to be resident, we can ignore
514 * pages that are absent (regardless of
515 * whether they're busy).
520 dbgTrace(0xBEEF0005, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
522 wait_result
= PAGE_SLEEP(object
, m
, interruptible
);
524 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
525 (integer_t
)object
, offset
,
527 counter(c_vm_fault_page_block_busy_kernel
++);
529 if (wait_result
!= THREAD_AWAKENED
) {
530 vm_fault_cleanup(object
, first_m
);
531 thread_interrupt_level(interruptible_state
);
532 if (wait_result
== THREAD_RESTART
)
534 return(VM_FAULT_RETRY
);
538 return(VM_FAULT_INTERRUPTED
);
545 * If the page is in error, give up now.
550 dbgTrace(0xBEEF0006, (unsigned int) m
, (unsigned int) error_code
); /* (TEST/DEBUG) */
553 *error_code
= m
->page_error
;
555 vm_fault_cleanup(object
, first_m
);
556 thread_interrupt_level(interruptible_state
);
557 return(VM_FAULT_MEMORY_ERROR
);
561 * If the pager wants us to restart
562 * at the top of the chain,
563 * typically because it has moved the
564 * page to another pager, then do so.
569 dbgTrace(0xBEEF0007, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
572 vm_fault_cleanup(object
, first_m
);
573 thread_interrupt_level(interruptible_state
);
574 return(VM_FAULT_RETRY
);
578 * If the page isn't busy, but is absent,
579 * then it was deemed "unavailable".
584 * Remove the non-existent page (unless it's
585 * in the top object) and move on down to the
586 * next object (if there is one).
589 dbgTrace(0xBEEF0008, (unsigned int) m
, (unsigned int) object
->shadow
); /* (TEST/DEBUG) */
592 next_object
= object
->shadow
;
593 if (next_object
== VM_OBJECT_NULL
) {
596 assert(!must_be_resident
);
598 if (object
->shadow_severed
) {
601 thread_interrupt_level(interruptible_state
);
602 return VM_FAULT_MEMORY_ERROR
;
606 * Absent page at bottom of shadow
607 * chain; zero fill the page we left
608 * busy in the first object, and flush
609 * the absent page. But first we
610 * need to allocate a real page.
612 if (VM_PAGE_THROTTLED() ||
613 (real_m
= vm_page_grab()) == VM_PAGE_NULL
) {
614 vm_fault_cleanup(object
, first_m
);
615 thread_interrupt_level(interruptible_state
);
616 return(VM_FAULT_MEMORY_SHORTAGE
);
620 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
621 (integer_t
)object
, offset
,
623 (integer_t
)first_object
, 0);
624 if (object
!= first_object
) {
626 vm_object_paging_end(object
);
627 vm_object_unlock(object
);
628 object
= first_object
;
629 offset
= first_offset
;
631 first_m
= VM_PAGE_NULL
;
632 vm_object_lock(object
);
636 assert(real_m
->busy
);
637 vm_page_insert(real_m
, object
, offset
);
641 * Drop the lock while zero filling
642 * page. Then break because this
643 * is the page we wanted. Checking
644 * the page lock is a waste of time;
645 * this page was either absent or
646 * newly allocated -- in both cases
647 * it can't be page locked by a pager.
652 vm_object_unlock(object
);
653 vm_page_zero_fill(m
);
655 *type_of_fault
= DBG_ZERO_FILL_FAULT
;
656 VM_STAT(zero_fill_count
++);
658 if (bumped_pagein
== TRUE
) {
660 current_task()->pageins
--;
662 vm_object_lock(object
);
664 pmap_clear_modify(m
->phys_addr
);
665 vm_page_lock_queues();
666 VM_PAGE_QUEUES_REMOVE(m
);
667 m
->page_ticket
= vm_page_ticket
;
668 if(m
->object
->size
> 0x80000) {
670 /* depends on the queues lock */
672 queue_enter(&vm_page_queue_zf
,
673 m
, vm_page_t
, pageq
);
676 &vm_page_queue_inactive
,
677 m
, vm_page_t
, pageq
);
679 vm_page_ticket_roll
++;
680 if(vm_page_ticket_roll
==
681 VM_PAGE_TICKETS_IN_ROLL
) {
682 vm_page_ticket_roll
= 0;
684 VM_PAGE_TICKET_ROLL_IDS
)
690 vm_page_inactive_count
++;
691 vm_page_unlock_queues();
694 if (must_be_resident
) {
695 vm_object_paging_end(object
);
696 } else if (object
!= first_object
) {
697 vm_object_paging_end(object
);
703 vm_object_absent_release(object
);
706 vm_page_lock_queues();
707 VM_PAGE_QUEUES_REMOVE(m
);
708 vm_page_unlock_queues();
711 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
712 (integer_t
)object
, offset
,
713 (integer_t
)next_object
,
714 offset
+object
->shadow_offset
,0);
715 offset
+= object
->shadow_offset
;
716 hi_offset
+= object
->shadow_offset
;
717 lo_offset
+= object
->shadow_offset
;
718 access_required
= VM_PROT_READ
;
719 vm_object_lock(next_object
);
720 vm_object_unlock(object
);
721 object
= next_object
;
722 vm_object_paging_begin(object
);
728 && ((object
!= first_object
) ||
729 (object
->copy
!= VM_OBJECT_NULL
))
730 && (fault_type
& VM_PROT_WRITE
)) {
732 * This is a copy-on-write fault that will
733 * cause us to revoke access to this page, but
734 * this page is in the process of being cleaned
735 * in a clustered pageout. We must wait until
736 * the cleaning operation completes before
737 * revoking access to the original page,
738 * otherwise we might attempt to remove a
742 dbgTrace(0xBEEF0009, (unsigned int) m
, (unsigned int) offset
); /* (TEST/DEBUG) */
745 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
746 (integer_t
)object
, offset
,
748 /* take an extra ref so that object won't die */
749 assert(object
->ref_count
> 0);
751 vm_object_res_reference(object
);
752 vm_fault_cleanup(object
, first_m
);
753 counter(c_vm_fault_page_block_backoff_kernel
++);
754 vm_object_lock(object
);
755 assert(object
->ref_count
> 0);
756 m
= vm_page_lookup(object
, offset
);
757 if (m
!= VM_PAGE_NULL
&& m
->cleaning
) {
758 PAGE_ASSERT_WAIT(m
, interruptible
);
759 vm_object_unlock(object
);
760 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
761 vm_object_deallocate(object
);
764 vm_object_unlock(object
);
765 vm_object_deallocate(object
);
766 thread_interrupt_level(interruptible_state
);
767 return VM_FAULT_RETRY
;
772 * If the desired access to this page has
773 * been locked out, request that it be unlocked.
776 if (access_required
& m
->page_lock
) {
777 if ((access_required
& m
->unlock_request
) != access_required
) {
778 vm_prot_t new_unlock_request
;
782 dbgTrace(0xBEEF000A, (unsigned int) m
, (unsigned int) object
->pager_ready
); /* (TEST/DEBUG) */
784 if (!object
->pager_ready
) {
786 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
788 (integer_t
)object
, offset
,
790 /* take an extra ref */
791 assert(object
->ref_count
> 0);
793 vm_object_res_reference(object
);
794 vm_fault_cleanup(object
,
796 counter(c_vm_fault_page_block_backoff_kernel
++);
797 vm_object_lock(object
);
798 assert(object
->ref_count
> 0);
799 if (!object
->pager_ready
) {
800 wait_result
= vm_object_assert_wait(
802 VM_OBJECT_EVENT_PAGER_READY
,
804 vm_object_unlock(object
);
805 if (wait_result
== THREAD_WAITING
)
806 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
807 vm_object_deallocate(object
);
810 vm_object_unlock(object
);
811 vm_object_deallocate(object
);
812 thread_interrupt_level(interruptible_state
);
813 return VM_FAULT_RETRY
;
817 new_unlock_request
= m
->unlock_request
=
818 (access_required
| m
->unlock_request
);
819 vm_object_unlock(object
);
821 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
822 (integer_t
)object
, offset
,
823 (integer_t
)m
, new_unlock_request
, 0);
824 if ((rc
= memory_object_data_unlock(
826 offset
+ object
->paging_offset
,
831 printf("vm_fault: memory_object_data_unlock failed\n");
832 vm_object_lock(object
);
833 vm_fault_cleanup(object
, first_m
);
834 thread_interrupt_level(interruptible_state
);
835 return((rc
== MACH_SEND_INTERRUPTED
) ?
836 VM_FAULT_INTERRUPTED
:
837 VM_FAULT_MEMORY_ERROR
);
839 vm_object_lock(object
);
844 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
845 access_required
, (integer_t
)object
,
846 offset
, (integer_t
)m
, 0);
847 /* take an extra ref so object won't die */
848 assert(object
->ref_count
> 0);
850 vm_object_res_reference(object
);
851 vm_fault_cleanup(object
, first_m
);
852 counter(c_vm_fault_page_block_backoff_kernel
++);
853 vm_object_lock(object
);
854 assert(object
->ref_count
> 0);
855 m
= vm_page_lookup(object
, offset
);
856 if (m
!= VM_PAGE_NULL
&&
857 (access_required
& m
->page_lock
) &&
858 !((access_required
& m
->unlock_request
) != access_required
)) {
859 PAGE_ASSERT_WAIT(m
, interruptible
);
860 vm_object_unlock(object
);
861 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
862 vm_object_deallocate(object
);
865 vm_object_unlock(object
);
866 vm_object_deallocate(object
);
867 thread_interrupt_level(interruptible_state
);
868 return VM_FAULT_RETRY
;
	 *	We mark the page busy and leave it on
	 *	the pageout queues.  If the pageout
	 *	daemon comes across it, then it will
879 dbgTrace(0xBEEF000B, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
882 #if !VM_FAULT_STATIC_CONFIG
883 if (!software_reference_bits
) {
884 vm_page_lock_queues();
886 vm_stat
.reactivations
++;
888 VM_PAGE_QUEUES_REMOVE(m
);
889 vm_page_unlock_queues();
893 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
894 (integer_t
)object
, offset
, (integer_t
)m
, 0, 0);
902 (object
->pager_created
) &&
903 LOOK_FOR(object
, offset
) &&
907 dbgTrace(0xBEEF000C, (unsigned int) look_for_page
, (unsigned int) object
); /* (TEST/DEBUG) */
909 if ((look_for_page
|| (object
== first_object
))
911 && !(object
->phys_contiguous
)) {
913 * Allocate a new page for this object/offset
917 m
= vm_page_grab_fictitious();
919 dbgTrace(0xBEEF000D, (unsigned int) m
, (unsigned int) object
); /* (TEST/DEBUG) */
921 if (m
== VM_PAGE_NULL
) {
922 vm_fault_cleanup(object
, first_m
);
923 thread_interrupt_level(interruptible_state
);
924 return(VM_FAULT_FICTITIOUS_SHORTAGE
);
926 vm_page_insert(m
, object
, offset
);
929 if ((look_for_page
&& !must_be_resident
)) {
933 * If the memory manager is not ready, we
934 * cannot make requests.
936 if (!object
->pager_ready
) {
938 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
940 if(m
!= VM_PAGE_NULL
)
943 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
944 (integer_t
)object
, offset
, 0, 0, 0);
945 /* take an extra ref so object won't die */
946 assert(object
->ref_count
> 0);
948 vm_object_res_reference(object
);
949 vm_fault_cleanup(object
, first_m
);
950 counter(c_vm_fault_page_block_backoff_kernel
++);
951 vm_object_lock(object
);
952 assert(object
->ref_count
> 0);
953 if (!object
->pager_ready
) {
954 wait_result
= vm_object_assert_wait(object
,
955 VM_OBJECT_EVENT_PAGER_READY
,
957 vm_object_unlock(object
);
958 if (wait_result
== THREAD_WAITING
)
959 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
960 vm_object_deallocate(object
);
963 vm_object_unlock(object
);
964 vm_object_deallocate(object
);
965 thread_interrupt_level(interruptible_state
);
966 return VM_FAULT_RETRY
;
970 if(object
->phys_contiguous
) {
971 if(m
!= VM_PAGE_NULL
) {
977 if (object
->internal
) {
979 * Requests to the default pager
980 * must reserve a real page in advance,
981 * because the pager's data-provided
982 * won't block for pages. IMPORTANT:
983 * this acts as a throttling mechanism
984 * for data_requests to the default
989 dbgTrace(0xBEEF000F, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
991 if (m
->fictitious
&& !vm_page_convert(m
)) {
993 vm_fault_cleanup(object
, first_m
);
994 thread_interrupt_level(interruptible_state
);
995 return(VM_FAULT_MEMORY_SHORTAGE
);
997 } else if (object
->absent_count
>
998 vm_object_absent_max
) {
1000 * If there are too many outstanding page
1001 * requests pending on this object, we
1002 * wait for them to be resolved now.
1006 dbgTrace(0xBEEF0010, (unsigned int) m
, (unsigned int) 0); /* (TEST/DEBUG) */
1008 if(m
!= VM_PAGE_NULL
)
1010 /* take an extra ref so object won't die */
1011 assert(object
->ref_count
> 0);
1012 object
->ref_count
++;
1013 vm_object_res_reference(object
);
1014 vm_fault_cleanup(object
, first_m
);
1015 counter(c_vm_fault_page_block_backoff_kernel
++);
1016 vm_object_lock(object
);
1017 assert(object
->ref_count
> 0);
1018 if (object
->absent_count
> vm_object_absent_max
) {
1019 vm_object_absent_assert_wait(object
,
1021 vm_object_unlock(object
);
1022 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1023 vm_object_deallocate(object
);
1026 vm_object_unlock(object
);
1027 vm_object_deallocate(object
);
1028 thread_interrupt_level(interruptible_state
);
1029 return VM_FAULT_RETRY
;
1034 * Indicate that the page is waiting for data
1035 * from the memory manager.
1038 if(m
!= VM_PAGE_NULL
) {
1040 m
->list_req_pending
= TRUE
;
1043 object
->absent_count
++;
1048 cluster_start
= offset
;
1052 * lengthen the cluster by the pages in the working set
1055 (current_task()->dynamic_working_set
!= 0)) {
1056 cluster_end
= cluster_start
+ length
;
			/* tws values for start and end are just
			 * suggestions.  Therefore, as long as
1059 * build_cluster does not use pointers or
1060 * take action based on values that
1061 * could be affected by re-entrance we
1062 * do not need to take the map lock.
1064 cluster_end
= offset
+ PAGE_SIZE_64
;
1065 tws_build_cluster((tws_hash_t
)
1066 current_task()->dynamic_working_set
,
1067 object
, &cluster_start
,
1068 &cluster_end
, 0x40000);
1069 length
= cluster_end
- cluster_start
;
1072 dbgTrace(0xBEEF0012, (unsigned int) object
, (unsigned int) 0); /* (TEST/DEBUG) */
1075 * We have a busy page, so we can
1076 * release the object lock.
1078 vm_object_unlock(object
);
1081 * Call the memory manager to retrieve the data.
1085 *type_of_fault
= (length
<< 8) | DBG_PAGEIN_FAULT
;
1087 current_task()->pageins
++;
1088 bumped_pagein
= TRUE
;
1091 * If this object uses a copy_call strategy,
1092 * and we are interested in a copy of this object
1093 * (having gotten here only by following a
1094 * shadow chain), then tell the memory manager
1095 * via a flag added to the desired_access
1096 * parameter, so that it can detect a race
1097 * between our walking down the shadow chain
1098 * and its pushing pages up into a copy of
1099 * the object that it manages.
1102 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_CALL
&&
1103 object
!= first_object
) {
1104 wants_copy_flag
= VM_PROT_WANTS_COPY
;
1106 wants_copy_flag
= VM_PROT_NONE
;
1110 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1111 (integer_t
)object
, offset
, (integer_t
)m
,
1112 access_required
| wants_copy_flag
, 0);
1114 rc
= memory_object_data_request(object
->pager
,
1115 cluster_start
+ object
->paging_offset
,
1117 access_required
| wants_copy_flag
);
1121 dbgTrace(0xBEEF0013, (unsigned int) object
, (unsigned int) rc
); /* (TEST/DEBUG) */
1123 if (rc
!= KERN_SUCCESS
) {
1124 if (rc
!= MACH_SEND_INTERRUPTED
1126 printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
1127 "memory_object_data_request",
1129 cluster_start
+ object
->paging_offset
,
1130 length
, access_required
, rc
);
1132 * Don't want to leave a busy page around,
1133 * but the data request may have blocked,
1134 * so check if it's still there and busy.
1136 if(!object
->phys_contiguous
) {
1137 vm_object_lock(object
);
1138 for (; length
; length
-= PAGE_SIZE
,
1139 cluster_start
+= PAGE_SIZE_64
) {
1141 if ((p
= vm_page_lookup(object
,
1143 && p
->absent
&& p
->busy
1149 vm_fault_cleanup(object
, first_m
);
1150 thread_interrupt_level(interruptible_state
);
1151 return((rc
== MACH_SEND_INTERRUPTED
) ?
1152 VM_FAULT_INTERRUPTED
:
1153 VM_FAULT_MEMORY_ERROR
);
1156 tws_hash_line_t line
;
1159 task
= current_task();
1162 (task
->dynamic_working_set
!= 0))
1163 && !(object
->private)) {
1164 vm_object_t base_object
;
1165 vm_object_offset_t base_offset
;
1166 base_object
= object
;
1167 base_offset
= offset
;
1168 while(base_object
->shadow
) {
1170 base_object
->shadow_offset
;
1172 base_object
->shadow
;
1176 task
->dynamic_working_set
,
1177 base_offset
, base_object
,
1178 &line
) == KERN_SUCCESS
) {
1179 tws_line_signal((tws_hash_t
)
1180 task
->dynamic_working_set
,
1188 * Retry with same object/offset, since new data may
1189 * be in a different page (i.e., m is meaningless at
1192 vm_object_lock(object
);
1193 if ((interruptible
!= THREAD_UNINT
) &&
1194 (current_thread()->state
& TH_ABORT
)) {
1195 vm_fault_cleanup(object
, first_m
);
1196 thread_interrupt_level(interruptible_state
);
1197 return(VM_FAULT_INTERRUPTED
);
1199 if(m
== VM_PAGE_NULL
)
1205 * The only case in which we get here is if
1206 * object has no pager (or unwiring). If the pager doesn't
1207 * have the page this is handled in the m->absent case above
1208 * (and if you change things here you should look above).
1211 dbgTrace(0xBEEF0014, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1213 if (object
== first_object
)
1216 assert(m
== VM_PAGE_NULL
);
1219 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1220 (integer_t
)object
, offset
, (integer_t
)m
,
1221 (integer_t
)object
->shadow
, 0);
1223 * Move on to the next object. Lock the next
1224 * object before unlocking the current one.
1226 next_object
= object
->shadow
;
1227 if (next_object
== VM_OBJECT_NULL
) {
1228 assert(!must_be_resident
);
1230 * If there's no object left, fill the page
1231 * in the top object with zeros. But first we
1232 * need to allocate a real page.
1235 if (object
!= first_object
) {
1236 vm_object_paging_end(object
);
1237 vm_object_unlock(object
);
1239 object
= first_object
;
1240 offset
= first_offset
;
1241 vm_object_lock(object
);
1245 assert(m
->object
== object
);
1246 first_m
= VM_PAGE_NULL
;
1248 if (object
->shadow_severed
) {
1250 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1251 thread_interrupt_level(interruptible_state
);
1252 return VM_FAULT_MEMORY_ERROR
;
1255 if (VM_PAGE_THROTTLED() ||
1256 (m
->fictitious
&& !vm_page_convert(m
))) {
1258 vm_fault_cleanup(object
, VM_PAGE_NULL
);
1259 thread_interrupt_level(interruptible_state
);
1260 return(VM_FAULT_MEMORY_SHORTAGE
);
1262 m
->no_isync
= FALSE
;
1264 if (!no_zero_fill
) {
1265 vm_object_unlock(object
);
1266 vm_page_zero_fill(m
);
1268 *type_of_fault
= DBG_ZERO_FILL_FAULT
;
1269 VM_STAT(zero_fill_count
++);
1271 if (bumped_pagein
== TRUE
) {
1273 current_task()->pageins
--;
1275 vm_object_lock(object
);
1277 vm_page_lock_queues();
1278 VM_PAGE_QUEUES_REMOVE(m
);
1279 if(m
->object
->size
> 0x80000) {
1280 m
->zero_fill
= TRUE
;
1281 /* depends on the queues lock */
1283 queue_enter(&vm_page_queue_zf
,
1284 m
, vm_page_t
, pageq
);
1287 &vm_page_queue_inactive
,
1288 m
, vm_page_t
, pageq
);
1290 m
->page_ticket
= vm_page_ticket
;
1291 vm_page_ticket_roll
++;
1292 if(vm_page_ticket_roll
== VM_PAGE_TICKETS_IN_ROLL
) {
1293 vm_page_ticket_roll
= 0;
1294 if(vm_page_ticket
==
1295 VM_PAGE_TICKET_ROLL_IDS
)
1301 vm_page_inactive_count
++;
1302 vm_page_unlock_queues();
1303 pmap_clear_modify(m
->phys_addr
);
1307 if ((object
!= first_object
) || must_be_resident
)
1308 vm_object_paging_end(object
);
1309 offset
+= object
->shadow_offset
;
1310 hi_offset
+= object
->shadow_offset
;
1311 lo_offset
+= object
->shadow_offset
;
1312 access_required
= VM_PROT_READ
;
1313 vm_object_lock(next_object
);
1314 vm_object_unlock(object
);
1315 object
= next_object
;
1316 vm_object_paging_begin(object
);
1321 * PAGE HAS BEEN FOUND.
1324 * busy, so that we can play with it;
1325 * not absent, so that nobody else will fill it;
1326 * possibly eligible for pageout;
1328 * The top-level page (first_m) is:
1329 * VM_PAGE_NULL if the page was found in the
1331 * busy, not absent, and ineligible for pageout.
1333 * The current object (object) is locked. A paging
1334 * reference is held for the current and top-level
1339 dbgTrace(0xBEEF0015, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1341 #if EXTRA_ASSERTIONS
1342 if(m
!= VM_PAGE_NULL
) {
1343 assert(m
->busy
&& !m
->absent
);
1344 assert((first_m
== VM_PAGE_NULL
) ||
1345 (first_m
->busy
&& !first_m
->absent
&&
1346 !first_m
->active
&& !first_m
->inactive
));
1348 #endif /* EXTRA_ASSERTIONS */
1351 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1352 (integer_t
)object
, offset
, (integer_t
)m
,
1353 (integer_t
)first_object
, (integer_t
)first_m
);
1355 * If the page is being written, but isn't
1356 * already owned by the top-level object,
1357 * we have to copy it into a new page owned
1358 * by the top-level object.
1361 if ((object
!= first_object
) && (m
!= VM_PAGE_NULL
)) {
1363 * We only really need to copy if we
1368 dbgTrace(0xBEEF0016, (unsigned int) object
, (unsigned int) fault_type
); /* (TEST/DEBUG) */
1370 if (fault_type
& VM_PROT_WRITE
) {
1373 assert(!must_be_resident
);
1376 * If we try to collapse first_object at this
1377 * point, we may deadlock when we try to get
1378 * the lock on an intermediate object (since we
1379 * have the bottom object locked). We can't
1380 * unlock the bottom object, because the page
1381 * we found may move (by collapse) if we do.
1383 * Instead, we first copy the page. Then, when
1384 * we have no more use for the bottom object,
1385 * we unlock it and try to collapse.
1387 * Note that we copy the page even if we didn't
1388 * need to... that's the breaks.
1392 * Allocate a page for the copy
1394 copy_m
= vm_page_grab();
1395 if (copy_m
== VM_PAGE_NULL
) {
1397 vm_fault_cleanup(object
, first_m
);
1398 thread_interrupt_level(interruptible_state
);
1399 return(VM_FAULT_MEMORY_SHORTAGE
);
1404 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1405 (integer_t
)object
, offset
,
1406 (integer_t
)m
, (integer_t
)copy_m
, 0);
1407 vm_page_copy(m
, copy_m
);
1410 * If another map is truly sharing this
1411 * page with us, we have to flush all
1412 * uses of the original page, since we
1413 * can't distinguish those which want the
1414 * original from those which need the
1417 * XXXO If we know that only one map has
1418 * access to this page, then we could
1419 * avoid the pmap_page_protect() call.
1422 vm_page_lock_queues();
1423 assert(!m
->cleaning
);
1424 pmap_page_protect(m
->phys_addr
, VM_PROT_NONE
);
1425 vm_page_deactivate(m
);
1426 copy_m
->dirty
= TRUE
;
1428 * Setting reference here prevents this fault from
1429 * being counted as a (per-thread) reactivate as well
1430 * as a copy-on-write.
1432 first_m
->reference
= TRUE
;
1433 vm_page_unlock_queues();
1436 * We no longer need the old page or object.
1439 PAGE_WAKEUP_DONE(m
);
1440 vm_object_paging_end(object
);
1441 vm_object_unlock(object
);
1444 *type_of_fault
= DBG_COW_FAULT
;
1445 VM_STAT(cow_faults
++);
1446 current_task()->cow_faults
++;
1447 object
= first_object
;
1448 offset
= first_offset
;
1450 vm_object_lock(object
);
1451 VM_PAGE_FREE(first_m
);
1452 first_m
= VM_PAGE_NULL
;
1453 assert(copy_m
->busy
);
1454 vm_page_insert(copy_m
, object
, offset
);
1458 * Now that we've gotten the copy out of the
1459 * way, let's try to collapse the top object.
1460 * But we have to play ugly games with
1461 * paging_in_progress to do that...
1464 vm_object_paging_end(object
);
1465 vm_object_collapse(object
);
1466 vm_object_paging_begin(object
);
1470 *protection
&= (~VM_PROT_WRITE
);
1475 * Now check whether the page needs to be pushed into the
1476 * copy object. The use of asymmetric copy on write for
1477 * shared temporary objects means that we may do two copies to
1478 * satisfy the fault; one above to get the page from a
1479 * shadowed object, and one here to push it into the copy.
1482 while ((copy_object
= first_object
->copy
) != VM_OBJECT_NULL
&&
1483 (m
!= VM_PAGE_NULL
)) {
1484 vm_object_offset_t copy_offset
;
1488 dbgTrace(0xBEEF0017, (unsigned int) copy_object
, (unsigned int) fault_type
); /* (TEST/DEBUG) */
1491 * If the page is being written, but hasn't been
1492 * copied to the copy-object, we have to copy it there.
1495 if ((fault_type
& VM_PROT_WRITE
) == 0) {
1496 *protection
&= ~VM_PROT_WRITE
;
1501 * If the page was guaranteed to be resident,
1502 * we must have already performed the copy.
1505 if (must_be_resident
)
1509 * Try to get the lock on the copy_object.
1511 if (!vm_object_lock_try(copy_object
)) {
1512 vm_object_unlock(object
);
1514 mutex_pause(); /* wait a bit */
1516 vm_object_lock(object
);
1521 * Make another reference to the copy-object,
1522 * to keep it from disappearing during the
1525 assert(copy_object
->ref_count
> 0);
1526 copy_object
->ref_count
++;
1527 VM_OBJ_RES_INCR(copy_object
);
1530 * Does the page exist in the copy?
1532 copy_offset
= first_offset
- copy_object
->shadow_offset
;
1533 if (copy_object
->size
<= copy_offset
)
1535 * Copy object doesn't cover this page -- do nothing.
1539 vm_page_lookup(copy_object
, copy_offset
)) != VM_PAGE_NULL
) {
1540 /* Page currently exists in the copy object */
1543 * If the page is being brought
1544 * in, wait for it and then retry.
1547 /* take an extra ref so object won't die */
1548 assert(copy_object
->ref_count
> 0);
1549 copy_object
->ref_count
++;
1550 vm_object_res_reference(copy_object
);
1551 vm_object_unlock(copy_object
);
1552 vm_fault_cleanup(object
, first_m
);
1553 counter(c_vm_fault_page_block_backoff_kernel
++);
1554 vm_object_lock(copy_object
);
1555 assert(copy_object
->ref_count
> 0);
1556 VM_OBJ_RES_DECR(copy_object
);
1557 copy_object
->ref_count
--;
1558 assert(copy_object
->ref_count
> 0);
1559 copy_m
= vm_page_lookup(copy_object
, copy_offset
);
1560 if (copy_m
!= VM_PAGE_NULL
&& copy_m
->busy
) {
1561 PAGE_ASSERT_WAIT(copy_m
, interruptible
);
1562 vm_object_unlock(copy_object
);
1563 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
1564 vm_object_deallocate(copy_object
);
1567 vm_object_unlock(copy_object
);
1568 vm_object_deallocate(copy_object
);
1569 thread_interrupt_level(interruptible_state
);
1570 return VM_FAULT_RETRY
;
1574 else if (!PAGED_OUT(copy_object
, copy_offset
)) {
1576 * If PAGED_OUT is TRUE, then the page used to exist
1577 * in the copy-object, and has already been paged out.
1578 * We don't need to repeat this. If PAGED_OUT is
1579 * FALSE, then either we don't know (!pager_created,
1580 * for example) or it hasn't been paged out.
1581 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1582 * We must copy the page to the copy object.
1586 * Allocate a page for the copy
1588 copy_m
= vm_page_alloc(copy_object
, copy_offset
);
1589 if (copy_m
== VM_PAGE_NULL
) {
1591 VM_OBJ_RES_DECR(copy_object
);
1592 copy_object
->ref_count
--;
1593 assert(copy_object
->ref_count
> 0);
1594 vm_object_unlock(copy_object
);
1595 vm_fault_cleanup(object
, first_m
);
1596 thread_interrupt_level(interruptible_state
);
1597 return(VM_FAULT_MEMORY_SHORTAGE
);
1601 * Must copy page into copy-object.
1604 vm_page_copy(m
, copy_m
);
1607 * If the old page was in use by any users
1608 * of the copy-object, it must be removed
1609 * from all pmaps. (We can't know which
1613 vm_page_lock_queues();
1614 assert(!m
->cleaning
);
1615 pmap_page_protect(m
->phys_addr
, VM_PROT_NONE
);
1616 copy_m
->dirty
= TRUE
;
1617 vm_page_unlock_queues();
1620 * If there's a pager, then immediately
1621 * page out this page, using the "initialize"
1622 * option. Else, we use the copy.
1627 ((!copy_object
->pager_created
) ||
1628 vm_external_state_get(
1629 copy_object
->existence_map
, copy_offset
)
1630 == VM_EXTERNAL_STATE_ABSENT
)
1632 (!copy_object
->pager_created
)
1635 vm_page_lock_queues();
1636 vm_page_activate(copy_m
);
1637 vm_page_unlock_queues();
1638 PAGE_WAKEUP_DONE(copy_m
);
1641 assert(copy_m
->busy
== TRUE
);
1644 * The page is already ready for pageout:
1645 * not on pageout queues and busy.
1646 * Unlock everything except the
1647 * copy_object itself.
1650 vm_object_unlock(object
);
1653 * Write the page to the copy-object,
1654 * flushing it from the kernel.
1657 vm_pageout_initialize_page(copy_m
);
1660 * Since the pageout may have
1661 * temporarily dropped the
1662 * copy_object's lock, we
1663 * check whether we'll have
1664 * to deallocate the hard way.
1667 if ((copy_object
->shadow
!= object
) ||
1668 (copy_object
->ref_count
== 1)) {
1669 vm_object_unlock(copy_object
);
1670 vm_object_deallocate(copy_object
);
1671 vm_object_lock(object
);
1676 * Pick back up the old object's
1677 * lock. [It is safe to do so,
1678 * since it must be deeper in the
1682 vm_object_lock(object
);
1686 * Because we're pushing a page upward
1687 * in the object tree, we must restart
1688 * any faults that are waiting here.
1689 * [Note that this is an expansion of
1690 * PAGE_WAKEUP that uses the THREAD_RESTART
1691 * wait result]. Can't turn off the page's
1692 * busy bit because we're not done with it.
1697 thread_wakeup_with_result((event_t
) m
,
1703 * The reference count on copy_object must be
1704 * at least 2: one for our extra reference,
1705 * and at least one from the outside world
1706 * (we checked that when we last locked
1709 copy_object
->ref_count
--;
1710 assert(copy_object
->ref_count
> 0);
1711 VM_OBJ_RES_DECR(copy_object
);
1712 vm_object_unlock(copy_object
);
1718 *top_page
= first_m
;
1721 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1722 (integer_t
)object
, offset
, (integer_t
)m
, (integer_t
)first_m
, 0);
1724 * If the page can be written, assume that it will be.
1725 * [Earlier, we restrict the permission to allow write
1726 * access only if the fault so required, so we don't
1727 * mark read-only data as dirty.]
1730 #if !VM_FAULT_STATIC_CONFIG
1731 if (vm_fault_dirty_handling
&& (*protection
& VM_PROT_WRITE
) &&
1732 (m
!= VM_PAGE_NULL
)) {
1737 dbgTrace(0xBEEF0018, (unsigned int) object
, (unsigned int) vm_page_deactivate_behind
); /* (TEST/DEBUG) */
1739 if (vm_page_deactivate_behind
) {
1740 if (offset
&& /* don't underflow */
1741 (object
->last_alloc
== (offset
- PAGE_SIZE_64
))) {
1742 m
= vm_page_lookup(object
, object
->last_alloc
);
1743 if ((m
!= VM_PAGE_NULL
) && !m
->busy
) {
1744 vm_page_lock_queues();
1745 vm_page_deactivate(m
);
1746 vm_page_unlock_queues();
1749 dbgTrace(0xBEEF0019, (unsigned int) object
, (unsigned int) m
); /* (TEST/DEBUG) */
1752 object
->last_alloc
= offset
;
1755 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS
, 0); /* (TEST/DEBUG) */
1757 thread_interrupt_level(interruptible_state
);
1758 if(*result_page
== VM_PAGE_NULL
) {
1759 vm_object_unlock(object
);
1761 return(VM_FAULT_SUCCESS
);
1765 vm_fault_cleanup(object
, first_m
);
1767 counter(c_vm_fault_page_block_backoff_kernel
++);
1768 thread_block(THREAD_CONTINUE_NULL
);
1772 thread_interrupt_level(interruptible_state
);
1773 if (wait_result
== THREAD_INTERRUPTED
)
1774 return VM_FAULT_INTERRUPTED
;
1775 return VM_FAULT_RETRY
;
/*
 *	Routine:	vm_fault
 *		Handle page faults, including pseudo-faults
 *		used to change the wiring status of pages.
 *
 *		Explicit continuations have been removed.
 *
 *		vm_fault and vm_fault_page save mucho state
 *		in the moral equivalent of a closure.  The state
 *		structure is allocated when first entering vm_fault
 *		and deallocated when leaving vm_fault.
 */
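
/*
 * Illustrative sketch (not part of the original source): architecture
 * trap handlers resolve a user fault with a call along the lines of
 *
 *	kr = vm_fault(map, trunc_page(fault_addr), fault_type,
 *		      FALSE, THREAD_ABORTSAFE, NULL, 0);
 *
 * where FALSE means "not a wiring change" and the trailing NULL/0 leave
 * caller_pmap/caller_pmap_addr unused.  The argument names here are a
 * sketch against the parameters declared just below, and trunc_page()
 * stands in for whatever page rounding the caller performs.
 */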
	vm_prot_t	fault_type,
	boolean_t	change_wiring,
	vm_offset_t	caller_pmap_addr)
	vm_map_version_t	version;	/* Map version for verification */
	boolean_t		wired;		/* Should mapping be wired down? */
	vm_object_t		object;		/* Top-level object */
	vm_object_offset_t	offset;		/* Top-level offset */
	vm_prot_t		prot;		/* Protection for mapping */
	vm_behavior_t		behavior;	/* Expected paging behavior */
	vm_object_offset_t	lo_offset, hi_offset;
	vm_object_t		old_copy_object; /* Saved copy object */
	vm_page_t		result_page;	/* Result of vm_fault_page */
	vm_page_t		top_page;	/* Placeholder page */
	vm_page_t		m;		/* Fast access to result_page */
	kern_return_t		error_code;	/* page error reasons */
	vm_object_t		cur_object;
	vm_object_offset_t	cur_offset;
	vm_object_t		new_object;
	vm_map_t		pmap_map = map;
	vm_map_t		original_map = map;
	boolean_t		funnel_set = FALSE;
	thread_t		cur_thread;
	boolean_t		interruptible_state;
	unsigned int		cache_attr;
	int			write_startup_file = 0;
	vm_prot_t		full_fault_type;
1839 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, 0)) | DBG_FUNC_START
,
1846 cur_thread
= current_thread();
	/* at present we do not fully check for execute permission */
	/* we generally treat it as read except in certain device */
	/* memory settings */
1850 full_fault_type
= fault_type
;
1851 if(fault_type
& VM_PROT_EXECUTE
) {
1852 fault_type
&= ~VM_PROT_EXECUTE
;
1853 fault_type
|= VM_PROT_READ
;
1856 interruptible_state
= thread_interrupt_level(interruptible
);
1859 * assume we will hit a page in the cache
1860 * otherwise, explicitly override with
1861 * the real fault type once we determine it
1863 type_of_fault
= DBG_CACHE_HIT_FAULT
;
1866 current_task()->faults
++;
1869 * drop funnel if it is already held. Then restore while returning
1871 if ((cur_thread
->funnel_state
& TH_FN_OWNED
) == TH_FN_OWNED
) {
1873 curflock
= cur_thread
->funnel_lock
;
1874 thread_funnel_set( curflock
, FALSE
);
1880 * Find the backing store object and offset into
1881 * it to begin the search.
1884 vm_map_lock_read(map
);
1885 kr
= vm_map_lookup_locked(&map
, vaddr
, fault_type
, &version
,
1888 &behavior
, &lo_offset
, &hi_offset
, &pmap_map
);
1890 pmap
= pmap_map
->pmap
;
1892 if (kr
!= KERN_SUCCESS
) {
1893 vm_map_unlock_read(map
);
1898 * If the page is wired, we must fault for the current protection
1899 * value, to avoid further faults.
1903 fault_type
= prot
| VM_PROT_WRITE
;
1905 #if VM_FAULT_CLASSIFY
1907 * Temporary data gathering code
1909 vm_fault_classify(object
, offset
, fault_type
);
1912 * Fast fault code. The basic idea is to do as much as
1913 * possible while holding the map lock and object locks.
1914 * Busy pages are not used until the object lock has to
1915 * be dropped to do something (copy, zero fill, pmap enter).
1916 * Similarly, paging references aren't acquired until that
1917 * point, and object references aren't used.
1919 * If we can figure out what to do
1920 * (zero fill, copy on write, pmap enter) while holding
1921 * the locks, then it gets done. Otherwise, we give up,
1922 * and use the original fault path (which doesn't hold
1923 * the map lock, and relies on busy pages).
1924 * The give up cases include:
1925 * - Have to talk to pager.
1926 * - Page is busy, absent or in error.
1927 * - Pager has locked out desired access.
1928 * - Fault needs to be restarted.
1929 * - Have to push page into copy object.
1931 * The code is an infinite loop that moves one level down
1932 * the shadow chain each time. cur_object and cur_offset
1933 * refer to the current object being examined. object and offset
1934 * are the original object from the map. The loop is at the
1935 * top level if and only if object and cur_object are the same.
1937 * Invariants: Map lock is held throughout. Lock is held on
1938 * original object and cur_object (if different) when
1939 * continuing or exiting loop.
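
	/*
	 * Illustrative note (not part of the original source): in the fast
	 * path that follows, cur_object/cur_offset walk down the shadow
	 * chain while object/offset stay pinned to the map's top-level
	 * object; the loop falls back to the slow path (vm_fault_page) as
	 * soon as it would need to talk to a pager, wait for a busy page,
	 * or push a page into a copy object.
	 */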
1945 * If this page is to be inserted in a copy delay object
1946 * for writing, and if the object has a copy, then the
1947 * copy delay strategy is implemented in the slow fault page.
1949 if (object
->copy_strategy
!= MEMORY_OBJECT_COPY_DELAY
||
1950 object
->copy
== VM_OBJECT_NULL
||
1951 (fault_type
& VM_PROT_WRITE
) == 0) {
1952 cur_object
= object
;
1953 cur_offset
= offset
;
1956 m
= vm_page_lookup(cur_object
, cur_offset
);
1957 if (m
!= VM_PAGE_NULL
) {
1959 wait_result_t result
;
1961 if (object
!= cur_object
)
1962 vm_object_unlock(object
);
1964 vm_map_unlock_read(map
);
1965 if (pmap_map
!= map
)
1966 vm_map_unlock(pmap_map
);
1968 #if !VM_FAULT_STATIC_CONFIG
1969 if (!vm_fault_interruptible
)
1970 interruptible
= THREAD_UNINT
;
1972 result
= PAGE_ASSERT_WAIT(m
, interruptible
);
1974 vm_object_unlock(cur_object
);
1976 if (result
== THREAD_WAITING
) {
1977 result
= thread_block(THREAD_CONTINUE_NULL
);
1979 counter(c_vm_fault_page_block_busy_kernel
++);
1981 if (result
== THREAD_AWAKENED
|| result
== THREAD_RESTART
)
1987 if (m
->unusual
&& (m
->error
|| m
->restart
|| m
->private
1988 || m
->absent
|| (fault_type
& m
->page_lock
))) {
1991 * Unusual case. Give up.
1997 * Two cases of map in faults:
1998 * - At top level w/o copy object.
1999 * - Read fault anywhere.
2000 * --> must disallow write.
2003 if (object
== cur_object
&&
2004 object
->copy
== VM_OBJECT_NULL
)
2005 goto FastMapInFault
;
2007 if ((fault_type
& VM_PROT_WRITE
) == 0) {
2009 prot
&= ~VM_PROT_WRITE
;
2012 * Set up to map the page ...
2013 * mark the page busy, drop
2014 * locks and take a paging reference
2015 * on the object with the page.
2018 if (object
!= cur_object
) {
2019 vm_object_unlock(object
);
2020 object
= cur_object
;
2025 vm_object_paging_begin(object
);
2029 * Check a couple of global reasons to
2030 * be conservative about write access.
2031 * Then do the pmap_enter.
2033 #if !VM_FAULT_STATIC_CONFIG
2034 if (vm_fault_dirty_handling
2036 || db_watchpoint_list
2038 && (fault_type
& VM_PROT_WRITE
) == 0)
2039 prot
&= ~VM_PROT_WRITE
;
2040 #else /* STATIC_CONFIG */
2042 if (db_watchpoint_list
2043 && (fault_type
& VM_PROT_WRITE
) == 0)
2044 prot
&= ~VM_PROT_WRITE
;
2045 #endif /* MACH_KDB */
2046 #endif /* STATIC_CONFIG */
2047 if (m
->no_isync
== TRUE
) {
2048 pmap_sync_caches_phys(m
->phys_addr
);
2049 m
->no_isync
= FALSE
;
2052 cache_attr
= ((unsigned int)m
->object
->wimg_bits
) & VM_WIMG_MASK
;
2054 PMAP_ENTER(caller_pmap
,
2055 caller_pmap_addr
, m
,
2056 prot
, cache_attr
, wired
);
2058 PMAP_ENTER(pmap
, vaddr
, m
,
2059 prot
, cache_attr
, wired
);
2063 * Grab the queues lock to manipulate
2064 * the page queues. Change wiring
2065 * case is obvious. In soft ref bits
2066 * case activate page only if it fell
2067 * off paging queues, otherwise just
2068 * activate it if it's inactive.
2070 * NOTE: original vm_fault code will
2071 * move active page to back of active
2072 * queue. This code doesn't.
2074 vm_page_lock_queues();
2077 vm_pagein_cluster_used
++;
2078 m
->clustered
= FALSE
;
2080 m
->reference
= TRUE
;
2082 if (change_wiring
) {
2088 #if VM_FAULT_STATIC_CONFIG
2090 if (!m
->active
&& !m
->inactive
)
2091 vm_page_activate(m
);
2094 else if (software_reference_bits
) {
2095 if (!m
->active
&& !m
->inactive
)
2096 vm_page_activate(m
);
2098 else if (!m
->active
) {
2099 vm_page_activate(m
);
2102 vm_page_unlock_queues();
2105 * That's it, clean up and return.
2107 PAGE_WAKEUP_DONE(m
);
2108 vm_object_paging_end(object
);
2111 tws_hash_line_t line
;
2114 task
= current_task();
2116 (task
->dynamic_working_set
!= 0) &&
2117 !(object
->private)) {
2119 vm_object_t base_object
;
2120 vm_object_offset_t base_offset
;
2121 base_object
= object
;
2122 base_offset
= cur_offset
;
2123 while(base_object
->shadow
) {
2125 base_object
->shadow_offset
;
2127 base_object
->shadow
;
2129 kr
= tws_lookup((tws_hash_t
)
2130 task
->dynamic_working_set
,
2131 base_offset
, base_object
,
2133 if(kr
== KERN_OPERATION_TIMED_OUT
){
2134 write_startup_file
= 1;
2135 } else if (kr
!= KERN_SUCCESS
) {
2136 kr
= tws_insert((tws_hash_t
)
2137 task
->dynamic_working_set
,
2138 base_offset
, base_object
,
2140 if(kr
== KERN_NO_SPACE
) {
2141 vm_object_unlock(object
);
2143 tws_expand_working_set(
2144 task
->dynamic_working_set
,
2145 TWS_HASH_LINE_COUNT
,
2148 vm_object_lock(object
);
2151 KERN_OPERATION_TIMED_OUT
) {
2152 write_startup_file
= 1;
2157 vm_object_unlock(object
);
2159 vm_map_unlock_read(map
);
2161 vm_map_unlock(pmap_map
);
2163 if(write_startup_file
)
2164 tws_send_startup_info(current_task());
2167 thread_funnel_set( curflock
, TRUE
);
2169 thread_interrupt_level(interruptible_state
);
2172 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, 0)) | DBG_FUNC_END
,
2174 type_of_fault
& 0xff,
2179 return KERN_SUCCESS
;
2183 * Copy on write fault. If objects match, then
2184 * object->copy must not be NULL (else control
2185 * would be in previous code block), and we
2186 * have a potential push into the copy object
2187 * with which we won't cope here.
2190 if (cur_object
== object
)
2193 * This is now a shadow based copy on write
2194 * fault -- it requires a copy up the shadow
2197 * Allocate a page in the original top level
2198 * object. Give up if allocate fails. Also
2199 * need to remember current page, as it's the
2200 * source of the copy.
2204 if (m
== VM_PAGE_NULL
) {
2208 * Now do the copy. Mark the source busy
2209 * and take out paging references on both
2212 * NOTE: This code holds the map lock across
2217 vm_page_copy(cur_m
, m
);
2218 vm_page_insert(m
, object
, offset
);
2220 vm_object_paging_begin(cur_object
);
2221 vm_object_paging_begin(object
);
2223 type_of_fault
= DBG_COW_FAULT
;
2224 VM_STAT(cow_faults
++);
2225 current_task()->cow_faults
++;
2228 * Now cope with the source page and object
2229 * If the top object has a ref count of 1
2230 * then no other map can access it, and hence
2231 * it's not necessary to do the pmap_page_protect.
2235 vm_page_lock_queues();
2236 vm_page_deactivate(cur_m
);
2238 pmap_page_protect(cur_m
->phys_addr
,
2240 vm_page_unlock_queues();
2242 PAGE_WAKEUP_DONE(cur_m
);
2243 vm_object_paging_end(cur_object
);
2244 vm_object_unlock(cur_object
);
2247 * Slight hack to call vm_object collapse
2248 * and then reuse common map in code.
2249 * note that the object lock was taken above.
2252 vm_object_paging_end(object
);
2253 vm_object_collapse(object
);
2254 vm_object_paging_begin(object
);
			/*
			 *	No page at cur_object, cur_offset
			 */

			if (cur_object->pager_created) {

				/*
				 *	Have to talk to the pager.  Give up.
				 */
				break;
			}

			if (cur_object->shadow == VM_OBJECT_NULL) {

				if (cur_object->shadow_severed) {
					vm_object_paging_end(object);
					vm_object_unlock(object);
					vm_map_unlock_read(map);
					if (pmap_map != map)
						vm_map_unlock(pmap_map);

					if (write_startup_file)
						tws_send_startup_info(current_task());

					if (funnel_set)
						thread_funnel_set(curflock, TRUE);

					thread_interrupt_level(interruptible_state);

					return VM_FAULT_MEMORY_ERROR;
				}
				/*
				 *	Zero fill fault.  Page gets
				 *	filled in top object.  Insert
				 *	page, then drop any lower lock.
				 *	Give up if no page.
				 */
				if ((vm_page_free_target -
				    ((vm_page_free_target - vm_page_free_min) >> 2))
				    > vm_page_free_count) {
					break;
				}
				m = vm_page_alloc(object, offset);
				if (m == VM_PAGE_NULL) {
					break;
				}

				/*
				 *	This is a zero-fill or initial fill
				 *	page fault.  As such, we consider it
				 *	undefined with respect to instruction
				 *	execution.  i.e. it is the responsibility
				 *	of higher layers to call for an instruction
				 *	sync after changing the contents and before
				 *	sending a program into this area.  We
				 *	choose this approach for performance.
				 */

				m->no_isync = FALSE;

				if (cur_object != object)
					vm_object_unlock(cur_object);

				vm_object_paging_begin(object);
				vm_object_unlock(object);

				/*
				 *	Now zero fill page and map it.
				 *	The page is probably going to
				 *	be written soon, so don't bother
				 *	to clear the modified bit.
				 *
				 *	NOTE: This code holds the map
				 *	lock across the zero fill.
				 */

				if (!map->no_zero_fill) {
					vm_page_zero_fill(m);
					type_of_fault = DBG_ZERO_FILL_FAULT;
					VM_STAT(zero_fill_count++);
				}
				vm_page_lock_queues();
				VM_PAGE_QUEUES_REMOVE(m);

				m->page_ticket = vm_page_ticket;
				if (m->object->size > 0x80000) {
					m->zero_fill = TRUE;
					/* depends on the queues lock */
					queue_enter(&vm_page_queue_zf,
						m, vm_page_t, pageq);
				} else {
					queue_enter(
						&vm_page_queue_inactive,
						m, vm_page_t, pageq);
				}
				vm_page_ticket_roll++;
				if (vm_page_ticket_roll ==
				    VM_PAGE_TICKETS_IN_ROLL) {
					vm_page_ticket_roll = 0;
					if (vm_page_ticket ==
					    VM_PAGE_TICKET_ROLL_IDS)
						vm_page_ticket = 0;
					else
						vm_page_ticket++;
				}

				m->inactive = TRUE;
				vm_page_inactive_count++;
				vm_page_unlock_queues();
				vm_object_lock(object);

				goto FastPmapEnter;
			}
			else {

				/*
				 *	On to the next level
				 */

				cur_offset += cur_object->shadow_offset;
				new_object = cur_object->shadow;
				vm_object_lock(new_object);
				if (cur_object != object)
					vm_object_unlock(cur_object);
				cur_object = new_object;

				continue;
			}
		}
	}
	/*
	 *	Cleanup from fast fault failure.  Drop any object
	 *	lock other than original and drop map lock.
	 */

	if (object != cur_object)
		vm_object_unlock(cur_object);

	vm_map_unlock_read(map);

	if (pmap_map != map)
		vm_map_unlock(pmap_map);

	/*
	 *	Make a reference to this object to
	 *	prevent its disposal while we are messing with
	 *	it.  Once we have the reference, the map is free
	 *	to be diddled.  Since objects reference their
	 *	shadows (and copies), they will stay around as well.
	 */

	assert(object->ref_count > 0);
	object->ref_count++;
	vm_object_res_reference(object);
	vm_object_paging_begin(object);

	XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
	{
		tws_hash_line_t	line;
		task_t		task;

		task = current_task();
		if ((map != NULL) &&
		    (task->dynamic_working_set != 0)
		    && !(object->private)) {
			vm_object_t		base_object;
			vm_object_offset_t	base_offset;

			base_object = object;
			base_offset = offset;
			while (base_object->shadow) {
				base_offset +=
					base_object->shadow_offset;
				base_object =
					base_object->shadow;
			}
			kr = tws_lookup((tws_hash_t)
				task->dynamic_working_set,
				base_offset, base_object,
				&line);
			if (kr == KERN_OPERATION_TIMED_OUT) {
				write_startup_file = 1;
			} else if (kr != KERN_SUCCESS) {
				tws_insert((tws_hash_t)
					task->dynamic_working_set,
					base_offset, base_object,
					vaddr, pmap_map);
				kr = tws_insert((tws_hash_t)
					task->dynamic_working_set,
					base_offset, base_object,
					vaddr, pmap_map);
				if (kr == KERN_NO_SPACE) {
					vm_object_unlock(object);

					tws_expand_working_set(
						task->dynamic_working_set,
						TWS_HASH_LINE_COUNT,
						FALSE);

					vm_object_lock(object);
				}
				if (kr == KERN_OPERATION_TIMED_OUT) {
					write_startup_file = 1;
				}
			}
		}
	}
	kr = vm_fault_page(object, offset, fault_type,
			   (change_wiring && !wired),
			   interruptible,
			   lo_offset, hi_offset, behavior,
			   &prot, &result_page, &top_page,
			   &type_of_fault,
			   &error_code, map->no_zero_fill, FALSE, map, vaddr);
	/*
	 *	If we didn't succeed, lose the object reference immediately.
	 */

	if (kr != VM_FAULT_SUCCESS)
		vm_object_deallocate(object);

	/*
	 *	See why we failed, and take corrective action.
	 */

	switch (kr) {
	case VM_FAULT_SUCCESS:
		break;
	case VM_FAULT_MEMORY_SHORTAGE:
		if (vm_page_wait((change_wiring) ?
				 THREAD_UNINT :
				 THREAD_ABORTSAFE))
			goto RetryFault;
		/* fall thru */
	case VM_FAULT_INTERRUPTED:
		kr = KERN_ABORTED;
		goto done;
	case VM_FAULT_RETRY:
		goto RetryFault;
	case VM_FAULT_FICTITIOUS_SHORTAGE:
		vm_page_more_fictitious();
		goto RetryFault;
	case VM_FAULT_MEMORY_ERROR:
		if (error_code)
			kr = error_code;
		else
			kr = KERN_MEMORY_ERROR;
		goto done;
	}

	m = result_page;

	if (m != VM_PAGE_NULL) {
		assert((change_wiring && !wired) ?
		       (top_page == VM_PAGE_NULL) :
		       ((top_page == VM_PAGE_NULL) == (m->object == object)));
	}
	/*
	 *	How to clean up the result of vm_fault_page.  This
	 *	happens whether the mapping is entered or not.
	 */

#define UNLOCK_AND_DEALLOCATE				\
	MACRO_BEGIN					\
	vm_fault_cleanup(m->object, top_page);		\
	vm_object_deallocate(object);			\
	MACRO_END

	/*
	 *	What to do with the resulting page from vm_fault_page
	 *	if it doesn't get entered into the physical map:
	 */

#define RELEASE_PAGE(m)					\
	MACRO_BEGIN					\
	PAGE_WAKEUP_DONE(m);				\
	vm_page_lock_queues();				\
	if (!m->active && !m->inactive)			\
		vm_page_activate(m);			\
	vm_page_unlock_queues();			\
	MACRO_END

	/*
	 *	We must verify that the maps have not changed
	 *	since our last lookup.
	 */

	if (m != VM_PAGE_NULL) {
		old_copy_object = m->object->copy;
		vm_object_unlock(m->object);
	} else {
		old_copy_object = VM_OBJECT_NULL;
	}
	if ((map != original_map) || !vm_map_verify(map, &version)) {
		vm_object_t		retry_object;
		vm_object_offset_t	retry_offset;
		vm_prot_t		retry_prot;

		/*
		 *	To avoid trying to write_lock the map while another
		 *	thread has it read_locked (in vm_map_pageable), we
		 *	do not try for write permission.  If the page is
		 *	still writable, we will get write permission.  If it
		 *	is not, or has been marked needs_copy, we enter the
		 *	mapping without write permission, and will merely
		 *	take another fault.
		 */
		map = original_map;
		vm_map_lock_read(map);
		kr = vm_map_lookup_locked(&map, vaddr,
				fault_type & ~VM_PROT_WRITE, &version,
				&retry_object, &retry_offset, &retry_prot,
				&wired, &behavior, &lo_offset, &hi_offset,
				&pmap_map);
		pmap = pmap_map->pmap;

		if (kr != KERN_SUCCESS) {
			vm_map_unlock_read(map);
			if (m != VM_PAGE_NULL) {
				vm_object_lock(m->object);
				RELEASE_PAGE(m);
				UNLOCK_AND_DEALLOCATE;
			} else {
				vm_object_deallocate(object);
			}
			goto done;
		}

		vm_object_unlock(retry_object);
		if (m != VM_PAGE_NULL) {
			vm_object_lock(m->object);
		} else {
			vm_object_lock(object);
		}

		if ((retry_object != object) ||
		    (retry_offset != offset)) {
			vm_map_unlock_read(map);
			if (pmap_map != map)
				vm_map_unlock(pmap_map);
			if (m != VM_PAGE_NULL) {
				RELEASE_PAGE(m);
				UNLOCK_AND_DEALLOCATE;
			} else {
				vm_object_deallocate(object);
			}
			goto RetryFault;
		}
		/*
		 *	Check whether the protection has changed or the object
		 *	has been copied while we left the map unlocked.
		 */
		prot &= retry_prot;

		if (m != VM_PAGE_NULL) {
			vm_object_unlock(m->object);
		} else {
			vm_object_unlock(object);
		}
	}
	if (m != VM_PAGE_NULL) {
		vm_object_lock(m->object);
	} else {
		vm_object_lock(object);
	}

	/*
	 *	If the copy object changed while the top-level object
	 *	was unlocked, then we must take away write permission.
	 */

	if (m != VM_PAGE_NULL) {
		if (m->object->copy != old_copy_object)
			prot &= ~VM_PROT_WRITE;
	}

	/*
	 *	If we want to wire down this page, but no longer have
	 *	adequate permissions, we must start all over.
	 */

	if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
		vm_map_verify_done(map, &version);
		if (pmap_map != map)
			vm_map_unlock(pmap_map);
		if (m != VM_PAGE_NULL) {
			RELEASE_PAGE(m);
			UNLOCK_AND_DEALLOCATE;
		} else {
			vm_object_deallocate(object);
		}
		goto RetryFault;
	}
	/*
	 *	Put this page into the physical map.
	 *	We had to do the unlock above because pmap_enter
	 *	may cause other faults.  The page may be on
	 *	the pageout queues.  If the pageout daemon comes
	 *	across the page, it will remove it from the queues.
	 */
	if (m != VM_PAGE_NULL) {
		if (m->no_isync == TRUE) {
			pmap_sync_caches_phys(m->phys_addr);

			m->no_isync = FALSE;
		}

		cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

		if (caller_pmap) {
			PMAP_ENTER(caller_pmap,
					caller_pmap_addr, m,
					prot, cache_attr, wired);
		} else {
			PMAP_ENTER(pmap, vaddr, m,
					prot, cache_attr, wired);
		}
		{
			tws_hash_line_t	line;
			task_t		task;

			task = current_task();
			if ((map != NULL) &&
			    (task->dynamic_working_set != 0)
			    && (object->private)) {
				vm_object_t		base_object;
				vm_object_offset_t	base_offset;

				base_object = m->object;
				base_offset = m->offset;
				while (base_object->shadow) {
					base_offset +=
						base_object->shadow_offset;
					base_object =
						base_object->shadow;
				}
				kr = tws_lookup((tws_hash_t)
					task->dynamic_working_set,
					base_offset, base_object, &line);
				if (kr == KERN_OPERATION_TIMED_OUT) {
					write_startup_file = 1;
				} else if (kr != KERN_SUCCESS) {
					tws_insert((tws_hash_t)
						task->dynamic_working_set,
						base_offset, base_object,
						vaddr, pmap_map);
					kr = tws_insert((tws_hash_t)
						task->dynamic_working_set,
						base_offset, base_object,
						vaddr, pmap_map);
					if (kr == KERN_NO_SPACE) {
						vm_object_unlock(m->object);
						tws_expand_working_set(
							task->dynamic_working_set,
							TWS_HASH_LINE_COUNT,
							FALSE);
						vm_object_lock(m->object);
					}
					if (kr == KERN_OPERATION_TIMED_OUT) {
						write_startup_file = 1;
					}
				}
			}
		}
	} else {

#ifndef i386
		int			memattr;
		struct phys_entry	*pp;
		vm_map_entry_t		entry;
		vm_offset_t		laddr;
		vm_offset_t		ldelta, hdelta;

		/*
		 * do a pmap block mapping from the physical address
		 * in the object
		 */
		if (pp = pmap_find_physentry(
			(vm_offset_t)object->shadow_offset)) {
			memattr = ((pp->pte1 & 0x00000078) >> 3);
		} else {
			memattr = VM_WIMG_MASK & (int)object->wimg_bits;
		}

		/* While we do not worry about execution protection in */
		/* general, we may be able to read device memory and   */
		/* still not be able to execute it.  Here we check for */
		/* the guarded bit.  If its set and we are attempting  */
		/* to execute, we return with a protection failure.    */

		if ((memattr & VM_MEM_GUARDED) &&
		    (full_fault_type & VM_PROT_EXECUTE)) {
			vm_map_verify_done(map, &version);
			if (pmap_map != map)
				vm_map_unlock(pmap_map);
			vm_fault_cleanup(object, top_page);
			vm_object_deallocate(object);
			kr = KERN_PROTECTION_FAILURE;
			goto done;
		}

		if (pmap_map != map) {
			vm_map_unlock(pmap_map);
		}
		if (original_map != map) {
			vm_map_unlock_read(map);
			vm_map_lock_read(original_map);
			map = original_map;
		}
		pmap_map = map;

		laddr = vaddr;
		hdelta = 0xFFFFF000;
		ldelta = 0xFFFFF000;

		while (vm_map_lookup_entry(map, laddr, &entry)) {
			if (ldelta > (laddr - entry->vme_start))
				ldelta = laddr - entry->vme_start;
			if (hdelta > (entry->vme_end - laddr))
				hdelta = entry->vme_end - laddr;
			if (entry->is_sub_map) {

				laddr = (laddr - entry->vme_start)
						+ entry->offset;
				vm_map_lock_read(entry->object.sub_map);
				if (map != pmap_map)
					vm_map_unlock_read(map);
				if (entry->use_pmap) {
					vm_map_unlock_read(pmap_map);
					pmap_map = entry->object.sub_map;
				}
				map = entry->object.sub_map;

			} else {
				break;
			}
		}

		if (vm_map_lookup_entry(map, laddr, &entry) &&
		    (entry->object.vm_object != NULL) &&
		    (entry->object.vm_object == object)) {

			if (caller_pmap) {
				pmap_map_block(caller_pmap,
					caller_pmap_addr - ldelta,
					((vm_offset_t)
					(entry->object.vm_object->shadow_offset))
					+ entry->offset +
					(laddr - entry->vme_start) - ldelta,
					ldelta + hdelta, prot,
					memattr, 0); /* Set up a block mapped area */
			} else {
				pmap_map_block(pmap_map->pmap, vaddr - ldelta,
					((vm_offset_t)
					(entry->object.vm_object->shadow_offset))
					+ entry->offset +
					(laddr - entry->vme_start) - ldelta,
					ldelta + hdelta, prot,
					memattr, 0); /* Set up a block mapped area */
			}
		}
#else
#ifdef notyet
		if (caller_pmap) {
			pmap_enter(caller_pmap, caller_pmap_addr,
				object->shadow_offset, prot, 0, TRUE);
		} else {
			pmap_enter(pmap, vaddr,
				object->shadow_offset, prot, 0, TRUE);
		}
		/* Map it in */
#endif
#endif

	}
	/*
	 *	If the page is not wired down and isn't already
	 *	on a pageout queue, then put it where the
	 *	pageout daemon can find it.
	 */
	if (m != VM_PAGE_NULL) {
		vm_page_lock_queues();

		if (change_wiring) {
			if (wired)
				vm_page_wire(m);
			else
				vm_page_unwire(m);
		}
#if VM_FAULT_STATIC_CONFIG
		else {
			if (!m->active && !m->inactive)
				vm_page_activate(m);
			m->reference = TRUE;
		}
#else
		else if (software_reference_bits) {
			if (!m->active && !m->inactive)
				vm_page_activate(m);
			m->reference = TRUE;
		} else {
			vm_page_activate(m);
		}
#endif
		vm_page_unlock_queues();
	}

	/*
	 *	Unlock everything, and return
	 */

	vm_map_verify_done(map, &version);
	if (pmap_map != map)
		vm_map_unlock(pmap_map);
	if (m != VM_PAGE_NULL) {
		PAGE_WAKEUP_DONE(m);
		UNLOCK_AND_DEALLOCATE;
	} else {
		vm_fault_cleanup(object, top_page);
		vm_object_deallocate(object);
	}
	kr = KERN_SUCCESS;

#undef	UNLOCK_AND_DEALLOCATE
#undef	RELEASE_PAGE

    done:
	if (write_startup_file)
		tws_send_startup_info(current_task());

	if (funnel_set)
		thread_funnel_set(curflock, TRUE);

	thread_interrupt_level(interruptible_state);

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
			      vaddr,
			      type_of_fault & 0xff,
			      kr,
			      0,
			      0);

	return kr;
}
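
/*
 * Illustrative sketch only (never compiled): roughly how a
 * machine-dependent trap handler might hand a user data access fault to
 * vm_fault().  The handler name and the way the faulting address and
 * access type are obtained are assumptions for illustration; the real
 * callers are the per-architecture trap code.
 */
#if 0
static kern_return_t
example_handle_data_fault(
	vm_map_t	map,		/* faulting task's map, or kernel_map */
	vm_offset_t	fault_addr,	/* faulting virtual address */
	boolean_t	is_write)	/* TRUE for a store access */
{
	vm_prot_t	prot = VM_PROT_READ | (is_write ? VM_PROT_WRITE : VM_PROT_NONE);

	/*
	 * Not a wiring request, so change_wiring is FALSE and no caller
	 * pmap is supplied; the fault is entered in map->pmap at the
	 * page-truncated address.
	 */
	return vm_fault(map, trunc_page(fault_addr), prot,
			FALSE, THREAD_ABORTSAFE, (pmap_t) 0, 0);
}
#endif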
/*
 *	vm_fault_wire:
 *
 *	Wire down a range of virtual addresses in a map.
 */
kern_return_t
vm_fault_wire(
	vm_map_t	map,
	vm_map_entry_t	entry,
	pmap_t		pmap,
	vm_offset_t	pmap_addr)
{

	register vm_offset_t	va;
	register vm_offset_t	end_addr = entry->vme_end;
	register kern_return_t	rc;

	assert(entry->in_transition);

	if ((entry->object.vm_object != NULL) &&
	    !entry->is_sub_map &&
	    entry->object.vm_object->phys_contiguous) {
		return KERN_SUCCESS;
	}

	/*
	 *	Inform the physical mapping system that the
	 *	range of addresses may not fault, so that
	 *	page tables and such can be locked down as well.
	 */

	pmap_pageable(pmap, pmap_addr,
		pmap_addr + (end_addr - entry->vme_start), FALSE);

	/*
	 *	We simulate a fault to get the page and enter it
	 *	in the physical map.
	 */

	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
		if ((rc = vm_fault_wire_fast(
			map, va, entry, pmap,
			pmap_addr + (va - entry->vme_start)
			)) != KERN_SUCCESS) {
			rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
				(pmap == kernel_pmap) ?
					THREAD_UNINT : THREAD_ABORTSAFE,
				pmap, pmap_addr + (va - entry->vme_start));
		}

		if (rc != KERN_SUCCESS) {
			struct vm_map_entry	tmp_entry = *entry;

			/* unwire wired pages */
			tmp_entry.vme_end = va;
			vm_fault_unwire(map,
				&tmp_entry, FALSE, pmap, pmap_addr);

			return rc;
		}
	}
	return KERN_SUCCESS;
}
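
/*
 * Illustrative sketch only (never compiled): one plausible way a
 * higher-level wiring path could drive vm_fault_wire() for a single map
 * entry.  The helper name and the simplified locking and error handling
 * are assumptions for illustration; the real callers live in the map
 * code and follow the in_transition protocol asserted above.
 */
#if 0
static kern_return_t
example_wire_one_entry(
	vm_map_t	map,
	vm_map_entry_t	entry)
{
	kern_return_t	rc;

	/*
	 * The real caller marks the entry in_transition and drops the
	 * map write lock before faulting the pages in; vm_fault_wire()
	 * asserts that this has been done.  For an entry mapped through
	 * the map's own pmap, pmap_addr is simply the entry's start.
	 */
	rc = vm_fault_wire(map, entry, map->pmap, entry->vme_start);

	/*
	 * On failure vm_fault_wire() has already unwired whatever it
	 * managed to wire, so the caller only reports the error.
	 */
	return rc;
}
#endif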
/*
 *	vm_fault_unwire:
 *
 *	Unwire a range of virtual addresses in a map.
 */
void
vm_fault_unwire(
	vm_map_t	map,
	vm_map_entry_t	entry,
	boolean_t	deallocate,
	pmap_t		pmap,
	vm_offset_t	pmap_addr)
{
	register vm_offset_t	va;
	register vm_offset_t	end_addr = entry->vme_end;
	vm_object_t		object;

	object = (entry->is_sub_map)
			? VM_OBJECT_NULL : entry->object.vm_object;

	/*
	 *	Since the pages are wired down, we must be able to
	 *	get their mappings from the physical map system.
	 */

	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
		pmap_change_wiring(pmap,
			pmap_addr + (va - entry->vme_start), FALSE);

		if (object == VM_OBJECT_NULL) {
			(void) vm_fault(map, va, VM_PROT_NONE,
					TRUE, THREAD_UNINT, pmap, pmap_addr);
		} else if (object->phys_contiguous) {
			continue;
		} else {
			vm_prot_t	prot;
			vm_page_t	result_page;
			vm_page_t	top_page;
			vm_object_t	result_object;
			vm_fault_return_t result;

			do {
				prot = VM_PROT_NONE;

				vm_object_lock(object);
				vm_object_paging_begin(object);
				XPR(XPR_VM_FAULT,
					"vm_fault_unwire -> vm_fault_page\n",
					0,0,0,0,0);
				result = vm_fault_page(object,
						entry->offset +
						  (va - entry->vme_start),
						VM_PROT_NONE, TRUE,
						THREAD_UNINT,
						entry->offset,
						entry->offset +
						  (entry->vme_end
						   - entry->vme_start),
						entry->behavior,
						&prot,
						&result_page,
						&top_page,
						(int *)0,
						0, map->no_zero_fill,
						FALSE, NULL, 0);
			} while (result == VM_FAULT_RETRY);

			if (result != VM_FAULT_SUCCESS)
				panic("vm_fault_unwire: failure");

			result_object = result_page->object;
			if (deallocate) {
				assert(!result_page->fictitious);
				pmap_page_protect(result_page->phys_addr,
						VM_PROT_NONE);
				VM_PAGE_FREE(result_page);
			} else {
				vm_page_lock_queues();
				vm_page_unwire(result_page);
				vm_page_unlock_queues();
				PAGE_WAKEUP_DONE(result_page);
			}
			vm_fault_cleanup(result_object, top_page);
		}
	}

	/*
	 *	Inform the physical mapping system that the range
	 *	of addresses may fault, so that page tables and
	 *	such may be unwired themselves.
	 */

	pmap_pageable(pmap, pmap_addr,
		pmap_addr + (end_addr - entry->vme_start), TRUE);

}
/*
 *	vm_fault_wire_fast:
 *
 *	Handle common case of a wire down page fault at the given address.
 *	If successful, the page is inserted into the associated physical map.
 *	The map entry is passed in to avoid the overhead of a map lookup.
 *
 *	NOTE: the given address should be truncated to the
 *	proper page address.
 *
 *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
 *	a standard error specifying why the fault is fatal is returned.
 *
 *	The map in question must be referenced, and remains so.
 *	Caller has a read lock on the map.
 *
 *	This is a stripped version of vm_fault() for wiring pages.  Anything
 *	other than the common case will return KERN_FAILURE, and the caller
 *	is expected to call vm_fault().
 */
kern_return_t
vm_fault_wire_fast(
	vm_map_t	map,
	vm_offset_t	va,
	vm_map_entry_t	entry,
	pmap_t		pmap,
	vm_offset_t	pmap_addr)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	register vm_page_t	m;
	vm_prot_t		prot;
	thread_act_t		thr_act;
	unsigned int		cache_attr;

	VM_STAT(faults++);

	if ((thr_act = current_act()) && (thr_act->task != TASK_NULL))
		thr_act->task->faults++;

	/*
	 *	Recovery actions
	 */

#undef	RELEASE_PAGE
#define RELEASE_PAGE(m)	{				\
	PAGE_WAKEUP_DONE(m);				\
	vm_page_lock_queues();				\
	vm_page_unwire(m);				\
	vm_page_unlock_queues();			\
}

#undef	UNLOCK_THINGS
#define UNLOCK_THINGS	{				\
	object->paging_in_progress--;			\
	vm_object_unlock(object);			\
}

#undef	UNLOCK_AND_DEALLOCATE
#define UNLOCK_AND_DEALLOCATE	{			\
	UNLOCK_THINGS;					\
	vm_object_deallocate(object);			\
}
	/*
	 *	Give up and have caller do things the hard way.
	 */

#define GIVE_UP {					\
	UNLOCK_AND_DEALLOCATE;				\
	return(KERN_FAILURE);				\
}


	/*
	 *	If this entry is not directly to a vm_object, bail out.
	 */
	if (entry->is_sub_map)
		return(KERN_FAILURE);

	/*
	 *	Find the backing store object and offset into it.
	 */

	object = entry->object.vm_object;
	offset = (va - entry->vme_start) + entry->offset;
	prot = entry->protection;

	/*
	 *	Make a reference to this object to prevent its
	 *	disposal while we are messing with it.
	 */

	vm_object_lock(object);
	assert(object->ref_count > 0);
	object->ref_count++;
	vm_object_res_reference(object);
	object->paging_in_progress++;

	/*
	 *	INVARIANTS (through entire routine):
	 *
	 *	1)	At all times, we must either have the object
	 *		lock or a busy page in some object to prevent
	 *		some other thread from trying to bring in
	 *		the same page.
	 *
	 *	2)	Once we have a busy page, we must remove it from
	 *		the pageout queues, so that the pageout daemon
	 *		will not grab it away.
	 *
	 */

	/*
	 *	Look for page in top-level object.  If it's not there or
	 *	there's something going on, give up.
	 */
	m = vm_page_lookup(object, offset);
	if ((m == VM_PAGE_NULL) || (m->busy) ||
	    (m->unusual && (m->error || m->restart || m->absent ||
			    prot & m->page_lock))) {

		GIVE_UP;
	}

	/*
	 *	Wire the page down now.  All bail outs beyond this
	 *	point must unwire the page.
	 */

	vm_page_lock_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	/*
	 *	Mark page busy for other threads.
	 */
	assert(!m->busy);
	m->busy = TRUE;
	assert(!m->absent);

	/*
	 *	Give up if the page is being written and there's a copy object
	 */
	if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
		RELEASE_PAGE(m);
		GIVE_UP;
	}

	/*
	 *	Put this page into the physical map.
	 *	We have to unlock the object because pmap_enter
	 *	may cause other faults.
	 */
	if (m->no_isync == TRUE) {
		pmap_sync_caches_phys(m->phys_addr);

		m->no_isync = FALSE;
	}

	cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;

	PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);

	/*
	 *	Unlock everything, and return
	 */

	PAGE_WAKEUP_DONE(m);
	UNLOCK_AND_DEALLOCATE;

	return(KERN_SUCCESS);

}
/*
 *	Routine:	vm_fault_copy_cleanup
 *	Purpose:
 *		Release a page used by vm_fault_copy.
 */

void
vm_fault_copy_cleanup(
	vm_page_t	page,
	vm_page_t	top_page)
{
	vm_object_t	object = page->object;

	vm_object_lock(object);
	PAGE_WAKEUP_DONE(page);
	vm_page_lock_queues();
	if (!page->active && !page->inactive)
		vm_page_activate(page);
	vm_page_unlock_queues();
	vm_fault_cleanup(object, top_page);
}

void
vm_fault_copy_dst_cleanup(
	vm_page_t	page)
{
	vm_object_t	object;

	if (page != VM_PAGE_NULL) {
		object = page->object;
		vm_object_lock(object);
		vm_page_lock_queues();
		vm_page_unwire(page);
		vm_page_unlock_queues();
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}
}
/*
 *	Routine:	vm_fault_copy
 *
 *	Purpose:
 *		Copy pages from one virtual memory object to another --
 *		neither the source nor destination pages need be resident.
 *
 *		Before actually copying a page, the version associated with
 *		the destination address map will be verified.
 *
 *	In/out conditions:
 *		The caller must hold a reference, but not a lock, to
 *		each of the source and destination objects and to the
 *		destination map.
 *
 *	Results:
 *		Returns KERN_SUCCESS if no errors were encountered in
 *		reading or writing the data.  Returns KERN_INTERRUPTED if
 *		the operation was interrupted (only possible if the
 *		"interruptible" argument is asserted).  Other return values
 *		indicate a permanent error in copying the data.
 *
 *		The actual amount of data copied will be returned in the
 *		"copy_size" argument.  In the event that the destination map
 *		verification failed, this amount may be less than the amount
 *		requested.
 */
kern_return_t
vm_fault_copy(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_size_t		*src_size,		/* INOUT */
	vm_object_t		dst_object,
	vm_object_offset_t	dst_offset,
	vm_map_t		dst_map,
	vm_map_version_t	*dst_version,
	int			interruptible)
{
	vm_page_t		result_page;

	vm_page_t		src_page;
	vm_page_t		src_top_page;
	vm_prot_t		src_prot;

	vm_page_t		dst_page;
	vm_page_t		dst_top_page;
	vm_prot_t		dst_prot;

	vm_size_t		amount_left;
	vm_object_t		old_copy_object;
	kern_return_t		error = 0;

	vm_size_t		part_size;

	/*
	 * In order not to confuse the clustered pageins, align
	 * the different offsets on a page boundary.
	 */
	vm_object_offset_t	src_lo_offset = trunc_page_64(src_offset);
	vm_object_offset_t	dst_lo_offset = trunc_page_64(dst_offset);
	vm_object_offset_t	src_hi_offset = round_page_64(src_offset + *src_size);
	vm_object_offset_t	dst_hi_offset = round_page_64(dst_offset + *src_size);

#define	RETURN(x)					\
	MACRO_BEGIN					\
	*src_size -= amount_left;			\
	MACRO_RETURN(x);				\
	MACRO_END

	amount_left = *src_size;
	do { /* while (amount_left > 0) */
		/*
		 * There may be a deadlock if both source and destination
		 * pages are the same. To avoid this deadlock, the copy must
		 * start by getting the destination page in order to apply
		 * COW semantics if any.
		 */

	RetryDestinationFault: ;

		dst_prot = VM_PROT_WRITE|VM_PROT_READ;

		vm_object_lock(dst_object);
		vm_object_paging_begin(dst_object);

		XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
		switch (vm_fault_page(dst_object,
				      trunc_page_64(dst_offset),
				      VM_PROT_WRITE|VM_PROT_READ,
				      FALSE,
				      interruptible,
				      dst_lo_offset,
				      dst_hi_offset,
				      VM_BEHAVIOR_SEQUENTIAL,
				      &dst_prot,
				      &dst_page,
				      &dst_top_page,
				      (int *)0,
				      &error,
				      dst_map->no_zero_fill,
				      FALSE, NULL, 0)) {
		case VM_FAULT_SUCCESS:
			break;
		case VM_FAULT_RETRY:
			goto RetryDestinationFault;
		case VM_FAULT_MEMORY_SHORTAGE:
			if (vm_page_wait(interruptible))
				goto RetryDestinationFault;
			/* fall thru */
		case VM_FAULT_INTERRUPTED:
			RETURN(MACH_SEND_INTERRUPTED);
		case VM_FAULT_FICTITIOUS_SHORTAGE:
			vm_page_more_fictitious();
			goto RetryDestinationFault;
		case VM_FAULT_MEMORY_ERROR:
			if (error)
				return (error);
			else
				return(KERN_MEMORY_ERROR);
		}
		assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);

		old_copy_object = dst_page->object->copy;

		/*
		 * There exists the possibility that the source and
		 * destination page are the same.  But we can't
		 * easily determine that now.  If they are the
		 * same, the call to vm_fault_page() for the
		 * destination page will deadlock.  To prevent this we
		 * wire the page so we can drop busy without having
		 * the page daemon steal the page.  We clean up the
		 * top page but keep the paging reference on the object
		 * holding the dest page so it doesn't go away.
		 */

		vm_page_lock_queues();
		vm_page_wire(dst_page);
		vm_page_unlock_queues();
		PAGE_WAKEUP_DONE(dst_page);
		vm_object_unlock(dst_page->object);

		if (dst_top_page != VM_PAGE_NULL) {
			vm_object_lock(dst_object);
			VM_PAGE_FREE(dst_top_page);
			vm_object_paging_end(dst_object);
			vm_object_unlock(dst_object);
		}

	RetrySourceFault: ;

		if (src_object == VM_OBJECT_NULL) {
			/*
			 *	No source object.  We will just
			 *	zero-fill the page in dst_object.
			 */
			src_page = VM_PAGE_NULL;
			result_page = VM_PAGE_NULL;
		} else {
			vm_object_lock(src_object);
			src_page = vm_page_lookup(src_object,
						  trunc_page_64(src_offset));
			if (src_page == dst_page) {
				src_prot = dst_prot;
				result_page = VM_PAGE_NULL;
			} else {
				src_prot = VM_PROT_READ;
				vm_object_paging_begin(src_object);

				XPR(XPR_VM_FAULT,
					"vm_fault_copy(2) -> vm_fault_page\n",
					0,0,0,0,0);
				switch (vm_fault_page(src_object,
						      trunc_page_64(src_offset),
						      VM_PROT_READ,
						      FALSE,
						      interruptible,
						      src_lo_offset,
						      src_hi_offset,
						      VM_BEHAVIOR_SEQUENTIAL,
						      &src_prot,
						      &result_page,
						      &src_top_page,
						      (int *)0,
						      &error,
						      FALSE,
						      FALSE, NULL, 0)) {

				case VM_FAULT_SUCCESS:
					break;
				case VM_FAULT_RETRY:
					goto RetrySourceFault;
				case VM_FAULT_MEMORY_SHORTAGE:
					if (vm_page_wait(interruptible))
						goto RetrySourceFault;
					/* fall thru */
				case VM_FAULT_INTERRUPTED:
					vm_fault_copy_dst_cleanup(dst_page);
					RETURN(MACH_SEND_INTERRUPTED);
				case VM_FAULT_FICTITIOUS_SHORTAGE:
					vm_page_more_fictitious();
					goto RetrySourceFault;
				case VM_FAULT_MEMORY_ERROR:
					vm_fault_copy_dst_cleanup(dst_page);
					if (error)
						return (error);
					else
						return(KERN_MEMORY_ERROR);
				}

				assert((src_top_page == VM_PAGE_NULL) ==
				       (result_page->object == src_object));
			}
			assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
			vm_object_unlock(result_page->object);
		}

		if (!vm_map_verify(dst_map, dst_version)) {
			if (result_page != VM_PAGE_NULL && src_page != dst_page)
				vm_fault_copy_cleanup(result_page, src_top_page);
			vm_fault_copy_dst_cleanup(dst_page);
			break;
		}

		vm_object_lock(dst_page->object);

		if (dst_page->object->copy != old_copy_object) {
			vm_object_unlock(dst_page->object);
			vm_map_verify_done(dst_map, dst_version);
			if (result_page != VM_PAGE_NULL && src_page != dst_page)
				vm_fault_copy_cleanup(result_page, src_top_page);
			vm_fault_copy_dst_cleanup(dst_page);
			break;
		}
		vm_object_unlock(dst_page->object);

		/*
		 *	Copy the page, and note that it is dirty
		 *	immediately.
		 */

		if (!page_aligned(src_offset) ||
		    !page_aligned(dst_offset) ||
		    !page_aligned(amount_left)) {

			vm_object_offset_t	src_po,
						dst_po;

			src_po = src_offset - trunc_page_64(src_offset);
			dst_po = dst_offset - trunc_page_64(dst_offset);

			if (dst_po > src_po) {
				part_size = PAGE_SIZE - dst_po;
			} else {
				part_size = PAGE_SIZE - src_po;
			}
			if (part_size > (amount_left)) {
				part_size = amount_left;
			}

			if (result_page == VM_PAGE_NULL) {
				vm_page_part_zero_fill(dst_page,
							dst_po, part_size);
			} else {
				vm_page_part_copy(result_page, src_po,
					dst_page, dst_po, part_size);
				if (!dst_page->dirty) {
					vm_object_lock(dst_object);
					dst_page->dirty = TRUE;
					vm_object_unlock(dst_page->object);
				}
			}
		} else {
			part_size = PAGE_SIZE;

			if (result_page == VM_PAGE_NULL)
				vm_page_zero_fill(dst_page);
			else {
				vm_page_copy(result_page, dst_page);
				if (!dst_page->dirty) {
					vm_object_lock(dst_object);
					dst_page->dirty = TRUE;
					vm_object_unlock(dst_page->object);
				}
			}
		}

		/*
		 *	Unlock everything, and return
		 */

		vm_map_verify_done(dst_map, dst_version);

		if (result_page != VM_PAGE_NULL && src_page != dst_page)
			vm_fault_copy_cleanup(result_page, src_top_page);
		vm_fault_copy_dst_cleanup(dst_page);

		amount_left -= part_size;
		src_offset += part_size;
		dst_offset += part_size;
	} while (amount_left > 0);

	RETURN(KERN_SUCCESS);
#undef	RETURN

	/*NOTREACHED*/
}
/*
 *	Routine:	vm_fault_page_overwrite
 *
 *	Description:
 *		A form of vm_fault_page that assumes that the
 *		resulting page will be overwritten in its entirety,
 *		making it unnecessary to obtain the correct *contents*
 *		of the page.
 *
 *	Implementation:
 *		XXX Untested.  Also unused.  Eventually, this technology
 *		could be used in vm_fault_copy() to advantage.
 */
vm_fault_return_t
vm_fault_page_overwrite(
	register
	vm_object_t		dst_object,
	vm_object_offset_t	dst_offset,
	vm_page_t		*result_page)	/* OUT */
{
	register
	vm_page_t	dst_page;
	kern_return_t	wait_result;

#define	interruptible	THREAD_UNINT	/* XXX */

	while (TRUE) {
		/*
		 *	Look for a page at this offset
		 */

		while ((dst_page = vm_page_lookup(dst_object, dst_offset))
				== VM_PAGE_NULL) {
			/*
			 *	No page, no problem... just allocate one.
			 */

			dst_page = vm_page_alloc(dst_object, dst_offset);
			if (dst_page == VM_PAGE_NULL) {
				vm_object_unlock(dst_object);
				VM_PAGE_WAIT();
				vm_object_lock(dst_object);
				continue;
			}

			/*
			 *	Pretend that the memory manager
			 *	write-protected the page.
			 *
			 *	Note that we will be asking for write
			 *	permission without asking for the data
			 *	first.
			 */

			dst_page->overwriting = TRUE;
			dst_page->page_lock = VM_PROT_WRITE;
			dst_page->absent = TRUE;
			dst_page->unusual = TRUE;
			dst_object->absent_count++;

			break;

			/*
			 *	When we bail out, we might have to throw
			 *	away the page created here.
			 */

#define	DISCARD_PAGE						\
	MACRO_BEGIN						\
	vm_object_lock(dst_object);				\
	dst_page = vm_page_lookup(dst_object, dst_offset);	\
	if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
		VM_PAGE_FREE(dst_page);				\
	vm_object_unlock(dst_object);				\
	MACRO_END
		}

		/*
		 *	If the page is write-protected...
		 */

		if (dst_page->page_lock & VM_PROT_WRITE) {
			/*
			 *	... and an unlock request hasn't been sent
			 */

			if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
				vm_prot_t	u;
				kern_return_t	rc;

				/*
				 *	... then send one now.
				 */

				if (!dst_object->pager_ready) {
					wait_result = vm_object_assert_wait(dst_object,
							VM_OBJECT_EVENT_PAGER_READY,
							interruptible);
					vm_object_unlock(dst_object);
					if (wait_result == THREAD_WAITING)
						wait_result = thread_block(THREAD_CONTINUE_NULL);
					if (wait_result != THREAD_AWAKENED) {
						DISCARD_PAGE;
						return(VM_FAULT_INTERRUPTED);
					}
					vm_object_lock(dst_object);
					continue;
				}

				u = dst_page->unlock_request |= VM_PROT_WRITE;
				vm_object_unlock(dst_object);

				if ((rc = memory_object_data_unlock(
						dst_object->pager,
						dst_offset + dst_object->paging_offset,
						PAGE_SIZE,
						u)) != KERN_SUCCESS) {
					if (vm_fault_debug)
						printf("vm_object_overwrite: memory_object_data_unlock failed\n");
					DISCARD_PAGE;
					return((rc == MACH_SEND_INTERRUPTED) ?
						VM_FAULT_INTERRUPTED :
						VM_FAULT_MEMORY_ERROR);
				}
				vm_object_lock(dst_object);
				continue;
			}

			/* ... fall through to wait below */
		} else {
			/*
			 *	If the page isn't being used for other
			 *	purposes, then we're done.
			 */
			if ( ! (dst_page->busy || dst_page->absent ||
				dst_page->error || dst_page->restart) )
				break;
		}

		wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
		vm_object_unlock(dst_object);
		if (wait_result == THREAD_WAITING)
			wait_result = thread_block(THREAD_CONTINUE_NULL);
		if (wait_result != THREAD_AWAKENED) {
			DISCARD_PAGE;
			return(VM_FAULT_INTERRUPTED);
		}

		vm_object_lock(dst_object);
	}

	*result_page = dst_page;
	return(VM_FAULT_SUCCESS);

#undef	interruptible
#undef	DISCARD_PAGE
}
#if	VM_FAULT_CLASSIFY
/*
 *	Temporary statistics gathering support.
 */

/*
 *	Statistics arrays:
 */
#define VM_FAULT_TYPES_MAX	5
#define	VM_FAULT_LEVEL_MAX	8

int	vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];

#define	VM_FAULT_TYPE_ZERO_FILL	0
#define	VM_FAULT_TYPE_MAP_IN	1
#define	VM_FAULT_TYPE_PAGER	2
#define	VM_FAULT_TYPE_COPY	3
#define	VM_FAULT_TYPE_OTHER	4


void
vm_fault_classify(vm_object_t		object,
		  vm_object_offset_t	offset,
		  vm_prot_t		fault_type)
{
	int		type, level = 0;
	vm_page_t	m;

	while (TRUE) {
		m = vm_page_lookup(object, offset);
		if (m != VM_PAGE_NULL) {
			if (m->busy || m->error || m->restart || m->absent ||
			    fault_type & m->page_lock) {
				type = VM_FAULT_TYPE_OTHER;
				break;
			}
			if (((fault_type & VM_PROT_WRITE) == 0) ||
			    ((level == 0) && object->copy == VM_OBJECT_NULL)) {
				type = VM_FAULT_TYPE_MAP_IN;
				break;
			}
			type = VM_FAULT_TYPE_COPY;
			break;
		}
		else {
			if (object->pager_created) {
				type = VM_FAULT_TYPE_PAGER;
				break;
			}
			if (object->shadow == VM_OBJECT_NULL) {
				type = VM_FAULT_TYPE_ZERO_FILL;
				break;
			}

			offset += object->shadow_offset;
			object = object->shadow;
			level++;
			continue;
		}
	}

	if (level > VM_FAULT_LEVEL_MAX)
		level = VM_FAULT_LEVEL_MAX;

	vm_fault_stats[type][level] += 1;

	return;
}

/* cleanup routine to call from debugger */

void
vm_fault_classify_init(void)
{
	int	type, level;

	for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
		for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
			vm_fault_stats[type][level] = 0;
		}
	}

	return;
}
#endif	/* VM_FAULT_CLASSIFY */