1c79356b 1/*
91447636 2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
1c79356b 3 *
8ad349bb 4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
1c79356b 5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30/*
31 * @OSF_COPYRIGHT@
32 */
33/*
34 * Mach Operating System
35 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
36 * All Rights Reserved.
37 *
38 * Permission to use, copy, modify and distribute this software and its
39 * documentation is hereby granted, provided that both the copyright
40 * notice and this permission notice appear in all copies of the
41 * software, derivative works or modified versions, and any portions
42 * thereof, and that both notices appear in supporting documentation.
43 *
44 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
45 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
46 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
47 *
48 * Carnegie Mellon requests users of this software to return to
49 *
50 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
51 * School of Computer Science
52 * Carnegie Mellon University
53 * Pittsburgh PA 15213-3890
54 *
55 * any improvements or extensions that they make and grant Carnegie Mellon
56 * the rights to redistribute these changes.
57 */
58/*
59 */
60/*
61 * File: vm_fault.c
62 * Author: Avadis Tevanian, Jr., Michael Wayne Young
63 *
64 * Page fault handling module.
65 */
66
67#include <mach_cluster_stats.h>
68#include <mach_pagemap.h>
69#include <mach_kdb.h>
70
91447636 71#include <mach/mach_types.h>
72#include <mach/kern_return.h>
73#include <mach/message.h> /* for error codes */
74#include <mach/vm_param.h>
75#include <mach/vm_behavior.h>
76#include <mach/memory_object.h>
77 /* For memory_object_data_{request,unlock} */
78
79#include <kern/kern_types.h>
80#include <kern/host_statistics.h>
81#include <kern/counters.h>
82#include <kern/task.h>
83#include <kern/thread.h>
84#include <kern/sched_prim.h>
85#include <kern/host.h>
86#include <kern/xpr.h>
87#include <kern/mach_param.h>
88#include <kern/macro_help.h>
89#include <kern/zalloc.h>
90#include <kern/misc_protos.h>
91
0b4e3aa0 92#include <ppc/proc_reg.h>
93
94#include <vm/vm_fault.h>
0b4e3aa0 95#include <vm/task_working_set.h>
96#include <vm/vm_map.h>
97#include <vm/vm_object.h>
98#include <vm/vm_page.h>
55e303ae 99#include <vm/vm_kern.h>
100#include <vm/pmap.h>
101#include <vm/vm_pageout.h>
91447636 102#include <vm/vm_protos.h>
103
104#include <sys/kdebug.h>
105
106#define VM_FAULT_CLASSIFY 0
107#define VM_FAULT_STATIC_CONFIG 1
108
109#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
110
91447636 111unsigned int vm_object_absent_max = 50;
112
113int vm_fault_debug = 0;
114
115#if !VM_FAULT_STATIC_CONFIG
116boolean_t vm_fault_dirty_handling = FALSE;
117boolean_t vm_fault_interruptible = FALSE;
118boolean_t software_reference_bits = TRUE;
119#endif
120
121#if MACH_KDB
122extern struct db_watchpoint *db_watchpoint_list;
123#endif /* MACH_KDB */
124
91447636 125
126/* Forward declarations of internal routines. */
127extern kern_return_t vm_fault_wire_fast(
128 vm_map_t map,
91447636 129 vm_map_offset_t va,
1c79356b 130 vm_map_entry_t entry,
9bccf70c 131 pmap_t pmap,
91447636 132 vm_map_offset_t pmap_addr);
133
134extern void vm_fault_continue(void);
135
136extern void vm_fault_copy_cleanup(
137 vm_page_t page,
138 vm_page_t top_page);
139
140extern void vm_fault_copy_dst_cleanup(
141 vm_page_t page);
142
143#if VM_FAULT_CLASSIFY
144extern void vm_fault_classify(vm_object_t object,
145 vm_object_offset_t offset,
146 vm_prot_t fault_type);
147
148extern void vm_fault_classify_init(void);
149#endif
150
151/*
152 * Routine: vm_fault_init
153 * Purpose:
154 * Initialize our private data structures.
155 */
156void
157vm_fault_init(void)
158{
159}
160
161/*
162 * Routine: vm_fault_cleanup
163 * Purpose:
164 * Clean up the result of vm_fault_page.
165 * Results:
166 * The paging reference for "object" is released.
167 * "object" is unlocked.
168 * If "top_page" is not null, "top_page" is
169 * freed and the paging reference for the object
170 * containing it is released.
171 *
172 * In/out conditions:
173 * "object" must be locked.
174 */
175void
176vm_fault_cleanup(
177 register vm_object_t object,
178 register vm_page_t top_page)
179{
180 vm_object_paging_end(object);
181 vm_object_unlock(object);
182
183 if (top_page != VM_PAGE_NULL) {
184 object = top_page->object;
185 vm_object_lock(object);
186 VM_PAGE_FREE(top_page);
187 vm_object_paging_end(object);
188 vm_object_unlock(object);
189 }
190}
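/*
 * Illustrative sketch (added for exposition; not part of the original
 * source and never compiled): the calling convention that
 * vm_fault_cleanup() pairs with.  A caller of vm_fault_page() locks the
 * object, donates one paging reference, and on success releases the busy
 * page and the paging references through vm_fault_cleanup().  The locals
 * below are placeholders, not meaningful defaults; the real callers live
 * later in this file.
 */
#if 0
	vm_object_t		first_object;	/* assumed: already referenced by the caller */
	vm_object_offset_t	first_offset = 0;
	vm_prot_t		prot = VM_PROT_READ;
	vm_page_t		result_page, top_page;
	int			type_of_fault;
	kern_return_t		error_code;
	vm_fault_return_t	kr;

	vm_object_lock(first_object);
	vm_object_paging_begin(first_object);	/* donate one paging reference */

	kr = vm_fault_page(first_object, first_offset,
			   VM_PROT_READ,		/* fault_type */
			   FALSE,			/* must_be_resident */
			   THREAD_UNINT,		/* interruptible */
			   first_offset,		/* lo_offset */
			   first_offset + PAGE_SIZE_64,	/* hi_offset */
			   VM_BEHAVIOR_DEFAULT,
			   &prot, &result_page, &top_page,
			   &type_of_fault, &error_code,
			   FALSE,			/* no_zero_fill */
			   FALSE,			/* data_supply */
			   VM_MAP_NULL, 0);

	if (kr == VM_FAULT_SUCCESS) {
		/*
		 * result_page is busy and its object is locked with a
		 * paging reference; hand both back when done with the page.
		 */
		PAGE_WAKEUP_DONE(result_page);
		vm_fault_cleanup(result_page->object, top_page);
	}
#endif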
191
192#if MACH_CLUSTER_STATS
193#define MAXCLUSTERPAGES 16
194struct {
195 unsigned long pages_in_cluster;
196 unsigned long pages_at_higher_offsets;
197 unsigned long pages_at_lower_offsets;
198} cluster_stats_in[MAXCLUSTERPAGES];
199#define CLUSTER_STAT(clause) clause
200#define CLUSTER_STAT_HIGHER(x) \
201 ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
202#define CLUSTER_STAT_LOWER(x) \
203 ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
204#define CLUSTER_STAT_CLUSTER(x) \
205 ((cluster_stats_in[(x)].pages_in_cluster)++)
206#else /* MACH_CLUSTER_STATS */
207#define CLUSTER_STAT(clause)
208#endif /* MACH_CLUSTER_STATS */
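/*
 * Illustrative sketch (added for exposition; not part of the original
 * source): CLUSTER_STAT(clause) keeps its clause only when
 * MACH_CLUSTER_STATS is configured and expands to nothing otherwise, so
 * cluster bookkeeping such as the counters declared in vm_fault_page()
 * vanishes from non-stats builds.
 */
#if 0
	CLUSTER_STAT(int pages_at_higher_offsets;)	/* declaration kept only with stats */
	CLUSTER_STAT(pages_at_higher_offsets++;)	/* counter bump kept only with stats */
#endif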
209
210/* XXX - temporary */
211boolean_t vm_allow_clustered_pagein = FALSE;
212int vm_pagein_cluster_used = 0;
213
214#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
215
216
217boolean_t vm_page_deactivate_behind = TRUE;
218/*
219 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
220 */
221int vm_default_ahead = 0;
222int vm_default_behind = MAX_UPL_TRANSFER;
223
224/*
225 * vm_page_deactivate_behind
226 *
227 * Determine if sequential access is in progress
228 * in accordance with the behavior specified. If
229 * so, compute a potential page to deactivate and
230 * deactivate it.
231 *
232 * The object must be locked.
233 */
234static
235boolean_t
236vm_fault_deactivate_behind(
237 vm_object_t object,
238 vm_object_offset_t offset,
239 vm_behavior_t behavior)
240{
241 vm_page_t m;
242
243#if TRACEFAULTPAGE
244 dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */
245#endif
246
247 if (object == kernel_object) {
248 /*
249 * Do not deactivate pages from the kernel object: they
250 * are not intended to become pageable.
251 */
252 return FALSE;
253 }
254
255 switch (behavior) {
256 case VM_BEHAVIOR_RANDOM:
257 object->sequential = PAGE_SIZE_64;
258 m = VM_PAGE_NULL;
259 break;
260 case VM_BEHAVIOR_SEQUENTIAL:
261 if (offset &&
262 object->last_alloc == offset - PAGE_SIZE_64) {
263 object->sequential += PAGE_SIZE_64;
264 m = vm_page_lookup(object, offset - PAGE_SIZE_64);
265 } else {
266 object->sequential = PAGE_SIZE_64; /* reset */
267 m = VM_PAGE_NULL;
268 }
269 break;
270 case VM_BEHAVIOR_RSEQNTL:
271 if (object->last_alloc &&
272 object->last_alloc == offset + PAGE_SIZE_64) {
273 object->sequential += PAGE_SIZE_64;
274 m = vm_page_lookup(object, offset + PAGE_SIZE_64);
275 } else {
276 object->sequential = PAGE_SIZE_64; /* reset */
277 m = VM_PAGE_NULL;
278 }
279 break;
280 case VM_BEHAVIOR_DEFAULT:
281 default:
282 if (offset &&
283 object->last_alloc == offset - PAGE_SIZE_64) {
284 vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
285
286 object->sequential += PAGE_SIZE_64;
287 m = (offset >= behind &&
288 object->sequential >= behind) ?
289 vm_page_lookup(object, offset - behind) :
290 VM_PAGE_NULL;
291 } else if (object->last_alloc &&
292 object->last_alloc == offset + PAGE_SIZE_64) {
293 vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
294
295 object->sequential += PAGE_SIZE_64;
296 m = (offset < -behind &&
297 object->sequential >= behind) ?
298 vm_page_lookup(object, offset + behind) :
299 VM_PAGE_NULL;
300 } else {
301 object->sequential = PAGE_SIZE_64;
302 m = VM_PAGE_NULL;
303 }
304 break;
305 }
306
307 object->last_alloc = offset;
308
309 if (m) {
310 if (!m->busy) {
311 vm_page_lock_queues();
312 vm_page_deactivate(m);
313 vm_page_unlock_queues();
314#if TRACEFAULTPAGE
315 dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
316#endif
317 }
318 return TRUE;
319 }
320 return FALSE;
321}
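/*
 * Illustrative sketch (added for exposition; not part of the original
 * source): how a fault path can consult the heuristic above once a fault
 * at "offset" has been resolved.  The variable names are assumed to be
 * the fault handler's locals; the real caller sits further down in this
 * file, in the top-level fault handling code.
 */
#if 0
	vm_object_lock(object);			/* the heuristic requires the object lock */
	if (vm_page_deactivate_behind)
		(void) vm_fault_deactivate_behind(object, offset, behavior);
	vm_object_unlock(object);
#endif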
1c79356b 322
323
324/*
325 * Routine: vm_fault_page
326 * Purpose:
327 * Find the resident page for the virtual memory
328 * specified by the given virtual memory object
329 * and offset.
330 * Additional arguments:
331 * The required permissions for the page are given
332 * in "fault_type". Desired permissions are included
333 * in "protection". The minimum and maximum valid offsets
334 * within the object for the relevant map entry are
335 * passed in "lo_offset" and "hi_offset" respectively and
336 * the expected page reference pattern is passed in "behavior".
337 * These three parameters are used to determine pagein cluster
338 * limits.
339 *
340 * If the desired page is known to be resident (for
341 * example, because it was previously wired down), asserting
342 * the "unwiring" parameter will speed the search.
343 *
344 * If the operation can be interrupted (by thread_abort
345 * or thread_terminate), then the "interruptible"
346 * parameter should be asserted.
347 *
348 * Results:
349 * The page containing the proper data is returned
350 * in "result_page".
351 *
352 * In/out conditions:
353 * The source object must be locked and referenced,
354 * and must donate one paging reference. The reference
355 * is not affected. The paging reference and lock are
356 * consumed.
357 *
358 * If the call succeeds, the object in which "result_page"
359 * resides is left locked and holding a paging reference.
360 * If this is not the original object, a busy page in the
361 * original object is returned in "top_page", to prevent other
362 * callers from pursuing this same data, along with a paging
363 * reference for the original object. The "top_page" should
364 * be destroyed when this guarantee is no longer required.
365 * The "result_page" is also left busy. It is not removed
366 * from the pageout queues.
367 */
368
369vm_fault_return_t
370vm_fault_page(
371 /* Arguments: */
372 vm_object_t first_object, /* Object to begin search */
373 vm_object_offset_t first_offset, /* Offset into object */
374 vm_prot_t fault_type, /* What access is requested */
375 boolean_t must_be_resident,/* Must page be resident? */
376 int interruptible, /* how may fault be interrupted? */
377 vm_map_offset_t lo_offset, /* Map entry start */
378 vm_map_offset_t hi_offset, /* Map entry end */
379 vm_behavior_t behavior, /* Page reference behavior */
380 /* Modifies in place: */
381 vm_prot_t *protection, /* Protection for mapping */
382 /* Returns: */
383 vm_page_t *result_page, /* Page found, if successful */
384 vm_page_t *top_page, /* Page in top object, if
385 * not result_page. */
386 int *type_of_fault, /* if non-null, fill in with type of fault
387 * COW, zero-fill, etc... returned in trace point */
388 /* More arguments: */
389 kern_return_t *error_code, /* code if page is in error */
390 boolean_t no_zero_fill, /* don't zero fill absent pages */
0b4e3aa0 391 boolean_t data_supply, /* treat as data_supply if
392 * it is a write fault and a full
393 * page is provided */
0b4e3aa0 394 vm_map_t map,
91447636 395 __unused vm_map_offset_t vaddr)
396{
397 register
398 vm_page_t m;
399 register
400 vm_object_t object;
401 register
402 vm_object_offset_t offset;
403 vm_page_t first_m;
404 vm_object_t next_object;
405 vm_object_t copy_object;
406 boolean_t look_for_page;
407 vm_prot_t access_required = fault_type;
408 vm_prot_t wants_copy_flag;
409 vm_object_size_t length;
410 vm_object_offset_t cluster_start, cluster_end;
411 CLUSTER_STAT(int pages_at_higher_offsets;)
412 CLUSTER_STAT(int pages_at_lower_offsets;)
413 kern_return_t wait_result;
1c79356b 414 boolean_t interruptible_state;
0b4e3aa0 415 boolean_t bumped_pagein = FALSE;
1c79356b 416
417
418#if MACH_PAGEMAP
419/*
420 * MACH page map - an optional optimization where a bit map is maintained
421 * by the VM subsystem for internal objects to indicate which pages of
422 * the object currently reside on backing store. This existence map
423 * duplicates information maintained by the vnode pager. It is
424 * created at the time of the first pageout against the object, i.e.
425 * at the same time pager for the object is created. The optimization
426 * is designed to eliminate pager interaction overhead, if it is
427 * 'known' that the page does not exist on backing store.
428 *
429 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
430 * either marked as paged out in the existence map for the object or no
431 * existence map exists for the object. LOOK_FOR() is one of the
432 * criteria in the decision to invoke the pager. It is also used as one
433 * of the criteria to terminate the scan for adjacent pages in a clustered
434 * pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
435 * permanent objects. Note also that if the pager for an internal object
436 * has not been created, the pager is not invoked regardless of the value
437 * of LOOK_FOR() and that clustered pagein scans are only done on an object
438 * for which a pager has been created.
439 *
440 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
441 * is marked as paged out in the existence map for the object.
442 * PAGED_OUT() is used to determine if a page has already been pushed
443 * into a copy object in order to avoid a redundant page out operation.
444 */
445#define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
446 != VM_EXTERNAL_STATE_ABSENT)
447#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
448 == VM_EXTERNAL_STATE_EXISTS)
449#else /* MACH_PAGEMAP */
450/*
451 * If the MACH page map optimization is not enabled,
452 * LOOK_FOR() always evaluates to TRUE. The pager will always be
453 * invoked to resolve missing pages in an object, assuming the pager
454 * has been created for the object. In a clustered page operation, the
455 * absence of a page on backing store cannot be used to terminate
456 * a scan for adjacent pages since that information is available only in
457 * the pager. Hence pages that may not be paged out are potentially
458 * included in a clustered request. The vnode pager is coded to deal
459 * with any combination of absent/present pages in a clustered
460 * pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
461 * will always be invoked to push a dirty page into a copy object assuming
462 * a pager has been created. If the page has already been pushed, the
463 * pager will ignore the new request.
464 */
465#define LOOK_FOR(o, f) TRUE
466#define PAGED_OUT(o, f) FALSE
467#endif /* MACH_PAGEMAP */
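/*
 * Illustrative sketch (added for exposition; not part of the original
 * source): how LOOK_FOR() is consumed further down in this routine.  The
 * pager is asked for the page only when it has been created, the
 * existence map does not already rule the page out, and the caller is
 * not supplying the data itself.
 */
#if 0
	look_for_page =
		(object->pager_created) &&
		LOOK_FOR(object, offset) &&
		(!data_supply);
#endif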
468
469/*
470 * Recovery actions
471 */
472#define PREPARE_RELEASE_PAGE(m) \
473 MACRO_BEGIN \
474 vm_page_lock_queues(); \
475 MACRO_END
476
477#define DO_RELEASE_PAGE(m) \
478 MACRO_BEGIN \
479 PAGE_WAKEUP_DONE(m); \
480 if (!m->active && !m->inactive) \
481 vm_page_activate(m); \
482 vm_page_unlock_queues(); \
483 MACRO_END
484
485#define RELEASE_PAGE(m) \
486 MACRO_BEGIN \
487 PREPARE_RELEASE_PAGE(m); \
488 DO_RELEASE_PAGE(m); \
489 MACRO_END
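/*
 * Illustrative sketch (added for exposition; not part of the original
 * source): RELEASE_PAGE(m) is simply the two halves above glued together,
 * i.e. it expands to the sequence below: wake any waiters on the page,
 * put it back on a paging queue if it is on none, then drop the queue
 * lock.
 */
#if 0
	vm_page_lock_queues();
	PAGE_WAKEUP_DONE(m);
	if (!m->active && !m->inactive)
		vm_page_activate(m);
	vm_page_unlock_queues();
#endif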
490
491#if TRACEFAULTPAGE
492 dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
493#endif
494
495
496
497#if !VM_FAULT_STATIC_CONFIG
498 if (vm_fault_dirty_handling
499#if MACH_KDB
500 /*
501 * If there are watchpoints set, then
502 * we don't want to give away write permission
503 * on a read fault. Make the task write fault,
504 * so that the watchpoint code notices the access.
505 */
506 || db_watchpoint_list
507#endif /* MACH_KDB */
508 ) {
509 /*
510 * If we aren't asking for write permission,
511 * then don't give it away. We're using write
512 * faults to set the dirty bit.
513 */
514 if (!(fault_type & VM_PROT_WRITE))
515 *protection &= ~VM_PROT_WRITE;
516 }
517
518 if (!vm_fault_interruptible)
519 interruptible = THREAD_UNINT;
520#else /* STATIC_CONFIG */
521#if MACH_KDB
522 /*
523 * If there are watchpoints set, then
524 * we don't want to give away write permission
525 * on a read fault. Make the task write fault,
526 * so that the watchpoint code notices the access.
527 */
528 if (db_watchpoint_list) {
529 /*
530 * If we aren't asking for write permission,
531 * then don't give it away. We're using write
532 * faults to set the dirty bit.
533 */
534 if (!(fault_type & VM_PROT_WRITE))
535 *protection &= ~VM_PROT_WRITE;
536 }
537
538#endif /* MACH_KDB */
539#endif /* STATIC_CONFIG */
540
9bccf70c 541 interruptible_state = thread_interrupt_level(interruptible);
542
543 /*
544 * INVARIANTS (through entire routine):
545 *
546 * 1) At all times, we must either have the object
547 * lock or a busy page in some object to prevent
548 * some other thread from trying to bring in
549 * the same page.
550 *
551 * Note that we cannot hold any locks during the
552 * pager access or when waiting for memory, so
553 * we use a busy page then.
554 *
555 * Note also that we aren't as concerned about more than
556 * one thread attempting to memory_object_data_unlock
557 * the same page at once, so we don't hold the page
558 * as busy then, but do record the highest unlock
559 * value so far. [Unlock requests may also be delivered
560 * out of order.]
561 *
562 * 2) To prevent another thread from racing us down the
563 * shadow chain and entering a new page in the top
564 * object before we do, we must keep a busy page in
565 * the top object while following the shadow chain.
566 *
567 * 3) We must increment paging_in_progress on any object
568 * for which we have a busy page
569 *
570 * 4) We leave busy pages on the pageout queues.
571 * If the pageout daemon comes across a busy page,
572 * it will remove the page from the pageout queues.
573 */
574
575 /*
576 * Search for the page at object/offset.
577 */
578
579 object = first_object;
580 offset = first_offset;
581 first_m = VM_PAGE_NULL;
582 access_required = fault_type;
583
584 XPR(XPR_VM_FAULT,
585 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
586 (integer_t)object, offset, fault_type, *protection, 0);
587
588 /*
589 * See whether this page is resident
590 */
591
592 while (TRUE) {
593#if TRACEFAULTPAGE
594 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
595#endif
596 if (!object->alive) {
597 vm_fault_cleanup(object, first_m);
9bccf70c 598 thread_interrupt_level(interruptible_state);
599 return(VM_FAULT_MEMORY_ERROR);
600 }
601 m = vm_page_lookup(object, offset);
602#if TRACEFAULTPAGE
603 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
604#endif
605 if (m != VM_PAGE_NULL) {
606 /*
607 * If the page was pre-paged as part of a
608 * cluster, record the fact.
609 * If we were passed a valid pointer for
610 * "type_of_fault", then we came from
611 * vm_fault... we'll let it deal with
612 * this condition, since it
613 * needs to see m->clustered to correctly
614 * account the pageins.
1c79356b 615 */
91447636 616 if (type_of_fault == NULL && m->clustered) {
617 vm_pagein_cluster_used++;
618 m->clustered = FALSE;
619 }
620
621 /*
622 * If the page is being brought in,
623 * wait for it and then retry.
624 *
625 * A possible optimization: if the page
626 * is known to be resident, we can ignore
627 * pages that are absent (regardless of
628 * whether they're busy).
629 */
630
631 if (m->busy) {
632#if TRACEFAULTPAGE
633 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
634#endif
9bccf70c 635 wait_result = PAGE_SLEEP(object, m, interruptible);
636 XPR(XPR_VM_FAULT,
637 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
638 (integer_t)object, offset,
639 (integer_t)m, 0, 0);
640 counter(c_vm_fault_page_block_busy_kernel++);
1c79356b 641
642 if (wait_result != THREAD_AWAKENED) {
643 vm_fault_cleanup(object, first_m);
9bccf70c 644 thread_interrupt_level(interruptible_state);
645 if (wait_result == THREAD_RESTART)
646 {
647 return(VM_FAULT_RETRY);
648 }
649 else
650 {
651 return(VM_FAULT_INTERRUPTED);
652 }
653 }
654 continue;
655 }
656
657 if (m->encrypted) {
658 /*
659 * ENCRYPTED SWAP:
660 * the user needs access to a page that we
661 * encrypted before paging it out.
662 * Decrypt the page now.
663 * Keep it busy to prevent anyone from
664 * accessing it during the decryption.
665 */
666 m->busy = TRUE;
667 vm_page_decrypt(m, 0);
668 assert(object == m->object);
669 assert(m->busy);
670 PAGE_WAKEUP_DONE(m);
671
672 /*
673 * Retry from the top, in case
674 * something changed while we were
675 * decrypting.
676 */
677 continue;
678 }
679 ASSERT_PAGE_DECRYPTED(m);
680
681 /*
682 * If the page is in error, give up now.
683 */
684
685 if (m->error) {
686#if TRACEFAULTPAGE
687 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */
688#endif
689 if (error_code)
690 *error_code = m->page_error;
691 VM_PAGE_FREE(m);
692 vm_fault_cleanup(object, first_m);
9bccf70c 693 thread_interrupt_level(interruptible_state);
694 return(VM_FAULT_MEMORY_ERROR);
695 }
696
697 /*
698 * If the pager wants us to restart
699 * at the top of the chain,
700 * typically because it has moved the
701 * page to another pager, then do so.
702 */
703
704 if (m->restart) {
705#if TRACEFAULTPAGE
706 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
707#endif
708 VM_PAGE_FREE(m);
709 vm_fault_cleanup(object, first_m);
9bccf70c 710 thread_interrupt_level(interruptible_state);
711 return(VM_FAULT_RETRY);
712 }
713
714 /*
715 * If the page isn't busy, but is absent,
716 * then it was deemed "unavailable".
717 */
718
719 if (m->absent) {
720 /*
721 * Remove the non-existent page (unless it's
722 * in the top object) and move on down to the
723 * next object (if there is one).
724 */
725#if TRACEFAULTPAGE
726 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */
727#endif
728
729 next_object = object->shadow;
730 if (next_object == VM_OBJECT_NULL) {
731 vm_page_t real_m;
732
733 assert(!must_be_resident);
734
735 if (object->shadow_severed) {
736 vm_fault_cleanup(
737 object, first_m);
9bccf70c 738 thread_interrupt_level(interruptible_state);
739 return VM_FAULT_MEMORY_ERROR;
740 }
741
742 /*
743 * Absent page at bottom of shadow
744 * chain; zero fill the page we left
745 * busy in the first object, and flush
746 * the absent page. But first we
747 * need to allocate a real page.
748 */
749 if (VM_PAGE_THROTTLED() ||
750 (real_m = vm_page_grab())
751 == VM_PAGE_NULL) {
752 vm_fault_cleanup(
753 object, first_m);
754 thread_interrupt_level(
755 interruptible_state);
756 return(
757 VM_FAULT_MEMORY_SHORTAGE);
758 }
759
760 /*
761 * Are we protecting the system from
762 * backing store exhaustion? If so,
763 * sleep unless we are privileged.
764 */
765
766 if(vm_backing_store_low) {
767 if(!(current_task()->priv_flags
768 & VM_BACKING_STORE_PRIV)) {
769 assert_wait((event_t)
770 &vm_backing_store_low,
771 THREAD_UNINT);
772 vm_fault_cleanup(object,
773 first_m);
91447636 774 thread_block(THREAD_CONTINUE_NULL);
775 thread_interrupt_level(
776 interruptible_state);
777 return(VM_FAULT_RETRY);
778 }
779 }
780
55e303ae 781
782 XPR(XPR_VM_FAULT,
783 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
784 (integer_t)object, offset,
785 (integer_t)m,
786 (integer_t)first_object, 0);
787 if (object != first_object) {
788 VM_PAGE_FREE(m);
789 vm_object_paging_end(object);
790 vm_object_unlock(object);
791 object = first_object;
792 offset = first_offset;
793 m = first_m;
794 first_m = VM_PAGE_NULL;
795 vm_object_lock(object);
796 }
797
798 VM_PAGE_FREE(m);
799 assert(real_m->busy);
800 vm_page_insert(real_m, object, offset);
801 m = real_m;
802
803 /*
804 * Drop the lock while zero filling
805 * page. Then break because this
806 * is the page we wanted. Checking
807 * the page lock is a waste of time;
808 * this page was either absent or
809 * newly allocated -- in both cases
810 * it can't be page locked by a pager.
811 */
812 m->no_isync = FALSE;
813
814 if (!no_zero_fill) {
815 vm_object_unlock(object);
816 vm_page_zero_fill(m);
1c79356b 817 vm_object_lock(object);
55e303ae 818
819 if (type_of_fault)
820 *type_of_fault = DBG_ZERO_FILL_FAULT;
821 VM_STAT(zero_fill_count++);
822 }
823 if (bumped_pagein == TRUE) {
824 VM_STAT(pageins--);
825 current_task()->pageins--;
826 }
827 vm_page_lock_queues();
828 VM_PAGE_QUEUES_REMOVE(m);
0b4e3aa0 829 m->page_ticket = vm_page_ticket;
830 assert(!m->laundry);
831 assert(m->object != kernel_object);
832 assert(m->pageq.next == NULL &&
833 m->pageq.prev == NULL);
834 if(m->object->size > 0x200000) {
835 m->zero_fill = TRUE;
836 /* depends on the queues lock */
837 vm_zf_count += 1;
838 queue_enter(&vm_page_queue_zf,
839 m, vm_page_t, pageq);
840 } else {
841 queue_enter(
842 &vm_page_queue_inactive,
843 m, vm_page_t, pageq);
844 }
845 vm_page_ticket_roll++;
846 if(vm_page_ticket_roll ==
847 VM_PAGE_TICKETS_IN_ROLL) {
848 vm_page_ticket_roll = 0;
849 if(vm_page_ticket ==
850 VM_PAGE_TICKET_ROLL_IDS)
851 vm_page_ticket= 0;
852 else
853 vm_page_ticket++;
854 }
855 m->inactive = TRUE;
856 vm_page_inactive_count++;
857 vm_page_unlock_queues();
858 break;
859 } else {
860 if (must_be_resident) {
861 vm_object_paging_end(object);
862 } else if (object != first_object) {
863 vm_object_paging_end(object);
864 VM_PAGE_FREE(m);
865 } else {
866 first_m = m;
867 m->absent = FALSE;
868 m->unusual = FALSE;
869 vm_object_absent_release(object);
870 m->busy = TRUE;
871
872 vm_page_lock_queues();
873 VM_PAGE_QUEUES_REMOVE(m);
874 vm_page_unlock_queues();
875 }
876 XPR(XPR_VM_FAULT,
877 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
878 (integer_t)object, offset,
879 (integer_t)next_object,
880 offset+object->shadow_offset,0);
881 offset += object->shadow_offset;
882 hi_offset += object->shadow_offset;
883 lo_offset += object->shadow_offset;
884 access_required = VM_PROT_READ;
885 vm_object_lock(next_object);
886 vm_object_unlock(object);
887 object = next_object;
888 vm_object_paging_begin(object);
889 continue;
890 }
891 }
892
893 if ((m->cleaning)
894 && ((object != first_object) ||
895 (object->copy != VM_OBJECT_NULL))
896 && (fault_type & VM_PROT_WRITE)) {
897 /*
898 * This is a copy-on-write fault that will
899 * cause us to revoke access to this page, but
900 * this page is in the process of being cleaned
901 * in a clustered pageout. We must wait until
902 * the cleaning operation completes before
903 * revoking access to the original page,
904 * otherwise we might attempt to remove a
905 * wired mapping.
906 */
907#if TRACEFAULTPAGE
908 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */
909#endif
910 XPR(XPR_VM_FAULT,
911 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
912 (integer_t)object, offset,
913 (integer_t)m, 0, 0);
914 /* take an extra ref so that object won't die */
915 assert(object->ref_count > 0);
916 object->ref_count++;
917 vm_object_res_reference(object);
918 vm_fault_cleanup(object, first_m);
919 counter(c_vm_fault_page_block_backoff_kernel++);
920 vm_object_lock(object);
921 assert(object->ref_count > 0);
922 m = vm_page_lookup(object, offset);
923 if (m != VM_PAGE_NULL && m->cleaning) {
924 PAGE_ASSERT_WAIT(m, interruptible);
925 vm_object_unlock(object);
9bccf70c 926 wait_result = thread_block(THREAD_CONTINUE_NULL);
927 vm_object_deallocate(object);
928 goto backoff;
929 } else {
930 vm_object_unlock(object);
931 vm_object_deallocate(object);
9bccf70c 932 thread_interrupt_level(interruptible_state);
933 return VM_FAULT_RETRY;
934 }
935 }
936
937 /*
938 * If the desired access to this page has
939 * been locked out, request that it be unlocked.
940 */
941
942 if (access_required & m->page_lock) {
943 if ((access_required & m->unlock_request) != access_required) {
944 vm_prot_t new_unlock_request;
945 kern_return_t rc;
946
947#if TRACEFAULTPAGE
948 dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready); /* (TEST/DEBUG) */
949#endif
950 if (!object->pager_ready) {
951 XPR(XPR_VM_FAULT,
952 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
953 access_required,
954 (integer_t)object, offset,
955 (integer_t)m, 0);
956 /* take an extra ref */
957 assert(object->ref_count > 0);
958 object->ref_count++;
959 vm_object_res_reference(object);
960 vm_fault_cleanup(object,
961 first_m);
962 counter(c_vm_fault_page_block_backoff_kernel++);
963 vm_object_lock(object);
964 assert(object->ref_count > 0);
965 if (!object->pager_ready) {
9bccf70c 966 wait_result = vm_object_assert_wait(
967 object,
968 VM_OBJECT_EVENT_PAGER_READY,
969 interruptible);
970 vm_object_unlock(object);
971 if (wait_result == THREAD_WAITING)
972 wait_result = thread_block(THREAD_CONTINUE_NULL);
973 vm_object_deallocate(object);
974 goto backoff;
975 } else {
976 vm_object_unlock(object);
977 vm_object_deallocate(object);
9bccf70c 978 thread_interrupt_level(interruptible_state);
979 return VM_FAULT_RETRY;
980 }
981 }
982
983 new_unlock_request = m->unlock_request =
984 (access_required | m->unlock_request);
985 vm_object_unlock(object);
986 XPR(XPR_VM_FAULT,
987 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
988 (integer_t)object, offset,
989 (integer_t)m, new_unlock_request, 0);
990 if ((rc = memory_object_data_unlock(
991 object->pager,
992 offset + object->paging_offset,
993 PAGE_SIZE,
994 new_unlock_request))
995 != KERN_SUCCESS) {
996 if (vm_fault_debug)
997 printf("vm_fault: memory_object_data_unlock failed\n");
998 vm_object_lock(object);
999 vm_fault_cleanup(object, first_m);
9bccf70c 1000 thread_interrupt_level(interruptible_state);
1001 return((rc == MACH_SEND_INTERRUPTED) ?
1002 VM_FAULT_INTERRUPTED :
1003 VM_FAULT_MEMORY_ERROR);
1004 }
1005 vm_object_lock(object);
1006 continue;
1007 }
1008
1009 XPR(XPR_VM_FAULT,
1010 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
1011 access_required, (integer_t)object,
1012 offset, (integer_t)m, 0);
1013 /* take an extra ref so object won't die */
1014 assert(object->ref_count > 0);
1015 object->ref_count++;
1016 vm_object_res_reference(object);
1017 vm_fault_cleanup(object, first_m);
1018 counter(c_vm_fault_page_block_backoff_kernel++);
1019 vm_object_lock(object);
1020 assert(object->ref_count > 0);
1021 m = vm_page_lookup(object, offset);
1022 if (m != VM_PAGE_NULL &&
1023 (access_required & m->page_lock) &&
1024 !((access_required & m->unlock_request) != access_required)) {
1025 PAGE_ASSERT_WAIT(m, interruptible);
1026 vm_object_unlock(object);
9bccf70c 1027 wait_result = thread_block(THREAD_CONTINUE_NULL);
1028 vm_object_deallocate(object);
1029 goto backoff;
1030 } else {
1031 vm_object_unlock(object);
1032 vm_object_deallocate(object);
9bccf70c 1033 thread_interrupt_level(interruptible_state);
1034 return VM_FAULT_RETRY;
1035 }
1036 }
1037 /*
1038 * We mark the page busy and leave it on
1039 * the pageout queues. If the pageout
1040 * daemon comes across it, then it will
1041 * remove the page.
1042 */
1043
1044#if TRACEFAULTPAGE
1045 dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1046#endif
1047
1048#if !VM_FAULT_STATIC_CONFIG
1049 if (!software_reference_bits) {
1050 vm_page_lock_queues();
1051 if (m->inactive)
1052 vm_stat.reactivations++;
1053
1054 VM_PAGE_QUEUES_REMOVE(m);
1055 vm_page_unlock_queues();
1056 }
1057#endif
1058 XPR(XPR_VM_FAULT,
1059 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
1060 (integer_t)object, offset, (integer_t)m, 0, 0);
1061 assert(!m->busy);
1062 m->busy = TRUE;
1063 assert(!m->absent);
1064 break;
1065 }
1066
1067 look_for_page =
1068 (object->pager_created) &&
1069 LOOK_FOR(object, offset) &&
1070 (!data_supply);
1071
1072#if TRACEFAULTPAGE
1073 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */
1074#endif
1075 if ((look_for_page || (object == first_object))
1076 && !must_be_resident
1077 && !(object->phys_contiguous)) {
1078 /*
1079 * Allocate a new page for this object/offset
1080 * pair.
1081 */
1082
1083 m = vm_page_grab_fictitious();
1084#if TRACEFAULTPAGE
1085 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
1086#endif
1087 if (m == VM_PAGE_NULL) {
1088 vm_fault_cleanup(object, first_m);
9bccf70c 1089 thread_interrupt_level(interruptible_state);
1090 return(VM_FAULT_FICTITIOUS_SHORTAGE);
1091 }
1092 vm_page_insert(m, object, offset);
1093 }
1094
0b4e3aa0 1095 if ((look_for_page && !must_be_resident)) {
1096 kern_return_t rc;
1097
1098 /*
1099 * If the memory manager is not ready, we
1100 * cannot make requests.
1101 */
1102 if (!object->pager_ready) {
1103#if TRACEFAULTPAGE
1104 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
1105#endif
1106 if(m != VM_PAGE_NULL)
1107 VM_PAGE_FREE(m);
1108 XPR(XPR_VM_FAULT,
1109 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
1110 (integer_t)object, offset, 0, 0, 0);
1111 /* take an extra ref so object won't die */
1112 assert(object->ref_count > 0);
1113 object->ref_count++;
1114 vm_object_res_reference(object);
1115 vm_fault_cleanup(object, first_m);
1116 counter(c_vm_fault_page_block_backoff_kernel++);
1117 vm_object_lock(object);
1118 assert(object->ref_count > 0);
1119 if (!object->pager_ready) {
9bccf70c 1120 wait_result = vm_object_assert_wait(object,
1121 VM_OBJECT_EVENT_PAGER_READY,
1122 interruptible);
1123 vm_object_unlock(object);
1124 if (wait_result == THREAD_WAITING)
1125 wait_result = thread_block(THREAD_CONTINUE_NULL);
1126 vm_object_deallocate(object);
1127 goto backoff;
1128 } else {
1129 vm_object_unlock(object);
1130 vm_object_deallocate(object);
9bccf70c 1131 thread_interrupt_level(interruptible_state);
1132 return VM_FAULT_RETRY;
1133 }
1134 }
1135
1136 if(object->phys_contiguous) {
1137 if(m != VM_PAGE_NULL) {
1138 VM_PAGE_FREE(m);
1139 m = VM_PAGE_NULL;
1140 }
1141 goto no_clustering;
1142 }
1143 if (object->internal) {
1144 /*
1145 * Requests to the default pager
1146 * must reserve a real page in advance,
1147 * because the pager's data-provided call
1148 * won't block for pages. IMPORTANT:
1149 * this acts as a throttling mechanism
1150 * for data_requests to the default
1151 * pager.
1152 */
1153
1154#if TRACEFAULTPAGE
1155 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1156#endif
1157 if (m->fictitious && !vm_page_convert(m)) {
1158 VM_PAGE_FREE(m);
1159 vm_fault_cleanup(object, first_m);
9bccf70c 1160 thread_interrupt_level(interruptible_state);
1161 return(VM_FAULT_MEMORY_SHORTAGE);
1162 }
1163 } else if (object->absent_count >
1164 vm_object_absent_max) {
1165 /*
1166 * If there are too many outstanding page
1167 * requests pending on this object, we
1168 * wait for them to be resolved now.
1169 */
1170
1171#if TRACEFAULTPAGE
1172 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1173#endif
1174 if(m != VM_PAGE_NULL)
1175 VM_PAGE_FREE(m);
1176 /* take an extra ref so object won't die */
1177 assert(object->ref_count > 0);
1178 object->ref_count++;
1179 vm_object_res_reference(object);
1180 vm_fault_cleanup(object, first_m);
1181 counter(c_vm_fault_page_block_backoff_kernel++);
1182 vm_object_lock(object);
1183 assert(object->ref_count > 0);
1184 if (object->absent_count > vm_object_absent_max) {
1185 vm_object_absent_assert_wait(object,
1186 interruptible);
1187 vm_object_unlock(object);
9bccf70c 1188 wait_result = thread_block(THREAD_CONTINUE_NULL);
1189 vm_object_deallocate(object);
1190 goto backoff;
1191 } else {
1192 vm_object_unlock(object);
1193 vm_object_deallocate(object);
9bccf70c 1194 thread_interrupt_level(interruptible_state);
1195 return VM_FAULT_RETRY;
1196 }
1197 }
1198
1199 /*
1200 * Indicate that the page is waiting for data
1201 * from the memory manager.
1202 */
1203
1204 if(m != VM_PAGE_NULL) {
1205
1206 m->list_req_pending = TRUE;
1207 m->absent = TRUE;
1208 m->unusual = TRUE;
1209 object->absent_count++;
1210
1211 }
1c79356b 1212
9bccf70c 1213no_clustering:
1214 cluster_start = offset;
1215 length = PAGE_SIZE;
1c79356b 1216
1217 /*
1218 * lengthen the cluster by the pages in the working set
1219 */
1220 if((map != NULL) &&
1221 (current_task()->dynamic_working_set != 0)) {
1222 cluster_end = cluster_start + length;
1223 /* tws values for start and end are just
1224 * suggestions. Therefore, as long as
1225 * build_cluster does not use pointers or
1226 * take action based on values that
1227 * could be affected by re-entrance we
1228 * do not need to take the map lock.
1229 */
9bccf70c 1230 cluster_end = offset + PAGE_SIZE_64;
91447636 1231 tws_build_cluster(
1232 current_task()->dynamic_working_set,
1233 object, &cluster_start,
9bccf70c 1234 &cluster_end, 0x40000);
1235 length = cluster_end - cluster_start;
1236 }
1237#if TRACEFAULTPAGE
1238 dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */
1239#endif
1240 /*
1241 * We have a busy page, so we can
1242 * release the object lock.
1243 */
1244 vm_object_unlock(object);
1245
1246 /*
1247 * Call the memory manager to retrieve the data.
1248 */
1249
1250 if (type_of_fault)
91447636 1251 *type_of_fault = ((int)length << 8) | DBG_PAGEIN_FAULT;
1252 VM_STAT(pageins++);
1253 current_task()->pageins++;
0b4e3aa0 1254 bumped_pagein = TRUE;
1255
1256 /*
1257 * If this object uses a copy_call strategy,
1258 * and we are interested in a copy of this object
1259 * (having gotten here only by following a
1260 * shadow chain), then tell the memory manager
1261 * via a flag added to the desired_access
1262 * parameter, so that it can detect a race
1263 * between our walking down the shadow chain
1264 * and its pushing pages up into a copy of
1265 * the object that it manages.
1266 */
1267
1268 if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1269 object != first_object) {
1270 wants_copy_flag = VM_PROT_WANTS_COPY;
1271 } else {
1272 wants_copy_flag = VM_PROT_NONE;
1273 }
1274
1275 XPR(XPR_VM_FAULT,
1276 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1277 (integer_t)object, offset, (integer_t)m,
1278 access_required | wants_copy_flag, 0);
1279
1c79356b 1280 rc = memory_object_data_request(object->pager,
1281 cluster_start + object->paging_offset,
1282 length,
1283 access_required | wants_copy_flag);
1284
1285
1286#if TRACEFAULTPAGE
1287 dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1288#endif
1289 if (rc != KERN_SUCCESS) {
1290 if (rc != MACH_SEND_INTERRUPTED
1291 && vm_fault_debug)
91447636 1292 printf("%s(0x%x, 0x%llx, 0x%llx, 0x%x) failed, rc=%d\n",
1293 "memory_object_data_request",
1294 object->pager,
1c79356b 1295 cluster_start + object->paging_offset,
0b4e3aa0 1296 length, access_required, rc);
1297 /*
1298 * Don't want to leave a busy page around,
1299 * but the data request may have blocked,
1300 * so check if it's still there and busy.
1301 */
1302 if(!object->phys_contiguous) {
1303 vm_object_lock(object);
1304 for (; length; length -= PAGE_SIZE,
1305 cluster_start += PAGE_SIZE_64) {
1306 vm_page_t p;
1307 if ((p = vm_page_lookup(object,
1c79356b 1308 cluster_start))
1309 && p->absent && p->busy
1310 && p != first_m) {
1311 VM_PAGE_FREE(p);
1312 }
1313 }
1314 }
1315 vm_fault_cleanup(object, first_m);
9bccf70c 1316 thread_interrupt_level(interruptible_state);
1317 return((rc == MACH_SEND_INTERRUPTED) ?
1318 VM_FAULT_INTERRUPTED :
1319 VM_FAULT_MEMORY_ERROR);
1320 }
1321
1322 vm_object_lock(object);
1323 if ((interruptible != THREAD_UNINT) &&
1324 (current_thread()->state & TH_ABORT)) {
1325 vm_fault_cleanup(object, first_m);
9bccf70c 1326 thread_interrupt_level(interruptible_state);
1327 return(VM_FAULT_INTERRUPTED);
1328 }
1329 if (m == VM_PAGE_NULL &&
1330 object->phys_contiguous) {
1331 /*
1332 * No page here means that the object we
1333 * initially looked up was "physically
1334 * contiguous" (i.e. device memory). However,
1335 * with Virtual VRAM, the object might not
1336 * be backed by that device memory anymore,
1337 * so we're done here only if the object is
1338 * still "phys_contiguous".
1339 * Otherwise, if the object is no longer
1340 * "phys_contiguous", we need to retry the
1341 * page fault against the object's new backing
1342 * store (different memory object).
1343 */
0b4e3aa0 1344 break;
1345 }
1346
1347 /*
1348 * Retry with same object/offset, since new data may
1349 * be in a different page (i.e., m is meaningless at
1350 * this point).
1351 */
1352 continue;
1353 }
1354
1355 /*
1356 * The only case in which we get here is if
1357 * the object has no pager (or unwiring). If the pager doesn't
1358 * have the page, this is handled in the m->absent case above
1359 * (and if you change things here you should look above).
1360 */
1361#if TRACEFAULTPAGE
1362 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1363#endif
1364 if (object == first_object)
1365 first_m = m;
1366 else
1367 assert(m == VM_PAGE_NULL);
1368
1369 XPR(XPR_VM_FAULT,
1370 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1371 (integer_t)object, offset, (integer_t)m,
1372 (integer_t)object->shadow, 0);
1373 /*
1374 * Move on to the next object. Lock the next
1375 * object before unlocking the current one.
1376 */
1377 next_object = object->shadow;
1378 if (next_object == VM_OBJECT_NULL) {
1379 assert(!must_be_resident);
1380 /*
1381 * If there's no object left, fill the page
1382 * in the top object with zeros. But first we
1383 * need to allocate a real page.
1384 */
1385
1386 if (object != first_object) {
1387 vm_object_paging_end(object);
1388 vm_object_unlock(object);
1389
1390 object = first_object;
1391 offset = first_offset;
1392 vm_object_lock(object);
1393 }
1394
1395 m = first_m;
1396 assert(m->object == object);
1397 first_m = VM_PAGE_NULL;
1398
1399 if(m == VM_PAGE_NULL) {
1400 m = vm_page_grab();
1401 if (m == VM_PAGE_NULL) {
1402 vm_fault_cleanup(
1403 object, VM_PAGE_NULL);
1404 thread_interrupt_level(
1405 interruptible_state);
1406 return(VM_FAULT_MEMORY_SHORTAGE);
1407 }
1408 vm_page_insert(
1409 m, object, offset);
1410 }
1411
1412 if (object->shadow_severed) {
1413 VM_PAGE_FREE(m);
1414 vm_fault_cleanup(object, VM_PAGE_NULL);
9bccf70c 1415 thread_interrupt_level(interruptible_state);
1416 return VM_FAULT_MEMORY_ERROR;
1417 }
1418
1419 /*
1420 * Are we protecting the system from
1421 * backing store exhaustion? If so,
1422 * sleep unless we are privileged.
1423 */
1424
1425 if(vm_backing_store_low) {
1426 if(!(current_task()->priv_flags
1427 & VM_BACKING_STORE_PRIV)) {
1428 assert_wait((event_t)
1429 &vm_backing_store_low,
1430 THREAD_UNINT);
1431 VM_PAGE_FREE(m);
1432 vm_fault_cleanup(object, VM_PAGE_NULL);
91447636 1433 thread_block(THREAD_CONTINUE_NULL);
1434 thread_interrupt_level(
1435 interruptible_state);
1436 return(VM_FAULT_RETRY);
1437 }
1438 }
1439
1440 if (VM_PAGE_THROTTLED() ||
1441 (m->fictitious && !vm_page_convert(m))) {
1442 VM_PAGE_FREE(m);
1443 vm_fault_cleanup(object, VM_PAGE_NULL);
9bccf70c 1444 thread_interrupt_level(interruptible_state);
1445 return(VM_FAULT_MEMORY_SHORTAGE);
1446 }
0b4e3aa0 1447 m->no_isync = FALSE;
1448
1449 if (!no_zero_fill) {
1450 vm_object_unlock(object);
1451 vm_page_zero_fill(m);
1c79356b 1452 vm_object_lock(object);
55e303ae 1453
1454 if (type_of_fault)
1455 *type_of_fault = DBG_ZERO_FILL_FAULT;
1456 VM_STAT(zero_fill_count++);
1457 }
1458 if (bumped_pagein == TRUE) {
1459 VM_STAT(pageins--);
1460 current_task()->pageins--;
1461 }
1462 vm_page_lock_queues();
1463 VM_PAGE_QUEUES_REMOVE(m);
1464 assert(!m->laundry);
1465 assert(m->object != kernel_object);
1466 assert(m->pageq.next == NULL &&
1467 m->pageq.prev == NULL);
1468 if(m->object->size > 0x200000) {
1469 m->zero_fill = TRUE;
1470 /* depends on the queues lock */
1471 vm_zf_count += 1;
1472 queue_enter(&vm_page_queue_zf,
1473 m, vm_page_t, pageq);
1474 } else {
1475 queue_enter(
1476 &vm_page_queue_inactive,
1477 m, vm_page_t, pageq);
1478 }
1479 m->page_ticket = vm_page_ticket;
1480 vm_page_ticket_roll++;
1481 if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1482 vm_page_ticket_roll = 0;
1483 if(vm_page_ticket ==
1484 VM_PAGE_TICKET_ROLL_IDS)
1485 vm_page_ticket= 0;
1486 else
1487 vm_page_ticket++;
1488 }
1489 m->inactive = TRUE;
1490 vm_page_inactive_count++;
1491 vm_page_unlock_queues();
1492#if 0
1493 pmap_clear_modify(m->phys_page);
1494#endif
1495 break;
1496 }
1497 else {
1498 if ((object != first_object) || must_be_resident)
1499 vm_object_paging_end(object);
1500 offset += object->shadow_offset;
1501 hi_offset += object->shadow_offset;
1502 lo_offset += object->shadow_offset;
1503 access_required = VM_PROT_READ;
1504 vm_object_lock(next_object);
1505 vm_object_unlock(object);
1506 object = next_object;
1507 vm_object_paging_begin(object);
1508 }
1509 }
1510
1511 /*
1512 * PAGE HAS BEEN FOUND.
1513 *
1514 * This page (m) is:
1515 * busy, so that we can play with it;
1516 * not absent, so that nobody else will fill it;
1517 * possibly eligible for pageout;
1518 *
1519 * The top-level page (first_m) is:
1520 * VM_PAGE_NULL if the page was found in the
1521 * top-level object;
1522 * busy, not absent, and ineligible for pageout.
1523 *
1524 * The current object (object) is locked. A paging
1525 * reference is held for the current and top-level
1526 * objects.
1527 */
1528
1529#if TRACEFAULTPAGE
1530 dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1531#endif
1532#if EXTRA_ASSERTIONS
1533 if(m != VM_PAGE_NULL) {
1534 assert(m->busy && !m->absent);
1535 assert((first_m == VM_PAGE_NULL) ||
1536 (first_m->busy && !first_m->absent &&
1537 !first_m->active && !first_m->inactive));
1538 }
1539#endif /* EXTRA_ASSERTIONS */
1540
1541 /*
1542 * ENCRYPTED SWAP:
1543 * If we found a page, we must have decrypted it before we
1544 * get here...
1545 */
1546 if (m != VM_PAGE_NULL) {
1547 ASSERT_PAGE_DECRYPTED(m);
1548 }
1549
1550 XPR(XPR_VM_FAULT,
1551 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1552 (integer_t)object, offset, (integer_t)m,
1553 (integer_t)first_object, (integer_t)first_m);
1554 /*
1555 * If the page is being written, but isn't
1556 * already owned by the top-level object,
1557 * we have to copy it into a new page owned
1558 * by the top-level object.
1559 */
1560
0b4e3aa0 1561 if ((object != first_object) && (m != VM_PAGE_NULL)) {
1562 /*
1563 * We only really need to copy if we
1564 * want to write it.
1565 */
1566
1567#if TRACEFAULTPAGE
1568 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1569#endif
1570 if (fault_type & VM_PROT_WRITE) {
1571 vm_page_t copy_m;
1572
1573 assert(!must_be_resident);
1574
1575 /*
1576 * Are we protecting the system from
1577 * backing store exhaustion? If so,
1578 * sleep unless we are privileged.
1579 */
1580
1581 if(vm_backing_store_low) {
1582 if(!(current_task()->priv_flags
1583 & VM_BACKING_STORE_PRIV)) {
1584 assert_wait((event_t)
1585 &vm_backing_store_low,
1586 THREAD_UNINT);
1587 RELEASE_PAGE(m);
1588 vm_fault_cleanup(object, first_m);
91447636 1589 thread_block(THREAD_CONTINUE_NULL);
1590 thread_interrupt_level(
1591 interruptible_state);
1592 return(VM_FAULT_RETRY);
1593 }
1594 }
1595
1596 /*
1597 * If we try to collapse first_object at this
1598 * point, we may deadlock when we try to get
1599 * the lock on an intermediate object (since we
1600 * have the bottom object locked). We can't
1601 * unlock the bottom object, because the page
1602 * we found may move (by collapse) if we do.
1603 *
1604 * Instead, we first copy the page. Then, when
1605 * we have no more use for the bottom object,
1606 * we unlock it and try to collapse.
1607 *
1608 * Note that we copy the page even if we didn't
1609 * need to... that's the breaks.
1610 */
1611
1612 /*
1613 * Allocate a page for the copy
1614 */
1615 copy_m = vm_page_grab();
1616 if (copy_m == VM_PAGE_NULL) {
1617 RELEASE_PAGE(m);
1618 vm_fault_cleanup(object, first_m);
9bccf70c 1619 thread_interrupt_level(interruptible_state);
1620 return(VM_FAULT_MEMORY_SHORTAGE);
1621 }
1622
1623
1624 XPR(XPR_VM_FAULT,
1625 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1626 (integer_t)object, offset,
1627 (integer_t)m, (integer_t)copy_m, 0);
1628 vm_page_copy(m, copy_m);
1629
1630 /*
1631 * If another map is truly sharing this
1632 * page with us, we have to flush all
1633 * uses of the original page, since we
1634 * can't distinguish those which want the
1635 * original from those which need the
1636 * new copy.
1637 *
1638 * XXXO If we know that only one map has
1639 * access to this page, then we could
91447636 1640 * avoid the pmap_disconnect() call.
1641 */
1642
1643 vm_page_lock_queues();
1644 assert(!m->cleaning);
91447636 1645 pmap_disconnect(m->phys_page);
1646 vm_page_deactivate(m);
1647 copy_m->dirty = TRUE;
1648 /*
1649 * Setting reference here prevents this fault from
1650 * being counted as a (per-thread) reactivate as well
1651 * as a copy-on-write.
1652 */
1653 first_m->reference = TRUE;
1654 vm_page_unlock_queues();
1655
1656 /*
1657 * We no longer need the old page or object.
1658 */
1659
1660 PAGE_WAKEUP_DONE(m);
1661 vm_object_paging_end(object);
1662 vm_object_unlock(object);
1663
1664 if (type_of_fault)
1665 *type_of_fault = DBG_COW_FAULT;
1666 VM_STAT(cow_faults++);
1667 current_task()->cow_faults++;
1668 object = first_object;
1669 offset = first_offset;
1670
1671 vm_object_lock(object);
1672 VM_PAGE_FREE(first_m);
1673 first_m = VM_PAGE_NULL;
1674 assert(copy_m->busy);
1675 vm_page_insert(copy_m, object, offset);
1676 m = copy_m;
1677
1678 /*
1679 * Now that we've gotten the copy out of the
1680 * way, let's try to collapse the top object.
1681 * But we have to play ugly games with
1682 * paging_in_progress to do that...
1683 */
1684
1685 vm_object_paging_end(object);
5d5c5d0d 1686 vm_object_collapse(object, offset, TRUE);
1687 vm_object_paging_begin(object);
1688
1689 }
1690 else {
1691 *protection &= (~VM_PROT_WRITE);
1692 }
1693 }
1694
1695 /*
1696 * Now check whether the page needs to be pushed into the
1697 * copy object. The use of asymmetric copy on write for
1698 * shared temporary objects means that we may do two copies to
1699 * satisfy the fault; one above to get the page from a
1700 * shadowed object, and one here to push it into the copy.
1701 */
1702
9bccf70c 1703 while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
0b4e3aa0 1704 (m!= VM_PAGE_NULL)) {
1705 vm_object_offset_t copy_offset;
1706 vm_page_t copy_m;
1707
1708#if TRACEFAULTPAGE
1709 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1710#endif
1711 /*
1712 * If the page is being written, but hasn't been
1713 * copied to the copy-object, we have to copy it there.
1714 */
1715
1716 if ((fault_type & VM_PROT_WRITE) == 0) {
1717 *protection &= ~VM_PROT_WRITE;
1718 break;
1719 }
1720
1721 /*
1722 * If the page was guaranteed to be resident,
1723 * we must have already performed the copy.
1724 */
1725
1726 if (must_be_resident)
1727 break;
1728
1729 /*
1730 * Try to get the lock on the copy_object.
1731 */
1732 if (!vm_object_lock_try(copy_object)) {
1733 vm_object_unlock(object);
1734
1735 mutex_pause(); /* wait a bit */
1736
1737 vm_object_lock(object);
1738 continue;
1739 }
1740
1741 /*
1742 * Make another reference to the copy-object,
1743 * to keep it from disappearing during the
1744 * copy.
1745 */
1746 assert(copy_object->ref_count > 0);
1747 copy_object->ref_count++;
1748 VM_OBJ_RES_INCR(copy_object);
1749
1750 /*
1751 * Does the page exist in the copy?
1752 */
1753 copy_offset = first_offset - copy_object->shadow_offset;
1754 if (copy_object->size <= copy_offset)
1755 /*
1756 * Copy object doesn't cover this page -- do nothing.
1757 */
1758 ;
1759 else if ((copy_m =
1760 vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1761 /* Page currently exists in the copy object */
1762 if (copy_m->busy) {
1763 /*
1764 * If the page is being brought
1765 * in, wait for it and then retry.
1766 */
1767 RELEASE_PAGE(m);
1768 /* take an extra ref so object won't die */
1769 assert(copy_object->ref_count > 0);
1770 copy_object->ref_count++;
1771 vm_object_res_reference(copy_object);
1772 vm_object_unlock(copy_object);
1773 vm_fault_cleanup(object, first_m);
1774 counter(c_vm_fault_page_block_backoff_kernel++);
1775 vm_object_lock(copy_object);
1776 assert(copy_object->ref_count > 0);
1777 VM_OBJ_RES_DECR(copy_object);
1778 copy_object->ref_count--;
1779 assert(copy_object->ref_count > 0);
1780 copy_m = vm_page_lookup(copy_object, copy_offset);
1781 /*
1782 * ENCRYPTED SWAP:
1783 * it's OK if the "copy_m" page is encrypted,
1784 * because we're not moving it nor handling its
1785 * contents.
1786 */
1787 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1788 PAGE_ASSERT_WAIT(copy_m, interruptible);
1789 vm_object_unlock(copy_object);
9bccf70c 1790 wait_result = thread_block(THREAD_CONTINUE_NULL);
1791 vm_object_deallocate(copy_object);
1792 goto backoff;
1793 } else {
1794 vm_object_unlock(copy_object);
1795 vm_object_deallocate(copy_object);
9bccf70c 1796 thread_interrupt_level(interruptible_state);
1797 return VM_FAULT_RETRY;
1798 }
1799 }
1800 }
1801 else if (!PAGED_OUT(copy_object, copy_offset)) {
1802 /*
1803 * If PAGED_OUT is TRUE, then the page used to exist
1804 * in the copy-object, and has already been paged out.
1805 * We don't need to repeat this. If PAGED_OUT is
1806 * FALSE, then either we don't know (!pager_created,
1807 * for example) or it hasn't been paged out.
1808 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1809 * We must copy the page to the copy object.
1810 */
1811
1812 /*
1813 * Are we protecting the system from
1814 * backing store exhaustion? If so,
1815 * sleep unless we are privileged.
1816 */
1817
1818 if(vm_backing_store_low) {
1819 if(!(current_task()->priv_flags
1820 & VM_BACKING_STORE_PRIV)) {
1821 assert_wait((event_t)
1822 &vm_backing_store_low,
1823 THREAD_UNINT);
1824 RELEASE_PAGE(m);
1825 VM_OBJ_RES_DECR(copy_object);
1826 copy_object->ref_count--;
1827 assert(copy_object->ref_count > 0);
1828 vm_object_unlock(copy_object);
1829 vm_fault_cleanup(object, first_m);
91447636 1830 thread_block(THREAD_CONTINUE_NULL);
55e303ae
A
1831 thread_interrupt_level(
1832 interruptible_state);
1833 return(VM_FAULT_RETRY);
1834 }
1835 }
1836
1c79356b
A
1837 /*
1838 * Allocate a page for the copy
1839 */
1840 copy_m = vm_page_alloc(copy_object, copy_offset);
1841 if (copy_m == VM_PAGE_NULL) {
1842 RELEASE_PAGE(m);
1843 VM_OBJ_RES_DECR(copy_object);
1844 copy_object->ref_count--;
1845 assert(copy_object->ref_count > 0);
1846 vm_object_unlock(copy_object);
1847 vm_fault_cleanup(object, first_m);
9bccf70c 1848 thread_interrupt_level(interruptible_state);
1c79356b
A
1849 return(VM_FAULT_MEMORY_SHORTAGE);
1850 }
1851
1852 /*
1853 * Must copy page into copy-object.
1854 */
1855
1856 vm_page_copy(m, copy_m);
1857
1858 /*
1859 * If the old page was in use by any users
1860 * of the copy-object, it must be removed
1861 * from all pmaps. (We can't know which
1862 * pmaps use it.)
1863 */
1864
1865 vm_page_lock_queues();
1866 assert(!m->cleaning);
91447636 1867 pmap_disconnect(m->phys_page);
1c79356b
A
1868 copy_m->dirty = TRUE;
1869 vm_page_unlock_queues();
1870
1871 /*
1872 * If there's a pager, then immediately
1873 * page out this page, using the "initialize"
1874 * option. Else, we use the copy.
1875 */
1876
1877 if
1878#if MACH_PAGEMAP
1879 ((!copy_object->pager_created) ||
1880 vm_external_state_get(
1881 copy_object->existence_map, copy_offset)
1882 == VM_EXTERNAL_STATE_ABSENT)
1883#else
1884 (!copy_object->pager_created)
1885#endif
1886 {
1887 vm_page_lock_queues();
1888 vm_page_activate(copy_m);
1889 vm_page_unlock_queues();
1890 PAGE_WAKEUP_DONE(copy_m);
1891 }
1892 else {
1893 assert(copy_m->busy == TRUE);
1894
1895 /*
1896 * The page is already ready for pageout:
1897 * not on pageout queues and busy.
1898 * Unlock everything except the
1899 * copy_object itself.
1900 */
1901
1902 vm_object_unlock(object);
1903
1904 /*
1905 * Write the page to the copy-object,
1906 * flushing it from the kernel.
1907 */
1908
1909 vm_pageout_initialize_page(copy_m);
1910
1911 /*
1912 * Since the pageout may have
1913 * temporarily dropped the
1914 * copy_object's lock, we
1915 * check whether we'll have
1916 * to deallocate the hard way.
1917 */
1918
1919 if ((copy_object->shadow != object) ||
1920 (copy_object->ref_count == 1)) {
1921 vm_object_unlock(copy_object);
1922 vm_object_deallocate(copy_object);
1923 vm_object_lock(object);
1924 continue;
1925 }
1926
1927 /*
1928 * Pick back up the old object's
1929 * lock. [It is safe to do so,
1930 * since it must be deeper in the
1931 * object tree.]
1932 */
1933
1934 vm_object_lock(object);
1935 }
1936
1937 /*
1938 * Because we're pushing a page upward
1939 * in the object tree, we must restart
1940 * any faults that are waiting here.
1941 * [Note that this is an expansion of
1942 * PAGE_WAKEUP that uses the THREAD_RESTART
1943 * wait result]. Can't turn off the page's
1944 * busy bit because we're not done with it.
1945 */
1946
1947 if (m->wanted) {
1948 m->wanted = FALSE;
1949 thread_wakeup_with_result((event_t) m,
1950 THREAD_RESTART);
1951 }
1952 }
1953
1954 /*
1955 * The reference count on copy_object must be
1956 * at least 2: one for our extra reference,
1957 * and at least one from the outside world
1958 * (we checked that when we last locked
1959 * copy_object).
1960 */
1961 copy_object->ref_count--;
1962 assert(copy_object->ref_count > 0);
1963 VM_OBJ_RES_DECR(copy_object);
1964 vm_object_unlock(copy_object);
1965
1966 break;
1967 }
1968
1969 *result_page = m;
1970 *top_page = first_m;
1971
1972 XPR(XPR_VM_FAULT,
1973 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1974 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1975 /*
1976 * If the page can be written, assume that it will be.
 1977 * [Earlier, we restricted the permission to allow write
1978 * access only if the fault so required, so we don't
1979 * mark read-only data as dirty.]
1980 */
1981
55e303ae
A
1982
1983 if(m != VM_PAGE_NULL) {
1c79356b 1984#if !VM_FAULT_STATIC_CONFIG
55e303ae
A
1985 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
1986 m->dirty = TRUE;
1c79356b 1987#endif
55e303ae
A
1988 if (vm_page_deactivate_behind)
1989 vm_fault_deactivate_behind(object, offset, behavior);
1990 } else {
1991 vm_object_unlock(object);
1c79356b 1992 }
55e303ae
A
1993 thread_interrupt_level(interruptible_state);
1994
1c79356b
A
1995#if TRACEFAULTPAGE
1996 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */
1997#endif
1c79356b
A
1998 return(VM_FAULT_SUCCESS);
1999
2000#if 0
2001 block_and_backoff:
2002 vm_fault_cleanup(object, first_m);
2003
2004 counter(c_vm_fault_page_block_backoff_kernel++);
9bccf70c 2005 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
2006#endif
2007
2008 backoff:
9bccf70c 2009 thread_interrupt_level(interruptible_state);
1c79356b
A
2010 if (wait_result == THREAD_INTERRUPTED)
2011 return VM_FAULT_INTERRUPTED;
2012 return VM_FAULT_RETRY;
2013
2014#undef RELEASE_PAGE
2015}
2016
55e303ae
A
2017/*
2018 * Routine: vm_fault_tws_insert
2019 * Purpose:
2020 * Add fault information to the task working set.
2021 * Implementation:
2022 * We always insert the base object/offset pair
 2023 * rather than the actual object/offset.
2024 * Assumptions:
91447636 2025 * Map and real_map locked.
55e303ae
A
2026 * Object locked and referenced.
2027 * Returns:
 2028 * TRUE if the startup file should be written.
 2029 * Returns with the object locked and still referenced,
 2030 * though the object lock may be dropped temporarily.
2031 */
2032static boolean_t
2033vm_fault_tws_insert(
2034 vm_map_t map,
91447636
A
2035 vm_map_t real_map,
2036 vm_map_offset_t vaddr,
55e303ae
A
2037 vm_object_t object,
2038 vm_object_offset_t offset)
2039{
2040 tws_hash_line_t line;
2041 task_t task;
2042 kern_return_t kr;
2043 boolean_t result = FALSE;
55e303ae
A
2044
2045 /* Avoid possible map lock deadlock issues */
2046 if (map == kernel_map || map == kalloc_map ||
91447636 2047 real_map == kernel_map || real_map == kalloc_map)
55e303ae
A
2048 return result;
2049
2050 task = current_task();
2051 if (task->dynamic_working_set != 0) {
2052 vm_object_t base_object;
2053 vm_object_t base_shadow;
2054 vm_object_offset_t base_offset;
2055 base_object = object;
2056 base_offset = offset;
91447636 2057 while ((base_shadow = base_object->shadow)) {
55e303ae
A
2058 vm_object_lock(base_shadow);
2059 vm_object_unlock(base_object);
2060 base_offset +=
91447636 2061 base_object->shadow_offset;
55e303ae
A
2062 base_object = base_shadow;
2063 }
91447636 2064 kr = tws_lookup(
55e303ae
A
2065 task->dynamic_working_set,
2066 base_offset, base_object,
2067 &line);
2068 if (kr == KERN_OPERATION_TIMED_OUT){
2069 result = TRUE;
2070 if (base_object != object) {
2071 vm_object_unlock(base_object);
2072 vm_object_lock(object);
2073 }
2074 } else if (kr != KERN_SUCCESS) {
2075 if(base_object != object)
2076 vm_object_reference_locked(base_object);
91447636 2077 kr = tws_insert(
55e303ae
A
2078 task->dynamic_working_set,
2079 base_offset, base_object,
91447636 2080 vaddr, real_map);
55e303ae
A
2081 if(base_object != object) {
2082 vm_object_unlock(base_object);
2083 vm_object_deallocate(base_object);
2084 }
2085 if(kr == KERN_NO_SPACE) {
2086 if (base_object == object)
2087 vm_object_unlock(object);
2088 tws_expand_working_set(
2089 task->dynamic_working_set,
2090 TWS_HASH_LINE_COUNT,
2091 FALSE);
2092 if (base_object == object)
2093 vm_object_lock(object);
2094 } else if(kr == KERN_OPERATION_TIMED_OUT) {
2095 result = TRUE;
2096 }
2097 if(base_object != object)
2098 vm_object_lock(object);
2099 } else if (base_object != object) {
2100 vm_object_unlock(base_object);
2101 vm_object_lock(object);
2102 }
2103 }
2104 return result;
2105}
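/*
 *	Usage sketch (matching the fast-path call later in this file):
 *	the caller holds the object lock and brackets the call with a
 *	paging reference, e.g.
 *
 *		vm_object_paging_begin(object);
 *		write_startup_file =
 *			vm_fault_tws_insert(map, real_map, vaddr,
 *					    object, cur_offset);
 *		vm_object_paging_end(object);
 *
 *	and, once the maps have been unlocked, calls
 *	tws_send_startup_info(current_task()) if the result was TRUE.
 */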
2106
1c79356b
A
2107/*
2108 * Routine: vm_fault
2109 * Purpose:
2110 * Handle page faults, including pseudo-faults
2111 * used to change the wiring status of pages.
2112 * Returns:
2113 * Explicit continuations have been removed.
2114 * Implementation:
2115 * vm_fault and vm_fault_page save mucho state
2116 * in the moral equivalent of a closure. The state
2117 * structure is allocated when first entering vm_fault
2118 * and deallocated when leaving vm_fault.
2119 */
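/*
 *	Calling sketch: within this file, vm_fault_wire() and
 *	vm_fault_unwire() below invoke this routine as, e.g.,
 *
 *		vm_fault(map, va, VM_PROT_NONE, TRUE,
 *			 THREAD_UNINT, pmap, pmap_addr);
 *
 *	supplying a caller_pmap/caller_pmap_addr pair when the mapping
 *	must be entered in a pmap other than the map's own.  Hardware
 *	fault handlers are assumed to pass caller_pmap == NULL, in
 *	which case real_map->pmap is used.
 */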
2120
91447636
A
2121extern int _map_enter_debug;
2122
1c79356b
A
2123kern_return_t
2124vm_fault(
2125 vm_map_t map,
91447636 2126 vm_map_offset_t vaddr,
1c79356b
A
2127 vm_prot_t fault_type,
2128 boolean_t change_wiring,
9bccf70c
A
2129 int interruptible,
2130 pmap_t caller_pmap,
91447636 2131 vm_map_offset_t caller_pmap_addr)
1c79356b
A
2132{
 2133 vm_map_version_t version; /* Map version for verification */
2134 boolean_t wired; /* Should mapping be wired down? */
2135 vm_object_t object; /* Top-level object */
2136 vm_object_offset_t offset; /* Top-level offset */
2137 vm_prot_t prot; /* Protection for mapping */
2138 vm_behavior_t behavior; /* Expected paging behavior */
91447636 2139 vm_map_offset_t lo_offset, hi_offset;
1c79356b
A
2140 vm_object_t old_copy_object; /* Saved copy object */
2141 vm_page_t result_page; /* Result of vm_fault_page */
2142 vm_page_t top_page; /* Placeholder page */
2143 kern_return_t kr;
2144
2145 register
2146 vm_page_t m; /* Fast access to result_page */
91447636 2147 kern_return_t error_code = 0; /* page error reasons */
1c79356b
A
2148 register
2149 vm_object_t cur_object;
2150 register
2151 vm_object_offset_t cur_offset;
2152 vm_page_t cur_m;
2153 vm_object_t new_object;
2154 int type_of_fault;
91447636 2155 vm_map_t real_map = map;
1c79356b
A
2156 vm_map_t original_map = map;
2157 pmap_t pmap = NULL;
1c79356b 2158 boolean_t interruptible_state;
9bccf70c
A
2159 unsigned int cache_attr;
2160 int write_startup_file = 0;
91447636 2161 boolean_t need_activation;
5d5c5d0d 2162 vm_prot_t original_fault_type;
1c79356b 2163
de355530 2164
1c79356b
A
2165 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
2166 vaddr,
2167 0,
2168 0,
2169 0,
2170 0);
2171
5d5c5d0d
A
2172 if (get_preemption_level() != 0) {
2173 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2174 vaddr,
2175 0,
2176 KERN_FAILURE,
2177 0,
2178 0);
2179
2180 return (KERN_FAILURE);
9bccf70c 2181 }
1c79356b 2182
9bccf70c 2183 interruptible_state = thread_interrupt_level(interruptible);
1c79356b
A
2184
2185 /*
 2186 * assume we will hit a page in the cache;
 2187 * otherwise, explicitly override with
 2188 * the real fault type once we determine it
2189 */
2190 type_of_fault = DBG_CACHE_HIT_FAULT;
2191
2192 VM_STAT(faults++);
2193 current_task()->faults++;
2194
5d5c5d0d
A
2195 original_fault_type = fault_type;
2196
1c79356b
A
2197 RetryFault: ;
2198
2199 /*
2200 * Find the backing store object and offset into
2201 * it to begin the search.
2202 */
5d5c5d0d 2203 fault_type = original_fault_type;
1c79356b
A
2204 map = original_map;
2205 vm_map_lock_read(map);
2206 kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
2207 &object, &offset,
2208 &prot, &wired,
91447636
A
2209 &behavior, &lo_offset, &hi_offset, &real_map);
2210
2211//if (_map_enter_debug)printf("vm_map_lookup_locked(map=0x%x, addr=0x%llx, prot=%d wired=%d) = %d\n", map, vaddr, prot, wired, kr);
1c79356b 2212
91447636 2213 pmap = real_map->pmap;
1c79356b
A
2214
2215 if (kr != KERN_SUCCESS) {
2216 vm_map_unlock_read(map);
2217 goto done;
2218 }
2219
2220 /*
2221 * If the page is wired, we must fault for the current protection
2222 * value, to avoid further faults.
2223 */
2224
2225 if (wired)
2226 fault_type = prot | VM_PROT_WRITE;
2227
2228#if VM_FAULT_CLASSIFY
2229 /*
2230 * Temporary data gathering code
2231 */
2232 vm_fault_classify(object, offset, fault_type);
2233#endif
2234 /*
2235 * Fast fault code. The basic idea is to do as much as
2236 * possible while holding the map lock and object locks.
2237 * Busy pages are not used until the object lock has to
2238 * be dropped to do something (copy, zero fill, pmap enter).
2239 * Similarly, paging references aren't acquired until that
2240 * point, and object references aren't used.
2241 *
2242 * If we can figure out what to do
2243 * (zero fill, copy on write, pmap enter) while holding
2244 * the locks, then it gets done. Otherwise, we give up,
2245 * and use the original fault path (which doesn't hold
2246 * the map lock, and relies on busy pages).
2247 * The give up cases include:
2248 * - Have to talk to pager.
2249 * - Page is busy, absent or in error.
2250 * - Pager has locked out desired access.
2251 * - Fault needs to be restarted.
2252 * - Have to push page into copy object.
2253 *
2254 * The code is an infinite loop that moves one level down
2255 * the shadow chain each time. cur_object and cur_offset
2256 * refer to the current object being examined. object and offset
2257 * are the original object from the map. The loop is at the
2258 * top level if and only if object and cur_object are the same.
2259 *
2260 * Invariants: Map lock is held throughout. Lock is held on
2261 * original object and cur_object (if different) when
2262 * continuing or exiting loop.
2263 *
2264 */
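	/*
	 * In outline (a sketch only -- the real loop below also copes
	 * with busy, unusual and encrypted pages):
	 *
	 *	cur_object = object; cur_offset = offset;
	 *	while (TRUE) {
	 *		m = vm_page_lookup(cur_object, cur_offset);
	 *		if (m != VM_PAGE_NULL)
	 *			map it in, or copy it up into 'object';
	 *		else if (cur_object->shadow == VM_OBJECT_NULL)
	 *			zero fill a fresh page in 'object';
	 *		else {
	 *			cur_offset += cur_object->shadow_offset;
	 *			cur_object = cur_object->shadow;
	 *		}
	 *	}
	 */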
2265
2266
2267 /*
2268 * If this page is to be inserted in a copy delay object
2269 * for writing, and if the object has a copy, then the
 2270 * copy delay strategy is implemented in the slow fault path (vm_fault_page).
2271 */
2272 if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
2273 object->copy == VM_OBJECT_NULL ||
2274 (fault_type & VM_PROT_WRITE) == 0) {
2275 cur_object = object;
2276 cur_offset = offset;
2277
2278 while (TRUE) {
2279 m = vm_page_lookup(cur_object, cur_offset);
2280 if (m != VM_PAGE_NULL) {
55e303ae 2281 if (m->busy) {
143cc14e
A
2282 wait_result_t result;
2283
2284 if (object != cur_object)
2285 vm_object_unlock(object);
2286
2287 vm_map_unlock_read(map);
91447636
A
2288 if (real_map != map)
2289 vm_map_unlock(real_map);
143cc14e
A
2290
2291#if !VM_FAULT_STATIC_CONFIG
2292 if (!vm_fault_interruptible)
2293 interruptible = THREAD_UNINT;
2294#endif
2295 result = PAGE_ASSERT_WAIT(m, interruptible);
1c79356b 2296
143cc14e
A
2297 vm_object_unlock(cur_object);
2298
2299 if (result == THREAD_WAITING) {
2300 result = thread_block(THREAD_CONTINUE_NULL);
2301
2302 counter(c_vm_fault_page_block_busy_kernel++);
2303 }
2304 if (result == THREAD_AWAKENED || result == THREAD_RESTART)
2305 goto RetryFault;
2306
2307 kr = KERN_ABORTED;
2308 goto done;
2309 }
0b4e3aa0
A
2310 if (m->unusual && (m->error || m->restart || m->private
2311 || m->absent || (fault_type & m->page_lock))) {
1c79356b 2312
143cc14e 2313 /*
1c79356b
A
2314 * Unusual case. Give up.
2315 */
2316 break;
2317 }
2318
91447636
A
2319 if (m->encrypted) {
2320 /*
2321 * ENCRYPTED SWAP:
2322 * We've soft-faulted (because it's not in the page
2323 * table) on an encrypted page.
 2324 * Keep the page "busy" so that no one messes with
2325 * it during the decryption.
2326 * Release the extra locks we're holding, keep only
2327 * the page's VM object lock.
2328 */
2329 m->busy = TRUE;
2330 if (object != cur_object) {
2331 vm_object_unlock(object);
2332 }
2333 vm_map_unlock_read(map);
2334 if (real_map != map)
2335 vm_map_unlock(real_map);
2336
2337 vm_page_decrypt(m, 0);
2338
2339 assert(m->busy);
2340 PAGE_WAKEUP_DONE(m);
2341 vm_object_unlock(m->object);
2342
2343 /*
2344 * Retry from the top, in case anything
2345 * changed while we were decrypting...
2346 */
2347 goto RetryFault;
2348 }
2349 ASSERT_PAGE_DECRYPTED(m);
2350
1c79356b
A
2351 /*
2352 * Two cases of map in faults:
2353 * - At top level w/o copy object.
2354 * - Read fault anywhere.
2355 * --> must disallow write.
2356 */
2357
2358 if (object == cur_object &&
2359 object->copy == VM_OBJECT_NULL)
2360 goto FastMapInFault;
2361
2362 if ((fault_type & VM_PROT_WRITE) == 0) {
55e303ae 2363 boolean_t sequential;
1c79356b
A
2364
2365 prot &= ~VM_PROT_WRITE;
2366
2367 /*
2368 * Set up to map the page ...
2369 * mark the page busy, drop
2370 * locks and take a paging reference
2371 * on the object with the page.
2372 */
2373
2374 if (object != cur_object) {
2375 vm_object_unlock(object);
2376 object = cur_object;
2377 }
2378FastMapInFault:
2379 m->busy = TRUE;
2380
1c79356b
A
2381FastPmapEnter:
2382 /*
2383 * Check a couple of global reasons to
2384 * be conservative about write access.
2385 * Then do the pmap_enter.
2386 */
2387#if !VM_FAULT_STATIC_CONFIG
2388 if (vm_fault_dirty_handling
2389#if MACH_KDB
2390 || db_watchpoint_list
2391#endif
2392 && (fault_type & VM_PROT_WRITE) == 0)
2393 prot &= ~VM_PROT_WRITE;
2394#else /* STATIC_CONFIG */
2395#if MACH_KDB
2396 if (db_watchpoint_list
2397 && (fault_type & VM_PROT_WRITE) == 0)
2398 prot &= ~VM_PROT_WRITE;
2399#endif /* MACH_KDB */
2400#endif /* STATIC_CONFIG */
55e303ae
A
2401 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2402
2403 sequential = FALSE;
91447636
A
2404 need_activation = FALSE;
2405
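				/*
				 * no_isync indicates that the data and instruction
				 * caches have not yet been synchronized for this page
				 * (it is being mapped for the first time);
				 * pmap_sync_page_data_phys() takes care of that before
				 * the page is entered in the pmap.
				 */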
de355530 2406 if (m->no_isync == TRUE) {
143cc14e 2407 m->no_isync = FALSE;
91447636
A
2408 pmap_sync_page_data_phys(m->phys_page);
2409
2410 if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
55e303ae
A
2411 /*
2412 * found it in the cache, but this
2413 * is the first fault-in of the page (no_isync == TRUE)
2414 * so it must have come in as part of
2415 * a cluster... account 1 pagein against it
2416 */
2417 VM_STAT(pageins++);
2418 current_task()->pageins++;
2419 type_of_fault = DBG_PAGEIN_FAULT;
2420 sequential = TRUE;
2421 }
91447636
A
2422 if (m->clustered)
2423 need_activation = TRUE;
2424
55e303ae 2425 } else if (cache_attr != VM_WIMG_DEFAULT) {
91447636 2426 pmap_sync_page_attributes_phys(m->phys_page);
143cc14e 2427 }
0b4e3aa0 2428
9bccf70c
A
2429 if(caller_pmap) {
2430 PMAP_ENTER(caller_pmap,
2431 caller_pmap_addr, m,
2432 prot, cache_attr, wired);
2433 } else {
2434 PMAP_ENTER(pmap, vaddr, m,
2435 prot, cache_attr, wired);
2436 }
0b4e3aa0 2437
1c79356b 2438 /*
55e303ae 2439 * Hold queues lock to manipulate
1c79356b
A
2440 * the page queues. Change wiring
 2441 * case is obvious. In the soft-reference-bits
 2442 * case, activate the page only if it fell
 2443 * off the paging queues; otherwise just
 2444 * activate it if it's inactive.
2445 *
2446 * NOTE: original vm_fault code will
2447 * move active page to back of active
2448 * queue. This code doesn't.
2449 */
765c9de3
A
2450 if (m->clustered) {
2451 vm_pagein_cluster_used++;
2452 m->clustered = FALSE;
2453 }
8ad349bb 2454 if (change_wiring) {
5d5c5d0d
A
2455 vm_page_lock_queues();
2456
1c79356b
A
2457 if (wired)
2458 vm_page_wire(m);
2459 else
2460 vm_page_unwire(m);
5d5c5d0d
A
2461
2462 vm_page_unlock_queues();
1c79356b 2463 }
1c79356b 2464 else {
5d5c5d0d
A
2465 if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active)) {
2466 vm_page_lock_queues();
1c79356b 2467 vm_page_activate(m);
5d5c5d0d
A
2468 vm_page_unlock_queues();
2469 }
1c79356b 2470 }
1c79356b
A
2471
2472 /*
2473 * That's it, clean up and return.
2474 */
2475 PAGE_WAKEUP_DONE(m);
143cc14e 2476
55e303ae
A
2477 sequential = (sequential && vm_page_deactivate_behind) ?
2478 vm_fault_deactivate_behind(object, cur_offset, behavior) :
2479 FALSE;
2480
2481 /*
2482 * Add non-sequential pages to the working set.
2483 * The sequential pages will be brought in through
2484 * normal clustering behavior.
2485 */
2486 if (!sequential && !object->private) {
5d5c5d0d
A
2487 vm_object_paging_begin(object);
2488
55e303ae 2489 write_startup_file =
91447636 2490 vm_fault_tws_insert(map, real_map, vaddr,
55e303ae 2491 object, cur_offset);
8ad349bb 2492
5d5c5d0d
A
2493 vm_object_paging_end(object);
2494 }
1c79356b 2495 vm_object_unlock(object);
143cc14e 2496
1c79356b 2497 vm_map_unlock_read(map);
91447636
A
2498 if(real_map != map)
2499 vm_map_unlock(real_map);
1c79356b 2500
9bccf70c
A
2501 if(write_startup_file)
2502 tws_send_startup_info(current_task());
2503
9bccf70c 2504 thread_interrupt_level(interruptible_state);
1c79356b 2505
143cc14e 2506
1c79356b
A
2507 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2508 vaddr,
9bccf70c 2509 type_of_fault & 0xff,
1c79356b 2510 KERN_SUCCESS,
9bccf70c 2511 type_of_fault >> 8,
1c79356b 2512 0);
143cc14e 2513
1c79356b
A
2514 return KERN_SUCCESS;
2515 }
2516
2517 /*
2518 * Copy on write fault. If objects match, then
2519 * object->copy must not be NULL (else control
2520 * would be in previous code block), and we
2521 * have a potential push into the copy object
2522 * with which we won't cope here.
2523 */
2524
2525 if (cur_object == object)
2526 break;
1c79356b
A
2527 /*
2528 * This is now a shadow based copy on write
2529 * fault -- it requires a copy up the shadow
2530 * chain.
2531 *
2532 * Allocate a page in the original top level
2533 * object. Give up if allocate fails. Also
2534 * need to remember current page, as it's the
2535 * source of the copy.
2536 */
2537 cur_m = m;
2538 m = vm_page_grab();
2539 if (m == VM_PAGE_NULL) {
2540 break;
2541 }
1c79356b
A
2542 /*
2543 * Now do the copy. Mark the source busy
2544 * and take out paging references on both
2545 * objects.
2546 *
2547 * NOTE: This code holds the map lock across
2548 * the page copy.
2549 */
2550
2551 cur_m->busy = TRUE;
2552 vm_page_copy(cur_m, m);
2553 vm_page_insert(m, object, offset);
2554
2555 vm_object_paging_begin(cur_object);
2556 vm_object_paging_begin(object);
2557
2558 type_of_fault = DBG_COW_FAULT;
2559 VM_STAT(cow_faults++);
2560 current_task()->cow_faults++;
2561
2562 /*
2563 * Now cope with the source page and object
2564 * If the top object has a ref count of 1
2565 * then no other map can access it, and hence
91447636 2566 * it's not necessary to do the pmap_disconnect.
1c79356b
A
2567 */
2568
1c79356b
A
2569 vm_page_lock_queues();
2570 vm_page_deactivate(cur_m);
2571 m->dirty = TRUE;
91447636 2572 pmap_disconnect(cur_m->phys_page);
1c79356b
A
2573 vm_page_unlock_queues();
2574
2575 PAGE_WAKEUP_DONE(cur_m);
2576 vm_object_paging_end(cur_object);
2577 vm_object_unlock(cur_object);
2578
2579 /*
 2580 * Slight hack to call vm_object_collapse()
 2581 * and then reuse the common mapping code.
 2582 * Note that the object lock was taken above.
2583 */
2584
2585 vm_object_paging_end(object);
5d5c5d0d 2586 vm_object_collapse(object, offset, TRUE);
1c79356b
A
2587
2588 goto FastPmapEnter;
2589 }
2590 else {
2591
2592 /*
2593 * No page at cur_object, cur_offset
2594 */
2595
2596 if (cur_object->pager_created) {
2597
2598 /*
2599 * Have to talk to the pager. Give up.
2600 */
1c79356b
A
2601 break;
2602 }
2603
2604
2605 if (cur_object->shadow == VM_OBJECT_NULL) {
2606
2607 if (cur_object->shadow_severed) {
2608 vm_object_paging_end(object);
2609 vm_object_unlock(object);
2610 vm_map_unlock_read(map);
91447636
A
2611 if(real_map != map)
2612 vm_map_unlock(real_map);
1c79356b 2613
9bccf70c
A
2614 if(write_startup_file)
2615 tws_send_startup_info(
2616 current_task());
2617
9bccf70c 2618 thread_interrupt_level(interruptible_state);
1c79356b 2619
91447636 2620 return KERN_MEMORY_ERROR;
1c79356b
A
2621 }
2622
2623 /*
2624 * Zero fill fault. Page gets
2625 * filled in top object. Insert
2626 * page, then drop any lower lock.
2627 * Give up if no page.
2628 */
55e303ae
A
2629 if (VM_PAGE_THROTTLED()) {
2630 break;
2631 }
2632
2633 /*
 2634 * Are we protecting the system from
 2635 * backing store exhaustion?  If so,
 2636 * sleep unless we are privileged.
2637 */
2638 if(vm_backing_store_low) {
2639 if(!(current_task()->priv_flags
2640 & VM_BACKING_STORE_PRIV))
1c79356b
A
2641 break;
2642 }
2643 m = vm_page_alloc(object, offset);
2644 if (m == VM_PAGE_NULL) {
2645 break;
2646 }
0b4e3aa0
A
2647 /*
2648 * This is a zero-fill or initial fill
2649 * page fault. As such, we consider it
2650 * undefined with respect to instruction
 2651 * execution; i.e., it is the responsibility
2652 * of higher layers to call for an instruction
2653 * sync after changing the contents and before
2654 * sending a program into this area. We
 2655 * choose this approach for performance.
2656 */
2657
2658 m->no_isync = FALSE;
1c79356b
A
2659
2660 if (cur_object != object)
2661 vm_object_unlock(cur_object);
2662
1c79356b
A
2663 /*
2664 * Now zero fill page and map it.
2665 * the page is probably going to
2666 * be written soon, so don't bother
2667 * to clear the modified bit
2668 *
2669 * NOTE: This code holds the map
2670 * lock across the zero fill.
2671 */
2672
2673 if (!map->no_zero_fill) {
2674 vm_page_zero_fill(m);
2675 type_of_fault = DBG_ZERO_FILL_FAULT;
2676 VM_STAT(zero_fill_count++);
2677 }
2678 vm_page_lock_queues();
2679 VM_PAGE_QUEUES_REMOVE(m);
0b4e3aa0
A
2680
2681 m->page_ticket = vm_page_ticket;
91447636
A
2682 assert(!m->laundry);
2683 assert(m->object != kernel_object);
2684 assert(m->pageq.next == NULL &&
2685 m->pageq.prev == NULL);
2686 if(m->object->size > 0x200000) {
9bccf70c
A
2687 m->zero_fill = TRUE;
2688 /* depends on the queues lock */
2689 vm_zf_count += 1;
2690 queue_enter(&vm_page_queue_zf,
2691 m, vm_page_t, pageq);
2692 } else {
2693 queue_enter(
2694 &vm_page_queue_inactive,
2695 m, vm_page_t, pageq);
2696 }
0b4e3aa0
A
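				/*
				 * The page was stamped with the current vm_page_ticket
				 * above; now advance the global roll: after
				 * VM_PAGE_TICKETS_IN_ROLL zero-fill pages the ticket is
				 * bumped, wrapping at VM_PAGE_TICKET_ROLL_IDS
				 * (presumably so that pageout can age these pages in
				 * ticket-sized batches).
				 */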
2697 vm_page_ticket_roll++;
2698 if(vm_page_ticket_roll ==
2699 VM_PAGE_TICKETS_IN_ROLL) {
2700 vm_page_ticket_roll = 0;
2701 if(vm_page_ticket ==
2702 VM_PAGE_TICKET_ROLL_IDS)
2703 vm_page_ticket= 0;
2704 else
2705 vm_page_ticket++;
2706 }
2707
1c79356b
A
2708 m->inactive = TRUE;
2709 vm_page_inactive_count++;
2710 vm_page_unlock_queues();
143cc14e 2711
1c79356b
A
2712 goto FastPmapEnter;
2713 }
2714
2715 /*
2716 * On to the next level
2717 */
2718
2719 cur_offset += cur_object->shadow_offset;
2720 new_object = cur_object->shadow;
2721 vm_object_lock(new_object);
2722 if (cur_object != object)
2723 vm_object_unlock(cur_object);
2724 cur_object = new_object;
2725
2726 continue;
2727 }
2728 }
2729
2730 /*
2731 * Cleanup from fast fault failure. Drop any object
2732 * lock other than original and drop map lock.
2733 */
2734
2735 if (object != cur_object)
2736 vm_object_unlock(cur_object);
2737 }
2738 vm_map_unlock_read(map);
143cc14e 2739
91447636
A
2740 if(real_map != map)
2741 vm_map_unlock(real_map);
1c79356b
A
2742
2743 /*
2744 * Make a reference to this object to
2745 * prevent its disposal while we are messing with
2746 * it. Once we have the reference, the map is free
2747 * to be diddled. Since objects reference their
2748 * shadows (and copies), they will stay around as well.
2749 */
2750
2751 assert(object->ref_count > 0);
2752 object->ref_count++;
2753 vm_object_res_reference(object);
2754 vm_object_paging_begin(object);
2755
2756 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
55e303ae
A
2757
2758 if (!object->private) {
2759 write_startup_file =
91447636 2760 vm_fault_tws_insert(map, real_map, vaddr, object, offset);
9bccf70c 2761 }
55e303ae 2762
1c79356b
A
2763 kr = vm_fault_page(object, offset, fault_type,
2764 (change_wiring && !wired),
2765 interruptible,
2766 lo_offset, hi_offset, behavior,
2767 &prot, &result_page, &top_page,
2768 &type_of_fault,
0b4e3aa0 2769 &error_code, map->no_zero_fill, FALSE, map, vaddr);
1c79356b
A
2770
2771 /*
2772 * If we didn't succeed, lose the object reference immediately.
2773 */
2774
2775 if (kr != VM_FAULT_SUCCESS)
2776 vm_object_deallocate(object);
2777
2778 /*
2779 * See why we failed, and take corrective action.
2780 */
2781
2782 switch (kr) {
2783 case VM_FAULT_SUCCESS:
2784 break;
2785 case VM_FAULT_MEMORY_SHORTAGE:
2786 if (vm_page_wait((change_wiring) ?
2787 THREAD_UNINT :
2788 THREAD_ABORTSAFE))
2789 goto RetryFault;
2790 /* fall thru */
2791 case VM_FAULT_INTERRUPTED:
2792 kr = KERN_ABORTED;
2793 goto done;
2794 case VM_FAULT_RETRY:
2795 goto RetryFault;
2796 case VM_FAULT_FICTITIOUS_SHORTAGE:
2797 vm_page_more_fictitious();
2798 goto RetryFault;
2799 case VM_FAULT_MEMORY_ERROR:
2800 if (error_code)
2801 kr = error_code;
2802 else
2803 kr = KERN_MEMORY_ERROR;
2804 goto done;
2805 }
2806
2807 m = result_page;
2808
0b4e3aa0
A
2809 if(m != VM_PAGE_NULL) {
2810 assert((change_wiring && !wired) ?
2811 (top_page == VM_PAGE_NULL) :
2812 ((top_page == VM_PAGE_NULL) == (m->object == object)));
2813 }
1c79356b
A
2814
2815 /*
2816 * How to clean up the result of vm_fault_page. This
2817 * happens whether the mapping is entered or not.
2818 */
2819
2820#define UNLOCK_AND_DEALLOCATE \
2821 MACRO_BEGIN \
2822 vm_fault_cleanup(m->object, top_page); \
2823 vm_object_deallocate(object); \
2824 MACRO_END
2825
2826 /*
2827 * What to do with the resulting page from vm_fault_page
2828 * if it doesn't get entered into the physical map:
2829 */
2830
2831#define RELEASE_PAGE(m) \
2832 MACRO_BEGIN \
2833 PAGE_WAKEUP_DONE(m); \
2834 vm_page_lock_queues(); \
2835 if (!m->active && !m->inactive) \
2836 vm_page_activate(m); \
2837 vm_page_unlock_queues(); \
2838 MACRO_END
2839
2840 /*
2841 * We must verify that the maps have not changed
2842 * since our last lookup.
2843 */
2844
0b4e3aa0
A
2845 if(m != VM_PAGE_NULL) {
2846 old_copy_object = m->object->copy;
0b4e3aa0
A
2847 vm_object_unlock(m->object);
2848 } else {
2849 old_copy_object = VM_OBJECT_NULL;
2850 }
1c79356b
A
2851 if ((map != original_map) || !vm_map_verify(map, &version)) {
2852 vm_object_t retry_object;
2853 vm_object_offset_t retry_offset;
2854 vm_prot_t retry_prot;
2855
2856 /*
2857 * To avoid trying to write_lock the map while another
2858 * thread has it read_locked (in vm_map_pageable), we
2859 * do not try for write permission. If the page is
2860 * still writable, we will get write permission. If it
2861 * is not, or has been marked needs_copy, we enter the
2862 * mapping without write permission, and will merely
2863 * take another fault.
2864 */
2865 map = original_map;
2866 vm_map_lock_read(map);
2867 kr = vm_map_lookup_locked(&map, vaddr,
2868 fault_type & ~VM_PROT_WRITE, &version,
2869 &retry_object, &retry_offset, &retry_prot,
2870 &wired, &behavior, &lo_offset, &hi_offset,
91447636
A
2871 &real_map);
2872 pmap = real_map->pmap;
1c79356b
A
2873
2874 if (kr != KERN_SUCCESS) {
2875 vm_map_unlock_read(map);
0b4e3aa0
A
2876 if(m != VM_PAGE_NULL) {
2877 vm_object_lock(m->object);
2878 RELEASE_PAGE(m);
2879 UNLOCK_AND_DEALLOCATE;
2880 } else {
2881 vm_object_deallocate(object);
2882 }
1c79356b
A
2883 goto done;
2884 }
2885
2886 vm_object_unlock(retry_object);
0b4e3aa0
A
2887 if(m != VM_PAGE_NULL) {
2888 vm_object_lock(m->object);
2889 } else {
2890 vm_object_lock(object);
2891 }
1c79356b
A
2892
2893 if ((retry_object != object) ||
2894 (retry_offset != offset)) {
2895 vm_map_unlock_read(map);
91447636
A
2896 if(real_map != map)
2897 vm_map_unlock(real_map);
0b4e3aa0
A
2898 if(m != VM_PAGE_NULL) {
2899 RELEASE_PAGE(m);
2900 UNLOCK_AND_DEALLOCATE;
2901 } else {
2902 vm_object_deallocate(object);
2903 }
1c79356b
A
2904 goto RetryFault;
2905 }
2906
2907 /*
2908 * Check whether the protection has changed or the object
2909 * has been copied while we left the map unlocked.
2910 */
2911 prot &= retry_prot;
0b4e3aa0
A
2912 if(m != VM_PAGE_NULL) {
2913 vm_object_unlock(m->object);
2914 } else {
2915 vm_object_unlock(object);
2916 }
2917 }
2918 if(m != VM_PAGE_NULL) {
2919 vm_object_lock(m->object);
2920 } else {
2921 vm_object_lock(object);
1c79356b 2922 }
1c79356b
A
2923
2924 /*
2925 * If the copy object changed while the top-level object
2926 * was unlocked, then we must take away write permission.
2927 */
2928
0b4e3aa0
A
2929 if(m != VM_PAGE_NULL) {
2930 if (m->object->copy != old_copy_object)
2931 prot &= ~VM_PROT_WRITE;
2932 }
1c79356b
A
2933
2934 /*
2935 * If we want to wire down this page, but no longer have
2936 * adequate permissions, we must start all over.
2937 */
2938
2939 if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2940 vm_map_verify_done(map, &version);
91447636
A
2941 if(real_map != map)
2942 vm_map_unlock(real_map);
0b4e3aa0
A
2943 if(m != VM_PAGE_NULL) {
2944 RELEASE_PAGE(m);
2945 UNLOCK_AND_DEALLOCATE;
2946 } else {
2947 vm_object_deallocate(object);
2948 }
1c79356b
A
2949 goto RetryFault;
2950 }
2951
1c79356b
A
2952 /*
2953 * Put this page into the physical map.
2954 * We had to do the unlock above because pmap_enter
2955 * may cause other faults. The page may be on
2956 * the pageout queues. If the pageout daemon comes
2957 * across the page, it will remove it from the queues.
2958 */
91447636
A
2959 need_activation = FALSE;
2960
765c9de3
A
2961 if (m != VM_PAGE_NULL) {
2962 if (m->no_isync == TRUE) {
91447636 2963 pmap_sync_page_data_phys(m->phys_page);
55e303ae 2964
91447636 2965 if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
55e303ae
A
2966 /*
2967 * found it in the cache, but this
2968 * is the first fault-in of the page (no_isync == TRUE)
2969 * so it must have come in as part of
2970 * a cluster... account 1 pagein against it
2971 */
2972 VM_STAT(pageins++);
2973 current_task()->pageins++;
2974
2975 type_of_fault = DBG_PAGEIN_FAULT;
2976 }
91447636
A
2977 if (m->clustered) {
2978 need_activation = TRUE;
2979 }
765c9de3
A
2980 m->no_isync = FALSE;
2981 }
9bccf70c 2982 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
0b4e3aa0 2983
9bccf70c
A
2984 if(caller_pmap) {
2985 PMAP_ENTER(caller_pmap,
2986 caller_pmap_addr, m,
2987 prot, cache_attr, wired);
2988 } else {
2989 PMAP_ENTER(pmap, vaddr, m,
2990 prot, cache_attr, wired);
2991 }
55e303ae
A
2992
2993 /*
2994 * Add working set information for private objects here.
2995 */
2996 if (m->object->private) {
2997 write_startup_file =
91447636 2998 vm_fault_tws_insert(map, real_map, vaddr,
55e303ae 2999 m->object, m->offset);
0b4e3aa0
A
3000 }
3001 } else {
3002
9bccf70c 3003 vm_map_entry_t entry;
91447636
A
3004 vm_map_offset_t laddr;
3005 vm_map_offset_t ldelta, hdelta;
143cc14e 3006
0b4e3aa0
A
3007 /*
3008 * do a pmap block mapping from the physical address
3009 * in the object
3010 */
9bccf70c 3011
5d5c5d0d 3012#ifndef i386
55e303ae
A
3013 /* While we do not worry about execution protection in */
 3014 /* general, certain pages may have instruction execution */
3015 /* disallowed. We will check here, and if not allowed */
3016 /* to execute, we return with a protection failure. */
9bccf70c 3017
5d5c5d0d 3018 if((fault_type & VM_PROT_EXECUTE) &&
91447636
A
3019 (!pmap_eligible_for_execute((ppnum_t)
3020 (object->shadow_offset >> 12)))) {
9bccf70c 3021
9bccf70c 3022 vm_map_verify_done(map, &version);
91447636
A
3023 if(real_map != map)
3024 vm_map_unlock(real_map);
9bccf70c
A
3025 vm_fault_cleanup(object, top_page);
3026 vm_object_deallocate(object);
3027 kr = KERN_PROTECTION_FAILURE;
3028 goto done;
0b4e3aa0 3029 }
5d5c5d0d 3030#endif /* !i386 */
1c79356b 3031
91447636
A
3032 if(real_map != map) {
3033 vm_map_unlock(real_map);
9bccf70c
A
3034 }
3035 if (original_map != map) {
3036 vm_map_unlock_read(map);
3037 vm_map_lock_read(original_map);
3038 map = original_map;
3039 }
91447636 3040 real_map = map;
9bccf70c
A
3041
3042 laddr = vaddr;
3043 hdelta = 0xFFFFF000;
3044 ldelta = 0xFFFFF000;
3045
3046
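			/*
			 * Walk down through any submaps to the entry that
			 * actually backs this address, tracking how far that
			 * entry extends below (ldelta) and above (hdelta) the
			 * faulting address, so the block mapping set up below
			 * can cover as much of the region as possible.
			 */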
3047 while(vm_map_lookup_entry(map, laddr, &entry)) {
3048 if(ldelta > (laddr - entry->vme_start))
3049 ldelta = laddr - entry->vme_start;
3050 if(hdelta > (entry->vme_end - laddr))
3051 hdelta = entry->vme_end - laddr;
3052 if(entry->is_sub_map) {
3053
3054 laddr = (laddr - entry->vme_start)
3055 + entry->offset;
3056 vm_map_lock_read(entry->object.sub_map);
91447636 3057 if(map != real_map)
9bccf70c
A
3058 vm_map_unlock_read(map);
3059 if(entry->use_pmap) {
91447636
A
3060 vm_map_unlock_read(real_map);
3061 real_map = entry->object.sub_map;
9bccf70c
A
3062 }
3063 map = entry->object.sub_map;
3064
3065 } else {
3066 break;
3067 }
3068 }
3069
3070 if(vm_map_lookup_entry(map, laddr, &entry) &&
5d5c5d0d
A
3071 (entry->object.vm_object != NULL) &&
3072 (entry->object.vm_object == object)) {
9bccf70c 3073
5d5c5d0d 3074 vm_map_offset_t phys_offset;
9bccf70c 3075
5d5c5d0d
A
3076 phys_offset = (entry->object.vm_object->shadow_offset
3077 + entry->offset
3078 + laddr
3079 - entry->vme_start);
3080 phys_offset -= ldelta;
9bccf70c 3081 if(caller_pmap) {
55e303ae 3082 /* Set up a block mapped area */
5d5c5d0d
A
3083 pmap_map_block(
3084 caller_pmap,
55e303ae 3085 (addr64_t)(caller_pmap_addr - ldelta),
5d5c5d0d
A
3086 phys_offset >> 12,
3087 (ldelta + hdelta) >> 12,
3088 prot,
3089 (VM_WIMG_MASK & (int)object->wimg_bits),
3090 0);
55e303ae
A
3091 } else {
3092 /* Set up a block mapped area */
5d5c5d0d
A
3093 pmap_map_block(
3094 real_map->pmap,
3095 (addr64_t)(vaddr - ldelta),
3096 phys_offset >> 12,
3097 (ldelta + hdelta) >> 12,
3098 prot,
3099 (VM_WIMG_MASK & (int)object->wimg_bits),
3100 0);
9bccf70c
A
3101 }
3102 }
0b4e3aa0
A
3103
3104 }
1c79356b
A
3105
3106 /*
3107 * If the page is not wired down and isn't already
3108 * on a pageout queue, then put it where the
3109 * pageout daemon can find it.
3110 */
0b4e3aa0 3111 if(m != VM_PAGE_NULL) {
0b4e3aa0
A
3112 vm_page_lock_queues();
3113
91447636
A
3114 if (m->clustered) {
3115 vm_pagein_cluster_used++;
3116 m->clustered = FALSE;
3117 }
3118 m->reference = TRUE;
3119
0b4e3aa0
A
3120 if (change_wiring) {
3121 if (wired)
3122 vm_page_wire(m);
3123 else
3124 vm_page_unwire(m);
3125 }
1c79356b 3126#if VM_FAULT_STATIC_CONFIG
0b4e3aa0 3127 else {
91447636 3128 if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
0b4e3aa0 3129 vm_page_activate(m);
0b4e3aa0 3130 }
1c79356b 3131#else
0b4e3aa0
A
3132 else if (software_reference_bits) {
3133 if (!m->active && !m->inactive)
3134 vm_page_activate(m);
3135 m->reference = TRUE;
3136 } else {
1c79356b 3137 vm_page_activate(m);
0b4e3aa0 3138 }
1c79356b 3139#endif
0b4e3aa0
A
3140 vm_page_unlock_queues();
3141 }
1c79356b
A
3142
3143 /*
3144 * Unlock everything, and return
3145 */
3146
3147 vm_map_verify_done(map, &version);
91447636
A
3148 if(real_map != map)
3149 vm_map_unlock(real_map);
0b4e3aa0
A
3150 if(m != VM_PAGE_NULL) {
3151 PAGE_WAKEUP_DONE(m);
3152 UNLOCK_AND_DEALLOCATE;
3153 } else {
3154 vm_fault_cleanup(object, top_page);
3155 vm_object_deallocate(object);
3156 }
1c79356b 3157 kr = KERN_SUCCESS;
1c79356b
A
3158
3159#undef UNLOCK_AND_DEALLOCATE
3160#undef RELEASE_PAGE
3161
3162 done:
9bccf70c
A
3163 if(write_startup_file)
3164 tws_send_startup_info(current_task());
91447636 3165
9bccf70c 3166 thread_interrupt_level(interruptible_state);
1c79356b
A
3167
3168 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
3169 vaddr,
9bccf70c 3170 type_of_fault & 0xff,
1c79356b 3171 kr,
9bccf70c 3172 type_of_fault >> 8,
1c79356b 3173 0);
143cc14e 3174
1c79356b
A
3175 return(kr);
3176}
3177
3178/*
3179 * vm_fault_wire:
3180 *
3181 * Wire down a range of virtual addresses in a map.
3182 */
3183kern_return_t
3184vm_fault_wire(
3185 vm_map_t map,
3186 vm_map_entry_t entry,
9bccf70c 3187 pmap_t pmap,
91447636 3188 vm_map_offset_t pmap_addr)
1c79356b
A
3189{
3190
91447636
A
3191 register vm_map_offset_t va;
3192 register vm_map_offset_t end_addr = entry->vme_end;
1c79356b
A
3193 register kern_return_t rc;
3194
3195 assert(entry->in_transition);
3196
9bccf70c
A
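	/*
	 * A physically contiguous object has no pages that can be
	 * paged out, so there is nothing to fault in and wire here.
	 */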
3197 if ((entry->object.vm_object != NULL) &&
3198 !entry->is_sub_map &&
3199 entry->object.vm_object->phys_contiguous) {
3200 return KERN_SUCCESS;
3201 }
3202
1c79356b
A
3203 /*
3204 * Inform the physical mapping system that the
3205 * range of addresses may not fault, so that
3206 * page tables and such can be locked down as well.
3207 */
3208
9bccf70c
A
3209 pmap_pageable(pmap, pmap_addr,
3210 pmap_addr + (end_addr - entry->vme_start), FALSE);
1c79356b
A
3211
3212 /*
3213 * We simulate a fault to get the page and enter it
3214 * in the physical map.
3215 */
3216
3217 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3218 if ((rc = vm_fault_wire_fast(
9bccf70c
A
3219 map, va, entry, pmap,
3220 pmap_addr + (va - entry->vme_start)
3221 )) != KERN_SUCCESS) {
1c79356b 3222 rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
9bccf70c
A
3223 (pmap == kernel_pmap) ?
3224 THREAD_UNINT : THREAD_ABORTSAFE,
3225 pmap, pmap_addr + (va - entry->vme_start));
1c79356b
A
3226 }
3227
3228 if (rc != KERN_SUCCESS) {
3229 struct vm_map_entry tmp_entry = *entry;
3230
3231 /* unwire wired pages */
3232 tmp_entry.vme_end = va;
9bccf70c
A
3233 vm_fault_unwire(map,
3234 &tmp_entry, FALSE, pmap, pmap_addr);
1c79356b
A
3235
3236 return rc;
3237 }
3238 }
3239 return KERN_SUCCESS;
3240}
3241
3242/*
3243 * vm_fault_unwire:
3244 *
3245 * Unwire a range of virtual addresses in a map.
3246 */
3247void
3248vm_fault_unwire(
3249 vm_map_t map,
3250 vm_map_entry_t entry,
3251 boolean_t deallocate,
9bccf70c 3252 pmap_t pmap,
91447636 3253 vm_map_offset_t pmap_addr)
1c79356b 3254{
91447636
A
3255 register vm_map_offset_t va;
3256 register vm_map_offset_t end_addr = entry->vme_end;
1c79356b
A
3257 vm_object_t object;
3258
3259 object = (entry->is_sub_map)
3260 ? VM_OBJECT_NULL : entry->object.vm_object;
3261
3262 /*
3263 * Since the pages are wired down, we must be able to
3264 * get their mappings from the physical map system.
3265 */
3266
3267 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
9bccf70c
A
3268 pmap_change_wiring(pmap,
3269 pmap_addr + (va - entry->vme_start), FALSE);
1c79356b
A
3270
3271 if (object == VM_OBJECT_NULL) {
9bccf70c
A
3272 (void) vm_fault(map, va, VM_PROT_NONE,
3273 TRUE, THREAD_UNINT, pmap, pmap_addr);
3274 } else if (object->phys_contiguous) {
3275 continue;
1c79356b
A
3276 } else {
3277 vm_prot_t prot;
3278 vm_page_t result_page;
3279 vm_page_t top_page;
3280 vm_object_t result_object;
3281 vm_fault_return_t result;
3282
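			/*
			 * The page is wired, so it must be resident; use
			 * vm_fault_page() (retrying if necessary) simply to
			 * look it up and hold its object, so the page can be
			 * unwired or freed below.
			 */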
3283 do {
3284 prot = VM_PROT_NONE;
3285
3286 vm_object_lock(object);
3287 vm_object_paging_begin(object);
3288 XPR(XPR_VM_FAULT,
3289 "vm_fault_unwire -> vm_fault_page\n",
3290 0,0,0,0,0);
3291 result = vm_fault_page(object,
3292 entry->offset +
3293 (va - entry->vme_start),
3294 VM_PROT_NONE, TRUE,
3295 THREAD_UNINT,
3296 entry->offset,
3297 entry->offset +
3298 (entry->vme_end
3299 - entry->vme_start),
3300 entry->behavior,
3301 &prot,
3302 &result_page,
3303 &top_page,
3304 (int *)0,
3305 0, map->no_zero_fill,
0b4e3aa0 3306 FALSE, NULL, 0);
1c79356b
A
3307 } while (result == VM_FAULT_RETRY);
3308
3309 if (result != VM_FAULT_SUCCESS)
3310 panic("vm_fault_unwire: failure");
3311
3312 result_object = result_page->object;
3313 if (deallocate) {
3314 assert(!result_page->fictitious);
91447636 3315 pmap_disconnect(result_page->phys_page);
1c79356b
A
3316 VM_PAGE_FREE(result_page);
3317 } else {
3318 vm_page_lock_queues();
3319 vm_page_unwire(result_page);
3320 vm_page_unlock_queues();
3321 PAGE_WAKEUP_DONE(result_page);
3322 }
3323
3324 vm_fault_cleanup(result_object, top_page);
3325 }
3326 }
3327
3328 /*
3329 * Inform the physical mapping system that the range
3330 * of addresses may fault, so that page tables and
3331 * such may be unwired themselves.
3332 */
3333
9bccf70c
A
3334 pmap_pageable(pmap, pmap_addr,
3335 pmap_addr + (end_addr - entry->vme_start), TRUE);
1c79356b
A
3336
3337}
3338
3339/*
3340 * vm_fault_wire_fast:
3341 *
3342 * Handle common case of a wire down page fault at the given address.
3343 * If successful, the page is inserted into the associated physical map.
3344 * The map entry is passed in to avoid the overhead of a map lookup.
3345 *
3346 * NOTE: the given address should be truncated to the
3347 * proper page address.
3348 *
3349 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
3350 * a standard error specifying why the fault is fatal is returned.
3351 *
3352 * The map in question must be referenced, and remains so.
3353 * Caller has a read lock on the map.
3354 *
3355 * This is a stripped version of vm_fault() for wiring pages. Anything
3356 * other than the common case will return KERN_FAILURE, and the caller
3357 * is expected to call vm_fault().
3358 */
3359kern_return_t
3360vm_fault_wire_fast(
91447636
A
3361 __unused vm_map_t map,
3362 vm_map_offset_t va,
1c79356b 3363 vm_map_entry_t entry,
91447636
A
3364 pmap_t pmap,
3365 vm_map_offset_t pmap_addr)
1c79356b
A
3366{
3367 vm_object_t object;
3368 vm_object_offset_t offset;
3369 register vm_page_t m;
3370 vm_prot_t prot;
91447636 3371 thread_t thread = current_thread();
9bccf70c 3372 unsigned int cache_attr;
1c79356b
A
3373
3374 VM_STAT(faults++);
3375
91447636
A
3376 if (thread != THREAD_NULL && thread->task != TASK_NULL)
3377 thread->task->faults++;
1c79356b
A
3378
3379/*
3380 * Recovery actions
3381 */
3382
3383#undef RELEASE_PAGE
3384#define RELEASE_PAGE(m) { \
3385 PAGE_WAKEUP_DONE(m); \
3386 vm_page_lock_queues(); \
3387 vm_page_unwire(m); \
3388 vm_page_unlock_queues(); \
3389}
3390
3391
3392#undef UNLOCK_THINGS
3393#define UNLOCK_THINGS { \
ff6e181a
A
3394 vm_object_paging_end(object); \
3395 vm_object_unlock(object); \
1c79356b
A
3396}
3397
3398#undef UNLOCK_AND_DEALLOCATE
3399#define UNLOCK_AND_DEALLOCATE { \
3400 UNLOCK_THINGS; \
3401 vm_object_deallocate(object); \
3402}
3403/*
3404 * Give up and have caller do things the hard way.
3405 */
3406
3407#define GIVE_UP { \
3408 UNLOCK_AND_DEALLOCATE; \
3409 return(KERN_FAILURE); \
3410}
3411
3412
3413 /*
3414 * If this entry is not directly to a vm_object, bail out.
3415 */
3416 if (entry->is_sub_map)
3417 return(KERN_FAILURE);
3418
3419 /*
3420 * Find the backing store object and offset into it.
3421 */
3422
3423 object = entry->object.vm_object;
3424 offset = (va - entry->vme_start) + entry->offset;
3425 prot = entry->protection;
3426
3427 /*
3428 * Make a reference to this object to prevent its
3429 * disposal while we are messing with it.
3430 */
3431
3432 vm_object_lock(object);
3433 assert(object->ref_count > 0);
3434 object->ref_count++;
3435 vm_object_res_reference(object);
ff6e181a 3436 vm_object_paging_begin(object);
1c79356b
A
3437
3438 /*
3439 * INVARIANTS (through entire routine):
3440 *
3441 * 1) At all times, we must either have the object
3442 * lock or a busy page in some object to prevent
3443 * some other thread from trying to bring in
3444 * the same page.
3445 *
3446 * 2) Once we have a busy page, we must remove it from
3447 * the pageout queues, so that the pageout daemon
3448 * will not grab it away.
3449 *
3450 */
3451
3452 /*
3453 * Look for page in top-level object. If it's not there or
3454 * there's something going on, give up.
91447636
A
3455 * ENCRYPTED SWAP: use the slow fault path, since we'll need to
3456 * decrypt the page before wiring it down.
1c79356b
A
3457 */
3458 m = vm_page_lookup(object, offset);
91447636 3459 if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) ||
1c79356b
A
3460 (m->unusual && ( m->error || m->restart || m->absent ||
3461 prot & m->page_lock))) {
3462
3463 GIVE_UP;
3464 }
91447636 3465 ASSERT_PAGE_DECRYPTED(m);
1c79356b
A
3466
3467 /*
3468 * Wire the page down now. All bail outs beyond this
3469 * point must unwire the page.
3470 */
3471
3472 vm_page_lock_queues();
3473 vm_page_wire(m);
3474 vm_page_unlock_queues();
3475
3476 /*
3477 * Mark page busy for other threads.
3478 */
3479 assert(!m->busy);
3480 m->busy = TRUE;
3481 assert(!m->absent);
3482
3483 /*
3484 * Give up if the page is being written and there's a copy object
3485 */
3486 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3487 RELEASE_PAGE(m);
3488 GIVE_UP;
3489 }
3490
3491 /*
3492 * Put this page into the physical map.
3493 * We have to unlock the object because pmap_enter
3494 * may cause other faults.
3495 */
765c9de3 3496 if (m->no_isync == TRUE) {
91447636 3497 pmap_sync_page_data_phys(m->phys_page);
0b4e3aa0 3498
765c9de3 3499 m->no_isync = FALSE;
0b4e3aa0 3500 }
9bccf70c
A
3501
3502 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
765c9de3 3503
9bccf70c 3504 PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);
1c79356b 3505
1c79356b
A
3506 /*
3507 * Unlock everything, and return
3508 */
3509
3510 PAGE_WAKEUP_DONE(m);
3511 UNLOCK_AND_DEALLOCATE;
3512
3513 return(KERN_SUCCESS);
3514
3515}
3516
3517/*
3518 * Routine: vm_fault_copy_cleanup
3519 * Purpose:
3520 * Release a page used by vm_fault_copy.
3521 */
3522
3523void
3524vm_fault_copy_cleanup(
3525 vm_page_t page,
3526 vm_page_t top_page)
3527{
3528 vm_object_t object = page->object;
3529
3530 vm_object_lock(object);
3531 PAGE_WAKEUP_DONE(page);
3532 vm_page_lock_queues();
3533 if (!page->active && !page->inactive)
3534 vm_page_activate(page);
3535 vm_page_unlock_queues();
3536 vm_fault_cleanup(object, top_page);
3537}
3538
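/*
 *	Routine:	vm_fault_copy_dst_cleanup
 *	Purpose:
 *		Release a destination page wired by vm_fault_copy:
 *		unwire it and drop the paging reference on its object.
 */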
3539void
3540vm_fault_copy_dst_cleanup(
3541 vm_page_t page)
3542{
3543 vm_object_t object;
3544
3545 if (page != VM_PAGE_NULL) {
3546 object = page->object;
3547 vm_object_lock(object);
3548 vm_page_lock_queues();
3549 vm_page_unwire(page);
3550 vm_page_unlock_queues();
3551 vm_object_paging_end(object);
3552 vm_object_unlock(object);
3553 }
3554}
3555
3556/*
3557 * Routine: vm_fault_copy
3558 *
3559 * Purpose:
3560 * Copy pages from one virtual memory object to another --
3561 * neither the source nor destination pages need be resident.
3562 *
3563 * Before actually copying a page, the version associated with
 3564 * the destination address map will be verified.
3565 *
3566 * In/out conditions:
3567 * The caller must hold a reference, but not a lock, to
3568 * each of the source and destination objects and to the
3569 * destination map.
3570 *
3571 * Results:
3572 * Returns KERN_SUCCESS if no errors were encountered in
3573 * reading or writing the data. Returns KERN_INTERRUPTED if
3574 * the operation was interrupted (only possible if the
3575 * "interruptible" argument is asserted). Other return values
3576 * indicate a permanent error in copying the data.
3577 *
3578 * The actual amount of data copied will be returned in the
3579 * "copy_size" argument. In the event that the destination map
3580 * verification failed, this amount may be less than the amount
3581 * requested.
3582 */
3583kern_return_t
3584vm_fault_copy(
3585 vm_object_t src_object,
3586 vm_object_offset_t src_offset,
91447636 3587 vm_map_size_t *copy_size, /* INOUT */
1c79356b
A
3588 vm_object_t dst_object,
3589 vm_object_offset_t dst_offset,
3590 vm_map_t dst_map,
3591 vm_map_version_t *dst_version,
3592 int interruptible)
3593{
3594 vm_page_t result_page;
3595
3596 vm_page_t src_page;
3597 vm_page_t src_top_page;
3598 vm_prot_t src_prot;
3599
3600 vm_page_t dst_page;
3601 vm_page_t dst_top_page;
3602 vm_prot_t dst_prot;
3603
91447636 3604 vm_map_size_t amount_left;
1c79356b
A
3605 vm_object_t old_copy_object;
3606 kern_return_t error = 0;
3607
91447636 3608 vm_map_size_t part_size;
1c79356b
A
3609
3610 /*
3611 * In order not to confuse the clustered pageins, align
3612 * the different offsets on a page boundary.
3613 */
91447636
A
3614 vm_object_offset_t src_lo_offset = vm_object_trunc_page(src_offset);
3615 vm_object_offset_t dst_lo_offset = vm_object_trunc_page(dst_offset);
3616 vm_object_offset_t src_hi_offset = vm_object_round_page(src_offset + *copy_size);
3617 vm_object_offset_t dst_hi_offset = vm_object_round_page(dst_offset + *copy_size);
1c79356b
A
3618
3619#define RETURN(x) \
3620 MACRO_BEGIN \
91447636 3621 *copy_size -= amount_left; \
1c79356b
A
3622 MACRO_RETURN(x); \
3623 MACRO_END
3624
91447636 3625 amount_left = *copy_size;
1c79356b
A
3626 do { /* while (amount_left > 0) */
3627 /*
3628 * There may be a deadlock if both source and destination
3629 * pages are the same. To avoid this deadlock, the copy must
3630 * start by getting the destination page in order to apply
3631 * COW semantics if any.
3632 */
3633
3634 RetryDestinationFault: ;
3635
3636 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3637
3638 vm_object_lock(dst_object);
3639 vm_object_paging_begin(dst_object);
3640
3641 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3642 switch (vm_fault_page(dst_object,
91447636 3643 vm_object_trunc_page(dst_offset),
1c79356b
A
3644 VM_PROT_WRITE|VM_PROT_READ,
3645 FALSE,
3646 interruptible,
3647 dst_lo_offset,
3648 dst_hi_offset,
3649 VM_BEHAVIOR_SEQUENTIAL,
3650 &dst_prot,
3651 &dst_page,
3652 &dst_top_page,
3653 (int *)0,
3654 &error,
3655 dst_map->no_zero_fill,
0b4e3aa0 3656 FALSE, NULL, 0)) {
1c79356b
A
3657 case VM_FAULT_SUCCESS:
3658 break;
3659 case VM_FAULT_RETRY:
3660 goto RetryDestinationFault;
3661 case VM_FAULT_MEMORY_SHORTAGE:
3662 if (vm_page_wait(interruptible))
3663 goto RetryDestinationFault;
3664 /* fall thru */
3665 case VM_FAULT_INTERRUPTED:
3666 RETURN(MACH_SEND_INTERRUPTED);
3667 case VM_FAULT_FICTITIOUS_SHORTAGE:
3668 vm_page_more_fictitious();
3669 goto RetryDestinationFault;
3670 case VM_FAULT_MEMORY_ERROR:
3671 if (error)
3672 return (error);
3673 else
3674 return(KERN_MEMORY_ERROR);
3675 }
3676 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3677
3678 old_copy_object = dst_page->object->copy;
3679
3680 /*
 3681 * There exists the possibility that the source and
3682 * destination page are the same. But we can't
3683 * easily determine that now. If they are the
3684 * same, the call to vm_fault_page() for the
3685 * destination page will deadlock. To prevent this we
3686 * wire the page so we can drop busy without having
3687 * the page daemon steal the page. We clean up the
3688 * top page but keep the paging reference on the object
3689 * holding the dest page so it doesn't go away.
3690 */
3691
3692 vm_page_lock_queues();
3693 vm_page_wire(dst_page);
3694 vm_page_unlock_queues();
3695 PAGE_WAKEUP_DONE(dst_page);
3696 vm_object_unlock(dst_page->object);
3697
3698 if (dst_top_page != VM_PAGE_NULL) {
3699 vm_object_lock(dst_object);
3700 VM_PAGE_FREE(dst_top_page);
3701 vm_object_paging_end(dst_object);
3702 vm_object_unlock(dst_object);
3703 }
3704
3705 RetrySourceFault: ;
3706
3707 if (src_object == VM_OBJECT_NULL) {
3708 /*
3709 * No source object. We will just
3710 * zero-fill the page in dst_object.
3711 */
3712 src_page = VM_PAGE_NULL;
e3027f41 3713 result_page = VM_PAGE_NULL;
1c79356b
A
3714 } else {
3715 vm_object_lock(src_object);
3716 src_page = vm_page_lookup(src_object,
91447636 3717 vm_object_trunc_page(src_offset));
e3027f41 3718 if (src_page == dst_page) {
1c79356b 3719 src_prot = dst_prot;
e3027f41
A
3720 result_page = VM_PAGE_NULL;
3721 } else {
1c79356b
A
3722 src_prot = VM_PROT_READ;
3723 vm_object_paging_begin(src_object);
3724
3725 XPR(XPR_VM_FAULT,
3726 "vm_fault_copy(2) -> vm_fault_page\n",
3727 0,0,0,0,0);
3728 switch (vm_fault_page(src_object,
91447636 3729 vm_object_trunc_page(src_offset),
1c79356b
A
3730 VM_PROT_READ,
3731 FALSE,
3732 interruptible,
3733 src_lo_offset,
3734 src_hi_offset,
3735 VM_BEHAVIOR_SEQUENTIAL,
3736 &src_prot,
3737 &result_page,
3738 &src_top_page,
3739 (int *)0,
3740 &error,
3741 FALSE,
0b4e3aa0 3742 FALSE, NULL, 0)) {
1c79356b
A
3743
3744 case VM_FAULT_SUCCESS:
3745 break;
3746 case VM_FAULT_RETRY:
3747 goto RetrySourceFault;
3748 case VM_FAULT_MEMORY_SHORTAGE:
3749 if (vm_page_wait(interruptible))
3750 goto RetrySourceFault;
3751 /* fall thru */
3752 case VM_FAULT_INTERRUPTED:
3753 vm_fault_copy_dst_cleanup(dst_page);
3754 RETURN(MACH_SEND_INTERRUPTED);
3755 case VM_FAULT_FICTITIOUS_SHORTAGE:
3756 vm_page_more_fictitious();
3757 goto RetrySourceFault;
3758 case VM_FAULT_MEMORY_ERROR:
3759 vm_fault_copy_dst_cleanup(dst_page);
3760 if (error)
3761 return (error);
3762 else
3763 return(KERN_MEMORY_ERROR);
3764 }
3765
1c79356b
A
3766
3767 assert((src_top_page == VM_PAGE_NULL) ==
e3027f41 3768 (result_page->object == src_object));
1c79356b
A
3769 }
3770 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
e3027f41 3771 vm_object_unlock(result_page->object);
1c79356b
A
3772 }
3773
3774 if (!vm_map_verify(dst_map, dst_version)) {
e3027f41
A
3775 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3776 vm_fault_copy_cleanup(result_page, src_top_page);
1c79356b
A
3777 vm_fault_copy_dst_cleanup(dst_page);
3778 break;
3779 }
3780
3781 vm_object_lock(dst_page->object);
3782
3783 if (dst_page->object->copy != old_copy_object) {
3784 vm_object_unlock(dst_page->object);
3785 vm_map_verify_done(dst_map, dst_version);
e3027f41
A
3786 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3787 vm_fault_copy_cleanup(result_page, src_top_page);
1c79356b
A
3788 vm_fault_copy_dst_cleanup(dst_page);
3789 break;
3790 }
3791 vm_object_unlock(dst_page->object);
3792
3793 /*
3794 * Copy the page, and note that it is dirty
3795 * immediately.
3796 */
3797
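		/*
		 * part_size is the number of bytes that lie within both
		 * the current source page and the current destination
		 * page (and within amount_left); when everything is page
		 * aligned we simply copy whole pages below.
		 */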
3798 if (!page_aligned(src_offset) ||
3799 !page_aligned(dst_offset) ||
3800 !page_aligned(amount_left)) {
3801
3802 vm_object_offset_t src_po,
3803 dst_po;
3804
91447636
A
3805 src_po = src_offset - vm_object_trunc_page(src_offset);
3806 dst_po = dst_offset - vm_object_trunc_page(dst_offset);
1c79356b
A
3807
3808 if (dst_po > src_po) {
3809 part_size = PAGE_SIZE - dst_po;
3810 } else {
3811 part_size = PAGE_SIZE - src_po;
3812 }
3813 if (part_size > (amount_left)){
3814 part_size = amount_left;
3815 }
3816
e3027f41 3817 if (result_page == VM_PAGE_NULL) {
1c79356b
A
3818 vm_page_part_zero_fill(dst_page,
3819 dst_po, part_size);
3820 } else {
e3027f41 3821 vm_page_part_copy(result_page, src_po,
1c79356b
A
3822 dst_page, dst_po, part_size);
3823 if(!dst_page->dirty){
3824 vm_object_lock(dst_object);
3825 dst_page->dirty = TRUE;
3826 vm_object_unlock(dst_page->object);
3827 }
3828
3829 }
3830 } else {
3831 part_size = PAGE_SIZE;
3832
e3027f41 3833 if (result_page == VM_PAGE_NULL)
3834 vm_page_zero_fill(dst_page);
3835 else{
e3027f41 3836 vm_page_copy(result_page, dst_page);
3837 if(!dst_page->dirty){
3838 vm_object_lock(dst_object);
3839 dst_page->dirty = TRUE;
3840 vm_object_unlock(dst_page->object);
3841 }
3842 }
3843
3844 }
3845
3846 /*
3847 * Unlock everything, and return
3848 */
3849
3850 vm_map_verify_done(dst_map, dst_version);
3851
3852 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3853 vm_fault_copy_cleanup(result_page, src_top_page);
3854 vm_fault_copy_dst_cleanup(dst_page);
3855
3856 amount_left -= part_size;
3857 src_offset += part_size;
3858 dst_offset += part_size;
3859 } while (amount_left > 0);
3860
3861 RETURN(KERN_SUCCESS);
3862#undef RETURN
3863
3864 /*NOTREACHED*/
3865}
3866
3867#ifdef notdef
3868
3869/*
3870 * Routine: vm_fault_page_overwrite
3871 *
3872 * Description:
3873 * A form of vm_fault_page that assumes that the
3874 * resulting page will be overwritten in its entirety,
3875 * making it unnecessary to obtain the correct *contents*
3876 * of the page.
3877 *
3878 * Implementation:
3879 * XXX Untested. Also unused. Eventually, this technology
3880 * could be used in vm_fault_copy() to advantage.
3881 */
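/*
 *	Hypothetical usage (a sketch only -- the routine is untested and
 *	compiled out, and the conventions below are inferred from its
 *	lock/unlock pairing rather than from a real caller):
 *
 *		vm_object_lock(dst_object);
 *		if (vm_fault_page_overwrite(dst_object, dst_offset, &dst_page)
 *		    != VM_FAULT_SUCCESS) {
 *			... the fault was interrupted or failed ...
 *		}
 *		... overwrite every byte of dst_page, mark it dirty,
 *		... and release it when done.
 */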
3882vm_fault_return_t
3883vm_fault_page_overwrite(
3884 register
3885 vm_object_t dst_object,
3886 vm_object_offset_t dst_offset,
3887 vm_page_t *result_page) /* OUT */
3888{
3889 register
3890 vm_page_t dst_page;
3891 kern_return_t wait_result;
3892
3893#define interruptible THREAD_UNINT /* XXX */
3894
3895 while (TRUE) {
3896 /*
3897 * Look for a page at this offset
3898 */
3899
3900 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3901 == VM_PAGE_NULL) {
3902 /*
3903 * No page, no problem... just allocate one.
3904 */
3905
3906 dst_page = vm_page_alloc(dst_object, dst_offset);
3907 if (dst_page == VM_PAGE_NULL) {
3908 vm_object_unlock(dst_object);
3909 VM_PAGE_WAIT();
3910 vm_object_lock(dst_object);
3911 continue;
3912 }
3913
3914 /*
3915 * Pretend that the memory manager
3916 * write-protected the page.
3917 *
3918 * Note that we will be asking for write
3919 * permission without asking for the data
3920 * first.
3921 */
3922
3923 dst_page->overwriting = TRUE;
3924 dst_page->page_lock = VM_PROT_WRITE;
3925 dst_page->absent = TRUE;
3926 dst_page->unusual = TRUE;
3927 dst_object->absent_count++;
3928
3929 break;
3930
3931 /*
3932 * When we bail out, we might have to throw
3933 * away the page created here.
3934 */
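			/*
			 *	DISCARD_PAGE re-takes the object lock and looks the
			 *	page up again instead of trusting the cached dst_page
			 *	pointer, since every path that invokes it has already
			 *	dropped the object lock; the "overwriting" check
			 *	ensures only a page created by this routine is freed.
			 */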
3935
3936#define DISCARD_PAGE \
3937 MACRO_BEGIN \
3938 vm_object_lock(dst_object); \
3939 dst_page = vm_page_lookup(dst_object, dst_offset); \
3940 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3941 VM_PAGE_FREE(dst_page); \
3942 vm_object_unlock(dst_object); \
3943 MACRO_END
3944 }
3945
3946 /*
3947 * If the page is write-protected...
3948 */
3949
3950 if (dst_page->page_lock & VM_PROT_WRITE) {
3951 /*
3952 * ... and an unlock request hasn't been sent
3953 */
3954
3955 if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3956 vm_prot_t u;
3957 kern_return_t rc;
3958
3959 /*
3960 * ... then send one now.
3961 */
3962
3963 if (!dst_object->pager_ready) {
3964 wait_result = vm_object_assert_wait(dst_object,
3965 VM_OBJECT_EVENT_PAGER_READY,
3966 interruptible);
1c79356b 3967 vm_object_unlock(dst_object);
3968 if (wait_result == THREAD_WAITING)
3969 wait_result = thread_block(THREAD_CONTINUE_NULL);
3970 if (wait_result != THREAD_AWAKENED) {
3971 DISCARD_PAGE;
3972 return(VM_FAULT_INTERRUPTED);
3973 }
3974 continue;
3975 }
3976
3977 u = dst_page->unlock_request |= VM_PROT_WRITE;
3978 vm_object_unlock(dst_object);
3979
3980 if ((rc = memory_object_data_unlock(
3981 dst_object->pager,
3982 dst_offset + dst_object->paging_offset,
3983 PAGE_SIZE,
3984 u)) != KERN_SUCCESS) {
3985 if (vm_fault_debug)
 3986 printf("vm_fault_page_overwrite: memory_object_data_unlock failed\n");
3987 DISCARD_PAGE;
3988 return((rc == MACH_SEND_INTERRUPTED) ?
3989 VM_FAULT_INTERRUPTED :
3990 VM_FAULT_MEMORY_ERROR);
3991 }
3992 vm_object_lock(dst_object);
3993 continue;
3994 }
3995
3996 /* ... fall through to wait below */
3997 } else {
3998 /*
3999 * If the page isn't being used for other
4000 * purposes, then we're done.
4001 */
4002 if ( ! (dst_page->busy || dst_page->absent ||
4003 dst_page->error || dst_page->restart) )
4004 break;
4005 }
4006
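		/*
		 *	Either an unlock request for this page is already
		 *	outstanding, or the page is busy, absent, in error, or
		 *	marked for restart; in every case, sleep on the page
		 *	and retry from the top of the loop.
		 */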
9bccf70c 4007 wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
1c79356b 4008 vm_object_unlock(dst_object);
4009 if (wait_result == THREAD_WAITING)
4010 wait_result = thread_block(THREAD_CONTINUE_NULL);
4011 if (wait_result != THREAD_AWAKENED) {
4012 DISCARD_PAGE;
4013 return(VM_FAULT_INTERRUPTED);
4014 }
4015 }
4016
4017 *result_page = dst_page;
4018 return(VM_FAULT_SUCCESS);
4019
4020#undef interruptible
4021#undef DISCARD_PAGE
4022}
4023
4024#endif /* notdef */
4025
4026#if VM_FAULT_CLASSIFY
4027/*
4028 * Temporary statistics gathering support.
4029 */
4030
4031/*
4032 * Statistics arrays:
4033 */
4034#define VM_FAULT_TYPES_MAX 5
4035#define VM_FAULT_LEVEL_MAX 8
4036
4037int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
4038
4039#define VM_FAULT_TYPE_ZERO_FILL 0
4040#define VM_FAULT_TYPE_MAP_IN 1
4041#define VM_FAULT_TYPE_PAGER 2
4042#define VM_FAULT_TYPE_COPY 3
4043#define VM_FAULT_TYPE_OTHER 4
4044
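/*
 *	vm_fault_stats is indexed by [fault type][shadow-chain depth]:
 *	the type is one of the VM_FAULT_TYPE_* values above, and the
 *	depth is how far down the shadow chain vm_fault_classify() had
 *	to look before it could classify the fault (deeper chains are
 *	clamped into the last bucket).
 */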
4045
4046void
4047vm_fault_classify(vm_object_t object,
4048 vm_object_offset_t offset,
4049 vm_prot_t fault_type)
4050{
4051 int type, level = 0;
4052 vm_page_t m;
4053
4054 while (TRUE) {
4055 m = vm_page_lookup(object, offset);
4056 if (m != VM_PAGE_NULL) {
4057 if (m->busy || m->error || m->restart || m->absent ||
4058 fault_type & m->page_lock) {
4059 type = VM_FAULT_TYPE_OTHER;
4060 break;
4061 }
4062 if (((fault_type & VM_PROT_WRITE) == 0) ||
4063 ((level == 0) && object->copy == VM_OBJECT_NULL)) {
4064 type = VM_FAULT_TYPE_MAP_IN;
4065 break;
4066 }
4067 type = VM_FAULT_TYPE_COPY;
4068 break;
4069 }
4070 else {
4071 if (object->pager_created) {
4072 type = VM_FAULT_TYPE_PAGER;
4073 break;
4074 }
4075 if (object->shadow == VM_OBJECT_NULL) {
4076 type = VM_FAULT_TYPE_ZERO_FILL;
4077 break;
4078 }
4079
4080 offset += object->shadow_offset;
4081 object = object->shadow;
4082 level++;
4083 continue;
4084 }
4085 }
4086
 4087 	if (level >= VM_FAULT_LEVEL_MAX)
 4088 		level = VM_FAULT_LEVEL_MAX - 1;
4089
4090 vm_fault_stats[type][level] += 1;
4091
4092 return;
4093}
4094
4095/* cleanup routine to call from debugger */
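/*
 *	That is, vm_fault_classify_init() zeroes the counters; after the
 *	workload of interest has run, the vm_fault_stats array can then
 *	presumably be inspected from the debugger as well.
 */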
4096
4097void
4098vm_fault_classify_init(void)
4099{
4100 int type, level;
4101
4102 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
4103 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
4104 vm_fault_stats[type][level] = 0;
4105 }
4106 }
4107
4108 return;
4109}
4110#endif /* VM_FAULT_CLASSIFY */