1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: vm_fault.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 *
56 * Page fault handling module.
57 */
58
59 #include <mach_cluster_stats.h>
60 #include <mach_pagemap.h>
61 #include <mach_kdb.h>
62
63 #include <mach/mach_types.h>
64 #include <mach/kern_return.h>
65 #include <mach/message.h> /* for error codes */
66 #include <mach/vm_param.h>
67 #include <mach/vm_behavior.h>
68 #include <mach/memory_object.h>
69 /* For memory_object_data_{request,unlock} */
70
71 #include <kern/kern_types.h>
72 #include <kern/host_statistics.h>
73 #include <kern/counters.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/sched_prim.h>
77 #include <kern/host.h>
78 #include <kern/xpr.h>
79 #include <kern/mach_param.h>
80 #include <kern/macro_help.h>
81 #include <kern/zalloc.h>
82 #include <kern/misc_protos.h>
83
84 #include <ppc/proc_reg.h>
85
86 #include <vm/vm_fault.h>
87 #include <vm/task_working_set.h>
88 #include <vm/vm_map.h>
89 #include <vm/vm_object.h>
90 #include <vm/vm_page.h>
91 #include <vm/vm_kern.h>
92 #include <vm/pmap.h>
93 #include <vm/vm_pageout.h>
94 #include <vm/vm_protos.h>
95
96 #include <sys/kdebug.h>
97
98 #define VM_FAULT_CLASSIFY 0
99 #define VM_FAULT_STATIC_CONFIG 1
100
101 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
102
103 unsigned int vm_object_absent_max = 50;
104
105 int vm_fault_debug = 0;
106
107 #if !VM_FAULT_STATIC_CONFIG
108 boolean_t vm_fault_dirty_handling = FALSE;
109 boolean_t vm_fault_interruptible = FALSE;
110 boolean_t software_reference_bits = TRUE;
111 #endif
112
113 #if MACH_KDB
114 extern struct db_watchpoint *db_watchpoint_list;
115 #endif /* MACH_KDB */
116
117
118 /* Forward declarations of internal routines. */
119 extern kern_return_t vm_fault_wire_fast(
120 vm_map_t map,
121 vm_map_offset_t va,
122 vm_map_entry_t entry,
123 pmap_t pmap,
124 vm_map_offset_t pmap_addr);
125
126 extern void vm_fault_continue(void);
127
128 extern void vm_fault_copy_cleanup(
129 vm_page_t page,
130 vm_page_t top_page);
131
132 extern void vm_fault_copy_dst_cleanup(
133 vm_page_t page);
134
135 #if VM_FAULT_CLASSIFY
136 extern void vm_fault_classify(vm_object_t object,
137 vm_object_offset_t offset,
138 vm_prot_t fault_type);
139
140 extern void vm_fault_classify_init(void);
141 #endif
142
143 /*
144 * Routine: vm_fault_init
145 * Purpose:
146 * Initialize our private data structures.
147 */
148 void
149 vm_fault_init(void)
150 {
151 }
152
153 /*
154 * Routine: vm_fault_cleanup
155 * Purpose:
156 * Clean up the result of vm_fault_page.
157 * Results:
158 * The paging reference for "object" is released.
159 * "object" is unlocked.
160 * If "top_page" is not null, "top_page" is
161 * freed and the paging reference for the object
162 * containing it is released.
163 *
164 * In/out conditions:
165 * "object" must be locked.
166 */
167 void
168 vm_fault_cleanup(
169 register vm_object_t object,
170 register vm_page_t top_page)
171 {
172 vm_object_paging_end(object);
173 vm_object_unlock(object);
174
175 if (top_page != VM_PAGE_NULL) {
176 object = top_page->object;
177 vm_object_lock(object);
178 VM_PAGE_FREE(top_page);
179 vm_object_paging_end(object);
180 vm_object_unlock(object);
181 }
182 }
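/*
 * Illustrative sketch (editorial addition, not part of the original
 * file): how a caller typically honors the cleanup contract described
 * above.  The variables "object", "offset", "prot", "lo_offset",
 * "hi_offset", "result_page", "top_page", "error_code", "map" and
 * "vaddr" are assumed to have been set up by the caller, much as
 * vm_fault() does later in this file.
 */
#if 0	/* sketch only -- never compiled */
	vm_fault_return_t	kr;

	vm_object_reference(object);		/* keep the object alive */
	vm_object_lock(object);
	vm_object_paging_begin(object);		/* donate a paging reference */

	kr = vm_fault_page(object, offset, VM_PROT_READ, FALSE,
			   THREAD_UNINT, lo_offset, hi_offset,
			   VM_BEHAVIOR_DEFAULT, &prot,
			   &result_page, &top_page, (int *)0,
			   &error_code, FALSE, FALSE, map, vaddr);

	if (kr == VM_FAULT_SUCCESS) {
		/*
		 * result_page is busy and its object is locked with a
		 * paging reference; wake it up and drop both when done.
		 */
		PAGE_WAKEUP_DONE(result_page);
		vm_fault_cleanup(result_page->object, top_page);
	}
	/* on any other return value, vm_fault_page() cleaned up itself */
	vm_object_deallocate(object);
#endif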
183
184 #if MACH_CLUSTER_STATS
185 #define MAXCLUSTERPAGES 16
186 struct {
187 unsigned long pages_in_cluster;
188 unsigned long pages_at_higher_offsets;
189 unsigned long pages_at_lower_offsets;
190 } cluster_stats_in[MAXCLUSTERPAGES];
191 #define CLUSTER_STAT(clause) clause
192 #define CLUSTER_STAT_HIGHER(x) \
193 ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
194 #define CLUSTER_STAT_LOWER(x) \
195 ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
196 #define CLUSTER_STAT_CLUSTER(x) \
197 ((cluster_stats_in[(x)].pages_in_cluster)++)
198 #else /* MACH_CLUSTER_STATS */
199 #define CLUSTER_STAT(clause)
200 #endif /* MACH_CLUSTER_STATS */
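/*
 * Illustrative note (editorial addition): CLUSTER_STAT() lets the
 * statistics code compile away completely when MACH_CLUSTER_STATS is
 * off.  For example, the declaration
 *
 *	CLUSTER_STAT(int pages_at_higher_offsets;)
 *
 * used later in vm_fault_page() becomes "int pages_at_higher_offsets;"
 * with stats enabled and expands to nothing otherwise, so the counters
 * add no cost to a normal build.
 */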
201
202 /* XXX - temporary */
203 boolean_t vm_allow_clustered_pagein = FALSE;
204 int vm_pagein_cluster_used = 0;
205
206 #define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
207
208
209 boolean_t vm_page_deactivate_behind = TRUE;
210 /*
211 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
212 */
213 int vm_default_ahead = 0;
214 int vm_default_behind = MAX_UPL_TRANSFER;
215
216 /*
217 * vm_fault_deactivate_behind
218 *
219 * Determine if sequential access is in progress
220 * in accordance with the behavior specified. If
221 * so, compute a potential page to deactivate and
222 * deactivate it.
223 *
224 * The object must be locked.
225 */
226 static
227 boolean_t
228 vm_fault_deactivate_behind(
229 vm_object_t object,
230 vm_object_offset_t offset,
231 vm_behavior_t behavior)
232 {
233 vm_page_t m;
234
235 #if TRACEFAULTPAGE
236 dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */
237 #endif
238
239 if (object == kernel_object) {
240 /*
241 * Do not deactivate pages from the kernel object: they
242 * are not intended to become pageable.
243 */
244 return FALSE;
245 }
246
247 switch (behavior) {
248 case VM_BEHAVIOR_RANDOM:
249 object->sequential = PAGE_SIZE_64;
250 m = VM_PAGE_NULL;
251 break;
252 case VM_BEHAVIOR_SEQUENTIAL:
253 if (offset &&
254 object->last_alloc == offset - PAGE_SIZE_64) {
255 object->sequential += PAGE_SIZE_64;
256 m = vm_page_lookup(object, offset - PAGE_SIZE_64);
257 } else {
258 object->sequential = PAGE_SIZE_64; /* reset */
259 m = VM_PAGE_NULL;
260 }
261 break;
262 case VM_BEHAVIOR_RSEQNTL:
263 if (object->last_alloc &&
264 object->last_alloc == offset + PAGE_SIZE_64) {
265 object->sequential += PAGE_SIZE_64;
266 m = vm_page_lookup(object, offset + PAGE_SIZE_64);
267 } else {
268 object->sequential = PAGE_SIZE_64; /* reset */
269 m = VM_PAGE_NULL;
270 }
271 break;
272 case VM_BEHAVIOR_DEFAULT:
273 default:
274 if (offset &&
275 object->last_alloc == offset - PAGE_SIZE_64) {
276 vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
277
278 object->sequential += PAGE_SIZE_64;
279 m = (offset >= behind &&
280 object->sequential >= behind) ?
281 vm_page_lookup(object, offset - behind) :
282 VM_PAGE_NULL;
283 } else if (object->last_alloc &&
284 object->last_alloc == offset + PAGE_SIZE_64) {
285 vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
286
287 object->sequential += PAGE_SIZE_64;
288 m = (offset < -behind &&
289 object->sequential >= behind) ?
290 vm_page_lookup(object, offset + behind) :
291 VM_PAGE_NULL;
292 } else {
293 object->sequential = PAGE_SIZE_64;
294 m = VM_PAGE_NULL;
295 }
296 break;
297 }
298
299 object->last_alloc = offset;
300
301 if (m) {
302 if (!m->busy) {
303 vm_page_lock_queues();
304 vm_page_deactivate(m);
305 vm_page_unlock_queues();
306 #if TRACEFAULTPAGE
307 dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
308 #endif
309 }
310 return TRUE;
311 }
312 return FALSE;
313 }
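/*
 * Worked example (editorial addition, with assumed numbers): under
 * VM_BEHAVIOR_DEFAULT with vm_default_behind == 64, each fault that
 * continues a forward sequential run adds PAGE_SIZE to
 * object->sequential.  Once both the run length and the faulting
 * offset reach 64 pages, every new fault at "offset" looks up the page
 * at (offset - 64 * PAGE_SIZE) and deactivates it, so a long
 * sequential reader trails a window of deactivated pages instead of
 * flooding the active queue.
 */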
314
315
316 /*
317 * Routine: vm_fault_page
318 * Purpose:
319 * Find the resident page for the virtual memory
320 * specified by the given virtual memory object
321 * and offset.
322 * Additional arguments:
323 * The required permissions for the page are given
324 * in "fault_type". Desired permissions are included
325 * in "protection". The minimum and maximum valid offsets
326 * within the object for the relevant map entry are
327 * passed in "lo_offset" and "hi_offset" respectively and
328 * the expected page reference pattern is passed in "behavior".
329 * These three parameters are used to determine pagein cluster
330 * limits.
331 *
332 * If the desired page is known to be resident (for
333 * example, because it was previously wired down), asserting
334 * the "unwiring" parameter will speed the search.
335 *
336 * If the operation can be interrupted (by thread_abort
337 * or thread_terminate), then the "interruptible"
338 * parameter should be asserted.
339 *
340 * Results:
341 * The page containing the proper data is returned
342 * in "result_page".
343 *
344 * In/out conditions:
345 * The source object must be locked and referenced,
346 * and must donate one paging reference. The reference
347 * is not affected. The paging reference and lock are
348 * consumed.
349 *
350 * If the call succeeds, the object in which "result_page"
351 * resides is left locked and holding a paging reference.
352 * If this is not the original object, a busy page in the
353 * original object is returned in "top_page", to prevent other
354 * callers from pursuing this same data, along with a paging
355 * reference for the original object. The "top_page" should
356 * be destroyed when this guarantee is no longer required.
357 * The "result_page" is also left busy. It is not removed
358 * from the pageout queues.
359 */
360
361 vm_fault_return_t
362 vm_fault_page(
363 /* Arguments: */
364 vm_object_t first_object, /* Object to begin search */
365 vm_object_offset_t first_offset, /* Offset into object */
366 vm_prot_t fault_type, /* What access is requested */
367 boolean_t must_be_resident,/* Must page be resident? */
368 int interruptible, /* how may fault be interrupted? */
369 vm_map_offset_t lo_offset, /* Map entry start */
370 vm_map_offset_t hi_offset, /* Map entry end */
371 vm_behavior_t behavior, /* Page reference behavior */
372 /* Modifies in place: */
373 vm_prot_t *protection, /* Protection for mapping */
374 /* Returns: */
375 vm_page_t *result_page, /* Page found, if successful */
376 vm_page_t *top_page, /* Page in top object, if
377 * not result_page. */
378 int *type_of_fault, /* if non-null, fill in with type of fault
379 * COW, zero-fill, etc... returned in trace point */
380 /* More arguments: */
381 kern_return_t *error_code, /* code if page is in error */
382 boolean_t no_zero_fill, /* don't zero fill absent pages */
383 boolean_t data_supply, /* treat as data_supply if
384 * it is a write fault and a full
385 * page is provided */
386 vm_map_t map,
387 __unused vm_map_offset_t vaddr)
388 {
389 register
390 vm_page_t m;
391 register
392 vm_object_t object;
393 register
394 vm_object_offset_t offset;
395 vm_page_t first_m;
396 vm_object_t next_object;
397 vm_object_t copy_object;
398 boolean_t look_for_page;
399 vm_prot_t access_required = fault_type;
400 vm_prot_t wants_copy_flag;
401 vm_object_size_t length;
402 vm_object_offset_t cluster_start, cluster_end;
403 CLUSTER_STAT(int pages_at_higher_offsets;)
404 CLUSTER_STAT(int pages_at_lower_offsets;)
405 kern_return_t wait_result;
406 boolean_t interruptible_state;
407 boolean_t bumped_pagein = FALSE;
408
409
410 #if MACH_PAGEMAP
411 /*
412 * MACH page map - an optional optimization where a bit map is maintained
413 * by the VM subsystem for internal objects to indicate which pages of
414 * the object currently reside on backing store. This existence map
415 * duplicates information maintained by the vnode pager. It is
416 * created at the time of the first pageout against the object, i.e.
417 * at the same time the pager for the object is created. The optimization
418 * is designed to eliminate pager interaction overhead, if it is
419 * 'known' that the page does not exist on backing store.
420 *
421 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
422 * either marked as paged out in the existence map for the object or no
423 * existence map exists for the object. LOOK_FOR() is one of the
424 * criteria in the decision to invoke the pager. It is also used as one
425 * of the criteria to terminate the scan for adjacent pages in a clustered
426 * pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
427 * permanent objects. Note also that if the pager for an internal object
428 * has not been created, the pager is not invoked regardless of the value
429 * of LOOK_FOR() and that clustered pagein scans are only done on an object
430 * for which a pager has been created.
431 *
432 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
433 * is marked as paged out in the existence map for the object.
434 * PAGED_OUT() is used to determine if a page has already been pushed
435 * into a copy object in order to avoid a redundant page out operation.
436 */
437 #define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
438 != VM_EXTERNAL_STATE_ABSENT)
439 #define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
440 == VM_EXTERNAL_STATE_EXISTS)
441 #else /* MACH_PAGEMAP */
442 /*
443 * If the MACH page map optimization is not enabled,
444 * LOOK_FOR() always evaluates to TRUE. The pager will always be
445 * invoked to resolve missing pages in an object, assuming the pager
446 * has been created for the object. In a clustered page operation, the
447 * absence of a page on backing store cannot be used to terminate
448 * a scan for adjacent pages since that information is available only in
449 * the pager. Hence pages that may not be paged out are potentially
450 * included in a clustered request. The vnode pager is coded to deal
451 * with any combination of absent/present pages in a clustered
452 * pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
453 * will always be invoked to push a dirty page into a copy object assuming
454 * a pager has been created. If the page has already been pushed, the
455 * pager will ignore the new request.
456 */
457 #define LOOK_FOR(o, f) TRUE
458 #define PAGED_OUT(o, f) FALSE
459 #endif /* MACH_PAGEMAP */
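/*
 * Illustrative note (editorial addition): with MACH_PAGEMAP enabled,
 * an internal object that has pushed out only the page at offset
 * 3 * PAGE_SIZE_64 records VM_EXTERNAL_STATE_EXISTS there, so both
 * LOOK_FOR() and PAGED_OUT() are TRUE for that offset and the pager
 * will be asked for the data.  For an offset the object has never
 * paged out, the state is VM_EXTERNAL_STATE_ABSENT, LOOK_FOR() is
 * FALSE, and the fault falls through to the shadow chain or to
 * zero-fill without calling the pager at all.
 */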
460
461 /*
462 * Recovery actions
463 */
464 #define PREPARE_RELEASE_PAGE(m) \
465 MACRO_BEGIN \
466 vm_page_lock_queues(); \
467 MACRO_END
468
469 #define DO_RELEASE_PAGE(m) \
470 MACRO_BEGIN \
471 PAGE_WAKEUP_DONE(m); \
472 if (!m->active && !m->inactive) \
473 vm_page_activate(m); \
474 vm_page_unlock_queues(); \
475 MACRO_END
476
477 #define RELEASE_PAGE(m) \
478 MACRO_BEGIN \
479 PREPARE_RELEASE_PAGE(m); \
480 DO_RELEASE_PAGE(m); \
481 MACRO_END
482
483 #if TRACEFAULTPAGE
484 dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
485 #endif
486
487
488
489 #if !VM_FAULT_STATIC_CONFIG
490 if (vm_fault_dirty_handling
491 #if MACH_KDB
492 /*
493 * If there are watchpoints set, then
494 * we don't want to give away write permission
495 * on a read fault. Make the task write fault,
496 * so that the watchpoint code notices the access.
497 */
498 || db_watchpoint_list
499 #endif /* MACH_KDB */
500 ) {
501 /*
502 * If we aren't asking for write permission,
503 * then don't give it away. We're using write
504 * faults to set the dirty bit.
505 */
506 if (!(fault_type & VM_PROT_WRITE))
507 *protection &= ~VM_PROT_WRITE;
508 }
509
510 if (!vm_fault_interruptible)
511 interruptible = THREAD_UNINT;
512 #else /* STATIC_CONFIG */
513 #if MACH_KDB
514 /*
515 * If there are watchpoints set, then
516 * we don't want to give away write permission
517 * on a read fault. Make the task write fault,
518 * so that the watchpoint code notices the access.
519 */
520 if (db_watchpoint_list) {
521 /*
522 * If we aren't asking for write permission,
523 * then don't give it away. We're using write
524 * faults to set the dirty bit.
525 */
526 if (!(fault_type & VM_PROT_WRITE))
527 *protection &= ~VM_PROT_WRITE;
528 }
529
530 #endif /* MACH_KDB */
531 #endif /* STATIC_CONFIG */
532
533 interruptible_state = thread_interrupt_level(interruptible);
534
535 /*
536 * INVARIANTS (through entire routine):
537 *
538 * 1) At all times, we must either have the object
539 * lock or a busy page in some object to prevent
540 * some other thread from trying to bring in
541 * the same page.
542 *
543 * Note that we cannot hold any locks during the
544 * pager access or when waiting for memory, so
545 * we use a busy page then.
546 *
547 * Note also that we aren't as concerned about more than
548 * one thread attempting to memory_object_data_unlock
549 * the same page at once, so we don't hold the page
550 * as busy then, but do record the highest unlock
551 * value so far. [Unlock requests may also be delivered
552 * out of order.]
553 *
554 * 2) To prevent another thread from racing us down the
555 * shadow chain and entering a new page in the top
556 * object before we do, we must keep a busy page in
557 * the top object while following the shadow chain.
558 *
559 * 3) We must increment paging_in_progress on any object
560 * for which we have a busy page
561 *
562 * 4) We leave busy pages on the pageout queues.
563 * If the pageout daemon comes across a busy page,
564 * it will remove the page from the pageout queues.
565 */
566
567 /*
568 * Search for the page at object/offset.
569 */
570
571 object = first_object;
572 offset = first_offset;
573 first_m = VM_PAGE_NULL;
574 access_required = fault_type;
575
576 XPR(XPR_VM_FAULT,
577 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
578 (integer_t)object, offset, fault_type, *protection, 0);
579
580 /*
581 * See whether this page is resident
582 */
583
584 while (TRUE) {
585 #if TRACEFAULTPAGE
586 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
587 #endif
588 if (!object->alive) {
589 vm_fault_cleanup(object, first_m);
590 thread_interrupt_level(interruptible_state);
591 return(VM_FAULT_MEMORY_ERROR);
592 }
593 m = vm_page_lookup(object, offset);
594 #if TRACEFAULTPAGE
595 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
596 #endif
597 if (m != VM_PAGE_NULL) {
598 /*
599 * If the page was pre-paged as part of a
600 * cluster, record the fact.
601 * If we were passed a valid pointer for
602 * "type_of_fault", than we came from
603 * vm_fault... we'll let it deal with
604 * this condition, since it
605 * needs to see m->clustered to correctly
606 * account the pageins.
607 */
608 if (type_of_fault == NULL && m->clustered) {
609 vm_pagein_cluster_used++;
610 m->clustered = FALSE;
611 }
612
613 /*
614 * If the page is being brought in,
615 * wait for it and then retry.
616 *
617 * A possible optimization: if the page
618 * is known to be resident, we can ignore
619 * pages that are absent (regardless of
620 * whether they're busy).
621 */
622
623 if (m->busy) {
624 #if TRACEFAULTPAGE
625 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
626 #endif
627 wait_result = PAGE_SLEEP(object, m, interruptible);
628 XPR(XPR_VM_FAULT,
629 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
630 (integer_t)object, offset,
631 (integer_t)m, 0, 0);
632 counter(c_vm_fault_page_block_busy_kernel++);
633
634 if (wait_result != THREAD_AWAKENED) {
635 vm_fault_cleanup(object, first_m);
636 thread_interrupt_level(interruptible_state);
637 if (wait_result == THREAD_RESTART)
638 {
639 return(VM_FAULT_RETRY);
640 }
641 else
642 {
643 return(VM_FAULT_INTERRUPTED);
644 }
645 }
646 continue;
647 }
648
649 if (m->encrypted) {
650 /*
651 * ENCRYPTED SWAP:
652 * the user needs access to a page that we
653 * encrypted before paging it out.
654 * Decrypt the page now.
655 * Keep it busy to prevent anyone from
656 * accessing it during the decryption.
657 */
658 m->busy = TRUE;
659 vm_page_decrypt(m, 0);
660 assert(object == m->object);
661 assert(m->busy);
662 PAGE_WAKEUP_DONE(m);
663
664 /*
665 * Retry from the top, in case
666 * something changed while we were
667 * decrypting.
668 */
669 continue;
670 }
671 ASSERT_PAGE_DECRYPTED(m);
672
673 /*
674 * If the page is in error, give up now.
675 */
676
677 if (m->error) {
678 #if TRACEFAULTPAGE
679 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */
680 #endif
681 if (error_code)
682 *error_code = m->page_error;
683 VM_PAGE_FREE(m);
684 vm_fault_cleanup(object, first_m);
685 thread_interrupt_level(interruptible_state);
686 return(VM_FAULT_MEMORY_ERROR);
687 }
688
689 /*
690 * If the pager wants us to restart
691 * at the top of the chain,
692 * typically because it has moved the
693 * page to another pager, then do so.
694 */
695
696 if (m->restart) {
697 #if TRACEFAULTPAGE
698 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
699 #endif
700 VM_PAGE_FREE(m);
701 vm_fault_cleanup(object, first_m);
702 thread_interrupt_level(interruptible_state);
703 return(VM_FAULT_RETRY);
704 }
705
706 /*
707 * If the page isn't busy, but is absent,
708 * then it was deemed "unavailable".
709 */
710
711 if (m->absent) {
712 /*
713 * Remove the non-existent page (unless it's
714 * in the top object) and move on down to the
715 * next object (if there is one).
716 */
717 #if TRACEFAULTPAGE
718 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */
719 #endif
720
721 next_object = object->shadow;
722 if (next_object == VM_OBJECT_NULL) {
723 vm_page_t real_m;
724
725 assert(!must_be_resident);
726
727 if (object->shadow_severed) {
728 vm_fault_cleanup(
729 object, first_m);
730 thread_interrupt_level(interruptible_state);
731 return VM_FAULT_MEMORY_ERROR;
732 }
733
734 /*
735 * Absent page at bottom of shadow
736 * chain; zero fill the page we left
737 * busy in the first object, and flush
738 * the absent page. But first we
739 * need to allocate a real page.
740 */
741 if (VM_PAGE_THROTTLED() ||
742 (real_m = vm_page_grab())
743 == VM_PAGE_NULL) {
744 vm_fault_cleanup(
745 object, first_m);
746 thread_interrupt_level(
747 interruptible_state);
748 return(
749 VM_FAULT_MEMORY_SHORTAGE);
750 }
751
752 /*
753 * Are we protecting the system from
754 * backing store exhaustion? If so,
755 * sleep unless we are privileged.
756 */
757
758 if(vm_backing_store_low) {
759 if(!(current_task()->priv_flags
760 & VM_BACKING_STORE_PRIV)) {
761 assert_wait((event_t)
762 &vm_backing_store_low,
763 THREAD_UNINT);
764 vm_fault_cleanup(object,
765 first_m);
766 thread_block(THREAD_CONTINUE_NULL);
767 thread_interrupt_level(
768 interruptible_state);
769 return(VM_FAULT_RETRY);
770 }
771 }
772
773
774 XPR(XPR_VM_FAULT,
775 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
776 (integer_t)object, offset,
777 (integer_t)m,
778 (integer_t)first_object, 0);
779 if (object != first_object) {
780 VM_PAGE_FREE(m);
781 vm_object_paging_end(object);
782 vm_object_unlock(object);
783 object = first_object;
784 offset = first_offset;
785 m = first_m;
786 first_m = VM_PAGE_NULL;
787 vm_object_lock(object);
788 }
789
790 VM_PAGE_FREE(m);
791 assert(real_m->busy);
792 vm_page_insert(real_m, object, offset);
793 m = real_m;
794
795 /*
796 * Drop the lock while zero filling
797 * page. Then break because this
798 * is the page we wanted. Checking
799 * the page lock is a waste of time;
800 * this page was either absent or
801 * newly allocated -- in both cases
802 * it can't be page locked by a pager.
803 */
804 m->no_isync = FALSE;
805
806 if (!no_zero_fill) {
807 vm_object_unlock(object);
808 vm_page_zero_fill(m);
809 vm_object_lock(object);
810
811 if (type_of_fault)
812 *type_of_fault = DBG_ZERO_FILL_FAULT;
813 VM_STAT(zero_fill_count++);
814 }
815 if (bumped_pagein == TRUE) {
816 VM_STAT(pageins--);
817 current_task()->pageins--;
818 }
819 vm_page_lock_queues();
820 VM_PAGE_QUEUES_REMOVE(m);
821 m->page_ticket = vm_page_ticket;
822 assert(!m->laundry);
823 assert(m->object != kernel_object);
824 assert(m->pageq.next == NULL &&
825 m->pageq.prev == NULL);
826 if(m->object->size > 0x200000) {
827 m->zero_fill = TRUE;
828 /* depends on the queues lock */
829 vm_zf_count += 1;
830 queue_enter(&vm_page_queue_zf,
831 m, vm_page_t, pageq);
832 } else {
833 queue_enter(
834 &vm_page_queue_inactive,
835 m, vm_page_t, pageq);
836 }
837 vm_page_ticket_roll++;
838 if(vm_page_ticket_roll ==
839 VM_PAGE_TICKETS_IN_ROLL) {
840 vm_page_ticket_roll = 0;
841 if(vm_page_ticket ==
842 VM_PAGE_TICKET_ROLL_IDS)
843 vm_page_ticket= 0;
844 else
845 vm_page_ticket++;
846 }
847 m->inactive = TRUE;
848 vm_page_inactive_count++;
849 vm_page_unlock_queues();
850 break;
851 } else {
852 if (must_be_resident) {
853 vm_object_paging_end(object);
854 } else if (object != first_object) {
855 vm_object_paging_end(object);
856 VM_PAGE_FREE(m);
857 } else {
858 first_m = m;
859 m->absent = FALSE;
860 m->unusual = FALSE;
861 vm_object_absent_release(object);
862 m->busy = TRUE;
863
864 vm_page_lock_queues();
865 VM_PAGE_QUEUES_REMOVE(m);
866 vm_page_unlock_queues();
867 }
868 XPR(XPR_VM_FAULT,
869 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
870 (integer_t)object, offset,
871 (integer_t)next_object,
872 offset+object->shadow_offset,0);
873 offset += object->shadow_offset;
874 hi_offset += object->shadow_offset;
875 lo_offset += object->shadow_offset;
876 access_required = VM_PROT_READ;
877 vm_object_lock(next_object);
878 vm_object_unlock(object);
879 object = next_object;
880 vm_object_paging_begin(object);
881 continue;
882 }
883 }
884
885 if ((m->cleaning)
886 && ((object != first_object) ||
887 (object->copy != VM_OBJECT_NULL))
888 && (fault_type & VM_PROT_WRITE)) {
889 /*
890 * This is a copy-on-write fault that will
891 * cause us to revoke access to this page, but
892 * this page is in the process of being cleaned
893 * in a clustered pageout. We must wait until
894 * the cleaning operation completes before
895 * revoking access to the original page,
896 * otherwise we might attempt to remove a
897 * wired mapping.
898 */
899 #if TRACEFAULTPAGE
900 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */
901 #endif
902 XPR(XPR_VM_FAULT,
903 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
904 (integer_t)object, offset,
905 (integer_t)m, 0, 0);
906 /* take an extra ref so that object won't die */
907 assert(object->ref_count > 0);
908 object->ref_count++;
909 vm_object_res_reference(object);
910 vm_fault_cleanup(object, first_m);
911 counter(c_vm_fault_page_block_backoff_kernel++);
912 vm_object_lock(object);
913 assert(object->ref_count > 0);
914 m = vm_page_lookup(object, offset);
915 if (m != VM_PAGE_NULL && m->cleaning) {
916 PAGE_ASSERT_WAIT(m, interruptible);
917 vm_object_unlock(object);
918 wait_result = thread_block(THREAD_CONTINUE_NULL);
919 vm_object_deallocate(object);
920 goto backoff;
921 } else {
922 vm_object_unlock(object);
923 vm_object_deallocate(object);
924 thread_interrupt_level(interruptible_state);
925 return VM_FAULT_RETRY;
926 }
927 }
928
929 /*
930 * If the desired access to this page has
931 * been locked out, request that it be unlocked.
932 */
933
934 if (access_required & m->page_lock) {
935 if ((access_required & m->unlock_request) != access_required) {
936 vm_prot_t new_unlock_request;
937 kern_return_t rc;
938
939 #if TRACEFAULTPAGE
940 dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready); /* (TEST/DEBUG) */
941 #endif
942 if (!object->pager_ready) {
943 XPR(XPR_VM_FAULT,
944 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
945 access_required,
946 (integer_t)object, offset,
947 (integer_t)m, 0);
948 /* take an extra ref */
949 assert(object->ref_count > 0);
950 object->ref_count++;
951 vm_object_res_reference(object);
952 vm_fault_cleanup(object,
953 first_m);
954 counter(c_vm_fault_page_block_backoff_kernel++);
955 vm_object_lock(object);
956 assert(object->ref_count > 0);
957 if (!object->pager_ready) {
958 wait_result = vm_object_assert_wait(
959 object,
960 VM_OBJECT_EVENT_PAGER_READY,
961 interruptible);
962 vm_object_unlock(object);
963 if (wait_result == THREAD_WAITING)
964 wait_result = thread_block(THREAD_CONTINUE_NULL);
965 vm_object_deallocate(object);
966 goto backoff;
967 } else {
968 vm_object_unlock(object);
969 vm_object_deallocate(object);
970 thread_interrupt_level(interruptible_state);
971 return VM_FAULT_RETRY;
972 }
973 }
974
975 new_unlock_request = m->unlock_request =
976 (access_required | m->unlock_request);
977 vm_object_unlock(object);
978 XPR(XPR_VM_FAULT,
979 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
980 (integer_t)object, offset,
981 (integer_t)m, new_unlock_request, 0);
982 if ((rc = memory_object_data_unlock(
983 object->pager,
984 offset + object->paging_offset,
985 PAGE_SIZE,
986 new_unlock_request))
987 != KERN_SUCCESS) {
988 if (vm_fault_debug)
989 printf("vm_fault: memory_object_data_unlock failed\n");
990 vm_object_lock(object);
991 vm_fault_cleanup(object, first_m);
992 thread_interrupt_level(interruptible_state);
993 return((rc == MACH_SEND_INTERRUPTED) ?
994 VM_FAULT_INTERRUPTED :
995 VM_FAULT_MEMORY_ERROR);
996 }
997 vm_object_lock(object);
998 continue;
999 }
1000
1001 XPR(XPR_VM_FAULT,
1002 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
1003 access_required, (integer_t)object,
1004 offset, (integer_t)m, 0);
1005 /* take an extra ref so object won't die */
1006 assert(object->ref_count > 0);
1007 object->ref_count++;
1008 vm_object_res_reference(object);
1009 vm_fault_cleanup(object, first_m);
1010 counter(c_vm_fault_page_block_backoff_kernel++);
1011 vm_object_lock(object);
1012 assert(object->ref_count > 0);
1013 m = vm_page_lookup(object, offset);
1014 if (m != VM_PAGE_NULL &&
1015 (access_required & m->page_lock) &&
1016 !((access_required & m->unlock_request) != access_required)) {
1017 PAGE_ASSERT_WAIT(m, interruptible);
1018 vm_object_unlock(object);
1019 wait_result = thread_block(THREAD_CONTINUE_NULL);
1020 vm_object_deallocate(object);
1021 goto backoff;
1022 } else {
1023 vm_object_unlock(object);
1024 vm_object_deallocate(object);
1025 thread_interrupt_level(interruptible_state);
1026 return VM_FAULT_RETRY;
1027 }
1028 }
1029 /*
1030 * We mark the page busy and leave it on
1031 * the pageout queues. If the pageout
1032 * daemon comes across it, then it will
1033 * remove the page.
1034 */
1035
1036 #if TRACEFAULTPAGE
1037 dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1038 #endif
1039
1040 #if !VM_FAULT_STATIC_CONFIG
1041 if (!software_reference_bits) {
1042 vm_page_lock_queues();
1043 if (m->inactive)
1044 vm_stat.reactivations++;
1045
1046 VM_PAGE_QUEUES_REMOVE(m);
1047 vm_page_unlock_queues();
1048 }
1049 #endif
1050 XPR(XPR_VM_FAULT,
1051 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
1052 (integer_t)object, offset, (integer_t)m, 0, 0);
1053 assert(!m->busy);
1054 m->busy = TRUE;
1055 assert(!m->absent);
1056 break;
1057 }
1058
1059 look_for_page =
1060 (object->pager_created) &&
1061 LOOK_FOR(object, offset) &&
1062 (!data_supply);
1063
1064 #if TRACEFAULTPAGE
1065 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */
1066 #endif
1067 if ((look_for_page || (object == first_object))
1068 && !must_be_resident
1069 && !(object->phys_contiguous)) {
1070 /*
1071 * Allocate a new page for this object/offset
1072 * pair.
1073 */
1074
1075 m = vm_page_grab_fictitious();
1076 #if TRACEFAULTPAGE
1077 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
1078 #endif
1079 if (m == VM_PAGE_NULL) {
1080 vm_fault_cleanup(object, first_m);
1081 thread_interrupt_level(interruptible_state);
1082 return(VM_FAULT_FICTITIOUS_SHORTAGE);
1083 }
1084 vm_page_insert(m, object, offset);
1085 }
1086
1087 if ((look_for_page && !must_be_resident)) {
1088 kern_return_t rc;
1089
1090 /*
1091 * If the memory manager is not ready, we
1092 * cannot make requests.
1093 */
1094 if (!object->pager_ready) {
1095 #if TRACEFAULTPAGE
1096 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
1097 #endif
1098 if(m != VM_PAGE_NULL)
1099 VM_PAGE_FREE(m);
1100 XPR(XPR_VM_FAULT,
1101 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
1102 (integer_t)object, offset, 0, 0, 0);
1103 /* take an extra ref so object won't die */
1104 assert(object->ref_count > 0);
1105 object->ref_count++;
1106 vm_object_res_reference(object);
1107 vm_fault_cleanup(object, first_m);
1108 counter(c_vm_fault_page_block_backoff_kernel++);
1109 vm_object_lock(object);
1110 assert(object->ref_count > 0);
1111 if (!object->pager_ready) {
1112 wait_result = vm_object_assert_wait(object,
1113 VM_OBJECT_EVENT_PAGER_READY,
1114 interruptible);
1115 vm_object_unlock(object);
1116 if (wait_result == THREAD_WAITING)
1117 wait_result = thread_block(THREAD_CONTINUE_NULL);
1118 vm_object_deallocate(object);
1119 goto backoff;
1120 } else {
1121 vm_object_unlock(object);
1122 vm_object_deallocate(object);
1123 thread_interrupt_level(interruptible_state);
1124 return VM_FAULT_RETRY;
1125 }
1126 }
1127
1128 if(object->phys_contiguous) {
1129 if(m != VM_PAGE_NULL) {
1130 VM_PAGE_FREE(m);
1131 m = VM_PAGE_NULL;
1132 }
1133 goto no_clustering;
1134 }
1135 if (object->internal) {
1136 /*
1137 * Requests to the default pager
1138 * must reserve a real page in advance,
1139 * because the pager's data-provided
1140 * path won't block for pages. IMPORTANT:
1141 * this acts as a throttling mechanism
1142 * for data_requests to the default
1143 * pager.
1144 */
1145
1146 #if TRACEFAULTPAGE
1147 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1148 #endif
1149 if (m->fictitious && !vm_page_convert(m)) {
1150 VM_PAGE_FREE(m);
1151 vm_fault_cleanup(object, first_m);
1152 thread_interrupt_level(interruptible_state);
1153 return(VM_FAULT_MEMORY_SHORTAGE);
1154 }
1155 } else if (object->absent_count >
1156 vm_object_absent_max) {
1157 /*
1158 * If there are too many outstanding page
1159 * requests pending on this object, we
1160 * wait for them to be resolved now.
1161 */
1162
1163 #if TRACEFAULTPAGE
1164 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1165 #endif
1166 if(m != VM_PAGE_NULL)
1167 VM_PAGE_FREE(m);
1168 /* take an extra ref so object won't die */
1169 assert(object->ref_count > 0);
1170 object->ref_count++;
1171 vm_object_res_reference(object);
1172 vm_fault_cleanup(object, first_m);
1173 counter(c_vm_fault_page_block_backoff_kernel++);
1174 vm_object_lock(object);
1175 assert(object->ref_count > 0);
1176 if (object->absent_count > vm_object_absent_max) {
1177 vm_object_absent_assert_wait(object,
1178 interruptible);
1179 vm_object_unlock(object);
1180 wait_result = thread_block(THREAD_CONTINUE_NULL);
1181 vm_object_deallocate(object);
1182 goto backoff;
1183 } else {
1184 vm_object_unlock(object);
1185 vm_object_deallocate(object);
1186 thread_interrupt_level(interruptible_state);
1187 return VM_FAULT_RETRY;
1188 }
1189 }
1190
1191 /*
1192 * Indicate that the page is waiting for data
1193 * from the memory manager.
1194 */
1195
1196 if(m != VM_PAGE_NULL) {
1197
1198 m->list_req_pending = TRUE;
1199 m->absent = TRUE;
1200 m->unusual = TRUE;
1201 object->absent_count++;
1202
1203 }
1204
1205 no_clustering:
1206 cluster_start = offset;
1207 length = PAGE_SIZE;
1208
1209 /*
1210 * lengthen the cluster by the pages in the working set
1211 */
1212 if((map != NULL) &&
1213 (current_task()->dynamic_working_set != 0)) {
1214 cluster_end = cluster_start + length;
1215 /* tws values for start and end are just
1216 * suggestions. Therefore, as long as
1217 * build_cluster does not use pointers or
1218 * take action based on values that
1219 * could be affected by re-entrance we
1220 * do not need to take the map lock.
1221 */
1222 cluster_end = offset + PAGE_SIZE_64;
1223 tws_build_cluster(
1224 current_task()->dynamic_working_set,
1225 object, &cluster_start,
1226 &cluster_end, 0x40000);
1227 length = cluster_end - cluster_start;
1228 }
1229 #if TRACEFAULTPAGE
1230 dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */
1231 #endif
1232 /*
1233 * We have a busy page, so we can
1234 * release the object lock.
1235 */
1236 vm_object_unlock(object);
1237
1238 /*
1239 * Call the memory manager to retrieve the data.
1240 */
1241
1242 if (type_of_fault)
1243 *type_of_fault = ((int)length << 8) | DBG_PAGEIN_FAULT;
1244 VM_STAT(pageins++);
1245 current_task()->pageins++;
1246 bumped_pagein = TRUE;
1247
1248 /*
1249 * If this object uses a copy_call strategy,
1250 * and we are interested in a copy of this object
1251 * (having gotten here only by following a
1252 * shadow chain), then tell the memory manager
1253 * via a flag added to the desired_access
1254 * parameter, so that it can detect a race
1255 * between our walking down the shadow chain
1256 * and its pushing pages up into a copy of
1257 * the object that it manages.
1258 */
1259
1260 if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1261 object != first_object) {
1262 wants_copy_flag = VM_PROT_WANTS_COPY;
1263 } else {
1264 wants_copy_flag = VM_PROT_NONE;
1265 }
1266
1267 XPR(XPR_VM_FAULT,
1268 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1269 (integer_t)object, offset, (integer_t)m,
1270 access_required | wants_copy_flag, 0);
1271
1272 rc = memory_object_data_request(object->pager,
1273 cluster_start + object->paging_offset,
1274 length,
1275 access_required | wants_copy_flag);
1276
1277
1278 #if TRACEFAULTPAGE
1279 dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1280 #endif
1281 if (rc != KERN_SUCCESS) {
1282 if (rc != MACH_SEND_INTERRUPTED
1283 && vm_fault_debug)
1284 printf("%s(0x%x, 0x%xll, 0x%xll, 0x%x) failed, rc=%d\n",
1285 "memory_object_data_request",
1286 object->pager,
1287 cluster_start + object->paging_offset,
1288 length, access_required, rc);
1289 /*
1290 * Don't want to leave a busy page around,
1291 * but the data request may have blocked,
1292 * so check if it's still there and busy.
1293 */
1294 if(!object->phys_contiguous) {
1295 vm_object_lock(object);
1296 for (; length; length -= PAGE_SIZE,
1297 cluster_start += PAGE_SIZE_64) {
1298 vm_page_t p;
1299 if ((p = vm_page_lookup(object,
1300 cluster_start))
1301 && p->absent && p->busy
1302 && p != first_m) {
1303 VM_PAGE_FREE(p);
1304 }
1305 }
1306 }
1307 vm_fault_cleanup(object, first_m);
1308 thread_interrupt_level(interruptible_state);
1309 return((rc == MACH_SEND_INTERRUPTED) ?
1310 VM_FAULT_INTERRUPTED :
1311 VM_FAULT_MEMORY_ERROR);
1312 }
1313
1314 vm_object_lock(object);
1315 if ((interruptible != THREAD_UNINT) &&
1316 (current_thread()->state & TH_ABORT)) {
1317 vm_fault_cleanup(object, first_m);
1318 thread_interrupt_level(interruptible_state);
1319 return(VM_FAULT_INTERRUPTED);
1320 }
1321 if (m == VM_PAGE_NULL &&
1322 object->phys_contiguous) {
1323 /*
1324 * No page here means that the object we
1325 * initially looked up was "physically
1326 * contiguous" (i.e. device memory). However,
1327 * with Virtual VRAM, the object might not
1328 * be backed by that device memory anymore,
1329 * so we're done here only if the object is
1330 * still "phys_contiguous".
1331 * Otherwise, if the object is no longer
1332 * "phys_contiguous", we need to retry the
1333 * page fault against the object's new backing
1334 * store (different memory object).
1335 */
1336 break;
1337 }
1338
1339 /*
1340 * Retry with same object/offset, since new data may
1341 * be in a different page (i.e., m is meaningless at
1342 * this point).
1343 */
1344 continue;
1345 }
1346
1347 /*
1348 * The only case in which we get here is if
1349 * the object has no pager (or we are unwiring). If the pager doesn't
1350 * have the page, this is handled in the m->absent case above
1351 * (and if you change things here you should look above).
1352 */
1353 #if TRACEFAULTPAGE
1354 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1355 #endif
1356 if (object == first_object)
1357 first_m = m;
1358 else
1359 assert(m == VM_PAGE_NULL);
1360
1361 XPR(XPR_VM_FAULT,
1362 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1363 (integer_t)object, offset, (integer_t)m,
1364 (integer_t)object->shadow, 0);
1365 /*
1366 * Move on to the next object. Lock the next
1367 * object before unlocking the current one.
1368 */
1369 next_object = object->shadow;
1370 if (next_object == VM_OBJECT_NULL) {
1371 assert(!must_be_resident);
1372 /*
1373 * If there's no object left, fill the page
1374 * in the top object with zeros. But first we
1375 * need to allocate a real page.
1376 */
1377
1378 if (object != first_object) {
1379 vm_object_paging_end(object);
1380 vm_object_unlock(object);
1381
1382 object = first_object;
1383 offset = first_offset;
1384 vm_object_lock(object);
1385 }
1386
1387 m = first_m;
1388 assert(m->object == object);
1389 first_m = VM_PAGE_NULL;
1390
1391 if(m == VM_PAGE_NULL) {
1392 m = vm_page_grab();
1393 if (m == VM_PAGE_NULL) {
1394 vm_fault_cleanup(
1395 object, VM_PAGE_NULL);
1396 thread_interrupt_level(
1397 interruptible_state);
1398 return(VM_FAULT_MEMORY_SHORTAGE);
1399 }
1400 vm_page_insert(
1401 m, object, offset);
1402 }
1403
1404 if (object->shadow_severed) {
1405 VM_PAGE_FREE(m);
1406 vm_fault_cleanup(object, VM_PAGE_NULL);
1407 thread_interrupt_level(interruptible_state);
1408 return VM_FAULT_MEMORY_ERROR;
1409 }
1410
1411 /*
1412 * Are we protecting the system from
1413 * backing store exhaustion? If so,
1414 * sleep unless we are privileged.
1415 */
1416
1417 if(vm_backing_store_low) {
1418 if(!(current_task()->priv_flags
1419 & VM_BACKING_STORE_PRIV)) {
1420 assert_wait((event_t)
1421 &vm_backing_store_low,
1422 THREAD_UNINT);
1423 VM_PAGE_FREE(m);
1424 vm_fault_cleanup(object, VM_PAGE_NULL);
1425 thread_block(THREAD_CONTINUE_NULL);
1426 thread_interrupt_level(
1427 interruptible_state);
1428 return(VM_FAULT_RETRY);
1429 }
1430 }
1431
1432 if (VM_PAGE_THROTTLED() ||
1433 (m->fictitious && !vm_page_convert(m))) {
1434 VM_PAGE_FREE(m);
1435 vm_fault_cleanup(object, VM_PAGE_NULL);
1436 thread_interrupt_level(interruptible_state);
1437 return(VM_FAULT_MEMORY_SHORTAGE);
1438 }
1439 m->no_isync = FALSE;
1440
1441 if (!no_zero_fill) {
1442 vm_object_unlock(object);
1443 vm_page_zero_fill(m);
1444 vm_object_lock(object);
1445
1446 if (type_of_fault)
1447 *type_of_fault = DBG_ZERO_FILL_FAULT;
1448 VM_STAT(zero_fill_count++);
1449 }
1450 if (bumped_pagein == TRUE) {
1451 VM_STAT(pageins--);
1452 current_task()->pageins--;
1453 }
1454 vm_page_lock_queues();
1455 VM_PAGE_QUEUES_REMOVE(m);
1456 assert(!m->laundry);
1457 assert(m->object != kernel_object);
1458 assert(m->pageq.next == NULL &&
1459 m->pageq.prev == NULL);
1460 if(m->object->size > 0x200000) {
1461 m->zero_fill = TRUE;
1462 /* depends on the queues lock */
1463 vm_zf_count += 1;
1464 queue_enter(&vm_page_queue_zf,
1465 m, vm_page_t, pageq);
1466 } else {
1467 queue_enter(
1468 &vm_page_queue_inactive,
1469 m, vm_page_t, pageq);
1470 }
1471 m->page_ticket = vm_page_ticket;
1472 vm_page_ticket_roll++;
1473 if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1474 vm_page_ticket_roll = 0;
1475 if(vm_page_ticket ==
1476 VM_PAGE_TICKET_ROLL_IDS)
1477 vm_page_ticket= 0;
1478 else
1479 vm_page_ticket++;
1480 }
1481 m->inactive = TRUE;
1482 vm_page_inactive_count++;
1483 vm_page_unlock_queues();
1484 #if 0
1485 pmap_clear_modify(m->phys_page);
1486 #endif
1487 break;
1488 }
1489 else {
1490 if ((object != first_object) || must_be_resident)
1491 vm_object_paging_end(object);
1492 offset += object->shadow_offset;
1493 hi_offset += object->shadow_offset;
1494 lo_offset += object->shadow_offset;
1495 access_required = VM_PROT_READ;
1496 vm_object_lock(next_object);
1497 vm_object_unlock(object);
1498 object = next_object;
1499 vm_object_paging_begin(object);
1500 }
1501 }
1502
1503 /*
1504 * PAGE HAS BEEN FOUND.
1505 *
1506 * This page (m) is:
1507 * busy, so that we can play with it;
1508 * not absent, so that nobody else will fill it;
1509 * possibly eligible for pageout;
1510 *
1511 * The top-level page (first_m) is:
1512 * VM_PAGE_NULL if the page was found in the
1513 * top-level object;
1514 * busy, not absent, and ineligible for pageout.
1515 *
1516 * The current object (object) is locked. A paging
1517 * reference is held for the current and top-level
1518 * objects.
1519 */
1520
1521 #if TRACEFAULTPAGE
1522 dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1523 #endif
1524 #if EXTRA_ASSERTIONS
1525 if(m != VM_PAGE_NULL) {
1526 assert(m->busy && !m->absent);
1527 assert((first_m == VM_PAGE_NULL) ||
1528 (first_m->busy && !first_m->absent &&
1529 !first_m->active && !first_m->inactive));
1530 }
1531 #endif /* EXTRA_ASSERTIONS */
1532
1533 /*
1534 * ENCRYPTED SWAP:
1535 * If we found a page, we must have decrypted it before we
1536 * get here...
1537 */
1538 if (m != VM_PAGE_NULL) {
1539 ASSERT_PAGE_DECRYPTED(m);
1540 }
1541
1542 XPR(XPR_VM_FAULT,
1543 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1544 (integer_t)object, offset, (integer_t)m,
1545 (integer_t)first_object, (integer_t)first_m);
1546 /*
1547 * If the page is being written, but isn't
1548 * already owned by the top-level object,
1549 * we have to copy it into a new page owned
1550 * by the top-level object.
1551 */
1552
1553 if ((object != first_object) && (m != VM_PAGE_NULL)) {
1554 /*
1555 * We only really need to copy if we
1556 * want to write it.
1557 */
1558
1559 #if TRACEFAULTPAGE
1560 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1561 #endif
1562 if (fault_type & VM_PROT_WRITE) {
1563 vm_page_t copy_m;
1564
1565 assert(!must_be_resident);
1566
1567 /*
1568 * Are we protecting the system from
1569 * backing store exhaustion? If so,
1570 * sleep unless we are privileged.
1571 */
1572
1573 if(vm_backing_store_low) {
1574 if(!(current_task()->priv_flags
1575 & VM_BACKING_STORE_PRIV)) {
1576 assert_wait((event_t)
1577 &vm_backing_store_low,
1578 THREAD_UNINT);
1579 RELEASE_PAGE(m);
1580 vm_fault_cleanup(object, first_m);
1581 thread_block(THREAD_CONTINUE_NULL);
1582 thread_interrupt_level(
1583 interruptible_state);
1584 return(VM_FAULT_RETRY);
1585 }
1586 }
1587
1588 /*
1589 * If we try to collapse first_object at this
1590 * point, we may deadlock when we try to get
1591 * the lock on an intermediate object (since we
1592 * have the bottom object locked). We can't
1593 * unlock the bottom object, because the page
1594 * we found may move (by collapse) if we do.
1595 *
1596 * Instead, we first copy the page. Then, when
1597 * we have no more use for the bottom object,
1598 * we unlock it and try to collapse.
1599 *
1600 * Note that we copy the page even if we didn't
1601 * need to... that's the breaks.
1602 */
1603
1604 /*
1605 * Allocate a page for the copy
1606 */
1607 copy_m = vm_page_grab();
1608 if (copy_m == VM_PAGE_NULL) {
1609 RELEASE_PAGE(m);
1610 vm_fault_cleanup(object, first_m);
1611 thread_interrupt_level(interruptible_state);
1612 return(VM_FAULT_MEMORY_SHORTAGE);
1613 }
1614
1615
1616 XPR(XPR_VM_FAULT,
1617 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1618 (integer_t)object, offset,
1619 (integer_t)m, (integer_t)copy_m, 0);
1620 vm_page_copy(m, copy_m);
1621
1622 /*
1623 * If another map is truly sharing this
1624 * page with us, we have to flush all
1625 * uses of the original page, since we
1626 * can't distinguish those which want the
1627 * original from those which need the
1628 * new copy.
1629 *
1630 * XXXO If we know that only one map has
1631 * access to this page, then we could
1632 * avoid the pmap_disconnect() call.
1633 */
1634
1635 vm_page_lock_queues();
1636 assert(!m->cleaning);
1637 pmap_disconnect(m->phys_page);
1638 vm_page_deactivate(m);
1639 copy_m->dirty = TRUE;
1640 /*
1641 * Setting reference here prevents this fault from
1642 * being counted as a (per-thread) reactivate as well
1643 * as a copy-on-write.
1644 */
1645 first_m->reference = TRUE;
1646 vm_page_unlock_queues();
1647
1648 /*
1649 * We no longer need the old page or object.
1650 */
1651
1652 PAGE_WAKEUP_DONE(m);
1653 vm_object_paging_end(object);
1654 vm_object_unlock(object);
1655
1656 if (type_of_fault)
1657 *type_of_fault = DBG_COW_FAULT;
1658 VM_STAT(cow_faults++);
1659 current_task()->cow_faults++;
1660 object = first_object;
1661 offset = first_offset;
1662
1663 vm_object_lock(object);
1664 VM_PAGE_FREE(first_m);
1665 first_m = VM_PAGE_NULL;
1666 assert(copy_m->busy);
1667 vm_page_insert(copy_m, object, offset);
1668 m = copy_m;
1669
1670 /*
1671 * Now that we've gotten the copy out of the
1672 * way, let's try to collapse the top object.
1673 * But we have to play ugly games with
1674 * paging_in_progress to do that...
1675 */
1676
1677 vm_object_paging_end(object);
1678 vm_object_collapse(object, offset, TRUE);
1679 vm_object_paging_begin(object);
1680
1681 }
1682 else {
1683 *protection &= (~VM_PROT_WRITE);
1684 }
1685 }
1686
1687 /*
1688 * Now check whether the page needs to be pushed into the
1689 * copy object. The use of asymmetric copy on write for
1690 * shared temporary objects means that we may do two copies to
1691 * satisfy the fault; one above to get the page from a
1692 * shadowed object, and one here to push it into the copy.
1693 */
1694
1695 while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
1696 (m!= VM_PAGE_NULL)) {
1697 vm_object_offset_t copy_offset;
1698 vm_page_t copy_m;
1699
1700 #if TRACEFAULTPAGE
1701 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1702 #endif
1703 /*
1704 * If the page is being written, but hasn't been
1705 * copied to the copy-object, we have to copy it there.
1706 */
1707
1708 if ((fault_type & VM_PROT_WRITE) == 0) {
1709 *protection &= ~VM_PROT_WRITE;
1710 break;
1711 }
1712
1713 /*
1714 * If the page was guaranteed to be resident,
1715 * we must have already performed the copy.
1716 */
1717
1718 if (must_be_resident)
1719 break;
1720
1721 /*
1722 * Try to get the lock on the copy_object.
1723 */
1724 if (!vm_object_lock_try(copy_object)) {
1725 vm_object_unlock(object);
1726
1727 mutex_pause(); /* wait a bit */
1728
1729 vm_object_lock(object);
1730 continue;
1731 }
1732
1733 /*
1734 * Make another reference to the copy-object,
1735 * to keep it from disappearing during the
1736 * copy.
1737 */
1738 assert(copy_object->ref_count > 0);
1739 copy_object->ref_count++;
1740 VM_OBJ_RES_INCR(copy_object);
1741
1742 /*
1743 * Does the page exist in the copy?
1744 */
1745 copy_offset = first_offset - copy_object->shadow_offset;
1746 if (copy_object->size <= copy_offset)
1747 /*
1748 * Copy object doesn't cover this page -- do nothing.
1749 */
1750 ;
1751 else if ((copy_m =
1752 vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1753 /* Page currently exists in the copy object */
1754 if (copy_m->busy) {
1755 /*
1756 * If the page is being brought
1757 * in, wait for it and then retry.
1758 */
1759 RELEASE_PAGE(m);
1760 /* take an extra ref so object won't die */
1761 assert(copy_object->ref_count > 0);
1762 copy_object->ref_count++;
1763 vm_object_res_reference(copy_object);
1764 vm_object_unlock(copy_object);
1765 vm_fault_cleanup(object, first_m);
1766 counter(c_vm_fault_page_block_backoff_kernel++);
1767 vm_object_lock(copy_object);
1768 assert(copy_object->ref_count > 0);
1769 VM_OBJ_RES_DECR(copy_object);
1770 copy_object->ref_count--;
1771 assert(copy_object->ref_count > 0);
1772 copy_m = vm_page_lookup(copy_object, copy_offset);
1773 /*
1774 * ENCRYPTED SWAP:
1775 * it's OK if the "copy_m" page is encrypted,
1776 * because we're not moving it nor handling its
1777 * contents.
1778 */
1779 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1780 PAGE_ASSERT_WAIT(copy_m, interruptible);
1781 vm_object_unlock(copy_object);
1782 wait_result = thread_block(THREAD_CONTINUE_NULL);
1783 vm_object_deallocate(copy_object);
1784 goto backoff;
1785 } else {
1786 vm_object_unlock(copy_object);
1787 vm_object_deallocate(copy_object);
1788 thread_interrupt_level(interruptible_state);
1789 return VM_FAULT_RETRY;
1790 }
1791 }
1792 }
1793 else if (!PAGED_OUT(copy_object, copy_offset)) {
1794 /*
1795 * If PAGED_OUT is TRUE, then the page used to exist
1796 * in the copy-object, and has already been paged out.
1797 * We don't need to repeat this. If PAGED_OUT is
1798 * FALSE, then either we don't know (!pager_created,
1799 * for example) or it hasn't been paged out.
1800 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1801 * We must copy the page to the copy object.
1802 */
1803
1804 /*
1805 * Are we protecting the system from
1806 * backing store exhaustion? If so,
1807 * sleep unless we are privileged.
1808 */
1809
1810 if(vm_backing_store_low) {
1811 if(!(current_task()->priv_flags
1812 & VM_BACKING_STORE_PRIV)) {
1813 assert_wait((event_t)
1814 &vm_backing_store_low,
1815 THREAD_UNINT);
1816 RELEASE_PAGE(m);
1817 VM_OBJ_RES_DECR(copy_object);
1818 copy_object->ref_count--;
1819 assert(copy_object->ref_count > 0);
1820 vm_object_unlock(copy_object);
1821 vm_fault_cleanup(object, first_m);
1822 thread_block(THREAD_CONTINUE_NULL);
1823 thread_interrupt_level(
1824 interruptible_state);
1825 return(VM_FAULT_RETRY);
1826 }
1827 }
1828
1829 /*
1830 * Allocate a page for the copy
1831 */
1832 copy_m = vm_page_alloc(copy_object, copy_offset);
1833 if (copy_m == VM_PAGE_NULL) {
1834 RELEASE_PAGE(m);
1835 VM_OBJ_RES_DECR(copy_object);
1836 copy_object->ref_count--;
1837 assert(copy_object->ref_count > 0);
1838 vm_object_unlock(copy_object);
1839 vm_fault_cleanup(object, first_m);
1840 thread_interrupt_level(interruptible_state);
1841 return(VM_FAULT_MEMORY_SHORTAGE);
1842 }
1843
1844 /*
1845 * Must copy page into copy-object.
1846 */
1847
1848 vm_page_copy(m, copy_m);
1849
1850 /*
1851 * If the old page was in use by any users
1852 * of the copy-object, it must be removed
1853 * from all pmaps. (We can't know which
1854 * pmaps use it.)
1855 */
1856
1857 vm_page_lock_queues();
1858 assert(!m->cleaning);
1859 pmap_disconnect(m->phys_page);
1860 copy_m->dirty = TRUE;
1861 vm_page_unlock_queues();
1862
1863 /*
1864 * If there's a pager, then immediately
1865 * page out this page, using the "initialize"
1866 * option. Else, we use the copy.
1867 */
1868
1869 if
1870 #if MACH_PAGEMAP
1871 ((!copy_object->pager_created) ||
1872 vm_external_state_get(
1873 copy_object->existence_map, copy_offset)
1874 == VM_EXTERNAL_STATE_ABSENT)
1875 #else
1876 (!copy_object->pager_created)
1877 #endif
1878 {
1879 vm_page_lock_queues();
1880 vm_page_activate(copy_m);
1881 vm_page_unlock_queues();
1882 PAGE_WAKEUP_DONE(copy_m);
1883 }
1884 else {
1885 assert(copy_m->busy == TRUE);
1886
1887 /*
1888 * The page is already ready for pageout:
1889 * not on pageout queues and busy.
1890 * Unlock everything except the
1891 * copy_object itself.
1892 */
1893
1894 vm_object_unlock(object);
1895
1896 /*
1897 * Write the page to the copy-object,
1898 * flushing it from the kernel.
1899 */
1900
1901 vm_pageout_initialize_page(copy_m);
1902
1903 /*
1904 * Since the pageout may have
1905 * temporarily dropped the
1906 * copy_object's lock, we
1907 * check whether we'll have
1908 * to deallocate the hard way.
1909 */
1910
1911 if ((copy_object->shadow != object) ||
1912 (copy_object->ref_count == 1)) {
1913 vm_object_unlock(copy_object);
1914 vm_object_deallocate(copy_object);
1915 vm_object_lock(object);
1916 continue;
1917 }
1918
1919 /*
1920 * Pick back up the old object's
1921 * lock. [It is safe to do so,
1922 * since it must be deeper in the
1923 * object tree.]
1924 */
1925
1926 vm_object_lock(object);
1927 }
1928
1929 /*
1930 * Because we're pushing a page upward
1931 * in the object tree, we must restart
1932 * any faults that are waiting here.
1933 * [Note that this is an expansion of
1934 * PAGE_WAKEUP that uses the THREAD_RESTART
1935 * wait result]. Can't turn off the page's
1936 * busy bit because we're not done with it.
1937 */
1938
1939 if (m->wanted) {
1940 m->wanted = FALSE;
1941 thread_wakeup_with_result((event_t) m,
1942 THREAD_RESTART);
1943 }
1944 }
1945
1946 /*
1947 * The reference count on copy_object must be
1948 * at least 2: one for our extra reference,
1949 * and at least one from the outside world
1950 * (we checked that when we last locked
1951 * copy_object).
1952 */
1953 copy_object->ref_count--;
1954 assert(copy_object->ref_count > 0);
1955 VM_OBJ_RES_DECR(copy_object);
1956 vm_object_unlock(copy_object);
1957
1958 break;
1959 }
1960
1961 *result_page = m;
1962 *top_page = first_m;
1963
1964 XPR(XPR_VM_FAULT,
1965 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1966 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1967 /*
1968 * If the page can be written, assume that it will be.
1969 * [Earlier, we restricted the permission to allow write
1970 * access only if the fault so required, so we don't
1971 * mark read-only data as dirty.]
1972 */
1973
1974
1975 if(m != VM_PAGE_NULL) {
1976 #if !VM_FAULT_STATIC_CONFIG
1977 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
1978 m->dirty = TRUE;
1979 #endif
1980 if (vm_page_deactivate_behind)
1981 vm_fault_deactivate_behind(object, offset, behavior);
1982 } else {
1983 vm_object_unlock(object);
1984 }
1985 thread_interrupt_level(interruptible_state);
1986
1987 #if TRACEFAULTPAGE
1988 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */
1989 #endif
1990 return(VM_FAULT_SUCCESS);
1991
1992 #if 0
1993 block_and_backoff:
1994 vm_fault_cleanup(object, first_m);
1995
1996 counter(c_vm_fault_page_block_backoff_kernel++);
1997 thread_block(THREAD_CONTINUE_NULL);
1998 #endif
1999
2000 backoff:
2001 thread_interrupt_level(interruptible_state);
2002 if (wait_result == THREAD_INTERRUPTED)
2003 return VM_FAULT_INTERRUPTED;
2004 return VM_FAULT_RETRY;
2005
2006 #undef RELEASE_PAGE
2007 }
2008
2009 /*
2010 * Routine: vm_fault_tws_insert
2011 * Purpose:
2012 * Add fault information to the task working set.
2013 * Implementation:
2014 * We always insert the base object/offset pair
2015 * rather than the actual object/offset.
2016 * Assumptions:
2017 * Map and real_map locked.
2018 * Object locked and referenced.
2019 * Returns:
2020 * TRUE if startup file should be written.
2021 * With object locked and still referenced.
2022 * But we may drop the object lock temporarily.
2023 */
2024 static boolean_t
2025 vm_fault_tws_insert(
2026 vm_map_t map,
2027 vm_map_t real_map,
2028 vm_map_offset_t vaddr,
2029 vm_object_t object,
2030 vm_object_offset_t offset)
2031 {
2032 tws_hash_line_t line;
2033 task_t task;
2034 kern_return_t kr;
2035 boolean_t result = FALSE;
2036
2037 /* Avoid possible map lock deadlock issues */
2038 if (map == kernel_map || map == kalloc_map ||
2039 real_map == kernel_map || real_map == kalloc_map)
2040 return result;
2041
2042 task = current_task();
2043 if (task->dynamic_working_set != 0) {
2044 vm_object_t base_object;
2045 vm_object_t base_shadow;
2046 vm_object_offset_t base_offset;
2047 base_object = object;
2048 base_offset = offset;
2049 while ((base_shadow = base_object->shadow)) {
2050 vm_object_lock(base_shadow);
2051 vm_object_unlock(base_object);
2052 base_offset +=
2053 base_object->shadow_offset;
2054 base_object = base_shadow;
2055 }
2056 kr = tws_lookup(
2057 task->dynamic_working_set,
2058 base_offset, base_object,
2059 &line);
2060 if (kr == KERN_OPERATION_TIMED_OUT){
2061 result = TRUE;
2062 if (base_object != object) {
2063 vm_object_unlock(base_object);
2064 vm_object_lock(object);
2065 }
2066 } else if (kr != KERN_SUCCESS) {
2067 if(base_object != object)
2068 vm_object_reference_locked(base_object);
2069 kr = tws_insert(
2070 task->dynamic_working_set,
2071 base_offset, base_object,
2072 vaddr, real_map);
2073 if(base_object != object) {
2074 vm_object_unlock(base_object);
2075 vm_object_deallocate(base_object);
2076 }
2077 if(kr == KERN_NO_SPACE) {
2078 if (base_object == object)
2079 vm_object_unlock(object);
2080 tws_expand_working_set(
2081 task->dynamic_working_set,
2082 TWS_HASH_LINE_COUNT,
2083 FALSE);
2084 if (base_object == object)
2085 vm_object_lock(object);
2086 } else if(kr == KERN_OPERATION_TIMED_OUT) {
2087 result = TRUE;
2088 }
2089 if(base_object != object)
2090 vm_object_lock(object);
2091 } else if (base_object != object) {
2092 vm_object_unlock(base_object);
2093 vm_object_lock(object);
2094 }
2095 }
2096 return result;
2097 }
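/*
 * Illustrative caller pattern -- a sketch of how the fast path in
 * vm_fault() below drives this routine, not a separate interface:
 *
 *	vm_object_paging_begin(object);
 *	write_startup_file =
 *		vm_fault_tws_insert(map, real_map, vaddr, object, offset);
 *	vm_object_paging_end(object);
 *	...
 *	if (write_startup_file)
 *		tws_send_startup_info(current_task());
 */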
2098
2099 /*
2100 * Routine: vm_fault
2101 * Purpose:
2102 * Handle page faults, including pseudo-faults
2103 * used to change the wiring status of pages.
2104 * Returns:
2105 * Explicit continuations have been removed.
2106 * Implementation:
2107 * vm_fault and vm_fault_page save mucho state
2108 * in the moral equivalent of a closure. The state
2109 * structure is allocated when first entering vm_fault
2110 * and deallocated when leaving vm_fault.
2111 */
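/*
 * Illustrative invocation -- a sketch copied from vm_fault_wire()
 * later in this file, not a new interface.  A wiring pseudo-fault
 * supplies the wiring pmap and address explicitly; ordinary
 * trap-driven faults are assumed to pass a NULL caller_pmap, in
 * which case the pmap of the real map is used:
 *
 *	rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
 *		      (pmap == kernel_pmap) ?
 *		      THREAD_UNINT : THREAD_ABORTSAFE,
 *		      pmap, pmap_addr + (va - entry->vme_start));
 */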
2112
2113 extern int _map_enter_debug;
2114
2115 kern_return_t
2116 vm_fault(
2117 vm_map_t map,
2118 vm_map_offset_t vaddr,
2119 vm_prot_t fault_type,
2120 boolean_t change_wiring,
2121 int interruptible,
2122 pmap_t caller_pmap,
2123 vm_map_offset_t caller_pmap_addr)
2124 {
2125 vm_map_version_t version; /* Map version for verification */
2126 boolean_t wired; /* Should mapping be wired down? */
2127 vm_object_t object; /* Top-level object */
2128 vm_object_offset_t offset; /* Top-level offset */
2129 vm_prot_t prot; /* Protection for mapping */
2130 vm_behavior_t behavior; /* Expected paging behavior */
2131 vm_map_offset_t lo_offset, hi_offset;
2132 vm_object_t old_copy_object; /* Saved copy object */
2133 vm_page_t result_page; /* Result of vm_fault_page */
2134 vm_page_t top_page; /* Placeholder page */
2135 kern_return_t kr;
2136
2137 register
2138 vm_page_t m; /* Fast access to result_page */
2139 kern_return_t error_code = 0; /* page error reasons */
2140 register
2141 vm_object_t cur_object;
2142 register
2143 vm_object_offset_t cur_offset;
2144 vm_page_t cur_m;
2145 vm_object_t new_object;
2146 int type_of_fault;
2147 vm_map_t real_map = map;
2148 vm_map_t original_map = map;
2149 pmap_t pmap = NULL;
2150 boolean_t interruptible_state;
2151 unsigned int cache_attr;
2152 int write_startup_file = 0;
2153 boolean_t need_activation;
2154 vm_prot_t original_fault_type;
2155
2156
2157 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
2158 vaddr,
2159 0,
2160 0,
2161 0,
2162 0);
2163
2164 if (get_preemption_level() != 0) {
2165 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2166 vaddr,
2167 0,
2168 KERN_FAILURE,
2169 0,
2170 0);
2171
2172 return (KERN_FAILURE);
2173 }
2174
2175 interruptible_state = thread_interrupt_level(interruptible);
2176
2177 /*
2178 * assume we will hit a page in the cache
2179 * otherwise, explicitly override with
2180 * the real fault type once we determine it
2181 */
2182 type_of_fault = DBG_CACHE_HIT_FAULT;
2183
2184 VM_STAT(faults++);
2185 current_task()->faults++;
2186
2187 original_fault_type = fault_type;
2188
2189 RetryFault: ;
2190
2191 /*
2192 * Find the backing store object and offset into
2193 * it to begin the search.
2194 */
2195 fault_type = original_fault_type;
2196 map = original_map;
2197 vm_map_lock_read(map);
2198 kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
2199 &object, &offset,
2200 &prot, &wired,
2201 &behavior, &lo_offset, &hi_offset, &real_map);
2202
2203 //if (_map_enter_debug)printf("vm_map_lookup_locked(map=0x%x, addr=0x%llx, prot=%d wired=%d) = %d\n", map, vaddr, prot, wired, kr);
2204
2205 pmap = real_map->pmap;
2206
2207 if (kr != KERN_SUCCESS) {
2208 vm_map_unlock_read(map);
2209 goto done;
2210 }
2211
2212 /*
2213 * If the page is wired, we must fault for the current protection
2214 * value, to avoid further faults.
2215 */
2216
2217 if (wired)
2218 fault_type = prot | VM_PROT_WRITE;
2219
2220 #if VM_FAULT_CLASSIFY
2221 /*
2222 * Temporary data gathering code
2223 */
2224 vm_fault_classify(object, offset, fault_type);
2225 #endif
2226 /*
2227 * Fast fault code. The basic idea is to do as much as
2228 * possible while holding the map lock and object locks.
2229 * Busy pages are not used until the object lock has to
2230 * be dropped to do something (copy, zero fill, pmap enter).
2231 * Similarly, paging references aren't acquired until that
2232 * point, and object references aren't used.
2233 *
2234 * If we can figure out what to do
2235 * (zero fill, copy on write, pmap enter) while holding
2236 * the locks, then it gets done. Otherwise, we give up,
2237 * and use the original fault path (which doesn't hold
2238 * the map lock, and relies on busy pages).
2239 * The give up cases include:
2240 * - Have to talk to pager.
2241 * - Page is busy, absent or in error.
2242 * - Pager has locked out desired access.
2243 * - Fault needs to be restarted.
2244 * - Have to push page into copy object.
2245 *
2246 * The code is an infinite loop that moves one level down
2247 * the shadow chain each time. cur_object and cur_offset
2248 * refer to the current object being examined. object and offset
2249 * are the original object from the map. The loop is at the
2250 * top level if and only if object and cur_object are the same.
2251 *
2252 * Invariants: Map lock is held throughout. Lock is held on
2253 * original object and cur_object (if different) when
2254 * continuing or exiting loop.
2255 *
2256 */
2257
2258
2259 /*
2260 * If this page is to be inserted in a copy delay object
2261 * for writing, and if the object has a copy, then the
2262 * copy delay strategy is implemented in the slow fault path (vm_fault_page).
2263 */
2264 if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
2265 object->copy == VM_OBJECT_NULL ||
2266 (fault_type & VM_PROT_WRITE) == 0) {
2267 cur_object = object;
2268 cur_offset = offset;
2269
2270 while (TRUE) {
2271 m = vm_page_lookup(cur_object, cur_offset);
2272 if (m != VM_PAGE_NULL) {
2273 if (m->busy) {
2274 wait_result_t result;
2275
2276 if (object != cur_object)
2277 vm_object_unlock(object);
2278
2279 vm_map_unlock_read(map);
2280 if (real_map != map)
2281 vm_map_unlock(real_map);
2282
2283 #if !VM_FAULT_STATIC_CONFIG
2284 if (!vm_fault_interruptible)
2285 interruptible = THREAD_UNINT;
2286 #endif
2287 result = PAGE_ASSERT_WAIT(m, interruptible);
2288
2289 vm_object_unlock(cur_object);
2290
2291 if (result == THREAD_WAITING) {
2292 result = thread_block(THREAD_CONTINUE_NULL);
2293
2294 counter(c_vm_fault_page_block_busy_kernel++);
2295 }
2296 if (result == THREAD_AWAKENED || result == THREAD_RESTART)
2297 goto RetryFault;
2298
2299 kr = KERN_ABORTED;
2300 goto done;
2301 }
2302 if (m->unusual && (m->error || m->restart || m->private
2303 || m->absent || (fault_type & m->page_lock))) {
2304
2305 /*
2306 * Unusual case. Give up.
2307 */
2308 break;
2309 }
2310
2311 if (m->encrypted) {
2312 /*
2313 * ENCRYPTED SWAP:
2314 * We've soft-faulted (because it's not in the page
2315 * table) on an encrypted page.
2316 * Keep the page "busy" so that no one messes with
2317 * it during the decryption.
2318 * Release the extra locks we're holding, keep only
2319 * the page's VM object lock.
2320 */
2321 m->busy = TRUE;
2322 if (object != cur_object) {
2323 vm_object_unlock(object);
2324 }
2325 vm_map_unlock_read(map);
2326 if (real_map != map)
2327 vm_map_unlock(real_map);
2328
2329 vm_page_decrypt(m, 0);
2330
2331 assert(m->busy);
2332 PAGE_WAKEUP_DONE(m);
2333 vm_object_unlock(m->object);
2334
2335 /*
2336 * Retry from the top, in case anything
2337 * changed while we were decrypting...
2338 */
2339 goto RetryFault;
2340 }
2341 ASSERT_PAGE_DECRYPTED(m);
2342
2343 /*
2344 * Two cases of map-in faults:
2345 * - At top level w/o copy object.
2346 * - Read fault anywhere.
2347 * --> must disallow write.
2348 */
2349
2350 if (object == cur_object &&
2351 object->copy == VM_OBJECT_NULL)
2352 goto FastMapInFault;
2353
2354 if ((fault_type & VM_PROT_WRITE) == 0) {
2355 boolean_t sequential;
2356
2357 prot &= ~VM_PROT_WRITE;
2358
2359 /*
2360 * Set up to map the page ...
2361 * mark the page busy, drop
2362 * locks and take a paging reference
2363 * on the object with the page.
2364 */
2365
2366 if (object != cur_object) {
2367 vm_object_unlock(object);
2368 object = cur_object;
2369 }
2370 FastMapInFault:
2371 m->busy = TRUE;
2372
2373 FastPmapEnter:
2374 /*
2375 * Check a couple of global reasons to
2376 * be conservative about write access.
2377 * Then do the pmap_enter.
2378 */
2379 #if !VM_FAULT_STATIC_CONFIG
2380 if (vm_fault_dirty_handling
2381 #if MACH_KDB
2382 || db_watchpoint_list
2383 #endif
2384 && (fault_type & VM_PROT_WRITE) == 0)
2385 prot &= ~VM_PROT_WRITE;
2386 #else /* STATIC_CONFIG */
2387 #if MACH_KDB
2388 if (db_watchpoint_list
2389 && (fault_type & VM_PROT_WRITE) == 0)
2390 prot &= ~VM_PROT_WRITE;
2391 #endif /* MACH_KDB */
2392 #endif /* STATIC_CONFIG */
2393 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2394
2395 sequential = FALSE;
2396 need_activation = FALSE;
2397
2398 if (m->no_isync == TRUE) {
2399 m->no_isync = FALSE;
2400 pmap_sync_page_data_phys(m->phys_page);
2401
2402 if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
2403 /*
2404 * found it in the cache, but this
2405 * is the first fault-in of the page (no_isync == TRUE)
2406 * so it must have come in as part of
2407 * a cluster... account 1 pagein against it
2408 */
2409 VM_STAT(pageins++);
2410 current_task()->pageins++;
2411 type_of_fault = DBG_PAGEIN_FAULT;
2412 sequential = TRUE;
2413 }
2414 if (m->clustered)
2415 need_activation = TRUE;
2416
2417 } else if (cache_attr != VM_WIMG_DEFAULT) {
2418 pmap_sync_page_attributes_phys(m->phys_page);
2419 }
2420
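/*
 * Enter the translation either into the pmap supplied by the
 * caller (vm_fault_wire() passes the wiring pmap and address)
 * or, for ordinary faults, into the pmap of the map that
 * actually contains the page (real_map).
 */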
2421 if(caller_pmap) {
2422 PMAP_ENTER(caller_pmap,
2423 caller_pmap_addr, m,
2424 prot, cache_attr, wired);
2425 } else {
2426 PMAP_ENTER(pmap, vaddr, m,
2427 prot, cache_attr, wired);
2428 }
2429
2430 /*
2431 * Hold queues lock to manipulate
2432 * the page queues. Change wiring
2433 * case is obvious. In soft ref bits
2434 * case activate page only if it fell
2435 * off paging queues, otherwise just
2436 * activate it if it's inactive.
2437 *
2438 * NOTE: original vm_fault code will
2439 * move active page to back of active
2440 * queue. This code doesn't.
2441 */
2442 if (m->clustered) {
2443 vm_pagein_cluster_used++;
2444 m->clustered = FALSE;
2445 }
2446 if (change_wiring) {
2447 vm_page_lock_queues();
2448
2449 if (wired)
2450 vm_page_wire(m);
2451 else
2452 vm_page_unwire(m);
2453
2454 vm_page_unlock_queues();
2455 }
2456 else {
2457 if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active)) {
2458 vm_page_lock_queues();
2459 vm_page_activate(m);
2460 vm_page_unlock_queues();
2461 }
2462 }
2463
2464 /*
2465 * That's it, clean up and return.
2466 */
2467 PAGE_WAKEUP_DONE(m);
2468
2469 sequential = (sequential && vm_page_deactivate_behind) ?
2470 vm_fault_deactivate_behind(object, cur_offset, behavior) :
2471 FALSE;
2472
2473 /*
2474 * Add non-sequential pages to the working set.
2475 * The sequential pages will be brought in through
2476 * normal clustering behavior.
2477 */
2478 if (!sequential && !object->private) {
2479 vm_object_paging_begin(object);
2480
2481 write_startup_file =
2482 vm_fault_tws_insert(map, real_map, vaddr,
2483 object, cur_offset);
2484
2485 vm_object_paging_end(object);
2486 }
2487 vm_object_unlock(object);
2488
2489 vm_map_unlock_read(map);
2490 if(real_map != map)
2491 vm_map_unlock(real_map);
2492
2493 if(write_startup_file)
2494 tws_send_startup_info(current_task());
2495
2496 thread_interrupt_level(interruptible_state);
2497
2498
2499 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2500 vaddr,
2501 type_of_fault & 0xff,
2502 KERN_SUCCESS,
2503 type_of_fault >> 8,
2504 0);
2505
2506 return KERN_SUCCESS;
2507 }
2508
2509 /*
2510 * Copy on write fault. If objects match, then
2511 * object->copy must not be NULL (else control
2512 * would be in previous code block), and we
2513 * have a potential push into the copy object
2514 * with which we won't cope here.
2515 */
2516
2517 if (cur_object == object)
2518 break;
2519 /*
2520 * This is now a shadow based copy on write
2521 * fault -- it requires a copy up the shadow
2522 * chain.
2523 *
2524 * Allocate a page in the original top level
2525 * object. Give up if allocate fails. Also
2526 * need to remember current page, as it's the
2527 * source of the copy.
2528 */
2529 cur_m = m;
2530 m = vm_page_grab();
2531 if (m == VM_PAGE_NULL) {
2532 break;
2533 }
2534 /*
2535 * Now do the copy. Mark the source busy
2536 * and take out paging references on both
2537 * objects.
2538 *
2539 * NOTE: This code holds the map lock across
2540 * the page copy.
2541 */
2542
2543 cur_m->busy = TRUE;
2544 vm_page_copy(cur_m, m);
2545 vm_page_insert(m, object, offset);
2546
2547 vm_object_paging_begin(cur_object);
2548 vm_object_paging_begin(object);
2549
2550 type_of_fault = DBG_COW_FAULT;
2551 VM_STAT(cow_faults++);
2552 current_task()->cow_faults++;
2553
2554 /*
2555 * Now cope with the source page and object
2556 * If the top object has a ref count of 1
2557 * then no other map can access it, and hence
2558 * it's not necessary to do the pmap_disconnect.
2559 */
2560
2561 vm_page_lock_queues();
2562 vm_page_deactivate(cur_m);
2563 m->dirty = TRUE;
2564 pmap_disconnect(cur_m->phys_page);
2565 vm_page_unlock_queues();
2566
2567 PAGE_WAKEUP_DONE(cur_m);
2568 vm_object_paging_end(cur_object);
2569 vm_object_unlock(cur_object);
2570
2571 /*
2572 * Slight hack to call vm_object_collapse
2573 * and then reuse the common map-in code.
2574 * Note that the object lock was taken above.
2575 */
2576
2577 vm_object_paging_end(object);
2578 vm_object_collapse(object, offset, TRUE);
2579
2580 goto FastPmapEnter;
2581 }
2582 else {
2583
2584 /*
2585 * No page at cur_object, cur_offset
2586 */
2587
2588 if (cur_object->pager_created) {
2589
2590 /*
2591 * Have to talk to the pager. Give up.
2592 */
2593 break;
2594 }
2595
2596
2597 if (cur_object->shadow == VM_OBJECT_NULL) {
2598
2599 if (cur_object->shadow_severed) {
2600 vm_object_paging_end(object);
2601 vm_object_unlock(object);
2602 vm_map_unlock_read(map);
2603 if(real_map != map)
2604 vm_map_unlock(real_map);
2605
2606 if(write_startup_file)
2607 tws_send_startup_info(
2608 current_task());
2609
2610 thread_interrupt_level(interruptible_state);
2611
2612 return KERN_MEMORY_ERROR;
2613 }
2614
2615 /*
2616 * Zero fill fault. Page gets
2617 * filled in top object. Insert
2618 * page, then drop any lower lock.
2619 * Give up if no page.
2620 */
2621 if (VM_PAGE_THROTTLED()) {
2622 break;
2623 }
2624
2625 /*
2626 * Are we protecting the system from
2627 * backing store exhaustion? If so,
2628 * sleep unless we are privileged.
2629 */
2630 if(vm_backing_store_low) {
2631 if(!(current_task()->priv_flags
2632 & VM_BACKING_STORE_PRIV))
2633 break;
2634 }
2635 m = vm_page_alloc(object, offset);
2636 if (m == VM_PAGE_NULL) {
2637 break;
2638 }
2639 /*
2640 * This is a zero-fill or initial fill
2641 * page fault. As such, we consider it
2642 * undefined with respect to instruction
2643 * execution; i.e., it is the responsibility
2644 * of higher layers to call for an instruction
2645 * sync after changing the contents and before
2646 * sending a program into this area. We
2647 * choose this approach for performance.
2648 */
2649
2650 m->no_isync = FALSE;
2651
2652 if (cur_object != object)
2653 vm_object_unlock(cur_object);
2654
2655 /*
2656 * Now zero fill page and map it.
2657 * the page is probably going to
2658 * be written soon, so don't bother
2659 * to clear the modified bit
2660 *
2661 * NOTE: This code holds the map
2662 * lock across the zero fill.
2663 */
2664
2665 if (!map->no_zero_fill) {
2666 vm_page_zero_fill(m);
2667 type_of_fault = DBG_ZERO_FILL_FAULT;
2668 VM_STAT(zero_fill_count++);
2669 }
2670 vm_page_lock_queues();
2671 VM_PAGE_QUEUES_REMOVE(m);
2672
2673 m->page_ticket = vm_page_ticket;
2674 assert(!m->laundry);
2675 assert(m->object != kernel_object);
2676 assert(m->pageq.next == NULL &&
2677 m->pageq.prev == NULL);
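/*
 * Zero-filled pages of large objects (the 0x200000 test is
 * 2 MB) go on the dedicated zero-fill queue; pages of smaller
 * objects go straight onto the inactive queue.  Presumably this
 * lets the pageout daemon treat bulk zero-fill memory separately.
 */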
2678 if(m->object->size > 0x200000) {
2679 m->zero_fill = TRUE;
2680 /* depends on the queues lock */
2681 vm_zf_count += 1;
2682 queue_enter(&vm_page_queue_zf,
2683 m, vm_page_t, pageq);
2684 } else {
2685 queue_enter(
2686 &vm_page_queue_inactive,
2687 m, vm_page_t, pageq);
2688 }
2689 vm_page_ticket_roll++;
2690 if(vm_page_ticket_roll ==
2691 VM_PAGE_TICKETS_IN_ROLL) {
2692 vm_page_ticket_roll = 0;
2693 if(vm_page_ticket ==
2694 VM_PAGE_TICKET_ROLL_IDS)
2695 vm_page_ticket= 0;
2696 else
2697 vm_page_ticket++;
2698 }
2699
2700 m->inactive = TRUE;
2701 vm_page_inactive_count++;
2702 vm_page_unlock_queues();
2703
2704 goto FastPmapEnter;
2705 }
2706
2707 /*
2708 * On to the next level
2709 */
2710
2711 cur_offset += cur_object->shadow_offset;
2712 new_object = cur_object->shadow;
2713 vm_object_lock(new_object);
2714 if (cur_object != object)
2715 vm_object_unlock(cur_object);
2716 cur_object = new_object;
2717
2718 continue;
2719 }
2720 }
2721
2722 /*
2723 * Cleanup from fast fault failure. Drop any object
2724 * lock other than original and drop map lock.
2725 */
2726
2727 if (object != cur_object)
2728 vm_object_unlock(cur_object);
2729 }
2730 vm_map_unlock_read(map);
2731
2732 if(real_map != map)
2733 vm_map_unlock(real_map);
2734
2735 /*
2736 * Make a reference to this object to
2737 * prevent its disposal while we are messing with
2738 * it. Once we have the reference, the map is free
2739 * to be diddled. Since objects reference their
2740 * shadows (and copies), they will stay around as well.
2741 */
2742
2743 assert(object->ref_count > 0);
2744 object->ref_count++;
2745 vm_object_res_reference(object);
2746 vm_object_paging_begin(object);
2747
2748 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2749
2750 if (!object->private) {
2751 write_startup_file =
2752 vm_fault_tws_insert(map, real_map, vaddr, object, offset);
2753 }
2754
2755 kr = vm_fault_page(object, offset, fault_type,
2756 (change_wiring && !wired),
2757 interruptible,
2758 lo_offset, hi_offset, behavior,
2759 &prot, &result_page, &top_page,
2760 &type_of_fault,
2761 &error_code, map->no_zero_fill, FALSE, map, vaddr);
2762
2763 /*
2764 * If we didn't succeed, lose the object reference immediately.
2765 */
2766
2767 if (kr != VM_FAULT_SUCCESS)
2768 vm_object_deallocate(object);
2769
2770 /*
2771 * See why we failed, and take corrective action.
2772 */
2773
2774 switch (kr) {
2775 case VM_FAULT_SUCCESS:
2776 break;
2777 case VM_FAULT_MEMORY_SHORTAGE:
2778 if (vm_page_wait((change_wiring) ?
2779 THREAD_UNINT :
2780 THREAD_ABORTSAFE))
2781 goto RetryFault;
2782 /* fall thru */
2783 case VM_FAULT_INTERRUPTED:
2784 kr = KERN_ABORTED;
2785 goto done;
2786 case VM_FAULT_RETRY:
2787 goto RetryFault;
2788 case VM_FAULT_FICTITIOUS_SHORTAGE:
2789 vm_page_more_fictitious();
2790 goto RetryFault;
2791 case VM_FAULT_MEMORY_ERROR:
2792 if (error_code)
2793 kr = error_code;
2794 else
2795 kr = KERN_MEMORY_ERROR;
2796 goto done;
2797 }
2798
2799 m = result_page;
2800
2801 if(m != VM_PAGE_NULL) {
2802 assert((change_wiring && !wired) ?
2803 (top_page == VM_PAGE_NULL) :
2804 ((top_page == VM_PAGE_NULL) == (m->object == object)));
2805 }
2806
2807 /*
2808 * How to clean up the result of vm_fault_page. This
2809 * happens whether the mapping is entered or not.
2810 */
2811
2812 #define UNLOCK_AND_DEALLOCATE \
2813 MACRO_BEGIN \
2814 vm_fault_cleanup(m->object, top_page); \
2815 vm_object_deallocate(object); \
2816 MACRO_END
2817
2818 /*
2819 * What to do with the resulting page from vm_fault_page
2820 * if it doesn't get entered into the physical map:
2821 */
2822
2823 #define RELEASE_PAGE(m) \
2824 MACRO_BEGIN \
2825 PAGE_WAKEUP_DONE(m); \
2826 vm_page_lock_queues(); \
2827 if (!m->active && !m->inactive) \
2828 vm_page_activate(m); \
2829 vm_page_unlock_queues(); \
2830 MACRO_END
2831
2832 /*
2833 * We must verify that the maps have not changed
2834 * since our last lookup.
2835 */
2836
2837 if(m != VM_PAGE_NULL) {
2838 old_copy_object = m->object->copy;
2839 vm_object_unlock(m->object);
2840 } else {
2841 old_copy_object = VM_OBJECT_NULL;
2842 }
2843 if ((map != original_map) || !vm_map_verify(map, &version)) {
2844 vm_object_t retry_object;
2845 vm_object_offset_t retry_offset;
2846 vm_prot_t retry_prot;
2847
2848 /*
2849 * To avoid trying to write_lock the map while another
2850 * thread has it read_locked (in vm_map_pageable), we
2851 * do not try for write permission. If the page is
2852 * still writable, we will get write permission. If it
2853 * is not, or has been marked needs_copy, we enter the
2854 * mapping without write permission, and will merely
2855 * take another fault.
2856 */
2857 map = original_map;
2858 vm_map_lock_read(map);
2859 kr = vm_map_lookup_locked(&map, vaddr,
2860 fault_type & ~VM_PROT_WRITE, &version,
2861 &retry_object, &retry_offset, &retry_prot,
2862 &wired, &behavior, &lo_offset, &hi_offset,
2863 &real_map);
2864 pmap = real_map->pmap;
2865
2866 if (kr != KERN_SUCCESS) {
2867 vm_map_unlock_read(map);
2868 if(m != VM_PAGE_NULL) {
2869 vm_object_lock(m->object);
2870 RELEASE_PAGE(m);
2871 UNLOCK_AND_DEALLOCATE;
2872 } else {
2873 vm_object_deallocate(object);
2874 }
2875 goto done;
2876 }
2877
2878 vm_object_unlock(retry_object);
2879 if(m != VM_PAGE_NULL) {
2880 vm_object_lock(m->object);
2881 } else {
2882 vm_object_lock(object);
2883 }
2884
2885 if ((retry_object != object) ||
2886 (retry_offset != offset)) {
2887 vm_map_unlock_read(map);
2888 if(real_map != map)
2889 vm_map_unlock(real_map);
2890 if(m != VM_PAGE_NULL) {
2891 RELEASE_PAGE(m);
2892 UNLOCK_AND_DEALLOCATE;
2893 } else {
2894 vm_object_deallocate(object);
2895 }
2896 goto RetryFault;
2897 }
2898
2899 /*
2900 * Check whether the protection has changed or the object
2901 * has been copied while we left the map unlocked.
2902 */
2903 prot &= retry_prot;
2904 if(m != VM_PAGE_NULL) {
2905 vm_object_unlock(m->object);
2906 } else {
2907 vm_object_unlock(object);
2908 }
2909 }
2910 if(m != VM_PAGE_NULL) {
2911 vm_object_lock(m->object);
2912 } else {
2913 vm_object_lock(object);
2914 }
2915
2916 /*
2917 * If the copy object changed while the top-level object
2918 * was unlocked, then we must take away write permission.
2919 */
2920
2921 if(m != VM_PAGE_NULL) {
2922 if (m->object->copy != old_copy_object)
2923 prot &= ~VM_PROT_WRITE;
2924 }
2925
2926 /*
2927 * If we want to wire down this page, but no longer have
2928 * adequate permissions, we must start all over.
2929 */
2930
2931 if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2932 vm_map_verify_done(map, &version);
2933 if(real_map != map)
2934 vm_map_unlock(real_map);
2935 if(m != VM_PAGE_NULL) {
2936 RELEASE_PAGE(m);
2937 UNLOCK_AND_DEALLOCATE;
2938 } else {
2939 vm_object_deallocate(object);
2940 }
2941 goto RetryFault;
2942 }
2943
2944 /*
2945 * Put this page into the physical map.
2946 * We had to do the unlock above because pmap_enter
2947 * may cause other faults. The page may be on
2948 * the pageout queues. If the pageout daemon comes
2949 * across the page, it will remove it from the queues.
2950 */
2951 need_activation = FALSE;
2952
2953 if (m != VM_PAGE_NULL) {
2954 if (m->no_isync == TRUE) {
2955 pmap_sync_page_data_phys(m->phys_page);
2956
2957 if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
2958 /*
2959 * found it in the cache, but this
2960 * is the first fault-in of the page (no_isync == TRUE)
2961 * so it must have come in as part of
2962 * a cluster... account 1 pagein against it
2963 */
2964 VM_STAT(pageins++);
2965 current_task()->pageins++;
2966
2967 type_of_fault = DBG_PAGEIN_FAULT;
2968 }
2969 if (m->clustered) {
2970 need_activation = TRUE;
2971 }
2972 m->no_isync = FALSE;
2973 }
2974 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2975
2976 if(caller_pmap) {
2977 PMAP_ENTER(caller_pmap,
2978 caller_pmap_addr, m,
2979 prot, cache_attr, wired);
2980 } else {
2981 PMAP_ENTER(pmap, vaddr, m,
2982 prot, cache_attr, wired);
2983 }
2984
2985 /*
2986 * Add working set information for private objects here.
2987 */
2988 if (m->object->private) {
2989 write_startup_file =
2990 vm_fault_tws_insert(map, real_map, vaddr,
2991 m->object, m->offset);
2992 }
2993 } else {
2994
2995 vm_map_entry_t entry;
2996 vm_map_offset_t laddr;
2997 vm_map_offset_t ldelta, hdelta;
2998
2999 /*
3000 * do a pmap block mapping from the physical address
3001 * in the object
3002 */
3003
3004 #ifndef i386
3005 /* While we do not worry about execution protection in */
3006 /* general, certain pages may have instruction execution */
3007 /* disallowed. We will check here, and if not allowed */
3008 /* to execute, we return with a protection failure. */
3009
3010 if((fault_type & VM_PROT_EXECUTE) &&
3011 (!pmap_eligible_for_execute((ppnum_t)
3012 (object->shadow_offset >> 12)))) {
3013
3014 vm_map_verify_done(map, &version);
3015 if(real_map != map)
3016 vm_map_unlock(real_map);
3017 vm_fault_cleanup(object, top_page);
3018 vm_object_deallocate(object);
3019 kr = KERN_PROTECTION_FAILURE;
3020 goto done;
3021 }
3022 #endif /* !i386 */
3023
3024 if(real_map != map) {
3025 vm_map_unlock(real_map);
3026 }
3027 if (original_map != map) {
3028 vm_map_unlock_read(map);
3029 vm_map_lock_read(original_map);
3030 map = original_map;
3031 }
3032 real_map = map;
3033
3034 laddr = vaddr;
3035 hdelta = 0xFFFFF000;
3036 ldelta = 0xFFFFF000;
3037
3038
3039 while(vm_map_lookup_entry(map, laddr, &entry)) {
3040 if(ldelta > (laddr - entry->vme_start))
3041 ldelta = laddr - entry->vme_start;
3042 if(hdelta > (entry->vme_end - laddr))
3043 hdelta = entry->vme_end - laddr;
3044 if(entry->is_sub_map) {
3045
3046 laddr = (laddr - entry->vme_start)
3047 + entry->offset;
3048 vm_map_lock_read(entry->object.sub_map);
3049 if(map != real_map)
3050 vm_map_unlock_read(map);
3051 if(entry->use_pmap) {
3052 vm_map_unlock_read(real_map);
3053 real_map = entry->object.sub_map;
3054 }
3055 map = entry->object.sub_map;
3056
3057 } else {
3058 break;
3059 }
3060 }
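/*
 * At this point the loop above has descended through any submaps
 * to the terminal map entry covering laddr; ldelta and hdelta are
 * the distances from laddr back to that entry's start and forward
 * to its end, clipped at every level and bounded by the 0xFFFFF000
 * initial values.
 */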
3061
3062 if(vm_map_lookup_entry(map, laddr, &entry) &&
3063 (entry->object.vm_object != NULL) &&
3064 (entry->object.vm_object == object)) {
3065
3066 vm_map_offset_t phys_offset;
3067
3068 phys_offset = (entry->object.vm_object->shadow_offset
3069 + entry->offset
3070 + laddr
3071 - entry->vme_start);
3072 phys_offset -= ldelta;
3073 if(caller_pmap) {
3074 /* Set up a block mapped area */
3075 pmap_map_block(
3076 caller_pmap,
3077 (addr64_t)(caller_pmap_addr - ldelta),
3078 phys_offset >> 12,
3079 (ldelta + hdelta) >> 12,
3080 prot,
3081 (VM_WIMG_MASK & (int)object->wimg_bits),
3082 0);
3083 } else {
3084 /* Set up a block mapped area */
3085 pmap_map_block(
3086 real_map->pmap,
3087 (addr64_t)(vaddr - ldelta),
3088 phys_offset >> 12,
3089 (ldelta + hdelta) >> 12,
3090 prot,
3091 (VM_WIMG_MASK & (int)object->wimg_bits),
3092 0);
3093 }
3094 }
3095
3096 }
3097
3098 /*
3099 * If the page is not wired down and isn't already
3100 * on a pageout queue, then put it where the
3101 * pageout daemon can find it.
3102 */
3103 if(m != VM_PAGE_NULL) {
3104 vm_page_lock_queues();
3105
3106 if (m->clustered) {
3107 vm_pagein_cluster_used++;
3108 m->clustered = FALSE;
3109 }
3110 m->reference = TRUE;
3111
3112 if (change_wiring) {
3113 if (wired)
3114 vm_page_wire(m);
3115 else
3116 vm_page_unwire(m);
3117 }
3118 #if VM_FAULT_STATIC_CONFIG
3119 else {
3120 if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
3121 vm_page_activate(m);
3122 }
3123 #else
3124 else if (software_reference_bits) {
3125 if (!m->active && !m->inactive)
3126 vm_page_activate(m);
3127 m->reference = TRUE;
3128 } else {
3129 vm_page_activate(m);
3130 }
3131 #endif
3132 vm_page_unlock_queues();
3133 }
3134
3135 /*
3136 * Unlock everything, and return
3137 */
3138
3139 vm_map_verify_done(map, &version);
3140 if(real_map != map)
3141 vm_map_unlock(real_map);
3142 if(m != VM_PAGE_NULL) {
3143 PAGE_WAKEUP_DONE(m);
3144 UNLOCK_AND_DEALLOCATE;
3145 } else {
3146 vm_fault_cleanup(object, top_page);
3147 vm_object_deallocate(object);
3148 }
3149 kr = KERN_SUCCESS;
3150
3151 #undef UNLOCK_AND_DEALLOCATE
3152 #undef RELEASE_PAGE
3153
3154 done:
3155 if(write_startup_file)
3156 tws_send_startup_info(current_task());
3157
3158 thread_interrupt_level(interruptible_state);
3159
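/*
 * The low byte of type_of_fault names the fault class (cache hit,
 * zero fill, COW, pagein); judging by the "& 0xff" / ">> 8" split
 * below, the upper bits carry extra detail from vm_fault_page --
 * an inference from this code, not a documented encoding.
 */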
3160 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
3161 vaddr,
3162 type_of_fault & 0xff,
3163 kr,
3164 type_of_fault >> 8,
3165 0);
3166
3167 return(kr);
3168 }
3169
3170 /*
3171 * vm_fault_wire:
3172 *
3173 * Wire down a range of virtual addresses in a map.
3174 */
3175 kern_return_t
3176 vm_fault_wire(
3177 vm_map_t map,
3178 vm_map_entry_t entry,
3179 pmap_t pmap,
3180 vm_map_offset_t pmap_addr)
3181 {
3182
3183 register vm_map_offset_t va;
3184 register vm_map_offset_t end_addr = entry->vme_end;
3185 register kern_return_t rc;
3186
3187 assert(entry->in_transition);
3188
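/*
 * Physically contiguous objects are not paged and are mapped with
 * block mappings rather than per-page PMAP_ENTERs (see the
 * pmap_map_block() path in vm_fault() above), so there is nothing
 * to wire page by page here.
 */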
3189 if ((entry->object.vm_object != NULL) &&
3190 !entry->is_sub_map &&
3191 entry->object.vm_object->phys_contiguous) {
3192 return KERN_SUCCESS;
3193 }
3194
3195 /*
3196 * Inform the physical mapping system that the
3197 * range of addresses may not fault, so that
3198 * page tables and such can be locked down as well.
3199 */
3200
3201 pmap_pageable(pmap, pmap_addr,
3202 pmap_addr + (end_addr - entry->vme_start), FALSE);
3203
3204 /*
3205 * We simulate a fault to get the page and enter it
3206 * in the physical map.
3207 */
3208
3209 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3210 if ((rc = vm_fault_wire_fast(
3211 map, va, entry, pmap,
3212 pmap_addr + (va - entry->vme_start)
3213 )) != KERN_SUCCESS) {
3214 rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
3215 (pmap == kernel_pmap) ?
3216 THREAD_UNINT : THREAD_ABORTSAFE,
3217 pmap, pmap_addr + (va - entry->vme_start));
3218 }
3219
3220 if (rc != KERN_SUCCESS) {
3221 struct vm_map_entry tmp_entry = *entry;
3222
3223 /* unwire wired pages */
3224 tmp_entry.vme_end = va;
3225 vm_fault_unwire(map,
3226 &tmp_entry, FALSE, pmap, pmap_addr);
3227
3228 return rc;
3229 }
3230 }
3231 return KERN_SUCCESS;
3232 }
3233
3234 /*
3235 * vm_fault_unwire:
3236 *
3237 * Unwire a range of virtual addresses in a map.
3238 */
3239 void
3240 vm_fault_unwire(
3241 vm_map_t map,
3242 vm_map_entry_t entry,
3243 boolean_t deallocate,
3244 pmap_t pmap,
3245 vm_map_offset_t pmap_addr)
3246 {
3247 register vm_map_offset_t va;
3248 register vm_map_offset_t end_addr = entry->vme_end;
3249 vm_object_t object;
3250
3251 object = (entry->is_sub_map)
3252 ? VM_OBJECT_NULL : entry->object.vm_object;
3253
3254 /*
3255 * Since the pages are wired down, we must be able to
3256 * get their mappings from the physical map system.
3257 */
3258
3259 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3260 pmap_change_wiring(pmap,
3261 pmap_addr + (va - entry->vme_start), FALSE);
3262
3263 if (object == VM_OBJECT_NULL) {
3264 (void) vm_fault(map, va, VM_PROT_NONE,
3265 TRUE, THREAD_UNINT, pmap, pmap_addr);
3266 } else if (object->phys_contiguous) {
3267 continue;
3268 } else {
3269 vm_prot_t prot;
3270 vm_page_t result_page;
3271 vm_page_t top_page;
3272 vm_object_t result_object;
3273 vm_fault_return_t result;
3274
3275 do {
3276 prot = VM_PROT_NONE;
3277
3278 vm_object_lock(object);
3279 vm_object_paging_begin(object);
3280 XPR(XPR_VM_FAULT,
3281 "vm_fault_unwire -> vm_fault_page\n",
3282 0,0,0,0,0);
3283 result = vm_fault_page(object,
3284 entry->offset +
3285 (va - entry->vme_start),
3286 VM_PROT_NONE, TRUE,
3287 THREAD_UNINT,
3288 entry->offset,
3289 entry->offset +
3290 (entry->vme_end
3291 - entry->vme_start),
3292 entry->behavior,
3293 &prot,
3294 &result_page,
3295 &top_page,
3296 (int *)0,
3297 0, map->no_zero_fill,
3298 FALSE, NULL, 0);
3299 } while (result == VM_FAULT_RETRY);
3300
3301 if (result != VM_FAULT_SUCCESS)
3302 panic("vm_fault_unwire: failure");
3303
3304 result_object = result_page->object;
3305 if (deallocate) {
3306 assert(!result_page->fictitious);
3307 pmap_disconnect(result_page->phys_page);
3308 VM_PAGE_FREE(result_page);
3309 } else {
3310 vm_page_lock_queues();
3311 vm_page_unwire(result_page);
3312 vm_page_unlock_queues();
3313 PAGE_WAKEUP_DONE(result_page);
3314 }
3315
3316 vm_fault_cleanup(result_object, top_page);
3317 }
3318 }
3319
3320 /*
3321 * Inform the physical mapping system that the range
3322 * of addresses may fault, so that page tables and
3323 * such may be unwired themselves.
3324 */
3325
3326 pmap_pageable(pmap, pmap_addr,
3327 pmap_addr + (end_addr - entry->vme_start), TRUE);
3328
3329 }
3330
3331 /*
3332 * vm_fault_wire_fast:
3333 *
3334 * Handle common case of a wire down page fault at the given address.
3335 * If successful, the page is inserted into the associated physical map.
3336 * The map entry is passed in to avoid the overhead of a map lookup.
3337 *
3338 * NOTE: the given address should be truncated to the
3339 * proper page address.
3340 *
3341 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
3342 * a standard error specifying why the fault is fatal is returned.
3343 *
3344 * The map in question must be referenced, and remains so.
3345 * Caller has a read lock on the map.
3346 *
3347 * This is a stripped version of vm_fault() for wiring pages. Anything
3348 * other than the common case will return KERN_FAILURE, and the caller
3349 * is expected to call vm_fault().
3350 */
3351 kern_return_t
3352 vm_fault_wire_fast(
3353 __unused vm_map_t map,
3354 vm_map_offset_t va,
3355 vm_map_entry_t entry,
3356 pmap_t pmap,
3357 vm_map_offset_t pmap_addr)
3358 {
3359 vm_object_t object;
3360 vm_object_offset_t offset;
3361 register vm_page_t m;
3362 vm_prot_t prot;
3363 thread_t thread = current_thread();
3364 unsigned int cache_attr;
3365
3366 VM_STAT(faults++);
3367
3368 if (thread != THREAD_NULL && thread->task != TASK_NULL)
3369 thread->task->faults++;
3370
3371 /*
3372 * Recovery actions
3373 */
3374
3375 #undef RELEASE_PAGE
3376 #define RELEASE_PAGE(m) { \
3377 PAGE_WAKEUP_DONE(m); \
3378 vm_page_lock_queues(); \
3379 vm_page_unwire(m); \
3380 vm_page_unlock_queues(); \
3381 }
3382
3383
3384 #undef UNLOCK_THINGS
3385 #define UNLOCK_THINGS { \
3386 vm_object_paging_end(object); \
3387 vm_object_unlock(object); \
3388 }
3389
3390 #undef UNLOCK_AND_DEALLOCATE
3391 #define UNLOCK_AND_DEALLOCATE { \
3392 UNLOCK_THINGS; \
3393 vm_object_deallocate(object); \
3394 }
3395 /*
3396 * Give up and have caller do things the hard way.
3397 */
3398
3399 #define GIVE_UP { \
3400 UNLOCK_AND_DEALLOCATE; \
3401 return(KERN_FAILURE); \
3402 }
3403
3404
3405 /*
3406 * If this entry is not directly to a vm_object, bail out.
3407 */
3408 if (entry->is_sub_map)
3409 return(KERN_FAILURE);
3410
3411 /*
3412 * Find the backing store object and offset into it.
3413 */
3414
3415 object = entry->object.vm_object;
3416 offset = (va - entry->vme_start) + entry->offset;
3417 prot = entry->protection;
3418
3419 /*
3420 * Make a reference to this object to prevent its
3421 * disposal while we are messing with it.
3422 */
3423
3424 vm_object_lock(object);
3425 assert(object->ref_count > 0);
3426 object->ref_count++;
3427 vm_object_res_reference(object);
3428 vm_object_paging_begin(object);
3429
3430 /*
3431 * INVARIANTS (through entire routine):
3432 *
3433 * 1) At all times, we must either have the object
3434 * lock or a busy page in some object to prevent
3435 * some other thread from trying to bring in
3436 * the same page.
3437 *
3438 * 2) Once we have a busy page, we must remove it from
3439 * the pageout queues, so that the pageout daemon
3440 * will not grab it away.
3441 *
3442 */
3443
3444 /*
3445 * Look for page in top-level object. If it's not there or
3446 * there's something going on, give up.
3447 * ENCRYPTED SWAP: use the slow fault path, since we'll need to
3448 * decrypt the page before wiring it down.
3449 */
3450 m = vm_page_lookup(object, offset);
3451 if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) ||
3452 (m->unusual && ( m->error || m->restart || m->absent ||
3453 prot & m->page_lock))) {
3454
3455 GIVE_UP;
3456 }
3457 ASSERT_PAGE_DECRYPTED(m);
3458
3459 /*
3460 * Wire the page down now. All bail outs beyond this
3461 * point must unwire the page.
3462 */
3463
3464 vm_page_lock_queues();
3465 vm_page_wire(m);
3466 vm_page_unlock_queues();
3467
3468 /*
3469 * Mark page busy for other threads.
3470 */
3471 assert(!m->busy);
3472 m->busy = TRUE;
3473 assert(!m->absent);
3474
3475 /*
3476 * Give up if the page is being written and there's a copy object
3477 */
3478 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3479 RELEASE_PAGE(m);
3480 GIVE_UP;
3481 }
3482
3483 /*
3484 * Put this page into the physical map.
3485 * We have to unlock the object because pmap_enter
3486 * may cause other faults.
3487 */
3488 if (m->no_isync == TRUE) {
3489 pmap_sync_page_data_phys(m->phys_page);
3490
3491 m->no_isync = FALSE;
3492 }
3493
3494 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3495
3496 PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);
3497
3498 /*
3499 * Unlock everything, and return
3500 */
3501
3502 PAGE_WAKEUP_DONE(m);
3503 UNLOCK_AND_DEALLOCATE;
3504
3505 return(KERN_SUCCESS);
3506
3507 }
3508
3509 /*
3510 * Routine: vm_fault_copy_cleanup
3511 * Purpose:
3512 * Release a page used by vm_fault_copy.
3513 */
3514
3515 void
3516 vm_fault_copy_cleanup(
3517 vm_page_t page,
3518 vm_page_t top_page)
3519 {
3520 vm_object_t object = page->object;
3521
3522 vm_object_lock(object);
3523 PAGE_WAKEUP_DONE(page);
3524 vm_page_lock_queues();
3525 if (!page->active && !page->inactive)
3526 vm_page_activate(page);
3527 vm_page_unlock_queues();
3528 vm_fault_cleanup(object, top_page);
3529 }
3530
3531 void
3532 vm_fault_copy_dst_cleanup(
3533 vm_page_t page)
3534 {
3535 vm_object_t object;
3536
3537 if (page != VM_PAGE_NULL) {
3538 object = page->object;
3539 vm_object_lock(object);
3540 vm_page_lock_queues();
3541 vm_page_unwire(page);
3542 vm_page_unlock_queues();
3543 vm_object_paging_end(object);
3544 vm_object_unlock(object);
3545 }
3546 }
3547
3548 /*
3549 * Routine: vm_fault_copy
3550 *
3551 * Purpose:
3552 * Copy pages from one virtual memory object to another --
3553 * neither the source nor destination pages need be resident.
3554 *
3555 * Before actually copying a page, the version associated with
3556 * the destination address map will be verified.
3557 *
3558 * In/out conditions:
3559 * The caller must hold a reference, but not a lock, to
3560 * each of the source and destination objects and to the
3561 * destination map.
3562 *
3563 * Results:
3564 * Returns KERN_SUCCESS if no errors were encountered in
3565 * reading or writing the data. Returns KERN_INTERRUPTED if
3566 * the operation was interrupted (only possible if the
3567 * "interruptible" argument is asserted). Other return values
3568 * indicate a permanent error in copying the data.
3569 *
3570 * The actual amount of data copied will be returned in the
3571 * "copy_size" argument. In the event that the destination map
3572 * verification failed, this amount may be less than the amount
3573 * requested.
3574 */
3575 kern_return_t
3576 vm_fault_copy(
3577 vm_object_t src_object,
3578 vm_object_offset_t src_offset,
3579 vm_map_size_t *copy_size, /* INOUT */
3580 vm_object_t dst_object,
3581 vm_object_offset_t dst_offset,
3582 vm_map_t dst_map,
3583 vm_map_version_t *dst_version,
3584 int interruptible)
3585 {
3586 vm_page_t result_page;
3587
3588 vm_page_t src_page;
3589 vm_page_t src_top_page;
3590 vm_prot_t src_prot;
3591
3592 vm_page_t dst_page;
3593 vm_page_t dst_top_page;
3594 vm_prot_t dst_prot;
3595
3596 vm_map_size_t amount_left;
3597 vm_object_t old_copy_object;
3598 kern_return_t error = 0;
3599
3600 vm_map_size_t part_size;
3601
3602 /*
3603 * In order not to confuse the clustered pageins, align
3604 * the different offsets on a page boundary.
3605 */
3606 vm_object_offset_t src_lo_offset = vm_object_trunc_page(src_offset);
3607 vm_object_offset_t dst_lo_offset = vm_object_trunc_page(dst_offset);
3608 vm_object_offset_t src_hi_offset = vm_object_round_page(src_offset + *copy_size);
3609 vm_object_offset_t dst_hi_offset = vm_object_round_page(dst_offset + *copy_size);
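/*
 * Worked example, assuming a 4 KB page size: with src_offset 0x1804
 * and *copy_size 0x3000, src_lo_offset is truncated to 0x1000 and
 * src_hi_offset is rounded up from 0x4804 to 0x5000, so the cluster
 * window covers whole pages at both ends.
 */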
3610
3611 #define RETURN(x) \
3612 MACRO_BEGIN \
3613 *copy_size -= amount_left; \
3614 MACRO_RETURN(x); \
3615 MACRO_END
3616
3617 amount_left = *copy_size;
3618 do { /* while (amount_left > 0) */
3619 /*
3620 * There may be a deadlock if both source and destination
3621 * pages are the same. To avoid this deadlock, the copy must
3622 * start by getting the destination page in order to apply
3623 * COW semantics if any.
3624 */
3625
3626 RetryDestinationFault: ;
3627
3628 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3629
3630 vm_object_lock(dst_object);
3631 vm_object_paging_begin(dst_object);
3632
3633 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3634 switch (vm_fault_page(dst_object,
3635 vm_object_trunc_page(dst_offset),
3636 VM_PROT_WRITE|VM_PROT_READ,
3637 FALSE,
3638 interruptible,
3639 dst_lo_offset,
3640 dst_hi_offset,
3641 VM_BEHAVIOR_SEQUENTIAL,
3642 &dst_prot,
3643 &dst_page,
3644 &dst_top_page,
3645 (int *)0,
3646 &error,
3647 dst_map->no_zero_fill,
3648 FALSE, NULL, 0)) {
3649 case VM_FAULT_SUCCESS:
3650 break;
3651 case VM_FAULT_RETRY:
3652 goto RetryDestinationFault;
3653 case VM_FAULT_MEMORY_SHORTAGE:
3654 if (vm_page_wait(interruptible))
3655 goto RetryDestinationFault;
3656 /* fall thru */
3657 case VM_FAULT_INTERRUPTED:
3658 RETURN(MACH_SEND_INTERRUPTED);
3659 case VM_FAULT_FICTITIOUS_SHORTAGE:
3660 vm_page_more_fictitious();
3661 goto RetryDestinationFault;
3662 case VM_FAULT_MEMORY_ERROR:
3663 if (error)
3664 return (error);
3665 else
3666 return(KERN_MEMORY_ERROR);
3667 }
3668 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3669
3670 old_copy_object = dst_page->object->copy;
3671
3672 /*
3673 * There exists the possibility that the source and
3674 * destination page are the same. But we can't
3675 * easily determine that now. If they are the
3676 * same, the call to vm_fault_page() for the
3677 * destination page will deadlock. To prevent this we
3678 * wire the page so we can drop busy without having
3679 * the page daemon steal the page. We clean up the
3680 * top page but keep the paging reference on the object
3681 * holding the dest page so it doesn't go away.
3682 */
3683
3684 vm_page_lock_queues();
3685 vm_page_wire(dst_page);
3686 vm_page_unlock_queues();
3687 PAGE_WAKEUP_DONE(dst_page);
3688 vm_object_unlock(dst_page->object);
3689
3690 if (dst_top_page != VM_PAGE_NULL) {
3691 vm_object_lock(dst_object);
3692 VM_PAGE_FREE(dst_top_page);
3693 vm_object_paging_end(dst_object);
3694 vm_object_unlock(dst_object);
3695 }
3696
3697 RetrySourceFault: ;
3698
3699 if (src_object == VM_OBJECT_NULL) {
3700 /*
3701 * No source object. We will just
3702 * zero-fill the page in dst_object.
3703 */
3704 src_page = VM_PAGE_NULL;
3705 result_page = VM_PAGE_NULL;
3706 } else {
3707 vm_object_lock(src_object);
3708 src_page = vm_page_lookup(src_object,
3709 vm_object_trunc_page(src_offset));
3710 if (src_page == dst_page) {
3711 src_prot = dst_prot;
3712 result_page = VM_PAGE_NULL;
3713 } else {
3714 src_prot = VM_PROT_READ;
3715 vm_object_paging_begin(src_object);
3716
3717 XPR(XPR_VM_FAULT,
3718 "vm_fault_copy(2) -> vm_fault_page\n",
3719 0,0,0,0,0);
3720 switch (vm_fault_page(src_object,
3721 vm_object_trunc_page(src_offset),
3722 VM_PROT_READ,
3723 FALSE,
3724 interruptible,
3725 src_lo_offset,
3726 src_hi_offset,
3727 VM_BEHAVIOR_SEQUENTIAL,
3728 &src_prot,
3729 &result_page,
3730 &src_top_page,
3731 (int *)0,
3732 &error,
3733 FALSE,
3734 FALSE, NULL, 0)) {
3735
3736 case VM_FAULT_SUCCESS:
3737 break;
3738 case VM_FAULT_RETRY:
3739 goto RetrySourceFault;
3740 case VM_FAULT_MEMORY_SHORTAGE:
3741 if (vm_page_wait(interruptible))
3742 goto RetrySourceFault;
3743 /* fall thru */
3744 case VM_FAULT_INTERRUPTED:
3745 vm_fault_copy_dst_cleanup(dst_page);
3746 RETURN(MACH_SEND_INTERRUPTED);
3747 case VM_FAULT_FICTITIOUS_SHORTAGE:
3748 vm_page_more_fictitious();
3749 goto RetrySourceFault;
3750 case VM_FAULT_MEMORY_ERROR:
3751 vm_fault_copy_dst_cleanup(dst_page);
3752 if (error)
3753 return (error);
3754 else
3755 return(KERN_MEMORY_ERROR);
3756 }
3757
3758
3759 assert((src_top_page == VM_PAGE_NULL) ==
3760 (result_page->object == src_object));
3761 }
3762 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
3763 vm_object_unlock(result_page->object);
3764 }
3765
3766 if (!vm_map_verify(dst_map, dst_version)) {
3767 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3768 vm_fault_copy_cleanup(result_page, src_top_page);
3769 vm_fault_copy_dst_cleanup(dst_page);
3770 break;
3771 }
3772
3773 vm_object_lock(dst_page->object);
3774
3775 if (dst_page->object->copy != old_copy_object) {
3776 vm_object_unlock(dst_page->object);
3777 vm_map_verify_done(dst_map, dst_version);
3778 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3779 vm_fault_copy_cleanup(result_page, src_top_page);
3780 vm_fault_copy_dst_cleanup(dst_page);
3781 break;
3782 }
3783 vm_object_unlock(dst_page->object);
3784
3785 /*
3786 * Copy the page, and note that it is dirty
3787 * immediately.
3788 */
3789
3790 if (!page_aligned(src_offset) ||
3791 !page_aligned(dst_offset) ||
3792 !page_aligned(amount_left)) {
3793
3794 vm_object_offset_t src_po,
3795 dst_po;
3796
3797 src_po = src_offset - vm_object_trunc_page(src_offset);
3798 dst_po = dst_offset - vm_object_trunc_page(dst_offset);
3799
3800 if (dst_po > src_po) {
3801 part_size = PAGE_SIZE - dst_po;
3802 } else {
3803 part_size = PAGE_SIZE - src_po;
3804 }
3805 if (part_size > (amount_left)){
3806 part_size = amount_left;
3807 }
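/*
 * part_size is the largest chunk that stays within a single page
 * of both the source and the destination: PAGE_SIZE minus the
 * larger of the two intra-page offsets, further capped by
 * amount_left.  Example, assuming 4 KB pages: src_po 0x200 and
 * dst_po 0x600 give part_size 0xA00.
 */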
3808
3809 if (result_page == VM_PAGE_NULL) {
3810 vm_page_part_zero_fill(dst_page,
3811 dst_po, part_size);
3812 } else {
3813 vm_page_part_copy(result_page, src_po,
3814 dst_page, dst_po, part_size);
3815 if(!dst_page->dirty){
3816 vm_object_lock(dst_object);
3817 dst_page->dirty = TRUE;
3818 vm_object_unlock(dst_page->object);
3819 }
3820
3821 }
3822 } else {
3823 part_size = PAGE_SIZE;
3824
3825 if (result_page == VM_PAGE_NULL)
3826 vm_page_zero_fill(dst_page);
3827 else{
3828 vm_page_copy(result_page, dst_page);
3829 if(!dst_page->dirty){
3830 vm_object_lock(dst_object);
3831 dst_page->dirty = TRUE;
3832 vm_object_unlock(dst_page->object);
3833 }
3834 }
3835
3836 }
3837
3838 /*
3839 * Unlock everything, and return
3840 */
3841
3842 vm_map_verify_done(dst_map, dst_version);
3843
3844 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3845 vm_fault_copy_cleanup(result_page, src_top_page);
3846 vm_fault_copy_dst_cleanup(dst_page);
3847
3848 amount_left -= part_size;
3849 src_offset += part_size;
3850 dst_offset += part_size;
3851 } while (amount_left > 0);
3852
3853 RETURN(KERN_SUCCESS);
3854 #undef RETURN
3855
3856 /*NOTREACHED*/
3857 }
3858
3859 #ifdef notdef
3860
3861 /*
3862 * Routine: vm_fault_page_overwrite
3863 *
3864 * Description:
3865 * A form of vm_fault_page that assumes that the
3866 * resulting page will be overwritten in its entirety,
3867 * making it unnecessary to obtain the correct *contents*
3868 * of the page.
3869 *
3870 * Implementation:
3871 * XXX Untested. Also unused. Eventually, this technology
3872 * could be used in vm_fault_copy() to advantage.
3873 */
3874 vm_fault_return_t
3875 vm_fault_page_overwrite(
3876 register
3877 vm_object_t dst_object,
3878 vm_object_offset_t dst_offset,
3879 vm_page_t *result_page) /* OUT */
3880 {
3881 register
3882 vm_page_t dst_page;
3883 kern_return_t wait_result;
3884
3885 #define interruptible THREAD_UNINT /* XXX */
3886
3887 while (TRUE) {
3888 /*
3889 * Look for a page at this offset
3890 */
3891
3892 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3893 == VM_PAGE_NULL) {
3894 /*
3895 * No page, no problem... just allocate one.
3896 */
3897
3898 dst_page = vm_page_alloc(dst_object, dst_offset);
3899 if (dst_page == VM_PAGE_NULL) {
3900 vm_object_unlock(dst_object);
3901 VM_PAGE_WAIT();
3902 vm_object_lock(dst_object);
3903 continue;
3904 }
3905
3906 /*
3907 * Pretend that the memory manager
3908 * write-protected the page.
3909 *
3910 * Note that we will be asking for write
3911 * permission without asking for the data
3912 * first.
3913 */
3914
3915 dst_page->overwriting = TRUE;
3916 dst_page->page_lock = VM_PROT_WRITE;
3917 dst_page->absent = TRUE;
3918 dst_page->unusual = TRUE;
3919 dst_object->absent_count++;
3920
3921 break;
3922
3923 /*
3924 * When we bail out, we might have to throw
3925 * away the page created here.
3926 */
3927
3928 #define DISCARD_PAGE \
3929 MACRO_BEGIN \
3930 vm_object_lock(dst_object); \
3931 dst_page = vm_page_lookup(dst_object, dst_offset); \
3932 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3933 VM_PAGE_FREE(dst_page); \
3934 vm_object_unlock(dst_object); \
3935 MACRO_END
3936 }
3937
3938 /*
3939 * If the page is write-protected...
3940 */
3941
3942 if (dst_page->page_lock & VM_PROT_WRITE) {
3943 /*
3944 * ... and an unlock request hasn't been sent
3945 */
3946
3947 if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3948 vm_prot_t u;
3949 kern_return_t rc;
3950
3951 /*
3952 * ... then send one now.
3953 */
3954
3955 if (!dst_object->pager_ready) {
3956 wait_result = vm_object_assert_wait(dst_object,
3957 VM_OBJECT_EVENT_PAGER_READY,
3958 interruptible);
3959 vm_object_unlock(dst_object);
3960 if (wait_result == THREAD_WAITING)
3961 wait_result = thread_block(THREAD_CONTINUE_NULL);
3962 if (wait_result != THREAD_AWAKENED) {
3963 DISCARD_PAGE;
3964 return(VM_FAULT_INTERRUPTED);
3965 }
3966 continue;
3967 }
3968
3969 u = dst_page->unlock_request |= VM_PROT_WRITE;
3970 vm_object_unlock(dst_object);
3971
3972 if ((rc = memory_object_data_unlock(
3973 dst_object->pager,
3974 dst_offset + dst_object->paging_offset,
3975 PAGE_SIZE,
3976 u)) != KERN_SUCCESS) {
3977 if (vm_fault_debug)
3978 printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3979 DISCARD_PAGE;
3980 return((rc == MACH_SEND_INTERRUPTED) ?
3981 VM_FAULT_INTERRUPTED :
3982 VM_FAULT_MEMORY_ERROR);
3983 }
3984 vm_object_lock(dst_object);
3985 continue;
3986 }
3987
3988 /* ... fall through to wait below */
3989 } else {
3990 /*
3991 * If the page isn't being used for other
3992 * purposes, then we're done.
3993 */
3994 if ( ! (dst_page->busy || dst_page->absent ||
3995 dst_page->error || dst_page->restart) )
3996 break;
3997 }
3998
3999 wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
4000 vm_object_unlock(dst_object);
4001 if (wait_result == THREAD_WAITING)
4002 wait_result = thread_block(THREAD_CONTINUE_NULL);
4003 if (wait_result != THREAD_AWAKENED) {
4004 DISCARD_PAGE;
4005 return(VM_FAULT_INTERRUPTED);
4006 }
4007 }
4008
4009 *result_page = dst_page;
4010 return(VM_FAULT_SUCCESS);
4011
4012 #undef interruptible
4013 #undef DISCARD_PAGE
4014 }
4015
4016 #endif /* notdef */
4017
4018 #if VM_FAULT_CLASSIFY
4019 /*
4020 * Temporary statistics gathering support.
4021 */
4022
4023 /*
4024 * Statistics arrays:
4025 */
4026 #define VM_FAULT_TYPES_MAX 5
4027 #define VM_FAULT_LEVEL_MAX 8
4028
4029 int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
4030
4031 #define VM_FAULT_TYPE_ZERO_FILL 0
4032 #define VM_FAULT_TYPE_MAP_IN 1
4033 #define VM_FAULT_TYPE_PAGER 2
4034 #define VM_FAULT_TYPE_COPY 3
4035 #define VM_FAULT_TYPE_OTHER 4
4036
4037
4038 void
4039 vm_fault_classify(vm_object_t object,
4040 vm_object_offset_t offset,
4041 vm_prot_t fault_type)
4042 {
4043 int type, level = 0;
4044 vm_page_t m;
4045
4046 while (TRUE) {
4047 m = vm_page_lookup(object, offset);
4048 if (m != VM_PAGE_NULL) {
4049 if (m->busy || m->error || m->restart || m->absent ||
4050 fault_type & m->page_lock) {
4051 type = VM_FAULT_TYPE_OTHER;
4052 break;
4053 }
4054 if (((fault_type & VM_PROT_WRITE) == 0) ||
4055 ((level == 0) && object->copy == VM_OBJECT_NULL)) {
4056 type = VM_FAULT_TYPE_MAP_IN;
4057 break;
4058 }
4059 type = VM_FAULT_TYPE_COPY;
4060 break;
4061 }
4062 else {
4063 if (object->pager_created) {
4064 type = VM_FAULT_TYPE_PAGER;
4065 break;
4066 }
4067 if (object->shadow == VM_OBJECT_NULL) {
4068 type = VM_FAULT_TYPE_ZERO_FILL;
4069 break;
4070 }
4071
4072 offset += object->shadow_offset;
4073 object = object->shadow;
4074 level++;
4075 continue;
4076 }
4077 }
4078
4079 if (level > VM_FAULT_LEVEL_MAX)
4080 level = VM_FAULT_LEVEL_MAX;
4081
4082 vm_fault_stats[type][level] += 1;
4083
4084 return;
4085 }
4086
4087 /* cleanup routine to call from debugger */
4088
4089 void
4090 vm_fault_classify_init(void)
4091 {
4092 int type, level;
4093
4094 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
4095 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
4096 vm_fault_stats[type][level] = 0;
4097 }
4098 }
4099
4100 return;
4101 }
4102 #endif /* VM_FAULT_CLASSIFY */