1c79356b 1/*
55e303ae 2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
1c79356b
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
e5568f75
A
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
1c79356b 11 *
e5568f75
A
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
e5568f75
A
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
1c79356b
A
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * @OSF_COPYRIGHT@
24 */
25/*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50/*
51 */
52/*
53 * File: vm_fault.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 *
56 * Page fault handling module.
57 */
58#ifdef MACH_BSD
59/* remove after component interface available */
60extern int vnode_pager_workaround;
0b4e3aa0 61extern int device_pager_workaround;
1c79356b
A
62#endif
63
64#include <mach_cluster_stats.h>
65#include <mach_pagemap.h>
66#include <mach_kdb.h>
67
68#include <vm/vm_fault.h>
69#include <mach/kern_return.h>
70#include <mach/message.h> /* for error codes */
71#include <kern/host_statistics.h>
72#include <kern/counters.h>
73#include <kern/task.h>
74#include <kern/thread.h>
75#include <kern/sched_prim.h>
76#include <kern/host.h>
77#include <kern/xpr.h>
0b4e3aa0 78#include <ppc/proc_reg.h>
0b4e3aa0 79#include <vm/task_working_set.h>
1c79356b
A
80#include <vm/vm_map.h>
81#include <vm/vm_object.h>
82#include <vm/vm_page.h>
55e303ae 83#include <vm/vm_kern.h>
1c79356b
A
84#include <vm/pmap.h>
85#include <vm/vm_pageout.h>
86#include <mach/vm_param.h>
87#include <mach/vm_behavior.h>
88#include <mach/memory_object.h>
89 /* For memory_object_data_{request,unlock} */
90#include <kern/mach_param.h>
91#include <kern/macro_help.h>
92#include <kern/zalloc.h>
93#include <kern/misc_protos.h>
94
95#include <sys/kdebug.h>
96
97#define VM_FAULT_CLASSIFY 0
98#define VM_FAULT_STATIC_CONFIG 1
99
100#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
101
102int vm_object_absent_max = 50;
103
104int vm_fault_debug = 0;
1c79356b
A
105
106#if !VM_FAULT_STATIC_CONFIG
107boolean_t vm_fault_dirty_handling = FALSE;
108boolean_t vm_fault_interruptible = FALSE;
109boolean_t software_reference_bits = TRUE;
110#endif
111
112#if MACH_KDB
113extern struct db_watchpoint *db_watchpoint_list;
114#endif /* MACH_KDB */
115
116/* Forward declarations of internal routines. */
117extern kern_return_t vm_fault_wire_fast(
118 vm_map_t map,
119 vm_offset_t va,
120 vm_map_entry_t entry,
9bccf70c
A
121 pmap_t pmap,
122 vm_offset_t pmap_addr);
1c79356b
A
123
124extern void vm_fault_continue(void);
125
126extern void vm_fault_copy_cleanup(
127 vm_page_t page,
128 vm_page_t top_page);
129
130extern void vm_fault_copy_dst_cleanup(
131 vm_page_t page);
132
133#if VM_FAULT_CLASSIFY
134extern void vm_fault_classify(vm_object_t object,
135 vm_object_offset_t offset,
136 vm_prot_t fault_type);
137
138extern void vm_fault_classify_init(void);
139#endif
140
141/*
142 * Routine: vm_fault_init
143 * Purpose:
144 * Initialize our private data structures.
145 */
146void
147vm_fault_init(void)
148{
149}
150
151/*
152 * Routine: vm_fault_cleanup
153 * Purpose:
154 * Clean up the result of vm_fault_page.
155 * Results:
156 * The paging reference for "object" is released.
157 * "object" is unlocked.
158 * If "top_page" is not null, "top_page" is
159 * freed and the paging reference for the object
160 * containing it is released.
161 *
162 * In/out conditions:
163 * "object" must be locked.
164 */
165void
166vm_fault_cleanup(
167 register vm_object_t object,
168 register vm_page_t top_page)
169{
170 vm_object_paging_end(object);
171 vm_object_unlock(object);
172
173 if (top_page != VM_PAGE_NULL) {
174 object = top_page->object;
175 vm_object_lock(object);
176 VM_PAGE_FREE(top_page);
177 vm_object_paging_end(object);
178 vm_object_unlock(object);
179 }
180}
181
182#if MACH_CLUSTER_STATS
183#define MAXCLUSTERPAGES 16
184struct {
185 unsigned long pages_in_cluster;
186 unsigned long pages_at_higher_offsets;
187 unsigned long pages_at_lower_offsets;
188} cluster_stats_in[MAXCLUSTERPAGES];
189#define CLUSTER_STAT(clause) clause
190#define CLUSTER_STAT_HIGHER(x) \
191 ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
192#define CLUSTER_STAT_LOWER(x) \
193 ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
194#define CLUSTER_STAT_CLUSTER(x) \
195 ((cluster_stats_in[(x)].pages_in_cluster)++)
196#else /* MACH_CLUSTER_STATS */
197#define CLUSTER_STAT(clause)
198#endif /* MACH_CLUSTER_STATS */
199
200/* XXX - temporary */
201boolean_t vm_allow_clustered_pagein = FALSE;
202int vm_pagein_cluster_used = 0;
203
55e303ae
A
204#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
205
206
207boolean_t vm_page_deactivate_behind = TRUE;
1c79356b
A
208/*
209 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
210 */
55e303ae
A
211int vm_default_ahead = 0;
212int vm_default_behind = MAX_UPL_TRANSFER;
213
214/*
 215 * vm_fault_deactivate_behind
216 *
217 * Determine if sequential access is in progress
218 * in accordance with the behavior specified. If
 219 * so, compute a potential page and
 220 * deactivate it.
221 *
222 * The object must be locked.
223 */
224static
225boolean_t
226vm_fault_deactivate_behind(
227 vm_object_t object,
228 vm_offset_t offset,
229 vm_behavior_t behavior)
230{
231 vm_page_t m;
232
233#if TRACEFAULTPAGE
234 dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */
235#endif
236
237 switch (behavior) {
238 case VM_BEHAVIOR_RANDOM:
239 object->sequential = PAGE_SIZE_64;
240 m = VM_PAGE_NULL;
241 break;
242 case VM_BEHAVIOR_SEQUENTIAL:
243 if (offset &&
244 object->last_alloc == offset - PAGE_SIZE_64) {
245 object->sequential += PAGE_SIZE_64;
246 m = vm_page_lookup(object, offset - PAGE_SIZE_64);
247 } else {
248 object->sequential = PAGE_SIZE_64; /* reset */
249 m = VM_PAGE_NULL;
250 }
251 break;
252 case VM_BEHAVIOR_RSEQNTL:
253 if (object->last_alloc &&
254 object->last_alloc == offset + PAGE_SIZE_64) {
255 object->sequential += PAGE_SIZE_64;
256 m = vm_page_lookup(object, offset + PAGE_SIZE_64);
257 } else {
258 object->sequential = PAGE_SIZE_64; /* reset */
259 m = VM_PAGE_NULL;
260 }
261 break;
262 case VM_BEHAVIOR_DEFAULT:
263 default:
264 if (offset &&
265 object->last_alloc == offset - PAGE_SIZE_64) {
266 vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
267
268 object->sequential += PAGE_SIZE_64;
269 m = (offset >= behind &&
270 object->sequential >= behind) ?
271 vm_page_lookup(object, offset - behind) :
272 VM_PAGE_NULL;
273 } else if (object->last_alloc &&
274 object->last_alloc == offset + PAGE_SIZE_64) {
275 vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
276
277 object->sequential += PAGE_SIZE_64;
278 m = (offset < -behind &&
279 object->sequential >= behind) ?
280 vm_page_lookup(object, offset + behind) :
281 VM_PAGE_NULL;
282 } else {
283 object->sequential = PAGE_SIZE_64;
284 m = VM_PAGE_NULL;
285 }
286 break;
287 }
288
289 object->last_alloc = offset;
290
291 if (m) {
292 if (!m->busy) {
293 vm_page_lock_queues();
294 vm_page_deactivate(m);
295 vm_page_unlock_queues();
296#if TRACEFAULTPAGE
297 dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
298#endif
299 }
300 return TRUE;
301 }
302 return FALSE;
303}
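
/*
 * Illustrative sketch only (not verbatim kernel code; the helper name
 * "example_note_fault" is hypothetical): it shows how the fault path is
 * expected to drive the check above -- with the object locked, once per
 * fault -- so the routine can track object->last_alloc and
 * object->sequential and deactivate the trailing page.
 */
#if 0	/* example only */
static void
example_note_fault(
	vm_object_t	object,
	vm_offset_t	offset,
	vm_behavior_t	behavior)
{
	vm_object_lock(object);
	if (vm_page_deactivate_behind)
		(void) vm_fault_deactivate_behind(object, offset, behavior);
	vm_object_unlock(object);
}
#endif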
1c79356b 304
1c79356b
A
305
306/*
307 * Routine: vm_fault_page
308 * Purpose:
309 * Find the resident page for the virtual memory
310 * specified by the given virtual memory object
311 * and offset.
312 * Additional arguments:
 313 * The required permissions for the page are given
314 * in "fault_type". Desired permissions are included
315 * in "protection". The minimum and maximum valid offsets
316 * within the object for the relevant map entry are
317 * passed in "lo_offset" and "hi_offset" respectively and
318 * the expected page reference pattern is passed in "behavior".
319 * These three parameters are used to determine pagein cluster
320 * limits.
321 *
322 * If the desired page is known to be resident (for
323 * example, because it was previously wired down), asserting
324 * the "unwiring" parameter will speed the search.
325 *
326 * If the operation can be interrupted (by thread_abort
327 * or thread_terminate), then the "interruptible"
328 * parameter should be asserted.
329 *
330 * Results:
331 * The page containing the proper data is returned
332 * in "result_page".
333 *
334 * In/out conditions:
335 * The source object must be locked and referenced,
336 * and must donate one paging reference. The reference
337 * is not affected. The paging reference and lock are
338 * consumed.
339 *
340 * If the call succeeds, the object in which "result_page"
341 * resides is left locked and holding a paging reference.
342 * If this is not the original object, a busy page in the
343 * original object is returned in "top_page", to prevent other
344 * callers from pursuing this same data, along with a paging
345 * reference for the original object. The "top_page" should
346 * be destroyed when this guarantee is no longer required.
347 * The "result_page" is also left busy. It is not removed
348 * from the pageout queues.
349 */
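
/*
 * Hedged caller sketch (assumptions only; "example_lookup_page" and the
 * lo/hi offset placeholders are hypothetical, not the kernel's real
 * caller).  It illustrates the contract documented above: the first
 * object is passed in locked with one donated paging reference, and on
 * success the caller must release "result_page" and destroy "top_page",
 * e.g. via PAGE_WAKEUP_DONE() and vm_fault_cleanup().
 */
#if 0	/* example only */
static vm_fault_return_t
example_lookup_page(
	vm_object_t		first_object,
	vm_object_offset_t	first_offset)
{
	vm_prot_t		prot = VM_PROT_READ;
	vm_page_t		result_page, top_page;
	kern_return_t		error_code;
	vm_fault_return_t	kr;

	vm_object_lock(first_object);
	vm_object_paging_begin(first_object);	/* donate a paging reference */

	kr = vm_fault_page(first_object, first_offset,
		VM_PROT_READ,			/* fault_type */
		FALSE,				/* must_be_resident */
		THREAD_ABORTSAFE,		/* interruptible */
		first_offset,			/* lo_offset (placeholder) */
		first_offset + PAGE_SIZE_64,	/* hi_offset (placeholder) */
		VM_BEHAVIOR_DEFAULT,
		&prot,
		&result_page, &top_page,
		NULL,				/* type_of_fault */
		&error_code,
		FALSE,				/* no_zero_fill */
		FALSE,				/* data_supply */
		VM_MAP_NULL, 0);		/* map, vaddr */

	if (kr == VM_FAULT_SUCCESS) {
		/* result_page's object is returned locked with a paging ref */
		PAGE_WAKEUP_DONE(result_page);
		vm_fault_cleanup(result_page->object, top_page);
	}
	return kr;
}
#endif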
350
351vm_fault_return_t
352vm_fault_page(
353 /* Arguments: */
354 vm_object_t first_object, /* Object to begin search */
355 vm_object_offset_t first_offset, /* Offset into object */
356 vm_prot_t fault_type, /* What access is requested */
357 boolean_t must_be_resident,/* Must page be resident? */
358 int interruptible, /* how may fault be interrupted? */
359 vm_object_offset_t lo_offset, /* Map entry start */
360 vm_object_offset_t hi_offset, /* Map entry end */
361 vm_behavior_t behavior, /* Page reference behavior */
362 /* Modifies in place: */
363 vm_prot_t *protection, /* Protection for mapping */
364 /* Returns: */
365 vm_page_t *result_page, /* Page found, if successful */
366 vm_page_t *top_page, /* Page in top object, if
367 * not result_page. */
368 int *type_of_fault, /* if non-null, fill in with type of fault
369 * COW, zero-fill, etc... returned in trace point */
370 /* More arguments: */
371 kern_return_t *error_code, /* code if page is in error */
372 boolean_t no_zero_fill, /* don't zero fill absent pages */
0b4e3aa0 373 boolean_t data_supply, /* treat as data_supply if
1c79356b
A
374 * it is a write fault and a full
375 * page is provided */
0b4e3aa0
A
376 vm_map_t map,
377 vm_offset_t vaddr)
1c79356b
A
378{
379 register
380 vm_page_t m;
381 register
382 vm_object_t object;
383 register
384 vm_object_offset_t offset;
385 vm_page_t first_m;
386 vm_object_t next_object;
387 vm_object_t copy_object;
388 boolean_t look_for_page;
389 vm_prot_t access_required = fault_type;
390 vm_prot_t wants_copy_flag;
391 vm_size_t cluster_size, length;
392 vm_object_offset_t cluster_offset;
393 vm_object_offset_t cluster_start, cluster_end, paging_offset;
394 vm_object_offset_t align_offset;
395 CLUSTER_STAT(int pages_at_higher_offsets;)
396 CLUSTER_STAT(int pages_at_lower_offsets;)
397 kern_return_t wait_result;
1c79356b 398 boolean_t interruptible_state;
0b4e3aa0 399 boolean_t bumped_pagein = FALSE;
1c79356b 400
1c79356b
A
401
402#if MACH_PAGEMAP
403/*
404 * MACH page map - an optional optimization where a bit map is maintained
405 * by the VM subsystem for internal objects to indicate which pages of
406 * the object currently reside on backing store. This existence map
407 * duplicates information maintained by the vnode pager. It is
408 * created at the time of the first pageout against the object, i.e.
 409 * at the same time the pager for the object is created. The optimization
410 * is designed to eliminate pager interaction overhead, if it is
411 * 'known' that the page does not exist on backing store.
412 *
413 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
414 * either marked as paged out in the existence map for the object or no
415 * existence map exists for the object. LOOK_FOR() is one of the
416 * criteria in the decision to invoke the pager. It is also used as one
417 * of the criteria to terminate the scan for adjacent pages in a clustered
418 * pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
419 * permanent objects. Note also that if the pager for an internal object
420 * has not been created, the pager is not invoked regardless of the value
421 * of LOOK_FOR() and that clustered pagein scans are only done on an object
422 * for which a pager has been created.
423 *
424 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 425 * is marked as paged out in the existence map for the object.
426 * PAGED_OUT() is used to determine if a page has already been pushed
427 * into a copy object in order to avoid a redundant page out operation.
428 */
429#define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
430 != VM_EXTERNAL_STATE_ABSENT)
431#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
432 == VM_EXTERNAL_STATE_EXISTS)
433#else /* MACH_PAGEMAP */
434/*
435 * If the MACH page map optimization is not enabled,
436 * LOOK_FOR() always evaluates to TRUE. The pager will always be
437 * invoked to resolve missing pages in an object, assuming the pager
438 * has been created for the object. In a clustered page operation, the
 439 * absence of a page on backing store cannot be used to terminate
440 * a scan for adjacent pages since that information is available only in
441 * the pager. Hence pages that may not be paged out are potentially
442 * included in a clustered request. The vnode pager is coded to deal
443 * with any combination of absent/present pages in a clustered
444 * pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
445 * will always be invoked to push a dirty page into a copy object assuming
446 * a pager has been created. If the page has already been pushed, the
 447 * pager will ignore the new request.
448 */
449#define LOOK_FOR(o, f) TRUE
450#define PAGED_OUT(o, f) FALSE
451#endif /* MACH_PAGEMAP */
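
/*
 * Usage sketch (mirrors checks made further down in this routine, shown
 * here only to illustrate the two macros): LOOK_FOR() gates whether the
 * pager is asked for a missing page at all, while PAGED_OUT() gates
 * whether a dirty page still needs to be pushed into a copy object.
 */
#if 0	/* example only */
	look_for_page = (object->pager_created) &&
			LOOK_FOR(object, offset) &&
			(!data_supply);

	if (!PAGED_OUT(copy_object, copy_offset)) {
		/* not known to be on backing store: copy/push the page */
	}
#endif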
452
453/*
454 * Recovery actions
455 */
456#define PREPARE_RELEASE_PAGE(m) \
457 MACRO_BEGIN \
458 vm_page_lock_queues(); \
459 MACRO_END
460
461#define DO_RELEASE_PAGE(m) \
462 MACRO_BEGIN \
463 PAGE_WAKEUP_DONE(m); \
464 if (!m->active && !m->inactive) \
465 vm_page_activate(m); \
466 vm_page_unlock_queues(); \
467 MACRO_END
468
469#define RELEASE_PAGE(m) \
470 MACRO_BEGIN \
471 PREPARE_RELEASE_PAGE(m); \
472 DO_RELEASE_PAGE(m); \
473 MACRO_END
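
/*
 * Usage note (illustrative fragment): the backoff and error paths below
 * use RELEASE_PAGE() to wake waiters and reactivate a page that was
 * claimed busy, before unwinding with vm_fault_cleanup().
 */
#if 0	/* example only */
	RELEASE_PAGE(m);
	vm_fault_cleanup(object, first_m);
	thread_interrupt_level(interruptible_state);
	return (VM_FAULT_MEMORY_SHORTAGE);
#endif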
474
475#if TRACEFAULTPAGE
476 dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
477#endif
478
479
480
481#if !VM_FAULT_STATIC_CONFIG
482 if (vm_fault_dirty_handling
483#if MACH_KDB
484 /*
485 * If there are watchpoints set, then
486 * we don't want to give away write permission
487 * on a read fault. Make the task write fault,
488 * so that the watchpoint code notices the access.
489 */
490 || db_watchpoint_list
491#endif /* MACH_KDB */
492 ) {
493 /*
494 * If we aren't asking for write permission,
495 * then don't give it away. We're using write
496 * faults to set the dirty bit.
497 */
498 if (!(fault_type & VM_PROT_WRITE))
499 *protection &= ~VM_PROT_WRITE;
500 }
501
502 if (!vm_fault_interruptible)
503 interruptible = THREAD_UNINT;
504#else /* STATIC_CONFIG */
505#if MACH_KDB
506 /*
507 * If there are watchpoints set, then
508 * we don't want to give away write permission
509 * on a read fault. Make the task write fault,
510 * so that the watchpoint code notices the access.
511 */
512 if (db_watchpoint_list) {
513 /*
514 * If we aren't asking for write permission,
515 * then don't give it away. We're using write
516 * faults to set the dirty bit.
517 */
518 if (!(fault_type & VM_PROT_WRITE))
519 *protection &= ~VM_PROT_WRITE;
520 }
521
522#endif /* MACH_KDB */
523#endif /* STATIC_CONFIG */
524
9bccf70c 525 interruptible_state = thread_interrupt_level(interruptible);
1c79356b
A
526
527 /*
528 * INVARIANTS (through entire routine):
529 *
530 * 1) At all times, we must either have the object
531 * lock or a busy page in some object to prevent
532 * some other thread from trying to bring in
533 * the same page.
534 *
535 * Note that we cannot hold any locks during the
536 * pager access or when waiting for memory, so
537 * we use a busy page then.
538 *
539 * Note also that we aren't as concerned about more than
540 * one thread attempting to memory_object_data_unlock
541 * the same page at once, so we don't hold the page
542 * as busy then, but do record the highest unlock
543 * value so far. [Unlock requests may also be delivered
544 * out of order.]
545 *
546 * 2) To prevent another thread from racing us down the
547 * shadow chain and entering a new page in the top
548 * object before we do, we must keep a busy page in
549 * the top object while following the shadow chain.
550 *
551 * 3) We must increment paging_in_progress on any object
552 * for which we have a busy page
553 *
554 * 4) We leave busy pages on the pageout queues.
555 * If the pageout daemon comes across a busy page,
556 * it will remove the page from the pageout queues.
557 */
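
/*
 * Sketch of invariant (1) in practice (illustrative only): a page is
 * claimed busy under the object lock before the lock is dropped for
 * pager I/O or memory waits, and PAGE_WAKEUP_DONE() later clears the
 * busy bit and wakes any waiters once the lock is held again.
 */
#if 0	/* example only */
	assert(!m->busy);
	m->busy = TRUE;			/* hold our place in the object */
	vm_object_unlock(object);	/* safe: the busy page blocks racers */
	/* ... pager interaction / wait for memory ... */
	vm_object_lock(object);
	PAGE_WAKEUP_DONE(m);		/* clear busy and wake waiters */
#endif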
558
559 /*
560 * Search for the page at object/offset.
561 */
562
563 object = first_object;
564 offset = first_offset;
565 first_m = VM_PAGE_NULL;
566 access_required = fault_type;
567
568 XPR(XPR_VM_FAULT,
569 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
570 (integer_t)object, offset, fault_type, *protection, 0);
571
572 /*
573 * See whether this page is resident
574 */
575
576 while (TRUE) {
577#if TRACEFAULTPAGE
578 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
579#endif
580 if (!object->alive) {
581 vm_fault_cleanup(object, first_m);
9bccf70c 582 thread_interrupt_level(interruptible_state);
1c79356b
A
583 return(VM_FAULT_MEMORY_ERROR);
584 }
585 m = vm_page_lookup(object, offset);
586#if TRACEFAULTPAGE
587 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
588#endif
589 if (m != VM_PAGE_NULL) {
590 /*
591 * If the page was pre-paged as part of a
592 * cluster, record the fact.
593 */
594 if (m->clustered) {
595 vm_pagein_cluster_used++;
596 m->clustered = FALSE;
597 }
598
599 /*
600 * If the page is being brought in,
601 * wait for it and then retry.
602 *
603 * A possible optimization: if the page
604 * is known to be resident, we can ignore
605 * pages that are absent (regardless of
606 * whether they're busy).
607 */
608
609 if (m->busy) {
610#if TRACEFAULTPAGE
611 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
612#endif
9bccf70c 613 wait_result = PAGE_SLEEP(object, m, interruptible);
1c79356b
A
614 XPR(XPR_VM_FAULT,
615 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
616 (integer_t)object, offset,
617 (integer_t)m, 0, 0);
618 counter(c_vm_fault_page_block_busy_kernel++);
1c79356b 619
1c79356b
A
620 if (wait_result != THREAD_AWAKENED) {
621 vm_fault_cleanup(object, first_m);
9bccf70c 622 thread_interrupt_level(interruptible_state);
1c79356b
A
623 if (wait_result == THREAD_RESTART)
624 {
625 return(VM_FAULT_RETRY);
626 }
627 else
628 {
629 return(VM_FAULT_INTERRUPTED);
630 }
631 }
632 continue;
633 }
634
635 /*
636 * If the page is in error, give up now.
637 */
638
639 if (m->error) {
640#if TRACEFAULTPAGE
641 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */
642#endif
643 if (error_code)
644 *error_code = m->page_error;
645 VM_PAGE_FREE(m);
646 vm_fault_cleanup(object, first_m);
9bccf70c 647 thread_interrupt_level(interruptible_state);
1c79356b
A
648 return(VM_FAULT_MEMORY_ERROR);
649 }
650
651 /*
652 * If the pager wants us to restart
653 * at the top of the chain,
654 * typically because it has moved the
655 * page to another pager, then do so.
656 */
657
658 if (m->restart) {
659#if TRACEFAULTPAGE
660 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
661#endif
662 VM_PAGE_FREE(m);
663 vm_fault_cleanup(object, first_m);
9bccf70c 664 thread_interrupt_level(interruptible_state);
1c79356b
A
665 return(VM_FAULT_RETRY);
666 }
667
668 /*
669 * If the page isn't busy, but is absent,
670 * then it was deemed "unavailable".
671 */
672
673 if (m->absent) {
674 /*
675 * Remove the non-existent page (unless it's
676 * in the top object) and move on down to the
677 * next object (if there is one).
678 */
679#if TRACEFAULTPAGE
680 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */
681#endif
682
683 next_object = object->shadow;
684 if (next_object == VM_OBJECT_NULL) {
685 vm_page_t real_m;
686
687 assert(!must_be_resident);
688
689 if (object->shadow_severed) {
690 vm_fault_cleanup(
691 object, first_m);
9bccf70c 692 thread_interrupt_level(interruptible_state);
1c79356b
A
693 return VM_FAULT_MEMORY_ERROR;
694 }
695
696 /*
697 * Absent page at bottom of shadow
698 * chain; zero fill the page we left
699 * busy in the first object, and flush
700 * the absent page. But first we
701 * need to allocate a real page.
702 */
703 if (VM_PAGE_THROTTLED() ||
55e303ae
A
704 (real_m = vm_page_grab())
705 == VM_PAGE_NULL) {
706 vm_fault_cleanup(
707 object, first_m);
708 thread_interrupt_level(
709 interruptible_state);
710 return(
711 VM_FAULT_MEMORY_SHORTAGE);
712 }
713
714 /*
 715 * Are we protecting the system from
 716 * backing store exhaustion? If so,
 717 * sleep unless we are privileged.
718 */
719
720 if(vm_backing_store_low) {
721 if(!(current_task()->priv_flags
722 & VM_BACKING_STORE_PRIV)) {
723 assert_wait((event_t)
724 &vm_backing_store_low,
725 THREAD_UNINT);
726 vm_fault_cleanup(object,
727 first_m);
728 thread_block((void(*)(void)) 0);
729 thread_interrupt_level(
730 interruptible_state);
731 return(VM_FAULT_RETRY);
732 }
1c79356b
A
733 }
734
55e303ae 735
1c79356b
A
736 XPR(XPR_VM_FAULT,
737 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
738 (integer_t)object, offset,
739 (integer_t)m,
740 (integer_t)first_object, 0);
741 if (object != first_object) {
742 VM_PAGE_FREE(m);
743 vm_object_paging_end(object);
744 vm_object_unlock(object);
745 object = first_object;
746 offset = first_offset;
747 m = first_m;
748 first_m = VM_PAGE_NULL;
749 vm_object_lock(object);
750 }
751
752 VM_PAGE_FREE(m);
753 assert(real_m->busy);
754 vm_page_insert(real_m, object, offset);
755 m = real_m;
756
757 /*
758 * Drop the lock while zero filling
759 * page. Then break because this
760 * is the page we wanted. Checking
761 * the page lock is a waste of time;
762 * this page was either absent or
763 * newly allocated -- in both cases
764 * it can't be page locked by a pager.
765 */
0b4e3aa0
A
766 m->no_isync = FALSE;
767
1c79356b
A
768 if (!no_zero_fill) {
769 vm_object_unlock(object);
770 vm_page_zero_fill(m);
1c79356b
A
771 vm_object_lock(object);
772 }
55e303ae
A
773 if (type_of_fault)
774 *type_of_fault = DBG_ZERO_FILL_FAULT;
775 VM_STAT(zero_fill_count++);
776
777 if (bumped_pagein == TRUE) {
778 VM_STAT(pageins--);
779 current_task()->pageins--;
780 }
781#if 0
782 pmap_clear_modify(m->phys_page);
783#endif
1c79356b
A
784 vm_page_lock_queues();
785 VM_PAGE_QUEUES_REMOVE(m);
0b4e3aa0 786 m->page_ticket = vm_page_ticket;
9bccf70c
A
787 if(m->object->size > 0x80000) {
788 m->zero_fill = TRUE;
789 /* depends on the queues lock */
790 vm_zf_count += 1;
791 queue_enter(&vm_page_queue_zf,
792 m, vm_page_t, pageq);
793 } else {
794 queue_enter(
795 &vm_page_queue_inactive,
796 m, vm_page_t, pageq);
797 }
0b4e3aa0
A
798 vm_page_ticket_roll++;
799 if(vm_page_ticket_roll ==
800 VM_PAGE_TICKETS_IN_ROLL) {
801 vm_page_ticket_roll = 0;
802 if(vm_page_ticket ==
803 VM_PAGE_TICKET_ROLL_IDS)
804 vm_page_ticket= 0;
805 else
806 vm_page_ticket++;
807 }
1c79356b
A
808 m->inactive = TRUE;
809 vm_page_inactive_count++;
810 vm_page_unlock_queues();
811 break;
812 } else {
813 if (must_be_resident) {
814 vm_object_paging_end(object);
815 } else if (object != first_object) {
816 vm_object_paging_end(object);
817 VM_PAGE_FREE(m);
818 } else {
819 first_m = m;
820 m->absent = FALSE;
821 m->unusual = FALSE;
822 vm_object_absent_release(object);
823 m->busy = TRUE;
824
825 vm_page_lock_queues();
826 VM_PAGE_QUEUES_REMOVE(m);
827 vm_page_unlock_queues();
828 }
829 XPR(XPR_VM_FAULT,
830 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
831 (integer_t)object, offset,
832 (integer_t)next_object,
833 offset+object->shadow_offset,0);
834 offset += object->shadow_offset;
835 hi_offset += object->shadow_offset;
836 lo_offset += object->shadow_offset;
837 access_required = VM_PROT_READ;
838 vm_object_lock(next_object);
839 vm_object_unlock(object);
840 object = next_object;
841 vm_object_paging_begin(object);
842 continue;
843 }
844 }
845
846 if ((m->cleaning)
847 && ((object != first_object) ||
848 (object->copy != VM_OBJECT_NULL))
849 && (fault_type & VM_PROT_WRITE)) {
850 /*
851 * This is a copy-on-write fault that will
852 * cause us to revoke access to this page, but
853 * this page is in the process of being cleaned
854 * in a clustered pageout. We must wait until
855 * the cleaning operation completes before
856 * revoking access to the original page,
857 * otherwise we might attempt to remove a
858 * wired mapping.
859 */
860#if TRACEFAULTPAGE
861 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */
862#endif
863 XPR(XPR_VM_FAULT,
864 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
865 (integer_t)object, offset,
866 (integer_t)m, 0, 0);
867 /* take an extra ref so that object won't die */
868 assert(object->ref_count > 0);
869 object->ref_count++;
870 vm_object_res_reference(object);
871 vm_fault_cleanup(object, first_m);
872 counter(c_vm_fault_page_block_backoff_kernel++);
873 vm_object_lock(object);
874 assert(object->ref_count > 0);
875 m = vm_page_lookup(object, offset);
876 if (m != VM_PAGE_NULL && m->cleaning) {
877 PAGE_ASSERT_WAIT(m, interruptible);
878 vm_object_unlock(object);
9bccf70c 879 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
880 vm_object_deallocate(object);
881 goto backoff;
882 } else {
883 vm_object_unlock(object);
884 vm_object_deallocate(object);
9bccf70c 885 thread_interrupt_level(interruptible_state);
1c79356b
A
886 return VM_FAULT_RETRY;
887 }
888 }
889
890 /*
891 * If the desired access to this page has
892 * been locked out, request that it be unlocked.
893 */
894
895 if (access_required & m->page_lock) {
896 if ((access_required & m->unlock_request) != access_required) {
897 vm_prot_t new_unlock_request;
898 kern_return_t rc;
899
900#if TRACEFAULTPAGE
901 dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready); /* (TEST/DEBUG) */
902#endif
903 if (!object->pager_ready) {
904 XPR(XPR_VM_FAULT,
905 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
906 access_required,
907 (integer_t)object, offset,
908 (integer_t)m, 0);
909 /* take an extra ref */
910 assert(object->ref_count > 0);
911 object->ref_count++;
912 vm_object_res_reference(object);
913 vm_fault_cleanup(object,
914 first_m);
915 counter(c_vm_fault_page_block_backoff_kernel++);
916 vm_object_lock(object);
917 assert(object->ref_count > 0);
918 if (!object->pager_ready) {
9bccf70c 919 wait_result = vm_object_assert_wait(
1c79356b
A
920 object,
921 VM_OBJECT_EVENT_PAGER_READY,
922 interruptible);
923 vm_object_unlock(object);
9bccf70c
A
924 if (wait_result == THREAD_WAITING)
925 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
926 vm_object_deallocate(object);
927 goto backoff;
928 } else {
929 vm_object_unlock(object);
930 vm_object_deallocate(object);
9bccf70c 931 thread_interrupt_level(interruptible_state);
1c79356b
A
932 return VM_FAULT_RETRY;
933 }
934 }
935
936 new_unlock_request = m->unlock_request =
937 (access_required | m->unlock_request);
938 vm_object_unlock(object);
939 XPR(XPR_VM_FAULT,
940 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
941 (integer_t)object, offset,
942 (integer_t)m, new_unlock_request, 0);
943 if ((rc = memory_object_data_unlock(
944 object->pager,
1c79356b
A
945 offset + object->paging_offset,
946 PAGE_SIZE,
947 new_unlock_request))
948 != KERN_SUCCESS) {
949 if (vm_fault_debug)
950 printf("vm_fault: memory_object_data_unlock failed\n");
951 vm_object_lock(object);
952 vm_fault_cleanup(object, first_m);
9bccf70c 953 thread_interrupt_level(interruptible_state);
1c79356b
A
954 return((rc == MACH_SEND_INTERRUPTED) ?
955 VM_FAULT_INTERRUPTED :
956 VM_FAULT_MEMORY_ERROR);
957 }
958 vm_object_lock(object);
959 continue;
960 }
961
962 XPR(XPR_VM_FAULT,
963 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
964 access_required, (integer_t)object,
965 offset, (integer_t)m, 0);
966 /* take an extra ref so object won't die */
967 assert(object->ref_count > 0);
968 object->ref_count++;
969 vm_object_res_reference(object);
970 vm_fault_cleanup(object, first_m);
971 counter(c_vm_fault_page_block_backoff_kernel++);
972 vm_object_lock(object);
973 assert(object->ref_count > 0);
974 m = vm_page_lookup(object, offset);
975 if (m != VM_PAGE_NULL &&
976 (access_required & m->page_lock) &&
977 !((access_required & m->unlock_request) != access_required)) {
978 PAGE_ASSERT_WAIT(m, interruptible);
979 vm_object_unlock(object);
9bccf70c 980 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
981 vm_object_deallocate(object);
982 goto backoff;
983 } else {
984 vm_object_unlock(object);
985 vm_object_deallocate(object);
9bccf70c 986 thread_interrupt_level(interruptible_state);
1c79356b
A
987 return VM_FAULT_RETRY;
988 }
989 }
990 /*
991 * We mark the page busy and leave it on
992 * the pageout queues. If the pageout
 993 * daemon comes across it, then it will
994 * remove the page.
995 */
996
997#if TRACEFAULTPAGE
998 dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
999#endif
1000
1001#if !VM_FAULT_STATIC_CONFIG
1002 if (!software_reference_bits) {
1003 vm_page_lock_queues();
1004 if (m->inactive)
1005 vm_stat.reactivations++;
1006
1007 VM_PAGE_QUEUES_REMOVE(m);
1008 vm_page_unlock_queues();
1009 }
1010#endif
1011 XPR(XPR_VM_FAULT,
1012 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
1013 (integer_t)object, offset, (integer_t)m, 0, 0);
1014 assert(!m->busy);
1015 m->busy = TRUE;
1016 assert(!m->absent);
1017 break;
1018 }
1019
1020 look_for_page =
1021 (object->pager_created) &&
1022 LOOK_FOR(object, offset) &&
1023 (!data_supply);
1024
1025#if TRACEFAULTPAGE
1026 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */
1027#endif
1028 if ((look_for_page || (object == first_object))
0b4e3aa0
A
1029 && !must_be_resident
1030 && !(object->phys_contiguous)) {
1c79356b
A
1031 /*
1032 * Allocate a new page for this object/offset
1033 * pair.
1034 */
1035
1036 m = vm_page_grab_fictitious();
1037#if TRACEFAULTPAGE
1038 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
1039#endif
1040 if (m == VM_PAGE_NULL) {
1041 vm_fault_cleanup(object, first_m);
9bccf70c 1042 thread_interrupt_level(interruptible_state);
1c79356b
A
1043 return(VM_FAULT_FICTITIOUS_SHORTAGE);
1044 }
1045 vm_page_insert(m, object, offset);
1046 }
1047
0b4e3aa0 1048 if ((look_for_page && !must_be_resident)) {
1c79356b
A
1049 kern_return_t rc;
1050
1051 /*
1052 * If the memory manager is not ready, we
1053 * cannot make requests.
1054 */
1055 if (!object->pager_ready) {
1056#if TRACEFAULTPAGE
1057 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
1058#endif
0b4e3aa0
A
1059 if(m != VM_PAGE_NULL)
1060 VM_PAGE_FREE(m);
1c79356b
A
1061 XPR(XPR_VM_FAULT,
1062 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
1063 (integer_t)object, offset, 0, 0, 0);
1064 /* take an extra ref so object won't die */
1065 assert(object->ref_count > 0);
1066 object->ref_count++;
1067 vm_object_res_reference(object);
1068 vm_fault_cleanup(object, first_m);
1069 counter(c_vm_fault_page_block_backoff_kernel++);
1070 vm_object_lock(object);
1071 assert(object->ref_count > 0);
1072 if (!object->pager_ready) {
9bccf70c 1073 wait_result = vm_object_assert_wait(object,
1c79356b
A
1074 VM_OBJECT_EVENT_PAGER_READY,
1075 interruptible);
1076 vm_object_unlock(object);
9bccf70c
A
1077 if (wait_result == THREAD_WAITING)
1078 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
1079 vm_object_deallocate(object);
1080 goto backoff;
1081 } else {
1082 vm_object_unlock(object);
1083 vm_object_deallocate(object);
9bccf70c 1084 thread_interrupt_level(interruptible_state);
1c79356b
A
1085 return VM_FAULT_RETRY;
1086 }
1087 }
1088
0b4e3aa0
A
1089 if(object->phys_contiguous) {
1090 if(m != VM_PAGE_NULL) {
1091 VM_PAGE_FREE(m);
1092 m = VM_PAGE_NULL;
1093 }
1094 goto no_clustering;
1095 }
1c79356b
A
1096 if (object->internal) {
1097 /*
1098 * Requests to the default pager
1099 * must reserve a real page in advance,
 1100 * because the pager's data-provided calls
1101 * won't block for pages. IMPORTANT:
1102 * this acts as a throttling mechanism
1103 * for data_requests to the default
1104 * pager.
1105 */
1106
1107#if TRACEFAULTPAGE
1108 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1109#endif
1110 if (m->fictitious && !vm_page_convert(m)) {
1111 VM_PAGE_FREE(m);
1112 vm_fault_cleanup(object, first_m);
9bccf70c 1113 thread_interrupt_level(interruptible_state);
1c79356b
A
1114 return(VM_FAULT_MEMORY_SHORTAGE);
1115 }
1116 } else if (object->absent_count >
1117 vm_object_absent_max) {
1118 /*
1119 * If there are too many outstanding page
1120 * requests pending on this object, we
1121 * wait for them to be resolved now.
1122 */
1123
1124#if TRACEFAULTPAGE
1125 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1126#endif
0b4e3aa0
A
1127 if(m != VM_PAGE_NULL)
1128 VM_PAGE_FREE(m);
1c79356b
A
1129 /* take an extra ref so object won't die */
1130 assert(object->ref_count > 0);
1131 object->ref_count++;
1132 vm_object_res_reference(object);
1133 vm_fault_cleanup(object, first_m);
1134 counter(c_vm_fault_page_block_backoff_kernel++);
1135 vm_object_lock(object);
1136 assert(object->ref_count > 0);
1137 if (object->absent_count > vm_object_absent_max) {
1138 vm_object_absent_assert_wait(object,
1139 interruptible);
1140 vm_object_unlock(object);
9bccf70c 1141 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
1142 vm_object_deallocate(object);
1143 goto backoff;
1144 } else {
1145 vm_object_unlock(object);
1146 vm_object_deallocate(object);
9bccf70c 1147 thread_interrupt_level(interruptible_state);
1c79356b
A
1148 return VM_FAULT_RETRY;
1149 }
1150 }
1151
1152 /*
1153 * Indicate that the page is waiting for data
1154 * from the memory manager.
1155 */
1156
0b4e3aa0
A
1157 if(m != VM_PAGE_NULL) {
1158
1159 m->list_req_pending = TRUE;
1160 m->absent = TRUE;
1161 m->unusual = TRUE;
1162 object->absent_count++;
1163
1164 }
1c79356b 1165
9bccf70c 1166no_clustering:
1c79356b
A
1167 cluster_start = offset;
1168 length = PAGE_SIZE;
1c79356b 1169
0b4e3aa0
A
1170 /*
1171 * lengthen the cluster by the pages in the working set
1172 */
1173 if((map != NULL) &&
1174 (current_task()->dynamic_working_set != 0)) {
1175 cluster_end = cluster_start + length;
 1176 /* tws values for start and end are just
1177 * suggestions. Therefore, as long as
1178 * build_cluster does not use pointers or
1179 * take action based on values that
 1180 * could be affected by re-entrance, we
1181 * do not need to take the map lock.
1182 */
9bccf70c 1183 cluster_end = offset + PAGE_SIZE_64;
0b4e3aa0
A
1184 tws_build_cluster((tws_hash_t)
1185 current_task()->dynamic_working_set,
1186 object, &cluster_start,
9bccf70c 1187 &cluster_end, 0x40000);
0b4e3aa0
A
1188 length = cluster_end - cluster_start;
1189 }
1c79356b
A
1190#if TRACEFAULTPAGE
1191 dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */
1192#endif
1193 /*
1194 * We have a busy page, so we can
1195 * release the object lock.
1196 */
1197 vm_object_unlock(object);
1198
1199 /*
1200 * Call the memory manager to retrieve the data.
1201 */
1202
1203 if (type_of_fault)
9bccf70c 1204 *type_of_fault = (length << 8) | DBG_PAGEIN_FAULT;
1c79356b
A
1205 VM_STAT(pageins++);
1206 current_task()->pageins++;
0b4e3aa0 1207 bumped_pagein = TRUE;
1c79356b
A
1208
1209 /*
1210 * If this object uses a copy_call strategy,
1211 * and we are interested in a copy of this object
1212 * (having gotten here only by following a
1213 * shadow chain), then tell the memory manager
1214 * via a flag added to the desired_access
1215 * parameter, so that it can detect a race
1216 * between our walking down the shadow chain
1217 * and its pushing pages up into a copy of
1218 * the object that it manages.
1219 */
1220
1221 if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1222 object != first_object) {
1223 wants_copy_flag = VM_PROT_WANTS_COPY;
1224 } else {
1225 wants_copy_flag = VM_PROT_NONE;
1226 }
1227
1228 XPR(XPR_VM_FAULT,
1229 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1230 (integer_t)object, offset, (integer_t)m,
1231 access_required | wants_copy_flag, 0);
1232
1c79356b 1233 rc = memory_object_data_request(object->pager,
1c79356b
A
1234 cluster_start + object->paging_offset,
1235 length,
1236 access_required | wants_copy_flag);
1237
1c79356b
A
1238
1239#if TRACEFAULTPAGE
1240 dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1241#endif
1242 if (rc != KERN_SUCCESS) {
1243 if (rc != MACH_SEND_INTERRUPTED
1244 && vm_fault_debug)
0b4e3aa0 1245 printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
1c79356b
A
1246 "memory_object_data_request",
1247 object->pager,
1c79356b 1248 cluster_start + object->paging_offset,
0b4e3aa0 1249 length, access_required, rc);
1c79356b
A
1250 /*
1251 * Don't want to leave a busy page around,
1252 * but the data request may have blocked,
1253 * so check if it's still there and busy.
1254 */
0b4e3aa0
A
1255 if(!object->phys_contiguous) {
1256 vm_object_lock(object);
1257 for (; length; length -= PAGE_SIZE,
1258 cluster_start += PAGE_SIZE_64) {
1259 vm_page_t p;
1260 if ((p = vm_page_lookup(object,
1c79356b 1261 cluster_start))
0b4e3aa0
A
1262 && p->absent && p->busy
1263 && p != first_m) {
1264 VM_PAGE_FREE(p);
1265 }
1266 }
1c79356b
A
1267 }
1268 vm_fault_cleanup(object, first_m);
9bccf70c 1269 thread_interrupt_level(interruptible_state);
1c79356b
A
1270 return((rc == MACH_SEND_INTERRUPTED) ?
1271 VM_FAULT_INTERRUPTED :
1272 VM_FAULT_MEMORY_ERROR);
0b4e3aa0
A
1273 } else {
1274#ifdef notdefcdy
1275 tws_hash_line_t line;
1276 task_t task;
1277
1278 task = current_task();
1279
1280 if((map != NULL) &&
9bccf70c
A
 1281 (task->dynamic_working_set != 0)
1282 && !(object->private)) {
1283 vm_object_t base_object;
1284 vm_object_offset_t base_offset;
1285 base_object = object;
1286 base_offset = offset;
1287 while(base_object->shadow) {
1288 base_offset +=
1289 base_object->shadow_offset;
1290 base_object =
1291 base_object->shadow;
1292 }
0b4e3aa0
A
1293 if(tws_lookup
1294 ((tws_hash_t)
1295 task->dynamic_working_set,
9bccf70c 1296 base_offset, base_object,
0b4e3aa0
A
1297 &line) == KERN_SUCCESS) {
1298 tws_line_signal((tws_hash_t)
1299 task->dynamic_working_set,
1300 map, line, vaddr);
1301 }
1302 }
1303#endif
1c79356b
A
1304 }
1305
1306 /*
1307 * Retry with same object/offset, since new data may
1308 * be in a different page (i.e., m is meaningless at
1309 * this point).
1310 */
1311 vm_object_lock(object);
1312 if ((interruptible != THREAD_UNINT) &&
1313 (current_thread()->state & TH_ABORT)) {
1314 vm_fault_cleanup(object, first_m);
9bccf70c 1315 thread_interrupt_level(interruptible_state);
1c79356b
A
1316 return(VM_FAULT_INTERRUPTED);
1317 }
0b4e3aa0
A
1318 if(m == VM_PAGE_NULL)
1319 break;
1c79356b
A
1320 continue;
1321 }
1322
1323 /*
1324 * The only case in which we get here is if
1325 * object has no pager (or unwiring). If the pager doesn't
1326 * have the page this is handled in the m->absent case above
1327 * (and if you change things here you should look above).
1328 */
1329#if TRACEFAULTPAGE
1330 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1331#endif
1332 if (object == first_object)
1333 first_m = m;
1334 else
1335 assert(m == VM_PAGE_NULL);
1336
1337 XPR(XPR_VM_FAULT,
1338 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1339 (integer_t)object, offset, (integer_t)m,
1340 (integer_t)object->shadow, 0);
1341 /*
1342 * Move on to the next object. Lock the next
1343 * object before unlocking the current one.
1344 */
1345 next_object = object->shadow;
1346 if (next_object == VM_OBJECT_NULL) {
1347 assert(!must_be_resident);
1348 /*
1349 * If there's no object left, fill the page
1350 * in the top object with zeros. But first we
1351 * need to allocate a real page.
1352 */
1353
1354 if (object != first_object) {
1355 vm_object_paging_end(object);
1356 vm_object_unlock(object);
1357
1358 object = first_object;
1359 offset = first_offset;
1360 vm_object_lock(object);
1361 }
1362
1363 m = first_m;
1364 assert(m->object == object);
1365 first_m = VM_PAGE_NULL;
1366
55e303ae
A
1367 if(m == VM_PAGE_NULL) {
1368 m = vm_page_grab();
1369 if (m == VM_PAGE_NULL) {
1370 vm_fault_cleanup(
1371 object, VM_PAGE_NULL);
1372 thread_interrupt_level(
1373 interruptible_state);
1374 return(VM_FAULT_MEMORY_SHORTAGE);
1375 }
1376 vm_page_insert(
1377 m, object, offset);
1378 }
1379
1c79356b
A
1380 if (object->shadow_severed) {
1381 VM_PAGE_FREE(m);
1382 vm_fault_cleanup(object, VM_PAGE_NULL);
9bccf70c 1383 thread_interrupt_level(interruptible_state);
1c79356b
A
1384 return VM_FAULT_MEMORY_ERROR;
1385 }
1386
55e303ae
A
1387 /*
 1388 * Are we protecting the system from
 1389 * backing store exhaustion? If so,
 1390 * sleep unless we are privileged.
1391 */
1392
1393 if(vm_backing_store_low) {
1394 if(!(current_task()->priv_flags
1395 & VM_BACKING_STORE_PRIV)) {
1396 assert_wait((event_t)
1397 &vm_backing_store_low,
1398 THREAD_UNINT);
1399 VM_PAGE_FREE(m);
1400 vm_fault_cleanup(object, VM_PAGE_NULL);
1401 thread_block((void (*)(void)) 0);
1402 thread_interrupt_level(
1403 interruptible_state);
1404 return(VM_FAULT_RETRY);
1405 }
1406 }
1407
1c79356b
A
1408 if (VM_PAGE_THROTTLED() ||
1409 (m->fictitious && !vm_page_convert(m))) {
1410 VM_PAGE_FREE(m);
1411 vm_fault_cleanup(object, VM_PAGE_NULL);
9bccf70c 1412 thread_interrupt_level(interruptible_state);
1c79356b
A
1413 return(VM_FAULT_MEMORY_SHORTAGE);
1414 }
0b4e3aa0 1415 m->no_isync = FALSE;
1c79356b
A
1416
1417 if (!no_zero_fill) {
1418 vm_object_unlock(object);
1419 vm_page_zero_fill(m);
1c79356b
A
1420 vm_object_lock(object);
1421 }
55e303ae
A
1422 if (type_of_fault)
1423 *type_of_fault = DBG_ZERO_FILL_FAULT;
1424 VM_STAT(zero_fill_count++);
1425
1426 if (bumped_pagein == TRUE) {
1427 VM_STAT(pageins--);
1428 current_task()->pageins--;
1429 }
1430
1c79356b
A
1431 vm_page_lock_queues();
1432 VM_PAGE_QUEUES_REMOVE(m);
9bccf70c
A
1433 if(m->object->size > 0x80000) {
1434 m->zero_fill = TRUE;
1435 /* depends on the queues lock */
1436 vm_zf_count += 1;
1437 queue_enter(&vm_page_queue_zf,
1438 m, vm_page_t, pageq);
1439 } else {
1440 queue_enter(
1441 &vm_page_queue_inactive,
1442 m, vm_page_t, pageq);
1443 }
0b4e3aa0
A
1444 m->page_ticket = vm_page_ticket;
1445 vm_page_ticket_roll++;
1446 if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1447 vm_page_ticket_roll = 0;
1448 if(vm_page_ticket ==
1449 VM_PAGE_TICKET_ROLL_IDS)
1450 vm_page_ticket= 0;
1451 else
1452 vm_page_ticket++;
1453 }
1c79356b
A
1454 m->inactive = TRUE;
1455 vm_page_inactive_count++;
1456 vm_page_unlock_queues();
55e303ae
A
1457#if 0
1458 pmap_clear_modify(m->phys_page);
1459#endif
1c79356b
A
1460 break;
1461 }
1462 else {
1463 if ((object != first_object) || must_be_resident)
1464 vm_object_paging_end(object);
1465 offset += object->shadow_offset;
1466 hi_offset += object->shadow_offset;
1467 lo_offset += object->shadow_offset;
1468 access_required = VM_PROT_READ;
1469 vm_object_lock(next_object);
1470 vm_object_unlock(object);
1471 object = next_object;
1472 vm_object_paging_begin(object);
1473 }
1474 }
1475
1476 /*
1477 * PAGE HAS BEEN FOUND.
1478 *
1479 * This page (m) is:
1480 * busy, so that we can play with it;
1481 * not absent, so that nobody else will fill it;
1482 * possibly eligible for pageout;
1483 *
1484 * The top-level page (first_m) is:
1485 * VM_PAGE_NULL if the page was found in the
1486 * top-level object;
1487 * busy, not absent, and ineligible for pageout.
1488 *
1489 * The current object (object) is locked. A paging
1490 * reference is held for the current and top-level
1491 * objects.
1492 */
1493
1494#if TRACEFAULTPAGE
1495 dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1496#endif
1497#if EXTRA_ASSERTIONS
0b4e3aa0
A
1498 if(m != VM_PAGE_NULL) {
1499 assert(m->busy && !m->absent);
1500 assert((first_m == VM_PAGE_NULL) ||
1501 (first_m->busy && !first_m->absent &&
1502 !first_m->active && !first_m->inactive));
1503 }
1c79356b
A
1504#endif /* EXTRA_ASSERTIONS */
1505
1506 XPR(XPR_VM_FAULT,
1507 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1508 (integer_t)object, offset, (integer_t)m,
1509 (integer_t)first_object, (integer_t)first_m);
1510 /*
1511 * If the page is being written, but isn't
1512 * already owned by the top-level object,
1513 * we have to copy it into a new page owned
1514 * by the top-level object.
1515 */
1516
0b4e3aa0 1517 if ((object != first_object) && (m != VM_PAGE_NULL)) {
1c79356b
A
1518 /*
1519 * We only really need to copy if we
1520 * want to write it.
1521 */
1522
1523#if TRACEFAULTPAGE
1524 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1525#endif
1526 if (fault_type & VM_PROT_WRITE) {
1527 vm_page_t copy_m;
1528
1529 assert(!must_be_resident);
1530
55e303ae
A
1531 /*
 1532 * Are we protecting the system from
 1533 * backing store exhaustion? If so,
 1534 * sleep unless we are privileged.
1535 */
1536
1537 if(vm_backing_store_low) {
1538 if(!(current_task()->priv_flags
1539 & VM_BACKING_STORE_PRIV)) {
1540 assert_wait((event_t)
1541 &vm_backing_store_low,
1542 THREAD_UNINT);
1543 RELEASE_PAGE(m);
1544 vm_fault_cleanup(object, first_m);
1545 thread_block((void (*)(void)) 0);
1546 thread_interrupt_level(
1547 interruptible_state);
1548 return(VM_FAULT_RETRY);
1549 }
1550 }
1551
1c79356b
A
1552 /*
1553 * If we try to collapse first_object at this
1554 * point, we may deadlock when we try to get
1555 * the lock on an intermediate object (since we
1556 * have the bottom object locked). We can't
1557 * unlock the bottom object, because the page
1558 * we found may move (by collapse) if we do.
1559 *
1560 * Instead, we first copy the page. Then, when
1561 * we have no more use for the bottom object,
1562 * we unlock it and try to collapse.
1563 *
1564 * Note that we copy the page even if we didn't
1565 * need to... that's the breaks.
1566 */
1567
1568 /*
1569 * Allocate a page for the copy
1570 */
1571 copy_m = vm_page_grab();
1572 if (copy_m == VM_PAGE_NULL) {
1573 RELEASE_PAGE(m);
1574 vm_fault_cleanup(object, first_m);
9bccf70c 1575 thread_interrupt_level(interruptible_state);
1c79356b
A
1576 return(VM_FAULT_MEMORY_SHORTAGE);
1577 }
1578
1579
1580 XPR(XPR_VM_FAULT,
1581 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1582 (integer_t)object, offset,
1583 (integer_t)m, (integer_t)copy_m, 0);
1584 vm_page_copy(m, copy_m);
1585
1586 /*
1587 * If another map is truly sharing this
1588 * page with us, we have to flush all
1589 * uses of the original page, since we
1590 * can't distinguish those which want the
1591 * original from those which need the
1592 * new copy.
1593 *
1594 * XXXO If we know that only one map has
1595 * access to this page, then we could
1596 * avoid the pmap_page_protect() call.
1597 */
1598
1599 vm_page_lock_queues();
1600 assert(!m->cleaning);
55e303ae 1601 pmap_page_protect(m->phys_page, VM_PROT_NONE);
1c79356b
A
1602 vm_page_deactivate(m);
1603 copy_m->dirty = TRUE;
1604 /*
1605 * Setting reference here prevents this fault from
1606 * being counted as a (per-thread) reactivate as well
1607 * as a copy-on-write.
1608 */
1609 first_m->reference = TRUE;
1610 vm_page_unlock_queues();
1611
1612 /*
1613 * We no longer need the old page or object.
1614 */
1615
1616 PAGE_WAKEUP_DONE(m);
1617 vm_object_paging_end(object);
1618 vm_object_unlock(object);
1619
1620 if (type_of_fault)
1621 *type_of_fault = DBG_COW_FAULT;
1622 VM_STAT(cow_faults++);
1623 current_task()->cow_faults++;
1624 object = first_object;
1625 offset = first_offset;
1626
1627 vm_object_lock(object);
1628 VM_PAGE_FREE(first_m);
1629 first_m = VM_PAGE_NULL;
1630 assert(copy_m->busy);
1631 vm_page_insert(copy_m, object, offset);
1632 m = copy_m;
1633
1634 /*
1635 * Now that we've gotten the copy out of the
1636 * way, let's try to collapse the top object.
1637 * But we have to play ugly games with
1638 * paging_in_progress to do that...
1639 */
1640
1641 vm_object_paging_end(object);
55e303ae 1642 vm_object_collapse(object, offset);
1c79356b
A
1643 vm_object_paging_begin(object);
1644
1645 }
1646 else {
1647 *protection &= (~VM_PROT_WRITE);
1648 }
1649 }
1650
1651 /*
1652 * Now check whether the page needs to be pushed into the
1653 * copy object. The use of asymmetric copy on write for
1654 * shared temporary objects means that we may do two copies to
1655 * satisfy the fault; one above to get the page from a
1656 * shadowed object, and one here to push it into the copy.
1657 */
1658
9bccf70c 1659 while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
0b4e3aa0 1660 (m!= VM_PAGE_NULL)) {
1c79356b
A
1661 vm_object_offset_t copy_offset;
1662 vm_page_t copy_m;
1663
1664#if TRACEFAULTPAGE
1665 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1666#endif
1667 /*
1668 * If the page is being written, but hasn't been
1669 * copied to the copy-object, we have to copy it there.
1670 */
1671
1672 if ((fault_type & VM_PROT_WRITE) == 0) {
1673 *protection &= ~VM_PROT_WRITE;
1674 break;
1675 }
1676
1677 /*
1678 * If the page was guaranteed to be resident,
1679 * we must have already performed the copy.
1680 */
1681
1682 if (must_be_resident)
1683 break;
1684
1685 /*
1686 * Try to get the lock on the copy_object.
1687 */
1688 if (!vm_object_lock_try(copy_object)) {
1689 vm_object_unlock(object);
1690
1691 mutex_pause(); /* wait a bit */
1692
1693 vm_object_lock(object);
1694 continue;
1695 }
1696
1697 /*
1698 * Make another reference to the copy-object,
1699 * to keep it from disappearing during the
1700 * copy.
1701 */
1702 assert(copy_object->ref_count > 0);
1703 copy_object->ref_count++;
1704 VM_OBJ_RES_INCR(copy_object);
1705
1706 /*
1707 * Does the page exist in the copy?
1708 */
1709 copy_offset = first_offset - copy_object->shadow_offset;
1710 if (copy_object->size <= copy_offset)
1711 /*
1712 * Copy object doesn't cover this page -- do nothing.
1713 */
1714 ;
1715 else if ((copy_m =
1716 vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1717 /* Page currently exists in the copy object */
1718 if (copy_m->busy) {
1719 /*
1720 * If the page is being brought
1721 * in, wait for it and then retry.
1722 */
1723 RELEASE_PAGE(m);
1724 /* take an extra ref so object won't die */
1725 assert(copy_object->ref_count > 0);
1726 copy_object->ref_count++;
1727 vm_object_res_reference(copy_object);
1728 vm_object_unlock(copy_object);
1729 vm_fault_cleanup(object, first_m);
1730 counter(c_vm_fault_page_block_backoff_kernel++);
1731 vm_object_lock(copy_object);
1732 assert(copy_object->ref_count > 0);
1733 VM_OBJ_RES_DECR(copy_object);
1734 copy_object->ref_count--;
1735 assert(copy_object->ref_count > 0);
1736 copy_m = vm_page_lookup(copy_object, copy_offset);
1737 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1738 PAGE_ASSERT_WAIT(copy_m, interruptible);
1739 vm_object_unlock(copy_object);
9bccf70c 1740 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
1741 vm_object_deallocate(copy_object);
1742 goto backoff;
1743 } else {
1744 vm_object_unlock(copy_object);
1745 vm_object_deallocate(copy_object);
9bccf70c 1746 thread_interrupt_level(interruptible_state);
1c79356b
A
1747 return VM_FAULT_RETRY;
1748 }
1749 }
1750 }
1751 else if (!PAGED_OUT(copy_object, copy_offset)) {
1752 /*
1753 * If PAGED_OUT is TRUE, then the page used to exist
1754 * in the copy-object, and has already been paged out.
1755 * We don't need to repeat this. If PAGED_OUT is
1756 * FALSE, then either we don't know (!pager_created,
1757 * for example) or it hasn't been paged out.
1758 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1759 * We must copy the page to the copy object.
1760 */
1761
55e303ae
A
1762 /*
 1763 * Are we protecting the system from
 1764 * backing store exhaustion? If so,
 1765 * sleep unless we are privileged.
1766 */
1767
1768 if(vm_backing_store_low) {
1769 if(!(current_task()->priv_flags
1770 & VM_BACKING_STORE_PRIV)) {
1771 assert_wait((event_t)
1772 &vm_backing_store_low,
1773 THREAD_UNINT);
1774 RELEASE_PAGE(m);
1775 VM_OBJ_RES_DECR(copy_object);
1776 copy_object->ref_count--;
1777 assert(copy_object->ref_count > 0);
1778 vm_object_unlock(copy_object);
1779 vm_fault_cleanup(object, first_m);
1780 thread_block((void (*)(void)) 0);
1781 thread_interrupt_level(
1782 interruptible_state);
1783 return(VM_FAULT_RETRY);
1784 }
1785 }
1786
1c79356b
A
1787 /*
1788 * Allocate a page for the copy
1789 */
1790 copy_m = vm_page_alloc(copy_object, copy_offset);
1791 if (copy_m == VM_PAGE_NULL) {
1792 RELEASE_PAGE(m);
1793 VM_OBJ_RES_DECR(copy_object);
1794 copy_object->ref_count--;
1795 assert(copy_object->ref_count > 0);
1796 vm_object_unlock(copy_object);
1797 vm_fault_cleanup(object, first_m);
9bccf70c 1798 thread_interrupt_level(interruptible_state);
1c79356b
A
1799 return(VM_FAULT_MEMORY_SHORTAGE);
1800 }
1801
1802 /*
1803 * Must copy page into copy-object.
1804 */
1805
1806 vm_page_copy(m, copy_m);
1807
1808 /*
1809 * If the old page was in use by any users
1810 * of the copy-object, it must be removed
1811 * from all pmaps. (We can't know which
1812 * pmaps use it.)
1813 */
1814
1815 vm_page_lock_queues();
1816 assert(!m->cleaning);
55e303ae 1817 pmap_page_protect(m->phys_page, VM_PROT_NONE);
1c79356b
A
1818 copy_m->dirty = TRUE;
1819 vm_page_unlock_queues();
1820
1821 /*
1822 * If there's a pager, then immediately
1823 * page out this page, using the "initialize"
1824 * option. Else, we use the copy.
1825 */
1826
1827 if
1828#if MACH_PAGEMAP
1829 ((!copy_object->pager_created) ||
1830 vm_external_state_get(
1831 copy_object->existence_map, copy_offset)
1832 == VM_EXTERNAL_STATE_ABSENT)
1833#else
1834 (!copy_object->pager_created)
1835#endif
1836 {
1837 vm_page_lock_queues();
1838 vm_page_activate(copy_m);
1839 vm_page_unlock_queues();
1840 PAGE_WAKEUP_DONE(copy_m);
1841 }
1842 else {
1843 assert(copy_m->busy == TRUE);
1844
1845 /*
1846 * The page is already ready for pageout:
1847 * not on pageout queues and busy.
1848 * Unlock everything except the
1849 * copy_object itself.
1850 */
1851
1852 vm_object_unlock(object);
1853
1854 /*
1855 * Write the page to the copy-object,
1856 * flushing it from the kernel.
1857 */
1858
1859 vm_pageout_initialize_page(copy_m);
1860
1861 /*
1862 * Since the pageout may have
1863 * temporarily dropped the
1864 * copy_object's lock, we
1865 * check whether we'll have
1866 * to deallocate the hard way.
1867 */
1868
1869 if ((copy_object->shadow != object) ||
1870 (copy_object->ref_count == 1)) {
1871 vm_object_unlock(copy_object);
1872 vm_object_deallocate(copy_object);
1873 vm_object_lock(object);
1874 continue;
1875 }
1876
1877 /*
1878 * Pick back up the old object's
1879 * lock. [It is safe to do so,
1880 * since it must be deeper in the
1881 * object tree.]
1882 */
1883
1884 vm_object_lock(object);
1885 }
1886
1887 /*
1888 * Because we're pushing a page upward
1889 * in the object tree, we must restart
1890 * any faults that are waiting here.
1891 * [Note that this is an expansion of
1892 * PAGE_WAKEUP that uses the THREAD_RESTART
1893 * wait result]. Can't turn off the page's
1894 * busy bit because we're not done with it.
1895 */
1896
1897 if (m->wanted) {
1898 m->wanted = FALSE;
1899 thread_wakeup_with_result((event_t) m,
1900 THREAD_RESTART);
1901 }
1902 }
1903
1904 /*
1905 * The reference count on copy_object must be
1906 * at least 2: one for our extra reference,
1907 * and at least one from the outside world
1908 * (we checked that when we last locked
1909 * copy_object).
1910 */
1911 copy_object->ref_count--;
1912 assert(copy_object->ref_count > 0);
1913 VM_OBJ_RES_DECR(copy_object);
1914 vm_object_unlock(copy_object);
1915
1916 break;
1917 }
1918
1919 *result_page = m;
1920 *top_page = first_m;
1921
1922 XPR(XPR_VM_FAULT,
1923 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1924 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1925 /*
1926 * If the page can be written, assume that it will be.
1927 * [Earlier, we restricted the permission to allow write
1928 * access only if the fault so required, so we don't
1929 * mark read-only data as dirty.]
1930 */
1931
55e303ae
A
1932
1933 if(m != VM_PAGE_NULL) {
1c79356b 1934#if !VM_FAULT_STATIC_CONFIG
55e303ae
A
1935 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
1936 m->dirty = TRUE;
1c79356b 1937#endif
55e303ae
A
1938 if (vm_page_deactivate_behind)
1939 vm_fault_deactivate_behind(object, offset, behavior);
1940 } else {
1941 vm_object_unlock(object);
1c79356b 1942 }
55e303ae
A
1943 thread_interrupt_level(interruptible_state);
1944
1c79356b
A
1945#if TRACEFAULTPAGE
1946 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */
1947#endif
1c79356b
A
1948 return(VM_FAULT_SUCCESS);
1949
1950#if 0
1951 block_and_backoff:
1952 vm_fault_cleanup(object, first_m);
1953
1954 counter(c_vm_fault_page_block_backoff_kernel++);
9bccf70c 1955 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
1956#endif
1957
1958 backoff:
9bccf70c 1959 thread_interrupt_level(interruptible_state);
1c79356b
A
1960 if (wait_result == THREAD_INTERRUPTED)
1961 return VM_FAULT_INTERRUPTED;
1962 return VM_FAULT_RETRY;
1963
1964#undef RELEASE_PAGE
1965}
1966
55e303ae
A
1967/*
1968 * Routine: vm_fault_tws_insert
1969 * Purpose:
1970 * Add fault information to the task working set.
1971 * Implementation:
1972 * We always insert the base object/offset pair
1973 * rather than the actual object/offset.
1974 * Assumptions:
1975 * Map and pmap_map locked.
1976 * Object locked and referenced.
1977 * Returns:
1978 * TRUE if startup file should be written.
1979 * With object locked and still referenced.
1980 * But we may drop the object lock temporarily.
1981 */
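/*
 * Illustrative caller sketch (hypothetical, but mirroring the
 * calls made later in this file): the fault handler invokes this
 * with the object locked and the map read-locked, e.g.
 *
 *	if (!object->private)
 *		write_startup_file =
 *			vm_fault_tws_insert(map, pmap_map, vaddr,
 *					    object, offset);
 *
 * and, once all locks are dropped, flushes the startup file with
 * tws_send_startup_info(current_task()) when TRUE was returned.
 */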
1982static boolean_t
1983vm_fault_tws_insert(
1984 vm_map_t map,
1985 vm_map_t pmap_map,
1986 vm_offset_t vaddr,
1987 vm_object_t object,
1988 vm_object_offset_t offset)
1989{
1990 tws_hash_line_t line;
1991 task_t task;
1992 kern_return_t kr;
1993 boolean_t result = FALSE;
1994 extern vm_map_t kalloc_map;
1995
1996 /* Avoid possible map lock deadlock issues */
1997 if (map == kernel_map || map == kalloc_map ||
1998 pmap_map == kernel_map || pmap_map == kalloc_map)
1999 return result;
2000
2001 task = current_task();
2002 if (task->dynamic_working_set != 0) {
2003 vm_object_t base_object;
2004 vm_object_t base_shadow;
2005 vm_object_offset_t base_offset;
2006 base_object = object;
2007 base_offset = offset;
2008 while(base_shadow = base_object->shadow) {
2009 vm_object_lock(base_shadow);
2010 vm_object_unlock(base_object);
2011 base_offset +=
2012 base_object->shadow_offset;
2013 base_object = base_shadow;
2014 }
2015 kr = tws_lookup((tws_hash_t)
2016 task->dynamic_working_set,
2017 base_offset, base_object,
2018 &line);
2019 if (kr == KERN_OPERATION_TIMED_OUT){
2020 result = TRUE;
2021 if (base_object != object) {
2022 vm_object_unlock(base_object);
2023 vm_object_lock(object);
2024 }
2025 } else if (kr != KERN_SUCCESS) {
2026 if(base_object != object)
2027 vm_object_reference_locked(base_object);
2028 kr = tws_insert((tws_hash_t)
2029 task->dynamic_working_set,
2030 base_offset, base_object,
2031 vaddr, pmap_map);
2032 if(base_object != object) {
2033 vm_object_unlock(base_object);
2034 vm_object_deallocate(base_object);
2035 }
2036 if(kr == KERN_NO_SPACE) {
2037 if (base_object == object)
2038 vm_object_unlock(object);
2039 tws_expand_working_set(
2040 task->dynamic_working_set,
2041 TWS_HASH_LINE_COUNT,
2042 FALSE);
2043 if (base_object == object)
2044 vm_object_lock(object);
2045 } else if(kr == KERN_OPERATION_TIMED_OUT) {
2046 result = TRUE;
2047 }
2048 if(base_object != object)
2049 vm_object_lock(object);
2050 } else if (base_object != object) {
2051 vm_object_unlock(base_object);
2052 vm_object_lock(object);
2053 }
2054 }
2055 return result;
2056}
2057
1c79356b
A
2058/*
2059 * Routine: vm_fault
2060 * Purpose:
2061 * Handle page faults, including pseudo-faults
2062 * used to change the wiring status of pages.
2063 * Returns:
2064 * Explicit continuations have been removed.
2065 * Implementation:
2066 * vm_fault and vm_fault_page save mucho state
2067 * in the moral equivalent of a closure. The state
2068 * structure is allocated when first entering vm_fault
2069 * and deallocated when leaving vm_fault.
2070 */
2071
2072kern_return_t
2073vm_fault(
2074 vm_map_t map,
2075 vm_offset_t vaddr,
2076 vm_prot_t fault_type,
2077 boolean_t change_wiring,
9bccf70c
A
2078 int interruptible,
2079 pmap_t caller_pmap,
2080 vm_offset_t caller_pmap_addr)
1c79356b
A
2081{
2082 vm_map_version_t version; /* Map version for verification */
2083 boolean_t wired; /* Should mapping be wired down? */
2084 vm_object_t object; /* Top-level object */
2085 vm_object_offset_t offset; /* Top-level offset */
2086 vm_prot_t prot; /* Protection for mapping */
2087 vm_behavior_t behavior; /* Expected paging behavior */
2088 vm_object_offset_t lo_offset, hi_offset;
2089 vm_object_t old_copy_object; /* Saved copy object */
2090 vm_page_t result_page; /* Result of vm_fault_page */
2091 vm_page_t top_page; /* Placeholder page */
2092 kern_return_t kr;
2093
2094 register
2095 vm_page_t m; /* Fast access to result_page */
2096 kern_return_t error_code; /* page error reasons */
2097 register
2098 vm_object_t cur_object;
2099 register
2100 vm_object_offset_t cur_offset;
2101 vm_page_t cur_m;
2102 vm_object_t new_object;
2103 int type_of_fault;
2104 vm_map_t pmap_map = map;
2105 vm_map_t original_map = map;
2106 pmap_t pmap = NULL;
2107 boolean_t funnel_set = FALSE;
2108 funnel_t *curflock;
2109 thread_t cur_thread;
2110 boolean_t interruptible_state;
9bccf70c
A
2111 unsigned int cache_attr;
2112 int write_startup_file = 0;
2113 vm_prot_t full_fault_type;
1c79356b 2114
55e303ae
A
2115 if (get_preemption_level() != 0)
2116 return (KERN_FAILURE);
de355530 2117
1c79356b
A
2118 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
2119 vaddr,
2120 0,
2121 0,
2122 0,
2123 0);
2124
9bccf70c
A
2125 /* at present we do not fully check for execute permission */
2126 /* we generally treat it as read except in certain device */
2127 /* memory settings */
2128 full_fault_type = fault_type;
2129 if(fault_type & VM_PROT_EXECUTE) {
2130 fault_type &= ~VM_PROT_EXECUTE;
2131 fault_type |= VM_PROT_READ;
2132 }
1c79356b 2133
9bccf70c 2134 interruptible_state = thread_interrupt_level(interruptible);
1c79356b
A
2135
2136 /*
2137 * assume we will hit a page in the cache
2138 * otherwise, explicitly override with
2139 * the real fault type once we determine it
2140 */
2141 type_of_fault = DBG_CACHE_HIT_FAULT;
2142
2143 VM_STAT(faults++);
2144 current_task()->faults++;
2145
2146 /*
2147 * drop funnel if it is already held. Then restore while returning
2148 */
55e303ae
A
2149 cur_thread = current_thread();
2150
1c79356b
A
2151 if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) {
2152 funnel_set = TRUE;
2153 curflock = cur_thread->funnel_lock;
2154 thread_funnel_set( curflock , FALSE);
2155 }
2156
2157 RetryFault: ;
2158
2159 /*
2160 * Find the backing store object and offset into
2161 * it to begin the search.
2162 */
2163 map = original_map;
2164 vm_map_lock_read(map);
2165 kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
2166 &object, &offset,
2167 &prot, &wired,
2168 &behavior, &lo_offset, &hi_offset, &pmap_map);
2169
2170 pmap = pmap_map->pmap;
2171
2172 if (kr != KERN_SUCCESS) {
2173 vm_map_unlock_read(map);
2174 goto done;
2175 }
2176
2177 /*
2178 * If the page is wired, we must fault for the current protection
2179 * value, to avoid further faults.
2180 */
2181
2182 if (wired)
2183 fault_type = prot | VM_PROT_WRITE;
2184
2185#if VM_FAULT_CLASSIFY
2186 /*
2187 * Temporary data gathering code
2188 */
2189 vm_fault_classify(object, offset, fault_type);
2190#endif
2191 /*
2192 * Fast fault code. The basic idea is to do as much as
2193 * possible while holding the map lock and object locks.
2194 * Busy pages are not used until the object lock has to
2195 * be dropped to do something (copy, zero fill, pmap enter).
2196 * Similarly, paging references aren't acquired until that
2197 * point, and object references aren't used.
2198 *
2199 * If we can figure out what to do
2200 * (zero fill, copy on write, pmap enter) while holding
2201 * the locks, then it gets done. Otherwise, we give up,
2202 * and use the original fault path (which doesn't hold
2203 * the map lock, and relies on busy pages).
2204 * The give up cases include:
2205 * - Have to talk to pager.
2206 * - Page is busy, absent or in error.
2207 * - Pager has locked out desired access.
2208 * - Fault needs to be restarted.
2209 * - Have to push page into copy object.
2210 *
2211 * The code is an infinite loop that moves one level down
2212 * the shadow chain each time. cur_object and cur_offset
2213 * refer to the current object being examined. object and offset
2214 * are the original object from the map. The loop is at the
2215 * top level if and only if object and cur_object are the same.
2216 *
2217 * Invariants: Map lock is held throughout. Lock is held on
2218 * original object and cur_object (if different) when
2219 * continuing or exiting loop.
2220 *
2221 */
2222
2223
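/*
 * Rough sketch of the fast path below (descriptive only; the
 * busy-page, throttle and error checks are elided):
 *
 *	cur_object = object; cur_offset = offset;
 *	while (TRUE) {
 *		m = vm_page_lookup(cur_object, cur_offset);
 *		if (m != VM_PAGE_NULL)
 *			-- read fault, or top level with no copy
 *			   object: map the page in (FastPmapEnter);
 *			-- write fault below the top: copy the page
 *			   up into "object" and map the copy in;
 *			-- otherwise break;
 *		else if (cur_object->pager_created)
 *			break;		-- must talk to the pager
 *		else if (cur_object->shadow == VM_OBJECT_NULL)
 *			-- zero fill a new page in "object";
 *		else
 *			-- descend the shadow chain and loop;
 *	}
 *
 * Every break above falls into the slow path, which takes a real
 * object reference and calls vm_fault_page().
 */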
2224 /*
2225 * If this page is to be inserted in a copy delay object
2226 * for writing, and if the object has a copy, then the
2227 * copy delay strategy is implemented in the slow fault path.
2228 */
2229 if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
2230 object->copy == VM_OBJECT_NULL ||
2231 (fault_type & VM_PROT_WRITE) == 0) {
2232 cur_object = object;
2233 cur_offset = offset;
2234
2235 while (TRUE) {
2236 m = vm_page_lookup(cur_object, cur_offset);
2237 if (m != VM_PAGE_NULL) {
55e303ae 2238 if (m->busy) {
143cc14e
A
2239 wait_result_t result;
2240
2241 if (object != cur_object)
2242 vm_object_unlock(object);
2243
2244 vm_map_unlock_read(map);
2245 if (pmap_map != map)
2246 vm_map_unlock(pmap_map);
2247
2248#if !VM_FAULT_STATIC_CONFIG
2249 if (!vm_fault_interruptible)
2250 interruptible = THREAD_UNINT;
2251#endif
2252 result = PAGE_ASSERT_WAIT(m, interruptible);
1c79356b 2253
143cc14e
A
2254 vm_object_unlock(cur_object);
2255
2256 if (result == THREAD_WAITING) {
2257 result = thread_block(THREAD_CONTINUE_NULL);
2258
2259 counter(c_vm_fault_page_block_busy_kernel++);
2260 }
2261 if (result == THREAD_AWAKENED || result == THREAD_RESTART)
2262 goto RetryFault;
2263
2264 kr = KERN_ABORTED;
2265 goto done;
2266 }
0b4e3aa0
A
2267 if (m->unusual && (m->error || m->restart || m->private
2268 || m->absent || (fault_type & m->page_lock))) {
1c79356b 2269
143cc14e 2270 /*
1c79356b
A
2271 * Unusual case. Give up.
2272 */
2273 break;
2274 }
2275
2276 /*
2277 * Two cases of map in faults:
2278 * - At top level w/o copy object.
2279 * - Read fault anywhere.
2280 * --> must disallow write.
2281 */
2282
2283 if (object == cur_object &&
2284 object->copy == VM_OBJECT_NULL)
2285 goto FastMapInFault;
2286
2287 if ((fault_type & VM_PROT_WRITE) == 0) {
55e303ae 2288 boolean_t sequential;
1c79356b
A
2289
2290 prot &= ~VM_PROT_WRITE;
2291
2292 /*
2293 * Set up to map the page ...
2294 * mark the page busy, drop
2295 * locks and take a paging reference
2296 * on the object with the page.
2297 */
2298
2299 if (object != cur_object) {
2300 vm_object_unlock(object);
2301 object = cur_object;
2302 }
2303FastMapInFault:
2304 m->busy = TRUE;
2305
2306 vm_object_paging_begin(object);
1c79356b
A
2307
2308FastPmapEnter:
2309 /*
2310 * Check a couple of global reasons to
2311 * be conservative about write access.
2312 * Then do the pmap_enter.
2313 */
2314#if !VM_FAULT_STATIC_CONFIG
2315 if (vm_fault_dirty_handling
2316#if MACH_KDB
2317 || db_watchpoint_list
2318#endif
2319 && (fault_type & VM_PROT_WRITE) == 0)
2320 prot &= ~VM_PROT_WRITE;
2321#else /* STATIC_CONFIG */
2322#if MACH_KDB
2323 if (db_watchpoint_list
2324 && (fault_type & VM_PROT_WRITE) == 0)
2325 prot &= ~VM_PROT_WRITE;
2326#endif /* MACH_KDB */
2327#endif /* STATIC_CONFIG */
55e303ae
A
2328 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2329
2330 sequential = FALSE;
de355530 2331 if (m->no_isync == TRUE) {
143cc14e 2332 m->no_isync = FALSE;
55e303ae
A
2333 pmap_sync_caches_phys(m->phys_page);
2334 if (type_of_fault == DBG_CACHE_HIT_FAULT) {
2335 /*
2336 * found it in the cache, but this
2337 * is the first fault-in of the page (no_isync == TRUE)
2338 * so it must have come in as part of
2339 * a cluster... account 1 pagein against it
2340 */
2341 VM_STAT(pageins++);
2342 current_task()->pageins++;
2343 type_of_fault = DBG_PAGEIN_FAULT;
2344 sequential = TRUE;
2345 }
2346 } else if (cache_attr != VM_WIMG_DEFAULT) {
2347 pmap_sync_caches_phys(m->phys_page);
143cc14e 2348 }
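/*
 * Note on the block above: no_isync flags a page whose contents
 * were just brought in (here, as part of a cluster pagein) and
 * whose instruction cache has not yet been synchronized, so
 * pmap_sync_caches_phys() runs before the first mapping; pages
 * with non-default WIMG attributes get the same treatment.
 */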
0b4e3aa0 2349
9bccf70c
A
2350 if(caller_pmap) {
2351 PMAP_ENTER(caller_pmap,
2352 caller_pmap_addr, m,
2353 prot, cache_attr, wired);
2354 } else {
2355 PMAP_ENTER(pmap, vaddr, m,
2356 prot, cache_attr, wired);
2357 }
0b4e3aa0 2358
1c79356b 2359 /*
55e303ae 2360 * Hold queues lock to manipulate
1c79356b
A
2361 * the page queues. Change wiring
2362 * case is obvious. In soft ref bits
2363 * case activate page only if it fell
2364 * off paging queues, otherwise just
2365 * activate it if it's inactive.
2366 *
2367 * NOTE: original vm_fault code will
2368 * move active page to back of active
2369 * queue. This code doesn't.
2370 */
1c79356b 2371 vm_page_lock_queues();
765c9de3
A
2372 if (m->clustered) {
2373 vm_pagein_cluster_used++;
2374 m->clustered = FALSE;
2375 }
1c79356b
A
2376 m->reference = TRUE;
2377
2378 if (change_wiring) {
2379 if (wired)
2380 vm_page_wire(m);
2381 else
2382 vm_page_unwire(m);
2383 }
2384#if VM_FAULT_STATIC_CONFIG
2385 else {
2386 if (!m->active && !m->inactive)
2387 vm_page_activate(m);
2388 }
2389#else
2390 else if (software_reference_bits) {
2391 if (!m->active && !m->inactive)
2392 vm_page_activate(m);
2393 }
2394 else if (!m->active) {
2395 vm_page_activate(m);
2396 }
2397#endif
2398 vm_page_unlock_queues();
2399
2400 /*
2401 * That's it, clean up and return.
2402 */
2403 PAGE_WAKEUP_DONE(m);
143cc14e 2404
55e303ae
A
2405 sequential = (sequential && vm_page_deactivate_behind) ?
2406 vm_fault_deactivate_behind(object, cur_offset, behavior) :
2407 FALSE;
2408
2409 /*
2410 * Add non-sequential pages to the working set.
2411 * The sequential pages will be brought in through
2412 * normal clustering behavior.
2413 */
2414 if (!sequential && !object->private) {
2415 write_startup_file =
2416 vm_fault_tws_insert(map, pmap_map, vaddr,
2417 object, cur_offset);
143cc14e 2418 }
55e303ae
A
2419
2420 vm_object_paging_end(object);
1c79356b 2421 vm_object_unlock(object);
143cc14e 2422
1c79356b
A
2423 vm_map_unlock_read(map);
2424 if(pmap_map != map)
2425 vm_map_unlock(pmap_map);
2426
9bccf70c
A
2427 if(write_startup_file)
2428 tws_send_startup_info(current_task());
2429
143cc14e 2430 if (funnel_set)
1c79356b 2431 thread_funnel_set( curflock, TRUE);
143cc14e 2432
9bccf70c 2433 thread_interrupt_level(interruptible_state);
1c79356b 2434
143cc14e 2435
1c79356b
A
2436 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2437 vaddr,
9bccf70c 2438 type_of_fault & 0xff,
1c79356b 2439 KERN_SUCCESS,
9bccf70c 2440 type_of_fault >> 8,
1c79356b 2441 0);
143cc14e 2442
1c79356b
A
2443 return KERN_SUCCESS;
2444 }
2445
2446 /*
2447 * Copy on write fault. If objects match, then
2448 * object->copy must not be NULL (else control
2449 * would be in previous code block), and we
2450 * have a potential push into the copy object
2451 * with which we won't cope here.
2452 */
2453
2454 if (cur_object == object)
2455 break;
1c79356b
A
2456 /*
2457 * This is now a shadow based copy on write
2458 * fault -- it requires a copy up the shadow
2459 * chain.
2460 *
2461 * Allocate a page in the original top level
2462 * object. Give up if allocate fails. Also
2463 * need to remember current page, as it's the
2464 * source of the copy.
2465 */
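/*
 * The copy-up below proceeds as follows: grab a free page, copy
 * cur_m into it, insert the new page at (object, offset),
 * deactivate the source page and strip its pmap mappings, then
 * collapse the shadow chain and fall into FastPmapEnter with the
 * new page.
 */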
2466 cur_m = m;
2467 m = vm_page_grab();
2468 if (m == VM_PAGE_NULL) {
2469 break;
2470 }
1c79356b
A
2471 /*
2472 * Now do the copy. Mark the source busy
2473 * and take out paging references on both
2474 * objects.
2475 *
2476 * NOTE: This code holds the map lock across
2477 * the page copy.
2478 */
2479
2480 cur_m->busy = TRUE;
2481 vm_page_copy(cur_m, m);
2482 vm_page_insert(m, object, offset);
2483
2484 vm_object_paging_begin(cur_object);
2485 vm_object_paging_begin(object);
2486
2487 type_of_fault = DBG_COW_FAULT;
2488 VM_STAT(cow_faults++);
2489 current_task()->cow_faults++;
2490
2491 /*
2492 * Now cope with the source page and object
2493 * If the top object has a ref count of 1
2494 * then no other map can access it, and hence
2495 * it's not necessary to do the pmap_page_protect.
2496 */
2497
2498
2499 vm_page_lock_queues();
2500 vm_page_deactivate(cur_m);
2501 m->dirty = TRUE;
55e303ae 2502 pmap_page_protect(cur_m->phys_page,
1c79356b
A
2503 VM_PROT_NONE);
2504 vm_page_unlock_queues();
2505
2506 PAGE_WAKEUP_DONE(cur_m);
2507 vm_object_paging_end(cur_object);
2508 vm_object_unlock(cur_object);
2509
2510 /*
2511 * Slight hack: call vm_object_collapse()
2512 * and then reuse the common map-in code.
2513 * Note that the object lock was taken above.
2514 */
2515
2516 vm_object_paging_end(object);
55e303ae 2517 vm_object_collapse(object, offset);
1c79356b 2518 vm_object_paging_begin(object);
1c79356b
A
2519
2520 goto FastPmapEnter;
2521 }
2522 else {
2523
2524 /*
2525 * No page at cur_object, cur_offset
2526 */
2527
2528 if (cur_object->pager_created) {
2529
2530 /*
2531 * Have to talk to the pager. Give up.
2532 */
1c79356b
A
2533 break;
2534 }
2535
2536
2537 if (cur_object->shadow == VM_OBJECT_NULL) {
2538
2539 if (cur_object->shadow_severed) {
2540 vm_object_paging_end(object);
2541 vm_object_unlock(object);
2542 vm_map_unlock_read(map);
2543 if(pmap_map != map)
2544 vm_map_unlock(pmap_map);
2545
9bccf70c
A
2546 if(write_startup_file)
2547 tws_send_startup_info(
2548 current_task());
2549
1c79356b
A
2550 if (funnel_set) {
2551 thread_funnel_set( curflock, TRUE);
2552 funnel_set = FALSE;
2553 }
9bccf70c 2554 thread_interrupt_level(interruptible_state);
1c79356b
A
2555
2556 return VM_FAULT_MEMORY_ERROR;
2557 }
2558
2559 /*
2560 * Zero fill fault. Page gets
2561 * filled in top object. Insert
2562 * page, then drop any lower lock.
2563 * Give up if no page.
2564 */
55e303ae
A
2565 if (VM_PAGE_THROTTLED()) {
2566 break;
2567 }
2568
2569 /*
2570 * Are we protecting the system from
2571 * backing store exhaustion? If so,
2572 * sleep unless we are privileged.
2573 */
2574 if(vm_backing_store_low) {
2575 if(!(current_task()->priv_flags
2576 & VM_BACKING_STORE_PRIV))
1c79356b
A
2577 break;
2578 }
2579 m = vm_page_alloc(object, offset);
2580 if (m == VM_PAGE_NULL) {
2581 break;
2582 }
0b4e3aa0
A
2583 /*
2584 * This is a zero-fill or initial fill
2585 * page fault. As such, we consider it
2586 * undefined with respect to instruction
2587 * execution. i.e. it is the responsibility
2588 * of higher layers to call for an instruction
2589 * sync after changing the contents and before
2590 * sending a program into this area. We
2591 * choose this approach for performance
2592 */
2593
2594 m->no_isync = FALSE;
1c79356b
A
2595
2596 if (cur_object != object)
2597 vm_object_unlock(cur_object);
2598
2599 vm_object_paging_begin(object);
2600 vm_object_unlock(object);
2601
2602 /*
2603 * Now zero fill page and map it.
2604 * the page is probably going to
2605 * be written soon, so don't bother
2606 * to clear the modified bit
2607 *
2608 * NOTE: This code holds the map
2609 * lock across the zero fill.
2610 */
2611
2612 if (!map->no_zero_fill) {
2613 vm_page_zero_fill(m);
2614 type_of_fault = DBG_ZERO_FILL_FAULT;
2615 VM_STAT(zero_fill_count++);
2616 }
2617 vm_page_lock_queues();
2618 VM_PAGE_QUEUES_REMOVE(m);
0b4e3aa0
A
2619
2620 m->page_ticket = vm_page_ticket;
9bccf70c
A
2621 if(m->object->size > 0x80000) {
2622 m->zero_fill = TRUE;
2623 /* depends on the queues lock */
2624 vm_zf_count += 1;
2625 queue_enter(&vm_page_queue_zf,
2626 m, vm_page_t, pageq);
2627 } else {
2628 queue_enter(
2629 &vm_page_queue_inactive,
2630 m, vm_page_t, pageq);
2631 }
0b4e3aa0
A
2632 vm_page_ticket_roll++;
2633 if(vm_page_ticket_roll ==
2634 VM_PAGE_TICKETS_IN_ROLL) {
2635 vm_page_ticket_roll = 0;
2636 if(vm_page_ticket ==
2637 VM_PAGE_TICKET_ROLL_IDS)
2638 vm_page_ticket = 0;
2639 else
2640 vm_page_ticket++;
2641 }
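/*
 * Descriptive note: vm_page_ticket_roll counts the pages stamped
 * here; every VM_PAGE_TICKETS_IN_ROLL pages the global
 * vm_page_ticket advances (wrapping at VM_PAGE_TICKET_ROLL_IDS),
 * so each zero-filled page carries a coarse batch stamp. Pages
 * whose object is larger than 0x80000 bytes (512 KB) go on the
 * dedicated zero-fill queue; smaller ones go on the inactive
 * queue.
 */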
2642
1c79356b
A
2643 m->inactive = TRUE;
2644 vm_page_inactive_count++;
2645 vm_page_unlock_queues();
143cc14e
A
2646 vm_object_lock(object);
2647
1c79356b
A
2648 goto FastPmapEnter;
2649 }
2650
2651 /*
2652 * On to the next level
2653 */
2654
2655 cur_offset += cur_object->shadow_offset;
2656 new_object = cur_object->shadow;
2657 vm_object_lock(new_object);
2658 if (cur_object != object)
2659 vm_object_unlock(cur_object);
2660 cur_object = new_object;
2661
2662 continue;
2663 }
2664 }
2665
2666 /*
2667 * Cleanup from fast fault failure. Drop any object
2668 * lock other than original and drop map lock.
2669 */
2670
2671 if (object != cur_object)
2672 vm_object_unlock(cur_object);
2673 }
2674 vm_map_unlock_read(map);
143cc14e 2675
1c79356b
A
2676 if(pmap_map != map)
2677 vm_map_unlock(pmap_map);
2678
2679 /*
2680 * Make a reference to this object to
2681 * prevent its disposal while we are messing with
2682 * it. Once we have the reference, the map is free
2683 * to be diddled. Since objects reference their
2684 * shadows (and copies), they will stay around as well.
2685 */
2686
2687 assert(object->ref_count > 0);
2688 object->ref_count++;
2689 vm_object_res_reference(object);
2690 vm_object_paging_begin(object);
2691
2692 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
55e303ae
A
2693
2694 if (!object->private) {
2695 write_startup_file =
2696 vm_fault_tws_insert(map, pmap_map, vaddr, object, offset);
9bccf70c 2697 }
55e303ae 2698
1c79356b
A
2699 kr = vm_fault_page(object, offset, fault_type,
2700 (change_wiring && !wired),
2701 interruptible,
2702 lo_offset, hi_offset, behavior,
2703 &prot, &result_page, &top_page,
2704 &type_of_fault,
0b4e3aa0 2705 &error_code, map->no_zero_fill, FALSE, map, vaddr);
1c79356b
A
2706
2707 /*
2708 * If we didn't succeed, lose the object reference immediately.
2709 */
2710
2711 if (kr != VM_FAULT_SUCCESS)
2712 vm_object_deallocate(object);
2713
2714 /*
2715 * See why we failed, and take corrective action.
2716 */
2717
2718 switch (kr) {
2719 case VM_FAULT_SUCCESS:
2720 break;
2721 case VM_FAULT_MEMORY_SHORTAGE:
2722 if (vm_page_wait((change_wiring) ?
2723 THREAD_UNINT :
2724 THREAD_ABORTSAFE))
2725 goto RetryFault;
2726 /* fall thru */
2727 case VM_FAULT_INTERRUPTED:
2728 kr = KERN_ABORTED;
2729 goto done;
2730 case VM_FAULT_RETRY:
2731 goto RetryFault;
2732 case VM_FAULT_FICTITIOUS_SHORTAGE:
2733 vm_page_more_fictitious();
2734 goto RetryFault;
2735 case VM_FAULT_MEMORY_ERROR:
2736 if (error_code)
2737 kr = error_code;
2738 else
2739 kr = KERN_MEMORY_ERROR;
2740 goto done;
2741 }
2742
2743 m = result_page;
2744
0b4e3aa0
A
2745 if(m != VM_PAGE_NULL) {
2746 assert((change_wiring && !wired) ?
2747 (top_page == VM_PAGE_NULL) :
2748 ((top_page == VM_PAGE_NULL) == (m->object == object)));
2749 }
1c79356b
A
2750
2751 /*
2752 * How to clean up the result of vm_fault_page. This
2753 * happens whether the mapping is entered or not.
2754 */
2755
2756#define UNLOCK_AND_DEALLOCATE \
2757 MACRO_BEGIN \
2758 vm_fault_cleanup(m->object, top_page); \
2759 vm_object_deallocate(object); \
2760 MACRO_END
2761
2762 /*
2763 * What to do with the resulting page from vm_fault_page
2764 * if it doesn't get entered into the physical map:
2765 */
2766
2767#define RELEASE_PAGE(m) \
2768 MACRO_BEGIN \
2769 PAGE_WAKEUP_DONE(m); \
2770 vm_page_lock_queues(); \
2771 if (!m->active && !m->inactive) \
2772 vm_page_activate(m); \
2773 vm_page_unlock_queues(); \
2774 MACRO_END
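/*
 * Descriptive note: the failure paths below use these in tandem --
 * RELEASE_PAGE(m) when the page will not be entered in the pmap,
 * then UNLOCK_AND_DEALLOCATE to run vm_fault_cleanup() and drop
 * the extra object reference taken before vm_fault_page(); the
 * paths where vm_fault_page() returned no page (e.g. a
 * phys_contiguous object) call vm_object_deallocate() directly.
 */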
2775
2776 /*
2777 * We must verify that the maps have not changed
2778 * since our last lookup.
2779 */
2780
0b4e3aa0
A
2781 if(m != VM_PAGE_NULL) {
2782 old_copy_object = m->object->copy;
0b4e3aa0
A
2783 vm_object_unlock(m->object);
2784 } else {
2785 old_copy_object = VM_OBJECT_NULL;
2786 }
1c79356b
A
2787 if ((map != original_map) || !vm_map_verify(map, &version)) {
2788 vm_object_t retry_object;
2789 vm_object_offset_t retry_offset;
2790 vm_prot_t retry_prot;
2791
2792 /*
2793 * To avoid trying to write_lock the map while another
2794 * thread has it read_locked (in vm_map_pageable), we
2795 * do not try for write permission. If the page is
2796 * still writable, we will get write permission. If it
2797 * is not, or has been marked needs_copy, we enter the
2798 * mapping without write permission, and will merely
2799 * take another fault.
2800 */
2801 map = original_map;
2802 vm_map_lock_read(map);
2803 kr = vm_map_lookup_locked(&map, vaddr,
2804 fault_type & ~VM_PROT_WRITE, &version,
2805 &retry_object, &retry_offset, &retry_prot,
2806 &wired, &behavior, &lo_offset, &hi_offset,
2807 &pmap_map);
2808 pmap = pmap_map->pmap;
2809
2810 if (kr != KERN_SUCCESS) {
2811 vm_map_unlock_read(map);
0b4e3aa0
A
2812 if(m != VM_PAGE_NULL) {
2813 vm_object_lock(m->object);
2814 RELEASE_PAGE(m);
2815 UNLOCK_AND_DEALLOCATE;
2816 } else {
2817 vm_object_deallocate(object);
2818 }
1c79356b
A
2819 goto done;
2820 }
2821
2822 vm_object_unlock(retry_object);
0b4e3aa0
A
2823 if(m != VM_PAGE_NULL) {
2824 vm_object_lock(m->object);
2825 } else {
2826 vm_object_lock(object);
2827 }
1c79356b
A
2828
2829 if ((retry_object != object) ||
2830 (retry_offset != offset)) {
2831 vm_map_unlock_read(map);
2832 if(pmap_map != map)
2833 vm_map_unlock(pmap_map);
0b4e3aa0
A
2834 if(m != VM_PAGE_NULL) {
2835 RELEASE_PAGE(m);
2836 UNLOCK_AND_DEALLOCATE;
2837 } else {
2838 vm_object_deallocate(object);
2839 }
1c79356b
A
2840 goto RetryFault;
2841 }
2842
2843 /*
2844 * Check whether the protection has changed or the object
2845 * has been copied while we left the map unlocked.
2846 */
2847 prot &= retry_prot;
0b4e3aa0
A
2848 if(m != VM_PAGE_NULL) {
2849 vm_object_unlock(m->object);
2850 } else {
2851 vm_object_unlock(object);
2852 }
2853 }
2854 if(m != VM_PAGE_NULL) {
2855 vm_object_lock(m->object);
2856 } else {
2857 vm_object_lock(object);
1c79356b 2858 }
1c79356b
A
2859
2860 /*
2861 * If the copy object changed while the top-level object
2862 * was unlocked, then we must take away write permission.
2863 */
2864
0b4e3aa0
A
2865 if(m != VM_PAGE_NULL) {
2866 if (m->object->copy != old_copy_object)
2867 prot &= ~VM_PROT_WRITE;
2868 }
1c79356b
A
2869
2870 /*
2871 * If we want to wire down this page, but no longer have
2872 * adequate permissions, we must start all over.
2873 */
2874
2875 if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2876 vm_map_verify_done(map, &version);
2877 if(pmap_map != map)
2878 vm_map_unlock(pmap_map);
0b4e3aa0
A
2879 if(m != VM_PAGE_NULL) {
2880 RELEASE_PAGE(m);
2881 UNLOCK_AND_DEALLOCATE;
2882 } else {
2883 vm_object_deallocate(object);
2884 }
1c79356b
A
2885 goto RetryFault;
2886 }
2887
1c79356b
A
2888 /*
2889 * Put this page into the physical map.
2890 * We had to do the unlock above because pmap_enter
2891 * may cause other faults. The page may be on
2892 * the pageout queues. If the pageout daemon comes
2893 * across the page, it will remove it from the queues.
2894 */
765c9de3
A
2895 if (m != VM_PAGE_NULL) {
2896 if (m->no_isync == TRUE) {
55e303ae
A
2897 pmap_sync_caches_phys(m->phys_page);
2898
2899 if (type_of_fault == DBG_CACHE_HIT_FAULT) {
2900 /*
2901 * found it in the cache, but this
2902 * is the first fault-in of the page (no_isync == TRUE)
2903 * so it must have come in as part of
2904 * a cluster... account 1 pagein against it
2905 */
2906 VM_STAT(pageins++);
2907 current_task()->pageins++;
2908
2909 type_of_fault = DBG_PAGEIN_FAULT;
2910 }
765c9de3
A
2911 m->no_isync = FALSE;
2912 }
9bccf70c 2913 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
0b4e3aa0 2914
9bccf70c
A
2915 if(caller_pmap) {
2916 PMAP_ENTER(caller_pmap,
2917 caller_pmap_addr, m,
2918 prot, cache_attr, wired);
2919 } else {
2920 PMAP_ENTER(pmap, vaddr, m,
2921 prot, cache_attr, wired);
2922 }
55e303ae
A
2923
2924 /*
2925 * Add working set information for private objects here.
2926 */
2927 if (m->object->private) {
2928 write_startup_file =
2929 vm_fault_tws_insert(map, pmap_map, vaddr,
2930 m->object, m->offset);
0b4e3aa0
A
2931 }
2932 } else {
2933
9bccf70c
A
2934#ifndef i386
2935 int memattr;
9bccf70c
A
2936 vm_map_entry_t entry;
2937 vm_offset_t laddr;
2938 vm_offset_t ldelta, hdelta;
143cc14e 2939
0b4e3aa0
A
2940 /*
2941 * do a pmap block mapping from the physical address
2942 * in the object
2943 */
9bccf70c 2944
55e303ae
A
2945 /* While we do not worry about execution protection in */
2946 /* general, certain pages may have instruction execution */
2947 /* disallowed. We will check here, and if not allowed */
2948 /* to execute, we return with a protection failure. */
9bccf70c 2949
55e303ae
A
2950 if((full_fault_type & VM_PROT_EXECUTE) &&
2951 (pmap_canExecute((ppnum_t)
2952 (object->shadow_offset >> 12)) < 1)) {
9bccf70c 2953
9bccf70c
A
2954 vm_map_verify_done(map, &version);
2955 if(pmap_map != map)
2956 vm_map_unlock(pmap_map);
2957 vm_fault_cleanup(object, top_page);
2958 vm_object_deallocate(object);
2959 kr = KERN_PROTECTION_FAILURE;
2960 goto done;
0b4e3aa0 2961 }
1c79356b 2962
9bccf70c
A
2963 if(pmap_map != map) {
2964 vm_map_unlock(pmap_map);
2965 }
2966 if (original_map != map) {
2967 vm_map_unlock_read(map);
2968 vm_map_lock_read(original_map);
2969 map = original_map;
2970 }
2971 pmap_map = map;
2972
2973 laddr = vaddr;
2974 hdelta = 0xFFFFF000;
2975 ldelta = 0xFFFFF000;
2976
2977
2978 while(vm_map_lookup_entry(map, laddr, &entry)) {
2979 if(ldelta > (laddr - entry->vme_start))
2980 ldelta = laddr - entry->vme_start;
2981 if(hdelta > (entry->vme_end - laddr))
2982 hdelta = entry->vme_end - laddr;
2983 if(entry->is_sub_map) {
2984
2985 laddr = (laddr - entry->vme_start)
2986 + entry->offset;
2987 vm_map_lock_read(entry->object.sub_map);
2988 if(map != pmap_map)
2989 vm_map_unlock_read(map);
2990 if(entry->use_pmap) {
2991 vm_map_unlock_read(pmap_map);
2992 pmap_map = entry->object.sub_map;
2993 }
2994 map = entry->object.sub_map;
2995
2996 } else {
2997 break;
2998 }
2999 }
3000
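/*
 * Worked example (hypothetical numbers): ldelta and hdelta start
 * at 0xFFFFF000 and are clamped by each map entry traversed, so
 * if the faulting address sits 0x3000 past vme_start and 0x5000
 * before vme_end, the loop leaves ldelta = 0x3000 and
 * hdelta = 0x5000, and the block mapping below covers
 * [vaddr - 0x3000, vaddr + 0x5000).
 */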
3001 if(vm_map_lookup_entry(map, laddr, &entry) &&
3002 (entry->object.vm_object != NULL) &&
3003 (entry->object.vm_object == object)) {
3004
3005
3006 if(caller_pmap) {
55e303ae 3007 /* Set up a block mapped area */
9bccf70c 3008 pmap_map_block(caller_pmap,
55e303ae
A
3009 (addr64_t)(caller_pmap_addr - ldelta),
3010 (((vm_offset_t)
9bccf70c
A
3011 (entry->object.vm_object->shadow_offset))
3012 + entry->offset +
55e303ae
A
3013 (laddr - entry->vme_start)
3014 - ldelta)>>12,
9bccf70c 3015 ldelta + hdelta, prot,
55e303ae
A
3016 (VM_WIMG_MASK & (int)object->wimg_bits), 0);
3017 } else {
3018 /* Set up a block mapped area */
3019 pmap_map_block(pmap_map->pmap,
3020 (addr64_t)(vaddr - ldelta),
3021 (((vm_offset_t)
9bccf70c 3022 (entry->object.vm_object->shadow_offset))
55e303ae
A
3023 + entry->offset +
3024 (laddr - entry->vme_start) - ldelta)>>12,
3025 ldelta + hdelta, prot,
3026 (VM_WIMG_MASK & (int)object->wimg_bits), 0);
9bccf70c
A
3027 }
3028 }
3029#else
3030#ifdef notyet
3031 if(caller_pmap) {
3032 pmap_enter(caller_pmap, caller_pmap_addr,
55e303ae 3033 object->shadow_offset>>12, prot, 0, TRUE);
9bccf70c
A
3034 } else {
3035 pmap_enter(pmap, vaddr,
55e303ae 3036 object->shadow_offset>>12, prot, 0, TRUE);
9bccf70c 3037 }
0b4e3aa0 3038 /* Map it in */
9bccf70c 3039#endif
0b4e3aa0
A
3040#endif
3041
3042 }
1c79356b
A
3043
3044 /*
3045 * If the page is not wired down and isn't already
3046 * on a pageout queue, then put it where the
3047 * pageout daemon can find it.
3048 */
0b4e3aa0 3049 if(m != VM_PAGE_NULL) {
0b4e3aa0
A
3050 vm_page_lock_queues();
3051
3052 if (change_wiring) {
3053 if (wired)
3054 vm_page_wire(m);
3055 else
3056 vm_page_unwire(m);
3057 }
1c79356b 3058#if VM_FAULT_STATIC_CONFIG
0b4e3aa0
A
3059 else {
3060 if (!m->active && !m->inactive)
3061 vm_page_activate(m);
3062 m->reference = TRUE;
3063 }
1c79356b 3064#else
0b4e3aa0
A
3065 else if (software_reference_bits) {
3066 if (!m->active && !m->inactive)
3067 vm_page_activate(m);
3068 m->reference = TRUE;
3069 } else {
1c79356b 3070 vm_page_activate(m);
0b4e3aa0 3071 }
1c79356b 3072#endif
0b4e3aa0
A
3073 vm_page_unlock_queues();
3074 }
1c79356b
A
3075
3076 /*
3077 * Unlock everything, and return
3078 */
3079
3080 vm_map_verify_done(map, &version);
3081 if(pmap_map != map)
3082 vm_map_unlock(pmap_map);
0b4e3aa0
A
3083 if(m != VM_PAGE_NULL) {
3084 PAGE_WAKEUP_DONE(m);
3085 UNLOCK_AND_DEALLOCATE;
3086 } else {
3087 vm_fault_cleanup(object, top_page);
3088 vm_object_deallocate(object);
3089 }
1c79356b 3090 kr = KERN_SUCCESS;
1c79356b
A
3091
3092#undef UNLOCK_AND_DEALLOCATE
3093#undef RELEASE_PAGE
3094
3095 done:
9bccf70c
A
3096 if(write_startup_file)
3097 tws_send_startup_info(current_task());
1c79356b
A
3098 if (funnel_set) {
3099 thread_funnel_set( curflock, TRUE);
3100 funnel_set = FALSE;
3101 }
9bccf70c 3102 thread_interrupt_level(interruptible_state);
1c79356b
A
3103
3104 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
3105 vaddr,
9bccf70c 3106 type_of_fault & 0xff,
1c79356b 3107 kr,
9bccf70c 3108 type_of_fault >> 8,
1c79356b 3109 0);
143cc14e 3110
1c79356b
A
3111 return(kr);
3112}
3113
3114/*
3115 * vm_fault_wire:
3116 *
3117 * Wire down a range of virtual addresses in a map.
3118 */
3119kern_return_t
3120vm_fault_wire(
3121 vm_map_t map,
3122 vm_map_entry_t entry,
9bccf70c
A
3123 pmap_t pmap,
3124 vm_offset_t pmap_addr)
1c79356b
A
3125{
3126
3127 register vm_offset_t va;
3128 register vm_offset_t end_addr = entry->vme_end;
3129 register kern_return_t rc;
3130
3131 assert(entry->in_transition);
3132
9bccf70c
A
3133 if ((entry->object.vm_object != NULL) &&
3134 !entry->is_sub_map &&
3135 entry->object.vm_object->phys_contiguous) {
3136 return KERN_SUCCESS;
3137 }
3138
1c79356b
A
3139 /*
3140 * Inform the physical mapping system that the
3141 * range of addresses may not fault, so that
3142 * page tables and such can be locked down as well.
3143 */
3144
9bccf70c
A
3145 pmap_pageable(pmap, pmap_addr,
3146 pmap_addr + (end_addr - entry->vme_start), FALSE);
1c79356b
A
3147
3148 /*
3149 * We simulate a fault to get the page and enter it
3150 * in the physical map.
3151 */
3152
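/*
 * Descriptive note: each page is first tried via the stripped-down
 * vm_fault_wire_fast(); anything it cannot handle falls back to a
 * full vm_fault() with change_wiring set. If a page still fails,
 * the pages already wired in [vme_start, va) are unwound with
 * vm_fault_unwire() before the error is returned.
 */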
3153 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3154 if ((rc = vm_fault_wire_fast(
9bccf70c
A
3155 map, va, entry, pmap,
3156 pmap_addr + (va - entry->vme_start)
3157 )) != KERN_SUCCESS) {
1c79356b 3158 rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
9bccf70c
A
3159 (pmap == kernel_pmap) ?
3160 THREAD_UNINT : THREAD_ABORTSAFE,
3161 pmap, pmap_addr + (va - entry->vme_start));
1c79356b
A
3162 }
3163
3164 if (rc != KERN_SUCCESS) {
3165 struct vm_map_entry tmp_entry = *entry;
3166
3167 /* unwire wired pages */
3168 tmp_entry.vme_end = va;
9bccf70c
A
3169 vm_fault_unwire(map,
3170 &tmp_entry, FALSE, pmap, pmap_addr);
1c79356b
A
3171
3172 return rc;
3173 }
3174 }
3175 return KERN_SUCCESS;
3176}
3177
3178/*
3179 * vm_fault_unwire:
3180 *
3181 * Unwire a range of virtual addresses in a map.
3182 */
3183void
3184vm_fault_unwire(
3185 vm_map_t map,
3186 vm_map_entry_t entry,
3187 boolean_t deallocate,
9bccf70c
A
3188 pmap_t pmap,
3189 vm_offset_t pmap_addr)
1c79356b
A
3190{
3191 register vm_offset_t va;
3192 register vm_offset_t end_addr = entry->vme_end;
3193 vm_object_t object;
3194
3195 object = (entry->is_sub_map)
3196 ? VM_OBJECT_NULL : entry->object.vm_object;
3197
3198 /*
3199 * Since the pages are wired down, we must be able to
3200 * get their mappings from the physical map system.
3201 */
3202
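/*
 * Descriptive note on the loop below: submap entries (object ==
 * VM_OBJECT_NULL) are unwired through a VM_PROT_NONE pseudo-fault,
 * physically contiguous objects are skipped, and everything else
 * goes through vm_fault_page() so the resident page can either be
 * freed (deallocate case) or simply unwired.
 */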
3203 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
9bccf70c
A
3204 pmap_change_wiring(pmap,
3205 pmap_addr + (va - entry->vme_start), FALSE);
1c79356b
A
3206
3207 if (object == VM_OBJECT_NULL) {
9bccf70c
A
3208 (void) vm_fault(map, va, VM_PROT_NONE,
3209 TRUE, THREAD_UNINT, pmap, pmap_addr);
3210 } else if (object->phys_contiguous) {
3211 continue;
1c79356b
A
3212 } else {
3213 vm_prot_t prot;
3214 vm_page_t result_page;
3215 vm_page_t top_page;
3216 vm_object_t result_object;
3217 vm_fault_return_t result;
3218
3219 do {
3220 prot = VM_PROT_NONE;
3221
3222 vm_object_lock(object);
3223 vm_object_paging_begin(object);
3224 XPR(XPR_VM_FAULT,
3225 "vm_fault_unwire -> vm_fault_page\n",
3226 0,0,0,0,0);
3227 result = vm_fault_page(object,
3228 entry->offset +
3229 (va - entry->vme_start),
3230 VM_PROT_NONE, TRUE,
3231 THREAD_UNINT,
3232 entry->offset,
3233 entry->offset +
3234 (entry->vme_end
3235 - entry->vme_start),
3236 entry->behavior,
3237 &prot,
3238 &result_page,
3239 &top_page,
3240 (int *)0,
3241 0, map->no_zero_fill,
0b4e3aa0 3242 FALSE, NULL, 0);
1c79356b
A
3243 } while (result == VM_FAULT_RETRY);
3244
3245 if (result != VM_FAULT_SUCCESS)
3246 panic("vm_fault_unwire: failure");
3247
3248 result_object = result_page->object;
3249 if (deallocate) {
3250 assert(!result_page->fictitious);
55e303ae 3251 pmap_page_protect(result_page->phys_page,
1c79356b
A
3252 VM_PROT_NONE);
3253 VM_PAGE_FREE(result_page);
3254 } else {
3255 vm_page_lock_queues();
3256 vm_page_unwire(result_page);
3257 vm_page_unlock_queues();
3258 PAGE_WAKEUP_DONE(result_page);
3259 }
3260
3261 vm_fault_cleanup(result_object, top_page);
3262 }
3263 }
3264
3265 /*
3266 * Inform the physical mapping system that the range
3267 * of addresses may fault, so that page tables and
3268 * such may be unwired themselves.
3269 */
3270
9bccf70c
A
3271 pmap_pageable(pmap, pmap_addr,
3272 pmap_addr + (end_addr - entry->vme_start), TRUE);
1c79356b
A
3273
3274}
3275
3276/*
3277 * vm_fault_wire_fast:
3278 *
3279 * Handle common case of a wire down page fault at the given address.
3280 * If successful, the page is inserted into the associated physical map.
3281 * The map entry is passed in to avoid the overhead of a map lookup.
3282 *
3283 * NOTE: the given address should be truncated to the
3284 * proper page address.
3285 *
3286 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
3287 * a standard error specifying why the fault is fatal is returned.
3288 *
3289 * The map in question must be referenced, and remains so.
3290 * Caller has a read lock on the map.
3291 *
3292 * This is a stripped version of vm_fault() for wiring pages. Anything
3293 * other than the common case will return KERN_FAILURE, and the caller
3294 * is expected to call vm_fault().
3295 */
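/*
 * Illustrative caller sketch (hypothetical, but matching
 * vm_fault_wire() above): the wire loop tries this routine first
 * and only falls back to the full fault path on failure, e.g.
 *
 *	if ((rc = vm_fault_wire_fast(map, va, entry, pmap,
 *			pmap_addr + (va - entry->vme_start)))
 *			!= KERN_SUCCESS)
 *		rc = vm_fault(map, va, VM_PROT_NONE, TRUE, ...);
 */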
3296kern_return_t
3297vm_fault_wire_fast(
3298 vm_map_t map,
3299 vm_offset_t va,
3300 vm_map_entry_t entry,
9bccf70c
A
3301 pmap_t pmap,
3302 vm_offset_t pmap_addr)
1c79356b
A
3303{
3304 vm_object_t object;
3305 vm_object_offset_t offset;
3306 register vm_page_t m;
3307 vm_prot_t prot;
3308 thread_act_t thr_act;
9bccf70c 3309 unsigned int cache_attr;
1c79356b
A
3310
3311 VM_STAT(faults++);
3312
3313 if((thr_act=current_act()) && (thr_act->task != TASK_NULL))
3314 thr_act->task->faults++;
3315
3316/*
3317 * Recovery actions
3318 */
3319
3320#undef RELEASE_PAGE
3321#define RELEASE_PAGE(m) { \
3322 PAGE_WAKEUP_DONE(m); \
3323 vm_page_lock_queues(); \
3324 vm_page_unwire(m); \
3325 vm_page_unlock_queues(); \
3326}
3327
3328
3329#undef UNLOCK_THINGS
3330#define UNLOCK_THINGS { \
3331 object->paging_in_progress--; \
3332 vm_object_unlock(object); \
3333}
3334
3335#undef UNLOCK_AND_DEALLOCATE
3336#define UNLOCK_AND_DEALLOCATE { \
3337 UNLOCK_THINGS; \
3338 vm_object_deallocate(object); \
3339}
3340/*
3341 * Give up and have caller do things the hard way.
3342 */
3343
3344#define GIVE_UP { \
3345 UNLOCK_AND_DEALLOCATE; \
3346 return(KERN_FAILURE); \
3347}
3348
3349
3350 /*
3351 * If this entry is not directly to a vm_object, bail out.
3352 */
3353 if (entry->is_sub_map)
3354 return(KERN_FAILURE);
3355
3356 /*
3357 * Find the backing store object and offset into it.
3358 */
3359
3360 object = entry->object.vm_object;
3361 offset = (va - entry->vme_start) + entry->offset;
3362 prot = entry->protection;
3363
3364 /*
3365 * Make a reference to this object to prevent its
3366 * disposal while we are messing with it.
3367 */
3368
3369 vm_object_lock(object);
3370 assert(object->ref_count > 0);
3371 object->ref_count++;
3372 vm_object_res_reference(object);
3373 object->paging_in_progress++;
3374
3375 /*
3376 * INVARIANTS (through entire routine):
3377 *
3378 * 1) At all times, we must either have the object
3379 * lock or a busy page in some object to prevent
3380 * some other thread from trying to bring in
3381 * the same page.
3382 *
3383 * 2) Once we have a busy page, we must remove it from
3384 * the pageout queues, so that the pageout daemon
3385 * will not grab it away.
3386 *
3387 */
3388
3389 /*
3390 * Look for page in top-level object. If it's not there or
3391 * there's something going on, give up.
3392 */
3393 m = vm_page_lookup(object, offset);
3394 if ((m == VM_PAGE_NULL) || (m->busy) ||
3395 (m->unusual && ( m->error || m->restart || m->absent ||
3396 prot & m->page_lock))) {
3397
3398 GIVE_UP;
3399 }
3400
3401 /*
3402 * Wire the page down now. All bail outs beyond this
3403 * point must unwire the page.
3404 */
3405
3406 vm_page_lock_queues();
3407 vm_page_wire(m);
3408 vm_page_unlock_queues();
3409
3410 /*
3411 * Mark page busy for other threads.
3412 */
3413 assert(!m->busy);
3414 m->busy = TRUE;
3415 assert(!m->absent);
3416
3417 /*
3418 * Give up if the page is being written and there's a copy object
3419 */
3420 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3421 RELEASE_PAGE(m);
3422 GIVE_UP;
3423 }
3424
3425 /*
3426 * Put this page into the physical map.
3427 * We have to unlock the object because pmap_enter
3428 * may cause other faults.
3429 */
765c9de3 3430 if (m->no_isync == TRUE) {
55e303ae 3431 pmap_sync_caches_phys(m->phys_page);
0b4e3aa0 3432
765c9de3 3433 m->no_isync = FALSE;
0b4e3aa0 3434 }
9bccf70c
A
3435
3436 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
765c9de3 3437
9bccf70c 3438 PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);
1c79356b 3439
1c79356b
A
3440 /*
3441 * Unlock everything, and return
3442 */
3443
3444 PAGE_WAKEUP_DONE(m);
3445 UNLOCK_AND_DEALLOCATE;
3446
3447 return(KERN_SUCCESS);
3448
3449}
3450
3451/*
3452 * Routine: vm_fault_copy_cleanup
3453 * Purpose:
3454 * Release a page used by vm_fault_copy.
3455 */
3456
3457void
3458vm_fault_copy_cleanup(
3459 vm_page_t page,
3460 vm_page_t top_page)
3461{
3462 vm_object_t object = page->object;
3463
3464 vm_object_lock(object);
3465 PAGE_WAKEUP_DONE(page);
3466 vm_page_lock_queues();
3467 if (!page->active && !page->inactive)
3468 vm_page_activate(page);
3469 vm_page_unlock_queues();
3470 vm_fault_cleanup(object, top_page);
3471}
3472
3473void
3474vm_fault_copy_dst_cleanup(
3475 vm_page_t page)
3476{
3477 vm_object_t object;
3478
3479 if (page != VM_PAGE_NULL) {
3480 object = page->object;
3481 vm_object_lock(object);
3482 vm_page_lock_queues();
3483 vm_page_unwire(page);
3484 vm_page_unlock_queues();
3485 vm_object_paging_end(object);
3486 vm_object_unlock(object);
3487 }
3488}
3489
3490/*
3491 * Routine: vm_fault_copy
3492 *
3493 * Purpose:
3494 * Copy pages from one virtual memory object to another --
3495 * neither the source nor destination pages need be resident.
3496 *
3497 * Before actually copying a page, the version associated with
3498 * the destination address map will be verified.
3499 *
3500 * In/out conditions:
3501 * The caller must hold a reference, but not a lock, to
3502 * each of the source and destination objects and to the
3503 * destination map.
3504 *
3505 * Results:
3506 * Returns KERN_SUCCESS if no errors were encountered in
3507 * reading or writing the data. Returns KERN_INTERRUPTED if
3508 * the operation was interrupted (only possible if the
3509 * "interruptible" argument is asserted). Other return values
3510 * indicate a permanent error in copying the data.
3511 *
3512 * The actual amount of data copied will be returned in the
3513 * "copy_size" argument. In the event that the destination map
3514 * verification failed, this amount may be less than the amount
3515 * requested.
3516 */
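/*
 * Illustrative call sketch (hypothetical names): a caller that has
 * already looked up dst_map and captured its version might do
 *
 *	vm_size_t copy_size = nbytes;
 *
 *	kr = vm_fault_copy(src_object, src_offset, &copy_size,
 *			   dst_object, dst_offset,
 *			   dst_map, &dst_version, THREAD_ABORTSAFE);
 *
 * and on return copy_size holds the amount actually copied, which
 * may be short if the destination map version check failed.
 */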
3517kern_return_t
3518vm_fault_copy(
3519 vm_object_t src_object,
3520 vm_object_offset_t src_offset,
3521 vm_size_t *src_size, /* INOUT */
3522 vm_object_t dst_object,
3523 vm_object_offset_t dst_offset,
3524 vm_map_t dst_map,
3525 vm_map_version_t *dst_version,
3526 int interruptible)
3527{
3528 vm_page_t result_page;
3529
3530 vm_page_t src_page;
3531 vm_page_t src_top_page;
3532 vm_prot_t src_prot;
3533
3534 vm_page_t dst_page;
3535 vm_page_t dst_top_page;
3536 vm_prot_t dst_prot;
3537
3538 vm_size_t amount_left;
3539 vm_object_t old_copy_object;
3540 kern_return_t error = 0;
3541
3542 vm_size_t part_size;
3543
3544 /*
3545 * In order not to confuse the clustered pageins, align
3546 * the different offsets on a page boundary.
3547 */
3548 vm_object_offset_t src_lo_offset = trunc_page_64(src_offset);
3549 vm_object_offset_t dst_lo_offset = trunc_page_64(dst_offset);
3550 vm_object_offset_t src_hi_offset = round_page_64(src_offset + *src_size);
3551 vm_object_offset_t dst_hi_offset = round_page_64(dst_offset + *src_size);
3552
3553#define RETURN(x) \
3554 MACRO_BEGIN \
3555 *src_size -= amount_left; \
3556 MACRO_RETURN(x); \
3557 MACRO_END
3558
3559 amount_left = *src_size;
3560 do { /* while (amount_left > 0) */
3561 /*
3562 * There may be a deadlock if both source and destination
3563 * pages are the same. To avoid this deadlock, the copy must
3564 * start by getting the destination page in order to apply
3565 * COW semantics if any.
3566 */
3567
3568 RetryDestinationFault: ;
3569
3570 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3571
3572 vm_object_lock(dst_object);
3573 vm_object_paging_begin(dst_object);
3574
3575 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3576 switch (vm_fault_page(dst_object,
3577 trunc_page_64(dst_offset),
3578 VM_PROT_WRITE|VM_PROT_READ,
3579 FALSE,
3580 interruptible,
3581 dst_lo_offset,
3582 dst_hi_offset,
3583 VM_BEHAVIOR_SEQUENTIAL,
3584 &dst_prot,
3585 &dst_page,
3586 &dst_top_page,
3587 (int *)0,
3588 &error,
3589 dst_map->no_zero_fill,
0b4e3aa0 3590 FALSE, NULL, 0)) {
1c79356b
A
3591 case VM_FAULT_SUCCESS:
3592 break;
3593 case VM_FAULT_RETRY:
3594 goto RetryDestinationFault;
3595 case VM_FAULT_MEMORY_SHORTAGE:
3596 if (vm_page_wait(interruptible))
3597 goto RetryDestinationFault;
3598 /* fall thru */
3599 case VM_FAULT_INTERRUPTED:
3600 RETURN(MACH_SEND_INTERRUPTED);
3601 case VM_FAULT_FICTITIOUS_SHORTAGE:
3602 vm_page_more_fictitious();
3603 goto RetryDestinationFault;
3604 case VM_FAULT_MEMORY_ERROR:
3605 if (error)
3606 return (error);
3607 else
3608 return(KERN_MEMORY_ERROR);
3609 }
3610 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3611
3612 old_copy_object = dst_page->object->copy;
3613
3614 /*
3615 * There exists the possibility that the source and
3616 * destination page are the same. But we can't
3617 * easily determine that now. If they are the
3618 * same, the call to vm_fault_page() for the
3619 * destination page will deadlock. To prevent this we
3620 * wire the page so we can drop busy without having
3621 * the page daemon steal the page. We clean up the
3622 * top page but keep the paging reference on the object
3623 * holding the dest page so it doesn't go away.
3624 */
3625
3626 vm_page_lock_queues();
3627 vm_page_wire(dst_page);
3628 vm_page_unlock_queues();
3629 PAGE_WAKEUP_DONE(dst_page);
3630 vm_object_unlock(dst_page->object);
3631
3632 if (dst_top_page != VM_PAGE_NULL) {
3633 vm_object_lock(dst_object);
3634 VM_PAGE_FREE(dst_top_page);
3635 vm_object_paging_end(dst_object);
3636 vm_object_unlock(dst_object);
3637 }
3638
3639 RetrySourceFault: ;
3640
3641 if (src_object == VM_OBJECT_NULL) {
3642 /*
3643 * No source object. We will just
3644 * zero-fill the page in dst_object.
3645 */
3646 src_page = VM_PAGE_NULL;
e3027f41 3647 result_page = VM_PAGE_NULL;
1c79356b
A
3648 } else {
3649 vm_object_lock(src_object);
3650 src_page = vm_page_lookup(src_object,
3651 trunc_page_64(src_offset));
e3027f41 3652 if (src_page == dst_page) {
1c79356b 3653 src_prot = dst_prot;
e3027f41
A
3654 result_page = VM_PAGE_NULL;
3655 } else {
1c79356b
A
3656 src_prot = VM_PROT_READ;
3657 vm_object_paging_begin(src_object);
3658
3659 XPR(XPR_VM_FAULT,
3660 "vm_fault_copy(2) -> vm_fault_page\n",
3661 0,0,0,0,0);
3662 switch (vm_fault_page(src_object,
3663 trunc_page_64(src_offset),
3664 VM_PROT_READ,
3665 FALSE,
3666 interruptible,
3667 src_lo_offset,
3668 src_hi_offset,
3669 VM_BEHAVIOR_SEQUENTIAL,
3670 &src_prot,
3671 &result_page,
3672 &src_top_page,
3673 (int *)0,
3674 &error,
3675 FALSE,
0b4e3aa0 3676 FALSE, NULL, 0)) {
1c79356b
A
3677
3678 case VM_FAULT_SUCCESS:
3679 break;
3680 case VM_FAULT_RETRY:
3681 goto RetrySourceFault;
3682 case VM_FAULT_MEMORY_SHORTAGE:
3683 if (vm_page_wait(interruptible))
3684 goto RetrySourceFault;
3685 /* fall thru */
3686 case VM_FAULT_INTERRUPTED:
3687 vm_fault_copy_dst_cleanup(dst_page);
3688 RETURN(MACH_SEND_INTERRUPTED);
3689 case VM_FAULT_FICTITIOUS_SHORTAGE:
3690 vm_page_more_fictitious();
3691 goto RetrySourceFault;
3692 case VM_FAULT_MEMORY_ERROR:
3693 vm_fault_copy_dst_cleanup(dst_page);
3694 if (error)
3695 return (error);
3696 else
3697 return(KERN_MEMORY_ERROR);
3698 }
3699
1c79356b
A
3700
3701 assert((src_top_page == VM_PAGE_NULL) ==
e3027f41 3702 (result_page->object == src_object));
1c79356b
A
3703 }
3704 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
e3027f41 3705 vm_object_unlock(result_page->object);
1c79356b
A
3706 }
3707
3708 if (!vm_map_verify(dst_map, dst_version)) {
e3027f41
A
3709 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3710 vm_fault_copy_cleanup(result_page, src_top_page);
1c79356b
A
3711 vm_fault_copy_dst_cleanup(dst_page);
3712 break;
3713 }
3714
3715 vm_object_lock(dst_page->object);
3716
3717 if (dst_page->object->copy != old_copy_object) {
3718 vm_object_unlock(dst_page->object);
3719 vm_map_verify_done(dst_map, dst_version);
e3027f41
A
3720 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3721 vm_fault_copy_cleanup(result_page, src_top_page);
1c79356b
A
3722 vm_fault_copy_dst_cleanup(dst_page);
3723 break;
3724 }
3725 vm_object_unlock(dst_page->object);
3726
3727 /*
3728 * Copy the page, and note that it is dirty
3729 * immediately.
3730 */
3731
3732 if (!page_aligned(src_offset) ||
3733 !page_aligned(dst_offset) ||
3734 !page_aligned(amount_left)) {
3735
3736 vm_object_offset_t src_po,
3737 dst_po;
3738
3739 src_po = src_offset - trunc_page_64(src_offset);
3740 dst_po = dst_offset - trunc_page_64(dst_offset);
3741
3742 if (dst_po > src_po) {
3743 part_size = PAGE_SIZE - dst_po;
3744 } else {
3745 part_size = PAGE_SIZE - src_po;
3746 }
3747 if (part_size > (amount_left)){
3748 part_size = amount_left;
3749 }
3750
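/*
 * Worked example (hypothetical offsets, 4 KB pages): with
 * src_po = 0x200 and dst_po = 0x600, dst_po is larger, so
 * part_size = PAGE_SIZE - dst_po = 0xA00; the partial copy
 * advances both offsets by 0xA00 (assuming amount_left is at
 * least that large), leaving the destination page-aligned for
 * the next iteration.
 */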
e3027f41 3751 if (result_page == VM_PAGE_NULL) {
1c79356b
A
3752 vm_page_part_zero_fill(dst_page,
3753 dst_po, part_size);
3754 } else {
e3027f41 3755 vm_page_part_copy(result_page, src_po,
1c79356b
A
3756 dst_page, dst_po, part_size);
3757 if(!dst_page->dirty){
3758 vm_object_lock(dst_object);
3759 dst_page->dirty = TRUE;
3760 vm_object_unlock(dst_page->object);
3761 }
3762
3763 }
3764 } else {
3765 part_size = PAGE_SIZE;
3766
e3027f41 3767 if (result_page == VM_PAGE_NULL)
1c79356b
A
3768 vm_page_zero_fill(dst_page);
3769 else{
e3027f41 3770 vm_page_copy(result_page, dst_page);
1c79356b
A
3771 if(!dst_page->dirty){
3772 vm_object_lock(dst_object);
3773 dst_page->dirty = TRUE;
3774 vm_object_unlock(dst_page->object);
3775 }
3776 }
3777
3778 }
3779
3780 /*
3781 * Unlock everything, and return
3782 */
3783
3784 vm_map_verify_done(dst_map, dst_version);
3785
e3027f41
A
3786 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3787 vm_fault_copy_cleanup(result_page, src_top_page);
1c79356b
A
3788 vm_fault_copy_dst_cleanup(dst_page);
3789
3790 amount_left -= part_size;
3791 src_offset += part_size;
3792 dst_offset += part_size;
3793 } while (amount_left > 0);
3794
3795 RETURN(KERN_SUCCESS);
3796#undef RETURN
3797
3798 /*NOTREACHED*/
3799}
3800
3801#ifdef notdef
3802
3803/*
3804 * Routine: vm_fault_page_overwrite
3805 *
3806 * Description:
3807 * A form of vm_fault_page that assumes that the
3808 * resulting page will be overwritten in its entirety,
3809 * making it unnecessary to obtain the correct *contents*
3810 * of the page.
3811 *
3812 * Implementation:
3813 * XXX Untested. Also unused. Eventually, this technology
3814 * could be used in vm_fault_copy() to advantage.
3815 */
3816vm_fault_return_t
3817vm_fault_page_overwrite(
3818 register
3819 vm_object_t dst_object,
3820 vm_object_offset_t dst_offset,
3821 vm_page_t *result_page) /* OUT */
3822{
3823 register
3824 vm_page_t dst_page;
3825 kern_return_t wait_result;
3826
3827#define interruptible THREAD_UNINT /* XXX */
3828
3829 while (TRUE) {
3830 /*
3831 * Look for a page at this offset
3832 */
3833
3834 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3835 == VM_PAGE_NULL) {
3836 /*
3837 * No page, no problem... just allocate one.
3838 */
3839
3840 dst_page = vm_page_alloc(dst_object, dst_offset);
3841 if (dst_page == VM_PAGE_NULL) {
3842 vm_object_unlock(dst_object);
3843 VM_PAGE_WAIT();
3844 vm_object_lock(dst_object);
3845 continue;
3846 }
3847
3848 /*
3849 * Pretend that the memory manager
3850 * write-protected the page.
3851 *
3852 * Note that we will be asking for write
3853 * permission without asking for the data
3854 * first.
3855 */
3856
3857 dst_page->overwriting = TRUE;
3858 dst_page->page_lock = VM_PROT_WRITE;
3859 dst_page->absent = TRUE;
3860 dst_page->unusual = TRUE;
3861 dst_object->absent_count++;
3862
3863 break;
3864
3865 /*
3866 * When we bail out, we might have to throw
3867 * away the page created here.
3868 */
3869
3870#define DISCARD_PAGE \
3871 MACRO_BEGIN \
3872 vm_object_lock(dst_object); \
3873 dst_page = vm_page_lookup(dst_object, dst_offset); \
3874 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3875 VM_PAGE_FREE(dst_page); \
3876 vm_object_unlock(dst_object); \
3877 MACRO_END
3878 }
3879
3880 /*
3881 * If the page is write-protected...
3882 */
3883
3884 if (dst_page->page_lock & VM_PROT_WRITE) {
3885 /*
3886 * ... and an unlock request hasn't been sent
3887 */
3888
3889 if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3890 vm_prot_t u;
3891 kern_return_t rc;
3892
3893 /*
3894 * ... then send one now.
3895 */
3896
3897 if (!dst_object->pager_ready) {
9bccf70c
A
3898 wait_result = vm_object_assert_wait(dst_object,
3899 VM_OBJECT_EVENT_PAGER_READY,
3900 interruptible);
1c79356b 3901 vm_object_unlock(dst_object);
9bccf70c
A
3902 if (wait_result == THREAD_WAITING)
3903 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
3904 if (wait_result != THREAD_AWAKENED) {
3905 DISCARD_PAGE;
3906 return(VM_FAULT_INTERRUPTED);
3907 }
3908 continue;
3909 }
3910
3911 u = dst_page->unlock_request |= VM_PROT_WRITE;
3912 vm_object_unlock(dst_object);
3913
3914 if ((rc = memory_object_data_unlock(
3915 dst_object->pager,
1c79356b
A
3916 dst_offset + dst_object->paging_offset,
3917 PAGE_SIZE,
3918 u)) != KERN_SUCCESS) {
3919 if (vm_fault_debug)
3920 printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3921 DISCARD_PAGE;
3922 return((rc == MACH_SEND_INTERRUPTED) ?
3923 VM_FAULT_INTERRUPTED :
3924 VM_FAULT_MEMORY_ERROR);
3925 }
3926 vm_object_lock(dst_object);
3927 continue;
3928 }
3929
3930 /* ... fall through to wait below */
3931 } else {
3932 /*
3933 * If the page isn't being used for other
3934 * purposes, then we're done.
3935 */
3936 if ( ! (dst_page->busy || dst_page->absent ||
3937 dst_page->error || dst_page->restart) )
3938 break;
3939 }
3940
9bccf70c 3941 wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
1c79356b 3942 vm_object_unlock(dst_object);
9bccf70c
A
3943 if (wait_result == THREAD_WAITING)
3944 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
3945 if (wait_result != THREAD_AWAKENED) {
3946 DISCARD_PAGE;
3947 return(VM_FAULT_INTERRUPTED);
3948 }
3949 }
3950
3951 *result_page = dst_page;
3952 return(VM_FAULT_SUCCESS);
3953
3954#undef interruptible
3955#undef DISCARD_PAGE
3956}
3957
3958#endif /* notdef */
3959
3960#if VM_FAULT_CLASSIFY
3961/*
3962 * Temporary statistics gathering support.
3963 */
3964
3965/*
3966 * Statistics arrays:
3967 */
3968#define VM_FAULT_TYPES_MAX 5
3969#define VM_FAULT_LEVEL_MAX 8
3970
3971int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
3972
3973#define VM_FAULT_TYPE_ZERO_FILL 0
3974#define VM_FAULT_TYPE_MAP_IN 1
3975#define VM_FAULT_TYPE_PAGER 2
3976#define VM_FAULT_TYPE_COPY 3
3977#define VM_FAULT_TYPE_OTHER 4
3978
3979
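/*
 * Descriptive note: the classifier below mirrors the shadow-chain
 * walk of the real fault handler, bumping "level" once per shadow
 * object descended, and tallies the result in
 * vm_fault_stats[type][level], clamping level so the index stays
 * within the array.
 */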
3980void
3981vm_fault_classify(vm_object_t object,
3982 vm_object_offset_t offset,
3983 vm_prot_t fault_type)
3984{
3985 int type, level = 0;
3986 vm_page_t m;
3987
3988 while (TRUE) {
3989 m = vm_page_lookup(object, offset);
3990 if (m != VM_PAGE_NULL) {
3991 if (m->busy || m->error || m->restart || m->absent ||
3992 fault_type & m->page_lock) {
3993 type = VM_FAULT_TYPE_OTHER;
3994 break;
3995 }
3996 if (((fault_type & VM_PROT_WRITE) == 0) ||
3997 ((level == 0) && object->copy == VM_OBJECT_NULL)) {
3998 type = VM_FAULT_TYPE_MAP_IN;
3999 break;
4000 }
4001 type = VM_FAULT_TYPE_COPY;
4002 break;
4003 }
4004 else {
4005 if (object->pager_created) {
4006 type = VM_FAULT_TYPE_PAGER;
4007 break;
4008 }
4009 if (object->shadow == VM_OBJECT_NULL) {
4010 type = VM_FAULT_TYPE_ZERO_FILL;
4011 break;
4012 }
4013
4014 offset += object->shadow_offset;
4015 object = object->shadow;
4016 level++;
4017 continue;
4018 }
4019 }
4020
4021 if (level >= VM_FAULT_LEVEL_MAX)
4022 level = VM_FAULT_LEVEL_MAX - 1;
4023
4024 vm_fault_stats[type][level] += 1;
4025
4026 return;
4027}
4028
4029/* cleanup routine to call from debugger */
4030
4031void
4032vm_fault_classify_init(void)
4033{
4034 int type, level;
4035
4036 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
4037 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
4038 vm_fault_stats[type][level] = 0;
4039 }
4040 }
4041
4042 return;
4043}
4044#endif /* VM_FAULT_CLASSIFY */