[apple/xnu.git] / osfmk / vm / vm_fault.c (xnu-517.3.7)
1c79356b 1/*
55e303ae 2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
1c79356b 3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
43866e37 6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
1c79356b 7 *
43866e37 8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b 17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
43866e37 19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
1c79356b 22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25/*
26 * @OSF_COPYRIGHT@
27 */
28/*
29 * Mach Operating System
30 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
31 * All Rights Reserved.
32 *
33 * Permission to use, copy, modify and distribute this software and its
34 * documentation is hereby granted, provided that both the copyright
35 * notice and this permission notice appear in all copies of the
36 * software, derivative works or modified versions, and any portions
37 * thereof, and that both notices appear in supporting documentation.
38 *
39 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
40 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
41 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
42 *
43 * Carnegie Mellon requests users of this software to return to
44 *
45 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
46 * School of Computer Science
47 * Carnegie Mellon University
48 * Pittsburgh PA 15213-3890
49 *
50 * any improvements or extensions that they make and grant Carnegie Mellon
51 * the rights to redistribute these changes.
52 */
53/*
54 */
55/*
56 * File: vm_fault.c
57 * Author: Avadis Tevanian, Jr., Michael Wayne Young
58 *
59 * Page fault handling module.
60 */
61#ifdef MACH_BSD
62/* remove after component interface available */
63extern int vnode_pager_workaround;
0b4e3aa0 64extern int device_pager_workaround;
1c79356b 65#endif
66
67#include <mach_cluster_stats.h>
68#include <mach_pagemap.h>
69#include <mach_kdb.h>
70
71#include <vm/vm_fault.h>
72#include <mach/kern_return.h>
73#include <mach/message.h> /* for error codes */
74#include <kern/host_statistics.h>
75#include <kern/counters.h>
76#include <kern/task.h>
77#include <kern/thread.h>
78#include <kern/sched_prim.h>
79#include <kern/host.h>
80#include <kern/xpr.h>
0b4e3aa0 81#include <ppc/proc_reg.h>
0b4e3aa0 82#include <vm/task_working_set.h>
1c79356b 83#include <vm/vm_map.h>
84#include <vm/vm_object.h>
85#include <vm/vm_page.h>
55e303ae 86#include <vm/vm_kern.h>
1c79356b 87#include <vm/pmap.h>
88#include <vm/vm_pageout.h>
89#include <mach/vm_param.h>
90#include <mach/vm_behavior.h>
91#include <mach/memory_object.h>
92 /* For memory_object_data_{request,unlock} */
93#include <kern/mach_param.h>
94#include <kern/macro_help.h>
95#include <kern/zalloc.h>
96#include <kern/misc_protos.h>
97
98#include <sys/kdebug.h>
99
100#define VM_FAULT_CLASSIFY 0
101#define VM_FAULT_STATIC_CONFIG 1
102
103#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
104
105int vm_object_absent_max = 50;
106
107int vm_fault_debug = 0;
1c79356b 108
109#if !VM_FAULT_STATIC_CONFIG
110boolean_t vm_fault_dirty_handling = FALSE;
111boolean_t vm_fault_interruptible = FALSE;
112boolean_t software_reference_bits = TRUE;
113#endif
114
115#if MACH_KDB
116extern struct db_watchpoint *db_watchpoint_list;
117#endif /* MACH_KDB */
118
119/* Forward declarations of internal routines. */
120extern kern_return_t vm_fault_wire_fast(
121 vm_map_t map,
122 vm_offset_t va,
123 vm_map_entry_t entry,
9bccf70c 124 pmap_t pmap,
125 vm_offset_t pmap_addr);
1c79356b 126
127extern void vm_fault_continue(void);
128
129extern void vm_fault_copy_cleanup(
130 vm_page_t page,
131 vm_page_t top_page);
132
133extern void vm_fault_copy_dst_cleanup(
134 vm_page_t page);
135
136#if VM_FAULT_CLASSIFY
137extern void vm_fault_classify(vm_object_t object,
138 vm_object_offset_t offset,
139 vm_prot_t fault_type);
140
141extern void vm_fault_classify_init(void);
142#endif
143
144/*
145 * Routine: vm_fault_init
146 * Purpose:
147 * Initialize our private data structures.
148 */
149void
150vm_fault_init(void)
151{
152}
153
154/*
155 * Routine: vm_fault_cleanup
156 * Purpose:
157 * Clean up the result of vm_fault_page.
158 * Results:
159 * The paging reference for "object" is released.
160 * "object" is unlocked.
161 * If "top_page" is not null, "top_page" is
162 * freed and the paging reference for the object
163 * containing it is released.
164 *
165 * In/out conditions:
166 * "object" must be locked.
167 */
168void
169vm_fault_cleanup(
170 register vm_object_t object,
171 register vm_page_t top_page)
172{
173 vm_object_paging_end(object);
174 vm_object_unlock(object);
175
176 if (top_page != VM_PAGE_NULL) {
177 object = top_page->object;
178 vm_object_lock(object);
179 VM_PAGE_FREE(top_page);
180 vm_object_paging_end(object);
181 vm_object_unlock(object);
182 }
183}
184
185#if MACH_CLUSTER_STATS
186#define MAXCLUSTERPAGES 16
187struct {
188 unsigned long pages_in_cluster;
189 unsigned long pages_at_higher_offsets;
190 unsigned long pages_at_lower_offsets;
191} cluster_stats_in[MAXCLUSTERPAGES];
192#define CLUSTER_STAT(clause) clause
193#define CLUSTER_STAT_HIGHER(x) \
194 ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
195#define CLUSTER_STAT_LOWER(x) \
196 ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
197#define CLUSTER_STAT_CLUSTER(x) \
198 ((cluster_stats_in[(x)].pages_in_cluster)++)
199#else /* MACH_CLUSTER_STATS */
200#define CLUSTER_STAT(clause)
201#endif /* MACH_CLUSTER_STATS */
202
203/* XXX - temporary */
204boolean_t vm_allow_clustered_pagein = FALSE;
205int vm_pagein_cluster_used = 0;
206
55e303ae 207#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
208
209
210boolean_t vm_page_deactivate_behind = TRUE;
1c79356b 211/*
212 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
213 */
55e303ae 214int vm_default_ahead = 0;
215int vm_default_behind = MAX_UPL_TRANSFER;
216
217/*
218 * vm_page_deactivate_behind
219 *
220 * Determine if sequential access is in progress
221 * in accordance with the behavior specified. If
 222 * so, compute a potential page to deactivate and
223 * deactivate it.
224 *
225 * The object must be locked.
226 */
227static
228boolean_t
229vm_fault_deactivate_behind(
230 vm_object_t object,
231 vm_offset_t offset,
232 vm_behavior_t behavior)
233{
234 vm_page_t m;
235
236#if TRACEFAULTPAGE
237 dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */
238#endif
239
240 switch (behavior) {
241 case VM_BEHAVIOR_RANDOM:
242 object->sequential = PAGE_SIZE_64;
243 m = VM_PAGE_NULL;
244 break;
245 case VM_BEHAVIOR_SEQUENTIAL:
246 if (offset &&
247 object->last_alloc == offset - PAGE_SIZE_64) {
248 object->sequential += PAGE_SIZE_64;
249 m = vm_page_lookup(object, offset - PAGE_SIZE_64);
250 } else {
251 object->sequential = PAGE_SIZE_64; /* reset */
252 m = VM_PAGE_NULL;
253 }
254 break;
255 case VM_BEHAVIOR_RSEQNTL:
256 if (object->last_alloc &&
257 object->last_alloc == offset + PAGE_SIZE_64) {
258 object->sequential += PAGE_SIZE_64;
259 m = vm_page_lookup(object, offset + PAGE_SIZE_64);
260 } else {
261 object->sequential = PAGE_SIZE_64; /* reset */
262 m = VM_PAGE_NULL;
263 }
264 break;
265 case VM_BEHAVIOR_DEFAULT:
266 default:
267 if (offset &&
268 object->last_alloc == offset - PAGE_SIZE_64) {
269 vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
270
271 object->sequential += PAGE_SIZE_64;
272 m = (offset >= behind &&
273 object->sequential >= behind) ?
274 vm_page_lookup(object, offset - behind) :
275 VM_PAGE_NULL;
276 } else if (object->last_alloc &&
277 object->last_alloc == offset + PAGE_SIZE_64) {
278 vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
279
280 object->sequential += PAGE_SIZE_64;
281 m = (offset < -behind &&
282 object->sequential >= behind) ?
283 vm_page_lookup(object, offset + behind) :
284 VM_PAGE_NULL;
285 } else {
286 object->sequential = PAGE_SIZE_64;
287 m = VM_PAGE_NULL;
288 }
289 break;
290 }
291
292 object->last_alloc = offset;
293
294 if (m) {
295 if (!m->busy) {
296 vm_page_lock_queues();
297 vm_page_deactivate(m);
298 vm_page_unlock_queues();
299#if TRACEFAULTPAGE
300 dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
301#endif
302 }
303 return TRUE;
304 }
305 return FALSE;
306}
1c79356b 307
1c79356b 308
309/*
310 * Routine: vm_fault_page
311 * Purpose:
312 * Find the resident page for the virtual memory
313 * specified by the given virtual memory object
314 * and offset.
315 * Additional arguments:
316 * The required permissions for the page is given
317 * in "fault_type". Desired permissions are included
318 * in "protection". The minimum and maximum valid offsets
319 * within the object for the relevant map entry are
320 * passed in "lo_offset" and "hi_offset" respectively and
321 * the expected page reference pattern is passed in "behavior".
322 * These three parameters are used to determine pagein cluster
323 * limits.
324 *
325 * If the desired page is known to be resident (for
326 * example, because it was previously wired down), asserting
327 * the "unwiring" parameter will speed the search.
328 *
329 * If the operation can be interrupted (by thread_abort
330 * or thread_terminate), then the "interruptible"
331 * parameter should be asserted.
332 *
333 * Results:
334 * The page containing the proper data is returned
335 * in "result_page".
336 *
337 * In/out conditions:
338 * The source object must be locked and referenced,
339 * and must donate one paging reference. The reference
340 * is not affected. The paging reference and lock are
341 * consumed.
342 *
343 * If the call succeeds, the object in which "result_page"
344 * resides is left locked and holding a paging reference.
345 * If this is not the original object, a busy page in the
346 * original object is returned in "top_page", to prevent other
347 * callers from pursuing this same data, along with a paging
348 * reference for the original object. The "top_page" should
349 * be destroyed when this guarantee is no longer required.
350 * The "result_page" is also left busy. It is not removed
351 * from the pageout queues.
352 */
353
354vm_fault_return_t
355vm_fault_page(
356 /* Arguments: */
357 vm_object_t first_object, /* Object to begin search */
358 vm_object_offset_t first_offset, /* Offset into object */
359 vm_prot_t fault_type, /* What access is requested */
360 boolean_t must_be_resident,/* Must page be resident? */
361 int interruptible, /* how may fault be interrupted? */
362 vm_object_offset_t lo_offset, /* Map entry start */
363 vm_object_offset_t hi_offset, /* Map entry end */
364 vm_behavior_t behavior, /* Page reference behavior */
365 /* Modifies in place: */
366 vm_prot_t *protection, /* Protection for mapping */
367 /* Returns: */
368 vm_page_t *result_page, /* Page found, if successful */
369 vm_page_t *top_page, /* Page in top object, if
370 * not result_page. */
371 int *type_of_fault, /* if non-null, fill in with type of fault
372 * COW, zero-fill, etc... returned in trace point */
373 /* More arguments: */
374 kern_return_t *error_code, /* code if page is in error */
375 boolean_t no_zero_fill, /* don't zero fill absent pages */
0b4e3aa0 376 boolean_t data_supply, /* treat as data_supply if
1c79356b 377 * it is a write fault and a full
378 * page is provided */
0b4e3aa0 379 vm_map_t map,
380 vm_offset_t vaddr)
1c79356b 381{
382 register
383 vm_page_t m;
384 register
385 vm_object_t object;
386 register
387 vm_object_offset_t offset;
388 vm_page_t first_m;
389 vm_object_t next_object;
390 vm_object_t copy_object;
391 boolean_t look_for_page;
392 vm_prot_t access_required = fault_type;
393 vm_prot_t wants_copy_flag;
394 vm_size_t cluster_size, length;
395 vm_object_offset_t cluster_offset;
396 vm_object_offset_t cluster_start, cluster_end, paging_offset;
397 vm_object_offset_t align_offset;
398 CLUSTER_STAT(int pages_at_higher_offsets;)
399 CLUSTER_STAT(int pages_at_lower_offsets;)
400 kern_return_t wait_result;
1c79356b 401 boolean_t interruptible_state;
0b4e3aa0 402 boolean_t bumped_pagein = FALSE;
1c79356b 403
1c79356b 404
405#if MACH_PAGEMAP
406/*
407 * MACH page map - an optional optimization where a bit map is maintained
408 * by the VM subsystem for internal objects to indicate which pages of
409 * the object currently reside on backing store. This existence map
410 * duplicates information maintained by the vnode pager. It is
411 * created at the time of the first pageout against the object, i.e.
 412 * at the same time the pager for the object is created. The optimization
413 * is designed to eliminate pager interaction overhead, if it is
414 * 'known' that the page does not exist on backing store.
415 *
416 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
417 * either marked as paged out in the existence map for the object or no
418 * existence map exists for the object. LOOK_FOR() is one of the
419 * criteria in the decision to invoke the pager. It is also used as one
420 * of the criteria to terminate the scan for adjacent pages in a clustered
421 * pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
422 * permanent objects. Note also that if the pager for an internal object
423 * has not been created, the pager is not invoked regardless of the value
424 * of LOOK_FOR() and that clustered pagein scans are only done on an object
425 * for which a pager has been created.
426 *
427 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 428 * is marked as paged out in the existence map for the object.
 429 * PAGED_OUT() is used to determine if a page has already been pushed
430 * into a copy object in order to avoid a redundant page out operation.
431 */
432#define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
433 != VM_EXTERNAL_STATE_ABSENT)
434#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
435 == VM_EXTERNAL_STATE_EXISTS)
436#else /* MACH_PAGEMAP */
437/*
438 * If the MACH page map optimization is not enabled,
439 * LOOK_FOR() always evaluates to TRUE. The pager will always be
440 * invoked to resolve missing pages in an object, assuming the pager
441 * has been created for the object. In a clustered page operation, the
 442 * absence of a page on backing store cannot be used to terminate
443 * a scan for adjacent pages since that information is available only in
444 * the pager. Hence pages that may not be paged out are potentially
445 * included in a clustered request. The vnode pager is coded to deal
446 * with any combination of absent/present pages in a clustered
447 * pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
448 * will always be invoked to push a dirty page into a copy object assuming
449 * a pager has been created. If the page has already been pushed, the
 450 * pager will ignore the new request.
451 */
452#define LOOK_FOR(o, f) TRUE
453#define PAGED_OUT(o, f) FALSE
454#endif /* MACH_PAGEMAP */
455
456/*
457 * Recovery actions
458 */
459#define PREPARE_RELEASE_PAGE(m) \
460 MACRO_BEGIN \
461 vm_page_lock_queues(); \
462 MACRO_END
463
464#define DO_RELEASE_PAGE(m) \
465 MACRO_BEGIN \
466 PAGE_WAKEUP_DONE(m); \
467 if (!m->active && !m->inactive) \
468 vm_page_activate(m); \
469 vm_page_unlock_queues(); \
470 MACRO_END
471
472#define RELEASE_PAGE(m) \
473 MACRO_BEGIN \
474 PREPARE_RELEASE_PAGE(m); \
475 DO_RELEASE_PAGE(m); \
476 MACRO_END
477
478#if TRACEFAULTPAGE
479 dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
480#endif
481
482
483
484#if !VM_FAULT_STATIC_CONFIG
485 if (vm_fault_dirty_handling
486#if MACH_KDB
487 /*
488 * If there are watchpoints set, then
489 * we don't want to give away write permission
490 * on a read fault. Make the task write fault,
491 * so that the watchpoint code notices the access.
492 */
493 || db_watchpoint_list
494#endif /* MACH_KDB */
495 ) {
496 /*
497 * If we aren't asking for write permission,
498 * then don't give it away. We're using write
499 * faults to set the dirty bit.
500 */
501 if (!(fault_type & VM_PROT_WRITE))
502 *protection &= ~VM_PROT_WRITE;
503 }
504
505 if (!vm_fault_interruptible)
506 interruptible = THREAD_UNINT;
507#else /* STATIC_CONFIG */
508#if MACH_KDB
509 /*
510 * If there are watchpoints set, then
511 * we don't want to give away write permission
512 * on a read fault. Make the task write fault,
513 * so that the watchpoint code notices the access.
514 */
515 if (db_watchpoint_list) {
516 /*
517 * If we aren't asking for write permission,
518 * then don't give it away. We're using write
519 * faults to set the dirty bit.
520 */
521 if (!(fault_type & VM_PROT_WRITE))
522 *protection &= ~VM_PROT_WRITE;
523 }
524
525#endif /* MACH_KDB */
526#endif /* STATIC_CONFIG */
527
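	/*
	 * Save the caller's interrupt level; every exit path below
	 * restores it with thread_interrupt_level(interruptible_state)
	 * before returning.
	 */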
9bccf70c 528 interruptible_state = thread_interrupt_level(interruptible);
1c79356b 529
530 /*
531 * INVARIANTS (through entire routine):
532 *
533 * 1) At all times, we must either have the object
534 * lock or a busy page in some object to prevent
535 * some other thread from trying to bring in
536 * the same page.
537 *
538 * Note that we cannot hold any locks during the
539 * pager access or when waiting for memory, so
540 * we use a busy page then.
541 *
542 * Note also that we aren't as concerned about more than
543 * one thread attempting to memory_object_data_unlock
544 * the same page at once, so we don't hold the page
545 * as busy then, but do record the highest unlock
546 * value so far. [Unlock requests may also be delivered
547 * out of order.]
548 *
549 * 2) To prevent another thread from racing us down the
550 * shadow chain and entering a new page in the top
551 * object before we do, we must keep a busy page in
552 * the top object while following the shadow chain.
553 *
554 * 3) We must increment paging_in_progress on any object
555 * for which we have a busy page
556 *
557 * 4) We leave busy pages on the pageout queues.
558 * If the pageout daemon comes across a busy page,
559 * it will remove the page from the pageout queues.
560 */
561
562 /*
563 * Search for the page at object/offset.
564 */
565
566 object = first_object;
567 offset = first_offset;
568 first_m = VM_PAGE_NULL;
569 access_required = fault_type;
570
571 XPR(XPR_VM_FAULT,
572 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
573 (integer_t)object, offset, fault_type, *protection, 0);
574
575 /*
576 * See whether this page is resident
577 */
578
579 while (TRUE) {
580#if TRACEFAULTPAGE
581 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
582#endif
583 if (!object->alive) {
584 vm_fault_cleanup(object, first_m);
9bccf70c 585 thread_interrupt_level(interruptible_state);
1c79356b 586 return(VM_FAULT_MEMORY_ERROR);
587 }
588 m = vm_page_lookup(object, offset);
589#if TRACEFAULTPAGE
590 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
591#endif
592 if (m != VM_PAGE_NULL) {
593 /*
594 * If the page was pre-paged as part of a
595 * cluster, record the fact.
596 */
597 if (m->clustered) {
598 vm_pagein_cluster_used++;
599 m->clustered = FALSE;
600 }
601
602 /*
603 * If the page is being brought in,
604 * wait for it and then retry.
605 *
606 * A possible optimization: if the page
607 * is known to be resident, we can ignore
608 * pages that are absent (regardless of
609 * whether they're busy).
610 */
611
612 if (m->busy) {
613#if TRACEFAULTPAGE
614 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
615#endif
9bccf70c 616 wait_result = PAGE_SLEEP(object, m, interruptible);
1c79356b 617 XPR(XPR_VM_FAULT,
618 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
619 (integer_t)object, offset,
620 (integer_t)m, 0, 0);
621 counter(c_vm_fault_page_block_busy_kernel++);
1c79356b 622
1c79356b 623 if (wait_result != THREAD_AWAKENED) {
624 vm_fault_cleanup(object, first_m);
9bccf70c 625 thread_interrupt_level(interruptible_state);
1c79356b 626 if (wait_result == THREAD_RESTART)
627 {
628 return(VM_FAULT_RETRY);
629 }
630 else
631 {
632 return(VM_FAULT_INTERRUPTED);
633 }
634 }
635 continue;
636 }
637
638 /*
639 * If the page is in error, give up now.
640 */
641
642 if (m->error) {
643#if TRACEFAULTPAGE
644 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */
645#endif
646 if (error_code)
647 *error_code = m->page_error;
648 VM_PAGE_FREE(m);
649 vm_fault_cleanup(object, first_m);
9bccf70c 650 thread_interrupt_level(interruptible_state);
1c79356b 651 return(VM_FAULT_MEMORY_ERROR);
652 }
653
654 /*
655 * If the pager wants us to restart
656 * at the top of the chain,
657 * typically because it has moved the
658 * page to another pager, then do so.
659 */
660
661 if (m->restart) {
662#if TRACEFAULTPAGE
663 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
664#endif
665 VM_PAGE_FREE(m);
666 vm_fault_cleanup(object, first_m);
9bccf70c 667 thread_interrupt_level(interruptible_state);
1c79356b 668 return(VM_FAULT_RETRY);
669 }
670
671 /*
672 * If the page isn't busy, but is absent,
673 * then it was deemed "unavailable".
674 */
675
676 if (m->absent) {
677 /*
678 * Remove the non-existent page (unless it's
679 * in the top object) and move on down to the
680 * next object (if there is one).
681 */
682#if TRACEFAULTPAGE
683 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */
684#endif
685
686 next_object = object->shadow;
687 if (next_object == VM_OBJECT_NULL) {
688 vm_page_t real_m;
689
690 assert(!must_be_resident);
691
692 if (object->shadow_severed) {
693 vm_fault_cleanup(
694 object, first_m);
9bccf70c 695 thread_interrupt_level(interruptible_state);
1c79356b 696 return VM_FAULT_MEMORY_ERROR;
697 }
698
699 /*
700 * Absent page at bottom of shadow
701 * chain; zero fill the page we left
702 * busy in the first object, and flush
703 * the absent page. But first we
704 * need to allocate a real page.
705 */
706 if (VM_PAGE_THROTTLED() ||
55e303ae 707 (real_m = vm_page_grab())
708 == VM_PAGE_NULL) {
709 vm_fault_cleanup(
710 object, first_m);
711 thread_interrupt_level(
712 interruptible_state);
713 return(
714 VM_FAULT_MEMORY_SHORTAGE);
715 }
716
717 /*
718 * are we protecting the system from
719 * backing store exhaustion. If so
720 * sleep unless we are privileged.
721 */
722
723 if(vm_backing_store_low) {
724 if(!(current_task()->priv_flags
725 & VM_BACKING_STORE_PRIV)) {
726 assert_wait((event_t)
727 &vm_backing_store_low,
728 THREAD_UNINT);
729 vm_fault_cleanup(object,
730 first_m);
731 thread_block((void(*)(void)) 0);
732 thread_interrupt_level(
733 interruptible_state);
734 return(VM_FAULT_RETRY);
735 }
1c79356b 736 }
737
55e303ae 738
1c79356b 739 XPR(XPR_VM_FAULT,
740 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
741 (integer_t)object, offset,
742 (integer_t)m,
743 (integer_t)first_object, 0);
744 if (object != first_object) {
745 VM_PAGE_FREE(m);
746 vm_object_paging_end(object);
747 vm_object_unlock(object);
748 object = first_object;
749 offset = first_offset;
750 m = first_m;
751 first_m = VM_PAGE_NULL;
752 vm_object_lock(object);
753 }
754
755 VM_PAGE_FREE(m);
756 assert(real_m->busy);
757 vm_page_insert(real_m, object, offset);
758 m = real_m;
759
760 /*
761 * Drop the lock while zero filling
762 * page. Then break because this
763 * is the page we wanted. Checking
764 * the page lock is a waste of time;
765 * this page was either absent or
766 * newly allocated -- in both cases
767 * it can't be page locked by a pager.
768 */
0b4e3aa0 769 m->no_isync = FALSE;
770
1c79356b 771 if (!no_zero_fill) {
772 vm_object_unlock(object);
773 vm_page_zero_fill(m);
1c79356b 774 vm_object_lock(object);
775 }
55e303ae 776 if (type_of_fault)
777 *type_of_fault = DBG_ZERO_FILL_FAULT;
778 VM_STAT(zero_fill_count++);
779
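				/*
				 * If this fault was already charged as a
				 * pagein (the pager was asked for data on an
				 * earlier pass), back that accounting out,
				 * since the page is being zero-filled instead.
				 */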
780 if (bumped_pagein == TRUE) {
781 VM_STAT(pageins--);
782 current_task()->pageins--;
783 }
784#if 0
785 pmap_clear_modify(m->phys_page);
786#endif
1c79356b 787 vm_page_lock_queues();
788 VM_PAGE_QUEUES_REMOVE(m);
0b4e3aa0 789 m->page_ticket = vm_page_ticket;
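				/*
				 * Zero-filled pages of large objects (bigger
				 * than 0x80000 bytes, i.e. 512K) go on the
				 * dedicated zero-fill queue; pages of smaller
				 * objects go directly to the inactive queue.
				 */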
9bccf70c 790 if(m->object->size > 0x80000) {
791 m->zero_fill = TRUE;
792 /* depends on the queues lock */
793 vm_zf_count += 1;
794 queue_enter(&vm_page_queue_zf,
795 m, vm_page_t, pageq);
796 } else {
797 queue_enter(
798 &vm_page_queue_inactive,
799 m, vm_page_t, pageq);
800 }
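				/*
				 * Advance the ticket roll: after
				 * VM_PAGE_TICKETS_IN_ROLL pages the global
				 * vm_page_ticket is bumped, wrapping back to
				 * zero at VM_PAGE_TICKET_ROLL_IDS.
				 */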
0b4e3aa0 801 vm_page_ticket_roll++;
802 if(vm_page_ticket_roll ==
803 VM_PAGE_TICKETS_IN_ROLL) {
804 vm_page_ticket_roll = 0;
805 if(vm_page_ticket ==
806 VM_PAGE_TICKET_ROLL_IDS)
807 vm_page_ticket= 0;
808 else
809 vm_page_ticket++;
810 }
1c79356b 811 m->inactive = TRUE;
812 vm_page_inactive_count++;
813 vm_page_unlock_queues();
814 break;
815 } else {
816 if (must_be_resident) {
817 vm_object_paging_end(object);
818 } else if (object != first_object) {
819 vm_object_paging_end(object);
820 VM_PAGE_FREE(m);
821 } else {
822 first_m = m;
823 m->absent = FALSE;
824 m->unusual = FALSE;
825 vm_object_absent_release(object);
826 m->busy = TRUE;
827
828 vm_page_lock_queues();
829 VM_PAGE_QUEUES_REMOVE(m);
830 vm_page_unlock_queues();
831 }
832 XPR(XPR_VM_FAULT,
833 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
834 (integer_t)object, offset,
835 (integer_t)next_object,
836 offset+object->shadow_offset,0);
837 offset += object->shadow_offset;
838 hi_offset += object->shadow_offset;
839 lo_offset += object->shadow_offset;
840 access_required = VM_PROT_READ;
841 vm_object_lock(next_object);
842 vm_object_unlock(object);
843 object = next_object;
844 vm_object_paging_begin(object);
845 continue;
846 }
847 }
848
849 if ((m->cleaning)
850 && ((object != first_object) ||
851 (object->copy != VM_OBJECT_NULL))
852 && (fault_type & VM_PROT_WRITE)) {
853 /*
854 * This is a copy-on-write fault that will
855 * cause us to revoke access to this page, but
856 * this page is in the process of being cleaned
857 * in a clustered pageout. We must wait until
858 * the cleaning operation completes before
859 * revoking access to the original page,
860 * otherwise we might attempt to remove a
861 * wired mapping.
862 */
863#if TRACEFAULTPAGE
864 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */
865#endif
866 XPR(XPR_VM_FAULT,
867 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
868 (integer_t)object, offset,
869 (integer_t)m, 0, 0);
870 /* take an extra ref so that object won't die */
871 assert(object->ref_count > 0);
872 object->ref_count++;
873 vm_object_res_reference(object);
874 vm_fault_cleanup(object, first_m);
875 counter(c_vm_fault_page_block_backoff_kernel++);
876 vm_object_lock(object);
877 assert(object->ref_count > 0);
878 m = vm_page_lookup(object, offset);
879 if (m != VM_PAGE_NULL && m->cleaning) {
880 PAGE_ASSERT_WAIT(m, interruptible);
881 vm_object_unlock(object);
9bccf70c 882 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b 883 vm_object_deallocate(object);
884 goto backoff;
885 } else {
886 vm_object_unlock(object);
887 vm_object_deallocate(object);
9bccf70c 888 thread_interrupt_level(interruptible_state);
1c79356b 889 return VM_FAULT_RETRY;
890 }
891 }
892
893 /*
894 * If the desired access to this page has
895 * been locked out, request that it be unlocked.
896 */
897
898 if (access_required & m->page_lock) {
899 if ((access_required & m->unlock_request) != access_required) {
900 vm_prot_t new_unlock_request;
901 kern_return_t rc;
902
903#if TRACEFAULTPAGE
904 dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready); /* (TEST/DEBUG) */
905#endif
906 if (!object->pager_ready) {
907 XPR(XPR_VM_FAULT,
908 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
909 access_required,
910 (integer_t)object, offset,
911 (integer_t)m, 0);
912 /* take an extra ref */
913 assert(object->ref_count > 0);
914 object->ref_count++;
915 vm_object_res_reference(object);
916 vm_fault_cleanup(object,
917 first_m);
918 counter(c_vm_fault_page_block_backoff_kernel++);
919 vm_object_lock(object);
920 assert(object->ref_count > 0);
921 if (!object->pager_ready) {
9bccf70c 922 wait_result = vm_object_assert_wait(
1c79356b 923 object,
924 VM_OBJECT_EVENT_PAGER_READY,
925 interruptible);
926 vm_object_unlock(object);
9bccf70c 927 if (wait_result == THREAD_WAITING)
928 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b 929 vm_object_deallocate(object);
930 goto backoff;
931 } else {
932 vm_object_unlock(object);
933 vm_object_deallocate(object);
9bccf70c 934 thread_interrupt_level(interruptible_state);
1c79356b 935 return VM_FAULT_RETRY;
936 }
937 }
938
939 new_unlock_request = m->unlock_request =
940 (access_required | m->unlock_request);
941 vm_object_unlock(object);
942 XPR(XPR_VM_FAULT,
943 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
944 (integer_t)object, offset,
945 (integer_t)m, new_unlock_request, 0);
946 if ((rc = memory_object_data_unlock(
947 object->pager,
1c79356b 948 offset + object->paging_offset,
949 PAGE_SIZE,
950 new_unlock_request))
951 != KERN_SUCCESS) {
952 if (vm_fault_debug)
953 printf("vm_fault: memory_object_data_unlock failed\n");
954 vm_object_lock(object);
955 vm_fault_cleanup(object, first_m);
9bccf70c 956 thread_interrupt_level(interruptible_state);
1c79356b 957 return((rc == MACH_SEND_INTERRUPTED) ?
958 VM_FAULT_INTERRUPTED :
959 VM_FAULT_MEMORY_ERROR);
960 }
961 vm_object_lock(object);
962 continue;
963 }
964
965 XPR(XPR_VM_FAULT,
966 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
967 access_required, (integer_t)object,
968 offset, (integer_t)m, 0);
969 /* take an extra ref so object won't die */
970 assert(object->ref_count > 0);
971 object->ref_count++;
972 vm_object_res_reference(object);
973 vm_fault_cleanup(object, first_m);
974 counter(c_vm_fault_page_block_backoff_kernel++);
975 vm_object_lock(object);
976 assert(object->ref_count > 0);
977 m = vm_page_lookup(object, offset);
978 if (m != VM_PAGE_NULL &&
979 (access_required & m->page_lock) &&
980 !((access_required & m->unlock_request) != access_required)) {
981 PAGE_ASSERT_WAIT(m, interruptible);
982 vm_object_unlock(object);
9bccf70c 983 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b 984 vm_object_deallocate(object);
985 goto backoff;
986 } else {
987 vm_object_unlock(object);
988 vm_object_deallocate(object);
9bccf70c 989 thread_interrupt_level(interruptible_state);
1c79356b 990 return VM_FAULT_RETRY;
991 }
992 }
993 /*
994 * We mark the page busy and leave it on
995 * the pageout queues. If the pageout
 996 * daemon comes across it, then it will
997 * remove the page.
998 */
999
1000#if TRACEFAULTPAGE
1001 dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1002#endif
1003
1004#if !VM_FAULT_STATIC_CONFIG
1005 if (!software_reference_bits) {
1006 vm_page_lock_queues();
1007 if (m->inactive)
1008 vm_stat.reactivations++;
1009
1010 VM_PAGE_QUEUES_REMOVE(m);
1011 vm_page_unlock_queues();
1012 }
1013#endif
1014 XPR(XPR_VM_FAULT,
1015 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
1016 (integer_t)object, offset, (integer_t)m, 0, 0);
1017 assert(!m->busy);
1018 m->busy = TRUE;
1019 assert(!m->absent);
1020 break;
1021 }
1022
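		/*
		 * Only ask the pager for this page if a pager has been
		 * created for the object, the existence map (if any) does
		 * not say the page is absent from backing store, and the
		 * caller is not supplying the data itself.
		 */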
1023 look_for_page =
1024 (object->pager_created) &&
1025 LOOK_FOR(object, offset) &&
1026 (!data_supply);
1027
1028#if TRACEFAULTPAGE
1029 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */
1030#endif
1031 if ((look_for_page || (object == first_object))
0b4e3aa0 1032 && !must_be_resident
1033 && !(object->phys_contiguous)) {
1c79356b 1034 /*
1035 * Allocate a new page for this object/offset
1036 * pair.
1037 */
1038
1039 m = vm_page_grab_fictitious();
1040#if TRACEFAULTPAGE
1041 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
1042#endif
1043 if (m == VM_PAGE_NULL) {
1044 vm_fault_cleanup(object, first_m);
9bccf70c 1045 thread_interrupt_level(interruptible_state);
1c79356b 1046 return(VM_FAULT_FICTITIOUS_SHORTAGE);
1047 }
1048 vm_page_insert(m, object, offset);
1049 }
1050
0b4e3aa0 1051 if ((look_for_page && !must_be_resident)) {
1c79356b 1052 kern_return_t rc;
1053
1054 /*
1055 * If the memory manager is not ready, we
1056 * cannot make requests.
1057 */
1058 if (!object->pager_ready) {
1059#if TRACEFAULTPAGE
1060 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
1061#endif
0b4e3aa0 1062 if(m != VM_PAGE_NULL)
1063 VM_PAGE_FREE(m);
1c79356b 1064 XPR(XPR_VM_FAULT,
1065 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
1066 (integer_t)object, offset, 0, 0, 0);
1067 /* take an extra ref so object won't die */
1068 assert(object->ref_count > 0);
1069 object->ref_count++;
1070 vm_object_res_reference(object);
1071 vm_fault_cleanup(object, first_m);
1072 counter(c_vm_fault_page_block_backoff_kernel++);
1073 vm_object_lock(object);
1074 assert(object->ref_count > 0);
1075 if (!object->pager_ready) {
9bccf70c 1076 wait_result = vm_object_assert_wait(object,
1c79356b 1077 VM_OBJECT_EVENT_PAGER_READY,
1078 interruptible);
1079 vm_object_unlock(object);
9bccf70c 1080 if (wait_result == THREAD_WAITING)
1081 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b 1082 vm_object_deallocate(object);
1083 goto backoff;
1084 } else {
1085 vm_object_unlock(object);
1086 vm_object_deallocate(object);
9bccf70c 1087 thread_interrupt_level(interruptible_state);
1c79356b 1088 return VM_FAULT_RETRY;
1089 }
1090 }
1091
0b4e3aa0 1092 if(object->phys_contiguous) {
1093 if(m != VM_PAGE_NULL) {
1094 VM_PAGE_FREE(m);
1095 m = VM_PAGE_NULL;
1096 }
1097 goto no_clustering;
1098 }
1c79356b 1099 if (object->internal) {
1100 /*
1101 * Requests to the default pager
1102 * must reserve a real page in advance,
1103 * because the pager's data-provided
1104 * won't block for pages. IMPORTANT:
1105 * this acts as a throttling mechanism
1106 * for data_requests to the default
1107 * pager.
1108 */
1109
1110#if TRACEFAULTPAGE
1111 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1112#endif
1113 if (m->fictitious && !vm_page_convert(m)) {
1114 VM_PAGE_FREE(m);
1115 vm_fault_cleanup(object, first_m);
9bccf70c 1116 thread_interrupt_level(interruptible_state);
1c79356b
A
1117 return(VM_FAULT_MEMORY_SHORTAGE);
1118 }
1119 } else if (object->absent_count >
1120 vm_object_absent_max) {
1121 /*
1122 * If there are too many outstanding page
1123 * requests pending on this object, we
1124 * wait for them to be resolved now.
1125 */
1126
1127#if TRACEFAULTPAGE
1128 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1129#endif
0b4e3aa0 1130 if(m != VM_PAGE_NULL)
1131 VM_PAGE_FREE(m);
1c79356b 1132 /* take an extra ref so object won't die */
1133 assert(object->ref_count > 0);
1134 object->ref_count++;
1135 vm_object_res_reference(object);
1136 vm_fault_cleanup(object, first_m);
1137 counter(c_vm_fault_page_block_backoff_kernel++);
1138 vm_object_lock(object);
1139 assert(object->ref_count > 0);
1140 if (object->absent_count > vm_object_absent_max) {
1141 vm_object_absent_assert_wait(object,
1142 interruptible);
1143 vm_object_unlock(object);
9bccf70c 1144 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b 1145 vm_object_deallocate(object);
1146 goto backoff;
1147 } else {
1148 vm_object_unlock(object);
1149 vm_object_deallocate(object);
9bccf70c 1150 thread_interrupt_level(interruptible_state);
1c79356b 1151 return VM_FAULT_RETRY;
1152 }
1153 }
1154
1155 /*
1156 * Indicate that the page is waiting for data
1157 * from the memory manager.
1158 */
1159
0b4e3aa0 1160 if(m != VM_PAGE_NULL) {
1161
1162 m->list_req_pending = TRUE;
1163 m->absent = TRUE;
1164 m->unusual = TRUE;
1165 object->absent_count++;
1166
1167 }
1c79356b 1168
9bccf70c 1169no_clustering:
1c79356b 1170 cluster_start = offset;
1171 length = PAGE_SIZE;
1c79356b 1172
0b4e3aa0 1173 /*
1174 * lengthen the cluster by the pages in the working set
1175 */
1176 if((map != NULL) &&
1177 (current_task()->dynamic_working_set != 0)) {
1178 cluster_end = cluster_start + length;
 1179 /* tws values for start and end are just
1180 * suggestions. Therefore, as long as
1181 * build_cluster does not use pointers or
1182 * take action based on values that
1183 * could be affected by re-entrance we
1184 * do not need to take the map lock.
1185 */
9bccf70c 1186 cluster_end = offset + PAGE_SIZE_64;
0b4e3aa0 1187 tws_build_cluster((tws_hash_t)
1188 current_task()->dynamic_working_set,
1189 object, &cluster_start,
9bccf70c 1190 &cluster_end, 0x40000);
0b4e3aa0 1191 length = cluster_end - cluster_start;
1192 }
1c79356b 1193#if TRACEFAULTPAGE
1194 dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */
1195#endif
1196 /*
1197 * We have a busy page, so we can
1198 * release the object lock.
1199 */
1200 vm_object_unlock(object);
1201
1202 /*
1203 * Call the memory manager to retrieve the data.
1204 */
1205
1206 if (type_of_fault)
9bccf70c 1207 *type_of_fault = (length << 8) | DBG_PAGEIN_FAULT;
1c79356b 1208 VM_STAT(pageins++);
1209 current_task()->pageins++;
0b4e3aa0 1210 bumped_pagein = TRUE;
1c79356b 1211
1212 /*
1213 * If this object uses a copy_call strategy,
1214 * and we are interested in a copy of this object
1215 * (having gotten here only by following a
1216 * shadow chain), then tell the memory manager
1217 * via a flag added to the desired_access
1218 * parameter, so that it can detect a race
1219 * between our walking down the shadow chain
1220 * and its pushing pages up into a copy of
1221 * the object that it manages.
1222 */
1223
1224 if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1225 object != first_object) {
1226 wants_copy_flag = VM_PROT_WANTS_COPY;
1227 } else {
1228 wants_copy_flag = VM_PROT_NONE;
1229 }
1230
1231 XPR(XPR_VM_FAULT,
1232 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1233 (integer_t)object, offset, (integer_t)m,
1234 access_required | wants_copy_flag, 0);
1235
1c79356b 1236 rc = memory_object_data_request(object->pager,
1c79356b 1237 cluster_start + object->paging_offset,
1238 length,
1239 access_required | wants_copy_flag);
1240
1c79356b 1241
1242#if TRACEFAULTPAGE
1243 dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1244#endif
1245 if (rc != KERN_SUCCESS) {
1246 if (rc != MACH_SEND_INTERRUPTED
1247 && vm_fault_debug)
0b4e3aa0 1248 printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
1c79356b 1249 "memory_object_data_request",
1250 object->pager,
1c79356b 1251 cluster_start + object->paging_offset,
0b4e3aa0 1252 length, access_required, rc);
1c79356b 1253 /*
1254 * Don't want to leave a busy page around,
1255 * but the data request may have blocked,
1256 * so check if it's still there and busy.
1257 */
0b4e3aa0 1258 if(!object->phys_contiguous) {
1259 vm_object_lock(object);
1260 for (; length; length -= PAGE_SIZE,
1261 cluster_start += PAGE_SIZE_64) {
1262 vm_page_t p;
1263 if ((p = vm_page_lookup(object,
1c79356b 1264 cluster_start))
0b4e3aa0 1265 && p->absent && p->busy
1266 && p != first_m) {
1267 VM_PAGE_FREE(p);
1268 }
1269 }
1c79356b 1270 }
1271 vm_fault_cleanup(object, first_m);
9bccf70c 1272 thread_interrupt_level(interruptible_state);
1c79356b 1273 return((rc == MACH_SEND_INTERRUPTED) ?
1274 VM_FAULT_INTERRUPTED :
1275 VM_FAULT_MEMORY_ERROR);
0b4e3aa0 1276 } else {
1277#ifdef notdefcdy
1278 tws_hash_line_t line;
1279 task_t task;
1280
1281 task = current_task();
1282
1283 if((map != NULL) &&
9bccf70c 1284 (task->dynamic_working_set != 0))
1285 && !(object->private)) {
1286 vm_object_t base_object;
1287 vm_object_offset_t base_offset;
1288 base_object = object;
1289 base_offset = offset;
1290 while(base_object->shadow) {
1291 base_offset +=
1292 base_object->shadow_offset;
1293 base_object =
1294 base_object->shadow;
1295 }
0b4e3aa0 1296 if(tws_lookup
1297 ((tws_hash_t)
1298 task->dynamic_working_set,
9bccf70c 1299 base_offset, base_object,
0b4e3aa0 1300 &line) == KERN_SUCCESS) {
1301 tws_line_signal((tws_hash_t)
1302 task->dynamic_working_set,
1303 map, line, vaddr);
1304 }
1305 }
1306#endif
1c79356b 1307 }
1308
1309 /*
1310 * Retry with same object/offset, since new data may
1311 * be in a different page (i.e., m is meaningless at
1312 * this point).
1313 */
1314 vm_object_lock(object);
1315 if ((interruptible != THREAD_UNINT) &&
1316 (current_thread()->state & TH_ABORT)) {
1317 vm_fault_cleanup(object, first_m);
9bccf70c 1318 thread_interrupt_level(interruptible_state);
1c79356b 1319 return(VM_FAULT_INTERRUPTED);
1320 }
0b4e3aa0 1321 if(m == VM_PAGE_NULL)
1322 break;
1c79356b 1323 continue;
1324 }
1325
1326 /*
1327 * The only case in which we get here is if
1328 * object has no pager (or unwiring). If the pager doesn't
1329 * have the page this is handled in the m->absent case above
1330 * (and if you change things here you should look above).
1331 */
1332#if TRACEFAULTPAGE
1333 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1334#endif
1335 if (object == first_object)
1336 first_m = m;
1337 else
1338 assert(m == VM_PAGE_NULL);
1339
1340 XPR(XPR_VM_FAULT,
1341 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1342 (integer_t)object, offset, (integer_t)m,
1343 (integer_t)object->shadow, 0);
1344 /*
1345 * Move on to the next object. Lock the next
1346 * object before unlocking the current one.
1347 */
1348 next_object = object->shadow;
1349 if (next_object == VM_OBJECT_NULL) {
1350 assert(!must_be_resident);
1351 /*
1352 * If there's no object left, fill the page
1353 * in the top object with zeros. But first we
1354 * need to allocate a real page.
1355 */
1356
1357 if (object != first_object) {
1358 vm_object_paging_end(object);
1359 vm_object_unlock(object);
1360
1361 object = first_object;
1362 offset = first_offset;
1363 vm_object_lock(object);
1364 }
1365
1366 m = first_m;
1367 assert(m->object == object);
1368 first_m = VM_PAGE_NULL;
1369
55e303ae 1370 if(m == VM_PAGE_NULL) {
1371 m = vm_page_grab();
1372 if (m == VM_PAGE_NULL) {
1373 vm_fault_cleanup(
1374 object, VM_PAGE_NULL);
1375 thread_interrupt_level(
1376 interruptible_state);
1377 return(VM_FAULT_MEMORY_SHORTAGE);
1378 }
1379 vm_page_insert(
1380 m, object, offset);
1381 }
1382
1c79356b 1383 if (object->shadow_severed) {
1384 VM_PAGE_FREE(m);
1385 vm_fault_cleanup(object, VM_PAGE_NULL);
9bccf70c 1386 thread_interrupt_level(interruptible_state);
1c79356b 1387 return VM_FAULT_MEMORY_ERROR;
1388 }
1389
55e303ae 1390 /*
1391 * are we protecting the system from
1392 * backing store exhaustion. If so
1393 * sleep unless we are privileged.
1394 */
1395
1396 if(vm_backing_store_low) {
1397 if(!(current_task()->priv_flags
1398 & VM_BACKING_STORE_PRIV)) {
1399 assert_wait((event_t)
1400 &vm_backing_store_low,
1401 THREAD_UNINT);
1402 VM_PAGE_FREE(m);
1403 vm_fault_cleanup(object, VM_PAGE_NULL);
1404 thread_block((void (*)(void)) 0);
1405 thread_interrupt_level(
1406 interruptible_state);
1407 return(VM_FAULT_RETRY);
1408 }
1409 }
1410
1c79356b 1411 if (VM_PAGE_THROTTLED() ||
1412 (m->fictitious && !vm_page_convert(m))) {
1413 VM_PAGE_FREE(m);
1414 vm_fault_cleanup(object, VM_PAGE_NULL);
9bccf70c 1415 thread_interrupt_level(interruptible_state);
1c79356b 1416 return(VM_FAULT_MEMORY_SHORTAGE);
1417 }
0b4e3aa0 1418 m->no_isync = FALSE;
1c79356b 1419
1420 if (!no_zero_fill) {
1421 vm_object_unlock(object);
1422 vm_page_zero_fill(m);
1c79356b 1423 vm_object_lock(object);
1424 }
55e303ae 1425 if (type_of_fault)
1426 *type_of_fault = DBG_ZERO_FILL_FAULT;
1427 VM_STAT(zero_fill_count++);
1428
1429 if (bumped_pagein == TRUE) {
1430 VM_STAT(pageins--);
1431 current_task()->pageins--;
1432 }
1433
1c79356b 1434 vm_page_lock_queues();
1435 VM_PAGE_QUEUES_REMOVE(m);
9bccf70c 1436 if(m->object->size > 0x80000) {
1437 m->zero_fill = TRUE;
1438 /* depends on the queues lock */
1439 vm_zf_count += 1;
1440 queue_enter(&vm_page_queue_zf,
1441 m, vm_page_t, pageq);
1442 } else {
1443 queue_enter(
1444 &vm_page_queue_inactive,
1445 m, vm_page_t, pageq);
1446 }
0b4e3aa0 1447 m->page_ticket = vm_page_ticket;
1448 vm_page_ticket_roll++;
1449 if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1450 vm_page_ticket_roll = 0;
1451 if(vm_page_ticket ==
1452 VM_PAGE_TICKET_ROLL_IDS)
1453 vm_page_ticket= 0;
1454 else
1455 vm_page_ticket++;
1456 }
1c79356b 1457 m->inactive = TRUE;
1458 vm_page_inactive_count++;
1459 vm_page_unlock_queues();
55e303ae 1460#if 0
1461 pmap_clear_modify(m->phys_page);
1462#endif
1c79356b 1463 break;
1464 }
1465 else {
1466 if ((object != first_object) || must_be_resident)
1467 vm_object_paging_end(object);
1468 offset += object->shadow_offset;
1469 hi_offset += object->shadow_offset;
1470 lo_offset += object->shadow_offset;
1471 access_required = VM_PROT_READ;
1472 vm_object_lock(next_object);
1473 vm_object_unlock(object);
1474 object = next_object;
1475 vm_object_paging_begin(object);
1476 }
1477 }
1478
1479 /*
1480 * PAGE HAS BEEN FOUND.
1481 *
1482 * This page (m) is:
1483 * busy, so that we can play with it;
1484 * not absent, so that nobody else will fill it;
1485 * possibly eligible for pageout;
1486 *
1487 * The top-level page (first_m) is:
1488 * VM_PAGE_NULL if the page was found in the
1489 * top-level object;
1490 * busy, not absent, and ineligible for pageout.
1491 *
1492 * The current object (object) is locked. A paging
1493 * reference is held for the current and top-level
1494 * objects.
1495 */
1496
1497#if TRACEFAULTPAGE
1498 dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1499#endif
1500#if EXTRA_ASSERTIONS
0b4e3aa0 1501 if(m != VM_PAGE_NULL) {
1502 assert(m->busy && !m->absent);
1503 assert((first_m == VM_PAGE_NULL) ||
1504 (first_m->busy && !first_m->absent &&
1505 !first_m->active && !first_m->inactive));
1506 }
1c79356b 1507#endif /* EXTRA_ASSERTIONS */
1508
1509 XPR(XPR_VM_FAULT,
1510 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1511 (integer_t)object, offset, (integer_t)m,
1512 (integer_t)first_object, (integer_t)first_m);
1513 /*
1514 * If the page is being written, but isn't
1515 * already owned by the top-level object,
1516 * we have to copy it into a new page owned
1517 * by the top-level object.
1518 */
1519
0b4e3aa0 1520 if ((object != first_object) && (m != VM_PAGE_NULL)) {
1c79356b 1521 /*
1522 * We only really need to copy if we
1523 * want to write it.
1524 */
1525
1526#if TRACEFAULTPAGE
1527 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1528#endif
1529 if (fault_type & VM_PROT_WRITE) {
1530 vm_page_t copy_m;
1531
1532 assert(!must_be_resident);
1533
55e303ae 1534 /*
1535 * are we protecting the system from
1536 * backing store exhaustion. If so
1537 * sleep unless we are privileged.
1538 */
1539
1540 if(vm_backing_store_low) {
1541 if(!(current_task()->priv_flags
1542 & VM_BACKING_STORE_PRIV)) {
1543 assert_wait((event_t)
1544 &vm_backing_store_low,
1545 THREAD_UNINT);
1546 RELEASE_PAGE(m);
1547 vm_fault_cleanup(object, first_m);
1548 thread_block((void (*)(void)) 0);
1549 thread_interrupt_level(
1550 interruptible_state);
1551 return(VM_FAULT_RETRY);
1552 }
1553 }
1554
1c79356b 1555 /*
1556 * If we try to collapse first_object at this
1557 * point, we may deadlock when we try to get
1558 * the lock on an intermediate object (since we
1559 * have the bottom object locked). We can't
1560 * unlock the bottom object, because the page
1561 * we found may move (by collapse) if we do.
1562 *
1563 * Instead, we first copy the page. Then, when
1564 * we have no more use for the bottom object,
1565 * we unlock it and try to collapse.
1566 *
1567 * Note that we copy the page even if we didn't
1568 * need to... that's the breaks.
1569 */
1570
1571 /*
1572 * Allocate a page for the copy
1573 */
1574 copy_m = vm_page_grab();
1575 if (copy_m == VM_PAGE_NULL) {
1576 RELEASE_PAGE(m);
1577 vm_fault_cleanup(object, first_m);
9bccf70c 1578 thread_interrupt_level(interruptible_state);
1c79356b 1579 return(VM_FAULT_MEMORY_SHORTAGE);
1580 }
1581
1582
1583 XPR(XPR_VM_FAULT,
1584 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1585 (integer_t)object, offset,
1586 (integer_t)m, (integer_t)copy_m, 0);
1587 vm_page_copy(m, copy_m);
1588
1589 /*
1590 * If another map is truly sharing this
1591 * page with us, we have to flush all
1592 * uses of the original page, since we
1593 * can't distinguish those which want the
1594 * original from those which need the
1595 * new copy.
1596 *
1597 * XXXO If we know that only one map has
1598 * access to this page, then we could
1599 * avoid the pmap_page_protect() call.
1600 */
1601
1602 vm_page_lock_queues();
1603 assert(!m->cleaning);
55e303ae 1604 pmap_page_protect(m->phys_page, VM_PROT_NONE);
1c79356b 1605 vm_page_deactivate(m);
1606 copy_m->dirty = TRUE;
1607 /*
1608 * Setting reference here prevents this fault from
1609 * being counted as a (per-thread) reactivate as well
1610 * as a copy-on-write.
1611 */
1612 first_m->reference = TRUE;
1613 vm_page_unlock_queues();
1614
1615 /*
1616 * We no longer need the old page or object.
1617 */
1618
1619 PAGE_WAKEUP_DONE(m);
1620 vm_object_paging_end(object);
1621 vm_object_unlock(object);
1622
1623 if (type_of_fault)
1624 *type_of_fault = DBG_COW_FAULT;
1625 VM_STAT(cow_faults++);
1626 current_task()->cow_faults++;
1627 object = first_object;
1628 offset = first_offset;
1629
1630 vm_object_lock(object);
1631 VM_PAGE_FREE(first_m);
1632 first_m = VM_PAGE_NULL;
1633 assert(copy_m->busy);
1634 vm_page_insert(copy_m, object, offset);
1635 m = copy_m;
1636
1637 /*
1638 * Now that we've gotten the copy out of the
1639 * way, let's try to collapse the top object.
1640 * But we have to play ugly games with
1641 * paging_in_progress to do that...
1642 */
1643
1644 vm_object_paging_end(object);
55e303ae 1645 vm_object_collapse(object, offset);
1c79356b 1646 vm_object_paging_begin(object);
1647
1648 }
1649 else {
1650 *protection &= (~VM_PROT_WRITE);
1651 }
1652 }
1653
1654 /*
1655 * Now check whether the page needs to be pushed into the
1656 * copy object. The use of asymmetric copy on write for
1657 * shared temporary objects means that we may do two copies to
1658 * satisfy the fault; one above to get the page from a
1659 * shadowed object, and one here to push it into the copy.
1660 */
1661
9bccf70c 1662 while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
0b4e3aa0 1663 (m!= VM_PAGE_NULL)) {
1c79356b 1664 vm_object_offset_t copy_offset;
1665 vm_page_t copy_m;
1666
1667#if TRACEFAULTPAGE
1668 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1669#endif
1670 /*
1671 * If the page is being written, but hasn't been
1672 * copied to the copy-object, we have to copy it there.
1673 */
1674
1675 if ((fault_type & VM_PROT_WRITE) == 0) {
1676 *protection &= ~VM_PROT_WRITE;
1677 break;
1678 }
1679
1680 /*
1681 * If the page was guaranteed to be resident,
1682 * we must have already performed the copy.
1683 */
1684
1685 if (must_be_resident)
1686 break;
1687
1688 /*
1689 * Try to get the lock on the copy_object.
1690 */
1691 if (!vm_object_lock_try(copy_object)) {
1692 vm_object_unlock(object);
1693
1694 mutex_pause(); /* wait a bit */
1695
1696 vm_object_lock(object);
1697 continue;
1698 }
1699
1700 /*
1701 * Make another reference to the copy-object,
1702 * to keep it from disappearing during the
1703 * copy.
1704 */
1705 assert(copy_object->ref_count > 0);
1706 copy_object->ref_count++;
1707 VM_OBJ_RES_INCR(copy_object);
1708
1709 /*
1710 * Does the page exist in the copy?
1711 */
1712 copy_offset = first_offset - copy_object->shadow_offset;
1713 if (copy_object->size <= copy_offset)
1714 /*
1715 * Copy object doesn't cover this page -- do nothing.
1716 */
1717 ;
1718 else if ((copy_m =
1719 vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1720 /* Page currently exists in the copy object */
1721 if (copy_m->busy) {
1722 /*
1723 * If the page is being brought
1724 * in, wait for it and then retry.
1725 */
1726 RELEASE_PAGE(m);
1727 /* take an extra ref so object won't die */
1728 assert(copy_object->ref_count > 0);
1729 copy_object->ref_count++;
1730 vm_object_res_reference(copy_object);
1731 vm_object_unlock(copy_object);
1732 vm_fault_cleanup(object, first_m);
1733 counter(c_vm_fault_page_block_backoff_kernel++);
1734 vm_object_lock(copy_object);
1735 assert(copy_object->ref_count > 0);
1736 VM_OBJ_RES_DECR(copy_object);
1737 copy_object->ref_count--;
1738 assert(copy_object->ref_count > 0);
1739 copy_m = vm_page_lookup(copy_object, copy_offset);
1740 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1741 PAGE_ASSERT_WAIT(copy_m, interruptible);
1742 vm_object_unlock(copy_object);
9bccf70c 1743 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b 1744 vm_object_deallocate(copy_object);
1745 goto backoff;
1746 } else {
1747 vm_object_unlock(copy_object);
1748 vm_object_deallocate(copy_object);
9bccf70c 1749 thread_interrupt_level(interruptible_state);
1c79356b 1750 return VM_FAULT_RETRY;
1751 }
1752 }
1753 }
1754 else if (!PAGED_OUT(copy_object, copy_offset)) {
1755 /*
1756 * If PAGED_OUT is TRUE, then the page used to exist
1757 * in the copy-object, and has already been paged out.
1758 * We don't need to repeat this. If PAGED_OUT is
1759 * FALSE, then either we don't know (!pager_created,
1760 * for example) or it hasn't been paged out.
1761 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1762 * We must copy the page to the copy object.
1763 */
1764
55e303ae 1765 /*
1766 * are we protecting the system from
1767 * backing store exhaustion. If so
1768 * sleep unless we are privileged.
1769 */
1770
1771 if(vm_backing_store_low) {
1772 if(!(current_task()->priv_flags
1773 & VM_BACKING_STORE_PRIV)) {
1774 assert_wait((event_t)
1775 &vm_backing_store_low,
1776 THREAD_UNINT);
1777 RELEASE_PAGE(m);
1778 VM_OBJ_RES_DECR(copy_object);
1779 copy_object->ref_count--;
1780 assert(copy_object->ref_count > 0);
1781 vm_object_unlock(copy_object);
1782 vm_fault_cleanup(object, first_m);
1783 thread_block((void (*)(void)) 0);
1784 thread_interrupt_level(
1785 interruptible_state);
1786 return(VM_FAULT_RETRY);
1787 }
1788 }
1789
1c79356b 1790 /*
1791 * Allocate a page for the copy
1792 */
1793 copy_m = vm_page_alloc(copy_object, copy_offset);
1794 if (copy_m == VM_PAGE_NULL) {
1795 RELEASE_PAGE(m);
1796 VM_OBJ_RES_DECR(copy_object);
1797 copy_object->ref_count--;
1798 assert(copy_object->ref_count > 0);
1799 vm_object_unlock(copy_object);
1800 vm_fault_cleanup(object, first_m);
9bccf70c 1801 thread_interrupt_level(interruptible_state);
1c79356b 1802 return(VM_FAULT_MEMORY_SHORTAGE);
1803 }
1804
1805 /*
1806 * Must copy page into copy-object.
1807 */
1808
1809 vm_page_copy(m, copy_m);
1810
1811 /*
1812 * If the old page was in use by any users
1813 * of the copy-object, it must be removed
1814 * from all pmaps. (We can't know which
1815 * pmaps use it.)
1816 */
1817
1818 vm_page_lock_queues();
1819 assert(!m->cleaning);
55e303ae 1820 pmap_page_protect(m->phys_page, VM_PROT_NONE);
1c79356b 1821 copy_m->dirty = TRUE;
1822 vm_page_unlock_queues();
1823
1824 /*
1825 * If there's a pager, then immediately
1826 * page out this page, using the "initialize"
1827 * option. Else, we use the copy.
1828 */
1829
1830 if
1831#if MACH_PAGEMAP
1832 ((!copy_object->pager_created) ||
1833 vm_external_state_get(
1834 copy_object->existence_map, copy_offset)
1835 == VM_EXTERNAL_STATE_ABSENT)
1836#else
1837 (!copy_object->pager_created)
1838#endif
1839 {
1840 vm_page_lock_queues();
1841 vm_page_activate(copy_m);
1842 vm_page_unlock_queues();
1843 PAGE_WAKEUP_DONE(copy_m);
1844 }
1845 else {
1846 assert(copy_m->busy == TRUE);
1847
1848 /*
1849 * The page is already ready for pageout:
1850 * not on pageout queues and busy.
1851 * Unlock everything except the
1852 * copy_object itself.
1853 */
1854
1855 vm_object_unlock(object);
1856
1857 /*
1858 * Write the page to the copy-object,
1859 * flushing it from the kernel.
1860 */
1861
1862 vm_pageout_initialize_page(copy_m);
1863
1864 /*
1865 * Since the pageout may have
1866 * temporarily dropped the
1867 * copy_object's lock, we
1868 * check whether we'll have
1869 * to deallocate the hard way.
1870 */
1871
1872 if ((copy_object->shadow != object) ||
1873 (copy_object->ref_count == 1)) {
1874 vm_object_unlock(copy_object);
1875 vm_object_deallocate(copy_object);
1876 vm_object_lock(object);
1877 continue;
1878 }
1879
1880 /*
1881 * Pick back up the old object's
1882 * lock. [It is safe to do so,
1883 * since it must be deeper in the
1884 * object tree.]
1885 */
1886
1887 vm_object_lock(object);
1888 }
1889
1890 /*
1891 * Because we're pushing a page upward
1892 * in the object tree, we must restart
1893 * any faults that are waiting here.
1894 * [Note that this is an expansion of
1895 * PAGE_WAKEUP that uses the THREAD_RESTART
1896 * wait result]. Can't turn off the page's
1897 * busy bit because we're not done with it.
1898 */
1899
1900 if (m->wanted) {
1901 m->wanted = FALSE;
1902 thread_wakeup_with_result((event_t) m,
1903 THREAD_RESTART);
1904 }
1905 }
1906
1907 /*
1908 * The reference count on copy_object must be
1909 * at least 2: one for our extra reference,
1910 * and at least one from the outside world
1911 * (we checked that when we last locked
1912 * copy_object).
1913 */
1914 copy_object->ref_count--;
1915 assert(copy_object->ref_count > 0);
1916 VM_OBJ_RES_DECR(copy_object);
1917 vm_object_unlock(copy_object);
1918
1919 break;
1920 }
1921
1922 *result_page = m;
1923 *top_page = first_m;
1924
1925 XPR(XPR_VM_FAULT,
1926 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1927 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1928 /*
1929 * If the page can be written, assume that it will be.
1930 * [Earlier, we restricted the permission to allow write
1931 * access only if the fault required it, so we don't
1932 * mark read-only data as dirty.]
1933 */
1934
55e303ae
A
1935
1936 if(m != VM_PAGE_NULL) {
1c79356b 1937#if !VM_FAULT_STATIC_CONFIG
55e303ae
A
1938 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
1939 m->dirty = TRUE;
1c79356b 1940#endif
55e303ae
A
1941 if (vm_page_deactivate_behind)
1942 vm_fault_deactivate_behind(object, offset, behavior);
1943 } else {
1944 vm_object_unlock(object);
1c79356b 1945 }
55e303ae
A
1946 thread_interrupt_level(interruptible_state);
1947
1c79356b
A
1948#if TRACEFAULTPAGE
1949 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */
1950#endif
1c79356b
A
1951 return(VM_FAULT_SUCCESS);
1952
1953#if 0
1954 block_and_backoff:
1955 vm_fault_cleanup(object, first_m);
1956
1957 counter(c_vm_fault_page_block_backoff_kernel++);
9bccf70c 1958 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
1959#endif
1960
1961 backoff:
9bccf70c 1962 thread_interrupt_level(interruptible_state);
1c79356b
A
1963 if (wait_result == THREAD_INTERRUPTED)
1964 return VM_FAULT_INTERRUPTED;
1965 return VM_FAULT_RETRY;
1966
1967#undef RELEASE_PAGE
1968}
1969
55e303ae
A
1970/*
1971 * Routine: vm_fault_tws_insert
1972 * Purpose:
1973 * Add fault information to the task working set.
1974 * Implementation:
1975 * We always insert the base object/offset pair
1976 * rather than the actual object/offset.
1977 * Assumptions:
1978 * Map and pmap_map locked.
1979 * Object locked and referenced.
1980 * Returns:
1981 * TRUE if startup file should be written.
1982 * With object locked and still referenced.
1983 * But we may drop the object lock temporarily.
1984 */
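/*
 * Usage sketch (illustration only; it mirrors the call sites later in
 * this file): callers in the fault path invoke this with the object
 * locked, remember the result, and write the startup file once all
 * locks have been dropped:
 *
 *	write_startup_file =
 *		vm_fault_tws_insert(map, pmap_map, vaddr, object, offset);
 *	...
 *	if (write_startup_file)
 *		tws_send_startup_info(current_task());
 */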
1985static boolean_t
1986vm_fault_tws_insert(
1987 vm_map_t map,
1988 vm_map_t pmap_map,
1989 vm_offset_t vaddr,
1990 vm_object_t object,
1991 vm_object_offset_t offset)
1992{
1993 tws_hash_line_t line;
1994 task_t task;
1995 kern_return_t kr;
1996 boolean_t result = FALSE;
1997 extern vm_map_t kalloc_map;
1998
1999 /* Avoid possible map lock deadlock issues */
2000 if (map == kernel_map || map == kalloc_map ||
2001 pmap_map == kernel_map || pmap_map == kalloc_map)
2002 return result;
2003
2004 task = current_task();
2005 if (task->dynamic_working_set != 0) {
2006 vm_object_t base_object;
2007 vm_object_t base_shadow;
2008 vm_object_offset_t base_offset;
2009 base_object = object;
2010 base_offset = offset;
2011 while(base_shadow = base_object->shadow) {
2012 vm_object_lock(base_shadow);
2013 vm_object_unlock(base_object);
2014 base_offset +=
2015 base_object->shadow_offset;
2016 base_object = base_shadow;
2017 }
2018 kr = tws_lookup((tws_hash_t)
2019 task->dynamic_working_set,
2020 base_offset, base_object,
2021 &line);
2022 if (kr == KERN_OPERATION_TIMED_OUT){
2023 result = TRUE;
2024 if (base_object != object) {
2025 vm_object_unlock(base_object);
2026 vm_object_lock(object);
2027 }
2028 } else if (kr != KERN_SUCCESS) {
2029 if(base_object != object)
2030 vm_object_reference_locked(base_object);
2031 kr = tws_insert((tws_hash_t)
2032 task->dynamic_working_set,
2033 base_offset, base_object,
2034 vaddr, pmap_map);
2035 if(base_object != object) {
2036 vm_object_unlock(base_object);
2037 vm_object_deallocate(base_object);
2038 }
2039 if(kr == KERN_NO_SPACE) {
2040 if (base_object == object)
2041 vm_object_unlock(object);
2042 tws_expand_working_set(
2043 task->dynamic_working_set,
2044 TWS_HASH_LINE_COUNT,
2045 FALSE);
2046 if (base_object == object)
2047 vm_object_lock(object);
2048 } else if(kr == KERN_OPERATION_TIMED_OUT) {
2049 result = TRUE;
2050 }
2051 if(base_object != object)
2052 vm_object_lock(object);
2053 } else if (base_object != object) {
2054 vm_object_unlock(base_object);
2055 vm_object_lock(object);
2056 }
2057 }
2058 return result;
2059}
2060
1c79356b
A
2061/*
2062 * Routine: vm_fault
2063 * Purpose:
2064 * Handle page faults, including pseudo-faults
2065 * used to change the wiring status of pages.
2066 * Returns:
2067 * Explicit continuations have been removed.
2068 * Implementation:
2069 * vm_fault and vm_fault_page save mucho state
2070 * in the moral equivalent of a closure. The state
2071 * structure is allocated when first entering vm_fault
2072 * and deallocated when leaving vm_fault.
2073 */
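/*
 * Illustrative sketch (not part of the original source): a typical
 * call from a machine-level trap handler.  "fault_map" and
 * "fault_addr" are hypothetical names used only for this example.
 * The FALSE argument means this is not a wiring change, and the null
 * caller_pmap tells vm_fault() to enter the translation into the pmap
 * of the map the address resolves in.
 *
 *	kr = vm_fault(fault_map, fault_addr, VM_PROT_READ,
 *		      FALSE, THREAD_ABORTSAFE,
 *		      (pmap_t) NULL, (vm_offset_t) 0);
 *	if (kr != KERN_SUCCESS)
 *		... deliver an exception to the faulting thread ...
 */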
2074
2075kern_return_t
2076vm_fault(
2077 vm_map_t map,
2078 vm_offset_t vaddr,
2079 vm_prot_t fault_type,
2080 boolean_t change_wiring,
9bccf70c
A
2081 int interruptible,
2082 pmap_t caller_pmap,
2083 vm_offset_t caller_pmap_addr)
1c79356b
A
2084{
2085 vm_map_version_t version; /* Map version for verification */
2086 boolean_t wired; /* Should mapping be wired down? */
2087 vm_object_t object; /* Top-level object */
2088 vm_object_offset_t offset; /* Top-level offset */
2089 vm_prot_t prot; /* Protection for mapping */
2090 vm_behavior_t behavior; /* Expected paging behavior */
2091 vm_object_offset_t lo_offset, hi_offset;
2092 vm_object_t old_copy_object; /* Saved copy object */
2093 vm_page_t result_page; /* Result of vm_fault_page */
2094 vm_page_t top_page; /* Placeholder page */
2095 kern_return_t kr;
2096
2097 register
2098 vm_page_t m; /* Fast access to result_page */
2099 kern_return_t error_code; /* page error reasons */
2100 register
2101 vm_object_t cur_object;
2102 register
2103 vm_object_offset_t cur_offset;
2104 vm_page_t cur_m;
2105 vm_object_t new_object;
2106 int type_of_fault;
2107 vm_map_t pmap_map = map;
2108 vm_map_t original_map = map;
2109 pmap_t pmap = NULL;
2110 boolean_t funnel_set = FALSE;
2111 funnel_t *curflock;
2112 thread_t cur_thread;
2113 boolean_t interruptible_state;
9bccf70c
A
2114 unsigned int cache_attr;
2115 int write_startup_file = 0;
2116 vm_prot_t full_fault_type;
1c79356b 2117
55e303ae
A
2118 if (get_preemption_level() != 0)
2119 return (KERN_FAILURE);
de355530 2120
1c79356b
A
2121 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
2122 vaddr,
2123 0,
2124 0,
2125 0,
2126 0);
2127
9bccf70c
A
2128 /* at present we do not fully check for execute permission */
2129 /* we generally treat it as read except in certain device */
2130 /* memory settings */
2131 full_fault_type = fault_type;
2132 if(fault_type & VM_PROT_EXECUTE) {
2133 fault_type &= ~VM_PROT_EXECUTE;
2134 fault_type |= VM_PROT_READ;
2135 }
1c79356b 2136
9bccf70c 2137 interruptible_state = thread_interrupt_level(interruptible);
1c79356b
A
2138
2139 /*
2140 * assume we will hit a page in the cache;
2141 * otherwise, explicitly override with
2142 * the real fault type once we determine it
2143 */
2144 type_of_fault = DBG_CACHE_HIT_FAULT;
2145
2146 VM_STAT(faults++);
2147 current_task()->faults++;
2148
2149 /*
2150 * Drop the funnel if it is already held, then restore it while returning.
2151 */
55e303ae
A
2152 cur_thread = current_thread();
2153
1c79356b
A
2154 if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) {
2155 funnel_set = TRUE;
2156 curflock = cur_thread->funnel_lock;
2157 thread_funnel_set( curflock , FALSE);
2158 }
2159
2160 RetryFault: ;
2161
2162 /*
2163 * Find the backing store object and offset into
2164 * it to begin the search.
2165 */
2166 map = original_map;
2167 vm_map_lock_read(map);
2168 kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
2169 &object, &offset,
2170 &prot, &wired,
2171 &behavior, &lo_offset, &hi_offset, &pmap_map);
2172
2173 pmap = pmap_map->pmap;
2174
2175 if (kr != KERN_SUCCESS) {
2176 vm_map_unlock_read(map);
2177 goto done;
2178 }
2179
2180 /*
2181 * If the page is wired, we must fault for the current protection
2182 * value, to avoid further faults.
2183 */
2184
2185 if (wired)
2186 fault_type = prot | VM_PROT_WRITE;
2187
2188#if VM_FAULT_CLASSIFY
2189 /*
2190 * Temporary data gathering code
2191 */
2192 vm_fault_classify(object, offset, fault_type);
2193#endif
2194 /*
2195 * Fast fault code. The basic idea is to do as much as
2196 * possible while holding the map lock and object locks.
2197 * Busy pages are not used until the object lock has to
2198 * be dropped to do something (copy, zero fill, pmap enter).
2199 * Similarly, paging references aren't acquired until that
2200 * point, and object references aren't used.
2201 *
2202 * If we can figure out what to do
2203 * (zero fill, copy on write, pmap enter) while holding
2204 * the locks, then it gets done. Otherwise, we give up,
2205 * and use the original fault path (which doesn't hold
2206 * the map lock, and relies on busy pages).
2207 * The give up cases include:
2208 * - Have to talk to pager.
2209 * - Page is busy, absent or in error.
2210 * - Pager has locked out desired access.
2211 * - Fault needs to be restarted.
2212 * - Have to push page into copy object.
2213 *
2214 * The code is an infinite loop that moves one level down
2215 * the shadow chain each time. cur_object and cur_offset
2216 * refer to the current object being examined. object and offset
2217 * are the original object from the map. The loop is at the
2218 * top level if and only if object and cur_object are the same.
2219 *
2220 * Invariants: Map lock is held throughout. Lock is held on
2221 * original object and cur_object (if different) when
2222 * continuing or exiting loop.
2223 *
2224 */
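/*
 * A minimal sketch of the loop described above (illustration only,
 * not compiled): each pass either finishes the fault at the current
 * level or steps one object down the shadow chain; anything it cannot
 * finish falls through to the slow path below.
 *
 *	cur_object = object;
 *	cur_offset = offset;
 *	while (TRUE) {
 *		m = vm_page_lookup(cur_object, cur_offset);
 *		if (m != VM_PAGE_NULL) {
 *			if (m->busy || m->unusual || ...)
 *				break;			give up: slow path
 *			map in, or copy up for a write fault,
 *			then PMAP_ENTER() and return KERN_SUCCESS;
 *		}
 *		if (cur_object->pager_created)
 *			break;				must talk to pager
 *		if (cur_object->shadow == VM_OBJECT_NULL) {
 *			zero fill in the top object,
 *			then PMAP_ENTER() and return KERN_SUCCESS;
 *		}
 *		cur_offset += cur_object->shadow_offset;
 *		cur_object = cur_object->shadow;
 *	}
 */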
2225
2226
2227 /*
2228 * If this page is to be inserted in a copy delay object
2229 * for writing, and if the object has a copy, then the
2230 * copy delay strategy is implemented in the slow fault path (vm_fault_page).
2231 */
2232 if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
2233 object->copy == VM_OBJECT_NULL ||
2234 (fault_type & VM_PROT_WRITE) == 0) {
2235 cur_object = object;
2236 cur_offset = offset;
2237
2238 while (TRUE) {
2239 m = vm_page_lookup(cur_object, cur_offset);
2240 if (m != VM_PAGE_NULL) {
55e303ae 2241 if (m->busy) {
143cc14e
A
2242 wait_result_t result;
2243
2244 if (object != cur_object)
2245 vm_object_unlock(object);
2246
2247 vm_map_unlock_read(map);
2248 if (pmap_map != map)
2249 vm_map_unlock(pmap_map);
2250
2251#if !VM_FAULT_STATIC_CONFIG
2252 if (!vm_fault_interruptible)
2253 interruptible = THREAD_UNINT;
2254#endif
2255 result = PAGE_ASSERT_WAIT(m, interruptible);
1c79356b 2256
143cc14e
A
2257 vm_object_unlock(cur_object);
2258
2259 if (result == THREAD_WAITING) {
2260 result = thread_block(THREAD_CONTINUE_NULL);
2261
2262 counter(c_vm_fault_page_block_busy_kernel++);
2263 }
2264 if (result == THREAD_AWAKENED || result == THREAD_RESTART)
2265 goto RetryFault;
2266
2267 kr = KERN_ABORTED;
2268 goto done;
2269 }
0b4e3aa0
A
2270 if (m->unusual && (m->error || m->restart || m->private
2271 || m->absent || (fault_type & m->page_lock))) {
1c79356b 2272
143cc14e 2273 /*
1c79356b
A
2274 * Unusual case. Give up.
2275 */
2276 break;
2277 }
2278
2279 /*
2280 * Two cases of map in faults:
2281 * - At top level w/o copy object.
2282 * - Read fault anywhere.
2283 * --> must disallow write.
2284 */
2285
2286 if (object == cur_object &&
2287 object->copy == VM_OBJECT_NULL)
2288 goto FastMapInFault;
2289
2290 if ((fault_type & VM_PROT_WRITE) == 0) {
55e303ae 2291 boolean_t sequential;
1c79356b
A
2292
2293 prot &= ~VM_PROT_WRITE;
2294
2295 /*
2296 * Set up to map the page ...
2297 * mark the page busy, drop
2298 * locks and take a paging reference
2299 * on the object with the page.
2300 */
2301
2302 if (object != cur_object) {
2303 vm_object_unlock(object);
2304 object = cur_object;
2305 }
2306FastMapInFault:
2307 m->busy = TRUE;
2308
2309 vm_object_paging_begin(object);
1c79356b
A
2310
2311FastPmapEnter:
2312 /*
2313 * Check a couple of global reasons to
2314 * be conservative about write access.
2315 * Then do the pmap_enter.
2316 */
2317#if !VM_FAULT_STATIC_CONFIG
2318 if (vm_fault_dirty_handling
2319#if MACH_KDB
2320 || db_watchpoint_list
2321#endif
2322 && (fault_type & VM_PROT_WRITE) == 0)
2323 prot &= ~VM_PROT_WRITE;
2324#else /* STATIC_CONFIG */
2325#if MACH_KDB
2326 if (db_watchpoint_list
2327 && (fault_type & VM_PROT_WRITE) == 0)
2328 prot &= ~VM_PROT_WRITE;
2329#endif /* MACH_KDB */
2330#endif /* STATIC_CONFIG */
55e303ae
A
2331 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2332
2333 sequential = FALSE;
de355530 2334 if (m->no_isync == TRUE) {
143cc14e 2335 m->no_isync = FALSE;
55e303ae
A
2336 pmap_sync_caches_phys(m->phys_page);
2337 if (type_of_fault == DBG_CACHE_HIT_FAULT) {
2338 /*
2339 * found it in the cache, but this
2340 * is the first fault-in of the page (no_isync == TRUE)
2341 * so it must have come in as part of
2342 * a cluster... account 1 pagein against it
2343 */
2344 VM_STAT(pageins++);
2345 current_task()->pageins++;
2346 type_of_fault = DBG_PAGEIN_FAULT;
2347 sequential = TRUE;
2348 }
2349 } else if (cache_attr != VM_WIMG_DEFAULT) {
2350 pmap_sync_caches_phys(m->phys_page);
143cc14e 2351 }
0b4e3aa0 2352
9bccf70c
A
2353 if(caller_pmap) {
2354 PMAP_ENTER(caller_pmap,
2355 caller_pmap_addr, m,
2356 prot, cache_attr, wired);
2357 } else {
2358 PMAP_ENTER(pmap, vaddr, m,
2359 prot, cache_attr, wired);
2360 }
0b4e3aa0 2361
1c79356b 2362 /*
55e303ae 2363 * Hold queues lock to manipulate
1c79356b
A
2364 * the page queues. Change wiring
2365 * case is obvious. In soft ref bits
2366 * case activate page only if it fell
2367 * off paging queues, otherwise just
2368 * activate it if it's inactive.
2369 *
2370 * NOTE: original vm_fault code will
2371 * move active page to back of active
2372 * queue. This code doesn't.
2373 */
1c79356b 2374 vm_page_lock_queues();
765c9de3
A
2375 if (m->clustered) {
2376 vm_pagein_cluster_used++;
2377 m->clustered = FALSE;
2378 }
1c79356b
A
2379 m->reference = TRUE;
2380
2381 if (change_wiring) {
2382 if (wired)
2383 vm_page_wire(m);
2384 else
2385 vm_page_unwire(m);
2386 }
2387#if VM_FAULT_STATIC_CONFIG
2388 else {
2389 if (!m->active && !m->inactive)
2390 vm_page_activate(m);
2391 }
2392#else
2393 else if (software_reference_bits) {
2394 if (!m->active && !m->inactive)
2395 vm_page_activate(m);
2396 }
2397 else if (!m->active) {
2398 vm_page_activate(m);
2399 }
2400#endif
2401 vm_page_unlock_queues();
2402
2403 /*
2404 * That's it, clean up and return.
2405 */
2406 PAGE_WAKEUP_DONE(m);
143cc14e 2407
55e303ae
A
2408 sequential = (sequential && vm_page_deactivate_behind) ?
2409 vm_fault_deactivate_behind(object, cur_offset, behavior) :
2410 FALSE;
2411
2412 /*
2413 * Add non-sequential pages to the working set.
2414 * The sequential pages will be brought in through
2415 * normal clustering behavior.
2416 */
2417 if (!sequential && !object->private) {
2418 write_startup_file =
2419 vm_fault_tws_insert(map, pmap_map, vaddr,
2420 object, cur_offset);
143cc14e 2421 }
55e303ae
A
2422
2423 vm_object_paging_end(object);
1c79356b 2424 vm_object_unlock(object);
143cc14e 2425
1c79356b
A
2426 vm_map_unlock_read(map);
2427 if(pmap_map != map)
2428 vm_map_unlock(pmap_map);
2429
9bccf70c
A
2430 if(write_startup_file)
2431 tws_send_startup_info(current_task());
2432
143cc14e 2433 if (funnel_set)
1c79356b 2434 thread_funnel_set( curflock, TRUE);
143cc14e 2435
9bccf70c 2436 thread_interrupt_level(interruptible_state);
1c79356b 2437
143cc14e 2438
1c79356b
A
2439 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2440 vaddr,
9bccf70c 2441 type_of_fault & 0xff,
1c79356b 2442 KERN_SUCCESS,
9bccf70c 2443 type_of_fault >> 8,
1c79356b 2444 0);
143cc14e 2445
1c79356b
A
2446 return KERN_SUCCESS;
2447 }
2448
2449 /*
2450 * Copy on write fault. If objects match, then
2451 * object->copy must not be NULL (else control
2452 * would be in previous code block), and we
2453 * have a potential push into the copy object
2454 * with which we won't cope here.
2455 */
2456
2457 if (cur_object == object)
2458 break;
1c79356b
A
2459 /*
2460 * This is now a shadow based copy on write
2461 * fault -- it requires a copy up the shadow
2462 * chain.
2463 *
2464 * Allocate a page in the original top level
2465 * object. Give up if allocate fails. Also
2466 * need to remember current page, as it's the
2467 * source of the copy.
2468 */
2469 cur_m = m;
2470 m = vm_page_grab();
2471 if (m == VM_PAGE_NULL) {
2472 break;
2473 }
1c79356b
A
2474 /*
2475 * Now do the copy. Mark the source busy
2476 * and take out paging references on both
2477 * objects.
2478 *
2479 * NOTE: This code holds the map lock across
2480 * the page copy.
2481 */
2482
2483 cur_m->busy = TRUE;
2484 vm_page_copy(cur_m, m);
2485 vm_page_insert(m, object, offset);
2486
2487 vm_object_paging_begin(cur_object);
2488 vm_object_paging_begin(object);
2489
2490 type_of_fault = DBG_COW_FAULT;
2491 VM_STAT(cow_faults++);
2492 current_task()->cow_faults++;
2493
2494 /*
2495 * Now cope with the source page and object
2496 * If the top object has a ref count of 1
2497 * then no other map can access it, and hence
2498 * it's not necessary to do the pmap_page_protect.
2499 */
2500
2501
2502 vm_page_lock_queues();
2503 vm_page_deactivate(cur_m);
2504 m->dirty = TRUE;
55e303ae 2505 pmap_page_protect(cur_m->phys_page,
1c79356b
A
2506 VM_PROT_NONE);
2507 vm_page_unlock_queues();
2508
2509 PAGE_WAKEUP_DONE(cur_m);
2510 vm_object_paging_end(cur_object);
2511 vm_object_unlock(cur_object);
2512
2513 /*
2514 * Slight hack: call vm_object_collapse()
2515 * and then reuse the common map-in code.
2516 * Note that the object lock was taken above.
2517 */
2518
2519 vm_object_paging_end(object);
55e303ae 2520 vm_object_collapse(object, offset);
1c79356b 2521 vm_object_paging_begin(object);
1c79356b
A
2522
2523 goto FastPmapEnter;
2524 }
2525 else {
2526
2527 /*
2528 * No page at cur_object, cur_offset
2529 */
2530
2531 if (cur_object->pager_created) {
2532
2533 /*
2534 * Have to talk to the pager. Give up.
2535 */
1c79356b
A
2536 break;
2537 }
2538
2539
2540 if (cur_object->shadow == VM_OBJECT_NULL) {
2541
2542 if (cur_object->shadow_severed) {
2543 vm_object_paging_end(object);
2544 vm_object_unlock(object);
2545 vm_map_unlock_read(map);
2546 if(pmap_map != map)
2547 vm_map_unlock(pmap_map);
2548
9bccf70c
A
2549 if(write_startup_file)
2550 tws_send_startup_info(
2551 current_task());
2552
1c79356b
A
2553 if (funnel_set) {
2554 thread_funnel_set( curflock, TRUE);
2555 funnel_set = FALSE;
2556 }
9bccf70c 2557 thread_interrupt_level(interruptible_state);
1c79356b
A
2558
2559 return VM_FAULT_MEMORY_ERROR;
2560 }
2561
2562 /*
2563 * Zero fill fault. Page gets
2564 * filled in top object. Insert
2565 * page, then drop any lower lock.
2566 * Give up if no page.
2567 */
55e303ae
A
2568 if (VM_PAGE_THROTTLED()) {
2569 break;
2570 }
2571
2572 /*
2573 * Are we protecting the system from
2574 * backing store exhaustion? If so,
2575 * sleep unless we are privileged.
2576 */
2577 if(vm_backing_store_low) {
2578 if(!(current_task()->priv_flags
2579 & VM_BACKING_STORE_PRIV))
1c79356b
A
2580 break;
2581 }
2582 m = vm_page_alloc(object, offset);
2583 if (m == VM_PAGE_NULL) {
2584 break;
2585 }
0b4e3aa0
A
2586 /*
2587 * This is a zero-fill or initial fill
2588 * page fault. As such, we consider it
2589 * undefined with respect to instruction
2590 * execution. i.e. it is the responsibility
2591 * of higher layers to call for an instruction
2592 * sync after changing the contents and before
2593 * sending a program into this area. We
2594 * choose this approach for performance
2595 */
2596
2597 m->no_isync = FALSE;
1c79356b
A
2598
2599 if (cur_object != object)
2600 vm_object_unlock(cur_object);
2601
2602 vm_object_paging_begin(object);
2603 vm_object_unlock(object);
2604
2605 /*
2606 * Now zero-fill the page and map it.
2607 * The page is probably going to
2608 * be written soon, so don't bother
2609 * to clear the modified bit
2610 *
2611 * NOTE: This code holds the map
2612 * lock across the zero fill.
2613 */
2614
2615 if (!map->no_zero_fill) {
2616 vm_page_zero_fill(m);
2617 type_of_fault = DBG_ZERO_FILL_FAULT;
2618 VM_STAT(zero_fill_count++);
2619 }
2620 vm_page_lock_queues();
2621 VM_PAGE_QUEUES_REMOVE(m);
0b4e3aa0
A
2622
2623 m->page_ticket = vm_page_ticket;
9bccf70c
A
2624 if(m->object->size > 0x80000) {
2625 m->zero_fill = TRUE;
2626 /* depends on the queues lock */
2627 vm_zf_count += 1;
2628 queue_enter(&vm_page_queue_zf,
2629 m, vm_page_t, pageq);
2630 } else {
2631 queue_enter(
2632 &vm_page_queue_inactive,
2633 m, vm_page_t, pageq);
2634 }
0b4e3aa0
A
2635 vm_page_ticket_roll++;
2636 if(vm_page_ticket_roll ==
2637 VM_PAGE_TICKETS_IN_ROLL) {
2638 vm_page_ticket_roll = 0;
2639 if(vm_page_ticket ==
2640 VM_PAGE_TICKET_ROLL_IDS)
2641 vm_page_ticket= 0;
2642 else
2643 vm_page_ticket++;
2644 }
2645
1c79356b
A
2646 m->inactive = TRUE;
2647 vm_page_inactive_count++;
2648 vm_page_unlock_queues();
143cc14e
A
2649 vm_object_lock(object);
2650
1c79356b
A
2651 goto FastPmapEnter;
2652 }
2653
2654 /*
2655 * On to the next level
2656 */
2657
2658 cur_offset += cur_object->shadow_offset;
2659 new_object = cur_object->shadow;
2660 vm_object_lock(new_object);
2661 if (cur_object != object)
2662 vm_object_unlock(cur_object);
2663 cur_object = new_object;
2664
2665 continue;
2666 }
2667 }
2668
2669 /*
2670 * Cleanup from fast fault failure. Drop any object
2671 * lock other than original and drop map lock.
2672 */
2673
2674 if (object != cur_object)
2675 vm_object_unlock(cur_object);
2676 }
2677 vm_map_unlock_read(map);
143cc14e 2678
1c79356b
A
2679 if(pmap_map != map)
2680 vm_map_unlock(pmap_map);
2681
2682 /*
2683 * Make a reference to this object to
2684 * prevent its disposal while we are messing with
2685 * it. Once we have the reference, the map is free
2686 * to be diddled. Since objects reference their
2687 * shadows (and copies), they will stay around as well.
2688 */
2689
2690 assert(object->ref_count > 0);
2691 object->ref_count++;
2692 vm_object_res_reference(object);
2693 vm_object_paging_begin(object);
2694
2695 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
55e303ae
A
2696
2697 if (!object->private) {
2698 write_startup_file =
2699 vm_fault_tws_insert(map, pmap_map, vaddr, object, offset);
9bccf70c 2700 }
55e303ae 2701
1c79356b
A
2702 kr = vm_fault_page(object, offset, fault_type,
2703 (change_wiring && !wired),
2704 interruptible,
2705 lo_offset, hi_offset, behavior,
2706 &prot, &result_page, &top_page,
2707 &type_of_fault,
0b4e3aa0 2708 &error_code, map->no_zero_fill, FALSE, map, vaddr);
1c79356b
A
2709
2710 /*
2711 * If we didn't succeed, lose the object reference immediately.
2712 */
2713
2714 if (kr != VM_FAULT_SUCCESS)
2715 vm_object_deallocate(object);
2716
2717 /*
2718 * See why we failed, and take corrective action.
2719 */
2720
2721 switch (kr) {
2722 case VM_FAULT_SUCCESS:
2723 break;
2724 case VM_FAULT_MEMORY_SHORTAGE:
2725 if (vm_page_wait((change_wiring) ?
2726 THREAD_UNINT :
2727 THREAD_ABORTSAFE))
2728 goto RetryFault;
2729 /* fall thru */
2730 case VM_FAULT_INTERRUPTED:
2731 kr = KERN_ABORTED;
2732 goto done;
2733 case VM_FAULT_RETRY:
2734 goto RetryFault;
2735 case VM_FAULT_FICTITIOUS_SHORTAGE:
2736 vm_page_more_fictitious();
2737 goto RetryFault;
2738 case VM_FAULT_MEMORY_ERROR:
2739 if (error_code)
2740 kr = error_code;
2741 else
2742 kr = KERN_MEMORY_ERROR;
2743 goto done;
2744 }
2745
2746 m = result_page;
2747
0b4e3aa0
A
2748 if(m != VM_PAGE_NULL) {
2749 assert((change_wiring && !wired) ?
2750 (top_page == VM_PAGE_NULL) :
2751 ((top_page == VM_PAGE_NULL) == (m->object == object)));
2752 }
1c79356b
A
2753
2754 /*
2755 * How to clean up the result of vm_fault_page. This
2756 * happens whether the mapping is entered or not.
2757 */
2758
2759#define UNLOCK_AND_DEALLOCATE \
2760 MACRO_BEGIN \
2761 vm_fault_cleanup(m->object, top_page); \
2762 vm_object_deallocate(object); \
2763 MACRO_END
2764
2765 /*
2766 * What to do with the resulting page from vm_fault_page
2767 * if it doesn't get entered into the physical map:
2768 */
2769
2770#define RELEASE_PAGE(m) \
2771 MACRO_BEGIN \
2772 PAGE_WAKEUP_DONE(m); \
2773 vm_page_lock_queues(); \
2774 if (!m->active && !m->inactive) \
2775 vm_page_activate(m); \
2776 vm_page_unlock_queues(); \
2777 MACRO_END
2778
2779 /*
2780 * We must verify that the maps have not changed
2781 * since our last lookup.
2782 */
2783
0b4e3aa0
A
2784 if(m != VM_PAGE_NULL) {
2785 old_copy_object = m->object->copy;
0b4e3aa0
A
2786 vm_object_unlock(m->object);
2787 } else {
2788 old_copy_object = VM_OBJECT_NULL;
2789 }
1c79356b
A
2790 if ((map != original_map) || !vm_map_verify(map, &version)) {
2791 vm_object_t retry_object;
2792 vm_object_offset_t retry_offset;
2793 vm_prot_t retry_prot;
2794
2795 /*
2796 * To avoid trying to write_lock the map while another
2797 * thread has it read_locked (in vm_map_pageable), we
2798 * do not try for write permission. If the page is
2799 * still writable, we will get write permission. If it
2800 * is not, or has been marked needs_copy, we enter the
2801 * mapping without write permission, and will merely
2802 * take another fault.
2803 */
2804 map = original_map;
2805 vm_map_lock_read(map);
2806 kr = vm_map_lookup_locked(&map, vaddr,
2807 fault_type & ~VM_PROT_WRITE, &version,
2808 &retry_object, &retry_offset, &retry_prot,
2809 &wired, &behavior, &lo_offset, &hi_offset,
2810 &pmap_map);
2811 pmap = pmap_map->pmap;
2812
2813 if (kr != KERN_SUCCESS) {
2814 vm_map_unlock_read(map);
0b4e3aa0
A
2815 if(m != VM_PAGE_NULL) {
2816 vm_object_lock(m->object);
2817 RELEASE_PAGE(m);
2818 UNLOCK_AND_DEALLOCATE;
2819 } else {
2820 vm_object_deallocate(object);
2821 }
1c79356b
A
2822 goto done;
2823 }
2824
2825 vm_object_unlock(retry_object);
0b4e3aa0
A
2826 if(m != VM_PAGE_NULL) {
2827 vm_object_lock(m->object);
2828 } else {
2829 vm_object_lock(object);
2830 }
1c79356b
A
2831
2832 if ((retry_object != object) ||
2833 (retry_offset != offset)) {
2834 vm_map_unlock_read(map);
2835 if(pmap_map != map)
2836 vm_map_unlock(pmap_map);
0b4e3aa0
A
2837 if(m != VM_PAGE_NULL) {
2838 RELEASE_PAGE(m);
2839 UNLOCK_AND_DEALLOCATE;
2840 } else {
2841 vm_object_deallocate(object);
2842 }
1c79356b
A
2843 goto RetryFault;
2844 }
2845
2846 /*
2847 * Check whether the protection has changed or the object
2848 * has been copied while we left the map unlocked.
2849 */
2850 prot &= retry_prot;
0b4e3aa0
A
2851 if(m != VM_PAGE_NULL) {
2852 vm_object_unlock(m->object);
2853 } else {
2854 vm_object_unlock(object);
2855 }
2856 }
2857 if(m != VM_PAGE_NULL) {
2858 vm_object_lock(m->object);
2859 } else {
2860 vm_object_lock(object);
1c79356b 2861 }
1c79356b
A
2862
2863 /*
2864 * If the copy object changed while the top-level object
2865 * was unlocked, then we must take away write permission.
2866 */
2867
0b4e3aa0
A
2868 if(m != VM_PAGE_NULL) {
2869 if (m->object->copy != old_copy_object)
2870 prot &= ~VM_PROT_WRITE;
2871 }
1c79356b
A
2872
2873 /*
2874 * If we want to wire down this page, but no longer have
2875 * adequate permissions, we must start all over.
2876 */
2877
2878 if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2879 vm_map_verify_done(map, &version);
2880 if(pmap_map != map)
2881 vm_map_unlock(pmap_map);
0b4e3aa0
A
2882 if(m != VM_PAGE_NULL) {
2883 RELEASE_PAGE(m);
2884 UNLOCK_AND_DEALLOCATE;
2885 } else {
2886 vm_object_deallocate(object);
2887 }
1c79356b
A
2888 goto RetryFault;
2889 }
2890
1c79356b
A
2891 /*
2892 * Put this page into the physical map.
2893 * We had to do the unlock above because pmap_enter
2894 * may cause other faults. The page may be on
2895 * the pageout queues. If the pageout daemon comes
2896 * across the page, it will remove it from the queues.
2897 */
765c9de3
A
2898 if (m != VM_PAGE_NULL) {
2899 if (m->no_isync == TRUE) {
55e303ae
A
2900 pmap_sync_caches_phys(m->phys_page);
2901
2902 if (type_of_fault == DBG_CACHE_HIT_FAULT) {
2903 /*
2904 * found it in the cache, but this
2905 * is the first fault-in of the page (no_isync == TRUE)
2906 * so it must have come in as part of
2907 * a cluster... account 1 pagein against it
2908 */
2909 VM_STAT(pageins++);
2910 current_task()->pageins++;
2911
2912 type_of_fault = DBG_PAGEIN_FAULT;
2913 }
765c9de3
A
2914 m->no_isync = FALSE;
2915 }
9bccf70c 2916 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
0b4e3aa0 2917
9bccf70c
A
2918 if(caller_pmap) {
2919 PMAP_ENTER(caller_pmap,
2920 caller_pmap_addr, m,
2921 prot, cache_attr, wired);
2922 } else {
2923 PMAP_ENTER(pmap, vaddr, m,
2924 prot, cache_attr, wired);
2925 }
55e303ae
A
2926
2927 /*
2928 * Add working set information for private objects here.
2929 */
2930 if (m->object->private) {
2931 write_startup_file =
2932 vm_fault_tws_insert(map, pmap_map, vaddr,
2933 m->object, m->offset);
0b4e3aa0
A
2934 }
2935 } else {
2936
9bccf70c
A
2937#ifndef i386
2938 int memattr;
9bccf70c
A
2939 vm_map_entry_t entry;
2940 vm_offset_t laddr;
2941 vm_offset_t ldelta, hdelta;
143cc14e 2942
0b4e3aa0
A
2943 /*
2944 * do a pmap block mapping from the physical address
2945 * in the object
2946 */
9bccf70c 2947
55e303ae
A
2948 /* While we do not worry about execution protection in */
2949 /* general, certain pages may have instruction execution */
2950 /* disallowed. We will check here, and if not allowed */
2951 /* to execute, we return with a protection failure. */
9bccf70c 2952
55e303ae
A
2953 if((full_fault_type & VM_PROT_EXECUTE) &&
2954 (pmap_canExecute((ppnum_t)
2955 (object->shadow_offset >> 12)) < 1)) {
9bccf70c 2956
9bccf70c
A
2957 vm_map_verify_done(map, &version);
2958 if(pmap_map != map)
2959 vm_map_unlock(pmap_map);
2960 vm_fault_cleanup(object, top_page);
2961 vm_object_deallocate(object);
2962 kr = KERN_PROTECTION_FAILURE;
2963 goto done;
0b4e3aa0 2964 }
1c79356b 2965
9bccf70c
A
2966 if(pmap_map != map) {
2967 vm_map_unlock(pmap_map);
2968 }
2969 if (original_map != map) {
2970 vm_map_unlock_read(map);
2971 vm_map_lock_read(original_map);
2972 map = original_map;
2973 }
2974 pmap_map = map;
2975
2976 laddr = vaddr;
2977 hdelta = 0xFFFFF000;
2978 ldelta = 0xFFFFF000;
2979
2980
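/*
 * Worked example (illustration only, assuming the simple case with no
 * submaps): if the matching entry spans [0x10000000, 0x10080000) and
 * laddr is 0x10030000, the loop below leaves ldelta = 0x30000 and
 * hdelta = 0x50000, so the pmap_map_block() call further down maps
 * the whole entry, [laddr - ldelta, laddr + hdelta), in one shot
 * instead of a page at a time.
 */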
2981 while(vm_map_lookup_entry(map, laddr, &entry)) {
2982 if(ldelta > (laddr - entry->vme_start))
2983 ldelta = laddr - entry->vme_start;
2984 if(hdelta > (entry->vme_end - laddr))
2985 hdelta = entry->vme_end - laddr;
2986 if(entry->is_sub_map) {
2987
2988 laddr = (laddr - entry->vme_start)
2989 + entry->offset;
2990 vm_map_lock_read(entry->object.sub_map);
2991 if(map != pmap_map)
2992 vm_map_unlock_read(map);
2993 if(entry->use_pmap) {
2994 vm_map_unlock_read(pmap_map);
2995 pmap_map = entry->object.sub_map;
2996 }
2997 map = entry->object.sub_map;
2998
2999 } else {
3000 break;
3001 }
3002 }
3003
3004 if(vm_map_lookup_entry(map, laddr, &entry) &&
3005 (entry->object.vm_object != NULL) &&
3006 (entry->object.vm_object == object)) {
3007
3008
3009 if(caller_pmap) {
55e303ae 3010 /* Set up a block mapped area */
9bccf70c 3011 pmap_map_block(caller_pmap,
55e303ae
A
3012 (addr64_t)(caller_pmap_addr - ldelta),
3013 (((vm_offset_t)
9bccf70c
A
3014 (entry->object.vm_object->shadow_offset))
3015 + entry->offset +
55e303ae
A
3016 (laddr - entry->vme_start)
3017 - ldelta)>>12,
9bccf70c 3018 ldelta + hdelta, prot,
55e303ae
A
3019 (VM_WIMG_MASK & (int)object->wimg_bits), 0);
3020 } else {
3021 /* Set up a block mapped area */
3022 pmap_map_block(pmap_map->pmap,
3023 (addr64_t)(vaddr - ldelta),
3024 (((vm_offset_t)
9bccf70c 3025 (entry->object.vm_object->shadow_offset))
55e303ae
A
3026 + entry->offset +
3027 (laddr - entry->vme_start) - ldelta)>>12,
3028 ldelta + hdelta, prot,
3029 (VM_WIMG_MASK & (int)object->wimg_bits), 0);
9bccf70c
A
3030 }
3031 }
3032#else
3033#ifdef notyet
3034 if(caller_pmap) {
3035 pmap_enter(caller_pmap, caller_pmap_addr,
55e303ae 3036 object->shadow_offset>>12, prot, 0, TRUE);
9bccf70c
A
3037 } else {
3038 pmap_enter(pmap, vaddr,
55e303ae 3039 object->shadow_offset>>12, prot, 0, TRUE);
9bccf70c 3040 }
0b4e3aa0 3041 /* Map it in */
9bccf70c 3042#endif
0b4e3aa0
A
3043#endif
3044
3045 }
1c79356b
A
3046
3047 /*
3048 * If the page is not wired down and isn't already
3049 * on a pageout queue, then put it where the
3050 * pageout daemon can find it.
3051 */
0b4e3aa0 3052 if(m != VM_PAGE_NULL) {
0b4e3aa0
A
3053 vm_page_lock_queues();
3054
3055 if (change_wiring) {
3056 if (wired)
3057 vm_page_wire(m);
3058 else
3059 vm_page_unwire(m);
3060 }
1c79356b 3061#if VM_FAULT_STATIC_CONFIG
0b4e3aa0
A
3062 else {
3063 if (!m->active && !m->inactive)
3064 vm_page_activate(m);
3065 m->reference = TRUE;
3066 }
1c79356b 3067#else
0b4e3aa0
A
3068 else if (software_reference_bits) {
3069 if (!m->active && !m->inactive)
3070 vm_page_activate(m);
3071 m->reference = TRUE;
3072 } else {
1c79356b 3073 vm_page_activate(m);
0b4e3aa0 3074 }
1c79356b 3075#endif
0b4e3aa0
A
3076 vm_page_unlock_queues();
3077 }
1c79356b
A
3078
3079 /*
3080 * Unlock everything, and return
3081 */
3082
3083 vm_map_verify_done(map, &version);
3084 if(pmap_map != map)
3085 vm_map_unlock(pmap_map);
0b4e3aa0
A
3086 if(m != VM_PAGE_NULL) {
3087 PAGE_WAKEUP_DONE(m);
3088 UNLOCK_AND_DEALLOCATE;
3089 } else {
3090 vm_fault_cleanup(object, top_page);
3091 vm_object_deallocate(object);
3092 }
1c79356b 3093 kr = KERN_SUCCESS;
1c79356b
A
3094
3095#undef UNLOCK_AND_DEALLOCATE
3096#undef RELEASE_PAGE
3097
3098 done:
9bccf70c
A
3099 if(write_startup_file)
3100 tws_send_startup_info(current_task());
1c79356b
A
3101 if (funnel_set) {
3102 thread_funnel_set( curflock, TRUE);
3103 funnel_set = FALSE;
3104 }
9bccf70c 3105 thread_interrupt_level(interruptible_state);
1c79356b
A
3106
3107 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
3108 vaddr,
9bccf70c 3109 type_of_fault & 0xff,
1c79356b 3110 kr,
9bccf70c 3111 type_of_fault >> 8,
1c79356b 3112 0);
143cc14e 3113
1c79356b
A
3114 return(kr);
3115}
3116
3117/*
3118 * vm_fault_wire:
3119 *
3120 * Wire down a range of virtual addresses in a map.
3121 */
3122kern_return_t
3123vm_fault_wire(
3124 vm_map_t map,
3125 vm_map_entry_t entry,
9bccf70c
A
3126 pmap_t pmap,
3127 vm_offset_t pmap_addr)
1c79356b
A
3128{
3129
3130 register vm_offset_t va;
3131 register vm_offset_t end_addr = entry->vme_end;
3132 register kern_return_t rc;
3133
3134 assert(entry->in_transition);
3135
9bccf70c
A
3136 if ((entry->object.vm_object != NULL) &&
3137 !entry->is_sub_map &&
3138 entry->object.vm_object->phys_contiguous) {
3139 return KERN_SUCCESS;
3140 }
3141
1c79356b
A
3142 /*
3143 * Inform the physical mapping system that the
3144 * range of addresses may not fault, so that
3145 * page tables and such can be locked down as well.
3146 */
3147
9bccf70c
A
3148 pmap_pageable(pmap, pmap_addr,
3149 pmap_addr + (end_addr - entry->vme_start), FALSE);
1c79356b
A
3150
3151 /*
3152 * We simulate a fault to get the page and enter it
3153 * in the physical map.
3154 */
3155
3156 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3157 if ((rc = vm_fault_wire_fast(
9bccf70c
A
3158 map, va, entry, pmap,
3159 pmap_addr + (va - entry->vme_start)
3160 )) != KERN_SUCCESS) {
1c79356b 3161 rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
9bccf70c
A
3162 (pmap == kernel_pmap) ?
3163 THREAD_UNINT : THREAD_ABORTSAFE,
3164 pmap, pmap_addr + (va - entry->vme_start));
1c79356b
A
3165 }
3166
3167 if (rc != KERN_SUCCESS) {
3168 struct vm_map_entry tmp_entry = *entry;
3169
3170 /* unwire wired pages */
3171 tmp_entry.vme_end = va;
9bccf70c
A
3172 vm_fault_unwire(map,
3173 &tmp_entry, FALSE, pmap, pmap_addr);
1c79356b
A
3174
3175 return rc;
3176 }
3177 }
3178 return KERN_SUCCESS;
3179}
3180
3181/*
3182 * vm_fault_unwire:
3183 *
3184 * Unwire a range of virtual addresses in a map.
3185 */
3186void
3187vm_fault_unwire(
3188 vm_map_t map,
3189 vm_map_entry_t entry,
3190 boolean_t deallocate,
9bccf70c
A
3191 pmap_t pmap,
3192 vm_offset_t pmap_addr)
1c79356b
A
3193{
3194 register vm_offset_t va;
3195 register vm_offset_t end_addr = entry->vme_end;
3196 vm_object_t object;
3197
3198 object = (entry->is_sub_map)
3199 ? VM_OBJECT_NULL : entry->object.vm_object;
3200
3201 /*
3202 * Since the pages are wired down, we must be able to
3203 * get their mappings from the physical map system.
3204 */
3205
3206 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
9bccf70c
A
3207 pmap_change_wiring(pmap,
3208 pmap_addr + (va - entry->vme_start), FALSE);
1c79356b
A
3209
3210 if (object == VM_OBJECT_NULL) {
9bccf70c
A
3211 (void) vm_fault(map, va, VM_PROT_NONE,
3212 TRUE, THREAD_UNINT, pmap, pmap_addr);
3213 } else if (object->phys_contiguous) {
3214 continue;
1c79356b
A
3215 } else {
3216 vm_prot_t prot;
3217 vm_page_t result_page;
3218 vm_page_t top_page;
3219 vm_object_t result_object;
3220 vm_fault_return_t result;
3221
3222 do {
3223 prot = VM_PROT_NONE;
3224
3225 vm_object_lock(object);
3226 vm_object_paging_begin(object);
3227 XPR(XPR_VM_FAULT,
3228 "vm_fault_unwire -> vm_fault_page\n",
3229 0,0,0,0,0);
3230 result = vm_fault_page(object,
3231 entry->offset +
3232 (va - entry->vme_start),
3233 VM_PROT_NONE, TRUE,
3234 THREAD_UNINT,
3235 entry->offset,
3236 entry->offset +
3237 (entry->vme_end
3238 - entry->vme_start),
3239 entry->behavior,
3240 &prot,
3241 &result_page,
3242 &top_page,
3243 (int *)0,
3244 0, map->no_zero_fill,
0b4e3aa0 3245 FALSE, NULL, 0);
1c79356b
A
3246 } while (result == VM_FAULT_RETRY);
3247
3248 if (result != VM_FAULT_SUCCESS)
3249 panic("vm_fault_unwire: failure");
3250
3251 result_object = result_page->object;
3252 if (deallocate) {
3253 assert(!result_page->fictitious);
55e303ae 3254 pmap_page_protect(result_page->phys_page,
1c79356b
A
3255 VM_PROT_NONE);
3256 VM_PAGE_FREE(result_page);
3257 } else {
3258 vm_page_lock_queues();
3259 vm_page_unwire(result_page);
3260 vm_page_unlock_queues();
3261 PAGE_WAKEUP_DONE(result_page);
3262 }
3263
3264 vm_fault_cleanup(result_object, top_page);
3265 }
3266 }
3267
3268 /*
3269 * Inform the physical mapping system that the range
3270 * of addresses may fault, so that page tables and
3271 * such may be unwired themselves.
3272 */
3273
9bccf70c
A
3274 pmap_pageable(pmap, pmap_addr,
3275 pmap_addr + (end_addr - entry->vme_start), TRUE);
1c79356b
A
3276
3277}
3278
3279/*
3280 * vm_fault_wire_fast:
3281 *
3282 * Handle common case of a wire down page fault at the given address.
3283 * If successful, the page is inserted into the associated physical map.
3284 * The map entry is passed in to avoid the overhead of a map lookup.
3285 *
3286 * NOTE: the given address should be truncated to the
3287 * proper page address.
3288 *
3289 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
3290 * a standard error specifying why the fault is fatal is returned.
3291 *
3292 * The map in question must be referenced, and remains so.
3293 * Caller has a read lock on the map.
3294 *
3295 * This is a stripped version of vm_fault() for wiring pages. Anything
3296 * other than the common case will return KERN_FAILURE, and the caller
3297 * is expected to call vm_fault().
3298 */
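/*
 * Usage sketch (see vm_fault_wire() above for the real caller): try
 * the fast path first and fall back to the general fault path when it
 * returns anything other than KERN_SUCCESS.
 *
 *	if ((rc = vm_fault_wire_fast(map, va, entry, pmap, pmap_addr))
 *	    != KERN_SUCCESS)
 *		rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
 *			      THREAD_UNINT, pmap, pmap_addr);
 */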
3299kern_return_t
3300vm_fault_wire_fast(
3301 vm_map_t map,
3302 vm_offset_t va,
3303 vm_map_entry_t entry,
9bccf70c
A
3304 pmap_t pmap,
3305 vm_offset_t pmap_addr)
1c79356b
A
3306{
3307 vm_object_t object;
3308 vm_object_offset_t offset;
3309 register vm_page_t m;
3310 vm_prot_t prot;
3311 thread_act_t thr_act;
9bccf70c 3312 unsigned int cache_attr;
1c79356b
A
3313
3314 VM_STAT(faults++);
3315
3316 if((thr_act=current_act()) && (thr_act->task != TASK_NULL))
3317 thr_act->task->faults++;
3318
3319/*
3320 * Recovery actions
3321 */
3322
3323#undef RELEASE_PAGE
3324#define RELEASE_PAGE(m) { \
3325 PAGE_WAKEUP_DONE(m); \
3326 vm_page_lock_queues(); \
3327 vm_page_unwire(m); \
3328 vm_page_unlock_queues(); \
3329}
3330
3331
3332#undef UNLOCK_THINGS
3333#define UNLOCK_THINGS { \
3334 object->paging_in_progress--; \
3335 vm_object_unlock(object); \
3336}
3337
3338#undef UNLOCK_AND_DEALLOCATE
3339#define UNLOCK_AND_DEALLOCATE { \
3340 UNLOCK_THINGS; \
3341 vm_object_deallocate(object); \
3342}
3343/*
3344 * Give up and have caller do things the hard way.
3345 */
3346
3347#define GIVE_UP { \
3348 UNLOCK_AND_DEALLOCATE; \
3349 return(KERN_FAILURE); \
3350}
3351
3352
3353 /*
3354 * If this entry is not directly to a vm_object, bail out.
3355 */
3356 if (entry->is_sub_map)
3357 return(KERN_FAILURE);
3358
3359 /*
3360 * Find the backing store object and offset into it.
3361 */
3362
3363 object = entry->object.vm_object;
3364 offset = (va - entry->vme_start) + entry->offset;
3365 prot = entry->protection;
3366
3367 /*
3368 * Make a reference to this object to prevent its
3369 * disposal while we are messing with it.
3370 */
3371
3372 vm_object_lock(object);
3373 assert(object->ref_count > 0);
3374 object->ref_count++;
3375 vm_object_res_reference(object);
3376 object->paging_in_progress++;
3377
3378 /*
3379 * INVARIANTS (through entire routine):
3380 *
3381 * 1) At all times, we must either have the object
3382 * lock or a busy page in some object to prevent
3383 * some other thread from trying to bring in
3384 * the same page.
3385 *
3386 * 2) Once we have a busy page, we must remove it from
3387 * the pageout queues, so that the pageout daemon
3388 * will not grab it away.
3389 *
3390 */
3391
3392 /*
3393 * Look for page in top-level object. If it's not there or
3394 * there's something going on, give up.
3395 */
3396 m = vm_page_lookup(object, offset);
3397 if ((m == VM_PAGE_NULL) || (m->busy) ||
3398 (m->unusual && ( m->error || m->restart || m->absent ||
3399 prot & m->page_lock))) {
3400
3401 GIVE_UP;
3402 }
3403
3404 /*
3405 * Wire the page down now. All bail outs beyond this
3406 * point must unwire the page.
3407 */
3408
3409 vm_page_lock_queues();
3410 vm_page_wire(m);
3411 vm_page_unlock_queues();
3412
3413 /*
3414 * Mark page busy for other threads.
3415 */
3416 assert(!m->busy);
3417 m->busy = TRUE;
3418 assert(!m->absent);
3419
3420 /*
3421 * Give up if the page is being written and there's a copy object
3422 */
3423 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3424 RELEASE_PAGE(m);
3425 GIVE_UP;
3426 }
3427
3428 /*
3429 * Put this page into the physical map.
3430 * We have to unlock the object because pmap_enter
3431 * may cause other faults.
3432 */
765c9de3 3433 if (m->no_isync == TRUE) {
55e303ae 3434 pmap_sync_caches_phys(m->phys_page);
0b4e3aa0 3435
765c9de3 3436 m->no_isync = FALSE;
0b4e3aa0 3437 }
9bccf70c
A
3438
3439 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
765c9de3 3440
9bccf70c 3441 PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);
1c79356b 3442
1c79356b
A
3443 /*
3444 * Unlock everything, and return
3445 */
3446
3447 PAGE_WAKEUP_DONE(m);
3448 UNLOCK_AND_DEALLOCATE;
3449
3450 return(KERN_SUCCESS);
3451
3452}
3453
3454/*
3455 * Routine: vm_fault_copy_cleanup
3456 * Purpose:
3457 * Release a page used by vm_fault_copy.
3458 */
3459
3460void
3461vm_fault_copy_cleanup(
3462 vm_page_t page,
3463 vm_page_t top_page)
3464{
3465 vm_object_t object = page->object;
3466
3467 vm_object_lock(object);
3468 PAGE_WAKEUP_DONE(page);
3469 vm_page_lock_queues();
3470 if (!page->active && !page->inactive)
3471 vm_page_activate(page);
3472 vm_page_unlock_queues();
3473 vm_fault_cleanup(object, top_page);
3474}
3475
3476void
3477vm_fault_copy_dst_cleanup(
3478 vm_page_t page)
3479{
3480 vm_object_t object;
3481
3482 if (page != VM_PAGE_NULL) {
3483 object = page->object;
3484 vm_object_lock(object);
3485 vm_page_lock_queues();
3486 vm_page_unwire(page);
3487 vm_page_unlock_queues();
3488 vm_object_paging_end(object);
3489 vm_object_unlock(object);
3490 }
3491}
3492
3493/*
3494 * Routine: vm_fault_copy
3495 *
3496 * Purpose:
3497 * Copy pages from one virtual memory object to another --
3498 * neither the source nor destination pages need be resident.
3499 *
3500 * Before actually copying a page, the version associated with
3501 * the destination address map will be verified.
3502 *
3503 * In/out conditions:
3504 * The caller must hold a reference, but not a lock, to
3505 * each of the source and destination objects and to the
3506 * destination map.
3507 *
3508 * Results:
3509 * Returns KERN_SUCCESS if no errors were encountered in
3510 * reading or writing the data. Returns KERN_INTERRUPTED if
3511 * the operation was interrupted (only possible if the
3512 * "interruptible" argument is asserted). Other return values
3513 * indicate a permanent error in copying the data.
3514 *
3515 * The actual amount of data copied will be returned in the
3516 * "copy_size" argument. In the event that the destination map
3517 * verification failed, this amount may be less than the amount
3518 * requested.
3519 */
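/*
 * Usage sketch (illustration only; "nbytes" and "dst_version" are
 * hypothetical caller-side names): the caller holds references, but
 * no locks, on both objects and on dst_map, and checks copy_size on
 * return because a destination map change can legitimately end the
 * copy early with KERN_SUCCESS.
 *
 *	vm_size_t copy_size = nbytes;
 *
 *	kr = vm_fault_copy(src_object, src_offset, &copy_size,
 *			   dst_object, dst_offset,
 *			   dst_map, &dst_version, THREAD_ABORTSAFE);
 *	if (kr == KERN_SUCCESS && copy_size < nbytes)
 *		... re-lookup the destination and copy the remainder ...
 */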
3520kern_return_t
3521vm_fault_copy(
3522 vm_object_t src_object,
3523 vm_object_offset_t src_offset,
3524 vm_size_t *src_size, /* INOUT */
3525 vm_object_t dst_object,
3526 vm_object_offset_t dst_offset,
3527 vm_map_t dst_map,
3528 vm_map_version_t *dst_version,
3529 int interruptible)
3530{
3531 vm_page_t result_page;
3532
3533 vm_page_t src_page;
3534 vm_page_t src_top_page;
3535 vm_prot_t src_prot;
3536
3537 vm_page_t dst_page;
3538 vm_page_t dst_top_page;
3539 vm_prot_t dst_prot;
3540
3541 vm_size_t amount_left;
3542 vm_object_t old_copy_object;
3543 kern_return_t error = 0;
3544
3545 vm_size_t part_size;
3546
3547 /*
3548 * In order not to confuse the clustered pageins, align
3549 * the different offsets on a page boundary.
3550 */
3551 vm_object_offset_t src_lo_offset = trunc_page_64(src_offset);
3552 vm_object_offset_t dst_lo_offset = trunc_page_64(dst_offset);
3553 vm_object_offset_t src_hi_offset = round_page_64(src_offset + *src_size);
3554 vm_object_offset_t dst_hi_offset = round_page_64(dst_offset + *src_size);
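/*
 * Worked example (assuming 4K pages): with src_offset = 0x1234 and
 * *src_size = 0x2000, src_lo_offset = trunc_page_64(0x1234) = 0x1000
 * and src_hi_offset = round_page_64(0x1234 + 0x2000) = 0x4000, so the
 * cluster limits span every page the unaligned copy can touch.
 */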
3555
3556#define RETURN(x) \
3557 MACRO_BEGIN \
3558 *src_size -= amount_left; \
3559 MACRO_RETURN(x); \
3560 MACRO_END
3561
3562 amount_left = *src_size;
3563 do { /* while (amount_left > 0) */
3564 /*
3565 * There may be a deadlock if both source and destination
3566 * pages are the same. To avoid this deadlock, the copy must
3567 * start by getting the destination page in order to apply
3568 * COW semantics if any.
3569 */
3570
3571 RetryDestinationFault: ;
3572
3573 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3574
3575 vm_object_lock(dst_object);
3576 vm_object_paging_begin(dst_object);
3577
3578 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3579 switch (vm_fault_page(dst_object,
3580 trunc_page_64(dst_offset),
3581 VM_PROT_WRITE|VM_PROT_READ,
3582 FALSE,
3583 interruptible,
3584 dst_lo_offset,
3585 dst_hi_offset,
3586 VM_BEHAVIOR_SEQUENTIAL,
3587 &dst_prot,
3588 &dst_page,
3589 &dst_top_page,
3590 (int *)0,
3591 &error,
3592 dst_map->no_zero_fill,
0b4e3aa0 3593 FALSE, NULL, 0)) {
1c79356b
A
3594 case VM_FAULT_SUCCESS:
3595 break;
3596 case VM_FAULT_RETRY:
3597 goto RetryDestinationFault;
3598 case VM_FAULT_MEMORY_SHORTAGE:
3599 if (vm_page_wait(interruptible))
3600 goto RetryDestinationFault;
3601 /* fall thru */
3602 case VM_FAULT_INTERRUPTED:
3603 RETURN(MACH_SEND_INTERRUPTED);
3604 case VM_FAULT_FICTITIOUS_SHORTAGE:
3605 vm_page_more_fictitious();
3606 goto RetryDestinationFault;
3607 case VM_FAULT_MEMORY_ERROR:
3608 if (error)
3609 return (error);
3610 else
3611 return(KERN_MEMORY_ERROR);
3612 }
3613 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3614
3615 old_copy_object = dst_page->object->copy;
3616
3617 /*
3618 * There exists the possibility that the source and
3619 * destination page are the same. But we can't
3620 * easily determine that now. If they are the
3621 * same, the call to vm_fault_page() for the
3622 * destination page will deadlock. To prevent this we
3623 * wire the page so we can drop busy without having
3624 * the page daemon steal the page. We clean up the
3625 * top page but keep the paging reference on the object
3626 * holding the dest page so it doesn't go away.
3627 */
3628
3629 vm_page_lock_queues();
3630 vm_page_wire(dst_page);
3631 vm_page_unlock_queues();
3632 PAGE_WAKEUP_DONE(dst_page);
3633 vm_object_unlock(dst_page->object);
3634
3635 if (dst_top_page != VM_PAGE_NULL) {
3636 vm_object_lock(dst_object);
3637 VM_PAGE_FREE(dst_top_page);
3638 vm_object_paging_end(dst_object);
3639 vm_object_unlock(dst_object);
3640 }
3641
3642 RetrySourceFault: ;
3643
3644 if (src_object == VM_OBJECT_NULL) {
3645 /*
3646 * No source object. We will just
3647 * zero-fill the page in dst_object.
3648 */
3649 src_page = VM_PAGE_NULL;
e3027f41 3650 result_page = VM_PAGE_NULL;
1c79356b
A
3651 } else {
3652 vm_object_lock(src_object);
3653 src_page = vm_page_lookup(src_object,
3654 trunc_page_64(src_offset));
e3027f41 3655 if (src_page == dst_page) {
1c79356b 3656 src_prot = dst_prot;
e3027f41
A
3657 result_page = VM_PAGE_NULL;
3658 } else {
1c79356b
A
3659 src_prot = VM_PROT_READ;
3660 vm_object_paging_begin(src_object);
3661
3662 XPR(XPR_VM_FAULT,
3663 "vm_fault_copy(2) -> vm_fault_page\n",
3664 0,0,0,0,0);
3665 switch (vm_fault_page(src_object,
3666 trunc_page_64(src_offset),
3667 VM_PROT_READ,
3668 FALSE,
3669 interruptible,
3670 src_lo_offset,
3671 src_hi_offset,
3672 VM_BEHAVIOR_SEQUENTIAL,
3673 &src_prot,
3674 &result_page,
3675 &src_top_page,
3676 (int *)0,
3677 &error,
3678 FALSE,
0b4e3aa0 3679 FALSE, NULL, 0)) {
1c79356b
A
3680
3681 case VM_FAULT_SUCCESS:
3682 break;
3683 case VM_FAULT_RETRY:
3684 goto RetrySourceFault;
3685 case VM_FAULT_MEMORY_SHORTAGE:
3686 if (vm_page_wait(interruptible))
3687 goto RetrySourceFault;
3688 /* fall thru */
3689 case VM_FAULT_INTERRUPTED:
3690 vm_fault_copy_dst_cleanup(dst_page);
3691 RETURN(MACH_SEND_INTERRUPTED);
3692 case VM_FAULT_FICTITIOUS_SHORTAGE:
3693 vm_page_more_fictitious();
3694 goto RetrySourceFault;
3695 case VM_FAULT_MEMORY_ERROR:
3696 vm_fault_copy_dst_cleanup(dst_page);
3697 if (error)
3698 return (error);
3699 else
3700 return(KERN_MEMORY_ERROR);
3701 }
3702
1c79356b
A
3703
3704 assert((src_top_page == VM_PAGE_NULL) ==
e3027f41 3705 (result_page->object == src_object));
1c79356b
A
3706 }
3707 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
e3027f41 3708 vm_object_unlock(result_page->object);
1c79356b
A
3709 }
3710
3711 if (!vm_map_verify(dst_map, dst_version)) {
e3027f41
A
3712 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3713 vm_fault_copy_cleanup(result_page, src_top_page);
1c79356b
A
3714 vm_fault_copy_dst_cleanup(dst_page);
3715 break;
3716 }
3717
3718 vm_object_lock(dst_page->object);
3719
3720 if (dst_page->object->copy != old_copy_object) {
3721 vm_object_unlock(dst_page->object);
3722 vm_map_verify_done(dst_map, dst_version);
e3027f41
A
3723 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3724 vm_fault_copy_cleanup(result_page, src_top_page);
1c79356b
A
3725 vm_fault_copy_dst_cleanup(dst_page);
3726 break;
3727 }
3728 vm_object_unlock(dst_page->object);
3729
3730 /*
3731 * Copy the page, and note that it is dirty
3732 * immediately.
3733 */
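/*
 * Worked example of the partial-page case below (4K pages assumed):
 * if src_offset ends in 0x200 and dst_offset ends in 0x600, then
 * src_po = 0x200, dst_po = 0x600, and part_size = PAGE_SIZE - dst_po
 * = 0xA00, i.e. this pass copies up to the destination page boundary,
 * further limited by amount_left when less than that remains.
 */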
3734
3735 if (!page_aligned(src_offset) ||
3736 !page_aligned(dst_offset) ||
3737 !page_aligned(amount_left)) {
3738
3739 vm_object_offset_t src_po,
3740 dst_po;
3741
3742 src_po = src_offset - trunc_page_64(src_offset);
3743 dst_po = dst_offset - trunc_page_64(dst_offset);
3744
3745 if (dst_po > src_po) {
3746 part_size = PAGE_SIZE - dst_po;
3747 } else {
3748 part_size = PAGE_SIZE - src_po;
3749 }
3750 if (part_size > (amount_left)){
3751 part_size = amount_left;
3752 }
3753
e3027f41 3754 if (result_page == VM_PAGE_NULL) {
1c79356b
A
3755 vm_page_part_zero_fill(dst_page,
3756 dst_po, part_size);
3757 } else {
e3027f41 3758 vm_page_part_copy(result_page, src_po,
1c79356b
A
3759 dst_page, dst_po, part_size);
3760 if(!dst_page->dirty){
3761 vm_object_lock(dst_object);
3762 dst_page->dirty = TRUE;
3763 vm_object_unlock(dst_page->object);
3764 }
3765
3766 }
3767 } else {
3768 part_size = PAGE_SIZE;
3769
e3027f41 3770 if (result_page == VM_PAGE_NULL)
1c79356b
A
3771 vm_page_zero_fill(dst_page);
3772 else{
e3027f41 3773 vm_page_copy(result_page, dst_page);
1c79356b
A
3774 if(!dst_page->dirty){
3775 vm_object_lock(dst_object);
3776 dst_page->dirty = TRUE;
3777 vm_object_unlock(dst_page->object);
3778 }
3779 }
3780
3781 }
3782
3783 /*
3784 * Unlock everything, and return
3785 */
3786
3787 vm_map_verify_done(dst_map, dst_version);
3788
e3027f41
A
3789 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3790 vm_fault_copy_cleanup(result_page, src_top_page);
1c79356b
A
3791 vm_fault_copy_dst_cleanup(dst_page);
3792
3793 amount_left -= part_size;
3794 src_offset += part_size;
3795 dst_offset += part_size;
3796 } while (amount_left > 0);
3797
3798 RETURN(KERN_SUCCESS);
3799#undef RETURN
3800
3801 /*NOTREACHED*/
3802}
3803
3804#ifdef notdef
3805
3806/*
3807 * Routine: vm_fault_page_overwrite
3808 *
3809 * Description:
3810 * A form of vm_fault_page that assumes that the
3811 * resulting page will be overwritten in its entirety,
3812 * making it unnecessary to obtain the correct *contents*
3813 * of the page.
3814 *
3815 * Implementation:
3816 * XXX Untested. Also unused. Eventually, this technology
3817 * could be used in vm_fault_copy() to advantage.
3818 */
3819vm_fault_return_t
3820vm_fault_page_overwrite(
3821 register
3822 vm_object_t dst_object,
3823 vm_object_offset_t dst_offset,
3824 vm_page_t *result_page) /* OUT */
3825{
3826 register
3827 vm_page_t dst_page;
3828 kern_return_t wait_result;
3829
3830#define interruptible THREAD_UNINT /* XXX */
3831
3832 while (TRUE) {
3833 /*
3834 * Look for a page at this offset
3835 */
3836
3837 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3838 == VM_PAGE_NULL) {
3839 /*
3840 * No page, no problem... just allocate one.
3841 */
3842
3843 dst_page = vm_page_alloc(dst_object, dst_offset);
3844 if (dst_page == VM_PAGE_NULL) {
3845 vm_object_unlock(dst_object);
3846 VM_PAGE_WAIT();
3847 vm_object_lock(dst_object);
3848 continue;
3849 }
3850
3851 /*
3852 * Pretend that the memory manager
3853 * write-protected the page.
3854 *
3855 * Note that we will be asking for write
3856 * permission without asking for the data
3857 * first.
3858 */
3859
3860 dst_page->overwriting = TRUE;
3861 dst_page->page_lock = VM_PROT_WRITE;
3862 dst_page->absent = TRUE;
3863 dst_page->unusual = TRUE;
3864 dst_object->absent_count++;
3865
3866 break;
3867
3868 /*
3869 * When we bail out, we might have to throw
3870 * away the page created here.
3871 */
3872
3873#define DISCARD_PAGE \
3874 MACRO_BEGIN \
3875 vm_object_lock(dst_object); \
3876 dst_page = vm_page_lookup(dst_object, dst_offset); \
3877 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3878 VM_PAGE_FREE(dst_page); \
3879 vm_object_unlock(dst_object); \
3880 MACRO_END
3881 }
3882
3883 /*
3884 * If the page is write-protected...
3885 */
3886
3887 if (dst_page->page_lock & VM_PROT_WRITE) {
3888 /*
3889 * ... and an unlock request hasn't been sent
3890 */
3891
3892 if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3893 vm_prot_t u;
3894 kern_return_t rc;
3895
3896 /*
3897 * ... then send one now.
3898 */
3899
3900 if (!dst_object->pager_ready) {
9bccf70c
A
3901 wait_result = vm_object_assert_wait(dst_object,
3902 VM_OBJECT_EVENT_PAGER_READY,
3903 interruptible);
1c79356b 3904 vm_object_unlock(dst_object);
9bccf70c
A
3905 if (wait_result == THREAD_WAITING)
3906 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
3907 if (wait_result != THREAD_AWAKENED) {
3908 DISCARD_PAGE;
3909 return(VM_FAULT_INTERRUPTED);
3910 }
3911 continue;
3912 }
3913
3914 u = dst_page->unlock_request |= VM_PROT_WRITE;
3915 vm_object_unlock(dst_object);
3916
3917 if ((rc = memory_object_data_unlock(
3918 dst_object->pager,
1c79356b
A
3919 dst_offset + dst_object->paging_offset,
3920 PAGE_SIZE,
3921 u)) != KERN_SUCCESS) {
3922 if (vm_fault_debug)
3923 printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3924 DISCARD_PAGE;
3925 return((rc == MACH_SEND_INTERRUPTED) ?
3926 VM_FAULT_INTERRUPTED :
3927 VM_FAULT_MEMORY_ERROR);
3928 }
3929 vm_object_lock(dst_object);
3930 continue;
3931 }
3932
3933 /* ... fall through to wait below */
3934 } else {
3935 /*
3936 * If the page isn't being used for other
3937 * purposes, then we're done.
3938 */
3939 if ( ! (dst_page->busy || dst_page->absent ||
3940 dst_page->error || dst_page->restart) )
3941 break;
3942 }
3943
9bccf70c 3944 wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
1c79356b 3945 vm_object_unlock(dst_object);
9bccf70c
A
3946 if (wait_result == THREAD_WAITING)
3947 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
3948 if (wait_result != THREAD_AWAKENED) {
3949 DISCARD_PAGE;
3950 return(VM_FAULT_INTERRUPTED);
3951 }
3952 }
3953
3954 *result_page = dst_page;
3955 return(VM_FAULT_SUCCESS);
3956
3957#undef interruptible
3958#undef DISCARD_PAGE
3959}
3960
3961#endif /* notdef */
3962
3963#if VM_FAULT_CLASSIFY
3964/*
3965 * Temporary statistics gathering support.
3966 */
3967
3968/*
3969 * Statistics arrays:
3970 */
3971#define VM_FAULT_TYPES_MAX 5
3972#define VM_FAULT_LEVEL_MAX 8
3973
3974int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
3975
3976#define VM_FAULT_TYPE_ZERO_FILL 0
3977#define VM_FAULT_TYPE_MAP_IN 1
3978#define VM_FAULT_TYPE_PAGER 2
3979#define VM_FAULT_TYPE_COPY 3
3980#define VM_FAULT_TYPE_OTHER 4
3981
3982
3983void
3984vm_fault_classify(vm_object_t object,
3985 vm_object_offset_t offset,
3986 vm_prot_t fault_type)
3987{
3988 int type, level = 0;
3989 vm_page_t m;
3990
3991 while (TRUE) {
3992 m = vm_page_lookup(object, offset);
3993 if (m != VM_PAGE_NULL) {
3994 if (m->busy || m->error || m->restart || m->absent ||
3995 fault_type & m->page_lock) {
3996 type = VM_FAULT_TYPE_OTHER;
3997 break;
3998 }
3999 if (((fault_type & VM_PROT_WRITE) == 0) ||
4000 ((level == 0) && object->copy == VM_OBJECT_NULL)) {
4001 type = VM_FAULT_TYPE_MAP_IN;
4002 break;
4003 }
4004 type = VM_FAULT_TYPE_COPY;
4005 break;
4006 }
4007 else {
4008 if (object->pager_created) {
4009 type = VM_FAULT_TYPE_PAGER;
4010 break;
4011 }
4012 if (object->shadow == VM_OBJECT_NULL) {
4013 type = VM_FAULT_TYPE_ZERO_FILL;
4014 break;
4015 }
4016
4017 offset += object->shadow_offset;
4018 object = object->shadow;
4019 level++;
4020 continue;
4021 }
4022 }
4023
4024 if (level > VM_FAULT_LEVEL_MAX)
4025 level = VM_FAULT_LEVEL_MAX;
4026
4027 vm_fault_stats[type][level] += 1;
4028
4029 return;
4030}
4031
4032/* cleanup routine to call from debugger */
4033
4034void
4035vm_fault_classify_init(void)
4036{
4037 int type, level;
4038
4039 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
4040 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
4041 vm_fault_stats[type][level] = 0;
4042 }
4043 }
4044
4045 return;
4046}
4047#endif /* VM_FAULT_CLASSIFY */