]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/vm_fault.c
xnu-201.42.3.tar.gz
[apple/xnu.git] / osfmk / vm / vm_fault.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: vm_fault.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 *
56 * Page fault handling module.
57 */
58 #ifdef MACH_BSD
59 /* Temporary declarations (MACH_BSD builds only); remove after the component interface is available. */
60 extern int vnode_pager_workaround;
61 extern int device_pager_workaround;
62 #endif
63
64 #include <mach_cluster_stats.h>
65 #include <mach_pagemap.h>
66 #include <mach_kdb.h>
67
68 #include <vm/vm_fault.h>
69 #include <mach/kern_return.h>
70 #include <mach/message.h> /* for error codes */
71 #include <kern/host_statistics.h>
72 #include <kern/counters.h>
73 #include <kern/task.h>
74 #include <kern/thread.h>
75 #include <kern/sched_prim.h>
76 #include <kern/host.h>
77 #include <kern/xpr.h>
78 #include <ppc/proc_reg.h>
79 #include <ppc/pmap_internals.h>
80 #include <vm/task_working_set.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_object.h>
83 #include <vm/vm_page.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_pageout.h>
86 #include <mach/vm_param.h>
87 #include <mach/vm_behavior.h>
88 #include <mach/memory_object.h>
89 /* For memory_object_data_{request,unlock} */
90 #include <kern/mach_param.h>
91 #include <kern/macro_help.h>
92 #include <kern/zalloc.h>
93 #include <kern/misc_protos.h>
94
95 #include <sys/kdebug.h>
96
97 #define VM_FAULT_CLASSIFY 0 /* nonzero: declare the vm_fault_classify routines */
98 #define VM_FAULT_STATIC_CONFIG 1 /* nonzero: compile out the run-time tunables guarded by !VM_FAULT_STATIC_CONFIG */
99
100 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) nonzero: compile in the dbgTrace() calls in vm_fault_page */
101
102 int vm_object_absent_max = 50; /* vm_fault_page backs off when a non-internal object's absent_count exceeds this */
103
104 int vm_fault_debug = 0; /* nonzero: print a message when memory_object_data_unlock fails */
105 boolean_t vm_page_deactivate_behind = TRUE;
106
107
108 #if !VM_FAULT_STATIC_CONFIG
109 boolean_t vm_fault_dirty_handling = FALSE; /* TRUE: strip VM_PROT_WRITE from *protection on non-write faults */
110 boolean_t vm_fault_interruptible = FALSE; /* FALSE: vm_fault_page forces interruptible to THREAD_UNINT */
111 boolean_t software_reference_bits = TRUE; /* FALSE: vm_fault_page removes a found page from the paging queues */
112 #endif
113
114 #if MACH_KDB
115 extern struct db_watchpoint *db_watchpoint_list; /* when non-null, vm_fault_page withholds write permission on non-write faults */
116 #endif /* MACH_KDB */
117
118 /* Forward declarations of internal routines. */
119 extern kern_return_t vm_fault_wire_fast(
120 vm_map_t map,
121 vm_offset_t va,
122 vm_map_entry_t entry,
123 pmap_t pmap);
124
125 extern void vm_fault_continue(void);
126
127 extern void vm_fault_copy_cleanup(
128 vm_page_t page,
129 vm_page_t top_page);
130
131 extern void vm_fault_copy_dst_cleanup(
132 vm_page_t page);
133
134 #if VM_FAULT_CLASSIFY /* declared only when VM_FAULT_CLASSIFY is nonzero */
135 extern void vm_fault_classify(vm_object_t object,
136 vm_object_offset_t offset,
137 vm_prot_t fault_type);
138
139 extern void vm_fault_classify_init(void);
140 #endif /* VM_FAULT_CLASSIFY */
141
142 /*
143 * Routine: vm_fault_init
144 * Purpose:
145 * Initialize our private data structures.
146 */
147 void
148 vm_fault_init(void)
149 { /* Intentionally empty: this routine currently performs no initialization. */
150 }
151
152 /*
153 * Routine: vm_fault_cleanup
154 * Purpose:
155 * Clean up the result of vm_fault_page.
156 * Results:
157 * The paging reference for "object" is released.
158 * "object" is unlocked.
159 * If "top_page" is not null, "top_page" is
160 * freed and the paging reference for the object
161 * containing it is released.
162 *
163 * In/out conditions:
164 * "object" must be locked.
165 */
166 void
167 vm_fault_cleanup(
168 register vm_object_t object,
169 register vm_page_t top_page)
170 {
171 vm_object_paging_end(object); /* release the paging reference for "object" */
172 vm_object_unlock(object); /* caller passed "object" in locked */
173
174 if (top_page != VM_PAGE_NULL) {
175 object = top_page->object; /* from here on "object" is the object containing top_page */
176 vm_object_lock(object);
177 VM_PAGE_FREE(top_page);
178 vm_object_paging_end(object); /* release the paging reference for top_page's object */
179 vm_object_unlock(object);
180 }
181 }
182
183 #if MACH_CLUSTER_STATS
184 #define MAXCLUSTERPAGES 16 /* number of entries in cluster_stats_in[] */
185 struct {
186 unsigned long pages_in_cluster;
187 unsigned long pages_at_higher_offsets;
188 unsigned long pages_at_lower_offsets;
189 } cluster_stats_in[MAXCLUSTERPAGES];
190 #define CLUSTER_STAT(clause) clause /* statistics enabled: emit clause as written */
191 #define CLUSTER_STAT_HIGHER(x) \
192 ((cluster_stats_in[(x)].pages_at_higher_offsets)++) /* x is used as an unchecked index */
193 #define CLUSTER_STAT_LOWER(x) \
194 ((cluster_stats_in[(x)].pages_at_lower_offsets)++) /* x is used as an unchecked index */
195 #define CLUSTER_STAT_CLUSTER(x) \
196 ((cluster_stats_in[(x)].pages_in_cluster)++) /* x is used as an unchecked index */
197 #else /* MACH_CLUSTER_STATS */
198 #define CLUSTER_STAT(clause) /* statistics disabled: clause expands to nothing */
199 #endif /* MACH_CLUSTER_STATS */
200
201 /* XXX - temporary */
202 boolean_t vm_allow_clustered_pagein = FALSE; /* FALSE: vm_fault_page skips clustered pagein */
203 int vm_pagein_cluster_used = 0; /* incremented when a fault finds a page marked "clustered" */
204
205 /*
206 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
207 */
208 int vm_default_ahead = 1; /* Number of pages to prepage ahead */
209 int vm_default_behind = 0; /* Number of pages to prepage behind */
210
211 #define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0) /* true when no bit of x within the mask (PAGE_SIZE_64 - 1) is set */
212
213 /*
214 * Routine: vm_fault_page
215 * Purpose:
216 * Find the resident page for the virtual memory
217 * specified by the given virtual memory object
218 * and offset.
219 * Additional arguments:
220 * The required permissions for the page are given
221 * in "fault_type". Desired permissions are included
222 * in "protection". The minimum and maximum valid offsets
223 * within the object for the relevant map entry are
224 * passed in "lo_offset" and "hi_offset" respectively and
225 * the expected page reference pattern is passed in "behavior".
226 * These three parameters are used to determine pagein cluster
227 * limits.
228 *
229 * If the desired page is known to be resident (for
230 * example, because it was previously wired down), asserting
231 * the "unwiring" parameter will speed the search.
232 *
233 * If the operation can be interrupted (by thread_abort
234 * or thread_terminate), then the "interruptible"
235 * parameter should be asserted.
236 *
237 * Results:
238 * The page containing the proper data is returned
239 * in "result_page".
240 *
241 * In/out conditions:
242 * The source object must be locked and referenced,
243 * and must donate one paging reference. The reference
244 * is not affected. The paging reference and lock are
245 * consumed.
246 *
247 * If the call succeeds, the object in which "result_page"
248 * resides is left locked and holding a paging reference.
249 * If this is not the original object, a busy page in the
250 * original object is returned in "top_page", to prevent other
251 * callers from pursuing this same data, along with a paging
252 * reference for the original object. The "top_page" should
253 * be destroyed when this guarantee is no longer required.
254 * The "result_page" is also left busy. It is not removed
255 * from the pageout queues.
256 */
257
258 vm_fault_return_t
259 vm_fault_page(
260 /* Arguments: */
261 vm_object_t first_object, /* Object to begin search */
262 vm_object_offset_t first_offset, /* Offset into object */
263 vm_prot_t fault_type, /* What access is requested */
264 boolean_t must_be_resident,/* Must page be resident? */
265 int interruptible, /* how may fault be interrupted? */
266 vm_object_offset_t lo_offset, /* Map entry start */
267 vm_object_offset_t hi_offset, /* Map entry end */
268 vm_behavior_t behavior, /* Page reference behavior */
269 /* Modifies in place: */
270 vm_prot_t *protection, /* Protection for mapping */
271 /* Returns: */
272 vm_page_t *result_page, /* Page found, if successful */
273 vm_page_t *top_page, /* Page in top object, if
274 * not result_page. */
275 int *type_of_fault, /* if non-null, fill in with type of fault
276 * COW, zero-fill, etc... returned in trace point */
277 /* More arguments: */
278 kern_return_t *error_code, /* code if page is in error */
279 boolean_t no_zero_fill, /* don't zero fill absent pages */
280 boolean_t data_supply, /* treat as data_supply if
281 * it is a write fault and a full
282 * page is provided */
283 vm_map_t map,
284 vm_offset_t vaddr)
285 {
286 register
287 vm_page_t m;
288 register
289 vm_object_t object;
290 register
291 vm_object_offset_t offset;
292 vm_page_t first_m;
293 vm_object_t next_object;
294 vm_object_t copy_object;
295 boolean_t look_for_page;
296 vm_prot_t access_required = fault_type;
297 vm_prot_t wants_copy_flag;
298 vm_size_t cluster_size, length;
299 vm_object_offset_t cluster_offset;
300 vm_object_offset_t cluster_start, cluster_end, paging_offset;
301 vm_object_offset_t align_offset;
302 CLUSTER_STAT(int pages_at_higher_offsets;)
303 CLUSTER_STAT(int pages_at_lower_offsets;)
304 kern_return_t wait_result;
305 thread_t cur_thread;
306 boolean_t interruptible_state;
307 boolean_t bumped_pagein = FALSE;
308
309
310 #if MACH_PAGEMAP
311 /*
312 * MACH page map - an optional optimization where a bit map is maintained
313 * by the VM subsystem for internal objects to indicate which pages of
314 * the object currently reside on backing store. This existence map
315 * duplicates information maintained by the vnode pager. It is
316 * created at the time of the first pageout against the object, i.e.
317 * at the same time pager for the object is created. The optimization
318 * is designed to eliminate pager interaction overhead, if it is
319 * 'known' that the page does not exist on backing store.
320 *
321 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
322 * either marked as paged out in the existence map for the object or no
323 * existence map exists for the object. LOOK_FOR() is one of the
324 * criteria in the decision to invoke the pager. It is also used as one
325 * of the criteria to terminate the scan for adjacent pages in a clustered
326 * pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
327 * permanent objects. Note also that if the pager for an internal object
328 * has not been created, the pager is not invoked regardless of the value
329 * of LOOK_FOR() and that clustered pagein scans are only done on an object
330 * for which a pager has been created.
331 *
332 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
333 * is marked as paged out in the existence map for the object.
334 * PAGED_OUT() is used to determine if a page has already been pushed
335 * into a copy object in order to avoid a redundant page out operation.
336 */
337 #define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
338 != VM_EXTERNAL_STATE_ABSENT)
339 #define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
340 == VM_EXTERNAL_STATE_EXISTS)
341 #else /* MACH_PAGEMAP */
342 /*
343 * If the MACH page map optimization is not enabled,
344 * LOOK_FOR() always evaluates to TRUE. The pager will always be
345 * invoked to resolve missing pages in an object, assuming the pager
346 * has been created for the object. In a clustered page operation, the
347 * absence of a page on backing store cannot be used to terminate
348 * a scan for adjacent pages since that information is available only in
349 * the pager. Hence pages that may not be paged out are potentially
350 * included in a clustered request. The vnode pager is coded to deal
351 * with any combination of absent/present pages in a clustered
352 * pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
353 * will always be invoked to push a dirty page into a copy object assuming
354 * a pager has been created. If the page has already been pushed, the
355 * pager will ignore the new request.
356 */
357 #define LOOK_FOR(o, f) TRUE
358 #define PAGED_OUT(o, f) FALSE
359 #endif /* MACH_PAGEMAP */
360
361 /*
362 * Recovery actions
363 */
364 #define PREPARE_RELEASE_PAGE(m) \
365 MACRO_BEGIN \
366 vm_page_lock_queues(); \
367 MACRO_END
368
369 #define DO_RELEASE_PAGE(m) \
370 MACRO_BEGIN \
371 PAGE_WAKEUP_DONE(m); \
372 if (!m->active && !m->inactive) \
373 vm_page_activate(m); \
374 vm_page_unlock_queues(); \
375 MACRO_END
376
377 #define RELEASE_PAGE(m) \
378 MACRO_BEGIN \
379 PREPARE_RELEASE_PAGE(m); \
380 DO_RELEASE_PAGE(m); \
381 MACRO_END
382
383 #if TRACEFAULTPAGE
384 dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
385 #endif
386
387
388
389 #if !VM_FAULT_STATIC_CONFIG
390 if (vm_fault_dirty_handling
391 #if MACH_KDB
392 /*
393 * If there are watchpoints set, then
394 * we don't want to give away write permission
395 * on a read fault. Make the task write fault,
396 * so that the watchpoint code notices the access.
397 */
398 || db_watchpoint_list
399 #endif /* MACH_KDB */
400 ) {
401 /*
402 * If we aren't asking for write permission,
403 * then don't give it away. We're using write
404 * faults to set the dirty bit.
405 */
406 if (!(fault_type & VM_PROT_WRITE))
407 *protection &= ~VM_PROT_WRITE;
408 }
409
410 if (!vm_fault_interruptible)
411 interruptible = THREAD_UNINT;
412 #else /* STATIC_CONFIG */
413 #if MACH_KDB
414 /*
415 * If there are watchpoints set, then
416 * we don't want to give away write permission
417 * on a read fault. Make the task write fault,
418 * so that the watchpoint code notices the access.
419 */
420 if (db_watchpoint_list) {
421 /*
422 * If we aren't asking for write permission,
423 * then don't give it away. We're using write
424 * faults to set the dirty bit.
425 */
426 if (!(fault_type & VM_PROT_WRITE))
427 *protection &= ~VM_PROT_WRITE;
428 }
429
430 #endif /* MACH_KDB */
431 #endif /* STATIC_CONFIG */
432
433 cur_thread = current_thread();
434
435 interruptible_state = cur_thread->interruptible;
436 if (interruptible == THREAD_UNINT)
437 cur_thread->interruptible = FALSE;
438
439 /*
440 * INVARIANTS (through entire routine):
441 *
442 * 1) At all times, we must either have the object
443 * lock or a busy page in some object to prevent
444 * some other thread from trying to bring in
445 * the same page.
446 *
447 * Note that we cannot hold any locks during the
448 * pager access or when waiting for memory, so
449 * we use a busy page then.
450 *
451 * Note also that we aren't as concerned about more than
452 * one thread attempting to memory_object_data_unlock
453 * the same page at once, so we don't hold the page
454 * as busy then, but do record the highest unlock
455 * value so far. [Unlock requests may also be delivered
456 * out of order.]
457 *
458 * 2) To prevent another thread from racing us down the
459 * shadow chain and entering a new page in the top
460 * object before we do, we must keep a busy page in
461 * the top object while following the shadow chain.
462 *
463 * 3) We must increment paging_in_progress on any object
464 * for which we have a busy page
465 *
466 * 4) We leave busy pages on the pageout queues.
467 * If the pageout daemon comes across a busy page,
468 * it will remove the page from the pageout queues.
469 */
470
471 /*
472 * Search for the page at object/offset.
473 */
474
475 object = first_object;
476 offset = first_offset;
477 first_m = VM_PAGE_NULL;
478 access_required = fault_type;
479
480 XPR(XPR_VM_FAULT,
481 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
482 (integer_t)object, offset, fault_type, *protection, 0);
483
484 /*
485 * See whether this page is resident
486 */
487
488 while (TRUE) {
489 #if TRACEFAULTPAGE
490 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
491 #endif
492 if (!object->alive) {
493 vm_fault_cleanup(object, first_m);
494 cur_thread->interruptible = interruptible_state;
495 return(VM_FAULT_MEMORY_ERROR);
496 }
497 m = vm_page_lookup(object, offset);
498 #if TRACEFAULTPAGE
499 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
500 #endif
501 if (m != VM_PAGE_NULL) {
502 /*
503 * If the page was pre-paged as part of a
504 * cluster, record the fact.
505 */
506 if (m->clustered) {
507 vm_pagein_cluster_used++;
508 m->clustered = FALSE;
509 }
510
511 /*
512 * If the page is being brought in,
513 * wait for it and then retry.
514 *
515 * A possible optimization: if the page
516 * is known to be resident, we can ignore
517 * pages that are absent (regardless of
518 * whether they're busy).
519 */
520
521 if (m->busy) {
522 #if TRACEFAULTPAGE
523 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
524 #endif
525 PAGE_ASSERT_WAIT(m, interruptible);
526 vm_object_unlock(object);
527 XPR(XPR_VM_FAULT,
528 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
529 (integer_t)object, offset,
530 (integer_t)m, 0, 0);
531 counter(c_vm_fault_page_block_busy_kernel++);
532 wait_result = thread_block((void (*)(void))0);
533
534 vm_object_lock(object);
535 if (wait_result != THREAD_AWAKENED) {
536 vm_fault_cleanup(object, first_m);
537 cur_thread->interruptible = interruptible_state;
538 if (wait_result == THREAD_RESTART)
539 {
540 return(VM_FAULT_RETRY);
541 }
542 else
543 {
544 return(VM_FAULT_INTERRUPTED);
545 }
546 }
547 continue;
548 }
549
550 /*
551 * If the page is in error, give up now.
552 */
553
554 if (m->error) {
555 #if TRACEFAULTPAGE
556 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */
557 #endif
558 if (error_code)
559 *error_code = m->page_error;
560 VM_PAGE_FREE(m);
561 vm_fault_cleanup(object, first_m);
562 cur_thread->interruptible = interruptible_state;
563 return(VM_FAULT_MEMORY_ERROR);
564 }
565
566 /*
567 * If the pager wants us to restart
568 * at the top of the chain,
569 * typically because it has moved the
570 * page to another pager, then do so.
571 */
572
573 if (m->restart) {
574 #if TRACEFAULTPAGE
575 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
576 #endif
577 VM_PAGE_FREE(m);
578 vm_fault_cleanup(object, first_m);
579 cur_thread->interruptible = interruptible_state;
580 return(VM_FAULT_RETRY);
581 }
582
583 /*
584 * If the page isn't busy, but is absent,
585 * then it was deemed "unavailable".
586 */
587
588 if (m->absent) {
589 /*
590 * Remove the non-existent page (unless it's
591 * in the top object) and move on down to the
592 * next object (if there is one).
593 */
594 #if TRACEFAULTPAGE
595 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */
596 #endif
597
598 next_object = object->shadow;
599 if (next_object == VM_OBJECT_NULL) {
600 vm_page_t real_m;
601
602 assert(!must_be_resident);
603
604 if (object->shadow_severed) {
605 vm_fault_cleanup(
606 object, first_m);
607 cur_thread->interruptible = interruptible_state;
608 return VM_FAULT_MEMORY_ERROR;
609 }
610
611 /*
612 * Absent page at bottom of shadow
613 * chain; zero fill the page we left
614 * busy in the first object, and flush
615 * the absent page. But first we
616 * need to allocate a real page.
617 */
618 if (VM_PAGE_THROTTLED() ||
619 (real_m = vm_page_grab()) == VM_PAGE_NULL) {
620 vm_fault_cleanup(object, first_m);
621 cur_thread->interruptible = interruptible_state;
622 return(VM_FAULT_MEMORY_SHORTAGE);
623 }
624
625 XPR(XPR_VM_FAULT,
626 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
627 (integer_t)object, offset,
628 (integer_t)m,
629 (integer_t)first_object, 0);
630 if (object != first_object) {
631 VM_PAGE_FREE(m);
632 vm_object_paging_end(object);
633 vm_object_unlock(object);
634 object = first_object;
635 offset = first_offset;
636 m = first_m;
637 first_m = VM_PAGE_NULL;
638 vm_object_lock(object);
639 }
640
641 VM_PAGE_FREE(m);
642 assert(real_m->busy);
643 vm_page_insert(real_m, object, offset);
644 m = real_m;
645
646 /*
647 * Drop the lock while zero filling
648 * page. Then break because this
649 * is the page we wanted. Checking
650 * the page lock is a waste of time;
651 * this page was either absent or
652 * newly allocated -- in both cases
653 * it can't be page locked by a pager.
654 */
655 m->no_isync = FALSE;
656
657 if (!no_zero_fill) {
658 vm_object_unlock(object);
659 vm_page_zero_fill(m);
660 if (type_of_fault)
661 *type_of_fault = DBG_ZERO_FILL_FAULT;
662 VM_STAT(zero_fill_count++);
663
664 if (bumped_pagein == TRUE) {
665 VM_STAT(pageins--);
666 current_task()->pageins--;
667 }
668 vm_object_lock(object);
669 }
670 pmap_clear_modify(m->phys_addr);
671 vm_page_lock_queues();
672 VM_PAGE_QUEUES_REMOVE(m);
673 m->page_ticket = vm_page_ticket;
674 vm_page_ticket_roll++;
675 if(vm_page_ticket_roll ==
676 VM_PAGE_TICKETS_IN_ROLL) {
677 vm_page_ticket_roll = 0;
678 if(vm_page_ticket ==
679 VM_PAGE_TICKET_ROLL_IDS)
680 vm_page_ticket= 0;
681 else
682 vm_page_ticket++;
683 }
684 queue_enter(&vm_page_queue_inactive,
685 m, vm_page_t, pageq);
686 m->inactive = TRUE;
687 vm_page_inactive_count++;
688 vm_page_unlock_queues();
689 break;
690 } else {
691 if (must_be_resident) {
692 vm_object_paging_end(object);
693 } else if (object != first_object) {
694 vm_object_paging_end(object);
695 VM_PAGE_FREE(m);
696 } else {
697 first_m = m;
698 m->absent = FALSE;
699 m->unusual = FALSE;
700 vm_object_absent_release(object);
701 m->busy = TRUE;
702
703 vm_page_lock_queues();
704 VM_PAGE_QUEUES_REMOVE(m);
705 vm_page_unlock_queues();
706 }
707 XPR(XPR_VM_FAULT,
708 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
709 (integer_t)object, offset,
710 (integer_t)next_object,
711 offset+object->shadow_offset,0);
712 offset += object->shadow_offset;
713 hi_offset += object->shadow_offset;
714 lo_offset += object->shadow_offset;
715 access_required = VM_PROT_READ;
716 vm_object_lock(next_object);
717 vm_object_unlock(object);
718 object = next_object;
719 vm_object_paging_begin(object);
720 continue;
721 }
722 }
723
724 if ((m->cleaning)
725 && ((object != first_object) ||
726 (object->copy != VM_OBJECT_NULL))
727 && (fault_type & VM_PROT_WRITE)) {
728 /*
729 * This is a copy-on-write fault that will
730 * cause us to revoke access to this page, but
731 * this page is in the process of being cleaned
732 * in a clustered pageout. We must wait until
733 * the cleaning operation completes before
734 * revoking access to the original page,
735 * otherwise we might attempt to remove a
736 * wired mapping.
737 */
738 #if TRACEFAULTPAGE
739 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */
740 #endif
741 XPR(XPR_VM_FAULT,
742 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
743 (integer_t)object, offset,
744 (integer_t)m, 0, 0);
745 /* take an extra ref so that object won't die */
746 assert(object->ref_count > 0);
747 object->ref_count++;
748 vm_object_res_reference(object);
749 vm_fault_cleanup(object, first_m);
750 counter(c_vm_fault_page_block_backoff_kernel++);
751 vm_object_lock(object);
752 assert(object->ref_count > 0);
753 m = vm_page_lookup(object, offset);
754 if (m != VM_PAGE_NULL && m->cleaning) {
755 PAGE_ASSERT_WAIT(m, interruptible);
756 vm_object_unlock(object);
757 wait_result = thread_block((void (*)(void)) 0);
758 vm_object_deallocate(object);
759 goto backoff;
760 } else {
761 vm_object_unlock(object);
762 vm_object_deallocate(object);
763 cur_thread->interruptible = interruptible_state;
764 return VM_FAULT_RETRY;
765 }
766 }
767
768 /*
769 * If the desired access to this page has
770 * been locked out, request that it be unlocked.
771 */
772
773 if (access_required & m->page_lock) {
774 if ((access_required & m->unlock_request) != access_required) {
775 vm_prot_t new_unlock_request;
776 kern_return_t rc;
777
778 #if TRACEFAULTPAGE
779 dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready); /* (TEST/DEBUG) */
780 #endif
781 if (!object->pager_ready) {
782 XPR(XPR_VM_FAULT,
783 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
784 access_required,
785 (integer_t)object, offset,
786 (integer_t)m, 0);
787 /* take an extra ref */
788 assert(object->ref_count > 0);
789 object->ref_count++;
790 vm_object_res_reference(object);
791 vm_fault_cleanup(object,
792 first_m);
793 counter(c_vm_fault_page_block_backoff_kernel++);
794 vm_object_lock(object);
795 assert(object->ref_count > 0);
796 if (!object->pager_ready) {
797 vm_object_assert_wait(
798 object,
799 VM_OBJECT_EVENT_PAGER_READY,
800 interruptible);
801 vm_object_unlock(object);
802 wait_result = thread_block((void (*)(void))0);
803 vm_object_deallocate(object);
804 goto backoff;
805 } else {
806 vm_object_unlock(object);
807 vm_object_deallocate(object);
808 cur_thread->interruptible = interruptible_state;
809 return VM_FAULT_RETRY;
810 }
811 }
812
813 new_unlock_request = m->unlock_request =
814 (access_required | m->unlock_request);
815 vm_object_unlock(object);
816 XPR(XPR_VM_FAULT,
817 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
818 (integer_t)object, offset,
819 (integer_t)m, new_unlock_request, 0);
820 if ((rc = memory_object_data_unlock(
821 object->pager,
822 offset + object->paging_offset,
823 PAGE_SIZE,
824 new_unlock_request))
825 != KERN_SUCCESS) {
826 if (vm_fault_debug)
827 printf("vm_fault: memory_object_data_unlock failed\n");
828 vm_object_lock(object);
829 vm_fault_cleanup(object, first_m);
830 cur_thread->interruptible = interruptible_state;
831 return((rc == MACH_SEND_INTERRUPTED) ?
832 VM_FAULT_INTERRUPTED :
833 VM_FAULT_MEMORY_ERROR);
834 }
835 vm_object_lock(object);
836 continue;
837 }
838
839 XPR(XPR_VM_FAULT,
840 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
841 access_required, (integer_t)object,
842 offset, (integer_t)m, 0);
843 /* take an extra ref so object won't die */
844 assert(object->ref_count > 0);
845 object->ref_count++;
846 vm_object_res_reference(object);
847 vm_fault_cleanup(object, first_m);
848 counter(c_vm_fault_page_block_backoff_kernel++);
849 vm_object_lock(object);
850 assert(object->ref_count > 0);
851 m = vm_page_lookup(object, offset);
852 if (m != VM_PAGE_NULL &&
853 (access_required & m->page_lock) &&
854 !((access_required & m->unlock_request) != access_required)) {
855 PAGE_ASSERT_WAIT(m, interruptible);
856 vm_object_unlock(object);
857 wait_result = thread_block((void (*)(void)) 0);
858 vm_object_deallocate(object);
859 goto backoff;
860 } else {
861 vm_object_unlock(object);
862 vm_object_deallocate(object);
863 cur_thread->interruptible = interruptible_state;
864 return VM_FAULT_RETRY;
865 }
866 }
867 /*
868 * We mark the page busy and leave it on
869 * the pageout queues. If the pageout
870 * daemon comes across it, then it will
871 * remove the page.
872 */
873
874 #if TRACEFAULTPAGE
875 dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
876 #endif
877
878 #if !VM_FAULT_STATIC_CONFIG
879 if (!software_reference_bits) {
880 vm_page_lock_queues();
881 if (m->inactive)
882 vm_stat.reactivations++;
883
884 VM_PAGE_QUEUES_REMOVE(m);
885 vm_page_unlock_queues();
886 }
887 #endif
888 XPR(XPR_VM_FAULT,
889 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
890 (integer_t)object, offset, (integer_t)m, 0, 0);
891 assert(!m->busy);
892 m->busy = TRUE;
893 assert(!m->absent);
894 break;
895 }
896
897 look_for_page =
898 (object->pager_created) &&
899 LOOK_FOR(object, offset) &&
900 (!data_supply);
901
902 #if TRACEFAULTPAGE
903 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */
904 #endif
905 if ((look_for_page || (object == first_object))
906 && !must_be_resident
907 && !(object->phys_contiguous)) {
908 /*
909 * Allocate a new page for this object/offset
910 * pair.
911 */
912
913 m = vm_page_grab_fictitious();
914 #if TRACEFAULTPAGE
915 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
916 #endif
917 if (m == VM_PAGE_NULL) {
918 vm_fault_cleanup(object, first_m);
919 cur_thread->interruptible = interruptible_state;
920 return(VM_FAULT_FICTITIOUS_SHORTAGE);
921 }
922 vm_page_insert(m, object, offset);
923 }
924
925 if ((look_for_page && !must_be_resident)) {
926 kern_return_t rc;
927
928 /*
929 * If the memory manager is not ready, we
930 * cannot make requests.
931 */
932 if (!object->pager_ready) {
933 #if TRACEFAULTPAGE
934 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
935 #endif
936 if(m != VM_PAGE_NULL)
937 VM_PAGE_FREE(m);
938 XPR(XPR_VM_FAULT,
939 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
940 (integer_t)object, offset, 0, 0, 0);
941 /* take an extra ref so object won't die */
942 assert(object->ref_count > 0);
943 object->ref_count++;
944 vm_object_res_reference(object);
945 vm_fault_cleanup(object, first_m);
946 counter(c_vm_fault_page_block_backoff_kernel++);
947 vm_object_lock(object);
948 assert(object->ref_count > 0);
949 if (!object->pager_ready) {
950 vm_object_assert_wait(object,
951 VM_OBJECT_EVENT_PAGER_READY,
952 interruptible);
953 vm_object_unlock(object);
954 wait_result = thread_block((void (*)(void))0);
955 vm_object_deallocate(object);
956 goto backoff;
957 } else {
958 vm_object_unlock(object);
959 vm_object_deallocate(object);
960 cur_thread->interruptible = interruptible_state;
961 return VM_FAULT_RETRY;
962 }
963 }
964
965 if(object->phys_contiguous) {
966 if(m != VM_PAGE_NULL) {
967 VM_PAGE_FREE(m);
968 m = VM_PAGE_NULL;
969 }
970 goto no_clustering;
971 }
972 if (object->internal) {
973 /*
974 * Requests to the default pager
975 * must reserve a real page in advance,
976 * because the pager's data-provided
977 * won't block for pages. IMPORTANT:
978 * this acts as a throttling mechanism
979 * for data_requests to the default
980 * pager.
981 */
982
983 #if TRACEFAULTPAGE
984 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
985 #endif
986 if (m->fictitious && !vm_page_convert(m)) {
987 VM_PAGE_FREE(m);
988 vm_fault_cleanup(object, first_m);
989 cur_thread->interruptible = interruptible_state;
990 return(VM_FAULT_MEMORY_SHORTAGE);
991 }
992 } else if (object->absent_count >
993 vm_object_absent_max) {
994 /*
995 * If there are too many outstanding page
996 * requests pending on this object, we
997 * wait for them to be resolved now.
998 */
999
1000 #if TRACEFAULTPAGE
1001 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1002 #endif
1003 if(m != VM_PAGE_NULL)
1004 VM_PAGE_FREE(m);
1005 /* take an extra ref so object won't die */
1006 assert(object->ref_count > 0);
1007 object->ref_count++;
1008 vm_object_res_reference(object);
1009 vm_fault_cleanup(object, first_m);
1010 counter(c_vm_fault_page_block_backoff_kernel++);
1011 vm_object_lock(object);
1012 assert(object->ref_count > 0);
1013 if (object->absent_count > vm_object_absent_max) {
1014 vm_object_absent_assert_wait(object,
1015 interruptible);
1016 vm_object_unlock(object);
1017 wait_result = thread_block((void (*)(void))0);
1018 vm_object_deallocate(object);
1019 goto backoff;
1020 } else {
1021 vm_object_unlock(object);
1022 vm_object_deallocate(object);
1023 cur_thread->interruptible = interruptible_state;
1024 return VM_FAULT_RETRY;
1025 }
1026 }
1027
1028 /*
1029 * Indicate that the page is waiting for data
1030 * from the memory manager.
1031 */
1032
1033 if(m != VM_PAGE_NULL) {
1034
1035 m->list_req_pending = TRUE;
1036 m->absent = TRUE;
1037 m->unusual = TRUE;
1038 object->absent_count++;
1039
1040 }
1041
1042 cluster_start = offset;
1043 length = PAGE_SIZE;
1044 cluster_size = object->cluster_size;
1045
1046 /*
1047 * Skip clustered pagein if it is globally disabled
1048 * or random page reference behavior is expected
1049 * for the address range containing the faulting
1050 * address or the object paging block size is
1051 * equal to the page size.
1052 */
1053 if (!vm_allow_clustered_pagein ||
1054 behavior == VM_BEHAVIOR_RANDOM ||
1055 m == VM_PAGE_NULL ||
1056 cluster_size == PAGE_SIZE) {
1057 cluster_start = trunc_page_64(cluster_start);
1058 goto no_clustering;
1059 }
1060
1061 assert(offset >= lo_offset);
1062 assert(offset < hi_offset);
1063 assert(ALIGNED(object->paging_offset));
1064 assert(cluster_size >= PAGE_SIZE);
1065
1066 #if TRACEFAULTPAGE
1067 dbgTrace(0xBEEF0011, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1068 #endif
1069 /*
1070 * Decide whether to scan ahead or behind for
1071 * additional pages contiguous to the faulted
1072 * page in the same paging block. The decision
1073 * is based on system wide globals and the
1074 * expected page reference behavior of the
1075 * address range containing the faulting address.
1076 * First calculate some constants.
1077 */
1078 paging_offset = offset + object->paging_offset;
1079 cluster_offset = paging_offset & (cluster_size - 1);
1080 align_offset = paging_offset&(PAGE_SIZE_64-1);
1081 if (align_offset != 0) {
1082 cluster_offset = trunc_page_64(cluster_offset);
1083 }
1084
1085 #define SPANS_CLUSTER(x) ((((x) - align_offset) & (vm_object_offset_t)(cluster_size - 1)) == 0)
1086
1087 /*
1088 * Backward scan only if reverse sequential
1089 * behavior has been specified
1090 */
1091 CLUSTER_STAT(pages_at_lower_offsets = 0;)
1092 if (((vm_default_behind != 0 &&
1093 behavior == VM_BEHAVIOR_DEFAULT) ||
1094 behavior == VM_BEHAVIOR_RSEQNTL) && offset) {
1095 vm_object_offset_t cluster_bot;
1096
1097 /*
1098 * Calculate lower search boundary.
1099 * Exclude pages that span a cluster boundary.
1100 * Clip to start of map entry.
1101 * For default page reference behavior, scan
1102 * default pages behind.
1103 */
1104 cluster_bot = (offset > cluster_offset) ?
1105 offset - cluster_offset : offset;
1106 if (align_offset != 0) {
1107 if ((cluster_bot < offset) &&
1108 SPANS_CLUSTER(cluster_bot)) {
1109 cluster_bot += PAGE_SIZE_64;
1110 }
1111 }
1112 if (behavior == VM_BEHAVIOR_DEFAULT) {
1113 vm_object_offset_t
1114 bot = (vm_object_offset_t)
1115 (vm_default_behind * PAGE_SIZE);
1116
1117 if (cluster_bot < (offset - bot))
1118 cluster_bot = offset - bot;
1119 }
1120 if (lo_offset > cluster_bot)
1121 cluster_bot = lo_offset;
1122
1123 for ( cluster_start = offset - PAGE_SIZE_64;
1124 (cluster_start >= cluster_bot) &&
1125 (cluster_start !=
1126 (align_offset - PAGE_SIZE_64));
1127 cluster_start -= PAGE_SIZE_64) {
1128 assert(cluster_size > PAGE_SIZE_64);
1129 retry_cluster_backw:
1130 if (!LOOK_FOR(object, cluster_start) ||
1131 vm_page_lookup(object, cluster_start)
1132 != VM_PAGE_NULL) {
1133 break;
1134 }
1135 if (object->internal) {
1136 /*
1137 * need to acquire a real page in
1138 * advance because this acts as
1139 * a throttling mechanism for
1140 * data_requests to the default
1141 * pager. If this fails, give up
1142 * trying to find any more pages
1143 * in the cluster and send off the
1144 * request for what we already have.
1145 */
1146 if ((m = vm_page_grab())
1147 == VM_PAGE_NULL) {
1148 cluster_start += PAGE_SIZE_64;
1149 cluster_end = offset + PAGE_SIZE_64;
1150 goto give_up;
1151 }
1152 } else if ((m = vm_page_grab_fictitious())
1153 == VM_PAGE_NULL) {
1154 vm_object_unlock(object);
1155 vm_page_more_fictitious();
1156 vm_object_lock(object);
1157 goto retry_cluster_backw;
1158 }
1159 m->absent = TRUE;
1160 m->unusual = TRUE;
1161 m->clustered = TRUE;
1162 m->list_req_pending = TRUE;
1163
1164 vm_page_insert(m, object, cluster_start);
1165 CLUSTER_STAT(pages_at_lower_offsets++;)
1166 object->absent_count++;
1167 }
1168 cluster_start += PAGE_SIZE_64;
1169 assert(cluster_start >= cluster_bot);
1170 }
1171 assert(cluster_start <= offset);
1172
1173 /*
1174 * Forward scan if default or sequential behavior
1175 * specified
1176 */
1177 CLUSTER_STAT(pages_at_higher_offsets = 0;)
1178 if ((behavior == VM_BEHAVIOR_DEFAULT &&
1179 vm_default_ahead != 0) ||
1180 behavior == VM_BEHAVIOR_SEQUENTIAL) {
1181 vm_object_offset_t cluster_top;
1182
1183 /*
1184 * Calculate upper search boundary.
1185 * Exclude pages that span a cluster boundary.
1186 * Clip to end of map entry.
1187 * For default page reference behavior, scan
1188 * default pages ahead.
1189 */
1190 cluster_top = (offset + cluster_size) -
1191 cluster_offset;
1192 if (align_offset != 0) {
1193 if ((cluster_top > (offset + PAGE_SIZE_64)) &&
1194 SPANS_CLUSTER(cluster_top)) {
1195 cluster_top -= PAGE_SIZE_64;
1196 }
1197 }
1198 if (behavior == VM_BEHAVIOR_DEFAULT) {
1199 vm_object_offset_t top = (vm_object_offset_t)
1200 ((vm_default_ahead*PAGE_SIZE)+PAGE_SIZE);
1201
1202 if (cluster_top > (offset + top))
1203 cluster_top = offset + top;
1204 }
1205 if (cluster_top > hi_offset)
1206 cluster_top = hi_offset;
1207
1208 for (cluster_end = offset + PAGE_SIZE_64;
1209 cluster_end < cluster_top;
1210 cluster_end += PAGE_SIZE_64) {
1211 assert(cluster_size > PAGE_SIZE);
1212 retry_cluster_forw:
1213 if (!LOOK_FOR(object, cluster_end) ||
1214 vm_page_lookup(object, cluster_end)
1215 != VM_PAGE_NULL) {
1216 break;
1217 }
1218 if (object->internal) {
1219 /*
1220 * need to acquire a real page in
1221 * advance because this acts as
1222 * a throttling mechanism for
1223 * data_requests to the default
1224 * pager. If this fails, give up
1225 * trying to find any more pages
1226 * in the cluster and send off the
1227 * request for what we already have.
1228 */
1229 if ((m = vm_page_grab())
1230 == VM_PAGE_NULL) {
1231 break;
1232 }
1233 } else if ((m = vm_page_grab_fictitious())
1234 == VM_PAGE_NULL) {
1235 vm_object_unlock(object);
1236 vm_page_more_fictitious();
1237 vm_object_lock(object);
1238 goto retry_cluster_forw;
1239 }
1240 m->absent = TRUE;
1241 m->unusual = TRUE;
1242 m->clustered = TRUE;
1243 m->list_req_pending = TRUE;
1244
1245 vm_page_insert(m, object, cluster_end);
1246 CLUSTER_STAT(pages_at_higher_offsets++;)
1247 object->absent_count++;
1248 }
1249 assert(cluster_end <= cluster_top);
1250 }
1251 else {
1252 cluster_end = offset + PAGE_SIZE_64;
1253 }
1254 give_up:
1255 assert(cluster_end >= offset + PAGE_SIZE_64);
1256 length = cluster_end - cluster_start;
1257
1258 #if MACH_CLUSTER_STATS
1259 CLUSTER_STAT_HIGHER(pages_at_higher_offsets);
1260 CLUSTER_STAT_LOWER(pages_at_lower_offsets);
1261 CLUSTER_STAT_CLUSTER(length/PAGE_SIZE);
1262 #endif /* MACH_CLUSTER_STATS */
1263
1264 no_clustering:
1265 /*
1266 * lengthen the cluster by the pages in the working set
1267 */
1268 if((map != NULL) &&
1269 (current_task()->dynamic_working_set != 0)) {
1270 cluster_end = cluster_start + length;
1271 /* tws values for start and end are just
1272 * suggestions. Therefore, as long as
1273 * build_cluster does not use pointers or
1274 * take action based on values that
1275 * could be affected by re-entrance we
1276 * do not need to take the map lock.
1277 */
1278 tws_build_cluster((tws_hash_t)
1279 current_task()->dynamic_working_set,
1280 object, &cluster_start,
1281 &cluster_end, 0x16000);
1282 length = cluster_end - cluster_start;
1283 }
1284 #if TRACEFAULTPAGE
1285 dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */
1286 #endif
1287 /*
1288 * We have a busy page, so we can
1289 * release the object lock.
1290 */
1291 vm_object_unlock(object);
1292
1293 /*
1294 * Call the memory manager to retrieve the data.
1295 */
1296
1297 if (type_of_fault)
1298 *type_of_fault = DBG_PAGEIN_FAULT;
1299 VM_STAT(pageins++);
1300 current_task()->pageins++;
1301 bumped_pagein = TRUE;
1302
1303 /*
1304 * If this object uses a copy_call strategy,
1305 * and we are interested in a copy of this object
1306 * (having gotten here only by following a
1307 * shadow chain), then tell the memory manager
1308 * via a flag added to the desired_access
1309 * parameter, so that it can detect a race
1310 * between our walking down the shadow chain
1311 * and its pushing pages up into a copy of
1312 * the object that it manages.
1313 */
1314
1315 if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1316 object != first_object) {
1317 wants_copy_flag = VM_PROT_WANTS_COPY;
1318 } else {
1319 wants_copy_flag = VM_PROT_NONE;
1320 }
1321
1322 XPR(XPR_VM_FAULT,
1323 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1324 (integer_t)object, offset, (integer_t)m,
1325 access_required | wants_copy_flag, 0);
1326
1327 rc = memory_object_data_request(object->pager,
1328 cluster_start + object->paging_offset,
1329 length,
1330 access_required | wants_copy_flag);
1331
1332
1333 #if TRACEFAULTPAGE
1334 dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1335 #endif
1336 if (rc != KERN_SUCCESS) {
1337 if (rc != MACH_SEND_INTERRUPTED
1338 && vm_fault_debug)
1339 printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
1340 "memory_object_data_request",
1341 object->pager,
1342 cluster_start + object->paging_offset,
1343 length, access_required, rc);
1344 /*
1345 * Don't want to leave a busy page around,
1346 * but the data request may have blocked,
1347 * so check if it's still there and busy.
1348 */
1349 if(!object->phys_contiguous) {
1350 vm_object_lock(object);
1351 for (; length; length -= PAGE_SIZE,
1352 cluster_start += PAGE_SIZE_64) {
1353 vm_page_t p;
1354 if ((p = vm_page_lookup(object,
1355 cluster_start))
1356 && p->absent && p->busy
1357 && p != first_m) {
1358 VM_PAGE_FREE(p);
1359 }
1360 }
1361 }
1362 vm_fault_cleanup(object, first_m);
1363 cur_thread->interruptible = interruptible_state;
1364 return((rc == MACH_SEND_INTERRUPTED) ?
1365 VM_FAULT_INTERRUPTED :
1366 VM_FAULT_MEMORY_ERROR);
1367 } else {
1368 #ifdef notdefcdy
1369 tws_hash_line_t line;
1370 task_t task;
1371
1372 task = current_task();
1373
1374 if((map != NULL) &&
1375 (task->dynamic_working_set != 0)) {
1376 if(tws_lookup
1377 ((tws_hash_t)
1378 task->dynamic_working_set,
1379 offset, object,
1380 &line) == KERN_SUCCESS) {
1381 tws_line_signal((tws_hash_t)
1382 task->dynamic_working_set,
1383 map, line, vaddr);
1384 }
1385 }
1386 #endif
1387 }
1388
1389 /*
1390 * Retry with same object/offset, since new data may
1391 * be in a different page (i.e., m is meaningless at
1392 * this point).
1393 */
1394 vm_object_lock(object);
1395 if ((interruptible != THREAD_UNINT) &&
1396 (current_thread()->state & TH_ABORT)) {
1397 vm_fault_cleanup(object, first_m);
1398 cur_thread->interruptible = interruptible_state;
1399 return(VM_FAULT_INTERRUPTED);
1400 }
1401 if(m == VM_PAGE_NULL)
1402 break;
1403 continue;
1404 }
1405
1406 /*
1407 * The only case in which we get here is if
1408 * object has no pager (or unwiring). If the pager doesn't
1409 * have the page this is handled in the m->absent case above
1410 * (and if you change things here you should look above).
1411 */
1412 #if TRACEFAULTPAGE
1413 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1414 #endif
1415 if (object == first_object)
1416 first_m = m;
1417 else
1418 assert(m == VM_PAGE_NULL);
1419
1420 XPR(XPR_VM_FAULT,
1421 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1422 (integer_t)object, offset, (integer_t)m,
1423 (integer_t)object->shadow, 0);
1424 /*
1425 * Move on to the next object. Lock the next
1426 * object before unlocking the current one.
1427 */
1428 next_object = object->shadow;
1429 if (next_object == VM_OBJECT_NULL) {
1430 assert(!must_be_resident);
1431 /*
1432 * If there's no object left, fill the page
1433 * in the top object with zeros. But first we
1434 * need to allocate a real page.
1435 */
1436
1437 if (object != first_object) {
1438 vm_object_paging_end(object);
1439 vm_object_unlock(object);
1440
1441 object = first_object;
1442 offset = first_offset;
1443 vm_object_lock(object);
1444 }
1445
1446 m = first_m;
1447 assert(m->object == object);
1448 first_m = VM_PAGE_NULL;
1449
1450 if (object->shadow_severed) {
1451 VM_PAGE_FREE(m);
1452 vm_fault_cleanup(object, VM_PAGE_NULL);
1453 cur_thread->interruptible = interruptible_state;
1454 return VM_FAULT_MEMORY_ERROR;
1455 }
1456
1457 if (VM_PAGE_THROTTLED() ||
1458 (m->fictitious && !vm_page_convert(m))) {
1459 VM_PAGE_FREE(m);
1460 vm_fault_cleanup(object, VM_PAGE_NULL);
1461 cur_thread->interruptible = interruptible_state;
1462 return(VM_FAULT_MEMORY_SHORTAGE);
1463 }
1464 m->no_isync = FALSE;
1465
1466 if (!no_zero_fill) {
1467 vm_object_unlock(object);
1468 vm_page_zero_fill(m);
1469 if (type_of_fault)
1470 *type_of_fault = DBG_ZERO_FILL_FAULT;
1471 VM_STAT(zero_fill_count++);
1472
1473 if (bumped_pagein == TRUE) {
1474 VM_STAT(pageins--);
1475 current_task()->pageins--;
1476 }
1477 vm_object_lock(object);
1478 }
1479 vm_page_lock_queues();
1480 VM_PAGE_QUEUES_REMOVE(m);
1481 m->page_ticket = vm_page_ticket;
1482 vm_page_ticket_roll++;
1483 if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1484 vm_page_ticket_roll = 0;
1485 if(vm_page_ticket ==
1486 VM_PAGE_TICKET_ROLL_IDS)
1487 vm_page_ticket= 0;
1488 else
1489 vm_page_ticket++;
1490 }
1491 queue_enter(&vm_page_queue_inactive,
1492 m, vm_page_t, pageq);
1493 m->inactive = TRUE;
1494 vm_page_inactive_count++;
1495 vm_page_unlock_queues();
1496 pmap_clear_modify(m->phys_addr);
1497 break;
1498 }
1499 else {
1500 if ((object != first_object) || must_be_resident)
1501 vm_object_paging_end(object);
1502 offset += object->shadow_offset;
1503 hi_offset += object->shadow_offset;
1504 lo_offset += object->shadow_offset;
1505 access_required = VM_PROT_READ;
1506 vm_object_lock(next_object);
1507 vm_object_unlock(object);
1508 object = next_object;
1509 vm_object_paging_begin(object);
1510 }
1511 }
1512
1513 /*
1514 * PAGE HAS BEEN FOUND.
1515 *
1516 * This page (m) is:
1517 * busy, so that we can play with it;
1518 * not absent, so that nobody else will fill it;
1519 * possibly eligible for pageout;
1520 *
1521 * The top-level page (first_m) is:
1522 * VM_PAGE_NULL if the page was found in the
1523 * top-level object;
1524 * busy, not absent, and ineligible for pageout.
1525 *
1526 * The current object (object) is locked. A paging
1527 * reference is held for the current and top-level
1528 * objects.
1529 */
1530
1531 #if TRACEFAULTPAGE
1532 dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1533 #endif
1534 #if EXTRA_ASSERTIONS
1535 if(m != VM_PAGE_NULL) {
1536 assert(m->busy && !m->absent);
1537 assert((first_m == VM_PAGE_NULL) ||
1538 (first_m->busy && !first_m->absent &&
1539 !first_m->active && !first_m->inactive));
1540 }
1541 #endif /* EXTRA_ASSERTIONS */
1542
1543 XPR(XPR_VM_FAULT,
1544 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1545 (integer_t)object, offset, (integer_t)m,
1546 (integer_t)first_object, (integer_t)first_m);
1547 /*
1548 * If the page is being written, but isn't
1549 * already owned by the top-level object,
1550 * we have to copy it into a new page owned
1551 * by the top-level object.
1552 */
1553
1554 if ((object != first_object) && (m != VM_PAGE_NULL)) {
1555 /*
1556 * We only really need to copy if we
1557 * want to write it.
1558 */
1559
1560 #if TRACEFAULTPAGE
1561 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1562 #endif
1563 if (fault_type & VM_PROT_WRITE) {
1564 vm_page_t copy_m;
1565
1566 assert(!must_be_resident);
1567
1568 /*
1569 * If we try to collapse first_object at this
1570 * point, we may deadlock when we try to get
1571 * the lock on an intermediate object (since we
1572 * have the bottom object locked). We can't
1573 * unlock the bottom object, because the page
1574 * we found may move (by collapse) if we do.
1575 *
1576 * Instead, we first copy the page. Then, when
1577 * we have no more use for the bottom object,
1578 * we unlock it and try to collapse.
1579 *
1580 * Note that we copy the page even if we didn't
1581 * need to... that's the breaks.
1582 */
1583
1584 /*
1585 * Allocate a page for the copy
1586 */
1587 copy_m = vm_page_grab();
1588 if (copy_m == VM_PAGE_NULL) {
1589 RELEASE_PAGE(m);
1590 vm_fault_cleanup(object, first_m);
1591 cur_thread->interruptible = interruptible_state;
1592 return(VM_FAULT_MEMORY_SHORTAGE);
1593 }
1594
1595
1596 XPR(XPR_VM_FAULT,
1597 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1598 (integer_t)object, offset,
1599 (integer_t)m, (integer_t)copy_m, 0);
1600 vm_page_copy(m, copy_m);
1601
1602 /*
1603 * If another map is truly sharing this
1604 * page with us, we have to flush all
1605 * uses of the original page, since we
1606 * can't distinguish those which want the
1607 * original from those which need the
1608 * new copy.
1609 *
1610 * XXXO If we know that only one map has
1611 * access to this page, then we could
1612 * avoid the pmap_page_protect() call.
1613 */
1614
1615 vm_page_lock_queues();
1616 assert(!m->cleaning);
1617 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1618 vm_page_deactivate(m);
1619 copy_m->dirty = TRUE;
1620 /*
1621 * Setting reference here prevents this fault from
1622 * being counted as a (per-thread) reactivate as well
1623 * as a copy-on-write.
1624 */
1625 first_m->reference = TRUE;
1626 vm_page_unlock_queues();
1627
1628 /*
1629 * We no longer need the old page or object.
1630 */
1631
1632 PAGE_WAKEUP_DONE(m);
1633 vm_object_paging_end(object);
1634 vm_object_unlock(object);
1635
1636 if (type_of_fault)
1637 *type_of_fault = DBG_COW_FAULT;
1638 VM_STAT(cow_faults++);
1639 current_task()->cow_faults++;
1640 object = first_object;
1641 offset = first_offset;
1642
1643 vm_object_lock(object);
1644 VM_PAGE_FREE(first_m);
1645 first_m = VM_PAGE_NULL;
1646 assert(copy_m->busy);
1647 vm_page_insert(copy_m, object, offset);
1648 m = copy_m;
1649
1650 /*
1651 * Now that we've gotten the copy out of the
1652 * way, let's try to collapse the top object.
1653 * But we have to play ugly games with
1654 * paging_in_progress to do that...
1655 */
1656
1657 vm_object_paging_end(object);
1658 vm_object_collapse(object);
1659 vm_object_paging_begin(object);
1660
1661 }
1662 else {
1663 *protection &= (~VM_PROT_WRITE);
1664 }
1665 }
1666
1667 /*
1668 * Now check whether the page needs to be pushed into the
1669 * copy object. The use of asymmetric copy on write for
1670 * shared temporary objects means that we may do two copies to
1671 * satisfy the fault; one above to get the page from a
1672 * shadowed object, and one here to push it into the copy.
1673 */
1674
1675 while (first_object->copy_strategy == MEMORY_OBJECT_COPY_DELAY &&
1676 (copy_object = first_object->copy) != VM_OBJECT_NULL &&
1677 (m!= VM_PAGE_NULL)) {
1678 vm_object_offset_t copy_offset;
1679 vm_page_t copy_m;
1680
1681 #if TRACEFAULTPAGE
1682 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1683 #endif
1684 /*
1685 * If the page is being written, but hasn't been
1686 * copied to the copy-object, we have to copy it there.
1687 */
1688
1689 if ((fault_type & VM_PROT_WRITE) == 0) {
1690 *protection &= ~VM_PROT_WRITE;
1691 break;
1692 }
1693
1694 /*
1695 * If the page was guaranteed to be resident,
1696 * we must have already performed the copy.
1697 */
1698
1699 if (must_be_resident)
1700 break;
1701
1702 /*
1703 * Try to get the lock on the copy_object.
1704 */
1705 if (!vm_object_lock_try(copy_object)) {
1706 vm_object_unlock(object);
1707
1708 mutex_pause(); /* wait a bit */
1709
1710 vm_object_lock(object);
1711 continue;
1712 }
1713
1714 /*
1715 * Make another reference to the copy-object,
1716 * to keep it from disappearing during the
1717 * copy.
1718 */
1719 assert(copy_object->ref_count > 0);
1720 copy_object->ref_count++;
1721 VM_OBJ_RES_INCR(copy_object);
1722
1723 /*
1724 * Does the page exist in the copy?
1725 */
1726 copy_offset = first_offset - copy_object->shadow_offset;
1727 if (copy_object->size <= copy_offset)
1728 /*
1729 * Copy object doesn't cover this page -- do nothing.
1730 */
1731 ;
1732 else if ((copy_m =
1733 vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1734 /* Page currently exists in the copy object */
1735 if (copy_m->busy) {
1736 /*
1737 * If the page is being brought
1738 * in, wait for it and then retry.
1739 */
1740 RELEASE_PAGE(m);
1741 /* take an extra ref so object won't die */
1742 assert(copy_object->ref_count > 0);
1743 copy_object->ref_count++;
1744 vm_object_res_reference(copy_object);
1745 vm_object_unlock(copy_object);
1746 vm_fault_cleanup(object, first_m);
1747 counter(c_vm_fault_page_block_backoff_kernel++);
1748 vm_object_lock(copy_object);
1749 assert(copy_object->ref_count > 0);
1750 VM_OBJ_RES_DECR(copy_object);
1751 copy_object->ref_count--;
1752 assert(copy_object->ref_count > 0);
1753 copy_m = vm_page_lookup(copy_object, copy_offset);
1754 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1755 PAGE_ASSERT_WAIT(copy_m, interruptible);
1756 vm_object_unlock(copy_object);
1757 wait_result = thread_block((void (*)(void))0);
1758 vm_object_deallocate(copy_object);
1759 goto backoff;
1760 } else {
1761 vm_object_unlock(copy_object);
1762 vm_object_deallocate(copy_object);
1763 cur_thread->interruptible = interruptible_state;
1764 return VM_FAULT_RETRY;
1765 }
1766 }
1767 }
1768 else if (!PAGED_OUT(copy_object, copy_offset)) {
1769 /*
1770 * If PAGED_OUT is TRUE, then the page used to exist
1771 * in the copy-object, and has already been paged out.
1772 * We don't need to repeat this. If PAGED_OUT is
1773 * FALSE, then either we don't know (!pager_created,
1774 * for example) or it hasn't been paged out.
1775 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1776 * We must copy the page to the copy object.
1777 */
1778
1779 /*
1780 * Allocate a page for the copy
1781 */
1782 copy_m = vm_page_alloc(copy_object, copy_offset);
1783 if (copy_m == VM_PAGE_NULL) {
1784 RELEASE_PAGE(m);
1785 VM_OBJ_RES_DECR(copy_object);
1786 copy_object->ref_count--;
1787 assert(copy_object->ref_count > 0);
1788 vm_object_unlock(copy_object);
1789 vm_fault_cleanup(object, first_m);
1790 cur_thread->interruptible = interruptible_state;
1791 return(VM_FAULT_MEMORY_SHORTAGE);
1792 }
1793
1794 /*
1795 * Must copy page into copy-object.
1796 */
1797
1798 vm_page_copy(m, copy_m);
1799
1800 /*
1801 * If the old page was in use by any users
1802 * of the copy-object, it must be removed
1803 * from all pmaps. (We can't know which
1804 * pmaps use it.)
1805 */
1806
1807 vm_page_lock_queues();
1808 assert(!m->cleaning);
1809 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1810 copy_m->dirty = TRUE;
1811 vm_page_unlock_queues();
1812
1813 /*
1814 * If there's a pager, then immediately
1815 * page out this page, using the "initialize"
1816 * option. Else, we use the copy.
1817 */
1818
1819 if
1820 #if MACH_PAGEMAP
1821 ((!copy_object->pager_created) ||
1822 vm_external_state_get(
1823 copy_object->existence_map, copy_offset)
1824 == VM_EXTERNAL_STATE_ABSENT)
1825 #else
1826 (!copy_object->pager_created)
1827 #endif
1828 {
1829 vm_page_lock_queues();
1830 vm_page_activate(copy_m);
1831 vm_page_unlock_queues();
1832 PAGE_WAKEUP_DONE(copy_m);
1833 }
1834 else {
1835 assert(copy_m->busy == TRUE);
1836
1837 /*
1838 * The page is already ready for pageout:
1839 * not on pageout queues and busy.
1840 * Unlock everything except the
1841 * copy_object itself.
1842 */
1843
1844 vm_object_unlock(object);
1845
1846 /*
1847 * Write the page to the copy-object,
1848 * flushing it from the kernel.
1849 */
1850
1851 vm_pageout_initialize_page(copy_m);
1852
1853 /*
1854 * Since the pageout may have
1855 * temporarily dropped the
1856 * copy_object's lock, we
1857 * check whether we'll have
1858 * to deallocate the hard way.
1859 */
1860
1861 if ((copy_object->shadow != object) ||
1862 (copy_object->ref_count == 1)) {
1863 vm_object_unlock(copy_object);
1864 vm_object_deallocate(copy_object);
1865 vm_object_lock(object);
1866 continue;
1867 }
1868
1869 /*
1870 * Pick back up the old object's
1871 * lock. [It is safe to do so,
1872 * since it must be deeper in the
1873 * object tree.]
1874 */
1875
1876 vm_object_lock(object);
1877 }
1878
1879 /*
1880 * Because we're pushing a page upward
1881 * in the object tree, we must restart
1882 * any faults that are waiting here.
1883 * [Note that this is an expansion of
1884 * PAGE_WAKEUP that uses the THREAD_RESTART
1885 * wait result]. Can't turn off the page's
1886 * busy bit because we're not done with it.
1887 */
1888
1889 if (m->wanted) {
1890 m->wanted = FALSE;
1891 thread_wakeup_with_result((event_t) m,
1892 THREAD_RESTART);
1893 }
1894 }
1895
1896 /*
1897 * The reference count on copy_object must be
1898 * at least 2: one for our extra reference,
1899 * and at least one from the outside world
1900 * (we checked that when we last locked
1901 * copy_object).
1902 */
1903 copy_object->ref_count--;
1904 assert(copy_object->ref_count > 0);
1905 VM_OBJ_RES_DECR(copy_object);
1906 vm_object_unlock(copy_object);
1907
1908 break;
1909 }
1910
1911 *result_page = m;
1912 *top_page = first_m;
1913
1914 XPR(XPR_VM_FAULT,
1915 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1916 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1917 /*
1918 * If the page can be written, assume that it will be.
1919 * [Earlier, we restrict the permission to allow write
1920 * access only if the fault so required, so we don't
1921 * mark read-only data as dirty.]
1922 */
1923
1924 #if !VM_FAULT_STATIC_CONFIG
1925 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE) &&
1926 (m != VM_PAGE_NULL)) {
1927 m->dirty = TRUE;
1928 }
1929 #endif
1930 #if TRACEFAULTPAGE
1931 dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_page_deactivate_behind); /* (TEST/DEBUG) */
1932 #endif
1933 if (vm_page_deactivate_behind) {
1934 if (offset && /* don't underflow */
1935 (object->last_alloc == (offset - PAGE_SIZE_64))) {
1936 m = vm_page_lookup(object, object->last_alloc);
1937 if ((m != VM_PAGE_NULL) && !m->busy) {
1938 vm_page_lock_queues();
1939 vm_page_deactivate(m);
1940 vm_page_unlock_queues();
1941 }
1942 #if TRACEFAULTPAGE
1943 dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1944 #endif
1945 }
1946 object->last_alloc = offset;
1947 }
1948 #if TRACEFAULTPAGE
1949 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */
1950 #endif
1951 cur_thread->interruptible = interruptible_state;
1952 if(*result_page == VM_PAGE_NULL) {
1953 vm_object_unlock(object);
1954 }
1955 return(VM_FAULT_SUCCESS);
1956
1957 #if 0
1958 block_and_backoff:
1959 vm_fault_cleanup(object, first_m);
1960
1961 counter(c_vm_fault_page_block_backoff_kernel++);
1962 thread_block((void (*)(void))0);
1963 #endif
1964
1965 backoff:
1966 cur_thread->interruptible = interruptible_state;
1967 if (wait_result == THREAD_INTERRUPTED)
1968 return VM_FAULT_INTERRUPTED;
1969 return VM_FAULT_RETRY;
1970
1971 #undef RELEASE_PAGE
1972 }
1973
1974 /*
1975 * Routine: vm_fault
1976 * Purpose:
1977 * Handle page faults, including pseudo-faults
1978 * used to change the wiring status of pages.
1979 * Returns:
1980 * Explicit continuations have been removed.
1981 * Implementation:
1982 * vm_fault and vm_fault_page save mucho state
1983 * in the moral equivalent of a closure. The state
1984 * structure is allocated when first entering vm_fault
1985 * and deallocated when leaving vm_fault.
1986 */
1987
1988 kern_return_t
1989 vm_fault(
1990 vm_map_t map,
1991 vm_offset_t vaddr,
1992 vm_prot_t fault_type,
1993 boolean_t change_wiring,
1994 int interruptible)
1995 {
1996 vm_map_version_t version; /* Map version for verification */
1997 boolean_t wired; /* Should mapping be wired down? */
1998 vm_object_t object; /* Top-level object */
1999 vm_object_offset_t offset; /* Top-level offset */
2000 vm_prot_t prot; /* Protection for mapping */
2001 vm_behavior_t behavior; /* Expected paging behavior */
2002 vm_object_offset_t lo_offset, hi_offset;
2003 vm_object_t old_copy_object; /* Saved copy object */
2004 vm_page_t result_page; /* Result of vm_fault_page */
2005 vm_page_t top_page; /* Placeholder page */
2006 kern_return_t kr;
2007
2008 register
2009 vm_page_t m; /* Fast access to result_page */
2010 kern_return_t error_code; /* page error reasons */
2011 register
2012 vm_object_t cur_object;
2013 register
2014 vm_object_offset_t cur_offset;
2015 vm_page_t cur_m;
2016 vm_object_t new_object;
2017 int type_of_fault;
2018 vm_map_t pmap_map = map;
2019 vm_map_t original_map = map;
2020 pmap_t pmap = NULL;
2021 boolean_t funnel_set = FALSE;
2022 funnel_t *curflock;
2023 thread_t cur_thread;
2024 boolean_t interruptible_state;
2025
2026
2027 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
2028 vaddr,
2029 0,
2030 0,
2031 0,
2032 0);
2033
2034 cur_thread = current_thread();
2035
2036 interruptible_state = cur_thread->interruptible;
2037 if (interruptible == THREAD_UNINT)
2038 cur_thread->interruptible = FALSE;
2039
2040 /*
2041 * assume we will hit a page in the cache
2042 * otherwise, explicitly override with
2043 * the real fault type once we determine it
2044 */
2045 type_of_fault = DBG_CACHE_HIT_FAULT;
2046
2047 VM_STAT(faults++);
2048 current_task()->faults++;
2049
2050 /*
2051 * drop funnel if it is already held. Then restore while returning
2052 */
2053 if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) {
2054 funnel_set = TRUE;
2055 curflock = cur_thread->funnel_lock;
2056 thread_funnel_set( curflock , FALSE);
2057 }
2058
2059 RetryFault: ;
2060
2061 /*
2062 * Find the backing store object and offset into
2063 * it to begin the search.
2064 */
2065 map = original_map;
2066 vm_map_lock_read(map);
2067 kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
2068 &object, &offset,
2069 &prot, &wired,
2070 &behavior, &lo_offset, &hi_offset, &pmap_map);
2071
2072 pmap = pmap_map->pmap;
2073
2074 if (kr != KERN_SUCCESS) {
2075 vm_map_unlock_read(map);
2076 goto done;
2077 }
2078
2079 /*
2080 * If the page is wired, we must fault for the current protection
2081 * value, to avoid further faults.
2082 */
2083
2084 if (wired)
2085 fault_type = prot | VM_PROT_WRITE;
2086
2087 #if VM_FAULT_CLASSIFY
2088 /*
2089 * Temporary data gathering code
2090 */
2091 vm_fault_classify(object, offset, fault_type);
2092 #endif
2093 /*
2094 * Fast fault code. The basic idea is to do as much as
2095 * possible while holding the map lock and object locks.
2096 * Busy pages are not used until the object lock has to
2097 * be dropped to do something (copy, zero fill, pmap enter).
2098 * Similarly, paging references aren't acquired until that
2099 * point, and object references aren't used.
2100 *
2101 * If we can figure out what to do
2102 * (zero fill, copy on write, pmap enter) while holding
2103 * the locks, then it gets done. Otherwise, we give up,
2104 * and use the original fault path (which doesn't hold
2105 * the map lock, and relies on busy pages).
2106 * The give up cases include:
2107 * - Have to talk to pager.
2108 * - Page is busy, absent or in error.
2109 * - Pager has locked out desired access.
2110 * - Fault needs to be restarted.
2111 * - Have to push page into copy object.
2112 *
2113 * The code is an infinite loop that moves one level down
2114 * the shadow chain each time. cur_object and cur_offset
2115 * refer to the current object being examined. object and offset
2116 * are the original object from the map. The loop is at the
2117 * top level if and only if object and cur_object are the same.
2118 *
2119 * Invariants: Map lock is held throughout. Lock is held on
2120 * original object and cur_object (if different) when
2121 * continuing or exiting loop.
2122 *
2123 */
2124
2125
2126 /*
2127 * If this page is to be inserted in a copy delay object
2128 * for writing, and if the object has a copy, then the
2129 * copy delay strategy is implemented in the slow fault page.
2130 */
2131 if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
2132 object->copy == VM_OBJECT_NULL ||
2133 (fault_type & VM_PROT_WRITE) == 0) {
2134 cur_object = object;
2135 cur_offset = offset;
2136
2137 while (TRUE) {
2138 m = vm_page_lookup(cur_object, cur_offset);
2139 if (m != VM_PAGE_NULL) {
2140 if (m->busy)
2141 break;
2142
2143 if (m->unusual && (m->error || m->restart || m->private
2144 || m->absent || (fault_type & m->page_lock))) {
2145
2146 /*
2147 * Unusual case. Give up.
2148 */
2149 break;
2150 }
2151
2152 /*
2153 * Two cases of map in faults:
2154 * - At top level w/o copy object.
2155 * - Read fault anywhere.
2156 * --> must disallow write.
2157 */
2158
2159 if (object == cur_object &&
2160 object->copy == VM_OBJECT_NULL)
2161 goto FastMapInFault;
2162
2163 if ((fault_type & VM_PROT_WRITE) == 0) {
2164
2165 prot &= ~VM_PROT_WRITE;
2166
2167 /*
2168 * Set up to map the page ...
2169 * mark the page busy, drop
2170 * locks and take a paging reference
2171 * on the object with the page.
2172 */
2173
2174 if (object != cur_object) {
2175 vm_object_unlock(object);
2176 object = cur_object;
2177 }
2178 FastMapInFault:
2179 m->busy = TRUE;
2180
2181 vm_object_paging_begin(object);
2182 vm_object_unlock(object);
2183
2184 FastPmapEnter:
2185 /*
2186 * Check a couple of global reasons to
2187 * be conservative about write access.
2188 * Then do the pmap_enter.
2189 */
2190 #if !VM_FAULT_STATIC_CONFIG
2191 if (vm_fault_dirty_handling
2192 #if MACH_KDB
2193 || db_watchpoint_list
2194 #endif
2195 && (fault_type & VM_PROT_WRITE) == 0)
2196 prot &= ~VM_PROT_WRITE;
2197 #else /* STATIC_CONFIG */
2198 #if MACH_KDB
2199 if (db_watchpoint_list
2200 && (fault_type & VM_PROT_WRITE) == 0)
2201 prot &= ~VM_PROT_WRITE;
2202 #endif /* MACH_KDB */
2203 #endif /* STATIC_CONFIG */
2204 if (m->no_isync == TRUE)
2205 pmap_sync_caches_phys(m->phys_addr);
2206
2207 PMAP_ENTER(pmap, vaddr, m, prot, wired);
2208 {
2209 tws_hash_line_t line;
2210 task_t task;
2211
2212 task = current_task();
2213 if((map != NULL) &&
2214 (task->dynamic_working_set != 0)) {
2215 if(tws_lookup
2216 ((tws_hash_t)
2217 task->dynamic_working_set,
2218 cur_offset, object,
2219 &line) != KERN_SUCCESS) {
2220 if(tws_insert((tws_hash_t)
2221 task->dynamic_working_set,
2222 m->offset, m->object,
2223 vaddr, pmap_map)
2224 == KERN_NO_SPACE) {
2225 tws_expand_working_set(
2226 task->dynamic_working_set,
2227 TWS_HASH_LINE_COUNT);
2228 }
2229 }
2230 }
2231 }
2232 /*
2233 * Grab the object lock to manipulate
2234 * the page queues. Change wiring
2235 * case is obvious. In soft ref bits
2236 * case activate page only if it fell
2237 * off paging queues, otherwise just
2238 * activate it if it's inactive.
2239 *
2240 * NOTE: original vm_fault code will
2241 * move active page to back of active
2242 * queue. This code doesn't.
2243 */
2244 vm_object_lock(object);
2245 vm_page_lock_queues();
2246
2247 if (m->clustered) {
2248 vm_pagein_cluster_used++;
2249 m->clustered = FALSE;
2250 }
2251 /*
2252 * we did the isync above (if needed)... we're clearing
2253 * the flag here to avoid holding a lock
2254 * while calling pmap functions, however
2255 * we need hold the object lock before
2256 * we can modify the flag
2257 */
2258 m->no_isync = FALSE;
2259 m->reference = TRUE;
2260
2261 if (change_wiring) {
2262 if (wired)
2263 vm_page_wire(m);
2264 else
2265 vm_page_unwire(m);
2266 }
2267 #if VM_FAULT_STATIC_CONFIG
2268 else {
2269 if (!m->active && !m->inactive)
2270 vm_page_activate(m);
2271 }
2272 #else
2273 else if (software_reference_bits) {
2274 if (!m->active && !m->inactive)
2275 vm_page_activate(m);
2276 }
2277 else if (!m->active) {
2278 vm_page_activate(m);
2279 }
2280 #endif
2281 vm_page_unlock_queues();
2282
2283 /*
2284 * That's it, clean up and return.
2285 */
2286 PAGE_WAKEUP_DONE(m);
2287 vm_object_paging_end(object);
2288 vm_object_unlock(object);
2289 vm_map_unlock_read(map);
2290 if(pmap_map != map)
2291 vm_map_unlock(pmap_map);
2292
2293 if (funnel_set) {
2294 thread_funnel_set( curflock, TRUE);
2295 funnel_set = FALSE;
2296 }
2297 cur_thread->interruptible = interruptible_state;
2298
2299 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2300 vaddr,
2301 type_of_fault,
2302 KERN_SUCCESS,
2303 0,
2304 0);
2305 return KERN_SUCCESS;
2306 }
2307
2308 /*
2309 * Copy on write fault. If objects match, then
2310 * object->copy must not be NULL (else control
2311 * would be in previous code block), and we
2312 * have a potential push into the copy object
2313 * with which we won't cope here.
2314 */
2315
2316 if (cur_object == object)
2317 break;
2318
2319 /*
2320 * This is now a shadow based copy on write
2321 * fault -- it requires a copy up the shadow
2322 * chain.
2323 *
2324 * Allocate a page in the original top level
2325 * object. Give up if allocate fails. Also
2326 * need to remember current page, as it's the
2327 * source of the copy.
2328 */
2329 cur_m = m;
2330 m = vm_page_grab();
2331 if (m == VM_PAGE_NULL) {
2332 break;
2333 }
2334
2335 /*
2336 * Now do the copy. Mark the source busy
2337 * and take out paging references on both
2338 * objects.
2339 *
2340 * NOTE: This code holds the map lock across
2341 * the page copy.
2342 */
2343
2344 cur_m->busy = TRUE;
2345 vm_page_copy(cur_m, m);
2346 vm_page_insert(m, object, offset);
2347
2348 vm_object_paging_begin(cur_object);
2349 vm_object_paging_begin(object);
2350
2351 type_of_fault = DBG_COW_FAULT;
2352 VM_STAT(cow_faults++);
2353 current_task()->cow_faults++;
2354
2355 /*
2356 * Now cope with the source page and object
2357 * If the top object has a ref count of 1
2358 * then no other map can access it, and hence
2359 * it's not necessary to do the pmap_page_protect.
2360 */
2361
2362
2363 vm_page_lock_queues();
2364 vm_page_deactivate(cur_m);
2365 m->dirty = TRUE;
2366 pmap_page_protect(cur_m->phys_addr,
2367 VM_PROT_NONE);
2368 vm_page_unlock_queues();
2369
2370 PAGE_WAKEUP_DONE(cur_m);
2371 vm_object_paging_end(cur_object);
2372 vm_object_unlock(cur_object);
2373
2374 /*
2375 * Slight hack to call vm_object collapse
2376 * and then reuse common map in code.
2377 * note that the object lock was taken above.
2378 */
2379
2380 vm_object_paging_end(object);
2381 vm_object_collapse(object);
2382 vm_object_paging_begin(object);
2383 vm_object_unlock(object);
2384
2385 goto FastPmapEnter;
2386 }
2387 else {
2388
2389 /*
2390 * No page at cur_object, cur_offset
2391 */
2392
2393 if (cur_object->pager_created) {
2394
2395 /*
2396 * Have to talk to the pager. Give up.
2397 */
2398
2399 break;
2400 }
2401
2402
2403 if (cur_object->shadow == VM_OBJECT_NULL) {
2404
2405 if (cur_object->shadow_severed) {
2406 vm_object_paging_end(object);
2407 vm_object_unlock(object);
2408 vm_map_unlock_read(map);
2409 if(pmap_map != map)
2410 vm_map_unlock(pmap_map);
2411
2412 if (funnel_set) {
2413 thread_funnel_set( curflock, TRUE);
2414 funnel_set = FALSE;
2415 }
2416 cur_thread->interruptible = interruptible_state;
2417
2418 return VM_FAULT_MEMORY_ERROR;
2419 }
2420
2421 /*
2422 * Zero fill fault. Page gets
2423 * filled in top object. Insert
2424 * page, then drop any lower lock.
2425 * Give up if no page.
2426 */
2427 if ((vm_page_free_target -
2428 ((vm_page_free_target-vm_page_free_min)>>2))
2429 > vm_page_free_count) {
2430 break;
2431 }
2432 m = vm_page_alloc(object, offset);
2433 if (m == VM_PAGE_NULL) {
2434 break;
2435 }
2436 /*
2437 * This is a zero-fill or initial fill
2438 * page fault. As such, we consider it
2439 * undefined with respect to instruction
2440 * execution. i.e. it is the responsibility
2441 * of higher layers to call for an instruction
2442 * sync after changing the contents and before
2443 * sending a program into this area. We
2444 * choose this approach for performance
2445 */
2446
2447 m->no_isync = FALSE;
2448
2449 if (cur_object != object)
2450 vm_object_unlock(cur_object);
2451
2452 vm_object_paging_begin(object);
2453 vm_object_unlock(object);
2454
2455 /*
2456 * Now zero fill page and map it.
2457 * the page is probably going to
2458 * be written soon, so don't bother
2459 * to clear the modified bit
2460 *
2461 * NOTE: This code holds the map
2462 * lock across the zero fill.
2463 */
2464
2465 if (!map->no_zero_fill) {
2466 vm_page_zero_fill(m);
2467 type_of_fault = DBG_ZERO_FILL_FAULT;
2468 VM_STAT(zero_fill_count++);
2469 }
2470 vm_page_lock_queues();
2471 VM_PAGE_QUEUES_REMOVE(m);
2472
2473 m->page_ticket = vm_page_ticket;
2474 vm_page_ticket_roll++;
2475 if(vm_page_ticket_roll ==
2476 VM_PAGE_TICKETS_IN_ROLL) {
2477 vm_page_ticket_roll = 0;
2478 if(vm_page_ticket ==
2479 VM_PAGE_TICKET_ROLL_IDS)
2480 vm_page_ticket= 0;
2481 else
2482 vm_page_ticket++;
2483 }
2484
2485 queue_enter(&vm_page_queue_inactive,
2486 m, vm_page_t, pageq);
2487 m->inactive = TRUE;
2488 vm_page_inactive_count++;
2489 vm_page_unlock_queues();
2490 goto FastPmapEnter;
2491 }
2492
2493 /*
2494 * On to the next level
2495 */
2496
2497 cur_offset += cur_object->shadow_offset;
2498 new_object = cur_object->shadow;
2499 vm_object_lock(new_object);
2500 if (cur_object != object)
2501 vm_object_unlock(cur_object);
2502 cur_object = new_object;
2503
2504 continue;
2505 }
2506 }
2507
2508 /*
2509 * Cleanup from fast fault failure. Drop any object
2510 * lock other than original and drop map lock.
2511 */
2512
2513 if (object != cur_object)
2514 vm_object_unlock(cur_object);
2515 }
2516 vm_map_unlock_read(map);
2517 if(pmap_map != map)
2518 vm_map_unlock(pmap_map);
2519
2520 /*
2521 * Make a reference to this object to
2522 * prevent its disposal while we are messing with
2523 * it. Once we have the reference, the map is free
2524 * to be diddled. Since objects reference their
2525 * shadows (and copies), they will stay around as well.
2526 */
2527
2528 assert(object->ref_count > 0);
2529 object->ref_count++;
2530 vm_object_res_reference(object);
2531 vm_object_paging_begin(object);
2532
2533 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2534 kr = vm_fault_page(object, offset, fault_type,
2535 (change_wiring && !wired),
2536 interruptible,
2537 lo_offset, hi_offset, behavior,
2538 &prot, &result_page, &top_page,
2539 &type_of_fault,
2540 &error_code, map->no_zero_fill, FALSE, map, vaddr);
2541
2542 /*
2543 * If we didn't succeed, lose the object reference immediately.
2544 */
2545
2546 if (kr != VM_FAULT_SUCCESS)
2547 vm_object_deallocate(object);
2548
2549 /*
2550 * See why we failed, and take corrective action.
2551 */
2552
2553 switch (kr) {
2554 case VM_FAULT_SUCCESS:
2555 break;
2556 case VM_FAULT_MEMORY_SHORTAGE:
2557 if (vm_page_wait((change_wiring) ?
2558 THREAD_UNINT :
2559 THREAD_ABORTSAFE))
2560 goto RetryFault;
2561 /* fall thru */
2562 case VM_FAULT_INTERRUPTED:
2563 kr = KERN_ABORTED;
2564 goto done;
2565 case VM_FAULT_RETRY:
2566 goto RetryFault;
2567 case VM_FAULT_FICTITIOUS_SHORTAGE:
2568 vm_page_more_fictitious();
2569 goto RetryFault;
2570 case VM_FAULT_MEMORY_ERROR:
2571 if (error_code)
2572 kr = error_code;
2573 else
2574 kr = KERN_MEMORY_ERROR;
2575 goto done;
2576 }
2577
2578 m = result_page;
2579
2580 if(m != VM_PAGE_NULL) {
2581 assert((change_wiring && !wired) ?
2582 (top_page == VM_PAGE_NULL) :
2583 ((top_page == VM_PAGE_NULL) == (m->object == object)));
2584 }
2585
2586 /*
2587 * How to clean up the result of vm_fault_page. This
2588 * happens whether the mapping is entered or not.
2589 */
2590
2591 #define UNLOCK_AND_DEALLOCATE \
2592 MACRO_BEGIN \
2593 vm_fault_cleanup(m->object, top_page); \
2594 vm_object_deallocate(object); \
2595 MACRO_END
2596
2597 /*
2598 * What to do with the resulting page from vm_fault_page
2599 * if it doesn't get entered into the physical map:
2600 */
2601
2602 #define RELEASE_PAGE(m) \
2603 MACRO_BEGIN \
2604 PAGE_WAKEUP_DONE(m); \
2605 vm_page_lock_queues(); \
2606 if (!m->active && !m->inactive) \
2607 vm_page_activate(m); \
2608 vm_page_unlock_queues(); \
2609 MACRO_END
2610
2611 /*
2612 * We must verify that the maps have not changed
2613 * since our last lookup.
2614 */
2615
2616 if(m != VM_PAGE_NULL) {
2617 old_copy_object = m->object->copy;
2618
2619 vm_object_unlock(m->object);
2620 } else {
2621 old_copy_object = VM_OBJECT_NULL;
2622 }
2623 if ((map != original_map) || !vm_map_verify(map, &version)) {
2624 vm_object_t retry_object;
2625 vm_object_offset_t retry_offset;
2626 vm_prot_t retry_prot;
2627
2628 /*
2629 * To avoid trying to write_lock the map while another
2630 * thread has it read_locked (in vm_map_pageable), we
2631 * do not try for write permission. If the page is
2632 * still writable, we will get write permission. If it
2633 * is not, or has been marked needs_copy, we enter the
2634 * mapping without write permission, and will merely
2635 * take another fault.
2636 */
2637 map = original_map;
2638 vm_map_lock_read(map);
2639 kr = vm_map_lookup_locked(&map, vaddr,
2640 fault_type & ~VM_PROT_WRITE, &version,
2641 &retry_object, &retry_offset, &retry_prot,
2642 &wired, &behavior, &lo_offset, &hi_offset,
2643 &pmap_map);
2644 pmap = pmap_map->pmap;
2645
2646 if (kr != KERN_SUCCESS) {
2647 vm_map_unlock_read(map);
2648 if(m != VM_PAGE_NULL) {
2649 vm_object_lock(m->object);
2650 RELEASE_PAGE(m);
2651 UNLOCK_AND_DEALLOCATE;
2652 } else {
2653 vm_object_deallocate(object);
2654 }
2655 goto done;
2656 }
2657
2658 vm_object_unlock(retry_object);
2659 if(m != VM_PAGE_NULL) {
2660 vm_object_lock(m->object);
2661 } else {
2662 vm_object_lock(object);
2663 }
2664
2665 if ((retry_object != object) ||
2666 (retry_offset != offset)) {
2667 vm_map_unlock_read(map);
2668 if(pmap_map != map)
2669 vm_map_unlock(pmap_map);
2670 if(m != VM_PAGE_NULL) {
2671 RELEASE_PAGE(m);
2672 UNLOCK_AND_DEALLOCATE;
2673 } else {
2674 vm_object_deallocate(object);
2675 }
2676 goto RetryFault;
2677 }
2678
2679 /*
2680 * Check whether the protection has changed or the object
2681 * has been copied while we left the map unlocked.
2682 */
2683 prot &= retry_prot;
2684 if(m != VM_PAGE_NULL) {
2685 vm_object_unlock(m->object);
2686 } else {
2687 vm_object_unlock(object);
2688 }
2689 }
2690 if(m != VM_PAGE_NULL) {
2691 vm_object_lock(m->object);
2692 } else {
2693 vm_object_lock(object);
2694 }
2695
2696 /*
2697 * If the copy object changed while the top-level object
2698 * was unlocked, then we must take away write permission.
2699 */
2700
2701 if(m != VM_PAGE_NULL) {
2702 if (m->object->copy != old_copy_object)
2703 prot &= ~VM_PROT_WRITE;
2704 }
2705
2706 /*
2707 * If we want to wire down this page, but no longer have
2708 * adequate permissions, we must start all over.
2709 */
2710
2711 if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2712 vm_map_verify_done(map, &version);
2713 if(pmap_map != map)
2714 vm_map_unlock(pmap_map);
2715 if(m != VM_PAGE_NULL) {
2716 RELEASE_PAGE(m);
2717 UNLOCK_AND_DEALLOCATE;
2718 } else {
2719 vm_object_deallocate(object);
2720 }
2721 goto RetryFault;
2722 }
2723
2724 /*
2725 * Put this page into the physical map.
2726 * We had to do the unlock above because pmap_enter
2727 * may cause other faults. The page may be on
2728 * the pageout queues. If the pageout daemon comes
2729 * across the page, it will remove it from the queues.
2730 */
2731 if (m != VM_PAGE_NULL) {
2732 if (m->no_isync == TRUE) {
2733 pmap_sync_caches_phys(m->phys_addr);
2734
2735 m->no_isync = FALSE;
2736 }
2737 vm_object_unlock(m->object);
2738
2739 PMAP_ENTER(pmap, vaddr, m, prot, wired);
2740 {
2741 tws_hash_line_t line;
2742 task_t task;
2743
2744 task = current_task();
2745 if((map != NULL) &&
2746 (task->dynamic_working_set != 0)) {
2747 if(tws_lookup
2748 ((tws_hash_t)
2749 task->dynamic_working_set,
2750 m->offset, m->object,
2751 &line) != KERN_SUCCESS) {
2752 tws_insert((tws_hash_t)
2753 task->dynamic_working_set,
2754 m->offset, m->object,
2755 vaddr, pmap_map);
2756 if(tws_insert((tws_hash_t)
2757 task->dynamic_working_set,
2758 m->offset, m->object,
2759 vaddr, pmap_map)
2760 == KERN_NO_SPACE) {
2761 tws_expand_working_set(
2762 task->dynamic_working_set,
2763 TWS_HASH_LINE_COUNT);
2764 }
2765 }
2766 }
2767 }
2768 } else {
2769
2770 /* if __ppc__ not working until figure out phys copy on block maps */
2771 #ifdef notdefcdy
2772 int memattr;
2773 struct phys_entry *pp;
2774 /*
2775 * do a pmap block mapping from the physical address
2776 * in the object
2777 */
2778 if(pp = pmap_find_physentry(
2779 (vm_offset_t)object->shadow_offset)) {
2780 memattr = ((pp->pte1 & 0x00000078) >> 3);
2781 } else {
2782 memattr = PTE_WIMG_UNCACHED_COHERENT_GUARDED;
2783 }
2784
2785 pmap_map_block(pmap, vaddr,
2786 (vm_offset_t)object->shadow_offset,
2787 object->size, prot,
2788 memattr, 0); /* Set up a block mapped area */
2789 //#else
2790 vm_offset_t off;
2791 for (off = 0; off < object->size; off += page_size) {
2792 pmap_enter(pmap, vaddr + off,
2793 object->shadow_offset + off, prot, TRUE);
2794 /* Map it in */
2795 }
2796 #endif
2797
2798 }
2799
2800 /*
2801 * If the page is not wired down and isn't already
2802 * on a pageout queue, then put it where the
2803 * pageout daemon can find it.
2804 */
2805 if(m != VM_PAGE_NULL) {
2806 vm_object_lock(m->object);
2807 vm_page_lock_queues();
2808
2809 if (change_wiring) {
2810 if (wired)
2811 vm_page_wire(m);
2812 else
2813 vm_page_unwire(m);
2814 }
2815 #if VM_FAULT_STATIC_CONFIG
2816 else {
2817 if (!m->active && !m->inactive)
2818 vm_page_activate(m);
2819 m->reference = TRUE;
2820 }
2821 #else
2822 else if (software_reference_bits) {
2823 if (!m->active && !m->inactive)
2824 vm_page_activate(m);
2825 m->reference = TRUE;
2826 } else {
2827 vm_page_activate(m);
2828 }
2829 #endif
2830 vm_page_unlock_queues();
2831 }
2832
2833 /*
2834 * Unlock everything, and return
2835 */
2836
2837 vm_map_verify_done(map, &version);
2838 if(pmap_map != map)
2839 vm_map_unlock(pmap_map);
2840 if(m != VM_PAGE_NULL) {
2841 PAGE_WAKEUP_DONE(m);
2842 UNLOCK_AND_DEALLOCATE;
2843 } else {
2844 vm_fault_cleanup(object, top_page);
2845 vm_object_deallocate(object);
2846 }
2847 kr = KERN_SUCCESS;
2848
2849 #undef UNLOCK_AND_DEALLOCATE
2850 #undef RELEASE_PAGE
2851
2852 done:
2853 if (funnel_set) {
2854 thread_funnel_set( curflock, TRUE);
2855 funnel_set = FALSE;
2856 }
2857 cur_thread->interruptible = interruptible_state;
2858
2859 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2860 vaddr,
2861 type_of_fault,
2862 kr,
2863 0,
2864 0);
2865 return(kr);
2866 }
2867
2868 /*
2869 * vm_fault_wire:
2870 *
2871 * Wire down a range of virtual addresses in a map.
2872 */
2873 kern_return_t
2874 vm_fault_wire(
2875 vm_map_t map,
2876 vm_map_entry_t entry,
2877 pmap_t pmap)
2878 {
2879
2880 register vm_offset_t va;
2881 register vm_offset_t end_addr = entry->vme_end;
2882 register kern_return_t rc;
2883
2884 assert(entry->in_transition);
2885
2886 /*
2887 * Inform the physical mapping system that the
2888 * range of addresses may not fault, so that
2889 * page tables and such can be locked down as well.
2890 */
2891
2892 pmap_pageable(pmap, entry->vme_start, end_addr, FALSE);
2893
2894 /*
2895 * We simulate a fault to get the page and enter it
2896 * in the physical map.
2897 */
2898
2899 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
2900 if ((rc = vm_fault_wire_fast(
2901 map, va, entry, pmap)) != KERN_SUCCESS) {
2902 rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
2903 (pmap == kernel_pmap) ? THREAD_UNINT : THREAD_ABORTSAFE);
2904 }
2905
2906 if (rc != KERN_SUCCESS) {
2907 struct vm_map_entry tmp_entry = *entry;
2908
2909 /* unwire wired pages */
2910 tmp_entry.vme_end = va;
2911 vm_fault_unwire(map, &tmp_entry, FALSE, pmap);
2912
2913 return rc;
2914 }
2915 }
2916 return KERN_SUCCESS;
2917 }
2918
2919 /*
2920 * vm_fault_unwire:
2921 *
2922 * Unwire a range of virtual addresses in a map.
2923 */
2924 void
2925 vm_fault_unwire(
2926 vm_map_t map,
2927 vm_map_entry_t entry,
2928 boolean_t deallocate,
2929 pmap_t pmap)
2930 {
2931 register vm_offset_t va;
2932 register vm_offset_t end_addr = entry->vme_end;
2933 vm_object_t object;
2934
2935 object = (entry->is_sub_map)
2936 ? VM_OBJECT_NULL : entry->object.vm_object;
2937
2938 /*
2939 * Since the pages are wired down, we must be able to
2940 * get their mappings from the physical map system.
2941 */
2942
2943 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
2944 pmap_change_wiring(pmap, va, FALSE);
2945
2946 if (object == VM_OBJECT_NULL) {
2947 (void) vm_fault(map, va, VM_PROT_NONE, TRUE, THREAD_UNINT);
2948 } else {
2949 vm_prot_t prot;
2950 vm_page_t result_page;
2951 vm_page_t top_page;
2952 vm_object_t result_object;
2953 vm_fault_return_t result;
2954
2955 do {
2956 prot = VM_PROT_NONE;
2957
2958 vm_object_lock(object);
2959 vm_object_paging_begin(object);
2960 XPR(XPR_VM_FAULT,
2961 "vm_fault_unwire -> vm_fault_page\n",
2962 0,0,0,0,0);
2963 result = vm_fault_page(object,
2964 entry->offset +
2965 (va - entry->vme_start),
2966 VM_PROT_NONE, TRUE,
2967 THREAD_UNINT,
2968 entry->offset,
2969 entry->offset +
2970 (entry->vme_end
2971 - entry->vme_start),
2972 entry->behavior,
2973 &prot,
2974 &result_page,
2975 &top_page,
2976 (int *)0,
2977 0, map->no_zero_fill,
2978 FALSE, NULL, 0);
2979 } while (result == VM_FAULT_RETRY);
2980
2981 if (result != VM_FAULT_SUCCESS)
2982 panic("vm_fault_unwire: failure");
2983
2984 result_object = result_page->object;
2985 if (deallocate) {
2986 assert(!result_page->fictitious);
2987 pmap_page_protect(result_page->phys_addr,
2988 VM_PROT_NONE);
2989 VM_PAGE_FREE(result_page);
2990 } else {
2991 vm_page_lock_queues();
2992 vm_page_unwire(result_page);
2993 vm_page_unlock_queues();
2994 PAGE_WAKEUP_DONE(result_page);
2995 }
2996
2997 vm_fault_cleanup(result_object, top_page);
2998 }
2999 }
3000
3001 /*
3002 * Inform the physical mapping system that the range
3003 * of addresses may fault, so that page tables and
3004 * such may be unwired themselves.
3005 */
3006
3007 pmap_pageable(pmap, entry->vme_start, end_addr, TRUE);
3008
3009 }
3010
3011 /*
3012 * vm_fault_wire_fast:
3013 *
3014 * Handle common case of a wire down page fault at the given address.
3015 * If successful, the page is inserted into the associated physical map.
3016 * The map entry is passed in to avoid the overhead of a map lookup.
3017 *
3018 * NOTE: the given address should be truncated to the
3019 * proper page address.
3020 *
3021 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
3022 * a standard error specifying why the fault is fatal is returned.
3023 *
3024 * The map in question must be referenced, and remains so.
3025 * Caller has a read lock on the map.
3026 *
3027 * This is a stripped version of vm_fault() for wiring pages. Anything
3028 * other than the common case will return KERN_FAILURE, and the caller
3029 * is expected to call vm_fault().
3030 */
3031 kern_return_t
3032 vm_fault_wire_fast(
3033 vm_map_t map,
3034 vm_offset_t va,
3035 vm_map_entry_t entry,
3036 pmap_t pmap)
3037 {
3038 vm_object_t object;
3039 vm_object_offset_t offset;
3040 register vm_page_t m;
3041 vm_prot_t prot;
3042 thread_act_t thr_act;
3043
3044 VM_STAT(faults++);
3045
3046 if((thr_act=current_act()) && (thr_act->task != TASK_NULL))
3047 thr_act->task->faults++;
3048
3049 /*
3050 * Recovery actions
3051 */
3052
3053 #undef RELEASE_PAGE
3054 #define RELEASE_PAGE(m) { \
3055 PAGE_WAKEUP_DONE(m); \
3056 vm_page_lock_queues(); \
3057 vm_page_unwire(m); \
3058 vm_page_unlock_queues(); \
3059 }
3060
3061
3062 #undef UNLOCK_THINGS
3063 #define UNLOCK_THINGS { \
3064 object->paging_in_progress--; \
3065 vm_object_unlock(object); \
3066 }
3067
3068 #undef UNLOCK_AND_DEALLOCATE
3069 #define UNLOCK_AND_DEALLOCATE { \
3070 UNLOCK_THINGS; \
3071 vm_object_deallocate(object); \
3072 }
3073 /*
3074 * Give up and have caller do things the hard way.
3075 */
3076
3077 #define GIVE_UP { \
3078 UNLOCK_AND_DEALLOCATE; \
3079 return(KERN_FAILURE); \
3080 }
3081
3082
3083 /*
3084 * If this entry is not directly to a vm_object, bail out.
3085 */
3086 if (entry->is_sub_map)
3087 return(KERN_FAILURE);
3088
3089 /*
3090 * Find the backing store object and offset into it.
3091 */
3092
3093 object = entry->object.vm_object;
3094 offset = (va - entry->vme_start) + entry->offset;
3095 prot = entry->protection;
3096
3097 /*
3098 * Make a reference to this object to prevent its
3099 * disposal while we are messing with it.
3100 */
3101
3102 vm_object_lock(object);
3103 assert(object->ref_count > 0);
3104 object->ref_count++;
3105 vm_object_res_reference(object);
3106 object->paging_in_progress++;
3107
3108 /*
3109 * INVARIANTS (through entire routine):
3110 *
3111 * 1) At all times, we must either have the object
3112 * lock or a busy page in some object to prevent
3113 * some other thread from trying to bring in
3114 * the same page.
3115 *
3116 * 2) Once we have a busy page, we must remove it from
3117 * the pageout queues, so that the pageout daemon
3118 * will not grab it away.
3119 *
3120 */
3121
3122 /*
3123 * Look for page in top-level object. If it's not there or
3124 * there's something going on, give up.
3125 */
3126 m = vm_page_lookup(object, offset);
3127 if ((m == VM_PAGE_NULL) || (m->busy) ||
3128 (m->unusual && ( m->error || m->restart || m->absent ||
3129 prot & m->page_lock))) {
3130
3131 GIVE_UP;
3132 }
3133
3134 /*
3135 * Wire the page down now. All bail outs beyond this
3136 * point must unwire the page.
3137 */
3138
3139 vm_page_lock_queues();
3140 vm_page_wire(m);
3141 vm_page_unlock_queues();
3142
3143 /*
3144 * Mark page busy for other threads.
3145 */
3146 assert(!m->busy);
3147 m->busy = TRUE;
3148 assert(!m->absent);
3149
3150 /*
3151 * Give up if the page is being written and there's a copy object
3152 */
3153 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3154 RELEASE_PAGE(m);
3155 GIVE_UP;
3156 }
3157
3158 /*
3159 * Put this page into the physical map.
3160 * We have to unlock the object because pmap_enter
3161 * may cause other faults.
3162 */
3163 if (m->no_isync == TRUE) {
3164 pmap_sync_caches_phys(m->phys_addr);
3165
3166 m->no_isync = FALSE;
3167 }
3168 vm_object_unlock(object);
3169
3170 PMAP_ENTER(pmap, va, m, prot, TRUE);
3171
3172 /*
3173 * Must relock object so that paging_in_progress can be cleared.
3174 */
3175 vm_object_lock(object);
3176
3177 /*
3178 * Unlock everything, and return
3179 */
3180
3181 PAGE_WAKEUP_DONE(m);
3182 UNLOCK_AND_DEALLOCATE;
3183
3184 return(KERN_SUCCESS);
3185
3186 }
3187
3188 /*
3189 * Routine: vm_fault_copy_cleanup
3190 * Purpose:
3191 * Release a page used by vm_fault_copy.
3192 */
3193
3194 void
3195 vm_fault_copy_cleanup(
3196 vm_page_t page,
3197 vm_page_t top_page)
3198 {
3199 vm_object_t object = page->object;
3200
3201 vm_object_lock(object);
3202 PAGE_WAKEUP_DONE(page);
3203 vm_page_lock_queues();
3204 if (!page->active && !page->inactive)
3205 vm_page_activate(page);
3206 vm_page_unlock_queues();
3207 vm_fault_cleanup(object, top_page);
3208 }
3209
3210 void
3211 vm_fault_copy_dst_cleanup(
3212 vm_page_t page)
3213 {
3214 vm_object_t object;
3215
3216 if (page != VM_PAGE_NULL) {
3217 object = page->object;
3218 vm_object_lock(object);
3219 vm_page_lock_queues();
3220 vm_page_unwire(page);
3221 vm_page_unlock_queues();
3222 vm_object_paging_end(object);
3223 vm_object_unlock(object);
3224 }
3225 }
3226
3227 /*
3228 * Routine: vm_fault_copy
3229 *
3230 * Purpose:
3231 * Copy pages from one virtual memory object to another --
3232 * neither the source nor destination pages need be resident.
3233 *
3234 * Before actually copying a page, the version associated with
3235 * the destination address map will be verified.
3236 *
3237 * In/out conditions:
3238 * The caller must hold a reference, but not a lock, to
3239 * each of the source and destination objects and to the
3240 * destination map.
3241 *
3242 * Results:
3243 * Returns KERN_SUCCESS if no errors were encountered in
3244 * reading or writing the data. Returns KERN_INTERRUPTED if
3245 * the operation was interrupted (only possible if the
3246 * "interruptible" argument is asserted). Other return values
3247 * indicate a permanent error in copying the data.
3248 *
3249 * The actual amount of data copied will be returned in the
3250 * "copy_size" argument. In the event that the destination map
3251 * verification failed, this amount may be less than the amount
3252 * requested.
3253 */
3254 kern_return_t
3255 vm_fault_copy(
3256 vm_object_t src_object,
3257 vm_object_offset_t src_offset,
3258 vm_size_t *src_size, /* INOUT */
3259 vm_object_t dst_object,
3260 vm_object_offset_t dst_offset,
3261 vm_map_t dst_map,
3262 vm_map_version_t *dst_version,
3263 int interruptible)
3264 {
3265 vm_page_t result_page;
3266
3267 vm_page_t src_page;
3268 vm_page_t src_top_page;
3269 vm_prot_t src_prot;
3270
3271 vm_page_t dst_page;
3272 vm_page_t dst_top_page;
3273 vm_prot_t dst_prot;
3274
3275 vm_size_t amount_left;
3276 vm_object_t old_copy_object;
3277 kern_return_t error = 0;
3278
3279 vm_size_t part_size;
3280
3281 /*
3282 * In order not to confuse the clustered pageins, align
3283 * the different offsets on a page boundary.
3284 */
3285 vm_object_offset_t src_lo_offset = trunc_page_64(src_offset);
3286 vm_object_offset_t dst_lo_offset = trunc_page_64(dst_offset);
3287 vm_object_offset_t src_hi_offset = round_page_64(src_offset + *src_size);
3288 vm_object_offset_t dst_hi_offset = round_page_64(dst_offset + *src_size);
3289
3290 #define RETURN(x) \
3291 MACRO_BEGIN \
3292 *src_size -= amount_left; \
3293 MACRO_RETURN(x); \
3294 MACRO_END
3295
3296 amount_left = *src_size;
3297 do { /* while (amount_left > 0) */
3298 /*
3299 * There may be a deadlock if both source and destination
3300 * pages are the same. To avoid this deadlock, the copy must
3301 * start by getting the destination page in order to apply
3302 * COW semantics if any.
3303 */
3304
3305 RetryDestinationFault: ;
3306
3307 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3308
3309 vm_object_lock(dst_object);
3310 vm_object_paging_begin(dst_object);
3311
3312 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3313 switch (vm_fault_page(dst_object,
3314 trunc_page_64(dst_offset),
3315 VM_PROT_WRITE|VM_PROT_READ,
3316 FALSE,
3317 interruptible,
3318 dst_lo_offset,
3319 dst_hi_offset,
3320 VM_BEHAVIOR_SEQUENTIAL,
3321 &dst_prot,
3322 &dst_page,
3323 &dst_top_page,
3324 (int *)0,
3325 &error,
3326 dst_map->no_zero_fill,
3327 FALSE, NULL, 0)) {
3328 case VM_FAULT_SUCCESS:
3329 break;
3330 case VM_FAULT_RETRY:
3331 goto RetryDestinationFault;
3332 case VM_FAULT_MEMORY_SHORTAGE:
3333 if (vm_page_wait(interruptible))
3334 goto RetryDestinationFault;
3335 /* fall thru */
3336 case VM_FAULT_INTERRUPTED:
3337 RETURN(MACH_SEND_INTERRUPTED);
3338 case VM_FAULT_FICTITIOUS_SHORTAGE:
3339 vm_page_more_fictitious();
3340 goto RetryDestinationFault;
3341 case VM_FAULT_MEMORY_ERROR:
3342 if (error)
3343 return (error);
3344 else
3345 return(KERN_MEMORY_ERROR);
3346 }
3347 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3348
3349 old_copy_object = dst_page->object->copy;
3350
3351 /*
3352 * There exists the possibility that the source and
3353 * destination page are the same. But we can't
3354 * easily determine that now. If they are the
3355 * same, the call to vm_fault_page() for the
3356 * destination page will deadlock. To prevent this we
3357 * wire the page so we can drop busy without having
3358 * the page daemon steal the page. We clean up the
3359 * top page but keep the paging reference on the object
3360 * holding the dest page so it doesn't go away.
3361 */
3362
3363 vm_page_lock_queues();
3364 vm_page_wire(dst_page);
3365 vm_page_unlock_queues();
3366 PAGE_WAKEUP_DONE(dst_page);
3367 vm_object_unlock(dst_page->object);
3368
3369 if (dst_top_page != VM_PAGE_NULL) {
3370 vm_object_lock(dst_object);
3371 VM_PAGE_FREE(dst_top_page);
3372 vm_object_paging_end(dst_object);
3373 vm_object_unlock(dst_object);
3374 }
3375
3376 RetrySourceFault: ;
3377
3378 if (src_object == VM_OBJECT_NULL) {
3379 /*
3380 * No source object. We will just
3381 * zero-fill the page in dst_object.
3382 */
3383 src_page = VM_PAGE_NULL;
3384 result_page = VM_PAGE_NULL;
3385 } else {
3386 vm_object_lock(src_object);
3387 src_page = vm_page_lookup(src_object,
3388 trunc_page_64(src_offset));
3389 if (src_page == dst_page) {
3390 src_prot = dst_prot;
3391 result_page = VM_PAGE_NULL;
3392 } else {
3393 src_prot = VM_PROT_READ;
3394 vm_object_paging_begin(src_object);
3395
3396 XPR(XPR_VM_FAULT,
3397 "vm_fault_copy(2) -> vm_fault_page\n",
3398 0,0,0,0,0);
3399 switch (vm_fault_page(src_object,
3400 trunc_page_64(src_offset),
3401 VM_PROT_READ,
3402 FALSE,
3403 interruptible,
3404 src_lo_offset,
3405 src_hi_offset,
3406 VM_BEHAVIOR_SEQUENTIAL,
3407 &src_prot,
3408 &result_page,
3409 &src_top_page,
3410 (int *)0,
3411 &error,
3412 FALSE,
3413 FALSE, NULL, 0)) {
3414
3415 case VM_FAULT_SUCCESS:
3416 break;
3417 case VM_FAULT_RETRY:
3418 goto RetrySourceFault;
3419 case VM_FAULT_MEMORY_SHORTAGE:
3420 if (vm_page_wait(interruptible))
3421 goto RetrySourceFault;
3422 /* fall thru */
3423 case VM_FAULT_INTERRUPTED:
3424 vm_fault_copy_dst_cleanup(dst_page);
3425 RETURN(MACH_SEND_INTERRUPTED);
3426 case VM_FAULT_FICTITIOUS_SHORTAGE:
3427 vm_page_more_fictitious();
3428 goto RetrySourceFault;
3429 case VM_FAULT_MEMORY_ERROR:
3430 vm_fault_copy_dst_cleanup(dst_page);
3431 if (error)
3432 return (error);
3433 else
3434 return(KERN_MEMORY_ERROR);
3435 }
3436
3437
3438 assert((src_top_page == VM_PAGE_NULL) ==
3439 (result_page->object == src_object));
3440 }
3441 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
3442 vm_object_unlock(result_page->object);
3443 }
3444
3445 if (!vm_map_verify(dst_map, dst_version)) {
3446 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3447 vm_fault_copy_cleanup(result_page, src_top_page);
3448 vm_fault_copy_dst_cleanup(dst_page);
3449 break;
3450 }
3451
3452 vm_object_lock(dst_page->object);
3453
3454 if (dst_page->object->copy != old_copy_object) {
3455 vm_object_unlock(dst_page->object);
3456 vm_map_verify_done(dst_map, dst_version);
3457 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3458 vm_fault_copy_cleanup(result_page, src_top_page);
3459 vm_fault_copy_dst_cleanup(dst_page);
3460 break;
3461 }
3462 vm_object_unlock(dst_page->object);
3463
3464 /*
3465 * Copy the page, and note that it is dirty
3466 * immediately.
3467 */
3468
3469 if (!page_aligned(src_offset) ||
3470 !page_aligned(dst_offset) ||
3471 !page_aligned(amount_left)) {
3472
3473 vm_object_offset_t src_po,
3474 dst_po;
3475
3476 src_po = src_offset - trunc_page_64(src_offset);
3477 dst_po = dst_offset - trunc_page_64(dst_offset);
3478
3479 if (dst_po > src_po) {
3480 part_size = PAGE_SIZE - dst_po;
3481 } else {
3482 part_size = PAGE_SIZE - src_po;
3483 }
3484 if (part_size > (amount_left)){
3485 part_size = amount_left;
3486 }
3487
3488 if (result_page == VM_PAGE_NULL) {
3489 vm_page_part_zero_fill(dst_page,
3490 dst_po, part_size);
3491 } else {
3492 vm_page_part_copy(result_page, src_po,
3493 dst_page, dst_po, part_size);
3494 if(!dst_page->dirty){
3495 vm_object_lock(dst_object);
3496 dst_page->dirty = TRUE;
3497 vm_object_unlock(dst_page->object);
3498 }
3499
3500 }
3501 } else {
3502 part_size = PAGE_SIZE;
3503
3504 if (result_page == VM_PAGE_NULL)
3505 vm_page_zero_fill(dst_page);
3506 else{
3507 vm_page_copy(result_page, dst_page);
3508 if(!dst_page->dirty){
3509 vm_object_lock(dst_object);
3510 dst_page->dirty = TRUE;
3511 vm_object_unlock(dst_page->object);
3512 }
3513 }
3514
3515 }
3516
3517 /*
3518 * Unlock everything, and return
3519 */
3520
3521 vm_map_verify_done(dst_map, dst_version);
3522
3523 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3524 vm_fault_copy_cleanup(result_page, src_top_page);
3525 vm_fault_copy_dst_cleanup(dst_page);
3526
3527 amount_left -= part_size;
3528 src_offset += part_size;
3529 dst_offset += part_size;
3530 } while (amount_left > 0);
3531
3532 RETURN(KERN_SUCCESS);
3533 #undef RETURN
3534
3535 /*NOTREACHED*/
3536 }
3537
3538 #ifdef notdef
3539
3540 /*
3541 * Routine: vm_fault_page_overwrite
3542 *
3543 * Description:
3544 * A form of vm_fault_page that assumes that the
3545 * resulting page will be overwritten in its entirety,
3546 * making it unnecessary to obtain the correct *contents*
3547 * of the page.
3548 *
3549 * Implementation:
3550 * XXX Untested. Also unused. Eventually, this technology
3551 * could be used in vm_fault_copy() to advantage.
3552 */
3553 vm_fault_return_t
3554 vm_fault_page_overwrite(
3555 register
3556 vm_object_t dst_object,
3557 vm_object_offset_t dst_offset,
3558 vm_page_t *result_page) /* OUT */
3559 {
3560 register
3561 vm_page_t dst_page;
3562 kern_return_t wait_result;
3563
3564 #define interruptible THREAD_UNINT /* XXX */
3565
3566 while (TRUE) {
3567 /*
3568 * Look for a page at this offset
3569 */
3570
3571 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3572 == VM_PAGE_NULL) {
3573 /*
3574 * No page, no problem... just allocate one.
3575 */
3576
3577 dst_page = vm_page_alloc(dst_object, dst_offset);
3578 if (dst_page == VM_PAGE_NULL) {
3579 vm_object_unlock(dst_object);
3580 VM_PAGE_WAIT();
3581 vm_object_lock(dst_object);
3582 continue;
3583 }
3584
3585 /*
3586 * Pretend that the memory manager
3587 * write-protected the page.
3588 *
3589 * Note that we will be asking for write
3590 * permission without asking for the data
3591 * first.
3592 */
3593
3594 dst_page->overwriting = TRUE;
3595 dst_page->page_lock = VM_PROT_WRITE;
3596 dst_page->absent = TRUE;
3597 dst_page->unusual = TRUE;
3598 dst_object->absent_count++;
3599
3600 break;
3601
3602 /*
3603 * When we bail out, we might have to throw
3604 * away the page created here.
3605 */
3606
3607 #define DISCARD_PAGE \
3608 MACRO_BEGIN \
3609 vm_object_lock(dst_object); \
3610 dst_page = vm_page_lookup(dst_object, dst_offset); \
3611 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3612 VM_PAGE_FREE(dst_page); \
3613 vm_object_unlock(dst_object); \
3614 MACRO_END
3615 }
3616
3617 /*
3618 * If the page is write-protected...
3619 */
3620
3621 if (dst_page->page_lock & VM_PROT_WRITE) {
3622 /*
3623 * ... and an unlock request hasn't been sent
3624 */
3625
3626 if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3627 vm_prot_t u;
3628 kern_return_t rc;
3629
3630 /*
3631 * ... then send one now.
3632 */
3633
3634 if (!dst_object->pager_ready) {
3635 vm_object_assert_wait(dst_object,
3636 VM_OBJECT_EVENT_PAGER_READY,
3637 interruptible);
3638 vm_object_unlock(dst_object);
3639 wait_result = thread_block((void (*)(void))0);
3640 if (wait_result != THREAD_AWAKENED) {
3641 DISCARD_PAGE;
3642 return(VM_FAULT_INTERRUPTED);
3643 }
3644 continue;
3645 }
3646
3647 u = dst_page->unlock_request |= VM_PROT_WRITE;
3648 vm_object_unlock(dst_object);
3649
3650 if ((rc = memory_object_data_unlock(
3651 dst_object->pager,
3652 dst_offset + dst_object->paging_offset,
3653 PAGE_SIZE,
3654 u)) != KERN_SUCCESS) {
3655 if (vm_fault_debug)
3656 printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3657 DISCARD_PAGE;
3658 return((rc == MACH_SEND_INTERRUPTED) ?
3659 VM_FAULT_INTERRUPTED :
3660 VM_FAULT_MEMORY_ERROR);
3661 }
3662 vm_object_lock(dst_object);
3663 continue;
3664 }
3665
3666 /* ... fall through to wait below */
3667 } else {
3668 /*
3669 * If the page isn't being used for other
3670 * purposes, then we're done.
3671 */
3672 if ( ! (dst_page->busy || dst_page->absent ||
3673 dst_page->error || dst_page->restart) )
3674 break;
3675 }
3676
3677 PAGE_ASSERT_WAIT(dst_page, interruptible);
3678 vm_object_unlock(dst_object);
3679 wait_result = thread_block((void (*)(void))0);
3680 if (wait_result != THREAD_AWAKENED) {
3681 DISCARD_PAGE;
3682 return(VM_FAULT_INTERRUPTED);
3683 }
3684 }
3685
3686 *result_page = dst_page;
3687 return(VM_FAULT_SUCCESS);
3688
3689 #undef interruptible
3690 #undef DISCARD_PAGE
3691 }
3692
3693 #endif /* notdef */
3694
3695 #if VM_FAULT_CLASSIFY
3696 /*
3697 * Temporary statistics gathering support.
3698 */
3699
3700 /*
3701 * Statistics arrays:
3702 */
3703 #define VM_FAULT_TYPES_MAX 5
3704 #define VM_FAULT_LEVEL_MAX 8
3705
3706 int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
3707
3708 #define VM_FAULT_TYPE_ZERO_FILL 0
3709 #define VM_FAULT_TYPE_MAP_IN 1
3710 #define VM_FAULT_TYPE_PAGER 2
3711 #define VM_FAULT_TYPE_COPY 3
3712 #define VM_FAULT_TYPE_OTHER 4
3713
3714
3715 void
3716 vm_fault_classify(vm_object_t object,
3717 vm_object_offset_t offset,
3718 vm_prot_t fault_type)
3719 {
3720 int type, level = 0;
3721 vm_page_t m;
3722
3723 while (TRUE) {
3724 m = vm_page_lookup(object, offset);
3725 if (m != VM_PAGE_NULL) {
3726 if (m->busy || m->error || m->restart || m->absent ||
3727 fault_type & m->page_lock) {
3728 type = VM_FAULT_TYPE_OTHER;
3729 break;
3730 }
3731 if (((fault_type & VM_PROT_WRITE) == 0) ||
3732 ((level == 0) && object->copy == VM_OBJECT_NULL)) {
3733 type = VM_FAULT_TYPE_MAP_IN;
3734 break;
3735 }
3736 type = VM_FAULT_TYPE_COPY;
3737 break;
3738 }
3739 else {
3740 if (object->pager_created) {
3741 type = VM_FAULT_TYPE_PAGER;
3742 break;
3743 }
3744 if (object->shadow == VM_OBJECT_NULL) {
3745 type = VM_FAULT_TYPE_ZERO_FILL;
3746 break;
3747 }
3748
3749 offset += object->shadow_offset;
3750 object = object->shadow;
3751 level++;
3752 continue;
3753 }
3754 }
3755
3756 if (level > VM_FAULT_LEVEL_MAX)
3757 level = VM_FAULT_LEVEL_MAX;
3758
3759 vm_fault_stats[type][level] += 1;
3760
3761 return;
3762 }
3763
3764 /* cleanup routine to call from debugger */
3765
3766 void
3767 vm_fault_classify_init(void)
3768 {
3769 int type, level;
3770
3771 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
3772 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
3773 vm_fault_stats[type][level] = 0;
3774 }
3775 }
3776
3777 return;
3778 }
3779 #endif /* VM_FAULT_CLASSIFY */