git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/vm_fault.c
72d3061c97b1f725e30927576a175cec5d2aa7fd
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: vm_fault.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 *
56 * Page fault handling module.
57 */
58 #ifdef MACH_BSD
59 /* remove after component interface available */
60 extern int vnode_pager_workaround;
61 extern int device_pager_workaround;
62 #endif
63
64 #include <mach_cluster_stats.h>
65 #include <mach_pagemap.h>
66 #include <mach_kdb.h>
67
68 #include <vm/vm_fault.h>
69 #include <mach/kern_return.h>
70 #include <mach/message.h> /* for error codes */
71 #include <kern/host_statistics.h>
72 #include <kern/counters.h>
73 #include <kern/task.h>
74 #include <kern/thread.h>
75 #include <kern/sched_prim.h>
76 #include <kern/host.h>
77 #include <kern/xpr.h>
78 #include <ppc/proc_reg.h>
79 #include <ppc/pmap_internals.h>
80 #include <vm/task_working_set.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_object.h>
83 #include <vm/vm_page.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_pageout.h>
86 #include <mach/vm_param.h>
87 #include <mach/vm_behavior.h>
88 #include <mach/memory_object.h>
89 /* For memory_object_data_{request,unlock} */
90 #include <kern/mach_param.h>
91 #include <kern/macro_help.h>
92 #include <kern/zalloc.h>
93 #include <kern/misc_protos.h>
94
95 #include <sys/kdebug.h>
96
97 #define VM_FAULT_CLASSIFY 0
98 #define VM_FAULT_STATIC_CONFIG 1
99
100 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
101
102 int vm_object_absent_max = 50;
103
104 int vm_fault_debug = 0;
105 boolean_t vm_page_deactivate_behind = TRUE;
106
107 vm_machine_attribute_val_t mv_cache_sync = MATTR_VAL_CACHE_SYNC;
108
109 #if !VM_FAULT_STATIC_CONFIG
110 boolean_t vm_fault_dirty_handling = FALSE;
111 boolean_t vm_fault_interruptible = FALSE;
112 boolean_t software_reference_bits = TRUE;
113 #endif
114
115 #if MACH_KDB
116 extern struct db_watchpoint *db_watchpoint_list;
117 #endif /* MACH_KDB */
118
119 /* Forward declarations of internal routines. */
120 extern kern_return_t vm_fault_wire_fast(
121 vm_map_t map,
122 vm_offset_t va,
123 vm_map_entry_t entry,
124 pmap_t pmap);
125
126 extern void vm_fault_continue(void);
127
128 extern void vm_fault_copy_cleanup(
129 vm_page_t page,
130 vm_page_t top_page);
131
132 extern void vm_fault_copy_dst_cleanup(
133 vm_page_t page);
134
135 #if VM_FAULT_CLASSIFY
136 extern void vm_fault_classify(vm_object_t object,
137 vm_object_offset_t offset,
138 vm_prot_t fault_type);
139
140 extern void vm_fault_classify_init(void);
141 #endif
142
143 /*
144 * Routine: vm_fault_init
145 * Purpose:
146 * Initialize our private data structures.
147 */
148 void
149 vm_fault_init(void)
150 {
151 }
152
153 /*
154 * Routine: vm_fault_cleanup
155 * Purpose:
156 * Clean up the result of vm_fault_page.
157 * Results:
158 * The paging reference for "object" is released.
159 * "object" is unlocked.
160 * If "top_page" is not null, "top_page" is
161 * freed and the paging reference for the object
162 * containing it is released.
163 *
164 * In/out conditions:
165 * "object" must be locked.
166 */
167 void
168 vm_fault_cleanup(
169 register vm_object_t object,
170 register vm_page_t top_page)
171 {
172 vm_object_paging_end(object);
173 vm_object_unlock(object);
174
175 if (top_page != VM_PAGE_NULL) {
176 object = top_page->object;
177 vm_object_lock(object);
178 VM_PAGE_FREE(top_page);
179 vm_object_paging_end(object);
180 vm_object_unlock(object);
181 }
182 }
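/*
 * Usage sketch (illustrative, placeholder names): a caller that is done
 * with a page returned by vm_fault_page() below typically finishes with
 *
 *	vm_fault_cleanup(result_page->object, top_page);
 *
 * called with that object locked; the paging reference taken for the
 * fault is dropped and the busy "top_page", if any, is disposed of.
 */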
183
184 #if MACH_CLUSTER_STATS
185 #define MAXCLUSTERPAGES 16
186 struct {
187 unsigned long pages_in_cluster;
188 unsigned long pages_at_higher_offsets;
189 unsigned long pages_at_lower_offsets;
190 } cluster_stats_in[MAXCLUSTERPAGES];
191 #define CLUSTER_STAT(clause) clause
192 #define CLUSTER_STAT_HIGHER(x) \
193 ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
194 #define CLUSTER_STAT_LOWER(x) \
195 ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
196 #define CLUSTER_STAT_CLUSTER(x) \
197 ((cluster_stats_in[(x)].pages_in_cluster)++)
198 #else /* MACH_CLUSTER_STATS */
199 #define CLUSTER_STAT(clause)
200 #endif /* MACH_CLUSTER_STATS */
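/*
 * Example (descriptive): with MACH_CLUSTER_STATS enabled,
 * CLUSTER_STAT(pages_at_lower_offsets++;) expands to the increment;
 * without it, the macro expands to nothing and the statistics code
 * below compiles away entirely.
 */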
201
202 /* XXX - temporary */
203 boolean_t vm_allow_clustered_pagein = FALSE;
204 int vm_pagein_cluster_used = 0;
205
206 /*
207 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
208 */
209 int vm_default_ahead = 1; /* Number of pages to prepage ahead */
210 int vm_default_behind = 0; /* Number of pages to prepage behind */
211
212 #define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
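/*
 * Example (assuming a 4K page size): ALIGNED(0x4000) is TRUE and
 * ALIGNED(0x4200) is FALSE; ALIGNED() simply tests that an offset is a
 * multiple of the 64-bit page size.
 */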
213
214 /*
215 * Routine: vm_fault_page
216 * Purpose:
217 * Find the resident page for the virtual memory
218 * specified by the given virtual memory object
219 * and offset.
220 * Additional arguments:
221 * The required permissions for the page are given
222 * in "fault_type". Desired permissions are included
223 * in "protection". The minimum and maximum valid offsets
224 * within the object for the relevant map entry are
225 * passed in "lo_offset" and "hi_offset" respectively and
226 * the expected page reference pattern is passed in "behavior".
227 * These three parameters are used to determine pagein cluster
228 * limits.
229 *
230 * If the desired page is known to be resident (for
231 * example, because it was previously wired down), asserting
232 * the "unwiring" parameter will speed the search.
233 *
234 * If the operation can be interrupted (by thread_abort
235 * or thread_terminate), then the "interruptible"
236 * parameter should be asserted.
237 *
238 * Results:
239 * The page containing the proper data is returned
240 * in "result_page".
241 *
242 * In/out conditions:
243 * The source object must be locked and referenced,
244 * and must donate one paging reference. The reference
245 * is not affected. The paging reference and lock are
246 * consumed.
247 *
248 * If the call succeeds, the object in which "result_page"
249 * resides is left locked and holding a paging reference.
250 * If this is not the original object, a busy page in the
251 * original object is returned in "top_page", to prevent other
252 * callers from pursuing this same data, along with a paging
253 * reference for the original object. The "top_page" should
254 * be destroyed when this guarantee is no longer required.
255 * The "result_page" is also left busy. It is not removed
256 * from the pageout queues.
257 */
258
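/*
 * Illustrative sketch of a caller's handling of the possible return
 * values (placeholder variable names; the real caller is vm_fault()
 * further below):
 *
 *	kr = vm_fault_page(object, offset, fault_type, FALSE,
 *			   interruptible, lo_offset, hi_offset,
 *			   VM_BEHAVIOR_DEFAULT, &prot, &result_page,
 *			   &top_page, &type_of_fault, &error_code,
 *			   FALSE, FALSE, map, vaddr);
 *
 *	VM_FAULT_SUCCESS:	use "result_page", then vm_fault_cleanup()
 *	VM_FAULT_RETRY:		re-lookup the object and retry the fault
 *	VM_FAULT_MEMORY_SHORTAGE,
 *	VM_FAULT_FICTITIOUS_SHORTAGE: wait for free pages, then retry
 *	VM_FAULT_INTERRUPTED:	abort the fault
 *	VM_FAULT_MEMORY_ERROR:	fail, using "error_code" if supplied
 */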
259 vm_fault_return_t
260 vm_fault_page(
261 /* Arguments: */
262 vm_object_t first_object, /* Object to begin search */
263 vm_object_offset_t first_offset, /* Offset into object */
264 vm_prot_t fault_type, /* What access is requested */
265 boolean_t must_be_resident,/* Must page be resident? */
266 int interruptible, /* how may the fault be interrupted? */
267 vm_object_offset_t lo_offset, /* Map entry start */
268 vm_object_offset_t hi_offset, /* Map entry end */
269 vm_behavior_t behavior, /* Page reference behavior */
270 /* Modifies in place: */
271 vm_prot_t *protection, /* Protection for mapping */
272 /* Returns: */
273 vm_page_t *result_page, /* Page found, if successful */
274 vm_page_t *top_page, /* Page in top object, if
275 * not result_page. */
276 int *type_of_fault, /* if non-null, fill in with type of fault
277 * (COW, zero-fill, etc.), returned in trace point */
278 /* More arguments: */
279 kern_return_t *error_code, /* code if page is in error */
280 boolean_t no_zero_fill, /* don't zero fill absent pages */
281 boolean_t data_supply, /* treat as data_supply if
282 * it is a write fault and a full
283 * page is provided */
284 vm_map_t map,
285 vm_offset_t vaddr)
286 {
287 register
288 vm_page_t m;
289 register
290 vm_object_t object;
291 register
292 vm_object_offset_t offset;
293 vm_page_t first_m;
294 vm_object_t next_object;
295 vm_object_t copy_object;
296 boolean_t look_for_page;
297 vm_prot_t access_required = fault_type;
298 vm_prot_t wants_copy_flag;
299 vm_size_t cluster_size, length;
300 vm_object_offset_t cluster_offset;
301 vm_object_offset_t cluster_start, cluster_end, paging_offset;
302 vm_object_offset_t align_offset;
303 CLUSTER_STAT(int pages_at_higher_offsets;)
304 CLUSTER_STAT(int pages_at_lower_offsets;)
305 kern_return_t wait_result;
306 thread_t cur_thread;
307 boolean_t interruptible_state;
308 boolean_t bumped_pagein = FALSE;
309
310
311 #if MACH_PAGEMAP
312 /*
313 * MACH page map - an optional optimization where a bit map is maintained
314 * by the VM subsystem for internal objects to indicate which pages of
315 * the object currently reside on backing store. This existence map
316 * duplicates information maintained by the vnode pager. It is
317 * created at the time of the first pageout against the object, i.e.
318 * at the same time the pager for the object is created. The optimization
319 * is designed to eliminate pager interaction overhead, if it is
320 * 'known' that the page does not exist on backing store.
321 *
322 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
323 * either marked as paged out in the existence map for the object or no
324 * existence map exists for the object. LOOK_FOR() is one of the
325 * criteria in the decision to invoke the pager. It is also used as one
326 * of the criteria to terminate the scan for adjacent pages in a clustered
327 * pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
328 * permanent objects. Note also that if the pager for an internal object
329 * has not been created, the pager is not invoked regardless of the value
330 * of LOOK_FOR() and that clustered pagein scans are only done on an object
331 * for which a pager has been created.
332 *
333 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
334 * is marked as paged out in the existence map for the object.
335 * PAGED_OUT() is used to determine if a page has already been pushed
336 * into a copy object in order to avoid a redundant page out operation.
337 */
338 #define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
339 != VM_EXTERNAL_STATE_ABSENT)
340 #define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
341 == VM_EXTERNAL_STATE_EXISTS)
342 #else /* MACH_PAGEMAP */
343 /*
344 * If the MACH page map optimization is not enabled,
345 * LOOK_FOR() always evaluates to TRUE. The pager will always be
346 * invoked to resolve missing pages in an object, assuming the pager
347 * has been created for the object. In a clustered page operation, the
348 * absence of a page on backing store cannot be used to terminate
349 * a scan for adjacent pages since that information is available only in
350 * the pager. Hence pages that may not be paged out are potentially
351 * included in a clustered request. The vnode pager is coded to deal
352 * with any combination of absent/present pages in a clustered
353 * pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
354 * will always be invoked to push a dirty page into a copy object assuming
355 * a pager has been created. If the page has already been pushed, the
356 * pager will ignore the new request.
357 */
358 #define LOOK_FOR(o, f) TRUE
359 #define PAGED_OUT(o, f) FALSE
360 #endif /* MACH_PAGEMAP */
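/*
 * Example (descriptive, MACH_PAGEMAP case): if the existence map of an
 * internal object records an offset as VM_EXTERNAL_STATE_EXISTS, both
 * LOOK_FOR() and PAGED_OUT() are TRUE, so the pager is consulted and a
 * redundant push into a copy object is avoided; if the offset is
 * recorded as VM_EXTERNAL_STATE_ABSENT, both are FALSE and the pager
 * interaction is skipped.
 */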
361
362 /*
363 * Recovery actions
364 */
365 #define PREPARE_RELEASE_PAGE(m) \
366 MACRO_BEGIN \
367 vm_page_lock_queues(); \
368 MACRO_END
369
370 #define DO_RELEASE_PAGE(m) \
371 MACRO_BEGIN \
372 PAGE_WAKEUP_DONE(m); \
373 if (!m->active && !m->inactive) \
374 vm_page_activate(m); \
375 vm_page_unlock_queues(); \
376 MACRO_END
377
378 #define RELEASE_PAGE(m) \
379 MACRO_BEGIN \
380 PREPARE_RELEASE_PAGE(m); \
381 DO_RELEASE_PAGE(m); \
382 MACRO_END
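/*
 * Example (descriptive): the allocation-failure paths below use the
 * pattern
 *
 *	RELEASE_PAGE(m);
 *	vm_fault_cleanup(object, first_m);
 *	return(VM_FAULT_MEMORY_SHORTAGE);
 *
 * i.e. wake any waiters on the busy page, make sure it sits on a paging
 * queue, and drop the paging references before returning.
 */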
383
384 #if TRACEFAULTPAGE
385 dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
386 #endif
387
388
389
390 #if !VM_FAULT_STATIC_CONFIG
391 if (vm_fault_dirty_handling
392 #if MACH_KDB
393 /*
394 * If there are watchpoints set, then
395 * we don't want to give away write permission
396 * on a read fault. Make the task write fault,
397 * so that the watchpoint code notices the access.
398 */
399 || db_watchpoint_list
400 #endif /* MACH_KDB */
401 ) {
402 /*
403 * If we aren't asking for write permission,
404 * then don't give it away. We're using write
405 * faults to set the dirty bit.
406 */
407 if (!(fault_type & VM_PROT_WRITE))
408 *protection &= ~VM_PROT_WRITE;
409 }
410
411 if (!vm_fault_interruptible)
412 interruptible = THREAD_UNINT;
413 #else /* STATIC_CONFIG */
414 #if MACH_KDB
415 /*
416 * If there are watchpoints set, then
417 * we don't want to give away write permission
418 * on a read fault. Make the task write fault,
419 * so that the watchpoint code notices the access.
420 */
421 if (db_watchpoint_list) {
422 /*
423 * If we aren't asking for write permission,
424 * then don't give it away. We're using write
425 * faults to set the dirty bit.
426 */
427 if (!(fault_type & VM_PROT_WRITE))
428 *protection &= ~VM_PROT_WRITE;
429 }
430
431 #endif /* MACH_KDB */
432 #endif /* STATIC_CONFIG */
433
434 cur_thread = current_thread();
435
436 interruptible_state = cur_thread->interruptible;
437 if (interruptible == THREAD_UNINT)
438 cur_thread->interruptible = FALSE;
439
440 /*
441 * INVARIANTS (through entire routine):
442 *
443 * 1) At all times, we must either have the object
444 * lock or a busy page in some object to prevent
445 * some other thread from trying to bring in
446 * the same page.
447 *
448 * Note that we cannot hold any locks during the
449 * pager access or when waiting for memory, so
450 * we use a busy page then.
451 *
452 * Note also that we aren't as concerned about more than
453 * one thread attempting to memory_object_data_unlock
454 * the same page at once, so we don't hold the page
455 * as busy then, but do record the highest unlock
456 * value so far. [Unlock requests may also be delivered
457 * out of order.]
458 *
459 * 2) To prevent another thread from racing us down the
460 * shadow chain and entering a new page in the top
461 * object before we do, we must keep a busy page in
462 * the top object while following the shadow chain.
463 *
464 * 3) We must increment paging_in_progress on any object
465 * for which we have a busy page
466 *
467 * 4) We leave busy pages on the pageout queues.
468 * If the pageout daemon comes across a busy page,
469 * it will remove the page from the pageout queues.
470 */
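/*
 * Example (descriptive) of invariant 1: before the
 * memory_object_data_request() call below, the faulting page has been
 * inserted busy and absent and the object lock is dropped; that busy
 * page is what keeps other threads from faulting in the same data while
 * the pager works, and the lock is retaken before the object state is
 * re-examined.
 */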
471
472 /*
473 * Search for the page at object/offset.
474 */
475
476 object = first_object;
477 offset = first_offset;
478 first_m = VM_PAGE_NULL;
479 access_required = fault_type;
480
481 XPR(XPR_VM_FAULT,
482 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
483 (integer_t)object, offset, fault_type, *protection, 0);
484
485 /*
486 * See whether this page is resident
487 */
488
489 while (TRUE) {
490 #if TRACEFAULTPAGE
491 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
492 #endif
493 if (!object->alive) {
494 vm_fault_cleanup(object, first_m);
495 cur_thread->interruptible = interruptible_state;
496 return(VM_FAULT_MEMORY_ERROR);
497 }
498 m = vm_page_lookup(object, offset);
499 #if TRACEFAULTPAGE
500 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
501 #endif
502 if (m != VM_PAGE_NULL) {
503 /*
504 * If the page was pre-paged as part of a
505 * cluster, record the fact.
506 */
507 if (m->clustered) {
508 vm_pagein_cluster_used++;
509 m->clustered = FALSE;
510 }
511
512 /*
513 * If the page is being brought in,
514 * wait for it and then retry.
515 *
516 * A possible optimization: if the page
517 * is known to be resident, we can ignore
518 * pages that are absent (regardless of
519 * whether they're busy).
520 */
521
522 if (m->busy) {
523 #if TRACEFAULTPAGE
524 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
525 #endif
526 PAGE_ASSERT_WAIT(m, interruptible);
527 vm_object_unlock(object);
528 XPR(XPR_VM_FAULT,
529 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
530 (integer_t)object, offset,
531 (integer_t)m, 0, 0);
532 counter(c_vm_fault_page_block_busy_kernel++);
533 wait_result = thread_block((void (*)(void))0);
534
535 vm_object_lock(object);
536 if (wait_result != THREAD_AWAKENED) {
537 vm_fault_cleanup(object, first_m);
538 cur_thread->interruptible = interruptible_state;
539 if (wait_result == THREAD_RESTART)
540 {
541 return(VM_FAULT_RETRY);
542 }
543 else
544 {
545 return(VM_FAULT_INTERRUPTED);
546 }
547 }
548 continue;
549 }
550
551 /*
552 * If the page is in error, give up now.
553 */
554
555 if (m->error) {
556 #if TRACEFAULTPAGE
557 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */
558 #endif
559 if (error_code)
560 *error_code = m->page_error;
561 VM_PAGE_FREE(m);
562 vm_fault_cleanup(object, first_m);
563 cur_thread->interruptible = interruptible_state;
564 return(VM_FAULT_MEMORY_ERROR);
565 }
566
567 /*
568 * If the pager wants us to restart
569 * at the top of the chain,
570 * typically because it has moved the
571 * page to another pager, then do so.
572 */
573
574 if (m->restart) {
575 #if TRACEFAULTPAGE
576 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
577 #endif
578 VM_PAGE_FREE(m);
579 vm_fault_cleanup(object, first_m);
580 cur_thread->interruptible = interruptible_state;
581 return(VM_FAULT_RETRY);
582 }
583
584 /*
585 * If the page isn't busy, but is absent,
586 * then it was deemed "unavailable".
587 */
588
589 if (m->absent) {
590 /*
591 * Remove the non-existent page (unless it's
592 * in the top object) and move on down to the
593 * next object (if there is one).
594 */
595 #if TRACEFAULTPAGE
596 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */
597 #endif
598
599 next_object = object->shadow;
600 if (next_object == VM_OBJECT_NULL) {
601 vm_page_t real_m;
602
603 assert(!must_be_resident);
604
605 if (object->shadow_severed) {
606 vm_fault_cleanup(
607 object, first_m);
608 cur_thread->interruptible = interruptible_state;
609 return VM_FAULT_MEMORY_ERROR;
610 }
611
612 /*
613 * Absent page at bottom of shadow
614 * chain; zero fill the page we left
615 * busy in the first object, and flush
616 * the absent page. But first we
617 * need to allocate a real page.
618 */
619 if (VM_PAGE_THROTTLED() ||
620 (real_m = vm_page_grab()) == VM_PAGE_NULL) {
621 vm_fault_cleanup(object, first_m);
622 cur_thread->interruptible = interruptible_state;
623 return(VM_FAULT_MEMORY_SHORTAGE);
624 }
625
626 XPR(XPR_VM_FAULT,
627 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
628 (integer_t)object, offset,
629 (integer_t)m,
630 (integer_t)first_object, 0);
631 if (object != first_object) {
632 VM_PAGE_FREE(m);
633 vm_object_paging_end(object);
634 vm_object_unlock(object);
635 object = first_object;
636 offset = first_offset;
637 m = first_m;
638 first_m = VM_PAGE_NULL;
639 vm_object_lock(object);
640 }
641
642 VM_PAGE_FREE(m);
643 assert(real_m->busy);
644 vm_page_insert(real_m, object, offset);
645 m = real_m;
646
647 /*
648 * Drop the lock while zero filling the
649 * page. Then break because this
650 * is the page we wanted. Checking
651 * the page lock is a waste of time;
652 * this page was either absent or
653 * newly allocated -- in both cases
654 * it can't be page locked by a pager.
655 */
656 m->no_isync = FALSE;
657
658 if (!no_zero_fill) {
659 vm_object_unlock(object);
660 vm_page_zero_fill(m);
661 if (type_of_fault)
662 *type_of_fault = DBG_ZERO_FILL_FAULT;
663 VM_STAT(zero_fill_count++);
664
665 if (bumped_pagein == TRUE) {
666 VM_STAT(pageins--);
667 current_task()->pageins--;
668 }
669 vm_object_lock(object);
670 }
671 pmap_clear_modify(m->phys_addr);
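/*
 * Put the freshly zero-filled page on the inactive queue,
 * stamping it with the current page ticket; the ticket
 * advances once every VM_PAGE_TICKETS_IN_ROLL insertions
 * and wraps back to zero at VM_PAGE_TICKET_ROLL_IDS.
 */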
672 vm_page_lock_queues();
673 VM_PAGE_QUEUES_REMOVE(m);
674 m->page_ticket = vm_page_ticket;
675 vm_page_ticket_roll++;
676 if(vm_page_ticket_roll ==
677 VM_PAGE_TICKETS_IN_ROLL) {
678 vm_page_ticket_roll = 0;
679 if(vm_page_ticket ==
680 VM_PAGE_TICKET_ROLL_IDS)
681 vm_page_ticket= 0;
682 else
683 vm_page_ticket++;
684 }
685 queue_enter(&vm_page_queue_inactive,
686 m, vm_page_t, pageq);
687 m->inactive = TRUE;
688 vm_page_inactive_count++;
689 vm_page_unlock_queues();
690 break;
691 } else {
692 if (must_be_resident) {
693 vm_object_paging_end(object);
694 } else if (object != first_object) {
695 vm_object_paging_end(object);
696 VM_PAGE_FREE(m);
697 } else {
698 first_m = m;
699 m->absent = FALSE;
700 m->unusual = FALSE;
701 vm_object_absent_release(object);
702 m->busy = TRUE;
703
704 vm_page_lock_queues();
705 VM_PAGE_QUEUES_REMOVE(m);
706 vm_page_unlock_queues();
707 }
708 XPR(XPR_VM_FAULT,
709 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
710 (integer_t)object, offset,
711 (integer_t)next_object,
712 offset+object->shadow_offset,0);
713 offset += object->shadow_offset;
714 hi_offset += object->shadow_offset;
715 lo_offset += object->shadow_offset;
716 access_required = VM_PROT_READ;
717 vm_object_lock(next_object);
718 vm_object_unlock(object);
719 object = next_object;
720 vm_object_paging_begin(object);
721 continue;
722 }
723 }
724
725 if ((m->cleaning)
726 && ((object != first_object) ||
727 (object->copy != VM_OBJECT_NULL))
728 && (fault_type & VM_PROT_WRITE)) {
729 /*
730 * This is a copy-on-write fault that will
731 * cause us to revoke access to this page, but
732 * this page is in the process of being cleaned
733 * in a clustered pageout. We must wait until
734 * the cleaning operation completes before
735 * revoking access to the original page,
736 * otherwise we might attempt to remove a
737 * wired mapping.
738 */
739 #if TRACEFAULTPAGE
740 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */
741 #endif
742 XPR(XPR_VM_FAULT,
743 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
744 (integer_t)object, offset,
745 (integer_t)m, 0, 0);
746 /* take an extra ref so that object won't die */
747 assert(object->ref_count > 0);
748 object->ref_count++;
749 vm_object_res_reference(object);
750 vm_fault_cleanup(object, first_m);
751 counter(c_vm_fault_page_block_backoff_kernel++);
752 vm_object_lock(object);
753 assert(object->ref_count > 0);
754 m = vm_page_lookup(object, offset);
755 if (m != VM_PAGE_NULL && m->cleaning) {
756 PAGE_ASSERT_WAIT(m, interruptible);
757 vm_object_unlock(object);
758 wait_result = thread_block((void (*)(void)) 0);
759 vm_object_deallocate(object);
760 goto backoff;
761 } else {
762 vm_object_unlock(object);
763 vm_object_deallocate(object);
764 cur_thread->interruptible = interruptible_state;
765 return VM_FAULT_RETRY;
766 }
767 }
768
769 /*
770 * If the desired access to this page has
771 * been locked out, request that it be unlocked.
772 */
773
774 if (access_required & m->page_lock) {
775 if ((access_required & m->unlock_request) != access_required) {
776 vm_prot_t new_unlock_request;
777 kern_return_t rc;
778
779 #if TRACEFAULTPAGE
780 dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready); /* (TEST/DEBUG) */
781 #endif
782 if (!object->pager_ready) {
783 XPR(XPR_VM_FAULT,
784 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
785 access_required,
786 (integer_t)object, offset,
787 (integer_t)m, 0);
788 /* take an extra ref */
789 assert(object->ref_count > 0);
790 object->ref_count++;
791 vm_object_res_reference(object);
792 vm_fault_cleanup(object,
793 first_m);
794 counter(c_vm_fault_page_block_backoff_kernel++);
795 vm_object_lock(object);
796 assert(object->ref_count > 0);
797 if (!object->pager_ready) {
798 vm_object_assert_wait(
799 object,
800 VM_OBJECT_EVENT_PAGER_READY,
801 interruptible);
802 vm_object_unlock(object);
803 wait_result = thread_block((void (*)(void))0);
804 vm_object_deallocate(object);
805 goto backoff;
806 } else {
807 vm_object_unlock(object);
808 vm_object_deallocate(object);
809 cur_thread->interruptible = interruptible_state;
810 return VM_FAULT_RETRY;
811 }
812 }
813
814 new_unlock_request = m->unlock_request =
815 (access_required | m->unlock_request);
816 vm_object_unlock(object);
817 XPR(XPR_VM_FAULT,
818 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
819 (integer_t)object, offset,
820 (integer_t)m, new_unlock_request, 0);
821 if ((rc = memory_object_data_unlock(
822 object->pager,
823 offset + object->paging_offset,
824 PAGE_SIZE,
825 new_unlock_request))
826 != KERN_SUCCESS) {
827 if (vm_fault_debug)
828 printf("vm_fault: memory_object_data_unlock failed\n");
829 vm_object_lock(object);
830 vm_fault_cleanup(object, first_m);
831 cur_thread->interruptible = interruptible_state;
832 return((rc == MACH_SEND_INTERRUPTED) ?
833 VM_FAULT_INTERRUPTED :
834 VM_FAULT_MEMORY_ERROR);
835 }
836 vm_object_lock(object);
837 continue;
838 }
839
840 XPR(XPR_VM_FAULT,
841 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
842 access_required, (integer_t)object,
843 offset, (integer_t)m, 0);
844 /* take an extra ref so object won't die */
845 assert(object->ref_count > 0);
846 object->ref_count++;
847 vm_object_res_reference(object);
848 vm_fault_cleanup(object, first_m);
849 counter(c_vm_fault_page_block_backoff_kernel++);
850 vm_object_lock(object);
851 assert(object->ref_count > 0);
852 m = vm_page_lookup(object, offset);
853 if (m != VM_PAGE_NULL &&
854 (access_required & m->page_lock) &&
855 !((access_required & m->unlock_request) != access_required)) {
856 PAGE_ASSERT_WAIT(m, interruptible);
857 vm_object_unlock(object);
858 wait_result = thread_block((void (*)(void)) 0);
859 vm_object_deallocate(object);
860 goto backoff;
861 } else {
862 vm_object_unlock(object);
863 vm_object_deallocate(object);
864 cur_thread->interruptible = interruptible_state;
865 return VM_FAULT_RETRY;
866 }
867 }
868 /*
869 * We mark the page busy and leave it on
870 * the pageout queues. If the pageout
871 * daemon comes across it, then it will
872 * remove the page.
873 */
874
875 #if TRACEFAULTPAGE
876 dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
877 #endif
878
879 #if !VM_FAULT_STATIC_CONFIG
880 if (!software_reference_bits) {
881 vm_page_lock_queues();
882 if (m->inactive)
883 vm_stat.reactivations++;
884
885 VM_PAGE_QUEUES_REMOVE(m);
886 vm_page_unlock_queues();
887 }
888 #endif
889 XPR(XPR_VM_FAULT,
890 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
891 (integer_t)object, offset, (integer_t)m, 0, 0);
892 assert(!m->busy);
893 m->busy = TRUE;
894 assert(!m->absent);
895 break;
896 }
897
898 look_for_page =
899 (object->pager_created) &&
900 LOOK_FOR(object, offset) &&
901 (!data_supply);
902
903 #if TRACEFAULTPAGE
904 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */
905 #endif
906 if ((look_for_page || (object == first_object))
907 && !must_be_resident
908 && !(object->phys_contiguous)) {
909 /*
910 * Allocate a new page for this object/offset
911 * pair.
912 */
913
914 m = vm_page_grab_fictitious();
915 #if TRACEFAULTPAGE
916 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
917 #endif
918 if (m == VM_PAGE_NULL) {
919 vm_fault_cleanup(object, first_m);
920 cur_thread->interruptible = interruptible_state;
921 return(VM_FAULT_FICTITIOUS_SHORTAGE);
922 }
923 vm_page_insert(m, object, offset);
924 }
925
926 if ((look_for_page && !must_be_resident)) {
927 kern_return_t rc;
928
929 /*
930 * If the memory manager is not ready, we
931 * cannot make requests.
932 */
933 if (!object->pager_ready) {
934 #if TRACEFAULTPAGE
935 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
936 #endif
937 if(m != VM_PAGE_NULL)
938 VM_PAGE_FREE(m);
939 XPR(XPR_VM_FAULT,
940 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
941 (integer_t)object, offset, 0, 0, 0);
942 /* take an extra ref so object won't die */
943 assert(object->ref_count > 0);
944 object->ref_count++;
945 vm_object_res_reference(object);
946 vm_fault_cleanup(object, first_m);
947 counter(c_vm_fault_page_block_backoff_kernel++);
948 vm_object_lock(object);
949 assert(object->ref_count > 0);
950 if (!object->pager_ready) {
951 vm_object_assert_wait(object,
952 VM_OBJECT_EVENT_PAGER_READY,
953 interruptible);
954 vm_object_unlock(object);
955 wait_result = thread_block((void (*)(void))0);
956 vm_object_deallocate(object);
957 goto backoff;
958 } else {
959 vm_object_unlock(object);
960 vm_object_deallocate(object);
961 cur_thread->interruptible = interruptible_state;
962 return VM_FAULT_RETRY;
963 }
964 }
965
966 if(object->phys_contiguous) {
967 if(m != VM_PAGE_NULL) {
968 VM_PAGE_FREE(m);
969 m = VM_PAGE_NULL;
970 }
971 goto no_clustering;
972 }
973 if (object->internal) {
974 /*
975 * Requests to the default pager
976 * must reserve a real page in advance,
977 * because the pager's data-provided
978 * won't block for pages. IMPORTANT:
979 * this acts as a throttling mechanism
980 * for data_requests to the default
981 * pager.
982 */
983
984 #if TRACEFAULTPAGE
985 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
986 #endif
987 if (m->fictitious && !vm_page_convert(m)) {
988 VM_PAGE_FREE(m);
989 vm_fault_cleanup(object, first_m);
990 cur_thread->interruptible = interruptible_state;
991 return(VM_FAULT_MEMORY_SHORTAGE);
992 }
993 } else if (object->absent_count >
994 vm_object_absent_max) {
995 /*
996 * If there are too many outstanding page
997 * requests pending on this object, we
998 * wait for them to be resolved now.
999 */
1000
1001 #if TRACEFAULTPAGE
1002 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1003 #endif
1004 if(m != VM_PAGE_NULL)
1005 VM_PAGE_FREE(m);
1006 /* take an extra ref so object won't die */
1007 assert(object->ref_count > 0);
1008 object->ref_count++;
1009 vm_object_res_reference(object);
1010 vm_fault_cleanup(object, first_m);
1011 counter(c_vm_fault_page_block_backoff_kernel++);
1012 vm_object_lock(object);
1013 assert(object->ref_count > 0);
1014 if (object->absent_count > vm_object_absent_max) {
1015 vm_object_absent_assert_wait(object,
1016 interruptible);
1017 vm_object_unlock(object);
1018 wait_result = thread_block((void (*)(void))0);
1019 vm_object_deallocate(object);
1020 goto backoff;
1021 } else {
1022 vm_object_unlock(object);
1023 vm_object_deallocate(object);
1024 cur_thread->interruptible = interruptible_state;
1025 return VM_FAULT_RETRY;
1026 }
1027 }
1028
1029 /*
1030 * Indicate that the page is waiting for data
1031 * from the memory manager.
1032 */
1033
1034 if(m != VM_PAGE_NULL) {
1035
1036 m->list_req_pending = TRUE;
1037 m->absent = TRUE;
1038 m->unusual = TRUE;
1039 object->absent_count++;
1040
1041 }
1042
1043 cluster_start = offset;
1044 length = PAGE_SIZE;
1045 cluster_size = object->cluster_size;
1046
1047 /*
1048 * Skip clustered pagein if it is globally disabled
1049 * or random page reference behavior is expected
1050 * for the address range containing the faulting
1051 * address or the object paging block size is
1052 * equal to the page size.
1053 */
1054 if (!vm_allow_clustered_pagein ||
1055 behavior == VM_BEHAVIOR_RANDOM ||
1056 m == VM_PAGE_NULL ||
1057 cluster_size == PAGE_SIZE) {
1058 cluster_start = trunc_page_64(cluster_start);
1059 goto no_clustering;
1060 }
1061
1062 assert(offset >= lo_offset);
1063 assert(offset < hi_offset);
1064 assert(ALIGNED(object->paging_offset));
1065 assert(cluster_size >= PAGE_SIZE);
1066
1067 #if TRACEFAULTPAGE
1068 dbgTrace(0xBEEF0011, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1069 #endif
1070 /*
1071 * Decide whether to scan ahead or behind for
1072 * additional pages contiguous to the faulted
1073 * page in the same paging block. The decision
1074 * is based on system wide globals and the
1075 * expected page reference behavior of the
1076 * address range containing the faulting address.
1077 * First calculate some constants.
1078 */
1079 paging_offset = offset + object->paging_offset;
1080 cluster_offset = paging_offset & (cluster_size - 1);
1081 align_offset = paging_offset&(PAGE_SIZE_64-1);
1082 if (align_offset != 0) {
1083 cluster_offset = trunc_page_64(cluster_offset);
1084 }
1085
1086 #define SPANS_CLUSTER(x) ((((x) - align_offset) & (vm_object_offset_t)(cluster_size - 1)) == 0)
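/*
 * Worked example (assuming 4K pages, a 64K cluster_size and a zero
 * object->paging_offset): a fault at offset 0x13000 gives
 * cluster_offset 0x3000 and align_offset 0, so before the behavior-
 * and map-entry-based clipping below, the candidate cluster runs from
 * 0x10000 up to (but not including) 0x20000.  SPANS_CLUSTER(x) is TRUE
 * exactly when (x - align_offset) is a multiple of cluster_size, which
 * keeps the scans from crossing into a neighboring paging block.
 */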
1087
1088 /*
1089 * Backward scan only if reverse sequential
1090 * behavior has been specified
1091 */
1092 CLUSTER_STAT(pages_at_lower_offsets = 0;)
1093 if (((vm_default_behind != 0 &&
1094 behavior == VM_BEHAVIOR_DEFAULT) ||
1095 behavior == VM_BEHAVIOR_RSEQNTL) && offset) {
1096 vm_object_offset_t cluster_bot;
1097
1098 /*
1099 * Calculate lower search boundary.
1100 * Exclude pages that span a cluster boundary.
1101 * Clip to start of map entry.
1102 * For default page reference behavior, scan
1103 * default pages behind.
1104 */
1105 cluster_bot = (offset > cluster_offset) ?
1106 offset - cluster_offset : offset;
1107 if (align_offset != 0) {
1108 if ((cluster_bot < offset) &&
1109 SPANS_CLUSTER(cluster_bot)) {
1110 cluster_bot += PAGE_SIZE_64;
1111 }
1112 }
1113 if (behavior == VM_BEHAVIOR_DEFAULT) {
1114 vm_object_offset_t
1115 bot = (vm_object_offset_t)
1116 (vm_default_behind * PAGE_SIZE);
1117
1118 if (cluster_bot < (offset - bot))
1119 cluster_bot = offset - bot;
1120 }
1121 if (lo_offset > cluster_bot)
1122 cluster_bot = lo_offset;
1123
1124 for ( cluster_start = offset - PAGE_SIZE_64;
1125 (cluster_start >= cluster_bot) &&
1126 (cluster_start !=
1127 (align_offset - PAGE_SIZE_64));
1128 cluster_start -= PAGE_SIZE_64) {
1129 assert(cluster_size > PAGE_SIZE_64);
1130 retry_cluster_backw:
1131 if (!LOOK_FOR(object, cluster_start) ||
1132 vm_page_lookup(object, cluster_start)
1133 != VM_PAGE_NULL) {
1134 break;
1135 }
1136 if (object->internal) {
1137 /*
1138 * need to acquire a real page in
1139 * advance because this acts as
1140 * a throttling mechanism for
1141 * data_requests to the default
1142 * pager. If this fails, give up
1143 * trying to find any more pages
1144 * in the cluster and send off the
1145 * request for what we already have.
1146 */
1147 if ((m = vm_page_grab())
1148 == VM_PAGE_NULL) {
1149 cluster_start += PAGE_SIZE_64;
1150 cluster_end = offset + PAGE_SIZE_64;
1151 goto give_up;
1152 }
1153 } else if ((m = vm_page_grab_fictitious())
1154 == VM_PAGE_NULL) {
1155 vm_object_unlock(object);
1156 vm_page_more_fictitious();
1157 vm_object_lock(object);
1158 goto retry_cluster_backw;
1159 }
1160 m->absent = TRUE;
1161 m->unusual = TRUE;
1162 m->clustered = TRUE;
1163 m->list_req_pending = TRUE;
1164
1165 vm_page_insert(m, object, cluster_start);
1166 CLUSTER_STAT(pages_at_lower_offsets++;)
1167 object->absent_count++;
1168 }
1169 cluster_start += PAGE_SIZE_64;
1170 assert(cluster_start >= cluster_bot);
1171 }
1172 assert(cluster_start <= offset);
1173
1174 /*
1175 * Forward scan if default or sequential behavior
1176 * specified
1177 */
1178 CLUSTER_STAT(pages_at_higher_offsets = 0;)
1179 if ((behavior == VM_BEHAVIOR_DEFAULT &&
1180 vm_default_ahead != 0) ||
1181 behavior == VM_BEHAVIOR_SEQUENTIAL) {
1182 vm_object_offset_t cluster_top;
1183
1184 /*
1185 * Calculate upper search boundary.
1186 * Exclude pages that span a cluster boundary.
1187 * Clip to end of map entry.
1188 * For default page reference behavior, scan
1189 * default pages ahead.
1190 */
1191 cluster_top = (offset + cluster_size) -
1192 cluster_offset;
1193 if (align_offset != 0) {
1194 if ((cluster_top > (offset + PAGE_SIZE_64)) &&
1195 SPANS_CLUSTER(cluster_top)) {
1196 cluster_top -= PAGE_SIZE_64;
1197 }
1198 }
1199 if (behavior == VM_BEHAVIOR_DEFAULT) {
1200 vm_object_offset_t top = (vm_object_offset_t)
1201 ((vm_default_ahead*PAGE_SIZE)+PAGE_SIZE);
1202
1203 if (cluster_top > (offset + top))
1204 cluster_top = offset + top;
1205 }
1206 if (cluster_top > hi_offset)
1207 cluster_top = hi_offset;
1208
1209 for (cluster_end = offset + PAGE_SIZE_64;
1210 cluster_end < cluster_top;
1211 cluster_end += PAGE_SIZE_64) {
1212 assert(cluster_size > PAGE_SIZE);
1213 retry_cluster_forw:
1214 if (!LOOK_FOR(object, cluster_end) ||
1215 vm_page_lookup(object, cluster_end)
1216 != VM_PAGE_NULL) {
1217 break;
1218 }
1219 if (object->internal) {
1220 /*
1221 * need to acquire a real page in
1222 * advance because this acts as
1223 * a throttling mechanism for
1224 * data_requests to the default
1225 * pager. If this fails, give up
1226 * trying to find any more pages
1227 * in the cluster and send off the
1228 * request for what we already have.
1229 */
1230 if ((m = vm_page_grab())
1231 == VM_PAGE_NULL) {
1232 break;
1233 }
1234 } else if ((m = vm_page_grab_fictitious())
1235 == VM_PAGE_NULL) {
1236 vm_object_unlock(object);
1237 vm_page_more_fictitious();
1238 vm_object_lock(object);
1239 goto retry_cluster_forw;
1240 }
1241 m->absent = TRUE;
1242 m->unusual = TRUE;
1243 m->clustered = TRUE;
1244 m->list_req_pending = TRUE;
1245
1246 vm_page_insert(m, object, cluster_end);
1247 CLUSTER_STAT(pages_at_higher_offsets++;)
1248 object->absent_count++;
1249 }
1250 assert(cluster_end <= cluster_top);
1251 }
1252 else {
1253 cluster_end = offset + PAGE_SIZE_64;
1254 }
1255 give_up:
1256 assert(cluster_end >= offset + PAGE_SIZE_64);
1257 length = cluster_end - cluster_start;
1258
1259 #if MACH_CLUSTER_STATS
1260 CLUSTER_STAT_HIGHER(pages_at_higher_offsets);
1261 CLUSTER_STAT_LOWER(pages_at_lower_offsets);
1262 CLUSTER_STAT_CLUSTER(length/PAGE_SIZE);
1263 #endif /* MACH_CLUSTER_STATS */
1264
1265 no_clustering:
1266 /*
1267 * lengthen the cluster by the pages in the working set
1268 */
1269 if((map != NULL) &&
1270 (current_task()->dynamic_working_set != 0)) {
1271 cluster_end = cluster_start + length;
1272 /* tws values for start and end are just
1273 * suggestions. Therefore, as long as
1274 * build_cluster does not use pointers or
1275 * take action based on values that
1276 * could be affected by re-entrance we
1277 * do not need to take the map lock.
1278 */
1279 tws_build_cluster((tws_hash_t)
1280 current_task()->dynamic_working_set,
1281 object, &cluster_start,
1282 &cluster_end, 0x16000);
1283 length = cluster_end - cluster_start;
1284 }
1285 #if TRACEFAULTPAGE
1286 dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */
1287 #endif
1288 /*
1289 * We have a busy page, so we can
1290 * release the object lock.
1291 */
1292 vm_object_unlock(object);
1293
1294 /*
1295 * Call the memory manager to retrieve the data.
1296 */
1297
1298 if (type_of_fault)
1299 *type_of_fault = DBG_PAGEIN_FAULT;
1300 VM_STAT(pageins++);
1301 current_task()->pageins++;
1302 bumped_pagein = TRUE;
1303
1304 /*
1305 * If this object uses a copy_call strategy,
1306 * and we are interested in a copy of this object
1307 * (having gotten here only by following a
1308 * shadow chain), then tell the memory manager
1309 * via a flag added to the desired_access
1310 * parameter, so that it can detect a race
1311 * between our walking down the shadow chain
1312 * and its pushing pages up into a copy of
1313 * the object that it manages.
1314 */
1315
1316 if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1317 object != first_object) {
1318 wants_copy_flag = VM_PROT_WANTS_COPY;
1319 } else {
1320 wants_copy_flag = VM_PROT_NONE;
1321 }
1322
1323 XPR(XPR_VM_FAULT,
1324 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1325 (integer_t)object, offset, (integer_t)m,
1326 access_required | wants_copy_flag, 0);
1327
1328 rc = memory_object_data_request(object->pager,
1329 cluster_start + object->paging_offset,
1330 length,
1331 access_required | wants_copy_flag);
1332
1333
1334 #if TRACEFAULTPAGE
1335 dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1336 #endif
1337 if (rc != KERN_SUCCESS) {
1338 if (rc != MACH_SEND_INTERRUPTED
1339 && vm_fault_debug)
1340 printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
1341 "memory_object_data_request",
1342 object->pager,
1343 cluster_start + object->paging_offset,
1344 length, access_required, rc);
1345 /*
1346 * Don't want to leave a busy page around,
1347 * but the data request may have blocked,
1348 * so check if it's still there and busy.
1349 */
1350 if(!object->phys_contiguous) {
1351 vm_object_lock(object);
1352 for (; length; length -= PAGE_SIZE,
1353 cluster_start += PAGE_SIZE_64) {
1354 vm_page_t p;
1355 if ((p = vm_page_lookup(object,
1356 cluster_start))
1357 && p->absent && p->busy
1358 && p != first_m) {
1359 VM_PAGE_FREE(p);
1360 }
1361 }
1362 }
1363 vm_fault_cleanup(object, first_m);
1364 cur_thread->interruptible = interruptible_state;
1365 return((rc == MACH_SEND_INTERRUPTED) ?
1366 VM_FAULT_INTERRUPTED :
1367 VM_FAULT_MEMORY_ERROR);
1368 } else {
1369 #ifdef notdefcdy
1370 tws_hash_line_t line;
1371 task_t task;
1372
1373 task = current_task();
1374
1375 if((map != NULL) &&
1376 (task->dynamic_working_set != 0)) {
1377 if(tws_lookup
1378 ((tws_hash_t)
1379 task->dynamic_working_set,
1380 offset, object,
1381 &line) == KERN_SUCCESS) {
1382 tws_line_signal((tws_hash_t)
1383 task->dynamic_working_set,
1384 map, line, vaddr);
1385 }
1386 }
1387 #endif
1388 }
1389
1390 /*
1391 * Retry with same object/offset, since new data may
1392 * be in a different page (i.e., m is meaningless at
1393 * this point).
1394 */
1395 vm_object_lock(object);
1396 if ((interruptible != THREAD_UNINT) &&
1397 (current_thread()->state & TH_ABORT)) {
1398 vm_fault_cleanup(object, first_m);
1399 cur_thread->interruptible = interruptible_state;
1400 return(VM_FAULT_INTERRUPTED);
1401 }
1402 if(m == VM_PAGE_NULL)
1403 break;
1404 continue;
1405 }
1406
1407 /*
1408 * The only case in which we get here is if
1409 * the object has no pager (or we are unwiring). If the pager doesn't
1410 * have the page, this is handled in the m->absent case above
1411 * (and if you change things here you should look above).
1412 */
1413 #if TRACEFAULTPAGE
1414 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1415 #endif
1416 if (object == first_object)
1417 first_m = m;
1418 else
1419 assert(m == VM_PAGE_NULL);
1420
1421 XPR(XPR_VM_FAULT,
1422 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1423 (integer_t)object, offset, (integer_t)m,
1424 (integer_t)object->shadow, 0);
1425 /*
1426 * Move on to the next object. Lock the next
1427 * object before unlocking the current one.
1428 */
1429 next_object = object->shadow;
1430 if (next_object == VM_OBJECT_NULL) {
1431 assert(!must_be_resident);
1432 /*
1433 * If there's no object left, fill the page
1434 * in the top object with zeros. But first we
1435 * need to allocate a real page.
1436 */
1437
1438 if (object != first_object) {
1439 vm_object_paging_end(object);
1440 vm_object_unlock(object);
1441
1442 object = first_object;
1443 offset = first_offset;
1444 vm_object_lock(object);
1445 }
1446
1447 m = first_m;
1448 assert(m->object == object);
1449 first_m = VM_PAGE_NULL;
1450
1451 if (object->shadow_severed) {
1452 VM_PAGE_FREE(m);
1453 vm_fault_cleanup(object, VM_PAGE_NULL);
1454 cur_thread->interruptible = interruptible_state;
1455 return VM_FAULT_MEMORY_ERROR;
1456 }
1457
1458 if (VM_PAGE_THROTTLED() ||
1459 (m->fictitious && !vm_page_convert(m))) {
1460 VM_PAGE_FREE(m);
1461 vm_fault_cleanup(object, VM_PAGE_NULL);
1462 cur_thread->interruptible = interruptible_state;
1463 return(VM_FAULT_MEMORY_SHORTAGE);
1464 }
1465 m->no_isync = FALSE;
1466
1467 if (!no_zero_fill) {
1468 vm_object_unlock(object);
1469 vm_page_zero_fill(m);
1470 if (type_of_fault)
1471 *type_of_fault = DBG_ZERO_FILL_FAULT;
1472 VM_STAT(zero_fill_count++);
1473
1474 if (bumped_pagein == TRUE) {
1475 VM_STAT(pageins--);
1476 current_task()->pageins--;
1477 }
1478 vm_object_lock(object);
1479 }
1480 vm_page_lock_queues();
1481 VM_PAGE_QUEUES_REMOVE(m);
1482 m->page_ticket = vm_page_ticket;
1483 vm_page_ticket_roll++;
1484 if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1485 vm_page_ticket_roll = 0;
1486 if(vm_page_ticket ==
1487 VM_PAGE_TICKET_ROLL_IDS)
1488 vm_page_ticket= 0;
1489 else
1490 vm_page_ticket++;
1491 }
1492 queue_enter(&vm_page_queue_inactive,
1493 m, vm_page_t, pageq);
1494 m->inactive = TRUE;
1495 vm_page_inactive_count++;
1496 vm_page_unlock_queues();
1497 pmap_clear_modify(m->phys_addr);
1498 break;
1499 }
1500 else {
1501 if ((object != first_object) || must_be_resident)
1502 vm_object_paging_end(object);
1503 offset += object->shadow_offset;
1504 hi_offset += object->shadow_offset;
1505 lo_offset += object->shadow_offset;
1506 access_required = VM_PROT_READ;
1507 vm_object_lock(next_object);
1508 vm_object_unlock(object);
1509 object = next_object;
1510 vm_object_paging_begin(object);
1511 }
1512 }
1513
1514 /*
1515 * PAGE HAS BEEN FOUND.
1516 *
1517 * This page (m) is:
1518 * busy, so that we can play with it;
1519 * not absent, so that nobody else will fill it;
1520 * possibly eligible for pageout;
1521 *
1522 * The top-level page (first_m) is:
1523 * VM_PAGE_NULL if the page was found in the
1524 * top-level object;
1525 * busy, not absent, and ineligible for pageout.
1526 *
1527 * The current object (object) is locked. A paging
1528 * reference is held for the current and top-level
1529 * objects.
1530 */
1531
1532 #if TRACEFAULTPAGE
1533 dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1534 #endif
1535 #if EXTRA_ASSERTIONS
1536 if(m != VM_PAGE_NULL) {
1537 assert(m->busy && !m->absent);
1538 assert((first_m == VM_PAGE_NULL) ||
1539 (first_m->busy && !first_m->absent &&
1540 !first_m->active && !first_m->inactive));
1541 }
1542 #endif /* EXTRA_ASSERTIONS */
1543
1544 XPR(XPR_VM_FAULT,
1545 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1546 (integer_t)object, offset, (integer_t)m,
1547 (integer_t)first_object, (integer_t)first_m);
1548 /*
1549 * If the page is being written, but isn't
1550 * already owned by the top-level object,
1551 * we have to copy it into a new page owned
1552 * by the top-level object.
1553 */
1554
1555 if ((object != first_object) && (m != VM_PAGE_NULL)) {
1556 /*
1557 * We only really need to copy if we
1558 * want to write it.
1559 */
1560
1561 #if TRACEFAULTPAGE
1562 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1563 #endif
1564 if (fault_type & VM_PROT_WRITE) {
1565 vm_page_t copy_m;
1566
1567 assert(!must_be_resident);
1568
1569 /*
1570 * If we try to collapse first_object at this
1571 * point, we may deadlock when we try to get
1572 * the lock on an intermediate object (since we
1573 * have the bottom object locked). We can't
1574 * unlock the bottom object, because the page
1575 * we found may move (by collapse) if we do.
1576 *
1577 * Instead, we first copy the page. Then, when
1578 * we have no more use for the bottom object,
1579 * we unlock it and try to collapse.
1580 *
1581 * Note that we copy the page even if we didn't
1582 * need to... that's the breaks.
1583 */
1584
1585 /*
1586 * Allocate a page for the copy
1587 */
1588 copy_m = vm_page_grab();
1589 if (copy_m == VM_PAGE_NULL) {
1590 RELEASE_PAGE(m);
1591 vm_fault_cleanup(object, first_m);
1592 cur_thread->interruptible = interruptible_state;
1593 return(VM_FAULT_MEMORY_SHORTAGE);
1594 }
1595
1596
1597 XPR(XPR_VM_FAULT,
1598 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1599 (integer_t)object, offset,
1600 (integer_t)m, (integer_t)copy_m, 0);
1601 vm_page_copy(m, copy_m);
1602
1603 /*
1604 * If another map is truly sharing this
1605 * page with us, we have to flush all
1606 * uses of the original page, since we
1607 * can't distinguish those which want the
1608 * original from those which need the
1609 * new copy.
1610 *
1611 * XXXO If we know that only one map has
1612 * access to this page, then we could
1613 * avoid the pmap_page_protect() call.
1614 */
1615
1616 vm_page_lock_queues();
1617 assert(!m->cleaning);
1618 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1619 vm_page_deactivate(m);
1620 copy_m->dirty = TRUE;
1621 /*
1622 * Setting reference here prevents this fault from
1623 * being counted as a (per-thread) reactivate as well
1624 * as a copy-on-write.
1625 */
1626 first_m->reference = TRUE;
1627 vm_page_unlock_queues();
1628
1629 /*
1630 * We no longer need the old page or object.
1631 */
1632
1633 PAGE_WAKEUP_DONE(m);
1634 vm_object_paging_end(object);
1635 vm_object_unlock(object);
1636
1637 if (type_of_fault)
1638 *type_of_fault = DBG_COW_FAULT;
1639 VM_STAT(cow_faults++);
1640 current_task()->cow_faults++;
1641 object = first_object;
1642 offset = first_offset;
1643
1644 vm_object_lock(object);
1645 VM_PAGE_FREE(first_m);
1646 first_m = VM_PAGE_NULL;
1647 assert(copy_m->busy);
1648 vm_page_insert(copy_m, object, offset);
1649 m = copy_m;
1650
1651 /*
1652 * Now that we've gotten the copy out of the
1653 * way, let's try to collapse the top object.
1654 * But we have to play ugly games with
1655 * paging_in_progress to do that...
1656 */
1657
1658 vm_object_paging_end(object);
1659 vm_object_collapse(object);
1660 vm_object_paging_begin(object);
1661
1662 }
1663 else {
1664 *protection &= (~VM_PROT_WRITE);
1665 }
1666 }
1667
1668 /*
1669 * Now check whether the page needs to be pushed into the
1670 * copy object. The use of asymmetric copy on write for
1671 * shared temporary objects means that we may do two copies to
1672 * satisfy the fault; one above to get the page from a
1673 * shadowed object, and one here to push it into the copy.
1674 */
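/*
 * Example (descriptive): on a write fault against an object that both
 * shadows another object and has a delayed-copy object of its own, the
 * data is first copied up from the shadow into "first_object" (the COW
 * copy above) and then, in the loop below, pushed into "copy_object" so
 * that the copy still sees the pre-write contents.
 */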
1675
1676 while (first_object->copy_strategy == MEMORY_OBJECT_COPY_DELAY &&
1677 (copy_object = first_object->copy) != VM_OBJECT_NULL &&
1678 (m!= VM_PAGE_NULL)) {
1679 vm_object_offset_t copy_offset;
1680 vm_page_t copy_m;
1681
1682 #if TRACEFAULTPAGE
1683 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1684 #endif
1685 /*
1686 * If the page is being written, but hasn't been
1687 * copied to the copy-object, we have to copy it there.
1688 */
1689
1690 if ((fault_type & VM_PROT_WRITE) == 0) {
1691 *protection &= ~VM_PROT_WRITE;
1692 break;
1693 }
1694
1695 /*
1696 * If the page was guaranteed to be resident,
1697 * we must have already performed the copy.
1698 */
1699
1700 if (must_be_resident)
1701 break;
1702
1703 /*
1704 * Try to get the lock on the copy_object.
1705 */
1706 if (!vm_object_lock_try(copy_object)) {
1707 vm_object_unlock(object);
1708
1709 mutex_pause(); /* wait a bit */
1710
1711 vm_object_lock(object);
1712 continue;
1713 }
1714
1715 /*
1716 * Make another reference to the copy-object,
1717 * to keep it from disappearing during the
1718 * copy.
1719 */
1720 assert(copy_object->ref_count > 0);
1721 copy_object->ref_count++;
1722 VM_OBJ_RES_INCR(copy_object);
1723
1724 /*
1725 * Does the page exist in the copy?
1726 */
1727 copy_offset = first_offset - copy_object->shadow_offset;
1728 if (copy_object->size <= copy_offset)
1729 /*
1730 * Copy object doesn't cover this page -- do nothing.
1731 */
1732 ;
1733 else if ((copy_m =
1734 vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1735 /* Page currently exists in the copy object */
1736 if (copy_m->busy) {
1737 /*
1738 * If the page is being brought
1739 * in, wait for it and then retry.
1740 */
1741 RELEASE_PAGE(m);
1742 /* take an extra ref so object won't die */
1743 assert(copy_object->ref_count > 0);
1744 copy_object->ref_count++;
1745 vm_object_res_reference(copy_object);
1746 vm_object_unlock(copy_object);
1747 vm_fault_cleanup(object, first_m);
1748 counter(c_vm_fault_page_block_backoff_kernel++);
1749 vm_object_lock(copy_object);
1750 assert(copy_object->ref_count > 0);
1751 VM_OBJ_RES_DECR(copy_object);
1752 copy_object->ref_count--;
1753 assert(copy_object->ref_count > 0);
1754 copy_m = vm_page_lookup(copy_object, copy_offset);
1755 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1756 PAGE_ASSERT_WAIT(copy_m, interruptible);
1757 vm_object_unlock(copy_object);
1758 wait_result = thread_block((void (*)(void))0);
1759 vm_object_deallocate(copy_object);
1760 goto backoff;
1761 } else {
1762 vm_object_unlock(copy_object);
1763 vm_object_deallocate(copy_object);
1764 cur_thread->interruptible = interruptible_state;
1765 return VM_FAULT_RETRY;
1766 }
1767 }
1768 }
1769 else if (!PAGED_OUT(copy_object, copy_offset)) {
1770 /*
1771 * If PAGED_OUT is TRUE, then the page used to exist
1772 * in the copy-object, and has already been paged out.
1773 * We don't need to repeat this. If PAGED_OUT is
1774 * FALSE, then either we don't know (!pager_created,
1775 * for example) or it hasn't been paged out.
1776 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1777 * We must copy the page to the copy object.
1778 */
1779
1780 /*
1781 * Allocate a page for the copy
1782 */
1783 copy_m = vm_page_alloc(copy_object, copy_offset);
1784 if (copy_m == VM_PAGE_NULL) {
1785 RELEASE_PAGE(m);
1786 VM_OBJ_RES_DECR(copy_object);
1787 copy_object->ref_count--;
1788 assert(copy_object->ref_count > 0);
1789 vm_object_unlock(copy_object);
1790 vm_fault_cleanup(object, first_m);
1791 cur_thread->interruptible = interruptible_state;
1792 return(VM_FAULT_MEMORY_SHORTAGE);
1793 }
1794
1795 /*
1796 * Must copy page into copy-object.
1797 */
1798
1799 vm_page_copy(m, copy_m);
1800
1801 /*
1802 * If the old page was in use by any users
1803 * of the copy-object, it must be removed
1804 * from all pmaps. (We can't know which
1805 * pmaps use it.)
1806 */
1807
1808 vm_page_lock_queues();
1809 assert(!m->cleaning);
1810 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1811 copy_m->dirty = TRUE;
1812 vm_page_unlock_queues();
1813
1814 /*
1815 * If there's a pager, then immediately
1816 * page out this page, using the "initialize"
1817 * option. Else, we use the copy.
1818 */
1819
1820 if
1821 #if MACH_PAGEMAP
1822 ((!copy_object->pager_created) ||
1823 vm_external_state_get(
1824 copy_object->existence_map, copy_offset)
1825 == VM_EXTERNAL_STATE_ABSENT)
1826 #else
1827 (!copy_object->pager_created)
1828 #endif
1829 {
1830 vm_page_lock_queues();
1831 vm_page_activate(copy_m);
1832 vm_page_unlock_queues();
1833 PAGE_WAKEUP_DONE(copy_m);
1834 }
1835 else {
1836 assert(copy_m->busy == TRUE);
1837
1838 /*
1839 * The page is already ready for pageout:
1840 * not on pageout queues and busy.
1841 * Unlock everything except the
1842 * copy_object itself.
1843 */
1844
1845 vm_object_unlock(object);
1846
1847 /*
1848 * Write the page to the copy-object,
1849 * flushing it from the kernel.
1850 */
1851
1852 vm_pageout_initialize_page(copy_m);
1853
1854 /*
1855 * Since the pageout may have
1856 * temporarily dropped the
1857 * copy_object's lock, we
1858 * check whether we'll have
1859 * to deallocate the hard way.
1860 */
1861
1862 if ((copy_object->shadow != object) ||
1863 (copy_object->ref_count == 1)) {
1864 vm_object_unlock(copy_object);
1865 vm_object_deallocate(copy_object);
1866 vm_object_lock(object);
1867 continue;
1868 }
1869
1870 /*
1871 * Pick back up the old object's
1872 * lock. [It is safe to do so,
1873 * since it must be deeper in the
1874 * object tree.]
1875 */
1876
1877 vm_object_lock(object);
1878 }
1879
1880 /*
1881 * Because we're pushing a page upward
1882 * in the object tree, we must restart
1883 * any faults that are waiting here.
1884 * [Note that this is an expansion of
1885 * PAGE_WAKEUP that uses the THREAD_RESTART
1886 * wait result]. Can't turn off the page's
1887 * busy bit because we're not done with it.
1888 */
1889
1890 if (m->wanted) {
1891 m->wanted = FALSE;
1892 thread_wakeup_with_result((event_t) m,
1893 THREAD_RESTART);
1894 }
1895 }
1896
1897 /*
1898 * The reference count on copy_object must be
1899 * at least 2: one for our extra reference,
1900 * and at least one from the outside world
1901 * (we checked that when we last locked
1902 * copy_object).
1903 */
1904 copy_object->ref_count--;
1905 assert(copy_object->ref_count > 0);
1906 VM_OBJ_RES_DECR(copy_object);
1907 vm_object_unlock(copy_object);
1908
1909 break;
1910 }
1911
1912 *result_page = m;
1913 *top_page = first_m;
1914
1915 XPR(XPR_VM_FAULT,
1916 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1917 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1918 /*
1919 * If the page can be written, assume that it will be.
1920 * [Earlier, we restricted the permission to allow write
1921 * access only if the fault so required, so we don't
1922 * mark read-only data as dirty.]
1923 */
1924
1925 #if !VM_FAULT_STATIC_CONFIG
1926 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE) &&
1927 (m != VM_PAGE_NULL)) {
1928 m->dirty = TRUE;
1929 }
1930 #endif
1931 #if TRACEFAULTPAGE
1932 dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_page_deactivate_behind); /* (TEST/DEBUG) */
1933 #endif
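	/*
	 * Sequential-access heuristic: if the page just behind this one
	 * (offset - PAGE_SIZE) was the last page faulted in this object,
	 * deactivate it so that a streaming reader does not flood the
	 * active queue with pages it will not touch again.
	 */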
1934 if (vm_page_deactivate_behind) {
1935 if (offset && /* don't underflow */
1936 (object->last_alloc == (offset - PAGE_SIZE_64))) {
1937 m = vm_page_lookup(object, object->last_alloc);
1938 if ((m != VM_PAGE_NULL) && !m->busy) {
1939 vm_page_lock_queues();
1940 vm_page_deactivate(m);
1941 vm_page_unlock_queues();
1942 }
1943 #if TRACEFAULTPAGE
1944 dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1945 #endif
1946 }
1947 object->last_alloc = offset;
1948 }
1949 #if TRACEFAULTPAGE
1950 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */
1951 #endif
1952 cur_thread->interruptible = interruptible_state;
1953 if(*result_page == VM_PAGE_NULL) {
1954 vm_object_unlock(object);
1955 }
1956 return(VM_FAULT_SUCCESS);
1957
1958 #if 0
1959 block_and_backoff:
1960 vm_fault_cleanup(object, first_m);
1961
1962 counter(c_vm_fault_page_block_backoff_kernel++);
1963 thread_block((void (*)(void))0);
1964 #endif
1965
1966 backoff:
1967 cur_thread->interruptible = interruptible_state;
1968 if (wait_result == THREAD_INTERRUPTED)
1969 return VM_FAULT_INTERRUPTED;
1970 return VM_FAULT_RETRY;
1971
1972 #undef RELEASE_PAGE
1973 }
1974
1975 /*
1976 * Routine: vm_fault
1977 * Purpose:
1978 * Handle page faults, including pseudo-faults
1979 * used to change the wiring status of pages.
1980 * Returns:
1981 * Explicit continuations have been removed.
1982 * Implementation:
1983 * vm_fault and vm_fault_page save mucho state
1984 * in the moral equivalent of a closure. The state
1985 * structure is allocated when first entering vm_fault
1986 * and deallocated when leaving vm_fault.
1987 */
1988
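/*
 * Illustrative sketch only (kept out of the build, following this
 * file's #if 0 convention): how a machine-dependent trap handler
 * might call vm_fault().  The helper name and its parameters
 * ("example_handle_data_fault", "user_map", "fault_addr", "is_write")
 * are hypothetical; real callers live in the platform trap code and
 * differ in detail.
 */
#if 0
static kern_return_t
example_handle_data_fault(
	vm_map_t	user_map,	/* hypothetical: faulting task's map */
	vm_offset_t	fault_addr,	/* hypothetical: faulting address */
	boolean_t	is_write)	/* hypothetical: write access? */
{
	/* Fault for the access actually attempted; no wiring change. */
	return vm_fault(user_map,
			trunc_page(fault_addr),
			is_write ? (VM_PROT_READ | VM_PROT_WRITE)
				 : VM_PROT_READ,
			FALSE,			/* change_wiring */
			THREAD_ABORTSAFE);	/* interruptible */
}
#endif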
1989 kern_return_t
1990 vm_fault(
1991 vm_map_t map,
1992 vm_offset_t vaddr,
1993 vm_prot_t fault_type,
1994 boolean_t change_wiring,
1995 int interruptible)
1996 {
1997 vm_map_version_t version; /* Map version for verification */
1998 boolean_t wired; /* Should mapping be wired down? */
1999 vm_object_t object; /* Top-level object */
2000 vm_object_offset_t offset; /* Top-level offset */
2001 vm_prot_t prot; /* Protection for mapping */
2002 vm_behavior_t behavior; /* Expected paging behavior */
2003 vm_object_offset_t lo_offset, hi_offset;
2004 vm_object_t old_copy_object; /* Saved copy object */
2005 vm_page_t result_page; /* Result of vm_fault_page */
2006 vm_page_t top_page; /* Placeholder page */
2007 kern_return_t kr;
2008
2009 register
2010 vm_page_t m; /* Fast access to result_page */
2011 kern_return_t error_code; /* page error reasons */
2012 register
2013 vm_object_t cur_object;
2014 register
2015 vm_object_offset_t cur_offset;
2016 vm_page_t cur_m;
2017 vm_object_t new_object;
2018 int type_of_fault;
2019 vm_map_t pmap_map = map;
2020 vm_map_t original_map = map;
2021 pmap_t pmap = NULL;
2022 boolean_t funnel_set = FALSE;
2023 funnel_t *curflock;
2024 thread_t cur_thread;
2025 boolean_t interruptible_state;
2026
2027
2028 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
2029 vaddr,
2030 0,
2031 0,
2032 0,
2033 0);
2034
2035 cur_thread = current_thread();
2036
2037 interruptible_state = cur_thread->interruptible;
2038 if (interruptible == THREAD_UNINT)
2039 cur_thread->interruptible = FALSE;
2040
2041 /*
2042 * Assume we will hit a page already in the cache;
2043 * the real fault type explicitly overrides this
2044 * once it has been determined.
2045 */
2046 type_of_fault = DBG_CACHE_HIT_FAULT;
2047
2048 VM_STAT(faults++);
2049 current_task()->faults++;
2050
2051 /*
2052 * Drop the funnel if it is already held; it is restored before returning.
2053 */
2054 if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) {
2055 funnel_set = TRUE;
2056 curflock = cur_thread->funnel_lock;
2057 thread_funnel_set( curflock , FALSE);
2058 }
2059
2060 RetryFault: ;
2061
2062 /*
2063 * Find the backing store object and offset into
2064 * it to begin the search.
2065 */
2066 map = original_map;
2067 vm_map_lock_read(map);
2068 kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
2069 &object, &offset,
2070 &prot, &wired,
2071 &behavior, &lo_offset, &hi_offset, &pmap_map);
2072
2073 pmap = pmap_map->pmap;
2074
2075 if (kr != KERN_SUCCESS) {
2076 vm_map_unlock_read(map);
2077 goto done;
2078 }
2079
2080 /*
2081 * If the page is wired, we must fault for the current protection
2082 * value, to avoid further faults.
2083 */
2084
2085 if (wired)
2086 fault_type = prot | VM_PROT_WRITE;
2087
2088 #if VM_FAULT_CLASSIFY
2089 /*
2090 * Temporary data gathering code
2091 */
2092 vm_fault_classify(object, offset, fault_type);
2093 #endif
2094 /*
2095 * Fast fault code. The basic idea is to do as much as
2096 * possible while holding the map lock and object locks.
2097 * Busy pages are not used until the object lock has to
2098 * be dropped to do something (copy, zero fill, pmap enter).
2099 * Similarly, paging references aren't acquired until that
2100 * point, and object references aren't used.
2101 *
2102 * If we can figure out what to do
2103 * (zero fill, copy on write, pmap enter) while holding
2104 * the locks, then it gets done. Otherwise, we give up,
2105 * and use the original fault path (which doesn't hold
2106 * the map lock, and relies on busy pages).
2107 * The give up cases include:
2108 * - Have to talk to pager.
2109 * - Page is busy, absent or in error.
2110 * - Pager has locked out desired access.
2111 * - Fault needs to be restarted.
2112 * - Have to push page into copy object.
2113 *
2114 * The code is an infinite loop that moves one level down
2115 * the shadow chain each time. cur_object and cur_offset
2116 * refer to the current object being examined. object and offset
2117 * are the original object from the map. The loop is at the
2118 * top level if and only if object and cur_object are the same.
2119 *
2120 * Invariants: Map lock is held throughout. Lock is held on
2121 * original object and cur_object (if different) when
2122 * continuing or exiting loop.
2123 *
2124 */
2125
2126
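	/*
	 * Informal schematic of the fast-path loop below (orientation only):
	 *
	 *	cur_object = object;  cur_offset = offset;
	 *	for (;;) {
	 *		m = vm_page_lookup(cur_object, cur_offset);
	 *		if (m != VM_PAGE_NULL)
	 *			map it in, do a copy-on-write push, or give up;
	 *		else if (cur_object->pager_created)
	 *			give up (must talk to the pager);
	 *		else if (cur_object->shadow == VM_OBJECT_NULL)
	 *			zero-fill in the top object, or give up;
	 *		else
	 *			descend: cur_offset += cur_object->shadow_offset,
	 *				 cur_object = cur_object->shadow;
	 *	}
	 */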
2127 /*
2128 * If this page is to be inserted in a copy delay object
2129 * for writing, and if the object has a copy, then the
2130 * copy-delay strategy is handled by the slow path, vm_fault_page().
2131 */
2132 if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
2133 object->copy == VM_OBJECT_NULL ||
2134 (fault_type & VM_PROT_WRITE) == 0) {
2135 cur_object = object;
2136 cur_offset = offset;
2137
2138 while (TRUE) {
2139 m = vm_page_lookup(cur_object, cur_offset);
2140 if (m != VM_PAGE_NULL) {
2141 if (m->busy)
2142 break;
2143
2144 if (m->unusual && (m->error || m->restart || m->private
2145 || m->absent || (fault_type & m->page_lock))) {
2146
2147 /*
2148 * Unusual case. Give up.
2149 */
2150 break;
2151 }
2152
2153 /*
2154 * Two cases of map in faults:
2155 * - At top level w/o copy object.
2156 * - Read fault anywhere.
2157 * --> must disallow write.
2158 */
2159
2160 if (object == cur_object &&
2161 object->copy == VM_OBJECT_NULL)
2162 goto FastMapInFault;
2163
2164 if ((fault_type & VM_PROT_WRITE) == 0) {
2165
2166 prot &= ~VM_PROT_WRITE;
2167
2168 /*
2169 * Set up to map the page ...
2170 * mark the page busy, drop
2171 * locks and take a paging reference
2172 * on the object with the page.
2173 */
2174
2175 if (object != cur_object) {
2176 vm_object_unlock(object);
2177 object = cur_object;
2178 }
2179 FastMapInFault:
2180 m->busy = TRUE;
2181
2182 vm_object_paging_begin(object);
2183 vm_object_unlock(object);
2184
2185 FastPmapEnter:
2186 /*
2187 * Check a couple of global reasons to
2188 * be conservative about write access.
2189 * Then do the pmap_enter.
2190 */
2191 #if !VM_FAULT_STATIC_CONFIG
2192 if (vm_fault_dirty_handling
2193 #if MACH_KDB
2194 || db_watchpoint_list
2195 #endif
2196 && (fault_type & VM_PROT_WRITE) == 0)
2197 prot &= ~VM_PROT_WRITE;
2198 #else /* STATIC_CONFIG */
2199 #if MACH_KDB
2200 if (db_watchpoint_list
2201 && (fault_type & VM_PROT_WRITE) == 0)
2202 prot &= ~VM_PROT_WRITE;
2203 #endif /* MACH_KDB */
2204 #endif /* STATIC_CONFIG */
2205 PMAP_ENTER(pmap, vaddr, m, prot, wired);
2206
2207 if (m->no_isync) {
2208 pmap_attribute(pmap,
2209 vaddr,
2210 PAGE_SIZE,
2211 MATTR_CACHE,
2212 &mv_cache_sync);
2213
2214 }
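				/*
				 * Record this page in the task's dynamic
				 * working set so later faults can use it for
				 * clustered pagein decisions; grow the hash
				 * if the insert runs out of space.
				 */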
2215 {
2216 tws_hash_line_t line;
2217 task_t task;
2218
2219 task = current_task();
2220 if((map != NULL) &&
2221 (task->dynamic_working_set != 0)) {
2222 if(tws_lookup
2223 ((tws_hash_t)
2224 task->dynamic_working_set,
2225 cur_offset, object,
2226 &line) != KERN_SUCCESS) {
2227 if(tws_insert((tws_hash_t)
2228 task->dynamic_working_set,
2229 m->offset, m->object,
2230 vaddr, pmap_map)
2231 == KERN_NO_SPACE) {
2232 tws_expand_working_set(
2233 task->dynamic_working_set,
2234 TWS_HASH_LINE_COUNT);
2235 }
2236 }
2237 }
2238 }
2239
2240 if (m->clustered) {
2241 vm_pagein_cluster_used++;
2242 m->clustered = FALSE;
2243 }
2244 /*
2245 * Grab the object lock to manipulate
2246 * the page queues.  The change-wiring case
2247 * is straightforward.  With software reference
2248 * bits, activate the page only if it has fallen
2249 * off the paging queues; otherwise activate it
2250 * whenever it is not already active.
2251 *
2252 * NOTE: original vm_fault code will
2253 * move active page to back of active
2254 * queue. This code doesn't.
2255 */
2256 vm_object_lock(object);
2257 vm_page_lock_queues();
2258 /*
2259 * We did the isync above; we clear the flag
2260 * here rather than earlier to avoid holding a
2261 * lock while calling pmap functions.  However,
2262 * we need to hold the object lock before we
2263 * can modify the flag.
2264 */
2265 m->no_isync = FALSE;
2266 m->reference = TRUE;
2267
2268 if (change_wiring) {
2269 if (wired)
2270 vm_page_wire(m);
2271 else
2272 vm_page_unwire(m);
2273 }
2274 #if VM_FAULT_STATIC_CONFIG
2275 else {
2276 if (!m->active && !m->inactive)
2277 vm_page_activate(m);
2278 }
2279 #else
2280 else if (software_reference_bits) {
2281 if (!m->active && !m->inactive)
2282 vm_page_activate(m);
2283 }
2284 else if (!m->active) {
2285 vm_page_activate(m);
2286 }
2287 #endif
2288 vm_page_unlock_queues();
2289
2290 /*
2291 * That's it, clean up and return.
2292 */
2293 PAGE_WAKEUP_DONE(m);
2294 vm_object_paging_end(object);
2295 vm_object_unlock(object);
2296 vm_map_unlock_read(map);
2297 if(pmap_map != map)
2298 vm_map_unlock(pmap_map);
2299
2300 if (funnel_set) {
2301 thread_funnel_set( curflock, TRUE);
2302 funnel_set = FALSE;
2303 }
2304 cur_thread->interruptible = interruptible_state;
2305
2306 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2307 vaddr,
2308 type_of_fault,
2309 KERN_SUCCESS,
2310 0,
2311 0);
2312 return KERN_SUCCESS;
2313 }
2314
2315 /*
2316 * Copy on write fault. If objects match, then
2317 * object->copy must not be NULL (else control
2318 * would be in previous code block), and we
2319 * have a potential push into the copy object
2320 * with which we won't cope here.
2321 */
2322
2323 if (cur_object == object)
2324 break;
2325
2326 /*
2327 * This is now a shadow based copy on write
2328 * fault -- it requires a copy up the shadow
2329 * chain.
2330 *
2331 * Allocate a page in the original top level
2332 * object. Give up if allocate fails. Also
2333 * need to remember current page, as it's the
2334 * source of the copy.
2335 */
2336 cur_m = m;
2337 m = vm_page_grab();
2338 if (m == VM_PAGE_NULL) {
2339 break;
2340 }
2341
2342 /*
2343 * Now do the copy. Mark the source busy
2344 * and take out paging references on both
2345 * objects.
2346 *
2347 * NOTE: This code holds the map lock across
2348 * the page copy.
2349 */
2350
2351 cur_m->busy = TRUE;
2352 vm_page_copy(cur_m, m);
2353 vm_page_insert(m, object, offset);
2354
2355 vm_object_paging_begin(cur_object);
2356 vm_object_paging_begin(object);
2357
2358 type_of_fault = DBG_COW_FAULT;
2359 VM_STAT(cow_faults++);
2360 current_task()->cow_faults++;
2361
2362 /*
2363 * Now cope with the source page and object.
2364 * If the top object has a ref count of 1
2365 * then no other map can access it, and hence
2366 * it's not necessary to do the pmap_page_protect.
2367 */
2368
2369
2370 vm_page_lock_queues();
2371 vm_page_deactivate(cur_m);
2372 m->dirty = TRUE;
2373 pmap_page_protect(cur_m->phys_addr,
2374 VM_PROT_NONE);
2375 vm_page_unlock_queues();
2376
2377 PAGE_WAKEUP_DONE(cur_m);
2378 vm_object_paging_end(cur_object);
2379 vm_object_unlock(cur_object);
2380
2381 /*
2382 * Slight hack: call vm_object_collapse() and then
2383 * reuse the common map-in code at FastPmapEnter.
2384 * Note that the object lock was taken above.
2385 */
2386
2387 vm_object_paging_end(object);
2388 vm_object_collapse(object);
2389 vm_object_paging_begin(object);
2390 vm_object_unlock(object);
2391
2392 goto FastPmapEnter;
2393 }
2394 else {
2395
2396 /*
2397 * No page at cur_object, cur_offset
2398 */
2399
2400 if (cur_object->pager_created) {
2401
2402 /*
2403 * Have to talk to the pager. Give up.
2404 */
2405
2406 break;
2407 }
2408
2409
2410 if (cur_object->shadow == VM_OBJECT_NULL) {
2411
2412 if (cur_object->shadow_severed) {
2413 vm_object_paging_end(object);
2414 vm_object_unlock(object);
2415 vm_map_unlock_read(map);
2416 if(pmap_map != map)
2417 vm_map_unlock(pmap_map);
2418
2419 if (funnel_set) {
2420 thread_funnel_set( curflock, TRUE);
2421 funnel_set = FALSE;
2422 }
2423 cur_thread->interruptible = interruptible_state;
2424
2425 return KERN_MEMORY_ERROR;
2426 }
2427
2428 /*
2429 * Zero fill fault. Page gets
2430 * filled in top object. Insert
2431 * page, then drop any lower lock.
2432 * Give up if no page.
2433 */
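				/*
				 * Free-page throttle: give up on the fast
				 * zero-fill path once the free count has
				 * fallen more than a quarter of the way from
				 * vm_page_free_target toward vm_page_free_min;
				 * the slow path can wait for memory, this one
				 * cannot.
				 */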
2434 if ((vm_page_free_target -
2435 ((vm_page_free_target-vm_page_free_min)>>2))
2436 > vm_page_free_count) {
2437 break;
2438 }
2439 m = vm_page_alloc(object, offset);
2440 if (m == VM_PAGE_NULL) {
2441 break;
2442 }
2443 /*
2444 * This is a zero-fill or initial fill
2445 * page fault. As such, we consider it
2446 * undefined with respect to instruction
2447 * execution; i.e., it is the responsibility
2448 * of higher layers to call for an instruction
2449 * sync after changing the contents and before
2450 * sending a program into this area.  We
2451 * choose this approach for performance.
2452 */
2453
2454 m->no_isync = FALSE;
2455
2456 if (cur_object != object)
2457 vm_object_unlock(cur_object);
2458
2459 vm_object_paging_begin(object);
2460 vm_object_unlock(object);
2461
2462 /*
2463 * Now zero-fill the page and map it.
2464 * The page is probably going to
2465 * be written soon, so don't bother
2466 * to clear the modified bit.
2467 *
2468 * NOTE: This code holds the map
2469 * lock across the zero fill.
2470 */
2471
2472 if (!map->no_zero_fill) {
2473 vm_page_zero_fill(m);
2474 type_of_fault = DBG_ZERO_FILL_FAULT;
2475 VM_STAT(zero_fill_count++);
2476 }
2477 vm_page_lock_queues();
2478 VM_PAGE_QUEUES_REMOVE(m);
2479
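				/*
				 * Tag the page with the current inactive-queue
				 * ticket.  The ticket advances once every
				 * VM_PAGE_TICKETS_IN_ROLL pages and wraps at
				 * VM_PAGE_TICKET_ROLL_IDS, so pages placed on
				 * the inactive queue carry a coarse indication
				 * of when they arrived.
				 */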
2480 m->page_ticket = vm_page_ticket;
2481 vm_page_ticket_roll++;
2482 if(vm_page_ticket_roll ==
2483 VM_PAGE_TICKETS_IN_ROLL) {
2484 vm_page_ticket_roll = 0;
2485 if(vm_page_ticket ==
2486 VM_PAGE_TICKET_ROLL_IDS)
2487 vm_page_ticket= 0;
2488 else
2489 vm_page_ticket++;
2490 }
2491
2492 queue_enter(&vm_page_queue_inactive,
2493 m, vm_page_t, pageq);
2494 m->inactive = TRUE;
2495 vm_page_inactive_count++;
2496 vm_page_unlock_queues();
2497 goto FastPmapEnter;
2498 }
2499
2500 /*
2501 * On to the next level
2502 */
2503
2504 cur_offset += cur_object->shadow_offset;
2505 new_object = cur_object->shadow;
2506 vm_object_lock(new_object);
2507 if (cur_object != object)
2508 vm_object_unlock(cur_object);
2509 cur_object = new_object;
2510
2511 continue;
2512 }
2513 }
2514
2515 /*
2516 * Cleanup from fast fault failure. Drop any object
2517 * lock other than original and drop map lock.
2518 */
2519
2520 if (object != cur_object)
2521 vm_object_unlock(cur_object);
2522 }
2523 vm_map_unlock_read(map);
2524 if(pmap_map != map)
2525 vm_map_unlock(pmap_map);
2526
2527 /*
2528 * Make a reference to this object to
2529 * prevent its disposal while we are messing with
2530 * it. Once we have the reference, the map is free
2531 * to be diddled. Since objects reference their
2532 * shadows (and copies), they will stay around as well.
2533 */
2534
2535 assert(object->ref_count > 0);
2536 object->ref_count++;
2537 vm_object_res_reference(object);
2538 vm_object_paging_begin(object);
2539
2540 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2541 kr = vm_fault_page(object, offset, fault_type,
2542 (change_wiring && !wired),
2543 interruptible,
2544 lo_offset, hi_offset, behavior,
2545 &prot, &result_page, &top_page,
2546 &type_of_fault,
2547 &error_code, map->no_zero_fill, FALSE, map, vaddr);
2548
2549 /*
2550 * If we didn't succeed, lose the object reference immediately.
2551 */
2552
2553 if (kr != VM_FAULT_SUCCESS)
2554 vm_object_deallocate(object);
2555
2556 /*
2557 * See why we failed, and take corrective action.
2558 */
2559
2560 switch (kr) {
2561 case VM_FAULT_SUCCESS:
2562 break;
2563 case VM_FAULT_MEMORY_SHORTAGE:
2564 if (vm_page_wait((change_wiring) ?
2565 THREAD_UNINT :
2566 THREAD_ABORTSAFE))
2567 goto RetryFault;
2568 /* fall thru */
2569 case VM_FAULT_INTERRUPTED:
2570 kr = KERN_ABORTED;
2571 goto done;
2572 case VM_FAULT_RETRY:
2573 goto RetryFault;
2574 case VM_FAULT_FICTITIOUS_SHORTAGE:
2575 vm_page_more_fictitious();
2576 goto RetryFault;
2577 case VM_FAULT_MEMORY_ERROR:
2578 if (error_code)
2579 kr = error_code;
2580 else
2581 kr = KERN_MEMORY_ERROR;
2582 goto done;
2583 }
2584
2585 m = result_page;
2586
2587 if(m != VM_PAGE_NULL) {
2588 assert((change_wiring && !wired) ?
2589 (top_page == VM_PAGE_NULL) :
2590 ((top_page == VM_PAGE_NULL) == (m->object == object)));
2591 }
2592
2593 /*
2594 * How to clean up the result of vm_fault_page. This
2595 * happens whether the mapping is entered or not.
2596 */
2597
2598 #define UNLOCK_AND_DEALLOCATE \
2599 MACRO_BEGIN \
2600 vm_fault_cleanup(m->object, top_page); \
2601 vm_object_deallocate(object); \
2602 MACRO_END
2603
2604 /*
2605 * What to do with the resulting page from vm_fault_page
2606 * if it doesn't get entered into the physical map:
2607 */
2608
2609 #define RELEASE_PAGE(m) \
2610 MACRO_BEGIN \
2611 PAGE_WAKEUP_DONE(m); \
2612 vm_page_lock_queues(); \
2613 if (!m->active && !m->inactive) \
2614 vm_page_activate(m); \
2615 vm_page_unlock_queues(); \
2616 MACRO_END
2617
2618 /*
2619 * We must verify that the maps have not changed
2620 * since our last lookup.
2621 */
2622
2623 if(m != VM_PAGE_NULL) {
2624 old_copy_object = m->object->copy;
2625
2626 vm_object_unlock(m->object);
2627 } else {
2628 old_copy_object = VM_OBJECT_NULL;
2629 }
2630 if ((map != original_map) || !vm_map_verify(map, &version)) {
2631 vm_object_t retry_object;
2632 vm_object_offset_t retry_offset;
2633 vm_prot_t retry_prot;
2634
2635 /*
2636 * To avoid trying to write_lock the map while another
2637 * thread has it read_locked (in vm_map_pageable), we
2638 * do not try for write permission. If the page is
2639 * still writable, we will get write permission. If it
2640 * is not, or has been marked needs_copy, we enter the
2641 * mapping without write permission, and will merely
2642 * take another fault.
2643 */
2644 map = original_map;
2645 vm_map_lock_read(map);
2646 kr = vm_map_lookup_locked(&map, vaddr,
2647 fault_type & ~VM_PROT_WRITE, &version,
2648 &retry_object, &retry_offset, &retry_prot,
2649 &wired, &behavior, &lo_offset, &hi_offset,
2650 &pmap_map);
2651 pmap = pmap_map->pmap;
2652
2653 if (kr != KERN_SUCCESS) {
2654 vm_map_unlock_read(map);
2655 if(m != VM_PAGE_NULL) {
2656 vm_object_lock(m->object);
2657 RELEASE_PAGE(m);
2658 UNLOCK_AND_DEALLOCATE;
2659 } else {
2660 vm_object_deallocate(object);
2661 }
2662 goto done;
2663 }
2664
2665 vm_object_unlock(retry_object);
2666 if(m != VM_PAGE_NULL) {
2667 vm_object_lock(m->object);
2668 } else {
2669 vm_object_lock(object);
2670 }
2671
2672 if ((retry_object != object) ||
2673 (retry_offset != offset)) {
2674 vm_map_unlock_read(map);
2675 if(pmap_map != map)
2676 vm_map_unlock(pmap_map);
2677 if(m != VM_PAGE_NULL) {
2678 RELEASE_PAGE(m);
2679 UNLOCK_AND_DEALLOCATE;
2680 } else {
2681 vm_object_deallocate(object);
2682 }
2683 goto RetryFault;
2684 }
2685
2686 /*
2687 * Check whether the protection has changed or the object
2688 * has been copied while we left the map unlocked.
2689 */
2690 prot &= retry_prot;
2691 if(m != VM_PAGE_NULL) {
2692 vm_object_unlock(m->object);
2693 } else {
2694 vm_object_unlock(object);
2695 }
2696 }
2697 if(m != VM_PAGE_NULL) {
2698 vm_object_lock(m->object);
2699 } else {
2700 vm_object_lock(object);
2701 }
2702
2703 /*
2704 * If the copy object changed while the top-level object
2705 * was unlocked, then we must take away write permission.
2706 */
2707
2708 if(m != VM_PAGE_NULL) {
2709 if (m->object->copy != old_copy_object)
2710 prot &= ~VM_PROT_WRITE;
2711 }
2712
2713 /*
2714 * If we want to wire down this page, but no longer have
2715 * adequate permissions, we must start all over.
2716 */
2717
2718 if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2719 vm_map_verify_done(map, &version);
2720 if(pmap_map != map)
2721 vm_map_unlock(pmap_map);
2722 if(m != VM_PAGE_NULL) {
2723 RELEASE_PAGE(m);
2724 UNLOCK_AND_DEALLOCATE;
2725 } else {
2726 vm_object_deallocate(object);
2727 }
2728 goto RetryFault;
2729 }
2730
2731 /*
2732 * Put this page into the physical map.
2733 * We had to do the unlock above because pmap_enter
2734 * may cause other faults. The page may be on
2735 * the pageout queues. If the pageout daemon comes
2736 * across the page, it will remove it from the queues.
2737 */
2738 if(m != VM_PAGE_NULL) {
2739 if (m->no_isync) {
2740 m->no_isync = FALSE;
2741
2742 vm_object_unlock(m->object);
2743
2744 PMAP_ENTER(pmap, vaddr, m, prot, wired);
2745
2746 /*
2747 * It's critically important that a wired-down page be faulted
2748 * only once in each map for which it is wired.
2749 */
2750 /* Sync I & D caches for new mapping */
2751 pmap_attribute(pmap,
2752 vaddr,
2753 PAGE_SIZE,
2754 MATTR_CACHE,
2755 &mv_cache_sync);
2756 } else {
2757 vm_object_unlock(m->object);
2758
2759 PMAP_ENTER(pmap, vaddr, m, prot, wired);
2760 }
2761 {
2762 tws_hash_line_t line;
2763 task_t task;
2764
2765 task = current_task();
2766 if((map != NULL) &&
2767 (task->dynamic_working_set != 0)) {
2768 if(tws_lookup
2769 ((tws_hash_t)
2770 task->dynamic_working_set,
2771 m->offset, m->object,
2772 &line) != KERN_SUCCESS) {
2777 if(tws_insert((tws_hash_t)
2778 task->dynamic_working_set,
2779 m->offset, m->object,
2780 vaddr, pmap_map)
2781 == KERN_NO_SPACE) {
2782 tws_expand_working_set(
2783 task->dynamic_working_set,
2784 TWS_HASH_LINE_COUNT);
2785 }
2786 }
2787 }
2788 }
2789 } else {
2790
2791 /* #if __ppc__ -- disabled until physical copy on block maps is figured out */
2792 #ifdef notdefcdy
2793 int memattr;
2794 struct phys_entry *pp;
2795 /*
2796 * do a pmap block mapping from the physical address
2797 * in the object
2798 */
2799 if(pp = pmap_find_physentry(
2800 (vm_offset_t)object->shadow_offset)) {
2801 memattr = ((pp->pte1 & 0x00000078) >> 3);
2802 } else {
2803 memattr = PTE_WIMG_UNCACHED_COHERENT_GUARDED;
2804 }
2805
2806 pmap_map_block(pmap, vaddr,
2807 (vm_offset_t)object->shadow_offset,
2808 object->size, prot,
2809 memattr, 0); /* Set up a block mapped area */
2810 //#else
2811 vm_offset_t off;
2812 for (off = 0; off < object->size; off += page_size) {
2813 pmap_enter(pmap, vaddr + off,
2814 object->shadow_offset + off, prot, TRUE);
2815 /* Map it in */
2816 }
2817 #endif
2818
2819 }
2820
2821 /*
2822 * If the page is not wired down and isn't already
2823 * on a pageout queue, then put it where the
2824 * pageout daemon can find it.
2825 */
2826 if(m != VM_PAGE_NULL) {
2827 vm_object_lock(m->object);
2828 vm_page_lock_queues();
2829
2830 if (change_wiring) {
2831 if (wired)
2832 vm_page_wire(m);
2833 else
2834 vm_page_unwire(m);
2835 }
2836 #if VM_FAULT_STATIC_CONFIG
2837 else {
2838 if (!m->active && !m->inactive)
2839 vm_page_activate(m);
2840 m->reference = TRUE;
2841 }
2842 #else
2843 else if (software_reference_bits) {
2844 if (!m->active && !m->inactive)
2845 vm_page_activate(m);
2846 m->reference = TRUE;
2847 } else {
2848 vm_page_activate(m);
2849 }
2850 #endif
2851 vm_page_unlock_queues();
2852 }
2853
2854 /*
2855 * Unlock everything, and return
2856 */
2857
2858 vm_map_verify_done(map, &version);
2859 if(pmap_map != map)
2860 vm_map_unlock(pmap_map);
2861 if(m != VM_PAGE_NULL) {
2862 PAGE_WAKEUP_DONE(m);
2863 UNLOCK_AND_DEALLOCATE;
2864 } else {
2865 vm_fault_cleanup(object, top_page);
2866 vm_object_deallocate(object);
2867 }
2868 kr = KERN_SUCCESS;
2869
2870 #undef UNLOCK_AND_DEALLOCATE
2871 #undef RELEASE_PAGE
2872
2873 done:
2874 if (funnel_set) {
2875 thread_funnel_set( curflock, TRUE);
2876 funnel_set = FALSE;
2877 }
2878 cur_thread->interruptible = interruptible_state;
2879
2880 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2881 vaddr,
2882 type_of_fault,
2883 kr,
2884 0,
2885 0);
2886 return(kr);
2887 }
2888
2889 /*
2890 * vm_fault_wire:
2891 *
2892 * Wire down a range of virtual addresses in a map.
2893 */
2894 kern_return_t
2895 vm_fault_wire(
2896 vm_map_t map,
2897 vm_map_entry_t entry,
2898 pmap_t pmap)
2899 {
2900
2901 register vm_offset_t va;
2902 register vm_offset_t end_addr = entry->vme_end;
2903 register kern_return_t rc;
2904
2905 assert(entry->in_transition);
2906
2907 /*
2908 * Inform the physical mapping system that the
2909 * range of addresses may not fault, so that
2910 * page tables and such can be locked down as well.
2911 */
2912
2913 pmap_pageable(pmap, entry->vme_start, end_addr, FALSE);
2914
2915 /*
2916 * We simulate a fault to get the page and enter it
2917 * in the physical map.
2918 */
2919
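	/*
	 * Try the stripped-down vm_fault_wire_fast() first; anything it
	 * cannot handle (absent page, copy object, pager, etc.) falls
	 * back to the full vm_fault() path with change_wiring == TRUE.
	 */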
2920 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
2921 if ((rc = vm_fault_wire_fast(
2922 map, va, entry, pmap)) != KERN_SUCCESS) {
2923 rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
2924 (pmap == kernel_pmap) ? THREAD_UNINT : THREAD_ABORTSAFE);
2925 }
2926
2927 if (rc != KERN_SUCCESS) {
2928 struct vm_map_entry tmp_entry = *entry;
2929
2930 /* unwire wired pages */
2931 tmp_entry.vme_end = va;
2932 vm_fault_unwire(map, &tmp_entry, FALSE, pmap);
2933
2934 return rc;
2935 }
2936 }
2937 return KERN_SUCCESS;
2938 }
2939
2940 /*
2941 * vm_fault_unwire:
2942 *
2943 * Unwire a range of virtual addresses in a map.
2944 */
2945 void
2946 vm_fault_unwire(
2947 vm_map_t map,
2948 vm_map_entry_t entry,
2949 boolean_t deallocate,
2950 pmap_t pmap)
2951 {
2952 register vm_offset_t va;
2953 register vm_offset_t end_addr = entry->vme_end;
2954 vm_object_t object;
2955
2956 object = (entry->is_sub_map)
2957 ? VM_OBJECT_NULL : entry->object.vm_object;
2958
2959 /*
2960 * Since the pages are wired down, we must be able to
2961 * get their mappings from the physical map system.
2962 */
2963
2964 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
2965 pmap_change_wiring(pmap, va, FALSE);
2966
2967 if (object == VM_OBJECT_NULL) {
2968 (void) vm_fault(map, va, VM_PROT_NONE, TRUE, THREAD_UNINT);
2969 } else {
2970 vm_prot_t prot;
2971 vm_page_t result_page;
2972 vm_page_t top_page;
2973 vm_object_t result_object;
2974 vm_fault_return_t result;
2975
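			/*
			 * Look the wired page back up through vm_fault_page()
			 * -- since it is wired it must still be resident --
			 * so that it can be unwired or, when deallocate is
			 * set, freed outright.
			 */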
2976 do {
2977 prot = VM_PROT_NONE;
2978
2979 vm_object_lock(object);
2980 vm_object_paging_begin(object);
2981 XPR(XPR_VM_FAULT,
2982 "vm_fault_unwire -> vm_fault_page\n",
2983 0,0,0,0,0);
2984 result = vm_fault_page(object,
2985 entry->offset +
2986 (va - entry->vme_start),
2987 VM_PROT_NONE, TRUE,
2988 THREAD_UNINT,
2989 entry->offset,
2990 entry->offset +
2991 (entry->vme_end
2992 - entry->vme_start),
2993 entry->behavior,
2994 &prot,
2995 &result_page,
2996 &top_page,
2997 (int *)0,
2998 0, map->no_zero_fill,
2999 FALSE, NULL, 0);
3000 } while (result == VM_FAULT_RETRY);
3001
3002 if (result != VM_FAULT_SUCCESS)
3003 panic("vm_fault_unwire: failure");
3004
3005 result_object = result_page->object;
3006 if (deallocate) {
3007 assert(!result_page->fictitious);
3008 pmap_page_protect(result_page->phys_addr,
3009 VM_PROT_NONE);
3010 VM_PAGE_FREE(result_page);
3011 } else {
3012 vm_page_lock_queues();
3013 vm_page_unwire(result_page);
3014 vm_page_unlock_queues();
3015 PAGE_WAKEUP_DONE(result_page);
3016 }
3017
3018 vm_fault_cleanup(result_object, top_page);
3019 }
3020 }
3021
3022 /*
3023 * Inform the physical mapping system that the range
3024 * of addresses may fault, so that page tables and
3025 * such may be unwired themselves.
3026 */
3027
3028 pmap_pageable(pmap, entry->vme_start, end_addr, TRUE);
3029
3030 }
3031
3032 /*
3033 * vm_fault_wire_fast:
3034 *
3035 * Handle common case of a wire down page fault at the given address.
3036 * If successful, the page is inserted into the associated physical map.
3037 * The map entry is passed in to avoid the overhead of a map lookup.
3038 *
3039 * NOTE: the given address should be truncated to the
3040 * proper page address.
3041 *
3042 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
3043 * a standard error specifying why the fault is fatal is returned.
3044 *
3045 * The map in question must be referenced, and remains so.
3046 * Caller has a read lock on the map.
3047 *
3048 * This is a stripped version of vm_fault() for wiring pages. Anything
3049 * other than the common case will return KERN_FAILURE, and the caller
3050 * is expected to call vm_fault().
3051 */
3052 kern_return_t
3053 vm_fault_wire_fast(
3054 vm_map_t map,
3055 vm_offset_t va,
3056 vm_map_entry_t entry,
3057 pmap_t pmap)
3058 {
3059 vm_object_t object;
3060 vm_object_offset_t offset;
3061 register vm_page_t m;
3062 vm_prot_t prot;
3063 thread_act_t thr_act;
3064
3065 VM_STAT(faults++);
3066
3067 if((thr_act=current_act()) && (thr_act->task != TASK_NULL))
3068 thr_act->task->faults++;
3069
3070 /*
3071 * Recovery actions
3072 */
3073
3074 #undef RELEASE_PAGE
3075 #define RELEASE_PAGE(m) { \
3076 PAGE_WAKEUP_DONE(m); \
3077 vm_page_lock_queues(); \
3078 vm_page_unwire(m); \
3079 vm_page_unlock_queues(); \
3080 }
3081
3082
3083 #undef UNLOCK_THINGS
3084 #define UNLOCK_THINGS { \
3085 object->paging_in_progress--; \
3086 vm_object_unlock(object); \
3087 }
3088
3089 #undef UNLOCK_AND_DEALLOCATE
3090 #define UNLOCK_AND_DEALLOCATE { \
3091 UNLOCK_THINGS; \
3092 vm_object_deallocate(object); \
3093 }
3094 /*
3095 * Give up and have caller do things the hard way.
3096 */
3097
3098 #define GIVE_UP { \
3099 UNLOCK_AND_DEALLOCATE; \
3100 return(KERN_FAILURE); \
3101 }
3102
3103
3104 /*
3105 * If this entry is not directly to a vm_object, bail out.
3106 */
3107 if (entry->is_sub_map)
3108 return(KERN_FAILURE);
3109
3110 /*
3111 * Find the backing store object and offset into it.
3112 */
3113
3114 object = entry->object.vm_object;
3115 offset = (va - entry->vme_start) + entry->offset;
3116 prot = entry->protection;
3117
3118 /*
3119 * Make a reference to this object to prevent its
3120 * disposal while we are messing with it.
3121 */
3122
3123 vm_object_lock(object);
3124 assert(object->ref_count > 0);
3125 object->ref_count++;
3126 vm_object_res_reference(object);
3127 object->paging_in_progress++;
3128
3129 /*
3130 * INVARIANTS (through entire routine):
3131 *
3132 * 1) At all times, we must either have the object
3133 * lock or a busy page in some object to prevent
3134 * some other thread from trying to bring in
3135 * the same page.
3136 *
3137 * 2) Once we have a busy page, we must remove it from
3138 * the pageout queues, so that the pageout daemon
3139 * will not grab it away.
3140 *
3141 */
3142
3143 /*
3144 * Look for page in top-level object. If it's not there or
3145 * there's something going on, give up.
3146 */
3147 m = vm_page_lookup(object, offset);
3148 if ((m == VM_PAGE_NULL) || (m->busy) ||
3149 (m->unusual && ( m->error || m->restart || m->absent ||
3150 prot & m->page_lock))) {
3151
3152 GIVE_UP;
3153 }
3154
3155 /*
3156 * Wire the page down now. All bail outs beyond this
3157 * point must unwire the page.
3158 */
3159
3160 vm_page_lock_queues();
3161 vm_page_wire(m);
3162 vm_page_unlock_queues();
3163
3164 /*
3165 * Mark page busy for other threads.
3166 */
3167 assert(!m->busy);
3168 m->busy = TRUE;
3169 assert(!m->absent);
3170
3171 /*
3172 * Give up if the page is being written and there's a copy object
3173 */
3174 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3175 RELEASE_PAGE(m);
3176 GIVE_UP;
3177 }
3178
3179 /*
3180 * Put this page into the physical map.
3181 * We have to unlock the object because pmap_enter
3182 * may cause other faults.
3183 */
3184 if (m->no_isync) {
3185 m->no_isync = FALSE;
3186
3187 vm_object_unlock(object);
3188
3189 PMAP_ENTER(pmap, va, m, prot, TRUE);
3190
3191 /* Sync I & D caches for new mapping */
3192 pmap_attribute(pmap,
3193 va,
3194 PAGE_SIZE,
3195 MATTR_CACHE,
3196 &mv_cache_sync);
3197
3198 } else {
3199 vm_object_unlock(object);
3200
3201 PMAP_ENTER(pmap, va, m, prot, TRUE);
3202 }
3203
3204 /*
3205 * Must relock object so that paging_in_progress can be cleared.
3206 */
3207 vm_object_lock(object);
3208
3209 /*
3210 * Unlock everything, and return
3211 */
3212
3213 PAGE_WAKEUP_DONE(m);
3214 UNLOCK_AND_DEALLOCATE;
3215
3216 return(KERN_SUCCESS);
3217
3218 }
3219
3220 /*
3221 * Routine: vm_fault_copy_cleanup
3222 * Purpose:
3223 * Release a page used by vm_fault_copy.
3224 */
3225
3226 void
3227 vm_fault_copy_cleanup(
3228 vm_page_t page,
3229 vm_page_t top_page)
3230 {
3231 vm_object_t object = page->object;
3232
3233 vm_object_lock(object);
3234 PAGE_WAKEUP_DONE(page);
3235 vm_page_lock_queues();
3236 if (!page->active && !page->inactive)
3237 vm_page_activate(page);
3238 vm_page_unlock_queues();
3239 vm_fault_cleanup(object, top_page);
3240 }
3241
3242 void
3243 vm_fault_copy_dst_cleanup(
3244 vm_page_t page)
3245 {
3246 vm_object_t object;
3247
3248 if (page != VM_PAGE_NULL) {
3249 object = page->object;
3250 vm_object_lock(object);
3251 vm_page_lock_queues();
3252 vm_page_unwire(page);
3253 vm_page_unlock_queues();
3254 vm_object_paging_end(object);
3255 vm_object_unlock(object);
3256 }
3257 }
3258
3259 /*
3260 * Routine: vm_fault_copy
3261 *
3262 * Purpose:
3263 * Copy pages from one virtual memory object to another --
3264 * neither the source nor destination pages need be resident.
3265 *
3266 * Before actually copying a page, the version associated with
3267 * the destination address map will be verified.
3268 *
3269 * In/out conditions:
3270 * The caller must hold a reference, but not a lock, to
3271 * each of the source and destination objects and to the
3272 * destination map.
3273 *
3274 * Results:
3275 * Returns KERN_SUCCESS if no errors were encountered in
3276 * reading or writing the data. Returns MACH_SEND_INTERRUPTED if
3277 * the operation was interrupted (only possible if the
3278 * "interruptible" argument is asserted). Other return values
3279 * indicate a permanent error in copying the data.
3280 *
3281 * The actual amount of data copied will be returned in the
3282 * "copy_size" argument. In the event that the destination map
3283 * verification failed, this amount may be less than the amount
3284 * requested.
3285 */
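/*
 * Illustrative sketch only (kept out of the build): a hypothetical
 * caller that moves "len" bytes between two objects through
 * vm_fault_copy().  The helper name and parameters are invented for
 * illustration; the required setup (references on both objects and a
 * destination map version from a prior lookup) is assumed, not shown.
 */
#if 0
static kern_return_t
example_copy_between_objects(
	vm_object_t		src_obj,
	vm_object_offset_t	src_off,
	vm_object_t		dst_obj,
	vm_object_offset_t	dst_off,
	vm_map_t		dst_map,
	vm_map_version_t	*dst_ver,	/* from a prior map lookup */
	vm_size_t		len)
{
	vm_size_t	copied = len;	/* in: requested; out: actually copied */
	kern_return_t	kr;

	kr = vm_fault_copy(src_obj, src_off, &copied,
			   dst_obj, dst_off,
			   dst_map, dst_ver, THREAD_UNINT);
	return kr;
}
#endif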
3286 kern_return_t
3287 vm_fault_copy(
3288 vm_object_t src_object,
3289 vm_object_offset_t src_offset,
3290 vm_size_t *src_size, /* INOUT */
3291 vm_object_t dst_object,
3292 vm_object_offset_t dst_offset,
3293 vm_map_t dst_map,
3294 vm_map_version_t *dst_version,
3295 int interruptible)
3296 {
3297 vm_page_t result_page;
3298
3299 vm_page_t src_page;
3300 vm_page_t src_top_page;
3301 vm_prot_t src_prot;
3302
3303 vm_page_t dst_page;
3304 vm_page_t dst_top_page;
3305 vm_prot_t dst_prot;
3306
3307 vm_size_t amount_left;
3308 vm_object_t old_copy_object;
3309 kern_return_t error = 0;
3310
3311 vm_size_t part_size;
3312
3313 /*
3314 * In order not to confuse the clustered pageins, align
3315 * the different offsets on a page boundary.
3316 */
3317 vm_object_offset_t src_lo_offset = trunc_page_64(src_offset);
3318 vm_object_offset_t dst_lo_offset = trunc_page_64(dst_offset);
3319 vm_object_offset_t src_hi_offset = round_page_64(src_offset + *src_size);
3320 vm_object_offset_t dst_hi_offset = round_page_64(dst_offset + *src_size);
3321
3322 #define RETURN(x) \
3323 MACRO_BEGIN \
3324 *src_size -= amount_left; \
3325 MACRO_RETURN(x); \
3326 MACRO_END
3327
3328 amount_left = *src_size;
3329 do { /* while (amount_left > 0) */
3330 /*
3331 * There may be a deadlock if both source and destination
3332 * pages are the same. To avoid this deadlock, the copy must
3333 * start by getting the destination page in order to apply
3334 * COW semantics if any.
3335 */
3336
3337 RetryDestinationFault: ;
3338
3339 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3340
3341 vm_object_lock(dst_object);
3342 vm_object_paging_begin(dst_object);
3343
3344 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3345 switch (vm_fault_page(dst_object,
3346 trunc_page_64(dst_offset),
3347 VM_PROT_WRITE|VM_PROT_READ,
3348 FALSE,
3349 interruptible,
3350 dst_lo_offset,
3351 dst_hi_offset,
3352 VM_BEHAVIOR_SEQUENTIAL,
3353 &dst_prot,
3354 &dst_page,
3355 &dst_top_page,
3356 (int *)0,
3357 &error,
3358 dst_map->no_zero_fill,
3359 FALSE, NULL, 0)) {
3360 case VM_FAULT_SUCCESS:
3361 break;
3362 case VM_FAULT_RETRY:
3363 goto RetryDestinationFault;
3364 case VM_FAULT_MEMORY_SHORTAGE:
3365 if (vm_page_wait(interruptible))
3366 goto RetryDestinationFault;
3367 /* fall thru */
3368 case VM_FAULT_INTERRUPTED:
3369 RETURN(MACH_SEND_INTERRUPTED);
3370 case VM_FAULT_FICTITIOUS_SHORTAGE:
3371 vm_page_more_fictitious();
3372 goto RetryDestinationFault;
3373 case VM_FAULT_MEMORY_ERROR:
3374 if (error)
3375 return (error);
3376 else
3377 return(KERN_MEMORY_ERROR);
3378 }
3379 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3380
3381 old_copy_object = dst_page->object->copy;
3382
3383 /*
3384 * There exists the possibility that the source and
3385 * destination page are the same. But we can't
3386 * easily determine that now. If they are the
3387 * same, the call to vm_fault_page() for the
3388 * destination page will deadlock. To prevent this we
3389 * wire the page so we can drop busy without having
3390 * the page daemon steal the page. We clean up the
3391 * top page but keep the paging reference on the object
3392 * holding the dest page so it doesn't go away.
3393 */
3394
3395 vm_page_lock_queues();
3396 vm_page_wire(dst_page);
3397 vm_page_unlock_queues();
3398 PAGE_WAKEUP_DONE(dst_page);
3399 vm_object_unlock(dst_page->object);
3400
3401 if (dst_top_page != VM_PAGE_NULL) {
3402 vm_object_lock(dst_object);
3403 VM_PAGE_FREE(dst_top_page);
3404 vm_object_paging_end(dst_object);
3405 vm_object_unlock(dst_object);
3406 }
3407
3408 RetrySourceFault: ;
3409
3410 if (src_object == VM_OBJECT_NULL) {
3411 /*
3412 * No source object. We will just
3413 * zero-fill the page in dst_object.
3414 */
3415 src_page = VM_PAGE_NULL;
3416 result_page = VM_PAGE_NULL;
3417 } else {
3418 vm_object_lock(src_object);
3419 src_page = vm_page_lookup(src_object,
3420 trunc_page_64(src_offset));
3421 if (src_page == dst_page) {
3422 src_prot = dst_prot;
3423 result_page = VM_PAGE_NULL;
3424 } else {
3425 src_prot = VM_PROT_READ;
3426 vm_object_paging_begin(src_object);
3427
3428 XPR(XPR_VM_FAULT,
3429 "vm_fault_copy(2) -> vm_fault_page\n",
3430 0,0,0,0,0);
3431 switch (vm_fault_page(src_object,
3432 trunc_page_64(src_offset),
3433 VM_PROT_READ,
3434 FALSE,
3435 interruptible,
3436 src_lo_offset,
3437 src_hi_offset,
3438 VM_BEHAVIOR_SEQUENTIAL,
3439 &src_prot,
3440 &result_page,
3441 &src_top_page,
3442 (int *)0,
3443 &error,
3444 FALSE,
3445 FALSE, NULL, 0)) {
3446
3447 case VM_FAULT_SUCCESS:
3448 break;
3449 case VM_FAULT_RETRY:
3450 goto RetrySourceFault;
3451 case VM_FAULT_MEMORY_SHORTAGE:
3452 if (vm_page_wait(interruptible))
3453 goto RetrySourceFault;
3454 /* fall thru */
3455 case VM_FAULT_INTERRUPTED:
3456 vm_fault_copy_dst_cleanup(dst_page);
3457 RETURN(MACH_SEND_INTERRUPTED);
3458 case VM_FAULT_FICTITIOUS_SHORTAGE:
3459 vm_page_more_fictitious();
3460 goto RetrySourceFault;
3461 case VM_FAULT_MEMORY_ERROR:
3462 vm_fault_copy_dst_cleanup(dst_page);
3463 if (error)
3464 return (error);
3465 else
3466 return(KERN_MEMORY_ERROR);
3467 }
3468
3469
3470 assert((src_top_page == VM_PAGE_NULL) ==
3471 (result_page->object == src_object));
3472 }
3473 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
3474 vm_object_unlock(result_page->object);
3475 }
3476
3477 if (!vm_map_verify(dst_map, dst_version)) {
3478 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3479 vm_fault_copy_cleanup(result_page, src_top_page);
3480 vm_fault_copy_dst_cleanup(dst_page);
3481 break;
3482 }
3483
3484 vm_object_lock(dst_page->object);
3485
3486 if (dst_page->object->copy != old_copy_object) {
3487 vm_object_unlock(dst_page->object);
3488 vm_map_verify_done(dst_map, dst_version);
3489 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3490 vm_fault_copy_cleanup(result_page, src_top_page);
3491 vm_fault_copy_dst_cleanup(dst_page);
3492 break;
3493 }
3494 vm_object_unlock(dst_page->object);
3495
3496 /*
3497 * Copy the page, and note that it is dirty
3498 * immediately.
3499 */
3500
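		/*
		 * Unaligned case: copy only the largest chunk that stays
		 * within a single page on both sides.  src_po/dst_po are
		 * the offsets within their pages, so the chunk ends at
		 * whichever page boundary comes first.  For example
		 * (illustrative numbers only, assuming 4K pages), with
		 * src_po == 0x200 and dst_po == 0x600, part_size is
		 * PAGE_SIZE - 0x600 = 0xA00 bytes, further clipped to
		 * amount_left.
		 */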
3501 if (!page_aligned(src_offset) ||
3502 !page_aligned(dst_offset) ||
3503 !page_aligned(amount_left)) {
3504
3505 vm_object_offset_t src_po,
3506 dst_po;
3507
3508 src_po = src_offset - trunc_page_64(src_offset);
3509 dst_po = dst_offset - trunc_page_64(dst_offset);
3510
3511 if (dst_po > src_po) {
3512 part_size = PAGE_SIZE - dst_po;
3513 } else {
3514 part_size = PAGE_SIZE - src_po;
3515 }
3516 if (part_size > (amount_left)){
3517 part_size = amount_left;
3518 }
3519
3520 if (result_page == VM_PAGE_NULL) {
3521 vm_page_part_zero_fill(dst_page,
3522 dst_po, part_size);
3523 } else {
3524 vm_page_part_copy(result_page, src_po,
3525 dst_page, dst_po, part_size);
3526 if(!dst_page->dirty){
3527 vm_object_lock(dst_object);
3528 dst_page->dirty = TRUE;
3529 vm_object_unlock(dst_page->object);
3530 }
3531
3532 }
3533 } else {
3534 part_size = PAGE_SIZE;
3535
3536 if (result_page == VM_PAGE_NULL)
3537 vm_page_zero_fill(dst_page);
3538 else{
3539 vm_page_copy(result_page, dst_page);
3540 if(!dst_page->dirty){
3541 vm_object_lock(dst_object);
3542 dst_page->dirty = TRUE;
3543 vm_object_unlock(dst_page->object);
3544 }
3545 }
3546
3547 }
3548
3549 /*
3550 * Unlock everything, and return
3551 */
3552
3553 vm_map_verify_done(dst_map, dst_version);
3554
3555 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3556 vm_fault_copy_cleanup(result_page, src_top_page);
3557 vm_fault_copy_dst_cleanup(dst_page);
3558
3559 amount_left -= part_size;
3560 src_offset += part_size;
3561 dst_offset += part_size;
3562 } while (amount_left > 0);
3563
3564 RETURN(KERN_SUCCESS);
3565 #undef RETURN
3566
3567 /*NOTREACHED*/
3568 }
3569
3570 #ifdef notdef
3571
3572 /*
3573 * Routine: vm_fault_page_overwrite
3574 *
3575 * Description:
3576 * A form of vm_fault_page that assumes that the
3577 * resulting page will be overwritten in its entirety,
3578 * making it unnecessary to obtain the correct *contents*
3579 * of the page.
3580 *
3581 * Implementation:
3582 * XXX Untested. Also unused. Eventually, this technology
3583 * could be used in vm_fault_copy() to advantage.
3584 */
3585 vm_fault_return_t
3586 vm_fault_page_overwrite(
3587 register
3588 vm_object_t dst_object,
3589 vm_object_offset_t dst_offset,
3590 vm_page_t *result_page) /* OUT */
3591 {
3592 register
3593 vm_page_t dst_page;
3594 kern_return_t wait_result;
3595
3596 #define interruptible THREAD_UNINT /* XXX */
3597
3598 while (TRUE) {
3599 /*
3600 * Look for a page at this offset
3601 */
3602
3603 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3604 == VM_PAGE_NULL) {
3605 /*
3606 * No page, no problem... just allocate one.
3607 */
3608
3609 dst_page = vm_page_alloc(dst_object, dst_offset);
3610 if (dst_page == VM_PAGE_NULL) {
3611 vm_object_unlock(dst_object);
3612 VM_PAGE_WAIT();
3613 vm_object_lock(dst_object);
3614 continue;
3615 }
3616
3617 /*
3618 * Pretend that the memory manager
3619 * write-protected the page.
3620 *
3621 * Note that we will be asking for write
3622 * permission without asking for the data
3623 * first.
3624 */
3625
3626 dst_page->overwriting = TRUE;
3627 dst_page->page_lock = VM_PROT_WRITE;
3628 dst_page->absent = TRUE;
3629 dst_page->unusual = TRUE;
3630 dst_object->absent_count++;
3631
3632 break;
3633
3634 /*
3635 * When we bail out, we might have to throw
3636 * away the page created here.
3637 */
3638
3639 #define DISCARD_PAGE \
3640 MACRO_BEGIN \
3641 vm_object_lock(dst_object); \
3642 dst_page = vm_page_lookup(dst_object, dst_offset); \
3643 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3644 VM_PAGE_FREE(dst_page); \
3645 vm_object_unlock(dst_object); \
3646 MACRO_END
3647 }
3648
3649 /*
3650 * If the page is write-protected...
3651 */
3652
3653 if (dst_page->page_lock & VM_PROT_WRITE) {
3654 /*
3655 * ... and an unlock request hasn't been sent
3656 */
3657
3658 if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3659 vm_prot_t u;
3660 kern_return_t rc;
3661
3662 /*
3663 * ... then send one now.
3664 */
3665
3666 if (!dst_object->pager_ready) {
3667 vm_object_assert_wait(dst_object,
3668 VM_OBJECT_EVENT_PAGER_READY,
3669 interruptible);
3670 vm_object_unlock(dst_object);
3671 wait_result = thread_block((void (*)(void))0);
3672 if (wait_result != THREAD_AWAKENED) {
3673 DISCARD_PAGE;
3674 return(VM_FAULT_INTERRUPTED);
3675 }
3676 continue;
3677 }
3678
3679 u = dst_page->unlock_request |= VM_PROT_WRITE;
3680 vm_object_unlock(dst_object);
3681
3682 if ((rc = memory_object_data_unlock(
3683 dst_object->pager,
3684 dst_offset + dst_object->paging_offset,
3685 PAGE_SIZE,
3686 u)) != KERN_SUCCESS) {
3687 if (vm_fault_debug)
3688 printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3689 DISCARD_PAGE;
3690 return((rc == MACH_SEND_INTERRUPTED) ?
3691 VM_FAULT_INTERRUPTED :
3692 VM_FAULT_MEMORY_ERROR);
3693 }
3694 vm_object_lock(dst_object);
3695 continue;
3696 }
3697
3698 /* ... fall through to wait below */
3699 } else {
3700 /*
3701 * If the page isn't being used for other
3702 * purposes, then we're done.
3703 */
3704 if ( ! (dst_page->busy || dst_page->absent ||
3705 dst_page->error || dst_page->restart) )
3706 break;
3707 }
3708
3709 PAGE_ASSERT_WAIT(dst_page, interruptible);
3710 vm_object_unlock(dst_object);
3711 wait_result = thread_block((void (*)(void))0);
3712 if (wait_result != THREAD_AWAKENED) {
3713 DISCARD_PAGE;
3714 return(VM_FAULT_INTERRUPTED);
3715 }
3716 }
3717
3718 *result_page = dst_page;
3719 return(VM_FAULT_SUCCESS);
3720
3721 #undef interruptible
3722 #undef DISCARD_PAGE
3723 }
3724
3725 #endif /* notdef */
3726
3727 #if VM_FAULT_CLASSIFY
3728 /*
3729 * Temporary statistics gathering support.
3730 */
3731
3732 /*
3733 * Statistics arrays:
3734 */
3735 #define VM_FAULT_TYPES_MAX 5
3736 #define VM_FAULT_LEVEL_MAX 8
3737
3738 int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
3739
3740 #define VM_FAULT_TYPE_ZERO_FILL 0
3741 #define VM_FAULT_TYPE_MAP_IN 1
3742 #define VM_FAULT_TYPE_PAGER 2
3743 #define VM_FAULT_TYPE_COPY 3
3744 #define VM_FAULT_TYPE_OTHER 4
3745
3746
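/*
 * vm_fault_stats[type][level]: "type" is one of the categories above;
 * "level" is the number of shadow-chain hops taken before the fault
 * was classified (clamped to the last array slot).
 */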
3747 void
3748 vm_fault_classify(vm_object_t object,
3749 vm_object_offset_t offset,
3750 vm_prot_t fault_type)
3751 {
3752 int type, level = 0;
3753 vm_page_t m;
3754
3755 while (TRUE) {
3756 m = vm_page_lookup(object, offset);
3757 if (m != VM_PAGE_NULL) {
3758 if (m->busy || m->error || m->restart || m->absent ||
3759 fault_type & m->page_lock) {
3760 type = VM_FAULT_TYPE_OTHER;
3761 break;
3762 }
3763 if (((fault_type & VM_PROT_WRITE) == 0) ||
3764 ((level == 0) && object->copy == VM_OBJECT_NULL)) {
3765 type = VM_FAULT_TYPE_MAP_IN;
3766 break;
3767 }
3768 type = VM_FAULT_TYPE_COPY;
3769 break;
3770 }
3771 else {
3772 if (object->pager_created) {
3773 type = VM_FAULT_TYPE_PAGER;
3774 break;
3775 }
3776 if (object->shadow == VM_OBJECT_NULL) {
3777 type = VM_FAULT_TYPE_ZERO_FILL;
3778 break;
3779 }
3780
3781 offset += object->shadow_offset;
3782 object = object->shadow;
3783 level++;
3784 continue;
3785 }
3786 }
3787
3788 if (level >= VM_FAULT_LEVEL_MAX)
3789 level = VM_FAULT_LEVEL_MAX - 1;
3790
3791 vm_fault_stats[type][level] += 1;
3792
3793 return;
3794 }
3795
3796 /* cleanup routine to call from debugger */
3797
3798 void
3799 vm_fault_classify_init(void)
3800 {
3801 int type, level;
3802
3803 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
3804 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
3805 vm_fault_stats[type][level] = 0;
3806 }
3807 }
3808
3809 return;
3810 }
3811 #endif /* VM_FAULT_CLASSIFY */