1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /*
26 * @OSF_COPYRIGHT@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
31 * All Rights Reserved.
32 *
33 * Permission to use, copy, modify and distribute this software and its
34 * documentation is hereby granted, provided that both the copyright
35 * notice and this permission notice appear in all copies of the
36 * software, derivative works or modified versions, and any portions
37 * thereof, and that both notices appear in supporting documentation.
38 *
39 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
40 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
41 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
42 *
43 * Carnegie Mellon requests users of this software to return to
44 *
45 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
46 * School of Computer Science
47 * Carnegie Mellon University
48 * Pittsburgh PA 15213-3890
49 *
50 * any improvements or extensions that they make and grant Carnegie Mellon
51 * the rights to redistribute these changes.
52 */
53 /*
54 */
55 /*
56 * File: vm_fault.c
57 * Author: Avadis Tevanian, Jr., Michael Wayne Young
58 *
59 * Page fault handling module.
60 */
61 #ifdef MACH_BSD
62 /* remove after component interface available */
63 extern int vnode_pager_workaround;
64 extern int device_pager_workaround;
65 #endif
66
67 #include <mach_cluster_stats.h>
68 #include <mach_pagemap.h>
69 #include <mach_kdb.h>
70
71 #include <vm/vm_fault.h>
72 #include <mach/kern_return.h>
73 #include <mach/message.h> /* for error codes */
74 #include <kern/host_statistics.h>
75 #include <kern/counters.h>
76 #include <kern/task.h>
77 #include <kern/thread.h>
78 #include <kern/sched_prim.h>
79 #include <kern/host.h>
80 #include <kern/xpr.h>
81 #include <ppc/proc_reg.h>
82 #include <vm/task_working_set.h>
83 #include <vm/vm_map.h>
84 #include <vm/vm_object.h>
85 #include <vm/vm_page.h>
86 #include <vm/pmap.h>
87 #include <vm/vm_pageout.h>
88 #include <mach/vm_param.h>
89 #include <mach/vm_behavior.h>
90 #include <mach/memory_object.h>
91 /* For memory_object_data_{request,unlock} */
92 #include <kern/mach_param.h>
93 #include <kern/macro_help.h>
94 #include <kern/zalloc.h>
95 #include <kern/misc_protos.h>
96
97 #include <sys/kdebug.h>
98
99 #define VM_FAULT_CLASSIFY 0
100 #define VM_FAULT_STATIC_CONFIG 1
101
102 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
103
104 int vm_object_absent_max = 50;
105
106 int vm_fault_debug = 0;
107 boolean_t vm_page_deactivate_behind = TRUE;
108
109
110 #if !VM_FAULT_STATIC_CONFIG
111 boolean_t vm_fault_dirty_handling = FALSE;
112 boolean_t vm_fault_interruptible = FALSE;
113 boolean_t software_reference_bits = TRUE;
114 #endif
115
116 #if MACH_KDB
117 extern struct db_watchpoint *db_watchpoint_list;
118 #endif /* MACH_KDB */
119
120 /* Forward declarations of internal routines. */
121 extern kern_return_t vm_fault_wire_fast(
122 vm_map_t map,
123 vm_offset_t va,
124 vm_map_entry_t entry,
125 pmap_t pmap,
126 vm_offset_t pmap_addr);
127
128 extern void vm_fault_continue(void);
129
130 extern void vm_fault_copy_cleanup(
131 vm_page_t page,
132 vm_page_t top_page);
133
134 extern void vm_fault_copy_dst_cleanup(
135 vm_page_t page);
136
137 #if VM_FAULT_CLASSIFY
138 extern void vm_fault_classify(vm_object_t object,
139 vm_object_offset_t offset,
140 vm_prot_t fault_type);
141
142 extern void vm_fault_classify_init(void);
143 #endif
144
145 /*
146 * Routine: vm_fault_init
147 * Purpose:
148 * Initialize our private data structures.
149 */
150 void
151 vm_fault_init(void)
152 {
153 }
154
155 /*
156 * Routine: vm_fault_cleanup
157 * Purpose:
158 * Clean up the result of vm_fault_page.
159 * Results:
160 * The paging reference for "object" is released.
161 * "object" is unlocked.
162 * If "top_page" is not null, "top_page" is
163 * freed and the paging reference for the object
164 * containing it is released.
165 *
166 * In/out conditions:
167 * "object" must be locked.
168 */
169 void
170 vm_fault_cleanup(
171 register vm_object_t object,
172 register vm_page_t top_page)
173 {
174 vm_object_paging_end(object);
175 vm_object_unlock(object);
176
177 if (top_page != VM_PAGE_NULL) {
178 object = top_page->object;
179 vm_object_lock(object);
180 VM_PAGE_FREE(top_page);
181 vm_object_paging_end(object);
182 vm_object_unlock(object);
183 }
184 }
185
186 #if MACH_CLUSTER_STATS
187 #define MAXCLUSTERPAGES 16
188 struct {
189 unsigned long pages_in_cluster;
190 unsigned long pages_at_higher_offsets;
191 unsigned long pages_at_lower_offsets;
192 } cluster_stats_in[MAXCLUSTERPAGES];
193 #define CLUSTER_STAT(clause) clause
194 #define CLUSTER_STAT_HIGHER(x) \
195 ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
196 #define CLUSTER_STAT_LOWER(x) \
197 ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
198 #define CLUSTER_STAT_CLUSTER(x) \
199 ((cluster_stats_in[(x)].pages_in_cluster)++)
200 #else /* MACH_CLUSTER_STATS */
201 #define CLUSTER_STAT(clause)
202 #endif /* MACH_CLUSTER_STATS */
203
204 /* XXX - temporary */
205 boolean_t vm_allow_clustered_pagein = FALSE;
206 int vm_pagein_cluster_used = 0;
207
208 /*
209 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
210 */
211 int vm_default_ahead = 1; /* Number of pages to prepage ahead */
212 int vm_default_behind = 0; /* Number of pages to prepage behind */
213
214 #define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
215
216 /*
217 * Routine: vm_fault_page
218 * Purpose:
219 * Find the resident page for the virtual memory
220 * specified by the given virtual memory object
221 * and offset.
222 * Additional arguments:
 223  *	The required permissions for the page are given
224 * in "fault_type". Desired permissions are included
225 * in "protection". The minimum and maximum valid offsets
226 * within the object for the relevant map entry are
227 * passed in "lo_offset" and "hi_offset" respectively and
228 * the expected page reference pattern is passed in "behavior".
229 * These three parameters are used to determine pagein cluster
230 * limits.
231 *
232 * If the desired page is known to be resident (for
233 * example, because it was previously wired down), asserting
234 * the "unwiring" parameter will speed the search.
235 *
236 * If the operation can be interrupted (by thread_abort
237 * or thread_terminate), then the "interruptible"
238 * parameter should be asserted.
239 *
240 * Results:
241 * The page containing the proper data is returned
242 * in "result_page".
243 *
244 * In/out conditions:
245 * The source object must be locked and referenced,
246 * and must donate one paging reference. The reference
247 * is not affected. The paging reference and lock are
248 * consumed.
249 *
250 * If the call succeeds, the object in which "result_page"
251 * resides is left locked and holding a paging reference.
252 * If this is not the original object, a busy page in the
253 * original object is returned in "top_page", to prevent other
254 * callers from pursuing this same data, along with a paging
255 * reference for the original object. The "top_page" should
256 * be destroyed when this guarantee is no longer required.
257 * The "result_page" is also left busy. It is not removed
258 * from the pageout queues.
259 */
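/*
 * Illustrative calling convention (a sketch only, not the actual caller
 * code; "lo", "hi" and the FALSE flags below are placeholder values):
 *
 *	vm_object_lock(object);
 *	vm_object_paging_begin(object);		(donate a paging reference)
 *	kr = vm_fault_page(object, offset, fault_type, FALSE,
 *			   interruptible, lo, hi, behavior,
 *			   &prot, &result_page, &top_page,
 *			   &type_of_fault, &error, FALSE, FALSE,
 *			   map, vaddr);
 *	if (kr == VM_FAULT_SUCCESS) {
 *		...use result_page, which is returned busy...
 *		PAGE_WAKEUP_DONE(result_page);
 *		vm_fault_cleanup(result_page->object, top_page);
 *	}
 */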
260
261 vm_fault_return_t
262 vm_fault_page(
263 /* Arguments: */
264 vm_object_t first_object, /* Object to begin search */
265 vm_object_offset_t first_offset, /* Offset into object */
266 vm_prot_t fault_type, /* What access is requested */
267 boolean_t must_be_resident,/* Must page be resident? */
268 int interruptible, /* how may fault be interrupted? */
269 vm_object_offset_t lo_offset, /* Map entry start */
270 vm_object_offset_t hi_offset, /* Map entry end */
271 vm_behavior_t behavior, /* Page reference behavior */
272 /* Modifies in place: */
273 vm_prot_t *protection, /* Protection for mapping */
274 /* Returns: */
275 vm_page_t *result_page, /* Page found, if successful */
276 vm_page_t *top_page, /* Page in top object, if
277 * not result_page. */
278 int *type_of_fault, /* if non-null, fill in with type of fault
279 * COW, zero-fill, etc... returned in trace point */
280 /* More arguments: */
281 kern_return_t *error_code, /* code if page is in error */
282 boolean_t no_zero_fill, /* don't zero fill absent pages */
283 boolean_t data_supply, /* treat as data_supply if
284 * it is a write fault and a full
285 * page is provided */
286 vm_map_t map,
287 vm_offset_t vaddr)
288 {
289 register
290 vm_page_t m;
291 register
292 vm_object_t object;
293 register
294 vm_object_offset_t offset;
295 vm_page_t first_m;
296 vm_object_t next_object;
297 vm_object_t copy_object;
298 boolean_t look_for_page;
299 vm_prot_t access_required = fault_type;
300 vm_prot_t wants_copy_flag;
301 vm_size_t cluster_size, length;
302 vm_object_offset_t cluster_offset;
303 vm_object_offset_t cluster_start, cluster_end, paging_offset;
304 vm_object_offset_t align_offset;
305 CLUSTER_STAT(int pages_at_higher_offsets;)
306 CLUSTER_STAT(int pages_at_lower_offsets;)
307 kern_return_t wait_result;
308 boolean_t interruptible_state;
309 boolean_t bumped_pagein = FALSE;
310
311
312 #if MACH_PAGEMAP
313 /*
314 * MACH page map - an optional optimization where a bit map is maintained
315 * by the VM subsystem for internal objects to indicate which pages of
316 * the object currently reside on backing store. This existence map
317 * duplicates information maintained by the vnode pager. It is
318 * created at the time of the first pageout against the object, i.e.
 319  * at the same time the pager for the object is created. The optimization
320 * is designed to eliminate pager interaction overhead, if it is
321 * 'known' that the page does not exist on backing store.
322 *
323 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
324 * either marked as paged out in the existence map for the object or no
325 * existence map exists for the object. LOOK_FOR() is one of the
326 * criteria in the decision to invoke the pager. It is also used as one
327 * of the criteria to terminate the scan for adjacent pages in a clustered
328 * pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
329 * permanent objects. Note also that if the pager for an internal object
330 * has not been created, the pager is not invoked regardless of the value
331 * of LOOK_FOR() and that clustered pagein scans are only done on an object
332 * for which a pager has been created.
333 *
334 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 335  * is marked as paged out in the existence map for the object.
 336  * PAGED_OUT() is used to determine if a page has already been pushed
337 * into a copy object in order to avoid a redundant page out operation.
338 */
339 #define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
340 != VM_EXTERNAL_STATE_ABSENT)
341 #define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
342 == VM_EXTERNAL_STATE_EXISTS)
343 #else /* MACH_PAGEMAP */
344 /*
345 * If the MACH page map optimization is not enabled,
346 * LOOK_FOR() always evaluates to TRUE. The pager will always be
347 * invoked to resolve missing pages in an object, assuming the pager
348 * has been created for the object. In a clustered page operation, the
 349  * absence of a page on backing store cannot be used to terminate
350 * a scan for adjacent pages since that information is available only in
351 * the pager. Hence pages that may not be paged out are potentially
352 * included in a clustered request. The vnode pager is coded to deal
353 * with any combination of absent/present pages in a clustered
354 * pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
355 * will always be invoked to push a dirty page into a copy object assuming
356 * a pager has been created. If the page has already been pushed, the
 357  * pager will ignore the new request.
358 */
359 #define LOOK_FOR(o, f) TRUE
360 #define PAGED_OUT(o, f) FALSE
361 #endif /* MACH_PAGEMAP */
362
363 /*
364 * Recovery actions
365 */
366 #define PREPARE_RELEASE_PAGE(m) \
367 MACRO_BEGIN \
368 vm_page_lock_queues(); \
369 MACRO_END
370
371 #define DO_RELEASE_PAGE(m) \
372 MACRO_BEGIN \
373 PAGE_WAKEUP_DONE(m); \
374 if (!m->active && !m->inactive) \
375 vm_page_activate(m); \
376 vm_page_unlock_queues(); \
377 MACRO_END
378
379 #define RELEASE_PAGE(m) \
380 MACRO_BEGIN \
381 PREPARE_RELEASE_PAGE(m); \
382 DO_RELEASE_PAGE(m); \
383 MACRO_END
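/*
 * RELEASE_PAGE() is used on the error/backoff paths below (for example
 * when vm_page_grab() or vm_page_alloc() fails): it wakes any threads
 * waiting on the busy page and reactivates the page if it is not
 * already on a paging queue, before the fault is abandoned via
 * vm_fault_cleanup().
 */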
384
385 #if TRACEFAULTPAGE
386 dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
387 #endif
388
389
390
391 #if !VM_FAULT_STATIC_CONFIG
392 if (vm_fault_dirty_handling
393 #if MACH_KDB
394 /*
395 * If there are watchpoints set, then
396 * we don't want to give away write permission
397 * on a read fault. Make the task write fault,
398 * so that the watchpoint code notices the access.
399 */
400 || db_watchpoint_list
401 #endif /* MACH_KDB */
402 ) {
403 /*
404 * If we aren't asking for write permission,
405 * then don't give it away. We're using write
406 * faults to set the dirty bit.
407 */
408 if (!(fault_type & VM_PROT_WRITE))
409 *protection &= ~VM_PROT_WRITE;
410 }
411
412 if (!vm_fault_interruptible)
413 interruptible = THREAD_UNINT;
414 #else /* STATIC_CONFIG */
415 #if MACH_KDB
416 /*
417 * If there are watchpoints set, then
418 * we don't want to give away write permission
419 * on a read fault. Make the task write fault,
420 * so that the watchpoint code notices the access.
421 */
422 if (db_watchpoint_list) {
423 /*
424 * If we aren't asking for write permission,
425 * then don't give it away. We're using write
426 * faults to set the dirty bit.
427 */
428 if (!(fault_type & VM_PROT_WRITE))
429 *protection &= ~VM_PROT_WRITE;
430 }
431
432 #endif /* MACH_KDB */
433 #endif /* STATIC_CONFIG */
434
435 interruptible_state = thread_interrupt_level(interruptible);
436
437 /*
438 * INVARIANTS (through entire routine):
439 *
440 * 1) At all times, we must either have the object
441 * lock or a busy page in some object to prevent
442 * some other thread from trying to bring in
443 * the same page.
444 *
445 * Note that we cannot hold any locks during the
446 * pager access or when waiting for memory, so
447 * we use a busy page then.
448 *
449 * Note also that we aren't as concerned about more than
450 * one thread attempting to memory_object_data_unlock
451 * the same page at once, so we don't hold the page
452 * as busy then, but do record the highest unlock
453 * value so far. [Unlock requests may also be delivered
454 * out of order.]
455 *
456 * 2) To prevent another thread from racing us down the
457 * shadow chain and entering a new page in the top
458 * object before we do, we must keep a busy page in
459 * the top object while following the shadow chain.
460 *
461 * 3) We must increment paging_in_progress on any object
462 * for which we have a busy page
463 *
464 * 4) We leave busy pages on the pageout queues.
465 * If the pageout daemon comes across a busy page,
466 * it will remove the page from the pageout queues.
467 */
468
469 /*
470 * Search for the page at object/offset.
471 */
472
473 object = first_object;
474 offset = first_offset;
475 first_m = VM_PAGE_NULL;
476 access_required = fault_type;
477
478 XPR(XPR_VM_FAULT,
479 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
480 (integer_t)object, offset, fault_type, *protection, 0);
481
482 /*
483 * See whether this page is resident
484 */
485
486 while (TRUE) {
487 #if TRACEFAULTPAGE
488 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
489 #endif
490 if (!object->alive) {
491 vm_fault_cleanup(object, first_m);
492 thread_interrupt_level(interruptible_state);
493 return(VM_FAULT_MEMORY_ERROR);
494 }
495 m = vm_page_lookup(object, offset);
496 #if TRACEFAULTPAGE
497 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
498 #endif
499 if (m != VM_PAGE_NULL) {
500 /*
501 * If the page was pre-paged as part of a
502 * cluster, record the fact.
503 */
504 if (m->clustered) {
505 vm_pagein_cluster_used++;
506 m->clustered = FALSE;
507 }
508
509 /*
510 * If the page is being brought in,
511 * wait for it and then retry.
512 *
513 * A possible optimization: if the page
514 * is known to be resident, we can ignore
515 * pages that are absent (regardless of
516 * whether they're busy).
517 */
518
519 if (m->busy) {
520 #if TRACEFAULTPAGE
521 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
522 #endif
523 wait_result = PAGE_SLEEP(object, m, interruptible);
524 XPR(XPR_VM_FAULT,
525 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
526 (integer_t)object, offset,
527 (integer_t)m, 0, 0);
528 counter(c_vm_fault_page_block_busy_kernel++);
529
530 if (wait_result != THREAD_AWAKENED) {
531 vm_fault_cleanup(object, first_m);
532 thread_interrupt_level(interruptible_state);
533 if (wait_result == THREAD_RESTART)
534 {
535 return(VM_FAULT_RETRY);
536 }
537 else
538 {
539 return(VM_FAULT_INTERRUPTED);
540 }
541 }
542 continue;
543 }
544
545 /*
546 * If the page is in error, give up now.
547 */
548
549 if (m->error) {
550 #if TRACEFAULTPAGE
551 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */
552 #endif
553 if (error_code)
554 *error_code = m->page_error;
555 VM_PAGE_FREE(m);
556 vm_fault_cleanup(object, first_m);
557 thread_interrupt_level(interruptible_state);
558 return(VM_FAULT_MEMORY_ERROR);
559 }
560
561 /*
562 * If the pager wants us to restart
563 * at the top of the chain,
564 * typically because it has moved the
565 * page to another pager, then do so.
566 */
567
568 if (m->restart) {
569 #if TRACEFAULTPAGE
570 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
571 #endif
572 VM_PAGE_FREE(m);
573 vm_fault_cleanup(object, first_m);
574 thread_interrupt_level(interruptible_state);
575 return(VM_FAULT_RETRY);
576 }
577
578 /*
579 * If the page isn't busy, but is absent,
580 * then it was deemed "unavailable".
581 */
582
583 if (m->absent) {
584 /*
585 * Remove the non-existent page (unless it's
586 * in the top object) and move on down to the
587 * next object (if there is one).
588 */
589 #if TRACEFAULTPAGE
590 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */
591 #endif
592
593 next_object = object->shadow;
594 if (next_object == VM_OBJECT_NULL) {
595 vm_page_t real_m;
596
597 assert(!must_be_resident);
598
599 if (object->shadow_severed) {
600 vm_fault_cleanup(
601 object, first_m);
602 thread_interrupt_level(interruptible_state);
603 return VM_FAULT_MEMORY_ERROR;
604 }
605
606 /*
607 * Absent page at bottom of shadow
608 * chain; zero fill the page we left
609 * busy in the first object, and flush
610 * the absent page. But first we
611 * need to allocate a real page.
612 */
613 if (VM_PAGE_THROTTLED() ||
614 (real_m = vm_page_grab()) == VM_PAGE_NULL) {
615 vm_fault_cleanup(object, first_m);
616 thread_interrupt_level(interruptible_state);
617 return(VM_FAULT_MEMORY_SHORTAGE);
618 }
619
620
621 XPR(XPR_VM_FAULT,
622 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
623 (integer_t)object, offset,
624 (integer_t)m,
625 (integer_t)first_object, 0);
626 if (object != first_object) {
627 VM_PAGE_FREE(m);
628 vm_object_paging_end(object);
629 vm_object_unlock(object);
630 object = first_object;
631 offset = first_offset;
632 m = first_m;
633 first_m = VM_PAGE_NULL;
634 vm_object_lock(object);
635 }
636
637 VM_PAGE_FREE(m);
638 assert(real_m->busy);
639 vm_page_insert(real_m, object, offset);
640 m = real_m;
641
642 /*
643 * Drop the lock while zero filling
644 * page. Then break because this
645 * is the page we wanted. Checking
646 * the page lock is a waste of time;
647 * this page was either absent or
648 * newly allocated -- in both cases
649 * it can't be page locked by a pager.
650 */
651 m->no_isync = FALSE;
652
653 if (!no_zero_fill) {
654 vm_object_unlock(object);
655 vm_page_zero_fill(m);
656 if (type_of_fault)
657 *type_of_fault = DBG_ZERO_FILL_FAULT;
658 VM_STAT(zero_fill_count++);
659
660 if (bumped_pagein == TRUE) {
661 VM_STAT(pageins--);
662 current_task()->pageins--;
663 }
664 vm_object_lock(object);
665 }
666 pmap_clear_modify(m->phys_page);
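			/*
			 * Queue the freshly zero-filled page: pages in
			 * large (> 0x80000 byte) objects go on the dedicated
			 * zero-fill queue, others on the inactive queue.
			 * Each page is stamped with the current page ticket;
			 * the ticket rolls over every VM_PAGE_TICKETS_IN_ROLL
			 * insertions, presumably so the pageout code can age
			 * pages in batches.
			 */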
667 vm_page_lock_queues();
668 VM_PAGE_QUEUES_REMOVE(m);
669 m->page_ticket = vm_page_ticket;
670 if(m->object->size > 0x80000) {
671 m->zero_fill = TRUE;
672 /* depends on the queues lock */
673 vm_zf_count += 1;
674 queue_enter(&vm_page_queue_zf,
675 m, vm_page_t, pageq);
676 } else {
677 queue_enter(
678 &vm_page_queue_inactive,
679 m, vm_page_t, pageq);
680 }
681 vm_page_ticket_roll++;
682 if(vm_page_ticket_roll ==
683 VM_PAGE_TICKETS_IN_ROLL) {
684 vm_page_ticket_roll = 0;
685 if(vm_page_ticket ==
686 VM_PAGE_TICKET_ROLL_IDS)
687 vm_page_ticket= 0;
688 else
689 vm_page_ticket++;
690 }
691 m->inactive = TRUE;
692 vm_page_inactive_count++;
693 vm_page_unlock_queues();
694 break;
695 } else {
696 if (must_be_resident) {
697 vm_object_paging_end(object);
698 } else if (object != first_object) {
699 vm_object_paging_end(object);
700 VM_PAGE_FREE(m);
701 } else {
702 first_m = m;
703 m->absent = FALSE;
704 m->unusual = FALSE;
705 vm_object_absent_release(object);
706 m->busy = TRUE;
707
708 vm_page_lock_queues();
709 VM_PAGE_QUEUES_REMOVE(m);
710 vm_page_unlock_queues();
711 }
712 XPR(XPR_VM_FAULT,
713 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
714 (integer_t)object, offset,
715 (integer_t)next_object,
716 offset+object->shadow_offset,0);
717 offset += object->shadow_offset;
718 hi_offset += object->shadow_offset;
719 lo_offset += object->shadow_offset;
720 access_required = VM_PROT_READ;
721 vm_object_lock(next_object);
722 vm_object_unlock(object);
723 object = next_object;
724 vm_object_paging_begin(object);
725 continue;
726 }
727 }
728
729 if ((m->cleaning)
730 && ((object != first_object) ||
731 (object->copy != VM_OBJECT_NULL))
732 && (fault_type & VM_PROT_WRITE)) {
733 /*
734 * This is a copy-on-write fault that will
735 * cause us to revoke access to this page, but
736 * this page is in the process of being cleaned
737 * in a clustered pageout. We must wait until
738 * the cleaning operation completes before
739 * revoking access to the original page,
740 * otherwise we might attempt to remove a
741 * wired mapping.
742 */
743 #if TRACEFAULTPAGE
744 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */
745 #endif
746 XPR(XPR_VM_FAULT,
747 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
748 (integer_t)object, offset,
749 (integer_t)m, 0, 0);
750 /* take an extra ref so that object won't die */
751 assert(object->ref_count > 0);
752 object->ref_count++;
753 vm_object_res_reference(object);
754 vm_fault_cleanup(object, first_m);
755 counter(c_vm_fault_page_block_backoff_kernel++);
756 vm_object_lock(object);
757 assert(object->ref_count > 0);
758 m = vm_page_lookup(object, offset);
759 if (m != VM_PAGE_NULL && m->cleaning) {
760 PAGE_ASSERT_WAIT(m, interruptible);
761 vm_object_unlock(object);
762 wait_result = thread_block(THREAD_CONTINUE_NULL);
763 vm_object_deallocate(object);
764 goto backoff;
765 } else {
766 vm_object_unlock(object);
767 vm_object_deallocate(object);
768 thread_interrupt_level(interruptible_state);
769 return VM_FAULT_RETRY;
770 }
771 }
772
773 /*
774 * If the desired access to this page has
775 * been locked out, request that it be unlocked.
776 */
777
778 if (access_required & m->page_lock) {
779 if ((access_required & m->unlock_request) != access_required) {
780 vm_prot_t new_unlock_request;
781 kern_return_t rc;
782
783 #if TRACEFAULTPAGE
784 dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready); /* (TEST/DEBUG) */
785 #endif
786 if (!object->pager_ready) {
787 XPR(XPR_VM_FAULT,
788 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
789 access_required,
790 (integer_t)object, offset,
791 (integer_t)m, 0);
792 /* take an extra ref */
793 assert(object->ref_count > 0);
794 object->ref_count++;
795 vm_object_res_reference(object);
796 vm_fault_cleanup(object,
797 first_m);
798 counter(c_vm_fault_page_block_backoff_kernel++);
799 vm_object_lock(object);
800 assert(object->ref_count > 0);
801 if (!object->pager_ready) {
802 wait_result = vm_object_assert_wait(
803 object,
804 VM_OBJECT_EVENT_PAGER_READY,
805 interruptible);
806 vm_object_unlock(object);
807 if (wait_result == THREAD_WAITING)
808 wait_result = thread_block(THREAD_CONTINUE_NULL);
809 vm_object_deallocate(object);
810 goto backoff;
811 } else {
812 vm_object_unlock(object);
813 vm_object_deallocate(object);
814 thread_interrupt_level(interruptible_state);
815 return VM_FAULT_RETRY;
816 }
817 }
818
819 new_unlock_request = m->unlock_request =
820 (access_required | m->unlock_request);
821 vm_object_unlock(object);
822 XPR(XPR_VM_FAULT,
823 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
824 (integer_t)object, offset,
825 (integer_t)m, new_unlock_request, 0);
826 if ((rc = memory_object_data_unlock(
827 object->pager,
828 offset + object->paging_offset,
829 PAGE_SIZE,
830 new_unlock_request))
831 != KERN_SUCCESS) {
832 if (vm_fault_debug)
833 printf("vm_fault: memory_object_data_unlock failed\n");
834 vm_object_lock(object);
835 vm_fault_cleanup(object, first_m);
836 thread_interrupt_level(interruptible_state);
837 return((rc == MACH_SEND_INTERRUPTED) ?
838 VM_FAULT_INTERRUPTED :
839 VM_FAULT_MEMORY_ERROR);
840 }
841 vm_object_lock(object);
842 continue;
843 }
844
845 XPR(XPR_VM_FAULT,
846 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
847 access_required, (integer_t)object,
848 offset, (integer_t)m, 0);
849 /* take an extra ref so object won't die */
850 assert(object->ref_count > 0);
851 object->ref_count++;
852 vm_object_res_reference(object);
853 vm_fault_cleanup(object, first_m);
854 counter(c_vm_fault_page_block_backoff_kernel++);
855 vm_object_lock(object);
856 assert(object->ref_count > 0);
857 m = vm_page_lookup(object, offset);
858 if (m != VM_PAGE_NULL &&
859 (access_required & m->page_lock) &&
860 !((access_required & m->unlock_request) != access_required)) {
861 PAGE_ASSERT_WAIT(m, interruptible);
862 vm_object_unlock(object);
863 wait_result = thread_block(THREAD_CONTINUE_NULL);
864 vm_object_deallocate(object);
865 goto backoff;
866 } else {
867 vm_object_unlock(object);
868 vm_object_deallocate(object);
869 thread_interrupt_level(interruptible_state);
870 return VM_FAULT_RETRY;
871 }
872 }
873 /*
874 * We mark the page busy and leave it on
875 * the pageout queues. If the pageout
 876  *	daemon comes across it, then it will
877 * remove the page.
878 */
879
880 #if TRACEFAULTPAGE
881 dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
882 #endif
883
884 #if !VM_FAULT_STATIC_CONFIG
885 if (!software_reference_bits) {
886 vm_page_lock_queues();
887 if (m->inactive)
888 vm_stat.reactivations++;
889
890 VM_PAGE_QUEUES_REMOVE(m);
891 vm_page_unlock_queues();
892 }
893 #endif
894 XPR(XPR_VM_FAULT,
895 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
896 (integer_t)object, offset, (integer_t)m, 0, 0);
897 assert(!m->busy);
898 m->busy = TRUE;
899 assert(!m->absent);
900 break;
901 }
902
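	/*
	 * Decide whether the pager should be consulted for this
	 * object/offset: only if a pager has been created, the
	 * existence map (LOOK_FOR) does not rule the page out, and
	 * the caller is not supplying the data itself (data_supply).
	 */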
903 look_for_page =
904 (object->pager_created) &&
905 LOOK_FOR(object, offset) &&
906 (!data_supply);
907
908 #if TRACEFAULTPAGE
909 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */
910 #endif
911 if ((look_for_page || (object == first_object))
912 && !must_be_resident
913 && !(object->phys_contiguous)) {
914 /*
915 * Allocate a new page for this object/offset
916 * pair.
917 */
918
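		/*
		 * The fictitious page acts as a busy placeholder at this
		 * object/offset so that concurrent faulters block on it.
		 * If the pager is asked for data it is marked absent below,
		 * and for internal (default pager) objects it is converted
		 * to a real page via vm_page_convert().
		 */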
919 m = vm_page_grab_fictitious();
920 #if TRACEFAULTPAGE
921 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
922 #endif
923 if (m == VM_PAGE_NULL) {
924 vm_fault_cleanup(object, first_m);
925 thread_interrupt_level(interruptible_state);
926 return(VM_FAULT_FICTITIOUS_SHORTAGE);
927 }
928 vm_page_insert(m, object, offset);
929 }
930
931 if ((look_for_page && !must_be_resident)) {
932 kern_return_t rc;
933
934 /*
935 * If the memory manager is not ready, we
936 * cannot make requests.
937 */
938 if (!object->pager_ready) {
939 #if TRACEFAULTPAGE
940 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
941 #endif
942 if(m != VM_PAGE_NULL)
943 VM_PAGE_FREE(m);
944 XPR(XPR_VM_FAULT,
945 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
946 (integer_t)object, offset, 0, 0, 0);
947 /* take an extra ref so object won't die */
948 assert(object->ref_count > 0);
949 object->ref_count++;
950 vm_object_res_reference(object);
951 vm_fault_cleanup(object, first_m);
952 counter(c_vm_fault_page_block_backoff_kernel++);
953 vm_object_lock(object);
954 assert(object->ref_count > 0);
955 if (!object->pager_ready) {
956 wait_result = vm_object_assert_wait(object,
957 VM_OBJECT_EVENT_PAGER_READY,
958 interruptible);
959 vm_object_unlock(object);
960 if (wait_result == THREAD_WAITING)
961 wait_result = thread_block(THREAD_CONTINUE_NULL);
962 vm_object_deallocate(object);
963 goto backoff;
964 } else {
965 vm_object_unlock(object);
966 vm_object_deallocate(object);
967 thread_interrupt_level(interruptible_state);
968 return VM_FAULT_RETRY;
969 }
970 }
971
972 if(object->phys_contiguous) {
973 if(m != VM_PAGE_NULL) {
974 VM_PAGE_FREE(m);
975 m = VM_PAGE_NULL;
976 }
977 goto no_clustering;
978 }
979 if (object->internal) {
980 /*
981 * Requests to the default pager
982 * must reserve a real page in advance,
 983  *	because the pager's data-provided path
984 * won't block for pages. IMPORTANT:
985 * this acts as a throttling mechanism
986 * for data_requests to the default
987 * pager.
988 */
989
990 #if TRACEFAULTPAGE
991 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
992 #endif
993 if (m->fictitious && !vm_page_convert(m)) {
994 VM_PAGE_FREE(m);
995 vm_fault_cleanup(object, first_m);
996 thread_interrupt_level(interruptible_state);
997 return(VM_FAULT_MEMORY_SHORTAGE);
998 }
999 } else if (object->absent_count >
1000 vm_object_absent_max) {
1001 /*
1002 * If there are too many outstanding page
1003 * requests pending on this object, we
1004 * wait for them to be resolved now.
1005 */
1006
1007 #if TRACEFAULTPAGE
1008 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1009 #endif
1010 if(m != VM_PAGE_NULL)
1011 VM_PAGE_FREE(m);
1012 /* take an extra ref so object won't die */
1013 assert(object->ref_count > 0);
1014 object->ref_count++;
1015 vm_object_res_reference(object);
1016 vm_fault_cleanup(object, first_m);
1017 counter(c_vm_fault_page_block_backoff_kernel++);
1018 vm_object_lock(object);
1019 assert(object->ref_count > 0);
1020 if (object->absent_count > vm_object_absent_max) {
1021 vm_object_absent_assert_wait(object,
1022 interruptible);
1023 vm_object_unlock(object);
1024 wait_result = thread_block(THREAD_CONTINUE_NULL);
1025 vm_object_deallocate(object);
1026 goto backoff;
1027 } else {
1028 vm_object_unlock(object);
1029 vm_object_deallocate(object);
1030 thread_interrupt_level(interruptible_state);
1031 return VM_FAULT_RETRY;
1032 }
1033 }
1034
1035 /*
1036 * Indicate that the page is waiting for data
1037 * from the memory manager.
1038 */
1039
1040 if(m != VM_PAGE_NULL) {
1041
1042 m->list_req_pending = TRUE;
1043 m->absent = TRUE;
1044 m->unusual = TRUE;
1045 object->absent_count++;
1046
1047 }
1048
1049 no_clustering:
1050 cluster_start = offset;
1051 length = PAGE_SIZE;
1052
1053 /*
1054 * lengthen the cluster by the pages in the working set
1055 */
1056 if((map != NULL) &&
1057 (current_task()->dynamic_working_set != 0)) {
1058 cluster_end = cluster_start + length;
1059 		/* tws values for start and end are just
1060 		 * suggestions. Therefore, as long as
1061 		 * build_cluster does not use pointers or
1062 		 * take action based on values that
1063 		 * could be affected by re-entrance, we
1064 * do not need to take the map lock.
1065 */
1066 cluster_end = offset + PAGE_SIZE_64;
1067 tws_build_cluster((tws_hash_t)
1068 current_task()->dynamic_working_set,
1069 object, &cluster_start,
1070 &cluster_end, 0x40000);
1071 length = cluster_end - cluster_start;
1072 }
1073 #if TRACEFAULTPAGE
1074 dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */
1075 #endif
1076 /*
1077 * We have a busy page, so we can
1078 * release the object lock.
1079 */
1080 vm_object_unlock(object);
1081
1082 /*
1083 * Call the memory manager to retrieve the data.
1084 */
1085
1086 if (type_of_fault)
1087 *type_of_fault = (length << 8) | DBG_PAGEIN_FAULT;
1088 VM_STAT(pageins++);
1089 current_task()->pageins++;
1090 bumped_pagein = TRUE;
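			/*
			 * Remember that a pagein was charged to this task;
			 * the zero-fill paths above and below back the
			 * counters out again if the request ends up being
			 * satisfied by a zero-filled page instead (see the
			 * bumped_pagein checks).
			 */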
1091
1092 /*
1093 * If this object uses a copy_call strategy,
1094 * and we are interested in a copy of this object
1095 * (having gotten here only by following a
1096 * shadow chain), then tell the memory manager
1097 * via a flag added to the desired_access
1098 * parameter, so that it can detect a race
1099 * between our walking down the shadow chain
1100 * and its pushing pages up into a copy of
1101 * the object that it manages.
1102 */
1103
1104 if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1105 object != first_object) {
1106 wants_copy_flag = VM_PROT_WANTS_COPY;
1107 } else {
1108 wants_copy_flag = VM_PROT_NONE;
1109 }
1110
1111 XPR(XPR_VM_FAULT,
1112 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1113 (integer_t)object, offset, (integer_t)m,
1114 access_required | wants_copy_flag, 0);
1115
1116 rc = memory_object_data_request(object->pager,
1117 cluster_start + object->paging_offset,
1118 length,
1119 access_required | wants_copy_flag);
1120
1121
1122 #if TRACEFAULTPAGE
1123 dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1124 #endif
1125 if (rc != KERN_SUCCESS) {
1126 if (rc != MACH_SEND_INTERRUPTED
1127 && vm_fault_debug)
1128 printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
1129 "memory_object_data_request",
1130 object->pager,
1131 cluster_start + object->paging_offset,
1132 length, access_required, rc);
1133 /*
1134 * Don't want to leave a busy page around,
1135 * but the data request may have blocked,
1136 * so check if it's still there and busy.
1137 */
1138 if(!object->phys_contiguous) {
1139 vm_object_lock(object);
1140 for (; length; length -= PAGE_SIZE,
1141 cluster_start += PAGE_SIZE_64) {
1142 vm_page_t p;
1143 if ((p = vm_page_lookup(object,
1144 cluster_start))
1145 && p->absent && p->busy
1146 && p != first_m) {
1147 VM_PAGE_FREE(p);
1148 }
1149 }
1150 }
1151 vm_fault_cleanup(object, first_m);
1152 thread_interrupt_level(interruptible_state);
1153 return((rc == MACH_SEND_INTERRUPTED) ?
1154 VM_FAULT_INTERRUPTED :
1155 VM_FAULT_MEMORY_ERROR);
1156 } else {
1157 #ifdef notdefcdy
1158 tws_hash_line_t line;
1159 task_t task;
1160
1161 task = current_task();
1162
1163 if((map != NULL) &&
1164 				(task->dynamic_working_set != 0)
1165 				&& !(object->private)) {
1166 vm_object_t base_object;
1167 vm_object_offset_t base_offset;
1168 base_object = object;
1169 base_offset = offset;
1170 while(base_object->shadow) {
1171 base_offset +=
1172 base_object->shadow_offset;
1173 base_object =
1174 base_object->shadow;
1175 }
1176 if(tws_lookup
1177 ((tws_hash_t)
1178 task->dynamic_working_set,
1179 base_offset, base_object,
1180 &line) == KERN_SUCCESS) {
1181 tws_line_signal((tws_hash_t)
1182 task->dynamic_working_set,
1183 map, line, vaddr);
1184 }
1185 }
1186 #endif
1187 }
1188
1189 /*
1190 * Retry with same object/offset, since new data may
1191 * be in a different page (i.e., m is meaningless at
1192 * this point).
1193 */
1194 vm_object_lock(object);
1195 if ((interruptible != THREAD_UNINT) &&
1196 (current_thread()->state & TH_ABORT)) {
1197 vm_fault_cleanup(object, first_m);
1198 thread_interrupt_level(interruptible_state);
1199 return(VM_FAULT_INTERRUPTED);
1200 }
1201 if(m == VM_PAGE_NULL)
1202 break;
1203 continue;
1204 }
1205
1206 /*
1207 * The only case in which we get here is if
1208 * object has no pager (or unwiring). If the pager doesn't
1209 * have the page this is handled in the m->absent case above
1210 * (and if you change things here you should look above).
1211 */
1212 #if TRACEFAULTPAGE
1213 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1214 #endif
1215 if (object == first_object)
1216 first_m = m;
1217 else
1218 assert(m == VM_PAGE_NULL);
1219
1220 XPR(XPR_VM_FAULT,
1221 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1222 (integer_t)object, offset, (integer_t)m,
1223 (integer_t)object->shadow, 0);
1224 /*
1225 * Move on to the next object. Lock the next
1226 * object before unlocking the current one.
1227 */
1228 next_object = object->shadow;
1229 if (next_object == VM_OBJECT_NULL) {
1230 assert(!must_be_resident);
1231 /*
1232 * If there's no object left, fill the page
1233 * in the top object with zeros. But first we
1234 * need to allocate a real page.
1235 */
1236
1237 if (object != first_object) {
1238 vm_object_paging_end(object);
1239 vm_object_unlock(object);
1240
1241 object = first_object;
1242 offset = first_offset;
1243 vm_object_lock(object);
1244 }
1245
1246 m = first_m;
1247 assert(m->object == object);
1248 first_m = VM_PAGE_NULL;
1249
1250 if (object->shadow_severed) {
1251 VM_PAGE_FREE(m);
1252 vm_fault_cleanup(object, VM_PAGE_NULL);
1253 thread_interrupt_level(interruptible_state);
1254 return VM_FAULT_MEMORY_ERROR;
1255 }
1256
1257 if (VM_PAGE_THROTTLED() ||
1258 (m->fictitious && !vm_page_convert(m))) {
1259 VM_PAGE_FREE(m);
1260 vm_fault_cleanup(object, VM_PAGE_NULL);
1261 thread_interrupt_level(interruptible_state);
1262 return(VM_FAULT_MEMORY_SHORTAGE);
1263 }
1264 m->no_isync = FALSE;
1265
1266 if (!no_zero_fill) {
1267 vm_object_unlock(object);
1268 vm_page_zero_fill(m);
1269 if (type_of_fault)
1270 *type_of_fault = DBG_ZERO_FILL_FAULT;
1271 VM_STAT(zero_fill_count++);
1272
1273 if (bumped_pagein == TRUE) {
1274 VM_STAT(pageins--);
1275 current_task()->pageins--;
1276 }
1277 vm_object_lock(object);
1278 }
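			/*
			 * Same queue placement and page-ticket bookkeeping
			 * as the zero-fill case in the absent-page handling
			 * above.
			 */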
1279 vm_page_lock_queues();
1280 VM_PAGE_QUEUES_REMOVE(m);
1281 if(m->object->size > 0x80000) {
1282 m->zero_fill = TRUE;
1283 /* depends on the queues lock */
1284 vm_zf_count += 1;
1285 queue_enter(&vm_page_queue_zf,
1286 m, vm_page_t, pageq);
1287 } else {
1288 queue_enter(
1289 &vm_page_queue_inactive,
1290 m, vm_page_t, pageq);
1291 }
1292 m->page_ticket = vm_page_ticket;
1293 vm_page_ticket_roll++;
1294 if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1295 vm_page_ticket_roll = 0;
1296 if(vm_page_ticket ==
1297 VM_PAGE_TICKET_ROLL_IDS)
1298 vm_page_ticket= 0;
1299 else
1300 vm_page_ticket++;
1301 }
1302 m->inactive = TRUE;
1303 vm_page_inactive_count++;
1304 vm_page_unlock_queues();
1305 pmap_clear_modify(m->phys_page);
1306 break;
1307 }
1308 else {
1309 if ((object != first_object) || must_be_resident)
1310 vm_object_paging_end(object);
1311 offset += object->shadow_offset;
1312 hi_offset += object->shadow_offset;
1313 lo_offset += object->shadow_offset;
1314 access_required = VM_PROT_READ;
1315 vm_object_lock(next_object);
1316 vm_object_unlock(object);
1317 object = next_object;
1318 vm_object_paging_begin(object);
1319 }
1320 }
1321
1322 /*
1323 * PAGE HAS BEEN FOUND.
1324 *
1325 * This page (m) is:
1326 * busy, so that we can play with it;
1327 * not absent, so that nobody else will fill it;
1328 * possibly eligible for pageout;
1329 *
1330 * The top-level page (first_m) is:
1331 * VM_PAGE_NULL if the page was found in the
1332 * top-level object;
1333 * busy, not absent, and ineligible for pageout.
1334 *
1335 * The current object (object) is locked. A paging
1336 * reference is held for the current and top-level
1337 * objects.
1338 */
1339
1340 #if TRACEFAULTPAGE
1341 dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1342 #endif
1343 #if EXTRA_ASSERTIONS
1344 if(m != VM_PAGE_NULL) {
1345 assert(m->busy && !m->absent);
1346 assert((first_m == VM_PAGE_NULL) ||
1347 (first_m->busy && !first_m->absent &&
1348 !first_m->active && !first_m->inactive));
1349 }
1350 #endif /* EXTRA_ASSERTIONS */
1351
1352 XPR(XPR_VM_FAULT,
1353 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1354 (integer_t)object, offset, (integer_t)m,
1355 (integer_t)first_object, (integer_t)first_m);
1356 /*
1357 * If the page is being written, but isn't
1358 * already owned by the top-level object,
1359 * we have to copy it into a new page owned
1360 * by the top-level object.
1361 */
1362
1363 if ((object != first_object) && (m != VM_PAGE_NULL)) {
1364 /*
1365 * We only really need to copy if we
1366 * want to write it.
1367 */
1368
1369 #if TRACEFAULTPAGE
1370 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1371 #endif
1372 if (fault_type & VM_PROT_WRITE) {
1373 vm_page_t copy_m;
1374
1375 assert(!must_be_resident);
1376
1377 /*
1378 * If we try to collapse first_object at this
1379 * point, we may deadlock when we try to get
1380 * the lock on an intermediate object (since we
1381 * have the bottom object locked). We can't
1382 * unlock the bottom object, because the page
1383 * we found may move (by collapse) if we do.
1384 *
1385 * Instead, we first copy the page. Then, when
1386 * we have no more use for the bottom object,
1387 * we unlock it and try to collapse.
1388 *
1389 * Note that we copy the page even if we didn't
1390 * need to... that's the breaks.
1391 */
1392
1393 /*
1394 * Allocate a page for the copy
1395 */
1396 copy_m = vm_page_grab();
1397 if (copy_m == VM_PAGE_NULL) {
1398 RELEASE_PAGE(m);
1399 vm_fault_cleanup(object, first_m);
1400 thread_interrupt_level(interruptible_state);
1401 return(VM_FAULT_MEMORY_SHORTAGE);
1402 }
1403
1404
1405 XPR(XPR_VM_FAULT,
1406 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1407 (integer_t)object, offset,
1408 (integer_t)m, (integer_t)copy_m, 0);
1409 vm_page_copy(m, copy_m);
1410
1411 /*
1412 * If another map is truly sharing this
1413 * page with us, we have to flush all
1414 * uses of the original page, since we
1415 * can't distinguish those which want the
1416 * original from those which need the
1417 * new copy.
1418 *
1419 * XXXO If we know that only one map has
1420 * access to this page, then we could
1421 * avoid the pmap_page_protect() call.
1422 */
1423
1424 vm_page_lock_queues();
1425 assert(!m->cleaning);
1426 pmap_page_protect(m->phys_page, VM_PROT_NONE);
1427 vm_page_deactivate(m);
1428 copy_m->dirty = TRUE;
1429 /*
1430 * Setting reference here prevents this fault from
1431 * being counted as a (per-thread) reactivate as well
1432 * as a copy-on-write.
1433 */
1434 first_m->reference = TRUE;
1435 vm_page_unlock_queues();
1436
1437 /*
1438 * We no longer need the old page or object.
1439 */
1440
1441 PAGE_WAKEUP_DONE(m);
1442 vm_object_paging_end(object);
1443 vm_object_unlock(object);
1444
1445 if (type_of_fault)
1446 *type_of_fault = DBG_COW_FAULT;
1447 VM_STAT(cow_faults++);
1448 current_task()->cow_faults++;
1449 object = first_object;
1450 offset = first_offset;
1451
1452 vm_object_lock(object);
1453 VM_PAGE_FREE(first_m);
1454 first_m = VM_PAGE_NULL;
1455 assert(copy_m->busy);
1456 vm_page_insert(copy_m, object, offset);
1457 m = copy_m;
1458
1459 /*
1460 * Now that we've gotten the copy out of the
1461 * way, let's try to collapse the top object.
1462 * But we have to play ugly games with
1463 * paging_in_progress to do that...
1464 */
1465
1466 vm_object_paging_end(object);
1467 vm_object_collapse(object);
1468 vm_object_paging_begin(object);
1469
1470 }
1471 else {
1472 *protection &= (~VM_PROT_WRITE);
1473 }
1474 }
1475
1476 /*
1477 * Now check whether the page needs to be pushed into the
1478 * copy object. The use of asymmetric copy on write for
1479 * shared temporary objects means that we may do two copies to
1480 * satisfy the fault; one above to get the page from a
1481 * shadowed object, and one here to push it into the copy.
1482 */
1483
1484 while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
1485 (m!= VM_PAGE_NULL)) {
1486 vm_object_offset_t copy_offset;
1487 vm_page_t copy_m;
1488
1489 #if TRACEFAULTPAGE
1490 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1491 #endif
1492 /*
1493 * If the page is being written, but hasn't been
1494 * copied to the copy-object, we have to copy it there.
1495 */
1496
1497 if ((fault_type & VM_PROT_WRITE) == 0) {
1498 *protection &= ~VM_PROT_WRITE;
1499 break;
1500 }
1501
1502 /*
1503 * If the page was guaranteed to be resident,
1504 * we must have already performed the copy.
1505 */
1506
1507 if (must_be_resident)
1508 break;
1509
1510 /*
1511 * Try to get the lock on the copy_object.
1512 */
1513 if (!vm_object_lock_try(copy_object)) {
1514 vm_object_unlock(object);
1515
1516 mutex_pause(); /* wait a bit */
1517
1518 vm_object_lock(object);
1519 continue;
1520 }
1521
1522 /*
1523 * Make another reference to the copy-object,
1524 * to keep it from disappearing during the
1525 * copy.
1526 */
1527 assert(copy_object->ref_count > 0);
1528 copy_object->ref_count++;
1529 VM_OBJ_RES_INCR(copy_object);
1530
1531 /*
1532 * Does the page exist in the copy?
1533 */
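		/*
		 * copy_object shadows first_object, so translate the
		 * top-level offset into copy_object's offset space.
		 */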
1534 copy_offset = first_offset - copy_object->shadow_offset;
1535 if (copy_object->size <= copy_offset)
1536 /*
1537 * Copy object doesn't cover this page -- do nothing.
1538 */
1539 ;
1540 else if ((copy_m =
1541 vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1542 /* Page currently exists in the copy object */
1543 if (copy_m->busy) {
1544 /*
1545 * If the page is being brought
1546 * in, wait for it and then retry.
1547 */
1548 RELEASE_PAGE(m);
1549 /* take an extra ref so object won't die */
1550 assert(copy_object->ref_count > 0);
1551 copy_object->ref_count++;
1552 vm_object_res_reference(copy_object);
1553 vm_object_unlock(copy_object);
1554 vm_fault_cleanup(object, first_m);
1555 counter(c_vm_fault_page_block_backoff_kernel++);
1556 vm_object_lock(copy_object);
1557 assert(copy_object->ref_count > 0);
1558 VM_OBJ_RES_DECR(copy_object);
1559 copy_object->ref_count--;
1560 assert(copy_object->ref_count > 0);
1561 copy_m = vm_page_lookup(copy_object, copy_offset);
1562 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1563 PAGE_ASSERT_WAIT(copy_m, interruptible);
1564 vm_object_unlock(copy_object);
1565 wait_result = thread_block(THREAD_CONTINUE_NULL);
1566 vm_object_deallocate(copy_object);
1567 goto backoff;
1568 } else {
1569 vm_object_unlock(copy_object);
1570 vm_object_deallocate(copy_object);
1571 thread_interrupt_level(interruptible_state);
1572 return VM_FAULT_RETRY;
1573 }
1574 }
1575 }
1576 else if (!PAGED_OUT(copy_object, copy_offset)) {
1577 /*
1578 * If PAGED_OUT is TRUE, then the page used to exist
1579 * in the copy-object, and has already been paged out.
1580 * We don't need to repeat this. If PAGED_OUT is
1581 * FALSE, then either we don't know (!pager_created,
1582 * for example) or it hasn't been paged out.
1583 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1584 * We must copy the page to the copy object.
1585 */
1586
1587 /*
1588 * Allocate a page for the copy
1589 */
1590 copy_m = vm_page_alloc(copy_object, copy_offset);
1591 if (copy_m == VM_PAGE_NULL) {
1592 RELEASE_PAGE(m);
1593 VM_OBJ_RES_DECR(copy_object);
1594 copy_object->ref_count--;
1595 assert(copy_object->ref_count > 0);
1596 vm_object_unlock(copy_object);
1597 vm_fault_cleanup(object, first_m);
1598 thread_interrupt_level(interruptible_state);
1599 return(VM_FAULT_MEMORY_SHORTAGE);
1600 }
1601
1602 /*
1603 * Must copy page into copy-object.
1604 */
1605
1606 vm_page_copy(m, copy_m);
1607
1608 /*
1609 * If the old page was in use by any users
1610 * of the copy-object, it must be removed
1611 * from all pmaps. (We can't know which
1612 * pmaps use it.)
1613 */
1614
1615 vm_page_lock_queues();
1616 assert(!m->cleaning);
1617 pmap_page_protect(m->phys_page, VM_PROT_NONE);
1618 copy_m->dirty = TRUE;
1619 vm_page_unlock_queues();
1620
1621 /*
1622 * If there's a pager, then immediately
1623 * page out this page, using the "initialize"
1624 * option. Else, we use the copy.
1625 */
1626
1627 if
1628 #if MACH_PAGEMAP
1629 ((!copy_object->pager_created) ||
1630 vm_external_state_get(
1631 copy_object->existence_map, copy_offset)
1632 == VM_EXTERNAL_STATE_ABSENT)
1633 #else
1634 (!copy_object->pager_created)
1635 #endif
1636 {
1637 vm_page_lock_queues();
1638 vm_page_activate(copy_m);
1639 vm_page_unlock_queues();
1640 PAGE_WAKEUP_DONE(copy_m);
1641 }
1642 else {
1643 assert(copy_m->busy == TRUE);
1644
1645 /*
1646 * The page is already ready for pageout:
1647 * not on pageout queues and busy.
1648 * Unlock everything except the
1649 * copy_object itself.
1650 */
1651
1652 vm_object_unlock(object);
1653
1654 /*
1655 * Write the page to the copy-object,
1656 * flushing it from the kernel.
1657 */
1658
1659 vm_pageout_initialize_page(copy_m);
1660
1661 /*
1662 * Since the pageout may have
1663 * temporarily dropped the
1664 * copy_object's lock, we
1665 * check whether we'll have
1666 * to deallocate the hard way.
1667 */
1668
1669 if ((copy_object->shadow != object) ||
1670 (copy_object->ref_count == 1)) {
1671 vm_object_unlock(copy_object);
1672 vm_object_deallocate(copy_object);
1673 vm_object_lock(object);
1674 continue;
1675 }
1676
1677 /*
1678 * Pick back up the old object's
1679 * lock. [It is safe to do so,
1680 * since it must be deeper in the
1681 * object tree.]
1682 */
1683
1684 vm_object_lock(object);
1685 }
1686
1687 /*
1688 * Because we're pushing a page upward
1689 * in the object tree, we must restart
1690 * any faults that are waiting here.
1691 * [Note that this is an expansion of
1692 * PAGE_WAKEUP that uses the THREAD_RESTART
1693 * wait result]. Can't turn off the page's
1694 * busy bit because we're not done with it.
1695 */
1696
1697 if (m->wanted) {
1698 m->wanted = FALSE;
1699 thread_wakeup_with_result((event_t) m,
1700 THREAD_RESTART);
1701 }
1702 }
1703
1704 /*
1705 * The reference count on copy_object must be
1706 * at least 2: one for our extra reference,
1707 * and at least one from the outside world
1708 * (we checked that when we last locked
1709 * copy_object).
1710 */
1711 copy_object->ref_count--;
1712 assert(copy_object->ref_count > 0);
1713 VM_OBJ_RES_DECR(copy_object);
1714 vm_object_unlock(copy_object);
1715
1716 break;
1717 }
1718
1719 *result_page = m;
1720 *top_page = first_m;
1721
1722 XPR(XPR_VM_FAULT,
1723 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1724 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1725 /*
1726 * If the page can be written, assume that it will be.
1727 * [Earlier, we restrict the permission to allow write
1728 * access only if the fault so required, so we don't
1729 * mark read-only data as dirty.]
1730 */
1731
1732 #if !VM_FAULT_STATIC_CONFIG
1733 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE) &&
1734 (m != VM_PAGE_NULL)) {
1735 m->dirty = TRUE;
1736 }
1737 #endif
1738 #if TRACEFAULTPAGE
1739 dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_page_deactivate_behind); /* (TEST/DEBUG) */
1740 #endif
1741 if (vm_page_deactivate_behind) {
1742 if (offset && /* don't underflow */
1743 (object->last_alloc == (offset - PAGE_SIZE_64))) {
1744 m = vm_page_lookup(object, object->last_alloc);
1745 if ((m != VM_PAGE_NULL) && !m->busy) {
1746 vm_page_lock_queues();
1747 vm_page_deactivate(m);
1748 vm_page_unlock_queues();
1749 }
1750 #if TRACEFAULTPAGE
1751 dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1752 #endif
1753 }
1754 object->last_alloc = offset;
1755 }
1756 #if TRACEFAULTPAGE
1757 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */
1758 #endif
1759 thread_interrupt_level(interruptible_state);
1760 if(*result_page == VM_PAGE_NULL) {
1761 vm_object_unlock(object);
1762 }
1763 return(VM_FAULT_SUCCESS);
1764
1765 #if 0
1766 block_and_backoff:
1767 vm_fault_cleanup(object, first_m);
1768
1769 counter(c_vm_fault_page_block_backoff_kernel++);
1770 thread_block(THREAD_CONTINUE_NULL);
1771 #endif
1772
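/*
 * All of the "goto backoff" paths above arrive here after having unlocked
 * everything, blocked, and dropped the extra object reference they took;
 * just map the wait result onto a retryable or interrupted fault.
 */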
1773 backoff:
1774 thread_interrupt_level(interruptible_state);
1775 if (wait_result == THREAD_INTERRUPTED)
1776 return VM_FAULT_INTERRUPTED;
1777 return VM_FAULT_RETRY;
1778
1779 #undef RELEASE_PAGE
1780 }
1781
1782 /*
1783 * Routine: vm_fault
1784 * Purpose:
1785 * Handle page faults, including pseudo-faults
1786 * used to change the wiring status of pages.
1787 * Returns:
1788 * Explicit continuations have been removed.
1789 * Implementation:
1790 * vm_fault and vm_fault_page save mucho state
1791 * in the moral equivalent of a closure. The state
1792 * structure is allocated when first entering vm_fault
1793 * and deallocated when leaving vm_fault.
1794 */
1795
1796 kern_return_t
1797 vm_fault(
1798 vm_map_t map,
1799 vm_offset_t vaddr,
1800 vm_prot_t fault_type,
1801 boolean_t change_wiring,
1802 int interruptible,
1803 pmap_t caller_pmap,
1804 vm_offset_t caller_pmap_addr)
1805 {
1806 	vm_map_version_t	version;	/* Map version for verification */
1807 boolean_t wired; /* Should mapping be wired down? */
1808 vm_object_t object; /* Top-level object */
1809 vm_object_offset_t offset; /* Top-level offset */
1810 vm_prot_t prot; /* Protection for mapping */
1811 vm_behavior_t behavior; /* Expected paging behavior */
1812 vm_object_offset_t lo_offset, hi_offset;
1813 vm_object_t old_copy_object; /* Saved copy object */
1814 vm_page_t result_page; /* Result of vm_fault_page */
1815 vm_page_t top_page; /* Placeholder page */
1816 kern_return_t kr;
1817
1818 register
1819 vm_page_t m; /* Fast access to result_page */
1820 kern_return_t error_code; /* page error reasons */
1821 register
1822 vm_object_t cur_object;
1823 register
1824 vm_object_offset_t cur_offset;
1825 vm_page_t cur_m;
1826 vm_object_t new_object;
1827 int type_of_fault;
1828 vm_map_t pmap_map = map;
1829 vm_map_t original_map = map;
1830 pmap_t pmap = NULL;
1831 boolean_t funnel_set = FALSE;
1832 funnel_t *curflock;
1833 thread_t cur_thread;
1834 boolean_t interruptible_state;
1835 unsigned int cache_attr;
1836 int write_startup_file = 0;
1837 vm_prot_t full_fault_type;
1838
1839
1840 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
1841 vaddr,
1842 0,
1843 0,
1844 0,
1845 0);
1846
1847 /* at present we do not fully check for execute permission */
1848 	/* we generally treat it as read except in certain device	    */
1849 /* memory settings */
1850 full_fault_type = fault_type;
1851 if(fault_type & VM_PROT_EXECUTE) {
1852 fault_type &= ~VM_PROT_EXECUTE;
1853 fault_type |= VM_PROT_READ;
1854 }
1855
1856 interruptible_state = thread_interrupt_level(interruptible);
1857
1858 /*
1859 * assume we will hit a page in the cache
1860 * otherwise, explicitly override with
1861 * the real fault type once we determine it
1862 */
1863 type_of_fault = DBG_CACHE_HIT_FAULT;
1864
1865 VM_STAT(faults++);
1866 current_task()->faults++;
1867
1868 /*
1869 * drop funnel if it is already held. Then restore while returning
1870 */
1871 cur_thread = current_thread();
1872
1873 if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) {
1874 funnel_set = TRUE;
1875 curflock = cur_thread->funnel_lock;
1876 thread_funnel_set( curflock , FALSE);
1877 }
1878
1879 RetryFault: ;
1880
1881 /*
1882 * Find the backing store object and offset into
1883 * it to begin the search.
1884 */
1885 map = original_map;
1886 vm_map_lock_read(map);
1887 kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
1888 &object, &offset,
1889 &prot, &wired,
1890 &behavior, &lo_offset, &hi_offset, &pmap_map);
1891
1892 pmap = pmap_map->pmap;
1893
1894 if (kr != KERN_SUCCESS) {
1895 vm_map_unlock_read(map);
1896 goto done;
1897 }
1898
1899 /*
1900 * If the page is wired, we must fault for the current protection
1901 * value, to avoid further faults.
1902 */
1903
1904 if (wired)
1905 fault_type = prot | VM_PROT_WRITE;
1906
1907 #if VM_FAULT_CLASSIFY
1908 /*
1909 * Temporary data gathering code
1910 */
1911 vm_fault_classify(object, offset, fault_type);
1912 #endif
1913 /*
1914 * Fast fault code. The basic idea is to do as much as
1915 * possible while holding the map lock and object locks.
1916 * Busy pages are not used until the object lock has to
1917 * be dropped to do something (copy, zero fill, pmap enter).
1918 * Similarly, paging references aren't acquired until that
1919 * point, and object references aren't used.
1920 *
1921 * If we can figure out what to do
1922 * (zero fill, copy on write, pmap enter) while holding
1923 * the locks, then it gets done. Otherwise, we give up,
1924 * and use the original fault path (which doesn't hold
1925 * the map lock, and relies on busy pages).
1926 * The give up cases include:
1927 * - Have to talk to pager.
1928 * - Page is busy, absent or in error.
1929 * - Pager has locked out desired access.
1930 * - Fault needs to be restarted.
1931 * - Have to push page into copy object.
1932 *
1933 * The code is an infinite loop that moves one level down
1934 * the shadow chain each time. cur_object and cur_offset
1935 * refer to the current object being examined. object and offset
1936 * are the original object from the map. The loop is at the
1937 * top level if and only if object and cur_object are the same.
1938 *
1939 * Invariants: Map lock is held throughout. Lock is held on
1940 * original object and cur_object (if different) when
1941 * continuing or exiting loop.
1942 *
1943 */
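/*
 *	A minimal, self-contained sketch of the decision structure
 *	described above (not compiled).  The toy_* types and the
 *	fast_fault_result values are illustrative assumptions only;
 *	locking, paging references, the actual copy-on-write push and
 *	the pmap work are all omitted.
 */
#if 0
struct toy_page {
	int	busy, error, absent;
};

struct toy_object {
	struct toy_object	*shadow;	/* next object down the chain */
	struct toy_object	*copy;		/* copy object, if any */
	long			shadow_offset;	/* bias into the shadow */
	int			pager_created;	/* backed by a pager? */
	struct toy_page		*(*lookup)(struct toy_object *, long);
};

enum fast_fault_result {
	FAST_MAPPED,		/* enter the page as found */
	FAST_MAPPED_READONLY,	/* enter it, but without write access */
	FAST_COW_COPY_UP,	/* copy the page up to the top object */
	FAST_ZERO_FILL,		/* allocate and zero a page at the top */
	FAST_SLOW_PATH		/* give up; take the vm_fault_page path */
};

static enum fast_fault_result
toy_fast_fault(struct toy_object *object, long offset, int is_write)
{
	struct toy_object	*cur_object = object;
	long			cur_offset = offset;

	for (;;) {
		struct toy_page *m = cur_object->lookup(cur_object, cur_offset);

		if (m != 0) {
			if (m->busy || m->error || m->absent)
				return FAST_SLOW_PATH;		/* unusual page */
			if (cur_object == object && object->copy == 0)
				return FAST_MAPPED;		/* top level, no copy */
			if (!is_write)
				return FAST_MAPPED_READONLY;	/* read fault anywhere */
			if (cur_object == object)
				return FAST_SLOW_PATH;		/* push into copy object */
			return FAST_COW_COPY_UP;		/* shadow-chain COW */
		}
		if (cur_object->pager_created)
			return FAST_SLOW_PATH;			/* must talk to the pager */
		if (cur_object->shadow == 0)
			return FAST_ZERO_FILL;			/* bottom of the chain */

		/* no page here: descend one level, biasing the offset */
		cur_offset += cur_object->shadow_offset;
		cur_object = cur_object->shadow;
	}
}
#endif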
1944
1945
1946 /*
1947 * If this page is to be inserted in a copy delay object
1948 * for writing, and if the object has a copy, then the
1949 * copy delay strategy is implemented in the slow fault page.
1950 */
1951 if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
1952 object->copy == VM_OBJECT_NULL ||
1953 (fault_type & VM_PROT_WRITE) == 0) {
1954 cur_object = object;
1955 cur_offset = offset;
1956
1957 while (TRUE) {
1958 m = vm_page_lookup(cur_object, cur_offset);
1959 if (m != VM_PAGE_NULL) {
1960 if (m->busy) {
1961 wait_result_t result;
1962
1963 if (object != cur_object)
1964 vm_object_unlock(object);
1965
1966 vm_map_unlock_read(map);
1967 if (pmap_map != map)
1968 vm_map_unlock(pmap_map);
1969
1970 #if !VM_FAULT_STATIC_CONFIG
1971 if (!vm_fault_interruptible)
1972 interruptible = THREAD_UNINT;
1973 #endif
1974 result = PAGE_ASSERT_WAIT(m, interruptible);
1975
1976 vm_object_unlock(cur_object);
1977
1978 if (result == THREAD_WAITING) {
1979 result = thread_block(THREAD_CONTINUE_NULL);
1980
1981 counter(c_vm_fault_page_block_busy_kernel++);
1982 }
1983 if (result == THREAD_AWAKENED || result == THREAD_RESTART)
1984 goto RetryFault;
1985
1986 kr = KERN_ABORTED;
1987 goto done;
1988 }
1989 if (m->unusual && (m->error || m->restart || m->private
1990 || m->absent || (fault_type & m->page_lock))) {
1991
1992 /*
1993 * Unusual case. Give up.
1994 */
1995 break;
1996 }
1997
1998 /*
1999 * Two cases of map in faults:
2000 * - At top level w/o copy object.
2001 * - Read fault anywhere.
2002 * --> must disallow write.
2003 */
2004
2005 if (object == cur_object &&
2006 object->copy == VM_OBJECT_NULL)
2007 goto FastMapInFault;
2008
2009 if ((fault_type & VM_PROT_WRITE) == 0) {
2010
2011 prot &= ~VM_PROT_WRITE;
2012
2013 /*
2014 * Set up to map the page ...
2015 * mark the page busy, drop
2016 * locks and take a paging reference
2017 * on the object with the page.
2018 */
2019
2020 if (object != cur_object) {
2021 vm_object_unlock(object);
2022 object = cur_object;
2023 }
2024 FastMapInFault:
2025 m->busy = TRUE;
2026
2027 vm_object_paging_begin(object);
2028
2029 FastPmapEnter:
2030 /*
2031 * Check a couple of global reasons to
2032 * be conservative about write access.
2033 * Then do the pmap_enter.
2034 */
2035 #if !VM_FAULT_STATIC_CONFIG
2036 if (vm_fault_dirty_handling
2037 #if MACH_KDB
2038 || db_watchpoint_list
2039 #endif
2040 && (fault_type & VM_PROT_WRITE) == 0)
2041 prot &= ~VM_PROT_WRITE;
2042 #else /* STATIC_CONFIG */
2043 #if MACH_KDB
2044 if (db_watchpoint_list
2045 && (fault_type & VM_PROT_WRITE) == 0)
2046 prot &= ~VM_PROT_WRITE;
2047 #endif /* MACH_KDB */
2048 #endif /* STATIC_CONFIG */
2049 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2050 if ((m->no_isync == TRUE) ||
2051 (cache_attr != VM_WIMG_DEFAULT)) {
2052 pmap_sync_caches_phys(m->phys_page);
2053 m->no_isync = FALSE;
2054 }
2055
2056 if(caller_pmap) {
2057 PMAP_ENTER(caller_pmap,
2058 caller_pmap_addr, m,
2059 prot, cache_attr, wired);
2060 } else {
2061 PMAP_ENTER(pmap, vaddr, m,
2062 prot, cache_attr, wired);
2063 }
2064
2065 /*
2066 * Grab the queues lock to manipulate
2067 * the page queues. Change wiring
2068 * case is obvious. In soft ref bits
2069 * case activate page only if it fell
2070 * off paging queues, otherwise just
2071 * activate it if it's inactive.
2072 *
2073 * NOTE: original vm_fault code will
2074 * move active page to back of active
2075 * queue. This code doesn't.
2076 */
2077 vm_page_lock_queues();
2078
2079 if (m->clustered) {
2080 vm_pagein_cluster_used++;
2081 m->clustered = FALSE;
2082 }
2083 m->reference = TRUE;
2084
2085 if (change_wiring) {
2086 if (wired)
2087 vm_page_wire(m);
2088 else
2089 vm_page_unwire(m);
2090 }
2091 #if VM_FAULT_STATIC_CONFIG
2092 else {
2093 if (!m->active && !m->inactive)
2094 vm_page_activate(m);
2095 }
2096 #else
2097 else if (software_reference_bits) {
2098 if (!m->active && !m->inactive)
2099 vm_page_activate(m);
2100 }
2101 else if (!m->active) {
2102 vm_page_activate(m);
2103 }
2104 #endif
2105 vm_page_unlock_queues();
2106
2107 /*
2108 * That's it, clean up and return.
2109 */
2110 PAGE_WAKEUP_DONE(m);
2111 vm_object_paging_end(object);
2112
2113 {
2114 tws_hash_line_t line;
2115 task_t task;
2116
2117 task = current_task();
2118 if((map != NULL) &&
2119 (task->dynamic_working_set != 0) &&
2120 !(object->private)) {
2121 kern_return_t kr;
2122 vm_object_t base_object;
2123 vm_object_offset_t base_offset;
2124 base_object = object;
2125 base_offset = cur_offset;
2126 while(base_object->shadow) {
2127 base_offset +=
2128 base_object->shadow_offset;
2129 base_object =
2130 base_object->shadow;
2131 }
2132 kr = tws_lookup((tws_hash_t)
2133 task->dynamic_working_set,
2134 base_offset, base_object,
2135 &line);
2136 if(kr == KERN_OPERATION_TIMED_OUT){
2137 write_startup_file = 1;
2138 } else if (kr != KERN_SUCCESS) {
2139 kr = tws_insert((tws_hash_t)
2140 task->dynamic_working_set,
2141 base_offset, base_object,
2142 vaddr, pmap_map);
2143 if(kr == KERN_NO_SPACE) {
2144 vm_object_unlock(object);
2145
2146 tws_expand_working_set(
2147 task->dynamic_working_set,
2148 TWS_HASH_LINE_COUNT,
2149 FALSE);
2150
2151 vm_object_lock(object);
2152 }
2153 if(kr ==
2154 KERN_OPERATION_TIMED_OUT) {
2155 write_startup_file = 1;
2156 }
2157 }
2158 }
2159 }
2160 vm_object_unlock(object);
2161
2162 vm_map_unlock_read(map);
2163 if(pmap_map != map)
2164 vm_map_unlock(pmap_map);
2165
2166 if(write_startup_file)
2167 tws_send_startup_info(current_task());
2168
2169 if (funnel_set)
2170 thread_funnel_set( curflock, TRUE);
2171
2172 thread_interrupt_level(interruptible_state);
2173
2174
2175 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2176 vaddr,
2177 type_of_fault & 0xff,
2178 KERN_SUCCESS,
2179 type_of_fault >> 8,
2180 0);
2181
2182 return KERN_SUCCESS;
2183 }
2184
2185 /*
2186 * Copy on write fault. If objects match, then
2187 * object->copy must not be NULL (else control
2188 * would be in previous code block), and we
2189 * have a potential push into the copy object
2190 * with which we won't cope here.
2191 */
2192
2193 if (cur_object == object)
2194 break;
2195 /*
2196 * This is now a shadow based copy on write
2197 * fault -- it requires a copy up the shadow
2198 * chain.
2199 *
2200 * Allocate a page in the original top level
2201 * object. Give up if allocate fails. Also
2202 * need to remember current page, as it's the
2203 * source of the copy.
2204 */
2205 cur_m = m;
2206 m = vm_page_grab();
2207 if (m == VM_PAGE_NULL) {
2208 break;
2209 }
2210 /*
2211 * Now do the copy. Mark the source busy
2212 * and take out paging references on both
2213 * objects.
2214 *
2215 * NOTE: This code holds the map lock across
2216 * the page copy.
2217 */
2218
2219 cur_m->busy = TRUE;
2220 vm_page_copy(cur_m, m);
2221 vm_page_insert(m, object, offset);
2222
2223 vm_object_paging_begin(cur_object);
2224 vm_object_paging_begin(object);
2225
2226 type_of_fault = DBG_COW_FAULT;
2227 VM_STAT(cow_faults++);
2228 current_task()->cow_faults++;
2229
2230 /*
2231 * Now cope with the source page and object
2232 * If the top object has a ref count of 1
2233 * then no other map can access it, and hence
2234 * it's not necessary to do the pmap_page_protect.
2235 */
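/*
 *	Note: as written, the code below always calls
 *	pmap_page_protect() on the source page; the ref_count == 1
 *	shortcut described above is not actually taken.
 */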
2236
2237
2238 vm_page_lock_queues();
2239 vm_page_deactivate(cur_m);
2240 m->dirty = TRUE;
2241 pmap_page_protect(cur_m->phys_page,
2242 VM_PROT_NONE);
2243 vm_page_unlock_queues();
2244
2245 PAGE_WAKEUP_DONE(cur_m);
2246 vm_object_paging_end(cur_object);
2247 vm_object_unlock(cur_object);
2248
2249 /*
2250 * Slight hack: call vm_object_collapse() here
2251 * and then reuse the common map-in code below.
2252 * Note that the object lock was taken above.
2253 */
2254
2255 vm_object_paging_end(object);
2256 vm_object_collapse(object);
2257 vm_object_paging_begin(object);
2258
2259 goto FastPmapEnter;
2260 }
2261 else {
2262
2263 /*
2264 * No page at cur_object, cur_offset
2265 */
2266
2267 if (cur_object->pager_created) {
2268
2269 /*
2270 * Have to talk to the pager. Give up.
2271 */
2272 break;
2273 }
2274
2275
2276 if (cur_object->shadow == VM_OBJECT_NULL) {
2277
2278 if (cur_object->shadow_severed) {
2279 vm_object_paging_end(object);
2280 vm_object_unlock(object);
2281 vm_map_unlock_read(map);
2282 if(pmap_map != map)
2283 vm_map_unlock(pmap_map);
2284
2285 if(write_startup_file)
2286 tws_send_startup_info(
2287 current_task());
2288
2289 if (funnel_set) {
2290 thread_funnel_set( curflock, TRUE);
2291 funnel_set = FALSE;
2292 }
2293 thread_interrupt_level(interruptible_state);
2294
2295 return KERN_MEMORY_ERROR;
2296 }
2297
2298 /*
2299 * Zero fill fault. Page gets
2300 * filled in top object. Insert
2301 * page, then drop any lower lock.
2302 * Give up if no page.
2303 */
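/*
 *	For example (hypothetical numbers): with vm_page_free_target
 *	at 2000 pages and vm_page_free_min at 1000, the threshold below
 *	is 2000 - ((2000 - 1000) >> 2) = 1750; if fewer than 1750 pages
 *	are free, the fast path declines to consume one for zero fill
 *	and falls back to the slow path, which can wait for memory.
 */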
2304 if ((vm_page_free_target -
2305 ((vm_page_free_target-vm_page_free_min)>>2))
2306 > vm_page_free_count) {
2307 break;
2308 }
2309 m = vm_page_alloc(object, offset);
2310 if (m == VM_PAGE_NULL) {
2311 break;
2312 }
2313 /*
2314 * This is a zero-fill or initial fill
2315 * page fault. As such, we consider it
2316 * undefined with respect to instruction
2317 * execution. i.e. it is the responsibility
2318 * of higher layers to call for an instruction
2319 * sync after changing the contents and before
2320 * sending a program into this area. We
2321 * choose this approach for performance
2322 */
2323 */
2324 m->no_isync = FALSE;
2325
2326 if (cur_object != object)
2327 vm_object_unlock(cur_object);
2328
2329 vm_object_paging_begin(object);
2330 vm_object_unlock(object);
2331
2332 /*
2333 * Now zero fill page and map it.
2334 * the page is probably going to
2335 * be written soon, so don't bother
2336 * to clear the modified bit
2337 *
2338 * NOTE: This code holds the map
2339 * lock across the zero fill.
2340 */
2341
2342 if (!map->no_zero_fill) {
2343 vm_page_zero_fill(m);
2344 type_of_fault = DBG_ZERO_FILL_FAULT;
2345 VM_STAT(zero_fill_count++);
2346 }
2347 vm_page_lock_queues();
2348 VM_PAGE_QUEUES_REMOVE(m);
2349
2350 m->page_ticket = vm_page_ticket;
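/*
 *	Pages zero-filled on behalf of objects larger than 0x80000
 *	bytes (512KB) go on the dedicated zero-fill queue; pages of
 *	smaller objects go directly onto the inactive queue.
 */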
2351 if(m->object->size > 0x80000) {
2352 m->zero_fill = TRUE;
2353 /* depends on the queues lock */
2354 vm_zf_count += 1;
2355 queue_enter(&vm_page_queue_zf,
2356 m, vm_page_t, pageq);
2357 } else {
2358 queue_enter(
2359 &vm_page_queue_inactive,
2360 m, vm_page_t, pageq);
2361 }
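/*
 *	vm_page_ticket_roll counts pages issued under the current
 *	ticket; after VM_PAGE_TICKETS_IN_ROLL pages the ticket value
 *	advances, wrapping at VM_PAGE_TICKET_ROLL_IDS, so zero-fill
 *	pages are effectively stamped in fixed-size batches.
 */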
2362 vm_page_ticket_roll++;
2363 if(vm_page_ticket_roll ==
2364 VM_PAGE_TICKETS_IN_ROLL) {
2365 vm_page_ticket_roll = 0;
2366 if(vm_page_ticket ==
2367 VM_PAGE_TICKET_ROLL_IDS)
2368 vm_page_ticket= 0;
2369 else
2370 vm_page_ticket++;
2371 }
2372
2373 m->inactive = TRUE;
2374 vm_page_inactive_count++;
2375 vm_page_unlock_queues();
2376 vm_object_lock(object);
2377
2378 goto FastPmapEnter;
2379 }
2380
2381 /*
2382 * On to the next level
2383 */
2384
2385 cur_offset += cur_object->shadow_offset;
2386 new_object = cur_object->shadow;
2387 vm_object_lock(new_object);
2388 if (cur_object != object)
2389 vm_object_unlock(cur_object);
2390 cur_object = new_object;
2391
2392 continue;
2393 }
2394 }
2395
2396 /*
2397 * Cleanup from fast fault failure. Drop any object
2398 * lock other than original and drop map lock.
2399 */
2400
2401 if (object != cur_object)
2402 vm_object_unlock(cur_object);
2403 }
2404 vm_map_unlock_read(map);
2405
2406 if(pmap_map != map)
2407 vm_map_unlock(pmap_map);
2408
2409 /*
2410 * Make a reference to this object to
2411 * prevent its disposal while we are messing with
2412 * it. Once we have the reference, the map is free
2413 * to be diddled. Since objects reference their
2414 * shadows (and copies), they will stay around as well.
2415 */
2416
2417 assert(object->ref_count > 0);
2418 object->ref_count++;
2419 vm_object_res_reference(object);
2420 vm_object_paging_begin(object);
2421
2422 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2423 {
2424 tws_hash_line_t line;
2425 task_t task;
2426 kern_return_t kr;
2427
2428 task = current_task();
2429 if((map != NULL) &&
2430 (task->dynamic_working_set != 0)
2431 && !(object->private)) {
2432 vm_object_t base_object;
2433 vm_object_offset_t base_offset;
2434 base_object = object;
2435 base_offset = offset;
2436 while(base_object->shadow) {
2437 base_offset +=
2438 base_object->shadow_offset;
2439 base_object =
2440 base_object->shadow;
2441 }
2442 kr = tws_lookup((tws_hash_t)
2443 task->dynamic_working_set,
2444 base_offset, base_object,
2445 &line);
2446 if(kr == KERN_OPERATION_TIMED_OUT){
2447 write_startup_file = 1;
2448 } else if (kr != KERN_SUCCESS) {
2453 kr = tws_insert((tws_hash_t)
2454 task->dynamic_working_set,
2455 base_offset, base_object,
2456 vaddr, pmap_map);
2457 if(kr == KERN_NO_SPACE) {
2458 vm_object_unlock(object);
2459 tws_expand_working_set(
2460 task->dynamic_working_set,
2461 TWS_HASH_LINE_COUNT,
2462 FALSE);
2463 vm_object_lock(object);
2464 }
2465 if(kr == KERN_OPERATION_TIMED_OUT) {
2466 write_startup_file = 1;
2467 }
2468 }
2469 }
2470 }
2471 kr = vm_fault_page(object, offset, fault_type,
2472 (change_wiring && !wired),
2473 interruptible,
2474 lo_offset, hi_offset, behavior,
2475 &prot, &result_page, &top_page,
2476 &type_of_fault,
2477 &error_code, map->no_zero_fill, FALSE, map, vaddr);
2478
2479 /*
2480 * If we didn't succeed, lose the object reference immediately.
2481 */
2482
2483 if (kr != VM_FAULT_SUCCESS)
2484 vm_object_deallocate(object);
2485
2486 /*
2487 * See why we failed, and take corrective action.
2488 */
2489
2490 switch (kr) {
2491 case VM_FAULT_SUCCESS:
2492 break;
2493 case VM_FAULT_MEMORY_SHORTAGE:
2494 if (vm_page_wait((change_wiring) ?
2495 THREAD_UNINT :
2496 THREAD_ABORTSAFE))
2497 goto RetryFault;
2498 /* fall thru */
2499 case VM_FAULT_INTERRUPTED:
2500 kr = KERN_ABORTED;
2501 goto done;
2502 case VM_FAULT_RETRY:
2503 goto RetryFault;
2504 case VM_FAULT_FICTITIOUS_SHORTAGE:
2505 vm_page_more_fictitious();
2506 goto RetryFault;
2507 case VM_FAULT_MEMORY_ERROR:
2508 if (error_code)
2509 kr = error_code;
2510 else
2511 kr = KERN_MEMORY_ERROR;
2512 goto done;
2513 }
2514
2515 m = result_page;
2516
2517 if(m != VM_PAGE_NULL) {
2518 assert((change_wiring && !wired) ?
2519 (top_page == VM_PAGE_NULL) :
2520 ((top_page == VM_PAGE_NULL) == (m->object == object)));
2521 }
2522
2523 /*
2524 * How to clean up the result of vm_fault_page. This
2525 * happens whether the mapping is entered or not.
2526 */
2527
2528 #define UNLOCK_AND_DEALLOCATE \
2529 MACRO_BEGIN \
2530 vm_fault_cleanup(m->object, top_page); \
2531 vm_object_deallocate(object); \
2532 MACRO_END
2533
2534 /*
2535 * What to do with the resulting page from vm_fault_page
2536 * if it doesn't get entered into the physical map:
2537 */
2538
2539 #define RELEASE_PAGE(m) \
2540 MACRO_BEGIN \
2541 PAGE_WAKEUP_DONE(m); \
2542 vm_page_lock_queues(); \
2543 if (!m->active && !m->inactive) \
2544 vm_page_activate(m); \
2545 vm_page_unlock_queues(); \
2546 MACRO_END
2547
2548 /*
2549 * We must verify that the maps have not changed
2550 * since our last lookup.
2551 */
2552
2553 if(m != VM_PAGE_NULL) {
2554 old_copy_object = m->object->copy;
2555 vm_object_unlock(m->object);
2556 } else {
2557 old_copy_object = VM_OBJECT_NULL;
2558 }
2559 if ((map != original_map) || !vm_map_verify(map, &version)) {
2560 vm_object_t retry_object;
2561 vm_object_offset_t retry_offset;
2562 vm_prot_t retry_prot;
2563
2564 /*
2565 * To avoid trying to write_lock the map while another
2566 * thread has it read_locked (in vm_map_pageable), we
2567 * do not try for write permission. If the page is
2568 * still writable, we will get write permission. If it
2569 * is not, or has been marked needs_copy, we enter the
2570 * mapping without write permission, and will merely
2571 * take another fault.
2572 */
2573 map = original_map;
2574 vm_map_lock_read(map);
2575 kr = vm_map_lookup_locked(&map, vaddr,
2576 fault_type & ~VM_PROT_WRITE, &version,
2577 &retry_object, &retry_offset, &retry_prot,
2578 &wired, &behavior, &lo_offset, &hi_offset,
2579 &pmap_map);
2580 pmap = pmap_map->pmap;
2581
2582 if (kr != KERN_SUCCESS) {
2583 vm_map_unlock_read(map);
2584 if(m != VM_PAGE_NULL) {
2585 vm_object_lock(m->object);
2586 RELEASE_PAGE(m);
2587 UNLOCK_AND_DEALLOCATE;
2588 } else {
2589 vm_object_deallocate(object);
2590 }
2591 goto done;
2592 }
2593
2594 vm_object_unlock(retry_object);
2595 if(m != VM_PAGE_NULL) {
2596 vm_object_lock(m->object);
2597 } else {
2598 vm_object_lock(object);
2599 }
2600
2601 if ((retry_object != object) ||
2602 (retry_offset != offset)) {
2603 vm_map_unlock_read(map);
2604 if(pmap_map != map)
2605 vm_map_unlock(pmap_map);
2606 if(m != VM_PAGE_NULL) {
2607 RELEASE_PAGE(m);
2608 UNLOCK_AND_DEALLOCATE;
2609 } else {
2610 vm_object_deallocate(object);
2611 }
2612 goto RetryFault;
2613 }
2614
2615 /*
2616 * Check whether the protection has changed or the object
2617 * has been copied while we left the map unlocked.
2618 */
2619 prot &= retry_prot;
2620 if(m != VM_PAGE_NULL) {
2621 vm_object_unlock(m->object);
2622 } else {
2623 vm_object_unlock(object);
2624 }
2625 }
2626 if(m != VM_PAGE_NULL) {
2627 vm_object_lock(m->object);
2628 } else {
2629 vm_object_lock(object);
2630 }
2631
2632 /*
2633 * If the copy object changed while the top-level object
2634 * was unlocked, then we must take away write permission.
2635 */
2636
2637 if(m != VM_PAGE_NULL) {
2638 if (m->object->copy != old_copy_object)
2639 prot &= ~VM_PROT_WRITE;
2640 }
2641
2642 /*
2643 * If we want to wire down this page, but no longer have
2644 * adequate permissions, we must start all over.
2645 */
2646
2647 if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2648 vm_map_verify_done(map, &version);
2649 if(pmap_map != map)
2650 vm_map_unlock(pmap_map);
2651 if(m != VM_PAGE_NULL) {
2652 RELEASE_PAGE(m);
2653 UNLOCK_AND_DEALLOCATE;
2654 } else {
2655 vm_object_deallocate(object);
2656 }
2657 goto RetryFault;
2658 }
2659
2660 /*
2661 * Put this page into the physical map.
2662 * We had to do the unlock above because pmap_enter
2663 * may cause other faults. The page may be on
2664 * the pageout queues. If the pageout daemon comes
2665 * across the page, it will remove it from the queues.
2666 */
2667 if (m != VM_PAGE_NULL) {
2668 if (m->no_isync == TRUE) {
2669 pmap_sync_caches_phys(m->phys_page);
2670
2671 m->no_isync = FALSE;
2672 }
2673
2674 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2675
2676 if(caller_pmap) {
2677 PMAP_ENTER(caller_pmap,
2678 caller_pmap_addr, m,
2679 prot, cache_attr, wired);
2680 } else {
2681 PMAP_ENTER(pmap, vaddr, m,
2682 prot, cache_attr, wired);
2683 }
2684 {
2685 tws_hash_line_t line;
2686 task_t task;
2687 kern_return_t kr;
2688
2689 task = current_task();
2690 if((map != NULL) &&
2691 (task->dynamic_working_set != 0)
2692 && (object->private)) {
2693 vm_object_t base_object;
2694 vm_object_offset_t base_offset;
2695 base_object = m->object;
2696 base_offset = m->offset;
2697 while(base_object->shadow) {
2698 base_offset +=
2699 base_object->shadow_offset;
2700 base_object =
2701 base_object->shadow;
2702 }
2703 kr = tws_lookup((tws_hash_t)
2704 task->dynamic_working_set,
2705 base_offset, base_object, &line);
2706 if(kr == KERN_OPERATION_TIMED_OUT){
2707 write_startup_file = 1;
2708 } else if (kr != KERN_SUCCESS) {
2713 kr = tws_insert((tws_hash_t)
2714 task->dynamic_working_set,
2715 base_offset, base_object,
2716 vaddr, pmap_map);
2717 if(kr == KERN_NO_SPACE) {
2718 vm_object_unlock(m->object);
2719 tws_expand_working_set(
2720 task->dynamic_working_set,
2721 TWS_HASH_LINE_COUNT,
2722 FALSE);
2723 vm_object_lock(m->object);
2724 }
2725 if(kr == KERN_OPERATION_TIMED_OUT) {
2726 write_startup_file = 1;
2727 }
2728 }
2729 }
2730 }
2731 } else {
2732
2733 #ifndef i386
2734 int memattr;
2735 vm_map_entry_t entry;
2736 vm_offset_t laddr;
2737 vm_offset_t ldelta, hdelta;
2738
2739 /*
2740 * do a pmap block mapping from the physical address
2741 * in the object
2742 */
2743
2744 /* While we do not worry about execution protection in */
2745 /* general, certain pages may have instruction execution */
2746 /* disallowed. We will check here, and if not allowed */
2747 /* to execute, we return with a protection failure. */
2748
2749 if((full_fault_type & VM_PROT_EXECUTE) &&
2750 (pmap_canExecute((ppnum_t)
2751 (object->shadow_offset >> 12)) < 1)) {
2752
2753 vm_map_verify_done(map, &version);
2754 if(pmap_map != map)
2755 vm_map_unlock(pmap_map);
2756 vm_fault_cleanup(object, top_page);
2757 vm_object_deallocate(object);
2758 kr = KERN_PROTECTION_FAILURE;
2759 goto done;
2760 }
2761
2762 if(pmap_map != map) {
2763 vm_map_unlock(pmap_map);
2764 }
2765 if (original_map != map) {
2766 vm_map_unlock_read(map);
2767 vm_map_lock_read(original_map);
2768 map = original_map;
2769 }
2770 pmap_map = map;
2771
2772 laddr = vaddr;
2773 hdelta = 0xFFFFF000;
2774 ldelta = 0xFFFFF000;
2775
2776
2777 while(vm_map_lookup_entry(map, laddr, &entry)) {
2778 if(ldelta > (laddr - entry->vme_start))
2779 ldelta = laddr - entry->vme_start;
2780 if(hdelta > (entry->vme_end - laddr))
2781 hdelta = entry->vme_end - laddr;
2782 if(entry->is_sub_map) {
2783
2784 laddr = (laddr - entry->vme_start)
2785 + entry->offset;
2786 vm_map_lock_read(entry->object.sub_map);
2787 if(map != pmap_map)
2788 vm_map_unlock_read(map);
2789 if(entry->use_pmap) {
2790 vm_map_unlock_read(pmap_map);
2791 pmap_map = entry->object.sub_map;
2792 }
2793 map = entry->object.sub_map;
2794
2795 } else {
2796 break;
2797 }
2798 }
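/*
 *	For example (hypothetical numbers, no submaps so laddr == vaddr):
 *	if the final entry covers [0x10000000, 0x10080000) and vaddr is
 *	0x10003000, the loop above leaves ldelta = 0x3000 and
 *	hdelta = 0x7d000 (both start out capped at 0xFFFFF000), so the
 *	block mapping below begins at vaddr - ldelta = 0x10000000 and
 *	spans ldelta + hdelta = 0x80000 bytes, i.e. the entry's whole range.
 */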
2799
2800 if(vm_map_lookup_entry(map, laddr, &entry) &&
2801 (entry->object.vm_object != NULL) &&
2802 (entry->object.vm_object == object)) {
2803
2804
2805 if(caller_pmap) {
2806 /* Set up a block mapped area */
2807 pmap_map_block(caller_pmap,
2808 (addr64_t)(caller_pmap_addr - ldelta),
2809 (((vm_offset_t)
2810 (entry->object.vm_object->shadow_offset))
2811 + entry->offset +
2812 (laddr - entry->vme_start)
2813 - ldelta)>>12,
2814 ldelta + hdelta, prot,
2815 (VM_WIMG_MASK & (int)object->wimg_bits), 0);
2816 } else {
2817 /* Set up a block mapped area */
2818 pmap_map_block(pmap_map->pmap,
2819 (addr64_t)(vaddr - ldelta),
2820 (((vm_offset_t)
2821 (entry->object.vm_object->shadow_offset))
2822 + entry->offset +
2823 (laddr - entry->vme_start) - ldelta)>>12,
2824 ldelta + hdelta, prot,
2825 (VM_WIMG_MASK & (int)object->wimg_bits), 0);
2826 }
2827 }
2828 #else
2829 #ifdef notyet
2830 if(caller_pmap) {
2831 pmap_enter(caller_pmap, caller_pmap_addr,
2832 object->shadow_offset>>12, prot, 0, TRUE);
2833 } else {
2834 pmap_enter(pmap, vaddr,
2835 object->shadow_offset>>12, prot, 0, TRUE);
2836 }
2837 /* Map it in */
2838 #endif
2839 #endif
2840
2841 }
2842
2843 /*
2844 * If the page is not wired down and isn't already
2845 * on a pageout queue, then put it where the
2846 * pageout daemon can find it.
2847 */
2848 if(m != VM_PAGE_NULL) {
2849 vm_page_lock_queues();
2850
2851 if (change_wiring) {
2852 if (wired)
2853 vm_page_wire(m);
2854 else
2855 vm_page_unwire(m);
2856 }
2857 #if VM_FAULT_STATIC_CONFIG
2858 else {
2859 if (!m->active && !m->inactive)
2860 vm_page_activate(m);
2861 m->reference = TRUE;
2862 }
2863 #else
2864 else if (software_reference_bits) {
2865 if (!m->active && !m->inactive)
2866 vm_page_activate(m);
2867 m->reference = TRUE;
2868 } else {
2869 vm_page_activate(m);
2870 }
2871 #endif
2872 vm_page_unlock_queues();
2873 }
2874
2875 /*
2876 * Unlock everything, and return
2877 */
2878
2879 vm_map_verify_done(map, &version);
2880 if(pmap_map != map)
2881 vm_map_unlock(pmap_map);
2882 if(m != VM_PAGE_NULL) {
2883 PAGE_WAKEUP_DONE(m);
2884 UNLOCK_AND_DEALLOCATE;
2885 } else {
2886 vm_fault_cleanup(object, top_page);
2887 vm_object_deallocate(object);
2888 }
2889 kr = KERN_SUCCESS;
2890
2891 #undef UNLOCK_AND_DEALLOCATE
2892 #undef RELEASE_PAGE
2893
2894 done:
2895 if(write_startup_file)
2896 tws_send_startup_info(current_task());
2897 if (funnel_set) {
2898 thread_funnel_set( curflock, TRUE);
2899 funnel_set = FALSE;
2900 }
2901 thread_interrupt_level(interruptible_state);
2902
2903 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2904 vaddr,
2905 type_of_fault & 0xff,
2906 kr,
2907 type_of_fault >> 8,
2908 0);
2909
2910 return(kr);
2911 }
2912
2913 /*
2914 * vm_fault_wire:
2915 *
2916 * Wire down a range of virtual addresses in a map.
2917 */
2918 kern_return_t
2919 vm_fault_wire(
2920 vm_map_t map,
2921 vm_map_entry_t entry,
2922 pmap_t pmap,
2923 vm_offset_t pmap_addr)
2924 {
2925
2926 register vm_offset_t va;
2927 register vm_offset_t end_addr = entry->vme_end;
2928 register kern_return_t rc;
2929
2930 assert(entry->in_transition);
2931
2932 if ((entry->object.vm_object != NULL) &&
2933 !entry->is_sub_map &&
2934 entry->object.vm_object->phys_contiguous) {
2935 return KERN_SUCCESS;
2936 }
2937
2938 /*
2939 * Inform the physical mapping system that the
2940 * range of addresses may not fault, so that
2941 * page tables and such can be locked down as well.
2942 */
2943
2944 pmap_pageable(pmap, pmap_addr,
2945 pmap_addr + (end_addr - entry->vme_start), FALSE);
2946
2947 /*
2948 * We simulate a fault to get the page and enter it
2949 * in the physical map.
2950 */
2951
2952 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
2953 if ((rc = vm_fault_wire_fast(
2954 map, va, entry, pmap,
2955 pmap_addr + (va - entry->vme_start)
2956 )) != KERN_SUCCESS) {
2957 rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
2958 (pmap == kernel_pmap) ?
2959 THREAD_UNINT : THREAD_ABORTSAFE,
2960 pmap, pmap_addr + (va - entry->vme_start));
2961 }
2962
2963 if (rc != KERN_SUCCESS) {
2964 struct vm_map_entry tmp_entry = *entry;
2965
2966 /* unwire wired pages */
2967 tmp_entry.vme_end = va;
2968 vm_fault_unwire(map,
2969 &tmp_entry, FALSE, pmap, pmap_addr);
2970
2971 return rc;
2972 }
2973 }
2974 return KERN_SUCCESS;
2975 }
2976
2977 /*
2978 * vm_fault_unwire:
2979 *
2980 * Unwire a range of virtual addresses in a map.
2981 */
2982 void
2983 vm_fault_unwire(
2984 vm_map_t map,
2985 vm_map_entry_t entry,
2986 boolean_t deallocate,
2987 pmap_t pmap,
2988 vm_offset_t pmap_addr)
2989 {
2990 register vm_offset_t va;
2991 register vm_offset_t end_addr = entry->vme_end;
2992 vm_object_t object;
2993
2994 object = (entry->is_sub_map)
2995 ? VM_OBJECT_NULL : entry->object.vm_object;
2996
2997 /*
2998 * Since the pages are wired down, we must be able to
2999 * get their mappings from the physical map system.
3000 */
3001
3002 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3003 pmap_change_wiring(pmap,
3004 pmap_addr + (va - entry->vme_start), FALSE);
3005
3006 if (object == VM_OBJECT_NULL) {
3007 (void) vm_fault(map, va, VM_PROT_NONE,
3008 TRUE, THREAD_UNINT, pmap, pmap_addr);
3009 } else if (object->phys_contiguous) {
3010 continue;
3011 } else {
3012 vm_prot_t prot;
3013 vm_page_t result_page;
3014 vm_page_t top_page;
3015 vm_object_t result_object;
3016 vm_fault_return_t result;
3017
3018 do {
3019 prot = VM_PROT_NONE;
3020
3021 vm_object_lock(object);
3022 vm_object_paging_begin(object);
3023 XPR(XPR_VM_FAULT,
3024 "vm_fault_unwire -> vm_fault_page\n",
3025 0,0,0,0,0);
3026 result = vm_fault_page(object,
3027 entry->offset +
3028 (va - entry->vme_start),
3029 VM_PROT_NONE, TRUE,
3030 THREAD_UNINT,
3031 entry->offset,
3032 entry->offset +
3033 (entry->vme_end
3034 - entry->vme_start),
3035 entry->behavior,
3036 &prot,
3037 &result_page,
3038 &top_page,
3039 (int *)0,
3040 0, map->no_zero_fill,
3041 FALSE, NULL, 0);
3042 } while (result == VM_FAULT_RETRY);
3043
3044 if (result != VM_FAULT_SUCCESS)
3045 panic("vm_fault_unwire: failure");
3046
3047 result_object = result_page->object;
3048 if (deallocate) {
3049 assert(!result_page->fictitious);
3050 pmap_page_protect(result_page->phys_page,
3051 VM_PROT_NONE);
3052 VM_PAGE_FREE(result_page);
3053 } else {
3054 vm_page_lock_queues();
3055 vm_page_unwire(result_page);
3056 vm_page_unlock_queues();
3057 PAGE_WAKEUP_DONE(result_page);
3058 }
3059
3060 vm_fault_cleanup(result_object, top_page);
3061 }
3062 }
3063
3064 /*
3065 * Inform the physical mapping system that the range
3066 * of addresses may fault, so that page tables and
3067 * such may be unwired themselves.
3068 */
3069
3070 pmap_pageable(pmap, pmap_addr,
3071 pmap_addr + (end_addr - entry->vme_start), TRUE);
3072
3073 }
3074
3075 /*
3076 * vm_fault_wire_fast:
3077 *
3078 * Handle common case of a wire down page fault at the given address.
3079 * If successful, the page is inserted into the associated physical map.
3080 * The map entry is passed in to avoid the overhead of a map lookup.
3081 *
3082 * NOTE: the given address should be truncated to the
3083 * proper page address.
3084 *
3085 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
3086 * a standard error specifying why the fault is fatal is returned.
3087 *
3088 * The map in question must be referenced, and remains so.
3089 * Caller has a read lock on the map.
3090 *
3091 * This is a stripped version of vm_fault() for wiring pages. Anything
3092 * other than the common case will return KERN_FAILURE, and the caller
3093 * is expected to call vm_fault().
3094 */
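/*
 *	For the intended usage pattern, see vm_fault_wire() above: it
 *	tries vm_fault_wire_fast() for each page first and falls back
 *	to the full vm_fault() path whenever this routine fails.
 */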
3095 kern_return_t
3096 vm_fault_wire_fast(
3097 vm_map_t map,
3098 vm_offset_t va,
3099 vm_map_entry_t entry,
3100 pmap_t pmap,
3101 vm_offset_t pmap_addr)
3102 {
3103 vm_object_t object;
3104 vm_object_offset_t offset;
3105 register vm_page_t m;
3106 vm_prot_t prot;
3107 thread_act_t thr_act;
3108 unsigned int cache_attr;
3109
3110 VM_STAT(faults++);
3111
3112 if((thr_act=current_act()) && (thr_act->task != TASK_NULL))
3113 thr_act->task->faults++;
3114
3115 /*
3116 * Recovery actions
3117 */
3118
3119 #undef RELEASE_PAGE
3120 #define RELEASE_PAGE(m) { \
3121 PAGE_WAKEUP_DONE(m); \
3122 vm_page_lock_queues(); \
3123 vm_page_unwire(m); \
3124 vm_page_unlock_queues(); \
3125 }
3126
3127
3128 #undef UNLOCK_THINGS
3129 #define UNLOCK_THINGS { \
3130 object->paging_in_progress--; \
3131 vm_object_unlock(object); \
3132 }
3133
3134 #undef UNLOCK_AND_DEALLOCATE
3135 #define UNLOCK_AND_DEALLOCATE { \
3136 UNLOCK_THINGS; \
3137 vm_object_deallocate(object); \
3138 }
3139 /*
3140 * Give up and have caller do things the hard way.
3141 */
3142
3143 #define GIVE_UP { \
3144 UNLOCK_AND_DEALLOCATE; \
3145 return(KERN_FAILURE); \
3146 }
3147
3148
3149 /*
3150 * If this entry is not directly to a vm_object, bail out.
3151 */
3152 if (entry->is_sub_map)
3153 return(KERN_FAILURE);
3154
3155 /*
3156 * Find the backing store object and offset into it.
3157 */
3158
3159 object = entry->object.vm_object;
3160 offset = (va - entry->vme_start) + entry->offset;
3161 prot = entry->protection;
3162
3163 /*
3164 * Make a reference to this object to prevent its
3165 * disposal while we are messing with it.
3166 */
3167
3168 vm_object_lock(object);
3169 assert(object->ref_count > 0);
3170 object->ref_count++;
3171 vm_object_res_reference(object);
3172 object->paging_in_progress++;
3173
3174 /*
3175 * INVARIANTS (through entire routine):
3176 *
3177 * 1) At all times, we must either have the object
3178 * lock or a busy page in some object to prevent
3179 * some other thread from trying to bring in
3180 * the same page.
3181 *
3182 * 2) Once we have a busy page, we must remove it from
3183 * the pageout queues, so that the pageout daemon
3184 * will not grab it away.
3185 *
3186 */
3187
3188 /*
3189 * Look for page in top-level object. If it's not there or
3190 * there's something going on, give up.
3191 */
3192 m = vm_page_lookup(object, offset);
3193 if ((m == VM_PAGE_NULL) || (m->busy) ||
3194 (m->unusual && ( m->error || m->restart || m->absent ||
3195 prot & m->page_lock))) {
3196
3197 GIVE_UP;
3198 }
3199
3200 /*
3201 * Wire the page down now. All bail outs beyond this
3202 * point must unwire the page.
3203 */
3204
3205 vm_page_lock_queues();
3206 vm_page_wire(m);
3207 vm_page_unlock_queues();
3208
3209 /*
3210 * Mark page busy for other threads.
3211 */
3212 assert(!m->busy);
3213 m->busy = TRUE;
3214 assert(!m->absent);
3215
3216 /*
3217 * Give up if the page is being written and there's a copy object
3218 */
3219 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3220 RELEASE_PAGE(m);
3221 GIVE_UP;
3222 }
3223
3224 /*
3225 * Put this page into the physical map.
3226 * We have to unlock the object because pmap_enter
3227 * may cause other faults.
3228 */
3229 if (m->no_isync == TRUE) {
3230 pmap_sync_caches_phys(m->phys_page);
3231
3232 m->no_isync = FALSE;
3233 }
3234
3235 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3236
3237 PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);
3238
3239 /*
3240 * Unlock everything, and return
3241 */
3242
3243 PAGE_WAKEUP_DONE(m);
3244 UNLOCK_AND_DEALLOCATE;
3245
3246 return(KERN_SUCCESS);
3247
3248 }
3249
3250 /*
3251 * Routine: vm_fault_copy_cleanup
3252 * Purpose:
3253 * Release a page used by vm_fault_copy.
3254 */
3255
3256 void
3257 vm_fault_copy_cleanup(
3258 vm_page_t page,
3259 vm_page_t top_page)
3260 {
3261 vm_object_t object = page->object;
3262
3263 vm_object_lock(object);
3264 PAGE_WAKEUP_DONE(page);
3265 vm_page_lock_queues();
3266 if (!page->active && !page->inactive)
3267 vm_page_activate(page);
3268 vm_page_unlock_queues();
3269 vm_fault_cleanup(object, top_page);
3270 }
3271
3272 void
3273 vm_fault_copy_dst_cleanup(
3274 vm_page_t page)
3275 {
3276 vm_object_t object;
3277
3278 if (page != VM_PAGE_NULL) {
3279 object = page->object;
3280 vm_object_lock(object);
3281 vm_page_lock_queues();
3282 vm_page_unwire(page);
3283 vm_page_unlock_queues();
3284 vm_object_paging_end(object);
3285 vm_object_unlock(object);
3286 }
3287 }
3288
3289 /*
3290 * Routine: vm_fault_copy
3291 *
3292 * Purpose:
3293 * Copy pages from one virtual memory object to another --
3294 * neither the source nor destination pages need be resident.
3295 *
3296 * Before actually copying a page, the version associated with
3297 * the destination address map will be verified.
3298 *
3299 * In/out conditions:
3300 * The caller must hold a reference, but not a lock, to
3301 * each of the source and destination objects and to the
3302 * destination map.
3303 *
3304 * Results:
3305 * Returns KERN_SUCCESS if no errors were encountered in
3306 * reading or writing the data. Returns KERN_INTERRUPTED if
3307 * the operation was interrupted (only possible if the
3308 * "interruptible" argument is asserted). Other return values
3309 * indicate a permanent error in copying the data.
3310 *
3311 * The actual amount of data copied will be returned in the
3312 * "copy_size" argument. In the event that the destination map
3313 * verification failed, this amount may be less than the amount
3314 * requested.
3315 */
3316 kern_return_t
3317 vm_fault_copy(
3318 vm_object_t src_object,
3319 vm_object_offset_t src_offset,
3320 vm_size_t *src_size, /* INOUT */
3321 vm_object_t dst_object,
3322 vm_object_offset_t dst_offset,
3323 vm_map_t dst_map,
3324 vm_map_version_t *dst_version,
3325 int interruptible)
3326 {
3327 vm_page_t result_page;
3328
3329 vm_page_t src_page;
3330 vm_page_t src_top_page;
3331 vm_prot_t src_prot;
3332
3333 vm_page_t dst_page;
3334 vm_page_t dst_top_page;
3335 vm_prot_t dst_prot;
3336
3337 vm_size_t amount_left;
3338 vm_object_t old_copy_object;
3339 kern_return_t error = 0;
3340
3341 vm_size_t part_size;
3342
3343 /*
3344 * In order not to confuse the clustered pageins, align
3345 * the different offsets on a page boundary.
3346 */
3347 vm_object_offset_t src_lo_offset = trunc_page_64(src_offset);
3348 vm_object_offset_t dst_lo_offset = trunc_page_64(dst_offset);
3349 vm_object_offset_t src_hi_offset = round_page_64(src_offset + *src_size);
3350 vm_object_offset_t dst_hi_offset = round_page_64(dst_offset + *src_size);
3351
3352 #define RETURN(x) \
3353 MACRO_BEGIN \
3354 *src_size -= amount_left; \
3355 MACRO_RETURN(x); \
3356 MACRO_END
3357
3358 amount_left = *src_size;
3359 do { /* while (amount_left > 0) */
3360 /*
3361 * There may be a deadlock if both source and destination
3362 * pages are the same. To avoid this deadlock, the copy must
3363 * start by getting the destination page in order to apply
3364 * COW semantics if any.
3365 */
3366
3367 RetryDestinationFault: ;
3368
3369 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3370
3371 vm_object_lock(dst_object);
3372 vm_object_paging_begin(dst_object);
3373
3374 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3375 switch (vm_fault_page(dst_object,
3376 trunc_page_64(dst_offset),
3377 VM_PROT_WRITE|VM_PROT_READ,
3378 FALSE,
3379 interruptible,
3380 dst_lo_offset,
3381 dst_hi_offset,
3382 VM_BEHAVIOR_SEQUENTIAL,
3383 &dst_prot,
3384 &dst_page,
3385 &dst_top_page,
3386 (int *)0,
3387 &error,
3388 dst_map->no_zero_fill,
3389 FALSE, NULL, 0)) {
3390 case VM_FAULT_SUCCESS:
3391 break;
3392 case VM_FAULT_RETRY:
3393 goto RetryDestinationFault;
3394 case VM_FAULT_MEMORY_SHORTAGE:
3395 if (vm_page_wait(interruptible))
3396 goto RetryDestinationFault;
3397 /* fall thru */
3398 case VM_FAULT_INTERRUPTED:
3399 RETURN(MACH_SEND_INTERRUPTED);
3400 case VM_FAULT_FICTITIOUS_SHORTAGE:
3401 vm_page_more_fictitious();
3402 goto RetryDestinationFault;
3403 case VM_FAULT_MEMORY_ERROR:
3404 if (error)
3405 return (error);
3406 else
3407 return(KERN_MEMORY_ERROR);
3408 }
3409 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3410
3411 old_copy_object = dst_page->object->copy;
3412
3413 /*
3414 * There exists the possibility that the source and
3415 * destination page are the same. But we can't
3416 * easily determine that now. If they are the
3417 * same, the call to vm_fault_page() for the
3418 * destination page will deadlock. To prevent this we
3419 * wire the page so we can drop busy without having
3420 * the page daemon steal the page. We clean up the
3421 * top page but keep the paging reference on the object
3422 * holding the dest page so it doesn't go away.
3423 */
3424
3425 vm_page_lock_queues();
3426 vm_page_wire(dst_page);
3427 vm_page_unlock_queues();
3428 PAGE_WAKEUP_DONE(dst_page);
3429 vm_object_unlock(dst_page->object);
3430
3431 if (dst_top_page != VM_PAGE_NULL) {
3432 vm_object_lock(dst_object);
3433 VM_PAGE_FREE(dst_top_page);
3434 vm_object_paging_end(dst_object);
3435 vm_object_unlock(dst_object);
3436 }
3437
3438 RetrySourceFault: ;
3439
3440 if (src_object == VM_OBJECT_NULL) {
3441 /*
3442 * No source object. We will just
3443 * zero-fill the page in dst_object.
3444 */
3445 src_page = VM_PAGE_NULL;
3446 result_page = VM_PAGE_NULL;
3447 } else {
3448 vm_object_lock(src_object);
3449 src_page = vm_page_lookup(src_object,
3450 trunc_page_64(src_offset));
3451 if (src_page == dst_page) {
3452 src_prot = dst_prot;
3453 result_page = VM_PAGE_NULL;
3454 } else {
3455 src_prot = VM_PROT_READ;
3456 vm_object_paging_begin(src_object);
3457
3458 XPR(XPR_VM_FAULT,
3459 "vm_fault_copy(2) -> vm_fault_page\n",
3460 0,0,0,0,0);
3461 switch (vm_fault_page(src_object,
3462 trunc_page_64(src_offset),
3463 VM_PROT_READ,
3464 FALSE,
3465 interruptible,
3466 src_lo_offset,
3467 src_hi_offset,
3468 VM_BEHAVIOR_SEQUENTIAL,
3469 &src_prot,
3470 &result_page,
3471 &src_top_page,
3472 (int *)0,
3473 &error,
3474 FALSE,
3475 FALSE, NULL, 0)) {
3476
3477 case VM_FAULT_SUCCESS:
3478 break;
3479 case VM_FAULT_RETRY:
3480 goto RetrySourceFault;
3481 case VM_FAULT_MEMORY_SHORTAGE:
3482 if (vm_page_wait(interruptible))
3483 goto RetrySourceFault;
3484 /* fall thru */
3485 case VM_FAULT_INTERRUPTED:
3486 vm_fault_copy_dst_cleanup(dst_page);
3487 RETURN(MACH_SEND_INTERRUPTED);
3488 case VM_FAULT_FICTITIOUS_SHORTAGE:
3489 vm_page_more_fictitious();
3490 goto RetrySourceFault;
3491 case VM_FAULT_MEMORY_ERROR:
3492 vm_fault_copy_dst_cleanup(dst_page);
3493 if (error)
3494 return (error);
3495 else
3496 return(KERN_MEMORY_ERROR);
3497 }
3498
3499
3500 assert((src_top_page == VM_PAGE_NULL) ==
3501 (result_page->object == src_object));
3502 }
3503 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
3504 vm_object_unlock(result_page->object);
3505 }
3506
3507 if (!vm_map_verify(dst_map, dst_version)) {
3508 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3509 vm_fault_copy_cleanup(result_page, src_top_page);
3510 vm_fault_copy_dst_cleanup(dst_page);
3511 break;
3512 }
3513
3514 vm_object_lock(dst_page->object);
3515
3516 if (dst_page->object->copy != old_copy_object) {
3517 vm_object_unlock(dst_page->object);
3518 vm_map_verify_done(dst_map, dst_version);
3519 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3520 vm_fault_copy_cleanup(result_page, src_top_page);
3521 vm_fault_copy_dst_cleanup(dst_page);
3522 break;
3523 }
3524 vm_object_unlock(dst_page->object);
3525
3526 /*
3527 * Copy the page, and note that it is dirty
3528 * immediately.
3529 */
3530
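#if 0
	/*
	 *	Illustration (not compiled) of the sizing logic below,
	 *	with hypothetical offsets and PAGE_SIZE assumed to be 4096.
	 */
	{
		vm_object_offset_t	ex_src_offset = 0x10100; /* src_po 0x100 */
		vm_object_offset_t	ex_dst_offset = 0x20300; /* dst_po 0x300 */
		vm_size_t		ex_amount_left = 0x5000;
		vm_object_offset_t	ex_src_po, ex_dst_po;
		vm_size_t		ex_part_size;

		ex_src_po = ex_src_offset & 0xFFF;
		ex_dst_po = ex_dst_offset & 0xFFF;

		/* dst_po > src_po, so the destination page limits the chunk */
		ex_part_size = 4096 - ((ex_dst_po > ex_src_po) ?
					ex_dst_po : ex_src_po);
		if (ex_part_size > ex_amount_left)
			ex_part_size = ex_amount_left;
		/* ex_part_size == 4096 - 0x300 == 0xd00 bytes this pass */
	}
#endif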
3531 if (!page_aligned(src_offset) ||
3532 !page_aligned(dst_offset) ||
3533 !page_aligned(amount_left)) {
3534
3535 vm_object_offset_t src_po,
3536 dst_po;
3537
3538 src_po = src_offset - trunc_page_64(src_offset);
3539 dst_po = dst_offset - trunc_page_64(dst_offset);
3540
3541 if (dst_po > src_po) {
3542 part_size = PAGE_SIZE - dst_po;
3543 } else {
3544 part_size = PAGE_SIZE - src_po;
3545 }
3546 if (part_size > (amount_left)){
3547 part_size = amount_left;
3548 }
3549
3550 if (result_page == VM_PAGE_NULL) {
3551 vm_page_part_zero_fill(dst_page,
3552 dst_po, part_size);
3553 } else {
3554 vm_page_part_copy(result_page, src_po,
3555 dst_page, dst_po, part_size);
3556 if(!dst_page->dirty){
3557 vm_object_lock(dst_object);
3558 dst_page->dirty = TRUE;
3559 vm_object_unlock(dst_page->object);
3560 }
3561
3562 }
3563 } else {
3564 part_size = PAGE_SIZE;
3565
3566 if (result_page == VM_PAGE_NULL)
3567 vm_page_zero_fill(dst_page);
3568 else{
3569 vm_page_copy(result_page, dst_page);
3570 if(!dst_page->dirty){
3571 vm_object_lock(dst_object);
3572 dst_page->dirty = TRUE;
3573 vm_object_unlock(dst_page->object);
3574 }
3575 }
3576
3577 }
3578
3579 /*
3580 * Unlock everything, and return
3581 */
3582
3583 vm_map_verify_done(dst_map, dst_version);
3584
3585 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3586 vm_fault_copy_cleanup(result_page, src_top_page);
3587 vm_fault_copy_dst_cleanup(dst_page);
3588
3589 amount_left -= part_size;
3590 src_offset += part_size;
3591 dst_offset += part_size;
3592 } while (amount_left > 0);
3593
3594 RETURN(KERN_SUCCESS);
3595 #undef RETURN
3596
3597 /*NOTREACHED*/
3598 }
3599
3600 #ifdef notdef
3601
3602 /*
3603 * Routine: vm_fault_page_overwrite
3604 *
3605 * Description:
3606 * A form of vm_fault_page that assumes that the
3607 * resulting page will be overwritten in its entirety,
3608 * making it unnecessary to obtain the correct *contents*
3609 * of the page.
3610 *
3611 * Implementation:
3612 * XXX Untested. Also unused. Eventually, this technology
3613 * could be used in vm_fault_copy() to advantage.
3614 */
3615 vm_fault_return_t
3616 vm_fault_page_overwrite(
3617 register
3618 vm_object_t dst_object,
3619 vm_object_offset_t dst_offset,
3620 vm_page_t *result_page) /* OUT */
3621 {
3622 register
3623 vm_page_t dst_page;
3624 kern_return_t wait_result;
3625
3626 #define interruptible THREAD_UNINT /* XXX */
3627
3628 while (TRUE) {
3629 /*
3630 * Look for a page at this offset
3631 */
3632
3633 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3634 == VM_PAGE_NULL) {
3635 /*
3636 * No page, no problem... just allocate one.
3637 */
3638
3639 dst_page = vm_page_alloc(dst_object, dst_offset);
3640 if (dst_page == VM_PAGE_NULL) {
3641 vm_object_unlock(dst_object);
3642 VM_PAGE_WAIT();
3643 vm_object_lock(dst_object);
3644 continue;
3645 }
3646
3647 /*
3648 * Pretend that the memory manager
3649 * write-protected the page.
3650 *
3651 * Note that we will be asking for write
3652 * permission without asking for the data
3653 * first.
3654 */
3655
3656 dst_page->overwriting = TRUE;
3657 dst_page->page_lock = VM_PROT_WRITE;
3658 dst_page->absent = TRUE;
3659 dst_page->unusual = TRUE;
3660 dst_object->absent_count++;
3661
3662 break;
3663
3664 /*
3665 * When we bail out, we might have to throw
3666 * away the page created here.
3667 */
3668
3669 #define DISCARD_PAGE \
3670 MACRO_BEGIN \
3671 vm_object_lock(dst_object); \
3672 dst_page = vm_page_lookup(dst_object, dst_offset); \
3673 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3674 VM_PAGE_FREE(dst_page); \
3675 vm_object_unlock(dst_object); \
3676 MACRO_END
3677 }
3678
3679 /*
3680 * If the page is write-protected...
3681 */
3682
3683 if (dst_page->page_lock & VM_PROT_WRITE) {
3684 /*
3685 * ... and an unlock request hasn't been sent
3686 */
3687
3688 if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3689 vm_prot_t u;
3690 kern_return_t rc;
3691
3692 /*
3693 * ... then send one now.
3694 */
3695
3696 if (!dst_object->pager_ready) {
3697 wait_result = vm_object_assert_wait(dst_object,
3698 VM_OBJECT_EVENT_PAGER_READY,
3699 interruptible);
3700 vm_object_unlock(dst_object);
3701 if (wait_result == THREAD_WAITING)
3702 wait_result = thread_block(THREAD_CONTINUE_NULL);
3703 if (wait_result != THREAD_AWAKENED) {
3704 DISCARD_PAGE;
3705 return(VM_FAULT_INTERRUPTED);
3706 }
3707 continue;
3708 }
3709
3710 u = dst_page->unlock_request |= VM_PROT_WRITE;
3711 vm_object_unlock(dst_object);
3712
3713 if ((rc = memory_object_data_unlock(
3714 dst_object->pager,
3715 dst_offset + dst_object->paging_offset,
3716 PAGE_SIZE,
3717 u)) != KERN_SUCCESS) {
3718 if (vm_fault_debug)
3719 printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3720 DISCARD_PAGE;
3721 return((rc == MACH_SEND_INTERRUPTED) ?
3722 VM_FAULT_INTERRUPTED :
3723 VM_FAULT_MEMORY_ERROR);
3724 }
3725 vm_object_lock(dst_object);
3726 continue;
3727 }
3728
3729 /* ... fall through to wait below */
3730 } else {
3731 /*
3732 * If the page isn't being used for other
3733 * purposes, then we're done.
3734 */
3735 if ( ! (dst_page->busy || dst_page->absent ||
3736 dst_page->error || dst_page->restart) )
3737 break;
3738 }
3739
3740 wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
3741 vm_object_unlock(dst_object);
3742 if (wait_result == THREAD_WAITING)
3743 wait_result = thread_block(THREAD_CONTINUE_NULL);
3744 if (wait_result != THREAD_AWAKENED) {
3745 DISCARD_PAGE;
3746 return(VM_FAULT_INTERRUPTED);
3747 }
3748 }
3749
3750 *result_page = dst_page;
3751 return(VM_FAULT_SUCCESS);
3752
3753 #undef interruptible
3754 #undef DISCARD_PAGE
3755 }
3756
3757 #endif /* notdef */
3758
3759 #if VM_FAULT_CLASSIFY
3760 /*
3761 * Temporary statistics gathering support.
3762 */
3763
3764 /*
3765 * Statistics arrays:
3766 */
3767 #define VM_FAULT_TYPES_MAX 5
3768 #define VM_FAULT_LEVEL_MAX 8
3769
3770 int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
3771
3772 #define VM_FAULT_TYPE_ZERO_FILL 0
3773 #define VM_FAULT_TYPE_MAP_IN 1
3774 #define VM_FAULT_TYPE_PAGER 2
3775 #define VM_FAULT_TYPE_COPY 3
3776 #define VM_FAULT_TYPE_OTHER 4
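/*
 *	vm_fault_stats[type][level] counts faults of the given type that
 *	were classified "level" objects down the shadow chain (level 0
 *	being the mapped object itself).  A minimal sketch of dumping
 *	the table (not compiled; the formatting is only an assumption):
 */
#if 0
void
vm_fault_classify_dump(void)
{
	int	type, level;

	for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
		for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
			if (vm_fault_stats[type][level] != 0)
				printf("fault type %d, level %d: %d\n",
				       type, level,
				       vm_fault_stats[type][level]);
		}
	}
}
#endif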
3777
3778
3779 void
3780 vm_fault_classify(vm_object_t object,
3781 vm_object_offset_t offset,
3782 vm_prot_t fault_type)
3783 {
3784 int type, level = 0;
3785 vm_page_t m;
3786
3787 while (TRUE) {
3788 m = vm_page_lookup(object, offset);
3789 if (m != VM_PAGE_NULL) {
3790 if (m->busy || m->error || m->restart || m->absent ||
3791 fault_type & m->page_lock) {
3792 type = VM_FAULT_TYPE_OTHER;
3793 break;
3794 }
3795 if (((fault_type & VM_PROT_WRITE) == 0) ||
3796 ((level == 0) && object->copy == VM_OBJECT_NULL)) {
3797 type = VM_FAULT_TYPE_MAP_IN;
3798 break;
3799 }
3800 type = VM_FAULT_TYPE_COPY;
3801 break;
3802 }
3803 else {
3804 if (object->pager_created) {
3805 type = VM_FAULT_TYPE_PAGER;
3806 break;
3807 }
3808 if (object->shadow == VM_OBJECT_NULL) {
3809 type = VM_FAULT_TYPE_ZERO_FILL;
3810 break;
3811 }
3812
3813 offset += object->shadow_offset;
3814 object = object->shadow;
3815 level++;
3816 continue;
3817 }
3818 }
3819
3820 if (level >= VM_FAULT_LEVEL_MAX)
3821 level = VM_FAULT_LEVEL_MAX - 1;
3822
3823 vm_fault_stats[type][level] += 1;
3824
3825 return;
3826 }
3827
3828 /* cleanup routine to call from debugger */
3829
3830 void
3831 vm_fault_classify_init(void)
3832 {
3833 int type, level;
3834
3835 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
3836 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
3837 vm_fault_stats[type][level] = 0;
3838 }
3839 }
3840
3841 return;
3842 }
3843 #endif /* VM_FAULT_CLASSIFY */