1
2 /*
3 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 *
9 * This file contains Original Code and/or Modifications of Original Code
10 * as defined in and that are subject to the Apple Public Source License
11 * Version 2.0 (the 'License'). You may not use this file except in
12 * compliance with the License. Please obtain a copy of the License at
13 * http://www.opensource.apple.com/apsl/ and read it before using this
14 * file.
15 *
16 * The Original Code and all software distributed under the License are
17 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
18 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
19 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
21 * Please see the License for the specific language governing rights and
22 * limitations under the License.
23 *
24 * @APPLE_LICENSE_HEADER_END@
25 */
26 /*
27 * @OSF_COPYRIGHT@
28 */
29 /*
30 * Mach Operating System
31 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
32 * All Rights Reserved.
33 *
34 * Permission to use, copy, modify and distribute this software and its
35 * documentation is hereby granted, provided that both the copyright
36 * notice and this permission notice appear in all copies of the
37 * software, derivative works or modified versions, and any portions
38 * thereof, and that both notices appear in supporting documentation.
39 *
40 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
41 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
42 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
43 *
44 * Carnegie Mellon requests users of this software to return to
45 *
46 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
47 * School of Computer Science
48 * Carnegie Mellon University
49 * Pittsburgh PA 15213-3890
50 *
51 * any improvements or extensions that they make and grant Carnegie Mellon
52 * the rights to redistribute these changes.
53 */
54 /*
55 */
56 /*
57 * File: vm_fault.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 *
60 * Page fault handling module.
61 */
62 #ifdef MACH_BSD
63 /* remove after component interface available */
64 extern int vnode_pager_workaround;
65 extern int device_pager_workaround;
66 #endif
67
68 #include <mach_cluster_stats.h>
69 #include <mach_pagemap.h>
70 #include <mach_kdb.h>
71
72 #include <vm/vm_fault.h>
73 #include <mach/kern_return.h>
74 #include <mach/message.h> /* for error codes */
75 #include <kern/host_statistics.h>
76 #include <kern/counters.h>
77 #include <kern/task.h>
78 #include <kern/thread.h>
79 #include <kern/sched_prim.h>
80 #include <kern/host.h>
81 #include <kern/xpr.h>
82 #include <ppc/proc_reg.h>
83 #include <ppc/pmap_internals.h>
84 #include <vm/task_working_set.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_page.h>
88 #include <vm/pmap.h>
89 #include <vm/vm_pageout.h>
90 #include <mach/vm_param.h>
91 #include <mach/vm_behavior.h>
92 #include <mach/memory_object.h>
93 /* For memory_object_data_{request,unlock} */
94 #include <kern/mach_param.h>
95 #include <kern/macro_help.h>
96 #include <kern/zalloc.h>
97 #include <kern/misc_protos.h>
98
99 #include <sys/kdebug.h>
100
101 #define VM_FAULT_CLASSIFY 0
102 #define VM_FAULT_STATIC_CONFIG 1
103
104 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
105
106 int vm_object_absent_max = 50;
107
108 int vm_fault_debug = 0;
109 boolean_t vm_page_deactivate_behind = TRUE;
110
111
112 #if !VM_FAULT_STATIC_CONFIG
113 boolean_t vm_fault_dirty_handling = FALSE;
114 boolean_t vm_fault_interruptible = FALSE;
115 boolean_t software_reference_bits = TRUE;
116 #endif
117
118 #if MACH_KDB
119 extern struct db_watchpoint *db_watchpoint_list;
120 #endif /* MACH_KDB */
121
122 /* Forward declarations of internal routines. */
123 extern kern_return_t vm_fault_wire_fast(
124 vm_map_t map,
125 vm_offset_t va,
126 vm_map_entry_t entry,
127 pmap_t pmap,
128 vm_offset_t pmap_addr);
129
130 extern void vm_fault_continue(void);
131
132 extern void vm_fault_copy_cleanup(
133 vm_page_t page,
134 vm_page_t top_page);
135
136 extern void vm_fault_copy_dst_cleanup(
137 vm_page_t page);
138
139 #if VM_FAULT_CLASSIFY
140 extern void vm_fault_classify(vm_object_t object,
141 vm_object_offset_t offset,
142 vm_prot_t fault_type);
143
144 extern void vm_fault_classify_init(void);
145 #endif
146
147 /*
148 * Routine: vm_fault_init
149 * Purpose:
150 * Initialize our private data structures.
151 */
152 void
153 vm_fault_init(void)
154 {
155 }
156
157 /*
158 * Routine: vm_fault_cleanup
159 * Purpose:
160 * Clean up the result of vm_fault_page.
161 * Results:
162 * The paging reference for "object" is released.
163 * "object" is unlocked.
164 * If "top_page" is not null, "top_page" is
165 * freed and the paging reference for the object
166 * containing it is released.
167 *
168 * In/out conditions:
169 * "object" must be locked.
170 */
171 void
172 vm_fault_cleanup(
173 register vm_object_t object,
174 register vm_page_t top_page)
175 {
176 vm_object_paging_end(object);
177 vm_object_unlock(object);
178
179 if (top_page != VM_PAGE_NULL) {
180 object = top_page->object;
181 vm_object_lock(object);
182 VM_PAGE_FREE(top_page);
183 vm_object_paging_end(object);
184 vm_object_unlock(object);
185 }
186 }
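
/*
 * Illustrative sketch (not part of the build): the teardown a successful
 * caller of vm_fault_page() performs once it is finished with the page it
 * was handed.  "m" and "top_page" stand for the result_page and top_page
 * values returned by vm_fault_page(); at this point the page's object is
 * still locked and still holds a paging reference.
 */
#if 0
	PAGE_WAKEUP_DONE(m);			/* clear the busy bit we were given */
	vm_fault_cleanup(m->object, top_page);	/* drop the paging reference,
						 * unlock the object, and free
						 * the placeholder top_page */
#endif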
187
188 #if MACH_CLUSTER_STATS
189 #define MAXCLUSTERPAGES 16
190 struct {
191 unsigned long pages_in_cluster;
192 unsigned long pages_at_higher_offsets;
193 unsigned long pages_at_lower_offsets;
194 } cluster_stats_in[MAXCLUSTERPAGES];
195 #define CLUSTER_STAT(clause) clause
196 #define CLUSTER_STAT_HIGHER(x) \
197 ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
198 #define CLUSTER_STAT_LOWER(x) \
199 ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
200 #define CLUSTER_STAT_CLUSTER(x) \
201 ((cluster_stats_in[(x)].pages_in_cluster)++)
202 #else /* MACH_CLUSTER_STATS */
203 #define CLUSTER_STAT(clause)
204 #endif /* MACH_CLUSTER_STATS */
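
/*
 * Illustrative sketch (not part of the build): CLUSTER_STAT() wraps
 * statistics-only code so it compiles away when MACH_CLUSTER_STATS is off.
 * "cluster_index" below is a hypothetical index into cluster_stats_in[].
 */
#if 0
	CLUSTER_STAT(int pages_at_higher_offsets;)	/* declaration only kept
							 * when stats are on */
	CLUSTER_STAT_HIGHER(cluster_index);		/* bump one counter */
#endif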
205
206 /* XXX - temporary */
207 boolean_t vm_allow_clustered_pagein = FALSE;
208 int vm_pagein_cluster_used = 0;
209
210 /*
211 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
212 */
213 int vm_default_ahead = 1; /* Number of pages to prepage ahead */
214 int vm_default_behind = 0; /* Number of pages to prepage behind */
215
216 #define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
217
218 /*
219 * Routine: vm_fault_page
220 * Purpose:
221 * Find the resident page for the virtual memory
222 * specified by the given virtual memory object
223 * and offset.
224 * Additional arguments:
225 * The required permissions for the page is given
226 * in "fault_type". Desired permissions are included
227 * in "protection". The minimum and maximum valid offsets
228 * within the object for the relevant map entry are
229 * passed in "lo_offset" and "hi_offset" respectively and
230 * the expected page reference pattern is passed in "behavior".
231 * These three parameters are used to determine pagein cluster
232 * limits.
233 *
234 * If the desired page is known to be resident (for
235 * example, because it was previously wired down), asserting
236 * the "unwiring" parameter will speed the search.
237 *
238 * If the operation can be interrupted (by thread_abort
239 * or thread_terminate), then the "interruptible"
240 * parameter should be asserted.
241 *
242 * Results:
243 * The page containing the proper data is returned
244 * in "result_page".
245 *
246 * In/out conditions:
247 * The source object must be locked and referenced,
248 * and must donate one paging reference. The reference
249 * is not affected. The paging reference and lock are
250 * consumed.
251 *
252 * If the call succeeds, the object in which "result_page"
253 * resides is left locked and holding a paging reference.
254 * If this is not the original object, a busy page in the
255 * original object is returned in "top_page", to prevent other
256 * callers from pursuing this same data, along with a paging
257 * reference for the original object. The "top_page" should
258 * be destroyed when this guarantee is no longer required.
259 * The "result_page" is also left busy. It is not removed
260 * from the pageout queues.
261 */
262
263 vm_fault_return_t
264 vm_fault_page(
265 /* Arguments: */
266 vm_object_t first_object, /* Object to begin search */
267 vm_object_offset_t first_offset, /* Offset into object */
268 vm_prot_t fault_type, /* What access is requested */
269 boolean_t must_be_resident,/* Must page be resident? */
270 int interruptible, /* how may fault be interrupted? */
271 vm_object_offset_t lo_offset, /* Map entry start */
272 vm_object_offset_t hi_offset, /* Map entry end */
273 vm_behavior_t behavior, /* Page reference behavior */
274 /* Modifies in place: */
275 vm_prot_t *protection, /* Protection for mapping */
276 /* Returns: */
277 vm_page_t *result_page, /* Page found, if successful */
278 vm_page_t *top_page, /* Page in top object, if
279 * not result_page. */
280 int *type_of_fault, /* if non-null, fill in with type of fault
281 * COW, zero-fill, etc... returned in trace point */
282 /* More arguments: */
283 kern_return_t *error_code, /* code if page is in error */
284 boolean_t no_zero_fill, /* don't zero fill absent pages */
285 boolean_t data_supply, /* treat as data_supply if
286 * it is a write fault and a full
287 * page is provided */
288 vm_map_t map,
289 vm_offset_t vaddr)
290 {
291 register
292 vm_page_t m;
293 register
294 vm_object_t object;
295 register
296 vm_object_offset_t offset;
297 vm_page_t first_m;
298 vm_object_t next_object;
299 vm_object_t copy_object;
300 boolean_t look_for_page;
301 vm_prot_t access_required = fault_type;
302 vm_prot_t wants_copy_flag;
303 vm_size_t cluster_size, length;
304 vm_object_offset_t cluster_offset;
305 vm_object_offset_t cluster_start, cluster_end, paging_offset;
306 vm_object_offset_t align_offset;
307 CLUSTER_STAT(int pages_at_higher_offsets;)
308 CLUSTER_STAT(int pages_at_lower_offsets;)
309 kern_return_t wait_result;
310 boolean_t interruptible_state;
311 boolean_t bumped_pagein = FALSE;
312
313
314 #if MACH_PAGEMAP
315 /*
316 * MACH page map - an optional optimization where a bit map is maintained
317 * by the VM subsystem for internal objects to indicate which pages of
318 * the object currently reside on backing store. This existence map
319 * duplicates information maintained by the vnode pager. It is
320 * created at the time of the first pageout against the object, i.e.
321 * at the same time pager for the object is created. The optimization
322 * is designed to eliminate pager interaction overhead, if it is
323 * 'known' that the page does not exist on backing store.
324 *
325 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
326 * either marked as paged out in the existence map for the object or no
327 * existence map exists for the object. LOOK_FOR() is one of the
328 * criteria in the decision to invoke the pager. It is also used as one
329 * of the criteria to terminate the scan for adjacent pages in a clustered
330 * pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
331 * permanent objects. Note also that if the pager for an internal object
332 * has not been created, the pager is not invoked regardless of the value
333 * of LOOK_FOR() and that clustered pagein scans are only done on an object
334 * for which a pager has been created.
335 *
336 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
337 * is marked as paged out in the existence map for the object.
338 * PAGED_OUT() is used to determine if a page has already been pushed
339 * into a copy object in order to avoid a redundant page out operation.
340 */
341 #define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
342 != VM_EXTERNAL_STATE_ABSENT)
343 #define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
344 == VM_EXTERNAL_STATE_EXISTS)
345 #else /* MACH_PAGEMAP */
346 /*
347 * If the MACH page map optimization is not enabled,
348 * LOOK_FOR() always evaluates to TRUE. The pager will always be
349 * invoked to resolve missing pages in an object, assuming the pager
350 * has been created for the object. In a clustered page operation, the
351 * absence of a page on backing store cannot be used to terminate
352 * a scan for adjacent pages since that information is available only in
353 * the pager. Hence pages that may not be paged out are potentially
354 * included in a clustered request. The vnode pager is coded to deal
355 * with any combination of absent/present pages in a clustered
356 * pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
357 * will always be invoked to push a dirty page into a copy object assuming
358 * a pager has been created. If the page has already been pushed, the
359 * pager will ignore the new request.
360 */
361 #define LOOK_FOR(o, f) TRUE
362 #define PAGED_OUT(o, f) FALSE
363 #endif /* MACH_PAGEMAP */
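
/*
 * Illustrative sketch (not part of the build): the two decisions the
 * predicates above feed.  LOOK_FOR() gates whether the pager is asked for
 * the page at all, and PAGED_OUT() lets a push into the copy object be
 * skipped when the page is already on backing store.  This mirrors the
 * tests made later in this routine.
 */
#if 0
	look_for_page = object->pager_created &&
			LOOK_FOR(object, offset) &&
			!data_supply;

	if (!PAGED_OUT(copy_object, copy_offset)) {
		/* not known to be on backing store: copy and push the page */
	}
#endif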
364
365 /*
366 * Recovery actions
367 */
368 #define PREPARE_RELEASE_PAGE(m) \
369 MACRO_BEGIN \
370 vm_page_lock_queues(); \
371 MACRO_END
372
373 #define DO_RELEASE_PAGE(m) \
374 MACRO_BEGIN \
375 PAGE_WAKEUP_DONE(m); \
376 if (!m->active && !m->inactive) \
377 vm_page_activate(m); \
378 vm_page_unlock_queues(); \
379 MACRO_END
380
381 #define RELEASE_PAGE(m) \
382 MACRO_BEGIN \
383 PREPARE_RELEASE_PAGE(m); \
384 DO_RELEASE_PAGE(m); \
385 MACRO_END
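
/*
 * Illustrative sketch (not part of the build): the usual error-path shape
 * built on RELEASE_PAGE() -- wake and reactivate the busy page, then unwind
 * the paging references before returning a failure code, as done for the
 * memory-shortage cases later in this routine.
 */
#if 0
	if (copy_m == VM_PAGE_NULL) {
		RELEASE_PAGE(m);
		vm_fault_cleanup(object, first_m);
		thread_interrupt_level(interruptible_state);
		return(VM_FAULT_MEMORY_SHORTAGE);
	}
#endif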
386
387 #if TRACEFAULTPAGE
388 dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
389 #endif
390
391
392
393 #if !VM_FAULT_STATIC_CONFIG
394 if (vm_fault_dirty_handling
395 #if MACH_KDB
396 /*
397 * If there are watchpoints set, then
398 * we don't want to give away write permission
399 * on a read fault. Make the task write fault,
400 * so that the watchpoint code notices the access.
401 */
402 || db_watchpoint_list
403 #endif /* MACH_KDB */
404 ) {
405 /*
406 * If we aren't asking for write permission,
407 * then don't give it away. We're using write
408 * faults to set the dirty bit.
409 */
410 if (!(fault_type & VM_PROT_WRITE))
411 *protection &= ~VM_PROT_WRITE;
412 }
413
414 if (!vm_fault_interruptible)
415 interruptible = THREAD_UNINT;
416 #else /* STATIC_CONFIG */
417 #if MACH_KDB
418 /*
419 * If there are watchpoints set, then
420 * we don't want to give away write permission
421 * on a read fault. Make the task write fault,
422 * so that the watchpoint code notices the access.
423 */
424 if (db_watchpoint_list) {
425 /*
426 * If we aren't asking for write permission,
427 * then don't give it away. We're using write
428 * faults to set the dirty bit.
429 */
430 if (!(fault_type & VM_PROT_WRITE))
431 *protection &= ~VM_PROT_WRITE;
432 }
433
434 #endif /* MACH_KDB */
435 #endif /* STATIC_CONFIG */
436
437 interruptible_state = thread_interrupt_level(interruptible);
438
439 /*
440 * INVARIANTS (through entire routine):
441 *
442 * 1) At all times, we must either have the object
443 * lock or a busy page in some object to prevent
444 * some other thread from trying to bring in
445 * the same page.
446 *
447 * Note that we cannot hold any locks during the
448 * pager access or when waiting for memory, so
449 * we use a busy page then.
450 *
451 * Note also that we aren't as concerned about more than
452 * one thread attempting to memory_object_data_unlock
453 * the same page at once, so we don't hold the page
454 * as busy then, but do record the highest unlock
455 * value so far. [Unlock requests may also be delivered
456 * out of order.]
457 *
458 * 2) To prevent another thread from racing us down the
459 * shadow chain and entering a new page in the top
460 * object before we do, we must keep a busy page in
461 * the top object while following the shadow chain.
462 *
463 * 3) We must increment paging_in_progress on any object
464 * for which we have a busy page
465 *
466 * 4) We leave busy pages on the pageout queues.
467 * If the pageout daemon comes across a busy page,
468 * it will remove the page from the pageout queues.
469 */
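
/*
 * Illustrative sketch (not part of the build) of invariant (1) above:
 * before the object lock is dropped to call the pager or wait for memory,
 * the page is marked busy so no other thread tries to bring in the same
 * data; the lock is retaken before the busy bit is cleared.
 */
#if 0
	assert(!m->busy);
	m->busy = TRUE;			/* busy page now guards this fault */
	vm_object_unlock(object);	/* safe to drop the lock and block */
	/* ... pager request or memory wait ... */
	vm_object_lock(object);
	PAGE_WAKEUP_DONE(m);		/* done: clear busy, wake waiters */
#endif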
470
471 /*
472 * Search for the page at object/offset.
473 */
474
475 object = first_object;
476 offset = first_offset;
477 first_m = VM_PAGE_NULL;
478 access_required = fault_type;
479
480 XPR(XPR_VM_FAULT,
481 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
482 (integer_t)object, offset, fault_type, *protection, 0);
483
484 /*
485 * See whether this page is resident
486 */
487
488 while (TRUE) {
489 #if TRACEFAULTPAGE
490 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
491 #endif
492 if (!object->alive) {
493 vm_fault_cleanup(object, first_m);
494 thread_interrupt_level(interruptible_state);
495 return(VM_FAULT_MEMORY_ERROR);
496 }
497 m = vm_page_lookup(object, offset);
498 #if TRACEFAULTPAGE
499 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
500 #endif
501 if (m != VM_PAGE_NULL) {
502 /*
503 * If the page was pre-paged as part of a
504 * cluster, record the fact.
505 */
506 if (m->clustered) {
507 vm_pagein_cluster_used++;
508 m->clustered = FALSE;
509 }
510
511 /*
512 * If the page is being brought in,
513 * wait for it and then retry.
514 *
515 * A possible optimization: if the page
516 * is known to be resident, we can ignore
517 * pages that are absent (regardless of
518 * whether they're busy).
519 */
520
521 if (m->busy) {
522 #if TRACEFAULTPAGE
523 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
524 #endif
525 wait_result = PAGE_SLEEP(object, m, interruptible);
526 XPR(XPR_VM_FAULT,
527 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
528 (integer_t)object, offset,
529 (integer_t)m, 0, 0);
530 counter(c_vm_fault_page_block_busy_kernel++);
531
532 if (wait_result != THREAD_AWAKENED) {
533 vm_fault_cleanup(object, first_m);
534 thread_interrupt_level(interruptible_state);
535 if (wait_result == THREAD_RESTART)
536 {
537 return(VM_FAULT_RETRY);
538 }
539 else
540 {
541 return(VM_FAULT_INTERRUPTED);
542 }
543 }
544 continue;
545 }
546
547 /*
548 * If the page is in error, give up now.
549 */
550
551 if (m->error) {
552 #if TRACEFAULTPAGE
553 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */
554 #endif
555 if (error_code)
556 *error_code = m->page_error;
557 VM_PAGE_FREE(m);
558 vm_fault_cleanup(object, first_m);
559 thread_interrupt_level(interruptible_state);
560 return(VM_FAULT_MEMORY_ERROR);
561 }
562
563 /*
564 * If the pager wants us to restart
565 * at the top of the chain,
566 * typically because it has moved the
567 * page to another pager, then do so.
568 */
569
570 if (m->restart) {
571 #if TRACEFAULTPAGE
572 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
573 #endif
574 VM_PAGE_FREE(m);
575 vm_fault_cleanup(object, first_m);
576 thread_interrupt_level(interruptible_state);
577 return(VM_FAULT_RETRY);
578 }
579
580 /*
581 * If the page isn't busy, but is absent,
582 * then it was deemed "unavailable".
583 */
584
585 if (m->absent) {
586 /*
587 * Remove the non-existent page (unless it's
588 * in the top object) and move on down to the
589 * next object (if there is one).
590 */
591 #if TRACEFAULTPAGE
592 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */
593 #endif
594
595 next_object = object->shadow;
596 if (next_object == VM_OBJECT_NULL) {
597 vm_page_t real_m;
598
599 assert(!must_be_resident);
600
601 if (object->shadow_severed) {
602 vm_fault_cleanup(
603 object, first_m);
604 thread_interrupt_level(interruptible_state);
605 return VM_FAULT_MEMORY_ERROR;
606 }
607
608 /*
609 * Absent page at bottom of shadow
610 * chain; zero fill the page we left
611 * busy in the first object, and flush
612 * the absent page. But first we
613 * need to allocate a real page.
614 */
615 if (VM_PAGE_THROTTLED() ||
616 (real_m = vm_page_grab()) == VM_PAGE_NULL) {
617 vm_fault_cleanup(object, first_m);
618 thread_interrupt_level(interruptible_state);
619 return(VM_FAULT_MEMORY_SHORTAGE);
620 }
621
622 XPR(XPR_VM_FAULT,
623 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
624 (integer_t)object, offset,
625 (integer_t)m,
626 (integer_t)first_object, 0);
627 if (object != first_object) {
628 VM_PAGE_FREE(m);
629 vm_object_paging_end(object);
630 vm_object_unlock(object);
631 object = first_object;
632 offset = first_offset;
633 m = first_m;
634 first_m = VM_PAGE_NULL;
635 vm_object_lock(object);
636 }
637
638 VM_PAGE_FREE(m);
639 assert(real_m->busy);
640 vm_page_insert(real_m, object, offset);
641 m = real_m;
642
643 /*
644 * Drop the lock while zero filling
645 * page. Then break because this
646 * is the page we wanted. Checking
647 * the page lock is a waste of time;
648 * this page was either absent or
649 * newly allocated -- in both cases
650 * it can't be page locked by a pager.
651 */
652 m->no_isync = FALSE;
653
654 if (!no_zero_fill) {
655 vm_object_unlock(object);
656 vm_page_zero_fill(m);
657 if (type_of_fault)
658 *type_of_fault = DBG_ZERO_FILL_FAULT;
659 VM_STAT(zero_fill_count++);
660
661 if (bumped_pagein == TRUE) {
662 VM_STAT(pageins--);
663 current_task()->pageins--;
664 }
665 vm_object_lock(object);
666 }
667 pmap_clear_modify(m->phys_addr);
668 vm_page_lock_queues();
669 VM_PAGE_QUEUES_REMOVE(m);
670 m->page_ticket = vm_page_ticket;
671 if(m->object->size > 0x80000) {
672 m->zero_fill = TRUE;
673 /* depends on the queues lock */
674 vm_zf_count += 1;
675 queue_enter(&vm_page_queue_zf,
676 m, vm_page_t, pageq);
677 } else {
678 queue_enter(
679 &vm_page_queue_inactive,
680 m, vm_page_t, pageq);
681 }
682 vm_page_ticket_roll++;
683 if(vm_page_ticket_roll ==
684 VM_PAGE_TICKETS_IN_ROLL) {
685 vm_page_ticket_roll = 0;
686 if(vm_page_ticket ==
687 VM_PAGE_TICKET_ROLL_IDS)
688 vm_page_ticket= 0;
689 else
690 vm_page_ticket++;
691 }
692 m->inactive = TRUE;
693 vm_page_inactive_count++;
694 vm_page_unlock_queues();
695 break;
696 } else {
697 if (must_be_resident) {
698 vm_object_paging_end(object);
699 } else if (object != first_object) {
700 vm_object_paging_end(object);
701 VM_PAGE_FREE(m);
702 } else {
703 first_m = m;
704 m->absent = FALSE;
705 m->unusual = FALSE;
706 vm_object_absent_release(object);
707 m->busy = TRUE;
708
709 vm_page_lock_queues();
710 VM_PAGE_QUEUES_REMOVE(m);
711 vm_page_unlock_queues();
712 }
713 XPR(XPR_VM_FAULT,
714 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
715 (integer_t)object, offset,
716 (integer_t)next_object,
717 offset+object->shadow_offset,0);
718 offset += object->shadow_offset;
719 hi_offset += object->shadow_offset;
720 lo_offset += object->shadow_offset;
721 access_required = VM_PROT_READ;
722 vm_object_lock(next_object);
723 vm_object_unlock(object);
724 object = next_object;
725 vm_object_paging_begin(object);
726 continue;
727 }
728 }
729
730 if ((m->cleaning)
731 && ((object != first_object) ||
732 (object->copy != VM_OBJECT_NULL))
733 && (fault_type & VM_PROT_WRITE)) {
734 /*
735 * This is a copy-on-write fault that will
736 * cause us to revoke access to this page, but
737 * this page is in the process of being cleaned
738 * in a clustered pageout. We must wait until
739 * the cleaning operation completes before
740 * revoking access to the original page,
741 * otherwise we might attempt to remove a
742 * wired mapping.
743 */
744 #if TRACEFAULTPAGE
745 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */
746 #endif
747 XPR(XPR_VM_FAULT,
748 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
749 (integer_t)object, offset,
750 (integer_t)m, 0, 0);
751 /* take an extra ref so that object won't die */
752 assert(object->ref_count > 0);
753 object->ref_count++;
754 vm_object_res_reference(object);
755 vm_fault_cleanup(object, first_m);
756 counter(c_vm_fault_page_block_backoff_kernel++);
757 vm_object_lock(object);
758 assert(object->ref_count > 0);
759 m = vm_page_lookup(object, offset);
760 if (m != VM_PAGE_NULL && m->cleaning) {
761 PAGE_ASSERT_WAIT(m, interruptible);
762 vm_object_unlock(object);
763 wait_result = thread_block(THREAD_CONTINUE_NULL);
764 vm_object_deallocate(object);
765 goto backoff;
766 } else {
767 vm_object_unlock(object);
768 vm_object_deallocate(object);
769 thread_interrupt_level(interruptible_state);
770 return VM_FAULT_RETRY;
771 }
772 }
773
774 /*
775 * If the desired access to this page has
776 * been locked out, request that it be unlocked.
777 */
778
779 if (access_required & m->page_lock) {
780 if ((access_required & m->unlock_request) != access_required) {
781 vm_prot_t new_unlock_request;
782 kern_return_t rc;
783
784 #if TRACEFAULTPAGE
785 dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready); /* (TEST/DEBUG) */
786 #endif
787 if (!object->pager_ready) {
788 XPR(XPR_VM_FAULT,
789 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
790 access_required,
791 (integer_t)object, offset,
792 (integer_t)m, 0);
793 /* take an extra ref */
794 assert(object->ref_count > 0);
795 object->ref_count++;
796 vm_object_res_reference(object);
797 vm_fault_cleanup(object,
798 first_m);
799 counter(c_vm_fault_page_block_backoff_kernel++);
800 vm_object_lock(object);
801 assert(object->ref_count > 0);
802 if (!object->pager_ready) {
803 wait_result = vm_object_assert_wait(
804 object,
805 VM_OBJECT_EVENT_PAGER_READY,
806 interruptible);
807 vm_object_unlock(object);
808 if (wait_result == THREAD_WAITING)
809 wait_result = thread_block(THREAD_CONTINUE_NULL);
810 vm_object_deallocate(object);
811 goto backoff;
812 } else {
813 vm_object_unlock(object);
814 vm_object_deallocate(object);
815 thread_interrupt_level(interruptible_state);
816 return VM_FAULT_RETRY;
817 }
818 }
819
820 new_unlock_request = m->unlock_request =
821 (access_required | m->unlock_request);
822 vm_object_unlock(object);
823 XPR(XPR_VM_FAULT,
824 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
825 (integer_t)object, offset,
826 (integer_t)m, new_unlock_request, 0);
827 if ((rc = memory_object_data_unlock(
828 object->pager,
829 offset + object->paging_offset,
830 PAGE_SIZE,
831 new_unlock_request))
832 != KERN_SUCCESS) {
833 if (vm_fault_debug)
834 printf("vm_fault: memory_object_data_unlock failed\n");
835 vm_object_lock(object);
836 vm_fault_cleanup(object, first_m);
837 thread_interrupt_level(interruptible_state);
838 return((rc == MACH_SEND_INTERRUPTED) ?
839 VM_FAULT_INTERRUPTED :
840 VM_FAULT_MEMORY_ERROR);
841 }
842 vm_object_lock(object);
843 continue;
844 }
845
846 XPR(XPR_VM_FAULT,
847 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
848 access_required, (integer_t)object,
849 offset, (integer_t)m, 0);
850 /* take an extra ref so object won't die */
851 assert(object->ref_count > 0);
852 object->ref_count++;
853 vm_object_res_reference(object);
854 vm_fault_cleanup(object, first_m);
855 counter(c_vm_fault_page_block_backoff_kernel++);
856 vm_object_lock(object);
857 assert(object->ref_count > 0);
858 m = vm_page_lookup(object, offset);
859 if (m != VM_PAGE_NULL &&
860 (access_required & m->page_lock) &&
861 !((access_required & m->unlock_request) != access_required)) {
862 PAGE_ASSERT_WAIT(m, interruptible);
863 vm_object_unlock(object);
864 wait_result = thread_block(THREAD_CONTINUE_NULL);
865 vm_object_deallocate(object);
866 goto backoff;
867 } else {
868 vm_object_unlock(object);
869 vm_object_deallocate(object);
870 thread_interrupt_level(interruptible_state);
871 return VM_FAULT_RETRY;
872 }
873 }
874 /*
875 * We mark the page busy and leave it on
876 * the pageout queues. If the pageout
877 * daemon comes across it, then it will
878 * remove the page.
879 */
880
881 #if TRACEFAULTPAGE
882 dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
883 #endif
884
885 #if !VM_FAULT_STATIC_CONFIG
886 if (!software_reference_bits) {
887 vm_page_lock_queues();
888 if (m->inactive)
889 vm_stat.reactivations++;
890
891 VM_PAGE_QUEUES_REMOVE(m);
892 vm_page_unlock_queues();
893 }
894 #endif
895 XPR(XPR_VM_FAULT,
896 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
897 (integer_t)object, offset, (integer_t)m, 0, 0);
898 assert(!m->busy);
899 m->busy = TRUE;
900 assert(!m->absent);
901 break;
902 }
903
904 look_for_page =
905 (object->pager_created) &&
906 LOOK_FOR(object, offset) &&
907 (!data_supply);
908
909 #if TRACEFAULTPAGE
910 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */
911 #endif
912 if ((look_for_page || (object == first_object))
913 && !must_be_resident
914 && !(object->phys_contiguous)) {
915 /*
916 * Allocate a new page for this object/offset
917 * pair.
918 */
919
920 m = vm_page_grab_fictitious();
921 #if TRACEFAULTPAGE
922 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
923 #endif
924 if (m == VM_PAGE_NULL) {
925 vm_fault_cleanup(object, first_m);
926 thread_interrupt_level(interruptible_state);
927 return(VM_FAULT_FICTITIOUS_SHORTAGE);
928 }
929 vm_page_insert(m, object, offset);
930 }
931
932 if ((look_for_page && !must_be_resident)) {
933 kern_return_t rc;
934
935 /*
936 * If the memory manager is not ready, we
937 * cannot make requests.
938 */
939 if (!object->pager_ready) {
940 #if TRACEFAULTPAGE
941 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
942 #endif
943 if(m != VM_PAGE_NULL)
944 VM_PAGE_FREE(m);
945 XPR(XPR_VM_FAULT,
946 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
947 (integer_t)object, offset, 0, 0, 0);
948 /* take an extra ref so object won't die */
949 assert(object->ref_count > 0);
950 object->ref_count++;
951 vm_object_res_reference(object);
952 vm_fault_cleanup(object, first_m);
953 counter(c_vm_fault_page_block_backoff_kernel++);
954 vm_object_lock(object);
955 assert(object->ref_count > 0);
956 if (!object->pager_ready) {
957 wait_result = vm_object_assert_wait(object,
958 VM_OBJECT_EVENT_PAGER_READY,
959 interruptible);
960 vm_object_unlock(object);
961 if (wait_result == THREAD_WAITING)
962 wait_result = thread_block(THREAD_CONTINUE_NULL);
963 vm_object_deallocate(object);
964 goto backoff;
965 } else {
966 vm_object_unlock(object);
967 vm_object_deallocate(object);
968 thread_interrupt_level(interruptible_state);
969 return VM_FAULT_RETRY;
970 }
971 }
972
973 if(object->phys_contiguous) {
974 if(m != VM_PAGE_NULL) {
975 VM_PAGE_FREE(m);
976 m = VM_PAGE_NULL;
977 }
978 goto no_clustering;
979 }
980 if (object->internal) {
981 /*
982 * Requests to the default pager
983 * must reserve a real page in advance,
984 * because the pager's data-provided
985 * messages won't block for pages. IMPORTANT:
986 * this acts as a throttling mechanism
987 * for data_requests to the default
988 * pager.
989 */
990
991 #if TRACEFAULTPAGE
992 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
993 #endif
994 if (m->fictitious && !vm_page_convert(m)) {
995 VM_PAGE_FREE(m);
996 vm_fault_cleanup(object, first_m);
997 thread_interrupt_level(interruptible_state);
998 return(VM_FAULT_MEMORY_SHORTAGE);
999 }
1000 } else if (object->absent_count >
1001 vm_object_absent_max) {
1002 /*
1003 * If there are too many outstanding page
1004 * requests pending on this object, we
1005 * wait for them to be resolved now.
1006 */
1007
1008 #if TRACEFAULTPAGE
1009 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1010 #endif
1011 if(m != VM_PAGE_NULL)
1012 VM_PAGE_FREE(m);
1013 /* take an extra ref so object won't die */
1014 assert(object->ref_count > 0);
1015 object->ref_count++;
1016 vm_object_res_reference(object);
1017 vm_fault_cleanup(object, first_m);
1018 counter(c_vm_fault_page_block_backoff_kernel++);
1019 vm_object_lock(object);
1020 assert(object->ref_count > 0);
1021 if (object->absent_count > vm_object_absent_max) {
1022 vm_object_absent_assert_wait(object,
1023 interruptible);
1024 vm_object_unlock(object);
1025 wait_result = thread_block(THREAD_CONTINUE_NULL);
1026 vm_object_deallocate(object);
1027 goto backoff;
1028 } else {
1029 vm_object_unlock(object);
1030 vm_object_deallocate(object);
1031 thread_interrupt_level(interruptible_state);
1032 return VM_FAULT_RETRY;
1033 }
1034 }
1035
1036 /*
1037 * Indicate that the page is waiting for data
1038 * from the memory manager.
1039 */
1040
1041 if(m != VM_PAGE_NULL) {
1042
1043 m->list_req_pending = TRUE;
1044 m->absent = TRUE;
1045 m->unusual = TRUE;
1046 object->absent_count++;
1047
1048 }
1049
1050 no_clustering:
1051 cluster_start = offset;
1052 length = PAGE_SIZE;
1053
1054 /*
1055 * lengthen the cluster by the pages in the working set
1056 */
1057 if((map != NULL) &&
1058 (current_task()->dynamic_working_set != 0)) {
1059 cluster_end = cluster_start + length;
1060 /* tws values for start and end are just
1061 * suggestions. Therefore, as long as
1062 * build_cluster does not use pointers or
1063 * take action based on values that
1064 * could be affected by re-entrance we
1065 * do not need to take the map lock.
1066 */
1067 cluster_end = offset + PAGE_SIZE_64;
1068 tws_build_cluster((tws_hash_t)
1069 current_task()->dynamic_working_set,
1070 object, &cluster_start,
1071 &cluster_end, 0x40000);
1072 length = cluster_end - cluster_start;
1073 }
1074 #if TRACEFAULTPAGE
1075 dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */
1076 #endif
1077 /*
1078 * We have a busy page, so we can
1079 * release the object lock.
1080 */
1081 vm_object_unlock(object);
1082
1083 /*
1084 * Call the memory manager to retrieve the data.
1085 */
1086
1087 if (type_of_fault)
1088 *type_of_fault = (length << 8) | DBG_PAGEIN_FAULT;
1089 VM_STAT(pageins++);
1090 current_task()->pageins++;
1091 bumped_pagein = TRUE;
1092
1093 /*
1094 * If this object uses a copy_call strategy,
1095 * and we are interested in a copy of this object
1096 * (having gotten here only by following a
1097 * shadow chain), then tell the memory manager
1098 * via a flag added to the desired_access
1099 * parameter, so that it can detect a race
1100 * between our walking down the shadow chain
1101 * and its pushing pages up into a copy of
1102 * the object that it manages.
1103 */
1104
1105 if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1106 object != first_object) {
1107 wants_copy_flag = VM_PROT_WANTS_COPY;
1108 } else {
1109 wants_copy_flag = VM_PROT_NONE;
1110 }
1111
1112 XPR(XPR_VM_FAULT,
1113 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1114 (integer_t)object, offset, (integer_t)m,
1115 access_required | wants_copy_flag, 0);
1116
1117 rc = memory_object_data_request(object->pager,
1118 cluster_start + object->paging_offset,
1119 length,
1120 access_required | wants_copy_flag);
1121
1122
1123 #if TRACEFAULTPAGE
1124 dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1125 #endif
1126 if (rc != KERN_SUCCESS) {
1127 if (rc != MACH_SEND_INTERRUPTED
1128 && vm_fault_debug)
1129 printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
1130 "memory_object_data_request",
1131 object->pager,
1132 cluster_start + object->paging_offset,
1133 length, access_required, rc);
1134 /*
1135 * Don't want to leave a busy page around,
1136 * but the data request may have blocked,
1137 * so check if it's still there and busy.
1138 */
1139 if(!object->phys_contiguous) {
1140 vm_object_lock(object);
1141 for (; length; length -= PAGE_SIZE,
1142 cluster_start += PAGE_SIZE_64) {
1143 vm_page_t p;
1144 if ((p = vm_page_lookup(object,
1145 cluster_start))
1146 && p->absent && p->busy
1147 && p != first_m) {
1148 VM_PAGE_FREE(p);
1149 }
1150 }
1151 }
1152 vm_fault_cleanup(object, first_m);
1153 thread_interrupt_level(interruptible_state);
1154 return((rc == MACH_SEND_INTERRUPTED) ?
1155 VM_FAULT_INTERRUPTED :
1156 VM_FAULT_MEMORY_ERROR);
1157 } else {
1158 #ifdef notdefcdy
1159 tws_hash_line_t line;
1160 task_t task;
1161
1162 task = current_task();
1163
1164 if((map != NULL) &&
1165 (task->dynamic_working_set != 0)
1166 && !(object->private)) {
1167 vm_object_t base_object;
1168 vm_object_offset_t base_offset;
1169 base_object = object;
1170 base_offset = offset;
1171 while(base_object->shadow) {
1172 base_offset +=
1173 base_object->shadow_offset;
1174 base_object =
1175 base_object->shadow;
1176 }
1177 if(tws_lookup
1178 ((tws_hash_t)
1179 task->dynamic_working_set,
1180 base_offset, base_object,
1181 &line) == KERN_SUCCESS) {
1182 tws_line_signal((tws_hash_t)
1183 task->dynamic_working_set,
1184 map, line, vaddr);
1185 }
1186 }
1187 #endif
1188 }
1189
1190 /*
1191 * Retry with same object/offset, since new data may
1192 * be in a different page (i.e., m is meaningless at
1193 * this point).
1194 */
1195 vm_object_lock(object);
1196 if ((interruptible != THREAD_UNINT) &&
1197 (current_thread()->state & TH_ABORT)) {
1198 vm_fault_cleanup(object, first_m);
1199 thread_interrupt_level(interruptible_state);
1200 return(VM_FAULT_INTERRUPTED);
1201 }
1202 if(m == VM_PAGE_NULL)
1203 break;
1204 continue;
1205 }
1206
1207 /*
1208 * The only case in which we get here is if
1209 * object has no pager (or unwiring). If the pager doesn't
1210 * have the page this is handled in the m->absent case above
1211 * (and if you change things here you should look above).
1212 */
1213 #if TRACEFAULTPAGE
1214 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1215 #endif
1216 if (object == first_object)
1217 first_m = m;
1218 else
1219 assert(m == VM_PAGE_NULL);
1220
1221 XPR(XPR_VM_FAULT,
1222 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1223 (integer_t)object, offset, (integer_t)m,
1224 (integer_t)object->shadow, 0);
1225 /*
1226 * Move on to the next object. Lock the next
1227 * object before unlocking the current one.
1228 */
1229 next_object = object->shadow;
1230 if (next_object == VM_OBJECT_NULL) {
1231 assert(!must_be_resident);
1232 /*
1233 * If there's no object left, fill the page
1234 * in the top object with zeros. But first we
1235 * need to allocate a real page.
1236 */
1237
1238 if (object != first_object) {
1239 vm_object_paging_end(object);
1240 vm_object_unlock(object);
1241
1242 object = first_object;
1243 offset = first_offset;
1244 vm_object_lock(object);
1245 }
1246
1247 m = first_m;
1248 assert(m->object == object);
1249 first_m = VM_PAGE_NULL;
1250
1251 if (object->shadow_severed) {
1252 VM_PAGE_FREE(m);
1253 vm_fault_cleanup(object, VM_PAGE_NULL);
1254 thread_interrupt_level(interruptible_state);
1255 return VM_FAULT_MEMORY_ERROR;
1256 }
1257
1258 if (VM_PAGE_THROTTLED() ||
1259 (m->fictitious && !vm_page_convert(m))) {
1260 VM_PAGE_FREE(m);
1261 vm_fault_cleanup(object, VM_PAGE_NULL);
1262 thread_interrupt_level(interruptible_state);
1263 return(VM_FAULT_MEMORY_SHORTAGE);
1264 }
1265 m->no_isync = FALSE;
1266
1267 if (!no_zero_fill) {
1268 vm_object_unlock(object);
1269 vm_page_zero_fill(m);
1270 if (type_of_fault)
1271 *type_of_fault = DBG_ZERO_FILL_FAULT;
1272 VM_STAT(zero_fill_count++);
1273
1274 if (bumped_pagein == TRUE) {
1275 VM_STAT(pageins--);
1276 current_task()->pageins--;
1277 }
1278 vm_object_lock(object);
1279 }
1280 vm_page_lock_queues();
1281 VM_PAGE_QUEUES_REMOVE(m);
1282 if(m->object->size > 0x80000) {
1283 m->zero_fill = TRUE;
1284 /* depends on the queues lock */
1285 vm_zf_count += 1;
1286 queue_enter(&vm_page_queue_zf,
1287 m, vm_page_t, pageq);
1288 } else {
1289 queue_enter(
1290 &vm_page_queue_inactive,
1291 m, vm_page_t, pageq);
1292 }
1293 m->page_ticket = vm_page_ticket;
1294 vm_page_ticket_roll++;
1295 if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1296 vm_page_ticket_roll = 0;
1297 if(vm_page_ticket ==
1298 VM_PAGE_TICKET_ROLL_IDS)
1299 vm_page_ticket= 0;
1300 else
1301 vm_page_ticket++;
1302 }
1303 m->inactive = TRUE;
1304 vm_page_inactive_count++;
1305 vm_page_unlock_queues();
1306 pmap_clear_modify(m->phys_addr);
1307 break;
1308 }
1309 else {
1310 if ((object != first_object) || must_be_resident)
1311 vm_object_paging_end(object);
1312 offset += object->shadow_offset;
1313 hi_offset += object->shadow_offset;
1314 lo_offset += object->shadow_offset;
1315 access_required = VM_PROT_READ;
1316 vm_object_lock(next_object);
1317 vm_object_unlock(object);
1318 object = next_object;
1319 vm_object_paging_begin(object);
1320 }
1321 }
1322
1323 /*
1324 * PAGE HAS BEEN FOUND.
1325 *
1326 * This page (m) is:
1327 * busy, so that we can play with it;
1328 * not absent, so that nobody else will fill it;
1329 * possibly eligible for pageout;
1330 *
1331 * The top-level page (first_m) is:
1332 * VM_PAGE_NULL if the page was found in the
1333 * top-level object;
1334 * busy, not absent, and ineligible for pageout.
1335 *
1336 * The current object (object) is locked. A paging
1337 * reference is held for the current and top-level
1338 * objects.
1339 */
1340
1341 #if TRACEFAULTPAGE
1342 dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1343 #endif
1344 #if EXTRA_ASSERTIONS
1345 if(m != VM_PAGE_NULL) {
1346 assert(m->busy && !m->absent);
1347 assert((first_m == VM_PAGE_NULL) ||
1348 (first_m->busy && !first_m->absent &&
1349 !first_m->active && !first_m->inactive));
1350 }
1351 #endif /* EXTRA_ASSERTIONS */
1352
1353 XPR(XPR_VM_FAULT,
1354 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1355 (integer_t)object, offset, (integer_t)m,
1356 (integer_t)first_object, (integer_t)first_m);
1357 /*
1358 * If the page is being written, but isn't
1359 * already owned by the top-level object,
1360 * we have to copy it into a new page owned
1361 * by the top-level object.
1362 */
1363
1364 if ((object != first_object) && (m != VM_PAGE_NULL)) {
1365 /*
1366 * We only really need to copy if we
1367 * want to write it.
1368 */
1369
1370 #if TRACEFAULTPAGE
1371 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1372 #endif
1373 if (fault_type & VM_PROT_WRITE) {
1374 vm_page_t copy_m;
1375
1376 assert(!must_be_resident);
1377
1378 /*
1379 * If we try to collapse first_object at this
1380 * point, we may deadlock when we try to get
1381 * the lock on an intermediate object (since we
1382 * have the bottom object locked). We can't
1383 * unlock the bottom object, because the page
1384 * we found may move (by collapse) if we do.
1385 *
1386 * Instead, we first copy the page. Then, when
1387 * we have no more use for the bottom object,
1388 * we unlock it and try to collapse.
1389 *
1390 * Note that we copy the page even if we didn't
1391 * need to... that's the breaks.
1392 */
1393
1394 /*
1395 * Allocate a page for the copy
1396 */
1397 copy_m = vm_page_grab();
1398 if (copy_m == VM_PAGE_NULL) {
1399 RELEASE_PAGE(m);
1400 vm_fault_cleanup(object, first_m);
1401 thread_interrupt_level(interruptible_state);
1402 return(VM_FAULT_MEMORY_SHORTAGE);
1403 }
1404
1405
1406 XPR(XPR_VM_FAULT,
1407 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1408 (integer_t)object, offset,
1409 (integer_t)m, (integer_t)copy_m, 0);
1410 vm_page_copy(m, copy_m);
1411
1412 /*
1413 * If another map is truly sharing this
1414 * page with us, we have to flush all
1415 * uses of the original page, since we
1416 * can't distinguish those which want the
1417 * original from those which need the
1418 * new copy.
1419 *
1420 * XXXO If we know that only one map has
1421 * access to this page, then we could
1422 * avoid the pmap_page_protect() call.
1423 */
1424
1425 vm_page_lock_queues();
1426 assert(!m->cleaning);
1427 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1428 vm_page_deactivate(m);
1429 copy_m->dirty = TRUE;
1430 /*
1431 * Setting reference here prevents this fault from
1432 * being counted as a (per-thread) reactivate as well
1433 * as a copy-on-write.
1434 */
1435 first_m->reference = TRUE;
1436 vm_page_unlock_queues();
1437
1438 /*
1439 * We no longer need the old page or object.
1440 */
1441
1442 PAGE_WAKEUP_DONE(m);
1443 vm_object_paging_end(object);
1444 vm_object_unlock(object);
1445
1446 if (type_of_fault)
1447 *type_of_fault = DBG_COW_FAULT;
1448 VM_STAT(cow_faults++);
1449 current_task()->cow_faults++;
1450 object = first_object;
1451 offset = first_offset;
1452
1453 vm_object_lock(object);
1454 VM_PAGE_FREE(first_m);
1455 first_m = VM_PAGE_NULL;
1456 assert(copy_m->busy);
1457 vm_page_insert(copy_m, object, offset);
1458 m = copy_m;
1459
1460 /*
1461 * Now that we've gotten the copy out of the
1462 * way, let's try to collapse the top object.
1463 * But we have to play ugly games with
1464 * paging_in_progress to do that...
1465 */
1466
1467 vm_object_paging_end(object);
1468 vm_object_collapse(object);
1469 vm_object_paging_begin(object);
1470
1471 }
1472 else {
1473 *protection &= (~VM_PROT_WRITE);
1474 }
1475 }
1476
1477 /*
1478 * Now check whether the page needs to be pushed into the
1479 * copy object. The use of asymmetric copy on write for
1480 * shared temporary objects means that we may do two copies to
1481 * satisfy the fault; one above to get the page from a
1482 * shadowed object, and one here to push it into the copy.
1483 */
1484
1485 while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
1486 (m!= VM_PAGE_NULL)) {
1487 vm_object_offset_t copy_offset;
1488 vm_page_t copy_m;
1489
1490 #if TRACEFAULTPAGE
1491 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1492 #endif
1493 /*
1494 * If the page is being written, but hasn't been
1495 * copied to the copy-object, we have to copy it there.
1496 */
1497
1498 if ((fault_type & VM_PROT_WRITE) == 0) {
1499 *protection &= ~VM_PROT_WRITE;
1500 break;
1501 }
1502
1503 /*
1504 * If the page was guaranteed to be resident,
1505 * we must have already performed the copy.
1506 */
1507
1508 if (must_be_resident)
1509 break;
1510
1511 /*
1512 * Try to get the lock on the copy_object.
1513 */
1514 if (!vm_object_lock_try(copy_object)) {
1515 vm_object_unlock(object);
1516
1517 mutex_pause(); /* wait a bit */
1518
1519 vm_object_lock(object);
1520 continue;
1521 }
1522
1523 /*
1524 * Make another reference to the copy-object,
1525 * to keep it from disappearing during the
1526 * copy.
1527 */
1528 assert(copy_object->ref_count > 0);
1529 copy_object->ref_count++;
1530 VM_OBJ_RES_INCR(copy_object);
1531
1532 /*
1533 * Does the page exist in the copy?
1534 */
1535 copy_offset = first_offset - copy_object->shadow_offset;
1536 if (copy_object->size <= copy_offset)
1537 /*
1538 * Copy object doesn't cover this page -- do nothing.
1539 */
1540 ;
1541 else if ((copy_m =
1542 vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1543 /* Page currently exists in the copy object */
1544 if (copy_m->busy) {
1545 /*
1546 * If the page is being brought
1547 * in, wait for it and then retry.
1548 */
1549 RELEASE_PAGE(m);
1550 /* take an extra ref so object won't die */
1551 assert(copy_object->ref_count > 0);
1552 copy_object->ref_count++;
1553 vm_object_res_reference(copy_object);
1554 vm_object_unlock(copy_object);
1555 vm_fault_cleanup(object, first_m);
1556 counter(c_vm_fault_page_block_backoff_kernel++);
1557 vm_object_lock(copy_object);
1558 assert(copy_object->ref_count > 0);
1559 VM_OBJ_RES_DECR(copy_object);
1560 copy_object->ref_count--;
1561 assert(copy_object->ref_count > 0);
1562 copy_m = vm_page_lookup(copy_object, copy_offset);
1563 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1564 PAGE_ASSERT_WAIT(copy_m, interruptible);
1565 vm_object_unlock(copy_object);
1566 wait_result = thread_block(THREAD_CONTINUE_NULL);
1567 vm_object_deallocate(copy_object);
1568 goto backoff;
1569 } else {
1570 vm_object_unlock(copy_object);
1571 vm_object_deallocate(copy_object);
1572 thread_interrupt_level(interruptible_state);
1573 return VM_FAULT_RETRY;
1574 }
1575 }
1576 }
1577 else if (!PAGED_OUT(copy_object, copy_offset)) {
1578 /*
1579 * If PAGED_OUT is TRUE, then the page used to exist
1580 * in the copy-object, and has already been paged out.
1581 * We don't need to repeat this. If PAGED_OUT is
1582 * FALSE, then either we don't know (!pager_created,
1583 * for example) or it hasn't been paged out.
1584 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1585 * We must copy the page to the copy object.
1586 */
1587
1588 /*
1589 * Allocate a page for the copy
1590 */
1591 copy_m = vm_page_alloc(copy_object, copy_offset);
1592 if (copy_m == VM_PAGE_NULL) {
1593 RELEASE_PAGE(m);
1594 VM_OBJ_RES_DECR(copy_object);
1595 copy_object->ref_count--;
1596 assert(copy_object->ref_count > 0);
1597 vm_object_unlock(copy_object);
1598 vm_fault_cleanup(object, first_m);
1599 thread_interrupt_level(interruptible_state);
1600 return(VM_FAULT_MEMORY_SHORTAGE);
1601 }
1602
1603 /*
1604 * Must copy page into copy-object.
1605 */
1606
1607 vm_page_copy(m, copy_m);
1608
1609 /*
1610 * If the old page was in use by any users
1611 * of the copy-object, it must be removed
1612 * from all pmaps. (We can't know which
1613 * pmaps use it.)
1614 */
1615
1616 vm_page_lock_queues();
1617 assert(!m->cleaning);
1618 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1619 copy_m->dirty = TRUE;
1620 vm_page_unlock_queues();
1621
1622 /*
1623 * If there's a pager, then immediately
1624 * page out this page, using the "initialize"
1625 * option. Else, we use the copy.
1626 */
1627
1628 if
1629 #if MACH_PAGEMAP
1630 ((!copy_object->pager_created) ||
1631 vm_external_state_get(
1632 copy_object->existence_map, copy_offset)
1633 == VM_EXTERNAL_STATE_ABSENT)
1634 #else
1635 (!copy_object->pager_created)
1636 #endif
1637 {
1638 vm_page_lock_queues();
1639 vm_page_activate(copy_m);
1640 vm_page_unlock_queues();
1641 PAGE_WAKEUP_DONE(copy_m);
1642 }
1643 else {
1644 assert(copy_m->busy == TRUE);
1645
1646 /*
1647 * The page is already ready for pageout:
1648 * not on pageout queues and busy.
1649 * Unlock everything except the
1650 * copy_object itself.
1651 */
1652
1653 vm_object_unlock(object);
1654
1655 /*
1656 * Write the page to the copy-object,
1657 * flushing it from the kernel.
1658 */
1659
1660 vm_pageout_initialize_page(copy_m);
1661
1662 /*
1663 * Since the pageout may have
1664 * temporarily dropped the
1665 * copy_object's lock, we
1666 * check whether we'll have
1667 * to deallocate the hard way.
1668 */
1669
1670 if ((copy_object->shadow != object) ||
1671 (copy_object->ref_count == 1)) {
1672 vm_object_unlock(copy_object);
1673 vm_object_deallocate(copy_object);
1674 vm_object_lock(object);
1675 continue;
1676 }
1677
1678 /*
1679 * Pick back up the old object's
1680 * lock. [It is safe to do so,
1681 * since it must be deeper in the
1682 * object tree.]
1683 */
1684
1685 vm_object_lock(object);
1686 }
1687
1688 /*
1689 * Because we're pushing a page upward
1690 * in the object tree, we must restart
1691 * any faults that are waiting here.
1692 * [Note that this is an expansion of
1693 * PAGE_WAKEUP that uses the THREAD_RESTART
1694 * wait result]. Can't turn off the page's
1695 * busy bit because we're not done with it.
1696 */
1697
1698 if (m->wanted) {
1699 m->wanted = FALSE;
1700 thread_wakeup_with_result((event_t) m,
1701 THREAD_RESTART);
1702 }
1703 }
1704
1705 /*
1706 * The reference count on copy_object must be
1707 * at least 2: one for our extra reference,
1708 * and at least one from the outside world
1709 * (we checked that when we last locked
1710 * copy_object).
1711 */
1712 copy_object->ref_count--;
1713 assert(copy_object->ref_count > 0);
1714 VM_OBJ_RES_DECR(copy_object);
1715 vm_object_unlock(copy_object);
1716
1717 break;
1718 }
1719
1720 *result_page = m;
1721 *top_page = first_m;
1722
1723 XPR(XPR_VM_FAULT,
1724 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1725 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1726 /*
1727 * If the page can be written, assume that it will be.
1728 * [Earlier, we restrict the permission to allow write
1729 * access only if the fault so required, so we don't
1730 * mark read-only data as dirty.]
1731 */
1732
1733 #if !VM_FAULT_STATIC_CONFIG
1734 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE) &&
1735 (m != VM_PAGE_NULL)) {
1736 m->dirty = TRUE;
1737 }
1738 #endif
1739 #if TRACEFAULTPAGE
1740 dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_page_deactivate_behind); /* (TEST/DEBUG) */
1741 #endif
1742 if (vm_page_deactivate_behind) {
1743 if (offset && /* don't underflow */
1744 (object->last_alloc == (offset - PAGE_SIZE_64))) {
1745 m = vm_page_lookup(object, object->last_alloc);
1746 if ((m != VM_PAGE_NULL) && !m->busy) {
1747 vm_page_lock_queues();
1748 vm_page_deactivate(m);
1749 vm_page_unlock_queues();
1750 }
1751 #if TRACEFAULTPAGE
1752 dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1753 #endif
1754 }
1755 object->last_alloc = offset;
1756 }
1757 #if TRACEFAULTPAGE
1758 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */
1759 #endif
1760 thread_interrupt_level(interruptible_state);
1761 if(*result_page == VM_PAGE_NULL) {
1762 vm_object_unlock(object);
1763 }
1764 return(VM_FAULT_SUCCESS);
1765
1766 #if 0
1767 block_and_backoff:
1768 vm_fault_cleanup(object, first_m);
1769
1770 counter(c_vm_fault_page_block_backoff_kernel++);
1771 thread_block(THREAD_CONTINUE_NULL);
1772 #endif
1773
1774 backoff:
1775 thread_interrupt_level(interruptible_state);
1776 if (wait_result == THREAD_INTERRUPTED)
1777 return VM_FAULT_INTERRUPTED;
1778 return VM_FAULT_RETRY;
1779
1780 #undef RELEASE_PAGE
1781 }
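
/*
 * Illustrative sketch (not part of the build): how a caller such as
 * vm_fault() below typically consumes the result of vm_fault_page().  On
 * any return other than VM_FAULT_SUCCESS the routine has already done its
 * own cleanup; on success the caller owns a busy result_page whose object
 * is locked with a paging reference.  Variable names mirror those used in
 * vm_fault() and are otherwise hypothetical.
 */
#if 0
	kr = vm_fault_page(object, offset, fault_type,
			   (change_wiring && !wired), interruptible,
			   lo_offset, hi_offset, behavior,
			   &prot, &result_page, &top_page,
			   &type_of_fault, &error_code, FALSE, FALSE,
			   map, vaddr);

	switch (kr) {
	case VM_FAULT_SUCCESS:
		/* map result_page, then release it and top_page via
		 * PAGE_WAKEUP_DONE() and vm_fault_cleanup() */
		break;
	case VM_FAULT_MEMORY_SHORTAGE:
		VM_PAGE_WAIT();		/* wait for free pages... */
		/* FALLTHROUGH */
	case VM_FAULT_RETRY:
		/* ...and restart the lookup from the top */
		break;
	case VM_FAULT_INTERRUPTED:
		/* abort; no pages or locks are held */
		break;
	case VM_FAULT_MEMORY_ERROR:
		/* report error_code if the pager supplied one */
		break;
	}
#endif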
1782
1783 /*
1784 * Routine: vm_fault
1785 * Purpose:
1786 * Handle page faults, including pseudo-faults
1787 * used to change the wiring status of pages.
1788 * Returns:
1789 * Explicit continuations have been removed.
1790 * Implementation:
1791 * vm_fault and vm_fault_page save mucho state
1792 * in the moral equivalent of a closure. The state
1793 * structure is allocated when first entering vm_fault
1794 * and deallocated when leaving vm_fault.
1795 */
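
/*
 * Illustrative sketch (not part of the build): a typical call to vm_fault()
 * as it might appear in a machine-dependent trap handler.  The map, address
 * and protection are hypothetical; a NULL caller_pmap means the mapping is
 * entered into the pmap of the map found by the lookup.
 */
#if 0
	kr = vm_fault(current_map(),		/* map containing the address */
		      trunc_page(fault_addr),	/* faulting virtual address */
		      VM_PROT_READ,		/* access that caused the fault */
		      FALSE,			/* not a wiring change */
		      THREAD_ABORTSAFE,		/* may be interrupted */
		      NULL, 0);			/* no caller-supplied pmap */
#endif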
1796
1797 kern_return_t
1798 vm_fault(
1799 vm_map_t map,
1800 vm_offset_t vaddr,
1801 vm_prot_t fault_type,
1802 boolean_t change_wiring,
1803 int interruptible,
1804 pmap_t caller_pmap,
1805 vm_offset_t caller_pmap_addr)
1806 {
1807 vm_map_version_t version; /* Map version for verification */
1808 boolean_t wired; /* Should mapping be wired down? */
1809 vm_object_t object; /* Top-level object */
1810 vm_object_offset_t offset; /* Top-level offset */
1811 vm_prot_t prot; /* Protection for mapping */
1812 vm_behavior_t behavior; /* Expected paging behavior */
1813 vm_object_offset_t lo_offset, hi_offset;
1814 vm_object_t old_copy_object; /* Saved copy object */
1815 vm_page_t result_page; /* Result of vm_fault_page */
1816 vm_page_t top_page; /* Placeholder page */
1817 kern_return_t kr;
1818
1819 register
1820 vm_page_t m; /* Fast access to result_page */
1821 kern_return_t error_code; /* page error reasons */
1822 register
1823 vm_object_t cur_object;
1824 register
1825 vm_object_offset_t cur_offset;
1826 vm_page_t cur_m;
1827 vm_object_t new_object;
1828 int type_of_fault;
1829 vm_map_t pmap_map = map;
1830 vm_map_t original_map = map;
1831 pmap_t pmap = NULL;
1832 boolean_t funnel_set = FALSE;
1833 funnel_t *curflock;
1834 thread_t cur_thread;
1835 boolean_t interruptible_state;
1836 unsigned int cache_attr;
1837 int write_startup_file = 0;
1838 vm_prot_t full_fault_type;
1839
1840
1841
1842 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
1843 vaddr,
1844 0,
1845 0,
1846 0,
1847 0);
1848
1849 cur_thread = current_thread();
1850 /* at present we do not fully check for execute permission */
1851 /* we generally treat it as read except in certain device */
1852 /* memory settings */
1853 full_fault_type = fault_type;
1854 if(fault_type & VM_PROT_EXECUTE) {
1855 fault_type &= ~VM_PROT_EXECUTE;
1856 fault_type |= VM_PROT_READ;
1857 }
1858
1859 interruptible_state = thread_interrupt_level(interruptible);
1860
1861 /*
1862 * assume we will hit a page in the cache
1863 * otherwise, explicitly override with
1864 * the real fault type once we determine it
1865 */
1866 type_of_fault = DBG_CACHE_HIT_FAULT;
1867
1868 VM_STAT(faults++);
1869 current_task()->faults++;
1870
1871 /*
1872 * drop funnel if it is already held. Then restore while returning
1873 */
1874 if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) {
1875 funnel_set = TRUE;
1876 curflock = cur_thread->funnel_lock;
1877 thread_funnel_set( curflock , FALSE);
1878 }
1879
1880 RetryFault: ;
1881
1882 /*
1883 * Find the backing store object and offset into
1884 * it to begin the search.
1885 */
1886 map = original_map;
1887 vm_map_lock_read(map);
1888 kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
1889 &object, &offset,
1890 &prot, &wired,
1891 &behavior, &lo_offset, &hi_offset, &pmap_map);
1892
1893 pmap = pmap_map->pmap;
1894
1895 if (kr != KERN_SUCCESS) {
1896 vm_map_unlock_read(map);
1897 goto done;
1898 }
1899
1900 /*
1901 * If the page is wired, we must fault for the current protection
1902 * value, to avoid further faults.
1903 */
1904
1905 if (wired)
1906 fault_type = prot | VM_PROT_WRITE;
1907
1908 #if VM_FAULT_CLASSIFY
1909 /*
1910 * Temporary data gathering code
1911 */
1912 vm_fault_classify(object, offset, fault_type);
1913 #endif
1914 /*
1915 * Fast fault code. The basic idea is to do as much as
1916 * possible while holding the map lock and object locks.
1917 * Busy pages are not used until the object lock has to
1918 * be dropped to do something (copy, zero fill, pmap enter).
1919 * Similarly, paging references aren't acquired until that
1920 * point, and object references aren't used.
1921 *
1922 * If we can figure out what to do
1923 * (zero fill, copy on write, pmap enter) while holding
1924 * the locks, then it gets done. Otherwise, we give up,
1925 * and use the original fault path (which doesn't hold
1926 * the map lock, and relies on busy pages).
1927 * The give up cases include:
1928 * - Have to talk to pager.
1929 * - Page is busy, absent or in error.
1930 * - Pager has locked out desired access.
1931 * - Fault needs to be restarted.
1932 * - Have to push page into copy object.
1933 *
1934 * The code is an infinite loop that moves one level down
1935 * the shadow chain each time. cur_object and cur_offset
1936 * refer to the current object being examined. object and offset
1937 * are the original object from the map. The loop is at the
1938 * top level if and only if object and cur_object are the same.
1939 *
1940 * Invariants: Map lock is held throughout. Lock is held on
1941 * original object and cur_object (if different) when
1942 * continuing or exiting loop.
1943 *
1944 */
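/*
 * Illustrative sketch of the loop below (pseudocode only; the
 * busy-page waits, lock juggling and working-set bookkeeping
 * are omitted):
 *
 *	cur_object = object; cur_offset = offset;
 *	for (;;) {
 *		m = vm_page_lookup(cur_object, cur_offset);
 *		if (m != VM_PAGE_NULL)
 *			map it in, or copy it up into 'object' (COW),
 *			then FastPmapEnter;
 *		else if (cur_object->pager_created)
 *			break;			give up, slow path
 *		else if (cur_object->shadow == VM_OBJECT_NULL)
 *			zero fill a fresh page in 'object',
 *			then FastPmapEnter;
 *		else {
 *			cur_offset += cur_object->shadow_offset;
 *			cur_object = cur_object->shadow;
 *		}
 *	}
 */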
1945
1946
1947 /*
1948 * If this page is to be inserted in a copy delay object
1949 * for writing, and if the object has a copy, then the
1950 * copy delay strategy is implemented in the slow fault path (vm_fault_page).
1951 */
1952 if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
1953 object->copy == VM_OBJECT_NULL ||
1954 (fault_type & VM_PROT_WRITE) == 0) {
1955 cur_object = object;
1956 cur_offset = offset;
1957
1958 while (TRUE) {
1959 m = vm_page_lookup(cur_object, cur_offset);
1960 if (m != VM_PAGE_NULL) {
1961 if (m->busy) {
1962 wait_result_t result;
1963
1964 if (object != cur_object)
1965 vm_object_unlock(object);
1966
1967 vm_map_unlock_read(map);
1968 if (pmap_map != map)
1969 vm_map_unlock(pmap_map);
1970
1971 #if !VM_FAULT_STATIC_CONFIG
1972 if (!vm_fault_interruptible)
1973 interruptible = THREAD_UNINT;
1974 #endif
1975 result = PAGE_ASSERT_WAIT(m, interruptible);
1976
1977 vm_object_unlock(cur_object);
1978
1979 if (result == THREAD_WAITING) {
1980 result = thread_block(THREAD_CONTINUE_NULL);
1981
1982 counter(c_vm_fault_page_block_busy_kernel++);
1983 }
1984 if (result == THREAD_AWAKENED || result == THREAD_RESTART)
1985 goto RetryFault;
1986
1987 kr = KERN_ABORTED;
1988 goto done;
1989 }
1990 if (m->unusual && (m->error || m->restart || m->private
1991 || m->absent || (fault_type & m->page_lock))) {
1992
1993 /*
1994 * Unusual case. Give up.
1995 */
1996 break;
1997 }
1998
1999 /*
2000 * Two cases of map-in faults:
2001 * - At top level w/o copy object.
2002 * - Read fault anywhere.
2003 * --> must disallow write.
2004 */
2005
2006 if (object == cur_object &&
2007 object->copy == VM_OBJECT_NULL)
2008 goto FastMapInFault;
2009
2010 if ((fault_type & VM_PROT_WRITE) == 0) {
2011
2012 prot &= ~VM_PROT_WRITE;
2013
2014 /*
2015 * Set up to map the page ...
2016 * mark the page busy, drop
2017 * locks and take a paging reference
2018 * on the object with the page.
2019 */
2020
2021 if (object != cur_object) {
2022 vm_object_unlock(object);
2023 object = cur_object;
2024 }
2025 FastMapInFault:
2026 m->busy = TRUE;
2027
2028 vm_object_paging_begin(object);
2029
2030 FastPmapEnter:
2031 /*
2032 * Check a couple of global reasons to
2033 * be conservative about write access.
2034 * Then do the pmap_enter.
2035 */
2036 #if !VM_FAULT_STATIC_CONFIG
2037 if (vm_fault_dirty_handling
2038 #if MACH_KDB
2039 || db_watchpoint_list
2040 #endif
2041 && (fault_type & VM_PROT_WRITE) == 0)
2042 prot &= ~VM_PROT_WRITE;
2043 #else /* STATIC_CONFIG */
2044 #if MACH_KDB
2045 if (db_watchpoint_list
2046 && (fault_type & VM_PROT_WRITE) == 0)
2047 prot &= ~VM_PROT_WRITE;
2048 #endif /* MACH_KDB */
2049 #endif /* STATIC_CONFIG */
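/*
 * With kernel debugger watchpoints active (db_watchpoint_list),
 * read faults are entered without VM_PROT_WRITE so that a later
 * store to the page still takes a fault where it can be checked
 * against the watchpoint list.
 */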
2050 if (m->no_isync == TRUE) {
2051 pmap_sync_caches_phys(m->phys_addr);
2052 m->no_isync = FALSE;
2053 }
2054
2055 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2056 if(caller_pmap) {
2057 PMAP_ENTER(caller_pmap,
2058 caller_pmap_addr, m,
2059 prot, cache_attr, wired);
2060 } else {
2061 PMAP_ENTER(pmap, vaddr, m,
2062 prot, cache_attr, wired);
2063 }
2064
2065 /*
2066 * Grab the queues lock to manipulate
2067 * the page queues. Change wiring
2068 * case is obvious. In soft ref bits
2069 * case activate page only if it fell
2070 * off paging queues, otherwise just
2071 * activate it if it's inactive.
2072 *
2073 * NOTE: original vm_fault code will
2074 * move active page to back of active
2075 * queue. This code doesn't.
2076 */
2077 vm_page_lock_queues();
2078
2079 if (m->clustered) {
2080 vm_pagein_cluster_used++;
2081 m->clustered = FALSE;
2082 }
2083 m->reference = TRUE;
2084
2085 if (change_wiring) {
2086 if (wired)
2087 vm_page_wire(m);
2088 else
2089 vm_page_unwire(m);
2090 }
2091 #if VM_FAULT_STATIC_CONFIG
2092 else {
2093 if (!m->active && !m->inactive)
2094 vm_page_activate(m);
2095 }
2096 #else
2097 else if (software_reference_bits) {
2098 if (!m->active && !m->inactive)
2099 vm_page_activate(m);
2100 }
2101 else if (!m->active) {
2102 vm_page_activate(m);
2103 }
2104 #endif
2105 vm_page_unlock_queues();
2106
2107 /*
2108 * That's it, clean up and return.
2109 */
2110 PAGE_WAKEUP_DONE(m);
2111 vm_object_paging_end(object);
2112
2113 {
2114 tws_hash_line_t line;
2115 task_t task;
2116
2117 task = current_task();
2118 if((map != NULL) &&
2119 (task->dynamic_working_set != 0) &&
2120 !(object->private)) {
2121 kern_return_t kr;
2122 vm_object_t base_object;
2123 vm_object_offset_t base_offset;
2124 base_object = object;
2125 base_offset = cur_offset;
2126 while(base_object->shadow) {
2127 base_offset +=
2128 base_object->shadow_offset;
2129 base_object =
2130 base_object->shadow;
2131 }
2132 kr = tws_lookup((tws_hash_t)
2133 task->dynamic_working_set,
2134 base_offset, base_object,
2135 &line);
2136 if(kr == KERN_OPERATION_TIMED_OUT){
2137 write_startup_file = 1;
2138 } else if (kr != KERN_SUCCESS) {
2139 kr = tws_insert((tws_hash_t)
2140 task->dynamic_working_set,
2141 base_offset, base_object,
2142 vaddr, pmap_map);
2143 if(kr == KERN_NO_SPACE) {
2144 vm_object_unlock(object);
2145
2146 tws_expand_working_set(
2147 task->dynamic_working_set,
2148 TWS_HASH_LINE_COUNT,
2149 FALSE);
2150
2151 vm_object_lock(object);
2152 }
2153 if(kr ==
2154 KERN_OPERATION_TIMED_OUT) {
2155 write_startup_file = 1;
2156 }
2157 }
2158 }
2159 }
2160 vm_object_unlock(object);
2161
2162 vm_map_unlock_read(map);
2163 if(pmap_map != map)
2164 vm_map_unlock(pmap_map);
2165
2166 if(write_startup_file)
2167 tws_send_startup_info(current_task());
2168
2169 if (funnel_set)
2170 thread_funnel_set( curflock, TRUE);
2171
2172 thread_interrupt_level(interruptible_state);
2173
2174
2175 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2176 vaddr,
2177 type_of_fault & 0xff,
2178 KERN_SUCCESS,
2179 type_of_fault >> 8,
2180 0);
2181
2182 return KERN_SUCCESS;
2183 }
2184
2185 /*
2186 * Copy on write fault. If objects match, then
2187 * object->copy must not be NULL (else control
2188 * would be in previous code block), and we
2189 * have a potential push into the copy object
2190 * with which we won't cope here.
2191 */
2192
2193 if (cur_object == object)
2194 break;
2195 /*
2196 * This is now a shadow based copy on write
2197 * fault -- it requires a copy up the shadow
2198 * chain.
2199 *
2200 * Allocate a page in the original top level
2201 * object. Give up if the allocation fails. Also
2202 * need to remember current page, as it's the
2203 * source of the copy.
2204 */
2205 cur_m = m;
2206 m = vm_page_grab();
2207 if (m == VM_PAGE_NULL) {
2208 break;
2209 }
2210 /*
2211 * Now do the copy. Mark the source busy
2212 * and take out paging references on both
2213 * objects.
2214 *
2215 * NOTE: This code holds the map lock across
2216 * the page copy.
2217 */
2218
2219 cur_m->busy = TRUE;
2220 vm_page_copy(cur_m, m);
2221 vm_page_insert(m, object, offset);
2222
2223 vm_object_paging_begin(cur_object);
2224 vm_object_paging_begin(object);
2225
2226 type_of_fault = DBG_COW_FAULT;
2227 VM_STAT(cow_faults++);
2228 current_task()->cow_faults++;
2229
2230 /*
2231 * Now cope with the source page and object
2232 * If the top object has a ref count of 1
2233 * then no other map can access it, and hence
2234 * it's not necessary to do the pmap_page_protect.
2235 */
2236
2237
2238 vm_page_lock_queues();
2239 vm_page_deactivate(cur_m);
2240 m->dirty = TRUE;
2241 pmap_page_protect(cur_m->phys_addr,
2242 VM_PROT_NONE);
2243 vm_page_unlock_queues();
2244
2245 PAGE_WAKEUP_DONE(cur_m);
2246 vm_object_paging_end(cur_object);
2247 vm_object_unlock(cur_object);
2248
2249 /*
2250 * Slight hack to call vm_object collapse
2251 * and then reuse the common map-in code.
2252 * note that the object lock was taken above.
2253 */
2254
2255 vm_object_paging_end(object);
2256 vm_object_collapse(object);
2257 vm_object_paging_begin(object);
2258
2259 goto FastPmapEnter;
2260 }
2261 else {
2262
2263 /*
2264 * No page at cur_object, cur_offset
2265 */
2266
2267 if (cur_object->pager_created) {
2268
2269 /*
2270 * Have to talk to the pager. Give up.
2271 */
2272 break;
2273 }
2274
2275
2276 if (cur_object->shadow == VM_OBJECT_NULL) {
2277
2278 if (cur_object->shadow_severed) {
2279 vm_object_paging_end(object);
2280 vm_object_unlock(object);
2281 vm_map_unlock_read(map);
2282 if(pmap_map != map)
2283 vm_map_unlock(pmap_map);
2284
2285 if(write_startup_file)
2286 tws_send_startup_info(
2287 current_task());
2288
2289 if (funnel_set) {
2290 thread_funnel_set( curflock, TRUE);
2291 funnel_set = FALSE;
2292 }
2293 thread_interrupt_level(interruptible_state);
2294
2295 return VM_FAULT_MEMORY_ERROR;
2296 }
2297
2298 /*
2299 * Zero fill fault. Page gets
2300 * filled in top object. Insert
2301 * page, then drop any lower lock.
2302 * Give up if no page.
2303 */
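/*
 * The fast path gives up on zero fill once free memory has fallen
 * more than a quarter of the way from vm_page_free_target toward
 * vm_page_free_min.  With illustrative values vm_page_free_target
 * == 3500 and vm_page_free_min == 1500, the cutoff is
 * 3500 - ((3500 - 1500) >> 2) = 3000 pages; below that,
 * break out and let the slow path handle the fault.
 */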
2304 if ((vm_page_free_target -
2305 ((vm_page_free_target-vm_page_free_min)>>2))
2306 > vm_page_free_count) {
2307 break;
2308 }
2309 m = vm_page_alloc(object, offset);
2310 if (m == VM_PAGE_NULL) {
2311 break;
2312 }
2313 /*
2314 * This is a zero-fill or initial fill
2315 * page fault. As such, we consider it
2316 * undefined with respect to instruction
2317 * execution; i.e., it is the responsibility
2318 * of higher layers to call for an instruction
2319 * sync after changing the contents and before
2320 * sending a program into this area. We
2321 * choose this approach for performance.
2322 */
2323
2324 m->no_isync = FALSE;
2325
2326 if (cur_object != object)
2327 vm_object_unlock(cur_object);
2328
2329 vm_object_paging_begin(object);
2330 vm_object_unlock(object);
2331
2332 /*
2333 * Now zero fill page and map it.
2334 * the page is probably going to
2335 * be written soon, so don't bother
2336 * to clear the modified bit
2337 *
2338 * NOTE: This code holds the map
2339 * lock across the zero fill.
2340 */
2341
2342 if (!map->no_zero_fill) {
2343 vm_page_zero_fill(m);
2344 type_of_fault = DBG_ZERO_FILL_FAULT;
2345 VM_STAT(zero_fill_count++);
2346 }
2347 vm_page_lock_queues();
2348 VM_PAGE_QUEUES_REMOVE(m);
2349
2350 m->page_ticket = vm_page_ticket;
2351 if(m->object->size > 0x80000) {
2352 m->zero_fill = TRUE;
2353 /* depends on the queues lock */
2354 vm_zf_count += 1;
2355 queue_enter(&vm_page_queue_zf,
2356 m, vm_page_t, pageq);
2357 } else {
2358 queue_enter(
2359 &vm_page_queue_inactive,
2360 m, vm_page_t, pageq);
2361 }
2362 vm_page_ticket_roll++;
2363 if(vm_page_ticket_roll ==
2364 VM_PAGE_TICKETS_IN_ROLL) {
2365 vm_page_ticket_roll = 0;
2366 if(vm_page_ticket ==
2367 VM_PAGE_TICKET_ROLL_IDS)
2368 vm_page_ticket= 0;
2369 else
2370 vm_page_ticket++;
2371 }
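/*
 * Ticket bookkeeping: vm_page_ticket_roll counts insertions and,
 * after VM_PAGE_TICKETS_IN_ROLL of them, advances vm_page_ticket,
 * which wraps to zero once it reaches VM_PAGE_TICKET_ROLL_IDS.
 * With an illustrative roll size of 128, the first 128 pages
 * queued here share one ticket, the next 128 the following
 * ticket, and so on.
 */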
2372
2373 m->inactive = TRUE;
2374 vm_page_inactive_count++;
2375 vm_page_unlock_queues();
2376 vm_object_lock(object);
2377
2378 goto FastPmapEnter;
2379 }
2380
2381 /*
2382 * On to the next level
2383 */
2384
2385 cur_offset += cur_object->shadow_offset;
2386 new_object = cur_object->shadow;
2387 vm_object_lock(new_object);
2388 if (cur_object != object)
2389 vm_object_unlock(cur_object);
2390 cur_object = new_object;
2391
2392 continue;
2393 }
2394 }
2395
2396 /*
2397 * Cleanup from fast fault failure. Drop any object
2398 * lock other than original and drop map lock.
2399 */
2400
2401 if (object != cur_object)
2402 vm_object_unlock(cur_object);
2403 }
2404 vm_map_unlock_read(map);
2405
2406 if(pmap_map != map)
2407 vm_map_unlock(pmap_map);
2408
2409 /*
2410 * Make a reference to this object to
2411 * prevent its disposal while we are messing with
2412 * it. Once we have the reference, the map is free
2413 * to be diddled. Since objects reference their
2414 * shadows (and copies), they will stay around as well.
2415 */
2416
2417 assert(object->ref_count > 0);
2418 object->ref_count++;
2419 vm_object_res_reference(object);
2420 vm_object_paging_begin(object);
2421
2422 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2423 {
2424 tws_hash_line_t line;
2425 task_t task;
2426 kern_return_t kr;
2427
2428 task = current_task();
2429 if((map != NULL) &&
2430 (task->dynamic_working_set != 0)
2431 && !(object->private)) {
2432 vm_object_t base_object;
2433 vm_object_offset_t base_offset;
2434 base_object = object;
2435 base_offset = offset;
2436 while(base_object->shadow) {
2437 base_offset +=
2438 base_object->shadow_offset;
2439 base_object =
2440 base_object->shadow;
2441 }
2442 kr = tws_lookup((tws_hash_t)
2443 task->dynamic_working_set,
2444 base_offset, base_object,
2445 &line);
2446 if(kr == KERN_OPERATION_TIMED_OUT){
2447 write_startup_file = 1;
2448 } else if (kr != KERN_SUCCESS) {
2449 tws_insert((tws_hash_t)
2450 task->dynamic_working_set,
2451 base_offset, base_object,
2452 vaddr, pmap_map);
2453 kr = tws_insert((tws_hash_t)
2454 task->dynamic_working_set,
2455 base_offset, base_object,
2456 vaddr, pmap_map);
2457 if(kr == KERN_NO_SPACE) {
2458 vm_object_unlock(object);
2459 tws_expand_working_set(
2460 task->dynamic_working_set,
2461 TWS_HASH_LINE_COUNT,
2462 FALSE);
2463 vm_object_lock(object);
2464 }
2465 if(kr == KERN_OPERATION_TIMED_OUT) {
2466 write_startup_file = 1;
2467 }
2468 }
2469 }
2470 }
2471 kr = vm_fault_page(object, offset, fault_type,
2472 (change_wiring && !wired),
2473 interruptible,
2474 lo_offset, hi_offset, behavior,
2475 &prot, &result_page, &top_page,
2476 &type_of_fault,
2477 &error_code, map->no_zero_fill, FALSE, map, vaddr);
2478
2479 /*
2480 * If we didn't succeed, lose the object reference immediately.
2481 */
2482
2483 if (kr != VM_FAULT_SUCCESS)
2484 vm_object_deallocate(object);
2485
2486 /*
2487 * See why we failed, and take corrective action.
2488 */
2489
2490 switch (kr) {
2491 case VM_FAULT_SUCCESS:
2492 break;
2493 case VM_FAULT_MEMORY_SHORTAGE:
2494 if (vm_page_wait((change_wiring) ?
2495 THREAD_UNINT :
2496 THREAD_ABORTSAFE))
2497 goto RetryFault;
2498 /* fall thru */
2499 case VM_FAULT_INTERRUPTED:
2500 kr = KERN_ABORTED;
2501 goto done;
2502 case VM_FAULT_RETRY:
2503 goto RetryFault;
2504 case VM_FAULT_FICTITIOUS_SHORTAGE:
2505 vm_page_more_fictitious();
2506 goto RetryFault;
2507 case VM_FAULT_MEMORY_ERROR:
2508 if (error_code)
2509 kr = error_code;
2510 else
2511 kr = KERN_MEMORY_ERROR;
2512 goto done;
2513 }
2514
2515 m = result_page;
2516
2517 if(m != VM_PAGE_NULL) {
2518 assert((change_wiring && !wired) ?
2519 (top_page == VM_PAGE_NULL) :
2520 ((top_page == VM_PAGE_NULL) == (m->object == object)));
2521 }
2522
2523 /*
2524 * How to clean up the result of vm_fault_page. This
2525 * happens whether the mapping is entered or not.
2526 */
2527
2528 #define UNLOCK_AND_DEALLOCATE \
2529 MACRO_BEGIN \
2530 vm_fault_cleanup(m->object, top_page); \
2531 vm_object_deallocate(object); \
2532 MACRO_END
2533
2534 /*
2535 * What to do with the resulting page from vm_fault_page
2536 * if it doesn't get entered into the physical map:
2537 */
2538
2539 #define RELEASE_PAGE(m) \
2540 MACRO_BEGIN \
2541 PAGE_WAKEUP_DONE(m); \
2542 vm_page_lock_queues(); \
2543 if (!m->active && !m->inactive) \
2544 vm_page_activate(m); \
2545 vm_page_unlock_queues(); \
2546 MACRO_END
2547
2548 /*
2549 * We must verify that the maps have not changed
2550 * since our last lookup.
2551 */
2552
2553 if(m != VM_PAGE_NULL) {
2554 old_copy_object = m->object->copy;
2555 vm_object_unlock(m->object);
2556 } else {
2557 old_copy_object = VM_OBJECT_NULL;
2558 }
2559 if ((map != original_map) || !vm_map_verify(map, &version)) {
2560 vm_object_t retry_object;
2561 vm_object_offset_t retry_offset;
2562 vm_prot_t retry_prot;
2563
2564 /*
2565 * To avoid trying to write_lock the map while another
2566 * thread has it read_locked (in vm_map_pageable), we
2567 * do not try for write permission. If the page is
2568 * still writable, we will get write permission. If it
2569 * is not, or has been marked needs_copy, we enter the
2570 * mapping without write permission, and will merely
2571 * take another fault.
2572 */
2573 map = original_map;
2574 vm_map_lock_read(map);
2575 kr = vm_map_lookup_locked(&map, vaddr,
2576 fault_type & ~VM_PROT_WRITE, &version,
2577 &retry_object, &retry_offset, &retry_prot,
2578 &wired, &behavior, &lo_offset, &hi_offset,
2579 &pmap_map);
2580 pmap = pmap_map->pmap;
2581
2582 if (kr != KERN_SUCCESS) {
2583 vm_map_unlock_read(map);
2584 if(m != VM_PAGE_NULL) {
2585 vm_object_lock(m->object);
2586 RELEASE_PAGE(m);
2587 UNLOCK_AND_DEALLOCATE;
2588 } else {
2589 vm_object_deallocate(object);
2590 }
2591 goto done;
2592 }
2593
2594 vm_object_unlock(retry_object);
2595 if(m != VM_PAGE_NULL) {
2596 vm_object_lock(m->object);
2597 } else {
2598 vm_object_lock(object);
2599 }
2600
2601 if ((retry_object != object) ||
2602 (retry_offset != offset)) {
2603 vm_map_unlock_read(map);
2604 if(pmap_map != map)
2605 vm_map_unlock(pmap_map);
2606 if(m != VM_PAGE_NULL) {
2607 RELEASE_PAGE(m);
2608 UNLOCK_AND_DEALLOCATE;
2609 } else {
2610 vm_object_deallocate(object);
2611 }
2612 goto RetryFault;
2613 }
2614
2615 /*
2616 * Check whether the protection has changed or the object
2617 * has been copied while we left the map unlocked.
2618 */
2619 prot &= retry_prot;
2620 if(m != VM_PAGE_NULL) {
2621 vm_object_unlock(m->object);
2622 } else {
2623 vm_object_unlock(object);
2624 }
2625 }
2626 if(m != VM_PAGE_NULL) {
2627 vm_object_lock(m->object);
2628 } else {
2629 vm_object_lock(object);
2630 }
2631
2632 /*
2633 * If the copy object changed while the top-level object
2634 * was unlocked, then we must take away write permission.
2635 */
2636
2637 if(m != VM_PAGE_NULL) {
2638 if (m->object->copy != old_copy_object)
2639 prot &= ~VM_PROT_WRITE;
2640 }
2641
2642 /*
2643 * If we want to wire down this page, but no longer have
2644 * adequate permissions, we must start all over.
2645 */
2646
2647 if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2648 vm_map_verify_done(map, &version);
2649 if(pmap_map != map)
2650 vm_map_unlock(pmap_map);
2651 if(m != VM_PAGE_NULL) {
2652 RELEASE_PAGE(m);
2653 UNLOCK_AND_DEALLOCATE;
2654 } else {
2655 vm_object_deallocate(object);
2656 }
2657 goto RetryFault;
2658 }
2659
2660 /*
2661 * Put this page into the physical map.
2662 * We had to do the unlock above because pmap_enter
2663 * may cause other faults. The page may be on
2664 * the pageout queues. If the pageout daemon comes
2665 * across the page, it will remove it from the queues.
2666 */
2667 if (m != VM_PAGE_NULL) {
2668 if (m->no_isync == TRUE) {
2669 pmap_sync_caches_phys(m->phys_addr);
2670
2671 m->no_isync = FALSE;
2672 }
2673
2674 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2675
2676 if(caller_pmap) {
2677 PMAP_ENTER(caller_pmap,
2678 caller_pmap_addr, m,
2679 prot, cache_attr, wired);
2680 } else {
2681 PMAP_ENTER(pmap, vaddr, m,
2682 prot, cache_attr, wired);
2683 }
2684 {
2685 tws_hash_line_t line;
2686 task_t task;
2687 kern_return_t kr;
2688
2689 task = current_task();
2690 if((map != NULL) &&
2691 (task->dynamic_working_set != 0)
2692 && (object->private)) {
2693 vm_object_t base_object;
2694 vm_object_offset_t base_offset;
2695 base_object = m->object;
2696 base_offset = m->offset;
2697 while(base_object->shadow) {
2698 base_offset +=
2699 base_object->shadow_offset;
2700 base_object =
2701 base_object->shadow;
2702 }
2703 kr = tws_lookup((tws_hash_t)
2704 task->dynamic_working_set,
2705 base_offset, base_object, &line);
2706 if(kr == KERN_OPERATION_TIMED_OUT){
2707 write_startup_file = 1;
2708 } else if (kr != KERN_SUCCESS) {
2709 tws_insert((tws_hash_t)
2710 task->dynamic_working_set,
2711 base_offset, base_object,
2712 vaddr, pmap_map);
2713 kr = tws_insert((tws_hash_t)
2714 task->dynamic_working_set,
2715 base_offset, base_object,
2716 vaddr, pmap_map);
2717 if(kr == KERN_NO_SPACE) {
2718 vm_object_unlock(m->object);
2719 tws_expand_working_set(
2720 task->dynamic_working_set,
2721 TWS_HASH_LINE_COUNT,
2722 FALSE);
2723 vm_object_lock(m->object);
2724 }
2725 if(kr == KERN_OPERATION_TIMED_OUT) {
2726 write_startup_file = 1;
2727 }
2728 }
2729 }
2730 }
2731 } else {
2732
2733 #ifndef i386
2734 int memattr;
2735 struct phys_entry *pp;
2736 vm_map_entry_t entry;
2737 vm_offset_t laddr;
2738 vm_offset_t ldelta, hdelta;
2739
2740 /*
2741 * do a pmap block mapping from the physical address
2742 * in the object
2743 */
2744 if(pp = pmap_find_physentry(
2745 (vm_offset_t)object->shadow_offset)) {
2746 memattr = ((pp->pte1 & 0x00000078) >> 3);
2747 } else {
2748 memattr = VM_WIMG_MASK & (int)object->wimg_bits;
2749 }
2750
2751
2752 /* While we do not worry about execution protection in */
2753 /* general, we may be able to read device memory and */
2754 /* still not be able to execute it. Here we check for */
2755 /* the guarded bit. If it's set and we are attempting */
2756 /* to execute, we return with a protection failure. */
2757
2758 if((memattr & VM_MEM_GUARDED) &&
2759 (full_fault_type & VM_PROT_EXECUTE)) {
2760 vm_map_verify_done(map, &version);
2761 if(pmap_map != map)
2762 vm_map_unlock(pmap_map);
2763 vm_fault_cleanup(object, top_page);
2764 vm_object_deallocate(object);
2765 kr = KERN_PROTECTION_FAILURE;
2766 goto done;
2767 }
2768
2769
2770
2771 if(pmap_map != map) {
2772 vm_map_unlock(pmap_map);
2773 }
2774 if (original_map != map) {
2775 vm_map_unlock_read(map);
2776 vm_map_lock_read(original_map);
2777 map = original_map;
2778 }
2779 pmap_map = map;
2780
2781 laddr = vaddr;
2782 hdelta = 0xFFFFF000;
2783 ldelta = 0xFFFFF000;
2784
2785
2786 while(vm_map_lookup_entry(map, laddr, &entry)) {
2787 if(ldelta > (laddr - entry->vme_start))
2788 ldelta = laddr - entry->vme_start;
2789 if(hdelta > (entry->vme_end - laddr))
2790 hdelta = entry->vme_end - laddr;
2791 if(entry->is_sub_map) {
2792
2793 laddr = (laddr - entry->vme_start)
2794 + entry->offset;
2795 vm_map_lock_read(entry->object.sub_map);
2796 if(map != pmap_map)
2797 vm_map_unlock_read(map);
2798 if(entry->use_pmap) {
2799 vm_map_unlock_read(pmap_map);
2800 pmap_map = entry->object.sub_map;
2801 }
2802 map = entry->object.sub_map;
2803
2804 } else {
2805 break;
2806 }
2807 }
2808
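/*
 * At this point ldelta/hdelta bound the largest block around vaddr
 * that stays inside every map entry visited.  For a single entry
 * spanning [0x10000, 0x20000) with vaddr == 0x13000 (illustrative
 * addresses), ldelta ends up 0x3000 and hdelta 0xD000, so the
 * block mapping below covers the whole 0x10000-byte entry.
 */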
2809 if(vm_map_lookup_entry(map, laddr, &entry) &&
2810 (entry->object.vm_object != NULL) &&
2811 (entry->object.vm_object == object)) {
2812
2813
2814 if(caller_pmap) {
2815 pmap_map_block(caller_pmap,
2816 caller_pmap_addr - ldelta,
2817 ((vm_offset_t)
2818 (entry->object.vm_object->shadow_offset))
2819 + entry->offset +
2820 (laddr - entry->vme_start) - ldelta,
2821 ldelta + hdelta, prot,
2822 memattr, 0); /* Set up a block mapped area */
2823 } else {
2824 pmap_map_block(pmap_map->pmap, vaddr - ldelta,
2825 ((vm_offset_t)
2826 (entry->object.vm_object->shadow_offset))
2827 + entry->offset +
2828 (laddr - entry->vme_start) - ldelta,
2829 ldelta + hdelta, prot,
2830 memattr, 0); /* Set up a block mapped area */
2831 }
2832 }
2833 #else
2834 #ifdef notyet
2835 if(caller_pmap) {
2836 pmap_enter(caller_pmap, caller_pmap_addr,
2837 object->shadow_offset, prot, 0, TRUE);
2838 } else {
2839 pmap_enter(pmap, vaddr,
2840 object->shadow_offset, prot, 0, TRUE);
2841 }
2842 /* Map it in */
2843 #endif
2844 #endif
2845
2846 }
2847
2848 /*
2849 * If the page is not wired down and isn't already
2850 * on a pageout queue, then put it where the
2851 * pageout daemon can find it.
2852 */
2853 if(m != VM_PAGE_NULL) {
2854 vm_page_lock_queues();
2855
2856 if (change_wiring) {
2857 if (wired)
2858 vm_page_wire(m);
2859 else
2860 vm_page_unwire(m);
2861 }
2862 #if VM_FAULT_STATIC_CONFIG
2863 else {
2864 if (!m->active && !m->inactive)
2865 vm_page_activate(m);
2866 m->reference = TRUE;
2867 }
2868 #else
2869 else if (software_reference_bits) {
2870 if (!m->active && !m->inactive)
2871 vm_page_activate(m);
2872 m->reference = TRUE;
2873 } else {
2874 vm_page_activate(m);
2875 }
2876 #endif
2877 vm_page_unlock_queues();
2878 }
2879
2880 /*
2881 * Unlock everything, and return
2882 */
2883
2884 vm_map_verify_done(map, &version);
2885 if(pmap_map != map)
2886 vm_map_unlock(pmap_map);
2887 if(m != VM_PAGE_NULL) {
2888 PAGE_WAKEUP_DONE(m);
2889 UNLOCK_AND_DEALLOCATE;
2890 } else {
2891 vm_fault_cleanup(object, top_page);
2892 vm_object_deallocate(object);
2893 }
2894 kr = KERN_SUCCESS;
2895
2896 #undef UNLOCK_AND_DEALLOCATE
2897 #undef RELEASE_PAGE
2898
2899 done:
2900 if(write_startup_file)
2901 tws_send_startup_info(current_task());
2902 if (funnel_set) {
2903 thread_funnel_set( curflock, TRUE);
2904 funnel_set = FALSE;
2905 }
2906 thread_interrupt_level(interruptible_state);
2907
2908 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2909 vaddr,
2910 type_of_fault & 0xff,
2911 kr,
2912 type_of_fault >> 8,
2913 0);
2914
2915 return(kr);
2916 }
2917
2918 /*
2919 * vm_fault_wire:
2920 *
2921 * Wire down a range of virtual addresses in a map.
2922 */
2923 kern_return_t
2924 vm_fault_wire(
2925 vm_map_t map,
2926 vm_map_entry_t entry,
2927 pmap_t pmap,
2928 vm_offset_t pmap_addr)
2929 {
2930
2931 register vm_offset_t va;
2932 register vm_offset_t end_addr = entry->vme_end;
2933 register kern_return_t rc;
2934
2935 assert(entry->in_transition);
2936
2937 if ((entry->object.vm_object != NULL) &&
2938 !entry->is_sub_map &&
2939 entry->object.vm_object->phys_contiguous) {
2940 return KERN_SUCCESS;
2941 }
2942
2943 /*
2944 * Inform the physical mapping system that the
2945 * range of addresses may not fault, so that
2946 * page tables and such can be locked down as well.
2947 */
2948
2949 pmap_pageable(pmap, pmap_addr,
2950 pmap_addr + (end_addr - entry->vme_start), FALSE);
2951
2952 /*
2953 * We simulate a fault to get the page and enter it
2954 * in the physical map.
2955 */
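/*
 * Each page in the entry is wired one at a time: try the stripped
 * down vm_fault_wire_fast() first and fall back to the full
 * vm_fault() with change_wiring == TRUE.  If any page fails, the
 * pages wired so far are backed out through vm_fault_unwire() on
 * a temporary entry clipped at the failing address.
 */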
2956
2957 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
2958 if ((rc = vm_fault_wire_fast(
2959 map, va, entry, pmap,
2960 pmap_addr + (va - entry->vme_start)
2961 )) != KERN_SUCCESS) {
2962 rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
2963 (pmap == kernel_pmap) ?
2964 THREAD_UNINT : THREAD_ABORTSAFE,
2965 pmap, pmap_addr + (va - entry->vme_start));
2966 }
2967
2968 if (rc != KERN_SUCCESS) {
2969 struct vm_map_entry tmp_entry = *entry;
2970
2971 /* unwire wired pages */
2972 tmp_entry.vme_end = va;
2973 vm_fault_unwire(map,
2974 &tmp_entry, FALSE, pmap, pmap_addr);
2975
2976 return rc;
2977 }
2978 }
2979 return KERN_SUCCESS;
2980 }
2981
2982 /*
2983 * vm_fault_unwire:
2984 *
2985 * Unwire a range of virtual addresses in a map.
2986 */
2987 void
2988 vm_fault_unwire(
2989 vm_map_t map,
2990 vm_map_entry_t entry,
2991 boolean_t deallocate,
2992 pmap_t pmap,
2993 vm_offset_t pmap_addr)
2994 {
2995 register vm_offset_t va;
2996 register vm_offset_t end_addr = entry->vme_end;
2997 vm_object_t object;
2998
2999 object = (entry->is_sub_map)
3000 ? VM_OBJECT_NULL : entry->object.vm_object;
3001
3002 /*
3003 * Since the pages are wired down, we must be able to
3004 * get their mappings from the physical map system.
3005 */
3006
3007 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3008 pmap_change_wiring(pmap,
3009 pmap_addr + (va - entry->vme_start), FALSE);
3010
3011 if (object == VM_OBJECT_NULL) {
3012 (void) vm_fault(map, va, VM_PROT_NONE,
3013 TRUE, THREAD_UNINT, pmap, pmap_addr);
3014 } else if (object->phys_contiguous) {
3015 continue;
3016 } else {
3017 vm_prot_t prot;
3018 vm_page_t result_page;
3019 vm_page_t top_page;
3020 vm_object_t result_object;
3021 vm_fault_return_t result;
3022
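/*
 * vm_fault_page() is used here with VM_PROT_NONE simply to chase
 * down the resident page backing this wired address (possibly
 * through the shadow chain) so it can be freed or unwired below;
 * no new mapping is created.
 */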
3023 do {
3024 prot = VM_PROT_NONE;
3025
3026 vm_object_lock(object);
3027 vm_object_paging_begin(object);
3028 XPR(XPR_VM_FAULT,
3029 "vm_fault_unwire -> vm_fault_page\n",
3030 0,0,0,0,0);
3031 result = vm_fault_page(object,
3032 entry->offset +
3033 (va - entry->vme_start),
3034 VM_PROT_NONE, TRUE,
3035 THREAD_UNINT,
3036 entry->offset,
3037 entry->offset +
3038 (entry->vme_end
3039 - entry->vme_start),
3040 entry->behavior,
3041 &prot,
3042 &result_page,
3043 &top_page,
3044 (int *)0,
3045 0, map->no_zero_fill,
3046 FALSE, NULL, 0);
3047 } while (result == VM_FAULT_RETRY);
3048
3049 if (result != VM_FAULT_SUCCESS)
3050 panic("vm_fault_unwire: failure");
3051
3052 result_object = result_page->object;
3053 if (deallocate) {
3054 assert(!result_page->fictitious);
3055 pmap_page_protect(result_page->phys_addr,
3056 VM_PROT_NONE);
3057 VM_PAGE_FREE(result_page);
3058 } else {
3059 vm_page_lock_queues();
3060 vm_page_unwire(result_page);
3061 vm_page_unlock_queues();
3062 PAGE_WAKEUP_DONE(result_page);
3063 }
3064
3065 vm_fault_cleanup(result_object, top_page);
3066 }
3067 }
3068
3069 /*
3070 * Inform the physical mapping system that the range
3071 * of addresses may fault, so that page tables and
3072 * such may be unwired themselves.
3073 */
3074
3075 pmap_pageable(pmap, pmap_addr,
3076 pmap_addr + (end_addr - entry->vme_start), TRUE);
3077
3078 }
3079
3080 /*
3081 * vm_fault_wire_fast:
3082 *
3083 * Handle common case of a wire down page fault at the given address.
3084 * If successful, the page is inserted into the associated physical map.
3085 * The map entry is passed in to avoid the overhead of a map lookup.
3086 *
3087 * NOTE: the given address should be truncated to the
3088 * proper page address.
3089 *
3090 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
3091 * a standard error specifying why the fault is fatal is returned.
3092 *
3093 * The map in question must be referenced, and remains so.
3094 * Caller has a read lock on the map.
3095 *
3096 * This is a stripped version of vm_fault() for wiring pages. Anything
3097 * other than the common case will return KERN_FAILURE, and the caller
3098 * is expected to call vm_fault().
3099 */
3100 kern_return_t
3101 vm_fault_wire_fast(
3102 vm_map_t map,
3103 vm_offset_t va,
3104 vm_map_entry_t entry,
3105 pmap_t pmap,
3106 vm_offset_t pmap_addr)
3107 {
3108 vm_object_t object;
3109 vm_object_offset_t offset;
3110 register vm_page_t m;
3111 vm_prot_t prot;
3112 thread_act_t thr_act;
3113 unsigned int cache_attr;
3114
3115 VM_STAT(faults++);
3116
3117 if((thr_act=current_act()) && (thr_act->task != TASK_NULL))
3118 thr_act->task->faults++;
3119
3120 /*
3121 * Recovery actions
3122 */
3123
3124 #undef RELEASE_PAGE
3125 #define RELEASE_PAGE(m) { \
3126 PAGE_WAKEUP_DONE(m); \
3127 vm_page_lock_queues(); \
3128 vm_page_unwire(m); \
3129 vm_page_unlock_queues(); \
3130 }
3131
3132
3133 #undef UNLOCK_THINGS
3134 #define UNLOCK_THINGS { \
3135 object->paging_in_progress--; \
3136 vm_object_unlock(object); \
3137 }
3138
3139 #undef UNLOCK_AND_DEALLOCATE
3140 #define UNLOCK_AND_DEALLOCATE { \
3141 UNLOCK_THINGS; \
3142 vm_object_deallocate(object); \
3143 }
3144 /*
3145 * Give up and have caller do things the hard way.
3146 */
3147
3148 #define GIVE_UP { \
3149 UNLOCK_AND_DEALLOCATE; \
3150 return(KERN_FAILURE); \
3151 }
3152
3153
3154 /*
3155 * If this entry is not directly to a vm_object, bail out.
3156 */
3157 if (entry->is_sub_map)
3158 return(KERN_FAILURE);
3159
3160 /*
3161 * Find the backing store object and offset into it.
3162 */
3163
3164 object = entry->object.vm_object;
3165 offset = (va - entry->vme_start) + entry->offset;
3166 prot = entry->protection;
3167
3168 /*
3169 * Make a reference to this object to prevent its
3170 * disposal while we are messing with it.
3171 */
3172
3173 vm_object_lock(object);
3174 assert(object->ref_count > 0);
3175 object->ref_count++;
3176 vm_object_res_reference(object);
3177 object->paging_in_progress++;
3178
3179 /*
3180 * INVARIANTS (through entire routine):
3181 *
3182 * 1) At all times, we must either have the object
3183 * lock or a busy page in some object to prevent
3184 * some other thread from trying to bring in
3185 * the same page.
3186 *
3187 * 2) Once we have a busy page, we must remove it from
3188 * the pageout queues, so that the pageout daemon
3189 * will not grab it away.
3190 *
3191 */
3192
3193 /*
3194 * Look for page in top-level object. If it's not there or
3195 * there's something going on, give up.
3196 */
3197 m = vm_page_lookup(object, offset);
3198 if ((m == VM_PAGE_NULL) || (m->busy) ||
3199 (m->unusual && ( m->error || m->restart || m->absent ||
3200 prot & m->page_lock))) {
3201
3202 GIVE_UP;
3203 }
3204
3205 /*
3206 * Wire the page down now. All bail outs beyond this
3207 * point must unwire the page.
3208 */
3209
3210 vm_page_lock_queues();
3211 vm_page_wire(m);
3212 vm_page_unlock_queues();
3213
3214 /*
3215 * Mark page busy for other threads.
3216 */
3217 assert(!m->busy);
3218 m->busy = TRUE;
3219 assert(!m->absent);
3220
3221 /*
3222 * Give up if the page is being written and there's a copy object
3223 */
3224 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3225 RELEASE_PAGE(m);
3226 GIVE_UP;
3227 }
3228
3229 /*
3230 * Put this page into the physical map.
3231 * We have to unlock the object because pmap_enter
3232 * may cause other faults.
3233 */
3234 if (m->no_isync == TRUE) {
3235 pmap_sync_caches_phys(m->phys_addr);
3236
3237 m->no_isync = FALSE;
3238 }
3239
3240 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3241
3242 PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);
3243
3244 /*
3245 * Unlock everything, and return
3246 */
3247
3248 PAGE_WAKEUP_DONE(m);
3249 UNLOCK_AND_DEALLOCATE;
3250
3251 return(KERN_SUCCESS);
3252
3253 }
3254
3255 /*
3256 * Routine: vm_fault_copy_cleanup
3257 * Purpose:
3258 * Release a page used by vm_fault_copy.
3259 */
3260
3261 void
3262 vm_fault_copy_cleanup(
3263 vm_page_t page,
3264 vm_page_t top_page)
3265 {
3266 vm_object_t object = page->object;
3267
3268 vm_object_lock(object);
3269 PAGE_WAKEUP_DONE(page);
3270 vm_page_lock_queues();
3271 if (!page->active && !page->inactive)
3272 vm_page_activate(page);
3273 vm_page_unlock_queues();
3274 vm_fault_cleanup(object, top_page);
3275 }
3276
3277 void
3278 vm_fault_copy_dst_cleanup(
3279 vm_page_t page)
3280 {
3281 vm_object_t object;
3282
3283 if (page != VM_PAGE_NULL) {
3284 object = page->object;
3285 vm_object_lock(object);
3286 vm_page_lock_queues();
3287 vm_page_unwire(page);
3288 vm_page_unlock_queues();
3289 vm_object_paging_end(object);
3290 vm_object_unlock(object);
3291 }
3292 }
3293
3294 /*
3295 * Routine: vm_fault_copy
3296 *
3297 * Purpose:
3298 * Copy pages from one virtual memory object to another --
3299 * neither the source nor destination pages need be resident.
3300 *
3301 * Before actually copying a page, the version associated with
3302 * the destination address map will be verified.
3303 *
3304 * In/out conditions:
3305 * The caller must hold a reference, but not a lock, to
3306 * each of the source and destination objects and to the
3307 * destination map.
3308 *
3309 * Results:
3310 * Returns KERN_SUCCESS if no errors were encountered in
3311 * reading or writing the data. Returns KERN_INTERRUPTED if
3312 * the operation was interrupted (only possible if the
3313 * "interruptible" argument is asserted). Other return values
3314 * indicate a permanent error in copying the data.
3315 *
3316 * The actual amount of data copied will be returned in the
3317 * "copy_size" argument. In the event that the destination map
3318 * verification failed, this amount may be less than the amount
3319 * requested.
3320 */
3321 kern_return_t
3322 vm_fault_copy(
3323 vm_object_t src_object,
3324 vm_object_offset_t src_offset,
3325 vm_size_t *src_size, /* INOUT */
3326 vm_object_t dst_object,
3327 vm_object_offset_t dst_offset,
3328 vm_map_t dst_map,
3329 vm_map_version_t *dst_version,
3330 int interruptible)
3331 {
3332 vm_page_t result_page;
3333
3334 vm_page_t src_page;
3335 vm_page_t src_top_page;
3336 vm_prot_t src_prot;
3337
3338 vm_page_t dst_page;
3339 vm_page_t dst_top_page;
3340 vm_prot_t dst_prot;
3341
3342 vm_size_t amount_left;
3343 vm_object_t old_copy_object;
3344 kern_return_t error = 0;
3345
3346 vm_size_t part_size;
3347
3348 /*
3349 * In order not to confuse the clustered pageins, align
3350 * the different offsets on a page boundary.
3351 */
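/*
 * Illustrative arithmetic, assuming 4K pages: src_offset == 0x1A40
 * and *src_size == 0x2000 give src_lo_offset == 0x1000 and
 * src_hi_offset == round_page_64(0x3A40) == 0x4000, so clustered
 * pageins operate on whole-page bounds around the copy.
 */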
3352 vm_object_offset_t src_lo_offset = trunc_page_64(src_offset);
3353 vm_object_offset_t dst_lo_offset = trunc_page_64(dst_offset);
3354 vm_object_offset_t src_hi_offset = round_page_64(src_offset + *src_size);
3355 vm_object_offset_t dst_hi_offset = round_page_64(dst_offset + *src_size);
3356
3357 #define RETURN(x) \
3358 MACRO_BEGIN \
3359 *src_size -= amount_left; \
3360 MACRO_RETURN(x); \
3361 MACRO_END
3362
3363 amount_left = *src_size;
3364 do { /* while (amount_left > 0) */
3365 /*
3366 * There may be a deadlock if both source and destination
3367 * pages are the same. To avoid this deadlock, the copy must
3368 * start by getting the destination page in order to apply
3369 * COW semantics if any.
3370 */
3371
3372 RetryDestinationFault: ;
3373
3374 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3375
3376 vm_object_lock(dst_object);
3377 vm_object_paging_begin(dst_object);
3378
3379 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3380 switch (vm_fault_page(dst_object,
3381 trunc_page_64(dst_offset),
3382 VM_PROT_WRITE|VM_PROT_READ,
3383 FALSE,
3384 interruptible,
3385 dst_lo_offset,
3386 dst_hi_offset,
3387 VM_BEHAVIOR_SEQUENTIAL,
3388 &dst_prot,
3389 &dst_page,
3390 &dst_top_page,
3391 (int *)0,
3392 &error,
3393 dst_map->no_zero_fill,
3394 FALSE, NULL, 0)) {
3395 case VM_FAULT_SUCCESS:
3396 break;
3397 case VM_FAULT_RETRY:
3398 goto RetryDestinationFault;
3399 case VM_FAULT_MEMORY_SHORTAGE:
3400 if (vm_page_wait(interruptible))
3401 goto RetryDestinationFault;
3402 /* fall thru */
3403 case VM_FAULT_INTERRUPTED:
3404 RETURN(MACH_SEND_INTERRUPTED);
3405 case VM_FAULT_FICTITIOUS_SHORTAGE:
3406 vm_page_more_fictitious();
3407 goto RetryDestinationFault;
3408 case VM_FAULT_MEMORY_ERROR:
3409 if (error)
3410 return (error);
3411 else
3412 return(KERN_MEMORY_ERROR);
3413 }
3414 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3415
3416 old_copy_object = dst_page->object->copy;
3417
3418 /*
3419 * There exists the possibility that the source and
3420 * destination page are the same. But we can't
3421 * easily determine that now. If they are the
3422 * same, the call to vm_fault_page() for the
3423 * destination page will deadlock. To prevent this we
3424 * wire the page so we can drop busy without having
3425 * the page daemon steal the page. We clean up the
3426 * top page but keep the paging reference on the object
3427 * holding the dest page so it doesn't go away.
3428 */
3429
3430 vm_page_lock_queues();
3431 vm_page_wire(dst_page);
3432 vm_page_unlock_queues();
3433 PAGE_WAKEUP_DONE(dst_page);
3434 vm_object_unlock(dst_page->object);
3435
3436 if (dst_top_page != VM_PAGE_NULL) {
3437 vm_object_lock(dst_object);
3438 VM_PAGE_FREE(dst_top_page);
3439 vm_object_paging_end(dst_object);
3440 vm_object_unlock(dst_object);
3441 }
3442
3443 RetrySourceFault: ;
3444
3445 if (src_object == VM_OBJECT_NULL) {
3446 /*
3447 * No source object. We will just
3448 * zero-fill the page in dst_object.
3449 */
3450 src_page = VM_PAGE_NULL;
3451 result_page = VM_PAGE_NULL;
3452 } else {
3453 vm_object_lock(src_object);
3454 src_page = vm_page_lookup(src_object,
3455 trunc_page_64(src_offset));
3456 if (src_page == dst_page) {
3457 src_prot = dst_prot;
3458 result_page = VM_PAGE_NULL;
3459 } else {
3460 src_prot = VM_PROT_READ;
3461 vm_object_paging_begin(src_object);
3462
3463 XPR(XPR_VM_FAULT,
3464 "vm_fault_copy(2) -> vm_fault_page\n",
3465 0,0,0,0,0);
3466 switch (vm_fault_page(src_object,
3467 trunc_page_64(src_offset),
3468 VM_PROT_READ,
3469 FALSE,
3470 interruptible,
3471 src_lo_offset,
3472 src_hi_offset,
3473 VM_BEHAVIOR_SEQUENTIAL,
3474 &src_prot,
3475 &result_page,
3476 &src_top_page,
3477 (int *)0,
3478 &error,
3479 FALSE,
3480 FALSE, NULL, 0)) {
3481
3482 case VM_FAULT_SUCCESS:
3483 break;
3484 case VM_FAULT_RETRY:
3485 goto RetrySourceFault;
3486 case VM_FAULT_MEMORY_SHORTAGE:
3487 if (vm_page_wait(interruptible))
3488 goto RetrySourceFault;
3489 /* fall thru */
3490 case VM_FAULT_INTERRUPTED:
3491 vm_fault_copy_dst_cleanup(dst_page);
3492 RETURN(MACH_SEND_INTERRUPTED);
3493 case VM_FAULT_FICTITIOUS_SHORTAGE:
3494 vm_page_more_fictitious();
3495 goto RetrySourceFault;
3496 case VM_FAULT_MEMORY_ERROR:
3497 vm_fault_copy_dst_cleanup(dst_page);
3498 if (error)
3499 return (error);
3500 else
3501 return(KERN_MEMORY_ERROR);
3502 }
3503
3504
3505 assert((src_top_page == VM_PAGE_NULL) ==
3506 (result_page->object == src_object));
3507 }
3508 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
3509 vm_object_unlock(result_page->object);
3510 }
3511
3512 if (!vm_map_verify(dst_map, dst_version)) {
3513 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3514 vm_fault_copy_cleanup(result_page, src_top_page);
3515 vm_fault_copy_dst_cleanup(dst_page);
3516 break;
3517 }
3518
3519 vm_object_lock(dst_page->object);
3520
3521 if (dst_page->object->copy != old_copy_object) {
3522 vm_object_unlock(dst_page->object);
3523 vm_map_verify_done(dst_map, dst_version);
3524 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3525 vm_fault_copy_cleanup(result_page, src_top_page);
3526 vm_fault_copy_dst_cleanup(dst_page);
3527 break;
3528 }
3529 vm_object_unlock(dst_page->object);
3530
3531 /*
3532 * Copy the page, and note that it is dirty
3533 * immediately.
3534 */
3535
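/*
 * Partial-page example (4K pages, illustrative offsets): with
 * src_offset == 0x1A40 and dst_offset == 0x2300, src_po == 0xA40
 * and dst_po == 0x300; since src_po is the larger of the two,
 * part_size == PAGE_SIZE - src_po == 0x5C0 bytes, further clamped
 * to amount_left.
 */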
3536 if (!page_aligned(src_offset) ||
3537 !page_aligned(dst_offset) ||
3538 !page_aligned(amount_left)) {
3539
3540 vm_object_offset_t src_po,
3541 dst_po;
3542
3543 src_po = src_offset - trunc_page_64(src_offset);
3544 dst_po = dst_offset - trunc_page_64(dst_offset);
3545
3546 if (dst_po > src_po) {
3547 part_size = PAGE_SIZE - dst_po;
3548 } else {
3549 part_size = PAGE_SIZE - src_po;
3550 }
3551 if (part_size > (amount_left)){
3552 part_size = amount_left;
3553 }
3554
3555 if (result_page == VM_PAGE_NULL) {
3556 vm_page_part_zero_fill(dst_page,
3557 dst_po, part_size);
3558 } else {
3559 vm_page_part_copy(result_page, src_po,
3560 dst_page, dst_po, part_size);
3561 if(!dst_page->dirty){
3562 vm_object_lock(dst_object);
3563 dst_page->dirty = TRUE;
3564 vm_object_unlock(dst_page->object);
3565 }
3566
3567 }
3568 } else {
3569 part_size = PAGE_SIZE;
3570
3571 if (result_page == VM_PAGE_NULL)
3572 vm_page_zero_fill(dst_page);
3573 else{
3574 vm_page_copy(result_page, dst_page);
3575 if(!dst_page->dirty){
3576 vm_object_lock(dst_object);
3577 dst_page->dirty = TRUE;
3578 vm_object_unlock(dst_page->object);
3579 }
3580 }
3581
3582 }
3583
3584 /*
3585 * Unlock everything, and return
3586 */
3587
3588 vm_map_verify_done(dst_map, dst_version);
3589
3590 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3591 vm_fault_copy_cleanup(result_page, src_top_page);
3592 vm_fault_copy_dst_cleanup(dst_page);
3593
3594 amount_left -= part_size;
3595 src_offset += part_size;
3596 dst_offset += part_size;
3597 } while (amount_left > 0);
3598
3599 RETURN(KERN_SUCCESS);
3600 #undef RETURN
3601
3602 /*NOTREACHED*/
3603 }
3604
3605 #ifdef notdef
3606
3607 /*
3608 * Routine: vm_fault_page_overwrite
3609 *
3610 * Description:
3611 * A form of vm_fault_page that assumes that the
3612 * resulting page will be overwritten in its entirety,
3613 * making it unnecessary to obtain the correct *contents*
3614 * of the page.
3615 *
3616 * Implementation:
3617 * XXX Untested. Also unused. Eventually, this technology
3618 * could be used in vm_fault_copy() to advantage.
3619 */
3620 vm_fault_return_t
3621 vm_fault_page_overwrite(
3622 register
3623 vm_object_t dst_object,
3624 vm_object_offset_t dst_offset,
3625 vm_page_t *result_page) /* OUT */
3626 {
3627 register
3628 vm_page_t dst_page;
3629 kern_return_t wait_result;
3630
3631 #define interruptible THREAD_UNINT /* XXX */
3632
3633 while (TRUE) {
3634 /*
3635 * Look for a page at this offset
3636 */
3637
3638 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3639 == VM_PAGE_NULL) {
3640 /*
3641 * No page, no problem... just allocate one.
3642 */
3643
3644 dst_page = vm_page_alloc(dst_object, dst_offset);
3645 if (dst_page == VM_PAGE_NULL) {
3646 vm_object_unlock(dst_object);
3647 VM_PAGE_WAIT();
3648 vm_object_lock(dst_object);
3649 continue;
3650 }
3651
3652 /*
3653 * Pretend that the memory manager
3654 * write-protected the page.
3655 *
3656 * Note that we will be asking for write
3657 * permission without asking for the data
3658 * first.
3659 */
3660
3661 dst_page->overwriting = TRUE;
3662 dst_page->page_lock = VM_PROT_WRITE;
3663 dst_page->absent = TRUE;
3664 dst_page->unusual = TRUE;
3665 dst_object->absent_count++;
3666
3667 break;
3668
3669 /*
3670 * When we bail out, we might have to throw
3671 * away the page created here.
3672 */
3673
3674 #define DISCARD_PAGE \
3675 MACRO_BEGIN \
3676 vm_object_lock(dst_object); \
3677 dst_page = vm_page_lookup(dst_object, dst_offset); \
3678 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3679 VM_PAGE_FREE(dst_page); \
3680 vm_object_unlock(dst_object); \
3681 MACRO_END
3682 }
3683
3684 /*
3685 * If the page is write-protected...
3686 */
3687
3688 if (dst_page->page_lock & VM_PROT_WRITE) {
3689 /*
3690 * ... and an unlock request hasn't been sent
3691 */
3692
3693 if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3694 vm_prot_t u;
3695 kern_return_t rc;
3696
3697 /*
3698 * ... then send one now.
3699 */
3700
3701 if (!dst_object->pager_ready) {
3702 wait_result = vm_object_assert_wait(dst_object,
3703 VM_OBJECT_EVENT_PAGER_READY,
3704 interruptible);
3705 vm_object_unlock(dst_object);
3706 if (wait_result == THREAD_WAITING)
3707 wait_result = thread_block(THREAD_CONTINUE_NULL);
3708 if (wait_result != THREAD_AWAKENED) {
3709 DISCARD_PAGE;
3710 return(VM_FAULT_INTERRUPTED);
3711 }
3712 continue;
3713 }
3714
3715 u = dst_page->unlock_request |= VM_PROT_WRITE;
3716 vm_object_unlock(dst_object);
3717
3718 if ((rc = memory_object_data_unlock(
3719 dst_object->pager,
3720 dst_offset + dst_object->paging_offset,
3721 PAGE_SIZE,
3722 u)) != KERN_SUCCESS) {
3723 if (vm_fault_debug)
3724 printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3725 DISCARD_PAGE;
3726 return((rc == MACH_SEND_INTERRUPTED) ?
3727 VM_FAULT_INTERRUPTED :
3728 VM_FAULT_MEMORY_ERROR);
3729 }
3730 vm_object_lock(dst_object);
3731 continue;
3732 }
3733
3734 /* ... fall through to wait below */
3735 } else {
3736 /*
3737 * If the page isn't being used for other
3738 * purposes, then we're done.
3739 */
3740 if ( ! (dst_page->busy || dst_page->absent ||
3741 dst_page->error || dst_page->restart) )
3742 break;
3743 }
3744
3745 wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
3746 vm_object_unlock(dst_object);
3747 if (wait_result == THREAD_WAITING)
3748 wait_result = thread_block(THREAD_CONTINUE_NULL);
3749 if (wait_result != THREAD_AWAKENED) {
3750 DISCARD_PAGE;
3751 return(VM_FAULT_INTERRUPTED);
3752 }
3753 }
3754
3755 *result_page = dst_page;
3756 return(VM_FAULT_SUCCESS);
3757
3758 #undef interruptible
3759 #undef DISCARD_PAGE
3760 }
3761
3762 #endif /* notdef */
3763
3764 #if VM_FAULT_CLASSIFY
3765 /*
3766 * Temporary statistics gathering support.
3767 */
3768
3769 /*
3770 * Statistics arrays:
3771 */
3772 #define VM_FAULT_TYPES_MAX 5
3773 #define VM_FAULT_LEVEL_MAX 8
3774
3775 int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
3776
3777 #define VM_FAULT_TYPE_ZERO_FILL 0
3778 #define VM_FAULT_TYPE_MAP_IN 1
3779 #define VM_FAULT_TYPE_PAGER 2
3780 #define VM_FAULT_TYPE_COPY 3
3781 #define VM_FAULT_TYPE_OTHER 4
3782
3783
3784 void
3785 vm_fault_classify(vm_object_t object,
3786 vm_object_offset_t offset,
3787 vm_prot_t fault_type)
3788 {
3789 int type, level = 0;
3790 vm_page_t m;
3791
3792 while (TRUE) {
3793 m = vm_page_lookup(object, offset);
3794 if (m != VM_PAGE_NULL) {
3795 if (m->busy || m->error || m->restart || m->absent ||
3796 fault_type & m->page_lock) {
3797 type = VM_FAULT_TYPE_OTHER;
3798 break;
3799 }
3800 if (((fault_type & VM_PROT_WRITE) == 0) ||
3801 ((level == 0) && object->copy == VM_OBJECT_NULL)) {
3802 type = VM_FAULT_TYPE_MAP_IN;
3803 break;
3804 }
3805 type = VM_FAULT_TYPE_COPY;
3806 break;
3807 }
3808 else {
3809 if (object->pager_created) {
3810 type = VM_FAULT_TYPE_PAGER;
3811 break;
3812 }
3813 if (object->shadow == VM_OBJECT_NULL) {
3814 type = VM_FAULT_TYPE_ZERO_FILL;
3815 break;
3816 }
3817
3818 offset += object->shadow_offset;
3819 object = object->shadow;
3820 level++;
3821 continue;
3822 }
3823 }
3824
3825 if (level >= VM_FAULT_LEVEL_MAX)
3826 level = VM_FAULT_LEVEL_MAX - 1;
3827
3828 vm_fault_stats[type][level] += 1;
3829
3830 return;
3831 }
3832
3833 /* cleanup routine to call from debugger */
3834
3835 void
3836 vm_fault_classify_init(void)
3837 {
3838 int type, level;
3839
3840 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
3841 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
3842 vm_fault_stats[type][level] = 0;
3843 }
3844 }
3845
3846 return;
3847 }
3848 #endif /* VM_FAULT_CLASSIFY */