1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: vm_fault.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 *
56 * Page fault handling module.
57 */
58 #ifdef MACH_BSD
59 /* remove after component interface available */
60 extern int vnode_pager_workaround;
61 extern int device_pager_workaround;
62 #endif
63
64 #include <mach_cluster_stats.h>
65 #include <mach_pagemap.h>
66 #include <mach_kdb.h>
67
68 #include <vm/vm_fault.h>
69 #include <mach/kern_return.h>
70 #include <mach/message.h> /* for error codes */
71 #include <kern/host_statistics.h>
72 #include <kern/counters.h>
73 #include <kern/task.h>
74 #include <kern/thread.h>
75 #include <kern/sched_prim.h>
76 #include <kern/host.h>
77 #include <kern/xpr.h>
78 #include <ppc/proc_reg.h>
79 #include <ppc/pmap_internals.h>
80 #include <vm/task_working_set.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_object.h>
83 #include <vm/vm_page.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_pageout.h>
86 #include <mach/vm_param.h>
87 #include <mach/vm_behavior.h>
88 #include <mach/memory_object.h>
89 /* For memory_object_data_{request,unlock} */
90 #include <kern/mach_param.h>
91 #include <kern/macro_help.h>
92 #include <kern/zalloc.h>
93 #include <kern/misc_protos.h>
94
95 #include <sys/kdebug.h>
96
97 #define VM_FAULT_CLASSIFY 0
98 #define VM_FAULT_STATIC_CONFIG 1
99
100 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
101
102 int vm_object_absent_max = 50;
103
104 int vm_fault_debug = 0;
105 boolean_t vm_page_deactivate_behind = TRUE;
106
107
108 #if !VM_FAULT_STATIC_CONFIG
109 boolean_t vm_fault_dirty_handling = FALSE;
110 boolean_t vm_fault_interruptible = FALSE;
111 boolean_t software_reference_bits = TRUE;
112 #endif
113
114 #if MACH_KDB
115 extern struct db_watchpoint *db_watchpoint_list;
116 #endif /* MACH_KDB */
117
118 /* Forward declarations of internal routines. */
119 extern kern_return_t vm_fault_wire_fast(
120 vm_map_t map,
121 vm_offset_t va,
122 vm_map_entry_t entry,
123 pmap_t pmap,
124 vm_offset_t pmap_addr);
125
126 extern void vm_fault_continue(void);
127
128 extern void vm_fault_copy_cleanup(
129 vm_page_t page,
130 vm_page_t top_page);
131
132 extern void vm_fault_copy_dst_cleanup(
133 vm_page_t page);
134
135 #if VM_FAULT_CLASSIFY
136 extern void vm_fault_classify(vm_object_t object,
137 vm_object_offset_t offset,
138 vm_prot_t fault_type);
139
140 extern void vm_fault_classify_init(void);
141 #endif
142
143 /*
144 * Routine: vm_fault_init
145 * Purpose:
146 * Initialize our private data structures.
147 */
148 void
149 vm_fault_init(void)
150 {
151 }
152
153 /*
154 * Routine: vm_fault_cleanup
155 * Purpose:
156 * Clean up the result of vm_fault_page.
157 * Results:
158 * The paging reference for "object" is released.
159 * "object" is unlocked.
160 * If "top_page" is not null, "top_page" is
161 * freed and the paging reference for the object
162 * containing it is released.
163 *
164 * In/out conditions:
165 * "object" must be locked.
166 */
167 void
168 vm_fault_cleanup(
169 register vm_object_t object,
170 register vm_page_t top_page)
171 {
172 vm_object_paging_end(object);
173 vm_object_unlock(object);
174
175 if (top_page != VM_PAGE_NULL) {
176 object = top_page->object;
177 vm_object_lock(object);
178 VM_PAGE_FREE(top_page);
179 vm_object_paging_end(object);
180 vm_object_unlock(object);
181 }
182 }
183
184 #if MACH_CLUSTER_STATS
185 #define MAXCLUSTERPAGES 16
186 struct {
187 unsigned long pages_in_cluster;
188 unsigned long pages_at_higher_offsets;
189 unsigned long pages_at_lower_offsets;
190 } cluster_stats_in[MAXCLUSTERPAGES];
191 #define CLUSTER_STAT(clause) clause
192 #define CLUSTER_STAT_HIGHER(x) \
193 ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
194 #define CLUSTER_STAT_LOWER(x) \
195 ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
196 #define CLUSTER_STAT_CLUSTER(x) \
197 ((cluster_stats_in[(x)].pages_in_cluster)++)
198 #else /* MACH_CLUSTER_STATS */
199 #define CLUSTER_STAT(clause)
200 #endif /* MACH_CLUSTER_STATS */
201
202 /* XXX - temporary */
203 boolean_t vm_allow_clustered_pagein = FALSE;
204 int vm_pagein_cluster_used = 0;
205
206 /*
207 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
208 */
209 int vm_default_ahead = 1; /* Number of pages to prepage ahead */
210 int vm_default_behind = 0; /* Number of pages to prepage behind */
211
212 #define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
213
214 /*
215 * Routine: vm_fault_page
216 * Purpose:
217 * Find the resident page for the virtual memory
218 * specified by the given virtual memory object
219 * and offset.
220 * Additional arguments:
221 * The required permissions for the page are given
222 * in "fault_type". Desired permissions are included
223 * in "protection". The minimum and maximum valid offsets
224 * within the object for the relevant map entry are
225 * passed in "lo_offset" and "hi_offset" respectively and
226 * the expected page reference pattern is passed in "behavior".
227 * These three parameters are used to determine pagein cluster
228 * limits.
229 *
230 * If the desired page is known to be resident (for
231 * example, because it was previously wired down), asserting
232 * the "must_be_resident" parameter will speed the search.
233 *
234 * If the operation can be interrupted (by thread_abort
235 * or thread_terminate), then the "interruptible"
236 * parameter should be asserted.
237 *
238 * Results:
239 * The page containing the proper data is returned
240 * in "result_page".
241 *
242 * In/out conditions:
243 * The source object must be locked and referenced,
244 * and must donate one paging reference. The reference
245 * is not affected. The paging reference and lock are
246 * consumed.
247 *
248 * If the call succeeds, the object in which "result_page"
249 * resides is left locked and holding a paging reference.
250 * If this is not the original object, a busy page in the
251 * original object is returned in "top_page", to prevent other
252 * callers from pursuing this same data, along with a paging
253 * reference for the original object. The "top_page" should
254 * be destroyed when this guarantee is no longer required.
255 * The "result_page" is also left busy. It is not removed
256 * from the pageout queues.
257 */
258
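/*
 * Illustrative sketch (not part of the original source): the typical
 * shape of a vm_fault_page() call, as the callers later in this file
 * (e.g. vm_fault's slow path and vm_fault_copy) use it.  The variable
 * names are hypothetical; only the argument order and the
 * lock/paging-reference protocol described above are taken from the
 * surrounding code.
 */
#if 0
	vm_fault_return_t	result;
	vm_page_t		result_page, top_page;
	kern_return_t		error_code;

	vm_object_lock(object);
	vm_object_paging_begin(object);		/* donate the paging reference */

	result = vm_fault_page(object, offset, fault_type,
			FALSE,			/* must_be_resident */
			THREAD_ABORTSAFE,	/* interruptible */
			lo_offset, hi_offset, behavior,
			&prot, &result_page, &top_page,
			NULL,			/* type_of_fault */
			&error_code,
			FALSE,			/* no_zero_fill */
			FALSE,			/* data_supply */
			map, vaddr);

	if (result == VM_FAULT_SUCCESS) {
		/*
		 * result_page comes back busy, with its object locked and
		 * holding a paging reference.  After consuming the data:
		 */
		PAGE_WAKEUP_DONE(result_page);			/* clear busy */
		vm_fault_cleanup(result_page->object, top_page);
	}
#endif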
259 vm_fault_return_t
260 vm_fault_page(
261 /* Arguments: */
262 vm_object_t first_object, /* Object to begin search */
263 vm_object_offset_t first_offset, /* Offset into object */
264 vm_prot_t fault_type, /* What access is requested */
265 boolean_t must_be_resident,/* Must page be resident? */
266 int interruptible, /* how may fault be interrupted? */
267 vm_object_offset_t lo_offset, /* Map entry start */
268 vm_object_offset_t hi_offset, /* Map entry end */
269 vm_behavior_t behavior, /* Page reference behavior */
270 /* Modifies in place: */
271 vm_prot_t *protection, /* Protection for mapping */
272 /* Returns: */
273 vm_page_t *result_page, /* Page found, if successful */
274 vm_page_t *top_page, /* Page in top object, if
275 * not result_page. */
276 int *type_of_fault, /* if non-null, fill in with type of fault
277 * COW, zero-fill, etc... returned in trace point */
278 /* More arguments: */
279 kern_return_t *error_code, /* code if page is in error */
280 boolean_t no_zero_fill, /* don't zero fill absent pages */
281 boolean_t data_supply, /* treat as data_supply if
282 * it is a write fault and a full
283 * page is provided */
284 vm_map_t map,
285 vm_offset_t vaddr)
286 {
287 register
288 vm_page_t m;
289 register
290 vm_object_t object;
291 register
292 vm_object_offset_t offset;
293 vm_page_t first_m;
294 vm_object_t next_object;
295 vm_object_t copy_object;
296 boolean_t look_for_page;
297 vm_prot_t access_required = fault_type;
298 vm_prot_t wants_copy_flag;
299 vm_size_t cluster_size, length;
300 vm_object_offset_t cluster_offset;
301 vm_object_offset_t cluster_start, cluster_end, paging_offset;
302 vm_object_offset_t align_offset;
303 CLUSTER_STAT(int pages_at_higher_offsets;)
304 CLUSTER_STAT(int pages_at_lower_offsets;)
305 kern_return_t wait_result;
306 boolean_t interruptible_state;
307 boolean_t bumped_pagein = FALSE;
308
309
310 #if MACH_PAGEMAP
311 /*
312 * MACH page map - an optional optimization where a bit map is maintained
313 * by the VM subsystem for internal objects to indicate which pages of
314 * the object currently reside on backing store. This existence map
315 * duplicates information maintained by the vnode pager. It is
316 * created at the time of the first pageout against the object, i.e.
317 * at the same time the pager for the object is created. The optimization
318 * is designed to eliminate pager interaction overhead, if it is
319 * 'known' that the page does not exist on backing store.
320 *
321 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
322 * either marked as paged out in the existence map for the object or no
323 * existence map exists for the object. LOOK_FOR() is one of the
324 * criteria in the decision to invoke the pager. It is also used as one
325 * of the criteria to terminate the scan for adjacent pages in a clustered
326 * pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
327 * permanent objects. Note also that if the pager for an internal object
328 * has not been created, the pager is not invoked regardless of the value
329 * of LOOK_FOR() and that clustered pagein scans are only done on an object
330 * for which a pager has been created.
331 *
332 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
333 * is marked as paged out in the existence map for the object.
334 * PAGED_OUT() is used to determine if a page has already been pushed
335 * into a copy object in order to avoid a redundant page out operation.
336 */
337 #define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
338 != VM_EXTERNAL_STATE_ABSENT)
339 #define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
340 == VM_EXTERNAL_STATE_EXISTS)
341 #else /* MACH_PAGEMAP */
342 /*
343 * If the MACH page map optimization is not enabled,
344 * LOOK_FOR() always evaluates to TRUE. The pager will always be
345 * invoked to resolve missing pages in an object, assuming the pager
346 * has been created for the object. In a clustered page operation, the
347 * absence of a page on backing store cannot be used to terminate
348 * a scan for adjacent pages since that information is available only in
349 * the pager. Hence pages that may not be paged out are potentially
350 * included in a clustered request. The vnode pager is coded to deal
351 * with any combination of absent/present pages in a clustered
352 * pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
353 * will always be invoked to push a dirty page into a copy object assuming
354 * a pager has been created. If the page has already been pushed, the
355 * pager will ignore the new request.
356 */
357 #define LOOK_FOR(o, f) TRUE
358 #define PAGED_OUT(o, f) FALSE
359 #endif /* MACH_PAGEMAP */
360
361 /*
362 * Recovery actions
363 */
364 #define PREPARE_RELEASE_PAGE(m) \
365 MACRO_BEGIN \
366 vm_page_lock_queues(); \
367 MACRO_END
368
369 #define DO_RELEASE_PAGE(m) \
370 MACRO_BEGIN \
371 PAGE_WAKEUP_DONE(m); \
372 if (!m->active && !m->inactive) \
373 vm_page_activate(m); \
374 vm_page_unlock_queues(); \
375 MACRO_END
376
377 #define RELEASE_PAGE(m) \
378 MACRO_BEGIN \
379 PREPARE_RELEASE_PAGE(m); \
380 DO_RELEASE_PAGE(m); \
381 MACRO_END
382
383 #if TRACEFAULTPAGE
384 dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
385 #endif
386
387
388
389 #if !VM_FAULT_STATIC_CONFIG
390 if (vm_fault_dirty_handling
391 #if MACH_KDB
392 /*
393 * If there are watchpoints set, then
394 * we don't want to give away write permission
395 * on a read fault. Make the task write fault,
396 * so that the watchpoint code notices the access.
397 */
398 || db_watchpoint_list
399 #endif /* MACH_KDB */
400 ) {
401 /*
402 * If we aren't asking for write permission,
403 * then don't give it away. We're using write
404 * faults to set the dirty bit.
405 */
406 if (!(fault_type & VM_PROT_WRITE))
407 *protection &= ~VM_PROT_WRITE;
408 }
409
410 if (!vm_fault_interruptible)
411 interruptible = THREAD_UNINT;
412 #else /* STATIC_CONFIG */
413 #if MACH_KDB
414 /*
415 * If there are watchpoints set, then
416 * we don't want to give away write permission
417 * on a read fault. Make the task write fault,
418 * so that the watchpoint code notices the access.
419 */
420 if (db_watchpoint_list) {
421 /*
422 * If we aren't asking for write permission,
423 * then don't give it away. We're using write
424 * faults to set the dirty bit.
425 */
426 if (!(fault_type & VM_PROT_WRITE))
427 *protection &= ~VM_PROT_WRITE;
428 }
429
430 #endif /* MACH_KDB */
431 #endif /* STATIC_CONFIG */
432
433 interruptible_state = thread_interrupt_level(interruptible);
434
435 /*
436 * INVARIANTS (through entire routine):
437 *
438 * 1) At all times, we must either have the object
439 * lock or a busy page in some object to prevent
440 * some other thread from trying to bring in
441 * the same page.
442 *
443 * Note that we cannot hold any locks during the
444 * pager access or when waiting for memory, so
445 * we use a busy page then.
446 *
447 * Note also that we aren't as concerned about more than
448 * one thread attempting to memory_object_data_unlock
449 * the same page at once, so we don't hold the page
450 * as busy then, but do record the highest unlock
451 * value so far. [Unlock requests may also be delivered
452 * out of order.]
453 *
454 * 2) To prevent another thread from racing us down the
455 * shadow chain and entering a new page in the top
456 * object before we do, we must keep a busy page in
457 * the top object while following the shadow chain.
458 *
459 * 3) We must increment paging_in_progress on any object
460 * for which we have a busy page
461 *
462 * 4) We leave busy pages on the pageout queues.
463 * If the pageout daemon comes across a busy page,
464 * it will remove the page from the pageout queues.
465 */
466
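/*
 * Sketch (illustration only, not original code): invariant (1) above
 * boils down to the busy-page convention that recurs throughout this
 * routine whenever the object lock has to be dropped for a blocking
 * operation (pager request, zero fill, page copy).
 */
#if 0
	m->busy = TRUE;			/* the busy page now guards object/offset */
	vm_object_unlock(object);	/* safe to drop the lock: other threads
					 * faulting on the same page sleep on
					 * the busy bit instead of racing us */

	/* ... blocking work, with no locks held ... */

	vm_object_lock(object);
	PAGE_WAKEUP_DONE(m);		/* clear busy and wake any waiters */
#endif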
467 /*
468 * Search for the page at object/offset.
469 */
470
471 object = first_object;
472 offset = first_offset;
473 first_m = VM_PAGE_NULL;
474 access_required = fault_type;
475
476 XPR(XPR_VM_FAULT,
477 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
478 (integer_t)object, offset, fault_type, *protection, 0);
479
480 /*
481 * See whether this page is resident
482 */
483
484 while (TRUE) {
485 #if TRACEFAULTPAGE
486 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
487 #endif
488 if (!object->alive) {
489 vm_fault_cleanup(object, first_m);
490 thread_interrupt_level(interruptible_state);
491 return(VM_FAULT_MEMORY_ERROR);
492 }
493 m = vm_page_lookup(object, offset);
494 #if TRACEFAULTPAGE
495 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
496 #endif
497 if (m != VM_PAGE_NULL) {
498 /*
499 * If the page was pre-paged as part of a
500 * cluster, record the fact.
501 */
502 if (m->clustered) {
503 vm_pagein_cluster_used++;
504 m->clustered = FALSE;
505 }
506
507 /*
508 * If the page is being brought in,
509 * wait for it and then retry.
510 *
511 * A possible optimization: if the page
512 * is known to be resident, we can ignore
513 * pages that are absent (regardless of
514 * whether they're busy).
515 */
516
517 if (m->busy) {
518 #if TRACEFAULTPAGE
519 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
520 #endif
521 wait_result = PAGE_SLEEP(object, m, interruptible);
522 XPR(XPR_VM_FAULT,
523 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
524 (integer_t)object, offset,
525 (integer_t)m, 0, 0);
526 counter(c_vm_fault_page_block_busy_kernel++);
527
528 if (wait_result != THREAD_AWAKENED) {
529 vm_fault_cleanup(object, first_m);
530 thread_interrupt_level(interruptible_state);
531 if (wait_result == THREAD_RESTART)
532 {
533 return(VM_FAULT_RETRY);
534 }
535 else
536 {
537 return(VM_FAULT_INTERRUPTED);
538 }
539 }
540 continue;
541 }
542
543 /*
544 * If the page is in error, give up now.
545 */
546
547 if (m->error) {
548 #if TRACEFAULTPAGE
549 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */
550 #endif
551 if (error_code)
552 *error_code = m->page_error;
553 VM_PAGE_FREE(m);
554 vm_fault_cleanup(object, first_m);
555 thread_interrupt_level(interruptible_state);
556 return(VM_FAULT_MEMORY_ERROR);
557 }
558
559 /*
560 * If the pager wants us to restart
561 * at the top of the chain,
562 * typically because it has moved the
563 * page to another pager, then do so.
564 */
565
566 if (m->restart) {
567 #if TRACEFAULTPAGE
568 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
569 #endif
570 VM_PAGE_FREE(m);
571 vm_fault_cleanup(object, first_m);
572 thread_interrupt_level(interruptible_state);
573 return(VM_FAULT_RETRY);
574 }
575
576 /*
577 * If the page isn't busy, but is absent,
578 * then it was deemed "unavailable".
579 */
580
581 if (m->absent) {
582 /*
583 * Remove the non-existent page (unless it's
584 * in the top object) and move on down to the
585 * next object (if there is one).
586 */
587 #if TRACEFAULTPAGE
588 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */
589 #endif
590
591 next_object = object->shadow;
592 if (next_object == VM_OBJECT_NULL) {
593 vm_page_t real_m;
594
595 assert(!must_be_resident);
596
597 if (object->shadow_severed) {
598 vm_fault_cleanup(
599 object, first_m);
600 thread_interrupt_level(interruptible_state);
601 return VM_FAULT_MEMORY_ERROR;
602 }
603
604 /*
605 * Absent page at bottom of shadow
606 * chain; zero fill the page we left
607 * busy in the first object, and flush
608 * the absent page. But first we
609 * need to allocate a real page.
610 */
611 if (VM_PAGE_THROTTLED() ||
612 (real_m = vm_page_grab()) == VM_PAGE_NULL) {
613 vm_fault_cleanup(object, first_m);
614 thread_interrupt_level(interruptible_state);
615 return(VM_FAULT_MEMORY_SHORTAGE);
616 }
617
618 XPR(XPR_VM_FAULT,
619 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
620 (integer_t)object, offset,
621 (integer_t)m,
622 (integer_t)first_object, 0);
623 if (object != first_object) {
624 VM_PAGE_FREE(m);
625 vm_object_paging_end(object);
626 vm_object_unlock(object);
627 object = first_object;
628 offset = first_offset;
629 m = first_m;
630 first_m = VM_PAGE_NULL;
631 vm_object_lock(object);
632 }
633
634 VM_PAGE_FREE(m);
635 assert(real_m->busy);
636 vm_page_insert(real_m, object, offset);
637 m = real_m;
638
639 /*
640 * Drop the lock while zero filling
641 * page. Then break because this
642 * is the page we wanted. Checking
643 * the page lock is a waste of time;
644 * this page was either absent or
645 * newly allocated -- in both cases
646 * it can't be page locked by a pager.
647 */
648 m->no_isync = FALSE;
649
650 if (!no_zero_fill) {
651 vm_object_unlock(object);
652 vm_page_zero_fill(m);
653 if (type_of_fault)
654 *type_of_fault = DBG_ZERO_FILL_FAULT;
655 VM_STAT(zero_fill_count++);
656
657 if (bumped_pagein == TRUE) {
658 VM_STAT(pageins--);
659 current_task()->pageins--;
660 }
661 vm_object_lock(object);
662 }
663 pmap_clear_modify(m->phys_addr);
664 vm_page_lock_queues();
665 VM_PAGE_QUEUES_REMOVE(m);
666 m->page_ticket = vm_page_ticket;
667 if(m->object->size > 0x80000) {
668 m->zero_fill = TRUE;
669 /* depends on the queues lock */
670 vm_zf_count += 1;
671 queue_enter(&vm_page_queue_zf,
672 m, vm_page_t, pageq);
673 } else {
674 queue_enter(
675 &vm_page_queue_inactive,
676 m, vm_page_t, pageq);
677 }
678 vm_page_ticket_roll++;
679 if(vm_page_ticket_roll ==
680 VM_PAGE_TICKETS_IN_ROLL) {
681 vm_page_ticket_roll = 0;
682 if(vm_page_ticket ==
683 VM_PAGE_TICKET_ROLL_IDS)
684 vm_page_ticket= 0;
685 else
686 vm_page_ticket++;
687 }
688 m->inactive = TRUE;
689 vm_page_inactive_count++;
690 vm_page_unlock_queues();
691 break;
692 } else {
693 if (must_be_resident) {
694 vm_object_paging_end(object);
695 } else if (object != first_object) {
696 vm_object_paging_end(object);
697 VM_PAGE_FREE(m);
698 } else {
699 first_m = m;
700 m->absent = FALSE;
701 m->unusual = FALSE;
702 vm_object_absent_release(object);
703 m->busy = TRUE;
704
705 vm_page_lock_queues();
706 VM_PAGE_QUEUES_REMOVE(m);
707 vm_page_unlock_queues();
708 }
709 XPR(XPR_VM_FAULT,
710 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
711 (integer_t)object, offset,
712 (integer_t)next_object,
713 offset+object->shadow_offset,0);
714 offset += object->shadow_offset;
715 hi_offset += object->shadow_offset;
716 lo_offset += object->shadow_offset;
717 access_required = VM_PROT_READ;
718 vm_object_lock(next_object);
719 vm_object_unlock(object);
720 object = next_object;
721 vm_object_paging_begin(object);
722 continue;
723 }
724 }
725
726 if ((m->cleaning)
727 && ((object != first_object) ||
728 (object->copy != VM_OBJECT_NULL))
729 && (fault_type & VM_PROT_WRITE)) {
730 /*
731 * This is a copy-on-write fault that will
732 * cause us to revoke access to this page, but
733 * this page is in the process of being cleaned
734 * in a clustered pageout. We must wait until
735 * the cleaning operation completes before
736 * revoking access to the original page,
737 * otherwise we might attempt to remove a
738 * wired mapping.
739 */
740 #if TRACEFAULTPAGE
741 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */
742 #endif
743 XPR(XPR_VM_FAULT,
744 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
745 (integer_t)object, offset,
746 (integer_t)m, 0, 0);
747 /* take an extra ref so that object won't die */
748 assert(object->ref_count > 0);
749 object->ref_count++;
750 vm_object_res_reference(object);
751 vm_fault_cleanup(object, first_m);
752 counter(c_vm_fault_page_block_backoff_kernel++);
753 vm_object_lock(object);
754 assert(object->ref_count > 0);
755 m = vm_page_lookup(object, offset);
756 if (m != VM_PAGE_NULL && m->cleaning) {
757 PAGE_ASSERT_WAIT(m, interruptible);
758 vm_object_unlock(object);
759 wait_result = thread_block(THREAD_CONTINUE_NULL);
760 vm_object_deallocate(object);
761 goto backoff;
762 } else {
763 vm_object_unlock(object);
764 vm_object_deallocate(object);
765 thread_interrupt_level(interruptible_state);
766 return VM_FAULT_RETRY;
767 }
768 }
769
770 /*
771 * If the desired access to this page has
772 * been locked out, request that it be unlocked.
773 */
774
775 if (access_required & m->page_lock) {
776 if ((access_required & m->unlock_request) != access_required) {
777 vm_prot_t new_unlock_request;
778 kern_return_t rc;
779
780 #if TRACEFAULTPAGE
781 dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready); /* (TEST/DEBUG) */
782 #endif
783 if (!object->pager_ready) {
784 XPR(XPR_VM_FAULT,
785 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
786 access_required,
787 (integer_t)object, offset,
788 (integer_t)m, 0);
789 /* take an extra ref */
790 assert(object->ref_count > 0);
791 object->ref_count++;
792 vm_object_res_reference(object);
793 vm_fault_cleanup(object,
794 first_m);
795 counter(c_vm_fault_page_block_backoff_kernel++);
796 vm_object_lock(object);
797 assert(object->ref_count > 0);
798 if (!object->pager_ready) {
799 wait_result = vm_object_assert_wait(
800 object,
801 VM_OBJECT_EVENT_PAGER_READY,
802 interruptible);
803 vm_object_unlock(object);
804 if (wait_result == THREAD_WAITING)
805 wait_result = thread_block(THREAD_CONTINUE_NULL);
806 vm_object_deallocate(object);
807 goto backoff;
808 } else {
809 vm_object_unlock(object);
810 vm_object_deallocate(object);
811 thread_interrupt_level(interruptible_state);
812 return VM_FAULT_RETRY;
813 }
814 }
815
816 new_unlock_request = m->unlock_request =
817 (access_required | m->unlock_request);
818 vm_object_unlock(object);
819 XPR(XPR_VM_FAULT,
820 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
821 (integer_t)object, offset,
822 (integer_t)m, new_unlock_request, 0);
823 if ((rc = memory_object_data_unlock(
824 object->pager,
825 offset + object->paging_offset,
826 PAGE_SIZE,
827 new_unlock_request))
828 != KERN_SUCCESS) {
829 if (vm_fault_debug)
830 printf("vm_fault: memory_object_data_unlock failed\n");
831 vm_object_lock(object);
832 vm_fault_cleanup(object, first_m);
833 thread_interrupt_level(interruptible_state);
834 return((rc == MACH_SEND_INTERRUPTED) ?
835 VM_FAULT_INTERRUPTED :
836 VM_FAULT_MEMORY_ERROR);
837 }
838 vm_object_lock(object);
839 continue;
840 }
841
842 XPR(XPR_VM_FAULT,
843 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
844 access_required, (integer_t)object,
845 offset, (integer_t)m, 0);
846 /* take an extra ref so object won't die */
847 assert(object->ref_count > 0);
848 object->ref_count++;
849 vm_object_res_reference(object);
850 vm_fault_cleanup(object, first_m);
851 counter(c_vm_fault_page_block_backoff_kernel++);
852 vm_object_lock(object);
853 assert(object->ref_count > 0);
854 m = vm_page_lookup(object, offset);
855 if (m != VM_PAGE_NULL &&
856 (access_required & m->page_lock) &&
857 !((access_required & m->unlock_request) != access_required)) {
858 PAGE_ASSERT_WAIT(m, interruptible);
859 vm_object_unlock(object);
860 wait_result = thread_block(THREAD_CONTINUE_NULL);
861 vm_object_deallocate(object);
862 goto backoff;
863 } else {
864 vm_object_unlock(object);
865 vm_object_deallocate(object);
866 thread_interrupt_level(interruptible_state);
867 return VM_FAULT_RETRY;
868 }
869 }
870 /*
871 * We mark the page busy and leave it on
872 * the pageout queues. If the pageout
873 * daemon comes across it, then it will
874 * remove the page.
875 */
876
877 #if TRACEFAULTPAGE
878 dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
879 #endif
880
881 #if !VM_FAULT_STATIC_CONFIG
882 if (!software_reference_bits) {
883 vm_page_lock_queues();
884 if (m->inactive)
885 vm_stat.reactivations++;
886
887 VM_PAGE_QUEUES_REMOVE(m);
888 vm_page_unlock_queues();
889 }
890 #endif
891 XPR(XPR_VM_FAULT,
892 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
893 (integer_t)object, offset, (integer_t)m, 0, 0);
894 assert(!m->busy);
895 m->busy = TRUE;
896 assert(!m->absent);
897 break;
898 }
899
900 look_for_page =
901 (object->pager_created) &&
902 LOOK_FOR(object, offset) &&
903 (!data_supply);
904
905 #if TRACEFAULTPAGE
906 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */
907 #endif
908 if ((look_for_page || (object == first_object))
909 && !must_be_resident
910 && !(object->phys_contiguous)) {
911 /*
912 * Allocate a new page for this object/offset
913 * pair.
914 */
915
916 m = vm_page_grab_fictitious();
917 #if TRACEFAULTPAGE
918 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
919 #endif
920 if (m == VM_PAGE_NULL) {
921 vm_fault_cleanup(object, first_m);
922 thread_interrupt_level(interruptible_state);
923 return(VM_FAULT_FICTITIOUS_SHORTAGE);
924 }
925 vm_page_insert(m, object, offset);
926 }
927
928 if ((look_for_page && !must_be_resident)) {
929 kern_return_t rc;
930
931 /*
932 * If the memory manager is not ready, we
933 * cannot make requests.
934 */
935 if (!object->pager_ready) {
936 #if TRACEFAULTPAGE
937 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
938 #endif
939 if(m != VM_PAGE_NULL)
940 VM_PAGE_FREE(m);
941 XPR(XPR_VM_FAULT,
942 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
943 (integer_t)object, offset, 0, 0, 0);
944 /* take an extra ref so object won't die */
945 assert(object->ref_count > 0);
946 object->ref_count++;
947 vm_object_res_reference(object);
948 vm_fault_cleanup(object, first_m);
949 counter(c_vm_fault_page_block_backoff_kernel++);
950 vm_object_lock(object);
951 assert(object->ref_count > 0);
952 if (!object->pager_ready) {
953 wait_result = vm_object_assert_wait(object,
954 VM_OBJECT_EVENT_PAGER_READY,
955 interruptible);
956 vm_object_unlock(object);
957 if (wait_result == THREAD_WAITING)
958 wait_result = thread_block(THREAD_CONTINUE_NULL);
959 vm_object_deallocate(object);
960 goto backoff;
961 } else {
962 vm_object_unlock(object);
963 vm_object_deallocate(object);
964 thread_interrupt_level(interruptible_state);
965 return VM_FAULT_RETRY;
966 }
967 }
968
969 if(object->phys_contiguous) {
970 if(m != VM_PAGE_NULL) {
971 VM_PAGE_FREE(m);
972 m = VM_PAGE_NULL;
973 }
974 goto no_clustering;
975 }
976 if (object->internal) {
977 /*
978 * Requests to the default pager
979 * must reserve a real page in advance,
980 * because the pager's data-provided path
981 * won't block for pages. IMPORTANT:
982 * this acts as a throttling mechanism
983 * for data_requests to the default
984 * pager.
985 */
986
987 #if TRACEFAULTPAGE
988 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
989 #endif
990 if (m->fictitious && !vm_page_convert(m)) {
991 VM_PAGE_FREE(m);
992 vm_fault_cleanup(object, first_m);
993 thread_interrupt_level(interruptible_state);
994 return(VM_FAULT_MEMORY_SHORTAGE);
995 }
996 } else if (object->absent_count >
997 vm_object_absent_max) {
998 /*
999 * If there are too many outstanding page
1000 * requests pending on this object, we
1001 * wait for them to be resolved now.
1002 */
1003
1004 #if TRACEFAULTPAGE
1005 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1006 #endif
1007 if(m != VM_PAGE_NULL)
1008 VM_PAGE_FREE(m);
1009 /* take an extra ref so object won't die */
1010 assert(object->ref_count > 0);
1011 object->ref_count++;
1012 vm_object_res_reference(object);
1013 vm_fault_cleanup(object, first_m);
1014 counter(c_vm_fault_page_block_backoff_kernel++);
1015 vm_object_lock(object);
1016 assert(object->ref_count > 0);
1017 if (object->absent_count > vm_object_absent_max) {
1018 vm_object_absent_assert_wait(object,
1019 interruptible);
1020 vm_object_unlock(object);
1021 wait_result = thread_block(THREAD_CONTINUE_NULL);
1022 vm_object_deallocate(object);
1023 goto backoff;
1024 } else {
1025 vm_object_unlock(object);
1026 vm_object_deallocate(object);
1027 thread_interrupt_level(interruptible_state);
1028 return VM_FAULT_RETRY;
1029 }
1030 }
1031
1032 /*
1033 * Indicate that the page is waiting for data
1034 * from the memory manager.
1035 */
1036
1037 if(m != VM_PAGE_NULL) {
1038
1039 m->list_req_pending = TRUE;
1040 m->absent = TRUE;
1041 m->unusual = TRUE;
1042 object->absent_count++;
1043
1044 }
1045
1046 no_clustering:
1047 cluster_start = offset;
1048 length = PAGE_SIZE;
1049
1050 /*
1051 * lengthen the cluster by the pages in the working set
1052 */
1053 if((map != NULL) &&
1054 (current_task()->dynamic_working_set != 0)) {
1055 cluster_end = cluster_start + length;
1056 /* tws values for start and end are just
1057 * suggestions. Therefore, as long as
1058 * build_cluster does not use pointers or
1059 * take action based on values that
1060 * could be affected by re-entrance we
1061 * do not need to take the map lock.
1062 */
1063 cluster_end = offset + PAGE_SIZE_64;
1064 tws_build_cluster((tws_hash_t)
1065 current_task()->dynamic_working_set,
1066 object, &cluster_start,
1067 &cluster_end, 0x40000);
1068 length = cluster_end - cluster_start;
1069 }
1070 #if TRACEFAULTPAGE
1071 dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */
1072 #endif
1073 /*
1074 * We have a busy page, so we can
1075 * release the object lock.
1076 */
1077 vm_object_unlock(object);
1078
1079 /*
1080 * Call the memory manager to retrieve the data.
1081 */
1082
1083 if (type_of_fault)
1084 *type_of_fault = (length << 8) | DBG_PAGEIN_FAULT;
1085 VM_STAT(pageins++);
1086 current_task()->pageins++;
1087 bumped_pagein = TRUE;
1088
1089 /*
1090 * If this object uses a copy_call strategy,
1091 * and we are interested in a copy of this object
1092 * (having gotten here only by following a
1093 * shadow chain), then tell the memory manager
1094 * via a flag added to the desired_access
1095 * parameter, so that it can detect a race
1096 * between our walking down the shadow chain
1097 * and its pushing pages up into a copy of
1098 * the object that it manages.
1099 */
1100
1101 if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1102 object != first_object) {
1103 wants_copy_flag = VM_PROT_WANTS_COPY;
1104 } else {
1105 wants_copy_flag = VM_PROT_NONE;
1106 }
1107
1108 XPR(XPR_VM_FAULT,
1109 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1110 (integer_t)object, offset, (integer_t)m,
1111 access_required | wants_copy_flag, 0);
1112
1113 rc = memory_object_data_request(object->pager,
1114 cluster_start + object->paging_offset,
1115 length,
1116 access_required | wants_copy_flag);
1117
1118
1119 #if TRACEFAULTPAGE
1120 dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1121 #endif
1122 if (rc != KERN_SUCCESS) {
1123 if (rc != MACH_SEND_INTERRUPTED
1124 && vm_fault_debug)
1125 printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
1126 "memory_object_data_request",
1127 object->pager,
1128 cluster_start + object->paging_offset,
1129 length, access_required, rc);
1130 /*
1131 * Don't want to leave a busy page around,
1132 * but the data request may have blocked,
1133 * so check if it's still there and busy.
1134 */
1135 if(!object->phys_contiguous) {
1136 vm_object_lock(object);
1137 for (; length; length -= PAGE_SIZE,
1138 cluster_start += PAGE_SIZE_64) {
1139 vm_page_t p;
1140 if ((p = vm_page_lookup(object,
1141 cluster_start))
1142 && p->absent && p->busy
1143 && p != first_m) {
1144 VM_PAGE_FREE(p);
1145 }
1146 }
1147 }
1148 vm_fault_cleanup(object, first_m);
1149 thread_interrupt_level(interruptible_state);
1150 return((rc == MACH_SEND_INTERRUPTED) ?
1151 VM_FAULT_INTERRUPTED :
1152 VM_FAULT_MEMORY_ERROR);
1153 } else {
1154 #ifdef notdefcdy
1155 tws_hash_line_t line;
1156 task_t task;
1157
1158 task = current_task();
1159
1160 if((map != NULL) &&
1161 (task->dynamic_working_set != 0) &&
1162 !(object->private)) {
1163 vm_object_t base_object;
1164 vm_object_offset_t base_offset;
1165 base_object = object;
1166 base_offset = offset;
1167 while(base_object->shadow) {
1168 base_offset +=
1169 base_object->shadow_offset;
1170 base_object =
1171 base_object->shadow;
1172 }
1173 if(tws_lookup
1174 ((tws_hash_t)
1175 task->dynamic_working_set,
1176 base_offset, base_object,
1177 &line) == KERN_SUCCESS) {
1178 tws_line_signal((tws_hash_t)
1179 task->dynamic_working_set,
1180 map, line, vaddr);
1181 }
1182 }
1183 #endif
1184 }
1185
1186 /*
1187 * Retry with same object/offset, since new data may
1188 * be in a different page (i.e., m is meaningless at
1189 * this point).
1190 */
1191 vm_object_lock(object);
1192 if ((interruptible != THREAD_UNINT) &&
1193 (current_thread()->state & TH_ABORT)) {
1194 vm_fault_cleanup(object, first_m);
1195 thread_interrupt_level(interruptible_state);
1196 return(VM_FAULT_INTERRUPTED);
1197 }
1198 if(m == VM_PAGE_NULL)
1199 break;
1200 continue;
1201 }
1202
1203 /*
1204 * The only case in which we get here is if
1205 * object has no pager (or unwiring). If the pager doesn't
1206 * have the page this is handled in the m->absent case above
1207 * (and if you change things here you should look above).
1208 */
1209 #if TRACEFAULTPAGE
1210 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1211 #endif
1212 if (object == first_object)
1213 first_m = m;
1214 else
1215 assert(m == VM_PAGE_NULL);
1216
1217 XPR(XPR_VM_FAULT,
1218 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1219 (integer_t)object, offset, (integer_t)m,
1220 (integer_t)object->shadow, 0);
1221 /*
1222 * Move on to the next object. Lock the next
1223 * object before unlocking the current one.
1224 */
1225 next_object = object->shadow;
1226 if (next_object == VM_OBJECT_NULL) {
1227 assert(!must_be_resident);
1228 /*
1229 * If there's no object left, fill the page
1230 * in the top object with zeros. But first we
1231 * need to allocate a real page.
1232 */
1233
1234 if (object != first_object) {
1235 vm_object_paging_end(object);
1236 vm_object_unlock(object);
1237
1238 object = first_object;
1239 offset = first_offset;
1240 vm_object_lock(object);
1241 }
1242
1243 m = first_m;
1244 assert(m->object == object);
1245 first_m = VM_PAGE_NULL;
1246
1247 if (object->shadow_severed) {
1248 VM_PAGE_FREE(m);
1249 vm_fault_cleanup(object, VM_PAGE_NULL);
1250 thread_interrupt_level(interruptible_state);
1251 return VM_FAULT_MEMORY_ERROR;
1252 }
1253
1254 if (VM_PAGE_THROTTLED() ||
1255 (m->fictitious && !vm_page_convert(m))) {
1256 VM_PAGE_FREE(m);
1257 vm_fault_cleanup(object, VM_PAGE_NULL);
1258 thread_interrupt_level(interruptible_state);
1259 return(VM_FAULT_MEMORY_SHORTAGE);
1260 }
1261 m->no_isync = FALSE;
1262
1263 if (!no_zero_fill) {
1264 vm_object_unlock(object);
1265 vm_page_zero_fill(m);
1266 if (type_of_fault)
1267 *type_of_fault = DBG_ZERO_FILL_FAULT;
1268 VM_STAT(zero_fill_count++);
1269
1270 if (bumped_pagein == TRUE) {
1271 VM_STAT(pageins--);
1272 current_task()->pageins--;
1273 }
1274 vm_object_lock(object);
1275 }
1276 vm_page_lock_queues();
1277 VM_PAGE_QUEUES_REMOVE(m);
1278 if(m->object->size > 0x80000) {
1279 m->zero_fill = TRUE;
1280 /* depends on the queues lock */
1281 vm_zf_count += 1;
1282 queue_enter(&vm_page_queue_zf,
1283 m, vm_page_t, pageq);
1284 } else {
1285 queue_enter(
1286 &vm_page_queue_inactive,
1287 m, vm_page_t, pageq);
1288 }
1289 m->page_ticket = vm_page_ticket;
1290 vm_page_ticket_roll++;
1291 if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1292 vm_page_ticket_roll = 0;
1293 if(vm_page_ticket ==
1294 VM_PAGE_TICKET_ROLL_IDS)
1295 vm_page_ticket= 0;
1296 else
1297 vm_page_ticket++;
1298 }
1299 m->inactive = TRUE;
1300 vm_page_inactive_count++;
1301 vm_page_unlock_queues();
1302 pmap_clear_modify(m->phys_addr);
1303 break;
1304 }
1305 else {
1306 if ((object != first_object) || must_be_resident)
1307 vm_object_paging_end(object);
1308 offset += object->shadow_offset;
1309 hi_offset += object->shadow_offset;
1310 lo_offset += object->shadow_offset;
1311 access_required = VM_PROT_READ;
1312 vm_object_lock(next_object);
1313 vm_object_unlock(object);
1314 object = next_object;
1315 vm_object_paging_begin(object);
1316 }
1317 }
1318
1319 /*
1320 * PAGE HAS BEEN FOUND.
1321 *
1322 * This page (m) is:
1323 * busy, so that we can play with it;
1324 * not absent, so that nobody else will fill it;
1325 * possibly eligible for pageout;
1326 *
1327 * The top-level page (first_m) is:
1328 * VM_PAGE_NULL if the page was found in the
1329 * top-level object;
1330 * busy, not absent, and ineligible for pageout.
1331 *
1332 * The current object (object) is locked. A paging
1333 * reference is held for the current and top-level
1334 * objects.
1335 */
1336
1337 #if TRACEFAULTPAGE
1338 dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1339 #endif
1340 #if EXTRA_ASSERTIONS
1341 if(m != VM_PAGE_NULL) {
1342 assert(m->busy && !m->absent);
1343 assert((first_m == VM_PAGE_NULL) ||
1344 (first_m->busy && !first_m->absent &&
1345 !first_m->active && !first_m->inactive));
1346 }
1347 #endif /* EXTRA_ASSERTIONS */
1348
1349 XPR(XPR_VM_FAULT,
1350 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1351 (integer_t)object, offset, (integer_t)m,
1352 (integer_t)first_object, (integer_t)first_m);
1353 /*
1354 * If the page is being written, but isn't
1355 * already owned by the top-level object,
1356 * we have to copy it into a new page owned
1357 * by the top-level object.
1358 */
1359
1360 if ((object != first_object) && (m != VM_PAGE_NULL)) {
1361 /*
1362 * We only really need to copy if we
1363 * want to write it.
1364 */
1365
1366 #if TRACEFAULTPAGE
1367 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1368 #endif
1369 if (fault_type & VM_PROT_WRITE) {
1370 vm_page_t copy_m;
1371
1372 assert(!must_be_resident);
1373
1374 /*
1375 * If we try to collapse first_object at this
1376 * point, we may deadlock when we try to get
1377 * the lock on an intermediate object (since we
1378 * have the bottom object locked). We can't
1379 * unlock the bottom object, because the page
1380 * we found may move (by collapse) if we do.
1381 *
1382 * Instead, we first copy the page. Then, when
1383 * we have no more use for the bottom object,
1384 * we unlock it and try to collapse.
1385 *
1386 * Note that we copy the page even if we didn't
1387 * need to... that's the breaks.
1388 */
1389
1390 /*
1391 * Allocate a page for the copy
1392 */
1393 copy_m = vm_page_grab();
1394 if (copy_m == VM_PAGE_NULL) {
1395 RELEASE_PAGE(m);
1396 vm_fault_cleanup(object, first_m);
1397 thread_interrupt_level(interruptible_state);
1398 return(VM_FAULT_MEMORY_SHORTAGE);
1399 }
1400
1401
1402 XPR(XPR_VM_FAULT,
1403 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1404 (integer_t)object, offset,
1405 (integer_t)m, (integer_t)copy_m, 0);
1406 vm_page_copy(m, copy_m);
1407
1408 /*
1409 * If another map is truly sharing this
1410 * page with us, we have to flush all
1411 * uses of the original page, since we
1412 * can't distinguish those which want the
1413 * original from those which need the
1414 * new copy.
1415 *
1416 * XXXO If we know that only one map has
1417 * access to this page, then we could
1418 * avoid the pmap_page_protect() call.
1419 */
1420
1421 vm_page_lock_queues();
1422 assert(!m->cleaning);
1423 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1424 vm_page_deactivate(m);
1425 copy_m->dirty = TRUE;
1426 /*
1427 * Setting reference here prevents this fault from
1428 * being counted as a (per-thread) reactivate as well
1429 * as a copy-on-write.
1430 */
1431 first_m->reference = TRUE;
1432 vm_page_unlock_queues();
1433
1434 /*
1435 * We no longer need the old page or object.
1436 */
1437
1438 PAGE_WAKEUP_DONE(m);
1439 vm_object_paging_end(object);
1440 vm_object_unlock(object);
1441
1442 if (type_of_fault)
1443 *type_of_fault = DBG_COW_FAULT;
1444 VM_STAT(cow_faults++);
1445 current_task()->cow_faults++;
1446 object = first_object;
1447 offset = first_offset;
1448
1449 vm_object_lock(object);
1450 VM_PAGE_FREE(first_m);
1451 first_m = VM_PAGE_NULL;
1452 assert(copy_m->busy);
1453 vm_page_insert(copy_m, object, offset);
1454 m = copy_m;
1455
1456 /*
1457 * Now that we've gotten the copy out of the
1458 * way, let's try to collapse the top object.
1459 * But we have to play ugly games with
1460 * paging_in_progress to do that...
1461 */
1462
1463 vm_object_paging_end(object);
1464 vm_object_collapse(object);
1465 vm_object_paging_begin(object);
1466
1467 }
1468 else {
1469 *protection &= (~VM_PROT_WRITE);
1470 }
1471 }
1472
1473 /*
1474 * Now check whether the page needs to be pushed into the
1475 * copy object. The use of asymmetric copy on write for
1476 * shared temporary objects means that we may do two copies to
1477 * satisfy the fault; one above to get the page from a
1478 * shadowed object, and one here to push it into the copy.
1479 */
1480
1481 while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
1482 (m!= VM_PAGE_NULL)) {
1483 vm_object_offset_t copy_offset;
1484 vm_page_t copy_m;
1485
1486 #if TRACEFAULTPAGE
1487 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1488 #endif
1489 /*
1490 * If the page is being written, but hasn't been
1491 * copied to the copy-object, we have to copy it there.
1492 */
1493
1494 if ((fault_type & VM_PROT_WRITE) == 0) {
1495 *protection &= ~VM_PROT_WRITE;
1496 break;
1497 }
1498
1499 /*
1500 * If the page was guaranteed to be resident,
1501 * we must have already performed the copy.
1502 */
1503
1504 if (must_be_resident)
1505 break;
1506
1507 /*
1508 * Try to get the lock on the copy_object.
1509 */
1510 if (!vm_object_lock_try(copy_object)) {
1511 vm_object_unlock(object);
1512
1513 mutex_pause(); /* wait a bit */
1514
1515 vm_object_lock(object);
1516 continue;
1517 }
1518
1519 /*
1520 * Make another reference to the copy-object,
1521 * to keep it from disappearing during the
1522 * copy.
1523 */
1524 assert(copy_object->ref_count > 0);
1525 copy_object->ref_count++;
1526 VM_OBJ_RES_INCR(copy_object);
1527
1528 /*
1529 * Does the page exist in the copy?
1530 */
1531 copy_offset = first_offset - copy_object->shadow_offset;
1532 if (copy_object->size <= copy_offset)
1533 /*
1534 * Copy object doesn't cover this page -- do nothing.
1535 */
1536 ;
1537 else if ((copy_m =
1538 vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1539 /* Page currently exists in the copy object */
1540 if (copy_m->busy) {
1541 /*
1542 * If the page is being brought
1543 * in, wait for it and then retry.
1544 */
1545 RELEASE_PAGE(m);
1546 /* take an extra ref so object won't die */
1547 assert(copy_object->ref_count > 0);
1548 copy_object->ref_count++;
1549 vm_object_res_reference(copy_object);
1550 vm_object_unlock(copy_object);
1551 vm_fault_cleanup(object, first_m);
1552 counter(c_vm_fault_page_block_backoff_kernel++);
1553 vm_object_lock(copy_object);
1554 assert(copy_object->ref_count > 0);
1555 VM_OBJ_RES_DECR(copy_object);
1556 copy_object->ref_count--;
1557 assert(copy_object->ref_count > 0);
1558 copy_m = vm_page_lookup(copy_object, copy_offset);
1559 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1560 PAGE_ASSERT_WAIT(copy_m, interruptible);
1561 vm_object_unlock(copy_object);
1562 wait_result = thread_block(THREAD_CONTINUE_NULL);
1563 vm_object_deallocate(copy_object);
1564 goto backoff;
1565 } else {
1566 vm_object_unlock(copy_object);
1567 vm_object_deallocate(copy_object);
1568 thread_interrupt_level(interruptible_state);
1569 return VM_FAULT_RETRY;
1570 }
1571 }
1572 }
1573 else if (!PAGED_OUT(copy_object, copy_offset)) {
1574 /*
1575 * If PAGED_OUT is TRUE, then the page used to exist
1576 * in the copy-object, and has already been paged out.
1577 * We don't need to repeat this. If PAGED_OUT is
1578 * FALSE, then either we don't know (!pager_created,
1579 * for example) or it hasn't been paged out.
1580 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1581 * We must copy the page to the copy object.
1582 */
1583
1584 /*
1585 * Allocate a page for the copy
1586 */
1587 copy_m = vm_page_alloc(copy_object, copy_offset);
1588 if (copy_m == VM_PAGE_NULL) {
1589 RELEASE_PAGE(m);
1590 VM_OBJ_RES_DECR(copy_object);
1591 copy_object->ref_count--;
1592 assert(copy_object->ref_count > 0);
1593 vm_object_unlock(copy_object);
1594 vm_fault_cleanup(object, first_m);
1595 thread_interrupt_level(interruptible_state);
1596 return(VM_FAULT_MEMORY_SHORTAGE);
1597 }
1598
1599 /*
1600 * Must copy page into copy-object.
1601 */
1602
1603 vm_page_copy(m, copy_m);
1604
1605 /*
1606 * If the old page was in use by any users
1607 * of the copy-object, it must be removed
1608 * from all pmaps. (We can't know which
1609 * pmaps use it.)
1610 */
1611
1612 vm_page_lock_queues();
1613 assert(!m->cleaning);
1614 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1615 copy_m->dirty = TRUE;
1616 vm_page_unlock_queues();
1617
1618 /*
1619 * If there's a pager, then immediately
1620 * page out this page, using the "initialize"
1621 * option. Else, we use the copy.
1622 */
1623
1624 if
1625 #if MACH_PAGEMAP
1626 ((!copy_object->pager_created) ||
1627 vm_external_state_get(
1628 copy_object->existence_map, copy_offset)
1629 == VM_EXTERNAL_STATE_ABSENT)
1630 #else
1631 (!copy_object->pager_created)
1632 #endif
1633 {
1634 vm_page_lock_queues();
1635 vm_page_activate(copy_m);
1636 vm_page_unlock_queues();
1637 PAGE_WAKEUP_DONE(copy_m);
1638 }
1639 else {
1640 assert(copy_m->busy == TRUE);
1641
1642 /*
1643 * The page is already ready for pageout:
1644 * not on pageout queues and busy.
1645 * Unlock everything except the
1646 * copy_object itself.
1647 */
1648
1649 vm_object_unlock(object);
1650
1651 /*
1652 * Write the page to the copy-object,
1653 * flushing it from the kernel.
1654 */
1655
1656 vm_pageout_initialize_page(copy_m);
1657
1658 /*
1659 * Since the pageout may have
1660 * temporarily dropped the
1661 * copy_object's lock, we
1662 * check whether we'll have
1663 * to deallocate the hard way.
1664 */
1665
1666 if ((copy_object->shadow != object) ||
1667 (copy_object->ref_count == 1)) {
1668 vm_object_unlock(copy_object);
1669 vm_object_deallocate(copy_object);
1670 vm_object_lock(object);
1671 continue;
1672 }
1673
1674 /*
1675 * Pick back up the old object's
1676 * lock. [It is safe to do so,
1677 * since it must be deeper in the
1678 * object tree.]
1679 */
1680
1681 vm_object_lock(object);
1682 }
1683
1684 /*
1685 * Because we're pushing a page upward
1686 * in the object tree, we must restart
1687 * any faults that are waiting here.
1688 * [Note that this is an expansion of
1689 * PAGE_WAKEUP that uses the THREAD_RESTART
1690 * wait result]. Can't turn off the page's
1691 * busy bit because we're not done with it.
1692 */
1693
1694 if (m->wanted) {
1695 m->wanted = FALSE;
1696 thread_wakeup_with_result((event_t) m,
1697 THREAD_RESTART);
1698 }
1699 }
1700
1701 /*
1702 * The reference count on copy_object must be
1703 * at least 2: one for our extra reference,
1704 * and at least one from the outside world
1705 * (we checked that when we last locked
1706 * copy_object).
1707 */
1708 copy_object->ref_count--;
1709 assert(copy_object->ref_count > 0);
1710 VM_OBJ_RES_DECR(copy_object);
1711 vm_object_unlock(copy_object);
1712
1713 break;
1714 }
1715
1716 *result_page = m;
1717 *top_page = first_m;
1718
1719 XPR(XPR_VM_FAULT,
1720 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1721 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1722 /*
1723 * If the page can be written, assume that it will be.
1724 * [Earlier, we restrict the permission to allow write
1725 * access only if the fault so required, so we don't
1726 * mark read-only data as dirty.]
1727 */
1728
1729 #if !VM_FAULT_STATIC_CONFIG
1730 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE) &&
1731 (m != VM_PAGE_NULL)) {
1732 m->dirty = TRUE;
1733 }
1734 #endif
1735 #if TRACEFAULTPAGE
1736 dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_page_deactivate_behind); /* (TEST/DEBUG) */
1737 #endif
1738 if (vm_page_deactivate_behind) {
1739 if (offset && /* don't underflow */
1740 (object->last_alloc == (offset - PAGE_SIZE_64))) {
1741 m = vm_page_lookup(object, object->last_alloc);
1742 if ((m != VM_PAGE_NULL) && !m->busy) {
1743 vm_page_lock_queues();
1744 vm_page_deactivate(m);
1745 vm_page_unlock_queues();
1746 }
1747 #if TRACEFAULTPAGE
1748 dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1749 #endif
1750 }
1751 object->last_alloc = offset;
1752 }
1753 #if TRACEFAULTPAGE
1754 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */
1755 #endif
1756 thread_interrupt_level(interruptible_state);
1757 if(*result_page == VM_PAGE_NULL) {
1758 vm_object_unlock(object);
1759 }
1760 return(VM_FAULT_SUCCESS);
1761
1762 #if 0
1763 block_and_backoff:
1764 vm_fault_cleanup(object, first_m);
1765
1766 counter(c_vm_fault_page_block_backoff_kernel++);
1767 thread_block(THREAD_CONTINUE_NULL);
1768 #endif
1769
1770 backoff:
1771 thread_interrupt_level(interruptible_state);
1772 if (wait_result == THREAD_INTERRUPTED)
1773 return VM_FAULT_INTERRUPTED;
1774 return VM_FAULT_RETRY;
1775
1776 #undef RELEASE_PAGE
1777 }
1778
1779 /*
1780 * Routine: vm_fault
1781 * Purpose:
1782 * Handle page faults, including pseudo-faults
1783 * used to change the wiring status of pages.
1784 * Returns:
1785 * Explicit continuations have been removed.
1786 * Implementation:
1787 * vm_fault and vm_fault_page save mucho state
1788 * in the moral equivalent of a closure. The state
1789 * structure is allocated when first entering vm_fault
1790 * and deallocated when leaving vm_fault.
1791 */
1792
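/*
 * Illustrative sketch (assumption, not taken from this file): a
 * machine-dependent trap handler would typically resolve a user-level
 * page fault with a call of the following shape.  fault_address and
 * fault_prot are hypothetical names; the trunc_page() rounding and the
 * THREAD_ABORTSAFE choice are illustrative defaults.
 */
#if 0
	kern_return_t	result;

	result = vm_fault(current_map(),
			trunc_page(fault_address),
			fault_prot,		/* e.g. VM_PROT_READ|VM_PROT_WRITE */
			FALSE,			/* change_wiring */
			THREAD_ABORTSAFE,	/* interruptible */
			NULL, 0);		/* no caller-supplied pmap */

	if (result != KERN_SUCCESS) {
		/* unresolved: raise an exception (e.g. EXC_BAD_ACCESS) */
	}
#endif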
1793 kern_return_t
1794 vm_fault(
1795 vm_map_t map,
1796 vm_offset_t vaddr,
1797 vm_prot_t fault_type,
1798 boolean_t change_wiring,
1799 int interruptible,
1800 pmap_t caller_pmap,
1801 vm_offset_t caller_pmap_addr)
1802 {
1803 vm_map_version_t version; /* Map version for verification */
1804 boolean_t wired; /* Should mapping be wired down? */
1805 vm_object_t object; /* Top-level object */
1806 vm_object_offset_t offset; /* Top-level offset */
1807 vm_prot_t prot; /* Protection for mapping */
1808 vm_behavior_t behavior; /* Expected paging behavior */
1809 vm_object_offset_t lo_offset, hi_offset;
1810 vm_object_t old_copy_object; /* Saved copy object */
1811 vm_page_t result_page; /* Result of vm_fault_page */
1812 vm_page_t top_page; /* Placeholder page */
1813 kern_return_t kr;
1814
1815 register
1816 vm_page_t m; /* Fast access to result_page */
1817 kern_return_t error_code; /* page error reasons */
1818 register
1819 vm_object_t cur_object;
1820 register
1821 vm_object_offset_t cur_offset;
1822 vm_page_t cur_m;
1823 vm_object_t new_object;
1824 int type_of_fault;
1825 vm_map_t pmap_map = map;
1826 vm_map_t original_map = map;
1827 pmap_t pmap = NULL;
1828 boolean_t funnel_set = FALSE;
1829 funnel_t *curflock;
1830 thread_t cur_thread;
1831 boolean_t interruptible_state;
1832 unsigned int cache_attr;
1833 int write_startup_file = 0;
1834 vm_prot_t full_fault_type;
1835
1836
1837 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
1838 vaddr,
1839 0,
1840 0,
1841 0,
1842 0);
1843
1844 cur_thread = current_thread();
1845 /* at present we do not fully check for execute permission */
1846 /* we generally treat it as read except in certain device */
1847 /* memory settings */
1848 full_fault_type = fault_type;
1849 if(fault_type & VM_PROT_EXECUTE) {
1850 fault_type &= ~VM_PROT_EXECUTE;
1851 fault_type |= VM_PROT_READ;
1852 }
1853
1854 interruptible_state = thread_interrupt_level(interruptible);
1855
1856 /*
1857 * assume we will hit a page in the cache
1858 * otherwise, explicitly override with
1859 * the real fault type once we determine it
1860 */
1861 type_of_fault = DBG_CACHE_HIT_FAULT;
1862
1863 VM_STAT(faults++);
1864 current_task()->faults++;
1865
1866 /*
1867 * drop funnel if it is already held. Then restore while returning
1868 */
1869 if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) {
1870 funnel_set = TRUE;
1871 curflock = cur_thread->funnel_lock;
1872 thread_funnel_set( curflock , FALSE);
1873 }
1874
1875 RetryFault: ;
1876
1877 /*
1878 * Find the backing store object and offset into
1879 * it to begin the search.
1880 */
1881 map = original_map;
1882 vm_map_lock_read(map);
1883 kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
1884 &object, &offset,
1885 &prot, &wired,
1886 &behavior, &lo_offset, &hi_offset, &pmap_map);
1887
1888 pmap = pmap_map->pmap;
1889
1890 if (kr != KERN_SUCCESS) {
1891 vm_map_unlock_read(map);
1892 goto done;
1893 }
1894
1895 /*
1896 * If the page is wired, we must fault for the current protection
1897 * value, to avoid further faults.
1898 */
1899
1900 if (wired)
1901 fault_type = prot | VM_PROT_WRITE;
1902
1903 #if VM_FAULT_CLASSIFY
1904 /*
1905 * Temporary data gathering code
1906 */
1907 vm_fault_classify(object, offset, fault_type);
1908 #endif
1909 /*
1910 * Fast fault code. The basic idea is to do as much as
1911 * possible while holding the map lock and object locks.
1912 * Busy pages are not used until the object lock has to
1913 * be dropped to do something (copy, zero fill, pmap enter).
1914 * Similarly, paging references aren't acquired until that
1915 * point, and object references aren't used.
1916 *
1917 * If we can figure out what to do
1918 * (zero fill, copy on write, pmap enter) while holding
1919 * the locks, then it gets done. Otherwise, we give up,
1920 * and use the original fault path (which doesn't hold
1921 * the map lock, and relies on busy pages).
1922 * The give up cases include:
1923 * - Have to talk to pager.
1924 * - Page is busy, absent or in error.
1925 * - Pager has locked out desired access.
1926 * - Fault needs to be restarted.
1927 * - Have to push page into copy object.
1928 *
1929 * The code is an infinite loop that moves one level down
1930 * the shadow chain each time. cur_object and cur_offset
1931 * refer to the current object being examined. object and offset
1932 * are the original object from the map. The loop is at the
1933 * top level if and only if object and cur_object are the same.
1934 *
1935 * Invariants: Map lock is held throughout. Lock is held on
1936 * original object and cur_object (if different) when
1937 * continuing or exiting loop.
1938 *
1939 */
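	/*
	 *	In outline, the loop below does the following (the inline
	 *	comments cover the details of each case):
	 *
	 *		cur_object = object;  cur_offset = offset;
	 *		loop:
	 *		    m = vm_page_lookup(cur_object, cur_offset);
	 *		    if a page is present and usable:
	 *			map it in (FastMapInFault/FastPmapEnter), or
	 *			copy it up to the top object for a write to
	 *			a shadowed page, or give up to the slow path;
	 *		    else if cur_object has no pager and no shadow:
	 *			zero fill a new page in the top object;
	 *		    else:
	 *			descend one level down the shadow chain
	 *			and repeat.
	 */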
1940
1941
1942 /*
1943 * If this page is to be inserted in a copy delay object
1944 * for writing, and if the object has a copy, then the
1945 	 * copy delay strategy is handled by the slow path (vm_fault_page).
1946 */
1947 if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
1948 object->copy == VM_OBJECT_NULL ||
1949 (fault_type & VM_PROT_WRITE) == 0) {
1950 cur_object = object;
1951 cur_offset = offset;
1952
1953 while (TRUE) {
1954 m = vm_page_lookup(cur_object, cur_offset);
1955 if (m != VM_PAGE_NULL) {
1956 if (m->busy)
1957 break;
1958
1959 if (m->unusual && (m->error || m->restart || m->private
1960 || m->absent || (fault_type & m->page_lock))) {
1961
1962 /*
1963 * Unusual case. Give up.
1964 */
1965 break;
1966 }
1967
1968 /*
1969 * Two cases of map in faults:
1970 * - At top level w/o copy object.
1971 * - Read fault anywhere.
1972 * --> must disallow write.
1973 */
1974
1975 if (object == cur_object &&
1976 object->copy == VM_OBJECT_NULL)
1977 goto FastMapInFault;
1978
1979 if ((fault_type & VM_PROT_WRITE) == 0) {
1980
1981 prot &= ~VM_PROT_WRITE;
1982
1983 /*
1984 * Set up to map the page ...
1985 * mark the page busy, drop
1986 * locks and take a paging reference
1987 * on the object with the page.
1988 */
1989
1990 if (object != cur_object) {
1991 vm_object_unlock(object);
1992 object = cur_object;
1993 }
1994 FastMapInFault:
1995 m->busy = TRUE;
1996
1997 vm_object_paging_begin(object);
1998 vm_object_unlock(object);
1999
2000 FastPmapEnter:
2001 /*
2002 * Check a couple of global reasons to
2003 * be conservative about write access.
2004 * Then do the pmap_enter.
2005 */
2006 #if !VM_FAULT_STATIC_CONFIG
2007 if (vm_fault_dirty_handling
2008 #if MACH_KDB
2009 || db_watchpoint_list
2010 #endif
2011 && (fault_type & VM_PROT_WRITE) == 0)
2012 prot &= ~VM_PROT_WRITE;
2013 #else /* STATIC_CONFIG */
2014 #if MACH_KDB
2015 if (db_watchpoint_list
2016 && (fault_type & VM_PROT_WRITE) == 0)
2017 prot &= ~VM_PROT_WRITE;
2018 #endif /* MACH_KDB */
2019 #endif /* STATIC_CONFIG */
2020 if (m->no_isync == TRUE)
2021 pmap_sync_caches_phys(m->phys_addr);
2022
2023 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2024 if(caller_pmap) {
2025 PMAP_ENTER(caller_pmap,
2026 caller_pmap_addr, m,
2027 prot, cache_attr, wired);
2028 } else {
2029 PMAP_ENTER(pmap, vaddr, m,
2030 prot, cache_attr, wired);
2031 }
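				/*
				 * Record this translation in the task's
				 * dynamic working set.  The lookup and
				 * insert below are keyed on the object at
				 * the bottom of the shadow chain; a
				 * KERN_NO_SPACE reply grows the hash, and a
				 * timed-out operation sets
				 * write_startup_file so the startup info is
				 * sent before vm_fault() returns.
				 */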
2032 {
2033 tws_hash_line_t line;
2034 task_t task;
2035
2036 task = current_task();
2037 if((map != NULL) &&
2038 (task->dynamic_working_set != 0) &&
2039 !(object->private)) {
2040 kern_return_t kr;
2041 vm_object_t base_object;
2042 vm_object_offset_t base_offset;
2043 base_object = object;
2044 base_offset = cur_offset;
2045 while(base_object->shadow) {
2046 base_offset +=
2047 base_object->shadow_offset;
2048 base_object =
2049 base_object->shadow;
2050 }
2051 kr = tws_lookup((tws_hash_t)
2052 task->dynamic_working_set,
2053 base_offset, base_object,
2054 &line);
2055 if(kr == KERN_OPERATION_TIMED_OUT){
2056 write_startup_file = 1;
2057 } else if (kr != KERN_SUCCESS) {
2058 kr = tws_insert((tws_hash_t)
2059 task->dynamic_working_set,
2060 base_offset, base_object,
2061 vaddr, pmap_map);
2062 if(kr == KERN_NO_SPACE) {
2063 tws_expand_working_set(
2064 task->dynamic_working_set,
2065 TWS_HASH_LINE_COUNT,
2066 FALSE);
2067 }
2068 if(kr ==
2069 KERN_OPERATION_TIMED_OUT) {
2070 write_startup_file = 1;
2071 }
2072 }
2073 }
2074 }
2075 /*
2076 * Grab the object lock to manipulate
2077 			 * the page queues. The change-wiring
2078 			 * case is obvious. In the soft reference
2079 			 * bits case, activate the page only if it
2080 			 * fell off the paging queues; otherwise
2081 			 * just activate it if it's inactive.
2082 *
2083 * NOTE: original vm_fault code will
2084 * move active page to back of active
2085 * queue. This code doesn't.
2086 */
2087 vm_object_lock(object);
2088 vm_page_lock_queues();
2089
2090 if (m->clustered) {
2091 vm_pagein_cluster_used++;
2092 m->clustered = FALSE;
2093 }
2094 /*
2095 * we did the isync above (if needed)... we're clearing
2096 * the flag here to avoid holding a lock
2097 * while calling pmap functions, however
2098 			 * we need to hold the object lock before
2099 * we can modify the flag
2100 */
2101 m->no_isync = FALSE;
2102 m->reference = TRUE;
2103
2104 if (change_wiring) {
2105 if (wired)
2106 vm_page_wire(m);
2107 else
2108 vm_page_unwire(m);
2109 }
2110 #if VM_FAULT_STATIC_CONFIG
2111 else {
2112 if (!m->active && !m->inactive)
2113 vm_page_activate(m);
2114 }
2115 #else
2116 else if (software_reference_bits) {
2117 if (!m->active && !m->inactive)
2118 vm_page_activate(m);
2119 }
2120 else if (!m->active) {
2121 vm_page_activate(m);
2122 }
2123 #endif
2124 vm_page_unlock_queues();
2125
2126 /*
2127 * That's it, clean up and return.
2128 */
2129 PAGE_WAKEUP_DONE(m);
2130 vm_object_paging_end(object);
2131 vm_object_unlock(object);
2132 vm_map_unlock_read(map);
2133 if(pmap_map != map)
2134 vm_map_unlock(pmap_map);
2135
2136 if(write_startup_file)
2137 tws_send_startup_info(current_task());
2138
2139 if (funnel_set) {
2140 thread_funnel_set( curflock, TRUE);
2141 funnel_set = FALSE;
2142 }
2143 thread_interrupt_level(interruptible_state);
2144
2145 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2146 vaddr,
2147 type_of_fault & 0xff,
2148 KERN_SUCCESS,
2149 type_of_fault >> 8,
2150 0);
2151 return KERN_SUCCESS;
2152 }
2153
2154 /*
2155 * Copy on write fault. If objects match, then
2156 * object->copy must not be NULL (else control
2157 * would be in previous code block), and we
2158 * have a potential push into the copy object
2159 * with which we won't cope here.
2160 */
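			/*
			 * In outline, the copy-up below: grab a free page,
			 * copy cur_m into it, insert it at (object, offset)
			 * and mark it dirty, deactivate the source page and
			 * remove its pmap mappings, collapse the top object,
			 * then jump back to FastPmapEnter to map the copy.
			 */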
2161
2162 if (cur_object == object)
2163 break;
2164
2165 /*
2166 * This is now a shadow based copy on write
2167 * fault -- it requires a copy up the shadow
2168 * chain.
2169 *
2170 * Allocate a page in the original top level
2171 * object. Give up if allocate fails. Also
2172 * need to remember current page, as it's the
2173 * source of the copy.
2174 */
2175 cur_m = m;
2176 m = vm_page_grab();
2177 if (m == VM_PAGE_NULL) {
2178 break;
2179 }
2180
2181 /*
2182 * Now do the copy. Mark the source busy
2183 * and take out paging references on both
2184 * objects.
2185 *
2186 * NOTE: This code holds the map lock across
2187 * the page copy.
2188 */
2189
2190 cur_m->busy = TRUE;
2191 vm_page_copy(cur_m, m);
2192 vm_page_insert(m, object, offset);
2193
2194 vm_object_paging_begin(cur_object);
2195 vm_object_paging_begin(object);
2196
2197 type_of_fault = DBG_COW_FAULT;
2198 VM_STAT(cow_faults++);
2199 current_task()->cow_faults++;
2200
2201 /*
2202 			 * Now cope with the source page and object.
2203 * If the top object has a ref count of 1
2204 * then no other map can access it, and hence
2205 * it's not necessary to do the pmap_page_protect.
2206 */
2207
2208
2209 vm_page_lock_queues();
2210 vm_page_deactivate(cur_m);
2211 m->dirty = TRUE;
2212 pmap_page_protect(cur_m->phys_addr,
2213 VM_PROT_NONE);
2214 vm_page_unlock_queues();
2215
2216 PAGE_WAKEUP_DONE(cur_m);
2217 vm_object_paging_end(cur_object);
2218 vm_object_unlock(cur_object);
2219
2220 /*
2221 			 * Slight hack: call vm_object_collapse()
2222 			 * and then reuse the common FastPmapEnter mapping code.
2223 			 * Note that the object lock was taken above.
2224 */
2225
2226 vm_object_paging_end(object);
2227 vm_object_collapse(object);
2228 vm_object_paging_begin(object);
2229 vm_object_unlock(object);
2230
2231 goto FastPmapEnter;
2232 }
2233 else {
2234
2235 /*
2236 * No page at cur_object, cur_offset
2237 */
2238
2239 if (cur_object->pager_created) {
2240
2241 /*
2242 * Have to talk to the pager. Give up.
2243 */
2244
2245 break;
2246 }
2247
2248
2249 if (cur_object->shadow == VM_OBJECT_NULL) {
2250
2251 if (cur_object->shadow_severed) {
2252 vm_object_paging_end(object);
2253 vm_object_unlock(object);
2254 vm_map_unlock_read(map);
2255 if(pmap_map != map)
2256 vm_map_unlock(pmap_map);
2257
2258 if(write_startup_file)
2259 tws_send_startup_info(
2260 current_task());
2261
2262 if (funnel_set) {
2263 thread_funnel_set( curflock, TRUE);
2264 funnel_set = FALSE;
2265 }
2266 thread_interrupt_level(interruptible_state);
2267
2268 return VM_FAULT_MEMORY_ERROR;
2269 }
2270
2271 /*
2272 * Zero fill fault. Page gets
2273 * filled in top object. Insert
2274 * page, then drop any lower lock.
2275 * Give up if no page.
2276 */
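				/*
				 * The check below makes the fast path decline
				 * to zero fill (falling into the slow path,
				 * which can wait for memory) once
				 * vm_page_free_count drops below
				 *   vm_page_free_target -
				 *	(vm_page_free_target - vm_page_free_min) / 4
				 * i.e. three quarters of the way up from
				 * free_min to free_target.  With purely
				 * illustrative numbers, free_min = 1000 and
				 * free_target = 2000 pages give a cutoff of
				 * 1750 free pages.
				 */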
2277 if ((vm_page_free_target -
2278 ((vm_page_free_target-vm_page_free_min)>>2))
2279 > vm_page_free_count) {
2280 break;
2281 }
2282 m = vm_page_alloc(object, offset);
2283 if (m == VM_PAGE_NULL) {
2284 break;
2285 }
2286 /*
2287 * This is a zero-fill or initial fill
2288 * page fault. As such, we consider it
2289 * undefined with respect to instruction
2290 				 * execution; i.e., it is the responsibility
2291 				 * of higher layers to call for an instruction
2292 				 * sync after changing the contents and before
2293 				 * sending a program into this area. We
2294 				 * choose this approach for performance.
2295 */
2296
2297 m->no_isync = FALSE;
2298
2299 if (cur_object != object)
2300 vm_object_unlock(cur_object);
2301
2302 vm_object_paging_begin(object);
2303 vm_object_unlock(object);
2304
2305 /*
2306 * Now zero fill page and map it.
2307 * the page is probably going to
2308 * be written soon, so don't bother
2309 * to clear the modified bit
2310 *
2311 * NOTE: This code holds the map
2312 * lock across the zero fill.
2313 */
2314
2315 if (!map->no_zero_fill) {
2316 vm_page_zero_fill(m);
2317 type_of_fault = DBG_ZERO_FILL_FAULT;
2318 VM_STAT(zero_fill_count++);
2319 }
2320 vm_page_lock_queues();
2321 VM_PAGE_QUEUES_REMOVE(m);
2322
2323 m->page_ticket = vm_page_ticket;
2324 if(m->object->size > 0x80000) {
2325 m->zero_fill = TRUE;
2326 /* depends on the queues lock */
2327 vm_zf_count += 1;
2328 queue_enter(&vm_page_queue_zf,
2329 m, vm_page_t, pageq);
2330 } else {
2331 queue_enter(
2332 &vm_page_queue_inactive,
2333 m, vm_page_t, pageq);
2334 }
2335 vm_page_ticket_roll++;
2336 if(vm_page_ticket_roll ==
2337 VM_PAGE_TICKETS_IN_ROLL) {
2338 vm_page_ticket_roll = 0;
2339 if(vm_page_ticket ==
2340 VM_PAGE_TICKET_ROLL_IDS)
2341 vm_page_ticket= 0;
2342 else
2343 vm_page_ticket++;
2344 }
2345
2346 m->inactive = TRUE;
2347 vm_page_inactive_count++;
2348 vm_page_unlock_queues();
2349 goto FastPmapEnter;
2350 }
2351
2352 /*
2353 * On to the next level
2354 */
2355
2356 cur_offset += cur_object->shadow_offset;
2357 new_object = cur_object->shadow;
2358 vm_object_lock(new_object);
2359 if (cur_object != object)
2360 vm_object_unlock(cur_object);
2361 cur_object = new_object;
2362
2363 continue;
2364 }
2365 }
2366
2367 /*
2368 * Cleanup from fast fault failure. Drop any object
2369 * lock other than original and drop map lock.
2370 */
2371
2372 if (object != cur_object)
2373 vm_object_unlock(cur_object);
2374 }
2375 vm_map_unlock_read(map);
2376 if(pmap_map != map)
2377 vm_map_unlock(pmap_map);
2378
2379 /*
2380 * Make a reference to this object to
2381 * prevent its disposal while we are messing with
2382 * it. Once we have the reference, the map is free
2383 * to be diddled. Since objects reference their
2384 * shadows (and copies), they will stay around as well.
2385 */
2386
2387 assert(object->ref_count > 0);
2388 object->ref_count++;
2389 vm_object_res_reference(object);
2390 vm_object_paging_begin(object);
2391
2392 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2393 {
2394 tws_hash_line_t line;
2395 task_t task;
2396 kern_return_t kr;
2397
2398 task = current_task();
2399 if((map != NULL) &&
2400 (task->dynamic_working_set != 0)
2401 && !(object->private)) {
2402 vm_object_t base_object;
2403 vm_object_offset_t base_offset;
2404 base_object = object;
2405 base_offset = offset;
2406 while(base_object->shadow) {
2407 base_offset +=
2408 base_object->shadow_offset;
2409 base_object =
2410 base_object->shadow;
2411 }
2412 kr = tws_lookup((tws_hash_t)
2413 task->dynamic_working_set,
2414 base_offset, base_object,
2415 &line);
2416 if(kr == KERN_OPERATION_TIMED_OUT){
2417 write_startup_file = 1;
2418 } else if (kr != KERN_SUCCESS) {
2419 tws_insert((tws_hash_t)
2420 task->dynamic_working_set,
2421 base_offset, base_object,
2422 vaddr, pmap_map);
2423 kr = tws_insert((tws_hash_t)
2424 task->dynamic_working_set,
2425 base_offset, base_object,
2426 vaddr, pmap_map);
2427 if(kr == KERN_NO_SPACE) {
2428 vm_object_unlock(object);
2429 tws_expand_working_set(
2430 task->dynamic_working_set,
2431 TWS_HASH_LINE_COUNT,
2432 FALSE);
2433 vm_object_lock(object);
2434 }
2435 if(kr == KERN_OPERATION_TIMED_OUT) {
2436 write_startup_file = 1;
2437 }
2438 }
2439 }
2440 }
2441 kr = vm_fault_page(object, offset, fault_type,
2442 (change_wiring && !wired),
2443 interruptible,
2444 lo_offset, hi_offset, behavior,
2445 &prot, &result_page, &top_page,
2446 &type_of_fault,
2447 &error_code, map->no_zero_fill, FALSE, map, vaddr);
2448
2449 /*
2450 * If we didn't succeed, lose the object reference immediately.
2451 */
2452
2453 if (kr != VM_FAULT_SUCCESS)
2454 vm_object_deallocate(object);
2455
2456 /*
2457 * See why we failed, and take corrective action.
2458 */
2459
2460 switch (kr) {
2461 case VM_FAULT_SUCCESS:
2462 break;
2463 case VM_FAULT_MEMORY_SHORTAGE:
2464 if (vm_page_wait((change_wiring) ?
2465 THREAD_UNINT :
2466 THREAD_ABORTSAFE))
2467 goto RetryFault;
2468 /* fall thru */
2469 case VM_FAULT_INTERRUPTED:
2470 kr = KERN_ABORTED;
2471 goto done;
2472 case VM_FAULT_RETRY:
2473 goto RetryFault;
2474 case VM_FAULT_FICTITIOUS_SHORTAGE:
2475 vm_page_more_fictitious();
2476 goto RetryFault;
2477 case VM_FAULT_MEMORY_ERROR:
2478 if (error_code)
2479 kr = error_code;
2480 else
2481 kr = KERN_MEMORY_ERROR;
2482 goto done;
2483 }
2484
2485 m = result_page;
2486
2487 if(m != VM_PAGE_NULL) {
2488 assert((change_wiring && !wired) ?
2489 (top_page == VM_PAGE_NULL) :
2490 ((top_page == VM_PAGE_NULL) == (m->object == object)));
2491 }
2492
2493 /*
2494 * How to clean up the result of vm_fault_page. This
2495 * happens whether the mapping is entered or not.
2496 */
2497
2498 #define UNLOCK_AND_DEALLOCATE \
2499 MACRO_BEGIN \
2500 vm_fault_cleanup(m->object, top_page); \
2501 vm_object_deallocate(object); \
2502 MACRO_END
2503
2504 /*
2505 * What to do with the resulting page from vm_fault_page
2506 * if it doesn't get entered into the physical map:
2507 */
2508
2509 #define RELEASE_PAGE(m) \
2510 MACRO_BEGIN \
2511 PAGE_WAKEUP_DONE(m); \
2512 vm_page_lock_queues(); \
2513 if (!m->active && !m->inactive) \
2514 vm_page_activate(m); \
2515 vm_page_unlock_queues(); \
2516 MACRO_END
2517
2518 /*
2519 * We must verify that the maps have not changed
2520 * since our last lookup.
2521 */
2522
2523 if(m != VM_PAGE_NULL) {
2524 old_copy_object = m->object->copy;
2525
2526 vm_object_unlock(m->object);
2527 } else {
2528 old_copy_object = VM_OBJECT_NULL;
2529 }
2530 if ((map != original_map) || !vm_map_verify(map, &version)) {
2531 vm_object_t retry_object;
2532 vm_object_offset_t retry_offset;
2533 vm_prot_t retry_prot;
2534
2535 /*
2536 * To avoid trying to write_lock the map while another
2537 * thread has it read_locked (in vm_map_pageable), we
2538 * do not try for write permission. If the page is
2539 * still writable, we will get write permission. If it
2540 * is not, or has been marked needs_copy, we enter the
2541 * mapping without write permission, and will merely
2542 * take another fault.
2543 */
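	/*
	 * In outline: redo the lookup read-only; if it fails, release the
	 * page and bail out; if it now resolves to a different
	 * (object, offset), release the page and retry the whole fault;
	 * otherwise narrow prot to the freshly returned retry_prot and
	 * carry on to the pmap_enter below.
	 */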
2544 map = original_map;
2545 vm_map_lock_read(map);
2546 kr = vm_map_lookup_locked(&map, vaddr,
2547 fault_type & ~VM_PROT_WRITE, &version,
2548 &retry_object, &retry_offset, &retry_prot,
2549 &wired, &behavior, &lo_offset, &hi_offset,
2550 &pmap_map);
2551 pmap = pmap_map->pmap;
2552
2553 if (kr != KERN_SUCCESS) {
2554 vm_map_unlock_read(map);
2555 if(m != VM_PAGE_NULL) {
2556 vm_object_lock(m->object);
2557 RELEASE_PAGE(m);
2558 UNLOCK_AND_DEALLOCATE;
2559 } else {
2560 vm_object_deallocate(object);
2561 }
2562 goto done;
2563 }
2564
2565 vm_object_unlock(retry_object);
2566 if(m != VM_PAGE_NULL) {
2567 vm_object_lock(m->object);
2568 } else {
2569 vm_object_lock(object);
2570 }
2571
2572 if ((retry_object != object) ||
2573 (retry_offset != offset)) {
2574 vm_map_unlock_read(map);
2575 if(pmap_map != map)
2576 vm_map_unlock(pmap_map);
2577 if(m != VM_PAGE_NULL) {
2578 RELEASE_PAGE(m);
2579 UNLOCK_AND_DEALLOCATE;
2580 } else {
2581 vm_object_deallocate(object);
2582 }
2583 goto RetryFault;
2584 }
2585
2586 /*
2587 * Check whether the protection has changed or the object
2588 * has been copied while we left the map unlocked.
2589 */
2590 prot &= retry_prot;
2591 if(m != VM_PAGE_NULL) {
2592 vm_object_unlock(m->object);
2593 } else {
2594 vm_object_unlock(object);
2595 }
2596 }
2597 if(m != VM_PAGE_NULL) {
2598 vm_object_lock(m->object);
2599 } else {
2600 vm_object_lock(object);
2601 }
2602
2603 /*
2604 * If the copy object changed while the top-level object
2605 * was unlocked, then we must take away write permission.
2606 */
2607
2608 if(m != VM_PAGE_NULL) {
2609 if (m->object->copy != old_copy_object)
2610 prot &= ~VM_PROT_WRITE;
2611 }
2612
2613 /*
2614 * If we want to wire down this page, but no longer have
2615 * adequate permissions, we must start all over.
2616 */
2617
2618 if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2619 vm_map_verify_done(map, &version);
2620 if(pmap_map != map)
2621 vm_map_unlock(pmap_map);
2622 if(m != VM_PAGE_NULL) {
2623 RELEASE_PAGE(m);
2624 UNLOCK_AND_DEALLOCATE;
2625 } else {
2626 vm_object_deallocate(object);
2627 }
2628 goto RetryFault;
2629 }
2630
2631 /*
2632 * Put this page into the physical map.
2633 * We had to do the unlock above because pmap_enter
2634 * may cause other faults. The page may be on
2635 * the pageout queues. If the pageout daemon comes
2636 * across the page, it will remove it from the queues.
2637 */
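	/*
	 * Two cases follow.  If vm_fault_page() returned a real page, it
	 * is entered with PMAP_ENTER, into the caller-supplied pmap when
	 * one was passed in.  If no page was returned (a physically
	 * contiguous / device-backed object), the non-i386 branch checks
	 * the guarded-memory execute case and then establishes a block
	 * mapping with pmap_map_block() over the range covered by the
	 * map entry.
	 */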
2638 if (m != VM_PAGE_NULL) {
2639 if (m->no_isync == TRUE) {
2640 pmap_sync_caches_phys(m->phys_addr);
2641
2642 m->no_isync = FALSE;
2643 }
2644
2645 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2646 vm_object_unlock(m->object);
2647
2648 if(caller_pmap) {
2649 PMAP_ENTER(caller_pmap,
2650 caller_pmap_addr, m,
2651 prot, cache_attr, wired);
2652 } else {
2653 PMAP_ENTER(pmap, vaddr, m,
2654 prot, cache_attr, wired);
2655 }
2656 {
2657 tws_hash_line_t line;
2658 task_t task;
2659 kern_return_t kr;
2660
2661 task = current_task();
2662 if((map != NULL) &&
2663 (task->dynamic_working_set != 0)
2664 			   && !(object->private)) {
2665 vm_object_t base_object;
2666 vm_object_offset_t base_offset;
2667 base_object = m->object;
2668 base_offset = m->offset;
2669 while(base_object->shadow) {
2670 base_offset +=
2671 base_object->shadow_offset;
2672 base_object =
2673 base_object->shadow;
2674 }
2675 kr = tws_lookup((tws_hash_t)
2676 task->dynamic_working_set,
2677 base_offset, base_object, &line);
2678 if(kr == KERN_OPERATION_TIMED_OUT){
2679 write_startup_file = 1;
2680 } else if (kr != KERN_SUCCESS) {
2681 tws_insert((tws_hash_t)
2682 task->dynamic_working_set,
2683 base_offset, base_object,
2684 vaddr, pmap_map);
2685 kr = tws_insert((tws_hash_t)
2686 task->dynamic_working_set,
2687 base_offset, base_object,
2688 vaddr, pmap_map);
2689 if(kr == KERN_NO_SPACE) {
2690 tws_expand_working_set(
2691 task->dynamic_working_set,
2692 TWS_HASH_LINE_COUNT,
2693 FALSE);
2694 }
2695 if(kr == KERN_OPERATION_TIMED_OUT) {
2696 write_startup_file = 1;
2697 }
2698 }
2699 }
2700 }
2701 } else {
2702
2703 #ifndef i386
2704 int memattr;
2705 struct phys_entry *pp;
2706 vm_map_entry_t entry;
2707 vm_offset_t laddr;
2708 vm_offset_t ldelta, hdelta;
2709 /*
2710 * do a pmap block mapping from the physical address
2711 * in the object
2712 */
2713 if(pp = pmap_find_physentry(
2714 (vm_offset_t)object->shadow_offset)) {
2715 memattr = ((pp->pte1 & 0x00000078) >> 3);
2716 } else {
2717 memattr = VM_WIMG_MASK & (int)object->wimg_bits;
2718 }
2719
2720
2721 /* While we do not worry about execution protection in */
2722 /* general, we may be able to read device memory and */
2723 /* still not be able to execute it. Here we check for */
2724 		/* the guarded bit.  If it's set and we are attempting */
2725 /* to execute, we return with a protection failure. */
2726
2727 if((memattr & VM_MEM_GUARDED) &&
2728 (full_fault_type & VM_PROT_EXECUTE)) {
2729 vm_map_verify_done(map, &version);
2730 if(pmap_map != map)
2731 vm_map_unlock(pmap_map);
2732 vm_fault_cleanup(object, top_page);
2733 vm_object_deallocate(object);
2734 kr = KERN_PROTECTION_FAILURE;
2735 goto done;
2736 }
2737
2738
2739
2740 if(pmap_map != map) {
2741 vm_map_unlock(pmap_map);
2742 }
2743 if (original_map != map) {
2744 vm_map_unlock_read(map);
2745 vm_map_lock_read(original_map);
2746 map = original_map;
2747 }
2748 pmap_map = map;
2749
2750 laddr = vaddr;
2751 hdelta = 0xFFFFF000;
2752 ldelta = 0xFFFFF000;
2753
2754
2755 while(vm_map_lookup_entry(map, laddr, &entry)) {
2756 if(ldelta > (laddr - entry->vme_start))
2757 ldelta = laddr - entry->vme_start;
2758 if(hdelta > (entry->vme_end - laddr))
2759 hdelta = entry->vme_end - laddr;
2760 if(entry->is_sub_map) {
2761
2762 laddr = (laddr - entry->vme_start)
2763 + entry->offset;
2764 vm_map_lock_read(entry->object.sub_map);
2765 if(map != pmap_map)
2766 vm_map_unlock_read(map);
2767 if(entry->use_pmap) {
2768 vm_map_unlock_read(pmap_map);
2769 pmap_map = entry->object.sub_map;
2770 }
2771 map = entry->object.sub_map;
2772
2773 } else {
2774 break;
2775 }
2776 }
2777
2778 if(vm_map_lookup_entry(map, laddr, &entry) &&
2779 (entry->object.vm_object != NULL) &&
2780 (entry->object.vm_object == object)) {
2781
2782
2783 if(caller_pmap) {
2784 pmap_map_block(caller_pmap,
2785 caller_pmap_addr - ldelta,
2786 ((vm_offset_t)
2787 (entry->object.vm_object->shadow_offset))
2788 + entry->offset +
2789 (laddr - entry->vme_start) - ldelta,
2790 ldelta + hdelta, prot,
2791 memattr, 0); /* Set up a block mapped area */
2792 } else {
2793 pmap_map_block(pmap_map->pmap, vaddr - ldelta,
2794 ((vm_offset_t)
2795 (entry->object.vm_object->shadow_offset))
2796 + entry->offset +
2797 (laddr - entry->vme_start) - ldelta,
2798 ldelta + hdelta, prot,
2799 memattr, 0); /* Set up a block mapped area */
2800 }
2801 }
2802 #else
2803 #ifdef notyet
2804 if(caller_pmap) {
2805 pmap_enter(caller_pmap, caller_pmap_addr,
2806 object->shadow_offset, prot, 0, TRUE);
2807 } else {
2808 pmap_enter(pmap, vaddr,
2809 object->shadow_offset, prot, 0, TRUE);
2810 }
2811 /* Map it in */
2812 #endif
2813 #endif
2814
2815 }
2816
2817 /*
2818 * If the page is not wired down and isn't already
2819 * on a pageout queue, then put it where the
2820 * pageout daemon can find it.
2821 */
2822 if(m != VM_PAGE_NULL) {
2823 vm_object_lock(m->object);
2824 vm_page_lock_queues();
2825
2826 if (change_wiring) {
2827 if (wired)
2828 vm_page_wire(m);
2829 else
2830 vm_page_unwire(m);
2831 }
2832 #if VM_FAULT_STATIC_CONFIG
2833 else {
2834 if (!m->active && !m->inactive)
2835 vm_page_activate(m);
2836 m->reference = TRUE;
2837 }
2838 #else
2839 else if (software_reference_bits) {
2840 if (!m->active && !m->inactive)
2841 vm_page_activate(m);
2842 m->reference = TRUE;
2843 } else {
2844 vm_page_activate(m);
2845 }
2846 #endif
2847 vm_page_unlock_queues();
2848 }
2849
2850 /*
2851 * Unlock everything, and return
2852 */
2853
2854 vm_map_verify_done(map, &version);
2855 if(pmap_map != map)
2856 vm_map_unlock(pmap_map);
2857 if(m != VM_PAGE_NULL) {
2858 PAGE_WAKEUP_DONE(m);
2859 UNLOCK_AND_DEALLOCATE;
2860 } else {
2861 vm_fault_cleanup(object, top_page);
2862 vm_object_deallocate(object);
2863 }
2864 kr = KERN_SUCCESS;
2865
2866 #undef UNLOCK_AND_DEALLOCATE
2867 #undef RELEASE_PAGE
2868
2869 done:
2870 if(write_startup_file)
2871 tws_send_startup_info(current_task());
2872 if (funnel_set) {
2873 thread_funnel_set( curflock, TRUE);
2874 funnel_set = FALSE;
2875 }
2876 thread_interrupt_level(interruptible_state);
2877
2878 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2879 vaddr,
2880 type_of_fault & 0xff,
2881 kr,
2882 type_of_fault >> 8,
2883 0);
2884 return(kr);
2885 }
2886
2887 /*
2888 * vm_fault_wire:
2889 *
2890 * Wire down a range of virtual addresses in a map.
2891 */
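/*
 *	Each page in the entry is first tried with vm_fault_wire_fast();
 *	on failure the full vm_fault() path is used with change_wiring
 *	TRUE.  If any page cannot be wired, the pages wired so far are
 *	unwound with vm_fault_unwire() on a trimmed copy of the entry
 *	before the error is returned.
 */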
2892 kern_return_t
2893 vm_fault_wire(
2894 vm_map_t map,
2895 vm_map_entry_t entry,
2896 pmap_t pmap,
2897 vm_offset_t pmap_addr)
2898 {
2899
2900 register vm_offset_t va;
2901 register vm_offset_t end_addr = entry->vme_end;
2902 register kern_return_t rc;
2903
2904 assert(entry->in_transition);
2905
2906 if ((entry->object.vm_object != NULL) &&
2907 !entry->is_sub_map &&
2908 entry->object.vm_object->phys_contiguous) {
2909 return KERN_SUCCESS;
2910 }
2911
2912 /*
2913 * Inform the physical mapping system that the
2914 * range of addresses may not fault, so that
2915 * page tables and such can be locked down as well.
2916 */
2917
2918 pmap_pageable(pmap, pmap_addr,
2919 pmap_addr + (end_addr - entry->vme_start), FALSE);
2920
2921 /*
2922 * We simulate a fault to get the page and enter it
2923 * in the physical map.
2924 */
2925
2926 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
2927 if ((rc = vm_fault_wire_fast(
2928 map, va, entry, pmap,
2929 pmap_addr + (va - entry->vme_start)
2930 )) != KERN_SUCCESS) {
2931 rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
2932 (pmap == kernel_pmap) ?
2933 THREAD_UNINT : THREAD_ABORTSAFE,
2934 pmap, pmap_addr + (va - entry->vme_start));
2935 }
2936
2937 if (rc != KERN_SUCCESS) {
2938 struct vm_map_entry tmp_entry = *entry;
2939
2940 /* unwire wired pages */
2941 tmp_entry.vme_end = va;
2942 vm_fault_unwire(map,
2943 &tmp_entry, FALSE, pmap, pmap_addr);
2944
2945 return rc;
2946 }
2947 }
2948 return KERN_SUCCESS;
2949 }
2950
2951 /*
2952 * vm_fault_unwire:
2953 *
2954 * Unwire a range of virtual addresses in a map.
2955 */
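/*
 *	The "deallocate" argument selects between freeing the underlying
 *	pages outright and merely unwiring them back onto the paging
 *	queues.  Physically contiguous objects are skipped, and sub-map
 *	entries are handled by re-faulting with VM_PROT_NONE.
 */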
2956 void
2957 vm_fault_unwire(
2958 vm_map_t map,
2959 vm_map_entry_t entry,
2960 boolean_t deallocate,
2961 pmap_t pmap,
2962 vm_offset_t pmap_addr)
2963 {
2964 register vm_offset_t va;
2965 register vm_offset_t end_addr = entry->vme_end;
2966 vm_object_t object;
2967
2968 object = (entry->is_sub_map)
2969 ? VM_OBJECT_NULL : entry->object.vm_object;
2970
2971 /*
2972 * Since the pages are wired down, we must be able to
2973 * get their mappings from the physical map system.
2974 */
2975
2976 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
2977 pmap_change_wiring(pmap,
2978 pmap_addr + (va - entry->vme_start), FALSE);
2979
2980 if (object == VM_OBJECT_NULL) {
2981 (void) vm_fault(map, va, VM_PROT_NONE,
2982 TRUE, THREAD_UNINT, pmap, pmap_addr);
2983 } else if (object->phys_contiguous) {
2984 continue;
2985 } else {
2986 vm_prot_t prot;
2987 vm_page_t result_page;
2988 vm_page_t top_page;
2989 vm_object_t result_object;
2990 vm_fault_return_t result;
2991
2992 do {
2993 prot = VM_PROT_NONE;
2994
2995 vm_object_lock(object);
2996 vm_object_paging_begin(object);
2997 XPR(XPR_VM_FAULT,
2998 "vm_fault_unwire -> vm_fault_page\n",
2999 0,0,0,0,0);
3000 result = vm_fault_page(object,
3001 entry->offset +
3002 (va - entry->vme_start),
3003 VM_PROT_NONE, TRUE,
3004 THREAD_UNINT,
3005 entry->offset,
3006 entry->offset +
3007 (entry->vme_end
3008 - entry->vme_start),
3009 entry->behavior,
3010 &prot,
3011 &result_page,
3012 &top_page,
3013 (int *)0,
3014 0, map->no_zero_fill,
3015 FALSE, NULL, 0);
3016 } while (result == VM_FAULT_RETRY);
3017
3018 if (result != VM_FAULT_SUCCESS)
3019 panic("vm_fault_unwire: failure");
3020
3021 result_object = result_page->object;
3022 if (deallocate) {
3023 assert(!result_page->fictitious);
3024 pmap_page_protect(result_page->phys_addr,
3025 VM_PROT_NONE);
3026 VM_PAGE_FREE(result_page);
3027 } else {
3028 vm_page_lock_queues();
3029 vm_page_unwire(result_page);
3030 vm_page_unlock_queues();
3031 PAGE_WAKEUP_DONE(result_page);
3032 }
3033
3034 vm_fault_cleanup(result_object, top_page);
3035 }
3036 }
3037
3038 /*
3039 * Inform the physical mapping system that the range
3040 * of addresses may fault, so that page tables and
3041 * such may be unwired themselves.
3042 */
3043
3044 pmap_pageable(pmap, pmap_addr,
3045 pmap_addr + (end_addr - entry->vme_start), TRUE);
3046
3047 }
3048
3049 /*
3050 * vm_fault_wire_fast:
3051 *
3052 * Handle common case of a wire down page fault at the given address.
3053 * If successful, the page is inserted into the associated physical map.
3054 * The map entry is passed in to avoid the overhead of a map lookup.
3055 *
3056 * NOTE: the given address should be truncated to the
3057 * proper page address.
3058 *
3059 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
3060 * a standard error specifying why the fault is fatal is returned.
3061 *
3062 * The map in question must be referenced, and remains so.
3063 * Caller has a read lock on the map.
3064 *
3065 * This is a stripped version of vm_fault() for wiring pages. Anything
3066 * other than the common case will return KERN_FAILURE, and the caller
3067 * is expected to call vm_fault().
3068 */
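/*
 *	The "common case" requires that the entry map a vm_object directly
 *	(no sub-map), that the page already be resident in the top-level
 *	object and not busy, in error, absent or page-locked against the
 *	needed protection, and that a write not be attempted while the
 *	object has a copy object.  Anything else returns KERN_FAILURE.
 */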
3069 kern_return_t
3070 vm_fault_wire_fast(
3071 vm_map_t map,
3072 vm_offset_t va,
3073 vm_map_entry_t entry,
3074 pmap_t pmap,
3075 vm_offset_t pmap_addr)
3076 {
3077 vm_object_t object;
3078 vm_object_offset_t offset;
3079 register vm_page_t m;
3080 vm_prot_t prot;
3081 thread_act_t thr_act;
3082 unsigned int cache_attr;
3083
3084 VM_STAT(faults++);
3085
3086 if((thr_act=current_act()) && (thr_act->task != TASK_NULL))
3087 thr_act->task->faults++;
3088
3089 /*
3090 * Recovery actions
3091 */
3092
3093 #undef RELEASE_PAGE
3094 #define RELEASE_PAGE(m) { \
3095 PAGE_WAKEUP_DONE(m); \
3096 vm_page_lock_queues(); \
3097 vm_page_unwire(m); \
3098 vm_page_unlock_queues(); \
3099 }
3100
3101
3102 #undef UNLOCK_THINGS
3103 #define UNLOCK_THINGS { \
3104 object->paging_in_progress--; \
3105 vm_object_unlock(object); \
3106 }
3107
3108 #undef UNLOCK_AND_DEALLOCATE
3109 #define UNLOCK_AND_DEALLOCATE { \
3110 UNLOCK_THINGS; \
3111 vm_object_deallocate(object); \
3112 }
3113 /*
3114 * Give up and have caller do things the hard way.
3115 */
3116
3117 #define GIVE_UP { \
3118 UNLOCK_AND_DEALLOCATE; \
3119 return(KERN_FAILURE); \
3120 }
3121
3122
3123 /*
3124 * If this entry is not directly to a vm_object, bail out.
3125 */
3126 if (entry->is_sub_map)
3127 return(KERN_FAILURE);
3128
3129 /*
3130 * Find the backing store object and offset into it.
3131 */
3132
3133 object = entry->object.vm_object;
3134 offset = (va - entry->vme_start) + entry->offset;
3135 prot = entry->protection;
3136
3137 /*
3138 * Make a reference to this object to prevent its
3139 * disposal while we are messing with it.
3140 */
3141
3142 vm_object_lock(object);
3143 assert(object->ref_count > 0);
3144 object->ref_count++;
3145 vm_object_res_reference(object);
3146 object->paging_in_progress++;
3147
3148 /*
3149 * INVARIANTS (through entire routine):
3150 *
3151 * 1) At all times, we must either have the object
3152 * lock or a busy page in some object to prevent
3153 * some other thread from trying to bring in
3154 * the same page.
3155 *
3156 * 2) Once we have a busy page, we must remove it from
3157 * the pageout queues, so that the pageout daemon
3158 * will not grab it away.
3159 *
3160 */
3161
3162 /*
3163 * Look for page in top-level object. If it's not there or
3164 * there's something going on, give up.
3165 */
3166 m = vm_page_lookup(object, offset);
3167 if ((m == VM_PAGE_NULL) || (m->busy) ||
3168 (m->unusual && ( m->error || m->restart || m->absent ||
3169 prot & m->page_lock))) {
3170
3171 GIVE_UP;
3172 }
3173
3174 /*
3175 * Wire the page down now. All bail outs beyond this
3176 * point must unwire the page.
3177 */
3178
3179 vm_page_lock_queues();
3180 vm_page_wire(m);
3181 vm_page_unlock_queues();
3182
3183 /*
3184 * Mark page busy for other threads.
3185 */
3186 assert(!m->busy);
3187 m->busy = TRUE;
3188 assert(!m->absent);
3189
3190 /*
3191 * Give up if the page is being written and there's a copy object
3192 */
3193 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3194 RELEASE_PAGE(m);
3195 GIVE_UP;
3196 }
3197
3198 /*
3199 * Put this page into the physical map.
3200 * We have to unlock the object because pmap_enter
3201 * may cause other faults.
3202 */
3203 if (m->no_isync == TRUE) {
3204 pmap_sync_caches_phys(m->phys_addr);
3205
3206 m->no_isync = FALSE;
3207 }
3208
3209 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3210 vm_object_unlock(object);
3211
3212 PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);
3213
3214 /*
3215 * Must relock object so that paging_in_progress can be cleared.
3216 */
3217 vm_object_lock(object);
3218
3219 /*
3220 * Unlock everything, and return
3221 */
3222
3223 PAGE_WAKEUP_DONE(m);
3224 UNLOCK_AND_DEALLOCATE;
3225
3226 return(KERN_SUCCESS);
3227
3228 }
3229
3230 /*
3231 * Routine: vm_fault_copy_cleanup
3232 * Purpose:
3233 * Release a page used by vm_fault_copy.
3234 */
3235
3236 void
3237 vm_fault_copy_cleanup(
3238 vm_page_t page,
3239 vm_page_t top_page)
3240 {
3241 vm_object_t object = page->object;
3242
3243 vm_object_lock(object);
3244 PAGE_WAKEUP_DONE(page);
3245 vm_page_lock_queues();
3246 if (!page->active && !page->inactive)
3247 vm_page_activate(page);
3248 vm_page_unlock_queues();
3249 vm_fault_cleanup(object, top_page);
3250 }
3251
3252 void
3253 vm_fault_copy_dst_cleanup(
3254 vm_page_t page)
3255 {
3256 vm_object_t object;
3257
3258 if (page != VM_PAGE_NULL) {
3259 object = page->object;
3260 vm_object_lock(object);
3261 vm_page_lock_queues();
3262 vm_page_unwire(page);
3263 vm_page_unlock_queues();
3264 vm_object_paging_end(object);
3265 vm_object_unlock(object);
3266 }
3267 }
3268
3269 /*
3270 * Routine: vm_fault_copy
3271 *
3272 * Purpose:
3273 * Copy pages from one virtual memory object to another --
3274 * neither the source nor destination pages need be resident.
3275 *
3276 * Before actually copying a page, the version associated with
3277  *	the destination address map will be verified.
3278 *
3279 * In/out conditions:
3280 * The caller must hold a reference, but not a lock, to
3281 * each of the source and destination objects and to the
3282 * destination map.
3283 *
3284 * Results:
3285 * Returns KERN_SUCCESS if no errors were encountered in
3286 * reading or writing the data. Returns KERN_INTERRUPTED if
3287 * the operation was interrupted (only possible if the
3288 * "interruptible" argument is asserted). Other return values
3289 * indicate a permanent error in copying the data.
3290 *
3291  *	The actual amount of data copied will be returned in the
3292  *	"src_size" argument.  In the event that the destination map
3293 * verification failed, this amount may be less than the amount
3294 * requested.
3295 */
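/*
 *	In outline, each trip through the copy loop below:
 *	1) faults in the destination page for write (applying any COW
 *	   semantics first) and wires it so it cannot be stolen while
 *	   unlocked;
 *	2) faults in, or looks up, the source page read-only --
 *	   src_object may be VM_OBJECT_NULL, in which case the
 *	   destination is zero filled;
 *	3) re-verifies dst_map against dst_version and re-checks the
 *	   destination's copy object, stopping early with a short count
 *	   in *src_size on a mismatch;
 *	4) copies a whole page, or the unaligned remainder (part_size),
 *	   marks the destination dirty and advances the offsets.
 */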
3296 kern_return_t
3297 vm_fault_copy(
3298 vm_object_t src_object,
3299 vm_object_offset_t src_offset,
3300 vm_size_t *src_size, /* INOUT */
3301 vm_object_t dst_object,
3302 vm_object_offset_t dst_offset,
3303 vm_map_t dst_map,
3304 vm_map_version_t *dst_version,
3305 int interruptible)
3306 {
3307 vm_page_t result_page;
3308
3309 vm_page_t src_page;
3310 vm_page_t src_top_page;
3311 vm_prot_t src_prot;
3312
3313 vm_page_t dst_page;
3314 vm_page_t dst_top_page;
3315 vm_prot_t dst_prot;
3316
3317 vm_size_t amount_left;
3318 vm_object_t old_copy_object;
3319 kern_return_t error = 0;
3320
3321 vm_size_t part_size;
3322
3323 /*
3324 * In order not to confuse the clustered pageins, align
3325 * the different offsets on a page boundary.
3326 */
3327 vm_object_offset_t src_lo_offset = trunc_page_64(src_offset);
3328 vm_object_offset_t dst_lo_offset = trunc_page_64(dst_offset);
3329 vm_object_offset_t src_hi_offset = round_page_64(src_offset + *src_size);
3330 vm_object_offset_t dst_hi_offset = round_page_64(dst_offset + *src_size);
3331
3332 #define RETURN(x) \
3333 MACRO_BEGIN \
3334 *src_size -= amount_left; \
3335 MACRO_RETURN(x); \
3336 MACRO_END
3337
3338 amount_left = *src_size;
3339 do { /* while (amount_left > 0) */
3340 /*
3341 * There may be a deadlock if both source and destination
3342 * pages are the same. To avoid this deadlock, the copy must
3343 * start by getting the destination page in order to apply
3344 * COW semantics if any.
3345 */
3346
3347 RetryDestinationFault: ;
3348
3349 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3350
3351 vm_object_lock(dst_object);
3352 vm_object_paging_begin(dst_object);
3353
3354 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3355 switch (vm_fault_page(dst_object,
3356 trunc_page_64(dst_offset),
3357 VM_PROT_WRITE|VM_PROT_READ,
3358 FALSE,
3359 interruptible,
3360 dst_lo_offset,
3361 dst_hi_offset,
3362 VM_BEHAVIOR_SEQUENTIAL,
3363 &dst_prot,
3364 &dst_page,
3365 &dst_top_page,
3366 (int *)0,
3367 &error,
3368 dst_map->no_zero_fill,
3369 FALSE, NULL, 0)) {
3370 case VM_FAULT_SUCCESS:
3371 break;
3372 case VM_FAULT_RETRY:
3373 goto RetryDestinationFault;
3374 case VM_FAULT_MEMORY_SHORTAGE:
3375 if (vm_page_wait(interruptible))
3376 goto RetryDestinationFault;
3377 /* fall thru */
3378 case VM_FAULT_INTERRUPTED:
3379 RETURN(MACH_SEND_INTERRUPTED);
3380 case VM_FAULT_FICTITIOUS_SHORTAGE:
3381 vm_page_more_fictitious();
3382 goto RetryDestinationFault;
3383 case VM_FAULT_MEMORY_ERROR:
3384 if (error)
3385 return (error);
3386 else
3387 return(KERN_MEMORY_ERROR);
3388 }
3389 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3390
3391 old_copy_object = dst_page->object->copy;
3392
3393 /*
3394 		 * There exists the possibility that the source and
3395 * destination page are the same. But we can't
3396 * easily determine that now. If they are the
3397 * same, the call to vm_fault_page() for the
3398 * destination page will deadlock. To prevent this we
3399 * wire the page so we can drop busy without having
3400 * the page daemon steal the page. We clean up the
3401 * top page but keep the paging reference on the object
3402 * holding the dest page so it doesn't go away.
3403 */
3404
3405 vm_page_lock_queues();
3406 vm_page_wire(dst_page);
3407 vm_page_unlock_queues();
3408 PAGE_WAKEUP_DONE(dst_page);
3409 vm_object_unlock(dst_page->object);
3410
3411 if (dst_top_page != VM_PAGE_NULL) {
3412 vm_object_lock(dst_object);
3413 VM_PAGE_FREE(dst_top_page);
3414 vm_object_paging_end(dst_object);
3415 vm_object_unlock(dst_object);
3416 }
3417
3418 RetrySourceFault: ;
3419
3420 if (src_object == VM_OBJECT_NULL) {
3421 /*
3422 * No source object. We will just
3423 * zero-fill the page in dst_object.
3424 */
3425 src_page = VM_PAGE_NULL;
3426 result_page = VM_PAGE_NULL;
3427 } else {
3428 vm_object_lock(src_object);
3429 src_page = vm_page_lookup(src_object,
3430 trunc_page_64(src_offset));
3431 if (src_page == dst_page) {
3432 src_prot = dst_prot;
3433 result_page = VM_PAGE_NULL;
3434 } else {
3435 src_prot = VM_PROT_READ;
3436 vm_object_paging_begin(src_object);
3437
3438 XPR(XPR_VM_FAULT,
3439 "vm_fault_copy(2) -> vm_fault_page\n",
3440 0,0,0,0,0);
3441 switch (vm_fault_page(src_object,
3442 trunc_page_64(src_offset),
3443 VM_PROT_READ,
3444 FALSE,
3445 interruptible,
3446 src_lo_offset,
3447 src_hi_offset,
3448 VM_BEHAVIOR_SEQUENTIAL,
3449 &src_prot,
3450 &result_page,
3451 &src_top_page,
3452 (int *)0,
3453 &error,
3454 FALSE,
3455 FALSE, NULL, 0)) {
3456
3457 case VM_FAULT_SUCCESS:
3458 break;
3459 case VM_FAULT_RETRY:
3460 goto RetrySourceFault;
3461 case VM_FAULT_MEMORY_SHORTAGE:
3462 if (vm_page_wait(interruptible))
3463 goto RetrySourceFault;
3464 /* fall thru */
3465 case VM_FAULT_INTERRUPTED:
3466 vm_fault_copy_dst_cleanup(dst_page);
3467 RETURN(MACH_SEND_INTERRUPTED);
3468 case VM_FAULT_FICTITIOUS_SHORTAGE:
3469 vm_page_more_fictitious();
3470 goto RetrySourceFault;
3471 case VM_FAULT_MEMORY_ERROR:
3472 vm_fault_copy_dst_cleanup(dst_page);
3473 if (error)
3474 return (error);
3475 else
3476 return(KERN_MEMORY_ERROR);
3477 }
3478
3479
3480 assert((src_top_page == VM_PAGE_NULL) ==
3481 (result_page->object == src_object));
3482 }
3483 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
3484 vm_object_unlock(result_page->object);
3485 }
3486
3487 if (!vm_map_verify(dst_map, dst_version)) {
3488 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3489 vm_fault_copy_cleanup(result_page, src_top_page);
3490 vm_fault_copy_dst_cleanup(dst_page);
3491 break;
3492 }
3493
3494 vm_object_lock(dst_page->object);
3495
3496 if (dst_page->object->copy != old_copy_object) {
3497 vm_object_unlock(dst_page->object);
3498 vm_map_verify_done(dst_map, dst_version);
3499 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3500 vm_fault_copy_cleanup(result_page, src_top_page);
3501 vm_fault_copy_dst_cleanup(dst_page);
3502 break;
3503 }
3504 vm_object_unlock(dst_page->object);
3505
3506 /*
3507 * Copy the page, and note that it is dirty
3508 * immediately.
3509 */
3510
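		/*
		 * Unaligned case, with purely illustrative numbers: if
		 * src_offset sits 0x200 bytes into its page and dst_offset
		 * sits 0x600 bytes into its page, then src_po = 0x200,
		 * dst_po = 0x600, and since dst_po > src_po the chunk is
		 * PAGE_SIZE - 0x600 bytes (further clipped to amount_left).
		 * Both offsets then advance by part_size.
		 */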
3511 if (!page_aligned(src_offset) ||
3512 !page_aligned(dst_offset) ||
3513 !page_aligned(amount_left)) {
3514
3515 vm_object_offset_t src_po,
3516 dst_po;
3517
3518 src_po = src_offset - trunc_page_64(src_offset);
3519 dst_po = dst_offset - trunc_page_64(dst_offset);
3520
3521 if (dst_po > src_po) {
3522 part_size = PAGE_SIZE - dst_po;
3523 } else {
3524 part_size = PAGE_SIZE - src_po;
3525 }
3526 if (part_size > (amount_left)){
3527 part_size = amount_left;
3528 }
3529
3530 if (result_page == VM_PAGE_NULL) {
3531 vm_page_part_zero_fill(dst_page,
3532 dst_po, part_size);
3533 } else {
3534 vm_page_part_copy(result_page, src_po,
3535 dst_page, dst_po, part_size);
3536 if(!dst_page->dirty){
3537 vm_object_lock(dst_object);
3538 dst_page->dirty = TRUE;
3539 vm_object_unlock(dst_page->object);
3540 }
3541
3542 }
3543 } else {
3544 part_size = PAGE_SIZE;
3545
3546 if (result_page == VM_PAGE_NULL)
3547 vm_page_zero_fill(dst_page);
3548 else{
3549 vm_page_copy(result_page, dst_page);
3550 if(!dst_page->dirty){
3551 vm_object_lock(dst_object);
3552 dst_page->dirty = TRUE;
3553 vm_object_unlock(dst_page->object);
3554 }
3555 }
3556
3557 }
3558
3559 /*
3560 * Unlock everything, and return
3561 */
3562
3563 vm_map_verify_done(dst_map, dst_version);
3564
3565 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3566 vm_fault_copy_cleanup(result_page, src_top_page);
3567 vm_fault_copy_dst_cleanup(dst_page);
3568
3569 amount_left -= part_size;
3570 src_offset += part_size;
3571 dst_offset += part_size;
3572 } while (amount_left > 0);
3573
3574 RETURN(KERN_SUCCESS);
3575 #undef RETURN
3576
3577 /*NOTREACHED*/
3578 }
3579
3580 #ifdef notdef
3581
3582 /*
3583 * Routine: vm_fault_page_overwrite
3584 *
3585 * Description:
3586 * A form of vm_fault_page that assumes that the
3587 * resulting page will be overwritten in its entirety,
3588 * making it unnecessary to obtain the correct *contents*
3589 * of the page.
3590 *
3591 * Implementation:
3592 * XXX Untested. Also unused. Eventually, this technology
3593 * could be used in vm_fault_copy() to advantage.
3594 */
3595 vm_fault_return_t
3596 vm_fault_page_overwrite(
3597 register
3598 vm_object_t dst_object,
3599 vm_object_offset_t dst_offset,
3600 vm_page_t *result_page) /* OUT */
3601 {
3602 register
3603 vm_page_t dst_page;
3604 kern_return_t wait_result;
3605
3606 #define interruptible THREAD_UNINT /* XXX */
3607
3608 while (TRUE) {
3609 /*
3610 * Look for a page at this offset
3611 */
3612
3613 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3614 == VM_PAGE_NULL) {
3615 /*
3616 * No page, no problem... just allocate one.
3617 */
3618
3619 dst_page = vm_page_alloc(dst_object, dst_offset);
3620 if (dst_page == VM_PAGE_NULL) {
3621 vm_object_unlock(dst_object);
3622 VM_PAGE_WAIT();
3623 vm_object_lock(dst_object);
3624 continue;
3625 }
3626
3627 /*
3628 * Pretend that the memory manager
3629 * write-protected the page.
3630 *
3631 * Note that we will be asking for write
3632 * permission without asking for the data
3633 * first.
3634 */
3635
3636 dst_page->overwriting = TRUE;
3637 dst_page->page_lock = VM_PROT_WRITE;
3638 dst_page->absent = TRUE;
3639 dst_page->unusual = TRUE;
3640 dst_object->absent_count++;
3641
3642 break;
3643
3644 /*
3645 * When we bail out, we might have to throw
3646 * away the page created here.
3647 */
3648
3649 #define DISCARD_PAGE \
3650 MACRO_BEGIN \
3651 vm_object_lock(dst_object); \
3652 dst_page = vm_page_lookup(dst_object, dst_offset); \
3653 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3654 VM_PAGE_FREE(dst_page); \
3655 vm_object_unlock(dst_object); \
3656 MACRO_END
3657 }
3658
3659 /*
3660 * If the page is write-protected...
3661 */
3662
3663 if (dst_page->page_lock & VM_PROT_WRITE) {
3664 /*
3665 * ... and an unlock request hasn't been sent
3666 */
3667
3668 if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3669 vm_prot_t u;
3670 kern_return_t rc;
3671
3672 /*
3673 * ... then send one now.
3674 */
3675
3676 if (!dst_object->pager_ready) {
3677 wait_result = vm_object_assert_wait(dst_object,
3678 VM_OBJECT_EVENT_PAGER_READY,
3679 interruptible);
3680 vm_object_unlock(dst_object);
3681 if (wait_result == THREAD_WAITING)
3682 wait_result = thread_block(THREAD_CONTINUE_NULL);
3683 if (wait_result != THREAD_AWAKENED) {
3684 DISCARD_PAGE;
3685 return(VM_FAULT_INTERRUPTED);
3686 }
3687 continue;
3688 }
3689
3690 u = dst_page->unlock_request |= VM_PROT_WRITE;
3691 vm_object_unlock(dst_object);
3692
3693 if ((rc = memory_object_data_unlock(
3694 dst_object->pager,
3695 dst_offset + dst_object->paging_offset,
3696 PAGE_SIZE,
3697 u)) != KERN_SUCCESS) {
3698 if (vm_fault_debug)
3699 printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3700 DISCARD_PAGE;
3701 return((rc == MACH_SEND_INTERRUPTED) ?
3702 VM_FAULT_INTERRUPTED :
3703 VM_FAULT_MEMORY_ERROR);
3704 }
3705 vm_object_lock(dst_object);
3706 continue;
3707 }
3708
3709 /* ... fall through to wait below */
3710 } else {
3711 /*
3712 * If the page isn't being used for other
3713 * purposes, then we're done.
3714 */
3715 if ( ! (dst_page->busy || dst_page->absent ||
3716 dst_page->error || dst_page->restart) )
3717 break;
3718 }
3719
3720 wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
3721 vm_object_unlock(dst_object);
3722 if (wait_result == THREAD_WAITING)
3723 wait_result = thread_block(THREAD_CONTINUE_NULL);
3724 if (wait_result != THREAD_AWAKENED) {
3725 DISCARD_PAGE;
3726 return(VM_FAULT_INTERRUPTED);
3727 }
3728 }
3729
3730 *result_page = dst_page;
3731 return(VM_FAULT_SUCCESS);
3732
3733 #undef interruptible
3734 #undef DISCARD_PAGE
3735 }
3736
3737 #endif /* notdef */
3738
3739 #if VM_FAULT_CLASSIFY
3740 /*
3741 * Temporary statistics gathering support.
3742 */
3743
3744 /*
3745 * Statistics arrays:
3746 */
3747 #define VM_FAULT_TYPES_MAX 5
3748 #define VM_FAULT_LEVEL_MAX 8
3749
3750 int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
3751
3752 #define VM_FAULT_TYPE_ZERO_FILL 0
3753 #define VM_FAULT_TYPE_MAP_IN 1
3754 #define VM_FAULT_TYPE_PAGER 2
3755 #define VM_FAULT_TYPE_COPY 3
3756 #define VM_FAULT_TYPE_OTHER 4
3757
3758
3759 void
3760 vm_fault_classify(vm_object_t object,
3761 vm_object_offset_t offset,
3762 vm_prot_t fault_type)
3763 {
3764 int type, level = 0;
3765 vm_page_t m;
3766
3767 while (TRUE) {
3768 m = vm_page_lookup(object, offset);
3769 if (m != VM_PAGE_NULL) {
3770 if (m->busy || m->error || m->restart || m->absent ||
3771 fault_type & m->page_lock) {
3772 type = VM_FAULT_TYPE_OTHER;
3773 break;
3774 }
3775 if (((fault_type & VM_PROT_WRITE) == 0) ||
3776 ((level == 0) && object->copy == VM_OBJECT_NULL)) {
3777 type = VM_FAULT_TYPE_MAP_IN;
3778 break;
3779 }
3780 type = VM_FAULT_TYPE_COPY;
3781 break;
3782 }
3783 else {
3784 if (object->pager_created) {
3785 type = VM_FAULT_TYPE_PAGER;
3786 break;
3787 }
3788 if (object->shadow == VM_OBJECT_NULL) {
3789 type = VM_FAULT_TYPE_ZERO_FILL;
3790 break;
3791 }
3792
3793 offset += object->shadow_offset;
3794 object = object->shadow;
3795 level++;
3796 continue;
3797 }
3798 }
3799
3800 	if (level >= VM_FAULT_LEVEL_MAX)
3801 		level = VM_FAULT_LEVEL_MAX - 1;
3802
3803 vm_fault_stats[type][level] += 1;
3804
3805 return;
3806 }
3807
3808 /* cleanup routine to call from debugger */
3809
3810 void
3811 vm_fault_classify_init(void)
3812 {
3813 int type, level;
3814
3815 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
3816 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
3817 vm_fault_stats[type][level] = 0;
3818 }
3819 }
3820
3821 return;
3822 }
3823 #endif /* VM_FAULT_CLASSIFY */