/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm_fault.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Page fault handling module.
 */
#ifdef MACH_BSD
/* remove after component interface available */
extern int	vnode_pager_workaround;
extern int	device_pager_workaround;
#endif

#include <mach_cluster_stats.h>
#include <mach_pagemap.h>
#include <mach_kdb.h>

#include <vm/vm_fault.h>
#include <mach/kern_return.h>
#include <mach/message.h>	/* for error codes */
#include <kern/host_statistics.h>
#include <kern/counters.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/host.h>
#include <kern/xpr.h>
#include <ppc/proc_reg.h>
#include <ppc/pmap_internals.h>
#include <vm/task_working_set.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_pageout.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/memory_object.h>
				/* For memory_object_data_{request,unlock} */
#include <kern/mach_param.h>
#include <kern/macro_help.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>

#include <sys/kdebug.h>

#define VM_FAULT_CLASSIFY	0
#define VM_FAULT_STATIC_CONFIG	1

#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */

int	vm_object_absent_max = 50;

int	vm_fault_debug = 0;
boolean_t	vm_page_deactivate_behind = TRUE;


#if	!VM_FAULT_STATIC_CONFIG
boolean_t	vm_fault_dirty_handling = FALSE;
boolean_t	vm_fault_interruptible = FALSE;
boolean_t	software_reference_bits = TRUE;
#endif

#if	MACH_KDB
extern struct db_watchpoint *db_watchpoint_list;
#endif	/* MACH_KDB */

/* Forward declarations of internal routines. */
extern kern_return_t vm_fault_wire_fast(
		vm_map_t	map,
		vm_offset_t	va,
		vm_map_entry_t	entry,
		pmap_t		pmap,
		vm_offset_t	pmap_addr);

extern void vm_fault_continue(void);

extern void vm_fault_copy_cleanup(
		vm_page_t	page,
		vm_page_t	top_page);

extern void vm_fault_copy_dst_cleanup(
		vm_page_t	page);

#if	VM_FAULT_CLASSIFY
extern void vm_fault_classify(vm_object_t	object,
			  vm_object_offset_t	offset,
			  vm_prot_t		fault_type);

extern void vm_fault_classify_init(void);
#endif

/*
 *	Routine:	vm_fault_init
 *	Purpose:
 *		Initialize our private data structures.
 */
void
vm_fault_init(void)
{
}

/*
 *	Routine:	vm_fault_cleanup
 *	Purpose:
 *		Clean up the result of vm_fault_page.
 *	Results:
 *		The paging reference for "object" is released.
 *		"object" is unlocked.
 *		If "top_page" is not null, "top_page" is
 *		freed and the paging reference for the object
 *		containing it is released.
 *
 *	In/out conditions:
 *		"object" must be locked.
 */
void
vm_fault_cleanup(
	register vm_object_t	object,
	register vm_page_t	top_page)
{
	vm_object_paging_end(object);
	vm_object_unlock(object);

	if (top_page != VM_PAGE_NULL) {
		object = top_page->object;
		vm_object_lock(object);
		VM_PAGE_FREE(top_page);
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}
}

#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES 16
struct {
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats_in[MAXCLUSTERPAGES];
#define CLUSTER_STAT(clause)	clause
#define CLUSTER_STAT_HIGHER(x)	\
	((cluster_stats_in[(x)].pages_at_higher_offsets)++)
#define CLUSTER_STAT_LOWER(x)	\
	((cluster_stats_in[(x)].pages_at_lower_offsets)++)
#define CLUSTER_STAT_CLUSTER(x)	\
	((cluster_stats_in[(x)].pages_in_cluster)++)
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */

/* XXX - temporary */
boolean_t vm_allow_clustered_pagein = FALSE;
int vm_pagein_cluster_used = 0;

/*
 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
 */
int vm_default_ahead = 1;	/* Number of pages to prepage ahead */
int vm_default_behind = 0;	/* Number of pages to prepage behind */

#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)

/*
 *	Routine:	vm_fault_page
 *	Purpose:
 *		Find the resident page for the virtual memory
 *		specified by the given virtual memory object
 *		and offset.
 *	Additional arguments:
 *		The required permissions for the page are given
 *		in "fault_type".  Desired permissions are included
 *		in "protection".  The minimum and maximum valid offsets
 *		within the object for the relevant map entry are
 *		passed in "lo_offset" and "hi_offset" respectively and
 *		the expected page reference pattern is passed in "behavior".
 *		These three parameters are used to determine pagein cluster
 *		limits.
 *
 *		If the desired page is known to be resident (for
 *		example, because it was previously wired down), asserting
 *		the "unwiring" parameter will speed the search.
 *
 *		If the operation can be interrupted (by thread_abort
 *		or thread_terminate), then the "interruptible"
 *		parameter should be asserted.
 *
 *	Results:
 *		The page containing the proper data is returned
 *		in "result_page".
 *
 *	In/out conditions:
 *		The source object must be locked and referenced,
 *		and must donate one paging reference.  The reference
 *		is not affected.  The paging reference and lock are
 *		consumed.
 *
 *		If the call succeeds, the object in which "result_page"
 *		resides is left locked and holding a paging reference.
 *		If this is not the original object, a busy page in the
 *		original object is returned in "top_page", to prevent other
 *		callers from pursuing this same data, along with a paging
 *		reference for the original object.  The "top_page" should
 *		be destroyed when this guarantee is no longer required.
 *		The "result_page" is also left busy.  It is not removed
 *		from the pageout queues.
 */

vm_fault_return_t
vm_fault_page(
	/* Arguments: */
	vm_object_t	first_object,	/* Object to begin search */
	vm_object_offset_t first_offset,	/* Offset into object */
	vm_prot_t	fault_type,	/* What access is requested */
	boolean_t	must_be_resident,/* Must page be resident? */
	int		interruptible,	/* how may fault be interrupted? */
	vm_object_offset_t lo_offset,	/* Map entry start */
	vm_object_offset_t hi_offset,	/* Map entry end */
	vm_behavior_t	behavior,	/* Page reference behavior */
	/* Modifies in place: */
	vm_prot_t	*protection,	/* Protection for mapping */
	/* Returns: */
	vm_page_t	*result_page,	/* Page found, if successful */
	vm_page_t	*top_page,	/* Page in top object, if
					 * not result_page.  */
	int		*type_of_fault,	/* if non-null, fill in with type of fault
					 * COW, zero-fill, etc... returned in trace point */
	/* More arguments: */
	kern_return_t	*error_code,	/* code if page is in error */
	boolean_t	no_zero_fill,	/* don't zero fill absent pages */
	boolean_t	data_supply,	/* treat as data_supply if
					 * it is a write fault and a full
					 * page is provided */
	vm_map_t	map,
	vm_offset_t	vaddr)
{
	register
	vm_page_t		m;
	register
	vm_object_t		object;
	register
	vm_object_offset_t	offset;
	vm_page_t		first_m;
	vm_object_t		next_object;
	vm_object_t		copy_object;
	boolean_t		look_for_page;
	vm_prot_t		access_required = fault_type;
	vm_prot_t		wants_copy_flag;
	vm_size_t		cluster_size, length;
	vm_object_offset_t	cluster_offset;
	vm_object_offset_t	cluster_start, cluster_end, paging_offset;
	vm_object_offset_t	align_offset;
	CLUSTER_STAT(int pages_at_higher_offsets;)
	CLUSTER_STAT(int pages_at_lower_offsets;)
	kern_return_t		wait_result;
	boolean_t		interruptible_state;
	boolean_t		bumped_pagein = FALSE;


#if MACH_PAGEMAP
/*
 * MACH page map - an optional optimization where a bit map is maintained
 * by the VM subsystem for internal objects to indicate which pages of
 * the object currently reside on backing store.  This existence map
 * duplicates information maintained by the vnode pager.  It is
 * created at the time of the first pageout against the object, i.e.
 * at the same time the pager for the object is created.  The optimization
 * is designed to eliminate pager interaction overhead, if it is
 * 'known' that the page does not exist on backing store.
 *
 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
 * either marked as paged out in the existence map for the object or no
 * existence map exists for the object.  LOOK_FOR() is one of the
 * criteria in the decision to invoke the pager.  It is also used as one
 * of the criteria to terminate the scan for adjacent pages in a clustered
 * pagein operation.  Note that LOOK_FOR() always evaluates to TRUE for
 * permanent objects.  Note also that if the pager for an internal object
 * has not been created, the pager is not invoked regardless of the value
 * of LOOK_FOR() and that clustered pagein scans are only done on an object
 * for which a pager has been created.
 *
 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 * is marked as paged out in the existence map for the object.
 * PAGED_OUT() is used to determine if a page has already been pushed
 * into a copy object in order to avoid a redundant page out operation.
 */
#define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			!= VM_EXTERNAL_STATE_ABSENT)
#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			== VM_EXTERNAL_STATE_EXISTS)
#else /* MACH_PAGEMAP */
/*
 * If the MACH page map optimization is not enabled,
 * LOOK_FOR() always evaluates to TRUE.  The pager will always be
 * invoked to resolve missing pages in an object, assuming the pager
 * has been created for the object.  In a clustered page operation, the
 * absence of a page on backing store cannot be used to terminate
 * a scan for adjacent pages since that information is available only in
 * the pager.  Hence pages that may not be paged out are potentially
 * included in a clustered request.  The vnode pager is coded to deal
 * with any combination of absent/present pages in a clustered
 * pagein request.  PAGED_OUT() always evaluates to FALSE, i.e. the pager
 * will always be invoked to push a dirty page into a copy object assuming
 * a pager has been created.  If the page has already been pushed, the
 * pager will ignore the new request.
 */
#define LOOK_FOR(o, f) TRUE
#define PAGED_OUT(o, f) FALSE
#endif /* MACH_PAGEMAP */

/*
 *	Recovery actions
 */
#define PREPARE_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	vm_page_lock_queues();				\
	MACRO_END

#define DO_RELEASE_PAGE(m)				\
	MACRO_BEGIN					\
	PAGE_WAKEUP_DONE(m);				\
	if (!m->active && !m->inactive)			\
		vm_page_activate(m);			\
	vm_page_unlock_queues();			\
	MACRO_END

#define RELEASE_PAGE(m)					\
	MACRO_BEGIN					\
	PREPARE_RELEASE_PAGE(m);			\
	DO_RELEASE_PAGE(m);				\
	MACRO_END
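
/*
 * The recovery macros above back a page out of a failed fault:
 * PREPARE_RELEASE_PAGE() takes the page queues lock, and
 * DO_RELEASE_PAGE() wakes any waiters, reactivates the page if it is
 * on no paging queue, and drops the lock.
 */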

#if TRACEFAULTPAGE
	dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset);	/* (TEST/DEBUG) */
#endif



#if	!VM_FAULT_STATIC_CONFIG
	if (vm_fault_dirty_handling
#if	MACH_KDB
		/*
		 * If there are watchpoints set, then
		 * we don't want to give away write permission
		 * on a read fault.  Make the task write fault,
		 * so that the watchpoint code notices the access.
		 */
	    || db_watchpoint_list
#endif	/* MACH_KDB */
	    ) {
		/*
		 * If we aren't asking for write permission,
		 * then don't give it away.  We're using write
		 * faults to set the dirty bit.
		 */
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
	}

	if (!vm_fault_interruptible)
		interruptible = THREAD_UNINT;
#else	/* STATIC_CONFIG */
#if	MACH_KDB
	/*
	 * If there are watchpoints set, then
	 * we don't want to give away write permission
	 * on a read fault.  Make the task write fault,
	 * so that the watchpoint code notices the access.
	 */
	if (db_watchpoint_list) {
		/*
		 * If we aren't asking for write permission,
		 * then don't give it away.  We're using write
		 * faults to set the dirty bit.
		 */
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
	}

#endif	/* MACH_KDB */
#endif	/* STATIC_CONFIG */

	interruptible_state = thread_interrupt_level(interruptible);

	/*
	 * INVARIANTS (through entire routine):
	 *
	 *	1)	At all times, we must either have the object
	 *		lock or a busy page in some object to prevent
	 *		some other thread from trying to bring in
	 *		the same page.
	 *
	 *		Note that we cannot hold any locks during the
	 *		pager access or when waiting for memory, so
	 *		we use a busy page then.
	 *
	 *		Note also that we aren't as concerned about more than
	 *		one thread attempting to memory_object_data_unlock
	 *		the same page at once, so we don't hold the page
	 *		as busy then, but do record the highest unlock
	 *		value so far.  [Unlock requests may also be delivered
	 *		out of order.]
	 *
	 *	2)	To prevent another thread from racing us down the
	 *		shadow chain and entering a new page in the top
	 *		object before we do, we must keep a busy page in
	 *		the top object while following the shadow chain.
	 *
	 *	3)	We must increment paging_in_progress on any object
	 *		for which we have a busy page
	 *
	 *	4)	We leave busy pages on the pageout queues.
	 *		If the pageout daemon comes across a busy page,
	 *		it will remove the page from the pageout queues.
	 */

	/*
	 *	Search for the page at object/offset.
	 */

	object = first_object;
	offset = first_offset;
	first_m = VM_PAGE_NULL;
	access_required = fault_type;

	XPR(XPR_VM_FAULT,
		"vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
		(integer_t)object, offset, fault_type, *protection, 0);

	/*
	 *	See whether this page is resident
	 */

	while (TRUE) {
#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
		if (!object->alive) {
			vm_fault_cleanup(object, first_m);
			thread_interrupt_level(interruptible_state);
			return(VM_FAULT_MEMORY_ERROR);
		}
		m = vm_page_lookup(object, offset);
#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object);	/* (TEST/DEBUG) */
#endif
		if (m != VM_PAGE_NULL) {
			/*
			 * If the page was pre-paged as part of a
			 * cluster, record the fact.
			 */
			if (m->clustered) {
				vm_pagein_cluster_used++;
				m->clustered = FALSE;
			}

			/*
			 * If the page is being brought in,
			 * wait for it and then retry.
			 *
			 * A possible optimization: if the page
			 * is known to be resident, we can ignore
			 * pages that are absent (regardless of
			 * whether they're busy).
			 */

			if (m->busy) {
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
				wait_result = PAGE_SLEEP(object, m, interruptible);
				XPR(XPR_VM_FAULT,
				    "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
					(integer_t)object, offset,
					(integer_t)m, 0, 0);
				counter(c_vm_fault_page_block_busy_kernel++);

				if (wait_result != THREAD_AWAKENED) {
					vm_fault_cleanup(object, first_m);
					thread_interrupt_level(interruptible_state);
					if (wait_result == THREAD_RESTART)
					  {
						return(VM_FAULT_RETRY);
					  }
					else
					  {
						return(VM_FAULT_INTERRUPTED);
					  }
				}
				continue;
			}

			/*
			 * If the page is in error, give up now.
			 */

			if (m->error) {
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code);	/* (TEST/DEBUG) */
#endif
				if (error_code)
					*error_code = m->page_error;
				VM_PAGE_FREE(m);
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_MEMORY_ERROR);
			}

			/*
			 * If the pager wants us to restart
			 * at the top of the chain,
			 * typically because it has moved the
			 * page to another pager, then do so.
			 */

			if (m->restart) {
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
				VM_PAGE_FREE(m);
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_RETRY);
			}

			/*
			 * If the page isn't busy, but is absent,
			 * then it was deemed "unavailable".
			 */

			if (m->absent) {
				/*
				 * Remove the non-existent page (unless it's
				 * in the top object) and move on down to the
				 * next object (if there is one).
				 */
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow);	/* (TEST/DEBUG) */
#endif

				next_object = object->shadow;
				if (next_object == VM_OBJECT_NULL) {
					vm_page_t real_m;

					assert(!must_be_resident);

					if (object->shadow_severed) {
						vm_fault_cleanup(
							object, first_m);
						thread_interrupt_level(interruptible_state);
						return VM_FAULT_MEMORY_ERROR;
					}

					/*
					 * Absent page at bottom of shadow
					 * chain; zero fill the page we left
					 * busy in the first object, and flush
					 * the absent page.  But first we
					 * need to allocate a real page.
					 */
					if (VM_PAGE_THROTTLED() ||
					    (real_m = vm_page_grab()) == VM_PAGE_NULL) {
						vm_fault_cleanup(object, first_m);
						thread_interrupt_level(interruptible_state);
						return(VM_FAULT_MEMORY_SHORTAGE);
					}

					XPR(XPR_VM_FAULT,
					    "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
						(integer_t)object, offset,
						(integer_t)m,
						(integer_t)first_object, 0);
					if (object != first_object) {
						VM_PAGE_FREE(m);
						vm_object_paging_end(object);
						vm_object_unlock(object);
						object = first_object;
						offset = first_offset;
						m = first_m;
						first_m = VM_PAGE_NULL;
						vm_object_lock(object);
					}

					VM_PAGE_FREE(m);
					assert(real_m->busy);
					vm_page_insert(real_m, object, offset);
					m = real_m;

					/*
					 * Drop the lock while zero filling
					 * page.  Then break because this
					 * is the page we wanted.  Checking
					 * the page lock is a waste of time;
					 * this page was either absent or
					 * newly allocated -- in both cases
					 * it can't be page locked by a pager.
					 */
					m->no_isync = FALSE;

					if (!no_zero_fill) {
						vm_object_unlock(object);
						vm_page_zero_fill(m);
						if (type_of_fault)
							*type_of_fault = DBG_ZERO_FILL_FAULT;
						VM_STAT(zero_fill_count++);

						if (bumped_pagein == TRUE) {
							VM_STAT(pageins--);
							current_task()->pageins--;
						}
						vm_object_lock(object);
					}
					pmap_clear_modify(m->phys_addr);
					vm_page_lock_queues();
					VM_PAGE_QUEUES_REMOVE(m);
					m->page_ticket = vm_page_ticket;
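					/*
					 * Zero-filled pages from objects larger
					 * than 0x80000 bytes go on the dedicated
					 * zero-fill queue; smaller objects use
					 * the normal inactive queue.
					 */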
					if (m->object->size > 0x80000) {
						m->zero_fill = TRUE;
						/* depends on the queues lock */
						vm_zf_count += 1;
						queue_enter(&vm_page_queue_zf,
							m, vm_page_t, pageq);
					} else {
						queue_enter(
							&vm_page_queue_inactive,
							m, vm_page_t, pageq);
					}
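					/*
					 * Advance the shared page ticket: after
					 * VM_PAGE_TICKETS_IN_ROLL insertions the
					 * ticket steps forward, wrapping at
					 * VM_PAGE_TICKET_ROLL_IDS, so pages carry
					 * a coarse insertion-age tag.
					 */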
					vm_page_ticket_roll++;
					if (vm_page_ticket_roll ==
						VM_PAGE_TICKETS_IN_ROLL) {
						vm_page_ticket_roll = 0;
						if (vm_page_ticket ==
						    VM_PAGE_TICKET_ROLL_IDS)
							vm_page_ticket = 0;
						else
							vm_page_ticket++;
					}
					m->inactive = TRUE;
					vm_page_inactive_count++;
					vm_page_unlock_queues();
					break;
				} else {
					if (must_be_resident) {
						vm_object_paging_end(object);
					} else if (object != first_object) {
						vm_object_paging_end(object);
						VM_PAGE_FREE(m);
					} else {
						first_m = m;
						m->absent = FALSE;
						m->unusual = FALSE;
						vm_object_absent_release(object);
						m->busy = TRUE;

						vm_page_lock_queues();
						VM_PAGE_QUEUES_REMOVE(m);
						vm_page_unlock_queues();
					}
					XPR(XPR_VM_FAULT,
					    "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
						(integer_t)object, offset,
						(integer_t)next_object,
						offset+object->shadow_offset,0);
					offset += object->shadow_offset;
					hi_offset += object->shadow_offset;
					lo_offset += object->shadow_offset;
					access_required = VM_PROT_READ;
					vm_object_lock(next_object);
					vm_object_unlock(object);
					object = next_object;
					vm_object_paging_begin(object);
					continue;
				}
			}

			if ((m->cleaning)
				&& ((object != first_object) ||
				    (object->copy != VM_OBJECT_NULL))
				&& (fault_type & VM_PROT_WRITE)) {
				/*
				 * This is a copy-on-write fault that will
				 * cause us to revoke access to this page, but
				 * this page is in the process of being cleaned
				 * in a clustered pageout.  We must wait until
				 * the cleaning operation completes before
				 * revoking access to the original page,
				 * otherwise we might attempt to remove a
				 * wired mapping.
				 */
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset);	/* (TEST/DEBUG) */
#endif
				XPR(XPR_VM_FAULT,
				    "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
					(integer_t)object, offset,
					(integer_t)m, 0, 0);
				/* take an extra ref so that object won't die */
				assert(object->ref_count > 0);
				object->ref_count++;
				vm_object_res_reference(object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(object);
				assert(object->ref_count > 0);
				m = vm_page_lookup(object, offset);
				if (m != VM_PAGE_NULL && m->cleaning) {
					PAGE_ASSERT_WAIT(m, interruptible);
					vm_object_unlock(object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);
					goto backoff;
				} else {
					vm_object_unlock(object);
					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
				}
			}

			/*
			 * If the desired access to this page has
			 * been locked out, request that it be unlocked.
			 */

			if (access_required & m->page_lock) {
				if ((access_required & m->unlock_request) != access_required) {
					vm_prot_t	new_unlock_request;
					kern_return_t	rc;

#if TRACEFAULTPAGE
					dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready);	/* (TEST/DEBUG) */
#endif
					if (!object->pager_ready) {
						XPR(XPR_VM_FAULT,
						    "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
							access_required,
							(integer_t)object, offset,
							(integer_t)m, 0);
						/* take an extra ref */
						assert(object->ref_count > 0);
						object->ref_count++;
						vm_object_res_reference(object);
						vm_fault_cleanup(object,
								 first_m);
						counter(c_vm_fault_page_block_backoff_kernel++);
						vm_object_lock(object);
						assert(object->ref_count > 0);
						if (!object->pager_ready) {
							wait_result = vm_object_assert_wait(
								object,
								VM_OBJECT_EVENT_PAGER_READY,
								interruptible);
							vm_object_unlock(object);
							if (wait_result == THREAD_WAITING)
								wait_result = thread_block(THREAD_CONTINUE_NULL);
							vm_object_deallocate(object);
							goto backoff;
						} else {
							vm_object_unlock(object);
							vm_object_deallocate(object);
							thread_interrupt_level(interruptible_state);
							return VM_FAULT_RETRY;
						}
					}

					new_unlock_request = m->unlock_request =
						(access_required | m->unlock_request);
					vm_object_unlock(object);
					XPR(XPR_VM_FAULT,
					    "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
						(integer_t)object, offset,
						(integer_t)m, new_unlock_request, 0);
					if ((rc = memory_object_data_unlock(
						object->pager,
						offset + object->paging_offset,
						PAGE_SIZE,
						new_unlock_request))
					     != KERN_SUCCESS) {
						if (vm_fault_debug)
							printf("vm_fault: memory_object_data_unlock failed\n");
						vm_object_lock(object);
						vm_fault_cleanup(object, first_m);
						thread_interrupt_level(interruptible_state);
						return((rc == MACH_SEND_INTERRUPTED) ?
							VM_FAULT_INTERRUPTED :
							VM_FAULT_MEMORY_ERROR);
					}
					vm_object_lock(object);
					continue;
				}

				XPR(XPR_VM_FAULT,
				    "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
					access_required, (integer_t)object,
					offset, (integer_t)m, 0);
				/* take an extra ref so object won't die */
				assert(object->ref_count > 0);
				object->ref_count++;
				vm_object_res_reference(object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(object);
				assert(object->ref_count > 0);
				m = vm_page_lookup(object, offset);
				if (m != VM_PAGE_NULL &&
				    (access_required & m->page_lock) &&
				    !((access_required & m->unlock_request) != access_required)) {
					PAGE_ASSERT_WAIT(m, interruptible);
					vm_object_unlock(object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);
					goto backoff;
				} else {
					vm_object_unlock(object);
					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
				}
			}
			/*
			 * We mark the page busy and leave it on
			 * the pageout queues.  If the pageout
			 * daemon comes across it, then it will
			 * remove the page.
			 */

#if TRACEFAULTPAGE
			dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif

#if !VM_FAULT_STATIC_CONFIG
			if (!software_reference_bits) {
				vm_page_lock_queues();
				if (m->inactive)
					vm_stat.reactivations++;

				VM_PAGE_QUEUES_REMOVE(m);
				vm_page_unlock_queues();
			}
#endif
			XPR(XPR_VM_FAULT,
			    "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
				(integer_t)object, offset, (integer_t)m, 0, 0);
			assert(!m->busy);
			m->busy = TRUE;
			assert(!m->absent);
			break;
		}

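		/*
		 * Decide whether to ask the pager for this page: only if the
		 * object has a pager, the existence map does not rule the
		 * page out, and the caller is not supplying the data itself.
		 */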
		look_for_page =
			(object->pager_created) &&
			LOOK_FOR(object, offset) &&
			(!data_supply);

#if TRACEFAULTPAGE
		dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object);	/* (TEST/DEBUG) */
#endif
		if ((look_for_page || (object == first_object))
				 && !must_be_resident
				 && !(object->phys_contiguous)) {
			/*
			 * Allocate a new page for this object/offset
			 * pair.
			 */

			m = vm_page_grab_fictitious();
#if TRACEFAULTPAGE
			dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object);	/* (TEST/DEBUG) */
#endif
			if (m == VM_PAGE_NULL) {
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_FICTITIOUS_SHORTAGE);
			}
			vm_page_insert(m, object, offset);
		}

		if ((look_for_page && !must_be_resident)) {
			kern_return_t	rc;

			/*
			 * If the memory manager is not ready, we
			 * cannot make requests.
			 */
			if (!object->pager_ready) {
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
				if (m != VM_PAGE_NULL)
					VM_PAGE_FREE(m);
				XPR(XPR_VM_FAULT,
				    "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
					(integer_t)object, offset, 0, 0, 0);
				/* take an extra ref so object won't die */
				assert(object->ref_count > 0);
				object->ref_count++;
				vm_object_res_reference(object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(object);
				assert(object->ref_count > 0);
				if (!object->pager_ready) {
					wait_result = vm_object_assert_wait(object,
							VM_OBJECT_EVENT_PAGER_READY,
							interruptible);
					vm_object_unlock(object);
					if (wait_result == THREAD_WAITING)
						wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);
					goto backoff;
				} else {
					vm_object_unlock(object);
					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
				}
			}

			if (object->phys_contiguous) {
				if (m != VM_PAGE_NULL) {
					VM_PAGE_FREE(m);
					m = VM_PAGE_NULL;
				}
				goto no_clustering;
			}
			if (object->internal) {
				/*
				 * Requests to the default pager
				 * must reserve a real page in advance,
				 * because the pager's data-provided path
				 * won't block for pages.  IMPORTANT:
				 * this acts as a throttling mechanism
				 * for data_requests to the default
				 * pager.
				 */

#if TRACEFAULTPAGE
				dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
				if (m->fictitious && !vm_page_convert(m)) {
					VM_PAGE_FREE(m);
					vm_fault_cleanup(object, first_m);
					thread_interrupt_level(interruptible_state);
					return(VM_FAULT_MEMORY_SHORTAGE);
				}
			} else if (object->absent_count >
						vm_object_absent_max) {
				/*
				 * If there are too many outstanding page
				 * requests pending on this object, we
				 * wait for them to be resolved now.
				 */

#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
				if (m != VM_PAGE_NULL)
					VM_PAGE_FREE(m);
				/* take an extra ref so object won't die */
				assert(object->ref_count > 0);
				object->ref_count++;
				vm_object_res_reference(object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(object);
				assert(object->ref_count > 0);
				if (object->absent_count > vm_object_absent_max) {
					vm_object_absent_assert_wait(object,
								     interruptible);
					vm_object_unlock(object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);
					goto backoff;
				} else {
					vm_object_unlock(object);
					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
				}
			}

			/*
			 * Indicate that the page is waiting for data
			 * from the memory manager.
			 */

			if (m != VM_PAGE_NULL) {

				m->list_req_pending = TRUE;
				m->absent = TRUE;
				m->unusual = TRUE;
				object->absent_count++;

			}

no_clustering:
			cluster_start = offset;
			length = PAGE_SIZE;

			/*
			 * lengthen the cluster by the pages in the working set
			 */
			if ((map != NULL) &&
				(current_task()->dynamic_working_set != 0)) {
				cluster_end = cluster_start + length;
				/* tws values for start and end are just
				 * suggestions.  Therefore, as long as
				 * build_cluster does not use pointers or
				 * take action based on values that
				 * could be affected by re-entrance we
				 * do not need to take the map lock.
				 */
				cluster_end = offset + PAGE_SIZE_64;
				tws_build_cluster((tws_hash_t)
					current_task()->dynamic_working_set,
					object, &cluster_start,
					&cluster_end, 0x40000);
				length = cluster_end - cluster_start;
			}
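			/*
			 * The pager request below covers the range
			 * [cluster_start, cluster_start + length), which may
			 * now span more than the single faulting page.
			 */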
#if TRACEFAULTPAGE
			dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0);	/* (TEST/DEBUG) */
#endif
			/*
			 * We have a busy page, so we can
			 * release the object lock.
			 */
			vm_object_unlock(object);

			/*
			 * Call the memory manager to retrieve the data.
			 */

			if (type_of_fault)
				*type_of_fault = (length << 8) | DBG_PAGEIN_FAULT;
			VM_STAT(pageins++);
			current_task()->pageins++;
			bumped_pagein = TRUE;
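			/*
			 * Remember that we charged a pagein here; the
			 * zero-fill paths decrement the statistics again
			 * if no data ends up coming from the pager.
			 */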

			/*
			 * If this object uses a copy_call strategy,
			 * and we are interested in a copy of this object
			 * (having gotten here only by following a
			 * shadow chain), then tell the memory manager
			 * via a flag added to the desired_access
			 * parameter, so that it can detect a race
			 * between our walking down the shadow chain
			 * and its pushing pages up into a copy of
			 * the object that it manages.
			 */

			if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
			    object != first_object) {
				wants_copy_flag = VM_PROT_WANTS_COPY;
			} else {
				wants_copy_flag = VM_PROT_NONE;
			}

			XPR(XPR_VM_FAULT,
			    "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
				(integer_t)object, offset, (integer_t)m,
				access_required | wants_copy_flag, 0);

			rc = memory_object_data_request(object->pager,
					cluster_start + object->paging_offset,
					length,
					access_required | wants_copy_flag);


#if TRACEFAULTPAGE
			dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc);	/* (TEST/DEBUG) */
#endif
			if (rc != KERN_SUCCESS) {
				if (rc != MACH_SEND_INTERRUPTED
				    && vm_fault_debug)
					printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
						"memory_object_data_request",
						object->pager,
						cluster_start + object->paging_offset,
						length, access_required, rc);
				/*
				 * Don't want to leave a busy page around,
				 * but the data request may have blocked,
				 * so check if it's still there and busy.
				 */
				if (!object->phys_contiguous) {
					vm_object_lock(object);
					for (; length; length -= PAGE_SIZE,
					    cluster_start += PAGE_SIZE_64) {
						vm_page_t p;
						if ((p = vm_page_lookup(object,
								cluster_start))
						    && p->absent && p->busy
						    && p != first_m) {
							VM_PAGE_FREE(p);
						}
					}
				}
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return((rc == MACH_SEND_INTERRUPTED) ?
					VM_FAULT_INTERRUPTED :
					VM_FAULT_MEMORY_ERROR);
			} else {
#ifdef notdefcdy
				tws_hash_line_t	line;
				task_t		task;

				task = current_task();

				if ((map != NULL) &&
					(task->dynamic_working_set != 0)
						&& !(object->private)) {
					vm_object_t	base_object;
					vm_object_offset_t base_offset;
					base_object = object;
					base_offset = offset;
					while (base_object->shadow) {
						base_offset +=
						  base_object->shadow_offset;
						base_object =
						  base_object->shadow;
					}
					if (tws_lookup
						((tws_hash_t)
						task->dynamic_working_set,
						base_offset, base_object,
						&line) == KERN_SUCCESS) {
						tws_line_signal((tws_hash_t)
						task->dynamic_working_set,
							map, line, vaddr);
					}
				}
#endif
			}

			/*
			 * Retry with same object/offset, since new data may
			 * be in a different page (i.e., m is meaningless at
			 * this point).
			 */
			vm_object_lock(object);
			if ((interruptible != THREAD_UNINT) &&
			    (current_thread()->state & TH_ABORT)) {
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_INTERRUPTED);
			}
			if (m == VM_PAGE_NULL)
				break;
			continue;
		}

		/*
		 * The only case in which we get here is if
		 * object has no pager (or unwiring).  If the pager doesn't
		 * have the page this is handled in the m->absent case above
		 * (and if you change things here you should look above).
		 */
#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
#endif
		if (object == first_object)
			first_m = m;
		else
			assert(m == VM_PAGE_NULL);

		XPR(XPR_VM_FAULT,
		    "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
			(integer_t)object, offset, (integer_t)m,
			(integer_t)object->shadow, 0);
		/*
		 * Move on to the next object.  Lock the next
		 * object before unlocking the current one.
		 */
		next_object = object->shadow;
		if (next_object == VM_OBJECT_NULL) {
			assert(!must_be_resident);
			/*
			 * If there's no object left, fill the page
			 * in the top object with zeros.  But first we
			 * need to allocate a real page.
			 */

			if (object != first_object) {
				vm_object_paging_end(object);
				vm_object_unlock(object);

				object = first_object;
				offset = first_offset;
				vm_object_lock(object);
			}

			m = first_m;
			assert(m->object == object);
			first_m = VM_PAGE_NULL;

			if (object->shadow_severed) {
				VM_PAGE_FREE(m);
				vm_fault_cleanup(object, VM_PAGE_NULL);
				thread_interrupt_level(interruptible_state);
				return VM_FAULT_MEMORY_ERROR;
			}

			if (VM_PAGE_THROTTLED() ||
			    (m->fictitious && !vm_page_convert(m))) {
				VM_PAGE_FREE(m);
				vm_fault_cleanup(object, VM_PAGE_NULL);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_MEMORY_SHORTAGE);
			}
			m->no_isync = FALSE;

			if (!no_zero_fill) {
				vm_object_unlock(object);
				vm_page_zero_fill(m);
				if (type_of_fault)
					*type_of_fault = DBG_ZERO_FILL_FAULT;
				VM_STAT(zero_fill_count++);

				if (bumped_pagein == TRUE) {
					VM_STAT(pageins--);
					current_task()->pageins--;
				}
				vm_object_lock(object);
			}
			vm_page_lock_queues();
			VM_PAGE_QUEUES_REMOVE(m);
			if (m->object->size > 0x80000) {
				m->zero_fill = TRUE;
				/* depends on the queues lock */
				vm_zf_count += 1;
				queue_enter(&vm_page_queue_zf,
					m, vm_page_t, pageq);
			} else {
				queue_enter(
					&vm_page_queue_inactive,
					m, vm_page_t, pageq);
			}
			m->page_ticket = vm_page_ticket;
			vm_page_ticket_roll++;
			if (vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
				vm_page_ticket_roll = 0;
				if (vm_page_ticket ==
					VM_PAGE_TICKET_ROLL_IDS)
					vm_page_ticket = 0;
				else
					vm_page_ticket++;
			}
			m->inactive = TRUE;
			vm_page_inactive_count++;
			vm_page_unlock_queues();
			pmap_clear_modify(m->phys_addr);
			break;
		}
		else {
			if ((object != first_object) || must_be_resident)
				vm_object_paging_end(object);
			offset += object->shadow_offset;
			hi_offset += object->shadow_offset;
			lo_offset += object->shadow_offset;
			access_required = VM_PROT_READ;
			vm_object_lock(next_object);
			vm_object_unlock(object);
			object = next_object;
			vm_object_paging_begin(object);
		}
	}

	/*
	 *	PAGE HAS BEEN FOUND.
	 *
	 *	This page (m) is:
	 *		busy, so that we can play with it;
	 *		not absent, so that nobody else will fill it;
	 *		possibly eligible for pageout;
	 *
	 *	The top-level page (first_m) is:
	 *		VM_PAGE_NULL if the page was found in the
	 *		 top-level object;
	 *		busy, not absent, and ineligible for pageout.
	 *
	 *	The current object (object) is locked.  A paging
	 *	reference is held for the current and top-level
	 *	objects.
	 */

#if TRACEFAULTPAGE
	dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
#endif
#if	EXTRA_ASSERTIONS
	if (m != VM_PAGE_NULL) {
		assert(m->busy && !m->absent);
		assert((first_m == VM_PAGE_NULL) ||
			(first_m->busy && !first_m->absent &&
			 !first_m->active && !first_m->inactive));
	}
#endif	/* EXTRA_ASSERTIONS */

	XPR(XPR_VM_FAULT,
	    "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
		(integer_t)object, offset, (integer_t)m,
		(integer_t)first_object, (integer_t)first_m);
	/*
	 *	If the page is being written, but isn't
	 *	already owned by the top-level object,
	 *	we have to copy it into a new page owned
	 *	by the top-level object.
	 */

	if ((object != first_object) && (m != VM_PAGE_NULL)) {
		/*
		 *	We only really need to copy if we
		 *	want to write it.
		 */

#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type);	/* (TEST/DEBUG) */
#endif
		if (fault_type & VM_PROT_WRITE) {
			vm_page_t copy_m;

			assert(!must_be_resident);

			/*
			 *	If we try to collapse first_object at this
			 *	point, we may deadlock when we try to get
			 *	the lock on an intermediate object (since we
			 *	have the bottom object locked).  We can't
			 *	unlock the bottom object, because the page
			 *	we found may move (by collapse) if we do.
			 *
			 *	Instead, we first copy the page.  Then, when
			 *	we have no more use for the bottom object,
			 *	we unlock it and try to collapse.
			 *
			 *	Note that we copy the page even if we didn't
			 *	need to... that's the breaks.
			 */

			/*
			 *	Allocate a page for the copy
			 */
			copy_m = vm_page_grab();
			if (copy_m == VM_PAGE_NULL) {
				RELEASE_PAGE(m);
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_MEMORY_SHORTAGE);
			}


			XPR(XPR_VM_FAULT,
			    "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
				(integer_t)object, offset,
				(integer_t)m, (integer_t)copy_m, 0);
			vm_page_copy(m, copy_m);

			/*
			 *	If another map is truly sharing this
			 *	page with us, we have to flush all
			 *	uses of the original page, since we
			 *	can't distinguish those which want the
			 *	original from those which need the
			 *	new copy.
			 *
			 *	XXXO If we know that only one map has
			 *	access to this page, then we could
			 *	avoid the pmap_page_protect() call.
			 */

			vm_page_lock_queues();
			assert(!m->cleaning);
			pmap_page_protect(m->phys_addr, VM_PROT_NONE);
			vm_page_deactivate(m);
			copy_m->dirty = TRUE;
			/*
			 * Setting reference here prevents this fault from
			 * being counted as a (per-thread) reactivate as well
			 * as a copy-on-write.
			 */
			first_m->reference = TRUE;
			vm_page_unlock_queues();

			/*
			 *	We no longer need the old page or object.
			 */

			PAGE_WAKEUP_DONE(m);
			vm_object_paging_end(object);
			vm_object_unlock(object);

			if (type_of_fault)
				*type_of_fault = DBG_COW_FAULT;
			VM_STAT(cow_faults++);
			current_task()->cow_faults++;
			object = first_object;
			offset = first_offset;

			vm_object_lock(object);
			VM_PAGE_FREE(first_m);
			first_m = VM_PAGE_NULL;
			assert(copy_m->busy);
			vm_page_insert(copy_m, object, offset);
			m = copy_m;

			/*
			 *	Now that we've gotten the copy out of the
			 *	way, let's try to collapse the top object.
			 *	But we have to play ugly games with
			 *	paging_in_progress to do that...
			 */

			vm_object_paging_end(object);
			vm_object_collapse(object);
			vm_object_paging_begin(object);

		}
		else {
			*protection &= (~VM_PROT_WRITE);
		}
	}

	/*
	 *	Now check whether the page needs to be pushed into the
	 *	copy object.  The use of asymmetric copy on write for
	 *	shared temporary objects means that we may do two copies to
	 *	satisfy the fault; one above to get the page from a
	 *	shadowed object, and one here to push it into the copy.
	 */

	while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
	    (m != VM_PAGE_NULL)) {
		vm_object_offset_t	copy_offset;
		vm_page_t		copy_m;

#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type);	/* (TEST/DEBUG) */
#endif
		/*
		 *	If the page is being written, but hasn't been
		 *	copied to the copy-object, we have to copy it there.
		 */

		if ((fault_type & VM_PROT_WRITE) == 0) {
			*protection &= ~VM_PROT_WRITE;
			break;
		}

		/*
		 *	If the page was guaranteed to be resident,
		 *	we must have already performed the copy.
		 */

		if (must_be_resident)
			break;

		/*
		 *	Try to get the lock on the copy_object.
		 */
		if (!vm_object_lock_try(copy_object)) {
			vm_object_unlock(object);

			mutex_pause();	/* wait a bit */

			vm_object_lock(object);
			continue;
		}

		/*
		 *	Make another reference to the copy-object,
		 *	to keep it from disappearing during the
		 *	copy.
		 */
		assert(copy_object->ref_count > 0);
		copy_object->ref_count++;
		VM_OBJ_RES_INCR(copy_object);

		/*
		 *	Does the page exist in the copy?
		 */
		copy_offset = first_offset - copy_object->shadow_offset;
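		/*
		 * first_offset is expressed in the first object; subtracting
		 * the copy object's shadow_offset translates it into the
		 * copy object's own offset space for the checks below.
		 */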
		if (copy_object->size <= copy_offset)
			/*
			 * Copy object doesn't cover this page -- do nothing.
			 */
			;
		else if ((copy_m =
			vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
			/* Page currently exists in the copy object */
			if (copy_m->busy) {
				/*
				 *	If the page is being brought
				 *	in, wait for it and then retry.
				 */
				RELEASE_PAGE(m);
				/* take an extra ref so object won't die */
				assert(copy_object->ref_count > 0);
				copy_object->ref_count++;
				vm_object_res_reference(copy_object);
				vm_object_unlock(copy_object);
				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(copy_object);
				assert(copy_object->ref_count > 0);
				VM_OBJ_RES_DECR(copy_object);
				copy_object->ref_count--;
				assert(copy_object->ref_count > 0);
				copy_m = vm_page_lookup(copy_object, copy_offset);
				if (copy_m != VM_PAGE_NULL && copy_m->busy) {
					PAGE_ASSERT_WAIT(copy_m, interruptible);
					vm_object_unlock(copy_object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(copy_object);
					goto backoff;
				} else {
					vm_object_unlock(copy_object);
					vm_object_deallocate(copy_object);
					thread_interrupt_level(interruptible_state);
					return VM_FAULT_RETRY;
				}
			}
		}
		else if (!PAGED_OUT(copy_object, copy_offset)) {
			/*
			 *	If PAGED_OUT is TRUE, then the page used to exist
			 *	in the copy-object, and has already been paged out.
			 *	We don't need to repeat this. If PAGED_OUT is
			 *	FALSE, then either we don't know (!pager_created,
			 *	for example) or it hasn't been paged out.
			 *	(VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
			 *	We must copy the page to the copy object.
			 */

			/*
			 *	Allocate a page for the copy
			 */
			copy_m = vm_page_alloc(copy_object, copy_offset);
			if (copy_m == VM_PAGE_NULL) {
				RELEASE_PAGE(m);
				VM_OBJ_RES_DECR(copy_object);
				copy_object->ref_count--;
				assert(copy_object->ref_count > 0);
				vm_object_unlock(copy_object);
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);
				return(VM_FAULT_MEMORY_SHORTAGE);
			}

			/*
			 *	Must copy page into copy-object.
			 */

			vm_page_copy(m, copy_m);

			/*
			 *	If the old page was in use by any users
			 *	of the copy-object, it must be removed
			 *	from all pmaps.  (We can't know which
			 *	pmaps use it.)
			 */

			vm_page_lock_queues();
			assert(!m->cleaning);
			pmap_page_protect(m->phys_addr, VM_PROT_NONE);
			copy_m->dirty = TRUE;
			vm_page_unlock_queues();

			/*
			 *	If there's a pager, then immediately
			 *	page out this page, using the "initialize"
			 *	option.  Else, we use the copy.
			 */

			if
#if	MACH_PAGEMAP
			  ((!copy_object->pager_created) ||
				vm_external_state_get(
					copy_object->existence_map, copy_offset)
				== VM_EXTERNAL_STATE_ABSENT)
#else
			  (!copy_object->pager_created)
#endif
				{
				vm_page_lock_queues();
				vm_page_activate(copy_m);
				vm_page_unlock_queues();
				PAGE_WAKEUP_DONE(copy_m);
			}
			else {
				assert(copy_m->busy == TRUE);

				/*
				 *	The page is already ready for pageout:
				 *	not on pageout queues and busy.
				 *	Unlock everything except the
				 *	copy_object itself.
				 */

				vm_object_unlock(object);

				/*
				 *	Write the page to the copy-object,
				 *	flushing it from the kernel.
				 */

				vm_pageout_initialize_page(copy_m);

				/*
				 *	Since the pageout may have
				 *	temporarily dropped the
				 *	copy_object's lock, we
				 *	check whether we'll have
				 *	to deallocate the hard way.
				 */

				if ((copy_object->shadow != object) ||
				    (copy_object->ref_count == 1)) {
					vm_object_unlock(copy_object);
					vm_object_deallocate(copy_object);
					vm_object_lock(object);
					continue;
				}

				/*
				 *	Pick back up the old object's
				 *	lock.  [It is safe to do so,
				 *	since it must be deeper in the
				 *	object tree.]
				 */

				vm_object_lock(object);
			}

			/*
			 *	Because we're pushing a page upward
			 *	in the object tree, we must restart
			 *	any faults that are waiting here.
			 *	[Note that this is an expansion of
			 *	PAGE_WAKEUP that uses the THREAD_RESTART
			 *	wait result].  Can't turn off the page's
			 *	busy bit because we're not done with it.
			 */

			if (m->wanted) {
				m->wanted = FALSE;
				thread_wakeup_with_result((event_t) m,
					THREAD_RESTART);
			}
		}

		/*
		 *	The reference count on copy_object must be
		 *	at least 2: one for our extra reference,
		 *	and at least one from the outside world
		 *	(we checked that when we last locked
		 *	copy_object).
		 */
		copy_object->ref_count--;
		assert(copy_object->ref_count > 0);
		VM_OBJ_RES_DECR(copy_object);
		vm_object_unlock(copy_object);

		break;
	}

	*result_page = m;
	*top_page = first_m;

	XPR(XPR_VM_FAULT,
		"vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
		(integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
	/*
	 *	If the page can be written, assume that it will be.
	 *	[Earlier, we restrict the permission to allow write
	 *	access only if the fault so required, so we don't
	 *	mark read-only data as dirty.]
	 */

#if	!VM_FAULT_STATIC_CONFIG
	if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE) &&
			(m != VM_PAGE_NULL)) {
		m->dirty = TRUE;
	}
#endif
#if TRACEFAULTPAGE
	dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_page_deactivate_behind);	/* (TEST/DEBUG) */
#endif
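	/*
	 * Deactivate-behind heuristic: if the previous page in this object
	 * was the last one faulted in (sequential access), deactivate it so
	 * that strictly sequential access does not keep its pages on the
	 * active queue.
	 */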
	if (vm_page_deactivate_behind) {
		if (offset && /* don't underflow */
			(object->last_alloc == (offset - PAGE_SIZE_64))) {
			m = vm_page_lookup(object, object->last_alloc);
			if ((m != VM_PAGE_NULL) && !m->busy) {
				vm_page_lock_queues();
				vm_page_deactivate(m);
				vm_page_unlock_queues();
			}
#if TRACEFAULTPAGE
			dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
#endif
		}
		object->last_alloc = offset;
	}
#if TRACEFAULTPAGE
	dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0);	/* (TEST/DEBUG) */
#endif
	thread_interrupt_level(interruptible_state);
	if (*result_page == VM_PAGE_NULL) {
		vm_object_unlock(object);
	}
	return(VM_FAULT_SUCCESS);

#if 0
	block_and_backoff:
	vm_fault_cleanup(object, first_m);

	counter(c_vm_fault_page_block_backoff_kernel++);
	thread_block(THREAD_CONTINUE_NULL);
#endif

	backoff:
	thread_interrupt_level(interruptible_state);
	if (wait_result == THREAD_INTERRUPTED)
		return VM_FAULT_INTERRUPTED;
	return VM_FAULT_RETRY;

#undef	RELEASE_PAGE
}

/*
 *	Routine:	vm_fault
 *	Purpose:
 *		Handle page faults, including pseudo-faults
 *		used to change the wiring status of pages.
 *	Returns:
 *		Explicit continuations have been removed.
 *	Implementation:
 *		vm_fault and vm_fault_page save mucho state
 *		in the moral equivalent of a closure.  The state
 *		structure is allocated when first entering vm_fault
 *		and deallocated when leaving vm_fault.
 */

kern_return_t
vm_fault(
	vm_map_t	map,
	vm_offset_t	vaddr,
	vm_prot_t	fault_type,
	boolean_t	change_wiring,
	int		interruptible,
	pmap_t		caller_pmap,
	vm_offset_t	caller_pmap_addr)
{
	vm_map_version_t	version;	/* Map version for verification */
	boolean_t		wired;		/* Should mapping be wired down? */
	vm_object_t		object;		/* Top-level object */
	vm_object_offset_t	offset;		/* Top-level offset */
	vm_prot_t		prot;		/* Protection for mapping */
	vm_behavior_t		behavior;	/* Expected paging behavior */
	vm_object_offset_t	lo_offset, hi_offset;
	vm_object_t		old_copy_object; /* Saved copy object */
	vm_page_t		result_page;	/* Result of vm_fault_page */
	vm_page_t		top_page;	/* Placeholder page */
	kern_return_t		kr;

	register
	vm_page_t		m;	/* Fast access to result_page */
	kern_return_t		error_code;	/* page error reasons */
	register
	vm_object_t		cur_object;
	register
	vm_object_offset_t	cur_offset;
	vm_page_t		cur_m;
	vm_object_t		new_object;
	int			type_of_fault;
	vm_map_t		pmap_map = map;
	vm_map_t		original_map = map;
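	/*
	 * pmap_map may be replaced by a submap by vm_map_lookup_locked();
	 * original_map is kept so that RetryFault can restart the lookup
	 * from the map the caller passed in.
	 */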
1828 pmap_t pmap = NULL;
1829 boolean_t funnel_set = FALSE;
1830 funnel_t *curflock;
1831 thread_t cur_thread;
1832 boolean_t interruptible_state;
9bccf70c
A
1833 unsigned int cache_attr;
1834 int write_startup_file = 0;
1835 vm_prot_t full_fault_type;
1c79356b
A
1836
1837
de355530 1838
1c79356b
A
1839 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
1840 vaddr,
1841 0,
1842 0,
1843 0,
1844 0);
1845
de355530 1846 cur_thread = current_thread();
9bccf70c
A
1847 /* at present we do not fully check for execute permission */
1848 /* we generally treat it as read except in certain device */
1849 /* memory settings */
1850 full_fault_type = fault_type;
1851 if(fault_type & VM_PROT_EXECUTE) {
1852 fault_type &= ~VM_PROT_EXECUTE;
1853 fault_type |= VM_PROT_READ;
1854 }
1c79356b 1855
9bccf70c 1856 interruptible_state = thread_interrupt_level(interruptible);
1c79356b
A
1857
1858 /*
1859 * assume we will hit a page in the cache
1860 * otherwise, explicitly override with
1861 * the real fault type once we determine it
1862 */
1863 type_of_fault = DBG_CACHE_HIT_FAULT;
1864
1865 VM_STAT(faults++);
1866 current_task()->faults++;
1867
1868 /*
1869 * Drop the funnel if it is already held, then restore it when returning.
1870 */
1871 if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) {
1872 funnel_set = TRUE;
1873 curflock = cur_thread->funnel_lock;
1874 thread_funnel_set( curflock , FALSE);
1875 }
1876
1877 RetryFault: ;
1878
1879 /*
1880 * Find the backing store object and offset into
1881 * it to begin the search.
1882 */
1883 map = original_map;
1884 vm_map_lock_read(map);
1885 kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
1886 &object, &offset,
1887 &prot, &wired,
1888 &behavior, &lo_offset, &hi_offset, &pmap_map);
1889
1890 pmap = pmap_map->pmap;
1891
1892 if (kr != KERN_SUCCESS) {
1893 vm_map_unlock_read(map);
1894 goto done;
1895 }
1896
1897 /*
1898 * If the page is wired, we must fault for the current protection
1899 * value, to avoid further faults.
1900 */
1901
1902 if (wired)
1903 fault_type = prot | VM_PROT_WRITE;
1904
1905#if VM_FAULT_CLASSIFY
1906 /*
1907 * Temporary data gathering code
1908 */
1909 vm_fault_classify(object, offset, fault_type);
1910#endif
1911 /*
1912 * Fast fault code. The basic idea is to do as much as
1913 * possible while holding the map lock and object locks.
1914 * Busy pages are not used until the object lock has to
1915 * be dropped to do something (copy, zero fill, pmap enter).
1916 * Similarly, paging references aren't acquired until that
1917 * point, and object references aren't used.
1918 *
1919 * If we can figure out what to do
1920 * (zero fill, copy on write, pmap enter) while holding
1921 * the locks, then it gets done. Otherwise, we give up,
1922 * and use the original fault path (which doesn't hold
1923 * the map lock, and relies on busy pages).
1924 * The give up cases include:
1925 * - Have to talk to pager.
1926 * - Page is busy, absent or in error.
1927 * - Pager has locked out desired access.
1928 * - Fault needs to be restarted.
1929 * - Have to push page into copy object.
1930 *
1931 * The code is an infinite loop that moves one level down
1932 * the shadow chain each time. cur_object and cur_offset
1933 * refer to the current object being examined. object and offset
1934 * are the original object from the map. The loop is at the
1935 * top level if and only if object and cur_object are the same.
1936 *
1937 * Invariants: Map lock is held throughout. Lock is held on
1938 * original object and cur_object (if different) when
1939 * continuing or exiting loop.
1940 *
1941 */
1942
1943
1944 /*
1945 * If this page is to be written into a copy-delay object
1946 * that already has a copy, then the copy-delay strategy
1947 * is handled by the slow path (vm_fault_page), not here.
1948 */
1949 if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
1950 object->copy == VM_OBJECT_NULL ||
1951 (fault_type & VM_PROT_WRITE) == 0) {
1952 cur_object = object;
1953 cur_offset = offset;
1954
1955 while (TRUE) {
1956 m = vm_page_lookup(cur_object, cur_offset);
1957 if (m != VM_PAGE_NULL) {
143cc14e
A
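/*
 * The page is resident.  If another thread has it busy, drop
 * every lock we hold, wait for the page to become available,
 * and retry the whole fault from the top.
 */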
1958 if (m->busy) {
1959 wait_result_t result;
1960
1961 if (object != cur_object)
1962 vm_object_unlock(object);
1963
1964 vm_map_unlock_read(map);
1965 if (pmap_map != map)
1966 vm_map_unlock(pmap_map);
1967
1968#if !VM_FAULT_STATIC_CONFIG
1969 if (!vm_fault_interruptible)
1970 interruptible = THREAD_UNINT;
1971#endif
1972 result = PAGE_ASSERT_WAIT(m, interruptible);
1c79356b 1973
143cc14e
A
1974 vm_object_unlock(cur_object);
1975
1976 if (result == THREAD_WAITING) {
1977 result = thread_block(THREAD_CONTINUE_NULL);
1978
1979 counter(c_vm_fault_page_block_busy_kernel++);
1980 }
1981 if (result == THREAD_AWAKENED || result == THREAD_RESTART)
1982 goto RetryFault;
1983
1984 kr = KERN_ABORTED;
1985 goto done;
1986 }
0b4e3aa0
A
1987 if (m->unusual && (m->error || m->restart || m->private
1988 || m->absent || (fault_type & m->page_lock))) {
1c79356b 1989
143cc14e 1990 /*
1c79356b
A
1991 * Unusual case. Give up.
1992 */
1993 break;
1994 }
1995
1996 /*
1997 * Two cases of map in faults:
1998 * - At top level w/o copy object.
1999 * - Read fault anywhere.
2000 * --> must disallow write.
2001 */
2002
2003 if (object == cur_object &&
2004 object->copy == VM_OBJECT_NULL)
2005 goto FastMapInFault;
2006
2007 if ((fault_type & VM_PROT_WRITE) == 0) {
2008
2009 prot &= ~VM_PROT_WRITE;
2010
2011 /*
2012 * Set up to map the page ...
2013 * mark the page busy, drop
2014 * locks and take a paging reference
2015 * on the object with the page.
2016 */
2017
2018 if (object != cur_object) {
2019 vm_object_unlock(object);
2020 object = cur_object;
2021 }
2022FastMapInFault:
2023 m->busy = TRUE;
2024
2025 vm_object_paging_begin(object);
1c79356b
A
2026
2027FastPmapEnter:
2028 /*
2029 * Check a couple of global reasons to
2030 * be conservative about write access.
2031 * Then do the pmap_enter.
2032 */
2033#if !VM_FAULT_STATIC_CONFIG
2034 if (vm_fault_dirty_handling
2035#if MACH_KDB
2036 || db_watchpoint_list
2037#endif
2038 && (fault_type & VM_PROT_WRITE) == 0)
2039 prot &= ~VM_PROT_WRITE;
2040#else /* STATIC_CONFIG */
2041#if MACH_KDB
2042 if (db_watchpoint_list
2043 && (fault_type & VM_PROT_WRITE) == 0)
2044 prot &= ~VM_PROT_WRITE;
2045#endif /* MACH_KDB */
2046#endif /* STATIC_CONFIG */
de355530
A
2047 if (m->no_isync == TRUE) {
2048 pmap_sync_caches_phys(m->phys_addr);
143cc14e
A
2049 m->no_isync = FALSE;
2050 }
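/*
 * Pick up the object's cacheability attributes and enter the
 * mapping.  When the caller supplied its own pmap (as the
 * wiring code does), the translation goes into that pmap at
 * caller_pmap_addr rather than into the faulting map's pmap.
 */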
0b4e3aa0 2051
de355530 2052 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
9bccf70c
A
2053 if(caller_pmap) {
2054 PMAP_ENTER(caller_pmap,
2055 caller_pmap_addr, m,
2056 prot, cache_attr, wired);
2057 } else {
2058 PMAP_ENTER(pmap, vaddr, m,
2059 prot, cache_attr, wired);
2060 }
0b4e3aa0 2061
1c79356b 2062 /*
143cc14e 2063 * Grab the queues lock to manipulate
1c79356b
A
2064 * the page queues. The change-wiring
2065 * case is obvious. With software
2066 * reference bits, activate the page
2067 * only if it fell off the paging queues;
2068 * otherwise just activate it if it's inactive.
2069 *
2070 * NOTE: original vm_fault code will
2071 * move active page to back of active
2072 * queue. This code doesn't.
2073 */
1c79356b 2074 vm_page_lock_queues();
765c9de3
A
2075
2076 if (m->clustered) {
2077 vm_pagein_cluster_used++;
2078 m->clustered = FALSE;
2079 }
1c79356b
A
2080 m->reference = TRUE;
2081
2082 if (change_wiring) {
2083 if (wired)
2084 vm_page_wire(m);
2085 else
2086 vm_page_unwire(m);
2087 }
2088#if VM_FAULT_STATIC_CONFIG
2089 else {
2090 if (!m->active && !m->inactive)
2091 vm_page_activate(m);
2092 }
2093#else
2094 else if (software_reference_bits) {
2095 if (!m->active && !m->inactive)
2096 vm_page_activate(m);
2097 }
2098 else if (!m->active) {
2099 vm_page_activate(m);
2100 }
2101#endif
2102 vm_page_unlock_queues();
2103
2104 /*
2105 * That's it, clean up and return.
2106 */
2107 PAGE_WAKEUP_DONE(m);
2108 vm_object_paging_end(object);
143cc14e
A
2109
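/*
 * Record this fault in the task's dynamic working set.  The
 * entry is keyed on the base object at the bottom of the
 * shadow chain; on KERN_NO_SPACE the hash is expanded, and a
 * timed-out lookup or insert flags the startup file for
 * rewriting.
 */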
2110 {
2111 tws_hash_line_t line;
2112 task_t task;
2113
2114 task = current_task();
2115 if((map != NULL) &&
2116 (task->dynamic_working_set != 0) &&
2117 !(object->private)) {
2118 kern_return_t kr;
2119 vm_object_t base_object;
2120 vm_object_offset_t base_offset;
2121 base_object = object;
2122 base_offset = cur_offset;
2123 while(base_object->shadow) {
2124 base_offset +=
2125 base_object->shadow_offset;
2126 base_object =
2127 base_object->shadow;
2128 }
2129 kr = tws_lookup((tws_hash_t)
2130 task->dynamic_working_set,
2131 base_offset, base_object,
2132 &line);
2133 if(kr == KERN_OPERATION_TIMED_OUT){
2134 write_startup_file = 1;
2135 } else if (kr != KERN_SUCCESS) {
2136 kr = tws_insert((tws_hash_t)
2137 task->dynamic_working_set,
2138 base_offset, base_object,
2139 vaddr, pmap_map);
2140 if(kr == KERN_NO_SPACE) {
2141 vm_object_unlock(object);
2142
2143 tws_expand_working_set(
2144 task->dynamic_working_set,
2145 TWS_HASH_LINE_COUNT,
2146 FALSE);
2147
2148 vm_object_lock(object);
2149 }
2150 if(kr ==
2151 KERN_OPERATION_TIMED_OUT) {
2152 write_startup_file = 1;
2153 }
2154 }
2155 }
2156 }
1c79356b 2157 vm_object_unlock(object);
143cc14e 2158
1c79356b
A
2159 vm_map_unlock_read(map);
2160 if(pmap_map != map)
2161 vm_map_unlock(pmap_map);
2162
9bccf70c
A
2163 if(write_startup_file)
2164 tws_send_startup_info(current_task());
2165
143cc14e 2166 if (funnel_set)
1c79356b 2167 thread_funnel_set( curflock, TRUE);
143cc14e 2168
9bccf70c 2169 thread_interrupt_level(interruptible_state);
1c79356b 2170
143cc14e 2171
1c79356b
A
2172 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2173 vaddr,
9bccf70c 2174 type_of_fault & 0xff,
1c79356b 2175 KERN_SUCCESS,
9bccf70c 2176 type_of_fault >> 8,
1c79356b 2177 0);
143cc14e 2178
1c79356b
A
2179 return KERN_SUCCESS;
2180 }
2181
2182 /*
2183 * Copy on write fault. If objects match, then
2184 * object->copy must not be NULL (else control
2185 * would be in previous code block), and we
2186 * have a potential push into the copy object
2187 * which we won't handle here.
2188 */
2189
2190 if (cur_object == object)
2191 break;
1c79356b
A
2192 /*
2193 * This is now a shadow based copy on write
2194 * fault -- it requires a copy up the shadow
2195 * chain.
2196 *
2197 * Allocate a page in the original top level
2198 * object. Give up if allocate fails. Also
2199 * need to remember current page, as it's the
2200 * source of the copy.
2201 */
2202 cur_m = m;
2203 m = vm_page_grab();
2204 if (m == VM_PAGE_NULL) {
2205 break;
2206 }
1c79356b
A
2207 /*
2208 * Now do the copy. Mark the source busy
2209 * and take out paging references on both
2210 * objects.
2211 *
2212 * NOTE: This code holds the map lock across
2213 * the page copy.
2214 */
2215
2216 cur_m->busy = TRUE;
2217 vm_page_copy(cur_m, m);
2218 vm_page_insert(m, object, offset);
2219
2220 vm_object_paging_begin(cur_object);
2221 vm_object_paging_begin(object);
2222
2223 type_of_fault = DBG_COW_FAULT;
2224 VM_STAT(cow_faults++);
2225 current_task()->cow_faults++;
2226
2227 /*
2228 * Now cope with the source page and object.
2229 * If the top object has a ref count of 1
2230 * then no other map can access it, and hence
2231 * it's not necessary to do the pmap_page_protect.
2232 */
2233
2234
2235 vm_page_lock_queues();
2236 vm_page_deactivate(cur_m);
2237 m->dirty = TRUE;
de355530 2238 pmap_page_protect(cur_m->phys_addr,
1c79356b
A
2239 VM_PROT_NONE);
2240 vm_page_unlock_queues();
2241
2242 PAGE_WAKEUP_DONE(cur_m);
2243 vm_object_paging_end(cur_object);
2244 vm_object_unlock(cur_object);
2245
2246 /*
2247 * Slight hack: call vm_object_collapse()
2248 * and then reuse the common mapping code.
2249 * Note that the object lock was taken above.
2250 */
2251
2252 vm_object_paging_end(object);
2253 vm_object_collapse(object);
2254 vm_object_paging_begin(object);
1c79356b
A
2255
2256 goto FastPmapEnter;
2257 }
2258 else {
2259
2260 /*
2261 * No page at cur_object, cur_offset
2262 */
2263
2264 if (cur_object->pager_created) {
2265
2266 /*
2267 * Have to talk to the pager. Give up.
2268 */
1c79356b
A
2269 break;
2270 }
2271
2272
2273 if (cur_object->shadow == VM_OBJECT_NULL) {
2274
2275 if (cur_object->shadow_severed) {
2276 vm_object_paging_end(object);
2277 vm_object_unlock(object);
2278 vm_map_unlock_read(map);
2279 if(pmap_map != map)
2280 vm_map_unlock(pmap_map);
2281
9bccf70c
A
2282 if(write_startup_file)
2283 tws_send_startup_info(
2284 current_task());
2285
1c79356b
A
2286 if (funnel_set) {
2287 thread_funnel_set( curflock, TRUE);
2288 funnel_set = FALSE;
2289 }
9bccf70c 2290 thread_interrupt_level(interruptible_state);
1c79356b
A
2291
2292 return VM_FAULT_MEMORY_ERROR;
2293 }
2294
2295 /*
2296 * Zero fill fault. Page gets
2297 * filled in top object. Insert
2298 * page, then drop any lower lock.
2299 * Give up if no page.
2300 */
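/*
 * Don't zero-fill in the fast path when free memory is low:
 * bail to the slow path (which can block for pages) once the
 * free count drops below the free target minus a quarter of
 * the (target - min) band.
 */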
2301 if ((vm_page_free_target -
2302 ((vm_page_free_target-vm_page_free_min)>>2))
2303 > vm_page_free_count) {
2304 break;
2305 }
2306 m = vm_page_alloc(object, offset);
2307 if (m == VM_PAGE_NULL) {
2308 break;
2309 }
0b4e3aa0
A
2310 /*
2311 * This is a zero-fill or initial fill
2312 * page fault. As such, we consider it
2313 * undefined with respect to instruction
2314 * execution. i.e. it is the responsibility
2315 * of higher layers to call for an instruction
2316 * sync after changing the contents and before
2317 * sending a program into this area. We
2318 * choose this approach for performance.
2319 */
2320
2321 m->no_isync = FALSE;
1c79356b
A
2322
2323 if (cur_object != object)
2324 vm_object_unlock(cur_object);
2325
2326 vm_object_paging_begin(object);
2327 vm_object_unlock(object);
2328
2329 /*
2330 * Now zero fill page and map it.
2331 * The page is probably going to
2332 * be written soon, so don't bother
2333 * to clear the modified bit
2334 *
2335 * NOTE: This code holds the map
2336 * lock across the zero fill.
2337 */
2338
2339 if (!map->no_zero_fill) {
2340 vm_page_zero_fill(m);
2341 type_of_fault = DBG_ZERO_FILL_FAULT;
2342 VM_STAT(zero_fill_count++);
2343 }
2344 vm_page_lock_queues();
2345 VM_PAGE_QUEUES_REMOVE(m);
0b4e3aa0
A
2346
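/*
 * Tag the page with the current page ticket and put it on the
 * zero-fill queue (for objects larger than 512KB) or the
 * inactive queue.  The ticket advances once every
 * VM_PAGE_TICKETS_IN_ROLL pages, presumably so the pageout
 * code can age zero-filled pages in batches.
 */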
2347 m->page_ticket = vm_page_ticket;
9bccf70c
A
2348 if(m->object->size > 0x80000) {
2349 m->zero_fill = TRUE;
2350 /* depends on the queues lock */
2351 vm_zf_count += 1;
2352 queue_enter(&vm_page_queue_zf,
2353 m, vm_page_t, pageq);
2354 } else {
2355 queue_enter(
2356 &vm_page_queue_inactive,
2357 m, vm_page_t, pageq);
2358 }
0b4e3aa0
A
2359 vm_page_ticket_roll++;
2360 if(vm_page_ticket_roll ==
2361 VM_PAGE_TICKETS_IN_ROLL) {
2362 vm_page_ticket_roll = 0;
2363 if(vm_page_ticket ==
2364 VM_PAGE_TICKET_ROLL_IDS)
2365 vm_page_ticket= 0;
2366 else
2367 vm_page_ticket++;
2368 }
2369
1c79356b
A
2370 m->inactive = TRUE;
2371 vm_page_inactive_count++;
2372 vm_page_unlock_queues();
143cc14e
A
2373 vm_object_lock(object);
2374
1c79356b
A
2375 goto FastPmapEnter;
2376 }
2377
2378 /*
2379 * On to the next level
2380 */
2381
2382 cur_offset += cur_object->shadow_offset;
2383 new_object = cur_object->shadow;
2384 vm_object_lock(new_object);
2385 if (cur_object != object)
2386 vm_object_unlock(cur_object);
2387 cur_object = new_object;
2388
2389 continue;
2390 }
2391 }
2392
2393 /*
2394 * Cleanup from fast fault failure. Drop any object
2395 * lock other than original and drop map lock.
2396 */
2397
2398 if (object != cur_object)
2399 vm_object_unlock(cur_object);
2400 }
2401 vm_map_unlock_read(map);
143cc14e 2402
1c79356b
A
2403 if(pmap_map != map)
2404 vm_map_unlock(pmap_map);
2405
2406 /*
2407 * Make a reference to this object to
2408 * prevent its disposal while we are messing with
2409 * it. Once we have the reference, the map is free
2410 * to be diddled. Since objects reference their
2411 * shadows (and copies), they will stay around as well.
2412 */
2413
2414 assert(object->ref_count > 0);
2415 object->ref_count++;
2416 vm_object_res_reference(object);
2417 vm_object_paging_begin(object);
2418
2419 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
9bccf70c
A
2420 {
2421 tws_hash_line_t line;
2422 task_t task;
2423 kern_return_t kr;
2424
2425 task = current_task();
2426 if((map != NULL) &&
2427 (task->dynamic_working_set != 0)
2428 && !(object->private)) {
2429 vm_object_t base_object;
2430 vm_object_offset_t base_offset;
2431 base_object = object;
2432 base_offset = offset;
2433 while(base_object->shadow) {
2434 base_offset +=
2435 base_object->shadow_offset;
2436 base_object =
2437 base_object->shadow;
2438 }
2439 kr = tws_lookup((tws_hash_t)
2440 task->dynamic_working_set,
2441 base_offset, base_object,
2442 &line);
2443 if(kr == KERN_OPERATION_TIMED_OUT){
2444 write_startup_file = 1;
2445 } else if (kr != KERN_SUCCESS) {
2446 tws_insert((tws_hash_t)
2447 task->dynamic_working_set,
2448 base_offset, base_object,
2449 vaddr, pmap_map);
2450 kr = tws_insert((tws_hash_t)
2451 task->dynamic_working_set,
2452 base_offset, base_object,
2453 vaddr, pmap_map);
2454 if(kr == KERN_NO_SPACE) {
2455 vm_object_unlock(object);
2456 tws_expand_working_set(
2457 task->dynamic_working_set,
2458 TWS_HASH_LINE_COUNT,
2459 FALSE);
2460 vm_object_lock(object);
2461 }
2462 if(kr == KERN_OPERATION_TIMED_OUT) {
2463 write_startup_file = 1;
2464 }
2465 }
2466 }
2467 }
1c79356b
A
2468 kr = vm_fault_page(object, offset, fault_type,
2469 (change_wiring && !wired),
2470 interruptible,
2471 lo_offset, hi_offset, behavior,
2472 &prot, &result_page, &top_page,
2473 &type_of_fault,
0b4e3aa0 2474 &error_code, map->no_zero_fill, FALSE, map, vaddr);
1c79356b
A
2475
2476 /*
2477 * If we didn't succeed, lose the object reference immediately.
2478 */
2479
2480 if (kr != VM_FAULT_SUCCESS)
2481 vm_object_deallocate(object);
2482
2483 /*
2484 * See why we failed, and take corrective action.
2485 */
2486
2487 switch (kr) {
2488 case VM_FAULT_SUCCESS:
2489 break;
2490 case VM_FAULT_MEMORY_SHORTAGE:
2491 if (vm_page_wait((change_wiring) ?
2492 THREAD_UNINT :
2493 THREAD_ABORTSAFE))
2494 goto RetryFault;
2495 /* fall thru */
2496 case VM_FAULT_INTERRUPTED:
2497 kr = KERN_ABORTED;
2498 goto done;
2499 case VM_FAULT_RETRY:
2500 goto RetryFault;
2501 case VM_FAULT_FICTITIOUS_SHORTAGE:
2502 vm_page_more_fictitious();
2503 goto RetryFault;
2504 case VM_FAULT_MEMORY_ERROR:
2505 if (error_code)
2506 kr = error_code;
2507 else
2508 kr = KERN_MEMORY_ERROR;
2509 goto done;
2510 }
2511
2512 m = result_page;
2513
0b4e3aa0
A
2514 if(m != VM_PAGE_NULL) {
2515 assert((change_wiring && !wired) ?
2516 (top_page == VM_PAGE_NULL) :
2517 ((top_page == VM_PAGE_NULL) == (m->object == object)));
2518 }
1c79356b
A
2519
2520 /*
2521 * How to clean up the result of vm_fault_page. This
2522 * happens whether the mapping is entered or not.
2523 */
2524
2525#define UNLOCK_AND_DEALLOCATE \
2526 MACRO_BEGIN \
2527 vm_fault_cleanup(m->object, top_page); \
2528 vm_object_deallocate(object); \
2529 MACRO_END
2530
2531 /*
2532 * What to do with the resulting page from vm_fault_page
2533 * if it doesn't get entered into the physical map:
2534 */
2535
2536#define RELEASE_PAGE(m) \
2537 MACRO_BEGIN \
2538 PAGE_WAKEUP_DONE(m); \
2539 vm_page_lock_queues(); \
2540 if (!m->active && !m->inactive) \
2541 vm_page_activate(m); \
2542 vm_page_unlock_queues(); \
2543 MACRO_END
2544
2545 /*
2546 * We must verify that the maps have not changed
2547 * since our last lookup.
2548 */
2549
0b4e3aa0
A
2550 if(m != VM_PAGE_NULL) {
2551 old_copy_object = m->object->copy;
0b4e3aa0
A
2552 vm_object_unlock(m->object);
2553 } else {
2554 old_copy_object = VM_OBJECT_NULL;
2555 }
1c79356b
A
2556 if ((map != original_map) || !vm_map_verify(map, &version)) {
2557 vm_object_t retry_object;
2558 vm_object_offset_t retry_offset;
2559 vm_prot_t retry_prot;
2560
2561 /*
2562 * To avoid trying to write_lock the map while another
2563 * thread has it read_locked (in vm_map_pageable), we
2564 * do not try for write permission. If the page is
2565 * still writable, we will get write permission. If it
2566 * is not, or has been marked needs_copy, we enter the
2567 * mapping without write permission, and will merely
2568 * take another fault.
2569 */
2570 map = original_map;
2571 vm_map_lock_read(map);
2572 kr = vm_map_lookup_locked(&map, vaddr,
2573 fault_type & ~VM_PROT_WRITE, &version,
2574 &retry_object, &retry_offset, &retry_prot,
2575 &wired, &behavior, &lo_offset, &hi_offset,
2576 &pmap_map);
2577 pmap = pmap_map->pmap;
2578
2579 if (kr != KERN_SUCCESS) {
2580 vm_map_unlock_read(map);
0b4e3aa0
A
2581 if(m != VM_PAGE_NULL) {
2582 vm_object_lock(m->object);
2583 RELEASE_PAGE(m);
2584 UNLOCK_AND_DEALLOCATE;
2585 } else {
2586 vm_object_deallocate(object);
2587 }
1c79356b
A
2588 goto done;
2589 }
2590
2591 vm_object_unlock(retry_object);
0b4e3aa0
A
2592 if(m != VM_PAGE_NULL) {
2593 vm_object_lock(m->object);
2594 } else {
2595 vm_object_lock(object);
2596 }
1c79356b
A
2597
2598 if ((retry_object != object) ||
2599 (retry_offset != offset)) {
2600 vm_map_unlock_read(map);
2601 if(pmap_map != map)
2602 vm_map_unlock(pmap_map);
0b4e3aa0
A
2603 if(m != VM_PAGE_NULL) {
2604 RELEASE_PAGE(m);
2605 UNLOCK_AND_DEALLOCATE;
2606 } else {
2607 vm_object_deallocate(object);
2608 }
1c79356b
A
2609 goto RetryFault;
2610 }
2611
2612 /*
2613 * Check whether the protection has changed or the object
2614 * has been copied while we left the map unlocked.
2615 */
2616 prot &= retry_prot;
0b4e3aa0
A
2617 if(m != VM_PAGE_NULL) {
2618 vm_object_unlock(m->object);
2619 } else {
2620 vm_object_unlock(object);
2621 }
2622 }
2623 if(m != VM_PAGE_NULL) {
2624 vm_object_lock(m->object);
2625 } else {
2626 vm_object_lock(object);
1c79356b 2627 }
1c79356b
A
2628
2629 /*
2630 * If the copy object changed while the top-level object
2631 * was unlocked, then we must take away write permission.
2632 */
2633
0b4e3aa0
A
2634 if(m != VM_PAGE_NULL) {
2635 if (m->object->copy != old_copy_object)
2636 prot &= ~VM_PROT_WRITE;
2637 }
1c79356b
A
2638
2639 /*
2640 * If we want to wire down this page, but no longer have
2641 * adequate permissions, we must start all over.
2642 */
2643
2644 if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2645 vm_map_verify_done(map, &version);
2646 if(pmap_map != map)
2647 vm_map_unlock(pmap_map);
0b4e3aa0
A
2648 if(m != VM_PAGE_NULL) {
2649 RELEASE_PAGE(m);
2650 UNLOCK_AND_DEALLOCATE;
2651 } else {
2652 vm_object_deallocate(object);
2653 }
1c79356b
A
2654 goto RetryFault;
2655 }
2656
1c79356b
A
2657 /*
2658 * Put this page into the physical map.
2659 * We had to do the unlock above because pmap_enter
2660 * may cause other faults. The page may be on
2661 * the pageout queues. If the pageout daemon comes
2662 * across the page, it will remove it from the queues.
2663 */
765c9de3
A
2664 if (m != VM_PAGE_NULL) {
2665 if (m->no_isync == TRUE) {
de355530 2666 pmap_sync_caches_phys(m->phys_addr);
0b4e3aa0 2667
765c9de3
A
2668 m->no_isync = FALSE;
2669 }
9bccf70c
A
2670
2671 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
0b4e3aa0 2672
9bccf70c
A
2673 if(caller_pmap) {
2674 PMAP_ENTER(caller_pmap,
2675 caller_pmap_addr, m,
2676 prot, cache_attr, wired);
2677 } else {
2678 PMAP_ENTER(pmap, vaddr, m,
2679 prot, cache_attr, wired);
2680 }
0b4e3aa0
A
2681 {
2682 tws_hash_line_t line;
2683 task_t task;
9bccf70c 2684 kern_return_t kr;
0b4e3aa0
A
2685
2686 task = current_task();
2687 if((map != NULL) &&
9bccf70c
A
2688 (task->dynamic_working_set != 0)
2689 && (object->private)) {
2690 vm_object_t base_object;
2691 vm_object_offset_t base_offset;
2692 base_object = m->object;
2693 base_offset = m->offset;
2694 while(base_object->shadow) {
2695 base_offset +=
2696 base_object->shadow_offset;
2697 base_object =
2698 base_object->shadow;
2699 }
2700 kr = tws_lookup((tws_hash_t)
0b4e3aa0 2701 task->dynamic_working_set,
9bccf70c
A
2702 base_offset, base_object, &line);
2703 if(kr == KERN_OPERATION_TIMED_OUT){
2704 write_startup_file = 1;
2705 } else if (kr != KERN_SUCCESS) {
0b4e3aa0
A
2706 tws_insert((tws_hash_t)
2707 task->dynamic_working_set,
9bccf70c 2708 base_offset, base_object,
0b4e3aa0 2709 vaddr, pmap_map);
9bccf70c 2710 kr = tws_insert((tws_hash_t)
0b4e3aa0 2711 task->dynamic_working_set,
9bccf70c
A
2712 base_offset, base_object,
2713 vaddr, pmap_map);
2714 if(kr == KERN_NO_SPACE) {
143cc14e 2715 vm_object_unlock(m->object);
0b4e3aa0 2716 tws_expand_working_set(
9bccf70c
A
2717 task->dynamic_working_set,
2718 TWS_HASH_LINE_COUNT,
2719 FALSE);
143cc14e 2720 vm_object_lock(m->object);
9bccf70c
A
2721 }
2722 if(kr == KERN_OPERATION_TIMED_OUT) {
2723 write_startup_file = 1;
0b4e3aa0
A
2724 }
2725 }
2726 }
2727 }
2728 } else {
2729
9bccf70c
A
2730#ifndef i386
2731 int memattr;
de355530 2732 struct phys_entry *pp;
9bccf70c
A
2733 vm_map_entry_t entry;
2734 vm_offset_t laddr;
2735 vm_offset_t ldelta, hdelta;
143cc14e 2736
0b4e3aa0
A
2737 /*
2738 * do a pmap block mapping from the physical address
2739 * in the object
2740 */
de355530
A
2741 if(pp = pmap_find_physentry(
2742 (vm_offset_t)object->shadow_offset)) {
2743 memattr = ((pp->pte1 & 0x00000078) >> 3);
2744 } else {
2745 memattr = VM_WIMG_MASK & (int)object->wimg_bits;
2746 }
9bccf70c
A
2747
2748
de355530
A
2749 /* While we do not worry about execution protection in */
2750 /* general, we may be able to read device memory and */
2751 /* still not be able to execute it. Here we check for */
2752 /* the guarded bit. If it's set and we are attempting */
2753 /* to execute, we return with a protection failure. */
9bccf70c 2754
de355530
A
2755 if((memattr & VM_MEM_GUARDED) &&
2756 (full_fault_type & VM_PROT_EXECUTE)) {
9bccf70c
A
2757 vm_map_verify_done(map, &version);
2758 if(pmap_map != map)
2759 vm_map_unlock(pmap_map);
2760 vm_fault_cleanup(object, top_page);
2761 vm_object_deallocate(object);
2762 kr = KERN_PROTECTION_FAILURE;
2763 goto done;
0b4e3aa0 2764 }
1c79356b 2765
de355530
A
2766
2767
9bccf70c
A
2768 if(pmap_map != map) {
2769 vm_map_unlock(pmap_map);
2770 }
2771 if (original_map != map) {
2772 vm_map_unlock_read(map);
2773 vm_map_lock_read(original_map);
2774 map = original_map;
2775 }
2776 pmap_map = map;
2777
2778 laddr = vaddr;
2779 hdelta = 0xFFFFF000;
2780 ldelta = 0xFFFFF000;
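/*
 * Walk down through any submaps covering this address,
 * shrinking ldelta/hdelta to the distance from laddr to the
 * nearest entry boundary below and above it.  The block
 * mapping set up below then spans [vaddr - ldelta,
 * vaddr + hdelta), the largest range around the fault that
 * stays within a single entry at every map level.
 */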
2781
2782
2783 while(vm_map_lookup_entry(map, laddr, &entry)) {
2784 if(ldelta > (laddr - entry->vme_start))
2785 ldelta = laddr - entry->vme_start;
2786 if(hdelta > (entry->vme_end - laddr))
2787 hdelta = entry->vme_end - laddr;
2788 if(entry->is_sub_map) {
2789
2790 laddr = (laddr - entry->vme_start)
2791 + entry->offset;
2792 vm_map_lock_read(entry->object.sub_map);
2793 if(map != pmap_map)
2794 vm_map_unlock_read(map);
2795 if(entry->use_pmap) {
2796 vm_map_unlock_read(pmap_map);
2797 pmap_map = entry->object.sub_map;
2798 }
2799 map = entry->object.sub_map;
2800
2801 } else {
2802 break;
2803 }
2804 }
2805
2806 if(vm_map_lookup_entry(map, laddr, &entry) &&
2807 (entry->object.vm_object != NULL) &&
2808 (entry->object.vm_object == object)) {
2809
2810
2811 if(caller_pmap) {
2812 pmap_map_block(caller_pmap,
de355530
A
2813 caller_pmap_addr - ldelta,
2814 ((vm_offset_t)
9bccf70c
A
2815 (entry->object.vm_object->shadow_offset))
2816 + entry->offset +
de355530 2817 (laddr - entry->vme_start) - ldelta,
9bccf70c 2818 ldelta + hdelta, prot,
de355530
A
2819 memattr, 0); /* Set up a block mapped area */
2820 } else {
2821 pmap_map_block(pmap_map->pmap, vaddr - ldelta,
2822 ((vm_offset_t)
9bccf70c 2823 (entry->object.vm_object->shadow_offset))
de355530
A
2824 + entry->offset +
2825 (laddr - entry->vme_start) - ldelta,
2826 ldelta + hdelta, prot,
2827 memattr, 0); /* Set up a block mapped area */
9bccf70c
A
2828 }
2829 }
2830#else
2831#ifdef notyet
2832 if(caller_pmap) {
2833 pmap_enter(caller_pmap, caller_pmap_addr,
de355530 2834 object->shadow_offset, prot, 0, TRUE);
9bccf70c
A
2835 } else {
2836 pmap_enter(pmap, vaddr,
de355530 2837 object->shadow_offset, prot, 0, TRUE);
9bccf70c 2838 }
0b4e3aa0 2839 /* Map it in */
9bccf70c 2840#endif
0b4e3aa0
A
2841#endif
2842
2843 }
1c79356b
A
2844
2845 /*
2846 * If the page is not wired down and isn't already
2847 * on a pageout queue, then put it where the
2848 * pageout daemon can find it.
2849 */
0b4e3aa0 2850 if(m != VM_PAGE_NULL) {
0b4e3aa0
A
2851 vm_page_lock_queues();
2852
2853 if (change_wiring) {
2854 if (wired)
2855 vm_page_wire(m);
2856 else
2857 vm_page_unwire(m);
2858 }
1c79356b 2859#if VM_FAULT_STATIC_CONFIG
0b4e3aa0
A
2860 else {
2861 if (!m->active && !m->inactive)
2862 vm_page_activate(m);
2863 m->reference = TRUE;
2864 }
1c79356b 2865#else
0b4e3aa0
A
2866 else if (software_reference_bits) {
2867 if (!m->active && !m->inactive)
2868 vm_page_activate(m);
2869 m->reference = TRUE;
2870 } else {
1c79356b 2871 vm_page_activate(m);
0b4e3aa0 2872 }
1c79356b 2873#endif
0b4e3aa0
A
2874 vm_page_unlock_queues();
2875 }
1c79356b
A
2876
2877 /*
2878 * Unlock everything, and return
2879 */
2880
2881 vm_map_verify_done(map, &version);
2882 if(pmap_map != map)
2883 vm_map_unlock(pmap_map);
0b4e3aa0
A
2884 if(m != VM_PAGE_NULL) {
2885 PAGE_WAKEUP_DONE(m);
2886 UNLOCK_AND_DEALLOCATE;
2887 } else {
2888 vm_fault_cleanup(object, top_page);
2889 vm_object_deallocate(object);
2890 }
1c79356b 2891 kr = KERN_SUCCESS;
1c79356b
A
2892
2893#undef UNLOCK_AND_DEALLOCATE
2894#undef RELEASE_PAGE
2895
2896 done:
9bccf70c
A
2897 if(write_startup_file)
2898 tws_send_startup_info(current_task());
1c79356b
A
2899 if (funnel_set) {
2900 thread_funnel_set( curflock, TRUE);
2901 funnel_set = FALSE;
2902 }
9bccf70c 2903 thread_interrupt_level(interruptible_state);
1c79356b
A
2904
2905 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2906 vaddr,
9bccf70c 2907 type_of_fault & 0xff,
1c79356b 2908 kr,
9bccf70c 2909 type_of_fault >> 8,
1c79356b 2910 0);
143cc14e 2911
1c79356b
A
2912 return(kr);
2913}
2914
2915/*
2916 * vm_fault_wire:
2917 *
2918 * Wire down a range of virtual addresses in a map.
2919 */
2920kern_return_t
2921vm_fault_wire(
2922 vm_map_t map,
2923 vm_map_entry_t entry,
9bccf70c
A
2924 pmap_t pmap,
2925 vm_offset_t pmap_addr)
1c79356b
A
2926{
2927
2928 register vm_offset_t va;
2929 register vm_offset_t end_addr = entry->vme_end;
2930 register kern_return_t rc;
2931
2932 assert(entry->in_transition);
2933
9bccf70c
A
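/*
 * Physically contiguous objects (typically device memory) are
 * mapped as a block and never paged, so there is nothing to
 * fault in page by page; vm_fault_unwire skips them for the
 * same reason.
 */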
2934 if ((entry->object.vm_object != NULL) &&
2935 !entry->is_sub_map &&
2936 entry->object.vm_object->phys_contiguous) {
2937 return KERN_SUCCESS;
2938 }
2939
1c79356b
A
2940 /*
2941 * Inform the physical mapping system that the
2942 * range of addresses may not fault, so that
2943 * page tables and such can be locked down as well.
2944 */
2945
9bccf70c
A
2946 pmap_pageable(pmap, pmap_addr,
2947 pmap_addr + (end_addr - entry->vme_start), FALSE);
1c79356b
A
2948
2949 /*
2950 * We simulate a fault to get the page and enter it
2951 * in the physical map.
2952 */
2953
2954 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
2955 if ((rc = vm_fault_wire_fast(
9bccf70c
A
2956 map, va, entry, pmap,
2957 pmap_addr + (va - entry->vme_start)
2958 )) != KERN_SUCCESS) {
1c79356b 2959 rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
9bccf70c
A
2960 (pmap == kernel_pmap) ?
2961 THREAD_UNINT : THREAD_ABORTSAFE,
2962 pmap, pmap_addr + (va - entry->vme_start));
1c79356b
A
2963 }
2964
2965 if (rc != KERN_SUCCESS) {
2966 struct vm_map_entry tmp_entry = *entry;
2967
2968 /* unwire wired pages */
2969 tmp_entry.vme_end = va;
9bccf70c
A
2970 vm_fault_unwire(map,
2971 &tmp_entry, FALSE, pmap, pmap_addr);
1c79356b
A
2972
2973 return rc;
2974 }
2975 }
2976 return KERN_SUCCESS;
2977}
2978
2979/*
2980 * vm_fault_unwire:
2981 *
2982 * Unwire a range of virtual addresses in a map.
2983 */
2984void
2985vm_fault_unwire(
2986 vm_map_t map,
2987 vm_map_entry_t entry,
2988 boolean_t deallocate,
9bccf70c
A
2989 pmap_t pmap,
2990 vm_offset_t pmap_addr)
1c79356b
A
2991{
2992 register vm_offset_t va;
2993 register vm_offset_t end_addr = entry->vme_end;
2994 vm_object_t object;
2995
2996 object = (entry->is_sub_map)
2997 ? VM_OBJECT_NULL : entry->object.vm_object;
2998
2999 /*
3000 * Since the pages are wired down, we must be able to
3001 * get their mappings from the physical map system.
3002 */
3003
3004 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
9bccf70c
A
3005 pmap_change_wiring(pmap,
3006 pmap_addr + (va - entry->vme_start), FALSE);
1c79356b
A
3007
3008 if (object == VM_OBJECT_NULL) {
9bccf70c
A
3009 (void) vm_fault(map, va, VM_PROT_NONE,
3010 TRUE, THREAD_UNINT, pmap, pmap_addr);
3011 } else if (object->phys_contiguous) {
3012 continue;
1c79356b
A
3013 } else {
3014 vm_prot_t prot;
3015 vm_page_t result_page;
3016 vm_page_t top_page;
3017 vm_object_t result_object;
3018 vm_fault_return_t result;
3019
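/*
 * Look the wired page up via vm_fault_page() with VM_PROT_NONE
 * so no protection change is requested.  Since the page is
 * wired it must be resident, so the lookup is expected to
 * succeed; the page is then either freed (deallocate case) or
 * simply unwired.
 */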
3020 do {
3021 prot = VM_PROT_NONE;
3022
3023 vm_object_lock(object);
3024 vm_object_paging_begin(object);
3025 XPR(XPR_VM_FAULT,
3026 "vm_fault_unwire -> vm_fault_page\n",
3027 0,0,0,0,0);
3028 result = vm_fault_page(object,
3029 entry->offset +
3030 (va - entry->vme_start),
3031 VM_PROT_NONE, TRUE,
3032 THREAD_UNINT,
3033 entry->offset,
3034 entry->offset +
3035 (entry->vme_end
3036 - entry->vme_start),
3037 entry->behavior,
3038 &prot,
3039 &result_page,
3040 &top_page,
3041 (int *)0,
3042 0, map->no_zero_fill,
0b4e3aa0 3043 FALSE, NULL, 0);
1c79356b
A
3044 } while (result == VM_FAULT_RETRY);
3045
3046 if (result != VM_FAULT_SUCCESS)
3047 panic("vm_fault_unwire: failure");
3048
3049 result_object = result_page->object;
3050 if (deallocate) {
3051 assert(!result_page->fictitious);
de355530 3052 pmap_page_protect(result_page->phys_addr,
1c79356b
A
3053 VM_PROT_NONE);
3054 VM_PAGE_FREE(result_page);
3055 } else {
3056 vm_page_lock_queues();
3057 vm_page_unwire(result_page);
3058 vm_page_unlock_queues();
3059 PAGE_WAKEUP_DONE(result_page);
3060 }
3061
3062 vm_fault_cleanup(result_object, top_page);
3063 }
3064 }
3065
3066 /*
3067 * Inform the physical mapping system that the range
3068 * of addresses may fault, so that page tables and
3069 * such may be unwired themselves.
3070 */
3071
9bccf70c
A
3072 pmap_pageable(pmap, pmap_addr,
3073 pmap_addr + (end_addr - entry->vme_start), TRUE);
1c79356b
A
3074
3075}
3076
3077/*
3078 * vm_fault_wire_fast:
3079 *
3080 * Handle common case of a wire down page fault at the given address.
3081 * If successful, the page is inserted into the associated physical map.
3082 * The map entry is passed in to avoid the overhead of a map lookup.
3083 *
3084 * NOTE: the given address should be truncated to the
3085 * proper page address.
3086 *
3087 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
3088 * a standard error specifying why the fault is fatal is returned.
3089 *
3090 * The map in question must be referenced, and remains so.
3091 * Caller has a read lock on the map.
3092 *
3093 * This is a stripped version of vm_fault() for wiring pages. Anything
3094 * other than the common case will return KERN_FAILURE, and the caller
3095 * is expected to call vm_fault().
3096 */
3097kern_return_t
3098vm_fault_wire_fast(
3099 vm_map_t map,
3100 vm_offset_t va,
3101 vm_map_entry_t entry,
9bccf70c
A
3102 pmap_t pmap,
3103 vm_offset_t pmap_addr)
1c79356b
A
3104{
3105 vm_object_t object;
3106 vm_object_offset_t offset;
3107 register vm_page_t m;
3108 vm_prot_t prot;
3109 thread_act_t thr_act;
9bccf70c 3110 unsigned int cache_attr;
1c79356b
A
3111
3112 VM_STAT(faults++);
3113
3114 if((thr_act=current_act()) && (thr_act->task != TASK_NULL))
3115 thr_act->task->faults++;
3116
3117/*
3118 * Recovery actions
3119 */
3120
3121#undef RELEASE_PAGE
3122#define RELEASE_PAGE(m) { \
3123 PAGE_WAKEUP_DONE(m); \
3124 vm_page_lock_queues(); \
3125 vm_page_unwire(m); \
3126 vm_page_unlock_queues(); \
3127}
3128
3129
3130#undef UNLOCK_THINGS
3131#define UNLOCK_THINGS { \
3132 object->paging_in_progress--; \
3133 vm_object_unlock(object); \
3134}
3135
3136#undef UNLOCK_AND_DEALLOCATE
3137#define UNLOCK_AND_DEALLOCATE { \
3138 UNLOCK_THINGS; \
3139 vm_object_deallocate(object); \
3140}
3141/*
3142 * Give up and have caller do things the hard way.
3143 */
3144
3145#define GIVE_UP { \
3146 UNLOCK_AND_DEALLOCATE; \
3147 return(KERN_FAILURE); \
3148}
3149
3150
3151 /*
3152 * If this entry is not directly to a vm_object, bail out.
3153 */
3154 if (entry->is_sub_map)
3155 return(KERN_FAILURE);
3156
3157 /*
3158 * Find the backing store object and offset into it.
3159 */
3160
3161 object = entry->object.vm_object;
3162 offset = (va - entry->vme_start) + entry->offset;
3163 prot = entry->protection;
3164
3165 /*
3166 * Make a reference to this object to prevent its
3167 * disposal while we are messing with it.
3168 */
3169
3170 vm_object_lock(object);
3171 assert(object->ref_count > 0);
3172 object->ref_count++;
3173 vm_object_res_reference(object);
3174 object->paging_in_progress++;
3175
3176 /*
3177 * INVARIANTS (through entire routine):
3178 *
3179 * 1) At all times, we must either have the object
3180 * lock or a busy page in some object to prevent
3181 * some other thread from trying to bring in
3182 * the same page.
3183 *
3184 * 2) Once we have a busy page, we must remove it from
3185 * the pageout queues, so that the pageout daemon
3186 * will not grab it away.
3187 *
3188 */
3189
3190 /*
3191 * Look for page in top-level object. If it's not there or
3192 * there's something going on, give up.
3193 */
3194 m = vm_page_lookup(object, offset);
3195 if ((m == VM_PAGE_NULL) || (m->busy) ||
3196 (m->unusual && ( m->error || m->restart || m->absent ||
3197 prot & m->page_lock))) {
3198
3199 GIVE_UP;
3200 }
3201
3202 /*
3203 * Wire the page down now. All bail outs beyond this
3204 * point must unwire the page.
3205 */
3206
3207 vm_page_lock_queues();
3208 vm_page_wire(m);
3209 vm_page_unlock_queues();
3210
3211 /*
3212 * Mark page busy for other threads.
3213 */
3214 assert(!m->busy);
3215 m->busy = TRUE;
3216 assert(!m->absent);
3217
3218 /*
3219 * Give up if the page is being written and there's a copy object
3220 */
3221 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3222 RELEASE_PAGE(m);
3223 GIVE_UP;
3224 }
3225
3226 /*
3227 * Put this page into the physical map.
3228 * We have to unlock the object because pmap_enter
3229 * may cause other faults.
3230 */
765c9de3 3231 if (m->no_isync == TRUE) {
de355530 3232 pmap_sync_caches_phys(m->phys_addr);
0b4e3aa0 3233
765c9de3 3234 m->no_isync = FALSE;
0b4e3aa0 3235 }
9bccf70c
A
3236
3237 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
765c9de3 3238
9bccf70c 3239 PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);
1c79356b 3240
1c79356b
A
3241 /*
3242 * Unlock everything, and return
3243 */
3244
3245 PAGE_WAKEUP_DONE(m);
3246 UNLOCK_AND_DEALLOCATE;
3247
3248 return(KERN_SUCCESS);
3249
3250}
3251
3252/*
3253 * Routine: vm_fault_copy_cleanup
3254 * Purpose:
3255 * Release a page used by vm_fault_copy.
3256 */
3257
3258void
3259vm_fault_copy_cleanup(
3260 vm_page_t page,
3261 vm_page_t top_page)
3262{
3263 vm_object_t object = page->object;
3264
3265 vm_object_lock(object);
3266 PAGE_WAKEUP_DONE(page);
3267 vm_page_lock_queues();
3268 if (!page->active && !page->inactive)
3269 vm_page_activate(page);
3270 vm_page_unlock_queues();
3271 vm_fault_cleanup(object, top_page);
3272}
3273
3274void
3275vm_fault_copy_dst_cleanup(
3276 vm_page_t page)
3277{
3278 vm_object_t object;
3279
3280 if (page != VM_PAGE_NULL) {
3281 object = page->object;
3282 vm_object_lock(object);
3283 vm_page_lock_queues();
3284 vm_page_unwire(page);
3285 vm_page_unlock_queues();
3286 vm_object_paging_end(object);
3287 vm_object_unlock(object);
3288 }
3289}
3290
3291/*
3292 * Routine: vm_fault_copy
3293 *
3294 * Purpose:
3295 * Copy pages from one virtual memory object to another --
3296 * neither the source nor destination pages need be resident.
3297 *
3298 * Before actually copying a page, the version associated with
3299 * the destination address map will be verified.
3300 *
3301 * In/out conditions:
3302 * The caller must hold a reference, but not a lock, to
3303 * each of the source and destination objects and to the
3304 * destination map.
3305 *
3306 * Results:
3307 * Returns KERN_SUCCESS if no errors were encountered in
3308 * reading or writing the data. Returns KERN_INTERRUPTED if
3309 * the operation was interrupted (only possible if the
3310 * "interruptible" argument is asserted). Other return values
3311 * indicate a permanent error in copying the data.
3312 *
3313 * The actual amount of data copied will be returned in the
3314 * "copy_size" argument. In the event that the destination map
3315 * verification failed, this amount may be less than the amount
3316 * requested.
3317 */
3318kern_return_t
3319vm_fault_copy(
3320 vm_object_t src_object,
3321 vm_object_offset_t src_offset,
3322 vm_size_t *src_size, /* INOUT */
3323 vm_object_t dst_object,
3324 vm_object_offset_t dst_offset,
3325 vm_map_t dst_map,
3326 vm_map_version_t *dst_version,
3327 int interruptible)
3328{
3329 vm_page_t result_page;
3330
3331 vm_page_t src_page;
3332 vm_page_t src_top_page;
3333 vm_prot_t src_prot;
3334
3335 vm_page_t dst_page;
3336 vm_page_t dst_top_page;
3337 vm_prot_t dst_prot;
3338
3339 vm_size_t amount_left;
3340 vm_object_t old_copy_object;
3341 kern_return_t error = 0;
3342
3343 vm_size_t part_size;
3344
3345 /*
3346 * In order not to confuse the clustered pageins, align
3347 * the different offsets on a page boundary.
3348 */
3349 vm_object_offset_t src_lo_offset = trunc_page_64(src_offset);
3350 vm_object_offset_t dst_lo_offset = trunc_page_64(dst_offset);
3351 vm_object_offset_t src_hi_offset = round_page_64(src_offset + *src_size);
3352 vm_object_offset_t dst_hi_offset = round_page_64(dst_offset + *src_size);
3353
3354#define RETURN(x) \
3355 MACRO_BEGIN \
3356 *src_size -= amount_left; \
3357 MACRO_RETURN(x); \
3358 MACRO_END
3359
3360 amount_left = *src_size;
3361 do { /* while (amount_left > 0) */
3362 /*
3363 * There may be a deadlock if both source and destination
3364 * pages are the same. To avoid this deadlock, the copy must
3365 * start by getting the destination page in order to apply
3366 * COW semantics if any.
3367 */
3368
3369 RetryDestinationFault: ;
3370
3371 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3372
3373 vm_object_lock(dst_object);
3374 vm_object_paging_begin(dst_object);
3375
3376 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3377 switch (vm_fault_page(dst_object,
3378 trunc_page_64(dst_offset),
3379 VM_PROT_WRITE|VM_PROT_READ,
3380 FALSE,
3381 interruptible,
3382 dst_lo_offset,
3383 dst_hi_offset,
3384 VM_BEHAVIOR_SEQUENTIAL,
3385 &dst_prot,
3386 &dst_page,
3387 &dst_top_page,
3388 (int *)0,
3389 &error,
3390 dst_map->no_zero_fill,
0b4e3aa0 3391 FALSE, NULL, 0)) {
1c79356b
A
3392 case VM_FAULT_SUCCESS:
3393 break;
3394 case VM_FAULT_RETRY:
3395 goto RetryDestinationFault;
3396 case VM_FAULT_MEMORY_SHORTAGE:
3397 if (vm_page_wait(interruptible))
3398 goto RetryDestinationFault;
3399 /* fall thru */
3400 case VM_FAULT_INTERRUPTED:
3401 RETURN(MACH_SEND_INTERRUPTED);
3402 case VM_FAULT_FICTITIOUS_SHORTAGE:
3403 vm_page_more_fictitious();
3404 goto RetryDestinationFault;
3405 case VM_FAULT_MEMORY_ERROR:
3406 if (error)
3407 return (error);
3408 else
3409 return(KERN_MEMORY_ERROR);
3410 }
3411 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3412
3413 old_copy_object = dst_page->object->copy;
3414
3415 /*
3416 * There exists the possibility that the source and
3417 * destination page are the same. But we can't
3418 * easily determine that now. If they are the
3419 * same, the call to vm_fault_page() for the
3420 * destination page will deadlock. To prevent this we
3421 * wire the page so we can drop busy without having
3422 * the page daemon steal the page. We clean up the
3423 * top page but keep the paging reference on the object
3424 * holding the dest page so it doesn't go away.
3425 */
3426
3427 vm_page_lock_queues();
3428 vm_page_wire(dst_page);
3429 vm_page_unlock_queues();
3430 PAGE_WAKEUP_DONE(dst_page);
3431 vm_object_unlock(dst_page->object);
3432
3433 if (dst_top_page != VM_PAGE_NULL) {
3434 vm_object_lock(dst_object);
3435 VM_PAGE_FREE(dst_top_page);
3436 vm_object_paging_end(dst_object);
3437 vm_object_unlock(dst_object);
3438 }
3439
3440 RetrySourceFault: ;
3441
3442 if (src_object == VM_OBJECT_NULL) {
3443 /*
3444 * No source object. We will just
3445 * zero-fill the page in dst_object.
3446 */
3447 src_page = VM_PAGE_NULL;
e3027f41 3448 result_page = VM_PAGE_NULL;
1c79356b
A
3449 } else {
3450 vm_object_lock(src_object);
3451 src_page = vm_page_lookup(src_object,
3452 trunc_page_64(src_offset));
e3027f41 3453 if (src_page == dst_page) {
1c79356b 3454 src_prot = dst_prot;
e3027f41
A
3455 result_page = VM_PAGE_NULL;
3456 } else {
1c79356b
A
3457 src_prot = VM_PROT_READ;
3458 vm_object_paging_begin(src_object);
3459
3460 XPR(XPR_VM_FAULT,
3461 "vm_fault_copy(2) -> vm_fault_page\n",
3462 0,0,0,0,0);
3463 switch (vm_fault_page(src_object,
3464 trunc_page_64(src_offset),
3465 VM_PROT_READ,
3466 FALSE,
3467 interruptible,
3468 src_lo_offset,
3469 src_hi_offset,
3470 VM_BEHAVIOR_SEQUENTIAL,
3471 &src_prot,
3472 &result_page,
3473 &src_top_page,
3474 (int *)0,
3475 &error,
3476 FALSE,
0b4e3aa0 3477 FALSE, NULL, 0)) {
1c79356b
A
3478
3479 case VM_FAULT_SUCCESS:
3480 break;
3481 case VM_FAULT_RETRY:
3482 goto RetrySourceFault;
3483 case VM_FAULT_MEMORY_SHORTAGE:
3484 if (vm_page_wait(interruptible))
3485 goto RetrySourceFault;
3486 /* fall thru */
3487 case VM_FAULT_INTERRUPTED:
3488 vm_fault_copy_dst_cleanup(dst_page);
3489 RETURN(MACH_SEND_INTERRUPTED);
3490 case VM_FAULT_FICTITIOUS_SHORTAGE:
3491 vm_page_more_fictitious();
3492 goto RetrySourceFault;
3493 case VM_FAULT_MEMORY_ERROR:
3494 vm_fault_copy_dst_cleanup(dst_page);
3495 if (error)
3496 return (error);
3497 else
3498 return(KERN_MEMORY_ERROR);
3499 }
3500
1c79356b
A
3501
3502 assert((src_top_page == VM_PAGE_NULL) ==
e3027f41 3503 (result_page->object == src_object));
1c79356b
A
3504 }
3505 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
e3027f41 3506 vm_object_unlock(result_page->object);
1c79356b
A
3507 }
3508
3509 if (!vm_map_verify(dst_map, dst_version)) {
e3027f41
A
3510 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3511 vm_fault_copy_cleanup(result_page, src_top_page);
1c79356b
A
3512 vm_fault_copy_dst_cleanup(dst_page);
3513 break;
3514 }
3515
3516 vm_object_lock(dst_page->object);
3517
3518 if (dst_page->object->copy != old_copy_object) {
3519 vm_object_unlock(dst_page->object);
3520 vm_map_verify_done(dst_map, dst_version);
e3027f41
A
3521 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3522 vm_fault_copy_cleanup(result_page, src_top_page);
1c79356b
A
3523 vm_fault_copy_dst_cleanup(dst_page);
3524 break;
3525 }
3526 vm_object_unlock(dst_page->object);
3527
3528 /*
3529 * Copy the page, and note that it is dirty
3530 * immediately.
3531 */
3532
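/*
 * If either offset or the remaining length is not page
 * aligned, copy only a partial page: part_size is the room
 * left in whichever page (source or destination) has the
 * larger intra-page offset, clipped to the amount still to
 * be copied.
 */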
3533 if (!page_aligned(src_offset) ||
3534 !page_aligned(dst_offset) ||
3535 !page_aligned(amount_left)) {
3536
3537 vm_object_offset_t src_po,
3538 dst_po;
3539
3540 src_po = src_offset - trunc_page_64(src_offset);
3541 dst_po = dst_offset - trunc_page_64(dst_offset);
3542
3543 if (dst_po > src_po) {
3544 part_size = PAGE_SIZE - dst_po;
3545 } else {
3546 part_size = PAGE_SIZE - src_po;
3547 }
3548 if (part_size > (amount_left)){
3549 part_size = amount_left;
3550 }
3551
e3027f41 3552 if (result_page == VM_PAGE_NULL) {
1c79356b
A
3553 vm_page_part_zero_fill(dst_page,
3554 dst_po, part_size);
3555 } else {
e3027f41 3556 vm_page_part_copy(result_page, src_po,
1c79356b
A
3557 dst_page, dst_po, part_size);
3558 if(!dst_page->dirty){
3559 vm_object_lock(dst_object);
3560 dst_page->dirty = TRUE;
3561 vm_object_unlock(dst_page->object);
3562 }
3563
3564 }
3565 } else {
3566 part_size = PAGE_SIZE;
3567
e3027f41 3568 if (result_page == VM_PAGE_NULL)
1c79356b
A
3569 vm_page_zero_fill(dst_page);
3570 else{
e3027f41 3571 vm_page_copy(result_page, dst_page);
1c79356b
A
3572 if(!dst_page->dirty){
3573 vm_object_lock(dst_object);
3574 dst_page->dirty = TRUE;
3575 vm_object_unlock(dst_page->object);
3576 }
3577 }
3578
3579 }
3580
3581 /*
3582 * Unlock everything, and return
3583 */
3584
3585 vm_map_verify_done(dst_map, dst_version);
3586
e3027f41
A
3587 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3588 vm_fault_copy_cleanup(result_page, src_top_page);
1c79356b
A
3589 vm_fault_copy_dst_cleanup(dst_page);
3590
3591 amount_left -= part_size;
3592 src_offset += part_size;
3593 dst_offset += part_size;
3594 } while (amount_left > 0);
3595
3596 RETURN(KERN_SUCCESS);
3597#undef RETURN
3598
3599 /*NOTREACHED*/
3600}
3601
3602#ifdef notdef
3603
3604/*
3605 * Routine: vm_fault_page_overwrite
3606 *
3607 * Description:
3608 * A form of vm_fault_page that assumes that the
3609 * resulting page will be overwritten in its entirety,
3610 * making it unnecessary to obtain the correct *contents*
3611 * of the page.
3612 *
3613 * Implementation:
3614 * XXX Untested. Also unused. Eventually, this technology
3615 * could be used in vm_fault_copy() to advantage.
3616 */
3617vm_fault_return_t
3618vm_fault_page_overwrite(
3619 register
3620 vm_object_t dst_object,
3621 vm_object_offset_t dst_offset,
3622 vm_page_t *result_page) /* OUT */
3623{
3624 register
3625 vm_page_t dst_page;
3626 kern_return_t wait_result;
3627
3628#define interruptible THREAD_UNINT /* XXX */
3629
3630 while (TRUE) {
3631 /*
3632 * Look for a page at this offset
3633 */
3634
3635 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3636 == VM_PAGE_NULL) {
3637 /*
3638 * No page, no problem... just allocate one.
3639 */
3640
3641 dst_page = vm_page_alloc(dst_object, dst_offset);
3642 if (dst_page == VM_PAGE_NULL) {
3643 vm_object_unlock(dst_object);
3644 VM_PAGE_WAIT();
3645 vm_object_lock(dst_object);
3646 continue;
3647 }
3648
3649 /*
3650 * Pretend that the memory manager
3651 * write-protected the page.
3652 *
3653 * Note that we will be asking for write
3654 * permission without asking for the data
3655 * first.
3656 */
3657
3658 dst_page->overwriting = TRUE;
3659 dst_page->page_lock = VM_PROT_WRITE;
3660 dst_page->absent = TRUE;
3661 dst_page->unusual = TRUE;
3662 dst_object->absent_count++;
3663
3664 break;
3665
3666 /*
3667 * When we bail out, we might have to throw
3668 * away the page created here.
3669 */
3670
3671#define DISCARD_PAGE \
3672 MACRO_BEGIN \
3673 vm_object_lock(dst_object); \
3674 dst_page = vm_page_lookup(dst_object, dst_offset); \
3675 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3676 VM_PAGE_FREE(dst_page); \
3677 vm_object_unlock(dst_object); \
3678 MACRO_END
3679 }
3680
3681 /*
3682 * If the page is write-protected...
3683 */
3684
3685 if (dst_page->page_lock & VM_PROT_WRITE) {
3686 /*
3687 * ... and an unlock request hasn't been sent
3688 */
3689
3690 if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3691 vm_prot_t u;
3692 kern_return_t rc;
3693
3694 /*
3695 * ... then send one now.
3696 */
3697
3698 if (!dst_object->pager_ready) {
9bccf70c
A
3699 wait_result = vm_object_assert_wait(dst_object,
3700 VM_OBJECT_EVENT_PAGER_READY,
3701 interruptible);
1c79356b 3702 vm_object_unlock(dst_object);
9bccf70c
A
3703 if (wait_result == THREAD_WAITING)
3704 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
3705 if (wait_result != THREAD_AWAKENED) {
3706 DISCARD_PAGE;
3707 return(VM_FAULT_INTERRUPTED);
3708 }
3709 continue;
3710 }
3711
3712 u = dst_page->unlock_request |= VM_PROT_WRITE;
3713 vm_object_unlock(dst_object);
3714
3715 if ((rc = memory_object_data_unlock(
3716 dst_object->pager,
1c79356b
A
3717 dst_offset + dst_object->paging_offset,
3718 PAGE_SIZE,
3719 u)) != KERN_SUCCESS) {
3720 if (vm_fault_debug)
3721 printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3722 DISCARD_PAGE;
3723 return((rc == MACH_SEND_INTERRUPTED) ?
3724 VM_FAULT_INTERRUPTED :
3725 VM_FAULT_MEMORY_ERROR);
3726 }
3727 vm_object_lock(dst_object);
3728 continue;
3729 }
3730
3731 /* ... fall through to wait below */
3732 } else {
3733 /*
3734 * If the page isn't being used for other
3735 * purposes, then we're done.
3736 */
3737 if ( ! (dst_page->busy || dst_page->absent ||
3738 dst_page->error || dst_page->restart) )
3739 break;
3740 }
3741
9bccf70c 3742 wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
1c79356b 3743 vm_object_unlock(dst_object);
9bccf70c
A
3744 if (wait_result == THREAD_WAITING)
3745 wait_result = thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
3746 if (wait_result != THREAD_AWAKENED) {
3747 DISCARD_PAGE;
3748 return(VM_FAULT_INTERRUPTED);
3749 }
3750 }
3751
3752 *result_page = dst_page;
3753 return(VM_FAULT_SUCCESS);
3754
3755#undef interruptible
3756#undef DISCARD_PAGE
3757}
3758
3759#endif /* notdef */
3760
3761#if VM_FAULT_CLASSIFY
3762/*
3763 * Temporary statistics gathering support.
3764 */
3765
3766/*
3767 * Statistics arrays:
3768 */
3769#define VM_FAULT_TYPES_MAX 5
3770#define VM_FAULT_LEVEL_MAX 8
3771
3772int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
3773
3774#define VM_FAULT_TYPE_ZERO_FILL 0
3775#define VM_FAULT_TYPE_MAP_IN 1
3776#define VM_FAULT_TYPE_PAGER 2
3777#define VM_FAULT_TYPE_COPY 3
3778#define VM_FAULT_TYPE_OTHER 4
3779
3780
3781void
3782vm_fault_classify(vm_object_t object,
3783 vm_object_offset_t offset,
3784 vm_prot_t fault_type)
3785{
3786 int type, level = 0;
3787 vm_page_t m;
3788
3789 while (TRUE) {
3790 m = vm_page_lookup(object, offset);
3791 if (m != VM_PAGE_NULL) {
3792 if (m->busy || m->error || m->restart || m->absent ||
3793 fault_type & m->page_lock) {
3794 type = VM_FAULT_TYPE_OTHER;
3795 break;
3796 }
3797 if (((fault_type & VM_PROT_WRITE) == 0) ||
3798 ((level == 0) && object->copy == VM_OBJECT_NULL)) {
3799 type = VM_FAULT_TYPE_MAP_IN;
3800 break;
3801 }
3802 type = VM_FAULT_TYPE_COPY;
3803 break;
3804 }
3805 else {
3806 if (object->pager_created) {
3807 type = VM_FAULT_TYPE_PAGER;
3808 break;
3809 }
3810 if (object->shadow == VM_OBJECT_NULL) {
3811 type = VM_FAULT_TYPE_ZERO_FILL;
3812 break;
3813 }
3814
3815 offset += object->shadow_offset;
3816 object = object->shadow;
3817 level++;
3818 continue;
3819 }
3820 }
3821
3822 if (level > VM_FAULT_LEVEL_MAX)
3823 level = VM_FAULT_LEVEL_MAX;
3824
3825 vm_fault_stats[type][level] += 1;
3826
3827 return;
3828}
3829
3830/* cleanup routine to call from debugger */
3831
3832void
3833vm_fault_classify_init(void)
3834{
3835 int type, level;
3836
3837 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
3838 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
3839 vm_fault_stats[type][level] = 0;
3840 }
3841 }
3842
3843 return;
3844}
3845#endif /* VM_FAULT_CLASSIFY */