osfmk/vm/vm_fault.c

   1 /*
   2  * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /*
  23  * @OSF_COPYRIGHT@
  24  */
  25 /*
  26  * Mach Operating System
  27  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  28  * All Rights Reserved.
  29  *
  30  * Permission to use, copy, modify and distribute this software and its
  31  * documentation is hereby granted, provided that both the copyright
  32  * notice and this permission notice appear in all copies of the
  33  * software, derivative works or modified versions, and any portions
  34  * thereof, and that both notices appear in supporting documentation.
  35  *
  36  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  37  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  38  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  39  *
  40  * Carnegie Mellon requests users of this software to return to
  41  *
  42  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  43  *  School of Computer Science
  44  *  Carnegie Mellon University
  45  *  Pittsburgh PA 15213-3890
  46  *
  47  * any improvements or extensions that they make and grant Carnegie Mellon
  48  * the rights to redistribute these changes.
  49  */
  50 /*
  51  */
  52 /*
  53  *      File:   vm_fault.c
  54  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  55  *
  56  *      Page fault handling module.
  57  */
  58
  59 #include <mach_cluster_stats.h>
  60 #include <mach_pagemap.h>
  61 #include <mach_kdb.h>
  62
  63 #include <mach/mach_types.h>
  64 #include <mach/kern_return.h>
  65 #include <mach/message.h>       /* for error codes */
  66 #include <mach/vm_param.h>
  67 #include <mach/vm_behavior.h>
  68 #include <mach/memory_object.h>
  69                                 /* For memory_object_data_{request,unlock} */
  70
  71 #include <kern/kern_types.h>
  72 #include <kern/host_statistics.h>
  73 #include <kern/counters.h>
  74 #include <kern/task.h>
  75 #include <kern/thread.h>
  76 #include <kern/sched_prim.h>
  77 #include <kern/host.h>
  78 #include <kern/xpr.h>
  79 #include <kern/mach_param.h>
  80 #include <kern/macro_help.h>
  81 #include <kern/zalloc.h>
  82 #include <kern/misc_protos.h>
  83
  84 #include <ppc/proc_reg.h>
  85
  86 #include <vm/vm_fault.h>
  87 #include <vm/task_working_set.h>
  88 #include <vm/vm_map.h>
  89 #include <vm/vm_object.h>
  90 #include <vm/vm_page.h>
  91 #include <vm/vm_kern.h>
  92 #include <vm/pmap.h>
  93 #include <vm/vm_pageout.h>
  94 #include <vm/vm_protos.h>
  95
  96 #include <sys/kdebug.h>
  97
   98 #define VM_FAULT_CLASSIFY       0       /* nonzero: declare the vm_fault_classify*() prototypes below */
   99 #define VM_FAULT_STATIC_CONFIG  1       /* nonzero: compile out the run-time tunables declared below */
  100
  101 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
      /* TRACEFAULTPAGE guards the dbgTrace() calls found throughout this file. */
  102
      /*
       * Not referenced in the part of this file reviewed here.
       * NOTE(review): presumably a cap on absent pages per object -- confirm.
       */
  103 unsigned int    vm_object_absent_max = 50;
  104
      /* Debug knob; not referenced in the part of this file reviewed here. */
  105 int             vm_fault_debug = 0;
  106
      /*
       * Run-time tunables, compiled in only when VM_FAULT_STATIC_CONFIG is 0.
       * In that configuration vm_fault_page() consults
       * vm_fault_dirty_handling (to withhold write permission on faults
       * that did not ask for it) and vm_fault_interruptible (to force
       * THREAD_UNINT when it is FALSE).
       */
  107 #if     !VM_FAULT_STATIC_CONFIG
  108 boolean_t       vm_fault_dirty_handling = FALSE;
  109 boolean_t       vm_fault_interruptible = FALSE;
  110 boolean_t       software_reference_bits = TRUE;
  111 #endif
  112
      /*
       * Kernel-debugger watchpoint list.  When it is non-null,
       * vm_fault_page() withholds write permission on non-write faults so
       * that the later write fault is noticed by the watchpoint code.
       */
  113 #if     MACH_KDB
  114 extern struct db_watchpoint *db_watchpoint_list;
  115 #endif  /* MACH_KDB */
  116
  117
  118 /* Forward declarations of internal routines. */
      /* The definitions are not in the portion of the file reviewed here. */
  119 extern kern_return_t vm_fault_wire_fast(
  120                                 vm_map_t        map,
  121                                 vm_map_offset_t va,
  122                                 vm_map_entry_t  entry,
  123                                 pmap_t          pmap,
  124                                 vm_map_offset_t pmap_addr);
  125
  126 extern void vm_fault_continue(void);
  127
  128 extern void vm_fault_copy_cleanup(
  129                                 vm_page_t       page,
  130                                 vm_page_t       top_page);
  131
  132 extern void vm_fault_copy_dst_cleanup(
  133                                 vm_page_t       page);
  134
      /* Fault-classification hooks; declared only when VM_FAULT_CLASSIFY is nonzero. */
  135 #if     VM_FAULT_CLASSIFY
  136 extern void vm_fault_classify(vm_object_t       object,
  137                           vm_object_offset_t    offset,
  138                           vm_prot_t             fault_type);
  139
  140 extern void vm_fault_classify_init(void);
  141 #endif
 142
  143 /*
  144  *      Routine:        vm_fault_init
  145  *      Purpose:
  146  *              Initialize our private data structures.
       *
       *              The body is currently empty: the routine performs
       *              no initialization and simply returns.
       *      Parameters:
       *              None.
       *      Returns:
       *              Nothing.
  147  */
  148 void
  149 vm_fault_init(void)
  150 {
  151 }
 152
  153 /*
  154  *      Routine:        vm_fault_cleanup
  155  *      Purpose:
  156  *              Clean up the result of vm_fault_page.
       *      Parameters:
       *              object          object whose paging reference and
       *                              lock are to be dropped.
       *              top_page        page to free, or VM_PAGE_NULL if
       *                              there is none.
  157  *      Results:
  158  *              The paging reference for "object" is released.
  159  *              "object" is unlocked.
  160  *              If "top_page" is not null,  "top_page" is
  161  *              freed and the paging reference for the object
  162  *              containing it is released.
  163  *
  164  *      In/out conditions:
  165  *              "object" must be locked.
       *              The object containing "top_page" is locked and
       *              unlocked here, around the free of "top_page".
  166  */
  167 void
  168 vm_fault_cleanup(
  169         register vm_object_t    object,
  170         register vm_page_t      top_page)
  171 {
              /* Drop the paging reference first, then the lock the caller held. */
  172         vm_object_paging_end(object);
  173         vm_object_unlock(object);
  174
  175         if (top_page != VM_PAGE_NULL) {
                  /*
                   * From here on "object" no longer names the argument:
                   * it is reused for the object that owns top_page.
                   */
  176             object = top_page->object;
  177             vm_object_lock(object);
  178             VM_PAGE_FREE(top_page);
  179             vm_object_paging_end(object);
  180             vm_object_unlock(object);
  181         }
  182 }
 183
  184 #if     MACH_CLUSTER_STATS
      /*
       * Optional pagein-cluster statistics: cluster_stats_in[] holds three
       * counters per slot, each bumped through one of the CLUSTER_STAT_*
       * macros below.  CLUSTER_STAT(clause) emits "clause" verbatim.
       * NOTE(review): the array index is presumably a page count within a
       * cluster (bounded by MAXCLUSTERPAGES) -- confirm at the call sites.
       */
  185 #define MAXCLUSTERPAGES 16
  186 struct {
  187         unsigned long pages_in_cluster;
  188         unsigned long pages_at_higher_offsets;
  189         unsigned long pages_at_lower_offsets;
  190 } cluster_stats_in[MAXCLUSTERPAGES];
  191 #define CLUSTER_STAT(clause)    clause
  192 #define CLUSTER_STAT_HIGHER(x)  \
  193         ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
  194 #define CLUSTER_STAT_LOWER(x)   \
  195          ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
  196 #define CLUSTER_STAT_CLUSTER(x) \
  197         ((cluster_stats_in[(x)].pages_in_cluster)++)
  198 #else   /* MACH_CLUSTER_STATS */
      /*
       * Statistics disabled: CLUSTER_STAT(clause) expands to nothing.  The
       * CLUSTER_STAT_HIGHER/LOWER/CLUSTER macros are not defined in this
       * configuration.
       */
  199 #define CLUSTER_STAT(clause)
  200 #endif  /* MACH_CLUSTER_STATS */
  201
  202 /* XXX - temporary */
      /* vm_allow_clustered_pagein is not referenced in the part of this file reviewed here. */
  203 boolean_t vm_allow_clustered_pagein = FALSE;
      /*
       * Incremented by vm_fault_page() when it finds a resident page marked
       * "clustered" and the caller passed no type_of_fault pointer.
       */
  204 int vm_pagein_cluster_used = 0;
  205
      /*
       * Nonzero when no bit of x below PAGE_SIZE_64 is set, i.e. x is a
       * multiple of PAGE_SIZE_64 provided PAGE_SIZE_64 is a power of two.
       */
  206 #define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
  207
  208
      /*
       * Not read in the part of this file reviewed here.
       * NOTE(review): presumably gates calls to vm_fault_deactivate_behind()
       * -- confirm at the call sites.
       */
  209 boolean_t       vm_page_deactivate_behind = TRUE;
  210 /*
  211  * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
       *
       * vm_default_behind is a page count: vm_fault_deactivate_behind()
       * multiplies it by PAGE_SIZE_64 to get a byte distance.
       * vm_default_ahead is not referenced in the part of this file
       * reviewed here.
  212  */
  213 int vm_default_ahead = 0;
  214 int vm_default_behind = MAX_UPL_TRANSFER;
 215
  216 /*
  217  *      vm_fault_deactivate_behind
  218  *
  219  *      Determine if sequential access is in progress
  220  *      in accordance with the behavior specified.  If
  221  *      so, compute a potential page to deactivate and
  222  *      deactivate it.
  223  *
  224  *      The object must be locked.
       *
       *      Parameters:
       *              object          object being accessed; its
       *                              "sequential" (length in bytes of
       *                              the current run) and "last_alloc"
       *                              (offset seen by the previous call)
       *                              fields are updated here.
       *              offset          offset within "object" of the
       *                              current access.
       *              behavior        expected reference pattern
       *                              (VM_BEHAVIOR_*).
       *
       *      Returns:
       *              TRUE if a candidate page was found resident in
       *              "object"; it is deactivated unless it is busy.
       *              FALSE otherwise, and always for kernel_object.
  225  */
  226 static
  227 boolean_t
  228 vm_fault_deactivate_behind(
  229         vm_object_t             object,
  230         vm_object_offset_t      offset,
  231         vm_behavior_t           behavior)
  232 {
  233         vm_page_t m;
  234
  235 #if TRACEFAULTPAGE
  236         dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */
  237 #endif
  238
  239         if (object == kernel_object) {
  240                 /*
  241                  * Do not deactivate pages from the kernel object: they
  242                  * are not intended to become pageable.
                       * Note that this early return also leaves
                       * "sequential" and "last_alloc" untouched.
  243                  */
  244                 return FALSE;
  245         }
  246
  247         switch (behavior) {
  248         case VM_BEHAVIOR_RANDOM:
                      /* No pattern expected: restart the run, no candidate. */
  249                 object->sequential = PAGE_SIZE_64;
  250                 m = VM_PAGE_NULL;
  251                 break;
  252         case VM_BEHAVIOR_SEQUENTIAL:
                      /*
                       * Forward run: the previous access was the page just
                       * below this one.  The "offset &&" test keeps
                       * offset - PAGE_SIZE_64 from being evaluated at
                       * offset 0.  The candidate is the page just behind.
                       */
  253                 if (offset &&
  254                         object->last_alloc == offset - PAGE_SIZE_64) {
  255                         object->sequential += PAGE_SIZE_64;
  256                         m = vm_page_lookup(object, offset - PAGE_SIZE_64);
  257                 } else {
  258                         object->sequential = PAGE_SIZE_64; /* reset */
  259                         m = VM_PAGE_NULL;
  260                 }
  261                 break;
  262         case VM_BEHAVIOR_RSEQNTL:
                      /*
                       * Reverse run: the previous access was the page just
                       * above this one.  The candidate is that page.
                       */
  263                 if (object->last_alloc &&
  264                         object->last_alloc == offset + PAGE_SIZE_64) {
  265                         object->sequential += PAGE_SIZE_64;
  266                         m = vm_page_lookup(object, offset + PAGE_SIZE_64);
  267                 } else {
  268                         object->sequential = PAGE_SIZE_64; /* reset */
  269                         m = VM_PAGE_NULL;
  270                 }
  271                 break;
  272         case VM_BEHAVIOR_DEFAULT:
  273         default:
                      /*
                       * No hint given: detect a forward or reverse run from
                       * last_alloc.  The candidate trails the access by
                       * vm_default_behind pages, and is looked up only once
                       * the run is at least that long.
                       */
  274                 if (offset &&
  275                         object->last_alloc == offset - PAGE_SIZE_64) {
  276                         vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
  277
  278                         object->sequential += PAGE_SIZE_64;
                              /* offset >= behind keeps offset - behind from going below 0. */
  279                         m = (offset >= behind &&
  280                                 object->sequential >= behind) ?
  281                                 vm_page_lookup(object, offset - behind) :
  282                                 VM_PAGE_NULL;
  283                 } else if (object->last_alloc &&
  284                         object->last_alloc == offset + PAGE_SIZE_64) {
  285                         vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
  286
  287                         object->sequential += PAGE_SIZE_64;
                              /*
                               * Assuming vm_object_offset_t is unsigned (its
                               * definition is not in this file), -behind is the
                               * wrapped negation, so offset < -behind keeps
                               * offset + behind from wrapping around.
                               */
  288                         m = (offset < -behind &&
  289                                 object->sequential >= behind) ?
  290                                 vm_page_lookup(object, offset + behind) :
  291                                 VM_PAGE_NULL;
  292                 } else {
  293                         object->sequential = PAGE_SIZE_64;
  294                         m = VM_PAGE_NULL;
  295                 }
  296                 break;
  297         }
  298
              /* Remember this access so the next call can detect a run. */
  299         object->last_alloc = offset;
  300
  301         if (m) {
                      /*
                       * A busy page is left alone, but TRUE is still
                       * returned for it below.  The deactivation itself is
                       * done with the page queues locked.
                       */
  302                 if (!m->busy) {
  303                         vm_page_lock_queues();
  304                         vm_page_deactivate(m);
  305                         vm_page_unlock_queues();
  306 #if TRACEFAULTPAGE
  307                         dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m);  /* (TEST/DEBUG) */
  308 #endif
  309                 }
  310                 return TRUE;
  311         }
  312         return FALSE;
  313 }
 314
 315
 316 /*
 317  *      Routine:        vm_fault_page
 318  *      Purpose:
 319  *              Find the resident page for the virtual memory
 320  *              specified by the given virtual memory object
 321  *              and offset.
 322  *      Additional arguments:
  323  *              The required permissions for the page are given
 324  *              in "fault_type".  Desired permissions are included
 325  *              in "protection".  The minimum and maximum valid offsets
 326  *              within the object for the relevant map entry are
 327  *              passed in "lo_offset" and "hi_offset" respectively and
 328  *              the expected page reference pattern is passed in "behavior".
 329  *              These three parameters are used to determine pagein cluster
 330  *              limits.
 331  *
 332  *              If the desired page is known to be resident (for
 333  *              example, because it was previously wired down), asserting
  334  *              the "must_be_resident" parameter will speed the search.
 335  *
 336  *              If the operation can be interrupted (by thread_abort
 337  *              or thread_terminate), then the "interruptible"
 338  *              parameter should be asserted.
 339  *
 340  *      Results:
 341  *              The page containing the proper data is returned
 342  *              in "result_page".
 343  *
 344  *      In/out conditions:
 345  *              The source object must be locked and referenced,
 346  *              and must donate one paging reference.  The reference
 347  *              is not affected.  The paging reference and lock are
 348  *              consumed.
 349  *
 350  *              If the call succeeds, the object in which "result_page"
 351  *              resides is left locked and holding a paging reference.
 352  *              If this is not the original object, a busy page in the
 353  *              original object is returned in "top_page", to prevent other
 354  *              callers from pursuing this same data, along with a paging
 355  *              reference for the original object.  The "top_page" should
 356  *              be destroyed when this guarantee is no longer required.
 357  *              The "result_page" is also left busy.  It is not removed
 358  *              from the pageout queues.
 359  */
 360
 361 vm_fault_return_t
 362 vm_fault_page(
 363         /* Arguments: */
 364         vm_object_t     first_object,   /* Object to begin search */
 365         vm_object_offset_t first_offset,        /* Offset into object */
 366         vm_prot_t       fault_type,     /* What access is requested */
 367         boolean_t       must_be_resident,/* Must page be resident? */
 368         int             interruptible,  /* how may fault be interrupted? */
 369         vm_map_offset_t lo_offset,      /* Map entry start */
 370         vm_map_offset_t hi_offset,      /* Map entry end */
 371         vm_behavior_t   behavior,       /* Page reference behavior */
 372         /* Modifies in place: */
 373         vm_prot_t       *protection,    /* Protection for mapping */
 374         /* Returns: */
 375         vm_page_t       *result_page,   /* Page found, if successful */
 376         vm_page_t       *top_page,      /* Page in top object, if
 377                                          * not result_page.  */
 378         int             *type_of_fault, /* if non-null, fill in with type of fault
 379                                          * COW, zero-fill, etc... returned in trace point */
 380         /* More arguments: */
 381         kern_return_t   *error_code,    /* code if page is in error */
 382         boolean_t       no_zero_fill,   /* don't zero fill absent pages */
 383         boolean_t       data_supply,    /* treat as data_supply if
 384                                          * it is a write fault and a full
 385                                          * page is provided */
 386         vm_map_t        map,
 387         __unused vm_map_offset_t        vaddr)
 388 {
 389         register
 390         vm_page_t               m;
 391         register
 392         vm_object_t             object;
 393         register
 394         vm_object_offset_t      offset;
 395         vm_page_t               first_m;
 396         vm_object_t             next_object;
 397         vm_object_t             copy_object;
 398         boolean_t               look_for_page;
 399         vm_prot_t               access_required = fault_type;
 400         vm_prot_t               wants_copy_flag;
 401         vm_object_size_t        length;
 402         vm_object_offset_t      cluster_start, cluster_end;
 403         CLUSTER_STAT(int pages_at_higher_offsets;)
 404         CLUSTER_STAT(int pages_at_lower_offsets;)
 405         kern_return_t   wait_result;
 406         boolean_t               interruptible_state;
 407         boolean_t               bumped_pagein = FALSE;
 408
 409
 410 #if     MACH_PAGEMAP
 411 /*
 412  * MACH page map - an optional optimization where a bit map is maintained
 413  * by the VM subsystem for internal objects to indicate which pages of
 414  * the object currently reside on backing store.  This existence map
 415  * duplicates information maintained by the vnode pager.  It is
 416  * created at the time of the first pageout against the object, i.e.
 417  * at the same time pager for the object is created.  The optimization
 418  * is designed to eliminate pager interaction overhead, if it is
 419  * 'known' that the page does not exist on backing store.
 420  *
 421  * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
 422  * either marked as paged out in the existence map for the object or no
 423  * existence map exists for the object.  LOOK_FOR() is one of the
 424  * criteria in the decision to invoke the pager.   It is also used as one
 425  * of the criteria to terminate the scan for adjacent pages in a clustered
 426  * pagein operation.  Note that LOOK_FOR() always evaluates to TRUE for
 427  * permanent objects.  Note also that if the pager for an internal object
 428  * has not been created, the pager is not invoked regardless of the value
 429  * of LOOK_FOR() and that clustered pagein scans are only done on an object
 430  * for which a pager has been created.
 431  *
 432  * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
  433  * is marked as paged out in the existence map for the object.
  434  * PAGED_OUT() is used to determine if a page has already been pushed
 435  * into a copy object in order to avoid a redundant page out operation.
 436  */
 437 #define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
 438                         != VM_EXTERNAL_STATE_ABSENT)
 439 #define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
 440                         == VM_EXTERNAL_STATE_EXISTS)
 441 #else /* MACH_PAGEMAP */
 442 /*
 443  * If the MACH page map optimization is not enabled,
 444  * LOOK_FOR() always evaluates to TRUE.  The pager will always be
 445  * invoked to resolve missing pages in an object, assuming the pager
 446  * has been created for the object.  In a clustered page operation, the
  447  * absence of a page on backing store cannot be used to terminate
 448  * a scan for adjacent pages since that information is available only in
 449  * the pager.  Hence pages that may not be paged out are potentially
 450  * included in a clustered request.  The vnode pager is coded to deal
 451  * with any combination of absent/present pages in a clustered
 452  * pagein request.  PAGED_OUT() always evaluates to FALSE, i.e. the pager
 453  * will always be invoked to push a dirty page into a copy object assuming
 454  * a pager has been created.  If the page has already been pushed, the
  455  * pager will ignore the new request.
 456  */
 457 #define LOOK_FOR(o, f) TRUE
 458 #define PAGED_OUT(o, f) FALSE
 459 #endif /* MACH_PAGEMAP */
 460
 461 /*
 462  *      Recovery actions
 463  */
 464 #define PREPARE_RELEASE_PAGE(m)                         \
 465         MACRO_BEGIN                                     \
 466         vm_page_lock_queues();                          \
 467         MACRO_END
 468
 469 #define DO_RELEASE_PAGE(m)                              \
 470         MACRO_BEGIN                                     \
 471         PAGE_WAKEUP_DONE(m);                            \
 472         if (!m->active && !m->inactive)                 \
 473                 vm_page_activate(m);                    \
 474         vm_page_unlock_queues();                        \
 475         MACRO_END
 476
 477 #define RELEASE_PAGE(m)                                 \
 478         MACRO_BEGIN                                     \
 479         PREPARE_RELEASE_PAGE(m);                        \
 480         DO_RELEASE_PAGE(m);                             \
 481         MACRO_END
 482
 483 #if TRACEFAULTPAGE
 484         dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
 485 #endif
 486
 487
 488
 489 #if     !VM_FAULT_STATIC_CONFIG
 490         if (vm_fault_dirty_handling
 491 #if     MACH_KDB
 492                 /*
 493                  *      If there are watchpoints set, then
 494                  *      we don't want to give away write permission
 495                  *      on a read fault.  Make the task write fault,
 496                  *      so that the watchpoint code notices the access.
 497                  */
 498             || db_watchpoint_list
 499 #endif  /* MACH_KDB */
 500             ) {
 501                 /*
 502                  *      If we aren't asking for write permission,
 503                  *      then don't give it away.  We're using write
 504                  *      faults to set the dirty bit.
 505                  */
 506                 if (!(fault_type & VM_PROT_WRITE))
 507                         *protection &= ~VM_PROT_WRITE;
 508         }
 509
 510         if (!vm_fault_interruptible)
 511                 interruptible = THREAD_UNINT;
 512 #else   /* STATIC_CONFIG */
 513 #if     MACH_KDB
 514                 /*
 515                  *      If there are watchpoints set, then
 516                  *      we don't want to give away write permission
 517                  *      on a read fault.  Make the task write fault,
 518                  *      so that the watchpoint code notices the access.
 519                  */
 520             if (db_watchpoint_list) {
 521                 /*
 522                  *      If we aren't asking for write permission,
 523                  *      then don't give it away.  We're using write
 524                  *      faults to set the dirty bit.
 525                  */
 526                 if (!(fault_type & VM_PROT_WRITE))
 527                         *protection &= ~VM_PROT_WRITE;
 528         }
 529
 530 #endif  /* MACH_KDB */
 531 #endif  /* STATIC_CONFIG */
 532
 533         interruptible_state = thread_interrupt_level(interruptible);
 534
 535         /*
 536          *      INVARIANTS (through entire routine):
 537          *
 538          *      1)      At all times, we must either have the object
 539          *              lock or a busy page in some object to prevent
 540          *              some other thread from trying to bring in
 541          *              the same page.
 542          *
 543          *              Note that we cannot hold any locks during the
 544          *              pager access or when waiting for memory, so
 545          *              we use a busy page then.
 546          *
 547          *              Note also that we aren't as concerned about more than
 548          *              one thread attempting to memory_object_data_unlock
 549          *              the same page at once, so we don't hold the page
 550          *              as busy then, but do record the highest unlock
 551          *              value so far.  [Unlock requests may also be delivered
 552          *              out of order.]
 553          *
 554          *      2)      To prevent another thread from racing us down the
 555          *              shadow chain and entering a new page in the top
 556          *              object before we do, we must keep a busy page in
 557          *              the top object while following the shadow chain.
 558          *
 559          *      3)      We must increment paging_in_progress on any object
 560          *              for which we have a busy page
 561          *
 562          *      4)      We leave busy pages on the pageout queues.
 563          *              If the pageout daemon comes across a busy page,
 564          *              it will remove the page from the pageout queues.
 565          */
 566
 567         /*
 568          *      Search for the page at object/offset.
 569          */
 570
 571         object = first_object;
 572         offset = first_offset;
 573         first_m = VM_PAGE_NULL;
 574         access_required = fault_type;
 575
 576         XPR(XPR_VM_FAULT,
 577                 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
 578                 (integer_t)object, offset, fault_type, *protection, 0);
 579
 580         /*
 581          *      See whether this page is resident
 582          */
 583
 584         while (TRUE) {
 585 #if TRACEFAULTPAGE
 586                 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0);       /* (TEST/DEBUG) */
 587 #endif
 588                 if (!object->alive) {
 589                         vm_fault_cleanup(object, first_m);
 590                         thread_interrupt_level(interruptible_state);
 591                         return(VM_FAULT_MEMORY_ERROR);
 592                 }
 593                 m = vm_page_lookup(object, offset);
 594 #if TRACEFAULTPAGE
 595                 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object);  /* (TEST/DEBUG) */
 596 #endif
 597                 if (m != VM_PAGE_NULL) {
 598                         /*
 599                          *      If the page was pre-paged as part of a
 600                          *      cluster, record the fact.
 601                          *      If we were passed a valid pointer for
  602                          *      "type_of_fault", then we came from
 603                          *      vm_fault... we'll let it deal with
 604                          *      this condition, since it
 605                          *      needs to see m->clustered to correctly
 606                          *      account the pageins.
 607                          */
 608                         if (type_of_fault == NULL && m->clustered) {
 609                                 vm_pagein_cluster_used++;
 610                                 m->clustered = FALSE;
 611                         }
 612
 613                         /*
 614                          *      If the page is being brought in,
 615                          *      wait for it and then retry.
 616                          *
 617                          *      A possible optimization: if the page
 618                          *      is known to be resident, we can ignore
 619                          *      pages that are absent (regardless of
 620                          *      whether they're busy).
 621                          */
 622
 623                         if (m->busy) {
 624 #if TRACEFAULTPAGE
 625                                 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
 626 #endif
 627                                 wait_result = PAGE_SLEEP(object, m, interruptible);
 628                                 XPR(XPR_VM_FAULT,
 629                                     "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
 630                                         (integer_t)object, offset,
 631                                         (integer_t)m, 0, 0);
 632                                 counter(c_vm_fault_page_block_busy_kernel++);
 633
 634                                 if (wait_result != THREAD_AWAKENED) {
 635                                         vm_fault_cleanup(object, first_m);
 636                                         thread_interrupt_level(interruptible_state);
 637                                         if (wait_result == THREAD_RESTART)
 638                                           {
 639                                                 return(VM_FAULT_RETRY);
 640                                           }
 641                                         else
 642                                           {
 643                                                 return(VM_FAULT_INTERRUPTED);
 644                                           }
 645                                 }
 646                                 continue;
 647                         }
 648
 649                         if (m->encrypted) {
 650                                 /*
 651                                  * ENCRYPTED SWAP:
 652                                  * the user needs access to a page that we
 653                                  * encrypted before paging it out.
 654                                  * Decrypt the page now.
 655                                  * Keep it busy to prevent anyone from
 656                                  * accessing it during the decryption.
 657                                  */
 658                                 m->busy = TRUE;
 659                                 vm_page_decrypt(m, 0);
 660                                 assert(object == m->object);
 661                                 assert(m->busy);
 662                                 PAGE_WAKEUP_DONE(m);
 663
 664                                 /*
 665                                  * Retry from the top, in case
 666                                  * something changed while we were
 667                                  * decrypting.
 668                                  */
 669                                 continue;
 670                         }
 671                         ASSERT_PAGE_DECRYPTED(m);
 672
 673                         /*
 674                          *      If the page is in error, give up now.
 675                          */
 676
 677                         if (m->error) {
 678 #if TRACEFAULTPAGE
 679                                 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code);      /* (TEST/DEBUG) */
 680 #endif
 681                                 if (error_code)
 682                                         *error_code = m->page_error;
 683                                 VM_PAGE_FREE(m);
 684                                 vm_fault_cleanup(object, first_m);
 685                                 thread_interrupt_level(interruptible_state);
 686                                 return(VM_FAULT_MEMORY_ERROR);
 687                         }
 688
 689                         /*
 690                          *      If the pager wants us to restart
 691                          *      at the top of the chain,
 692                          *      typically because it has moved the
 693                          *      page to another pager, then do so.
 694                          */
 695
 696                         if (m->restart) {
 697 #if TRACEFAULTPAGE
 698                                 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
 699 #endif
 700                                 VM_PAGE_FREE(m);
 701                                 vm_fault_cleanup(object, first_m);
 702                                 thread_interrupt_level(interruptible_state);
 703                                 return(VM_FAULT_RETRY);
 704                         }
 705
 706                         /*
 707                          *      If the page isn't busy, but is absent,
 708                          *      then it was deemed "unavailable".
 709                          */
 710
 711                         if (m->absent) {
 712                                 /*
 713                                  * Remove the non-existent page (unless it's
 714                                  * in the top object) and move on down to the
 715                                  * next object (if there is one).
 716                                  */
 717 #if TRACEFAULTPAGE
 718                                 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow);  /* (TEST/DEBUG) */
 719 #endif
 720
 721                                 next_object = object->shadow;
 722                                 if (next_object == VM_OBJECT_NULL) {
 723                                         vm_page_t real_m;
 724
 725                                         assert(!must_be_resident);
 726
 727                                         if (object->shadow_severed) {
 728                                                 vm_fault_cleanup(
 729                                                         object, first_m);
 730                                                 thread_interrupt_level(interruptible_state);
 731                                                 return VM_FAULT_MEMORY_ERROR;
 732                                         }
 733
 734                                         /*
 735                                          * Absent page at bottom of shadow
 736                                          * chain; zero fill the page we left
 737                                          * busy in the first object, and flush
 738                                          * the absent page.  But first we
 739                                          * need to allocate a real page.
 740                                          */
 741                                         if (VM_PAGE_THROTTLED() ||
 742                                             (real_m = vm_page_grab())
 743                                                         == VM_PAGE_NULL) {
 744                                                 vm_fault_cleanup(
 745                                                         object, first_m);
 746                                                 thread_interrupt_level(
 747                                                         interruptible_state);
 748                                                 return(
 749                                                    VM_FAULT_MEMORY_SHORTAGE);
 750                                         }
 751
 752                                         /*
 753                                          * Are we protecting the system from
 754                                          * backing store exhaustion?  If so,
 755                                          * sleep unless we are privileged.
 756                                          */
 757
 758                                         if(vm_backing_store_low) {
 759                                            if(!(current_task()->priv_flags
 760                                                 & VM_BACKING_STORE_PRIV)) {
 761                                                 assert_wait((event_t)
 762                                                         &vm_backing_store_low,
 763                                                         THREAD_UNINT);
 764                                                 vm_fault_cleanup(object,
 765                                                                     first_m);
 766                                                 thread_block(THREAD_CONTINUE_NULL);
 767                                                 thread_interrupt_level(
 768                                                         interruptible_state);
 769                                                 return(VM_FAULT_RETRY);
 770                                            }
 771                                         }
 772
 773
 774                                         XPR(XPR_VM_FAULT,
 775               "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
 776                                                 (integer_t)object, offset,
 777                                                 (integer_t)m,
 778                                                 (integer_t)first_object, 0);
 779                                         if (object != first_object) {
 780                                                 VM_PAGE_FREE(m);
 781                                                 vm_object_paging_end(object);
 782                                                 vm_object_unlock(object);
 783                                                 object = first_object;
 784                                                 offset = first_offset;
 785                                                 m = first_m;
 786                                                 first_m = VM_PAGE_NULL;
 787                                                 vm_object_lock(object);
 788                                         }
 789
 790                                         VM_PAGE_FREE(m);
 791                                         assert(real_m->busy);
 792                                         vm_page_insert(real_m, object, offset);
 793                                         m = real_m;
 794
 795                                         /*
 796                                          *  Drop the lock while zero filling
 797                                          *  page.  Then break because this
 798                                          *  is the page we wanted.  Checking
 799                                          *  the page lock is a waste of time;
 800                                          *  this page was either absent or
 801                                          *  newly allocated -- in both cases
 802                                          *  it can't be page locked by a pager.
 803                                          */
 804                                         m->no_isync = FALSE;
 805
 806                                         if (!no_zero_fill) {
 807                                                 vm_object_unlock(object);
 808                                                 vm_page_zero_fill(m);
 809                                                 vm_object_lock(object);
 810
 811                                                 if (type_of_fault)
 812                                                         *type_of_fault = DBG_ZERO_FILL_FAULT;
 813                                                 VM_STAT(zero_fill_count++);
 814                                         }
 815                                         if (bumped_pagein == TRUE) {
 816                                                 VM_STAT(pageins--);
 817                                                 current_task()->pageins--;
 818                                         }
 819                                         vm_page_lock_queues();
 820                                         VM_PAGE_QUEUES_REMOVE(m);
 821                                         m->page_ticket = vm_page_ticket;
 822                                         assert(!m->laundry);
 823                                         assert(m->object != kernel_object);
 824                                         assert(m->pageq.next == NULL &&
 825                                                m->pageq.prev == NULL);
 826                                         if(m->object->size > 0x200000) {
 827                                                 m->zero_fill = TRUE;
 828                                                 /* depends on the queues lock */
 829                                                 vm_zf_count += 1;
 830                                                 queue_enter(&vm_page_queue_zf,
 831                                                         m, vm_page_t, pageq);
 832                                         } else {
 833                                                 queue_enter(
 834                                                         &vm_page_queue_inactive,
 835                                                         m, vm_page_t, pageq);
 836                                         }
 837                                         vm_page_ticket_roll++;
 838                                         if(vm_page_ticket_roll ==
 839                                                 VM_PAGE_TICKETS_IN_ROLL) {
 840                                                 vm_page_ticket_roll = 0;
 841                                                 if(vm_page_ticket ==
 842                                                      VM_PAGE_TICKET_ROLL_IDS)
 843                                                         vm_page_ticket= 0;
 844                                                 else
 845                                                         vm_page_ticket++;
 846                                         }
 847                                         m->inactive = TRUE;
 848                                         vm_page_inactive_count++;
 849                                         vm_page_unlock_queues();
 850                                         break;
 851                                 } else {
 852                                         if (must_be_resident) {
 853                                                 vm_object_paging_end(object);
 854                                         } else if (object != first_object) {
 855                                                 vm_object_paging_end(object);
 856                                                 VM_PAGE_FREE(m);
 857                                         } else {
 858                                                 first_m = m;
 859                                                 m->absent = FALSE;
 860                                                 m->unusual = FALSE;
 861                                                 vm_object_absent_release(object);
 862                                                 m->busy = TRUE;
 863
 864                                                 vm_page_lock_queues();
 865                                                 VM_PAGE_QUEUES_REMOVE(m);
 866                                                 vm_page_unlock_queues();
 867                                         }
 868                                         XPR(XPR_VM_FAULT,
 869                                             "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
 870                                                 (integer_t)object, offset,
 871                                                 (integer_t)next_object,
 872                                                 offset+object->shadow_offset,0);
 873                                         offset += object->shadow_offset;
 874                                         hi_offset += object->shadow_offset;
 875                                         lo_offset += object->shadow_offset;
 876                                         access_required = VM_PROT_READ;
 877                                         vm_object_lock(next_object);
 878                                         vm_object_unlock(object);
 879                                         object = next_object;
 880                                         vm_object_paging_begin(object);
 881                                         continue;
 882                                 }
 883                         }
 884
 885                         if ((m->cleaning)
 886                                 && ((object != first_object) ||
 887                                     (object->copy != VM_OBJECT_NULL))
 888                                 && (fault_type & VM_PROT_WRITE)) {
 889                                 /*
 890                                  * This is a copy-on-write fault that will
 891                                  * cause us to revoke access to this page, but
 892                                  * this page is in the process of being cleaned
 893                                  * in a clustered pageout. We must wait until
 894                                  * the cleaning operation completes before
 895                                  * revoking access to the original page,
 896                                  * otherwise we might attempt to remove a
 897                                  * wired mapping.
 898                                  */
 899 #if TRACEFAULTPAGE
 900                                 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset);  /* (TEST/DEBUG) */
 901 #endif
 902                                 XPR(XPR_VM_FAULT,
 903                                     "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
 904                                         (integer_t)object, offset,
 905                                         (integer_t)m, 0, 0);
 906                                 /* take an extra ref so that object won't die */
 907                                 assert(object->ref_count > 0);
 908                                 object->ref_count++;
 909                                 vm_object_res_reference(object);
 910                                 vm_fault_cleanup(object, first_m);
 911                                 counter(c_vm_fault_page_block_backoff_kernel++);
 912                                 vm_object_lock(object);
 913                                 assert(object->ref_count > 0);
 914                                 m = vm_page_lookup(object, offset);
 915                                 if (m != VM_PAGE_NULL && m->cleaning) {
 916                                         PAGE_ASSERT_WAIT(m, interruptible);
 917                                         vm_object_unlock(object);
 918                                         wait_result = thread_block(THREAD_CONTINUE_NULL);
 919                                         vm_object_deallocate(object);
 920                                         goto backoff;
 921                                 } else {
 922                                         vm_object_unlock(object);
 923                                         vm_object_deallocate(object);
 924                                         thread_interrupt_level(interruptible_state);
 925                                         return VM_FAULT_RETRY;
 926                                 }
 927                         }
 928
 929                         /*
 930                          *      If the desired access to this page has
 931                          *      been locked out, request that it be unlocked.
 932                          */
 933
 934                         if (access_required & m->page_lock) {
 935                                 if ((access_required & m->unlock_request) != access_required) {
 936                                         vm_prot_t       new_unlock_request;
 937                                         kern_return_t   rc;
 938
 939 #if TRACEFAULTPAGE
 940                                         dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready);     /* (TEST/DEBUG) */
 941 #endif
 942                                         if (!object->pager_ready) {
 943                                         XPR(XPR_VM_FAULT,
 944                                             "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
 945                                                 access_required,
 946                                                 (integer_t)object, offset,
 947                                                 (integer_t)m, 0);
 948                                                 /* take an extra ref */
 949                                                 assert(object->ref_count > 0);
 950                                                 object->ref_count++;
 951                                                 vm_object_res_reference(object);
 952                                                 vm_fault_cleanup(object,
 953                                                                  first_m);
 954                                                 counter(c_vm_fault_page_block_backoff_kernel++);
 955                                                 vm_object_lock(object);
 956                                                 assert(object->ref_count > 0);
 957                                                 if (!object->pager_ready) {
 958                                                         wait_result = vm_object_assert_wait(
 959                                                                 object,
 960                                                                 VM_OBJECT_EVENT_PAGER_READY,
 961                                                                 interruptible);
 962                                                         vm_object_unlock(object);
 963                                                         if (wait_result == THREAD_WAITING)
 964                                                                 wait_result = thread_block(THREAD_CONTINUE_NULL);
 965                                                         vm_object_deallocate(object);
 966                                                         goto backoff;
 967                                                 } else {
 968                                                         vm_object_unlock(object);
 969                                                         vm_object_deallocate(object);
 970                                                         thread_interrupt_level(interruptible_state);
 971                                                         return VM_FAULT_RETRY;
 972                                                 }
 973                                         }
 974
 975                                         new_unlock_request = m->unlock_request =
 976                                                 (access_required | m->unlock_request);
 977                                         vm_object_unlock(object);
 978                                         XPR(XPR_VM_FAULT,
 979                                             "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
 980                                         (integer_t)object, offset,
 981                                         (integer_t)m, new_unlock_request, 0);
 982                                         if ((rc = memory_object_data_unlock(
 983                                                 object->pager,
 984                                                 offset + object->paging_offset,
 985                                                 PAGE_SIZE,
 986                                                 new_unlock_request))
 987                                              != KERN_SUCCESS) {
 988                                                 if (vm_fault_debug)
 989                                                     printf("vm_fault: memory_object_data_unlock failed\n");
 990                                                 vm_object_lock(object);
 991                                                 vm_fault_cleanup(object, first_m);
 992                                                 thread_interrupt_level(interruptible_state);
 993                                                 return((rc == MACH_SEND_INTERRUPTED) ?
 994                                                         VM_FAULT_INTERRUPTED :
 995                                                         VM_FAULT_MEMORY_ERROR);
 996                                         }
 997                                         vm_object_lock(object);
 998                                         continue;
 999                                 }
1000
1001                                 XPR(XPR_VM_FAULT,
1002         "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
1003                                         access_required, (integer_t)object,
1004                                         offset, (integer_t)m, 0);
1005                                 /* take an extra ref so object won't die */
1006                                 assert(object->ref_count > 0);
1007                                 object->ref_count++;
1008                                 vm_object_res_reference(object);
1009                                 vm_fault_cleanup(object, first_m);
1010                                 counter(c_vm_fault_page_block_backoff_kernel++);
1011                                 vm_object_lock(object);
1012                                 assert(object->ref_count > 0);
1013                                 m = vm_page_lookup(object, offset);
1014                                 if (m != VM_PAGE_NULL &&
1015                                     (access_required & m->page_lock) &&
1016                                     !((access_required & m->unlock_request) != access_required)) {
1017                                         PAGE_ASSERT_WAIT(m, interruptible);
1018                                         vm_object_unlock(object);
1019                                         wait_result = thread_block(THREAD_CONTINUE_NULL);
1020                                         vm_object_deallocate(object);
1021                                         goto backoff;
1022                                 } else {
1023                                         vm_object_unlock(object);
1024                                         vm_object_deallocate(object);
1025                                         thread_interrupt_level(interruptible_state);
1026                                         return VM_FAULT_RETRY;
1027                                 }
1028                         }
1029                         /*
1030                          *      We mark the page busy and leave it on
1031                          *      the pageout queues.  If the pageout
1032                          *      daemon comes across it, then it will
1033                          *      remove the page.
1034                          */
1035
1036 #if TRACEFAULTPAGE
1037                         dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
1038 #endif
1039
1040 #if     !VM_FAULT_STATIC_CONFIG
1041                         if (!software_reference_bits) {
1042                                 vm_page_lock_queues();
1043                                 if (m->inactive)
1044                                         vm_stat.reactivations++;
1045
1046                                 VM_PAGE_QUEUES_REMOVE(m);
1047                                 vm_page_unlock_queues();
1048                         }
1049 #endif
1050                         XPR(XPR_VM_FAULT,
1051                             "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
1052                                 (integer_t)object, offset, (integer_t)m, 0, 0);
1053                         assert(!m->busy);
1054                         m->busy = TRUE;
1055                         assert(!m->absent);
1056                         break;
1057                 }
1058
1059                 look_for_page =
1060                         (object->pager_created) &&
1061                           LOOK_FOR(object, offset) &&
1062                             (!data_supply);
1063
1064 #if TRACEFAULTPAGE
1065                 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object);      /* (TEST/DEBUG) */
1066 #endif
1067                 if ((look_for_page || (object == first_object))
1068                                 && !must_be_resident
1069                                 && !(object->phys_contiguous))  {
1070                         /*
1071                          *      Allocate a new page for this object/offset
1072                          *      pair.
1073                          */
1074
1075                         m = vm_page_grab_fictitious();
1076 #if TRACEFAULTPAGE
1077                         dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object);  /* (TEST/DEBUG) */
1078 #endif
1079                         if (m == VM_PAGE_NULL) {
1080                                 vm_fault_cleanup(object, first_m);
1081                                 thread_interrupt_level(interruptible_state);
1082                                 return(VM_FAULT_FICTITIOUS_SHORTAGE);
1083                         }
1084                         vm_page_insert(m, object, offset);
1085                 }
1086
1087                 if ((look_for_page && !must_be_resident)) {
1088                         kern_return_t   rc;
1089
1090                         /*
1091                          *      If the memory manager is not ready, we
1092                          *      cannot make requests.
1093                          */
1094                         if (!object->pager_ready) {
1095 #if TRACEFAULTPAGE
1096                                 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0);       /* (TEST/DEBUG) */
1097 #endif
1098                                 if(m != VM_PAGE_NULL)
1099                                         VM_PAGE_FREE(m);
1100                                 XPR(XPR_VM_FAULT,
1101                                 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
1102                                         (integer_t)object, offset, 0, 0, 0);
1103                                 /* take an extra ref so object won't die */
1104                                 assert(object->ref_count > 0);
1105                                 object->ref_count++;
1106                                 vm_object_res_reference(object);
1107                                 vm_fault_cleanup(object, first_m);
1108                                 counter(c_vm_fault_page_block_backoff_kernel++);
1109                                 vm_object_lock(object);
1110                                 assert(object->ref_count > 0);
1111                                 if (!object->pager_ready) {
1112                                         wait_result = vm_object_assert_wait(object,
1113                                                               VM_OBJECT_EVENT_PAGER_READY,
1114                                                               interruptible);
1115                                         vm_object_unlock(object);
1116                                         if (wait_result == THREAD_WAITING)
1117                                                 wait_result = thread_block(THREAD_CONTINUE_NULL);
1118                                         vm_object_deallocate(object);
1119                                         goto backoff;
1120                                 } else {
1121                                         vm_object_unlock(object);
1122                                         vm_object_deallocate(object);
1123                                         thread_interrupt_level(interruptible_state);
1124                                         return VM_FAULT_RETRY;
1125                                 }
1126                         }
1127
1128                         if(object->phys_contiguous) {
1129                                 if(m != VM_PAGE_NULL) {
1130                                         VM_PAGE_FREE(m);
1131                                         m = VM_PAGE_NULL;
1132                                 }
1133                                 goto no_clustering;
1134                         }
1135                         if (object->internal) {
1136                                 /*
1137                                  *      Requests to the default pager
1138                                  *      must reserve a real page in advance,
1139                                  *      because the pager's data-provided
1140                                  *      won't block for pages.  IMPORTANT:
1141                                  *      this acts as a throttling mechanism
1142                                  *      for data_requests to the default
1143                                  *      pager.
1144                                  */
1145
1146 #if TRACEFAULTPAGE
1147                                 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
1148 #endif
1149                                 if (m->fictitious && !vm_page_convert(m)) {
1150                                         VM_PAGE_FREE(m);
1151                                         vm_fault_cleanup(object, first_m);
1152                                         thread_interrupt_level(interruptible_state);
1153                                         return(VM_FAULT_MEMORY_SHORTAGE);
1154                                 }
1155                         } else if (object->absent_count >
1156                                                 vm_object_absent_max) {
1157                                 /*
1158                                  *      If there are too many outstanding page
1159                                  *      requests pending on this object, we
1160                                  *      wait for them to be resolved now.
1161                                  */
1162
1163 #if TRACEFAULTPAGE
1164                                 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
1165 #endif
1166                                 if(m != VM_PAGE_NULL)
1167                                         VM_PAGE_FREE(m);
1168                                 /* take an extra ref so object won't die */
1169                                 assert(object->ref_count > 0);
1170                                 object->ref_count++;
1171                                 vm_object_res_reference(object);
1172                                 vm_fault_cleanup(object, first_m);
1173                                 counter(c_vm_fault_page_block_backoff_kernel++);
1174                                 vm_object_lock(object);
1175                                 assert(object->ref_count > 0);
1176                                 if (object->absent_count > vm_object_absent_max) {
1177                                         vm_object_absent_assert_wait(object,
1178                                                                      interruptible);
1179                                         vm_object_unlock(object);
1180                                         wait_result = thread_block(THREAD_CONTINUE_NULL);
1181                                         vm_object_deallocate(object);
1182                                         goto backoff;
1183                                 } else {
1184                                         vm_object_unlock(object);
1185                                         vm_object_deallocate(object);
1186                                         thread_interrupt_level(interruptible_state);
1187                                         return VM_FAULT_RETRY;
1188                                 }
1189                         }
1190
1191                         /*
1192                          *      Indicate that the page is waiting for data
1193                          *      from the memory manager.
1194                          */
1195
1196                         if(m != VM_PAGE_NULL) {
1197
1198                                 m->list_req_pending = TRUE;
1199                                 m->absent = TRUE;
1200                                 m->unusual = TRUE;
1201                                 object->absent_count++;
1202
1203                         }
1204
1205 no_clustering:
1206                         cluster_start = offset;
1207                         length = PAGE_SIZE;
1208
1209                         /*
1210                          * lengthen the cluster by the pages in the working set
1211                          */
1212                         if((map != NULL) &&
1213                                 (current_task()->dynamic_working_set != 0)) {
1214                                 cluster_end = cluster_start + length;
1215                                 /* tws values for start and end are just
1216                                  * suggestions.  Therefore, as long as
1217                                  * build_cluster does not use pointers or
1218                                  * take action based on values that
1219                                  * could be affected by re-entrance we
1220                                  * do not need to take the map lock.
1221                                  */
1222                                 cluster_end = offset + PAGE_SIZE_64;
1223                                 tws_build_cluster(
1224                                         current_task()->dynamic_working_set,
1225                                         object, &cluster_start,
1226                                         &cluster_end, 0x40000);
1227                                 length = cluster_end - cluster_start;
1228                         }
1229 #if TRACEFAULTPAGE
1230                         dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0);  /* (TEST/DEBUG) */
1231 #endif
1232                         /*
1233                          *      We have a busy page, so we can
1234                          *      release the object lock.
1235                          */
1236                         vm_object_unlock(object);
1237
1238                         /*
1239                          *      Call the memory manager to retrieve the data.
1240                          */
1241
1242                         if (type_of_fault)
1243                                 *type_of_fault = ((int)length << 8) | DBG_PAGEIN_FAULT;
1244                         VM_STAT(pageins++);
1245                         current_task()->pageins++;
1246                         bumped_pagein = TRUE;
1247
1248                         /*
1249                          *      If this object uses a copy_call strategy,
1250                          *      and we are interested in a copy of this object
1251                          *      (having gotten here only by following a
1252                          *      shadow chain), then tell the memory manager
1253                          *      via a flag added to the desired_access
1254                          *      parameter, so that it can detect a race
1255                          *      between our walking down the shadow chain
1256                          *      and its pushing pages up into a copy of
1257                          *      the object that it manages.
1258                          */
1259
1260                         if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1261                             object != first_object) {
1262                                 wants_copy_flag = VM_PROT_WANTS_COPY;
1263                         } else {
1264                                 wants_copy_flag = VM_PROT_NONE;
1265                         }
1266
1267                         XPR(XPR_VM_FAULT,
1268                             "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1269                                 (integer_t)object, offset, (integer_t)m,
1270                                 access_required | wants_copy_flag, 0);
1271
1272                         rc = memory_object_data_request(object->pager,
1273                                         cluster_start + object->paging_offset,
1274                                         length,
1275                                         access_required | wants_copy_flag);
1276
1277
1278 #if TRACEFAULTPAGE
1279                         dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1280 #endif
1281                         if (rc != KERN_SUCCESS) {
1282                                 if (rc != MACH_SEND_INTERRUPTED
1283                                     && vm_fault_debug)
1284                                         printf("%s(0x%x, 0x%xll, 0x%xll, 0x%x) failed, rc=%d\n",
1285                                                 "memory_object_data_request",
1286                                                 object->pager,
1287                                                 cluster_start + object->paging_offset,
1288                                                 length, access_required, rc);
1289                                 /*
1290                                  *      Don't want to leave a busy page around,
1291                                  *      but the data request may have blocked,
1292                                  *      so check if it's still there and busy.
1293                                  */
1294                                 if(!object->phys_contiguous) {
1295                                    vm_object_lock(object);
1296                                    for (; length; length -= PAGE_SIZE,
1297                                       cluster_start += PAGE_SIZE_64) {
1298                                       vm_page_t p;
1299                                       if ((p = vm_page_lookup(object,
1300                                                                 cluster_start))
1301                                             && p->absent && p->busy
1302                                             && p != first_m) {
1303                                          VM_PAGE_FREE(p);
1304                                       }
1305                                    }
1306                                 }
1307                                 vm_fault_cleanup(object, first_m);
1308                                 thread_interrupt_level(interruptible_state);
1309                                 return((rc == MACH_SEND_INTERRUPTED) ?
1310                                         VM_FAULT_INTERRUPTED :
1311                                         VM_FAULT_MEMORY_ERROR);
1312                         }
1313
1314                         vm_object_lock(object);
1315                         if ((interruptible != THREAD_UNINT) &&
1316                             (current_thread()->state & TH_ABORT)) {
1317                                 vm_fault_cleanup(object, first_m);
1318                                 thread_interrupt_level(interruptible_state);
1319                                 return(VM_FAULT_INTERRUPTED);
1320                         }
1321                         if (m == VM_PAGE_NULL &&
1322                             object->phys_contiguous) {
1323                                 /*
1324                                  * No page here means that the object we
1325                                  * initially looked up was "physically
1326                                  * contiguous" (i.e. device memory).  However,
1327                                  * with Virtual VRAM, the object might not
1328                                  * be backed by that device memory anymore,
1329                                  * so we're done here only if the object is
1330                                  * still "phys_contiguous".
1331                                  * Otherwise, if the object is no longer
1332                                  * "phys_contiguous", we need to retry the
1333                                  * page fault against the object's new backing
1334                                  * store (different memory object).
1335                                  */
1336                                 break;
1337                         }
1338
1339                         /*
1340                          * Retry with same object/offset, since new data may
1341                          * be in a different page (i.e., m is meaningless at
1342                          * this point).
1343                          */
1344                         continue;
1345                 }
1346
1347                 /*
1348                  * The only case in which we get here is if
1349                  * object has no pager (or unwiring).  If the pager doesn't
1350                  * have the page this is handled in the m->absent case above
1351                  * (and if you change things here you should look above).
1352                  */
1353 #if TRACEFAULTPAGE
1354                 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m);  /* (TEST/DEBUG) */
1355 #endif
1356                 if (object == first_object)
1357                         first_m = m;
1358                 else
1359                         assert(m == VM_PAGE_NULL);
1360
1361                 XPR(XPR_VM_FAULT,
1362                     "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1363                         (integer_t)object, offset, (integer_t)m,
1364                         (integer_t)object->shadow, 0);
1365                 /*
1366                  *      Move on to the next object.  Lock the next
1367                  *      object before unlocking the current one.
1368                  */
1369                 next_object = object->shadow;
1370                 if (next_object == VM_OBJECT_NULL) {
1371                         assert(!must_be_resident);
1372                         /*
1373                          *      If there's no object left, fill the page
1374                          *      in the top object with zeros.  But first we
1375                          *      need to allocate a real page.
1376                          */
1377
1378                         if (object != first_object) {
1379                                 vm_object_paging_end(object);
1380                                 vm_object_unlock(object);
1381
1382                                 object = first_object;
1383                                 offset = first_offset;
1384                                 vm_object_lock(object);
1385                         }
1386
1387                         m = first_m;
1388                         assert(m->object == object);
1389                         first_m = VM_PAGE_NULL;
1390
1391                         if(m == VM_PAGE_NULL) {
1392                                 m = vm_page_grab();
1393                                 if (m == VM_PAGE_NULL) {
1394                                         vm_fault_cleanup(
1395                                                 object, VM_PAGE_NULL);
1396                                         thread_interrupt_level(
1397                                                 interruptible_state);
1398                                         return(VM_FAULT_MEMORY_SHORTAGE);
1399                                 }
1400                                 vm_page_insert(
1401                                         m, object, offset);
1402                         }
1403
1404                         if (object->shadow_severed) {
1405                                 VM_PAGE_FREE(m);
1406                                 vm_fault_cleanup(object, VM_PAGE_NULL);
1407                                 thread_interrupt_level(interruptible_state);
1408                                 return VM_FAULT_MEMORY_ERROR;
1409                         }
1410
1411                         /*
1412                          * Are we protecting the system from
1413                          * backing store exhaustion?  If so,
1414                          * sleep unless we are privileged.
1415                          */
1416
1417                         if(vm_backing_store_low) {
1418                                 if(!(current_task()->priv_flags
1419                                                 & VM_BACKING_STORE_PRIV)) {
1420                                         assert_wait((event_t)
1421                                                 &vm_backing_store_low,
1422                                                 THREAD_UNINT);
1423                                         VM_PAGE_FREE(m);
1424                                         vm_fault_cleanup(object, VM_PAGE_NULL);
1425                                         thread_block(THREAD_CONTINUE_NULL);
1426                                         thread_interrupt_level(
1427                                                 interruptible_state);
1428                                         return(VM_FAULT_RETRY);
1429                                 }
1430                         }
1431
1432                         if (VM_PAGE_THROTTLED() ||
1433                             (m->fictitious && !vm_page_convert(m))) {
1434                                 VM_PAGE_FREE(m);
1435                                 vm_fault_cleanup(object, VM_PAGE_NULL);
1436                                 thread_interrupt_level(interruptible_state);
1437                                 return(VM_FAULT_MEMORY_SHORTAGE);
1438                         }
1439                         m->no_isync = FALSE;
1440
1441                         if (!no_zero_fill) {
1442                                 vm_object_unlock(object);
1443                                 vm_page_zero_fill(m);
1444                                 vm_object_lock(object);
1445
1446                                 if (type_of_fault)
1447                                         *type_of_fault = DBG_ZERO_FILL_FAULT;
1448                                 VM_STAT(zero_fill_count++);
1449                         }
1450                         if (bumped_pagein == TRUE) {
1451                                 VM_STAT(pageins--);
1452                                 current_task()->pageins--;
1453                         }
1454                         vm_page_lock_queues();
1455                         VM_PAGE_QUEUES_REMOVE(m);
1456                         assert(!m->laundry);
1457                         assert(m->object != kernel_object);
1458                         assert(m->pageq.next == NULL &&
1459                                m->pageq.prev == NULL);
1460                         if(m->object->size > 0x200000) {
1461                                 m->zero_fill = TRUE;
1462                                 /* depends on the queues lock */
1463                                 vm_zf_count += 1;
1464                                 queue_enter(&vm_page_queue_zf,
1465                                         m, vm_page_t, pageq);
1466                         } else {
1467                                 queue_enter(
1468                                         &vm_page_queue_inactive,
1469                                         m, vm_page_t, pageq);
1470                         }
1471                         m->page_ticket = vm_page_ticket;
1472                         vm_page_ticket_roll++;
1473                         if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1474                                 vm_page_ticket_roll = 0;
1475                                 if(vm_page_ticket ==
1476                                         VM_PAGE_TICKET_ROLL_IDS)
1477                                         vm_page_ticket= 0;
1478                                 else
1479                                         vm_page_ticket++;
1480                         }
1481                         m->inactive = TRUE;
1482                         vm_page_inactive_count++;
1483                         vm_page_unlock_queues();
1484 #if 0
1485                         pmap_clear_modify(m->phys_page);
1486 #endif
1487                         break;
1488                 }
1489                 else {
1490                         if ((object != first_object) || must_be_resident)
1491                                 vm_object_paging_end(object);
1492                         offset += object->shadow_offset;
1493                         hi_offset += object->shadow_offset;
1494                         lo_offset += object->shadow_offset;
1495                         access_required = VM_PROT_READ;
1496                         vm_object_lock(next_object);
1497                         vm_object_unlock(object);
1498                         object = next_object;
1499                         vm_object_paging_begin(object);
1500                 }
1501         }
1502
1503         /*
1504          *      PAGE HAS BEEN FOUND.
1505          *
1506          *      This page (m) is:
1507          *              busy, so that we can play with it;
1508          *              not absent, so that nobody else will fill it;
1509          *              possibly eligible for pageout;
1510          *
1511          *      The top-level page (first_m) is:
1512          *              VM_PAGE_NULL if the page was found in the
1513          *               top-level object;
1514          *              busy, not absent, and ineligible for pageout.
1515          *
1516          *      The current object (object) is locked.  A paging
1517          *      reference is held for the current and top-level
1518          *      objects.
1519          */
1520
1521 #if TRACEFAULTPAGE
1522         dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m);  /* (TEST/DEBUG) */
1523 #endif
1524 #if     EXTRA_ASSERTIONS
1525         if(m != VM_PAGE_NULL) {
1526                 assert(m->busy && !m->absent);
1527                 assert((first_m == VM_PAGE_NULL) ||
1528                         (first_m->busy && !first_m->absent &&
1529                          !first_m->active && !first_m->inactive));
1530         }
1531 #endif  /* EXTRA_ASSERTIONS */
1532
1533         /*
1534          * ENCRYPTED SWAP:
1535          * If we found a page, we must have decrypted it before we
1536          * get here...
1537          */
1538         if (m != VM_PAGE_NULL) {
1539                 ASSERT_PAGE_DECRYPTED(m);
1540         }
1541
1542         XPR(XPR_VM_FAULT,
1543        "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1544                 (integer_t)object, offset, (integer_t)m,
1545                 (integer_t)first_object, (integer_t)first_m);
1546         /*
1547          *      If the page is being written, but isn't
1548          *      already owned by the top-level object,
1549          *      we have to copy it into a new page owned
1550          *      by the top-level object.
1551          */
1552
1553         if ((object != first_object) && (m != VM_PAGE_NULL)) {
1554                 /*
1555                  *      We only really need to copy if we
1556                  *      want to write it.
1557                  */
1558
1559 #if TRACEFAULTPAGE
1560                         dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1561 #endif
1562                 if (fault_type & VM_PROT_WRITE) {
1563                         vm_page_t copy_m;
1564
1565                         assert(!must_be_resident);
1566
1567                         /*
1568                          * Are we protecting the system from
1569                          * backing store exhaustion?  If so,
1570                          * sleep unless we are privileged.
1571                          */
1572
1573                         if(vm_backing_store_low) {
1574                                 if(!(current_task()->priv_flags
1575                                                 & VM_BACKING_STORE_PRIV)) {
1576                                         assert_wait((event_t)
1577                                                 &vm_backing_store_low,
1578                                                 THREAD_UNINT);
1579                                         RELEASE_PAGE(m);
1580                                         vm_fault_cleanup(object, first_m);
1581                                         thread_block(THREAD_CONTINUE_NULL);
1582                                         thread_interrupt_level(
1583                                                 interruptible_state);
1584                                         return(VM_FAULT_RETRY);
1585                                 }
1586                         }
1587
1588                         /*
1589                          *      If we try to collapse first_object at this
1590                          *      point, we may deadlock when we try to get
1591                          *      the lock on an intermediate object (since we
1592                          *      have the bottom object locked).  We can't
1593                          *      unlock the bottom object, because the page
1594                          *      we found may move (by collapse) if we do.
1595                          *
1596                          *      Instead, we first copy the page.  Then, when
1597                          *      we have no more use for the bottom object,
1598                          *      we unlock it and try to collapse.
1599                          *
1600                          *      Note that we copy the page even if we didn't
1601                          *      need to... that's the breaks.
1602                          */
1603
1604                         /*
1605                          *      Allocate a page for the copy
1606                          */
1607                         copy_m = vm_page_grab();
1608                         if (copy_m == VM_PAGE_NULL) {
1609                                 RELEASE_PAGE(m);
1610                                 vm_fault_cleanup(object, first_m);
1611                                 thread_interrupt_level(interruptible_state);
1612                                 return(VM_FAULT_MEMORY_SHORTAGE);
1613                         }
1614
1615
1616                         XPR(XPR_VM_FAULT,
1617                             "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1618                                 (integer_t)object, offset,
1619                                 (integer_t)m, (integer_t)copy_m, 0);
1620                         vm_page_copy(m, copy_m);
1621
1622                         /*
1623                          *      If another map is truly sharing this
1624                          *      page with us, we have to flush all
1625                          *      uses of the original page, since we
1626                          *      can't distinguish those which want the
1627                          *      original from those which need the
1628                          *      new copy.
1629                          *
1630                          *      XXXO If we know that only one map has
1631                          *      access to this page, then we could
1632                          *      avoid the pmap_disconnect() call.
1633                          */
1634
1635                         vm_page_lock_queues();
1636                         assert(!m->cleaning);
1637                         pmap_disconnect(m->phys_page);
1638                         vm_page_deactivate(m);
1639                         copy_m->dirty = TRUE;
1640                         /*
1641                          * Setting reference here prevents this fault from
1642                          * being counted as a (per-thread) reactivate as well
1643                          * as a copy-on-write.
1644                          */
1645                         first_m->reference = TRUE;
1646                         vm_page_unlock_queues();
1647
1648                         /*
1649                          *      We no longer need the old page or object.
1650                          */
1651
1652                         PAGE_WAKEUP_DONE(m);
1653                         vm_object_paging_end(object);
1654                         vm_object_unlock(object);
1655
1656                         if (type_of_fault)
1657                                 *type_of_fault = DBG_COW_FAULT;
1658                         VM_STAT(cow_faults++);
1659                         current_task()->cow_faults++;
1660                         object = first_object;
1661                         offset = first_offset;
1662
1663                         vm_object_lock(object);
1664                         VM_PAGE_FREE(first_m);
1665                         first_m = VM_PAGE_NULL;
1666                         assert(copy_m->busy);
1667                         vm_page_insert(copy_m, object, offset);
1668                         m = copy_m;
1669
1670                         /*
1671                          *      Now that we've gotten the copy out of the
1672                          *      way, let's try to collapse the top object.
1673                          *      But we have to play ugly games with
1674                          *      paging_in_progress to do that...
1675                          */
1676
1677                         vm_object_paging_end(object);
1678                         vm_object_collapse(object, offset);
1679                         vm_object_paging_begin(object);
1680
1681                 }
1682                 else {
1683                         *protection &= (~VM_PROT_WRITE);
1684                 }
1685         }
1686
1687         /*
1688          *      Now check whether the page needs to be pushed into the
1689          *      copy object.  The use of asymmetric copy on write for
1690          *      shared temporary objects means that we may do two copies to
1691          *      satisfy the fault; one above to get the page from a
1692          *      shadowed object, and one here to push it into the copy.
1693          */
1694
1695         while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
1696                    (m!= VM_PAGE_NULL)) {
1697                 vm_object_offset_t      copy_offset;
1698                 vm_page_t               copy_m;
1699
1700 #if TRACEFAULTPAGE
1701                 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type);    /* (TEST/DEBUG) */
1702 #endif
1703                 /*
1704                  *      If the page is being written, but hasn't been
1705                  *      copied to the copy-object, we have to copy it there.
1706                  */
1707
1708                 if ((fault_type & VM_PROT_WRITE) == 0) {
1709                         *protection &= ~VM_PROT_WRITE;
1710                         break;
1711                 }
1712
1713                 /*
1714                  *      If the page was guaranteed to be resident,
1715                  *      we must have already performed the copy.
1716                  */
1717
1718                 if (must_be_resident)
1719                         break;
1720
1721                 /*
1722                  *      Try to get the lock on the copy_object.
1723                  */
1724                 if (!vm_object_lock_try(copy_object)) {
1725                         vm_object_unlock(object);
1726
1727                         mutex_pause();  /* wait a bit */
1728
1729                         vm_object_lock(object);
1730                         continue;
1731                 }
1732
1733                 /*
1734                  *      Make another reference to the copy-object,
1735                  *      to keep it from disappearing during the
1736                  *      copy.
1737                  */
1738                 assert(copy_object->ref_count > 0);
1739                 copy_object->ref_count++;
1740                 VM_OBJ_RES_INCR(copy_object);
1741
1742                 /*
1743                  *      Does the page exist in the copy?
1744                  */
1745                 copy_offset = first_offset - copy_object->shadow_offset;
1746                 if (copy_object->size <= copy_offset)
1747                         /*
1748                          * Copy object doesn't cover this page -- do nothing.
1749                          */
1750                         ;
1751                 else if ((copy_m =
1752                         vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1753                         /* Page currently exists in the copy object */
1754                         if (copy_m->busy) {
1755                                 /*
1756                                  *      If the page is being brought
1757                                  *      in, wait for it and then retry.
1758                                  */
1759                                 RELEASE_PAGE(m);
1760                                 /* take an extra ref so object won't die */
1761                                 assert(copy_object->ref_count > 0);
1762                                 copy_object->ref_count++;
1763                                 vm_object_res_reference(copy_object);
1764                                 vm_object_unlock(copy_object);
1765                                 vm_fault_cleanup(object, first_m);
1766                                 counter(c_vm_fault_page_block_backoff_kernel++);
1767                                 vm_object_lock(copy_object);
1768                                 assert(copy_object->ref_count > 0);
1769                                 VM_OBJ_RES_DECR(copy_object);
1770                                 copy_object->ref_count--;
1771                                 assert(copy_object->ref_count > 0);
1772                                 copy_m = vm_page_lookup(copy_object, copy_offset);
1773                                 /*
1774                                  * ENCRYPTED SWAP:
1775                                  * it's OK if the "copy_m" page is encrypted,
1776                                  * because we're not moving it nor handling its
1777                                  * contents.
1778                                  */
1779                                 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1780                                         PAGE_ASSERT_WAIT(copy_m, interruptible);
1781                                         vm_object_unlock(copy_object);
1782                                         wait_result = thread_block(THREAD_CONTINUE_NULL);
1783                                         vm_object_deallocate(copy_object);
1784                                         goto backoff;
1785                                 } else {
1786                                         vm_object_unlock(copy_object);
1787                                         vm_object_deallocate(copy_object);
1788                                         thread_interrupt_level(interruptible_state);
1789                                         return VM_FAULT_RETRY;
1790                                 }
1791                         }
1792                 }
1793                 else if (!PAGED_OUT(copy_object, copy_offset)) {
1794                         /*
1795                          * If PAGED_OUT is TRUE, then the page used to exist
1796                          * in the copy-object, and has already been paged out.
1797                          * We don't need to repeat this. If PAGED_OUT is
1798                          * FALSE, then either we don't know (!pager_created,
1799                          * for example) or it hasn't been paged out.
1800                          * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1801                          * We must copy the page to the copy object.
1802                          */
1803
1804                         /*
1805                          * Are we protecting the system from
1806                          * backing store exhaustion?  If so,
1807                          * sleep unless we are privileged.
1808                          */
1809
1810                         if(vm_backing_store_low) {
1811                                 if(!(current_task()->priv_flags
1812                                                 & VM_BACKING_STORE_PRIV)) {
1813                                         assert_wait((event_t)
1814                                                 &vm_backing_store_low,
1815                                                 THREAD_UNINT);
1816                                         RELEASE_PAGE(m);
1817                                         VM_OBJ_RES_DECR(copy_object);
1818                                         copy_object->ref_count--;
1819                                         assert(copy_object->ref_count > 0);
1820                                         vm_object_unlock(copy_object);
1821                                         vm_fault_cleanup(object, first_m);
1822                                         thread_block(THREAD_CONTINUE_NULL);
1823                                         thread_interrupt_level(
1824                                                 interruptible_state);
1825                                         return(VM_FAULT_RETRY);
1826                                 }
1827                         }
1828
1829                         /*
1830                          *      Allocate a page for the copy
1831                          */
1832                         copy_m = vm_page_alloc(copy_object, copy_offset);
1833                         if (copy_m == VM_PAGE_NULL) {
1834                                 RELEASE_PAGE(m);
1835                                 VM_OBJ_RES_DECR(copy_object);
1836                                 copy_object->ref_count--;
1837                                 assert(copy_object->ref_count > 0);
1838                                 vm_object_unlock(copy_object);
1839                                 vm_fault_cleanup(object, first_m);
1840                                 thread_interrupt_level(interruptible_state);
1841                                 return(VM_FAULT_MEMORY_SHORTAGE);
1842                         }
1843
1844                         /*
1845                          *      Must copy page into copy-object.
1846                          */
1847
1848                         vm_page_copy(m, copy_m);
1849
1850                         /*
1851                          *      If the old page was in use by any users
1852                          *      of the copy-object, it must be removed
1853                          *      from all pmaps.  (We can't know which
1854                          *      pmaps use it.)
1855                          */
1856
1857                         vm_page_lock_queues();
1858                         assert(!m->cleaning);
1859                         pmap_disconnect(m->phys_page);
1860                         copy_m->dirty = TRUE;
1861                         vm_page_unlock_queues();
1862
1863                         /*
1864                          *      If there's a pager, then immediately
1865                          *      page out this page, using the "initialize"
1866                          *      option.  Else, we use the copy.
1867                          */
1868
1869                         if
1870 #if     MACH_PAGEMAP
1871                           ((!copy_object->pager_created) ||
1872                                 vm_external_state_get(
1873                                         copy_object->existence_map, copy_offset)
1874                                 == VM_EXTERNAL_STATE_ABSENT)
1875 #else
1876                           (!copy_object->pager_created)
1877 #endif
1878                                 {
1879                                 vm_page_lock_queues();
1880                                 vm_page_activate(copy_m);
1881                                 vm_page_unlock_queues();
1882                                 PAGE_WAKEUP_DONE(copy_m);
1883                         }
1884                         else {
1885                                 assert(copy_m->busy == TRUE);
1886
1887                                 /*
1888                                  *      The page is already ready for pageout:
1889                                  *      not on pageout queues and busy.
1890                                  *      Unlock everything except the
1891                                  *      copy_object itself.
1892                                  */
1893
1894                                 vm_object_unlock(object);
1895
1896                                 /*
1897                                  *      Write the page to the copy-object,
1898                                  *      flushing it from the kernel.
1899                                  */
1900
1901                                 vm_pageout_initialize_page(copy_m);
1902
1903                                 /*
1904                                  *      Since the pageout may have
1905                                  *      temporarily dropped the
1906                                  *      copy_object's lock, we
1907                                  *      check whether we'll have
1908                                  *      to deallocate the hard way.
1909                                  */
1910
1911                                 if ((copy_object->shadow != object) ||
1912                                     (copy_object->ref_count == 1)) {
1913                                         vm_object_unlock(copy_object);
1914                                         vm_object_deallocate(copy_object);
1915                                         vm_object_lock(object);
1916                                         continue;
1917                                 }
1918
1919                                 /*
1920                                  *      Pick back up the old object's
1921                                  *      lock.  [It is safe to do so,
1922                                  *      since it must be deeper in the
1923                                  *      object tree.]
1924                                  */
1925
1926                                 vm_object_lock(object);
1927                         }
1928
1929                         /*
1930                          *      Because we're pushing a page upward
1931                          *      in the object tree, we must restart
1932                          *      any faults that are waiting here.
1933                          *      [Note that this is an expansion of
1934                          *      PAGE_WAKEUP that uses the THREAD_RESTART
1935                          *      wait result].  Can't turn off the page's
1936                          *      busy bit because we're not done with it.
1937                          */
1938
1939                         if (m->wanted) {
1940                                 m->wanted = FALSE;
1941                                 thread_wakeup_with_result((event_t) m,
1942                                         THREAD_RESTART);
1943                         }
1944                 }
1945
1946                 /*
1947                  *      The reference count on copy_object must be
1948                  *      at least 2: one for our extra reference,
1949                  *      and at least one from the outside world
1950                  *      (we checked that when we last locked
1951                  *      copy_object).
1952                  */
1953                 copy_object->ref_count--;
1954                 assert(copy_object->ref_count > 0);
1955                 VM_OBJ_RES_DECR(copy_object);
1956                 vm_object_unlock(copy_object);
1957
1958                 break;
1959         }
1960
1961         *result_page = m;
1962         *top_page = first_m;
1963
1964         XPR(XPR_VM_FAULT,
1965                 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1966                 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1967         /*
1968          *      If the page can be written, assume that it will be.
1969          *      [Earlier, we restrict the permission to allow write
1970          *      access only if the fault so required, so we don't
1971          *      mark read-only data as dirty.]
1972          */
1973
1974
1975         if(m != VM_PAGE_NULL) {
1976 #if     !VM_FAULT_STATIC_CONFIG
1977                 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
1978                         m->dirty = TRUE;
1979 #endif
1980                 if (vm_page_deactivate_behind)
1981                         vm_fault_deactivate_behind(object, offset, behavior);
1982         } else {
1983                 vm_object_unlock(object);
1984         }
1985         thread_interrupt_level(interruptible_state);
1986
1987 #if TRACEFAULTPAGE
1988         dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0);       /* (TEST/DEBUG) */
1989 #endif
1990         return(VM_FAULT_SUCCESS);
1991
1992 #if 0
1993     block_and_backoff:
1994         vm_fault_cleanup(object, first_m);
1995
1996         counter(c_vm_fault_page_block_backoff_kernel++);
1997         thread_block(THREAD_CONTINUE_NULL);
1998 #endif
1999
2000     backoff:
2001         thread_interrupt_level(interruptible_state);
2002         if (wait_result == THREAD_INTERRUPTED)
2003                 return VM_FAULT_INTERRUPTED;
2004         return VM_FAULT_RETRY;
2005
2006 #undef  RELEASE_PAGE
2007 }
2008
2009 /*
2010  *      Routine:        vm_fault_tws_insert
2011  *      Purpose:
2012  *              Add fault information to the task working set.
2013  *      Implementation:
2014  *              We always insert the base object/offset pair
2015  *              rather than the actual object/offset.
      *              The base object is found by following ->shadow
      *              until it is NULL, adding each shadow_offset to
      *              the offset on the way down.
      *              Nothing is recorded for kernel_map/kalloc_map, or
      *              when the current task has no dynamic working set.
2016  *      Assumptions:
2017  *              Map and real_map locked.
2018  *              Object locked and referenced.
2019  *      Returns:
2020  *              TRUE if startup file should be written.
      *              TRUE is returned exactly when tws_lookup or
      *              tws_insert returned KERN_OPERATION_TIMED_OUT;
      *              FALSE is returned otherwise.
2021  *              With object locked and still referenced.
2022  *              But we may drop the object lock temporarily.
2023  */
2024 static boolean_t
2025 vm_fault_tws_insert(
2026         vm_map_t map,
2027         vm_map_t real_map,
2028         vm_map_offset_t vaddr,
2029         vm_object_t object,
2030         vm_object_offset_t offset)
2031 {
2032         tws_hash_line_t line;           /* address is passed to tws_lookup; value never read here */
2033         task_t          task;
2034         kern_return_t   kr;
2035         boolean_t       result = FALSE; /* set to TRUE only on KERN_OPERATION_TIMED_OUT */
2036
2037         /* Avoid possible map lock deadlock issues: skip kernel_map and kalloc_map entirely */
2038         if (map == kernel_map || map == kalloc_map ||
2039             real_map == kernel_map || real_map == kalloc_map)
2040                 return result;
2041
2042         task = current_task();
2043         if (task->dynamic_working_set != 0) {
2044                 vm_object_t     base_object;
2045                 vm_object_t     base_shadow;
2046                 vm_object_offset_t base_offset;
2047                 base_object = object;
2048                 base_offset = offset;
                     /*
                      * Walk down the shadow chain.  The lock on the next
                      * object is taken before the lock on the current one
                      * is dropped, so some object lock is held at all times.
                      */
2049                 while ((base_shadow = base_object->shadow)) {
2050                         vm_object_lock(base_shadow);
2051                         vm_object_unlock(base_object);
                             /*
                              * NOTE(review): base_object was unlocked just
                              * above, yet its shadow_offset is read here --
                              * confirm this is safe, or read it before the
                              * unlock.
                              */
2052                         base_offset +=
2053                                 base_object->shadow_offset;
2054                         base_object = base_shadow;
2055                 }
                     /*
                      * Only base_object is locked at this point.  If the
                      * chain was not empty, "object" is now unlocked; every
                      * path below re-locks it before returning.
                      */
2056                 kr = tws_lookup(
2057                         task->dynamic_working_set,
2058                         base_offset, base_object,
2059                         &line);
2060                 if (kr == KERN_OPERATION_TIMED_OUT){
2061                         result = TRUE;
2062                         if (base_object != object) {
2063                                 vm_object_unlock(base_object);
2064                                 vm_object_lock(object);
2065                         }
2066                 } else if (kr != KERN_SUCCESS) {
                             /*
                              * Lookup returned something other than success
                              * or timeout: try to insert the pair.  A distinct
                              * base object presumably gets an extra reference
                              * for the duration of the insert; it is paired
                              * with the vm_object_deallocate below.
                              */
2067                         if(base_object != object)
2068                                 vm_object_reference_locked(base_object);
2069                         kr = tws_insert(
2070                                    task->dynamic_working_set,
2071                                    base_offset, base_object,
2072                                    vaddr, real_map);
2073                         if(base_object != object) {
2074                                 vm_object_unlock(base_object);
2075                                 vm_object_deallocate(base_object);
2076                         }
2077                         if(kr == KERN_NO_SPACE) {
                                     /*
                                      * tws_expand_working_set is called with
                                      * neither object lock held: object's lock
                                      * is dropped around the call here, and a
                                      * distinct base_object was already
                                      * unlocked above.  The insert is not
                                      * retried after the expansion.
                                      */
2078                                 if (base_object == object)
2079                                         vm_object_unlock(object);
2080                                 tws_expand_working_set(
2081                                    task->dynamic_working_set,
2082                                    TWS_HASH_LINE_COUNT,
2083                                    FALSE);
2084                                 if (base_object == object)
2085                                         vm_object_lock(object);
2086                         } else if(kr == KERN_OPERATION_TIMED_OUT) {
2087                                 result = TRUE;
2088                         }
2089                         if(base_object != object)
2090                                 vm_object_lock(object);  /* restore the caller's lock on object */
2091                 } else if (base_object != object) {
                             /* lookup returned KERN_SUCCESS: just swap the locks back */
2092                         vm_object_unlock(base_object);
2093                         vm_object_lock(object);
2094                 }
2095         }
2096         return result;
2097 }
2098
2099 /*
2100  *      Routine:        vm_fault
2101  *      Purpose:
2102  *              Handle page faults, including pseudo-faults
2103  *              used to change the wiring status of pages.
2104  *      Returns:
2105  *              Explicit continuations have been removed.
2106  *      Implementation:
2107  *              vm_fault and vm_fault_page save mucho state
2108  *              in the moral equivalent of a closure.  The state
2109  *              structure is allocated when first entering vm_fault
2110  *              and deallocated when leaving vm_fault.
2111  */
2112
2113 extern int _map_enter_debug;     /* defined elsewhere; nearby it is referenced only by a commented-out printf in vm_fault */
2114
2115 kern_return_t
2116 vm_fault(
2117         vm_map_t        map,
2118         vm_map_offset_t vaddr,
2119         vm_prot_t       fault_type,
2120         boolean_t       change_wiring,
2121         int             interruptible,
2122         pmap_t          caller_pmap,
2123         vm_map_offset_t caller_pmap_addr)
2124 {
2125         vm_map_version_t        version;        /* Map version for verificiation */
2126         boolean_t               wired;          /* Should mapping be wired down? */
2127         vm_object_t             object;         /* Top-level object */
2128         vm_object_offset_t      offset;         /* Top-level offset */
2129         vm_prot_t               prot;           /* Protection for mapping */
2130         vm_behavior_t           behavior;       /* Expected paging behavior */
2131         vm_map_offset_t         lo_offset, hi_offset;
2132         vm_object_t             old_copy_object; /* Saved copy object */
2133         vm_page_t               result_page;    /* Result of vm_fault_page */
2134         vm_page_t               top_page;       /* Placeholder page */
2135         kern_return_t           kr;
2136
2137         register
2138         vm_page_t               m;      /* Fast access to result_page */
2139         kern_return_t           error_code = 0; /* page error reasons */
2140         register
2141         vm_object_t             cur_object;
2142         register
2143         vm_object_offset_t      cur_offset;
2144         vm_page_t               cur_m;
2145         vm_object_t             new_object;
2146         int                     type_of_fault;
2147         vm_map_t                real_map = map;
2148         vm_map_t                original_map = map;
2149         pmap_t                  pmap = NULL;
2150         boolean_t               interruptible_state;
2151         unsigned int            cache_attr;
2152         int                     write_startup_file = 0;
2153         boolean_t               need_activation;
2154         vm_prot_t               full_fault_type;
2155
2156         if (get_preemption_level() != 0)
2157                 return (KERN_FAILURE);
2158
2159         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
2160                               vaddr,
2161                               0,
2162                               0,
2163                               0,
2164                               0);
2165
2166         /* at present we do not fully check for execute permission */
2167         /* we generally treat it as read except in certain device  */
2168         /* memory settings */
2169         full_fault_type = fault_type;
2170         if(fault_type & VM_PROT_EXECUTE) {
2171                 fault_type &= ~VM_PROT_EXECUTE;
2172                 fault_type |= VM_PROT_READ;
2173         }
2174
2175         interruptible_state = thread_interrupt_level(interruptible);
2176
2177         /*
2178          * assume we will hit a page in the cache
2179          * otherwise, explicitly override with
2180          * the real fault type once we determine it
2181          */
2182         type_of_fault = DBG_CACHE_HIT_FAULT;
2183
2184         VM_STAT(faults++);
2185         current_task()->faults++;
2186
2187     RetryFault: ;
2188
2189         /*
2190          *      Find the backing store object and offset into
2191          *      it to begin the search.
2192          */
2193         map = original_map;
2194         vm_map_lock_read(map);
2195         kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
2196                                 &object, &offset,
2197                                 &prot, &wired,
2198                                 &behavior, &lo_offset, &hi_offset, &real_map);
2199
2200 //if (_map_enter_debug)printf("vm_map_lookup_locked(map=0x%x, addr=0x%llx, prot=%d wired=%d) = %d\n", map, vaddr, prot, wired, kr);
2201
2202         pmap = real_map->pmap;
2203
2204         if (kr != KERN_SUCCESS) {
2205                 vm_map_unlock_read(map);
2206                 goto done;
2207         }
2208
2209         /*
2210          *      If the page is wired, we must fault for the current protection
2211          *      value, to avoid further faults.
2212          */
2213
2214         if (wired)
2215                 fault_type = prot | VM_PROT_WRITE;
2216
2217 #if     VM_FAULT_CLASSIFY
2218         /*
2219          *      Temporary data gathering code
2220          */
2221         vm_fault_classify(object, offset, fault_type);
2222 #endif
2223         /*
2224          *      Fast fault code.  The basic idea is to do as much as
2225          *      possible while holding the map lock and object locks.
2226          *      Busy pages are not used until the object lock has to
2227          *      be dropped to do something (copy, zero fill, pmap enter).
2228          *      Similarly, paging references aren't acquired until that
2229          *      point, and object references aren't used.
2230          *
2231          *      If we can figure out what to do
2232          *      (zero fill, copy on write, pmap enter) while holding
2233          *      the locks, then it gets done.  Otherwise, we give up,
2234          *      and use the original fault path (which doesn't hold
2235          *      the map lock, and relies on busy pages).
2236          *      The give up cases include:
2237          *              - Have to talk to pager.
2238          *              - Page is busy, absent or in error.
2239          *              - Pager has locked out desired access.
2240          *              - Fault needs to be restarted.
2241          *              - Have to push page into copy object.
2242          *
2243          *      The code is an infinite loop that moves one level down
2244          *      the shadow chain each time.  cur_object and cur_offset
2245          *      refer to the current object being examined. object and offset
2246          *      are the original object from the map.  The loop is at the
2247          *      top level if and only if object and cur_object are the same.
2248          *
2249          *      Invariants:  Map lock is held throughout.  Lock is held on
2250          *              original object and cur_object (if different) when
2251          *              continuing or exiting loop.
2252          *
2253          */
2254
2255
2256         /*
2257          *      If this page is to be inserted in a copy delay object
2258          *      for writing, and if the object has a copy, then the
2259          *      copy delay strategy is implemented in the slow fault page.
2260          */
2261         if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
2262             object->copy == VM_OBJECT_NULL ||
2263             (fault_type & VM_PROT_WRITE) == 0) {
2264         cur_object = object;
2265         cur_offset = offset;
2266
2267         while (TRUE) {
2268                 m = vm_page_lookup(cur_object, cur_offset);
2269                 if (m != VM_PAGE_NULL) {
2270                         if (m->busy) {
2271                                 wait_result_t   result;
2272
2273                                 if (object != cur_object)
2274                                         vm_object_unlock(object);
2275
2276                                 vm_map_unlock_read(map);
2277                                 if (real_map != map)
2278                                         vm_map_unlock(real_map);
2279
2280 #if     !VM_FAULT_STATIC_CONFIG
2281                                 if (!vm_fault_interruptible)
2282                                         interruptible = THREAD_UNINT;
2283 #endif
2284                                 result = PAGE_ASSERT_WAIT(m, interruptible);
2285
2286                                 vm_object_unlock(cur_object);
2287
2288                                 if (result == THREAD_WAITING) {
2289                                         result = thread_block(THREAD_CONTINUE_NULL);
2290
2291                                         counter(c_vm_fault_page_block_busy_kernel++);
2292                                 }
2293                                 if (result == THREAD_AWAKENED || result == THREAD_RESTART)
2294                                         goto RetryFault;
2295
2296                                 kr = KERN_ABORTED;
2297                                 goto done;
2298                         }
2299                         if (m->unusual && (m->error || m->restart || m->private
2300                             || m->absent || (fault_type & m->page_lock))) {
2301
2302                                 /*
2303                                  *      Unusual case. Give up.
2304                                  */
2305                                 break;
2306                         }
2307
2308                         if (m->encrypted) {
2309                                 /*
2310                                  * ENCRYPTED SWAP:
2311                                  * We've soft-faulted (because it's not in the page
2312                                  * table) on an encrypted page.
2313                                  * Keep the page "busy" so that no one messes with
2314                                  * it during the decryption.
2315                                  * Release the extra locks we're holding, keep only
2316                                  * the page's VM object lock.
2317                                  */
2318                                 m->busy = TRUE;
2319                                 if (object != cur_object) {
2320                                         vm_object_unlock(object);
2321                                 }
2322                                 vm_map_unlock_read(map);
2323                                 if (real_map != map)
2324                                         vm_map_unlock(real_map);
2325
2326                                 vm_page_decrypt(m, 0);
2327
2328                                 assert(m->busy);
2329                                 PAGE_WAKEUP_DONE(m);
2330                                 vm_object_unlock(m->object);
2331
2332                                 /*
2333                                  * Retry from the top, in case anything
2334                                  * changed while we were decrypting...
2335                                  */
2336                                 goto RetryFault;
2337                         }
2338                         ASSERT_PAGE_DECRYPTED(m);
2339
2340                         /*
2341                          *      Two cases of map in faults:
2342                          *          - At top level w/o copy object.
2343                          *          - Read fault anywhere.
2344                          *              --> must disallow write.
2345                          */
2346
2347                         if (object == cur_object &&
2348                             object->copy == VM_OBJECT_NULL)
2349                                 goto FastMapInFault;
2350
2351                         if ((fault_type & VM_PROT_WRITE) == 0) {
2352                                 boolean_t sequential;
2353
2354                                 prot &= ~VM_PROT_WRITE;
2355
2356                                 /*
2357                                  *      Set up to map the page ...
2358                                  *      mark the page busy, drop
2359                                  *      locks and take a paging reference
2360                                  *      on the object with the page.
2361                                  */
2362
2363                                 if (object != cur_object) {
2364                                         vm_object_unlock(object);
2365                                         object = cur_object;
2366                                 }
2367 FastMapInFault:
2368                                 m->busy = TRUE;
2369
2370                                 vm_object_paging_begin(object);
2371
2372 FastPmapEnter:
2373                                 /*
2374                                  *      Check a couple of global reasons to
2375                                  *      be conservative about write access.
2376                                  *      Then do the pmap_enter.
2377                                  */
2378 #if     !VM_FAULT_STATIC_CONFIG
2379                                 if (vm_fault_dirty_handling
2380 #if     MACH_KDB
2381                                     || db_watchpoint_list
2382 #endif
2383                                     && (fault_type & VM_PROT_WRITE) == 0)
2384                                         prot &= ~VM_PROT_WRITE;
2385 #else   /* STATIC_CONFIG */
2386 #if     MACH_KDB
2387                                 if (db_watchpoint_list
2388                                     && (fault_type & VM_PROT_WRITE) == 0)
2389                                         prot &= ~VM_PROT_WRITE;
2390 #endif  /* MACH_KDB */
2391 #endif  /* STATIC_CONFIG */
2392                                 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2393
2394                                 sequential = FALSE;
2395                                 need_activation = FALSE;
2396
2397                                 if (m->no_isync == TRUE) {
2398                                         m->no_isync = FALSE;
2399                                         pmap_sync_page_data_phys(m->phys_page);
2400
2401                                         if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
2402                                                 /*
2403                                                  * found it in the cache, but this
2404                                                  * is the first fault-in of the page (no_isync == TRUE)
2405                                                  * so it must have come in as part of
2406                                                  * a cluster... account 1 pagein against it
2407                                                  */
2408                                                 VM_STAT(pageins++);
2409                                                 current_task()->pageins++;
2410                                                 type_of_fault = DBG_PAGEIN_FAULT;
2411                                                 sequential = TRUE;
2412                                         }
2413                                         if (m->clustered)
2414                                                 need_activation = TRUE;
2415
2416                                 } else if (cache_attr != VM_WIMG_DEFAULT) {
2417                                         pmap_sync_page_attributes_phys(m->phys_page);
2418                                 }
2419
2420                                 if(caller_pmap) {
2421                                         PMAP_ENTER(caller_pmap,
2422                                                 caller_pmap_addr, m,
2423                                                 prot, cache_attr, wired);
2424                                 } else {
2425                                         PMAP_ENTER(pmap, vaddr, m,
2426                                                 prot, cache_attr, wired);
2427                                 }
2428
2429                                 /*
2430                                  *      Hold queues lock to manipulate
2431                                  *      the page queues.  Change wiring
2432                                  *      case is obvious.  In soft ref bits
2433                                  *      case activate page only if it fell
2434                                  *      off paging queues, otherwise just
2435                                  *      activate it if it's inactive.
2436                                  *
2437                                  *      NOTE: original vm_fault code will
2438                                  *      move active page to back of active
2439                                  *      queue.  This code doesn't.
2440                                  */
2441                                 vm_page_lock_queues();
2442
2443                                 if (m->clustered) {
2444                                         vm_pagein_cluster_used++;
2445                                         m->clustered = FALSE;
2446                                 }
2447                                 m->reference = TRUE;
2448
2449                                 if (change_wiring) {
2450                                         if (wired)
2451                                                 vm_page_wire(m);
2452                                         else
2453                                                 vm_page_unwire(m);
2454                                 }
2455 #if VM_FAULT_STATIC_CONFIG
2456                                 else {
2457                                         if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
2458                                                 vm_page_activate(m);
2459                                 }
2460 #else
2461                                 else if (software_reference_bits) {
2462                                         if (!m->active && !m->inactive)
2463                                                 vm_page_activate(m);
2464                                 }
2465                                 else if (!m->active) {
2466                                         vm_page_activate(m);
2467                                 }
2468 #endif
2469                                 vm_page_unlock_queues();
2470
2471                                 /*
2472                                  *      That's it, clean up and return.
2473                                  */
2474                                 PAGE_WAKEUP_DONE(m);
2475
2476                                 sequential = (sequential && vm_page_deactivate_behind) ?
2477                                         vm_fault_deactivate_behind(object, cur_offset, behavior) :
2478                                         FALSE;
2479
2480                                 /*
2481                                  * Add non-sequential pages to the working set.
2482                                  * The sequential pages will be brought in through
2483                                  * normal clustering behavior.
2484                                  */
2485                                 if (!sequential && !object->private) {
2486                                         write_startup_file =
2487                                                 vm_fault_tws_insert(map, real_map, vaddr,
2488                                                                 object, cur_offset);
2489                                 }
2490
2491                                 vm_object_paging_end(object);
2492                                 vm_object_unlock(object);
2493
2494                                 vm_map_unlock_read(map);
2495                                 if(real_map != map)
2496                                         vm_map_unlock(real_map);
2497
2498                                 if(write_startup_file)
2499                                         tws_send_startup_info(current_task());
2500
2501                                 thread_interrupt_level(interruptible_state);
2502
2503
2504                                 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2505                                                       vaddr,
2506                                                       type_of_fault & 0xff,
2507                                                       KERN_SUCCESS,
2508                                                       type_of_fault >> 8,
2509                                                       0);
2510
2511                                 return KERN_SUCCESS;
2512                         }
2513
2514                         /*
2515                          *      Copy on write fault.  If objects match, then
2516                          *      object->copy must not be NULL (else control
2517                          *      would be in previous code block), and we
2518                          *      have a potential push into the copy object
2519                          *      with which we won't cope here.
2520                          */
2521
2522                         if (cur_object == object)
2523                                 break;
2524                         /*
2525                          *      This is now a shadow based copy on write
2526                          *      fault -- it requires a copy up the shadow
2527                          *      chain.
2528                          *
2529                          *      Allocate a page in the original top level
2530                          *      object. Give up if allocate fails.  Also
2531                          *      need to remember current page, as it's the
2532                          *      source of the copy.
2533                          */
2534                         cur_m = m;
2535                         m = vm_page_grab();
2536                         if (m == VM_PAGE_NULL) {
2537                                 break;
2538                         }
2539                         /*
2540                          *      Now do the copy.  Mark the source busy
2541                          *      and take out paging references on both
2542                          *      objects.
2543                          *
2544                          *      NOTE: This code holds the map lock across
2545                          *      the page copy.
2546                          */
2547
2548                         cur_m->busy = TRUE;
2549                         vm_page_copy(cur_m, m);
2550                         vm_page_insert(m, object, offset);
2551
2552                         vm_object_paging_begin(cur_object);
2553                         vm_object_paging_begin(object);
2554
2555                         type_of_fault = DBG_COW_FAULT;
2556                         VM_STAT(cow_faults++);
2557                         current_task()->cow_faults++;
2558
2559                         /*
2560                          *      Now cope with the source page and object
2561                          *      If the top object has a ref count of 1
2562                          *      then no other map can access it, and hence
2563                          *      it's not necessary to do the pmap_disconnect.
2564                          */
2565
2566                         vm_page_lock_queues();
2567                         vm_page_deactivate(cur_m);
2568                         m->dirty = TRUE;
2569                         pmap_disconnect(cur_m->phys_page);
2570                         vm_page_unlock_queues();
2571
2572                         PAGE_WAKEUP_DONE(cur_m);
2573                         vm_object_paging_end(cur_object);
2574                         vm_object_unlock(cur_object);
2575
2576                         /*
2577                          *      Slight hack to call vm_object collapse
2578                          *      and then reuse common map in code.
2579                          *      note that the object lock was taken above.
2580                          */
2581
2582                         vm_object_paging_end(object);
2583                         vm_object_collapse(object, offset);
2584                         vm_object_paging_begin(object);
2585
2586                         goto FastPmapEnter;
2587                 }
2588                 else {
2589
2590                         /*
2591                          *      No page at cur_object, cur_offset
2592                          */
2593
2594                         if (cur_object->pager_created) {
2595
2596                                 /*
2597                                  *      Have to talk to the pager.  Give up.
2598                                  */
2599                                 break;
2600                         }
2601
2602
2603                         if (cur_object->shadow == VM_OBJECT_NULL) {
2604
2605                                 if (cur_object->shadow_severed) {
2606                                         vm_object_paging_end(object);
2607                                         vm_object_unlock(object);
2608                                         vm_map_unlock_read(map);
2609                                         if(real_map != map)
2610                                                 vm_map_unlock(real_map);
2611
2612                                         if(write_startup_file)
2613                                                 tws_send_startup_info(
2614                                                                 current_task());
2615
2616                                         thread_interrupt_level(interruptible_state);
2617
2618                                         return KERN_MEMORY_ERROR;
2619                                 }
2620
2621                                 /*
2622                                  *      Zero fill fault.  Page gets
2623                                  *      filled in top object. Insert
2624                                  *      page, then drop any lower lock.
2625                                  *      Give up if no page.
2626                                  */
2627                                 if (VM_PAGE_THROTTLED()) {
2628                                         break;
2629                                 }
2630
2631                                 /*
2632                                  * are we protecting the system from
2633                                  * backing store exhaustion.  If so
2634                                  * sleep unless we are privileged.
2635                                  */
2636                                 if(vm_backing_store_low) {
2637                                         if(!(current_task()->priv_flags
2638                                                 & VM_BACKING_STORE_PRIV))
2639                                         break;
2640                                 }
2641                                 m = vm_page_alloc(object, offset);
2642                                 if (m == VM_PAGE_NULL) {
2643                                         break;
2644                                 }
2645                                 /*
2646                                  * This is a zero-fill or initial fill
2647                                  * page fault.  As such, we consider it
2648                                  * undefined with respect to instruction
2649                                  * execution.  i.e. it is the responsibility
2650                                  * of higher layers to call for an instruction
2651                                  * sync after changing the contents and before
2652                                  * sending a program into this area.  We
2653                                  * choose this approach for performance
2654                                  */
2655
2656                                 m->no_isync = FALSE;
2657
2658                                 if (cur_object != object)
2659                                         vm_object_unlock(cur_object);
2660
2661                                 vm_object_paging_begin(object);
2662                                 vm_object_unlock(object);
2663
2664                                 /*
2665                                  *      Now zero fill page and map it.
2666                                  *      the page is probably going to
2667                                  *      be written soon, so don't bother
2668                                  *      to clear the modified bit
2669                                  *
2670                                  *      NOTE: This code holds the map
2671                                  *      lock across the zero fill.
2672                                  */
2673
2674                                 if (!map->no_zero_fill) {
2675                                         vm_page_zero_fill(m);
2676                                         type_of_fault = DBG_ZERO_FILL_FAULT;
2677                                         VM_STAT(zero_fill_count++);
2678                                 }
2679                                 vm_page_lock_queues();
2680                                 VM_PAGE_QUEUES_REMOVE(m);
2681
2682                                 m->page_ticket = vm_page_ticket;
2683                                 assert(!m->laundry);
2684                                 assert(m->object != kernel_object);
2685                                 assert(m->pageq.next == NULL &&
2686                                        m->pageq.prev == NULL);
2687                                 if(m->object->size > 0x200000) {
2688                                         m->zero_fill = TRUE;
2689                                         /* depends on the queues lock */
2690                                         vm_zf_count += 1;
2691                                         queue_enter(&vm_page_queue_zf,
2692                                                 m, vm_page_t, pageq);
2693                                 } else {
2694                                         queue_enter(
2695                                                 &vm_page_queue_inactive,
2696                                                 m, vm_page_t, pageq);
2697                                 }
2698                                 vm_page_ticket_roll++;
2699                                 if(vm_page_ticket_roll ==
2700                                                 VM_PAGE_TICKETS_IN_ROLL) {
2701                                         vm_page_ticket_roll = 0;
2702                                         if(vm_page_ticket ==
2703                                                 VM_PAGE_TICKET_ROLL_IDS)
2704                                                 vm_page_ticket= 0;
2705                                         else
2706                                                 vm_page_ticket++;
2707                                 }
2708
2709                                 m->inactive = TRUE;
2710                                 vm_page_inactive_count++;
2711                                 vm_page_unlock_queues();
2712                                 vm_object_lock(object);
2713
2714                                 goto FastPmapEnter;
2715                         }
2716
2717                         /*
2718                          *      On to the next level
2719                          */
2720
2721                         cur_offset += cur_object->shadow_offset;
2722                         new_object = cur_object->shadow;
2723                         vm_object_lock(new_object);
2724                         if (cur_object != object)
2725                                 vm_object_unlock(cur_object);
2726                         cur_object = new_object;
2727
2728                         continue;
2729                 }
2730         }
2731
2732         /*
2733          *      Cleanup from fast fault failure.  Drop any object
2734          *      lock other than original and drop map lock.
2735          */
2736
2737         if (object != cur_object)
2738                 vm_object_unlock(cur_object);
2739         }
2740         vm_map_unlock_read(map);
2741
2742         if(real_map != map)
2743                 vm_map_unlock(real_map);
2744
2745         /*
2746          *      Make a reference to this object to
2747          *      prevent its disposal while we are messing with
2748          *      it.  Once we have the reference, the map is free
2749          *      to be diddled.  Since objects reference their
2750          *      shadows (and copies), they will stay around as well.
2751          */
2752
2753         assert(object->ref_count > 0);
2754         object->ref_count++;
2755         vm_object_res_reference(object);
2756         vm_object_paging_begin(object);
2757
2758         XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2759
2760         if (!object->private) {
2761                 write_startup_file =
2762                         vm_fault_tws_insert(map, real_map, vaddr, object, offset);
2763         }
2764
2765         kr = vm_fault_page(object, offset, fault_type,
2766                            (change_wiring && !wired),
2767                            interruptible,
2768                            lo_offset, hi_offset, behavior,
2769                            &prot, &result_page, &top_page,
2770                            &type_of_fault,
2771                            &error_code, map->no_zero_fill, FALSE, map, vaddr);
2772
2773         /*
2774          *      If we didn't succeed, lose the object reference immediately.
2775          */
2776
2777         if (kr != VM_FAULT_SUCCESS)
2778                 vm_object_deallocate(object);
2779
2780         /*
2781          *      See why we failed, and take corrective action.
2782          */
2783
2784         switch (kr) {
2785                 case VM_FAULT_SUCCESS:
2786                         break;
2787                 case VM_FAULT_MEMORY_SHORTAGE:
2788                         if (vm_page_wait((change_wiring) ?
2789                                          THREAD_UNINT :
2790                                          THREAD_ABORTSAFE))
2791                                 goto RetryFault;
2792                         /* fall thru */
2793                 case VM_FAULT_INTERRUPTED:
2794                         kr = KERN_ABORTED;
2795                         goto done;
2796                 case VM_FAULT_RETRY:
2797                         goto RetryFault;
2798                 case VM_FAULT_FICTITIOUS_SHORTAGE:
2799                         vm_page_more_fictitious();
2800                         goto RetryFault;
2801                 case VM_FAULT_MEMORY_ERROR:
2802                         if (error_code)
2803                                 kr = error_code;
2804                         else
2805                                 kr = KERN_MEMORY_ERROR;
2806                         goto done;
2807         }
2808
2809         m = result_page;
2810
2811         if(m != VM_PAGE_NULL) {
2812                 assert((change_wiring && !wired) ?
2813                     (top_page == VM_PAGE_NULL) :
2814                     ((top_page == VM_PAGE_NULL) == (m->object == object)));
2815         }
2816
2817         /*
2818          *      How to clean up the result of vm_fault_page.  This
2819          *      happens whether the mapping is entered or not.
2820          */
2821
2822 #define UNLOCK_AND_DEALLOCATE                           \
2823         MACRO_BEGIN                                     \
2824         vm_fault_cleanup(m->object, top_page);          \
2825         vm_object_deallocate(object);                   \
2826         MACRO_END
2827
2828         /*
2829          *      What to do with the resulting page from vm_fault_page
2830          *      if it doesn't get entered into the physical map:
2831          */
2832
2833 #define RELEASE_PAGE(m)                                 \
2834         MACRO_BEGIN                                     \
2835         PAGE_WAKEUP_DONE(m);                            \
2836         vm_page_lock_queues();                          \
2837         if (!m->active && !m->inactive)                 \
2838                 vm_page_activate(m);                    \
2839         vm_page_unlock_queues();                        \
2840         MACRO_END
2841
2842         /*
2843          *      We must verify that the maps have not changed
2844          *      since our last lookup.
2845          */
2846
2847         if(m != VM_PAGE_NULL) {
2848                 old_copy_object = m->object->copy;
2849                 vm_object_unlock(m->object);
2850         } else {
2851                 old_copy_object = VM_OBJECT_NULL;
2852         }
2853         if ((map != original_map) || !vm_map_verify(map, &version)) {
2854                 vm_object_t             retry_object;
2855                 vm_object_offset_t      retry_offset;
2856                 vm_prot_t               retry_prot;
2857
2858                 /*
2859                  *      To avoid trying to write_lock the map while another
2860                  *      thread has it read_locked (in vm_map_pageable), we
2861                  *      do not try for write permission.  If the page is
2862                  *      still writable, we will get write permission.  If it
2863                  *      is not, or has been marked needs_copy, we enter the
2864                  *      mapping without write permission, and will merely
2865                  *      take another fault.
2866                  */
2867                 map = original_map;
2868                 vm_map_lock_read(map);
2869                 kr = vm_map_lookup_locked(&map, vaddr,
2870                                    fault_type & ~VM_PROT_WRITE, &version,
2871                                    &retry_object, &retry_offset, &retry_prot,
2872                                    &wired, &behavior, &lo_offset, &hi_offset,
2873                                    &real_map);
2874                 pmap = real_map->pmap;
2875
2876                 if (kr != KERN_SUCCESS) {
2877                         vm_map_unlock_read(map);
2878                         if(m != VM_PAGE_NULL) {
2879                                 vm_object_lock(m->object);
2880                                 RELEASE_PAGE(m);
2881                                 UNLOCK_AND_DEALLOCATE;
2882                         } else {
2883                                 vm_object_deallocate(object);
2884                         }
2885                         goto done;
2886                 }
2887
2888                 vm_object_unlock(retry_object);
2889                 if(m != VM_PAGE_NULL) {
2890                         vm_object_lock(m->object);
2891                 } else {
2892                         vm_object_lock(object);
2893                 }
2894
2895                 if ((retry_object != object) ||
2896                     (retry_offset != offset)) {
2897                         vm_map_unlock_read(map);
2898                         if(real_map != map)
2899                                 vm_map_unlock(real_map);
2900                         if(m != VM_PAGE_NULL) {
2901                                 RELEASE_PAGE(m);
2902                                 UNLOCK_AND_DEALLOCATE;
2903                         } else {
2904                                 vm_object_deallocate(object);
2905                         }
2906                         goto RetryFault;
2907                 }
2908
2909                 /*
2910                  *      Check whether the protection has changed or the object
2911                  *      has been copied while we left the map unlocked.
2912                  */
2913                 prot &= retry_prot;
2914                 if(m != VM_PAGE_NULL) {
2915                         vm_object_unlock(m->object);
2916                 } else {
2917                         vm_object_unlock(object);
2918                 }
2919         }
2920         if(m != VM_PAGE_NULL) {
2921                 vm_object_lock(m->object);
2922         } else {
2923                 vm_object_lock(object);
2924         }
2925
2926         /*
2927          *      If the copy object changed while the top-level object
2928          *      was unlocked, then we must take away write permission.
2929          */
2930
2931         if(m != VM_PAGE_NULL) {
2932                 if (m->object->copy != old_copy_object)
2933                         prot &= ~VM_PROT_WRITE;
2934         }
2935
2936         /*
2937          *      If we want to wire down this page, but no longer have
2938          *      adequate permissions, we must start all over.
2939          */
2940
2941         if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2942                 vm_map_verify_done(map, &version);
2943                 if(real_map != map)
2944                         vm_map_unlock(real_map);
2945                 if(m != VM_PAGE_NULL) {
2946                         RELEASE_PAGE(m);
2947                         UNLOCK_AND_DEALLOCATE;
2948                 } else {
2949                         vm_object_deallocate(object);
2950                 }
2951                 goto RetryFault;
2952         }
2953
2954         /*
2955          *      Put this page into the physical map.
2956          *      We had to do the unlock above because pmap_enter
2957          *      may cause other faults.  The page may be on
2958          *      the pageout queues.  If the pageout daemon comes
2959          *      across the page, it will remove it from the queues.
2960          */
2961         need_activation = FALSE;
2962
2963         if (m != VM_PAGE_NULL) {
2964                 if (m->no_isync == TRUE) {
2965                         pmap_sync_page_data_phys(m->phys_page);
2966
2967                         if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
2968                                 /*
2969                                  * found it in the cache, but this
2970                                  * is the first fault-in of the page (no_isync == TRUE)
2971                                  * so it must have come in as part of
2972                                  * a cluster... account 1 pagein against it
2973                                  */
2974                                  VM_STAT(pageins++);
2975                                  current_task()->pageins++;
2976
2977                                  type_of_fault = DBG_PAGEIN_FAULT;
2978                         }
2979                         if (m->clustered) {
2980                                 need_activation = TRUE;
2981                         }
2982                         m->no_isync = FALSE;
2983                 }
2984                 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2985
2986                 if(caller_pmap) {
2987                         PMAP_ENTER(caller_pmap,
2988                                         caller_pmap_addr, m,
2989                                         prot, cache_attr, wired);
2990                 } else {
2991                         PMAP_ENTER(pmap, vaddr, m,
2992                                         prot, cache_attr, wired);
2993                 }
2994
2995                 /*
2996                  * Add working set information for private objects here.
2997                  */
2998                 if (m->object->private) {
2999                         write_startup_file =
3000                                 vm_fault_tws_insert(map, real_map, vaddr,
3001                                             m->object, m->offset);
3002                 }
3003         } else {
3004
3005 #ifndef i386
3006                 vm_map_entry_t          entry;
3007                 vm_map_offset_t         laddr;
3008                 vm_map_offset_t         ldelta, hdelta;
3009
3010                 /*
3011                  * do a pmap block mapping from the physical address
3012                  * in the object
3013                  */
3014
3015                 /* While we do not worry about execution protection in   */
3016                 /* general, certain pages may have instruction execution */
3017                 /* disallowed.  We will check here, and if not allowed   */
3018                 /* to execute, we return with a protection failure.      */
3019
3020                 if((full_fault_type & VM_PROT_EXECUTE) &&
3021                         (!pmap_eligible_for_execute((ppnum_t)
3022                                 (object->shadow_offset >> 12)))) {
3023
3024                         vm_map_verify_done(map, &version);
3025                         if(real_map != map)
3026                                 vm_map_unlock(real_map);
3027                         vm_fault_cleanup(object, top_page);
3028                         vm_object_deallocate(object);
3029                         kr = KERN_PROTECTION_FAILURE;
3030                         goto done;
3031                 }
3032
3033                 if(real_map != map) {
3034                         vm_map_unlock(real_map);
3035                 }
3036                 if (original_map != map) {
3037                         vm_map_unlock_read(map);
3038                         vm_map_lock_read(original_map);
3039                         map = original_map;
3040                 }
3041                 real_map = map;
3042
3043                 laddr = vaddr;
3044                 hdelta = 0xFFFFF000;
3045                 ldelta = 0xFFFFF000;
3046
3047
3048                 while(vm_map_lookup_entry(map, laddr, &entry)) {
3049                         if(ldelta > (laddr - entry->vme_start))
3050                                 ldelta = laddr - entry->vme_start;
3051                         if(hdelta > (entry->vme_end - laddr))
3052                                 hdelta = entry->vme_end - laddr;
3053                         if(entry->is_sub_map) {
3054
3055                                 laddr = (laddr - entry->vme_start)
3056                                                         + entry->offset;
3057                                 vm_map_lock_read(entry->object.sub_map);
3058                                 if(map != real_map)
3059                                         vm_map_unlock_read(map);
3060                                 if(entry->use_pmap) {
3061                                         vm_map_unlock_read(real_map);
3062                                         real_map = entry->object.sub_map;
3063                                 }
3064                                 map = entry->object.sub_map;
3065
3066                         } else {
3067                                 break;
3068                         }
3069                 }
3070
3071                 if(vm_map_lookup_entry(map, laddr, &entry) &&
3072                                         (entry->object.vm_object != NULL) &&
3073                                         (entry->object.vm_object == object)) {
3074
3075
3076                         if(caller_pmap) {
3077                                 /* Set up a block mapped area */
3078                                 pmap_map_block(caller_pmap,
3079                                         (addr64_t)(caller_pmap_addr - ldelta),
3080                                         (((vm_map_offset_t)
3081                                     (entry->object.vm_object->shadow_offset))
3082                                         + entry->offset +
3083                                         (laddr - entry->vme_start)
3084                                                         - ldelta)>>12,
3085                                 ldelta + hdelta, prot,
3086                                 (VM_WIMG_MASK & (int)object->wimg_bits), 0);
3087                         } else {
3088                                 /* Set up a block mapped area */
3089                                 pmap_map_block(real_map->pmap,
3090                                    (addr64_t)(vaddr - ldelta),
3091                                    (((vm_map_offset_t)
3092                                     (entry->object.vm_object->shadow_offset))
3093                                        + entry->offset +
3094                                        (laddr - entry->vme_start) - ldelta)>>12,
3095                                    ldelta + hdelta, prot,
3096                                    (VM_WIMG_MASK & (int)object->wimg_bits), 0);
3097                         }
3098                 }
3099 #else
3100 #ifdef notyet
3101                 if(caller_pmap) {
3102                         pmap_enter(caller_pmap, caller_pmap_addr,
3103                                 object->shadow_offset>>12, prot, 0, TRUE);
3104                 } else {
3105                         pmap_enter(pmap, vaddr,
3106                                 object->shadow_offset>>12, prot, 0, TRUE);
3107                 }
3108                         /* Map it in */
3109 #endif
3110 #endif
3111
3112         }
3113
3114         /*
3115          *      If the page is not wired down and isn't already
3116          *      on a pageout queue, then put it where the
3117          *      pageout daemon can find it.
3118          */
3119         if(m != VM_PAGE_NULL) {
3120                 vm_page_lock_queues();
3121
3122                 if (m->clustered) {
3123                         vm_pagein_cluster_used++;
3124                         m->clustered = FALSE;
3125                 }
3126                 m->reference = TRUE;
3127
3128                 if (change_wiring) {
3129                         if (wired)
3130                                 vm_page_wire(m);
3131                         else
3132                                 vm_page_unwire(m);
3133                 }
3134 #if     VM_FAULT_STATIC_CONFIG
3135                 else {
3136                         if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
3137                                 vm_page_activate(m);
3138                 }
3139 #else
3140                 else if (software_reference_bits) {
3141                         if (!m->active && !m->inactive)
3142                                 vm_page_activate(m);
3143                         m->reference = TRUE;
3144                 } else {
3145                         vm_page_activate(m);
3146                 }
3147 #endif
3148                 vm_page_unlock_queues();
3149         }
3150
3151         /*
3152          *      Unlock everything, and return
3153          */
3154
3155         vm_map_verify_done(map, &version);
3156         if(real_map != map)
3157                 vm_map_unlock(real_map);
3158         if(m != VM_PAGE_NULL) {
3159                 PAGE_WAKEUP_DONE(m);
3160                 UNLOCK_AND_DEALLOCATE;
3161         } else {
3162                 vm_fault_cleanup(object, top_page);
3163                 vm_object_deallocate(object);
3164         }
3165         kr = KERN_SUCCESS;
3166
3167 #undef  UNLOCK_AND_DEALLOCATE
3168 #undef  RELEASE_PAGE
3169
3170     done:
3171         if(write_startup_file)
3172                 tws_send_startup_info(current_task());
3173
3174         thread_interrupt_level(interruptible_state);
3175
3176         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
3177                               vaddr,
3178                               type_of_fault & 0xff,
3179                               kr,
3180                               type_of_fault >> 8,
3181                               0);
3182
3183         return(kr);
3184 }
3185
3186 /*
3187  *      vm_fault_wire:
3188  *
3189  *      Wire down a range of virtual addresses in a map.
3190  */
3191 kern_return_t
3192 vm_fault_wire(
3193         vm_map_t        map,
3194         vm_map_entry_t  entry,
3195         pmap_t          pmap,
3196         vm_map_offset_t pmap_addr)
3197 {
3198
3199         register vm_map_offset_t        va;
3200         register vm_map_offset_t        end_addr = entry->vme_end;
3201         register kern_return_t  rc;
3202
3203         assert(entry->in_transition);
3204
3205         if ((entry->object.vm_object != NULL) &&
3206                         !entry->is_sub_map &&
3207                         entry->object.vm_object->phys_contiguous) {
3208                 return KERN_SUCCESS;
3209         }
3210
3211         /*
3212          *      Inform the physical mapping system that the
3213          *      range of addresses may not fault, so that
3214          *      page tables and such can be locked down as well.
3215          */
3216
3217         pmap_pageable(pmap, pmap_addr,
3218                 pmap_addr + (end_addr - entry->vme_start), FALSE);
3219
3220         /*
3221          *      We simulate a fault to get the page and enter it
3222          *      in the physical map.
3223          */
3224
3225         for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3226                 if ((rc = vm_fault_wire_fast(
3227                         map, va, entry, pmap,
3228                         pmap_addr + (va - entry->vme_start)
3229                         )) != KERN_SUCCESS) {
3230                         rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
3231                                 (pmap == kernel_pmap) ?
3232                                         THREAD_UNINT : THREAD_ABORTSAFE,
3233                                 pmap, pmap_addr + (va - entry->vme_start));
3234                 }
3235
3236                 if (rc != KERN_SUCCESS) {
3237                         struct vm_map_entry     tmp_entry = *entry;
3238
3239                         /* unwire wired pages */
3240                         tmp_entry.vme_end = va;
3241                         vm_fault_unwire(map,
3242                                 &tmp_entry, FALSE, pmap, pmap_addr);
3243
3244                         return rc;
3245                 }
3246         }
3247         return KERN_SUCCESS;
3248 }
3249
3250 /*
3251  *      vm_fault_unwire:
3252  *
3253  *      Unwire a range of virtual addresses in a map.
3254  */
3255 void
3256 vm_fault_unwire(
3257         vm_map_t        map,
3258         vm_map_entry_t  entry,
3259         boolean_t       deallocate,
3260         pmap_t          pmap,
3261         vm_map_offset_t pmap_addr)
3262 {
3263         register vm_map_offset_t        va;
3264         register vm_map_offset_t        end_addr = entry->vme_end;
3265         vm_object_t             object;
3266
3267         object = (entry->is_sub_map)
3268                         ? VM_OBJECT_NULL : entry->object.vm_object;
3269
3270         /*
3271          *      Since the pages are wired down, we must be able to
3272          *      get their mappings from the physical map system.
3273          */
3274
3275         for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3276                 pmap_change_wiring(pmap,
3277                         pmap_addr + (va - entry->vme_start), FALSE);
3278
3279                 if (object == VM_OBJECT_NULL) {
3280                         (void) vm_fault(map, va, VM_PROT_NONE,
3281                                         TRUE, THREAD_UNINT, pmap, pmap_addr);
3282                 } else if (object->phys_contiguous) {
3283                         continue;
3284                 } else {
3285                         vm_prot_t       prot;
3286                         vm_page_t       result_page;
3287                         vm_page_t       top_page;
3288                         vm_object_t     result_object;
3289                         vm_fault_return_t result;
3290
3291                         do {
3292                                 prot = VM_PROT_NONE;
3293
3294                                 vm_object_lock(object);
3295                                 vm_object_paging_begin(object);
3296                                 XPR(XPR_VM_FAULT,
3297                                         "vm_fault_unwire -> vm_fault_page\n",
3298                                         0,0,0,0,0);
3299                                 result = vm_fault_page(object,
3300                                                 entry->offset +
3301                                                   (va - entry->vme_start),
3302                                                 VM_PROT_NONE, TRUE,
3303                                                 THREAD_UNINT,
3304                                                 entry->offset,
3305                                                 entry->offset +
3306                                                        (entry->vme_end
3307                                                         - entry->vme_start),
3308                                                 entry->behavior,
3309                                                 &prot,
3310                                                 &result_page,
3311                                                 &top_page,
3312                                                 (int *)0,
3313                                                 0, map->no_zero_fill,
3314                                                 FALSE, NULL, 0);
3315                         } while (result == VM_FAULT_RETRY);
3316
3317                         if (result != VM_FAULT_SUCCESS)
3318                                 panic("vm_fault_unwire: failure");
3319
3320                         result_object = result_page->object;
3321                         if (deallocate) {
3322                                 assert(!result_page->fictitious);
3323                                 pmap_disconnect(result_page->phys_page);
3324                                 VM_PAGE_FREE(result_page);
3325                         } else {
3326                                 vm_page_lock_queues();
3327                                 vm_page_unwire(result_page);
3328                                 vm_page_unlock_queues();
3329                                 PAGE_WAKEUP_DONE(result_page);
3330                         }
3331
3332                         vm_fault_cleanup(result_object, top_page);
3333                 }
3334         }
3335
3336         /*
3337          *      Inform the physical mapping system that the range
3338          *      of addresses may fault, so that page tables and
3339          *      such may be unwired themselves.
3340          */
3341
3342         pmap_pageable(pmap, pmap_addr,
3343                 pmap_addr + (end_addr - entry->vme_start), TRUE);
3344
3345 }
3346
3347 /*
3348  *      vm_fault_wire_fast:
3349  *
3350  *      Handle common case of a wire down page fault at the given address.
3351  *      If successful, the page is inserted into the associated physical map.
3352  *      The map entry is passed in to avoid the overhead of a map lookup.
3353  *
3354  *      NOTE: the given address should be truncated to the
3355  *      proper page address.
3356  *
3357  *      KERN_SUCCESS is returned if the page fault is handled; otherwise,
3358  *      a standard error specifying why the fault is fatal is returned.
3359  *
3360  *      The map in question must be referenced, and remains so.
3361  *      Caller has a read lock on the map.
3362  *
3363  *      This is a stripped version of vm_fault() for wiring pages.  Anything
3364  *      other than the common case will return KERN_FAILURE, and the caller
3365  *      is expected to call vm_fault().
3366  */
3367 kern_return_t
3368 vm_fault_wire_fast(
3369         __unused vm_map_t       map,
3370         vm_map_offset_t va,
3371         vm_map_entry_t  entry,
3372         pmap_t                  pmap,
3373         vm_map_offset_t pmap_addr)
3374 {
3375         vm_object_t             object;
3376         vm_object_offset_t      offset;
3377         register vm_page_t      m;
3378         vm_prot_t               prot;
3379         thread_t                thread = current_thread();
3380         unsigned int            cache_attr;
3381
3382         VM_STAT(faults++);
3383
3384         if (thread != THREAD_NULL && thread->task != TASK_NULL)
3385           thread->task->faults++;
3386
3387 /*
3388  *      Recovery actions
3389  */
3390
3391 #undef  RELEASE_PAGE
3392 #define RELEASE_PAGE(m) {                               \
3393         PAGE_WAKEUP_DONE(m);                            \
3394         vm_page_lock_queues();                          \
3395         vm_page_unwire(m);                              \
3396         vm_page_unlock_queues();                        \
3397 }
3398
3399
3400 #undef  UNLOCK_THINGS
3401 #define UNLOCK_THINGS   {                               \
3402         object->paging_in_progress--;                   \
3403         vm_object_unlock(object);                       \
3404 }
3405
3406 #undef  UNLOCK_AND_DEALLOCATE
3407 #define UNLOCK_AND_DEALLOCATE   {                       \
3408         UNLOCK_THINGS;                                  \
3409         vm_object_deallocate(object);                   \
3410 }
3411 /*
3412  *      Give up and have caller do things the hard way.
3413  */
3414
3415 #define GIVE_UP {                                       \
3416         UNLOCK_AND_DEALLOCATE;                          \
3417         return(KERN_FAILURE);                           \
3418 }
3419
3420
3421         /*
3422          *      If this entry is not directly to a vm_object, bail out.
3423          */
3424         if (entry->is_sub_map)
3425                 return(KERN_FAILURE);
3426
3427         /*
3428          *      Find the backing store object and offset into it.
3429          */
3430
3431         object = entry->object.vm_object;
3432         offset = (va - entry->vme_start) + entry->offset;
3433         prot = entry->protection;
3434
3435         /*
3436          *      Make a reference to this object to prevent its
3437          *      disposal while we are messing with it.
3438          */
3439
3440         vm_object_lock(object);
3441         assert(object->ref_count > 0);
3442         object->ref_count++;
3443         vm_object_res_reference(object);
3444         object->paging_in_progress++;
3445
3446         /*
3447          *      INVARIANTS (through entire routine):
3448          *
3449          *      1)      At all times, we must either have the object
3450          *              lock or a busy page in some object to prevent
3451          *              some other thread from trying to bring in
3452          *              the same page.
3453          *
3454          *      2)      Once we have a busy page, we must remove it from
3455          *              the pageout queues, so that the pageout daemon
3456          *              will not grab it away.
3457          *
3458          */
3459
3460         /*
3461          *      Look for page in top-level object.  If it's not there or
3462          *      there's something going on, give up.
3463          * ENCRYPTED SWAP: use the slow fault path, since we'll need to
3464          * decrypt the page before wiring it down.
3465          */
3466         m = vm_page_lookup(object, offset);
3467         if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) ||
3468             (m->unusual && ( m->error || m->restart || m->absent ||
3469                                 prot & m->page_lock))) {
3470
3471                 GIVE_UP;
3472         }
3473         ASSERT_PAGE_DECRYPTED(m);
3474
3475         /*
3476          *      Wire the page down now.  All bail outs beyond this
3477          *      point must unwire the page.
3478          */
3479
3480         vm_page_lock_queues();
3481         vm_page_wire(m);
3482         vm_page_unlock_queues();
3483
3484         /*
3485          *      Mark page busy for other threads.
3486          */
3487         assert(!m->busy);
3488         m->busy = TRUE;
3489         assert(!m->absent);
3490
3491         /*
3492          *      Give up if the page is being written and there's a copy object
3493          */
3494         if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3495                 RELEASE_PAGE(m);
3496                 GIVE_UP;
3497         }
3498
3499         /*
3500          *      Put this page into the physical map.
3501          *      We have to unlock the object because pmap_enter
3502          *      may cause other faults.
3503          */
3504         if (m->no_isync == TRUE) {
3505                 pmap_sync_page_data_phys(m->phys_page);
3506
3507                 m->no_isync = FALSE;
3508         }
3509
3510         cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3511
3512         PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);
3513
3514         /*
3515          *      Unlock everything, and return
3516          */
3517
3518         PAGE_WAKEUP_DONE(m);
3519         UNLOCK_AND_DEALLOCATE;
3520
3521         return(KERN_SUCCESS);
3522
3523 }
3524
3525 /*
3526  *      Routine:        vm_fault_copy_cleanup
3527  *      Purpose:
3528  *              Release a page used by vm_fault_copy.
3529  */
3530
3531 void
3532 vm_fault_copy_cleanup(
3533         vm_page_t       page,
3534         vm_page_t       top_page)
3535 {
3536         vm_object_t     object = page->object;
3537
3538         vm_object_lock(object);
3539         PAGE_WAKEUP_DONE(page);
3540         vm_page_lock_queues();
3541         if (!page->active && !page->inactive)
3542                 vm_page_activate(page);
3543         vm_page_unlock_queues();
3544         vm_fault_cleanup(object, top_page);
3545 }
3546
3547 void
3548 vm_fault_copy_dst_cleanup(
3549         vm_page_t       page)
3550 {
3551         vm_object_t     object;
3552
3553         if (page != VM_PAGE_NULL) {
3554                 object = page->object;
3555                 vm_object_lock(object);
3556                 vm_page_lock_queues();
3557                 vm_page_unwire(page);
3558                 vm_page_unlock_queues();
3559                 vm_object_paging_end(object);
3560                 vm_object_unlock(object);
3561         }
3562 }
3563
3564 /*
3565  *      Routine:        vm_fault_copy
3566  *
3567  *      Purpose:
3568  *              Copy pages from one virtual memory object to another --
3569  *              neither the source nor destination pages need be resident.
3570  *
3571  *              Before actually copying a page, the version associated with
3572  *              the destination address map wil be verified.
3573  *
3574  *      In/out conditions:
3575  *              The caller must hold a reference, but not a lock, to
3576  *              each of the source and destination objects and to the
3577  *              destination map.
3578  *
3579  *      Results:
3580  *              Returns KERN_SUCCESS if no errors were encountered in
3581  *              reading or writing the data.  Returns KERN_INTERRUPTED if
3582  *              the operation was interrupted (only possible if the
3583  *              "interruptible" argument is asserted).  Other return values
3584  *              indicate a permanent error in copying the data.
3585  *
3586  *              The actual amount of data copied will be returned in the
3587  *              "copy_size" argument.  In the event that the destination map
3588  *              verification failed, this amount may be less than the amount
3589  *              requested.
3590  */
3591 kern_return_t
3592 vm_fault_copy(
3593         vm_object_t             src_object,
3594         vm_object_offset_t      src_offset,
3595         vm_map_size_t           *copy_size,             /* INOUT */
3596         vm_object_t             dst_object,
3597         vm_object_offset_t      dst_offset,
3598         vm_map_t                dst_map,
3599         vm_map_version_t         *dst_version,
3600         int                     interruptible)
3601 {
3602         vm_page_t               result_page;
3603
3604         vm_page_t               src_page;
3605         vm_page_t               src_top_page;
3606         vm_prot_t               src_prot;
3607
3608         vm_page_t               dst_page;
3609         vm_page_t               dst_top_page;
3610         vm_prot_t               dst_prot;
3611
3612         vm_map_size_t           amount_left;
3613         vm_object_t             old_copy_object;
3614         kern_return_t           error = 0;
3615
3616         vm_map_size_t           part_size;
3617
3618         /*
3619          * In order not to confuse the clustered pageins, align
3620          * the different offsets on a page boundary.
3621          */
3622         vm_object_offset_t      src_lo_offset = vm_object_trunc_page(src_offset);
3623         vm_object_offset_t      dst_lo_offset = vm_object_trunc_page(dst_offset);
3624         vm_object_offset_t      src_hi_offset = vm_object_round_page(src_offset + *copy_size);
3625         vm_object_offset_t      dst_hi_offset = vm_object_round_page(dst_offset + *copy_size);
3626
3627 #define RETURN(x)                                       \
3628         MACRO_BEGIN                                     \
3629         *copy_size -= amount_left;                      \
3630         MACRO_RETURN(x);                                \
3631         MACRO_END
3632
3633         amount_left = *copy_size;
3634         do { /* while (amount_left > 0) */
3635                 /*
3636                  * There may be a deadlock if both source and destination
3637                  * pages are the same. To avoid this deadlock, the copy must
3638                  * start by getting the destination page in order to apply
3639                  * COW semantics if any.
3640                  */
3641
3642         RetryDestinationFault: ;
3643
3644                 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3645
3646                 vm_object_lock(dst_object);
3647                 vm_object_paging_begin(dst_object);
3648
3649                 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3650                 switch (vm_fault_page(dst_object,
3651                                       vm_object_trunc_page(dst_offset),
3652                                       VM_PROT_WRITE|VM_PROT_READ,
3653                                       FALSE,
3654                                       interruptible,
3655                                       dst_lo_offset,
3656                                       dst_hi_offset,
3657                                       VM_BEHAVIOR_SEQUENTIAL,
3658                                       &dst_prot,
3659                                       &dst_page,
3660                                       &dst_top_page,
3661                                       (int *)0,
3662                                       &error,
3663                                       dst_map->no_zero_fill,
3664                                       FALSE, NULL, 0)) {
3665                 case VM_FAULT_SUCCESS:
3666                         break;
3667                 case VM_FAULT_RETRY:
3668                         goto RetryDestinationFault;
3669                 case VM_FAULT_MEMORY_SHORTAGE:
3670                         if (vm_page_wait(interruptible))
3671                                 goto RetryDestinationFault;
3672                         /* fall thru */
3673                 case VM_FAULT_INTERRUPTED:
3674                         RETURN(MACH_SEND_INTERRUPTED);
3675                 case VM_FAULT_FICTITIOUS_SHORTAGE:
3676                         vm_page_more_fictitious();
3677                         goto RetryDestinationFault;
3678                 case VM_FAULT_MEMORY_ERROR:
3679                         if (error)
3680                                 return (error);
3681                         else
3682                                 return(KERN_MEMORY_ERROR);
3683                 }
3684                 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3685
3686                 old_copy_object = dst_page->object->copy;
3687
3688                 /*
3689                  * There exists the possiblity that the source and
3690                  * destination page are the same.  But we can't
3691                  * easily determine that now.  If they are the
3692                  * same, the call to vm_fault_page() for the
3693                  * destination page will deadlock.  To prevent this we
3694                  * wire the page so we can drop busy without having
3695                  * the page daemon steal the page.  We clean up the
3696                  * top page  but keep the paging reference on the object
3697                  * holding the dest page so it doesn't go away.
3698                  */
3699
3700                 vm_page_lock_queues();
3701                 vm_page_wire(dst_page);
3702                 vm_page_unlock_queues();
3703                 PAGE_WAKEUP_DONE(dst_page);
3704                 vm_object_unlock(dst_page->object);
3705
3706                 if (dst_top_page != VM_PAGE_NULL) {
3707                         vm_object_lock(dst_object);
3708                         VM_PAGE_FREE(dst_top_page);
3709                         vm_object_paging_end(dst_object);
3710                         vm_object_unlock(dst_object);
3711                 }
3712
3713         RetrySourceFault: ;
3714
3715                 if (src_object == VM_OBJECT_NULL) {
3716                         /*
3717                          *      No source object.  We will just
3718                          *      zero-fill the page in dst_object.
3719                          */
3720                         src_page = VM_PAGE_NULL;
3721                         result_page = VM_PAGE_NULL;
3722                 } else {
3723                         vm_object_lock(src_object);
3724                         src_page = vm_page_lookup(src_object,
3725                                                   vm_object_trunc_page(src_offset));
3726                         if (src_page == dst_page) {
3727                                 src_prot = dst_prot;
3728                                 result_page = VM_PAGE_NULL;
3729                         } else {
3730                                 src_prot = VM_PROT_READ;
3731                                 vm_object_paging_begin(src_object);
3732
3733                                 XPR(XPR_VM_FAULT,
3734                                         "vm_fault_copy(2) -> vm_fault_page\n",
3735                                         0,0,0,0,0);
3736                                 switch (vm_fault_page(src_object,
3737                                                       vm_object_trunc_page(src_offset),
3738                                                       VM_PROT_READ,
3739                                                       FALSE,
3740                                                       interruptible,
3741                                                       src_lo_offset,
3742                                                       src_hi_offset,
3743                                                       VM_BEHAVIOR_SEQUENTIAL,
3744                                                       &src_prot,
3745                                                       &result_page,
3746                                                       &src_top_page,
3747                                                       (int *)0,
3748                                                       &error,
3749                                                       FALSE,
3750                                                       FALSE, NULL, 0)) {
3751
3752                                 case VM_FAULT_SUCCESS:
3753                                         break;
3754                                 case VM_FAULT_RETRY:
3755                                         goto RetrySourceFault;
3756                                 case VM_FAULT_MEMORY_SHORTAGE:
3757                                         if (vm_page_wait(interruptible))
3758                                                 goto RetrySourceFault;
3759                                         /* fall thru */
3760                                 case VM_FAULT_INTERRUPTED:
3761                                         vm_fault_copy_dst_cleanup(dst_page);
3762                                         RETURN(MACH_SEND_INTERRUPTED);
3763                                 case VM_FAULT_FICTITIOUS_SHORTAGE:
3764                                         vm_page_more_fictitious();
3765                                         goto RetrySourceFault;
3766                                 case VM_FAULT_MEMORY_ERROR:
3767                                         vm_fault_copy_dst_cleanup(dst_page);
3768                                         if (error)
3769                                                 return (error);
3770                                         else
3771                                                 return(KERN_MEMORY_ERROR);
3772                                 }
3773
3774
3775                                 assert((src_top_page == VM_PAGE_NULL) ==
3776                                        (result_page->object == src_object));
3777                         }
3778                         assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
3779                         vm_object_unlock(result_page->object);
3780                 }
3781
3782                 if (!vm_map_verify(dst_map, dst_version)) {
3783                         if (result_page != VM_PAGE_NULL && src_page != dst_page)
3784                                 vm_fault_copy_cleanup(result_page, src_top_page);
3785                         vm_fault_copy_dst_cleanup(dst_page);
3786                         break;
3787                 }
3788
3789                 vm_object_lock(dst_page->object);
3790
3791                 if (dst_page->object->copy != old_copy_object) {
3792                         vm_object_unlock(dst_page->object);
3793                         vm_map_verify_done(dst_map, dst_version);
3794                         if (result_page != VM_PAGE_NULL && src_page != dst_page)
3795                                 vm_fault_copy_cleanup(result_page, src_top_page);
3796                         vm_fault_copy_dst_cleanup(dst_page);
3797                         break;
3798                 }
3799                 vm_object_unlock(dst_page->object);
3800
3801                 /*
3802                  *      Copy the page, and note that it is dirty
3803                  *      immediately.
3804                  */
3805
3806                 if (!page_aligned(src_offset) ||
3807                         !page_aligned(dst_offset) ||
3808                         !page_aligned(amount_left)) {
3809
3810                         vm_object_offset_t      src_po,
3811                                                 dst_po;
3812
3813                         src_po = src_offset - vm_object_trunc_page(src_offset);
3814                         dst_po = dst_offset - vm_object_trunc_page(dst_offset);
3815
3816                         if (dst_po > src_po) {
3817                                 part_size = PAGE_SIZE - dst_po;
3818                         } else {
3819                                 part_size = PAGE_SIZE - src_po;
3820                         }
3821                         if (part_size > (amount_left)){
3822                                 part_size = amount_left;
3823                         }
3824
3825                         if (result_page == VM_PAGE_NULL) {
3826                                 vm_page_part_zero_fill(dst_page,
3827                                                         dst_po, part_size);
3828                         } else {
3829                                 vm_page_part_copy(result_page, src_po,
3830                                         dst_page, dst_po, part_size);
3831                                 if(!dst_page->dirty){
3832                                         vm_object_lock(dst_object);
3833                                         dst_page->dirty = TRUE;
3834                                         vm_object_unlock(dst_page->object);
3835                                 }
3836
3837                         }
3838                 } else {
3839                         part_size = PAGE_SIZE;
3840
3841                         if (result_page == VM_PAGE_NULL)
3842                                 vm_page_zero_fill(dst_page);
3843                         else{
3844                                 vm_page_copy(result_page, dst_page);
3845                                 if(!dst_page->dirty){
3846                                         vm_object_lock(dst_object);
3847                                         dst_page->dirty = TRUE;
3848                                         vm_object_unlock(dst_page->object);
3849                                 }
3850                         }
3851
3852                 }
3853
3854                 /*
3855                  *      Unlock everything, and return
3856                  */
3857
3858                 vm_map_verify_done(dst_map, dst_version);
3859
3860                 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3861                         vm_fault_copy_cleanup(result_page, src_top_page);
3862                 vm_fault_copy_dst_cleanup(dst_page);
3863
3864                 amount_left -= part_size;
3865                 src_offset += part_size;
3866                 dst_offset += part_size;
3867         } while (amount_left > 0);
3868
3869         RETURN(KERN_SUCCESS);
3870 #undef  RETURN
3871
3872         /*NOTREACHED*/
3873 }
3874
3875 #ifdef  notdef
3876
3877 /*
3878  *      Routine:        vm_fault_page_overwrite
3879  *
3880  *      Description:
3881  *              A form of vm_fault_page that assumes that the
3882  *              resulting page will be overwritten in its entirety,
3883  *              making it unnecessary to obtain the correct *contents*
3884  *              of the page.
3885  *
3886  *      Implementation:
3887  *              XXX Untested.  Also unused.  Eventually, this technology
3888  *              could be used in vm_fault_copy() to advantage.
3889  */
3890 vm_fault_return_t
3891 vm_fault_page_overwrite(
3892         register
3893         vm_object_t             dst_object,
3894         vm_object_offset_t      dst_offset,
3895         vm_page_t               *result_page)   /* OUT */
3896 {
3897         register
3898         vm_page_t       dst_page;
3899         kern_return_t   wait_result;
3900
3901 #define interruptible   THREAD_UNINT    /* XXX */
3902
3903         while (TRUE) {
3904                 /*
3905                  *      Look for a page at this offset
3906                  */
3907
3908                 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3909                                  == VM_PAGE_NULL) {
3910                         /*
3911                          *      No page, no problem... just allocate one.
3912                          */
3913
3914                         dst_page = vm_page_alloc(dst_object, dst_offset);
3915                         if (dst_page == VM_PAGE_NULL) {
3916                                 vm_object_unlock(dst_object);
3917                                 VM_PAGE_WAIT();
3918                                 vm_object_lock(dst_object);
3919                                 continue;
3920                         }
3921
3922                         /*
3923                          *      Pretend that the memory manager
3924                          *      write-protected the page.
3925                          *
3926                          *      Note that we will be asking for write
3927                          *      permission without asking for the data
3928                          *      first.
3929                          */
3930
3931                         dst_page->overwriting = TRUE;
3932                         dst_page->page_lock = VM_PROT_WRITE;
3933                         dst_page->absent = TRUE;
3934                         dst_page->unusual = TRUE;
3935                         dst_object->absent_count++;
3936
3937                         break;
3938
3939                         /*
3940                          *      When we bail out, we might have to throw
3941                          *      away the page created here.
3942                          */
3943
3944 #define DISCARD_PAGE                                            \
3945         MACRO_BEGIN                                             \
3946         vm_object_lock(dst_object);                             \
3947         dst_page = vm_page_lookup(dst_object, dst_offset);      \
3948         if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3949                 VM_PAGE_FREE(dst_page);                         \
3950         vm_object_unlock(dst_object);                           \
3951         MACRO_END
3952                 }
3953
3954                 /*
3955                  *      If the page is write-protected...
3956                  */
3957
3958                 if (dst_page->page_lock & VM_PROT_WRITE) {
3959                         /*
3960                          *      ... and an unlock request hasn't been sent
3961                          */
3962
3963                         if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3964                                 vm_prot_t       u;
3965                                 kern_return_t   rc;
3966
3967                                 /*
3968                                  *      ... then send one now.
3969                                  */
3970
3971                                 if (!dst_object->pager_ready) {
3972                                         wait_result = vm_object_assert_wait(dst_object,
3973                                                                 VM_OBJECT_EVENT_PAGER_READY,
3974                                                                 interruptible);
3975                                         vm_object_unlock(dst_object);
3976                                         if (wait_result == THREAD_WAITING)
3977                                                 wait_result = thread_block(THREAD_CONTINUE_NULL);
3978                                         if (wait_result != THREAD_AWAKENED) {
3979                                                 DISCARD_PAGE;
3980                                                 return(VM_FAULT_INTERRUPTED);
3981                                         }
3982                                         continue;
3983                                 }
3984
3985                                 u = dst_page->unlock_request |= VM_PROT_WRITE;
3986                                 vm_object_unlock(dst_object);
3987
3988                                 if ((rc = memory_object_data_unlock(
3989                                                 dst_object->pager,
3990                                                 dst_offset + dst_object->paging_offset,
3991                                                 PAGE_SIZE,
3992                                                 u)) != KERN_SUCCESS) {
3993                                         if (vm_fault_debug)
3994                                             printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3995                                         DISCARD_PAGE;
3996                                         return((rc == MACH_SEND_INTERRUPTED) ?
3997                                                 VM_FAULT_INTERRUPTED :
3998                                                 VM_FAULT_MEMORY_ERROR);
3999                                 }
4000                                 vm_object_lock(dst_object);
4001                                 continue;
4002                         }
4003
4004                         /* ... fall through to wait below */
4005                 } else {
4006                         /*
4007                          *      If the page isn't being used for other
4008                          *      purposes, then we're done.
4009                          */
4010                         if ( ! (dst_page->busy || dst_page->absent ||
4011                                 dst_page->error || dst_page->restart) )
4012                                 break;
4013                 }
4014
4015                 wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
4016                 vm_object_unlock(dst_object);
4017                 if (wait_result == THREAD_WAITING)
4018                         wait_result = thread_block(THREAD_CONTINUE_NULL);
4019                 if (wait_result != THREAD_AWAKENED) {
4020                         DISCARD_PAGE;
4021                         return(VM_FAULT_INTERRUPTED);
4022                 }
4023         }
4024
4025         *result_page = dst_page;
4026         return(VM_FAULT_SUCCESS);
4027
4028 #undef  interruptible
4029 #undef  DISCARD_PAGE
4030 }
4031
4032 #endif  /* notdef */
4033
4034 #if     VM_FAULT_CLASSIFY
4035 /*
4036  *      Temporary statistics gathering support.
4037  */
4038
4039 /*
4040  *      Statistics arrays:
4041  */
4042 #define VM_FAULT_TYPES_MAX      5
4043 #define VM_FAULT_LEVEL_MAX      8
4044
4045 int     vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
4046
4047 #define VM_FAULT_TYPE_ZERO_FILL 0
4048 #define VM_FAULT_TYPE_MAP_IN    1
4049 #define VM_FAULT_TYPE_PAGER     2
4050 #define VM_FAULT_TYPE_COPY      3
4051 #define VM_FAULT_TYPE_OTHER     4
4052
4053
4054 void
4055 vm_fault_classify(vm_object_t           object,
4056                   vm_object_offset_t    offset,
4057                   vm_prot_t             fault_type)
4058 {
4059         int             type, level = 0;
4060         vm_page_t       m;
4061
4062         while (TRUE) {
4063                 m = vm_page_lookup(object, offset);
4064                 if (m != VM_PAGE_NULL) {
4065                         if (m->busy || m->error || m->restart || m->absent ||
4066                             fault_type & m->page_lock) {
4067                                 type = VM_FAULT_TYPE_OTHER;
4068                                 break;
4069                         }
4070                         if (((fault_type & VM_PROT_WRITE) == 0) ||
4071                             ((level == 0) && object->copy == VM_OBJECT_NULL)) {
4072                                 type = VM_FAULT_TYPE_MAP_IN;
4073                                 break;
4074                         }
4075                         type = VM_FAULT_TYPE_COPY;
4076                         break;
4077                 }
4078                 else {
4079                         if (object->pager_created) {
4080                                 type = VM_FAULT_TYPE_PAGER;
4081                                 break;
4082                         }
4083                         if (object->shadow == VM_OBJECT_NULL) {
4084                                 type = VM_FAULT_TYPE_ZERO_FILL;
4085                                 break;
4086                         }
4087
4088                         offset += object->shadow_offset;
4089                         object = object->shadow;
4090                         level++;
4091                         continue;
4092                 }
4093         }
4094
4095         if (level > VM_FAULT_LEVEL_MAX)
4096                 level = VM_FAULT_LEVEL_MAX;
4097
4098         vm_fault_stats[type][level] += 1;
4099
4100         return;
4101 }
4102
4103 /* cleanup routine to call from debugger */
4104
4105 void
4106 vm_fault_classify_init(void)
4107 {
4108         int type, level;
4109
4110         for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
4111                 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
4112                         vm_fault_stats[type][level] = 0;
4113                 }
4114         }
4115
4116         return;
4117 }
4118 #endif  /* VM_FAULT_CLASSIFY */