osfmk/vm/vm_fault.c

   1 /*
   2  * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   vm_fault.c
  60  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  61  *
  62  *      Page fault handling module.
  63  */
  64
  65 #include <mach_cluster_stats.h>
  66 #include <mach_pagemap.h>
  67 #include <mach_kdb.h>
  68
  69 #include <mach/mach_types.h>
  70 #include <mach/kern_return.h>
  71 #include <mach/message.h>       /* for error codes */
  72 #include <mach/vm_param.h>
  73 #include <mach/vm_behavior.h>
  74 #include <mach/memory_object.h>
  75                                 /* For memory_object_data_{request,unlock} */
  76
  77 #include <kern/kern_types.h>
  78 #include <kern/host_statistics.h>
  79 #include <kern/counters.h>
  80 #include <kern/task.h>
  81 #include <kern/thread.h>
  82 #include <kern/sched_prim.h>
  83 #include <kern/host.h>
  84 #include <kern/xpr.h>
  85 #include <kern/mach_param.h>
  86 #include <kern/macro_help.h>
  87 #include <kern/zalloc.h>
  88 #include <kern/misc_protos.h>
  89
  90 #include <ppc/proc_reg.h>
  91
  92 #include <vm/vm_fault.h>
  93 #include <vm/task_working_set.h>
  94 #include <vm/vm_map.h>
  95 #include <vm/vm_object.h>
  96 #include <vm/vm_page.h>
  97 #include <vm/vm_kern.h>
  98 #include <vm/pmap.h>
  99 #include <vm/vm_pageout.h>
 100 #include <vm/vm_protos.h>
 101
 102 #include <sys/kdebug.h>
 103
 104 #define VM_FAULT_CLASSIFY       0 /* nonzero: compile in the vm_fault_classify() declarations */
 105 #define VM_FAULT_STATIC_CONFIG  1 /* nonzero: compile out the run-time tunables guarded by !VM_FAULT_STATIC_CONFIG */
 106
 107 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) nonzero: compile in the dbgTrace() calls */
 108
 109 unsigned int    vm_object_absent_max = 50; /* NOTE(review): presumably a cap on absent pages per object -- confirm against its users */
 110
 111 int             vm_fault_debug = 0;
 112
 113 #if     !VM_FAULT_STATIC_CONFIG /* run-time tunables, only present in the non-static configuration */
 114 boolean_t       vm_fault_dirty_handling = FALSE; /* TRUE: vm_fault_page strips VM_PROT_WRITE from *protection on non-write faults */
 115 boolean_t       vm_fault_interruptible = FALSE; /* FALSE: vm_fault_page forces interruptible to THREAD_UNINT */
 116 boolean_t       software_reference_bits = TRUE;
 117 #endif  /* !VM_FAULT_STATIC_CONFIG */
 118
 119 #if     MACH_KDB
 120 extern struct db_watchpoint *db_watchpoint_list; /* non-null: vm_fault_page withholds write permission on read faults */
 121 #endif  /* MACH_KDB */
 122
 123
 124 /* Forward declarations of internal routines (prototypes only; see each definition for its contract). */
 125 extern kern_return_t vm_fault_wire_fast(
 126                                 vm_map_t        map,
 127                                 vm_map_offset_t va,
 128                                 vm_map_entry_t  entry,
 129                                 pmap_t          pmap,
 130                                 vm_map_offset_t pmap_addr);
 131
 132 extern void vm_fault_continue(void);
 133
 134 extern void vm_fault_copy_cleanup(
 135                                 vm_page_t       page,
 136                                 vm_page_t       top_page);
 137
 138 extern void vm_fault_copy_dst_cleanup(
 139                                 vm_page_t       page);
 140
 141 #if     VM_FAULT_CLASSIFY /* declared only when VM_FAULT_CLASSIFY is nonzero */
 142 extern void vm_fault_classify(vm_object_t       object,
 143                           vm_object_offset_t    offset,
 144                           vm_prot_t             fault_type);
 145
 146 extern void vm_fault_classify_init(void);
 147 #endif  /* VM_FAULT_CLASSIFY */
 148
 149 /*
 150  *      Routine:        vm_fault_init
 151  *      Purpose:
 152  *              Initialize our private data structures (currently none: the body is empty).
 153  */
 154 void
 155 vm_fault_init(void)
 156 {       /* nothing to set up at present; takes no arguments and returns nothing */
 157 }
 158
 159 /*
 160  *      Routine:        vm_fault_cleanup
 161  *      Purpose:
 162  *              Clean up the result of vm_fault_page.
 163  *      Results:
 164  *              The paging reference for "object" is released.
 165  *              "object" is unlocked.
 166  *              If "top_page" is not null, "top_page" is
 167  *              freed and the paging reference for the object
 168  *              containing it is released.  That object is locked
 169  *              here for the duration of the free and unlocked again before returning.
 170  *      In/out conditions:
 171  *              "object" must be locked.
 172  */
 173 void
 174 vm_fault_cleanup(
 175         register vm_object_t    object,         /* locked on entry, unlocked on return */
 176         register vm_page_t      top_page)       /* page to free, or VM_PAGE_NULL for none */
 177 {
 178         vm_object_paging_end(object);           /* release the paging reference first, while still locked */
 179         vm_object_unlock(object);
 180
 181         if (top_page != VM_PAGE_NULL) {
 182             object = top_page->object;          /* "object" now names the page's owner, not the argument */
 183             vm_object_lock(object);             /* taken only after the first object was unlocked above */
 184             VM_PAGE_FREE(top_page);
 185             vm_object_paging_end(object);       /* release the paging reference of the owning object */
 186             vm_object_unlock(object);
 187         }
 188 }
 189
 190 #if     MACH_CLUSTER_STATS /* optional counters; the CLUSTER_STAT_* increment macros exist only in this branch */
 191 #define MAXCLUSTERPAGES 16 /* number of slots in cluster_stats_in[] */
 192 struct {
 193         unsigned long pages_in_cluster;         /* bumped by CLUSTER_STAT_CLUSTER(x) */
 194         unsigned long pages_at_higher_offsets;  /* bumped by CLUSTER_STAT_HIGHER(x) */
 195         unsigned long pages_at_lower_offsets;   /* bumped by CLUSTER_STAT_LOWER(x) */
 196 } cluster_stats_in[MAXCLUSTERPAGES];            /* indexed by the macros' argument; no bounds check is made */
 197 #define CLUSTER_STAT(clause)    clause /* emit "clause" verbatim when statistics are enabled */
 198 #define CLUSTER_STAT_HIGHER(x)  \
 199         ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
 200 #define CLUSTER_STAT_LOWER(x)   \
 201          ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
 202 #define CLUSTER_STAT_CLUSTER(x) \
 203         ((cluster_stats_in[(x)].pages_in_cluster)++)
 204 #else   /* MACH_CLUSTER_STATS */
 205 #define CLUSTER_STAT(clause) /* statistics disabled: "clause" is discarded */
 206 #endif  /* MACH_CLUSTER_STATS */
 207
 208 /* XXX - temporary */
 209 boolean_t vm_allow_clustered_pagein = FALSE;
 210 int vm_pagein_cluster_used = 0; /* incremented by vm_fault_page when it finds a page with m->clustered set and type_of_fault is NULL */
 211
 212 #define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0) /* true when no bit below PAGE_SIZE_64 is set (assumes PAGE_SIZE_64 is a power of two) */
 213
 214
 215 boolean_t       vm_page_deactivate_behind = TRUE; /* NOTE(review): not consulted by vm_fault_deactivate_behind() below -- confirm where it is tested */
 216 /*
 217  * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
 218  */
 219 int vm_default_ahead = 0;
 220 int vm_default_behind = MAX_UPL_TRANSFER; /* in pages; vm_fault_deactivate_behind() multiplies it by PAGE_SIZE_64 */
 221
 222 /*
 223  *      vm_fault_deactivate_behind
 224  *
 225  *      Determine if sequential access is in progress
 226  *      in accordance with the behavior specified.  If
 227  *      so, compute a potential page to deactivate and
 228  *      deactivate it.
 229  *      Returns TRUE when a candidate page was found (even if it was busy and so left untouched), FALSE otherwise.
 230  *      The object must be locked.  Its "sequential" and "last_alloc" fields are updated, except for the kernel object.
 231  */
 232 static
 233 boolean_t
 234 vm_fault_deactivate_behind(
 235         vm_object_t             object,         /* locked by the caller */
 236         vm_object_offset_t      offset,         /* offset of the current access within object */
 237         vm_behavior_t           behavior)       /* VM_BEHAVIOR_* reference pattern to apply */
 238 {
 239         vm_page_t m;                            /* candidate page, or VM_PAGE_NULL if there is none */
 240
 241 #if TRACEFAULTPAGE
 242         dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */
 243 #endif
 244
 245         if (object == kernel_object) {
 246                 /*
 247                  * Do not deactivate pages from the kernel object: they
 248                  * are not intended to become pageable.
 249                  */
 250                 return FALSE;
 251         }
 252
 253         switch (behavior) {
 254         case VM_BEHAVIOR_RANDOM:                /* never a candidate; the run length is reset */
 255                 object->sequential = PAGE_SIZE_64;
 256                 m = VM_PAGE_NULL;
 257                 break;
 258         case VM_BEHAVIOR_SEQUENTIAL:            /* ascending run: the previous access was one page below */
 259                 if (offset &&
 260                         object->last_alloc == offset - PAGE_SIZE_64) {
 261                         object->sequential += PAGE_SIZE_64;     /* run length is kept in bytes */
 262                         m = vm_page_lookup(object, offset - PAGE_SIZE_64);      /* candidate is the page just behind */
 263                 } else {
 264                         object->sequential = PAGE_SIZE_64; /* reset */
 265                         m = VM_PAGE_NULL;
 266                 }
 267                 break;
 268         case VM_BEHAVIOR_RSEQNTL:               /* descending run: the previous access was one page above */
 269                 if (object->last_alloc &&
 270                         object->last_alloc == offset + PAGE_SIZE_64) {
 271                         object->sequential += PAGE_SIZE_64;
 272                         m = vm_page_lookup(object, offset + PAGE_SIZE_64);      /* candidate is the page just above */
 273                 } else {
 274                         object->sequential = PAGE_SIZE_64; /* reset */
 275                         m = VM_PAGE_NULL;
 276                 }
 277                 break;
 278         case VM_BEHAVIOR_DEFAULT:
 279         default:                                /* detect a run in either direction; candidate trails by vm_default_behind pages */
 280                 if (offset &&
 281                         object->last_alloc == offset - PAGE_SIZE_64) {
 282                         vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;   /* trailing distance in bytes */
 283
 284                         object->sequential += PAGE_SIZE_64;
 285                         m = (offset >= behind &&                /* offset - behind must not go below zero */
 286                                 object->sequential >= behind) ? /* and the run must be at least that long */
 287                                 vm_page_lookup(object, offset - behind) :
 288                                 VM_PAGE_NULL;
 289                 } else if (object->last_alloc &&
 290                         object->last_alloc == offset + PAGE_SIZE_64) {
 291                         vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;   /* trailing distance in bytes */
 292
 293                         object->sequential += PAGE_SIZE_64;
 294                         m = (offset < -behind &&                /* assuming an unsigned offset type, rejects offset + behind wrapping -- TODO confirm */
 295                                 object->sequential >= behind) ?
 296                                 vm_page_lookup(object, offset + behind) :
 297                                 VM_PAGE_NULL;
 298                 } else {
 299                         object->sequential = PAGE_SIZE_64;      /* neither direction matched: reset the run */
 300                         m = VM_PAGE_NULL;
 301                 }
 302                 break;
 303         }
 304
 305         object->last_alloc = offset;            /* remembered for the comparison on the next call */
 306
 307         if (m) {
 308                 if (!m->busy) {                 /* a busy page is left alone but still reported as found */
 309                         vm_page_lock_queues();
 310                         vm_page_deactivate(m);
 311                         vm_page_unlock_queues();
 312 #if TRACEFAULTPAGE
 313                         dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m);  /* (TEST/DEBUG) */
 314 #endif
 315                 }
 316                 return TRUE;
 317         }
 318         return FALSE;
 319 }
 320
 321
 322 /*
 323  *      Routine:        vm_fault_page
 324  *      Purpose:
 325  *              Find the resident page for the virtual memory
 326  *              specified by the given virtual memory object
 327  *              and offset.
 328  *      Additional arguments:
 329  *              The required permissions for the page are given
 330  *              in "fault_type".  Desired permissions are included
 331  *              in "protection".  The minimum and maximum valid offsets
 332  *              within the object for the relevant map entry are
 333  *              passed in "lo_offset" and "hi_offset" respectively and
 334  *              the expected page reference pattern is passed in "behavior".
 335  *              These three parameters are used to determine pagein cluster
 336  *              limits.
 337  *
 338  *              If the desired page is known to be resident (for
 339  *              example, because it was previously wired down), asserting
 340  *              the "unwiring" parameter will speed the search.
 341  *
 342  *              If the operation can be interrupted (by thread_abort
 343  *              or thread_terminate), then the "interruptible"
 344  *              parameter should be asserted.
 345  *
 346  *      Results:
 347  *              The page containing the proper data is returned
 348  *              in "result_page".
 349  *
 350  *      In/out conditions:
 351  *              The source object must be locked and referenced,
 352  *              and must donate one paging reference.  The reference
 353  *              is not affected.  The paging reference and lock are
 354  *              consumed.
 355  *
 356  *              If the call succeeds, the object in which "result_page"
 357  *              resides is left locked and holding a paging reference.
 358  *              If this is not the original object, a busy page in the
 359  *              original object is returned in "top_page", to prevent other
 360  *              callers from pursuing this same data, along with a paging
 361  *              reference for the original object.  The "top_page" should
 362  *              be destroyed when this guarantee is no longer required.
 363  *              The "result_page" is also left busy.  It is not removed
 364  *              from the pageout queues.
 365  */
 366
 367 vm_fault_return_t
 368 vm_fault_page(
 369         /* Arguments: */
 370         vm_object_t     first_object,   /* Object to begin search */
 371         vm_object_offset_t first_offset,        /* Offset into object */
 372         vm_prot_t       fault_type,     /* What access is requested */
 373         boolean_t       must_be_resident,/* Must page be resident? */
 374         int             interruptible,  /* how may fault be interrupted? */
 375         vm_map_offset_t lo_offset,      /* Map entry start */
 376         vm_map_offset_t hi_offset,      /* Map entry end */
 377         vm_behavior_t   behavior,       /* Page reference behavior */
 378         /* Modifies in place: */
 379         vm_prot_t       *protection,    /* Protection for mapping */
 380         /* Returns: */
 381         vm_page_t       *result_page,   /* Page found, if successful */
 382         vm_page_t       *top_page,      /* Page in top object, if
 383                                          * not result_page.  */
 384         int             *type_of_fault, /* if non-null, fill in with type of fault
 385                                          * COW, zero-fill, etc... returned in trace point */
 386         /* More arguments: */
 387         kern_return_t   *error_code,    /* code if page is in error */
 388         boolean_t       no_zero_fill,   /* don't zero fill absent pages */
 389         boolean_t       data_supply,    /* treat as data_supply if
 390                                          * it is a write fault and a full
 391                                          * page is provided */
 392         vm_map_t        map,
 393         __unused vm_map_offset_t        vaddr)
 394 {
 395         register
 396         vm_page_t               m;
 397         register
 398         vm_object_t             object;
 399         register
 400         vm_object_offset_t      offset;
 401         vm_page_t               first_m;
 402         vm_object_t             next_object;
 403         vm_object_t             copy_object;
 404         boolean_t               look_for_page;
 405         vm_prot_t               access_required = fault_type;
 406         vm_prot_t               wants_copy_flag;
 407         vm_object_size_t        length;
 408         vm_object_offset_t      cluster_start, cluster_end;
 409         CLUSTER_STAT(int pages_at_higher_offsets;)
 410         CLUSTER_STAT(int pages_at_lower_offsets;)
 411         kern_return_t   wait_result;
 412         boolean_t               interruptible_state;
 413         boolean_t               bumped_pagein = FALSE;
 414
 415
 416 #if     MACH_PAGEMAP
 417 /*
 418  * MACH page map - an optional optimization where a bit map is maintained
 419  * by the VM subsystem for internal objects to indicate which pages of
 420  * the object currently reside on backing store.  This existence map
 421  * duplicates information maintained by the vnode pager.  It is
 422  * created at the time of the first pageout against the object, i.e.
 423  * at the same time pager for the object is created.  The optimization
 424  * is designed to eliminate pager interaction overhead, if it is
 425  * 'known' that the page does not exist on backing store.
 426  *
 427  * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
 428  * either marked as paged out in the existence map for the object or no
 429  * existence map exists for the object.  LOOK_FOR() is one of the
 430  * criteria in the decision to invoke the pager.   It is also used as one
 431  * of the criteria to terminate the scan for adjacent pages in a clustered
 432  * pagein operation.  Note that LOOK_FOR() always evaluates to TRUE for
 433  * permanent objects.  Note also that if the pager for an internal object
 434  * has not been created, the pager is not invoked regardless of the value
 435  * of LOOK_FOR() and that clustered pagein scans are only done on an object
 436  * for which a pager has been created.
 437  *
 438  * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 439  * is marked as paged out in the existence map for the object.
 440  * PAGED_OUT() is used to determine if a page has already been pushed
 441  * into a copy object in order to avoid a redundant page out operation.
 442  */
 443 #define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
 444                         != VM_EXTERNAL_STATE_ABSENT)
 445 #define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
 446                         == VM_EXTERNAL_STATE_EXISTS)
 447 #else /* MACH_PAGEMAP */
 448 /*
 449  * If the MACH page map optimization is not enabled,
 450  * LOOK_FOR() always evaluates to TRUE.  The pager will always be
 451  * invoked to resolve missing pages in an object, assuming the pager
 452  * has been created for the object.  In a clustered page operation, the
 453  * absence of a page on backing store cannot be used to terminate
 454  * a scan for adjacent pages since that information is available only in
 455  * the pager.  Hence pages that may not be paged out are potentially
 456  * included in a clustered request.  The vnode pager is coded to deal
 457  * with any combination of absent/present pages in a clustered
 458  * pagein request.  PAGED_OUT() always evaluates to FALSE, i.e. the pager
 459  * will always be invoked to push a dirty page into a copy object assuming
 460  * a pager has been created.  If the page has already been pushed, the
 461  * pager will ignore the new request.
 462  */
 463 #define LOOK_FOR(o, f) TRUE
 464 #define PAGED_OUT(o, f) FALSE
 465 #endif /* MACH_PAGEMAP */
 466
 467 /*
 468  *      Recovery actions
 469  */
 470 #define PREPARE_RELEASE_PAGE(m)                         \
 471         MACRO_BEGIN                                     \
 472         vm_page_lock_queues();                          \
 473         MACRO_END
 474
 475 #define DO_RELEASE_PAGE(m)                              \
 476         MACRO_BEGIN                                     \
 477         PAGE_WAKEUP_DONE(m);                            \
 478         if (!m->active && !m->inactive)                 \
 479                 vm_page_activate(m);                    \
 480         vm_page_unlock_queues();                        \
 481         MACRO_END
 482
 483 #define RELEASE_PAGE(m)                                 \
 484         MACRO_BEGIN                                     \
 485         PREPARE_RELEASE_PAGE(m);                        \
 486         DO_RELEASE_PAGE(m);                             \
 487         MACRO_END
 488
 489 #if TRACEFAULTPAGE
 490         dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
 491 #endif
 492
 493
 494
 495 #if     !VM_FAULT_STATIC_CONFIG
 496         if (vm_fault_dirty_handling
 497 #if     MACH_KDB
 498                 /*
 499                  *      If there are watchpoints set, then
 500                  *      we don't want to give away write permission
 501                  *      on a read fault.  Make the task write fault,
 502                  *      so that the watchpoint code notices the access.
 503                  */
 504             || db_watchpoint_list
 505 #endif  /* MACH_KDB */
 506             ) {
 507                 /*
 508                  *      If we aren't asking for write permission,
 509                  *      then don't give it away.  We're using write
 510                  *      faults to set the dirty bit.
 511                  */
 512                 if (!(fault_type & VM_PROT_WRITE))
 513                         *protection &= ~VM_PROT_WRITE;
 514         }
 515
 516         if (!vm_fault_interruptible)
 517                 interruptible = THREAD_UNINT;
 518 #else   /* STATIC_CONFIG */
 519 #if     MACH_KDB
 520                 /*
 521                  *      If there are watchpoints set, then
 522                  *      we don't want to give away write permission
 523                  *      on a read fault.  Make the task write fault,
 524                  *      so that the watchpoint code notices the access.
 525                  */
 526             if (db_watchpoint_list) {
 527                 /*
 528                  *      If we aren't asking for write permission,
 529                  *      then don't give it away.  We're using write
 530                  *      faults to set the dirty bit.
 531                  */
 532                 if (!(fault_type & VM_PROT_WRITE))
 533                         *protection &= ~VM_PROT_WRITE;
 534         }
 535
 536 #endif  /* MACH_KDB */
 537 #endif  /* STATIC_CONFIG */
 538
 539         interruptible_state = thread_interrupt_level(interruptible);
 540
 541         /*
 542          *      INVARIANTS (through entire routine):
 543          *
 544          *      1)      At all times, we must either have the object
 545          *              lock or a busy page in some object to prevent
 546          *              some other thread from trying to bring in
 547          *              the same page.
 548          *
 549          *              Note that we cannot hold any locks during the
 550          *              pager access or when waiting for memory, so
 551          *              we use a busy page then.
 552          *
 553          *              Note also that we aren't as concerned about more than
 554          *              one thread attempting to memory_object_data_unlock
 555          *              the same page at once, so we don't hold the page
 556          *              as busy then, but do record the highest unlock
 557          *              value so far.  [Unlock requests may also be delivered
 558          *              out of order.]
 559          *
 560          *      2)      To prevent another thread from racing us down the
 561          *              shadow chain and entering a new page in the top
 562          *              object before we do, we must keep a busy page in
 563          *              the top object while following the shadow chain.
 564          *
 565          *      3)      We must increment paging_in_progress on any object
 566          *              for which we have a busy page
 567          *
 568          *      4)      We leave busy pages on the pageout queues.
 569          *              If the pageout daemon comes across a busy page,
 570          *              it will remove the page from the pageout queues.
 571          */
 572
 573         /*
 574          *      Search for the page at object/offset.
 575          */
 576
 577         object = first_object;
 578         offset = first_offset;
 579         first_m = VM_PAGE_NULL;
 580         access_required = fault_type;
 581
 582         XPR(XPR_VM_FAULT,
 583                 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
 584                 (integer_t)object, offset, fault_type, *protection, 0);
 585
 586         /*
 587          *      See whether this page is resident
 588          */
 589
 590         while (TRUE) {
 591 #if TRACEFAULTPAGE
 592                 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0);       /* (TEST/DEBUG) */
 593 #endif
 594                 if (!object->alive) {
 595                         vm_fault_cleanup(object, first_m);
 596                         thread_interrupt_level(interruptible_state);
 597                         return(VM_FAULT_MEMORY_ERROR);
 598                 }
 599                 m = vm_page_lookup(object, offset);
 600 #if TRACEFAULTPAGE
 601                 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object);  /* (TEST/DEBUG) */
 602 #endif
 603                 if (m != VM_PAGE_NULL) {
 604                         /*
 605                          *      If the page was pre-paged as part of a
 606                          *      cluster, record the fact.
 607                          *      If we were passed a valid pointer for
 608                          *      "type_of_fault", then we came from
 609                          *      vm_fault... we'll let it deal with
 610                          *      this condition, since it
 611                          *      needs to see m->clustered to correctly
 612                          *      account the pageins.
 613                          */
 614                         if (type_of_fault == NULL && m->clustered) {
 615                                 vm_pagein_cluster_used++;
 616                                 m->clustered = FALSE;
 617                         }
 618
 619                         /*
 620                          *      If the page is being brought in,
 621                          *      wait for it and then retry.
 622                          *
 623                          *      A possible optimization: if the page
 624                          *      is known to be resident, we can ignore
 625                          *      pages that are absent (regardless of
 626                          *      whether they're busy).
 627                          */
 628
 629                         if (m->busy) {
 630 #if TRACEFAULTPAGE
 631                                 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
 632 #endif
 633                                 wait_result = PAGE_SLEEP(object, m, interruptible);
 634                                 XPR(XPR_VM_FAULT,
 635                                     "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
 636                                         (integer_t)object, offset,
 637                                         (integer_t)m, 0, 0);
 638                                 counter(c_vm_fault_page_block_busy_kernel++);
 639
 640                                 if (wait_result != THREAD_AWAKENED) {
 641                                         vm_fault_cleanup(object, first_m);
 642                                         thread_interrupt_level(interruptible_state);
 643                                         if (wait_result == THREAD_RESTART)
 644                                           {
 645                                                 return(VM_FAULT_RETRY);
 646                                           }
 647                                         else
 648                                           {
 649                                                 return(VM_FAULT_INTERRUPTED);
 650                                           }
 651                                 }
 652                                 continue;
 653                         }
 654
 655                         if (m->encrypted) {
 656                                 /*
 657                                  * ENCRYPTED SWAP:
 658                                  * the user needs access to a page that we
 659                                  * encrypted before paging it out.
 660                                  * Decrypt the page now.
 661                                  * Keep it busy to prevent anyone from
 662                                  * accessing it during the decryption.
 663                                  */
 664                                 m->busy = TRUE;
 665                                 vm_page_decrypt(m, 0);
 666                                 assert(object == m->object);
 667                                 assert(m->busy);
 668                                 PAGE_WAKEUP_DONE(m);
 669
 670                                 /*
 671                                  * Retry from the top, in case
 672                                  * something changed while we were
 673                                  * decrypting.
 674                                  */
 675                                 continue;
 676                         }
 677                         ASSERT_PAGE_DECRYPTED(m);
 678
 679                         /*
 680                          *      If the page is in error, give up now.
 681                          */
 682
 683                         if (m->error) {
 684 #if TRACEFAULTPAGE
 685                                 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code);      /* (TEST/DEBUG) */
 686 #endif
 687                                 if (error_code)
 688                                         *error_code = m->page_error;
 689                                 VM_PAGE_FREE(m);
 690                                 vm_fault_cleanup(object, first_m);
 691                                 thread_interrupt_level(interruptible_state);
 692                                 return(VM_FAULT_MEMORY_ERROR);
 693                         }
 694
 695                         /*
 696                          *      If the pager wants us to restart
 697                          *      at the top of the chain,
 698                          *      typically because it has moved the
 699                          *      page to another pager, then do so.
 700                          */
 701
 702                         if (m->restart) {
 703 #if TRACEFAULTPAGE
 704                                 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
 705 #endif
 706                                 VM_PAGE_FREE(m);
 707                                 vm_fault_cleanup(object, first_m);
 708                                 thread_interrupt_level(interruptible_state);
 709                                 return(VM_FAULT_RETRY);
 710                         }
 711
 712                         /*
 713                          *      If the page isn't busy, but is absent,
 714                          *      then it was deemed "unavailable".
 715                          */
 716
 717                         if (m->absent) {
 718                                 /*
 719                                  * Remove the non-existent page (unless it's
 720                                  * in the top object) and move on down to the
 721                                  * next object (if there is one).
 722                                  */
 723 #if TRACEFAULTPAGE
 724                                 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow);  /* (TEST/DEBUG) */
 725 #endif
 726
 727                                 next_object = object->shadow;
 728                                 if (next_object == VM_OBJECT_NULL) {
 729                                         vm_page_t real_m;
 730
 731                                         assert(!must_be_resident);
 732
 733                                         if (object->shadow_severed) {
 734                                                 vm_fault_cleanup(
 735                                                         object, first_m);
 736                                                 thread_interrupt_level(interruptible_state);
 737                                                 return VM_FAULT_MEMORY_ERROR;
 738                                         }
 739
 740                                         /*
 741                                          * Absent page at bottom of shadow
 742                                          * chain; zero fill the page we left
 743                                          * busy in the first object, and flush
 744                                          * the absent page.  But first we
 745                                          * need to allocate a real page.
 746                                          */
 747                                         if (VM_PAGE_THROTTLED() ||
 748                                             (real_m = vm_page_grab())
 749                                                         == VM_PAGE_NULL) {
 750                                                 vm_fault_cleanup(
 751                                                         object, first_m);
 752                                                 thread_interrupt_level(
 753                                                         interruptible_state);
 754                                                 return(
 755                                                    VM_FAULT_MEMORY_SHORTAGE);
 756                                         }
 757
 758                                         /*
 759                                          * Are we protecting the system from
 760                                          * backing store exhaustion?  If so,
 761                                          * sleep unless we are privileged.
 762                                          */
 763
 764                                         if(vm_backing_store_low) {
 765                                            if(!(current_task()->priv_flags
 766                                                 & VM_BACKING_STORE_PRIV)) {
 767                                                 assert_wait((event_t)
 768                                                         &vm_backing_store_low,
 769                                                         THREAD_UNINT);
 770                                                 vm_fault_cleanup(object,
 771                                                                     first_m);
 772                                                 thread_block(THREAD_CONTINUE_NULL);
 773                                                 thread_interrupt_level(
 774                                                         interruptible_state);
 775                                                 return(VM_FAULT_RETRY);
 776                                            }
 777                                         }
 778
 779
 780                                         XPR(XPR_VM_FAULT,
 781               "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
 782                                                 (integer_t)object, offset,
 783                                                 (integer_t)m,
 784                                                 (integer_t)first_object, 0);
 785                                         if (object != first_object) {
 786                                                 VM_PAGE_FREE(m);
 787                                                 vm_object_paging_end(object);
 788                                                 vm_object_unlock(object);
 789                                                 object = first_object;
 790                                                 offset = first_offset;
 791                                                 m = first_m;
 792                                                 first_m = VM_PAGE_NULL;
 793                                                 vm_object_lock(object);
 794                                         }
 795
 796                                         VM_PAGE_FREE(m);
 797                                         assert(real_m->busy);
 798                                         vm_page_insert(real_m, object, offset);
 799                                         m = real_m;
 800
 801                                         /*
 802                                          *  Drop the lock while zero filling
 803                                          *  page.  Then break because this
 804                                          *  is the page we wanted.  Checking
 805                                          *  the page lock is a waste of time;
 806                                          *  this page was either absent or
 807                                          *  newly allocated -- in both cases
 808                                          *  it can't be page locked by a pager.
 809                                          */
 810                                         m->no_isync = FALSE;
 811
 812                                         if (!no_zero_fill) {
 813                                                 vm_object_unlock(object);
 814                                                 vm_page_zero_fill(m);
 815                                                 vm_object_lock(object);
 816
 817                                                 if (type_of_fault)
 818                                                         *type_of_fault = DBG_ZERO_FILL_FAULT;
 819                                                 VM_STAT(zero_fill_count++);
 820                                         }
 821                                         if (bumped_pagein == TRUE) {
 822                                                 VM_STAT(pageins--);
 823                                                 current_task()->pageins--;
 824                                         }
 825                                         vm_page_lock_queues();
 826                                         VM_PAGE_QUEUES_REMOVE(m);
 827                                         m->page_ticket = vm_page_ticket;
 828                                         assert(!m->laundry);
 829                                         assert(m->object != kernel_object);
 830                                         assert(m->pageq.next == NULL &&
 831                                                m->pageq.prev == NULL);
 832                                         if(m->object->size > 0x200000) {
 833                                                 m->zero_fill = TRUE;
 834                                                 /* depends on the queues lock */
 835                                                 vm_zf_count += 1;
 836                                                 queue_enter(&vm_page_queue_zf,
 837                                                         m, vm_page_t, pageq);
 838                                         } else {
 839                                                 queue_enter(
 840                                                         &vm_page_queue_inactive,
 841                                                         m, vm_page_t, pageq);
 842                                         }
 843                                         vm_page_ticket_roll++;
 844                                         if(vm_page_ticket_roll ==
 845                                                 VM_PAGE_TICKETS_IN_ROLL) {
 846                                                 vm_page_ticket_roll = 0;
 847                                                 if(vm_page_ticket ==
 848                                                      VM_PAGE_TICKET_ROLL_IDS)
 849                                                         vm_page_ticket= 0;
 850                                                 else
 851                                                         vm_page_ticket++;
 852                                         }
 853                                         m->inactive = TRUE;
 854                                         vm_page_inactive_count++;
 855                                         vm_page_unlock_queues();
 856                                         break;
 857                                 } else {
 858                                         if (must_be_resident) {
 859                                                 vm_object_paging_end(object);
 860                                         } else if (object != first_object) {
 861                                                 vm_object_paging_end(object);
 862                                                 VM_PAGE_FREE(m);
 863                                         } else {
 864                                                 first_m = m;
 865                                                 m->absent = FALSE;
 866                                                 m->unusual = FALSE;
 867                                                 vm_object_absent_release(object);
 868                                                 m->busy = TRUE;
 869
 870                                                 vm_page_lock_queues();
 871                                                 VM_PAGE_QUEUES_REMOVE(m);
 872                                                 vm_page_unlock_queues();
 873                                         }
 874                                         XPR(XPR_VM_FAULT,
 875                                             "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
 876                                                 (integer_t)object, offset,
 877                                                 (integer_t)next_object,
 878                                                 offset+object->shadow_offset,0);
 879                                         offset += object->shadow_offset;
 880                                         hi_offset += object->shadow_offset;
 881                                         lo_offset += object->shadow_offset;
 882                                         access_required = VM_PROT_READ;
 883                                         vm_object_lock(next_object);
 884                                         vm_object_unlock(object);
 885                                         object = next_object;
 886                                         vm_object_paging_begin(object);
 887                                         continue;
 888                                 }
 889                         }
 890
 891                         if ((m->cleaning)
 892                                 && ((object != first_object) ||
 893                                     (object->copy != VM_OBJECT_NULL))
 894                                 && (fault_type & VM_PROT_WRITE)) {
 895                                 /*
 896                                  * This is a copy-on-write fault that will
 897                                  * cause us to revoke access to this page, but
 898                                  * this page is in the process of being cleaned
 899                                  * in a clustered pageout. We must wait until
 900                                  * the cleaning operation completes before
 901                                  * revoking access to the original page,
 902                                  * otherwise we might attempt to remove a
 903                                  * wired mapping.
 904                                  */
 905 #if TRACEFAULTPAGE
 906                                 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset);  /* (TEST/DEBUG) */
 907 #endif
 908                                 XPR(XPR_VM_FAULT,
 909                                     "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
 910                                         (integer_t)object, offset,
 911                                         (integer_t)m, 0, 0);
 912                                 /* take an extra ref so that object won't die */
 913                                 assert(object->ref_count > 0);
 914                                 object->ref_count++;
 915                                 vm_object_res_reference(object);
 916                                 vm_fault_cleanup(object, first_m);
 917                                 counter(c_vm_fault_page_block_backoff_kernel++);
 918                                 vm_object_lock(object);
 919                                 assert(object->ref_count > 0);
 920                                 m = vm_page_lookup(object, offset);
 921                                 if (m != VM_PAGE_NULL && m->cleaning) {
 922                                         PAGE_ASSERT_WAIT(m, interruptible);
 923                                         vm_object_unlock(object);
 924                                         wait_result = thread_block(THREAD_CONTINUE_NULL);
 925                                         vm_object_deallocate(object);
 926                                         goto backoff;
 927                                 } else {
 928                                         vm_object_unlock(object);
 929                                         vm_object_deallocate(object);
 930                                         thread_interrupt_level(interruptible_state);
 931                                         return VM_FAULT_RETRY;
 932                                 }
 933                         }
 934
 935                         /*
 936                          *      If the desired access to this page has
 937                          *      been locked out, request that it be unlocked.
 938                          */
 939
 940                         if (access_required & m->page_lock) {
 941                                 if ((access_required & m->unlock_request) != access_required) {
 942                                         vm_prot_t       new_unlock_request;
 943                                         kern_return_t   rc;
 944
 945 #if TRACEFAULTPAGE
 946                                         dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready);     /* (TEST/DEBUG) */
 947 #endif
 948                                         if (!object->pager_ready) {
 949                                         XPR(XPR_VM_FAULT,
 950                                             "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
 951                                                 access_required,
 952                                                 (integer_t)object, offset,
 953                                                 (integer_t)m, 0);
 954                                                 /* take an extra ref */
 955                                                 assert(object->ref_count > 0);
 956                                                 object->ref_count++;
 957                                                 vm_object_res_reference(object);
 958                                                 vm_fault_cleanup(object,
 959                                                                  first_m);
 960                                                 counter(c_vm_fault_page_block_backoff_kernel++);
 961                                                 vm_object_lock(object);
 962                                                 assert(object->ref_count > 0);
 963                                                 if (!object->pager_ready) {
 964                                                         wait_result = vm_object_assert_wait(
 965                                                                 object,
 966                                                                 VM_OBJECT_EVENT_PAGER_READY,
 967                                                                 interruptible);
 968                                                         vm_object_unlock(object);
 969                                                         if (wait_result == THREAD_WAITING)
 970                                                                 wait_result = thread_block(THREAD_CONTINUE_NULL);
 971                                                         vm_object_deallocate(object);
 972                                                         goto backoff;
 973                                                 } else {
 974                                                         vm_object_unlock(object);
 975                                                         vm_object_deallocate(object);
 976                                                         thread_interrupt_level(interruptible_state);
 977                                                         return VM_FAULT_RETRY;
 978                                                 }
 979                                         }
 980
 981                                         new_unlock_request = m->unlock_request =
 982                                                 (access_required | m->unlock_request);
 983                                         vm_object_unlock(object);
 984                                         XPR(XPR_VM_FAULT,
 985                                             "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
 986                                         (integer_t)object, offset,
 987                                         (integer_t)m, new_unlock_request, 0);
 988                                         if ((rc = memory_object_data_unlock(
 989                                                 object->pager,
 990                                                 offset + object->paging_offset,
 991                                                 PAGE_SIZE,
 992                                                 new_unlock_request))
 993                                              != KERN_SUCCESS) {
 994                                                 if (vm_fault_debug)
 995                                                     printf("vm_fault: memory_object_data_unlock failed\n");
 996                                                 vm_object_lock(object);
 997                                                 vm_fault_cleanup(object, first_m);
 998                                                 thread_interrupt_level(interruptible_state);
 999                                                 return((rc == MACH_SEND_INTERRUPTED) ?
1000                                                         VM_FAULT_INTERRUPTED :
1001                                                         VM_FAULT_MEMORY_ERROR);
1002                                         }
1003                                         vm_object_lock(object);
1004                                         continue;
1005                                 }
1006
1007                                 XPR(XPR_VM_FAULT,
1008         "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
1009                                         access_required, (integer_t)object,
1010                                         offset, (integer_t)m, 0);
1011                                 /* take an extra ref so object won't die */
1012                                 assert(object->ref_count > 0);
1013                                 object->ref_count++;
1014                                 vm_object_res_reference(object);
1015                                 vm_fault_cleanup(object, first_m);
1016                                 counter(c_vm_fault_page_block_backoff_kernel++);
1017                                 vm_object_lock(object);
1018                                 assert(object->ref_count > 0);
1019                                 m = vm_page_lookup(object, offset);
1020                                 if (m != VM_PAGE_NULL &&
1021                                     (access_required & m->page_lock) &&
1022                                     !((access_required & m->unlock_request) != access_required)) {
1023                                         PAGE_ASSERT_WAIT(m, interruptible);
1024                                         vm_object_unlock(object);
1025                                         wait_result = thread_block(THREAD_CONTINUE_NULL);
1026                                         vm_object_deallocate(object);
1027                                         goto backoff;
1028                                 } else {
1029                                         vm_object_unlock(object);
1030                                         vm_object_deallocate(object);
1031                                         thread_interrupt_level(interruptible_state);
1032                                         return VM_FAULT_RETRY;
1033                                 }
1034                         }
1035                         /*
1036                          *      We mark the page busy and leave it on
1037                          *      the pageout queues.  If the pageout
1038                          *      daemon comes across it, then it will
1039                          *      remove the page.
1040                          */
1041
1042 #if TRACEFAULTPAGE
1043                         dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
1044 #endif
1045
1046 #if     !VM_FAULT_STATIC_CONFIG
1047                         if (!software_reference_bits) {
1048                                 vm_page_lock_queues();
1049                                 if (m->inactive)
1050                                         vm_stat.reactivations++;
1051
1052                                 VM_PAGE_QUEUES_REMOVE(m);
1053                                 vm_page_unlock_queues();
1054                         }
1055 #endif
1056                         XPR(XPR_VM_FAULT,
1057                             "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
1058                                 (integer_t)object, offset, (integer_t)m, 0, 0);
1059                         assert(!m->busy);
1060                         m->busy = TRUE;
1061                         assert(!m->absent);
1062                         break;
1063                 }
1064
1065                 look_for_page =
1066                         (object->pager_created) &&
1067                           LOOK_FOR(object, offset) &&
1068                             (!data_supply);
1069
1070 #if TRACEFAULTPAGE
1071                 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object);      /* (TEST/DEBUG) */
1072 #endif
1073                 if ((look_for_page || (object == first_object))
1074                                 && !must_be_resident
1075                                 && !(object->phys_contiguous))  {
1076                         /*
1077                          *      Allocate a new page for this object/offset
1078                          *      pair.
1079                          */
1080
1081                         m = vm_page_grab_fictitious();
1082 #if TRACEFAULTPAGE
1083                         dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object);  /* (TEST/DEBUG) */
1084 #endif
1085                         if (m == VM_PAGE_NULL) {
1086                                 vm_fault_cleanup(object, first_m);
1087                                 thread_interrupt_level(interruptible_state);
1088                                 return(VM_FAULT_FICTITIOUS_SHORTAGE);
1089                         }
1090                         vm_page_insert(m, object, offset);
1091                 }
1092
1093                 if ((look_for_page && !must_be_resident)) {
1094                         kern_return_t   rc;
1095
1096                         /*
1097                          *      If the memory manager is not ready, we
1098                          *      cannot make requests.
1099                          */
1100                         if (!object->pager_ready) {
1101 #if TRACEFAULTPAGE
1102                                 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0);       /* (TEST/DEBUG) */
1103 #endif
1104                                 if(m != VM_PAGE_NULL)
1105                                         VM_PAGE_FREE(m);
1106                                 XPR(XPR_VM_FAULT,
1107                                 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
1108                                         (integer_t)object, offset, 0, 0, 0);
1109                                 /* take an extra ref so object won't die */
1110                                 assert(object->ref_count > 0);
1111                                 object->ref_count++;
1112                                 vm_object_res_reference(object);
1113                                 vm_fault_cleanup(object, first_m);
1114                                 counter(c_vm_fault_page_block_backoff_kernel++);
1115                                 vm_object_lock(object);
1116                                 assert(object->ref_count > 0);
1117                                 if (!object->pager_ready) {
1118                                         wait_result = vm_object_assert_wait(object,
1119                                                               VM_OBJECT_EVENT_PAGER_READY,
1120                                                               interruptible);
1121                                         vm_object_unlock(object);
1122                                         if (wait_result == THREAD_WAITING)
1123                                                 wait_result = thread_block(THREAD_CONTINUE_NULL);
1124                                         vm_object_deallocate(object);
1125                                         goto backoff;
1126                                 } else {
1127                                         vm_object_unlock(object);
1128                                         vm_object_deallocate(object);
1129                                         thread_interrupt_level(interruptible_state);
1130                                         return VM_FAULT_RETRY;
1131                                 }
1132                         }
1133
1134                         if(object->phys_contiguous) {
1135                                 if(m != VM_PAGE_NULL) {
1136                                         VM_PAGE_FREE(m);
1137                                         m = VM_PAGE_NULL;
1138                                 }
1139                                 goto no_clustering;
1140                         }
1141                         if (object->internal) {
1142                                 /*
1143                                  *      Requests to the default pager
1144                                  *      must reserve a real page in advance,
1145                                  *      because the pager's data-provided
1146                                  *      won't block for pages.  IMPORTANT:
1147                                  *      this acts as a throttling mechanism
1148                                  *      for data_requests to the default
1149                                  *      pager.
1150                                  */
1151
1152 #if TRACEFAULTPAGE
1153                                 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
1154 #endif
1155                                 if (m->fictitious && !vm_page_convert(m)) {
1156                                         VM_PAGE_FREE(m);
1157                                         vm_fault_cleanup(object, first_m);
1158                                         thread_interrupt_level(interruptible_state);
1159                                         return(VM_FAULT_MEMORY_SHORTAGE);
1160                                 }
1161                         } else if (object->absent_count >
1162                                                 vm_object_absent_max) {
1163                                 /*
1164                                  *      If there are too many outstanding page
1165                                  *      requests pending on this object, we
1166                                  *      wait for them to be resolved now.
1167                                  */
1168
1169 #if TRACEFAULTPAGE
1170                                 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
1171 #endif
1172                                 if(m != VM_PAGE_NULL)
1173                                         VM_PAGE_FREE(m);
1174                                 /* take an extra ref so object won't die */
1175                                 assert(object->ref_count > 0);
1176                                 object->ref_count++;
1177                                 vm_object_res_reference(object);
1178                                 vm_fault_cleanup(object, first_m);
1179                                 counter(c_vm_fault_page_block_backoff_kernel++);
1180                                 vm_object_lock(object);
1181                                 assert(object->ref_count > 0);
1182                                 if (object->absent_count > vm_object_absent_max) {
1183                                         vm_object_absent_assert_wait(object,
1184                                                                      interruptible);
1185                                         vm_object_unlock(object);
1186                                         wait_result = thread_block(THREAD_CONTINUE_NULL);
1187                                         vm_object_deallocate(object);
1188                                         goto backoff;
1189                                 } else {
1190                                         vm_object_unlock(object);
1191                                         vm_object_deallocate(object);
1192                                         thread_interrupt_level(interruptible_state);
1193                                         return VM_FAULT_RETRY;
1194                                 }
1195                         }
1196
1197                         /*
1198                          *      Indicate that the page is waiting for data
1199                          *      from the memory manager.
1200                          */
1201
1202                         if(m != VM_PAGE_NULL) {
1203
1204                                 m->list_req_pending = TRUE;
1205                                 m->absent = TRUE;
1206                                 m->unusual = TRUE;
1207                                 object->absent_count++;
1208
1209                         }
1210
1211 no_clustering:
1212                         cluster_start = offset;
1213                         length = PAGE_SIZE;
1214
1215                         /*
1216                          * lengthen the cluster by the pages in the working set
1217                          */
1218                         if((map != NULL) &&
1219                                 (current_task()->dynamic_working_set != 0)) {
1220                                 cluster_end = cluster_start + length;
1221                                 /* tws values for start and end are just
1222                                  * suggestions.  Therefore, as long as
1223                                  * build_cluster does not use pointers or
1224                                  * take action based on values that
1225                                  * could be affected by re-entrance we
1226                                  * do not need to take the map lock.
1227                                  */
1228                                 cluster_end = offset + PAGE_SIZE_64;
1229                                 tws_build_cluster(
1230                                         current_task()->dynamic_working_set,
1231                                         object, &cluster_start,
1232                                         &cluster_end, 0x40000);
1233                                 length = cluster_end - cluster_start;
1234                         }
1235 #if TRACEFAULTPAGE
1236                         dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0);  /* (TEST/DEBUG) */
1237 #endif
1238                         /*
1239                          *      We have a busy page, so we can
1240                          *      release the object lock.
1241                          */
1242                         vm_object_unlock(object);
1243
1244                         /*
1245                          *      Call the memory manager to retrieve the data.
1246                          */
1247
1248                         if (type_of_fault)
1249                                 *type_of_fault = ((int)length << 8) | DBG_PAGEIN_FAULT;
1250                         VM_STAT(pageins++);
1251                         current_task()->pageins++;
1252                         bumped_pagein = TRUE;
1253
1254                         /*
1255                          *      If this object uses a copy_call strategy,
1256                          *      and we are interested in a copy of this object
1257                          *      (having gotten here only by following a
1258                          *      shadow chain), then tell the memory manager
1259                          *      via a flag added to the desired_access
1260                          *      parameter, so that it can detect a race
1261                          *      between our walking down the shadow chain
1262                          *      and its pushing pages up into a copy of
1263                          *      the object that it manages.
1264                          */
1265
1266                         if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1267                             object != first_object) {
1268                                 wants_copy_flag = VM_PROT_WANTS_COPY;
1269                         } else {
1270                                 wants_copy_flag = VM_PROT_NONE;
1271                         }
1272
1273                         XPR(XPR_VM_FAULT,
1274                             "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1275                                 (integer_t)object, offset, (integer_t)m,
1276                                 access_required | wants_copy_flag, 0);
1277
1278                         rc = memory_object_data_request(object->pager,
1279                                         cluster_start + object->paging_offset,
1280                                         length,
1281                                         access_required | wants_copy_flag);
1282
1283
1284 #if TRACEFAULTPAGE
1285                         dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1286 #endif
1287                         if (rc != KERN_SUCCESS) {
1288                                 if (rc != MACH_SEND_INTERRUPTED
1289                                     && vm_fault_debug)
1290                                         printf("%s(0x%x, 0x%xll, 0x%xll, 0x%x) failed, rc=%d\n",
1291                                                 "memory_object_data_request",
1292                                                 object->pager,
1293                                                 cluster_start + object->paging_offset,
1294                                                 length, access_required, rc);
1295                                 /*
1296                                  *      Don't want to leave a busy page around,
1297                                  *      but the data request may have blocked,
1298                                  *      so check if it's still there and busy.
1299                                  */
1300                                 if(!object->phys_contiguous) {
1301                                    vm_object_lock(object);
1302                                    for (; length; length -= PAGE_SIZE,
1303                                       cluster_start += PAGE_SIZE_64) {
1304                                       vm_page_t p;
1305                                       if ((p = vm_page_lookup(object,
1306                                                                 cluster_start))
1307                                             && p->absent && p->busy
1308                                             && p != first_m) {
1309                                          VM_PAGE_FREE(p);
1310                                       }
1311                                    }
1312                                 }
1313                                 vm_fault_cleanup(object, first_m);
1314                                 thread_interrupt_level(interruptible_state);
1315                                 return((rc == MACH_SEND_INTERRUPTED) ?
1316                                         VM_FAULT_INTERRUPTED :
1317                                         VM_FAULT_MEMORY_ERROR);
1318                         }
1319
1320                         vm_object_lock(object);
1321                         if ((interruptible != THREAD_UNINT) &&
1322                             (current_thread()->state & TH_ABORT)) {
1323                                 vm_fault_cleanup(object, first_m);
1324                                 thread_interrupt_level(interruptible_state);
1325                                 return(VM_FAULT_INTERRUPTED);
1326                         }
1327                         if (m == VM_PAGE_NULL &&
1328                             object->phys_contiguous) {
1329                                 /*
1330                                  * No page here means that the object we
1331                                  * initially looked up was "physically
1332                                  * contiguous" (i.e. device memory).  However,
1333                                  * with Virtual VRAM, the object might not
1334                                  * be backed by that device memory anymore,
1335                                  * so we're done here only if the object is
1336                                  * still "phys_contiguous".
1337                                  * Otherwise, if the object is no longer
1338                                  * "phys_contiguous", we need to retry the
1339                                  * page fault against the object's new backing
1340                                  * store (different memory object).
1341                                  */
1342                                 break;
1343                         }
1344
1345                         /*
1346                          * Retry with same object/offset, since new data may
1347                          * be in a different page (i.e., m is meaningless at
1348                          * this point).
1349                          */
1350                         continue;
1351                 }
1352
1353                 /*
1354                  * The only case in which we get here is if
1355                  * object has no pager (or unwiring).  If the pager doesn't
1356                  * have the page this is handled in the m->absent case above
1357                  * (and if you change things here you should look above).
1358                  */
1359 #if TRACEFAULTPAGE
1360                 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m);  /* (TEST/DEBUG) */
1361 #endif
1362                 if (object == first_object)
1363                         first_m = m;
1364                 else
1365                         assert(m == VM_PAGE_NULL);
1366
1367                 XPR(XPR_VM_FAULT,
1368                     "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1369                         (integer_t)object, offset, (integer_t)m,
1370                         (integer_t)object->shadow, 0);
1371                 /*
1372                  *      Move on to the next object.  Lock the next
1373                  *      object before unlocking the current one.
1374                  */
1375                 next_object = object->shadow;
1376                 if (next_object == VM_OBJECT_NULL) {
1377                         assert(!must_be_resident);
1378                         /*
1379                          *      If there's no object left, fill the page
1380                          *      in the top object with zeros.  But first we
1381                          *      need to allocate a real page.
1382                          */
1383
1384                         if (object != first_object) {
1385                                 vm_object_paging_end(object);
1386                                 vm_object_unlock(object);
1387
1388                                 object = first_object;
1389                                 offset = first_offset;
1390                                 vm_object_lock(object);
1391                         }
1392
1393                         m = first_m;
1394                         assert(m->object == object);
1395                         first_m = VM_PAGE_NULL;
1396
1397                         if(m == VM_PAGE_NULL) {
1398                                 m = vm_page_grab();
1399                                 if (m == VM_PAGE_NULL) {
1400                                         vm_fault_cleanup(
1401                                                 object, VM_PAGE_NULL);
1402                                         thread_interrupt_level(
1403                                                 interruptible_state);
1404                                         return(VM_FAULT_MEMORY_SHORTAGE);
1405                                 }
1406                                 vm_page_insert(
1407                                         m, object, offset);
1408                         }
1409
1410                         if (object->shadow_severed) {
1411                                 VM_PAGE_FREE(m);
1412                                 vm_fault_cleanup(object, VM_PAGE_NULL);
1413                                 thread_interrupt_level(interruptible_state);
1414                                 return VM_FAULT_MEMORY_ERROR;
1415                         }
1416
1417                         /*
1418                          * Are we protecting the system from
1419                          * backing store exhaustion?  If so,
1420                          * sleep unless we are privileged.
1421                          */
1422
1423                         if(vm_backing_store_low) {
1424                                 if(!(current_task()->priv_flags
1425                                                 & VM_BACKING_STORE_PRIV)) {
1426                                         assert_wait((event_t)
1427                                                 &vm_backing_store_low,
1428                                                 THREAD_UNINT);
1429                                         VM_PAGE_FREE(m);
1430                                         vm_fault_cleanup(object, VM_PAGE_NULL);
1431                                         thread_block(THREAD_CONTINUE_NULL);
1432                                         thread_interrupt_level(
1433                                                 interruptible_state);
1434                                         return(VM_FAULT_RETRY);
1435                                 }
1436                         }
1437
1438                         if (VM_PAGE_THROTTLED() ||
1439                             (m->fictitious && !vm_page_convert(m))) {
1440                                 VM_PAGE_FREE(m);
1441                                 vm_fault_cleanup(object, VM_PAGE_NULL);
1442                                 thread_interrupt_level(interruptible_state);
1443                                 return(VM_FAULT_MEMORY_SHORTAGE);
1444                         }
1445                         m->no_isync = FALSE;
1446
1447                         if (!no_zero_fill) {
1448                                 vm_object_unlock(object);
1449                                 vm_page_zero_fill(m);
1450                                 vm_object_lock(object);
1451
1452                                 if (type_of_fault)
1453                                         *type_of_fault = DBG_ZERO_FILL_FAULT;
1454                                 VM_STAT(zero_fill_count++);
1455                         }
1456                         if (bumped_pagein == TRUE) {
1457                                 VM_STAT(pageins--);
1458                                 current_task()->pageins--;
1459                         }
1460                         vm_page_lock_queues();
1461                         VM_PAGE_QUEUES_REMOVE(m);
1462                         assert(!m->laundry);
1463                         assert(m->object != kernel_object);
1464                         assert(m->pageq.next == NULL &&
1465                                m->pageq.prev == NULL);
1466                         if(m->object->size > 0x200000) {
1467                                 m->zero_fill = TRUE;
1468                                 /* depends on the queues lock */
1469                                 vm_zf_count += 1;
1470                                 queue_enter(&vm_page_queue_zf,
1471                                         m, vm_page_t, pageq);
1472                         } else {
1473                                 queue_enter(
1474                                         &vm_page_queue_inactive,
1475                                         m, vm_page_t, pageq);
1476                         }
1477                         m->page_ticket = vm_page_ticket;
1478                         vm_page_ticket_roll++;
1479                         if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1480                                 vm_page_ticket_roll = 0;
1481                                 if(vm_page_ticket ==
1482                                         VM_PAGE_TICKET_ROLL_IDS)
1483                                         vm_page_ticket= 0;
1484                                 else
1485                                         vm_page_ticket++;
1486                         }
1487                         m->inactive = TRUE;
1488                         vm_page_inactive_count++;
1489                         vm_page_unlock_queues();
1490 #if 0
1491                         pmap_clear_modify(m->phys_page);
1492 #endif
1493                         break;
1494                 }
1495                 else {
1496                         if ((object != first_object) || must_be_resident)
1497                                 vm_object_paging_end(object);
1498                         offset += object->shadow_offset;
1499                         hi_offset += object->shadow_offset;
1500                         lo_offset += object->shadow_offset;
1501                         access_required = VM_PROT_READ;
1502                         vm_object_lock(next_object);
1503                         vm_object_unlock(object);
1504                         object = next_object;
1505                         vm_object_paging_begin(object);
1506                 }
1507         }
1508
1509         /*
1510          *      PAGE HAS BEEN FOUND.
1511          *
1512          *      This page (m) is:
1513          *              busy, so that we can play with it;
1514          *              not absent, so that nobody else will fill it;
1515          *              possibly eligible for pageout;
1516          *
1517          *      The top-level page (first_m) is:
1518          *              VM_PAGE_NULL if the page was found in the
1519          *               top-level object;
1520          *              busy, not absent, and ineligible for pageout.
1521          *
1522          *      The current object (object) is locked.  A paging
1523          *      reference is held for the current and top-level
1524          *      objects.
1525          */
1526
1527 #if TRACEFAULTPAGE
1528         dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m);  /* (TEST/DEBUG) */
1529 #endif
1530 #if     EXTRA_ASSERTIONS
1531         if(m != VM_PAGE_NULL) {
1532                 assert(m->busy && !m->absent);
1533                 assert((first_m == VM_PAGE_NULL) ||
1534                         (first_m->busy && !first_m->absent &&
1535                          !first_m->active && !first_m->inactive));
1536         }
1537 #endif  /* EXTRA_ASSERTIONS */
1538
1539         /*
1540          * ENCRYPTED SWAP:
1541          * If we found a page, we must have decrypted it before we
1542          * get here...
1543          */
1544         if (m != VM_PAGE_NULL) {
1545                 ASSERT_PAGE_DECRYPTED(m);
1546         }
1547
1548         XPR(XPR_VM_FAULT,
1549        "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1550                 (integer_t)object, offset, (integer_t)m,
1551                 (integer_t)first_object, (integer_t)first_m);
1552         /*
1553          *      If the page is being written, but isn't
1554          *      already owned by the top-level object,
1555          *      we have to copy it into a new page owned
1556          *      by the top-level object.
1557          */
1558
1559         if ((object != first_object) && (m != VM_PAGE_NULL)) {
1560                 /*
1561                  *      We only really need to copy if we
1562                  *      want to write it.
1563                  */
1564
1565 #if TRACEFAULTPAGE
1566                         dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1567 #endif
1568                 if (fault_type & VM_PROT_WRITE) {
1569                         vm_page_t copy_m;
1570
1571                         assert(!must_be_resident);
1572
1573                         /*
1574                          * Are we protecting the system from
1575                          * backing store exhaustion?  If so,
1576                          * sleep unless we are privileged.
1577                          */
1578
1579                         if(vm_backing_store_low) {
1580                                 if(!(current_task()->priv_flags
1581                                                 & VM_BACKING_STORE_PRIV)) {
1582                                         assert_wait((event_t)
1583                                                 &vm_backing_store_low,
1584                                                 THREAD_UNINT);
1585                                         RELEASE_PAGE(m);
1586                                         vm_fault_cleanup(object, first_m);
1587                                         thread_block(THREAD_CONTINUE_NULL);
1588                                         thread_interrupt_level(
1589                                                 interruptible_state);
1590                                         return(VM_FAULT_RETRY);
1591                                 }
1592                         }
1593
1594                         /*
1595                          *      If we try to collapse first_object at this
1596                          *      point, we may deadlock when we try to get
1597                          *      the lock on an intermediate object (since we
1598                          *      have the bottom object locked).  We can't
1599                          *      unlock the bottom object, because the page
1600                          *      we found may move (by collapse) if we do.
1601                          *
1602                          *      Instead, we first copy the page.  Then, when
1603                          *      we have no more use for the bottom object,
1604                          *      we unlock it and try to collapse.
1605                          *
1606                          *      Note that we copy the page even if we didn't
1607                          *      need to... that's the breaks.
1608                          */
1609
1610                         /*
1611                          *      Allocate a page for the copy
1612                          */
1613                         copy_m = vm_page_grab();
1614                         if (copy_m == VM_PAGE_NULL) {
1615                                 RELEASE_PAGE(m);
1616                                 vm_fault_cleanup(object, first_m);
1617                                 thread_interrupt_level(interruptible_state);
1618                                 return(VM_FAULT_MEMORY_SHORTAGE);
1619                         }
1620
1621
1622                         XPR(XPR_VM_FAULT,
1623                             "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1624                                 (integer_t)object, offset,
1625                                 (integer_t)m, (integer_t)copy_m, 0);
1626                         vm_page_copy(m, copy_m);
1627
1628                         /*
1629                          *      If another map is truly sharing this
1630                          *      page with us, we have to flush all
1631                          *      uses of the original page, since we
1632                          *      can't distinguish those which want the
1633                          *      original from those which need the
1634                          *      new copy.
1635                          *
1636                          *      XXXO If we know that only one map has
1637                          *      access to this page, then we could
1638                          *      avoid the pmap_disconnect() call.
1639                          */
1640
1641                         vm_page_lock_queues();
1642                         assert(!m->cleaning);
1643                         pmap_disconnect(m->phys_page);
1644                         vm_page_deactivate(m);
1645                         copy_m->dirty = TRUE;
1646                         /*
1647                          * Setting reference here prevents this fault from
1648                          * being counted as a (per-thread) reactivate as well
1649                          * as a copy-on-write.
1650                          */
1651                         first_m->reference = TRUE;
1652                         vm_page_unlock_queues();
1653
1654                         /*
1655                          *      We no longer need the old page or object.
1656                          */
1657
1658                         PAGE_WAKEUP_DONE(m);
1659                         vm_object_paging_end(object);
1660                         vm_object_unlock(object);
1661
1662                         if (type_of_fault)
1663                                 *type_of_fault = DBG_COW_FAULT;
1664                         VM_STAT(cow_faults++);
1665                         current_task()->cow_faults++;
1666                         object = first_object;
1667                         offset = first_offset;
1668
1669                         vm_object_lock(object);
1670                         VM_PAGE_FREE(first_m);
1671                         first_m = VM_PAGE_NULL;
1672                         assert(copy_m->busy);
1673                         vm_page_insert(copy_m, object, offset);
1674                         m = copy_m;
1675
1676                         /*
1677                          *      Now that we've gotten the copy out of the
1678                          *      way, let's try to collapse the top object.
1679                          *      But we have to play ugly games with
1680                          *      paging_in_progress to do that...
1681                          */
1682
1683                         vm_object_paging_end(object);
1684                         vm_object_collapse(object, offset, TRUE);
1685                         vm_object_paging_begin(object);
1686
1687                 }
1688                 else {
1689                         *protection &= (~VM_PROT_WRITE);
1690                 }
1691         }
1692
1693         /*
1694          *      Now check whether the page needs to be pushed into the
1695          *      copy object.  The use of asymmetric copy on write for
1696          *      shared temporary objects means that we may do two copies to
1697          *      satisfy the fault; one above to get the page from a
1698          *      shadowed object, and one here to push it into the copy.
1699          */
1700
1701         while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
1702                    (m!= VM_PAGE_NULL)) {
1703                 vm_object_offset_t      copy_offset;
1704                 vm_page_t               copy_m;
1705
1706 #if TRACEFAULTPAGE
1707                 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type);    /* (TEST/DEBUG) */
1708 #endif
1709                 /*
1710                  *      If the page is being written, but hasn't been
1711                  *      copied to the copy-object, we have to copy it there.
1712                  */
1713
1714                 if ((fault_type & VM_PROT_WRITE) == 0) {
1715                         *protection &= ~VM_PROT_WRITE;
1716                         break;
1717                 }
1718
1719                 /*
1720                  *      If the page was guaranteed to be resident,
1721                  *      we must have already performed the copy.
1722                  */
1723
1724                 if (must_be_resident)
1725                         break;
1726
1727                 /*
1728                  *      Try to get the lock on the copy_object.
1729                  */
1730                 if (!vm_object_lock_try(copy_object)) {
1731                         vm_object_unlock(object);
1732
1733                         mutex_pause();  /* wait a bit */
1734
1735                         vm_object_lock(object);
1736                         continue;
1737                 }
1738
1739                 /*
1740                  *      Make another reference to the copy-object,
1741                  *      to keep it from disappearing during the
1742                  *      copy.
1743                  */
1744                 assert(copy_object->ref_count > 0);
1745                 copy_object->ref_count++;
1746                 VM_OBJ_RES_INCR(copy_object);
1747
1748                 /*
1749                  *      Does the page exist in the copy?
1750                  */
1751                 copy_offset = first_offset - copy_object->shadow_offset;
1752                 if (copy_object->size <= copy_offset)
1753                         /*
1754                          * Copy object doesn't cover this page -- do nothing.
1755                          */
1756                         ;
1757                 else if ((copy_m =
1758                         vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1759                         /* Page currently exists in the copy object */
1760                         if (copy_m->busy) {
1761                                 /*
1762                                  *      If the page is being brought
1763                                  *      in, wait for it and then retry.
1764                                  */
1765                                 RELEASE_PAGE(m);
1766                                 /* take an extra ref so object won't die */
1767                                 assert(copy_object->ref_count > 0);
1768                                 copy_object->ref_count++;
1769                                 vm_object_res_reference(copy_object);
1770                                 vm_object_unlock(copy_object);
1771                                 vm_fault_cleanup(object, first_m);
1772                                 counter(c_vm_fault_page_block_backoff_kernel++);
1773                                 vm_object_lock(copy_object);
1774                                 assert(copy_object->ref_count > 0);
1775                                 VM_OBJ_RES_DECR(copy_object);
1776                                 copy_object->ref_count--;
1777                                 assert(copy_object->ref_count > 0);
1778                                 copy_m = vm_page_lookup(copy_object, copy_offset);
1779                                 /*
1780                                  * ENCRYPTED SWAP:
1781                                  * it's OK if the "copy_m" page is encrypted,
1782                                  * because we're not moving it nor handling its
1783                                  * contents.
1784                                  */
1785                                 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1786                                         PAGE_ASSERT_WAIT(copy_m, interruptible);
1787                                         vm_object_unlock(copy_object);
1788                                         wait_result = thread_block(THREAD_CONTINUE_NULL);
1789                                         vm_object_deallocate(copy_object);
1790                                         goto backoff;
1791                                 } else {
1792                                         vm_object_unlock(copy_object);
1793                                         vm_object_deallocate(copy_object);
1794                                         thread_interrupt_level(interruptible_state);
1795                                         return VM_FAULT_RETRY;
1796                                 }
1797                         }
1798                 }
1799                 else if (!PAGED_OUT(copy_object, copy_offset)) {
1800                         /*
1801                          * If PAGED_OUT is TRUE, then the page used to exist
1802                          * in the copy-object, and has already been paged out.
1803                          * We don't need to repeat this. If PAGED_OUT is
1804                          * FALSE, then either we don't know (!pager_created,
1805                          * for example) or it hasn't been paged out.
1806                          * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1807                          * We must copy the page to the copy object.
1808                          */
1809
1810                         /*
1811                          * Are we protecting the system from
1812                          * backing store exhaustion?  If so,
1813                          * sleep unless we are privileged.
1814                          */
1815
1816                         if(vm_backing_store_low) {
1817                                 if(!(current_task()->priv_flags
1818                                                 & VM_BACKING_STORE_PRIV)) {
1819                                         assert_wait((event_t)
1820                                                 &vm_backing_store_low,
1821                                                 THREAD_UNINT);
1822                                         RELEASE_PAGE(m);
1823                                         VM_OBJ_RES_DECR(copy_object);
1824                                         copy_object->ref_count--;
1825                                         assert(copy_object->ref_count > 0);
1826                                         vm_object_unlock(copy_object);
1827                                         vm_fault_cleanup(object, first_m);
1828                                         thread_block(THREAD_CONTINUE_NULL);
1829                                         thread_interrupt_level(
1830                                                 interruptible_state);
1831                                         return(VM_FAULT_RETRY);
1832                                 }
1833                         }
1834
1835                         /*
1836                          *      Allocate a page for the copy
1837                          */
1838                         copy_m = vm_page_alloc(copy_object, copy_offset);
1839                         if (copy_m == VM_PAGE_NULL) {
1840                                 RELEASE_PAGE(m);
1841                                 VM_OBJ_RES_DECR(copy_object);
1842                                 copy_object->ref_count--;
1843                                 assert(copy_object->ref_count > 0);
1844                                 vm_object_unlock(copy_object);
1845                                 vm_fault_cleanup(object, first_m);
1846                                 thread_interrupt_level(interruptible_state);
1847                                 return(VM_FAULT_MEMORY_SHORTAGE);
1848                         }
1849
1850                         /*
1851                          *      Must copy page into copy-object.
1852                          */
1853
1854                         vm_page_copy(m, copy_m);
1855
1856                         /*
1857                          *      If the old page was in use by any users
1858                          *      of the copy-object, it must be removed
1859                          *      from all pmaps.  (We can't know which
1860                          *      pmaps use it.)
1861                          */
1862
1863                         vm_page_lock_queues();
1864                         assert(!m->cleaning);
1865                         pmap_disconnect(m->phys_page);
1866                         copy_m->dirty = TRUE;
1867                         vm_page_unlock_queues();
1868
1869                         /*
1870                          *      If there's a pager, then immediately
1871                          *      page out this page, using the "initialize"
1872                          *      option.  Else, we use the copy.
1873                          */
1874
1875                         if
1876 #if     MACH_PAGEMAP
1877                           ((!copy_object->pager_created) ||
1878                                 vm_external_state_get(
1879                                         copy_object->existence_map, copy_offset)
1880                                 == VM_EXTERNAL_STATE_ABSENT)
1881 #else
1882                           (!copy_object->pager_created)
1883 #endif
1884                                 {
1885                                 vm_page_lock_queues();
1886                                 vm_page_activate(copy_m);
1887                                 vm_page_unlock_queues();
1888                                 PAGE_WAKEUP_DONE(copy_m);
1889                         }
1890                         else {
1891                                 assert(copy_m->busy == TRUE);
1892
1893                                 /*
1894                                  *      The page is already ready for pageout:
1895                                  *      not on pageout queues and busy.
1896                                  *      Unlock everything except the
1897                                  *      copy_object itself.
1898                                  */
1899
1900                                 vm_object_unlock(object);
1901
1902                                 /*
1903                                  *      Write the page to the copy-object,
1904                                  *      flushing it from the kernel.
1905                                  */
1906
1907                                 vm_pageout_initialize_page(copy_m);
1908
1909                                 /*
1910                                  *      Since the pageout may have
1911                                  *      temporarily dropped the
1912                                  *      copy_object's lock, we
1913                                  *      check whether we'll have
1914                                  *      to deallocate the hard way.
1915                                  */
1916
1917                                 if ((copy_object->shadow != object) ||
1918                                     (copy_object->ref_count == 1)) {
1919                                         vm_object_unlock(copy_object);
1920                                         vm_object_deallocate(copy_object);
1921                                         vm_object_lock(object);
1922                                         continue;
1923                                 }
1924
1925                                 /*
1926                                  *      Pick back up the old object's
1927                                  *      lock.  [It is safe to do so,
1928                                  *      since it must be deeper in the
1929                                  *      object tree.]
1930                                  */
1931
1932                                 vm_object_lock(object);
1933                         }
1934
1935                         /*
1936                          *      Because we're pushing a page upward
1937                          *      in the object tree, we must restart
1938                          *      any faults that are waiting here.
1939                          *      [Note that this is an expansion of
1940                          *      PAGE_WAKEUP that uses the THREAD_RESTART
1941                          *      wait result].  Can't turn off the page's
1942                          *      busy bit because we're not done with it.
1943                          */
1944
1945                         if (m->wanted) {
1946                                 m->wanted = FALSE;
1947                                 thread_wakeup_with_result((event_t) m,
1948                                         THREAD_RESTART);
1949                         }
1950                 }
1951
1952                 /*
1953                  *      The reference count on copy_object must be
1954                  *      at least 2: one for our extra reference,
1955                  *      and at least one from the outside world
1956                  *      (we checked that when we last locked
1957                  *      copy_object).
1958                  */
1959                 copy_object->ref_count--;
1960                 assert(copy_object->ref_count > 0);
1961                 VM_OBJ_RES_DECR(copy_object);
1962                 vm_object_unlock(copy_object);
1963
1964                 break;
1965         }
1966
1967         *result_page = m;
1968         *top_page = first_m;
1969
1970         XPR(XPR_VM_FAULT,
1971                 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1972                 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1973         /*
1974          *      If the page can be written, assume that it will be.
1975          *      [Earlier, we restrict the permission to allow write
1976          *      access only if the fault so required, so we don't
1977          *      mark read-only data as dirty.]
1978          */
1979
1980
1981         if(m != VM_PAGE_NULL) {
1982 #if     !VM_FAULT_STATIC_CONFIG
1983                 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
1984                         m->dirty = TRUE;
1985 #endif
1986                 if (vm_page_deactivate_behind)
1987                         vm_fault_deactivate_behind(object, offset, behavior);
1988         } else {
1989                 vm_object_unlock(object);
1990         }
1991         thread_interrupt_level(interruptible_state);
1992
1993 #if TRACEFAULTPAGE
1994         dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0);       /* (TEST/DEBUG) */
1995 #endif
1996         return(VM_FAULT_SUCCESS);
1997
1998 #if 0
1999     block_and_backoff:
2000         vm_fault_cleanup(object, first_m);
2001
2002         counter(c_vm_fault_page_block_backoff_kernel++);
2003         thread_block(THREAD_CONTINUE_NULL);
2004 #endif
2005
2006     backoff:
2007         thread_interrupt_level(interruptible_state);
2008         if (wait_result == THREAD_INTERRUPTED)
2009                 return VM_FAULT_INTERRUPTED;
2010         return VM_FAULT_RETRY;
2011
2012 #undef  RELEASE_PAGE
2013 }
2014
2015 /*
2016  *      Routine:        vm_fault_tws_insert
2017  *      Purpose:
2018  *              Add fault information to the task working set.
2019  *      Implementation:
2020  *              We always insert the base object/offset pair
2021  *              rather the actual object/offset.
2022  *      Assumptions:
2023  *              Map and real_map locked.
2024  *              Object locked and referenced.
2025  *      Returns:
2026  *              TRUE if startup file should be written.
2027  *              With object locked and still referenced.
2028  *              But we may drop the object lock temporarily.
2029  */
2030 static boolean_t
2031 vm_fault_tws_insert(
2032         vm_map_t map,
2033         vm_map_t real_map,
2034         vm_map_offset_t vaddr,
2035         vm_object_t object,
2036         vm_object_offset_t offset)
2037 {
2038         tws_hash_line_t line;
2039         task_t          task;
2040         kern_return_t   kr;
2041         boolean_t       result = FALSE;
2042
2043         /* Avoid possible map lock deadlock issues */
2044         if (map == kernel_map || map == kalloc_map ||
2045             real_map == kernel_map || real_map == kalloc_map)
2046                 return result;
2047
2048         task = current_task();
2049         if (task->dynamic_working_set != 0) {
2050                 vm_object_t     base_object;
2051                 vm_object_t     base_shadow;
2052                 vm_object_offset_t base_offset;
2053                 base_object = object;
2054                 base_offset = offset;
2055                 while ((base_shadow = base_object->shadow)) {
2056                         vm_object_lock(base_shadow);
2057                         vm_object_unlock(base_object);
2058                         base_offset +=
2059                                 base_object->shadow_offset;
2060                         base_object = base_shadow;
2061                 }
2062                 kr = tws_lookup(
2063                         task->dynamic_working_set,
2064                         base_offset, base_object,
2065                         &line);
2066                 if (kr == KERN_OPERATION_TIMED_OUT){
2067                         result = TRUE;
2068                         if (base_object != object) {
2069                                 vm_object_unlock(base_object);
2070                                 vm_object_lock(object);
2071                         }
2072                 } else if (kr != KERN_SUCCESS) {
2073                         if(base_object != object)
2074                                 vm_object_reference_locked(base_object);
2075                         kr = tws_insert(
2076                                    task->dynamic_working_set,
2077                                    base_offset, base_object,
2078                                    vaddr, real_map);
2079                         if(base_object != object) {
2080                                 vm_object_unlock(base_object);
2081                                 vm_object_deallocate(base_object);
2082                         }
2083                         if(kr == KERN_NO_SPACE) {
2084                                 if (base_object == object)
2085                                         vm_object_unlock(object);
2086                                 tws_expand_working_set(
2087                                    task->dynamic_working_set,
2088                                    TWS_HASH_LINE_COUNT,
2089                                    FALSE);
2090                                 if (base_object == object)
2091                                         vm_object_lock(object);
2092                         } else if(kr == KERN_OPERATION_TIMED_OUT) {
2093                                 result = TRUE;
2094                         }
2095                         if(base_object != object)
2096                                 vm_object_lock(object);
2097                 } else if (base_object != object) {
2098                         vm_object_unlock(base_object);
2099                         vm_object_lock(object);
2100                 }
2101         }
2102         return result;
2103 }
2104
2105 /*
2106  *      Routine:        vm_fault
2107  *      Purpose:
2108  *              Handle page faults, including pseudo-faults
2109  *              used to change the wiring status of pages.
2110  *      Returns:
2111  *              Explicit continuations have been removed.
2112  *      Implementation:
2113  *              vm_fault and vm_fault_page save mucho state
2114  *              in the moral equivalent of a closure.  The state
2115  *              structure is allocated when first entering vm_fault
2116  *              and deallocated when leaving vm_fault.
2117  */
2118
/*
 * Debug flag declared extern here; its definition is not in the visible
 * part of this file.  The only visible use is a commented-out printf that
 * follows the vm_map_lookup_locked() call in vm_fault().
 */
2119 extern int _map_enter_debug;
2120
2121 kern_return_t
2122 vm_fault(
2123         vm_map_t        map,
2124         vm_map_offset_t vaddr,
2125         vm_prot_t       fault_type,
2126         boolean_t       change_wiring,
2127         int             interruptible,
2128         pmap_t          caller_pmap,
2129         vm_map_offset_t caller_pmap_addr)
2130 {
2131         vm_map_version_t        version;        /* Map version for verificiation */
2132         boolean_t               wired;          /* Should mapping be wired down? */
2133         vm_object_t             object;         /* Top-level object */
2134         vm_object_offset_t      offset;         /* Top-level offset */
2135         vm_prot_t               prot;           /* Protection for mapping */
2136         vm_behavior_t           behavior;       /* Expected paging behavior */
2137         vm_map_offset_t         lo_offset, hi_offset;
2138         vm_object_t             old_copy_object; /* Saved copy object */
2139         vm_page_t               result_page;    /* Result of vm_fault_page */
2140         vm_page_t               top_page;       /* Placeholder page */
2141         kern_return_t           kr;
2142
2143         register
2144         vm_page_t               m;      /* Fast access to result_page */
2145         kern_return_t           error_code = 0; /* page error reasons */
2146         register
2147         vm_object_t             cur_object;
2148         register
2149         vm_object_offset_t      cur_offset;
2150         vm_page_t               cur_m;
2151         vm_object_t             new_object;
2152         int                     type_of_fault;
2153         vm_map_t                real_map = map;
2154         vm_map_t                original_map = map;
2155         pmap_t                  pmap = NULL;
2156         boolean_t               interruptible_state;
2157         unsigned int            cache_attr;
2158         int                     write_startup_file = 0;
2159         boolean_t               need_activation;
2160         vm_prot_t               original_fault_type;
2161
2162
2163         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
2164                               vaddr,
2165                               0,
2166                               0,
2167                               0,
2168                               0);
2169
2170         if (get_preemption_level() != 0) {
2171                 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2172                                       vaddr,
2173                                       0,
2174                                       KERN_FAILURE,
2175                                       0,
2176                                       0);
2177
2178                 return (KERN_FAILURE);
2179         }
2180
2181         interruptible_state = thread_interrupt_level(interruptible);
2182
2183         /*
2184          * assume we will hit a page in the cache
2185          * otherwise, explicitly override with
2186          * the real fault type once we determine it
2187          */
2188         type_of_fault = DBG_CACHE_HIT_FAULT;
2189
2190         VM_STAT(faults++);
2191         current_task()->faults++;
2192
2193         original_fault_type = fault_type;
2194
2195     RetryFault: ;
2196
2197         /*
2198          *      Find the backing store object and offset into
2199          *      it to begin the search.
2200          */
2201         fault_type = original_fault_type;
2202         map = original_map;
2203         vm_map_lock_read(map);
2204         kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
2205                                 &object, &offset,
2206                                 &prot, &wired,
2207                                 &behavior, &lo_offset, &hi_offset, &real_map);
2208
2209 //if (_map_enter_debug)printf("vm_map_lookup_locked(map=0x%x, addr=0x%llx, prot=%d wired=%d) = %d\n", map, vaddr, prot, wired, kr);
2210
2211         pmap = real_map->pmap;
2212
2213         if (kr != KERN_SUCCESS) {
2214                 vm_map_unlock_read(map);
2215                 goto done;
2216         }
2217
2218         /*
2219          *      If the page is wired, we must fault for the current protection
2220          *      value, to avoid further faults.
2221          */
2222
2223         if (wired)
2224                 fault_type = prot | VM_PROT_WRITE;
2225
2226 #if     VM_FAULT_CLASSIFY
2227         /*
2228          *      Temporary data gathering code
2229          */
2230         vm_fault_classify(object, offset, fault_type);
2231 #endif
2232         /*
2233          *      Fast fault code.  The basic idea is to do as much as
2234          *      possible while holding the map lock and object locks.
2235          *      Busy pages are not used until the object lock has to
2236          *      be dropped to do something (copy, zero fill, pmap enter).
2237          *      Similarly, paging references aren't acquired until that
2238          *      point, and object references aren't used.
2239          *
2240          *      If we can figure out what to do
2241          *      (zero fill, copy on write, pmap enter) while holding
2242          *      the locks, then it gets done.  Otherwise, we give up,
2243          *      and use the original fault path (which doesn't hold
2244          *      the map lock, and relies on busy pages).
2245          *      The give up cases include:
2246          *              - Have to talk to pager.
2247          *              - Page is busy, absent or in error.
2248          *              - Pager has locked out desired access.
2249          *              - Fault needs to be restarted.
2250          *              - Have to push page into copy object.
2251          *
2252          *      The code is an infinite loop that moves one level down
2253          *      the shadow chain each time.  cur_object and cur_offset
2254          *      refer to the current object being examined. object and offset
2255          *      are the original object from the map.  The loop is at the
2256          *      top level if and only if object and cur_object are the same.
2257          *
2258          *      Invariants:  Map lock is held throughout.  Lock is held on
2259          *              original object and cur_object (if different) when
2260          *              continuing or exiting loop.
2261          *
2262          */
2263
2264
2265         /*
2266          *      If this page is to be inserted in a copy delay object
2267          *      for writing, and if the object has a copy, then the
2268          *      copy delay strategy is implemented in the slow fault page.
2269          */
2270         if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
2271             object->copy == VM_OBJECT_NULL ||
2272             (fault_type & VM_PROT_WRITE) == 0) {
2273         cur_object = object;
2274         cur_offset = offset;
2275
2276         while (TRUE) {
2277                 m = vm_page_lookup(cur_object, cur_offset);
2278                 if (m != VM_PAGE_NULL) {
2279                         if (m->busy) {
2280                                 wait_result_t   result;
2281
2282                                 if (object != cur_object)
2283                                         vm_object_unlock(object);
2284
2285                                 vm_map_unlock_read(map);
2286                                 if (real_map != map)
2287                                         vm_map_unlock(real_map);
2288
2289 #if     !VM_FAULT_STATIC_CONFIG
2290                                 if (!vm_fault_interruptible)
2291                                         interruptible = THREAD_UNINT;
2292 #endif
2293                                 result = PAGE_ASSERT_WAIT(m, interruptible);
2294
2295                                 vm_object_unlock(cur_object);
2296
2297                                 if (result == THREAD_WAITING) {
2298                                         result = thread_block(THREAD_CONTINUE_NULL);
2299
2300                                         counter(c_vm_fault_page_block_busy_kernel++);
2301                                 }
2302                                 if (result == THREAD_AWAKENED || result == THREAD_RESTART)
2303                                         goto RetryFault;
2304
2305                                 kr = KERN_ABORTED;
2306                                 goto done;
2307                         }
2308                         if (m->unusual && (m->error || m->restart || m->private
2309                             || m->absent || (fault_type & m->page_lock))) {
2310
2311                                 /*
2312                                  *      Unusual case. Give up.
2313                                  */
2314                                 break;
2315                         }
2316
2317                         if (m->encrypted) {
2318                                 /*
2319                                  * ENCRYPTED SWAP:
2320                                  * We've soft-faulted (because it's not in the page
2321                                  * table) on an encrypted page.
2322                                  * Keep the page "busy" so that noone messes with
2323                                  * it during the decryption.
2324                                  * Release the extra locks we're holding, keep only
2325                                  * the page's VM object lock.
2326                                  */
2327                                 m->busy = TRUE;
2328                                 if (object != cur_object) {
2329                                         vm_object_unlock(object);
2330                                 }
2331                                 vm_map_unlock_read(map);
2332                                 if (real_map != map)
2333                                         vm_map_unlock(real_map);
2334
2335                                 vm_page_decrypt(m, 0);
2336
2337                                 assert(m->busy);
2338                                 PAGE_WAKEUP_DONE(m);
2339                                 vm_object_unlock(m->object);
2340
2341                                 /*
2342                                  * Retry from the top, in case anything
2343                                  * changed while we were decrypting...
2344                                  */
2345                                 goto RetryFault;
2346                         }
2347                         ASSERT_PAGE_DECRYPTED(m);
2348
2349                         /*
2350                          *      Two cases of map in faults:
2351                          *          - At top level w/o copy object.
2352                          *          - Read fault anywhere.
2353                          *              --> must disallow write.
2354                          */
2355
2356                         if (object == cur_object &&
2357                             object->copy == VM_OBJECT_NULL)
2358                                 goto FastMapInFault;
2359
2360                         if ((fault_type & VM_PROT_WRITE) == 0) {
2361                                 boolean_t sequential;
2362
2363                                 prot &= ~VM_PROT_WRITE;
2364
2365                                 /*
2366                                  *      Set up to map the page ...
2367                                  *      mark the page busy, drop
2368                                  *      locks and take a paging reference
2369                                  *      on the object with the page.
2370                                  */
2371
2372                                 if (object != cur_object) {
2373                                         vm_object_unlock(object);
2374                                         object = cur_object;
2375                                 }
2376 FastMapInFault:
2377                                 m->busy = TRUE;
2378
2379 FastPmapEnter:
2380                                 /*
2381                                  *      Check a couple of global reasons to
2382                                  *      be conservative about write access.
2383                                  *      Then do the pmap_enter.
2384                                  */
2385 #if     !VM_FAULT_STATIC_CONFIG
2386                                 if (vm_fault_dirty_handling
2387 #if     MACH_KDB
2388                                     || db_watchpoint_list
2389 #endif
2390                                     && (fault_type & VM_PROT_WRITE) == 0)
2391                                         prot &= ~VM_PROT_WRITE;
2392 #else   /* STATIC_CONFIG */
2393 #if     MACH_KDB
2394                                 if (db_watchpoint_list
2395                                     && (fault_type & VM_PROT_WRITE) == 0)
2396                                         prot &= ~VM_PROT_WRITE;
2397 #endif  /* MACH_KDB */
2398 #endif  /* STATIC_CONFIG */
2399                                 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2400
2401                                 sequential = FALSE;
2402                                 need_activation = FALSE;
2403
2404                                 if (m->no_isync == TRUE) {
2405                                         m->no_isync = FALSE;
2406                                         pmap_sync_page_data_phys(m->phys_page);
2407
2408                                         if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
2409                                                 /*
2410                                                  * found it in the cache, but this
2411                                                  * is the first fault-in of the page (no_isync == TRUE)
2412                                                  * so it must have come in as part of
2413                                                  * a cluster... account 1 pagein against it
2414                                                  */
2415                                                 VM_STAT(pageins++);
2416                                                 current_task()->pageins++;
2417                                                 type_of_fault = DBG_PAGEIN_FAULT;
2418                                                 sequential = TRUE;
2419                                         }
2420                                         if (m->clustered)
2421                                                 need_activation = TRUE;
2422
2423                                 } else if (cache_attr != VM_WIMG_DEFAULT) {
2424                                         pmap_sync_page_attributes_phys(m->phys_page);
2425                                 }
2426
2427                                 if(caller_pmap) {
2428                                         PMAP_ENTER(caller_pmap,
2429                                                 caller_pmap_addr, m,
2430                                                 prot, cache_attr, wired);
2431                                 } else {
2432                                         PMAP_ENTER(pmap, vaddr, m,
2433                                                 prot, cache_attr, wired);
2434                                 }
2435
2436                                 /*
2437                                  *      Hold queues lock to manipulate
2438                                  *      the page queues.  Change wiring
2439                                  *      case is obvious.  In soft ref bits
2440                                  *      case activate page only if it fell
2441                                  *      off paging queues, otherwise just
2442                                  *      activate it if it's inactive.
2443                                  *
2444                                  *      NOTE: original vm_fault code will
2445                                  *      move active page to back of active
2446                                  *      queue.  This code doesn't.
2447                                  */
2448                                 if (m->clustered) {
2449                                         vm_pagein_cluster_used++;
2450                                         m->clustered = FALSE;
2451                                 }
2452                                 if (change_wiring) {
2453                                         vm_page_lock_queues();
2454
2455                                         if (wired)
2456                                                 vm_page_wire(m);
2457                                         else
2458                                                 vm_page_unwire(m);
2459
2460                                         vm_page_unlock_queues();
2461                                 }
2462                                 else {
2463                                         if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active)) {
2464                                                 vm_page_lock_queues();
2465                                                 vm_page_activate(m);
2466                                                 vm_page_unlock_queues();
2467                                         }
2468                                 }
2469
2470                                 /*
2471                                  *      That's it, clean up and return.
2472                                  */
2473                                 PAGE_WAKEUP_DONE(m);
2474
2475                                 sequential = (sequential && vm_page_deactivate_behind) ?
2476                                         vm_fault_deactivate_behind(object, cur_offset, behavior) :
2477                                         FALSE;
2478
2479                                 /*
2480                                  * Add non-sequential pages to the working set.
2481                                  * The sequential pages will be brought in through
2482                                  * normal clustering behavior.
2483                                  */
2484                                 if (!sequential && !object->private) {
2485                                         vm_object_paging_begin(object);
2486
2487                                         write_startup_file =
2488                                                 vm_fault_tws_insert(map, real_map, vaddr,
2489                                                                 object, cur_offset);
2490
2491                                         vm_object_paging_end(object);
2492                                 }
2493                                 vm_object_unlock(object);
2494
2495                                 vm_map_unlock_read(map);
2496                                 if(real_map != map)
2497                                         vm_map_unlock(real_map);
2498
2499                                 if(write_startup_file)
2500                                         tws_send_startup_info(current_task());
2501
2502                                 thread_interrupt_level(interruptible_state);
2503
2504
2505                                 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2506                                                       vaddr,
2507                                                       type_of_fault & 0xff,
2508                                                       KERN_SUCCESS,
2509                                                       type_of_fault >> 8,
2510                                                       0);
2511
2512                                 return KERN_SUCCESS;
2513                         }
2514
2515                         /*
2516                          *      Copy on write fault.  If objects match, then
2517                          *      object->copy must not be NULL (else control
2518                          *      would be in previous code block), and we
2519                          *      have a potential push into the copy object
2520                          *      with which we won't cope here.
2521                          */
2522
2523                         if (cur_object == object)
2524                                 break;
2525                         /*
2526                          *      This is now a shadow based copy on write
2527                          *      fault -- it requires a copy up the shadow
2528                          *      chain.
2529                          *
2530                          *      Allocate a page in the original top level
2531                          *      object. Give up if allocate fails.  Also
2532                          *      need to remember current page, as it's the
2533                          *      source of the copy.
2534                          */
2535                         cur_m = m;
2536                         m = vm_page_grab();
2537                         if (m == VM_PAGE_NULL) {
2538                                 break;
2539                         }
2540                         /*
2541                          *      Now do the copy.  Mark the source busy
2542                          *      and take out paging references on both
2543                          *      objects.
2544                          *
2545                          *      NOTE: This code holds the map lock across
2546                          *      the page copy.
2547                          */
2548
2549                         cur_m->busy = TRUE;
2550                         vm_page_copy(cur_m, m);
2551                         vm_page_insert(m, object, offset);
2552
2553                         vm_object_paging_begin(cur_object);
2554                         vm_object_paging_begin(object);
2555
2556                         type_of_fault = DBG_COW_FAULT;
2557                         VM_STAT(cow_faults++);
2558                         current_task()->cow_faults++;
2559
2560                         /*
2561                          *      Now cope with the source page and object
2562                          *      If the top object has a ref count of 1
2563                          *      then no other map can access it, and hence
2564                          *      it's not necessary to do the pmap_disconnect.
2565                          */
2566
2567                         vm_page_lock_queues();
2568                         vm_page_deactivate(cur_m);
2569                         m->dirty = TRUE;
2570                         pmap_disconnect(cur_m->phys_page);
2571                         vm_page_unlock_queues();
2572
2573                         PAGE_WAKEUP_DONE(cur_m);
2574                         vm_object_paging_end(cur_object);
2575                         vm_object_unlock(cur_object);
2576
2577                         /*
2578                          *      Slight hack to call vm_object collapse
2579                          *      and then reuse common map in code.
2580                          *      note that the object lock was taken above.
2581                          */
2582
2583                         vm_object_paging_end(object);
2584                         vm_object_collapse(object, offset, TRUE);
2585
2586                         goto FastPmapEnter;
2587                 }
2588                 else {
2589
2590                         /*
2591                          *      No page at cur_object, cur_offset
2592                          */
2593
2594                         if (cur_object->pager_created) {
2595
2596                                 /*
2597                                  *      Have to talk to the pager.  Give up.
2598                                  */
2599                                 break;
2600                         }
2601
2602
2603                         if (cur_object->shadow == VM_OBJECT_NULL) {
2604
2605                                 if (cur_object->shadow_severed) {
2606                                         vm_object_paging_end(object);
2607                                         vm_object_unlock(object);
2608                                         vm_map_unlock_read(map);
2609                                         if(real_map != map)
2610                                                 vm_map_unlock(real_map);
2611
2612                                         if(write_startup_file)
2613                                                 tws_send_startup_info(
2614                                                                 current_task());
2615
2616                                         thread_interrupt_level(interruptible_state);
2617
2618                                         return KERN_MEMORY_ERROR;
2619                                 }
2620
2621                                 /*
2622                                  *      Zero fill fault.  Page gets
2623                                  *      filled in top object. Insert
2624                                  *      page, then drop any lower lock.
2625                                  *      Give up if no page.
2626                                  */
2627                                 if (VM_PAGE_THROTTLED()) {
2628                                         break;
2629                                 }
2630
2631                                 /*
2632                                  * are we protecting the system from
2633                                  * backing store exhaustion.  If so
2634                                  * sleep unless we are privileged.
2635                                  */
2636                                 if(vm_backing_store_low) {
2637                                         if(!(current_task()->priv_flags
2638                                                 & VM_BACKING_STORE_PRIV))
2639                                         break;
2640                                 }
2641                                 m = vm_page_alloc(object, offset);
2642                                 if (m == VM_PAGE_NULL) {
2643                                         break;
2644                                 }
2645                                 /*
2646                                  * This is a zero-fill or initial fill
2647                                  * page fault.  As such, we consider it
2648                                  * undefined with respect to instruction
2649                                  * execution.  i.e. it is the responsibility
2650                                  * of higher layers to call for an instruction
2651                                  * sync after changing the contents and before
2652                                  * sending a program into this area.  We
2653                                  * choose this approach for performance
2654                                  */
2655
2656                                 m->no_isync = FALSE;
2657
2658                                 if (cur_object != object)
2659                                         vm_object_unlock(cur_object);
2660
2661                                 /*
2662                                  *      Now zero fill page and map it.
2663                                  *      the page is probably going to
2664                                  *      be written soon, so don't bother
2665                                  *      to clear the modified bit
2666                                  *
2667                                  *      NOTE: This code holds the map
2668                                  *      lock across the zero fill.
2669                                  */
2670
2671                                 if (!map->no_zero_fill) {
2672                                         vm_page_zero_fill(m);
2673                                         type_of_fault = DBG_ZERO_FILL_FAULT;
2674                                         VM_STAT(zero_fill_count++);
2675                                 }
2676                                 vm_page_lock_queues();
2677                                 VM_PAGE_QUEUES_REMOVE(m);
2678
2679                                 m->page_ticket = vm_page_ticket;
2680                                 assert(!m->laundry);
2681                                 assert(m->object != kernel_object);
2682                                 assert(m->pageq.next == NULL &&
2683                                        m->pageq.prev == NULL);
2684                                 if(m->object->size > 0x200000) {
2685                                         m->zero_fill = TRUE;
2686                                         /* depends on the queues lock */
2687                                         vm_zf_count += 1;
2688                                         queue_enter(&vm_page_queue_zf,
2689                                                 m, vm_page_t, pageq);
2690                                 } else {
2691                                         queue_enter(
2692                                                 &vm_page_queue_inactive,
2693                                                 m, vm_page_t, pageq);
2694                                 }
2695                                 vm_page_ticket_roll++;
2696                                 if(vm_page_ticket_roll ==
2697                                                 VM_PAGE_TICKETS_IN_ROLL) {
2698                                         vm_page_ticket_roll = 0;
2699                                         if(vm_page_ticket ==
2700                                                 VM_PAGE_TICKET_ROLL_IDS)
2701                                                 vm_page_ticket= 0;
2702                                         else
2703                                                 vm_page_ticket++;
2704                                 }
2705
2706                                 m->inactive = TRUE;
2707                                 vm_page_inactive_count++;
2708                                 vm_page_unlock_queues();
2709
2710                                 goto FastPmapEnter;
2711                         }
2712
2713                         /*
2714                          *      On to the next level
2715                          */
2716
2717                         cur_offset += cur_object->shadow_offset;
2718                         new_object = cur_object->shadow;
2719                         vm_object_lock(new_object);
2720                         if (cur_object != object)
2721                                 vm_object_unlock(cur_object);
2722                         cur_object = new_object;
2723
2724                         continue;
2725                 }
2726         }
2727
2728         /*
2729          *      Cleanup from fast fault failure.  Drop any object
2730          *      lock other than original and drop map lock.
2731          */
2732
2733         if (object != cur_object)
2734                 vm_object_unlock(cur_object);
2735         }
2736         vm_map_unlock_read(map);
2737
2738         if(real_map != map)
2739                 vm_map_unlock(real_map);
2740
2741         /*
2742          *      Make a reference to this object to
2743          *      prevent its disposal while we are messing with
2744          *      it.  Once we have the reference, the map is free
2745          *      to be diddled.  Since objects reference their
2746          *      shadows (and copies), they will stay around as well.
2747          */
2748
2749         assert(object->ref_count > 0);
2750         object->ref_count++;
2751         vm_object_res_reference(object);
2752         vm_object_paging_begin(object);
2753
2754         XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2755
2756         if (!object->private) {
2757                 write_startup_file =
2758                         vm_fault_tws_insert(map, real_map, vaddr, object, offset);
2759         }
2760
2761         kr = vm_fault_page(object, offset, fault_type,
2762                            (change_wiring && !wired),
2763                            interruptible,
2764                            lo_offset, hi_offset, behavior,
2765                            &prot, &result_page, &top_page,
2766                            &type_of_fault,
2767                            &error_code, map->no_zero_fill, FALSE, map, vaddr);
2768
2769         /*
2770          *      If we didn't succeed, lose the object reference immediately.
2771          */
2772
2773         if (kr != VM_FAULT_SUCCESS)
2774                 vm_object_deallocate(object);
2775
2776         /*
2777          *      See why we failed, and take corrective action.
2778          */
2779
2780         switch (kr) {
2781                 case VM_FAULT_SUCCESS:
2782                         break;
2783                 case VM_FAULT_MEMORY_SHORTAGE:
2784                         if (vm_page_wait((change_wiring) ?
2785                                          THREAD_UNINT :
2786                                          THREAD_ABORTSAFE))
2787                                 goto RetryFault;
2788                         /* fall thru */
2789                 case VM_FAULT_INTERRUPTED:
2790                         kr = KERN_ABORTED;
2791                         goto done;
2792                 case VM_FAULT_RETRY:
2793                         goto RetryFault;
2794                 case VM_FAULT_FICTITIOUS_SHORTAGE:
2795                         vm_page_more_fictitious();
2796                         goto RetryFault;
2797                 case VM_FAULT_MEMORY_ERROR:
2798                         if (error_code)
2799                                 kr = error_code;
2800                         else
2801                                 kr = KERN_MEMORY_ERROR;
2802                         goto done;
2803         }
2804
2805         m = result_page;
2806
2807         if(m != VM_PAGE_NULL) {
2808                 assert((change_wiring && !wired) ?
2809                     (top_page == VM_PAGE_NULL) :
2810                     ((top_page == VM_PAGE_NULL) == (m->object == object)));
2811         }
2812
2813         /*
2814          *      How to clean up the result of vm_fault_page.  This
2815          *      happens whether the mapping is entered or not.
2816          */
2817
2818 #define UNLOCK_AND_DEALLOCATE                           \
2819         MACRO_BEGIN                                     \
2820         vm_fault_cleanup(m->object, top_page);          \
2821         vm_object_deallocate(object);                   \
2822         MACRO_END
2823
2824         /*
2825          *      What to do with the resulting page from vm_fault_page
2826          *      if it doesn't get entered into the physical map:
2827          */
2828
2829 #define RELEASE_PAGE(m)                                 \
2830         MACRO_BEGIN                                     \
2831         PAGE_WAKEUP_DONE(m);                            \
2832         vm_page_lock_queues();                          \
2833         if (!m->active && !m->inactive)                 \
2834                 vm_page_activate(m);                    \
2835         vm_page_unlock_queues();                        \
2836         MACRO_END
2837
2838         /*
2839          *      We must verify that the maps have not changed
2840          *      since our last lookup.
2841          */
2842
2843         if(m != VM_PAGE_NULL) {
2844                 old_copy_object = m->object->copy;
2845                 vm_object_unlock(m->object);
2846         } else {
2847                 old_copy_object = VM_OBJECT_NULL;
2848         }
2849         if ((map != original_map) || !vm_map_verify(map, &version)) {
2850                 vm_object_t             retry_object;
2851                 vm_object_offset_t      retry_offset;
2852                 vm_prot_t               retry_prot;
2853
2854                 /*
2855                  *      To avoid trying to write_lock the map while another
2856                  *      thread has it read_locked (in vm_map_pageable), we
2857                  *      do not try for write permission.  If the page is
2858                  *      still writable, we will get write permission.  If it
2859                  *      is not, or has been marked needs_copy, we enter the
2860                  *      mapping without write permission, and will merely
2861                  *      take another fault.
2862                  */
2863                 map = original_map;
2864                 vm_map_lock_read(map);
2865                 kr = vm_map_lookup_locked(&map, vaddr,
2866                                    fault_type & ~VM_PROT_WRITE, &version,
2867                                    &retry_object, &retry_offset, &retry_prot,
2868                                    &wired, &behavior, &lo_offset, &hi_offset,
2869                                    &real_map);
2870                 pmap = real_map->pmap;
2871
2872                 if (kr != KERN_SUCCESS) {
2873                         vm_map_unlock_read(map);
2874                         if(m != VM_PAGE_NULL) {
2875                                 vm_object_lock(m->object);
2876                                 RELEASE_PAGE(m);
2877                                 UNLOCK_AND_DEALLOCATE;
2878                         } else {
2879                                 vm_object_deallocate(object);
2880                         }
2881                         goto done;
2882                 }
2883
2884                 vm_object_unlock(retry_object);
2885                 if(m != VM_PAGE_NULL) {
2886                         vm_object_lock(m->object);
2887                 } else {
2888                         vm_object_lock(object);
2889                 }
2890
2891                 if ((retry_object != object) ||
2892                     (retry_offset != offset)) {
2893                         vm_map_unlock_read(map);
2894                         if(real_map != map)
2895                                 vm_map_unlock(real_map);
2896                         if(m != VM_PAGE_NULL) {
2897                                 RELEASE_PAGE(m);
2898                                 UNLOCK_AND_DEALLOCATE;
2899                         } else {
2900                                 vm_object_deallocate(object);
2901                         }
2902                         goto RetryFault;
2903                 }
2904
2905                 /*
2906                  *      Check whether the protection has changed or the object
2907                  *      has been copied while we left the map unlocked.
2908                  */
2909                 prot &= retry_prot;
2910                 if(m != VM_PAGE_NULL) {
2911                         vm_object_unlock(m->object);
2912                 } else {
2913                         vm_object_unlock(object);
2914                 }
2915         }
2916         if(m != VM_PAGE_NULL) {
2917                 vm_object_lock(m->object);
2918         } else {
2919                 vm_object_lock(object);
2920         }
2921
2922         /*
2923          *      If the copy object changed while the top-level object
2924          *      was unlocked, then we must take away write permission.
2925          */
2926
2927         if(m != VM_PAGE_NULL) {
2928                 if (m->object->copy != old_copy_object)
2929                         prot &= ~VM_PROT_WRITE;
2930         }
2931
2932         /*
2933          *      If we want to wire down this page, but no longer have
2934          *      adequate permissions, we must start all over.
2935          */
2936
2937         if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2938                 vm_map_verify_done(map, &version);
2939                 if(real_map != map)
2940                         vm_map_unlock(real_map);
2941                 if(m != VM_PAGE_NULL) {
2942                         RELEASE_PAGE(m);
2943                         UNLOCK_AND_DEALLOCATE;
2944                 } else {
2945                         vm_object_deallocate(object);
2946                 }
2947                 goto RetryFault;
2948         }
2949
2950         /*
2951          *      Put this page into the physical map.
2952          *      We had to do the unlock above because pmap_enter
2953          *      may cause other faults.  The page may be on
2954          *      the pageout queues.  If the pageout daemon comes
2955          *      across the page, it will remove it from the queues.
2956          */
2957         need_activation = FALSE;
2958
2959         if (m != VM_PAGE_NULL) {
2960                 if (m->no_isync == TRUE) {
2961                         pmap_sync_page_data_phys(m->phys_page);
2962
2963                         if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
2964                                 /*
2965                                  * found it in the cache, but this
2966                                  * is the first fault-in of the page (no_isync == TRUE)
2967                                  * so it must have come in as part of
2968                                  * a cluster... account 1 pagein against it
2969                                  */
2970                                  VM_STAT(pageins++);
2971                                  current_task()->pageins++;
2972
2973                                  type_of_fault = DBG_PAGEIN_FAULT;
2974                         }
2975                         if (m->clustered) {
2976                                 need_activation = TRUE;
2977                         }
2978                         m->no_isync = FALSE;
2979                 }
2980                 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2981
2982                 if(caller_pmap) {
2983                         PMAP_ENTER(caller_pmap,
2984                                         caller_pmap_addr, m,
2985                                         prot, cache_attr, wired);
2986                 } else {
2987                         PMAP_ENTER(pmap, vaddr, m,
2988                                         prot, cache_attr, wired);
2989                 }
2990
2991                 /*
2992                  * Add working set information for private objects here.
2993                  */
2994                 if (m->object->private) {
2995                         write_startup_file =
2996                                 vm_fault_tws_insert(map, real_map, vaddr,
2997                                             m->object, m->offset);
2998                 }
2999         } else {
3000
3001                 vm_map_entry_t          entry;
3002                 vm_map_offset_t         laddr;
3003                 vm_map_offset_t         ldelta, hdelta;
3004
3005                 /*
3006                  * do a pmap block mapping from the physical address
3007                  * in the object
3008                  */
3009
3010 #ifndef i386
3011                 /* While we do not worry about execution protection in   */
3012                 /* general, certain pages may have instruction execution */
3013                 /* disallowed.  We will check here, and if not allowed   */
3014                 /* to execute, we return with a protection failure.      */
3015
3016                 if((fault_type & VM_PROT_EXECUTE) &&
3017                         (!pmap_eligible_for_execute((ppnum_t)
3018                                 (object->shadow_offset >> 12)))) {
3019
3020                         vm_map_verify_done(map, &version);
3021                         if(real_map != map)
3022                                 vm_map_unlock(real_map);
3023                         vm_fault_cleanup(object, top_page);
3024                         vm_object_deallocate(object);
3025                         kr = KERN_PROTECTION_FAILURE;
3026                         goto done;
3027                 }
3028 #endif  /* !i386 */
3029
3030                 if(real_map != map) {
3031                         vm_map_unlock(real_map);
3032                 }
3033                 if (original_map != map) {
3034                         vm_map_unlock_read(map);
3035                         vm_map_lock_read(original_map);
3036                         map = original_map;
3037                 }
3038                 real_map = map;
3039
3040                 laddr = vaddr;
3041                 hdelta = 0xFFFFF000;
3042                 ldelta = 0xFFFFF000;
3043
3044
3045                 while(vm_map_lookup_entry(map, laddr, &entry)) {
3046                         if(ldelta > (laddr - entry->vme_start))
3047                                 ldelta = laddr - entry->vme_start;
3048                         if(hdelta > (entry->vme_end - laddr))
3049                                 hdelta = entry->vme_end - laddr;
3050                         if(entry->is_sub_map) {
3051
3052                                 laddr = (laddr - entry->vme_start)
3053                                                         + entry->offset;
3054                                 vm_map_lock_read(entry->object.sub_map);
3055                                 if(map != real_map)
3056                                         vm_map_unlock_read(map);
3057                                 if(entry->use_pmap) {
3058                                         vm_map_unlock_read(real_map);
3059                                         real_map = entry->object.sub_map;
3060                                 }
3061                                 map = entry->object.sub_map;
3062
3063                         } else {
3064                                 break;
3065                         }
3066                 }
3067
3068                 if(vm_map_lookup_entry(map, laddr, &entry) &&
3069                    (entry->object.vm_object != NULL) &&
3070                    (entry->object.vm_object == object)) {
3071
3072                         vm_map_offset_t phys_offset;
3073
3074                         phys_offset = (entry->object.vm_object->shadow_offset
3075                                        + entry->offset
3076                                        + laddr
3077                                        - entry->vme_start);
3078                         phys_offset -= ldelta;
3079                         if(caller_pmap) {
3080                                 /* Set up a block mapped area */
3081                                 pmap_map_block(
3082                                         caller_pmap,
3083                                         (addr64_t)(caller_pmap_addr - ldelta),
3084                                         phys_offset >> 12,
3085                                         (ldelta + hdelta) >> 12,
3086                                         prot,
3087                                         (VM_WIMG_MASK & (int)object->wimg_bits),
3088                                         0);
3089                         } else {
3090                                 /* Set up a block mapped area */
3091                                 pmap_map_block(
3092                                         real_map->pmap,
3093                                         (addr64_t)(vaddr - ldelta),
3094                                         phys_offset >> 12,
3095                                         (ldelta + hdelta) >> 12,
3096                                         prot,
3097                                         (VM_WIMG_MASK & (int)object->wimg_bits),
3098                                         0);
3099                         }
3100                 }
3101
3102         }
3103
3104         /*
3105          *      If the page is not wired down and isn't already
3106          *      on a pageout queue, then put it where the
3107          *      pageout daemon can find it.
3108          */
3109         if(m != VM_PAGE_NULL) {
3110                 vm_page_lock_queues();
3111
3112                 if (m->clustered) {
3113                         vm_pagein_cluster_used++;
3114                         m->clustered = FALSE;
3115                 }
3116                 m->reference = TRUE;
3117
3118                 if (change_wiring) {
3119                         if (wired)
3120                                 vm_page_wire(m);
3121                         else
3122                                 vm_page_unwire(m);
3123                 }
3124 #if     VM_FAULT_STATIC_CONFIG
3125                 else {
3126                         if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
3127                                 vm_page_activate(m);
3128                 }
3129 #else
3130                 else if (software_reference_bits) {
3131                         if (!m->active && !m->inactive)
3132                                 vm_page_activate(m);
3133                         m->reference = TRUE;
3134                 } else {
3135                         vm_page_activate(m);
3136                 }
3137 #endif
3138                 vm_page_unlock_queues();
3139         }
3140
3141         /*
3142          *      Unlock everything, and return
3143          */
3144
3145         vm_map_verify_done(map, &version);
3146         if(real_map != map)
3147                 vm_map_unlock(real_map);
3148         if(m != VM_PAGE_NULL) {
3149                 PAGE_WAKEUP_DONE(m);
3150                 UNLOCK_AND_DEALLOCATE;
3151         } else {
3152                 vm_fault_cleanup(object, top_page);
3153                 vm_object_deallocate(object);
3154         }
3155         kr = KERN_SUCCESS;
3156
3157 #undef  UNLOCK_AND_DEALLOCATE
3158 #undef  RELEASE_PAGE
3159
3160     done:
3161         if(write_startup_file)
3162                 tws_send_startup_info(current_task());
3163
3164         thread_interrupt_level(interruptible_state);
3165
3166         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
3167                               vaddr,
3168                               type_of_fault & 0xff,
3169                               kr,
3170                               type_of_fault >> 8,
3171                               0);
3172
3173         return(kr);
3174 }
3175
3176 /*
3177  *      vm_fault_wire:
3178  *
3179  *      Wire down a range of virtual addresses in a map.
      *
      *      The range is the whole of "entry", [vme_start, vme_end), taken
      *      one PAGE_SIZE step at a time.  Each page is first offered to
      *      vm_fault_wire_fast(); if that does not return KERN_SUCCESS the
      *      page is retried through vm_fault().
      *
      *      Parameters:
      *              map             map that owns "entry"
      *              entry           entry to wire; asserted to be in_transition
      *              pmap            physical map handed to the pmap and
      *                              fault routines
      *              pmap_addr       address in "pmap" that corresponds to
      *                              entry->vme_start
      *
      *      Returns KERN_SUCCESS if every page was wired (or, without
      *      touching any page, if the entry maps a phys_contiguous object).
      *      On the first page that fails, the pages before it are unwired
      *      again with vm_fault_unwire() and the failing code is returned.
3180  */
3181 kern_return_t
3182 vm_fault_wire(
3183         vm_map_t        map,
3184         vm_map_entry_t  entry,
3185         pmap_t          pmap,
3186         vm_map_offset_t pmap_addr)
3187 {
3188
3189         register vm_map_offset_t        va;
3190         register vm_map_offset_t        end_addr = entry->vme_end;
3191         register kern_return_t  rc;
3192
3193         assert(entry->in_transition);
3194
             /*
              *      An entry that maps a phys_contiguous object directly
              *      (not through a submap) is reported as wired without
              *      any further work.
              */
3195         if ((entry->object.vm_object != NULL) &&
3196                         !entry->is_sub_map &&
3197                         entry->object.vm_object->phys_contiguous) {
3198                 return KERN_SUCCESS;
3199         }
3200
3201         /*
3202          *      Inform the physical mapping system that the
3203          *      range of addresses may not fault, so that
3204          *      page tables and such can be locked down as well.
3205          */
3206
3207         pmap_pageable(pmap, pmap_addr,
3208                 pmap_addr + (end_addr - entry->vme_start), FALSE);
3209
3210         /*
3211          *      We simulate a fault to get the page and enter it
3212          *      in the physical map.
3213          */
3214
3215         for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
                     /*
                      *      Fast path first; anything it cannot handle is
                      *      redone by vm_fault().  Both calls receive the
                      *      pmap address of this particular page.  The
                      *      slow path uses THREAD_UNINT only when wiring
                      *      into kernel_pmap, THREAD_ABORTSAFE otherwise.
                      */
3216                 if ((rc = vm_fault_wire_fast(
3217                         map, va, entry, pmap,
3218                         pmap_addr + (va - entry->vme_start)
3219                         )) != KERN_SUCCESS) {
3220                         rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
3221                                 (pmap == kernel_pmap) ?
3222                                         THREAD_UNINT : THREAD_ABORTSAFE,
3223                                 pmap, pmap_addr + (va - entry->vme_start));
3224                 }
3225
3226                 if (rc != KERN_SUCCESS) {
                             /*
                              *      Work on a copy of the entry that ends at
                              *      the failing address, so that only the
                              *      pages already wired, [vme_start, va),
                              *      are undone.  The real entry is not
                              *      modified.
                              */
3227                         struct vm_map_entry     tmp_entry = *entry;
3228
3229                         /* unwire wired pages */
3230                         tmp_entry.vme_end = va;
3231                         vm_fault_unwire(map,
3232                                 &tmp_entry, FALSE, pmap, pmap_addr);
3233
3234                         return rc;
3235                 }
3236         }
3237         return KERN_SUCCESS;
3238 }
3239
3240 /*
3241  *      vm_fault_unwire:
3242  *
3243  *      Unwire a range of virtual addresses in a map.
      *
      *      Walks "entry" from vme_start to vme_end one PAGE_SIZE step at a
      *      time.  For every page the pmap wiring is cleared with
      *      pmap_change_wiring(); then
      *        - if there is no object to work on (the entry is a submap or
      *          its object is VM_OBJECT_NULL), vm_fault() is called for
      *          that address;
      *        - if the object is phys_contiguous, nothing more is done;
      *        - otherwise the page is obtained with vm_fault_page() and is
      *          either unwired or, when "deallocate" is TRUE, handed to
      *          pmap_disconnect() and freed with VM_PAGE_FREE().
      *
      *      Parameters:
      *              map             map that owns "entry"
      *              entry           entry describing the range to unwire
      *              deallocate      TRUE to free each page instead of
      *                              unwiring it
      *              pmap            physical map handed to the pmap and
      *                              fault routines
      *              pmap_addr       address in "pmap" that corresponds to
      *                              entry->vme_start
      *
      *      There is no return value.  A vm_fault_page() result other than
      *      VM_FAULT_SUCCESS (VM_FAULT_RETRY is retried) panics.
3244  */
3245 void
3246 vm_fault_unwire(
3247         vm_map_t        map,
3248         vm_map_entry_t  entry,
3249         boolean_t       deallocate,
3250         pmap_t          pmap,
3251         vm_map_offset_t pmap_addr)
3252 {
3253         register vm_map_offset_t        va;
3254         register vm_map_offset_t        end_addr = entry->vme_end;
3255         vm_object_t             object;
3256
3257         object = (entry->is_sub_map)
3258                         ? VM_OBJECT_NULL : entry->object.vm_object;
3259
3260         /*
3261          *      Since the pages are wired down, we must be able to
3262          *      get their mappings from the physical map system.
3263          */
3264
3265         for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3266                 pmap_change_wiring(pmap,
3267                         pmap_addr + (va - entry->vme_start), FALSE);
3268
3269                 if (object == VM_OBJECT_NULL) {
                             /*
                              *      Hand vm_fault() the pmap address of
                              *      *this* page, exactly as vm_fault_wire()
                              *      does and as pmap_change_wiring() above
                              *      does.  Passing the bare pmap_addr would
                              *      make every page of a multi-page entry
                              *      refer to the first page's address.
                              */
3270                         (void) vm_fault(map, va, VM_PROT_NONE,
3271                                         TRUE, THREAD_UNINT, pmap,
                                             pmap_addr + (va - entry->vme_start));
3272                 } else if (object->phys_contiguous) {
3273                         continue;
3274                 } else {
3275                         vm_prot_t       prot;
3276                         vm_page_t       result_page;
3277                         vm_page_t       top_page;
3278                         vm_object_t     result_object;
3279                         vm_fault_return_t result;
3280
                             /*
                              *      Look the page up through the fault
                              *      path.  The object lock and paging
                              *      reference are taken afresh on every
                              *      attempt because the loop re-enters
                              *      here on VM_FAULT_RETRY.
                              */
3281                         do {
3282                                 prot = VM_PROT_NONE;
3283
3284                                 vm_object_lock(object);
3285                                 vm_object_paging_begin(object);
3286                                 XPR(XPR_VM_FAULT,
3287                                         "vm_fault_unwire -> vm_fault_page\n",
3288                                         0,0,0,0,0);
3289                                 result = vm_fault_page(object,
3290                                                 entry->offset +
3291                                                   (va - entry->vme_start),
3292                                                 VM_PROT_NONE, TRUE,
3293                                                 THREAD_UNINT,
3294                                                 entry->offset,
3295                                                 entry->offset +
3296                                                        (entry->vme_end
3297                                                         - entry->vme_start),
3298                                                 entry->behavior,
3299                                                 &prot,
3300                                                 &result_page,
3301                                                 &top_page,
3302                                                 (int *)0,
3303                                                 0, map->no_zero_fill,
3304                                                 FALSE, NULL, 0);
3305                         } while (result == VM_FAULT_RETRY);
3306
3307                         if (result != VM_FAULT_SUCCESS)
3308                                 panic("vm_fault_unwire: failure");
3309
                             /*
                              *      Remember the owning object before the
                              *      page can be freed; it is still needed
                              *      for vm_fault_cleanup() below.
                              */
3310                         result_object = result_page->object;
3311                         if (deallocate) {
3312                                 assert(!result_page->fictitious);
3313                                 pmap_disconnect(result_page->phys_page);
3314                                 VM_PAGE_FREE(result_page);
3315                         } else {
3316                                 vm_page_lock_queues();
3317                                 vm_page_unwire(result_page);
3318                                 vm_page_unlock_queues();
3319                                 PAGE_WAKEUP_DONE(result_page);
3320                         }
3321
3322                         vm_fault_cleanup(result_object, top_page);
3323                 }
3324         }
3325
3326         /*
3327          *      Inform the physical mapping system that the range
3328          *      of addresses may fault, so that page tables and
3329          *      such may be unwired themselves.
3330          */
3331
3332         pmap_pageable(pmap, pmap_addr,
3333                 pmap_addr + (end_addr - entry->vme_start), TRUE);
3334
3335 }
3336
3337 /*
3338  *      vm_fault_wire_fast:
3339  *
3340  *      Handle common case of a wire down page fault at the given address.
3341  *      If successful, the page is inserted into the associated physical map.
3342  *      The map entry is passed in to avoid the overhead of a map lookup.
3343  *
3344  *      NOTE: the given address should be truncated to the
3345  *      proper page address.
3346  *
3347  *      KERN_SUCCESS is returned if the page fault is handled; otherwise
3348  *      KERN_FAILURE is returned (it is the only failure code this
      *      routine produces).
3349  *
3350  *      The map in question must be referenced, and remains so.
3351  *      Caller has a read lock on the map.
3352  *
3353  *      This is a stripped version of vm_fault() for wiring pages.  Anything
3354  *      other than the common case will return KERN_FAILURE, and the caller
3355  *      is expected to call vm_fault().
      *
      *      Parameters:
      *              map             not used by this routine
      *              va              address within "entry" to wire
      *              entry           map entry covering "va"; supplies the
      *                              object, offset and protection
      *              pmap            physical map to enter the page into
      *              pmap_addr       address in "pmap" at which to enter it
      *
      *      The routine gives up when: the entry is a submap; the page is
      *      not resident in the entry's own object; the page is busy or
      *      encrypted; the page is "unusual" with error, restart, absent,
      *      or a page_lock that overlaps the entry protection; or the entry
      *      is writable and the object has a copy object.
3356  */
3357 kern_return_t
3358 vm_fault_wire_fast(
3359         __unused vm_map_t       map,
3360         vm_map_offset_t va,
3361         vm_map_entry_t  entry,
3362         pmap_t                  pmap,
3363         vm_map_offset_t pmap_addr)
3364 {
3365         vm_object_t             object;
3366         vm_object_offset_t      offset;
3367         register vm_page_t      m;
3368         vm_prot_t               prot;
3369         thread_t                thread = current_thread();
3370         unsigned int            cache_attr;
3371
             /* Count the fault globally and, when known, against the task. */
3372         VM_STAT(faults++);
3373
3374         if (thread != THREAD_NULL && thread->task != TASK_NULL)
3375           thread->task->faults++;
3376
3377 /*
3378  *      Recovery actions
      *
      *      These macros are (re)defined for the rest of this routine and
      *      use its locals "object" and "m" by name.
      *
      *      RELEASE_PAGE            wake waiters on the page, then unwire it
      *      UNLOCK_THINGS           end paging on the object and unlock it
      *      UNLOCK_AND_DEALLOCATE   UNLOCK_THINGS, then drop the object
      *                              reference taken below
3379  */
3380
3381 #undef  RELEASE_PAGE
3382 #define RELEASE_PAGE(m) {                               \
3383         PAGE_WAKEUP_DONE(m);                            \
3384         vm_page_lock_queues();                          \
3385         vm_page_unwire(m);                              \
3386         vm_page_unlock_queues();                        \
3387 }
3388
3389
3390 #undef  UNLOCK_THINGS
3391 #define UNLOCK_THINGS   {                               \
3392         vm_object_paging_end(object);                      \
3393         vm_object_unlock(object);                          \
3394 }
3395
3396 #undef  UNLOCK_AND_DEALLOCATE
3397 #define UNLOCK_AND_DEALLOCATE   {                       \
3398         UNLOCK_THINGS;                                  \
3399         vm_object_deallocate(object);                   \
3400 }
3401 /*
3402  *      Give up and have caller do things the hard way.
3403  */
3404
3405 #define GIVE_UP {                                       \
3406         UNLOCK_AND_DEALLOCATE;                          \
3407         return(KERN_FAILURE);                           \
3408 }
3409
3410
3411         /*
3412          *      If this entry is not directly to a vm_object, bail out.
              *      Nothing has been locked or referenced yet, so this is
              *      a plain return rather than GIVE_UP.
3413          */
3414         if (entry->is_sub_map)
3415                 return(KERN_FAILURE);
3416
3417         /*
3418          *      Find the backing store object and offset into it.
3419          */
3420
3421         object = entry->object.vm_object;
3422         offset = (va - entry->vme_start) + entry->offset;
3423         prot = entry->protection;
3424
3425         /*
3426          *      Make a reference to this object to prevent its
3427          *      disposal while we are messing with it.
3428          */
3429
3430         vm_object_lock(object);
3431         assert(object->ref_count > 0);
3432         object->ref_count++;
3433         vm_object_res_reference(object);
3434         vm_object_paging_begin(object);
3435
3436         /*
3437          *      INVARIANTS (through entire routine):
3438          *
3439          *      1)      At all times, we must either have the object
3440          *              lock or a busy page in some object to prevent
3441          *              some other thread from trying to bring in
3442          *              the same page.
3443          *
3444          *      2)      Once we have a busy page, we must remove it from
3445          *              the pageout queues, so that the pageout daemon
3446          *              will not grab it away.
3447          *
3448          */
3449
3450         /*
3451          *      Look for page in top-level object.  If it's not there or
3452          *      there's something going on, give up.
3453          * ENCRYPTED SWAP: use the slow fault path, since we'll need to
3454          * decrypt the page before wiring it down.
3455          */
3456         m = vm_page_lookup(object, offset);
3457         if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) ||
3458             (m->unusual && ( m->error || m->restart || m->absent ||
3459                                 prot & m->page_lock))) {
3460
3461                 GIVE_UP;
3462         }
3463         ASSERT_PAGE_DECRYPTED(m);
3464
3465         /*
3466          *      Wire the page down now.  All bail outs beyond this
3467          *      point must unwire the page.
3468          */
3469
3470         vm_page_lock_queues();
3471         vm_page_wire(m);
3472         vm_page_unlock_queues();
3473
3474         /*
3475          *      Mark page busy for other threads.
3476          */
3477         assert(!m->busy);
3478         m->busy = TRUE;
3479         assert(!m->absent);
3480
3481         /*
3482          *      Give up if the page is being written and there's a copy object
              *      RELEASE_PAGE undoes the wire taken above before GIVE_UP
              *      drops the lock and the reference.
3483          */
3484         if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3485                 RELEASE_PAGE(m);
3486                 GIVE_UP;
3487         }
3488
3489         /*
3490          *      Put this page into the physical map.
              *
              *      NOTE(review): this comment used to say that the object
              *      has to be unlocked because pmap_enter may cause other
              *      faults.  No unlock is visible here: the object lock
              *      taken above is still held across PMAP_ENTER and is
              *      released only by UNLOCK_AND_DEALLOCATE below (unless
              *      the PMAP_ENTER macro drops it internally -- confirm).
              *
              *      A page still flagged no_isync is first passed to
              *      pmap_sync_page_data_phys() and the flag is cleared.
3493          */
3494         if (m->no_isync == TRUE) {
3495                 pmap_sync_page_data_phys(m->phys_page);
3496
3497                 m->no_isync = FALSE;
3498         }
3499
3500         cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3501
3502         PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);
3503
3504         /*
3505          *      Unlock everything, and return
              *      The page stays wired; only its busy state is ended.
3506          */
3507
3508         PAGE_WAKEUP_DONE(m);
3509         UNLOCK_AND_DEALLOCATE;
3510
3511         return(KERN_SUCCESS);
3512
3513 }
3514
3515 /*
3516  *      Routine:        vm_fault_copy_cleanup
3517  *      Purpose:
3518  *              Release a page used by vm_fault_copy.
      *
      *              Takes the lock of the object that owns "page", ends the
      *              page's busy state with PAGE_WAKEUP_DONE, activates the
      *              page if it is on neither the active nor the inactive
      *              queue, and then calls vm_fault_cleanup() with that
      *              object and "top_page".
      *
      *              NOTE(review): the object lock taken here is not released
      *              in this routine; presumably vm_fault_cleanup() drops it
      *              together with the paging reference -- confirm.
3519  */
3520
3521 void
3522 vm_fault_copy_cleanup(
3523         vm_page_t       page,
3524         vm_page_t       top_page)
3525 {
3526         vm_object_t     object = page->object;
3527
3528         vm_object_lock(object);
3529         PAGE_WAKEUP_DONE(page);
             /* Queue membership is examined and changed under the queues lock. */
3530         vm_page_lock_queues();
3531         if (!page->active && !page->inactive)
3532                 vm_page_activate(page);
3533         vm_page_unlock_queues();
3534         vm_fault_cleanup(object, top_page);
3535 }
3536
     /*
      *      Routine:        vm_fault_copy_dst_cleanup
      *      Purpose:
      *              Release the destination page held by vm_fault_copy.
      *
      *              Does nothing for VM_PAGE_NULL.  Otherwise, with the
      *              owning object locked, the page is unwired (under the
      *              page queues lock) and one paging reference on the
      *              object is ended.  vm_fault_copy wires the destination
      *              page and keeps a paging reference on its object while
      *              it works; this routine gives both back.
      */
3537 void
3538 vm_fault_copy_dst_cleanup(
3539         vm_page_t       page)
3540 {
3541         vm_object_t     object;
3542
3543         if (page != VM_PAGE_NULL) {
3544                 object = page->object;
3545                 vm_object_lock(object);
3546                 vm_page_lock_queues();
3547                 vm_page_unwire(page);
3548                 vm_page_unlock_queues();
3549                 vm_object_paging_end(object);
3550                 vm_object_unlock(object);
3551         }
3552 }
3553
3554 /*
3555  *      Routine:        vm_fault_copy
3556  *
3557  *      Purpose:
3558  *              Copy pages from one virtual memory object to another --
3559  *              neither the source nor destination pages need be resident.
3560  *
3561  *              Before actually copying a page, the version associated with
3562  *              the destination address map will be verified.
      *
      *              A null source object (VM_OBJECT_NULL) means "zero-fill
      *              the destination" instead of copying.
3563  *
3564  *      In/out conditions:
3565  *              The caller must hold a reference, but not a lock, to
3566  *              each of the source and destination objects and to the
3567  *              destination map.
3568  *
3569  *      Results:
3570  *              Returns KERN_SUCCESS if no errors were encountered in
3571  *              reading or writing the data.  Returns MACH_SEND_INTERRUPTED
3572  *              if a fault was interrupted or a wait for a free page
3573  *              failed.  A memory error from vm_fault_page() is returned
3574  *              as the error code it supplied, or KERN_MEMORY_ERROR if it
      *              supplied none.
3575  *
3576  *              The actual amount of data copied will be returned in the
3577  *              "copy_size" argument.  In the event that the destination map
3578  *              verification failed, this amount may be less than the amount
3579  *              requested.  (The copy also stops early, still returning
      *              KERN_SUCCESS, if the destination object's copy object
      *              changed while the page was being prepared.)  On the
      *              memory-error returns "copy_size" is left untouched.
3580  */
3581 kern_return_t
3582 vm_fault_copy(
3583         vm_object_t             src_object,
3584         vm_object_offset_t      src_offset,
3585         vm_map_size_t           *copy_size,             /* INOUT */
3586         vm_object_t             dst_object,
3587         vm_object_offset_t      dst_offset,
3588         vm_map_t                dst_map,
3589         vm_map_version_t         *dst_version,
3590         int                     interruptible)
3591 {
3592         vm_page_t               result_page;
3593
3594         vm_page_t               src_page;
3595         vm_page_t               src_top_page;
3596         vm_prot_t               src_prot;
3597
3598         vm_page_t               dst_page;
3599         vm_page_t               dst_top_page;
3600         vm_prot_t               dst_prot;
3601
3602         vm_map_size_t           amount_left;
3603         vm_object_t             old_copy_object;
3604         kern_return_t           error = 0;
3605
3606         vm_map_size_t           part_size;
3607
3608         /*
3609          * In order not to confuse the clustered pageins, align
3610          * the different offsets on a page boundary.
3611          */
3612         vm_object_offset_t      src_lo_offset = vm_object_trunc_page(src_offset);
3613         vm_object_offset_t      dst_lo_offset = vm_object_trunc_page(dst_offset);
3614         vm_object_offset_t      src_hi_offset = vm_object_round_page(src_offset + *copy_size);
3615         vm_object_offset_t      dst_hi_offset = vm_object_round_page(dst_offset + *copy_size);
3616
     /*
      *      RETURN(x): report how much was actually copied through
      *      *copy_size (requested size minus what is still left) and
      *      return x.
      */
3617 #define RETURN(x)                                       \
3618         MACRO_BEGIN                                     \
3619         *copy_size -= amount_left;                      \
3620         MACRO_RETURN(x);                                \
3621         MACRO_END
3622
3623         amount_left = *copy_size;
3624         do { /* while (amount_left > 0) */
3625                 /*
3626                  * There may be a deadlock if both source and destination
3627                  * pages are the same. To avoid this deadlock, the copy must
3628                  * start by getting the destination page in order to apply
3629                  * COW semantics if any.
3630                  */
3631
3632         RetryDestinationFault: ;
3633
3634                 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3635
3636                 vm_object_lock(dst_object);
3637                 vm_object_paging_begin(dst_object);
3638
3639                 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3640                 switch (vm_fault_page(dst_object,
3641                                       vm_object_trunc_page(dst_offset),
3642                                       VM_PROT_WRITE|VM_PROT_READ,
3643                                       FALSE,
3644                                       interruptible,
3645                                       dst_lo_offset,
3646                                       dst_hi_offset,
3647                                       VM_BEHAVIOR_SEQUENTIAL,
3648                                       &dst_prot,
3649                                       &dst_page,
3650                                       &dst_top_page,
3651                                       (int *)0,
3652                                       &error,
3653                                       dst_map->no_zero_fill,
3654                                       FALSE, NULL, 0)) {
3655                 case VM_FAULT_SUCCESS:
3656                         break;
3657                 case VM_FAULT_RETRY:
3658                         goto RetryDestinationFault;
3659                 case VM_FAULT_MEMORY_SHORTAGE:
3660                         if (vm_page_wait(interruptible))
3661                                 goto RetryDestinationFault;
3662                         /* fall thru */
3663                 case VM_FAULT_INTERRUPTED:
3664                         RETURN(MACH_SEND_INTERRUPTED);
3665                 case VM_FAULT_FICTITIOUS_SHORTAGE:
3666                         vm_page_more_fictitious();
3667                         goto RetryDestinationFault;
3668                 case VM_FAULT_MEMORY_ERROR:
3669                         if (error)
3670                                 return (error);
3671                         else
3672                                 return(KERN_MEMORY_ERROR);
3673                 }
3674                 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3675
                     /*
                      *      Remember the copy object now; it is compared
                      *      again after the source page has been obtained.
                      */
3676                 old_copy_object = dst_page->object->copy;
3677
3678                 /*
3679                  * There exists the possibility that the source and
3680                  * destination page are the same.  But we can't
3681                  * easily determine that now.  If they are the
3682                  * same, faulting the source while the destination
3683                  * page is still busy would deadlock.  To prevent this we
3684                  * wire the page so we can drop busy without having
3685                  * the page daemon steal the page.  We clean up the
3686                  * top page  but keep the paging reference on the object
3687                  * holding the dest page so it doesn't go away.
3688                  */
3689
3690                 vm_page_lock_queues();
3691                 vm_page_wire(dst_page);
3692                 vm_page_unlock_queues();
3693                 PAGE_WAKEUP_DONE(dst_page);
3694                 vm_object_unlock(dst_page->object);
3695
3696                 if (dst_top_page != VM_PAGE_NULL) {
3697                         vm_object_lock(dst_object);
3698                         VM_PAGE_FREE(dst_top_page);
3699                         vm_object_paging_end(dst_object);
3700                         vm_object_unlock(dst_object);
3701                 }
3702
3703         RetrySourceFault: ;
3704
3705                 if (src_object == VM_OBJECT_NULL) {
3706                         /*
3707                          *      No source object.  We will just
3708                          *      zero-fill the page in dst_object.
3709                          */
3710                         src_page = VM_PAGE_NULL;
3711                         result_page = VM_PAGE_NULL;
3712                 } else {
3713                         vm_object_lock(src_object);
3714                         src_page = vm_page_lookup(src_object,
3715                                                   vm_object_trunc_page(src_offset));
3716                         if (src_page == dst_page) {
                                     /*
                                      *      Source and destination are the
                                      *      same resident page, already
                                      *      wired above, so no second fault
                                      *      is needed.  Use that page as
                                      *      the copy source.
                                      *
                                      *      result_page must not be left
                                      *      VM_PAGE_NULL here: it is
                                      *      dereferenced by the unlock just
                                      *      below, and a null result_page
                                      *      further down means "zero-fill
                                      *      the destination", which would
                                      *      wipe the data instead of
                                      *      copying it.  The cleanup calls
                                      *      already skip this page through
                                      *      their "src_page != dst_page"
                                      *      test.
                                      *
                                      *      NOTE(review): for a misaligned
                                      *      copy this makes
                                      *      vm_page_part_copy() copy within
                                      *      one page; confirm it tolerates
                                      *      overlapping ranges.
                                      */
3717                                 src_prot = dst_prot;
3718                                 result_page = src_page;
3719                         } else {
3720                                 src_prot = VM_PROT_READ;
3721                                 vm_object_paging_begin(src_object);
3722
3723                                 XPR(XPR_VM_FAULT,
3724                                         "vm_fault_copy(2) -> vm_fault_page\n",
3725                                         0,0,0,0,0);
3726                                 switch (vm_fault_page(src_object,
3727                                                       vm_object_trunc_page(src_offset),
3728                                                       VM_PROT_READ,
3729                                                       FALSE,
3730                                                       interruptible,
3731                                                       src_lo_offset,
3732                                                       src_hi_offset,
3733                                                       VM_BEHAVIOR_SEQUENTIAL,
3734                                                       &src_prot,
3735                                                       &result_page,
3736                                                       &src_top_page,
3737                                                       (int *)0,
3738                                                       &error,
3739                                                       FALSE,
3740                                                       FALSE, NULL, 0)) {
3741
3742                                 case VM_FAULT_SUCCESS:
3743                                         break;
3744                                 case VM_FAULT_RETRY:
3745                                         goto RetrySourceFault;
3746                                 case VM_FAULT_MEMORY_SHORTAGE:
3747                                         if (vm_page_wait(interruptible))
3748                                                 goto RetrySourceFault;
3749                                         /* fall thru */
3750                                 case VM_FAULT_INTERRUPTED:
3751                                         vm_fault_copy_dst_cleanup(dst_page);
3752                                         RETURN(MACH_SEND_INTERRUPTED);
3753                                 case VM_FAULT_FICTITIOUS_SHORTAGE:
3754                                         vm_page_more_fictitious();
3755                                         goto RetrySourceFault;
3756                                 case VM_FAULT_MEMORY_ERROR:
3757                                         vm_fault_copy_dst_cleanup(dst_page);
3758                                         if (error)
3759                                                 return (error);
3760                                         else
3761                                                 return(KERN_MEMORY_ERROR);
3762                                 }
3763
3764
3765                                 assert((src_top_page == VM_PAGE_NULL) ==
3766                                        (result_page->object == src_object));
3767                         }
3768                         assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
                             /*
                              *      result_page is non-null on both paths
                              *      above.  On the same-page path its
                              *      object is src_object, the lock taken
                              *      before the lookup.
                              */
3769                         vm_object_unlock(result_page->object);
3770                 }
3771
                     /*
                      *      If the destination map changed underneath us,
                      *      release both pages and stop; the RETURN after
                      *      the loop reports the amount copied so far.
                      */
3772                 if (!vm_map_verify(dst_map, dst_version)) {
3773                         if (result_page != VM_PAGE_NULL && src_page != dst_page)
3774                                 vm_fault_copy_cleanup(result_page, src_top_page);
3775                         vm_fault_copy_dst_cleanup(dst_page);
3776                         break;
3777                 }
3778
3779                 vm_object_lock(dst_page->object);
3780
                     /*
                      *      Likewise stop if the destination object's copy
                      *      object is no longer the one seen when the page
                      *      was faulted in.
                      */
3781                 if (dst_page->object->copy != old_copy_object) {
3782                         vm_object_unlock(dst_page->object);
3783                         vm_map_verify_done(dst_map, dst_version);
3784                         if (result_page != VM_PAGE_NULL && src_page != dst_page)
3785                                 vm_fault_copy_cleanup(result_page, src_top_page);
3786                         vm_fault_copy_dst_cleanup(dst_page);
3787                         break;
3788                 }
3789                 vm_object_unlock(dst_page->object);
3790
3791                 /*
3792                  *      Copy the page, and note that it is dirty
3793                  *      immediately.
3794                  */
3795
3796                 if (!page_aligned(src_offset) ||
3797                         !page_aligned(dst_offset) ||
3798                         !page_aligned(amount_left)) {
3799
3800                         vm_object_offset_t      src_po,
3801                                                 dst_po;
3802
3803                         src_po = src_offset - vm_object_trunc_page(src_offset);
3804                         dst_po = dst_offset - vm_object_trunc_page(dst_offset);
3805
                             /*
                              *      Copy up to the nearer page end (the
                              *      larger in-page offset decides), but
                              *      never more than what is left.
                              */
3806                         if (dst_po > src_po) {
3807                                 part_size = PAGE_SIZE - dst_po;
3808                         } else {
3809                                 part_size = PAGE_SIZE - src_po;
3810                         }
3811                         if (part_size > (amount_left)){
3812                                 part_size = amount_left;
3813                         }
3814
3815                         if (result_page == VM_PAGE_NULL) {
3816                                 vm_page_part_zero_fill(dst_page,
3817                                                         dst_po, part_size);
3818                         } else {
3819                                 vm_page_part_copy(result_page, src_po,
3820                                         dst_page, dst_po, part_size);
3821                                 if(!dst_page->dirty){
                                             /*
                                              *      Lock the object that is
                                              *      unlocked below; the two
                                              *      calls used to name
                                              *      different expressions.
                                              */
3822                                         vm_object_lock(dst_page->object);
3823                                         dst_page->dirty = TRUE;
3824                                         vm_object_unlock(dst_page->object);
3825                                 }
3826
3827                         }
3828                 } else {
3829                         part_size = PAGE_SIZE;
3830
3831                         if (result_page == VM_PAGE_NULL)
3832                                 vm_page_zero_fill(dst_page);
3833                         else{
3834                                 vm_page_copy(result_page, dst_page);
3835                                 if(!dst_page->dirty){
                                             /* same lock/unlock pairing as above */
3836                                         vm_object_lock(dst_page->object);
3837                                         dst_page->dirty = TRUE;
3838                                         vm_object_unlock(dst_page->object);
3839                                 }
3840                         }
3841
3842                 }
3843
3844                 /*
3845                  *      Unlock everything, and return
3846                  */
3847
3848                 vm_map_verify_done(dst_map, dst_version);
3849
3850                 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3851                         vm_fault_copy_cleanup(result_page, src_top_page);
3852                 vm_fault_copy_dst_cleanup(dst_page);
3853
3854                 amount_left -= part_size;
3855                 src_offset += part_size;
3856                 dst_offset += part_size;
3857         } while (amount_left > 0);
3858
3859         RETURN(KERN_SUCCESS);
3860 #undef  RETURN
3861
3862         /*NOTREACHED*/
3863 }
3864
3865 #ifdef  notdef
3866
3867 /*
3868  *      Routine:        vm_fault_page_overwrite
3869  *
3870  *      Description:
3871  *              A form of vm_fault_page that assumes that the
3872  *              resulting page will be overwritten in its entirety,
3873  *              making it unnecessary to obtain the correct *contents*
3874  *              of the page.
3875  *
3876  *      Implementation:
3877  *              XXX Untested.  Also unused.  Eventually, this technology
3878  *              could be used in vm_fault_copy() to advantage.
3879  */
3880 vm_fault_return_t
3881 vm_fault_page_overwrite(
3882         register
3883         vm_object_t             dst_object,
3884         vm_object_offset_t      dst_offset,
3885         vm_page_t               *result_page)   /* OUT */
3886 {
3887         register
3888         vm_page_t       dst_page;
3889         kern_return_t   wait_result;
3890
3891 #define interruptible   THREAD_UNINT    /* XXX */
3892
3893         while (TRUE) {
3894                 /*
3895                  *      Look for a page at this offset
3896                  */
3897
3898                 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3899                                  == VM_PAGE_NULL) {
3900                         /*
3901                          *      No page, no problem... just allocate one.
3902                          */
3903
3904                         dst_page = vm_page_alloc(dst_object, dst_offset);
3905                         if (dst_page == VM_PAGE_NULL) {
3906                                 vm_object_unlock(dst_object);
3907                                 VM_PAGE_WAIT();
3908                                 vm_object_lock(dst_object);
3909                                 continue;
3910                         }
3911
3912                         /*
3913                          *      Pretend that the memory manager
3914                          *      write-protected the page.
3915                          *
3916                          *      Note that we will be asking for write
3917                          *      permission without asking for the data
3918                          *      first.
3919                          */
3920
3921                         dst_page->overwriting = TRUE;
3922                         dst_page->page_lock = VM_PROT_WRITE;
3923                         dst_page->absent = TRUE;
3924                         dst_page->unusual = TRUE;
3925                         dst_object->absent_count++;
3926
3927                         break;
3928
3929                         /*
3930                          *      When we bail out, we might have to throw
3931                          *      away the page created here.
3932                          */
3933
3934 #define DISCARD_PAGE                                            \
3935         MACRO_BEGIN                                             \
3936         vm_object_lock(dst_object);                             \
3937         dst_page = vm_page_lookup(dst_object, dst_offset);      \
3938         if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3939                 VM_PAGE_FREE(dst_page);                         \
3940         vm_object_unlock(dst_object);                           \
3941         MACRO_END
3942                 }
3943
3944                 /*
3945                  *      If the page is write-protected...
3946                  */
3947
3948                 if (dst_page->page_lock & VM_PROT_WRITE) {
3949                         /*
3950                          *      ... and an unlock request hasn't been sent
3951                          */
3952
3953                         if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3954                                 vm_prot_t       u;
3955                                 kern_return_t   rc;
3956
3957                                 /*
3958                                  *      ... then send one now.
3959                                  */
3960
3961                                 if (!dst_object->pager_ready) {
3962                                         wait_result = vm_object_assert_wait(dst_object,
3963                                                                 VM_OBJECT_EVENT_PAGER_READY,
3964                                                                 interruptible);
3965                                         vm_object_unlock(dst_object);
3966                                         if (wait_result == THREAD_WAITING)
3967                                                 wait_result = thread_block(THREAD_CONTINUE_NULL);
3968                                         if (wait_result != THREAD_AWAKENED) {
3969                                                 DISCARD_PAGE;
3970                                                 return(VM_FAULT_INTERRUPTED);
3971                                         }
3972                                         continue;
3973                                 }
3974
3975                                 u = dst_page->unlock_request |= VM_PROT_WRITE;
3976                                 vm_object_unlock(dst_object);
3977
3978                                 if ((rc = memory_object_data_unlock(
3979                                                 dst_object->pager,
3980                                                 dst_offset + dst_object->paging_offset,
3981                                                 PAGE_SIZE,
3982                                                 u)) != KERN_SUCCESS) {
3983                                         if (vm_fault_debug)
3984                                             printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3985                                         DISCARD_PAGE;
3986                                         return((rc == MACH_SEND_INTERRUPTED) ?
3987                                                 VM_FAULT_INTERRUPTED :
3988                                                 VM_FAULT_MEMORY_ERROR);
3989                                 }
3990                                 vm_object_lock(dst_object);
3991                                 continue;
3992                         }
3993
3994                         /* ... fall through to wait below */
3995                 } else {
3996                         /*
3997                          *      If the page isn't being used for other
3998                          *      purposes, then we're done.
3999                          */
4000                         if ( ! (dst_page->busy || dst_page->absent ||
4001                                 dst_page->error || dst_page->restart) )
4002                                 break;
4003                 }
4004
4005                 wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
4006                 vm_object_unlock(dst_object);
4007                 if (wait_result == THREAD_WAITING)
4008                         wait_result = thread_block(THREAD_CONTINUE_NULL);
4009                 if (wait_result != THREAD_AWAKENED) {
4010                         DISCARD_PAGE;
4011                         return(VM_FAULT_INTERRUPTED);
4012                 }
4013         }
4014
4015         *result_page = dst_page;
4016         return(VM_FAULT_SUCCESS);
4017
4018 #undef  interruptible
4019 #undef  DISCARD_PAGE
4020 }
4021
4022 #endif  /* notdef */
4023
4024 #if     VM_FAULT_CLASSIFY
4025 /*
4026  *      Temporary statistics gathering support.
4027  */
4028
4029 /*
4030  *      Statistics arrays:
4031  */
4032 #define VM_FAULT_TYPES_MAX      5
4033 #define VM_FAULT_LEVEL_MAX      8
4034
4035 int     vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
4036
4037 #define VM_FAULT_TYPE_ZERO_FILL 0
4038 #define VM_FAULT_TYPE_MAP_IN    1
4039 #define VM_FAULT_TYPE_PAGER     2
4040 #define VM_FAULT_TYPE_COPY      3
4041 #define VM_FAULT_TYPE_OTHER     4
4042
4043
4044 void
4045 vm_fault_classify(vm_object_t           object,
4046                   vm_object_offset_t    offset,
4047                   vm_prot_t             fault_type)
4048 {
4049         int             type, level = 0;
4050         vm_page_t       m;
4051
4052         while (TRUE) {
4053                 m = vm_page_lookup(object, offset);
4054                 if (m != VM_PAGE_NULL) {
4055                         if (m->busy || m->error || m->restart || m->absent ||
4056                             fault_type & m->page_lock) {
4057                                 type = VM_FAULT_TYPE_OTHER;
4058                                 break;
4059                         }
4060                         if (((fault_type & VM_PROT_WRITE) == 0) ||
4061                             ((level == 0) && object->copy == VM_OBJECT_NULL)) {
4062                                 type = VM_FAULT_TYPE_MAP_IN;
4063                                 break;
4064                         }
4065                         type = VM_FAULT_TYPE_COPY;
4066                         break;
4067                 }
4068                 else {
4069                         if (object->pager_created) {
4070                                 type = VM_FAULT_TYPE_PAGER;
4071                                 break;
4072                         }
4073                         if (object->shadow == VM_OBJECT_NULL) {
4074                                 type = VM_FAULT_TYPE_ZERO_FILL;
4075                                 break;
4076                         }
4077
4078                         offset += object->shadow_offset;
4079                         object = object->shadow;
4080                         level++;
4081                         continue;
4082                 }
4083         }
4084
4085         if (level > VM_FAULT_LEVEL_MAX)
4086                 level = VM_FAULT_LEVEL_MAX;
4087
4088         vm_fault_stats[type][level] += 1;
4089
4090         return;
4091 }
4092
4093 /* cleanup routine to call from debugger */
4094
4095 void
4096 vm_fault_classify_init(void)
4097 {
4098         int type, level;
4099
4100         for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
4101                 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
4102                         vm_fault_stats[type][level] = 0;
4103                 }
4104         }
4105
4106         return;
4107 }
4108 #endif  /* VM_FAULT_CLASSIFY */