1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm_fault.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Page fault handling module.
63 */
64
65 #include <mach_cluster_stats.h>
66 #include <mach_pagemap.h>
67 #include <mach_kdb.h>
68
69 #include <mach/mach_types.h>
70 #include <mach/kern_return.h>
71 #include <mach/message.h> /* for error codes */
72 #include <mach/vm_param.h>
73 #include <mach/vm_behavior.h>
74 #include <mach/memory_object.h>
75 /* For memory_object_data_{request,unlock} */
76
77 #include <kern/kern_types.h>
78 #include <kern/host_statistics.h>
79 #include <kern/counters.h>
80 #include <kern/task.h>
81 #include <kern/thread.h>
82 #include <kern/sched_prim.h>
83 #include <kern/host.h>
84 #include <kern/xpr.h>
85 #include <kern/mach_param.h>
86 #include <kern/macro_help.h>
87 #include <kern/zalloc.h>
88 #include <kern/misc_protos.h>
89
90 #include <ppc/proc_reg.h>
91
92 #include <vm/vm_fault.h>
93 #include <vm/task_working_set.h>
94 #include <vm/vm_map.h>
95 #include <vm/vm_object.h>
96 #include <vm/vm_page.h>
97 #include <vm/vm_kern.h>
98 #include <vm/pmap.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/vm_protos.h>
101
102 #include <sys/kdebug.h>
103
104 #define VM_FAULT_CLASSIFY 0
105 #define VM_FAULT_STATIC_CONFIG 1
106
107 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
108
109 unsigned int vm_object_absent_max = 50;
110
111 int vm_fault_debug = 0;
112
113 #if !VM_FAULT_STATIC_CONFIG
114 boolean_t vm_fault_dirty_handling = FALSE;
115 boolean_t vm_fault_interruptible = FALSE;
116 boolean_t software_reference_bits = TRUE;
117 #endif
118
119 #if MACH_KDB
120 extern struct db_watchpoint *db_watchpoint_list;
121 #endif /* MACH_KDB */
122
123
124 /* Forward declarations of internal routines. */
125 extern kern_return_t vm_fault_wire_fast(
126 vm_map_t map,
127 vm_map_offset_t va,
128 vm_map_entry_t entry,
129 pmap_t pmap,
130 vm_map_offset_t pmap_addr);
131
132 extern void vm_fault_continue(void);
133
134 extern void vm_fault_copy_cleanup(
135 vm_page_t page,
136 vm_page_t top_page);
137
138 extern void vm_fault_copy_dst_cleanup(
139 vm_page_t page);
140
141 #if VM_FAULT_CLASSIFY
142 extern void vm_fault_classify(vm_object_t object,
143 vm_object_offset_t offset,
144 vm_prot_t fault_type);
145
146 extern void vm_fault_classify_init(void);
147 #endif
148
149 /*
150 * Routine: vm_fault_init
151 * Purpose:
152 * Initialize our private data structures.
153 */
154 void
155 vm_fault_init(void)
156 {
157 }
158
159 /*
160 * Routine: vm_fault_cleanup
161 * Purpose:
162 * Clean up the result of vm_fault_page.
163 * Results:
164 * The paging reference for "object" is released.
165 * "object" is unlocked.
166 * If "top_page" is not null, "top_page" is
167 * freed and the paging reference for the object
168 * containing it is released.
169 *
170 * In/out conditions:
171 * "object" must be locked.
172 */
173 void
174 vm_fault_cleanup(
175 register vm_object_t object,
176 register vm_page_t top_page)
177 {
178 vm_object_paging_end(object);
179 vm_object_unlock(object);
180
181 if (top_page != VM_PAGE_NULL) {
182 object = top_page->object;
183 vm_object_lock(object);
184 VM_PAGE_FREE(top_page);
185 vm_object_paging_end(object);
186 vm_object_unlock(object);
187 }
188 }
189
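/*
 * Editor's illustrative sketch (not part of the original source): how a
 * hypothetical caller hands the result of vm_fault_page() back through
 * vm_fault_cleanup() once it is finished with the page.  The helper name
 * and the PAGE_WAKEUP_DONE() placement are assumptions for illustration.
 */
#if 0
static void
example_fault_done(
	vm_page_t	result_page,	/* busy; its object is locked */
	vm_page_t	top_page)	/* may be VM_PAGE_NULL */
{
	vm_object_t	object = result_page->object;

	PAGE_WAKEUP_DONE(result_page);	/* clear the busy bit */

	/*
	 * Releases the paging reference, unlocks "object" and, if
	 * top_page is non-null, frees it and releases the paging
	 * reference on its object, as documented above.
	 */
	vm_fault_cleanup(object, top_page);
}
#endif
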
190 #if MACH_CLUSTER_STATS
191 #define MAXCLUSTERPAGES 16
192 struct {
193 unsigned long pages_in_cluster;
194 unsigned long pages_at_higher_offsets;
195 unsigned long pages_at_lower_offsets;
196 } cluster_stats_in[MAXCLUSTERPAGES];
197 #define CLUSTER_STAT(clause) clause
198 #define CLUSTER_STAT_HIGHER(x) \
199 ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
200 #define CLUSTER_STAT_LOWER(x) \
201 ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
202 #define CLUSTER_STAT_CLUSTER(x) \
203 ((cluster_stats_in[(x)].pages_in_cluster)++)
204 #else /* MACH_CLUSTER_STATS */
205 #define CLUSTER_STAT(clause)
206 #endif /* MACH_CLUSTER_STATS */
207
208 /* XXX - temporary */
209 boolean_t vm_allow_clustered_pagein = FALSE;
210 int vm_pagein_cluster_used = 0;
211
212 #define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
213
214
215 boolean_t vm_page_deactivate_behind = TRUE;
216 /*
217 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
218 */
219 int vm_default_ahead = 0;
220 int vm_default_behind = MAX_UPL_TRANSFER;
221
222 /*
223 * vm_fault_deactivate_behind
224 *
225 * Determine if sequential access is in progress
226 * in accordance with the behavior specified. If
228 * so, compute a potential page to deactivate and
228 * deactivate it.
229 *
230 * The object must be locked.
231 */
232 static
233 boolean_t
234 vm_fault_deactivate_behind(
235 vm_object_t object,
236 vm_object_offset_t offset,
237 vm_behavior_t behavior)
238 {
239 vm_page_t m;
240
241 #if TRACEFAULTPAGE
242 dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */
243 #endif
244
245 if (object == kernel_object) {
246 /*
247 * Do not deactivate pages from the kernel object: they
248 * are not intended to become pageable.
249 */
250 return FALSE;
251 }
252
253 switch (behavior) {
254 case VM_BEHAVIOR_RANDOM:
255 object->sequential = PAGE_SIZE_64;
256 m = VM_PAGE_NULL;
257 break;
258 case VM_BEHAVIOR_SEQUENTIAL:
259 if (offset &&
260 object->last_alloc == offset - PAGE_SIZE_64) {
261 object->sequential += PAGE_SIZE_64;
262 m = vm_page_lookup(object, offset - PAGE_SIZE_64);
263 } else {
264 object->sequential = PAGE_SIZE_64; /* reset */
265 m = VM_PAGE_NULL;
266 }
267 break;
268 case VM_BEHAVIOR_RSEQNTL:
269 if (object->last_alloc &&
270 object->last_alloc == offset + PAGE_SIZE_64) {
271 object->sequential += PAGE_SIZE_64;
272 m = vm_page_lookup(object, offset + PAGE_SIZE_64);
273 } else {
274 object->sequential = PAGE_SIZE_64; /* reset */
275 m = VM_PAGE_NULL;
276 }
277 break;
278 case VM_BEHAVIOR_DEFAULT:
279 default:
280 if (offset &&
281 object->last_alloc == offset - PAGE_SIZE_64) {
282 vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
283
284 object->sequential += PAGE_SIZE_64;
285 m = (offset >= behind &&
286 object->sequential >= behind) ?
287 vm_page_lookup(object, offset - behind) :
288 VM_PAGE_NULL;
289 } else if (object->last_alloc &&
290 object->last_alloc == offset + PAGE_SIZE_64) {
291 vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
292
293 object->sequential += PAGE_SIZE_64;
294 m = (offset < -behind &&
295 object->sequential >= behind) ?
296 vm_page_lookup(object, offset + behind) :
297 VM_PAGE_NULL;
298 } else {
299 object->sequential = PAGE_SIZE_64;
300 m = VM_PAGE_NULL;
301 }
302 break;
303 }
304
305 object->last_alloc = offset;
306
307 if (m) {
308 if (!m->busy) {
309 vm_page_lock_queues();
310 vm_page_deactivate(m);
311 vm_page_unlock_queues();
312 #if TRACEFAULTPAGE
313 dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
314 #endif
315 }
316 return TRUE;
317 }
318 return FALSE;
319 }
320
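/*
 * Editor's illustrative sketch (not part of the original source): the
 * fault path drives the heuristic above once per fault, with the object
 * locked, mirroring the call made near the end of vm_fault_page().  The
 * helper name is an assumption for illustration only.
 */
#if 0
static void
example_note_fault(
	vm_object_t		object,		/* locked */
	vm_object_offset_t	offset,
	vm_behavior_t		behavior)	/* from the map entry */
{
	if (vm_page_deactivate_behind)
		vm_fault_deactivate_behind(object, offset, behavior);
}
#endif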
321
322 /*
323 * Routine: vm_fault_page
324 * Purpose:
325 * Find the resident page for the virtual memory
326 * specified by the given virtual memory object
327 * and offset.
328 * Additional arguments:
329 * The required permissions for the page are given
330 * in "fault_type". Desired permissions are included
331 * in "protection". The minimum and maximum valid offsets
332 * within the object for the relevant map entry are
333 * passed in "lo_offset" and "hi_offset" respectively and
334 * the expected page reference pattern is passed in "behavior".
335 * These three parameters are used to determine pagein cluster
336 * limits.
337 *
338 * If the desired page is known to be resident (for
339 * example, because it was previously wired down), asserting
340 * the "must_be_resident" parameter will speed the search.
341 *
342 * If the operation can be interrupted (by thread_abort
343 * or thread_terminate), then the "interruptible"
344 * parameter should be asserted.
345 *
346 * Results:
347 * The page containing the proper data is returned
348 * in "result_page".
349 *
350 * In/out conditions:
351 * The source object must be locked and referenced,
352 * and must donate one paging reference. The reference
353 * is not affected. The paging reference and lock are
354 * consumed.
355 *
356 * If the call succeeds, the object in which "result_page"
357 * resides is left locked and holding a paging reference.
358 * If this is not the original object, a busy page in the
359 * original object is returned in "top_page", to prevent other
360 * callers from pursuing this same data, along with a paging
361 * reference for the original object. The "top_page" should
362 * be destroyed when this guarantee is no longer required.
363 * The "result_page" is also left busy. It is not removed
364 * from the pageout queues.
365 */
366
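/*
 * Editor's illustrative sketch (not part of the original source): a
 * minimal caller honoring the contract above.  The helper name, the
 * argument choices and the error policy are assumptions; real callers
 * (e.g. vm_fault) retry on VM_FAULT_RETRY and wait on shortages.
 */
#if 0
static kern_return_t
example_get_page(
	vm_object_t		object,		/* locked + referenced */
	vm_object_offset_t	offset,
	vm_map_offset_t		lo_offset,
	vm_map_offset_t		hi_offset)
{
	vm_prot_t		prot = VM_PROT_READ;
	vm_page_t		result_page, top_page;
	kern_return_t		error_code;
	vm_fault_return_t	result;

	vm_object_paging_begin(object);	/* donate a paging reference */

	result = vm_fault_page(object, offset,
			       VM_PROT_READ,
			       FALSE,		/* must_be_resident */
			       THREAD_UNINT,	/* not interruptible */
			       lo_offset, hi_offset,
			       VM_BEHAVIOR_DEFAULT,
			       &prot,
			       &result_page, &top_page,
			       NULL,		/* type_of_fault */
			       &error_code,
			       FALSE,		/* no_zero_fill */
			       FALSE,		/* data_supply */
			       VM_MAP_NULL, 0);

	if (result != VM_FAULT_SUCCESS) {
		/*
		 * The object lock and the donated paging reference have
		 * already been cleaned up; the caller's own reference on
		 * "object" is unaffected.
		 */
		return KERN_FAILURE;
	}

	/*
	 * result_page is busy and its object is locked with a paging
	 * reference; top_page (if any) pins the top-level object.
	 * Hand everything back as in the vm_fault_cleanup() sketch.
	 */
	PAGE_WAKEUP_DONE(result_page);
	vm_fault_cleanup(result_page->object, top_page);

	return KERN_SUCCESS;
}
#endif
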
367 vm_fault_return_t
368 vm_fault_page(
369 /* Arguments: */
370 vm_object_t first_object, /* Object to begin search */
371 vm_object_offset_t first_offset, /* Offset into object */
372 vm_prot_t fault_type, /* What access is requested */
373 boolean_t must_be_resident,/* Must page be resident? */
374 int interruptible, /* how may fault be interrupted? */
375 vm_map_offset_t lo_offset, /* Map entry start */
376 vm_map_offset_t hi_offset, /* Map entry end */
377 vm_behavior_t behavior, /* Page reference behavior */
378 /* Modifies in place: */
379 vm_prot_t *protection, /* Protection for mapping */
380 /* Returns: */
381 vm_page_t *result_page, /* Page found, if successful */
382 vm_page_t *top_page, /* Page in top object, if
383 * not result_page. */
384 int *type_of_fault, /* if non-null, fill in with type of fault
385 * COW, zero-fill, etc... returned in trace point */
386 /* More arguments: */
387 kern_return_t *error_code, /* code if page is in error */
388 boolean_t no_zero_fill, /* don't zero fill absent pages */
389 boolean_t data_supply, /* treat as data_supply if
390 * it is a write fault and a full
391 * page is provided */
392 vm_map_t map,
393 __unused vm_map_offset_t vaddr)
394 {
395 register
396 vm_page_t m;
397 register
398 vm_object_t object;
399 register
400 vm_object_offset_t offset;
401 vm_page_t first_m;
402 vm_object_t next_object;
403 vm_object_t copy_object;
404 boolean_t look_for_page;
405 vm_prot_t access_required = fault_type;
406 vm_prot_t wants_copy_flag;
407 vm_object_size_t length;
408 vm_object_offset_t cluster_start, cluster_end;
409 CLUSTER_STAT(int pages_at_higher_offsets;)
410 CLUSTER_STAT(int pages_at_lower_offsets;)
411 kern_return_t wait_result;
412 boolean_t interruptible_state;
413 boolean_t bumped_pagein = FALSE;
414
415
416 #if MACH_PAGEMAP
417 /*
418 * MACH page map - an optional optimization where a bit map is maintained
419 * by the VM subsystem for internal objects to indicate which pages of
420 * the object currently reside on backing store. This existence map
421 * duplicates information maintained by the vnode pager. It is
422 * created at the time of the first pageout against the object, i.e.
423 * at the same time the pager for the object is created. The optimization
424 * is designed to eliminate pager interaction overhead, if it is
425 * 'known' that the page does not exist on backing store.
426 *
427 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
428 * either marked as paged out in the existence map for the object or no
429 * existence map exists for the object. LOOK_FOR() is one of the
430 * criteria in the decision to invoke the pager. It is also used as one
431 * of the criteria to terminate the scan for adjacent pages in a clustered
432 * pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
433 * permanent objects. Note also that if the pager for an internal object
434 * has not been created, the pager is not invoked regardless of the value
435 * of LOOK_FOR() and that clustered pagein scans are only done on an object
436 * for which a pager has been created.
437 *
438 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
439 * is marked as paged out in the existence map for the object. PAGED_OUT()
440 * PAGED_OUT() is used to determine if a page has already been pushed
441 * into a copy object in order to avoid a redundant page out operation.
442 */
443 #define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
444 != VM_EXTERNAL_STATE_ABSENT)
445 #define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
446 == VM_EXTERNAL_STATE_EXISTS)
447 #else /* MACH_PAGEMAP */
448 /*
449 * If the MACH page map optimization is not enabled,
450 * LOOK_FOR() always evaluates to TRUE. The pager will always be
451 * invoked to resolve missing pages in an object, assuming the pager
452 * has been created for the object. In a clustered page operation, the
453 * absence of a page on backing store cannot be used to terminate
454 * a scan for adjacent pages since that information is available only in
455 * the pager. Hence pages that may not be paged out are potentially
456 * included in a clustered request. The vnode pager is coded to deal
457 * with any combination of absent/present pages in a clustered
458 * pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
459 * will always be invoked to push a dirty page into a copy object assuming
460 * a pager has been created. If the page has already been pushed, the
461 * pager will ignore the new request.
462 */
463 #define LOOK_FOR(o, f) TRUE
464 #define PAGED_OUT(o, f) FALSE
465 #endif /* MACH_PAGEMAP */
466
467 /*
468 * Recovery actions
469 */
470 #define PREPARE_RELEASE_PAGE(m) \
471 MACRO_BEGIN \
472 vm_page_lock_queues(); \
473 MACRO_END
474
475 #define DO_RELEASE_PAGE(m) \
476 MACRO_BEGIN \
477 PAGE_WAKEUP_DONE(m); \
478 if (!m->active && !m->inactive) \
479 vm_page_activate(m); \
480 vm_page_unlock_queues(); \
481 MACRO_END
482
483 #define RELEASE_PAGE(m) \
484 MACRO_BEGIN \
485 PREPARE_RELEASE_PAGE(m); \
486 DO_RELEASE_PAGE(m); \
487 MACRO_END
488
489 #if TRACEFAULTPAGE
490 dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
491 #endif
492
493
494
495 #if !VM_FAULT_STATIC_CONFIG
496 if (vm_fault_dirty_handling
497 #if MACH_KDB
498 /*
499 * If there are watchpoints set, then
500 * we don't want to give away write permission
501 * on a read fault. Make the task write fault,
502 * so that the watchpoint code notices the access.
503 */
504 || db_watchpoint_list
505 #endif /* MACH_KDB */
506 ) {
507 /*
508 * If we aren't asking for write permission,
509 * then don't give it away. We're using write
510 * faults to set the dirty bit.
511 */
512 if (!(fault_type & VM_PROT_WRITE))
513 *protection &= ~VM_PROT_WRITE;
514 }
515
516 if (!vm_fault_interruptible)
517 interruptible = THREAD_UNINT;
518 #else /* STATIC_CONFIG */
519 #if MACH_KDB
520 /*
521 * If there are watchpoints set, then
522 * we don't want to give away write permission
523 * on a read fault. Make the task write fault,
524 * so that the watchpoint code notices the access.
525 */
526 if (db_watchpoint_list) {
527 /*
528 * If we aren't asking for write permission,
529 * then don't give it away. We're using write
530 * faults to set the dirty bit.
531 */
532 if (!(fault_type & VM_PROT_WRITE))
533 *protection &= ~VM_PROT_WRITE;
534 }
535
536 #endif /* MACH_KDB */
537 #endif /* STATIC_CONFIG */
538
539 interruptible_state = thread_interrupt_level(interruptible);
540
541 /*
542 * INVARIANTS (through entire routine):
543 *
544 * 1) At all times, we must either have the object
545 * lock or a busy page in some object to prevent
546 * some other thread from trying to bring in
547 * the same page.
548 *
549 * Note that we cannot hold any locks during the
550 * pager access or when waiting for memory, so
551 * we use a busy page then.
552 *
553 * Note also that we aren't as concerned about more than
554 * one thread attempting to memory_object_data_unlock
555 * the same page at once, so we don't hold the page
556 * as busy then, but do record the highest unlock
557 * value so far. [Unlock requests may also be delivered
558 * out of order.]
559 *
560 * 2) To prevent another thread from racing us down the
561 * shadow chain and entering a new page in the top
562 * object before we do, we must keep a busy page in
563 * the top object while following the shadow chain.
564 *
565 * 3) We must increment paging_in_progress on any object
566 * for which we have a busy page
567 *
568 * 4) We leave busy pages on the pageout queues.
569 * If the pageout daemon comes across a busy page,
570 * it will remove the page from the pageout queues.
571 */
572
573 /*
574 * Search for the page at object/offset.
575 */
576
577 object = first_object;
578 offset = first_offset;
579 first_m = VM_PAGE_NULL;
580 access_required = fault_type;
581
582 XPR(XPR_VM_FAULT,
583 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
584 (integer_t)object, offset, fault_type, *protection, 0);
585
586 /*
587 * See whether this page is resident
588 */
589
590 while (TRUE) {
591 #if TRACEFAULTPAGE
592 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
593 #endif
594 if (!object->alive) {
595 vm_fault_cleanup(object, first_m);
596 thread_interrupt_level(interruptible_state);
597 return(VM_FAULT_MEMORY_ERROR);
598 }
599 m = vm_page_lookup(object, offset);
600 #if TRACEFAULTPAGE
601 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
602 #endif
603 if (m != VM_PAGE_NULL) {
604 /*
605 * If the page was pre-paged as part of a
606 * cluster, record the fact.
607 * If we were passed a valid pointer for
608 * "type_of_fault", then we came from
609 * vm_fault... we'll let it deal with
610 * this condition, since it
611 * needs to see m->clustered to correctly
612 * account the pageins.
613 */
614 if (type_of_fault == NULL && m->clustered) {
615 vm_pagein_cluster_used++;
616 m->clustered = FALSE;
617 }
618
619 /*
620 * If the page is being brought in,
621 * wait for it and then retry.
622 *
623 * A possible optimization: if the page
624 * is known to be resident, we can ignore
625 * pages that are absent (regardless of
626 * whether they're busy).
627 */
628
629 if (m->busy) {
630 #if TRACEFAULTPAGE
631 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
632 #endif
633 wait_result = PAGE_SLEEP(object, m, interruptible);
634 XPR(XPR_VM_FAULT,
635 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
636 (integer_t)object, offset,
637 (integer_t)m, 0, 0);
638 counter(c_vm_fault_page_block_busy_kernel++);
639
640 if (wait_result != THREAD_AWAKENED) {
641 vm_fault_cleanup(object, first_m);
642 thread_interrupt_level(interruptible_state);
643 if (wait_result == THREAD_RESTART)
644 {
645 return(VM_FAULT_RETRY);
646 }
647 else
648 {
649 return(VM_FAULT_INTERRUPTED);
650 }
651 }
652 continue;
653 }
654
655 if (m->encrypted) {
656 /*
657 * ENCRYPTED SWAP:
658 * the user needs access to a page that we
659 * encrypted before paging it out.
660 * Decrypt the page now.
661 * Keep it busy to prevent anyone from
662 * accessing it during the decryption.
663 */
664 m->busy = TRUE;
665 vm_page_decrypt(m, 0);
666 assert(object == m->object);
667 assert(m->busy);
668 PAGE_WAKEUP_DONE(m);
669
670 /*
671 * Retry from the top, in case
672 * something changed while we were
673 * decrypting.
674 */
675 continue;
676 }
677 ASSERT_PAGE_DECRYPTED(m);
678
679 /*
680 * If the page is in error, give up now.
681 */
682
683 if (m->error) {
684 #if TRACEFAULTPAGE
685 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */
686 #endif
687 if (error_code)
688 *error_code = m->page_error;
689 VM_PAGE_FREE(m);
690 vm_fault_cleanup(object, first_m);
691 thread_interrupt_level(interruptible_state);
692 return(VM_FAULT_MEMORY_ERROR);
693 }
694
695 /*
696 * If the pager wants us to restart
697 * at the top of the chain,
698 * typically because it has moved the
699 * page to another pager, then do so.
700 */
701
702 if (m->restart) {
703 #if TRACEFAULTPAGE
704 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
705 #endif
706 VM_PAGE_FREE(m);
707 vm_fault_cleanup(object, first_m);
708 thread_interrupt_level(interruptible_state);
709 return(VM_FAULT_RETRY);
710 }
711
712 /*
713 * If the page isn't busy, but is absent,
714 * then it was deemed "unavailable".
715 */
716
717 if (m->absent) {
718 /*
719 * Remove the non-existent page (unless it's
720 * in the top object) and move on down to the
721 * next object (if there is one).
722 */
723 #if TRACEFAULTPAGE
724 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */
725 #endif
726
727 next_object = object->shadow;
728 if (next_object == VM_OBJECT_NULL) {
729 vm_page_t real_m;
730
731 assert(!must_be_resident);
732
733 if (object->shadow_severed) {
734 vm_fault_cleanup(
735 object, first_m);
736 thread_interrupt_level(interruptible_state);
737 return VM_FAULT_MEMORY_ERROR;
738 }
739
740 /*
741 * Absent page at bottom of shadow
742 * chain; zero fill the page we left
743 * busy in the first object, and flush
744 * the absent page. But first we
745 * need to allocate a real page.
746 */
747 if (VM_PAGE_THROTTLED() ||
748 (real_m = vm_page_grab())
749 == VM_PAGE_NULL) {
750 vm_fault_cleanup(
751 object, first_m);
752 thread_interrupt_level(
753 interruptible_state);
754 return(
755 VM_FAULT_MEMORY_SHORTAGE);
756 }
757
758 /*
759 * Are we protecting the system from
760 * backing store exhaustion?  If so,
761 * sleep unless we are privileged.
762 */
763
764 if(vm_backing_store_low) {
765 if(!(current_task()->priv_flags
766 & VM_BACKING_STORE_PRIV)) {
767 assert_wait((event_t)
768 &vm_backing_store_low,
769 THREAD_UNINT);
770 vm_fault_cleanup(object,
771 first_m);
772 thread_block(THREAD_CONTINUE_NULL);
773 thread_interrupt_level(
774 interruptible_state);
775 return(VM_FAULT_RETRY);
776 }
777 }
778
779
780 XPR(XPR_VM_FAULT,
781 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
782 (integer_t)object, offset,
783 (integer_t)m,
784 (integer_t)first_object, 0);
785 if (object != first_object) {
786 VM_PAGE_FREE(m);
787 vm_object_paging_end(object);
788 vm_object_unlock(object);
789 object = first_object;
790 offset = first_offset;
791 m = first_m;
792 first_m = VM_PAGE_NULL;
793 vm_object_lock(object);
794 }
795
796 VM_PAGE_FREE(m);
797 assert(real_m->busy);
798 vm_page_insert(real_m, object, offset);
799 m = real_m;
800
801 /*
802 * Drop the lock while zero filling
803 * page. Then break because this
804 * is the page we wanted. Checking
805 * the page lock is a waste of time;
806 * this page was either absent or
807 * newly allocated -- in both cases
808 * it can't be page locked by a pager.
809 */
810 m->no_isync = FALSE;
811
812 if (!no_zero_fill) {
813 vm_object_unlock(object);
814 vm_page_zero_fill(m);
815 vm_object_lock(object);
816
817 if (type_of_fault)
818 *type_of_fault = DBG_ZERO_FILL_FAULT;
819 VM_STAT(zero_fill_count++);
820 }
821 if (bumped_pagein == TRUE) {
822 VM_STAT(pageins--);
823 current_task()->pageins--;
824 }
825 vm_page_lock_queues();
826 VM_PAGE_QUEUES_REMOVE(m);
827 m->page_ticket = vm_page_ticket;
828 assert(!m->laundry);
829 assert(m->object != kernel_object);
830 assert(m->pageq.next == NULL &&
831 m->pageq.prev == NULL);
832 if(m->object->size > 0x200000) {
833 m->zero_fill = TRUE;
834 /* depends on the queues lock */
835 vm_zf_count += 1;
836 queue_enter(&vm_page_queue_zf,
837 m, vm_page_t, pageq);
838 } else {
839 queue_enter(
840 &vm_page_queue_inactive,
841 m, vm_page_t, pageq);
842 }
843 vm_page_ticket_roll++;
844 if(vm_page_ticket_roll ==
845 VM_PAGE_TICKETS_IN_ROLL) {
846 vm_page_ticket_roll = 0;
847 if(vm_page_ticket ==
848 VM_PAGE_TICKET_ROLL_IDS)
849 vm_page_ticket= 0;
850 else
851 vm_page_ticket++;
852 }
853 m->inactive = TRUE;
854 vm_page_inactive_count++;
855 vm_page_unlock_queues();
856 break;
857 } else {
858 if (must_be_resident) {
859 vm_object_paging_end(object);
860 } else if (object != first_object) {
861 vm_object_paging_end(object);
862 VM_PAGE_FREE(m);
863 } else {
864 first_m = m;
865 m->absent = FALSE;
866 m->unusual = FALSE;
867 vm_object_absent_release(object);
868 m->busy = TRUE;
869
870 vm_page_lock_queues();
871 VM_PAGE_QUEUES_REMOVE(m);
872 vm_page_unlock_queues();
873 }
874 XPR(XPR_VM_FAULT,
875 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
876 (integer_t)object, offset,
877 (integer_t)next_object,
878 offset+object->shadow_offset,0);
879 offset += object->shadow_offset;
880 hi_offset += object->shadow_offset;
881 lo_offset += object->shadow_offset;
882 access_required = VM_PROT_READ;
883 vm_object_lock(next_object);
884 vm_object_unlock(object);
885 object = next_object;
886 vm_object_paging_begin(object);
887 continue;
888 }
889 }
890
891 if ((m->cleaning)
892 && ((object != first_object) ||
893 (object->copy != VM_OBJECT_NULL))
894 && (fault_type & VM_PROT_WRITE)) {
895 /*
896 * This is a copy-on-write fault that will
897 * cause us to revoke access to this page, but
898 * this page is in the process of being cleaned
899 * in a clustered pageout. We must wait until
900 * the cleaning operation completes before
901 * revoking access to the original page,
902 * otherwise we might attempt to remove a
903 * wired mapping.
904 */
905 #if TRACEFAULTPAGE
906 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */
907 #endif
908 XPR(XPR_VM_FAULT,
909 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
910 (integer_t)object, offset,
911 (integer_t)m, 0, 0);
912 /* take an extra ref so that object won't die */
913 assert(object->ref_count > 0);
914 object->ref_count++;
915 vm_object_res_reference(object);
916 vm_fault_cleanup(object, first_m);
917 counter(c_vm_fault_page_block_backoff_kernel++);
918 vm_object_lock(object);
919 assert(object->ref_count > 0);
920 m = vm_page_lookup(object, offset);
921 if (m != VM_PAGE_NULL && m->cleaning) {
922 PAGE_ASSERT_WAIT(m, interruptible);
923 vm_object_unlock(object);
924 wait_result = thread_block(THREAD_CONTINUE_NULL);
925 vm_object_deallocate(object);
926 goto backoff;
927 } else {
928 vm_object_unlock(object);
929 vm_object_deallocate(object);
930 thread_interrupt_level(interruptible_state);
931 return VM_FAULT_RETRY;
932 }
933 }
934
935 /*
936 * If the desired access to this page has
937 * been locked out, request that it be unlocked.
938 */
939
940 if (access_required & m->page_lock) {
941 if ((access_required & m->unlock_request) != access_required) {
942 vm_prot_t new_unlock_request;
943 kern_return_t rc;
944
945 #if TRACEFAULTPAGE
946 dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready); /* (TEST/DEBUG) */
947 #endif
948 if (!object->pager_ready) {
949 XPR(XPR_VM_FAULT,
950 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
951 access_required,
952 (integer_t)object, offset,
953 (integer_t)m, 0);
954 /* take an extra ref */
955 assert(object->ref_count > 0);
956 object->ref_count++;
957 vm_object_res_reference(object);
958 vm_fault_cleanup(object,
959 first_m);
960 counter(c_vm_fault_page_block_backoff_kernel++);
961 vm_object_lock(object);
962 assert(object->ref_count > 0);
963 if (!object->pager_ready) {
964 wait_result = vm_object_assert_wait(
965 object,
966 VM_OBJECT_EVENT_PAGER_READY,
967 interruptible);
968 vm_object_unlock(object);
969 if (wait_result == THREAD_WAITING)
970 wait_result = thread_block(THREAD_CONTINUE_NULL);
971 vm_object_deallocate(object);
972 goto backoff;
973 } else {
974 vm_object_unlock(object);
975 vm_object_deallocate(object);
976 thread_interrupt_level(interruptible_state);
977 return VM_FAULT_RETRY;
978 }
979 }
980
981 new_unlock_request = m->unlock_request =
982 (access_required | m->unlock_request);
983 vm_object_unlock(object);
984 XPR(XPR_VM_FAULT,
985 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
986 (integer_t)object, offset,
987 (integer_t)m, new_unlock_request, 0);
988 if ((rc = memory_object_data_unlock(
989 object->pager,
990 offset + object->paging_offset,
991 PAGE_SIZE,
992 new_unlock_request))
993 != KERN_SUCCESS) {
994 if (vm_fault_debug)
995 printf("vm_fault: memory_object_data_unlock failed\n");
996 vm_object_lock(object);
997 vm_fault_cleanup(object, first_m);
998 thread_interrupt_level(interruptible_state);
999 return((rc == MACH_SEND_INTERRUPTED) ?
1000 VM_FAULT_INTERRUPTED :
1001 VM_FAULT_MEMORY_ERROR);
1002 }
1003 vm_object_lock(object);
1004 continue;
1005 }
1006
1007 XPR(XPR_VM_FAULT,
1008 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
1009 access_required, (integer_t)object,
1010 offset, (integer_t)m, 0);
1011 /* take an extra ref so object won't die */
1012 assert(object->ref_count > 0);
1013 object->ref_count++;
1014 vm_object_res_reference(object);
1015 vm_fault_cleanup(object, first_m);
1016 counter(c_vm_fault_page_block_backoff_kernel++);
1017 vm_object_lock(object);
1018 assert(object->ref_count > 0);
1019 m = vm_page_lookup(object, offset);
1020 if (m != VM_PAGE_NULL &&
1021 (access_required & m->page_lock) &&
1022 !((access_required & m->unlock_request) != access_required)) {
1023 PAGE_ASSERT_WAIT(m, interruptible);
1024 vm_object_unlock(object);
1025 wait_result = thread_block(THREAD_CONTINUE_NULL);
1026 vm_object_deallocate(object);
1027 goto backoff;
1028 } else {
1029 vm_object_unlock(object);
1030 vm_object_deallocate(object);
1031 thread_interrupt_level(interruptible_state);
1032 return VM_FAULT_RETRY;
1033 }
1034 }
1035 /*
1036 * We mark the page busy and leave it on
1037 * the pageout queues. If the pageout
1038 * daemon comes across it, then it will
1039 * remove the page.
1040 */
1041
1042 #if TRACEFAULTPAGE
1043 dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1044 #endif
1045
1046 #if !VM_FAULT_STATIC_CONFIG
1047 if (!software_reference_bits) {
1048 vm_page_lock_queues();
1049 if (m->inactive)
1050 vm_stat.reactivations++;
1051
1052 VM_PAGE_QUEUES_REMOVE(m);
1053 vm_page_unlock_queues();
1054 }
1055 #endif
1056 XPR(XPR_VM_FAULT,
1057 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
1058 (integer_t)object, offset, (integer_t)m, 0, 0);
1059 assert(!m->busy);
1060 m->busy = TRUE;
1061 assert(!m->absent);
1062 break;
1063 }
1064
1065 look_for_page =
1066 (object->pager_created) &&
1067 LOOK_FOR(object, offset) &&
1068 (!data_supply);
1069
1070 #if TRACEFAULTPAGE
1071 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */
1072 #endif
1073 if ((look_for_page || (object == first_object))
1074 && !must_be_resident
1075 && !(object->phys_contiguous)) {
1076 /*
1077 * Allocate a new page for this object/offset
1078 * pair.
1079 */
1080
1081 m = vm_page_grab_fictitious();
1082 #if TRACEFAULTPAGE
1083 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
1084 #endif
1085 if (m == VM_PAGE_NULL) {
1086 vm_fault_cleanup(object, first_m);
1087 thread_interrupt_level(interruptible_state);
1088 return(VM_FAULT_FICTITIOUS_SHORTAGE);
1089 }
1090 vm_page_insert(m, object, offset);
1091 }
1092
1093 if ((look_for_page && !must_be_resident)) {
1094 kern_return_t rc;
1095
1096 /*
1097 * If the memory manager is not ready, we
1098 * cannot make requests.
1099 */
1100 if (!object->pager_ready) {
1101 #if TRACEFAULTPAGE
1102 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
1103 #endif
1104 if(m != VM_PAGE_NULL)
1105 VM_PAGE_FREE(m);
1106 XPR(XPR_VM_FAULT,
1107 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
1108 (integer_t)object, offset, 0, 0, 0);
1109 /* take an extra ref so object won't die */
1110 assert(object->ref_count > 0);
1111 object->ref_count++;
1112 vm_object_res_reference(object);
1113 vm_fault_cleanup(object, first_m);
1114 counter(c_vm_fault_page_block_backoff_kernel++);
1115 vm_object_lock(object);
1116 assert(object->ref_count > 0);
1117 if (!object->pager_ready) {
1118 wait_result = vm_object_assert_wait(object,
1119 VM_OBJECT_EVENT_PAGER_READY,
1120 interruptible);
1121 vm_object_unlock(object);
1122 if (wait_result == THREAD_WAITING)
1123 wait_result = thread_block(THREAD_CONTINUE_NULL);
1124 vm_object_deallocate(object);
1125 goto backoff;
1126 } else {
1127 vm_object_unlock(object);
1128 vm_object_deallocate(object);
1129 thread_interrupt_level(interruptible_state);
1130 return VM_FAULT_RETRY;
1131 }
1132 }
1133
1134 if(object->phys_contiguous) {
1135 if(m != VM_PAGE_NULL) {
1136 VM_PAGE_FREE(m);
1137 m = VM_PAGE_NULL;
1138 }
1139 goto no_clustering;
1140 }
1141 if (object->internal) {
1142 /*
1143 * Requests to the default pager
1144 * must reserve a real page in advance,
1145 * because the pager's data_provided call
1146 * won't block for pages. IMPORTANT:
1147 * this acts as a throttling mechanism
1148 * for data_requests to the default
1149 * pager.
1150 */
1151
1152 #if TRACEFAULTPAGE
1153 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1154 #endif
1155 if (m->fictitious && !vm_page_convert(m)) {
1156 VM_PAGE_FREE(m);
1157 vm_fault_cleanup(object, first_m);
1158 thread_interrupt_level(interruptible_state);
1159 return(VM_FAULT_MEMORY_SHORTAGE);
1160 }
1161 } else if (object->absent_count >
1162 vm_object_absent_max) {
1163 /*
1164 * If there are too many outstanding page
1165 * requests pending on this object, we
1166 * wait for them to be resolved now.
1167 */
1168
1169 #if TRACEFAULTPAGE
1170 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1171 #endif
1172 if(m != VM_PAGE_NULL)
1173 VM_PAGE_FREE(m);
1174 /* take an extra ref so object won't die */
1175 assert(object->ref_count > 0);
1176 object->ref_count++;
1177 vm_object_res_reference(object);
1178 vm_fault_cleanup(object, first_m);
1179 counter(c_vm_fault_page_block_backoff_kernel++);
1180 vm_object_lock(object);
1181 assert(object->ref_count > 0);
1182 if (object->absent_count > vm_object_absent_max) {
1183 vm_object_absent_assert_wait(object,
1184 interruptible);
1185 vm_object_unlock(object);
1186 wait_result = thread_block(THREAD_CONTINUE_NULL);
1187 vm_object_deallocate(object);
1188 goto backoff;
1189 } else {
1190 vm_object_unlock(object);
1191 vm_object_deallocate(object);
1192 thread_interrupt_level(interruptible_state);
1193 return VM_FAULT_RETRY;
1194 }
1195 }
1196
1197 /*
1198 * Indicate that the page is waiting for data
1199 * from the memory manager.
1200 */
1201
1202 if(m != VM_PAGE_NULL) {
1203
1204 m->list_req_pending = TRUE;
1205 m->absent = TRUE;
1206 m->unusual = TRUE;
1207 object->absent_count++;
1208
1209 }
1210
1211 no_clustering:
1212 cluster_start = offset;
1213 length = PAGE_SIZE;
1214
1215 /*
1216 * lengthen the cluster by the pages in the working set
1217 */
1218 if((map != NULL) &&
1219 (current_task()->dynamic_working_set != 0)) {
1220 cluster_end = cluster_start + length;
1221 /* tws values for start and end are just
1222 * suggestions. Therefore, as long as
1223 * build_cluster does not use pointers or
1224 * take action based on values that
1225 * could be affected by re-entrance, we
1226 * do not need to take the map lock.
1227 */
1228 cluster_end = offset + PAGE_SIZE_64;
1229 tws_build_cluster(
1230 current_task()->dynamic_working_set,
1231 object, &cluster_start,
1232 &cluster_end, 0x40000);
1233 length = cluster_end - cluster_start;
1234 }
1235 #if TRACEFAULTPAGE
1236 dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */
1237 #endif
1238 /*
1239 * We have a busy page, so we can
1240 * release the object lock.
1241 */
1242 vm_object_unlock(object);
1243
1244 /*
1245 * Call the memory manager to retrieve the data.
1246 */
1247
1248 if (type_of_fault)
1249 *type_of_fault = ((int)length << 8) | DBG_PAGEIN_FAULT;
1250 VM_STAT(pageins++);
1251 current_task()->pageins++;
1252 bumped_pagein = TRUE;
1253
1254 /*
1255 * If this object uses a copy_call strategy,
1256 * and we are interested in a copy of this object
1257 * (having gotten here only by following a
1258 * shadow chain), then tell the memory manager
1259 * via a flag added to the desired_access
1260 * parameter, so that it can detect a race
1261 * between our walking down the shadow chain
1262 * and its pushing pages up into a copy of
1263 * the object that it manages.
1264 */
1265
1266 if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1267 object != first_object) {
1268 wants_copy_flag = VM_PROT_WANTS_COPY;
1269 } else {
1270 wants_copy_flag = VM_PROT_NONE;
1271 }
1272
1273 XPR(XPR_VM_FAULT,
1274 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1275 (integer_t)object, offset, (integer_t)m,
1276 access_required | wants_copy_flag, 0);
1277
1278 rc = memory_object_data_request(object->pager,
1279 cluster_start + object->paging_offset,
1280 length,
1281 access_required | wants_copy_flag);
1282
1283
1284 #if TRACEFAULTPAGE
1285 dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1286 #endif
1287 if (rc != KERN_SUCCESS) {
1288 if (rc != MACH_SEND_INTERRUPTED
1289 && vm_fault_debug)
1290 printf("%s(0x%x, 0x%llx, 0x%llx, 0x%x) failed, rc=%d\n",
1291 "memory_object_data_request",
1292 object->pager,
1293 cluster_start + object->paging_offset,
1294 length, access_required, rc);
1295 /*
1296 * Don't want to leave a busy page around,
1297 * but the data request may have blocked,
1298 * so check if it's still there and busy.
1299 */
1300 if(!object->phys_contiguous) {
1301 vm_object_lock(object);
1302 for (; length; length -= PAGE_SIZE,
1303 cluster_start += PAGE_SIZE_64) {
1304 vm_page_t p;
1305 if ((p = vm_page_lookup(object,
1306 cluster_start))
1307 && p->absent && p->busy
1308 && p != first_m) {
1309 VM_PAGE_FREE(p);
1310 }
1311 }
1312 }
1313 vm_fault_cleanup(object, first_m);
1314 thread_interrupt_level(interruptible_state);
1315 return((rc == MACH_SEND_INTERRUPTED) ?
1316 VM_FAULT_INTERRUPTED :
1317 VM_FAULT_MEMORY_ERROR);
1318 }
1319
1320 vm_object_lock(object);
1321 if ((interruptible != THREAD_UNINT) &&
1322 (current_thread()->state & TH_ABORT)) {
1323 vm_fault_cleanup(object, first_m);
1324 thread_interrupt_level(interruptible_state);
1325 return(VM_FAULT_INTERRUPTED);
1326 }
1327 if (m == VM_PAGE_NULL &&
1328 object->phys_contiguous) {
1329 /*
1330 * No page here means that the object we
1331 * initially looked up was "physically
1332 * contiguous" (i.e. device memory). However,
1333 * with Virtual VRAM, the object might not
1334 * be backed by that device memory anymore,
1335 * so we're done here only if the object is
1336 * still "phys_contiguous".
1337 * Otherwise, if the object is no longer
1338 * "phys_contiguous", we need to retry the
1339 * page fault against the object's new backing
1340 * store (different memory object).
1341 */
1342 break;
1343 }
1344
1345 /*
1346 * Retry with same object/offset, since new data may
1347 * be in a different page (i.e., m is meaningless at
1348 * this point).
1349 */
1350 continue;
1351 }
1352
1353 /*
1354 * The only case in which we get here is if
1355 * object has no pager (or unwiring). If the pager doesn't
1356 * have the page this is handled in the m->absent case above
1357 * (and if you change things here you should look above).
1358 */
1359 #if TRACEFAULTPAGE
1360 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1361 #endif
1362 if (object == first_object)
1363 first_m = m;
1364 else
1365 assert(m == VM_PAGE_NULL);
1366
1367 XPR(XPR_VM_FAULT,
1368 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1369 (integer_t)object, offset, (integer_t)m,
1370 (integer_t)object->shadow, 0);
1371 /*
1372 * Move on to the next object. Lock the next
1373 * object before unlocking the current one.
1374 */
1375 next_object = object->shadow;
1376 if (next_object == VM_OBJECT_NULL) {
1377 assert(!must_be_resident);
1378 /*
1379 * If there's no object left, fill the page
1380 * in the top object with zeros. But first we
1381 * need to allocate a real page.
1382 */
1383
1384 if (object != first_object) {
1385 vm_object_paging_end(object);
1386 vm_object_unlock(object);
1387
1388 object = first_object;
1389 offset = first_offset;
1390 vm_object_lock(object);
1391 }
1392
1393 m = first_m;
1394 assert(m->object == object);
1395 first_m = VM_PAGE_NULL;
1396
1397 if(m == VM_PAGE_NULL) {
1398 m = vm_page_grab();
1399 if (m == VM_PAGE_NULL) {
1400 vm_fault_cleanup(
1401 object, VM_PAGE_NULL);
1402 thread_interrupt_level(
1403 interruptible_state);
1404 return(VM_FAULT_MEMORY_SHORTAGE);
1405 }
1406 vm_page_insert(
1407 m, object, offset);
1408 }
1409
1410 if (object->shadow_severed) {
1411 VM_PAGE_FREE(m);
1412 vm_fault_cleanup(object, VM_PAGE_NULL);
1413 thread_interrupt_level(interruptible_state);
1414 return VM_FAULT_MEMORY_ERROR;
1415 }
1416
1417 /*
1418 * Are we protecting the system from
1419 * backing store exhaustion?  If so,
1420 * sleep unless we are privileged.
1421 */
1422
1423 if(vm_backing_store_low) {
1424 if(!(current_task()->priv_flags
1425 & VM_BACKING_STORE_PRIV)) {
1426 assert_wait((event_t)
1427 &vm_backing_store_low,
1428 THREAD_UNINT);
1429 VM_PAGE_FREE(m);
1430 vm_fault_cleanup(object, VM_PAGE_NULL);
1431 thread_block(THREAD_CONTINUE_NULL);
1432 thread_interrupt_level(
1433 interruptible_state);
1434 return(VM_FAULT_RETRY);
1435 }
1436 }
1437
1438 if (VM_PAGE_THROTTLED() ||
1439 (m->fictitious && !vm_page_convert(m))) {
1440 VM_PAGE_FREE(m);
1441 vm_fault_cleanup(object, VM_PAGE_NULL);
1442 thread_interrupt_level(interruptible_state);
1443 return(VM_FAULT_MEMORY_SHORTAGE);
1444 }
1445 m->no_isync = FALSE;
1446
1447 if (!no_zero_fill) {
1448 vm_object_unlock(object);
1449 vm_page_zero_fill(m);
1450 vm_object_lock(object);
1451
1452 if (type_of_fault)
1453 *type_of_fault = DBG_ZERO_FILL_FAULT;
1454 VM_STAT(zero_fill_count++);
1455 }
1456 if (bumped_pagein == TRUE) {
1457 VM_STAT(pageins--);
1458 current_task()->pageins--;
1459 }
1460 vm_page_lock_queues();
1461 VM_PAGE_QUEUES_REMOVE(m);
1462 assert(!m->laundry);
1463 assert(m->object != kernel_object);
1464 assert(m->pageq.next == NULL &&
1465 m->pageq.prev == NULL);
1466 if(m->object->size > 0x200000) {
1467 m->zero_fill = TRUE;
1468 /* depends on the queues lock */
1469 vm_zf_count += 1;
1470 queue_enter(&vm_page_queue_zf,
1471 m, vm_page_t, pageq);
1472 } else {
1473 queue_enter(
1474 &vm_page_queue_inactive,
1475 m, vm_page_t, pageq);
1476 }
1477 m->page_ticket = vm_page_ticket;
1478 vm_page_ticket_roll++;
1479 if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1480 vm_page_ticket_roll = 0;
1481 if(vm_page_ticket ==
1482 VM_PAGE_TICKET_ROLL_IDS)
1483 vm_page_ticket= 0;
1484 else
1485 vm_page_ticket++;
1486 }
1487 m->inactive = TRUE;
1488 vm_page_inactive_count++;
1489 vm_page_unlock_queues();
1490 #if 0
1491 pmap_clear_modify(m->phys_page);
1492 #endif
1493 break;
1494 }
1495 else {
1496 if ((object != first_object) || must_be_resident)
1497 vm_object_paging_end(object);
1498 offset += object->shadow_offset;
1499 hi_offset += object->shadow_offset;
1500 lo_offset += object->shadow_offset;
1501 access_required = VM_PROT_READ;
1502 vm_object_lock(next_object);
1503 vm_object_unlock(object);
1504 object = next_object;
1505 vm_object_paging_begin(object);
1506 }
1507 }
1508
1509 /*
1510 * PAGE HAS BEEN FOUND.
1511 *
1512 * This page (m) is:
1513 * busy, so that we can play with it;
1514 * not absent, so that nobody else will fill it;
1515 * possibly eligible for pageout;
1516 *
1517 * The top-level page (first_m) is:
1518 * VM_PAGE_NULL if the page was found in the
1519 * top-level object;
1520 * busy, not absent, and ineligible for pageout.
1521 *
1522 * The current object (object) is locked. A paging
1523 * reference is held for the current and top-level
1524 * objects.
1525 */
1526
1527 #if TRACEFAULTPAGE
1528 dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1529 #endif
1530 #if EXTRA_ASSERTIONS
1531 if(m != VM_PAGE_NULL) {
1532 assert(m->busy && !m->absent);
1533 assert((first_m == VM_PAGE_NULL) ||
1534 (first_m->busy && !first_m->absent &&
1535 !first_m->active && !first_m->inactive));
1536 }
1537 #endif /* EXTRA_ASSERTIONS */
1538
1539 /*
1540 * ENCRYPTED SWAP:
1541 * If we found a page, we must have decrypted it before we
1542 * get here...
1543 */
1544 if (m != VM_PAGE_NULL) {
1545 ASSERT_PAGE_DECRYPTED(m);
1546 }
1547
1548 XPR(XPR_VM_FAULT,
1549 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1550 (integer_t)object, offset, (integer_t)m,
1551 (integer_t)first_object, (integer_t)first_m);
1552 /*
1553 * If the page is being written, but isn't
1554 * already owned by the top-level object,
1555 * we have to copy it into a new page owned
1556 * by the top-level object.
1557 */
1558
1559 if ((object != first_object) && (m != VM_PAGE_NULL)) {
1560 /*
1561 * We only really need to copy if we
1562 * want to write it.
1563 */
1564
1565 #if TRACEFAULTPAGE
1566 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1567 #endif
1568 if (fault_type & VM_PROT_WRITE) {
1569 vm_page_t copy_m;
1570
1571 assert(!must_be_resident);
1572
1573 /*
1574 * Are we protecting the system from
1575 * backing store exhaustion?  If so,
1576 * sleep unless we are privileged.
1577 */
1578
1579 if(vm_backing_store_low) {
1580 if(!(current_task()->priv_flags
1581 & VM_BACKING_STORE_PRIV)) {
1582 assert_wait((event_t)
1583 &vm_backing_store_low,
1584 THREAD_UNINT);
1585 RELEASE_PAGE(m);
1586 vm_fault_cleanup(object, first_m);
1587 thread_block(THREAD_CONTINUE_NULL);
1588 thread_interrupt_level(
1589 interruptible_state);
1590 return(VM_FAULT_RETRY);
1591 }
1592 }
1593
1594 /*
1595 * If we try to collapse first_object at this
1596 * point, we may deadlock when we try to get
1597 * the lock on an intermediate object (since we
1598 * have the bottom object locked). We can't
1599 * unlock the bottom object, because the page
1600 * we found may move (by collapse) if we do.
1601 *
1602 * Instead, we first copy the page. Then, when
1603 * we have no more use for the bottom object,
1604 * we unlock it and try to collapse.
1605 *
1606 * Note that we copy the page even if we didn't
1607 * need to... that's the breaks.
1608 */
1609
1610 /*
1611 * Allocate a page for the copy
1612 */
1613 copy_m = vm_page_grab();
1614 if (copy_m == VM_PAGE_NULL) {
1615 RELEASE_PAGE(m);
1616 vm_fault_cleanup(object, first_m);
1617 thread_interrupt_level(interruptible_state);
1618 return(VM_FAULT_MEMORY_SHORTAGE);
1619 }
1620
1621
1622 XPR(XPR_VM_FAULT,
1623 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1624 (integer_t)object, offset,
1625 (integer_t)m, (integer_t)copy_m, 0);
1626 vm_page_copy(m, copy_m);
1627
1628 /*
1629 * If another map is truly sharing this
1630 * page with us, we have to flush all
1631 * uses of the original page, since we
1632 * can't distinguish those which want the
1633 * original from those which need the
1634 * new copy.
1635 *
1636 * XXXO If we know that only one map has
1637 * access to this page, then we could
1638 * avoid the pmap_disconnect() call.
1639 */
1640
1641 vm_page_lock_queues();
1642 assert(!m->cleaning);
1643 pmap_disconnect(m->phys_page);
1644 vm_page_deactivate(m);
1645 copy_m->dirty = TRUE;
1646 /*
1647 * Setting reference here prevents this fault from
1648 * being counted as a (per-thread) reactivate as well
1649 * as a copy-on-write.
1650 */
1651 first_m->reference = TRUE;
1652 vm_page_unlock_queues();
1653
1654 /*
1655 * We no longer need the old page or object.
1656 */
1657
1658 PAGE_WAKEUP_DONE(m);
1659 vm_object_paging_end(object);
1660 vm_object_unlock(object);
1661
1662 if (type_of_fault)
1663 *type_of_fault = DBG_COW_FAULT;
1664 VM_STAT(cow_faults++);
1665 current_task()->cow_faults++;
1666 object = first_object;
1667 offset = first_offset;
1668
1669 vm_object_lock(object);
1670 VM_PAGE_FREE(first_m);
1671 first_m = VM_PAGE_NULL;
1672 assert(copy_m->busy);
1673 vm_page_insert(copy_m, object, offset);
1674 m = copy_m;
1675
1676 /*
1677 * Now that we've gotten the copy out of the
1678 * way, let's try to collapse the top object.
1679 * But we have to play ugly games with
1680 * paging_in_progress to do that...
1681 */
1682
1683 vm_object_paging_end(object);
1684 vm_object_collapse(object, offset);
1685 vm_object_paging_begin(object);
1686
1687 }
1688 else {
1689 *protection &= (~VM_PROT_WRITE);
1690 }
1691 }
1692
1693 /*
1694 * Now check whether the page needs to be pushed into the
1695 * copy object. The use of asymmetric copy on write for
1696 * shared temporary objects means that we may do two copies to
1697 * satisfy the fault; one above to get the page from a
1698 * shadowed object, and one here to push it into the copy.
1699 */
1700
1701 while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
1702 (m!= VM_PAGE_NULL)) {
1703 vm_object_offset_t copy_offset;
1704 vm_page_t copy_m;
1705
1706 #if TRACEFAULTPAGE
1707 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1708 #endif
1709 /*
1710 * If the page is being written, but hasn't been
1711 * copied to the copy-object, we have to copy it there.
1712 */
1713
1714 if ((fault_type & VM_PROT_WRITE) == 0) {
1715 *protection &= ~VM_PROT_WRITE;
1716 break;
1717 }
1718
1719 /*
1720 * If the page was guaranteed to be resident,
1721 * we must have already performed the copy.
1722 */
1723
1724 if (must_be_resident)
1725 break;
1726
1727 /*
1728 * Try to get the lock on the copy_object.
1729 */
1730 if (!vm_object_lock_try(copy_object)) {
1731 vm_object_unlock(object);
1732
1733 mutex_pause(); /* wait a bit */
1734
1735 vm_object_lock(object);
1736 continue;
1737 }
1738
1739 /*
1740 * Make another reference to the copy-object,
1741 * to keep it from disappearing during the
1742 * copy.
1743 */
1744 assert(copy_object->ref_count > 0);
1745 copy_object->ref_count++;
1746 VM_OBJ_RES_INCR(copy_object);
1747
1748 /*
1749 * Does the page exist in the copy?
1750 */
1751 copy_offset = first_offset - copy_object->shadow_offset;
1752 if (copy_object->size <= copy_offset)
1753 /*
1754 * Copy object doesn't cover this page -- do nothing.
1755 */
1756 ;
1757 else if ((copy_m =
1758 vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1759 /* Page currently exists in the copy object */
1760 if (copy_m->busy) {
1761 /*
1762 * If the page is being brought
1763 * in, wait for it and then retry.
1764 */
1765 RELEASE_PAGE(m);
1766 /* take an extra ref so object won't die */
1767 assert(copy_object->ref_count > 0);
1768 copy_object->ref_count++;
1769 vm_object_res_reference(copy_object);
1770 vm_object_unlock(copy_object);
1771 vm_fault_cleanup(object, first_m);
1772 counter(c_vm_fault_page_block_backoff_kernel++);
1773 vm_object_lock(copy_object);
1774 assert(copy_object->ref_count > 0);
1775 VM_OBJ_RES_DECR(copy_object);
1776 copy_object->ref_count--;
1777 assert(copy_object->ref_count > 0);
1778 copy_m = vm_page_lookup(copy_object, copy_offset);
1779 /*
1780 * ENCRYPTED SWAP:
1781 * it's OK if the "copy_m" page is encrypted,
1782 * because we're not moving it nor handling its
1783 * contents.
1784 */
1785 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1786 PAGE_ASSERT_WAIT(copy_m, interruptible);
1787 vm_object_unlock(copy_object);
1788 wait_result = thread_block(THREAD_CONTINUE_NULL);
1789 vm_object_deallocate(copy_object);
1790 goto backoff;
1791 } else {
1792 vm_object_unlock(copy_object);
1793 vm_object_deallocate(copy_object);
1794 thread_interrupt_level(interruptible_state);
1795 return VM_FAULT_RETRY;
1796 }
1797 }
1798 }
1799 else if (!PAGED_OUT(copy_object, copy_offset)) {
1800 /*
1801 * If PAGED_OUT is TRUE, then the page used to exist
1802 * in the copy-object, and has already been paged out.
1803 * We don't need to repeat this. If PAGED_OUT is
1804 * FALSE, then either we don't know (!pager_created,
1805 * for example) or it hasn't been paged out.
1806 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1807 * We must copy the page to the copy object.
1808 */
1809
1810 /*
1811 * Are we protecting the system from
1812 * backing store exhaustion?  If so,
1813 * sleep unless we are privileged.
1814 */
1815
1816 if(vm_backing_store_low) {
1817 if(!(current_task()->priv_flags
1818 & VM_BACKING_STORE_PRIV)) {
1819 assert_wait((event_t)
1820 &vm_backing_store_low,
1821 THREAD_UNINT);
1822 RELEASE_PAGE(m);
1823 VM_OBJ_RES_DECR(copy_object);
1824 copy_object->ref_count--;
1825 assert(copy_object->ref_count > 0);
1826 vm_object_unlock(copy_object);
1827 vm_fault_cleanup(object, first_m);
1828 thread_block(THREAD_CONTINUE_NULL);
1829 thread_interrupt_level(
1830 interruptible_state);
1831 return(VM_FAULT_RETRY);
1832 }
1833 }
1834
1835 /*
1836 * Allocate a page for the copy
1837 */
1838 copy_m = vm_page_alloc(copy_object, copy_offset);
1839 if (copy_m == VM_PAGE_NULL) {
1840 RELEASE_PAGE(m);
1841 VM_OBJ_RES_DECR(copy_object);
1842 copy_object->ref_count--;
1843 assert(copy_object->ref_count > 0);
1844 vm_object_unlock(copy_object);
1845 vm_fault_cleanup(object, first_m);
1846 thread_interrupt_level(interruptible_state);
1847 return(VM_FAULT_MEMORY_SHORTAGE);
1848 }
1849
1850 /*
1851 * Must copy page into copy-object.
1852 */
1853
1854 vm_page_copy(m, copy_m);
1855
1856 /*
1857 * If the old page was in use by any users
1858 * of the copy-object, it must be removed
1859 * from all pmaps. (We can't know which
1860 * pmaps use it.)
1861 */
1862
1863 vm_page_lock_queues();
1864 assert(!m->cleaning);
1865 pmap_disconnect(m->phys_page);
1866 copy_m->dirty = TRUE;
1867 vm_page_unlock_queues();
1868
1869 /*
1870 * If there's a pager, then immediately
1871 * page out this page, using the "initialize"
1872 * option. Else, we use the copy.
1873 */
1874
1875 if
1876 #if MACH_PAGEMAP
1877 ((!copy_object->pager_created) ||
1878 vm_external_state_get(
1879 copy_object->existence_map, copy_offset)
1880 == VM_EXTERNAL_STATE_ABSENT)
1881 #else
1882 (!copy_object->pager_created)
1883 #endif
1884 {
1885 vm_page_lock_queues();
1886 vm_page_activate(copy_m);
1887 vm_page_unlock_queues();
1888 PAGE_WAKEUP_DONE(copy_m);
1889 }
1890 else {
1891 assert(copy_m->busy == TRUE);
1892
1893 /*
1894 * The page is already ready for pageout:
1895 * not on pageout queues and busy.
1896 * Unlock everything except the
1897 * copy_object itself.
1898 */
1899
1900 vm_object_unlock(object);
1901
1902 /*
1903 * Write the page to the copy-object,
1904 * flushing it from the kernel.
1905 */
1906
1907 vm_pageout_initialize_page(copy_m);
1908
1909 /*
1910 * Since the pageout may have
1911 * temporarily dropped the
1912 * copy_object's lock, we
1913 * check whether we'll have
1914 * to deallocate the hard way.
1915 */
1916
1917 if ((copy_object->shadow != object) ||
1918 (copy_object->ref_count == 1)) {
1919 vm_object_unlock(copy_object);
1920 vm_object_deallocate(copy_object);
1921 vm_object_lock(object);
1922 continue;
1923 }
1924
1925 /*
1926 * Pick back up the old object's
1927 * lock. [It is safe to do so,
1928 * since it must be deeper in the
1929 * object tree.]
1930 */
1931
1932 vm_object_lock(object);
1933 }
1934
1935 /*
1936 * Because we're pushing a page upward
1937 * in the object tree, we must restart
1938 * any faults that are waiting here.
1939 * [Note that this is an expansion of
1940 * PAGE_WAKEUP that uses the THREAD_RESTART
1941 * wait result]. Can't turn off the page's
1942 * busy bit because we're not done with it.
1943 */
1944
1945 if (m->wanted) {
1946 m->wanted = FALSE;
1947 thread_wakeup_with_result((event_t) m,
1948 THREAD_RESTART);
1949 }
1950 }
1951
1952 /*
1953 * The reference count on copy_object must be
1954 * at least 2: one for our extra reference,
1955 * and at least one from the outside world
1956 * (we checked that when we last locked
1957 * copy_object).
1958 */
1959 copy_object->ref_count--;
1960 assert(copy_object->ref_count > 0);
1961 VM_OBJ_RES_DECR(copy_object);
1962 vm_object_unlock(copy_object);
1963
1964 break;
1965 }
1966
1967 *result_page = m;
1968 *top_page = first_m;
1969
1970 XPR(XPR_VM_FAULT,
1971 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1972 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1973 /*
1974 * If the page can be written, assume that it will be.
1975 * [Earlier, we restricted the permission to allow write
1976 * access only if the fault so required, so we don't
1977 * mark read-only data as dirty.]
1978 */
1979
1980
1981 if(m != VM_PAGE_NULL) {
1982 #if !VM_FAULT_STATIC_CONFIG
1983 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
1984 m->dirty = TRUE;
1985 #endif
1986 if (vm_page_deactivate_behind)
1987 vm_fault_deactivate_behind(object, offset, behavior);
1988 } else {
1989 vm_object_unlock(object);
1990 }
1991 thread_interrupt_level(interruptible_state);
1992
1993 #if TRACEFAULTPAGE
1994 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */
1995 #endif
1996 return(VM_FAULT_SUCCESS);
1997
1998 #if 0
1999 block_and_backoff:
2000 vm_fault_cleanup(object, first_m);
2001
2002 counter(c_vm_fault_page_block_backoff_kernel++);
2003 thread_block(THREAD_CONTINUE_NULL);
2004 #endif
2005
2006 backoff:
2007 thread_interrupt_level(interruptible_state);
2008 if (wait_result == THREAD_INTERRUPTED)
2009 return VM_FAULT_INTERRUPTED;
2010 return VM_FAULT_RETRY;
2011
2012 #undef RELEASE_PAGE
2013 }
2014
2015 /*
2016 * Routine: vm_fault_tws_insert
2017 * Purpose:
2018 * Add fault information to the task working set.
2019 * Implementation:
2020 * We always insert the base object/offset pair
2021 * rather than the actual object/offset.
2022 * Assumptions:
2023 * Map and real_map locked.
2024 * Object locked and referenced.
2025 * Returns:
2026 * TRUE if startup file should be written.
2027 * With object locked and still referenced.
2028 * But we may drop the object lock temporarily.
2029 */
2030 static boolean_t
2031 vm_fault_tws_insert(
2032 vm_map_t map,
2033 vm_map_t real_map,
2034 vm_map_offset_t vaddr,
2035 vm_object_t object,
2036 vm_object_offset_t offset)
2037 {
2038 tws_hash_line_t line;
2039 task_t task;
2040 kern_return_t kr;
2041 boolean_t result = FALSE;
2042
2043 /* Avoid possible map lock deadlock issues */
2044 if (map == kernel_map || map == kalloc_map ||
2045 real_map == kernel_map || real_map == kalloc_map)
2046 return result;
2047
2048 task = current_task();
2049 if (task->dynamic_working_set != 0) {
2050 vm_object_t base_object;
2051 vm_object_t base_shadow;
2052 vm_object_offset_t base_offset;
2053 base_object = object;
2054 base_offset = offset;
2055 while ((base_shadow = base_object->shadow)) {
2056 vm_object_lock(base_shadow);
2057 vm_object_unlock(base_object);
2058 base_offset +=
2059 base_object->shadow_offset;
2060 base_object = base_shadow;
2061 }
2062 kr = tws_lookup(
2063 task->dynamic_working_set,
2064 base_offset, base_object,
2065 &line);
2066 if (kr == KERN_OPERATION_TIMED_OUT){
2067 result = TRUE;
2068 if (base_object != object) {
2069 vm_object_unlock(base_object);
2070 vm_object_lock(object);
2071 }
2072 } else if (kr != KERN_SUCCESS) {
2073 if(base_object != object)
2074 vm_object_reference_locked(base_object);
2075 kr = tws_insert(
2076 task->dynamic_working_set,
2077 base_offset, base_object,
2078 vaddr, real_map);
2079 if(base_object != object) {
2080 vm_object_unlock(base_object);
2081 vm_object_deallocate(base_object);
2082 }
2083 if(kr == KERN_NO_SPACE) {
2084 if (base_object == object)
2085 vm_object_unlock(object);
2086 tws_expand_working_set(
2087 task->dynamic_working_set,
2088 TWS_HASH_LINE_COUNT,
2089 FALSE);
2090 if (base_object == object)
2091 vm_object_lock(object);
2092 } else if(kr == KERN_OPERATION_TIMED_OUT) {
2093 result = TRUE;
2094 }
2095 if(base_object != object)
2096 vm_object_lock(object);
2097 } else if (base_object != object) {
2098 vm_object_unlock(base_object);
2099 vm_object_lock(object);
2100 }
2101 }
2102 return result;
2103 }
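/*
 * Illustrative sketch (not part of the build): how a fault path is
 * expected to use vm_fault_tws_insert().  The caller holds the map,
 * real_map and object locks, passes the faulting address and the
 * object/offset it resolved to, and remembers the result so that
 * tws_send_startup_info() can be called once all locks are dropped.
 * The routine and variable names below are hypothetical.
 */
#if 0
static void
example_tws_usage(
	vm_map_t		map,
	vm_map_t		real_map,
	vm_map_offset_t		vaddr,
	vm_object_t		object,		/* locked by caller */
	vm_object_offset_t	offset)
{
	int	write_startup_file;

	/* record the fault in the task working set */
	write_startup_file =
		vm_fault_tws_insert(map, real_map, vaddr, object, offset);

	/* ... drop the object and map locks here ... */

	/* flush the startup file only after everything is unlocked */
	if (write_startup_file)
		tws_send_startup_info(current_task());
}
#endif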
2104
2105 /*
2106 * Routine: vm_fault
2107 * Purpose:
2108 * Handle page faults, including pseudo-faults
2109 * used to change the wiring status of pages.
2110 * Returns:
2111 * Explicit continuations have been removed.
2112 * Implementation:
2113 * vm_fault and vm_fault_page save mucho state
2114 * in the moral equivalent of a closure. The state
2115 * structure is allocated when first entering vm_fault
2116 * and deallocated when leaving vm_fault.
2117 */
2118
2119 extern int _map_enter_debug;
2120
2121 kern_return_t
2122 vm_fault(
2123 vm_map_t map,
2124 vm_map_offset_t vaddr,
2125 vm_prot_t fault_type,
2126 boolean_t change_wiring,
2127 int interruptible,
2128 pmap_t caller_pmap,
2129 vm_map_offset_t caller_pmap_addr)
2130 {
2131 vm_map_version_t version; /* Map version for verification */
2132 boolean_t wired; /* Should mapping be wired down? */
2133 vm_object_t object; /* Top-level object */
2134 vm_object_offset_t offset; /* Top-level offset */
2135 vm_prot_t prot; /* Protection for mapping */
2136 vm_behavior_t behavior; /* Expected paging behavior */
2137 vm_map_offset_t lo_offset, hi_offset;
2138 vm_object_t old_copy_object; /* Saved copy object */
2139 vm_page_t result_page; /* Result of vm_fault_page */
2140 vm_page_t top_page; /* Placeholder page */
2141 kern_return_t kr;
2142
2143 register
2144 vm_page_t m; /* Fast access to result_page */
2145 kern_return_t error_code = 0; /* page error reasons */
2146 register
2147 vm_object_t cur_object;
2148 register
2149 vm_object_offset_t cur_offset;
2150 vm_page_t cur_m;
2151 vm_object_t new_object;
2152 int type_of_fault;
2153 vm_map_t real_map = map;
2154 vm_map_t original_map = map;
2155 pmap_t pmap = NULL;
2156 boolean_t interruptible_state;
2157 unsigned int cache_attr;
2158 int write_startup_file = 0;
2159 boolean_t need_activation;
2160 vm_prot_t full_fault_type;
2161
2162 if (get_preemption_level() != 0)
2163 return (KERN_FAILURE);
2164
2165 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
2166 vaddr,
2167 0,
2168 0,
2169 0,
2170 0);
2171
2172 /* at present we do not fully check for execute permission */
2173 /* we generally treat it as read except in certain device */
2174 /* memory settings */
2175 full_fault_type = fault_type;
2176 if(fault_type & VM_PROT_EXECUTE) {
2177 fault_type &= ~VM_PROT_EXECUTE;
2178 fault_type |= VM_PROT_READ;
2179 }
2180
2181 interruptible_state = thread_interrupt_level(interruptible);
2182
2183 /*
2184 * assume we will hit a page in the cache;
2185 * otherwise, explicitly override with
2186 * the real fault type once we determine it
2187 */
2188 type_of_fault = DBG_CACHE_HIT_FAULT;
2189
2190 VM_STAT(faults++);
2191 current_task()->faults++;
2192
2193 RetryFault: ;
2194
2195 /*
2196 * Find the backing store object and offset into
2197 * it to begin the search.
2198 */
2199 map = original_map;
2200 vm_map_lock_read(map);
2201 kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
2202 &object, &offset,
2203 &prot, &wired,
2204 &behavior, &lo_offset, &hi_offset, &real_map);
2205
2206 //if (_map_enter_debug)printf("vm_map_lookup_locked(map=0x%x, addr=0x%llx, prot=%d wired=%d) = %d\n", map, vaddr, prot, wired, kr);
2207
2208 pmap = real_map->pmap;
2209
2210 if (kr != KERN_SUCCESS) {
2211 vm_map_unlock_read(map);
2212 goto done;
2213 }
2214
2215 /*
2216 * If the page is wired, we must fault for the current protection
2217 * value, to avoid further faults.
2218 */
2219
2220 if (wired)
2221 fault_type = prot | VM_PROT_WRITE;
2222
2223 #if VM_FAULT_CLASSIFY
2224 /*
2225 * Temporary data gathering code
2226 */
2227 vm_fault_classify(object, offset, fault_type);
2228 #endif
2229 /*
2230 * Fast fault code. The basic idea is to do as much as
2231 * possible while holding the map lock and object locks.
2232 * Busy pages are not used until the object lock has to
2233 * be dropped to do something (copy, zero fill, pmap enter).
2234 * Similarly, paging references aren't acquired until that
2235 * point, and object references aren't used.
2236 *
2237 * If we can figure out what to do
2238 * (zero fill, copy on write, pmap enter) while holding
2239 * the locks, then it gets done. Otherwise, we give up,
2240 * and use the original fault path (which doesn't hold
2241 * the map lock, and relies on busy pages).
2242 * The give up cases include:
2243 * - Have to talk to pager.
2244 * - Page is busy, absent or in error.
2245 * - Pager has locked out desired access.
2246 * - Fault needs to be restarted.
2247 * - Have to push page into copy object.
2248 *
2249 * The code is an infinite loop that moves one level down
2250 * the shadow chain each time. cur_object and cur_offset
2251 * refer to the current object being examined. object and offset
2252 * are the original object from the map. The loop is at the
2253 * top level if and only if object and cur_object are the same.
2254 *
2255 * Invariants: Map lock is held throughout. Lock is held on
2256 * original object and cur_object (if different) when
2257 * continuing or exiting loop.
2258 *
2259 */
2260
2261
2262 /*
2263 * If this page is to be inserted in a copy delay object
2264 * for writing, and if the object has a copy, then the
2265 * copy delay strategy is implemented in the slow fault path.
2266 */
2267 if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
2268 object->copy == VM_OBJECT_NULL ||
2269 (fault_type & VM_PROT_WRITE) == 0) {
2270 cur_object = object;
2271 cur_offset = offset;
2272
2273 while (TRUE) {
2274 m = vm_page_lookup(cur_object, cur_offset);
2275 if (m != VM_PAGE_NULL) {
2276 if (m->busy) {
2277 wait_result_t result;
2278
2279 if (object != cur_object)
2280 vm_object_unlock(object);
2281
2282 vm_map_unlock_read(map);
2283 if (real_map != map)
2284 vm_map_unlock(real_map);
2285
2286 #if !VM_FAULT_STATIC_CONFIG
2287 if (!vm_fault_interruptible)
2288 interruptible = THREAD_UNINT;
2289 #endif
2290 result = PAGE_ASSERT_WAIT(m, interruptible);
2291
2292 vm_object_unlock(cur_object);
2293
2294 if (result == THREAD_WAITING) {
2295 result = thread_block(THREAD_CONTINUE_NULL);
2296
2297 counter(c_vm_fault_page_block_busy_kernel++);
2298 }
2299 if (result == THREAD_AWAKENED || result == THREAD_RESTART)
2300 goto RetryFault;
2301
2302 kr = KERN_ABORTED;
2303 goto done;
2304 }
2305 if (m->unusual && (m->error || m->restart || m->private
2306 || m->absent || (fault_type & m->page_lock))) {
2307
2308 /*
2309 * Unusual case. Give up.
2310 */
2311 break;
2312 }
2313
2314 if (m->encrypted) {
2315 /*
2316 * ENCRYPTED SWAP:
2317 * We've soft-faulted (because it's not in the page
2318 * table) on an encrypted page.
2319 * Keep the page "busy" so that no one messes with
2320 * it during the decryption.
2321 * Release the extra locks we're holding, keep only
2322 * the page's VM object lock.
2323 */
2324 m->busy = TRUE;
2325 if (object != cur_object) {
2326 vm_object_unlock(object);
2327 }
2328 vm_map_unlock_read(map);
2329 if (real_map != map)
2330 vm_map_unlock(real_map);
2331
2332 vm_page_decrypt(m, 0);
2333
2334 assert(m->busy);
2335 PAGE_WAKEUP_DONE(m);
2336 vm_object_unlock(m->object);
2337
2338 /*
2339 * Retry from the top, in case anything
2340 * changed while we were decrypting...
2341 */
2342 goto RetryFault;
2343 }
2344 ASSERT_PAGE_DECRYPTED(m);
2345
2346 /*
2347 * Two cases of map in faults:
2348 * - At top level w/o copy object.
2349 * - Read fault anywhere.
2350 * --> must disallow write.
2351 */
2352
2353 if (object == cur_object &&
2354 object->copy == VM_OBJECT_NULL)
2355 goto FastMapInFault;
2356
2357 if ((fault_type & VM_PROT_WRITE) == 0) {
2358 boolean_t sequential;
2359
2360 prot &= ~VM_PROT_WRITE;
2361
2362 /*
2363 * Set up to map the page ...
2364 * mark the page busy, drop
2365 * locks and take a paging reference
2366 * on the object with the page.
2367 */
2368
2369 if (object != cur_object) {
2370 vm_object_unlock(object);
2371 object = cur_object;
2372 }
2373 FastMapInFault:
2374 m->busy = TRUE;
2375
2376 vm_object_paging_begin(object);
2377
2378 FastPmapEnter:
2379 /*
2380 * Check a couple of global reasons to
2381 * be conservative about write access.
2382 * Then do the pmap_enter.
2383 */
2384 #if !VM_FAULT_STATIC_CONFIG
2385 if (vm_fault_dirty_handling
2386 #if MACH_KDB
2387 || db_watchpoint_list
2388 #endif
2389 && (fault_type & VM_PROT_WRITE) == 0)
2390 prot &= ~VM_PROT_WRITE;
2391 #else /* STATIC_CONFIG */
2392 #if MACH_KDB
2393 if (db_watchpoint_list
2394 && (fault_type & VM_PROT_WRITE) == 0)
2395 prot &= ~VM_PROT_WRITE;
2396 #endif /* MACH_KDB */
2397 #endif /* STATIC_CONFIG */
2398 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2399
2400 sequential = FALSE;
2401 need_activation = FALSE;
2402
2403 if (m->no_isync == TRUE) {
2404 m->no_isync = FALSE;
2405 pmap_sync_page_data_phys(m->phys_page);
2406
2407 if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
2408 /*
2409 * found it in the cache, but this
2410 * is the first fault-in of the page (no_isync == TRUE)
2411 * so it must have come in as part of
2412 * a cluster... account 1 pagein against it
2413 */
2414 VM_STAT(pageins++);
2415 current_task()->pageins++;
2416 type_of_fault = DBG_PAGEIN_FAULT;
2417 sequential = TRUE;
2418 }
2419 if (m->clustered)
2420 need_activation = TRUE;
2421
2422 } else if (cache_attr != VM_WIMG_DEFAULT) {
2423 pmap_sync_page_attributes_phys(m->phys_page);
2424 }
2425
2426 if(caller_pmap) {
2427 PMAP_ENTER(caller_pmap,
2428 caller_pmap_addr, m,
2429 prot, cache_attr, wired);
2430 } else {
2431 PMAP_ENTER(pmap, vaddr, m,
2432 prot, cache_attr, wired);
2433 }
2434
2435 /*
2436 * Hold queues lock to manipulate
2437 * the page queues. The change-wiring
2438 * case is obvious. In the soft-ref-bits
2439 * case, activate the page only if it fell
2440 * off the paging queues; otherwise just
2441 * activate it if it's inactive.
2442 *
2443 * NOTE: original vm_fault code will
2444 * move active page to back of active
2445 * queue. This code doesn't.
2446 */
2447 vm_page_lock_queues();
2448
2449 if (m->clustered) {
2450 vm_pagein_cluster_used++;
2451 m->clustered = FALSE;
2452 }
2453 m->reference = TRUE;
2454
2455 if (change_wiring) {
2456 if (wired)
2457 vm_page_wire(m);
2458 else
2459 vm_page_unwire(m);
2460 }
2461 #if VM_FAULT_STATIC_CONFIG
2462 else {
2463 if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
2464 vm_page_activate(m);
2465 }
2466 #else
2467 else if (software_reference_bits) {
2468 if (!m->active && !m->inactive)
2469 vm_page_activate(m);
2470 }
2471 else if (!m->active) {
2472 vm_page_activate(m);
2473 }
2474 #endif
2475 vm_page_unlock_queues();
2476
2477 /*
2478 * That's it, clean up and return.
2479 */
2480 PAGE_WAKEUP_DONE(m);
2481
2482 sequential = (sequential && vm_page_deactivate_behind) ?
2483 vm_fault_deactivate_behind(object, cur_offset, behavior) :
2484 FALSE;
2485
2486 /*
2487 * Add non-sequential pages to the working set.
2488 * The sequential pages will be brought in through
2489 * normal clustering behavior.
2490 */
2491 if (!sequential && !object->private) {
2492 write_startup_file =
2493 vm_fault_tws_insert(map, real_map, vaddr,
2494 object, cur_offset);
2495 }
2496
2497 vm_object_paging_end(object);
2498 vm_object_unlock(object);
2499
2500 vm_map_unlock_read(map);
2501 if(real_map != map)
2502 vm_map_unlock(real_map);
2503
2504 if(write_startup_file)
2505 tws_send_startup_info(current_task());
2506
2507 thread_interrupt_level(interruptible_state);
2508
2509
2510 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2511 vaddr,
2512 type_of_fault & 0xff,
2513 KERN_SUCCESS,
2514 type_of_fault >> 8,
2515 0);
2516
2517 return KERN_SUCCESS;
2518 }
2519
2520 /*
2521 * Copy on write fault. If objects match, then
2522 * object->copy must not be NULL (else control
2523 * would be in previous code block), and we
2524 * have a potential push into the copy object
2525 * with which we won't cope here.
2526 */
2527
2528 if (cur_object == object)
2529 break;
2530 /*
2531 * This is now a shadow based copy on write
2532 * fault -- it requires a copy up the shadow
2533 * chain.
2534 *
2535 * Allocate a page in the original top level
2536 * object. Give up if allocate fails. Also
2537 * need to remember current page, as it's the
2538 * source of the copy.
2539 */
2540 cur_m = m;
2541 m = vm_page_grab();
2542 if (m == VM_PAGE_NULL) {
2543 break;
2544 }
2545 /*
2546 * Now do the copy. Mark the source busy
2547 * and take out paging references on both
2548 * objects.
2549 *
2550 * NOTE: This code holds the map lock across
2551 * the page copy.
2552 */
2553
2554 cur_m->busy = TRUE;
2555 vm_page_copy(cur_m, m);
2556 vm_page_insert(m, object, offset);
2557
2558 vm_object_paging_begin(cur_object);
2559 vm_object_paging_begin(object);
2560
2561 type_of_fault = DBG_COW_FAULT;
2562 VM_STAT(cow_faults++);
2563 current_task()->cow_faults++;
2564
2565 /*
2566 * Now cope with the source page and object
2567 * If the top object has a ref count of 1
2568 * then no other map can access it, and hence
2569 * it's not necessary to do the pmap_disconnect.
2570 */
2571
2572 vm_page_lock_queues();
2573 vm_page_deactivate(cur_m);
2574 m->dirty = TRUE;
2575 pmap_disconnect(cur_m->phys_page);
2576 vm_page_unlock_queues();
2577
2578 PAGE_WAKEUP_DONE(cur_m);
2579 vm_object_paging_end(cur_object);
2580 vm_object_unlock(cur_object);
2581
2582 /*
2583 * Slight hack to call vm_object_collapse()
2584 * and then reuse the common map-in code.
2585 * Note that the object lock was taken above.
2586 */
2587
2588 vm_object_paging_end(object);
2589 vm_object_collapse(object, offset);
2590 vm_object_paging_begin(object);
2591
2592 goto FastPmapEnter;
2593 }
2594 else {
2595
2596 /*
2597 * No page at cur_object, cur_offset
2598 */
2599
2600 if (cur_object->pager_created) {
2601
2602 /*
2603 * Have to talk to the pager. Give up.
2604 */
2605 break;
2606 }
2607
2608
2609 if (cur_object->shadow == VM_OBJECT_NULL) {
2610
2611 if (cur_object->shadow_severed) {
2612 vm_object_paging_end(object);
2613 vm_object_unlock(object);
2614 vm_map_unlock_read(map);
2615 if(real_map != map)
2616 vm_map_unlock(real_map);
2617
2618 if(write_startup_file)
2619 tws_send_startup_info(
2620 current_task());
2621
2622 thread_interrupt_level(interruptible_state);
2623
2624 return KERN_MEMORY_ERROR;
2625 }
2626
2627 /*
2628 * Zero fill fault. Page gets
2629 * filled in top object. Insert
2630 * page, then drop any lower lock.
2631 * Give up if no page.
2632 */
2633 if (VM_PAGE_THROTTLED()) {
2634 break;
2635 }
2636
2637 /*
2638 * Are we protecting the system from
2639 * backing store exhaustion? If so,
2640 * sleep unless we are privileged.
2641 */
2642 if(vm_backing_store_low) {
2643 if(!(current_task()->priv_flags
2644 & VM_BACKING_STORE_PRIV))
2645 break;
2646 }
2647 m = vm_page_alloc(object, offset);
2648 if (m == VM_PAGE_NULL) {
2649 break;
2650 }
2651 /*
2652 * This is a zero-fill or initial fill
2653 * page fault. As such, we consider it
2654 * undefined with respect to instruction
2655 * execution; i.e., it is the responsibility
2656 * of higher layers to call for an instruction
2657 * sync after changing the contents and before
2658 * sending a program into this area. We
2659 * choose this approach for performance.
2660 */
2661
2662 m->no_isync = FALSE;
2663
2664 if (cur_object != object)
2665 vm_object_unlock(cur_object);
2666
2667 vm_object_paging_begin(object);
2668 vm_object_unlock(object);
2669
2670 /*
2671 * Now zero fill page and map it.
2672 * the page is probably going to
2673 * be written soon, so don't bother
2674 * to clear the modified bit
2675 *
2676 * NOTE: This code holds the map
2677 * lock across the zero fill.
2678 */
2679
2680 if (!map->no_zero_fill) {
2681 vm_page_zero_fill(m);
2682 type_of_fault = DBG_ZERO_FILL_FAULT;
2683 VM_STAT(zero_fill_count++);
2684 }
2685 vm_page_lock_queues();
2686 VM_PAGE_QUEUES_REMOVE(m);
2687
2688 m->page_ticket = vm_page_ticket;
2689 assert(!m->laundry);
2690 assert(m->object != kernel_object);
2691 assert(m->pageq.next == NULL &&
2692 m->pageq.prev == NULL);
2693 if(m->object->size > 0x200000) {
2694 m->zero_fill = TRUE;
2695 /* depends on the queues lock */
2696 vm_zf_count += 1;
2697 queue_enter(&vm_page_queue_zf,
2698 m, vm_page_t, pageq);
2699 } else {
2700 queue_enter(
2701 &vm_page_queue_inactive,
2702 m, vm_page_t, pageq);
2703 }
2704 vm_page_ticket_roll++;
2705 if(vm_page_ticket_roll ==
2706 VM_PAGE_TICKETS_IN_ROLL) {
2707 vm_page_ticket_roll = 0;
2708 if(vm_page_ticket ==
2709 VM_PAGE_TICKET_ROLL_IDS)
2710 vm_page_ticket= 0;
2711 else
2712 vm_page_ticket++;
2713 }
2714
2715 m->inactive = TRUE;
2716 vm_page_inactive_count++;
2717 vm_page_unlock_queues();
2718 vm_object_lock(object);
2719
2720 goto FastPmapEnter;
2721 }
2722
2723 /*
2724 * On to the next level
2725 */
2726
2727 cur_offset += cur_object->shadow_offset;
2728 new_object = cur_object->shadow;
2729 vm_object_lock(new_object);
2730 if (cur_object != object)
2731 vm_object_unlock(cur_object);
2732 cur_object = new_object;
2733
2734 continue;
2735 }
2736 }
2737
2738 /*
2739 * Cleanup from fast fault failure. Drop any object
2740 * lock other than original and drop map lock.
2741 */
2742
2743 if (object != cur_object)
2744 vm_object_unlock(cur_object);
2745 }
2746 vm_map_unlock_read(map);
2747
2748 if(real_map != map)
2749 vm_map_unlock(real_map);
2750
2751 /*
2752 * Make a reference to this object to
2753 * prevent its disposal while we are messing with
2754 * it. Once we have the reference, the map is free
2755 * to be diddled. Since objects reference their
2756 * shadows (and copies), they will stay around as well.
2757 */
2758
2759 assert(object->ref_count > 0);
2760 object->ref_count++;
2761 vm_object_res_reference(object);
2762 vm_object_paging_begin(object);
2763
2764 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2765
2766 if (!object->private) {
2767 write_startup_file =
2768 vm_fault_tws_insert(map, real_map, vaddr, object, offset);
2769 }
2770
2771 kr = vm_fault_page(object, offset, fault_type,
2772 (change_wiring && !wired),
2773 interruptible,
2774 lo_offset, hi_offset, behavior,
2775 &prot, &result_page, &top_page,
2776 &type_of_fault,
2777 &error_code, map->no_zero_fill, FALSE, map, vaddr);
2778
2779 /*
2780 * If we didn't succeed, lose the object reference immediately.
2781 */
2782
2783 if (kr != VM_FAULT_SUCCESS)
2784 vm_object_deallocate(object);
2785
2786 /*
2787 * See why we failed, and take corrective action.
2788 */
2789
2790 switch (kr) {
2791 case VM_FAULT_SUCCESS:
2792 break;
2793 case VM_FAULT_MEMORY_SHORTAGE:
2794 if (vm_page_wait((change_wiring) ?
2795 THREAD_UNINT :
2796 THREAD_ABORTSAFE))
2797 goto RetryFault;
2798 /* fall thru */
2799 case VM_FAULT_INTERRUPTED:
2800 kr = KERN_ABORTED;
2801 goto done;
2802 case VM_FAULT_RETRY:
2803 goto RetryFault;
2804 case VM_FAULT_FICTITIOUS_SHORTAGE:
2805 vm_page_more_fictitious();
2806 goto RetryFault;
2807 case VM_FAULT_MEMORY_ERROR:
2808 if (error_code)
2809 kr = error_code;
2810 else
2811 kr = KERN_MEMORY_ERROR;
2812 goto done;
2813 }
2814
2815 m = result_page;
2816
2817 if(m != VM_PAGE_NULL) {
2818 assert((change_wiring && !wired) ?
2819 (top_page == VM_PAGE_NULL) :
2820 ((top_page == VM_PAGE_NULL) == (m->object == object)));
2821 }
2822
2823 /*
2824 * How to clean up the result of vm_fault_page. This
2825 * happens whether the mapping is entered or not.
2826 */
2827
2828 #define UNLOCK_AND_DEALLOCATE \
2829 MACRO_BEGIN \
2830 vm_fault_cleanup(m->object, top_page); \
2831 vm_object_deallocate(object); \
2832 MACRO_END
2833
2834 /*
2835 * What to do with the resulting page from vm_fault_page
2836 * if it doesn't get entered into the physical map:
2837 */
2838
2839 #define RELEASE_PAGE(m) \
2840 MACRO_BEGIN \
2841 PAGE_WAKEUP_DONE(m); \
2842 vm_page_lock_queues(); \
2843 if (!m->active && !m->inactive) \
2844 vm_page_activate(m); \
2845 vm_page_unlock_queues(); \
2846 MACRO_END
2847
2848 /*
2849 * We must verify that the maps have not changed
2850 * since our last lookup.
2851 */
2852
2853 if(m != VM_PAGE_NULL) {
2854 old_copy_object = m->object->copy;
2855 vm_object_unlock(m->object);
2856 } else {
2857 old_copy_object = VM_OBJECT_NULL;
2858 }
2859 if ((map != original_map) || !vm_map_verify(map, &version)) {
2860 vm_object_t retry_object;
2861 vm_object_offset_t retry_offset;
2862 vm_prot_t retry_prot;
2863
2864 /*
2865 * To avoid trying to write_lock the map while another
2866 * thread has it read_locked (in vm_map_pageable), we
2867 * do not try for write permission. If the page is
2868 * still writable, we will get write permission. If it
2869 * is not, or has been marked needs_copy, we enter the
2870 * mapping without write permission, and will merely
2871 * take another fault.
2872 */
2873 map = original_map;
2874 vm_map_lock_read(map);
2875 kr = vm_map_lookup_locked(&map, vaddr,
2876 fault_type & ~VM_PROT_WRITE, &version,
2877 &retry_object, &retry_offset, &retry_prot,
2878 &wired, &behavior, &lo_offset, &hi_offset,
2879 &real_map);
2880 pmap = real_map->pmap;
2881
2882 if (kr != KERN_SUCCESS) {
2883 vm_map_unlock_read(map);
2884 if(m != VM_PAGE_NULL) {
2885 vm_object_lock(m->object);
2886 RELEASE_PAGE(m);
2887 UNLOCK_AND_DEALLOCATE;
2888 } else {
2889 vm_object_deallocate(object);
2890 }
2891 goto done;
2892 }
2893
2894 vm_object_unlock(retry_object);
2895 if(m != VM_PAGE_NULL) {
2896 vm_object_lock(m->object);
2897 } else {
2898 vm_object_lock(object);
2899 }
2900
2901 if ((retry_object != object) ||
2902 (retry_offset != offset)) {
2903 vm_map_unlock_read(map);
2904 if(real_map != map)
2905 vm_map_unlock(real_map);
2906 if(m != VM_PAGE_NULL) {
2907 RELEASE_PAGE(m);
2908 UNLOCK_AND_DEALLOCATE;
2909 } else {
2910 vm_object_deallocate(object);
2911 }
2912 goto RetryFault;
2913 }
2914
2915 /*
2916 * Check whether the protection has changed or the object
2917 * has been copied while we left the map unlocked.
2918 */
2919 prot &= retry_prot;
2920 if(m != VM_PAGE_NULL) {
2921 vm_object_unlock(m->object);
2922 } else {
2923 vm_object_unlock(object);
2924 }
2925 }
2926 if(m != VM_PAGE_NULL) {
2927 vm_object_lock(m->object);
2928 } else {
2929 vm_object_lock(object);
2930 }
2931
2932 /*
2933 * If the copy object changed while the top-level object
2934 * was unlocked, then we must take away write permission.
2935 */
2936
2937 if(m != VM_PAGE_NULL) {
2938 if (m->object->copy != old_copy_object)
2939 prot &= ~VM_PROT_WRITE;
2940 }
2941
2942 /*
2943 * If we want to wire down this page, but no longer have
2944 * adequate permissions, we must start all over.
2945 */
2946
2947 if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2948 vm_map_verify_done(map, &version);
2949 if(real_map != map)
2950 vm_map_unlock(real_map);
2951 if(m != VM_PAGE_NULL) {
2952 RELEASE_PAGE(m);
2953 UNLOCK_AND_DEALLOCATE;
2954 } else {
2955 vm_object_deallocate(object);
2956 }
2957 goto RetryFault;
2958 }
2959
2960 /*
2961 * Put this page into the physical map.
2962 * We had to do the unlock above because pmap_enter
2963 * may cause other faults. The page may be on
2964 * the pageout queues. If the pageout daemon comes
2965 * across the page, it will remove it from the queues.
2966 */
2967 need_activation = FALSE;
2968
2969 if (m != VM_PAGE_NULL) {
2970 if (m->no_isync == TRUE) {
2971 pmap_sync_page_data_phys(m->phys_page);
2972
2973 if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
2974 /*
2975 * found it in the cache, but this
2976 * is the first fault-in of the page (no_isync == TRUE)
2977 * so it must have come in as part of
2978 * a cluster... account 1 pagein against it
2979 */
2980 VM_STAT(pageins++);
2981 current_task()->pageins++;
2982
2983 type_of_fault = DBG_PAGEIN_FAULT;
2984 }
2985 if (m->clustered) {
2986 need_activation = TRUE;
2987 }
2988 m->no_isync = FALSE;
2989 }
2990 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2991
2992 if(caller_pmap) {
2993 PMAP_ENTER(caller_pmap,
2994 caller_pmap_addr, m,
2995 prot, cache_attr, wired);
2996 } else {
2997 PMAP_ENTER(pmap, vaddr, m,
2998 prot, cache_attr, wired);
2999 }
3000
3001 /*
3002 * Add working set information for private objects here.
3003 */
3004 if (m->object->private) {
3005 write_startup_file =
3006 vm_fault_tws_insert(map, real_map, vaddr,
3007 m->object, m->offset);
3008 }
3009 } else {
3010
3011 #ifndef i386
3012 vm_map_entry_t entry;
3013 vm_map_offset_t laddr;
3014 vm_map_offset_t ldelta, hdelta;
3015
3016 /*
3017 * do a pmap block mapping from the physical address
3018 * in the object
3019 */
3020
3021 /* While we do not worry about execution protection in */
3022 /* general, certain pages may have instruction execution */
3023 /* disallowed. We will check here, and if not allowed */
3024 /* to execute, we return with a protection failure. */
3025
3026 if((full_fault_type & VM_PROT_EXECUTE) &&
3027 (!pmap_eligible_for_execute((ppnum_t)
3028 (object->shadow_offset >> 12)))) {
3029
3030 vm_map_verify_done(map, &version);
3031 if(real_map != map)
3032 vm_map_unlock(real_map);
3033 vm_fault_cleanup(object, top_page);
3034 vm_object_deallocate(object);
3035 kr = KERN_PROTECTION_FAILURE;
3036 goto done;
3037 }
3038
3039 if(real_map != map) {
3040 vm_map_unlock(real_map);
3041 }
3042 if (original_map != map) {
3043 vm_map_unlock_read(map);
3044 vm_map_lock_read(original_map);
3045 map = original_map;
3046 }
3047 real_map = map;
3048
3049 laddr = vaddr;
3050 hdelta = 0xFFFFF000;
3051 ldelta = 0xFFFFF000;
3052
3053
3054 while(vm_map_lookup_entry(map, laddr, &entry)) {
3055 if(ldelta > (laddr - entry->vme_start))
3056 ldelta = laddr - entry->vme_start;
3057 if(hdelta > (entry->vme_end - laddr))
3058 hdelta = entry->vme_end - laddr;
3059 if(entry->is_sub_map) {
3060
3061 laddr = (laddr - entry->vme_start)
3062 + entry->offset;
3063 vm_map_lock_read(entry->object.sub_map);
3064 if(map != real_map)
3065 vm_map_unlock_read(map);
3066 if(entry->use_pmap) {
3067 vm_map_unlock_read(real_map);
3068 real_map = entry->object.sub_map;
3069 }
3070 map = entry->object.sub_map;
3071
3072 } else {
3073 break;
3074 }
3075 }
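/*
 * Illustrative example of the deltas computed above (values are
 * hypothetical): for a single entry spanning [0x10000, 0x18000)
 * and laddr 0x13000, ldelta = 0x3000 and hdelta = 0x5000, so the
 * block mapping set up below covers (ldelta + hdelta) >> 12 = 8
 * pages, starting ldelta bytes below the faulting address.  When
 * submaps are traversed, each delta is the minimum over all of
 * the entries walked.
 */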
3076
3077 if(vm_map_lookup_entry(map, laddr, &entry) &&
3078 (entry->object.vm_object != NULL) &&
3079 (entry->object.vm_object == object)) {
3080
3081
3082 if(caller_pmap) {
3083 /* Set up a block mapped area */
3084 pmap_map_block(caller_pmap,
3085 (addr64_t)(caller_pmap_addr - ldelta),
3086 (((vm_map_offset_t)
3087 (entry->object.vm_object->shadow_offset))
3088 + entry->offset +
3089 (laddr - entry->vme_start)
3090 - ldelta) >> 12,
3091 ((ldelta + hdelta) >> 12), prot,
3092 (VM_WIMG_MASK & (int)object->wimg_bits), 0);
3093 } else {
3094 /* Set up a block mapped area */
3095 pmap_map_block(real_map->pmap,
3096 (addr64_t)(vaddr - ldelta),
3097 (((vm_map_offset_t)
3098 (entry->object.vm_object->shadow_offset))
3099 + entry->offset +
3100 (laddr - entry->vme_start) - ldelta) >> 12,
3101 ((ldelta + hdelta) >> 12), prot,
3102 (VM_WIMG_MASK & (int)object->wimg_bits), 0);
3103 }
3104 }
3105 #else
3106 #ifdef notyet
3107 if(caller_pmap) {
3108 pmap_enter(caller_pmap, caller_pmap_addr,
3109 object->shadow_offset>>12, prot, 0, TRUE);
3110 } else {
3111 pmap_enter(pmap, vaddr,
3112 object->shadow_offset>>12, prot, 0, TRUE);
3113 }
3114 /* Map it in */
3115 #endif
3116 #endif
3117
3118 }
3119
3120 /*
3121 * If the page is not wired down and isn't already
3122 * on a pageout queue, then put it where the
3123 * pageout daemon can find it.
3124 */
3125 if(m != VM_PAGE_NULL) {
3126 vm_page_lock_queues();
3127
3128 if (m->clustered) {
3129 vm_pagein_cluster_used++;
3130 m->clustered = FALSE;
3131 }
3132 m->reference = TRUE;
3133
3134 if (change_wiring) {
3135 if (wired)
3136 vm_page_wire(m);
3137 else
3138 vm_page_unwire(m);
3139 }
3140 #if VM_FAULT_STATIC_CONFIG
3141 else {
3142 if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
3143 vm_page_activate(m);
3144 }
3145 #else
3146 else if (software_reference_bits) {
3147 if (!m->active && !m->inactive)
3148 vm_page_activate(m);
3149 m->reference = TRUE;
3150 } else {
3151 vm_page_activate(m);
3152 }
3153 #endif
3154 vm_page_unlock_queues();
3155 }
3156
3157 /*
3158 * Unlock everything, and return
3159 */
3160
3161 vm_map_verify_done(map, &version);
3162 if(real_map != map)
3163 vm_map_unlock(real_map);
3164 if(m != VM_PAGE_NULL) {
3165 PAGE_WAKEUP_DONE(m);
3166 UNLOCK_AND_DEALLOCATE;
3167 } else {
3168 vm_fault_cleanup(object, top_page);
3169 vm_object_deallocate(object);
3170 }
3171 kr = KERN_SUCCESS;
3172
3173 #undef UNLOCK_AND_DEALLOCATE
3174 #undef RELEASE_PAGE
3175
3176 done:
3177 if(write_startup_file)
3178 tws_send_startup_info(current_task());
3179
3180 thread_interrupt_level(interruptible_state);
3181
3182 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
3183 vaddr,
3184 type_of_fault & 0xff,
3185 kr,
3186 type_of_fault >> 8,
3187 0);
3188
3189 return(kr);
3190 }
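/*
 * Illustrative sketch (not part of the build): a minimal caller of
 * vm_fault(), roughly as a machine-dependent trap handler might issue
 * it.  The pmap is resolved from the map itself, so no caller pmap is
 * supplied, and the wiring state is not being changed.  The routine
 * name and arguments are hypothetical.
 */
#if 0
static kern_return_t
example_handle_user_fault(
	vm_map_t	map,		/* faulting task's map */
	vm_map_offset_t	fault_addr,	/* faulting virtual address */
	vm_prot_t	fault_type)	/* VM_PROT_READ and/or VM_PROT_WRITE */
{
	kern_return_t	kr;

	kr = vm_fault(map,
		      fault_addr,
		      fault_type,
		      FALSE,		/* not changing wiring */
		      THREAD_ABORTSAFE,	/* user faults may be aborted */
		      NULL,		/* no caller pmap ... */
		      0);		/* ... so no caller pmap address */

	/*
	 * KERN_SUCCESS means the page is now mapped; any other return
	 * would be turned into an exception by the trap handler.
	 */
	return kr;
}
#endif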
3191
3192 /*
3193 * vm_fault_wire:
3194 *
3195 * Wire down a range of virtual addresses in a map.
3196 */
3197 kern_return_t
3198 vm_fault_wire(
3199 vm_map_t map,
3200 vm_map_entry_t entry,
3201 pmap_t pmap,
3202 vm_map_offset_t pmap_addr)
3203 {
3204
3205 register vm_map_offset_t va;
3206 register vm_map_offset_t end_addr = entry->vme_end;
3207 register kern_return_t rc;
3208
3209 assert(entry->in_transition);
3210
3211 if ((entry->object.vm_object != NULL) &&
3212 !entry->is_sub_map &&
3213 entry->object.vm_object->phys_contiguous) {
3214 return KERN_SUCCESS;
3215 }
3216
3217 /*
3218 * Inform the physical mapping system that the
3219 * range of addresses may not fault, so that
3220 * page tables and such can be locked down as well.
3221 */
3222
3223 pmap_pageable(pmap, pmap_addr,
3224 pmap_addr + (end_addr - entry->vme_start), FALSE);
3225
3226 /*
3227 * We simulate a fault to get the page and enter it
3228 * in the physical map.
3229 */
3230
3231 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3232 if ((rc = vm_fault_wire_fast(
3233 map, va, entry, pmap,
3234 pmap_addr + (va - entry->vme_start)
3235 )) != KERN_SUCCESS) {
3236 rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
3237 (pmap == kernel_pmap) ?
3238 THREAD_UNINT : THREAD_ABORTSAFE,
3239 pmap, pmap_addr + (va - entry->vme_start));
3240 }
3241
3242 if (rc != KERN_SUCCESS) {
3243 struct vm_map_entry tmp_entry = *entry;
3244
3245 /* unwire wired pages */
3246 tmp_entry.vme_end = va;
3247 vm_fault_unwire(map,
3248 &tmp_entry, FALSE, pmap, pmap_addr);
3249
3250 return rc;
3251 }
3252 }
3253 return KERN_SUCCESS;
3254 }
3255
3256 /*
3257 * vm_fault_unwire:
3258 *
3259 * Unwire a range of virtual addresses in a map.
3260 */
3261 void
3262 vm_fault_unwire(
3263 vm_map_t map,
3264 vm_map_entry_t entry,
3265 boolean_t deallocate,
3266 pmap_t pmap,
3267 vm_map_offset_t pmap_addr)
3268 {
3269 register vm_map_offset_t va;
3270 register vm_map_offset_t end_addr = entry->vme_end;
3271 vm_object_t object;
3272
3273 object = (entry->is_sub_map)
3274 ? VM_OBJECT_NULL : entry->object.vm_object;
3275
3276 /*
3277 * Since the pages are wired down, we must be able to
3278 * get their mappings from the physical map system.
3279 */
3280
3281 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3282 pmap_change_wiring(pmap,
3283 pmap_addr + (va - entry->vme_start), FALSE);
3284
3285 if (object == VM_OBJECT_NULL) {
3286 (void) vm_fault(map, va, VM_PROT_NONE,
3287 TRUE, THREAD_UNINT, pmap, pmap_addr);
3288 } else if (object->phys_contiguous) {
3289 continue;
3290 } else {
3291 vm_prot_t prot;
3292 vm_page_t result_page;
3293 vm_page_t top_page;
3294 vm_object_t result_object;
3295 vm_fault_return_t result;
3296
3297 do {
3298 prot = VM_PROT_NONE;
3299
3300 vm_object_lock(object);
3301 vm_object_paging_begin(object);
3302 XPR(XPR_VM_FAULT,
3303 "vm_fault_unwire -> vm_fault_page\n",
3304 0,0,0,0,0);
3305 result = vm_fault_page(object,
3306 entry->offset +
3307 (va - entry->vme_start),
3308 VM_PROT_NONE, TRUE,
3309 THREAD_UNINT,
3310 entry->offset,
3311 entry->offset +
3312 (entry->vme_end
3313 - entry->vme_start),
3314 entry->behavior,
3315 &prot,
3316 &result_page,
3317 &top_page,
3318 (int *)0,
3319 0, map->no_zero_fill,
3320 FALSE, NULL, 0);
3321 } while (result == VM_FAULT_RETRY);
3322
3323 if (result != VM_FAULT_SUCCESS)
3324 panic("vm_fault_unwire: failure");
3325
3326 result_object = result_page->object;
3327 if (deallocate) {
3328 assert(!result_page->fictitious);
3329 pmap_disconnect(result_page->phys_page);
3330 VM_PAGE_FREE(result_page);
3331 } else {
3332 vm_page_lock_queues();
3333 vm_page_unwire(result_page);
3334 vm_page_unlock_queues();
3335 PAGE_WAKEUP_DONE(result_page);
3336 }
3337
3338 vm_fault_cleanup(result_object, top_page);
3339 }
3340 }
3341
3342 /*
3343 * Inform the physical mapping system that the range
3344 * of addresses may fault, so that page tables and
3345 * such may be unwired themselves.
3346 */
3347
3348 pmap_pageable(pmap, pmap_addr,
3349 pmap_addr + (end_addr - entry->vme_start), TRUE);
3350
3351 }
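/*
 * Illustrative sketch (not part of the build): the expected pairing of
 * vm_fault_wire() and vm_fault_unwire() over a single map entry, as a
 * wiring path might drive it.  The sketch assumes the entry has already
 * been marked in_transition by the caller, is not a submap, and is
 * being wired into the map's own pmap, so pmap_addr mirrors
 * entry->vme_start.  All names are hypothetical.
 */
#if 0
static kern_return_t
example_wire_entry(
	vm_map_t	map,
	vm_map_entry_t	entry)		/* in_transition, not a submap */
{
	kern_return_t	kr;

	/* fault every page in the entry and wire it down */
	kr = vm_fault_wire(map, entry, map->pmap, entry->vme_start);
	if (kr != KERN_SUCCESS) {
		/* vm_fault_wire already unwired any partial work */
		return kr;
	}

	/* ... the range is now safe from pageout ... */

	/* later: release the wiring; FALSE keeps the pages resident */
	vm_fault_unwire(map, entry, FALSE, map->pmap, entry->vme_start);

	return KERN_SUCCESS;
}
#endif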
3352
3353 /*
3354 * vm_fault_wire_fast:
3355 *
3356 * Handle common case of a wire down page fault at the given address.
3357 * If successful, the page is inserted into the associated physical map.
3358 * The map entry is passed in to avoid the overhead of a map lookup.
3359 *
3360 * NOTE: the given address should be truncated to the
3361 * proper page address.
3362 *
3363 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
3364 * a standard error specifying why the fault is fatal is returned.
3365 *
3366 * The map in question must be referenced, and remains so.
3367 * Caller has a read lock on the map.
3368 *
3369 * This is a stripped version of vm_fault() for wiring pages. Anything
3370 * other than the common case will return KERN_FAILURE, and the caller
3371 * is expected to call vm_fault().
3372 */
3373 kern_return_t
3374 vm_fault_wire_fast(
3375 __unused vm_map_t map,
3376 vm_map_offset_t va,
3377 vm_map_entry_t entry,
3378 pmap_t pmap,
3379 vm_map_offset_t pmap_addr)
3380 {
3381 vm_object_t object;
3382 vm_object_offset_t offset;
3383 register vm_page_t m;
3384 vm_prot_t prot;
3385 thread_t thread = current_thread();
3386 unsigned int cache_attr;
3387
3388 VM_STAT(faults++);
3389
3390 if (thread != THREAD_NULL && thread->task != TASK_NULL)
3391 thread->task->faults++;
3392
3393 /*
3394 * Recovery actions
3395 */
3396
3397 #undef RELEASE_PAGE
3398 #define RELEASE_PAGE(m) { \
3399 PAGE_WAKEUP_DONE(m); \
3400 vm_page_lock_queues(); \
3401 vm_page_unwire(m); \
3402 vm_page_unlock_queues(); \
3403 }
3404
3405
3406 #undef UNLOCK_THINGS
3407 #define UNLOCK_THINGS { \
3408 vm_object_paging_end(object); \
3409 vm_object_unlock(object); \
3410 }
3411
3412 #undef UNLOCK_AND_DEALLOCATE
3413 #define UNLOCK_AND_DEALLOCATE { \
3414 UNLOCK_THINGS; \
3415 vm_object_deallocate(object); \
3416 }
3417 /*
3418 * Give up and have caller do things the hard way.
3419 */
3420
3421 #define GIVE_UP { \
3422 UNLOCK_AND_DEALLOCATE; \
3423 return(KERN_FAILURE); \
3424 }
3425
3426
3427 /*
3428 * If this entry is not directly to a vm_object, bail out.
3429 */
3430 if (entry->is_sub_map)
3431 return(KERN_FAILURE);
3432
3433 /*
3434 * Find the backing store object and offset into it.
3435 */
3436
3437 object = entry->object.vm_object;
3438 offset = (va - entry->vme_start) + entry->offset;
3439 prot = entry->protection;
3440
3441 /*
3442 * Make a reference to this object to prevent its
3443 * disposal while we are messing with it.
3444 */
3445
3446 vm_object_lock(object);
3447 assert(object->ref_count > 0);
3448 object->ref_count++;
3449 vm_object_res_reference(object);
3450 vm_object_paging_begin(object);
3451
3452 /*
3453 * INVARIANTS (through entire routine):
3454 *
3455 * 1) At all times, we must either have the object
3456 * lock or a busy page in some object to prevent
3457 * some other thread from trying to bring in
3458 * the same page.
3459 *
3460 * 2) Once we have a busy page, we must remove it from
3461 * the pageout queues, so that the pageout daemon
3462 * will not grab it away.
3463 *
3464 */
3465
3466 /*
3467 * Look for page in top-level object. If it's not there or
3468 * there's something going on, give up.
3469 * ENCRYPTED SWAP: use the slow fault path, since we'll need to
3470 * decrypt the page before wiring it down.
3471 */
3472 m = vm_page_lookup(object, offset);
3473 if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) ||
3474 (m->unusual && ( m->error || m->restart || m->absent ||
3475 prot & m->page_lock))) {
3476
3477 GIVE_UP;
3478 }
3479 ASSERT_PAGE_DECRYPTED(m);
3480
3481 /*
3482 * Wire the page down now. All bail outs beyond this
3483 * point must unwire the page.
3484 */
3485
3486 vm_page_lock_queues();
3487 vm_page_wire(m);
3488 vm_page_unlock_queues();
3489
3490 /*
3491 * Mark page busy for other threads.
3492 */
3493 assert(!m->busy);
3494 m->busy = TRUE;
3495 assert(!m->absent);
3496
3497 /*
3498 * Give up if the page is being written and there's a copy object
3499 */
3500 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3501 RELEASE_PAGE(m);
3502 GIVE_UP;
3503 }
3504
3505 /*
3506 * Put this page into the physical map.
3507 * We have to unlock the object because pmap_enter
3508 * may cause other faults.
3509 */
3510 if (m->no_isync == TRUE) {
3511 pmap_sync_page_data_phys(m->phys_page);
3512
3513 m->no_isync = FALSE;
3514 }
3515
3516 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3517
3518 PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);
3519
3520 /*
3521 * Unlock everything, and return
3522 */
3523
3524 PAGE_WAKEUP_DONE(m);
3525 UNLOCK_AND_DEALLOCATE;
3526
3527 return(KERN_SUCCESS);
3528
3529 }
3530
3531 /*
3532 * Routine: vm_fault_copy_cleanup
3533 * Purpose:
3534 * Release a page used by vm_fault_copy.
3535 */
3536
3537 void
3538 vm_fault_copy_cleanup(
3539 vm_page_t page,
3540 vm_page_t top_page)
3541 {
3542 vm_object_t object = page->object;
3543
3544 vm_object_lock(object);
3545 PAGE_WAKEUP_DONE(page);
3546 vm_page_lock_queues();
3547 if (!page->active && !page->inactive)
3548 vm_page_activate(page);
3549 vm_page_unlock_queues();
3550 vm_fault_cleanup(object, top_page);
3551 }
3552
3553 void
3554 vm_fault_copy_dst_cleanup(
3555 vm_page_t page)
3556 {
3557 vm_object_t object;
3558
3559 if (page != VM_PAGE_NULL) {
3560 object = page->object;
3561 vm_object_lock(object);
3562 vm_page_lock_queues();
3563 vm_page_unwire(page);
3564 vm_page_unlock_queues();
3565 vm_object_paging_end(object);
3566 vm_object_unlock(object);
3567 }
3568 }
3569
3570 /*
3571 * Routine: vm_fault_copy
3572 *
3573 * Purpose:
3574 * Copy pages from one virtual memory object to another --
3575 * neither the source nor destination pages need be resident.
3576 *
3577 * Before actually copying a page, the version associated with
3578 * the destination address map will be verified.
3579 *
3580 * In/out conditions:
3581 * The caller must hold a reference, but not a lock, to
3582 * each of the source and destination objects and to the
3583 * destination map.
3584 *
3585 * Results:
3586 * Returns KERN_SUCCESS if no errors were encountered in
3587 * reading or writing the data. Returns KERN_INTERRUPTED if
3588 * the operation was interrupted (only possible if the
3589 * "interruptible" argument is asserted). Other return values
3590 * indicate a permanent error in copying the data.
3591 *
3592 * The actual amount of data copied will be returned in the
3593 * "copy_size" argument. In the event that the destination map
3594 * verification failed, this amount may be less than the amount
3595 * requested.
3596 */
3597 kern_return_t
3598 vm_fault_copy(
3599 vm_object_t src_object,
3600 vm_object_offset_t src_offset,
3601 vm_map_size_t *copy_size, /* INOUT */
3602 vm_object_t dst_object,
3603 vm_object_offset_t dst_offset,
3604 vm_map_t dst_map,
3605 vm_map_version_t *dst_version,
3606 int interruptible)
3607 {
3608 vm_page_t result_page;
3609
3610 vm_page_t src_page;
3611 vm_page_t src_top_page;
3612 vm_prot_t src_prot;
3613
3614 vm_page_t dst_page;
3615 vm_page_t dst_top_page;
3616 vm_prot_t dst_prot;
3617
3618 vm_map_size_t amount_left;
3619 vm_object_t old_copy_object;
3620 kern_return_t error = 0;
3621
3622 vm_map_size_t part_size;
3623
3624 /*
3625 * In order not to confuse the clustered pageins, align
3626 * the different offsets on a page boundary.
3627 */
3628 vm_object_offset_t src_lo_offset = vm_object_trunc_page(src_offset);
3629 vm_object_offset_t dst_lo_offset = vm_object_trunc_page(dst_offset);
3630 vm_object_offset_t src_hi_offset = vm_object_round_page(src_offset + *copy_size);
3631 vm_object_offset_t dst_hi_offset = vm_object_round_page(dst_offset + *copy_size);
3632
3633 #define RETURN(x) \
3634 MACRO_BEGIN \
3635 *copy_size -= amount_left; \
3636 MACRO_RETURN(x); \
3637 MACRO_END
3638
3639 amount_left = *copy_size;
3640 do { /* while (amount_left > 0) */
3641 /*
3642 * There may be a deadlock if both source and destination
3643 * pages are the same. To avoid this deadlock, the copy must
3644 * start by getting the destination page in order to apply
3645 * COW semantics if any.
3646 */
3647
3648 RetryDestinationFault: ;
3649
3650 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3651
3652 vm_object_lock(dst_object);
3653 vm_object_paging_begin(dst_object);
3654
3655 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3656 switch (vm_fault_page(dst_object,
3657 vm_object_trunc_page(dst_offset),
3658 VM_PROT_WRITE|VM_PROT_READ,
3659 FALSE,
3660 interruptible,
3661 dst_lo_offset,
3662 dst_hi_offset,
3663 VM_BEHAVIOR_SEQUENTIAL,
3664 &dst_prot,
3665 &dst_page,
3666 &dst_top_page,
3667 (int *)0,
3668 &error,
3669 dst_map->no_zero_fill,
3670 FALSE, NULL, 0)) {
3671 case VM_FAULT_SUCCESS:
3672 break;
3673 case VM_FAULT_RETRY:
3674 goto RetryDestinationFault;
3675 case VM_FAULT_MEMORY_SHORTAGE:
3676 if (vm_page_wait(interruptible))
3677 goto RetryDestinationFault;
3678 /* fall thru */
3679 case VM_FAULT_INTERRUPTED:
3680 RETURN(MACH_SEND_INTERRUPTED);
3681 case VM_FAULT_FICTITIOUS_SHORTAGE:
3682 vm_page_more_fictitious();
3683 goto RetryDestinationFault;
3684 case VM_FAULT_MEMORY_ERROR:
3685 if (error)
3686 return (error);
3687 else
3688 return(KERN_MEMORY_ERROR);
3689 }
3690 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3691
3692 old_copy_object = dst_page->object->copy;
3693
3694 /*
3695 * There exists the possibility that the source and
3696 * destination pages are the same. But we can't
3697 * easily determine that now. If they are the
3698 * same, the call to vm_fault_page() for the
3699 * destination page will deadlock. To prevent this we
3700 * wire the page so we can drop busy without having
3701 * the page daemon steal the page. We clean up the
3702 * top page but keep the paging reference on the object
3703 * holding the dest page so it doesn't go away.
3704 */
3705
3706 vm_page_lock_queues();
3707 vm_page_wire(dst_page);
3708 vm_page_unlock_queues();
3709 PAGE_WAKEUP_DONE(dst_page);
3710 vm_object_unlock(dst_page->object);
3711
3712 if (dst_top_page != VM_PAGE_NULL) {
3713 vm_object_lock(dst_object);
3714 VM_PAGE_FREE(dst_top_page);
3715 vm_object_paging_end(dst_object);
3716 vm_object_unlock(dst_object);
3717 }
3718
3719 RetrySourceFault: ;
3720
3721 if (src_object == VM_OBJECT_NULL) {
3722 /*
3723 * No source object. We will just
3724 * zero-fill the page in dst_object.
3725 */
3726 src_page = VM_PAGE_NULL;
3727 result_page = VM_PAGE_NULL;
3728 } else {
3729 vm_object_lock(src_object);
3730 src_page = vm_page_lookup(src_object,
3731 vm_object_trunc_page(src_offset));
3732 if (src_page == dst_page) {
3733 src_prot = dst_prot;
3734 result_page = VM_PAGE_NULL;
3735 } else {
3736 src_prot = VM_PROT_READ;
3737 vm_object_paging_begin(src_object);
3738
3739 XPR(XPR_VM_FAULT,
3740 "vm_fault_copy(2) -> vm_fault_page\n",
3741 0,0,0,0,0);
3742 switch (vm_fault_page(src_object,
3743 vm_object_trunc_page(src_offset),
3744 VM_PROT_READ,
3745 FALSE,
3746 interruptible,
3747 src_lo_offset,
3748 src_hi_offset,
3749 VM_BEHAVIOR_SEQUENTIAL,
3750 &src_prot,
3751 &result_page,
3752 &src_top_page,
3753 (int *)0,
3754 &error,
3755 FALSE,
3756 FALSE, NULL, 0)) {
3757
3758 case VM_FAULT_SUCCESS:
3759 break;
3760 case VM_FAULT_RETRY:
3761 goto RetrySourceFault;
3762 case VM_FAULT_MEMORY_SHORTAGE:
3763 if (vm_page_wait(interruptible))
3764 goto RetrySourceFault;
3765 /* fall thru */
3766 case VM_FAULT_INTERRUPTED:
3767 vm_fault_copy_dst_cleanup(dst_page);
3768 RETURN(MACH_SEND_INTERRUPTED);
3769 case VM_FAULT_FICTITIOUS_SHORTAGE:
3770 vm_page_more_fictitious();
3771 goto RetrySourceFault;
3772 case VM_FAULT_MEMORY_ERROR:
3773 vm_fault_copy_dst_cleanup(dst_page);
3774 if (error)
3775 return (error);
3776 else
3777 return(KERN_MEMORY_ERROR);
3778 }
3779
3780
3781 assert((src_top_page == VM_PAGE_NULL) ==
3782 (result_page->object == src_object));
3783 }
3784 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
3785 vm_object_unlock(result_page->object);
3786 }
3787
3788 if (!vm_map_verify(dst_map, dst_version)) {
3789 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3790 vm_fault_copy_cleanup(result_page, src_top_page);
3791 vm_fault_copy_dst_cleanup(dst_page);
3792 break;
3793 }
3794
3795 vm_object_lock(dst_page->object);
3796
3797 if (dst_page->object->copy != old_copy_object) {
3798 vm_object_unlock(dst_page->object);
3799 vm_map_verify_done(dst_map, dst_version);
3800 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3801 vm_fault_copy_cleanup(result_page, src_top_page);
3802 vm_fault_copy_dst_cleanup(dst_page);
3803 break;
3804 }
3805 vm_object_unlock(dst_page->object);
3806
3807 /*
3808 * Copy the page, and note that it is dirty
3809 * immediately.
3810 */
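/*
 * Worked example of the unaligned case below (illustrative values,
 * assuming a 0x1000-byte page): with src_offset 0x1200 and dst_offset
 * 0x3600, src_po = 0x200 and dst_po = 0x600.  Since dst_po > src_po,
 * part_size = PAGE_SIZE - dst_po = 0xA00, clamped to amount_left if
 * that is smaller; after advancing by part_size the next iteration
 * starts page-aligned in the destination.
 */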
3811
3812 if (!page_aligned(src_offset) ||
3813 !page_aligned(dst_offset) ||
3814 !page_aligned(amount_left)) {
3815
3816 vm_object_offset_t src_po,
3817 dst_po;
3818
3819 src_po = src_offset - vm_object_trunc_page(src_offset);
3820 dst_po = dst_offset - vm_object_trunc_page(dst_offset);
3821
3822 if (dst_po > src_po) {
3823 part_size = PAGE_SIZE - dst_po;
3824 } else {
3825 part_size = PAGE_SIZE - src_po;
3826 }
3827 if (part_size > (amount_left)){
3828 part_size = amount_left;
3829 }
3830
3831 if (result_page == VM_PAGE_NULL) {
3832 vm_page_part_zero_fill(dst_page,
3833 dst_po, part_size);
3834 } else {
3835 vm_page_part_copy(result_page, src_po,
3836 dst_page, dst_po, part_size);
3837 if(!dst_page->dirty){
3838 vm_object_lock(dst_object);
3839 dst_page->dirty = TRUE;
3840 vm_object_unlock(dst_page->object);
3841 }
3842
3843 }
3844 } else {
3845 part_size = PAGE_SIZE;
3846
3847 if (result_page == VM_PAGE_NULL)
3848 vm_page_zero_fill(dst_page);
3849 else{
3850 vm_page_copy(result_page, dst_page);
3851 if(!dst_page->dirty){
3852 vm_object_lock(dst_object);
3853 dst_page->dirty = TRUE;
3854 vm_object_unlock(dst_page->object);
3855 }
3856 }
3857
3858 }
3859
3860 /*
3861 * Unlock everything, and return
3862 */
3863
3864 vm_map_verify_done(dst_map, dst_version);
3865
3866 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3867 vm_fault_copy_cleanup(result_page, src_top_page);
3868 vm_fault_copy_dst_cleanup(dst_page);
3869
3870 amount_left -= part_size;
3871 src_offset += part_size;
3872 dst_offset += part_size;
3873 } while (amount_left > 0);
3874
3875 RETURN(KERN_SUCCESS);
3876 #undef RETURN
3877
3878 /*NOTREACHED*/
3879 }
3880
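/*
 * Illustrative sketch (not part of the build): a minimal caller of
 * vm_fault_copy().  The destination map version is assumed to have
 * been captured by an earlier lookup on dst_map, and the caller holds
 * references (but no locks) on both objects and on the map, as the
 * routine's header comment requires.  All names are hypothetical.
 */
#if 0
static kern_return_t
example_copy_range(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_t		dst_object,
	vm_object_offset_t	dst_offset,
	vm_map_t		dst_map,
	vm_map_version_t	*dst_version,	/* from a prior map lookup */
	vm_map_size_t		size)
{
	vm_map_size_t	copy_size = size;	/* INOUT: amount actually copied */
	kern_return_t	kr;

	kr = vm_fault_copy(src_object, src_offset,
			   &copy_size,
			   dst_object, dst_offset,
			   dst_map, dst_version,
			   THREAD_ABORTSAFE);

	if (kr == KERN_SUCCESS && copy_size < size) {
		/*
		 * Destination map verification failed part way through;
		 * the caller would normally re-lookup the map and retry
		 * the remaining (size - copy_size) bytes.
		 */
	}
	return kr;
}
#endif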
3881 #ifdef notdef
3882
3883 /*
3884 * Routine: vm_fault_page_overwrite
3885 *
3886 * Description:
3887 * A form of vm_fault_page that assumes that the
3888 * resulting page will be overwritten in its entirety,
3889 * making it unnecessary to obtain the correct *contents*
3890 * of the page.
3891 *
3892 * Implementation:
3893 * XXX Untested. Also unused. Eventually, this technology
3894 * could be used in vm_fault_copy() to advantage.
3895 */
3896 vm_fault_return_t
3897 vm_fault_page_overwrite(
3898 register
3899 vm_object_t dst_object,
3900 vm_object_offset_t dst_offset,
3901 vm_page_t *result_page) /* OUT */
3902 {
3903 register
3904 vm_page_t dst_page;
3905 kern_return_t wait_result;
3906
3907 #define interruptible THREAD_UNINT /* XXX */
3908
3909 while (TRUE) {
3910 /*
3911 * Look for a page at this offset
3912 */
3913
3914 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3915 == VM_PAGE_NULL) {
3916 /*
3917 * No page, no problem... just allocate one.
3918 */
3919
3920 dst_page = vm_page_alloc(dst_object, dst_offset);
3921 if (dst_page == VM_PAGE_NULL) {
3922 vm_object_unlock(dst_object);
3923 VM_PAGE_WAIT();
3924 vm_object_lock(dst_object);
3925 continue;
3926 }
3927
3928 /*
3929 * Pretend that the memory manager
3930 * write-protected the page.
3931 *
3932 * Note that we will be asking for write
3933 * permission without asking for the data
3934 * first.
3935 */
3936
3937 dst_page->overwriting = TRUE;
3938 dst_page->page_lock = VM_PROT_WRITE;
3939 dst_page->absent = TRUE;
3940 dst_page->unusual = TRUE;
3941 dst_object->absent_count++;
3942
3943 break;
3944
3945 /*
3946 * When we bail out, we might have to throw
3947 * away the page created here.
3948 */
3949
3950 #define DISCARD_PAGE \
3951 MACRO_BEGIN \
3952 vm_object_lock(dst_object); \
3953 dst_page = vm_page_lookup(dst_object, dst_offset); \
3954 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3955 VM_PAGE_FREE(dst_page); \
3956 vm_object_unlock(dst_object); \
3957 MACRO_END
3958 }
3959
3960 /*
3961 * If the page is write-protected...
3962 */
3963
3964 if (dst_page->page_lock & VM_PROT_WRITE) {
3965 /*
3966 * ... and an unlock request hasn't been sent
3967 */
3968
3969 if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3970 vm_prot_t u;
3971 kern_return_t rc;
3972
3973 /*
3974 * ... then send one now.
3975 */
3976
3977 if (!dst_object->pager_ready) {
3978 wait_result = vm_object_assert_wait(dst_object,
3979 VM_OBJECT_EVENT_PAGER_READY,
3980 interruptible);
3981 vm_object_unlock(dst_object);
3982 if (wait_result == THREAD_WAITING)
3983 wait_result = thread_block(THREAD_CONTINUE_NULL);
3984 if (wait_result != THREAD_AWAKENED) {
3985 DISCARD_PAGE;
3986 return(VM_FAULT_INTERRUPTED);
3987 }
3988 continue;
3989 }
3990
3991 u = dst_page->unlock_request |= VM_PROT_WRITE;
3992 vm_object_unlock(dst_object);
3993
3994 if ((rc = memory_object_data_unlock(
3995 dst_object->pager,
3996 dst_offset + dst_object->paging_offset,
3997 PAGE_SIZE,
3998 u)) != KERN_SUCCESS) {
3999 if (vm_fault_debug)
4000 printf("vm_object_overwrite: memory_object_data_unlock failed\n");
4001 DISCARD_PAGE;
4002 return((rc == MACH_SEND_INTERRUPTED) ?
4003 VM_FAULT_INTERRUPTED :
4004 VM_FAULT_MEMORY_ERROR);
4005 }
4006 vm_object_lock(dst_object);
4007 continue;
4008 }
4009
4010 /* ... fall through to wait below */
4011 } else {
4012 /*
4013 * If the page isn't being used for other
4014 * purposes, then we're done.
4015 */
4016 if ( ! (dst_page->busy || dst_page->absent ||
4017 dst_page->error || dst_page->restart) )
4018 break;
4019 }
4020
4021 wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
4022 vm_object_unlock(dst_object);
4023 if (wait_result == THREAD_WAITING)
4024 wait_result = thread_block(THREAD_CONTINUE_NULL);
4025 if (wait_result != THREAD_AWAKENED) {
4026 DISCARD_PAGE;
4027 return(VM_FAULT_INTERRUPTED);
4028 }
4029 }
4030
4031 *result_page = dst_page;
4032 return(VM_FAULT_SUCCESS);
4033
4034 #undef interruptible
4035 #undef DISCARD_PAGE
4036 }
4037
4038 #endif /* notdef */
4039
4040 #if VM_FAULT_CLASSIFY
4041 /*
4042 * Temporary statistics gathering support.
4043 */
4044
4045 /*
4046 * Statistics arrays:
4047 */
4048 #define VM_FAULT_TYPES_MAX 5
4049 #define VM_FAULT_LEVEL_MAX 8
4050
4051 int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
4052
4053 #define VM_FAULT_TYPE_ZERO_FILL 0
4054 #define VM_FAULT_TYPE_MAP_IN 1
4055 #define VM_FAULT_TYPE_PAGER 2
4056 #define VM_FAULT_TYPE_COPY 3
4057 #define VM_FAULT_TYPE_OTHER 4
4058
4059
4060 void
4061 vm_fault_classify(vm_object_t object,
4062 vm_object_offset_t offset,
4063 vm_prot_t fault_type)
4064 {
4065 int type, level = 0;
4066 vm_page_t m;
4067
4068 while (TRUE) {
4069 m = vm_page_lookup(object, offset);
4070 if (m != VM_PAGE_NULL) {
4071 if (m->busy || m->error || m->restart || m->absent ||
4072 fault_type & m->page_lock) {
4073 type = VM_FAULT_TYPE_OTHER;
4074 break;
4075 }
4076 if (((fault_type & VM_PROT_WRITE) == 0) ||
4077 ((level == 0) && object->copy == VM_OBJECT_NULL)) {
4078 type = VM_FAULT_TYPE_MAP_IN;
4079 break;
4080 }
4081 type = VM_FAULT_TYPE_COPY;
4082 break;
4083 }
4084 else {
4085 if (object->pager_created) {
4086 type = VM_FAULT_TYPE_PAGER;
4087 break;
4088 }
4089 if (object->shadow == VM_OBJECT_NULL) {
4090 type = VM_FAULT_TYPE_ZERO_FILL;
4091 break;
4092 }
4093
4094 offset += object->shadow_offset;
4095 object = object->shadow;
4096 level++;
4097 continue;
4098 }
4099 }
4100
4101 if (level > VM_FAULT_LEVEL_MAX)
4102 level = VM_FAULT_LEVEL_MAX;
4103
4104 vm_fault_stats[type][level] += 1;
4105
4106 return;
4107 }
4108
4109 /* cleanup routine to call from debugger */
4110
4111 void
4112 vm_fault_classify_init(void)
4113 {
4114 int type, level;
4115
4116 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
4117 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
4118 vm_fault_stats[type][level] = 0;
4119 }
4120 }
4121
4122 return;
4123 }
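/*
 * Illustrative sketch (not part of the build): a debugger-callable dump
 * of the classification counters accumulated above, in the same spirit
 * as vm_fault_classify_init().  The routine name is hypothetical.
 */
#if 0
void
vm_fault_classify_dump(void)
{
	int type, level;

	for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
		for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
			if (vm_fault_stats[type][level] != 0)
				printf("vm_fault_classify: type %d level %d count %d\n",
				       type, level, vm_fault_stats[type][level]);
		}
	}
}
#endif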
4124 #endif /* VM_FAULT_CLASSIFY */